15. Functional Approximation By NN and RF#

15.1. Functional Approximations by Trees and Neural Networks#

Here we show how the function $x \mapsto \exp(4x)$ can be easily approximated by tree-based methods (a regression tree, a random forest, boosted trees) and by a neural network (a two-layer neural network).
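A regression tree approximates the target with a piecewise-constant function: it partitions the input space into regions $R_1, \dots, R_M$ and predicts the average outcome within each region,

$$
\hat g(x) = \sum_{m=1}^{M} c_m \, \mathbf{1}\{x \in R_m\}, \qquad c_m = \operatorname{mean}\{ Y_i : X_i \in R_m \}.
$$

The pruning examples below control how many regions $M$ the tree is allowed to use.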

15.2. Functional Approximation by a Tree#

import numpy as np
from SyncRNG import SyncRNG  # replicates R's RNG stream, so results match the R version
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import warnings
warnings.filterwarnings('ignore')

s = SyncRNG(seed = 123456)

# Draw 1000 uniform(0,1) points with SyncRNG so the sample matches the R notebook
X_train = np.zeros( 1000 ).reshape( 1000 , 1 )
print( X_train.shape )
for i in range( 0 , X_train.shape[0] ):
    X_train[ i , 0 ] = s.rand()

# Target function: g(x) = exp(4x), evaluated without noise
Y_train = np.exp( 4 * X_train )


# A heavily pruned tree: ccp_alpha is the cost-complexity pruning penalty,
# so a large value forces a coarse approximation with very few leaves
TreeModel = DecisionTreeRegressor( random_state = 0, ccp_alpha = 2.02 )
TreeModel.fit( X_train , Y_train )
pred_TM = TreeModel.predict( X_train )


fig = plt.figure( figsize = ( 5 , 5 ) , dpi = 100 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 20 )
ax1.scatter(  X_train , pred_TM , c='green', marker="o" , s = 20)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
(1000, 1)
[Figure: training data $g(z)=\exp(4z)$ (black) and the pruned tree's piecewise-constant fit (green)]
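As a quick sanity check (not part of the original notebook), one can compute the in-sample mean squared error of the pruned tree; this sketch assumes `sklearn.metrics.mean_squared_error` and the fitted objects above:

from sklearn.metrics import mean_squared_error

# In-sample MSE of the heavily pruned tree; expect a large value, since a
# step function with few levels cannot track exp(4x) closely
print( mean_squared_error( Y_train.ravel() , pred_TM ) )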
s = SyncRNG(seed = 123456)

X_train = np.zeros( 1000 ).reshape( 1000 , 1 )
for i in range( 0 , X_train.shape[0] ):
    X_train[ i , 0 ] = s.rand()

Y_train = np.exp( 4 * X_train )

# A lightly pruned tree: ccp_alpha = 2.02*0.0005/0.01 = 0.101, a 20x smaller
# cost-complexity penalty, so the tree keeps many more leaves
TreeModel = DecisionTreeRegressor( random_state = 0, ccp_alpha = 2.02*0.0005/0.01 )
TreeModel.fit( X_train , Y_train )
pred_TM = TreeModel.predict( X_train )

fig = plt.figure( figsize = ( 5 , 5 ) , dpi = 100 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 20 )
ax1.scatter(  X_train , pred_TM , c='green', marker="o" , s = 20)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
[Figure: training data (black) and the lightly pruned tree's much finer fit (green)]
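To see how much the pruning penalty changes the approximation, one can inspect the number of leaves of each fitted tree; a minimal sketch, refitting the two pruning levels used above in separate variables:

# Compare the complexity of the coarse and fine pruning levels
coarse = DecisionTreeRegressor( random_state = 0, ccp_alpha = 2.02 ).fit( X_train, Y_train )
fine   = DecisionTreeRegressor( random_state = 0, ccp_alpha = 2.02*0.0005/0.01 ).fit( X_train, Y_train )
print( coarse.get_n_leaves() , fine.get_n_leaves() )  # few leaves vs. many leaves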

15.3. Functional Approximation by RF#

Here we show how the function $x \mapsto \exp(4x)$ can be easily approximated by a random forest, which averages many piecewise-constant trees.

s = SyncRNG(seed = 123456)

X_train = np.zeros( 1000 ).reshape( 1000 , 1 )
for i in range( 0 , X_train.shape[0] ):
    X_train[ i , 0 ] = s.rand()

Y_train = np.exp( 4 * X_train )

## Restrictions to make sklearn's random forest
## similar to the R randomForest package defaults

# R's randomForest uses mtry = p/3 and nodesize = 5 for regression,
# mtry = sqrt(p) and nodesize = 1 for classification
if Y_train is not None and np.issubdtype( Y_train.dtype, np.number ):
    mtry = max( [ np.round( ( X_train.shape[ 1 ]/3 ) ).astype(int) , 1 ] )
    nodesize1 = 5
else:
    mtry = np.round( np.sqrt( X_train.shape[ 1 ] ) ).astype(int)
    nodesize1 = 1

# ravel() passes y as a 1d array, avoiding sklearn's DataConversionWarning
RFModel = RandomForestRegressor( random_state = 0 , n_estimators = 500 ,
                                 max_features = mtry ,
                                 n_jobs = 4 ,
                                 min_samples_leaf = nodesize1 )
RFModel.fit( X_train , Y_train.ravel() )
pred_RF = RFModel.predict( X_train )


fig = plt.figure( figsize = ( 5 , 5 ) , dpi = 100 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 20 )
ax1.scatter(  X_train , pred_RF , c='blue', marker="o" , s = 5)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
[Figure: training data (black) and the random forest fit (blue), which smooths the single-tree steps by averaging]
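Random forests also come with an out-of-bag (OOB) measure of fit, computed from the trees that did not see each observation; a minimal sketch, assuming the same data and tuning values as above:

# oob_score=True asks sklearn to compute the out-of-bag R^2 during fitting
RF_oob = RandomForestRegressor( random_state = 0 , n_estimators = 500 ,
                                max_features = mtry ,
                                min_samples_leaf = nodesize1 ,
                                oob_score = True ).fit( X_train , Y_train.ravel() )
print( RF_oob.oob_score_ )  # internal estimate of out-of-sample fit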

15.4. Boosted Trees#

# gbm (R) equivalents: max_depth = interaction.depth,
# learning_rate = shrinkage, n_estimators = n.trees
BoostTreemodel = GradientBoostingRegressor( random_state = 0 , max_depth = 4 , learning_rate = 0.01 , n_estimators = 100 )
BoostTreemodel.fit( X_train, Y_train.ravel() )
pred_BT = BoostTreemodel.predict( X_train )

fig = plt.figure( figsize = ( 5 , 5 ) , dpi = 100 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 20 )
ax1.scatter(  X_train , pred_BT , c='blue', marker="o" , s = 20)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
[Figure: training data (black) and the boosted-tree fit with 100 trees (blue)]
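With a small learning rate, 100 trees are not enough, and `staged_predict` lets one trace how the training error falls as boosting iterations accumulate. A minimal sketch, reusing the fitted `BoostTreemodel` above:

# Training MSE after each boosting iteration
train_mse = [ np.mean( ( Y_train.ravel() - pred )**2 )
              for pred in BoostTreemodel.staged_predict( X_train ) ]
plt.plot( train_mse )
plt.xlabel( "number of trees" )
plt.ylabel( "training MSE" )
plt.show()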
# Ten times more trees at the same learning rate give a much closer fit
BoostTreemodel = GradientBoostingRegressor( random_state = 0 , max_depth = 4 , learning_rate = 0.01 , n_estimators = 1000 )
BoostTreemodel.fit( X_train, Y_train.ravel() )
pred_BT = BoostTreemodel.predict( X_train )

fig = plt.figure( figsize = ( 10 , 10 ) , dpi = 80 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 30 )
ax1.scatter(  X_train , pred_BT , c='blue', marker="o" , s = 30)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
[Figure: training data (black) and the boosted-tree fit with 1000 trees (blue)]

15.5. Same Example with a Neural Network#

# Define the keras model: two hidden ReLU layers and a linear output
model = Sequential()
model.add(Dense(200, input_dim = X_train.shape[1], activation = 'relu'))
model.add(Dense(20, activation = 'relu'))
model.add(Dense(1))

# Compile the keras model: squared-error loss, Adam optimizer,
# mean absolute error tracked as an additional metric
opt = keras.optimizers.Adam(learning_rate=0.01)
mse = tf.keras.losses.MeanSquaredError()
mae = tf.keras.metrics.MeanAbsoluteError(name="mean_absolute_error")

model.compile(loss=mse, optimizer=opt, metrics=[mae])
model.summary()
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_6 (Dense)              (None, 200)               400       
_________________________________________________________________
dense_7 (Dense)              (None, 20)                4020      
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 21        
=================================================================
Total params: 4,441
Trainable params: 4,441
Non-trainable params: 0
_________________________________________________________________
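The parameter counts follow from (inputs + 1 bias) x units per layer: the first layer has (1 + 1) x 200 = 400 parameters, the second (200 + 1) x 20 = 4020, and the output layer (20 + 1) x 1 = 21, for 4,441 in total.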
# Train for a single epoch first: the fit is still visibly poor
num_epochs = 1
model.fit( X_train, Y_train, epochs = num_epochs , batch_size = 10, verbose = 0 )

pred_NN = model.predict( X_train )

fig = plt.figure( figsize = ( 10 , 10 ) , dpi = 80 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 30 )
ax1.scatter(  X_train , pred_NN , c='blue', marker="o" , s = 30)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
[Figure: training data (black) and the neural network fit after 1 epoch (blue)]
# Continue training the same model for 100 more epochs: the fit tightens up
num_epochs = 100
model.fit( X_train, Y_train, epochs = num_epochs , batch_size = 10, verbose = 0 )

pred_NN = model.predict( X_train )

fig = plt.figure( figsize = ( 10 , 10 ) , dpi = 80 )
ax1 = fig.add_subplot( 111 )
ax1.scatter( X_train , Y_train, c='black', marker="o" , s = 30 )
ax1.scatter(  X_train , pred_NN , c='blue', marker="o" , s = 30)
plt.xlabel( "z" )
plt.ylabel( "g(z)" )
plt.show()
[Figure: training data (black) and the neural network fit after 100 additional epochs (blue)]
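Keras also returns the per-epoch loss from `fit`, which makes it easy to check convergence without replotting the fit; a minimal sketch, continuing to train the model above:

# fit() returns a History object whose .history dict holds per-epoch metrics
history = model.fit( X_train, Y_train, epochs = 100, batch_size = 10, verbose = 0 )
plt.plot( history.history['loss'] )
plt.xlabel( "epoch" )
plt.ylabel( "training MSE" )
plt.show()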