To find optimal parameters for a neural network, one would usually use RandomizedSearchCV or GridSearchCV from the sklearn library.
TensorFlow Keras wrappers such as KerasClassifier do not permit specifying a different number of neurons per layer when their fit() function is called.
GridSearchCV and RandomizedSearchCV call the fit() function on each parameter iteration, thus we need to create a new sklearn estimator wrapping *KerasClassifier* to be able to specify a different number of neurons per layer.
To create a keras model we need a function in the global scope which we will call *build_model2*.
It will build a neural network with 2 hidden layers, with dropout after each hidden layer and a custom output_bias.
Output_bias is important for problems with a highly unbalanced dataset.
def build_model2(nfirst, nfeatures, nhidden1, nhidden2, dropout, output_bias, lr):
    """Build and compile a binary-classification Keras model.

    Architecture: Dense(nfirst) -> Dropout -> Dense(nhidden1) -> Dropout
    [-> Dense(nhidden2) -> Dropout] -> Dense(1, sigmoid).

    Parameters
    ----------
    nfirst : int
        Neurons in the first hidden layer.
    nfeatures : int
        Number of input features.
    nhidden1 : int
        Neurons in the second hidden layer.
    nhidden2 : int
        Neurons in the optional third hidden layer; 0 skips the layer.
    dropout : float
        Dropout rate applied after every hidden layer.
    output_bias : float
        Prior of the positive class; the output bias is initialised to
        log(output_bias), which helps on highly unbalanced datasets.
    lr : float
        Adam learning rate.

    Returns
    -------
    A compiled keras model with a single sigmoid output unit.
    """
    # Start the output neuron at log(prior) so the untrained model predicts
    # the observed class frequency instead of 0.5.
    bias_init = tf.keras.initializers.Constant(np.log([output_bias]))
    model = keras.Sequential([
        keras.layers.Dense(nfirst, activation='relu', input_shape=(nfeatures,)),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(nhidden1, activation='relu'),
        keras.layers.Dropout(dropout),
    ])
    if nhidden2 != 0:  # third hidden layer is optional
        model.add(keras.layers.Dense(nhidden2, activation='relu'))
        model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(1, activation='sigmoid',
                                 bias_initializer=bias_init))
    # BUG FIX: 'loss' is not a valid entry for `metrics` and makes compile()
    # raise ValueError; the loss is always reported anyway, so drop it.
    model.compile(optimizer=keras.optimizers.Adam(lr=lr),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=['auc', 'precision', 'recall'])
    return model
Now we will create custom sklearn classifier based on keras model, which will support GridSearch with different number of neurons for hidden layers.
To avoid potential memory leaks we also create custom destructor.
from sklearn.model_selection import cross_validate
from sklearn.base import BaseEstimator,ClassifierMixin
class MyNN(BaseEstimator, ClassifierMixin):
    """Sklearn-compatible classifier wrapping a Keras model built by
    build_model2.

    Unlike a plain KerasClassifier, this estimator exposes the number of
    neurons per hidden layer as ordinary hyper-parameters, so
    GridSearchCV / RandomizedSearchCV can tune them.
    """

    def __init__(self, lr=0.005, nfirst=1, nhidden1=10, nhidden2=0, dropout=0,
                 output_bias=1, batch_size=100, epochs=10, scale_pos_weight=1):
        # sklearn convention: __init__ only stores hyper-parameters verbatim
        # (get_params/set_params and clone() depend on this).
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.nfirst = nfirst
        self.nhidden1 = nhidden1
        self.nhidden2 = nhidden2
        self.dropout = dropout
        self.output_bias = output_bias
        self.scale_pos_weight = scale_pos_weight

    def fit(self, X, y, **fit_params):
        """Build and train the underlying Keras model.

        Optional fit_params (xgboost-style):
          eval_set    -- [(X_val, y_val)]; used as Keras validation_data
          eval_metric -- metric name or callable evaluated on eval_set;
                         the result is stored in self.score and is
                         retrievable via evals_result()
        Any other fit_params are forwarded to the Keras fit().

        Returns self (sklearn contract).
        """
        try:  # best-effort NaN warning; X/y may not be pandas objects
            if X.isnull().values.any() or y.isnull().values.any():
                print("X or y contain nans")
        except AttributeError:
            pass
        self.classes_ = unique_labels(y)
        if self.scale_pos_weight is not None:
            # Re-weight the positive class to compensate for imbalance.
            fit_params['class_weight'] = {0: 1, 1: self.scale_pos_weight}
        self.model = KerasClassifier(
            build_model2,
            nfeatures=X.shape[-1], lr=self.lr,
            nfirst=self.nfirst, nhidden1=self.nhidden1, nhidden2=self.nhidden2,
            dropout=self.dropout, output_bias=self.output_bias,
            epochs=self.epochs, batch_size=self.batch_size,
            verbose=0)
        # Keras' fit() does not understand eval_set/eval_metric, so strip them
        # before forwarding.
        keras_params = {k: v for k, v in fit_params.items()
                        if k not in ('eval_metric', 'eval_set')}
        eval_set = fit_params.get('eval_set')
        if eval_set is None:
            self.history = self.model.fit(X, y, **keras_params)
        else:
            self.history = self.model.fit(
                X, y,
                validation_data=(eval_set[0][0], eval_set[0][1]),
                **keras_params)
        # BUG FIX: the original unconditionally indexed
        # fit_params['eval_metric'] and raised KeyError whenever fit() was
        # called without it -- which is exactly what GridSearchCV /
        # RandomizedSearchCV do.
        eval_metric = fit_params.get('eval_metric')
        if eval_metric is not None:
            if eval_metric not in [m._name for m in METRICS]:
                try:
                    # name of a standard sklearn scorer
                    scorer = SCORERS[eval_metric]._score_func
                except (KeyError, TypeError):  # custom callable, e.g. minusf1
                    scorer = eval_metric
                # NOTE(review): assigning self.score shadows the inherited
                # ClassifierMixin.score() method; kept for backward
                # compatibility with evals_result() consumers.
                self.score = scorer(eval_set[0][1],
                                    self.model.predict(eval_set[0][0]))
            else:
                self.score = self.history.history['val_' + eval_metric]
        # BUG FIX: fit() must return self (sklearn contract), not the wrapped
        # Keras model -- otherwise clone()/Pipeline-based code breaks.
        return self

    def evals_result(self):
        # xgboost-style access to the validation score computed in fit().
        return {'validation_0': self.score}

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)

    def __del__(self):
        # Custom destructor: clear the TF graph and collect garbage so that
        # repeated fits during a grid search do not leak memory.
        tf.keras.backend.clear_session()
        gc.collect()
        if hasattr(self, 'model'):
            del self.model
And now we can use the MyNN model in GridSearchCV or RandomizedSearchCV like this (after specifying a cross-validation iterator via cv, using dftrain as the training set, and setting scale_pos_weight for a classification problem where only 10% of the samples are positive):
# Randomized hyper-parameter search over the custom MyNN estimator.
# `cv` is a cross-validation iterator, `dftrain` the training DataFrame and
# `xs` its feature column names; scale_pos_weight=[1,10] lets the search try
# re-weighting the ~10%-frequent positive class.
randcv = RandomizedSearchCV(estimator=MyNN(lr=0.005,nfirst=10,nhidden1=10,nhidden2=0,dropout=0.2,output_bias=0.1,batch_size=100,epochs=1),\
param_distributions=dict( epochs=[ 50,100,200], batch_size=[ 10,100],nhidden1=[2,5,10],nfirst=[10,20],dropout=[0.2],output_bias=[0.1,0.9],scale_pos_weight=[1,10]),\
n_iter=30, scoring='f1', n_jobs=1, cv=cv, verbose=1).fit(dftrain[xs], dftrain['y'])
# Show the tried parameter combinations ranked by mean cross-validated F1.
pd.DataFrame(randcv.cv_results_).sort_values(by='mean_test_score',ascending=False)
https://www.tensorflow.org/api_docs/python/tf/keras/wrappers/scikit_learn/KerasClassifier
![[<<] PriceDerivatives blog](https://www.pricederivatives.com/en/wp-content/uploads/2014/03/cropped-pricederivatives-blog-logo-Copy3.png)