To find optimal parameters for a neural network one would usually use RandomizedSearchCV or GridSearchCV from the sklearn library.
TensorFlow Keras models wrapped in KerasClassifier, however, do not permit a different number of neurons per layer when fit() is called: the architecture is fixed when the underlying model is built.
GridSearchCV and RandomizedSearchCV call fit() on each parameter combination, so we need a custom estimator built around *KerasClassifier* that rebuilds the network, with the requested number of neurons per layer, inside fit().
To create a Keras model we need a function in the global scope, which we will call *build_model2*. It builds a neural network with two hidden layers (plus an optional third one when nhidden2 is nonzero), dropout after each hidden layer, and a custom output bias. The output bias matters for problems with a highly imbalanced dataset: initializing the final layer's bias to the log-odds of the positive class makes the untrained network predict the class prior, which speeds up early training.
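Concretely, build_model2 below expects the positive-to-negative odds as output_bias, so that the untrained network's sigmoid output equals the positive-class frequency. A small numeric check with made-up counts:

```python
import numpy as np

# Made-up label counts: 100 positives, 900 negatives (10% positive rate).
pos, neg = 100, 900
output_bias = pos / neg          # ~0.111 -- the kind of value passed below
b = np.log(output_bias)          # what build_model2 stores in the output bias
print(1 / (1 + np.exp(-b)))      # sigmoid(b) == pos / (pos + neg) == 0.1
```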
```python
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Metrics tracked during training; the names below become the
# 'val_auc', 'val_precision', 'val_recall' keys of the History object.
METRICS = [
    keras.metrics.AUC(name='auc'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
]

def build_model2(nfirst, nfeatures, nhidden1, nhidden2, dropout, output_bias, lr):
    # Initialize the output neuron's bias to the log-odds of the positive class.
    output_bias = tf.keras.initializers.Constant(np.log([output_bias]))
    model = keras.Sequential([
        keras.layers.Dense(nfirst, activation='relu', input_shape=(nfeatures,)),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(nhidden1, activation='relu'),
        keras.layers.Dropout(dropout),
    ])
    if nhidden2 != 0:  # optional third hidden layer
        model.add(keras.layers.Dense(nhidden2, activation='relu'))
        model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(1, activation='sigmoid', bias_initializer=output_bias))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=METRICS)
    return model
```
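As a quick smoke test (all parameter values here are arbitrary), the builder can be called directly:

```python
# 20 input features, two hidden layers (nhidden2=0 disables the third).
model = build_model2(nfirst=10, nfeatures=20, nhidden1=10, nhidden2=0,
                     dropout=0.2, output_bias=0.1, lr=0.005)
model.summary()
```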
Now we will create a custom sklearn classifier based on the Keras model, which supports grid search with a different number of neurons per hidden layer. To avoid potential memory leaks we also add a custom destructor.
```python
import gc
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import SCORERS  # sklearn < 1.2; newer versions use get_scorer()
from sklearn.utils.multiclass import unique_labels
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

class MyNN(BaseEstimator, ClassifierMixin):
    def __init__(self, lr=0.005, nfirst=1, nhidden1=10, nhidden2=0, dropout=0,
                 output_bias=1, batch_size=100, epochs=10, scale_pos_weight=1):
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.nfirst = nfirst
        self.nhidden1 = nhidden1
        self.nhidden2 = nhidden2
        self.dropout = dropout
        self.output_bias = output_bias
        self.scale_pos_weight = scale_pos_weight

    def fit(self, X, y, **fit_params):
        try:
            if X.isnull().values.any() or y.isnull().values.any():
                print("X or y contain NaNs")
        except AttributeError:
            pass  # X or y is not a pandas object
        self.classes_ = unique_labels(y)
        if self.scale_pos_weight is not None:
            fit_params['class_weight'] = {0: 1, 1: self.scale_pos_weight}
        # Build a fresh KerasClassifier on every fit() call, so each parameter
        # combination tried by the search gets its own architecture.
        self.model = KerasClassifier(build_model2, nfeatures=X.shape[-1],
                                     lr=self.lr, nfirst=self.nfirst,
                                     nhidden1=self.nhidden1, nhidden2=self.nhidden2,
                                     dropout=self.dropout, output_bias=self.output_bias,
                                     batch_size=self.batch_size, epochs=self.epochs,
                                     verbose=0)
        # eval_metric/eval_set follow the XGBoost convention; Keras fit()
        # does not understand them, so strip them from the pass-through params.
        fit_params_no_evalset = fit_params.copy()
        for k in ['eval_metric', 'eval_set']:
            fit_params_no_evalset.pop(k, None)
        if fit_params.get('eval_set') is None:
            self.history = self.model.fit(X, y, **fit_params_no_evalset)
        else:
            self.history = self.model.fit(
                X, y,
                validation_data=(fit_params['eval_set'][0][0],
                                 fit_params['eval_set'][0][1]),
                **fit_params_no_evalset)
        if fit_params.get('eval_metric') is not None:
            if fit_params['eval_metric'] not in [m.name for m in METRICS]:
                try:
                    scorer = SCORERS[fit_params['eval_metric']]._score_func
                except KeyError:  # a custom callable metric, e.g. minusf1
                    scorer = fit_params['eval_metric']
                self.score = scorer(fit_params['eval_set'][0][1],
                                    self.model.predict(fit_params['eval_set'][0][0]))
            else:
                self.score = self.history.history['val_' + fit_params['eval_metric']]
        return self  # sklearn convention: fit() returns the estimator itself

    def evals_result(self):
        return {'validation_0': self.score}

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)

    def __del__(self):
        # Release the TensorFlow graph state; a parameter search fits many
        # models, and without this the process gradually leaks memory.
        tf.keras.backend.clear_session()
        gc.collect()
        if hasattr(self, 'model'):
            del self.model
```
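Used on its own, MyNN follows the usual fit/predict pattern, and the eval_set/eval_metric keywords mirror the XGBoost-style API the class emulates. A minimal sketch on synthetic data (the shapes, values, and split are made up for illustration):

```python
import numpy as np

# Toy imbalanced data: ~10% positives, 20 features; purely illustrative.
rng = np.random.default_rng(0)
X_train = rng.normal(size=(900, 20))
y_train = (rng.random(900) < 0.1).astype(int)
X_val = rng.normal(size=(100, 20))
y_val = (rng.random(100) < 0.1).astype(int)

clf = MyNN(lr=0.005, nfirst=10, nhidden1=10, nhidden2=0, dropout=0.2,
           output_bias=0.1, batch_size=100, epochs=5)
clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_metric='auc')
print(clf.evals_result())  # {'validation_0': [per-epoch validation AUC]}
```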
And now we can use the MyNN model in GridSearchCV or RandomizedSearchCV, with dftrain as the training set (xs being its feature columns) and scale_pos_weight included in the grid for a classification problem where only 10% of the labels are positive. The search also needs a cross-validation iterator cv.
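For an imbalanced problem a stratified splitter is a natural choice for cv; a minimal sketch (StratifiedKFold is our assumption here, any sklearn CV splitter works):

```python
from sklearn.model_selection import StratifiedKFold

# Stratified folds preserve the ~10% positive rate in every split.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
```

With cv defined, the search runs like this: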
```python
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV

# dftrain (the training DataFrame) and xs (its feature columns) are assumed
# to be defined beforehand.
randcv = RandomizedSearchCV(
    estimator=MyNN(lr=0.005, nfirst=10, nhidden1=10, nhidden2=0, dropout=0.2,
                   output_bias=0.1, batch_size=100, epochs=1),
    param_distributions=dict(
        epochs=[50, 100, 200],
        batch_size=[10, 100],
        nhidden1=[2, 5, 10],
        nfirst=[10, 20],
        dropout=[0.2],
        output_bias=[0.1, 0.9],
        scale_pos_weight=[1, 10]),
    n_iter=30, scoring='f1', n_jobs=1, cv=cv, verbose=1,
).fit(dftrain[xs], dftrain['y'])

pd.DataFrame(randcv.cv_results_).sort_values(by='mean_test_score', ascending=False)
```
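Besides the full cv_results_ table, the fitted search exposes the winning configuration directly:

```python
print(randcv.best_params_)  # the sampled parameter combination that won
print(randcv.best_score_)   # its mean cross-validated f1
```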
https://www.tensorflow.org/api_docs/python/tf/keras/wrappers/scikit_learn/KerasClassifier