How to run sklearn’s GridSearchCV with TensorFlow Keras models.

To find optimal hyperparameters for a neural network one would usually use RandomizedSearchCV or GridSearchCV from the sklearn library.

TensorFlow Keras wrappers such as KerasClassifier do not let you change the number of neurons per layer through the fit() call.

GridSearchCV and RandomizedSearchCV call the fit() function for every parameter combination, so we need a custom estimator that wraps *KerasClassifier* in order to specify a different number of neurons per layer.
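
To see why every tunable value has to be an __init__ argument of the estimator, this is roughly what the search does for each candidate, sketched here with a plain LogisticRegression just to show the mechanics:

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

base = LogisticRegression()
for params in [{'C': 0.1}, {'C': 1.0}]:
    candidate = clone(base).set_params(**params)   # fresh, unfitted copy per candidate
    # candidate.fit(X_train, y_train) is then called on every CV split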

To create a Keras model we need a function in the global scope, which we will call *build_model2*.
It builds a neural network with two hidden layers (plus an optional third when nhidden2 is non-zero), with dropout after each hidden layer and a custom output_bias.
The output_bias is important for problems with a highly unbalanced dataset.

import numpy as np
import tensorflow as tf
from tensorflow import keras

# defined once so MyNN.fit() can look the metric names up later
METRICS = [keras.metrics.AUC(name='auc'), keras.metrics.Precision(name='precision'),
           keras.metrics.Recall(name='recall')]

def build_model2(nfirst, nfeatures, nhidden1, nhidden2, dropout, output_bias, lr):
    output_bias = tf.keras.initializers.Constant(np.log([output_bias]))
    model = keras.Sequential([
        keras.layers.Dense(nfirst, activation='relu', input_shape=(nfeatures,)),
        keras.layers.Dropout(dropout),
        keras.layers.Dense(nhidden1, activation='relu'),
        keras.layers.Dropout(dropout)])
    if nhidden2 != 0:
        model.add(keras.layers.Dense(nhidden2, activation='relu'))
        model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(1, activation='sigmoid', bias_initializer=output_bias))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss=keras.losses.BinaryCrossentropy(), metrics=METRICS)
    return model
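
Since build_model2 applies np.log() to output_bias internally, a reasonable starting value (following the usual TensorFlow guidance for unbalanced data) is the ratio of positive to negative examples in the training labels. A small sketch with toy labels, just to illustrate the call:

# toy, heavily unbalanced labels (~10% positives), purely for illustration
y_toy = np.random.binomial(1, 0.1, size=1000)
pos, neg = y_toy.sum(), (y_toy == 0).sum()

model = build_model2(nfirst=10, nfeatures=20, nhidden1=10, nhidden2=0,
                     dropout=0.2, output_bias=pos / neg, lr=0.005)
model.summary()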

Now we will create a custom sklearn classifier built around the Keras model, which supports grid search over a different number of neurons per hidden layer.
To avoid potential memory leaks we also add a custom destructor.

import gc

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import SCORERS          # get_scorer() on newer sklearn versions
from sklearn.utils.multiclass import unique_labels
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

class MyNN(BaseEstimator, ClassifierMixin):
    def __init__(self, lr=0.005, nfirst=1, nhidden1=10, nhidden2=0, dropout=0,
                 output_bias=1, batch_size=100, epochs=10, scale_pos_weight=1):
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.nfirst = nfirst
        self.nhidden1 = nhidden1
        self.nhidden2 = nhidden2
        self.dropout = dropout
        self.output_bias = output_bias
        self.scale_pos_weight = scale_pos_weight

    def fit(self, X, y, **fit_params):
        # warn about missing values when X/y are pandas objects
        try:
            if X.isnull().values.any() or y.isnull().values.any():
                print("X or y contain nans")
        except AttributeError:   # numpy arrays have no isnull()
            pass
        self.classes_ = unique_labels(y)
        if self.scale_pos_weight is not None:
            fit_params['class_weight'] = {0: 1, 1: self.scale_pos_weight}
        # rebuild the KerasClassifier on every fit() so the architecture always
        # reflects the current hyperparameters set by the search
        self.model = KerasClassifier(build_model2,
                                     **{'nfeatures': X.shape[-1], 'lr': self.lr,
                                        'nfirst': self.nfirst, 'nhidden1': self.nhidden1,
                                        'nhidden2': self.nhidden2, 'dropout': self.dropout,
                                        'output_bias': self.output_bias,
                                        'epochs': self.epochs, 'batch_size': self.batch_size},
                                     verbose=0)

        # eval_metric/eval_set are handled here and must not reach the keras fit()
        fit_paramsnoevalset = fit_params.copy()
        for k in ['eval_metric', 'eval_set']:
            fit_paramsnoevalset.pop(k, None)

        if fit_params.get('eval_set') is None:
            self.history = self.model.fit(X, y, **fit_paramsnoevalset)
        else:
            self.history = self.model.fit(X, y,
                                          validation_data=(fit_params['eval_set'][0][0],
                                                           fit_params['eval_set'][0][1]),
                                          **fit_paramsnoevalset)
            if fit_params['eval_metric'] not in [m.name for m in METRICS]:
                try:
                    scorer = SCORERS[fit_params['eval_metric']]._score_func
                except (KeyError, TypeError):   # a custom callable, e.g. minusf1
                    scorer = fit_params['eval_metric']
                self.score = scorer(fit_params['eval_set'][0][1],
                                    self.model.predict(fit_params['eval_set'][0][0]))
            else:
                self.score = self.history.history['val_' + fit_params['eval_metric']]
        return self

    def evals_result(self):
        return {'validation_0': self.score}

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)

    def __del__(self):
        # release the TensorFlow graph between fits to avoid memory leaks
        tf.keras.backend.clear_session()
        gc.collect()
        if hasattr(self, 'model'):
            del self.model
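
Before plugging MyNN into a parameter search it is worth a quick standalone sanity check. Below is a minimal sketch on a synthetic unbalanced dataset (the make_classification data, the 9:1 class weighting and the tiny epoch count are assumptions for illustration only):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, n_features=20, weights=[0.9, 0.1], random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, stratify=y, random_state=0)

clf = MyNN(lr=0.005, nfirst=10, nhidden1=10, nhidden2=0, dropout=0.2,
           output_bias=0.1, batch_size=100, epochs=5, scale_pos_weight=9)
clf.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_metric='auc')
print(clf.evals_result())            # validation AUC per epoch
print(clf.predict_proba(X_val)[:3])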

And now we can use the MyNN model in GridSearchCV or RandomizedSearchCV like this (after specifying a cross-validation iterator cv, a training DataFrame dftrain with feature columns xs and target column 'y', and setting scale_pos_weight for a classification problem where only 10% of the samples are positive):

from sklearn.model_selection import RandomizedSearchCV

randcv = RandomizedSearchCV(
    estimator=MyNN(lr=0.005, nfirst=10, nhidden1=10, nhidden2=0, dropout=0.2,
                   output_bias=0.1, batch_size=100, epochs=1),
    param_distributions=dict(epochs=[50, 100, 200], batch_size=[10, 100],
                             nhidden1=[2, 5, 10], nfirst=[10, 20], dropout=[0.2],
                             output_bias=[0.1, 0.9], scale_pos_weight=[1, 10]),
    n_iter=30, scoring='f1', n_jobs=1, cv=cv, verbose=1).fit(dftrain[xs], dftrain['y'])

pd.DataFrame(randcv.cv_results_).sort_values(by='mean_test_score', ascending=False)
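
After the search finishes (with the default refit=True), the best hyperparameter combination, its mean cross-validated F1 score and the estimator refitted on the whole training set are available directly on randcv:

print(randcv.best_params_)
print(randcv.best_score_)
best_model = randcv.best_estimator_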

https://www.tensorflow.org/api_docs/python/tf/keras/wrappers/scikit_learn/KerasClassifier
