Skip to content
Extraits de code Groupes Projets
Valider 37c1c035 rédigé par William RUFFINE's avatar William RUFFINE Validation de Kubat
Parcourir les fichiers

Adding the modelisations

parent 628fe5b0
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
import numpy as np
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import GridSearchCV
def logmodel_prediction(X, y, test_size, random_state):
    """
    Fit a logistic regression on a train/test split of X and y, print the
    classification report and the test score, and return that score.

    Parameters
    ----------
    X : the dataframe to use in the logistic regression
    y : the target to reach in the logistic regression
    test_size : the size of the test dataframe (forwarded to train_test_split)
    random_state : controls the shuffling applied to the data before applying the split.

    Returns
    -------
    score : the value of ``LogisticRegression.score`` on the held-out test split
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )

    logmodel = LogisticRegression()
    logmodel.fit(X_train, y_train)

    predictions = logmodel.predict(X_test)
    print(classification_report(y_test, predictions))

    # Evaluate once so the printed value and the returned value always agree.
    score = logmodel.score(X_test, y_test)
    print("score :", score)
    return score
def random_forest_prediction(X, y, test_size, random_state1, random_state2):
    """
    Fit a random forest classifier on a train/test split of X and y, print
    the classification report and the test score, and return that score.

    Parameters
    ----------
    X : the dataframe to use in the random forest prediction
    y : the target to reach in the random forest prediction
    test_size : the size of the test dataframe (forwarded to train_test_split)
    random_state1 : controls the shuffling applied to the data before applying the split.
    random_state2 : seed passed as ``random_state`` to the random forest classifier

    Returns
    -------
    score : the value of ``RandomForestClassifier.score`` on the held-out test split
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state1,
    )

    est = RandomForestClassifier(random_state=random_state2)
    est.fit(X_train, y_train)

    predictions = est.predict(X_test)
    print(classification_report(y_test, predictions))

    # Evaluate once so the printed value and the returned value always agree.
    score = est.score(X_test, y_test)
    print("score: ", score)
    return score
def RFE_predicion(X, y, test_size, random_state, n_feature_to_select, verbose=1):
    """
    Fit a random forest wrapped in recursive feature elimination (RFE) on a
    train/test split of X and y, print the classification report and the
    test score, and return that score.

    The underlying random forest is always seeded with ``random_state=42``;
    only the train/test split is controlled by the ``random_state`` argument.

    Parameters
    ----------
    X : the dataframe to use in the random forest prediction
    y : the target to reach in the random forest prediction
    test_size : the size of the test dataframe (forwarded to train_test_split)
    random_state : controls the shuffling applied to the data before applying the split.
    n_feature_to_select : the number of features to select (forwarded to RFE
        as ``n_features_to_select``)
    verbose : verbosity level forwarded to RFE (defaults to 1)

    Returns
    -------
    score : the value of ``RFE.score`` on the held-out test split
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )

    estimator = RandomForestClassifier(random_state=42)
    feat_select = RFE(
        estimator=estimator,
        n_features_to_select=n_feature_to_select,
        verbose=verbose,
    )
    feat_select.fit(X_train, y_train)

    predictions = feat_select.predict(X_test)
    print(classification_report(y_test, predictions))

    # Evaluate once so the printed value and the returned value always agree.
    score = feat_select.score(X_test, y_test)
    print("score: ", score)
    return score
def GSCV_prediction(X, y, test_size, random_state):
    """
    Tune a random forest with an exhaustive grid search (5-fold cross
    validation) on a train/test split of X and y, print the chosen
    parameters, the classification report and the test score, and return
    that score.

    The grid covers ``n_estimators``, ``max_depth``, ``min_samples_split``
    and ``min_samples_leaf``. The random forest is always seeded with
    ``random_state=42``; only the train/test split is controlled by the
    ``random_state`` argument.

    Parameters
    ----------
    X : the dataframe to use in the random forest prediction
    y : the target to reach in the random forest prediction
    test_size : the size of the test dataframe (forwarded to train_test_split)
    random_state : controls the shuffling applied to the data before applying the split.

    Returns
    -------
    score : the value of ``GridSearchCV.score`` on the held-out test split
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )

    estimator = RandomForestClassifier(random_state=42)
    parameters = {
        'n_estimators': [50, 100, 150, 200, 250, 300],
        'max_depth': np.arange(6, 16, 2),
        'min_samples_split': np.arange(10, 30, 5),
        'min_samples_leaf': np.arange(5, 20, 5),
    }

    # n_jobs=-1 asks the search to use every available core.
    gd_sr2 = GridSearchCV(
        estimator=estimator,
        param_grid=parameters,
        cv=5,
        n_jobs=-1,
    )
    gd_sr2.fit(X_train, y_train)

    predictions = gd_sr2.predict(X_test)
    print("Parameters chosen: ", gd_sr2.best_params_)
    print(classification_report(y_test, predictions))

    # Evaluate once so the printed value and the returned value always agree.
    score = gd_sr2.score(X_test, y_test)
    print("score: ", score)
    return score
\ No newline at end of file
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Veuillez vous inscrire ou vous connecter pour commenter