# whystea3
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression,Ridge
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import roc_curve
from sklearn.model_selection import GridSearchCV
# Loan-default model comparison script.
#
# Loads the loan data set, holds out 30% of the rows for testing, fits each
# candidate estimator with its default settings and reports the test ROC AUC,
# then tunes a random forest with a cross-validated grid search.

path = 'data/data_loan.csv'
data = pd.read_csv(path)

# 'user_id' is dropped together with the target so it is not used as a feature.
data_x = data.drop(columns=['user_id', 'y'])
data_y = data['y']

# No random_state is set, so the split differs between runs.
x, test_x, y, test_y = train_test_split(data_x, data_y, train_size=0.7)


def _positive_class_scores(model, features):
    """Return one ranking score per row of *features* from a fitted model.

    Not every estimator in ``models`` offers ``predict_proba`` (``Ridge`` is a
    regressor, and ``SVC`` only offers it when built with
    ``probability=True``), so fall back to ``decision_function`` and finally
    to ``predict``. ROC AUC only needs a score that ranks the samples.
    """
    if hasattr(model, 'predict_proba'):
        # Column 1 is the second class in ``model.classes_``.
        # NOTE(review): assumes 'y' is a binary label -- confirm.
        return model.predict_proba(features)[:, 1]
    if hasattr(model, 'decision_function'):
        return model.decision_function(features)
    return model.predict(features)


# Estimator classes to compare. ``roc_auc_score`` used to be listed here, but
# it is a metric function rather than an estimator and cannot be fitted.
models = [
    LogisticRegression,
    Ridge,
    SVC,
    DecisionTreeClassifier,
    KNeighborsClassifier,
    RandomForestClassifier,
    BaggingClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
]

# Test-set ROC AUC keyed by estimator class name.
auc_scores = {}
for model_cls in models:
    # Instantiate the class taken from the list (the loop previously replaced
    # it with LogisticRegression() on every iteration).
    cls = model_cls()
    cls.fit(x, y)
    pre_y = _positive_class_scores(cls, test_x)
    auc_scores[model_cls.__name__] = roc_auc_score(test_y, pre_y)
    print('{}: test AUC = {:.4f}'.format(
        model_cls.__name__, auc_scores[model_cls.__name__]))

# Tune the random forest: 5-fold cross-validated search over tree count and
# depth, selecting by ROC AUC.
grid_param = [{'n_estimators': [50, 100, 200, 300], 'max_depth': [3, 5, 7]}]
cls = RandomForestClassifier()
searcher = GridSearchCV(cls, grid_param, cv=5, scoring='roc_auc', n_jobs=4)
searcher.fit(x, y)

# best_estimator_ is the model refit on the full training split with the
# winning parameter combination (GridSearchCV's default refit=True).
best_cls = searcher.best_estimator_
pre_y = best_cls.predict_proba(test_x)[:, 1]
auc_scores['RandomForestClassifier (grid search)'] = roc_auc_score(test_y, pre_y)
print('RandomForestClassifier (grid search) {}: test AUC = {:.4f}'.format(
    searcher.best_params_, auc_scores['RandomForestClassifier (grid search)']))