如何选择合适的估算器(estimator)

https://scikit-learn.org/stable/tutorial/machine_learning_map/index.html

 

 

模型跑分,选个高分的:

        

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.linear_model import LinearRegression as LR
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import AdaBoostClassifier as ADA
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.ensemble import GradientBoostingClassifier as GDBC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.naive_bayes import BernoulliNB as BLNB
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
#获得训练集和测试集
data=pd.read_csv(r'E:\workspaces\python\sklearn\data\loans.csv')
X=data.drop('safe_loans',axis=1)
y=data.safe_loans
d=defaultdict(LabelEncoder)
X_trans = X.apply(lambda x: d[x.name].fit_transform(x))
x_train,x_test,y_train,y_test=train_test_split(X_trans,y,test_size=0.2,random_state=1)
#训练数据获得得分
def func(clf):
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    return score
#决策树
print('决策树结果为:{}'.format(func(DTC())))
#线性回归
print('线性回归结果为:{}'.format(func(LR())))
#KNN
print('KNN结果为:{}'.format(func(KNN())))
#随机森林
print('随机森林结果为:{}'.format(func(RFC(n_estimators=20))))
#ADAboost
print('Adaboost结果为:{}'.format(func(ADA(n_estimators=20))))
#GDBC
print('GDBT结果为:{}'.format(func(GDBC(n_estimators=20))))
#Bagging
print('Bagging结果为:{}'.format(func(BC(n_estimators=20))))
#伯努利贝叶斯
print('伯努利贝叶斯结果为:{}'.format(func(BLNB())))
#高斯贝叶斯
print('高斯贝叶斯结果为:{}'.format(func(GNB())))

 

posted @ 2020-03-21 15:47  406383048  阅读(421)  评论(0)    收藏  举报