# Ensemble Learning (集成学习)
# 1. Random Forest (随机森林)
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset and hold out 30% of it for testing so that
# this section runs on its own. The split parameters are the same ones the
# AdaBoost and gradient-boosting sections of this file use.
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.3, random_state=1
)

# Create the random forest classifier with a fixed seed (random_state=0).
rf = RandomForestClassifier(random_state=0)
rf.fit(x_train, y_train)
score_rf = rf.score(x_test, y_test)
# Print the accuracy measured on the held-out test set.
print('Random Forest : ', score_rf)
# 2. AdaBoost - adaptive boosting (AdaBoost自适应提升)
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split,cross_val_score
# DecisionTreeClassifier is the base estimator below, so it must be imported
# before it is used.
from sklearn.tree import DecisionTreeClassifier

# Load the breast-cancer dataset and hold out 30% of it for testing.
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.3, random_state=1
)

# Boost 20 decision trees (n_estimators is the number of base classifiers).
# NOTE(review): scikit-learn deprecated algorithm='SAMME.R' in 1.4 and removed
# it in 1.6; on newer releases drop the `algorithm` argument - confirm against
# the installed version.
abc = AdaBoostClassifier(
    DecisionTreeClassifier(),
    algorithm='SAMME.R',
    n_estimators=20,
    learning_rate=0.1,
)
abc.fit(x_train, y_train)
score_abc = abc.score(x_test, y_test)
# Print the accuracy measured on the held-out test set.
print('Ada Boost : ', score_abc)
# 3. Gradient Boosting (梯度提升GradientBoosting)
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split,cross_val_score
import matplotlib.pyplot as plt
import numpy as np
# Load the breast-cancer dataset and hold out 30% of it for testing.
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=1,
)

# Train the boosted ensemble first and the single decision tree second.
# fit() returns the estimator itself, so construction and training can be
# chained while keeping the same training order.
gbc = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100).fit(x_train, y_train)
dt = DecisionTreeClassifier().fit(x_train, y_train)

# Score both models on the held-out test set.
score_gbc, score_dt = (model.score(x_test, y_test) for model in (gbc, dt))

# Print the accuracy of each model.
for label, accuracy in (('Gradient Boost : ', score_gbc), ('Decision Tree : ', score_dt)):
    print(label, accuracy)
# Measure how the learning_rate parameter affects test-set accuracy.
learning_rates = np.arange(0.1, 1, 0.05)
gbc_scores = []
for rate in learning_rates:
    # Train a fresh model for every rate instead of mutating the shared `gbc`,
    # so `gbc` keeps its configured learning_rate for the code that follows.
    model = GradientBoostingClassifier(n_estimators=100, learning_rate=rate)
    model.fit(x_train, y_train)
    gbc_scores.append(model.score(x_test, y_test))

# Plot accuracy against the learning rates that were actually tried, so the
# x-axis values match the 'learning_rate' axis label.
plt.figure()
plt.title('Gradient Boost')
plt.xlabel('learning_rate')
plt.ylabel('Accuracy')
plt.plot(learning_rates, gbc_scores)
plt.show()
# Mean cross-validation accuracy per repetition, one list per model.
gbc_scores = []
dt_scores = []
# Run 10-fold cross-validation (cv=10) on the full dataset 20 times for both
# models and record the mean fold accuracy of every repetition.
for _ in range(20):
    gbc_score = cross_val_score(gbc, cancer.data, cancer.target, cv=10).mean()
    gbc_scores.append(gbc_score)
    dt_score = cross_val_score(dt, cancer.data, cancer.target, cv=10).mean()
    dt_scores.append(dt_score)

# 浙公网安备 33010602011771号