【754】机器学习、多分类、情感分析
多分类问题
- 参考:3 种方法实现逻辑回归多分类【OneVsOne, OneVsRest】
- 参考:《少年的你》短评情感分析——机器学习之逻辑回归
- 参考:sklearn.multiclass.OneVsOneClassifier【中文版】、【英文版】
- 参考:Python机器学习-多元分类的5种模型
逻辑回归
SVM
- 参考:如何使用sklearn中的SVM【SVC分类、SVR回归】
- 参考:Scikit-learn实战之SVM分类
决策树
- 参考:决策树实现鸢尾花三分类
随机森林
AdaBoost
朴素贝叶斯
KNN
GradientBoosting
参考:机器学习:xgboost的sklearn接口XGBClassifier(分类)
将多分类转换为二分类来进行相应的计算
相关代码:
1. LogisticRegression
# 1. Logistic regression wrapped in a one-vs-one multiclass strategy.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the iris dataset and hold out 30% of the samples for evaluation.
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=0
)

# OvO strategy: one binary LogisticRegression per pair of classes.
model = OneVsOneClassifier(LogisticRegression(C=1.0, tol=1e-6))
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
============================================
0.9555555555555556
# OvR strategy: one binary LogisticRegression per class vs. the rest.
model = OneVsRestClassifier(LogisticRegression(C=1.0, tol=1e-6))
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
============================================
0.8888888888888888
2. SVM
SVC
# 2. Support vector classification behind the same multiclass wrappers.
from sklearn import datasets
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier

# Same iris data and 70/30 split as the logistic-regression example.
iris_bunch = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris_bunch.data, iris_bunch.target, test_size=0.3, random_state=0
)

# OvO strategy: one SVC trained for every pair of classes.
model = OneVsOneClassifier(svm.SVC())
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
============================================
0.9555555555555556
# OvR strategy: one SVC trained per class against all remaining classes.
model = OneVsRestClassifier(svm.SVC())
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
============================================
0.8888888888888888
LinearSVC
# LinearSVC inside the OvR wrapper, keeping the examples uniform.
model = OneVsRestClassifier(svm.LinearSVC())
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
3. Decision Tree
# 3. A single decision tree handles the three-class problem natively.
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
4. Random Forest
# 4. Random forest of decision trees, fitting with 2 parallel jobs.
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_jobs=2)
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
5. AdaBoost
# 5. AdaBoost over decision trees using the discrete SAMME algorithm.
# NOTE(review): the base DecisionTreeClassifier() grows to unlimited depth,
# so each round can already fit the training set on its own; boosting
# conventionally uses a shallow stump (max_depth=1) — confirm intent.
from sklearn.ensemble import AdaBoostClassifier

model = AdaBoostClassifier(
    DecisionTreeClassifier(),
    algorithm="SAMME",
    n_estimators=200,
    learning_rate=0.5,
)
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
6. 朴素贝叶斯
MultinomialNB
# 6. Multinomial naive Bayes with Laplace smoothing (alpha=1.0).
# NOTE(review): MultinomialNB is meant for count-like non-negative features;
# iris measurements are positive so this runs, but GaussianNB is the
# conventional choice for continuous data.
from sklearn.naive_bayes import MultinomialNB, GaussianNB

model = MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
GaussianNB
# Gaussian naive Bayes: models each feature with a per-class normal.
model = GaussianNB()
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
BernoulliNB
# Bernoulli naive Bayes.
# Fix: BernoulliNB was never imported (the earlier import line brings in
# only MultinomialNB and GaussianNB), so this snippet raised NameError.
from sklearn.naive_bayes import BernoulliNB

model = BernoulliNB()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(accuracy_score(y_test, y_pred))
7. KNN
# 7. k-nearest neighbours with sklearn defaults; multiclass comes for free
# via majority vote among the neighbours.
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
8. GradientBoosting
# 8. Gradient-boosted decision trees with sklearn defaults.
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier()
y_pred = model.fit(x_train, y_train).predict(x_test)
print(accuracy_score(y_test, y_pred))
9. XGBoost
# 9. XGBoost's scikit-learn-compatible classifier.
# Fix: import from the package root; `xgboost.sklearn` is an internal
# module path, and the documented public import is `from xgboost import ...`.
from xgboost import XGBClassifier

model = XGBClassifier()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(accuracy_score(y_test, y_pred))
浙公网安备 33010602011771号