# Machine Learning Task 3
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Single seed shared by the hold-out split and the decision tree so that the
# cross-validation score and the test metrics are reproducible across runs.
RANDOM_STATE = 42

# 1. Load the Iris dataset.
iris = load_iris()
X = iris.data
y = iris.target

# 2. Hold-out split: 33% of the samples are reserved for testing, and
#    stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=RANDOM_STATE, stratify=y
)

# 3. Create the decision-tree classifier.
# NOTE: scikit-learn's trees are CART-based; criterion='entropy' only
# approximates C4.5 by splitting on information gain (it does not use C4.5's
# gain ratio). ccp_alpha enables post-pruning via minimal cost-complexity
# pruning. random_state is fixed because the estimator permutes features at
# every split, so tied splits could otherwise be resolved differently per run.
clf = DecisionTreeClassifier(
    criterion='entropy', ccp_alpha=0.01, random_state=RANDOM_STATE
)

# 4. Estimate generalisation with 5-fold cross-validation on the training
#    portion only, so the test set stays untouched until the final evaluation.
cross_val_scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
print(f'五折交叉验证准确度: {cross_val_scores.mean()}')

# 5. Fit the model on the full training set.
clf.fit(X_train, y_train)

# 6. Predict labels for the held-out test set.
y_pred = clf.predict(X_test)

# 7. Compute performance metrics. average='weighted' averages the per-class
#    scores weighted by each class's support in y_test.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Report the test-set results.
print(f'测试集准确度: {accuracy}')
print(f'测试集精度: {precision}')
print(f'测试集召回率: {recall}')
print(f'测试集 F1 值: {f1}')

# Web page footer (site filing number, not part of the program): 浙公网安备 33010602011771号