from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Train and evaluate an unpruned, entropy-based decision tree on the iris
# dataset: hold out 1/3 of the samples for testing, estimate accuracy with
# 5-fold cross-validation on the training split, then report macro-averaged
# metrics on the held-out test split.

# Load the iris dataset (feature matrix and class labels).
iris = load_iris()
X, y = iris.data, iris.target

# Hold-out split: reserve 1/3 of the samples as the test set.
# A fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1 / 3, random_state=42
)

# scikit-learn's DecisionTreeClassifier does not implement C4.5 directly, but
# criterion='entropy' makes it choose splits by information gain.
# max_depth=None leaves the depth unbounded, i.e. no pre-pruning is applied.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42, max_depth=None)

# 5-fold cross-validation on the training split only. cross_val_score fits
# clones of the estimator, so it neither needs nor alters a fitted `clf`.
scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')

# Fit the final model on the whole training split and predict the test set.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Accuracy plus macro-averaged precision, recall and F1 (each class is
# weighted equally regardless of its support).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Report the results.
print(f"五折交叉验证准确度: {scores.mean():.2f} +/- {scores.std():.2f}")
print(f"测试集准确度: {accuracy:.2f}")
print(f"测试集精度: {precision:.2f}")
print(f"测试集召回率: {recall:.2f}")
print(f"测试集F1值: {f1:.2f}")