机器学习任务3

import numpy as np 

import pandas as pd 

from sklearn.datasets import load_iris 

from sklearn.tree import DecisionTreeClassifier 

from sklearn.model_selection import train_test_split, cross_val_score 

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 

 

# Train and evaluate an entropy-based decision tree on the Iris dataset:
# hold-out split, 5-fold cross-validation on the training part, then a final
# evaluation on the held-out test part.

# Single seed shared by the data split and the classifier so that a rerun
# reproduces the same scores.
RANDOM_STATE = 42

# 1. Load the Iris dataset.
iris = load_iris()
X = iris.data
y = iris.target

# 2. Hold-out split: 33% of the samples go to the test set; stratify=y keeps
#    the class proportions of y in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=RANDOM_STATE, stratify=y
)

# 3. Build the decision tree classifier.
#    NOTE: scikit-learn's DecisionTreeClassifier is an optimized CART
#    implementation. criterion='entropy' selects information gain as the split
#    criterion, which approximates C4.5 but is not C4.5 itself (no gain ratio,
#    binary splits only). ccp_alpha enables minimal cost-complexity
#    post-pruning.
#    random_state is set explicitly because the features are randomly permuted
#    at each split, so an unseeded tree can differ from run to run.
clf = DecisionTreeClassifier(
    criterion='entropy', ccp_alpha=0.01, random_state=RANDOM_STATE
)

# 4. Estimate performance with 5-fold cross-validation on the training set only,
#    so the test set stays untouched until the final evaluation.
cross_val_scores = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
print(f'五折交叉验证准确度: {cross_val_scores.mean()}')

# 5. Fit the model on the full training set.
clf.fit(X_train, y_train)

# 6. Predict labels for the held-out test set.
y_pred = clf.predict(X_test)

# 7. Compute test-set metrics. average='weighted' averages the per-class scores
#    weighted by each class's support, since this is a multi-class problem.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Report the test-set results.
print(f'测试集准确度: {accuracy}')
print(f'测试集精度: {precision}')
print(f'测试集召回率: {recall}')
print(f'测试集 F1 值: {f1}')

 

posted @ 2024-10-11 10:53  芊羽鱼  阅读(4)  评论(0)    收藏  举报