# 导入必要的库
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score, KFold, cross_val_predict
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# (1) Alternative data source: read the Iris dataset from a local CSV file
#     with pandas. Left disabled; the equivalent call would be
#     pd.read_csv('iris.csv'), followed by printing the resulting frame.
# (2) Load the Iris dataset that ships with scikit-learn.
iris = load_iris()
# Emit the heading and the feature matrix, each on its own line.
print("Iris 数据集:", iris.data, sep="\n")
# (3) Train and evaluate the model with five-fold cross-validation.
# Shuffled five-way splitter; the fixed seed keeps fold membership the same
# for both cross-validation calls below.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
# Logistic regression classifier capped at 200 solver iterations.
model = LogisticRegression(max_iter=200)
# One accuracy score per held-out fold.
scores = cross_val_score(
    estimator=model,
    X=iris.data,
    y=iris.target,
    scoring='accuracy',
    cv=kf,
)
# Out-of-fold prediction for every sample, using the same splitter.
y_pred = cross_val_predict(
    estimator=model,
    X=iris.data,
    y=iris.target,
    cv=kf,
)
# (4) Report the model's accuracy, precision, recall and F1 score.
# Per-fold accuracy from the cross-validation run, followed by its mean.
print("交叉验证准确度分数:", scores)
print("平均准确度:", scores.mean())

# Chinese display names for the three Iris classes, passed to the report as
# target_names.
target_names_chinese = ['山鸢尾', '变色鸢尾', '维吉尼亚鸢尾']
# Human-readable text report; printed in full at the end.
report_chinese = classification_report(iris.target, y_pred, target_names=target_names_chinese)

# Per-class precision / recall / F1 / support.
# Read the numbers from the structured (dict) form of the report rather than
# splitting the formatted text on whitespace: token counting silently drops
# any class whose display name contains a space, and the text rows are
# right-aligned, so prefix checks such as startswith('macro') never match.
report_by_class = classification_report(
    iris.target,
    y_pred,
    target_names=target_names_chinese,
    output_dict=True,
)
for class_name in target_names_chinese:
    class_metrics = report_by_class[class_name]
    precision = class_metrics['precision']
    recall = class_metrics['recall']
    f1_score = class_metrics['f1-score']
    # The dict form may carry support as a float; print it as a whole count.
    support = int(class_metrics['support'])
    print(f"类别: {class_name}, 精度: {precision:.2f}, 召回率: {recall:.2f}, F1 值: {f1_score:.2f}, 支持数: {support}")

print("中文分类报告:\n", report_chinese)