1115随笔
今天完成了 Python 的算法学习,写了 300 行代码完成老师留下的作业。
接下来我准备上网查阅资料,为接下来的工作做准备。
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold
import warnings
# Silence every warning so library notices do not clutter the experiment output.
warnings.simplefilter('ignore')

# Banner; the trailing "\n" leaves a blank line before the first section.
print("===== 实验:数据准备与模型评估 =====\n")
# Task 2: read the iris dataset from a local file with pandas.
print("1. 从本地读取iris数据集(使用pandas):")

# Raw string literal: in a normal literal the "\U" of "\Users" starts a
# unicode escape (a SyntaxError) and "\a" of "\admin" would become BEL.
iris_file_path = r'c:\Users\admin\Desktop\十一月使用\机械学习\iris\iris.data'
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

try:
    # header=None: the file is read as having no header row, so the column
    # names are supplied explicitly.
    df_local = pd.read_csv(iris_file_path, header=None, names=column_names)
except Exception as e:
    # Best effort: the rest of the script does not use df_local, so report
    # the problem and carry on instead of aborting.
    print(f" 读取本地文件时出错:{e}")
    print()
else:
    print(f" 成功读取本地数据文件,数据形状:{df_local.shape}")
    print(" 数据前5行:")
    print(df_local.head())
    print()
# Task 3: load the iris dataset directly from scikit-learn.
print("2. 从scikit-learn加载iris数据集:")
iris = load_iris()
X, y = iris.data, iris.target  # feature matrix and target labels

# One print call; sep="\n" puts each item on its own line and the empty
# last item produces the trailing blank line.
print(
    f" 数据集特征形状:{X.shape}",
    f" 数据集标签形状:{y.shape}",
    f" 特征名称:{iris.feature_names}",
    f" 目标类别:{iris.target_names}",
    "",
    sep="\n",
)
# Task 4: five-fold cross-validation.
print("3. 五折交叉验证模型训练:")

# Decision-tree classifier with a fixed seed.
clf = DecisionTreeClassifier(random_state=42)

# Manual five-fold split: shuffled, with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric histories.
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []

for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # Refit on this fold's training part, then predict its held-out part.
    y_pred = clf.fit(X_train, y_train).predict(X_test)

    # Accuracy takes no averaging mode; the other three are macro-averaged.
    accuracy = accuracy_score(y_test, y_pred)
    precision, recall, f1 = (
        metric(y_test, y_pred, average='macro')
        for metric in (precision_score, recall_score, f1_score)
    )

    # Record this fold's results.
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    print(f" 折 {fold}: 准确度 = {accuracy:.4f}, 精度 = {precision:.4f}, 召回率 = {recall:.4f}, F1值 = {f1:.4f}")

# Blank line after the last fold.
print()
# Task 5: report the mean and standard deviation of each metric over the folds.
report_lines = [
    "4. 五折交叉验证平均评估指标:",
    f" 平均准确度: {np.mean(accuracy_scores):.4f} ± {np.std(accuracy_scores):.4f}",
    f" 平均精度: {np.mean(precision_scores):.4f} ± {np.std(precision_scores):.4f}",
    f" 平均召回率: {np.mean(recall_scores):.4f} ± {np.std(recall_scores):.4f}",
    f" 平均F1值: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}",
]
print("\n".join(report_lines))
print()
# Quick cross-check using scikit-learn's cross_val_score helper
# (5 folds, accuracy scoring).
print("5. 使用sklearn的cross_val_score进行验证:")
accuracy_cv = cross_val_score(clf, X, y, scoring='accuracy', cv=5)
cv_mean, cv_std = np.mean(accuracy_cv), np.std(accuracy_cv)
print(f" 交叉验证准确度: {cv_mean:.4f} ± {cv_std:.4f}")

# Closing banner.
print("\n===== 实验完成 =====")

浙公网安备 33010602011771号