# 2024.11.28 (Thursday)
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

# (1) Read the iris dataset from a local file with pandas.
# If the dataset has been saved as CSV, it can be loaded like this instead of
# going through scikit-learn:
# iris_data = pd.read_csv('iris.csv')

# (2) Load the iris dataset directly from scikit-learn.
iris = load_iris()
X = iris.data  # features
y = iris.target  # labels


def manual_k_fold_cross_validation(X, y, model, k=5, random_state=None):
    """Run a hand-rolled k-fold cross-validation and return the mean metrics.

    The sample indices are shuffled once and split into ``k`` folds. Each fold
    is used once as the test set while the remaining folds form the training
    set. ``model`` is re-fitted on every training set and scored on the
    matching test set.

    Args:
        X: Feature matrix with one row per sample (anything ``np.asarray``
            accepts).
        y: Labels, one per row of ``X``.
        model: Estimator object exposing ``fit(X, y)`` and ``predict(X)``.
            It is fitted in place once per fold.
        k: Number of folds; must satisfy ``2 <= k <= n_samples``.
        random_state: Seed for the shuffle. ``None`` (the default) shuffles
            with NumPy's global random state, so ``np.random.seed`` still
            controls the result; any other value is used to seed a private
            ``np.random.RandomState``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)`` holding the mean of each
        metric over the ``k`` folds. Precision, recall and F1 are
        macro-averaged.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or ``k`` is outside
            ``2 <= k <= n_samples``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = X.shape[0]

    if y.shape[0] != n_samples:
        raise ValueError(
            f"X and y have inconsistent lengths: {n_samples} != {y.shape[0]}"
        )
    if k < 2 or k > n_samples:
        raise ValueError(
            f"k must satisfy 2 <= k <= n_samples ({n_samples}), got {k}"
        )

    indices = np.arange(n_samples)
    if random_state is None:
        # Keep using the global RNG so existing np.random.seed() callers
        # observe the same shuffles as before.
        np.random.shuffle(indices)
    else:
        np.random.RandomState(random_state).shuffle(indices)

    # array_split spreads the n_samples % k leftover samples over the first
    # folds, so every sample is tested exactly once. When k divides n_samples
    # the folds are the same equal-sized slices as a plain n_samples // k
    # partition.
    folds = np.array_split(indices, k)

    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []

    for fold, test_indices in enumerate(folds):
        # Everything outside the current fold is training data.
        train_indices = np.concatenate(folds[:fold] + folds[fold + 1:])

        X_train, X_test = X[train_indices], X[test_indices]
        y_train, y_test = y[train_indices], y[test_indices]

        # Train on this fold's training split, then predict its test split.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Record this fold's metrics.
        accuracy_list.append(accuracy_score(y_test, y_pred))
        precision_list.append(precision_score(y_test, y_pred, average='macro'))
        recall_list.append(recall_score(y_test, y_pred, average='macro'))
        f1_list.append(f1_score(y_test, y_pred, average='macro'))

    # Average every metric over the folds.
    return (
        np.mean(accuracy_list),
        np.mean(precision_list),
        np.mean(recall_list),
        np.mean(f1_list),
    )


# Use a random forest classifier.
model = RandomForestClassifier(random_state=42)

# Run the five-fold cross-validation. The shuffle is seeded as well so the
# reported numbers are reproducible from run to run.
accuracy, precision, recall, f1 = manual_k_fold_cross_validation(
    X, y, model, k=5, random_state=42
)

# Print the evaluation results.
print("五折交叉验证结果:")
print(f"准确度 (Accuracy): {accuracy:.4f}")
print(f"精度 (Precision): {precision:.4f}")
print(f"召回率 (Recall): {recall:.4f}")
print(f"F1 值 (F1 Score): {f1:.4f}")