12.6(1)

机器学习实验如下:

import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import requests
import os

# 1. 使用pandas从本地读取iris数据集

def read_with_pandas():
    """Load the iris dataset from a local ``iris.csv`` file with pandas.

    If ``iris.csv`` does not exist in the current working directory, it is
    first downloaded from the UCI machine-learning repository and saved
    under that name. The file is then parsed as a header-less CSV with five
    named columns, and its shape and first rows are printed.

    Returns:
        The loaded ``DataFrame``, or ``None`` when the file had to be
        downloaded and the download (or saving it to disk) failed.
    """
    # Download the file only when it is not already present locally.
    if not os.path.exists('iris.csv'):
        print("正在下载iris数据集...")
        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
        try:
            # A timeout keeps the script from hanging on a dead connection.
            response = requests.get(url, timeout=30)
            # Reject HTTP error responses so an error page is never saved
            # as if it were the dataset.
            response.raise_for_status()
            with open('iris.csv', 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError):
            # Best effort: the caller falls back to the sklearn copy.
            print("下载失败,将直接使用sklearn数据集")
            return None
        print("数据集下载完成!")

    # The raw UCI file has no header row, so column names are supplied here.
    columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
    df = pd.read_csv('iris.csv', header=None, names=columns)

    print("\n【Pandas加载结果】")
    print(f"数据集形状: {df.shape}")
    print("前5行数据:")
    print(df.head())
    return df

# 2. 从scikit-learn直接加载iris数据集

def load_from_sklearn():
    """Load the iris dataset bundled with scikit-learn and print a summary.

    Returns:
        A tuple ``(X, y, target_names)`` taken from the ``load_iris()``
        result: its ``data``, ``target`` and ``target_names`` attributes.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    print("\n【Sklearn加载结果】")
    print(f"数据集形状: {X.shape}")
    print(f"特征名称: {iris.feature_names}")
    print(f"类别名称: {iris.target_names}")
    return X, y, iris.target_names

# 3-4. 五折交叉验证和模型评估

def cross_validation_and_evaluation(X, y, target_names):
    """Run 5-fold cross-validation of a random forest and print the metrics.

    For every fold the classifier is fit on the training split and scored on
    the held-out split with accuracy and macro-averaged precision, recall
    and F1. The per-fold values and their means over all folds are printed.

    Args:
        X: Feature matrix; indexed with integer index arrays
            (``X[train_idx]``).
        y: Label vector, indexed the same way as ``X``.
        target_names: Class names. Accepted for interface compatibility;
            not used by this function.
    """
    # Fixed seeds make the forest and the fold assignment reproducible.
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    # Per-fold metric values, averaged after the loop.
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []

    print("\n【五折交叉验证结果】")
    for fold, (train_idx, test_idx) in enumerate(kf.split(X), start=1):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        # Train on this fold's training split, then predict the held-out part.
        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_test)

        # Macro averaging weights every class equally.
        acc = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')

        accuracies.append(acc)
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

        print(f"\n折 {fold}:")
        print(f"  准确度: {acc:.4f}")
        print(f"  精度: {precision:.4f}")
        print(f"  召回率: {recall:.4f}")
        print(f"  F1值: {f1:.4f}")

    # Report the mean of each metric over the five folds.
    print("\n【平均评估指标】")
    print(f"平均准确度: {np.mean(accuracies):.4f}")
    print(f"平均精度: {np.mean(precisions):.4f}")
    print(f"平均召回率: {np.mean(recalls):.4f}")
    print(f"平均F1值: {np.mean(f1_scores):.4f}")

# 主函数

def main():
    """Run the iris experiment end to end.

    Loads the dataset once with pandas (for display only) and once from
    scikit-learn, then cross-validates a classifier on the scikit-learn copy.
    """
    print("========== Iris数据集实验 ==========")

    # Step 1: read with pandas. The function prints its own summary; the
    # returned frame is not needed for the later steps.
    print("\n步骤1: 使用pandas读取iris数据集")
    read_with_pandas()

    # Step 2: load the copy bundled with scikit-learn.
    print("\n步骤2: 从scikit-learn加载iris数据集")
    X, y, target_names = load_from_sklearn()

    # Steps 3-4: cross-validate and evaluate.
    print("\n步骤3-4: 五折交叉验证和模型评估")
    cross_validation_and_evaluation(X, y, target_names)

    print("\n========== 实验完成 ==========")

# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

posted @ 2025-12-06 22:19  山蚯  阅读(2)  评论(0)    收藏  举报