# 机器学习任务4 (Machine Learning Task 4)
# 导入必要的库
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
# Step 1: load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
X, y = iris.data, iris.target  # features, labels

# Step 2: hold-out split -- reserve about one third of the samples as the
# test set. stratify=y requests a label-stratified split; random_state pins
# the shuffle so reruns produce the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
    stratify=y,
)

# Step 3: build and train the support vector machine (SVM) classifier.
# SVC is scikit-learn's SVM implementation; the settings used here are:
#   kernel - kernel type such as 'linear', 'poly' or 'rbf' (default 'rbf')
#   C      - regularization parameter (default 1.0)
#   gamma  - kernel coefficient (default 'scale')
svm_settings = {'kernel': 'linear', 'C': 1.0, 'gamma': 'scale'}
model = SVC(**svm_settings).fit(X_train, y_train)  # linear kernel

# Step 4: 5-fold cross-validation accuracy, computed on the training
# portion only so the held-out test set stays untouched.
scores = cross_val_score(
    model,
    X_train,
    y_train,
    cv=5,
    scoring='accuracy',
)
print("五折交叉验证的准确度:", scores)
print("平均准确度:", scores.mean())

# Step 5: score the fitted model on the held-out test set.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Step 6: print the test accuracy followed by the per-class report.
print("测试集的准确度:", accuracy)
report = classification_report(
    y_test,
    y_pred,
    target_names=iris.target_names,
)
print(report)

# 浙公网安备 33010602011771号