SMO算法实现与测试
一、实验目的
深入理解支持向量机(SVM)的算法原理,能够使用Python语言实现支持向量机的训练与测试,并且使用五折交叉验证算法进行模型训练与评估。
二、实验内容
(1)从scikit-learn库中加载iris数据集或本地读取,进行数据分析；
(2)采用五折交叉验证划分训练集和测试集,使用训练集对SMO支持向量机分类算法进行训练;
(3)使用五折交叉验证对模型性能(准确度、精度、召回率和F1值)进行测试;
(4)通过对测试结果进行比较分析,评估模型性能；
(5)完成实验报告中实验四的部分。
三、算法步骤、代码、及结果
def evaluate_svm_models(results, X_scaled, y, target_names):
    """Compare cross-validated SVM results per kernel and report the best one.

    Parameters
    ----------
    results : dict
        Maps kernel name -> cross_validate-style dict with per-fold score
        arrays under the keys 'train_accuracy', 'test_accuracy',
        'test_precision', 'test_recall' and 'test_f1'.
    X_scaled, y, target_names :
        Unused here; kept so the signature matches sibling evaluation
        helpers in this file and existing callers keep working.

    Returns
    -------
    tuple[str, dict]
        Name of the kernel with the highest mean test accuracy, and its
        result dict from ``results``.
    """
    print("\n" + "=" * 60)
    print("支持向量机模型性能评估")
    print("=" * 60)
    # Summary table: mean of each metric across folds, one row per kernel.
    print("\n不同核函数SVM性能比较:")
    print("核函数\t\t平均准确度\t平均精度\t平均召回率\t平均F1值")
    print("-" * 80)
    best_model = None
    best_score = 0
    for kernel_name, result in results.items():
        avg_accuracy = np.mean(result['test_accuracy'])
        avg_precision = np.mean(result['test_precision'])
        avg_recall = np.mean(result['test_recall'])
        avg_f1 = np.mean(result['test_f1'])
        print(f"{kernel_name}\t{avg_accuracy:.4f}\t\t{avg_precision:.4f}\t\t{avg_recall:.4f}\t\t{avg_f1:.4f}")
        # Selection is by mean test accuracy only; strict '>' keeps the
        # earlier kernel on ties (dicts preserve insertion order).
        if avg_accuracy > best_score:
            best_score = avg_accuracy
            best_model = kernel_name
    print(f"\n最佳模型: {best_model} (准确度: {best_score:.4f})")
    # Per-fold breakdown for the winning kernel.
    best_results = results[best_model]
    print(f"\n{best_model} SVM 详细结果:")
    print("折次\t训练准确度\t测试准确度\t精度\t\t召回率\t\tF1值")
    print("-" * 85)
    # Generalized from a hard-coded 5: iterate over however many folds
    # were actually run, so non-5-fold CV results also print correctly.
    n_folds = len(best_results['test_accuracy'])
    for i in range(n_folds):
        print(f"{i + 1}\t{best_results['train_accuracy'][i]:.4f}\t\t{best_results['test_accuracy'][i]:.4f}\t\t"
              f"{best_results['test_precision'][i]:.4f}\t\t{best_results['test_recall'][i]:.4f}\t\t{best_results['test_f1'][i]:.4f}")
    # Overfitting check: a large train-vs-test accuracy gap suggests the
    # model memorized the training folds.
    train_test_gap = np.mean(best_results['train_accuracy']) - np.mean(best_results['test_accuracy'])
    print(f"\n训练-测试差距: {train_test_gap:.4f}")
    if train_test_gap > 0.1:
        print("⚠️ 可能存在过拟合")
    elif train_test_gap < 0.05:
        print("✅ 模型泛化能力良好")
    else:
        print("⚠️ 模型拟合程度适中")
    return best_model, best_results
def plot_confusion_matrix_and_support_vectors(best_results, X_scaled, y, target_names):
    """Plot a confusion matrix and per-class support-vector counts.

    Uses the estimator fitted on the first CV fold, evaluated on that same
    fold's test split. The KFold here is rebuilt with the same parameters
    (n_splits=5, shuffle=True, random_state=42) presumably used during
    training, so the split is assumed to match — verify against the
    training code if folds ever change.
    """
    # Estimator trained on fold 1.
    fold_estimator = best_results['estimators'][0]

    # Recreate the identical split and take only the first fold.
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    _, test_idx = next(iter(splitter.split(X_scaled)))
    X_test, y_test = X_scaled[test_idx], y[test_idx]
    predictions = fold_estimator.predict(X_test)

    plt.figure(figsize=(10, 4))

    # Left panel: confusion matrix heatmap.
    plt.subplot(1, 2, 1)
    sns.heatmap(confusion_matrix(y_test, predictions), annot=True, fmt='d',
                cmap='Blues', xticklabels=target_names, yticklabels=target_names)
    plt.title('SVM混淆矩阵')
    plt.ylabel('真实标签')
    plt.xlabel('预测标签')

    # Right panel: number of support vectors per class.
    plt.subplot(1, 2, 2)
    plt.bar(target_names, fold_estimator.n_support_,
            color=['lightblue', 'lightgreen', 'lightcoral'])
    plt.title('各类别支持向量数量')
    plt.ylabel('支持向量数量')

    plt.tight_layout()
    plt.show()
def simple_svm_parameter_tuning(X_scaled, y):
    """Manually grid-search SVM hyperparameters with 5-fold CV.

    Sweeps a linear kernel over C and an RBF kernel over C x gamma,
    scoring each candidate by mean cross-validated accuracy, then refits
    the best configuration on the full data. Done by hand (instead of
    GridSearchCV) to avoid parallel-execution problems.

    Parameters
    ----------
    X_scaled : array-like of shape (n_samples, n_features)
        Standardized feature matrix.
    y : array-like of shape (n_samples,)
        Class labels.

    Returns
    -------
    tuple[SVC, float]
        The best estimator fitted on all of ``X_scaled``/``y``, and its
        mean cross-validation accuracy.
    """
    print("\n" + "=" * 60)
    print("SVM参数调优")
    print("=" * 60)

    def _cv_accuracy(model):
        # Mean 5-fold cross-validated accuracy for one candidate model.
        scores = cross_validate(model, X_scaled, y, cv=5, scoring='accuracy')
        return np.mean(scores['test_score'])

    best_score = 0
    best_params = {}

    # Linear kernel: sweep the regularization strength C only.
    for C in [0.1, 1, 10]:
        avg_score = _cv_accuracy(SVC(kernel='linear', C=C, random_state=42))
        print(f"线性核 C={C}: 准确度 = {avg_score:.4f}")
        if avg_score > best_score:
            best_score = avg_score
            best_params = {'kernel': 'linear', 'C': C}

    # RBF kernel: sweep the full C x gamma grid.
    for C in [0.1, 1, 10]:
        for gamma in [0.1, 1, 10]:
            avg_score = _cv_accuracy(SVC(kernel='rbf', C=C, gamma=gamma, random_state=42))
            print(f"RBF核 C={C}, gamma={gamma}: 准确度 = {avg_score:.4f}")
            if avg_score > best_score:
                best_score = avg_score
                best_params = {'kernel': 'rbf', 'C': C, 'gamma': gamma}

    print(f"\n最佳参数: {best_params}")
    print(f"最佳交叉验证准确度: {best_score:.4f}")

    # Refit the winning configuration on the full dataset.
    best_estimator = SVC(**best_params, random_state=42)
    best_estimator.fit(X_scaled, y)
    return best_estimator, best_score

浙公网安备 33010602011771号