SMO算法实现与测试

一、实验目的

深入理解支持向量机(SVM)的算法原理,能够使用Python语言实现支持向量机的训练与测试,并且使用五折交叉验证算法进行模型训练与评估。

二、实验内容
(1)从scikit-learn库中加载iris数据集或本地读取,进行数据分析；
(2)采用五折交叉验证划分训练集和测试集,使用训练集对SMO支持向量机分类算法进行训练;
(3)使用五折交叉验证对模型性能(准确度、精度、召回率和F1值)进行测试;
(4)通过对测试结果进行比较分析,评估模型性能；
(5)完成实验报告中实验四的部分。

三、算法步骤、代码、及结果

def evaluate_svm_models(results, X_scaled, y, target_names):
    """Evaluate and compare the performance of SVM models trained with
    different kernels, then report a per-fold breakdown of the best one.

    Parameters
    ----------
    results : dict
        Maps kernel name -> cross_validate-style dict with list-like keys
        'train_accuracy', 'test_accuracy', 'test_precision', 'test_recall'
        and 'test_f1' (one entry per CV fold).
    X_scaled, y, target_names :
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple
        (best_model, best_results): the kernel name with the highest mean
        test accuracy and its result dict.
    """
    print("\n" + "=" * 60)
    print("支持向量机模型性能评估")
    print("=" * 60)

    # Compare mean metrics across all kernels.
    print("\n不同核函数SVM性能比较:")
    print("核函数\t\t平均准确度\t平均精度\t平均召回率\t平均F1值")
    print("-" * 80)

    best_model = None
    best_score = 0

    for kernel_name, result in results.items():
        avg_accuracy = np.mean(result['test_accuracy'])
        avg_precision = np.mean(result['test_precision'])
        avg_recall = np.mean(result['test_recall'])
        avg_f1 = np.mean(result['test_f1'])

        print(f"{kernel_name}\t{avg_accuracy:.4f}\t\t{avg_precision:.4f}\t\t{avg_recall:.4f}\t\t{avg_f1:.4f}")

        # Strict '>' keeps the first kernel (insertion order) on ties.
        if avg_accuracy > best_score:
            best_score = avg_accuracy
            best_model = kernel_name

    print(f"\n最佳模型: {best_model} (准确度: {best_score:.4f})")

    # Per-fold breakdown of the winning model.
    best_results = results[best_model]
    print(f"\n{best_model} SVM 详细结果:")
    print("折次\t训练准确度\t测试准确度\t精度\t\t召回率\t\tF1值")
    print("-" * 85)

    # Fix: iterate over the actual number of folds instead of a hard-coded
    # 5, so results produced with a different cv setting do not raise
    # IndexError (or get silently truncated).
    n_folds = len(best_results['test_accuracy'])
    for i in range(n_folds):
        print(f"{i + 1}\t{best_results['train_accuracy'][i]:.4f}\t\t{best_results['test_accuracy'][i]:.4f}\t\t"
              f"{best_results['test_precision'][i]:.4f}\t\t{best_results['test_recall'][i]:.4f}\t\t{best_results['test_f1'][i]:.4f}")

    # Overfitting check: gap between mean train and mean test accuracy.
    train_test_gap = np.mean(best_results['train_accuracy']) - np.mean(best_results['test_accuracy'])
    print(f"\n训练-测试差距: {train_test_gap:.4f}")
    if train_test_gap > 0.1:
        print("⚠️  可能存在过拟合")
    elif train_test_gap < 0.05:
        print("✅ 模型泛化能力良好")
    else:
        print("⚠️  模型拟合程度适中")

    return best_model, best_results


def plot_confusion_matrix_and_support_vectors(best_results, X_scaled, y, target_names):
    """Visualize the best SVM: a confusion-matrix heatmap and a bar chart
    of the number of support vectors per class.

    Uses the estimator fitted on the first CV fold and evaluates it on the
    first fold's test split (same KFold seed as during training).
    """
    fitted_svm = best_results['estimators'][0]

    # Re-create the same 5-fold split used during training and take only
    # the first (train, test) index pair.
    splitter = KFold(n_splits=5, shuffle=True, random_state=42)
    _, first_test_idx = next(splitter.split(X_scaled))

    fold_X = X_scaled[first_test_idx]
    fold_y = y[first_test_idx]
    predictions = fitted_svm.predict(fold_X)

    plt.figure(figsize=(10, 4))

    # Left panel: confusion matrix of the fold's predictions.
    plt.subplot(1, 2, 1)
    matrix = confusion_matrix(fold_y, predictions)
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=target_names, yticklabels=target_names)
    plt.title('SVM混淆矩阵')
    plt.ylabel('真实标签')
    plt.xlabel('预测标签')

    # Right panel: support-vector counts per class.
    plt.subplot(1, 2, 2)
    sv_counts = fitted_svm.n_support_
    plt.bar(target_names, sv_counts, color=['lightblue', 'lightgreen', 'lightcoral'])
    plt.title('各类别支持向量数量')
    plt.ylabel('支持向量数量')

    plt.tight_layout()
    plt.show()


def simple_svm_parameter_tuning(X_scaled, y):
    """Grid-search SVM hyperparameters with sequential 5-fold cross-validation.

    Evaluates each parameter combination one at a time (no GridSearchCV
    parallelism), then refits an SVC on the full data with the winner.

    Parameters
    ----------
    X_scaled : array-like of shape (n_samples, n_features)
        Standardized feature matrix.
    y : array-like of shape (n_samples,)
        Class labels.

    Returns
    -------
    tuple
        (best_estimator, best_score): the refitted SVC and the best mean
        cross-validation accuracy.
    """
    from itertools import product  # local import: keeps file-level imports untouched

    print("\n" + "=" * 60)
    print("SVM参数调优")
    print("=" * 60)

    # Candidate grids; each dict maps a parameter name to the values to try.
    parameters = [
        {'kernel': ['linear'], 'C': [0.1, 1, 10]},
        {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]},
    ]

    best_score = 0
    best_params = {}

    for grid in parameters:
        # Expand the grid into every concrete combination generically, so
        # adding a new kernel/grid needs no extra copy-pasted loop (the
        # original duplicated the whole CV block per kernel).
        keys = list(grid)
        for values in product(*(grid[k] for k in keys)):
            params = dict(zip(keys, values))

            svm = SVC(random_state=42, **params)
            scores = cross_validate(svm, X_scaled, y, cv=5, scoring='accuracy')
            avg_score = np.mean(scores['test_score'])

            # Preserve the original per-kernel progress messages.
            if params['kernel'] == 'linear':
                print(f"线性核 C={params['C']}: 准确度 = {avg_score:.4f}")
            else:
                print(f"RBF核 C={params['C']}, gamma={params['gamma']}: 准确度 = {avg_score:.4f}")

            # Strict '>' keeps the earliest best combination on ties.
            if avg_score > best_score:
                best_score = avg_score
                best_params = dict(params)

    print(f"\n最佳参数: {best_params}")
    print(f"最佳交叉验证准确度: {best_score:.4f}")

    # Refit on the full dataset with the winning parameters.
    best_estimator = SVC(**best_params, random_state=42)
    best_estimator.fit(X_scaled, y)

    return best_estimator, best_score
posted @ 2025-11-26 16:21  Look_Back  阅读(11)  评论(0)    收藏  举报