MLP 的局限:从 MNIST 到 Cats vs Dogs

!!! 注意:在数据集已提前下载、并使用 CUDA 加速的情况下,本次模拟训练完整运行约需 4 分多钟。(为保证体验,建议使用 CUDA 或 MPS 进行加速,并提前下载好数据集。)

在上一篇实验中,我们看到 MLP 在 MNIST 手写数字识别上可以达到接近 97.5% 的准确率。这说明 MLP 具备了较强的拟合能力,只要样本量足够并且合理调节超参数,就能在像手写体这样结构相对简单的数据集上取得非常不错的表现。
但这里也埋下了一个重要的问题:这样的高准确率是否意味着 MLP 在其他任务上也能同样泛化?
MNIST 数据集本身有几个特点:
1:图片分辨率低(28×28 灰度),输入维度相对较小;
2:样本居中、背景干净、噪声少,模式相对统一;
3:任务目标简单:10 类数字分类,类间差异明显。
这就解释了为什么 MLP 在 MNIST 上可以轻松达到很高的准确率——因为它不需要建模复杂的局部结构,只要把像素展平后学习全局模式,就足以区分数字。然而,如果我们把手写图片做一些简单的扰动,比如:
a:微小平移(数字偏移几像素);b:轻微旋转(±15°);c:添加噪声(模糊或随机点);d:换成其他的数据源。接下来我们通过设计实验,观察 MLP 在这些扰动以及其他物体识别任务上的泛化能力究竟如何。

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import functional as TF
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import matplotlib.font_manager as fm
import pandas as pd

# Candidate CJK-capable fonts, ordered by preference (macOS first, then Windows).
chinese_fonts = [
    'PingFang SC',      # macOS default UI font
    'Heiti TC',         # macOS Heiti
    'STHeiti',          # STHeiti
    'Arial Unicode MS', # Arial variant with CJK coverage
    'SimHei',           # SimHei (Windows)
    'Microsoft YaHei',  # Microsoft YaHei (Windows)
]

# Pick the first candidate that matplotlib's font manager actually knows about.
installed_names = {entry.name for entry in fm.fontManager.ttflist}
font_found = next((name for name in chinese_fonts if name in installed_names), None)

if font_found:
    rcParams['font.sans-serif'] = [font_found]
    rcParams['axes.unicode_minus'] = False  # keep minus signs rendering with a CJK font
    print(f"Using font: {font_found}")
else:
    print("Warning: No Chinese font found, using English labels")

# Select the compute device: prefer CUDA, then Apple MPS, else CPU.
# The intro text recommends CUDA *or MPS* acceleration, but the original check
# ignored MPS, silently dropping Apple-silicon users to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}\n")

# ==================== MLP模型定义 ====================

class MLP(nn.Module):
    """Plain fully-connected classifier: flatten -> [Linear, ReLU, Dropout]* -> Linear.

    Args:
        input_size: flattened input dimensionality (784 for 28x28 grayscale).
        hidden_sizes: widths of the hidden layers, in order.
        num_classes: size of the output logit vector.
    """
    def __init__(self, input_size=784, hidden_sizes=[512, 256], num_classes=10):
        super().__init__()
        # Pair up consecutive widths to build each hidden stage.
        dims = [input_size] + list(hidden_sizes)
        stages = []
        for in_dim, out_dim in zip(dims[:-1], dims[1:]):
            stages += [nn.Linear(in_dim, out_dim), nn.ReLU(), nn.Dropout(0.2)]
        stages.append(nn.Linear(dims[-1], num_classes))
        self.network = nn.Sequential(*stages)

    def forward(self, x):
        # Collapse every non-batch dimension before the dense stack.
        flat = x.view(x.size(0), -1)
        return self.network(flat)

# ==================== 训练和评估函数 ====================

def train_model(model, train_loader, criterion, optimizer, epochs=10, device=device):
    """Run a standard supervised training loop.

    Moves `model` to `device`, trains for `epochs` passes over `train_loader`,
    and prints the mean batch loss every second epoch. Mutates `model` in place.
    """
    model = model.to(device)
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(batch_images), batch_labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Report average loss every 2 epochs to keep the log compact.
        if (epoch + 1) % 2 == 0:
            print(f"  Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}")

def evaluate_model(model, test_loader, device=device):
    """Return top-1 accuracy (percentage) of `model` over `test_loader`.

    Puts the model into eval mode; assumes it is already on `device`.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            predictions = model(batch_images).argmax(dim=1)
            total += batch_labels.size(0)
            correct += (predictions == batch_labels).sum().item()

    return 100 * correct / total

# ==================== 数据增强和扰动 ====================

class AddGaussianNoise:
    """Transform that corrupts a tensor with i.i.d. Gaussian noise N(mean, std^2)."""

    def __init__(self, mean=0., std=0.1):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # Fresh noise of the same shape on every call; applied after ToTensor,
        # so inputs here are CPU float tensors.
        noise = torch.randn(tensor.size()) * self.std
        return tensor + noise + self.mean

class RandomShift:
    """Transform applying a uniformly random integer translation on both axes."""

    def __init__(self, shift_range=4):
        self.shift_range = shift_range

    def __call__(self, img):
        # Independent horizontal/vertical offsets in [-shift_range, +shift_range].
        limit = self.shift_range
        dx = np.random.randint(-limit, limit + 1)
        dy = np.random.randint(-limit, limit + 1)
        return TF.affine(img, angle=0, translate=(dx, dy), scale=1.0, shear=0)

# ==================== 实验1: 基准测试 ====================

def experiment_baseline():
    """Experiment 1: train and evaluate the MLP on unmodified MNIST.

    Returns:
        (model, accuracy): the trained model (reused by later experiments)
        and its test accuracy in percent.
    """
    print("="*60)
    print("实验1: 基准测试 - 原始MNIST")
    print("="*60)

    # Data pipeline: raw pixels scaled to [0, 1], nothing else.
    to_tensor = transforms.Compose([transforms.ToTensor()])
    train_set = torchvision.datasets.MNIST(root='./data', train=True, transform=to_tensor, download=True)
    test_set = torchvision.datasets.MNIST(root='./data', train=False, transform=to_tensor)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

    model = MLP(input_size=784, hidden_sizes=[512, 256], num_classes=10)

    print("开始训练...")
    train_model(model, train_loader, nn.CrossEntropyLoss(),
                optim.Adam(model.parameters(), lr=0.001), epochs=10)

    accuracy = evaluate_model(model, test_loader)
    print(f"✅ 基准准确率: {accuracy:.2f}%\n")

    return model, accuracy

# ==================== 实验2: 平移扰动 ====================

def experiment_translation(trained_model):
    """Experiment 2: test accuracy of `trained_model` under growing random shifts.

    Returns:
        (shift_ranges, accuracies): the sweep values and matching accuracies.
    """
    print("="*60)
    print("实验2: 平移扰动测试")
    print("="*60)

    shift_ranges = [0, 2, 4, 6, 8]
    accuracies = []

    for shift in shift_ranges:
        # shift == 0 means the clean baseline pipeline.
        steps = [] if shift == 0 else [RandomShift(shift_range=shift)]
        pipeline = transforms.Compose(steps + [transforms.ToTensor()])

        test_set = torchvision.datasets.MNIST(root='./data', train=False, transform=pipeline)
        loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

        acc = evaluate_model(trained_model, loader)
        accuracies.append(acc)
        print(f"  平移范围 ±{shift}px: {acc:.2f}%")

    print()
    return shift_ranges, accuracies

# ==================== 实验3: 旋转扰动 ====================

def experiment_rotation(trained_model):
    """Experiment 3: test accuracy of `trained_model` under fixed-angle rotations.

    Returns:
        (rotation_angles, accuracies): the sweep values and matching accuracies.
    """
    print("="*60)
    print("实验3: 旋转扰动测试")
    print("="*60)

    rotation_angles = [0, 5, 10, 15, 20, 30]
    accuracies = []

    for angle in rotation_angles:
        # degrees=(angle, angle) forces an exact rotation of `angle` degrees.
        steps = [] if angle == 0 else [transforms.RandomRotation(degrees=(angle, angle))]
        pipeline = transforms.Compose(steps + [transforms.ToTensor()])

        test_set = torchvision.datasets.MNIST(root='./data', train=False, transform=pipeline)
        loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

        acc = evaluate_model(trained_model, loader)
        accuracies.append(acc)
        print(f"  旋转角度 {angle}°: {acc:.2f}%")

    print()
    return rotation_angles, accuracies

# ==================== 实验4: 噪声扰动 ====================

def experiment_noise(trained_model):
    """Experiment 4: test accuracy of `trained_model` under additive Gaussian noise.

    Returns:
        (noise_levels, accuracies): noise std-dev sweep and matching accuracies.
    """
    print("="*60)
    print("实验4: 噪声扰动测试")
    print("="*60)

    noise_levels = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
    accuracies = []

    for noise_std in noise_levels:
        # Noise is added after ToTensor so it operates on [0, 1] float tensors.
        steps = [transforms.ToTensor()]
        if noise_std != 0.0:
            steps.append(AddGaussianNoise(mean=0., std=noise_std))
        pipeline = transforms.Compose(steps)

        test_set = torchvision.datasets.MNIST(root='./data', train=False, transform=pipeline)
        loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

        acc = evaluate_model(trained_model, loader)
        accuracies.append(acc)
        print(f"  噪声标准差 {noise_std:.1f}: {acc:.2f}%")

    print()
    return noise_levels, accuracies

# ==================== 实验5: 组合扰动 ====================

def experiment_combined(trained_model):
    """Experiment 5: test accuracy under pairs and the full stack of perturbations.

    Returns:
        (case_names, accuracies): scenario labels and matching accuracies.
    """
    print("="*60)
    print("实验5: 组合扰动测试")
    print("="*60)

    # Each scenario is (label, ordered transform steps). Geometric transforms
    # run before ToTensor; noise runs after it (it needs a tensor input).
    scenarios = [
        ("原始", [transforms.ToTensor()]),
        ("平移+旋转", [RandomShift(shift_range=4),
                   transforms.RandomRotation(degrees=10),
                   transforms.ToTensor()]),
        ("平移+噪声", [RandomShift(shift_range=4),
                   transforms.ToTensor(),
                   AddGaussianNoise(std=0.2)]),
        ("旋转+噪声", [transforms.RandomRotation(degrees=10),
                   transforms.ToTensor(),
                   AddGaussianNoise(std=0.2)]),
        ("全部扰动", [RandomShift(shift_range=4),
                  transforms.RandomRotation(degrees=10),
                  transforms.ToTensor(),
                  AddGaussianNoise(std=0.2)]),
    ]

    case_names = []
    accuracies = []

    for name, steps in scenarios:
        test_set = torchvision.datasets.MNIST(root='./data', train=False,
                                              transform=transforms.Compose(steps))
        loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

        acc = evaluate_model(trained_model, loader)
        case_names.append(name)
        accuracies.append(acc)
        print(f"  {name}: {acc:.2f}%")

    print()
    return case_names, accuracies

# ==================== 实验6: Fashion-MNIST ====================

def experiment_fashion_mnist():
    """Experiment 6: train/evaluate the same MLP architecture on Fashion-MNIST.

    Returns:
        Test accuracy in percent.
    """
    print("="*60)
    print("实验6: Fashion-MNIST 泛化测试")
    print("="*60)

    # Same pipeline and architecture as the MNIST baseline; only the data differs.
    to_tensor = transforms.Compose([transforms.ToTensor()])
    train_set = torchvision.datasets.FashionMNIST(root='./data', train=True, transform=to_tensor, download=True)
    test_set = torchvision.datasets.FashionMNIST(root='./data', train=False, transform=to_tensor)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

    model = MLP(input_size=784, hidden_sizes=[512, 256], num_classes=10)

    print("开始训练...")
    train_model(model, train_loader, nn.CrossEntropyLoss(),
                optim.Adam(model.parameters(), lr=0.001), epochs=10)

    accuracy = evaluate_model(model, test_loader)
    print(f"✅ Fashion-MNIST准确率: {accuracy:.2f}%\n")

    return accuracy

# ==================== 实验7: CIFAR-10 ====================

def experiment_cifar10():
    """Experiment 7: train/evaluate a wider MLP on CIFAR-10 (32x32 RGB images).

    Returns:
        Test accuracy in percent.
    """
    print("="*60)
    print("实验7: CIFAR-10 泛化测试")
    print("="*60)

    # Normalize each RGB channel to roughly [-1, 1].
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=pipeline, download=True)
    test_set = torchvision.datasets.CIFAR10(root='./data', train=False, transform=pipeline)

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False, num_workers=0)

    # Flattened input is 32*32*3 = 3072; wider hidden stack than for MNIST.
    model = MLP(input_size=3072, hidden_sizes=[1024, 512, 256], num_classes=10)

    print("开始训练...")
    train_model(model, train_loader, nn.CrossEntropyLoss(),
                optim.Adam(model.parameters(), lr=0.001), epochs=15)

    accuracy = evaluate_model(model, test_loader)
    print(f"✅ CIFAR-10准确率: {accuracy:.2f}%\n")

    return accuracy

# ==================== Run all experiments ====================

print("🚀 开始MLP泛化能力综合测试\n")

# Experiment 1: baseline on clean MNIST; also yields the model reused below.
trained_model, baseline_acc = experiment_baseline()

# Experiments 2-4: single-perturbation robustness of the SAME trained model.
shift_x, shift_acc = experiment_translation(trained_model)
rotation_x, rotation_acc = experiment_rotation(trained_model)
noise_x, noise_acc = experiment_noise(trained_model)

# Experiment 5: perturbations combined.
combined_names, combined_acc = experiment_combined(trained_model)

# Experiments 6-7: fresh models trained on other datasets (cross-domain check).
fashion_acc = experiment_fashion_mnist()
cifar_acc = experiment_cifar10()

# ==================== Visualize results ====================

fig = plt.figure(figsize=(18, 12))

# Subplot 1: accuracy vs. translation range, against the clean-MNIST baseline.
ax1 = plt.subplot(3, 3, 1)
ax1.plot(shift_x, shift_acc, marker='o', linewidth=2, markersize=10, color='#FF6B6B')
ax1.axhline(y=baseline_acc, color='gray', linestyle='--', alpha=0.5, label='基准')
ax1.set_xlabel('平移范围 (±pixels)', fontsize=11)
ax1.set_ylabel('准确率 (%)', fontsize=11)
ax1.set_title('平移扰动对准确率的影响', fontsize=12, fontweight='bold')
ax1.grid(True, alpha=0.3)
ax1.legend()
ax1.fill_between(shift_x, shift_acc, baseline_acc, alpha=0.2, color='#FF6B6B')

# Subplot 2: accuracy vs. rotation angle.
ax2 = plt.subplot(3, 3, 2)
ax2.plot(rotation_x, rotation_acc, marker='s', linewidth=2, markersize=10, color='#4ECDC4')
ax2.axhline(y=baseline_acc, color='gray', linestyle='--', alpha=0.5, label='基准')
ax2.set_xlabel('旋转角度 (度)', fontsize=11)
ax2.set_ylabel('准确率 (%)', fontsize=11)
ax2.set_title('旋转扰动对准确率的影响', fontsize=12, fontweight='bold')
ax2.grid(True, alpha=0.3)
ax2.legend()
ax2.fill_between(rotation_x, rotation_acc, baseline_acc, alpha=0.2, color='#4ECDC4')

# Subplot 3: accuracy vs. Gaussian-noise standard deviation.
ax3 = plt.subplot(3, 3, 3)
ax3.plot(noise_x, noise_acc, marker='D', linewidth=2, markersize=10, color='#95E1D3')
ax3.axhline(y=baseline_acc, color='gray', linestyle='--', alpha=0.5, label='基准')
ax3.set_xlabel('噪声标准差', fontsize=11)
ax3.set_ylabel('准确率 (%)', fontsize=11)
ax3.set_title('噪声扰动对准确率的影响', fontsize=12, fontweight='bold')
ax3.grid(True, alpha=0.3)
ax3.legend()
ax3.fill_between(noise_x, noise_acc, baseline_acc, alpha=0.2, color='#95E1D3')

# Subplot 4: bar chart over the combined-perturbation scenarios.
ax4 = plt.subplot(3, 3, 4)
colors = ['#74B9FF', '#FDA7DF', '#F8C471', '#A29BFE', '#E17055']
bars = ax4.bar(range(len(combined_names)), combined_acc, color=colors, alpha=0.8)
ax4.axhline(y=baseline_acc, color='gray', linestyle='--', alpha=0.5)
ax4.set_xlabel('扰动类型', fontsize=11)
ax4.set_ylabel('准确率 (%)', fontsize=11)
ax4.set_title('组合扰动测试', fontsize=12, fontweight='bold')
ax4.set_xticks(range(len(combined_names)))
ax4.set_xticklabels(combined_names, rotation=45, ha='right', fontsize=9)
ax4.grid(True, alpha=0.3, axis='y')

# Label each bar with its accuracy value.
for i, (bar, acc) in enumerate(zip(bars, combined_acc)):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height,
             f'{acc:.1f}%', ha='center', va='bottom', fontsize=9)

# Subplot 5: accuracy drop per perturbation (baseline minus worst-case accuracy).
ax5 = plt.subplot(3, 3, 5)
perturbations = ['平移±8px', '旋转30°', '噪声0.5', '全部组合']
# [-1] picks the most severe level tested in each sweep.
acc_drops = [
    baseline_acc - shift_acc[-1],
    baseline_acc - rotation_acc[-1],
    baseline_acc - noise_acc[-1],
    baseline_acc - combined_acc[-1]
]
colors_drop = ['#FF6B6B', '#4ECDC4', '#95E1D3', '#E17055']
bars = ax5.barh(range(len(perturbations)), acc_drops, color=colors_drop, alpha=0.8)
ax5.set_yticks(range(len(perturbations)))
ax5.set_yticklabels(perturbations)
ax5.set_xlabel('准确率下降 (%)', fontsize=11)
ax5.set_title('不同扰动的影响程度', fontsize=12, fontweight='bold')
ax5.grid(True, alpha=0.3, axis='x')

# Label each horizontal bar with its drop value.
for i, (bar, drop) in enumerate(zip(bars, acc_drops)):
    width = bar.get_width()
    ax5.text(width, bar.get_y() + bar.get_height()/2.,
             f' {drop:.1f}%', ha='left', va='center', fontsize=10, fontweight='bold')

# Subplot 6: accuracy across the three datasets (MNIST / Fashion-MNIST / CIFAR-10).
ax6 = plt.subplot(3, 3, 6)
datasets = ['MNIST\n(手写数字)', 'Fashion-MNIST\n(服装)', 'CIFAR-10\n(彩色物体)']
dataset_accs = [baseline_acc, fashion_acc, cifar_acc]
colors_dataset = ['#6C5CE7', '#00B894', '#FD79A8']
bars = ax6.bar(range(len(datasets)), dataset_accs, color=colors_dataset, alpha=0.8, width=0.6)
ax6.set_ylabel('准确率 (%)', fontsize=11)
ax6.set_title('不同数据集性能对比', fontsize=12, fontweight='bold')
ax6.set_xticks(range(len(datasets)))
ax6.set_xticklabels(datasets, fontsize=10)
ax6.set_ylim([0, 100])
ax6.grid(True, alpha=0.3, axis='y')

# Label each bar, placed slightly above its top edge.
for bar, acc in zip(bars, dataset_accs):
    height = bar.get_height()
    ax6.text(bar.get_x() + bar.get_width()/2., height + 2,
             f'{acc:.1f}%', ha='center', va='bottom', fontsize=11, fontweight='bold')

# 子图7: 综合鲁棒性雷达图
ax7 = plt.subplot(3, 3, 7, projection='polar')
categories = ['平移不变性', '旋转不变性', '噪声鲁棒性', '跨域泛化']
# 计算各项得分(基于准确率保持率)
scores = [
    (shift_acc[-1] / baseline_acc) * 100,
    (rotation_acc[-1] / baseline_acc) * 100,
    (noise_acc[-1] / baseline_acc) * 100,
    (fashion_acc / baseline_acc) * 100
]
scores += scores[:1]  # 闭合

angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
angles += angles[:1]

ax7.plot(angles, scores, 'o-', linewidth=2, color='#6C5CE7')
ax7.fill(angles, scores, alpha=0.25, color='#6C5CE7')
ax7.set_xticks(angles[:-1])
ax7.set_xticklabels(categories, fontsize=10)
ax7.set_ylim(0, 100)
ax7.set_title('MLP鲁棒性评分', fontsize=12, fontweight='bold', pad=20)
ax7.grid(True)

# 子图8: 数据复杂度分析
ax8 = plt.subplot(3, 3, 8)
complexity_data = {
    'MNIST': {'分辨率': 28*28, '通道数': 1, '复杂度': 1, '准确率': baseline_acc, 'color': '#6C5CE7'},
    'Fashion-MNIST': {'分辨率': 28*28, '通道数': 1, '复杂度': 2, '准确率': fashion_acc, 'color': '#00B894'},
    'CIFAR-10': {'分辨率': 32*32*3, '通道数': 3, '复杂度': 3, '准确率': cifar_acc, 'color': '#FD79A8'}
}

for i, (name, data) in enumerate(complexity_data.items()):
    ax8.scatter(data['复杂度'], data['准确率'], s=500, alpha=0.6, 
               color=data['color'], label=name, edgecolors='black', linewidth=2)
    ax8.text(data['复杂度'], data['准确率'] - 3, name, 
            ha='center', fontsize=9, fontweight='bold')

ax8.set_xlabel('数据复杂度', fontsize=11)
ax8.set_ylabel('准确率 (%)', fontsize=11)
ax8.set_title('数据复杂度与性能关系', fontsize=12, fontweight='bold')
ax8.set_xticks([1, 2, 3])
ax8.set_xticklabels(['简单', '中等', '复杂'])
ax8.grid(True, alpha=0.3)
ax8.set_ylim([30, 100])

# Subplot 9: rendered summary table of every measurement (axes hidden).
ax9 = plt.subplot(3, 3, 9)
ax9.axis('off')

summary_data = [
    ['测试项目', '准确率', '性能变化'],
    ['基准 (MNIST)', f'{baseline_acc:.1f}%', '-'],
    ['平移 ±8px', f'{shift_acc[-1]:.1f}%', f'↓{baseline_acc-shift_acc[-1]:.1f}%'],
    ['旋转 30°', f'{rotation_acc[-1]:.1f}%', f'↓{baseline_acc-rotation_acc[-1]:.1f}%'],
    ['噪声 0.5', f'{noise_acc[-1]:.1f}%', f'↓{baseline_acc-noise_acc[-1]:.1f}%'],
    ['组合扰动', f'{combined_acc[-1]:.1f}%', f'↓{baseline_acc-combined_acc[-1]:.1f}%'],
    ['Fashion-MNIST', f'{fashion_acc:.1f}%', f'↓{baseline_acc-fashion_acc:.1f}%'],
    ['CIFAR-10', f'{cifar_acc:.1f}%', f'↓{baseline_acc-cifar_acc:.1f}%']
]

table = ax9.table(cellText=summary_data, cellLoc='center', loc='center',
                 colWidths=[0.4, 0.3, 0.3])
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 2)

# Header row: white bold text on purple.
for i in range(3):
    table[(0, i)].set_facecolor('#6C5CE7')
    table[(0, i)].set_text_props(weight='bold', color='white')

# Body rows: alternate light-grey / white stripes.
for i in range(1, len(summary_data)):
    for j in range(3):
        if i % 2 == 0:
            table[(i, j)].set_facecolor('#F0F0F0')
        else:
            table[(i, j)].set_facecolor('white')

ax9.set_title('实验结果汇总', fontsize=12, fontweight='bold', pad=20)

# Figure-level title
fig.suptitle('MLP泛化能力综合评估报告', fontsize=16, fontweight='bold', y=0.995)

plt.tight_layout()
plt.savefig('mlp_generalization_analysis.png', dpi=300, bbox_inches='tight')
print("="*60)
print("📊 可视化报告已保存: mlp_generalization_analysis.png")
print("="*60)
plt.show()

# ==================== Text summary ====================

print("\n" + "="*80)
print("📋 MLP泛化能力分析总结")
print("="*80)

# Headline findings; each number is computed from the measurements above.
print("\n【核心发现】")
print("1. 🔴 空间不变性弱")
print(f"   - 平移±8px导致准确率下降: {baseline_acc - shift_acc[-1]:.1f}%")
print(f"   - 旋转30°导致准确率下降: {baseline_acc - rotation_acc[-1]:.1f}%")
print("   💡 原因: MLP缺乏局部特征提取能力,对空间位置敏感")

print("\n2. 🟠 噪声鲁棒性差")
print(f"   - 噪声标准差0.5导致准确率下降: {baseline_acc - noise_acc[-1]:.1f}%")
print("   💡 原因: 全连接层将所有像素等权对待,无法区分信号和噪声")

print("\n3. 🟡 组合扰动影响严重")
print(f"   - 多重扰动准确率: {combined_acc[-1]:.1f}%")
print(f"   - 相比基准下降: {baseline_acc - combined_acc[-1]:.1f}%")
print("   💡 原因: 缺乏抗干扰机制,扰动效应叠加")

print("\n4. 🟢 跨域泛化能力有限")
print(f"   - MNIST (简单): {baseline_acc:.1f}%")
print(f"   - Fashion-MNIST (中等): {fashion_acc:.1f}%")
print(f"   - CIFAR-10 (复杂): {cifar_acc:.1f}%")
print("   💡 结论: 数据复杂度越高,MLP性能下降越明显")

print("\n【MLP的局限性】")
print("❌ 无法学习平移不变性 - 同一物体在不同位置被视为不同模式")
print("❌ 无法学习旋转不变性 - 对角度变化极度敏感")
print("❌ 无法提取局部特征 - 丢失了图像的空间结构信息")
print("❌ 参数量大易过拟合 - 全连接导致参数爆炸")
print("❌ 泛化能力弱 - 在复杂、真实场景下性能大幅下降")

print("\n【为什么需要CNN?】")
print("✅ 卷积操作天然具有平移不变性")
print("✅ 局部感受野保留空间结构")
print("✅ 权重共享大幅减少参数")
print("✅ 层级特征提取(边缘→纹理→物体)")
print("✅ 更强的泛化能力和鲁棒性")

print("\n" + "="*80)
print("🎯 结论: MLP在MNIST上表现优秀,但面对真实世界的复杂场景")
print("       (位置变化、视角变化、光照变化、遮挡等)时能力有限。")
print("       这就是为什么计算机视觉领域需要卷积神经网络(CNN)!")
print("="*80)

image

image
通过上面的分析表格可以明显看出 MLP 的局限性。下一章我们将介绍如何从 MLP 过渡到 CNN(卷积神经网络)。

posted @ 2025-10-09 15:21  方子敬  阅读(8)  评论(0)    收藏  举报