The Next Stop in the Evolution of CNNs: VGG

Key Improvements in the VGG Model

(2014, Very Deep Convolutional Networks for Large-Scale Image Recognition)

| Improvement | AlexNet | VGG | Significance |
| --- | --- | --- | --- |
| Kernel design | Large kernels (11×11, 5×5) with few layers | Stacked small 3×3 kernels in a deeper network (11–19 layers) | Small kernels use fewer parameters; stacked, they match the receptive field of a large kernel while capturing richer features (see the sketch below the table) |
| Depth | 5 conv layers + 3 fully connected layers | 13/16 conv layers + 3 fully connected layers (VGG-16/19) | A much deeper hierarchy markedly improves feature extraction |
| Pooling | MaxPooling every few layers (3×3, stride 2, overlapping) | MaxPooling (2×2, stride 2) after each stage | Consistent spatial downsampling |
| Channel plan | Conv1: 96 → Conv5: 256 | Stage 1: 64 → Stage 5: 512 | Channels roughly double per stage, giving more capacity |
| Activation | ReLU | ReLU | Unchanged; VGG keeps ReLU |
| Normalization | LRN (Local Response Normalization) | None (neither LRN nor BN) | The paper dropped LRN as unhelpful; BatchNorm only became mainstream later |
| Parameter count | ~60M | ~138M (VGG-16) | A large increase in compute and storage cost |
| Main contribution | Depth with large kernels | Depth with stacked small kernels | A unified, simple architectural template |
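The parameter saving is easy to verify: two stacked 3×3 convolutions cover the same 5×5 receptive field as a single 5×5 convolution (and three stacked 3×3 layers match 7×7), use fewer weights, and gain an extra nonlinearity in between. A minimal sketch, with the channel count C = 64 chosen arbitrarily for illustration:

import torch.nn as nn

C = 64  # illustrative channel count
one_5x5 = nn.Conv2d(C, C, kernel_size=5, padding=2)
two_3x3 = nn.Sequential(
    nn.Conv2d(C, C, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(C, C, kernel_size=3, padding=1),
)

def n_params(m):
    return sum(p.numel() for p in m.parameters())

print(n_params(one_5x5))  # 102464 = 25*C*C + C
print(n_params(two_3x3))  # 73856  = 2*(9*C*C + C), about 28% fewer parameters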

VGG vs. AlexNet on CIFAR-10: Experiment Design Overview

| Dimension | AlexNet-mini | VGG-mini (11/16) | Shared settings |
| --- | --- | --- | --- |
| Kernels | 3×3 stacks plus a few wider-channel layers | Pure 3×3 stacks, deeper | Input: 3×32×32 |
| Depth | 5 conv stages | 5 stages (2–3 convs each) | Identical train/test augmentation |
| Channel plan | 64→192→384→256→256 | 64→128→256→512→512 | Label smoothing = 0.1 |
| Normalization | BN (optional) | BN (usually paired in practice) | Optimizer = AdamW, weight decay = 5e-4 |
| Regularization | Dropout (0.5) | Dropout (0.3–0.5) | StepLR / OneCycleLR |
| Pooling | MaxPool 2×2 | MaxPool 2×2 | Epochs = 5–30 (time permitting) |
| Expectation | Converges faster, moderate capacity | More expressive, better final accuracy | Shared metric: Top-1 accuracy / loss |
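The shared-settings column maps to PyTorch roughly as below. This is a sketch assuming PyTorch ≥ 1.10 (for the label_smoothing argument); make_training_setup is a helper name introduced here, not part of the script. The quick-run script that follows deliberately sticks to plain Adam (omitting BN, label smoothing, and the scheduler) to keep the first comparison fast.

import torch.nn as nn
import torch.optim as optim

def make_training_setup(model, lr=1e-3, epochs=30):
    # Hypothetical helper: loss with label smoothing = 0.1, AdamW with
    # weight decay = 5e-4, and a StepLR schedule stepped once per epoch
    # (the OneCycleLR alternative would instead be stepped per batch).
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=max(1, epochs // 3), gamma=0.1)
    return criterion, optimizer, scheduler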
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# ============ Device ============
device = torch.device("cuda" if torch.cuda.is_available() else
                      "mps" if torch.backends.mps.is_available() else "cpu")
print("Using device:", device)

# ============ Data augmentation ============
# CIFAR-10 per-channel mean/std used for normalization
mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
testset  = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testloader  = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# ============ Model definitions ============
class AlexNetMini(nn.Module):
    """AlexNet-style channel plan (64→192→384→256→256) adapted to 32×32
    inputs: 3×3 kernels replace the original 11×11/5×5, and three 2×2
    pools bring the feature map down to 4×4."""
    def __init__(self, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),    # 32 -> 16
            nn.Conv2d(64, 192, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),  # 16 -> 8
            nn.Conv2d(192, 384, 3, padding=1), nn.ReLU(),
            nn.Conv2d(384, 256, 3, padding=1), nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2)  # 8 -> 4
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256*4*4, 1024), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 512), nn.ReLU(),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)

class VGG11Mini(nn.Module):
    """VGG-11-style stage layout (1-1-2-2-2 convs, channels 64→512);
    five 2×2 pools reduce 32×32 to 1×1, so the flattened feature is 512-d."""
    def __init__(self, num_classes=10):
        super().__init__()
        def block(in_c, out_c, n_conv):
            # n_conv 3x3 conv+ReLU layers followed by one 2x2 max pool
            layers = []
            for i in range(n_conv):
                layers.append(nn.Conv2d(in_c if i == 0 else out_c, out_c, 3, padding=1))
                layers.append(nn.ReLU(inplace=True))
            layers.append(nn.MaxPool2d(2))
            return nn.Sequential(*layers)

        self.features = nn.Sequential(
            block(3, 64, 1),     # 32 -> 16
            block(64, 128, 1),   # 16 -> 8
            block(128, 256, 2),  # 8 -> 4
            block(256, 512, 2),  # 4 -> 2
            block(512, 512, 2)   # 2 -> 1
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(512, 512), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)
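
# A quick sanity check of model sizes; both mini variants are far smaller
# than the ~60M / ~138M parameters of full AlexNet / VGG-16. The counts in
# the comments are roughly what these particular definitions work out to.
def count_params(model):
    return sum(p.numel() for p in model.parameters())

print(f"AlexNet-mini: {count_params(AlexNetMini())/1e6:.1f}M parameters")  # ~7.0M
print(f"VGG11-mini:   {count_params(VGG11Mini())/1e6:.1f}M parameters")    # ~9.5M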

# ============ Training and evaluation functions ============
def train_epoch(model, loader, criterion, optimizer):
    model.train()
    loss_total, correct, total = 0, 0, 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss_total += loss.item()
        _, pred = outputs.max(1)
        total += labels.size(0)
        correct += pred.eq(labels).sum().item()
    return loss_total/len(loader), 100.*correct/total

def test_epoch(model, loader, criterion):
    model.eval()
    loss_total, correct, total = 0, 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss_total += loss.item()
            _, pred = outputs.max(1)
            total += labels.size(0)
            correct += pred.eq(labels).sum().item()
    return loss_total/len(loader), 100.*correct/total

# ============ Training driver ============
def run_exp(model, name, epochs=5, lr=0.001):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    hist = {"train_loss":[], "train_acc":[], "test_loss":[], "test_acc":[]}

    for epoch in range(1, epochs+1):
        tl, ta = train_epoch(model, trainloader, criterion, optimizer)
        vl, va = test_epoch(model, testloader, criterion)
        hist["train_loss"].append(tl)
        hist["train_acc"].append(ta)
        hist["test_loss"].append(vl)
        hist["test_acc"].append(va)
        print(f"{name} | Epoch {epoch}/{epochs} | TL={tl:.3f} TA={ta:.2f}% | VL={vl:.3f} VA={va:.2f}%")
    return hist

# ============ Train both AlexNet-mini & VGG11-mini ============
alex_hist = run_exp(AlexNetMini(), "AlexNet-mini", epochs=5)
vgg_hist  = run_exp(VGG11Mini(), "VGG11-mini", epochs=5)

# ============ Plot comparison curves ============
epochs = range(1, len(alex_hist["train_loss"])+1)

plt.figure(figsize=(14,6))
# Loss
plt.subplot(1,2,1)
plt.plot(epochs, alex_hist["train_loss"], "r-", label="AlexNet Train")
plt.plot(epochs, alex_hist["test_loss"], "r--", label="AlexNet Test")
plt.plot(epochs, vgg_hist["train_loss"], "b-", label="VGG11 Train")
plt.plot(epochs, vgg_hist["test_loss"], "b--", label="VGG11 Test")
plt.title("Loss Comparison"); plt.xlabel("Epoch"); plt.ylabel("Loss"); plt.legend(); plt.grid(True, alpha=0.3)

# Accuracy
plt.subplot(1,2,2)
plt.plot(epochs, alex_hist["train_acc"], "r-", label="AlexNet Train")
plt.plot(epochs, alex_hist["test_acc"], "r--", label="AlexNet Test")
plt.plot(epochs, vgg_hist["train_acc"], "b-", label="VGG11 Train")
plt.plot(epochs, vgg_hist["test_acc"], "b--", label="VGG11 Test")
plt.title("Accuracy Comparison"); plt.xlabel("Epoch"); plt.ylabel("Accuracy (%)"); plt.legend(); plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

[Figure: loss (left) and Top-1 accuracy (right) curves for AlexNet-mini vs. VGG11-mini over 5 epochs]
As the curves show, VGG11-mini actually trails AlexNet-mini after only five epochs of training. A plausible explanation is that the simpler AlexNet-mini converges faster and therefore reaches better accuracy within such a short budget, while the deeper VGG needs more epochs, and likely some hyperparameter tuning (learning rate, schedule, regularization), before its extra capacity pays off.
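One way to probe that, sketched here rather than measured: rerun the comparison for more epochs with the shared settings from the table. The variant below reuses train_epoch, test_epoch, device, and the data loaders defined above, plus the hypothetical make_training_setup helper sketched earlier; no results are claimed for this run.

def run_exp_tuned(model, name, epochs=30, lr=1e-3):
    model = model.to(device)
    criterion, optimizer, scheduler = make_training_setup(model, lr=lr, epochs=epochs)
    hist = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}
    for epoch in range(1, epochs + 1):
        tl, ta = train_epoch(model, trainloader, criterion, optimizer)
        vl, va = test_epoch(model, testloader, criterion)
        scheduler.step()  # decay the learning rate once per epoch
        hist["train_loss"].append(tl)
        hist["train_acc"].append(ta)
        hist["test_loss"].append(vl)
        hist["test_acc"].append(va)
        print(f"{name} | Epoch {epoch}/{epochs} | TL={tl:.3f} TA={ta:.2f}% | VL={vl:.3f} VA={va:.2f}%")
    return hist

# e.g. vgg_hist_30 = run_exp_tuned(VGG11Mini(), "VGG11-mini", epochs=30)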
