深度学习卷积(Convolution)

图像卷积模型搭建

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Select the compute device: use CUDA GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# --------------------------
# 1. Data loading and preprocessing
# --------------------------

# Fashion-MNIST: 60k train / 10k test, 28x28 grayscale images, 10 classes.
# Only transform applied: convert PIL images to [0, 1] float tensors.
transform = transforms.ToTensor()

train_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform)

batch_size = 64
# Shuffle only the training set; evaluation order does not matter.
train_loader = DataLoader(train_dataset, shuffle=True,
                          batch_size=batch_size, num_workers=2)
test_loader = DataLoader(test_dataset, shuffle=False,
                         batch_size=batch_size, num_workers=2)

# --------------------------
# 2. Model definition: LeNet-5 style CNN for 1x28x28 input, 10 class logits
# --------------------------
_layers = [
    nn.Conv2d(1, 6, kernel_size=5, padding=2),  # 28x28 -> 28x28, 6 channels
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),      # -> 14x14
    nn.Conv2d(6, 16, kernel_size=5),            # -> 10x10, 16 channels
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),      # -> 5x5
    nn.Flatten(),                               # -> 16*5*5 = 400 features
    nn.Linear(16 * 5 * 5, 120),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    nn.Sigmoid(),
    nn.Linear(84, 10),
]
net = nn.Sequential(*_layers)

# Move all model parameters to the selected device before creating the optimizer.
net = net.to(device)

# --------------------------
# 3. Loss function and optimizer
# --------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.9)  # adjust the learning rate as needed

# --------------------------
# 4. Training and evaluation function definitions
# --------------------------

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    """Run one full training pass over `dataloader`.

    Returns a tuple (average per-sample loss, accuracy) for the epoch.
    """
    model.train()
    running_loss = 0.0
    num_correct = 0
    num_seen = 0
    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Forward pass
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)

        # Backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so re-weight by batch size
        # to get an exact per-sample average at the end.
        running_loss += batch_loss.item() * batch_x.size(0)
        num_correct += (logits.argmax(dim=1) == batch_y).sum().item()
        num_seen += batch_y.size(0)

    return running_loss / num_seen, num_correct / num_seen

def evaluate(model, dataloader, criterion, device):
    """Evaluate `model` on `dataloader` without updating parameters.

    Returns a tuple (average per-sample loss, accuracy).
    """
    model.eval()
    running_loss = 0.0
    num_correct = 0
    num_seen = 0
    # No gradients needed during evaluation.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            # Re-weight the batch-mean loss by batch size for an exact
            # per-sample average over the whole dataset.
            running_loss += batch_loss.item() * batch_x.size(0)
            num_correct += (logits.argmax(dim=1) == batch_y).sum().item()
            num_seen += batch_y.size(0)

    return running_loss / num_seen, num_correct / num_seen

# --------------------------
# 5. Training and evaluation loop
# --------------------------
epochs = 10
for epoch in range(1, epochs + 1):
    tr_loss, tr_acc = train_one_epoch(net, train_loader, optimizer, criterion, device)
    te_loss, te_acc = evaluate(net, test_loader, criterion, device)

    summary = (
        f"Epoch [{epoch}/{epochs}]: "
        f"Train Loss: {tr_loss:.4f}, Train Acc: {tr_acc*100:.2f}% "
        f"| Test Loss: {te_loss:.4f}, Test Acc: {te_acc*100:.2f}%"
    )
    print(summary)

# Persist the trained weights once training finishes.
torch.save(net.state_dict(), "fashion_mnist_model.pth")
print("Model saved as fashion_mnist_model.pth")

卷积层相关

卷积层常见的输入形状总结

对于二维卷积层 nn.Conv2d,输入的形状通常是 \([N, C_\text{in}, H_\text{in}, W_\text{in}]\),输出形状为 \([N, C_\text{out}, H_\text{out}, W_\text{out}]\)

  • 输入形状

    • \(N\): 批量大小(batch size)。
    • \(C_\text{in}\): 输入通道数。
    • \(H_\text{in}\): 输入图像的高度。
    • \(W_\text{in}\): 输入图像的宽度。
  • 输出形状计算

    • 高度 \(H_\text{out}\)

      \[H_\text{out} = \left\lfloor \frac{H_\text{in} + 2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} \right\rfloor + 1 \]

    • 宽度 \(W_\text{out}\)

      \[W_\text{out} = \left\lfloor \frac{W_\text{in} + 2 \times \text{padding} - \text{kernel\_size}}{\text{stride}} \right\rfloor + 1 \]

    • 输出通道数 \(C_\text{out}\)
      等于卷积层中 \(\text{out\_channels}\) 的值。

通道数的影响

输入多通道:

  • 如果输入特征图有多个通道(\(C_\text{in}\)),每个卷积核会在所有输入通道上分别进行卷积操作。
  • 卷积核的形状为 \([C_\text{in}, \text{kernel\_size}, \text{kernel\_size}]\)
  • 最终结果是将所有通道的卷积结果相加,得到一个单一输出通道的值。

输出多通道:

  • 如果卷积层的输出通道数为 \(C_\text{out}\),则需要 \(C_\text{out}\) 个卷积核,每个卷积核都会生成一个输出通道。
  • 最终输出特征图的形状为 \([N, C_\text{out}, H_\text{out}, W_\text{out}]\)
posted @ 2024-12-16 10:32  afengleafs  阅读(12)  评论(0)    收藏  举报