# ===================== CNN =====================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
# Convolutional neural network for 10-class 28x28 grayscale classification (MNIST).
class ConvNet(nn.Module):
    """Two conv/batch-norm stages with 2x2 average pooling, followed by a
    dropout-regularized two-layer classifier head.
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        # Stage 1: 1 -> 32 channels; padding=1 keeps the 28x28 spatial size.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        # Shared pooling layer halves the spatial dims each time: 28 -> 14 -> 7.
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        """Map a (B, 1, 28, 28) batch to (B, 10) class logits."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = x.view(-1, 64 * 7 * 7)  # flatten to (B, 64*7*7)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)
# Data preprocessing and loading.
# Compose chains transforms: ToTensor() converts the PIL image to a tensor,
# Normalize() standardizes with the canonical MNIST mean/std (0.1307, 0.3081).
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
# Download and load the training set.
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
# Download and load the test set (reuses the files downloaded above).
test_dataset = datasets.MNIST('./data', train=False, transform=transform)
# Wrap the datasets in DataLoaders for batching.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)
# Initialize the network, loss function and optimizer.
model = ConvNet()
criterion = nn.CrossEntropyLoss()  # cross-entropy loss over class logits
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer, learning rate 0.001
# One-epoch training routine for the CNN.
def train(model, device, train_loader, optimizer, criterion, epoch):
    """Train `model` for a single epoch over `train_loader`.

    Moves each batch to `device`, runs the usual zero-grad / forward /
    backward / step cycle, and logs progress every 100 batches.
    """
    model.train()  # enable dropout and batch-norm statistic updates
    dataset_size = len(train_loader.dataset)
    num_batches = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            seen = batch_idx * len(data)
            pct = 100. * batch_idx / num_batches
            print(f'Train Epoch: {epoch} [{seen}/{dataset_size} '
                  f'({pct:.0f}%)]\tLoss: {loss.item():.6f}')
# 测试网络的函数
def test(model, device, test_loader, criterion):
model.eval() # 设置模型为评估模式
test_loss = 0
correct = 0
with torch.no_grad(): # 不计算梯度
for data, target in test_loader:
data, target = data.to(device), target.to(device) # 将数据和标签移动到设备上
output = model(data) # 前向传播
test_loss += criterion(output, target).item() # 累加批量损失
pred = output.argmax(dim=1, keepdim=True) # 获取最大概率的索引
correct += pred.eq(target.view_as(pred)).sum().item() # 统计正确预测的数量
test_loss /= len(test_loader.dataset) # 计算平均损失
print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
f'({100. * correct / len(test_loader.dataset):.0f}%)\n')
# Visualize predictions: show images alongside their predicted and true labels.
def show_images(images, labels, preds):
    """Plot the first 10 images in a 2x5 grid, each titled with its true
    and predicted label.
    """
    plt.figure(figsize=(10, 5))
    for idx in range(10):
        plt.subplot(2, 5, idx + 1)
        plt.imshow(images[idx].numpy().squeeze(), cmap='gray')
        plt.title(f"True: {labels[idx].item()}\nPred: {preds[idx].item()}")
        plt.axis('off')
    plt.show()
# Fetch one test batch, predict, and display the results.
def show_test_samples(model, device, test_loader):
    """Run the model on a single test batch and visualize predictions
    against the ground-truth labels.
    """
    model.eval()
    data, target = next(iter(test_loader))
    data, target = data.to(device), target.to(device)
    preds = model(data).argmax(dim=1, keepdim=True)
    # Move tensors back to the CPU so matplotlib can render them.
    show_images(data.cpu(), target.cpu(), preds.cpu())
# Training and testing loop.
print(torch.cuda.is_available())
print(torch.__version__)
# BUG FIX: the original only assigned `device` inside an
# `if torch.cuda.is_available():` guard, so on a CPU-only machine `device`
# was never defined and the loop below raised NameError. Select the device
# unconditionally, falling back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # move the model's parameters to the chosen device
for epoch in range(1, 11):  # 10 training epochs
    train(model, device, train_loader, optimizer, criterion, epoch)
    test(model, device, test_loader, criterion)
# After training, visualize a few test images with their predictions.
show_test_samples(model, device, test_loader)
import math
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# 1. Data preprocessing: normalize each image, then flatten it so every
# pixel becomes one token of a 784-long sequence.
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,)),
transforms.Lambda(lambda x: x.view(-1))  # flatten 1x28x28 into a 784-long sequence
])
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
# 2. Standard Transformer model definitions.
class PositionalEncoding(nn.Module):
    """Additive sinusoidal positional encoding (Vaswani et al., 2017).

    Precomputes a (max_len, d_model) table of interleaved sine/cosine
    waves and adds its first `seq_len` rows to the input.
    """

    def __init__(self, d_model, max_len=784):
        super().__init__()
        positions = torch.arange(max_len).unsqueeze(1)  # (max_len, 1)
        freqs = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        table = torch.zeros(max_len, d_model)
        table[:, 0::2] = torch.sin(positions * freqs)  # even columns: sine
        table[:, 1::2] = torch.cos(positions * freqs)  # odd columns: cosine
        # Buffer: saved/moved with the module but not a trainable parameter.
        self.register_buffer('pe', table)

    def forward(self, x):
        # x: (batch, seq_len, d_model); the table broadcasts over the batch dim.
        return x + self.pe[:x.size(1)]
# Transformer encoder over the 784-pixel sequence of a flattened MNIST image.
class MNISTTransformer(nn.Module):
    """Treats each pixel as one sequence token: embed each scalar pixel to
    `d_model`, add positional encodings, run a Transformer encoder,
    mean-pool over the sequence, and classify into `num_classes`.
    """

    def __init__(self, num_classes=10, d_model=64, nhead=4, num_layers=3):
        super().__init__()
        self.embedding = nn.Linear(1, d_model)  # per-pixel scalar -> d_model vector
        self.pos_encoder = PositionalEncoding(d_model)
        # batch_first=True: the encoder consumes (batch, seq, feature) tensors.
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=256, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """Map a (B, 784) batch of flattened images to (B, num_classes) logits."""
        x = x.unsqueeze(-1)    # [B, 784] -> [B, 784, 1]
        x = self.embedding(x)  # [B, 784, 1] -> [B, 784, d_model]
        x = self.pos_encoder(x)
        # BUG FIX: the original transposed to (seq, batch, d_model) before the
        # encoder even though the encoder was built with batch_first=True, so
        # self-attention mixed information ACROSS batch samples instead of
        # across pixels. Keep the batch-first layout throughout.
        x = self.transformer(x)  # [B, 784, d_model]
        x = x.mean(dim=1)        # global average pool over the 784 tokens
        return self.classifier(x)
# 3. Training and testing setup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # prefer GPU when available
model = MNISTTransformer().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
def train(epoch):
    """Run one training epoch of the global `model` over `train_loader`.

    The `epoch` argument is accepted for interface parity with callers;
    this function does not log, so it is otherwise unused.
    """
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(data), target)
        batch_loss.backward()
        optimizer.step()
def test():
    """Print the classification accuracy of the global `model` on `test_loader`."""
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            predictions = model(data).argmax(dim=1)
            correct += (predictions == target).sum().item()
    accuracy = 100. * correct / len(test_loader.dataset)
    print(f'Test Accuracy: {accuracy:.2f}%')
# Train for 10 epochs, printing the epoch number and test accuracy each time.
for epoch in range(1, 11):
    print(epoch)
    train(epoch)
    test()
# ===================== ViT =====================
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# 1. Data preprocessing: convert to tensor and normalize with the MNIST mean/std.
transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
# 2. ViT model definitions.
class PatchEmbedding(nn.Module):
    """Split an image into non-overlapping patches, embed each patch with a
    strided convolution, prepend a learnable [CLS] token, and add a
    learnable positional embedding.
    """

    def __init__(self, img_size=28, patch_size=7, in_channels=1, embed_dim=64):
        super().__init__()
        num_patches = (img_size // patch_size) ** 2
        # A conv with kernel == stride == patch_size embeds each patch independently.
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, embed_dim))

    def forward(self, x):
        """Map (B, C, H, W) images to (B, num_patches + 1, embed_dim) tokens."""
        patches = self.proj(x)                        # [B, C, H, W] -> [B, E, H/P, W/P]
        patches = patches.flatten(2).transpose(1, 2)  # -> [B, N, E]
        cls = self.cls_token.expand(patches.shape[0], -1, -1)
        tokens = torch.cat((cls, patches), dim=1)     # [B, N+1, E]
        return tokens + self.pos_embed
# One post-norm Transformer encoder block: self-attention + MLP, each with a
# residual connection followed by LayerNorm.
class TransformerEncoder(nn.Module):
    """Encoder block operating on (batch, tokens, embed_dim) tensors."""

    def __init__(self, embed_dim=64, num_heads=4, ff_dim=128, dropout=0.1):
        super().__init__()
        # BUG FIX: inputs arrive as (batch, tokens, embed). Without
        # batch_first=True, nn.MultiheadAttention interprets dim 0 as the
        # sequence, so attention mixed information ACROSS batch samples.
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout, batch_first=True)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(ff_dim, embed_dim),
            nn.Dropout(dropout)
        )
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Apply self-attention and the MLP, each with residual + LayerNorm."""
        attn_output, _ = self.attention(x, x, x)
        x = self.norm1(x + attn_output)
        x = self.norm2(x + self.mlp(x))
        return x
class ViT(nn.Module):
    """Minimal Vision Transformer: patch embedding, four encoder blocks,
    classification from the [CLS] token.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.patch_embed = PatchEmbedding()
        blocks = [TransformerEncoder() for _ in range(4)]
        self.transformer = nn.Sequential(*blocks)
        self.head = nn.Linear(64, num_classes)

    def forward(self, x):
        """Map (B, 1, 28, 28) images to (B, num_classes) logits."""
        tokens = self.patch_embed(x)
        encoded = self.transformer(tokens)
        cls = encoded[:, 0]  # classify from the [CLS] token only
        return self.head(cls)
# 3. Training and testing setup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # prefer GPU when available
model = ViT().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
def train(epoch):
    """Train the global ViT `model` for one epoch over `train_loader`.

    `epoch` is accepted for caller compatibility; no per-batch logging
    is done here.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        inputs, labels = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
def test():
    """Print the accuracy of the global ViT `model` on `test_loader`."""
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1, keepdim=True)  # highest-logit class per sample
            correct += pred.eq(target.view_as(pred)).sum().item()
    print(f'Test Accuracy: {100. * correct / len(test_loader.dataset):.2f}%')
# Train for 10 epochs, printing the epoch number and test accuracy after each.
for epoch in range(1, 11):
    print(epoch)
    train(epoch)
    test()