Implementing a Multilayer Perceptron and a Simple CNN in PyTorch
Implementation:
Importing packages and loading the dataset:
# Import packages
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms

batch_size = 256

# Load the MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())  # no need to download again

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
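A quick sanity check on what the loaders yield (an optional addition, not in the original code; the shapes follow from ToTensor() and batch_size = 256):

# Inspect one batch: each MNIST image is a 1x28x28 tensor after ToTensor()
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([256, 1, 28, 28])
print(labels.shape)  # torch.Size([256])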
Multilayer perceptron:
# Multilayer perceptron model
class Model_1(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model_1, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        y = self.l1(x)
        y = self.relu(y)
        y = self.l2(y)
        return y
The hyperparameters are chosen to match the TensorFlow implementation:
# Hyperparameters
input_size = 784  # 28 * 28
num_epochs = 5
num_hiddens = 256
output_size = 10
learning_rate = 0.5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use CUDA if available

model = Model_1(input_size, num_hiddens, output_size).to(device)
# Equivalent formulation with nn.Sequential:
# model = nn.Sequential(nn.Flatten(), nn.Linear(input_size, num_hiddens),
#                       nn.ReLU(), nn.Linear(num_hiddens, output_size))

# Loss function
criterion = nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
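Note that nn.CrossEntropyLoss combines LogSoftmax and NLLLoss, which is why neither model applies a softmax in forward: the loss expects raw logits of shape (N, C) together with integer class labels of shape (N,). A tiny check with dummy values (illustrative only, not part of the original post):

# CrossEntropyLoss operates on raw logits; the true class 0 has the
# largest logit here, so the resulting loss is small
logits = torch.tensor([[2.0, 0.5, -1.0]])  # one sample, three classes
label = torch.tensor([0])                  # true class index
print(nn.CrossEntropyLoss()(logits, label))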
Training code:
# Training loop
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # reshape acts like Flatten(): (n, 1, 28, 28) -> (n, 784)
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)

        # forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print(f'epoch {epoch+1} / {num_epochs}, step {i+1}/{n_total_steps}, loss = {loss.item():.4f}')
Training output:
epoch 1 / 5, step 100/235, loss = 0.2301
epoch 1 / 5, step 200/235, loss = 0.2396
epoch 2 / 5, step 100/235, loss = 0.1522
epoch 2 / 5, step 200/235, loss = 0.1654
epoch 3 / 5, step 100/235, loss = 0.1569
epoch 3 / 5, step 200/235, loss = 0.1311
epoch 4 / 5, step 100/235, loss = 0.0831
epoch 4 / 5, step 200/235, loss = 0.0854
epoch 5 / 5, step 100/235, loss = 0.0426
epoch 5 / 5, step 200/235, loss = 0.0717
Test code:
# Evaluation on the test set
model.eval()  # no dropout/batchnorm in this model, but switching to eval mode is good practice
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, pred = torch.max(outputs, 1)
        n_samples += images.shape[0]
        n_correct += (pred == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy = {acc}')

# Test result: Accuracy = 97.19
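The prediction line relies on torch.max returning a pair (max values, argmax indices) along the given dimension; since softmax is monotonic, the argmax over the raw logits gives the same class as the argmax over probabilities. A small illustration with made-up logits:

# torch.max along dim=1 returns (values, indices); the indices are the predictions
logits = torch.tensor([[1.2, -0.3, 2.5],
                       [0.1,  0.9, -1.0]])
values, pred = torch.max(logits, 1)
print(pred)  # tensor([2, 1]) -- predicted class for each row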
Simple CNN implementation:
# Simple CNN
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        # Shape changes: n*1*28*28 -> n*6*24*24 (conv) -> n*6*12*12 (pool)
        self.conv = nn.Conv2d(1, 6, 5)  # input channels, output channels, kernel size
        self.pool = nn.MaxPool2d(2, 2)
        self.f1 = nn.Linear(6 * 12 * 12, 256)
        self.f2 = nn.Linear(256, 10)

    def forward(self, x):
        y = self.pool(F.relu(self.conv(x)))
        y = y.view(-1, 6 * 12 * 12)  # flatten before the fully connected layers
        y = F.relu(self.f1(y))
        y = self.f2(y)
        return y
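The shape comment can be verified by tracing a dummy batch through each stage (an illustrative sketch, not in the original post): a 5x5 kernel maps 28x28 to 24x24 since 28 - 5 + 1 = 24, and 2x2 max pooling halves that to 12x12.

# Trace a dummy batch of 4 grayscale images through the network
m = CNNModel()
x = torch.randn(4, 1, 28, 28)
y = m.conv(x)
print(y.shape)     # torch.Size([4, 6, 24, 24])
y = m.pool(F.relu(y))
print(y.shape)     # torch.Size([4, 6, 12, 12])
print(m(x).shape)  # torch.Size([4, 10]) -- one logit per class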
Hyperparameters:
num_epochs = 5
learning_rate = 0.001
model = CNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
The training and test code is almost identical to the MLP version above (the one required change is sketched after the results), so only the training and test results are given here:
epoch 1 / 5, step 100/235, loss = 0.2931
epoch 1 / 5, step 200/235, loss = 0.1786
epoch 2 / 5, step 100/235, loss = 0.1415
epoch 2 / 5, step 200/235, loss = 0.0902
epoch 3 / 5, step 100/235, loss = 0.0830
epoch 3 / 5, step 200/235, loss = 0.1001
epoch 4 / 5, step 100/235, loss = 0.0452
epoch 4 / 5, step 200/235, loss = 0.0352
epoch 5 / 5, step 100/235, loss = 0.0272
epoch 5 / 5, step 200/235, loss = 0.0731
Accuracy = 98.28
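For completeness, the one change the CNN needs in the loops above is dropping the reshape: nn.Conv2d consumes the images in their original n*1*28*28 form. A minimal sketch of the adjusted inner training step (assuming the same loaders, criterion, and optimizer as above):

for images, labels in train_loader:
    images = images.to(device)   # no reshape: the CNN takes (n, 1, 28, 28) directly
    labels = labels.to(device)
    outputs = model(images)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()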
Please credit the source when reposting.