7.2.0 Imports

import torch
from torch import nn
from d2l import torch as d2l
from matplotlib import pyplot as plt

 

7.2.1 Defining the AlexNet network model

# Define the AlexNet network model
net = nn.Sequential(
    # Use a larger 11×11 convolution window to capture objects, and a larger stride to reduce the output height and width
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),      # Convolution: 1 input channel, 96 output channels, 11×11 kernel, stride 4, padding 1 on each side; output shape (1, 96, 54, 54)
    nn.ReLU(),                                                  # Output shape (1, 96, 54, 54)
    nn.MaxPool2d(kernel_size=3, stride=2),                      # Max pooling: 3×3 window, stride 2; output shape (1, 96, 26, 26)
    nn.Conv2d(96, 256, kernel_size=5, padding=2),               # Convolution: 96 input channels, 256 output channels, 5×5 kernel, padding 2 on each side; output shape (1, 256, 26, 26)
    nn.ReLU(),                                                  # Output shape (1, 256, 26, 26)
    nn.MaxPool2d(kernel_size=3, stride=2),                      # Max pooling: 3×3 window, stride 2; output shape (1, 256, 12, 12)
    nn.Conv2d(256, 384, kernel_size=3, padding=1),              # Convolution: 256 input channels, 384 output channels, 3×3 kernel, padding 1 on each side; output shape (1, 384, 12, 12)
    nn.ReLU(),                                                  # Output shape (1, 384, 12, 12)
    nn.Conv2d(384, 384, kernel_size=3, padding=1),              # Convolution: 384 input channels, 384 output channels, 3×3 kernel, padding 1 on each side; output shape (1, 384, 12, 12)
    nn.ReLU(),                                                  # Output shape (1, 384, 12, 12)
    nn.Conv2d(384, 256, kernel_size=3, padding=1),              # Convolution: 384 input channels, 256 output channels, 3×3 kernel, padding 1 on each side; output shape (1, 256, 12, 12)
    nn.ReLU(),                                                  # Output shape (1, 256, 12, 12)
    nn.MaxPool2d(kernel_size=3, stride=2),                      # Max pooling: 3×3 window, stride 2; output shape (1, 256, 5, 5)
    nn.Flatten(),                                               # Output shape (1, 6400)
    # Use dropout layers to mitigate overfitting
    nn.Linear(6400, 4096),                                      # Output shape (1, 4096)
    nn.ReLU(),                                                  # Output shape (1, 4096)
    nn.Dropout(p=0.5),                                          # Output shape (1, 4096)
    nn.Linear(4096, 4096),                                      # Output shape (1, 4096)
    nn.ReLU(),                                                  # Output shape (1, 4096)
    nn.Dropout(p=0.5),                                          # Output shape (1, 4096)
    # Fashion-MNIST has only 10 classes, so the final output dimension is 10
    nn.Linear(4096, 10))                                        # Output shape (1, 10)

# Define the input X with shape (batch size: 1, channels: 1, height: 224, width: 224)
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
# Output:
# Conv2d output shape:     torch.Size([1, 96, 54, 54])
# ReLU output shape:     torch.Size([1, 96, 54, 54])
# MaxPool2d output shape:     torch.Size([1, 96, 26, 26])
# Conv2d output shape:     torch.Size([1, 256, 26, 26])
# ReLU output shape:     torch.Size([1, 256, 26, 26])
# MaxPool2d output shape:     torch.Size([1, 256, 12, 12])
# Conv2d output shape:     torch.Size([1, 384, 12, 12])
# ReLU output shape:     torch.Size([1, 384, 12, 12])
# Conv2d output shape:     torch.Size([1, 384, 12, 12])
# ReLU output shape:     torch.Size([1, 384, 12, 12])
# Conv2d output shape:     torch.Size([1, 256, 12, 12])
# ReLU output shape:     torch.Size([1, 256, 12, 12])
# MaxPool2d output shape:     torch.Size([1, 256, 5, 5])
# Flatten output shape:     torch.Size([1, 6400])
# Linear output shape:     torch.Size([1, 4096])
# ReLU output shape:     torch.Size([1, 4096])
# Dropout output shape:     torch.Size([1, 4096])
# Linear output shape:     torch.Size([1, 4096])
# ReLU output shape:     torch.Size([1, 4096])
# Dropout output shape:     torch.Size([1, 4096])
# Linear output shape:     torch.Size([1, 10])
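
As a side note (not in the original post), each printed size can be checked by hand with the standard output-size formula floor((n + 2p - k) / s) + 1 for kernel size k, stride s, and padding p. A minimal sketch with a hypothetical helper out_size that recomputes the chain 224 → 54 → 26 → 12 → 5:

# Hypothetical helper: output size of a convolution/pooling layer along one spatial dimension
def out_size(n, k, s=1, p=0):
    return (n + 2 * p - k) // s + 1

n = out_size(224, k=11, s=4, p=1)   # Conv2d(kernel_size=11, stride=4, padding=1) -> 54
n = out_size(n, k=3, s=2)           # MaxPool2d(kernel_size=3, stride=2)          -> 26
n = out_size(n, k=5, p=2)           # Conv2d(kernel_size=5, padding=2)            -> 26
n = out_size(n, k=3, s=2)           # MaxPool2d(kernel_size=3, stride=2)          -> 12
n = out_size(n, k=3, p=1)           # Conv2d(kernel_size=3, padding=1) keeps 12 (same for the next two convs)
n = out_size(n, k=3, s=2)           # MaxPool2d(kernel_size=3, stride=2)          -> 5
print(256 * n * n)                  # 6400, the input size of the first Linear layer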

 

7.2.2 Downloading the Fashion-MNIST dataset

# Set the batch size
batch_size = 128
# Download Fashion-MNIST, shuffle it, and split it into mini-batches, returning iterable training and test sets (each batch has the form (feature tensor, label tensor)); the images are also resized from 28×28 to 224×224
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)
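
For reference, d2l.load_data_fashion_mnist is a thin wrapper around torchvision. A rough sketch of the equivalent done directly with torchvision and DataLoader (the root directory 'data' and num_workers=4 are arbitrary choices here, not taken from the original post):

from torch.utils import data
from torchvision import datasets, transforms

# Resize the 28×28 images to 224×224 and convert them to float tensors in [0, 1]
trans = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])
mnist_train = datasets.FashionMNIST(root='data', train=True, transform=trans, download=True)
mnist_test = datasets.FashionMNIST(root='data', train=False, transform=trans, download=True)
# Shuffle the training set; both loaders yield (feature, label) mini-batches of size batch_size
train_iter = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=4)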

 

7.2.3 Training

# Define the learning rate and the number of training epochs
lr, num_epochs = 0.01, 10
# Train the model on the GPU; print the average loss, training accuracy, and test accuracy of the final epoch, as well as how many images are processed per second
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

# loss 0.330, train acc 0.879, test acc 0.884
# 188.7 examples/sec on cuda:0
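
If you prefer not to depend on d2l for training, the following is a simplified stand-in for what train_ch6 roughly does (SGD with cross-entropy loss on the chosen device, plus a test-accuracy check each epoch); it omits d2l's weight initialization and live plotting, so results may differ slightly:

device = d2l.try_gpu()
net.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

for epoch in range(num_epochs):
    net.train()
    for X, y in train_iter:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        l = loss_fn(net(X), y)
        l.backward()
        optimizer.step()
    # Evaluate accuracy on the test set after each epoch
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X, y in test_iter:
            X, y = X.to(device), y.to(device)
            correct += (net(X).argmax(dim=1) == y).sum().item()
            total += y.numel()
    print(f'epoch {epoch + 1}, test acc {correct / total:.3f}')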

 

 

7.2.4 Visualizing the training results

plt.savefig('OutPut.png')
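
train_ch6 draws the loss and accuracy curves with matplotlib (via d2l's Animator), so plt.savefig writes the most recently drawn figure to OutPut.png in the working directory. Assuming that figure is still the active one, a variant with an explicit resolution and tight bounding box (the dpi value is an arbitrary choice):

# Save the current matplotlib figure (the training curves) with higher resolution
plt.savefig('OutPut.png', dpi=200, bbox_inches='tight')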

 

 

The complete code for this subsection is as follows

import torch
from torch import nn
from d2l import torch as d2l
from matplotlib import pyplot as plt

# ------------------------------ Define the AlexNet network model ------------------------------------

# Define the AlexNet network model
net = nn.Sequential(
    # Use a larger 11×11 convolution window to capture objects, and a larger stride to reduce the output height and width
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),      # Convolution: 1 input channel, 96 output channels, 11×11 kernel, stride 4, padding 1 on each side; output shape (1, 96, 54, 54)
    nn.ReLU(),                                                  # Output shape (1, 96, 54, 54)
    nn.MaxPool2d(kernel_size=3, stride=2),                      # Max pooling: 3×3 window, stride 2; output shape (1, 96, 26, 26)
    nn.Conv2d(96, 256, kernel_size=5, padding=2),               # Convolution: 96 input channels, 256 output channels, 5×5 kernel, padding 2 on each side; output shape (1, 256, 26, 26)
    nn.ReLU(),                                                  # Output shape (1, 256, 26, 26)
    nn.MaxPool2d(kernel_size=3, stride=2),                      # Max pooling: 3×3 window, stride 2; output shape (1, 256, 12, 12)
    nn.Conv2d(256, 384, kernel_size=3, padding=1),              # Convolution: 256 input channels, 384 output channels, 3×3 kernel, padding 1 on each side; output shape (1, 384, 12, 12)
    nn.ReLU(),                                                  # Output shape (1, 384, 12, 12)
    nn.Conv2d(384, 384, kernel_size=3, padding=1),              # Convolution: 384 input channels, 384 output channels, 3×3 kernel, padding 1 on each side; output shape (1, 384, 12, 12)
    nn.ReLU(),                                                  # Output shape (1, 384, 12, 12)
    nn.Conv2d(384, 256, kernel_size=3, padding=1),              # Convolution: 384 input channels, 256 output channels, 3×3 kernel, padding 1 on each side; output shape (1, 256, 12, 12)
    nn.ReLU(),                                                  # Output shape (1, 256, 12, 12)
    nn.MaxPool2d(kernel_size=3, stride=2),                      # Max pooling: 3×3 window, stride 2; output shape (1, 256, 5, 5)
    nn.Flatten(),                                               # Output shape (1, 6400)
    # Use dropout layers to mitigate overfitting
    nn.Linear(6400, 4096),                                      # Output shape (1, 4096)
    nn.ReLU(),                                                  # Output shape (1, 4096)
    nn.Dropout(p=0.5),                                          # Output shape (1, 4096)
    nn.Linear(4096, 4096),                                      # Output shape (1, 4096)
    nn.ReLU(),                                                  # Output shape (1, 4096)
    nn.Dropout(p=0.5),                                          # Output shape (1, 4096)
    # Fashion-MNIST has only 10 classes, so the final output dimension is 10
    nn.Linear(4096, 10))                                        # Output shape (1, 10)

# Define the input X with shape (batch size: 1, channels: 1, height: 224, width: 224)
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)
# Output:
# Conv2d output shape:     torch.Size([1, 96, 54, 54])
# ReLU output shape:     torch.Size([1, 96, 54, 54])
# MaxPool2d output shape:     torch.Size([1, 96, 26, 26])
# Conv2d output shape:     torch.Size([1, 256, 26, 26])
# ReLU output shape:     torch.Size([1, 256, 26, 26])
# MaxPool2d output shape:     torch.Size([1, 256, 12, 12])
# Conv2d output shape:     torch.Size([1, 384, 12, 12])
# ReLU output shape:     torch.Size([1, 384, 12, 12])
# Conv2d output shape:     torch.Size([1, 384, 12, 12])
# ReLU output shape:     torch.Size([1, 384, 12, 12])
# Conv2d output shape:     torch.Size([1, 256, 12, 12])
# ReLU output shape:     torch.Size([1, 256, 12, 12])
# MaxPool2d output shape:     torch.Size([1, 256, 5, 5])
# Flatten output shape:     torch.Size([1, 6400])
# Linear output shape:     torch.Size([1, 4096])
# ReLU output shape:     torch.Size([1, 4096])
# Dropout output shape:     torch.Size([1, 4096])
# Linear output shape:     torch.Size([1, 4096])
# ReLU output shape:     torch.Size([1, 4096])
# Dropout output shape:     torch.Size([1, 4096])
# Linear output shape:     torch.Size([1, 10])

# ------------------------------ Download the Fashion-MNIST dataset ------------------------------------

# Set the batch size
batch_size = 128
# Download Fashion-MNIST, shuffle it, and split it into mini-batches, returning iterable training and test sets (each batch has the form (feature tensor, label tensor)); the images are also resized from 28×28 to 224×224
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# ------------------------------ Training ------------------------------------

# Define the learning rate and the number of training epochs
lr, num_epochs = 0.01, 10
# Train the model on the GPU; print the average loss, training accuracy, and test accuracy of the final epoch, as well as how many images are processed per second
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
# loss 0.330, train acc 0.879, test acc 0.884
# 188.7 examples/sec on cuda:0

# ------------------------------ Visualize the training results ------------------------------------
plt.savefig('OutPut.png')

 
