第二次作业:卷积神经网络 part 2

代码练习

MobileNetV1

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim

class Block(nn.Module):
    '''Depthwise conv + Pointwise conv'''
    def __init__(self, in_planes, out_planes, stride=1):
        super(Block, self).__init__()
        # Depthwise 卷积,3*3 的卷积核,分为 in_planes,即各层单独进行卷积
        self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False)
        self.bn1 = nn.BatchNorm2d(in_planes)
        # Pointwise 卷积,1*1 的卷积核
        self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        return out

创建DataLoader

# 使用GPU训练,可以在菜单 "代码执行工具" -> "更改运行时类型" 里进行设置
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,  download=True, transform=transform_train)
testset  = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

创建MobileNetV1网络

class MobileNetV1(nn.Module):
    # (128,2) means conv planes=128, stride=2
    cfg = [(64,1), (128,2), (128,1), (256,2), (256,1), (512,2), (512,1), 
           (1024,2), (1024,1)]

    def __init__(self, num_classes=10):
        super(MobileNetV1, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.linear = nn.Linear(1024, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for x in self.cfg:
            out_planes = x[0]
            stride = x[1]
            layers.append(Block(in_planes, out_planes, stride))
            in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out
# 网络放到GPU上
net = MobileNetV1().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

模型训练

for epoch in range(10):  # 重复多轮训练
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        # 优化器梯度归零
        optimizer.zero_grad()
        # 正向传播 + 反向传播 + 优化 
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # 输出统计信息
        if i % 100 == 0:   
            print('Epoch: %d Minibatch: %5d loss: %.3f' %(epoch + 1, i + 1, loss.item()))

print('Finished Training')

使用GPU进行训练,训练完成

模型测试

correct = 0
total = 0

for data in testloader:
    images, labels = data
    images, labels = images.to(device), labels.to(device)
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %.2f %%' % (
    100 * correct / total))

MobileNetV2 网络

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim

class Block(nn.Module):
    '''expand + depthwise + pointwise'''
    def __init__(self, in_planes, out_planes, expansion, stride):
        super(Block, self).__init__()
        self.stride = stride
        # 通过 expansion 增大 feature map 的数量
        planes = expansion * in_planes
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # 步长为 1 时,如果 in 和 out 的 feature map 通道不同,用一个卷积改变通道数
        if stride == 1 and in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_planes))
        # 步长为 1 时,如果 in 和 out 的 feature map 通道相同,直接返回输入
        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        # 步长为1,加 shortcut 操作
        if self.stride == 1:
            return out + self.shortcut(x)
        # 步长为2,直接输出
        else:
            return out

创建 MobileNetV2 网络

class MobileNetV2(nn.Module):
    # (expansion, out_planes, num_blocks, stride)
    cfg = [(1,  16, 1, 1),
           (6,  24, 2, 1), 
           (6,  32, 3, 2),
           (6,  64, 4, 2),
           (6,  96, 3, 1),
           (6, 160, 3, 2),
           (6, 320, 1, 1)]

    def __init__(self, num_classes=10):
        super(MobileNetV2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layers = self._make_layers(in_planes=32)
        self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(1280)
        self.linear = nn.Linear(1280, num_classes)

    def _make_layers(self, in_planes):
        layers = []
        for expansion, out_planes, num_blocks, stride in self.cfg:
            strides = [stride] + [1]*(num_blocks-1)
            for stride in strides:
                layers.append(Block(in_planes, out_planes, expansion, stride))
                in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layers(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

创建 DataLoader

# 使用GPU训练,可以在菜单 "代码执行工具" -> "更改运行时类型" 里进行设置
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,  download=True, transform=transform_train)
testset  = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)
# 网络放到GPU上
net = MobileNetV2().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

模型训练

for epoch in range(10):  # 重复多轮训练
    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        # 优化器梯度归零
        optimizer.zero_grad()
        # 正向传播 + 反向传播 + 优化 
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # 输出统计信息
        if i % 100 == 0:   
            print('Epoch: %d Minibatch: %5d loss: %.3f' %(epoch + 1, i + 1, loss.item()))

print('Finished Training')

模型测试

correct = 0
total = 0

for data in testloader:
    images, labels = data
    images, labels = images.to(device), labels.to(device)
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %.2f %%' % (
    100 * correct / total))

HybridSN 高光谱分类网络

class_num = 16
class HybridSN(nn.Module):
   def __init__(self):
        super(HybridSN, self).__init__()
        self.L = 30
        self.S = 25

        self.bn1 = nn.BatchNorm2d(64)
        self.conv1 = nn.Conv3d(1, 8, (7, 3, 3), stride=1, padding=0)
        self.conv2 = nn.Conv3d(8, 16, (5, 3, 3), stride=1, padding=0)
        self.conv3 = nn.Conv3d(16, 32, (3, 3, 3), stride=1, padding=0)
        self.conv4 = nn.Conv2d(30, 64, kernel_size=3, stride=1,padding=0)
        
        self.fc1 = nn.Linear(480896, 256)
        self.dropout1 = nn.Dropout(p=0.4)
        self.fc2 = nn.Linear(256, 128)
        self.dropout2 = nn.Dropout(p=0.4)
        self.fc3 = nn.Linear(128, class_num)

    def forward(self, x):
        out = x.reshape(batch, 30, 25, 25)
        out = self.conv4(out)
        out = self.bn1(out)
        out = F.relu(out)
        
        out = out.reshape(batch, 1, 64, 23, 23)
        out = F.relu(self.conv1(out))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))
        
        out = out.view(-1, 32 * 52 * 17 * 17)
        out = self.fc1(out)
        out = F.relu(out)
        out = self.dropout1(out)
        out = self.fc2(out)
        out = F.relu(out)
        out = self.dropout2(out)
        out = self.fc3(out)
        return out
# 随机输入,测试网络结构是否通
# x = torch.randn(1, 1, 30, 25, 25)
# net = HybridSN()
# y = net(x)

论文阅读心得

MobileNetV1网络

  MobileNetV1是一种为移动设备设计的通用计算机视觉神经网络,MobileNet基于深度可分离卷积构建了非常轻量且延迟小的模型,并且可以通过两个超参数来进一步控制模型的大小,该模型能够应用到终端设备中,具有很重要的实践意义。

深度可分离卷积

  • MobileNet V1使用的是深度可分离卷积(Depthwise Separable Convolution,DSC)
    DSC包含两部分:深度卷积(depthwise convolution,DWC)+ 逐点卷积(pointwise convolution,PWC)
    DWC对输入的通道进行滤波,其不增加通道的数量,PWC用于将PWC不同的通道进行连接,其可以增加通道的数量

  下图(a)为传统的卷积方式,卷积核参数为Dk⋅Dk⋅M⋅N,其中Dk为卷积核大小,M为输入的通道数,N为输出的通道数。(b)和(c)DWC(b)中卷积核参数为Dk⋅Dk⋅1⋅M,其中M个Dk⋅Dk的核和输入特征的对应通道进行卷积,如下式所示。PWC(c)中卷积核参数为1⋅1⋅M⋅N,每个卷积核在特征维度上分别对输入的M个特征进行加权,最终得到N个特征(M≠N时,完成了升维或者降维)。

传统卷积的计算量为:Dk⋅Dk⋅M⋅N⋅DF⋅DF
DSC总共的计算量为:DK⋅DK⋅M⋅DF⋅DF+M⋅N⋅DF⋅DF
当使用3*3的卷积核时,计算量大概会减少8-9倍,准确率仅会有些许降低。

MobilNet V1的网络结构图

除第一层外其它均是深度可分离卷积,除了最后一层全连接层外每层都接BN和ReLU,总共28层。

MobileNetV2网络

  MobileNetV2 与第一代的MobileNet相比,总体而言,MobileNetV2模型在整体延迟范围内上实现相同的准确度要更快。
  MobileNetV2提出新的层单元inverted residual with linear bottleneck,该结构类似于残差网络单元,都包含shorcut,区别在于该结构是输入输出维度少,中间通过线性卷积先扩展升维,然后通过深度卷积进行特征提取,最后再映射降维,可以很好地保持网络性能且网络更加轻量。
  MobileNetV2在V1基础上对基本模块做了改进,形成带残差的瓶颈 depthwise 可分离卷积,如下图:

  MobileNetV2基本模块在最后一个 1x1 卷积非线性运算没有使用 ReLU6,而是使用 Linear(x) = x,也就是去掉了非线性变换,论文中所说可以保持网络瓶颈层良好的表达能力。
  MobileNetV2 是在 V1 基础上改进得到的新架构,利用“线性瓶颈”和“逆向残差”改善了性能,降低了模型内存占用,并且在多种视觉任务中取得了不俗的成绩。

HybridSN 高光谱分类网络

  该论文构建了一个混合网络解决高光谱图像分类问题
  首先是利用3D卷积,然后再用2D卷积

3D卷积与2D卷积

二维卷积

三维卷积

3D卷积卷积核本身是3D的,多了一个深度通道,而这个深度通道可能是视频上的连续帧,也可能是立体图像中的不同切片,所以主要应用在这两个方向

  • 运算量
      2D卷积的浮点数运算次数:\(2×C_{in}×K^{2}×C_{out}×W×H\)
      3D卷积的卷积核多了一个维度,输出数据多了一个时间维度,总运算次数:\(2×C_{in}×K^{3}×C_{out}×W×H×T\)
      因此3D卷积运算次数是2D卷积的K×T倍
  • 3D卷积可以保留输入信号的时间信息,而2D卷积不会
  • 3D卷积有空间的卷积和时间的卷积
      
posted @ 2020-08-08 18:46  Anything781  阅读(323)  评论(0)    收藏  举报