06-NIN 图像分类

NIN网络中,主要提及两点:

1. )多层感知机

2.)global average pooling

  

 

图1  线性卷积层和mlpconv层的区别

 

图2 NIN网络架构

第一个卷积核是11x11x3x96,因此在一个patch块上卷积的输出是1x1x96的feature map(一个96维的向量).在其后又接了一个MLP层,输出仍然是96.因此这个MLP层就等价于一个1 x 1 的卷积层,这样工程上任然按照之前的方式实现,不需要额外工作.

 

 

图3 全连接层FC和global average pooling的区别

  传统的cnn是在较低层使用卷积,如分类任务中,最后的卷积层所得feature map被矢量化进行全连接层,然后使用softmax 回归进行分类。一般来说,在卷积的末端完成的卷积与传统分类器的桥接。全连接阶段易于过拟合,妨碍整个网络的泛化能力,一般应有一些规则方法来处理过拟合。

  在传统CNN中很难解释最后的全连接层输出的类别信息的误差怎么传递给前边的卷积层.而global average pooling更容易解释.另外,全连接层容易过拟合,往往依赖于dropout等正则化手段.

  global average pooling的概念非常简单,分类任务有多少个类别,就控制最终产生多少个feature map.对每个feature map的数值求平均作为某类别的置信度,类似FC层输出的特征向量,再经过softmax分类.其优点有:

    1.)参数数量减少,减轻过拟合(应用于AlexNet,模型230MB->29MB);

    2.)更符合卷积网络的结构,使feature map和类别信息直接映射;

    3.)求和取平均操作综合了空间信息,使得对输入的空间变换更鲁棒(与卷积层相连的FC按顺序对特征进行了重新编排(flatten),可能破坏了特征的位置信息).

    4.)FC层输入的大小须固定,这限制了网络输入的图像大小.

NIN的pytorch实现:

 1 import torch.nn as nn
 2 
 3 
 4 #定义模型
 5 class NIN_Net(nn.Module):#继承父类nn.Module
 6     def __init__(self):
 7         super(NIN_Net, self).__init__()#super可以指代父类而不需要显式的声明,这对更改基类(此处为__init__())的时候是有帮助的,使得代码更容易维护
 8         self.classifier = nn.Sequential(
 9                 nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
10                 nn.ReLU(inplace=True),
11                 nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0),
12                 nn.ReLU(inplace=True),
13                 nn.Conv2d(160,  96, kernel_size=1, stride=1, padding=0),
14                 nn.ReLU(inplace=True),
15                 nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
16                 nn.Dropout(0.5),
17 
18                 nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2),
19                 nn.ReLU(inplace=True),
20                 nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
21                 nn.ReLU(inplace=True),
22                 nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
23                 nn.ReLU(inplace=True),
24                 nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
25                 nn.Dropout(0.5),
26 
27                 nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
28                 nn.ReLU(inplace=True),
29                 nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
30                 nn.ReLU(inplace=True),
31                 nn.Conv2d(192,  10, kernel_size=1, stride=1, padding=0),
32                 nn.ReLU(inplace=True),
33                 nn.AvgPool2d(kernel_size=8, stride=1, padding=0),
34 
35                 )
36 
37     def forward(self, x):
38         x = self.classifier(x)
39         
40         #将得到的数据平展
41         x = x.view(x.size(0), 10)
42         return x
View Code

classfyNet_main.py

  1 import torch
  2 from torch.utils.data import DataLoader
  3 from torch import nn, optim
  4 from torchvision import datasets, transforms
  5 from torchvision.transforms.functional import InterpolationMode
  6 
  7 from matplotlib import pyplot as plt
  8 
  9 
 10 import time
 11 
 12 from Lenet5 import Lenet5_new
 13 from Resnet18 import ResNet18,ResNet18_new
 14 from AlexNet import AlexNet
 15 from Vgg16 import VGGNet16
 16 from Densenet import DenseNet121, DenseNet169, DenseNet201, DenseNet264
 17 
 18 from NIN import NIN_Net
 19 from GoogleNet import GoogLeNet
 20 
 21 def main():
 22     
 23     print("Load datasets...")
 24     
 25     # transforms.RandomHorizontalFlip(p=0.5)---以0.5的概率对图片做水平横向翻转
 26     # transforms.ToTensor()---shape从(H,W,C)->(C,H,W), 每个像素点从(0-255)映射到(0-1):直接除以255
 27     # transforms.Normalize---先将输入归一化到(0,1),像素点通过"(x-mean)/std",将每个元素分布到(-1,1)
 28     transform_train = transforms.Compose([
 29                         transforms.Resize((224, 224), interpolation=InterpolationMode.BICUBIC),
 30                         # transforms.RandomCrop(32, padding=4),  # 先四周填充0,在吧图像随机裁剪成32*32
 31                         transforms.RandomHorizontalFlip(p=0.5),
 32                         transforms.ToTensor(),
 33                         transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
 34                     ])
 35 
 36     transform_test = transforms.Compose([
 37                         transforms.Resize((224, 224), interpolation=InterpolationMode.BICUBIC),
 38                         # transforms.RandomCrop(32, padding=4),  # 先四周填充0,在吧图像随机裁剪成32*32
 39                         transforms.ToTensor(),
 40                         transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
 41                     ])
 42     
 43     # 内置函数下载数据集
 44     train_dataset = datasets.CIFAR10(root="./data/Cifar10/", train=True, 
 45                                      transform = transform_train,
 46                                      download=True)
 47     test_dataset = datasets.CIFAR10(root = "./data/Cifar10/", 
 48                                     train = False, 
 49                                     transform = transform_test,
 50                                     download=True)
 51     
 52     print(len(train_dataset), len(test_dataset))
 53     
 54     Batch_size = 64
 55     train_loader = DataLoader(train_dataset, batch_size=Batch_size,  shuffle = True, num_workers=4)
 56     test_loader = DataLoader(test_dataset, batch_size = Batch_size, shuffle = False, num_workers=4)
 57     
 58     # 设置CUDA
 59     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 60 
 61     # 初始化模型
 62     # 直接更换模型就行,其他无需操作
 63     # model = Lenet5_new().to(device)
 64     # model = ResNet18().to(device)
 65     # model = ResNet18_new().to(device)
 66     # model = VGGNet16().to(device)
 67     # model = DenseNet121().to(device)
 68     # model  = DenseNet169().to(device)
 69 
 70     # model = NIN_Net().to(device)
 71     
 72     model = GoogLeNet().to(device)
 73     
 74     # model = AlexNet(num_classes=10, init_weights=True).to(device)
 75     print(" GoogLeNet train...")
 76       
 77     # 构造损失函数和优化器
 78     criterion = nn.CrossEntropyLoss() # 多分类softmax构造损失
 79     # opt = optim.SGD(model.parameters(), lr=0.01, momentum=0.8, weight_decay=0.001)
 80     opt = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
 81     
 82     # 动态更新学习率 ------每隔step_size : lr = lr * gamma
 83     schedule = optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.6, last_epoch=-1)
 84     
 85     # 开始训练
 86     print("Start Train...")
 87 
 88     epochs = 100
 89    
 90     loss_list = []
 91     train_acc_list =[]
 92     test_acc_list = []
 93     epochs_list = []
 94     
 95     for epoch in range(0, epochs):
 96          
 97         start = time.time()
 98         
 99         model.train()
100         
101         running_loss = 0.0
102         batch_num = 0
103         
104         for i, (inputs, labels) in enumerate(train_loader):
105             
106             inputs, labels = inputs.to(device), labels.to(device)
107             
108             # 将数据送入模型训练
109             outputs = model(inputs)
110             # 计算损失
111             loss = criterion(outputs, labels).to(device)
112             
113             # 重置梯度
114             opt.zero_grad()
115             # 计算梯度,反向传播
116             loss.backward()
117             # 根据反向传播的梯度值优化更新参数
118             opt.step()
119             
120             # 100个batch的 loss 之和
121             running_loss += loss.item()
122             # loss_list.append(loss.item())
123             batch_num+=1
124             
125             
126         epochs_list.append(epoch)
127             
128         # 每一轮结束输出一下当前的学习率 lr
129         lr_1 = opt.param_groups[0]['lr']
130         print("learn_rate:%.15f" % lr_1)
131         schedule.step()
132         
133         end = time.time()
134         print('epoch = %d/100, batch_num = %d, loss = %.6f, time = %.3f' % (epoch+1, batch_num, running_loss/batch_num, end-start))
135         running_loss=0.0    
136         
137         # 每个epoch训练结束,都进行一次测试验证
138         model.eval()
139         train_correct = 0.0
140         train_total = 0
141 
142         test_correct = 0.0
143         test_total = 0
144         
145          # 训练模式不需要反向传播更新梯度
146         with torch.no_grad():
147             
148             # print("=======================train=======================")
149             for inputs, labels in train_loader:
150                 inputs, labels = inputs.to(device), labels.to(device)
151                 outputs = model(inputs)
152 
153                 pred = outputs.argmax(dim=1)  # 返回每一行中最大值元素索引
154                 train_total += inputs.size(0)
155                 train_correct += torch.eq(pred, labels).sum().item()
156           
157             
158             # print("=======================test=======================")
159             for inputs, labels in test_loader:
160                 inputs, labels = inputs.to(device), labels.to(device)
161                 outputs = model(inputs)
162 
163                 pred = outputs.argmax(dim=1)  # 返回每一行中最大值元素索引
164                 test_total += inputs.size(0)
165                 test_correct += torch.eq(pred, labels).sum().item()
166 
167             print("train_total = %d, Accuracy = %.5f %%,  test_total= %d, Accuracy = %.5f %%" %(train_total, 100 * train_correct / train_total, test_total, 100 * test_correct / test_total))    
168 
169             train_acc_list.append(100 * train_correct / train_total)
170             test_acc_list.append(100 * test_correct / test_total)
171 
172         # print("Accuracy of the network on the 10000 test images:%.5f %%" % (100 * test_correct / test_total))
173         # print("===============================================")
174 
175     fig = plt.figure(figsize=(4, 4))
176     
177     plt.plot(epochs_list, train_acc_list, label='train_acc_list')
178     plt.plot(epochs_list, test_acc_list, label='test_acc_list')
179     plt.legend()
180     plt.title("train_test_acc")
181     plt.savefig('GoogLeNet_acc_epoch_{:04d}.png'.format(epochs))
182     plt.close()
183     
184 if __name__ == "__main__":
185     
186     main()
View Code

 

 

 

 

posted @ 2023-01-12 13:58  赵家小伙儿  阅读(100)  评论(0)    收藏  举报