# 深度学习（二）之猫狗分类

## 任务目标

- 自定义卷积神经网络：$87.26\%$
- 使用resnet34做特征提取：$93.6\%$
- 使用resnet34和VGG16做特征提取：$94.88\%$

python：3.9.7

torch：1.11.0（其中“使用resnet34和VGG16做特征提取”的模型使用的 PyTorch 版本是1.9.1）

## 数据集

import os
from PIL import Image

# Directory holding the training images; the class is encoded in the first
# three characters of each file name ("dog..." or "cat...").
root_dir = "./train"

# File-name prefix -> class index (dog = 0, cat = 1).
_LABEL_BY_PREFIX = {"dog": 0, "cat": 1}

imgs_name = os.listdir(root_dir)

imgs_path = []    # full path of every labelled image
labels_data = []  # class index aligned one-to-one with imgs_path

for name in imgs_name:
    label = _LABEL_BY_PREFIX.get(name[:3])
    if label is None:
        # Entries that are neither dog nor cat (hidden files, sub-directories,
        # ...) have no class.  Without this guard `label` would either be
        # undefined (NameError on the first entry) or silently keep the value
        # assigned for the previous file.
        continue
    imgs_path.append(os.path.join(root_dir, name))
    labels_data.append(label)


## 数据增强

# Preprocessing / augmentation pipeline applied to the training images.
_train_steps = [
    transforms.Resize(75),
    transforms.RandomResizedCrop(64),    # crop a random area, then resize it to 64
    transforms.RandomHorizontalFlip(),   # random horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
my_transforms = transforms.Compose(_train_steps)

# Preprocessing pipeline applied to the validation images: a fixed resize to
# 64x64 with no random augmentation, followed by the same normalisation
# constants as the training pipeline.
_valid_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
valid_transforms = transforms.Compose(_valid_steps)


## 模型一：自定义网络

import torch.nn as nn
import torch.nn.functional as F


class MyNet(nn.Module):
    """Small three-stage CNN that classifies an image into two classes.

    Each convolution stage is Conv(3x3, no padding) -> ReLU -> BatchNorm ->
    MaxPool(2) -> Dropout(0.25).  The classifier head hard-codes a flattened
    size of 128*6*6, which is what a 3x64x64 input produces
    (64 -> 62 -> 31 -> 29 -> 14 -> 12 -> 6), so inputs must be 64x64.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = self._conv_block(3, 32)
        self.conv2 = self._conv_block(32, 64)
        self.conv3 = self._conv_block(64, 128)

        self.fc = nn.Sequential(
            nn.Linear(128 * 6 * 6, 256),
            nn.Dropout(0.2),
            nn.Linear(256, 2),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv -> ReLU -> BatchNorm -> MaxPool -> Dropout stage."""
        # Layer order is kept exactly as before so that state_dict keys
        # (conv1.0.weight, conv1.2.weight, ...) remain unchanged.
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 2)."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.flatten(1)  # (batch, 128, 6, 6) -> (batch, 128*6*6)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


## 模型二：使用resnet34做特征提取


# Use ResNet-34 as a fixed feature extractor.
resnet = models.resnet34(pretrained=True)
# Drop the last TWO children (the global average pool and the fc layer), not
# just fc, so the output is still a spatial feature map.
modules = list(resnet.children())[:-2]
# eval() only puts batch-norm / dropout layers into inference behaviour; it
# does not stop an optimizer from updating the weights.  requires_grad_(False)
# is what actually keeps the pretrained ResNet parameters unchanged in training.
# NOTE: calling .train() on a parent module that owns res_feature switches it
# back to training mode; call res_feature.eval() again afterwards if needed.
res_feature = nn.Sequential(*modules).eval().requires_grad_(False)

# Define the network
import torch.nn as nn
import torch.nn.functional as F


class MyNet(nn.Module):
    """Two-class classifier head on top of a supplied feature extractor.

    Args:
        resnet_feature: module applied to the input batch first.  Its output
            must flatten to 512*2*2 values per sample, because that is the
            input size of the first linear layer.
    """

    def __init__(self, resnet_feature):
        super().__init__()
        self.resnet_feature = resnet_feature
        self.fc = nn.Sequential(
            nn.Linear(512 * 2 * 2, 256),
            nn.Dropout(0.25),
            nn.Linear(256, 2),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 2)."""
        x = self.resnet_feature(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dim
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


## 模型三：resnet34&vgg16做特征提取

# Use VGG16 features
model = models.vgg16(pretrained=True)
# Author's note: eval() was forgotten on the VGG branch for the reported run,
# i.e. the VGG parameters were being changed during training.
# NOTE(review): eval() only changes the behaviour of layers such as dropout /
# batch norm; by itself it does not prevent weight updates.  If freezing VGG
# was the intent, that needs requires_grad_(False) -- confirm before changing,
# since the reported accuracy was obtained with this code as-is.
vgg_feature = model.features

# Use ResNet-34 as a fixed feature extractor.
resnet = models.resnet34(pretrained=True)
# Drop the last TWO children (the global average pool and the fc layer), not
# just fc, so the output is still a spatial feature map.
modules = list(resnet.children())[:-2]
# eval() only puts batch-norm / dropout layers into inference behaviour; it
# does not stop an optimizer from updating the weights.  requires_grad_(False)
# is what actually keeps the pretrained ResNet parameters unchanged in training.
res_feature = nn.Sequential(*modules).eval().requires_grad_(False)

import torch
import torch.nn as nn
import torch.nn.functional as F


class MyNet(nn.Module):
    """Two-class classifier head on the fused output of two feature extractors.

    Args:
        resnet_feature: first feature extractor applied to the input batch.
        vgg_feature: second feature extractor applied to the same batch.

    Both outputs are concatenated along dim 1, so they must agree on every
    other dimension, and the fused tensor must flatten to 1024*2*2 values per
    sample (the input size of the first linear layer).
    """

    def __init__(self, resnet_feature, vgg_feature):
        super().__init__()
        self.resnet_feature = resnet_feature
        self.vgg_feature = vgg_feature

        self.fc = nn.Sequential(
            nn.Linear(1024 * 2 * 2, 256),
            nn.Dropout(0.25),
            nn.Linear(256, 2),
        )

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 2)."""
        x1 = self.resnet_feature(x)
        x2 = self.vgg_feature(x)
        # Fuse the two feature maps along the channel dimension.
        x = torch.cat((x1, x2), 1)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dim
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


## trick

1. 在训练时，可以使用 PyTorch 的 lr_scheduler 动态地调整学习率。

2. 对图像进行数据增强。

3. 找一台好电脑进行训练，别用小水管。

## 参考

1. kaggle猫狗数据集：Dogs vs. Cats | Kaggle
2. netron app：Netron
3. pytorch optim：torch.optim — PyTorch 1.11.0 documentation
posted @ 2022-03-20 22:04  渣渣辉啊  阅读(6446)  评论(4)  编辑  收藏  举报