04 图像分类器
你已经了解了如何定义神经网络,计算损失值和网络里权重的更新。
现在你也许会想应该怎么处理数据?
通常来说,当你处理图像,文本,语音或者视频数据时,你可以使用标准 python 包将数据加载成numpy 数组格式,然后将这个数组转换成 torch.Tensor
对于图像,可以用 Pillow,OpenCV
对于语音,可以用 scipy,librosa
对于文本,可以直接用 Python 或 Cython 基础数据加载模块,或者用NLTK和 SpaCy
特别是对于视觉,我们已经创建了一个叫做 torchvision 的包,该包含有支持加载类似 Imagenet,CIFAR10,MNIST 等公共数据集的数据加载模块 torchvision.datasets、图像数据转换模块 torchvision.transforms,并可配合数据加载器 torch.utils.data.DataLoader 批量读取数据。
这提供了极大的便利,并且避免了编写“样板代码”。
对于本教程,我们将使用CIFAR10数据集,它包含十个类:‘airplane’,‘automobile’, ‘bird’, ‘cat’,‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’。CIFAR-10 中的图像尺寸为 3×32×32,也就是RGB的3层颜色通道,每层通道内的尺寸为 32×32。

01 训练一个图像分类器
我们将按次序地做如下几步:
01使用torchvision加载并且归一化CIFAR10的训练和测试数据集
加载并归一化 CIFAR10:使用 torchvision,用它来加载 CIFAR10 数据非常简单。
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
def imgshow(img):
    """Display a normalized image tensor in a matplotlib window.

    The tensor is first un-normalized with ``img / 2 + 0.5`` (the inverse of
    a mean-0.5 / std-0.5 normalization), converted to a NumPy array, and its
    first axis is moved to the end before plotting.

    Args:
        img: Image tensor; assumed to be laid out channels-first (C, H, W),
            e.g. the output of ``torchvision.utils.make_grid``.
    """
    # Undo the normalization so the values are back in the displayable range.
    unnormalized = img / 2 + 0.5
    # plt.imshow expects the channel axis last, so reorder the axes to (1, 2, 0).
    channels_last = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()
# torchvision datasets yield PIL images with values in [0, 1]. The pipeline
# below converts each image to a tensor and then normalizes every channel with
# mean 0.5 and std 0.5, which maps the values into the range [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split (downloaded into ./data on first use).
# print(trainset) reports a "Dataset CIFAR10" object with 50000 datapoints,
# root location ./data, split Train, and the Compose transform defined above;
# type(trainset) is <class 'torchvision.datasets.cifar.CIFAR10'>.
trainset = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)
# Shuffled mini-batches of 4 images, loaded by 2 worker processes.
# type(trainloader) is <class 'torch.utils.data.dataloader.DataLoader'>.
trainloader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: same transform, but iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    download=True,
    transform=transform,
)
testloader = DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
if __name__ == '__main__':
    # Pull a single mini-batch from the training loader for a visual sanity check.
    dataiter = iter(trainloader)
    # Use the built-in next(): the iterator's .next() method is a Python 2
    # idiom and is not available on DataLoader iterators in current PyTorch.
    images, labels = next(dataiter)
    print(images.shape, labels)
    # Tile the batch into one image grid and display it.
    imgshow(torchvision.utils.make_grid(images))
    # Print the class name of every image in the batch (no hard-coded batch size).
    print(' '.join('%5s' % classes[label] for label in labels))
02定义一个卷积神经网络
# 定义神经网络
class MyNet(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    Architecture: two 5x5 convolutions (3->6 and 6->16 channels), each
    followed by ReLU and 2x2 max pooling, then three fully connected layers
    (400->120->80->10). The first linear layer expects 16*5*5 features,
    which is what the convolutional stack yields for 3x32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 80)
        self.fc3 = nn.Linear(80, 10)

    def forward(self, x):
        """Compute class scores (logits) for a batch of images.

        Args:
            x: Batch of images shaped (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) with one unnormalized score per class.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                16*5*5 features per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16*5*5), this can never silently regroup features of
        # different samples when the input has an unexpected spatial size;
        # the mismatch surfaces as an error in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
03定义一个损失函数和优化器
# Instantiate the network first: the optimizer below needs its parameters,
# and the training and evaluation code refers to it as `net`.
net = MyNet()
# Cross-entropy loss for multi-class classification.
criterion = nn.CrossEntropyLoss()
# SGD with momentum. The variable name `optimezer` (sic) is kept unchanged
# because the training loop refers to it by this name.
optimezer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)
04在训练样本数据上训练网络
# Train for 9 passes over the training set; `epoch` is already 1-based.
for epoch in range(1, 10):
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        datas, labels = data
        # Clear gradients accumulated by the previous step.
        optimezer.zero_grad()
        # Forward pass, loss, backward pass, parameter update.
        outputs = net(datas)
        loss = criterion(outputs, labels)
        loss.backward()
        optimezer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:  # report every 2000 mini-batches
            # Log `epoch` directly: since the range starts at 1, the former
            # `epoch + 1` labelled the epochs 2..10 instead of 1..9.
            print('[%d, %5d] loss: %.3f' %
                  (epoch, i + 1, running_loss / 2000))
            running_loss = 0.0
05在测试样本数据上测试网络
# Count correct top-1 predictions over the whole test loader.
correct = 0
total = 0
# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in testloader:
        scores = net(images)
        # The predicted class is the index of the largest score in each row.
        predicted = scores.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
所有代码汇总
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
# 下载器进行数据下载
def data_loaader(root="./data", batch_size=4, num_workers=2):
    """Build the CIFAR10 training and test data loaders.

    Both splits are downloaded into ``root`` if necessary and share one
    transform: conversion to a tensor followed by per-channel normalization
    with mean 0.5 and std 0.5.

    Args:
        root: Directory where the dataset is stored / downloaded.
        batch_size: Number of images per mini-batch for both loaders.
        num_workers: Number of worker processes used by each loader.

    Returns:
        A ``(trainloader, testloader)`` tuple. The training loader shuffles
        its data; the test loader iterates in a fixed order.
    """
    # Convert images to tensors and normalize each channel.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    # Training split wrapped in a shuffling loader.
    trainset = torchvision.datasets.CIFAR10(
        root=root, train=True, download=True, transform=transform)
    trainloader = DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    # Test split wrapped in a non-shuffling loader.
    testset = torchvision.datasets.CIFAR10(
        root=root, train=False, download=True, transform=transform)
    testloader = DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return trainloader, testloader
# 定义神经网络
class MyNet(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    Architecture: two 5x5 convolutions (3->6 and 6->16 channels), each
    followed by ReLU and 2x2 max pooling, then three fully connected layers
    (400->120->80->10). The first linear layer expects 16*5*5 features,
    which is what the convolutional stack yields for 3x32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 80)
        self.fc3 = nn.Linear(80, 10)

    def forward(self, x):
        """Compute class scores (logits) for a batch of images.

        Args:
            x: Batch of images shaped (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, 10) with one unnormalized score per class.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce
                16*5*5 features per sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 16 * 5 * 5), this can never silently regroup features of
        # different samples when the input has an unexpected spatial size;
        # the mismatch surfaces as an error in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
#定义训练函数
def train_():
    """Train a fresh ``MyNet`` on the training loader for three epochs.

    Uses cross-entropy loss and SGD with momentum (lr=0.001, momentum=0.9),
    printing the mean loss after every 2000 mini-batches.

    Returns:
        A ``(net, testloader)`` tuple: the trained network and the test
        loader obtained from ``data_loaader()``.
    """
    net = MyNet()
    trainloader, testloader = data_loaader()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)

    report_every = 2000
    for epoch_number in range(1, 4):
        running_loss = 0.0
        for step, (datas, labels) in enumerate(trainloader, start=1):
            # Standard update: reset gradients, forward, backward, step.
            optimizer.zero_grad()
            loss = criterion(net(datas), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if step % report_every == 0:
                # Mean loss over the mini-batches since the last report.
                print('[%d, %5d] loss: %.3f' %
                      (epoch_number, step, running_loss / 2000))
                running_loss = 0.
    return net, testloader
# 定义测试函数
def test():
    """Train the network via ``train_()`` and print its test-set accuracy.

    Accuracy is the percentage of test images whose highest-scoring class
    equals the label; it is printed, not returned.
    """
    net, testloader = train_()
    correct = 0
    total = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in testloader:
            scores = net(images)
            # The predicted class is the index of the largest score per row.
            predicted = scores.max(dim=1).indices
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
# Script entry point: test() trains the network via train_() and then prints
# its accuracy on the test loader.
if __name__ == '__main__':
    test()

浙公网安备 33010602011771号