# Handwritten Chinese character recognition
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
def classes_txt(root, out_path, num_class=None):
    """Write the path of every file found under ``root`` to ``out_path``.

    ``root`` is expected to contain one sub-directory per class. The
    sub-directories are visited in sorted name order and, when ``num_class``
    is given, only the first ``num_class`` of them are listed. One path is
    written per line, built as ``os.path.join(root, class_dir, file_name)``.

    Args:
        root: Directory holding one sub-directory per class.
        out_path: Text file that is created or overwritten with the paths.
        num_class: Maximum number of class directories to include; ``None``
            includes all of them.
    """
    # Skip stray files (e.g. ".DS_Store") sitting next to the class folders:
    # calling os.listdir() on them would raise NotADirectoryError.
    class_dirs = sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )
    if num_class is not None:
        class_dirs = class_dirs[:num_class]

    with open(out_path, 'w') as f:
        for class_dir in class_dirs:
            # Sorted so the listing is reproducible across runs and platforms.
            for file_name in sorted(os.listdir(os.path.join(root, class_dir))):
                f.write(os.path.join(root, class_dir, file_name) + '\n')
# Generate the path listing files for the training and test sets
# Image folders (one sub-folder per class) paired with the listing file built from each.
root_train, out_train = './data/train', './train.txt'
root_test, out_test = './data/test', './test.txt'

# At most the first 100 class folders (in sorted order) are listed per split.
classes_txt(root=root_train, out_path=out_train, num_class=100)
classes_txt(root=root_test, out_path=out_test, num_class=100)
class MyDataset(Dataset):
    """Image dataset backed by a text file that lists one image path per line.

    Each path must look like ``<root>/<class_dir>/<file>``; the integer value
    of ``<class_dir>`` (e.g. ``00008`` -> 8) is used as the sample's label.
    """

    def __init__(self, txt_path, num_class, transforms=None):
        """Read the image paths and derive a label for each of them.

        Args:
            txt_path: Listing file with one image path per line.
            num_class: Number of classes to keep; samples whose label is not
                below this value are skipped. ``None`` keeps every sample.
            transforms: Optional callable applied to each loaded PIL image.

        Raises:
            ValueError: If a class directory name is not an integer.
            IndexError: If a line has no parent directory component.
        """
        super().__init__()
        self.images = []
        self.labels = []
        with open(txt_path, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Accept both "/" and "\" so listings written on Windows and
                # on POSIX systems parse the same way.
                parts = line.replace('\\', '/').split('/')
                # The label is the class folder, not the file name.
                label = int(parts[-2])
                if num_class is not None and label >= num_class:
                    continue
                self.images.append(line)
                self.labels.append(label)
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at integer ``index``."""
        image = Image.open(self.images[index]).convert('RGB')
        label = self.labels[index]
        if self.transforms is not None:
            image = self.transforms(image)
        return image, label

    def __len__(self):
        """Return the number of samples kept from the listing file."""
        return len(self.labels)
# Image preprocessing (resize, convert to grayscale, convert to tensor)
# Steps are applied in order; the 64x64 single-channel result matches the
# (1, 64, 64) input size the network is summarised with further down.
preprocessing_steps = [
    transforms.Resize((64, 64)),
    transforms.Grayscale(),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)
# Load the training and test sets
# Datasets read the listing files; both share the same preprocessing.
train_set = MyDataset(txt_path='./train.txt', num_class=100, transforms=transform)
test_set = MyDataset(txt_path='./test.txt', num_class=100, transforms=transform)

# Mini-batches of 50 samples, reshuffled on every pass over the data.
train_loader = DataLoader(dataset=train_set, batch_size=50, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=50, shuffle=True)
class MYNET(nn.Module):
    """Small convolutional classifier for 1x64x64 grayscale images.

    Two conv + ReLU + max-pool stages are followed by three fully connected
    layers; the output holds one raw score (logit) per class.
    """

    def __init__(self, num_classes=100):
        """Create the layers.

        Args:
            num_classes: Size of the output layer (defaults to 100).
        """
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 3)    # 1 input channel, 6 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)   # 6 input channels, 16 output channels, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)     # max pooling, 2x2 window, stride 2
        # Spatial size: 64 -conv1-> 62 -pool-> 31 -conv2-> 27 -pool-> 13,
        # so the flattened feature vector has 16 * 13 * 13 = 2704 entries.
        self.fc1 = nn.Linear(2704, 512)
        self.fc2 = nn.Linear(512, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``.

        ``x`` must have shape ``(batch, 1, 64, 64)``; any other spatial size
        makes ``fc1`` raise a shape-mismatch error.
        """
        x = self.pool(F.relu(self.conv1(x)))  # conv + activation + pooling
        x = self.pool(F.relu(self.conv2(x)))  # conv + activation + pooling
        # Flatten per sample. Keeping the batch dimension explicit means a
        # wrongly sized input fails loudly instead of silently changing the
        # batch size as view(-1, 2704) could.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
def _evaluate(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    The model is switched to eval mode for the duration of the call and its
    previous training/eval mode is restored afterwards. Returns 0.0 when the
    loader yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed while measuring accuracy
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predictions = model(images).argmax(dim=1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    model.train(was_training)
    return correct / total if total else 0.0


model = MYNET()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
summary(model, (1, 64, 64))  # print the network structure

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
EPOCH = 3
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        output = model(x)
        loss = loss_func(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Evaluate on the test set every 50 steps. The dataset only supports
        # integer indexing, so the test data is consumed batch by batch
        # through its loader rather than sliced in one go.
        if step % 50 == 0:
            accuracy = _evaluate(model, test_loader, device)
            print(f'迭代次数: {epoch} | 训练损失: {loss.item():.4f} | 测试准确率: {accuracy}')

# torch.save does not create missing parent directories.
os.makedirs('./tmp', exist_ok=True)
torch.save(model.state_dict(), './tmp/model.pkl')
# Load the model
# Rebuild the network on the CPU and restore the saved weights.
# map_location='cpu' lets a checkpoint written during a GPU run load on a
# machine without CUDA as well.
model = MYNET()
model.load_state_dict(torch.load('./tmp/model.pkl', map_location='cpu'))
model.eval()  # switch to inference mode
# Test a single image
# Preprocessing for the single test image: 64x64, one channel, tensor.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.Grayscale(),
    transforms.ToTensor()
])
# Replace with the path of your own test image. The image is converted to RGB
# first so it goes through the same steps as in MyDataset.__getitem__.
img = Image.open('./data/test/00008/816.png').convert('RGB')
img = transform(img)
img = img.unsqueeze(0)  # add the batch dimension -> (1, 1, 64, 64)
with torch.no_grad():  # inference only, no gradients needed
    output = model(img)
_, prediction = torch.max(output, 1)
print(f'预测类别: {prediction.item()}')

# Web page footer residue (not part of the script): 浙公网安备 33010602011771号