# 手写汉字识别

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

def classes_txt(root, out_path, num_class=None):
    """Write the path of every sample file under ``root`` to ``out_path``.

    ``root`` is expected to contain one sub-directory per class. The class
    directories are sorted by name, optionally truncated to the first
    ``num_class`` of them, and every file they contain is written to
    ``out_path`` as ``<root>/<class_dir>/<file>``, one path per line.

    Args:
        root: Directory holding the per-class sub-directories.
        out_path: Text file to create; an existing file is overwritten.
        num_class: Keep only the first ``num_class`` class directories (in
            sorted order). ``None`` keeps all of them.
    """
    # Only real directories count as classes, so a stray file in ``root``
    # neither crashes the listing nor uses up one of the class slots.
    class_dirs = sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )
    if num_class is not None:
        class_dirs = class_dirs[:num_class]

    with open(out_path, 'w') as f:
        for class_dir in class_dirs:
            class_path = os.path.join(root, class_dir)
            # Sorted so the generated list is reproducible across runs.
            for file_name in sorted(os.listdir(class_path)):
                f.write(os.path.join(class_path, file_name) + '\n')

# 生成训练和测试集的路径文档

# Image folders for each split and the path-list files generated from them.
root_train, root_test = './data/train', './data/test'
out_train, out_test = './train.txt', './test.txt'

# One path list per split, each capped at 100 classes.
classes_txt(root=root_train, out_path=out_train, num_class=100)
classes_txt(root=root_test, out_path=out_test, num_class=100)

class MyDataset(Dataset):
    """Image dataset backed by a text file that lists one image path per line.

    Every path has the form ``.../<class_dir>/<file>``. The integer value of
    ``<class_dir>`` (for example ``00008`` -> ``8``) is the sample's label.
    """

    def __init__(self, txt_path, num_class, transforms=None):
        """Read the sample list from ``txt_path``.

        Args:
            txt_path: Text file with one image path per line.
            num_class: Number of classes to keep. Samples whose label is
                ``num_class`` or greater are skipped. ``None`` keeps every
                sample.
            transforms: Optional callable applied to each loaded PIL image.

        Raises:
            IndexError: If a line has no parent directory component.
            ValueError: If the parent directory name is not an integer.
        """
        super().__init__()
        self.images = []
        self.labels = []
        self.transforms = transforms
        with open(txt_path, 'r') as f:
            for line in f:
                path = line.rstrip('\r\n')
                if not path:
                    continue
                # The label is the name of the directory holding the image,
                # not the file name. Both separators are accepted so a list
                # written on Windows can also be read on POSIX systems.
                parts = path.replace('\\', '/').split('/')
                label = int(parts[-2])
                if num_class is not None and label >= num_class:
                    continue
                self.images.append(path)
                self.labels.append(label)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at integer ``index``."""
        image = Image.open(self.images[index]).convert('RGB')
        label = self.labels[index]
        if self.transforms is not None:
            image = self.transforms(image)
        return image, label

    def __len__(self):
        """Return the number of samples that were kept."""
        return len(self.labels)

# 图像预处理(调整大小、灰度化、转张量)

# Preprocessing pipeline: resize to 64x64, convert to grayscale, convert to a tensor.
transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

# 加载训练集和测试集

# Datasets read from the two path lists, sharing one preprocessing pipeline.
train_set = MyDataset(txt_path='./train.txt', num_class=100, transforms=transform)
test_set = MyDataset(txt_path='./test.txt', num_class=100, transforms=transform)

# Both loaders serve shuffled mini-batches of 50 samples.
train_loader = DataLoader(dataset=train_set, batch_size=50, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=50, shuffle=True)

class MYNET(nn.Module):
    """Small LeNet-style CNN for single-channel 64x64 character images.

    Two convolution + ReLU + max-pool stages are followed by three fully
    connected layers; the last one emits one raw score (logit) per class.
    """

    def __init__(self, num_classes=100):
        """Create the layers.

        Args:
            num_classes: Size of the output layer (number of classes).
        """
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 3)   # 1 input channel, 6 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 input channels, 16 output channels, 5x5 kernel
        self.pool = nn.MaxPool2d(2, 2)    # 2x2 max pooling with stride 2
        # Spatial size: 64 -> conv1 62 -> pool 31 -> conv2 27 -> pool 13,
        # so the flattened feature vector has 16 * 13 * 13 = 2704 values.
        self.fc1 = nn.Linear(2704, 512)
        self.fc2 = nn.Linear(512, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Map a ``(batch, 1, 64, 64)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.pool(F.relu(self.conv1(x)))  # convolution + activation + pooling
        x = self.pool(F.relu(self.conv2(x)))  # convolution + activation + pooling
        # Flatten everything except the batch dimension. A wrongly sized input
        # then fails loudly in fc1 instead of being regrouped into another
        # batch size, which ``view(-1, 2704)`` would allow.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = MYNET().to(device)
summary(model, input_size=(1, 64, 64))  # print the network structure

# Cross-entropy loss, optimised with Adam.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

def _evaluate_accuracy(model, loader, device):
    """Return the fraction of samples from ``loader`` that ``model`` gets right.

    The model is put in evaluation mode and run without gradient tracking;
    its previous training/evaluation mode is restored before returning.
    Returns ``0.0`` when the loader yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predictions = torch.max(model(images), 1)[1]
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    model.train(was_training)
    return correct / total if total else 0.0


EPOCH = 3
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        output = model(x)
        loss = loss_func(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate on the test set every 50 steps. The dataset only supports
        # integer indexing, so the samples are drawn through the loader
        # rather than by slicing the dataset.
        if step % 50 == 0:
            accuracy = _evaluate_accuracy(model, test_loader, device)
            print(f'迭代次数: {epoch} | 训练损失: {loss.item():.4f} | 测试准确率: {accuracy}')

# torch.save does not create missing directories.
os.makedirs('./tmp', exist_ok=True)
torch.save(model.state_dict(), './tmp/model.pkl')

# 加载模型

# Fresh network in evaluation mode, filled with the weights saved after training.
model = MYNET().eval()
model.load_state_dict(torch.load('./tmp/model.pkl'))

# 测试单张图像

# Preprocessing for the single image: resize to 64x64, grayscale, tensor.
transform = transforms.Compose([
    transforms.Resize(size=(64, 64)),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

# Replace the path below with the path to your own test image.
img = Image.open('./data/test/00008/816.png')
img = transform(img).view(1, 1, 64, 64)  # reshape to a batch holding one 1x64x64 image
output = model(img)
_, prediction = output.max(dim=1)
print(f'预测类别: {prediction.numpy()[0]}')

# posted @ 2025-11-13 22:06  kk/  阅读(3)  评论(0)    收藏  举报