# 文字识别
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image, ImageDraw, ImageFont
import os
# --------------------------
# 1. 生成虚拟汉字图像(修复textsize问题)
# --------------------------
def get_font_path():
    """Return the path of a font file that can render Chinese characters.

    A list of well-known font locations is probed in order and the first
    one that exists on disk is returned.  Windows gets its own list; every
    other platform shares a list of macOS and common Linux locations.

    Returns:
        str: Path of the first candidate that exists.  If none exists, the
        SimHei path on Windows is returned as a last-resort default; edit
        that value to point at a font available on your machine.
    """
    # Last-resort default, returned even when the file does not exist so
    # that the function never raises.
    fallback = "C:/Windows/Fonts/simhei.ttf"

    if os.name == "nt":  # Windows
        candidates = [
            "C:/Windows/Fonts/simhei.ttf",  # SimHei
            "C:/Windows/Fonts/simsun.ttc",  # SimSun
            "C:/Windows/Fonts/microsoftyahei.ttf",
            "C:/Windows/Fonts/msyh.ttc",  # Microsoft YaHei
            "C:/Windows/Fonts/msyh.ttf",
        ]
    else:  # macOS and Linux
        candidates = [
            "/Library/Fonts/Songti.ttc",
            "/Library/Fonts/Heiti.ttc",
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/Supplemental/Songti.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
            "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
        ]

    for path in candidates:
        if os.path.exists(path):
            return path
    return fallback
def generate_char_image(char, font_path, size=(28, 28), font_size=20):
    """Render one character as a grayscale image, black on white.

    Args:
        char: The text to draw (a single character in this script).
        font_path: Path of a TrueType/OpenType font file.
        size: Image size as ``(width, height)`` in pixels.
        font_size: Font size passed to ``ImageFont.truetype``.

    Returns:
        numpy.ndarray: The pixel data of the rendered "L"-mode image, with
        255 as background and 0 as ink.
    """
    img = Image.new("L", size, color=255)  # white background
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype(font_path, size=font_size)
    except OSError:
        # The font file is missing or unreadable.  Keep going with Pillow's
        # built-in font, but say so: it may not contain the requested
        # glyphs, which would make every rendered image look alike.
        import warnings

        warnings.warn(
            f"Could not load font {font_path!r}; falling back to the "
            "default PIL font, which may not contain CJK glyphs."
        )
        font = ImageFont.load_default()

    # textbbox() returns (x0, y0, x1, y1) relative to the drawing origin.
    # x0/y0 are usually non-zero, so they must be subtracted from the draw
    # position for the ink itself to end up centred.
    left, top, right, bottom = draw.textbbox((0, 0), char, font=font)
    x = (size[0] - (right - left)) // 2 - left
    y = (size[1] - (bottom - top)) // 2 - top
    draw.text((x, y), char, fill=0, font=font)  # black text
    return np.array(img)
# Generate the training data (10 characters: the numerals one to ten).
chars = ["一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
num_samples_per_char = 1000
# Probability that any single pixel is forced to black.  Kept small so the
# noise stays light and the samples still resemble the clean glyphs that
# are rendered at prediction time.
noise_prob = 0.05
font_path = get_font_path()

image_batches = []
labels = []
for char in chars:
    # The clean glyph is the same for every sample, so render it once.
    base = generate_char_image(char, font_path)
    # One independent noise mask per sample, broadcast against the glyph.
    mask = np.random.random((num_samples_per_char,) + base.shape) < noise_prob
    image_batches.append(np.where(mask, 0, base))
    labels.extend([char] * num_samples_per_char)

# Shape: (len(chars) * num_samples_per_char, height, width), float32.
images = np.concatenate(image_batches).astype(np.float32)
labels = np.array(labels)
# --------------------------
# 2. 数据预处理
# --------------------------
# Scale pixel values by 1/255.
images = np.divide(images, 255.0)

# Map the character labels to integer class ids.
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

# Hold out 20% of the samples for testing (fixed seed for repeatability).
x_train, x_test, y_train, y_test = train_test_split(
    images,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)

# Convert to tensors; the images gain a channel axis at position 1.
x_train = torch.tensor(x_train[:, np.newaxis])
x_test = torch.tensor(x_test[:, np.newaxis])
y_train = torch.as_tensor(y_train, dtype=torch.long)
y_test = torch.as_tensor(y_test, dtype=torch.long)

# Data loaders: shuffle the training set only.
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# --------------------------
# 3. 模型定义
# --------------------------
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 1-channel 28x28 images.

    Two conv/ReLU/max-pool stages reduce a ``(N, 1, 28, 28)`` input to
    ``(N, 32, 7, 7)``; two fully connected layers then produce
    ``num_classes`` logits per sample.
    """

    def __init__(self, num_classes=10):
        """Build the layers.

        Args:
            num_classes: Number of output logits.
        """
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 28x28 -> 14x14
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 14x14 -> 7x7
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return logits of shape ``(N, num_classes)`` for a batch ``x``."""
        x = self.conv_layers(x)
        # Flatten everything except the batch axis.  Unlike view(-1, ...),
        # this keeps the batch size fixed, so a wrongly sized input fails
        # in the linear layer instead of being silently re-batched.
        x = torch.flatten(x, 1)
        return self.fc_layers(x)
# Use the GPU when one is available, otherwise the CPU, and create the
# network (one output per character class) directly on that device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN(num_classes=len(chars)).to(device)
# --------------------------
# 4. 训练模型
# --------------------------
# Cross-entropy loss on the logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch figure
        # printed below is a per-sample average.
        train_loss += loss.item() * inputs.size(0)

    # ---- accuracy on the held-out set ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            predicted = model(inputs).max(dim=1).indices
            correct += int((predicted == targets).sum())
            total += targets.size(0)

    print(f"Epoch {epoch+1}/10")
    print(f"训练损失: {train_loss/len(train_loader.dataset):.4f}")
    print(f"测试准确率: {100 * correct / total:.2f}%\n")
# --------------------------
# 5. 预测新汉字
# --------------------------
def predict_new_char(char):
    """Render ``char`` and return the label the trained model assigns to it.

    The character is drawn with the module-level ``font_path``, scaled by
    1/255, run through ``model`` on ``device``, and the highest-scoring
    class index is mapped back to its label via ``le``.
    """
    pixels = generate_char_image(char, font_path) / 255.0
    # Add batch and channel axes in front: (H, W) -> (1, 1, H, W).
    batch = torch.tensor(pixels, dtype=torch.float32)[None, None].to(device)

    model.eval()
    with torch.no_grad():
        logits = model(batch)

    best_index = logits.argmax().item()
    decoded = le.inverse_transform([best_index])
    return decoded[0]
# Try the trained model on a few characters.
test_chars = ["三", "七", "五"]
# map() is lazy, so each prediction is still computed right before its
# own line is printed.
for char, pred in zip(test_chars, map(predict_new_char, test_chars)):
    print(f"输入汉字: {char}, 模型预测: {pred}")

# 浙公网安备 33010602011771号