文字识别

`import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image, ImageDraw, ImageFont
import os

# --------------------------
# 1. 生成虚拟汉字图像（修复textsize问题）
# --------------------------

def get_font_path():
    """Return the path of a Chinese-capable font installed on this machine.

    Candidate files are probed in order and the first one that exists wins.
    Windows candidates are used when ``os.name == 'nt'``; on every other
    platform the macOS candidates are tried first, followed by common Linux
    locations.

    Returns:
        str: Path of the first existing candidate. When none exists, the
        manual default ``"C:/Windows/Fonts/simhei.ttf"`` is returned as-is
        (it may not exist either); edit that constant to point at a real
        font if needed.
    """
    # Manual default, returned when no candidate is found (adjust as needed).
    fallback = "C:/Windows/Fonts/simhei.ttf"

    if os.name == 'nt':  # Windows
        candidates = [
            "C:/Windows/Fonts/simhei.ttf",  # SimHei
            "C:/Windows/Fonts/simsun.ttc",  # SimSun
            "C:/Windows/Fonts/microsoftyahei.ttf",
            "C:/Windows/Fonts/msyh.ttc",  # Microsoft YaHei (usual file name)
        ]
    else:  # macOS first, then common Linux locations
        candidates = [
            "/Library/Fonts/Songti.ttc",
            "/Library/Fonts/Heiti.ttc",
            "/System/Library/Fonts/PingFang.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
        ]

    # No exception is raised and swallowed here: a missing font simply yields
    # the fallback path.
    return next((path for path in candidates if os.path.exists(path)), fallback)

def generate_char_image(char, font_path, size=(28, 28)):
    """Render ``char`` in black, centred on a white grayscale canvas.

    Args:
        char: Text to draw (a single Chinese character in this script).
        font_path: Path of the font file to load at size 20.
        size: ``(width, height)`` of the canvas in pixels.

    Returns:
        numpy.ndarray: Array built from the mode-"L" image; the background is
        255 (white) and the glyph is drawn with fill 0 (black).
    """
    img = Image.new("L", size, color=255)  # white background
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype(font_path, size=20)
    except (OSError, ImportError):
        # Font file missing/unreadable (or FreeType support unavailable):
        # fall back to Pillow's built-in font so rendering can still proceed.
        font = ImageFont.load_default()

    # textbbox() is the replacement for the removed textsize(); it returns
    # the box (x0, y0, x1, y1) of the text when drawn at the given origin.
    left, top, right, bottom = draw.textbbox((0, 0), char, font=font)
    char_width = right - left
    char_height = bottom - top

    # The box's top-left corner is generally not at the drawing origin, so the
    # (left, top) offset is subtracted; otherwise the glyph ends up shifted
    # right/down from the true centre.
    x = (size[0] - char_width) // 2 - left
    y = (size[1] - char_height) // 2 - top
    draw.text((x, y), char, fill=0, font=font)  # black text

    return np.array(img)

# 生成训练数据（10个汉字：一到十）

chars = ["一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
num_samples_per_char = 1000
font_path = get_font_path()

# Build the dataset. The clean glyph is identical for every sample of a
# character, so it is rendered once per character and only the random noise
# is regenerated per sample.
images = []
labels = []
for char in chars:
    base_img = generate_char_image(char, font_path)
    for _ in range(num_samples_per_char):
        # Each pixel independently has 0 or 255 subtracted; after clipping to
        # [0, 255], every pixel that drew 255 becomes 0 (black).
        # NOTE(review): randint(0, 2) hits about half of all pixels, which is
        # far more than "slight" noise - confirm the intended noise level.
        noise = np.random.randint(0, 2, size=base_img.shape) * 255
        images.append(np.clip(base_img - noise, 0, 255))
        labels.append(char)

images = np.array(images, dtype=np.float32)
labels = np.array(labels)

# --------------------------
# 2. 数据预处理
# --------------------------

# Divide every pixel value by 255 to normalise the image array.
images = np.divide(images, 255.0)

# Fit the encoder on the label array, then map each label to its integer id.
le = LabelEncoder().fit(labels)
labels_encoded = le.transform(labels)

# 划分数据集并转Tensor

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split the same from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    images,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)

# Image arrays become tensors with an extra channel axis inserted at dim 1;
# label arrays become torch.long tensors.
x_train, x_test = (
    torch.tensor(arr).unsqueeze(1) for arr in (x_train, x_test)
)
y_train, y_test = (
    torch.tensor(arr, dtype=torch.long) for arr in (y_train, y_test)
)

# 数据加载器

# Pair inputs with targets and serve them in batches of 32. Only the training
# loader shuffles; the test loader keeps the dataset order.
train_loader = DataLoader(
    dataset=TensorDataset(x_train, y_train),
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=TensorDataset(x_test, y_test),
    batch_size=32,
    shuffle=False,
)

# --------------------------
# 3. 模型定义
# --------------------------

class SimpleCNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two conv(3x3, padding=1) + ReLU + 2x2 max-pool stages produce 32 feature
    maps; the first linear layer expects them to be 7x7 each (32 * 7 * 7
    inputs), which is what a 28x28 input yields after two halvings. A
    two-layer fully connected head then emits one score per class.

    Args:
        num_classes: Number of output scores (classes). Defaults to 10.
    """

    def __init__(self, num_classes=10):
        # Must be the real constructor name: with a plain ``init`` the layers
        # below are never created and the keyword argument is rejected.
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return the class scores for a batch ``x`` of 1-channel images."""
        x = self.conv_layers(x)
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 32 * 7 * 7)``, a wrong input size then fails in the
        # linear layer instead of silently changing the batch size.
        x = torch.flatten(x, 1)
        return self.fc_layers(x)

# Use the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# One output class per character; the module is moved to the chosen device.
model = SimpleCNN(num_classes=len(chars)).to(device)

# --------------------------
# 4. 训练模型
# --------------------------

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Single source of truth for both the loop bound and the progress message.
num_epochs = 10

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by the batch size so that dividing by the
        # dataset size below gives a per-sample average.
        train_loss += loss.item() * batch_x.size(0)

    # ---- evaluation pass on the held-out set (no gradients needed) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            # Index of the highest score per sample is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"训练损失: {train_loss/len(train_loader.dataset):.4f}")
    print(f"测试准确率: {100 * correct / total:.2f}%\n")

# --------------------------
# 5. 预测新汉字
# --------------------------

def predict_new_char(char):
    """Render ``char`` and return the label the trained model assigns to it.

    Uses the module-level ``font_path``, ``model``, ``device`` and ``le``.

    Args:
        char: Character to render and classify.

    Returns:
        The entry of ``le.inverse_transform`` for the highest-scoring class.
    """
    model.eval()

    # Same /255 normalisation that is applied to the training images.
    pixels = generate_char_image(char, font_path) / 255.0
    # Indexing with [None, None] prepends the batch and channel axes.
    batch = torch.tensor(pixels, dtype=torch.float32)[None, None].to(device)

    with torch.no_grad():
        scores = model(batch)
    best_idx = scores.argmax().item()

    decoded = le.inverse_transform([best_idx])
    return decoded[0]

# 测试预测

# Classify a few sample characters with predict_new_char and print each input
# character next to the label the model predicted for it.
test_chars = ["三", "七", "五"]
for char in test_chars:
pred = predict_new_char(char)
print(f"输入汉字: {char}, 模型预测: {pred}")`

posted @ 2025-11-11 19:16 黎孜 阅读(0) 评论(0) 收藏 举报

刷新页面返回顶部

gmzz-333

文字识别

--------------------------

1. 生成虚拟汉字图像（修复textsize问题）

--------------------------

生成训练数据（10个汉字：一到十）

--------------------------

2. 数据预处理

--------------------------

划分数据集并转Tensor

数据加载器

--------------------------

3. 模型定义

--------------------------

--------------------------

4. 训练模型

--------------------------

--------------------------

5. 预测新汉字

--------------------------

测试预测

公告