# Code
import cv2
import numpy as np
from paddleocr import PaddleOCR, draw_ocr
class TextRecognitionSystem:
    """Text detection and recognition pipeline built on PaddleOCR.

    Images are read and preprocessed with OpenCV, passed to PaddleOCR for
    detection + recognition, and the results can optionally be rendered
    to an annotated image.
    """

    def __init__(self, lang='ch', use_gpu=False):
        """Initialize the OCR model.

        Args:
            lang: Language code forwarded to PaddleOCR. 'ch' handles mixed
                Chinese/English; other options include 'en' (English) and
                'japan' (Japanese).
            use_gpu: Forwarded to PaddleOCR; set to True if a GPU is
                available.
        """
        # use_angle_cls=True enables the text-direction classifier, which
        # helps with rotated / tilted text.
        self.ocr = PaddleOCR(
            use_angle_cls=True,
            lang=lang,
            use_gpu=use_gpu,
        )

    def preprocess_image(self, image_path):
        """Preprocess an image: contrast enhancement followed by denoising.

        Args:
            image_path: Path of the image file to load.

        Returns:
            A tuple ``(image, denoised)``: the original image as loaded by
            ``cv2.imread`` (used for visualization) and the preprocessed
            grayscale image (used for recognition).

        Raises:
            ValueError: If the image cannot be read.
        """
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise ValueError(f"无法读取图像:{image_path}")
        # Convert to grayscale.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Contrast-limited adaptive histogram equalization (boosts contrast).
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)
        # Gaussian blur for denoising.
        denoised = cv2.GaussianBlur(enhanced, (3, 3), 0)
        return image, denoised

    @staticmethod
    def _parse_result(result):
        """Split a raw OCR result into parallel box/text/confidence lists.

        Args:
            result: Value returned by ``self.ocr.ocr(...)``. Its first
                element is expected to be either None or a sequence of
                entries shaped like ``[box, (text, confidence)]``.

        Returns:
            A tuple ``(text_boxes, text_contents, confidences)`` of lists.
            All three are empty when ``result`` is None/empty or its first
            element is None.
        """
        text_boxes = []      # quadrilateral corner points of each text box
        text_contents = []   # recognized strings
        confidences = []     # recognition confidence scores
        # Guard against a missing result as well as a None first element,
        # so callers always get three (possibly empty) lists.
        if not result or result[0] is None:
            return text_boxes, text_contents, confidences
        for line in result[0]:
            # line[0]: box as [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
            # line[1]: (text, confidence)
            recognition = line[1]
            text_boxes.append(line[0])
            text_contents.append(recognition[0])
            confidences.append(recognition[1])
        return text_boxes, text_contents, confidences

    def detect_and_recognize(self, image_path):
        """Run the full pipeline: preprocess, detect and recognize text.

        Args:
            image_path: Path of the image file to process.

        Returns:
            A tuple ``(raw_image, text_boxes, text_contents, confidences)``.
            The three lists are parallel and empty when no text is found.

        Raises:
            ValueError: If the image cannot be read.
        """
        raw_image, processed_image = self.preprocess_image(image_path)
        # A single PaddleOCR call performs both detection and recognition.
        result = self.ocr.ocr(processed_image, cls=True)
        text_boxes, text_contents, confidences = self._parse_result(result)
        return raw_image, text_boxes, text_contents, confidences

    def visualize_result(self, image, boxes, texts, save_path='ocr_result.jpg',
                         font_path='simfang.ttf'):
        """Draw the text boxes and recognized text, then save the image.

        Args:
            image: Image in BGR channel order (as loaded by ``cv2.imread``).
            boxes: Text box coordinates, as returned by
                ``detect_and_recognize``.
            texts: Recognized strings, parallel to ``boxes``.
            save_path: Output file path for the annotated image.
            font_path: Font file used to render the text; a font with
                Chinese glyphs is needed for Chinese output.

        Returns:
            The annotated image produced by ``draw_ocr``.

        Raises:
            IOError: If the annotated image cannot be written.
        """
        # draw_ocr is given RGB here, so convert from OpenCV's BGR order.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        result_image = draw_ocr(
            image_rgb,
            boxes,
            texts,
            font_path=font_path,
        )
        # Convert back to BGR for cv2.imwrite. imwrite reports failure via
        # its boolean return value, so check it before claiming success.
        saved = cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR))
        if not saved:
            raise IOError(f"无法保存结果图像:{save_path}")
        print(f"识别结果已保存至:{save_path}")
        return result_image
# -------------------------- Test run --------------------------
if __name__ == "__main__":
    # Script entry point: run OCR on one image, print the results and
    # save an annotated copy.

    # Initialize the recognition system.
    ocr_system = TextRecognitionSystem(lang='ch')
    # Path of the image to recognize (replace with your own image path).
    image_path = "test.jpg"
    try:
        # Run detection + recognition.
        raw_image, boxes, texts, confidences = ocr_system.detect_and_recognize(image_path)
        # Print each recognized text with its confidence, numbered from 1.
        print("===== 文字识别结果 =====")
        for i, (text, conf) in enumerate(zip(texts, confidences), start=1):
            print(f"文本{i}:{text}(置信度:{conf:.2f})")
        # Visualize the result (optional).
        ocr_system.visualize_result(raw_image, boxes, texts)
    except Exception as e:
        # Top-level boundary of the demo script: report any failure
        # instead of showing a traceback.
        print(f"识别失败:{e}")

# 浙公网安备 33010602011771号