代码

import cv2
import numpy as np
from paddleocr import PaddleOCR, draw_ocr

class TextRecognitionSystem:
    """Text detection and recognition pipeline built on PaddleOCR and OpenCV.

    The pipeline reads an image from disk, applies contrast enhancement and
    denoising, runs PaddleOCR on the preprocessed image, and can render the
    recognized boxes and texts onto the original image.
    """

    def __init__(self, lang='ch', use_gpu=False):
        """Initialize the OCR model.

        Args:
            lang: Language code forwarded to PaddleOCR. 'ch' handles mixed
                Chinese/English; other options include 'en' (English) and
                'japan' (Japanese).
            use_gpu: Forwarded to PaddleOCR; set to True when a GPU is
                available. Defaults to False.
        """
        # use_angle_cls=True enables the text-direction classifier, which
        # helps with rotated/skewed text.
        self.ocr = PaddleOCR(
            use_angle_cls=True,
            lang=lang,
            use_gpu=use_gpu,
        )

    def preprocess_image(self, image_path):
        """Load an image and produce a contrast-enhanced, denoised version.

        Args:
            image_path: Path of the image file to read.

        Returns:
            A tuple ``(image, denoised)``: the image as loaded by
            ``cv2.imread`` (used for visualization) and the preprocessed
            grayscale image (used for recognition).

        Raises:
            ValueError: If the image cannot be read.
        """
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"无法读取图像：{image_path}")

        # Convert to grayscale.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Contrast-limited adaptive histogram equalization (boosts contrast).
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Gaussian blur to suppress noise.
        denoised = cv2.GaussianBlur(enhanced, (3, 3), 0)

        return image, denoised

    def detect_and_recognize(self, image_path):
        """Run the full detection + recognition flow on one image file.

        Args:
            image_path: Path of the image file to process.

        Returns:
            A tuple ``(raw_image, text_boxes, text_contents, confidences)``
            where the three lists are index-aligned and empty when no text
            was found.

        Raises:
            ValueError: If the image cannot be read.
        """
        raw_image, processed_image = self.preprocess_image(image_path)

        # PaddleOCR performs detection and recognition in a single call.
        result = self.ocr.ocr(processed_image, cls=True)

        text_boxes, text_contents, confidences = self._parse_result(result)
        return raw_image, text_boxes, text_contents, confidences

    @staticmethod
    def _parse_result(result):
        """Split a raw OCR result into parallel box / text / confidence lists.

        Args:
            result: Value returned by ``self.ocr.ocr`` for a single image.
                Only ``result[0]`` is inspected; each of its entries is read
                as ``[box, (text, confidence)]``.

        Returns:
            A tuple ``(text_boxes, text_contents, confidences)``. All three
            lists are empty when ``result`` is None, empty, or its first
            element is None.
        """
        text_boxes = []  # quadrilateral coordinates of each text region
        text_contents = []  # recognized strings
        confidences = []  # recognition confidence per string

        # Guard against a None / empty result as well as a None page entry,
        # any of which would otherwise raise on indexing or iteration.
        if not result or result[0] is None:
            return text_boxes, text_contents, confidences

        for line in result[0]:
            text_boxes.append(line[0])
            text_contents.append(line[1][0])
            confidences.append(line[1][1])

        return text_boxes, text_contents, confidences

    def visualize_result(self, image, boxes, texts, save_path='ocr_result.jpg',
                         font_path='simfang.ttf'):
        """Draw the recognized boxes and texts and save the rendering.

        Args:
            image: BGR image to draw on (as returned by ``cv2.imread``).
            boxes: Text box coordinates, index-aligned with ``texts``.
            texts: Recognized strings.
            save_path: Destination file for the rendered image.
            font_path: Font file handed to ``draw_ocr``; a font with CJK
                glyphs is needed to render Chinese text.

        Returns:
            The rendered image as returned by ``draw_ocr``.

        Raises:
            OSError: If the rendered image could not be written.
        """
        # draw_ocr is fed an RGB image, so convert from OpenCV's BGR order.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        result_image = draw_ocr(
            image_rgb,
            boxes,
            texts,
            font_path=font_path,
        )

        # cv2.imwrite reports failure through its return value; check it so
        # a failed save is not reported as a success.
        saved = cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR))
        if not saved:
            raise OSError(f"无法保存识别结果：{save_path}")
        print(f"识别结果已保存至：{save_path}")

        return result_image

# -------------------------- Test run --------------------------

if __name__ == "__main__":
    # Build the recognition system.
    ocr_system = TextRecognitionSystem(lang='ch')

    # Image to recognize (replace with your own image path).
    image_path = "test.jpg"

    try:
        # Run detection + recognition.
        raw_image, boxes, texts, confidences = ocr_system.detect_and_recognize(image_path)

        # Print each recognized text with its confidence.
        print("===== 文字识别结果 =====")
        for i, (text, conf) in enumerate(zip(texts, confidences), start=1):
            print(f"文本{i}：{text}（置信度：{conf:.2f}）")

        # Render and save the annotated image (optional).
        ocr_system.visualize_result(raw_image, boxes, texts)

    except Exception as e:
        # Top-level boundary of the demo script: report the failure
        # instead of letting a traceback escape.
        print(f"识别失败：{e}")

posted @ 2025-11-20 23:09 滴滴同学 阅读(0) 评论(0) 收藏 举报

刷新页面 返回顶部

pyt051022

代码

-------------------------- 测试运行 --------------------------

公告