# 代码 (code listing)

import cv2
import numpy as np
from paddleocr import PaddleOCR, draw_ocr

class TextRecognitionSystem:
    """Text detection and recognition pipeline built on PaddleOCR.

    Typical use: construct an instance, call :meth:`detect_and_recognize`
    with an image path, then optionally pass its outputs to
    :meth:`visualize_result` to save an annotated copy of the image.
    """

    def __init__(self, lang: str = 'ch', use_gpu: bool = False):
        """Initialise the OCR model.

        Args:
            lang: PaddleOCR language code. ``'ch'`` supports mixed
                Chinese/English; other codes such as ``'en'`` (English)
                or ``'japan'`` (Japanese) are also available.
            use_gpu: Forwarded to ``PaddleOCR``; set to True when a GPU
                is available.
        """
        # use_angle_cls=True enables the text-direction classifier,
        # which helps with tilted text.
        self.ocr = PaddleOCR(
            use_angle_cls=True,
            lang=lang,
            use_gpu=use_gpu,
        )

    def preprocess_image(self, image_path: str):
        """Load an image and prepare a contrast-enhanced, denoised copy.

        Args:
            image_path: Path of the image file to read.

        Returns:
            A ``(image, denoised)`` tuple: the image as loaded by
            ``cv2.imread`` (kept for visualisation) and the preprocessed
            grayscale image (used for recognition).

        Raises:
            ValueError: If ``cv2.imread`` cannot read ``image_path``.
        """
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"无法读取图像:{image_path}")

        # Convert to grayscale.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Contrast-limited adaptive histogram equalisation to boost contrast.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Light Gaussian blur to suppress noise.
        denoised = cv2.GaussianBlur(enhanced, (3, 3), 0)

        return image, denoised

    @staticmethod
    def _parse_result(result):
        """Split raw OCR output into parallel box/text/confidence lists.

        Args:
            result: Value returned by ``self.ocr.ocr(...)``. Only the
                first element (the single input image) is read; each of
                its entries is indexed as ``[box, (text, confidence)]``.

        Returns:
            ``(text_boxes, text_contents, confidences)``; all three are
            empty lists when ``result`` is ``None``, empty, or its first
            element is ``None``.
        """
        text_boxes = []
        text_contents = []
        confidences = []

        # Guard against a missing/empty result before indexing into it.
        if not result or result[0] is None:
            return text_boxes, text_contents, confidences

        for line in result[0]:
            text_boxes.append(line[0])      # quadrilateral [[x1,y1],...,[x4,y4]]
            text_contents.append(line[1][0])  # recognised text
            confidences.append(line[1][1])    # confidence score

        return text_boxes, text_contents, confidences

    def detect_and_recognize(self, image_path: str):
        """Run preprocessing followed by text detection and recognition.

        Args:
            image_path: Path of the image file to process.

        Returns:
            ``(raw_image, text_boxes, text_contents, confidences)`` where
            ``raw_image`` is the unprocessed image and the three lists
            are index-aligned, one entry per detected text line.

        Raises:
            ValueError: If the image cannot be read.
        """
        raw_image, processed_image = self.preprocess_image(image_path)

        # A single PaddleOCR call performs both detection and recognition.
        result = self.ocr.ocr(processed_image, cls=True)

        text_boxes, text_contents, confidences = self._parse_result(result)
        return raw_image, text_boxes, text_contents, confidences

    def visualize_result(self, image, boxes, texts,
                         save_path: str = 'ocr_result.jpg',
                         font_path: str = 'simfang.ttf'):
        """Draw the recognised boxes and texts on the image and save it.

        Args:
            image: Image in BGR channel order (as returned by
                :meth:`detect_and_recognize`).
            boxes: Text box coordinates, one per recognised line.
            texts: Recognised strings, index-aligned with ``boxes``.
            save_path: Destination file for the annotated image.
            font_path: Font file passed to ``draw_ocr``; a font with
                Chinese glyphs must be supplied to render Chinese text.

        Returns:
            The annotated image as returned by ``draw_ocr`` (RGB order).

        Raises:
            IOError: If ``cv2.imwrite`` reports that the file was not
                written.
        """
        # draw_ocr is given RGB input, while OpenCV images are BGR.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        result_image = draw_ocr(
            image_rgb,
            boxes,
            texts,
            font_path=font_path,
        )

        # cv2.imwrite signals failure through its return value rather than
        # an exception, so check it before claiming success.
        written = cv2.imwrite(save_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR))
        if not written:
            raise IOError(f"无法保存结果图像:{save_path}")
        print(f"识别结果已保存至:{save_path}")

        return result_image

# -------------------------- Test run --------------------------

if __name__ == "__main__":
    # Demo entry point: recognise the text in one image, print each line
    # with its confidence, and save an annotated copy of the image.

    # Build the recognition system.
    ocr_system = TextRecognitionSystem(lang='ch')

    # Path of the image to recognise (replace with your own image path).
    image_path = "test.jpg"

    try:
        # Run detection + recognition.
        raw_image, boxes, texts, confidences = ocr_system.detect_and_recognize(image_path)

        # Print the recognised lines, numbered from 1.
        print("===== 文字识别结果 =====")
        for i, (text, conf) in enumerate(zip(texts, confidences), start=1):
            print(f"文本{i}:{text}(置信度:{conf:.2f})")

        # Save a visualisation of the result (optional step).
        ocr_system.visualize_result(raw_image, boxes, texts)

    except Exception as e:
        # Top-level boundary of the demo script: report any failure
        # instead of printing a traceback.
        print(f"识别失败:{e}")
# posted @ 2025-11-20 23:09  滴滴同学  阅读(0)  评论(0)    收藏  举报