身份证信息识别

python依赖

paddleocr==2.5.0.3
paddlepaddle==2.3.1

python示例

#!/usr/bin/env python
# coding=utf-8
import logging
import re

from paddleocr import PaddleOCR


class ID:
    """Extract the printed fields of a Chinese resident ID card via OCR.

    Both parsers run the shared PaddleOCR engine on an image and scan the
    recognised text lines for the card's field labels. OCR may return a
    label glued to its value ("姓名张三"), on its own line ("姓名" followed by
    the value) or split in two ("姓" followed by "名张三"), so each field
    handles those layouts. A field that is not found is returned as "".

    The text of a line is read as ``line[-1][0]``, i.e. the result of
    ``ocr()`` is expected to be a flat list whose entries end with a
    ``(text, ...)`` pair.
    # NOTE(review): this matches the pinned paddleocr 2.5.x; newer releases
    # are reported to nest results per image -- confirm before upgrading.
    """

    # Shared OCR engine, created once when the class body is executed.
    ocr_cls = PaddleOCR(use_angle_cls=True, use_gpu=False, lang="ch")

    @staticmethod
    def _text_after(ocr_result, index, offset=1):
        """Return the stripped text of the line ``offset`` places after ``index``.

        Raises:
            IndexError: if that line does not exist. Callers run inside the
                per-line ``try`` block, which logs the error and moves on.
        """
        return ocr_result[index + offset][-1][0].strip()

    @classmethod
    def _parse_nation(cls, ocr_result, index, info):
        """Return the ethnicity announced by line ``index``, or None.

        ``info`` is the text of that line and must contain "民". Handles
        "...民族汉", "...民族" + next line, "...民" + "族汉" and
        "...民" + "族" + following line.
        """
        if "民族" in info:
            if info.endswith("民族"):
                return cls._text_after(ocr_result, index)
            return info[info.index("民族") + 2:].strip()

        following = cls._text_after(ocr_result, index)
        if following == "族":
            return cls._text_after(ocr_result, index, 2)
        if following.startswith("族"):
            return following[1:]
        return None

    @classmethod
    def parse_back(cls, img):
        """Read the issuing authority and validity period from the card's back.

        Args:
            img: image passed straight to ``PaddleOCR.ocr`` (the demo below
                passes a file path).

        Returns:
            dict with keys ``"issued"`` and ``"validity"``; a value is ""
            when the corresponding label was not recognised.
        """
        issued = ""
        validity = ""
        ocr_result = cls.ocr_cls.ocr(img, cls=True)
        for i, x in enumerate(ocr_result):
            info = x[-1][0]
            # Best effort per line: a failure on one line (typically a label
            # with no following line) is logged and the scan continues.
            try:
                if info.startswith("签发机关"):
                    if info == "签发机关":
                        issued = cls._text_after(ocr_result, i)
                    else:
                        issued = info[4:].strip()
                elif info.startswith("有效期限"):
                    if info == "有效期限":
                        validity = cls._text_after(ocr_result, i)
                    else:
                        # The label "有效期限" is four characters long, so the
                        # value starts at index 4 (index 3 would keep "限").
                        validity = info[4:].strip()
            except Exception as e:
                logging.error(f'error,e={e.args}')

        return {"issued": issued,
                "validity": validity}

    @classmethod
    def parse_front(cls, img):
        """Read name, sex, ethnicity, birthday, address and ID number.

        Args:
            img: image passed straight to ``PaddleOCR.ocr`` (the demo below
                passes a file path).

        Returns:
            dict with keys ``"name"``, ``"sex"``, ``"nation"``,
            ``"birthday"``, ``"address"`` and ``"id_no"``; a value is ""
            when the corresponding field was not recognised. Only the first
            recognised address line is returned.
        """
        name = ""
        sex = ""
        nation = ""
        birthday = ""
        address = ""
        id_no = ""
        ocr_result = cls.ocr_cls.ocr(img, cls=True)
        last = len(ocr_result) - 1
        for i, x in enumerate(ocr_result):
            info = x[-1][0]
            # Best effort per line: a failure on one line (typically a label
            # with no following line) is logged and the scan continues.
            try:
                if info.startswith("姓"):
                    if info == "姓" and i < last:
                        following = cls._text_after(ocr_result, i)
                        if following == "名":
                            name = cls._text_after(ocr_result, i, 2)
                        elif following.startswith("名"):
                            # Drop the leading "名", which is the second half
                            # of the label rather than part of the name.
                            name = following[1:].strip()
                    elif info == "姓名" and i < last:
                        name = cls._text_after(ocr_result, i)
                    elif info.startswith("姓名"):
                        name = info[2:].strip()
                elif info.startswith("性"):
                    if info == "性" and i < last:
                        following = cls._text_after(ocr_result, i)
                        if following == "别":
                            sex = cls._text_after(ocr_result, i, 2)[:1]
                        elif following.startswith("别"):
                            sex = following[1:2]
                    elif info == "性别" and i < last:
                        sex = cls._text_after(ocr_result, i)[:1]
                    elif info.startswith("性别"):
                        sex = info[2:3]
                    # Sex and ethnicity share a row on the card, so OCR often
                    # returns them as one line ("性别男民族汉").
                    if "民" in info:
                        found = cls._parse_nation(ocr_result, i, info)
                        if found is not None:
                            nation = found
                elif info.startswith("民") and not nation:
                    # Ethnicity recognised as its own line ("民族汉"). Only
                    # fill it while still empty so that a later line that
                    # merely starts with "民" cannot overwrite it.
                    found = cls._parse_nation(ocr_result, i, info)
                    if found is not None:
                        nation = found
                elif re.search(r"(\d{4}年\d{1,2}月\d{1,2}日)", info):
                    search_obj = re.search(r"(\d{4}年\d{1,2}月\d{1,2}日)", info)
                    birthday = search_obj.groups()[0]
                elif info.startswith("住"):
                    if info == "住":
                        following = cls._text_after(ocr_result, i)
                        if following == "址":
                            address = cls._text_after(ocr_result, i, 2)
                        elif following.startswith("址"):
                            address = following[1:]
                    elif info == "住址":
                        address = cls._text_after(ocr_result, i)
                    else:
                        # Label and value on one line; this branch must not
                        # run for a bare "住" or it would reset the address.
                        address = info[2:].strip()
                elif info.startswith("公民身份号码"):
                    if info == "公民身份号码":
                        id_no = cls._text_after(ocr_result, i)
                    else:
                        id_no = info[6:].strip()

            except Exception as e:
                logging.error(f'error,e={e.args}')

        return {"name": name, "sex": sex, "nation": nation,
                "birthday": birthday, "address": address, "id_no": id_no}


# Demo: recognise both sides of a card from two local sample images and
# print the extracted fields (front side first, then back side).
font_img_path, back_img_path = "./6.jpg", "./7.jpg"

id_font = ID.parse_front(img=font_img_path)
print(id_font)

id_back = ID.parse_back(back_img_path)
print(id_back)

posted @ 2022-08-13 22:42 NAVYSUMMER 阅读(210) 评论(0) 收藏举报

刷新页面返回顶部

NAVYSUMMER

身份证信息识别

公告