身份证信息识别
python依赖
paddleocr==2.5.0.3 paddlepaddle==2.3.1
python示例
#!/usr/bin/env python
# coding=utf-8
import logging
import re
from paddleocr import PaddleOCR
class ID:
    """Read the printed fields of an ID card image with PaddleOCR.

    Both parsers run the shared OCR engine on an image, take the recognised
    text of every entry in the result and look for the printed field labels
    (for example ``姓名`` or ``签发机关``). A label and its value may arrive
    as one line or as consecutive lines, and a two-character label may
    itself be split in two; each of those layouts is handled. Fields that
    are not found are returned as empty strings.
    """

    # Shared OCR engine, created once when the class body is executed.
    ocr_cls = PaddleOCR(use_angle_cls=True, use_gpu=False, lang="ch")

    # Date as it appears in the birth line, e.g. "1990年1月1日".
    _BIRTHDAY_RE = re.compile(r"(\d{4}年\d{1,2}月\d{1,2}日)")

    @classmethod
    def _read_lines(cls, img):
        """Run OCR on ``img`` and return the recognised text of each entry.

        The text is read from ``entry[-1][0]`` of every entry in the OCR
        result. An empty or ``None`` result yields an empty list.
        """
        ocr_result = cls.ocr_cls.ocr(img, cls=True)
        return [entry[-1][0] for entry in (ocr_result or [])]

    @staticmethod
    def _text_at(lines, index):
        """Return ``lines[index]`` stripped, or ``""`` if ``index`` is past the end."""
        if index < len(lines):
            return lines[index].strip()
        return ""

    @classmethod
    def _after_label(cls, lines, i, label):
        """Return the value of line ``i``, which must start with ``label``.

        If the line is the bare label the value is the next line, otherwise
        it is the remainder of the same line.
        """
        info = lines[i]
        if info == label:
            return cls._text_at(lines, i + 1)
        return info[len(label):].strip()

    @classmethod
    def _after_split_label(cls, lines, i, label):
        """Return the value for a two-character ``label`` that may be split.

        Handles the complete label (see ``_after_label``) and the first
        label character standing alone on line ``i`` with the second
        character either alone on the next line or prefixed to the value.
        Returns ``None`` when line ``i`` matches none of these layouts.
        """
        info = lines[i]
        first, second = label
        if info.startswith(label):
            return cls._after_label(lines, i, label)
        if info != first:
            return None
        follower = cls._text_at(lines, i + 1)
        if follower == second:
            return cls._text_at(lines, i + 2)
        if follower.startswith(second):
            # Drop the second label character that is glued to the value.
            return follower[1:]
        return None

    @classmethod
    def _nation_value(cls, lines, i):
        """Return the value following ``民族`` on or after line ``i``, else ``None``.

        ``民族`` may appear anywhere in the line (it can share a line with
        the sex field), or the line may contain only ``民`` with ``族``
        opening the next line.
        """
        info = lines[i]
        if "民族" in info:
            if info.endswith("民族"):
                return cls._text_at(lines, i + 1)
            return info[info.index("民族") + 2:].strip()
        if "民" in info:
            follower = cls._text_at(lines, i + 1)
            if follower == "族":
                return cls._text_at(lines, i + 2)
            if follower.startswith("族"):
                return follower[1:]
        return None

    @classmethod
    def parse_back(cls, img):
        """Extract the issuing authority and the validity period.

        Args:
            img: Image handed unchanged to ``cls.ocr_cls.ocr``.

        Returns:
            A dict with the keys ``"issued"`` and ``"validity"``; a value is
            ``""`` when the corresponding label was not found.
        """
        fields = {"issued": "", "validity": ""}
        labels = (("签发机关", "issued"), ("有效期限", "validity"))
        lines = cls._read_lines(img)
        for i, info in enumerate(lines):
            try:
                for label, key in labels:
                    if info.startswith(label):
                        # The slice length follows the label length, so all
                        # four characters of 有效期限 are removed.
                        value = cls._after_label(lines, i, label)
                        if value:
                            fields[key] = value
                        break
            except Exception as e:
                # Best effort: a line that cannot be processed is logged and
                # skipped so the remaining lines are still parsed.
                logging.error(f'error,e={e.args}')
        return fields

    @classmethod
    def parse_front(cls, img):
        """Extract name, sex, nation, birthday, address and ID number.

        Only the line that carries the address value is captured; text on
        following lines is not appended to it.

        Args:
            img: Image handed unchanged to ``cls.ocr_cls.ocr``.

        Returns:
            A dict with the keys ``"name"``, ``"sex"``, ``"nation"``,
            ``"birthday"``, ``"address"`` and ``"id_no"``; a value is ``""``
            when the corresponding field was not found.
        """
        fields = {"name": "", "sex": "", "nation": "",
                  "birthday": "", "address": "", "id_no": ""}
        lines = cls._read_lines(img)
        for i, info in enumerate(lines):
            try:
                birth_match = cls._BIRTHDAY_RE.search(info)
                if info.startswith("姓"):
                    value = cls._after_split_label(lines, i, "姓名")
                    if value:
                        fields["name"] = value
                elif info.startswith("性"):
                    value = cls._after_split_label(lines, i, "性别")
                    if value:
                        # The sex value is a single character.
                        fields["sex"] = value[:1]
                    # The nation field can be recognised on the same line.
                    nation = cls._nation_value(lines, i)
                    if nation:
                        fields["nation"] = nation
                elif info.startswith("民"):
                    # Nation label recognised as a line of its own. Only
                    # lines that start with 民 are considered, so 公民身份号码
                    # and addresses containing 民 are not mistaken for it.
                    nation = cls._nation_value(lines, i)
                    if nation:
                        fields["nation"] = nation
                elif birth_match:
                    fields["birthday"] = birth_match.group(1)
                elif info.startswith("住"):
                    value = cls._after_split_label(lines, i, "住址")
                    if value is None and info != "住":
                        # Tolerate a garbled second label character: treat
                        # the first two characters as the label.
                        value = info[2:].strip()
                    if value:
                        fields["address"] = value
                elif info.startswith("公民身份号码"):
                    value = cls._after_label(lines, i, "公民身份号码")
                    if value:
                        fields["id_no"] = value
            except Exception as e:
                # Best effort: a line that cannot be processed is logged and
                # skipped so the remaining lines are still parsed.
                logging.error(f'error,e={e.args}')
        return fields
# Demo run: recognise the front and then the back of an ID card from two
# sample images in the current directory and print the extracted fields.
font_img_path, back_img_path = "./6.jpg", "./7.jpg"

id_font = ID.parse_front(font_img_path)
print(id_font)

id_back = ID.parse_back(img=back_img_path)
print(id_back)

浙公网安备 33010602011771号