OpenCV---xx2(OCR文本处理识别)
OCR文字扫描识别
流程
1. 图片处理：检测文档轮廓，并通过透视变换将文档区域拉正
2. 文字识别：使用 Tesseract 开源库对处理后的图片进行识别
代码详细--1 图片处理部分
import cv2 as cv
import sys
import numpy as np
def trans_img(img, point):
    """Warp the quadrilateral ``point`` out of ``img`` and binarise it for OCR.

    Args:
        img: Source image; it is converted with ``cv.COLOR_BGR2GRAY`` after
            the warp, so a 3-channel BGR image is expected.
        point: Sequence of corner entries, each indexed as ``entry[0][0]`` (x)
            and ``entry[0][1]`` (y), e.g. the output of ``cv.approxPolyDP``.

    Returns:
        The warped region, resized to a width of 500, converted to grayscale,
        inverse-thresholded at 80 and thinned with ``cv.ximgproc.thinning``.

    Side effects:
        Shows the grayscale warped image in a window named ``'war'``.
    """
    # Order the corners: the two smallest-x entries form the left side, the
    # remaining ones the right side; each side is then sorted top to bottom.
    by_x = sorted(point, key=lambda entry: entry[0][0])
    left_side = sorted(by_x[:2], key=lambda entry: entry[0][1])
    right_side = sorted(by_x[2:], key=lambda entry: entry[0][1])

    top_left, bottom_left = left_side[0][0], left_side[1][0]
    top_right, bottom_right = right_side[0][0], right_side[1][0]

    src = np.array(
        [[top_left], [bottom_left], [top_right], [bottom_right]],
        dtype='float32',
    )

    def edge_length(a, b):
        # Euclidean distance between two (x, y) corners.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    # Output size: the longer of each pair of opposite edges.
    width = max(edge_length(top_left, top_right),
                edge_length(bottom_left, bottom_right))
    height = max(edge_length(top_left, bottom_left),
                 edge_length(top_right, bottom_right))

    # Destination corners, listed in the same order as ``src``: the four
    # contour corners are mapped onto an axis-aligned width x height rectangle.
    target = np.array(
        [[0, 0], [0, height - 1], [width - 1, 0], [width - 1, height - 1]],
        dtype='float32',
    )
    matrix = cv.getPerspectiveTransform(src, target)
    flat = cv.warpPerspective(img, matrix, (int(width), int(height)))

    flat = resize(flat, width=500)
    flat = cv.cvtColor(flat, cv.COLOR_BGR2GRAY)
    cv.imshow('war', flat)

    # Inverse threshold: pixels at or below 80 become 255, everything else 0.
    _, binary = cv.threshold(flat, 80, 255, cv.THRESH_BINARY_INV)
    return cv.ximgproc.thinning(binary)
def resize(img, height=None, width=None):
    """Resize ``img`` to a target width or height, keeping the aspect ratio.

    Args:
        img: Image array; only ``img.shape[:2]`` (rows, columns) is read here.
        height: Target height in pixels. Used only when ``width`` is None.
        width: Target width in pixels. Takes precedence over ``height``.

    Returns:
        ``img`` itself when neither ``height`` nor ``width`` is given,
        otherwise the result of ``cv.resize`` at the computed size.
    """
    h, w = img.shape[:2]
    if height is None and width is None:
        return img
    if width is None:
        ratio = height / h
        # Clamp to 1: truncation could otherwise give a zero-sized side,
        # which cv.resize rejects.
        dim = (max(1, int(w * ratio)), height)
    else:
        ratio = width / w
        dim = (width, max(1, int(ratio * h)))
    return cv.resize(img, dim)
if __name__ == '__main__':
    # Script entry point: find the document outline in ./imgs/img.png,
    # rectify it with trans_img and save the result to ./res_img.png.
    img = cv.imread('./imgs/img.png')
    if img is None:
        print("图片读取失败")
        sys.exit(1)
    img = resize(img, width=500)

    # Pre-processing: grayscale -> Gaussian blur -> Canny edge detection.
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    gray = cv.GaussianBlur(gray, (3, 3), 10)
    cv.imshow('gr', gray)
    cont = cv.Canny(gray, 40, 120)
    cv.imshow('can', cont)

    # Morphological closing of the edge map.
    # NOTE(review): the kernel argument is a bare tuple, which is probably not
    # treated as a 30x30 structuring element;
    # cv.getStructuringElement(cv.MORPH_RECT, (30, 30)) looks like what was
    # intended. Left unchanged because it alters the result - confirm first.
    cont = cv.morphologyEx(cont, cv.MORPH_CLOSE, (30, 30))
    cv.imshow('cont', cont)

    # Keep only the outermost contours (same values as the former 0 and 2).
    contours, _ = cv.findContours(cont, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing to rectify; previously this crashed on contours[0].
        print('无符合的矩形')
        sys.exit(1)
    contours = sorted(contours, key=cv.contourArea, reverse=True)[:3]

    aim_cnt = None
    for candidate in contours:
        peri = cv.arcLength(candidate, True)
        # Approximation tolerance as a fraction of the perimeter: smaller
        # follows the contour more closely, larger gives a coarser polygon.
        approx = cv.approxPolyDP(candidate, 0.02 * peri, True)
        if len(approx) == 4:
            aim_cnt = approx
            break
    else:
        print('无符合的矩形')
        # Best-effort fallback: use the four corners of the minimum-area
        # rectangle around the largest contour, so trans_img still receives
        # exactly four points instead of a raw many-point contour.
        box = cv.boxPoints(cv.minAreaRect(contours[0]))
        aim_cnt = np.int32(box).reshape(-1, 1, 2)

    # Draw the outline on a copy so the drawn line is not warped into the
    # image that is later thresholded.
    preview = img.copy()
    cv.drawContours(preview, [aim_cnt], -1, (255, 0, 0), 3)
    cv.imshow('img', preview)

    tran_cont = trans_img(img, aim_cnt)

    # Final result.
    cv.imshow('see1', tran_cont)
    if not cv.imwrite('./res_img.png', tran_cont):
        print('结果图片保存失败')
    cv.waitKey(0)
    cv.destroyAllWindows()
灰度处理

边缘检测

闭运算后

轮廓拟合处理

透视变换

一些处理

第二部分 -- 文字识别
好吧，我失败了：Tesseract 开源库对这张图片的识别率太低，识别结果就不展示了。
恕我直言，识别出来的内容基本不可用。如果想要更高的识别率，可以自己训练模型。


浙公网安备 33010602011771号