OpenCV---xx2(OCR文本处理识别)

OCR文字扫描识别

流程

1. 图片处理：灰度化、边缘检测、轮廓拟合，最后做透视变换
2. 应用tesseract开源库识别

代码详细--1 图片处理部分

import cv2 as cv
import sys
import numpy as np


def _order_quad_corners(point):
	"""Return the four corners as a float64 (4, 2) array ordered TL, BL, TR, BR.

	The two points with the smallest x are taken as the left edge and the
	other two as the right edge; each pair is then ordered top to bottom.

	Raises:
		ValueError: if ``point`` does not hold exactly four (x, y) pairs.
	"""
	pts = np.asarray(point, dtype='float64').reshape(-1, 2)
	if len(pts) != 4:
		raise ValueError(
			'trans_img expects exactly 4 corner points, got %d' % len(pts))

	# Stable sorts keep the original relative order of ties, like sorted().
	by_x = pts[np.argsort(pts[:, 0], kind='stable')]
	left, right = by_x[:2], by_x[2:]
	left = left[np.argsort(left[:, 1], kind='stable')]
	right = right[np.argsort(right[:, 1], kind='stable')]
	return np.vstack([left, right])


def trans_img(img, point):
	"""Warp the quadrilateral ``point`` in ``img`` to an upright, thinned binary image.

	Steps: order the corners, perspective-warp the region to a rectangle
	whose sides are the longer of each pair of opposite edges, resize to a
	width of 500, convert to grayscale, apply an inverted binary threshold
	at 80, and thin the result with ``cv.ximgproc.thinning``.

	Args:
		img: BGR image containing the quadrilateral (it is passed to
			``cv.cvtColor`` with ``COLOR_BGR2GRAY`` after warping).
		point: Four corner points, either shaped (4, 1, 2) as returned by
			``cv.approxPolyDP`` or (4, 2).

	Returns:
		The thinned single-channel image.

	Raises:
		ValueError: if ``point`` does not contain exactly four points.

	Note:
		Shows the intermediate grayscale image in a window named 'war'.
		``cv.ximgproc`` is part of the OpenCV contrib modules.
	"""
	corners = _order_quad_corners(point)
	top_left, bottom_left, top_right, bottom_right = corners

	# Target size: the longer of the two horizontal / vertical edges.
	width = max(np.linalg.norm(top_left - top_right),
				np.linalg.norm(bottom_left - bottom_right))
	height = max(np.linalg.norm(top_left - bottom_left),
				 np.linalg.norm(top_right - bottom_right))

	# Destination corners, in the same TL, BL, TR, BR order as `corners`.
	dst = np.array([[0, 0], [0, height - 1], [width - 1, 0], [width - 1, height - 1]], dtype='float32')
	M = cv.getPerspectiveTransform(corners.astype('float32'), dst)

	# Clamp to 1 px so a degenerate quadrilateral cannot request an empty image.
	out_size = (max(int(width), 1), max(int(height), 1))
	warped = cv.warpPerspective(img, M, out_size)
	warped = resize(warped, width=500)
	warped = cv.cvtColor(warped, cv.COLOR_BGR2GRAY)
	cv.imshow('war', warped)

	# Inverted threshold: dark strokes become white foreground for thinning.
	_, warped = cv.threshold(warped, 80, 255, cv.THRESH_BINARY_INV)
	warped = cv.ximgproc.thinning(warped)
	return warped


def resize(img, height=None, width=None):
	"""Resize ``img`` to a target width or height, keeping its aspect ratio.

	Args:
		img: Image array; only ``img.shape[:2]`` (rows, columns) is read here.
		height: Target height in pixels. Used only when ``width`` is None.
		width: Target width in pixels. Takes precedence over ``height`` when
			both are given.

	Returns:
		``img`` itself (not a copy) when neither target is given, otherwise
		the result of ``cv.resize``.
	"""
	src_h, src_w = img.shape[:2]
	if height is None and width is None:
		return img

	if width is None:
		scale = height / src_h
		# Clamp to 1 px: int() truncation can yield 0 for extreme aspect
		# ratios, and cv.resize rejects an empty target size.
		target = (max(1, int(src_w * scale)), height)
	else:
		scale = width / src_w
		target = (width, max(1, int(scale * src_h)))

	return cv.resize(img, target)


if __name__ == '__main__':
	# Load the input; cv.imread returns None when the file cannot be read.
	img = cv.imread('./imgs/img.png')

	if img is None:
		print("图片读取失败")
		# Non-zero status so a calling shell can tell the run failed.
		sys.exit(1)

	img = resize(img, width=500)

	# Pre-processing: grayscale -> Gaussian blur -> Canny edge detection.
	gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
	gray = cv.GaussianBlur(gray, (3, 3), 10)
	cv.imshow('gr', gray)
	cont = cv.Canny(gray, 40, 120)
	cv.imshow('can', cont)

	# Morphological closing to join broken edges. morphologyEx expects a
	# structuring-element array; a bare (30, 30) tuple is taken as the
	# kernel's values, not its size, so build the 30x30 element explicitly.
	kernel = cv.getStructuringElement(cv.MORPH_RECT, (30, 30))
	cont = cv.morphologyEx(cont, cv.MORPH_CLOSE, kernel)
	cv.imshow('cont', cont)

	# Outermost contours only. OpenCV 3.x returns (image, contours, hierarchy)
	# while 4.x returns (contours, hierarchy); accept both.
	found = cv.findContours(cont, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
	contours = found[0] if len(found) == 2 else found[1]

	# Keep the three largest contours as candidates for the document outline.
	contours = sorted(contours, key=cv.contourArea, reverse=True)[:3]

	aim_cnt = None
	for candidate in contours:
		peri = cv.arcLength(candidate, True)
		# Approximation tolerance as a fraction of the perimeter: smaller
		# follows the contour more closely, larger gives a coarser polygon.
		approx = cv.approxPolyDP(candidate, 0.02 * peri, True)
		if len(approx) == 4:
			aim_cnt = approx
			break

	if aim_cnt is None:
		# Without a four-corner outline there is nothing valid to warp.
		print('无符合的矩形')
		cv.destroyAllWindows()
		sys.exit(1)

	# Draw the outline on a copy so the drawn border is not warped into the
	# result image.
	outlined = img.copy()
	cv.drawContours(outlined, [aim_cnt], -1, (255, 0, 0), 3)
	cv.imshow('img', outlined)
	tran_cont = trans_img(img, aim_cnt)

	# Final result: show it and save it to disk.
	cv.imshow('see1', tran_cont)
	if not cv.imwrite('./res_img.png', tran_cont):
		print('结果图片保存失败')

	cv.waitKey(0)
	cv.destroyAllWindows()

灰度处理
image

边缘检测
image

闭运算后
image

轮廓拟合处理
image

透视变换
image

二值化与细化处理后
image

第二部分 -- 文字识别

好吧，我失败了：tesseract 开源库的识别率太低了，就不展示了。

恕我直言 识别了个xx 如果想要更高的识别率 可以自己训练模型
image

posted @ 2022-03-29 19:17  cc学习之路  阅读(330)  评论(0)    收藏  举报