* OCR example: find_text_dongle.hdev
* This example demonstrates how to use the operator
* find_text to segment text in an image before
* performing OCR.
* Switch off automatic window updates and close any open graphics window
dev_update_off ()
dev_close_window ()
* Load the first sample image; it is used here to size the graphics window
read_image (Image, 'ocr/dongle_01')
dev_open_window_fit_image (Image, 0, 0, -1, -1, WindowHandle)
* Font used for the text written into the window
set_display_font (WindowHandle, 14, 'mono', 'true', 'false')
* Read the pretrained OCR classifier from file
read_ocr_class_mlp ('DotPrint_NoRej', OCRHandle)
* Create the text model and specify the text properties
create_text_model_reader ('manual', [], TextModel)
* The text properties are collected as name/value pairs and applied in
* order: character width/height, dot print flag, maximum number of lines,
* whether punctuation and separators are returned, stroke width and the
* elimination of horizontal lines.
ParamNames := ['manual_char_width', 'manual_char_height', 'manual_is_dotprint', 'manual_max_line_num']
ParamValues := [24, 33, 'true', 2]
ParamNames := [ParamNames, 'manual_return_punctuation', 'manual_return_separators']
ParamValues := [ParamValues, 'false', 'false']
ParamNames := [ParamNames, 'manual_stroke_width', 'manual_eliminate_horizontal_lines']
ParamValues := [ParamValues, 4, 'true']
* Define different text line structures for each line.
* '6 1 8' describes a line that two separators split into three parts.
* Note that for the second line two structures are defined ('8 10' with
* one separator and '19' without any), because sometimes the '/' is
* classified as separator and sometimes as character.
ParamNames := [ParamNames, 'manual_text_line_structure_0', 'manual_text_line_structure_1', 'manual_text_line_structure_2']
ParamValues := [ParamValues, '6 1 8', '8 10', '19']
for ParamIndex := 0 to |ParamNames| - 1 by 1
    set_text_model_param (TextModel, ParamNames[ParamIndex], ParamValues[ParamIndex])
endfor
* Define the regular expression, which is later used
* in do_ocr_word_mlp to increase the robustness of the OCR.
* One pattern per expected text line; either one may match.
TextPattern1 := '(FLEXID[0-9][A-Z][0-9]{3}[A-F0-9]{4})'
TextPattern2 := '([A-Z]{3}[0-9]{5}.?[A-Z][0-9]{4}[A-Z][0-9]{4})'
Expression := TextPattern1 + '|' + TextPattern2
*
* Main loop: process each sample image in turn
NumImages := 8
for I := 1 to NumImages by 1
    * Read the I-th image (two-digit, zero-padded index in the file name)
    read_image (Image, 'ocr/dongle_' + I$'02')
    *
    * Preprocessing:
    *
    * The domain is reduced to the dark area where the
    * text is assumed to be found.
    * Then, the contrast is improved and the image
    * is aligned horizontally
    *
    * Segment the dark part of the image with an automatic threshold
    binary_threshold (Image, Region, 'max_separability', 'dark', UsedThreshold)
    * Opening with a rectangular mask to suppress noise
    opening_rectangle1 (Region, RegionOpening, 400, 50)
    * Erosion with a rectangular mask (result overwrites RegionOpening)
    erosion_rectangle1 (RegionOpening, RegionOpening, 11, 11)
    * Split into connected components and keep the one with the largest area
    connection (RegionOpening, ConnectedRegions)
    select_shape_std (ConnectedRegions, SelectedRegion, 'max_area', 70)
    * Restrict the image domain to the selected region
    reduce_domain (Image, SelectedRegion, ImageReduced)
    * Improve the contrast
    scale_image_max (ImageReduced, ImageScaleMax)
    * Determine the angle of the text relative to the horizontal axis,
    * searching within +/- 30 degrees
    text_line_orientation (SelectedRegion, ImageScaleMax, 30, rad(-30), rad(30), OrientationAngle)
    * Rotate the image by the negated angle (converted to degrees) so that
    * the text becomes horizontal
    rotate_image (ImageScaleMax, ImageRotate, deg(-OrientationAngle), 'constant')
    *
    * Find text and display results for every segmented region
    find_text (ImageRotate, TextModel, TextResult)
    * Number of text lines that were found
    get_text_result (TextResult, 'manual_num_lines', NumLines)
    * Show the aligned image
    dev_display (ImageRotate)
    * Read the text line by line
    for J := 0 to NumLines - 1 by 1
        * Fetch the character regions of line J
        get_text_object (Line, TextResult, ['manual_line',J])
        * The OCR uses regular expressions to read the text
        * more robustly. do_ocr_word_mlp classifies the characters as one
        * related word; do_ocr_multi_class_mlp would also be possible here.
        do_ocr_word_mlp (Line, ImageRotate, OCRHandle, Expression, 3, 5, Class, Confidence, Word, Score)
        *
        * Display results
        *
        * Bounding rectangles of the regions in Line (one tuple entry each)
        smallest_rectangle1 (Line, Row1, Column1, Row2, Column2)
        * Number of character regions in this line
        count_obj (Line, NumberOfCharacters)
        * Draw the character regions using 6 different colors
        dev_set_colored (6)
        dev_display (Line)
        * Recognized characters are written in white
        dev_set_color ('white')
        * Write each recognized character just below the line, at the
        * column of its region. The row is taken from the first region so
        * that all characters of the line share one baseline.
        for K := 1 to NumberOfCharacters by 1
            set_tposition (WindowHandle, Row2[0] + 4, Column1[K - 1])
            write_string (WindowHandle, Word{K - 1})
        endfor
    endfor
    * While images remain, show the continue message and wait for the user
    if (I < NumImages)
        disp_continue_message (WindowHandle, 'black', 'true')
        stop ()
    endif
    * Clean up memory: free the text result of this image
    clear_text_result (TextResult)
endfor
* Clean up memory: release the handles created before the main loop.
* Free the OCR classifier handle
clear_ocr_class_mlp (OCRHandle)
* Free the text model handle
clear_text_model (TextModel)

* (web page footer, not part of the script) 浙公网安备 33010602011771号