使用 Julia 和 Tesseract 实现图像验证码识别
一、环境准备
安装 Julia
从官网下载安装:https://julialang.org/downloads
更多内容访问ttocr.com或联系1436423940
安装 Tesseract OCR
macOS
brew install tesseract
Ubuntu
sudo apt install tesseract-ocr
安装 Julia 包
打开 Julia REPL,输入:
using Pkg
# Images/ImageIO/FileIO are loaded by captcha_ocr.jl; Glob and CSV are required by the
# batch-processing section later in this tutorial (`using Glob`, `using CSV`).
Pkg.add(["Images", "ImageIO", "FileIO", "Glob", "CSV"])
二、识别验证码图像
创建文件 captcha_ocr.jl:
using FileIO
using Images
using Dates
# 调用 Tesseract 命令行识别验证码
"""
    recognize_captcha(image_path::String)

Recognize the text of a CAPTCHA image by invoking the `tesseract` command-line tool.

Tesseract is run with the `eng` language and a character whitelist of `A-Z` and `0-9`.
Its text output is written into a fresh temporary directory, read back, and reduced to
digits and uppercase letters. The temporary directory is removed when the function
returns, including when an exception is thrown.

# Arguments
- `image_path`: path of the image file handed to `tesseract`.

# Returns
A `String` with the recognized characters, or `"识别失败"` when Tesseract did not
produce an output file.

# Throws
Whatever `run` throws when the `tesseract` process cannot be started or exits with a
non-zero status.
"""
function recognize_captcha(image_path::String)
    whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    # A private temp directory avoids clobbering a `julia_output.txt` in the current
    # directory and keeps concurrent invocations from racing on the same file.
    return mktempdir() do workdir
        output_base = joinpath(workdir, "julia_output")
        # Backtick command literal: each interpolated value becomes a single argument,
        # so paths containing spaces need no manual quoting.
        cmd = `tesseract $image_path $output_base -l eng -c tessedit_char_whitelist=$whitelist`
        run(cmd)
        # Tesseract appends ".txt" to the output base name it is given.
        txt_path = output_base * ".txt"
        isfile(txt_path) || return "识别失败"
        content = read(txt_path, String)
        # The filter already drops all whitespace, so no extra `strip` is needed.
        return filter(c -> isdigit(c) || isuppercase(c), content)
    end
end
# 主函数
"""
    main()

Script entry point: recognize the CAPTCHA stored in `captcha1.png` with
`recognize_captcha` and print the result to standard output.
"""
function main()
    # Replace the path below with the location of your own CAPTCHA image.
    result = recognize_captcha("captcha1.png")
    println("识别结果: $result")
    return nothing
end
main()
三、运行代码
julia captcha_ocr.jl
输出示例:
识别结果: 7XF9
四、批量处理验证码
扩展脚本支持文件夹批量识别:
using Glob
"""
    batch_recognize(folder::String)

Run `recognize_captcha` on every file in `folder` matched by the `*.png` glob pattern
and print one `<file name> -> <result>` line per image.
"""
function batch_recognize(folder::String)
    foreach(glob("*.png", folder)) do img
        result = recognize_captcha(img)
        println("$(basename(img)) -> $result")
    end
    return nothing
end
batch_recognize("captchas")
using CSV
"""
    save_results_to_csv(folder::String, output_csv::String)

Recognize every `*.png` image found in `folder` and write the results to `output_csv`
as a two-column CSV file (`filename`, `text`).

# Arguments
- `folder`: directory searched with the `*.png` glob pattern.
- `output_csv`: path of the CSV file to create or overwrite.

# Returns
The value returned by `CSV.write`.
"""
function save_results_to_csv(folder::String, output_csv::String)
    images = glob("*.png", folder)
    # CSV.write expects a Tables.jl-compatible source. A NamedTuple of equal-length
    # vectors is a column table; a Vector of `name => column` pairs is not.
    table = (
        filename = String[basename(img) for img in images],
        text = String[recognize_captcha(img) for img in images],
    )
    return CSV.write(output_csv, table)
end
save_results_to_csv("captchas", "results.csv")
浙公网安备 33010602011771号