使用 Nim 和 Tesseract 实现验证码识别
一、环境准备
安装 Nim
参考官网:https://nim-lang.org/install.html
macOS / Linux 可使用 choosenim:
curl https://nim-lang.org/choosenim/init.sh -sSf | sh
安装 Tesseract OCR
macOS
brew install tesseract
更多内容访问ttocr.com或联系1436423940
Ubuntu
sudo apt install tesseract-ocr
二、创建 Nim 项目
mkdir captcha_nim
cd captcha_nim
创建文件 captcha.nim:
import osproc, strutils, os
调用 tesseract 并提取识别结果
proc recognizeCaptcha(imagePath: string): string =
  ## Runs the `tesseract` command-line tool on `imagePath` and returns the
  ## recognized text reduced to the characters 'A'..'Z' and '0'..'9'.
  ##
  ## Tesseract is asked to write its result to `nim_output.txt` in the
  ## current directory; that file is read and then deleted.
  ##
  ## Returns the string "识别失败" when the command exits with a non-zero
  ## status or produces no output file.
  const
    outputBase = "nim_output"
    whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
  let outputFile = outputBase & ".txt"

  # Remove leftovers from an earlier run so a failed invocation cannot be
  # mistaken for a successful one. removeFile does not fail when the file
  # is absent.
  removeFile(outputFile)
  # Always clean up the temporary output, even if reading it raises.
  defer: removeFile(outputFile)

  # quoteShell keeps paths containing spaces or shell metacharacters from
  # being split or interpreted by the shell.
  let cmd = "tesseract " & quoteShell(imagePath) & " " & outputBase &
    " -l eng -c tessedit_char_whitelist=" & whitelist
  let exitCode = execShellCmd(cmd)

  if exitCode == 0 and fileExists(outputFile):
    # Keep only the expected captcha alphabet; this also drops whitespace
    # and line breaks, so no separate strip step is needed.
    for ch in readFile(outputFile):
      if ch in {'A'..'Z', '0'..'9'}:
        result.add ch
  else:
    result = "识别失败"
主函数
when isMainModule:
  # Entry point: recognize a single captcha image and print the text.
  const imagePath = "captcha1.png"  # replace with the path to your own image
  let text = recognizeCaptcha(imagePath)
  echo "识别结果: " & text
三、运行程序
nim compile --run captcha.nim
输出示例:
识别结果: 9XHA
四、扩展:批量识别图片
修改 when isMainModule 部分:
when isMainModule:
  # Batch mode: recognize every ".png" file directly inside `folder` and
  # print one "<file name> -> <text>" line per image.
  let folder = "captchas"
  # walkDir yields (kind, path) pairs rather than plain strings, so the
  # pair is unpacked and only file entries are processed.
  for kind, path in walkDir(folder):
    if kind in {pcFile, pcLinkToFile} and path.endsWith(".png"):
      let text = recognizeCaptcha(path)
      echo path.extractFilename(), " -> ", text
五、输出结果为 CSV
进一步添加写入 CSV 功能:
import sequtils
when isMainModule:
  # Batch mode with CSV export: recognize every ".png" file directly inside
  # `folder` and write "filename,text" rows to results.csv.
  let folder = "captchas"
  var rows: seq[string] = @["filename,text"]
  # walkDir yields (kind, path) pairs rather than plain strings, so the
  # pair is unpacked and only file entries are processed.
  for kind, path in walkDir(folder):
    if kind in {pcFile, pcLinkToFile} and path.endsWith(".png"):
      let text = recognizeCaptcha(path)
      let name = path.extractFilename()
      # Quote the file name when it contains CSV-special characters so the
      # row still has exactly two fields; embedded quotes are doubled.
      let nameField =
        if name.contains({',', '"', '\n', '\r'}):
          "\"" & name.replace("\"", "\"\"") & "\""
        else:
          name
      rows.add(nameField & "," & text)
  writeFile("results.csv", rows.join("\n"))
  echo "结果已保存到 results.csv"
浙公网安备 33010602011771号