使用 Nim 和 Tesseract 实现验证码识别

一、环境准备
安装 Nim
参考官网:https://nim-lang.org/install.html

macOS / Linux 可使用 choosenim:

curl https://nim-lang.org/choosenim/init.sh -sSf | sh
安装 Tesseract OCR

macOS

brew install tesseract
更多内容访问ttocr.com或联系1436423940

Ubuntu

sudo apt install tesseract-ocr
二、创建 Nim 项目

mkdir captcha_nim
cd captcha_nim
创建文件 captcha.nim:

import osproc, strutils, os

# 调用 tesseract 并提取识别结果

proc recognizeCaptcha(imagePath: string): string =
  ## Runs the `tesseract` command-line tool on `imagePath` and returns the
  ## recognized text reduced to the characters `A`..`Z` and `0`..`9`.
  ##
  ## Tesseract is asked to write its output to `nim_output.txt` in the current
  ## working directory; that file is read and then deleted. If the command
  ## exits with a non-zero status or the output file is missing, the sentinel
  ## string "识别失败" is returned instead.
  const
    outputBase = "nim_output"
    whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    allowed = {'A'..'Z', '0'..'9'}
  let outputFile = outputBase & ".txt"
  # Quote the caller-supplied path so spaces or shell metacharacters in it
  # cannot split the argument or inject extra commands.
  let cmd = "tesseract " & quoteShell(imagePath) & " " & outputBase &
    " -l eng -c tessedit_char_whitelist=" & whitelist
  let exitCode = execShellCmd(cmd)

  if exitCode == 0 and fileExists(outputFile):
    let content = readFile(outputFile)
    removeFile(outputFile)
    # Keep only whitelisted characters; this also drops the newlines and
    # other whitespace in the output, so no separate strip is needed.
    for ch in content:
      if ch in allowed:
        result.add ch
  else:
    result = "识别失败"

# 主函数

when isMainModule:
  # Entry point: recognize one captcha image and print the resulting text.
  const imagePath = "captcha1.png" # replace with the path to your own image
  echo "识别结果: ", recognizeCaptcha(imagePath)
三、运行程序

nim compile --run captcha.nim
输出示例:

识别结果: 9XHA
四、扩展:批量识别图片
修改 when isMainModule 部分:

when isMainModule:
  # Batch mode: recognize every `.png` file directly inside `folder` and
  # print one "<filename> -> <text>" line per image.
  let folder = "captchas"
  # walkDir yields (kind, path) tuples rather than plain strings, so unpack
  # them; entries that are not files (e.g. sub-directories) are skipped.
  for kind, path in walkDir(folder):
    if kind in {pcFile, pcLinkToFile} and path.endsWith(".png"):
      let text = recognizeCaptcha(path)
      echo path.extractFilename(), " -> ", text
五、输出结果为 CSV
进一步添加写入 CSV 功能:

import sequtils

when isMainModule:
  # Batch mode with CSV export: recognize every `.png` file directly inside
  # `folder` and write "filename,text" rows to results.csv.
  let folder = "captchas"
  var rows = @["filename,text"] # header row
  # walkDir yields (kind, path) tuples rather than plain strings, so unpack
  # them; entries that are not files (e.g. sub-directories) are skipped.
  for kind, path in walkDir(folder):
    if kind in {pcFile, pcLinkToFile} and path.endsWith(".png"):
      rows.add(path.extractFilename() & "," & recognizeCaptcha(path))
  # Write the file once, after all images have been processed.
  writeFile("results.csv", rows.join("\n"))
  echo "结果已保存到 results.csv"

posted @ 2025-07-01 12:58  ttocr、com  阅读(14)  评论(0)    收藏  举报