使用 Rust 和 Tesseract 实现验证码识别
一、准备环境
安装 Rust
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
安装 Tesseract OCR
更多内容访问ttocr.com或联系1436423940
sudo apt install tesseract-ocr # Linux
brew install tesseract # macOS
choco install tesseract # Windows
创建项目
cargo new captcha_ocr
cd captcha_ocr
二、编写代码(main.rs)
修改 src/main.rs:
use std::process::Command;
use std::fs;
/// Returns a copy of `text` that keeps only uppercase ASCII letters and
/// ASCII digits; every other character (whitespace, lowercase, punctuation,
/// non-ASCII) is dropped.
fn clean_text(text: &str) -> String {
    let mut cleaned = String::new();
    for ch in text.chars() {
        if matches!(ch, 'A'..='Z' | '0'..='9') {
            cleaned.push(ch);
        }
    }
    cleaned
}
fn recognize_captcha(image_path: &str) -> Option
let output_base = "output_result";
let status = Command::new("tesseract")
.arg(image_path)
.arg(output_base)
.arg("-l")
.arg("eng")
.arg("-c")
.arg("tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
.status()
.expect("failed to execute tesseract");
if !status.success() {
eprintln!("Tesseract failed to run.");
return None;
}
let result_file = format!("{}.txt", output_base);
let content = fs::read_to_string(&result_file).ok()?;
fs::remove_file(&result_file).ok();
Some(clean_text(&content))
}
/// Entry point: runs recognition on a single captcha image and prints
/// either the recognized text or a failure message.
fn main() {
    // Replace with the path to your captcha image.
    let image_path = "test.png";

    if let Some(text) = recognize_captcha(image_path) {
        println!("识别结果: {}", text);
    } else {
        println!("识别失败");
    }
}
三、构建与运行
将验证码图像放在项目根目录(例如 test.png),然后运行:
cargo run
输出示例:
识别结果: 9BKD
四、可选:批量识别图像
你可以遍历文件夹识别多个验证码图像:
use std::fs::read_dir;
/// Runs captcha recognition on every PNG file directly inside `dir` and
/// prints the outcome for each one.
///
/// The extension is compared case-insensitively, so `.png` and `.PNG` both
/// match. A directory that cannot be read, an entry that cannot be
/// inspected, or a path that is not valid UTF-8 is reported on stderr and
/// skipped instead of panicking.
fn batch_recognize(dir: &str) {
    let entries = match read_dir(dir) {
        Ok(entries) => entries,
        Err(err) => {
            eprintln!("failed to read directory {}: {}", dir, err);
            return;
        }
    };

    for entry in entries {
        let path = match entry {
            Ok(entry) => entry.path(),
            Err(err) => {
                eprintln!("failed to read directory entry: {}", err);
                continue;
            }
        };

        let is_png = path
            .extension()
            .map(|ext| ext.eq_ignore_ascii_case("png"))
            .unwrap_or(false);
        if !is_png {
            continue;
        }

        // `recognize_captcha` takes a `&str`, so a path that is not valid
        // UTF-8 cannot be passed on; skip it rather than abort the batch.
        let file_path = match path.to_str() {
            Some(file_path) => file_path,
            None => {
                eprintln!("skipping non-UTF-8 path: {}", path.display());
                continue;
            }
        };

        println!("识别中: {}", file_path);
        match recognize_captcha(file_path) {
            Some(result) => println!("结果: {}", result),
            None => println!("失败"),
        }
    }
}
调用(用下面的 main 函数替换前面示例中的 main 函数):
/// Entry point for batch mode: runs recognition over the `./captchas` directory.
fn main() {
    let captcha_dir = "./captchas";
    batch_recognize(captcha_dir);
}
浙公网安备 33010602011771号