使用 Swift 进行验证码识别:集成 Tesseract OCR
1. 环境准备
1.1 安装 Tesseract OCR
在 macOS 上可以使用 Homebrew 进行安装:
bash
brew install tesseract
安装完成后,检查 Tesseract 是否安装成功:
bash
tesseract --version
1.2 创建 Swift 项目
如果是 macOS 应用,可以使用 Swift Package Manager (SPM),或者直接在 Xcode 项目中集成 OCR 识别功能。
创建一个新的 Swift 项目:
bash
# Create the project directory and enter it.
mkdir SwiftCaptchaOCR
cd SwiftCaptchaOCR
# Generate an executable Swift package skeleton in the current directory.
swift package init --type executable
编辑 Package.swift,添加 Tesseract 相关库:
swift
// swift-tools-version:5.5
import PackageDescription
// Package manifest: declares one remote dependency and a single executable
// target named "SwiftCaptchaOCR" that depends on it.
// NOTE(review): confirm that the Tesseract-OCR-iOS repository actually ships a
// Package.swift and supports macOS before relying on this manifest — TODO verify.
let package = Package(
name: "SwiftCaptchaOCR",
dependencies: [
// `from: "4.0.0"` accepts 4.0.0 and later versions up to the next major release.
.package(url: "https://github.com/gali8/Tesseract-OCR-iOS.git", from: "4.0.0")
],
targets: [
.executableTarget(
name: "SwiftCaptchaOCR",
// NOTE(review): the code imports the module as `TesseractOCR`; verify that this
// by-name dependency resolves to the product that vends that module.
dependencies: ["Tesseract-OCR-iOS"]
)
]
)
然后运行:
bash
swift build
2. 代码实现
在 Sources/SwiftCaptchaOCR/main.swift 中写入以下代码:
swift
import Foundation
import TesseractOCR
import Cocoa
/// Prepares a captcha image for OCR and writes the result to disk.
///
/// The image at `inputPath` is converted to grayscale, binarized with a
/// threshold of 128, and saved to `outputPath`.
///
/// - Parameters:
///   - inputPath: File path of the image to load.
///   - outputPath: File path the processed image is written to.
///
/// When the image cannot be loaded, a message is printed and nothing is written.
func preprocessImage(inputPath: String, outputPath: String) {
    if let source = NSImage(contentsOfFile: inputPath) {
        // Grayscale first, then binarize the grayscale result.
        let binarized = applyThreshold(image: convertToGrayscale(image: source), threshold: 128)
        saveImage(image: binarized, outputPath: outputPath)
    } else {
        print("无法加载图片")
    }
}
/// Returns a grayscale copy of `image`.
///
/// The image is decoded from its TIFF data into an `NSBitmapImageRep`, which is
/// then converted to the device-gray color space.
///
/// - Parameter image: The image to convert.
/// - Returns: A new image of the same size holding the gray bitmap, or `image`
///   itself when it has no TIFF data, cannot be decoded, or cannot be converted.
func convertToGrayscale(image: NSImage) -> NSImage {
    // Every step is failable; fall back to the input instead of force-unwrapping.
    guard
        let tiffData = image.tiffRepresentation,
        let bitmap = NSBitmapImageRep(data: tiffData),
        let grayBitmap = bitmap.converting(to: .deviceGray, renderingIntent: .default)
    else {
        return image
    }
    let grayImage = NSImage(size: image.size)
    grayImage.addRepresentation(grayBitmap)
    return grayImage
}
/// Binarizes `image`: every pixel becomes pure white or pure black.
///
/// - Parameters:
///   - image: The image to binarize.
///   - threshold: Cut-off on a 0–255 scale; it is divided by 255 before being
///     compared with each pixel's white component.
/// - Returns: A new image of the same size containing the binarized bitmap, or
///   `image` itself when its bitmap data cannot be decoded.
func applyThreshold(image: NSImage, threshold: CGFloat) -> NSImage {
    guard
        let tiffData = image.tiffRepresentation,
        let bitmap = NSBitmapImageRep(data: tiffData)
    else {
        return image
    }
    // Loop-invariant: normalize the threshold once.
    let cutoff = threshold / 255.0
    for y in 0..<bitmap.pixelsHigh {
        for x in 0..<bitmap.pixelsWide {
            // `whiteComponent` is only valid for gray colors, so convert first;
            // pixels that cannot be read or converted are left untouched.
            guard let gray = bitmap.colorAt(x: x, y: y)?.usingColorSpace(.deviceGray) else {
                continue
            }
            let newColor = gray.whiteComponent > cutoff ? NSColor.white : NSColor.black
            bitmap.setColor(newColor, atX: x, y: y)
        }
    }
    let newImage = NSImage(size: image.size)
    newImage.addRepresentation(bitmap)
    return newImage
}
/// Encodes `image` as PNG and writes it to `outputPath`.
///
/// - Parameters:
///   - image: The image to save.
///   - outputPath: Destination file path.
///
/// Failures are reported on standard output rather than silently ignored:
/// one message when the image cannot be encoded, another when the write fails.
func saveImage(image: NSImage, outputPath: String) {
    guard
        let tiffData = image.tiffRepresentation,
        let bitmap = NSBitmapImageRep(data: tiffData),
        let pngData = bitmap.representation(using: .png, properties: [:])
    else {
        print("无法将图片编码为 PNG")
        return
    }
    do {
        try pngData.write(to: URL(fileURLWithPath: outputPath))
    } catch {
        print("保存图片失败: \(error.localizedDescription)")
    }
}
/// Runs Tesseract OCR on the image at `imagePath` and returns the recognized text.
///
/// - Parameter imagePath: File path of the (preprocessed) captcha image.
/// - Returns: The recognized text; "Tesseract 初始化失败" when the engine cannot be
///   created; "识别失败" when the image cannot be loaded or no text is produced.
func recognizeCaptcha(imagePath: String) -> String {
    guard let tesseract = G8Tesseract(language: "eng") else {
        return "Tesseract 初始化失败"
    }
    // Do not hand a missing image to the engine; report the failure directly.
    guard let image = NSImage(contentsOfFile: imagePath) else {
        return "识别失败"
    }
    tesseract.image = image
    tesseract.recognize()
    return tesseract.recognizedText ?? "识别失败"
}
// Entry point: preprocess the captcha image, run OCR on the processed copy,
// and print the recognized text.
let inputImagePath = "captcha.png" // Replace with the path to your captcha image
let processedImagePath = "processed_captcha.png"
// Preprocess the captcha image (grayscale + binarization)
preprocessImage(inputPath: inputImagePath, outputPath: processedImagePath)
// Run OCR on the preprocessed image
let result = recognizeCaptcha(imagePath: processedImagePath)
// `\(result)` interpolates the value; without the backslash the literal text "(result)" is printed.
print("识别出的验证码: \(result)")
3. 代码解析
3.1 图像预处理
为了提高 OCR 识别率,我们进行了以下优化:
转换为灰度图像:
swift
/// Returns a grayscale copy of `image`.
///
/// The image is decoded from its TIFF data into an `NSBitmapImageRep`, which is
/// then converted to the device-gray color space.
///
/// - Parameter image: The image to convert.
/// - Returns: A new image of the same size holding the gray bitmap, or `image`
///   itself when it has no TIFF data, cannot be decoded, or cannot be converted.
func convertToGrayscale(image: NSImage) -> NSImage {
    // Every step is failable; fall back to the input instead of force-unwrapping.
    guard
        let tiffData = image.tiffRepresentation,
        let bitmap = NSBitmapImageRep(data: tiffData),
        let grayBitmap = bitmap.converting(to: .deviceGray, renderingIntent: .default)
    else {
        return image
    }
    let grayImage = NSImage(size: image.size)
    grayImage.addRepresentation(grayBitmap)
    return grayImage
}
二值化处理,增强字符对比度:
swift
/// Binarizes `image`: every pixel becomes pure white or pure black.
///
/// - Parameters:
///   - image: The image to binarize.
///   - threshold: Cut-off on a 0–255 scale; it is divided by 255 before being
///     compared with each pixel's white component.
/// - Returns: A new image of the same size containing the binarized bitmap, or
///   `image` itself when its bitmap data cannot be decoded.
func applyThreshold(image: NSImage, threshold: CGFloat) -> NSImage {
    guard
        let tiffData = image.tiffRepresentation,
        let bitmap = NSBitmapImageRep(data: tiffData)
    else {
        return image
    }
    // Loop-invariant: normalize the threshold once.
    let cutoff = threshold / 255.0
    for y in 0..<bitmap.pixelsHigh {
        for x in 0..<bitmap.pixelsWide {
            // `whiteComponent` is only valid for gray colors, so convert first;
            // pixels that cannot be read or converted are left untouched.
            guard let gray = bitmap.colorAt(x: x, y: y)?.usingColorSpace(.deviceGray) else {
                continue
            }
            let newColor = gray.whiteComponent > cutoff ? NSColor.white : NSColor.black
            bitmap.setColor(newColor, atX: x, y: y)
        }
    }
    let newImage = NSImage(size: image.size)
    newImage.addRepresentation(bitmap)
    return newImage
}
3.2 OCR 解析
初始化 Tesseract OCR:
swift
// Excerpt from the body of recognizeCaptcha(imagePath:).
// Create an OCR engine for the "eng" language; when creation fails,
// return an error message instead of continuing.
guard let tesseract = G8Tesseract(language: "eng") else {
return "Tesseract 初始化失败"
}
加载图像并执行 OCR:
swift
// Excerpt from the body of recognizeCaptcha(imagePath:).
// Load the image first; report failure instead of passing a missing image to the engine.
guard let image = NSImage(contentsOfFile: imagePath) else {
    return "识别失败"
}
tesseract.image = image
tesseract.recognize()
// Return the recognized text, or a fallback message when none was produced.
return tesseract.recognizedText ?? "识别失败"
4. 运行程序
确保 captcha.png 在项目目录下,然后运行:
bash
swift run
示例输出:
makefile
识别出的验证码: X9F2G
5. 提高 OCR 识别率
5.1 设置 Tesseract PSM 模式
Tesseract 提供不同的页面分割模式(PSM),可以调整以优化验证码识别:
swift
更多内容访问ttocr.com或联系1436423940
// Set the Tesseract variable "tessedit_pageseg_mode" to "6".
// NOTE(review): Tesseract documents PSM 6 as "assume a single uniform block of text";
// for one-line captchas, PSM 7 (single text line) may be worth trying — verify.
tesseract.setVariableValue("6", forKey: "tessedit_pageseg_mode")
5.2 只识别特定字符
swift
// Restrict recognition to the digits 0-9 and the uppercase letters A-Z.
tesseract.charWhitelist = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
5.3 进一步优化
如果验证码干扰较多,可以使用 Core Image 进行滤波、去噪等处理。
浙公网安备 33010602011771号