使用 Go 构建图像验证码识别命令行工具(基于 TensorFlow Lite)

本项目介绍如何使用 Go 实现一个命令行工具,从本地加载图像验证码并调用 TensorFlow Lite 模型进行识别。

一、准备工作
项目结构:

captcha-cli/
├── main.go
├── imageutil.go
├── tflite_predict.go
├── model/
│ └── captcha.tflite
└── go.mod
安装依赖(go-tflite 通过 cgo 调用 TensorFlow Lite 的 C 库,编译前需先安装该库并启用 CGO):
更多内容访问ttocr.com或联系1436423940
go mod init captcha-cli
go get github.com/mattn/go-tflite
go get github.com/nfnt/resize
二、图像预处理(imageutil.go)

package main

import (
"image"
"image/jpeg"
"image/png"
"os"
"strings"

"github.com/nfnt/resize"

)

// LoadImage reads the image at filePath, resizes it to width x height and
// returns its pixels as a flat float32 slice.
//
// Files whose name ends in ".png" (compared case-insensitively) are decoded
// as PNG; every other file is decoded as JPEG. The returned slice holds
// width*height*3 values written row by row, with the R, G and B components of
// each pixel stored consecutively and scaled to the range [0, 1].
//
// An error is returned when the file cannot be opened or decoded.
func LoadImage(filePath string, width, height int) ([]float32, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var img image.Image
	// Lower-case the path before testing the suffix so that a file such as
	// "captcha.PNG" is not mistakenly handed to the JPEG decoder.
	if strings.HasSuffix(strings.ToLower(filePath), ".png") {
		img, err = png.Decode(file)
	} else {
		img, err = jpeg.Decode(file)
	}
	if err != nil {
		return nil, err
	}

	resized := resize.Resize(uint(width), uint(height), img, resize.Bilinear)
	data := make([]float32, width*height*3)

	idx := 0
	for y := 0; y < height; y++ {
		for x := 0; x < width; x++ {
			// RGBA yields 16-bit channels; drop the low byte to get the
			// familiar 0-255 value before normalizing.
			r, g, b, _ := resized.At(x, y).RGBA()
			data[idx] = float32(r>>8) / 255.0
			data[idx+1] = float32(g>>8) / 255.0
			data[idx+2] = float32(b>>8) / 255.0
			idx += 3
		}
	}

	return data, nil
}
三、模型推理部分(tflite_predict.go)

package main

import (
"fmt"

"github.com/mattn/go-tflite"

)

// letters is the alphabet the recognized characters are drawn from: the ten
// digits followed by the upper-case Latin letters. Predict indexes into it
// with the position returned by Argmax.
var letters = []rune("0123456789" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ")

// Argmax returns the index of the largest value in logits.
//
// When several elements share the maximum, the lowest index wins. An empty
// or nil slice yields 0.
func Argmax(logits []float32) int {
	best := 0
	for i, v := range logits {
		// Strict comparison keeps the earliest index on ties.
		if v > logits[best] {
			best = i
		}
	}
	return best
}

// Predict runs the captcha model stored at "model/captcha.tflite" on the
// image at imagePath and returns the recognized text.
//
// The image is loaded through LoadImage at 160x60 and copied into the model's
// first input tensor. After inference, the first four output tensors are each
// read as a float32 score vector, and the symbol of letters at the
// highest-scoring position is appended to the result.
//
// An error is returned when the model or interpreter cannot be created, when
// the image cannot be loaded, when a tensor's size does not match what this
// function expects, or when TensorFlow Lite reports a non-OK status.
func Predict(imagePath string) (string, error) {
	const (
		inputWidth      = 160
		inputHeight     = 60
		numChars        = 4
		bytesPerFloat32 = 4
	)

	model := tflite.NewModelFromFile("model/captcha.tflite")
	if model == nil {
		return "", fmt.Errorf("模型加载失败")
	}
	defer model.Delete()

	options := tflite.NewInterpreterOptions()
	// The options object is backed by native memory and must be released too.
	defer options.Delete()

	interpreter := tflite.NewInterpreter(model, options)
	if interpreter == nil {
		return "", fmt.Errorf("解释器创建失败")
	}
	defer interpreter.Delete()

	// go-tflite reports failures through tflite.Status values rather than Go
	// errors, so every call is compared against tflite.OK.
	if status := interpreter.AllocateTensors(); status != tflite.OK {
		return "", fmt.Errorf("张量分配失败")
	}

	input, err := LoadImage(imagePath, inputWidth, inputHeight)
	if err != nil {
		return "", err
	}

	inputTensor := interpreter.GetInputTensor(0)
	if inputTensor == nil {
		return "", fmt.Errorf("获取输入张量失败")
	}
	// The copy length is taken from the tensor, not from the Go slice, so a
	// size mismatch would read past the end of input.
	if got, want := int(inputTensor.ByteSize()), len(input)*bytesPerFloat32; got != want {
		return "", fmt.Errorf("输入张量大小不匹配: 模型需要 %d 字节, 图像数据为 %d 字节", got, want)
	}
	if status := inputTensor.CopyFromBuffer(input); status != tflite.OK {
		return "", fmt.Errorf("输入数据写入失败")
	}

	if status := interpreter.Invoke(); status != tflite.OK {
		return "", fmt.Errorf("模型推理失败")
	}

	if count := interpreter.GetOutputTensorCount(); count < numChars {
		return "", fmt.Errorf("输出张量数量不足: 需要 %d 个, 实际 %d 个", numChars, count)
	}

	result := make([]rune, 0, numChars)
	for i := 0; i < numChars; i++ {
		output := interpreter.GetOutputTensor(i)
		if output == nil {
			return "", fmt.Errorf("获取输出张量 %d 失败", i)
		}

		// Size the buffer from the tensor itself so the native copy can
		// never write beyond the Go allocation.
		n := int(output.ByteSize()) / bytesPerFloat32
		if n == 0 {
			return "", fmt.Errorf("输出张量 %d 为空", i)
		}
		buf := make([]float32, n)
		if status := output.CopyToBuffer(&buf[0]); status != tflite.OK {
			return "", fmt.Errorf("输出张量 %d 读取失败", i)
		}

		idx := Argmax(buf)
		if idx >= len(letters) {
			return "", fmt.Errorf("输出张量 %d 的类别索引 %d 超出字符表范围", i, idx)
		}
		result = append(result, letters[idx])
	}

	return string(result), nil
}
四、主函数入口(main.go)

package main

import (
"fmt"
"os"
)

// main is the command-line entry point. It expects the path of a captcha
// image as the first argument, passes it to Predict and prints the recognized
// text to standard output.
//
// A missing argument or a recognition failure is reported on standard error
// and the process exits with status 1, so that callers such as shell scripts
// can detect the failure.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "用法:captcha-cli path/to/image.png")
		os.Exit(1)
	}

	result, err := Predict(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, "识别出错:", err)
		os.Exit(1)
	}

	fmt.Println("识别结果:", result)
}
五、使用方式
编译:

go build -o captcha-cli
运行:

./captcha-cli test_images/7YQK.png
输出示例:

识别结果:7YQK

posted @ 2025-06-01 12:04  ttocr、com  阅读(13)  评论(0)    收藏  举报