Go语言汉字验证码识别实战:从零实现
本文将手把手教你用Go语言实现一个完整的汉字验证码识别系统,包含以下核心部分:
一、项目初始化
bash
# Create the project directory and enter it.
mkdir captcha-recognizer
cd captcha-recognizer
# Initialize a Go module with the given module path.
go mod init github.com/yourname/captcha-recognizer
二、核心代码实现
2.1 图像预处理(preprocess.go)
go
package main
// 更多内容访问ttocr.com或联系1436423940
import (
"image"
"image/color"
)
// grayscale converts img into an 8-bit grayscale image with the same bounds.
//
// Each pixel's luminance is the integer weighted sum
// (299*R + 587*G + 114*B) / 1000 of the 16-bit channels returned by RGBA,
// reduced to 8 bits by discarding the low byte.
func grayscale(img image.Image) *image.Gray {
	rect := img.Bounds()
	out := image.NewGray(rect)
	for row := rect.Min.Y; row < rect.Max.Y; row++ {
		for col := rect.Min.X; col < rect.Max.X; col++ {
			red, green, blue, _ := img.At(col, row).RGBA()
			// Channels are at most 65535, so the weighted sum fits in uint32.
			luma16 := (299*red + 587*green + 114*blue) / 1000
			out.SetGray(col, row, color.Gray{Y: uint8(luma16 >> 8)})
		}
	}
	return out
}
// binarize thresholds a grayscale image into pure black and white.
//
// Pixels whose value is strictly below threshold become 0 (black); every
// other pixel becomes 255 (white). The result has the same bounds as gray.
func binarize(gray *image.Gray, threshold uint8) *image.Gray {
	rect := gray.Bounds()
	out := image.NewGray(rect)
	for row := rect.Min.Y; row < rect.Max.Y; row++ {
		for col := rect.Min.X; col < rect.Max.X; col++ {
			level := uint8(255)
			if gray.GrayAt(col, row).Y < threshold {
				level = 0
			}
			out.SetGray(col, row, color.Gray{Y: level})
		}
	}
	return out
}
2.2 字符分割(segment.go)
go
package main
import "image"
// segmentCharacters splits a binarized image into per-character images using
// its vertical projection.
//
// A column counts as ink when it holds at least one black (0) pixel. Every
// maximal run of ink columns is copied, at full image height, into a new image
// whose bounds start at (0, 0), so callers can index it from the origin. Runs
// whose last column is fewer than 5 columns away from their first are treated
// as noise and dropped. A nil or empty input yields nil.
func segmentCharacters(binary *image.Gray) []*image.Gray {
	// Runs with (last column - first column) below this value are noise.
	const minSpan = 5

	if binary == nil {
		return nil
	}
	bounds := binary.Bounds()
	width, height := bounds.Dx(), bounds.Dy()
	if width <= 0 || height <= 0 {
		return nil
	}

	// Vertical projection: number of black pixels in each column. Coordinates
	// are offset by bounds.Min so sub-images are handled correctly.
	projection := make([]int, width)
	for x := 0; x < width; x++ {
		for y := 0; y < height; y++ {
			if binary.GrayAt(bounds.Min.X+x, bounds.Min.Y+y).Y == 0 {
				projection[x]++
			}
		}
	}

	var chars []*image.Gray
	runStart := -1 // first column of the current ink run, -1 when outside a run
	for x := 0; x <= width; x++ {
		// x == width acts as a blank sentinel column so that a run touching
		// the right edge of the image is still closed.
		inked := x < width && projection[x] > 0
		if inked {
			if runStart < 0 {
				runStart = x
			}
			continue
		}
		if runStart < 0 {
			continue
		}

		start, end := runStart, x // end is exclusive
		runStart = -1
		if (end-1)-start < minSpan {
			continue
		}

		w := end - start
		char := image.NewGray(image.Rect(0, 0, w, height))
		for y := 0; y < height; y++ {
			src := binary.PixOffset(bounds.Min.X+start, bounds.Min.Y+y)
			dst := char.PixOffset(0, y)
			copy(char.Pix[dst:dst+w], binary.Pix[src:src+w])
		}
		chars = append(chars, char)
	}
	return chars
}
2.3 特征提取(feature.go)
go
package main
import "image"
// extractFeatures describes a character image with 64 values, one per cell of
// an 8x8 grid laid over the image in row-major order. Each value is the
// fraction of black (0) pixels in its cell, in the range [0, 1].
//
// Cell edges are placed proportionally (k*size/8), so every pixel belongs to
// exactly one cell even when the image size is not a multiple of 8. When the
// image is smaller than 8 pixels in a dimension some cells are empty; those
// cells, and all cells of a nil image, report 0 instead of NaN.
func extractFeatures(char *image.Gray) []float64 {
	const grid = 8

	features := make([]float64, grid*grid)
	if char == nil {
		return features
	}
	bounds := char.Bounds()
	width, height := bounds.Dx(), bounds.Dy()

	for i := range features {
		gridX, gridY := i%grid, i/grid
		x0 := bounds.Min.X + gridX*width/grid
		x1 := bounds.Min.X + (gridX+1)*width/grid
		y0 := bounds.Min.Y + gridY*height/grid
		y1 := bounds.Min.Y + (gridY+1)*height/grid

		area := (x1 - x0) * (y1 - y0)
		if area <= 0 {
			// Empty cell: leave the feature at 0 rather than dividing by zero.
			continue
		}

		blackPixels := 0
		for y := y0; y < y1; y++ {
			for x := x0; x < x1; x++ {
				if char.GrayAt(x, y).Y == 0 {
					blackPixels++
				}
			}
		}
		features[i] = float64(blackPixels) / float64(area)
	}
	return features
}
2.4 分类识别(classify.go)
go
package main
// Classifier is a simple linear multi-class scorer: every class has a weight
// vector and a bias, and a sample's score for a class is bias + weights·features.
type Classifier struct {
	weights map[string][]float64 // per-class weight vector
	biases  map[string]float64   // per-class bias; a missing entry counts as 0
}

// NewClassifier returns a Classifier with empty, non-nil model maps.
func NewClassifier() *Classifier {
	return &Classifier{
		weights: make(map[string][]float64),
		biases:  make(map[string]float64),
	}
}

// LoadModel replaces the classifier's model with the given weights and biases.
// The maps are stored as-is, not copied.
func (c *Classifier) LoadModel(weights map[string][]float64, biases map[string]float64) {
	c.weights = weights
	c.biases = biases
}

// Predict returns the class with the highest score for features.
//
// Classes whose weight vector length differs from len(features) are skipped
// instead of causing an out-of-range panic. Scores may be arbitrarily
// negative. Ties are broken in favour of the lexicographically smaller class
// name, so the result does not depend on map iteration order. If no class is
// usable, Predict returns "".
func (c *Classifier) Predict(features []float64) string {
	var (
		bestClass string
		bestScore float64
		found     bool
	)
	for class, weights := range c.weights {
		if len(weights) != len(features) {
			continue
		}
		score := c.biases[class]
		for i, f := range features {
			score += weights[i] * f
		}
		// The found flag replaces a sentinel start value, which would wrongly
		// reject every class scoring at or below the sentinel.
		if !found || score > bestScore || (score == bestScore && class < bestClass) {
			found = true
			bestScore = score
			bestClass = class
		}
	}
	return bestClass
}
三、完整使用示例
3.1 主程序(main.go)
go
package main
import (
"fmt"
"image/jpeg"
"log"
"os"
)
func main() {
// 1. 加载验证码图片
file, err := os.Open("captcha.jpg")
if err != nil {
log.Fatal(err)
}
defer file.Close()
img, err := jpeg.Decode(file)
if err != nil {
log.Fatal(err)
}
// 2. 图像预处理
gray := grayscale(img)
binary := binarize(gray, 150) // 调整阈值
// 3. 字符分割
chars := segmentCharacters(binary)
if len(chars) == 0 {
log.Fatal("未检测到字符")
}
// 4. 初始化分类器
classifier := NewClassifier()
// 加载预训练模型 (示例数据)
weights := map[string][]float64{
"北": make([]float64, 64),
"京": make([]float64, 64),
// 添加更多字符...
}
biases := map[string]float64{
"北": 0.1,
"京": -0.2,
}
classifier.LoadModel(weights, biases)
// 5. 识别每个字符
var result string
for _, char := range chars {
features := extractFeatures(char)
predicted := classifier.Predict(features)
result += predicted
}
fmt.Printf("识别结果: %s\n", result)
}
四、模型训练建议
4.1 使用Python训练模型
python
# train.py
from sklearn import svm
import numpy as np
import joblib

# Prepare the training data.
#   X: feature matrix of shape (n_samples, n_features)
#   y: label list of shape (n_samples,)
X, y = load_training_data()

# Train a linear classifier.
# LinearSVC is one-vs-rest: with three or more classes, coef_ and intercept_
# hold exactly one row/entry per class, in the order of model.classes_.
# SVC(kernel='linear') is one-vs-one instead; its coef_ has one row per *pair*
# of classes and therefore cannot be indexed by class.
# NOTE: with only two classes LinearSVC stores a single row; this script
# assumes three or more classes.
model = svm.LinearSVC()
model.fit(X, y)

# Save the per-class weights and biases.
# `class` is a reserved word in Python, so the loop variable is named `label`.
weights = {label: model.coef_[i] for i, label in enumerate(model.classes_)}
biases = {label: model.intercept_[i] for i, label in enumerate(model.classes_)}
# NOTE: the dicts are stored as pickled object arrays; reading them back
# requires np.load('model.npz', allow_pickle=True).
np.savez('model.npz', weights=weights, biases=biases)
五、性能优化技巧
5.1 并行处理字符
go
// parallelRecognize classifies every character image concurrently, one
// goroutine per image, and returns the predictions concatenated in input order.
//
// Each goroutine writes only to its own slot of the result slice, and the
// function waits for all of them before joining the results.
func parallelRecognize(chars []*image.Gray, classifier *Classifier) string {
	predictions := make([]string, len(chars))

	var pending sync.WaitGroup
	pending.Add(len(chars))
	for i := range chars {
		go func(slot int, glyph *image.Gray) {
			defer pending.Done()
			predictions[slot] = classifier.Predict(extractFeatures(glyph))
		}(i, chars[i])
	}
	pending.Wait()

	return strings.Join(predictions, "")
}
5.2 内存池优化
go
// grayPool recycles *image.Gray buffers to reduce allocations. Fresh entries
// are 200x80; getGrayImage resizes them on demand.
var grayPool = sync.Pool{
	New: func() interface{} {
		return image.NewGray(image.Rect(0, 0, 200, 80))
	},
}

// getGrayImage returns a zeroed width x height grayscale image taken from
// grayPool, with bounds starting at (0, 0). Negative dimensions are treated
// as 0.
//
// The pool stores pointers, so a pointer is returned; this also matches
// putGrayImage. The pixel buffer is reused when it is large enough and
// reallocated otherwise, and Stride is updated together with Rect so that
// pixel addressing stays consistent.
func getGrayImage(width, height int) *image.Gray {
	if width < 0 {
		width = 0
	}
	if height < 0 {
		height = 0
	}

	img := grayPool.Get().(*image.Gray)
	need := width * height
	if cap(img.Pix) < need {
		img.Pix = make([]uint8, need)
	} else {
		img.Pix = img.Pix[:need]
		// Wipe pixels left over from the previous user of this buffer.
		for i := range img.Pix {
			img.Pix[i] = 0
		}
	}
	img.Stride = width
	img.Rect = image.Rect(0, 0, width, height)
	return img
}

// putGrayImage returns img to grayPool for reuse. The caller must not use img
// afterwards. A nil img is ignored so the pool never hands out nil.
func putGrayImage(img *image.Gray) {
	if img == nil {
		return
	}
	grayPool.Put(img)
}
浙公网安备 33010602011771号