Java 图片文字识别(中英文混合)
通过调用 Tess4J 库(Tesseract OCR 的 Java 封装)来识别图片中的文字。
依赖的 Maven 库:
<!-- SLF4J logging API: supplies the org.slf4j.Logger / LoggerFactory types imported by TestOCR. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.26</version>
</dependency>
<!-- SLF4J "simple" artifact, kept at the same version as slf4j-api.
     NOTE(review): presumably the runtime logging backend for the API above; confirm it suits your deployment. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.26</version>
</dependency>
<!-- Tess4J OCR library: supplies the net.sourceforge.tess4j.ITesseract / Tesseract1 / TesseractException types imported by TestOCR. -->
<!-- https://mvnrepository.com/artifact/net.sourceforge.tess4j/tess4j -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>5.1.1</version>
</dependency>
图片识别文字的示例代码:
package com;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract1;
import net.sourceforge.tess4j.TesseractException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
/**
 * Minimal Tess4J example that extracts text from an image using the combined
 * English + Simplified Chinese language models.
 *
 * <p>Usage: {@code java com.TestOCR <tessdata-dir> <image-path>}. When an argument
 * is omitted, the original placeholder value is used so the example still compiles
 * and runs unchanged.
 */
public class TestOCR {
    private static final Logger logger = LoggerFactory.getLogger(TestOCR.class);

    /**
     * Language spec handed to Tesseract: {@code eng} is English, {@code chi_sim} is
     * Simplified Chinese, and {@code +} combines them for mixed-language text.
     */
    private static final String LANGUAGES = "eng+chi_sim";

    /**
     * Runs OCR once and prints the recognized text to standard output.
     *
     * @param args optional; {@code args[0]} is the tessdata directory and
     *             {@code args[1]} is the image to recognize
     */
    public static void main(String[] args) {
        // Placeholders are kept as fallbacks so running without arguments behaves as before.
        String dataPath = (args != null && args.length > 0) ? args[0] : "字库位置";
        String imgPath = (args != null && args.length > 1) ? args[1] : "要识别的图片地址";
        String result = doOCR(dataPath, imgPath);
        System.out.println(result);
    }

    /**
     * Recognizes the text contained in an image file.
     *
     * @param dataPath directory holding the {@code *.traineddata} language files
     * @param imgPath  path of the image to recognize
     * @return the recognized text, or an empty string when an argument is null, the
     *         image file does not exist, or Tesseract reports a failure (the cause is logged)
     */
    private static String doOCR(String dataPath, String imgPath) {
        if (dataPath == null || imgPath == null) {
            // new File(null) would throw NullPointerException; report it like any other failure.
            logger.error("OCR skipped, both paths are required: dataPath={}, imgPath={}", dataPath, imgPath);
            return "";
        }

        File imageFile = new File(imgPath);
        if (!imageFile.isFile()) {
            // Fail fast with a clear message before the native engine is instantiated.
            logger.error("OCR skipped, image file not found: {}", imageFile.getAbsolutePath());
            return "";
        }

        ITesseract instance = new Tesseract1();
        // Location of the trained language data.
        instance.setDatapath(dataPath);
        instance.setLanguage(LANGUAGES);
        try {
            return instance.doOCR(imageFile);
        } catch (TesseractException e) {
            // A trailing Throwable argument is logged by SLF4J together with its stack trace.
            logger.error("OCR failed for image {} using tessdata at {}", imgPath, dataPath, e);
            return "";
        }
    }
}
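字库下载
下载中文语言包:在 https://github.com/tesseract-ocr/tessdata 中选择 chi_sim.traineddata 文件进行下载;英文语言包(eng.traineddata)可以从 tess4j 的 jar 包中获取。由于代码中使用 "eng+chi_sim" 同时加载两种语言,这两个文件需要放在同一个字库目录下,并将该目录作为 dataPath 传给 setDatapath。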

浙公网安备 33010602011771号