/**
 * Earlier two-pass parser that maps OCR text boxes of a business license to license fields.
 *
 * <p>Pass 1 walks the recognized texts, detects field labels (full labels or the trailing
 * character of a label that OCR split across two boxes) and remembers the polygon of each
 * label. When a label and its value share one text box, the label prefix is stripped from
 * the text <em>in place</em>, i.e. {@code item.getRecTexts()} is modified.
 *
 * <p>Pass 2 walks the texts again and appends every text whose polygon lies close to a
 * remembered label polygon (point 0 x-distance below 50 and point 3 y-distance below 80)
 * to that label's field. A text matching {@code LICENSE_PATTERN_SIMPLE} is taken as the
 * credit code directly.
 *
 * @param item OCR result; its text list and polygon list are read by the same index
 * @return a populated {@link OcrLicenseVo}; fields with no matching text stay {@code null}
 */
public static OcrLicenseVo parseOcrLicense(OcrResultVo item) {
    // dtPolyList[field] is the polygon of that field's label; null until the label is seen.
    int[][][] dtPolyList = new int[9][][];
    for (int i = 0; i < item.getRecTexts().size(); i++) {
        String value = StringUtils.trimToEmpty(item.getRecTexts().get(i));
        if (StringUtils.anyContains(BREAK_NAME, value)) {
            break;
        }
        // NOTE(review): a blank value can only satisfy the second operand if CONTINUE_NAME
        // matches the empty string, so this skip looks effectively dead; "||" may have been
        // intended - confirm before changing.
        if (StringUtils.isBlank(value) && StringUtils.anyContains(CONTINUE_NAME, value)) {
            continue;
        }
        short idx;
        // Number of leading label characters to strip when label and value share a box.
        int prefixLength = 0;
        // True for the single trailing character of a label that OCR split into two boxes.
        boolean splitLabelTail = false;
        if (StringUtils.startsWith(value, "称")) {
            idx = orgNameIndex;
            prefixLength = 1;
            splitLabelTail = true;
        } else if (StringUtils.startsWith(value, "注册资本")) {
            idx = capitalIndex;
            prefixLength = 4;
        } else if (StringUtils.startsWith(value, "成立日期")) {
            idx = establishDateIndex;
            prefixLength = 4;
        } else if (StringUtils.startsWith(value, "营业期限")) {
            idx = termIndex;
            prefixLength = 4;
        } else if (StringUtils.startsWith(value, "法定代表人")) {
            idx = legalPersonIndex;
            prefixLength = 5;
        } else if (StringUtils.startsWith(value, "定代表人")) {
            idx = legalPersonIndex;
            prefixLength = 4;
        } else if (StringUtils.startsWith(value, "经营范围")) {
            idx = scopeIndex;
            prefixLength = 4;
        } else if (StringUtils.startsWith(value, "所")) {
            idx = addressIndex;
            prefixLength = 1;
            splitLabelTail = true;
        } else if (StringUtils.startsWith(value, "型")) {
            idx = orgTypeIndex;
            prefixLength = 1;
            splitLabelTail = true;
        } else {
            idx = getValueIndex(value);
        }
        if (idx < 0) {
            continue;
        }
        int polyPosition = i;
        if (prefixLength > 0 && StringUtils.length(value) > prefixLength) {
            // Label and value are merged in one box: keep only the value text.
            item.getRecTexts().set(i, StringUtils.substring(value, prefixLength));
            // For a split label the previous box is used as the label polygon so this box can
            // still be matched as a value. Fix: at i == 0 there is no previous box, so fall
            // back to the current one instead of calling get(-1).
            if (splitLabelTail && i > 0) {
                polyPosition = i - 1;
            }
        }
        dtPolyList[idx] = item.getDtPolys().get(polyPosition);
    }

    String[] licenseValues = new String[9];
    for (int i = 0; i < item.getRecTexts().size(); i++) {
        String value = StringUtils.trimToEmpty(item.getRecTexts().get(i));
        if (StringUtils.anyContains(BREAK_NAME, value)) {
            break;
        }
        // NOTE(review): same seemingly dead condition as in the first pass.
        if (StringUtils.isBlank(value) && StringUtils.anyContains(CONTINUE_NAME, value)) {
            continue;
        }
        if (LICENSE_PATTERN_SIMPLE.matcher(value).matches()) {
            licenseValues[creditCodeIndex] = value;
            log.info("scan value {} {} {}", i, 0, value);
            continue;
        }
        int[][] currentPoly = item.getDtPolys().get(i);
        for (int index = 0; index < dtPolyList.length; index++) {
            int[][] dtPoly = dtPolyList[index];
            // Fix: a label that was never found (or has fewer than 4 points) used to be
            // dereferenced before the null check and raised ArrayIndexOutOfBoundsException.
            if (Objects.isNull(dtPoly) || dtPoly.length < 4) {
                log.info("skip {} {} {}", i, index, value);
                continue;
            }
            int hdiff = Math.abs(currentPoly[0][0] - dtPoly[0][0]);
            int vdiff = Math.abs(currentPoly[3][1] - dtPoly[3][1]);
            if (hdiff == 0 && vdiff == 0) {
                // This box is the label box itself; stop scanning fields for it.
                log.info("scan name {} {} {}", i, index, value);
                break;
            }
            // Thresholds come from sample images: in 2.png "类" sits too close to "名(称)"
            // horizontally and must not count as the name; in 3.png "械" sits too far from
            // "住所" horizontally and must not count as the address.
            if (hdiff < 50 && vdiff < 80) {
                // index 0 is the credit code, which is picked up by the regex above.
                if (index == 0) {
                    continue;
                }
                licenseValues[index] = StringUtils.trimToEmpty(licenseValues[index]) + value;
            } else {
                log.info("skip {} {} {}", i, index, value);
            }
        }
    }

    OcrLicenseVo ocrLicenseVo = new OcrLicenseVo();
    ocrLicenseVo.setCreditCode(licenseValues[creditCodeIndex]);
    ocrLicenseVo.setOrgName(licenseValues[orgNameIndex]);
    ocrLicenseVo.setCapital(licenseValues[capitalIndex]);
    ocrLicenseVo.setOrgType(licenseValues[orgTypeIndex]);
    ocrLicenseVo.setEstablishDate(licenseValues[establishDateIndex]);
    ocrLicenseVo.setLegalPerson(licenseValues[legalPersonIndex]);
    ocrLicenseVo.setAddress(licenseValues[addressIndex]);
    ocrLicenseVo.setScope(licenseValues[scopeIndex]);
    ocrLicenseVo.setTerm(licenseValues[termIndex]);
    return ocrLicenseVo;
}
package com.nvxclouds.baize.breeze.system.service.impl;
import cn.hutool.core.util.ObjectUtil;
import com.fasterxml.jackson.core.type.TypeReference;
import com.nvxclouds.baize.breeze.common.core.domain.R;
import com.nvxclouds.baize.breeze.common.core.exception.ServiceException;
import com.nvxclouds.baize.breeze.common.core.utils.StringUtils;
import com.nvxclouds.baize.breeze.common.json.utils.JsonObject;
import com.nvxclouds.baize.breeze.common.json.utils.JsonUtils;
import com.nvxclouds.baize.breeze.common.oss.core.OssClient;
import com.nvxclouds.baize.breeze.common.oss.factory.OssFactory;
import com.nvxclouds.baize.breeze.common.web.utils.ResourceUtil;
import com.nvxclouds.baize.breeze.system.domain.vo.OcrLicenseVo;
import com.nvxclouds.baize.breeze.system.domain.vo.OcrResultVo;
import com.nvxclouds.baize.breeze.system.domain.vo.SysOssVo;
import com.nvxclouds.baize.breeze.system.service.IFantasiaService;
import com.nvxclouds.baize.breeze.system.service.ISysOssService;
import jakarta.validation.constraints.NotNull;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import okhttp3.Request.Builder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import static com.nvxclouds.baize.breeze.system.constants.OrcLicenseConstant.*;
/**
* Copyright (c) 2025 NVXClouds.Co.Ltd. All rights reserved.
*
* @author muzhi
* @version 3.0.0
* @date 2025-09-23 23:15
*/
@Slf4j
@Service
public class FantasiaServiceImpl implements IFantasiaService {
// JSON media type used for the body of the OCR request.
private static final MediaType mediaType = MediaType.parse("application/json; charset=utf-8");
// Shared HTTP client: 10 s connect timeout, 30 s write timeout, 30 s read timeout.
private static final OkHttpClient CLIENT = new OkHttpClient.Builder()
.connectTimeout(10, TimeUnit.SECONDS).writeTimeout(30, TimeUnit.SECONDS).readTimeout(30, TimeUnit.SECONDS)
.build();
// When true, ocrLicense first tries the bundled mock OCR response before calling the service.
@Value("${app.fantasia-ocr-mock-enabled}")
private boolean fantasiaOcrMockEnabled;
// URL the OCR request is POSTed to.
@Value("${app.fantasia-ocr-url}")
private String fantasiaOcrUrl;
// Path handed to the OSS client when fetching the file that is sent to OCR.
@Value("${app.fantasia-ocr-input-path}")
private String fantasiaOcrInputPath;
// Injected from app.fantasia-ocr-output-path.
@Value("${app.fantasia-ocr-output-path}")
private String fantasiaOcrOutputPath;
@Autowired
private ISysOssService sysOssService;
/**
 * Shape of an 18-character unified social credit code, as described by the original author:
 * registration authority code (1) + organization category code (1) + administrative
 * division code (6 digits) + subject identifier / organization code (9) + check character (1).
 * The pattern only restricts the alphabet of each position (the letters I and O are
 * excluded throughout, and the last position additionally excludes S, U, V and Z);
 * it does not compute or verify the check character.
 */
public static final Pattern LICENSE_PATTERN_STRICT = Pattern.compile("^[1-9A-HJ-NP-Z]{2}\\d{6}[0-9A-HJ-NP-Z]{9}[0-9A-HJ-NP-TV-Y]$");
// Looser 18-character form: the first 2 characters are 1-9 or an upper-case letter other
// than I/O, the remaining 16 characters are 0-9 or an upper-case letter other than I/O.
public static final Pattern LICENSE_PATTERN_SIMPLE = Pattern.compile("^[1-9A-HJ-NP-Z]{2}[0-9A-HJ-NP-Z]{16}$");
/**
 * Runs OCR on a stored business-license file and maps the recognized text to license fields.
 *
 * <p>When the mock switch is on, the bundled {@code /mock_ocr_business_license.json} is parsed
 * first; if it yields recognized texts the result is returned without any network call,
 * otherwise processing falls through to the real service. For the real call the file is
 * fetched through {@code download}, its path is POSTed to the OCR URL as
 * {@code {"file_path": ...}}, and the first element of the response data is parsed.
 *
 * <p>The fetched file is removed once the call finishes, whether it succeeded or not.
 *
 * @param ossId id of the stored file to recognize
 * @return the parsed license, or {@code null} when the HTTP call fails or the response
 *         contains no recognized text
 * @throws IOException if the HTTP call or reading the response body fails
 */
@Override
public OcrLicenseVo ocrLicense(@NotNull Long ossId) throws IOException {
    if (fantasiaOcrMockEnabled) {
        String mockBody = ResourceUtil.getResourceAsString("/mock_ocr_business_license.json");
        OcrResultVo mockItem = JsonUtils.parseObject(mockBody, new TypeReference<OcrResultVo>() {
        });
        if (Objects.nonNull(mockItem)
                && Objects.nonNull(mockItem.getRecTexts())
                && !mockItem.getRecTexts().isEmpty()) {
            return parseOcrLicense(mockItem);
        }
    }

    Path destPath = download(ossId, fantasiaOcrInputPath);
    try {
        JsonObject requestData = new JsonObject().put("file_path", destPath.toString());
        RequestBody body = RequestBody.create(JsonUtils.toJsonString(requestData), mediaType);
        Request request = new Builder().url(fantasiaOcrUrl).post(body).build();
        try (Response response = CLIENT.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                log.error("Request failed with HTTP code: {}", response.code());
                return null;
            }
            String responseBody = response.body().string();
            R<List<OcrResultVo>> result = JsonUtils.parseObject(responseBody, new TypeReference<R<List<OcrResultVo>>>() {
            });
            if (!R.isSuccess(result) || result.getData() == null || result.getData().isEmpty()) {
                return null;
            }
            // Only the first OCR result is used.
            OcrResultVo item = result.getData().get(0);
            if (Objects.isNull(item.getRecTexts()) || item.getRecTexts().isEmpty()) {
                return null;
            }
            OcrLicenseVo vo = parseOcrLicense(item);
            log.info("ocr license {}", JsonUtils.toJsonString(vo));
            return vo;
        }
    } finally {
        // Fix: the fetched file used to be deleted only on the fully successful path and
        // was left behind on HTTP errors, empty results and exceptions.
        if (destPath != null && !destPath.toFile().delete()) {
            log.warn("Failed to delete temporary OCR input file {}", destPath);
        }
    }
}
/**
 * Resolves the OSS record for {@code ossId} and asks the OSS client for the object's content.
 *
 * @param ossId id of the stored file
 * @param path  passed through to {@code OssClient.getObjectContent}; presumably the local
 *              target location - confirm against the OSS client
 * @return the path returned by the OSS client
 * @throws ServiceException if no OSS record exists for {@code ossId}
 */
private Path download(@NotNull Long ossId, String path) {
    SysOssVo ossRecord = sysOssService.getById(ossId);
    if (ObjectUtil.isNull(ossRecord)) {
        throw new ServiceException("文件数据不存在!");
    }
    return OssFactory.instance().getObjectContent(ossRecord.getFileName(), path);
}
/**
 * Label prefixes recognized at the start of an OCR text. Besides the full labels, the list
 * contains truncated forms for labels whose leading character(s) OCR dropped or split off.
 */
private static final String[] LABEL_PREFIXES = {
    "名称", "称",
    "注册资本", "册资本", "资本",
    "成立日期", "立日期",
    "营业期限", "业期限", "期限",
    "法定代表人", "定代表人", "代表人",
    "经营范围", "营范围",
    "住所", "所",
    "类型", "型"
};

/** Field index for the prefix at the same position in {@link #LABEL_PREFIXES}. */
private static final int[] LABEL_PREFIX_FIELDS = {
    orgNameIndex, orgNameIndex,
    capitalIndex, capitalIndex, capitalIndex,
    establishDateIndex, establishDateIndex,
    termIndex, termIndex, termIndex,
    legalPersonIndex, legalPersonIndex, legalPersonIndex,
    scopeIndex, scopeIndex,
    addressIndex, addressIndex,
    orgTypeIndex, orgTypeIndex
};

/** Returns the position of the first entry of LABEL_PREFIXES that {@code value} starts with, or -1. */
private static int findLabelPrefix(String value) {
    for (int p = 0; p < LABEL_PREFIXES.length; p++) {
        if (StringUtils.startsWith(value, LABEL_PREFIXES[p])) {
            return p;
        }
    }
    return -1;
}

/**
 * Maps the text boxes of a business-license OCR result to license fields.
 *
 * <p>Pass 1 finds the label boxes. A text that starts with a known label prefix has that
 * prefix stripped <em>in place</em> (so {@code item.getRecTexts()} is modified); any other
 * text is looked up as an exact label via {@code getValueIndexByName}. For every label the
 * polygon is remembered, and the smallest left edge, smallest right edge and smallest
 * height over all labels are collected to derive the matching thresholds.
 *
 * <p>Pass 2 assigns each remaining text to at most one field by comparing its polygon with
 * the label polygons (x of point 0, y of point 3). A text matching
 * {@code LICENSE_PATTERN_SIMPLE} is taken as the credit code directly. Texts assigned to
 * the same field are concatenated in list order.
 *
 * @param item OCR result; its text list and polygon list are read by the same index
 * @return a populated {@link OcrLicenseVo}; fields with no matching text stay {@code null}
 */
public static OcrLicenseVo parseOcrLicense(OcrResultVo item) {
    // nameDtPolyArray[field] is the label polygon; the 2-point placeholders mean "not seen"
    // and are filtered out in pass 2 by the length != 4 check.
    int[][][] nameDtPolyArray = new int[9][2][2];
    // Extent of the label column, used to derive the thresholds below.
    int leftPoly = Short.MAX_VALUE;
    int rightPoly = Short.MAX_VALUE;
    int polyHeight = Short.MAX_VALUE;
    for (int i = 0; i < item.getRecTexts().size(); i++) {
        String value = StringUtils.trimToEmpty(item.getRecTexts().get(i));
        if (StringUtils.valueContainsAny(BREAK_NAME_LIST, value)) {
            break;
        }
        // NOTE(review): a blank value can only satisfy the second operand if
        // CONTINUE_NAME_LIST matches the empty string, so this skip looks effectively dead;
        // pass 2 uses "||" here - confirm which one is intended.
        if (StringUtils.isBlank(value) && StringUtils.valueContainsAny(CONTINUE_NAME_LIST, value)) {
            continue;
        }
        int idx;
        int prefixPosition = findLabelPrefix(value);
        if (prefixPosition >= 0) {
            // Fix: the "称" prefix (tail of "名称") used to be mapped to capitalIndex, which
            // put the company name into the capital field; the table maps it to orgNameIndex.
            idx = LABEL_PREFIX_FIELDS[prefixPosition];
            // Keep only the text after the label; this is empty when the box is the bare label.
            item.getRecTexts().set(i, StringUtils.substring(value, LABEL_PREFIXES[prefixPosition].length()));
        } else {
            idx = getValueIndexByName(value);
        }
        if (idx < 0) {
            continue;
        }
        int[][] dtPoly = item.getDtPolys().get(i);
        // Fix: a polygon with fewer than 4 points used to raise
        // ArrayIndexOutOfBoundsException at dtPoly[3]; such a label is now ignored.
        if (Objects.isNull(dtPoly) || dtPoly.length < 4) {
            log.warn("skip label with malformed polygon {} {}", i, value);
            continue;
        }
        nameDtPolyArray[idx] = dtPoly;
        int x0 = dtPoly[0][0];
        int y0 = dtPoly[0][1];
        int x1 = dtPoly[1][0];
        int y3 = dtPoly[3][1];
        leftPoly = Math.min(leftPoly, x0);
        // The smallest right edge is tracked, not the largest.
        // NOTE(review): presumably so that boxes where label and value are merged do not
        // inflate the label width - confirm.
        rightPoly = Math.min(rightPoly, x1);
        polyHeight = Math.min(polyHeight, y3 - y0);
    }

    int nameWidth = rightPoly - leftPoly;
    // Horizontal tolerance: label width plus a quarter.
    int nameWidthThreshold = nameWidth + nameWidth / 4;
    // Vertical tolerance: a value may end up to 4/5 of a line above the label's bottom edge
    // and (for non-scope fields) less than 6/5 of a line below it.
    int heightDiffMin = -1 * polyHeight * 4 / 5;
    int heightDiffMax = polyHeight + polyHeight / 5;
    log.info("横向宽度 {} 阈值 {}", nameWidth, nameWidthThreshold);

    String[] licenseValues = new String[9];
    for (int i = 0; i < item.getRecTexts().size(); i++) {
        String value = StringUtils.trimToEmpty(item.getRecTexts().get(i));
        if (StringUtils.valueContainsAny(BREAK_NAME_LIST, value)) {
            break;
        }
        // Bare label texts are never values.
        if (StringUtils.valueEqualsAnyIgnoreCase(LICENSE_NAME_LIST, value)) {
            continue;
        }
        if (StringUtils.isBlank(value) || StringUtils.valueContainsAny(CONTINUE_NAME_LIST, value)) {
            continue;
        }
        if (LICENSE_PATTERN_SIMPLE.matcher(value).matches()) {
            licenseValues[creditCodeIndex] = value;
            log.info("peek {} {} {} {}", i, value, 0, LICENSE_NAME_LIST.get(0));
            continue;
        }

        int[][] valuePoly = item.getDtPolys().get(i);
        // Search for the best matching label index.
        int valueIndex = -1;
        int minValueNameDiffX = Short.MAX_VALUE;
        int minValueNameDiffy = Short.MAX_VALUE;
        // NOTE(review): this is reset for every value and scopeIndex is visited once per
        // value, so it is always null when tested below and the continuity check never
        // restricts anything; it is also assigned the label polygon, not the value polygon.
        // Behavior is kept as is - confirm the intended scope-continuation rule.
        int[][] lastScopeValuePoly = null;
        // index 0 is the credit code, obtained via the regex above, so start from 1.
        for (int index = 1; index < nameDtPolyArray.length; index++) {
            int[][] namePoly = nameDtPolyArray[index];
            if (Objects.isNull(namePoly) || namePoly.length != 4) {
                log.debug("skip not exist name {} {} {} {}", i, value, index, LICENSE_NAME_LIST.get(index));
                continue;
            }
            try {
                int valueNameDiffx = valuePoly[0][0] - namePoly[0][0];
                int valueNameDiffy = valuePoly[3][1] - namePoly[3][1];
                if (valueNameDiffx == 0 && valueNameDiffy == 0) {
                    // Label and value came from the same box (prefix stripped in pass 1).
                    log.info("name ploy equals value ploy {} {} {} {}", i, index, LICENSE_NAME_LIST.get(index), value);
                    valueIndex = index;
                    break;
                }
                if (0 <= valueNameDiffx && valueNameDiffx < nameWidthThreshold && heightDiffMin < valueNameDiffy) {
                    if (index == scopeIndex) {
                        // Scope text may span many lines, so no upper vertical bound is applied.
                        log.info("set scope {} {} {} {}", i, value, valueIndex, LICENSE_NAME_LIST.get(index));
                        if ((Objects.isNull(lastScopeValuePoly) || valuePoly[3][1] - lastScopeValuePoly[3][1] < heightDiffMax)) {
                            if (valueNameDiffx <= minValueNameDiffX && valueNameDiffy < minValueNameDiffy) {
                                minValueNameDiffX = valueNameDiffx;
                                minValueNameDiffy = valueNameDiffy;
                                valueIndex = index;
                                lastScopeValuePoly = namePoly;
                            }
                        }
                    // NOTE(review): minValueNameDiffy starts at Short.MAX_VALUE, so
                    // "minValueNameDiffy < valueNameDiffy" cannot hold together with
                    // "valueNameDiffy < heightDiffMax" until the scope branch has lowered it;
                    // the comparison direction also differs from the scope branch. Behavior is
                    // kept as is - verify against the sample images before changing.
                    } else if ((valueNameDiffx < minValueNameDiffX || valueNameDiffx - minValueNameDiffX < 10) && minValueNameDiffy < valueNameDiffy && valueNameDiffy < heightDiffMax) {
                        minValueNameDiffX = valueNameDiffx;
                        minValueNameDiffy = valueNameDiffy;
                        valueIndex = index;
                    }
                } else {
                    log.info("skip {} {} {} {}", i, value, index, LICENSE_NAME_LIST.get(index));
                }
            } catch (Exception ex) {
                // A malformed value polygon must not abort parsing of the remaining texts.
                log.error("fail {} {} {} {}", i, value, index, LICENSE_NAME_LIST.get(index), ex);
            }
        }
        if (valueIndex > 0) {
            log.info("set value {} {} {} {}", i, value, valueIndex, LICENSE_NAME_LIST.get(valueIndex));
            licenseValues[valueIndex] = StringUtils.trimToEmpty(licenseValues[valueIndex]) + value;
        }
    }

    OcrLicenseVo ocrLicenseVo = new OcrLicenseVo();
    ocrLicenseVo.setCreditCode(licenseValues[creditCodeIndex]);
    ocrLicenseVo.setOrgName(licenseValues[orgNameIndex]);
    ocrLicenseVo.setCapital(licenseValues[capitalIndex]);
    ocrLicenseVo.setOrgType(licenseValues[orgTypeIndex]);
    ocrLicenseVo.setEstablishDate(licenseValues[establishDateIndex]);
    ocrLicenseVo.setLegalPerson(licenseValues[legalPersonIndex]);
    ocrLicenseVo.setAddress(licenseValues[addressIndex]);
    ocrLicenseVo.setScope(licenseValues[scopeIndex]);
    ocrLicenseVo.setTerm(licenseValues[termIndex]);
    return ocrLicenseVo;
}
/**
 * Maps a text that is exactly one of the known label titles to its license-field index.
 *
 * @param value text to look up; it is trimmed (and {@code null} treated as empty) first
 * @return the field index for the label, or {@code -1} when the text is not a known label
 */
private static short getValueIndexByName(String value) {
    final String label = StringUtils.trimToEmpty(value);
    switch (label) {
        case creditCodeTitle:
            return creditCodeIndex;
        case orgNameTitle:
        case orgNameTitle2:
            return orgNameIndex;
        case capitalTitle:
        case capitalTitle2:
        case capitalTitle3:
            return capitalIndex;
        case orgTypeTitle:
        case orgTypeTitle2:
            return orgTypeIndex;
        case establishDateTitle:
        case establishDateTitle2:
            return establishDateIndex;
        case legalPersonTitle:
        case legalPersonTitle2:
        case legalPersonTitle3:
            return legalPersonIndex;
        case addressTitle:
            return addressIndex;
        case scopeTitle:
        case scopeTitle2:
            return scopeIndex;
        case termTitle:
        case termTitle2:
        case termTitle3:
            return termIndex;
        default:
            // Not a label title.
            return -1;
    }
}
}