OcrLicenseVo

/**
 * Builds an {@link OcrLicenseVo} from the OCR text boxes of a business license.
 *
 * <p>Pass 1 walks the recognized texts, detects field labels (including labels whose first
 * character was split into the preceding box, e.g. {@code "称..."}), strips the label text from
 * boxes that also carry a value, and remembers one anchor polygon per field index.
 * Pass 2 walks the texts again and appends every non-label text to each field whose anchor
 * polygon is close enough (point-0 x within 50 and point-3 y within 80).
 *
 * <p>Side effect: entries of {@code item.getRecTexts()} are rewritten in place when a label
 * prefix is stripped.
 *
 * @param item OCR result; {@code getRecTexts()} and {@code getDtPolys()} are read by the same
 *             position, and each polygon is read at points 0 and 3
 *             (assumes four-point polygons — TODO confirm against the OCR service)
 * @return a new value object; fields whose label or value was not found stay {@code null}
 */
public static OcrLicenseVo parseOcrLicense(OcrResultVo item) {
    // Labels whose first character landed in the previous box ("名|称", "住|所", "类|型").
    final String[] tailLabels = {"称", "所", "型"};
    final short[] tailIndexes = {orgNameIndex, addressIndex, orgTypeIndex};
    // Labels recognized inside one box, possibly followed by the value in the same box.
    final String[] fullLabels = {"注册资本", "成立日期", "营业期限", "法定代表人", "定代表人", "经营范围"};
    final short[] fullIndexes = {capitalIndex, establishDateIndex, termIndex,
        legalPersonIndex, legalPersonIndex, scopeIndex};

    // Anchor polygon per field index. A null slot means "label not seen"; the previous
    // pre-filled 2-point arrays made pass 2 throw on dtPoly[3] for every missing label.
    int[][][] dtPolyList = new int[9][][];
    for (int i = 0; i < item.getRecTexts().size(); i++) {
        String value = StringUtils.trimToEmpty(item.getRecTexts().get(i));
        if (StringUtils.anyContains(BREAK_NAME, value)) {
            break;
        }
        // Was '&&', which can only hold if CONTINUE_NAME matches an empty string.
        if (StringUtils.isBlank(value) || StringUtils.anyContains(CONTINUE_NAME, value)) {
            continue;
        }

        int idx = -1;
        int polyPos = i;
        boolean labelled = false;
        for (int k = 0; k < tailLabels.length && !labelled; k++) {
            if (StringUtils.startsWith(value, tailLabels[k])) {
                labelled = true;
                idx = tailIndexes[k];
                if (StringUtils.length(value) > 1) {
                    // The value follows the label tail in this box; the label itself is anchored
                    // on the preceding box. Guard i == 0, where there is no preceding box.
                    item.getRecTexts().set(i, StringUtils.substring(value, 1));
                    if (i > 0) {
                        polyPos = i - 1;
                    }
                }
            }
        }
        for (int k = 0; k < fullLabels.length && !labelled; k++) {
            if (StringUtils.startsWith(value, fullLabels[k])) {
                labelled = true;
                idx = fullIndexes[k];
                int labelLength = fullLabels[k].length();
                if (StringUtils.length(value) > labelLength) {
                    item.getRecTexts().set(i, StringUtils.substring(value, labelLength));
                }
            }
        }
        if (!labelled) {
            idx = getValueIndex(value);
        }
        if (idx >= 0) {
            dtPolyList[idx] = item.getDtPolys().get(polyPos);
        }
    }

    String[] licenseValues = new String[9];
    for (int i = 0; i < item.getRecTexts().size(); i++) {
        String value = StringUtils.trimToEmpty(item.getRecTexts().get(i));
        if (StringUtils.anyContains(BREAK_NAME, value)) {
            break;
        }
        if (StringUtils.isBlank(value) || StringUtils.anyContains(CONTINUE_NAME, value)) {
            continue;
        }
        if (LICENSE_PATTERN_SIMPLE.matcher(value).matches()) {
            licenseValues[creditCodeIndex] = value;
            log.info("scan value {} {} {}", i, 0, value);
            continue;
        }

        int[][] currentPoly = item.getDtPolys().get(i);
        for (int index = 0; index < dtPolyList.length; index++) {
            int[][] dtPoly = dtPolyList[index];
            if (dtPoly == null) {
                // No anchor for this field: nothing to compare against.
                log.info("skip {} {} {}", i, index, value);
                continue;
            }
            int hdiff = Math.abs(currentPoly[0][0] - dtPoly[0][0]);
            int vdiff = Math.abs(currentPoly[3][1] - dtPoly[3][1]);
            if (hdiff == 0 && vdiff == 0) {
                // Same box as the label anchor: treated as the label itself and not collected.
                // NOTE(review): a box that held label + value in one text (prefix stripped in
                // pass 1) also lands here, so its value is dropped — confirm whether intended.
                log.info("scan name {} {} {}", i, index, value);
                break;
            }
            // See 2.png: "类" and "名(称)" have point-0 x-coordinates that are too close — not the name.
            // See 3.png: "械" and "住所" have point-0 x-coordinates that are too far apart — not the address.
            if (hdiff < 50 && vdiff < 80) {
                // Slot 0 is filled by the regex branch above, never by proximity.
                if (index == 0) {
                    continue;
                }
                licenseValues[index] = StringUtils.trimToEmpty(licenseValues[index]) + value;
            } else {
                log.info("skip {} {} {}", i, index, value);
            }
        }
    }

    OcrLicenseVo ocrLicenseVo = new OcrLicenseVo();
    ocrLicenseVo.setCreditCode(licenseValues[creditCodeIndex]);
    ocrLicenseVo.setOrgName(licenseValues[orgNameIndex]);
    ocrLicenseVo.setCapital(licenseValues[capitalIndex]);
    ocrLicenseVo.setOrgType(licenseValues[orgTypeIndex]);
    ocrLicenseVo.setEstablishDate(licenseValues[establishDateIndex]);
    ocrLicenseVo.setLegalPerson(licenseValues[legalPersonIndex]);
    ocrLicenseVo.setAddress(licenseValues[addressIndex]);
    ocrLicenseVo.setScope(licenseValues[scopeIndex]);
    ocrLicenseVo.setTerm(licenseValues[termIndex]);
    return ocrLicenseVo;
}

 

package com.nvxclouds.baize.breeze.system.service.impl;

import cn.hutool.core.util.ObjectUtil;
import com.fasterxml.jackson.core.type.TypeReference;
import com.nvxclouds.baize.breeze.common.core.domain.R;
import com.nvxclouds.baize.breeze.common.core.exception.ServiceException;
import com.nvxclouds.baize.breeze.common.core.utils.StringUtils;
import com.nvxclouds.baize.breeze.common.json.utils.JsonObject;
import com.nvxclouds.baize.breeze.common.json.utils.JsonUtils;
import com.nvxclouds.baize.breeze.common.oss.core.OssClient;
import com.nvxclouds.baize.breeze.common.oss.factory.OssFactory;
import com.nvxclouds.baize.breeze.common.web.utils.ResourceUtil;
import com.nvxclouds.baize.breeze.system.domain.vo.OcrLicenseVo;
import com.nvxclouds.baize.breeze.system.domain.vo.OcrResultVo;
import com.nvxclouds.baize.breeze.system.domain.vo.SysOssVo;
import com.nvxclouds.baize.breeze.system.service.IFantasiaService;
import com.nvxclouds.baize.breeze.system.service.ISysOssService;
import jakarta.validation.constraints.NotNull;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import okhttp3.Request.Builder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

import static com.nvxclouds.baize.breeze.system.constants.OrcLicenseConstant.*;

/**
 * Copyright (c) 2025 NVXClouds.Co.Ltd. All rights reserved.
 *
 * <p>{@link IFantasiaService} implementation that posts a stored file to the configured OCR
 * endpoint and turns the recognized text boxes of a business license into an
 * {@link OcrLicenseVo}.
 *
 * @author muzhi
 * @version 3.0.0
 * @date 2025-09-23 23:15
 */
@Slf4j
@Service
public class FantasiaServiceImpl implements IFantasiaService {
    private static final MediaType MEDIA_TYPE = MediaType.parse("application/json; charset=utf-8");
    private static final OkHttpClient CLIENT = new OkHttpClient.Builder()
        .connectTimeout(10, TimeUnit.SECONDS).writeTimeout(30, TimeUnit.SECONDS).readTimeout(30, TimeUnit.SECONDS)
        .build();

    /** Number of slots in the per-field arrays addressed by the {@code *Index} constants. */
    private static final int FIELD_COUNT = 9;

    /**
     * Label prefixes recognized at the start of a text box, with the field they belong to.
     * Shortened forms ("称", "册资本", ...) cover labels whose leading characters were split
     * into another box. No entry is a prefix of another entry, so lookup order is irrelevant.
     */
    private static final LabelPrefix[] LABEL_PREFIXES = {
        new LabelPrefix("名称", orgNameIndex),
        // "称" is the tail of "名称"; it was previously mapped to capitalIndex by mistake.
        new LabelPrefix("称", orgNameIndex),
        new LabelPrefix("注册资本", capitalIndex),
        new LabelPrefix("册资本", capitalIndex),
        new LabelPrefix("资本", capitalIndex),
        new LabelPrefix("成立日期", establishDateIndex),
        new LabelPrefix("立日期", establishDateIndex),
        new LabelPrefix("营业期限", termIndex),
        new LabelPrefix("业期限", termIndex),
        new LabelPrefix("期限", termIndex),
        new LabelPrefix("法定代表人", legalPersonIndex),
        new LabelPrefix("定代表人", legalPersonIndex),
        new LabelPrefix("代表人", legalPersonIndex),
        new LabelPrefix("经营范围", scopeIndex),
        new LabelPrefix("营范围", scopeIndex),
        new LabelPrefix("住所", addressIndex),
        new LabelPrefix("所", addressIndex),
        new LabelPrefix("类型", orgTypeIndex),
        new LabelPrefix("型", orgTypeIndex),
    };

    @Value("${app.fantasia-ocr-mock-enabled}")
    private boolean fantasiaOcrMockEnabled;
    @Value("${app.fantasia-ocr-url}")
    private String fantasiaOcrUrl;
    @Value("${app.fantasia-ocr-input-path}")
    private String fantasiaOcrInputPath;
    @Value("${app.fantasia-ocr-output-path}")
    private String fantasiaOcrOutputPath;
    @Autowired
    private ISysOssService sysOssService;

    /**
     * Format pattern for the 18-character unified social credit code:
     * registration authority code (1) + entity category code (1) + administrative division
     * code (6 digits) + entity identifier / organization code (9) + check character (1).
     * Only the character classes are matched; the check character is not recomputed.
     */
    public static final Pattern LICENSE_PATTERN_STRICT = Pattern.compile("^[1-9A-HJ-NP-Z]{2}\\d{6}[0-9A-HJ-NP-Z]{9}[0-9A-HJ-NP-TV-Y]$");
    /**
     * Relaxed 18-character pattern: the first 2 characters are 1-9 or an upper-case letter
     * other than I and O; the remaining 16 are digits or upper-case letters other than I and O.
     */
    public static final Pattern LICENSE_PATTERN_SIMPLE = Pattern.compile("^[1-9A-HJ-NP-Z]{2}[0-9A-HJ-NP-Z]{16}$");

    /**
     * Runs OCR on the stored file identified by {@code ossId} and parses the first result.
     *
     * <p>When mock mode is enabled and the bundled fixture contains recognized text, the fixture
     * is parsed and no request is made; otherwise the call falls through to the OCR service.
     * The file downloaded for the request is removed again on every exit path.
     *
     * @param ossId id of the stored file
     * @return the parsed license, or {@code null} when the request fails or yields no text
     * @throws IOException if the HTTP call or reading the response body fails
     */
    @Override
    public OcrLicenseVo ocrLicense(@NotNull Long ossId) throws IOException {
        if (fantasiaOcrMockEnabled) {
            String mockBody = ResourceUtil.getResourceAsString("/mock_ocr_business_license.json");
            OcrResultVo mockItem = JsonUtils.parseObject(mockBody, new TypeReference<OcrResultVo>() {
            });
            if (hasRecognizedText(mockItem)) {
                return parseOcrLicense(mockItem);
            }
        }

        Path destPath = download(ossId, fantasiaOcrInputPath);
        try {
            JsonObject requestData = new JsonObject().put("file_path", destPath.toString());
            RequestBody body = RequestBody.create(JsonUtils.toJsonString(requestData), MEDIA_TYPE);
            Request request = new Builder().url(fantasiaOcrUrl).post(body).build();

            try (Response response = CLIENT.newCall(request).execute()) {
                if (!response.isSuccessful()) {
                    log.error("Request failed with HTTP code: {}", response.code());
                    return null;
                }
                ResponseBody responseBody = response.body();
                if (responseBody == null) {
                    log.error("OCR response has no body, HTTP code: {}", response.code());
                    return null;
                }
                R<List<OcrResultVo>> result = JsonUtils.parseObject(responseBody.string(), new TypeReference<R<List<OcrResultVo>>>() {
                });
                if (!R.isSuccess(result) || result.getData() == null || result.getData().isEmpty()) {
                    return null;
                }
                // Only the first result entry is used.
                OcrResultVo item = result.getData().get(0);
                if (!hasRecognizedText(item)) {
                    return null;
                }
                OcrLicenseVo vo = parseOcrLicense(item);
                log.info("ocr license {}", JsonUtils.toJsonString(vo));
                return vo;
            }
        } finally {
            // Previously the file was removed only after a successful recognition, so every
            // failed request left a copy behind in the input directory.
            deleteDownloadedFile(destPath);
        }
    }

    /** Returns whether {@code item} is non-null and carries at least one recognized text. */
    private static boolean hasRecognizedText(OcrResultVo item) {
        return Objects.nonNull(item)
            && Objects.nonNull(item.getRecTexts())
            && !item.getRecTexts().isEmpty();
    }

    /** Best-effort removal of the file fetched for one OCR request; a failure is only logged. */
    private static void deleteDownloadedFile(Path file) {
        if (file != null && !file.toFile().delete()) {
            log.warn("Could not delete OCR input file {}", file);
        }
    }

    /**
     * Fetches the stored object behind {@code ossId} into {@code path}.
     *
     * @param ossId id of the storage record
     * @param path  target location handed to the storage client
     * @return the path reported by the storage client
     * @throws ServiceException if no storage record exists for {@code ossId}
     */
    private Path download(@NotNull Long ossId, String path) {
        SysOssVo sysOss = sysOssService.getById(ossId);
        if (ObjectUtil.isNull(sysOss)) {
            throw new ServiceException("文件数据不存在!");
        }
        return OssFactory.instance().getObjectContent(sysOss.getFileName(), path);
    }

    /**
     * Builds an {@link OcrLicenseVo} from the OCR text boxes of a business license.
     *
     * <p>Pass 1 finds the label boxes, strips label text from boxes that also carry a value,
     * stores one label polygon per field and measures the label column (left edge, right edge,
     * height). Pass 2 assigns each remaining text to the field whose label polygon it matches
     * best and concatenates the texts per field in reading order. The credit code is taken
     * from the text that matches {@link #LICENSE_PATTERN_SIMPLE}.
     *
     * <p>Side effect: entries of {@code item.getRecTexts()} are rewritten in place when a label
     * prefix is stripped.
     *
     * @param item OCR result; texts and polygons are read by the same position. Polygons are
     *             read at points 0, 1 and 3 (presumably top-left, top-right and bottom-left —
     *             TODO confirm against the OCR service)
     * @return a new value object; fields without a match stay {@code null}
     */
    public static OcrLicenseVo parseOcrLicense(OcrResultVo item) {
        List<String> texts = item.getRecTexts();
        // Null slot = label not found.
        int[][][] nameDtPolyArray = new int[FIELD_COUNT][][];
        // Measure the width and height of the label text blocks.
        int leftPoly = Short.MAX_VALUE;
        // NOTE(review): this tracks the SMALLEST x1 of all labels, so nameWidth below is
        // "smallest right edge - smallest left edge". If the widest label was meant, this
        // should be a maximum — confirm before changing, the thresholds depend on it.
        int rightPoly = Short.MAX_VALUE;
        int polyHeight = Short.MAX_VALUE;
        for (int i = 0; i < texts.size(); i++) {
            String value = StringUtils.trimToEmpty(texts.get(i));
            if (StringUtils.valueContainsAny(BREAK_NAME_LIST, value)) {
                break;
            }
            // Was '&&', which made this skip unreachable; pass 2 already uses '||'.
            if (StringUtils.isBlank(value) || StringUtils.valueContainsAny(CONTINUE_NAME_LIST, value)) {
                continue;
            }
            int idx = resolveLabelIndex(texts, i, value);
            if (idx < 0) {
                continue;
            }
            int[][] dtPoly = polyAt(item, i);
            if (dtPoly == null) {
                log.warn("skip label without a usable polygon {} {}", i, value);
                continue;
            }
            nameDtPolyArray[idx] = dtPoly;
            leftPoly = Math.min(leftPoly, dtPoly[0][0]);
            rightPoly = Math.min(rightPoly, dtPoly[1][0]);
            polyHeight = Math.min(polyHeight, dtPoly[3][1] - dtPoly[0][1]);
        }

        int nameWidth = rightPoly - leftPoly;
        int nameWidthThreshold = nameWidth + nameWidth / 4;
        int heightDiffMin = -1 * polyHeight * 4 / 5;
        int heightDiffMax = polyHeight + polyHeight / 5;
        log.info("横向宽度 {} 阈值 {}", nameWidth, nameWidthThreshold);

        String[] licenseValues = new String[FIELD_COUNT];
        for (int i = 0; i < texts.size(); i++) {
            String value = StringUtils.trimToEmpty(texts.get(i));
            if (StringUtils.valueContainsAny(BREAK_NAME_LIST, value)) {
                break;
            }
            if (StringUtils.valueEqualsAnyIgnoreCase(LICENSE_NAME_LIST, value)) {
                continue;
            }
            if (StringUtils.isBlank(value) || StringUtils.valueContainsAny(CONTINUE_NAME_LIST, value)) {
                continue;
            }
            if (LICENSE_PATTERN_SIMPLE.matcher(value).matches()) {
                licenseValues[creditCodeIndex] = value;
                log.info("peek  {} {} {} {}", i, value, 0, LICENSE_NAME_LIST.get(0));
                continue;
            }
            int[][] valuePoly = polyAt(item, i);
            if (valuePoly == null) {
                log.warn("skip value without a usable polygon {} {}", i, value);
                continue;
            }
            int valueIndex = findBestLabel(i, value, valuePoly, nameDtPolyArray,
                nameWidthThreshold, heightDiffMin, heightDiffMax);
            if (valueIndex > 0) {
                log.info("set value {} {} {} {}", i, value, valueIndex, LICENSE_NAME_LIST.get(valueIndex));
                licenseValues[valueIndex] = StringUtils.trimToEmpty(licenseValues[valueIndex]) + value;
            }
        }

        OcrLicenseVo ocrLicenseVo = new OcrLicenseVo();
        ocrLicenseVo.setCreditCode(licenseValues[creditCodeIndex]);
        ocrLicenseVo.setOrgName(licenseValues[orgNameIndex]);
        ocrLicenseVo.setCapital(licenseValues[capitalIndex]);
        ocrLicenseVo.setOrgType(licenseValues[orgTypeIndex]);
        ocrLicenseVo.setEstablishDate(licenseValues[establishDateIndex]);
        ocrLicenseVo.setLegalPerson(licenseValues[legalPersonIndex]);
        ocrLicenseVo.setAddress(licenseValues[addressIndex]);
        ocrLicenseVo.setScope(licenseValues[scopeIndex]);
        ocrLicenseVo.setTerm(licenseValues[termIndex]);
        return ocrLicenseVo;
    }

    /**
     * Resolves the field a text box labels. If the text starts with a known label prefix, the
     * prefix is removed from {@code texts[i]} so that only a trailing value (possibly empty)
     * remains; otherwise the whole text is compared with the exact label titles.
     *
     * @return the field index, or a negative value when the text is not a label
     */
    private static int resolveLabelIndex(List<String> texts, int i, String value) {
        for (LabelPrefix label : LABEL_PREFIXES) {
            if (StringUtils.startsWith(value, label.prefix)) {
                texts.set(i, StringUtils.substring(value, label.prefix.length()));
                return label.index;
            }
        }
        return getValueIndexByName(value);
    }

    /**
     * Returns the polygon at position {@code i}, or {@code null} when it is missing or does not
     * provide the points read by the parser (points 0, 1 and 3 with two coordinates each).
     */
    private static int[][] polyAt(OcrResultVo item, int i) {
        List<int[][]> polys = item.getDtPolys();
        if (polys == null || i >= polys.size()) {
            return null;
        }
        int[][] poly = polys.get(i);
        if (poly == null || poly.length < 4) {
            return null;
        }
        for (int corner : new int[] {0, 1, 3}) {
            if (poly[corner] == null || poly[corner].length < 2) {
                return null;
            }
        }
        return poly;
    }

    /**
     * Picks the field whose label polygon best matches {@code valuePoly}.
     *
     * <p>A label whose point-0 x and point-3 y equal those of the value wins immediately (the
     * box held label and value together). Otherwise a label is a candidate when the value
     * starts at or right of it by less than {@code nameWidthThreshold} and the value's point-3 y
     * is more than {@code heightDiffMin} relative to the label's.
     *
     * @return the chosen field index, or {@code -1} when no label qualifies
     */
    private static int findBestLabel(int i, String value, int[][] valuePoly, int[][][] labelPolys,
                                     int nameWidthThreshold, int heightDiffMin, int heightDiffMax) {
        int valueIndex = -1;
        int minValueNameDiffX = Short.MAX_VALUE;
        int minValueNameDiffY = Short.MAX_VALUE;
        // Slot 0 is the credit code, which is filled by the regex match, so start at 1.
        for (int index = 1; index < labelPolys.length; index++) {
            int[][] namePoly = labelPolys[index];
            if (Objects.isNull(namePoly) || namePoly.length != 4) {
                log.debug("skip not exist name {} {} {} {}", i, value, index, LICENSE_NAME_LIST.get(index));
                continue;
            }
            int valueNameDiffX = valuePoly[0][0] - namePoly[0][0];
            int valueNameDiffY = valuePoly[3][1] - namePoly[3][1];
            if (valueNameDiffX == 0 && valueNameDiffY == 0) {
                log.info("name ploy equals value ploy  {} {} {} {}", i, index, LICENSE_NAME_LIST.get(index), value);
                return index;
            }

            boolean besideLabel = 0 <= valueNameDiffX && valueNameDiffX < nameWidthThreshold
                && heightDiffMin < valueNameDiffY;
            if (!besideLabel) {
                log.info("skip {} {} {} {}", i, value, index, LICENSE_NAME_LIST.get(index));
                continue;
            }

            if (index == scopeIndex) {
                log.info("set scope {} {} {} {}", i, value, valueIndex, LICENSE_NAME_LIST.get(index));
                // NOTE(review): the previous code also compared against a "last scope value"
                // polygon, but that variable was reset for every text and therefore always
                // null here; the check never rejected anything and has been removed. If scope
                // lines were meant to be chained via heightDiffMax, that still has to be built.
                if (valueNameDiffX <= minValueNameDiffX && valueNameDiffY < minValueNameDiffY) {
                    minValueNameDiffX = valueNameDiffX;
                    minValueNameDiffY = valueNameDiffY;
                    valueIndex = index;
                }
            } else if ((valueNameDiffX < minValueNameDiffX || valueNameDiffX - minValueNameDiffX < 10)
                && minValueNameDiffY < valueNameDiffY && valueNameDiffY < heightDiffMax) {
                // NOTE(review): minValueNameDiffY starts at Short.MAX_VALUE, so for ordinary
                // coordinates "minValueNameDiffY < valueNameDiffY" cannot hold until the scope
                // branch above has lowered it. The comparison looks inverted — confirm with
                // sample data before changing; behaviour is kept as it was.
                minValueNameDiffX = valueNameDiffX;
                minValueNameDiffY = valueNameDiffY;
                valueIndex = index;
            }
        }
        return valueIndex;
    }

    /**
     * Maps a text that equals one of the known label titles to its field index.
     *
     * @param value text to look up; it is trimmed first and {@code null} is treated as empty
     * @return the field index, or {@code -1} when the text is not a known title
     */
    private static short getValueIndexByName(String value) {
        switch (StringUtils.trimToEmpty(value)) {
            case creditCodeTitle:
                return creditCodeIndex;
            case orgNameTitle:
            case orgNameTitle2:
                return orgNameIndex;
            case capitalTitle:
            case capitalTitle2:
            case capitalTitle3:
                return capitalIndex;
            case orgTypeTitle:
            case orgTypeTitle2:
                return orgTypeIndex;
            case establishDateTitle:
            case establishDateTitle2:
                return establishDateIndex;
            case legalPersonTitle:
            case legalPersonTitle2:
            case legalPersonTitle3:
                return legalPersonIndex;
            case addressTitle:
                return addressIndex;
            case scopeTitle:
            case scopeTitle2:
                return scopeIndex;
            case termTitle:
            case termTitle2:
            case termTitle3:
                return termIndex;
            default:
                return -1;
        }
    }

    /** One label prefix together with the index of the field it introduces. */
    private static final class LabelPrefix {
        private final String prefix;
        private final int index;

        private LabelPrefix(String prefix, int index) {
            this.prefix = prefix;
            this.index = index;
        }
    }
}

 

posted @ 2025-09-26 10:23  牧之丨  阅读(8)  评论(0)    收藏  举报