easyExcel分批导入文件

 

原文地址 : https://blog.csdn.net/qq_42060055/article/details/116139016

 

一些关于easyExcel导入文件操作

需求: 导入大数据量文件 其中数据达到万级、十万级, 错误文件进行错误单元格标红, 可导出修改完继续导入

由于数据量多大 一次行全部读到内存中可能会导致内存溢出问题

使用easyExcel poi的监听器进行操作

三步曲:

1、解析excel为inputStream流, 读取流,解析excel

2、判断excel中每条数据的格式, 正确和错误相对记录

3、通过监听器每解析150条数据, 进行入库操作, 错误数据存在内存中(考虑错误数据不多的情况)

// 这里用到ossfs 反正就是读取excel为input流,
涉及到两个系统之间流的传输, 这里直接把文件上传到oss
try {
    in = new FileInputStream(localFileName);
} catch (FileNotFoundException e) {
    in = HttpUtil.io(HttpUtil.Atom.builder().url(diseaseDto.getFileUrl()).build());
}

  

// 这里解析excel其中
OltHosIcdDiseaseListener为自定义监听器
try {
    LoggerUtil.info(LOGGER, "开始解析IcdDisease");
    OltHosIcdDiseaseListener oltHosIcdDiseaseListener = new OltHosIcdDiseaseListener(isCfgPrd, icdCodeList, delIcdCodeList, diseaseDto, oltConfigService, exportTaskHandler);
    excelReader = EasyExcel.read(in, oltHosIcdDiseaseListener).build();
    ReadSheet readSheet = EasyExcel.readSheet(0).build();
    excelReader.read(readSheet);
} finally {
    try {
        if (in != null) {
            in.close();
        }
        if (excelReader != null) {
            // 这里千万别忘记关闭,读的时候会创建临时文件,到时磁盘会崩的
            excelReader.finish();
        }
    } catch (Exception e) {
        LoggerUtil.error(LOGGER, "{0},{1}", e.getMessage(), e);
    }
}

  

// 通过构造函数, 初始化一些list与对象
// 这个是导入的核心方法, 所有的导入逻辑, 判断逻辑与入库都在这里操作
// 采用无对象方式
@Slf4j
public class OltHosIcdDiseaseListener extends AnalysisEventListener<Map<Integer, String>> {
    private OltConfigService oltConfigService;
    private ExportTaskHandler exportTaskHandler;
    private static final int batchCount = 150;
    private int countNum = 0;
    private boolean isCfgPrd;
    private int successCount = 0;
    private int errorCount = 0;
 
    private List<String> checkRepeatCode = new ArrayList<>();
    private List<String> icdCodeList;
    private List<String> delIcdCodeList;
    private OltHosIcdDiseaseDto diseaseDto;
    private List<OltHosIcdDiseaseDto> successList = new ArrayList<>();
    private List<OltHosIcdDiseaseDto> errorList = new ArrayList<>();
    private List<OltHosIcdDiseaseDto> tempErrorList = new ArrayList<>();
 
 
    public OltHosIcdDiseaseListener(boolean isCfgPrd, List<String> icdCodeList, List<String> delIcdCodeList, OltHosIcdDiseaseDto diseaseDto,
                                    OltConfigService oltConfigService, ExportTaskHandler exportTaskHandler) {
        this.isCfgPrd = isCfgPrd;
        this.icdCodeList = icdCodeList;
        this.delIcdCodeList = delIcdCodeList;
        this.diseaseDto = diseaseDto;
        this.oltConfigService = oltConfigService;
        this.exportTaskHandler = exportTaskHandler;
    }
 
/**
  * 这个每一条数据解析都会来调用
  * data --> 实体类
  * analysisContext excel信息
 */
@Override
public void invoke(Map<Integer, String> data, AnalysisContext context) {
    int rouNumber = context.readRowHolder().getRowIndex() + 1;
 
    // 这里是因为表头在第二行
    if (rouNumber == 2) {
        // 这里是校验表头
        checkExcelHead(data);
    } else if (rouNumber > 2) {
        // 这里是校验数据
        checkReadData(data);
    }
 
    // 超过150条就先入库
    if (countNum >= batchCount) {
        // 处理excel导出的正确数据
        batchOperateData();
    }
    countNum++;
}
 
/**
 * @author songhc
 * @create 
 * @desc 调用完成监听, 确保数据已全部处理完
 **/
@Override
public void doAfterAllAnalysed(AnalysisContext analysisContext) {
    // 这里也要保存数据,确保最后遗留的数据也存储到数据库
    Map<String, Object> objMap = new HashMap<>();
    // 处理excel导出的正确数据
    batchOperateData();
 
    // 错误数据填充oss表格
    Object object = uploadErrorData(errorList, diseaseDto);
 
    objMap.put("errorInfo", object);
    objMap.put("successCount", successCount);
    objMap.put("errorCount", errorCount);
 
    // 错误数据记录redis, 下次使用
    RedisStringHandler.set(String.format(RedisKeyConstants.EXPORT_ERROR_RESULT, "disease" + diseaseDto.getUserId() + "_" + diseaseDto.getRgId() + "_" + diseaseDto.getHosId()), JSONObject.toJSONString(objMap));
}
 
// 这里是封装所有的错误数据
// 包括封装单元格
private Object uploadErrorData (List<OltHosIcdDiseaseDto> errorList, OltHosIcdDiseaseDto dto) {
    Map<Integer, List<Integer>> map = new HashMap<>();
    LinkedList<OltHosIcdDiseaseDto> newErrorList = new LinkedList<>();
    if (CollectionUtils.isNotEmpty(errorList)) {
        for (int i = 0; i < errorList.size(); i++) {
            OltHosIcdDiseaseDto e = errorList.get(i);
            List<Integer> integerList = new ArrayList<>();
            if (e.getErrorReasonMap() != null && !e.getErrorReasonMap().isEmpty()) {
                List<String> reasonList = new ArrayList<>();
                for (Integer key: e.getErrorReasonMap().keySet()) {
                    // 标红单元格
                    integerList.add(key);
                    reasonList.add(e.getErrorReasonMap().get(key));
                }
                map.put(i + 2, integerList);
                e.setErrorReason(String.join("、", reasonList));
            }
            newErrorList.add(e);
        }
    }
 
 
    // 封装导出服务入参
    String uuid = UUIDUtil.create();
    String errorFileName = dto.getHosName() + "(待处理诊断数据)" + dto.getStatDataStr() + ".xlsx";
    SysExportRecordDto sysExportRecordDto = SysExportRecordDto.builder().batchId(uuid).userId(dto.getUserId()).pfCode(dto.getPfCode())
            .source(dto.getSource()).fileName(errorFileName).creator(dto.getCreator()).operator(dto.getCreator()).build();
    // 创建导出记录
    QueueHandler.createTaskRecord(sysExportRecordDto);
    // 获取url
    // 伪代码
    String fileName = "aaa.xlsx";
    String BUCKET_NAME = "bbb";
    String fileUrl = String.format(OssClientConfig.OSS_REAL_PATH, BUCKET_NAME,
            UploadFileType.getFolderByType(UploadFileType.REPORT)).concat(fileName);
    // 加入异步线程任务
    this.exportTaskHandler.exportIcdErrorDiseaseData(OltErrorResult.builder().map(map).errorList(newErrorList)
                    .fileName(errorFileName).source(dto.getSource()).build(),
            uuid, errorFileName, fileUrl);
    // 构建返回队列信息
    return QueueHandler.buildQueueInfo(sysExportRecordDto);
}
 
private void batchOperateData() {
    checkErrorExcelList(tempErrorList, icdCodeList);
    checkSuccessExcelList(successList, tempErrorList, icdCodeList);
 
    // 将临时错误数据存储到所有错误数据列表
    this.errorList.addAll(tempErrorList);
    // 清理list
    this.successList.clear();
    this.tempErrorList.clear();
    this.countNum = 0;
}
 
private void checkExcelHead(Map<Integer, String> data) {
    boolean templateFlag = true;
    // 第二行  校验excel标题
    try {
        String diseaseCategoryStr = data.get(0);
        if (StringUtils.isBlank(diseaseCategoryStr) || !"诊eee(必填)".equals(diseaseCategoryStr)) {
            templateFlag = false;
        }
    } catch (Exception e) {
        templateFlag = false;
    }
    try {
        String icdNameStr = data.get(1);
        if (StringUtils.isBlank(icdNameStr) || !"医vv称(必填)".equals(icdNameStr)) {
            templateFlag = false;
        }
    } catch (Exception e) {
        templateFlag = false;
    }
    try {
        String icdCodeStr = data.get(2);
        if (StringUtils.isBlank(icdCodeStr) || !"医aa(必填)".equals(icdCodeStr)) {
            templateFlag = false;
        }
    } catch (Exception e) {
        templateFlag = false;
    }
    if (!templateFlag) {
        throw new PlatException("文件模版不匹配");
    }
}
 
private void checkReadData(Map<Integer, String> data) {
    // 循环cell
    OltHosIcdDiseaseDto temDisDto = OltHosIcdDiseaseDto.buildDefault();
    temDisDto.setHosId(diseaseDto.getHosId());
    // key为所在的列, value为错误原因
    Map<Integer, String> map = new HashMap<>();
    boolean flag = true;
    try {
        // 解析第二列
        String diseaseCategory = data.get(0);
        if (StringUtils.isBlank(diseaseCategory)) {
            temDisDto.setDiseaseCategoryStr(StringUtils.EMPTY);
            map.put(0, "aaa为空");
            flag = false;
        } else {
            temDisDto.setDiseaseCategoryStr(diseaseCategory);
        }
    } catch (Exception e) {
        temDisDto.setDiseaseCategoryStr(StringUtils.EMPTY);
        map.put(0, "bbb为空");
        flag = false;
    }
 
    try {
        String icdName = data.get(1);
        if (StringUtils.isBlank(icdName)) {
            temDisDto.setIcdName(StringUtils.EMPTY);
            map.put(1, "为空");
            flag = false;
        } else {
            temDisDto.setIcdName(icdName);
        }
    } catch (Exception e) {
        temDisDto.setIcdName(StringUtils.EMPTY);
        map.put(1, "ccc称为空");
        flag = false;
    }
 
    try {
        String icdCode = data.get(2);
        if (StringUtils.isBlank(icdCode)) {
            temDisDto.setIcdCode(StringUtils.EMPTY);
            map.put(2, "ddd为空");
            flag = false;
        } else {
            temDisDto.setIcdCode(icdCode);
        }
    } catch (Exception e) {
        temDisDto.setIcdCode(StringUtils.EMPTY);
        map.put(2, "ddd为空");
        flag = false;
    }
 
 
    try {
        if (!DiseaseCategory.TCM_SYNDROME.getDesc().equals(temDisDto.getDiseaseCategoryStr())) {
            String standardIcdName = data.get(3);
            if (isCfgPrd && StringUtils.isBlank(standardIcdName)) {
                temDisDto.setStandardIcdName(StringUtils.EMPTY);
                map.put(3, "vvv为空");
                flag = false;
            } else {
                temDisDto.setStandardIcdName(standardIcdName);
            }
        }
    } catch (Exception e) {
        temDisDto.setStandardIcdName(StringUtils.EMPTY);
        map.put(3, "vvv为空");
        flag = false;
    }
 
    try {
        if (!DiseaseCategory.TCM_SYNDROME.getDesc().equals(temDisDto.getDiseaseCategoryStr())) {
            String standardIcdCode = data.get(4);
            if (isCfgPrd && StringUtils.isBlank(standardIcdCode)) {
                temDisDto.setStandardIcdCode(StringUtils.EMPTY);
                map.put(4, "eee为空");
                flag = false;
            } else {
                temDisDto.setStandardIcdCode(standardIcdCode);
            }
        }
    } catch (Exception e) {
        temDisDto.setStandardIcdCode(StringUtils.EMPTY);
        map.put(4, "eee为空");
        flag = false;
    }
    temDisDto.setErrorReasonMap(map);
 
 
    // 如果flag为 false 说明数据有问题
    if (!flag) {
        tempErrorList.add(temDisDto);
    } else {
        successList.add(temDisDto);
    }
}
 
private void checkErrorExcelList(List<OltHosIcdDiseaseDto> errorList, List<String> icdCodeList) {
    if (CollectionUtils.isNotEmpty(errorList)) {
        // 错误就往里加, 正确重新定义列表
        errorList.forEach(e -> {
            Map<Integer, String> map = new HashMap<>();
            if (!DiseaseCategory.belongTo(e.getDiseaseCategoryStr())) {
                map.put(0, "aaa不正确");
            } else {
                e.setDiseaseCategory(DiseaseCategory.getCodeByDesc(e.getDiseaseCategoryStr()));
            }
            
            // excel是否存在重复数据
            if (checkRepeatCode.contains(e.getIcdCode())) {
                map.put(2, "bbb重复");
            }
 
            if (CollectionUtils.isNotEmpty(icdCodeList) && icdCodeList.contains(e.getIcdCode())) {
                map.put(2, "ttt重复");
            }
            if (e.getErrorReasonMap() != null && !e.getErrorReasonMap().isEmpty()) {
                Map<Integer, String> errorReasonMap = e.getErrorReasonMap();
                errorReasonMap.putAll(map);
                e.setErrorReasonMap(errorReasonMap);
            }
            errorCount++;
        });
    }
}
 
/**
 * 侵入式给errorList赋值
 * @param list
 * @param errorList
 * @param icdCodeList
 */
private void checkSuccessExcelList(List<OltHosIcdDiseaseDto> list, List<OltHosIcdDiseaseDto> errorList,
                                   List<String> icdCodeList) {
    List<OltHosIcdDiseaseDto> newList = new ArrayList<>();
 
    if (CollectionUtils.isNotEmpty(list)) {
        // 错误就往里加, 正确重新定义列表
        list.forEach(e -> {
            Map<Integer, String> map = new HashMap<>();
            boolean flag = false;
            // 判
            if (!DiseaseCategory.belongTo(e.getDiseaseCategoryStr())) {
                map.put(0, "不正确");
                flag = true;
            } else {
                e.setDiseaseCategory(DiseaseCategory.getCodeByDesc(e.getDiseaseCategoryStr()));
            }
 
            // excel是否存在重复数据
            if (checkRepeatCode.contains(e.getIcdCode())) {
                map.put(2, "重复");
                flag = true;
            } else {
                // 判断诊断编码
                if (CollectionUtils.isNotEmpty(icdCodeList) && icdCodeList.contains(e.getIcdCode())) {
                    map.put(2, "重复");
                    flag = true;
                }
            }
            e.setErrorReasonMap(map);
            if (flag) {
                errorCount++;
                errorList.add(e);
            } else {
                e.setIcdPinyin(HzUtils.getPinyinCap(e.getIcdName(), HzUtils.CaseType.UPPERCASE));
                e.setIcdWb(HzUtils.getWbCap(e.getIcdName(), HzUtils.CaseType.UPPERCASE));
                newList.add(e);
                checkRepeatCode.add(e.getIcdCode());
                successCount++;
            }
        });
    }
 
    // 正确数据入库
    if (CollectionUtils.isNotEmpty(newList)) {
        oltConfigService.batchAddHosIcdDisease(delIcdCodeList, newList);
    }
}

  其中,导入错误数据用了easyExcel的模版填充方式, 模版存于oss上

/**
 * @author songhc
 * @create
 * @desc 导出错误数据
 **/
@Async
public void exportIcdErrorDiseaseData(OltErrorResult dto, String fileBatch, String fileName, String fileUrl) {
    Map<Integer, List> map = new HashMap<>();
    map.put(0, dto.getErrorList());
    Map<Integer, Map<Integer, List<Integer>>> styleMap = new HashMap<>();
    styleMap.put(0, dto.getMap());
    ExportExistHandler.exportExistTemplateData(map, styleMap, fileBatch, fileName, fileUrl);
}

  接下来就是填充错误模版的实现

/**
 * @param errorMap key为sheetNo, value为填充的数据
 * @param styleMap key为sheetNo, value为错误数据坐标
 * @param fileBatch 批次号
 * @param fileName 文件名
 * @param fileUrl 文件路径
 * @description 导出服务封装方法(无需分页查询, 数据为动态传入)
 * @className exportNoModelData
 */
public static void exportExistTemplateData(Map<Integer, List> errorMap, Map<Integer, Map<Integer, List<Integer>>> styleMap, String fileBatch, String fileName, String fileUrl) {
    String ossFileName = fileName.substring(0, fileName.lastIndexOf('.'))
            .concat("-").concat(LocalDateTime.now()
                    .format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))).concat(fileName.substring(fileName.lastIndexOf('.')));
    InputStream inputStream = HttpUtil.io(HttpUtil.Atom.builder().url(fileUrl).build());
    if (null == inputStream) {
        return;
    }
    String localFileName = String.format(TaskNoteHandler.staticExportConfig.getExportPath(), ossFileName);
    ExcelWriter excelWriter = null;
    int resultCount = 0;
    try {
        if (errorMap != null && !errorMap.isEmpty()) {
            excelWriter = EasyExcel.write(localFileName)
                        .withTemplate(inputStream)
                        .build();
            // for循环是一个excel可能有多个sheet的兼容写法
            for (Integer i: errorMap.keySet()) {
                // 这里使用easyExcel的 registerWriteHandler 方法, 自定义CellColorSheetWriteHandler实现, 给每一个单元格填充颜色
                WriteSheet writeSheet = EasyExcel.writerSheet(i).registerWriteHandler(new CellColorSheetWriteHandler(styleMap.get(i),
                        IndexedColors.RED1.getIndex())).build();
                excelWriter.fill(errorMap.get(i), writeSheet);
            }
        }
    } catch (Exception e){
        LoggerUtil.error(LOGGER, "文件写入异常,error{0}", e);
        // 文件导出失败
        TaskNoteHandler.doUploadFailed(fileBatch, resultCount);
        return;
    } finally {
        // 关闭流
        if (excelWriter != null) {
            excelWriter.finish();
        }
    }
    // 1、上传文件(多种方案);2、更新记录
    TaskNoteHandler.doUploadAndNote(fileBatch, ossFileName, localFileName, resultCount);
}

  

/**
 * @description 自定义单元格格式拦截器
 * @className CellColorSheetWriteHandler
 * @package 
 * @Author songhc
 */
public class CellColorSheetWriteHandler implements CellWriteHandler {
 
    /**
     * map
     * key:第i行
     * value:第i行中单元格索引集合
     */
    private Map<Integer, List<Integer>> map;
    /**
     * 颜色
     */
    private Short colorIndex;
 
    /**
     * 有参构造
     */
    public CellColorSheetWriteHandler(Map<Integer, List<Integer>> map, Short colorIndex) {
        this.map = map;
        this.colorIndex = colorIndex;
    }
 
    /**
     * 无参构造
     */
    public CellColorSheetWriteHandler() {
    }
 
    @Override
    public void beforeCellCreate(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Row row, Head head, Integer integer, Integer integer1, Boolean aBoolean) {
    }
 
    /**
     * 在单元格创建后调用
     */
    @Override
    public void afterCellCreate(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, Cell cell, Head head, Integer integer, Boolean aBoolean) {
    }
 
    @Override
    public void afterCellDataConverted(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, CellData cellData, Cell cell, Head head, Integer integer, Boolean aBoolean) {
    }
 
    /**
     * 在单元上的所有操作完成后调用
     */
    @Override
    public void afterCellDispose(WriteSheetHolder writeSheetHolder, WriteTableHolder writeTableHolder, List<CellData> cellDataList, Cell cell, Head head, Integer relativeRowIndex, Boolean isHead) {
        //当前行的第i列
        int i = cell.getColumnIndex();
        // 根据单元格获取workbook
        Workbook workbook = cell.getSheet().getWorkbook();
        //不处理第一行
        if (0 != cell.getRowIndex()) {
            List<Integer> integerList = map.get(cell.getRowIndex());
            // 自定义单元格样式 
            if (CollectionUtils.isNotEmpty(integerList)) {
                if (integerList.contains(i)) {
                    // 单元格策略
                    WriteCellStyle contentWriteCellStyle = new WriteCellStyle();
                    // 设置背景颜色白色
                    contentWriteCellStyle.setFillForegroundColor(colorIndex);
                    // 设置垂直居中为居中对齐
                     contentWriteCellStyle.setVerticalAlignment(VerticalAlignment.CENTER);
                    // 设置左右对齐为中央对齐
                     contentWriteCellStyle.setHorizontalAlignment(HorizontalAlignment.RIGHT);
                    // 设置单元格上下左右边框为细边框
                      contentWriteCellStyle.setBorderBottom(BorderStyle.THIN);
                      contentWriteCellStyle.setBorderLeft(BorderStyle.THIN);
                     contentWriteCellStyle.setBorderRight(BorderStyle.THIN);
                     contentWriteCellStyle.setBorderTop(BorderStyle.THIN);
                    // 创建字体实例
                     WriteFont cellWriteFont = new WriteFont();
                    // 设置字体大小
                     cellWriteFont.setFontName("宋体");
                     cellWriteFont.setFontHeightInPoints((short) 10);
                    //设置字体颜色
                    // cellWriteFont.setColor(IndexedColors.BLACK1.getIndex());
                    //单元格颜色
                    //contentWriteCellStyle.setFillForegroundColor(IndexedColors.GREY_25_PERCENT.getIndex());
                    contentWriteCellStyle.setWriteFont(cellWriteFont);
                    CellStyle cellStyle = StyleUtil.buildHeadCellStyle(workbook, contentWriteCellStyle);
                    //设置当前行第i列的样式
                    cell.getRow().getCell(i).setCellStyle(cellStyle);
                }
            }
        }
    }
}

  对于一个excel多sheet, 操作也是一样

// 不同的是这里可以定义多个监听器
// readSheet(0)  —-> 这里的数据代表sheet的位置
OltDrugFrequencyListener oltDrugFrequencyListener = new OltDrugFrequencyListener(isCfgPrd, dfCodeList, frequencyDto, oltConfigService);
OltDrugUsageListener oltDrugUsageListener = new OltDrugUsageListener(isCfgPrd, dUCodeList, OltDrugUsageDto.builder().hosId(frequencyDto.getHosId()).build(), oltConfigService);
OltDrugDurationListener oltDrugDurationListener = new OltDrugDurationListener(durationCodeList, OltDrugDurationDefDto.builder().hosId(frequencyDto.getHosId()).build(), oltConfigService);
 
ReadSheet readSheet = EasyExcel.readSheet(0).registerReadListener(oltDrugFrequencyListener).build();
ReadSheet readSheet2 = EasyExcel.readSheet(2).registerReadListener(oltDrugUsageListener).build();
ReadSheet readSheet4 = EasyExcel.readSheet(4).registerReadListener(oltDrugDurationListener).build();
excelReader.read(readSheet, readSheet2, readSheet4);

  经过测试, 该方法导入2W条数据差不多需要10秒, 也不会影响内存

 

posted @ 2024-04-09 19:50  红尘沙漏  阅读(6)  评论(0编辑  收藏  举报