JAVA大数据量导出问题

1问题背景

最近公司在做一个导出的功能，是这样子的，公司内部有很多套系统，现在需要开发了一个门户系统，希望能连接各个系统，在门户系统上希望可以给其他子系统增加一个导出功能，支持对其他各个系统的数据导出。
由此带来几个问题:
1. 因为是生产系统，所以只能对需要使用该功能的系统将其数据源配置在jboss 上(因种种安全上的考量，JDBC的直连方式不被允许)
2. 只能对其他生产系统进行查操作,禁止一切修改
3. 查询的sql语句由消费系统来写，由此带来的性能问题等引发了大数据量查询不能造成本系统服务压力，已经使用者系统压力的问题解决

2问题解决

本篇主要对大数量查询导出问题作出记录，jboss数据源获取方式和使用不做赘述。

　　 1 分页查询

　　　　因为不确定sql 段落内容和数据量情况，采用每100条数据为一次查询，然后一万条数据写入一张表。

  //每页最大数
        int rowCount = 100;
        int pageCount ;

        int excelCount = 10000;
        int sign = 0;

        int searchCount ;
        //
        if (count % excelCount == 0) {
            pageCount = count / excelCount;
        } else {
            pageCount = count / excelCount + 1;
        }
        if (count % rowCount == 0) {
            searchCount = count / rowCount;
        } else {
            searchCount = count / rowCount + 1;
        }


        //列头
        List<String> headers = new ArrayList<>();

        //需要压缩的文件名
        String zipName = StringFormateUtil.getDateTime() + "";
        String tempPath = ReportTaskUtil.PATH_FILE + "/" + zipName;
        //判断临时文件夹
        File file = new File(tempPath);
        //新建一个临时文件夹
        ReportTaskUtil.judeDirExists(file);

        Map<String, Object> result = new HashMap<>();

        result.put("tempPath", tempPath);
        result.put("zipName", zipName);
        response.getData().setRows(result);
        for (int i = 0; i < pageCount; i++) {

            if(Thread.currentThread().isInterrupted()){
                throw new InterruptedException("线程强制中断");
            }

            //每一页对应一个 excel
            ExcelUtil07 excelUtil07 = null;
            //查 100 条每次，防止 查询过大 造成 系统压力
            for (int m = 0; m <100 ; m++) {

                    if(searchCount==0){
                        break;
                    }
                    searchCount--;

                String exeSql2 = String.format(" select * from (%s) a  limit %d, %d", sql, sign, rowCount);
                try (PreparedStatement preparedStatement2 = conn.prepareStatement(exeSql2);
                     ResultSet rs2 = preparedStatement2.executeQuery();) {
                    //查询列集合第一次
                    if(firstQuery){
                        //获取查询的字段
                        ResultSetMetaData rsmd = rs2.getMetaData();
                        headers = getHeaders(rsmd);
                        firstQuery = false;
                        //excel 格式 名称 等设置
                        excelUtil07 = new ExcelUtil07(i+"_"+reportName, headers);
                    }

                    sign += rowCount;
                    // 行的集合
                    List<List<String>> rows = new ArrayList<>();
                    while (rs2.next()) {
                        List<String> v = new ArrayList<>();
                        for (int j = 0; j < headers.size(); j++) {
                            v.add(rs2.getString(j + 1));
                        }
                        rows.add(v);
                    }
                    //每一百行 写入一次
                    if(!excelUtil07.create07Xlsx(rows)){
                        LOGGER.error("07excel 生成失败");
                        response.setSuccess(false);
                        response.setErrorMsg("07excel 生成失败");
                        return response;
                    }
                } catch (Exception e) {
                    LOGGER.error(" 执行异常", e);
                    response.setSuccess(false);
                    response.setErrorMsg(" 执行异常");
                    break;
                }
            }
            excelUtil07.export(tempPath);
            firstQuery  = true;
        }

　　 2 excel 内存问题

　　　　为了尽可能减少内存的开销，防止同一时间存在太多对象，我们这里使用SXSSFWorkbook 类，该类接收一个int 类型的参数，从源码可以知道此参数可以控制内存中的对象,多余数量的对象会存在磁盘上

　　　　@param rowAccessWindowSize the number of rows that are kept in memory until flushed out, see above.

package com.suning.bi.util;

import com.suning.bi.entity.ReportResponse;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;

/**
 * @author 17103349
 */
public class ExcelUtil07 {

    private static Logger LOGGER = LoggerFactory.getLogger(ExcelUtil07.class);

    private SXSSFWorkbook sxssfWorkbook;
    private SXSSFCell sxssfCell;
    private SXSSFSheet sheet;
    private SXSSFRow sxssfRow;
    private String fileName;
    private int numStart = 0;

    /**
     * @param fileName 文件名
     * @param titles   标题头
     */
    public ExcelUtil07(String fileName, List<String> titles) {


        this.fileName = fileName;
        //设置Workbook 超出100 将缓存在磁盘，保证 row最多有100在内存中，防止内存开销过大
        sxssfWorkbook = new SXSSFWorkbook(100);
        sxssfCell = null;
        sxssfRow = null;
        sheet = sxssfWorkbook.createSheet(fileName);

        CellStyle style = sxssfWorkbook.createCellStyle();

        style.setFillForegroundColor(HSSFColor.SEA_GREEN.index);
        style.setFillPattern(HSSFCellStyle.SOLID_FOREGROUND);
        style.setAlignment(HorizontalAlignment.CENTER); // 居中

        style.setBorderBottom(HSSFCellStyle.BORDER_THIN);// 下边框
        style.setBorderLeft(HSSFCellStyle.BORDER_THIN);// 左边框
        style.setBorderRight(HSSFCellStyle.BORDER_THIN);// 右边框
        style.setBorderTop(HSSFCellStyle.BORDER_THIN);// 上边框


        Font font = sxssfWorkbook.createFont();
        font.setFontName("仿宋_GB2312");
        //粗体显示
        font.setBold(true);
        //字体大小
        font.setFontHeightInPoints((short) 12);

        style.setFont(font);
        //设置自动换行
        style.setWrapText(true);

        sxssfRow = sheet.createRow(0);

        //设置头
        for (int a = 0; a < titles.size(); a++) {
            sheet.setColumnWidth(a, 3766);

            SXSSFCell sxssfCell = sxssfRow.createCell(a);

            sxssfCell.setCellValue(titles.get(a));

            sxssfCell.setCellStyle(style);

        }
    }

    /**
     * 07版本excel 生成 (poi 3.8以后 有临时文件)
     *
     * @param dataLists 数据集
     * @return
     */
    public boolean create07Xlsx(List<List<String>> dataLists) {
        try {
            int size = dataLists.size();
            for (int a = 1; a <=size; a++) {

                sxssfRow = sheet.createRow(a+numStart);
                List<String> data = dataLists.get(a - 1);
                int cell = 0;
                for (int i = 0; i < data.size(); i++) {
                    Object valueObject = data.get(i);
                    sxssfCell = sxssfRow.createCell(cell);
                    sxssfCell.setCellValue(valueObject == null ? "" : valueObject.toString());
                    sxssfCell.getCellStyle().setWrapText(true);
                    cell++;
                }
                //数据清理
                data.clear();
            }
            numStart+=size;

            //数据清理 回收对象
            dataLists.clear();
            //设置空引用
        } catch (Exception e) {
            LOGGER.error("xlsx 文件{}生成失败", fileName, e);
            return false;
        }
        return true;
    }

    /**
     * 导出正式文件
     * @return
     */
    public boolean export(String path){
        try {
            FileOutputStream fos = new FileOutputStream(path + "/" + fileName+ReportTaskUtil.XLSX_TYPE);
            sxssfWorkbook.write(fos);
            fos.close();
            sxssfWorkbook.dispose();
        } catch (IOException e) {
            LOGGER.error("xlsx 正式文件{}生成失败", fileName, e);
            return false;
        }
        return true;
    }


}

　　3 生成的多个文件，对其进行打包处理上传ftp服务器

public boolean fileToZip(String sourceFilePath, String zipFilePath, String fileName) {
        boolean flag = false;
        File sourceFile = new File(sourceFilePath);

        if (sourceFile.exists() == false) {
            LOGGER.info("待压缩的文件目录：" + sourceFilePath + "不存在.");
            sourceFile.mkdir(); // 新建目录
        }
        File zipFile = new File(zipFilePath + "/" + fileName + ".zip");
        if (zipFile.exists()) {
            LOGGER.info(zipFilePath + "目录下存在名字为:" + fileName + ".zip" + "打包文件.");
        } else {
            File[] sourceFiles = sourceFile.listFiles();
            if (null == sourceFiles || sourceFiles.length < 1) {
                LOGGER.info("待压缩的文件目录：" + sourceFilePath + "里面不存在文件，无需压缩.");
            } else {
                try (FileOutputStream fos = new FileOutputStream(zipFile); ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(fos));) {
                    byte[] bufs = new byte[1024 * 10];
                    for (int i = 0; i < sourceFiles.length; i++) {
                        String name = sourceFiles[i].getName();
                        ZipEntry zipEntry = new ZipEntry(name);
                        zos.putNextEntry(zipEntry);
                        try (FileInputStream fis = new FileInputStream(sourceFiles[i]); BufferedInputStream bis = new BufferedInputStream(fis, 1024 * 10);) {
                            int read = 0;
                            while ((read = bis.read(bufs, 0, 1024 * 10)) != -1) {
                                zos.write(bufs, 0, read);
                            }
                        } catch (Exception e) {
                            flag = false;
                            LOGGER.error("IO异常", e);
                        }
                    }
                } catch (IOException e) {
                    flag = false;
                    LOGGER.error("IO异常", e);
                }
                flag = true;
            }
        }
        return flag;
    }

3 遗留问题

这里有个遗留问题，因为笔者的任务序列使用队列做的，笔者在有想过在遇到 sql 执行效率和性能问题时，强制终止线程，但是对于 stop 和 interrupt 方式没有考虑好，interrupt 不能保证线程能立刻马上终止，stop则会遇到线程不安全和资源释放的问题，现在还在纠结采用的方案。

其次，因为其他生产系统不受控制，对于其他系统的数据库压力问题，在执行过程中不可控，也比较糟心。

4 最后

写到这里，基本就记录完毕了，笔者不是什么大神。。很普通，有什么不对和改进的地方，希望能和大家一起探讨学习

发表于 2018-10-17 15:09 励志做瘦子阅读(998) 评论(0) 收藏举报

JAVA基于MYSQL做大数据量的导出问题

JAVA大数据量导出问题

1问题背景

2问题解决

3 遗留问题

4 最后