基于Easy Excel的大数据导出导入工具
背景:大数据量导入导出时经常出现 OOM,因此改用 Easy Excel 框架进行优化;但需要改造的接口太多,所以封装了一个工具类来简化公共操作。
代码实现
package com.easy.excel.convert.utils;
import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.read.listener.PageReadListener;
import com.alibaba.excel.write.builder.ExcelWriterBuilder;
import com.alibaba.excel.write.metadata.WriteSheet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.*;
import java.util.function.BiFunction;
import java.util.function.Consumer;
/**
 * Batch import/export helper built on EasyExcel for large data volumes.
 * Data is processed page by page / batch by batch so the full dataset is
 * never held in memory at once, avoiding OOM.
 *
 * @author Ryan
 */
public final class EasyExcelBatchHelper {

    private static final Logger log = LoggerFactory.getLogger(EasyExcelBatchHelper.class);

    private EasyExcelBatchHelper() {
        // static utility class; no instances
    }

    /**
     * Imports an Excel stream batch by batch; suitable for large files.
     * The caller owns the input stream and is responsible for closing it.
     *
     * @param inputStream  Excel input stream (not closed by this method)
     * @param headClass    target row-mapping class
     * @param batchSize    rows per batch, clamped to a minimum of 1
     * @param batchHandler invoked once per non-empty batch (e.g. to persist rows)
     * @param <T>          target row type
     */
    public static <T> void importByBatch(InputStream inputStream,
                                         Class<T> headClass,
                                         int batchSize,
                                         Consumer<List<T>> batchHandler) {
        Objects.requireNonNull(inputStream, "inputStream cannot be null");
        Objects.requireNonNull(headClass, "headClass cannot be null");
        Objects.requireNonNull(batchHandler, "batchHandler cannot be null");
        int size = Math.max(batchSize, 1);
        EasyExcel.read(inputStream, headClass, new PageReadListener<T>(data -> {
                    if (data == null || data.isEmpty()) {
                        return; // never hand an empty batch to the handler
                    }
                    batchHandler.accept(data);
                }, size))
                .autoCloseStream(false) // caller manages the stream lifecycle
                .sheet()
                .doRead();
    }

    /**
     * Imports an Excel file batch by batch. The stream is opened and closed here.
     *
     * @param file         Excel file to read
     * @param headClass    target row-mapping class
     * @param batchSize    rows per batch
     * @param batchHandler invoked once per non-empty batch
     * @param <T>          target row type
     * @throws IllegalArgumentException if the file does not exist
     * @throws RuntimeException         wrapping any other import failure
     */
    public static <T> void importByBatch(File file,
                                         Class<T> headClass,
                                         int batchSize,
                                         Consumer<List<T>> batchHandler) {
        Objects.requireNonNull(file, "file cannot be null");
        try (InputStream in = new FileInputStream(file)) {
            importByBatch(in, headClass, batchSize, batchHandler);
        } catch (FileNotFoundException e) {
            throw new IllegalArgumentException("Excel 文件不存在: " + file.getAbsolutePath(), e);
        } catch (Exception e) {
            throw new RuntimeException("批量导入 Excel 失败", e);
        }
    }

    /**
     * Exports to a stream batch by batch; the caller owns the output stream.
     *
     * @param outputStream output stream (not closed by this method)
     * @param headClass    header / row model class
     * @param totalCount   total row count, used to derive the number of pages
     * @param batchSize    rows per batch, clamped to a minimum of 1
     * @param pageQuery    page query: (offset, limit) -> rows for that page
     * @param beforeWrite  optional per-batch hook run before writing (may be null)
     * @param <T>          row type
     */
    public static <T> void exportByBatch(OutputStream outputStream,
                                         Class<T> headClass,
                                         int totalCount,
                                         int batchSize,
                                         BiFunction<Integer, Integer, List<T>> pageQuery,
                                         Consumer<List<T>> beforeWrite) {
        Objects.requireNonNull(outputStream, "outputStream cannot be null");
        Objects.requireNonNull(headClass, "headClass cannot be null");
        Objects.requireNonNull(pageQuery, "pageQuery cannot be null");
        int size = Math.max(batchSize, 1);
        if (totalCount <= 0) {
            log.warn("exportByBatch called with totalCount <= 0, nothing to write");
            return;
        }
        // Ceiling division: number of pages needed to cover totalCount rows.
        int pageCount = (totalCount + size - 1) / size;
        ExcelWriterBuilder builder = EasyExcel.write(outputStream, headClass).autoCloseStream(false);
        try (ExcelWriter writer = builder.build()) {
            WriteSheet sheet = EasyExcel.writerSheet("Sheet1").build();
            for (int page = 0; page < pageCount; page++) {
                int offset = page * size;
                List<T> batch = pageQuery.apply(offset, size);
                if (batch == null || batch.isEmpty()) {
                    log.debug("No data returned for page {}, offset {}, size {}", page, offset, size);
                    continue;
                }
                if (beforeWrite != null) {
                    beforeWrite.accept(batch);
                }
                writer.write(batch, sheet);
            }
        } catch (Exception e) {
            throw new RuntimeException("批量导出 Excel 失败", e);
        }
    }

    /**
     * Exports to a file batch by batch. The stream is opened and closed here.
     *
     * @param file        output file
     * @param headClass   header / row model class
     * @param totalCount  total row count
     * @param batchSize   rows per batch
     * @param pageQuery   page query: (offset, limit) -> rows for that page
     * @param beforeWrite optional per-batch hook (may be null)
     * @param <T>         row type
     */
    public static <T> void exportByBatch(File file,
                                         Class<T> headClass,
                                         int totalCount,
                                         int batchSize,
                                         BiFunction<Integer, Integer, List<T>> pageQuery,
                                         Consumer<List<T>> beforeWrite) {
        Objects.requireNonNull(file, "file cannot be null");
        try (OutputStream out = Files.newOutputStream(file.toPath())) {
            exportByBatch(out, headClass, totalCount, batchSize, pageQuery, beforeWrite);
        } catch (Exception e) {
            throw new RuntimeException("批量导出 Excel 失败", e);
        }
    }

    /**
     * Page-query function that also carries an extra condition object.
     *
     * @param <T> row type
     * @param <C> condition type
     */
    @FunctionalInterface
    public interface PageQueryWithCondition<T, C> {

        /**
         * Queries one page of data.
         *
         * @param page      zero-based page number
         * @param limit     page size
         * @param condition query condition object
         * @return rows of the requested page
         */
        List<T> query(int page, int limit, C condition);
    }

    /**
     * Multi-threaded batch export (stream) with an extra query condition; output
     * order matches the sequential export. Producer/consumer style: pages are
     * queried in parallel, then written strictly in page order.
     *
     * <p>Memory bounding: each page holds one semaphore permit from the start of
     * its query until the moment it is written to the sheet. So at most
     * {@code maxConcurrentQueries} pages can ever be buffered in memory —
     * including pages that finished querying but are still waiting for an
     * earlier page before they can be written. (Releasing the permit right
     * after the query would let an unbounded number of completed-but-unwritten
     * pages pile up behind one slow page, defeating the OOM protection.)
     *
     * @param outputStream         output stream (not closed by this method)
     * @param headClass            header / row model class
     * @param totalCount           total row count, used to derive the page count
     * @param batchSize            rows per batch, clamped to a minimum of 1
     * @param condition            query condition object (may be null)
     * @param pageQuery            page query: (page, limit, condition) -> rows
     * @param beforeWrite          optional per-batch hook run before writing (may be null)
     * @param threadPoolSize       worker thread count; &lt;= 0 means CPU core count
     * @param maxConcurrentQueries max pages in flight (queried-but-unwritten);
     *                             &lt;= 0 means max(2, 2 * threadPoolSize)
     * @param <T>                  row type
     * @param <C>                  condition type
     */
    public static <T, C> void exportByBatchParallel(OutputStream outputStream,
                                                    Class<T> headClass,
                                                    int totalCount,
                                                    int batchSize,
                                                    C condition,
                                                    PageQueryWithCondition<T, C> pageQuery,
                                                    Consumer<List<T>> beforeWrite,
                                                    int threadPoolSize,
                                                    int maxConcurrentQueries) {
        Objects.requireNonNull(outputStream, "outputStream cannot be null");
        Objects.requireNonNull(headClass, "headClass cannot be null");
        Objects.requireNonNull(pageQuery, "pageQuery cannot be null");
        int size = Math.max(batchSize, 1);
        if (totalCount <= 0) {
            log.warn("exportByBatchParallel called with totalCount <= 0, nothing to write");
            return;
        }
        int pageCount = (totalCount + size - 1) / size;
        int poolSize = threadPoolSize > 0 ? threadPoolSize : Runtime.getRuntime().availableProcessors();
        int maxConcurrent = maxConcurrentQueries > 0 ? maxConcurrentQueries : Math.max(2, poolSize * 2);
        // One permit per in-flight page (being queried OR queried-but-not-yet-written).
        Semaphore permits = new Semaphore(maxConcurrent);
        // Unbounded queue: task objects are tiny, and it avoids CallerRunsPolicy
        // pulling the submitting thread into a task where it could block forever
        // on a permit that is never released after a failure.
        ExecutorService executor = new ThreadPoolExecutor(
                poolSize,
                poolSize,
                10L,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(),
                Executors.defaultThreadFactory()
        );
        // Completed pages waiting to be written, keyed by page number.
        ConcurrentMap<Integer, List<T>> completed = new ConcurrentHashMap<>();
        // Next page number to write; guarded by writeLock.
        int[] nextPageToWrite = {0};
        Object writeLock = new Object();
        ExcelWriterBuilder builder = EasyExcel.write(outputStream, headClass).autoCloseStream(false);
        try (ExcelWriter writer = builder.build()) {
            WriteSheet sheet = EasyExcel.writerSheet("Sheet1").build();
            List<Future<?>> futures = new ArrayList<>(pageCount);
            for (int page = 0; page < pageCount; page++) {
                final int currentPage = page;
                futures.add(executor.submit(() -> {
                    // Tracks whether this page's result made it into `completed`;
                    // once it has, its permit is released by whichever thread
                    // eventually writes the page (see drainInOrder).
                    boolean handedOff = false;
                    try {
                        permits.acquire();
                        try {
                            List<T> batch = pageQuery.query(currentPage, size, condition);
                            List<T> result = batch != null ? batch : new ArrayList<>();
                            log.debug("Page {} query completed, size {}, result size {}",
                                    currentPage, size, result.size());
                            completed.put(currentPage, result);
                            handedOff = true;
                            // Opportunistically flush every consecutive ready page
                            // so memory is released as early as possible.
                            synchronized (writeLock) {
                                drainInOrder(writer, sheet, completed, nextPageToWrite, beforeWrite, permits);
                            }
                        } finally {
                            if (!handedOff) {
                                permits.release(); // query failed before hand-off; free the slot
                            }
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        throw new RuntimeException("查询被中断: page=" + currentPage, e);
                    } catch (Exception e) {
                        log.error("Page {} query failed, size {}", currentPage, size, e);
                        throw new RuntimeException("分页查询失败: page=" + currentPage, e);
                    }
                }));
            }
            // Wait for every page; fail fast (and cancel stragglers) on the first error
            // so buffered pages are not retained for the full shutdown timeout.
            for (Future<?> future : futures) {
                try {
                    future.get();
                } catch (InterruptedException e) {
                    executor.shutdownNow();
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("导出被中断", e);
                } catch (ExecutionException e) {
                    executor.shutdownNow();
                    Throwable cause = e.getCause() != null ? e.getCause() : e;
                    throw new RuntimeException("查询任务失败", cause);
                }
            }
            // Safety net: flush anything still buffered (normally already drained).
            synchronized (writeLock) {
                drainInOrder(writer, sheet, completed, nextPageToWrite, beforeWrite, permits);
            }
        } catch (Exception e) {
            throw new RuntimeException("批量导出 Excel 失败", e);
        } finally {
            executor.shutdown();
            try {
                if (!executor.awaitTermination(60, TimeUnit.SECONDS)) {
                    executor.shutdownNow();
                }
            } catch (InterruptedException e) {
                executor.shutdownNow();
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Writes every consecutively-available page (nextPage[0], nextPage[0]+1, ...)
     * from {@code completed} to the sheet, releasing one semaphore permit per
     * page written. Caller MUST hold the write lock.
     *
     * @param writer      open Excel writer
     * @param sheet       target sheet
     * @param completed   completed pages awaiting ordered write-out
     * @param nextPage    single-element holder of the next page number to write
     * @param beforeWrite optional per-batch hook (may be null)
     * @param permits     in-flight page permits; one released per drained page
     * @param <T>         row type
     */
    private static <T> void drainInOrder(ExcelWriter writer,
                                         WriteSheet sheet,
                                         ConcurrentMap<Integer, List<T>> completed,
                                         int[] nextPage,
                                         Consumer<List<T>> beforeWrite,
                                         Semaphore permits) {
        while (completed.containsKey(nextPage[0])) {
            List<T> data = completed.remove(nextPage[0]);
            if (data != null && !data.isEmpty()) {
                if (beforeWrite != null) {
                    beforeWrite.accept(data);
                }
                writer.write(data, sheet);
                log.debug("Page {} written to Excel, size {}", nextPage[0], data.size());
            }
            nextPage[0]++;
            permits.release(); // this page's buffer slot is free again
        }
    }

    /**
     * Multi-threaded batch export (stream) using the default thread pool size
     * and default in-flight page limit.
     *
     * @param outputStream output stream
     * @param headClass    header / row model class
     * @param totalCount   total row count
     * @param batchSize    rows per batch
     * @param condition    query condition object
     * @param pageQuery    page query
     * @param beforeWrite  optional per-batch hook (may be null)
     * @param <T>          row type
     * @param <C>          condition type
     */
    public static <T, C> void exportByBatchParallel(OutputStream outputStream,
                                                    Class<T> headClass,
                                                    int totalCount,
                                                    int batchSize,
                                                    C condition,
                                                    PageQueryWithCondition<T, C> pageQuery,
                                                    Consumer<List<T>> beforeWrite) {
        exportByBatchParallel(outputStream, headClass, totalCount, batchSize, condition,
                pageQuery, beforeWrite, 0, 0);
    }

    /**
     * Multi-threaded batch export (file) with an extra query condition.
     *
     * @param file                 output file
     * @param headClass            header / row model class
     * @param totalCount           total row count
     * @param batchSize            rows per batch
     * @param condition            query condition object
     * @param pageQuery            page query: (page, limit, condition) -> rows
     * @param beforeWrite          optional per-batch hook (may be null)
     * @param threadPoolSize       worker thread count; &lt;= 0 means CPU core count
     * @param maxConcurrentQueries max in-flight pages; &lt;= 0 means max(2, 2 * threadPoolSize)
     * @param <T>                  row type
     * @param <C>                  condition type
     */
    public static <T, C> void exportByBatchParallel(File file,
                                                    Class<T> headClass,
                                                    int totalCount,
                                                    int batchSize,
                                                    C condition,
                                                    PageQueryWithCondition<T, C> pageQuery,
                                                    Consumer<List<T>> beforeWrite,
                                                    int threadPoolSize,
                                                    int maxConcurrentQueries) {
        Objects.requireNonNull(file, "file cannot be null");
        try (OutputStream out = Files.newOutputStream(file.toPath())) {
            exportByBatchParallel(out, headClass, totalCount, batchSize, condition,
                    pageQuery, beforeWrite, threadPoolSize, maxConcurrentQueries);
        } catch (Exception e) {
            throw new RuntimeException("批量导出 Excel 失败", e);
        }
    }

    /**
     * Multi-threaded batch export (file) using the default thread pool size
     * and default in-flight page limit.
     *
     * @param file        output file
     * @param headClass   header / row model class
     * @param totalCount  total row count
     * @param batchSize   rows per batch
     * @param condition   query condition object
     * @param pageQuery   page query
     * @param beforeWrite optional per-batch hook (may be null)
     * @param <T>         row type
     * @param <C>         condition type
     */
    public static <T, C> void exportByBatchParallel(File file,
                                                    Class<T> headClass,
                                                    int totalCount,
                                                    int batchSize,
                                                    C condition,
                                                    PageQueryWithCondition<T, C> pageQuery,
                                                    Consumer<List<T>> beforeWrite) {
        exportByBatchParallel(file, headClass, totalCount, batchSize, condition,
                pageQuery, beforeWrite, 0, 0);
    }
}
测试代码
package com.easy.excel.convert;
import com.alibaba.excel.EasyExcel;
import com.easy.excel.convert.enums.SexEnum;
import com.easy.excel.convert.enums.YesOrNoEnum;
import com.easy.excel.convert.utils.EasyExcelBatchHelper;
import lombok.extern.slf4j.Slf4j;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import static org.junit.jupiter.api.Assertions.*;
/**
 * <p>Title: EasyExcel批量处理工具测试类</p>
 * <p>Description: 用于测试EasyExcelBatchHelper的批量导入导出功能</p>
 *
 * <p>All test files are OS-independent temp files (the previous hard-coded
 * {@code D:\...} paths failed on any machine without that drive/directory),
 * and the mock dataset is sized to keep each test fast while still spanning
 * multiple batches.</p>
 *
 * @author Ryan
 */
@Slf4j
@DisplayName("EasyExcel批量处理工具测试")
public class EasyExcelBatchHelperTest {

    /** Rows of mock data; large enough for many batches, small enough to run fast. */
    private static final int MOCK_ROWS = 100_000;

    /** Simulated per-page query latency in ms (was 1000ms, which made tests take minutes). */
    private static final long QUERY_LATENCY_MS = 100L;

    private List<TestData> mockData;

    /**
     * 查询条件类(示例)
     */
    static class QueryCondition {
        private String keyword;
        private Integer minAge;

        public QueryCondition(String keyword, Integer minAge) {
            this.keyword = keyword;
            this.minAge = minAge;
        }

        public String getKeyword() {
            return keyword;
        }

        public Integer getMinAge() {
            return minAge;
        }
    }

    /** Creates a temp .xlsx file, removed on JVM exit — portable across OSes. */
    private static File tempXlsx(String prefix) throws Exception {
        File file = File.createTempFile(prefix, ".xlsx");
        file.deleteOnExit();
        return file;
    }

    @BeforeEach
    public void setUp() {
        // 准备测试数据 (presized to avoid repeated growth)
        mockData = new ArrayList<>(MOCK_ROWS);
        for (int i = 1; i <= MOCK_ROWS; i++) {
            TestData data = new TestData();
            data.setId(i);
            data.setName("用户" + i);
            data.setValid(i % 2 == 0 ? YesOrNoEnum.YES.getCode() : YesOrNoEnum.NO.getCode());
            data.setAge(20 + (i % 30));
            data.setSex(i % 2 == 0 ? SexEnum.MALE.getCode() : SexEnum.FEMALE.getCode());
            mockData.add(data);
        }
    }

    @Test
    @DisplayName("测试批量导入Excel")
    public void testImportByBatch() throws Exception {
        // 先创建一个测试文件 (50 rows is enough to exercise multiple batches of 10)
        File exportFile = tempXlsx("import_test");
        EasyExcel.write(exportFile, TestData.class)
                .sheet("测试数据")
                .doWrite(mockData.subList(0, 50));
        // 使用批量导入
        AtomicInteger batchCount = new AtomicInteger(0);
        AtomicInteger totalCount = new AtomicInteger(0);
        EasyExcelBatchHelper.importByBatch(
                exportFile,
                TestData.class,
                10, // 每批10条
                batch -> {
                    batchCount.incrementAndGet();
                    totalCount.addAndGet(batch.size());
                    log.info("处理第 {} 批,本批数据量: {}", batchCount.get(), batch.size());
                    assertNotNull(batch);
                    assertFalse(batch.isEmpty());
                }
        );
        // 50 rows at 10/batch must yield exactly 5 batches of 50 rows total.
        assertEquals(50, totalCount.get(), "导入总条数应与写入条数一致");
        assertEquals(5, batchCount.get(), "批次数应为 总条数/每批大小");
        log.info("批量导入测试完成,共处理 {} 批,{} 条数据", batchCount.get(), totalCount.get());
    }

    @Test
    @DisplayName("测试批量导出Excel(单线程)")
    public void testExportByBatch() throws Exception {
        int totalCount = mockData.size();
        int batchSize = 10000;
        long startTime = System.currentTimeMillis();
        File testFile = tempXlsx("simple_export_test");
        EasyExcelBatchHelper.exportByBatch(
                testFile,
                TestData.class,
                totalCount,
                batchSize,
                (offset, limit) -> {
                    // 模拟分页查询逻辑 (with simulated query latency)
                    try {
                        Thread.sleep(QUERY_LATENCY_MS);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        log.error("线程中断", e);
                    }
                    int start = offset;
                    int end = Math.min(start + limit, mockData.size());
                    return mockData.subList(start, end);
                },
                batch -> {
                    // 写入前处理:可以在这里进行数据转换、校验等
                }
        );
        assertTrue(testFile.exists() && testFile.length() > 0, "导出文件应存在且非空");
        long end = System.currentTimeMillis();
        log.info("批量导出耗时: {} ms", end - startTime);
    }

    @Test
    @DisplayName("测试多线程批量导出Excel")
    public void testExportByBatchParallel() throws Exception {
        int totalCount = mockData.size();
        int batchSize = 10000;
        long startTime = System.currentTimeMillis();
        File testFile = tempXlsx("thread_export_test");
        EasyExcelBatchHelper.exportByBatchParallel(
                testFile,
                TestData.class,
                totalCount,
                batchSize,
                null,
                (page, limit, cond) -> {
                    // 模拟分页查询逻辑 (with simulated query latency)
                    try {
                        Thread.sleep(QUERY_LATENCY_MS);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        log.error("线程中断", e);
                    }
                    int start = page * limit;
                    int end = Math.min(start + limit, mockData.size());
                    return mockData.subList(start, end);
                },
                null,
                4, // 线程池大小
                8  // 最大并发查询数
        );
        assertTrue(testFile.exists() && testFile.length() > 0, "导出文件应存在且非空");
        long end = System.currentTimeMillis();
        log.info("批量导出耗时: {} ms", end - startTime);
    }

    @Test
    @DisplayName("测试空数据导出")
    public void testEmptyDataExport() throws Exception {
        File emptyFile = tempXlsx("empty_test");
        EasyExcelBatchHelper.exportByBatch(
                emptyFile,
                TestData.class,
                0, // 总数为0
                10,
                (offset, limit) -> new ArrayList<>(),
                null
        );
        // 空数据导出不应该抛出异常
        assertTrue(true, "空数据导出应该正常完成");
        log.info("空数据导出测试完成");
    }

    @Test
    @DisplayName("测试流式导入导出")
    public void testStreamImportExport() throws Exception {
        // Export only a slice so 20-row batches stay fast on round-trip.
        int rows = Math.min(mockData.size(), 1000);
        File streamFile = tempXlsx("stream_test");
        // 使用流进行导出
        try (FileOutputStream out = new FileOutputStream(streamFile)) {
            EasyExcelBatchHelper.exportByBatch(
                    out,
                    TestData.class,
                    rows,
                    20,
                    (offset, limit) -> {
                        int start = offset;
                        int end = Math.min(start + limit, rows);
                        return mockData.subList(start, end);
                    },
                    null
            );
        }
        assertTrue(streamFile.exists(), "流式导出文件应该存在");
        // 使用流进行导入
        AtomicInteger streamImportCount = new AtomicInteger(0);
        try (FileInputStream in = new FileInputStream(streamFile)) {
            EasyExcelBatchHelper.importByBatch(
                    in,
                    TestData.class,
                    15,
                    batch -> streamImportCount.addAndGet(batch.size())
            );
        }
        // Round-trip must preserve the row count exactly.
        assertEquals(rows, streamImportCount.get(), "流式导入条数应与导出条数一致");
        log.info("流式导入导出测试完成,导入 {} 条数据", streamImportCount.get());
    }
}

浙公网安备 33010602011771号