数据库大量数据分段读取,并行处理demo
//线程池设置非常重要,否则会卡住 经过测试,就是连接池太少导致卡住的,不是 batch 太小或 数据库隔离级别的问题(batch 5000,隔离级别可注释)
ds.setMaxTotal(16); // 至少 >= 线程数
ds.setMaxIdle(8);
ds.setMinIdle(2);
package com.hd.protag.mapper;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.hd.protag.bean.Protag;
import org.apache.commons.dbcp2.BasicDataSource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.datasource.DataSourceTransactionManager;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.support.TransactionTemplate;
import java.io.IOException;
import java.sql.Timestamp;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class TagParallelProcessor {
public final JdbcTemplate jdbcTemplate;
private final ObjectMapper mapper = new ObjectMapper();
private static final TypeReference<List<Protag>> LIST_TYPE = new TypeReference<List<Protag>>() {};
private static final int BATCH = 500;
private static BasicDataSource ds;
static {
ds = new BasicDataSource();
ds.setMaxTotal(12); // 至少 >= 线程数
ds.setMaxIdle(8);
ds.setMinIdle(2);
ds.setUrl("jdbc:sqlserver://xxx:1433;databaseName=xxx");
ds.setUsername("xxx");
ds.setPassword("xxx");
}
public TagParallelProcessor() {
this.jdbcTemplate = new JdbcTemplate(ds);
}
public void handleAll() throws InterruptedException {
long total = jdbcTemplate.queryForObject("SELECT COUNT(*) FROM temp032_t", Long.class);
System.out.println("总行数 = " + total);
if (total == 0) return;
int threads = 8;
long step = total / threads + 1;
ExecutorService pool = Executors.newFixedThreadPool(threads);
CountDownLatch latch = new CountDownLatch(threads);
for (int i = 0; i < threads; i++) {
final long offset = i * step;
pool.submit(() -> {
try {
handleRange(offset, step);
} catch (Exception e) {
e.printStackTrace();
} finally {
latch.countDown();
}
});
}
latch.await();
pool.shutdown();
}
public void handleRange(long start, long end) {
System.out.printf("线程 %s: offset=%d, rows=%d 开始查询%n", Thread.currentThread().getName(), start, end);
TransactionTemplate tx = new TransactionTemplate(txManager());
String sql = "SELECT sellid, ddate, tags FROM temp032_t ORDER BY sellid OFFSET ? ROWS FETCH NEXT ? ROWS ONLY";
jdbcTemplate.query(
sql,
new Object[]{start, end},
new int[]{Types.BIGINT, Types.BIGINT},
rs -> {
List<Protag> buffer = new ArrayList<>(BATCH);
while (rs.next()) {
String sellid = rs.getString("sellid");
Timestamp ddate = rs.getTimestamp("ddate");
String tags = rs.getString("tags");
try {
List<Protag> list = mapper.readValue(tags, LIST_TYPE);
System.out.printf("sellid=%s, tags 解析成 %d 条%n", sellid, list.size());
for (Protag p : list) {
p.setSellid(sellid);
p.setDdate(ddate.toLocalDateTime());
buffer.add(p);
if (buffer.size() >= BATCH) {
insertBatch(tx, buffer);
buffer.clear();
}
}
} catch (IOException e) {
System.err.println("JSON解析失败 sellid=" + sellid);
}
}
if (!buffer.isEmpty()) insertBatch(tx, buffer);
return null;
});
}
private void insertBatch(TransactionTemplate tx, List<Protag> list) {
System.out.printf("线程 %s 准备插入 %d 条%n", Thread.currentThread().getName(), list.size());
tx.execute(status -> {
jdbcTemplate.execute("SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");
try {
jdbcTemplate.batchUpdate(
"INSERT INTO temp031_t (sellid, ddate, name, type, showType, seq, tagType, proTag) " +
"VALUES (?,?,?,?,?,?,?,?)",
list,
BATCH,
(ps, t) -> {
ps.setString(1, t.getSellid());
ps.setTimestamp(2, Timestamp.valueOf(t.getDdate()));
ps.setString(3, t.getName());
ps.setInt(4, t.getType());
ps.setString(5, t.getShowType());
ps.setInt(6, t.getSeq());
ps.setString(7, t.getTagType());
ps.setString(8, t.getTagType() + "-" + t.getName());
});
System.out.printf("线程 %s 插入成功%n", Thread.currentThread().getName());
} catch (Exception ex) {
ex.printStackTrace();
status.setRollbackOnly();
}
return null;
});
}
public PlatformTransactionManager txManager() {
DataSourceTransactionManager tm = new DataSourceTransactionManager();
tm.setDataSource(jdbcTemplate.getDataSource());
return tm;
}
}

浙公网安备 33010602011771号