数据库大量数据分段读取、并行处理 demo

关键经验:连接池的大小设置非常重要,否则程序会卡住。经过测试,卡住的原因是连接池连接数太少,
而不是 batch 太小或数据库隔离级别的问题(batch 调到 5000、隔离级别语句注释掉均无影响)。
连接池参考配置(连接数至少 >= 工作线程数):

    ds.setMaxTotal(16); // 至少 >= 线程数
    ds.setMaxIdle(8);
    ds.setMinIdle(2);
package com.hd.protag.mapper;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.hd.protag.bean.Protag;
import org.apache.commons.dbcp2.BasicDataSource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.datasource.DataSourceTransactionManager;
import org.springframework.transaction.PlatformTransactionManager;
import org.springframework.transaction.support.TransactionTemplate;

import java.io.IOException;
import java.sql.Timestamp;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Demo: reads a large table in slices (OFFSET/FETCH) and expands a JSON "tags"
 * column into per-tag rows, inserting them in parallel batches.
 *
 * <p>Pool sizing is critical: the connection pool must hold at least as many
 * connections as there are worker threads, otherwise workers block waiting for
 * a connection and the whole run appears to hang.
 */
public class TagParallelProcessor {

    /** Thread-safe once configured; shared by all worker threads. */
    public final JdbcTemplate jdbcTemplate;

    /** ObjectMapper is thread-safe for readValue; reused across workers. */
    private final ObjectMapper mapper = new ObjectMapper();

    private static final TypeReference<List<Protag>> LIST_TYPE = new TypeReference<List<Protag>>() {};

    /** Rows buffered per worker before each batch insert is flushed. */
    private static final int BATCH = 500;

    private static final BasicDataSource ds;
    static {
        ds = new BasicDataSource();
        // Must be >= the worker thread count (8 below), or threads starve
        // waiting for a free connection and the job stalls.
        ds.setMaxTotal(12);
        ds.setMaxIdle(8);
        ds.setMinIdle(2);

        // NOTE(review): credentials hard-coded for the demo only — externalize
        // to configuration before any real use.
        ds.setUrl("jdbc:sqlserver://xxx:1433;databaseName=xxx");
        ds.setUsername("xxx");
        ds.setPassword("xxx");
    }

    public TagParallelProcessor() {
        this.jdbcTemplate = new JdbcTemplate(ds);
    }

    /**
     * Counts the source rows, splits them into equal slices and processes each
     * slice on its own worker thread, blocking until all slices finish.
     *
     * @throws InterruptedException if interrupted while waiting for the workers
     */
    public void handleAll() throws InterruptedException {
        // queryForObject can return null; guard before unboxing to long.
        Long total = jdbcTemplate.queryForObject("SELECT COUNT(*) FROM temp032_t", Long.class);
        System.out.println("总行数 = " + total);
        if (total == null || total == 0) return;

        int threads = 8;
        long step = total / threads + 1; // round up so the slices cover every row
        ExecutorService pool = Executors.newFixedThreadPool(threads);
        CountDownLatch latch = new CountDownLatch(threads);

        try {
            for (int i = 0; i < threads; i++) {
                final long offset = i * step;
                pool.submit(() -> {
                    try {
                        handleRange(offset, step);
                    } catch (Exception e) {
                        e.printStackTrace();
                    } finally {
                        latch.countDown(); // always count down so await() can't hang
                    }
                });
            }
            latch.await();
        } finally {
            pool.shutdown(); // shut the pool down even if await() is interrupted
        }
    }

    /**
     * Processes one slice of temp032_t: streams rows [start, start + rows),
     * parses each row's JSON "tags" column and batch-inserts the expanded tags.
     *
     * @param start zero-based row offset of this slice (OFFSET n ROWS)
     * @param rows  number of rows to fetch (FETCH NEXT n ROWS ONLY) — this is a
     *              row count, not an end offset
     */
    public void handleRange(long start, long rows) {
        System.out.printf("线程 %s: offset=%d, rows=%d 开始查询%n", Thread.currentThread().getName(), start, rows);

        TransactionTemplate tx = new TransactionTemplate(txManager());
        // ORDER BY is mandatory for OFFSET/FETCH and must be a stable key so
        // that slices do not overlap or skip rows.
        String sql = "SELECT sellid, ddate, tags FROM temp032_t ORDER BY sellid OFFSET ? ROWS FETCH NEXT ? ROWS ONLY";

        jdbcTemplate.query(
                sql,
                new Object[]{start, rows},
                new int[]{Types.BIGINT, Types.BIGINT},
                rs -> {
                    List<Protag> buffer = new ArrayList<>(BATCH);
                    while (rs.next()) {
                        String sellid = rs.getString("sellid");
                        Timestamp ddate = rs.getTimestamp("ddate");
                        String tags = rs.getString("tags");

                        // Skip rows with no JSON payload; readValue would throw
                        // on a null/empty source.
                        if (tags == null || tags.isEmpty()) continue;

                        try {
                            List<Protag> list = mapper.readValue(tags, LIST_TYPE);
                            System.out.printf("sellid=%s, tags 解析成 %d 条%n", sellid, list.size());

                            for (Protag p : list) {
                                p.setSellid(sellid);
                                // ddate may be NULL in the database; avoid NPE.
                                p.setDdate(ddate == null ? null : ddate.toLocalDateTime());
                                buffer.add(p);
                                if (buffer.size() >= BATCH) {
                                    insertBatch(tx, buffer);
                                    buffer.clear();
                                }
                            }
                        } catch (IOException e) {
                            System.err.println("JSON解析失败 sellid=" + sellid);
                        }
                    }
                    if (!buffer.isEmpty()) insertBatch(tx, buffer); // flush the tail
                    return null;
                });
    }

    /**
     * Inserts one buffered batch inside its own transaction; on any failure the
     * transaction is rolled back and processing continues with the next batch.
     */
    private void insertBatch(TransactionTemplate tx, List<Protag> list) {
        System.out.printf("线程 %s 准备插入 %d 条%n", Thread.currentThread().getName(), list.size());

        tx.execute(status -> {
            // Runs on the transaction's bound connection (JdbcTemplate resolves
            // it via the thread-bound DataSourceTransactionManager).
            jdbcTemplate.execute("SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED");

            try {
                jdbcTemplate.batchUpdate(
                        "INSERT INTO temp031_t (sellid, ddate, name, type, showType, seq, tagType, proTag) " +
                                "VALUES (?,?,?,?,?,?,?,?)",
                        list,
                        BATCH,
                        (ps, t) -> {
                            ps.setString(1, t.getSellid());
                            ps.setTimestamp(2, Timestamp.valueOf(t.getDdate()));
                            ps.setString(3, t.getName());
                            ps.setInt(4, t.getType());
                            ps.setString(5, t.getShowType());
                            ps.setInt(6, t.getSeq());
                            ps.setString(7, t.getTagType());
                            // proTag is the derived display key "tagType-name".
                            ps.setString(8, t.getTagType() + "-" + t.getName());
                        });
                System.out.printf("线程 %s 插入成功%n", Thread.currentThread().getName());
            } catch (Exception ex) {
                ex.printStackTrace();
                status.setRollbackOnly(); // roll back this batch only
            }
            return null;
        });
    }

    /** Builds a transaction manager over the template's DataSource. */
    public PlatformTransactionManager txManager() {
        DataSourceTransactionManager tm = new DataSourceTransactionManager();
        tm.setDataSource(jdbcTemplate.getDataSource());
        return tm;
    }

}

posted @ 2025-07-21 16:32  自在现实  阅读(7)  评论(0)    收藏  举报