并发抽取数据(ORACLE到mysql)

今天业务需求需要从ORACLE数据库抽取数据到MYSQL,自己写了个并发抽取玩一下

 

service层,对数据进行了切分,每50万一组

@Service
public class DemoService {
    @Autowired
    private JdbcTemplate jdbcTemplate;

    @Autowired
    private DoInsert doInsert;

    /** Chunk step: each range query covers ids [min, min + CHUNK_SIZE]. */
    private static final int CHUNK_SIZE = 500000;

    /**
     * Splits the id range [1, 2222789] into ~500k chunks and hands each chunk's
     * SELECT to the async {@link DoInsert#insert}.
     *
     * Fix over the original: the while-condition hard-coded the literal 2222789
     * instead of using {@code max}, the last in-loop chunk overshot {@code max},
     * and the post-loop "remainder" query ran with min &gt; max (an empty range).
     * Capping each chunk at {@code max} makes the trailing query unnecessary.
     *
     * @throws SQLException propagated from the insert path
     */
    public void demoInsert( ) throws SQLException {
        int min = 1;
        int max = 2222789;
        List<String> sqls = new ArrayList<>();
        while (min <= max) {
            // Closed interval [min, upper]; last chunk is clamped to max.
            int upper = Math.min(min + CHUNK_SIZE, max);
            sqls.add("select db_id,owner,sgement_name,segment_type,partition_name,segment_size,id"
                    + " from DB_SEGMENTS where id>=" + min + " and id<=" + upper);
            min = upper + 1;
        }
        for (String s : sqls) {
            doInsert.insert(s, jdbcTemplate);
        }
    }
}

 

读取一万条数据,做一次insert操作

@Slf4j
@Component
public class DoInsert {
    /** Rows buffered before each batchUpdate round trip. */
    private static final int BATCH_SIZE = 10000;

    /**
     * Streams rows from the Oracle source with {@code sql} and writes them to
     * the MySQL target via {@code jdbcTemplate} in batches of {@link #BATCH_SIZE}.
     *
     * Fixes over the original: the Oracle connection was never closed (one
     * leaked connection per async task), and a failed creatConnection() or
     * query returned a null ResultSet causing an NPE here instead of a clear
     * error.
     *
     * @param sql          range SELECT against the Oracle source table
     * @param jdbcTemplate template bound to the MySQL target datasource
     * @throws SQLException if the source connection or query fails
     */
    @Async("testExecutor")
    public void insert(String sql, JdbcTemplate jdbcTemplate) throws SQLException {
        JdbcConnection jdbcConnection = new JdbcConnection("jdbc:oracle:thin:@192.168.0.210:1521/exam",
                "issue","issue","ORACLE");
        if (!jdbcConnection.creatConnection()) {
            throw new SQLException("cannot connect to source: jdbc:oracle:thin:@192.168.0.210:1521/exam");
        }
        try {
            ResultSet rs = jdbcConnection.executeQuery(sql);
            if (rs == null) {
                throw new SQLException("source query failed: " + sql);
            }
            List<Object[]> result = new ArrayList<>();
            String insert_sql = "insert into db_segments values (?,?,?,?,?,?,?)";
            while (rs.next()) {
                result.add(new Object[]{
                        rs.getLong(1), rs.getString(2), rs.getString(3),
                        rs.getString(4), rs.getString(5), rs.getLong(6),
                        rs.getInt(7)
                });
                if (result.size() == BATCH_SIZE) {
                    jdbcTemplate.batchUpdate(insert_sql, result);
                    result = new ArrayList<>();
                }
            }
            // Flush the final partial batch, if any.
            if (!result.isEmpty()) {
                jdbcTemplate.batchUpdate(insert_sql, result);
            }
        } finally {
            // Closing the connection also releases the Statement/ResultSet.
            jdbcConnection.closeConnection();
        }
    }
}

线程池配置:
package com.example.demo;

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;

@Configuration
@EnableAsync
public class TestThreadPoolConfig {

    // Single source of truth for the pool tuning. The original declared
    // constants (4/8/20, "test_thread_") but never used them, hard-coding
    // different values in the bean; the constants below hold the values
    // actually in effect, so runtime behavior is unchanged.
    private static final int CORE_POOL_SIZE = 4;

    private static final int MAX_POOL_SIZE = 12;

    private static final int QUEUE_CAPACITY = 50;

    private static final int KEEP_ALIVE_SECONDS = 60;

    private static final String THREAD_NAME_PREFIX = "InstMin-";

    /**
     * Executor backing the {@code @Async("testExecutor")} extract tasks.
     */
    @Bean
    public Executor testExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setCorePoolSize(CORE_POOL_SIZE);
        taskExecutor.setMaxPoolSize(MAX_POOL_SIZE);
        taskExecutor.setQueueCapacity(QUEUE_CAPACITY);
        taskExecutor.setKeepAliveSeconds(KEEP_ALIVE_SECONDS);
        taskExecutor.setThreadNamePrefix(THREAD_NAME_PREFIX);
        // NOTE(review): default rejection policy is AbortPolicy. With ~5 chunk
        // tasks the 50-slot queue never fills, but consider CallerRunsPolicy
        // if the chunk count ever exceeds queue + max pool size.
        taskExecutor.initialize();
        return taskExecutor;
    }
}


JdbcConnection连接类:
package com.example.demo;

import lombok.extern.slf4j.Slf4j;

import java.sql.*;
import java.util.Properties;

@Slf4j
public class JdbcConnection {
    private String URL;
    private String USERNAME;
    private String PASSWORD;
    private String type;
    private Connection conn = null;

    public String getURL() {
        return this.URL;
    }

    public void setURL(String uRL) {
        this.URL = uRL;
    }

    public String getPASSWORD() {
        return this.PASSWORD;
    }

    public void setPASSWORD(String pASSWORD) {
        this.PASSWORD = pASSWORD;
    }

    public JdbcConnection(String URL, String PASSWORD, String USERNAME, String type) {
        this.URL=URL;
        this.type=type;
        this.PASSWORD=PASSWORD;
        this.USERNAME=USERNAME;
        String DRIVER = null;
        if(type.equals("ORACLE")){
            DRIVER="oracle.jdbc.driver.OracleDriver";
        }else if((type.equals("MYSQL"))) {
            DRIVER="com.mysql.cj.jdbc.Driver";
        }else if((type.equals("PGSQL"))){
            DRIVER="org.postgresql.Driver";
        }
        try {
            Class.forName(DRIVER);
        } catch (Exception var2) {
            log.info("加载数据库驱动失败!");
            log.info(var2.getMessage());
        }
    }

    public boolean creatConnection() {
        boolean isCreated = true;
        Properties props = new Properties() ;
        props.put( "user" , USERNAME) ;
        props.put( "password" , PASSWORD) ;
        try {
            if(type.equals("ORACLE")){
                props.put( "oracle.net.CONNECT_TIMEOUT" , "5000") ;
            }else if((type.equals("MYSQL"))) {
                props.put( "connectTimeout" , "5000");
            }else if((type.equals("PGSQL"))){
                props.put( "loginTimeout" , "5");
            }
            this.conn = DriverManager.getConnection(URL, props);
        } catch (Exception var3) {
            isCreated = false;
            log.info(var3.getMessage());
        }
        return isCreated;
    }


    public boolean executeUpdate(String sql) {
        if (this.conn == null) {
            this.creatConnection();
        }
        try {
            Statement stmt = this.conn.createStatement();
            stmt.executeUpdate(sql);
        } catch (SQLException var4) {
            log.info("SQL更新失败!");
            log.info(var4.getMessage());
            return false;
        }
        return true;
    }

    public ResultSet executeQuery(String sql) {
        ResultSet rs = null;
        if (this.conn == null) {
            this.creatConnection();
        }
        try {
            Statement stmt = this.conn.createStatement(ResultSet.TYPE_FORWARD_ONLY,
                    ResultSet.CONCUR_READ_ONLY);
            stmt.setFetchSize(5000);
            stmt.setQueryTimeout(3600);
            rs = stmt.executeQuery(sql);
        } catch (SQLException var4) {
            log.info("SQL查询失败!");
            log.info(var4.getMessage());
        }
        return rs;
    }

    public void closeConnection() {
        if (this.conn != null) {
            try {
                this.conn.close();
            } catch (SQLException var5) {
                var5.printStackTrace();
            } finally {
                this.conn = null;
            }
        }
    }
}

 

application配置文件

server.port=8091

# Target datasource (MySQL). rewriteBatchedStatements=true is what lets the
# driver rewrite jdbcTemplate.batchUpdate into multi-value inserts.
spring.datasource.url=jdbc:mysql://10.50.16.201:3306/monitor?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true&allowMultiQueries=true&rewriteBatchedStatements=true
#spring.datasource.url=jdbc:oracle:thin:@192.168.0.210:1521/exam
spring.datasource.username=issue
spring.datasource.password=issue
# Fix: the URL above is MySQL but the driver class was the Oracle driver
# (oracle.jdbc.driver.OracleDriver) — that mismatch breaks datasource startup.
spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver

 

第一次执行抽取的时候,application配置的datasource没有加 rewriteBatchedStatements=true

200万的数据抽取了10几分钟,感觉不对,观察mysql的processlist;发现4个会话并行执行insert into xx values(xx,xx,xx,xx);

state事件为waiting for handler commit,官网说明(The thread is waiting for a transaction to commit versus other parts of query processing.)

观察了一下redo size大小2G,观察了一下io也都正常,结论就是事务太多,commit过于频繁,网络开销较高.

然后联想到mysqldump导入数据库的情况,insert into 都是多个value(一次插入多条,减少交互次数,提高效率),类似 insert into xx values (xx,xx,xx,xx),(xx,xx,xx,xx)...

怀疑还是jdbcTemplate.batchUpdate方法有问题,查阅相关资料,只有当rewriteBatchedStatements=true时,MySQL的jdbc驱动才会把batch提交改写为一条语句,发挥batch提交的特点,mysql数据库行为变为一个insert into 插入多个value;

修改参数重新执行:几秒就完成了200w数据的抽取;

哎 坑!!!

 

后面又用mysql-》oracle做了测试,发现oracle的jdbc驱动在处理batchUpdate就没类似的性能问题,也是几秒钟就完成了抽取;

 

 

感觉还是mybatis这种批量插入比较好(可控,简单明了),自己拼接多个value的插入

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.example.demo.DemoMapper">
    <!-- Multi-row insert: foreach expands the list into one
         "insert ... values (...),(...),..." statement, so each batch costs a
         single round trip regardless of driver batch-rewrite settings.
         NOTE(review): column "sgement_name" looks like a typo for
         "segment_name", but it matches the SELECT column list used by the
         extract code, so it presumably matches the table DDL - confirm. -->
    <insert id="batchInsert" parameterType="java.util.List">
        insert into db_segments(db_id,owner,sgement_name,segment_type,partition_name,segment_size,id)values
        <foreach collection="list" separator="," item="item">
            (#{item.db_id},#{item.owner},#{item.sgement_name},#{item.segment_type},#{item.partition_name},#{item.segment_size},#{item.id})
        </foreach>
    </insert>
</mapper>

  


posted @ 2022-03-02 14:04  阿西吧li  阅读(275)  评论(0编辑  收藏  举报