Spring Boot 使用 doris-streamloader(Stream Load)写入 Doris,防止批量更新时事务卡住
背景:
使用 MyBatis 批量实时写入和更新 Doris 时,经常出现连接不上的错误,导致 Kafka 死信队列堆积大量消息,消费严重滞后。
https://doris.apache.org/zh-CN/docs/2.0/ecosystem/doris-streamloader/
package com.jiaoda.sentiment.data.etl.service.update;
import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.jiaoda.sentiment.data.etl.service.biz.DwdPublicOpinionDataService;
import lombok.Data;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.FileEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import sun.misc.BASE64Encoder;
import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import static java.util.jar.Pack200.Unpacker.TRUE;
/**
 * Client that imports a JSON file into Apache Doris through the Stream Load HTTP API.
 *
 * <p>The FE host is derived from the master datasource JDBC URL at startup; requests are sent
 * to the FE HTTP port 8030 and redirects are followed for every HTTP method, because a stream
 * load sent to an FE is answered with a 307 redirect.
 *
 * <p>NOTE(review): the Doris account is hard-coded ({@code root} with an empty password);
 * consider moving it to configuration.
 *
 * @author by jerryjhhe
 * @create 2024/5/22 13:41
 */
@Service
@Log4j2
public class DorisStreamLoadClient {

    /** Stream Load endpoint; the hutool {@code {}} placeholders are host, database and table. */
    private static final String URL_TEMPLATE = "http://{}:8030/api/{}/{}/_stream_load";

    /** Default target database (used by {@link #main(String[])}). */
    private static final String DATABASE = "analysis";

    /** Default target table (used by {@link #main(String[])}). */
    private static final String TABLE = "dwd_public_opinion_data";

    /** Header value that switches a boolean stream load property on. */
    private static final String ENABLED = "true";

    /** Holds the datasource JDBC URL when injected; {@link #init()} replaces it with the bare host. */
    @Value("${spring.datasource.dynamic.datasource.master.url}")
    private String dorisIP;

    private final String user = "root";
    private final String password = "";

    /**
     * Base64 of {@code user:password}. Not read by this class; kept package-visible as before,
     * but now computed with commons-codec instead of the JDK-internal sun.misc.BASE64Encoder.
     */
    String ticket = base64(user + ":" + password);

    /**
     * Reduces the injected JDBC URL to its host name once Spring has populated the field.
     */
    @PostConstruct
    public void init() {
        dorisIP = extractHost(dorisIP);
        log.info("DorisStreamLoadClient doris ip :{}", dorisIP);
    }

    /**
     * Extracts the host from a JDBC URL such as {@code jdbc:mysql://10.0.0.1:9030/analysis?x=y}.
     *
     * <p>Unlike a plain {@code split(":")}, this also works when the URL carries no port
     * ({@code jdbc:mysql://host/db}) or lists several hosts (the first one is used).
     *
     * @param jdbcUrl the configured datasource URL
     * @return the host name or IP address
     * @throws IllegalStateException if the URL is missing or contains no host
     */
    static String extractHost(String jdbcUrl) {
        if (jdbcUrl == null || jdbcUrl.trim().isEmpty()) {
            throw new IllegalStateException("Doris datasource url is not configured");
        }
        String url = jdbcUrl.trim();
        int authorityStart = url.indexOf("//");
        if (authorityStart < 0) {
            throw new IllegalStateException("Cannot find host in datasource url: " + jdbcUrl);
        }
        String authority = url.substring(authorityStart + 2);
        int end = authority.length();
        for (int i = 0; i < authority.length(); i++) {
            char c = authority.charAt(i);
            // The host ends at the port, the path, the query or the next host of a host list.
            if (c == ':' || c == '/' || c == '?' || c == ',') {
                end = i;
                break;
            }
        }
        String host = authority.substring(0, end);
        if (host.isEmpty()) {
            throw new IllegalStateException("Cannot find host in datasource url: " + jdbcUrl);
        }
        return host;
    }

    /**
     * Creates a client that follows redirects for every HTTP method, so the redirect issued
     * by the FE is honoured for PUT as well. A fresh builder is used on each call, so no
     * builder state is shared between threads.
     */
    private static CloseableHttpClient newHttpClient() {
        return HttpClients
                .custom()
                .setRedirectStrategy(new DefaultRedirectStrategy() {
                    @Override
                    protected boolean isRedirectable(String method) {
                        // When the target is an FE, the 307 redirect has to be followed.
                        return true;
                    }
                })
                .build();
    }

    /** Base64-encodes the UTF-8 bytes of {@code value}. */
    private static String base64(String value) {
        byte[] encoded = Base64.encodeBase64(value.getBytes(StandardCharsets.UTF_8));
        // Base64 output is pure ASCII; name the charset instead of relying on the platform default.
        return new String(encoded, StandardCharsets.US_ASCII);
    }

    /** Builds the value of the HTTP Basic {@code Authorization} header. */
    private String basicAuthHeader(String username, String password) {
        return "Basic " + base64(username + ":" + password);
    }

    /**
     * Uploads {@code file} (a JSON array of rows) to {@code db.table} with a stream load.
     *
     * <p>Only the HTTP status is checked here. Callers must still inspect
     * {@link StreamLoadResult#getStatus()} to learn whether the load itself succeeded.
     *
     * @param file  file whose content is a JSON array; all rows must share one field order
     *              because {@code fuzzy_parse} is enabled
     * @param db    target database
     * @param table target table
     * @return the parsed stream load response, never {@code null}
     * @throws IOException if the request fails, the HTTP status is not 200, or the response
     *                     body is empty
     */
    public StreamLoadResult putData(File file, String db, String table) throws IOException {
        String loadUrl = CharSequenceUtil.format(URL_TEMPLATE, dorisIP, db, table);
        try (CloseableHttpClient client = newHttpClient()) {
            HttpPut put = new HttpPut(loadUrl);
            put.setHeader(HttpHeaders.EXPECT, "100-continue");
            put.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(user, password));
            // Stream load properties are passed as HTTP headers.
            put.setHeader("label", "label_" + StrUtil.uuid());
            put.setHeader("format", "json");
            put.setHeader("Content-Type", ContentType.APPLICATION_JSON.toString());
            put.setHeader("strip_outer_array", ENABLED);
            // Every row of the array has the same field order: Doris parses the order from the
            // first row only and reads the following rows by index (noted as a 3-5x speed-up).
            put.setHeader("fuzzy_parse", ENABLED);
            // Optional "jsonpaths" / "columns" headers can be added here to map fields to columns.
            put.setEntity(new FileEntity(file));

            try (CloseableHttpResponse response = client.execute(put)) {
                HttpEntity body = response.getEntity();
                // UTF-8 is the fallback when the response does not declare a charset.
                String loadResult = body == null ? "" : EntityUtils.toString(body, StandardCharsets.UTF_8);
                final int statusCode = response.getStatusLine().getStatusCode();
                if (statusCode != 200) {
                    throw new IOException(
                            String.format("Stream load failed. status: %s load result: %s", statusCode, loadResult));
                }
                log.info("Get load result: {}", loadResult);
                StreamLoadResult result = JSON.parseObject(loadResult, StreamLoadResult.class);
                if (result == null) {
                    // fastjson yields null for an empty body; fail here instead of with an NPE in the caller.
                    throw new IOException("Stream load returned an empty response body");
                }
                return result;
            }
        }
    }

    /**
     * Manual smoke test.
     *
     * @param args optional: {@code args[0]} is the FE host (defaults to {@code 127.0.0.1})
     * @throws IOException if the stream load request fails
     */
    public static void main(String[] args) throws IOException {
        DorisStreamLoadClient dorisStreamLoadClient = new DorisStreamLoadClient();
        // Outside Spring neither @Value injection nor init() runs, so the host is set by hand.
        dorisStreamLoadClient.dorisIP = args.length > 0 ? args[0] : "127.0.0.1";
        StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(
                new File("C:\\home\\doris_stream_load\\update_dwdPublicOpinionData.csv"), DATABASE, TABLE);
        System.out.println(streamLoadResult);
    }

    /**
     * Response body of a stream load request, populated by fastjson.
     *
     * <p>NOTE(review): the field names do not match the casing of the Doris response keys
     * exactly (e.g. {@code TxnId}); this presumably relies on fastjson's lenient key
     * matching - confirm before switching JSON libraries.
     */
    @Data
    public static class StreamLoadResult {
        private long Txnid;
        private String Label;
        private String Comment;
        private boolean TwoPhaseCommit;
        /** Load outcome; the usage example treats {@code "Success"} as success. */
        private String Status;
        private String Message;
        private long numberTotalRows;
        private long numberLoadedRows;
        private long numberFilteredRows;
        private long numberUnselectedRows;
        private long loadBytes;
        private long loadTimeMs;
        private long beginTxnTimeMs;
        private long streamLoadPutTimeMs;
        private long readDataTimeMs;
        private long writeDataTimeMs;
        private long commitAndPublishTimeMs;
    }
}
使用:
public void test(Object dwdPublicOpinionDataList){
try {
String jsonString = JSON.toJSONString(dwdPublicOpinionDataList);
FileUtils.write(new File(path), jsonString, "utf-8", true);
DorisStreamLoadClient.StreamLoadResult streamLoadResult = dorisStreamLoadClient.putData(new File(path), "analysis", "dwd_public_opinion_data");
if ("Success".equals(streamLoadResult.getStatus())) {
//成功后的逻辑
}
} catch (
IOException e) {
log.error("dorisStreamLoadClient{}失败 :{}", path, e);
} finally {
try {
FileUtils.delete(new File(path));
} catch (IOException e) {
log.error("删除{}失败 :{}", path, e);
return;
}
}

浙公网安备 33010602011771号