// SQL Server CDC data-formatting example
import com.ververica.cdc.connectors.sqlserver.SqlServerSource;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
// Connects directly to the production environment; operate only during off-peak hours
// (the CDC source attaches to the intermediate database — verify that change data can
// actually be captured from it only at low-traffic times).
public class Test02 {
/**
* 构造SQL Server CDC数据源
*/
public static DebeziumSourceFunction<DataChangeInfo> buildDataChangeSource() {
//String[] tables = SQLServerConstant.SQLSERVER_TABLE_LIST.replace(" ", "").split(",");
return SqlServerSource.<DataChangeInfo>builder()
.hostname(SQLServerConstant.SQLSERVER_HOST)
.port(SQLServerConstant.SQLSERVER_PORT)
.database(SQLServerConstant.SQLSERVER_DATABASE) // monitor sqlserver database
.tableList("dbo.c2") // monitor products table
.username(SQLServerConstant.SQLSERVER_USER_NAME)
.password(SQLServerConstant.SQLSERVER_PASSWORD)
/*
*initial初始化快照,即全量导入后增量导入(检测更新数据写入)
* latest:只进行增量导入(不读取历史变化)
*/
.startupOptions(com.ververica.cdc.connectors.base.options.StartupOptions.latest())
.deserializer(new SQLServerJsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
.build();
}
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
DebeziumSourceFunction<DataChangeInfo> dataChangeInfoMySqlSource = buildDataChangeSource();
DataStream<DataChangeInfo> streamSource = env
.addSource(dataChangeInfoMySqlSource, "SQLServer-source")
.setParallelism(1);
streamSource.print();
env.execute("SQLServer-stream-cdc");
}
}
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.source.SourceRecord;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.List;
import java.util.Optional;
@Slf4j
public class SQLServerJsonDebeziumDeserializationSchema implements DebeziumDeserializationSchema
public static final String TS_MS = "ts_ms";
public static final String BEFORE = "before";
public static final String AFTER = "after";
public static final String SOURCE = "source";
public static final String CREATE = "CREATE";
public static final String UPDATE = "UPDATE";
@Override
public void deserialize(SourceRecord sourceRecord, Collector<DataChangeInfo> collector) throws Exception {
try {
String topic = sourceRecord.topic();
String[] fields = topic.split("\\.");
String database = fields[1];
String tableName = fields[2];
Struct struct = (Struct) sourceRecord.value();
final Struct source = struct.getStruct(SOURCE);
DataChangeInfo dataChangeInfo = new DataChangeInfo();
dataChangeInfo.setBeforeData(getJsonObject(struct, BEFORE).toJSONString());
dataChangeInfo.setAfterData(getJsonObject(struct, AFTER).toJSONString());
// 获取操作类型 CREATE UPDATE DELETE 1新增 2修改 3删除
Envelope.Operation operation = Envelope.operationFor(sourceRecord);
String type = operation.toString().toUpperCase();
int eventType = type.equals(CREATE) ? 1 : UPDATE.equals(type) ? 2 : 3;
dataChangeInfo.setEventType(eventType);
dataChangeInfo.setDatabase(database);
dataChangeInfo.setTableName(tableName);
ZoneId zone = ZoneId.systemDefault();
Long timestamp = Optional.ofNullable(struct.get(TS_MS)).map(x -> Long.parseLong(x.toString())).orElseGet(System::currentTimeMillis);
dataChangeInfo.setChangeTime(LocalDateTime.ofInstant(Instant.ofEpochMilli(timestamp), zone));
dataChangeInfo.setOp(struct.get("op").toString());
//7.输出数据
collector.collect(dataChangeInfo);
} catch (Exception e) {
log.error("SQLServer消息读取自定义序列化报错:{}", e.getMessage());
}
}
/**
*
* 从源数据获取出变更之前或之后的数据
*/
private JSONObject getJsonObject(Struct value, String fieldElement) {
Struct element = value.getStruct(fieldElement);
JSONObject jsonObject = new JSONObject();
if (element != null) {
Schema afterSchema = element.schema();
List<Field> fieldList = afterSchema.fields();
for (Field field : fieldList) {
Object afterValue = element.get(field);
jsonObject.put(field.name(), afterValue);
}
}
return jsonObject;
}
@Override
public TypeInformation<DataChangeInfo> getProducedType() {
return TypeInformation.of(DataChangeInfo.class);
}
}
import lombok.Data;
import java.io.Serializable;
import java.time.LocalDateTime;

/**
 * Change-data-capture event payload emitted by the SQL Server CDC source.
 *
 * <p>Getters/setters/equals/hashCode/toString are generated by Lombok's
 * {@code @Data}.
 */
@Data
public class DataChangeInfo implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Database name (parsed from the record topic). */
    private String database;

    /** Table name (parsed from the record topic). */
    private String tableName;

    /** Time the change occurred (from the envelope's {@code ts_ms}, system zone). */
    private LocalDateTime changeTime;

    /** Change type: 1 = insert, 2 = update, 3 = delete. */
    private Integer eventType;

    /** Operation code copied from the Debezium envelope's {@code op} field. */
    private String op;

    /** Row image before the change, serialized as a JSON string. */
    private String beforeData;

    /** Row image after the change, serialized as a JSON string. */
    private String afterData;
}
/**
 * Connection settings for the SQL Server CDC source.
 *
 * <p>Placeholder values ({@code "xx"}) must be replaced before running; in
 * production, prefer externalized configuration over hard-coded credentials.
 */
public final class SQLServerConstant {

    /** Database host. */
    public static final String SQLSERVER_HOST = "xx";
    /** Database port. */
    public static final Integer SQLSERVER_PORT = 1433;
    /** Database name. */
    public static final String SQLSERVER_DATABASE = "flinkcdc";
    /** Comma-separated list of monitored tables, "schema.table" form. */
    public static final String SQLSERVER_TABLE_LIST = "dbo.xx";
    /** Login user name. */
    public static final String SQLSERVER_USER_NAME = "xx";
    /** Login password. */
    public static final String SQLSERVER_PASSWORD = "xx";

    /** Constants holder; not instantiable. */
    private SQLServerConstant() {
    }
}