从 Flink checkpoint 中解析出 Iceberg 的 snapshot ID
在使用 Flink SQL 读取 Iceberg 快照的时候,我发现了一个问题:不知道当前已经消费到哪个快照 ID。同时 Flink 1.19 其实已经取消了 checkpoint 数据的解析工具,所以我打算自己实现一个 Java 程序来解析 Flink checkpoint。其中主要用到的是 flink-runtime 中的 CheckpointMetadata 对象(通过 Checkpoints.loadCheckpointMetadata 加载)。下面是完整的依赖和代码:
<!-- Maven dependencies for the standalone checkpoint-metadata parser shown below. -->
<dependencies>
<!-- flink-core: supplies the org.apache.flink.api.common.* and org.apache.flink.core.* classes
     imported by the parser (LongSerializer, FSDataInputStream, DataInputDeserializer). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>1.19.3</version>
<!-- NOTE(review): "provided" scope is left commented out, presumably so the classes are on the
     classpath when main() is run directly from the IDE/CLI; confirm before packaging. -->
<!-- <scope>provided</scope>-->
</dependency>
<!-- flink-runtime: supplies the org.apache.flink.runtime.* classes imported by the parser
     (Checkpoints, CheckpointMetadata, OperatorState, OperatorSubtaskState, state handles). -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime</artifactId>
<version>1.19.3</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- Iceberg's Flink 1.19 integration.
     NOTE(review): the parser class imports no org.apache.iceberg.* types, so this artifact may
     not be required to compile it; verify whether it is needed at runtime before removing. -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-flink-1.19</artifactId>
<version>1.6.1</version>
</dependency>
</dependencies>
下面是具体的代码:
package com.demo.flink;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.memory.DataInputDeserializer;
import org.apache.flink.runtime.checkpoint.Checkpoints;
import org.apache.flink.runtime.checkpoint.OperatorState;
import org.apache.flink.runtime.checkpoint.OperatorSubtaskState;
import org.apache.flink.runtime.checkpoint.StateObjectCollection;
import org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata;
import org.apache.flink.runtime.state.*;
import java.io.*;
import java.util.Map;
public class CheckpointMetadataParser {
public static void main(String[] args) throws IOException {
// 读取元数据文件
File f=new File("src/main/resources/_metadata");
FileInputStream fis=new FileInputStream(f);
BufferedInputStream bis = new BufferedInputStream(fis);
DataInputStream dis = new DataInputStream(bis);
// 通过 Flink 的 Checkpoints 类解析元数据文件
CheckpointMetadata savepoint = Checkpoints.loadCheckpointMetadata(dis,
CheckpointMetadataParser.class.getClassLoader(), f.getAbsolutePath());
// 打印当前的 CheckpointId
System.out.println("CheckpointId:" + savepoint.getCheckpointId());
for(OperatorState operatorState : savepoint.getOperatorStates()) {
Map<Integer, OperatorSubtaskState> subtaskStateMap = operatorState.getSubtaskStates();
for(Integer subtaskId : subtaskStateMap.keySet()) {
OperatorSubtaskState subtaskState = subtaskStateMap.get(subtaskId);
StateObjectCollection<OperatorStateHandle> managedOperatorState = subtaskState.getManagedOperatorState();
if(managedOperatorState!=null) {
for(OperatorStateHandle stateHandle: managedOperatorState){
if(stateHandle.getStateNameToPartitionOffsets().containsKey("snapshot-id-state")){
System.out.println("Found snapshot-id-state in: " + stateHandle);
// 获取偏移量状态
OperatorStateHandle.StateMetaInfo metaInfo = stateHandle.getStateNameToPartitionOffsets().get("snapshot-id-state");
long offset = metaInfo.getOffsets()[0];
System.out.println("State offset: " + offset);
// 读取状态偏移量
try(FSDataInputStream in = stateHandle.getDelegateStateHandle().openInputStream()){
in.seek(offset);
byte[] data = new byte[8];
in.read(data);
DataInputDeserializer dataInput = new DataInputDeserializer(data);
// 使用 LongSerializer反序列化
LongSerializer serializer = LongSerializer.INSTANCE;
Long snapshotId = serializer.deserialize(dataInput);
System.out.println("Snapshot ID: " + snapshotId);
}
}
}
}
}
}
}
}

浙公网安备 33010602011771号