datasophon集成Flink1.20.0
DataSophon集成Flink1.20.0升级手册
构建压缩包
下载flink官方包 flink-1.20.0-bin-scala_2.12.tgz
# Unpack the official Flink distribution into ./flink-1.20.0
tar -zxvf flink-1.20.0-bin-scala_2.12.tgz
# Bundle Hudi support by default. The jar must be copied into the unpacked
# lib/ directory BEFORE the archive is created, otherwise it is not shipped.
cp ./hudi-flink1.19-bundle-0.13.0.jar ./flink-1.20.0/lib/
# Re-pack under the package name referenced by service_ddl.json
tar czf flink-1.20.0.tar.gz flink-1.20.0
# The .md5 file contains only the checksum (first field of md5sum output)
md5sum flink-1.20.0.tar.gz | awk '{print $1}' > flink-1.20.0.tar.gz.md5
# Publish the package and its checksum to the DataSophon package repository
cp ./flink-1.20.0.tar.gz ./flink-1.20.0.tar.gz.md5 /opt/datasophon/DDP/packages/
修改service_ddl.json
vi /opt/datasophon/datasophon-manager-1.2.1/conf/meta/DDP-1.2.1/FLINK/service_ddl.json
{
"name": "FLINK",
"label": "Flink",
"description": "实时计算引擎",
"version": "1.20.0",
"sortNum": 6,
"dependencies": [],
"packageName": "flink-1.20.0.tar.gz",
"decompressPackageName": "flink-1.20.0",
"runAs": "root",
"roles": [
{
"name": "FlinkClient",
"label": "FlinkClient",
"roleType": "client",
"cardinality": "1+",
"logFile": "logs/flink.log"
}
],
"configWriter": {
"generators": [
{
"filename": "config.yaml",
"configFormat": "custom",
"templateName": "properties3.ftl",
"outputDirectory": "conf",
"includeParams": [
"jobmanager.bind-host",
"jobmanager.rpc.address",
"jobmanager.rpc.port",
"jobmanager.memory.process.size",
"jobmanager.execution.failover-strategy",
"taskmanager.bind-host",
"taskmanager.host",
"taskmanager.numberOfTaskSlots",
"taskmanager.memory.process.size",
"parallelism.default",
"enableJMHA",
"high-availability.type",
"high-availability.storageDir",
"high-availability.zookeeper.quorum",
"high-availability.zookeeper.path.root",
"high-availability.zookeeper.client.acl",
"execution.checkpointing.interval",
"execution.checkpointing.mode",
"state.backend",
"state.checkpoints.dir",
"state.savepoints.dir",
"custom.flink.conf.yaml",
"classloader.check-leaked-classloader"
]
},
{
"filename": "masters",
"configFormat": "custom",
"templateName": "flink_masters.ftl",
"outputDirectory": "conf",
"includeParams": [
"flink.master.hostnames"
]
},
{
"filename": "workers",
"configFormat": "custom",
"templateName": "flink_workers.ftl",
"outputDirectory": "conf",
"includeParams": [
"flink.worker.hostnames"
]
}
]
},
"parameters": [
{
"name": "jobmanager.bind-host",
"label": "JobManager绑定主机",
"description": "绑定JobManager的主机地址",
"required": true,
"type": "input",
"value": "0.0.0.0",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "0.0.0.0"
},
{
"name": "jobmanager.rpc.address",
"label": "JobManager RPC地址",
"description": "JobManager的RPC服务地址(填写master服务器的hostname)",
"required": true,
"type": "input",
"value": "bigdata1",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "bigdata1"
},
{
"name": "jobmanager.rpc.port",
"label": "JobManager RPC端口",
"description": "JobManager的RPC服务端口",
"required": true,
"type": "input",
"value": "6123",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "6123"
},
{
"name": "jobmanager.memory.process.size",
"label": "JobManager进程内存大小",
"description": "",
"required": true,
"type": "input",
"value": "1600m",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "1600m"
},
{
"name": "jobmanager.execution.failover-strategy",
"label": "JobManager执行故障转移策略",
"description": "",
"required": true,
"type": "input",
"value": "region",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "region"
},
{
"name": "taskmanager.bind-host",
"label": "TaskManager绑定主机",
"description": "绑定TaskManager的主机地址",
"required": true,
"type": "input",
"value": "0.0.0.0",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "0.0.0.0"
},
{
"name": "taskmanager.host",
"label": "TaskManager主机",
"description": "TaskManager所在服务器的hostname",
"required": true,
"type": "input",
"value": "bigdata1",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "bigdata1"
},
{
"name": "taskmanager.numberOfTaskSlots",
"label": "TaskManager任务槽数量",
"description": "",
"required": true,
"type": "input",
"value": "1",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "1"
},
{
"name": "taskmanager.memory.process.size",
"label": "TaskManager进程内存大小",
"description": "",
"required": true,
"type": "input",
"value": "1024m",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "1024m"
},
{
"name": "parallelism.default",
"label": "默认并行度",
"description": "设置Flink作业的默认并行度",
"required": true,
"type": "input",
"value": "1",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "1"
},
{
"name": "enableJMHA",
"label": "开启JobManager高可用",
"description": "",
"required": true,
"type": "switch",
"value": false,
"configurableInWizard": true,
"hidden": false,
"defaultValue": false
},
{
"name": "high-availability.type",
"label": "使用zookeeper搭建高可用",
"description": "使用zookeeper搭建高可用",
"required": true,
"type": "input",
"value": "zookeeper",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "zookeeper"
},{
"name": "high-availability.storageDir",
"label": "元数据存储HDFS目录",
"description": "存储JobManager的元数据到HDFS",
"required": true,
"type": "input",
"value": "hdfs://nameservice1/flink/ha/",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "hdfs://bigdata1:8020/flink/ha/"
},{
"name": "high-availability.zookeeper.quorum",
"label": "ZK集群地址",
"description": "配置ZK集群地址",
"required": true,
"type": "input",
"value": "${zkUrls}",
"configurableInWizard": true,
"hidden": false,
"defaultValue": ""
},
{
"name": "high-availability.zookeeper.path.root",
"label": "ZK元数据目录",
"description": "配置ZK元数据目录",
"required": true,
"type": "input",
"value": "/flink",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "/flink"
},
{
"name": "high-availability.zookeeper.client.acl",
"label": "high-availability.zookeeper.client.acl",
"description": "默认是 open,如果zookeeper security启用了更改成creator",
"required": true,
"type": "input",
"value": "open",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "open"
},
{
"name": "execution.checkpointing.interval",
"label": "创建检查点的间隔时间",
"description": "设置检查点创建的间隔时间",
"required": true,
"type": "input",
"value": "3min",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "3min"
},
{
"name": "execution.checkpointing.mode",
"label": "定义检查点的模式",
"description": "定义检查点的模式[EXACTLY_ONCE, AT_LEAST_ONCE...]",
"required": true,
"type": "input",
"value": "EXACTLY_ONCE",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "EXACTLY_ONCE"
},
{
"name": "state.backend",
"label": "配置状态后端-使用文件系统作为快照存储",
"description": "配置状态后端[hashmap, rocksdb,<class-name-of-factory>...]",
"required": true,
"type": "input",
"value": "filesystem",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "rocksdb"
},
{
"name": "state.checkpoints.dir",
"label": "检查点存储的目录",
"description": "检查点存储的目录",
"required": true,
"type": "input",
"value": "hdfs://nameservice1/flink/flink-checkpoints",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "hdfs://nameservice1/flink-checkpoints"
},
{
"name": "state.savepoints.dir",
"label": "保存点(用户手动触发的检查点)存储的目录",
"description": "保存点(用户手动触发的检查点)存储的目录",
"required": true,
"type": "input",
"value": "hdfs://nameservice1/flink/flink-savepoints",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "hdfs://nameservice1/flink-savepoints"
},
{
"name": "custom.flink.conf.yaml",
"label": "自定义配置flink-conf.yaml",
"description": "自定义配置",
"configType": "custom",
"required": false,
"type": "multipleWithKey",
"value": [],
"configurableInWizard": true,
"hidden": false,
"defaultValue": ""
},
{
"name": "classloader.check-leaked-classloader",
"label": "禁用classloader.check",
"description": "禁用classloader.check",
"required": true,
"type": "switch",
"value": false,
"configurableInWizard": true,
"hidden": false,
"defaultValue": false
},
{
"name": "flink.master.hostnames",
"label": "Flink Master 主机名和端口列表",
"description": "逗号分隔的 Flink Master 主机名和端口列表(如 bigdata1:8082,bigdata2:8082)",
"required": true,
"type": "input",
"value": "bigdata1:8082,bigdata2:8082",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "localhost:8082"
},
{
"name": "flink.worker.hostnames",
"label": "Flink Worker 主机名列表",
"description": "逗号分隔的 Flink Worker 主机名或 IP 地址列表",
"required": true,
"type": "input",
"value": "bigdata1,bigdata2,bigdata3",
"configurableInWizard": true,
"hidden": false,
"defaultValue": "localhost"
}
]
}
vi /opt/datasophon/datasophon-worker/conf/templates/flink-conf.ftl
<#-- flink-conf.ftl: renders the Flink configuration from the `parameters` hash. -->
<#-- NOTE(review): assumes `parameters` is a nested hash (e.g. parameters.jobmanager.rpc.address); confirm against the data model the worker supplies. -->
<#-- Recursive macro: flattens a nested hash into dotted "a.b.c: value" lines. -->
<#-- Output lines inside the macro start at column 0 on purpose: leading whitespace before an interpolation is emitted verbatim. -->
<#macro nested parentKey obj>
  <#list obj?keys as subKey>
    <#-- Build the full dotted key name (e.g. custom.flink.conf.yaml.someKey) -->
    <#local fullKey = parentKey + "." + subKey>
    <#if obj[subKey]?is_hash_ex>
      <#-- Sub-hash: recurse. A macro call is written <@name ... /> and must be self-closed. -->
      <@nested fullKey, obj[subKey] />
    <#else>
${fullKey}: ${obj[subKey]}
    </#if>
  </#list>
</#macro>
<#-- JobManager settings. -->
<#-- Hyphenated keys need bracket syntax (a.bind-host would parse as "a.bind minus host"). -->
<#-- The whole path is parenthesised so the default also applies when an intermediate level is missing. -->
<#-- Defaults are strings so numbers are not rendered with locale digit grouping (6123 -> "6,123"). -->
jobmanager:
  bind-host: ${(parameters.jobmanager["bind-host"])!"0.0.0.0"}
  rpc:
    address: ${(parameters.jobmanager.rpc.address)!"localhost"}
    port: ${(parameters.jobmanager.rpc.port)!"6123"}
  memory:
    process:
      size: ${(parameters.jobmanager.memory.process.size)!"1600m"}
  execution:
    failover-strategy: ${(parameters.jobmanager.execution["failover-strategy"])!"region"}
<#-- TaskManager settings -->
taskmanager:
  bind-host: ${(parameters.taskmanager["bind-host"])!"0.0.0.0"}
  host: ${(parameters.taskmanager.host)!"localhost"}
  numberOfTaskSlots: ${(parameters.taskmanager.numberOfTaskSlots)!"1"}
  memory:
    process:
      size: ${(parameters.taskmanager.memory.process.size)!"1728m"}
<#-- Remaining top-level entries. -->
<#-- Sections already rendered explicitly (jobmanager, taskmanager, custom) are skipped so no key is emitted twice. -->
<#list parameters?keys as key>
  <#if key != "jobmanager" && key != "taskmanager" && key != "custom">
    <#if parameters[key]?is_hash_ex>
      <#-- Hash value: flatten it recursively -->
      <@nested key, parameters[key] />
    <#else>
      <#-- Scalar value: emit the key/value pair directly -->
${key}: ${parameters[key]}
    </#if>
  </#if>
</#list>
<#-- User-defined entries stored under custom.flink.conf.yaml, flattened with the recursive macro -->
<#if (parameters.custom.flink.conf.yaml)?has_content>
  <@nested "custom.flink.conf.yaml", parameters.custom.flink.conf.yaml />
</#if>
新增flink_masters.ftl
vi /opt/datasophon/datasophon-worker/conf/templates/flink_masters.ftl
<#-- flink_masters.ftl: writes one trimmed entry per line from each item's comma-separated value. -->
<#list itemList as entry>
  <#list entry.value?split(",") as master>
${master?trim}
  </#list>
</#list>
注意:所有节点都需要操作
新增flink_workers.ftl
vi /opt/datasophon/datasophon-worker/conf/templates/flink_workers.ftl
<#-- flink_workers.ftl: writes one trimmed entry per line from each item's comma-separated value. -->
<#list itemList as entry>
  <#list entry.value?split(",") as worker>
${worker?trim}
  </#list>
</#list>
注意:所有节点都需要操作
修改环境变量
vim /etc/profile.d/datasophon-env.sh
# Flink installation directory (matches decompressPackageName flink-1.20.0)
export FLINK_HOME=/opt/datasophon/flink-1.20.0
# Set HADOOP_CLASSPATH to the output of `hadoop classpath`, evaluated when this file is sourced
export HADOOP_CLASSPATH=$(hadoop classpath)
source /etc/profile.d/datasophon-env.sh
各节点同样操作
重启
各节点worker重启
sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker
主节点重启api
sh /opt/datasophon/datasophon-manager-1.2.1/bin/datasophon-api.sh restart api
测试
flink run -d -t yarn-per-job $FLINK_HOME/examples/streaming/WordCount.jar
flink run-application -t yarn-application $FLINK_HOME/examples/streaming/TopSpeedWindowing.jar

浙公网安备 33010602011771号