datasophon集成Flink1.20.0

DataSophon集成Flink1.20.0升级手册

构建压缩包

下载flink官方包 flink-1.20.0-bin-scala_2.12.tgz

# Unpack the official Flink distribution.
tar -zxvf flink-1.20.0-bin-scala_2.12.tgz

# Bundle Hudi support by default. The jar must be placed in lib/ BEFORE the
# directory is re-packed, otherwise it is not part of flink-1.20.0.tar.gz.
# The target is the extracted directory (relative path), not /flink-1.20.0.
cp ./hudi-flink1.19-bundle-0.13.0.jar ./flink-1.20.0/lib/

# Re-pack under the name that service_ddl.json expects (packageName).
tar czf flink-1.20.0.tar.gz flink-1.20.0

# The checksum is computed on the final tarball, after the jar was added.
md5sum flink-1.20.0.tar.gz | awk '{print $1}' > flink-1.20.0.tar.gz.md5

# Publish the package and its checksum to the DataSophon package directory.
cp ./flink-1.20.0.tar.gz ./flink-1.20.0.tar.gz.md5 /opt/datasophon/DDP/packages/

修改service_ddl.json

vi /opt/datasophon/datasophon-manager-1.2.1/conf/meta/DDP-1.2.1/FLINK/service_ddl.json
{
  "name": "FLINK",
  "label": "Flink",
  "description": "实时计算引擎",
  "version": "1.20.0",
  "sortNum": 6,
  "dependencies": [],
  "packageName": "flink-1.20.0.tar.gz",
  "decompressPackageName": "flink-1.20.0",
  "runAs": "root",
  "roles": [
    {
      "name": "FlinkClient",
      "label": "FlinkClient",
      "roleType": "client",
      "cardinality": "1+",
      "logFile": "logs/flink.log"
    }
  ],
  "configWriter": {
    "generators": [
      {
        "filename": "config.yaml",
        "configFormat": "custom",
        "templateName": "properties3.ftl",
        "outputDirectory": "conf",
        "includeParams": [
          "jobmanager.bind-host",
          "jobmanager.rpc.address",
          "jobmanager.rpc.port",
          "jobmanager.memory.process.size",
          "jobmanager.execution.failover-strategy",
          "taskmanager.bind-host",
          "taskmanager.host",
          "taskmanager.numberOfTaskSlots",
          "taskmanager.memory.process.size",
          "parallelism.default",
		  "enableJMHA",
		  "high-availability.type",
		  "high-availability.storageDir",
		  "high-availability.zookeeper.quorum",
		  "high-availability.zookeeper.path.root",
		  "high-availability.zookeeper.client.acl",
		  "execution.checkpointing.interval",
		  "execution.checkpointing.mode",
		  "state.backend",
		  "state.checkpoints.dir",
		  "state.savepoints.dir",
		  "custom.flink.conf.yaml",
		  "classloader.check-leaked-classloader"
        ]
      },
	  {
        "filename": "masters",
        "configFormat": "custom",
        "templateName": "flink_masters.ftl",
        "outputDirectory": "conf",
        "includeParams": [
          "flink.master.hostnames"
        ]
      },
      {
        "filename": "workers",
        "configFormat": "custom",
        "templateName": "flink_workers.ftl",
        "outputDirectory": "conf",
        "includeParams": [
          "flink.worker.hostnames"
        ]
      }
    ]
  },
  "parameters": [
    {
      "name": "jobmanager.bind-host",
      "label": "JobManager绑定主机",
      "description": "绑定JobManager的主机地址",
      "required": true,
      "type": "input",
      "value": "0.0.0.0",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "0.0.0.0"
    },
    {
      "name": "jobmanager.rpc.address",
      "label": "JobManager RPC地址",
      "description": "JobManager的RPC服务地址(master服务器的hostname)",
      "required": true,
      "type": "input",
      "value": "bigdata1",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "bigdata1"
    },
    {
      "name": "jobmanager.rpc.port",
      "label": "JobManager RPC端口",
      "description": "JobManager的RPC服务端口",
      "required": true,
      "type": "input",
      "value": "6123",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "6123"
    },
    {
      "name": "jobmanager.memory.process.size",
      "label": "JobManager进程内存大小",
      "description": "",
      "required": true,
      "type": "input",
      "value": "1600m",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "1600m"
    },
    {
      "name": "jobmanager.execution.failover-strategy",
      "label": "JobManager执行故障转移策略",
      "description": "",
      "required": true,
      "type": "input",
      "value": "region",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "region"
    },
    {
      "name": "taskmanager.bind-host",
      "label": "TaskManager绑定主机",
      "description": "绑定TaskManager的主机地址",
      "required": true,
      "type": "input",
      "value": "0.0.0.0",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "0.0.0.0"
    },
    {
      "name": "taskmanager.host",
      "label": "TaskManager主机",
      "description": "TaskManager所在服务器的hostname",
      "required": true,
      "type": "input",
      "value": "bigdata1",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "bigdata1"
    },
    {
      "name": "taskmanager.numberOfTaskSlots",
      "label": "TaskManager任务槽数量",
      "description": "",
      "required": true,
      "type": "input",
      "value": "1",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "1"
    },
    {
      "name": "taskmanager.memory.process.size",
      "label": "TaskManager进程内存大小",
      "description": "",
      "required": true,
      "type": "input",
      "value": "1024m",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "1024m"
    },
    {
      "name": "parallelism.default",
      "label": "默认并行度",
      "description": "设置Flink作业的默认并行度",
      "required": true,
      "type": "input",
      "value": "1",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "1"
    },
    {
      "name": "enableJMHA",
      "label": "开启JobManager高可用",
      "description": "",
      "required": true,
      "type": "switch",
      "value": false,
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": false
    },
    {
      "name": "high-availability.type",
      "label": "使用zookeeper搭建高可用",
      "description": "使用zookeeper搭建高可用",
      "required": true,
      "type": "input",
      "value": "zookeeper",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "zookeeper"
    },{
      "name": "high-availability.storageDir",
      "label": "元数据存储HDFS目录",
      "description": "存储JobManager的元数据到HDFS",
      "required": true,
      "type": "input",
      "value": "hdfs://nameservice1/flink/ha/",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "hdfs://bigdata1:8020/flink/ha/"
    },{
      "name": "high-availability.zookeeper.quorum",
      "label": "ZK集群地址",
      "description": "配置ZK集群地址",
      "required": true,
      "type": "input",
      "value": "${zkUrls}",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": ""
    },
    {
      "name": "high-availability.zookeeper.path.root",
      "label": "ZK元数据目录",
      "description": "配置ZK元数据目录",
      "required": true,
      "type": "input",
      "value": "/flink",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "/flink"
    },
    {
      "name": "high-availability.zookeeper.client.acl",
      "label": "high-availability.zookeeper.client.acl",
      "description": "默认是 open,如果zookeeper security启用了更改成creator",
      "required": true,
      "type": "input",
      "value": "open",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "open"
    },
    {
      "name": "execution.checkpointing.interval",
      "label": "创建检查点的间隔时间",
      "description": "设置检查点创建的间隔时间",
      "required": true,
      "type": "input",
      "value": "3min",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "3min"
    },
    {
      "name": "execution.checkpointing.mode",
      "label": "定义检查点的模式",
      "description": "定义检查点的模式[EXACTLY_ONCE, AT_LEAST_ONCE...]",
      "required": true,
      "type": "input",
      "value": "EXACTLY_ONCE",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "EXACTLY_ONCE"
    },
    {
      "name": "state.backend",
      "label": "配置状态后端-使用文件系统作为快照存储",
      "description": "配置状态后端[hashmap, rocksdb,<class-name-of-factory>...]",
      "required": true,
      "type": "input",
      "value": "filesystem",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "rocksdb"
    },
    {
      "name": "state.checkpoints.dir",
      "label": "检查点存储的目录",
      "description": "检查点存储的目录",
      "required": true,
      "type": "input",
      "value": "hdfs://nameservice1/flink/flink-checkpoints",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "hdfs://nameservice1/flink-checkpoints"
    },
    {
      "name": "state.savepoints.dir",
      "label": "保存点(用户手动触发的检查点)存储的目录",
      "description": "保存点(用户手动触发的检查点)存储的目录",
      "required": true,
      "type": "input",
      "value": "hdfs://nameservice1/flink/flink-savepoints",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "hdfs://nameservice1/flink-savepoints"
    },
    {
      "name": "custom.flink.conf.yaml",
      "label": "自定义配置flink-conf.yaml",
      "description": "自定义配置",
      "configType": "custom",
      "required": false,
      "type": "multipleWithKey",
      "value": [],
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": ""
    },
    {
      "name": "classloader.check-leaked-classloader",
      "label": "禁用classloader.check",
      "description": "禁用classloader.check",
      "required": true,
      "type": "switch",
      "value": false,
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": false
    },
	{
      "name": "flink.master.hostnames",
      "label": "Flink Master 主机名和端口列表",
      "description": "逗号分隔的 Flink Master 主机名和端口列表(如 bigdata1:8082,bigdata2:8082)",
      "required": true,
      "type": "input",
      "value": "bigdata1:8082,bigdata2:8082",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "localhost:8082"
    },
    {
      "name": "flink.worker.hostnames",
      "label": "Flink Worker 主机名列表",
      "description": "逗号分隔的 Flink Worker 主机名或 IP 地址列表",
      "required": true,
      "type": "input",
      "value": "bigdata1,bigdata2,bigdata3",
      "configurableInWizard": true,
      "hidden": false,
      "defaultValue": "localhost"
    }
  ]
}
vi /opt/datasophon/datasophon-worker/conf/templates/flink-conf.ftl
<#-- Generic pass: emit every entry of `parameters` as "key: value"; hash values are flattened by the `nested` macro defined below. -->
<#-- NOTE(review): this loop and the "custom options" loop further down both walk every key of `parameters`; if both belong to one template, one of them is redundant. Confirm which variant is intended. -->
<#-- NOTE(review): values are assumed to be strings; booleans or sequences cannot be interpolated directly. Verify against the data model. -->
<#list parameters?keys as key>
  <#if parameters[key]?is_hash_ex>
    <#-- User-defined macro call; the built-in nested directive is only legal inside a macro body. -->
    <@nested key, parameters[key] />
  <#else>
<#-- Interpolation lines start at column 0 so no stray indentation ends up in the generated YAML. -->
${key}: ${parameters[key]}
  </#if>
</#list>




<#-- flink-conf.ftl -->
<#-- JobManager settings -->
<#-- Hyphenated keys use bracket syntax because a.b-c parses as subtraction. -->
<#-- Each path is parenthesised so the default also applies when an intermediate level is missing. -->
jobmanager:
  bind-host: ${(parameters.jobmanager["bind-host"])!"0.0.0.0"}
  rpc:
    address: ${(parameters.jobmanager.rpc.address)!"localhost"}
<#-- The port default is a string so it is never rendered with a locale grouping separator. -->
    port: ${(parameters.jobmanager.rpc.port)!"6123"}
  memory:
    process:
      size: ${(parameters.jobmanager.memory.process.size)!"1600m"}
  execution:
    failover-strategy: ${(parameters.jobmanager.execution["failover-strategy"])!"region"}

<#-- TaskManager settings -->
taskmanager:
  bind-host: ${(parameters.taskmanager["bind-host"])!"0.0.0.0"}
  host: ${(parameters.taskmanager.host)!"localhost"}
  numberOfTaskSlots: ${(parameters.taskmanager.numberOfTaskSlots)!1}
  memory:
    process:
      size: ${(parameters.taskmanager.memory.process.size)!"1728m"}

<#-- Custom options can be added here. -->
<#-- Walk every entry of `parameters`. -->
<#list parameters?keys as key>
  <#-- A hash value has sub-keys, so it is handed to the recursive macro. -->
  <#if parameters[key]?is_hash_ex>
    <@nested key, parameters[key] />
  <#else>
    <#-- Plain value: emit the key/value pair directly. -->
${key}: ${parameters[key]}
  </#if>
</#list>

<#-- Recursive macro that flattens a nested hash into dotted keys. -->
<#-- parentKey: dotted prefix accumulated so far; obj: the hash to flatten. -->
<#macro nested parentKey, obj>
  <#list obj?keys as subKey>
    <#-- Build the full dotted key, e.g. custom.flink.conf.yaml.someKey -->
    <#local fullKey = parentKey + "." + subKey>
    <#if obj[subKey]?is_hash_ex>
      <#-- Sub-key is itself a hash: recurse. -->
      <@nested fullKey, obj[subKey] />
    <#else>
      <#-- Leaf value: emit the key/value pair. -->
${fullKey}: ${obj[subKey]}
    </#if>
  </#list>
</#macro>

<#-- Flatten the custom options through the recursive macro (assumes they live under custom.flink.conf.yaml; confirm against the data model). -->
<#if (parameters.custom.flink.conf.yaml)?has_content>
  <@nested "custom.flink.conf.yaml", parameters.custom.flink.conf.yaml />
</#if>
vi /opt/datasophon/datasophon-worker/conf/templates/flink_masters.ftl
<#-- Writes one line per comma-separated entry of every item's value, trimmed of surrounding whitespace. -->
<#list itemList as entry>
<#list entry.value?split(",") as host>
${host?trim}
</#list>
</#list>

注意:所有节点都需要操作

vi /opt/datasophon/datasophon-worker/conf/templates/flink_workers.ftl
<#-- Writes one line per comma-separated entry of every item's value, trimmed of surrounding whitespace. -->
<#list itemList as entry>
<#list entry.value?split(",") as host>
${host?trim}
</#list>
</#list>

注意:所有节点都需要操作

修改环境变量

vim /etc/profile.d/datasophon-env.sh
export FLINK_HOME=/opt/datasophon/flink-1.20.0
export HADOOP_CLASSPATH=`hadoop classpath`
source /etc/profile.d/datasophon-env.sh

各节点同样操作

重启

各节点worker重启

sh /opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker

主节点重启api

sh /opt/datasophon/datasophon-manager-1.2.1/bin/datasophon-api.sh restart api

测试

flink run -d -t yarn-per-job $FLINK_HOME/examples/streaming/WordCount.jar
flink run-application -t yarn-application $FLINK_HOME/examples/streaming/TopSpeedWindowing.jar
posted @ 2024-11-28 20:18  xiongshengxiao  阅读(246)  评论(0)    收藏  举报