Flink with Kafka: Telecom Case Study (SQL Version) and Enabling Checkpoints in Flink SQL
Flink with Kafka: Telecom Case Study (SQL Version)
package com.shujia.flink.dx

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment

object Demo3CityFlowOnSql {
  def main(args: Array[String]): Unit = {
    // Create the Flink streaming environment
    val bsEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Configure the table environment
    val bsSettings: EnvironmentSettings = EnvironmentSettings.newInstance()
      .useBlinkPlanner() // use the Blink planner
      .inStreamingMode() // streaming mode
      .build()

    // Create the Flink table environment
    val bsTableEnv: StreamTableEnvironment = StreamTableEnvironment.create(bsEnv, bsSettings)

    /**
      * 1. Read the telecom user location data from Kafka.
      *    'csv.ignore-parse-errors' = 'true' tells the CSV format to skip records that
      *    fail to parse instead of failing the job.
      */
    bsTableEnv.executeSql(
      """
        |CREATE TABLE dianxin (
        |  mdn STRING,
        |  grid STRING,
        |  city STRING,
        |  county STRING,
        |  tTime INT,
        |  start_time STRING,
        |  end_time STRING,
        |  `date` STRING
        |) WITH (
        |  'connector' = 'kafka',
        |  'topic' = 'dianxin1',
        |  'properties.bootstrap.servers' = 'master:9092,node1:9092,node2:9092',
        |  'properties.group.id' = 'testGroup',
        |  'format' = 'csv',
        |  'scan.startup.mode' = 'earliest-offset',
        |  'csv.ignore-parse-errors' = 'true'
        |)
      """.stripMargin)
    // A print sink for local debugging (defined here but not written to in this job)
    bsTableEnv.executeSql(
      """
        |CREATE TABLE print_table (
        |  city STRING,
        |  num BIGINT
        |) WITH (
        |  'connector' = 'print'
        |)
      """.stripMargin)
    // Write the result to MySQL
    bsTableEnv.executeSql(
      """
        |CREATE TABLE city_num (
        |  city STRING,
        |  num BIGINT,
        |  PRIMARY KEY (city) NOT ENFORCED
        |) WITH (
        |  'connector' = 'jdbc',
        |  'url' = 'jdbc:mysql://master:3306/bigdata',
        |  'table-name' = 'city_count',
        |  'username' = 'root',
        |  'password' = '123456'
        |)
      """.stripMargin)
    // Count distinct phone numbers (mdn) per city and continuously upsert the result into MySQL
    bsTableEnv.executeSql(
      """
        |insert into city_num
        |select
        |  city,
        |  count(distinct mdn) as num
        |from dianxin
        |group by city
      """.stripMargin)
    /**
      * Package the code, upload it to the cluster, and submit the job with the command below.
      * The submission may fail with a pile of errors; in short they mean that dependency jars
      * are missing from Flink's lib directory. Upload the missing jars to the lib directory
      * and resubmit.
      *
      * flink run -m yarn-cluster -yjm 1024m -ytm 1096m -c com.shujia.flink.dx.Demo3CityFlowOnSql flink-1.0.jar
      */
  }
}
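Note that the JDBC connector does not create the target table, so city_count must already exist in the bigdata database before the job is submitted. A minimal sketch of the MySQL DDL, assuming the two-column schema used above (the VARCHAR length is an assumption):

CREATE TABLE IF NOT EXISTS city_count (
  city VARCHAR(64) NOT NULL,  -- city name, matches the Flink primary key
  num  BIGINT,                -- distinct mdn count, replaced on every update
  PRIMARY KEY (city)
);

Because the Flink table declares PRIMARY KEY (city) NOT ENFORCED, the JDBC sink writes in upsert mode: each new count for a city replaces the previous row instead of appending a new one. As for the missing jars mentioned above, they are typically the Kafka SQL connector, the JDBC connector, and the MySQL driver (for example flink-sql-connector-kafka, flink-connector-jdbc, and mysql-connector-java for the matching Flink and Scala versions); check the actual error messages to see which ones your setup lacks.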
Enabling Checkpoints in Flink SQL
Just add the checkpoint configuration code to the job; everything else stays the same:
package com.shujia.flink.table

import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment

object Demo11CheckPoint {
  def main(args: Array[String]): Unit = {
    // Create the Flink streaming environment
    val bsEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    bsEnv.setParallelism(1)

    // Start a checkpoint every 1000 ms
    bsEnv.enableCheckpointing(1000)

    // Advanced options:
    // Set the checkpointing mode to exactly-once (this is the default)
    bsEnv.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
    // Make sure at least 500 ms pass between the end of one checkpoint and the start of the next
    bsEnv.getCheckpointConfig.setMinPauseBetweenCheckpoints(500)
    // A checkpoint must complete within one minute or it is discarded
    bsEnv.getCheckpointConfig.setCheckpointTimeout(60000)
    // Allow only one checkpoint in flight at a time
    bsEnv.getCheckpointConfig.setMaxConcurrentCheckpoints(1)
    // Retain the checkpoint when the job is cancelled; note that retained checkpoints must be cleaned up manually
    bsEnv.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)

    // State backend that stores checkpoints in HDFS
    val stateBackend = new FsStateBackend("hdfs://master:9000/flink/checkpoint")
    // Set the state backend
    bsEnv.setStateBackend(stateBackend)

    // Configure the table environment
    val bsSettings: EnvironmentSettings = EnvironmentSettings.newInstance()
      .useBlinkPlanner() // use the Blink planner
      .inStreamingMode() // streaming mode
      .build()

    // Create the Flink table environment
    val bsTableEnv: StreamTableEnvironment = StreamTableEnvironment.create(bsEnv, bsSettings)
    bsTableEnv.executeSql(
      """
        |CREATE TABLE dianxin (
        |  mdn STRING,
        |  grid STRING,
        |  city STRING,
        |  county STRING,
        |  tTime INT,
        |  start_time STRING,
        |  end_time STRING,
        |  `date` STRING
        |) WITH (
        |  'connector' = 'kafka',
        |  'topic' = 'dianxin1',
        |  'properties.bootstrap.servers' = 'master:9092,node1:9092,node2:9092',
        |  'properties.group.id' = 'testGroup',
        |  'format' = 'csv',
        |  'scan.startup.mode' = 'earliest-offset',
        |  'csv.ignore-parse-errors' = 'true'
        |)
      """.stripMargin)
    bsTableEnv.executeSql(
      """
        |CREATE TABLE print_table (
        |  city STRING,
        |  num BIGINT
        |) WITH (
        |  'connector' = 'print'
        |)
      """.stripMargin)
    bsTableEnv.executeSql(
      """
        |insert into print_table
        |select
        |  city,
        |  count(distinct mdn) as num
        |from dianxin
        |group by city
      """.stripMargin)
  }
}
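Because the job retains its checkpoints on cancellation, it can later be resumed from the last retained checkpoint with the -s option of flink run. The path below is illustrative: the actual checkpoint directories are created under hdfs://master:9000/flink/checkpoint/<job-id>/chk-<n>, so fill in the real job id and the latest chk-<n> directory.

flink run -m yarn-cluster -yjm 1024m -ytm 1096m -s hdfs://master:9000/flink/checkpoint/<job-id>/chk-<n> -c com.shujia.flink.table.Demo11CheckPoint flink-1.0.jar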
