一 Building a source and sink from Kafka
import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer011, FlinkKafkaProducer011}

object KafkaSourceAndSink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Kafka consumer properties
    val props = new Properties()
    props.put("bootstrap.servers", "localhost:9092")
    props.put("group.id", "consumer-group")
    props.put(
      "key.deserializer",
      "org.apache.kafka.common.serialization.StringDeserializer"
    )
    props.put(
      "value.deserializer",
      "org.apache.kafka.common.serialization.StringDeserializer"
    )
    props.put("auto.offset.reset", "latest")

    // Source: read the "test" topic as a stream of strings
    val stream = env
      .addSource(
        new FlinkKafkaConsumer011[String](
          "test",
          new SimpleStringSchema(),
          props
        )
      )

    // Sink: write the stream back to the "test" topic
    stream.addSink(
      new FlinkKafkaProducer011[String](
        "localhost:9092",
        "test",
        new SimpleStringSchema()
      )
    )
    stream.print()
    env.execute()
  }
}
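The sink above is at-least-once by default. Below is a minimal sketch of an exactly-once variant of the same pipeline; the checkpoint interval, the transaction timeout, and the stand-in input are assumptions, not part of the original.

// Sketch: the same Kafka sink configured for exactly-once delivery.
// Checkpoint interval and transaction timeout are assumed values.
import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper

object KafkaExactlyOnceSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(10000) // exactly-once requires checkpointing; 10s is assumed

    val producerProps = new Properties()
    producerProps.put("bootstrap.servers", "localhost:9092")
    // must not exceed the broker's transaction.max.timeout.ms
    producerProps.put("transaction.timeout.ms", "60000")

    val stream = env.fromElements("a", "b", "c") // stand-in input, assumed

    stream.addSink(
      new FlinkKafkaProducer011[String](
        "test",
        new KeyedSerializationSchemaWrapper[String](new SimpleStringSchema()),
        producerProps,
        FlinkKafkaProducer011.Semantic.EXACTLY_ONCE
      )
    )
    env.execute()
  }
}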
二 Sink to Elasticsearch
package test3
import java.util
import test2.{SensorReading, SensorSource}
import org.apache.flink.api.common.functions.RuntimeContext
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer}
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink
import org.apache.http.HttpHost
import org.elasticsearch.client.Requests
object SinkToES {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env
.addSource(new SensorSource)
    // Elasticsearch hosts and ports
    val httpHosts = new util.ArrayList[HttpHost]()
    httpHosts.add(new HttpHost("localhost", 9200))

    // Defines how each record is written to Elasticsearch
    val esSinkBuilder = new ElasticsearchSink.Builder[SensorReading](
      httpHosts, // the Elasticsearch hosts
      // anonymous class that turns every record into an index request
      new ElasticsearchSinkFunction[SensorReading] {
        override def process(t: SensorReading,
                             runtimeContext: RuntimeContext,
                             requestIndexer: RequestIndexer): Unit = {
          // document source: a map with String keys and String values
          val json = new util.HashMap[String, String]()
          json.put("data", t.toString)
          // build the request that writes this document to ES
          val indexRequest = Requests
            .indexRequest()
            .index("sensor") // the index is named "sensor"
            .source(json)
          requestIndexer.add(indexRequest)
        }
      }
    )

    // Buffer up to 10 requests and write them to ES as one bulk request
    esSinkBuilder.setBulkFlushMaxActions(10)
stream.addSink(esSinkBuilder.build())
env.execute()
}
}
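This example and the two that follow import SensorReading and SensorSource from package test2, which the original does not show. Below is a minimal sketch consistent with how they are used here; the exact field set and the random-walk behaviour are assumptions.

package test2

import java.util.Calendar
import scala.util.Random
import org.apache.flink.streaming.api.functions.source.{RichParallelSourceFunction, SourceFunction}

// sensor id, timestamp in milliseconds, temperature; field names are assumptions
case class SensorReading(id: String, timestamp: Long, temperature: Double)

// Emits a random-walk temperature reading for each of 10 sensors every 100 ms
class SensorSource extends RichParallelSourceFunction[SensorReading] {
  @volatile private var running = true

  override def run(ctx: SourceFunction.SourceContext[SensorReading]): Unit = {
    val rand = new Random()
    var curTemps = (1 to 10).map(i => (s"sensor_$i", 65.0 + rand.nextGaussian() * 20))
    while (running) {
      curTemps = curTemps.map { case (id, temp) => (id, temp + rand.nextGaussian() * 0.5) }
      val ts = Calendar.getInstance.getTimeInMillis
      curTemps.foreach { case (id, temp) => ctx.collect(SensorReading(id, ts, temp)) }
      Thread.sleep(100)
    }
  }

  override def cancel(): Unit = running = false
}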
三 Sink to MySQL
package test3
import java.sql.{Connection, DriverManager, PreparedStatement}
import test2.{SensorReading, SensorSource}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.flink.streaming.api.scala._
object SinkToMySQL {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new SensorSource)
stream.addSink(new MyJdbcSink)
env.execute()
}
class MyJdbcSink extends RichSinkFunction[SensorReading] {
    // the JDBC connection
var conn: Connection = _
    // prepared statement for inserts
var insertStmt: PreparedStatement = _
    // prepared statement for updates
var updateStmt: PreparedStatement = _
    // called once when the sink starts: open the connection and prepare the statements
override def open(parameters: Configuration): Unit = {
conn = DriverManager.getConnection(
"jdbc:mysql://localhost:3306/test",
"root",
"root"
)
insertStmt = conn.prepareStatement(
"INSERT INTO temperatures (sensor, temp) VALUES (?, ?)"
)
updateStmt = conn.prepareStatement(
"UPDATE temperatures SET temp = ? WHERE sensor = ?"
)
}
    // called for every record: try an update first; if no row was touched, insert (a simple upsert)
override def invoke(value: SensorReading, context: SinkFunction.Context[_]): Unit = {
updateStmt.setDouble(1, value.temperature)
updateStmt.setString(2, value.id)
updateStmt.execute()
if (updateStmt.getUpdateCount == 0) {
insertStmt.setString(1, value.id)
insertStmt.setDouble(2, value.temperature)
insertStmt.execute()
}
}
    // called when the sink shuts down: release the resources
override def close(): Unit = {
insertStmt.close()
updateStmt.close()
conn.close()
}
}
}
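The sink assumes a temperatures table already exists in the test database. Below is a one-off helper that creates it; the column types and the primary key are assumptions inferred from the INSERT/UPDATE statements above.

// One-off helper to create the table the sink writes to.
// Column types are assumptions inferred from setString/setDouble above.
object CreateTemperaturesTable {
  def main(args: Array[String]): Unit = {
    val conn = java.sql.DriverManager.getConnection(
      "jdbc:mysql://localhost:3306/test", "root", "root")
    val stmt = conn.createStatement()
    stmt.execute(
      """CREATE TABLE IF NOT EXISTS temperatures (
        |  sensor VARCHAR(64) PRIMARY KEY,
        |  temp DOUBLE
        |)""".stripMargin)
    stmt.close()
    conn.close()
  }
}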
四 Sink to Redis
package test3
import test2.{SensorReading, SensorSource}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.redis.RedisSink
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig
import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper}
object SinkToRedis {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val stream = env.addSource(new SensorSource)
    // Redis host; the default port 6379 is used
val conf = new FlinkJedisPoolConfig.Builder().setHost("127.0.0.1").build()
stream.addSink(new RedisSink[SensorReading](conf, new MyRedisMapper))
env.execute()
}
class MyRedisMapper extends RedisMapper[SensorReading] {
    // which Redis command to use: HSET into the hash named "sensor"
override def getCommandDescription: RedisCommandDescription = {
new RedisCommandDescription(RedisCommand.HSET, "sensor")
}
    // the field of the hash entry: the sensor id
override def getKeyFromData(t: SensorReading): String = t.id
    // the value of the hash entry: the latest temperature
override def getValueFromData(t: SensorReading): String = t.temperature.toString
}
}
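To verify the result outside of Flink, the "sensor" hash can be read back with the Jedis client the connector depends on; the host and default port below are assumptions matching the config above.

// Quick verification outside the Flink job: dump the "sensor" hash.
import scala.collection.JavaConverters._
import redis.clients.jedis.Jedis

object CheckRedis {
  def main(args: Array[String]): Unit = {
    val jedis = new Jedis("127.0.0.1") // assumes the default port 6379
    val fields = jedis.hgetAll("sensor").asScala // field = sensor id, value = latest temperature
    fields.foreach { case (id, temp) => println(s"$id -> $temp") }
    jedis.close()
  }
}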