6 Flink API

1. map

Flink's map works the same as Spark's map: it emits exactly one output record for each input record. For example:

stream.map(r => (r.id, r.name))

You can also supply a custom MapFunction:

package test2

import org.apache.flink.api.common.functions.MapFunction
import org.apache.flink.streaming.api.scala._

object MapExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val stream = env
      .addSource(new SensorSource)

    // `MyMapFunction` (defined below) implements the `MapFunction` interface
    stream.map(new MyMapFunction).print()

    // implement the `MapFunction` interface with an anonymous class
    stream
      .map(
        new MapFunction[SensorReading, String] {
          override def map(value: SensorReading): String = value.id
        }
      )
      .print()

    // extract the sensor ID with an anonymous function
    stream.map(r => r.id).print()

    env.execute()
  }

  class MyMapFunction extends MapFunction[SensorReading, String] {
    override def map(value: SensorReading): String = value.id
  }
}
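
The examples in this section rely on a SensorSource and a SensorReading type that are not shown here. A minimal sketch of what they might look like (hypothetical, simplified definitions; the real ones may differ):

package test2

import org.apache.flink.streaming.api.functions.source.{RichParallelSourceFunction, SourceFunction}

import scala.util.Random

// hypothetical reading: sensor ID, timestamp, and temperature
case class SensorReading(id: String, timestamp: Long, temperature: Double)

// hypothetical source that emits random readings until cancelled
class SensorSource extends RichParallelSourceFunction[SensorReading] {
  @volatile private var running = true

  override def run(ctx: SourceFunction.SourceContext[SensorReading]): Unit = {
    val rand = new Random()
    while (running) {
      val id = s"sensor_${rand.nextInt(10) + 1}"
      val temp = 65 + rand.nextGaussian() * 20
      ctx.collect(SensorReading(id, System.currentTimeMillis(), temp))
      Thread.sleep(100)
    }
  }

  override def cancel(): Unit = running = false
}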

2. Other operators

1. flatMap and FlatMapFunction

package test2

import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object FlatMapExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val stream = env
      .addSource(new SensorSource)

    // use a `FlatMapFunction` to reproduce the behavior of `MapExample.scala`
    stream
      .flatMap(
        new FlatMapFunction[SensorReading, String] {
          override def flatMap(value: SensorReading, out: Collector[String]): Unit = {
            // use `collect` to send the extracted sensor ID downstream
            out.collect(value.id)
          }
        }
      )
      .print()

    // use a `FlatMapFunction` to reproduce the behavior of `FilterExample.scala`
    stream
      .flatMap(
        new FlatMapFunction[SensorReading, SensorReading] {
          override def flatMap(value: SensorReading, out: Collector[SensorReading]): Unit = {
            if (value.id.equals("sensor_1")) {
              out.collect(value)
            }
          }
        }
      )
      .print()

    env.execute()
  }
}
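
As the two examples show, flatMap generalizes both map and filter: emitting exactly one record per input reproduces map, and emitting zero or one record depending on a condition reproduces filter.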

2. filter and FilterFunction
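
filter keeps only the elements for which a predicate returns true. A minimal sketch of the FilterExample.scala referenced above, assuming the same SensorSource, that keeps only readings from sensor_1:

package test2

import org.apache.flink.api.common.functions.FilterFunction
import org.apache.flink.streaming.api.scala._

object FilterExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val stream = env
      .addSource(new SensorSource)

    // implement the `FilterFunction` interface with an anonymous class
    stream
      .filter(
        new FilterFunction[SensorReading] {
          override def filter(value: SensorReading): Boolean = value.id.equals("sensor_1")
        }
      )
      .print()

    // the same predicate as an anonymous function
    stream.filter(r => r.id.equals("sensor_1")).print()

    env.execute()
  }
}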

3. connect, CoMapFunction, and CoFlatMapFunction

connect merges two streams, which may have different element types; a CoMapFunction or CoFlatMapFunction then applies a separate transformation to each of the two inputs:

package test2

import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala._

object CoMapExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val one: DataStream[(Int, Long)] = env.fromElements((1, 1L))
    val two: DataStream[(Int, String)] = env.fromElements((1, "two"))

    // key both streams so that records with the same key are processed together
    val connected: ConnectedStreams[(Int, Long), (Int, String)] = one.keyBy(_._1)
      .connect(two.keyBy(_._1))

    val printed: DataStream[String] = connected
      .map(new MyCoMap)

    printed.print()

    env.execute()
  }

  class MyCoMap extends CoMapFunction[(Int, Long), (Int, String), String] {
    override def map1(value: (Int, Long)): String = value._2.toString + " comes from the first stream"

    override def map2(value: (Int, String)): String = value._2 + " comes from the second stream"
  }
}
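
CoFlatMapFunction works the same way, except that flatMap1 and flatMap2 receive a Collector and can each emit zero or more records per input:
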
package test2

import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object CoFlatMapExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
//    env.setParallelism(1)
    println(env.getParallelism) // print the default parallelism

    val one: DataStream[(Int, Long)] = env
      .fromElements((1, 1L))
      .setParallelism(1)
    val two: DataStream[(Int, String)] = env
      .fromElements((1, "two"))
      .setParallelism(1)

    // key both streams so that records with the same key are processed together
    val connected: ConnectedStreams[(Int, Long), (Int, String)] = one.keyBy(_._1)
      .connect(two.keyBy(_._1))

    val printed: DataStream[String] = connected
      .flatMap(new MyCoFlatMap)

    printed.print()

    env.execute()
  }

  class MyCoFlatMap extends CoFlatMapFunction[(Int, Long), (Int, String), String] {
    override def flatMap1(value: (Int, Long), out: Collector[String]): Unit = {
      // emit the record twice to show that flatMap can produce multiple outputs per input
      out.collect(value._2.toString + " comes from the first stream")
      out.collect(value._2.toString + " comes from the first stream")
    }

    override def flatMap2(value: (Int, String), out: Collector[String]): Unit = {
      out.collect(value._2 + " comes from the second stream")
    }
  }

}

4. split and select

split routes each element of a stream to one or more named outputs, and select retrieves the named outputs as separate streams. (split/select was deprecated in later Flink releases in favor of side outputs; see the sketch after this example.)

package test2

import org.apache.flink.streaming.api.scala._

object SplitExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val inputStream: DataStream[(Int, String)] = env
      .fromElements(
        (1001, "1001"),
        (999, "999")
      )

    val splitted: SplitStream[(Int, String)] = inputStream
      .split(t => if (t._1 > 1000) Seq("large") else Seq("small"))

    val large: DataStream[(Int, String)] = splitted.select("large")
    val small: DataStream[(Int, String)] = splitted.select("small")
    val all: DataStream[(Int, String)] = splitted.select("small", "large")

    large.print()

    env.execute()
  }
}
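
Since split/select was deprecated and eventually removed in newer Flink versions, the same routing is usually done with a ProcessFunction and side outputs. A minimal sketch of the equivalent logic (SideOutputExample and smallTag are illustrative names, assuming a Flink release with side-output support):

package test2

import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object SideOutputExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val inputStream: DataStream[(Int, String)] = env
      .fromElements(
        (1001, "1001"),
        (999, "999")
      )

    // records routed to this tag form the side stream
    val smallTag = OutputTag[(Int, String)]("small")

    val large: DataStream[(Int, String)] = inputStream
      .process(new ProcessFunction[(Int, String), (Int, String)] {
        override def processElement(
            value: (Int, String),
            ctx: ProcessFunction[(Int, String), (Int, String)]#Context,
            out: Collector[(Int, String)]): Unit = {
          if (value._1 > 1000) out.collect(value) // main output
          else ctx.output(smallTag, value)        // side output
        }
      })

    val small: DataStream[(Int, String)] = large.getSideOutput(smallTag)

    large.print()
    small.print()

    env.execute()
  }
}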

5. union

Merge multiple streams of the same element type into one:

package test2

import org.apache.flink.streaming.api.scala._

object UnionExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // readings with sensor ID sensor_1 form the stream from Paris
    val parisStream: DataStream[SensorReading] = env
      .addSource(new SensorSource)
      .filter(r => r.id.equals("sensor_1"))

    // readings with sensor ID sensor_2 form the stream from Tokyo
    val tokyoStream: DataStream[SensorReading] = env
      .addSource(new SensorSource)
      .filter(r => r.id.equals("sensor_2"))

    // readings with sensor ID sensor_3 form the stream from Rio
    val rioStream: DataStream[SensorReading] = env
      .addSource(new SensorSource)
      .filter(r => r.id.equals("sensor_3"))

    val allCities: DataStream[SensorReading] = parisStream
      .union(
        tokyoStream,
        rioStream
      )

    allCities.print()

    env.execute()
  }
}
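
Unlike connect, union can merge any number of streams in a single call, but all inputs must have the same element type; connect takes exactly two streams whose element types may differ.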