词频统计
统计一个文件中每个单词出现的频率，单词之间的分隔符为制表符（\t）
Java实现
/**
 * Reads a text file, splits every line on the tab character and prints the
 * number of occurrences of each word.
 *
 * <p>Empty tokens are discarded before counting, so blank lines and
 * leading or consecutive tabs do not show up as an empty "word".
 *
 * @param path location of the text file to read
 * @throws Exception if the Flink job cannot be executed
 */
public static void streamSource(String path) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<String> lines = env.readTextFile(path);

    lines
        .flatMap((String value, Collector<String> out) -> {
            // split("\t") yields "" for a blank line and between adjacent tabs;
            // skip those so only real words are emitted.
            Arrays.stream(value.split("\t"))
                .filter(token -> !token.isEmpty())
                .forEach(out::collect);
        })
        // The lambda's generic output type is declared explicitly for Flink.
        .returns(Types.STRING)
        .map(word -> Tuple2.of(word, 1))
        .returns(Types.TUPLE(Types.STRING, Types.INT))
        // Group by the word (field f0) and add up the counts (field 1).
        .keyBy(t -> t.f0)
        .sum(1)
        .print();

    env.execute("word count");
}
/**
 * Streaming word count over a socket text source.
 *
 * <p>Each received line is lower-cased and split on commas; every non-empty
 * token is counted per 5-second time window and the result is printed.
 */
public class StreamingJob {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the Flink job cannot be executed
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<String> text = env.socketTextStream("192.168.21.128", 9999);

        text.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) throws Exception {
                    for (String token : value.toLowerCase().split(",")) {
                        // Adjacent commas produce empty tokens; do not count them.
                        if (!token.isEmpty()) {
                            collector.collect(Tuple2.of(token, 1));
                        }
                    }
                }
            })
            // Key by the word itself through a typed selector instead of a
            // positional tuple index, which is not checked at compile time.
            .keyBy(pair -> pair.f0)
            .timeWindow(Time.seconds(5))
            .sum(1)
            .print();

        env.execute("StreamingJob");
    }
}
测试:

Scala实现
/** Batch word count: reads a tab-separated text file and prints how often
  * each lower-cased word occurs.
  */
object CountWord {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultPath = "D:/flink/data/hello.txt"

  /** Runs the job.
    *
    * @param args optional; `args(0)` overrides the default input path
    */
  def main(args: Array[String]): Unit = {
    // This is a batch (DataSet) job, so the implicits come from the batch
    // Scala API package rather than from the streaming one.
    import org.apache.flink.api.scala._

    val path = args.headOption.getOrElse(DefaultPath)
    val env = ExecutionEnvironment.getExecutionEnvironment
    val data = env.readTextFile(path)

    data
      .flatMap(_.toLowerCase.split("\t"))
      .filter(_.nonEmpty) // drop empty tokens from blank lines / adjacent tabs
      .map((_, 1))
      .groupBy(0)
      .sum(1)
      .print()
  }
}
流处理scala
/** Streaming word count: reads comma-separated words from a socket and
  * prints the count of each word per 3-second time window.
  */
object WordCountStreaming {

  /** Builds and runs the streaming job.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    // Supplies the implicit TypeInformation instances the Scala DataStream
    // operators require, so no hand-written implicit is needed.
    import org.apache.flink.streaming.api.scala._

    val senv = StreamExecutionEnvironment.getExecutionEnvironment
    val text = senv.socketTextStream("192.168.21.128", 9999)

    text
      .flatMap(_.split(","))
      .filter(_.nonEmpty) // adjacent commas produce empty tokens
      .map((_, 1))
      // Key by the word through a typed selector rather than a tuple index.
      .keyBy(_._1)
      .timeWindow(Time.seconds(3))
      .sum(1)
      .print()

    senv.execute("Streaming WordCount")
  }
}
立志如山 静心求实
浙公网安备 33010602011771号