博客园 首页 私信博主 显示目录 隐藏目录 管理 动画

练习:Flink 字频统计

xiaoming,english,90
xiaoming,math,80
xiaohong,english,98
xiaohong,math,82

test

 1 import org.apache.flink.api.common.typeinfo.Types;
 2 import org.apache.flink.api.java.ExecutionEnvironment;
 3 import org.apache.flink.api.java.operators.AggregateOperator;
 4 import org.apache.flink.api.java.operators.DataSource;
 5 import org.apache.flink.api.java.operators.FlatMapOperator;
 6 import org.apache.flink.api.java.operators.UnsortedGrouping;
 7 import org.apache.flink.api.java.tuple.Tuple2;
 8 import org.apache.flink.util.Collector;
 9 
10 public class WordCount {
11     public static void main(String[] args) {
12         System.out.println("字频统计");
13         //1 环境
14         ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
15         env.setParallelism(1);
16         //2 读取文件
17         DataSource<String> source = env.readTextFile("src\\main\\resources\\file.txt");
18         //3 得到二元组
19         FlatMapOperator<String, Tuple2<String, Long>> wordAndOne = source.flatMap(
20                 (String line, Collector<Tuple2<String, Long>> out) -> {
21                     String[] split = line.split(",");
22                     for (String s : split) {
23                         out.collect(Tuple2.of(s, 1L));
24                     }
25                 }
26         ).returns(Types.TUPLE(Types.STRING, Types.LONG));
27 //        wordAndOne.print("word count");
28         //4 分组
29         UnsortedGrouping<Tuple2<String, Long>> wordAndOneGroup = wordAndOne.groupBy(0);
30         //5 聚合
31         AggregateOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);
32         //6 打印
33         try {
34             sum.print();
35         } catch (Exception e) {
36             e.printStackTrace();
37         }
38     }
39 }

 

posted @ 2022-04-08 17:24  CHANG_09  阅读(47)  评论(0)    收藏  举报