Flink 1.18 传统批处理代码编写

package com.xiaohu.wc;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;


/**
 * Word-count demo written against Flink's traditional batch (DataSet) API.
 *
 * <p>Since Flink 1.12 the DataStream API is the recommended API; this class
 * shows the older batch style.
 */
public class WordCountBatchDemo {
    /**
     * Reads {@code input/word.txt}, counts the occurrences of each
     * space-separated word and prints the resulting {@code (word, count)} pairs.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if building or running the job fails
     */
    public static void main(String[] args) throws Exception {
        // Create the execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read the data from a text file.
        DataSource<String> lineDS = env.readTextFile("input/word.txt");

        // Split each line into words and emit (word, 1) for every word.
        FlatMapOperator<String, Tuple2<String, Long>> wordAndOne = lineDS.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Long>> out) throws Exception {
                for (String word : line.split(" ")) {
                    // A blank line, leading spaces or consecutive spaces make split(" ")
                    // return empty tokens; skip them so "" is not counted as a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    out.collect(Tuple2.of(word, 1L));
                }
            }
        });

        // Group by the word (tuple field 0).
        UnsortedGrouping<Tuple2<String, Long>> wordGroup = wordAndOne.groupBy(0);

        // Sum the counts (tuple field 1) within each group.
        AggregateOperator<Tuple2<String, Long>> wordCount = wordGroup.sum(1);

        // Print the result. In the DataSet API print() itself triggers execution,
        // which is why there is no explicit env.execute() call.
        wordCount.print();
    }
}
posted @ 2025-02-26 20:18  Xiaohu_BigData  阅读(27)  评论(0)    收藏  举报