百里登风

导航

3、Flink批处理案例实现-Java

在创建好的Flink项目的基础上,在 com.gong.batch 包下新建一个类 WordCount,代码如下:

 

 

package com.gong.batch;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.util.Collector;

import java.lang.reflect.Parameter;

/**
 * Batch word-count example built on the Flink DataSet API.
 *
 * <p>Command-line arguments (parsed with {@link ParameterTool}):
 * <ul>
 *   <li>{@code --input <path>}: text file to read. Required; without it the
 *       program prints a message and exits without building a job.</li>
 *   <li>{@code --output <path>}: optional. When present the counts are written
 *       there as delimited text; otherwise they are printed to stdout.</li>
 * </ul>
 */
public class WordCount {

    /**
     * Entry point: reads the input file, counts word occurrences and emits the result.
     *
     * @param args command-line arguments, see the class description
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Parse the arguments passed on the command line.
        ParameterTool params = ParameterTool.fromArgs(args);

        // Without an input path there is nothing to process. Stop here instead of
        // continuing with a null DataSet, which previously caused a NullPointerException.
        if (!params.has("input")) {
            System.out.println("数据不存在");
            return;
        }

        // Obtain a Flink execution environment.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Read the input data line by line.
        DataSet<String> dataSet = env.readTextFile(params.get("input"));

        // Word frequency: split lines into (word, 1) pairs, group by the word
        // (tuple field 0) and sum the counts (tuple field 1).
        DataSet<Tuple2<String, Integer>> counts = dataSet.flatMap(new Tokenizer())
                .groupBy(0)
                .sum(1);

        if (params.has("output")) {
            // Write one record per line, with word and count separated by a single space.
            counts.writeAsCsv(params.get("output"), "\n", " ");
            // The file sink is lazy, so the job has to be submitted explicitly.
            env.execute("wordcount exmple ");
        } else {
            // In the DataSet API print() triggers execution by itself,
            // so no explicit env.execute() call is made on this path.
            counts.print();
        }
    }

    /**
     * Splits a line into lower-cased words and emits a {@code (word, 1)} pair for each.
     */
    public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

        /**
         * Tokenizes one input line.
         *
         * @param value the input line
         * @param out   collector receiving one {@code (word, 1)} tuple per non-empty token
         */
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Split on runs of non-word characters.
            String[] tokens = value.toLowerCase().split("\\W+");
            for (String token : tokens) {
                // split() yields an empty string for blank lines and for lines that
                // start with a non-word character; do not count those as a word.
                if (!token.isEmpty()) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        }
    }
}

 

posted on 2020-09-07 18:31  百里登峰  阅读(390)  评论(0)  编辑  收藏  举报