
Exercise: Flink Table

data.txt

521588,167760,3,5999.00,1,2021-06-18 00:11:23
521590,167762,2,3999.00,1,2021-06-18 11:12:23
521591,167774,1,7888.00,2,2021-07-19 10:10:26
521592,167788,10,1278.00,3,2021-04-20 10:10:19
521593,167724,2,3888.00,2,2021-03-20 11:10:30
521576,167735,2,1888.00,1,2021-01-18 14:10:20

data2.txt

Jack,Ipad,1
Andy,Iphone,2
Lucy,Iphone,3
Andy,HuaWei,1
HanMeiMei,Oppop,2
DengChao,SAMSUNG,3
DengChao,Iphone,4

data3.txt

XiAn,1300.00,1590742505000
BeiJing,3281.12,1590742507000
ShangHai,3100.02,1590742518000
TianJing,1921.05,1590742523000
ShangHai,3400.02,1590742533000
BeiJing,2121.01,1590742537000
ShangHai,3671.78,1590742543000
XiAn,2677.95,1590742552000
XiAn,2271.95,1590742581000

bean

 

package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Goods {
    private String uname;
    private String gname;
    private Integer nums;
}
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Monthly {
    private String month;
    private Double price;
}
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Order {
    private String id;
    private String order_id;
    private Integer sku_id;
    private Double order_price;
    private Integer sku_num;
    private String create_time;
}
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Region {
    private String address;
    private Double money;
    private Long timestamp;
}

utils

package utils;

import org.apache.hadoop.conf.Configuration;
import java.util.HashMap;
import java.util.Properties;

public class Propss {
    public static Properties producer_Props = new Properties();
    public static Properties consumer_Props = new Properties();
    public static HashMap<String, Object> kafka_Producer = new HashMap<>();
    public static HashMap<String, Object> kafka_Consumer = new HashMap<>();

    // HBase connection settings (ZooKeeper quorum and client port)
    public static Configuration setConf(Configuration conf){
        conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        return conf;
    }

    static {
        kafka_Consumer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Consumer.put("group.id", "com/test");
        // read from the beginning of the topic
        kafka_Consumer.put("auto.offset.reset", "earliest");
        kafka_Consumer.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafka_Consumer.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        kafka_Producer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Producer.put("acks", "all");
        kafka_Producer.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        kafka_Producer.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        producer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        producer_Props.setProperty("acks", "all");
        producer_Props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producer_Props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        consumer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        consumer_Props.setProperty("group.id", "com/test");
        consumer_Props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.setProperty("auto.offset.reset", "earliest");
    }
}
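
Note: the test classes below pass the broker list to FlinkKafkaProducer directly and only consume consumer_Props; producer_Props (and the two HashMaps) are meant for the properties-based constructors. A minimal sketch of that usage, assuming a placeholder topic name some_topic and the same imports as the test classes:

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.fromElements("a", "b", "c")
                .addSink(new FlinkKafkaProducer<>("some_topic", new SimpleStringSchema(), Propss.producer_Props));
        env.execute();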

test

package test;

import bean.Order;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.api.*;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class FlinkTest1 {
    public static void main(String[] args) throws Exception {

        // 1) Copy the data above into data.txt, read the file correctly with Flink and wrap
        //    each line into an order detail object. (Flink DataStream)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String path = FlinkTest1.class.getClassLoader().getResource("data.txt").getPath();

        DataStreamSource<String> source = env.readTextFile(path);
        SingleOutputStreamOperator<Order> map = source.map(new MapFunction<String, Order>() {
            @Override
            public Order map(String s) throws Exception {
                String[] split = s.split(",");
                return new Order(split[0], split[1], Integer.valueOf(split[2]), Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5]);
            }
        });
        map.print();

        // 2) Sort the orders by price in descending order and print the result to the console.
        //    (Flink Table API in batch mode)
        EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
        TableEnvironment tableEnv = TableEnvironment.create(build);

        tableEnv.connect(new FileSystem().path(path))
                .withFormat(new Csv())
                .withSchema(new Schema().field("id", DataTypes.STRING())
                        .field("order_id", DataTypes.STRING())
                        .field("sku_id", DataTypes.INT())
                        .field("order_price", DataTypes.DOUBLE())
                        .field("sku_num", DataTypes.INT())
                        .field("create_time", DataTypes.STRING())
                ).createTemporaryTable("table1");

        Table table = tableEnv.sqlQuery("select * from table1 order by order_price desc");
        TableResult execute = table.execute();
        CloseableIterator<Row> collect = execute.collect();
        while (collect.hasNext()){
            System.out.println(collect.next());
        }

        // 3) Compute the total order amount of June 2021 and save the result to HDFS.
        //    (Flink Table API in batch mode)
        Table table3 = tableEnv.sqlQuery("select sum(order_price) from table1 where substring(create_time,1,7) = '2021-06'");
        TableResult execute3 = table3.execute();
        CloseableIterator<Row> collect3 = execute3.collect();
        while (collect3.hasNext()){
            System.out.println(collect3.next());
        }
        // How to sink this batch result?
        // tableEnv.  (something to turn the table into a stream?)

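        // One possible answer to the question above (a sketch added to the original post, not
        // verified against this exact Flink version): register a filesystem sink table via DDL
        // and insert the batch result into it. The table name "hdfs_sink", the output path and
        // the 'csv' format are placeholders/assumptions.
        tableEnv.executeSql("CREATE TABLE hdfs_sink (total DOUBLE) WITH (" +
                " 'connector' = 'filesystem'," +
                " 'path' = 'hdfs://hadoop106:8020/week1-sum'," +
                " 'format' = 'csv')");
        table3.executeInsert("hdfs_sink");
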
        // Flink Table API in streaming mode
        EnvironmentSettings build_ = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv_ = StreamTableEnvironment.create(env, build_);
        DataStreamSource<String> source_ = env.readTextFile(path);
        SingleOutputStreamOperator<Order> map_ = source_.map(new MapFunction<String, Order>() {
            @Override
            public Order map(String s) throws Exception {
                String[] split = s.split(",");
                return new Order(split[0], split[1], Integer.valueOf(split[2]), Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5]);
            }
        });

        tableEnv_.createTemporaryView("table1", map_);
        // print
        Table table3_ = tableEnv_.sqlQuery("select sum(order_price) as aa from table1 where substring(create_time,1,7) = '2021-06'");
        TableResult execute_ = table3_.execute();
        CloseableIterator<Row> collect_ = execute_.collect();
        while (collect_.hasNext()){
            System.out.println(collect_.next());
        }
        // Converting the table to an append-only stream failed:
//        SingleOutputStreamOperator<String> map2 = tableEnv_.toAppendStream(table1, Row.class).map(f -> f.getField(1).toString());
//        SingleOutputStreamOperator<String> map2 = tableEnv_.toDataStream(table1, Row.class).map(f -> f.getField(1).toString());
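        // Explanatory note (added): the commented attempts above fail because an aggregation
        // query produces a continuously updating result table, and toAppendStream only supports
        // insert-only results. A retract (changelog) stream, as used below, carries both the
        // new values and the retractions of previous ones.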
        // Converting the table to a retract stream works, then sink it
        SingleOutputStreamOperator<Row> map3 = tableEnv_.toRetractStream(table3_, Row.class).map(x -> x.f1);
        SingleOutputStreamOperator<String> maped3 = map3.map(new MapFunction<Row, String>() {
            @Override
            public String map(Row row) throws Exception {
                Object field = row.getField(0);
                return field.toString();
            }
        });
        maped3.addSink(StreamingFileSink.forRowFormat(new Path("hdfs://hadoop106:8020/week1-output"), new SimpleStringEncoder<String>("utf-8")).build());

        // 4) Compute the average order amount per sku_id and print the order info of the
        //    sku_id with the highest average to the console. (Flink in batch mode)
        Table table4 = tableEnv.sqlQuery("select sku_id,avg(order_price) aa from table1 group by sku_id order by aa desc limit 1");
        CloseableIterator<Row> collect4 = table4.execute().collect();
        while (collect4.hasNext()){
            System.out.println(collect4.next());
        }
        // 5) Compute the total sales of each month in 2021 and print the result to the console. (Flink in batch mode)
        Table table5 = tableEnv.sqlQuery("select substring(create_time,1,7),sum(order_price) aa from table1 group by substring(create_time,1,7)");
        CloseableIterator<Row> collect5 = table5.execute().collect();
        while (collect5.hasNext()){
            System.out.println(collect5.next());
        }
        // 6) For the monthly sales of 2021, emit months with total sales > 5000 on the main
        //    stream and months with total sales < 3000 on a side output. (Flink in stream mode)
        Table table6 = tableEnv_.sqlQuery("select substring(create_time,1,7),sum(order_price) aa from table1 group by substring(create_time,1,7)");

        SingleOutputStreamOperator<Row> map6 = tableEnv_.toRetractStream(table6, Row.class).map(x -> x.f1);
        SingleOutputStreamOperator<Row> process6 = map6.process(new ProcessFunction<Row, Row>() {
            @Override
            public void processElement(Row row, Context context, Collector<Row> collector) throws Exception {
                OutputTag<Row> rowOutputTag = new OutputTag<Row>("low") {
                };
                Object field = row.getField(1);
                Double sum = Double.valueOf(field.toString());
                if (sum > 5000) {
                    collector.collect(row);
                } else if (sum < 3000) {
                    context.output(rowOutputTag, row);
                }
            }
        });
        process6.print("high");
        OutputTag<Row> rowOutputTag = new OutputTag<Row>("low") {};
        process6.getSideOutput(rowOutputTag).print("low");

        // 7) Sink the total sales of 2021 to Kafka. (Flink in stream mode)
        Table table7 = tableEnv_.sqlQuery("select substring(create_time,1,4),sum(order_price) aa from table1 group by substring(create_time,1,4)");
        SingleOutputStreamOperator<Row> map7 = tableEnv_.toRetractStream(table7, Row.class).map(x -> x.f1);
        SingleOutputStreamOperator<String> maped7 = map7.map(new MapFunction<Row, String>() {
            @Override
            public String map(Row row) throws Exception {
                return row.getField(0).toString() + "," + row.getField(1).toString();
            }
        });

        maped7.addSink(new FlinkKafkaProducer<String>("hadoop106:9092", "week_1", new SimpleStringSchema()));

        // execute
        env.execute();

    }
}
package test;

import bean.Goods;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.table.api.*;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;

public class FlinkTest2 {
    public static void main(String[] args) throws Exception {
        // Problem set 2: complete the following requirements with the Flink Java or Scala API

        // 1) Save the data above to an HDFS directory (path of your choice) and read the data
        //    back correctly with Flink. (Flink stream)
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);
        String path = FlinkTest2.class.getClassLoader().getResource("data2.txt").getPath();
        DataStreamSource<String> source = env.readTextFile(path);

        SingleOutputStreamOperator<String> map = source.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                return s;
            }
        });
        map.print();
        // sink to HDFS
        map.addSink(StreamingFileSink.forRowFormat(new Path("hdfs://hadoop106:8020/week1-output"), new SimpleStringEncoder<String>("utf-8")).build());
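        // Sketch (added, not from the original post): the task also asks to read the data back
        // from HDFS. Once the sink above has finalized its files (i.e. in a later run), the same
        // readTextFile call can be pointed at the HDFS path; left commented out because the
        // directory does not exist yet within this job.
//        DataStreamSource<String> hdfsSource = env.readTextFile("hdfs://hadoop106:8020/week1-output");
//        hdfsSource.print();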

        // strings to Goods objects
//        SingleOutputStreamOperator<Goods> gmap = source.map((MapFunction<String, Goods>) s -> {
//            String[] split = s.split(",");
//            return new Goods(split[0], split[1], Integer.valueOf(split[2]));
//        });

        // Flink Table API in batch mode
        EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
        TableEnvironment tableEnv = TableEnvironment.create(build);

        tableEnv.connect(new FileSystem().path(path))
                .withFormat(new Csv())
                .withSchema(new Schema()
                .field("uname", DataTypes.STRING())
                .field("gname", DataTypes.STRING())
                .field("nums", DataTypes.INT()))
                .createTemporaryTable("table1");
        // 2) Find the users who bought an Iphone with a quantity >= 2 and print the result to the console.
        Table table = tableEnv.sqlQuery("select * from table1 where gname = 'Iphone' and nums >= 2");
        TableResult execute = table.execute();
        CloseableIterator<Row> collect = execute.collect();
        while (collect.hasNext()){
            System.out.println(collect.next());
        }
        // 3) Compute the total units sold per brand, sort in ascending order and print the result to the console.
        Table table3 = tableEnv.sqlQuery("select gname,sum(nums) aa from table1 group by gname order by aa");
        TableResult execute3 = table3.execute();
        CloseableIterator<Row> collect3 = execute3.collect();
        while (collect3.hasNext()){
            System.out.println(collect3.next());
        }
        // 4) Complete comments (5 points)
        env.execute();
    }
}
package test;

import bean.Region;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.util.Collector;
import utils.Propss;

import java.util.Iterator;

public class FlinkTest3 {
    public static void main(String[] args) throws Exception {
        // 1) Write the data above into the Kafka topic region_order and verify the result. (Flink)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        String path = FlinkTest3.class.getClassLoader().getResource("data3.txt").getPath();
        DataStreamSource<String> source = env.readTextFile(path);

        // wrap each line into a Region object
        SingleOutputStreamOperator<Region> map = source.map((MapFunction<String, Region>) s -> {
            String[] split = s.split(",");
            return new Region(split[0], Double.valueOf(split[1]), Long.valueOf(split[2]));
        });

        SingleOutputStreamOperator<String> maps = source.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                return s;
            }
        });
        // sink to topic region_order
        maps.addSink(new FlinkKafkaProducer<String>("hadoop106:9092", "region_order", new SimpleStringSchema()));

        // 2) Consume the Kafka topic region_order in real time with the Flink streaming API
        //    and print it to the console.
        FlinkKafkaConsumer<String> sss = new FlinkKafkaConsumer<>("region_order", new SimpleStringSchema(), Propss.consumer_Props);
        sss.setStartFromEarliest();
        DataStreamSource<String> source1 = env.addSource(sss);
        source1.print();

        // 3) Enable checkpointing with a 5 second interval.
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);

        // 4) Set the state backend so checkpoint data is stored on HDFS (path of your choice).
        env.getCheckpointConfig().setCheckpointTimeout(60000);
        env.setStateBackend(new FsStateBackend("hdfs://hadoop106:8020/checkP"));
        // 5) Use watermarks with a 10 second out-of-orderness bound, generated every 3 seconds.
        SingleOutputStreamOperator<Region> map5 = source1.map(new MapFunction<String, Region>() {
            @Override
            public Region map(String s) throws Exception {
                String[] split = s.split(",");
                return new Region(split[0], Double.valueOf(split[1]), Long.valueOf(split[2]));
            }
        });
        // watermark generation interval: 3 seconds
        env.getConfig().setAutoWatermarkInterval(3000);
        // allowed out-of-orderness: 10 seconds
        SingleOutputStreamOperator<Region> ope = map5.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Region>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(Region region) {
                return region.getTimestamp();
            }
        });
        // 6) Use a window to compute each city's total transaction amount per minute and save
        //    the result locally. (5 points)
        ope.keyBy("address").timeWindow(Time.seconds(60)).sum("money").print();
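        // Sketch (added, not from the original post): the task asks to save the per-minute sums
        // locally rather than only printing them; one way is a row-format file sink with a local
        // path. The path below is a placeholder, and the sink classes are fully qualified so the
        // import block above stays unchanged.
        ope.keyBy("address").timeWindow(Time.seconds(60)).sum("money")
                .map(new MapFunction<Region, String>() {
                    @Override
                    public String map(Region r) throws Exception {
                        return r.getAddress() + "," + r.getMoney();
                    }
                })
                .addSink(org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
                        .forRowFormat(new org.apache.flink.core.fs.Path("file:///tmp/minute-sums"),
                                new org.apache.flink.api.common.serialization.SimpleStringEncoder<String>("utf-8"))
                        .build());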
        // 7) Use a window to compute each city's average transaction amount per minute and
        //    send it to the Kafka topic Minamount. (5 points)
        SingleOutputStreamOperator<Tuple2<String, Double>> address = ope.keyBy("address").timeWindow(Time.seconds(60)).apply(
                new WindowFunction<Region, Tuple2<String, Double>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple tuple, TimeWindow timeWindow, Iterable<Region> iterable, Collector<Tuple2<String, Double>> collector) throws Exception {
                        Object field = tuple.getField(0);
                        String address = field.toString();
                        Iterator<Region> iterator = iterable.iterator();
                        Double sum = 0.0;
                        while (iterator.hasNext()) {
                            Region next = iterator.next();
                            sum += next.getMoney();
                        }
                        int size = IteratorUtils.toList(iterable.iterator()).size();
                        collector.collect(new Tuple2<>(address, sum / size));
                    }
                });

        SingleOutputStreamOperator<String> mapstr = address.map(new MapFunction<Tuple2<String, Double>, String>() {
            @Override
            public String map(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                return stringDoubleTuple2.f0 + "," + stringDoubleTuple2.f1;
            }
        });

        mapstr.addSink(new FlinkKafkaProducer<String>("hadoop106:9092", "Minamount", new SimpleStringSchema()));

        // 8) Read the data received in the Minamount topic with the Kafka API and print it to
        //    the console. (2 points)
        FlinkKafkaConsumer<String> minamount = new FlinkKafkaConsumer<>("Minamount", new SimpleStringSchema(), Propss.consumer_Props);
        minamount.setStartFromEarliest();
        DataStreamSource<String> stringDataStreamSource = env.addSource(minamount);
        stringDataStreamSource.print();
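        // Sketch (added, not from the original post): the task wording asks for the plain Kafka
        // consumer API rather than the Flink connector. The poll loop below blocks forever, so it
        // belongs in a separate program (or thread) instead of running before env.execute(); it
        // is therefore left commented out. It reuses the Propss.kafka_Consumer config map.
//        org.apache.kafka.clients.consumer.KafkaConsumer<String, String> kc =
//                new org.apache.kafka.clients.consumer.KafkaConsumer<>(Propss.kafka_Consumer);
//        kc.subscribe(java.util.Collections.singletonList("Minamount"));
//        while (true) {
//            for (org.apache.kafka.clients.consumer.ConsumerRecord<String, String> record :
//                    kc.poll(java.time.Duration.ofSeconds(1))) {
//                System.out.println(record.value());
//            }
//        }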
        // 9) Complete comments (5 points)
        env.execute();

    }
}

 


