Exercise: Flink Table
data.txt
521588,167760,3,5999.00,1,2021-06-18 00:11:23
521590,167762,2,3999.00,1,2021-06-18 11:12:23
521591,167774,1,7888.00,2,2021-07-19 10:10:26
521592,167788,10,1278.00,3,2021-04-20 10:10:19
521593,167724,2,3888.00,2,2021-03-20 11:10:30
521576,167735,2,1888.00,1,2021-01-18 14:10:20
data2.txt
Jack,Ipad,1
Andy,Iphone,2
Lucy,Iphone,3
Andy,HuaWei,1
HanMeiMei,Oppop,2
DengChao,SAMSUNG,3
DengChao,Iphone,4
data3.txt
XiAn,1300.00,1590742505000
BeiJing,3281.12,1590742507000
ShangHai,3100.02,1590742518000
TianJing,1921.05,1590742523000
ShangHai,3400.02,1590742533000
BeiJing,2121.01,1590742537000
ShangHai,3671.78,1590742543000
XiAn,2677.95,1590742552000
XiAn,2271.95,1590742581000
bean
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Goods {
    private String uname;
    private String gname;
    private Integer nums;
}
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Monthly {
    private String month;
    private Double price;
}
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Order {
    private String id;
    private String order_id;
    private Integer sku_id;
    private Double order_price;
    private Integer sku_num;
    private String create_time;
}
package bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@NoArgsConstructor
@AllArgsConstructor
public class Region {
    private String address;
    private Double money;
    private Long timestemp;
}
utils
package utils;

import org.apache.hadoop.conf.Configuration;
import java.util.HashMap;
import java.util.Properties;

public class Propss {
    public static Properties producer_Props = new Properties();
    public static Properties consumer_Props = new Properties();
    public static HashMap<String, Object> kafka_Producer = new HashMap<>();
    public static HashMap<String, Object> kafka_Consumer = new HashMap<>();

    public static Configuration setConf(Configuration conf) {
        conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        return conf;
    }

    static {
        kafka_Consumer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Consumer.put("group.id", "com/test");
        // read from the beginning of the topic
        kafka_Consumer.put("auto.offset.reset", "earliest");
        kafka_Consumer.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafka_Consumer.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        kafka_Producer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        kafka_Producer.put("acks", "all");
        kafka_Producer.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        kafka_Producer.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        producer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        producer_Props.setProperty("acks", "all");
        producer_Props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        producer_Props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        consumer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
        consumer_Props.setProperty("group.id", "com/test");
        consumer_Props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        consumer_Props.setProperty("auto.offset.reset", "earliest");
    }
}
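Only consumer_Props is actually used by the tests below; producer_Props is never wired into a sink, because the FlinkKafkaProducer instances are built from a broker-list string. A minimal sketch of how producer_Props could be used instead, assuming a hypothetical topic name demo_topic and an existing DataStream<String> called someStringStream:

// Hypothetical usage sketch: build the Kafka sink from Propss.producer_Props
// (bootstrap.servers / acks / serializers) instead of a raw broker-list string.
FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<>(
        "demo_topic",                 // hypothetical topic, not part of the exercises
        new SimpleStringSchema(),     // write each record as a UTF-8 string
        Propss.producer_Props);
someStringStream.addSink(producer);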
test
package test;

import bean.Order;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.api.*;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class FlinkTest1 {
    public static void main(String[] args) throws Exception {

        // 1) Copy the data above into data.txt, read the file with Flink and map each line
        //    into an order detail object. (Flink stream)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String path = FlinkTest1.class.getClassLoader().getResource("data.txt").getPath();

        DataStreamSource<String> source = env.readTextFile(path);
        SingleOutputStreamOperator<Order> map = source.map(new MapFunction<String, Order>() {
            @Override
            public Order map(String s) throws Exception {
                String[] split = s.split(",");
                return new Order(split[0], split[1], Integer.valueOf(split[2]),
                        Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5]);
            }
        });
        map.print();

        // 2) Sort the orders by price in descending order and print the result to the console.
        //    (Flink Table API in batch mode)
        EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
        TableEnvironment tableEnv = TableEnvironment.create(build);

        tableEnv.connect(new FileSystem().path(path))
                .withFormat(new Csv())
                .withSchema(new Schema()
                        .field("id", DataTypes.STRING())
                        .field("order_id", DataTypes.STRING())
                        .field("sku_id", DataTypes.INT())
                        .field("order_price", DataTypes.DOUBLE())
                        .field("sku_num", DataTypes.INT())
                        .field("create_time", DataTypes.STRING()))
                .createTemporaryTable("table1");

        Table table = tableEnv.sqlQuery("select * from table1 order by order_price desc");
        TableResult execute = table.execute();
        CloseableIterator<Row> collect = execute.collect();
        while (collect.hasNext()) {
            System.out.println(collect.next());
        }

        // 3) Compute the total order amount of June 2021 and save the result to HDFS.
        // Flink Table API in batch mode (note: SQL substring is 1-based)
        Table table3 = tableEnv.sqlQuery(
                "select sum(order_price) from table1 where substring(create_time,1,7) = '2021-06'");
        TableResult execute3 = table3.execute();
        CloseableIterator<Row> collect3 = execute3.collect();
        while (collect3.hasNext()) {
            System.out.println(collect3.next());
        }
        // How to sink from batch mode? Here the code switches to stream mode and converts the
        // table to a DataStream; see also the sketch after this class.

        // Flink Table API in stream mode
        EnvironmentSettings build_ = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv_ = StreamTableEnvironment.create(env, build_);
        DataStreamSource<String> source_ = env.readTextFile(path);
        SingleOutputStreamOperator<Order> map_ = source_.map(new MapFunction<String, Order>() {
            @Override
            public Order map(String s) throws Exception {
                String[] split = s.split(",");
                return new Order(split[0], split[1], Integer.valueOf(split[2]),
                        Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5]);
            }
        });

        tableEnv_.createTemporaryView("table1", map_);
        // print
        Table table3_ = tableEnv_.sqlQuery(
                "select sum(order_price) as aa from table1 where substring(create_time,1,7) = '2021-06'");
        TableResult execute_ = table3_.execute();
        CloseableIterator<Row> collect_ = execute_.collect();
        while (collect_.hasNext()) {
            System.out.println(collect_.next());
        }
        // table to stream failed
        // SingleOutputStreamOperator<String> map2 = tableEnv_.toAppendStream(table1, Row.class).map(f -> f.getField(1).toString());
        // SingleOutputStreamOperator<String> map2 = tableEnv_.toDataStream(table1, Row.class).map(f -> f.getField(1).toString());
        // table to stream succeeded, then sink (table3_ belongs to tableEnv_, table3 does not)
        SingleOutputStreamOperator<Row> map3 = tableEnv_.toRetractStream(table3_, Row.class).map(x -> x.f1);
        SingleOutputStreamOperator<String> maped3 = map3.map(new MapFunction<Row, String>() {
            @Override
            public String map(Row row) throws Exception {
                Object field = row.getField(0);
                return field.toString();
            }
        });
        maped3.addSink(StreamingFileSink
                .forRowFormat(new Path("hdfs://hadoop106:8020/week1-output"), new SimpleStringEncoder<String>("utf-8"))
                .build());

        // 4) Compute the average order amount per sku_id and print the sku with the highest
        //    average to the console. (Flink in batch mode)
        Table table4 = tableEnv.sqlQuery(
                "select sku_id, avg(order_price) aa from table1 group by sku_id order by aa desc limit 1");
        CloseableIterator<Row> collect4 = table4.execute().collect();
        while (collect4.hasNext()) {
            System.out.println(collect4.next());
        }

        // 5) Compute the total sales of each month of 2021 and print the result to the console. (Flink in batch mode)
        Table table5 = tableEnv.sqlQuery(
                "select substring(create_time,1,7), sum(order_price) aa from table1 group by substring(create_time,1,7)");
        CloseableIterator<Row> collect5 = table5.execute().collect();
        while (collect5.hasNext()) {
            System.out.println(collect5.next());
        }

        // 6) For the 2021 monthly sales, emit months with total sales > 5000 on the main stream
        //    and months with total sales < 3000 on a side output. (Flink in stream mode)
        Table table6 = tableEnv_.sqlQuery(
                "select substring(create_time,1,7), sum(order_price) aa from table1 group by substring(create_time,1,7)");

        SingleOutputStreamOperator<Row> map6 = tableEnv_.toRetractStream(table6, Row.class).map(x -> x.f1);
        SingleOutputStreamOperator<Row> process6 = map6.process(new ProcessFunction<Row, Row>() {
            @Override
            public void processElement(Row row, Context context, Collector<Row> collector) throws Exception {
                OutputTag<Row> rowOutputTag = new OutputTag<Row>("low") {};
                Object field = row.getField(1);
                Double sum = Double.valueOf(field.toString());
                if (sum > 5000) {
                    collector.collect(row);
                } else if (sum < 3000) {
                    context.output(rowOutputTag, row);
                }
            }
        });
        process6.print("high");
        OutputTag<Row> rowOutputTag = new OutputTag<Row>("low") {};
        process6.getSideOutput(rowOutputTag).print("low");

        // 7) Sink the total sales of 2021 to Kafka. (Flink in stream mode)
        Table table7 = tableEnv_.sqlQuery(
                "select substring(create_time,1,4), sum(order_price) aa from table1 group by substring(create_time,1,4)");
        SingleOutputStreamOperator<Row> map7 = tableEnv_.toRetractStream(table7, Row.class).map(x -> x.f1);
        SingleOutputStreamOperator<String> maped7 = map7.map(new MapFunction<Row, String>() {
            @Override
            public String map(Row row) throws Exception {
                return row.getField(0).toString() + "," + row.getField(1).toString();
            }
        });

        maped7.addSink(new FlinkKafkaProducer<String>("hadoop106:9092", "week_1", new SimpleStringSchema()));

        // execute
        env.execute();
    }
}
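The "how to sink?" question in requirement 3 can also be answered without leaving the batch TableEnvironment: register a second filesystem table as a sink and write into it. A minimal sketch, assuming the same batch tableEnv and table3 from the code above and a hypothetical HDFS output directory; the exact behaviour of the legacy Csv/FileSystem descriptor sink depends on the Flink version and is not verified here:

// Hypothetical CSV sink table over HDFS; path and table name are illustrative only.
tableEnv.connect(new FileSystem().path("hdfs://hadoop106:8020/week1-table-sink"))
        .withFormat(new Csv())
        .withSchema(new Schema().field("total", DataTypes.DOUBLE()))
        .createTemporaryTable("sink_table");

// Write the June-2021 total directly from the batch TableEnvironment.
table3.executeInsert("sink_table");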
package test;

import bean.Goods;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.table.api.*;
import org.apache.flink.table.descriptors.Csv;
import org.apache.flink.table.descriptors.FileSystem;
import org.apache.flink.table.descriptors.Schema;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;

public class FlinkTest2 {
    public static void main(String[] args) throws Exception {
        // Exercise 2: implement the following requirements with the Flink Java or Scala API.

        // 1) Save the data above to an HDFS directory (path of your choice) and read it back
        //    correctly with Flink. (Flink stream)
        //    Here the classpath copy of data2.txt is read; a sketch of reading from HDFS follows after this class.
        LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);
        String path = FlinkTest1.class.getClassLoader().getResource("data2.txt").getPath();
        DataStreamSource<String> source = env.readTextFile(path);

        SingleOutputStreamOperator<String> map = source.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                return s;
            }
        });
        map.print();
        // sink to HDFS
        map.addSink(StreamingFileSink
                .forRowFormat(new Path("hdfs://hadoop106:8020/week1-output"), new SimpleStringEncoder<String>("utf-8"))
                .build());

        // strings to Goods
        // SingleOutputStreamOperator<Goods> gmap = source.map((MapFunction<String, Goods>) s -> {
        //     String[] split = s.split(",");
        //     return new Goods(split[0], split[1], Integer.valueOf(split[2]));
        // });

        // Flink in batch mode
        EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
        TableEnvironment tableEnv = TableEnvironment.create(build);

        tableEnv.connect(new FileSystem().path(path))
                .withFormat(new Csv())
                .withSchema(new Schema()
                        .field("uname", DataTypes.STRING())
                        .field("gname", DataTypes.STRING())
                        .field("nums", DataTypes.INT()))
                .createTemporaryTable("table1");

        // 2) Find the users who bought an Iphone with quantity >= 2 and print the result to the console.
        Table table = tableEnv.sqlQuery("select * from table1 where gname = 'Iphone' and nums >= 2");
        TableResult execute = table.execute();
        CloseableIterator<Row> collect = execute.collect();
        while (collect.hasNext()) {
            System.out.println(collect.next());
        }

        // 3) Compute the total units sold per brand, sorted in ascending order, and print the result to the console.
        Table table3 = tableEnv.sqlQuery("select gname, sum(nums) aa from table1 group by gname order by aa");
        TableResult execute3 = table3.execute();
        CloseableIterator<Row> collect3 = execute3.collect();
        while (collect3.hasNext()) {
            System.out.println(collect3.next());
        }

        // 4) Complete comments (5 points)
        env.execute();
    }
}
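Requirement 1 asks for the records to be read back from HDFS, while FlinkTest2 reads data2.txt from the classpath. A minimal sketch of reading the same file from HDFS and mapping it to Goods, meant to be placed inside the same main method (so env, MapFunction and Goods are in scope); the directory hdfs://hadoop106:8020/week1-data2/ is a hypothetical location you would upload the file to first, and reading hdfs:// paths requires the Hadoop filesystem dependency on the classpath:

// Hypothetical HDFS copy of data2.txt, uploaded beforehand (e.g. with `hdfs dfs -put`).
DataStreamSource<String> hdfsSource = env.readTextFile("hdfs://hadoop106:8020/week1-data2/data2.txt");
SingleOutputStreamOperator<Goods> goodsStream = hdfsSource.map((MapFunction<String, Goods>) s -> {
    String[] split = s.split(",");
    // fields: uname, gname, nums
    return new Goods(split[0], split[1], Integer.valueOf(split[2]));
});
goodsStream.print();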
package test;

import bean.Region;
import org.apache.commons.collections.IteratorUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.util.Collector;
import utils.Propss;

import java.util.Iterator;

public class FlinkTest3 {
    public static void main(String[] args) throws Exception {
        // 1) Write the data above to the Kafka topic region_order and verify the result. (Flink)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        String path = FlinkTest1.class.getClassLoader().getResource("data3.txt").getPath();
        DataStreamSource<String> source = env.readTextFile(path);

        // map each line into a Region object
        SingleOutputStreamOperator<Region> map = source.map((MapFunction<String, Region>) s -> {
            String[] split = s.split(",");
            return new Region(split[0], Double.valueOf(split[1]), Long.valueOf(split[2]));
        });

        SingleOutputStreamOperator<String> maps = source.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                return s;
            }
        });
        // sink to topic region_order
        maps.addSink(new FlinkKafkaProducer<String>("hadoop106:9092", "region_order", new SimpleStringSchema()));

        // 2) Consume the Kafka topic region_order in real time with the Flink streaming API
        //    and print it to the console.
        FlinkKafkaConsumer<String> sss = new FlinkKafkaConsumer<>("region_order", new SimpleStringSchema(), Propss.consumer_Props);
        sss.setStartFromEarliest();
        DataStreamSource<String> source1 = env.addSource(sss);
        source1.print();

        // 3) Enable checkpointing with a 5-second interval.
        env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);

        // 4) Configure the state backend so that checkpoints are stored in HDFS (path of your choice).
        env.getCheckpointConfig().setCheckpointTimeout(60000);
        env.setStateBackend(new FsStateBackend("hdfs://hadoop106:8020/checkP"));

        // 5) Use watermarks with a 10-second out-of-orderness bound, generated every 3 seconds.
        SingleOutputStreamOperator<Region> map5 = source1.map(new MapFunction<String, Region>() {
            @Override
            public Region map(String s) throws Exception {
                String[] split = s.split(",");
                return new Region(split[0], Double.valueOf(split[1]), Long.valueOf(split[2]));
            }
        });
        // generate watermarks every 3 seconds
        env.getConfig().setAutoWatermarkInterval(3000);
        // allow events to arrive up to 10 seconds late
        SingleOutputStreamOperator<Region> ope = map5.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<Region>(Time.seconds(10)) {
                    @Override
                    public long extractTimestamp(Region region) {
                        return region.getTimestemp();
                    }
                });

        // 6) Use a window to compute the total transaction amount per city per minute and save
        //    the result locally. (5 points) The print below only shows it; a local-file sink
        //    sketch follows after this class.
        ope.keyBy("address").timeWindow(Time.seconds(60)).sum("money").print();

        // 7) Use a window to compute the average transaction amount per city per minute and
        //    send it to the Kafka topic Minamount. (5 points)
        SingleOutputStreamOperator<Tuple2<String, Double>> address = ope.keyBy("address").timeWindow(Time.seconds(60)).apply(
                new WindowFunction<Region, Tuple2<String, Double>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple tuple, TimeWindow timeWindow, Iterable<Region> iterable,
                                      Collector<Tuple2<String, Double>> collector) throws Exception {
                        Object field = tuple.getField(0);
                        String address = field.toString();
                        Iterator<Region> iterator = iterable.iterator();
                        Double sum = 0.0;
                        while (iterator.hasNext()) {
                            Region next = iterator.next();
                            sum += next.getMoney();
                        }
                        int size = IteratorUtils.toList(iterable.iterator()).size();
                        collector.collect(new Tuple2<>(address, sum / size));
                    }
                });

        SingleOutputStreamOperator<String> mapstr = address.map(new MapFunction<Tuple2<String, Double>, String>() {
            @Override
            public String map(Tuple2<String, Double> stringDoubleTuple2) throws Exception {
                return stringDoubleTuple2.f0 + "," + stringDoubleTuple2.f1;
            }
        });

        mapstr.addSink(new FlinkKafkaProducer<String>("hadoop106:9092", "Minamount", new SimpleStringSchema()));

        // 8) Read the data from the Minamount topic with the Kafka API and print it to the console. (2 points)
        FlinkKafkaConsumer<String> minamount = new FlinkKafkaConsumer<>("Minamount", new SimpleStringSchema(), Propss.consumer_Props);
        minamount.setStartFromEarliest();
        DataStreamSource<String> stringDataStreamSource = env.addSource(minamount);
        stringDataStreamSource.print();

        // 9) Complete comments (5 points)
        env.execute();
    }
}
