《初级》Flink API练习

来源于:https://blog.csdn.net/huzechen/article/details/100140768

 

Filter

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;


/**
 * Demonstrates the {@code filter} transformation: generates the sequence
 * -5..5, keeps only the strictly positive values and prints them.
 */
public class TestFilter {

    /** Predicate that accepts a value only when it is greater than zero. */
    private static final class PositiveOnly implements FilterFunction<Long> {
        @Override
        public boolean filter(Long candidate) throws Exception {
            return candidate > 0;
        }
    }

    /**
     * Builds and runs the filter job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Long> numbers = environment.generateSequence(-5, 5);
        DataStream<Long> positives = numbers.filter(new PositiveOnly());
        positives.print();

        environment.execute();
    }
}
IntervalJoin

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;


/**
 * Demonstrates an event-time interval join: transcripts and students are
 * keyed by id and joined when their timestamps are less than 2 ms apart.
 * Each match is printed as (id, name, class, subject, score).
 */
public class TestIntervalJoin {

    /**
     * Event timestamp shared by every sample record.
     *
     * <p>Reading the clock once keeps the example deterministic: the join
     * window below is only (-2 ms, +2 ms), so giving each record its own
     * {@code System.currentTimeMillis()} value would make the set of matches
     * depend on how quickly the sample arrays happen to be initialised.
     * This field must be declared before the sample arrays that use it.
     */
    private static final long BASE_TIME = System.currentTimeMillis();

    /** Sample exam results, one per student id. */
    public static final Transcript[] TRANSCRIPTS = new Transcript[] {
        new Transcript("1", "张三", "语文", 100, BASE_TIME),
        new Transcript("2", "李四", "语文", 78, BASE_TIME),
        new Transcript("3", "王五", "语文", 99, BASE_TIME),
        new Transcript("4", "赵六", "语文", 81, BASE_TIME),
        new Transcript("5", "钱七", "语文", 59, BASE_TIME),
        new Transcript("6", "马二", "语文", 97, BASE_TIME)
    };

    /** Sample students, one per id, carrying the class they belong to. */
    public static final Student[] STUDENTS = new Student[] {
        new Student("1", "张三", "class1", BASE_TIME),
        new Student("2", "李四", "class1", BASE_TIME),
        new Student("3", "王五", "class1", BASE_TIME),
        new Student("4", "赵六", "class2", BASE_TIME),
        new Student("5", "钱七", "class2", BASE_TIME),
        new Student("6", "马二", "class2", BASE_TIME)
    };

    /**
     * Builds and runs the interval-join job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // The interval join is evaluated on the timestamps assigned below.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStream<Transcript> input1 = env.fromElements(TRANSCRIPTS)
            .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Transcript>() {
                @Override
                public long extractAscendingTimestamp(Transcript element) {
                    return element.time;
                }
            });

        DataStream<Student> input2 = env.fromElements(STUDENTS)
            .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Student>() {
                @Override
                public long extractAscendingTimestamp(Student element) {
                    return element.time;
                }
            });

        KeyedStream<Transcript, String> keyedStream = input1.keyBy(new KeySelector<Transcript, String>() {
            @Override
            public String getKey(Transcript value) throws Exception {
                return value.id;
            }
        });

        KeyedStream<Student, String> otherKeyedStream = input2.keyBy(new KeySelector<Student, String>() {
            @Override
            public String getKey(Student value) throws Exception {
                return value.id;
            }
        });

        // General rule: e1.timestamp + lowerBound <= e2.timestamp <= e1.timestamp + upperBound.
        // Both bounds are made exclusive here, so the effective condition is:
        // key1 == key2 && leftTs - 2 < rightTs < leftTs + 2
        keyedStream.intervalJoin(otherKeyedStream)
            .between(Time.milliseconds(-2), Time.milliseconds(2))
            .upperBoundExclusive()
            .lowerBoundExclusive()
            .process(new ProcessJoinFunction<Transcript, Student, Tuple5<String, String, String, String, Integer>>() {

                @Override
                public void processElement(Transcript transcript, Student student, Context ctx,
                        Collector<Tuple5<String, String, String, String, Integer>> out) throws Exception {
                    out.collect(Tuple5.of(transcript.id, transcript.name, student.class_,
                        transcript.subject, transcript.score));
                }

            }).print();

        env.execute();
    }

    /** One exam result: student id and name, subject, score and event timestamp. */
    private static class Transcript {
        private String id;
        private String name;
        private String subject;
        private int score;
        private long time;

        public Transcript(String id, String name, String subject, int score, long time) {
            this.id = id;
            this.name = name;
            this.subject = subject;
            this.score = score;
            this.time = time;
        }

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getSubject() {
            return subject;
        }

        public void setSubject(String subject) {
            this.subject = subject;
        }

        public int getScore() {
            return score;
        }

        public void setScore(int score) {
            this.score = score;
        }

        public long getTime() {
            return time;
        }

        public void setTime(long time) {
            this.time = time;
        }
    }

    /** One student: id, name, class label and event timestamp. */
    private static class Student {
        private String id;
        private String name;
        private String class_;
        private long time;

        public Student(String id, String name, String class_, long time) {
            this.id = id;
            this.name = name;
            this.class_ = class_;
            this.time = time;
        }

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getClass_() {
            return class_;
        }

        public void setClass_(String class_) {
            this.class_ = class_;
        }

        public long getTime() {
            return time;
        }

        public void setTime(long time) {
            this.time = time;
        }
    }
}
Connect

import org.apache.flink.streaming.api.datastream.ConnectedStreams;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoFlatMapFunction;
import org.apache.flink.util.Collector;


/**
 * Demonstrates {@code connect}: a stream of numbers and a stream of text
 * lines are processed by one {@code CoFlatMapFunction} that emits strings
 * for both inputs, and the merged result is printed.
 */
public class TestConnect {

    /** Sample text lines fed into the second input of the connected stream. */
    public static final String[] WORDS = new String[] {
        "And thus the native hue of resolution",
        "Is sicklied o'er with the pale cast of thought;",
        "And enterprises of great pith and moment,",
        "With this regard, their currents turn awry,",
        "And lose the name of action.--Soft you now!",
        "The fair Ophelia!--Nymph, in thy orisons",
        "Be all my sins remember'd."
    };

    /**
     * Builds and runs the connect job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Long> someStream = env.generateSequence(0, 10);
        DataStream<String> otherStream = env.fromElements(WORDS);

        ConnectedStreams<Long, String> connectedStreams = someStream.connect(otherStream);

        DataStream<String> result = connectedStreams.flatMap(new CoFlatMapFunction<Long, String, String>() {

            /** Emits the decimal text of each number from the first input. */
            @Override
            public void flatMap1(Long value, Collector<String> out) throws Exception {
                out.collect(value.toString());
            }

            /** Splits each line from the second input on non-word characters and emits the words. */
            @Override
            public void flatMap2(String value, Collector<String> out) {
                for (String word : value.split("\\W+")) {
                    // split() yields an empty leading token when the line starts
                    // with a non-word character; skip it instead of emitting "".
                    if (!word.isEmpty()) {
                        out.collect(word);
                    }
                }
            }
        });

        result.print();

        env.execute();
    }
}
 

KeyBy

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.*;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;


/**
 * Demonstrates {@code keyBy} followed by {@code maxBy}: for each class, find
 * who has the highest score in the sample transcript.
 *
 * <p>Alternatives the original author experimented with (left out of the
 * running code):
 * <ul>
 *   <li>keying with a {@code KeySelector} that returns {@code value.f0}
 *       instead of the field expression {@code "f0"};</li>
 *   <li>attaching a {@code KeyedProcessFunction} that prints
 *       {@code ctx.getCurrentKey()};</li>
 *   <li>printing the parallelism of the input, keyed and aggregated streams;</li>
 *   <li>iterating over the result with {@code DataStreamUtils.collect}, which
 *       the author notes does not need {@code env.execute()}.</li>
 * </ul>
 */
public class TestKeyBy {

    /** Sample rows: (class, student name, subject, score). */
    public static final Tuple4[] TRANSCRIPT = new Tuple4[] {
        Tuple4.of("class1", "张三", "语文", 100),
        Tuple4.of("class1", "李四", "语文", 78),
        Tuple4.of("class1", "王五", "语文", 99),
        Tuple4.of("class2", "赵六", "语文", 81),
        Tuple4.of("class2", "钱七", "语文", 59),
        Tuple4.of("class2", "马二", "语文", 97)
    };

    /**
     * Builds and runs the keyBy/maxBy job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple4<String, String, String, Integer>> transcripts = environment.fromElements(TRANSCRIPT);

        // Key by the first tuple field (the class name).
        KeyedStream<Tuple4<String, String, String, Integer>, Tuple> byClass = transcripts.keyBy("f0");

        // Aggregate per key on the score field and print the result.
        DataStream<Tuple4<String, String, String, Integer>> topScores = byClass.maxBy("f3");
        topScores.print();

        environment.execute();
    }
}
Map

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;


/**
 * Demonstrates the {@code map} transformation: adds one to every value of
 * the sequence 0..10 and prints the result.
 */
public class TestMap {

    /**
     * Builds and runs the map job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Long> input = env.generateSequence(0, 10);

        // Declared with its element type (the original used a raw DataStream).
        DataStream<Long> plusOne = input.map(new MapFunction<Long, Long>() {

            @Override
            public Long map(Long value) throws Exception {
                // Trace line showing each value as it reaches the mapper.
                System.out.println("--------------------" + value);
                return value + 1;
            }
        });

        plusOne.print();

        env.execute();
    }
}
 

Fold

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;


/**
 * Demonstrates {@code fold} on a keyed stream: per class, student names are
 * appended one by one to an accumulator string that starts as "Start".
 */
public class TestFold {

    /** Sample rows: (class, student name, subject, score). */
    public static final Tuple4[] TRANSCRIPT = new Tuple4[] {
        Tuple4.of("class1", "张三", "语文", 100),
        Tuple4.of("class1", "李四", "语文", 78),
        Tuple4.of("class1", "王五", "语文", 99),
        Tuple4.of("class2", "赵六", "语文", 81),
        Tuple4.of("class2", "钱七", "语文", 59),
        Tuple4.of("class2", "马二", "语文", 97)
    };

    /** Appends "=" and the row's name field to the running accumulator. */
    private static final class NameAppender
            implements FoldFunction<Tuple4<String, String, String, Integer>, String> {
        @Override
        public String fold(String soFar, Tuple4<String, String, String, Integer> row) throws Exception {
            return soFar + "=" + row.f1;
        }
    }

    /**
     * Builds and runs the fold job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple4<String, String, String, Integer>> transcripts = environment.fromElements(TRANSCRIPT);

        // Key by tuple position 0 (the class name), then fold within each key.
        DataStream<String> namesPerClass = transcripts.keyBy(0).fold("Start", new NameAppender());
        namesPerClass.print();

        environment.execute();
    }
}
 

Reduce

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Demonstrates {@code reduce} on a keyed stream: per class, a running sum of
 * the score field is emitted for every incoming row.
 */
public class TestReduce {

    /** Sample rows: (class, student name, subject, score). */
    public static final Tuple4[] TRANSCRIPT = new Tuple4[] {
        Tuple4.of("class1", "张三", "语文", 100),
        Tuple4.of("class1", "李四", "语文", 78),
        Tuple4.of("class1", "王五", "语文", 99),
        Tuple4.of("class2", "赵六", "语文", 81),
        Tuple4.of("class2", "钱七", "语文", 59),
        Tuple4.of("class2", "马二", "语文", 97)
    };

    /**
     * Builds and runs the reduce job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple4<String, String, String, Integer>> input = env.fromElements(TRANSCRIPT);

        // Key by tuple position 0 (the class name).
        KeyedStream<Tuple4<String, String, String, Integer>, Tuple> keyedStream = input.keyBy(0);

        keyedStream.reduce(new ReduceFunction<Tuple4<String, String, String, Integer>>() {
            /**
             * Combines two rows of the same key: the first three fields come
             * from {@code value1}, the score is the sum of both scores.
             */
            @Override
            public Tuple4<String, String, String, Integer> reduce(
                    Tuple4<String, String, String, Integer> value1,
                    Tuple4<String, String, String, Integer> value2) throws Exception {
                // Build a fresh tuple rather than mutating value1 in place, so
                // neither argument object is modified by the reducer.
                return Tuple4.of(value1.f0, value1.f1, value1.f2, value1.f3 + value2.f3);
            }
        }).print();

        env.execute();
    }
}
 

Project

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;


/**
 * Demonstrates {@code project}: keeps only tuple positions 1 and 3 (student
 * name and score) of every transcript row and prints the resulting pairs.
 */
public class TestProject {

    /** Sample rows: (class, student name, subject, score). */
    public static final Tuple4[] TRANSCRIPT = new Tuple4[] {
        Tuple4.of("class1", "张三", "语文", 100),
        Tuple4.of("class1", "李四", "语文", 78),
        Tuple4.of("class1", "王五", "语文", 99),
        Tuple4.of("class2", "赵六", "语文", 81),
        Tuple4.of("class2", "钱七", "语文", 59),
        Tuple4.of("class2", "马二", "语文", 97)
    };

    /**
     * Builds and runs the projection job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Tuple4<String, String, String, Integer>> transcripts =
            environment.fromElements(TRANSCRIPT);

        // Positions 1 and 3 are the name and the score.
        DataStream<Tuple2<String, Integer>> nameAndScore = transcripts.project(1, 3);
        nameAndScore.print();

        environment.execute();
    }
}
 

SplitAndSelect

import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.ArrayList;
import java.util.List;


/**
 * Demonstrates {@code split} and {@code select}: the sequence 0..10 is tagged
 * as "even" or "odd", and the "odd" selection is printed.
 */
public class TestSplitAndSelect {

    /** Tags each number with exactly one name: "even" or "odd". */
    private static final class ParitySelector implements OutputSelector<Long> {
        @Override
        public Iterable<String> select(Long number) {
            List<String> tags = new ArrayList<String>();
            tags.add(number % 2 == 0 ? "even" : "odd");
            return tags;
        }
    }

    /**
     * Builds and runs the split/select job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Long> numbers = environment.generateSequence(0, 10);

        SplitStream<Long> tagged = numbers.split(new ParitySelector());

        // Three possible selections; only the odd one is given a sink below.
        // To try the others, call print() on `even` or `all` (or on `tagged`).
        DataStream<Long> even = tagged.select("even");
        DataStream<Long> odd = tagged.select("odd");
        DataStream<Long> all = tagged.select("even", "odd");

        odd.print();

        environment.execute();
    }
}
 

Flatmap

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;


/**
 * Demonstrates {@code flatMap}: every text line is lower-cased, split on
 * non-word characters, and each non-empty token is emitted and printed.
 */
public class TestFlatmap {

    /** Sample text lines used as the job input. */
    public static final String[] WORDS = new String[] {
        "To be, or not to be,--that is the question:--",
        "Whether 'tis nobler in the mind to suffer",
        "The slings and arrows of outrageous fortune",
        "And by opposing end them?--To die,--to sleep,--",
        "Be all my sins remember'd."
    };

    /**
     * Builds and runs the flatMap job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> input = env.fromElements(WORDS);

        DataStream<String> wordStream = input.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> out) throws Exception {
                // Locale.ROOT keeps the lower-casing independent of the JVM's
                // default locale (e.g. "I" would not become "i" under Turkish rules).
                String[] tokens = value.toLowerCase(java.util.Locale.ROOT).split("\\W+");

                for (String token : tokens) {
                    // split() may produce an empty leading token; drop it.
                    if (token.length() > 0) {
                        out.collect(token);
                    }
                }
            }
        });

        wordStream.print();

        env.execute();
    }
}
 
————————————————
版权声明:本文为CSDN博主「小晨说数据」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/huzechen/article/details/100140768

posted @ 2020-02-27 02:58  苦行者的刀  阅读(165)  评论(0)    收藏  举报