import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
/**
 * Spark word-count job that prints every word together with its number of
 * occurrences, most frequent word first.
 *
 * <p>Pipeline: read a text file, split each line into words, count the
 * occurrences of each word, swap the pairs to (count, word), sort by count in
 * descending order, and print the result on the driver.
 *
 * @author (not specified)
 * @version created 2017-08-30 10:02:51
 */
public class WordCountSotr {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "G://121.txt";

    /**
     * Runs the job with a local master.
     *
     * @param args optional; {@code args[0]}, when present, is the path of the
     *             text file to analyse. Without it {@link #DEFAULT_INPUT_PATH}
     *             is read.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        SparkConf conf = new SparkConf().setAppName("sortAction").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            JavaRDD<String> lines = sc.textFile(inputPath);

            // Split every line into words. Trimming first and splitting on runs
            // of whitespace guarantees that no empty token is produced; a blank
            // line contributes no words at all.
            JavaRDD<String> tokens = lines.flatMap(new FlatMapFunction<String, String>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Iterable<String> call(String line) throws Exception {
                    String trimmed = line.trim();
                    if (trimmed.isEmpty()) {
                        return Arrays.<String>asList();
                    }
                    return Arrays.asList(trimmed.split("\\s+"));
                }
            });

            // Turn every word into a (word, 1) pair.
            JavaPairRDD<String, Integer> ones = tokens.mapToPair(new PairFunction<String, String, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<String, Integer> call(String word) throws Exception {
                    return new Tuple2<String, Integer>(word, 1);
                }
            });

            // Sum the ones per word, e.g. (word, 2), (hello, 3).
            JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Integer call(Integer left, Integer right) throws Exception {
                    return left + right;
                }
            });

            // Swap to (count, word) so that the count becomes the sort key.
            // Words sharing a count are deliberately NOT merged: every word
            // keeps its own entry.
            JavaPairRDD<Integer, String> byCount = counts.mapToPair(
                    new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<Integer, String> call(Tuple2<String, Integer> pair) throws Exception {
                            return new Tuple2<Integer, String>(pair._2(), pair._1());
                        }
                    });

            // Highest count first.
            JavaPairRDD<Integer, String> sorted = byCount.sortByKey(false);

            // Collect to the driver before printing: foreach() runs on the
            // executors, which neither preserves the global sort order across
            // partitions nor necessarily writes to the driver's console.
            // NOTE: collect() loads the whole result into driver memory.
            for (Tuple2<Integer, String> entry : sorted.collect()) {
                System.out.println("sort by key....." + entry._2() + "word..." + entry._1());
            }
        } finally {
            // Release the Spark context even when a stage fails.
            sc.close();
        }
    }
}