Java Top N sort

Implementing a Top N sort in Java with the Spark API.

import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

/**
* @author  E-mail:
* @version Created: 2017-08-31 15:36:03
* Top N sort using the Spark Java API
*/
public class Top3Sort {

	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("top3").setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);
		JavaRDD<String> tf = sc.textFile("G://top3.txt");
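		// Map each line to a (number, line) pair so the numeric value becomes the sort key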
		JavaPairRDD<Integer, String> mapToPair = tf.mapToPair(new PairFunction<String, Integer,String >() {

			private static final long serialVersionUID = 1L;

			@Override
			public Tuple2<Integer, String> call(String t) throws Exception {
				
				return new Tuple2<Integer, String>(Integer.valueOf(t), t);
			}
		});
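		// Sort by the numeric key in descending order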
		JavaPairRDD<Integer, String> sortByKey = mapToPair.sortByKey(false);
		JavaRDD<Integer> map = sortByKey.map(new Function<Tuple2<Integer,String>, Integer>() {
			private static final long serialVersionUID = 1L;

			@Override
			public Integer call(Tuple2<Integer, String> v1) throws Exception {
				// Keep only the numeric key from each sorted pair
				return v1._1;
			}
		});
		// Take the top N elements (5 here) and print them
		List<Integer> take = map.take(5);
		for (Integer nums : take) {
			System.out.println("...." + nums);
		}
		sc.close();
	}
}
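
For comparison, the same top-N result can also be obtained without an explicit sortByKey: JavaRDD.top(n) returns the n largest elements by natural ordering. Below is a minimal sketch under the same assumptions (local master, input file G://top3.txt, one integer per line); the class name TopNWithTop and the Java 8 lambda are illustrative and not part of the original code.

import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class TopNWithTop {

	public static void main(String[] args) {
		SparkConf conf = new SparkConf().setAppName("topN").setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		// Parse each line of the input file into an Integer
		JavaRDD<Integer> nums = sc.textFile("G://top3.txt")
				.map(line -> Integer.valueOf(line.trim()));

		// top(5) returns the 5 largest elements by natural ordering,
		// so no explicit sort step is needed
		List<Integer> top5 = nums.top(5);
		for (Integer n : top5) {
			System.out.println("...." + n);
		}

		sc.close();
	}
}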

  Test data:

1
2
3
4
6
77
88
99
987
121
12121
4556
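
With this input, the descending sort followed by take(5) should print the five largest values:

....12121
....4556
....987
....121
....99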

 

posted on 2017-08-31 17:08 by ptbx