Operator_repartitionAndSortWithinPartition

package com.bjsxt.spark.transformations;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.cassandra.cli.CliParser.newColumnFamily_return;
import org.apache.spark.Partitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

import scala.Tuple2;
/**
 * Serializable comparator that orders {@link Integer} keys in descending
 * order (larger values sort first).
 *
 * <p>It is passed to {@code repartitionAndSortWithinPartitions} as the key
 * ordering, which is why it implements {@link Serializable}.
 */
class MySort implements Serializable, Comparator<Integer> {

    private static final long serialVersionUID = 1L;

    /**
     * Compares two keys so that the larger one sorts first.
     *
     * @param o1 the first key
     * @param o2 the second key
     * @return a negative value if {@code o1 > o2}, zero if they are equal,
     *         a positive value if {@code o1 < o2}
     */
    @Override
    public int compare(Integer o1, Integer o2) {
        // Integer.compare avoids the overflow of "o2 - o1", which yields the
        // wrong sign when the operands are far apart (e.g. MIN_VALUE and 1).
        return Integer.compare(o2, o1);
    }

}

/**
 * Demo of {@code JavaPairRDD.repartitionAndSortWithinPartitions}: a small
 * pair RDD is redistributed into three partitions by a custom
 * {@link Partitioner} and the keys inside each partition are ordered with
 * {@link MySort}. The contents of every partition are then printed.
 */
public class Operator_repartitionAndSortWithinPartition {

    /**
     * Runs the demo on a local Spark master and prints the partition count
     * followed by each record together with the id of its partition.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        SparkConf sparkConf = new SparkConf().setAppName("RepartitionAndSortWithinPartitionsOperator").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        try {
            List<Tuple2<Integer,Integer>> list = Arrays.asList(
                    new Tuple2<Integer,Integer>(2, 3),
                    new Tuple2<Integer,Integer>(1, 2),
                    new Tuple2<Integer,Integer>(6, 7),
                    new Tuple2<Integer,Integer>(3, 4),
                    new Tuple2<Integer,Integer>(5, 6),
                    new Tuple2<Integer,Integer>(4, 5)
            );

            // Start with a single partition so the effect of repartitioning is visible.
            JavaPairRDD<Integer,Integer> rdd = sc.parallelizePairs(list, 1);

            JavaPairRDD<Integer, Integer> rdd1 = rdd.repartitionAndSortWithinPartitions(new Partitioner() {

                private static final long serialVersionUID = 1L;

                /**
                 * Number of target partitions; valid partition ids are 0, 1 and 2.
                 *
                 * @return the partition count
                 */
                @Override
                public int numPartitions() {
                    return 3;
                }

                /**
                 * Maps a key to a partition by taking its integer value modulo
                 * the partition count.
                 *
                 * @param key the record key; its string form must parse as an int
                 * @return a partition id in {@code [0, numPartitions())}
                 */
                @Override
                public int getPartition(Object key) {
                    int partitions = numPartitions();
                    int remainder = Integer.parseInt(String.valueOf(key)) % partitions;
                    // Java's % keeps the sign of the dividend; shift negative
                    // remainders up so the id is never below zero.
                    return remainder < 0 ? remainder + partitions : remainder;
                }
            }, new MySort());

            System.out.println("rdd1.partitions().size():" + rdd1.partitions().size());

            rdd1.mapPartitionsWithIndex(new Function2<Integer, Iterator<Tuple2<Integer,Integer>>, Iterator<Tuple2<Integer,Integer>>>() {

                private static final long serialVersionUID = 1L;

                /**
                 * Prints every record of one partition.
                 *
                 * @param v1 index of the partition being processed
                 * @param v2 iterator over the records of that partition
                 * @return the same iterator, already fully consumed
                 */
                @Override
                public Iterator<Tuple2<Integer, Integer>> call(Integer v1, Iterator<Tuple2<Integer, Integer>> v2) throws Exception {
                    while (v2.hasNext()) {
                        System.out.println("partitionId:" + v1 + " value:" + v2.next());
                    }
                    // v2 is exhausted here, so downstream sees no records; the
                    // count() below serves only as the action that runs the job.
                    return v2;
                }
            }, true).count();
        } finally {
            // Always release the Spark context, even if the job fails.
            sc.stop();
        }

    }
}

posted @ 2018-06-18 11:47  uuhh  阅读(1)  评论(0)    收藏  举报