Spark -- transformation 算子 -- mapPartitionsWithIndex

import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer

/**
  * Demo of Spark's `mapPartitionsWithIndex` transformation: every element of an RDD
  * is tagged with the index of the partition it belongs to, which shows how the
  * data was distributed across partitions.
  *
  * Created by liupeng on 2017/6/15.
  */
object T_mapPartitionsWithIndex {

  // Executed when this object is initialised, i.e. before `main` runs.
  // The path is hard-coded for the original author's machine.
  // NOTE(review): presumably needed so Hadoop can locate its Windows binaries — confirm.
  System.setProperty("hadoop.home.dir","F:\\hadoop-2.6.5")

  /**
    * Prefixes each element of a partition with the partition index and prints it.
    *
    * The mapping is lazy: an element is tagged and printed only when the returned
    * iterator is consumed, so the partition is never buffered in memory as a whole.
    *
    * @param index index of the partition being processed
    * @param iter  the elements of that partition
    * @return an iterator over `"index:element"` strings, in input order
    */
  def fun_index(index : Int, iter : Iterator[String]) : Iterator[String] =
    iter.map { name =>
      val tagged = s"$index:$name"
      println(tagged)
      tagged
    }

  /**
    * Builds a three-element RDD split into two partitions, tags each element with
    * its partition index via [[fun_index]], and prints the resulting element count.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("mapPartitionsWithIndex_test").setMaster("local")
    val sc = new SparkContext(conf)

    try {
      // Prepare some sample data.
      val names: List[String] = List("liupeng", "xuliuxi", "xiaoma")
      val nameRDD = sc.parallelize(names, 2)
      // Traverse partition by partition: mapPartitionsWithIndex hands us the index of
      // each partition, so we can see which elements ended up together.
      val namesWithPartitionIndex = nameRDD.mapPartitionsWithIndex(fun_index)
      // count() is the action that actually triggers the (lazy) transformation.
      println(namesWithPartitionIndex.count())
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}
运行结果:
0:liupeng

1:xuliuxi
1:xiaoma

3
posted @ 2017-07-18 21:52  书灯  阅读(21)  评论(0)    收藏  举报  来源