SampleOperator

package com.bjsxt.scala.spark.operator

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Minimal example of the RDD `sample` transformation.
 *
 * Builds a small in-memory array of names, distributes it over two partitions,
 * draws a sample without replacement using a fraction of 0.5, and prints every
 * sampled element.
 */
object SampleOperator {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // The application name matches this object so the job is identifiable in the
    // Spark UI and logs (it previously carried the name of a different example).
    val conf = new SparkConf().setAppName("SampleOperator").setMaster("local")
    val sc = new SparkContext(conf)

    // Always release the context, even when the job below fails.
    try {
      val names = Array(
        "Angelababy",
        "Angelababy",
        "Angelababy",
        "Angelababy",
        "Angelababy",
        "xuruyun",
        "baibaihe",
        "liutao",
        "xiaobao")

      // Second argument is the number of partitions the data is split into.
      val rdd = sc.parallelize(names, 2)

      // First argument selects the sampling mode: false = without replacement,
      // true = with replacement. Second argument is the sampling fraction.
      // No seed is passed, so the sampled elements may differ between runs.
      val sampleRDD = rdd.sample(withReplacement = false, fraction = 0.5)

      sampleRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }
}

  

posted @ 2018-06-18 14:56  uuhh  阅读(84)  评论(0)    收藏  举报