package com.bjsxt.scala.spark.operator
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * Demonstrates the `sample` operator on an RDD.
 *
 * Builds a small RDD of names (with deliberate duplicates), draws a random
 * sample from it without replacement, and prints every sampled element.
 */
object SampleOperator {

  /**
   * Program entry point: runs the sampling demo on a local Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // The application name matches this object so the job is identifiable in the Spark UI and logs.
    val conf = new SparkConf().setAppName("SampleOperator").setMaster("local")
    val sc = new SparkContext(conf)
    // Stop the context in `finally` so it is released even if the job throws.
    try {
      val names = Array(
        "Angelababy",
        "Angelababy",
        "Angelababy",
        "Angelababy",
        "Angelababy",
        "xuruyun",
        "baibaihe",
        "liutao",
        "xiaobao")
      // Distribute the data across 2 partitions.
      val rdd = sc.parallelize(names, 2)
      // First argument selects the sampling mode: false = without replacement,
      // true = with replacement. Second argument is the sampling fraction.
      // No seed is passed, so the sampled elements can differ between runs.
      val sampleRDD = rdd.sample(withReplacement = false, fraction = 0.5)
      sampleRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }
}