第五章_Spark核心编程_Rdd_行动算子_save operator
1.定义
/*
 * 1. 定义
 *    def saveAsTextFile(path: String): Unit
 *    def saveAsObjectFile(path: String): Unit
 *    def saveAsSequenceFile(
 *        path: String,
 *        codec: Option[Class[_ <: CompressionCodec]] = None): Unit
 * 2. 功能
 *    将数据保存到不同格式的文件中
 * 3. note
 *    saveAsTextFile : 保存文本文件
 *    saveAsObjectFile : 保存对象序列化字节文件
 *    saveAsSequenceFile : 保存SequenceFile文件
 */
2.示例
/**
 * Demo of the three RDD save actions: saveAsTextFile, saveAsObjectFile,
 * saveAsSequenceFile. Each call writes one output file per partition
 * (here 2 partitions => part-00000 and part-00001 in each directory).
 *
 * saveAsSequenceFile is only available on pair RDDs (key/value tuples),
 * pulled in via the implicit conversion to SequenceFileRDDFunctions.
 */
object saveTest extends App {
  // appName fixed: the original "distinctTest" was copy-pasted from another example.
  val sparkconf: SparkConf = new SparkConf().setMaster("local").setAppName("saveTest")
  val sc: SparkContext = new SparkContext(sparkconf)

  // Pair RDD with 2 partitions; pair elements are required for saveAsSequenceFile.
  private val rdd = sc.makeRDD(List((1, "x"), (1, "x"), (2, "x"), (2, "x"), (2, "x")), 2)

  rdd.saveAsTextFile("Spark_319/src/output/01")     // plain text, one line per element
  rdd.saveAsObjectFile("Spark_319/src/output/02")   // Java-serialized objects
  rdd.saveAsSequenceFile("Spark_319/src/output/03") // Hadoop SequenceFile (key/value)

  sc.stop()
}
3.指定压缩方式
/**
 * Demo of saveAsTextFile with an explicit compression codec.
 * Passing classOf[GzipCodec] makes Spark gzip each partition's output,
 * producing part-00000.gz (single partition here, so exactly one file).
 */
object RddActionOperator_saveAsTextFile extends App {
  // appName fixed: the original "countByKey Action operator" was copy-pasted
  // from the countByKey example.
  private val sc: SparkContext = CommonUtils.getSparkContext("saveAsTextFile Action operator")

  // Single partition so the whole RDD lands in one output file.
  // NOTE(review): the original List had a trailing comma after the last tuple,
  // which is only legal immediately before a newline + ')'; removed here.
  private val rdd: RDD[(String, Int)] = sc.parallelize(
    List(
      ("s", 2),
      ("s", 1),
      ("p", 1),
      ("p", 1),
      ("p", 1)
    ),
    1
  )

  // 将rdd内容作为字符串,保存到指定目录,并指定压缩方式
  // Equivalent but less idiomatic: new GzipCodec().getClass allocates an
  // instance just to get the Class object; classOf[GzipCodec] does not.
  // rdd.saveAsTextFile("src/main/data/output/listrdd1", new GzipCodec().getClass)
  rdd.saveAsTextFile("src/main/data/output/listrdd1", classOf[GzipCodec]) // part-00000.gz

  sc.stop()
  // Uncomment to keep the JVM (and the Spark UI) alive for inspection:
  // while (true) {}
}