package cn.itcast.sql
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
/**
 * Author: itcast
 * Desc: Demonstrates converting between RDD, DataFrame, and Dataset in Spark SQL
 */
object Demo03_RDD_DF_DS {
  def main(args: Array[String]): Unit = {
    //TODO 0. Prepare the environment
    val spark: SparkSession = SparkSession.builder().appName("sparksql").master("local[*]").getOrCreate()
    val sc: SparkContext = spark.sparkContext
    sc.setLogLevel("WARN")
    //TODO 1. Load the data
    val lines: RDD[String] = sc.textFile("data/input/person.txt")
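    // Assumed input format (not shown in the original): each line of person.txt
    // holds space-separated id, name, and age, e.g. "1 zhangsan 20".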
    //TODO 2. Parse each line into a Person
    val personRDD: RDD[Person] = lines.map(line => {
      val arr: Array[String] = line.split(" ")
      Person(arr(0).toInt, arr(1), arr(2).toInt)
    })
    // Conversion 1: RDD --> DF
    // spark.implicits._ brings in the toDF/toDS syntax and the Encoders they require
    import spark.implicits._
    val personDF: DataFrame = personRDD.toDF()
    // Conversion 2: RDD --> DS
    val personDS: Dataset[Person] = personRDD.toDS()
    // Conversion 3: DF --> RDD. Note: a DataFrame has no type parameter, so the resulting RDD holds generic Row objects
    val rdd: RDD[Row] = personDF.rdd
    // Conversion 4: DS --> RDD, which keeps the element type Person
    val rdd1: RDD[Person] = personDS.rdd
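    // Illustrative contrast (these two lines are an addition, not part of the original demo):
    // a Row field is read by name or position, while a Dataset element keeps its typed fields.
    val idsFromRows: RDD[Int] = rdd.map(row => row.getAs[Int]("id"))
    val idsFromPersons: RDD[Int] = rdd1.map(p => p.id)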
    // Conversion 5: DF --> DS, reattaching the type via an implicit Encoder[Person]
    val ds: Dataset[Person] = personDF.as[Person]
    // Conversion 6: DS --> DF
    val df: DataFrame = personDS.toDF()
    //TODO 3. Output the results
    personDF.printSchema()
    personDF.show()
    personDS.printSchema()
    personDS.show()
    rdd.foreach(println)
    rdd1.foreach(println)
    //TODO 4. Release resources
    spark.stop()
  }
  // Define the case class at the object level (not inside main) so Spark can derive its Encoder
  case class Person(id: Int, name: String, age: Int)
}
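
The conversions above are what let the same data move between the SQL-style and the functional API. As a minimal follow-up sketch (the temp view name t_person and the age threshold are illustrative, not part of the original demo), the lines below could be added inside main before spark.stop():

// SQL over the untyped DataFrame
personDF.createOrReplaceTempView("t_person")
spark.sql("SELECT name, age FROM t_person WHERE age >= 30").show()
// Typed, compile-checked transformations over the Dataset
personDS.filter(p => p.age >= 30).map(p => p.name).show()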