import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
case class Person(name: String, age: Int)
/**
 * RDD to DataFrame: two approaches, one via case-class reflection
 * and one via a programmatically built schema.
 */
object DataFrameRDD {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("DataFrameRDD")
      .master("local")
      .getOrCreate()
    val rdd = spark.sparkContext.textFile("person.text")
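    // Assumption (not shown in the original): person.text contains
    // comma-separated lines such as "Michael,29", one person per line.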
    /**
     * Approach 1: RDD to DataFrame via case-class reflection.
     * This import enables the implicit conversion of an RDD to a DataFrame.
     * Note that `spark` here is not a package: it is the SparkSession
     * variable declared above.
     */
    import spark.implicits._
    val df = rdd.map(_.split(","))
      .map(attributes => Person(attributes(0), attributes(1).trim.toInt))
      .toDF()
    // Register the DataFrame as a temporary view so it can be queried with SQL
    df.createOrReplaceTempView("person")
    val frame = spark.sql("select * from person where age > 20")
    // Format each matching row of the query result as a string and print it
    frame.map(t =>
      "name:" + t(0) + "," + "age:" + t(1)
    ).show()
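    /**
     * Illustrative variant (an assumption, not part of the original
     * example): with the same implicits in scope, an RDD of tuples can
     * be converted directly with toDF(colNames*), no case class needed.
     */
    val tupleDf = rdd.map(_.split(","))
      .map(a => (a(0), a(1).trim.toInt))
      .toDF("name", "age")
    tupleDf.show()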
    /**
     * Approach 2: RDD to DataFrame via a programmatically specified schema.
     */
    // Build the schema (the "table header")
    val fields = Array(
      StructField("name", StringType, nullable = true),
      StructField("age", IntegerType, nullable = true)
    )
    val schema = StructType(fields)
    // The table's records, as Row objects matching the schema
    val rowRdd = rdd.map(_.split(","))
      .map(attributes => Row(attributes(0), attributes(1).trim.toInt))
    // Combine the schema with the records to form a DataFrame
    val dataframe = spark.createDataFrame(rowRdd, schema)
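    // Sanity check (added for illustration): confirm the schema was applied
    dataframe.printSchema()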
    // Register as a temporary view (replacing the earlier "person" view) and query it
    dataframe.createOrReplaceTempView("person")
    val dataframe2 = spark.sql("select * from person where age > 20")
    dataframe2.map(t =>
      "name:" + t(0) + "," + "age:" + t(1)
    ).show()
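    // Release the session's resources once the job is done
    spark.stop()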
  }
}