package com.spark
import org.apache.spark.sql.SparkSession
/**
 * Basic DataFrame API operations.
 *
 * Loads a JSON file into a DataFrame and runs a handful of simple queries
 * against it (projection, computed column, filter, group-by count), printing
 * each result to standard output.
 */
object DataFrameAPP1 {

  /**
   * Entry point.
   *
   * @param args optional command-line arguments; when present, `args(0)` is
   *             used as the path of the JSON input file. When absent, the
   *             original hard-coded path is used.
   */
  def main(args: Array[String]): Unit = {
    // Allow the input location to be overridden without recompiling; fall
    // back to the original hard-coded location so existing runs are unchanged.
    val path = args.headOption.getOrElse("E:\\data\\infos.txt")

    val spark = SparkSession.builder()
      .appName("DataFrameApp")
      .master("local[2]")
      .getOrCreate()

    // Ensure the session is stopped even if loading or a query throws
    // (e.g. the file is missing or a referenced column does not exist).
    try {
      val peopleDF = spark.read.format("json").load(path)

      peopleDF.printSchema()

      // Print the first 20 rows (the default for show()).
      peopleDF.show()

      // select name from table
      peopleDF.select("name").show()

      // select name, age + 10 as age2 from table
      peopleDF.select(peopleDF.col("name"), (peopleDF.col("age") + 10).as("age2")).show()

      // select * from table where age > 19
      peopleDF.filter(peopleDF.col("age") > 19).show()

      // select age, count(1) from table group by age
      peopleDF.groupBy("age").count().show()
    } finally {
      spark.stop()
    }
  }
}