package cn.spark.study.dataFramecore

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.SaveMode

/**
 * Demonstrates Parquet schema merging: two DataFrames with different
 * columns ("name"/"age" and "name"/"grade") are appended to the same
 * Parquet directory, then read back with `mergeSchema=true` so the
 * result contains the union of both schemas (missing values are null).
 */
object ParquetMergeSchema {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ParquetMergeSchema").setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Brings in toDF() and other DataFrame conversions.
    import sqlContext.implicits._

    // First batch: (name, age) schema, appended to the target directory.
    val studentWithNameAge = Array(("leo", 23), ("tom", 25)).toSeq
    val studentWithNameAgeDF = sc.parallelize(studentWithNameAge, 2).toDF("name", "age")
    // Use the DataFrameWriter API (Spark 1.4+) instead of the deprecated
    // DataFrame.save(path, source, mode), which was removed in Spark 2.0.
    studentWithNameAgeDF.write.format("parquet").mode(SaveMode.Append).save("D:/sparkTest/student2")

    // Second batch: (name, grade) schema, appended to the same directory.
    val studentWithNameGrade = Array(("leo", "A"), ("tom", "B")).toSeq
    val studentWithNameGradeDF = sc.parallelize(studentWithNameGrade, 2).toDF("name", "grade")
    studentWithNameGradeDF.write.format("parquet").mode(SaveMode.Append).save("D:/sparkTest/student2")

    // mergeSchema=true unions the per-file schemas; columns absent in a
    // given file come back as null for its rows.
    val students = sqlContext.read.option("mergeSchema", "true")
      .parquet("D:/sparkTest/student2")
    students.printSchema()
    students.show()

    // Release the SparkContext's resources before exiting.
    sc.stop()
  }
}

// Note: appending batches with differing schemas fills the missing columns with null.