1.1 从 Hive 读数据（Reading data from Hive）
/**
 * Minimal example of reading from a Hive-backed table with Spark SQL.
 *
 * Requires Hive support enabled on the SparkSession; the metastore
 * configuration is picked up from the classpath (hive-site.xml).
 */
object HiveRead {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HiveRead")
      .enableHiveSupport()          // required for Hive metastore access
      .getOrCreate()

    // `show databases` returns a DataFrame; without an action (e.g. .show())
    // the query result is silently discarded, so display it explicitly.
    spark.sql("show databases").show()
    spark.sql("use gmall")
    spark.sql("select count(*) from ads_uv_count").show()

    spark.close()
  }
}
1.2 从 Hive 写数据（Writing data to Hive）
/**
 * Minimal example of writing query results back to Hive tables.
 *
 * HADOOP_USER_NAME is set so the local JVM user has write permission
 * on the HDFS warehouse directory.
 */
object HiveWrite2 {
  def main(args: Array[String]): Unit = {
    System.setProperty("HADOOP_USER_NAME", "xingmeng")
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("HiveWrite2") // was "HiveRead" — copy-paste error from the read example
      .enableHiveSupport()
      .config("spark.sql.warehouse.dir", "hdfs://hadoop102:9000/user/hive/warehouse")
      .getOrCreate()

    // One-time setup, kept for reference:
    // spark.sql("create database spark1016")
    // spark.sql("use spark1016")
    // spark.sql("create table user1(id int, name string)").show()
    // spark.sql("insert into table user1 VALUES(10,'lisi')")

    spark.sql("use spark1016")
    val df1 = spark.sql("select * from a")
    val df2 = spark.sql("select sum(age) sum_age from a group by name")

    // Use overwrite mode so re-running the job does not fail with
    // TableAlreadyExistsException (consistent with the write of a2 below).
    df1.write.mode("overwrite").saveAsTable("a1")

    // After an aggregation, spark.sql.shuffle.partitions (default 200) sets the
    // partition count; coalesce(1) avoids writing 200 tiny files.
    df2.coalesce(1).write.mode("overwrite").saveAsTable("a2")

    spark.close()
  }
}