Spark SQL

Posted on 2018-09-10 13:19  打杂滴  阅读(98)  评论(0)    收藏  举报

scala> import org.apache.spark.sql.hive.HiveContext

import org.apache.spark.sql.hive.HiveContext

scala> val hivecon=new HiveContext(sc)

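warning: there was one deprecation warning; re-run with -deprecation for details

(Note: the deprecated API is HiveContext, deprecated since Spark 2.0; the replacement is SparkSession.builder().enableHiveSupport().getOrCreate(), whose sql(...) method is used the same way.)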

hivecon: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@56db9f2d

scala> hivecon.sql("use gamedw")

res5: org.apache.spark.sql.DataFrame = []

scala> hivecon.sql("select collect_set(custname),sex from cust group by sex")

res6: org.apache.spark.sql.DataFrame = [collect_set(custname): array<string>, sex: int]

scala> hivecon.sql("select collect_set(custname),sex from cust group by sex").show
+---------------------+---+
|collect_set(custname)|sex|
+---------------------+---+
| [mahuateng, liuya...|  1|
| [liuqin, hello, w...|  0|
+---------------------+---+

 

博客园  ©  2004-2025
浙公网安备 33010602011771号 浙ICP备2021040463号-3