// 大数据之Spark SQL的业务实现与应用 (Big data: business implementation and application of Spark SQL)

import org.apache.spark.sql.SparkSession

object Test1 {
  /**
   * Entry point: loads tab-separated traffic-monitor records into a temp view
   * and runs several Spark SQL aggregate analyses over them, printing each
   * result with `show()`.
   *
   * @param args optional; args(0) may supply the input file path (defaults to
   *             the hard-coded local mock-data file when absent).
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("test1").getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    // Prefer a path passed on the command line; fall back to the local mock file.
    val file = if (args.nonEmpty) args(0) else "D:\\mockData.txt"
    val df = spark.read
      .option("sep", "\t")
      .schema("date string,monitor_id string,camera_id string,car string,action_time string,speed int,road_id string,area_id string")
      .csv(file)
    df.createOrReplaceTempView("monitor_flow_action")

    // 1) Top speed observed on each road within each area.
    // BUG FIX: the original also grouped by camera_id, which emitted one row per
    // camera while only area_id/road_id appeared in the select list — i.e.
    // duplicate (area_id, road_id) rows holding per-camera maxima rather than
    // the road-wide maximum implied by the projection.
    val df1 = spark.sql("select area_id,road_id,max(speed) max_speed from monitor_flow_action " +
      "group by area_id,road_id " +
      "order by area_id,road_id,max_speed desc")
    df1.show()

    // 2) Average speed per camera, fastest cameras first.
    val df2 = spark.sql("select camera_id,round(avg(speed),2) avg_speed from monitor_flow_action group by camera_id order by avg_speed desc")
    df2.show()

    // 3) Distinct-vehicle count per (area, road), least-busy first.
    val df3 = spark.sql("select area_id,road_id,count(distinct car) car_cnt from monitor_flow_action group by area_id,road_id order by car_cnt")
    df3.show()

    // 4) Top-5 fastest records per road via a window function.
    val df4 = spark.sql(
      """
      select t.* from (
        select *,row_number() over(partition by road_id order by speed desc) rank
        from monitor_flow_action
      ) t
      where t.rank<=5
      """)
    df4.show()

    // UDF: map an area_id onto a district name. Comparisons are lexicographic,
    // which assumes area_id is a zero-padded two-digit string ("01".."11") —
    // TODO confirm against the data source. Any unmatched id (including values
    // above "11") falls through to 房山区, matching the original logic.
    spark.udf.register("get_area", (areaId: String) =>
      if (areaId >= "01" && areaId <= "04") "海淀区"
      else if (areaId >= "05" && areaId <= "07") "和平区"
      else if (areaId >= "08" && areaId <= "09") "门头沟区"
      else if (areaId >= "10" && areaId <= "11") "西城区"
      else "房山区")

    // 5) Average speed per district, using the UDF above as the grouping key.
    val df5 = spark.sql("select get_area(area_id) area_name,round(avg(speed),2) avg_speed from monitor_flow_action group by area_name")
    df5.show()
    spark.close()
  }
}
// posted @ 2020-06-09 20:19  潇洒哥浩浩  阅读(261)  评论(0)  收藏  举报