package com.holiday01

import org.apache.spark.sql.SparkSession

import scala.util.Try

/**
 * Small Spark SQL job that loads a "stature" text file (id, sex, height in cm),
 * registers it as the temporary view `stature`, and prints a handful of
 * statistics about it.
 *
 * Usage: the first program argument, when present, is the path of the input
 * file; otherwise the original hard-coded path is used.
 */
object Stature_sparksql {

  /** Input file used when no path is passed on the command line. */
  private val DefaultInputPath = "D:\\IDEA_Maven\\holiday\\src\\main\\resources\\stature.txt"

  /** Fields may be separated by commas and/or runs of whitespace. */
  private val FieldSeparator = "[,\\s]+"

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the input file path
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    // Resource scheduling: a local, single-JVM Spark session.
    val sparkSession = SparkSession.builder().appName("Stature_sparksql").master("local").getOrCreate()

    // try/finally so the session is released even when a query fails.
    try {
      val sparkContext = sparkSession.sparkContext

      // Rows that cannot be parsed (e.g. a header row, blank lines) are dropped
      // instead of aborting the whole job with a NumberFormatException.
      val records = sparkContext
        .textFile(inputPath)
        .flatMap(line => parseLine(line).toList)

      import sparkSession.implicits._
      val frame = records.toDF("id", "sex", "height")

      // Register the data as a table so it can be queried with SQL.
      frame.createOrReplaceTempView("stature")

      // Number of males taller than 180 cm.
      sparkSession.sql("select count(*) from stature where sex='M' and height>180").show()
      // Number of females taller than 160 cm.
      sparkSession.sql("select count(*) from stature where sex='F' and height>160").show()
      // Head count per sex.
      sparkSession.sql("select sex,count(*) from stature group by sex ").show()
      // The 5 tallest males above 175 cm.
      sparkSession.sql("select * from stature where sex ='M' and height >175 order by height desc limit 5").show()
      // The 5 tallest people overall.
      sparkSession.sql("select * from stature order by height desc limit 5 ").show()
      // Average male height; the label is printed after its table, as before.
      sparkSession.sql("select avg(height) from stature where sex ='M'").show()
      println("统计男性的平均身高")
      // Maximum female height; the label is printed after its table, as before.
      sparkSession.sql("select max(height) from stature where sex='F'").show()
      println("统计女性身高的最大值")
    } finally {
      sparkSession.stop()
    }
  }

  /**
   * Parses one input line into a record.
   *
   * Only the first three fields are used; any further fields are ignored.
   *
   * @param line raw text line, fields separated by commas and/or whitespace
   * @return the parsed record, or `None` when the line has fewer than three
   *         fields or its id/height fields are not integers
   */
  private def parseLine(line: String): Option[Stature_sparksql] =
    line.trim.split(FieldSeparator) match {
      case Array(id, sex, height, _*) =>
        Try(Stature_sparksql(id.toInt, sex, height.toInt)).toOption
      case _ =>
        None
    }

  /**
   * One row of the stature data set.
   *
   * @param id     record number
   * @param sex    sex code as it appears in the file (the queries use 'M' and 'F')
   * @param height height in centimetres
   */
  case class Stature_sparksql(id: Int, sex: String, height: Int)

}
1 序号 性别(M男,F女) 身高(cm) 2 1 F 168 3 2 M 185 4 3 M 168 5 4 F 155 6 5 F 159 7 6 F 162 8 7 F 158 9 8 M 178 10 9 M 180 11 10 F 163 12 11 M 190 13 12 M 190 14 13 F 153 15 14 M 173 16 15 F 157 17 16 F 178 18 17 M 200 19 18 M 188 20 19 F 160 21 20 M 177