案例 读取分析json格式的Array
【1】数据:文件 jsonArrayFile 的内容如下(每行一个 JSON 对象)
{"name":"zhangsan","age":18,"scores":[{"xueqi":1,"yuwen":98,"shuxue":90,"yingyu":100},{"xueqi":2,"yuwen":98,"shuxue":78,"yingyu":100}]}
{"name":"lisi","age":19,"scores":[{"xueqi":1,"yuwen":58,"shuxue":50,"yingyu":78},{"xueqi":2,"yuwen":56,"shuxue":76,"yingyu":13}]}
{"name":"wangwu","age":17,"scores":[{"xueqi":1,"yuwen":18,"shuxue":90,"yingyu":45},{"xueqi":2,"yuwen":76,"shuxue":42,"yingyu":45}]}
{"name":"zhaoliu","age":20,"scores":[{"xueqi":1,"yuwen":68,"shuxue":23,"yingyu":63},{"xueqi":2,"yuwen":23,"shuxue":45,"yingyu":87}]}
{"name":"tianqi","age":22,"scores":[{"xueqi":1,"yuwen":88,"shuxue":91,"yingyu":41},{"xueqi":2,"yuwen":56,"shuxue":79,"yingyu":45}]}
【2】scala代码实现
package com.it.baizhan.scalacode.sparksql.examples

import org.apache.spark.sql.SparkSession

/**
 * Reads JSON records that contain an array field and flattens that array.
 *
 * `explode(collection)` is a one-to-many operation: every element of the
 * collection becomes a row of its own, with the other selected columns
 * repeated on each generated row.
 */
object ReadJsonArrayData {

  /**
   * Loads `./data/jsonArrayFile`, explodes the `scores` array into one row per
   * element and prints the flattened result.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().appName("test").master("local").getOrCreate()
    try {
      import org.apache.spark.sql.functions.{col, explode}

      val frame = session.read.json("./data/jsonArrayFile")

      // Content of `frame` for the sample data (as printed by e.g. frame.show(false)).
      // Each element of `scores` is a struct whose fields appear in the order
      // shuxue, xueqi, yingyu, yuwen:
      //
      // +---+--------+------------------------------------+
      // |age|name    |scores                              |
      // +---+--------+------------------------------------+
      // |18 |zhangsan|[[90, 1, 100, 98], [78, 2, 100, 98]]|
      // |19 |lisi    |[[50, 1, 78, 58], [76, 2, 13, 56]]  |
      // |17 |wangwu  |[[90, 1, 45, 18], [42, 2, 45, 76]]  |
      // |20 |zhaoliu |[[23, 1, 63, 68], [45, 2, 87, 23]]  |
      // |22 |tianqi  |[[91, 1, 41, 88], [79, 2, 45, 56]]  |
      // +---+--------+------------------------------------+

      // One output row per element of `scores`; the element is exposed as struct column `el`.
      val exploded = frame.select(
        frame.col("name"),
        frame.col("age"),
        explode(frame.col("scores")).as("el")
      )

      // Pull the struct fields up into ordinary top-level columns and print them.
      exploded
        .select(
          col("name"),
          col("age"),
          col("el.xueqi"),
          col("el.yuwen"),
          col("el.shuxue"),
          col("el.yingyu")
        )
        .show()

      // Output for the sample data:
      //
      // +--------+---+-----+-----+------+------+
      // |    name|age|xueqi|yuwen|shuxue|yingyu|
      // +--------+---+-----+-----+------+------+
      // |zhangsan| 18|    1|   98|    90|   100|
      // |zhangsan| 18|    2|   98|    78|   100|
      // |    lisi| 19|    1|   58|    50|    78|
      // |    lisi| 19|    2|   56|    76|    13|
      // |  wangwu| 17|    1|   18|    90|    45|
      // |  wangwu| 17|    2|   76|    42|    45|
      // | zhaoliu| 20|    1|   68|    23|    63|
      // | zhaoliu| 20|    2|   23|    45|    87|
      // |  tianqi| 22|    1|   88|    91|    41|
      // |  tianqi| 22|    2|   56|    79|    45|
      // +--------+---+-----+-----+------+------+

      // Equivalent formulation in SQL (kept for reference, not executed):
      //
      // frame.createTempView("temp")
      // session.sql(
      //   """
      //     | select
      //     |   name, age, el.xueqi, el.yuwen, el.shuxue, el.yingyu
      //     | from
      //     |   (select name, age, explode(scores) as el from temp) t
      //   """.stripMargin).show()
    } finally {
      // Release the local Spark context even when reading or showing fails.
      session.stop()
    }
  }

}

浙公网安备 33010602011771号