PySpark: Parsing Array-Structured Kafka Data

from pyspark.sql.functions import col, explode, from_json
from pyspark.sql.types import ArrayType, StructType, StructField, StringType
# Define the array schema: each Kafka message value is a JSON array of objects
schema = ArrayType(StructType([
    StructField("home", StringType()),
    StructField("room", StringType()),
    StructField("operation", StringType()),
    StructField("time", StringType())
]))
# kafka = kafka.select(col("value").cast("string").alias("data"))
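For illustration, a message value that matches this schema would be a JSON array of objects with those four string fields. The concrete values below are hypothetical, not taken from the original post:

[
    {"home": "h001", "room": "living_room", "operation": "light_on", "time": "1730966220"},
    {"home": "h001", "room": "bedroom", "operation": "light_off", "time": "1730966221"}
]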
# Parse the value column with from_json, then use explode to split the array
# into one row per element, and finally pull out the struct fields as columns
kafka = (kafka
    .select(from_json(col("value").cast("string"), schema).alias("data"))
    .select(explode("data").alias("data"))
    .selectExpr("data.home", "data.room", "data.operation", "data.time"))
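To check this transformation end to end without a running broker, a local DataFrame with a single value column can stand in for the Kafka source. The sketch below only reuses the schema and the from_json/explode steps from the snippet above; the app name, sample payload, topic name, and broker address are placeholders I introduce for illustration.

# Minimal, self-contained sketch: verify the parsing logic locally,
# with a stand-in DataFrame instead of a real Kafka topic.
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode, from_json
from pyspark.sql.types import ArrayType, StructType, StructField, StringType

spark = SparkSession.builder.appName("kafka_array_demo").getOrCreate()

schema = ArrayType(StructType([
    StructField("home", StringType()),
    StructField("room", StringType()),
    StructField("operation", StringType()),
    StructField("time", StringType())
]))

# Stand-in for the Kafka source: one "value" column, as the Kafka connector
# would produce (there the column is binary, hence the cast to string).
sample = '[{"home": "h001", "room": "living_room", "operation": "light_on", "time": "1730966220"}]'
kafka = spark.createDataFrame([(sample,)], ["value"])

parsed = (kafka
    .select(from_json(col("value").cast("string"), schema).alias("data"))
    .select(explode("data").alias("data"))
    .selectExpr("data.home", "data.room", "data.operation", "data.time"))

parsed.show(truncate=False)

# When reading from an actual topic (requires the spark-sql-kafka connector),
# the source DataFrame would instead come from something like:
# kafka = (spark.read.format("kafka")
#     .option("kafka.bootstrap.servers", "localhost:9092")  # placeholder address
#     .option("subscribe", "some_topic")                     # placeholder topic
#     .load())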

  

posted @ 2024-11-07 15:57  Young_Mo