PySpark 数据处理示例：把 list[dict]（字典列表，含嵌套字段）转换成 PySpark DataFrame
# coding=utf-8
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pandas as pd
from pyspark.sql import Row
class SparkContext:
    """Context manager owning a Hive-enabled SparkSession.

    Entering the ``with`` block yields the underlying SparkSession;
    exiting stops it so cluster resources are always released.

    NOTE(review): this name shadows pyspark's own ``SparkContext`` class —
    confirm callers expect this wrapper rather than the pyspark type.
    """

    def __init__(self, name="cleaner"):
        # Build the session step by step instead of one long chain.
        builder = SparkSession.builder.appName(name)
        builder = builder.config("hive.exec.dynamic.partition", True)
        builder = builder.config("hive.exec.dynamic.partition.mode", "nonstrict")
        self.spark = builder.enableHiveSupport().getOrCreate()
        # Keep driver output quiet; surface only errors.
        self.spark.sparkContext.setLogLevel("ERROR")

    def __enter__(self):
        return self.spark

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Stop the session unconditionally, even when the body raised.
        self.spark.stop()
def main():
    """Build a small DataFrame from a list of dicts and print it to stdout."""
    records = [
        {'ent_name': '百度', 'credit_code': '1234567890'},
        {'ent_name': 'abc', 'credit_code': '121212222'},
    ]
    # Turn each dict into a Row so createDataFrame can infer the schema
    # from the field names and values.
    rows = [Row(**record) for record in records]
    with SparkContext('test_df') as spark:
        frame = spark.createDataFrame(rows)
        frame.show()


if __name__ == '__main__':
    main()
浙公网安备 33010602011771号