RDD转dataframe

from pyspark.sql import SparkSession,Row
from pyspark.sql.types import StructField, StructType, StringType, IntegerType, LongType
data = [('Alex','male',3),('Nancy','female',6),['Jack','male',9]] # mixed
rdd_ = spark.sparkContext.parallelize(data)

# schema
schema = StructType([
        # true代表不为空
        StructField("name", StringType(), True),
        StructField("gender", StringType(), True),
        StructField("num", StringType(), True)
    ])
df = spark.createDataFrame(rdd_, schema=schema)  # working when the struct of data is same.
print(df.show()) 

 

posted @ 2020-07-07 14:33  muyue123  阅读(115)  评论(0编辑  收藏  举报