import pandas as pd
from pyspark.sql import SQLContext
from pyspark import SparkContext
from pyspark.sql import SparkSession
# --- pandas DataFrame -> Spark SQL DataFrame -------------------------------
sc = SparkContext()  # connect to Spark
sqlContest = SQLContext(sc)  # connect to Spark SQL (name kept as-is for existing references)

# Build the pandas DataFrame with sample data. An empty frame gives Spark no
# columns or rows to infer a schema from, so the conversion needs real content.
pd_df = pd.DataFrame({"value": [1, 2, 3]})

# Convert the pandas DataFrame into a Spark SQL DataFrame.
spark_df = sqlContest.createDataFrame(pd_df)

# Each RDD element is a Row (a tuple subclass); multiplying the Row itself
# would repeat the tuple, so read the column value and double that instead.
y = spark_df.rdd.map(lambda row: (row.value, row.value * 2))
y.collect()  # action: forces evaluation of the lazy map above
# --- Spark SQL DataFrame -> pandas DataFrame -------------------------------
# getOrCreate() returns the already-running session if there is one.
spark = SparkSession.builder.appName("sparksql_DataFrame").getOrCreate()

# Create the Spark SQL DataFrame. createDataFrame() requires a `data`
# argument; calling it with none raises TypeError, so supply sample rows
# together with their column names.
sparksql_df = spark.createDataFrame(
    [(1, "a"), (2, "b"), (3, "c")],
    schema=["id", "label"],
)

# Convert the Spark SQL DataFrame into a pandas DataFrame (collects all rows
# to the driver).
pandas_df = sparksql_df.toPandas()