将本地文件上传到 HDFS：

hadoop fs -put /root/people.json /

/root/people.json：本地文件路径（待上传的源文件）

/：HDFS 上的目标路径（根目录），上传后文件位于 /people.json

from pyspark.sql import SparkSession
# Obtain the SparkSession used by the rest of this script.
# NOTE(review): "spark.some.config.option" / "some-value" looks like a
# placeholder setting — confirm whether a real option is intended.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.some.config.option", "some-value")
    .getOrCreate()
)
------------------
# Read the JSON file into a DataFrame.
# NOTE(review): the path is presumably resolved against HDFS (the file
# uploaded to "/" above) — confirm the cluster's default filesystem.
df = spark.read.json("/people.json")

# Print the rows, then the schema of the DataFrame.
df.show()
df.printSchema()

# Project only the "name" column.
df.select("name").show()

# The "age" column is used by the next two queries, so look it up once.
age_col = df["age"]

# Project "name" alongside "age" incremented by one.
df.select(df["name"], age_col + 1).show()

# Keep only the rows whose "age" is greater than 21.
df.filter(age_col > 21).show()

# Count the rows for each distinct "age" value.
df.groupBy("age").count().show()
--------------------------------
# Register the DataFrame under the name "people" so that SQL text can
# refer to it as a table.
df.createOrReplaceTempView("people")

# Run a plain SQL query against that view and print the resulting rows.
people_query = "SELECT * FROM people"
sqlDF = spark.sql(people_query)
sqlDF.show()