from pyspark import SparkContext

from pyspark import SparkConf

# Minimal PySpark job: read one text file from HDFS and pull its lines back
# to the driver.

# Application name passed to Spark.
string_test = 'pyspark_test'

# Submit against a YARN cluster under the application name above.
conf = SparkConf().setAppName(string_test).setMaster('yarn')

sc = SparkContext(conf=conf)

try:
    # textFile() only declares the RDD; nothing is read until an action runs.
    # The host:port in the URI must match the cluster's NameNode address.
    hdfs_data = sc.textFile(
        "hdfs://master:9000/data/hive/warehouse/initial_data.db/appstart/appstart_copy_96"
    )

    # collect() is the action that actually reads the file and returns every
    # line to the driver as a Python list. Keep the result instead of
    # discarding it so it can be inspected or used after the job finishes.
    collected_lines = hdfs_data.collect()
finally:
    # Always release the SparkContext (and its YARN resources), even when the
    # read fails, e.g. because the path is wrong or HDFS is unreachable.
    sc.stop()

 

# Note: master:9000 is the value of the fs.defaultFS property in core-site.xml.