from pyspark import SparkContext

from pyspark import SparkConf

# Minimal PySpark smoke test: build an RDD from a small local list, map each
# element to an (element, element * 2) pair, and print both collected results.

string_test = 'pyspark_test'

# The application name is taken from string_test; the master is set to YARN.
conf = SparkConf().setAppName(string_test).setMaster('yarn')

sc = SparkContext(conf=conf)

# Always stop the context, even if a job below raises, so the application
# does not linger after a failure.
try:
    list_test = [1, 2, 3]

    x = sc.parallelize(list_test)

    # The lambda parameter is deliberately not named ``x`` so it does not
    # shadow the RDD bound to ``x`` above.
    y = x.map(lambda value: (value, value * 2))

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(x.collect())

    print(y.collect())
finally:
    sc.stop()

 

# PySpark API reference: https://www.iteblog.com/archives/1395.html