02.spark start
0.基本文件
app.prod.log4j.prop
# Root logger: emit INFO and above, routed to the appender named "rollingAppender".
log4j.rootLogger=INFO,rollingAppender
# File appender whose rollover behaviour is delegated to a rolling policy.
# NOTE(review): the org.apache.log4j.rolling.* classes come from the log4j "extras"
# companion, not core log4j 1.2 - confirm that jar is on the driver classpath.
log4j.appender.rollingAppender=org.apache.log4j.rolling.RollingFileAppender
# Roll files based on the date pattern embedded in fileNamePattern below.
log4j.appender.rollingAppender.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy
# %d{yyyy-MM} has month granularity, so one log file is produced per month.
# The path is relative (log/...); presumably resolved against the working
# directory of the JVM that loads this file - verify the directory exists there.
log4j.appender.rollingAppender.rollingPolicy.fileNamePattern=log/app_%d{yyyy-MM}.log
log4j.appender.rollingAppender.layout = org.apache.log4j.PatternLayout
# Line format: timestamp, MDC value "currentUser", [class(2 trailing name parts).method(line)],
# [level], message, newline.
log4j.appender.rollingAppender.layout.conversionPattern =%-d{yyyy-MM-dd HH:mm:ss} %X{currentUser} [%C{2}.%M(%L)]-[%p] %m%n
1.standalone模式
SimpleApp.py
#!/usr/bin/env python
# encoding=utf-8
from pyspark.sql import SparkSession
# Read a local file while running in standalone (local master) mode.
def test_local():
    """Print every line of ``README.md`` through a Spark session.

    The file is loaded with ``spark.read.text``, which yields one row per
    line with the text in the ``value`` column. All rows are collected to
    the driver and printed one by one.

    The session is always stopped, even if reading or collecting fails.
    """
    local_file = "README.md"
    spark = SparkSession.builder.appName("SimpleApp").getOrCreate()
    try:
        # spark.read.text returns a DataFrame (not an RDD) with a single
        # string column named "value".
        lines_df = spark.read.text(local_file)
        for row in lines_df.collect():
            # Function-call form of print works on both Python 2 and 3.
            print(row['value'])
    finally:
        # Release the session even when the job above raised.
        spark.stop()


if __name__ == '__main__':
    test_local()
test.sh
#!/bin/bash
# Submit SimpleApp.py to Spark with a local master and append all output
# (stdout and stderr) to a per-day log file under ./log.
cur_dir="$(pwd)"
SPARK_SUBMIT="/home/workspace/spark-2.1.1-bin-hadoop2.6/bin/spark-submit"
# local[12]: run Spark locally with 12 worker threads.
MASTER="local[12]"
today="$(date +"%Y%m%d")"
log_file="${cur_dir}/log/hdfs_${today}.log"
# The redirection below fails if the log directory is missing, so create it first.
mkdir -p "${cur_dir}/log"
# From here on, append both stdout and stderr of this script to the daily log.
exec >> "${log_file}" 2>&1
"${SPARK_SUBMIT}" --master "${MASTER}" \
    --driver-java-options "-Dlog4j.configuration=file://${cur_dir}/app.prod.log4j.prop" \
    SimpleApp.py
2.yarn模式
SimpleApp.py
#!/usr/bin/env python
# encoding=utf-8
from pyspark.sql import SparkSession
# Read a file stored on Hadoop while running in yarn mode.
def test_yarn():
    """Print every line of ``/home/workspace/study/README.md`` through Spark.

    The file is loaded with ``spark.read.text``, which yields one row per
    line with the text in the ``value`` column. All rows are collected to
    the driver and printed one by one.

    The session is always stopped, even if reading or collecting fails.
    """
    # NOTE(review): the path has no scheme; presumably it is resolved against
    # the cluster's default filesystem (HDFS) when submitted with
    # --master yarn - confirm against the Hadoop configuration in use.
    hadoop_path = "/home/workspace/study/README.md"
    spark = SparkSession.builder.appName("SimpleApp").getOrCreate()
    try:
        # spark.read.text returns a DataFrame (not an RDD) with a single
        # string column named "value".
        lines_df = spark.read.text(hadoop_path)
        for row in lines_df.collect():
            # Function-call form of print works on both Python 2 and 3.
            print(row['value'])
    finally:
        # Release the session even when the job above raised.
        spark.stop()


if __name__ == '__main__':
    test_yarn()
test.sh
#!/bin/bash
# Submit SimpleApp.py to a YARN cluster and append all output
# (stdout and stderr) to ./log/hdfs.log.
cur_dir="$(pwd)"
SPARK_SUBMIT="/home/workspace/spark-2.1.1-bin-hadoop2.6/bin/spark-submit"
# When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment
MASTER="yarn"
export HADOOP_CONF_DIR=/etc/hive/conf
# The redirection below fails if the log directory is missing, so create it first.
mkdir -p "${cur_dir}/log"
# Redirection order matters: stdout must be pointed at the file BEFORE stderr is
# duplicated onto it. The reverse order ("2>&1 >> file") leaves stderr attached
# to the original stdout, so errors never reach the log.
exec >> "${cur_dir}/log/hdfs.log" 2>&1
"${SPARK_SUBMIT}" --master "${MASTER}" \
    --driver-java-options "-Dlog4j.configuration=file://${cur_dir}/app.prod.log4j.prop" \
    SimpleApp.py
    http://www.cnblogs.com/makexu/

 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号