02. Spark start

0. Basic files

    app.prod.log4j.prop

log4j.rootLogger=INFO,rollingAppender

log4j.appender.rollingAppender=org.apache.log4j.rolling.RollingFileAppender
log4j.appender.rollingAppender.rollingPolicy=org.apache.log4j.rolling.TimeBasedRollingPolicy
log4j.appender.rollingAppender.rollingPolicy.fileNamePattern=log/app_%d{yyyy-MM}.log
log4j.appender.rollingAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.rollingAppender.layout.conversionPattern=%d{yyyy-MM-dd HH:mm:ss} %X{currentUser} [%C{2}.%M(%L)]-[%p] %m%n
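
Note: org.apache.log4j.rolling.RollingFileAppender and TimeBasedRollingPolicy ship in the apache-log4j-extras companion jar, which therefore has to be on the driver classpath. The %X{currentUser} slot in the pattern is read from log4j's MDC; the sketch below (my own illustration, driving log4j through PySpark's py4j gateway, with "demo_user" as a made-up value) shows one way a PySpark driver could fill it:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("MdcDemo").getOrCreate()
jvm = spark.sparkContext._jvm

# Fill the %X{currentUser} slot via log4j's MDC ("demo_user" is a placeholder)
jvm.org.apache.log4j.MDC.put("currentUser", "demo_user")

# Log through the driver-side log4j so the message reaches rollingAppender
logger = jvm.org.apache.log4j.LogManager.getLogger("MdcDemo")
logger.info("hello from the driver, tagged with currentUser")

spark.stop()
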
1. Standalone mode

    SimpleApp.py

#!/usr/bin/env python
# encoding=utf-8

from pyspark.sql import SparkSession


# Read a local file in standalone (local-master) mode
def test_local():
    local_file = "README.md"
    spark = SparkSession.builder.appName("SimpleApp").getOrCreate()
    # spark.read.text returns a DataFrame with one row per line of the file
    data_df = spark.read.text(local_file)

    for row in data_df.collect():
        print(row['value'])
    spark.stop()


if __name__ == '__main__':
    test_local()
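
With a local master, a scheme-less path such as "README.md" is resolved against the driver's working directory on the local filesystem. Spelling the scheme out removes any ambiguity once HADOOP_CONF_DIR is exported (as in the yarn section below); a minimal sketch, with a hypothetical absolute path:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("SimpleApp").getOrCreate()

# file:// pins the read to the local filesystem regardless of fs.defaultFS
# (the absolute path below is only an example)
data_df = spark.read.text("file:///home/workspace/study/README.md")
print(data_df.count())  # number of lines in the file

spark.stop()
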

    test.sh

#!/bin/bash

cur_dir=$(pwd)

SPARK_SUBMIT="/home/workspace/spark-2.1.1-bin-hadoop2.6/bin/spark-submit"
MASTER="local[12]"

# Create the log directory if needed, then append stdout and stderr to a daily log file
mkdir -p ${cur_dir}/log
today=$(date +"%Y%m%d")
exec >> ${cur_dir}/log/hdfs_${today}.log 2>&1

${SPARK_SUBMIT} --master ${MASTER} \
    --driver-java-options "-Dlog4j.configuration=file://${cur_dir}/app.prod.log4j.prop" \
    SimpleApp.py

  

2. YARN mode

    SimpleApp.py

#!/usr/bin/env python
# encoding=utf-8

from pyspark.sql import SparkSession


# Read a file from HDFS in yarn mode (a scheme-less path resolves against fs.defaultFS)
def test_yarn():
    hdfs_file = "/home/workspace/study/README.md"
    spark = SparkSession.builder.appName("SimpleApp").getOrCreate()
    data_df = spark.read.text(hdfs_file)

    for row in data_df.collect():
        print(row['value'])
    spark.stop()


if __name__ == '__main__':
    test_yarn()
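
collect() ships every row back to the driver, which can exhaust driver memory on a large HDFS file. A gentler variant of the same read, sketched below, uses take() and an explicit hdfs:// scheme (the scheme-less form above resolves against fs.defaultFS anyway):

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("SimpleApp").getOrCreate()

# Explicit scheme; the empty authority falls back to fs.defaultFS
data_df = spark.read.text("hdfs:///home/workspace/study/README.md")

# take(20) pulls only the first 20 rows instead of the whole file
for row in data_df.take(20):
    print(row['value'])

spark.stop()
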

    test.sh

#!/bin/bash

cur_dir=$(pwd)

SPARK_SUBMIT="/home/workspace/spark-2.1.1-bin-hadoop2.6/bin/spark-submit"

# When running with master 'yarn', either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment
MASTER="yarn"
export HADOOP_CONF_DIR=/etc/hive/conf

# Create the log directory if needed, then append stdout and stderr to the log file
mkdir -p ${cur_dir}/log
exec >> ${cur_dir}/log/hdfs.log 2>&1

${SPARK_SUBMIT} --master ${MASTER} \
    --driver-java-options "-Dlog4j.configuration=file://${cur_dir}/app.prod.log4j.prop" \
    SimpleApp.py

 
