#!/bin/bash
#
# One-shot submit script for a Spark Streaming job.
# Takes into account: backpressure, rate limiting, JVM (GC) tuning,
# and retry on failure (YARN app-attempt / executor-failure settings below).

set -euo pipefail

# ---- Resource sizing ----
num_executors=1
executor_memory=1g
driver_memory=1g
executor_cores=1
realtime_queue=root

# ---- Backpressure / rate limiting ----
# receiver_max_rate caps records/sec per receiver and per Kafka partition;
# receiver_initial_rate is the PID backpressure starting rate.
receiver_max_rate=100
receiver_initial_rate=30

my_job_name="streamingSYN"
main_class="com.df.QZ.HeartOrderChart"

# Submit in yarn-cluster mode. Log4j config is shipped per-JVM via
# extraJavaOptions; WAL + backpressure guard against receiver data loss
# and ingest spikes; maxAppAttempts/failuresValidityInterval give the
# driver and executors bounded automatic retries.
spark-submit --master yarn --deploy-mode cluster \
  --name "${my_job_name}" \
  --class "${main_class}" \
  --driver-memory "${driver_memory}" \
  --num-executors "${num_executors}" --executor-cores "${executor_cores}" --executor-memory "${executor_memory}" \
  --queue "${realtime_queue}" \
  --conf spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j-yarn.properties \
  --conf spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j-yarn.properties \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.locality.wait=10 \
  --conf spark.task.maxFailures=8 \
  --conf spark.ui.killEnabled=false \
  --conf spark.logConf=true \
  --conf spark.streaming.blockInterval=200 \
  --conf spark.streaming.receiver.writeAheadLog.enable=true \
  --conf spark.streaming.backpressure.enabled=true \
  --conf spark.streaming.backpressure.pid.minRate=10 \
  --conf spark.streaming.receiver.maxRate="${receiver_max_rate}" \
  --conf spark.streaming.kafka.maxRatePerPartition="${receiver_max_rate}" \
  --conf spark.streaming.backpressure.initialRate="${receiver_initial_rate}" \
  --conf spark.yarn.driver.memoryOverhead=512 \
  --conf spark.yarn.executor.memoryOverhead=1024 \
  --conf spark.yarn.maxAppAttempts=4 \
  --conf spark.yarn.am.attemptFailuresValidityInterval=1h \
  --conf spark.yarn.max.executor.failures=$(( 8 * num_executors )) \
  --conf spark.yarn.executor.failuresValidityInterval=1h \
  --driver-java-options "-XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:ParallelCMSThreads=4 -XX:+CMSParallelRemarkEnabled -XX:+UseCMSCompactAtFullCollection -XX:CMSInitiatingOccupancyFraction=70 -XX:CMSFullGCsBeforeCompaction=2 -XX:-UseCompressedOops -XX:+PrintHeapAtGC" \
  hdfs://df1:9000/Thermodynamic-1.0-SNAPSHOT.jar \
  1 df1:9092,df2:9092,df3:9092 driverinfo cm1 df1:2181,df2:2181,df3:2181