spark简单入门

查看spark版本  spark-submit --version

查看hadoop版本  hadoop version

下载spark 对应版本 spark-2.0.2

下载idea的scala插件: https://plugins.jetbrains.com/plugin/1347-scala

 

 

package com.bj58
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
/**
 * Minimal Spark word-count job.
 *
 * Reads the text file given as the first argument, splits every line on single
 * spaces, counts the occurrences of each word, prints the counts on the driver
 * and writes them to the path given as the second argument.
 */
object App {

  /**
   * Program entry point.
   *
   * @param args `args(0)` is the input path, `args(1)` is the output path.
   *             When fewer than two arguments are supplied a usage message is
   *             written to stderr and the JVM exits with status 1.
   */
  def main(args: Array[String]): Unit = {
    println("Hello World!")

    if (args.length < 2) {
      System.err.println("Usage: <infile> <outfile>")
      System.exit(1)
    }

    val conf = new SparkConf().setAppName("App")
    val sc = new SparkContext(conf)

    // Earlier experiment, kept for reference: count the lines of a local file
    // that contain "a" and "b". Point logFile at a file under your own Spark
    // installation directory.
    //   val logFile = "/usr/local/spark/spark-1.3.1-bin-hadoop2.6/README.md"
    //   val logData = sc.textFile(logFile, 2).cache()
    //   val numAs = logData.filter(line => line.contains("a")).count()
    //   val numBs = logData.filter(line => line.contains("b")).count()
    //   println("Lines with a: %s,Lines with b: %s".format(numAs, numBs))

    try {
      val lines = sc.textFile(args(0))
      // (word, 1) pairs summed per word.
      val counts = lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
      // collect() materialises every (word, count) pair on the driver before printing.
      counts.collect().foreach(println)
      counts.saveAsTextFile(args(1))
    } finally {
      // Stop the context even when the job fails.
      sc.stop()
    }
  }
}

打包并启动jar

 

# Submit the packaged jar to YARN in cluster mode.
# --class must be the fully qualified name of the main object
# (package com.bj58 + object App).
# $sparkbin, $inputpath and $Outpath are expected to be set beforehand.
# Commented-out alternative queue:
#  --queue root.online.hdp_teu_dia \
$sparkbin --class "com.bj58.App" \
  --master yarn \
  --deploy-mode cluster \
  --queue root.offline.normal \
  --name Test \
  --executor-memory 10G \
  --num-executors 20 \
  --executor-cores 2 \
  --driver-memory 10g \
  ./spark.jar "${inputpath}" "${Outpath}"

 

终端输出:

17/05/09 16:53:37 INFO yarn.Client main: Application report for application_1491903146022_2119985 (state: RUNNING)
17/05/09 16:53:38 INFO yarn.Client main: Application report for application_1491903146022_2119985 (state: FINISHED)
17/05/09 16:53:38 INFO yarn.Client main:
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: 10.126.14.136
         ApplicationMaster RPC port: 0
         queue: root.offline.normal
         start time: 1494319993385
         final status: SUCCEEDED
         tracking URL: http://tjtx-81-187.org:9088/proxy/application_1491903146022_2119985/history/application_1491903146022_2119985/1
         user: hdp_teu_dia
17/05/09 16:53:38 INFO util.ShutdownHookManager Thread-3: Shutdown hook called
17/05/09 16:53:38 INFO util.ShutdownHookManager Thread-3: Deleting directory /tmp/spark-79598c10-7db4-4ead-9a44-3ce7681c2cee
done:20170509 16:53:38
http://tjtx-81-187.org:9088/cluster/apps

 

posted @ 2017-05-09 17:01  energy1989  阅读(64)  评论(0)  编辑  收藏  举报