笔记:尚硅谷大数据Spark-2019

使用IntelliJ IDEA编写WordCount程序并在Spark上运行
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
/**
 * Minimal Spark word-count job.
 *
 * Reads the text file `in/word.txt`, splits every line on a single space,
 * counts how often each resulting token occurs, and prints each
 * `(word, count)` pair on the driver.
 */
object WordCount {

  /**
   * Program entry point: builds a SparkContext, runs the word count, and
   * always stops the context afterwards.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {
    /*
    // Variant 1 (kept for reference): a single chained expression reading
    // from an absolute local path, printing directly from the RDD.
    val inputFile = "file:///usr/local/spark/mycode/word.txt"
    val conf = new SparkConf().setAppName("WordCount").setMaster("local")
    val sc = new SparkContext(conf)
    val textFile = sc.textFile(inputFile)
    val wordCount = (textFile flatMap (line => line.split(" "))).map(word => (word, 1)).reduceByKey((a, b) => a + b)
    wordCount.foreach(println)
    */

    // Variant 2: the same pipeline with every intermediate RDD named.
    val config: SparkConf = new SparkConf().setMaster("local[*]").setAppName("WordCount")
    val sc = new SparkContext(config)
    try {
      // Relative path -- presumably resolved against the process working
      // directory; confirm for the environment the job is launched from.
      val lines: RDD[String] = sc.textFile("in/word.txt")
      // One element per space-separated token of each line.
      val words: RDD[String] = lines.flatMap(_.split(" "))
      // Pair every token with an initial count of 1 ...
      val wordToOne: RDD[(String, Int)] = words.map((_, 1))
      // ... then sum the counts per distinct token.
      val wordToSum: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _)
      // Bring the totals back to the driver as a local array before printing.
      val result: Array[(String, Int)] = wordToSum.collect()
      result.foreach(println)
    } finally {
      // Release the context even when the job fails part-way through.
      sc.stop()
    }
  }
}
将Spark程序部署到Yarn中执行
浙公网安备 33010602011771号