Spark WordCount count-and-sort demo (Core and Streaming)

SparkContext demo

package com.sgm.spark
import org.apache.spark.{SparkConf, SparkContext}
object test {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val lines = sc.textFile("C:\\Users\\itlocal\\IdeaProjects\\nginxlog\\checkpoint\\1.txt")
    val words = lines.flatMap(_.split(" "))
//    val numpattern = "10".r
//    words.foreach(word => numpattern.findAllIn(word).foreach(println))
    // count the occurrences of each word
    val wordPairs = words.map(word => (word, 1)).reduceByKey(_ + _)
    // swap to (count, word), sort descending by count, and take the top 3
    val sortword = wordPairs.map(pair => (pair._2, pair._1)).sortByKey(ascending = false).take(3)
    wordPairs.foreach(println)
    sortword.foreach(println)
    sc.stop()
  }
}
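
The swap-to-(count, word) plus sortByKey pattern above works, but Spark Core also offers more direct ways to get the top 3. A minimal sketch, assuming the same wordPairs RDD built above:

// take the top 3 pairs by count without swapping key and value
val top3 = wordPairs.top(3)(Ordering.by(_._2))
top3.foreach { case (word, count) => println(s"$word: $count") }
// or sort the whole RDD by count, descending, and take the first 3
wordPairs.sortBy(_._2, ascending = false).take(3).foreach(println)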

StreamingContext demo

package com.sgm.spark
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
object NewWordCount {
  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: NetworkWordCount <hostname> <port>")
      System.exit(1)
    }
    // Create the context with a 20 second batch interval
    val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(20))
    val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER)
    val words = lines.flatMap(_.split(" "))
    //val word = lines.flatMap(x => x.split(" "))  // equivalent, more verbose way to build words
    // count each word, then swap to (count, word) so each batch can be sorted by count
    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _).map(pair => (pair._2, pair._1))
    wordCounts.print()
    // within each batch, sort descending by count and print the top 3
    wordCounts.foreachRDD(rdd => rdd.sortByKey(ascending = false).take(3).foreach(println))
    ssc.start()
    ssc.awaitTermination()
  }
}
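
The foreachRDD above collects the top 3 to the driver and prints them for each batch. If the per-batch top 3 should instead remain a DStream (for example to print it with the streaming API or write it out), the sorting can be done inside transform. A minimal sketch, assuming the wordCounts DStream of (count, word) pairs built above:

// keep the per-batch top 3 as a DStream by sorting inside transform
val top3PerBatch = wordCounts.transform { rdd =>
  rdd.sortByKey(ascending = false)        // sort descending by count
    .zipWithIndex()                       // attach a position to each element
    .filter { case (_, idx) => idx < 3 }  // keep the first three
    .map(_._1)                            // drop the index again
}
top3PerBatch.print()

To test either version locally, start a socket source first, for example with nc -lk 9999, and pass the host and port as the two program arguments.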

 
