Submit

package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
  * Word-count job meant to be packaged into a jar and launched with `spark-submit`.
  *
  * Each input line is split on commas, every resulting token is counted, and the
  * `(word, count)` pairs are written out as tab-separated files.
  *
  * Run from the directory that contains the jar:
  * {{{
  * spark-submit --class com.shujia.spark.sql.Demo5Submit \
  *   --master yarn --deploy-mode client spark-1.0.jar [inputPath [outputPath]]
  * }}}
  *
  * `inputPath` defaults to `/data/words` and `outputPath` to `/data/wc`, so invoking the
  * job without arguments behaves exactly as before.
  */
object Demo5Submit {

  /** Input location used when no first argument is supplied. */
  private val DefaultInputPath = "/data/words"

  /** Output location used when no second argument is supplied. */
  private val DefaultOutputPath = "/data/wc"

  /**
    * Entry point.
    *
    * @param args optional positional arguments: `args(0)` is the input path,
    *             `args(1)` is the output path; missing ones fall back to the defaults
    */
  def main(args: Array[String]): Unit = {

    val inputPath: String = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath: String = args.lift(1).getOrElse(DefaultOutputPath)

    // No master is set here on purpose: it is supplied by spark-submit.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("submit")
      .getOrCreate()

    try {
      // Implicit conversions (enables the $"col" syntax).
      import spark.implicits._
      // All built-in Spark SQL functions (split, explode, count, ...).
      import org.apache.spark.sql.functions._

      // Read the input as a single string column named "lines".
      // NOTE(review): with a one-column schema and a tab separator, the CSV reader presumably
      // keeps only the text before the first tab of each line - confirm the input has no tabs.
      val linesDF: DataFrame = spark
        .read
        .format("csv")
        .option("sep", "\t")
        .schema("lines STRING")
        .load(inputPath)

      // One row per comma-separated token, then count the occurrences of each token.
      val wordCountDF: DataFrame = linesDF
        .select(explode(split($"lines", ",")) as "word")
        .groupBy($"word")
        .agg(count($"word") as "c")

      // Write the result as tab-separated text, replacing any previous output.
      wordCountDF
        .write
        .format("csv")
        .option("sep", "\t")
        .mode(SaveMode.Overwrite)
        .save(outputPath)
    } finally {
      // Release the session's resources even if the job fails.
      spark.stop()
    }

  }

}

 

posted @ 2021-07-21 16:53  坤坤无敌  阅读(705)  评论(0)    收藏  举报