DailySale

package com.bjsxt.scala.spark.sql.demo

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}
import org.apache.spark.sql.functions._

/**
 * Daily sales: total sale amount per date
 */
 
object DailySale {
  
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
        .setMaster("local")
        .setAppName("DailySale")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
  
    import sqlContext.implicits._
    
    // Sample logs in the format "date,sale_amount,user_id"
    val userSaleLog = Array(
        "2016-9-01,55,1122",
        "2016-9-01,23,1133",
        "2016-9-01,15,",      // missing user id, filtered out below
        "2016-9-02,56,1144",
        "2016-9-02,78,1155",
        "2016-9-03,113,1123")
    val userSaleLogRDD = sc.parallelize(userSaleLog, 5)
    
    // Keep only complete records, i.e. logs that still contain a user id
    val filteredUserSaleLogRDD = userSaleLogRDD
        .filter(_.split(",").length == 3)
      
    // Split each log once and build a Row of (date, sale amount as Int)
    val userSaleLogRowRDD = filteredUserSaleLogRDD
        .map { log =>
          val fields = log.split(",")
          Row(fields(0), fields(1).toInt)
        }
    
    val structType = StructType(Array(
        StructField("date", StringType, true),
        StructField("sale_amount", IntegerType, true)))
    
    val userSaleLogDF = sqlContext.createDataFrame(userSaleLogRowRDD, structType)  
    
    userSaleLogDF.groupBy("date")
        .agg('date, sum('sale_amount))
        .map { row => Row(row(1), row(2)) }
        .collect()
        .foreach(println)  
  }
  
}
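
With the sample data above, the record "2016-9-01,15," is dropped by the filter because it has no user id, so the output (row order may vary) should look roughly like:

[2016-9-01,78]
[2016-9-02,134]
[2016-9-03,113]

As a minimal alternative sketch (not part of the original code; the temporary table name user_sale_log is just an illustration), the same daily totals could also be computed with a SQL query in Spark 1.x:

    userSaleLogDF.registerTempTable("user_sale_log")
    sqlContext.sql("SELECT `date`, SUM(sale_amount) AS total_sale FROM user_sale_log GROUP BY `date`")
        .collect()
        .foreach(println)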

  
