DailyUV

A small Spark SQL example: compute the daily UV (unique visitors) of a site from "date,userid" access-log records, using groupBy plus countDistinct on a DataFrame built from an RDD of Rows and an explicit schema.
package com.bjsxt.scala.spark.sql.demo

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.functions._
object DailyUV {
  def main(args: Array[String]): Unit = {
    // Local Spark context and a SQLContext, the Spark 1.x entry point to Spark SQL.
    val conf = new SparkConf()
        .setMaster("local")
        .setAppName("DailyUV")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Implicit conversions, e.g. the 'symbol syntax for referring to columns.
    import sqlContext.implicits._
    // Simulated access log: each record is "date,userid".
    val userAccessLog = Array(
        "2016-09-01,1122",
        "2016-09-01,1122",
        "2016-09-01,1123",
        "2016-09-01,1124",
        "2016-09-01,1124",
        "2016-09-02,1122",
        "2016-09-02,1121",
        "2016-09-02,1123",
        "2016-09-02,1123")
    // Distribute the log across 5 partitions, then parse each record into a Row
    // (splitting each line once instead of twice).
    val userAccessLogRDD = sc.parallelize(userAccessLog, 5)

    val userAccessLogRowRDD = userAccessLogRDD
        .map { log =>
          val fields = log.split(",")
          Row(fields(0), fields(1).toInt)
        }

    // Explicit schema: a string date column and an integer userid column.
    val structType = StructType(Array(
        StructField("date", StringType, true),
        StructField("userid", IntegerType, true)))
    val userAccessLogRowDF = sqlContext.createDataFrame(userAccessLogRowRDD, structType)
    userAccessLogRowDF.groupBy("date")
        .agg('date, countDistinct('userid))  
        .map { row => Row(row(1), row(2)) }   
        .collect()
        .foreach(println)  
  }
}
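
The same aggregation can also be written as plain SQL. A minimal sketch, assuming it runs inside the same main method after userAccessLogRowDF is created (registerTempTable is the Spark 1.x API; Spark 2.x renamed it createOrReplaceTempView):

// Register the DataFrame as a temporary table and query it with SQL.
userAccessLogRowDF.registerTempTable("user_access_log")
sqlContext.sql(
    "SELECT date, COUNT(DISTINCT userid) AS uv " +
    "FROM user_access_log " +
    "GROUP BY date")
  .collect()
  .foreach(println)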

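For comparison, the same daily UV can be computed with core RDD operations alone, no Spark SQL involved. A sketch reusing userAccessLogRDD from the example above:

// Parse into (date, userid) pairs, deduplicate them, then count the
// remaining distinct visitors per day.
val uvByDate = userAccessLogRDD
    .map { log =>
      val fields = log.split(",")
      (fields(0), fields(1).toInt)
    }
    .distinct()
    .mapValues(_ => 1)
    .reduceByKey(_ + _)

uvByDate.collect().foreach(println)

Both routes shuffle by date; the DataFrame version additionally goes through the Catalyst optimizer, which is why the DataFrame/SQL form is usually preferred.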
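
On Spark 2.x and later, SQLContext is superseded by SparkSession. A self-contained sketch of the same computation (the object name DailyUV2 and the local master are illustrative choices, not part of the original example):

package com.bjsxt.scala.spark.sql.demo

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.countDistinct

object DailyUV2 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
        .master("local")
        .appName("DailyUV2")
        .getOrCreate()
    import spark.implicits._

    // Same simulated log, built directly as a DataFrame of (date, userid) pairs.
    val userAccessLog = Seq(
        ("2016-09-01", 1122), ("2016-09-01", 1122), ("2016-09-01", 1123),
        ("2016-09-01", 1124), ("2016-09-01", 1124), ("2016-09-02", 1122),
        ("2016-09-02", 1121), ("2016-09-02", 1123), ("2016-09-02", 1123))

    userAccessLog.toDF("date", "userid")
        .groupBy("date")
        .agg(countDistinct($"userid").as("uv"))
        .show()

    spark.stop()
  }
}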