import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext, SparkSession}
import org.apache.spark.sql.types._
import org.apache.spark.storage.StorageLevel
object App {
def init(): Unit = {
// Keep only ERROR-level Spark log messages so the show() output below stays readable.
//Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
}
// Row type for reference only; in this variant the DataFrame is built from plain tuples.
case class OrderLine(store_id: Int, channel: String, gmv: Double, qty: Int)
def main(args: Array[String]): Unit = {
init()
val conf = new SparkConf().setMaster("local[*]").setAppName("spark-rdd-test")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
import sqlContext.implicits._
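// SQLContext is the legacy pre-2.0 entry point; the SparkSession-based variant further below
// is the recommended way to do the same thing on Spark 2.x.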
val df = sqlContext.createDataFrame(List(
//SNG
(1094, "SNG", 50.00, 1), (1094, "SNG", 150.00, 3), (1094, "SNG", 250.00, 5),
//WMDJ
(1094, "WMDJ", 10.00, 1), (1094, "WMDJ", 20.00, 2), (1094, "WMDJ", 30.00, 3),
(1094, "WMDJ", 40.00, 4), (1094, "WMDJ", 50.00, 5),
//JDDJ
(1094, "JDDJ", 100.00, 1), (1094, "JDDJ", 200.00, 20)
)).toDF("store_id", "channel", "gmv", "qty")
// columns: store_id: Int, channel: String, gmv: Double, qty: Int
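// Equivalent construction with an explicit schema (an illustrative sketch only; schemaSketch and
// dfFromRows are not used below): rows are plain Row objects and the StructType fixes column types.
val schemaSketch = StructType(Seq(
StructField("store_id", IntegerType, nullable = false),
StructField("channel", StringType),
StructField("gmv", DoubleType, nullable = false),
StructField("qty", IntegerType, nullable = false)))
val dfFromRows = sqlContext.createDataFrame(
sc.parallelize(Seq(Row(1094, "SNG", 50.00, 1))), schemaSketch)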
df.createOrReplaceTempView("cte_order")
df.cache()
df.printSchema()
/*
root
|-- store_id: integer (nullable = false)
|-- channel: string (nullable = true)
|-- gmv: double (nullable = false)
|-- qty: integer (nullable = false)
* */
df.show()
/*
+--------+-------+-----+---+
|store_id|channel| gmv|qty|
+--------+-------+-----+---+
| 1094| SNG| 50.0| 1|
| 1094| SNG|150.0| 3|
| 1094| SNG|250.0| 5|
| 1094| WMDJ| 10.0| 1|
| 1094| WMDJ| 20.0| 2|
| 1094| WMDJ| 30.0| 3|
| 1094| WMDJ| 40.0| 4|
| 1094| WMDJ| 50.0| 5|
| 1094| JDDJ|100.0| 1|
| 1094| JDDJ|200.0| 20|
+--------+-------+-----+---+
* */
println(df.collect().toList)
// List(
// [1094,SNG,50.0,1], [1094,SNG,150.0,3], [1094,SNG,250.0,5],
// [1094,WMDJ,10.0,1], [1094,WMDJ,20.0,2], [1094,WMDJ,30.0,3], [1094,WMDJ,40.0,4], [1094,WMDJ,50.0,5],
// [1094,JDDJ,100.0,1], [1094,JDDJ,200.0,20]
// )
val query = sqlContext.sql(
"""
|select
| channel,
| sum(qty) as sum_qty,
| sum(gmv) as sales
|from cte_order
|group by channel
""".stripMargin)
query.printSchema()
/*
root
|-- channel: string (nullable = true)
|-- sum_qty: long (nullable = true)
|-- sales: double (nullable = true)
* */
query.show()
/*
+-------+-------+-----+
|channel|sum_qty|sales|
+-------+-------+-----+
| WMDJ| 15|150.0|
| JDDJ| 21|300.0|
| SNG| 9|450.0|
+-------+-------+-----+
* */
println(query.collect().toList)
// List([WMDJ,15,150.0], [JDDJ,21,300.0], [SNG,9,450.0])
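// The same aggregation via the DataFrame API instead of SQL (an illustrative sketch; it should
// produce the same rows as the query above, possibly in a different order):
import org.apache.spark.sql.functions.sum
df.groupBy("channel").agg(sum("qty").as("sum_qty"), sum("gmv").as("sales")).show()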
val table = df.sqlContext.sql(
"""
|select
| channel,
| row_number() over(order by gmv desc) as rowIndex,
| row_number() over(partition by channel order by gmv desc) as par_rowIndex,
| count(gmv) over(partition by channel) as counts,
| sum(gmv) over(partition by channel) as sales
|from cte_order
""".stripMargin)
table.printSchema()
/*
root
|-- channel: string (nullable = true)
|-- rowIndex: integer (nullable = true)
|-- par_rowIndex: integer (nullable = true)
|-- counts: long (nullable = false)
|-- sales: double (nullable = true)
* */
table.show()
/*
+-------+--------+------------+------+-----+
|channel|rowIndex|par_rowIndex|counts|sales|
+-------+--------+------------+------+-----+
| JDDJ| 2| 1| 2|300.0|
| JDDJ| 4| 2| 2|300.0|
| SNG| 1| 1| 3|450.0|
| SNG| 3| 2| 3|450.0|
| SNG| 5| 3| 3|450.0|
| WMDJ| 6| 1| 5|150.0|
| WMDJ| 7| 2| 5|150.0|
| WMDJ| 8| 3| 5|150.0|
| WMDJ| 9| 4| 5|150.0|
| WMDJ| 10| 5| 5|150.0|
+-------+--------+------------+------+-----+
* */
println(table.collect().toList)
// List(
// [JDDJ,2,1,2,300.0], [JDDJ,4,2,2,300.0],
// [SNG,1,1,3,450.0], [SNG,3,2,3,450.0], [SNG,5,3,3,450.0],
// [WMDJ,6,1,5,150.0], [WMDJ,7,2,5,150.0], [WMDJ,8,3,5,150.0], [WMDJ,9,4,5,150.0], [WMDJ,10,5,5,150.0]
// )
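// The same window functions via the DataFrame API (an illustrative sketch; as with the SQL above,
// the un-partitioned row_number() pulls all rows into a single window partition):
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, count, row_number, sum}
val byChannel = Window.partitionBy("channel")
df.select(
col("channel"),
row_number().over(Window.orderBy(col("gmv").desc)).as("rowIndex"),
row_number().over(byChannel.orderBy(col("gmv").desc)).as("par_rowIndex"),
count("gmv").over(byChannel).as("counts"),
sum("gmv").over(byChannel).as("sales")
).show()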
df.unpersist()
}
}
// The same example rewritten against the Spark 2.x SparkSession entry point.
object AppSparkSession {
def init(): Unit = {
// Keep only ERROR-level Spark log messages so the show() output below stays readable.
//Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
}
case class OrderLine(store_id: Int, channel: String, gmv: Double, qty: Int)
def main(args: Array[String]): Unit = {
init()
val conf = new SparkConf().setMaster("local[*]").setAppName("spark-rdd-test")
val sparkSession = SparkSession.builder().config(conf).getOrCreate()
import sparkSession.implicits._
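// SparkSession is the unified Spark 2.x entry point; it wraps the SparkContext and SQLContext
// used in the variant above, and getOrCreate() reuses an existing session if one is running.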
val df = sparkSession.createDataFrame(List(
//SNG
new OrderLine(1094, "SNG", 50.00, 1),
new OrderLine(1094, "SNG", 150.00, 3),
new OrderLine(1094, "SNG", 250.00, 5),
//WMDJ
new OrderLine(1094, "WMDJ", 10.00, 1),
new OrderLine(1094, "WMDJ", 20.00, 2),
new OrderLine(1094, "WMDJ", 30.00, 3),
new OrderLine(1094, "WMDJ", 40.00, 4),
new OrderLine(1094, "WMDJ", 50.00, 5),
//JDDJ
new OrderLine(1094, "JDDJ", 100.00, 1),
new OrderLine(1094, "JDDJ", 200.00, 20)
))//.toDF()
//.toDF("store_id", "channel", "gmv", "qty")
df.createOrReplaceTempView("cte_order")
df.cache()
df.printSchema()
/*
root
|-- store_id: integer (nullable = false)
|-- channel: string (nullable = true)
|-- gmv: double (nullable = false)
|-- qty: integer (nullable = false)
* */
df.show()
/*
+--------+-------+-----+---+
|store_id|channel| gmv|qty|
+--------+-------+-----+---+
| 1094| SNG| 50.0| 1|
| 1094| SNG|150.0| 3|
| 1094| SNG|250.0| 5|
| 1094| WMDJ| 10.0| 1|
| 1094| WMDJ| 20.0| 2|
| 1094| WMDJ| 30.0| 3|
| 1094| WMDJ| 40.0| 4|
| 1094| WMDJ| 50.0| 5|
| 1094| JDDJ|100.0| 1|
| 1094| JDDJ|200.0| 20|
+--------+-------+-----+---+
* */
println(df.collect().toList)
// List(
// [1094,SNG,50.0,1], [1094,SNG,150.0,3], [1094,SNG,250.0,5],
// [1094,WMDJ,10.0,1], [1094,WMDJ,20.0,2], [1094,WMDJ,30.0,3], [1094,WMDJ,40.0,4], [1094,WMDJ,50.0,5],
// [1094,JDDJ,100.0,1], [1094,JDDJ,200.0,20]
// )
val query = sparkSession.sql(
"""
|select
| channel,
| sum(qty) as sum_qty,
| sum(gmv) as sales
|from cte_order
|group by channel
""".stripMargin)
query.printSchema()
/*
root
|-- channel: string (nullable = true)
|-- sum_qty: long (nullable = true)
|-- sales: double (nullable = true)
* */
query.show()
/*
+-------+-------+-----+
|channel|sum_qty|sales|
+-------+-------+-----+
| WMDJ| 15|150.0|
| JDDJ| 21|300.0|
| SNG| 9|450.0|
+-------+-------+-----+
* */
println(query.collect().toList)
// List([WMDJ,15,150.0], [JDDJ,21,300.0], [SNG,9,450.0])
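// df.sqlContext exposes the SQLContext wrapped by the session; sparkSession.sql(...) would be
// equivalent here.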
val table = df.sqlContext.sql(
"""
|select
| channel,
| row_number() over(order by gmv desc) as rowIndex,
| row_number() over(partition by channel order by gmv desc) as par_rowIndex,
| count(gmv) over(partition by channel) as counts,
| sum(gmv) over(partition by channel) as sales
|from cte_order
""".stripMargin)
table.printSchema()
/*
root
|-- channel: string (nullable = true)
|-- rowIndex: integer (nullable = true)
|-- par_rowIndex: integer (nullable = true)
|-- counts: long (nullable = false)
|-- sales: double (nullable = true)
* */
table.show()
/*
+-------+--------+------------+------+-----+
|channel|rowIndex|par_rowIndex|counts|sales|
+-------+--------+------------+------+-----+
| JDDJ| 2| 1| 2|300.0|
| JDDJ| 4| 2| 2|300.0|
| SNG| 1| 1| 3|450.0|
| SNG| 3| 2| 3|450.0|
| SNG| 6| 3| 3|450.0|
| WMDJ| 5| 1| 5|150.0|
| WMDJ| 7| 2| 5|150.0|
| WMDJ| 8| 3| 5|150.0|
| WMDJ| 9| 4| 5|150.0|
| WMDJ| 10| 5| 5|150.0|
+-------+--------+------------+------+-----+
* */
println(table.collect().toList)
// List(
// [JDDJ,2,1,2,300.0], [JDDJ,4,2,2,300.0],
// [SNG,1,1,3,450.0], [SNG,3,2,3,450.0], [SNG,6,3,3,450.0],
// [WMDJ,5,1,5,150.0], [WMDJ,7,2,5,150.0], [WMDJ,8,3,5,150.0], [WMDJ,9,4,5,150.0], [WMDJ,10,5,5,150.0]
// )
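// Note: the global rowIndex values 5 and 6 differ from the SQLContext variant above because
// row_number() assigns them to the two tied gmv = 50.0 rows (one SNG, one WMDJ) in an
// arbitrary order; ties under "order by gmv desc" are not broken deterministically.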
df.unpersist()
}
}