1、数据格式（每行一条记录，共三个字段：键、年月、逗号分隔的数值列表；实际文件中字段之间以制表符 \t 分隔）
aaaa 201701 11.1,3.8,2.5
aaaa 201702 2.1,3.3,2.5
aaaa 201703 34.1,3.2,2.0
aaaa 201704 2.2,3.3,2.5
aaaa 201705 13.1,3.5,2.5
aaaa 201706 22.4,3.3,2.5
aaaa 201707 2.1,3.3,2.0
aaaa 201708 10.1,4.3,2.5
bbbb 201701 2.8,3.3,2.5
bbbb 201703 2.2,3.3,4.2
bbbb 201704 2.1,3.3,2.5
bbbb 201705 2.3,3.7,2.5
bbbb 201709 2.1,3.4,2.5
bbbb 201719 2.1,3.3,2.5
bbbb 201712 2.1,3.3,2.0
2、处理流程
package streaming
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
import redis.RedisClient
import redis.clients.jedis.Jedis
import org.apache.log4j.Logger
import spire.math.Interval
import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
/**
* Created by hadoop on 2017/7/18.
*/
/**
  * One parsed input record.
  *
  * @param kk first field of the input line; used as the grouping key
  * @param tt second field of the input line, parsed as an integer
  * @param va third field of the input line, split on commas; the entries are
  *           kept as strings and converted to numbers only when averaged
  */
case class Da(
  kk: String,
  tt: Int,
  va: mutable.Buffer[String]
)
/**
  * Spark Streaming job that watches a directory for new text files and, for
  * every batch, prints the per-key column averages of the records found.
  *
  * Each input line is expected to hold three tab-separated fields: a key, an
  * integer, and a comma-separated list of numbers. Records are grouped by key
  * and, for every column position of the number list, the mean over the
  * group's records is computed. One buffer of averages per key is printed;
  * the key itself is not part of the printed value.
  */
object Streaming {

  /** Batch interval of the streaming context, in seconds. */
  private val BatchIntervalSeconds = 5

  /** Directory monitored for newly created input files. */
  private val InputDirectory = "D:\\tt\\"

  private val log = Logger.getLogger(getClass)

  /**
    * Parses one tab-separated input line into a [[Da]].
    *
    * The line is split only once. A line is rejected (and a warning logged)
    * when it has fewer than three fields, when the second field is not an
    * integer, or when any entry of the third field is not a number, so that a
    * single bad line cannot fail the whole batch.
    *
    * @param line raw input line
    * @return the parsed record, or `None` when the line is malformed
    */
  private def parseLine(line: String): Option[Da] = {
    val parsed = line.split("\t") match {
      case Array(key, period, values, _*) =>
        val cells = values.split(",")
        val allNumeric = cells.forall(cell => scala.util.Try(cell.toDouble).isSuccess)
        if (allNumeric) {
          scala.util.Try(period.toInt).toOption.map(tt => Da(key, tt, cells.toBuffer))
        } else {
          None
        }
      case _ => None
    }
    if (parsed.isEmpty) {
      log.warn(s"Skipping malformed input line: $line")
    }
    parsed
  }

  /**
    * Computes the mean of every value column over the records of one key.
    *
    * Only column positions present in every record are averaged, so records
    * with differing numbers of values no longer raise an index error. A
    * record whose value buffer is `null` counts as having no columns.
    *
    * @param rows all records sharing one key
    * @return one average per column, in column order; empty when `rows` is
    *         empty or no column is shared by all records
    */
  private def columnAverages(rows: Seq[Da]): ArrayBuffer[Double] = {
    val averages = ArrayBuffer[Double]()
    if (rows.nonEmpty) {
      val width = rows.map(row => Option(row.va).fold(0)(_.size)).min
      for (col <- 0 until width) {
        val column = rows.map(_.va(col).toDouble)
        // Debug trace of every value that contributes to the average.
        column.foreach(value => println("--" + value))
        averages += column.sum / rows.size
      }
    }
    averages
  }

  /**
    * Starts the streaming job and blocks until it terminates.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("streaming..").setMaster("local[4]")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(BatchIntervalSeconds))

    val lines = ssc.textFileStream(InputDirectory)

    // Malformed lines yield no record instead of throwing inside the task.
    val records = lines.flatMap(line => parseLine(line).toList)

    // The size of each group is the record count for its key, so no separate
    // count-and-join shuffle is needed to compute the averages.
    val result = records
      .map(record => (record.kk, record))
      .groupByKey()
      .map { case (_, rows) => columnAverages(rows.toSeq) }

    result.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
浙公网安备 33010602011771号