aggregateMessages
def aggregateMessages[Msg: ClassTag](
sendMsg: EdgeContext[VD, ED, Msg] => Unit,
mergeMsg: (Msg, Msg) => Msg,
tripletFields: TripletFields = TripletFields.All)
: VertexRDD[Msg]
注:聚合信息的核心方法是aggregateMessages,其操作的本质是sendMsg和mergeMsg。
具体而言,是依次在图的每条边(edge triplet)上根据sendMsg函数的要求,把该边上A端的节点信息发送给B端,如:把src节点的信息发到dst节点,或者把dst节点的信息发到src节点;
然后在B端调用mergeMsg函数,将可能收到的多个msg合并成一个msg。
tripletFields参数指定sendMsg会访问EdgeContext中的哪些字段(源顶点属性、目标顶点属性、边属性);如果只访问其中一部分字段,通过此参数加以限定可以提高性能。aggregateMessages方法最终返回一个新的顶点集VertexRDD[Msg],其中每个收到消息的顶点上都带有聚合后的信息。
collectNeighborIds与collectNeighbors函数就是对aggregateMessages的简单封装以实现聚合相邻节点id和相邻节点的功能
vertecs.txt:
(4,4.0)
(0,0.0)
(1,1.0)
(6,6.0)
(3,3.0)
(7,7.0)
(9,9.0)
(8,8.0)
(5,5.0)
(2,2.0)
edges:
Edge(0,2,1)
Edge(0,3,1)
Edge(0,5,1)
Edge(0,5,1)
Edge(0,7,1)
Edge(0,8,1)
Edge(0,8,1)
Edge(0,8,1)
Edge(0,9,1)
Edge(1,0,1)
package graphx

import org.apache.log4j.{Level, Logger}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.util.GraphGenerators

/**
 * Demonstrates `Graph.aggregateMessages` on a small generated graph.
 *
 * The program:
 *  1. builds a 10-vertex graph whose vertex attribute ("age") is the vertex id as a Double,
 *  2. prints its vertices, edges and triplets,
 *  3. for every vertex, counts the followers (incoming edges) whose age is greater than
 *     the vertex's own age and sums their ages,
 *  4. derives the average age of those older followers (computed twice, in two styles),
 *  5. prints the out-degree of every vertex that has at least one outgoing edge.
 */
object test07 {

  /**
   * Entry point. Runs the whole demo on a local Spark master and prints the results
   * to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Silence the noisy framework logging so that only the demo output is visible.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Set up the runtime environment.
    val conf: SparkConf = new SparkConf().setAppName("my graphx").setMaster("local")
    val sc = new SparkContext(conf)

    // Make sure the context is released even if one of the jobs below fails.
    try {
      // Build the graph: 10 generated vertices, each attributed with its own id as a Double.
      val graph: Graph[Double, Int] =
        GraphGenerators.logNormalGraph(sc, numVertices = 10).mapVertices((id, _) => id.toDouble)

      println("vertecs.txt:")
      graph.vertices.collect().foreach(println)
      println("edges:")
      graph.edges.collect().foreach(println)
      println("triplets:")
      graph.triplets.collect().foreach(println)

      // Compute the number of older followers and their total age.
      // The message is a (count, totalAge) pair; the count is a plain Int.
      val olderFollowers: VertexRDD[(Int, Double)] =
        graph.aggregateMessages[(Int, Double)](
          // Map function: only edges whose source is older than the destination send a message.
          triplet => {
            if (triplet.srcAttr > triplet.dstAttr) {
              // Send (counter, age) to the destination vertex as an explicit tuple
              // rather than relying on argument auto-tupling.
              triplet.sendToDst((1, triplet.srcAttr))
            }
          },
          // Reduce function: add the counters and add the ages.
          (a, b) => (a._1 + b._1, a._2 + b._2)
        )
      println("olderFollowers")
      olderFollowers.collect().foreach(println)

      // Average age = total age / number of older followers.
      // Every vertex present in olderFollowers received at least one message, so count >= 1.
      val avgAgeOfOlderFollowers: VertexRDD[Double] =
        olderFollowers.mapValues { (id, value) =>
          value match { case (count, totalAge) => totalAge / count }
        }
      println("avgAgeOfOlderFollowers")
      avgAgeOfOlderFollowers.collect().foreach(println)

      // Same computation using tuple accessors instead of pattern matching.
      val avgAgeOfOlderFollowers2: VertexRDD[Double] =
        olderFollowers.mapValues((id, value) => value._2 / value._1)
      println("avgAgeOfOlderFollowers2")
      avgAgeOfOlderFollowers2.collect().foreach(println)

      // Out-degree of each vertex: every edge sends 1 to its source and the 1s are summed.
      // No vertex or edge attribute is read, so TripletFields.None avoids shipping them.
      // Results are collected first so they are printed by the driver, like the output above.
      graph
        .aggregateMessages[Int](_.sendToSrc(1), _ + _, TripletFields.None)
        .collect()
        .foreach(println)
    } finally {
      sc.stop()
    }
  }
}
posted on 2020-10-28 16:19 happygril3 阅读(194) 评论(0) 收藏 举报
浙公网安备 33010602011771号