joinvertices
joinvertices:
graph1.joinVertices(graph2)((id2, VD1, data2) => VD2)
这里的id2是graph2的点, data2是graph2的属性,这样写就会用新属性VD2覆盖老属性VD1
outerjoinvertices:
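这个操作其实跟上面的类似,关键区别在于对只在graph1中出现(在graph2中没有匹配)的点的处理:joinVertices会保留这些点的原属性;而outerJoinVertices会对所有点调用映射函数,并把graph2的属性以Option形式传入(没有匹配时为None),由函数决定新属性,因此还可以改变顶点属性的类型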
package graphx

import org.apache.log4j.{Level, Logger}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/*
 * joinVertices:
 *   tmgraph.joinVertices(tmgraph2)((vid, old, vdata) => vdata)
 * Here vid is a vertex of tmgraph2 and vdata is its attribute; written this way,
 * the new attribute overwrites the old one.
 */

/**
 * Join-operation demo: builds a small property graph, attaches each vertex's
 * in-degree and out-degree via `outerJoinVertices`, and prints the result.
 */
object test04 {

  /** Vertex attribute carrying a person's name and age plus the vertex's in/out degree. */
  private final case class User(name: String, age: Int, inDeg: Int, outDeg: Int)

  /**
   * Program entry point. Runs the whole demo on a local Spark master and
   * prints the vertex attributes to standard output.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Suppress noisy framework logging.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Set up the runtime environment.
    val conf: SparkConf = new SparkConf().setAppName("my graphx").setMaster("local")
    val sc = new SparkContext(conf)

    try {
      // Vertex data; vertex attribute type VD is (String, Int) = (name, age).
      val vertexArray = Array(
        (1L, ("Alice", 28)),
        (2L, ("Bob", 27)),
        (3L, ("Charlie", 65)),
        (4L, ("David", 42)),
        (5L, ("Ed", 55)),
        (6L, ("Fran", 50))
      )

      // Edge data; edge attribute type ED is Int.
      val edgeArray = Array(
        Edge(2L, 1L, 7),
        Edge(2L, 4L, 2),
        Edge(3L, 2L, 4),
        Edge(3L, 6L, 3),
        Edge(4L, 1L, 1),
        Edge(5L, 2L, 2),
        Edge(5L, 3L, 8),
        Edge(5L, 6L, 3)
      )

      // Build the vertex and edge RDDs, then the graph itself.
      val vertexRDD: RDD[(VertexId, (String, Int))] = sc.makeRDD(vertexArray)
      val edgeRDD: RDD[Edge[Int]] = sc.makeRDD(edgeArray)
      val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

      // Convert the vertex attribute to User, with both degrees initialised to 0.
      val initialUserGraph: Graph[User, Int] =
        graph.mapVertices { case (_, (name, age)) => User(name, age, 0, 0) }

      // Join with the inDegrees / outDegrees RDDs and fill in the matching field.
      // The degree arrives as an Option; a missing value falls back to 0.
      val userGraph: Graph[User, Int] =
        initialUserGraph
          .outerJoinVertices(initialUserGraph.inDegrees) {
            case (_, u, inDegOpt) => u.copy(inDeg = inDegOpt.getOrElse(0))
          }
          .outerJoinVertices(initialUserGraph.outDegrees) {
            case (_, u, outDegOpt) => u.copy(outDeg = outDegOpt.getOrElse(0))
          }

      println("连接图的属性:")
      val vertices: VertexRDD[User] = userGraph.vertices
      // Print an explicit (name, age) tuple: same output as the auto-tupled
      // println(name, age) call, without relying on deprecated argument adaptation.
      vertices.collect().foreach { case (_, user) => println((user.name, user.age)) }

      // People whose in-degree equals their out-degree.
      // NOTE(review): "入读" in the message below looks like a typo for "入度" (in-degree);
      // left unchanged so the program's output stays the same — confirm before fixing.
      println("出度和入读相同的人员:")
      val balanced: VertexRDD[User] =
        vertices.filter { case (_, user) => user.inDeg == user.outDeg }
      balanced.collect().foreach { case (_, user) => println((user.name, user.age)) }
    } finally {
      // Always stop the SparkContext, even if the job above fails.
      sc.stop()
    }
  }
}
posted on 2020-10-27 18:06 happygril3 阅读(195) 评论(0) 收藏 举报
浙公网安备 33010602011771号