属性操作
graph包含三个基本的类集合视图
val vertices: VertexRDD[VD]
val edges: EdgeRDD[ED]
val triplets: RDD[EdgeTriplet[VD, ED]],其中每个元素可理解为一条记录:(srcId, srcAttr, dstId, dstAttr, attr)
图信息接口
val numEdges: Long
val numVertices: Long
val inDegrees: VertexRDD[Int]
val outDegrees: VertexRDD[Int]
val degrees: VertexRDD[Int]
package graphx

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

/**
 * GraphX property-operation demo.
 *
 * Builds a small property graph (vertices carry `(name, age)`, edges carry an
 * `Int` weight) and prints:
 *   - vertices whose age is greater than 30,
 *   - edges whose attribute is greater than 5 (two equivalent filter styles),
 *   - triplets whose edge attribute is greater than 5,
 *   - degree / in-degree / out-degree of every vertex,
 *   - the vertex with the maximum out-degree, in-degree and total degree.
 */
object test02 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Suppress verbose framework logging
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Set up the runtime environment
    val conf: SparkConf = new SparkConf().setAppName("my graphx").setMaster("local")
    val sc = new SparkContext(conf)

    // Always release the SparkContext, even if one of the jobs below fails.
    try {
      // Vertex data; vertex attribute type VD is (String, Int) = (name, age)
      val vertexArray = Array(
        (1L, ("Alice", 28)),
        (2L, ("Bob", 27)),
        (3L, ("Charlie", 65)),
        (4L, ("David", 42)),
        (5L, ("Ed", 55)),
        (6L, ("Fran", 50))
      )

      // Edge data; edge attribute type ED is Int
      val edgeArray = Array(
        Edge(2L, 1L, 7),
        Edge(2L, 4L, 2),
        Edge(3L, 2L, 4),
        Edge(3L, 6L, 3),
        Edge(4L, 1L, 1),
        Edge(5L, 2L, 2),
        Edge(5L, 3L, 8),
        Edge(5L, 6L, 3)
      )

      // Build the vertex RDD and edge RDD
      val vertexRDD: RDD[(VertexId, (String, Int))] = sc.makeRDD(vertexArray)
      val edgeRDD: RDD[Edge[Int]] = sc.makeRDD(edgeArray)

      // Build the graph
      val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

      // ---- 1. Property views ----

      // Vertex operations
      println("找出年龄大于30的点:")
      val vertices: VertexRDD[(String, Int)] = graph.vertices
      val vertexResult: VertexRDD[(String, Int)] =
        vertices.filter { case (_, (_, age)) => age > 30 }
      println("vertexResult")
      vertexResult.collect().foreach { case (_, (name, _)) => println(name) }

      // Edge operations
      println("列出边属性>5的边:")
      val edges: EdgeRDD[Int] = graph.edges

      // Style 1: pattern match on the Edge case class
      val edgeResult: RDD[Edge[Int]] = edges.filter { case Edge(_, _, prop) => prop > 5 }
      println("edgeResult")
      // Explicit tuple: println(a, b) only worked through deprecated auto-tupling
      edgeResult.collect().foreach { case Edge(src, dst, _) => println((src, dst)) }

      // Style 2: field access
      val edgeResult2: RDD[Edge[Int]] = edges.filter(e => e.attr > 5)
      println("edgeResult2")
      edgeResult2.collect().foreach(e => println((e.srcId, e.dstId)))

      // Triplet operations; a triplet exposes ((srcId, srcAttr), (dstId, dstAttr), attr)
      println("列出边属性>5的tripltes:")
      val triplets: RDD[EdgeTriplet[(String, Int), Int]] = graph.triplets
      val tripletsResult: RDD[EdgeTriplet[(String, Int), Int]] = triplets.filter(t => t.attr > 5)
      tripletsResult.collect().foreach { t =>
        println((t.srcId, t.srcAttr, t.dstId, t.dstAttr))
      }

      // Degree operations.
      // collect() first: RDD.foreach(println) runs on the executors, so outside
      // local mode nothing would be printed on the driver console.
      println("degree:")
      val degrees: VertexRDD[Int] = graph.degrees
      degrees.collect().foreach(println)

      println("inDegrees:")
      val inDegrees: VertexRDD[Int] = graph.inDegrees
      inDegrees.collect().foreach(println)

      println("outDegrees:")
      val outDegrees: VertexRDD[Int] = graph.outDegrees
      outDegrees.collect().foreach(println)

      println("找出图中最大的出度、入度、度数:")

      // Keeps the (vertexId, degree) pair with the larger degree; on a tie the
      // second argument wins.
      def max(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) =
        if (a._2 > b._2) a else b

      println("max of outDegree:" + outDegrees.reduce(max))
      println("max of inDegree:" + inDegrees.reduce(max))
      println("max of Degree:" + degrees.reduce(max))
    } finally {
      sc.stop()
    }
  }
}
posted on 2020-10-27 18:00 happygril3 阅读(65) 评论(0) 收藏 举报
浙公网安备 33010602011771号