属性操作

Graph 提供三个基本的类集合视图:顶点(vertices)、边(edges)和三元组(triplets)

val vertices: VertexRDD[VD]

val edges: EdgeRDD[ED]

val triplets: RDD[EdgeTriplet[VD, ED]],可以理解为 RDD[(srcId, srcAttr, dstId, dstAttr, attr)],即每条边连同其源顶点和目标顶点的属性

图信息接口

val numEdges: Long

val numVertices: Long
val inDegrees: VertexRDD[Int]

val outDegrees: VertexRDD[Int]

val degrees: VertexRDD[Int]

package graphx

import org.apache.log4j.{Level,Logger}
import org.apache.spark.{SparkConf,SparkContext}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
//属性操作
/**
 * GraphX property-operation demo.
 *
 * Builds a small property graph (vertex attribute = (name, age), edge
 * attribute = Int weight) and prints:
 *   - vertices whose age is greater than 30,
 *   - edges whose attribute is greater than 5 (two equivalent ways),
 *   - triplets whose edge attribute is greater than 5,
 *   - degree / in-degree / out-degree of every vertex,
 *   - the vertex with the maximum out-degree, in-degree and degree.
 */
object test02 {

  /**
   * Picks the (vertexId, degree) pair with the larger degree.
   * On a tie the second argument is returned.
   */
  private def max(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) =
    if (a._2 > b._2) a else b

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // Silence noisy framework logging.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Set up the runtime environment.
    val conf: SparkConf = new SparkConf().setAppName("my graphx").setMaster("local")
    val sc = new SparkContext(conf)

    // Always release the SparkContext, even if one of the jobs below fails.
    try {
      // Vertex data, VD = (String, Int): (name, age).
      val vertexArray = Array(
        (1L, ("Alice", 28)),
        (2L, ("Bob", 27)),
        (3L, ("Charlie", 65)),
        (4L, ("David", 42)),
        (5L, ("Ed", 55)),
        (6L, ("Fran", 50))
      )

      // Edge data, ED = Int.
      val edgeArray = Array(
        Edge(2L, 1L, 7),
        Edge(2L, 4L, 2),
        Edge(3L, 2L, 4),
        Edge(3L, 6L, 3),
        Edge(4L, 1L, 1),
        Edge(5L, 2L, 2),
        Edge(5L, 3L, 8),
        Edge(5L, 6L, 3)
      )

      // Build the vertex and edge RDDs. Ages and edge attributes are plain
      // Ints, so they are typed as Int rather than the PartitionID alias.
      val vertexRDD: RDD[(VertexId, (String, Int))] = sc.makeRDD(vertexArray)
      val edgeRDD: RDD[Edge[Int]] = sc.makeRDD(edgeArray)

      // Build the graph.
      val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

      // ---- 1. Property views ----

      // Vertex view.
      println("找出年龄大于30的点:")
      val vertices: VertexRDD[(String, Int)] = graph.vertices
      val vertexResult: VertexRDD[(String, Int)] = vertices.filter {
        case (_, (_, age)) => age > 30
      }
      println("vertexResult")
      vertexResult.collect().foreach {
        case (_, (name, _)) => println(name)
      }

      // Edge view.
      println("列出边属性>5的边:")
      val edges: EdgeRDD[Int] = graph.edges

      // Approach 1: pattern match on the Edge case class.
      val edgeResult: RDD[Edge[Int]] = edges.filter {
        case Edge(_, _, prop) => prop > 5
      }
      println("edgeResult")
      edgeResult.collect().foreach {
        // Explicit tuple: avoids the deprecated argument auto-tupling of
        // println(src, dst) while printing the same "(src,dst)" text.
        case Edge(src, dst, _) => println((src, dst))
      }

      // Approach 2: field access.
      val edgeResult2: RDD[Edge[Int]] = edges.filter(e => e.attr > 5)
      println("edgeResult2")
      edgeResult2.collect().foreach(e => println((e.srcId, e.dstId)))

      // Triplet view: each edge together with its source and destination attributes.
      println("列出边属性>5的tripltes:")
      val triplets: RDD[EdgeTriplet[(String, Int), Int]] = graph.triplets
      val tripletsResult: RDD[EdgeTriplet[(String, Int), Int]] = triplets.filter(t => t.attr > 5)
      tripletsResult.collect().foreach { t =>
        println((t.srcId, t.srcAttr, t.dstId, t.dstAttr))
      }

      // ---- 2. Degree information ----
      // collect() first so the output is printed by the driver; a bare
      // rdd.foreach(println) runs on the executors and only reaches this
      // console when the master is local.
      println("degree:")
      val degrees: VertexRDD[Int] = graph.degrees
      degrees.collect().foreach(println)

      println("inDegrees:")
      val inDegrees: VertexRDD[Int] = graph.inDegrees
      inDegrees.collect().foreach(println)

      println("outDegrees:")
      val outDegrees: VertexRDD[Int] = graph.outDegrees
      outDegrees.collect().foreach(println)

      println("找出图中最大的出度、入度、度数:")

      // NOTE: reduce throws on an empty RDD; safe here because the hard-coded
      // graph above always has edges.
      println(s"max of outDegree:${outDegrees.reduce(max)}")
      println(s"max of inDegree:${inDegrees.reduce(max)}")
      println(s"max of Degree:${degrees.reduce(max)}")
    } finally {
      sc.stop()
    }

  }

}

 

 

 

posted on 2020-10-27 18:00  happygril3  阅读(65)  评论(0)    收藏  举报

导航