subgraph

1. subgraph:

返回的对象是一个图,图中包含着的顶点和边分别要满足vpred和epred两个函数。

/**
 * Returns a graph whose vertices satisfy `vpred` and whose edges satisfy `epred`.
 *
 * Both predicates default to functions that always return `true`, so omitting
 * one of them applies no filtering on that side.
 *
 * Dropping an edge does not affect the two vertices it connects; dropping a
 * vertex also drops the edges attached to it.
 *
 * @param epred predicate evaluated on each edge triplet
 * @param vpred predicate evaluated on each vertex id and its attribute
 * @return the restricted graph, with the same vertex and edge attribute types
 */
def subgraph(

    epred: EdgeTriplet[VD, ED] => Boolean = (x => true),

    vpred: (VertexId, VD) => Boolean = ((v, d) => true)

): Graph[VD, ED]

注意:

如果一个边被砍掉了,这个边关联的两个顶点并不会受影响

如果一个顶点没了,其对应的边也就没了

所以,subgraph 一般用于两种场景:把图限制在我们感兴趣的顶点和边上(restrict the graph to the vertices and edges of interest),或者清除已经失效的链接(eliminate broken links)。

package graphx

import org.apache.log4j.{Level, Logger}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Structural-operation demo for GraphX.
 *
 * Builds a small property graph of people (vertex attribute: name and age;
 * edge attribute: an integer weight) and prints the subgraph selected by a
 * vertex predicate and an edge predicate.
 */
object test05 {

  /**
   * Entry point: creates a local SparkContext, builds the sample graph,
   * prints the vertices and edges of the filtered subgraph, and stops the
   * context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // Reduce log noise so the demo output stays readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    // Runtime environment: a single local Spark context.
    val conf: SparkConf = new SparkConf().setAppName("my graphx").setMaster("local")
    val sc = new SparkContext(conf)

    // Always stop the context, even if a job below throws, so the local
    // Spark runtime is released.
    try {
      // Vertex attribute type VD: (name: String, age: Int)
      val vertexArray = Array(
        (1L, ("Alice", 28)),
        (2L, ("Bob", 27)),
        (3L, ("Charlie", 65)),
        (4L, ("David", 42)),
        (5L, ("Ed", 55)),
        (6L, ("Fran", 50))
      )

      // Edge attribute type ED: Int
      val edgeArray = Array(
        Edge(2L, 1L, 7),
        Edge(3L, 2L, 4),
        Edge(3L, 6L, 3),
        Edge(5L, 2L, 2),
        Edge(5L, 3L, 8),
        Edge(5L, 6L, 3)
      )

      // Distribute the arrays as RDDs. The attributes are plain Ints (an age
      // and a weight), so they are annotated as Int rather than PartitionID.
      val vertexRDD: RDD[(VertexId, (String, Int))] = sc.makeRDD(vertexArray)
      val edgeRDD: RDD[Edge[Int]] = sc.makeRDD(edgeArray)

      // Assemble the property graph.
      val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

      // The heading states the predicates actually applied below:
      // vertex age >= 30 and edge attribute > 5.
      println("顶点年纪>=30且边属性>5的子图:")
      val subGraph = graph.subgraph(
        vpred = (_, person) => person._2 >= 30,
        epred = triplet => triplet.attr > 5
      )

      println("子图所有顶点:")
      subGraph.vertices.collect().foreach { case (_, (name, age)) =>
        println(s"$name is $age")
      }

      println("子图所有边:")
      subGraph.edges.collect().foreach { edge =>
        println(s"${edge.srcId} to ${edge.dstId} att ${edge.attr}")
      }
    } finally {
      sc.stop()
    }
  }

}

 

posted on 2020-10-27 18:37  happygril3  阅读(567)  评论(0)    收藏  举报

导航