Building a Graph

Method 1
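Method 1 builds the vertex and edge RDDs directly from text files with sc.textFile. A hypothetical sample of the two comma-separated input files, inferred from the parsing code below (the real contents may differ):

in/vertecs.txt        in/relation.txt
1,Alice               1,2,friend
2,Bob                 2,3,colleague
3,Charlie             3,1,follower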

package graphx

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD

object test09 {
  def main(args: Array[String]): Unit = {

    // Suppress verbose logging
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)


    val conf: SparkConf = new SparkConf().setAppName("my scala").setMaster("local")
    val sc = new SparkContext(conf)



    val vertexLines: RDD[String] = sc.textFile("in/vertecs.txt")
    println("vertexLines")
    vertexLines.collect().foreach(println)

    // Each vertex carries a single attribute (its name)
    val vertexRDD: RDD[(VertexId, String)] = vertexLines.map { line =>
      val strings: Array[String] = line.split(",")
      val id: VertexId = strings(0).toLong
      val name: String = strings(1)
      (id, name)
    }

    println("vertexRDD")
    vertexRDD.collect().foreach(println)

    val edgeLines: RDD[String] = sc.textFile("in/relation.txt")
    println("edgeLines")
    edgeLines.collect().foreach(println)



    val edgeRDD: RDD[Edge[String]] = edgeLines.map { line =>
      val strings: Array[String] = line.split(",")
      val src: VertexId = strings(0).toLong
      val dst: VertexId = strings(1).toLong
      val attr: String = strings(2)
      Edge(src, dst, attr)
    }

    println("edgeRDD")
    edgeRDD.collect().foreach(println)
    val graph = Graph(vertexRDD, edgeRDD)
    println("graph")
    graph.vertices.collect().foreach(println)
    graph.edges.collect().foreach(println)
    graph.triplets.collect().foreach(println)



  }

}
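With the hypothetical sample files above, the final three printouts take this shape (plain Scala toString formatting of tuples and the Edge case class):

(1,Alice)                      // graph.vertices: (VertexId, name)
Edge(1,2,friend)               // graph.edges
((1,Alice),(2,Bob),friend)     // graph.triplets: ((src), (dst), attr)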

 

package graphx

import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx._

object intopiece {

  def main(args: Array[String]): Unit = {

    // Suppress verbose logging
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf: SparkConf = new SparkConf().setAppName("my intopiece").setMaster("local")
    val sc = new SparkContext(conf)

    // Vertices: each line holds an id plus two numeric attributes
    val intopieceLines: RDD[String] = sc.textFile("in/intopieces.txt")
    val intopieceRDD: RDD[(VertexId, VertexId, VertexId)] = intopieceLines.map { line =>
      val strings: Array[String] = line.split(",")
      val id: VertexId = strings(0).toLong
      val black: VertexId = strings(1).toLong
      val grey: VertexId = strings(2).toLong
      (id, black, grey)
    }


    println("intopieceRDD")
    intopieceRDD.collect().foreach(println)
    // Graph expects vertices as RDD[(VertexId, VD)] key/value pairs,
    // so the two attributes are packed into a single nested tuple
    val intopieceRDD2: RDD[(VertexId, (VertexId, VertexId))] = intopieceRDD.map {
      case (id, black, grey) => (id, (black, grey))
    }

    val relation: RDD[String] = sc.textFile("in/into_relation.txt")
    val relationRDD: RDD[Edge[String]] = relation.map { line =>
      val strings: Array[String] = line.split(",")
      val src: VertexId = strings(0).toLong
      val dst: VertexId = strings(1).toLong
      val attr: String = strings(2)
      Edge(src, dst, attr)
    }

    println("relationRDD")
    relationRDD.collect().foreach(println)
    val graph = Graph(intopieceRDD2, relationRDD)
    println("graph:")
    graph.vertices.collect().foreach(println)
    graph.edges.collect().foreach(println)



  }

}
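Again assuming hypothetical data, in/intopieces.txt would hold three comma-separated numbers per line, and the two-attribute vertices then print as nested tuples:

in/intopieces.txt      graph.vertices
1,10,20                (1,(10,20))
2,30,40                (2,(30,40))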

 

Method 2
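Method 2 reads the same vertex and relation files on the driver with scala.io.Source, accumulates the parsed rows in ArrayBuffers, and only then hands them to Spark via sc.parallelize. This is fine for small local files; for large inputs, Method 1's sc.textFile keeps both reading and parsing distributed.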

package graphx
import org.apache.spark.graphx._

import scala.io.BufferedSource
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext


import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf


import scala.io.Source
import collection.mutable.ArrayBuffer
object test10 {
  def main(args: Array[String]) {

    // Suppress verbose logging
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf().setAppName("Simple Application").setMaster("local")
    val sc = new SparkContext(conf)

    val vertexArr = new ArrayBuffer[(Long, String)]()
    val edgeArr = new ArrayBuffer[Edge[String]]()

    // Specify the encoding when reading, to avoid garbled characters
    val sourceV: BufferedSource = Source.fromFile("in/vertecs.txt", "UTF-8")
    val lines: Iterator[String] = sourceV.getLines()


    while (lines.hasNext) {
      val pp = lines.next().split(",")
      vertexArr += ((pp(0).toLong, pp(1)))
    }
    sourceV.close()
    println(vertexArr.length)

    val sourceE: BufferedSource = Source.fromFile("in/relation.txt", "UTF-8")
    val linesE: Iterator[String] = sourceE.getLines()

    while (linesE.hasNext) {
      val ee = linesE.next().split(",")
      edgeArr += Edge(ee(0).toLong, ee(1).toLong, ee(2))
    }
    sourceE.close()

    // Create the vertex RDD from the driver-side buffer
    val users: RDD[(VertexId, String)] = sc.parallelize(vertexArr)

    // Create the edge RDD
    val relationships: RDD[Edge[String]] = sc.parallelize(edgeArr)

    // Define a default user so edges that reference a vertex missing
    // from the vertex RDD still get a vertex attribute
    val defaultUser = "Missing"
    val graph = Graph(users, relationships, defaultUser)

    // Print the graph's vertices
    graph.vertices.collect().foreach(println)
  }

}
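As an aside not covered above: when only the topology matters and no string attributes are needed, GraphX also ships a built-in loader for plain whitespace-separated edge lists. A minimal sketch, assuming a hypothetical file in/edge_list.txt with one "srcId dstId" pair per line and the same sc as in the programs above:

import org.apache.spark.graphx.{Graph, GraphLoader}

// Builds a Graph[Int, Int]; every vertex and edge gets the attribute 1
val simpleGraph: Graph[Int, Int] = GraphLoader.edgeListFile(sc, "in/edge_list.txt")
println(simpleGraph.numVertices + " vertices, " + simpleGraph.numEdges + " edges")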

 
