DataFrameOpsFromJsonRdd

package com.bjsxt.scala.spark.sql.createdf

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext

object DataFrameOpsFromJsonRdd {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf() // create the SparkConf configuration object
    conf.setAppName("My First Spark App") // application name shown on the monitoring UI while the job runs
    conf.setMaster("local")
    val sc = new SparkContext(conf)    
    val sqlContext = new SQLContext(sc)    
    val infos = Array("{'name':'zhangsan', 'age':55}","{'name':'lisi', 'age':30}","{'name':'wangwu', 'age':19}")
    val scores = Array("{'name':'zhangsan', 'score':155}","{'name':'lisi', 'score':130}")
    // Parallelize the JSON strings into RDD[String]s.
    val infoRdd = sc.parallelize(infos)
    val scoreRdd = sc.parallelize(scores)
    // read.json infers each schema from the records themselves; the single-quoted
    // JSON is accepted because the reader's allowSingleQuotes option defaults to true.
    val infoDF = sqlContext.read.json(infoRdd)
    val scoreDF = sqlContext.read.json(scoreRdd)
    // Join via the DataFrame API on the shared "name" column.
    infoDF.join(scoreDF, infoDF("name") === scoreDF("name"))
      .select(infoDF("name"), infoDF("age"), scoreDF("score"))
      .show()
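    // Only the names present in both datasets survive the inner join, so the
    // expected rows are (zhangsan, 55, 155) and (lisi, 30, 130); wangwu has
    // no score record and is dropped.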
    infoDF.registerTempTable("info")
    scoreDF.registerTempTable("score")
    val sql = "SELECT a.name,a.age,b.score FROM info a JOIN score b ON (a.name=b.name)"
    sqlContext.sql(sql).show()    
    sc.stop()
    /* Other common DataFrame operations, kept here for reference:
    df.show()
    df.printSchema()
    df.select("name").show()
    df.select(df("name"), df("age") + 10).show()
    df.filter(df("age") > 10).show()
    df.groupBy("age").count.show() */
  }
}
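The example above uses the Spark 1.x API. On Spark 2.x and later, SQLContext, registerTempTable, and the RDD[String] overload of read.json are all deprecated. A minimal sketch of the same flow with their replacements (SparkSession, createOrReplaceTempView, and the Dataset[String] overload of read.json) could look like this; the object name DataFrameOpsFromJsonDs is just an illustrative choice:

import org.apache.spark.sql.SparkSession

object DataFrameOpsFromJsonDs {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("My First Spark App")
      .master("local")
      .getOrCreate()
    import spark.implicits._ // brings toDS() into scope

    val infos = Seq("{'name':'zhangsan', 'age':55}", "{'name':'lisi', 'age':30}")
    // read.json(Dataset[String]) replaces the deprecated RDD[String] overload.
    val infoDF = spark.read.json(infos.toDS())
    infoDF.createOrReplaceTempView("info") // replaces registerTempTable
    spark.sql("SELECT name, age FROM info").show()

    spark.stop()
  }
}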

  
