spark

对hive表操作

执行 spark-shell 命令进入交互式界面

spark.sql("").show —— 执行引号中的 SQL 语句并显示结果

spark.table("").show 查看表

spark.sql("select guid,count(1) as num from phonec group by guid").show

image-20230308202850200

spark.sql("select guid,count(1) as num from phonec group by guid").filter("guid is not null").show

image-20230308203051032

spark.sql("select guid,count(1) as num from phonec group by guid").filter("guid is not null").write.saveAsTable("hive_table_1");

image-20230308203313712

 

然后通过 Hive 把该表导出即可

 

DataFrame

import org.apache.hadoop.shaded.org.eclipse.jetty.websocket.common.frames.DataFrame;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;


public class TopN {
  public static void main(String[] args) {
      var spark = SparkSession.builder().appName("TopN").master("local[2]").getOrCreate();

      Dataset<Row> accessDF = spark.read().
              format("csv").
              option("header","true").
              option("encoding","utf-8").
              load("E:\\333.csv");

      //输出结构信息
      accessDF.printSchema();

      //创建一个视图
      accessDF.createOrReplaceTempView("people");

      Dataset<Row> sqlDF = spark.sql("SELECT * FROM people");

      //输出内容信息 默认20条
      sqlDF.select(sqlDF.col("id"),sqlDF.col("差评率")).show();

      //查询某列
      accessDF.select("id","好评率").show();

      //查询所有
      accessDF.show(false);


      System.out.println("-------------------");
      viodeAccess(spark,accessDF);

      spark.stop();
      //下一步连接数据库 然后把数据写到数据库中 然后数据库提取
      //还有一个就是打包成jar 然后写 怎么写
       

  }

  /**
    *
    * @param sparkSession
    * @param dataFrame
    */
  public static void viodeAccess(SparkSession sparkSession, Dataset dataFrame)
  {
      dataFrame.createOrReplaceTempView("person");

      Dataset<Row> sql = sparkSession.sql("select * from person where id = '17118937440'");

      sql.show();
  }
}
posted @ 2023-03-09 09:41  爽爽子的秃头生活  阅读(30)  评论(1)    收藏  举报