spark
spark-shell 进入界面
spark.sql("").show -----sql 语句
spark.table("").show 查看表
spark.sql("select guid,count(1) as num from phonec group by guid").show

spark.sql("select guid,count(1) as num from phonec group by guid").filter("guid is not null").show

spark.sql("select guid,count(1) as num from phonec group by guid").filter("guid is not null").write.saveAsTable("hive_table_1");

然后通过 Hive 把该表导出即可
DataFrame
import org.apache.hadoop.shaded.org.eclipse.jetty.websocket.common.frames.DataFrame;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
 * Small Spark SQL demo: loads a CSV file into a {@code Dataset<Row>}, registers it as a
 * temporary view, and prints a few projections and a filtered query to stdout.
 */
public class TopN {

    /**
     * Entry point. Starts a local Spark session with two worker threads, reads the CSV file,
     * prints its schema and several projections, runs {@link #viodeAccess}, and stops the session.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        var spark = SparkSession.builder().appName("TopN").master("local[2]").getOrCreate();
        try {
            Dataset<Row> accessDF = spark.read()
                    .format("csv")
                    .option("header", "true")
                    .option("encoding", "utf-8")
                    .load("E:\\333.csv");

            // Print the schema of the loaded data.
            accessDF.printSchema();

            // Register the data as a temporary view so it can be queried with SQL.
            accessDF.createOrReplaceTempView("people");
            Dataset<Row> sqlDF = spark.sql("SELECT * FROM people");

            // Print selected columns; show() prints the first 20 rows by default.
            sqlDF.select(sqlDF.col("id"), sqlDF.col("差评率")).show();

            // Select specific columns by name.
            accessDF.select("id", "好评率").show();

            // Show all columns without truncating long cell values.
            accessDF.show(false);

            System.out.println("-------------------");
            viodeAccess(spark, accessDF);
        } finally {
            // Always release the session, even when a read or query above fails.
            spark.stop();
        }
        // TODO: next step - connect to a database, write the data into it, then read it back.
        // TODO: also work out how to package this as a jar and how to run it.
    }

    /**
     * Registers the given dataset as the temporary view {@code person} and prints the rows
     * whose {@code id} equals {@code '17118937440'}.
     *
     * @param sparkSession session used to run the SQL query
     * @param dataFrame    dataset to expose as the {@code person} view; expected to contain
     *                     an {@code id} column
     */
    public static void viodeAccess(SparkSession sparkSession, Dataset<?> dataFrame) {
        dataFrame.createOrReplaceTempView("person");
        Dataset<Row> matches = sparkSession.sql("select * from person where id = '17118937440'");
        matches.show();
    }
}

浙公网安备 33010602011771号