// DataFrameTest

package com.bjsxt.sparksql.dataframe;


import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

import scala.Function1;
import scala.runtime.BoxedUnit;

/**
 * Demo: builds a Spark SQL {@code DataFrame} from an RDD of {@code Person} beans via
 * reflection, registers it as a temporary table, queries it with SQL and prints the
 * resulting rows.
 */
public class DataFrameTest {
	/**
	 * Reads {@code sparksql/person.txt} (comma-separated lines, parsed here as
	 * {@code id,name,age}), converts every line to a {@code Person}, creates a DataFrame
	 * from the resulting RDD, shows it together with its schema, and prints the columns of
	 * each row returned by a SQL query against the {@code person} temp table.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		SparkConf conf = new SparkConf();
		conf.setMaster("local").setAppName("RDD");
		JavaSparkContext sc = new JavaSparkContext(conf);
		// Stop the context in a finally block so it is released even when a job fails.
		try {
			SQLContext sqlContext = new SQLContext(sc);
			JavaRDD<String> lineRDD = sc.textFile("sparksql/person.txt");

			JavaRDD<Person> personRDD = lineRDD.map(new Function<String, Person>() {
				private static final long serialVersionUID = 1L;

				@Override
				public Person call(String line) throws Exception {
					// Split once per record instead of once per field.
					String[] fields = line.split(",");
					if (fields.length < 3) {
						// Fail with the offending input rather than a bare index error.
						throw new IllegalArgumentException(
								"Expected a line of the form 'id,name,age' but got: " + line);
					}
					Person p = new Person();
					p.setId(fields[0]);
					p.setName(fields[1]);
					p.setAge(Integer.valueOf(fields[2]));
					return p;
				}
			});

			/*
			 * When Person.class is passed in, sqlContext creates the DataFrame via reflection:
			 * under the hood it obtains all fields of Person through reflection and combines
			 * them with the RDD itself to produce the DataFrame.
			 */
			DataFrame df = sqlContext.createDataFrame(personRDD, Person.class);
			df.show();
			df.printSchema();
			df.registerTempTable("person");
			DataFrame resultDataFrame = sqlContext.sql("select  name,age,id from person where id = 2");
			JavaRDD<Row> javaRDD = resultDataFrame.javaRDD();

			/*
			 * A DataFrame produced by a hand-written SQL query displays its columns in the
			 * order they were selected; the fields are not sorted by ASCII order.
			 * The query above selects name, age, id, hence positions 0, 1 and 2 below.
			 */
			javaRDD.foreach(new VoidFunction<Row>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void call(Row row) throws Exception {
					// Each column is read three ways: by position, by name, and by typed getter.
					System.out.println("name = "+ row.getAs(0));
					System.out.println("name = "+ row.getAs("name"));
					System.out.println("name = "+ row.getString(0));
					System.out.println("age = "+ row.getAs(1));
					System.out.println("age = "+ row.getAs("age"));
					System.out.println("age = "+ row.getInt(1));
					System.out.println("id = "+ row.getAs(2));
					System.out.println("id = "+ row.getAs("id"));
					System.out.println("id = "+ row.getString(2));
				}
			});

			// Alternative example, kept commented out for reference: convert the full
			// DataFrame back into a JavaRDD<Person>.
			// Notes:
			// 1. Values can be read by position (row.getInt(0), row.getString(1), ...), but
			//    that depends on the column order, so it is not commonly used.
			//    NOTE(review): which positional mapping is right depends on the DataFrame's
			//    column order - check the output of df.printSchema() before relying on it.
			// 2. Values can be read by column name with row.getAs("columnName").
			//
			// JavaRDD<Row> personRows = df.javaRDD();
			// JavaRDD<Person> map = personRows.map(new Function<Row, Person>() {
			// 	private static final long serialVersionUID = 1L;
			//
			// 	@Override
			// 	public Person call(Row row) throws Exception {
			// 		Person p = new Person();
			//
			// 		// Positional variant A:
			// 		// p.setId(row.getString(0));
			// 		// p.setName(row.getString(1));
			// 		// p.setAge(row.getInt(2));
			//
			// 		// Positional variant B:
			// 		// p.setId(row.getString(1));
			// 		// p.setName(row.getString(2));
			// 		// p.setAge(row.getInt(0));
			//
			// 		p.setId((String)row.getAs("id"));
			// 		p.setName((String)row.getAs("name"));
			// 		p.setAge((Integer)row.getAs("age"));
			// 		return p;
			// 	}
			// });
			// map.foreach(new VoidFunction<Person>() {
			// 	private static final long serialVersionUID = 1L;
			//
			// 	@Override
			// 	public void call(Person t) throws Exception {
			// 		System.out.println(t);
			// 	}
			// });
		} finally {
			sc.stop();
		}
	}
}

  

// posted @ 2018-06-18 13:39  uuhh  views (1)  comments (0)  bookmark  report