Spark SQL: handling Oracle date-time values for business processing

Spark SQL's out-of-the-box support for Oracle's DATE type is poor: when you query a date-time column, only the date portion comes back and the time-of-day is silently dropped. Below are two Java workarounds I put together.
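For context on the root cause: an Oracle DATE column stores both a date and a time-of-day, but the JDBC driver reports it as java.sql.Types.DATE, which Spark's built-in Oracle dialect maps to DateType, so the time is truncated on read. A minimal fragment that reproduces the symptom (it assumes the same SQLContext and connection options as the full programs below):

Dataset<Row> df = sqlContext.read().format("jdbc").options(options).load();
// With Spark's default mapping, the Oracle DATE column shows up here as
// DateType, so the hours/minutes/seconds are already gone.
df.printSchema();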

Solution 1

ReadOracle.java
package BigDataSQL;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReadOracle {
    public static void main(String[] args) {

        String driver = "oracle.jdbc.driver.OracleDriver";
        SparkConf conf = new SparkConf().setAppName("Spark03").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf); // wraps Scala's SparkContext under the hood

        SQLContext sqlContext = new SQLContext(sc);

        String url = "jdbc:oracle:thin:@//192.168.1.**8:1521/orcl";
        Map<String, String> options = new HashMap<String, String>();
        options.put("user", "test");
        options.put("password", "test");
        options.put("url", url);
        options.put("dbtable", "TM_***_UPDOWN"); // source table (name masked)
        // number of rows to fetch per database round trip
        options.put("fetchSize", "20");
        options.put("driver", driver);
        Dataset<Row> jdbcDF = sqlContext.read().format("jdbc").options(options).load();

        List<Row> rows = jdbcDF.collectAsList();

        for (Row row : rows) {
            System.out.println(row);
        }
    }

}
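A lighter-weight variant, not in the original post, is to push the conversion down to Oracle itself: pass a subquery as dbtable and CAST the DATE column to TIMESTAMP, so the driver reports java.sql.Types.TIMESTAMP and Spark keeps the time portion without any dialect changes. A sketch reusing the options map above (the table name stands in for the masked name; the column names follow Solution 2):

// CAST runs inside Oracle, so Spark receives a real SQL TIMESTAMP
// and maps it to TimestampType on its own.
options.put("dbtable",
        "(select cast(site_time as timestamp) site_time, line_no from TM_EXAMPLE_UPDOWN) t");
Dataset<Row> withTime = sqlContext.read().format("jdbc").options(options).load();
withTime.show(false);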

Solution 2

ReadOracle03.java
package BigDataSQL;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.jdbc.JdbcDialect;
import org.apache.spark.sql.jdbc.JdbcDialects;
import org.apache.spark.sql.jdbc.JdbcType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.MetadataBuilder;
import scala.Option;

import java.sql.Types;
import java.util.HashMap;
import java.util.Map;

public class ReadOracle03 {
    public static void oracleInit() {
        JdbcDialect dialect = new JdbcDialect() {

            // Claim URLs that point at an Oracle database
            @Override
            public boolean canHandle(String url) {
                return url.startsWith("jdbc:oracle");
            }

            // Type mapping applied when reading from Oracle
            @Override
            public Option<DataType> getCatalystType(int sqlType, String typeName, int size, MetadataBuilder md) {
                // Oracle DATE carries a time-of-day, so surface it as a Spark timestamp
                if (sqlType == Types.DATE && typeName.equals("DATE") && size == 0)
                    return Option.apply(DataTypes.TimestampType);
                return Option.empty();
            }

            // Type mapping applied when writing to Oracle
            @Override
            public Option<JdbcType> getJDBCType(DataType dt) {
                if (DataTypes.StringType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("VARCHAR2(255)", Types.VARCHAR));
                } else if (DataTypes.BooleanType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(1)", Types.NUMERIC));
                } else if (DataTypes.IntegerType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(10)", Types.NUMERIC));
                } else if (DataTypes.LongType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(19)", Types.NUMERIC));
                } else if (DataTypes.DoubleType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(19,4)", Types.NUMERIC));
                } else if (DataTypes.FloatType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(19,4)", Types.NUMERIC));
                } else if (DataTypes.ShortType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(5)", Types.NUMERIC));
                } else if (DataTypes.ByteType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("NUMBER(3)", Types.NUMERIC));
                } else if (DataTypes.BinaryType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("BLOB", Types.BLOB));
                } else if (DataTypes.TimestampType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("DATE", Types.DATE));
                } else if (DataTypes.DateType.sameType(dt)) {
                    return Option.apply(
                            new JdbcType("DATE", Types.DATE));
                } else if (DataTypes.createDecimalType()
                        .sameType(dt)) { // unlimited-precision decimal
                    // The built-in dialects map DecimalType.Fixed(precision, scale) to
                    // NUMBER(precision, scale); fall back to a fixed NUMBER(38,4) here.
                    return Option.apply(
                            new JdbcType("NUMBER(38,4)", Types.NUMERIC));
                }
                return Option.empty();
            }
        };
        // Register the dialect so it takes precedence over the built-in one;
        // JdbcDialects.unregisterDialect(dialect) would remove it again.
        JdbcDialects.registerDialect(dialect);
    }


    public static void main(String[] args) {
        SparkConf conf = new SparkConf();
        conf.setAppName("Test");
        // run locally with two worker threads
        conf.setMaster("local[2]");
        String driver = "oracle.jdbc.driver.OracleDriver";
        String url = "jdbc:oracle:thin:@//192.168.1.**8:1521/orcl";
        JavaSparkContext sc = new JavaSparkContext(conf);

        SQLContext sqlContext = new SQLContext(sc);
        // register the custom Oracle dialect before any JDBC reads
        oracleInit();

        Map<String, String> options = new HashMap<String, String>();
        options.put("user", "test");
        options.put("password", "test");
        options.put("url", url);
        options.put("dbtable", "TM_****UPDOWN"); // source table (name masked)
        // number of rows to fetch per database round trip
        options.put("fetchSize", "20");
        options.put("driver", driver);
        Dataset<Row> jdbcDF = sqlContext.read().format("jdbc").options(options).load();
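        // Sanity check: with the custom dialect registered, the Oracle DATE
        // column should now be reported as timestamp rather than date.
        jdbcDF.printSchema();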

        jdbcDF.createOrReplaceTempView("updown");
        // the string literals are implicitly cast to timestamp for the BETWEEN filter
        Dataset<Row> result = sqlContext.sql(
                "select site_time, line_no, count(*) num from updown"
                        + " where site_time between '2018-02-01 12:30:00' and '2018-02-01 12:40:00'"
                        + " group by site_time, line_no");

        result.show();
    }

}
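The getJDBCType half of the dialect is only exercised on writes. For completeness, a sketch of writing the aggregated result back through the same registered dialect, to be placed at the end of main above (the target table TM_UPDOWN_AGG is a made-up name): Spark's TimestampType round-trips back to an Oracle DATE column per the mapping defined earlier.

// Writes go through the registered dialect too: TimestampType -> DATE,
// StringType -> VARCHAR2(255), and so on, per getJDBCType above.
java.util.Properties props = new java.util.Properties();
props.setProperty("user", "test");
props.setProperty("password", "test");
props.setProperty("driver", driver);
result.write().mode("append").jdbc(url, "TM_UPDOWN_AGG", props);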

The output from Solution 2 includes the full date and time, so Solution 2 is the recommended approach.

 
