Using spark-excel with HDFS file upload
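
The Spark job below reads a local Excel workbook with the spark-excel connector (`com.crealytics.spark.excel`), rewrites it as a single CSV file, and then uploads that CSV to a Kerberos-secured HDFS cluster under a Hive external-table directory. The `com.crealytics:spark-excel` artifact (with the Scala suffix matching your build, e.g. `spark-excel_2.12`) and the Hadoop client libraries must be on the classpath.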


```java
package com.byd.bigdata.spark.job.cyb;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

import java.io.*;
import java.net.URL;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.List;


public class CsvUpload {
    public static void main(String[] args) {
//        if (args.length <= 3) {
//            System.out.println("Not enough arguments; please check the parameters");
//            return;
//        }
        readExcel2Csv();

    }


    public static void readExcel2Csv() {
        SparkSession spark = SparkSession.builder().appName("test").master("local[*]").getOrCreate();
        // define the header schema (optional)
//        List<StructField> resStructFields = new ArrayList<>();
//        resStructFields.add(DataTypes.createStructField("id1", DataTypes.StringType, false));
//        resStructFields.add(DataTypes.createStructField("name", DataTypes.StringType, false));
//        resStructFields.add(DataTypes.createStructField("sex", DataTypes.StringType, false));
//
//        StructType schema = DataTypes.createStructType(resStructFields);
        String srcPath = "file:///D:/test.xlsx";
        Dataset<Row> load = spark.read()
                .format("com.crealytics.spark.excel")
                .option("header", "true")
                .option("useHeader", "false")
                .option("sheetName", "Sheet1")
                .option("treatEmptyValuesAsNulls", "true")
                .option("inferSchema", "true")
                .option("addColorColumns", "False")
//                .schema(schema)
                .load(srcPath);

        String objPath = "file:///D:/test" + System.currentTimeMillis();
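        // coalesce(1) below writes exactly one part-* file; uploadFile() relies on
        // this when it copies only upPath.get(0) to HDFS.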
        load.coalesce(1).write().mode(SaveMode.Overwrite)
                .option("header", "false")
                .option("encoding", "utf-8")
                .option("timestampFormat", "yyyy/MM/dd HH:mm:ss ZZ")
                .csv(objPath);

        load.show(5);
        Configuration localConf = new Configuration();
        FileSystem local = null;
        List<Path> upPath = new ArrayList<>();

        try {
            local = FileSystem.getLocal(localConf);
            RemoteIterator<LocatedFileStatus> localSta = local.listFiles(new Path(objPath), true);
            while (localSta.hasNext()) {
                LocatedFileStatus file = localSta.next();
                String fileName = file.getPath().getName();
                long len = file.getLen();
                if (len == 0 || fileName.contains("_SUCCESS")) {
                    System.out.println("not up load " + fileName);
                } else {
                    System.out.println("------------------this is my want " + file.getPath());
                    upPath.add(file.getPath());
                }
            }


        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        // upload the generated CSV part file to HDFS
        uploadFile(upPath, "test");
        spark.stop(); // release the local Spark context

    }

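    // Loads the production Hadoop configs and krb5.conf from the classpath, logs in
    // from the keytab, and runs the HDFS copy inside doAs() so the Kerberos
    // credentials are attached to the FileSystem calls.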
    public static void uploadFile(List<Path> upPath, String tableName) {
        URL uri = Thread.currentThread().getContextClassLoader().getResource("");
        String basePath = uri.getPath();
        Configuration conf = new Configuration();
        conf.addResource(basePath + "hadoop/prod/hdfs-site.xml");
        conf.addResource(basePath + "hadoop/prod/core-site.xml");
        conf.addResource(basePath + "hadoop/prod/yarn-site.xml");

        System.setProperty("java.security.krb5.conf", basePath + "prod/krb5.conf");

        UserGroupInformation.setConfiguration(conf);

        try {
            UserGroupInformation.loginUserFromKeytab("ic.bigdata", basePath + "prod/ic.bigdata.keytab");
            UserGroupInformation.getLoginUser().doAs(new PrivilegedAction<Object>() {

                @Override
                public Object run() {
                    FileSystem fs = null;
                    try {
                        fs = FileSystem.newInstance(conf);
                        Path remotePath = new Path("/external/hive/ods.db/" + tableName);
                        System.out.println("==start upload ====");
                        fs.copyFromLocalFile(upPath.get(0), remotePath);
                        System.out.println("==end  upload ====");

                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    } finally {
                        try {
                            if (fs != null) {
                                fs.close();
                            }
                        } catch (IOException e) {
                            throw new RuntimeException(e);
                        }
                    }
                    return null;
                }
            });


        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }


}
```

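For reference, newer spark-excel releases (roughly 0.14 and later) register the short format name `excel` and replace the `sheetName`/`useHeader` options with `dataAddress`/`header`. A minimal sketch of the same read under that assumption; verify the option names against the version you actually ship:

```java
// A minimal sketch assuming a newer spark-excel release; reuses the
// SparkSession and imports from readExcel2Csv() above.
Dataset<Row> df = spark.read()
        .format("excel")
        .option("dataAddress", "'Sheet1'!A1") // sheet name plus top-left cell to read from
        .option("header", "true")
        .option("inferSchema", "true")
        .load("file:///D:/test.xlsx");
```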

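One gap worth noting: the temporary coalesce output on the local disk is never removed. A hypothetical cleanup step (not in the original job) could delete it through the same Hadoop local FileSystem API once the upload succeeds:

```java
// Hypothetical cleanup (not part of the original code): recursively delete the
// local temp CSV directory after its part file has been copied to HDFS.
FileSystem local = FileSystem.getLocal(new Configuration());
local.delete(new Path(objPath), true); // true = recursive
```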