package com.byd.bigdata.spark.job.cyb;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import java.io.*;
import java.net.URL;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.List;
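/**
 * One-shot job: read a local Excel workbook with the com.crealytics spark-excel
 * connector, rewrite it as a single CSV, then copy the CSV part files into a
 * Kerberos-secured HDFS directory backing a Hive ODS table. The paths, principal
 * and keytab names below are environment-specific examples.
 */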
public class CsvUpload {
public static void main(String[] args) {
// if (args.length <= 3) {
// System.out.println("Not enough arguments; see the parameter documentation");
// return;
// }
readExcel2Csv();
}
public static void readExcel2Csv() {
SparkSession spark = SparkSession.builder().appName("test").master("local[*]").getOrCreate();
// Optional: define an explicit schema instead of relying on inferSchema
// List<StructField> resStructFields = new ArrayList<>();
// resStructFields.add(DataTypes.createStructField("id1", DataTypes.StringType, false));
// resStructFields.add(DataTypes.createStructField("name", DataTypes.StringType, false));
// resStructFields.add(DataTypes.createStructField("sex", DataTypes.StringType, false));
//
// StructType schema = DataTypes.createStructType(resStructFields);
String srcPath = "file:///D:/test.xlsx";
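// Read the workbook via spark-excel. Option names differ between connector
// releases (e.g. "useHeader"/"sheetName" in 0.13.x and earlier versus
// "header"/"dataAddress" later), so match them to the version on the classpath.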
Dataset<org.apache.spark.sql.Row> load = spark.read()
.format("com.crealytics.spark.excel")
.option("header", "true")
.option("useHeader", "false")
.option("sheetName", "Sheet1")
.option("treatEmptyValuesAsNulls", "true")
.option("inferSchema", "true")
.option("addColorColumns", "False")
// .schema(schema)
.load(srcPath);
String objPath = "file:///D:/test" + System.currentTimeMillis();
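// coalesce(1) collapses the result to a single partition so exactly one CSV
// part file lands in the (timestamped, hence unique) output directory; avoid
// this for data sets too large to fit through one task.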
load.coalesce(1).write().mode(SaveMode.Overwrite)
.option("header", "false")
.option("encoding", "utf-8")
.option("timestampFormat", "yyyy/MM/dd HH:mm:ss ZZ")
.csv(objPath);
load.show(5);
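// Scan the output directory on the local filesystem and keep only the real
// CSV part files, skipping the empty _SUCCESS marker Spark writes alongside them.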
Configuration localConf = new Configuration();
FileSystem local = null;
List<Path> upPath = new ArrayList<>();
try {
local = FileSystem.getLocal(localConf);
RemoteIterator<LocatedFileStatus> localSta = local.listFiles(new Path(objPath), true);
while (localSta.hasNext()) {
LocatedFileStatus file = localSta.next();
String fileName = file.getPath().getName();
long len = file.getLen();
if (len == 0 || fileName.contains("_SUCCESS")) {
System.out.println("skipping (empty or _SUCCESS marker): " + fileName);
} else {
System.out.println("queued for upload: " + file.getPath());
upPath.add(file.getPath());
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
// hdfs file upload
uploadFile(upPath, "test");
}
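/**
 * Logs in from a keytab and copies the generated CSV files into the HDFS
 * directory of the target ODS table. Assumes hdfs/core/yarn-site.xml, krb5.conf
 * and the keytab are laid out under the classpath root as shown below.
 */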
public static void uploadFile(List<Path> upPath, String tableName) {
URL url = Thread.currentThread().getContextClassLoader().getResource("");
if (url == null) throw new IllegalStateException("cannot resolve classpath root");
String basePath = url.getPath();
Configuration conf = new Configuration();
// Configuration.addResource(String) resolves a classpath resource name, so
// filesystem locations must be wrapped in a Path instead
conf.addResource(new Path(basePath + "hadoop/prod/hdfs-site.xml"));
conf.addResource(new Path(basePath + "hadoop/prod/core-site.xml"));
conf.addResource(new Path(basePath + "hadoop/prod/yarn-site.xml"));
System.setProperty("java.security.krb5.conf", basePath + "prod/krb5.conf");
UserGroupInformation.setConfiguration(conf);
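// Keytab login, then perform the upload under the authenticated user's context.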
try {
UserGroupInformation.loginUserFromKeytab("ic.bigdata", basePath + "prod/ic.bigdata.keytab");
UserGroupInformation.getLoginUser().doAs(new PrivilegedAction<Object>() {
@Override
public Object run() {
FileSystem fs = null;
try {
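// newInstance() returns a private FileSystem rather than the JVM-wide cached
// one, so closing it in the finally block cannot break other users of the cache.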
fs = FileSystem.newInstance(conf);
Path remotePath = new Path("/external/hive/ods.db/" + tableName);
System.out.println("== start upload ==");
for (Path p : upPath) {
fs.copyFromLocalFile(p, remotePath);
}
System.out.println("== end upload ==");
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
if (fs != null) {
try {
fs.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
return null;
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}