HDFS API operations: uploading large files, merging, deleting, modifying, viewing, copying, and moving files.
Covers file merging, large-file streaming with IOUtils, obtaining the local file system, merge-on-upload, and merge-on-download.
package com.byd.bigdata.spark.job;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.encoders.RowEncoder;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import java.io.*;
import java.net.URL;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.UUID;
public class RoadCsvRead2Ods {
public static void main(String[] args) {
// String filePath =args[0];
// if(filePath == null){
// filePath = "/data/bigdatamgr/blims/20220816/HCEC-2335-20220719-153805-164659.csv";
// }
// read2Ods(filePath);
getAllFiles();
}
public static void getAllFiles() {
// 1. Kerberos authentication
URL resource = Thread.currentThread().getContextClassLoader().getResource("");
String basePath = resource.getPath();
Configuration conf = new Configuration();
String keyTab = basePath + "kerberos/prod/ic.bigdatamgr.keytab";
String krb5 = basePath + "kerberos/prod/krb5.conf";
String principal = "ic.bigdatamgr";
System.setProperty("java.security.krb5.conf", krb5);
conf.set("hadoop.security.authentication", "kerberos");
UserGroupInformation.setConfiguration(conf);
try {
UserGroupInformation.loginUserFromKeytab(principal, keyTab);
UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
loginUser.doAs(new PrivilegedAction<Object>() {
@Override
public Object run() {
conf.addResource(basePath + "hadoop/prod/core-site.xml");
conf.addResource(basePath + "hadoop/prod/hdfs-site.xml");
conf.addResource(basePath + "hadoop/prod/yarn-site.xml");
try {
FileSystem fs = FileSystem.newInstance(conf);
FileContext fc = FileContext.getFileContext(conf);
LocalFileSystem localFileSystem = FileSystem.getLocal(conf);
FileContext.Util util = fc.util(); // copy or move files within the cluster
// 1. Download
downLoad(fs, localFileSystem, fc);
fs.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
return null;
}
});
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void cat(FileSystem fs) throws Exception {
FSDataInputStream inputStream = fs.open(new Path("/hdfsapi/test3/a.txt"));
IOUtils.copyBytes(inputStream, System.out, 1024);
inputStream.close();
}
/**
* Rename a file.
*
* @throws Exception
*/
public void rename(FileSystem fs) throws Exception {
Path oldPath = new Path("/hdfsapi/test3/a.txt");
Path newPath = new Path("/hdfsapi/test3/b.txt");
fs.rename(oldPath, newPath);
}
// Upload a large file, printing a progress indicator
public void copyFromLocalFileWithProgress(FileSystem fs) throws Exception {
InputStream in = new BufferedInputStream(new FileInputStream(new File("D:\\迅雷下载\\用1.mp4")));
FSDataOutputStream outputStream = fs.create(new Path("/hdfsapi/test/gakki.mp4"),
new Progressable() {
@Override
public void progress() {
System.out.print("."); // progress indicator
}
});
IOUtils.copyBytes(in, outputStream, 4096);
in.close();
outputStream.close();
}
public static void upload(FileSystem fs, FileSystem local) {
Path targetPath = new Path("/data/bigdatamgr/blims_ori/blf/20220804/SA3HF-8697-20220619-235730-235800.blf");
Path srcPath = new Path("D:\\test\\a.txt");
//1. Simplest: copyFromLocalFile
try {
fs.copyFromLocalFile(false, false, srcPath, targetPath);
} catch (IOException e) {
throw new RuntimeException(e);
}
//2. Walk the directory and copy sequentially with IOUtils.copyBytes(); the input can come from the local FileSystem or directly from new FileInputStream() (two variants)
try {
// get the local file system
LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
if (localFs.isDirectory(srcPath)) {
RemoteIterator<LocatedFileStatus> lsfr = localFs.listFiles(srcPath, true);
while (lsfr.hasNext()) {
LocatedFileStatus next = lsfr.next();
FSDataInputStream fin = localFs.open(next.getPath()); // read from the local FS, not the remote one
FSDataOutputStream fout = fs.create(new Path("/tmp/data/" + next.getPath().getName()));
IOUtils.copyBytes(fin, fout, 4096);
fin.close();
fout.close();
}
} else {
// InputStream fin = localFs.open(srcPath);
InputStream fin = new FileInputStream("D:\\test\\a.txt");
FSDataOutputStream fout = fs.create(targetPath);
IOUtils.copyBytes(fin, fout, 4096);
fin.close();
fout.close();
}
} catch (IOException e) {
throw new RuntimeException(e);
}
//3. Large-file upload with a progress callback
InputStream in = null;
try {
in = new BufferedInputStream(new FileInputStream(new File("D:\\迅雷下载\\用1.mp4")));
FSDataOutputStream outputStream = fs.create(new Path("/hdfsapi/test/gakki.mp4"),
new Progressable() {
@Override
public void progress() {
System.out.print("."); // progress indicator
}
});
IOUtils.copyBytes(in, outputStream, 4096);
} catch (IOException e) { // FileNotFoundException is a subclass of IOException
throw new RuntimeException(e);
}
}
// Download
public static void downLoad(FileSystem fs, FileSystem local, FileContext fc) {
Path srcPath = new Path("/data/bigdatamgr/blims_ori/blf/20220804/SA3HF-8697-20220619-235730-235800.blf");
Path targetPath = new Path("D:\\test\\a.txt");
// 1. copyToLocalFile
try {
fs.copyToLocalFile(false, srcPath, targetPath);
} catch (IOException e) {
throw new RuntimeException(e);
}
// 2. Byte-stream copy
FileOutputStream localFileOut = null;
FSDataInputStream remoteFin = null;
try {
remoteFin = fs.open(srcPath);
localFileOut = new FileOutputStream("D:\\test\\a.txt");
IOUtils.copyBytes(remoteFin, localFileOut, 2048);
remoteFin.close();
localFileOut.close();
} catch (IOException e) { // FileNotFoundException is a subclass of IOException
throw new RuntimeException(e);
}
// 3. Merge before download: concatenate every file under the directory into a single local file
try {
LocalFileSystem localFs = FileSystem.getLocal(new Configuration());
FSDataOutputStream fsdOut = localFs.create(targetPath);
if (fs.isDirectory(srcPath)) {
RemoteIterator<LocatedFileStatus> lfsr = fs.listFiles(srcPath, true);
while (lfsr.hasNext()) {
LocatedFileStatus next = lfsr.next();
FSDataInputStream fdIn = fs.open(next.getPath());
IOUtils.copyBytes(fdIn, fsdOut, 2048);
fdIn.close();
}
fsdOut.close();
} else {
FSDataInputStream fsIn = fs.open(srcPath);
IOUtils.copyBytes(fsIn, fsdOut, 2048);
fsIn.close();
fsdOut.close();
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
// Merge small files on the cluster
public static void mergeFile(FileSystem fs) {
Path targetPath = new Path("/tmp/merger");
Path srcPath = new Path("/tmp/files");
try {
FSDataOutputStream fout = fs.create(targetPath);
if (fs.isDirectory(srcPath)) {
RemoteIterator<LocatedFileStatus> lsfr = fs.listFiles(srcPath, true);
while (lsfr.hasNext()) {
LocatedFileStatus next = lsfr.next();
FSDataInputStream fin = fs.open(next.getPath());
IOUtils.copyBytes(fin, fout, 4096);
fin.close();
}
}
fout.close();
// note: fs is passed in by the caller, so it is not closed here
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public static void copyOrMoveFiles() {
Path targetPath = new Path("/tmp/merger");
Path srcPath = new Path("/tmp/files");
Configuration conf = new Configuration();
conf.addResource("basedir/hdfs-site.xml");
conf.addResource("basedir/core-site.xml");
try {
// FileSystem fs = FileSystem.newInstance(conf);
FileContext fc = FileContext.getFileContext(conf);
FileContext.Util util = fc.util();
util.copy(srcPath, targetPath, false, false);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public static void read2Ods(String filePath) {
SparkSession spark = SparkSession.builder().appName("readCsv2OdsJob")
.master("yarn")
.enableHiveSupport()
.getOrCreate();
Dataset<Row> ds = spark.read()
.option("header", "true")
.option("delimiter", ",")
.csv(filePath);
ArrayList<StructField> resStructType = new ArrayList<>();
resStructType.add(DataTypes.createStructField("id", DataTypes.StringType, true));
resStructType.add(DataTypes.createStructField("car_vin", DataTypes.StringType, true));
resStructType.add(DataTypes.createStructField("s_id", DataTypes.StringType, true));
resStructType.add(DataTypes.createStructField("channel", DataTypes.StringType, true));
resStructType.add(DataTypes.createStructField("millisecond", DataTypes.StringType, true));
resStructType.add(DataTypes.createStructField("micro_sec", DataTypes.StringType, true));
resStructType.add(DataTypes.createStructField("s_data", DataTypes.StringType, true));
StructType structType = DataTypes.createStructType(resStructType);
Dataset<Row> transDs = ds.map(new MapFunction<Row, Row>() {
@Override
public Row call(Row row) throws Exception {
String id = UUID.randomUUID().toString();
String car_vin = row.getAs("car_vin");
String s_id = row.getAs("s_id");
String channel = row.getAs("channel");
String millisecond = row.getAs("millisecond");
String micro_sec = row.getAs("micro_sec");
String s_data = row.getAs("s_data");
return RowFactory.create(id, car_vin, s_id, channel, millisecond, micro_sec, s_data);
}
}, RowEncoder.apply(structType));
transDs.show(10);
// transDs.write().mode(SaveMode.Append).saveAsTable("");
// transDs.write().mode(SaveMode.Overwrite).insertInto("");
}
public static void loadCsv() {
SparkSession spark = SparkSession.builder().appName("Move").enableHiveSupport().getOrCreate();
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
Configuration conf = jsc.hadoopConfiguration();
try {
FileSystem fs = FileSystem.get(conf);
Path path = new Path("/tmp/blims");
// boolean directory = fs.isDirectory(path);
// getExistsFile(fs,path);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}