Java中一些文件操作常用转换

工作中操作文件时，偶尔会遇到需要转换文件格式的情况，以下为一些常用操作：

　　1、上传文件时将图片转pdf再上传

/**
 * Demo entry point: wraps a local image in a MultipartFile, converts it to a
 * single PDF via {@link #getPdfBytes(MultipartFile[])}, prints the PDF as
 * Base64 and wraps the PDF bytes in a new MultipartFile ready for upload.
 *
 * @param args unused
 */
@SneakyThrows
    public static void main(String[] args) {

        File file = new File("C:\\Users\\Administrator\\Downloads\\1.jpg");
        MultipartFile multipartFile;
        // try-with-resources makes the stream's lifetime explicit instead of
        // relying on the MultipartFile implementation to close it.
        // NOTE(review): assumes MockMultipartFile copies the stream content in
        // its constructor, so the file may be closed before getBytes() is called.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            multipartFile = new MockMultipartFile(file.getName(), file.getName(),
                    ContentType.APPLICATION_OCTET_STREAM.toString(), fileInputStream);
        } catch (Exception e) {
            log.error("file转MultipartFile失败", e);
            // Without a source file there is nothing to convert; continuing would
            // only hand a null element to getPdfBytes and fail there with an NPE.
            return;
        }
        MultipartFile[] multipartFiles = {multipartFile};
        byte[] bytes = getPdfBytes(multipartFiles);
        System.out.println(Base64.getEncoder().encodeToString(bytes));
        InputStream inputStream = new ByteArrayInputStream(bytes);
        // NOTE(review): the first constructor argument is the multipart field name;
        // a content-type string is passed here - confirm this is what the upload expects.
        MultipartFile mfile = new MockMultipartFile(ContentType.APPLICATION_OCTET_STREAM.toString(), inputStream);
        // ... upload logic follows
    }


    /**
     * Builds a PDF that contains one page per supplied image and returns the
     * serialized document.
     *
     * <p>Each page is sized to the pixel dimensions of its image and the image
     * is drawn at the page origin. The dimensions come from the decoded
     * {@code PDImageXObject}, so the image is parsed once and the result does
     * not depend on the file name carrying a recognizable suffix.
     *
     * @param imagesFiles images to place in the PDF, in page order
     * @return the bytes of the generated PDF document
     */
    @SneakyThrows
    public static byte[] getPdfBytes(MultipartFile[] imagesFiles) {
        // try-with-resources guarantees the document is released even when an
        // image cannot be decoded half-way through the loop.
        try (PDDocument document = new PDDocument();
             ByteArrayOutputStream output = new ByteArrayOutputStream()) {
            for (MultipartFile datum : imagesFiles) {
                PDImageXObject pdImageXObject = PDImageXObject.createFromByteArray(document, datum.getBytes(), "图片转pdf失败");
                PDPage pdPage = new PDPage(new PDRectangle(pdImageXObject.getWidth(), pdImageXObject.getHeight()));
                document.addPage(pdPage);
                // The content stream must be closed before the document is saved.
                try (PDPageContentStream contentStream = new PDPageContentStream(document, pdPage)) {
                    contentStream.drawImage(pdImageXObject, 0, 0);
                }
            }
            document.save(output);
            return output.toByteArray();
        }
    }

　　2、将pdf转换成jpg

/**
 * Demo entry point: reads a local PDF, Base64-encodes it, converts it with
 * {@code base64pdftojpg} and prints the resulting Base64 JPG string.
 *
 * @param args unused
 */
@SneakyThrows
    public static void main(String[] args) {
        // Replace the path below with the PDF you want to convert.
        byte[] pdfBytes = Files.readAllBytes(Paths.get("C:\\Users\\Administrator\\Downloads\\202xxxxx税票.pdf"));
        String encodedPdf = Base64.getEncoder().encodeToString(pdfBytes);
        System.out.println(base64pdftojpg(encodedPdf));
}

　　下面的方法传入Base64编码的pdf文件内容，返回Base64编码的jpg图片（多页pdf会纵向拼接成一张长图）

/**
 * Converts a Base64-encoded PDF into a single Base64-encoded JPG in which all
 * pages are stacked vertically, top to bottom.
 *
 * <p>Pages may differ in size: the merged image is as wide as the widest page
 * and as tall as the sum of all page heights; any area not covered by a
 * narrower page is left white.
 *
 * @param base64 Base64 text of the PDF file; line breaks inside the text are tolerated
 * @return the Base64 text (no line breaks) of the merged JPG, or {@code null}
 *         when the input is {@code null}, the PDF has no pages, or the PDF
 *         cannot be read or encoded
 */
private static String base64pdftojpg(String base64) {
    if (base64 == null) {
        return null;
    }
    // Rendering resolution: a higher DPI gives a sharper but larger image and a slower conversion.
    final float renderDpi = 106;
    byte[] pdfBytes = Base64.getMimeDecoder().decode(base64);
    // try-with-resources closes the document exactly once, also on failure.
    try (PDDocument document = PDDocument.load(pdfBytes)) {
        int pageCount = document.getNumberOfPages();
        if (pageCount == 0) {
            log.error("pdf转jpg失败: pdf没有任何页面");
            return null;
        }
        PDFRenderer renderer = new PDFRenderer(document);
        // First pass: render every page and measure the canvas needed to hold them all.
        BufferedImage[] pageImages = new BufferedImage[pageCount];
        int mergedWidth = 0;
        int mergedHeight = 0;
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
            BufferedImage pageImage = renderer.renderImageWithDPI(pageIndex, renderDpi, ImageType.RGB);
            pageImages[pageIndex] = pageImage;
            mergedWidth = Math.max(mergedWidth, pageImage.getWidth());
            mergedHeight += pageImage.getHeight();
        }
        // Second pass: paint the pages one below the other.
        BufferedImage merged = new BufferedImage(mergedWidth, mergedHeight, BufferedImage.TYPE_INT_RGB);
        java.awt.Graphics2D canvas = merged.createGraphics();
        try {
            // A fresh TYPE_INT_RGB image is black; fill it so gaps beside narrow pages are white.
            canvas.setColor(java.awt.Color.WHITE);
            canvas.fillRect(0, 0, mergedWidth, mergedHeight);
            int offsetY = 0;
            for (BufferedImage pageImage : pageImages) {
                canvas.drawImage(pageImage, 0, offsetY, null);
                // Advance by the height of the page just drawn.
                offsetY += pageImage.getHeight();
            }
        } finally {
            canvas.dispose();
        }
        ByteArrayOutputStream jpgOutput = new ByteArrayOutputStream();
        // ImageIO.write returns false when no writer accepted the image.
        if (!ImageIO.write(merged, "jpg", jpgOutput)) {
            log.error("pdf转jpg失败: 没有可用的jpg编码器");
            return null;
        }
        // The basic encoder never inserts line separators, so no clean-up is needed.
        return Base64.getEncoder().encodeToString(jpgOutput.toByteArray());
    } catch (IOException e) {
        log.error("pdf转jpg失败", e);
        return null;
    }
}

　　以上方法使用到的Maven依赖如下：

　　　　　<dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.24</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>fontbox</artifactId>
            <version>2.0.24</version>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-test</artifactId>
        </dependency>

　　有个需要注意到的地方，使用pdfbox转换时，抽象类 NativeFontDirFinder 下的 getSearchableDirectories() 方法指定各个开发平台上字体的存放路径

/**
 * Base class for platform-specific font directory finders: subclasses supply
 * candidate directory paths and {@link #find()} keeps the ones that exist and
 * are readable.
 */
public abstract class NativeFontDirFinder implements FontDirFinder {
    public NativeFontDirFinder() {
    }

    /**
     * Filters the paths returned by {@link #getSearchableDirectories()}.
     *
     * @return the candidate directories that exist and can be read, in the
     *         order they were supplied; empty when there are no candidates
     */
    public List<File> find() {
        List<File> readableDirs = new ArrayList<>();
        String[] candidates = getSearchableDirectories();
        if (candidates == null) {
            return readableDirs;
        }
        for (String candidate : candidates) {
            File dir = new File(candidate);
            try {
                if (dir.exists() && dir.canRead()) {
                    readableDirs.add(dir);
                }
            } catch (SecurityException ignored) {
                // A directory that may not be inspected is simply skipped.
            }
        }
        return readableDirs;
    }

    /**
     * Supplies the directory paths this finder should probe.
     *
     * @return candidate directory paths; {@code null} is treated as "none"
     */
    protected abstract String[] getSearchableDirectories();
}

　　Linux平台的字体存放路径如下。转换时需要在下列会被扫描到的字体路径中（任选其一）存放所需的字体，否则会出现中文丢失不显示、部分字体乱码等情况

/**
 * Font directory finder that lists the per-user {@code .fonts} folder followed
 * by a fixed set of system font paths.
 */
public class UnixFontDirFinder extends NativeFontDirFinder {
    public UnixFontDirFinder() {
    }

    /**
     * @return the {@code .fonts} folder under the {@code user.home} system
     *         property, followed by five fixed system font directories
     */
    protected String[] getSearchableDirectories() {
        String userFontDir = System.getProperty("user.home") + "/.fonts";
        return new String[]{
                userFontDir,
                "/usr/local/fonts",
                "/usr/local/share/fonts",
                "/usr/share/fonts",
                "/usr/X11R6/lib/X11/fonts",
                "/usr/share/X11/fonts"
        };
    }
}

　　3、图片文件压缩：可以调整desFileSize的大小，直到得到可以接受的图片清晰度。这样做的目的是降低图片文件大小，以base64在接口间传输时可以提高处理速率；

/**
     * Compresses a Base64-encoded image until it is no larger than the requested size.
     *
     * <p>The image is repeatedly scaled and re-encoded with the ratio returned
     * by {@code getAccuracy} for the source size. Compression stops as soon as
     * the target is met or a pass no longer makes the image smaller, so the
     * method always terminates. If a pass throws, the failure is logged and the
     * best result obtained so far is returned.
     *
     * @param base64String Base64 string of the source image
     * @param desFileSize  target image size in KB; must be positive for any compression to happen
     * @return Base64 string of the compressed image, or {@code base64String}
     *         itself when it is {@code null}/empty, already within the target,
     *         or {@code desFileSize} is not positive
     */
    public static String compressPicForScale(String base64String, long desFileSize) {
        if (base64String == null || base64String.isEmpty()) {
            return base64String;
        }
        if (desFileSize <= 0) {
            // No image can shrink to zero bytes; looping towards it would never end.
            log.warn("【图片压缩】msg=目标大小必须大于0，跳过压缩 | desFileSize={}kb", desFileSize);
            return base64String;
        }
        byte[] imageBytes = Base64.getDecoder().decode(base64String);
        long targetBytes = desFileSize * 1024;
        if (imageBytes.length <= targetBytes) {
            return base64String;
        }
        long srcSize = imageBytes.length;
        double accuracy = getAccuracy(srcSize / 1024);
        try {
            while (imageBytes.length > targetBytes) {
                ByteArrayInputStream inputStream = new ByteArrayInputStream(imageBytes);
                ByteArrayOutputStream outputStream = new ByteArrayOutputStream(imageBytes.length);
                Thumbnails.of(inputStream)
                        .scale(accuracy)
                        .outputQuality(accuracy)
                        .toOutputStream(outputStream);
                byte[] compressed = outputStream.toByteArray();
                if (compressed.length >= imageBytes.length) {
                    // Another pass would not help; stop instead of spinning forever.
                    log.warn("【图片压缩】msg=图片无法继续压缩 | 当前大小={}kb | 目标大小={}kb", imageBytes.length / 1024, desFileSize);
                    break;
                }
                imageBytes = compressed;
            }
            log.info("【图片压缩】 | 图片原大小={}kb | 压缩后大小={}kb", srcSize / 1024, imageBytes.length / 1024);
        } catch (Exception e) {
            log.error("【图片压缩】msg=图片压缩失败!", e);
        }
        return Base64.getEncoder().encodeToString(imageBytes);
    }

    /**
     * Picks the compression ratio for an image from its size, using empirically
     * chosen thresholds: the larger the source, the smaller the ratio.
     *
     * @param size source image size
     * @return ratio used for both scaling and output quality
     */
    private static double getAccuracy(long size) {
        if (size < 900) {
            return 0.85;
        }
        if (size < 2047) {
            return 0.6;
        }
        return size < 3275 ? 0.44 : 0.4;
    }

NativeFontDirFinder

posted on 2024-10-30 15:30 追枫狼阅读(75) 评论(0) 收藏举报

刷新页面返回顶部

追枫狼

导航

公告

Java中一些文件操作常用转换