Java 抽取 Word 文档中图片的方法

Java 抽取 Word 文档中图片的方法

package com.example.core.mydemo.aspose;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;

import java.awt.image.BufferedImage;
import java.io.*;
import java.util.List;

import javax.imageio.ImageIO;

/**
 * Extracts the pictures embedded in a Word (.docx) document and saves each one as a PNG file.
 *
 * <p>This pulls the embedded images out of the document; it does not render the document's
 * pages as images.
 */
public class WordToImageConverter {
    /** Document that is read when no path is supplied on the command line. */
    private static final String DEFAULT_WORD_FILE_PATH = "D:\\pdf\\testword.docx";

    /** Output base path that is used when none is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_IMAGE_PATH = "D:\\pdf\\image33.png";

    /**
     * Reads a .docx file and writes every embedded picture that ImageIO can decode to
     * {@code <outputBasePath>_<n>.png}, where {@code n} is the 1-based position of the picture
     * in the document's picture list.
     *
     * @param args optional overrides: {@code args[0]} is the path of the .docx file to read,
     *             {@code args[1]} is the output base path; the built-in defaults are used for
     *             any argument that is missing
     */
    public static void main(String[] args) {
        String wordFilePath =
                args != null && args.length > 0 ? args[0] : DEFAULT_WORD_FILE_PATH;
        String outputImagePath =
                args != null && args.length > 1 ? args[1] : DEFAULT_OUTPUT_IMAGE_PATH;

        // try-with-resources closes the document and the input stream even when a picture
        // fails to decode or write part-way through the loop.
        try (InputStream wordStream = new FileInputStream(wordFilePath);
             XWPFDocument document = new XWPFDocument(wordStream)) {

            List<XWPFPictureData> pictures = document.getAllPictures();
            System.out.println("pictures size=" + pictures.size());

            for (int i = 0; i < pictures.size(); i++) {
                XWPFPictureData picture = pictures.get(i);
                byte[] pictureData = picture.getData();
                BufferedImage image = ImageIO.read(new ByteArrayInputStream(pictureData));

                // The base path is used verbatim, so a base of "image33.png" yields
                // "image33.png_1.png"; the naming is kept as-is for compatibility.
                String imageFileName = outputImagePath + "_" + (i + 1) + ".png";

                // ImageIO.read returns null when no registered reader understands the data.
                if (image == null) {
                    System.err.println("skipped picture " + (i + 1)
                            + ": format not readable by ImageIO");
                    continue;
                }
                writePng(image, imageFileName);
            }

            System.out.println("image ok 2");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code image} to {@code imageFileName} in PNG format and closes the file afterwards.
     *
     * @param image         the decoded picture to save
     * @param imageFileName the path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or an error occurs while writing
     */
    private static void writePng(BufferedImage image, String imageFileName) throws IOException {
        // ImageIO.write leaves the stream open, so it has to be closed here to avoid
        // leaking one file handle per picture.
        try (OutputStream out = new FileOutputStream(new File(imageFileName))) {
            if (!ImageIO.write(image, "png", out)) {
                System.err.println("no PNG writer accepted the image for " + imageFileName);
            }
        }
    }
}

 

posted on 2025-10-14 14:40  oktokeep  阅读(4)  评论(0)    收藏  举报