自写工具类实现word转html、图片和html转word功能。

项目需要将生成的报告在页面上进行预览，并支持在页面编辑后更新报告。本文记录为满足该需求所做的开发。

引入maven依赖

<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.2</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>easyexcel</artifactId>
            <version>2.2.10</version>
            <exclusions>
                <exclusion>
                    <artifactId>poi</artifactId>
                    <groupId>org.apache.poi</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>poi-ooxml</artifactId>
                    <groupId>org.apache.poi</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>poi-ooxml-schemas</artifactId>
                    <groupId>org.apache.poi</groupId>
                </exclusion>
            </exclusions>
        </dependency>
<!-- 注意 converter.xhtml 的版本：不同版本所依赖的 poi-ooxml 差异较大，因此下面排除了它自带的 POI 依赖，统一使用上面引入的 POI 4.1.2 -->
        <dependency>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
            <version>2.0.2</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-scratchpad</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.poi</groupId>
                    <artifactId>poi-ooxml-schemas</artifactId>
                </exclusion>
                <!--                <exclusion>-->
                <!--                    <groupId>org.apache.poi</groupId>-->
                <!--                    <artifactId>ooxml-schemas</artifactId>-->
                <!--                </exclusion>-->
                <exclusion>
                    <groupId>fr.opensagres.xdocreport</groupId>
                    <artifactId>fr.opensagres.xdocreport.core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.docx4j</groupId>
            <artifactId>docx4j-ImportXHTML</artifactId>
            <version>3.0.0</version>
        </dependency>

工具类如下：

import com.aspose.words.ImageSaveOptions;
import com.aspose.words.SaveFormat;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import wordUtil.LicenseLoad;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor;
import fr.opensagres.poi.xwpf.converter.core.FileURIResolver;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.docx4j.XmlUtils;
import org.docx4j.convert.in.xhtml.XHTMLImporterImpl;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;

/**
 * Conversion helpers for Word documents: DOCX to a single PNG image, DOCX to HTML, and XHTML to DOCX.
 *
 * <p>Image rendering goes through Aspose.Words, HTML export through the XDocReport XHTML converter
 * on top of Apache POI, and HTML import through docx4j-ImportXHTML.
 */
@Slf4j
public class WordUtil {

    /** Vertical gap, in pixels, inserted after every page except the last when pages are stacked. */
    private static final int PAGE_GAP = 5;

    /**
     * Renders every page of a Word document to PNG and writes all pages, stacked top to bottom,
     * into one image file.
     *
     * <p>Failures are logged and swallowed; the method never throws. The caller keeps ownership of
     * {@code inputStream}; it is not closed here.
     *
     * @param inputStream stream positioned at the start of the Word document
     * @param imgFile     destination PNG file
     */
    public static void wordToImg(InputStream inputStream, File imgFile) {
        try {
            // Load the Aspose license first; the original author notes that unlicensed output is watermarked.
            LicenseLoad.getLicense();
            com.aspose.words.Document doc = new com.aspose.words.Document(inputStream);
            ImageSaveOptions options = new ImageSaveOptions(SaveFormat.PNG);
            options.setPrettyFormat(true);
            options.setUseAntiAliasing(true);
            options.setUseHighQualityRendering(true);
            int pageCount = doc.getPageCount();

            List<BufferedImage> pages = new ArrayList<>(pageCount);
            for (int i = 0; i < pageCount; i++) {
                // Select the page to render. Without this every iteration saved the same page,
                // so the merged image repeated page 1 pageCount times.
                // NOTE(review): newer Aspose.Words releases replace setPageIndex with
                // setPageSet(new PageSet(i)) — confirm against the Aspose version on the classpath.
                options.setPageIndex(i);
                ByteArrayOutputStream pageBytes = new ByteArrayOutputStream();
                doc.save(pageBytes, options);
                pages.add(ImageIO.read(parse(pageBytes)));
            }
            BufferedImage merged = mergeImage(false, pages);
            ImageIO.write(merged, "png", imgFile);
        } catch (Exception e) {
            // A single handler replaces three identical catch blocks (FileNotFoundException,
            // IOException, Exception) that all logged the same message.
            log.error("word转图片时发生错误:{}", e);
        }
    }

    /**
     * Converts a DOCX file to an HTML file.
     *
     * <p>Embedded images are extracted into the folder obtained by cutting {@code outPutFile} at its
     * last {@code '/'}; the same folder is used to resolve image URIs in the generated HTML.
     * The parent directory of the output file is created if it does not exist.
     *
     * @param fileName   path of the source DOCX file
     * @param outPutFile path of the HTML file to write
     * @throws Exception if the source cannot be read, the output cannot be written, or the
     *                   conversion fails
     */
    public static void docx2Html(String fileName, String outPutFile) throws Exception {
        // Kept as in the original so the image URIs written into the HTML do not change.
        File imageFolder = new File(StringUtils.substringBeforeLast(outPutFile, "/") + "/");
        File outFile = new File(outPutFile);

        // try-with-resources closes the input stream, the document and the output stream even when
        // conversion fails; previously none of them were closed, so the HTML could be left unflushed.
        try (InputStream in = new FileInputStream(fileName);
             XWPFDocument document = new XWPFDocument(in)) {
            XHTMLOptions options = XHTMLOptions.create().indent(4);
            options.setExtractor(new FileImageExtractor(imageFolder));
            options.URIResolver(new FileURIResolver(imageFolder));

            // getParentFile() is null for a bare file name such as "out.html".
            File parentDir = outFile.getParentFile();
            if (parentDir != null) {
                parentDir.mkdirs();
            }
            try (OutputStream out = new FileOutputStream(outFile)) {
                XHTMLConverter.getInstance().convert(document, out, options);
            }
        }
    }

    /**
     * Converts an XHTML string into a DOCX file.
     *
     * @param content      XHTML markup to import
     * @param docxFilePath path of the DOCX file to write
     * @throws Exception if the markup cannot be imported or the file cannot be saved
     */
    public static void htmlContent2Docx(String content, String docxFilePath) throws Exception {
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
        XHTMLImporterImpl importer = new XHTMLImporterImpl(wordMLPackage);

        wordMLPackage.getMainDocumentPart().getContent().addAll(importer.convert(content, null));

        // The generated document XML used to be printed to System.out on every call; it is now
        // emitted only when debug logging is enabled, which also skips the marshalling cost.
        if (log.isDebugEnabled()) {
            log.debug("{}", XmlUtils.marshaltoString(
                    wordMLPackage.getMainDocumentPart().getJaxbElement(), true, true));
        }

        wordMLPackage.save(new File(docxFilePath));
    }

    /**
     * Combines several images into one, either side by side or stacked vertically.
     *
     * <p>Vertical mode leaves {@link #PAGE_GAP} pixels after every image except the last; horizontal
     * mode leaves no gap. Uncovered areas are filled with light gray.
     *
     * @param isHorizontal {@code true} to place images left to right, {@code false} top to bottom
     * @param imgs         images to combine, in order; must contain at least one image
     * @return the combined image, of type {@link BufferedImage#TYPE_INT_RGB}
     * @throws IllegalArgumentException if {@code imgs} is {@code null} or empty
     * @throws IOException              declared for compatibility with the original signature
     */
    private static BufferedImage mergeImage(boolean isHorizontal, List<BufferedImage> imgs) throws IOException {
        if (imgs == null || imgs.isEmpty()) {
            // A zero-sized BufferedImage cannot be created; fail with a message that names the cause.
            throw new IllegalArgumentException("imgs must contain at least one image");
        }

        // Total and maximum extents over all images.
        int totalWidth = 0;
        int totalHeight = 0;
        int maxWidth = 0;
        int maxHeight = 0;
        int lastIndex = imgs.size() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            BufferedImage img = imgs.get(i);
            totalWidth += img.getWidth();
            totalHeight += img.getHeight() + (i < lastIndex ? PAGE_GAP : 0);
            maxWidth = Math.max(maxWidth, img.getWidth());
            maxHeight = Math.max(maxHeight, img.getHeight());
        }

        BufferedImage destImage = isHorizontal
                ? new BufferedImage(totalWidth, maxHeight, BufferedImage.TYPE_INT_RGB)
                : new BufferedImage(maxWidth, totalHeight, BufferedImage.TYPE_INT_RGB);

        // Paint the background, then release the graphics context (it was never disposed before).
        Graphics2D g2 = (Graphics2D) destImage.getGraphics();
        try {
            g2.setBackground(Color.LIGHT_GRAY);
            g2.clearRect(0, 0, destImage.getWidth(), destImage.getHeight());
        } finally {
            g2.dispose();
        }

        // Copy each image's pixels to its offset in the destination.
        int offsetX = 0;
        int offsetY = 0;
        for (BufferedImage img : imgs) {
            int w = img.getWidth();
            int h = img.getHeight();
            int[] pixels = img.getRGB(0, 0, w, h, null, 0, w);
            if (isHorizontal) {
                destImage.setRGB(offsetX, 0, w, h, pixels, 0, w);
            } else {
                destImage.setRGB(0, offsetY, w, h, pixels, 0, w);
            }
            offsetX += w;
            offsetY += h + PAGE_GAP;
        }
        return destImage;
    }

    /**
     * Exposes the bytes written to a {@link ByteArrayOutputStream} as an input stream.
     *
     * @param out the stream to read back; must actually be a {@link ByteArrayOutputStream}
     * @return an input stream over a copy of the bytes written so far
     * @throws Exception declared for compatibility; a {@link ClassCastException} is raised when
     *                   {@code out} is not a {@link ByteArrayOutputStream}
     */
    public static ByteArrayInputStream parse(OutputStream out) throws Exception {
        ByteArrayOutputStream buffer = (ByteArrayOutputStream) out;
        return new ByteArrayInputStream(buffer.toByteArray());
    }

    /**
     * Manual smoke test: converts a sample document on the local E: drive to a PNG.
     *
     * @param args unused
     * @throws IOException if the sample document cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        // wordToImg does not close its input, so close it here.
        try (InputStream in = new FileInputStream("E:\\line_chart3.docx")) {
            wordToImg(in, new File("E:\\ToPNG3.png"));
        }
    }
}

另外，在word转图片时用到了Aspose.Words，因为它是收费的（转化后的结果有水印），所以找到了破解版。参考文章：https://www.cnblogs.com/huaixiaonian/p/14700007.html
关于word中的图表生成（折线图、饼状图、柱状图等），参考项目：https://gitee.com/filer/fly-office

docker启动项目时的乱码问题

cd /usr/share/fonts
docker cp SIMSUN.TTC sz_wxb:/usr/share/fonts/SIMSUN.TTC
//这里用的是宋体：需要先把宋体字体文件（SIMSUN.TTC）放到宿主机的 /usr/share/fonts/ 目录下，再执行上面的 docker cp 将其拷贝到容器内

posted @ 2021-08-09 14:00 ayueC 阅读(186) 评论(0) 收藏举报

刷新页面返回顶部

ayueC

自写工具类实现word转html、图片和html转word功能。

docker启动项目时的乱码问题

公告