自写工具类实现word转html、图片和html转word功能。
项目需要将生成的报告在页面上进行预览,并支持在线编辑后更新报告。本文记录为满足该需求所做的开发。
-
引入maven依赖
<!-- Apache POI core and OOXML support, pinned to the same version. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>

<!-- EasyExcel, with its transitive POI artifacts excluded so the POI version declared above is used. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>2.2.10</version>
    <exclusions>
        <exclusion>
            <artifactId>poi</artifactId>
            <groupId>org.apache.poi</groupId>
        </exclusion>
        <exclusion>
            <artifactId>poi-ooxml</artifactId>
            <groupId>org.apache.poi</groupId>
        </exclusion>
        <exclusion>
            <artifactId>poi-ooxml-schemas</artifactId>
            <groupId>org.apache.poi</groupId>
        </exclusion>
    </exclusions>
</dependency>

<!-- Mind the converter.xhtml version: the poi-ooxml it relies on changed substantially between releases. -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
    <version>2.0.2</version>
    <exclusions>
        <exclusion>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
        </exclusion>
        <!-- <exclusion>-->
        <!-- <groupId>org.apache.poi</groupId>-->
        <!-- <artifactId>ooxml-schemas</artifactId>-->
        <!-- </exclusion>-->
        <exclusion>
            <groupId>fr.opensagres.xdocreport</groupId>
            <artifactId>fr.opensagres.xdocreport.core</artifactId>
        </exclusion>
    </exclusions>
</dependency>

<!-- docx4j XHTML importer, used for the HTML-to-docx direction. -->
<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j-ImportXHTML</artifactId>
    <version>3.0.0</version>
</dependency>
<!-- NOTE(review): the WordUtil class in this article also imports lombok, commons-lang3 and
     com.aspose.words, none of which are declared in this list; they must be provided elsewhere. -->
-
工具类如下:
import com.aspose.words.ImageSaveOptions; import com.aspose.words.SaveFormat; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.apache.poi.xwpf.usermodel.XWPFDocument; import wordUtil.LicenseLoad; import javax.imageio.ImageIO; import javax.imageio.stream.ImageInputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import java.awt.*; import java.awt.image.BufferedImage; import java.io.*; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import fr.opensagres.poi.xwpf.converter.core.FileImageExtractor; import fr.opensagres.poi.xwpf.converter.core.FileURIResolver; import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter; import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions; import org.docx4j.XmlUtils; import org.docx4j.convert.in.xhtml.XHTMLImporterImpl; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; @Slf4j public class WordUtil { // private static final Logger logger = LoggerFactory.getLogger(WordUtil.class); public static void wordToImg(InputStream inputStream, File imgFile) { try { //验证License 若不验证则转化出的pdf文档会有水印产生 LicenseLoad.getLicense(); com.aspose.words.Document doc = new com.aspose.words.Document(inputStream); ImageSaveOptions options = new ImageSaveOptions(SaveFormat.PNG); options.setPrettyFormat(true); options.setUseAntiAliasing(true); options.setUseHighQualityRendering(true); int pageCount = doc.getPageCount(); List<BufferedImage> imageList = new ArrayList<BufferedImage>(); for (int i = 0; i < pageCount; i++) { OutputStream output = new ByteArrayOutputStream(); doc.save(output, options); ImageInputStream imageInputStream = javax.imageio.ImageIO.createImageInputStream(parse(output)); imageList.add(javax.imageio.ImageIO.read(imageInputStream)); } BufferedImage mergeImage = mergeImage(false, imageList); //将其保存在C:/imageSort/targetPIC/下 ImageIO.write(mergeImage, "png", imgFile); } catch 
(FileNotFoundException e) { log.error("word转图片时发生错误:{}", e); } catch (IOException e) { log.error("word转图片时发生错误:{}", e); } catch (Exception e) { log.error("word转图片时发生错误:{}", e); } } /** * docx格式word转换为html * * @param fileName docx文件路径 * @param outPutFile html输出文件路径 * @throws TransformerException * @throws IOException * @throws ParserConfigurationException */ public static void docx2Html(String fileName, String outPutFile) throws Exception { String fileOutName = outPutFile; XWPFDocument document = new XWPFDocument(new FileInputStream(fileName)); XHTMLOptions options = XHTMLOptions.create().indent(4); // 导出图片 File imageFolder = new File(StringUtils.substringBeforeLast(outPutFile, "/") + "/"); options.setExtractor(new FileImageExtractor(imageFolder)); // URI resolver options.URIResolver(new FileURIResolver(imageFolder)); File outFile = new File(fileOutName); outFile.getParentFile().mkdirs(); OutputStream out = new FileOutputStream(outFile); XHTMLConverter.getInstance().convert(document, out, options); } public static void htmlContent2Docx(String content, String docxFilePath) throws Exception { // To docx, with content controls WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage(); XHTMLImporterImpl XHTMLImporter = new XHTMLImporterImpl(wordMLPackage); //XHTMLImporter.setDivHandler(new DivToSdt()); wordMLPackage.getMainDocumentPart().getContent().addAll( XHTMLImporter.convert(content, null)); System.out.println(XmlUtils.marshaltoString(wordMLPackage .getMainDocumentPart().getJaxbElement(), true, true)); wordMLPackage.save(new java.io.File(docxFilePath)); } private static BufferedImage mergeImage(boolean isHorizontal, List<BufferedImage> imgs) throws IOException { // 生成新图片 BufferedImage destImage = null; // 计算新图片的长和高 int allw = 0, allh = 0, allwMax = 0, allhMax = 0; // 获取总长、总宽、最长、最宽 for (int i = 0; i < imgs.size(); i++) { BufferedImage img = imgs.get(i); allw += img.getWidth(); if (imgs.size() != i + 1) { allh += img.getHeight() + 5; } else { 
allh += img.getHeight(); } if (img.getWidth() > allwMax) { allwMax = img.getWidth(); } if (img.getHeight() > allhMax) { allhMax = img.getHeight(); } } // 创建新图片 if (isHorizontal) { destImage = new BufferedImage(allw, allhMax, BufferedImage.TYPE_INT_RGB); } else { destImage = new BufferedImage(allwMax, allh, BufferedImage.TYPE_INT_RGB); } Graphics2D g2 = (Graphics2D) destImage.getGraphics(); g2.setBackground(Color.LIGHT_GRAY); g2.clearRect(0, 0, allw, allh); g2.setPaint(Color.RED); // 合并所有子图片到新图片 int wx = 0, wy = 0; for (int i = 0; i < imgs.size(); i++) { BufferedImage img = imgs.get(i); int w1 = img.getWidth(); int h1 = img.getHeight(); // 从图片中读取RGB int[] ImageArrayOne = new int[w1 * h1]; // 逐行扫描图像中各个像素的RGB到数组中 ImageArrayOne = img.getRGB(0, 0, w1, h1, ImageArrayOne, 0, w1); if (isHorizontal) { // 水平方向合并 // 设置上半部分或左半部分的RGB destImage.setRGB(wx, 0, w1, h1, ImageArrayOne, 0, w1); } else { // 垂直方向合并 // 设置上半部分或左半部分的RGB destImage.setRGB(0, wy, w1, h1, ImageArrayOne, 0, w1); } wx += w1; wy += h1 + 5; } return destImage; } public static ByteArrayInputStream parse(OutputStream out) throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos = (ByteArrayOutputStream) out; ByteArrayInputStream swapStream = new ByteArrayInputStream(baos.toByteArray()); return swapStream; } public static void main(String[] args) throws IOException { //加载测试文档 // Document doc = new Document("E:\\line_chart.docx"); // // //将文档指定页保存为Png格式的图片 // BufferedImage image = doc.saveToImages(0, ImageType.Bitmap); // File file = new File("E:\\ToPNG.png"); // ImageIO.write(image, "PNG", file); wordToImg(new FileInputStream("E:\\line_chart3.docx"), new File("E:\\ToPNG3.png")); } }
-
另外,在word转图片时用到了Aspose.Words,因为它是收费的(转化后的结果有水印),所以找到了破解版。参考文章:https://www.cnblogs.com/huaixiaonian/p/14700007.html
-
关于word中的图表生成(折线图、饼状图、柱状图等),参考项目:https://gitee.com/filer/fly-office
docker启动项目时的乱码问题
cd /usr/share/fonts
docker cp SIMSUN.TTC sz_wxb:/usr/share/fonts/SIMSUN.TTC
//这里用的是宋体:需要先把宋体字体文件 SIMSUN.TTC 放到宿主机的 /usr/share/fonts/ 目录下,再通过上面的 docker cp 命令复制到容器(sz_wxb)的 /usr/share/fonts/ 目录中
既要仰望星空,又要脚踏实地