openOffice word转pdf,pdf转图片优化版

之前写了一个版本的,不过代码繁琐而且不好用,效率有些问题。尤其pdf转图片速度太慢。下面是优化版本的代码。

spriing_boot 版本信息:2.0.1.RELEASE

优化版代码https://gitee.com/liran123/transfer_easy_fast

 

1、配置信息:

package com.yunfatong.conf;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.jodconverter.DocumentConverter;
import org.jodconverter.LocalConverter;
import org.jodconverter.office.LocalOfficeManager;
import org.jodconverter.office.OfficeManager;
import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.HashSet;
import java.util.Set;

/**
 * openoffice 配置
 *
 * @author liran
 * @date 20190517
 */
@Configuration
@ConditionalOnClass({DocumentConverter.class})
@ConditionalOnProperty(prefix = "jodconverter", name = {"enabled"}, havingValue = "true", matchIfMissing = true)
@EnableConfigurationProperties({JodConverterProperties.class})
@Slf4j
public class JodConverterAutoConfiguration {
    private final JodConverterProperties properties;

    public JodConverterAutoConfiguration(JodConverterProperties properties) {
        this.properties = properties;
    }

    private OfficeManager createOfficeManager() {
        LocalOfficeManager.Builder builder = LocalOfficeManager.builder();
        if (!StringUtils.isBlank(this.properties.getPortNumbers())) {
            Set<Integer> iports = new HashSet<>();
            String[] var3 = StringUtils.split(this.properties.getPortNumbers(), ", ");
            int var4 = var3.length;

            for (int var5 = 0; var5 < var4; ++var5) {
                String portNumber = var3[var5];
                iports.add(NumberUtils.toInt(portNumber, 2002));
            }

            builder.portNumbers(ArrayUtils.toPrimitive(iports.toArray(new Integer[iports.size()])));
        }
        builder.officeHome(this.properties.getOfficeHome());
        builder.workingDir(this.properties.getWorkingDir());
        builder.templateProfileDir(this.properties.getTemplateProfileDir());
        builder.killExistingProcess(this.properties.isKillExistingProcess());
        builder.processTimeout(this.properties.getProcessTimeout());
        builder.processRetryInterval(this.properties.getProcessRetryInterval());
        builder.taskExecutionTimeout(this.properties.getTaskExecutionTimeout());
        builder.maxTasksPerProcess(this.properties.getMaxTasksPerProcess());
        builder.taskQueueTimeout(this.properties.getTaskQueueTimeout());
        return builder.build();
    }

    @Bean(initMethod = "start", destroyMethod = "stop")
    @ConditionalOnMissingBean
    public OfficeManager officeManager() {
        return this.createOfficeManager();
    }

    @Bean
    @ConditionalOnMissingBean
    @ConditionalOnBean({OfficeManager.class})
    public DocumentConverter jodConverter(OfficeManager officeManager) {
        return LocalConverter.make(officeManager);
    }
}
package com.yunfatong.conf;

import org.springframework.boot.context.properties.ConfigurationProperties;

import java.util.regex.Pattern;

/**
 * openoffice 配置
 *
 * @author liran
 * @date 20190517
 */
@ConfigurationProperties("jodconverter")
public class JodConverterProperties {
    private boolean enabled;
    private String officeHome;
    private String portNumbers = "2002";
    private String workingDir;
    private String templateProfileDir;
    private boolean killExistingProcess = true;
    private long processTimeout = 120000L;
    private long processRetryInterval = 250L;
    private long taskExecutionTimeout = 120000L;
    private int maxTasksPerProcess = 200;
    private long taskQueueTimeout = 30000L;

    public JodConverterProperties() {
    }

    public boolean isEnabled() {
        return this.enabled;
    }

    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    public String getOfficeHome() {
        //根据不同系统分别设置
        //office-home: C:\Program Files (x86)\OpenOffice 4  #windows下默认 不用修改
        // #office-home: /opt/openoffice4      #linux 默认 不用修改
        String osName = System.getProperty("os.name");
        if (Pattern.matches("Linux.*", osName)) {
            this.officeHome = "/opt/openoffice4";
        } else if (Pattern.matches("Windows.*", osName)) {
            this.officeHome = "C:\\Program Files (x86)\\OpenOffice 4";
        } else {
            this.officeHome = "/opt/openoffice4";
        }
        return this.officeHome;
    }

    public void setOfficeHome(String officeHome) {
        this.officeHome = officeHome;
    }

    public String getPortNumbers() {
        return this.portNumbers;
    }

    public void setPortNumbers(String portNumbers) {
        this.portNumbers = portNumbers;
    }

    public String getWorkingDir() {
        return this.workingDir;
    }

    public void setWorkingDir(String workingDir) {
        this.workingDir = workingDir;
    }

    public String getTemplateProfileDir() {
        return this.templateProfileDir;
    }

    public void setTemplateProfileDir(String templateProfileDir) {
        this.templateProfileDir = templateProfileDir;
    }

    public boolean isKillExistingProcess() {
        return this.killExistingProcess;
    }

    public void setKillExistingProcess(boolean killExistingProcess) {
        this.killExistingProcess = killExistingProcess;
    }

    public long getProcessTimeout() {
        return this.processTimeout;
    }

    public void setProcessTimeout(long processTimeout) {
        this.processTimeout = processTimeout;
    }

    public long getProcessRetryInterval() {
        return this.processRetryInterval;
    }

    public void setProcessRetryInterval(long procesRetryInterval) {
        this.processRetryInterval = procesRetryInterval;
    }

    public long getTaskExecutionTimeout() {
        return this.taskExecutionTimeout;
    }

    public void setTaskExecutionTimeout(long taskExecutionTimeout) {
        this.taskExecutionTimeout = taskExecutionTimeout;
    }

    public int getMaxTasksPerProcess() {
        return this.maxTasksPerProcess;
    }

    public void setMaxTasksPerProcess(int maxTasksPerProcess) {
        this.maxTasksPerProcess = maxTasksPerProcess;
    }

    public long getTaskQueueTimeout() {
        return this.taskQueueTimeout;
    }

    public void setTaskQueueTimeout(long taskQueueTimeout) {
        this.taskQueueTimeout = taskQueueTimeout;
    }
}

application.yml 

jodconverter:
  enabled: true
  office-home: linuxOrwindows
  #/opt/openoffice4      #linux 默认 不用修改 C:\Program Files (x86)\OpenOffice 4  #windows下默认 不用修改
  port-numbers: 2002
  max-tasks-per-process: 10

 

2、转换入口

package com.yunfatong.ojd.util.pdf;

import cn.hutool.core.date.DatePattern;
import com.yunfatong.ojd.common.exception.CommonException;
import com.yunfatong.ojd.service.FileSystemStorageService;
import com.yunfatong.ojd.util.SpringUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.jodconverter.DocumentConverter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.io.File;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;

/**
* word 转pdf
*
* @author lr
*/
@Component
@Slf4j
public class TransferUtil {
//这里没有@Autowired 主要是配置不启用的话 无法注入
private DocumentConverter documentConverter;
@Autowired
private FileSystemStorageService fileSystemStorageService;
/**
* fileSystemStorageService 就是拼接出本地路径的作用
* storage.winLocation=D:\\ojd\\upload\\images\\
* ##上传图片linux存储路径
* storage.linuxLocation=/home/ojd/upload/images/
*/

final static String WORD_SUFFIX_DOC = "doc";
final static String WORD_SUFFIX_DOCX = "docx";

/**
* word ->pdf
*
* @param webPath 浏览器可访问路径(数据库存的)如 /test/wd.word
* @return 相同文件夹下的转换后的pdf 路径 如/test/wd_20190517151515333.pdf
* @throws Exception
*/
public String transferWordToPdf(String webPath) throws Exception {
if(documentConverter==null){
documentConverter = SpringUtil.getBean(DocumentConverter.class);
}
//转换成本地实际磁盘路径
String originLocalFilePath = fileSystemStorageService.getLocation(webPath);
File inputFile = new File(originLocalFilePath);
if (!inputFile.exists() ||
        !inputFile.isFile() ||
         (!StringUtils.contains(inputFile.getName(), WORD_SUFFIX_DOC) &&
          !StringUtils.contains(inputFile.getName(), WORD_SUFFIX_DOCX))) {
throw new CommonException("word -> pdf转换错误 当前文件不是word或 文件不存在: " + webPath);
}

DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DatePattern.PURE_DATETIME_MS_PATTERN);
String timeNow = formatter.format(LocalDateTime.now());
String newPdfWebPath = StringUtils.substringBeforeLast(webPath, ".") + "_" + timeNow + ".pdf";
try {
File outputFile = new File(fileSystemStorageService.getLocation(newPdfWebPath));
documentConverter.convert(inputFile).to(outputFile).execute();
} catch (Exception e) {
log.error("word->pdf 转换错误------------> Exception:{}", e);
throw e;
}
return newPdfWebPath;
}

public List<String> transferPdfToImage(String webPath) throws Exception {
String originLocalFilePath = fileSystemStorageService.getLocation(webPath);
File inputFile = new File(originLocalFilePath);
if (!inputFile.exists() ||
        !inputFile.isFile() ||
        webPath.lastIndexOf(".pdf") < 0) {
throw new CommonException("pdf-> img 源文件不是pdf文件 或者文件不存在!" + webPath);
}
String localPdfpath = fileSystemStorageService.getLocation(webPath);
String newImgWebPathPreSuffix = StringUtils.substringBeforeLast(webPath, ".");
String localImgPath = fileSystemStorageService.getLocation(newImgWebPathPreSuffix);
PdfTransferUtil pdfTranfer = new PdfTransferUtil();
List<byte[]> ins = pdfTranfer.pdf2Image(localPdfpath, "png", 1.5f);
List<String> webPaths = new ArrayList<>(ins.size());
for (int i = 0; i < ins.size(); i++) {
byte[] data = ins.get(i);
String pathReal = localImgPath + "_ojd_" + i + ".png";
FileUtils.writeByteArrayToFile(new File(pathReal), data);
webPaths.add(pathReal);
}
return webPaths;
}

}

 pdf 转图片参考 https://gitee.com/cycmy/pdftranfer.git

package com.yunfatong.ojd.util.pdf;

import lombok.extern.slf4j.Slf4j;
import org.icepdf.core.pobjects.Document;
import org.icepdf.core.pobjects.Page;
import org.icepdf.core.util.GraphicsRenderingHints;

import javax.imageio.ImageIO;
import javax.imageio.stream.ImageOutputStream;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * @author lr
 *  
 */
@Slf4j
public class PdfTransferUtil {

    //*********************************pdf to image **********************************************************

    /**
     * 将指定pdf字节数组转换为指定格式图片二进制数组
     *
     * @param pdfBytes  PDF字节数组
     * @param imageType 转换图片格式  默认png
     * @param zoom      缩略图显示倍数,1表示不缩放,0.3则缩小到30%
     * @return List<byte [ ]>
     * @throws Exception
     */
    public List<byte[]> pdf2Image(byte[] pdfBytes, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setByteArray(pdfBytes, 0, pdfBytes.length, null);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    /**
     * 将指定pdf输入流转换为指定格式图片二进制数组
     *
     * @param inputPDF  PDF二进制流
     * @param imageType 转换图片格式 默认png
     * @param zoom      缩略图显示倍数,1表示不缩放,0.3则缩小到30%
     * @return List<byte [ ]>
     * @throws Exception
     */
    public List<byte[]> pdf2Image(InputStream inputPDF, String imageType, float zoom) throws Exception {

        Document document = new Document();
        document.setInputStream(inputPDF, null);
        return pageExtraction(document, imageType, 0f, zoom);
    }

    /**
     * 将指定pdf文件转换为指定格式图片二进制数组
     *
     * @param pdfPath   原文件路径,例如d:/test.pdf
     * @param imageType 转换图片格式 默认png
     * @param zoom      缩略图显示倍数,1表示不缩放,0.3则缩小到30%
     * @return List<byte [ ]>
     * @throws Exception
     */
    public List<byte[]> pdf2Image(String pdfPath, String imageType, float zoom) throws Exception {
        Document document = new Document();
        document.setFile(pdfPath);
        return pageExtraction(document, imageType, 0f, zoom);
    }
    //*********************************pdf to image **********************************************************

    private List<byte[]> pageExtraction(Document document, String imageType, float rotation, float zoom) {
        // setup two threads to handle image extraction.
        ExecutorService executorService = Executors.newFixedThreadPool(5);
        try {
            // create a list of callables.
            int pages = document.getNumberOfPages();
            List<byte[]> result = new ArrayList<byte[]>(pages);
            List<Callable<byte[]>> callables = new ArrayList<Callable<byte[]>>(pages);
            for (int i = 0; i < pages; i++) {
                callables.add(new CapturePage(document, i, imageType, rotation, zoom));
            }
            List<Future<byte[]>> listFuture = executorService.invokeAll(callables);
            executorService.submit(new DocumentCloser(document)).get();
            for (Future<byte[]> future : listFuture) {
                result.add(future.get());
            }
            return result;
        } catch (Exception ex) {
            log.error(" pdf 转换图片错误  Error handling PDF document " + ex);
        } finally {
            executorService.shutdown();
        }
        return null;
    }

    public class CapturePage implements Callable<byte[]> {
        private Document document;
        private int pageNumber;
        private String imageType;
        private float rotation;
        private float zoom;

        private CapturePage(Document document, int pageNumber, String imageType, float rotation, float zoom) {
            this.document = document;
            this.pageNumber = pageNumber;
            this.imageType = imageType;
            this.rotation = rotation;
            this.zoom = zoom;
        }

        @Override
        public byte[] call() throws Exception {
            BufferedImage image = (BufferedImage) document.getPageImage(pageNumber, GraphicsRenderingHints.SCREEN, Page.BOUNDARY_CROPBOX, rotation, zoom);
            ByteArrayOutputStream bs = new ByteArrayOutputStream();
            ImageOutputStream imOut = ImageIO.createImageOutputStream(bs);
            ImageIO.write(image, imageType, imOut);
            image.flush();
            return bs.toByteArray();
        }
    }

    /**
     * Disposes the document.
     */
    public class DocumentCloser implements Callable<Void> {
        private Document document;

        private DocumentCloser(Document document) {
            this.document = document;
        }

        @Override
        public Void call() {
            if (document != null) {
                document.dispose();
                log.info("Document disposed");
            }
            return null;
        }
    }

}

springutils

package com.yunfatong.ojd.util;

/**
 * @Auther liran
 * @Date 2018/8/30 14:49
 * @Description
 */
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

@Component
public class SpringUtil implements ApplicationContextAware {

    private static ApplicationContext applicationContext;

    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        if(SpringUtil.applicationContext == null) {
            SpringUtil.applicationContext = applicationContext;
        }
        System.out.println("========ApplicationContext配置成功,在普通类可以通过调用SpringUtils.getAppContext()获取applicationContext对象,applicationContext="+SpringUtil.applicationContext+"========");

    }

    //获取applicationContext
    public static ApplicationContext getApplicationContext() {
        return applicationContext;
    }

    //通过name获取 Bean.
    public static Object getBean(String name){
        return getApplicationContext().getBean(name);
    }

    //通过class获取Bean.
    public static <T> T getBean(Class<T> clazz){
        return getApplicationContext().getBean(clazz);
    }

    //通过name,以及Clazz返回指定的Bean
    public static <T> T getBean(String name,Class<T> clazz){
        return getApplicationContext().getBean(name, clazz);
    }

}

 

pom.xml

 

  <!--word转换pdf begin-->
        <dependency>
            <groupId>org.jodconverter</groupId>
            <artifactId>jodconverter-core</artifactId>
            <version>4.2.2</version>
        </dependency>

        <dependency>
            <groupId>org.jodconverter</groupId>
            <artifactId>jodconverter-local</artifactId>
            <version>4.2.2</version>
        </dependency>
        <dependency>
            <groupId>org.jodconverter</groupId>
            <artifactId>jodconverter-spring-boot-starter</artifactId>
            <version>4.2.2</version>
        </dependency>
        <!--word转换pdf end-->

        <!--PDF转图片-->
        <dependency>
            <groupId>org.icepdf.os</groupId>
            <artifactId>icepdf-core</artifactId>
            <version>6.2.2</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.media</groupId>
                    <artifactId>jai_core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.icepdf.os</groupId>
            <artifactId>icepdf-viewer</artifactId>
            <version>6.2.2</version>
        </dependency>
        <!--PDF转图片 end-->

 

3、调用测试:

import com.yunfatong.ojd.service.FileSystemStorageService;
import lombok.extern.slf4j.Slf4j;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.List;

@RunWith(SpringRunner.class)
@SpringBootTest
@Slf4j
public class WordTransferPdfUtilTest {
    @Autowired
    TransferUtil wordTransferPdfUtil;
    @Autowired
    FileSystemStorageService fileSystemStorageService;


    @Test
    public void transferLocalFile() {
   try {
/*******************word 转pdf******************/
long time = System.currentTimeMillis();
System.out.println("start :======" + time);
wordTransferPdfUtil.transferWordToPdf("courtChongqing/test_new/555.docx");
log.error(System.currentTimeMillis() + " time============================== :" + ((System.currentTimeMillis() - time) / 1000));

/*******************pdf转图片******************/
long time2 = System.currentTimeMillis();
List<String> pdfImages2 = wordTransferPdfUtil.transferPdfToImage("courtChongqing/test_new/333.pdf");
for (String pdfImage : pdfImages2) {
log.error(pdfImage);
}
log.error(" time===============================22222222 :" + ((System.currentTimeMillis() - time2) / 1000));
// System.out.println("pdf path =============" + path);
} catch (Exception e) {
e.printStackTrace();
}
 } }

 

posted @ 2019-05-17 20:47  _Phoenix  阅读(4692)  评论(3编辑  收藏  举报