1 import java.io.File;
2 import java.io.FileOutputStream;
3 import java.io.IOException;
4 import java.io.OutputStreamWriter;
5 import java.io.Writer;
6
7 import org.apache.pdfbox.pdmodel.PDDocument;
8 import org.apache.pdfbox.util.PDFTextStripper;
9
10 public class PdfToWord{
11 public static void main(String[] args){
12 try{
13 String pdfFile = "C:/xxxxx.pdf";
14 PDDocument doc = PDDocument.load(new File(pdfFile));
15 int pagenumber = doc.getNumberOfPages();
16 pdfFile = pdfFile.substring(0, pdfFile.lastIndexOf("."));
17 String fileName = pdfFile + ".doc";
18 File file = new File(fileName);
19 if (!file.exists()){
20 file.createNewFile();
21 }
22 FileOutputStream fos = new FileOutputStream(fileName);
23 Writer writer = new OutputStreamWriter(fos, "UTF-8");
24 PDFTextStripper stripper = new PDFTextStripper();
25 stripper.setSortByPosition(true);// 排序
26 stripper.setStartPage(1);// 设置转换的开始页
27 stripper.setEndPage(pagenumber);// 设置转换的结束页
28 stripper.writeText(doc, writer);
29 writer.close();
30 doc.close();
31 System.out.println("pdf转换word成功!");
32 }
33 catch (IOException e){
34 e.printStackTrace();
35 }
36 }
37 }
代码所用到的 jar 包为 pdfbox-1.8.2.jar，需另行下载并加入 classpath。