Word 或 Excel 程序是以 COM 组件的形式存在的。如果能从 Java 中调用 Word 的 COM 组件,就能够使用它的方法获取 Word 文档中的文本信息。目前网上有很多提供这种操作的工具,JACOB(Java-COM Bridge)就是其中之一。使用 JACOB 前应确保本机安装有 Word 应用程序,否则无法建立 Java-COM 桥,进而无法解析文档。
jacob的下载地址为http://sourceforge.net/project/showfiles.php?group_id=109543&package_id=118368
下载到本机后解压缩。将 jacob.jar 复制到项目目录并添加到 Java Build Path,然后将 jacob.dll 文件放入系统盘的 system32 文件夹下。如果没有放置该 DLL 文件,运行时会出现 "no jacob-1.14.3-x86 in java.library.path" 的异常。
1
package test;
2
3
import com.jacob.activeX.ActiveXComponent;
4
import com.jacob.com.Dispatch;
5
import com.jacob.com.Variant;
6
7
public class WordReader {
8
9
public static void extractDoc(String inputFile, String outputFile){
10
boolean flag = false;
11
//打开word应用程序,生成一个ActivexComponent对象
12
ActiveXComponent app = new ActiveXComponent("Word.Application");
13
try{
14
//设置Word不可见
15
app.setProperty("Visible", new Variant(false));
16
//打开word文件
17
Dispatch doc1 = app.getProperty("Documents").toDispatch();
18
Dispatch doc2 = Dispatch.invoke(doc1, "Open", Dispatch.Method, new Object[]{ inputFile, new Variant(false),new Variant(true)},new int[1]).toDispatch();
19
//作文txt格式保存到临时文件
20
Dispatch.invoke(doc2, "SaveAs", Dispatch.Method, new Object[]{outputFile, new Variant(7)}, new int[1]);
21
//关闭word
22
Variant f = new Variant(false);
23
Dispatch.call(doc2, "Close", f);
24
flag = true;
25
}catch(Exception e){
26
e.printStackTrace();
27
}finally{
28
app.invoke("Quit", new Variant[]{});
29
}
30
if(flag == true){
31
System.out.println("Transformed Successfully!");
32
}else{
33
System.out.println("Transform Failed");
34
}
35
36
}
37
38
/**
39
* @param args
40
*/
41
public static void main(String[] args) {
42
//对测试文本进行处理
43
WordReader.extractDoc("d:/index/网点地址.doc", "d:/index/网点地址.txt");
44
}
45
}
46
package test;2

3
import com.jacob.activeX.ActiveXComponent;4
import com.jacob.com.Dispatch;5
import com.jacob.com.Variant;6

7
public class WordReader {8

9
public static void extractDoc(String inputFile, String outputFile){10
boolean flag = false;11
//打开word应用程序,生成一个ActivexComponent对象12
ActiveXComponent app = new ActiveXComponent("Word.Application");13
try{14
//设置Word不可见15
app.setProperty("Visible", new Variant(false));16
//打开word文件17
Dispatch doc1 = app.getProperty("Documents").toDispatch();18
Dispatch doc2 = Dispatch.invoke(doc1, "Open", Dispatch.Method, new Object[]{ inputFile, new Variant(false),new Variant(true)},new int[1]).toDispatch();19
//作文txt格式保存到临时文件20
Dispatch.invoke(doc2, "SaveAs", Dispatch.Method, new Object[]{outputFile, new Variant(7)}, new int[1]);21
//关闭word22
Variant f = new Variant(false);23
Dispatch.call(doc2, "Close", f);24
flag = true;25
}catch(Exception e){26
e.printStackTrace();27
}finally{28
app.invoke("Quit", new Variant[]{});29
}30
if(flag == true){31
System.out.println("Transformed Successfully!");32
}else{33
System.out.println("Transform Failed");34
}35
36
}37
38
/**39
* @param args40
*/41
public static void main(String[] args) {42
//对测试文本进行处理43
WordReader.extractDoc("d:/index/网点地址.doc", "d:/index/网点地址.txt");44
}45
}46



浙公网安备 33010602011771号