<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.sy.parse</groupId>
<artifactId>parse-word</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>parse-word</name>
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
</dependencies>
<build>
<finalName>parse-word</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<!-- Maven JAR packaging -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<archive>
<manifest>
<!-- Configure the default main class used by java -jar -->
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<!-- Main program entry point -->
<mainClass>com.sy.parse.App</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<!-- Maven assembly (packaging) plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<descriptorRefs>
<!-- Bundle the dependencies into the JAR -->
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<!-- Configure the default main class used by java -jar -->
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
<!-- Main program entry point -->
<mainClass>com.sy.parse.App</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
package com.sy.parse;
import com.sy.parse.utils.ReadExcel;
import com.sy.parse.utils.ReadWord;
import java.util.Arrays;
import java.util.List;
/**
 * Reads Excel and Word resources and stores their text.
 *
 * @author Alice on 2021-08-20
 */
public class App {
    /**
     * Processes the configured Excel workbooks first, then the configured Word documents.
     *
     * @param args command-line arguments; not used
     * @throws Exception if reading a resource or storing its text fails
     */
    public static void main(String[] args) throws Exception {
        // Step 1: read the Excel workbooks, addressed as /excel/<name>.xls.
        for (String workbookName : Arrays.asList("1")) {
            ReadExcel.readExcel("/excel/" + workbookName + ".xls", workbookName);
        }
        // Step 2: read the Word documents, addressed as /word/<name>.doc.
        for (String documentName : Arrays.asList("a", "b")) {
            ReadWord.readWord("/word/" + documentName + ".doc", documentName);
        }
    }
}
package com.sy.parse.utils;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.InputStream;
/**
 * Reads an Excel workbook from the classpath and stores its text.
 *
 * @author Alice on 2021-08-20
 */
public class ReadExcel {
    public static final String LINE = System.getProperty("line.separator");

    /**
     * Extracts the text of every sheet of a {@code .xls} or {@code .xlsx} classpath resource,
     * echoes each row to standard output and passes the collected text to
     * {@code WriteFile.writeFile}.
     *
     * <p>For any other extension a message is printed and nothing is written.
     *
     * @param path     resource path of the workbook, resolved against this class
     * @param fileName name handed to {@code WriteFile.writeFile} for the output file
     * @throws java.io.FileNotFoundException if no resource exists at {@code path}
     * @throws Exception if the workbook cannot be parsed or the text cannot be written
     */
    public static void readExcel(String path, String fileName) throws Exception {
        java.net.URL resource = ReadExcel.class.getResource(path);
        if (resource == null) {
            // Fail with a descriptive error instead of an anonymous NullPointerException.
            throw new java.io.FileNotFoundException("Classpath resource not found: " + path);
        }
        String filePath = resource.getPath();
        System.out.println("================");
        System.out.println(filePath);

        String content;
        if (filePath.endsWith(".xls")) {
            // try-with-resources closes the stream and the workbook even when parsing fails.
            try (InputStream is = ReadExcel.class.getResourceAsStream(path);
                    HSSFWorkbook workbook = new HSSFWorkbook(is)) {
                content = extractText(workbook);
            }
        } else if (filePath.endsWith(".xlsx")) {
            try (InputStream is = ReadExcel.class.getResourceAsStream(path);
                    XSSFWorkbook workbook = new XSSFWorkbook(is)) {
                content = extractText(workbook);
            }
        } else {
            System.out.println("此文件不是excel文件.");
            return;
        }
        WriteFile.writeFile(fileName, content);
    }

    /** Collects the text of all sheets of a binary (.xls) workbook, one output line per row. */
    private static String extractText(HSSFWorkbook workbook) {
        StringBuilder content = new StringBuilder();
        int sheetCount = workbook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            HSSFSheet sheet = workbook.getSheetAt(i);
            int lastRow = sheet.getLastRowNum();
            for (int j = 0; j <= lastRow; j++) {
                HSSFRow row = sheet.getRow(j);
                if (row == null) {
                    continue; // gaps between physical rows are skipped
                }
                StringBuilder line = new StringBuilder();
                // getLastCellNum() is the last cell index PLUS ONE (-1 for an empty row),
                // so it is an exclusive upper bound.
                int cellEnd = row.getLastCellNum();
                for (int k = 0; k < cellEnd; k++) {
                    // Held as Object: only the cell's toString() value is needed.
                    Object cell = row.getCell(k);
                    if (cell != null) {
                        line.append(cell).append(" ");
                    }
                }
                System.out.println(line);
                content.append(line).append(LINE);
            }
        }
        return content.toString();
    }

    /** Collects the text of all sheets of an OOXML (.xlsx) workbook, one output line per row. */
    private static String extractText(XSSFWorkbook workbook) {
        StringBuilder content = new StringBuilder();
        int sheetCount = workbook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            XSSFSheet sheet = workbook.getSheetAt(i);
            int lastRow = sheet.getLastRowNum();
            for (int j = 0; j <= lastRow; j++) {
                XSSFRow row = sheet.getRow(j);
                if (row == null) {
                    continue; // gaps between physical rows are skipped
                }
                StringBuilder line = new StringBuilder();
                // getLastCellNum() is the last cell index PLUS ONE (-1 for an empty row),
                // so it is an exclusive upper bound.
                int cellEnd = row.getLastCellNum();
                for (int k = 0; k < cellEnd; k++) {
                    // Held as Object: only the cell's toString() value is needed.
                    Object cell = row.getCell(k);
                    if (cell != null) {
                        line.append(cell).append(" ");
                    }
                }
                System.out.println(line);
                content.append(line).append(LINE);
            }
        }
        return content.toString();
    }
}
package com.sy.parse.utils;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.io.InputStream;
/**
 * Reads a Word document from the classpath and stores its text.
 *
 * @author Alice on 2021-08-20
 */
public class ReadWord {
    /**
     * Extracts the text of a {@code .doc} or {@code .docx} classpath resource, echoes it to
     * standard output and passes it to {@code WriteFile.writeFile}.
     *
     * <p>For any other extension a message is printed and nothing is written.
     *
     * @param path     resource path of the document, resolved against this class
     * @param fileName name handed to {@code WriteFile.writeFile} for the output file
     * @throws java.io.FileNotFoundException if no resource exists at {@code path}
     * @throws Exception if the document cannot be parsed or the text cannot be written
     */
    public static void readWord(String path, String fileName) throws Exception {
        java.net.URL resource = ReadWord.class.getResource(path);
        if (resource == null) {
            // Fail with a descriptive error instead of an anonymous NullPointerException.
            throw new java.io.FileNotFoundException("Classpath resource not found: " + path);
        }
        String filePath = resource.getPath();

        String result;
        if (filePath.endsWith(".doc")) {
            // try-with-resources closes the stream and the extractor even when parsing fails.
            try (InputStream is = ReadWord.class.getResourceAsStream(path);
                    WordExtractor extractor = new WordExtractor(is)) {
                result = extractor.getText();
            }
        } else if (filePath.endsWith(".docx")) {
            // Open from the resource stream rather than from filePath: a URL path is not a
            // usable file path when the resource sits inside a JAR or contains escaped characters.
            try (InputStream is = ReadWord.class.getResourceAsStream(path);
                    POIXMLTextExtractor extractor = new XWPFWordExtractor(OPCPackage.open(is))) {
                result = extractor.getText();
            }
        } else {
            System.out.println("此文件不是word文件.");
            // Nothing was extracted, so do not create an empty output file.
            return;
        }
        System.out.println("============================");
        System.out.println(result);
        WriteFile.writeFile(fileName, result);
    }
}
package com.sy.parse.utils;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
/**
 * Writes text to files, always encoded as UTF-8.
 */
public class WriteFile {
    /** Directory that receives the files created by {@link #writeFile(String, String)}. */
    private static final String OUTPUT_DIR = "/home/bj-word/";

    /**
     * Stores {@code content} as {@code <fileName>.txt} inside the output directory, creating the
     * directory first when it does not exist yet.
     *
     * @param fileName base name of the output file, without extension
     * @param content  text to store
     * @throws Exception if the directory cannot be created or the file cannot be written
     */
    public static void writeFile(String fileName, String content) throws Exception {
        File dir = new File(OUTPUT_DIR);
        // mkdirs() also returns false when the directory already exists, hence the second check.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create output directory: " + OUTPUT_DIR);
        }
        bufferedWrite(OUTPUT_DIR + fileName + ".txt", content);
    }

    /**
     * Replaces the file at {@code path} with {@code content}, without extra buffering.
     *
     * @param path    file to create or overwrite
     * @param content text to write
     * @throws Exception if the file cannot be opened or written
     */
    public static void fileWrite(String path, String content) throws Exception {
        // Explicit UTF-8: the platform default charset may not be able to encode the text.
        try (Writer writer =
                new OutputStreamWriter(new FileOutputStream(path), StandardCharsets.UTF_8)) {
            writer.append(content);
        }
    }

    /**
     * Replaces the file at {@code path} with {@code content} through a buffered writer.
     *
     * @param path    file to create or overwrite
     * @param content text to write
     * @throws Exception if the file cannot be opened or written
     */
    public static void bufferedWrite(String path, String content) throws Exception {
        // Explicit UTF-8: the platform default charset may not be able to encode the text.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(path), StandardCharsets.UTF_8))) {
            writer.write(content);
        }
    }
}