【Java】Word题库解析2
初稿见:https://www.cnblogs.com/mindzone/p/18362194
一、新增需求
在原稿题库之后,还需要生成一份纯题目 + 纯答案
答案放在开头,题目里面去掉答案

在检查题型时还发现部分内容略有区别:

所以在判断某个段落是否为答案时,需要同时兼容"答案:"和"参考答案:"这两种前缀
二、关于老版本支持
旧版 .doc 格式(例如 Word 2000 保存的文档)需要额外引入 poi-scratchpad 依赖才能解析,HWPFDocument 等类都在这个库里
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.0.0</version>
</dependency>
需要导入的资源:
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Range;
三、工具类实现
package cn.cloud9.word;
import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Turns a legacy {@code .doc} question bank into a "questions only" document with an
 * answer sheet prepended to it.
 *
 * <p>{@link #main(String[])} walks every paragraph of the source document, classifies it as a
 * question title, an option or an answer line, removes the answer paragraphs from the body,
 * and finally inserts the collected answers at the beginning of the document before writing
 * it to a new file.
 */
public class ExamUtil {

    /** Prefixes that mark a paragraph as an answer line. */
    private static final List<String> ANSWER_PREFIX = Arrays.asList("答案:", "参考答案:");

    /** Option letters; a paragraph whose text before the first dot equals one of them is an option. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");

    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";

    /** Regex used to split a paragraph on the dot that follows a question number or option letter. */
    private static final String SPLIT_IDENTIFY = "\\.";

    /** One classified paragraph, tagged with the running index of the question it belongs to. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** 1-based running index of the owning question; 0 means "before the first question". */
        public int serial;
        /** Question number exactly as written in the document. */
        public String exCode;
        /** Raw paragraph text. */
        public String content;
    }

    /** A question assembled from the rough items that share the same serial. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} document.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        /* try-with-resources: the stream is closed even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Loads a legacy {@code .doc} document.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        /* try-with-resources: the stream is closed even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new HWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads the hard-coded source document, strips its answer paragraphs, prepends an answer
     * sheet (ten answers per line) and writes the result next to the source under a
     * timestamped file name.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        int examCount = 0;
        String exCode = "";
        List<RoughItem> roughItems = new ArrayList<>();

        String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案 (增加 1301-2100共 800)中级保育师增加题库 .doc";
        String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案 (增加 1301-2100共 800)中级保育师增加题库 " + new Date().getTime() + ".doc";

        HWPFDocument wordFile = getWordFileDocType(filePath);
        Range range = wordFile.getRange();

        /*
         * NOTE(review): the loop bound is captured before any paragraph is deleted below; this
         * assumes HWPF keeps paragraph indices usable after delete() - confirm against POI.
         */
        int numParagraphs = range.numParagraphs();
        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            /* Split on dots; a text made only of dots yields an empty array, hence the guard */
            String[] split = text.split(SPLIT_IDENTIFY);
            String head = split.length > 0 ? split[0] : "";

            /* Does the text before the first dot look like a question number? */
            boolean isExamNo = head.matches(NUMBER_REGEXP);
            /* Does the paragraph start with one of the answer prefixes? */
            boolean isAnswer = ANSWER_PREFIX.stream().anyMatch(text::startsWith);
            /* Is the text before the first dot an option letter? */
            boolean isOptions = OPTIONS.contains(head);

            /* A question number starts a new question: advance the running counter */
            if (isExamNo) {
                ++examCount;
                exCode = head;
            }

            /* Titles, options and answers are all recorded under the current question */
            if (isExamNo || isAnswer || isOptions) {
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }

            /* An answer occupies a whole paragraph, so removing the paragraph removes the answer */
            if (isAnswer) {
                paragraph.delete();
            }
        }

        /*
         * Group by question index. A TreeMap is requested explicitly because the map returned
         * by the single-argument groupingBy gives no ordering guarantee, and the answer sheet
         * must follow the order of the questions.
         */
        Map<Integer, List<RoughItem>> listMap = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>();
        for (Map.Entry<Integer, List<RoughItem>> entry : listMap.entrySet()) {
            /* Serial 0 holds option/answer lines found before the first question: no title, skip */
            if (entry.getKey() == 0) {
                continue;
            }
            List<RoughItem> group = entry.getValue();

            /* Within a question the first recorded item is always its title */
            RoughItem titleItem = group.get(0);
            String content = titleItem.getContent().replace("\r", "");

            /* The first answer line of the group, without its prefix; empty when there is none.
               The longer prefix is removed first because it contains the shorter one. */
            String answer = group.stream()
                    .map(RoughItem::getContent)
                    .filter(line -> ANSWER_PREFIX.stream().anyMatch(line::startsWith))
                    .map(line -> line.replace(ANSWER_PREFIX.get(1), "").replace(ANSWER_PREFIX.get(0), ""))
                    .findFirst()
                    .orElse("")
                    .replace("\r", "");

            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(content)
                    .type(null)
                    .answer(answer)
                    .explain(null)
                    .build());
        }
        examItems.forEach(System.out::println);

        /* Answer sheet: one paragraph per row, rowSize answers per row */
        int examTotal = examItems.size();
        int rowSize = 10;
        int totalRow = (examTotal + rowSize - 1) / rowSize;

        /* insertBefore prepends to the document, so rows are written last-to-first */
        for (int currentRow = totalRow; currentRow >= 1; currentRow--) {
            int begin = (currentRow - 1) * rowSize;
            int end = Math.min(currentRow * rowSize, examTotal);
            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rowText.append("\r");
            range.insertBefore(rowText.toString());
        }

        wordFile.write(new File(newFilePath));
    }
}
四、答案嵌套在题目里的处理
答案嵌套在选项、题目中,需要再写逻辑判断



为了处理这种类型的题库文档,单开了一个新的工具类处理
细节部分看代码实现就行
package cn.cloud9.word;
import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Variant of the question-bank splitter for documents whose answers are embedded inside
 * the option lines or inside the question title instead of standing on their own line.
 *
 * <p>{@link #main(String[])} detects the embedded answer, removes it from the paragraph text,
 * and prepends an answer sheet to the document before writing it to a new file.
 */
public class ExamUtil2 {

    /** Markers appended to the correct option. */
    private static final List<String> ANSWER_IDENT = Arrays.asList("(正确答案)", "【正确答案】");

    /** True/false marks that may be embedded in a question title. */
    private static final List<String> ANSWER_IDENT2 = Arrays.asList("×", "√");

    /** Option letters that may be embedded in a question title as the answer. */
    private static final List<String> ANSWER_IDENT3 = Arrays.asList("A", "B", "C", "D", "E", "F", "G");

    /** Option letters; a paragraph whose text before the first dot equals one of them is an option. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");

    /** Alternative option notation; a paragraph containing any of these is treated as an option. */
    private static final List<String> OPTIONS2 = Arrays.asList("A、", "B、", "C、", "D、", "E、", "F、", "G、");

    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";

    /** Regex used to split a paragraph on the dot that follows a question number or option letter. */
    private static final String SPLIT_IDENTIFY = "\\.";

    /** One recorded paragraph or extracted answer, tagged with the index of its question. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** 1-based running index of the owning question; 0 means "before the first question". */
        public int serial;
        /** Question number exactly as written in the document. */
        public String exCode;
        /** Raw title text, or the extracted answer for answer items. */
        public String content;
    }

    /** A question assembled from the rough items that share the same serial. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} document.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        /* try-with-resources: the stream is closed even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Loads a legacy {@code .doc} document.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        /* try-with-resources: the stream is closed even when parsing fails */
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new HWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads the hard-coded source document, strips the answers embedded in options and
     * titles, prepends an answer sheet (ten answers per line) and writes the result next to
     * the source under a timestamped file name.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        int examCount = 0;
        String exCode = "";
        List<RoughItem> roughItems = new ArrayList<>();

        String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 ( )高级保育师理论题库增加.doc";
        String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 ( )高级保育师理论题库增加- " + new Date().getTime() + ".doc";

        HWPFDocument wordFile = getWordFileDocType(filePath);
        Range range = wordFile.getRange();

        /*
         * NOTE(review): the loop bound is captured before any paragraph text is replaced below;
         * this assumes HWPF keeps paragraph indices usable after replaceText() - confirm.
         */
        int numParagraphs = range.numParagraphs();
        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            /* Split on dots; a text made only of dots yields an empty array, hence the guard */
            String[] split = text.split(SPLIT_IDENTIFY);
            String head = split.length > 0 ? split[0] : "";

            /* Does the text before the first dot look like a question number? */
            boolean isExamNo = head.matches(NUMBER_REGEXP);
            /* Is this paragraph an option line (either notation)? */
            boolean isOptions = OPTIONS.contains(head) || OPTIONS2.stream().anyMatch(text::contains);
            /* Answer marker attached to an option line */
            boolean rightOption = ANSWER_IDENT.stream().anyMatch(text::contains) && isOptions;
            /* True/false mark written inside the question title */
            boolean rightOption2 = ANSWER_IDENT2.stream().anyMatch(text::contains) && isExamNo;
            /* Option letter written inside the question title */
            boolean rightOption3 = ANSWER_IDENT3.stream().anyMatch(text::contains) && isExamNo;
            boolean isAnswer = rightOption || rightOption2 || rightOption3;

            /* A question number starts a new question: advance the counter and record the title */
            if (isExamNo) {
                ++examCount;
                exCode = head;
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }

            if (isAnswer) {
                String stripped = text;
                String correctOption = "";
                if (rightOption) {
                    /* Literal replace: the marker "(正确答案)" would act as a regex group in
                       replaceAll and leave its parentheses behind in the paragraph */
                    for (String marker : ANSWER_IDENT) {
                        stripped = stripped.replace(marker, "");
                    }
                    paragraph.replaceText(stripped, false);
                    /* The option letter is taken from the first character of the line */
                    correctOption = String.valueOf(stripped.charAt(0));
                }
                if (rightOption2) {
                    correctOption = stripped.contains(ANSWER_IDENT2.get(0))
                            ? ANSWER_IDENT2.get(0)
                            : ANSWER_IDENT2.get(1);
                    for (String mark : ANSWER_IDENT2) {
                        stripped = stripped.replace(mark, "");
                    }
                    paragraph.replaceText(stripped, false);
                }
                if (rightOption3) {
                    /* The first letter (in A..G order) present in the title is taken as the answer */
                    for (String option : ANSWER_IDENT3) {
                        if (stripped.contains(option)) {
                            correctOption = option;
                            stripped = stripped.replace(option, "");
                            break;
                        }
                    }
                    paragraph.replaceText(stripped, false);
                }
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(correctOption)
                        .exCode(exCode)
                        .build());
            }
        }

        /*
         * Group by question index. A TreeMap is requested explicitly because the map returned
         * by the single-argument groupingBy gives no ordering guarantee, and the answer sheet
         * must follow the order of the questions.
         */
        Map<Integer, List<RoughItem>> listMap = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>();
        for (Map.Entry<Integer, List<RoughItem>> entry : listMap.entrySet()) {
            List<RoughItem> group = entry.getValue();
            /* Serial 0 holds answers found before the first question: no title, skip.
               A group of one is a title without any detected answer: skip as well. */
            if (entry.getKey() == 0 || group.size() == 1) {
                continue;
            }

            /* Within a question the first recorded item is its title, the second its answer */
            RoughItem titleItem = group.get(0);
            String content = titleItem.getContent().replace("\r", "");
            String answer = group.get(1).getContent().replace("\r", "");

            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(content)
                    .type(null)
                    .answer(answer)
                    .explain(null)
                    .build());
        }
        examItems.forEach(System.out::println);

        /* Answer sheet: one paragraph per row, rowSize answers per row */
        int examTotal = examItems.size();
        int rowSize = 10;
        int totalRow = (examTotal + rowSize - 1) / rowSize;

        /* insertBefore prepends to the document, so rows are written last-to-first */
        for (int currentRow = totalRow; currentRow >= 1; currentRow--) {
            int begin = (currentRow - 1) * rowSize;
            int end = Math.min(currentRow * rowSize, examTotal);
            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rowText.append("\r");
            range.insertBefore(rowText.toString());
        }

        wordFile.write(new File(newFilePath));
    }
}

浙公网安备 33010602011771号