【Java】Word题库解析
一、需求场景:
一共四种题型,单选、多选、判断、简答
题目构成要素:题目、选项、答案、解析
一种题型一个Word文档存放,需要把这些题目写入DB维护
二、题库格式:
单选案例:

多选案例:

判断案例:


可以看出,单选、多选和判断题的格式都是一样的:
- 题目以数字和点开头,并设置了标题样式
- 选项以字母(A~G)和点开头
- 每一个答案的前缀固定有【答案:】
- 每一个解析的前缀固定有【解析:】
简答题没有选项,只由题目 + 答案组成
三、解析实现
依赖poi实现,mvn坐标:
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
文档读取:
/**
 * Loads a .docx file into an {@link XWPFDocument}.
 *
 * <p>The file stream is opened in a try-with-resources block so it is closed
 * even when the {@code XWPFDocument} constructor throws (for example on a
 * corrupt file). The returned document stays usable after the stream is
 * closed, exactly as before.
 *
 * @param path file-system path of the Word document
 * @return the parsed document; the caller is responsible for closing it
 */
@SneakyThrows
public static XWPFDocument getWordFile(String path) {
    try (FileInputStream fileInputStream = new FileInputStream(path)) {
        return new XWPFDocument(fileInputStream);
    }
}
获取所有段落:
// Fetch the document's paragraph list; each paragraph is later classified by its text.
List<XWPFParagraph> paragraphs = xwpfDocument.getParagraphs();
根据格式得知,每一个题目和题型都是一个段落,选项、答案、解析也各自是一个段落
相互之间没有关联性,和上一次的HTML报告相似
但是每个标题存在一个序号数前缀,使用一个迭代值进行计数
循环至下一个带序号数前缀的段落对象时,就是下一道题目了
为了保存每次读取的段落,需要创建一个原始的Item类
序列值用来分组管理,把题目、选项、答案、解析合并起来
/**
 * One raw paragraph read from the document, tagged with the running number
 * of the question it belongs to.
 *
 * <p>{@code @Data} already generates {@code toString()}, so no separate
 * {@code @ToString} is declared.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static final class RoughItem {
    /** Running question number used to group paragraphs of the same question. */
    public int serial;
    /** Text of the paragraph. */
    public String content;
}
最终要保存成一个题目对象
题目对象只有四个属性,题目、题型、答案、解析
/**
 * A fully assembled question: title, type, answer and explanation.
 *
 * <p>{@code @Data} already generates {@code toString()}, so no separate
 * {@code @ToString} is declared.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static final class ExamItem {
    /** Question text. */
    public String title;
    /** Question type. */
    public String type;
    /** Answer text. */
    public String answer;
    /** Explanation text. */
    public String explain;
}
完整工具类实现:
package jnpf.util;
import lombok.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
 * Parses question-bank Word documents (.docx) into {@link ExamItem} objects
 * and hands each one to a caller-supplied consumer.
 *
 * <p>Expected layout for choice-style documents (single choice, multiple
 * choice, true/false): a question paragraph starts with a number and a dot,
 * option paragraphs start with a letter A-G and a dot, the answer paragraph
 * starts with {@code 答案:} and the optional explanation paragraph starts with
 * {@code 解析:}. Short-answer documents have no options; a question is any
 * paragraph that carries a style, and the following unstyled paragraphs form
 * its answer.
 */
public class DbcpExamUtil {

    /** Letters that may label an option paragraph. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** Prefix of an answer paragraph. */
    private static final String ANSWER_PREFIX = "答案:";
    /** Prefix of an explanation paragraph. */
    private static final String EXPLAIN_PREFIX = "解析:";
    /** A question number: a positive integer without leading zeros. Compiled once. */
    private static final Pattern QUESTION_NUMBER = Pattern.compile("^[1-9]\\d*");
    private static final String TYPE1_RADIO = "0";
    private static final String TYPE2_CHECKBOX = "1";
    private static final String TYPE3_TRUE_OR_FASE = "2";
    private static final String TYPE4_SHORT_QA = "3";
    /** Character separating a question number / option letter from its text. */
    private static final char SPLIT_IDENTIFY = '.';

    /**
     * One raw paragraph, tagged with the running number of the question it
     * belongs to so that paragraphs can be grouped per question afterwards.
     */
    @Data
    @Builder
    @NoArgsConstructor
    @AllArgsConstructor
    public static final class RoughItem {
        /** Running question number (1-based) used as the grouping key. */
        public int serial;
        /** Text of the paragraph. */
        public String content;
    }

    /** A fully assembled question as handed to the consumer. */
    @Data
    @Builder
    @NoArgsConstructor
    @AllArgsConstructor
    public static final class ExamItem {
        /** Question text; for choice types the option lines are appended, separated by newlines. */
        public String title;
        /** Type code: "0" single choice, "1" multiple choice, "2" true/false, "3" short answer. */
        public String type;
        /** Answer text with the answer prefix removed (short answer: the joined answer paragraphs). */
        public String answer;
        /** Explanation text with the prefix removed, or an empty string when absent. */
        public String explain;
    }

    /**
     * Loads a .docx file into an {@link XWPFDocument}.
     *
     * <p>The file stream is closed even if the document constructor throws.
     *
     * @param path file-system path of the Word document
     * @return the parsed document; the caller is responsible for closing it
     */
    @SneakyThrows
    public static XWPFDocument getWordFile(String path) {
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads a single-choice document and passes every question to {@code consumer}.
     *
     * @param path     path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    public static void radioTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE1_RADIO, consumer);
    }

    /**
     * Reads a multiple-choice document and passes every question to {@code consumer}.
     *
     * @param path     path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    public static void checkBoxTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE2_CHECKBOX, consumer);
    }

    /**
     * Reads a true/false document and passes every question to {@code consumer}.
     *
     * @param path     path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    public static void trueOrFalseTypeRead(String path, Consumer<ExamItem> consumer) {
        readChoiceQuestions(path, TYPE3_TRUE_OR_FASE, consumer);
    }

    /**
     * Reads a short-answer document and passes every question to {@code consumer}.
     *
     * <p>A paragraph with a non-blank style starts a new question; every
     * following non-blank paragraph without a style is part of its answer.
     * Paragraphs that appear before the first styled paragraph belong to no
     * question and are ignored.
     *
     * @param path     path of the .docx file
     * @param consumer receives one {@link ExamItem} per question, in document order
     */
    @SneakyThrows
    public static void shortQaTypeRead(String path, Consumer<ExamItem> consumer) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        // All text is extracted inside the try block so the document can be closed before consuming.
        try (XWPFDocument document = getWordFile(path)) {
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                String text = paragraph.getText();
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                if (StringUtils.isNotBlank(paragraph.getStyle())) {
                    examCount++;
                } else if (examCount == 0) {
                    // Preamble before the first question: there is no title to attach it to.
                    continue;
                }
                roughItems.add(RoughItem.builder().serial(examCount).content(text).build());
            }
        }
        for (List<RoughItem> group : groupBySerial(roughItems).values()) {
            // The first paragraph of a group is always the styled title.
            String title = group.get(0).getContent();
            String answer = group.stream()
                    .skip(1)
                    .map(RoughItem::getContent)
                    .collect(Collectors.joining("\n"));
            consumer.accept(ExamItem.builder()
                    .title(title)
                    .type(TYPE4_SHORT_QA)
                    .answer(answer)
                    .explain("")
                    .build());
        }
    }

    /**
     * Shared parser for the three choice-style document types, which differ
     * only in the type code stamped on the resulting items.
     *
     * <p>Paragraphs that are neither a question, an option, an answer nor an
     * explanation are skipped, as are recognised paragraphs that occur before
     * the first numbered question.
     */
    @SneakyThrows
    private static void readChoiceQuestions(String path, String type, Consumer<ExamItem> consumer) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        try (XWPFDocument document = getWordFile(path)) {
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                String text = paragraph.getText();
                if (StringUtils.isBlank(text)) {
                    continue;
                }
                String head = leadingToken(text);
                if (QUESTION_NUMBER.matcher(head).matches()) {
                    // A numbered paragraph starts the next question.
                    examCount++;
                } else {
                    boolean recognised = OPTIONS.contains(head)
                            || text.startsWith(ANSWER_PREFIX)
                            || text.startsWith(EXPLAIN_PREFIX);
                    if (!recognised || examCount == 0) {
                        continue;
                    }
                }
                roughItems.add(RoughItem.builder().serial(examCount).content(text).build());
            }
        }
        for (List<RoughItem> group : groupBySerial(roughItems).values()) {
            // The first paragraph of a group is always the numbered question line.
            String title = group.get(0).getContent();
            String options = group.stream()
                    .map(RoughItem::getContent)
                    .filter(content -> OPTIONS.contains(leadingToken(content)))
                    .collect(Collectors.joining("\n"));
            consumer.accept(ExamItem.builder()
                    .title(title + "\n" + options)
                    .type(type)
                    .answer(firstWithPrefix(group, ANSWER_PREFIX))
                    .explain(firstWithPrefix(group, EXPLAIN_PREFIX))
                    .build());
        }
    }

    /**
     * Returns the text before the first dot, or the whole text when it has no dot.
     * Unlike {@code text.split("\\.")[0]} this never fails on a text made only of dots.
     */
    private static String leadingToken(String text) {
        int dot = text.indexOf(SPLIT_IDENTIFY);
        return dot < 0 ? text : text.substring(0, dot);
    }

    /**
     * Returns the first paragraph of the group that starts with {@code prefix},
     * with only that leading prefix removed, or an empty string when none matches.
     */
    private static String firstWithPrefix(List<RoughItem> group, String prefix) {
        return group.stream()
                .map(RoughItem::getContent)
                .filter(content -> content.startsWith(prefix))
                .map(content -> content.substring(prefix.length()))
                .findFirst()
                .orElse("");
    }

    /** Groups paragraphs by question number; the sorted map keeps questions in document order. */
    private static Map<Integer, List<RoughItem>> groupBySerial(List<RoughItem> items) {
        return items.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));
    }
}
调用工具方法:
/**
 * Imports the four question-bank documents (single choice, multiple choice,
 * true/false, short answer) and inserts every parsed question through
 * {@code baseMapper}.
 */
@Override
public void qaImport() {
    String radioDoc = "D:\\exam-repo\\单选题-答案.docx";
    String checkBoxDoc = "D:\\exam-repo\\多选题-答案.docx";
    String trueOrFalseDoc = "D:\\exam-repo\\判断题-答案.docx";
    String shortQaDoc = "D:\\exam-repo\\简答题.docx";

    // Every question type is stored the same way, so one sink serves all four readers.
    java.util.function.Consumer<DbcpExamUtil.ExamItem> saveToBank = item ->
            baseMapper.insert(TrnExQabank.builder()
                    .qaSubject(item.getTitle())
                    .qaType(item.getType())
                    .qaAnswer(item.getAnswer())
                    .qaAnaly(item.getExplain())
                    .build());

    DbcpExamUtil.radioTypeRead(radioDoc, saveToBank);
    DbcpExamUtil.checkBoxTypeRead(checkBoxDoc, saveToBank);
    DbcpExamUtil.trueOrFalseTypeRead(trueOrFalseDoc, saveToBank);
    DbcpExamUtil.shortQaTypeRead(shortQaDoc, saveToBank);
}

浙公网安备 33010602011771号