Java 解析 Word

717 阅读1分钟

添加 Maven 依赖

<!-- Apache POI OOXML artifact, pinned to version 4.1.2; it is required for the XWPF* classes used in the Java snippets of this article. -->
<dependency>
  <groupId>org.apache.poi</groupId>
  <artifactId>poi-ooxml</artifactId>
  <version>4.1.2</version>
</dependency>

1. 读取 Word 内容

String fileName = "xtaq.docx";

try (XWPFDocument doc = new XWPFDocument(Files.newInputStream(Paths.get(fileName)))) {

   //读取word中所有的文字,包含表格
  readAllText(doc);

} catch (Exception e) {
  e.printStackTrace();
}
/**
 * Prints the text that {@link XWPFWordExtractor} extracts from the given
 * document to standard output.
 *
 * @param doc the already opened Word document; it is not closed here
 */
private void readAllText(XWPFDocument doc) {
  // NOTE(review): the extractor is intentionally not closed here; closing it
  // may also close the underlying document that the caller still owns --
  // confirm against the POI version in use.
  XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
  System.out.println(extractor.getText());
}

2. 解析出所有段落(示例中仅打印设置了样式的段落)

// Fetch the document's paragraphs, then print the text of every paragraph
// whose style is non-null; paragraphs without a style are skipped.
List<XWPFParagraph> xwpfParagraphList = doc.getParagraphs();
xwpfParagraphList.stream()
    .filter(paragraph -> paragraph.getStyle() != null)
    .map(paragraph -> paragraph.getText())
    .forEach(text -> System.out.println(text));