原理
使用 Apache POI 的 HWPFDocument 类读取 .doc 文件，通过 WordToHtmlConverter 将其转换成 HTML，并设置 PicturesManager 来生成图片的 src（以下示例将图片转换成 Base64 编码的 data URI，直接内嵌到 HTML 中）。
实现
接口层
/**
 * HTTP endpoint that accepts an uploaded file and returns the HTML produced
 * by {@code service.convertDoc}.
 *
 * <p>The upload stream is opened in a try-with-resources block so it is
 * closed once the conversion finishes, whether it succeeds or fails.
 *
 * @param file the multipart upload bound to the {@code file} request parameter
 * @return the value returned by {@code service.convertDoc}; {@code null} if
 *         opening or closing the upload stream failed and
 *         {@code BizException.fail} returned normally
 */
@PostMapping(value = "/convert-doc")
public String convertDoc(@RequestParam("file") MultipartFile file) {
    String result = null;
    // Fully-qualified type name: this block must not depend on an import
    // that the surrounding file may not have.
    try (java.io.InputStream upload = file.getInputStream()) {
        result = service.convertDoc(upload);
    } catch (IOException e) {
        // Reached when the upload stream cannot be opened or closed.
        // NOTE(review): BizException.fail presumably throws; the cause `e` is
        // dropped because only the single-String form is visible here.
        BizException.fail("文件读取失败");
    }
    return result;
}
服务层
/**
 * Converts a Word binary document read from {@code inputStream} into an HTML
 * string, embedding every picture as a Base64 {@code data:} URI.
 *
 * <p>The HTML is serialized as UTF-8 bytes and decoded with the same charset,
 * so the result does not depend on the JVM's default charset.
 *
 * <p>Failures are reported through {@code BizException.fail}: {@code "不是doc文件"}
 * when the document cannot be opened, {@code "文件转换失败"} when the conversion
 * or serialization fails.
 *
 * @param inputStream stream positioned at the start of the document; this
 *                    method does not close it
 * @return the serialized HTML; {@code null} only if {@code BizException.fail}
 *         returns normally after a failure
 */
public String convertDoc(InputStream inputStream) {
    // One charset name for both writing and reading the serialized bytes.
    final String charsetName = "utf-8";

    HWPFDocument doc = null;
    try {
        doc = new HWPFDocument(inputStream);
    } catch (IOException e) {
        // NOTE(review): BizException.fail presumably throws; the cause `e` is
        // dropped because only the single-String form is visible here.
        BizException.fail("不是doc文件");
    }

    String html = null;
    try {
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Inline each picture so the resulting HTML is self-contained.
        converter.setPicturesManager((content, pictureType, suggestedName, widthInches, heightInches) ->
                "data:" + pictureType.getMime() + ";base64," + Base64.getEncoder().encodeToString(content)
        );
        converter.processDocument(doc);
        Document htmlDocument = converter.getDocument();

        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, charsetName);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

        // Decode with the charset the serializer wrote; the no-arg toString()
        // would use the platform default and corrupt non-ASCII text.
        html = outStream.toString(charsetName);
    } catch (Exception e) {
        BizException.fail("文件转换失败");
    }
    return html;
}