实现docx转html

1,686 阅读2分钟

【实现docx转html 】

在前端页面上传word文档.docx 将内容转为html

前端实现

项目采用vue3 element-plus

  1. 安装mammoth
npm i mammoth -S
  2. 添加上传文件标签
<el-upload
    class="upload-demo" 
    :http-request="handleFileSelect"
    :limit="1">
    <el-button type="primary">选择文件</el-button>
</el-upload>
  3. 添加文件上传回调函数
//导入包
import mammoth from 'mammoth';
methods: {
    handleFileSelect(event) {
        this.readFileInputEventAsArrayBuffer(event, arrayBuffer => {
            mammoth
                .convertToHtml({ arrayBuffer: arrayBuffer })
                .then(this.displayResult)
                .done();
        });
    },
    readFileInputEventAsArrayBuffer(event, callback) {
        var file = event.file;
        var reader = new FileReader();
        reader.onload = function(loadEvent) {
            var arrayBuffer = loadEvent.target.result;
            callback(arrayBuffer);
        };
        reader.readAsArrayBuffer(file);
    },
    displayResult(result) {
        this.dataForm.content = result.value;
        this.$nextTick(() => {
            document.getElementById('document-file').value = '';
        });
    },
}
 
  4. 完成转换

存在问题：如果文档中带有图片，转换结果中返回的是 img 标签，其 src 属性为 base64 数据，例如：

<img src='data:image/jpg;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAkCAYAAABIdFAMAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAHhJREFUeNo8zjsOxCAMAAAABJRU5ErkJggg=='>

解决方案

  1. 将base64转为文件对象
base64toFile(dataurl, filename = 'file') {
  let arr = dataurl.split(',')
  let mime = arr[0].match(/:(.*?);/)[1]

  let suffix = mime.split('/')[1]

  let bstr = atob(arr[1])

  let n = bstr.length

  let u8arr = new Uint8Array(n)

  while (n--) {
    u8arr[n] = bstr.charCodeAt(n)
  }
  return new File([u8arr], `${filename}.${suffix}`, {
    type: mime
  })
},
  2. 调用文件上传接口 将图片存入数据库
 async displayResult(result) {
  let data = await this.replaceBase64AndUploadImg(result.value);
  this.dataForm.content = data
  this.$nextTick(() => {
    document.getElementById('document-file').value = ''
  })
},
async replaceBase64AndUploadImg(data) {
  let reg = new RegExp(/src="?(data.*?)("|>|\\s+)/)
  let value = reg.exec(data)
  return upload(this.base64toFile(value[1])).then(async res => {
  // 文件上传成功后 后端会返回文件路径
  //文件上传成功回调将src属性修改为文件在服务器中的地址
    data = data.replace(value[1], this.$baseUrl + '' + res.data)
    console.log(data)
    if (reg.test(data)) {
    //递归调用函数 将文档中所以base64 存入数据库
      return await this.replaceBase64AndUploadImg(data)
    } else {
      return data
    }
  })
},

后端实现

项目采用express

  1. 添加接口
const express = require('express');
const router = express.Router();
const fs = require('fs');
const path = require('path');
const mammoth = require('mammoth');
var multer = require('multer');
var upload = multer({
    dest: 'uploads/',
});
//上传图片的模板
/**
 * POST / — accepts a .docx upload (multipart field `file`), converts it to
 * HTML with mammoth, writes each embedded base64 image under
 * static/uploadsImg, rewrites the matching `src` to the saved path and
 * responds with `{ code: 200, data: <html> }`.
 *
 * Responds with `{ code: 400, msg }` when no file was uploaded and with
 * `{ code: 500, msg }` when reading, converting or saving fails, so the
 * request never hangs without a response.
 */
router.post('/', upload.single('file'), async (req, res) => {
    /**
     * Decodes a base64 data URL and writes it to static/uploadsImg.
     * @param {string} dataurl - data URL of the form `data:<mime>;base64,<payload>`.
     * @returns {Promise<string>} public path of the saved image; rejects if the write fails.
     */
    async function writeFile(dataurl) {
        const [header, payload] = dataurl.split(',');
        const mime = header.match(/:(.*?);/)[1];
        // The MIME subtype doubles as the file extension.
        const suffix = mime.split('/')[1];
        const dataBuffer = Buffer.from(payload, 'base64');
        // Timestamp joined (not numerically added) with a random token, to keep names unique.
        const keepname = `${Date.now()}-${Math.random().toString(36).slice(2)}.${suffix}`;
        await fs.promises.writeFile(path.join(__dirname, '../../static/uploadsImg/' + keepname), dataBuffer);
        return '/uploadsImg/' + keepname;
    }

    /**
     * Saves every embedded base64 image locally and replaces its data URI
     * with the saved image path.
     * @param {string} data - markup that may contain `src="data:..."` attributes.
     * @returns {Promise<string>} markup with every data URI replaced.
     */
    async function replaceBase64AndUploadImg(data) {
        // Group 1 captures the data URI; it ends at the closing quote, `>` or whitespace.
        const reg = /src="?(data:.*?)("|>|\s+)/;
        let html = data;
        let match = reg.exec(html);
        while (match !== null) {
            const dataUrl = match[1];
            const savedPath = await writeFile(dataUrl);
            // Splice by position so exactly the matched occurrence is replaced.
            const start = match.index + match[0].indexOf(dataUrl);
            html = html.slice(0, start) + savedPath + html.slice(start + dataUrl.length);
            match = reg.exec(html);
        }
        return html;
    }

    if (!req.file) {
        res.status(400).send({ code: 400, msg: 'Missing uploaded file in field "file"' });
        return;
    }

    try {
        const docxBuffer = await fs.promises.readFile(req.file.path);
        // In Node, mammoth takes the document bytes via the `buffer` option.
        const result = await mammoth.convertToHtml({ buffer: docxBuffer });
        const data = await replaceBase64AndUploadImg(result.value);
        res.send({
            code: 200,
            data,
        });
    } catch (err) {
        console.error(err);
        res.status(500).send({ code: 500, msg: 'Failed to convert document' });
    } finally {
        // Remove the temporary upload written by multer; it is not needed after conversion.
        fs.unlink(req.file.path, unlinkErr => {
            if (unlinkErr) {
                console.error(unlinkErr);
            }
        });
    }
});
module.exports = router;

  2. app.js中注册
app.use('/docxToHtml', docxToHtml) // register the docx -> html handler under /docxToHtml
  3. 前端调用接口 直接返回已经处理好的html
/**
 * Sends a document to the `/docxToHtml` endpoint as multipart form data.
 * @param {File|Blob} [query] - document to upload; when falsy an empty
 *   placeholder file named "file" is sent instead.
 * @returns {*} whatever `request` returns for the POST call.
 */
export const docxToHtml = query => {
    // Files are transferred in FormData format under the "file" field.
    const data = new FormData();
    const payload = query ? query : new File([], "file");
    data.append("file", payload);

    const options = {
      url: '/docxToHtml',
      method: 'post',
      data
    };
    return request(options);
};

为什么后端这个参数变了 mammoth.convertToHtml({ buffer }).then(displayResult).done();

在调试代码的时候 发现报错

Could not find file in options

然后在源码中排查

image.png

image.png

所以在后端（Node 环境）需要将参数 arrayBuffer 改为 buffer