js 导出 docx 文档

618 阅读3分钟

前言

本文主要介绍如何将 HTML 转换并导出为 docx 文档。

一、使用 html-docx-js-typescript

npm i html-docx-js-typescript
pnpm add html-docx-js-typescript

导出docx 文件

import { asBlob } from 'html-docx-js-typescript'
// Convert the HTML string into a .docx Blob, then hand an object URL for it to the
// download helper under a timestamp-based file name.
asBlob("内容").then(result => {
  const objectUrl = URL.createObjectURL(result as Blob)
  downloadFile(objectUrl, `${new Date().getTime()}.docx`)
})

如何分页

在需要分页的部分前面加上下面的标签

<span><br clear="all" style="page-break-before:always" ></span>

这种方式的优点是非常简洁,缺点是生成的文件无法被 docx 预览库在线预览。

二、使用 docx.js 生成

docx.js 的生成逻辑基于 tinymce 编辑器输出的富文本样式;具体标签等的处理可能需要根据实际情况调整。

npm i docx
pnpm add docx

index.ts


import { Document, Packer } from 'docx'
import { richTextToParagraph } from './html'
import { downloadFile } from '../../es'

/**
 * Converts rich text (HTML) into a .docx blob and passes an object URL for it to `downloadFile`.
 *
 * @param richText - One HTML string or an array of HTML strings; forwarded to `converRichTextToDocument`.
 * @param filename - Base name without extension. When omitted or empty, `undefined` is passed
 *   to `downloadFile` as the file name.
 * @returns A promise that settles after the blob was generated and handed to `downloadFile`.
 *   Returning it (instead of dropping the chain) lets callers await the export or handle a
 *   packing failure; callers that ignore the return value behave exactly as before.
 */
export function pureRichTextExportDocx(richText: string | string[], filename?: string): Promise<void> {
  return docxPackerToBlob(converRichTextToDocument(richText)).then(blob => {
    const url = URL.createObjectURL(blob)
    downloadFile(url, filename ? `${filename}.docx` : undefined)
  })
}

/** Serializes a docx `Document` through `Packer.toBlob` and returns the resulting promise. */
function docxPackerToBlob(doc: Document) {
  const pendingBlob = Packer.toBlob(doc)
  return pendingBlob
}

/**
 * Builds a docx document from the rich text and returns it packed as a Blob promise.
 * Unlike `pureRichTextExportDocx`, nothing is downloaded.
 */
export function pureRictTextToBlob(richText: string | string[]) {
  const doc = converRichTextToDocument(richText)
  return docxPackerToBlob(doc)
}

/**
 * Wraps the paragraphs produced by `richTextToParagraph` in a single-section docx `Document`.
 *
 * @param richText - One HTML string or an array of HTML strings.
 * @returns The assembled document with fixed page margins and page size.
 */
export function converRichTextToDocument(richText: string | string[]) {
  // These values are already expressed in twips (converted from mm beforehand),
  // so no further unit conversion is applied here.
  const pageMargin = { top: 1814, bottom: 1814, left: 1417, right: 1417 }
  // 11906 x 16838 twips corresponds to A4 (210mm x 297mm).
  const pageSize = { width: 11906, height: 16838 }

  return new Document({
    features: { updateFields: true },
    sections: [
      {
        properties: { page: { margin: pageMargin, size: pageSize } },
        children: richTextToParagraph(richText)
      }
    ]
  })
}

html.ts


import { Paragraph, TextRun, ParagraphChild } from 'docx'
import { cssStylesToAttrs, getInlineStyles, tagsToAttrs } from './style'

/**
 * Converts one or more rich-text HTML strings into a flat list of docx paragraphs.
 *
 * Every entry is preceded by an empty paragraph flagged `pageBreakBefore`, followed by the
 * paragraphs generated from that entry's HTML.
 */
export function richTextToParagraph(richText: string | string[]) {
  const sources = Array.isArray(richText) ? richText : [richText]
  const result: Paragraph[] = []

  for (const html of sources) {
    const pageBreak = new Paragraph({ pageBreakBefore: true })
    result.push(pageBreak, ...convertRichTextToParagraphNodes(html))
  }

  return result
}

/**
 * Parses a rich-text HTML string and converts every paragraph / heading element in it
 * into a docx `Paragraph`.
 *
 * Each direct child node of a matched block becomes one `TextRun`:
 * - text nodes take the block's own styles;
 * - element nodes start from the block's styles, then apply their own inline styles
 *   (the child wins on conflicts, as in CSS), then the formatting tags (`strong`, `em`, `i`)
 *   found on the element itself or among its descendants.
 *
 * Limitation: inline styles of nested descendants (e.g. a span inside a span) are not read;
 * only the direct child's own `style` attribute is.
 *
 * @param richText - HTML markup to convert.
 * @returns One `Paragraph` per matched `p` / `h1`-`h6` element, in document order.
 * @see https://docx.js.org/
 */
function convertRichTextToParagraphNodes(richText: string) {
  // DOMParser builds a detached document, so the markup is only parsed once and nothing
  // is attached to the live page.
  const parsedHtml = new DOMParser().parseFromString(richText, 'text/html')

  const paragraphNodes: Paragraph[] = []
  const formattingSelector = ['strong', 'em', 'i'].join(',')

  parsedHtml.body.querySelectorAll('p,h1,h2,h3,h4,h5,h6').forEach(element => {
    const paragraphChildrenNodes: ParagraphChild[] = []

    const rootStyles = cssStylesToAttrs(getInlineStyles(element))

    element.childNodes.forEach(node => {
      if (node.nodeType === Node.TEXT_NODE) {
        // Text nodes carry no inline style of their own; they simply take the block's styles.
        paragraphChildrenNodes.push(
          new TextRun({
            text: node.textContent || '',
            ...rootStyles
          })
        )
      } else if (node.nodeType === Node.ELEMENT_NODE) {
        const elementNode = node as Element

        // querySelectorAll only returns descendants, so the element itself has to be checked
        // separately; otherwise `<p><strong>x</strong></p>` would lose its bold flag.
        const taggedElements = Array.from(elementNode.querySelectorAll(formattingSelector))
        if (elementNode.matches(formattingSelector)) {
          taggedElements.unshift(elementNode)
        }
        const tagAttrs = tagsToAttrs(taggedElements.map(el => el.tagName.toLowerCase()))

        // Merge order: block styles first, then the child's own inline styles, then tag-derived
        // attributes. A copy of rootStyles is used because customMerge mutates its target.
        const deepAttrs = customMerge(
          customMerge({ ...rootStyles }, cssStylesToAttrs(getInlineStyles(elementNode))),
          tagAttrs
        )

        paragraphChildrenNodes.push(
          new TextRun({
            text: elementNode.textContent ?? '',
            ...deepAttrs
          })
        )
      }
    })
    const paragraph = new Paragraph({
      children: paragraphChildrenNodes,
      spacing: {
        line: rootStyles.lineSpacing,
        lineRule: 'exact'
      },
      alignment: rootStyles.alignment,
      indent: rootStyles.indent
    })
    paragraphNodes.push(paragraph)
  })

  return paragraphNodes
}

/**
 * Copies every own enumerable property of `source` onto `target`, skipping properties
 * whose value is `undefined` so they never overwrite an existing value.
 *
 * @param target - Object that receives the properties; it is mutated in place.
 * @param source - Object whose defined properties are copied.
 * @returns The same `target` reference, after mutation.
 */
function customMerge<T extends object>(target: T, source: object): T {
  const writable = target as Record<string, unknown>
  for (const [key, value] of Object.entries(source)) {
    if (value !== undefined) {
      writable[key] = value
    }
  }
  return target
}

/**
 * Walks a node list depth-first and appends one `Paragraph` per visited node.
 *
 * Every node (not only leaves) yields a paragraph holding a single `TextRun` with the node's
 * `textContent`, styled from the node's inline styles; the node's children are then visited
 * the same way.
 *
 * @param childrenNodes - Nodes to visit.
 * @param paragraphNodes - Accumulator; mutated in place and also returned.
 * @returns The accumulator containing all generated paragraphs.
 */
export function recursiveHtmlNodes(childrenNodes: ChildNode[], paragraphNodes: Paragraph[] = []) {
  for (const current of childrenNodes) {
    const runOptions = {
      text: current.textContent || '',
      ...cssStylesToAttrs(getInlineStyles(current))
    }
    paragraphNodes.push(new Paragraph({ children: [new TextRun(runOptions)] }))

    if (current.childNodes) {
      recursiveHtmlNodes(Array.from(current.childNodes), paragraphNodes)
    }
  }
  return paragraphNodes
}

style.ts

import { CSSProperties } from 'vue'
import { camelize } from 'vue'
import { AlignmentType, UnderlineType, ShadingType } from 'docx'

// Fallback font size in pt, used when an element declares text-indent without its own
// font-size. 12pt is the "小四" size this map treats as the default font size.
const DEFAULT_FONT_SIZE_PT = 12

/**
 * Maps camelCased CSS property names to the docx option they populate.
 *
 * Each entry names the target option (`property`) and a `handler(value, style)` that converts
 * the CSS value; `style` is the full inline-style object of the same element, for handlers
 * that need a sibling declaration.
 */
const styleTransformMap = {
  color: {
    property: 'color',
    handler: value => {
      // The DOM reports hex colors as rgb(...); convert them back to hex.
      return rgbToHexColor(value)
    }
  },
  backgroundColor: {
    property: 'shading',
    handler: value => {
      return {
        type: ShadingType.SOLID,
        color: rgbToHexColor(value),
        fill: rgbToHexColor(value)
      }
    }
  },

  textDecoration: {
    property: 'underline',
    handler: (value, style) => {
      // Only an actual `underline` declaration produces an underline; values such as
      // `none` or `line-through` must not.
      if (!String(value).includes('underline')) return undefined

      return {
        type: UnderlineType.SINGLE,
        color: style.color ? rgbToHexColor(style.color) : undefined
      }
    }
  },

  /**
   * @default
   *
   * Default font size is "小四" (12pt).
   */
  fontSize: {
    property: 'size',
    handler: value => {
      return convertFontSize(value)
    }
  },
  textIndent: {
    /**
     * The indent value is treated as a multiple of the font size (em); the font size is
     * read as pt and the result is expressed in twips.
     */
    property: 'indent',
    handler: (value, style) => {
      // A block may set text-indent while the font-size lives on its inline children;
      // fall back to the default instead of passing `undefined` to convertToNumber.
      const fontSize = style.fontSize ? convertToNumber(style.fontSize) : DEFAULT_FONT_SIZE_PT
      const em = convertToNumber(value)
      return {
        firstLine: convertPtToTwips(fontSize * em)
      }
    }
  },
  textAlign: {
    property: 'alignment',
    handler: value => {
      switch (value) {
        case 'center':
          return AlignmentType.CENTER
        case 'right':
          return AlignmentType.RIGHT
        case 'justify':
          return AlignmentType.JUSTIFIED
        default:
          return AlignmentType.LEFT
      }
    }
  },
  fontWeight: {
    property: 'bold',
    handler: value => {
      // `normal`, `lighter` and light numeric weights must not become bold; only the bold
      // keywords and numeric weights of 600 and above do.
      const weight = String(value).trim().toLowerCase()
      if (weight === 'bold' || weight === 'bolder') return true
      const numericWeight = Number.parseInt(weight, 10)
      return Number.isFinite(numericWeight) && numericWeight >= 600
    }
  },
  fontFamily: {
    property: 'font',
    handler: value => {
      return value
    }
  },
  lineHeight: {
    // The numeric part is multiplied by 20 (pt -> twips). NOTE(review): unitless multipliers
    // such as `1.5` are not distinguished from pt values here.
    property: 'lineSpacing',
    handler: value => {
      if (!value) return
      return convertToNumber(value) * 20
    }
  }
}

/**
 * Maps formatting tag names to the docx run option they switch on.
 * Each entry names the target option (`property`) and a `handler()` producing its value.
 */
const tagTrasnformMap = {
  // Italics
  em: {
    property: 'italics',
    handler: () => {
      return true
    }
  },
  // `<i>` is collected alongside `strong` / `em` by the HTML converter, so it needs a mapping
  // too; without one the tag is silently dropped.
  i: {
    property: 'italics',
    handler: () => {
      return true
    }
  },
  // Bold
  strong: {
    property: 'bold',
    handler: () => {
      return true
    }
  }
}

/**
 * Translates a style object into docx option attributes.
 *
 * Each key is camelized. A key with an entry in `styleTransformMap` is written under that
 * entry's `property` name with the value returned by its handler; any other key is copied
 * through unchanged under its camelized name.
 *
 * @param style - Style declarations keyed by property name.
 * @returns A plain object of attributes.
 */
export function cssStylesToAttrs(style: CSSProperties): any {
  const result: any = {}

  for (const rawKey of Object.keys(style)) {
    const camelKey = camelize(rawKey)
    const rawValue = style[rawKey]
    const transform = styleTransformMap[camelKey]

    if (transform) {
      result[transform.property] = transform.handler(rawValue, style)
    } else {
      result[camelKey] = rawValue
    }
  }

  return result
}

/**
 * Converts a list of tag names into docx attributes using `tagTrasnformMap`.
 * Tag names without a mapping are ignored.
 *
 * @param tags - Lower-cased tag names.
 * @returns An object holding the option set by each recognised tag.
 */
export function tagsToAttrs(tags: string[]) {
  const attrs: any = {}

  for (const tagName of tags) {
    const transform = tagTrasnformMap[tagName]
    if (transform) {
      attrs[transform.property] = transform.handler()
    }
  }

  return attrs
}

/**
 * Reads an element's inline style (`style.cssText`) and returns it as an object with
 * camelCased keys. A missing element or a node without a `style` yields an empty object.
 */
export function getInlineStyles(element: any) {
  const cssText = element?.style?.cssText || ''
  const parsed = parseStringStyle(cssText)

  const styles: CSSProperties = {}
  for (const name of Object.keys(parsed)) {
    styles[camelize(name)] = parsed[name]
  }

  return styles
}

/**
 * docx expresses font sizes in half-points, so the point value is doubled (10pt -> 20).
 * Only `pt` values are expected: the unit is not inspected, just the numeric part.
 */
function convertFontSize(fontSize) {
  const points = convertToNumber(fontSize)
  return points * 2
}

/** Converts a length in points to twips (1 pt = 20 twips). */
function convertPtToTwips(pt: number) {
  const twipsPerPoint = 20
  return pt * twipsPerPoint
}

/**
 * Extracts the first unsigned number from a CSS value such as `12pt`, `1.5em` or `.5em`.
 *
 * @param cssString - CSS value. Numbers are accepted as-is; `null`, `undefined` and other
 *   non-string input yield 0 instead of throwing, because callers may pass a style
 *   declaration that is absent.
 * @returns The parsed number, or 0 when no number is present. A leading sign is ignored.
 */
function convertToNumber(cssString: string | number | null | undefined): number {
  if (typeof cssString === 'number') {
    return Number.isFinite(cssString) ? cssString : 0
  }
  if (typeof cssString !== 'string') return 0

  // `\d*\.?\d+` also accepts a leading-dot decimal (`.5`), which the stricter `\d+(\.\d+)?`
  // would read as `5`.
  const match = cssString.match(/\d*\.?\d+/)

  return match ? parseFloat(match[0]) : 0
}

/**
 * Normalizes a CSS color to the six-digit `#rrggbb` form.
 *
 * - `rgb(...)` / `rgba(...)` with comma- or space-separated integer components are converted;
 *   an alpha component is dropped and each component is clamped to 255.
 * - Hex input is returned as six digits: `#rgb` / `#rgba` are expanded and the alpha digits of
 *   `#rgba` / `#rrggbbaa` are removed. A `#rrggbb` value is returned unchanged.
 * - Anything else (e.g. a named color) is returned as given.
 *
 * @param rgb - CSS color string.
 * @returns The `#rrggbb` color, or the original string when it is not recognised.
 */
function rgbToHexColor(rgb: string): string {
  const color = rgb.trim()

  if (isHexColor(color)) {
    const digits = color.slice(1)
    if (digits.length === 6) return color
    if (digits.length === 8) return '#' + digits.slice(0, 6)
    // 3 or 4 digits: double each of the first three, dropping a possible alpha digit.
    return (
      '#' +
      digits
        .slice(0, 3)
        .split('')
        .map(digit => digit + digit)
        .join('')
    )
  }

  // Matches `rgb(r, g, b)`, `rgba(r, g, b, a)` and the space-separated `rgb(r g b / a)` form.
  const rgbMatch = color.match(
    /^rgba?\(\s*(\d{1,3})\s*[,\s]\s*(\d{1,3})\s*[,\s]\s*(\d{1,3})\s*(?:[,/]\s*[\d.]+%?\s*)?\)$/i
  )

  // Not a recognised rgb value: return the original string.
  if (!rgbMatch) return rgb

  // Converts one component to two hex digits; values above 255 are clamped so the result
  // always has exactly six digits.
  const convertToHex = (rgbComponent: string) =>
    Math.min(255, Number(rgbComponent)).toString(16).padStart(2, '0')

  return '#' + convertToHex(rgbMatch[1]) + convertToHex(rgbMatch[2]) + convertToHex(rgbMatch[3])
}

/** Returns true when `hex` is a `#`-prefixed hex color with 3, 4, 6 or 8 digits. */
function isHexColor(hex: string): boolean {
  const regExp = /^#(?:[A-Fa-f0-9]{3}){1,2}$|^#(?:[A-Fa-f0-9]{4}){1,2}$/
  return regExp.test(hex)
}

import { CSSProperties } from 'vue'

// Matches a `;` separating declarations, but not a `;` that is followed by a `)` with no `(`
// in between (i.e. a semicolon inside parentheses such as `url(...)`).
const listDelimiterRE = /;(?![^(]*\))/g
// Splits one declaration at its first `:`; the capture group holds everything after it.
const propertyDelimiterRE = /:([^]+)/
// Matches `/* ... */` comments.
const styleCommentRE = /\/\*[^]*?\*\//g

/**
 * Parses an inline CSS string into an object keyed by property name.
 * Comments are stripped first; declarations without a `:` are skipped; names and values
 * are trimmed.
 *
 * @example
 * "border: 1px solid transparent;color:red;" => { border:"1px solid transparent", color: 'red'  }
 */
export function parseStringStyle(cssText: string): CSSProperties {
  const parsed: CSSProperties = {}
  const declarations = cssText.replace(styleCommentRE, '').split(listDelimiterRE)

  for (const declaration of declarations) {
    if (!declaration) continue

    const parts = declaration.split(propertyDelimiterRE)
    if (parts.length > 1) {
      parsed[parts[0].trim()] = parts[1].trim()
    }
  }

  return parsed
}