前言
本文档介绍将 HTML 转换为 docx 文档的两种解决方案。
一、使用 html-docx-js-typescript
npm i html-docx-js-typescript
pnpm add html-docx-js-typescript
导出docx 文件
import { asBlob } from 'html-docx-js-typescript'
// Convert the HTML string into a docx Blob, then pass an object URL for it to the
// download helper, using the current timestamp as the file name.
asBlob('内容').then(data => {
  const objectUrl = URL.createObjectURL(data as Blob)
  const fileName = `${new Date().getTime()}.docx`
  downloadFile(objectUrl, fileName)
})
如何分页
在需要分页的部分前面加上下面的标签
<span><br clear="all" style="page-break-before:always"></span>
这种方式的优点是非常简洁,缺点是生成的文件无法被 docx 预览库在线预览。
二、使用 docx.js 生成
下面的 docx.js 生成逻辑基于 tinymce 编辑器输出的富文本样式编写,具体标签等的处理可能需要根据实际情况调整。
npm i docx
pnpm add docx
index.ts
import { Document, Packer } from 'docx'
import { richTextToParagraph } from './html'
import { downloadFile } from '../../es'
/**
 * Builds a docx document from rich text and triggers a download of it.
 *
 * @param richText One HTML string, or several; see `converRichTextToDocument`.
 * @param filename Base name without extension. When omitted, `undefined` is passed
 *   to `downloadFile` as the file name.
 */
export function pureRichTextExportDocx(richText: string | string[], filename?: string) {
  const doc = converRichTextToDocument(richText)
  docxPackerToBlob(doc).then(blob => {
    const objectUrl = URL.createObjectURL(blob)
    const downloadName = filename ? `${filename}.docx` : undefined
    downloadFile(objectUrl, downloadName)
  })
}
/**
 * Packs a docx `Document` through `Packer.toBlob`.
 *
 * @param doc Document to pack.
 * @returns Whatever `Packer.toBlob` returns for the document.
 */
function docxPackerToBlob(doc: Document) {
  const pendingBlob = Packer.toBlob(doc)
  return pendingBlob
}
/**
 * Converts rich text into a docx document and packs it, without downloading it.
 *
 * @param richText One HTML string, or several.
 * @returns The result of `docxPackerToBlob` for the generated document.
 */
export function pureRictTextToBlob(richText: string | string[]) {
  const doc = converRichTextToDocument(richText)
  return docxPackerToBlob(doc)
}
/**
 * Wraps the paragraphs produced by `richTextToParagraph` in a single-section
 * docx `Document` with fixed page geometry.
 *
 * @param richText One HTML string, or several.
 * @returns The assembled `Document`.
 */
export function converRichTextToDocument(richText: string | string[]) {
  // Page geometry is already expressed in twips (pre-converted from millimetres),
  // so no unit conversion is applied here. 11906 x 16838 twips is 210 mm x 297 mm (A4).
  const page = {
    margin: { top: 1814, bottom: 1814, left: 1417, right: 1417 },
    size: { width: 11906, height: 16838 }
  }
  const children = richTextToParagraph(richText)
  return new Document({
    features: { updateFields: true },
    sections: [{ properties: { page }, children }]
  })
}
html.ts
import { Paragraph, TextRun, ParagraphChild } from 'docx'
import { cssStylesToAttrs, getInlineStyles, tagsToAttrs } from './style'
/**
 * Converts one or more rich-text HTML strings into a flat list of paragraphs.
 * Each input string is preceded by an empty paragraph with `pageBreakBefore` set,
 * so every string starts on its own page.
 *
 * @param richText One HTML string, or an array of them.
 * @returns Paragraphs for all inputs, in input order.
 */
export function richTextToParagraph(richText: string | string[]) {
  const sources = Array.isArray(richText) ? richText : [richText]
  const paragraphs: Paragraph[] = []
  for (const html of sources) {
    const pageBreak = new Paragraph({ pageBreakBefore: true })
    paragraphs.push(pageBreak)
    paragraphs.push(...convertRichTextToParagraphNodes(html))
  }
  return paragraphs
}
/**
 * Converts one rich-text HTML string into docx paragraphs.
 *
 * Every `p` / `h1`-`h6` element of the parsed markup becomes one `Paragraph`, and
 * each of its direct child nodes becomes one `TextRun`:
 * - a text node inherits the paragraph element's inline styles;
 * - an element node starts from the paragraph styles, is overridden by its own
 *   inline styles, and finally by attributes derived from formatting tags
 *   (`strong`, `em`, `i`) found on the element itself or anywhere inside it.
 *
 * Limitation: a child element is flattened into a single run, so a formatting tag
 * nested anywhere inside it applies to the whole run's text.
 *
 * @param richText HTML markup (the logic targets tinymce output).
 * @returns One paragraph per matched block element, in document order.
 * @see https://docx.js.org/
 */
function convertRichTextToParagraphNodes(richText: string) {
  // Parse with DOMParser only; the markup is never inserted into the live document.
  const parsedHtml = new DOMParser().parseFromString(richText, 'text/html')
  const inlineTagSelector = 'strong,em,i'
  const paragraphNodes: Paragraph[] = []
  parsedHtml.body.querySelectorAll('p,h1,h2,h3,h4,h5,h6').forEach(element => {
    const paragraphChildrenNodes: ParagraphChild[] = []
    const rootStyles = cssStylesToAttrs(getInlineStyles(element))
    element.childNodes.forEach(node => {
      if (node.nodeType === Node.TEXT_NODE) {
        paragraphChildrenNodes.push(
          new TextRun({
            text: node.textContent || '',
            // Text nodes carry no styles of their own; copy the defined paragraph styles.
            ...customMerge({}, rootStyles)
          })
        )
      } else if (node.nodeType === Node.ELEMENT_NODE) {
        const elementNode = node as Element
        // querySelectorAll only returns descendants, so the element itself has to be
        // checked separately (e.g. a <strong> that is a direct child of the paragraph).
        const taggedElements: Element[] = Array.from(elementNode.querySelectorAll(inlineTagSelector))
        if (elementNode.matches(inlineTagSelector)) {
          taggedElements.unshift(elementNode)
        }
        const tagAttrs = tagsToAttrs(taggedElements.map(tagged => tagged.tagName.toLowerCase()))
        const ownStyles = cssStylesToAttrs(getInlineStyles(elementNode))
        // Precedence, lowest to highest: paragraph styles, the element's own inline
        // styles, formatting tags. A fresh object is used as the base so rootStyles
        // is never mutated.
        const runAttrs = customMerge(customMerge(customMerge({}, rootStyles), ownStyles), tagAttrs)
        paragraphChildrenNodes.push(
          new TextRun({
            text: elementNode.textContent || '',
            ...runAttrs
          })
        )
      }
    })
    const paragraph = new Paragraph({
      children: paragraphChildrenNodes,
      spacing: {
        line: rootStyles.lineSpacing,
        lineRule: 'exact'
      },
      alignment: rootStyles.alignment,
      indent: rootStyles.indent
    })
    paragraphNodes.push(paragraph)
  })
  return paragraphNodes
}
/**
 * Copies every own enumerable property of `source` whose value is not `undefined`
 * onto `target`, overwriting keys that already exist, and returns `target`.
 *
 * `target` is mutated in place; pass a fresh object when the original must be kept.
 *
 * @param target Object that receives the properties.
 * @param source Object whose defined properties take precedence.
 * @returns The same `target` reference.
 */
function customMerge<T extends Record<string, unknown>>(target: T, source: Record<string, unknown>): T {
  // Write through a plain record view so arbitrary string keys can be assigned.
  const writable: Record<string, unknown> = target
  for (const [key, value] of Object.entries(source)) {
    if (value !== undefined) {
      writable[key] = value
    }
  }
  return target
}
/**
 * Walks a list of DOM nodes depth-first (parent before children) and appends one
 * single-run paragraph per visited node.
 *
 * Each run uses the node's `textContent`, which already includes the text of its
 * descendants, so nested text is emitted again for every descendant visited.
 *
 * @param childrenNodes Nodes to visit.
 * @param paragraphNodes Accumulator that is appended to; defaults to a new array.
 * @returns The accumulator.
 */
export function recursiveHtmlNodes(childrenNodes: ChildNode[], paragraphNodes: Paragraph[] = []) {
  for (const node of childrenNodes) {
    const run = new TextRun({
      text: node.textContent || '',
      ...cssStylesToAttrs(getInlineStyles(node))
    })
    paragraphNodes.push(new Paragraph({ children: [run] }))
    if (node.childNodes) {
      const nested = Array.from(node.childNodes)
      recursiveHtmlNodes(nested, paragraphNodes)
    }
  }
  return paragraphNodes
}
style.ts
import { CSSProperties } from 'vue'
import { camelize } from 'vue'
import { AlignmentType, UnderlineType, ShadingType } from 'docx'
/** Font size in points assumed when an element declares none (小四, i.e. 12pt). */
const DEFAULT_FONT_SIZE_PT = 12

/** Describes how one camelCased CSS property is converted into a docx option. */
interface StyleTransform {
  /** Key the converted value is stored under in the resulting attrs object. */
  property: string
  /**
   * Converts the declaration.
   * @param value Raw CSS value of the property.
   * @param style Complete style object the value came from, for handlers that
   *   need a sibling property.
   */
  handler: (value: string, style: CSSProperties) => unknown
}

/**
 * CSS property (camelCase) to docx option conversions.
 * Lengths are assumed to be written in pt unless stated otherwise.
 */
const styleTransformMap: Record<string, StyleTransform> = {
  color: {
    property: 'color',
    // The DOM serializes hex colors as rgb(...), so convert them back to hex.
    handler: value => rgbToHexColor(value)
  },
  backgroundColor: {
    property: 'shading',
    handler: value => {
      const hex = rgbToHexColor(value)
      return {
        type: ShadingType.SOLID,
        color: hex,
        fill: hex
      }
    }
  },
  textDecoration: {
    property: 'underline',
    handler: (value, style) => {
      // Only underline maps to this option; `none`, `line-through`, ... must not
      // produce an underline.
      if (!String(value).includes('underline')) return undefined
      return {
        type: UnderlineType.SINGLE,
        color: style.color ? rgbToHexColor(style.color) : undefined
      }
    }
  },
  fontSize: {
    property: 'size',
    handler: value => convertFontSize(value)
  },
  textIndent: {
    // The value is treated as a multiple of the font size (em); the result is a
    // first-line indent in twips.
    property: 'indent',
    handler: (value, style) => {
      // Fall back to the default size when the element declares no usable font-size.
      const declaredSize = style.fontSize ? convertToNumber(String(style.fontSize)) : 0
      const fontSize = declaredSize || DEFAULT_FONT_SIZE_PT
      const em = convertToNumber(value)
      return {
        firstLine: convertPtToTwips(fontSize * em)
      }
    }
  },
  textAlign: {
    property: 'alignment',
    handler: value => {
      if (value === 'center') {
        return AlignmentType.CENTER
      }
      if (value === 'right') {
        return AlignmentType.RIGHT
      }
      if (value === 'justify') {
        return AlignmentType.JUSTIFIED
      }
      return AlignmentType.LEFT
    }
  },
  fontWeight: {
    property: 'bold',
    handler: value => {
      const weight = String(value).trim().toLowerCase()
      const numeric = Number.parseInt(weight, 10)
      const isBold = weight === 'bold' || weight === 'bolder' || (Number.isFinite(numeric) && numeric >= 600)
      // Non-bold weights yield undefined rather than false, so the value is skipped
      // when merged and cannot clobber a bold flag that came from elsewhere.
      return isBold ? true : undefined
    }
  },
  fontFamily: {
    property: 'font',
    handler: value => value
  },
  lineHeight: {
    // The number in the value is read as pt and converted to twips (x20).
    property: 'lineSpacing',
    handler: value => {
      if (!value) return undefined
      return convertToNumber(value) * 20
    }
  }
}
/**
 * Formatting tag name (lowercase) to the run option it switches on.
 * Tags without an entry are ignored by lookups.
 */
const tagTrasnformMap: Record<string, { property: string; handler: () => boolean }> = {
  // Italic
  em: {
    property: 'italics',
    handler: () => true
  },
  // <i> is queried alongside <em>/<strong> by the HTML converter, so it needs a
  // mapping as well; without one it is silently dropped.
  i: {
    property: 'italics',
    handler: () => true
  },
  // Bold
  strong: {
    property: 'bold',
    handler: () => true
  }
}
/**
 * Converts a style object into an attribute object.
 *
 * Each key is camelized. Keys with an entry in `styleTransformMap` are stored under
 * that entry's `property` with the value returned by its handler; all other keys
 * are copied through unchanged under their camelized name.
 *
 * NOTE(review): the pass-through means unmapped CSS properties end up in the
 * returned object too - confirm that consumers tolerate the extra keys.
 *
 * @param style Style object (kebab- or camel-cased keys).
 * @returns Attribute object built from the style.
 */
export function cssStylesToAttrs(style: CSSProperties): any {
  const attrs: any = {}
  for (const key of Object.keys(style)) {
    const camelKey = camelize(key)
    const value = style[key]
    const config = styleTransformMap[camelKey]
    if (!config) {
      attrs[camelKey] = value
      continue
    }
    attrs[config.property] = config.handler(value, style)
  }
  return attrs
}
/**
 * Converts a list of tag names into an attribute object using `tagTrasnformMap`.
 * Tags without a mapping are skipped.
 *
 * @param tags Lowercase tag names.
 * @returns Attribute object with one entry per mapped tag property.
 */
export function tagsToAttrs(tags: string[]) {
  const attrs: any = {}
  for (const tag of tags) {
    const config = tagTrasnformMap[tag]
    if (config) {
      attrs[config.property] = config.handler()
    }
  }
  return attrs
}
/**
 * Reads an element's inline style (`style.cssText`) into an object with camelCased keys.
 * Nodes without a `style` (or a nullish argument) yield an empty object.
 *
 * @param element DOM node to read from.
 * @returns Parsed inline style declarations.
 */
export function getInlineStyles(element: any) {
  const cssText = element?.style?.cssText || ''
  const parsed = parseStringStyle(cssText)
  const styles: CSSProperties = {}
  for (const name of Object.keys(parsed)) {
    styles[camelize(name)] = parsed[name]
  }
  return styles
}
/**
 * Converts a CSS font size into the half-point unit docx uses (10pt -> 20).
 * Only pt values are handled: the number is extracted and doubled.
 */
function convertFontSize(fontSize) {
  const points = convertToNumber(fontSize)
  return points * 2
}
/**
 * Converts points to twips.
 *
 * @param pt Length in points.
 * @returns The length multiplied by 20.
 */
function convertPtToTwips(pt: number) {
  const twipsPerPoint = 20
  return pt * twipsPerPoint
}
/**
 * Extracts the first unsigned decimal number from a CSS value.
 *
 * Accepts forms such as `12pt`, `1.5em` and `.5em`. A leading sign is ignored.
 * `null`, `undefined` and values without digits yield 0; numeric input is
 * stringified first, so it no longer throws.
 *
 * @param cssString CSS value to read the number from.
 * @returns The parsed number, or 0 when none is present.
 */
function convertToNumber(cssString: string | number | null | undefined): number {
  if (cssString == null) return 0
  // Optional integer part, optional dot, mandatory trailing digits: covers "12", "1.5", ".5".
  const match = String(cssString).match(/\d*\.?\d+/)
  return match ? parseFloat(match[0]) : 0
}
/**
 * Converts a CSS color into `#rrggbb` form.
 *
 * - `rgb(r, g, b)` / `rgba(r, g, b, a)`: channels are clamped to 0-255 and
 *   converted to hex; the alpha component is dropped.
 * - Hex input: 6-digit values are returned unchanged; 3/4-digit shorthand is
 *   expanded and any alpha digits (4/8-digit forms) are dropped.
 * - Anything else (e.g. named colors) is returned unchanged.
 *
 * @param rgb CSS color string.
 * @returns `#rrggbb` when the input could be converted, otherwise the input.
 */
function rgbToHexColor(rgb: string) {
  if (isHexColor(rgb)) {
    const digits = rgb.slice(1)
    if (digits.length === 6) return rgb
    // Shorthand (#rgb / #rgba) doubles every digit; afterwards keep only RRGGBB.
    const expanded = digits.length <= 4 ? digits.replace(/./g, digit => digit + digit) : digits
    return '#' + expanded.slice(0, 6)
  }
  // Whitespace around the channels is tolerated; the optional 4th value is alpha.
  const rgbMatch = rgb
    .trim()
    .match(/^rgba?\(\s*(\d{1,3})\s*,\s*(\d{1,3})\s*,\s*(\d{1,3})\s*(?:,\s*[\d.]+%?\s*)?\)$/)
  if (!rgbMatch) {
    // Not a recognised rgb()/rgba() value: hand the input back untouched.
    return rgb
  }
  // Clamp so out-of-range channels cannot produce a hex component wider than two digits.
  const toHex = (channel: string) => Math.min(255, Number(channel)).toString(16).padStart(2, '0')
  return '#' + toHex(rgbMatch[1]) + toHex(rgbMatch[2]) + toHex(rgbMatch[3])
}
/**
 * Tests whether a string is a `#`-prefixed hex color with 3, 4, 6 or 8 digits.
 */
function isHexColor(hex: string) {
  const regExp = /^#(?:[A-Fa-f0-9]{3}){1,2}$|^#(?:[A-Fa-f0-9]{4}){1,2}$/
  return regExp.test(hex)
}
import { CSSProperties } from 'vue'
// Splits declarations on `;`, except a `;` that sits inside parentheses
// (for example inside `url(data:...;base64,...)`).
const listDelimiterRE = /;(?![^(]*\))/g
// Splits one declaration at its first `:`; everything after it is captured as the value.
const propertyDelimiterRE = /:([^]+)/
// Matches CSS block comments.
const styleCommentRE = /\/\*[^]*?\*\//g
/**
 * Parses an inline CSS string into an object keyed by the property names as written.
 * Comments are stripped first; declarations without a `:` are ignored.
 *
 * @example
 * "border: 1px solid transparent;color:red;" => { border:"1px solid transparent", color: 'red' }
 */
export function parseStringStyle(cssText: string): CSSProperties {
  const ret: CSSProperties = {}
  const declarations = cssText.replace(styleCommentRE, '').split(listDelimiterRE)
  for (const declaration of declarations) {
    if (!declaration) continue
    const parts = declaration.split(propertyDelimiterRE)
    if (parts.length > 1) {
      ret[parts[0].trim()] = parts[1].trim()
    }
  }
  return ret
}