场景
有这样一个需求:复制一个DOCX文档的内容到目标文档末尾,其中包括段落、图片、表格等,并且保持目标文档的样式(页边距、页眉页脚等)
调研
查找了一些资料后,主要有这么两个方向:一是调用Office、WPS、LibreOffice的文字处理程序完成合并功能;二是引入第三方库(实际底层可能也是用的前一种方法)
方案
我这里使用的是第三方库 python-docx(并非Python标准库,需要先通过 pip install python-docx 安装)
from docx import Document
from docx.text.paragraph import Paragraph
from docx.oxml import CT_P, CT_Tbl
from docx.table import _Cell, Table
from docx.document import Document as _Document
def append_doc_to_another(main_doc: _Document, doc_to_append_path: str) -> _Document:
    """
    Append the body content of another Word document to the end of *main_doc*.

    Every top-level paragraph and table of the document stored at
    *doc_to_append_path* is moved, in document order, to the end of the body
    of *main_doc*, ahead of the body's trailing section properties.

    Only the XML elements themselves are transferred.
    NOTE(review): parts referenced through relationship ids (e.g. embedded
    images) and the source document's style definitions are not copied here,
    so such content may not render in the target - verify against real input.

    :param main_doc: target document; it is modified in place
    :param doc_to_append_path: path of the .docx file whose content is appended
    :return: *main_doc*, the same object that was passed in
    :raises TypeError: if a block is neither a paragraph nor a table
    """
    source_doc = Document(doc_to_append_path)
    target_body = main_doc.element.body

    # `w:sectPr` (page margins, header/footer references, ...) has to remain
    # the last child of `w:body`; a plain append would place the new content
    # after it and produce a schema-invalid document.
    trailing_sect_pr = target_body.sectPr

    # Collect the blocks before moving anything: inserting an element into the
    # target tree removes it from the source tree, and we do not want to rely
    # on how the child iterator copes with its tree changing underneath it.
    for block in list(iter_block_items(source_doc)):
        if isinstance(block, Paragraph):
            element = block._element
        elif isinstance(block, Table):
            element = block._tbl
        else:
            raise TypeError(f"Unexpected type: {type(block)}")

        if trailing_sect_pr is None:
            target_body.append(element)
        else:
            trailing_sect_pr.addprevious(element)
    return main_doc
def iter_block_items(parent):
    """
    Generate the block-level children of *parent* in document order.

    *parent* must be a document or a table cell; anything else raises
    ValueError when iteration starts. Each paragraph element is yielded as a
    Paragraph and each table element as a Table; children of any other kind
    are skipped.
    """
    if not isinstance(parent, (_Document, _Cell)):
        raise ValueError("Unexpected block!")

    # A document keeps its blocks under the body element, a cell under `w:tc`.
    container = parent.element.body if isinstance(parent, _Document) else parent._tc

    # (XML element class, proxy class) pairs, tested in this order.
    proxies = ((CT_P, Paragraph), (CT_Tbl, Table))
    for node in container.iterchildren():
        for element_cls, proxy_cls in proxies:
            if isinstance(node, element_cls):
                yield proxy_cls(node, parent)
                break