「这是我参与2022首次更文挑战的第27天,活动详情查看:2022首次更文挑战」
使用库:python-docx
安装:pip3 install python-docx
官方文档: https://python-docx.readthedocs.io/en/latest/
!pip3 install python-docx
Collecting python-docx
Downloading python-docx-0.8.11.tar.gz (5.6 MB)
|████████████████████████████████| 5.6 MB 143 kB/s
Collecting lxml>=2.3.2
Downloading lxml-4.6.3-cp39-cp39-macosx_10_9_x86_64.whl (4.6 MB)
|████████████████████████████████| 4.6 MB 143 kB/s
Building wheels for collected packages: python-docx
Building wheel for python-docx (setup.py) ... done
Created wheel for python-docx: filename=python_docx-0.8.11-py3-none-any.whl size=184600 sha256=1c87605adcd69ddfb607ab8adbe12a22072acb88e6d4b92da91dfee86b112b42
Stored in directory: /Users/lichizou/Library/Caches/pip/wheels/83/8b/7c/09ae60c42c7ba4ed2dddaf2b8b9186cb105255856d6ed3dba5
Successfully built python-docx
Installing collected packages: lxml, python-docx
Successfully installed lxml-4.6.3 python-docx-0.8.11
word结构与python对象
文档:Document
整个文章是一个Document对象,对象里包含多个段落对象Paragraph,放在
document.paragraphs
段落:Paragraph
一个段落对象Paragraph含有多个文字块run对象,放在
paragraph.runs里
文字块:Run
docx文档最小单位,同一个Run对象内的文本样式一致
from docx import Document
# Create a new, empty document
doc_1 = Document()
?doc_1.
Type: Document
String form: <docx.document.Document object at 0x112138d00>
File: /usr/local/lib/python3.9/site-packages/docx/document.py
Docstring:
WordprocessingML (WML) document.
Not intended to be constructed directly. Use :func:`docx.Document` to open or create
a document.
doc_1.paragraphs
[]
标题
# Add three headings; `level` selects the heading style.
# NOTE(review): the text labels are offset by one from `level`
# (level=0 is labelled '1级标题') - confirm this is intended.
doc_1.add_heading('1级标题', level = 0)
doc_1.add_heading('2级标题', level = 1)
doc_1.add_heading('3级标题', level = 2)
<docx.text.paragraph.Paragraph at 0x10e9d0880>
段落
数据对象:paragraph
段落的换行符需要手动添加
# Append a new paragraph to the document
paragraph_1 = doc_1.add_paragraph('段落1')
# Append a second paragraph
paragraph_2 = doc_1.add_paragraph('段落2')
# Insert a paragraph immediately before paragraph_1
prior_paragraph = paragraph_1.insert_paragraph_before('段落1前')
缩进和间距
# Alignment options: left, right, justified, centered, distributed
from docx.enum.text import WD_ALIGN_PARAGRAPH
# LEFT: left-aligned
# CENTER: centered
# RIGHT: right-aligned
# JUSTIFY: justified on both edges
# Use the enum name imported above and a paragraph that actually exists
# (paragraph_1 is created in the paragraph section); the previous line
# referenced an un-imported name and an undefined variable.
paragraph_1.alignment = WD_ALIGN_PARAGRAPH.LEFT
间距:段前,段后,行距
# Line-spacing rules (members of WD_LINE_SPACING):
# SINGLE: single line spacing (default)
# ONE_POINT_FIVE: 1.5x line spacing
# DOUBLE: 2x line spacing
# AT_LEAST: minimum value
# EXACTLY: fixed value
# MULTIPLE: multiple of single spacing
from docx.enum.text import WD_LINE_SPACING
from docx.shared import Pt

# Line spacing is exposed on paragraph.paragraph_format; assigning these
# names on the Paragraph object itself does not reach the document.
# paragraph_1 is the paragraph created in the paragraph section.
paragraph_1.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY  # fixed value
paragraph_1.paragraph_format.line_spacing = Pt(18)  # fixed at 18 pt
paragraph_1.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE  # multiple spacing
paragraph_1.paragraph_format.line_spacing = 1.75  # 1.75x line spacing
换行和分页
# Insert a page break first so the next paragraph really starts on page two
doc_1.add_page_break()
# Add one paragraph on the second page
paragraph_3 = doc_1.add_paragraph('第二页第1个段落')
多节页眉与页脚的设置
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
# Headers and footers
doc = Document()
# Header of the first section
header = doc.sections[0].header
# Show how many paragraphs the header holds before anything is added
print('页眉默认段落数:', len(header.paragraphs))
页眉默认段落数: 1
# Write the first section's header text into the existing header paragraph
par = header.paragraphs[0]
par.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
par.add_run('第一页页眉')
<docx.text.run.Run at 0x117ccac10>
# Footer of the first section, left-aligned
footer = doc.sections[0].footer
par = footer.paragraphs[0]
par.add_run('第一页的页脚')
par.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
# By default, later sections use the same header/footer style and content
# as the section before them
# Add two more sections (three in total)
doc.add_section()
doc.add_section()
<docx.section.Section at 0x117d43250>
# Second section (doc.sections[1]): centered header
header = doc.sections[1].header
header.is_linked_to_previous = False # this header's style and content differ from the first section's
par = header.paragraphs[0]
par.add_run('第二页页眉')
par.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
# Second section: centered footer
footer = doc.sections[1].footer
footer.is_linked_to_previous = False
par = footer.paragraphs[0]
par.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
par.add_run('第二页页脚')
<docx.text.run.Run at 0x117ccaa00>
# Third section (doc.sections[2]): right-aligned header
header = doc.sections[2].header
header.is_linked_to_previous = False # the third section's header differs from the previous section's
par = header.paragraphs[0]
par.add_run('第三页页眉')
par.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
# Third section: right-aligned footer
footer = doc.sections[2].footer
footer.is_linked_to_previous = False
par = footer.paragraphs[0]
par.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT # take care not to misspell the alignment name
par.add_run('第三页页脚')
<docx.text.run.Run at 0x117cc56d0>
# Write the three-section document to disk
doc.save('doc_14.docx')
字体
from docx.shared import RGBColor,Pt
# Font attributes listed by the tutorial:
# all_caps: all capital letters
# bold: bold
# color: font color
# double_strike: double strikethrough
# hidden: hidden
# imprint: imprint
# italic: italic
# name: typeface
# shadow: shadow
# strike: strikethrough
# subscript: subscript
# superscript: superscript
# underline: underline
# Style text in paragraph_1: each add_run creates a separately styled run
paragraph_1.add_run('粗体').bold = True
paragraph_1.add_run('普通字体')
<docx.text.run.Run at 0x10e9d0730>
# Append an italic run to paragraph_1
paragraph_1.add_run('斜体').italic = True
插入:图片或表格
# Inches is needed for the picture size and was not imported anywhere above
from docx.shared import Inches
# Open an existing document
doc = Document('doc_5.docx')
# Insert a picture scaled to 1 inch x 1 inch
doc.add_picture('./图片/周杰伦.jpg', width=Inches(1.0), height=Inches(1.0))
<docx.shape.InlineShape at 0x11842a400>
# Insert a table with 2 rows and 1 column
table = doc.add_table(rows=2, cols=1)
table.style = 'Medium Grid 1 Accent 1' # another option: Light Shading Accent 1
# Fill in the cell contents (two equivalent ways of addressing a cell)
table.cell(0, 0).text = 'row1, col1'
table.rows[1].cells[0].text = 'row2, col1'
# Append a row to the table
new_row = table.add_row().cells
new_row[0].text = 'row3, col1'
# Appending a column: look up the signature of add_column
help(table.add_column)
Help on method add_column in module docx.table:
add_column(width) method of docx.table.Table instance
Return a |_Column| object of *width*, newly added rightmost to the
table.
# Save the document with the picture and table under a new name
doc.save('doc_7.docx')
案例2:整体页面结构
from docx import Document
from docx.shared import RGBColor, Pt, Inches, Cm
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.ns import qn
doc_2 = Document()
# Set the document-wide font through the 'Normal' style
doc_2.styles['Normal'].font.name = '宋体'
# The East Asian (Chinese) font is set separately on the rFonts element
doc_2.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
# Add the title
heading_2 = doc_2.add_heading('title 1', level = 0)
heading_2.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER # default left
# Add an empty paragraph
paragraph_2_1 = doc_2.add_paragraph()
# Paragraph formatting
paragraph_2_1.paragraph_format.first_line_indent = Cm(0.75) # first-line indent 0.75 cm
paragraph_2_1.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT # left-aligned
paragraph_2_1.paragraph_format.space_after = Inches(1.0) # 1.0 inch (72 pt) after the paragraph
paragraph_2_1.paragraph_format.line_spacing = 1.5 # 1.5x line spacing
# Body text. Corrected: the random module's seeding function is seed(),
# not seek(); also fixed the typo 随即 -> 随机.
text = """random库函数:
1、seed() 种子,默认种子是系统时钟
2、random()函数,生成0到1的随机小数
3、uniform(a,b)生成a到b的随机小数
4、randint(a,b)生成一个a到b的随机整数
5、randrange(a,b,c) 生成一个a到b,以c递增的数
6、choice(<list>) 随机返回一个列表里面的元素
7、shuffle(<list>)将列表的元素随机打乱
8、sample(<list>,k)从列表中随机抽取k个元素
"""
# Put the text in a single run: 10 pt, bold, red
r_2 = paragraph_2_1.add_run(text)
r_2.font.size = Pt(10)
r_2.font.bold = True
r_2.font.color.rgb = RGBColor(255, 0, 0)
doc_2.save('doc_2.docx')
案例3-字体设置
from docx import Document
from docx.oxml.ns import qn
from docx.enum.style import WD_STYLE_TYPE
doc_3 = Document() # create a new docx document
# 设置文档里的可选字体样式
def add_font_style(doc, font_name_list):
    """Register one character style per font name on *doc*.

    Each style is named after its font and sets that font both as the
    regular font name and as the East Asian font, so the style can be
    passed to ``add_run(..., style=name)``.

    Args:
        doc: The python-docx document whose style collection is extended.
        font_name_list: Iterable of font names; names that already exist
            as styles in *doc* are skipped.
    """
    for font_name in font_name_list:
        if font_name in doc.styles:
            continue
        style_font = doc.styles.add_style(font_name, WD_STYLE_TYPE.CHARACTER)
        style_font.font.name = font_name
        # The qualified name must be exactly 'w:eastAsia' (no space after 'w:')
        style_font._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
#par = doc.add_paragraph()
#text = par.add_run(text, style=font)
# Register the three Chinese font styles, then use one per run
add_font_style(doc_3, ['宋体', '楷体', '华文中宋'])
par_3 = doc_3.add_paragraph()
r_3 = par_3.add_run('abhEFDSF,;宋体', style = '宋体')
font = r_3.font
# Font for the Latin characters; the name was misspelt as 'Cambira'
font.name = 'Cambria'
par_3.add_run('abhEFDSF,;楷体\n', style='楷体').font.name = 'Cambria'
par_3.add_run('abhEFDSF,;华文中宋\n', style='华文中宋').font.name = 'Cambria'
doc_3.save('doc_3.docx')
# Show the documentation of add_run (text / style parameters)
help(par_3.add_run)
Help on method add_run in module docx.text.paragraph:
add_run(text=None, style=None) method of docx.text.paragraph.Paragraph instance
Append a run to this paragraph containing *text* and having character
style identified by style ID *style*. *text* can contain tab
(``\t``) characters, which are converted to the appropriate XML form
for a tab. *text* can also include newline (``\n``) or carriage
return (``\r``) characters, each of which is converted to a line
break.
# 封装字体设置
def set_font_style(doc, font, text):
    """Append a paragraph to *doc* containing *text* in the given font.

    A character style named after *font* is created on first use; it sets
    the font as both the regular and the East Asian font.

    Args:
        doc: The python-docx document to append to.
        font: Font name, also used as the character-style name.
        text: Text of the single run placed in the new paragraph.
    """
    if font not in doc.styles:
        style_font = doc.styles.add_style(font, WD_STYLE_TYPE.CHARACTER)
        style_font.font.name = font
        # The qualified name must be exactly 'w:eastAsia' (no space after 'w:')
        style_font._element.rPr.rFonts.set(qn('w:eastAsia'), font)
    par = doc.add_paragraph()
    par.add_run(text, style=font)
# Write one example paragraph per font into a fresh document
doc_4 = Document()
font_style_list = ['宋体', '楷体', '华文中宋']
for style in font_style_list:
    text = 'example:' + style + '\n'
    set_font_style(doc_4, style, text)
doc_4.save('doc_5.docx')
保存文件
# Plain save to a file path
doc_1.save('doc_1.docx')
练习
根据excel表格
批量生成邀请函
分析
1.确定word的格式:在word先写一下
标题:1级标题,楷体,居中。固定文字:邀请函
段落1: 楷体,左对齐。5个文字块:
文字块1:尊敬的
文字块2: 加粗,下划线。文字为变量,对应excel的a列
文字块3: 公司
文字块4: 加粗,下划线。文字为变量,对应excel的b列和c列的拼接。b+c
文字块5: ,您好:
段落2: 楷体,左缩进2个字符
文字块1:现诚挚邀请您于2021年10月27日参加
文字块2:黑体。DataWhale
文字块3: 主办的享受开源2050活动,地点在北京鸟巢,希望您届时莅临参加。
段落3:楷体,右对齐
文字块1: 邀请时间:
文字块2: 来自excel的d列,做格式化:“x年x月x日”
页眉页脚,不设置,采用默认缩进
2.对接excel
from openpyxl import load_workbook
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.style import WD_STYLE_TYPE
from docx.oxml.ns import qn
from docx.shared import Pt
从excel获取数据,格式化成dict
wb = load_workbook(filename='./图片/excel到word.xlsx')
wb.sheetnames
['邀请人员']
# Select the worksheet by name
sheet = wb['邀请人员']
4
def format_date(d):
    """Return *d* formatted as 'YYYY年MM月DD日'.

    Args:
        d: Any object with a ``strftime`` method (``datetime.date`` or
            ``datetime.datetime``).

    The strftime pattern is kept ASCII-only and the Chinese unit
    characters are added afterwards, because non-ASCII characters inside
    a strftime format can fail or be garbled on some platforms/locales.
    """
    year, month, day = d.strftime('%Y-%m-%d').split('-')
    return f'{year}年{month}月{day}日'
format_date(sheet['d2'].value)
'2021年02月15日'
def get_format_cell(row):
    """Convert one worksheet row into the dict used to fill an invitation.

    Args:
        row: Indexable sequence of cell objects exposing ``.value``.
            Index 0 is used as the company, indexes 1 and 2 are
            concatenated into the invitee text, and index 3 must hold a
            date/datetime.

    Returns:
        dict with the keys 'company', 'people' and 'date'; the date is
        rendered as 'YYYY年MM月DD日'.
    """
    # ASCII-only strftime pattern; the unit characters are added afterwards
    # because non-ASCII format strings can misbehave on some platforms.
    year, month, day = row[3].value.strftime('%Y-%m-%d').split('-')
    return {
        'company': row[0].value,
        'people': row[1].value + row[2].value,
        'date': f'{year}年{month}月{day}日',
    }
# Print the formatted dict for every data row; min_row=2 skips the header row
for row in sheet.iter_rows(min_row=2):
    print(get_format_cell(row))
{'company': '阿里', 'people': '数据工程师牛云', 'date': '2021年02月15日'}
{'company': '腾讯', 'people': '数据分析师牛化腾', 'date': '2021年02月16日'}
{'company': '百度', 'people': '数据架构师张艳红', 'date': '2021年02月17日'}
{'company': '京东', 'people': '算法工程师王强东', 'date': '2021年02月18日'}
word格式测试
def add_style(doc, font_name):
    """Make sure *doc* has a character style named after *font_name*.

    The style sets *font_name* as both the regular and the East Asian
    font. If a style with that name already exists, nothing is changed,
    so calling this twice on the same document is safe (matching the
    existence checks in add_font_style / set_font_style).

    Args:
        doc: The python-docx document whose style collection is extended.
        font_name: Font name, also used as the style name.
    """
    if font_name in doc.styles:
        return
    style = doc.styles.add_style(font_name, WD_STYLE_TYPE.CHARACTER)
    style.font.name = font_name
    style._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)
# Layout test with hard-coded sample values
doc = Document()
# Character styles used by individual runs
add_style(doc, '楷体-简')
add_style(doc, '黑体')
# Document-wide default font: 12 pt, also set as the East Asian font
doc.styles['Normal'].font.name = '楷体-简'
doc.styles['Normal'].font.size = Pt(12)
doc.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), '楷体-简')
# Title: centered, bold, 20 pt
h1 = doc.add_paragraph('邀 请 函')
h1.alignment = WD_ALIGN_PARAGRAPH.CENTER
r1 = h1.runs[0]
r1.bold = True
r1.font.size = Pt(20)
# Paragraph 1: greeting line built from five runs
p1 = doc.add_paragraph('')
#p1.style.font.name = '楷体'
p1.alignment = WD_ALIGN_PARAGRAPH.LEFT
p1.add_run('尊敬的')
# Company name: bold and underlined
r1 = p1.add_run('京东') #
r1.bold = True
r1.underline = True
p1.add_run('公司')
# Invitee: bold and underlined
r2 = p1.add_run('算法工程师王强')
r2.bold = True
r2.underline = True
p1.add_run(',您好:')
<docx.text.run.Run at 0x11d9c3820>
# Paragraph 2: invitation body; only the organiser name uses the '黑体' style
p2 = doc.add_paragraph('')
#p2.paragraph_format.first_line_indent = -8
p2.add_run(' 现诚挚邀请您于2021年10月27日参加')
p2.add_run('DataWhale', style='黑体')
p2.add_run('主办的享受开源2050活动,地点在北京鸟巢,希望您届时莅临参加。')
<docx.text.run.Run at 0x11d9a16a0>
# Paragraph 3: right-aligned invitation date
p3 = doc.add_paragraph('')
p3.alignment = WD_ALIGN_PARAGRAPH.RIGHT
p3.add_run('邀请时间:')
# Sample date in 'YYYY年MM月DD日' form (was mistyped as '2020年01年01日')
r1 = p3.add_run('2020年01月01日')
r1.bold = True
r1.underline = True
doc.save('ty.docx')
整合,从excel获取数据,输出到docx
def get_invate_file(dict):
    """Build one invitation letter and save it as a .docx file.

    Args:
        dict: Mapping with the string entries 'company', 'people' and
            'date'. The parameter name shadows the builtin; it is kept
            unchanged so existing callers keep working.

    The document is saved under the relative file name
    '邀请函_<company>_<people>.docx'. Nothing is returned.
    """
    info = dict  # local alias so the body does not keep using the builtin's name

    doc = Document()
    # Character styles used by individual runs
    add_style(doc, '楷体-简')
    add_style(doc, '黑体')

    # Document-wide default font: 12 pt, also set as the East Asian font
    normal = doc.styles['Normal']
    normal.font.name = '楷体-简'
    normal.font.size = Pt(12)
    normal._element.rPr.rFonts.set(qn('w:eastAsia'), '楷体-简')

    # Title: centered, bold, 20 pt
    h1 = doc.add_paragraph('邀 请 函')
    h1.alignment = WD_ALIGN_PARAGRAPH.CENTER
    title_run = h1.runs[0]
    title_run.bold = True
    title_run.font.size = Pt(20)

    # Paragraph 1: greeting; company and invitee are bold and underlined
    p1 = doc.add_paragraph('')
    p1.alignment = WD_ALIGN_PARAGRAPH.LEFT
    p1.add_run('尊敬的')
    company_run = p1.add_run(info['company'])
    company_run.bold = True
    company_run.underline = True
    p1.add_run('公司')
    people_run = p1.add_run(info['people'])
    people_run.bold = True
    people_run.underline = True
    p1.add_run(',您好:')

    # Paragraph 2: invitation body; only the organiser name uses '黑体'
    p2 = doc.add_paragraph('')
    p2.add_run(' 现诚挚邀请您于2021年10月27日参加')
    p2.add_run('DataWhale', style='黑体')
    p2.add_run('主办的享受开源2050活动,地点在北京鸟巢,希望您届时莅临参加。')

    # Paragraph 3: right-aligned invitation date, bold and underlined
    p3 = doc.add_paragraph('')
    p3.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    p3.add_run('邀请时间:')
    date_run = p3.add_run(info['date'])
    date_run.bold = True
    date_run.underline = True

    file_name = '邀请函_' + info['company'] + '_' + info['people'] + '.docx'
    doc.save(file_name)
# Generate one invitation file per data row; min_row=2 skips the header row
for row in sheet.iter_rows(min_row=2):
    get_invate_file(get_format_cell(row))