Python办公自动化Excel

130 阅读23分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路

官网:docs.python.org/3.4/library…

字符串拼接方法

  • f-string方法
  • format()函数
    • 运算符

pathlib模块

pathlib官方文档

初级体验

from pathlib2 import Path

# 获取当前目录
current_path = Path.cwd()
print(current_path)

# 输出如下:
# /Users/Anders/Documents/

# 获取Home目录
home_path = Path.home()
print(home_path)

# 输出如下:
# /Users/Anders

父级操作

from pathlib2 import Path

# 获取当前目录
current_path = Path.cwd()

# 获取上级父目录
print(current_path.parent)

# 获取上上级父目录
print(current_path.parent.parent)

# 获取上上上级父目录
print(current_path.parent.parent.parent)

# 获取上上上上级父目录
print(current_path.parent.parent.parent.parent)

# 获取上上上上级父目录
print(current_path.parent.parent.parent.parent.parent)

# 输出如下:
# /Users/Anders/Documents/Jupyter
# /Users/Anders/Documents
# /Users/Anders
# /Users
# /

遍历父目录

# 获取当前目录
from pathlib2 import Path

current_path = Path.cwd()

for p in current_path.parents:
    print(p)

# 输出如下:
# /Users/Anders/Documents/Jupyter
# /Users/Anders/Documents
# /Users/Anders
# /Users
# /

文件名操作

:::info name 文件名
suffix 文件的扩展名
suffixes 返回多个扩展名列表
stem 文件的主名(不包含扩展名)
with_name(name) 替换 文件名并返回一个新的路径
with_suffix(suffix) 替换扩展名,返回新的路径,扩展名存在则不变 :::

路径的拼接与分解

from pathlib2 import Path

# 直接传进一个完整字符串
example_path1 = Path('/Users/Anders/Documents/powershell-2.jpg')

# 也可以传进多个字符串
example_path2 = Path('/', 'Users', 'dongh', 'Documents', 'python_learn', 'file1.txt')

# 也可以利用Path.joinpath()
example_path3 = Path('/Users/Anders/Documents/').joinpath('python_learn')

# 利用 / 可以创建子路径
example_path4 = Path('/Users/Anders/Documents')
example_path5 = example_path4 / 'python_learn/pic-2.jpg'

遍历文件夹

from pathlib2 import Path

# 返回目录中最后一个部分的扩展名
example_path = Path('/Users/Anders/Documents')
[path for path in example_path.iterdir()]

# 输出如下:
# [PosixPath('/Users/Anders/Documents/abc.jpg'),
#  PosixPath('/Users/Anders/Documents/book-master'),
#  PosixPath('/Users/Anders/Documents/Database'),
#  PosixPath('/Users/Anders/Documents/Git'),
#  PosixPath('/Users/Anders/Documents/AppProjects')]

文件操作

操作语法是:open(mode=‘r’, bufferiong=-1, encoding=None, errors=None, newline=None)

from pathlib2 import Path

example_path = Path('/Users/Anders/Documents/information/JH.txt')

with example_path.open(encoding = 'GB2312') as f:
    print(f.read())
    
            
# or
example_path = Path('/Users/Anders/Documents/information/JH.txt')
example_path.read_text(encoding='GB2312')

对于简单的文件读写,在pathlib库中有几个简便的方法:

.read_text(): 以文本模式打开路径并并以字符串形式返回内容。 .read_bytes(): 以二进制/字节模式打开路径并以字节串的形式返回内容。 .write_text(): 打开路径并向其写入字符串数据。 .write_bytes(): 以二进制/字节模式打开路径并向其写入数据。

创建和删除文件夹

关于这里的创建文件目录mkdir方法接收两个参数:

  • parents:如果父目录不存在,是否创建父目录。
  • exist_ok:只有在目录不存在时创建目录,目录已存在时不会抛出异常。
from pathlib2 import Path

example_path = Path('/Users/Anders/Documents/test1/test2/test3')

# 创建文件目录,在这个例子中因为本身不存在test1,test2,test3,由于parents为True,所以都会被创建出来。
example_path.mkdir(parents = True, exist_ok = True)
# 删除路径对象目录,如果要删除的文件夹内包含文件就会报错
example_path.rmdir()

判断文件及文件夹对象是否存在

关于文件的判断还有很多相关属性

is_dir() 是否是目录 is_file() 是否是普通文件 is_symlink() 是否是软链接 is_socket() 是否是socket文件 is_block_device() 是否是块设备 is_char_device() 是否是字符设备 is_absolute() 是否是绝对路径 resolve() 返回一个新的路径,这个新路径就是当前Path对象的绝对路径,如果是软链接则直接被解析 absolute() 也可以获取绝对路径,但是推荐resolve() exists() 该路径是否指向现有的目录或文件:

from pathlib2 import Path

example_path = Path('/Users/Anders/Documents/pic-2.jpg')

# 判断对象是否存在
print(example_path.exists())
# 输出如下:
# True

# 判断对象是否是目录
print(example_path.is_dir())
# 输出如下:
# False

# 判断对象是否是文件
print(example_path.is_file())
# 输出如下:
# True

文件的信息

只需要通过**.stat()**方法就可以返还指定路径的文件信息

from pathlib2 import Path

example_path = Path('/Users/Anders/Documents/pic.jpg')
print(example_path.stat())
# 输出如下:
# os.stat_result(st_mode=33188, st_ino=8598206944, st_dev=16777220, st_nlink=1, st_uid=501, st_gid=20, st_size=38054, st_atime=1549547190, st_mtime=1521009880, st_ctime=1521009883)
# 文件大小 最后访问时间 最后修改时间 创建时间
print(example_path.stat().st_size)
# 输出如下:
# 38054

操作工作簿

移动并重命名工作簿

from pathlib import Path  # pathlib模块是python内置模块
old_file_path = Path('F:\\python\\第2章\\员工档案.xlsx')  # 原路径
new_file_path = Path('F:\\table\\员工信息表.xlsx')      # 新路径
old_file_path.rename(new_file_path)     # 重命名  rename只能在同一个磁盘分区

解析工作簿的路径信息

from pathlib import Path
file_path = Path('F:\\python\\第2章\\出库表.xlsx')
path = file_path.parent      # 文件路径
file_name = file_path.name   # 文件名
stem_name = file_path.stem   # 文件主名
suf_name = file_path.suffix  # 文件扩展名
print(path)
print(file_name)
print(stem_name)
print(suf_name)

提取文件夹内所有工作簿的文件名

from pathlib import Path
folder_path = Path('F:\\python\\第2章\\工作信息表\\')
file_list = folder_path.glob('*.xls*') # glob用于查找符合指定规则的文件或文件夹
lists = []
for i in file_list:
    file_name = i.name
    lists.append(file_name)
print(lists)

#['供应商信息表.xlsx', '出库表.xlsx', '同比增长情况表.xls', '员工档案表.xlsx', '库存表.xlsx']

新建一个工作簿

import xlwings as xw
app = xw.App(visible=False, add_book=False)  # 启动Excel程序
workbook = app.books.add()       # 新建工作簿
workbook.save('F:\\test\\1月销售表.xlsx')   # 保存新建工作簿
workbook.close()  # 关闭工作簿
app.quit()    # 退出Excel程序

循环新建多个表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
for i in range(1, 6):
    workbook = app.books.add()
    workbook.save(f'F:\\test\\销售表{i}.xlsx') # f-string 替换{}里内容
    workbook.close()
app.quit()

打开一个已有的工作簿

import xlwings as xw
app = xw.App(visible=True, add_book=False)
file_path = '员工信息表.xlsx'
app.books.open(file_path) 

打开文件夹下的所有工作簿

from pathlib import Path
import xlwings as xw
app = xw.App(visible=True, add_book=False)
folder_path = Path('F:\\python\\第2章\\工作信息表\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    app.books.open(i)

批量重命名多个工作簿

from pathlib import Path
folder_path = Path('F:\\python\\第2章\\table\\')
file_list = folder_path.glob('*月.xlsx')
for i in file_list:
    old_file_name = i.name
    new_file_name = old_file_name.replace('月', '月销售表')
    # with_name是pathlib路径对象函数,用于替换原路径文件名
    new_file_path = i.with_name(new_file_name)  # 用新的文件名构造新的文件路径  
    i.rename(new_file_path)  # 执行重命名操作

批量转换工作簿的文件格式

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第2章\\工作信息表\\')
file_list = folder_path.glob('*.xlsx')
for i in file_list:
    new_file_path = str(i.with_suffix('.xls')) # SaveAs不能识别路径
    workbook = app.books.open(i) # 打开要转换文件格式的工作簿
    workbook.api.SaveAs(new_file_path, FileFormat=56) # 56代表.xls. 51代表.xlsx
    workbook.close()
app.quit()

将一个工作簿拆分为多个工作簿

import xlwings as xw
app = xw.App(visible=False, add_book=False)
file_path = 'F:\\python\\第2章\\新能源汽车备案信息.xlsx'
workbook = app.books.open(file_path)
worksheet = workbook.sheets
for i in worksheet:
    new_workbook = app.books.add()
    new_worksheet = new_workbook.sheets[0]
    i.copy(before=new_worksheet) # 将来源工作簿的当前工作表复制到新建工作簿的第一个工作表之前
    new_workbook.save('F:\\python\\第2章\\汽车备案信息\\{}.xlsx'.format(i.name))
    new_workbook.close()
app.quit()

将多个工作簿合并为一个工作簿

from pathlib import Path
import pandas as pd
folder_path = Path('F:\\python\\第2章\\上半年销售统计\\')
file_list = folder_path.glob('*.xls*') 
with pd.ExcelWriter('F:\\python\\第2章\\总表.xlsx') as workbook:
    for i in file_list:
        stem_name = i.stem
        data = pd.read_excel(i, sheet_name=0)
        data.to_excel(workbook, sheet_name=stem_name, index=False)

按照扩展名分类工作簿

from pathlib import Path
folder_path = Path('第2章\\工作文件\\')  # 给出要分类的文件夹路径
file_list = folder_path.glob('*.xls*')
for i in file_list:
    suf_name = i.suffix
    new_folder_path = folder_path / suf_name  # 构造以扩展名命名的文件夹的完整路径
    if not new_folder_path.exists():
        new_folder_path.mkdir()
    i.replace(new_folder_path / i.name)   # 将工作簿移动到以扩展名的文件下
#  replace用于使用新路径覆盖原路径

按照日期分类工作簿

from time import localtime
from pathlib import Path
folder_path = Path('F:\\python\\第2章\\工作文件\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    lm_time = i.stat().st_mtime  # 获取最后修改时间
    year = localtime(lm_time).tm_year  # 提取年份
    month = localtime(lm_time).tm_mon  # 提取月份
    new_folder_path = folder_path / str(year) / str(month)
    if not new_folder_path.exists():
        new_folder_path.mkdir(parents=True)
    i.replace(new_folder_path / i.name)

精确查找工作簿

from pathlib import Path
folder_path = input('请输入查找路径(如C:\\):')
file_name = input('请输入要查找的工作簿名称:')
folder_path = Path(folder_path)
file_list = folder_path.rglob(file_name) # rglob 用于指定文件夹及其子文件夹中查找名称符合指定规则的文件或文件夹
for i in file_list:
    print(i)
    
# 请输入查找路径(如C:\):I:\
# 请输入要查找的工作簿名称:出库表.xlsx
# I:\Projects\jupyter\Excel\第2章\出库表.xlsx
# I:\Projects\jupyter\Excel\第2章\table\出库表.xlsx
# I:\Projects\jupyter\Excel\第2章\工作信息表\出库表.xlsx

按关键词查找工作簿

from pathlib import Path
folder_path = input('请输入查找路径(如C:\\):')
keyword = input('请输入关键词:')
folder_path = Path(folder_path)
file_list = folder_path.rglob(f'*{keyword}*.xls*')
for i in file_list:
    print(i)
    
# 请输入查找路径(如C:\):I:\
# 请输入关键词:供应商
# I:\Projects\jupyter\Excel\第2章\工作信息表\供应商信息表.xlsx

保护一个工作簿的结构

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第2章\\办公用品采购表.xlsx')
workbook.api.Protect(Password='123', Structure=True, Windows=True)  # 密码 工作簿结构不被修改 窗口不被修改
workbook.save()
workbook.close()
app.quit()

加密保护一个工作簿

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第2章\\办公用品采购表.xlsx')
workbook.api.Password = '123'  # 设置工作簿打开密码
workbook.save()
workbook.close()
app.quit()

加密保护多个工作簿

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第2章\\工作信息表\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    workbook = app.books.open(i)
    workbook.api.Password = '123'
    workbook.save()
    workbook.close()
app.quit()

操作工作表

提取一个工作簿中所有工作表的名称(方法一)

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
lists = []
for i in worksheet:
    sheet_name = i.name
    lists.append(sheet_name)
print(lists)
workbook.close()
app.quit()

import pandas as pd
file_path = 'F:\\python\\第3章\\新能源汽车备案信息.xlsx'
data = pd.read_excel(file_path, sheet_name=None)
worksheet_name = list(data.keys())
print(worksheet_name)

# 第三行代码读取工作簿中所有工作表中的数据后,生成一个字典,字典的键为工作表的名称,值为对应的数据

在一个工作簿中新增一个工作表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
new_sheet_name = '产品信息表'
lists = []
for i in worksheet:
    sheet_name = i.name
    lists.append(sheet_name)
if new_sheet_name not in lists:
    worksheet.add(name=new_sheet_name)  # 新增工作表
workbook.save()
workbook.close()
app.quit()

在一个工作簿中删除一个工作表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
del_sheet_name = '汽车备案信息'
for i in worksheet:
    sheet_name = i.name
    if sheet_name == del_sheet_name:
        i.delete()  # 删除当前工作表
        break
workbook.save()
workbook.close()
app.quit()

在多个工作簿中批量新增工作表

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第3章\\上半年销售统计\\')
file_list = folder_path.glob('*.xls*')
new_sheet_name = '产品信息表'
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets
    lists = []
    for j in worksheet:
        sheet_name = j.name
        lists.append(sheet_name)
    if new_sheet_name not in lists:
        worksheet.add(name=new_sheet_name)
    workbook.save()
    workbook.close()
app.quit()

在多个工作簿中批量删除工作表

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第3章\\汽车信息\\')
file_list = folder_path.glob('*.xls*')
del_sheet_name = 'Sheet1'
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets
    for j in worksheet:
        sheet_name = j.name
        if sheet_name == del_sheet_name:
            j.delete()
            break
    workbook.save()
    workbook.close()
app.quit()

重命名一个工作簿中的一个工作表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    if i.name == '汽车备案信息':
        i.name = '汽车信息'
        break
workbook.save()
workbook.close()
app.quit()

重命名一个工作簿中的所有工作表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\上半年销售统计.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    i.name = i.name.replace('销售表', '')
workbook.save()
workbook.close()
app.quit()

重命名多个工作簿中的同名工作表

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第3章\\销售统计\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets
    for j in worksheet:
        if j.name == '产品信息':
            j.name = '配件信息'
            break
    workbook.save()
    workbook.close()
app.quit()

将一个工作表复制到另一个工作簿

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook1 = app.books.open('F:\\python\\第3章\\产品信息表.xlsx')
workbook2 = app.books.open('F:\\python\\第3章\\1月销售表.xlsx')
worksheet1 = workbook1.sheets['配件信息']
worksheet2 = workbook2.sheets[0]  # sheets[0] 表示第一个工作表
worksheet1.copy(before=worksheet2)
workbook2.save()
app.quit()

将一个工作表批量复制到多个工作簿

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook1 = app.books.open('F:\\python\\第3章\\产品信息表.xlsx')
worksheet1 = workbook1.sheets['配件信息']
folder_path = Path('F:\\python\\第3章\\上半年销售统计\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    workbook2 = app.books.open(i)
    worksheet2 = workbook2.sheets[0]
    worksheet1.copy(before=worksheet2)
    workbook2.save()
app.quit()

按条件将一个工作表拆分为多个工作簿

import pandas as pd
file_path = 'F:\\python\\第3章\\销售表.xlsx'
data = pd.read_excel(file_path, sheet_name='总表')
pro_data = data.groupby('产品名称')
for i, j in pro_data:   # 组名  数据
    new_file_path = 'F:\\python\\第3章\\拆分\\' + i + '.xlsx'
    j.to_excel(new_file_path, sheet_name=i, index=False)

按条件将一个工作表拆分为多个工作表

import pandas as pd
file_path = 'F:\\python\\第3章\\销售表.xlsx'
data = pd.read_excel(file_path, sheet_name='总表')
pro_data = data.groupby('产品名称')
with pd.ExcelWriter('F:\\python\\第3章\\各产品销售表.xlsx') as workbook:
    for i, j in pro_data:
        j.to_excel(workbook, sheet_name=i, index=False)

将一个工作表横向拆分为多个工作表

import pandas as pd
file_path = 'F:\\python\\第3章\\销售数量统计.xlsx'
data = pd.read_excel(file_path, sheet_name='总表')
head_col = list(data.columns)
same_col = data[['配件编号', '配件名称']]
with pd.ExcelWriter('F:\\python\\第3章\\各产品销售表1.xlsx') as workbook:
    for i in head_col[2:]:
        dif_col = data[i]
        sheet_data = pd.concat([same_col, dif_col], axis=1)
        sheet_data.to_excel(workbook, sheet_name=i, index=False)

纵向合并多个工作表为一个工作表

import pandas as pd
file_path = 'F:\\python\\第3章\\上半年销售统计.xlsx'
data = pd.read_excel(file_path, sheet_name=None)
all_data = pd.concat(data, ignore_index=True)
new_file_path = 'F:\\python\\第3章\\销售统计.xlsx'
all_data.to_excel(new_file_path, sheet_name='总表', index=False)

横向合并多个工作表为一个工作表

import pandas as pd
file_path = 'F:\\python\\第3章\\产品各月销售数量表.xlsx'
data = pd.read_excel(file_path, sheet_name=None)
all_data = data['1月'][['配件编号', '配件名称']]
for i in data:
    col = data[i].iloc[:, [2]]
    all_data = pd.concat([all_data, col], axis=1)
new_file_path = 'F:\\python\\第3章\\合并表.xlsx'
all_data.to_excel(new_file_path, sheet_name='总表', index=False)

设置工作表的标签颜色

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    if i.name == '汽车备案信息':
        i.api.Tab.Color = 255
workbook.save()
workbook.close()
app.quit()

隐藏一个工作簿中的一个工作表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    if i.name == '汽车备案信息':
        i.visible = False
workbook.save()
workbook.close()
app.quit()

隐藏多个工作簿中的一个同名工作表

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第3章\\区域销售统计\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets
    for j in worksheet:
        if j.name == '供应商信息':
            j.visible = False
    workbook.save()
    workbook.close()
app.quit()

隐藏多个工作簿中的多个同名工作表

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第3章\\区域销售统计\\')
file_list = folder_path.glob('*.xls*')
lists = ['配件信息', '供应商信息']
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets
    for j in worksheet:
        if j.name in lists:
            j.visible = False
    workbook.save()
    workbook.close()
app.quit()

保护一个工作表

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('F:\\python\\第3章\\产品信息表.xlsx')
worksheet = workbook.sheets['配件信息']
worksheet.api.Protect(Password='123', Contents=True)
workbook.save()
workbook.close()
app.quit()

行 / 列操作

根据数据内容自动调整一个工作表的行高和列宽

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook= app.books.open('新能源汽车备案信息.xlsx')
worksheet = workbook.sheets[0]
worksheet.autofit()
workbook.save()
workbook.close()
app.quit()

精确调整一个工作表的行高和列宽

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('产品信息表.xlsx')
worksheet = workbook.sheets[0]
area = worksheet.range('A1').expand('table')
area.column_width = 15
area.row_height = 20
workbook.save()
workbook.close()
app.quit()

调整一个工作簿中所有工作表的行高和列宽

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('新能源汽车备案信息.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    i.autofit()
workbook.save()
workbook.close()
app.quit()

调整多个工作簿的行高和列宽

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第4章\\区域销售统计\\')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets
    for j in worksheet:
        j.autofit()
    workbook.save()
    workbook.close()
app.quit()

在一个工作表中插入空白行

from openpyxl import load_workbook  # openpyxl 模块可用于.xlsx格式读写和修改
workbook = load_workbook('工资表.xlsx')
worksheet = workbook['工资表']
worksheet.insert_rows(6, 1)
workbook.save('工资表1.xlsx')

在一个工作表中每隔一行插入空白行

from openpyxl import load_workbook
workbook = load_workbook('工资表1.xlsx')
worksheet = workbook['工资表']
num = 2  # 设置插入空白行的数量
last_num = worksheet.max_row  # 获取工作表数据区域行数
for i in range(0, last_num):
    worksheet.insert_rows(i * (num + 1) + 3, num)  # 插入空白行
workbook.save('工资表2.xlsx')

在一个工作表中插入空白列

from openpyxl import load_workbook
workbook = load_workbook('新能源汽车备案信息.xlsx')
worksheet = workbook['汽车备案信息']
worksheet.insert_cols(5, 1)
workbook.save('新能源汽车备案信息1.xlsx')

在一个工作表中删除行

from openpyxl import load_workbook
workbook = load_workbook('新能源汽车备案信息.xlsx')
worksheet = workbook['汽车备案信息']
worksheet.delete_rows(5, 2)
workbook.save('新能源汽车备案信息1.xlsx')

在一个工作表中删除列(方法一)

from openpyxl import load_workbook
workbook = load_workbook('新能源汽车备案信息.xlsx')
worksheet = workbook['汽车备案信息']
worksheet.delete_cols(5, 2)
workbook.save('新能源汽车备案信息1.xlsx')
import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name=0)
data.drop(columns=['成本价', '产品成本'], inplace=True)  # 删除指定列
data.to_excel('销售表1.xlsx', sheet_name='总表', index=False)

在一个工作表中追加行数据

import xlwings as xw
app = xw.App(visible=False, add_book=False)
new_data = [['8', '重庆**汽车有限公司', '孙**', '187****2245'], ['9', '四川**汽车有限公司', '肖**', '177****2245']]
workbook = app.books.open('产品信息表.xlsx')
worksheet = workbook.sheets['供应商信息']
data = worksheet.range('A1').expand('table')
num = data.shape[0]
worksheet.range(num + 1, 1).value = new_data
workbook.save()
workbook.close()
app.quit()

在多个工作簿的同名工作表中追加行数据

from pathlib import Path
import xlwings as xw
app = xw.App(visible=False, add_book=False)
folder_path = Path('F:\\python\\第4章\\区域销售统计\\')
file_list = folder_path.glob('*.xls*')
new_data = [['8', '重庆**汽车有限公司', '孙**', '187****2245'], ['9', '四川**汽车有限公司', '肖**', '177****2245']]
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets['供应商信息']
    data = worksheet.range('A1').expand('table')
    num = data.shape[0]
    worksheet.range(num + 1, 1).value = new_data
    workbook.save()
    workbook.close()
app.quit()

在一个工作表中追加列数据(打标签)

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name=0)
max_data = data['利润'].max()
level = [0, 5000, 10000, max_data]
level_names = ['差', '良', '优']
data['等级'] = pd.cut(data['利润'], level, labels=level_names)
data.to_excel('销售表1.xlsx', sheet_name='总表', index=False)

提取一个工作表的行数据和列数据

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name='总表')
row_data = data.iloc[0:10]  # 提取前10行
col_data = data[['单号', '销售日期', '产品名称', '利润']]
range_data = data.iloc[0:5][['单号', '销售日期', '产品名称', '利润']]
row_data.to_excel('提取行数据.xlsx', sheet_name='前10行数据', index=False)
col_data.to_excel('提取列数据.xlsx', sheet_name='利润表', index=False)
range_data.to_excel('提取数据.xlsx', sheet_name='Sheet1', index=False)

提取一个工作簿中所有工作表的行数据

import pandas as pd
data = pd.read_excel('办公用品采购表.xlsx', sheet_name=None)
with pd.ExcelWriter('提取表.xlsx') as workbook:
    for i, j in data.items():  # items()用于返回字典的键值对
        row_data = j.iloc[0:5]
        row_data.to_excel(workbook, sheet_name=i, index=False)

替换一个工作表的数据

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name=0)
data = data.replace('离合器', '刹车片')
data.to_excel('销售表1.xlsx', sheet_name='总表', index=False)

替换一个工作簿中所有工作表的数据

import pandas as pd
data = pd.read_excel('办公用品采购表.xlsx', sheet_name=None)
with pd.ExcelWriter('办公用品采购表1.xlsx') as workbook:
    for i, j in data.items():
        data = j.replace('固体胶', '透明胶带')
        data.to_excel(workbook, sheet_name=i, index=False)

替换一个工作表的列数据

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('产品信息表1.xlsx')
worksheet = workbook.sheets['配件信息']
data = worksheet.range('A2').expand('table').value
for i, j in enumerate(data):
    data[i][3] = float(j[3]) * (1 + 0.1)
worksheet.range('A2').expand('table').value = data
workbook.save()
workbook.close()
app.quit()

替换一个工作表指定列数据对应的列数据

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('销售表.xlsx')
worksheet = workbook.sheets['总表']
data = worksheet.range('A1').expand('table').value
for i, j in enumerate(data):
    if (j[2] == '里程表') and (j[3] == 850):
        data[i][3] = 900
worksheet.range('A1').expand('table').value = data
workbook.save()
workbook.close()
app.quit()

转置一个工作表的行列

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('产品分析表.xlsx')
worksheet = workbook.sheets[0]
data = worksheet.range('A1').expand('table').options(transpose=True).value
worksheet.clear()  # 清除工作表的内容和格式设置
worksheet.range('A1').expand().value = data
workbook.save('产品分析表1.xlsx')
workbook.close()
app.quit()

从指定行列冻结一个工作表的窗格

from openpyxl import load_workbook
workbook = load_workbook('销售表.xlsx')
worksheet = workbook['总表']
worksheet.freeze_panes = 'B2'
workbook.save('销售表1.xlsx')

将一个工作表的一列拆分为多列

import pandas as pd
data = pd.read_excel('产品规格表.xlsx', sheet_name=0)
data_col = data['产品规格'].str.split('*', expand=True)
data['长(cm)'] = data_col[0]
data['宽(cm)'] = data_col[1]
data['高(cm)'] = data_col[2]
data.drop(columns=['产品规格'], inplace=True)
data.to_excel('产品规格表1.xlsx', sheet_name='规格表', index=False)

将一个工作表的多列合并为一列

import pandas as pd
data = pd.read_excel('产品规格表1.xlsx', sheet_name='规格表')
data['产品规格'] = data['长(cm)'].astype(str) + '*' + data['宽(cm)'].astype(str) + '*' + data['高(cm)'].astype(str)
data.drop(columns=['长(cm)', '宽(cm)', '高(cm)'], inplace=True)
data.to_excel('产品规格表2.xlsx', sheet_name='Sheet1', index=False)

在一个工作表中隐藏行数据

from openpyxl import load_workbook
workbook = load_workbook('新能源汽车备案信息.xlsx')
worksheet = workbook['汽车备案信息']
worksheet.row_dimensions.group(2, 10, hidden=True)
workbook.save('新能源汽车备案信息1.xlsx')

在一个工作表中隐藏列数据

from openpyxl import load_workbook
workbook = load_workbook('新能源汽车备案信息.xlsx')
worksheet = workbook['汽车备案信息']
worksheet.column_dimensions.group('A', 'D', hidden=True)
workbook.save('新能源汽车备案信息1.xlsx')

单元格操作

在单元格中输入内容

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.add()
worksheet = workbook.sheets.add(name='销售情况')
worksheet.range('A1').value = [['产品名称', '销售数量', '销售单价', '销售额'], ['大衣', 15, 400, 6000], ['羽绒服', 20, 500, 10000]]
workbook.save('产品表.xlsx')
workbook.close()
app.quit()

设置单元格数据的字体格式

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('订单表.xlsx')
worksheet = workbook.sheets[0]
header = worksheet.range('A1:I1')
header.font.name = '微软雅黑'
header.font.size = 10
header.font.bold = True
header.font.color = (255, 255, 255)
header.color = (0, 0, 0)  # 单元格填充颜色
data = worksheet.range('A2').expand('table')  # 选中数据行所在的单元格区域
data.font.name = '微软雅黑'
data.font.size = 10
workbook.save('订单表1.xlsx')
workbook.close()
app.quit()

设置单元格数据的对齐方式

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('订单表1.xlsx')
worksheet = workbook.sheets[0]
header = worksheet.range('A1:I1')
header.api.HorizontalAlignment = -4108
header.api.VerticalAlignment = -4108
data = worksheet.range('A2').expand('table')
data.api.HorizontalAlignment = -4152  # 设置数据行的水平对齐方式
data.api.VerticalAlignment = -4108    # 设置数据行的垂直对齐方式
workbook.save('订单表2.xlsx')
workbook.close()
app.quit()

HorizontalAlignment设置水平对齐方式

对齐方式属性值
常规1
靠右-4152
靠左-4131
居中-4108
填充5
两端对齐-4130

设置单元格的边框样式

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('订单表2.xlsx')
worksheet = workbook.sheets[0]
area = worksheet.range('A1').expand('table')
for i in area:
    for j in range(7, 11):
        i.api.Borders(j).LineStyle = 1
        i.api.Borders(j).Weight = 2
        i.api.Borders(j).Color = xw.utils.rgb_to_int((255, 0, 0)) # 设置边框的颜色
workbook.save('订单表3.xlsx')
workbook.close()
app.quit()

修改单元格的数字格式

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('订单表3.xlsx')
worksheet = workbook.sheets[0]
row_num = worksheet.range('A1').expand('table').last_cell.row
worksheet.range(f'B2:B{row_num}').number_format = 'yyyy年m月d日'
worksheet.range(f'D2:D{row_num}').number_format = '¥#,##0'
worksheet.range(f'E2:E{row_num}').number_format = '¥#,##0'
worksheet.range(f'G2:G{row_num}').number_format = '¥#,##0.00'
worksheet.range(f'H2:H{row_num}').number_format = '¥#,##0.00'
worksheet.range(f'I2:I{row_num}').number_format = '¥#,##0.00'
workbook.save('订单表4.xlsx')
workbook.close()
app.quit()

合并单元格制作表格标题(方法一)

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('订单表5.xlsx')
worksheet = workbook.sheets[0]
title = worksheet.range('A1:I1') # 指定要合并的单元格区域
title.merge()   # 合并单元格
title.font.name = '微软雅黑'
title.font.size = 18
title.font.bold = True
title.api.HorizontalAlignment = -4108
title.api.VerticalAlignment = -4108
title.row_height = 30
workbook.save('订单表6.xlsx')
workbook.close()
app.quit()

合并单元格制作表格标题(方法二)

from openpyxl import load_workbook
from openpyxl.styles import Font, Alignment
workbook = load_workbook('订单表5.xlsx')
worksheet = workbook['总表']
worksheet.merge_cells('A1:I1')  # 指定要合并的单元格区域
worksheet['A1'].font = Font(name='微软雅黑', size=18, bold=True)
worksheet['A1'].alignment = Alignment(horizontal='center', vertical='center')
worksheet.row_dimensions[1].height = 30
workbook.save('订单表6.xlsx')

合并内容相同的连续单元格

from openpyxl import load_workbook
workbook = load_workbook('订单金额表.xlsx')
worksheet = workbook['Sheet1']
lists = []
num = 2  # 从第二行开始
while True: # 构造永久循环
    datas = worksheet.cell(num, 1).value
    if datas:
        lists.append(datas)
    else: # 如果读取的数据为空
        break  # 则强制结束循环
    num += 1
s = 0
e = 0
data = lists[0]
for m in range(len(lists)):
    if lists[m] != data:
        data = lists[m]
        e = m - 1
        if e >= s:
            worksheet.merge_cells(f'A{s + 2}:A{e + 2}')  # 合并A列相同内容的单元格
            s = e + 1
    if m == len(lists) - 1:
        e = m
        worksheet.merge_cells(f'A{s + 2}:A{e + 2}')     # 合并A列相同内容的单元格
workbook.save('订单金额表1.xlsx')

在空白单元格中填充数据

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name='总表')
data['销售金额'].fillna(0, inplace=True)
data['利润'].fillna(0, inplace=True)
data.to_excel('销售表1.xlsx', sheet_name='总表', index=False)

删除工作表中的重复行

import pandas as pd
data = pd.read_excel('销售表1.xlsx', sheet_name='总表')
data = data.drop_duplicates()  # 删除重复行
data.to_excel('销售表2.xlsx', sheet_name='总表', index=False)

将单元格中的公式转换为数值

import xlwings as xw
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('销售表2.xlsx')
worksheet = workbook.sheets[0]
data = worksheet.range('A1').expand('table').value
worksheet.range('A1').expand('table').value = data
workbook.save('销售表3.xlsx')
workbook.close()
app.quit()

数据处理与的分析操作

排序一个工作表中的数据(方法一)

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name='总表')
data = data.sort_values(by='利润', ascending=False)  # 降序
data.to_excel('销售表1.xlsx', sheet_name='总表', index=False)

排序一个工作表中的数据(方法二)

import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('销售表.xlsx')
worksheet = workbook.sheets['总表']
data = worksheet.range('A1').expand('table').options(pd.DataFrame).value
result = data.sort_values(by='利润', ascending=False)
worksheet.range('A1').value = result
workbook.save('销售表1.xlsx')
workbook.close()
app.quit()

排序一个工作簿中所有工作表的数据

import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('各月销售数量表.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    data = i.range('A1').expand('table').options(pd.DataFrame).value # 将数据转化为DataFrame格式
    result = data.sort_values(by='销售数量', ascending=False)
    i.range('A1').value = result
workbook.save('各月销售数量表1.xlsx')
workbook.close()
app.quit()

排序多个工作簿中同名工作表的数据

from pathlib import Path
import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
folder_path = Path('各地区销售数量')
file_list = folder_path.glob('*.xls*')
for i in file_list:
    workbook = app.books.open(i)
    worksheet = workbook.sheets['销售数量']
    data = worksheet.range('A1').expand('table').options(pd.DataFrame).value
    result = data.sort_values(by='销售数量', ascending=False)
    worksheet.range('A1').value = result
    workbook.save()
    workbook.close()
app.quit()

根据单个条件筛选一个工作表中的数据

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name='总表')
pro_data = data[data['产品名称'] == '离合器']
num_data = data[data['销售数量'] >= 100]
pro_data.to_excel('离合器.xlsx', sheet_name='离合器', index=False)
num_data.to_excel('销售数量大于等于100的记录.xlsx', sheet_name='销售数量大于等于100的记录', index=False)

根据多个条件筛选一个工作表中的数据

import pandas as pd
data = pd.read_excel('销售表.xlsx', sheet_name='总表')
condition1 = (data['产品名称'] == '转速表') & (data['销售数量'] >= 50)
condition2 = (data['产品名称'] == '转速表') | (data['销售数量'] >= 50)
data1 = data[condition1]
data2 = data[condition2]
data1.to_excel('销售表1.xlsx', sheet_name='与条件筛选', index=False)
data2.to_excel('销售表2.xlsx', sheet_name='或条件筛选', index=False)

筛选一个工作簿中所有工作表的数据

import pandas as pd
all_data = pd.read_excel('办公用品采购表.xlsx', sheet_name=None)
with pd.ExcelWriter('筛选表.xlsx') as workbook:
    for i in all_data:
        data = all_data[i]       
        filter_data = data[data['采购物品'] == '办公桌']
        filter_data.to_excel(workbook, sheet_name=i, index=False)

筛选一个工作簿中所有工作表的数据并汇总

import pandas as pd
all_data = pd.read_excel('办公用品采购表.xlsx', sheet_name=None)
datas = pd.DataFrame()
for i in all_data:
    data = all_data[i]
    filter_data = data[data['采购物品'] == '办公桌']
    datas = pd.concat([datas, filter_data], axis=0)
datas.to_excel('办公桌.xlsx', sheet_name='办公桌', index=False)

分类汇总一个工作表

import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('销售表.xlsx')
worksheet = workbook.sheets['总表']
data = worksheet.range('A1').expand('table').options(pd.DataFrame, dtype=float).value
result = data.groupby('产品名称').sum()
worksheet1 = workbook.sheets.add(name='分类汇总')
worksheet1.range('A1').value = result[['销售数量', '销售金额']]
workbook.save('分类汇总表.xlsx')
workbook.close()
app.quit()

对一个工作表求和

import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('办公用品采购表.xlsx')
worksheet = workbook.sheets['1月']
data = worksheet.range('A1').expand('table').options(pd.DataFrame).value
result = data['采购金额'].sum()
worksheet.range('B15').value = '合计'
worksheet.range('C15').value = result
workbook.save('求和表.xlsx')
workbook.close()
app.quit()

对一个工作簿的所有工作表分别求和

import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('办公用品采购表.xlsx')
worksheet = workbook.sheets
for i in worksheet:
    data = i.range('A1').expand('table').options(pd.DataFrame).value
    result = data['采购金额'].sum()
    column = i.range('A1').expand('table').value[0].index('采购金额') + 1
    row = i.range('A1').expand('table').shape[0]
    i.range(row + 1, column - 1).value = '合计'
    i.range(row + 1, column).value = result
workbook.save('求和表.xlsx')
workbook.close()
app.quit()

在一个工作表中制作数据透视表

import xlwings as xw
import pandas as pd
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('销售表.xlsx')
worksheet = workbook.sheets['总表']
data = worksheet.range('A1').expand('table').options(pd.DataFrame, dtype=float).value
pivot = pd.pivot_table(data, values=['销售数量', '销售金额'], index=['产品名称'], aggfunc={'销售数量': 'sum', '销售金额': 'sum'}, fill_value=0, margins=True, margins_name='合计')
worksheet1 = workbook.sheets.add(name='数据透视表')
worksheet1.range('A1').value = pivot
workbook.save('数据透视表.xlsx')
workbook.close()
app.quit()

使用相关系数判断数据的相关性

import pandas as pd
data = pd.read_excel('销售额统计表.xlsx', sheet_name=0, index_col='序号')
result = data.corr() # 	计算任意两个变量之间的相关系数
print(result)

image.png

使用描述统计和直方图制定目标

import pandas as pd
import matplotlib.pyplot as plt
import xlwings as xw
data = pd.read_excel('员工销售业绩表.xlsx', sheet_name=0)
data_describe = data['销售额(万元)'].astype(float).describe()
data_cut = pd.cut(data['销售额(万元)'], 6)
data1 = pd.DataFrame()
data1['计数'] = data['销售额(万元)'].groupby(data_cut).count()
data2 = data1.reset_index()
data2['销售额(万元)'] = data2['销售额(万元)'].apply(lambda x:str(x))
figure = plt.figure()
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
n, bins, patches = plt.hist(data['销售额(万元)'], bins=6, edgecolor='black', linewidth=1)
plt.xticks(bins)
plt.title('员工销售业绩频率分析')
plt.xlabel('销售额(万元)')
plt.ylabel('频数')
app = xw.App(visible=False, add_book=False)
workbook = app.books.open('员工销售业绩表.xlsx')
worksheet = workbook.sheets[0]
worksheet.range('E1').value = data_describe
worksheet.range('H1').value = data2
worksheet.pictures.add(figure, name='图片1', update=True, left=400, top=200)
worksheet.autofit()
workbook.save('描述统计.xlsx')
workbook.close()
app.quit()

拟合回归方程并判断拟合程度

import pandas as pd
from sklearn import linear_model
df = pd.read_excel('各月销售额与广告费支出表.xlsx', sheet_name=0)
x = df[['视频门户广告费(万元)', '电视台广告费(万元)']]
y = df['销售额(万元)']
model = linear_model.LinearRegression()
model.fit(x, y)
R2 = model.score(x, y)
print(R2)

使用回归方程预测未来值

import pandas as pd
from sklearn import linear_model
df = pd.read_excel('各月销售额与广告费支出表.xlsx', sheet_name=0)
x = df[['视频门户广告费(万元)', '电视台广告费(万元)']]
y = df['销售额(万元)']
model = linear_model.LinearRegression()
model.fit(x, y)
coef = model.coef_
model_intercept = model.intercept_
equation = f'y={coef[0]}*x1+{coef[1]}*x2{model_intercept:+}'
print(equation)
x1 = 40
x2 = 30
y = coef[0] * x1 + coef[1] * x2 + model_intercept
print(y)