python自动化-excel操作

163 阅读6分钟

「这是我参与2022首次更文挑战的第26天,活动详情查看:2022首次更文挑战」 使用库openpyxl

官方文档:openpyxl.readthedocs.io/en/stable/

# 安装库
!pip install openpyxl
Requirement already satisfied: openpyxl in /Users/lichizou/anaconda3/lib/python3.7/site-packages (2.6.2)
Requirement already satisfied: et-xmlfile in /Users/lichizou/anaconda3/lib/python3.7/site-packages (from openpyxl) (1.0.1)
Requirement already satisfied: jdcal in /Users/lichizou/anaconda3/lib/python3.7/site-packages (from openpyxl) (1.4.1)

1.读excel

# 读取对应表格
from openpyxl import load_workbook
excel = load_workbook(filename='test.xlsx')
excel.sheetnames
['work']
sheet = excel.sheetnames[0]
sheet
'work'
# 获得当前激活的sheet
sheet = excel.active
sheet
<Worksheet "work">
# 获得指定名称的sheet
sheet = excel[excel.sheetnames[0]]
sheet
<Worksheet "work">
# 读取单元格
cell = sheet.cell(row=1, column=2) # 表格从1开始计数
cell.value
'人流数'
cell_1 = sheet['A1'] # 相当于一行一列
cell_1.value
'时间点'
print(f'行:{cell_1.row},列:{cell_1.column},坐标:{cell_1.coordinate}')
行:1,列:1,坐标:A1
#读取多个格子的值
# 1.指定坐标
cells = sheet['A1:C8']
print(cells)
((<Cell 'work'.A1>, <Cell 'work'.B1>, <Cell 'work'.C1>), (<Cell 'work'.A2>, <Cell 'work'.B2>, <Cell 'work'.C2>), (<Cell 'work'.A3>, <Cell 'work'.B3>, <Cell 'work'.C3>), (<Cell 'work'.A4>, <Cell 'work'.B4>, <Cell 'work'.C4>), (<Cell 'work'.A5>, <Cell 'work'.B5>, <Cell 'work'.C5>), (<Cell 'work'.A6>, <Cell 'work'.B6>, <Cell 'work'.C6>), (<Cell 'work'.A7>, <Cell 'work'.B7>, <Cell 'work'.C7>), (<Cell 'work'.A8>, <Cell 'work'.B8>, <Cell 'work'.C8>))
for row in cells:
    for cel in row:
        print(f'{cel.coordinate}:{cel.value}')
A1:时间点
B1:人流数
C1:None
A2:2020-09-01
B2:63306.166000000005
C2:None
A3:2020-09-01
B3:40535.964
C3:None
A4:2020-09-01
B4:30026.64
C4:None
A5:2020-09-01
B5:46291.07
C5:None
A6:2020-09-01
B6:38909.521
C6:None
A7:2020-09-01
B7:28149.975000000002
C7:None
A8:2020-09-01
B8:40661.075000000004
C8:None
# 指定行的值
row = sheet[1]
for r in row:
    print(f'{r.coordinate}:{r.value}')
A1:时间点
B1:人流数
C1:None
row = sheet[1:2] # 二维元祖
print(row)
((<Cell 'work'.A1>, <Cell 'work'.B1>, <Cell 'work'.C1>), (<Cell 'work'.A2>, <Cell 'work'.B2>, <Cell 'work'.C2>))
for r in row:
    for cel in r:
         print(f'{cel.coordinate}:{cel.value}')
A1:时间点
B1:人流数
C1:None
A2:2020-09-01
B2:63306.166000000005
C2:None
#  指定列的值
col = sheet['A']
type(col)
tuple
for c in range(0,5):
    print(f'{col[c].coordinate}:{col[c].value}')
A1:时间点
A2:2020-09-01
A3:2020-09-01
A4:2020-09-01
A5:2020-09-01
col = sheet['A': 'C'] # 二维元祖
type(col)
tuple
for c in range(0,5):
    print(f'{col[0][c].coordinate}:{col[0][c].value}') 
A1:时间点
A2:2020-09-01
A3:2020-09-01
A4:2020-09-01
A5:2020-09-01
# 指定范围
def get_data(sheet, min_row=1, max_row=5, min_col=2, max_col=6):
    pass
# 行获取
for row in sheet.iter_rows(min_row=1, max_row=5, min_col=2, max_col=6):
    print(row) # 一维元祖
    for c in row:
        print(f'{c.coordinate}:{c.value}')
(<Cell 'work'.B1>, <Cell 'work'.C1>, <Cell 'work'.D1>, <Cell 'work'.E1>, <Cell 'work'.F1>)
B1:人流数
C1:None
D1:None
E1:None
F1:None
(<Cell 'work'.B2>, <Cell 'work'.C2>, <Cell 'work'.D2>, <Cell 'work'.E2>, <Cell 'work'.F2>)
B2:63306.166000000005
C2:None
D2:None
E2:None
F2:None
(<Cell 'work'.B3>, <Cell 'work'.C3>, <Cell 'work'.D3>, <Cell 'work'.E3>, <Cell 'work'.F3>)
B3:40535.964
C3:None
D3:None
E3:None
F3:None
(<Cell 'work'.B4>, <Cell 'work'.C4>, <Cell 'work'.D4>, <Cell 'work'.E4>, <Cell 'work'.F4>)
B4:30026.64
C4:None
D4:None
E4:None
F4:None
(<Cell 'work'.B5>, <Cell 'work'.C5>, <Cell 'work'.D5>, <Cell 'work'.E5>, <Cell 'work'.F5>)
B5:46291.07
C5:None
D5:None
E5:None
F5:None
# 列获取
for col in sheet.iter_cols(min_row=1, max_row=5, min_col=1, max_col=4):
    print(col) # 一维元祖
    for c in col:
        print(f'{c.coordinate}:{c.value}')
(<Cell 'work'.A1>, <Cell 'work'.A2>, <Cell 'work'.A3>, <Cell 'work'.A4>, <Cell 'work'.A5>)
A1:时间点
A2:2020-09-01
A3:2020-09-01
A4:2020-09-01
A5:2020-09-01
(<Cell 'work'.B1>, <Cell 'work'.B2>, <Cell 'work'.B3>, <Cell 'work'.B4>, <Cell 'work'.B5>)
B1:人流数
B2:63306.166000000005
B3:40535.964
B4:30026.64
B5:46291.07
(<Cell 'work'.C1>, <Cell 'work'.C2>, <Cell 'work'.C3>, <Cell 'work'.C4>, <Cell 'work'.C5>)
C1:None
C2:None
C3:None
C4:None
C5:None
(<Cell 'work'.D1>, <Cell 'work'.D2>, <Cell 'work'.D3>, <Cell 'work'.D4>, <Cell 'work'.D5>)
D1:None
D2:None
D3:None
D4:None
D5:None

练习

找出text_1.xlsx中sheet1表中空着的格子,并输出这些格子的坐标

分析:

法1:

1.找到有数据的方块的起始行数和列数,占有的行数和列数

2.其余为空格子

法2:

暴力遍历

# 数据模拟:复制test_1的sheet1到sheet2,随机删除一些数据.
wb = load_workbook(filename='test_1.xlsx')
wb.sheetnames
['Sheet1', 'Sheet1(2)']
sheet = wb.worksheets[1]
sheet.title
'Sheet1(2)'
# 获得数据区间,在这个区间遍历数据
range_str = sheet.calculate_dimension()
range_str
'A1:B51104'
range_list = range_str.split(':')
range_list
['A1', 'B51104']
import re
star_row = int(re.findall("\d+",range_list[0])[0])
star_row
1
end_row = int(re.findall("\d+",range_list[1])[0])
end_row
51104
star_col = re.findall("[A-Za-z]",range_list[0])[0]
star_col
'A'
star_col = ord(star_col) - 64
star_col
1
end_col = re.findall("[A-Za-z]",range_list[1])[0]
end_col
'B'
end_col = ord(end_col) - 64
end_col
2
# 其实可以直接取sheet的max_column,min_max_column,max_row,min_row
rows = sheet.iter_rows(min_col=star_col,max_col=end_col,min_row=star_row,max_row=end_row)
for col in rows:
    for c in col:
        if c.value == None:
            print(f'empty:{c.coordinate}')
empty:B7
empty:A16
empty:B25
empty:A27
empty:A31
empty:B50908
empty:A50913
empty:B51067
empty:A51071
empty:A51100
wb.save()

分析:

法1:
1.获取sheet中占有的行数和列数,在那个范围内是否存在空数据:起始行和起始列,行数和列数
2.表格不占的空间,则都是空格子

法2:
暴力遍历,找到值为None的格子

2.写excel

# 写入单元格并保存
from openpyxl import load_workbook
excel = load_workbook(filename='test.xlsx')
sheet = excel.active
# 法1
sheet['c1'] = 'add1' # 成功写入,不区分大小写
# 法2
cell = sheet['D1']
cell.value = 'add2'
excel.save(filename='test.xlsx') # 存入原表
# 新建一个空的excel表,并增加sheet写入数据
from openpyxl import Workbook
wb = Workbook()
wb
<openpyxl.workbook.workbook.Workbook at 0x11faa7fa0>
wb_filename = 'test_excel.xlsx'
ws1 = wb.active
ws1.title = 'sheet1-range'
# 插入多行数据
for d in range(1,20): # 插入19行
    # 每行值为0-9
    #ws1.append(range(d * 10))
    ws1.append(range(10)) # Value must be a list, tuple, range or generator, or a dict

ws1.calculate_dimension
<bound method Worksheet.calculate_dimension of <Worksheet "sheet1-range">>
ws2 = wb.create_sheet(title='shee2-pi')
ws2['a1'] = 3.14
ws3 = wb.create_sheet(title='data') # 空sheet
# 写入公式
ws3['a1'] = '=sum(1,10)' 
ws3['a1'].value # 实际为:11
'=sum(1,10)'
# 插入5行
ws3.insert_rows(1, amount=3) # 在第一行前插入了3个空白行,11现在的位置:a4
help(ws3.insert_rows)
Help on method insert_rows in module openpyxl.worksheet.worksheet:

insert_rows(idx, amount=1) method of openpyxl.worksheet.worksheet.Worksheet instance
    Insert row or rows before row==idx
help(ws3.delete_rows)
Help on method delete_rows in module openpyxl.worksheet.worksheet:

delete_rows(idx, amount=1) method of openpyxl.worksheet.worksheet.Worksheet instance
    Delete row or rows from row==idx
# 删除前面的2个空白行
ws3.delete_rows(1, 2) # 11现在在a2的位置
help(ws3.insert_cols)
Help on method insert_cols in module openpyxl.worksheet.worksheet:

insert_cols(idx, amount=1) method of openpyxl.worksheet.worksheet.Worksheet instance
    Insert column or columns before col==idx
# 在第一列前插入5列
ws3.insert_cols(1, 5) # 11现在在f2
# 删除前面的3列
ws3.delete_cols(1, 3) # 11现在在c2
help(wb.copy_worksheet)
Help on method copy_worksheet in module openpyxl.workbook.workbook:

copy_worksheet(from_worksheet) method of openpyxl.workbook.workbook.Workbook instance
    Copy an existing worksheet in the current workbook
    
    .. warning::
        This function cannot copy worksheets between workbooks.
        worksheets can only be copied within the workbook that they belong
    
    :param from_worksheet: the worksheet to be copied from
    :return: copy of the initial worksheet
help(wb.create_sheet)
Help on method create_sheet in module openpyxl.workbook.workbook:

create_sheet(title=None, index=None) method of openpyxl.workbook.workbook.Workbook instance
    Create a worksheet (at an optional index).
    
    :param title: optional title of the sheet
    :type title: str
    :param index: optional position at which the sheet will be inserted
    :type index: int
# 复制一个sheet到新增的
ws4 = wb.create_sheet('copy-sheet2')
ws4 = wb.copy_worksheet(ws2) # 这里不需要赋值,实际会生成2个表
ws5 = wb.copy_worksheet(ws1) # 会新建一个sheet,名字是ws1的名字+Copy,
help(ws3.move_range)
Help on method move_range in module openpyxl.worksheet.worksheet:

move_range(cell_range, rows=0, cols=0, translate=False) method of openpyxl.worksheet.worksheet.Worksheet instance
    Move a cell range by the number of rows and/or columns:
    down if rows > 0 and up if rows < 0
    right if cols > 0 and left if cols < 0
    Existing cells will be overwritten.
    Formulae and references will not be updated.
help(wb.move_sheet)
Help on method move_sheet in module openpyxl.workbook.workbook:

move_sheet(sheet, offset=0) method of openpyxl.workbook.workbook.Workbook instance
    Move a sheet or sheetname
wb.move_sheet(ws1, 3) # 往右挪了3个表,现在是第四个表
# 将11向上娜1行,向左移2列,回到a1
# c2也可以是一个区间块
ws3.move_range('c2', rows=-1, cols=-2)
wb.save(filename=wb_filename)

3.excel样式

#设置字体样式
from openpyxl.styles import Font
wb = Workbook()
sheet = wb.active
cell = sheet['a1']
cell.value = '测试文字'
cell.font = Font(name='楷体', size=15, bold=True, italic=True, color='ff0000', strike=True) # 需要每个格子设置font属性
# wb.save('excel_font_setting.xlsx')
#设置对齐方式
from openpyxl.styles import Alignment
# 水平对齐:`distributed, justify, center, left, fill, centerContinuous, right, general`
# 垂直对齐:`bottom, distributed, justify, center, top`
alignment = Alignment(horizontal='center', vertical='center', text_rotation=0, wrap_text=True, shrink_to_fit=False, indent=0)
cell = sheet['a2']
cell.value = '居中对齐'
cell.alignment = alignment
# 设置单元格填充色
from openpyxl.styles import PatternFill
fill = PatternFill(patternType='lightDown',bgColor='00FF00') # 6位颜色,填充有条纹的绿色
cell = sheet['a3']
cell.value = '设置填充色'
cell.fill = fill
cell.fill
<openpyxl.styles.fills.PatternFill object>
Parameters:
patternType=None, fgColor=<openpyxl.styles.colors.Color object>
Parameters:
rgb='FFFFFFFF', indexed=None, auto=None, theme=None, tint=0.0, type='rgb', bgColor=<openpyxl.styles.colors.Color object>
Parameters:
rgb='FF000000', indexed=None, auto=None, theme=None, tint=0.0, type='rgb'
# 设置单元格边框样式:4条线
from openpyxl.styles import Border, Side
left_side = Side(border_style='double', color='FF0000')
right_side = Side(border_style='dashed', color='00FF00')
top_side = Side(border_style='mediumDashDot', color='0000FF')
bottom_side = Side(border_style='dashDot', color='FF00FF')
border = Border(left=left_side,right=right_side,top=top_side,bottom=bottom_side)
cell = sheet['a4']
cell.value = '设置边框样式'
cell.border = border 
#设置行高与列宽
sheet.row_dimensions[1].height = 50
sheet.column_dimensions['a'].width = 20

wb.save('excel_font_setting.xlsx')

练习

打开test文件,找出文件中购买数量buy_mount超过5的行,并对其标红、加粗、附上边框。

分析:

题目用意在于对指定列,找到特定范围值,进行样式设置.

改为:对test_1的sheet1中的第二列“车辆数”,对值大于250的,设置其样式为:字体设置为红色,加粗,并设置边框,边框形式自定义

from openpyxl import load_workbook
from openpyxl.styles import Font, Side, Border
# 设置样式
def set_cell_style(cell):
    #print(f'set:{cell.coordinate}:{c.value}')
    cell.font = Font(color='ff0000', bold=True)
    side = Side(border_style='slantDashDot')
    cell.border = Border(left=side,right=side,top=side,bottom=side)
wb = load_workbook(filename='test_1.xlsx')
sheet = wb.active
sheet.calculate_dimension()
'A1:B51104'
# 获得行数
row_num = int(sheet.calculate_dimension()[4:])
row_num
51104
rows = sheet.iter_rows(min_col=2,max_col=2,min_row=2,max_row=row_num)
rows
<generator object Worksheet._cells_by_row at 0x121b8a900>
for col in rows:
   for c in col:
       if int(c.value) > 250:
           set_cell_style(c)
cell = sheet['c1']
cell.value = 123123
set_cell_style(cell)
set:C1:116.348
wb.save('test_1.xlsx') # 要保存后更改才生效,一般应搞个新文件,不更改原文件,这里不管了.