摘要:每天花2小时做重复的Excel操作?Python可以帮你压缩到2分钟。本文分享10个实用的自动化办公技巧,覆盖Excel读写、PDF处理、文件批量操作等高频场景。
准备工作
pip install openpyxl pandas pdfplumber reportlab python-docx
技巧1:批量合并多个Excel文件
最常见的需求——把几十个Excel汇总成一个:
import pandas as pd
from pathlib import Path
def merge_excel(folder, output='merged.xlsx'):
files = list(Path(folder).glob('*.xlsx'))
dfs = []
for f in files:
df = pd.read_excel(f)
df['来源文件'] = f.name # 标记来源
dfs.append(df)
result = pd.concat(dfs, ignore_index=True)
result.to_excel(output, index=False)
print(f'合并完成:{len(files)}个文件 → {len(result)}行数据')
merge_excel('./monthly_reports/')
技巧2:按条件拆分Excel
反过来,把一个大表按部门/类别拆成多个文件:
def split_excel(input_file, column, output_dir='output'):
Path(output_dir).mkdir(exist_ok=True)
df = pd.read_excel(input_file)
for value, group in df.groupby(column):
filename = f'{output_dir}/{value}.xlsx'
group.to_excel(filename, index=False)
print(f'{value}: {len(group)}行 → {filename}')
split_excel('employees.xlsx', '部门')
技巧3:Excel数据清洗一条龙
def clean_data(input_file, output_file='cleaned.xlsx'):
df = pd.read_excel(input_file)
# 去除空行
df.dropna(how='all', inplace=True)
# 去除前后空格
str_cols = df.select_dtypes(include='object').columns
df[str_cols] = df[str_cols].apply(lambda x: x.str.strip())
# 去重
before = len(df)
df.drop_duplicates(inplace=True)
print(f'去重:{before} → {len(df)}行')
# 统一日期格式
for col in df.columns:
if '日期' in col or 'date' in col.lower():
df[col] = pd.to_datetime(df[col], errors='coerce')
# 手机号脱敏
if '手机' in df.columns:
df['手机'] = df['手机'].astype(str).apply(
lambda x: x[:3] + '****' + x[7:] if len(x) == 11 else x
)
df.to_excel(output_file, index=False)
print(f'清洗完成 → {output_file}')
clean_data('raw_data.xlsx')
技巧4:自动生成数据报表
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.chart import BarChart, Reference
def create_report(data, output='report.xlsx'):
wb = Workbook()
ws = wb.active
ws.title = '月度报表'
# 标题样式
title_font = Font(name='微软雅黑', size=14, bold=True)
header_fill = PatternFill(start_color='4472C4', fill_type='solid')
header_font = Font(name='微软雅黑', size=11, bold=True, color='FFFFFF')
# 写标题
ws.merge_cells('A1:D1')
ws['A1'] = '2026年2月销售报表'
ws['A1'].font = title_font
ws['A1'].alignment = Alignment(horizontal='center')
# 写表头
headers = ['部门', '销售额', '目标', '完成率']
for col, header in enumerate(headers, 1):
cell = ws.cell(row=3, column=col, value=header)
cell.font = header_font
cell.fill = header_fill
cell.alignment = Alignment(horizontal='center')
# 写数据
for row_idx, row_data in enumerate(data, 4):
for col_idx, value in enumerate(row_data, 1):
ws.cell(row=row_idx, column=col_idx, value=value)
# 添加图表
chart = BarChart()
chart.title = '各部门销售额'
chart.y_axis.title = '金额(万元)'
data_ref = Reference(ws, min_col=2, min_row=3, max_row=3+len(data), max_col=3)
cats = Reference(ws, min_col=1, min_row=4, max_row=3+len(data))
chart.add_data(data_ref, titles_from_data=True)
chart.set_categories(cats)
ws.add_chart(chart, 'F3')
# 调整列宽
for col in ['A', 'B', 'C', 'D']:
ws.column_dimensions[col].width = 15
wb.save(output)
print(f'报表生成 → {output}')
data = [
['技术部', 150, 120, '125%'],
['市场部', 200, 180, '111%'],
['运营部', 80, 100, '80%'],
['销售部', 300, 250, '120%'],
]
create_report(data)
技巧5:PDF文本提取
import pdfplumber
def extract_pdf_text(pdf_path):
with pdfplumber.open(pdf_path) as pdf:
all_text = []
for i, page in enumerate(pdf.pages):
text = page.extract_text()
if text:
all_text.append(f'--- 第{i+1}页 ---\n{text}')
return '\n\n'.join(all_text)
text = extract_pdf_text('contract.pdf')
print(text)
技巧6:PDF表格提取为Excel
import pdfplumber
import pandas as pd
def pdf_tables_to_excel(pdf_path, output='tables.xlsx'):
all_tables = []
with pdfplumber.open(pdf_path) as pdf:
for page in pdf.pages:
tables = page.extract_tables()
for table in tables:
df = pd.DataFrame(table[1:], columns=table[0])
all_tables.append(df)
if all_tables:
with pd.ExcelWriter(output) as writer:
for i, df in enumerate(all_tables):
df.to_excel(writer, sheet_name=f'表格{i+1}', index=False)
print(f'提取了{len(all_tables)}个表格 → {output}')
else:
print('未找到表格')
pdf_tables_to_excel('financial_report.pdf')
技巧7:批量PDF转图片
import subprocess
from pathlib import Path
def pdf_to_images(pdf_path, output_dir='images', dpi=200):
"""需要安装poppler: apt install poppler-utils"""
Path(output_dir).mkdir(exist_ok=True)
subprocess.run([
'pdftoppm', '-png', '-r', str(dpi),
pdf_path, f'{output_dir}/page'
])
images = list(Path(output_dir).glob('*.png'))
print(f'转换完成:{len(images)}张图片')
技巧8:批量文件重命名
from pathlib import Path
import re
def batch_rename(folder, pattern, replacement):
"""正则批量重命名"""
folder = Path(folder)
renamed = 0
for f in folder.iterdir():
new_name = re.sub(pattern, replacement, f.name)
if new_name != f.name:
f.rename(folder / new_name)
print(f'{f.name} → {new_name}')
renamed += 1
print(f'重命名了{renamed}个文件')
# 示例:把"报告_2026_01.xlsx"改成"2026-01-报告.xlsx"
batch_rename('./reports/', r'(.+)_(\d{4})_(\d{2})', r'\2-\3-\1')
技巧9:自动发送邮件报表
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
def send_report(to_email, subject, body, attachment_path,
smtp_host='smtp.example.com', smtp_port=465,
username='[email]', password='[password]'):
msg = MIMEMultipart()
msg['From'] = username
msg['To'] = to_email
msg['Subject'] = subject
msg.attach(MIMEText(body, 'html', 'utf-8'))
# 添加附件
with open(attachment_path, 'rb') as f:
part = MIMEBase('application', 'octet-stream')
part.set_payload(f.read())
encoders.encode_base64(part)
part.add_header('Content-Disposition',
f'attachment; filename="{Path(attachment_path).name}"')
msg.attach(part)
with smtplib.SMTP_SSL(smtp_host, smtp_port) as server:
server.login(username, password)
server.send_message(msg)
print(f'邮件已发送至 {to_email}')
技巧10:定时自动执行
把上面的脚本串起来,用schedule库定时执行:
import schedule
import time
def daily_job():
print('开始执行日报任务...')
# 1. 合并当天数据
merge_excel('./today_data/')
# 2. 清洗数据
clean_data('merged.xlsx')
# 3. 生成报表
create_report(load_data('cleaned.xlsx'))
# 4. 发送邮件
send_report('[email]', '日报', '<h1>今日报表</h1>', 'report.xlsx')
print('日报任务完成!')
schedule.every().day.at('09:00').do(daily_job)
while True:
schedule.run_pending()
time.sleep(60)
总结
这10个技巧覆盖了日常办公自动化的大部分场景。核心思路就是:把重复操作抽象成函数,用Python批量执行。
一个建议:先从最痛的点开始自动化。如果你每天花1小时合并Excel,那就先自动化这个。等尝到甜头了,再逐步扩展到其他场景。
Python自动化办公的投入产出比极高——花半天写个脚本,能省下未来几百个小时的重复劳动。