因为工作需要,需要将excel中多个sheet里面的数据按某列分拆,比如按照部门分拆成独立文件。
以前手动分拆,费时费力,痛下决心,搜了一下python处理excel很不错,就边学边做,终于可以了,代码贴在这里。
# from operator import index
import pandas as pd
import os
import openpyxl
# import shutil
def splitExcel(filePath,colName):
fileName = filePath[:filePath.index('.')]
outPath = f'{fileName}-拆分后'
fileExt = '.xlsx'
fileData = pd.ExcelFile(filePath)
sheet_names = fileData.sheet_names
# print(sheet_names)
if not os.path.exists(outPath):
os.mkdir(outPath)
# else:
# shutil.rmtree(outPath,ignore_errors=True)
print('开始处理')
for sheetName in sheet_names:
print(sheetName + '==》开始处理')
sheetData = pd.read_excel(filePath,sheet_name=sheetName)
sheetDataByColName= sheetData.groupby(colName)
for k ,v in sheetDataByColName:
outFilePath = '/'.join([outPath,k+fileExt])
if os.path.exists(outFilePath):
book = openpyxl.load_workbook(outFilePath)
writer = pd.ExcelWriter(outFilePath,engine="openpyxl")
writer.book = book
v.to_excel(excel_writer=writer,sheet_name=sheetName,index=False)
writer.close()
else:
v.to_excel(outFilePath,sheet_name=sheetName,index=False)
print(sheetName + '==》处理完毕')
print('所有处理完毕')
filePath='xxx文件.xlsx'
colName="机构名称"
splitExcel(filePath=filePath,colName=colName)