因为工作需要,需要将excel中多个sheet里面的数据按某列分拆,比如按照部门分拆成独立文件。
以前手动分拆,费时费力,痛下决心,搜了一下python处理excel很不错,就边学边做,终于可以了,代码贴在这里。
# from codecs import ignore_errors# from operator import indeximport pandas as pdimport osimport openpyxl# import shutildef splitExcel(filePath,colName): fileName = filePath[:filePath.index('.')] outPath = f'{fileName}-拆分后' fileExt = '.xlsx' fileData = pd.ExcelFile(filePath) sheet_names = fileData.sheet_names # print(sheet_names) if not os.path.exists(outPath): os.mkdir(outPath) # else: # shutil.rmtree(outPath,ignore_errors=True) print('开始处理') for sheetName in sheet_names: print(sheetName + '==》开始处理') sheetData = pd.read_excel(filePath,sheet_name=sheetName) sheetDataByColName= sheetData.groupby(colName) for k ,v in sheetDataByColName: outFilePath = '/'.join([outPath,k+fileExt]) if os.path.exists(outFilePath): book = openpyxl.load_workbook(outFilePath) writer = pd.ExcelWriter(outFilePath,engine="openpyxl") writer.book = book v.to_excel(excel_writer=writer,sheet_name=sheetName,index=False) writer.close() else: v.to_excel(outFilePath,sheet_name=sheetName,index=False) print(sheetName + '==》处理完毕') print('所有处理完毕')filePath='xxx文件.xlsx'colName="机构名称"splitExcel(filePath=filePath,colName=colName)