多线程处理一个 Excel 文件

29 阅读1分钟

多线程版本1

def process_row_thread1(row):
    """Look up the maturity for a single worksheet row and print it.

    Args:
        row: Sequence of cell values for one row. Only the first element
            is read; it is passed to ``get_maturity``.

    Returns:
        None. The result is only written to standard output.
    """
    # A previous, disabled variant stored the result in column I of the
    # sheet and printed a per-row completion message; this version only
    # prints the looked-up value.
    first_cell = row[0]
    maturity = get_maturity(first_cell)
    print(maturity)
# Multithreaded version (thread pool)
def detail_excel_file_thread1(excel_path, sheet_name='Sheet', max_workers=5):
    """Process every data row of a worksheet concurrently using a thread pool.

    The first row of the worksheet is treated as a header and skipped.
    Each remaining row is submitted to ``process_row_thread1``. An
    exception raised while processing a row is printed and does not stop
    the other rows. After all rows finish, the workbook is saved back to
    ``excel_path``.

    Args:
        excel_path: Path of the workbook to load and, afterwards, save.
        sheet_name: Title of the worksheet to process. Defaults to
            ``'Sheet'``, the name this function previously hard-coded.
        max_workers: Upper bound on concurrently running worker threads.
            Defaults to 5; tune it to the workload.

    Returns:
        None.
    """
    workbook = openpyxl.load_workbook(excel_path)
    sheet = workbook[sheet_name]

    # Start at the second row so the header row is never submitted; this
    # avoids building a full list of rows only to slice off its first item.
    data_rows = sheet.iter_rows(min_row=2, values_only=True)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_row_thread1, row) for row in data_rows]

        for future in as_completed(futures):
            try:
                # result() re-raises whatever the worker raised, so a
                # failure in one row surfaces here.
                future.result()
            except Exception as exc:
                # Deliberately best-effort: report the failure and keep
                # draining the remaining futures.
                print(f'{exc}')

    print('成熟度编辑完成')
    # NOTE(review): nothing in this function writes to the sheet, so this
    # save rewrites the file with unchanged cell values - confirm whether
    # results were meant to be written back before saving.
    workbook.save(excel_path)

多线程版本2

def process_row_thread2(sheet, row, row_index):
    """Print the maturity for a row's first cell, skipping header and blanks.

    Args:
        sheet: Worksheet the row belongs to. Not used by this function.
        row: Sequence of cell values for one row. Only ``row[0]`` is read.
        row_index: Position of the row within the sheet's row list. Rows
            whose index is not greater than 0 are skipped.

    Returns:
        None. Output goes to standard output only.
    """
    first_cell = row[0]

    # Guard clause: nothing to do for an empty first cell or for the row
    # at index 0 (the header position).
    if first_cell is None or not row_index > 0:
        return

    print(get_maturity(first_cell))
    print(f'{first_cell}处理完毕')

def detail_excel_file_thread2(excel_path):
    """Process the active worksheet by running one thread per row.

    Every row of the active sheet, including the first one, is handed to
    ``process_row_thread2`` together with its zero-based index. All
    threads are joined before the workbook is saved back to
    ``excel_path``.

    Args:
        excel_path: Path of the workbook to load and, afterwards, save.

    Returns:
        None.
    """
    workbook = openpyxl.load_workbook(excel_path)
    sheet = workbook.active  # the active worksheet is the one processed

    # NOTE(review): one thread is created per row with no upper bound, so
    # the thread count grows with the size of the sheet.
    workers = [
        threading.Thread(target=process_row_thread2, args=(sheet, cells, position))
        for position, cells in enumerate(sheet.iter_rows(values_only=True))
    ]

    for worker in workers:
        worker.start()

    # Block until every row's thread has finished.
    for worker in workers:
        worker.join()

    print('成熟度编辑完成')
    workbook.save(excel_path)