使用openpyxl读取Excel批注

439 阅读1分钟

本文已参与「新人创作礼」活动,一起开启掘金创作之路。

项目使用poetry管理,已安装的openpyxl版本如下

% cat pyproject.toml|grep openpyxl                              22-04-30 - 22:39:33
openpyxl = "^3.0.9"

大致代码:

sheet.cell(row, col).comment.text

完整的模块示例:

#!/usr/bin/env python
import re
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import List, Union

from fastapi import HTTPException
from openpyxl import load_workbook

RE_SPACES = re.compile(r"\s{2,}")


def slim(s: str) -> str:
    return RE_SPACES.sub(" ", s)


class ValidationError(HTTPException):
    def __init__(self, detail: str, status_code: int = 400):
        super().__init__(status_code, detail=detail)


def remove_author(s: str) -> str:
    return s.replace("作者:\n", "").replace("Administrator:\n", "")


def read_excel(filename: Union[Path, str, bytes, BytesIO]):
    if isinstance(filename, bytes):
        filename = BytesIO(filename)
    return load_workbook(filename)


def load(filename: Union[Path, str, bytes, BytesIO]):
    wb = read_excel(filename)
    try:
        sheet = wb["工资表"]
    except KeyError:
        try:
            sheet = wb["Sheet1"]
        except KeyError:
            raise ValidationError(f"未找到名称为“工资表”的工作表")
    title = sheet.cell(1, 1).value.strip()
    now = datetime.now()
    if "月" in title:
        remark = title.split("年")[-1].strip("表").replace("份", "")
    else:
        if (month := now.month - 1) == 0:
            month = 12
        remark = f"{month}月工资"
    day = f"{now:%Y.%m.%d}"
    lines: List[list] = []
    for row in range(4, sheet.max_row):
        xuhao = sheet.cell(row, 1).value
        if xuhao and (isinstance(xuhao, int) or xuhao.isdigit()):
            name = sheet.cell(row, 2).value
            total = 0
            if (base := sheet.cell(row, 4).value) is None:
                base = "/"
            else:
                if isinstance(base, str):
                    if base.startswith("="):
                        base = eval(base[1:])
                    else:
                        raise TypeError(f"Expect int value, got: {base=}")
                total += base
            commission_comment = ""  # 提成批注
            commission_cell = sheet.cell(row, 5)
            if (commission := commission_cell.value) is None:
                commission = "/"
            else:
                total += commission
                if _cc := commission_cell.comment:
                    if _ct := _cc.text:
                        commission_comment = remove_author(_ct)
            if (attend := sheet.cell(row, 6).value) is None:
                if (attend := sheet.cell(row, 13).value) is None:
                    attend = "/"
            if (attend_money := sheet.cell(row, 7).value) is not None:
                total += attend_money
                attend = attend.strip().strip("+-/").strip()
                if attend_money > 0:
                    attend += f" +{attend_money}"
                else:
                    attend += f" {attend_money}"
            if (late := sheet.cell(row, 8).value) is None:
                late = "/"
            else:
                late = slim(late)
            if late_money := sheet.cell(row, 9).value:
                total += late_money
                if late_money > 0:
                    late = f"{late}{late_money}"
                else:
                    late = late.strip("/") + str(late_money)
            if subsidy_value := sheet.cell(row, 11).value:  # 补助
                total += subsidy_value
            subsidy = "/"
            if _c := sheet.cell(row, 10).comment:
                if _s := _c.text:
                    subsidy = remove_author(_s)

            one = [
                name,
                base,
                commission,
                attend,
                late,
                subsidy,
                total,
                remark,
                day,
                commission_comment,
            ]
            lines.append(one)
    return lines


def main():
    import sys

    if not sys.argv[1:]:
        print("No args, do nothing.")
        return
    print(load(sys.argv[1]))


if __name__ == "__main__":
    main()

注:脚本用于提取工资表里的信息,用在一个FastAPI项目中,需要Python3.8+