NumPy 源码解析（四十八）

`.\numpy\numpy\_core\fromnumeric.pyi`

from collections.abc import Sequence  # 导入 Sequence 抽象基类
from typing import Any, overload, TypeVar, Literal, SupportsIndex  # 导入类型相关模块

import numpy as np  # 导入 NumPy 库
from numpy import (  # 导入 NumPy 的部分子模块和类型
    number,
    uint64,
    int_,
    int64,
    intp,
    float16,
    floating,
    complexfloating,
    object_,
    generic,
    _OrderKACF,
    _OrderACF,
    _ModeKind,
    _PartitionKind,
    _SortKind,
    _SortSide,
    _CastingKind,
)
from numpy._typing import (  # 导入 NumPy 的类型注解
    DTypeLike,
    _DTypeLike,
    ArrayLike,
    _ArrayLike,
    NDArray,
    _ShapeLike,
    _Shape,
    _ArrayLikeBool_co,
    _ArrayLikeUInt_co,
    _ArrayLikeInt_co,
    _ArrayLikeFloat_co,
    _ArrayLikeComplex_co,
    _ArrayLikeObject_co,
    _IntLike_co,
    _BoolLike_co,
    _ComplexLike_co,
    _NumberLike_co,
    _ScalarLike_co,
)

# Type variable bounded by `generic`.
_SCT = TypeVar("_SCT", bound=generic)
# Type variable bounded by `number[Any] | object_`.
_SCT_uifcO = TypeVar("_SCT_uifcO", bound=number[Any] | object_)
# Type variable bounded by `NDArray[Any]`.
_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])

# Declared as a list of strings; the contents are not spelled out in the stub.
__all__: list[str]

# take: typing overloads, distinguished by the kind of `indices` and `out`.
#   1. array-like of a known scalar type, single integer index, axis=None
#      -> that scalar type
#   2. any array-like, single integer index         -> Any
#   3. array-like of a known scalar type, integer array-like indices
#      -> NDArray of that scalar type
#   4. any array-like, integer array-like indices   -> NDArray[Any]
#   5. `out` typed as an ndarray                    -> the type of `out`
@overload
def take(
    a: _ArrayLike[_SCT],
    indices: _IntLike_co,
    axis: None = ...,
    out: None = ...,
    mode: _ModeKind = ...,
) -> _SCT: ...
@overload
def take(
    a: ArrayLike,
    indices: _IntLike_co,
    axis: None | SupportsIndex = ...,
    out: None = ...,
    mode: _ModeKind = ...,
) -> Any: ...
@overload
def take(
    a: _ArrayLike[_SCT],
    indices: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
    out: None = ...,
    mode: _ModeKind = ...,
) -> NDArray[_SCT]: ...
@overload
def take(
    a: ArrayLike,
    indices: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
    out: None = ...,
    mode: _ModeKind = ...,
) -> NDArray[Any]: ...
@overload
def take(
    a: ArrayLike,
    indices: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
    out: _ArrayType = ...,
    mode: _ModeKind = ...,
) -> _ArrayType: ...

# reshape: typing overloads.
#   1. array-like of a known scalar type -> NDArray of that scalar type
#   2. any other array-like              -> NDArray[Any]
@overload
def reshape(
    a: _ArrayLike[_SCT],
    newshape: _ShapeLike,
    order: _OrderACF = ...,
    copy: None | bool = ...,
) -> NDArray[_SCT]: ...
@overload
def reshape(
    a: ArrayLike,
    newshape: _ShapeLike,
    order: _OrderACF = ...,
    copy: None | bool = ...,
) -> NDArray[Any]: ...

# choose: typing overloads.
#   1. single integer selector `a`                            -> Any
#   2. integer array-like `a`, choices of a known scalar type -> NDArray of that type
#   3. integer array-like `a`, any array-like choices         -> NDArray[Any]
#   4. `out` typed as an ndarray                              -> the type of `out`
@overload
def choose(
    a: _IntLike_co,
    choices: ArrayLike,
    out: None = ...,
    mode: _ModeKind = ...,
) -> Any: ...
@overload
def choose(
    a: _ArrayLikeInt_co,
    choices: _ArrayLike[_SCT],
    out: None = ...,
    mode: _ModeKind = ...,
) -> NDArray[_SCT]: ...
@overload
def choose(
    a: _ArrayLikeInt_co,
    choices: ArrayLike,
    out: None = ...,
    mode: _ModeKind = ...,
) -> NDArray[Any]: ...
@overload
def choose(
    a: _ArrayLikeInt_co,
    choices: ArrayLike,
    out: _ArrayType = ...,
    mode: _ModeKind = ...,
) -> _ArrayType: ...

# repeat: typing overloads.
#   1. array-like of a known scalar type -> NDArray of that scalar type
#   2. any other array-like              -> NDArray[Any]
@overload
def repeat(
    a: _ArrayLike[_SCT],
    repeats: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
) -> NDArray[_SCT]: ...
@overload
def repeat(
    a: ArrayLike,
    repeats: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
) -> NDArray[Any]: ...

# put: takes an ndarray `a`, integer array-like indices `ind`, array-like
# values `v` and an optional `mode`.
# NOTE(review): the closing `) -> None: ...` was missing from this excerpt and
# a prose sentence had been spliced into the parameter list; the `None` return
# follows the documented behaviour of `numpy.put` — confirm against upstream.
def put(
    a: NDArray[Any],
    ind: _ArrayLikeInt_co,
    v: ArrayLike,
    mode: _ModeKind = ...,
) -> None: ...

# swapaxes, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def swapaxes(
    a: _ArrayLike[_SCT],
    axis1: SupportsIndex,
    axis2: SupportsIndex,
) -> NDArray[_SCT]: ...

# swapaxes, overload 2: any other array-like -> NDArray[Any].
@overload
def swapaxes(
    a: ArrayLike,
    axis1: SupportsIndex,
    axis2: SupportsIndex,
) -> NDArray[Any]: ...

# transpose, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def transpose(
    a: _ArrayLike[_SCT],
    axes: None | _ShapeLike = ...
) -> NDArray[_SCT]: ...

# transpose, overload 2: any other array-like -> NDArray[Any].
@overload
def transpose(
    a: ArrayLike,
    axes: None | _ShapeLike = ...
) -> NDArray[Any]: ...

# matrix_transpose, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def matrix_transpose(x: _ArrayLike[_SCT]) -> NDArray[_SCT]: ...

# matrix_transpose, overload 2: any other array-like -> NDArray[Any].
@overload
def matrix_transpose(x: ArrayLike) -> NDArray[Any]: ...

# partition, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def partition(
    a: _ArrayLike[_SCT],
    kth: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
    kind: _PartitionKind = ...,
    order: None | str | Sequence[str] = ...,
) -> NDArray[_SCT]: ...

# partition, overload 2: any other array-like -> NDArray[Any].
@overload
def partition(
    a: ArrayLike,
    kth: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
    kind: _PartitionKind = ...,
    order: None | str | Sequence[str] = ...,
) -> NDArray[Any]: ...

# argpartition: single signature; always typed as returning an intp array.
def argpartition(
    a: ArrayLike,
    kth: _ArrayLikeInt_co,
    axis: None | SupportsIndex = ...,
    kind: _PartitionKind = ...,
    order: None | str | Sequence[str] = ...,
) -> NDArray[intp]: ...

# sort, overload 1: array-like of a known scalar type -> NDArray of that type.
# `stable` is keyword-only.
@overload
def sort(
    a: _ArrayLike[_SCT],
    axis: None | SupportsIndex = ...,
    kind: None | _SortKind = ...,
    order: None | str | Sequence[str] = ...,
    *,
    stable: None | bool = ...,
) -> NDArray[_SCT]: ...

# sort, overload 2: any other array-like -> NDArray[Any].
@overload
def sort(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    kind: None | _SortKind = ...,
    order: None | str | Sequence[str] = ...,
    *,
    stable: None | bool = ...,
) -> NDArray[Any]: ...

# argsort: single signature; always typed as returning an intp array.
# `stable` is keyword-only.
def argsort(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    kind: None | _SortKind = ...,
    order: None | str | Sequence[str] = ...,
    *,
    stable: None | bool = ...,
) -> NDArray[intp]: ...

# argmax, overload 1: axis=None, no `out`, keepdims=False -> a single intp.
@overload
def argmax(
    a: ArrayLike,
    axis: None = ...,
    out: None = ...,
    *,
    keepdims: Literal[False] = ...,
) -> intp: ...

# argmax, overload 2: optional axis, any keepdims, no `out` -> Any.
@overload
def argmax(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    out: None = ...,
    *,
    keepdims: bool = ...,
) -> Any: ...

# argmax, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def argmax(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    out: _ArrayType = ...,
    *,
    keepdims: bool = ...,
) -> _ArrayType: ...

# argmin, overload 1: axis=None, no `out`, keepdims=False -> a single intp.
@overload
def argmin(
    a: ArrayLike,
    axis: None = ...,
    out: None = ...,
    *,
    keepdims: Literal[False] = ...,
) -> intp: ...

# argmin, overload 2: optional axis, any keepdims, no `out` -> Any.
@overload
def argmin(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    out: None = ...,
    *,
    keepdims: bool = ...,
) -> Any: ...

# argmin, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def argmin(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    out: _ArrayType = ...,
    *,
    keepdims: bool = ...,
) -> _ArrayType: ...

# searchsorted, overload 1: scalar `v` -> a single intp.
@overload
def searchsorted(
    a: ArrayLike,
    v: _ScalarLike_co,
    side: _SortSide = ...,
    sorter: None | _ArrayLikeInt_co = ...,  # 1D int array
) -> intp: ...

# searchsorted, overload 2: array-like `v` -> an intp array.
# NOTE(review): in the excerpt this overload repeated the scalar signature and
# left the array-like parameters dangling after it; rebuilt from those
# dangling lines and their `-> NDArray[intp]` annotation — confirm upstream.
@overload
def searchsorted(
    a: ArrayLike,
    v: ArrayLike,
    side: _SortSide = ...,
    sorter: None | _ArrayLikeInt_co = ...,  # 1D int array
) -> NDArray[intp]: ...

# resize, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def resize(
    a: _ArrayLike[_SCT],
    new_shape: _ShapeLike,
) -> NDArray[_SCT]: ...

# resize, overload 2: any other array-like -> NDArray[Any].
@overload
def resize(
    a: ArrayLike,
    new_shape: _ShapeLike,
) -> NDArray[Any]: ...

# squeeze, overload 1: a NumPy scalar is returned with its own type.
@overload
def squeeze(
    a: _SCT,
    axis: None | _ShapeLike = ...,
) -> _SCT: ...

# squeeze, overload 2: array-like of a known scalar type -> NDArray of that type.
@overload
def squeeze(
    a: _ArrayLike[_SCT],
    axis: None | _ShapeLike = ...,
) -> NDArray[_SCT]: ...

# squeeze, overload 3: any other array-like -> NDArray[Any].
@overload
def squeeze(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
) -> NDArray[Any]: ...

# diagonal, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def diagonal(
    a: _ArrayLike[_SCT],
    offset: SupportsIndex = ...,
    axis1: SupportsIndex = ...,
    axis2: SupportsIndex = ...,  # >= 2D array
) -> NDArray[_SCT]: ...

# diagonal, overload 2: any other array-like -> NDArray[Any].
@overload
def diagonal(
    a: ArrayLike,
    offset: SupportsIndex = ...,
    axis1: SupportsIndex = ...,
    axis2: SupportsIndex = ...,  # >= 2D array
) -> NDArray[Any]: ...

# trace, overload 1: no `out` -> Any.
@overload
def trace(
    a: ArrayLike,  # >= 2D array
    offset: SupportsIndex = ...,
    axis1: SupportsIndex = ...,
    axis2: SupportsIndex = ...,
    dtype: DTypeLike = ...,
    out: None = ...,
) -> Any: ...

# trace, overload 2: `out` typed as an ndarray -> the type of `out`.
@overload
def trace(
    a: ArrayLike,  # >= 2D array
    offset: SupportsIndex = ...,
    axis1: SupportsIndex = ...,
    axis2: SupportsIndex = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType = ...,
) -> _ArrayType: ...

# ravel, overload 1: array-like of a known scalar type -> NDArray of that type.
@overload
def ravel(a: _ArrayLike[_SCT], order: _OrderKACF = ...) -> NDArray[_SCT]: ...

# ravel, overload 2: any other array-like -> NDArray[Any].
@overload
def ravel(a: ArrayLike, order: _OrderKACF = ...) -> NDArray[Any]: ...

# nonzero: takes any array-like; typed as a variable-length tuple of intp arrays.
def nonzero(a: ArrayLike) -> tuple[NDArray[intp], ...]: ...

# shape: takes any array-like; typed as returning `_Shape`.
def shape(a: ArrayLike) -> _Shape: ...

# compress, overload 1: array-like of a known scalar type, no `out`
# -> NDArray of that type.
@overload
def compress(
    condition: _ArrayLikeBool_co,  # 1D bool array
    a: _ArrayLike[_SCT],
    axis: None | SupportsIndex = ...,
    out: None = ...,
) -> NDArray[_SCT]: ...

# compress, overload 2: any other array-like, no `out` -> NDArray[Any].
@overload
def compress(
    condition: _ArrayLikeBool_co,  # 1D bool array
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    out: None = ...,
) -> NDArray[Any]: ...

# compress, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def compress(
    condition: _ArrayLikeBool_co,  # 1D bool array
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    out: _ArrayType = ...,
) -> _ArrayType: ...

# clip: typing overloads. Everything after `out` is keyword-only.

# clip, overload 1: NumPy scalar input, no `out`/`dtype` -> the same scalar type.
@overload
def clip(
    a: _SCT,
    a_min: None | ArrayLike,
    a_max: None | ArrayLike,
    out: None = ...,
    *,
    dtype: None = ...,
    where: None | _ArrayLikeBool_co = ...,
    order: _OrderKACF = ...,
    subok: bool = ...,
    signature: str | tuple[None | str, ...] = ...,
    casting: _CastingKind = ...,
) -> _SCT: ...

# clip, overload 2: any other scalar-like input, no `out`/`dtype` -> Any.
@overload
def clip(
    a: _ScalarLike_co,
    a_min: None | ArrayLike,
    a_max: None | ArrayLike,
    out: None = ...,
    *,
    dtype: None = ...,
    where: None | _ArrayLikeBool_co = ...,
    order: _OrderKACF = ...,
    subok: bool = ...,
    signature: str | tuple[None | str, ...] = ...,
    casting: _CastingKind = ...,
) -> Any: ...

# clip, overload 3: array-like of a known scalar type, no `out`/`dtype`
# -> NDArray of that type.
# NOTE(review): the closing return annotation was missing from the excerpt;
# `NDArray[_SCT]` is taken from the comment that followed the truncated
# signature — confirm against upstream.
@overload
def clip(
    a: _ArrayLike[_SCT],
    a_min: None | ArrayLike,
    a_max: None | ArrayLike,
    out: None = ...,
    *,
    dtype: None = ...,
    where: None | _ArrayLikeBool_co = ...,
    order: _OrderKACF = ...,
    subok: bool = ...,
    signature: str | tuple[None | str, ...] = ...,
    casting: _CastingKind = ...,
) -> NDArray[_SCT]: ...

# clip, overload 4: any other array-like, no `out`/`dtype` -> NDArray[Any].
@overload
def clip(
    a: ArrayLike,
    a_min: None | ArrayLike,
    a_max: None | ArrayLike,
    out: None = ...,
    *,
    dtype: None = ...,
    where: None | _ArrayLikeBool_co = ...,
    order: _OrderKACF = ...,
    subok: bool = ...,
    signature: str | tuple[None | str, ...] = ...,
    casting: _CastingKind = ...,
) -> NDArray[Any]: ...

# clip, overload 5: `dtype` is required (keyword-only), `out` optional -> Any.
@overload
def clip(
    a: ArrayLike,
    a_min: None | ArrayLike,
    a_max: None | ArrayLike,
    out: _ArrayType = ...,
    *,
    dtype: DTypeLike,
    where: None | _ArrayLikeBool_co = ...,
    order: _OrderKACF = ...,
    subok: bool = ...,
    signature: str | tuple[None | str, ...] = ...,
    casting: _CastingKind = ...,
) -> Any: ...

# clip, overload 6: `out` is required, `dtype` optional -> the type of `out`.
@overload
def clip(
    a: ArrayLike,
    a_min: None | ArrayLike,
    a_max: None | ArrayLike,
    out: _ArrayType,
    *,
    dtype: DTypeLike = ...,
    where: None | _ArrayLikeBool_co = ...,
    order: _OrderKACF = ...,
    subok: bool = ...,
    signature: str | tuple[None | str, ...] = ...,
    casting: _CastingKind = ...,
) -> _ArrayType: ...

# sum, overload 1: array-like of a known scalar type, axis=None, no
# `dtype`/`out` -> that scalar type.
@overload
def sum(
    a: _ArrayLike[_SCT],
    axis: None = ...,
    dtype: None = ...,
    out: None  = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> _SCT: ...
# sum, overload 2: optional axis and dtype, no `out` -> Any.
@overload
def sum(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: None  = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...
# sum, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def sum(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType  = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> _ArrayType: ...

# all, overload 1: axis=None, no `out`, keepdims=False -> np.bool.
# `where` is keyword-only in every overload.
@overload
def all(
    a: ArrayLike,
    axis: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> np.bool: ...
# all, overload 2: optional axis, any keepdims, no `out` -> Any.
@overload
def all(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: None = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...
# all, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def all(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: _ArrayType = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> _ArrayType: ...

# any: axis=None, no `out`, keepdims=False -> np.bool.
# `where` is keyword-only in every overload.
@overload
def any(
    a: ArrayLike,
    axis: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> np.bool: ...
# any: optional axis, any keepdims, no `out` -> Any.
# NOTE(review): this overload was cut off after `keepdims` in the excerpt and
# the `out` overload below was missing entirely; both are restored from the
# parallel `all` overloads — confirm against upstream.
@overload
def any(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: None = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...
# any: `out` typed as an ndarray -> the type of `out`.
@overload
def any(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: _ArrayType = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> _ArrayType: ...

# cumsum: array-like of a known scalar type, no `dtype`/`out`
# -> NDArray of that type.
# NOTE(review): only the `) -> NDArray[_SCT]: ...` tail of this overload
# survived in the excerpt; the parameter list is rebuilt to match the other
# cumsum overloads — confirm against upstream.
@overload
def cumsum(
    a: _ArrayLike[_SCT],
    axis: None | SupportsIndex = ...,
    dtype: None = ...,
    out: None = ...,
) -> NDArray[_SCT]: ...

# cumsum: any array-like, no `dtype`, no `out` -> NDArray[Any].
@overload
def cumsum(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    dtype: None = ...,
    out: None = ...,
) -> NDArray[Any]: ...

# cumsum: `dtype` that identifies a scalar type -> NDArray of that type.
@overload
def cumsum(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    dtype: _DTypeLike[_SCT] = ...,
    out: None = ...,
) -> NDArray[_SCT]: ...

# cumsum: any other dtype-like, no `out` -> NDArray[Any].
@overload
def cumsum(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    dtype: DTypeLike = ...,
    out: None = ...,
) -> NDArray[Any]: ...

# cumsum: `out` typed as an ndarray -> the type of `out`.
@overload
def cumsum(
    a: ArrayLike,
    axis: None | SupportsIndex = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType = ...,
) -> _ArrayType: ...

# ptp, overload 1: array-like of a known scalar type, axis=None, no `out`,
# keepdims=False -> that scalar type.
@overload
def ptp(
    a: _ArrayLike[_SCT],
    axis: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
) -> _SCT: ...

# ptp, overload 2: optional axis, any keepdims, no `out` -> Any.
@overload
def ptp(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: None = ...,
    keepdims: bool = ...,
) -> Any: ...

# ptp, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def ptp(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: _ArrayType = ...,
    keepdims: bool = ...,
) -> _ArrayType: ...

# amax, overload 1: array-like of a known scalar type, axis=None, no `out`,
# keepdims=False -> that scalar type.
@overload
def amax(
    a: _ArrayLike[_SCT],
    axis: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> _SCT: ...

# amax, overload 2: optional axis, any keepdims, no `out` -> Any.
@overload
def amax(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: None = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...

# amax, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def amax(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: _ArrayType = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> _ArrayType: ...

# amin, overload 1: array-like of a known scalar type, axis=None, no `out`,
# keepdims=False -> that scalar type.
@overload
def amin(
    a: _ArrayLike[_SCT],
    axis: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> _SCT: ...

# amin, overload 2: optional axis, any keepdims, no `out` -> Any.
@overload
def amin(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: None = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...

# amin, overload 3: `out` typed as an ndarray -> the type of `out`.
@overload
def amin(
    a: ArrayLike,
    axis: None | _ShapeLike = ...,
    out: _ArrayType = ...,
    keepdims: bool = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> _ArrayType: ...

# Note that `initial` does not necessarily have to be a numerical scalar.
# In the case of an object array, the only requirement is compatibility
# with the `.__mul__()` method(s) of the passed array's elements.

# prod: boolean array-like, axis=None, no `dtype`/`out`, keepdims=False -> int_.
@overload
def prod(
    a: _ArrayLikeBool_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> int_: ...
# prod: unsigned-integer array-like, same restrictions -> uint64.
# NOTE(review): only the last three parameters of this overload survived in
# the excerpt; the head is rebuilt from the sibling overloads, the imported
# `_ArrayLikeUInt_co`/`uint64` names and the comment announcing a `uint64`
# return — confirm against upstream.
@overload
def prod(
    a: _ArrayLikeUInt_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> uint64: ...
# prod: integer array-like, same restrictions -> int64.
@overload
def prod(
    a: _ArrayLikeInt_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    initial: _NumberLike_co = ...,
    where: _ArrayLikeBool_co = ...,
) -> int64: ...
# NOTE(review): the excerpt states that further prod overloads for other
# input types follow the same pattern; they are not shown here.

# cumprod: boolean array-like, no `dtype`/`out` -> NDArray[int_].
@overload
def cumprod(
    a: _ArrayLikeBool_co,
    axis: None | SupportsIndex = ...,
    dtype: None = ...,
    out: None = ...,
) -> NDArray[int_]: ...
# NOTE(review): the excerpt states that further cumprod overloads for other
# input types follow the same pattern; they are not shown here.

# cumprod: complex/object array-like with an arbitrary dtype-like, no `out`
# -> NDArray[Any].
@overload
def cumprod(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | SupportsIndex = ...,
    dtype: DTypeLike = ...,
    out: None = ...,
) -> NDArray[Any]: ...

# cumprod: `out` typed as an ndarray -> the type of `out`.
@overload
def cumprod(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | SupportsIndex = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType = ...,
) -> _ArrayType: ...

# ndim: takes any array-like; typed as returning a plain int.
def ndim(a: ArrayLike) -> int: ...

# size: takes any array-like and an optional int `axis`; typed as returning a plain int.
def size(a: ArrayLike, axis: None | int = ...) -> int: ...

# around, overload 1: single boolean-like value -> float16.
@overload
def around(
    a: _BoolLike_co,
    decimals: SupportsIndex = ...,
    out: None = ...,
) -> float16: ...

# around, overload 2: scalar of a `number`/`object_` type -> the same type.
@overload
def around(
    a: _SCT_uifcO,
    decimals: SupportsIndex = ...,
    out: None = ...,
) -> _SCT_uifcO: ...

# around, overload 3: any other complex-like scalar or `object_` -> Any.
@overload
def around(
    a: _ComplexLike_co | object_,
    decimals: SupportsIndex = ...,
    out: None = ...,
) -> Any: ...

# around, overload 4: boolean array-like -> NDArray[float16].
@overload
def around(
    a: _ArrayLikeBool_co,
    decimals: SupportsIndex = ...,
    out: None = ...,
) -> NDArray[float16]: ...

# around, overload 5: array-like of a `number`/`object_` scalar type
# -> NDArray of that type.
@overload
def around(
    a: _ArrayLike[_SCT_uifcO],
    decimals: SupportsIndex = ...,
    out: None = ...,
) -> NDArray[_SCT_uifcO]: ...

# around, overload 6: any other complex/object array-like, no `out` -> NDArray[Any].
@overload
def around(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    decimals: SupportsIndex = ...,
    out: None = ...,
) -> NDArray[Any]: ...

# around, overload 7: `out` typed as an ndarray -> the type of `out`.
@overload
def around(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    decimals: SupportsIndex = ...,
    out: _ArrayType = ...,
) -> _ArrayType: ...

# mean, overload 1: float-coercible array-like, axis=None, no `dtype`/`out`,
# keepdims=False -> floating[Any]. `where` is keyword-only in every overload.
@overload
def mean(
    a: _ArrayLikeFloat_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> floating[Any]: ...

# mean, overload 2: complex-coercible array-like, same restrictions
# -> complexfloating[Any, Any].
@overload
def mean(
    a: _ArrayLikeComplex_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> complexfloating[Any, Any]: ...

# mean, overload 3: complex/object array-like, optional axis, any keepdims,
# no `dtype`/`out` -> Any.
@overload
def mean(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: None = ...,
    out: None = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...

# mean, overload 4: axis=None, keepdims=False and a `dtype` that identifies a
# scalar type -> that scalar type.
@overload
def mean(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None = ...,
    dtype: _DTypeLike[_SCT] = ...,
    out: None = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> _SCT: ...

# mean, overload 5: any other dtype-like, optional axis, no `out` -> Any.
@overload
def mean(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: None = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> Any: ...

# mean, overload 6: `out` typed as an ndarray -> the type of `out`.
@overload
def mean(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
) -> _ArrayType: ...

# std: complex-coercible array-like, axis=None, no `dtype`/`out`,
# keepdims=False -> floating[Any]. `where`, `mean` and `correction` are
# keyword-only.
# NOTE(review): the closing return annotation was missing from the excerpt;
# `floating[Any]` matches the parallel first `var` overload — confirm
# against upstream.
@overload
def std(
    a: _ArrayLikeComplex_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co = ...,
    correction: int | float = ...,
) -> floating[Any]: ...
# std: complex/object array-like, optional axis, any keepdims, no
# `dtype`/`out` -> Any. `where`, `mean` and `correction` are keyword-only
# in every overload below.
@overload
def std(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: None = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> Any: ...
# std: axis=None, keepdims=False and a `dtype` that identifies a scalar type
# -> that scalar type.
@overload
def std(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None = ...,
    dtype: _DTypeLike[_SCT] = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> _SCT: ...
# std: any other dtype-like, optional axis, no `out` -> Any.
@overload
def std(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> Any: ...
# std: `out` typed as an ndarray -> the type of `out`.
@overload
def std(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType = ...,
    ddof: int | float = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> _ArrayType: ...

# var: complex-coercible array-like, axis=None, no `dtype`/`out`,
# keepdims=False -> floating[Any]. `where`, `mean` and `correction` are
# keyword-only in every overload below.
@overload
def var(
    a: _ArrayLikeComplex_co,
    axis: None = ...,
    dtype: None = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co = ...,
    correction: int | float = ...,
) -> floating[Any]: ...
# var: complex/object array-like, optional axis, any keepdims, no
# `dtype`/`out` -> Any.
@overload
def var(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: None = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> Any: ...
# var: axis=None, keepdims=False and a `dtype` that identifies a scalar type
# -> that scalar type.
@overload
def var(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None = ...,
    dtype: _DTypeLike[_SCT] = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: Literal[False] = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> _SCT: ...
# var: any other dtype-like, optional axis, no `out` -> Any.
# NOTE(review): this overload was missing from the excerpt and the one below
# had lost its head (`@overload`, `def var(`, `a`, `axis`); both are restored
# from the parallel `std` overloads — confirm against upstream.
@overload
def var(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: None = ...,
    ddof: int | float = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> Any: ...
# var: `out` typed as an ndarray -> the type of `out`.
@overload
def var(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    dtype: DTypeLike = ...,
    out: _ArrayType = ...,
    ddof: int | float = ...,
    keepdims: bool = ...,
    *,
    where: _ArrayLikeBool_co = ...,
    mean: _ArrayLikeComplex_co | _ArrayLikeObject_co = ...,
    correction: int | float = ...,
) -> _ArrayType: ...

# Aliases: `max`, `min` and `round` are bound to the `amax`, `amin` and
# `around` stubs declared in this file, so each pair shares one set of
# overloads.
max = amax
min = amin
round = around

`.\numpy\numpy\_core\function_base.py`

import functools  # 导入 functools 模块，用于创建偏函数
import warnings  # 导入 warnings 模块，用于警告处理
import operator  # 导入 operator 模块，用于操作符函数
import types  # 导入 types 模块，用于动态类型创建

import numpy as np  # 导入 NumPy 库，并使用 np 别名
from . import numeric as _nx  # 从当前包中导入 numeric 模块，并使用 _nx 别名
from .numeric import result_type, nan, asanyarray, ndim  # 从 numeric 模块导入指定函数
from numpy._core.multiarray import add_docstring  # 从核心 multiarray 模块导入函数
from numpy._core._multiarray_umath import _array_converter  # 从核心 _multiarray_umath 模块导入函数
from numpy._core import overrides  # 从核心模块导入 overrides 函数

__all__ = ['logspace', 'linspace', 'geomspace']  # public names of this module

# `overrides.array_function_dispatch` with `module='numpy'` pre-bound, so the
# decorators below only need to pass the dispatcher function.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


def _linspace_dispatcher(start, stop, num=None, endpoint=None, retstep=None,
                         dtype=None, axis=None, *, device=None):
    return (start, stop)  # 返回 start 和 stop 参数的元组


# Register `linspace` with the array-function dispatch machinery, using
# `_linspace_dispatcher` to select the arguments relevant for dispatch.
@array_function_dispatch(_linspace_dispatcher)
def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
             axis=0, *, device=None):
    """
    Return evenly spaced numbers over a specified interval.

    Returns `num` evenly spaced samples, calculated over the
    interval [`start`, `stop`].

    The endpoint of the interval can optionally be excluded.

    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.

    .. versionchanged:: 1.20.0
        Values are rounded towards ``-inf`` instead of ``0`` when an
        integer ``dtype`` is specified. The old behavior can
        still be obtained with ``np.linspace(start, stop, num).astype(int)``

    Parameters
    ----------
    start : array_like
        The starting value of the sequence.
    stop : array_like
        The end value of the sequence, unless `endpoint` is set to False.
        In that case, the sequence consists of all but the last of ``num + 1``
        evenly spaced samples, so that `stop` is excluded.  Note that the step
        size changes when `endpoint` is False.
    num : int, optional
        Number of samples to generate. Default is 50. Must be non-negative.
    endpoint : bool, optional
        If True, `stop` is the last sample. Otherwise, it is not included.
        Default is True.
    retstep : bool, optional
        If True, return (`samples`, `step`), where `step` is the spacing
        between samples.
    dtype : dtype, optional
        The type of the output array.  If `dtype` is not given, the data type
        is inferred from `start` and `stop`. The inferred dtype will never be
        an integer; `float` is chosen even if the arguments would produce an
        array of integers.

        .. versionadded:: 1.9.0
    axis : int, optional
        The axis in the result to store the samples.  Relevant only if start
        or stop are array-like.  By default (0), the samples will be along a
        new axis inserted at the beginning. Use -1 to get an axis at the end.

        .. versionadded:: 1.16.0
    device : str, optional
        The device on which to place the created array. Default: None.
        For Array-API interoperability only, so must be ``"cpu"`` if passed.

        .. versionadded:: 2.0.0

    Returns
    -------
    samples : ndarray
        There are `num` equally spaced samples in the closed interval
        ``[start, stop]`` or the half-open interval ``[start, stop)``
        (depending on whether `endpoint` is True or False).
    step : float, optional
        Only returned if `retstep` is True.

        Size of spacing between samples.  It is ``nan`` when the spacing is
        undefined (no samples, or a single sample with ``endpoint=True``).

    Raises
    ------
    ValueError
        If `num` is negative.

    See Also
    --------
    geomspace : Similar to `linspace`, but with numbers spaced evenly on a log
                scale (a geometric progression).
    logspace : Similar to `geomspace`, but with the end points specified as
               logarithms.

    Examples
    --------
    >>> np.linspace(2.0, 3.0, num=5)
    array([2.  , 2.25, 2.5 , 2.75, 3.  ])
    >>> np.linspace(2.0, 3.0, num=5, endpoint=False)
    array([2. ,  2.2,  2.4,  2.6,  2.8])
    >>> np.linspace(2.0, 3.0, num=5, retstep=True)
    (array([2.  ,  2.25,  2.5 ,  2.75,  3.  ]), 0.25)

    """
    # Accept anything implementing __index__ and reject negative counts.
    num = operator.index(num)
    if num < 0:
        raise ValueError(
            "Number of samples, %s, must be non-negative." % num
        )
    # Number of intervals between the samples.
    div = (num - 1) if endpoint else num

    # Convert both bounds together and derive a common inexact result type.
    conv = _array_converter(start, stop)
    start, stop = conv.as_arrays()
    dt = conv.result_type(ensure_inexact=True)

    if dtype is None:
        dtype = dt
        integer_dtype = False
    else:
        integer_dtype = _nx.issubdtype(dtype, _nx.integer)

    # Use `dtype=type(dt)` to enforce a floating point evaluation:
    delta = np.subtract(stop, start, dtype=type(dt))
    # Sample indices 0..num-1 along a new leading axis, with trailing
    # length-1 axes so they broadcast against `delta`.
    y = _nx.arange(
        0, num, dtype=dt, device=device
    ).reshape((-1,) + (1,) * ndim(delta))

    if div > 0:
        # In-place multiplication is only used when `delta` is a scalar.
        _mult_inplace = _nx.isscalar(delta)
        step = delta / div
        any_step_zero = (
            step == 0 if _mult_inplace else _nx.asanyarray(step == 0).any())
        if any_step_zero:
            # `delta / div` evaluated to zero: divide the indices first and
            # scale by `delta` afterwards instead of multiplying by a zero
            # step.
            y /= div
            if _mult_inplace:
                y *= delta
            else:
                y = y * delta
        else:
            if _mult_inplace:
                y *= step
            else:
                y = y * step
    else:
        # Sequences with 0 items, or 1 item with endpoint=True (div <= 0),
        # have an undefined step.
        step = nan
        # Multiply with delta to allow possible override of output class.
        # This must stay inside the `else` branch: on the `div > 0` path
        # `y` has already been scaled.
        y = y * delta

    y += start

    # Pin the last sample to `stop` exactly.
    if endpoint and num > 1:
        y[-1, ...] = stop

    if axis != 0:
        y = _nx.moveaxis(y, 0, axis)

    # Integer output is rounded towards -inf before the cast.
    if integer_dtype:
        _nx.floor(y, out=y)

    y = conv.wrap(y.astype(dtype, copy=False))
    if retstep:
        return y, step
    else:
        return y
# 定义日志空间函数的调度器，用于分派参数到具体的实现函数
def _logspace_dispatcher(start, stop, num=None, endpoint=None, base=None,
                         dtype=None, axis=None):
    # 返回起始点、终止点和基数，作为元组
    return (start, stop, base)


# Register `logspace` with the array-function dispatch machinery, using
# `_logspace_dispatcher` to select the arguments relevant for dispatch.
@array_function_dispatch(_logspace_dispatcher)
def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None,
             axis=0):
    """
    Return numbers spaced evenly on a log scale.

    In linear space, the sequence starts at ``base ** start``
    (`base` to the power of `start`) and ends with ``base ** stop``
    (see `endpoint` below).

    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.

    .. versionchanged:: 1.25.0
        Non-scalar `base` is now supported

    Parameters
    ----------
    start : array_like
        ``base ** start`` is the starting value of the sequence.
    stop : array_like
        ``base ** stop`` is the final value of the sequence, unless `endpoint`
        is False.  In that case, ``num + 1`` values are spaced over the
        interval in log-space, of which all but the last (a sequence of
        length `num`) are returned.
    num : integer, optional
        Number of samples to generate.  Default is 50.
    endpoint : boolean, optional
        If true, `stop` is the last sample. Otherwise, it is not included.
        Default is True.
    base : array_like, optional
        The base of the log space. The step size between the elements in
        ``ln(samples) / ln(base)`` (or ``log_base(samples)``) is uniform.
        Default is 10.0.
    dtype : dtype
        The type of the output array.  If `dtype` is not given, the data type
        is inferred from `start` and `stop`. The inferred type will never be
        an integer; `float` is chosen even if the arguments would produce an
        array of integers.
    axis : int, optional
        The axis in the result to store the samples.  Relevant only if start,
        stop, or base are array-like.  By default (0), the samples will be
        along a new axis inserted at the beginning. Use -1 to get an axis at
        the end.

        .. versionadded:: 1.16.0


    Returns
    -------
    samples : ndarray
        `num` samples, equally spaced on a log scale.

    See Also
    --------
    arange : Similar to linspace, with the step size specified instead of the
             number of samples. Note that, when used with a float endpoint, the
             endpoint may or may not be included.
    linspace : Similar to logspace, but with the samples uniformly distributed
               in linear space, instead of log space.
    geomspace : Similar to logspace, but with endpoints specified directly.
    :ref:`how-to-partition`

    Notes
    -----
    If base is a scalar, logspace is equivalent to the code

    >>> y = np.linspace(start, stop, num=num, endpoint=endpoint)
    ... # doctest: +SKIP
    >>> power(base, y).astype(dtype)
    ... # doctest: +SKIP

    Examples
    --------
    >>> np.logspace(2.0, 3.0, num=4)
    array([ 100.        ,  215.443469  ,  464.15888336, 1000.        ])
    >>> np.logspace(2.0, 3.0, num=4, endpoint=False)
    array([100.        ,  177.827941  ,  316.22776602,  562.34132519])
    >>> np.logspace(2.0, 3.0, num=4, base=2.0)
    array([4.        ,  5.0396842 ,  6.34960421,  8.        ])
    >>> np.logspace(2.0, 3.0, num=4, base=[2.0, 3.0], axis=-1)
    array([[ 4.        ,  5.0396842 ,  6.34960421,  8.        ],
           [ 9.        , 12.98024613, 18.72075441, 27.        ]])

    Graphical illustration:

    >>> import matplotlib.pyplot as plt
    >>> N = 10
    >>> x1 = np.logspace(0.1, 1, N, endpoint=True)
    >>> x2 = np.logspace(0.1, 1, N, endpoint=False)
    >>> y = np.zeros(N)
    >>> plt.plot(x1, y, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.plot(x2, y + 0.5, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.ylim([-0.5, 1])
    (-0.5, 1)
    >>> plt.show()

    """
    if not isinstance(base, (float, int)) and np.ndim(base):
        # If base is non-scalar, broadcast it with the others, since it
        # may influence how axis is interpreted.
        ndmax = np.broadcast(start, stop, base).ndim
        start, stop, base = (
            np.array(a, copy=None, subok=True, ndmin=ndmax)
            for a in (start, stop, base)
        )
        # Give `base` a length-1 axis where `linspace` inserts the samples.
        base = np.expand_dims(base, axis=axis)
    # Evenly spaced exponents; the samples are `base` raised to them.
    y = linspace(start, stop, num=num, endpoint=endpoint, axis=axis)
    if dtype is None:
        return _nx.power(base, y)
    return _nx.power(base, y).astype(dtype, copy=False)
# 定义一个分派函数 `_geomspace_dispatcher`，用于分发参数到具体的函数处理
def _geomspace_dispatcher(start, stop, num=None, endpoint=None, dtype=None,
                          axis=None):
    # 返回 start 和 stop 参数的元组
    return (start, stop)


# Register `geomspace` with the array-function dispatch machinery, using
# `_geomspace_dispatcher` to select the arguments relevant for dispatch.
@array_function_dispatch(_geomspace_dispatcher)
def geomspace(start, stop, num=50, endpoint=True, dtype=None, axis=0):
    """
    Return numbers spaced evenly on a log scale (a geometric progression).

    This is similar to `logspace`, but with endpoints specified directly.
    Each output sample is a constant multiple of the previous.

    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.

    Parameters
    ----------
    start : array_like
        The starting value of the sequence.
    stop : array_like
        The final value of the sequence, unless `endpoint` is False.
        In that case, ``num + 1`` values are spaced over the
        interval in log-space, of which all but the last (a sequence of
        length `num`) are returned.
    num : integer, optional
        Number of samples to generate.  Default is 50.
    endpoint : boolean, optional
        If true, `stop` is the last sample. Otherwise, it is not included.
        Default is True.
    dtype : dtype
        The type of the output array.  If `dtype` is not given, the data type
        is inferred from `start` and `stop`. The inferred dtype will never be
        an integer; `float` is chosen even if the arguments would produce an
        array of integers.
    axis : int, optional
        The axis in the result to store the samples.  Relevant only if start
        or stop are array-like.  By default (0), the samples will be along a
        new axis inserted at the beginning. Use -1 to get an axis at the end.

        .. versionadded:: 1.16.0

    Returns
    -------
    samples : ndarray
        `num` samples, equally spaced on a log scale.

    Raises
    ------
    ValueError
        If `start` or `stop` contains a zero.

    See Also
    --------
    logspace : Similar to geomspace, but with endpoints specified using log
               and base.
    linspace : Similar to geomspace, but with arithmetic instead of geometric
               progression.
    arange : Similar to linspace, with the step size specified instead of the
             number of samples.
    :ref:`how-to-partition`

    Notes
    -----
    If the inputs or dtype are complex, the output will follow a logarithmic
    spiral in the complex plane.  (There are an infinite number of spirals
    passing through two points; the output will follow the shortest such path.)

    Examples
    --------
    >>> np.geomspace(1, 1000, num=4)
    array([    1.,    10.,   100.,  1000.])
    >>> np.geomspace(1, 1000, num=3, endpoint=False)
    array([   1.,   10.,  100.])
    >>> np.geomspace(1, 1000, num=4, endpoint=False)
    array([   1.        ,    5.62341325,   31.6227766 ,  177.827941  ])
    >>> np.geomspace(1, 256, num=9)
    array([   1.,    2.,    4.,    8.,   16.,   32.,   64.,  128.,  256.])

    Note that the above may not produce exact integers:

    >>> np.geomspace(1, 256, num=9, dtype=int)
    array([  1,   2,   4,   7,  16,  32,  63, 127, 256])
    >>> np.around(np.geomspace(1, 256, num=9)).astype(int)
    array([  1,   2,   4,   8,  16,  32,  64, 128, 256])

    Negative, decreasing, and complex inputs are allowed:

    >>> np.geomspace(1000, 1, num=4)
    array([1000.,  100.,   10.,    1.])
    >>> np.geomspace(-1000, -1, num=4)
    array([-1000.,  -100.,   -10.,    -1.])
    >>> np.geomspace(1j, 1000j, num=4)  # Straight line
    array([0.   +1.j, 0.  +10.j, 0. +100.j, 0.+1000.j])
    >>> np.geomspace(-1+0j, 1+0j, num=5)  # Circle
    array([-1.00000000e+00+1.22464680e-16j, -7.07106781e-01+7.07106781e-01j,
            6.12323400e-17+1.00000000e+00j,  7.07106781e-01+7.07106781e-01j,
            1.00000000e+00+0.00000000e+00j])

    Graphical illustration of `endpoint` parameter:

    >>> import matplotlib.pyplot as plt
    >>> N = 10
    >>> y = np.zeros(N)
    >>> plt.semilogx(np.geomspace(1, 1000, N, endpoint=True), y + 1, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.semilogx(np.geomspace(1, 1000, N, endpoint=False), y + 2, 'o')
    [<matplotlib.lines.Line2D object at 0x...>]
    >>> plt.axis([0.5, 2000, 0, 3])
    [0.5, 2000, 0, 3]
    >>> plt.grid(True, color='0.7', linestyle='-', which='both', axis='both')
    >>> plt.show()

    """
    start = asanyarray(start)
    stop = asanyarray(stop)
    # A geometric progression can neither start nor end at zero.
    if _nx.any(start == 0) or _nx.any(stop == 0):
        raise ValueError('Geometric sequence cannot include zero')

    # Working dtype: promote the bounds with a float and with a zero of the
    # requested dtype.
    dt = result_type(start, stop, float(num), _nx.zeros((), dtype))
    if dtype is None:
        dtype = dt
    else:
        # Normalise whatever was passed into a dtype instance.
        dtype = _nx.dtype(dtype)

    # Promote both arguments to the same dtype.  Copy since `start` is
    # modified in-place further down.
    start = start.astype(dt, copy=True)
    stop = stop.astype(dt, copy=True)

    # Divide out the sign of `start` before taking logarithms, so negative
    # real and complex inputs are handled; the sign is multiplied back in
    # at the end.
    out_sign = _nx.sign(start)
    start /= out_sign
    stop = stop / out_sign

    log_start = _nx.log10(start)
    log_stop = _nx.log10(stop)
    result = logspace(log_start, log_stop, num=num,
                      endpoint=endpoint, base=10.0, dtype=dt)

    # Overwrite the endpoints with the exact `start` and `stop` values, so
    # they do not depend on the log10/power round trip.
    if num > 0:
        result[0] = start
        if num > 1 and endpoint:
            result[-1] = stop

    result *= out_sign

    if axis != 0:
        result = _nx.moveaxis(result, 0, axis)

    return result.astype(dtype, copy=False)
def _needs_add_docstring(obj):
    """
    Returns true if the only way to set the docstring of `obj` from python is
    via add_docstring.

    This function errs on the side of being overly conservative.
    """
    # 定义 CPython 中的 Py_TPFLAGS_HEAPTYPE 标志
    Py_TPFLAGS_HEAPTYPE = 1 << 9

    # 如果 obj 是函数类型、方法类型或者属性类型，则返回 False
    if isinstance(obj, (types.FunctionType, types.MethodType, property)):
        return False

    # 如果 obj 是类型对象并且其 __flags__ 属性包含 Py_TPFLAGS_HEAPTYPE 标志，则返回 False
    if isinstance(obj, type) and obj.__flags__ & Py_TPFLAGS_HEAPTYPE:
        return False

    # 否则返回 True，即需要通过 add_docstring 来设置 obj 的文档字符串
    return True


def _add_docstring(obj, doc, warn_on_python):
    """
    Attach `doc` to `obj` through `add_docstring`, on a best-effort basis.

    Parameters
    ----------
    obj : object
        Object that should receive the docstring.
    doc : str
        Docstring text to attach.
    warn_on_python : bool
        When true, a `UserWarning` is emitted if `_needs_add_docstring`
        reports that `obj` does not need `add_docstring`.

    Notes
    -----
    Any `Exception` raised by `add_docstring` is swallowed on purpose, so
    this helper never fails because a docstring could not be written.
    """
    if warn_on_python:
        if not _needs_add_docstring(obj):
            message = (
                "add_newdoc was used on a pure-python object {}. "
                "Prefer to attach it directly to the source."
            ).format(obj)
            warnings.warn(message, UserWarning, stacklevel=3)

    try:
        add_docstring(obj, doc)
    except Exception:
        # Deliberate: failing to set a docstring must not raise.
        pass


def add_newdoc(place, obj, doc, warn_on_python=True):
    """
    Add documentation to an existing object, typically one defined in C

    This lets docstrings be edited without a re-compile and exists
    primarily for internal use within numpy itself.

    Parameters
    ----------
    place : str
        The absolute name of the module to import from
    obj : str or None
        The name of the object to add documentation to, typically a class or
        function name.
    doc : {str, Tuple[str, str], List[Tuple[str, str]]}
        If a string, the documentation to apply to `obj`

        If a tuple, ``(method, docstring)``: the first element names an
        attribute of `obj`, the second is the docstring applied to it.

        If a list, every element is such a two-element tuple -
        ``[(method1, docstring1), (method2, docstring2), ...]``
    warn_on_python : bool, optional
        If True, emit `UserWarning` if this is used to attach documentation
        to a pure-python object. Default is True.

    Notes
    -----
    This routine never raises an error if the docstring can't be written, but
    will raise an error if the object being documented does not exist.

    This routine cannot modify read-only docstrings, as appear
    in new-style classes or built-in functions. Because this
    routine never raises an error the caller must check manually
    that the docstrings were changed.

    Since this function grabs the ``char *`` from a c-level str object and puts
    it into the ``tp_doc`` slot of the type of `obj`, it violates a number of
    C-API best-practices, by:

    - modifying a `PyTypeObject` after calling `PyType_Ready`
    - calling `Py_INCREF` on the str and losing the reference, so the str
      will never be released

    If possible it should be avoided.
    """
    # Import `place` and look up the object to document on it.
    module = __import__(place, globals(), {}, [obj])
    target = getattr(module, obj)

    # A plain string documents the object itself.
    if isinstance(doc, str):
        _add_docstring(target, doc.strip(), warn_on_python)
        return

    # A single (attribute, docstring) pair documents one attribute.
    if isinstance(doc, tuple):
        member, text = doc
        _add_docstring(getattr(target, member), text.strip(), warn_on_python)
        return

    # A list of (attribute, docstring) pairs documents several attributes,
    # in order.
    if isinstance(doc, list):
        for member, text in doc:
            _add_docstring(
                getattr(target, member), text.strip(), warn_on_python
            )

`.\numpy\numpy\_core\function_base.pyi`

# 导入必要的类型和函数，包括字面量类型、重载函数、任意类型、支持索引的类型变量
from typing import (
    Literal as L,
    overload,
    Any,
    SupportsIndex,
    TypeVar,
)

# 从 numpy 库中导入特定的数据类型：浮点数、复数浮点数、泛型
from numpy import floating, complexfloating, generic
# 从 numpy._typing 模块导入特定的类型别名
from numpy._typing import (
    NDArray,
    DTypeLike,
    _DTypeLike,
    _ArrayLikeFloat_co,
    _ArrayLikeComplex_co,
)

# Type variable bound to `generic`; used by the overloads below to tie the
# requested dtype to the scalar type in the return annotation.
_SCT = TypeVar("_SCT", bound=generic)

# Declares only the type of `__all__` (a list of str); no value is assigned
# in this stub.
__all__: list[str]

# Overloads of `linspace`.  The first four cover `retstep=False` (array
# return); the last four cover `retstep=True` (``(array, step)`` tuple).

# Float-compatible inputs, no dtype -> floating array.
@overload
def linspace(
    start: _ArrayLikeFloat_co,
    stop: _ArrayLikeFloat_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[False] = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> NDArray[floating[Any]]: ...
# Complex-compatible inputs, no dtype -> complex floating array.
@overload
def linspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[False] = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> NDArray[complexfloating[Any, Any]]: ...
# Explicit scalar-type dtype -> array of that scalar type.
@overload
def linspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[False] = ...,
    dtype: _DTypeLike[_SCT] = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> NDArray[_SCT]: ...
# Any other dtype-like -> array with unknown scalar type.
@overload
def linspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[False] = ...,
    dtype: DTypeLike = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> NDArray[Any]: ...
# retstep=True, float-compatible inputs, no dtype -> (floating array, step).
@overload
def linspace(
    start: _ArrayLikeFloat_co,
    stop: _ArrayLikeFloat_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[True] = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> tuple[NDArray[floating[Any]], floating[Any]]: ...
# retstep=True, complex-compatible inputs, no dtype -> (complex array, step).
@overload
def linspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[True] = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> tuple[NDArray[complexfloating[Any, Any]], complexfloating[Any, Any]]: ...
# retstep=True, explicit scalar-type dtype -> (array, step) of that type.
@overload
def linspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[True] = ...,
    dtype: _DTypeLike[_SCT] = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> tuple[NDArray[_SCT], _SCT]: ...
# retstep=True, any other dtype-like -> (array, step) with unknown types.
@overload
def linspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    retstep: L[True] = ...,
    dtype: DTypeLike = ...,
    axis: SupportsIndex = ...,
    *,
    device: None | L["cpu"] = ...,
) -> tuple[NDArray[Any], Any]: ...



# Overloads of `logspace`.

# Float-compatible inputs and base, no dtype -> floating array.
@overload
def logspace(
    start: _ArrayLikeFloat_co,
    stop: _ArrayLikeFloat_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    base: _ArrayLikeFloat_co = ...,  # base of the log space, float-compatible array-like
    dtype: None = ...,  # this overload only matches when no dtype is given
    axis: SupportsIndex = ...,  # any object usable as an index
# Return annotation: array of floating scalars.
) -> NDArray[floating[Any]]: ...

# Complex-compatible inputs and base, no dtype -> complex floating array.
@overload
def logspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    base: _ArrayLikeComplex_co = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
) -> NDArray[complexfloating[Any, Any]]: ...

# Explicit scalar-type dtype -> array of that scalar type.
@overload
def logspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    base: _ArrayLikeComplex_co = ...,
    dtype: _DTypeLike[_SCT] = ...,
    axis: SupportsIndex = ...,
) -> NDArray[_SCT]: ...

# Any other dtype-like -> array with unknown scalar type.
@overload
def logspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    base: _ArrayLikeComplex_co = ...,
    dtype: DTypeLike = ...,
    axis: SupportsIndex = ...,
) -> NDArray[Any]: ...

# Overloads of `geomspace`.

# Float-compatible inputs, no dtype -> floating array.
@overload
def geomspace(
    start: _ArrayLikeFloat_co,
    stop: _ArrayLikeFloat_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
) -> NDArray[floating[Any]]: ...

# Complex-compatible inputs, no dtype -> complex floating array.
@overload
def geomspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    dtype: None = ...,
    axis: SupportsIndex = ...,
) -> NDArray[complexfloating[Any, Any]]: ...

# Explicit scalar-type dtype -> array of that scalar type.
@overload
def geomspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    dtype: _DTypeLike[_SCT] = ...,
    axis: SupportsIndex = ...,
) -> NDArray[_SCT]: ...

# Any other dtype-like -> array with unknown scalar type.
@overload
def geomspace(
    start: _ArrayLikeComplex_co,
    stop: _ArrayLikeComplex_co,
    num: SupportsIndex = ...,
    endpoint: bool = ...,
    dtype: DTypeLike = ...,
    axis: SupportsIndex = ...,
) -> NDArray[Any]: ...

# Stub for `add_newdoc`: `doc` is a docstring, one ``(attribute, docstring)``
# pair, or a list of such pairs; nothing is returned.
def add_newdoc(
    place: str,
    obj: str,
    doc: str | tuple[str, str] | list[tuple[str, str]],
    warn_on_python: bool = ...,
) -> None: ...

`.\numpy\numpy\_core\getlimits.py`

"""Machine limits for Float32 and Float64 and (long double) if available...

"""
# Public names exported by this module.
__all__ = ['finfo', 'iinfo']

# 引入警告模块
import warnings

# 从内部模块导入设置模块函数
from .._utils import set_module
# 从内部模块导入机器精度相关的类
from ._machar import MachAr
# 从当前目录下的numeric模块导入array、inf、nan等
from . import numeric
# 从当前目录下的numerictypes模块导入别名ntypes
from . import numerictypes as ntypes
# 从numeric模块导入array、inf、nan等
from .numeric import array, inf, nan
# 从umath模块导入log10、exp2、nextafter、isnan等
from .umath import log10, exp2, nextafter, isnan


# 定义函数_fr0，用于将rank-0的数组修正为rank-1
def _fr0(a):
    """fix rank-0 --> rank-1"""
    if a.ndim == 0:
        a = a.copy()
        a.shape = (1,)
    return a


# 定义函数_fr1，用于将rank大于0的数组修正为rank-0
def _fr1(a):
    """fix rank > 0 --> rank-0"""
    if a.size == 1:
        a = a.copy()
        a.shape = ()
    return a


# Stand-in for a `MachAr` instance, built from explicitly supplied constants
# instead of being discovered at runtime.
class MachArLike:
    """ Object to simulate MachAr instance """

    # All float constants are keyword-only; any extra keyword arguments are
    # stored verbatim as instance attributes (see `__dict__.update` below).
    def __init__(self, ftype, *, eps, epsneg, huge, tiny,
                 ibeta, smallest_subnormal=None, **kwargs):
        # Per-type settings (integer type, format string, title) come from
        # the module-level `_MACHAR_PARAMS` table.
        self.params = _MACHAR_PARAMS[ftype]
        self.ftype = ftype
        self.title = self.params['title']

        # Without a (truthy) `smallest_subnormal`, use the next representable
        # value after 0 in the direction of 1 for this float type.
        if not smallest_subnormal:
            self._smallest_subnormal = nextafter(
                self.ftype(0), self.ftype(1), dtype=self.ftype)
        else:
            self._smallest_subnormal = smallest_subnormal

        # Each constant is converted with `_float_to_float` and stored under
        # every name it is known by.
        self.epsilon = self.eps = self._float_to_float(eps)
        self.epsneg = self._float_to_float(epsneg)
        self.xmax = self.huge = self._float_to_float(huge)
        self.xmin = self._float_to_float(tiny)
        self.smallest_normal = self.tiny = self._float_to_float(tiny)
        # `ibeta` is converted with the integer type configured for `ftype`.
        self.ibeta = self.params['itype'](ibeta)

        # Extra keyword arguments become attributes.  This runs before the
        # derived values below are computed, so a key such as `eps` passed
        # here would replace the attribute set above.
        self.__dict__.update(kwargs)

        # Derived from `eps`: precision is int(-log10(eps)) and resolution
        # is 10 ** -precision in `ftype`.
        self.precision = int(-log10(self.eps))
        self.resolution = self._float_to_float(
            self._float_conv(10) ** (-self.precision))

        # Pre-formatted string versions of the constants.
        self._str_eps = self._float_to_str(self.eps)
        self._str_epsneg = self._float_to_str(self.epsneg)
        self._str_xmin = self._float_to_str(self.xmin)
        self._str_xmax = self._float_to_str(self.xmax)
        self._str_resolution = self._float_to_str(self.resolution)
        self._str_smallest_normal = self._float_to_str(self.xmin)

    @property
    # Read-only accessor for the smallest subnormal value.
    def smallest_subnormal(self):
        """Return the value for the smallest subnormal.

        Returns
        -------
        smallest_subnormal : float
            value for the smallest subnormal.

        Warns
        -----
        UserWarning
            If the calculated value for the smallest subnormal is zero.
        """
        # Warn when the stored value compares equal to zero in `ftype`.
        value = self._smallest_subnormal
        if self.ftype(0) == value:
            warnings.warn(
                'The value of the smallest subnormal for {} type '
                'is zero.'.format(self.ftype), UserWarning, stacklevel=2)

        # Returned in the same converted form as the other constants.
        return self._float_to_float(value)

    @property
    # Read-only accessor for the formatted smallest subnormal value.
    def _str_smallest_subnormal(self):
        """Return the string representation of the smallest subnormal."""
        # Goes through the `smallest_subnormal` property, so the zero-value
        # warning above can be triggered from here as well.
        return self._float_to_str(self.smallest_subnormal)
    # Convert `value` to `ftype` and collapse the result to rank 0.
    def _float_to_float(self, value):
        """Converts float to float.

        Parameters
        ----------
        value : float
            value to be converted.
        """
        # `_float_conv` yields a one-element array; `_fr1` reshapes a copy
        # of it to shape ().
        return _fr1(self._float_conv(value))

    # Wrap `value` in a one-element array of dtype `ftype`.
    def _float_conv(self, value):
        """Converts float to conv.

        Parameters
        ----------
        value : float
            value to be converted.
        """
        # One-element array holding `value`, typed as `self.ftype`.
        return array([value], self.ftype)

    # Format `value` with the format string configured for `ftype`.
    def _float_to_str(self, value):
        """Converts float to str.

        Parameters
        ----------
        value : float
            value to be converted.
        """
        # `_fr0` makes sure `value` can be indexed; its first element is
        # re-wrapped as `ftype` and %-formatted with `params['fmt']`.
        return self.params['fmt'] % array(_fr0(value)[0], self.ftype)
# Maps each complex type to a real floating type.
_convert_to_float = {
    ntypes.csingle: ntypes.single,           # csingle -> single
    ntypes.complex128: ntypes.float64,       # complex128 -> float64
    ntypes.clongdouble: ntypes.longdouble    # clongdouble -> longdouble
    }

# Parameters for creating MachAr / MachAr-like objects
_title_fmt = 'numpy {} precision floating point number'
_MACHAR_PARAMS = {
    ntypes.double: dict(
        itype = ntypes.int64,                 # integer type paired with double
        fmt = '%24.16e',                      # %-format used for double values
        title = _title_fmt.format('double')), # title for double
    ntypes.single: dict(
        itype = ntypes.int32,                 # integer type paired with single
        fmt = '%15.7e',                       # %-format used for single values
        title = _title_fmt.format('single')), # title for single
    ntypes.longdouble: dict(
        itype = ntypes.longlong,              # integer type paired with long double
        fmt = '%s',                           # long double values are formatted with %s
        title = _title_fmt.format('long double')),  # title for long double
    ntypes.half: dict(
        itype = ntypes.int16,                 # integer type paired with half
        fmt = '%12.5e',                       # %-format used for half values
        title = _title_fmt.format('half'))    # title for half
}

# Key used to identify a floating point type.  The key is produced as:
#    ftype = np.longdouble        # or float64, float32, etc.
#    v = (ftype(-1.0) / ftype(10.0))
#    v.view(v.dtype.newbyteorder('<')).tobytes()
#
# A division is used to work around deficiencies of strtold on some
# platforms.
# See:
# https://perl5.git.perl.org/perl.git/blob/3118d7d684b56cbeb702af874f4326683c45f045:/Configure

# Registry filled by `_register_type`: byte pattern -> machar object.
_KNOWN_TYPES = {}

def _register_type(machar, bytepat):
    _KNOWN_TYPES[bytepat] = machar

_float_ma = {}

def _register_known_types():
    """Register the known float formats.

    Builds one ``MachArLike`` object per known format, registers it in
    ``_KNOWN_TYPES`` under its byte signature via `_register_type`, and stores
    it in ``_float_ma`` (keys 16, 32, 64, 128, 80 and ``'dd'``).
    """
    # Known parameters for float16
    # See the docstring of the MachAr class for a description of the parameters.
    f16 = ntypes.float16
    float16_ma = MachArLike(f16,
                            machep=-10,
                            negep=-11,
                            minexp=-14,
                            maxexp=16,
                            it=10,
                            iexp=5,
                            ibeta=2,
                            irnd=5,
                            ngrd=0,
                            eps=exp2(f16(-10)),
                            epsneg=exp2(f16(-11)),
                            huge=f16(65504),
                            tiny=f16(2 ** -14))
    _register_type(float16_ma, b'f\xae')
    _float_ma[16] = float16_ma

    # Known parameters for float32
    f32 = ntypes.float32
    float32_ma = MachArLike(f32,
                            machep=-23,
                            negep=-24,
                            minexp=-126,
                            maxexp=128,
                            it=23,
                            iexp=8,
                            ibeta=2,
                            irnd=5,
                            ngrd=0,
                            eps=exp2(f32(-23)),
                            epsneg=exp2(f32(-24)),
                            huge=f32((1 - 2 ** -24) * 2**128),
                            tiny=exp2(f32(-126)))
    _register_type(float32_ma, b'\xcd\xcc\xcc\xbd')
    _float_ma[32] = float32_ma

    # Known parameters for float64
    f64 = ntypes.float64
    epsneg_f64 = 2.0 ** -53.0
    tiny_f64 = 2.0 ** -1022.0
    # MachArLike description of the 64-bit float
    float64_ma = MachArLike(f64,
                            machep=-52,
                            negep=-53,
                            minexp=-1022,
                            maxexp=1024,
                            it=52,
                            iexp=11,
                            ibeta=2,
                            irnd=5,
                            ngrd=0,
                            eps=2.0 ** -52.0,
                            epsneg=epsneg_f64,
                            huge=(1.0 - epsneg_f64) / tiny_f64 * f64(4),
                            tiny=tiny_f64)
    # Register it under the byte signature of the 64-bit float
    _register_type(float64_ma, b'\x9a\x99\x99\x99\x99\x99\xb9\xbf')
    # Also make it available in _float_ma under the key 64
    _float_ma[64] = float64_ma

    # Known parameters for IEEE 754 128-bit binary float
    ld = ntypes.longdouble
    epsneg_f128 = exp2(ld(-113))
    tiny_f128 = exp2(ld(-16382))
    # Ignore runtime error when this is not f128
    with numeric.errstate(all='ignore'):
        huge_f128 = (ld(1) - epsneg_f128) / tiny_f128 * ld(4)
    # MachArLike description of the 128-bit float
    float128_ma = MachArLike(ld,
                             machep=-112,
                             negep=-113,
                             minexp=-16382,
                             maxexp=16384,
                             it=112,
                             iexp=15,
                             ibeta=2,
                             irnd=5,
                             ngrd=0,
                             eps=exp2(ld(-112)),
                             epsneg=epsneg_f128,
                             huge=huge_f128,
                             tiny=tiny_f128)
    # Register it under the byte signature of the 128-bit float
    _register_type(float128_ma,
        b'\x9a\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\x99\xfb\xbf')
    # Also make it available in _float_ma under the key 128
    _float_ma[128] = float128_ma

    # Known parameters for float80 (Intel 80-bit extended precision)
    epsneg_f80 = exp2(ld(-64))
    tiny_f80 = exp2(ld(-16382))
    # Ignore runtime error when this is not f80
    with numeric.errstate(all='ignore'):
        huge_f80 = (ld(1) - epsneg_f80) / tiny_f80 * ld(4)
    # MachArLike description of the 80-bit float
    float80_ma = MachArLike(ld,
                            machep=-63,
                            negep=-64,
                            minexp=-16382,
                            maxexp=16384,
                            it=63,
                            iexp=15,
                            ibeta=2,
                            irnd=5,
                            ngrd=0,
                            eps=exp2(ld(-63)),
                            epsneg=epsneg_f80,
                            huge=huge_f80,
                            tiny=tiny_f80)
    # Register it under the byte signature of the 80-bit float
    _register_type(float80_ma, b'\xcd\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xfb\xbf')
    # Also make it available in _float_ma under the key 80
    _float_ma[80] = float80_ma

    # Guessed / known parameters for double double; see:
    # https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format#Double-double_arithmetic
    # These numbers have the same exponent range as float64, but an extended
    # number of digits in the significand.
    huge_dd = nextafter(ld(inf), ld(0), dtype=ld)
    # As the smallest normal in double double is so hard to calculate, it is
    # set to NaN.
    smallest_normal_dd = nan
    # Keep the same value for the smallest subnormal as for double.
    smallest_subnormal_dd = ld(nextafter(0., 1.))
    # MachArLike description of the double double format
    float_dd_ma = MachArLike(ld,
                             machep=-105,
                             negep=-106,
                             minexp=-1022,
                             maxexp=1024,
                             it=105,
                             iexp=11,
                             ibeta=2,
                             irnd=5,
                             ngrd=0,
                             eps=exp2(ld(-105)),
                             epsneg=exp2(ld(-106)),
                             huge=huge_dd,
                             tiny=smallest_normal_dd,
                             smallest_subnormal=smallest_subnormal_dd)
    # double double; low, high order (e.g. PPC 64)
    _register_type(float_dd_ma,
        b'\x9a\x99\x99\x99\x99\x99Y<\x9a\x99\x99\x99\x99\x99\xb9\xbf')
    # double double; high, low order (e.g. PPC 64 le)
    _register_type(float_dd_ma,
        b'\x9a\x99\x99\x99\x99\x99\xb9\xbf\x9a\x99\x99\x99\x99\x99Y<')
    # Also make it available in _float_ma under the key 'dd'
    _float_ma['dd'] = float_dd_ma
# 定义一个函数 `_get_machar`，用于获取 `MachAr` 实例或类似 `MachAr` 的实例
def _get_machar(ftype):
    """Get a MachAr instance or a MachAr-like instance for `ftype`.

    The byte signature of ``ftype(-1.0) / ftype(10.)`` (viewed little-endian)
    is looked up in ``_KNOWN_TYPES``.  If nothing matches, a ``UserWarning``
    is issued and the result of ``_discovered_machar(ftype)`` is returned.

    Parameters
    ----------
    ftype : class
        Float type; must be a key of ``_MACHAR_PARAMS``.

    Raises
    ------
    ValueError
        If `ftype` has no entry in ``_MACHAR_PARAMS``.
    """
    if _MACHAR_PARAMS.get(ftype) is None:
        raise ValueError(repr(ftype))

    # Build the signature used to detect known / suspected types.
    probe = ftype(-1.0) / ftype(10.)
    key = probe.view(probe.dtype.newbyteorder("<")).tobytes()

    ma_like = None
    if ftype == ntypes.longdouble:
        # Could be 80 bit == 10 byte extended precision, where the last bytes
        # can be random garbage.  Compare the first 10 bytes to the patterns
        # first, to avoid branching on the random garbage.
        ma_like = _KNOWN_TYPES.get(key[:10])
    if ma_like is None:
        # See if the full key is known.
        ma_like = _KNOWN_TYPES.get(key)
    if ma_like is None and len(key) == 16:
        # Machine limits could be f80 masquerading as np.float128: take every
        # 16-byte pattern, truncate it to 10 bytes (the last bytes can be
        # random garbage) and match the truncated key against those.
        truncated = {}
        for pattern, known in _KNOWN_TYPES.items():
            if len(pattern) == 16:
                truncated[pattern[:10]] = known
        ma_like = truncated.get(key[:10])

    if ma_like is None:
        # Fall back to parameter discovery.
        warnings.warn(
            f'Signature {key} for {ftype} does not match any known type: '
            'falling back to type probe function.\n'
            'This warnings indicates broken support for the dtype!',
            UserWarning, stacklevel=2)
        return _discovered_machar(ftype)
    return ma_like


# 定义一个函数 `_discovered_machar`，用于创建包含浮点类型信息的 `MachAr` 实例
def _discovered_machar(ftype):
    """Create a MachAr instance for `ftype` from its ``_MACHAR_PARAMS`` entry.

    ``MachAr`` is given four callables built around `ftype` and the entry's
    ``'itype'`` / ``'fmt'`` values, followed by the entry's ``'title'``.

    Parameters
    ----------
    ftype : class
        Float type; must be a key of ``_MACHAR_PARAMS`` (``KeyError``
        otherwise).
    """
    params = _MACHAR_PARAMS[ftype]

    def wrap_in_array(v):
        # One-element array of ftype holding v
        return array([v], ftype)

    def first_as_itype(v):
        # Cast to the integer type, then take the first element of _fr0's result
        return _fr0(v.astype(params['itype']))[0]

    def first_as_ftype(v):
        # First element of _fr0's result, re-wrapped as an ftype array
        return array(_fr0(v)[0], ftype)

    def format_first(v):
        # Render the value with the type's %-format
        return params['fmt'] % array(_fr0(v)[0], ftype)

    return MachAr(wrap_in_array, first_as_itype, first_as_ftype,
                  format_first, params['title'])


# 设置一个类 `finfo`，表示浮点类型的机器限制
@set_module('numpy')
class finfo:
    """
    finfo(dtype)

    Machine limits for floating point types.

    Attributes
    ----------
    bits : int
        The number of bits occupied by the type.
    dtype : dtype
        Returns the dtype for which `finfo` returns information. For complex
        input, the returned dtype is the associated ``float*`` dtype for its
        real and complex components.
    eps : float
        The difference between 1.0 and the next smallest representable float
        larger than 1.0. For example, for 64-bit binary floats in the IEEE-754
        standard, ``eps = 2**-52``, approximately 2.22e-16.
    epsneg : float
        The difference between 1.0 and the next smallest representable float
        less than 1.0. For example, for 64-bit binary floats in the IEEE-754
        standard, ``epsneg = 2**-53``, approximately 1.11e-16.
    iexp : int
        The number of bits in the exponent portion of the floating point
        representation.
    machep : int
        The exponent that yields `eps`.
    max : floating point number of the appropriate type
        The largest representable number.
    maxexp : int
        The smallest positive power of the base (2) that causes overflow.
    min : floating point number of the appropriate type
        The smallest representable number, typically ``-max``.
    minexp : int
        The most negative power of the base (2) consistent with there
        being no leading 0's in the mantissa.
    negep : int
        The exponent that yields `epsneg`.
    nexp : int
        The number of bits in the exponent including its sign and bias.
    nmant : int
        The number of bits in the mantissa.
    precision : int
        The approximate number of decimal digits to which this kind of
        float is precise.
    resolution : floating point number of the appropriate type
        The approximate decimal resolution of this type, i.e.,
        ``10**-precision``.
    tiny : float
        An alias for `smallest_normal`, kept for backwards compatibility.
    smallest_normal : float
        The smallest positive normal floating point number (see Notes).
    smallest_subnormal : float
        The smallest positive subnormal floating point number.

    Parameters
    ----------
    dtype : float, dtype, or instance
        Kind of floating point or complex floating point
        data-type about which to get information.

    See Also
    --------
    iinfo : The equivalent for integer data types.
    spacing : The distance between a value and the nearest adjacent number
    nextafter : The next floating point value after x1 towards x2

    Notes
    -----
    For developers of NumPy: do not instantiate this at the module level.
    The initial calculation of these parameters is expensive and negatively
    impacts import times.  These objects are cached, so calling ``finfo()``
    repeatedly inside your functions is not a problem.

    Note that ``smallest_normal`` is not actually the smallest positive
    representable value; see ``smallest_subnormal``.
    """

    # Cache of finfo objects, keyed by every dtype spelling that resolved to
    # them (see the bookkeeping of ``dtypes`` in ``__new__``).
    _finfo_cache = {}

    def __new__(cls, dtype):
        """Return the cached `finfo` for `dtype`, creating it if necessary."""
        try:
            obj = cls._finfo_cache.get(dtype)  # most common path
            if obj is not None:
                return obj
        except TypeError:
            # The argument cannot be used as a dict key; normalise it below.
            pass

        if dtype is None:
            # Deprecated in NumPy 1.25, 2023-01-16
            warnings.warn(
                "finfo() dtype cannot be None. This behavior will "
                "raise an error in the future. (Deprecated in NumPy 1.25)",
                DeprecationWarning,
                stacklevel=2
            )

        try:
            dtype = numeric.dtype(dtype)
        except TypeError:
            # In case a float instance was given
            dtype = numeric.dtype(type(dtype))

        # Retry the cache with the normalised dtype.
        obj = cls._finfo_cache.get(dtype)
        if obj is not None:
            return obj

        # Every spelling collected here is cached against the final object.
        dtypes = [dtype]
        newdtype = ntypes.obj2sctype(dtype)
        if newdtype is not dtype:
            dtypes.append(newdtype)
            dtype = newdtype

        if not issubclass(dtype, numeric.inexact):
            raise ValueError("data type %r not inexact" % (dtype))

        obj = cls._finfo_cache.get(dtype)
        if obj is not None:
            return obj

        if not issubclass(dtype, numeric.floating):
            # Inexact but not floating: map to the corresponding float type.
            newdtype = _convert_to_float[dtype]
            if newdtype is not dtype:
                # dtype changed, for example from complex128 to float64
                dtypes.append(newdtype)
                dtype = newdtype

                obj = cls._finfo_cache.get(dtype, None)
                if obj is not None:
                    # Add the original spellings to the cache and return
                    for dt in dtypes:
                        cls._finfo_cache[dt] = obj
                    return obj

        obj = object.__new__(cls)._init(dtype)
        for dt in dtypes:
            cls._finfo_cache[dt] = obj
        return obj

    def _init(self, dtype):
        """Fill in the attributes from the machine parameters of `dtype`.

        Returns ``self`` so that the call can be chained in ``__new__``.
        """
        self.dtype = numeric.dtype(dtype)
        machar = _get_machar(dtype)

        # Attributes copied from machar unchanged
        for word in ['precision', 'iexp',
                     'maxexp', 'minexp', 'negep',
                     'machep']:
            setattr(self, word, getattr(machar, word))

        # Attributes stored on machar as arrays: take their first element
        for word in ['resolution', 'epsneg', 'smallest_subnormal']:
            setattr(self, word, getattr(machar, word).flat[0])

        self.bits = self.dtype.itemsize * 8
        self.max = machar.huge.flat[0]
        self.min = -self.max
        self.eps = machar.eps.flat[0]
        self.nexp = machar.iexp
        self.nmant = machar.it
        # Kept for the `smallest_normal` property
        self._machar = machar
        # Pre-rendered strings used by __str__ / __repr__
        self._str_tiny = machar._str_xmin.strip()
        self._str_max = machar._str_xmax.strip()
        self._str_epsneg = machar._str_epsneg.strip()
        self._str_eps = machar._str_eps.strip()
        self._str_resolution = machar._str_resolution.strip()
        self._str_smallest_normal = machar._str_smallest_normal.strip()
        self._str_smallest_subnormal = machar._str_smallest_subnormal.strip()
        return self

    def __str__(self):
        """Return a multi-line table of the machine parameters."""
        fmt = (
            'Machine parameters for %(dtype)s\n'
            '---------------------------------------------------------------\n'
            'precision = %(precision)3s   resolution = %(_str_resolution)s\n'
            'machep = %(machep)6s   eps =        %(_str_eps)s\n'
            'negep =  %(negep)6s   epsneg =     %(_str_epsneg)s\n'
            'minexp = %(minexp)6s   tiny =       %(_str_tiny)s\n'
            'maxexp = %(maxexp)6s   max =        %(_str_max)s\n'
            'nexp =   %(nexp)6s   min =        -max\n'
            'smallest_normal = %(_str_smallest_normal)s   '
            'smallest_subnormal = %(_str_smallest_subnormal)s\n'
            '---------------------------------------------------------------\n'
            )
        # The instance dict supplies every field named in the template.
        return fmt % self.__dict__

    def __repr__(self):
        """Return a compact ``finfo(resolution=..., min=..., max=..., dtype=...)``."""
        c = self.__class__.__name__
        # Work on a copy so the class name is not stored on the instance.
        d = self.__dict__.copy()
        d['klass'] = c
        return (("%(klass)s(resolution=%(resolution)s, min=-%(_str_max)s,"
                 " max=%(_str_max)s, dtype=%(dtype)s)") % d)

    @property
    def smallest_normal(self):
        """Return the value for the smallest normal.

        Returns
        -------
        smallest_normal : float
            Value for the smallest normal.

        Warns
        -----
        UserWarning
            If the calculated value for the smallest normal is requested for
            double-double.
        """
        # This check is necessary because the value for smallest_normal is
        # registered as NaN (undefined) for double double.
        if isnan(self._machar.smallest_normal.flat[0]):
            warnings.warn(
                'The value of smallest normal is undefined for double double',
                UserWarning, stacklevel=2)
        return self._machar.smallest_normal.flat[0]

    @property
    def tiny(self):
        """Return the value for tiny, alias of smallest_normal.

        Returns
        -------
        tiny : float
            Value for the smallest normal, alias of smallest_normal.

        Warns
        -----
        UserWarning
            If the calculated value for the smallest normal is requested for
            double-double.
        """
        return self.smallest_normal
@set_module('numpy')
class iinfo:
    """
    iinfo(type)

    Machine limits for integer types.

    Attributes
    ----------
    bits : int
        The number of bits occupied by the type.
    dtype : dtype
        Returns the dtype for which `iinfo` returns information.
    min : int
        The smallest integer expressible by the type.
    max : int
        The largest integer expressible by the type.

    Parameters
    ----------
    int_type : integer type, dtype, or instance
        The kind of integer data type to get information about.

    See Also
    --------
    finfo : The equivalent for floating point data types.

    Examples
    --------
    With types:

    >>> ii16 = np.iinfo(np.int16)
    >>> ii16.min
    -32768
    >>> ii16.max
    32767
    >>> ii32 = np.iinfo(np.int32)
    >>> ii32.min
    -2147483648
    >>> ii32.max
    2147483647

    With instances:

    >>> ii32 = np.iinfo(np.int32(10))
    >>> ii32.min
    -2147483648
    >>> ii32.max
    2147483647

    """

    # Class-level caches of computed limits, keyed by "<kind><bits>"
    _min_vals = {}
    _max_vals = {}

    def __init__(self, int_type):
        try:
            resolved = numeric.dtype(int_type)
        except TypeError:
            # Not convertible directly: retry with the type of the argument
            resolved = numeric.dtype(type(int_type))
        self.dtype = resolved
        self.kind = resolved.kind
        self.bits = resolved.itemsize * 8
        self.key = "%s%d" % (self.kind, self.bits)
        # Only signed ('i') and unsigned ('u') integer kinds are accepted.
        if self.kind not in 'iu':
            raise ValueError("Invalid integer data type %r." % (self.kind,))

    @property
    def min(self):
        """Minimum value of given dtype."""
        if self.kind == 'u':
            return 0
        cache = iinfo._min_vals
        if self.key not in cache:
            cache[self.key] = int(-(1 << (self.bits - 1)))
        return cache[self.key]

    @property
    def max(self):
        """Maximum value of given dtype."""
        cache = iinfo._max_vals
        if self.key not in cache:
            # Unsigned types use every bit; signed types lose one bit
            shift = self.bits if self.kind == 'u' else self.bits - 1
            cache[self.key] = int((1 << shift) - 1)
        return cache[self.key]

    def __str__(self):
        """String representation."""
        fmt = (
            'Machine parameters for %(dtype)s\n'
            '---------------------------------------------------------------\n'
            'min = %(min)s\n'
            'max = %(max)s\n'
            '---------------------------------------------------------------\n'
            )
        fields = {'dtype': self.dtype, 'min': self.min, 'max': self.max}
        return fmt % fields

    def __repr__(self):
        """Return ``<class name>(min=..., max=..., dtype=...)``."""
        cls_name = self.__class__.__name__
        return "%s(min=%s, max=%s, dtype=%s)" % (cls_name,
                                    self.min, self.max, self.dtype)

`.\numpy\numpy\_core\getlimits.pyi`

# Re-export the `finfo` and `iinfo` classes from the top-level numpy package.
# The ``X as X`` form does not rename anything; in a stub file it marks the
# names as explicit re-exports for type checkers.
from numpy import (
    finfo as finfo,
    iinfo as iinfo,
)

# Declares `__all__` as a list of strings; the stub gives it no value.
__all__: list[str]

`.\numpy\numpy\_core\include\numpy\arrayobject.h`

#ifndef NUMPY_CORE_INCLUDE_NUMPY_ARRAYOBJECT_H_
#define NUMPY_CORE_INCLUDE_NUMPY_ARRAYOBJECT_H_
#define Py_ARRAYOBJECT_H

#include "ndarrayobject.h"

#endif  /* NUMPY_CORE_INCLUDE_NUMPY_ARRAYOBJECT_H_ */

`.\numpy\numpy\_core\include\numpy\arrayscalars.h`

#ifndef NUMPY_CORE_INCLUDE_NUMPY_ARRAYSCALARS_H_
#define NUMPY_CORE_INCLUDE_NUMPY_ARRAYSCALARS_H_

#ifndef _MULTIARRAYMODULE
/* Boolean scalar object; only declared when _MULTIARRAYMODULE is not defined */
typedef struct {
        PyObject_HEAD
        npy_bool obval;
} PyBoolScalarObject;
#endif


/* Scalar object holding a signed char value */
typedef struct {
        PyObject_HEAD
        signed char obval;
} PyByteScalarObject;


/* Scalar object holding a short value */
typedef struct {
        PyObject_HEAD
        short obval;
} PyShortScalarObject;


/* Scalar object holding an int value */
typedef struct {
        PyObject_HEAD
        int obval;
} PyIntScalarObject;


/* Scalar object holding a long value */
typedef struct {
        PyObject_HEAD
        long obval;
} PyLongScalarObject;


/* Scalar object holding an npy_longlong value */
typedef struct {
        PyObject_HEAD
        npy_longlong obval;
} PyLongLongScalarObject;


/* Scalar object holding an unsigned char value */
typedef struct {
        PyObject_HEAD
        unsigned char obval;
} PyUByteScalarObject;


/* Scalar object holding an unsigned short value */
typedef struct {
        PyObject_HEAD
        unsigned short obval;
} PyUShortScalarObject;


/* Scalar object holding an unsigned int value */
typedef struct {
        PyObject_HEAD
        unsigned int obval;
} PyUIntScalarObject;


/* Scalar object holding an unsigned long value */
typedef struct {
        PyObject_HEAD
        unsigned long obval;
} PyULongScalarObject;


/* Scalar object holding an npy_ulonglong value */
typedef struct {
        PyObject_HEAD
        npy_ulonglong obval;
} PyULongLongScalarObject;


/* Scalar object holding a half-precision (npy_half) value */
typedef struct {
        PyObject_HEAD
        npy_half obval;
} PyHalfScalarObject;


/* Scalar object holding a float value */
typedef struct {
        PyObject_HEAD
        float obval;
} PyFloatScalarObject;


/* Scalar object holding a double value */
typedef struct {
        PyObject_HEAD
        double obval;
} PyDoubleScalarObject;


/* Scalar object holding an npy_longdouble value */
typedef struct {
        PyObject_HEAD
        npy_longdouble obval;
} PyLongDoubleScalarObject;


/* Scalar object holding a complex float (npy_cfloat) value */
typedef struct {
        PyObject_HEAD
        npy_cfloat obval;
} PyCFloatScalarObject;


/* Scalar object holding a complex double (npy_cdouble) value */
typedef struct {
        PyObject_HEAD
        npy_cdouble obval;
} PyCDoubleScalarObject;


/* Scalar object holding a complex long double (npy_clongdouble) value */
typedef struct {
        PyObject_HEAD
        npy_clongdouble obval;
} PyCLongDoubleScalarObject;


/* Scalar object whose value is a pointer to an arbitrary Python object */
typedef struct {
        PyObject_HEAD
        PyObject * obval;
} PyObjectScalarObject;

/* Datetime scalar object: the value plus its datetime metadata */
typedef struct {
        PyObject_HEAD
        npy_datetime obval;
        PyArray_DatetimeMetaData obmeta;
} PyDatetimeScalarObject;

/* Timedelta scalar object: the value plus its datetime metadata */
typedef struct {
        PyObject_HEAD
        npy_timedelta obval;
        PyArray_DatetimeMetaData obmeta;
} PyTimedeltaScalarObject;


/* Scalar object layout with a single `char obval` member after the header */
typedef struct {
        PyObject_HEAD
        char obval;
} PyScalarObject;

/* String scalars use the Python bytes object layout directly */
#define PyStringScalarObject PyBytesObject

#ifndef Py_LIMITED_API
/* Unicode string scalar object; not available under the limited API */
typedef struct {
        /* note that the PyObject_HEAD macro lives right here (inside `base`) */
        PyUnicodeObject base;
        Py_UCS4 *obval;
    #if NPY_FEATURE_VERSION >= NPY_1_20_API_VERSION
        char *buffer_fmt;
    #endif
} PyUnicodeScalarObject;
#endif


/*
 * Void scalar object: a variable-size object (PyObject_VAR_HEAD) carrying a
 * data pointer, its descriptor, flags and a base object.
 */
typedef struct {
        PyObject_VAR_HEAD
        char *obval;
#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD
        /* Internally use the subclass to allow accessing names/fields */
        _PyArray_LegacyDescr *descr;
#else
        PyArray_Descr *descr;
#endif
        int flags;
        PyObject *base;
    #if NPY_FEATURE_VERSION >= NPY_1_20_API_VERSION
        void *_buffer_info;  /* private buffer info, tagged to allow warning */
    #endif

} PyVoidScalarObject;

/* Macros */

/* The boolean scalar False: entry 0 of _PyArrayScalar_BoolValues */
#define PyArrayScalar_False ((PyObject *)(&(_PyArrayScalar_BoolValues[0])))

/* The boolean scalar True: entry 1 of _PyArrayScalar_BoolValues */
#define PyArrayScalar_True ((PyObject *)(&(_PyArrayScalar_BoolValues[1])))

/* The boolean scalar for `i`: True when i != 0, False otherwise */
#define PyArrayScalar_FromLong(i) \
        ((PyObject *)(&(_PyArrayScalar_BoolValues[((i)!=0)])))

/*
 * Return the boolean scalar for `i` after incrementing its reference count.
 * Expands to a `return` statement and names `i` twice.
 */
#define PyArrayScalar_RETURN_BOOL_FROM_LONG(i)                  \
        return Py_INCREF(PyArrayScalar_FromLong(i)), \
                PyArrayScalar_FromLong(i)

/* Return the boolean scalar False after incrementing its reference count */
#define PyArrayScalar_RETURN_FALSE              \
        return Py_INCREF(PyArrayScalar_False),  \
                PyArrayScalar_False

/* Return the boolean scalar True after incrementing its reference count */
#define PyArrayScalar_RETURN_TRUE               \
        return Py_INCREF(PyArrayScalar_True),   \
                PyArrayScalar_True

/* Allocate a new scalar of type Py<cls>ArrType_Type through its tp_alloc slot */
#define PyArrayScalar_New(cls) \
        Py##cls##ArrType_Type.tp_alloc(&Py##cls##ArrType_Type, 0)

#ifndef Py_LIMITED_API
/* Access the `obval` member of `obj`, treated as a Py<cls>ScalarObject */
#define PyArrayScalar_VAL(obj, cls)             \
        ((Py##cls##ScalarObject *)obj)->obval

/* Assign `val` to the `obval` member of `obj`, treated as a Py<cls>ScalarObject */
#define PyArrayScalar_ASSIGN(obj, cls, val) \
        PyArrayScalar_VAL(obj, cls) = val
#endif

#endif  /* NUMPY_CORE_INCLUDE_NUMPY_ARRAYSCALARS_H_ */

`.\numpy\numpy\_core\include\numpy\dtype_api.h`

/*
 * The public DType API
 */

#ifndef NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
#define NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_

/* Forward declaration only; the struct is used here through pointers */
struct PyArrayMethodObject_tag;

/*
 * Largely opaque struct for DType classes (i.e. metaclass instances).
 * The internal definition is currently in `ndarraytypes.h` (export is a bit
 * more complex because `PyArray_Descr` is a DTypeMeta internally but not
 * externally).
 */
#if !(defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD)

#ifndef Py_LIMITED_API

/* Layout of a DType class as seen outside of NumPy's internal build */
typedef struct PyArray_DTypeMeta_tag {
    /* A DType class is a heap type: the heap type object comes first */
    PyHeapTypeObject super;

    /*
    * Most DTypes will have a singleton default instance, for the
    * parametric legacy DTypes (bytes, string, void, datetime) this
    * may be a pointer to the *prototype* instance?
    */
    PyArray_Descr *singleton;
    /* Copy of the legacy DTypes type number, usually invalid. */
    int type_num;

    /* The type object of the scalar instances (may be NULL?) */
    PyTypeObject *scalar_type;
    /*
    * DType flags to signal legacy, parametric, or
    * abstract.  But plenty of space for additional information/flags.
    */
    npy_uint64 flags;

    /*
    * Use indirection in order to allow a fixed size for this struct.
    * A stable ABI size makes creating a static DType less painful
    * while also ensuring flexibility for all opaque API (with one
    * indirection due the pointer lookup).
    */
    void *dt_slots;
    /* Allow growing (at the moment also beyond this) */
    void *reserved[3];
} PyArray_DTypeMeta;

#else

/* Under the limited API, PyArray_DTypeMeta is just an alias of PyTypeObject */
typedef PyTypeObject PyArray_DTypeMeta;

#endif /* Py_LIMITED_API */

#endif  /* not internal build */

/*
 * ******************************************************
 *         ArrayMethod API (Casting and UFuncs)
 * ******************************************************
 */

/* Flags describing the properties of an ArrayMethod. */
typedef enum {
    /* Flag for whether the GIL is required */
    NPY_METH_REQUIRES_PYAPI = 1 << 0,
    /*
     * Some functions cannot set floating point error flags, this flag
     * gives us the option (not requirement) to skip floating point error
     * setup/check. No function should set error flags and ignore them
     * since it would interfere with chaining operations (e.g. casting).
     */
    NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 1,
    /* Whether the method supports unaligned access (not runtime) */
    NPY_METH_SUPPORTS_UNALIGNED = 1 << 2,
    /*
     * Used for reductions to allow reordering the operation.  At this point
     * assume that if set, it also applies to normal operations though!
     */
    NPY_METH_IS_REORDERABLE = 1 << 3,
    /*
     * Private flag for now for *logic* functions.  The logical functions
     * `logical_or` and `logical_and` can always cast the inputs to booleans
     * "safely" (because that is how the cast to bool is defined).
     * @seberg: I am not sure this is the best way to handle this, so its
     * private for now (also it is very limited anyway).
     * There is one "exception". NA aware dtypes cannot cast to bool
     * (hopefully), so the `??->?` loop should error even with this flag.
     * But a second NA fallback loop will be necessary.
     */
    _NPY_METH_FORCE_CAST_INPUTS = 1 << 17,

    /* All flags which can change at runtime */
    NPY_METH_RUNTIME_FLAGS = (
            NPY_METH_REQUIRES_PYAPI |
            NPY_METH_NO_FLOATINGPOINT_ERRORS),
} NPY_ARRAYMETHOD_FLAGS;


/*
 * PyArrayMethod_Context
 * ---------------------
 * Context information describing an ArrayMethod call.
 */
typedef struct PyArrayMethod_Context_tag {
    /* The caller, which is typically the original ufunc.  May be NULL */
    PyObject *caller;
    /* The method "self". Currently an opaque object. */
    struct PyArrayMethodObject_tag *method;

    /* Operand descriptors, filled in by resolve_descriptors */
    PyArray_Descr *const *descriptors;
    /* Structure may grow (this is harmless for DType authors) */
} PyArrayMethod_Context;


/*
 * PyArrayMethod_Spec
 * ------------------
 * The main object for creating a new ArrayMethod. It uses the typical
 * `slots` mechanism known from the Python limited API.
 */
typedef struct {
    const char *name;
    int nin, nout;
    NPY_CASTING casting;
    NPY_ARRAYMETHOD_FLAGS flags;
    PyArray_DTypeMeta **dtypes;
    PyType_Slot *slots;
} PyArrayMethod_Spec;


/*
 * ArrayMethod slots
 * -----------------
 *
 * Slot IDs for ArrayMethod creation. Once fully public, the IDs are fixed
 * but can be deprecated and arbitrarily extended.
 */
#define _NPY_METH_resolve_descriptors_with_scalars 1
#define NPY_METH_resolve_descriptors 2
#define NPY_METH_get_loop 3
#define NPY_METH_get_reduction_initial 4
/* specific loops for constructions/default get_loop: */
#define NPY_METH_strided_loop 5
#define NPY_METH_contiguous_loop 6
#define NPY_METH_unaligned_strided_loop 7
#define NPY_METH_unaligned_contiguous_loop 8
#define NPY_METH_contiguous_indexed_loop 9
#define _NPY_METH_static_data 10


/*
 * Resolve-descriptors function
 * ----------------------------
 *
 * Must be able to handle NULL for all output (but not input) `given_descrs`
 * and must fill `loop_descrs`.
 * Returns the casting safety as an NPY_CASTING value; for normal functions
 * this is almost always "safe" (or "equivalent").
 * NOTE(review): the error convention (-1 on error, and what a 0 return means
 * when no error is set) is worded ambiguously in the annotated source;
 * confirm against the NumPy ArrayMethod documentation.
 *
 * `resolve_descriptors` is optional if all output DTypes are non-parametric.
 */
typedef NPY_CASTING (PyArrayMethod_ResolveDescriptors)(
        /* "method" is currently opaque (needed e.g. when wrapping in Python). */
        struct PyArrayMethodObject_tag *method,
        /* The DTypes the method was created for. */
        PyArray_DTypeMeta *const *dtypes,
        /* Input descriptors (instances).  Outputs may be NULL. */
        PyArray_Descr *const *given_descrs,
        /* Exact loop descriptors to use; must not hold references on error. */
        PyArray_Descr **loop_descrs,
        npy_intp *view_offset);


/*
 * Rarely needed, slightly more powerful version of `resolve_descriptors`.
 * See `PyArrayMethod_ResolveDescriptors` for details on shared arguments.
 *
 * NOTE: This function is private for now, because it is unclear how and what
 * exactly to pass as additional information to allow dealing with scalars.
 * See also gh-24915.
 */
/**
 * Typedef for a descriptor-resolving function that additionally receives the
 * input scalars.
 *
 * @param method The PyArrayMethodObject for which descriptors are resolved.
 * @param dtypes An array of DType metaclass pointers for the method.
 * @param given_descrs An array of descriptors; entries may be NULL.
 * @param input_scalars An array of input scalars; may contain or be NULL.
 * @param loop_descrs An array of loop descriptors to be filled.
 * @param view_offset A pointer to the view offset value.
 * @returns An NPY_CASTING value describing the casting.
 */
typedef NPY_CASTING (PyArrayMethod_ResolveDescriptorsWithScalar)(
        struct PyArrayMethodObject_tag *method,
        PyArray_DTypeMeta *const *dtypes,
        PyArray_Descr *const *given_descrs,
        PyObject *const *input_scalars,
        PyArray_Descr **loop_descrs,
        npy_intp *view_offset);



/**
 * Signature of a strided inner-loop function.
 *
 * @param context The PyArrayMethod_Context passed to the loop.
 * @param data An array of data pointers the loop operates on.
 * @param dimensions An array of dimension sizes.
 * @param strides An array of strides.
 * @param transferdata The NpyAuxData passed through to the loop.
 * @returns An int status.  NOTE(review): the exact success/failure
 *          convention is not visible here; confirm against the callers.
 */
typedef int (PyArrayMethod_StridedLoop)(PyArrayMethod_Context *context,
        char *const *data, const npy_intp *dimensions, const npy_intp *strides,
        NpyAuxData *transferdata);


/**
 * Signature of the function that selects the strided loop for an
 * ArrayMethod call.
 *
 * @param context The PyArrayMethod_Context for the call.
 * @param aligned Alignment flag for the requested loop.
 * @param move_references Flag telling whether references should be moved.
 * @param strides An array of strides.
 * @param out_loop Output: receives the strided loop function pointer.
 * @param out_transferdata Output: receives the loop's NpyAuxData.
 * @param flags In/out pointer to the method flags for the loop.
 * @returns An int status.  NOTE(review): the exact success/failure
 *          convention is not visible here; confirm against the callers.
 */
typedef int (PyArrayMethod_GetLoop)(
        PyArrayMethod_Context *context,
        int aligned, int move_references,
        const npy_intp *strides,
        PyArrayMethod_StridedLoop **out_loop,
        NpyAuxData **out_transferdata,
        NPY_ARRAYMETHOD_FLAGS *flags);

/**
 * Query an ArrayMethod for the initial value to use in a reduction.
 *
 * @param context The PyArrayMethod_Context, used to access the descriptors.
 * @param reduction_is_empty Whether the reduction is empty.
 * @param initial Pointer to the buffer that receives the initial value, if
 *        one can be provided.
 * @returns -1 on error, 0 if there is no initial value, or 1 if `initial`
 *          was successfully filled.
 */
typedef int (PyArrayMethod_GetReductionInitial)(
        PyArrayMethod_Context *context, npy_bool reduction_is_empty,
        void *initial);

/*
 * The following functions are only used by the wrapping array method defined
 * in umath/wrapping_array_method.c
 */
/*
 * The function that takes the given descriptors (passed in to
 * `resolve_descriptors`) and translates them for the wrapped loop.
 * The new descriptors MUST be viewable with the old ones, `NULL` must be
 * supported (for outputs) and should normally be forwarded.
 *
 * The function must clean up on error.
 *
 * NOTE: We currently assume that this translation gives "viewable" results.
 *       I.e. there is no additional casting related to the wrapping process.
 *       In principle that could be supported, but not sure it is useful.
 *       This currently also means that e.g. alignment must apply identically
 *       to the new dtypes.
 *
 * TODO: Due to the fact that `resolve_descriptors` is also used for `can_cast`
 *       there is no way to "pass out" the result of this function.  This means
 *       it will be called twice for every ufunc call.
 *       (I am considering including `auxdata` as an "optional" parameter to
 *       `resolve_descriptors`, so that it can be filled there if not NULL.)
 */
typedef int (PyArrayMethod_TranslateGivenDescriptors)(int nin, int nout,
        PyArray_DTypeMeta *const wrapped_dtypes[],
        PyArray_Descr *const given_descrs[], PyArray_Descr *new_descrs[]);

/**
 * The function to convert the actual loop descriptors (as returned by the
 * original `resolve_descriptors` function) to the ones the output array
 * should use.
 * This function must return "viewable" types, it must not mutate them in any
 * form that would break the inner-loop logic.  Does not need to support NULL.
 *
 * The function must clean up on error.
 *
 * @param nin Number of input arguments
 * @param nout Number of output arguments
 * @param new_dtypes The DTypes of the output (usually probably not needed)
 * @param given_descrs Original given_descrs to the resolver, necessary to
 *        fetch any information related to the new dtypes from the original.
 * @param original_descrs The `loop_descrs` returned by the wrapped loop.
 * @param loop_descrs The output descriptors, compatible to `original_descrs`.
 *
 * @returns 0 on success, -1 on failure.
 */
typedef int (PyArrayMethod_TranslateLoopDescriptors)(int nin, int nout,
        PyArray_DTypeMeta *const new_dtypes[], PyArray_Descr *const given_descrs[],
        PyArray_Descr *original_descrs[], PyArray_Descr *loop_descrs[]);
/*
 * A traverse loop working on a single array. This is similar to the general
 * strided-loop function. This is designed for loops that need to visit every
 * element of a single array.
 *
 * Currently this is used for array clearing, via the NPY_DT_get_clear_loop
 * API hook, and zero-filling, via the NPY_DT_get_fill_zero_loop API hook.
 * These are most useful for handling arrays storing embedded references to
 * python objects or heap-allocated data.
 *
 * The `void *traverse_context` is passed in because we may need to pass in
 * Interpreter state or similar in the future, but we don't want to pass in
 * a full context (with pointers to dtypes, method, caller which all make
 * no sense for a traverse function).
 *
 * We assume for now that this context can be just passed through in the
 * future (for structured dtypes).
 *
 */
typedef int (PyArrayMethod_TraverseLoop)(
        void *traverse_context, const PyArray_Descr *descr, char *data,
        npy_intp size, npy_intp stride, NpyAuxData *auxdata);


/*
 * Simplified get_loop function specific to dtype traversal
 *
 * It should set the flags needed for the traversal loop and set out_loop to the
 * loop function, which must be a valid PyArrayMethod_TraverseLoop
 * pointer. Currently this is used for zero-filling and clearing arrays storing
 * embedded references.
 *
 * The loop's auxiliary data is handed back through `out_auxdata`.
 */
typedef int (PyArrayMethod_GetTraverseLoop)(
        void *traverse_context, const PyArray_Descr *descr,
        int aligned, npy_intp fixed_stride,
        PyArrayMethod_TraverseLoop **out_loop, NpyAuxData **out_auxdata,
        NPY_ARRAYMETHOD_FLAGS *flags);


/*
 * Type of the C promoter function, which must be wrapped into a
 * PyCapsule with name "numpy._ufunc_promoter".
 *
 * Note that currently the output dtypes are always NULL unless they are
 * also part of the signature. This is an implementation detail and could
 * change in the future. However, in general promoters should not have a
 * need for output dtypes.
 * (There are potential use-cases, these are currently unsupported.)
 *
 * The promoter receives the ufunc, the operand DTypes and the signature
 * DTypes, and writes its result into `new_op_dtypes`.
 */
typedef int (PyArrayMethod_PromoterFunction)(PyObject *ufunc,
        PyArray_DTypeMeta *const op_dtypes[], PyArray_DTypeMeta *const signature[],
        PyArray_DTypeMeta *new_op_dtypes[]);

/*
 * ****************************
 *          DTYPE API
 * ****************************
 */

/*
 * DType flags.  Each expansion is parenthesized so that the macro behaves as
 * a single operand: `<<` binds weaker than `+`, `-` and `*`, so an expression
 * such as `NPY_DT_PARAMETRIC + 1` would otherwise be parsed as `1 << (2 + 1)`.
 */
/* Flag: the DType is abstract. */
#define NPY_DT_ABSTRACT (1 << 1)
/* Flag: the DType is parametric. */
#define NPY_DT_PARAMETRIC (1 << 2)
/* Flag: the DType is numeric. */
#define NPY_DT_NUMERIC (1 << 3)

/*
 * These correspond to slots in the NPY_DType_Slots struct and must
 * be in the same order as the members of that struct. If new slots
 * get added or old slots get removed NPY_NUM_DTYPE_SLOTS must also
 * be updated
 */

/* Slot: discover a descriptor from a Python object. */
#define NPY_DT_discover_descr_from_pyobject 1
/*
 * Slot: decide whether a scalar type is known.
 * This slot is considered private because its API hasn't been decided.
 */
#define _NPY_DT_is_known_scalar_type 2
/* Slot: get the default descriptor. */
#define NPY_DT_default_descr 3
/* Slot: get the common DType. */
#define NPY_DT_common_dtype 4
/* Slot: get the common instance. */
#define NPY_DT_common_instance 5
/* Slot: ensure a canonical descriptor. */
#define NPY_DT_ensure_canonical 6
/* Slot: set an item. */
#define NPY_DT_setitem 7
/* Slot: get an item. */
#define NPY_DT_getitem 8
/* Slot: get the clear loop (see PyArrayMethod_GetTraverseLoop). */
#define NPY_DT_get_clear_loop 9

/* Slot: get the zero-fill loop (see PyArrayMethod_GetTraverseLoop). */
#define NPY_DT_get_fill_zero_loop 10

/* Slot: finalize a descriptor. */
#define NPY_DT_finalize_descr 11

// These PyArray_ArrFunc slots will be deprecated and replaced eventually.
// getitem and setitem can be defined as a performance optimization;
// by default the user dtypes call `legacy_getitem_using_DType` and
// `legacy_setitem_using_DType`, respectively. This functionality is
// only supported for basic NumPy DTypes.

// Used to separate dtype slots from arrfuncs slots.
// Intended only for internal use but defined here for clarity.
#define _NPY_DT_ARRFUNCS_OFFSET (1 << 10)

// Every slot ID below is parenthesized so that the macro expands to a single
// operand; without the parentheses an expression such as `a - SLOT` would be
// parsed as `(a - k) + _NPY_DT_ARRFUNCS_OFFSET`.

// Cast is disabled
// #define NPY_DT_PyArray_ArrFuncs_cast 0 + _NPY_DT_ARRFUNCS_OFFSET

#define NPY_DT_PyArray_ArrFuncs_getitem (1 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_setitem (2 + _NPY_DT_ARRFUNCS_OFFSET)

// Copyswap is disabled
// #define NPY_DT_PyArray_ArrFuncs_copyswapn 3 + _NPY_DT_ARRFUNCS_OFFSET
// #define NPY_DT_PyArray_ArrFuncs_copyswap 4 + _NPY_DT_ARRFUNCS_OFFSET

#define NPY_DT_PyArray_ArrFuncs_compare (5 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_argmax (6 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_dotfunc (7 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_scanfunc (8 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_fromstr (9 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_nonzero (10 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_fill (11 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_fillwithscalar (12 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_sort (13 + _NPY_DT_ARRFUNCS_OFFSET)

#define NPY_DT_PyArray_ArrFuncs_argsort (14 + _NPY_DT_ARRFUNCS_OFFSET)

// Casting related slots are disabled. See
// https://github.com/numpy/numpy/pull/23173#discussion_r1101098163
// #define NPY_DT_PyArray_ArrFuncs_castdict 15 + _NPY_DT_ARRFUNCS_OFFSET
// #define NPY_DT_PyArray_ArrFuncs_scalarkind 16 + _NPY_DT_ARRFUNCS_OFFSET
// #define NPY_DT_PyArray_ArrFuncs_cancastscalarkindto 17 + _NPY_DT_ARRFUNCS_OFFSET
// #define NPY_DT_PyArray_ArrFuncs_cancastto 18 + _NPY_DT_ARRFUNCS_OFFSET

// These are deprecated in NumPy 1.19, so are disabled here.
// #define NPY_DT_PyArray_ArrFuncs_fastclip 19 + _NPY_DT_ARRFUNCS_OFFSET
// #define NPY_DT_PyArray_ArrFuncs_fastputmask 20 + _NPY_DT_ARRFUNCS_OFFSET
// #define NPY_DT_PyArray_ArrFuncs_fasttake 21 + _NPY_DT_ARRFUNCS_OFFSET

#define NPY_DT_PyArray_ArrFuncs_argmin (22 + _NPY_DT_ARRFUNCS_OFFSET)

// TODO: These slots probably still need some thought, or a way to "grow"?
// PyArrayDTypeMeta_Spec describes the metadata used to define a DType.
typedef struct {
    PyTypeObject *typeobj;    /* type of the Python scalar, or NULL */
    int flags;                /* flags, including parametric and abstract */
    /* NULL-terminated cast definitions. Use NULL for the newly created DType */
    PyArrayMethod_Spec **casts;
    PyType_Slot *slots;
    /* Base class or NULL (will always subclass `np.dtype`) */
    PyTypeObject *baseclass;
} PyArrayDTypeMeta_Spec;
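/*
 * The typedef declarations define several function-pointer types used to
 * interact with PyArray_DTypeMeta: discovering a descriptor from a Python
 * object, checking for a known scalar type, getting the default descriptor,
 * finding the common DType of two DTypes, getting the common instance,
 * ensuring a canonical descriptor, and finalizing a descriptor.
 */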

/*
 * Convenience utility for getting a reference to the DType metaclass associated
 * with a dtype instance.
 */
#define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr))

/*
 * Defines the static inline function NPY_DT_NewRef, which increments the
 * reference count of a PyArray_DTypeMeta instance and returns a pointer to
 * that instance.
 */

/*
 * End of the include guard NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_, which
 * prevents the contents of this header from being included more than once.
 */
#endif  /* NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_ */

`.\numpy\numpy\_core\include\numpy\halffloat.h`

#ifndef NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_
#define NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_

#include <Python.h>
#include <numpy/npy_math.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Half-precision routines
 */

/* Convert a half-precision float to a single-precision float. */
float npy_half_to_float(npy_half h);
/* Convert a half-precision float to a double-precision float. */
double npy_half_to_double(npy_half h);
/* Convert a single-precision float to a half-precision float. */
npy_half npy_float_to_half(float f);
/* Convert a double-precision float to a half-precision float. */
npy_half npy_double_to_half(double d);

/* Comparisons */
int npy_half_eq(npy_half h1, npy_half h2);      /* h1 == h2 */
int npy_half_ne(npy_half h1, npy_half h2);      /* h1 != h2 */
int npy_half_le(npy_half h1, npy_half h2);      /* h1 <= h2 */
int npy_half_lt(npy_half h1, npy_half h2);      /* h1 <  h2 */
int npy_half_ge(npy_half h1, npy_half h2);      /* h1 >= h2 */
int npy_half_gt(npy_half h1, npy_half h2);      /* h1 >  h2 */

/* Faster non-NaN variants, for when it is known that h1 and h2 are not NaN */
int npy_half_eq_nonan(npy_half h1, npy_half h2);    /* h1 == h2, no NaN handling */
int npy_half_lt_nonan(npy_half h1, npy_half h2);    /* h1 <  h2, no NaN handling */
int npy_half_le_nonan(npy_half h1, npy_half h2);    /* h1 <= h2, no NaN handling */

/* Miscellaneous functions */
int npy_half_iszero(npy_half h);            /* whether h is zero */
int npy_half_isnan(npy_half h);             /* whether h is NaN */
int npy_half_isinf(npy_half h);             /* whether h is infinite */
int npy_half_isfinite(npy_half h);          /* whether h is finite */
int npy_half_signbit(npy_half h);           /* the sign bit of h */
npy_half npy_half_copysign(npy_half x, npy_half y);   /* x with its sign bit set to that of y */
npy_half npy_half_spacing(npy_half h);      /* distance between h and the adjacent half value */
/* presumably the next representable half after x in the direction of y
 * (C `nextafter` semantics) -- TODO confirm against the implementation */
npy_half npy_half_nextafter(npy_half x, npy_half y);
/* quotient and remainder of x / y; presumably the quotient is returned and
 * the remainder is stored in *modulus -- TODO confirm */
npy_half npy_half_divmod(npy_half x, npy_half y, npy_half *modulus);

/*
 * Half-precision constants
 */

#define NPY_HALF_ZERO   (0x0000u)   /* half-precision zero */
#define NPY_HALF_PZERO  (0x0000u)   /* positive zero */
#define NPY_HALF_NZERO  (0x8000u)   /* negative zero */
#define NPY_HALF_ONE    (0x3c00u)   /* one */
#define NPY_HALF_NEGONE (0xbc00u)   /* negative one */
#define NPY_HALF_PINF   (0x7c00u)   /* positive infinity */
#define NPY_HALF_NINF   (0xfc00u)   /* negative infinity */
#define NPY_HALF_NAN    (0x7e00u)   /* NaN */

#define NPY_MAX_HALF    (0x7bffu)   /* largest finite half-precision value */

/*
 * Bit-level conversions
 */

/* Each function maps the bit pattern of one floating-point format to the bit
 * pattern of another. */
npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f);   /* float bits  -> half bits */
npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d);  /* double bits -> half bits */
npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h);   /* half bits   -> float bits */
npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h);  /* half bits   -> double bits */

#ifdef __cplusplus
}
#endif

#endif  /* NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_ */

`.\numpy\numpy\_core\include\numpy\libdivide\libdivide.h`

// libdivide.h - 优化的整数除法
// https://libdivide.com
//
// 版权所有 (C) 2010 - 2019 ridiculous_fish, <libdivide@ridiculousfish.com>
// 版权所有 (C) 2016 - 2019 Kim Walisch, <kim.walisch@gmail.com>
//
// libdivide 根据 Boost 或 zlib 许可双重许可
// 您可以根据这两种许可的条款使用 libdivide。
// 更多详情请参阅 LICENSE.txt。

#ifndef NUMPY_CORE_INCLUDE_NUMPY_LIBDIVIDE_LIBDIVIDE_H_
#define NUMPY_CORE_INCLUDE_NUMPY_LIBDIVIDE_LIBDIVIDE_H_

#define LIBDIVIDE_VERSION "3.0"        // libdivide version string
#define LIBDIVIDE_VERSION_MAJOR 3      // major version number
#define LIBDIVIDE_VERSION_MINOR 0      // minor version number

#include <stdint.h>                    // 包含 C 标准库的头文件，用于整数类型

#if defined(__cplusplus)
    #include <cstdlib>                 // 如果是 C++ 环境，包含 C++ 标准库的头文件
    #include <cstdio>                  // 包含 C 标准输入输出库的头文件
    #include <type_traits>             // 包含类型特性库的头文件
#else
    #include <stdlib.h>                // 如果是 C 环境，包含 C 标准库的头文件
    #include <stdio.h>                 // 包含 C 标准输入输出库的头文件
#endif

#if defined(LIBDIVIDE_AVX512)
    #include <immintrin.h>             // 如果启用 AVX512，包含 AVX512 指令集头文件
#elif defined(LIBDIVIDE_AVX2)
    #include <immintrin.h>             // 如果启用 AVX2，包含 AVX2 指令集头文件
#elif defined(LIBDIVIDE_SSE2)
    #include <emmintrin.h>             // 如果启用 SSE2，包含 SSE2 指令集头文件
#endif

#if defined(_MSC_VER)
    #include <intrin.h>                // compiler intrinsics for Microsoft Visual C++
    // Disable warning C4146: unary minus operator applied to
    // unsigned type, result still unsigned
    #pragma warning(disable: 4146)
    #define LIBDIVIDE_VC               // marks a build with the Visual C++ compiler
#endif

#if !defined(__has_builtin)
    #define __has_builtin(x) 0         // fallback: report every builtin as unavailable
#endif

#if defined(__SIZEOF_INT128__)
    #define HAS_INT128_T               // 128-bit integer types are available
    // clang-cl on Windows does not yet support 128-bit division
    #if !(defined(__clang__) && defined(LIBDIVIDE_VC))
        #define HAS_INT128_DIV         // defined unless both __clang__ and LIBDIVIDE_VC are set
    #endif
#endif

#if defined(__x86_64__) || defined(_M_X64)
    #define LIBDIVIDE_X86_64           // building for the x86_64 architecture
#endif

#if defined(__i386__)
    #define LIBDIVIDE_i386             // building for the i386 architecture
#endif

#if defined(__GNUC__) || defined(__clang__)
    #define LIBDIVIDE_GCC_STYLE_ASM    // GCC-style inline assembly is available
#endif

#if defined(__cplusplus) || defined(LIBDIVIDE_VC)
    #define LIBDIVIDE_FUNCTION __FUNCTION__  // C++ or Visual C++: use __FUNCTION__
#else
    #define LIBDIVIDE_FUNCTION __func__      // otherwise use the standard __func__
#endif

// LIBDIVIDE_ERROR(msg): print the line number, the enclosing function name and
// `msg` to stderr, then abort the program.
#define LIBDIVIDE_ERROR(msg) \
    do { \
        fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", \
            __LINE__, LIBDIVIDE_FUNCTION, msg); \
        abort(); \
    } while (0)

// LIBDIVIDE_ASSERT(x): when LIBDIVIDE_ASSERTIONS_ON is defined, report a
// failed condition on stderr (including the stringified expression) and abort;
// otherwise the macro expands to nothing.
#if defined(LIBDIVIDE_ASSERTIONS_ON)
    #define LIBDIVIDE_ASSERT(x) \
        do { \
            if (!(x)) { \
                fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", \
                    __LINE__, LIBDIVIDE_FUNCTION, #x); \
                abort(); \
            } \
        } while (0)
#else
    #define LIBDIVIDE_ASSERT(x)
#endif

#ifdef __cplusplus
namespace libdivide {
#endif

// Pack the divider structs to prevent compiler padding.
// When using large arrays of libdivide dividers this reduces memory usage by
// up to 43% and improves performance by up to 10% because of the reduced
// memory bandwidth.
#pragma pack(push, 1)

// Divider for unsigned 32-bit division.
struct libdivide_u32_t {
    uint32_t magic;     // magic number used for the fast division
    uint8_t more;       // packed shift value and flag bits
};

// Divider for signed 32-bit division.
struct libdivide_s32_t {
    int32_t magic;      // magic number used for the fast division
    uint8_t more;       // packed shift value and flag bits
};

// Divider for unsigned 64-bit division.
struct libdivide_u64_t {
    uint64_t magic;     // magic number used for the fast division
    uint8_t more;       // packed shift value and flag bits
};

// Divider for signed 64-bit division.
struct libdivide_s64_t {
    int64_t magic;      // magic number used for the fast division
    uint8_t more;       // packed shift value and flag bits
};

// Branchfree divider for unsigned 32-bit division.
struct libdivide_u32_branchfree_t {
    // magic number used for the fast division
    uint32_t magic;
    // packed shift value and flag bits
    uint8_t more;
};

// Branchfree divider for signed 32-bit division.
struct libdivide_s32_branchfree_t {
    int32_t magic; // magic number used for the fast division
    uint8_t more; // packed shift value and flag bits
};

// Branchfree divider for unsigned 64-bit division.
struct libdivide_u64_branchfree_t {
    uint64_t magic; // magic number used for the fast division
    uint8_t more; // packed shift value and flag bits
};

// Branchfree divider for signed 64-bit division.
struct libdivide_s64_branchfree_t {
    int64_t magic; // magic number used for the fast division
    uint8_t more; // packed shift value and flag bits
};

#pragma pack(pop)

// Explanation of the "more" field:
//
// * Bits 0-5 are the shift value (for the shift path or the mult path).
// * Bit 6 is the add indicator for the mult path.
// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative
//   divisor indicator so that we can efficiently use sign extension to
//   create a bitmask with all bits set to 1 (if the divisor is negative)
//   or 0 (if the divisor is positive).
//
// u32: [0-4] shift value
//      [5] ignored
//      [6] add indicator
//      magic number of 0 indicates shift path
//
// s32: [0-4] shift value
//      [5] ignored
//      [6] add indicator
//      [7] indicates negative divisor
//      magic number of 0 indicates shift path
//
// u64: [0-5] shift value
//      [6] add indicator
//      magic number of 0 indicates shift path
//
// s64: [0-5] shift value
//      [6] add indicator
//      [7] indicates negative divisor
//      magic number of 0 indicates shift path
//
// In the s32 and s64 branching (i.e. non-branchfree) modes, the magic number
// is negated according to whether the divisor is negative.
// In the branchfree strategy, the magic number is not negated.

// Masks and markers used to decode the "more" field.
enum {
    LIBDIVIDE_32_SHIFT_MASK = 0x1F, // shift-value mask for 32-bit dividers (bits 0-4)
    LIBDIVIDE_64_SHIFT_MASK = 0x3F, // shift-value mask for 64-bit dividers (bits 0-5)
    LIBDIVIDE_ADD_MARKER = 0x40,    // add indicator (bit 6)
    LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 // negative-divisor indicator (bit 7)
};

// Forward declarations: generate a divider struct for the divisor `d`
// (signed/unsigned, 32/64-bit).
static inline struct libdivide_s32_t libdivide_s32_gen(int32_t d);

static inline struct libdivide_u32_t libdivide_u32_gen(uint32_t d);

static inline struct libdivide_s64_t libdivide_s64_gen(int64_t d);

static inline struct libdivide_u64_t libdivide_u64_gen(uint64_t d);

// Forward declarations: divide `numer` using a previously generated divider.
static inline int32_t  libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom);

static inline uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom);

static inline int64_t  libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom);

static inline uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom);

// Forward declarations: divide `numer` using a branchfree divider.
static inline int32_t  libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom);

static inline uint32_t libdivide_u32_branchfree_do(uint32_t numer, const struct libdivide_u32_branchfree_t *denom);

static inline int64_t  libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom);

static inline uint64_t libdivide_u64_branchfree_do(uint64_t numer, const struct libdivide_u64_branchfree_t *denom);
// Forward declarations: recover a value from a divider struct.
// NOTE(review): presumably this is the original divisor passed to *_gen --
// confirm against the definitions.
static inline int32_t  libdivide_s32_recover(const struct libdivide_s32_t *denom);
// Unsigned 32-bit variant.
static inline uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom);
// Signed 64-bit variant.
static inline int64_t  libdivide_s64_recover(const struct libdivide_s64_t *denom);
// Unsigned 64-bit variant.
static inline uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom);

// Same as above, for the branchfree signed 32-bit divider.
static inline int32_t  libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom);
// Branchfree unsigned 32-bit variant.
static inline uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom);
// Branchfree signed 64-bit variant.
static inline int64_t  libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom);
// Branchfree unsigned 64-bit variant.
static inline uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom);

//////// Internal Utility Functions

// Returns the upper 32 bits of the 64-bit product of two unsigned 32-bit
// values.
static inline uint32_t libdivide_mullhi_u32(uint32_t x, uint32_t y) {
    // Widen before multiplying so the full 64-bit product is kept.
    const uint64_t product = (uint64_t)x * (uint64_t)y;
    return (uint32_t)(product >> 32);
}

// Returns the upper 32 bits of the 64-bit product of two signed 32-bit
// values.
static inline int32_t libdivide_mullhi_s32(int32_t x, int32_t y) {
    // Widen before multiplying so the full 64-bit product is kept.
    const int64_t product = (int64_t)x * (int64_t)y;
    // The right shift needs to be arithmetic so the sign is preserved.
    return (int32_t)(product >> 32);
}

// Returns the upper 64 bits of the 128-bit product of two unsigned 64-bit
// values.
static inline uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) {
#if defined(LIBDIVIDE_VC) && \
    defined(LIBDIVIDE_X86_64)
    return __umulh(x, y); // Visual C++ on x86_64: unsigned multiply-high
#elif defined(HAS_INT128_T)
    __uint128_t xl = x, yl = y; // widen to the 128-bit integer type
    __uint128_t rl = xl * yl; // full 128-bit product
    return (uint64_t)(rl >> 64); // keep the upper 64 bits
#else
    // No 128-bit type: assemble the upper half from 32x32 -> 64 bit partial
    // products of the 32-bit halves (x = x1:x0, y = y1:y0).
    uint32_t mask = 0xFFFFFFFF;
    uint32_t x0 = (uint32_t)(x & mask);
    uint32_t x1 = (uint32_t)(x >> 32);
    uint32_t y0 = (uint32_t)(y & mask);
    uint32_t y1 = (uint32_t)(y >> 32);
    uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); // upper 32 bits of x0 * y0
    uint64_t x0y1 = x0 * (uint64_t)y1;
    uint64_t x1y0 = x1 * (uint64_t)y0;
    uint64_t x1y1 = x1 * (uint64_t)y1;
    // Add the cross terms in two steps so that no intermediate sum can
    // overflow 64 bits.
    uint64_t temp = x1y0 + x0y0_hi;
    uint64_t temp_lo = temp & mask;
    uint64_t temp_hi = temp >> 32;

    return x1y1 + temp_hi + ((temp_lo + x0y1) >> 32); // upper 64 bits of the product
#endif
}

// Returns the upper 64 bits of the 128-bit product of two signed 64-bit
// values.
static inline int64_t libdivide_mullhi_s64(int64_t x, int64_t y) {
#if defined(LIBDIVIDE_VC) && \
    defined(LIBDIVIDE_X86_64)
    return __mulh(x, y); // Visual C++ on x86_64: signed multiply-high
#elif defined(HAS_INT128_T)
    __int128_t xl = x, yl = y; // widen to the 128-bit integer type
    __int128_t rl = xl * yl; // full 128-bit product
    return (int64_t)(rl >> 64); // keep the upper 64 bits
#else
    // No 128-bit type: assemble the upper half from partial products of the
    // 32-bit halves; the low halves are unsigned, the high halves signed.
    uint32_t mask = 0xFFFFFFFF;
    uint32_t x0 = (uint32_t)(x & mask);
    uint32_t y0 = (uint32_t)(y & mask);
    int32_t x1 = (int32_t)(x >> 32);
    int32_t y1 = (int32_t)(y >> 32);
    uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); // upper 32 bits of x0 * y0
    int64_t t = x1 * (int64_t)y0 + x0y0_hi;
    int64_t w1 = x0 * (int64_t)y1 + (t & mask);

    return x1 * (int64_t)y1 + (t >> 32) + (w1 >> 32); // upper 64 bits of the product
#endif
}

// Counts the leading zero bits of a 32-bit unsigned integer.
//
// NOTE: the three implementations below disagree for val == 0: the portable
// fallback returns 32, the Visual C++ path returns 0, and the builtin path
// passes 0 straight to __builtin_clz.  Callers should avoid passing 0.
static inline int32_t libdivide_count_leading_zeros32(uint32_t val) {
#if defined(__GNUC__) || \
    __has_builtin(__builtin_clz)
    // Fast way to count leading zeros: use the compiler builtin.
    return __builtin_clz(val);
#elif defined(LIBDIVIDE_VC)
    unsigned long result;
    // _BitScanReverse stores the index of the highest set bit in `result`
    // and returns nonzero when such a bit exists.
    if (_BitScanReverse(&result, val)) {
        return 31 - result;
    }
    // No bit is set (val == 0).
    return 0;
#else
    if (val == 0)
        return 32;
    int32_t result = 8;
    // `hi` starts as a mask over the top byte (0xFF000000).
    uint32_t hi = 0xFFU << 24;
    // Skip whole zero bytes from the top, 8 bits at a time.
    while ((val & hi) == 0) {
        hi >>= 8;
        result += 8;
    }
    // `result` now counts the bits up to the end of the first non-zero byte;
    // shift the mask upwards, subtracting one per step, until it no longer
    // overlaps any set bit of val.
    while (val & hi) {
        result -= 1;
        hi <<= 1;
    }
    return result;
#endif
}

// Counts the leading zero bits of a 64-bit unsigned integer.
static inline int32_t libdivide_count_leading_zeros64(uint64_t val) {
#if defined(__GNUC__) || \
    __has_builtin(__builtin_clzll)
    // Fast way to count leading zeros: use the compiler builtin.
    return __builtin_clzll(val);
#elif defined(LIBDIVIDE_VC) && defined(_WIN64)
    unsigned long msb_index;
    // _BitScanReverse64 reports whether any bit is set and, if so, stores the
    // index of the highest one.
    if (!_BitScanReverse64(&msb_index, val)) {
        return 0;
    }
    return 63 - msb_index;
#else
    // Split into 32-bit halves and defer to the 32-bit routine.
    const uint32_t upper = val >> 32;
    const uint32_t lower = val & 0xFFFFFFFF;
    if (upper == 0) {
        // The whole upper half is zero: 32 zeros plus those of the lower half.
        return 32 + libdivide_count_leading_zeros32(lower);
    }
    return libdivide_count_leading_zeros32(upper);
#endif
}

// libdivide_64_div_32_to_32: divides the 64-bit unsigned value {u1, u0}
// (u1 = upper 32 bits, u0 = lower 32 bits) by the 32-bit unsigned value v.
// The quotient must fit in 32 bits.
// The quotient is the return value; the remainder is stored in *r.
static inline uint32_t libdivide_64_div_32_to_32(uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) {
#if (defined(LIBDIVIDE_i386) || defined(LIBDIVIDE_X86_64)) && \
     defined(LIBDIVIDE_GCC_STYLE_ASM)
    // x86 with GCC-style inline asm: `divl` takes the dividend in edx:eax and
    // leaves the quotient in eax and the remainder in edx.
    uint32_t quotient;
    __asm__("divl %[v]"
            : "=a"(quotient), "=d"(*r)
            : [v] "r"(v), "a"(u0), "d"(u1)
            );
    return quotient;
#else
    // Portable path: rebuild the 64-bit dividend and use ordinary division.
    const uint64_t dividend = ((uint64_t)u1 << 32) | u0;
    const uint32_t quotient = (uint32_t)(dividend / v);
    *r = (uint32_t)(dividend - quotient * (uint64_t)v);
    return quotient;
#endif
}

// libdivide_128_div_64_to_64: divides a 128-bit uint {u1, u0} by a 64-bit
// uint {v}. The result must fit in 64 bits.
// Returns the quotient directly and the remainder in *r
static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) {
#if defined(LIBDIVIDE_X86_64) && \
    defined(LIBDIVIDE_GCC_STYLE_ASM)
    // x86_64 with GCC-style inline asm: `divq` takes the dividend in rdx:rax
    // and leaves the quotient in rax and the remainder in rdx.
    uint64_t result;
    __asm__("divq %[v]"
            : "=a"(result), "=d"(*r)
            : [v] "r"(v), "a"(u0), "d"(u1)
            );
    return result;
#elif defined(HAS_INT128_T) && \
      defined(HAS_INT128_DIV)
    // 128-bit type and 128-bit division available: divide directly.
    __uint128_t n = ((__uint128_t)u1 << 64) | u0;
    uint64_t result = (uint64_t)(n / v);
    // remainder = n - quotient * v
    *r = (uint64_t)(n - result * (__uint128_t)v);
    return result;
#else
    // Code taken from Hacker's Delight: schoolbook long division with
    // 32-bit digits.
    const uint64_t b = (1ULL << 32); // Number base (32 bits)
    uint64_t un1, un0; // Norm. dividend LSD's
    uint64_t vn1, vn0; // Norm. divisor digits
    uint64_t q1, q0; // Quotient digits
    uint64_t un64, un21, un10; // Dividend digit pairs
    uint64_t rhat; // A remainder
    int32_t s; // Shift amount for norm

    // If overflow, set rem. to an impossible value,
    // and return the largest possible quotient
    if (u1 >= v) {
        *r = (uint64_t) -1;
        return (uint64_t) -1;
    }

    // count leading zeros of the divisor
    s = libdivide_count_leading_zeros64(v);
    if (s > 0) {
        // Normalize the divisor so that its top bit is set, and shift the
        // dividend left by the same amount.
        v = v << s;
        un64 = (u1 << s) | (u0 >> (64 - s));  // upper 64 bits of the shifted dividend
        un10 = u0 << s;  // lower 64 bits of the shifted dividend
    } else {
        // Avoid undefined behavior of (u0 >> 64) when s == 0.
        un64 = u1;
        un10 = u0;
    }

    // Break the divisor up into two 32-bit digits.
    vn1 = v >> 32;
    vn0 = v & 0xFFFFFFFF;

    // Break the right half of the dividend into two 32-bit digits.
    un1 = un10 >> 32;
    un0 = un10 & 0xFFFFFFFF;

    // Compute the first quotient digit, q1.
    q1 = un64 / vn1;  // estimate from the leading digits
    rhat = un64 - q1 * vn1;  // remainder of that estimate

    // Correct the estimate downwards while it is too large.
    while (q1 >= b || q1 * vn0 > b * rhat + un1) {
        q1 = q1 - 1;
        rhat = rhat + vn1;
        if (rhat >= b)
            break;
    }

    // Multiply and subtract.
    un21 = un64 * b + un1 - q1 * v;

    // Compute the second quotient digit, q0.
    q0 = un21 / vn1;  // estimate from the leading digits
    rhat = un21 - q0 * vn1;  // remainder of that estimate

    // Correct the estimate downwards while it is too large.
    while (q0 >= b || q0 * vn0 > b * rhat + un0) {
        q0 = q0 - 1;
        rhat = rhat + vn1;
        if (rhat >= b)
            break;
    }

    // Undo the normalization shift to obtain the true remainder.
    *r = (un21 * b + un0 - q0 * v) >> s;
    return q1 * b + q0;  // combine the two quotient digits
#endif
}

// Shifts the 128-bit value (*u1:*u0) in place: left by signed_shift bits when
// signed_shift > 0, right by -signed_shift bits when signed_shift < 0.
// A zero shift leaves both words untouched.
static inline void libdivide_u128_shift(uint64_t *u1, uint64_t *u0, int32_t signed_shift) {
    if (signed_shift == 0) {
        return;
    }

    const uint64_t hi = *u1;
    const uint64_t lo = *u0;

    if (signed_shift > 0) {
        // Left shift: the top bits of the low word spill into the high word.
        const uint32_t amount = signed_shift;
        *u1 = (hi << amount) | (lo >> (64 - amount));
        *u0 = lo << amount;
    } else {
        // Right shift: the bottom bits of the high word spill into the low word.
        const uint32_t amount = -signed_shift;
        *u0 = (lo >> amount) | (hi << (64 - amount));
        *u1 = hi >> amount;
    }
}

// Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder.
// The dividend is (u_hi:u_lo) and the divisor is (v_hi:v_lo). The 64-bit
// quotient is returned; the remainder is written to *r_hi (high word) and
// *r_lo (low word).
static uint64_t libdivide_128_div_128_to_64(uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) {
#if defined(HAS_INT128_T) && \
    defined(HAS_INT128_DIV)
    __uint128_t ufull = u_hi;
    __uint128_t vfull = v_hi;
    ufull = (ufull << 64) | u_lo;
    vfull = (vfull << 64) | v_lo;
    uint64_t res = (uint64_t)(ufull / vfull);  // quotient, truncated to 64 bits
    __uint128_t remainder = ufull - (vfull * res);  // remainder = u - v * quotient
    *r_lo = (uint64_t)remainder;  // low 64 bits of the remainder
    *r_hi = (uint64_t)(remainder >> 64);  // high 64 bits of the remainder
    return res;  // return the quotient
#else
    // Adapted from "Unsigned Doubleword Division" in Hacker's Delight
    // We want to compute u / v
    typedef struct { uint64_t hi; uint64_t lo; } u128_t;
    u128_t u = {u_hi, u_lo};
    u128_t v = {v_hi, v_lo};

    if (v.hi == 0) {
        // divisor v is a 64 bit value, so we just need one 128/64 division
        // Note that we are simpler than Hacker's Delight here, because we know
        // the quotient fits in 64 bits whereas Hacker's Delight demands a full
        // 128 bit quotient
        *r_hi = 0;
        return libdivide_128_div_64_to_64(u.hi, u.lo, v.lo, r_lo);  // single 128/64 division; remainder goes to *r_lo
    }
    // Here v >= 2**64
    // We know that v.hi != 0, so count leading zeros is OK
    // We have 0 <= n <= 63
    uint32_t n = libdivide_count_leading_zeros64(v.hi);  // number of leading zero bits in v.hi

    // Normalize the divisor so its MSB is 1
    u128_t v1t = v;
    libdivide_u128_shift(&v1t.hi, &v1t.lo, n);  // v1t = v << n
    uint64_t v1 = v1t.hi; // i.e. v1 = v1t >> 64

    // To ensure no overflow
    u128_t u1 = u;
    libdivide_u128_shift(&u1.hi, &u1.lo, -1);  // u1 = u >> 1

    // Get quotient from divide unsigned insn.
    uint64_t rem_ignored;
    uint64_t q1 = libdivide_128_div_64_to_64(u1.hi, u1.lo, v1, &rem_ignored);  // q1 = u1 / v1; remainder is discarded

    // Undo normalization and division of u by 2.
    u128_t q0 = {0, q1};
    libdivide_u128_shift(&q0.hi, &q0.lo, n);  // q0 = q1 << n
    libdivide_u128_shift(&q0.hi, &q0.lo, -63);  // q0 >>= 63

    // Make q0 correct or too small by 1
    // Equivalent to `if (q0 != 0) q0 = q0 - 1;`
    if (q0.hi != 0 || q0.lo != 0) {
        q0.hi -= (q0.lo == 0); // borrow
        q0.lo -= 1;
    }

    // Now q0 is correct.
    // Compute q0 * v as q0v
    // = (q0.hi << 64 + q0.lo) * (v.hi << 64 + v.lo)
    // = (q0.hi * v.hi << 128) + (q0.hi * v.lo << 64) +
    //   (q0.lo * v.hi <<  64) + q0.lo * v.lo)
    // Each term is 128 bit
    // High half of full product (upper 128 bits!) are dropped
    u128_t q0v = {0, 0};
    // High word: the two cross terms plus the high half of q0.lo * v.lo
    q0v.hi = q0.hi * v.lo + q0.lo * v.hi + libdivide_mullhi_u64(q0.lo, v.lo);
    // Low word: the low half of q0.lo * v.lo
    q0v.lo = q0.lo * v.lo;

    // Compute u - q0v as u_q0v
    // This is the remainder
    u128_t u_q0v = u;
    // Subtract the high word, including the borrow out of the low word
    u_q0v.hi -= q0v.hi + (u.lo < q0v.lo); // second term is borrow
    // Subtract the low word
    u_q0v.lo -= q0v.lo;

    // Check if u_q0v >= v
    // This checks if our remainder is larger than the divisor
    if ((u_q0v.hi > v.hi) ||
        (u_q0v.hi == v.hi && u_q0v.lo >= v.lo)) {
        // Increment q0
        q0.lo += 1;
        // Propagate the carry into the high word
        q0.hi += (q0.lo == 0);

        // Subtract v from the remainder
        u_q0v.hi -= v.hi + (u_q0v.lo < v.lo);
        u_q0v.lo -= v.lo;
    }

    // Store the remainder through r_hi and r_lo
    *r_hi = u_q0v.hi;
    *r_lo = u_q0v.lo;

    // The quotient is expected to fit in 64 bits
    LIBDIVIDE_ASSERT(q0.hi == 0);
    // Return the low word of the quotient
    return q0.lo;
#endif
}

////////// UINT32

// Builds the libdivide_u32_t (magic number plus `more` byte) for the unsigned
// 32-bit divisor d. A nonzero `branchfree` selects the encoding consumed by
// the branchfree division routine.
static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) {
    // A zero divisor is reported through LIBDIVIDE_ERROR
    if (d == 0) {
        LIBDIVIDE_ERROR("divider must be != 0");
    }

    struct libdivide_u32_t result;
    // floor(log2(d)), i.e. the index of the highest set bit of d
    uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(d);

    // Power of 2
    if ((d & (d - 1)) == 0) {
        // We need to subtract 1 from the shift value in case of an unsigned
        // branchfree divider because there is a hardcoded right shift by 1
        // in its division algorithm. Because of this we also need to add back
        // 1 in its recovery algorithm.
        result.magic = 0;
        result.more = (uint8_t)(floor_log_2_d - (branchfree != 0));
    } else {
        uint8_t more;
        uint32_t rem, proposed_m;
        // Divide the 64-bit value with high word (1 << floor_log_2_d) and low
        // word 0, i.e. 2^(32 + floor_log_2_d), by d; the remainder goes to rem
        proposed_m = libdivide_64_div_32_to_32(1U << floor_log_2_d, 0, d, &rem);

        // d is not a power of 2 here, so the remainder must be nonzero
        LIBDIVIDE_ASSERT(rem > 0 && rem < d);
        const uint32_t e = d - rem;

        // This power works if e < 2**floor_log_2_d.
        if (!branchfree && (e < (1U << floor_log_2_d))) {
            more = floor_log_2_d;
        } else {
            // We have to use the general 33-bit algorithm. We need to compute
            // (2**power) / d. However, we already have (2**(power-1))/d and
            // its remainder. By doubling both, and then correcting the
            // remainder, we can compute the larger division.
            proposed_m += proposed_m;
            const uint32_t twice_rem = rem + rem;
            // Carry into the quotient when the doubled remainder reaches d or wraps around
            if (twice_rem >= d || twice_rem < rem) proposed_m += 1;
            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
        }
        result.magic = 1 + proposed_m;
        result.more = more;
        // result.more's shift should in general be ceil_log_2_d. But if we
        // used the smaller power, we subtract one from the shift because we're
        // using the smaller power. If we're using the larger power, we
        // subtract one from the shift because it's taken care of by the add
        // indicator. So floor_log_2_d happens to be correct in both cases.
    }
    return result;
}

// Builds the default (branchfull) divider for the unsigned 32-bit divisor d.
struct libdivide_u32_t libdivide_u32_gen(uint32_t d) {
    const int branchfree = 0;
    return libdivide_internal_u32_gen(d, branchfree);
}

// 生成 libdivide_u32_branchfree_t 结构体对象，用于无分支版本的除法
struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
    // 如果除数为1，抛出错误
    if (d == 1) {
        LIBDIVIDE_ERROR("branchfree divider must be != 1");
    }
    // 调用内部函数生成 libdivide_u32_t 结构体对象，然后构造 libdivide_u32_branchfree_t 对象返回
    struct libdivide_u32_t tmp = libdivide_internal_u32_gen(d, 1);
    struct libdivide_u32_branchfree_t ret = {tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_32_SHIFT_MASK)};
    return ret;
}

// Divides numer by the divisor encoded in a branchfull libdivide_u32_t.
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
    const uint8_t more = denom->more;

    // A zero magic number marks a plain right shift.
    if (denom->magic == 0) {
        return numer >> more;
    }

    // High 32 bits of magic * numer.
    const uint32_t hi = libdivide_mullhi_u32(denom->magic, numer);

    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        // All upper bits of `more` are 0, so it can be used as the shift
        // count without masking.
        return hi >> more;
    }

    // Add-marker path: ((numer - hi) >> 1) + hi, then shift by the masked
    // shift count.
    const uint32_t avg = ((numer - hi) >> 1) + hi;
    return avg >> (more & LIBDIVIDE_32_SHIFT_MASK);
}

// Divides numer by the divisor encoded in a branchfree libdivide_u32_branchfree_t.
uint32_t libdivide_u32_branchfree_do(uint32_t numer, const struct libdivide_u32_branchfree_t *denom) {
    // High 32 bits of magic * numer.
    const uint32_t hi = libdivide_mullhi_u32(denom->magic, numer);
    // Always take the "add" form: ((numer - hi) >> 1) + hi.
    const uint32_t halved_diff = (numer - hi) >> 1;
    return (halved_diff + hi) >> denom->more;
}

// Recovers the divisor encoded in a branchfull libdivide_u32_t.
uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;

    // Zero magic: the divisor is 2^shift.
    if (denom->magic == 0) {
        return 1U << shift;
    }

    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        // Divide the 64-bit value with high word 2^shift and low word 0,
        // i.e. 2^(32 + shift), by the magic number, then add 1.
        uint32_t unused_rem;
        const uint32_t quot = libdivide_64_div_32_to_32(1U << shift, 0, denom->magic, &unused_rem);
        return 1 + quot;
    }

    // Add-marker case: the full magic is the 33-bit value 2^32 + magic.
    // Divide 2^(32 + shift) by it, then double the quotient and remainder.
    const uint64_t half_dividend = 1ULL << (32 + shift);
    const uint64_t full_magic = (1ULL << 32) | denom->magic;
    const uint32_t half_quot = (uint32_t)(half_dividend / full_magic);
    const uint64_t half_rem = half_dividend % full_magic;

    uint32_t quot = half_quot + half_quot;
    if ((half_rem << 1) >= full_magic) {
        // The doubled remainder contributes one more to the quotient.
        quot += 1;
    }
    return quot + 1;
}

// Recovers the divisor encoded in a branchfree libdivide_u32_branchfree_t.
uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;

    // Zero magic: the stored shift is one less than log2(d).
    if (denom->magic == 0) {
        return 1U << (shift + 1);
    }

    // We want d = 2^(32+shift+1) / (m + 2^32). (m + 2^32) is a 33-bit number,
    // so 64-bit division is used. Since shift may be as large as 31, the
    // 2^(32+shift+1) dividend could overflow; instead divide 2^(32+shift)
    // and double both quotient and remainder.
    const uint64_t half_dividend = 1ULL << (32 + shift);
    const uint64_t full_magic = (1ULL << 32) | denom->magic;

    // The quotient fits in 32 bits, but the remainder may need 33 bits.
    const uint32_t half_quot = (uint32_t)(half_dividend / full_magic);
    const uint64_t half_rem = half_dividend % full_magic;

    // half_rem << 1 cannot overflow: half_rem < full_magic, a 33-bit value.
    uint32_t quot = half_quot + half_quot;
    if ((half_rem << 1) >= full_magic) {
        quot += 1;
    }

    // The generator rounded down, hence the +1.
    return quot + 1;
}

/////////// UINT64

// Builds the libdivide_u64_t (magic number plus `more` byte) for the unsigned
// 64-bit divisor d. A nonzero `branchfree` selects the encoding consumed by
// the branchfree division routine.
static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int branchfree) {
    // A zero divisor is reported through LIBDIVIDE_ERROR
    if (d == 0) {
        LIBDIVIDE_ERROR("divider must be != 0");
    }

    // The divider being built
    struct libdivide_u64_t result;
    // floor(log2(d)), i.e. the index of the highest set bit of d
    uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(d);

    // Power of 2
    if ((d & (d - 1)) == 0) {
        // The branchfree encoding stores the shift minus 1; the branchfull
        // encoding stores floor_log_2_d unchanged
        if (branchfree != 0) {
            result.magic = 0;
            result.more = (uint8_t)(floor_log_2_d - 1);
        } else {
            result.magic = 0;
            result.more = (uint8_t)floor_log_2_d;
        }
    } else {
        uint64_t proposed_m, rem;
        uint8_t more;
        
        // (1 << (64 + floor_log_2_d)) / d, with the remainder stored in rem
        proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem);

        // d is not a power of 2 here, so the remainder must be nonzero
        LIBDIVIDE_ASSERT(rem > 0 && rem < d);
        const uint64_t e = d - rem;

        // This power works if e < 2**floor_log_2_d.
        if (!branchfree && e < (1ULL << floor_log_2_d)) {
            more = floor_log_2_d;
        } else {
            // Otherwise use the general 65-bit algorithm: double the quotient
            // and remainder already computed, then correct the quotient
            proposed_m += proposed_m;
            const uint64_t twice_rem = rem + rem;
            // Carry into the quotient when the doubled remainder reaches d or wraps around
            if (twice_rem >= d || twice_rem < rem) proposed_m += 1;
            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
        }
        result.magic = 1 + proposed_m;
        result.more = more;
    }
    return result;
}

// Builds the default (branchfull) divider for the unsigned 64-bit divisor d.
struct libdivide_u64_t libdivide_u64_gen(uint64_t d) {
    const int branchfree = 0;
    return libdivide_internal_u64_gen(d, branchfree);
}

// 生成用于无分支优化除法的数据结构
struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) {
    // 如果除数为1，抛出错误
    if (d == 1) {
        LIBDIVIDE_ERROR("branchfree divider must be != 1");
    }
    // 调用内部生成函数，获取数据结构，并截取需要的位数信息
    struct libdivide_u64_t tmp = libdivide_internal_u64_gen(d, 1);
    struct libdivide_u64_branchfree_t ret = {tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_64_SHIFT_MASK)};
    return ret;
}

// Divides numer by the divisor encoded in a branchfull libdivide_u64_t.
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
    const uint8_t more = denom->more;

    // A zero magic number marks a plain right shift.
    if (denom->magic == 0) {
        return numer >> more;
    }

    // High 64 bits of magic * numer.
    const uint64_t hi = libdivide_mullhi_u64(denom->magic, numer);

    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        // All upper bits of `more` are 0, so it can be used as the shift
        // count without masking.
        return hi >> more;
    }

    // Add-marker path: ((numer - hi) >> 1) + hi, then shift by the masked
    // shift count.
    const uint64_t avg = ((numer - hi) >> 1) + hi;
    return avg >> (more & LIBDIVIDE_64_SHIFT_MASK);
}

// Divides numer by the divisor encoded in a branchfree libdivide_u64_branchfree_t.
uint64_t libdivide_u64_branchfree_do(uint64_t numer, const struct libdivide_u64_branchfree_t *denom) {
    // High 64 bits of magic * numer.
    const uint64_t hi = libdivide_mullhi_u64(denom->magic, numer);
    // Always take the "add" form: ((numer - hi) >> 1) + hi.
    const uint64_t halved_diff = (numer - hi) >> 1;
    return (halved_diff + hi) >> denom->more;
}

// Recovers the divisor encoded in a branchfull libdivide_u64_t.
uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom) {
    uint8_t more = denom->more;
    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;

    if (!denom->magic) {
        // Power-of-2 divisor. The branchfull generator stores floor(log2(d))
        // unchanged in `more` and libdivide_u64_do shifts by exactly that
        // amount, so the divisor is 2^shift. Only the branchfree encoding
        // stores the shift minus 1 and needs (shift + 1) here.
        return 1ULL << shift;
    }
    else if (!(more & LIBDIVIDE_ADD_MARKER)) {
        // Dividend is the 128-bit value with high word 2^shift and low word 0,
        // i.e. 2^(64 + shift)
        uint64_t hi_dividend = 1ULL << shift;
        uint64_t rem_ignored;
        // The divisor is 1 + floor(2^(64 + shift) / magic)
        return 1 + libdivide_128_div_64_to_64(hi_dividend, 0, denom->magic, &rem_ignored);
    }
    else {
        // Add-marker case: the full magic is the 65-bit value 2^64 + magic.
        // half_n = 2^(64 + shift): high word 2^shift, low word 0
        uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0;
        // d = 2^64 + magic: high word 1, low word magic
        const uint64_t d_hi = 1, d_lo = denom->magic;
        uint64_t r_hi, r_lo;
        // half_q = 2^(64 + shift) / (magic + 2^64), remainder in r_hi:r_lo
        uint64_t half_q = libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo);
        // Double the remainder ('dr') and check whether it reaches d
        uint64_t dr_lo = r_lo + r_lo;
        uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry
        int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo);
        // Double the quotient, adding 1 if the doubled remainder reached d
        uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0);
        // The generator rounded down, hence the +1
        return full_q + 1;
    }
}

// Recovers the divisor encoded in a branchfree libdivide_u64_branchfree_t.
uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;

    // Zero magic: the stored shift is one less than log2(d).
    if (denom->magic == 0) {
        return 1ULL << (shift + 1);
    }

    // We want d = 2^(64+shift+1) / (m + 2^64). (m + 2^64) is a 65-bit number
    // and the full dividend may need 129 bits, so divide the 128-bit half
    // dividend 2^(64+shift) instead and double quotient and remainder.
    // TODO: do something better than 128-bit math

    // Half dividend: high word 2^shift, low word 0.
    const uint64_t half_hi = 1ULL << shift;
    const uint64_t half_lo = 0;
    // 65-bit divisor: the high word is always 1, the low word is the magic.
    const uint64_t div_hi = 1;
    const uint64_t div_lo = denom->magic;

    // The quotient fits in 64 bits, but the remainder may need 65 bits.
    uint64_t rem_hi, rem_lo;
    const uint64_t half_quot =
        libdivide_128_div_128_to_64(half_hi, half_lo, div_hi, div_lo, &rem_hi, &rem_lo);

    // Double the remainder, carrying from the low word into the high word.
    const uint64_t dbl_lo = rem_lo + rem_lo;
    const uint64_t dbl_hi = rem_hi + rem_hi + (dbl_lo < rem_lo);

    // Double the quotient; add 1 when the doubled remainder reaches the divisor.
    uint64_t quot = half_quot + half_quot;
    if ((dbl_hi > div_hi) || (dbl_hi == div_hi && dbl_lo >= div_lo)) {
        quot += 1;
    }

    // The generator rounded down, hence the +1.
    return quot + 1;
}

/////////// SINT32

// Builds the libdivide_s32_t (magic number plus `more` byte) for the signed
// 32-bit divisor d. A nonzero `branchfree` selects the encoding consumed by
// the branchfree division routine.
static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int branchfree) {
    // A zero divisor is reported through LIBDIVIDE_ERROR
    if (d == 0) {
        LIBDIVIDE_ERROR("divider must be != 0");
    }

    struct libdivide_s32_t result;

    // If d is a power of 2, or negative a power of 2, we have to use a shift.
    // This is especially important because the magic algorithm fails for -1.
    // To check if d is a power of 2 or its inverse, it suffices to check
    // whether its absolute value has exactly one bit set. This works even for
    // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set
    // and is a power of 2.
    uint32_t ud = (uint32_t)d;
    uint32_t absD = (d < 0) ? -ud : ud;
    uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(absD);
    // check if exactly one bit is set,
    // don't care if absD is 0 since that's divide by zero
    if ((absD & (absD - 1)) == 0) {
        // Branchfree and normal paths are exactly the same
        result.magic = 0;
        result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
    } else {
        LIBDIVIDE_ASSERT(floor_log_2_d >= 1);

        uint8_t more;
        // the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word
        // is 0 and the high word is 2**(floor_log_2_d - 1)
        uint32_t rem, proposed_m;
        proposed_m = libdivide_64_div_32_to_32(1U << (floor_log_2_d - 1), 0, absD, &rem);
        const uint32_t e = absD - rem;

        // We are going to start with a power of floor_log_2_d - 1.
        // This works if e < 2**floor_log_2_d.
        if (!branchfree && e < (1U << floor_log_2_d)) {
            // This power works
            more = floor_log_2_d - 1;
        } else {
            // We need to go one higher. This should not make proposed_m
            // overflow, but it will make it negative when interpreted as an
            // int32_t.
            proposed_m += proposed_m;
            const uint32_t twice_rem = rem + rem;
            // Carry into the quotient when the doubled remainder reaches absD or wraps around
            if (twice_rem >= absD || twice_rem < rem) proposed_m += 1;
            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
        }

        proposed_m += 1;
        int32_t magic = (int32_t)proposed_m;

        // Mark if we are negative. Note we only negate the magic number in the
        // branchfull case.
        if (d < 0) {
            more |= LIBDIVIDE_NEGATIVE_DIVISOR;
            if (!branchfree) {
                magic = -magic;
            }
        }

        result.more = more;
        result.magic = magic;
    }
    return result;
}

// Builds the default (branchfull) divider for the signed 32-bit divisor d.
struct libdivide_s32_t libdivide_s32_gen(int32_t d) {
    const int branchfree = 0;
    return libdivide_internal_s32_gen(d, branchfree);
}

// 生成用于32位有符号整数的除法信息，使用分支优化的生成函数
struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) {
    struct libdivide_s32_t tmp = libdivide_internal_s32_gen(d, 1);
    struct libdivide_s32_branchfree_t result = {tmp.magic, tmp.more};
    return result;
}

// Divides numer by the divisor encoded in a branchfull libdivide_s32_t.
int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;

    if (denom->magic == 0) {
        // Shift path. `negate` is all ones when bit 7 of `more` is set,
        // otherwise zero (arithmetic shift of the sign-extended byte).
        const uint32_t round_mask = (1U << shift) - 1;
        const uint32_t negate = (int8_t)more >> 7;
        // Add (2^shift - 1) to negative numerators so the arithmetic shift
        // below rounds toward zero.
        const uint32_t biased = numer + ((numer >> 31) & round_mask);
        int32_t quot = (int32_t)biased;
        quot >>= shift;
        // Conditionally negate: (x ^ -1) - (-1) == -x, (x ^ 0) - 0 == x.
        quot = (quot ^ negate) - negate;
        return quot;
    }

    // Magic path: start from the high 32 bits of magic * numer.
    uint32_t acc = (uint32_t)libdivide_mullhi_s32(denom->magic, numer);
    if (more & LIBDIVIDE_ADD_MARKER) {
        // Must be an arithmetic shift with sign extension.
        const int32_t negate = (int8_t)more >> 7;
        // Add numer, or subtract it when `negate` is all ones.
        acc += ((uint32_t)numer ^ negate) - negate;
    }
    int32_t quot = (int32_t)acc;
    quot >>= shift;
    // Add 1 when the shifted result is negative.
    quot += (quot < 0);
    return quot;
}

// Divides numer by the divisor encoded in a branchfree libdivide_s32_branchfree_t.
int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    // Must be an arithmetic shift with sign extension: all ones when bit 7
    // of `more` is set, otherwise zero.
    const int32_t negate = (int8_t)more >> 7;
    const int32_t magic = denom->magic;

    // High 32 bits of magic * numer, plus the numerator itself.
    int32_t quot = libdivide_mullhi_s32(magic, numer) + numer;

    // If quot is non-negative, nothing is added. If it is negative, add
    // (2**shift)-1 for a power of 2 (magic == 0), or (2**shift) otherwise.
    const uint32_t pow2 = (magic == 0);
    const uint32_t neg_mask = (uint32_t)(quot >> 31);
    quot += neg_mask & ((1U << shift) - pow2);

    // Arithmetic right shift, then conditional negation.
    quot >>= shift;
    return (quot ^ negate) - negate;
}

// Recovers the divisor encoded in a libdivide_s32_t. The branchfree recover
// routine also funnels into this function, so it must cope with both
// encodings.
int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom) {
    uint8_t more = denom->more;
    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    if (!denom->magic) {
        // Zero magic: |d| is 2^shift (this covers the branchfree encoding
        // too, since both generators store the same values for powers of 2)
        uint32_t absD = 1U << shift;
        // Negate in unsigned arithmetic when the divisor was negative
        if (more & LIBDIVIDE_NEGATIVE_DIVISOR) {
            absD = -absD;
        }
        return (int32_t)absD;
    } else {
        // Unsigned math is much easier
        // We negate the magic number only in the branchfull case, and we don't
        // know which case we're in. However we have enough information to
        // determine the correct sign of the magic number. The divisor was
        // negative if LIBDIVIDE_NEGATIVE_DIVISOR is set. If ADD_MARKER is set,
        // the magic number's sign is opposite that of the divisor.
        // We want to compute the positive magic number.
        int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR);
        int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER)
            ? denom->magic > 0 : denom->magic < 0;

        // magic is known to be nonzero on this path, so no separate
        // power-of-2 check is needed here.
        uint32_t d = (uint32_t)(magic_was_negated ? -denom->magic : denom->magic);
        uint64_t n = 1ULL << (32 + shift); // this shift cannot exceed 30
        uint32_t q = (uint32_t)(n / d);
        int32_t result = (int32_t)q;
        // The generator rounded down, hence the +1
        result += 1;
        return negative_divisor ? -result : result;
    }
}

// Recovers the divisor encoded in a branchfree libdivide_s32_branchfree_t by
// reinterpreting it as a libdivide_s32_t and delegating to libdivide_s32_recover.
int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom) {
    const struct libdivide_s32_t *as_branchfull = (const struct libdivide_s32_t *)denom;
    return libdivide_s32_recover(as_branchfull);
}

///////////// SINT64

// Builds the libdivide_s64_t (magic number plus `more` byte) for the signed
// 64-bit divisor d. A nonzero `branchfree` selects the encoding consumed by
// the branchfree division routine.
static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int branchfree) {
    // A zero divisor is reported through LIBDIVIDE_ERROR
    if (d == 0) {
        LIBDIVIDE_ERROR("divider must be != 0");
    }

    struct libdivide_s64_t result;

    // If d is a power of 2, or negative a power of 2, we have to use a shift.
    // This is especially important because the magic algorithm fails for -1.
    // To check if d is a power of 2 or its inverse, it suffices to check
    // whether its absolute value has exactly one bit set.
    // Reinterpret d as unsigned so the negation below is well defined
    uint64_t ud = (uint64_t)d;
    // Absolute value of d, computed in unsigned arithmetic
    uint64_t absD = (d < 0) ? -ud : ud;
    // floor(log2(absD)), i.e. the index of the highest set bit of absD
    uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(absD);
    // Check if exactly one bit is set, i.e. absD is a power of 2
    if ((absD & (absD - 1)) == 0) {
        // Power of 2: zero magic, shift of floor_log_2_d, plus the negative
        // flag when d < 0. Both branchfree and branchfull use this encoding
        result.magic = 0;
        result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
    } else {
        // Not a power of 2: a magic number is needed
        uint8_t more;
        uint64_t rem, proposed_m;
        // Divide the 128-bit value with high word 2**(floor_log_2_d - 1) and
        // low word 0, i.e. 2**(floor_log_2_d + 63), by absD
        proposed_m = libdivide_128_div_64_to_64(1ULL << (floor_log_2_d - 1), 0, absD, &rem);
        const uint64_t e = absD - rem;

        // We are going to start with a power of floor_log_2_d - 1.
        // This works if e < 2**floor_log_2_d.
        if (!branchfree && e < (1ULL << floor_log_2_d)) {
            // This power works
            more = floor_log_2_d - 1;
        } else {
            // We need to go one higher: double the quotient and remainder.
            // This makes proposed_m negative when interpreted as an int64_t
            proposed_m += proposed_m;
            const uint64_t twice_rem = rem + rem;
            // Carry into the quotient when the doubled remainder reaches absD or wraps around
            if (twice_rem >= absD || twice_rem < rem) proposed_m += 1;
            // Record the shift together with the add marker
            more = floor_log_2_d | LIBDIVIDE_ADD_MARKER;
        }
        proposed_m += 1;
        // Reinterpret the magic number as signed
        int64_t magic = (int64_t)proposed_m;

        // Mark if we are negative. The magic number itself is only negated
        // in the branchfull case
        if (d < 0) {
            more |= LIBDIVIDE_NEGATIVE_DIVISOR;
            if (!branchfree) {
                magic = -magic;
            }
        }

        // Store the encoded shift/flags and the magic number
        result.more = more;
        result.magic = magic;
    }
    // Return the finished divider
    return result;
}

// Builds the default (branchfull) divider for the signed 64-bit divisor d.
struct libdivide_s64_t libdivide_s64_gen(int64_t d) {
    const int branchfree = 0;
    return libdivide_internal_s64_gen(d, branchfree);
}

// 生成一个 libdivide_s64_branchfree_t 结构体，通过调用内部函数 libdivide_internal_s64_gen
struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) {
    // 调用 libdivide_internal_s64_gen 获取 libdivide_s64_t 结构体
    struct libdivide_s64_t tmp = libdivide_internal_s64_gen(d, 1);
    // 构造 libdivide_s64_branchfree_t 结构体并返回
    struct libdivide_s64_branchfree_t ret = {tmp.magic, tmp.more};
    return ret;
}

// Divides numer by the divisor encoded in a branchfull libdivide_s64_t.
int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;

    if (denom->magic == 0) {
        // Shift path. Add (2^shift - 1) to negative numerators so the
        // arithmetic shift below rounds toward zero.
        const uint64_t round_mask = (1ULL << shift) - 1;
        const uint64_t biased = numer + ((numer >> 63) & round_mask);
        int64_t quot = (int64_t)biased;
        quot >>= shift;
        // Must be an arithmetic shift with sign extension: all ones when
        // bit 7 of `more` is set, otherwise zero.
        const int64_t negate = (int8_t)more >> 7;
        // Conditionally negate: (x ^ -1) - (-1) == -x, (x ^ 0) - 0 == x.
        quot = (quot ^ negate) - negate;
        return quot;
    }

    // Magic path: start from the high 64 bits of magic * numer.
    uint64_t acc = (uint64_t)libdivide_mullhi_s64(denom->magic, numer);
    if (more & LIBDIVIDE_ADD_MARKER) {
        // Must be an arithmetic shift with sign extension.
        const int64_t negate = (int8_t)more >> 7;
        // Add numer, or subtract it when `negate` is all ones.
        acc += ((uint64_t)numer ^ negate) - negate;
    }
    int64_t quot = (int64_t)acc;
    quot >>= shift;
    // Add 1 when the shifted result is negative.
    quot += (quot < 0);
    return quot;
}

// Divides numer by the divisor encoded in a branchfree libdivide_s64_branchfree_t.
int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) {
    const uint8_t more = denom->more;
    const uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    // Must be an arithmetic shift with sign extension: all ones when bit 7
    // of `more` is set, otherwise zero.
    const int64_t negate = (int8_t)more >> 7;
    const int64_t magic = denom->magic;

    // High 64 bits of magic * numer, plus the numerator itself.
    int64_t quot = libdivide_mullhi_s64(magic, numer) + numer;

    // If quot is non-negative, nothing is added. If it is negative, add
    // (2**shift)-1 for a power of 2 (magic == 0), or (2**shift) otherwise.
    const uint64_t pow2 = (magic == 0);
    const uint64_t neg_mask = (uint64_t)(quot >> 63);
    quot += neg_mask & ((1ULL << shift) - pow2);

    // Arithmetic right shift, then conditional negation.
    quot >>= shift;
    return (quot ^ negate) - negate;
}

// 根据 libdivide_s64_t 结构体恢复被除数
int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom) {
    uint8_t more = denom->more;
    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    if (denom->magic == 0) { // 如果 magic 字段为 0，使用移位路径
        uint64_t absD = 1ULL << shift;
        if (more & LIBDIVIDE_NEGATIVE_DIVISOR) {
            absD = -absD;
        }
        return (int64_t)absD;

        }
        uint64_t uabsD = denom->magic;
        return (int64_t)uabsD;
    }
    } else {
        // 如果条件不满足，则执行以下操作，处理除法操作

        // 检查是否为无符号数，无符号数的处理更加简单
        int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR);

        // 检查魔数是否被否定
        int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER)
            ? denom->magic > 0 : denom->magic < 0;

        // 将魔数转换为无符号64位整数
        uint64_t d = (uint64_t)(magic_was_negated ? -denom->magic : denom->magic);

        // 左移操作，设置64位整数的高位
        uint64_t n_hi = 1ULL << shift, n_lo = 0;

        // 忽略的余数变量
        uint64_t rem_ignored;

        // 调用libdivide库中的128位除法函数，计算商q，其中参数为64位整数
        uint64_t q = libdivide_128_div_64_to_64(n_hi, n_lo, d, &rem_ignored);

        // 将q + 1转换为int64_t类型作为结果
        int64_t result = (int64_t)(q + 1);

        // 如果是负数除数，将结果取反
        if (negative_divisor) {
            result = -result;
        }

        // 返回最终结果
        return result;
    }
// Recovers the divisor encoded in a branchfree libdivide_s64_branchfree_t by
// reinterpreting it as a libdivide_s64_t and delegating to libdivide_s64_recover.
int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom) {
    const struct libdivide_s64_t *as_branchfull = (const struct libdivide_s64_t *)denom;
    return libdivide_s64_recover(as_branchfull);
}

// AVX512 section: vectorized division on __m512i values.
// NOTE(review): presumably this section is guarded by a LIBDIVIDE_AVX512
// preprocessor check that is not visible here -- confirm.

// Forward declarations of the branchfull vector division routines, one per
// element type (u32, s32, u64, s64).
static inline __m512i libdivide_u32_do_vector(__m512i numers, const struct libdivide_u32_t *denom);
static inline __m512i libdivide_s32_do_vector(__m512i numers, const struct libdivide_s32_t *denom);
static inline __m512i libdivide_u64_do_vector(__m512i numers, const struct libdivide_u64_t *denom);
static inline __m512i libdivide_s64_do_vector(__m512i numers, const struct libdivide_s64_t *denom);

// Forward declarations of the branchfree vector division routines, again one
// per element type.
static inline __m512i libdivide_u32_branchfree_do_vector(__m512i numers, const struct libdivide_u32_branchfree_t *denom);
static inline __m512i libdivide_s32_branchfree_do_vector(__m512i numers, const struct libdivide_s32_branchfree_t *denom);
static inline __m512i libdivide_u64_branchfree_do_vector(__m512i numers, const struct libdivide_u64_branchfree_t *denom);
static inline __m512i libdivide_s64_branchfree_do_vector(__m512i numers, const struct libdivide_s64_branchfree_t *denom);

//////// Internal Utility Functions

// Arithmetic-right-shifts every 64-bit element of v by 63, so each element
// becomes a copy of its own sign bit (all ones if negative, zero otherwise).
static inline __m512i libdivide_s64_signbits(__m512i v) {
    const __m512i sign_fill = _mm512_srai_epi64(v, 63);
    return sign_fill;
}

// Arithmetic-right-shifts every 64-bit element of v by amt bits.
static inline __m512i libdivide_s64_shift_right_vector(__m512i v, int amt) {
    const __m512i shifted = _mm512_srai_epi64(v, amt);
    return shifted;
}

// Per-element high 32 bits of the unsigned 32x32 product a * b.
// Here, b is assumed to contain one 32-bit value repeated.
static inline __m512i libdivide_mullhi_u32_vector(__m512i a, __m512i b) {
    // Selects the odd-numbered 32-bit elements (the upper half of each 64-bit lane).
    const __m512i odd_lane_mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0);

    // Even elements: 64-bit products of the low halves, high word moved down.
    const __m512i even_prod = _mm512_mul_epu32(a, b);
    const __m512i even_hi = _mm512_srli_epi64(even_prod, 32);

    // Odd elements: bring them down to the low half, multiply, and keep the
    // high word of each product in place.
    const __m512i a_odd = _mm512_srli_epi64(a, 32);
    const __m512i odd_prod = _mm512_mul_epu32(a_odd, b);
    const __m512i odd_hi = _mm512_and_si512(odd_prod, odd_lane_mask);

    return _mm512_or_si512(even_hi, odd_hi);
}

// Per-element high 32 bits of the signed 32x32 product a * b.
// b is assumed to contain one 32-bit value repeated.
static inline __m512i libdivide_mullhi_s32_vector(__m512i a, __m512i b) {
    // Selects the odd-numbered 32-bit elements (the upper half of each 64-bit lane).
    const __m512i odd_lane_mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0);

    // Even elements: signed 64-bit products of the low halves, high word moved down.
    const __m512i even_prod = _mm512_mul_epi32(a, b);
    const __m512i even_hi = _mm512_srli_epi64(even_prod, 32);

    // Odd elements: bring them down to the low half, multiply, and keep the
    // high word of each product in place.
    const __m512i a_odd = _mm512_srli_epi64(a, 32);
    const __m512i odd_prod = _mm512_mul_epi32(a_odd, b);
    const __m512i odd_hi = _mm512_and_si512(odd_prod, odd_lane_mask);

    return _mm512_or_si512(even_hi, odd_hi);
}

// Per-element high 64 bits of the unsigned 64x64 product x * y, built from
// four 32x32 -> 64 partial products.
// Here, y is assumed to contain one 64-bit value repeated.
static inline __m512i libdivide_mullhi_u64_vector(__m512i x, __m512i y) {
    // Mask selecting the low 32 bits of every 64-bit element
    __m512i lomask = _mm512_set1_epi64(0xffffffff);
    // Shuffle control 0xB1 swaps the two 32-bit halves of every 64-bit
    // element, so the high halves land where _mm512_mul_epu32 reads them
    __m512i xh = _mm512_shuffle_epi32(x, (_MM_PERM_ENUM) 0xB1);
    __m512i yh = _mm512_shuffle_epi32(y, (_MM_PERM_ENUM) 0xB1);
    __m512i w0 = _mm512_mul_epu32(x, y);  // x_lo * y_lo
    __m512i w1 = _mm512_mul_epu32(x, yh);  // x_lo * y_hi
    __m512i w2 = _mm512_mul_epu32(xh, y);  // x_hi * y_lo
    __m512i w3 = _mm512_mul_epu32(xh, yh);  // x_hi * y_hi
    // High word of the low product, carried into the first cross term
    __m512i w0h = _mm512_srli_epi64(w0, 32);
    __m512i s1 = _mm512_add_epi64(w1, w0h);
    __m512i s1l = _mm512_and_si512(s1, lomask);
    // s1h = s1 >> 32 (logical), per 64-bit element
    __m512i s1h = _mm512_srli_epi64(s1, 32);
    // s2 = w2 + low word of s1
    __m512i s2 = _mm512_add_epi64(w2, s1l);
    // s2h = s2 >> 32 (logical), per 64-bit element
    __m512i s2h = _mm512_srli_epi64(s2, 32);
    // hi = w3 + s1h
    __m512i hi = _mm512_add_epi64(w3, s1h);
    // hi += s2h
    hi = _mm512_add_epi64(hi, s2h);

    // hi now holds the upper 64 bits of each product
    return hi;
}

// Returns, per 64-bit lane, the upper 64 bits of the signed product x * y.
// Starts from the unsigned high product and subtracts y where x is negative
// and x where y is negative.
static inline __m512i libdivide_mullhi_s64_vector(__m512i x, __m512i y) {
    __m512i unsigned_hi = libdivide_mullhi_u64_vector(x, y);

    // Sign-bit masks select the operand to subtract for each negative input.
    __m512i fix_for_neg_x = _mm512_and_si512(libdivide_s64_signbits(x), y);
    __m512i fix_for_neg_y = _mm512_and_si512(libdivide_s64_signbits(y), x);

    return _mm512_sub_epi64(_mm512_sub_epi64(unsigned_hi, fix_for_neg_x), fix_for_neg_y);
}

////////// UINT32

// Divides each unsigned 32-bit lane of `numers` by the divisor encoded in
// `denom` (AVX-512 version).
__m512i libdivide_u32_do_vector(__m512i numers, const struct libdivide_u32_t *denom) {
    const uint8_t more = denom->more;

    // magic == 0: the quotient is a plain logical right shift by `more`.
    if (denom->magic == 0) {
        return _mm512_srli_epi32(numers, more);
    }

    // Otherwise start from the upper 32 bits of numers * magic.
    __m512i mulhi = libdivide_mullhi_u32_vector(numers, _mm512_set1_epi32(denom->magic));
    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        return _mm512_srli_epi32(mulhi, more);
    }

    // Add-marker path: t = ((numers - mulhi) >> 1) + mulhi, then shift by the
    // shift bits of `more`.
    const uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    __m512i half_diff = _mm512_srli_epi32(_mm512_sub_epi32(numers, mulhi), 1);
    __m512i t = _mm512_add_epi32(half_diff, mulhi);
    return _mm512_srli_epi32(t, shift);
}

// Branch-free unsigned 32-bit division of every lane of `numers` by the
// divisor encoded in `denom` (AVX-512 version).
__m512i libdivide_u32_branchfree_do_vector(__m512i numers, const struct libdivide_u32_branchfree_t *denom) {
    // Upper 32 bits of numers * magic.
    __m512i mulhi = libdivide_mullhi_u32_vector(numers, _mm512_set1_epi32(denom->magic));

    // t = ((numers - mulhi) >> 1) + mulhi
    __m512i half_diff = _mm512_srli_epi32(_mm512_sub_epi32(numers, mulhi), 1);
    __m512i t = _mm512_add_epi32(half_diff, mulhi);

    // Final logical shift by denom->more.
    return _mm512_srli_epi32(t, denom->more);
}

////////// UINT64

// Divides each unsigned 64-bit lane of `numers` by the divisor encoded in
// `denom` (AVX-512 version).
__m512i libdivide_u64_do_vector(__m512i numers, const struct libdivide_u64_t *denom) {
    const uint8_t more = denom->more;

    // magic == 0: the quotient is a plain logical right shift by `more`.
    if (denom->magic == 0) {
        return _mm512_srli_epi64(numers, more);
    }

    // Otherwise start from the upper 64 bits of numers * magic.
    __m512i mulhi = libdivide_mullhi_u64_vector(numers, _mm512_set1_epi64(denom->magic));
    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        return _mm512_srli_epi64(mulhi, more);
    }

    // Add-marker path: t = ((numers - mulhi) >> 1) + mulhi, then shift by the
    // shift bits of `more`.
    const uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    __m512i half_diff = _mm512_srli_epi64(_mm512_sub_epi64(numers, mulhi), 1);
    __m512i t = _mm512_add_epi64(half_diff, mulhi);
    return _mm512_srli_epi64(t, shift);
}

// Branch-free unsigned 64-bit division of every lane of `numers` by the
// divisor encoded in `denom` (AVX-512 version).
__m512i libdivide_u64_branchfree_do_vector(__m512i numers, const struct libdivide_u64_branchfree_t *denom) {
    // Upper 64 bits of numers * magic.
    __m512i mulhi = libdivide_mullhi_u64_vector(numers, _mm512_set1_epi64(denom->magic));

    // t = ((numers - mulhi) >> 1) + mulhi
    __m512i half_diff = _mm512_srli_epi64(_mm512_sub_epi64(numers, mulhi), 1);
    __m512i t = _mm512_add_epi64(half_diff, mulhi);

    // Final logical shift by denom->more.
    return _mm512_srli_epi64(t, denom->more);
}

////////// SINT32

// Divides each signed 32-bit lane of `numers` by the divisor encoded in
// `denom` (AVX-512 version).
__m512i libdivide_s32_do_vector(__m512i numers, const struct libdivide_s32_t *denom) {
    const uint8_t more = denom->more;
    const uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    // All-ones when the top bit of `more` is set, zero otherwise; relies on
    // the right shift of the int8_t value being arithmetic.
    const __m512i sign = _mm512_set1_epi32((int8_t)more >> 7);

    if (denom->magic == 0) {
        // Shift path. Add (2**shift - 1) to negative numerators so that the
        // arithmetic shift rounds toward zero:
        //   q = numer + ((numer >> 31) & roundToZeroTweak)
        const uint32_t low_bits = (1U << shift) - 1;
        __m512i bias = _mm512_and_si512(_mm512_srai_epi32(numers, 31),
                                        _mm512_set1_epi32(low_bits));
        __m512i q = _mm512_srai_epi32(_mm512_add_epi32(numers, bias), shift);
        // q = (q ^ sign) - sign: negates q where `sign` is all-ones.
        return _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign);
    }

    // Multiply path: upper 32 bits of numers * magic.
    __m512i q = libdivide_mullhi_s32_vector(numers, _mm512_set1_epi32(denom->magic));
    if (more & LIBDIVIDE_ADD_MARKER) {
        // q += ((numer ^ sign) - sign)
        __m512i signed_numers = _mm512_sub_epi32(_mm512_xor_si512(numers, sign), sign);
        q = _mm512_add_epi32(q, signed_numers);
    }
    q = _mm512_srai_epi32(q, shift);
    // q += (q < 0): the logical shift extracts the sign bit as 0 or 1.
    return _mm512_add_epi32(q, _mm512_srli_epi32(q, 31));
}

__m512i libdivide_s32_branchfree_do_vector(__m512i numers, const struct libdivide_s32_branchfree_t *denom) {
    int32_t magic = denom->magic;  // 从结构体中获取魔数
    uint8_t more = denom->more;  // 从结构体中获取更多信息
    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;  // 从更多信息中提取出位移量

    // 必须是算术右移
    __m512i sign = _mm512_set1_epi32((int8_t)more >> 7);  // 创建一个包含符号位的向量

    // 计算乘法的高位结果
    __m512i q = libdivide_mullhi_s32_vector(numers, _mm512_set1_epi32(magic));
    q = _mm512_add_epi32(q, numers); // q += numers

    // 如果 q 是非负数，无需处理
    // 如果 q 是负数，根据是否是2的幂，要添加 (2**shift)-1 或 2**shift
    uint32_t is_power_of_2 = (magic == 0);
    __m512i q_sign = _mm512_srai_epi32(q, 31); // q_sign = q >> 31
    __m512i mask = _mm512_set1_epi32((1U << shift) - is_power_of_2);
    q = _mm512_add_epi32(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask)
    q = _mm512_srai_epi32(q, shift); // q >>= shift
    q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign
    return q;
}

////////// SINT64

// Divides each signed 64-bit lane of `numers` by the divisor encoded in
// `denom` (AVX-512 version).
__m512i libdivide_s64_do_vector(__m512i numers, const struct libdivide_s64_t *denom) {
    const uint8_t more = denom->more;
    const int64_t magic = denom->magic;
    const uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    // All-ones when the top bit of `more` is set, zero otherwise; relies on
    // the right shift of the int8_t value being arithmetic. Because the value
    // is 0 or -1, a 32-bit broadcast fills the 64-bit lanes identically.
    const __m512i sign = _mm512_set1_epi32((int8_t)more >> 7);

    if (magic == 0) {
        // Shift path. q = numer + ((numer >> 63) & roundToZeroTweak)
        const uint64_t low_bits = (1ULL << shift) - 1;
        __m512i bias = _mm512_and_si512(libdivide_s64_signbits(numers),
                                        _mm512_set1_epi64(low_bits));
        __m512i q = libdivide_s64_shift_right_vector(_mm512_add_epi64(numers, bias), shift);
        // q = (q ^ sign) - sign
        return _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign);
    }

    // Multiply path: upper 64 bits of numers * magic.
    __m512i q = libdivide_mullhi_s64_vector(numers, _mm512_set1_epi64(magic));
    if (more & LIBDIVIDE_ADD_MARKER) {
        // q += ((numer ^ sign) - sign)
        __m512i signed_numers = _mm512_sub_epi64(_mm512_xor_si512(numers, sign), sign);
        q = _mm512_add_epi64(q, signed_numers);
    }
    q = libdivide_s64_shift_right_vector(q, shift);
    // q += (q < 0)
    return _mm512_add_epi64(q, _mm512_srli_epi64(q, 63));
}

__m512i libdivide_s64_branchfree_do_vector(__m512i numers, const struct libdivide_s64_branchfree_t *denom) {
    int64_t magic = denom->magic;  // 从结构体中获取魔数
    uint8_t more = denom->more;  // 从结构体中获取更多信息
    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;  // 从更多信息中提取出位移量

    // 必须是算术右移
    __m512i sign = _mm512_set1_epi32((int8_t)more >> 7);  // 创建一个包含符号位的向量

    // 计算乘法的高位结果
    __m512i q = libdivide_mullhi_s64_vector(numers, _mm512_set1_epi64(magic));
    q = _mm512_add_epi64(q, numers); // q += numers

    // 如果 q 是非负数，无需处理.
    // 如果 q 是负数，我们希望根据 d 是否为 2 的幂来添加 (2**shift)-1 或 (2**shift)
    uint32_t is_power_of_2 = (magic == 0);
    // 使用 libdivide 库函数计算 q 的符号位
    __m512i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63
    // 创建一个掩码，根据 d 是否为 2 的幂来确定要添加的值
    __m512i mask = _mm512_set1_epi64((1ULL << shift) - is_power_of_2);
    // 将 q 的值增加 (q_sign & mask) 的结果
    q = _mm512_add_epi64(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask)
    // 使用 libdivide 库函数将 q 右移 shift 位
    q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift
    // 对 q 应用逐位异或和减法操作，以获取最终的结果
    q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign
    // 返回计算结果 q
    return q;
}

#elif defined(LIBDIVIDE_AVX2)

// Forward declarations of the AVX2 (__m256i) vectorized division routines.

// Unsigned 32-bit vector division.
static inline __m256i libdivide_u32_do_vector(__m256i numers, const struct libdivide_u32_t *denom);

// Signed 32-bit vector division.
static inline __m256i libdivide_s32_do_vector(__m256i numers, const struct libdivide_s32_t *denom);

// Unsigned 64-bit vector division.
static inline __m256i libdivide_u64_do_vector(__m256i numers, const struct libdivide_u64_t *denom);

// Signed 64-bit vector division.
static inline __m256i libdivide_s64_do_vector(__m256i numers, const struct libdivide_s64_t *denom);

// Branch-free unsigned 32-bit vector division.
static inline __m256i libdivide_u32_branchfree_do_vector(__m256i numers, const struct libdivide_u32_branchfree_t *denom);

// Branch-free signed 32-bit vector division.
static inline __m256i libdivide_s32_branchfree_do_vector(__m256i numers, const struct libdivide_s32_branchfree_t *denom);

// Branch-free unsigned 64-bit vector division.
static inline __m256i libdivide_u64_branchfree_do_vector(__m256i numers, const struct libdivide_u64_branchfree_t *denom);

// Branch-free signed 64-bit vector division.
static inline __m256i libdivide_s64_branchfree_do_vector(__m256i numers, const struct libdivide_s64_branchfree_t *denom);

//////// Internal Utility Functions

// Emulates _mm256_srai_epi64(v, 63) (an AVX-512 instruction): every 64-bit
// lane becomes all-ones if it was negative, zero otherwise.
static inline __m256i libdivide_s64_signbits(__m256i v) {
    // Smear the sign bit across each 32-bit lane first...
    __m256i sign_per_dword = _mm256_srai_epi32(v, 31);
    // ...then copy the result of each high dword over its low neighbour.
    return _mm256_shuffle_epi32(sign_per_dword, _MM_SHUFFLE(3, 3, 1, 1));
}

// Emulates _mm256_srai_epi64 (an AVX-512 instruction): arithmetic right shift
// of every 64-bit lane by `amt`.
static inline __m256i libdivide_s64_shift_right_vector(__m256i v, int amt) {
    // After a logical shift the original sign bit sits at position 63 - amt.
    const int sign_pos = (64 - amt) - 1;
    __m256i sign_bit = _mm256_set1_epi64x(1ULL << sign_pos);

    __m256i logical = _mm256_srli_epi64(v, amt);
    // (x ^ m) - m sign-extends from bit `sign_pos` upward.
    __m256i flipped = _mm256_xor_si256(logical, sign_bit);
    return _mm256_sub_epi64(flipped, sign_bit);
}

// Returns, per 32-bit lane, the upper 32 bits of the unsigned product a * b.
// `b` is assumed to hold one 32-bit value repeated in every lane.
static inline __m256i libdivide_mullhi_u32_vector(__m256i a, __m256i b) {
    // Selects the odd-numbered 32-bit lanes.
    const __m256i odd_lanes = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0);

    // Even lanes: take the 64-bit products and move their upper halves down.
    __m256i even_products = _mm256_mul_epu32(a, b);
    __m256i even_hi = _mm256_srli_epi64(even_products, 32);

    // Odd lanes: move them to even positions and multiply; the upper half of
    // each 64-bit product already sits in the odd lane, so just mask it out.
    __m256i a_odd = _mm256_srli_epi64(a, 32);
    __m256i odd_products = _mm256_mul_epu32(a_odd, b);
    __m256i odd_hi = _mm256_and_si256(odd_products, odd_lanes);

    return _mm256_or_si256(even_hi, odd_hi);
}

// Returns, per 32-bit lane, the upper 32 bits of the signed product a * b.
// `b` is assumed to hold one 32-bit value repeated in every lane.
static inline __m256i libdivide_mullhi_s32_vector(__m256i a, __m256i b) {
    // Selects the odd-numbered 32-bit lanes.
    const __m256i odd_lanes = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0);

    // Even lanes: signed 64-bit products, upper halves shifted down (logical
    // shift, so the odd lanes end up zero).
    __m256i even_products = _mm256_mul_epi32(a, b);
    __m256i even_hi = _mm256_srli_epi64(even_products, 32);

    // Odd lanes: move them to even positions, multiply, keep the upper halves
    // where they already are.
    __m256i a_odd = _mm256_srli_epi64(a, 32);
    __m256i odd_products = _mm256_mul_epi32(a_odd, b);
    __m256i odd_hi = _mm256_and_si256(odd_products, odd_lanes);

    return _mm256_or_si256(even_hi, odd_hi);
}

// Returns, per 64-bit lane, the upper 64 bits of the unsigned 128-bit product
// x * y, built from four 32x32 -> 64 partial products.
// `y` is assumed to hold one 64-bit value repeated in every lane.
// Reference: https://stackoverflow.com/a/28827013
static inline __m256i libdivide_mullhi_u64_vector(__m256i x, __m256i y) {
    __m256i lomask = _mm256_set1_epi64x(0xffffffff);
    // 0xB1 swaps the two 32-bit halves inside every 64-bit lane, so the high
    // halves become visible to _mm256_mul_epu32.
    __m256i xh = _mm256_shuffle_epi32(x, 0xB1);
    __m256i yh = _mm256_shuffle_epi32(y, 0xB1);

    __m256i w0 = _mm256_mul_epu32(x, y);    // x0l*y0l, x1l*y1l, ...
    __m256i w1 = _mm256_mul_epu32(x, yh);   // x0l*y0h, x1l*y1h, ...
    __m256i w2 = _mm256_mul_epu32(xh, y);   // x0h*y0l, x1h*y1l, ...
    __m256i w3 = _mm256_mul_epu32(xh, yh);  // x0h*y0h, x1h*y1h, ...

    // Carry out of the low product.
    __m256i w0h = _mm256_srli_epi64(w0, 32);

    // First cross term plus that carry, split into its two halves.
    __m256i s1 = _mm256_add_epi64(w1, w0h);
    __m256i s1l = _mm256_and_si256(s1, lomask);
    __m256i s1h = _mm256_srli_epi64(s1, 32);

    // Second cross term plus the low half of the first sum.
    __m256i s2 = _mm256_add_epi64(w2, s1l);
    __m256i s2h = _mm256_srli_epi64(s2, 32);

    // High product plus both carries is the upper 64 bits of x * y.
    __m256i hi = _mm256_add_epi64(w3, s1h);
    hi = _mm256_add_epi64(hi, s2h);
    return hi;
}

// Returns, per 64-bit lane, the upper 64 bits of the signed product x * y.
// Starts from the unsigned high product and subtracts y where x is negative
// and x where y is negative.
static inline __m256i libdivide_mullhi_s64_vector(__m256i x, __m256i y) {
    __m256i unsigned_hi = libdivide_mullhi_u64_vector(x, y);

    // Sign-bit masks select the operand to subtract for each negative input.
    __m256i fix_for_neg_x = _mm256_and_si256(libdivide_s64_signbits(x), y);
    __m256i fix_for_neg_y = _mm256_and_si256(libdivide_s64_signbits(y), x);

    return _mm256_sub_epi64(_mm256_sub_epi64(unsigned_hi, fix_for_neg_x), fix_for_neg_y);
}

////////// UINT32

// Divides each unsigned 32-bit lane of `numers` by the divisor encoded in
// `denom` (AVX2 version).
__m256i libdivide_u32_do_vector(__m256i numers, const struct libdivide_u32_t *denom) {
    const uint8_t more = denom->more;

    // magic == 0: the quotient is a plain logical right shift by `more`.
    if (denom->magic == 0) {
        return _mm256_srli_epi32(numers, more);
    }

    // Otherwise start from the upper 32 bits of numers * magic.
    __m256i mulhi = libdivide_mullhi_u32_vector(numers, _mm256_set1_epi32(denom->magic));
    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        return _mm256_srli_epi32(mulhi, more);
    }

    // Add-marker path: t = ((numers - mulhi) >> 1) + mulhi, then shift by the
    // shift bits of `more`.
    const uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    __m256i half_diff = _mm256_srli_epi32(_mm256_sub_epi32(numers, mulhi), 1);
    __m256i t = _mm256_add_epi32(half_diff, mulhi);
    return _mm256_srli_epi32(t, shift);
}

// Branch-free unsigned 32-bit division of every lane of `numers` by the
// divisor encoded in `denom` (AVX2 version).
__m256i libdivide_u32_branchfree_do_vector(__m256i numers, const struct libdivide_u32_branchfree_t *denom) {
    // Upper 32 bits of numers * magic.
    __m256i mulhi = libdivide_mullhi_u32_vector(numers, _mm256_set1_epi32(denom->magic));

    // t = ((numers - mulhi) >> 1) + mulhi
    __m256i half_diff = _mm256_srli_epi32(_mm256_sub_epi32(numers, mulhi), 1);
    __m256i t = _mm256_add_epi32(half_diff, mulhi);

    // Final logical shift by denom->more.
    return _mm256_srli_epi32(t, denom->more);
}

////////// UINT64

// Divides each unsigned 64-bit lane of `numers` by the divisor encoded in
// `denom` (AVX2 version).
__m256i libdivide_u64_do_vector(__m256i numers, const struct libdivide_u64_t *denom) {
    const uint8_t more = denom->more;

    // magic == 0: the quotient is a plain logical right shift by `more`.
    if (denom->magic == 0) {
        return _mm256_srli_epi64(numers, more);
    }

    // Otherwise start from the upper 64 bits of numers * magic.
    __m256i mulhi = libdivide_mullhi_u64_vector(numers, _mm256_set1_epi64x(denom->magic));
    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        return _mm256_srli_epi64(mulhi, more);
    }

    // Add-marker path: t = ((numers - mulhi) >> 1) + mulhi, then shift by the
    // shift bits of `more`.
    const uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    __m256i half_diff = _mm256_srli_epi64(_mm256_sub_epi64(numers, mulhi), 1);
    __m256i t = _mm256_add_epi64(half_diff, mulhi);
    return _mm256_srli_epi64(t, shift);
}

// Branch-free unsigned 64-bit division of every lane of `numers` by the
// divisor encoded in `denom` (AVX2 version).
__m256i libdivide_u64_branchfree_do_vector(__m256i numers, const struct libdivide_u64_branchfree_t *denom) {
    // Upper 64 bits of numers * magic.
    __m256i mulhi = libdivide_mullhi_u64_vector(numers, _mm256_set1_epi64x(denom->magic));

    // t = ((numers - mulhi) >> 1) + mulhi
    __m256i half_diff = _mm256_srli_epi64(_mm256_sub_epi64(numers, mulhi), 1);
    __m256i t = _mm256_add_epi64(half_diff, mulhi);

    // Final logical shift by denom->more.
    return _mm256_srli_epi64(t, denom->more);
}

////////// SINT32

// Divides each signed 32-bit lane of `numers` by the divisor encoded in
// `denom` (AVX2 version).
__m256i libdivide_s32_do_vector(__m256i numers, const struct libdivide_s32_t *denom) {
    uint8_t more = denom->more;
    if (!denom->magic) {
        // Shift path: shift amount is held in the low bits of `more`.
        uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
        uint32_t mask = (1U << shift) - 1;
        // Added to negative numerators so that the arithmetic shift rounds
        // toward zero.
        __m256i roundToZeroTweak = _mm256_set1_epi32(mask);
        // q = numer + ((numer >> 31) & roundToZeroTweak);
        __m256i q = _mm256_add_epi32(numers, _mm256_and_si256(_mm256_srai_epi32(numers, 31), roundToZeroTweak));
        q = _mm256_srai_epi32(q, shift);
        // All-ones when the top bit of `more` is set, zero otherwise; relies
        // on the right shift of the int8_t value being arithmetic.
        __m256i sign = _mm256_set1_epi32((int8_t)more >> 7);
        // q = (q ^ sign) - sign: negates q where `sign` is all-ones.
        q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign);
        return q;
    }
    else {
        // Multiply path: upper 32 bits of numers * magic.
        __m256i q = libdivide_mullhi_s32_vector(numers, _mm256_set1_epi32(denom->magic));
        if (more & LIBDIVIDE_ADD_MARKER) {
            // Must be an arithmetic shift.
            __m256i sign = _mm256_set1_epi32((int8_t)more >> 7);
            // q += ((numer ^ sign) - sign);
            q = _mm256_add_epi32(q, _mm256_sub_epi32(_mm256_xor_si256(numers, sign), sign));
        }
        // q >>= shift
        q = _mm256_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK);
        // q += (q < 0): the logical shift extracts the sign bit as 0 or 1.
        q = _mm256_add_epi32(q, _mm256_srli_epi32(q, 31));
        return q;
    }
}

__m256i libdivide_s32_branchfree_do_vector(__m256i numers, const struct libdivide_s32_branchfree_t *denom) {
    int32_t magic = denom->magic;  // 从结构体中获取魔数 magic
    uint8_t more = denom->more;    // 从结构体中获取更多信息字段
    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;  // 计算需要移位的位数，通过与操作获取

    // 必须是算术右移
    __m256i sign = _mm256_set1_epi32((int8_t)more >> 7);  // 根据 more 的最高位设置符号位

    __m256i q = libdivide_mullhi_s32_vector(numers, _mm256_set1_epi32(magic));  // 计算高位乘积
    q = _mm256_add_epi32(q, numers);  // q += numers

    // 如果 q 是非负数，无需处理
    // 如果 q 是负数，根据是否是2的幂，添加 (2**shift)-1 或者 (2**shift)
    uint32_t is_power_of_2 = (magic == 0);
    __m256i q_sign = _mm256_srai_epi32(q, 31);  // q_sign = q >> 31
    __m256i mask = _mm256_set1_epi32((1U << shift) - is_power_of_2);
    q = _mm256_add_epi32(q, _mm256_and_si256(q_sign, mask));  // q = q + (q_sign & mask)
    q = _mm256_srai_epi32(q, shift);  // q >>= shift
    q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign);  // q = (q ^ sign) - sign

    return q;
}

////////// SINT64

// Divides each signed 64-bit lane of `numers` by the divisor encoded in
// `denom` (AVX2 version).
__m256i libdivide_s64_do_vector(__m256i numers, const struct libdivide_s64_t *denom) {
    const uint8_t more = denom->more;
    const int64_t magic = denom->magic;
    const uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    // All-ones when the top bit of `more` is set, zero otherwise; relies on
    // the right shift of the int8_t value being arithmetic. Because the value
    // is 0 or -1, a 32-bit broadcast fills the 64-bit lanes identically.
    const __m256i sign = _mm256_set1_epi32((int8_t)more >> 7);

    if (magic == 0) {
        // Shift path. q = numer + ((numer >> 63) & roundToZeroTweak)
        const uint64_t low_bits = (1ULL << shift) - 1;
        __m256i bias = _mm256_and_si256(libdivide_s64_signbits(numers),
                                        _mm256_set1_epi64x(low_bits));
        __m256i q = libdivide_s64_shift_right_vector(_mm256_add_epi64(numers, bias), shift);
        // q = (q ^ sign) - sign
        return _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign);
    }

    // Multiply path: upper 64 bits of numers * magic.
    __m256i q = libdivide_mullhi_s64_vector(numers, _mm256_set1_epi64x(magic));
    if (more & LIBDIVIDE_ADD_MARKER) {
        // q += ((numer ^ sign) - sign)
        __m256i signed_numers = _mm256_sub_epi64(_mm256_xor_si256(numers, sign), sign);
        q = _mm256_add_epi64(q, signed_numers);
    }
    q = libdivide_s64_shift_right_vector(q, shift);
    // q += (q < 0)
    return _mm256_add_epi64(q, _mm256_srli_epi64(q, 63));
}

__m256i libdivide_s64_branchfree_do_vector(__m256i numers, const struct libdivide_s64_branchfree_t *denom) {
    int64_t magic = denom->magic;  // 从结构体中获取魔数
    uint8_t more = denom->more;    // 从结构体中获取更多信息字段
    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;  // 计算需要移位的位数

    // 必须是算术右移
    __m256i sign = _mm256_set1_epi32((int8_t)more >> 7);  // 根据 more 的最高位设置符号位

    __m256i q = libdivide_mullhi_s64_vector(numers, _mm256_set1_epi64x(magic));  // 计算高位乘积
    q = _mm256_add_epi64(q, numers);  // q += numers

    // 如果 q 是非负数，无需处理.
    // 如果 q 是负数，我们希望根据 d 是否为2的幂来添加 (2**shift)-1 或 (2**shift)
    uint32_t is_power_of_2 = (magic == 0);  // 检查 magic 是否为零，以确定 d 是否为2的幂
    // 计算 q 的符号位
    __m256i q_sign = libdivide_s64_signbits(q);
    // 创建一个掩码，用于根据 d 是否为2的幂来选择要添加的值
    __m256i mask = _mm256_set1_epi64x((1ULL << shift) - is_power_of_2);
    // 将 q 增加 (q_sign & mask) 的结果
    q = _mm256_add_epi64(q, _mm256_and_si256(q_sign, mask));
    // 使用 libdivide_s64_shift_right_vector 函数将 q 右移 shift 位
    q = libdivide_s64_shift_right_vector(q, shift);
    // 执行 q 的按位异或和减法操作，计算最终的结果
    q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign);
    // 返回计算后的结果 q
    return q;
// 结束前面的条件分支代码块
}

// 如果定义了 LIBDIVIDE_SSE2，以下是针对 SSE2 的函数声明

// 定义了 libdivide_u32_do_vector 函数，处理无符号 32 位整数向量除法
static inline __m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom);

// 定义了 libdivide_s32_do_vector 函数，处理有符号 32 位整数向量除法
static inline __m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t *denom);

// 定义了 libdivide_u64_do_vector 函数，处理无符号 64 位整数向量除法
static inline __m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t *denom);

// 定义了 libdivide_s64_do_vector 函数，处理有符号 64 位整数向量除法
static inline __m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t *denom);

// 定义了 libdivide_u32_branchfree_do_vector 函数，处理无符号 32 位整数向量分支无关除法
static inline __m128i libdivide_u32_branchfree_do_vector(__m128i numers, const struct libdivide_u32_branchfree_t *denom);

// 定义了 libdivide_s32_branchfree_do_vector 函数，处理有符号 32 位整数向量分支无关除法
static inline __m128i libdivide_s32_branchfree_do_vector(__m128i numers, const struct libdivide_s32_branchfree_t *denom);

// 定义了 libdivide_u64_branchfree_do_vector 函数，处理无符号 64 位整数向量分支无关除法
static inline __m128i libdivide_u64_branchfree_do_vector(__m128i numers, const struct libdivide_u64_branchfree_t *denom);

// 定义了 libdivide_s64_branchfree_do_vector 函数，处理有符号 64 位整数向量分支无关除法
static inline __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivide_s64_branchfree_t *denom);

//////// Internal Utility Functions

// Emulates _mm_srai_epi64(v, 63) (an AVX-512 instruction): every 64-bit lane
// becomes all-ones if it was negative, zero otherwise.
static inline __m128i libdivide_s64_signbits(__m128i v) {
    // Smear the sign bit across each 32-bit lane first...
    __m128i sign_per_dword = _mm_srai_epi32(v, 31);
    // ...then copy the result of each high dword over its low neighbour.
    return _mm_shuffle_epi32(sign_per_dword, _MM_SHUFFLE(3, 3, 1, 1));
}

// Emulates _mm_srai_epi64 (an AVX-512 instruction): arithmetic right shift of
// every 64-bit lane by `amt`.
static inline __m128i libdivide_s64_shift_right_vector(__m128i v, int amt) {
    // After a logical shift the original sign bit sits at position 63 - amt.
    const int sign_pos = (64 - amt) - 1;
    __m128i sign_bit = _mm_set1_epi64x(1ULL << sign_pos);

    __m128i logical = _mm_srli_epi64(v, amt);
    // (x ^ m) - m sign-extends from bit `sign_pos` upward.
    __m128i flipped = _mm_xor_si128(logical, sign_bit);
    return _mm_sub_epi64(flipped, sign_bit);
}

// Returns, per 32-bit lane, the upper 32 bits of the unsigned product a * b.
// `b` is assumed to hold one 32-bit value repeated in every lane.
static inline __m128i libdivide_mullhi_u32_vector(__m128i a, __m128i b) {
    // Selects the odd-numbered 32-bit lanes.
    const __m128i odd_lanes = _mm_set_epi32(-1, 0, -1, 0);

    // Even lanes: take the 64-bit products and move their upper halves down.
    __m128i even_products = _mm_mul_epu32(a, b);
    __m128i even_hi = _mm_srli_epi64(even_products, 32);

    // Odd lanes: move them to even positions and multiply; the upper half of
    // each 64-bit product already sits in the odd lane, so just mask it out.
    __m128i a_odd = _mm_srli_epi64(a, 32);
    __m128i odd_products = _mm_mul_epu32(a_odd, b);
    __m128i odd_hi = _mm_and_si128(odd_products, odd_lanes);

    return _mm_or_si128(even_hi, odd_hi);
}

// Returns, per 32-bit lane, the upper 32 bits of the signed product a * b.
// SSE2 has no signed multiply instruction, so the unsigned high product is
// converted to the signed one. `b` is assumed to hold one repeated 32-bit
// value.
static inline __m128i libdivide_mullhi_s32_vector(__m128i a, __m128i b) {
    __m128i unsigned_hi = libdivide_mullhi_u32_vector(a, b);

    // (a >> 31) & b and (b >> 31) & a, using arithmetic shifts: the operand
    // to subtract wherever the other operand is negative.
    __m128i fix_for_neg_a = _mm_and_si128(_mm_srai_epi32(a, 31), b);
    __m128i fix_for_neg_b = _mm_and_si128(_mm_srai_epi32(b, 31), a);

    return _mm_sub_epi32(_mm_sub_epi32(unsigned_hi, fix_for_neg_a), fix_for_neg_b);
}

// Returns, per 64-bit lane, the upper 64 bits of the unsigned 128-bit product
// x * y, built from four 32x32 -> 64 partial products.
// `y` is assumed to hold one 64-bit value repeated in every lane.
static inline __m128i libdivide_mullhi_u64_vector(__m128i x, __m128i y) {
    const __m128i low32 = _mm_set1_epi64x(0xffffffff);

    // 0xB1 swaps the two 32-bit halves inside every 64-bit lane, so the high
    // halves become visible to _mm_mul_epu32.
    __m128i x_swapped = _mm_shuffle_epi32(x, 0xB1);
    __m128i y_swapped = _mm_shuffle_epi32(y, 0xB1);

    __m128i lo_lo = _mm_mul_epu32(x, y);                  // xl * yl
    __m128i lo_hi = _mm_mul_epu32(x, y_swapped);          // xl * yh
    __m128i hi_lo = _mm_mul_epu32(x_swapped, y);          // xh * yl
    __m128i hi_hi = _mm_mul_epu32(x_swapped, y_swapped);  // xh * yh

    // First cross term plus the carry out of the low product.
    __m128i cross1 = _mm_add_epi64(lo_hi, _mm_srli_epi64(lo_lo, 32));
    // Second cross term plus the low half of the first sum.
    __m128i cross2 = _mm_add_epi64(hi_lo, _mm_and_si128(cross1, low32));

    // The upper halves of both cross sums carry into the high word.
    __m128i carries = _mm_add_epi64(_mm_srli_epi64(cross1, 32),
                                    _mm_srli_epi64(cross2, 32));
    return _mm_add_epi64(hi_hi, carries);
}

// Returns, per 64-bit lane, the upper 64 bits of the signed product x * y.
// Starts from the unsigned high product and subtracts y where x is negative
// and x where y is negative.
static inline __m128i libdivide_mullhi_s64_vector(__m128i x, __m128i y) {
    __m128i unsigned_hi = libdivide_mullhi_u64_vector(x, y);

    // Sign-bit masks select the operand to subtract for each negative input.
    __m128i fix_for_neg_x = _mm_and_si128(libdivide_s64_signbits(x), y);
    __m128i fix_for_neg_y = _mm_and_si128(libdivide_s64_signbits(y), x);

    return _mm_sub_epi64(_mm_sub_epi64(unsigned_hi, fix_for_neg_x), fix_for_neg_y);
}

////////// UINT32

// Divides each unsigned 32-bit lane of `numers` by the divisor encoded in
// `denom` (SSE2 version).
__m128i libdivide_u32_do_vector(__m128i numers, const struct libdivide_u32_t *denom) {
    const uint8_t more = denom->more;

    // magic == 0: the quotient is a plain logical right shift by `more`.
    if (denom->magic == 0) {
        return _mm_srli_epi32(numers, more);
    }

    // Otherwise start from the upper 32 bits of numers * magic.
    __m128i mulhi = libdivide_mullhi_u32_vector(numers, _mm_set1_epi32(denom->magic));
    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        return _mm_srli_epi32(mulhi, more);
    }

    // Add-marker path: t = ((numers - mulhi) >> 1) + mulhi, then shift by the
    // shift bits of `more`.
    const uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    __m128i half_diff = _mm_srli_epi32(_mm_sub_epi32(numers, mulhi), 1);
    __m128i t = _mm_add_epi32(half_diff, mulhi);
    return _mm_srli_epi32(t, shift);
}

// Branch-free unsigned 32-bit division of every lane of `numers` by the
// divisor encoded in `denom` (SSE2 version).
__m128i libdivide_u32_branchfree_do_vector(__m128i numers, const struct libdivide_u32_branchfree_t *denom) {
    // Upper 32 bits of numers * magic.
    __m128i mulhi = libdivide_mullhi_u32_vector(numers, _mm_set1_epi32(denom->magic));

    // t = ((numers - mulhi) >> 1) + mulhi
    __m128i half_diff = _mm_srli_epi32(_mm_sub_epi32(numers, mulhi), 1);
    __m128i t = _mm_add_epi32(half_diff, mulhi);

    // Final logical shift by denom->more.
    return _mm_srli_epi32(t, denom->more);
}

////////// UINT64

// Divides each unsigned 64-bit lane of `numers` by the divisor encoded in
// `denom` (SSE2 version).
__m128i libdivide_u64_do_vector(__m128i numers, const struct libdivide_u64_t *denom) {
    const uint8_t more = denom->more;

    // magic == 0: the quotient is a plain logical right shift by `more`.
    if (denom->magic == 0) {
        return _mm_srli_epi64(numers, more);
    }

    // Otherwise start from the upper 64 bits of numers * magic.
    __m128i mulhi = libdivide_mullhi_u64_vector(numers, _mm_set1_epi64x(denom->magic));
    if ((more & LIBDIVIDE_ADD_MARKER) == 0) {
        return _mm_srli_epi64(mulhi, more);
    }

    // Add-marker path: t = ((numers - mulhi) >> 1) + mulhi, then shift by the
    // shift bits of `more`.
    const uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    __m128i half_diff = _mm_srli_epi64(_mm_sub_epi64(numers, mulhi), 1);
    __m128i t = _mm_add_epi64(half_diff, mulhi);
    return _mm_srli_epi64(t, shift);
}

// Branch-free unsigned 64-bit division of every lane of `numers` by the
// divisor encoded in `denom` (SSE2 version).
__m128i libdivide_u64_branchfree_do_vector(__m128i numers, const struct libdivide_u64_branchfree_t *denom) {
    // Upper 64 bits of numers * magic.
    __m128i mulhi = libdivide_mullhi_u64_vector(numers, _mm_set1_epi64x(denom->magic));

    // t = ((numers - mulhi) >> 1) + mulhi
    __m128i half_diff = _mm_srli_epi64(_mm_sub_epi64(numers, mulhi), 1);
    __m128i t = _mm_add_epi64(half_diff, mulhi);

    // Final logical shift by denom->more.
    return _mm_srli_epi64(t, denom->more);
}

////////// SINT32

// Divides each signed 32-bit lane of `numers` by the divisor encoded in
// `denom` (SSE2 version).
__m128i libdivide_s32_do_vector(__m128i numers, const struct libdivide_s32_t *denom) {
    const uint8_t more = denom->more;
    const uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK;
    // All-ones when the top bit of `more` is set, zero otherwise; relies on
    // the right shift of the int8_t value being arithmetic.
    const __m128i sign = _mm_set1_epi32((int8_t)more >> 7);

    if (denom->magic == 0) {
        // Shift path. Add (2**shift - 1) to negative numerators so that the
        // arithmetic shift rounds toward zero:
        //   q = numer + ((numer >> 31) & roundToZeroTweak)
        const uint32_t low_bits = (1U << shift) - 1;
        __m128i bias = _mm_and_si128(_mm_srai_epi32(numers, 31),
                                     _mm_set1_epi32(low_bits));
        __m128i q = _mm_srai_epi32(_mm_add_epi32(numers, bias), shift);
        // q = (q ^ sign) - sign: negates q where `sign` is all-ones.
        return _mm_sub_epi32(_mm_xor_si128(q, sign), sign);
    }

    // Multiply path: upper 32 bits of numers * magic.
    __m128i q = libdivide_mullhi_s32_vector(numers, _mm_set1_epi32(denom->magic));
    if (more & LIBDIVIDE_ADD_MARKER) {
        // q += ((numer ^ sign) - sign)
        __m128i signed_numers = _mm_sub_epi32(_mm_xor_si128(numers, sign), sign);
        q = _mm_add_epi32(q, signed_numers);
    }
    q = _mm_srai_epi32(q, shift);
    // q += (q < 0): the logical shift extracts the sign bit as 0 or 1.
    return _mm_add_epi32(q, _mm_srli_epi32(q, 31));
}

__m128i libdivide_s32_branchfree_do_vector(__m128i numers, const struct libdivide_s32_branchfree_t *denom) {
    int32_t magic = denom->magic;  // 从结构体中获取魔数
    uint8_t more = denom->more;  // 从结构体中获取更多信息
    uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK;  // 使用位掩码提取移位量

     // 必须是算术右移
    __m128i sign = _mm_set1_epi32((int8_t)more >> 7);  // 设置符号位掩码
    __m128i q = libdivide_mullhi_s32_vector(numers, _mm_set1_epi32(magic));  // 计算乘法的高位部分
    q = _mm_add_epi32(q, numers); // q += numers

    // 如果 q 是非负数，则不需要处理
    // 如果 q 是负数，我们希望根据 d 是否为2的幂，添加 (2**shift)-1 或者 (2**shift)
    uint32_t is_power_of_2 = (magic == 0);  // 判断魔数是否为0
    __m128i q_sign = _mm_srai_epi32(q, 31); // q_sign = q >> 31
    __m128i mask = _mm_set1_epi32((1U << shift) - is_power_of_2);  // 根据移位量生成掩码
    q = _mm_add_epi32(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask)
    q = _mm_srai_epi32(q, shift); // q >>= shift
    q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign
    return q;
}

////////// SINT64

// Division of packed signed 64-bit integers by the divisor described by
// `denom`. A zero `magic` selects the shift path; any other value selects the
// multiply path. Returns the packed quotients.
__m128i libdivide_s64_do_vector(__m128i numers, const struct libdivide_s64_t *denom) {
    const int64_t magic = denom->magic;
    const uint8_t more = denom->more;

    if (magic != 0) {
        // Multiply path: start from the high half of numers * magic.
        __m128i quot = libdivide_mullhi_s64_vector(numers, _mm_set1_epi64x(magic));
        if (more & LIBDIVIDE_ADD_MARKER) {
            // Must be an arithmetic shift. The value is 0 or -1, so the
            // 32-bit broadcast fills the 64-bit lanes identically.
            const __m128i negate = _mm_set1_epi32((int8_t)more >> 7);
            // quot += ((numers ^ negate) - negate)
            const __m128i adjusted = _mm_sub_epi64(_mm_xor_si128(numers, negate), negate);
            quot = _mm_add_epi64(quot, adjusted);
        }
        quot = libdivide_s64_shift_right_vector(quot, more & LIBDIVIDE_64_SHIFT_MASK);
        // quot += (quot < 0)
        return _mm_add_epi64(quot, _mm_srli_epi64(quot, 63));
    }

    // Shift path.
    const uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    const uint64_t mask = (1ULL << shift) - 1;
    const __m128i round_to_zero_tweak = _mm_set1_epi64x(mask);
    const __m128i negate = _mm_set1_epi32((int8_t)more >> 7);

    // quot = numers + ((numers >> 63) & round_to_zero_tweak)
    __m128i quot = _mm_and_si128(libdivide_s64_signbits(numers), round_to_zero_tweak);
    quot = _mm_add_epi64(numers, quot);
    quot = libdivide_s64_shift_right_vector(quot, shift);

    // (quot ^ negate) - negate
    return _mm_sub_epi64(_mm_xor_si128(quot, negate), negate);
}

// Branch-free division of packed signed 64-bit integers by the divisor
// described by `denom` (its `magic` multiplier and `more` byte).
// Returns the packed quotients.
__m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivide_s64_branchfree_t *denom) {
    int64_t magic = denom->magic;
    uint8_t more = denom->more;
    uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK;
    // Must be an arithmetic shift. The value is 0 or -1, so the 32-bit
    // broadcast fills the 64-bit lanes identically.
    __m128i sign = _mm_set1_epi32((int8_t)more >> 7);

    // libdivide_mullhi_s64(numers, magic);
    __m128i q = libdivide_mullhi_s64_vector(numers, _mm_set1_epi64x(magic));
    q = _mm_add_epi64(q, numers); // q += numers

    // If q is non-negative, nothing to do.
    // If q is negative, add (2**shift)-1 when the divisor is a power of 2,
    // or (2**shift) when it is not.
    uint32_t is_power_of_2 = (magic == 0);
    __m128i q_sign = libdivide_s64_signbits(q);
    __m128i mask = _mm_set1_epi64x((1ULL << shift) - is_power_of_2);
    q = _mm_add_epi64(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask)
    q = libdivide_s64_shift_right_vector(q, shift); // q >>= shift
    q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign
    return q;
}
// NOTE(review): the closing brace above was missing. Upstream libdivide also
// appears to end its SIMD `#if` section with an `#endif` at this point --
// confirm the preprocessor conditionals in this file are balanced.
#ifdef __cplusplus

// 如果正在使用 C++ 编译器，则进入 C++ 相关的部分


// The C++ divider class is templated on both an integer type
// (like uint64_t) and an algorithm type.
// * BRANCHFULL is the default algorithm type.
// * BRANCHFREE is the branchfree algorithm type.

// 定义了一个 C++ 分割器类，该类模板化于整数类型（如 uint64_t）和算法类型。
// * BRANCHFULL 是默认的算法类型。
// * BRANCHFREE 是无分支算法类型。


// Values for the algorithm template parameter: BRANCHFULL is the default
// algorithm, BRANCHFREE is the branch-free algorithm.
enum {
    BRANCHFULL,
    BRANCHFREE
};


// Select the SIMD register type used by the vector overloads. The macros are
// tested in the order shown; LIBDIVIDE_VECTOR_TYPE is left undefined when
// none of them is defined.
#if defined(LIBDIVIDE_AVX512)
    #define LIBDIVIDE_VECTOR_TYPE __m512i
#elif defined(LIBDIVIDE_AVX2)
    #define LIBDIVIDE_VECTOR_TYPE __m256i
#elif defined(LIBDIVIDE_SSE2)
    #define LIBDIVIDE_VECTOR_TYPE __m128i
#endif


// LIBDIVIDE_DIVIDE_VECTOR(ALGO) expands to nothing when no vector type is
// available. Otherwise it expands to a `divide` member function that takes a
// LIBDIVIDE_VECTOR_TYPE and forwards it, together with the address of the
// enclosing struct's `denom` member, to libdivide_<ALGO>_do_vector.
#if !defined(LIBDIVIDE_VECTOR_TYPE)
    #define LIBDIVIDE_DIVIDE_VECTOR(ALGO)
#else
    #define LIBDIVIDE_DIVIDE_VECTOR(ALGO) \
        LIBDIVIDE_VECTOR_TYPE divide(LIBDIVIDE_VECTOR_TYPE n) const { \
            return libdivide_##ALGO##_do_vector(n, &denom); \
        }
#endif


// DISPATCHER_GEN(T, ALGO) generates the body of a dispatcher struct whose
// C++ methods redirect to the libdivide C API for algorithm ALGO:
//   * `denom`         - a libdivide_<ALGO>_t holding the precomputed divisor
//   * `dispatcher()`  - empty default constructor (does not assign `denom`)
//   * `dispatcher(d)` - initializes `denom` with libdivide_<ALGO>_gen(d)
//   * `divide(n)`     - forwards to libdivide_<ALGO>_do(n, &denom)
//   * vector `divide` - supplied by LIBDIVIDE_DIVIDE_VECTOR(ALGO)
//   * `recover()`     - forwards to libdivide_<ALGO>_recover(&denom)
#define DISPATCHER_GEN(T, ALGO) \
    libdivide_##ALGO##_t denom; \
    dispatcher() { } \
    dispatcher(T d) \
        : denom(libdivide_##ALGO##_gen(d)) \
    { } \
    T divide(T n) const { \
        return libdivide_##ALGO##_do(n, &denom); \
    } \
    LIBDIVIDE_DIVIDE_VECTOR(ALGO) \
    T recover() const { \
        return libdivide_##ALGO##_recover(&denom); \
    }


// Primary dispatcher template: an empty struct parameterized on whether the
// type is integral (IS_INTEGRAL), whether it is signed (IS_SIGNED), its size
// in bytes (SIZEOF) and the algorithm selector (ALGO).
template<bool IS_INTEGRAL, bool IS_SIGNED, int SIZEOF, int ALGO> struct dispatcher { };


// Explicit specializations of dispatcher for signed/unsigned 32-bit and
// 64-bit integers, each in a BRANCHFULL and a BRANCHFREE variant. Every
// specialization gets its members from DISPATCHER_GEN, instantiated with the
// matching integer type and libdivide C API name.
template<> struct dispatcher<true, true, sizeof(int32_t), BRANCHFULL> { DISPATCHER_GEN(int32_t, s32) };
template<> struct dispatcher<true, true, sizeof(int32_t), BRANCHFREE> { DISPATCHER_GEN(int32_t, s32_branchfree) };
template<> struct dispatcher<true, false, sizeof(uint32_t), BRANCHFULL> { DISPATCHER_GEN(uint32_t, u32) };
template<> struct dispatcher<true, false, sizeof(uint32_t), BRANCHFREE> { DISPATCHER_GEN(uint32_t, u32_branchfree) };
template<> struct dispatcher<true, true, sizeof(int64_t), BRANCHFULL> { DISPATCHER_GEN(int64_t, s64) };
template<> struct dispatcher<true, true, sizeof(int64_t), BRANCHFREE> { DISPATCHER_GEN(int64_t, s64_branchfree) };
template<> struct dispatcher<true, false, sizeof(uint64_t), BRANCHFULL> { DISPATCHER_GEN(uint64_t, u64) };
template<> struct dispatcher<true, false, sizeof(uint64_t), BRANCHFREE> { DISPATCHER_GEN(uint64_t, u64_branchfree) };


// Main divider class for use by the user (C++ API). The dispatcher struct
// that performs the actual division is selected from the integer type T and
// the algorithm selector ALGO (BRANCHFULL unless specified otherwise).
template<typename T, int ALGO = BRANCHFULL>
class divider {
public:
    // Default constructor. `div` is default-constructed, so no divisor is
    // set up until a divider built from a value is assigned to this object.
    divider() { }

    // Constructs a divider for the divisor d.
    divider(T d) : div(d) { }

    // Divides n by the divisor and returns the quotient.
    T divide(T n) const {
        return div.divide(n);
    }

    // Recovers the divisor, i.e. the value that was used to initialize this
    // divider object.
    T recover() const {
        return div.recover();
    }

    // Two dividers compare equal when the `magic` and `more` fields of their
    // stored denominators match. The denominator lives inside the private
    // `div` member, so it is reached through `other.div`.
    bool operator==(const divider<T, ALGO>& other) const {
        return div.denom.magic == other.div.denom.magic &&
               div.denom.more == other.div.denom.more;
    }

    // Negation of operator==.
    bool operator!=(const divider<T, ALGO>& other) const {
        return !(*this == other);
    }

#if defined(LIBDIVIDE_VECTOR_TYPE)
    // Treats the vector as packed integer values with the same type as
    // the divider (e.g. s32, u32, s64, u64) and divides each of
    // them by the divider, returning the packed quotients.
    LIBDIVIDE_VECTOR_TYPE divide(LIBDIVIDE_VECTOR_TYPE n) const {
        return div.divide(n);
    }
#endif

private:
    // Storage for the actual divisor
    dispatcher<std::is_integral<T>::value,
               std::is_signed<T>::value, sizeof(T), ALGO> div;
};

// Scalar division operator: `n / div` yields the result of div.divide(n).
template<typename T, int ALGO>
T operator/(T n, const divider<T, ALGO>& div) {
    const T quotient = div.divide(n);
    return quotient;
}

// Scalar compound division: stores div.divide(n) back into n and returns a
// reference to n.
template<typename T, int ALGO>
T& operator/=(T& n, const divider<T, ALGO>& div) {
    return n = div.divide(n);
}

#if defined(LIBDIVIDE_VECTOR_TYPE)
    // Vector division operator: `n / div` yields the packed quotients
    // returned by div.divide(n).
    template<typename T, int ALGO>
    LIBDIVIDE_VECTOR_TYPE operator/(LIBDIVIDE_VECTOR_TYPE n, const divider<T, ALGO>& div) {
        const LIBDIVIDE_VECTOR_TYPE quotients = div.divide(n);
        return quotients;
    }
    // Vector compound division: stores div.divide(n) back into n and returns
    // a reference to n.
    template<typename T, int ALGO>
    LIBDIVIDE_VECTOR_TYPE& operator/=(LIBDIVIDE_VECTOR_TYPE& n, const divider<T, ALGO>& div) {
        return n = div.divide(n);
    }
#endif

// libdivide::branchfree_divider<T>
// Alias for a divider<T> that uses the BRANCHFREE algorithm.
template <typename T>
using branchfree_divider = divider<T, BRANCHFREE>;

}  // namespace libdivide

#endif  // __cplusplus

#endif  // NUMPY_CORE_INCLUDE_NUMPY_LIBDIVIDE_LIBDIVIDE_H_

NumPy-源码解析-四十八-

NumPy 源码解析（四十八）

.\numpy\numpy\_core\fromnumeric.pyi

.\numpy\numpy\_core\function_base.py

.\numpy\numpy\_core\function_base.pyi

.\numpy\numpy\_core\getlimits.py

.\numpy\numpy\_core\getlimits.pyi

.\numpy\numpy\_core\include\numpy\arrayobject.h

.\numpy\numpy\_core\include\numpy\arrayscalars.h

.\numpy\numpy\_core\include\numpy\dtype_api.h

.\numpy\numpy\_core\include\numpy\halffloat.h

.\numpy\numpy\_core\include\numpy\libdivide\libdivide.h

`.\numpy\numpy\_core\fromnumeric.pyi`

`.\numpy\numpy\_core\function_base.py`

`.\numpy\numpy\_core\function_base.pyi`

`.\numpy\numpy\_core\getlimits.py`

`.\numpy\numpy\_core\getlimits.pyi`

`.\numpy\numpy\_core\include\numpy\arrayobject.h`

`.\numpy\numpy\_core\include\numpy\arrayscalars.h`

`.\numpy\numpy\_core\include\numpy\dtype_api.h`

`.\numpy\numpy\_core\include\numpy\halffloat.h`

`.\numpy\numpy\_core\include\numpy\libdivide\libdivide.h`