
NumPy Source Code Analysis (52)

.\numpy\numpy\_core\records.pyi

# Import the standard library module os, used for interacting with the operating system
import os
# Import Sequence and Iterable from collections.abc, the abstract base classes for sequences and iterables
from collections.abc import Sequence, Iterable
# Import the typing constructs used for the annotations below
from typing import (
    Any,
    TypeVar,
    overload,
    Protocol,
    SupportsIndex,
    Literal
)
# Import specific types and classes from numpy
from numpy import (
    ndarray,
    dtype,
    generic,
    void,
    _ByteOrder,
    _SupportsBuffer,
    _ShapeType,
    _DType_co,
    _OrderKACF,
)
# Import specific type aliases from numpy._typing
from numpy._typing import (
    ArrayLike,
    DTypeLike,
    NDArray,
    _ShapeLike,
    _ArrayLikeInt_co,
    _ArrayLikeVoid_co,
    _NestedSequence,
)

# Create a generic type variable _SCT, bounded by generic
_SCT = TypeVar("_SCT", bound=generic)

# Type alias _RecArray: a recarray with an arbitrary shape and a dtype wrapping _SCT
_RecArray = recarray[Any, dtype[_SCT]]

# Protocol _SupportsReadInto: anything implementing seek, tell and readinto
class _SupportsReadInto(Protocol):
    def seek(self, offset: int, whence: int, /) -> object: ...
    def tell(self, /) -> int: ...
    def readinto(self, buffer: memoryview, /) -> int: ...

# The record class, a subclass of void (a scalar of a structured dtype)
class record(void):
    # __getattribute__ takes an attribute name and returns the field value as Any
    def __getattribute__(self, attr: str) -> Any: ...
    # __setattr__ takes an attribute name and an ArrayLike value, returns nothing
    def __setattr__(self, attr: str, val: ArrayLike) -> None: ...
    # pprint returns a pretty-printed string representation of the record
    def pprint(self) -> str: ...
    # Overload of __getitem__ for a field name or integer index, returning Any
    @overload
    def __getitem__(self, key: str | SupportsIndex) -> Any: ...
    # Overload of __getitem__ for a list of field names, returning a record
    @overload
    def __getitem__(self, key: list[str]) -> record: ...

# The recarray class: an ndarray parameterized by _ShapeType and _DType_co
class recarray(ndarray[_ShapeType, _DType_co]):
    # NOTE: While not strictly mandatory, we're demanding here that arguments
    # for the `format_parser`- and `dtype`-based dtype constructors are
    # mutually exclusive
    # Overload of __new__ for the `formats`-based constructor, returning recarray[Any, dtype[record]]
    @overload
    def __new__(
        subtype,
        shape: _ShapeLike,
        dtype: None = ...,
        buf: None | _SupportsBuffer = ...,
        offset: SupportsIndex = ...,
        strides: None | _ShapeLike = ...,
        *,
        formats: DTypeLike,
        names: None | str | Sequence[str] = ...,
        titles: None | str | Sequence[str] = ...,
        byteorder: None | _ByteOrder = ...,
        aligned: bool = ...,
        order: _OrderKACF = ...,
    ) -> recarray[Any, dtype[record]]: ...
    # Overload of __new__ for the `dtype`-based constructor, returning recarray[Any, dtype[Any]]
    @overload
    def __new__(
        subtype,
        shape: _ShapeLike,
        dtype: DTypeLike,
        buf: None | _SupportsBuffer = ...,
        offset: SupportsIndex = ...,
        strides: None | _ShapeLike = ...,
        formats: None = ...,
        names: None = ...,
        titles: None = ...,
        byteorder: None = ...,
        aligned: Literal[False] = ...,
        order: _OrderKACF = ...,
    ) -> recarray[Any, dtype[Any]]: ...
    # __array_finalize__ takes an object and returns nothing
    def __array_finalize__(self, obj: object) -> None: ...
    # __getattribute__ takes an attribute name and returns the field value as Any
    def __getattribute__(self, attr: str) -> Any: ...
    # __setattr__ takes an attribute name and an ArrayLike value, returns nothing
    def __setattr__(self, attr: str, val: ArrayLike) -> None: ...
    # Overload of __getitem__ for plain integer-style indices, returning Any
    @overload
    def __getitem__(self, indx: (
        SupportsIndex
        | _ArrayLikeInt_co
        | tuple[SupportsIndex | _ArrayLikeInt_co, ...]
    )) -> Any: ...
    # Overload of __getitem__ for void-dtype recarrays: slice/advanced indexing returns a recarray
    @overload
    def __getitem__(self: recarray[Any, dtype[void]], indx: (
        None
        | slice
        | ellipsis
        | SupportsIndex
        | _ArrayLikeInt_co
        | tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...]
    )) -> recarray[Any, _DType_co]: ...
    # Overload of __getitem__ for the general case, returning ndarray[Any, _DType_co]
    @overload
    def __getitem__(self, indx: (
        None
        | slice
        | ellipsis
        | SupportsIndex
        | _ArrayLikeInt_co
        | tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...]
    )) -> ndarray[Any, _DType_co]: ...

    # Overload of __getitem__ for a single field name, returning NDArray[Any]
    @overload
    def __getitem__(self, indx: str) -> NDArray[Any]: ...

    # Overload of __getitem__ for a list of field names, returning recarray[_ShapeType, dtype[record]]
    @overload
    def __getitem__(self, indx: list[str]) -> recarray[_ShapeType, dtype[record]]: ...

    # Overload of field with no value: returns the contents of the field (Any)
    @overload
    def field(self, attr: int | str, val: None = ...) -> Any: ...

    # Overload of field with an ArrayLike value: assigns to the field and returns nothing
    @overload
    def field(self, attr: int | str, val: ArrayLike) -> None: ...
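
As a quick illustration of what these stubs describe: a recarray exposes each field both by indexing and as an attribute, and a single element is a `record` scalar. A small sketch (the field names are arbitrary examples):

import numpy as np

# A small record array; the fields 'name' and 'age' are arbitrary examples.
arr = np.rec.array([("Alice", 30), ("Bob", 25)],
                   dtype=[("name", "U10"), ("age", "i4")])

print(arr.age)       # attribute access to a field -> array([30, 25], dtype=int32)
print(arr["name"])   # string indexing              -> array(['Alice', 'Bob'], dtype='<U10')
print(type(arr[0]))  # a single element is a `record` scalar
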
class format_parser:
    # Class attribute: the resulting structured dtype (a void dtype)
    dtype: dtype[void]

    # Constructor
    def __init__(
        self,
        formats: DTypeLike,                   # field formats, a dtype-like specification
        names: None | str | Sequence[str],    # field names: a string, a sequence of strings, or None
        titles: None | str | Sequence[str],   # field titles: a string, a sequence of strings, or None
        aligned: bool = ...,                  # alignment flag
        byteorder: None | _ByteOrder = ...,   # byte order, None or a _ByteOrder literal
    ) -> None: ...                            # the constructor returns nothing

__all__: list[str]  # names exported by the module

@overload
def fromarrays(
    arrayList: Iterable[ArrayLike],      # iterable of array-like column data
    dtype: DTypeLike = ...,              # dtype-like description of the records
    shape: None | _ShapeLike = ...,      # target shape, or None
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
) -> _RecArray[Any]: ...                 # returns a record array with an arbitrary dtype

@overload
def fromarrays(
    arrayList: Iterable[ArrayLike],      # iterable of array-like column data
    dtype: None = ...,                   # must be None in this overload
    shape: None | _ShapeLike = ...,      # target shape, or None
    *,
    formats: DTypeLike,                  # field formats, a dtype-like specification
    names: None | str | Sequence[str] = ...,   # field names
    titles: None | str | Sequence[str] = ...,  # field titles
    aligned: bool = ...,                 # alignment flag
    byteorder: None | _ByteOrder = ...,  # byte order
) -> _RecArray[record]: ...              # returns a record array of `record` scalars
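
The two overloads differ in whether the fields are described through `dtype` or through `formats`; a small usage sketch:

import numpy as np

x = np.array([1, 2, 3])
y = np.array([1.5, 2.5, 3.5])

# dtype-based call: matches the first overload
r1 = np.rec.fromarrays([x, y], dtype=[("a", "i4"), ("b", "f8")])

# formats-based call: matches the second overload
r2 = np.rec.fromarrays([x, y], formats="i4,f8", names="a,b")

print(r1.a, r2.b)
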

@overload
def fromrecords(
    recList: _ArrayLikeVoid_co | tuple[Any, ...] | _NestedSequence[tuple[Any, ...]],  # records: a void array-like, a tuple, or nested sequences of tuples
    dtype: DTypeLike = ...,              # dtype-like description of the records
    shape: None | _ShapeLike = ...,      # target shape, or None
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
) -> _RecArray[record]: ...              # returns a record array of `record` scalars

@overload
def fromrecords(
    recList: _ArrayLikeVoid_co | tuple[Any, ...] | _NestedSequence[tuple[Any, ...]],  # records: a void array-like, a tuple, or nested sequences of tuples
    dtype: None = ...,                   # must be None in this overload
    shape: None | _ShapeLike = ...,      # target shape, or None
    *,
    formats: DTypeLike = ...,            # field formats, a dtype-like specification
    names: None | str | Sequence[str] = ...,   # field names
    titles: None | str | Sequence[str] = ...,  # field titles
    aligned: bool = ...,                 # alignment flag
    byteorder: None | _ByteOrder = ...,  # byte order
) -> _RecArray[record]: ...              # returns a record array of `record` scalars
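
A small usage sketch of `fromrecords`, where each tuple becomes one record (field names are arbitrary examples):

import numpy as np

recs = [(1, 2.0, "x"), (2, 4.0, "y")]
r = np.rec.fromrecords(recs, formats="i4,f8,U1", names="idx,val,tag")
print(r.val)    # array([2., 4.])
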

@overload
def fromstring(
    datastring: _SupportsBuffer,         # buffer containing the binary record data
    dtype: DTypeLike,                    # dtype-like description of the records
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the buffer
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
) -> _RecArray[record]: ...              # returns a record array of `record` scalars

@overload
def fromstring(
    datastring: _SupportsBuffer,         # buffer containing the binary record data
    dtype: None = ...,                   # must be None in this overload
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the buffer
    *,
    formats: DTypeLike,                  # field formats, a dtype-like specification
    names: None | str | Sequence[str] = ...,   # field names
    titles: None | str | Sequence[str] = ...,  # field titles
    aligned: bool = ...,                 # alignment flag
    byteorder: None | _ByteOrder = ...,  # byte order
) -> _RecArray[record]: ...              # returns a record array of `record` scalars

@overload
def fromfile(
    fd: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsReadInto,  # path or readable file-like object
    dtype: DTypeLike,                    # dtype-like description of the records
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the file
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
) -> _RecArray[Any]: ...                 # returns a record array with an arbitrary dtype

@overload
def fromfile(
    fd: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsReadInto,  # path or readable file-like object
    dtype: None = ...,                   # must be None in this overload
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the file
    *,
    formats: DTypeLike,                  # field formats, a dtype-like specification
    names: None | str | Sequence[str] = ...,   # field names
    titles: None | str | Sequence[str] = ...,  # field titles
    aligned: bool = ...,                 # alignment flag
    byteorder: None | _ByteOrder = ...,  # byte order
) -> _RecArray[record]: ...              # returns a record array of `record` scalars

# Overloads of `array`: the return type follows from the kind of input object and the dtype arguments
@overload
def array(
    obj: _SCT | NDArray[_SCT],           # a scalar or array of a known scalar type _SCT
    dtype: None = ...,                   # must be None in this overload
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the buffer
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
    copy: bool = ...,                    # whether to copy the input data
) -> _RecArray[_SCT]: ...                # preserves the scalar type in the result

@overload
def array(
    obj: ArrayLike,                      # any array-like input
    dtype: DTypeLike,                    # dtype-like description of the records
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the buffer
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
    copy: bool = ...,                    # whether to copy the input data
) -> _RecArray[Any]: ...                 # returns a record array with an arbitrary dtype

@overload
def array(
    obj: ArrayLike,                      # any array-like input
    dtype: None = ...,                   # must be None in this overload
    shape: None | _ShapeLike = ...,      # target shape, or None
    offset: int = ...,                   # byte offset into the buffer
    *,
    formats: DTypeLike,                  # field formats, a dtype-like specification
    names: None | str | Sequence[str] = ...,   # field names
    titles: None | str | Sequence[str] = ...,  # field titles
    aligned: bool = ...,                 # alignment flag
    byteorder: None | _ByteOrder = ...,  # byte order
    copy: bool = ...,                    # whether to copy the input data
) -> _RecArray[record]: ...              # returns a record array of `record` scalars

@overload
def array(
    obj: None,                           # no input object: allocate from shape and dtype
    dtype: DTypeLike,                    # dtype-like description of the records
    shape: _ShapeLike,                   # required shape of the new array
    offset: int = ...,                   # byte offset into the buffer
    formats: None = ...,                 # must be None in this overload
    names: None = ...,                   # must be None in this overload
    titles: None = ...,                  # must be None in this overload
    aligned: bool = ...,                 # alignment flag
    byteorder: None = ...,               # must be None in this overload
    copy: bool = ...,                    # whether to copy the input data
) -> _RecArray[Any]: ...                 # returns a record array with an arbitrary dtype

@overload
def array(
    obj: None,                           # no input object: allocate from shape and formats
    dtype: None = ...,                   # must be None in this overload
    *,
    shape: _ShapeLike,                   # required shape of the new array
    formats: DTypeLike,                  # field formats, a dtype-like specification
    names: None | str | Sequence[str] = ...,   # field names
    titles: None | str | Sequence[str] = ...,  # field titles
    aligned: bool = ...,                 # alignment flag
    byteorder: None | _ByteOrder = ...,  # byte order
    copy: bool = ...,                    # whether to copy the input data
) -> _RecArray[record]: ...              # returns a record array of `record` scalars
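
`np.rec.array` is the catch-all constructor these overloads describe; the return type follows from the kind of input. A small sketch covering two of the cases (field names are arbitrary examples):

import numpy as np

# From an existing structured ndarray
base = np.array([(1, 2.0)], dtype=[("a", "i4"), ("b", "f8")])
r1 = np.rec.array(base)

# From obj=None with an explicit shape and formats: an uninitialized
# record array of the requested shape is allocated.
r2 = np.rec.array(None, shape=3, formats="i4,f8", names="a,b")
print(r1.a, r2.shape)
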

.\numpy\numpy\_core\shape_base.py

# Names exported from this module (used by `from ... import *`)
__all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack',
           'stack', 'vstack']

# Standard-library imports
import functools
import itertools
import operator
import warnings

# Imports from within the package
from . import numeric as _nx
from . import overrides
from .multiarray import array, asanyarray, normalize_axis_index
from . import fromnumeric as _from_nx

# Partially apply overrides.array_function_dispatch so every decorator below reports module='numpy'
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')

# Dispatcher for atleast_1d: simply returns the input arrays
def _atleast_1d_dispatcher(*arys):
    return arys

# Register _atleast_1d_dispatcher as the dispatcher for atleast_1d
@array_function_dispatch(_atleast_1d_dispatcher)
def atleast_1d(*arys):
    """
    Convert inputs to arrays with at least one dimension.

    Scalar inputs are converted to 1-dimensional arrays, whilst
    higher-dimensional inputs are preserved.

    Parameters
    ----------
    arys1, arys2, ... : array_like
        One or more input arrays.

    Returns
    -------
    ret : ndarray
        An array, or tuple of arrays, each with ``a.ndim >= 1``.
        Copies are made only if necessary.

    See Also
    --------
    atleast_2d, atleast_3d

    Examples
    --------
    >>> np.atleast_1d(1.0)
    array([1.])

    >>> x = np.arange(9.0).reshape(3,3)
    >>> np.atleast_1d(x)
    array([[0., 1., 2.],
           [3., 4., 5.],
           [6., 7., 8.]])
    >>> np.atleast_1d(x) is x
    True

    >>> np.atleast_1d(1, [3, 4])
    (array([1]), array([3, 4]))

    """
    # With a single input, convert it to an ndarray; a 0-d result is reshaped to 1-d
    if len(arys) == 1:
        result = asanyarray(arys[0])
        if result.ndim == 0:
            result = result.reshape(1)
        return result
    # With several inputs, convert each one, reshaping 0-d results to 1-d
    res = []
    for ary in arys:
        result = asanyarray(ary)
        if result.ndim == 0:
            result = result.reshape(1)
        res.append(result)
    return tuple(res)

# Dispatcher for atleast_2d: simply returns the input arrays
def _atleast_2d_dispatcher(*arys):
    return arys

# Register _atleast_2d_dispatcher as the dispatcher for atleast_2d
@array_function_dispatch(_atleast_2d_dispatcher)
def atleast_2d(*arys):
    """
    View inputs as arrays with at least two dimensions.

    Parameters
    ----------
    arys1, arys2, ... : array_like
        One or more array-like sequences.  Non-array inputs are converted
        to arrays.  Arrays that already have two or more dimensions are
        preserved.

    Returns
    -------
    res, res2, ... : ndarray
        An array, or tuple of arrays, each with ``a.ndim >= 2``.
        Copies are avoided where possible, and views with two or more
        dimensions are returned.

    See Also
    --------
    atleast_1d, atleast_3d

    Examples
    --------
    >>> np.atleast_2d(3.0)
    array([[3.]])

    >>> x = np.arange(3.0)
    >>> np.atleast_2d(x)
    array([[0., 1., 2.]])
    >>> np.atleast_2d(x).base is x
    True

    >>> np.atleast_2d(1, [1, 2], [[1, 2]])
    (array([[1]]), array([[1, 2]]), array([[1, 2]]))

    """
    # Convert each input to a view with at least two dimensions
    res = []
    for ary in arys:
        ary = asanyarray(ary)
        if ary.ndim == 0:
            result = ary.reshape(1, 1)
        elif ary.ndim == 1:
            result = ary[_nx.newaxis, :]
        else:
            result = ary
        res.append(result)
    # Return the single result directly if there was only one input
    if len(res) == 1:
        return res[0]
    # Otherwise return the results as a tuple
    else:
        return tuple(res)

# Dispatcher for atleast_3d: returns the input arrays unchanged
def _atleast_3d_dispatcher(*arys):
    return arys


# Register _atleast_3d_dispatcher as the dispatcher for atleast_3d
@array_function_dispatch(_atleast_3d_dispatcher)
# View the inputs as arrays with at least three dimensions
def atleast_3d(*arys):
    """
    View inputs as arrays with at least three dimensions.

    Parameters
    ----------
    arys1, arys2, ... : array_like
        One or more array-like sequences.  Non-array inputs are converted to
        arrays.  Arrays that already have three or more dimensions are
        preserved.

    Returns
    -------
    res1, res2, ... : ndarray
        An array, or tuple of arrays, each with ``a.ndim >= 3``.  Copies are
        avoided where possible, and views with three or more dimensions are
        returned.  For example, a 1-D array of shape ``(N,)`` becomes a view
        of shape ``(1, N, 1)``, and a 2-D array of shape ``(M, N)`` becomes a
        view of shape ``(M, N, 1)``.

    See Also
    --------
    atleast_1d, atleast_2d

    Examples
    --------
    >>> np.atleast_3d(3.0)
    array([[[3.]]])

    >>> x = np.arange(3.0)
    >>> np.atleast_3d(x).shape
    (1, 3, 1)

    >>> x = np.arange(12.0).reshape(4,3)
    >>> np.atleast_3d(x).shape
    (4, 3, 1)
    >>> np.atleast_3d(x).base is x.base  # x is a reshape, so not base itself
    True

    >>> for arr in np.atleast_3d([1, 2], [[1, 2]], [[[1, 2]]]):
    ...     print(arr, arr.shape) # doctest: +SKIP
    ...
    [[[1]
      [2]]] (1, 2, 1)
    [[[1]
      [2]]] (1, 2, 1)
    [[[1 2]]] (1, 1, 2)

    """
    # Collect the converted results
    res = []
    # Process each input array in turn
    for ary in arys:
        # Convert the input to an ndarray (preserving subclasses)
        ary = asanyarray(ary)
        # Insert axes depending on the current dimensionality
        if ary.ndim == 0:
            result = ary.reshape(1, 1, 1)
        elif ary.ndim == 1:
            result = ary[_nx.newaxis, :, _nx.newaxis]
        elif ary.ndim == 2:
            result = ary[:, :, _nx.newaxis]
        else:
            result = ary
        # Append the processed array
        res.append(result)
    # Return the single result directly if there was only one input
    if len(res) == 1:
        return res[0]
    else:
        # Otherwise return the results as a tuple
        return tuple(res)


# Dispatcher helper shared by the stacking functions: validates the input sequence
def _arrays_for_stack_dispatcher(arrays):
    # Reject inputs that are not sequence-like (no __getitem__)
    if not hasattr(arrays, "__getitem__"):
        raise TypeError('arrays to stack must be passed as a "sequence" type '
                        'such as list or tuple.')
    # Return the arrays as a tuple
    return tuple(arrays)


# Dispatcher for vstack and hstack: delegates to _arrays_for_stack_dispatcher
def _vhstack_dispatcher(tup, *, dtype=None, casting=None):
    return _arrays_for_stack_dispatcher(tup)
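
These dispatchers exist only to tell the `__array_function__` protocol which arguments are relevant; any object that defines `__array_function__` can then intercept the call. A small sketch (the `MyArray` class is just an illustrative stand-in):

import numpy as np

class MyArray:
    # A minimal object that opts in to the __array_function__ protocol.
    def __array_function__(self, func, types, args, kwargs):
        return "intercepted {}".format(func.__name__)

# atleast_1d's dispatcher reports MyArray() as a relevant argument, so the
# protocol hands the call to MyArray.__array_function__ instead of NumPy.
print(np.atleast_1d(MyArray()))   # -> intercepted atleast_1d
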


# Register _vhstack_dispatcher as the dispatcher for vstack
@array_function_dispatch(_vhstack_dispatcher)
# Stack a sequence of arrays vertically (along the first axis)
def vstack(tup, *, dtype=None, casting="same_kind"):
    """
    Stack arrays in sequence vertically (row wise).

    This is equivalent to concatenation along the first axis after 1-D arrays
    of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by
    `vsplit`.

    This function makes most sense for arrays with up to 3 dimensions. For
    instance, for pixel-data with a height (first axis), width (second axis),
    and r/g/b channels (third axis). The functions `concatenate`, `stack` and
    `block` provide more general stacking and concatenation operations.

    Parameters
    ----------
    tup : sequence of ndarrays
        The arrays must have the same shape along all but the first axis.
        1-D arrays must have the same length.

    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.

        .. versionadded:: 1.24

    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.

        .. versionadded:: 1.24

    Returns
    -------
    stacked : ndarray
        The array formed by stacking the given arrays, will be at least 2-D.

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    stack : Join a sequence of arrays along a new axis.
    block : Assemble an nd-array from nested lists of blocks.
    hstack : Stack arrays in sequence horizontally (column wise).
    dstack : Stack arrays in sequence depth wise (along third axis).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    vsplit : Split an array into multiple sub-arrays vertically (row-wise).

    Examples
    --------
    Stacking two 1-D arrays produces a 2-D array:

    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.vstack((a,b))
    array([[1, 2, 3],
           [4, 5, 6]])

    Stacking two column vectors keeps the result 2-D:

    >>> a = np.array([[1], [2], [3]])
    >>> b = np.array([[4], [5], [6]])
    >>> np.vstack((a,b))
    array([[1],
           [2],
           [3],
           [4],
           [5],
           [6]])

    """
    # Promote every element of tup to at least 2-D
    arrs = atleast_2d(*tup)
    # atleast_2d returns a bare array for a single input; wrap it in a tuple
    if not isinstance(arrs, tuple):
        arrs = (arrs,)
    # Concatenate along the first axis
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting)


# Register _vhstack_dispatcher as the dispatcher for hstack as well
@array_function_dispatch(_vhstack_dispatcher)
def hstack(tup, *, dtype=None, casting="same_kind"):
    """
    Stack arrays in sequence horizontally (column wise).

    This is equivalent to concatenation along the second axis, except for 1-D
    arrays where it concatenates along the first axis. Rebuilds arrays divided
    by `hsplit`.

    This function makes most sense for arrays with up to 3 dimensions. For
    instance, for pixel-data with a height (first axis), width (second axis),
    and r/g/b channels (third axis). The functions `concatenate`, `stack` and
    `block` provide more general stacking and concatenation operations.

    Parameters
    ----------
    tup : sequence of ndarrays
        The arrays must have the same shape along all but the second axis,
        except 1-D arrays which can be any length.

    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.

        .. versionadded:: 1.24

    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.

        .. versionadded:: 1.24

    Returns
    -------
    stacked : ndarray
        The array formed by stacking the given arrays.

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    stack : Join a sequence of arrays along a new axis.
    block : Assemble an nd-array from nested lists of blocks.
    vstack : Stack arrays in sequence vertically (row wise).
    dstack : Stack arrays in sequence depth wise (along third axis).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    hsplit : Split an array into multiple sub-arrays 
             horizontally (column-wise).

    Examples
    --------
    >>> a = np.array((1,2,3))
    >>> b = np.array((4,5,6))
    >>> np.hstack((a,b))
    array([1, 2, 3, 4, 5, 6])
    >>> a = np.array([[1],[2],[3]])
    >>> b = np.array([[4],[5],[6]])
    >>> np.hstack((a,b))
    array([[1, 4],
           [2, 5],
           [3, 6]])

    """
    # Promote every element of tup to at least 1-D
    arrs = atleast_1d(*tup)
    # atleast_1d returns a bare array for a single input; wrap it in a tuple
    if not isinstance(arrs, tuple):
        arrs = (arrs,)
    # As a special case, dimension 0 of 1-dimensional arrays is "horizontal"
    if arrs and arrs[0].ndim == 1:
        return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting)
    else:
        return _nx.concatenate(arrs, 1, dtype=dtype, casting=casting)


def _stack_dispatcher(arrays, axis=None, out=None, *,
                      dtype=None, casting=None):
    arrays = _arrays_for_stack_dispatcher(arrays)
    if out is not None:
        # optimize for the typical case where only arrays is provided
        arrays = list(arrays)
        arrays.append(out)
    return arrays


# Register _stack_dispatcher as the dispatcher for stack
@array_function_dispatch(_stack_dispatcher)
def stack(arrays, axis=0, out=None, *, dtype=None, casting="same_kind"):
    """
    Join a sequence of arrays along a new axis.

    The `axis` parameter specifies the index of the new axis in the
    dimensions of the result. For example, if `axis=0` it will be the first
    dimension and if `axis=-1` it will be the last dimension.
    
    .. versionadded:: 1.10.0
    
    Parameters
    ----------
    arrays : sequence of array_like
        Each array must have the same shape.
    
    axis : int, optional
        The axis in the result array along which the input arrays are stacked.
    
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what stack would have returned if no
        out argument were specified.
    
    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.
    
        .. versionadded:: 1.24
    
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.
    
        .. versionadded:: 1.24
    
    
    Returns
    -------
    stacked : ndarray
        The stacked array has one more dimension than the input arrays.
    
    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    block : Assemble an nd-array from nested lists of blocks.
    split : Split array into a list of multiple sub-arrays of equal size.
    
    Examples
    --------
    >>> rng = np.random.default_rng()
    >>> arrays = [rng.normal(size=(3,4)) for _ in range(10)]
    >>> np.stack(arrays, axis=0).shape
    (10, 3, 4)
    
    >>> np.stack(arrays, axis=1).shape
    (3, 10, 4)
    
    >>> np.stack(arrays, axis=2).shape
    (3, 4, 10)
    
    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.stack((a, b))
    array([[1, 2, 3],
           [4, 5, 6]])
    
    >>> np.stack((a, b), axis=-1)
    array([[1, 4],
           [2, 5],
           [3, 6]])
    
    """
    
    # Convert every input to an array (preserving subclasses)
    arrays = [asanyarray(arr) for arr in arrays]

    # An empty sequence of arrays is an error
    if not arrays:
        raise ValueError('need at least one array to stack')

    # Collect the set of input shapes; they must all be identical
    shapes = {arr.shape for arr in arrays}
    if len(shapes) != 1:
        raise ValueError('all input arrays must have the same shape')

    # The result has one more dimension than the inputs
    result_ndim = arrays[0].ndim + 1

    # Normalize the axis index against the result's dimensionality
    axis = normalize_axis_index(axis, result_ndim)

    # Build an indexing expression that inserts a new length-1 axis at `axis`
    sl = (slice(None),) * axis + (_nx.newaxis,)

    # Expand every input array with the new axis
    expanded_arrays = [arr[sl] for arr in arrays]

    # Concatenate the expanded arrays along the new axis
    return _nx.concatenate(expanded_arrays, axis=axis, out=out,
                           dtype=dtype, casting=casting)
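
The slice tuple built above simply inserts a new length-1 axis at position `axis`; it behaves the same as `np.expand_dims`, so `stack` amounts to expanding every input and concatenating along the new axis. A small check:

import numpy as np

a = np.arange(6).reshape(2, 3)
axis = 1
sl = (slice(None),) * axis + (np.newaxis,)

# a[sl] inserts a length-1 axis at position `axis`
assert a[sl].shape == (2, 1, 3)
assert np.array_equal(a[sl], np.expand_dims(a, axis))
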

# Internal functions to eliminate the overhead of repeated dispatch in one of
# the two possible paths inside np.block.
# Use getattr to protect against __array_function__ being disabled.
# Each lookup takes the undispatched (__wrapped__) implementation if it exists,
# and falls back to the original function otherwise.
_size = getattr(_from_nx.size, '__wrapped__', _from_nx.size)
_ndim = getattr(_from_nx.ndim, '__wrapped__', _from_nx.ndim)
_concatenate = getattr(_from_nx.concatenate,
                       '__wrapped__', _from_nx.concatenate)

def _block_format_index(index):
    """
    Convert a list of indices ``[0, 1, 2]`` into ``"arrays[0][1][2]"``.
    """
    # Build the bracketed index string, skipping indices that are None
    idx_str = ''.join('[{}]'.format(i) for i in index if i is not None)
    # Return the formatted index string
    return 'arrays' + idx_str
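
For example, the helper just turns an index path into the text used in error messages:

# Mirrors _block_format_index: [0, 1, 2] -> 'arrays[0][1][2]'
index = [0, 1, 2]
idx_str = ''.join('[{}]'.format(i) for i in index if i is not None)
print('arrays' + idx_str)   # arrays[0][1][2]
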

def _block_check_depths_match(arrays, parent_index=[]):
    """
    Recursive function checking that the depths of nested lists in `arrays`
    all match. Mismatch raises a ValueError as described in the block
    docstring below.

    The entire index (rather than just the depth) needs to be calculated
    for each innermost list, in case an error needs to be raised, so that
    the index of the offending list can be printed as part of the error.

    Parameters
    ----------
    arrays : nested list of arrays
        The arrays to check
    parent_index : list of int
        The full index of `arrays` within the nested lists passed to
        `_block_check_depths_match` at the top of the recursion.

    Returns
    -------
    first_index : list of int
        The full index of an element from the bottom of the nesting in
        `arrays`. If any element at the bottom is an empty list, this will
        refer to it, and the last index along the empty axis will be None.
    max_arr_ndim : int
        The maximum of the ndims of the arrays nested in `arrays`.
    final_size: int
        The number of elements in the final array. This is used to motivate
        the choice of algorithm, based on benchmarking wisdom.
    """
    if type(arrays) is tuple:
        # not strictly necessary, but saves us from:
        #  - more than one way to do things - no point treating tuples like
        #    lists
        #  - horribly confusing behaviour that results when tuples are
        #    treated like ndarray
        raise TypeError(
            '{} is a tuple. '
            'Only lists can be used to arrange blocks, and np.block does '
            'not allow implicit conversion from tuple to ndarray.'.format(
                _block_format_index(parent_index)
            )
        )
    # Tuples are rejected: np.block does not implicitly convert tuples to ndarrays
    # Non-empty list: recurse into each element
    elif type(arrays) is list and len(arrays) > 0:
        # Generator checking the depth of each nested element
        idxs_ndims = (_block_check_depths_match(arr, parent_index + [i])
                      for i, arr in enumerate(arrays))

        # Index, maximum ndim and size of the first element
        first_index, max_arr_ndim, final_size = next(idxs_ndims)

        # Consume the remaining elements of the generator
        for index, ndim, size in idxs_ndims:
            # Accumulate the total number of elements
            final_size += size
            # Track the maximum array dimensionality seen so far
            if ndim > max_arr_ndim:
                max_arr_ndim = ndim
            # All nested lists must sit at the same depth, otherwise raise
            if len(index) != len(first_index):
                raise ValueError(
                    "List depths are mismatched. First element was at depth "
                    "{}, but there is an element at depth {} ({})".format(
                        len(first_index),
                        len(index),
                        _block_format_index(index)
                    )
                )
            # Propagate the flag that signals an empty list at the bottom
            if index[-1] is None:
                first_index = index

        # Return the first index, the maximum ndim and the total size
        return first_index, max_arr_ndim, final_size

    # Empty list: we have bottomed out on an empty list
    elif type(arrays) is list and len(arrays) == 0:
        return parent_index + [None], 0, 0

    # Otherwise `arrays` is a scalar or an array
    else:
        # Its size and ndim terminate the recursion
        size = _size(arrays)
        return parent_index, _ndim(arrays), size
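
The effect of this check is visible from the public API: nesting lists to different depths makes `np.block` raise the ValueError assembled from these indices. For example:

import numpy as np

a = np.ones((2, 2))
b = np.zeros((2, 2))
try:
    np.block([[a, b], a])    # a depth-2 list next to a depth-1 element
except ValueError as exc:
    print(exc)               # "List depths are mismatched. ..."
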
# Ensure `a` has at least `ndim` dimensions by prepending length-1 axes to `a.shape` when necessary
def _atleast_nd(a, ndim):
    return array(a, ndmin=ndim, copy=None, subok=True)


# Running cumulative sum of a list of values
def _accumulate(values):
    return list(itertools.accumulate(values))


# Given the shapes of the arrays and the concatenation axis, return the resulting shape and slice prefixes
def _concatenate_shapes(shapes, axis):
    """Given array shapes, return the resulting shape and slices prefixes.

    These help in nested concatenation.

    Returns
    -------
    shape: tuple of int
        This tuple satisfies::

            shape, _ = _concatenate_shapes([arr.shape for arr in arrs], axis)
            shape == concatenate(arrs, axis).shape

    slice_prefixes: tuple of (slice(start, end), )
        For a list of arrays being concatenated, this returns the slice
        in the larger array at axis that needs to be sliced into.

        For example, the following holds::

            ret = concatenate([a, b, c], axis)
            _, (sl_a, sl_b, sl_c) = concatenate_slices([a, b, c], axis)

            ret[(slice(None),) * axis + sl_a] == a
            ret[(slice(None),) * axis + sl_b] == b
            ret[(slice(None),) * axis + sl_c] == c

        These are called slice prefixes since they are used in the recursive
        blocking algorithm to compute the left-most slices during the
        recursion. Therefore, they must be prepended to rest of the slice
        that was computed deeper in the recursion.

        These are returned as tuples to ensure that they can quickly be added
        to existing slice tuple without creating a new tuple every time.

    """
    # Cache the values that are re-used below.
    shape_at_axis = [shape[axis] for shape in shapes]

    # Take an arbitrary shape as the reference
    first_shape = shapes[0]
    first_shape_pre = first_shape[:axis]
    first_shape_post = first_shape[axis+1:]

    # All shapes must agree with the reference everywhere except along `axis`
    if any(shape[:axis] != first_shape_pre or
           shape[axis+1:] != first_shape_post for shape in shapes):
        raise ValueError(
            'Mismatched array shapes in block along axis {}.'.format(axis))

    # The concatenated shape: sum the sizes along `axis`
    shape = (first_shape_pre + (sum(shape_at_axis),) + first_shape[axis+1:])

    # Running offsets along `axis`
    offsets_at_axis = _accumulate(shape_at_axis)

    # Slice prefixes used by the recursion to compute the left-most slices
    slice_prefixes = [(slice(start, end),)
                      for start, end in zip([0] + offsets_at_axis,
                                            offsets_at_axis)]
    return shape, slice_prefixes
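
The invariant described in the docstring can be checked with the public `np.concatenate`; a small sketch of what the returned shape and slice prefixes mean for two blocks joined along axis 0:

import numpy as np

a = np.zeros((2, 3))
b = np.ones((4, 3))
ret = np.concatenate([a, b], axis=0)

# The resulting shape sums the sizes along the concatenation axis...
assert ret.shape == (2 + 4, 3)
# ...and each input occupies a contiguous slice along that axis.
sl_a, sl_b = (slice(0, 2),), (slice(2, 6),)
assert np.array_equal(ret[sl_a], a)
assert np.array_equal(ret[sl_b], b)
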


# Recursively compute the final shape, the assignment slices and the flattened list of arrays
def _block_info_recursion(arrays, max_depth, result_ndim, depth=0):
    """
    Returns the shape of the final array, along with a list
    of slices and a list of arrays that can be used for assignment inside the
    new array

    Parameters
    ----------
    arrays : nested list of arrays
        The arrays to check
    max_depth : list of int
        The number of nested lists
    result_ndim : int
        The number of dimensions in the final array.

    Returns
    -------
    shape : tuple of int
        The shape that the final array will take on.
    """
    # slices: list of tuple of slices
    #        The slices into the full array required for assignment. These are
    #        required to be prepended with ``(Ellipsis, )`` to obtain to correct
    #        final index.
    # arrays: list of ndarray
    #        The data to assign to each slice of the full array
    """
    if depth < max_depth:
        # Recursively call _block_info_recursion for each array in arrays
        shapes, slices, arrays = zip(
            *[_block_info_recursion(arr, max_depth, result_ndim, depth+1)
              for arr in arrays])

        # Calculate the axis and shape after concatenating shapes
        axis = result_ndim - max_depth + depth
        shape, slice_prefixes = _concatenate_shapes(shapes, axis)

        # Prepend the slice prefix to each inner slice and flatten them
        slices = [slice_prefix + the_slice
                  for slice_prefix, inner_slices in zip(slice_prefixes, slices)
                  for the_slice in inner_slices]

        # Flatten the list of arrays into a single list
        arrays = functools.reduce(operator.add, arrays)

        return shape, slices, arrays
    else:
        # Base case: depth >= max_depth, return shape, slices, and arrays as they are
        # We've 'bottomed out' - arrays is either a scalar or an array
        # type(arrays) is not list
        # Return the slice and the array inside a list to be consistent with
        # the recursive case.
        arr = _atleast_nd(arrays, result_ndim)
        return arr.shape, [()], [arr]
# Internal implementation of `block` based on repeated concatenation.
# `arrays` is the argument passed to `block`; `max_depth` is the depth of the
# nested lists in `arrays`; `result_ndim` is the greater of the maximum array
# dimensionality and the list depth; `depth` is the current recursion depth.
def _block(arrays, max_depth, result_ndim, depth=0):
    if depth < max_depth:
        # Not yet at the innermost level: recurse into each element...
        arrs = [_block(arr, max_depth, result_ndim, depth+1)
                for arr in arrays]
        # ...and concatenate the results along axis -(max_depth - depth)
        return _concatenate(arrs, axis=-(max_depth-depth))
    else:
        # We've bottomed out: `arrays` is a scalar or an array, so make sure
        # it has at least `result_ndim` dimensions
        return _atleast_nd(arrays, result_ndim)


def _block_dispatcher(arrays):
    # Use type(arrays) is list to match the behaviour of np.block(), which
    # special-cases list rather than generic iterables or tuples.  We also
    # know that list.__array_function__ will never exist.
    if type(arrays) is list:
        # Recursively yield from every sublist of `arrays`
        for subarrays in arrays:
            yield from _block_dispatcher(subarrays)
    else:
        # Not a list: yield the array itself
        yield arrays


@array_function_dispatch(_block_dispatcher)
def block(arrays):
    """
    Assemble an nd-array from nested lists of blocks.

    Blocks in the innermost lists are concatenated (see `concatenate`) along
    the last dimension (-1), then these are concatenated along the
    second-last dimension (-2), and so on until the outermost list is reached.

    Blocks can be of any dimension, but will not be broadcasted using
    the normal rules. Instead, leading axes of size 1 are inserted, 
    to make ``block.ndim`` the same for all blocks. This is primarily useful
    for working with scalars, and means that code like ``np.block([v, 1])``
    is valid, where ``v.ndim == 1``.

    When the nested list is two levels deep, this allows block matrices to be
    constructed from their components.

    .. versionadded:: 1.13.0

    Parameters
    ----------
    arrays : nested list of array_like or scalars (but not tuples)
        If passed a single ndarray or scalar (a nested list of depth 0), this
        is returned unmodified (and not copied).

        Elements shapes must match along the appropriate axes (without
        broadcasting), but leading 1s will be prepended to the shape as
        necessary to make the dimensions match.

    Returns
    -------
    block_array : ndarray
        The array assembled from the given blocks.

        The dimensionality of the output is equal to the greatest of:

        * the dimensionality of all the inputs
        * the depth to which the input list is nested

    Raises
    ------
    ValueError
        * If list depths are mismatched - for instance, ``[[a, b], c]`` is
          illegal, and should be spelt ``[[a, b], [c]]``
        * If lists are empty - for instance, ``[[a, b], []]``

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    stack : Join a sequence of arrays along a new axis.
    vstack : Stack arrays in sequence vertically (row wise).
    hstack : Stack arrays in sequence horizontally (column wise).
    dstack : Stack arrays in sequence depth wise (along third axis).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    vsplit : Split an array into multiple sub-arrays vertically (row-wise).

    Notes
    -----
    When called with only scalars, ``np.block`` is equivalent to an ndarray
    call. So ``np.block([[1, 2], [3, 4]])`` is equivalent to
    ``np.array([[1, 2], [3, 4]])``.

    This function does not enforce that the blocks lie on a fixed grid.
    ``np.block([[a, b], [c, d]])`` is not restricted to arrays of the form::

        AAAbb
        AAAbb
        cccDD

    But is also allowed to produce, for some ``a, b, c, d``::

        AAAbb
        AAAbb
        cDDDD

    Since concatenation happens along the last axis first, `block` is *not*
    capable of producing the following directly::

        AAAbb
        cccbb
        cccDD

    Matlab's "square bracket stacking", ``[A, B, ...; p, q, ...]``, is
    equivalent to ``np.block([[A, B, ...], [p, q, ...]])``.

    Examples
    --------
    The most common use of this function is to build a block matrix

    >>> A = np.eye(2) * 2
    >>> B = np.eye(3) * 3
    >>> np.block([
    ...     [A,               np.zeros((2, 3))],
    ...     [np.ones((3, 2)), B               ]
    ... ])
    array([[2., 0., 0., 0., 0.],
           [0., 2., 0., 0., 0.],
           [1., 1., 3., 0., 0.],
           [1., 1., 0., 3., 0.],
           [1., 1., 0., 0., 3.]])

    With a list of depth 1, `block` can be used as `hstack`

    >>> np.block([1, 2, 3])              # hstack([1, 2, 3])
    array([1, 2, 3])

    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.block([a, b, 10])             # hstack([a, b, 10])
    array([ 1,  2,  3,  4,  5,  6, 10])

    >>> A = np.ones((2, 2), int)
    >>> B = 2 * A
    >>> np.block([A, B])                 # hstack([A, B])
    array([[1, 1, 2, 2],
           [1, 1, 2, 2]])

    With a list of depth 2, `block` can be used in place of `vstack`:

    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.block([[a], [b]])             # vstack([a, b])
    array([[1, 2, 3],
           [4, 5, 6]])

    >>> A = np.ones((2, 2), int)
    >>> B = 2 * A
    >>> np.block([[A], [B]])             # vstack([A, B])
    array([[1, 1],
           [1, 1],
           [2, 2],
           [2, 2]])

    It can also be used in places of `atleast_1d` and `atleast_2d`

    >>> a = np.array(0)
    >>> b = np.array([1])
    >>> np.block([a])                    # atleast_1d(a)
    array([0])
    >>> np.block([b])                    # atleast_1d(b)
    array([1])

    >>> np.block([[a]])                  # atleast_2d(a)
    array([[0]])
    >>> np.block([[b]])                  # atleast_2d(b)
    array([[1]])

    """
    # _block_setup returns the (possibly unchanged) arrays, the nesting depth
    # of the input list, the dimensionality of the result and the total number
    # of elements in the final array; these drive the choice of algorithm below.
    arrays, list_ndim, result_ndim, final_size = _block_setup(arrays)

    # It was found through benchmarking that making an array of final size
    # around 256x256 was faster by straight concatenation on an i7-7700HQ
    # processor with dual channel 2400MHz memory.  The choice of dtype did
    # not seem to matter much.
    #
    # Building a 2-D array through repeated concatenation requires two copies
    # of the array.
    #
    # The fastest algorithm depends on the ratio of CPU power to memory speed.
    # One can monitor the benchmarks at
    # https://pv.github.io/numpy-bench/#bench_shape_base.Block2D.time_block2d
    # to tune this parameter until a C version of the `_block_info_recursion`
    # algorithm is implemented, which would likely be faster than the Python
    # version.
    if list_ndim * final_size > (2 * 512 * 512):
        return _block_slicing(arrays, list_ndim, result_ndim)
    else:
        return _block_concatenate(arrays, list_ndim, result_ndim)


# These helper functions are mostly used for testing.
# They allow us to write tests that directly call `_block_slicing`
# or `_block_concatenate` without having to block large arrays to force
# the desired code path.

def _block_setup(arrays):
    """
    Returns (`arrays`, list_ndim, result_ndim, final_size)
    """
    # Check that the nesting depths match and get the bottom index, the
    # maximum array dimensionality and the total size
    bottom_index, arr_ndim, final_size = _block_check_depths_match(arrays)
    # The nesting depth of the list structure
    list_ndim = len(bottom_index)
    # A trailing None in the bottom index marks an empty list, which is an error
    if bottom_index and bottom_index[-1] is None:
        raise ValueError(
            'List at {} cannot be empty'.format(
                _block_format_index(bottom_index)
            )
        )
    # The result dimensionality is the larger of the array ndim and the list depth
    result_ndim = max(arr_ndim, list_ndim)
    # Return the arrays together with the list depth, result ndim and total size
    return arrays, list_ndim, result_ndim, final_size


def _block_slicing(arrays, list_ndim, result_ndim):
    # Compute the final shape, the assignment slices and the flattened arrays
    shape, slices, arrays = _block_info_recursion(
        arrays, list_ndim, result_ndim)
    # The result dtype is the common type of all inputs
    dtype = _nx.result_type(*[arr.dtype for arr in arrays])

    # Test whether Fortran ordering is preferred: choose 'F' only if all
    # inputs are F-contiguous and not all of them are C-contiguous
    F_order = all(arr.flags['F_CONTIGUOUS'] for arr in arrays)
    C_order = all(arr.flags['C_CONTIGUOUS'] for arr in arrays)
    order = 'F' if F_order and not C_order else 'C'
    # Allocate an empty array with the computed shape, dtype and order
    result = _nx.empty(shape=shape, dtype=dtype, order=order)
    # Note: in a C implementation, PyArray_CreateMultiSortedStridePerm could
    # be used for more advanced guessing of the desired order.

    # Assign each input array into its slice of the result
    for the_slice, arr in zip(slices, arrays):
        result[(Ellipsis,) + the_slice] = arr
    # Return the assembled array
    return result


def _block_concatenate(arrays, list_ndim, result_ndim):
    # Assemble the result by repeated concatenation
    result = _block(arrays, list_ndim, result_ndim)
    if list_ndim == 0:
        # Catch the special case where `arrays` was a single ndarray or scalar
        # (a nested list of depth zero) and `_block` returned a view.  This may
        # copy a scalar or a list twice, but that is unlikely to matter for
        # users who care about performance.
        result = result.copy()
    # Return the assembled array
    return result

.\numpy\numpy\_core\shape_base.pyi

# Standard-library imports and typing constructs
from collections.abc import Sequence
from typing import TypeVar, overload, Any, SupportsIndex

# NumPy types used by the annotations
from numpy import generic, _CastingKind
from numpy._typing import (
    NDArray,
    ArrayLike,
    DTypeLike,
    _ArrayLike,
    _DTypeLike,
)

# Type variables used by the overloads below
_SCT = TypeVar("_SCT", bound=generic)
_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])

# Names exported via `from ... import *`
__all__: list[str]

# Overloads ensuring the inputs are at least 1-dimensional arrays
@overload
def atleast_1d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ...
@overload
def atleast_1d(arys: ArrayLike, /) -> NDArray[Any]: ...
@overload
def atleast_1d(*arys: ArrayLike) -> tuple[NDArray[Any], ...]: ...

# Overloads ensuring the inputs are at least 2-dimensional arrays
@overload
def atleast_2d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ...
@overload
def atleast_2d(arys: ArrayLike, /) -> NDArray[Any]: ...
@overload
def atleast_2d(*arys: ArrayLike) -> tuple[NDArray[Any], ...]: ...

# Overloads ensuring the inputs are at least 3-dimensional arrays
@overload
def atleast_3d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ...
@overload
def atleast_3d(arys: ArrayLike, /) -> NDArray[Any]: ...
@overload
def atleast_3d(*arys: ArrayLike) -> tuple[NDArray[Any], ...]: ...

# Stack arrays vertically (along the first axis)
@overload
def vstack(
    tup: Sequence[_ArrayLike[_SCT]],
    *,
    dtype: None = ...,
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def vstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: _DTypeLike[_SCT],
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def vstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> NDArray[Any]: ...

# Stack arrays horizontally (column wise)
@overload
def hstack(
    tup: Sequence[_ArrayLike[_SCT]],
    *,
    dtype: None = ...,
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def hstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: _DTypeLike[_SCT],
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def hstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> NDArray[Any]: ...

# Stack arrays along a new axis
@overload
def stack(
    arrays: Sequence[_ArrayLike[_SCT]],
    axis: SupportsIndex = ...,
    out: None = ...,
    *,
    dtype: None = ...,
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def stack(
    arrays: Sequence[ArrayLike],
    axis: SupportsIndex = ...,
    out: None = ...,
    *,
    dtype: _DTypeLike[_SCT],
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def stack(
    arrays: Sequence[ArrayLike],
    axis: SupportsIndex = ...,
    out: None = ...,
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> NDArray[Any]: ...
@overload
def stack(
    arrays: Sequence[ArrayLike],
    axis: SupportsIndex = ...,
    out: _ArrayType = ...,
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> _ArrayType: ...

# Assemble an array from nested lists of blocks
@overload
def block(arrays: _ArrayLike[_SCT]) -> NDArray[_SCT]: ...
@overload
def block(arrays: ArrayLike) -> NDArray[Any]: ...

.\numpy\numpy\_core\src\common\array_assign.c

/*
 * This file implements some helper functions for the array assignment
 * routines. The actual assignment routines are in array_assign_*.c
 *
 * Written by Mark Wiebe (mwwiebe@gmail.com)
 * Copyright (c) 2011 by Enthought, Inc.
 *
 * See LICENSE.txt for the license.
 */
// Use the current NumPy API version and hide deprecated APIs
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
// Mark this translation unit as part of the multiarray module
#define _MULTIARRAYMODULE

// Python C API
#include <Python.h>

// NumPy array type definitions
#include <numpy/ndarraytypes.h>

// NumPy configuration
#include "npy_config.h"

// Project-internal headers
#include "shape.h"
#include "array_assign.h"
#include "common.h"
#include "lowlevel_strided_loops.h"
#include "mem_overlap.h"

// The parameters of this function are documented in array_assign.h
NPY_NO_EXPORT int
broadcast_strides(int ndim, npy_intp const *shape,
                int strides_ndim, npy_intp const *strides_shape, npy_intp const *strides,
                char const *strides_name,
                npy_intp *out_strides)
{
    // Index of the first output dimension that the given strides map onto
    int idim, idim_start = ndim - strides_ndim;

    /* Can't broadcast to fewer dimensions */
    if (idim_start < 0) {
        goto broadcast_error;
    }

    /*
     * Process from the end to the start, so that 'strides' and 'out_strides'
     * can point to the same memory.
     */
    for (idim = ndim - 1; idim >= idim_start; --idim) {
        npy_intp strides_shape_value = strides_shape[idim - idim_start];
        // A size-1 dimension broadcasts: its output stride becomes 0
        if (strides_shape_value == 1) {
            out_strides[idim] = 0;
        }
        // Otherwise the dimension size must match the target shape
        else if (strides_shape_value != shape[idim]) {
            goto broadcast_error;
        }
        else {
            out_strides[idim] = strides[idim - idim_start];
        }
    }

    /* New dimensions get a zero stride */
    for (idim = 0; idim < idim_start; ++idim) {
        out_strides[idim] = 0;
    }

    return 0;

broadcast_error: {
        // Render strides_shape as a string for the error message
        PyObject *shape1 = convert_shape_to_string(strides_ndim,
                                                   strides_shape, "");
        if (shape1 == NULL) {
            return -1;
        }

        // Render shape as a string for the error message
        PyObject *shape2 = convert_shape_to_string(ndim, shape, "");
        if (shape2 == NULL) {
            Py_DECREF(shape1);
            return -1;
        }
        // Raise a ValueError explaining why broadcasting failed
        PyErr_Format(PyExc_ValueError,
                "could not broadcast %s from shape %S into shape %S",
                strides_name, shape1, shape2);
        Py_DECREF(shape1);
        Py_DECREF(shape2);
        return -1;
    }
}
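
The rule implemented here is the same one visible from Python: broadcasting a length-1 dimension yields a view whose stride along that dimension is 0. A quick check with `np.broadcast_to`:

import numpy as np

a = np.arange(3.0).reshape(1, 3)     # shape (1, 3)
v = np.broadcast_to(a, (4, 3))       # broadcast the length-1 axis to 4

print(a.strides)   # (24, 8) on a typical 64-bit build
print(v.strides)   # (0, 8): the broadcast dimension gets stride 0
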
/* Documented in array_assign.h */
NPY_NO_EXPORT int
raw_array_is_aligned(int ndim, npy_intp const *shape,
                     char *data, npy_intp const *strides, int alignment)
{
    /*
     * The code below expects the following:
     *  * alignment is a power of two, as required by the C standard.
     *  * Casting a pointer to uintp gives a representation that can sensibly
     *    be used with bitwise operations (perhaps not required by the C
     *    standard, but assumed by glibc, so it should be fine).
     *  * Casting a stride from intp to uintp (to avoid relying on the signed
     *    integer representation) preserves the remainder with respect to the
     *    alignment, so stride % a is the same as ((unsigned intp) stride) % a.
     *    This is required by the C standard.
     *
     * The code checks whether the lowest log2(alignment) bits of `data` and
     * of all `strides` are 0, because this implies that
     * (data + n*stride) % alignment == 0 for all integers n.
     */
    if (alignment > 1) {
        npy_uintp align_check = (npy_uintp)data;  /* start from the data pointer */
        int i;

        for (i = 0; i < ndim; i++) {
            /* strides[i] only matters when shape[i] > 1 */
            if (shape[i] > 1) {
                /* fold the stride's low bits into the check */
                align_check |= (npy_uintp)strides[i];
            }
            else if (shape[i] == 0) {
                /* an array with zero elements is always aligned */
                return 1;
            }
        }

        /* check whether the combined low bits are all zero */
        return npy_is_aligned((void *)align_check, alignment);
    }
    else if (alignment == 1) {
        /* with alignment 1, any data is aligned */
        return 1;
    }
    else {
        /* always return false for alignment == 0 ("cannot be aligned") */
        return 0;
    }
}
/* Check whether the given array is aligned to its dtype's required alignment */
NPY_NO_EXPORT int
IsAligned(PyArrayObject *ap)
{
    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
                                PyArray_DATA(ap), PyArray_STRIDES(ap),
                                PyArray_DESCR(ap)->alignment);
}

/* Check whether the given array satisfies the uint alignment for its item size */
NPY_NO_EXPORT int
IsUintAligned(PyArrayObject *ap)
{
    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
                                PyArray_DATA(ap), PyArray_STRIDES(ap),
                                npy_uint_alignment(PyArray_ITEMSIZE(ap)));
}
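
From Python, the result of these checks is exposed as the ALIGNED flag; the bit trick above can also be reproduced by hand for a simple 1-D array (a rough sketch of the same idea, not NumPy's own code):

import numpy as np

a = np.zeros(4, dtype=np.float64)
print(a.flags['ALIGNED'])            # True for a freshly allocated array

# Rough Python equivalent of the check for this 1-D case: the low bits of the
# data pointer and of the stride must all be zero (alignment is a power of two).
alignment = a.dtype.alignment
ptr = a.__array_interface__['data'][0]
print((ptr | a.strides[0]) % alignment == 0)
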

/* Return 1 if the two arrays have overlapping data regions, 0 otherwise */
NPY_NO_EXPORT int
arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2)
{
    mem_overlap_t result;

    /* Bounds-based check of whether the two arrays may share memory */
    result = solve_may_share_memory(arr1, arr2, NPY_MAY_SHARE_BOUNDS);
    /* Return 0 if they definitely do not share memory, 1 otherwise */
    if (result == MEM_OVERLAP_NO) {
        return 0;
    }
    else {
        return 1;
    }
}
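
The same question is exposed in Python by `np.may_share_memory` (a bounds-based check, like NPY_MAY_SHARE_BOUNDS here) and by the exact `np.shares_memory`:

import numpy as np

a = np.arange(10)
b = a[::2]          # a strided view into the same buffer
c = a.copy()        # an independent copy

print(np.may_share_memory(a, b))   # True: the bounds overlap
print(np.shares_memory(a, b))      # True: they really share elements
print(np.may_share_memory(a, c))   # False
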

.\numpy\numpy\_core\src\common\array_assign.h

#ifndef NUMPY_CORE_SRC_COMMON_ARRAY_ASSIGN_H_
#define NUMPY_CORE_SRC_COMMON_ARRAY_ASSIGN_H_

/*
 * An array assignment function for copying arrays, treating the
 * arrays as flat according to their respective ordering rules.
 * This function makes a temporary copy of 'src' if 'src' and
 * 'dst' overlap, to be able to handle views of the same data with
 * different strides.
 *
 * dst: The destination array.
 * dst_order: The rule for how 'dst' is to be made flat.
 * src: The source array.
 * src_order: The rule for how 'src' is to be made flat.
 * casting: An exception is raised if the copy violates this
 *          casting rule.
 *
 * Returns 0 on success, -1 on failure.
 */

/* Not yet implemented
NPY_NO_EXPORT int
PyArray_AssignArrayAsFlat(PyArrayObject *dst, NPY_ORDER dst_order,
                  PyArrayObject *src, NPY_ORDER src_order,
                  NPY_CASTING casting,
                  npy_bool preservena, npy_bool *preservewhichna);
*/

/*
 * Declares a function to assign the content of 'src' array to 'dst' array,
 * handling a possible 'wheremask' array and casting rules.
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
                    PyArrayObject *wheremask,
                    NPY_CASTING casting);

/*
 * Assigns a raw scalar value to every element of 'dst' array,
 * considering casting rules and optional 'wheremask'.
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
PyArray_AssignRawScalar(PyArrayObject *dst,
                        PyArray_Descr *src_dtype, char *src_data,
                        PyArrayObject *wheremask,
                        NPY_CASTING casting);

/******** LOW-LEVEL SCALAR TO ARRAY ASSIGNMENT ********/

/*
 * Assigns the scalar value to every element of the destination raw array.
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
raw_array_assign_scalar(int ndim, npy_intp const *shape,
        PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
        PyArray_Descr *src_dtype, char *src_data);

/*
 * Assigns the scalar value to every element of the destination raw array
 * where the 'wheremask' value is True.
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
        PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
        PyArray_Descr *src_dtype, char *src_data,
        PyArray_Descr *wheremask_dtype, char *wheremask_data,
        npy_intp const *wheremask_strides);

/******** LOW-LEVEL ARRAY MANIPULATION HELPERS ********/

/*
 * Internal detail of how much to buffer during array assignments which
 * need it. This is for more complex NA masking operations where masks
 * need to be inverted or combined together.
 */
#define NPY_ARRAY_ASSIGN_BUFFERSIZE 8192

/*
 * Broadcasts strides to match the given dimensions. Can be used,
 * for instance, to set up a raw iteration.
 *
 * 'strides_name' is used to produce an error message if the strides
 * cannot be broadcast.
 *
 * Returns 0 on success, -1 on failure.
 */
/*
 * 计算广播操作后的数组的步幅。
 * ndim: 目标数组的维度
 * shape: 目标数组的形状
 * strides_ndim: 步幅数组的维度
 * strides_shape: 步幅数组的形状
 * strides: 步幅数组的值
 * strides_name: 步幅数组的名称(用于生成错误信息)
 * out_strides: 输出的步幅数组
 */
NPY_NO_EXPORT int
broadcast_strides(int ndim, npy_intp const *shape,
                int strides_ndim, npy_intp const *strides_shape, npy_intp const *strides,
                char const *strides_name,
                npy_intp *out_strides);

/*
 * 检查一个数据指针加上一组步幅是否指向所有元素都按照给定的对齐方式对齐的原始数组。
 * 如果数据对齐到给定的对齐方式返回 1,否则返回 0。
 * alignment 应为二的幂,或者可以是特殊值 0 表示不能对齐,此时总是返回 0(false)。
 */
NPY_NO_EXPORT int
raw_array_is_aligned(int ndim, npy_intp const *shape,
                     char *data, npy_intp const *strides, int alignment);

/*
 * 检查数组是否按照其数据类型的“真实对齐方式”对齐。
 */
NPY_NO_EXPORT int
IsAligned(PyArrayObject *ap);

/*
 * 检查数组是否按照其数据类型的“无符号整数对齐方式”对齐。
 */
NPY_NO_EXPORT int
IsUintAligned(PyArrayObject *ap);

/* 如果两个数组有重叠的数据返回 1,否则返回 0 */
NPY_NO_EXPORT int
arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2);


#endif  /* NUMPY_CORE_SRC_COMMON_ARRAY_ASSIGN_H_ */
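
据作者理解,Python 层的 np.copyto 等接口最终会走到本头文件声明的赋值例程(数组到数组、标量到数组,带可选的 wheremask 与 casting 规则);下面只演示这些参数在 Python 层的语义,并不逐行对应到上面的 C 声明:

import numpy as np

dst = np.zeros(5)
mask = np.array([True, False, True, False, True])

# 标量赋值 + wheremask:只有 mask 为 True 的位置被写入
np.copyto(dst, 7.0, where=mask)
print(dst)                                # [7. 0. 7. 0. 7.]

# 数组赋值 + casting 规则:int64 -> float64 属于 same_kind 允许的转换
np.copyto(dst, np.arange(5), where=~mask, casting='same_kind')
print(dst)                                # [7. 1. 7. 3. 7.]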

.\numpy\numpy\_core\src\common\binop_override.h

#ifndef NUMPY_CORE_SRC_COMMON_BINOP_OVERRIDE_H_
#define NUMPY_CORE_SRC_COMMON_BINOP_OVERRIDE_H_

#include <string.h>  // 引入字符串操作相关的头文件
#include <Python.h>  // 引入 Python C API 的头文件
#include "numpy/arrayobject.h"  // 引入 NumPy 数组对象的头文件

#include "get_attr_string.h"  // 引入获取属性字符串的自定义头文件
#include "npy_static_data.h"  // 引入 NumPy 静态数据的头文件

static int
binop_should_defer(PyObject *self, PyObject *other, int inplace)
{
    /*
     * This function assumes that self.__binop__(other) is underway and
     * implements the rules described above. Python's C API is funny, and
     * makes it tricky to tell whether a given slot is called for __binop__
     * ("forward") or __rbinop__ ("reversed"). You are responsible for
     * determining this before calling this function; it only provides the
     * logic for forward binop implementations.
     */
    
    /*
     * NB: there's another copy of this code in
     *    numpy.ma.core.MaskedArray._delegate_binop
     * which should possibly be updated when this is.
     */
    
    PyObject *attr;  // 定义 PyObject 类型的变量 attr
    double self_prio, other_prio;  // 定义双精度浮点数变量 self_prio 和 other_prio
    int defer;  // 定义整型变量 defer

    /*
     * attribute check is expensive for scalar operations, avoid if possible
     */
    if (other == NULL ||  // 如果 other 为 NULL,直接返回 0
        self == NULL ||  // 如果 self 为 NULL,直接返回 0
        Py_TYPE(self) == Py_TYPE(other) ||  // 如果 self 和 other 类型相同,直接返回 0
        PyArray_CheckExact(other) ||  // 如果 other 是精确匹配的 NumPy 数组,直接返回 0
        PyArray_CheckAnyScalarExact(other)) {  // 如果 other 是任意标量的精确匹配,直接返回 0
        return 0;
    }

    /*
     * Classes with __array_ufunc__ are living in the future, and only need to
     * check whether __array_ufunc__ equals None.
     */
    attr = PyArray_LookupSpecial(other, npy_interned_str.array_ufunc);  // 查找 other 对象中的 __array_ufunc__ 属性
    if (attr != NULL) {  // 如果找到了 __array_ufunc__ 属性
        defer = !inplace && (attr == Py_None);  // 如果不是原地操作且 __array_ufunc__ 为 None,则推迟执行
        Py_DECREF(attr);  // 减少引用计数
        return defer;  // 返回 defer 值
    }
    else if (PyErr_Occurred()) {  // 如果出现了错误
        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
        // 清除错误信息,暂时保留这个 TODO 注释
    }

    /*
     * Otherwise, we need to check for the legacy __array_priority__. But if
     * other.__class__ is a subtype of self.__class__, then it's already had
     * a chance to run, so no need to defer to it.
     */
    if (PyType_IsSubtype(Py_TYPE(other), Py_TYPE(self))) {  // 如果 other 是 self 的子类
        return 0;  // 直接返回 0,不推迟执行
    }

    self_prio = PyArray_GetPriority((PyObject *)self, NPY_SCALAR_PRIORITY);  // 获取 self 的优先级
    other_prio = PyArray_GetPriority((PyObject *)other, NPY_SCALAR_PRIORITY);  // 获取 other 的优先级
    return self_prio < other_prio;  // 返回比较结果,判断是否推迟执行
}

/*
 * 定义宏BINOP_IS_FORWARD,用于检查是否是正向二元操作。
 * 如果 m2 的类型有定义 tp_as_number 并且其 SLOT_NAME 的函数指针不等于 test_func,
 * 则返回真,表示这是正向操作。
 */
#define BINOP_IS_FORWARD(m1, m2, SLOT_NAME, test_func)  \
    (Py_TYPE(m2)->tp_as_number != NULL &&                               \
     (void*)(Py_TYPE(m2)->tp_as_number->SLOT_NAME) != (void*)(test_func))

/*
 * 定义宏BINOP_GIVE_UP_IF_NEEDED,用于在需要时放弃二元操作的执行。
 * 如果是正向操作并且 binop_should_defer((PyObject*)m1, (PyObject*)m2, 0) 返回真,
 * 则增加 Py_NotImplemented 的引用计数并返回 Py_NotImplemented。
 */
#define BINOP_GIVE_UP_IF_NEEDED(m1, m2, slot_expr, test_func)           \
    do {                                                                \
        if (BINOP_IS_FORWARD(m1, m2, slot_expr, test_func) &&           \
            binop_should_defer((PyObject*)m1, (PyObject*)m2, 0)) {      \
            Py_INCREF(Py_NotImplemented);                               \
            return Py_NotImplemented;                                   \
        }                                                               \
    } while (0)

/*
 * 定义宏INPLACE_GIVE_UP_IF_NEEDED,用于在需要时放弃增强赋值操作的执行。
 * 如果是正向操作并且 binop_should_defer((PyObject*)m1, (PyObject*)m2, 1) 返回真,
 * 则增加 Py_NotImplemented 的引用计数并返回 Py_NotImplemented。
 */
#define INPLACE_GIVE_UP_IF_NEEDED(m1, m2, slot_expr, test_func)         \
    do {                                                                \
        if (BINOP_IS_FORWARD(m1, m2, slot_expr, test_func) &&           \
            binop_should_defer((PyObject*)m1, (PyObject*)m2, 1)) {      \
            Py_INCREF(Py_NotImplemented);                               \
            return Py_NotImplemented;                                   \
        }                                                               \
    } while (0)

/*
 * 定义宏RICHCMP_GIVE_UP_IF_NEEDED,用于在需要时放弃富比较操作的执行。
 * 如果 binop_should_defer((PyObject*)m1, (PyObject*)m2, 0) 返回真,
 * 则增加 Py_NotImplemented 的引用计数并返回 Py_NotImplemented。
 */
#define RICHCMP_GIVE_UP_IF_NEEDED(m1, m2)                               \
    do {                                                                \
        if (binop_should_defer((PyObject*)m1, (PyObject*)m2, 0)) {      \
            Py_INCREF(Py_NotImplemented);                               \
            return Py_NotImplemented;                                   \
        }                                                               \
    } while (0)



// 上面的宏都包在 do { ... } while (0) 中,使宏展开后表现为单条语句,可以安全地用在 if/else 等语法结构里
#endif  /* NUMPY_CORE_SRC_COMMON_BINOP_OVERRIDE_H_ */
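
binop_should_defer 的效果在 Python 层很容易观察:只要另一个操作数的类把 __array_ufunc__ 设为 None,ndarray 的正向二元运算就会返回 NotImplemented,从而把控制权交给对方的反射方法。下面是一个最小示例(类名 DeferToMe 只是示意):

import numpy as np

class DeferToMe:
    __array_ufunc__ = None               # 告诉 NumPy:不要用 ufunc 处理这个对象

    def __radd__(self, other):
        return "DeferToMe.__radd__ 被调用"

print(np.arange(3) + DeferToMe())        # ndarray.__add__ 返回 NotImplemented,落到 __radd__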




.\numpy\numpy\_core\src\common\cblasfuncs.c

/*
 * This module provides a BLAS optimized matrix multiply,
 * inner product and dot for numpy arrays
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE

#define PY_SSIZE_T_CLEAN
#include <Python.h>

#include "numpy/arrayobject.h"
#include "numpy/npy_math.h"
#include "npy_cblas.h"
#include "arraytypes.h"
#include "common.h"
#include "dtypemeta.h"

#include <assert.h>


static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};


/*
 * Helper: dispatch to appropriate cblas_?gemm for typenum.
 */
static void
gemm(int typenum, enum CBLAS_ORDER order,
     enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB,
     npy_intp m, npy_intp n, npy_intp k,
     PyArrayObject *A, npy_intp lda, PyArrayObject *B, npy_intp ldb, PyArrayObject *R)
{
    const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B);
    void *Rdata = PyArray_DATA(R);
    npy_intp ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;

    switch (typenum) {
        case NPY_DOUBLE:
            // Perform double precision matrix multiplication using cblas_dgemm
            CBLAS_FUNC(cblas_dgemm)(order, transA, transB, m, n, k, 1.,
                        Adata, lda, Bdata, ldb, 0., Rdata, ldc);
            break;
        case NPY_FLOAT:
            // Perform single precision matrix multiplication using cblas_sgemm
            CBLAS_FUNC(cblas_sgemm)(order, transA, transB, m, n, k, 1.f,
                        Adata, lda, Bdata, ldb, 0.f, Rdata, ldc);
            break;
        case NPY_CDOUBLE:
            // Perform complex double precision matrix multiplication using cblas_zgemm
            CBLAS_FUNC(cblas_zgemm)(order, transA, transB, m, n, k, oneD,
                        Adata, lda, Bdata, ldb, zeroD, Rdata, ldc);
            break;
        case NPY_CFLOAT:
            // Perform complex single precision matrix multiplication using cblas_cgemm
            CBLAS_FUNC(cblas_cgemm)(order, transA, transB, m, n, k, oneF,
                        Adata, lda, Bdata, ldb, zeroF, Rdata, ldc);
            break;
    }
}


/*
 * Helper: dispatch to appropriate cblas_?gemv for typenum.
 */
static void
gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
     PyArrayObject *A, npy_intp lda, PyArrayObject *X, npy_intp incX,
     PyArrayObject *R)
{
    const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X);
    void *Rdata = PyArray_DATA(R);

    npy_intp m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1);

    switch (typenum) {
        case NPY_DOUBLE:
            // Perform double precision matrix-vector multiplication using cblas_dgemv
            CBLAS_FUNC(cblas_dgemv)(order, trans, m, n, 1., Adata, lda, Xdata, incX,
                        0., Rdata, 1);
            break;
        case NPY_FLOAT:
            // Perform single precision matrix-vector multiplication using cblas_sgemv
            CBLAS_FUNC(cblas_sgemv)(order, trans, m, n, 1.f, Adata, lda, Xdata, incX,
                        0.f, Rdata, 1);
            break;
        case NPY_CDOUBLE:
            // Perform complex double precision matrix-vector multiplication using cblas_zgemv
            CBLAS_FUNC(cblas_zgemv)(order, trans, m, n, oneD, Adata, lda, Xdata, incX,
                        zeroD, Rdata, 1);
            break;
        case NPY_CFLOAT:
            // Perform complex single precision matrix-vector multiplication using cblas_cgemv
            CBLAS_FUNC(cblas_cgemv)(order, trans, m, n, oneF, Adata, lda, Xdata, incX,
                        zeroF, Rdata, 1);
            break;
    }
}
/*
 * Perform a symmetric rank-k update operation using BLAS routines based on the
 * type of elements in the arrays A and R.
 */
static void
syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
     npy_intp n, npy_intp k,
     PyArrayObject *A, npy_intp lda, PyArrayObject *R)
{
    // 获取数组 A 的数据指针
    const void *Adata = PyArray_DATA(A);
    // 获取数组 R 的数据指针
    void *Rdata = PyArray_DATA(R);
    // 计算 R 的第二维度,如果大于 1 则使用该值,否则设为 1
    npy_intp ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;

    npy_intp i;
    npy_intp j;

    // 根据 typenum 的值选择相应的 BLAS 函数进行操作
    switch (typenum) {
        case NPY_DOUBLE:
            // 双精度情况下的 BLAS 函数调用
            CBLAS_FUNC(cblas_dsyrk)(order, CblasUpper, trans, n, k, 1.,
                        Adata, lda, 0., Rdata, ldc);

            // 对称阵处理,调整非对角元素的值
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_double*)PyArray_GETPTR2(R, j, i)) =
                            *((npy_double*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
        case NPY_FLOAT:
            // 单精度情况下的 BLAS 函数调用
            CBLAS_FUNC(cblas_ssyrk)(order, CblasUpper, trans, n, k, 1.f,
                        Adata, lda, 0.f, Rdata, ldc);

            // 对称阵处理,调整非对角元素的值
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_float*)PyArray_GETPTR2(R, j, i)) =
                            *((npy_float*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
        case NPY_CDOUBLE:
            // 复双精度情况下的 BLAS 函数调用
            CBLAS_FUNC(cblas_zsyrk)(order, CblasUpper, trans, n, k, oneD,
                        Adata, lda, zeroD, Rdata, ldc);

            // 对称阵处理,调整非对角元素的值
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_cdouble*)PyArray_GETPTR2(R, j, i)) =
                            *((npy_cdouble*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
        case NPY_CFLOAT:
            // 复单精度情况下的 BLAS 函数调用
            CBLAS_FUNC(cblas_csyrk)(order, CblasUpper, trans, n, k, oneF,
                        Adata, lda, zeroF, Rdata, ldc);

            // 对称阵处理,调整非对角元素的值
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_cfloat*)PyArray_GETPTR2(R, j, i)) =
                            *((npy_cfloat*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
    }
}


/*
 * Determine the shape of the matrix represented by the PyArrayObject.
 * Returns one of _scalar, _column, _row, or _matrix.
 */
static MatrixShape
_select_matrix_shape(PyArrayObject *array)
{
    // 根据数组的维度确定其形状
    switch (PyArray_NDIM(array)) {
        case 0:
            return _scalar;
        case 1:
            // 一维数组,根据第一维大小判断是列向量还是标量
            if (PyArray_DIM(array, 0) > 1)
                return _column;
            return _scalar;
        case 2:
            // 二维数组,根据维度大小判断是列向量、行向量还是矩阵
            if (PyArray_DIM(array, 0) > 1) {
                if (PyArray_DIM(array, 1) == 1)
                    return _column;
                else
                    return _matrix;
            }
            if (PyArray_DIM(array, 1) == 1)
                return _scalar;
            return _row;
    }
    return _matrix;  // 默认返回矩阵形状
}


/*
 * Check if the array strides are misaligned and return 1 if true, 0 otherwise.
 * Ensures that the data segment is aligned with an itemsize address.
 */
NPY_NO_EXPORT int
_bad_strides(PyArrayObject *ap)
{
    int itemsize = PyArray_ITEMSIZE(ap);
    int i, N=PyArray_NDIM(ap);

    // 检查数组的步幅是否对齐,返回值为 1 表示不对齐,为 0 表示对齐
    // 获取数组的步长信息
    npy_intp *strides = PyArray_STRIDES(ap);
    // 获取数组的维度信息
    npy_intp *dims = PyArray_DIMS(ap);

    // 检查数组的数据指针是否按照元素大小对齐
    if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) {
        // 如果不是对齐的,返回错误码 1
        return 1;
    }

    // 遍历数组的每一个维度
    for (i = 0; i < N; i++) {
        // 检查数组的步长是否为负数或者不是元素大小的整数倍
        if ((strides[i] < 0) || (strides[i] % itemsize) != 0) {
            // 如果步长不合法,返回错误码 1
            return 1;
        }
        // 检查数组步长为 0 但维度大于 1 的情况
        if ((strides[i] == 0 && dims[i] > 1)) {
            // 出现这种情况同样返回错误码 1
            return 1;
        }
    }

    // 若所有检查通过,返回正常码 0
    return 0;
}

/*
 * dot(a,b)
 * 返回浮点类型数组 a 和 b 的点积。
 * 与通用的 numpy 等效函数类似,点积的求和是在 a 的最后一个维度和 b 的倒数第二个维度上进行。
 * 注意:第一个参数不是共轭的。
 *
 * 这个函数是供 PyArray_MatrixProduct2 使用的。假定在进入时,数组 ap1 和 ap2 具有相同的数据类型 typenum,
 * 可以是 float、double、cfloat 或 cdouble,并且维度 <= 2。假定 __array_ufunc__ 的处理已经完成。
 */
NPY_NO_EXPORT PyObject *
cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
                    PyArrayObject *out)
{
    PyArrayObject *result = NULL, *out_buf = NULL;
    npy_intp j, lda, ldb;
    npy_intp l;
    int nd;
    npy_intp ap1stride = 0;
    npy_intp dimensions[NPY_MAXDIMS];
    npy_intp numbytes;
    MatrixShape ap1shape, ap2shape;

    // 检查 ap1 是否有不良步长
    if (_bad_strides(ap1)) {
        // 创建 ap1 的副本,按照任意顺序
        PyObject *op1 = PyArray_NewCopy(ap1, NPY_ANYORDER);

        // 释放原 ap1 对象
        Py_DECREF(ap1);
        // 将 op1 转换为 PyArrayObject 类型并赋值给 ap1
        ap1 = (PyArrayObject *)op1;
        if (ap1 == NULL) {
            goto fail;
        }
    }
    // 检查 ap2 是否有不良步长
    if (_bad_strides(ap2)) {
        // 创建 ap2 的副本,按照任意顺序
        PyObject *op2 = PyArray_NewCopy(ap2, NPY_ANYORDER);

        // 释放原 ap2 对象
        Py_DECREF(ap2);
        // 将 op2 转换为 PyArrayObject 类型并赋值给 ap2
        ap2 = (PyArrayObject *)op2;
        if (ap2 == NULL) {
            goto fail;
        }
    }
    // 选择 ap1 和 ap2 的矩阵形状
    ap1shape = _select_matrix_shape(ap1);
    ap2shape = _select_matrix_shape(ap2);

    // 在此处继续执行矩阵乘法的计算
    // 检查是否有一个操作数是标量(scalar)
    if (ap1shape == _scalar || ap2shape == _scalar) {
        PyArrayObject *oap1, *oap2;
        oap1 = ap1; oap2 = ap2;
        /* One of ap1 or ap2 is a scalar */
        // 如果 ap1 是标量,则将 ap2 视为标量
        if (ap1shape == _scalar) {
            /* Make ap2 the scalar */
            PyArrayObject *t = ap1;
            ap1 = ap2;
            ap2 = t;
            ap1shape = ap2shape;
            ap2shape = _scalar;
        }

        // 如果 ap1 的形状为行向量(_row)
        if (ap1shape == _row) {
            // 获取 ap1 的步长(stride)
            ap1stride = PyArray_STRIDE(ap1, 1);
        }
        // 否则,如果 ap1 的维度大于 0
        else if (PyArray_NDIM(ap1) > 0) {
            // 获取 ap1 的步长(stride)
            ap1stride = PyArray_STRIDE(ap1, 0);
        }

        // 如果 ap1 或 ap2 中有任意一个是标量(维度为 0)
        if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
            npy_intp *thisdims;
            // 如果 ap1 是标量
            if (PyArray_NDIM(ap1) == 0) {
                // 使用 ap2 的维度和数量设置 dimensions 数组和 l
                nd = PyArray_NDIM(ap2);
                thisdims = PyArray_DIMS(ap2);
            }
            // 否则,ap2 是标量
            else {
                // 使用 ap1 的维度和数量设置 dimensions 数组和 l
                nd = PyArray_NDIM(ap1);
                thisdims = PyArray_DIMS(ap1);
            }
            // 计算总长度 l 并设置 dimensions 数组
            l = 1;
            for (j = 0; j < nd; j++) {
                dimensions[j] = thisdims[j];
                l *= dimensions[j];
            }
        }
        // 否则,ap1 和 ap2 都不是标量(维度大于 0)
        else {
            // 获取 ap1 和 ap2 的最后一个维度的长度
            l = PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1);

            // 检查 ap1 和 ap2 是否对齐
            if (PyArray_DIM(oap2, 0) != l) {
                // 如果不对齐,调用错误处理函数并跳转到失败位置
                dot_alignment_error(oap1, PyArray_NDIM(oap1) - 1, oap2, 0);
                goto fail;
            }

            // 计算 nd 作为 ap1 和 ap2 的总维度数减去 2
            nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;

            /*
             * nd = 0 or 1 or 2. If nd == 0 do nothing ...
             */

            // 如果 nd 等于 1
            if (nd == 1) {
                /*
                 * Either PyArray_NDIM(ap1) is 1 dim or PyArray_NDIM(ap2) is
                 * 1 dim and the other is 2 dim
                 */
                // 设置 dimensions[0] 为 ap1 或 ap2 的合适维度长度
                dimensions[0] = (PyArray_NDIM(oap1) == 2) ?
                                PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1);
                // 设置 l 为 dimensions[0]
                l = dimensions[0];
                /*
                 * Fix it so that dot(shape=(N,1), shape=(1,))
                 * and dot(shape=(1,), shape=(1,N)) both return
                 * an (N,) array (but use the fast scalar code)
                 */
            }
            // 否则,如果 nd 等于 2
            else if (nd == 2) {
                // 设置 dimensions[0] 和 dimensions[1] 为 ap1 和 ap2 的合适维度长度
                dimensions[0] = PyArray_DIM(oap1, 0);
                dimensions[1] = PyArray_DIM(oap2, 1);
                /*
                 * We need to make sure that dot(shape=(1,1), shape=(1,N))
                 * and dot(shape=(N,1),shape=(1,1)) uses
                 * scalar multiplication appropriately
                 */
                // 如果 ap1 的形状为行向量,则 l 设置为 dimensions[1],否则为 dimensions[0]
                if (ap1shape == _row) {
                    l = dimensions[1];
                }
                else {
                    l = dimensions[0];
                }
            }

            // 检查求和维度是否为 0 大小
            if (PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1) == 0) {
                // 如果是,则 l 设置为 0
                l = 0;
            }
        }
    }
    else {
        /*
         * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
         * Both ap1 and ap2 are vectors or matrices
         */
        // 获取 ap1 的最后一个维度的大小
        l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1);

        // 检查 ap2 的第一个维度是否与 ap1 的最后一个维度大小相等
        if (PyArray_DIM(ap2, 0) != l) {
            // 如果不相等,触发 dot_alignment_error,并跳转到 fail 标签
            dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, 0);
            goto fail;
        }
        // 计算输出数组的维度数
        nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;

        // 根据 nd 的值设置输出数组的维度
        if (nd == 1) {
            dimensions[0] = (PyArray_NDIM(ap1) == 2) ?
                            PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1);
        }
        else if (nd == 2) {
            dimensions[0] = PyArray_DIM(ap1, 0);
            dimensions[1] = PyArray_DIM(ap2, 1);
        }
    }

    // 为求和创建新的数组,返回结果给 out_buf
    out_buf = new_array_for_sum(ap1, ap2, out, nd, dimensions, typenum, &result);
    if (out_buf == NULL) {
        goto fail;
    }

    // 获取输出数组的字节数
    numbytes = PyArray_NBYTES(out_buf);
    // 将输出数组的数据区域清零
    memset(PyArray_DATA(out_buf), 0, numbytes);
    // 如果 numbytes 为零或者 l 为零,则释放对象并返回结果
    if (numbytes == 0 || l == 0) {
            Py_DECREF(ap1);
            Py_DECREF(ap2);
            Py_DECREF(out_buf);
            return PyArray_Return(result);
    }

    }
    else if ((ap2shape == _column) && (ap1shape != _matrix)) {
        NPY_BEGIN_ALLOW_THREADS;

        /* Dot product between two vectors -- Level 1 BLAS */
        // 使用 Level 1 BLAS 计算两个向量的点积
        PyDataType_GetArrFuncs(PyArray_DESCR(out_buf))->dotfunc(
                 PyArray_DATA(ap1), PyArray_STRIDE(ap1, (ap1shape == _row)),
                 PyArray_DATA(ap2), PyArray_STRIDE(ap2, 0),
                 PyArray_DATA(out_buf), l, NULL);
        NPY_END_ALLOW_THREADS;
    }
    else if (ap1shape == _matrix && ap2shape != _matrix) {
        /* Matrix vector multiplication -- Level 2 BLAS */
        /* lda must be MAX(M,1) */
        enum CBLAS_ORDER Order;
        npy_intp ap2s;

        // 如果 ap1 不是单一段,进行复制
        if (!PyArray_ISONESEGMENT(ap1)) {
            PyObject *new;
            new = PyArray_Copy(ap1);
            Py_DECREF(ap1);
            ap1 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        NPY_BEGIN_ALLOW_THREADS
        // 根据 ap1 是否连续设置 Order 和 lda
        if (PyArray_ISCONTIGUOUS(ap1)) {
            Order = CblasRowMajor;
            lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
        }
        else {
            Order = CblasColMajor;
            lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1);
        }
        // 计算 ap2 的步长
        ap2s = PyArray_STRIDE(ap2, 0) / PyArray_ITEMSIZE(ap2);
        // 使用 Level 2 BLAS 计算矩阵向量乘法
        gemv(typenum, Order, CblasNoTrans, ap1, lda, ap2, ap2s, out_buf);
        NPY_END_ALLOW_THREADS;
    }
    // 如果第一个数组是向量而第二个数组是矩阵
    else if (ap1shape != _matrix && ap2shape == _matrix) {
        /* Vector matrix multiplication -- Level 2 BLAS */
        // 定义 CBLAS_ORDER 枚举变量 Order
        enum CBLAS_ORDER Order;
        // 定义 ap1s:ap1 的步幅(以元素个数为单位)
        npy_intp ap1s;

        // 如果 ap2 不是单一内存段,则复制 ap2 并使其连续
        if (!PyArray_ISONESEGMENT(ap2)) {
            PyObject *new;
            new = PyArray_Copy(ap2);
            Py_DECREF(ap2);
            ap2 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        // 进入释放 GIL 的多线程执行区域
        NPY_BEGIN_ALLOW_THREADS
        // 如果 ap2 是 C 连续存储的,则按行主序处理
        if (PyArray_ISCONTIGUOUS(ap2)) {
            Order = CblasRowMajor;
            lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
        }
        // 否则按列主序处理
        else {
            Order = CblasColMajor;
            lda = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1);
        }
        // 根据 ap1shape 类型确定 ap1s 的值
        if (ap1shape == _row) {
            ap1s = PyArray_STRIDE(ap1, 1) / PyArray_ITEMSIZE(ap1);
        }
        else {
            ap1s = PyArray_STRIDE(ap1, 0) / PyArray_ITEMSIZE(ap1);
        }
        // 执行向量-矩阵乘法运算,结果存储在 out_buf 中
        gemv(typenum, Order, CblasTrans, ap2, lda, ap1, ap1s, out_buf);
        // 离开多线程执行区域
        NPY_END_ALLOW_THREADS;
    }
    else {
        /*
         * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2)
         * 矩阵乘法 -- Level 3 BLAS
         *  L x M  乘以 M x N
         */
        enum CBLAS_ORDER Order;
        enum CBLAS_TRANSPOSE Trans1, Trans2;
        npy_intp M, N, L;

        /* 优化可能性: */
        /*
         * 如果适用,可以处理单段数组
         * 使用适当的 Order、Trans1 和 Trans2 的值。
         */
        if (!PyArray_IS_C_CONTIGUOUS(ap2) && !PyArray_IS_F_CONTIGUOUS(ap2)) {
            PyObject *new = PyArray_Copy(ap2);

            Py_DECREF(ap2);
            ap2 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        if (!PyArray_IS_C_CONTIGUOUS(ap1) && !PyArray_IS_F_CONTIGUOUS(ap1)) {
            PyObject *new = PyArray_Copy(ap1);

            Py_DECREF(ap1);
            ap1 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }

        NPY_BEGIN_ALLOW_THREADS;

        Order = CblasRowMajor;
        Trans1 = CblasNoTrans;
        Trans2 = CblasNoTrans;
        L = PyArray_DIM(ap1, 0);
        N = PyArray_DIM(ap2, 1);
        M = PyArray_DIM(ap2, 0);
        lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
        ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);

        /*
         * 避免对 Fortran 排序的数组进行临时拷贝
         */
        if (PyArray_IS_F_CONTIGUOUS(ap1)) {
            Trans1 = CblasTrans;
            lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1);
        }
        if (PyArray_IS_F_CONTIGUOUS(ap2)) {
            Trans2 = CblasTrans;
            ldb = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1);
        }

        /*
         * 如果是矩阵乘以其转置的情况,则使用 syrk 函数。
         * 否则,对所有其他情况使用 gemm 函数。
         */
        if (
            (PyArray_BYTES(ap1) == PyArray_BYTES(ap2)) &&
            (PyArray_DIM(ap1, 0) == PyArray_DIM(ap2, 1)) &&
            (PyArray_DIM(ap1, 1) == PyArray_DIM(ap2, 0)) &&
            (PyArray_STRIDE(ap1, 0) == PyArray_STRIDE(ap2, 1)) &&
            (PyArray_STRIDE(ap1, 1) == PyArray_STRIDE(ap2, 0)) &&
            ((Trans1 == CblasTrans) ^ (Trans2 == CblasTrans)) &&
            ((Trans1 == CblasNoTrans) ^ (Trans2 == CblasNoTrans))
        ) {
            if (Trans1 == CblasNoTrans) {
                syrk(typenum, Order, Trans1, N, M, ap1, lda, out_buf);
            }
            else {
                syrk(typenum, Order, Trans1, N, M, ap2, ldb, out_buf);
            }
        }
        else {
            gemm(typenum, Order, Trans1, Trans2, L, N, M, ap1, lda, ap2, ldb,
                 out_buf);
        }
        NPY_END_ALLOW_THREADS;
    }


    Py_DECREF(ap1);
    Py_DECREF(ap2);

    /* 触发可能的回写到 `result` */
    PyArray_ResolveWritebackIfCopy(out_buf);
    Py_DECREF(out_buf);
    // 将 result 转换为 Python 对象并返回
    return PyArray_Return(result);

fail:
    // 失败路径:递减并清理 ap1、ap2、out_buf、result 的引用计数,返回 NULL 表示函数执行失败
    Py_XDECREF(ap1);
    Py_XDECREF(ap2);
    Py_XDECREF(out_buf);
    Py_XDECREF(result);
    return NULL;
}
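
cblas_matrixproduct 按操作数形状分派到不同 BLAS 级别(标量缩放、向量点积、矩阵-向量、矩阵-矩阵)的逻辑,对应 Python 层 np.dot 的下列行为;是否真正调用 BLAS 取决于 dtype 与编译配置,这里只演示形状语义:

import numpy as np

a = np.random.rand(4, 3)
b = np.random.rand(3, 5)
v = np.random.rand(3)

print(np.dot(2.0, a).shape)     # 标量 x 数组:逐元素缩放,形状不变 -> (4, 3)
print(np.dot(v, v))             # 向量点积:Level 1,返回标量
print(np.dot(a, v).shape)       # 矩阵-向量:Level 2(gemv)-> (4,)
print(np.dot(a, b).shape)       # 矩阵-矩阵:Level 3(gemm)-> (4, 5)
print(np.allclose(np.dot(a, a.T), np.dot(a, a.T).T))   # A 乘 A.T 的结果是对称的,对应上面的 syrk 分支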

.\numpy\numpy\_core\src\common\cblasfuncs.h

#ifndef NUMPY_CORE_SRC_COMMON_CBLASFUNCS_H_
#define NUMPY_CORE_SRC_COMMON_CBLASFUNCS_H_

// 上方的条件编译宏用于避免重复包含该头文件

// 函数声明:计算矩阵乘积的 CBLAS 函数
NPY_NO_EXPORT PyObject *
cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *);

#endif  /* NUMPY_CORE_SRC_COMMON_CBLASFUNCS_H_ */

.\numpy\numpy\_core\src\common\common.hpp

#ifndef NUMPY_CORE_SRC_COMMON_COMMON_HPP
#define NUMPY_CORE_SRC_COMMON_COMMON_HPP
/*
 * 下面的 C++ 头文件可以独立安全使用,但它们被汇集在一起是为了我们和未来对支持预编译头文件的需求方便。
 */
#include "npdef.hpp"           // 导入 npdef.hpp 头文件
#include "utils.hpp"           // 导入 utils.hpp 头文件
#include "npstd.hpp"           // 导入 npstd.hpp 头文件
#include "half.hpp"            // 导入 half.hpp 头文件
#include "meta.hpp"            // 导入 meta.hpp 头文件
#include "float_status.hpp"    // 导入 float_status.hpp 头文件

#endif // NUMPY_CORE_SRC_COMMON_COMMON_HPP

.\numpy\numpy\_core\src\common\dlpack\dlpack.h

// 根据以下链接获取的代码片段:
// https://github.com/dmlc/dlpack/blob/bbd2f4d32427e548797929af08cfe2a9cbb3cf12/include/dlpack/dlpack.h
// 并在其中为 DLManagedTensorVersioned 添加了 typedef

/*!
 *  版权声明,版权归 2017 年 Contributors 所有
 * \file dlpack.h
 * \brief DLPack 的公共头文件
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

/**
 * \brief 与 C++ 的兼容性宏定义
 */
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*! \brief DLPack 当前的主版本号 */
#define DLPACK_MAJOR_VERSION 1

/*! \brief DLPack 当前的次版本号 */
#define DLPACK_MINOR_VERSION 0

/*! \brief DLPACK_DLL 在 Windows 下的前缀定义 */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/*!
 * \brief DLPack 的版本信息结构体
 *
 * 主版本号的改变表示 ABI 的数据布局发生了改变 - DLManagedTensorVersioned。
 * 次版本号的改变表示添加了新的代码,比如新的设备类型,但 ABI 保持不变。
 * 如果获取的 DLPack 张量的主版本号与此头文件中指定的版本号不一致
 * (即 major != DLPACK_MAJOR_VERSION),消费者必须调用删除器函数
 * (并且这样做是安全的)。不能访问其他字段,因为内存布局已经发生了改变。
 *
 * 在次版本号不匹配的情况下,只要消费者知道如何解释所有字段,张量就可以安全使用。
 * 次版本号的更新表示添加了枚举值。
 */
typedef struct {
  /*! \brief DLPack 的主版本号 */
  uint32_t major;
  /*! \brief DLPack 的次版本号 */
  uint32_t minor;
} DLPackVersion;

/*!
 * \brief DLDevice 中的设备类型枚举
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
/*!
 * \brief CPU device
 */
kDLCPU = 1,
/*!
 * \brief CUDA GPU device
 */
kDLCUDA = 2,
/*!
 * \brief Pinned CUDA CPU memory by cudaMallocHost
 */
kDLCUDAHost = 3,
/*!
 * \brief OpenCL devices.
 */
kDLOpenCL = 4,
/*!
 * \brief Vulkan buffer for next generation graphics.
 */
kDLVulkan = 7,
/*!
 * \brief Metal for Apple GPU.
 */
kDLMetal = 8,
/*!
 * \brief Verilog simulator buffer
 */
kDLVPI = 9,
/*!
 * \brief ROCm GPUs for AMD GPUs
 */
kDLROCM = 10,
/*!
 * \brief Pinned ROCm CPU memory allocated by hipMallocHost
 */
kDLROCMHost = 11,
/*!
 * \brief Reserved extension device type,
 *        used for quickly test extension device
 *        The semantics can differ depending on the implementation.
 */
kDLExtDev = 12,
/*!
 * \brief CUDA managed/unified memory allocated by cudaMallocManaged
 */
kDLCUDAManaged = 13,
/*!
 * \brief Unified shared memory allocated on a oneAPI non-partitioned
 *        device. Call to oneAPI runtime is required to determine the device
 *        type, the USM allocation type and the sycl context it is bound to.
 */
kDLOneAPI = 14,
/*!
 * \brief GPU support for next generation WebGPU standard.
 */
kDLWebGPU = 15,
/*!
 * \brief Qualcomm Hexagon DSP
 */
kDLHexagon = 16,
/*!
 * \brief Microsoft MAIA devices
 */
kDLMAIA = 17,
} DLDeviceType;

/*!
 * \brief A Device for Tensor and operator.
 */
typedef struct {
  /*!
   * \brief The device type used in the device.
   */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   *        For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;

/*!
 * \brief The type code options DLDataType.
 */
typedef enum {
  /*!
   * \brief signed integer
   */
  kDLInt = 0U,
  /*!
   * \brief unsigned integer
   */
  kDLUInt = 1U,
  /*!
   * \brief IEEE floating point
   */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   *        Frameworks need to agree on the handle data type for the exchange to be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*!
   * \brief bfloat16
   */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   *        (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*!
   * \brief boolean
   */
  kDLBool = 6U,
} DLDataTypeCode;

/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow the
 *        native endian-ness. An explicit error message should be raised when attempting to
 *        export an array with non-native endianness
 *
 *  Examples
 *   - float: type_code = 2, bits = 32, lanes = 1
 *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
 *   - int8: type_code = 0, bits = 8, lanes = 1
 *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
 */
/*!
 * \brief 定义了描述张量数据类型的结构体。
 *        包含了数据类型的基本信息:代码、位数和张量的通道数。
 */
typedef struct {
  /*!
   * \brief 基本类型的类型代码。
   *        使用 uint8_t 类型而非 DLDataTypeCode 是为了减小内存占用,
   *        但其值应为 DLDataTypeCode 枚举值之一。
   */
  uint8_t code;
  /*!
   * \brief 位数,常见选择为 81632。
   */
  uint8_t bits;
  /*! \brief 张量的通道数,用于向量类型。 */
  uint16_t lanes;
} DLDataType;

/*!
 * \brief 简单的 C 张量对象,不管理内存。
 */
typedef struct {
  /*!
   * \brief 数据指针指向已分配的数据。在 CUDA 设备上,这将是设备指针或 OpenCL 中的 cl_mem 句柄。
   *        在某些设备类型上可能是不透明的。此指针始终按照 CUDA 的标准对齐到 256 字节。
   *        `byte_offset` 字段应用于指向数据的起始位置。
   *
   *        注意:截至 202111 月,多个库(如 CuPy、PyTorch、TensorFlow、TVM 等)在 CPU/CUDA/ROCm 上
   *        不遵循这种 256 字节对齐的要求,并始终使用 `byte_offset=0`。这必须修复
   *        (修复后将更新此注释);目前建议不依赖于数据指针的正确对齐。
   *
   *        对于给定的 DLTensor,存储数据所需的内存大小计算如下:
   *
   *        \code{.c}
   *        static inline size_t GetDataSize(const DLTensor* t) {
   *          size_t size = 1;
   *          for (tvm_index_t i = 0; i < t->ndim; ++i) {
   *            size *= t->shape[i];
   *          }
   *          size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *          return size;
   *        }
   *        \endcode
   *
   *        注意,如果张量的大小为零,则数据指针应设置为 `NULL`。
   */
  void* data;
  /*! \brief 张量所在设备 */
  DLDevice device;
  /*! \brief 张量的维数 */
  int32_t ndim;
  /*! \brief 指向数据类型的指针 */
  DLDataType dtype;
  /*! \brief 张量的形状 */
  int64_t* shape;
  /*!
   * \brief 张量的步长(以元素数而非字节为单位)
   *        可以为 NULL,表示张量是紧凑且按行主序排列。
   */
  int64_t* strides;
  /*! \brief 指向数据起始指针的字节偏移量 */
  uint64_t byte_offset;
} DLTensor;

/*!
 * \brief C 张量对象,管理 DLTensor 的内存。
 *        此数据结构旨在通过另一个框架借用 DLTensor。它不用于传输张量。
 *        当借用框架不再需要张量时,应调用 deleter 通知主机不再需要该资源。
 *
 * \note 此数据结构在 DLPack 交换中被用作 Legacy DLManagedTensor,并在 DLPack v0.8 后已弃用。
 *       推荐使用 DLManagedTensorVersioned 替代。
 *       此数据结构在未来版本中可能会被重命名或删除。
 *
 * \sa DLManagedTensorVersioned
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief the context of the original host framework of DLManagedTensor in
   *   which DLManagedTensor is used in the framework. It can also be NULL.
   */
  void * manager_ctx;
  /*!
   * \brief Destructor - this should be called
   * to destruct the manager_ctx which backs the DLManagedTensor. It can be
   * NULL if there is no way for the caller to provide a reasonable destructor.
   * The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;


// bit masks used in in the DLManagedTensorVersioned

/*! \brief bit mask to indicate that the tensor is read only. */
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)

/*!
 * \brief bit mask to indicate that the tensor is a copy made by the producer.
 *
 * If set, the tensor is considered solely owned throughout its lifetime by the
 * consumer, until the producer-provided deleter is invoked.
 */
#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)


/*!
 * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
 *
 * This data structure is intended to facilitate the borrowing of DLTensor by
 * another framework. It is not meant to transfer the tensor. When the borrowing
 * framework doesn't need the tensor, it should call the deleter to notify the
 * host that the resource is no longer needed.
 *
 * \note This is the current standard DLPack exchange data structure.
 */
typedef struct DLManagedTensorVersioned {
  /*!
   * \brief The API and ABI version of the current managed Tensor
   */
  DLPackVersion version;
  /*!
   * \brief the context of the original host framework.
   *
   * Stores DLManagedTensorVersioned is used in the
   * framework. It can also be NULL.
   */
  void *manager_ctx;
  /*!
   * \brief Destructor.
   *
   * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned.
   * It can be NULL if there is no way for the caller to provide a reasonable
   * destructor. The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensorVersioned *self);
  /*!
   * \brief Additional bitmask flags information about the tensor.
   *
   * By default the flags should be set to 0.
   *
   * \note Future ABI changes should keep everything until this field
   *       stable, to ensure that deleter can be correctly called.
   *
   * \sa DLPACK_FLAG_BITMASK_READ_ONLY
   * \sa DLPACK_FLAG_BITMASK_IS_COPIED
   */
  uint64_t flags;
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
} DLManagedTensorVersioned;


#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_
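
NumPy 通过 ndarray.__dlpack__ / __dlpack_device__ 协议以及 np.from_dlpack(较新版本的 NumPy 提供)使用这份头文件定义的结构与其他框架零拷贝地交换数组。一个最小的自我交换示例:

import numpy as np

x = np.arange(5, dtype=np.float32)
print(x.__dlpack_device__())     # 通常为 (1, 0):kDLCPU 设备,设备号 0

y = np.from_dlpack(x)            # 通过 DLPack 协议导入,得到共享同一块内存的数组
y[0] = 99.0
print(x[0])                      # 99.0,说明没有发生拷贝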

.\numpy\numpy\_core\src\common\float_status.hpp

#ifndef NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
#define NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP

#include "npstd.hpp"

#include <fenv.h>

namespace np {

/// @addtogroup cpp_core_utility
/// @{
/**
 * Class wraps floating-point environment operations,
 * provides lazy access to its functionality.
 */
class FloatStatus {
 public:
/*
 * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
 * unsupported.  In such cases NumPy will not report these correctly, but we
 * should still allow compiling (whether tests pass or not).
 * By defining them as 0 locally, we make them no-ops.  Unlike these defines,
 * for example `musl` still defines all of the functions (as no-ops):
 *     https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
 * and does similar replacement in its tests:
 * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
 */

#ifdef FE_DIVBYZERO
    // If FE_DIVBYZERO is defined in <fenv.h>, use its value; otherwise, use 0
    static constexpr int kDivideByZero = FE_DIVBYZERO;
#else
    static constexpr int kDivideByZero = 0;
#endif

#ifdef FE_INVALID
    // If FE_INVALID is defined in <fenv.h>, use its value; otherwise, use 0
    static constexpr int kInvalid = FE_INVALID;
#else
    static constexpr int kInvalid = 0;
#endif

#ifdef FE_INEXACT
    // If FE_INEXACT is defined in <fenv.h>, use its value; otherwise, use 0
    static constexpr int kInexact = FE_INEXACT;
#else
    static constexpr int kInexact = 0;
#endif

#ifdef FE_OVERFLOW
    // If FE_OVERFLOW is defined in <fenv.h>, use its value; otherwise, use 0
    static constexpr int kOverflow = FE_OVERFLOW;
#else
    static constexpr int kOverflow = 0;
#endif

#ifdef FE_UNDERFLOW
    // If FE_UNDERFLOW is defined in <fenv.h>, use its value; otherwise, use 0
    static constexpr int kUnderflow = FE_UNDERFLOW;
#else
    static constexpr int kUnderflow = 0;
#endif

    // Calculate the bitwise OR of all supported floating-point exceptions
    static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
                                       kOverflow | kUnderflow);

    // Constructor initializes the floating-point status
    FloatStatus(bool clear_on_dst=true)
        : clear_on_dst_(clear_on_dst)
    {
        // If any floating-point exceptions are supported, fetch the current status
        if constexpr (kAllExcept != 0) {
            fpstatus_ = fetestexcept(kAllExcept);
        }
        else {
            fpstatus_ = 0;
        }
    }

    // Destructor clears floating-point exceptions if required
    ~FloatStatus()
    {
        // If any floating-point exceptions are supported and set, clear them
        if constexpr (kAllExcept != 0) {
            if (fpstatus_ != 0 && clear_on_dst_) {
                feclearexcept(kAllExcept);
            }
        }
    }

    // Check if Divide By Zero exception is set
    constexpr bool IsDivideByZero() const
    {
        return (fpstatus_ & kDivideByZero) != 0;
    }

    // Check if Inexact exception is set
    constexpr bool IsInexact() const
    {
        return (fpstatus_ & kInexact) != 0;
    }

    // Check if Invalid exception is set
    constexpr bool IsInvalid() const
    {
        return (fpstatus_ & kInvalid) != 0;
    }

    // Check if Overflow exception is set
    constexpr bool IsOverFlow() const
    {
        return (fpstatus_ & kOverflow) != 0;
    }

    // Check if Underflow exception is set
    constexpr bool IsUnderFlow() const
    {
        return (fpstatus_ & kUnderflow) != 0;
    }

    // Raise Divide By Zero exception
    static void RaiseDivideByZero()
    {
        if constexpr (kDivideByZero != 0) {
            feraiseexcept(kDivideByZero);
        }
    }

    // Raise Inexact exception
    static void RaiseInexact()
    {
        if constexpr (kInexact != 0) {
            feraiseexcept(kInexact);
        }
    }

    // Raise Invalid exception
    static void RaiseInvalid()
    {
        if constexpr (kInvalid != 0) {
            feraiseexcept(kInvalid);
        }
    }

    // 静态函数:触发溢出异常
    static void RaiseOverflow()
    {
        // 如果 kOverflow 不为零,则触发对应的浮点异常
        if constexpr (kOverflow != 0) {
            feraiseexcept(kOverflow);
        }
    }
    // 静态函数:触发下溢异常
    static void RaiseUnderflow()
    {
        // 如果 kUnderflow 不为零,则触发对应的浮点异常
        if constexpr (kUnderflow != 0) {
            feraiseexcept(kUnderflow);
        }
    }

  private:
    // 是否在析构时清除已置位的浮点状态标志
    bool clear_on_dst_;
    // 构造时捕获到的浮点运算状态
    int fpstatus_;
};

/// @} cpp_core_utility

// 结束命名空间 np
} // namespace np

#endif // NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
// 结束条件编译指令 NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
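
FloatStatus 封装的正是 NumPy 浮点错误处理所依赖的 fenv 状态读取与清除。在 Python 层,对应的可观察行为由 np.errstate / np.seterr 控制,例如:

import numpy as np

with np.errstate(divide='warn'):
    np.array([1.0]) / 0.0            # 置位 divide-by-zero 状态,默认给出 RuntimeWarning

with np.errstate(divide='raise'):
    try:
        np.array([1.0]) / 0.0
    except FloatingPointError as e:
        print("捕获到:", e)          # divide by zero encountered ...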

.\numpy\numpy\_core\src\common\get_attr_string.h

#ifndef NUMPY_CORE_SRC_COMMON_GET_ATTR_STRING_H_
#define NUMPY_CORE_SRC_COMMON_GET_ATTR_STRING_H_

#include <Python.h>
#include "ufunc_object.h"

// 定义内联函数,用于检查是否为基本的 Python 类型
static inline npy_bool
_is_basic_python_type(PyTypeObject *tp)
{
    return (
        /* 基本的数值类型 */
        tp == &PyBool_Type ||
        tp == &PyLong_Type ||
        tp == &PyFloat_Type ||
        tp == &PyComplex_Type ||

        /* 基本的序列类型 */
        tp == &PyList_Type ||
        tp == &PyTuple_Type ||
        tp == &PyDict_Type ||
        tp == &PySet_Type ||
        tp == &PyFrozenSet_Type ||
        tp == &PyUnicode_Type ||
        tp == &PyBytes_Type ||

        /* 其他内建类型 */
        tp == &PySlice_Type ||
        tp == Py_TYPE(Py_None) ||
        tp == Py_TYPE(Py_Ellipsis) ||
        tp == Py_TYPE(Py_NotImplemented) ||

        /* TODO: ndarray,但是在此处我们无法看到 PyArray_Type */

        /* 结尾的哨兵,用于吸收末尾的 || */
        NPY_FALSE
    );
}


/*
 * 查找特殊方法,遵循 Python 的查找方式,查找类型对象而不是实例本身。
 *
 * 假设特殊方法是特定于 numpy 的,因此不查看内建类型。但会检查基本的 ndarray 和 numpy 标量类型。
 *
 * 未来可以更像 _Py_LookupSpecial 的实现。
 */
static inline PyObject *
PyArray_LookupSpecial(PyObject *obj, PyObject *name_unicode)
{
    PyTypeObject *tp = Py_TYPE(obj);

    /* 不需要在简单类型上检查特殊属性 */
    if (_is_basic_python_type(tp)) {
        return NULL;
    }
    
    // 获取类型对象 tp 的特定属性
    PyObject *res = PyObject_GetAttr((PyObject *)tp, name_unicode);

    // 处理属性获取异常
    if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
        PyErr_Clear();
    }

    return res;
}


/*
 * PyArray_LookupSpecial_OnInstance:
 *
 * 实现了不正确的特殊方法查找规则,违反了 Python 的约定,查找实例而不是类型。
 *
 * 为了向后兼容而保留。未来应该弃用此功能。
 */
static inline PyObject *
PyArray_LookupSpecial_OnInstance(PyObject *obj, PyObject *name_unicode)
{
    PyTypeObject *tp = Py_TYPE(obj);

    /* 不需要在简单类型上检查特殊属性 */
    if (_is_basic_python_type(tp)) {
        return NULL;
    }

    // 获取实例对象 obj 的特定属性
    PyObject *res = PyObject_GetAttr(obj, name_unicode);

    // 处理属性获取异常
    if (res == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
        PyErr_Clear();
    }

    return res;
}

#endif  /* NUMPY_CORE_SRC_COMMON_GET_ATTR_STRING_H_ */
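
这两个查找函数的差别可以用纯 Python 近似表达:符合 Python 约定的特殊方法查找只看类型(及其 MRO),而旧式的实例查找会把实例属性也算进去。下面是一个语义草图(lookup_special 等名字只是示意,并非 NumPy 的实际实现):

def lookup_special(obj, name):
    # 近似 PyArray_LookupSpecial:只在 type(obj) 上查找,忽略实例 __dict__
    return getattr(type(obj), name, None)

def lookup_special_on_instance(obj, name):
    # 近似 PyArray_LookupSpecial_OnInstance:直接在实例上查找(包含实例属性)
    return getattr(obj, name, None)

class A:
    pass

a = A()
a.__array_ufunc__ = "instance-only"                        # 只设置在实例上
print(lookup_special(a, "__array_ufunc__"))                # None:类型上没有
print(lookup_special_on_instance(a, "__array_ufunc__"))    # 'instance-only'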

.\numpy\numpy\_core\src\common\gil_utils.c

// 定义宏:禁用已废弃的 NumPy API,并使用当前版本的 API
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
// 定义宏:用于多维数组模块
#define _MULTIARRAYMODULE



// 定义宏:启用 "PY_SSIZE_T_CLEAN",使 Python C API 中的长度/大小参数统一使用 Py_ssize_t 类型
#define PY_SSIZE_T_CLEAN
// 引入 Python.h 头文件,包含了 Python C API 的核心功能
#include <Python.h>



// 引入 numpy/ndarraytypes.h 头文件,该文件定义了 NumPy 的数组类型和相关的宏定义
#include <numpy/ndarraytypes.h>



// 引入 stdarg.h 头文件,提供了支持可变参数函数的宏定义和类型
#include <stdarg.h>




// 函数定义:npy_gil_error,用于在释放全局解释器锁 (GIL) 期间出现错误时报告错误
NPY_NO_EXPORT void
npy_gil_error(PyObject *type, const char *format, ...)
{
    // 定义可变参数列表
    va_list va;
    // 初始化可变参数列表
    va_start(va, format);
    // 定义并允许 NumPy C API 使用
    NPY_ALLOW_C_API_DEF;
    NPY_ALLOW_C_API;
    // 如果没有已设置的异常
    if (!PyErr_Occurred()) {
        // 如果不是 PyPy 版本
#if !defined(PYPY_VERSION)
        // 使用格式化字符串和可变参数设置异常
        PyErr_FormatV(type, format, va);
#else
        // 否则,根据格式化字符串和可变参数创建 Python Unicode 字符串对象
        PyObject *exc_str = PyUnicode_FromFormatV(format, va);
        // 如果创建失败
        if (exc_str == NULL) {
            // 没有特别处理此错误情况的原因,因为该函数无论如何都会设置错误
            NPY_DISABLE_C_API;
            // 结束可变参数列表
            va_end(va);
            // 返回
            return;
        }
        // 将 Python 对象设置为异常对象
        PyErr_SetObject(type, exc_str);
        // 释放 Python 对象的引用计数
        Py_DECREF(exc_str);
#endif
    }
    // 禁用 NumPy C API 使用
    NPY_DISABLE_C_API;
    // 结束可变参数列表
    va_end(va);
}

.\numpy\numpy\_core\src\common\gil_utils.h

#ifndef NUMPY_CORE_SRC_COMMON_GIL_UTILS_H_
#define NUMPY_CORE_SRC_COMMON_GIL_UTILS_H_

#ifdef __cplusplus
extern "C" {
#endif

// 声明一个不导出的函数,用于处理 GIL 错误,接受一个异常类型和格式化字符串参数
NPY_NO_EXPORT void
npy_gil_error(PyObject *type, const char *format, ...);

#ifdef __cplusplus
}
#endif

#endif /* NUMPY_CORE_SRC_COMMON_GIL_UTILS_H_ */

.\numpy\numpy\_core\src\common\half.hpp

#ifndef NUMPY_CORE_SRC_COMMON_HALF_HPP
#define NUMPY_CORE_SRC_COMMON_HALF_HPP

#include "npstd.hpp"

#include "npy_cpu_dispatch.h" // NPY_HAVE_CPU_FEATURES
#include "half_private.hpp"

// TODO(@seiko2plus):
// - covers half-precision operations that being supported by numpy/halffloat.h
// - add support for arithmetic operations
// - enables __fp16 causes massive FP exceptions on aarch64,
//   needs a deep investigation

namespace np {

/// @addtogroup cpp_core_types
/// @{

/// Provides a type that implements 16-bit floating point (half-precision).
/// This type is ensured to be 16-bit size.
#if 1 // ndef __ARM_FP16_FORMAT_IEEE
class Half final {
  public:
    /// Whether `Half` has a full native HW support.
    static constexpr bool kNative = false;
    /// Whether `Half` has a native HW support for single/double conversion.
    template<typename T>
    static constexpr bool kNativeConversion = (
        (
            std::is_same_v<T, float> &&
        #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
            true
        #else
            false
        #endif
        ) || (
            std::is_same_v<T, double> &&
        #if defined(NPY_HAVE_AVX512FP16) || (defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE))
            true
        #else
            false
        #endif
        )
    );

    /// Default constructor. initialize nothing.
    Half() = default;

    /// Construct from float
    /// If there are no hardware optimization available, rounding will always
    /// be set to ties to even.
    explicit Half(float f)
    {
    #if defined(NPY_HAVE_FP16)
        // Load float `f` into SSE register
        __m128 mf = _mm_load_ss(&f);
        // Convert float to half-precision and store in bits_
        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
        // Load float `f` into vector register
        __vector float vf32 = vec_splats(f);
        __vector unsigned short vf16;
        // Convert float to half-precision using VSX3 instruction set
        __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
        #ifdef __BIG_ENDIAN__
        bits_ = vec_extract(vf16, 1); // Extract half-precision value
        #else
        bits_ = vec_extract(vf16, 0); // Extract half-precision value
        #endif
    #else
        // Fallback: convert float `f` to half-precision using software implementation
        bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
    #endif
    }

    /// Construct from double.
    /// If there are no hardware optimization available, rounding will always
    /// be set to ties to even.
    explicit Half(double f)
    {
    #if defined(NPY_HAVE_AVX512FP16)
        // Load double `f` into SSE register
        __m128d md = _mm_load_sd(&f);
        // Convert double to half-precision and store in bits_
        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
        // Convert double `f` to half-precision using VSX3 instruction set
        __asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits_) : "wa" (f));
    #else
        // Fallback: convert double `f` to half-precision using software implementation
        bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
    #endif
    }

    /// Cast to float
    explicit operator float() const
    {
    #if defined(NPY_HAVE_FP16)
        float ret;
        // Convert half-precision to float
        _mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
        return ret;
    #elif defined(NPY_HAVE_VSX3) && defined(vec_extract_fp_from_shorth)
        // 如果定义了 NPY_HAVE_VSX3 和 vec_extract_fp_from_shorth,使用它们来提取特定位数的浮点数
        return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
        // 如果定义了 NPY_HAVE_VSX3 和 NPY_HAVE_VSX_ASM,使用内联汇编来执行类型转换操作
        __vector float vf32;
        __asm__ __volatile__("xvcvhpsp %x0,%x1"
                             : "=wa"(vf32)
                             : "wa"(vec_splats(bits_)));
        return vec_extract(vf32, 0);
    #else
        // 否则,使用 BitCast 进行类型转换并返回结果
        return BitCast<float>(half_private::ToFloatBits(bits_));
    #endif
    }

    /// Cast to double
    explicit operator double() const
    {
    #if defined(NPY_HAVE_AVX512FP16)
        // 如果定义了 NPY_HAVE_AVX512FP16,使用 AVX512 指令集进行半精度到双精度的转换
        double ret;
        _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
        return ret;
    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
        // 如果定义了 NPY_HAVE_VSX3 和 NPY_HAVE_VSX3_HALF_DOUBLE,使用 VSX3 指令集进行半精度到双精度的转换
        double f64;
        __asm__ __volatile__("xscvhpdp %x0,%x1"
                             : "=wa"(f64)
                             : "wa"(bits_));
        return f64;
    #else
        // 否则,使用 BitCast 进行类型转换并返回结果
        return BitCast<double>(half_private::ToDoubleBits(bits_));
    #endif
    }

    /// Returns a new Half constructed from the IEEE 754 binary16.
    static constexpr Half FromBits(uint16_t bits)
    {
        // 使用给定的 16 位表示构造一个新的 Half 类型对象
        Half h{};
        h.bits_ = bits;
        return h;
    }
    /// Returns the IEEE 754 binary16 representation.
    constexpr uint16_t Bits() const
    {
        // 返回当前 Half 对象的 16 位表示
        return bits_;
    }

    /// @name Comparison operators (ordered)
    /// @{
    constexpr bool operator==(Half r) const
    {
        // 比较两个 Half 对象是否相等,NaN 时返回 false
        return !(IsNaN() || r.IsNaN()) && Equal(r);
    }
    constexpr bool operator<(Half r) const
    {
        // 比较当前 Half 对象是否小于另一个 Half 对象,NaN 时返回 false
        return !(IsNaN() || r.IsNaN()) && Less(r);
    }
    constexpr bool operator<=(Half r) const
    {
        // 比较当前 Half 对象是否小于等于另一个 Half 对象,NaN 时返回 false
        return !(IsNaN() || r.IsNaN()) && LessEqual(r);
    }
    constexpr bool operator>(Half r) const
    {
        // 比较当前 Half 对象是否大于另一个 Half 对象,NaN 时返回通过反向比较结果
        return r < *this;
    }
    constexpr bool operator>=(Half r) const
    {
        // 比较当前 Half 对象是否大于等于另一个 Half 对象,NaN 时返回通过反向比较结果
        return r <= *this;
    }
    /// @}

    /// @name Comparison operators (unordered)
    /// @{
    constexpr bool operator!=(Half r) const
    {
        // 比较两个 Half 对象是否不相等,NaN 时返回 true
        return !(*this == r);
    }
    /// @} Comparison operators

    /// @name Comparison with no guarantee of NaN behavior
    /// @{
    constexpr bool Less(Half r) const
    {
        // 比较两个 Half 对象的大小,忽略 NaN 的特殊处理
        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
                      b = static_cast<uint_fast16_t>(r.bits_);
        bool sign_a = (a & 0x8000u) == 0x8000u;
        bool sign_b = (b & 0x8000u) == 0x8000u;
        // 如果两个数符号相同
        //   如果 a 有符号并且 a < b,则返回 true,并确保 a 不等于 b,以避免两者均为 +-0 的情况
        //   如果 a 无符号并且 a < b,则返回 true
        //   否则返回 false
        // 如果两个数符号不同
        //   如果 a 为负数,并且 a 和 b 不全为 0x8000u,则返回 true
        //   否则返回 false
        return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
                                  : sign_a && ((a | b) != 0x8000u);
    }
    // 定义一个 constexpr 方法 LessEqual,用于比较当前 Half 对象与参数 r 的大小关系
    constexpr bool LessEqual(Half r) const
    {
        // 将当前对象和参数 r 的 bits_ 转换为 uint_fast16_t 类型
        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
                      b = static_cast<uint_fast16_t>(r.bits_);
        // 检查当前对象和参数 r 的符号位
        bool sign_a = (a & 0x8000u) == 0x8000u;
        bool sign_b = (b & 0x8000u) == 0x8000u;
        
        // 如果符号相同
        //   测试 `a` > `b` 当 `a` 为正号时
        //       或者 `a` < `b` 当 `a` 不是正号时
        //       或者 a == b (即使上面使用了 <= 也需要测试 +-0 情况)
        // 否则
        //   测试 `a` 是否为正号
        //       或者 `a` 和 `b` 是否都等于 +-0.0
        return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
                                  : sign_a || ((a | b) == 0x8000u);
    }
    
    // 定义一个 constexpr 方法 Equal,用于比较当前 Half 对象与参数 r 是否相等
    constexpr bool Equal(Half r) const
    {
        // 不使用 fast16 转换,因为解包操作可能涉及
        uint16_t a = bits_, b = r.bits_;
        // 检查当前对象和参数 r 是否相等
        return a == b || ((a | b) == 0x8000u);
    }
    
    /// @} Comparison
    
    /// @name Properties
    // @{
    
    // 定义一个 constexpr 方法 IsNaN,用于检查当前 Half 对象是否表示 NaN
    constexpr bool IsNaN() const
    {
        // 检查 bits_ 是否符合 NaN 的二进制表示条件
        return ((bits_ & 0x7c00u) == 0x7c00u) &&
               ((bits_ & 0x03ffu) != 0);
    }
    /// @} Properties
    
  private:
    uint16_t bits_;
#else // __ARM_FP16_FORMAT_IEEE
// Native half-precision support is available: Half simply wraps the
// compiler's __fp16 type.
class Half final {
  public:
    // This specialization uses the native half-precision format
    static constexpr bool kNative = true;
    // Whether T can be converted natively
    template<typename T>
    static constexpr bool kNativeConversion = (
        std::is_same_v<T, float> || std::is_same_v<T, double>
    );

    // Default constructor
    Half() = default;

    // Construct a Half from a native __fp16 value
    constexpr Half(__fp16 h) : half_(h)
    {}

    // Implicit conversion back to __fp16
    constexpr operator __fp16() const
    { return half_; }

    // Construct a Half from a raw bit pattern
    static Half FromBits(uint16_t bits)
    {
        Half h;
        h.half_ = BitCast<__fp16>(bits);
        return h;
    }

    // Return the raw bit pattern of this Half
    uint16_t Bits() const
    { return BitCast<uint16_t>(half_); }

    // Less-than comparison, delegated to the native type
    constexpr bool Less(Half r) const
    { return half_ < r.half_; }

    // Less-than-or-equal comparison, delegated to the native type
    constexpr bool LessEqual(Half r) const
    { return half_ <= r.half_; }

    // Equality comparison, delegated to the native type
    constexpr bool Equal(Half r) const
    { return half_ == r.half_; }

    // NaN test: a NaN is the only value that compares unequal to itself
    constexpr bool IsNaN() const
    { return half_ != half_; }

  private:
    // The underlying native __fp16 value
    __fp16 half_;
};
#endif // __ARM_FP16_FORMAT_IEEE

/// @} cpp_core_types

} // namespace np

#endif // NUMPY_CORE_SRC_COMMON_HALF_HPP
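
As a quick illustration of the sign-magnitude trick used by Less() and LessEqual() above, here is a minimal standalone sketch. The helper name HalfBitsLess and the test values are ours, not part of NumPy; it assumes the standard IEEE 754 binary16 layout and, like Less(), deliberately ignores NaNs:

// Standalone sketch of the bit-level ordering used by Half::Less above.
#include <cstdint>
#include <cstdio>

static bool HalfBitsLess(uint16_t a, uint16_t b)
{
    bool sign_a = (a & 0x8000u) != 0;
    bool sign_b = (b & 0x8000u) != 0;
    // Same logic as Less(): flip the unsigned comparison for negative values,
    // and treat +0.0/-0.0 (0x0000/0x8000) as equal.
    return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
                              : sign_a && ((a | b) != 0x8000u);
}

int main()
{
    // 0x3c00 = 1.0, 0x4000 = 2.0, 0xbc00 = -1.0, 0xc000 = -2.0,
    // 0x8000 = -0.0, 0x0000 = +0.0
    std::printf("%d\n", HalfBitsLess(0x3c00, 0x4000)); // 1:  1.0 <  2.0
    std::printf("%d\n", HalfBitsLess(0xbc00, 0x3c00)); // 1: -1.0 <  1.0
    std::printf("%d\n", HalfBitsLess(0xc000, 0xbc00)); // 1: -2.0 < -1.0
    std::printf("%d\n", HalfBitsLess(0x8000, 0x0000)); // 0: -0.0 == +0.0
    return 0;
}

Negative halves sort in the opposite direction of their raw bit patterns, which is why the unsigned comparison is XOR-ed with the sign bit, and the (a | b) term is what makes the two zeros compare as equal.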

.\numpy\numpy\_core\src\common\half_private.hpp

#ifndef NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
#define NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP

#include "npstd.hpp"
#include "float_status.hpp"

/*
 * The following functions, which emulate float/double/half conversions, are
 * copied from npymath without any changes to their functionality.
 */
namespace np { namespace half_private {

template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
inline uint16_t FromFloatBits(uint32_t f)
{
    uint32_t f_exp, f_sig;
    uint16_t h_sgn, h_exp, h_sig;

    // Extract the sign bit from the float representation and move it to the appropriate position in half precision
    h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
    // Extract the exponent bits from the float representation
    f_exp = (f&0x7f800000u);

    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (f_exp >= 0x47800000u) {
        if (f_exp == 0x7f800000u) {
            /* Inf or NaN */
            // Extract the significand bits from the float representation
            f_sig = (f&0x007fffffu);
            if (f_sig != 0) {
                /* NaN - propagate the flag in the significand... */
                // Convert NaN to half precision format while preserving the NaN flag
                uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return h_sgn + ret;
            } else {
                /* signed inf */
                return (uint16_t) (h_sgn + 0x7c00u);
            }
        } else {
            if constexpr (gen_overflow) {
                /* overflow to signed inf */
                // Raise overflow exception if enabled
                FloatStatus::RaiseOverflow();
            }
            return (uint16_t) (h_sgn + 0x7c00u);
        }
    }

    /* Exponent underflow converts to a subnormal half or signed zero */
    // Everything at or below this exponent becomes a subnormal or a zero
    if (f_exp <= 0x38000000u) {
        /*
         * Signed zeros, subnormal floats, and floats with small
         * exponents all convert to signed zero half-floats.
         */
        if (f_exp < 0x33000000u) {
            if constexpr (gen_underflow) {
                /* If f != 0, it underflowed to 0 */
                if ((f&0x7fffffff) != 0) {
                    FloatStatus::RaiseUnderflow();
                }
            }
            // Return just the sign bit (a signed zero half)
            return h_sgn;
        }
        /* Construct the subnormal significand */
        f_exp >>= 23;
        f_sig = (0x00800000u + (f&0x007fffffu));
        if constexpr (gen_underflow) {
            /* If it's not exactly representable, it underflowed */
            if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
                FloatStatus::RaiseUnderflow();
            }
        }
        /*
         * Usually the significand is shifted right by 13.  For subnormals an
         * additional shift is needed: the largest exponent giving a subnormal
         * is `f_exp = 0x38000000 >> 23 = 112`, which offsets the new leading
         * bit.  The shift is at most 1 + 10 bits.
         */
        f_sig >>= (113 - f_exp);
        /* Handle rounding by adding 1 to the bit beyond half precision */
        if constexpr (round_even) {
            /*
             * If the last bit in the half significand is 0 (already even)
             * and the remaining bit pattern is 1000...0, we do not add one
             * to the bit after the half significand.  However, the
             * (113 - f_exp) shift can lose up to 11 bits, so the || checks
             * for them in the original value.  In all other cases, we add one.
             */
            if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
                f_sig += 0x00001000u;
            }
        }
        else {
            f_sig += 0x00001000u;
        }
        h_sig = (uint16_t) (f_sig >> 13);
        /*
         * If the rounding caused a bit to spill into h_exp, it will
         * increment from zero to one and h_sig will be zero.  That is the
         * correct result.
         */
        return (uint16_t) (h_sgn + h_sig);
    }

    /* Regular case with no overflow or underflow */
    h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    f_sig = (f&0x007fffffu);
    if constexpr (round_even) {
        /*
         * If the last bit in the half significand is 0 (already even) and
         * the remaining bit pattern is 1000...0, we do not add one to the
         * bit after the half significand.  In all other cases, we do.
         */
        if ((f_sig&0x00003fffu) != 0x00001000u) {
            f_sig += 0x00001000u;
        }
    }
    else {
        f_sig += 0x00001000u;
    }
    h_sig = (uint16_t) (f_sig >> 13);
    /*
     * If the rounding caused a bit to spill into h_exp, h_exp will be
     * incremented by one and h_sig will become zero.  That is the correct
     * result.  h_exp may increment to 15 at most, in which case the result
     * overflows to a signed infinity.
     */
    if constexpr (gen_overflow) {
        h_sig += h_exp;
        // A value of 0x7c00u means the result overflowed to signed infinity
        if (h_sig == 0x7c00u) {
            FloatStatus::RaiseOverflow();
        }
        // Return sign bit + combined exponent/significand
        return h_sgn + h_sig;
    }
    else {
        // Return sign bit + exponent + significand
        return h_sgn + h_exp + h_sig;
    }
}

/// Converts IEEE 754 binary64 bits to binary16 bits.
template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
inline uint16_t FromDoubleBits(uint64_t d)
{
        uint64_t d_exp, d_sig;
        uint16_t h_sgn, h_exp, h_sig;

        // Extract the sign bit and shift it into the half-precision position
        h_sgn = (d & 0x8000000000000000ULL) >> 48;
        // Extract the exponent bits of the double
        d_exp = (d & 0x7ff0000000000000ULL);

        /* Exponent overflow/NaN converts to signed inf/NaN */
        if (d_exp >= 0x40f0000000000000ULL) {
            if (d_exp == 0x7ff0000000000000ULL) {
                /* Inf or NaN */
                // Extract the significand bits of the double
                d_sig = (d & 0x000fffffffffffffULL);
                if (d_sig != 0) {
                    /* NaN - propagate the flag into the significand... */
                    uint16_t ret = (uint16_t)(0x7c00u + (d_sig >> 42));
                    /* ...but make sure it stays a NaN */
                    if (ret == 0x7c00u) {
                        ret++;
                    }
                    return h_sgn + ret;
                } else {
                    /* signed inf */
                    return h_sgn + 0x7c00u;
                }
            } else {
                /* overflow to signed inf */
                if constexpr (gen_overflow) {
                    FloatStatus::RaiseOverflow();
                }
                return h_sgn + 0x7c00u;
            }
        }

        /* Exponent underflow converts to a subnormal half or signed zero */
    // Everything at or below this exponent becomes a subnormal or a zero
    if (d_exp <= 0x3f00000000000000ULL) {
        /*
         * Signed zeros, subnormal doubles, and doubles with small exponents
         * all convert to signed zero half-floats.
         */
        if (d_exp < 0x3e60000000000000ULL) {
            if constexpr (gen_underflow) {
                /* If d != 0, it underflowed to 0 */
                if ((d&0x7fffffffffffffffULL) != 0) {
                    FloatStatus::RaiseUnderflow();
                }
            }
            // Return just the sign bit (a signed zero half)
            return h_sgn;
        }
        /* Construct the subnormal significand */
        d_exp >>= 52;
        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
        if constexpr (gen_underflow) {
            /* If it's not exactly representable, it underflowed */
            if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
                FloatStatus::RaiseUnderflow();
            }
        }
        /*
         * Unlike floats, doubles have enough room to shift left to align
         * the subnormal significand without losing the last bits.  The
         * smallest possible exponent giving a subnormal is
         * `d_exp = 0x3e60000000000000 >> 52 = 998`; all larger subnormals
         * are shifted relative to it.  This adds a shift of 10 + 1 bits
         * compared with the right shift in the normal branch.
         */
        assert(d_exp - 998 >= 0);
        d_sig <<= (d_exp - 998);
        /* Handle rounding by adding 1 to the bit beyond half precision */
        if constexpr (round_even) {
            /*
             * If the last bit in the half significand is 0 (already even)
             * and the remaining bit pattern is 1000...0, we do not add one
             * to the bit after the half significand.  In all other cases,
             * we do.
             */
            if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
                d_sig += 0x0010000000000000ULL;
            }
        }
        else {
            d_sig += 0x0010000000000000ULL;
        }
        h_sig = (uint16_t) (d_sig >> 53);
        /*
         * If the rounding caused a bit to spill into h_exp, it will
         * increment from zero to one and h_sig will be zero.  That is the
         * correct result.
         */
        return h_sgn + h_sig;
    }

    /* Regular case with no overflow or underflow */
    h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    d_sig = (d&0x000fffffffffffffULL);
    if constexpr (round_even) {
        /*
         * If the last bit in the half significand is 0 (already even) and
         * the remaining bit pattern is 1000...0, we do not add one to the
         * bit after the half significand.  In all other cases, we do.
         */
        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
            d_sig += 0x0000020000000000ULL;
        }
    }
    else {
        d_sig += 0x0000020000000000ULL;
    }
    // Shift d_sig right by 42 bits and keep the low 16 bits
    h_sig = (uint16_t) (d_sig >> 42);

    /*
     * If the rounding caused a bit to spill into h_exp, h_exp will be
     * incremented by one and h_sig will become zero.  That is the correct
     * result.  h_exp may increment to 15 at most, in which case the result
     * overflows to a signed infinity.
     */
    if constexpr (gen_overflow) {
        h_sig += h_exp;
        // A value of 0x7c00u means the result overflowed to signed infinity
        if (h_sig == 0x7c00u) {
            FloatStatus::RaiseOverflow();
        }
        return h_sgn + h_sig;
    }
    else {
        return h_sgn + h_exp + h_sig;
    }
}

/// Converts IEEE 754 binary16 bits to binary32 bits.
constexpr uint32_t ToFloatBits(uint16_t h)
{
    // Extract the exponent field of h
    uint16_t h_exp = (h&0x7c00u);
    // Extract the sign bit of h and shift it into the float position
    uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
    switch (h_exp) {
        case 0x0000u: { // 0 or subnormal
            // Extract the significand field of h
            uint16_t h_sig = (h&0x03ffu);
            // Signed zero: return just the sign bit
            if (h_sig == 0) {
                return f_sgn;
            }
            // Subnormal: shift the significand left until the leading bit is set
            h_sig <<= 1;
            while ((h_sig&0x0400u) == 0) {
                h_sig <<= 1;
                h_exp++;
            }
            // Assemble the float's sign, exponent, and significand
            uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
            uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
            return f_sgn + f_exp + f_sig;
        }
        case 0x7c00u: // inf or NaN
            // All-ones exponent with a copy of the significand
            return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
        default: // normalized
            // Just re-bias the exponent and shift left
            return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
    }
}

constexpr uint64_t ToDoubleBits(uint16_t h)
{
    // Extract the exponent field of h
    uint16_t h_exp = (h&0x7c00u);
    // Extract the sign bit of h and shift it into the double position
    uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
    switch (h_exp) {
        case 0x0000u: { // 0 or subnormal
            // Extract the significand field of h
            uint16_t h_sig = (h&0x03ffu);
            // Signed zero: return just the sign bit
            if (h_sig == 0) {
                return d_sgn;
            }
            // Subnormal: shift the significand left until the leading bit is set
            h_sig <<= 1;
            while ((h_sig&0x0400u) == 0) {
                h_sig <<= 1;
                h_exp++;
            }
            // Assemble the double's sign, exponent, and significand
            uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
            uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
            return d_sgn + d_exp + d_sig;
        }
        case 0x7c00u: // inf or NaN
            // All-ones exponent with a copy of the significand
            return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
        default: // normalized
            // Just re-bias the exponent and shift left
            return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
    }
}

}}  // namespace np::half_private

#endif  // NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
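
For the normalized-number branch of ToFloatBits above, widening binary16 to binary32 is just a re-bias of the exponent (bias 15 to bias 127, i.e. adding 112 << 10 = 0x1c000 before the 13-bit shift). Here is a standalone sketch; HalfBitsToFloat is a hypothetical helper of ours, not NumPy API, and it deliberately skips the subnormal, Inf, and NaN branches:

// Minimal sketch of the "default:" branch of ToFloatBits.
#include <cstdint>
#include <cstring>
#include <cstdio>

static float HalfBitsToFloat(uint16_t h)
{
    uint32_t sign = (uint32_t)(h & 0x8000u) << 16;
    uint32_t rest = ((uint32_t)(h & 0x7fffu) + 0x1c000u) << 13;
    uint32_t bits = sign | rest;
    float f;
    std::memcpy(&f, &bits, sizeof f);   // bit-cast, like BitCast<float>(bits)
    return f;
}

int main()
{
    std::printf("%g\n", HalfBitsToFloat(0x3c00)); // 1.0
    std::printf("%g\n", HalfBitsToFloat(0xc000)); // -2.0
    std::printf("%g\n", HalfBitsToFloat(0x3555)); // ~0.333252
    return 0;
}

For example, 0x3c00 becomes (0x3c00 + 0x1c000) << 13 = 0x3f800000, which is exactly 1.0f.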

.\numpy\numpy\_core\src\common\lowlevel_strided_loops.h

// Define the include guard if it is not already defined
#ifndef NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_
#define NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_
// Common helpers
#include "common.h"
// NumPy build configuration
#include "npy_config.h"
// ArrayMethod definitions
#include "array_method.h"
// dtype transfer helpers
#include "dtype_transfer.h"
// Memory overlap detection
#include "mem_overlap.h"
// Advanced-indexing (mapping) helpers
#include "mapping.h"

/* For PyArray_ macros used below */
#include "numpy/ndarrayobject.h"

/*
 * NOTE: This API should remain private for now, to allow for further
 * refinement.  I think the 'aligned' mechanism needs changing, for example.
 *
 * NOTE: Updated in 2018 to distinguish "true" and "uint" alignment.
 */

/*
 * This function pointer is for unary operations that input an arbitrarily
 * strided one-dimensional array segment and output an arbitrarily strided
 * array segment of the same size.  It may be a fully general function, or
 * a specialized function when the strides or item size have particular
 * known values.
 *
 * Examples of unary operations are a straight copy, a byte-swap, and a
 * casting operation.
 *
 * The 'transferdata' parameter is slightly special, following the generic
 * auxiliary data pattern defined in ndarraytypes.h; use NPY_AUXDATA_CLONE
 * and NPY_AUXDATA_FREE to deal with this data.
 */
// TODO: FIX! That comment belongs to something now in array-method

/*
 * This is a pointer to a function with the same signature as
 * PyArrayMethod_StridedLoop, but with an additional mask controlling which
 * values are transformed.
 *
 * TODO: We should move this mask "capability" to the ArrayMethod itself,
 *       probably.  While this works well for NumPy-internal use, exposing it
 *       should be thought through carefully so that it could also be useful
 *       outside NumPy where possible.
 *
 * In particular, the value at the 'i'-th element is operated on if and only
 * if mask[i*mask_stride] is true.
 */
typedef int (PyArray_MaskedStridedUnaryOp)(
        PyArrayMethod_Context *context, char *const *args,
        const npy_intp *dimensions, const npy_intp *strides,
        npy_bool *mask, npy_intp mask_stride,
        NpyAuxData *auxdata);

/*
 * Gives back a function pointer to a specialized function for copying
 * strided memory.  Returns NULL if there is a problem with the inputs.
 *
 * aligned:
 *      Should be 1 if the src and dst pointers always point to locations
 *      at which a uint of size equal to dtype->elsize would be aligned,
 *      0 otherwise.
 * src_stride:
 *      Should be the src stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * dst_stride:
 *      Should be the dst stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * itemsize:
 *      Should be the item size if it will always be the same, 0 otherwise.
 *
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopyFn(int aligned,
                        npy_intp src_stride, npy_intp dst_stride,
                        npy_intp itemsize);
/*
 * Gives back a function pointer to a specialized function for copying and
 * swapping strided memory.  This assumes each element is a single value
 * that needs to be swapped.
 *
 * Parameters are as described for PyArray_GetStridedCopyFn.
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopySwapFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            npy_intp itemsize);

/*
 * Gives back a function pointer to a specialized function for copying and
 * swapping strided memory.  This assumes each element is a pair of values,
 * each of which needs to be swapped.
 *
 * Parameters are as described for PyArray_GetStridedCopyFn.
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopySwapPairFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            npy_intp itemsize);

/*
 * Gives back a transfer function and transfer data pair which copies the
 * data from source to destination, truncating it if the data doesn't fit
 * and padding with zero bytes if there is too much space.
 *
 * Parameters are as described for PyArray_GetStridedCopyFn.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap,
                            npy_intp src_stride, npy_intp dst_stride,
                            npy_intp src_itemsize, npy_intp dst_itemsize,
                            PyArrayMethod_StridedLoop **outstransfer,
                            NpyAuxData **outtransferdata);

/*
 * For casts between built-in numeric types, returns a function pointer for
 * casting from src_type_num to dst_type_num.  If a conversion is
 * unsupported, returns NULL without setting a Python exception.
 */
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedNumericCastFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            int src_type_num, int dst_type_num);

/*
 * Gets an operation which copies elements of the given dtype, swapping the
 * byte order if the dtype is not in native byte order (NBO).
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
PyArray_GetDTypeCopySwapFn(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            PyArray_Descr *dtype,
                            PyArrayMethod_StridedLoop **outstransfer,
                            NpyAuxData **outtransferdata);
/*
 * If it's possible, gives back a transfer function which casts and/or
 * byte swaps data with the dtype 'src_dtype' into data with the dtype
 * 'dst_dtype'.  If the outtransferdata is populated with a non-NULL value,
 * it must be deallocated with the NPY_AUXDATA_FREE
 * function when the transfer function is no longer required.
 *
 * aligned:
 *      Should be 1 if the src and dst pointers always point to
 *      locations at which a uint of equal size to dtype->elsize
 *      would be aligned, 0 otherwise.
 * src_stride:
 *      Should be the src stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * dst_stride:
 *      Should be the dst stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * src_dtype:
 *      The data type of source data. Must not be NULL.
 * dst_dtype:
 *      The data type of destination data.  If this is NULL and
 *      move_references is 1, a transfer function which decrements
 *      source data references is produced.
 * move_references:
 *      If 0, the destination data gets new reference ownership.
 *      If 1, the references from the source data are moved to
 *      the destination data.
 * cast_info:
 *      A pointer to an (uninitialized) `NPY_cast_info` struct, the caller
 *      must call `NPY_cast_info_xfree` on it (except on error) and handle
 *      its memory lifespan.
 * out_needs_api:
 *      If this is non-NULL, and the transfer function produced needs
 *      to call into the (Python) API, this gets set to 1.  This
 *      remains untouched if no API access is required.
 *
 * WARNING: If you set move_references to 1, it is best that src_stride is
 *          never zero when calling the transfer function.  Otherwise, the
 *          first destination reference will get the value and all the rest
 *          will get NULL.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
PyArray_GetDTypeTransferFunction(int aligned,
                            npy_intp src_stride, npy_intp dst_stride,
                            PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
                            int move_references,
                            NPY_cast_info *cast_info,
                            NPY_ARRAYMETHOD_FLAGS *out_flags);

/*
 * If it's possible, gives back a transfer function which copies fields
 * from one structured dtype to another, optionally moving references.
 *
 * aligned:
 *      Should be 1 if the src and dst pointers always point to
 *      locations at which a uint of equal size to dtype->elsize
 *      would be aligned, 0 otherwise.
 * src_stride:
 *      Should be the src stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * dst_stride:
 *      Should be the dst stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * src_dtype:
 *      The data type of source data. Must not be NULL.
 * dst_dtype:
 *      The data type of destination data.  If this is NULL and
 *      move_references is 1, a transfer function which decrements
 *      source data references is produced.
 * move_references:
 *      If 0, the destination data gets new reference ownership.
 *      If 1, the references from the source data are moved to
 *      the destination data.
 * out_stransfer:
 *      A pointer to a `PyArrayMethod_StridedLoop` struct pointer,
 *      which will contain the transfer function upon successful return.
 * out_transferdata:
 *      A pointer to an `NpyAuxData` struct pointer, which will
 *      contain auxiliary data needed for the transfer function.
 * out_flags:
 *      Pointer to `NPY_ARRAYMETHOD_FLAGS` where the flags describing
 *      the transfer function will be stored.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
get_fields_transfer_function(int aligned,
        npy_intp src_stride, npy_intp dst_stride,
        PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
        int move_references,
        PyArrayMethod_StridedLoop **out_stransfer,
        NpyAuxData **out_transferdata,
        NPY_ARRAYMETHOD_FLAGS *out_flags);

/*
 * If it's possible, gives back a transfer function which handles
 * subarray assignment from one structured dtype to another,
 * optionally moving references.
 *
 * aligned:
 *      Should be 1 if the src and dst pointers always point to
 *      locations at which a uint of equal size to dtype->elsize
 *      would be aligned, 0 otherwise.
 * src_stride:
 *      Should be the src stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * dst_stride:
 *      Should be the dst stride if it will always be the same,
 *      NPY_MAX_INTP otherwise.
 * src_dtype:
 *      The data type of source data. Must not be NULL.
 * dst_dtype:
 *      The data type of destination data.  If this is NULL and
 *      move_references is 1, a transfer function which decrements
 *      source data references is produced.
 * move_references:
 *      If 0, the destination data gets new reference ownership.
 *      If 1, the references from the source data are moved to
 *      the destination data.
 * out_stransfer:
 *      A pointer to a `PyArrayMethod_StridedLoop` struct pointer,
 *      which will contain the transfer function upon successful return.
 * out_transferdata:
 *      A pointer to an `NpyAuxData` struct pointer, which will
 *      contain auxiliary data needed for the transfer function.
 * out_flags:
 *      Pointer to `NPY_ARRAYMETHOD_FLAGS` where the flags describing
 *      the transfer function will be stored.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
get_subarray_transfer_function(int aligned,
        npy_intp src_stride, npy_intp dst_stride,
        PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
        int move_references,
        PyArrayMethod_StridedLoop **out_stransfer,
        NpyAuxData **out_transferdata,
        NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
 * This is identical to PyArray_GetDTypeTransferFunction, but returns a
 * transfer function which also takes a mask as a parameter.  The mask is used
 * to determine which values to copy, and data is transferred exactly when
 * mask[i*mask_stride] is true.
 *
 * If move_references is true, values which are not copied to the
 * destination will still have their source reference decremented.
 *
 * If mask_dtype is NPY_BOOL or NPY_UINT8, each full element is either
 * transferred or not according to the mask as described above. If
 * dst_dtype and mask_dtype are both struct dtypes, their names must
 * match exactly, and the dtype of each leaf field in mask_dtype must
 * be either NPY_BOOL or NPY_UINT8.
 */
NPY_NO_EXPORT int
PyArray_GetMaskedDTypeTransferFunction(int aligned,
                            npy_intp src_stride,
                            npy_intp dst_stride,
                            npy_intp mask_stride,
                            PyArray_Descr *src_dtype,
                            PyArray_Descr *dst_dtype,
                            PyArray_Descr *mask_dtype,
                            int move_references,
                            NPY_cast_info *cast_info,
                            NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
 * Casts the specified number of elements from 'src' with data type
 * 'src_dtype' to 'dst' with 'dst_dtype'. See
 * PyArray_GetDTypeTransferFunction for more details.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
PyArray_CastRawArrays(npy_intp count,
                      char *src, char *dst,
                      npy_intp src_stride, npy_intp dst_stride,
                      PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
                      int move_references);
/*
 * These two functions copy or convert the data of an n-dimensional array
 * to/from a one-dimensional strided buffer.  They will only call 'stransfer'
 * with the provided dst_stride/src_stride and dst_strides[0]/src_strides[0],
 * so the caller can use those values to specialize the function.
 * Note that even if ndim == 0, everything needs to be set up as if ndim == 1.
 *
 * The return value is the number of elements that could not be copied.
 * A return value of 0 means all elements were copied; a larger value means
 * the end of the n-dimensional array was reached before 'count' elements
 * were copied.  A negative return value indicates an error occurred.
 *
 * ndim:
 *      The number of dimensions of the n-dimensional array.
 * dst/src/mask:
 *      The destination, source, or mask starting pointer.
 * dst_stride/src_stride/mask_stride:
 *      The stride of the one-dimensional strided buffer.
 * dst_strides/src_strides:
 *      The strides of the n-dimensional array.
 * dst_strides_inc/src_strides_inc:
 *      How much to add to the ..._strides pointer to get to the next stride.
 * coords:
 *      The starting coordinates in the n-dimensional array.
 * coords_inc:
 *      How much to add to the coords pointer to get to the next coordinate.
 * shape:
 *      The shape of the n-dimensional array.
 * shape_inc:
 *      How much to add to the shape pointer to get to the next shape entry.
 * count:
 *      How many elements to transfer.
 * src_itemsize:
 *      How big each element is.  If transferring between elements of
 *      different sizes, for example a casting operation, the 'stransfer'
 *      function should be specialized for that, in which case 'stransfer'
 *      will use this parameter as the source item size.
 * cast_info:
 *      Pointer to the NPY_cast_info struct which summarizes all the
 *      information necessary to perform the cast.
 */
NPY_NO_EXPORT npy_intp
PyArray_TransferNDimToStrided(npy_intp ndim,
                char *dst, npy_intp dst_stride,
                char *src, npy_intp const *src_strides, npy_intp src_strides_inc,
                npy_intp const *coords, npy_intp coords_inc,
                npy_intp const *shape, npy_intp shape_inc,
                npy_intp count, npy_intp src_itemsize,
                NPY_cast_info *cast_info);

/*
 * This function transfers data from a one-dimensional strided buffer into an
 * n-dimensional array.  It will only call 'stransfer' with the provided
 * src_stride and dst_strides[0], so the caller can use those values to
 * specialize the function.
 *
 * The return value is the number of elements that could not be copied.
 * A return value of 0 means all elements were copied; a larger value means
 * the end of the n-dimensional array was reached before 'count' elements
 * were copied.  A negative return value indicates an error occurred.
 *
 * ndim:
 *      The number of dimensions of the n-dimensional array.
 * dst/src/mask:
 *      The destination, source, or mask starting pointer.
 * dst_strides/src_strides:
 *      The strides of the n-dimensional array.
 * dst_strides_inc:
 *      How much to add to the dst_strides pointer to get to the next stride.
 * src_stride:
 *      The stride of the one-dimensional strided buffer.
 * coords:
 *      The starting coordinates in the n-dimensional array.
 * coords_inc:
 *      How much to add to the coords pointer to get to the next coordinate.
 * shape:
 *      The shape of the n-dimensional array.
 * shape_inc:
 *      How much to add to the shape pointer to get to the next shape entry.
 * count:
 *      How many elements to transfer.
 * src_itemsize:
 *      How big each element is.  If transferring between elements of
 *      different sizes, for example a casting operation, the 'stransfer'
 *      function should be specialized for that, in which case 'stransfer'
 *      will use this parameter as the source item size.
 * cast_info:
 *      Pointer to the NPY_cast_info struct which summarizes all the
 *      information necessary to perform the cast.
 */
NPY_NO_EXPORT npy_intp
PyArray_TransferStridedToNDim(npy_intp ndim,
                char *dst, npy_intp const *dst_strides, npy_intp dst_strides_inc,
                char *src, npy_intp src_stride,
                npy_intp const *coords, npy_intp coords_inc,
                npy_intp const *shape, npy_intp shape_inc,
                npy_intp count, npy_intp src_itemsize,
                NPY_cast_info *cast_info);
/*
 * Transfers data from a strided buffer to an n-dimensional array, copying
 * only where the mask is true.  Coordinates, strides, shapes, and casting
 * information are provided to control the transfer, as above, and the
 * return value follows the same convention as the two functions above.
 */
NPY_NO_EXPORT npy_intp
PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
                char *dst, npy_intp const *dst_strides, npy_intp dst_strides_inc,
                char *src, npy_intp src_stride,
                npy_bool *mask, npy_intp mask_stride,
                npy_intp const *coords, npy_intp coords_inc,
                npy_intp const *shape, npy_intp shape_inc,
                npy_intp count, npy_intp src_itemsize,
                NPY_cast_info *cast_info);

/*
 * Retrieves values from a simple mapping iterator at specified indices, storing results.
 * Handles alignment and casting information provided.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_trivial_get(
        PyArrayObject *self, PyArrayObject *ind, PyArrayObject *result,
        int is_aligned, NPY_cast_info *cast_info);

/*
 * Sets values into a simple mapping iterator at specified indices, using given data.
 * Handles alignment and casting information provided.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_trivial_set(
        PyArrayObject *self, PyArrayObject *ind, PyArrayObject *result,
        int is_aligned, NPY_cast_info *cast_info);

/*
 * Retrieves values from a more complex mapping iterator object.
 * Handles casting and alignment considerations, along with specified flags.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_get(
        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);

/*
 * Sets values into a more complex mapping iterator object.
 * Handles casting and alignment considerations, along with specified flags.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_set(
        PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
        NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);

/*
 * Prepares shape and strides for a simple raw array iteration.
 * Orders strides into FORTRAN order, reverses negative strides, and coalesces axes where possible.
 * Returns 0 on success, -1 on failure.
 *
 * Intended for lightweight iteration over raw arrays without PyArrayObject buffering.
 * Used together with NPY_RAW_ITER_START and NPY_RAW_ITER_ONE_NEXT for loop handling.
 */
NPY_NO_EXPORT int
PyArray_PrepareOneRawArrayIter(int ndim, npy_intp const *shape,
                            char *data, npy_intp const *strides,
                            int *out_ndim, npy_intp *out_shape,
                            char **out_data, npy_intp *out_strides);

/*
 * Prepares shape and strides for two operands' raw array iteration.
 * Only uses strides of the first operand for dimension reordering.
 * Returns 0 on success, -1 on failure.
 *
 * Assumes operands have already been broadcasted.
 * Used with NPY_RAW_ITER_START and NPY_RAW_ITER_TWO_NEXT for loop handling.
 */
NPY_NO_EXPORT int
PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp const *shape,
                            char *dataA, npy_intp const *stridesA,
                            char *dataB, npy_intp const *stridesB,
                            int *out_ndim, npy_intp *out_shape,
                            char **out_dataA, npy_intp *out_stridesA,
                            char **out_dataB, npy_intp *out_stridesB);

/*
 * Prepares shape and strides for three operands' raw array iteration.
 * The three operands must already have been broadcast before calling this
 * function, since ndim and shape are specified only once for all of them.
 *
 * Only the strides of the first operand are used to reorder the dimensions;
 * all stride combinations are not considered, unlike the NpyIter object.
 *
 * Used with NPY_RAW_ITER_START and NPY_RAW_ITER_THREE_NEXT to handle all
 * the loops except the innermost one (i.e. the idim == 0 loop).
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp const *shape,
                            char *dataA, npy_intp const *stridesA,
                            char *dataB, npy_intp const *stridesB,
                            char *dataC, npy_intp const *stridesC,
                            int *out_ndim, npy_intp *out_shape,
                            char **out_dataA, npy_intp *out_stridesA,
                            char **out_dataB, npy_intp *out_stridesB,
                            char **out_dataC, npy_intp *out_stridesC);

/*
 * Returns the number of elements of size 'esize', starting at address 'addr'
 * and out of 'nvals' total, that must be peeled off to reach block
 * alignment.  The 'alignment' argument is the required byte alignment and
 * must be a power of two.  This function is used to prepare an array for
 * blocking; see the documentation of 'npy_blocked_end' below for an example
 * of how it is used.
 */
static inline npy_intp
npy_aligned_block_offset(const void * addr, const npy_uintp esize,
                         const npy_uintp alignment, const npy_uintp nvals)
{
    npy_uintp offset, peel;

    // Offset of the address from the requested alignment
    offset = (npy_uintp)addr & (alignment - 1);
    // Number of elements to peel off to reach an aligned address
    peel = offset ? (alignment - offset) / esize : 0;
    // Never peel more elements than are available
    peel = (peel <= nvals) ? peel : nvals;
    // The result must fit in npy_intp
    assert(peel <= NPY_MAX_INTP);
    return (npy_intp)peel;
}
/*
 * Calculate the upper loop bound for iterating over a raw array,
 * handling peeling by 'offset' elements and blocking to a vector
 * size of 'vsz' in bytes.
 *
 * Example usage:
 * npy_intp i;
 * double v[101];
 * npy_intp esize = sizeof(v[0]);
 * npy_intp peel = npy_aligned_block_offset(v, esize, 16, n);
 * // peel to alignment 16
 * for (i = 0; i < peel; i++)
 *   <scalar-op>
 * // simd vectorized operation
 * for (; i < npy_blocked_end(peel, esize, 16, n); i += 16 / esize)
 *   <blocked-op>
 * // handle scalar rest
 * for (; i < n; i++)
 *   <scalar-op>
 */
static inline npy_intp
npy_blocked_end(const npy_uintp peel, const npy_uintp esize,
                const npy_uintp vsz, const npy_uintp nvals)
{
    // Calculate the difference between total elements and peeled elements
    npy_uintp ndiff = nvals - peel;
    // Calculate the result for blocked end by rounding down to nearest multiple
    npy_uintp res = (ndiff - ndiff % (vsz / esize));

    // Ensure the total number of elements is at least as large as peeled elements
    assert(nvals >= peel);
    // Ensure the result fits within the maximum integer size
    assert(res <= NPY_MAX_INTP);

    // Return the calculated blocked end position
    return (npy_intp)(res);
}
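
To see how these two helpers split an array into a scalar prologue, aligned blocks, and a scalar remainder, here is a small standalone sketch; aligned_block_offset and blocked_end are local re-implementations that mirror the two inline functions above (plain <stdint.h> types stand in for npy_intp/npy_uintp):

/* Standalone sketch of the peel/blocked-end bookkeeping documented above. */
#include <stdint.h>
#include <stdio.h>

static intptr_t aligned_block_offset(const void *addr, uintptr_t esize,
                                     uintptr_t alignment, uintptr_t nvals)
{
    uintptr_t offset = (uintptr_t)addr & (alignment - 1);
    uintptr_t peel = offset ? (alignment - offset) / esize : 0;
    return (intptr_t)(peel <= nvals ? peel : nvals);
}

static intptr_t blocked_end(uintptr_t peel, uintptr_t esize,
                            uintptr_t vsz, uintptr_t nvals)
{
    uintptr_t ndiff = nvals - peel;
    return (intptr_t)(ndiff - ndiff % (vsz / esize));
}

int main(void)
{
    double v[101];
    uintptr_t n = 101, esize = sizeof(v[0]);
    intptr_t i, peel, end;
    double sum = 0.0;

    for (i = 0; i < (intptr_t)n; i++) {
        v[i] = 1.0;
    }
    peel = aligned_block_offset(v, esize, 16, n);
    end  = blocked_end((uintptr_t)peel, esize, 16, n);

    for (i = 0; i < peel; i++) {            /* scalar prologue up to alignment */
        sum += v[i];
    }
    for (; i < end; i += 2) {               /* "vector" blocks of 16 bytes = 2 doubles */
        sum += v[i] + v[i + 1];
    }
    for (; i < (intptr_t)n; i++) {          /* scalar remainder */
        sum += v[i];
    }
    printf("peel=%ld sum=%g\n", (long)peel, sum);   /* sum is always 101 */
    return 0;
}

Whatever the alignment of v happens to be, the three loops together touch each of the 101 elements exactly once, so the printed sum is always 101.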


/* byte swapping functions */

// Swap bytes of a 16-bit unsigned integer
static inline npy_uint16
npy_bswap2(npy_uint16 x)
{
    return ((x & 0xffu) << 8) | (x >> 8);
}

/*
 * Treat a memory area as int16 and byte-swap unaligned memory,
 * handling CPUs that don't support unaligned access.
 */
static inline void
npy_bswap2_unaligned(char * x)
{
    // Swap bytes for 16-bit unaligned memory
    char a = x[0];
    x[0] = x[1];
    x[1] = a;
}

// Swap bytes of a 32-bit unsigned integer
static inline npy_uint32
npy_bswap4(npy_uint32 x)
{
#ifdef HAVE___BUILTIN_BSWAP32
    // Use compiler's built-in function for byte swapping if available
    return __builtin_bswap32(x);
#else
    // Manually swap bytes for 32-bit unsigned integer
    return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
           ((x & 0xff0000u) >> 8) | (x >> 24);
#endif
}

/*
 * Byte-swap unaligned memory for 32-bit values.
 * Swaps bytes to handle CPUs that don't support unaligned access.
 */
static inline void
npy_bswap4_unaligned(char * x)
{
    // Swap bytes for 32-bit unaligned memory
    char a = x[0];
    x[0] = x[3];
    x[3] = a;
    a = x[1];
    x[1] = x[2];
    x[2] = a;
}

// Swap bytes of a 64-bit unsigned integer
static inline npy_uint64
npy_bswap8(npy_uint64 x)
{
#ifdef HAVE___BUILTIN_BSWAP64
    // Use compiler's built-in function for byte swapping if available
    return __builtin_bswap64(x);
#else
    // Manually swap bytes for 64-bit unsigned integer
    return ((x & 0xffULL) << 56) |
           ((x & 0xff00ULL) << 40) |
           ((x & 0xff0000ULL) << 24) |
           ((x & 0xff000000ULL) << 8) |
           ((x & 0xff00000000ULL) >> 8) |
           ((x & 0xff0000000000ULL) >> 24) |
           ((x & 0xff000000000000ULL) >> 40) |
           (x >> 56);
#endif
}

/*
 * Byte-swap unaligned memory for 64-bit values.
 * Swaps bytes to handle CPUs that don't support unaligned access.
 */
static inline void
npy_bswap8_unaligned(char * x)
{
    // Swap bytes for 64-bit unaligned memory
    char a = x[0]; x[0] = x[7]; x[7] = a;
    a = x[1]; x[1] = x[6]; x[6] = a;
    a = x[2]; x[2] = x[5]; x[5] = a;
    a = x[3]; x[3] = x[4]; x[4] = a;
}
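
A quick standalone check of the fallback expression used above (the local bswap4 mirrors the non-builtin branch of npy_bswap4):

#include <stdint.h>
#include <stdio.h>

static uint32_t bswap4(uint32_t x)
{
    /* Same shift/mask expression as the #else branch of npy_bswap4 */
    return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
           ((x & 0xff0000u) >> 8) | (x >> 24);
}

int main(void)
{
    printf("0x%08x\n", (unsigned)bswap4(0x01020304u));  /* 0x04030201 */
    return 0;
}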


/* Start raw iteration */

// Initialize raw iteration over an array: zero the coordinates and open the
// do/while loop that one of the ..._NEXT macros below closes
#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \
        memset((coord), 0, (ndim) * sizeof(coord[0])); \
        do {

// Increment to the next n-dimensional coordinate for one raw array: when a
// coordinate overflows its shape it is reset to zero and the data pointer is
// rewound, otherwise the data pointer advances by that dimension's stride
#define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] == (shape)[idim]) { \
                    (coord)[idim] = 0; \
                    (data) -= ((shape)[idim] - 1) * (strides)[idim]; \
                } \
                else { \
                    (data) += (strides)[idim]; \
                    break; \
                } \
            } \
        } while ((idim) < (ndim))
/* Increment to the next n-dimensional coordinate for two raw arrays */
#define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, dataB, stridesB) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] == (shape)[idim]) { \
                    (coord)[idim] = 0; \
                    (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                    (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                } \
                else { \
                    (dataA) += (stridesA)[idim]; \
                    (dataB) += (stridesB)[idim]; \
                    break; \
                } \
            } \
        } while ((idim) < (ndim))

/* Increment to the next n-dimensional coordinate for three raw arrays */
#define NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, \
                              dataB, stridesB, \
                              dataC, stridesC) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] == (shape)[idim]) { \
                    (coord)[idim] = 0; \
                    (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                    (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                    (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \
                } \
                else { \
                    (dataA) += (stridesA)[idim]; \
                    (dataB) += (stridesB)[idim]; \
                    (dataC) += (stridesC)[idim]; \
                    break; \
                } \
            } \
        } while ((idim) < (ndim))

/* Increment to the next n-dimensional coordinate for four raw arrays */
#define NPY_RAW_ITER_FOUR_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, \
                              dataB, stridesB, \
                              dataC, stridesC, \
                              dataD, stridesD) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] == (shape)[idim]) { \
                    (coord)[idim] = 0; \
                    (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                    (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                    (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \
                    (dataD) -= ((shape)[idim] - 1) * (stridesD)[idim]; \
                } \
                else { \
                    (dataA) += (stridesA)[idim]; \
                    (dataB) += (stridesB)[idim]; \
                    (dataC) += (stridesC)[idim]; \
                    (dataD) += (stridesD)[idim]; \
                    break; \
                } \
            } \
        } while ((idim) < (ndim))
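
The START/NEXT macros are meant to be used as a matched pair: START zeroes the coordinates and opens a do { ... } loop, the body handles the innermost (idim == 0) dimension explicitly, and the NEXT macro advances the outer coordinates and closes the loop. Here is a standalone sketch that repeats the two macros verbatim so it compiles outside NumPy; ptrdiff_t stands in for npy_intp, and inside NumPy the shape and strides would come from PyArray_PrepareOneRawArrayIter rather than being written by hand:

/* Standalone sketch of the NPY_RAW_ITER_START / NPY_RAW_ITER_ONE_NEXT usage. */
#include <stddef.h>
#include <string.h>
#include <stdio.h>

typedef ptrdiff_t npy_intp;

#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \
        memset((coord), 0, (ndim) * sizeof(coord[0])); \
        do {

#define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] == (shape)[idim]) { \
                    (coord)[idim] = 0; \
                    (data) -= ((shape)[idim] - 1) * (strides)[idim]; \
                } \
                else { \
                    (data) += (strides)[idim]; \
                    break; \
                } \
            } \
        } while ((idim) < (ndim))

int main(void)
{
    double buf[2][3] = {{1, 2, 3}, {4, 5, 6}};
    int ndim = 2;
    npy_intp shape[2] = {3, 2};                       /* {inner, outer} */
    npy_intp strides[2] = {sizeof(double), 3 * sizeof(double)};
    char *data = (char *)buf;
    npy_intp coord[2];
    npy_intp i;
    int idim;
    double sum = 0.0;

    NPY_RAW_ITER_START(idim, ndim, coord, shape) {
        /* The innermost dimension (idim == 0) is handled explicitly here */
        for (i = 0; i < shape[0]; ++i) {
            sum += *(double *)(data + i * strides[0]);
        }
    } NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides);

    printf("sum = %g\n", sum);                        /* 21 */
    return 0;
}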
/*
 *            TRIVIAL ITERATION
 *
 * In some cases when the iteration order isn't important, iteration over
 * arrays is trivial.  This is the case when:
 *   * The array has 0 or 1 dimensions.
 *   * The array is C or Fortran contiguous.
 * Use of an iterator can be skipped when this occurs.  These macros assist
 * in detecting and taking advantage of the situation.  Note that it may
 * be worthwhile to further check if the stride is a contiguous stride
 * and take advantage of that.
 *
 * Here is example code for a single array:
 *
 *      if (PyArray_TRIVIALLY_ITERABLE(self)) {
 *          char *data;
 *          npy_intp count, stride;
 *
 *          PyArray_PREPARE_TRIVIAL_ITERATION(self, count, data, stride);
 *
 *          while (count--) {
 *              // Use the data pointer
 *
 *              data += stride;
 *          }
 *      }
 *      else {
 *          // Create iterator, etc...
 *      }
 *
 */

/*
 * Note: Equivalently iterable macro requires one of arr1 or arr2 be
 *       trivially iterable to be valid.
 */

/**
 * Determine whether an array is safe for trivial iteration.
 *
 * This macro checks if the array meets conditions for trivial iteration:
 * - Has 0 or 1 dimensions
 * - Is C contiguous or Fortran contiguous
 */
#define PyArray_TRIVIALLY_ITERABLE(arr) ( \
                    PyArray_NDIM(arr) <= 1 || \
                    PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \
                    PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \
                    )

/**
 * Calculate the stride for trivial iteration over a single array or pair of arrays.
 *
 * This macro computes the stride to be used for iterating over an array or pair of arrays:
 * - If size is 1, returns 0 (no iteration needed)
 * - If array has 1 dimension, returns its stride
 * - If array has more dimensions, returns item size
 */
#define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \
        assert(PyArray_TRIVIALLY_ITERABLE(arr)), \
        size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \
                             PyArray_STRIDE(arr, 0) : PyArray_ITEMSIZE(arr)))

/**
 * Check if two arrays can be iterated over trivially and safely.
 *
 * This function checks conditions under which two arrays can be safely iterated over:
 * - Both arrays are read-only
 * - Arrays do not share overlapping memory
 * - Strides match and one array's base address is before the other's
 *   to ensure correct data dependency
 *
 * @param arr1 First array object
 * @param arr2 Second array object
 * @param arr1_read Flag indicating if arr1 is read-only
 * @param arr2_read Flag indicating if arr2 is read-only
 * @return 1 if trivial iteration is safe, 0 otherwise
 */
static inline int
PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2,
                                         int arr1_read, int arr2_read)
{
    npy_intp size1, size2, stride1, stride2;
    int arr1_ahead = 0, arr2_ahead = 0;

    if (arr1_read && arr2_read) {
        return 1;
    }

    size1 = PyArray_SIZE(arr1);
    stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1);

    /*
     * arr1 == arr2 is common for in-place operations, so we fast-path it here.
     * TODO: The stride1 != 0 check rejects broadcast arrays.  This may affect
     *       self-overlapping arrays, but seems only necessary due to
     *       `try_trivial_single_output_loop` not rejecting broadcast outputs.
     */
    if (arr1 == arr2 && stride1 != 0) {
        return 1;
    }

    if (solve_may_share_memory(arr1, arr2, 1) == 0) {
        return 1;
    }
    /*
     * Get the number of elements in arr2.
     */
    size2 = PyArray_SIZE(arr2);

    /*
     * Compute the trivial iteration stride of arr2.
     */
    stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2);

    /*
     * Determine whether arr1 is "ahead of" arr2: for a positive stride,
     * arr1 must have the larger (or equal) stride and start at or after
     * arr2; for a negative stride, the conditions are mirrored.  The
     * comparison is done on the raw byte addresses.
     */
    if (stride1 > 0) {
        arr1_ahead = (stride1 >= stride2 &&
                      PyArray_BYTES(arr1) >= PyArray_BYTES(arr2));
    }
    else if (stride1 < 0) {
        arr1_ahead = (stride1 <= stride2 &&
                      PyArray_BYTES(arr1) <= PyArray_BYTES(arr2));
    }

    /*
     * The same check with the roles of arr1 and arr2 swapped.
     */
    if (stride2 > 0) {
        arr2_ahead = (stride2 >= stride1 &&
                      PyArray_BYTES(arr2) >= PyArray_BYTES(arr1));
    }
    else if (stride2 < 0) {
        arr2_ahead = (stride2 <= stride1 &&
                      PyArray_BYTES(arr2) <= PyArray_BYTES(arr1));
    }

    /*
     * Iteration is safe when every array that is written to (i.e. not
     * read-only) is "ahead of" the other one, so values are read before
     * they are overwritten.
     */
    return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead);
}

/*
 * Check whether two arrays can be iterated over equivalently: same number
 * of dimensions, identical shapes, and matching contiguity (both C
 * contiguous or both Fortran contiguous).
 */
#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) (            \
                        PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \
                        PyArray_CompareLists(PyArray_DIMS(arr1), \
                                             PyArray_DIMS(arr2), \
                                             PyArray_NDIM(arr1)) && \
                        (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \
                                      NPY_ARRAY_F_CONTIGUOUS)) & \
                                (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \
                                              NPY_ARRAY_F_CONTIGUOUS)) \
                        )



/*
 * Full check that two arrays can be iterated over equivalently and that any
 * memory overlap between them is safe for the given read/write pattern.
 */
#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \
                        PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \
                        PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \
                            arr1, arr2, arr1_read, arr2_read))



/* Prepare the count, data pointer, and stride for trivial iteration. */
#define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) \
                    count = PyArray_SIZE(arr); \
                    data = PyArray_BYTES(arr); \
                    stride = ((PyArray_NDIM(arr) == 0) ? 0 : \
                                    ((PyArray_NDIM(arr) == 1) ? \
                                            PyArray_STRIDE(arr, 0) : \
                                            PyArray_ITEMSIZE(arr)));



/* Prepare counts, data pointers, and strides for trivial pair iteration. */
#define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \
                                        count, \
                                        data1, data2, \
                                        stride1, stride2) { \
                    npy_intp size1 = PyArray_SIZE(arr1); \
                    npy_intp size2 = PyArray_SIZE(arr2); \
                    count = ((size1 > size2) || size1 == 0) ? size1 : size2; \
                    data1 = PyArray_BYTES(arr1); \
                    data2 = PyArray_BYTES(arr2); \
                    stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \
                    stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \
                }



#endif  /* NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_ */