NumPy 源码解析（七十五）

`.\numpy\numpy\_core\src\npysort\x86_simd_argsort.dispatch.cpp`

#ifndef __CYGWIN__
#ifndef __CYGWIN__ 检查是否未定义宏 __CYGWIN__
#include "x86_simd_qsort.hpp"
包含头文件 "x86_simd_qsort.hpp"

#include "x86-simd-sort/src/x86simdsort-static-incl.h"
包含头文件 "x86-simd-sort/src/x86simdsort-static-incl.h"

#define DISPATCH_ARG_METHODS(TYPE) \
宏定义 DISPATCH_ARG_METHODS(TYPE)，该宏用于生成模板函数

template<> void NPY_CPU_DISPATCH_CURFX(ArgQSelect)(TYPE* arr, npy_intp* arg, npy_intp num, npy_intp kth) \
模板特化，定义类型为 TYPE 的函数 NPY_CPU_DISPATCH_CURFX(ArgQSelect)，参数为 arr 数组，arg 数组，num 元素数目，kth 第 k 小元素位置
{ \
    调用 x86simdsortStatic::argselect 函数，将 arr 转换为 size_t* 类型的 arg 数组，选择第 kth 小的元素
    x86simdsortStatic::argselect(arr, reinterpret_cast<size_t*>(arg), kth, num, true); \
} \

template<> void NPY_CPU_DISPATCH_CURFX(ArgQSort)(TYPE* arr, npy_intp *arg, npy_intp size) \
模板特化，定义类型为 TYPE 的函数 NPY_CPU_DISPATCH_CURFX(ArgQSort)，参数为 arr 数组，arg 数组，size 元素数目
{ \
    调用 x86simdsortStatic::argsort 函数，将 arr 转换为 size_t* 类型的 arg 数组，按升序排序
    x86simdsortStatic::argsort(arr, reinterpret_cast<size_t*>(arg), size, true); \
} \

namespace np { namespace qsort_simd {
声明命名空间 np::qsort_simd

    DISPATCH_ARG_METHODS(uint32_t)
    通过 DISPATCH_ARG_METHODS 宏生成 uint32_t 类型的函数实现

    DISPATCH_ARG_METHODS(int32_t)
    通过 DISPATCH_ARG_METHODS 宏生成 int32_t 类型的函数实现

    DISPATCH_ARG_METHODS(float)
    通过 DISPATCH_ARG_METHODS 宏生成 float 类型的函数实现

    DISPATCH_ARG_METHODS(uint64_t)
    通过 DISPATCH_ARG_METHODS 宏生成 uint64_t 类型的函数实现

    DISPATCH_ARG_METHODS(int64_t)
    通过 DISPATCH_ARG_METHODS 宏生成 int64_t 类型的函数实现

    DISPATCH_ARG_METHODS(double)
    通过 DISPATCH_ARG_METHODS 宏生成 double 类型的函数实现

}} // namespace np::simd

#endif // __CYGWIN__
#endif // __CYGWIN__

`.\numpy\numpy\_core\src\npysort\x86_simd_qsort.dispatch.cpp`

#ifndef __CYGWIN__

# 如果未定义 __CYGWIN__ 宏，则执行以下代码块


#include "x86_simd_qsort.hpp"

# 包含 x86_simd_qsort.hpp 文件，可能是定义了 SIMD 加速的快速排序算法的头文件


#include "x86-simd-sort/src/x86simdsort-static-incl.h"

# 包含 x86simdsort 静态库的头文件，这个库提供了 x86 平台上 SIMD 排序算法的静态函数声明


#define DISPATCH_SORT_METHODS(TYPE) \
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(TYPE *arr, npy_intp num, npy_intp kth) \
{ \
    x86simdsortStatic::qselect(arr, kth, num, true); \
} \
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(TYPE *arr, npy_intp num) \
{ \
    x86simdsortStatic::qsort(arr, num, true); \
} \

# 定义了一个宏 DISPATCH_SORT_METHODS，用于生成针对不同数据类型（TYPE）的 SIMD 加速的快速选择（QSelect）和快速排序（QSort）函数模板。这些函数通过调用 x86simdsortStatic 命名空间中相应的静态函数实现。


namespace np { namespace qsort_simd {

# 进入 np::qsort_simd 命名空间，该命名空间用于存放针对 SIMD 加速的快速排序相关函数模板。


    DISPATCH_SORT_METHODS(uint32_t)
    DISPATCH_SORT_METHODS(int32_t)
    DISPATCH_SORT_METHODS(float)
    DISPATCH_SORT_METHODS(uint64_t)
    DISPATCH_SORT_METHODS(int64_t)
    DISPATCH_SORT_METHODS(double)

# 实例化不同数据类型的 DISPATCH_SORT_METHODS 宏，生成对应的快速选择和快速排序函数模板的具体实现。


}} // namespace np::qsort_simd

# 退出 np::qsort_simd 命名空间。


#endif // __CYGWIN__

# 结束条件编译指令，标志着对非 Cygwin 环境下的代码块的结束。

`.\numpy\numpy\_core\src\npysort\x86_simd_qsort.hpp`

#ifndef NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP
#define NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP

// 如果未定义 NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP 宏，则开始此头文件的条件编译


#include "common.hpp"

// 包含 common.hpp 头文件，用于引入常用的公共功能和定义


namespace np { namespace qsort_simd {

// 命名空间 np::qsort_simd 的开始


#ifndef NPY_DISABLE_OPTIMIZATION
    #include "x86_simd_qsort.dispatch.h"
#endif

// 如果未禁用优化，则包含 x86_simd_qsort.dispatch.h 头文件，该文件用于优化排序算法的分发


NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))

// 声明模板函数 QSort，用于对类型 T 的数组 arr 进行快速排序，size 为数组大小


NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))

// 声明模板函数 QSelect，用于在类型 T 的数组 arr 中选择第 kth 小的元素，num 是数组中的元素数目


#ifndef NPY_DISABLE_OPTIMIZATION
    #include "x86_simd_argsort.dispatch.h"
#endif

// 如果未禁用优化，则包含 x86_simd_argsort.dispatch.h 头文件，用于优化参数排序算法的分发


NPY_CPU_DISPATCH_DECLARE(template <typename T> void ArgQSort, (T *arr, npy_intp* arg, npy_intp size))

// 声明模板函数 ArgQSort，用于对类型 T 的数组 arr 和对应的参数数组 arg 进行排序，size 是数组大小


NPY_CPU_DISPATCH_DECLARE(template <typename T> void ArgQSelect, (T *arr, npy_intp* arg, npy_intp kth, npy_intp size))

// 声明模板函数 ArgQSelect，用于在类型 T 的数组 arr 中基于参数数组 arg 选择第 kth 小的元素，size 是数组大小


#ifndef NPY_DISABLE_OPTIMIZATION
    #include "x86_simd_qsort_16bit.dispatch.h"
#endif

// 如果未禁用优化，则包含 x86_simd_qsort_16bit.dispatch.h 头文件，用于优化16位整数排序算法的分发


NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSort, (T *arr, npy_intp size))

// 再次声明模板函数 QSort，用于对类型 T 的数组 arr 进行快速排序，size 为数组大小


NPY_CPU_DISPATCH_DECLARE(template <typename T> void QSelect, (T* arr, npy_intp num, npy_intp kth))

// 再次声明模板函数 QSelect，用于在类型 T 的数组 arr 中选择第 kth 小的元素，num 是数组中的元素数目


} } // np::qsort_simd

// 命名空间 np::qsort_simd 的结束


#endif // NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP

// 结束条件编译指令，关闭 NUMPY_SRC_COMMON_NPYSORT_X86_SIMD_QSORT_HPP 宏定义的作用范围

`.\numpy\numpy\_core\src\npysort\x86_simd_qsort_16bit.dispatch.cpp`

/*
 * 包含 x86 SIMD 快速排序头文件
 */
#include "x86_simd_qsort.hpp"

/*
 * 如果不是在 Cygwin 环境下编译，则包含 x86 SIMD 排序的静态头文件
 */
#ifndef __CYGWIN__
#include "x86-simd-sort/src/x86simdsort-static-incl.h"

/*
 * 对于 MSVC 编译器，由于未设置 __AVX512VBMI2__ 宏，需要手动包含此文件
 */
#ifdef _MSC_VER
#include "x86-simd-sort/src/avx512-16bit-qsort.hpp"
#endif

namespace np { namespace qsort_simd {

/*
 * QSelect 分发函数：
 */

/*
 * 特化模板，用于处理 Half 类型数组的 QSelect 操作
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(Half *arr, npy_intp num, npy_intp kth)
{
    /*
     * 如果支持 AVX512_SPR 指令集，则调用 x86simdsortStatic 的 qselect 函数处理 _Float16 数组
     */
#if defined(NPY_HAVE_AVX512_SPR)
    x86simdsortStatic::qselect(reinterpret_cast<_Float16*>(arr), kth, num, true);
#else
    /*
     * 否则，调用 avx512_qselect_fp16 函数处理 uint16_t 数组
     */
    avx512_qselect_fp16(reinterpret_cast<uint16_t*>(arr), kth, num, true, false);
#endif
}

/*
 * 特化模板，用于处理 uint16_t 类型数组的 QSelect 操作
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(uint16_t *arr, npy_intp num, npy_intp kth)
{
    /*
     * 调用 x86simdsortStatic 的 qselect 函数处理 uint16_t 数组
     */
    x86simdsortStatic::qselect(arr, kth, num);
}

/*
 * 特化模板，用于处理 int16_t 类型数组的 QSelect 操作
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSelect)(int16_t *arr, npy_intp num, npy_intp kth)
{
    /*
     * 调用 x86simdsortStatic 的 qselect 函数处理 int16_t 数组
     */
    x86simdsortStatic::qselect(arr, kth, num);
}

/*
 * QSort 分发函数：
 */

/*
 * 特化模板，用于处理 Half 类型数组的 QSort 操作
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(Half *arr, npy_intp size)
{
    /*
     * 如果支持 AVX512_SPR 指令集，则调用 x86simdsortStatic 的 qsort 函数处理 _Float16 数组
     */
#if defined(NPY_HAVE_AVX512_SPR)
    x86simdsortStatic::qsort(reinterpret_cast<_Float16*>(arr), size, true);
#else
    /*
     * 否则，调用 avx512_qsort_fp16 函数处理 uint16_t 数组
     */
    avx512_qsort_fp16(reinterpret_cast<uint16_t*>(arr), size, true, false);
#endif
}

/*
 * 特化模板，用于处理 uint16_t 类型数组的 QSort 操作
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(uint16_t *arr, npy_intp size)
{
    /*
     * 调用 x86simdsortStatic 的 qsort 函数处理 uint16_t 数组
     */
    x86simdsortStatic::qsort(arr, size);
}

/*
 * 特化模板，用于处理 int16_t 类型数组的 QSort 操作
 */
template<> void NPY_CPU_DISPATCH_CURFX(QSort)(int16_t *arr, npy_intp size)
{
    /*
     * 调用 x86simdsortStatic 的 qsort 函数处理 int16_t 数组
     */
    x86simdsortStatic::qsort(arr, size);
}

}} // namespace np::qsort_simd

#endif // __CYGWIN__

`.\numpy\numpy\_core\src\umath\clip.cpp`

/**
 * This module provides the inner loops for the clip ufunc
 */

// 包含必要的头文件，定义和声明
#include <type_traits>

#define _UMATHMODULE
#define _MULTIARRAYMODULE
#define NPY_NO_DEPRECATED_API NPY_API_VERSION

// 引入 Python 头文件
#define PY_SSIZE_T_CLEAN
#include <Python.h>

// 引入 NumPy 中的特定头文件
#include "numpy/halffloat.h"
#include "numpy/ndarraytypes.h"
#include "numpy/npy_common.h"
#include "numpy/npy_math.h"
#include "numpy/utils.h"

// 引入特定的宏定义文件
#include "fast_loop_macros.h"

// 引入自定义的 NumPy 标签
#include "../common/numpy_tag.h"

// 定义模板函数 _NPY_MIN，用于整数类型的最小值比较
template <class T>
T
_NPY_MIN(T a, T b, npy::integral_tag const &)
{
    return PyArray_MIN(a, b);
}

// 定义模板函数 _NPY_MAX，用于整数类型的最大值比较
template <class T>
T
_NPY_MAX(T a, T b, npy::integral_tag const &)
{
    return PyArray_MAX(a, b);
}

// 定义针对 npy_half 类型的最小值比较函数
npy_half
_NPY_MIN(npy_half a, npy_half b, npy::half_tag const &)
{
    return npy_half_isnan(a) || npy_half_le(a, b) ? (a) : (b);
}

// 定义针对 npy_half 类型的最大值比较函数
npy_half
_NPY_MAX(npy_half a, npy_half b, npy::half_tag const &)
{
    return npy_half_isnan(a) || npy_half_ge(a, b) ? (a) : (b);
}

// 定义针对浮点数类型的最小值比较函数
template <class T>
T
_NPY_MIN(T a, T b, npy::floating_point_tag const &)
{
    return npy_isnan(a) ? (a) : PyArray_MIN(a, b);
}

// 定义针对浮点数类型的最大值比较函数
template <class T>
T
_NPY_MAX(T a, T b, npy::floating_point_tag const &)
{
    return npy_isnan(a) ? (a) : PyArray_MAX(a, b);
}

// 定义宏函数 PyArray_CLT，用于复数类型的小于比较
#define PyArray_CLT(p,q,suffix) (((npy_creal##suffix(p)==npy_creal##suffix(q)) ? (npy_cimag##suffix(p) < npy_cimag##suffix(q)) : \
                               (npy_creal##suffix(p) < npy_creal##suffix(q))))

// 定义宏函数 PyArray_CGT，用于复数类型的大于比较
#define PyArray_CGT(p,q,suffix) (((npy_creal##suffix(p)==npy_creal##suffix(q)) ? (npy_cimag##suffix(p) > npy_cimag##suffix(q)) : \
                               (npy_creal##suffix(p) > npy_creal##suffix(q))))

// 定义针对 npy_cdouble 类型的最小值比较函数
npy_cdouble
_NPY_MIN(npy_cdouble a, npy_cdouble b, npy::complex_tag const &)
{
    return npy_isnan(npy_creal(a)) || npy_isnan(npy_cimag(a)) || PyArray_CLT(a, b,)
                ? (a)
                : (b);
}

// 定义针对 npy_cfloat 类型的最小值比较函数
npy_cfloat
_NPY_MIN(npy_cfloat a, npy_cfloat b, npy::complex_tag const &)
{
    return npy_isnan(npy_crealf(a)) || npy_isnan(npy_cimagf(a)) || PyArray_CLT(a, b, f)
                ? (a)
                : (b);
}

// 定义针对 npy_clongdouble 类型的最小值比较函数
npy_clongdouble
_NPY_MIN(npy_clongdouble a, npy_clongdouble b, npy::complex_tag const &)
{
    return npy_isnan(npy_creall(a)) || npy_isnan(npy_cimagl(a)) || PyArray_CLT(a, b, l)
                ? (a)
                : (b);
}

// 定义针对 npy_cdouble 类型的最大值比较函数
npy_cdouble
_NPY_MAX(npy_cdouble a, npy_cdouble b, npy::complex_tag const &)
{
    return npy_isnan(npy_creal(a)) || npy_isnan(npy_cimag(a)) || PyArray_CGT(a, b,)
                ? (a)
                : (b);
}

// 定义针对 npy_cfloat 类型的最大值比较函数
npy_cfloat
_NPY_MAX(npy_cfloat a, npy_cfloat b, npy::complex_tag const &)
{
    return npy_isnan(npy_crealf(a)) || npy_isnan(npy_cimagf(a)) || PyArray_CGT(a, b, f)
                ? (a)
                : (b);
}

// 定义针对 npy_clongdouble 类型的最大值比较函数
npy_clongdouble
_NPY_MAX(npy_clongdouble a, npy_clongdouble b, npy::complex_tag const &)
{
    return npy_isnan(npy_creall(a)) || npy_isnan(npy_cimagl(a)) || PyArray_CGT(a, b, l)
                ? (a)
                : (b);
}

// 取消前面定义的宏函数 PyArray_CLT 和 PyArray_CGT
#undef PyArray_CLT
#undef PyArray_CGT

// 定义针对日期类型的最小值比较函数，但没有提供实现
template <class T>
T
_NPY_MIN(T a, T b, npy::date_tag const &)
{
    # 如果 a 等于 NPY_DATETIME_NAT，则返回 a，否则继续判断下一条件
    return (a) == NPY_DATETIME_NAT   ? (a)
           # 如果 b 等于 NPY_DATETIME_NAT，则返回 b，否则继续判断下一条件
           : (b) == NPY_DATETIME_NAT ? (b)
           # 如果 a 小于 b，则返回 a，否则返回 b
           : (a) < (b)               ? (a)
                                     # 如果上述所有条件都不满足，则返回 b
                                     : (b);
/* 
 * 最大值计算函数，返回两个值中的较大值，考虑特殊情况
 */
template <class T>
T
_NPY_MAX(T a, T b, npy::date_tag const &)
{
    return (a) == NPY_DATETIME_NAT   ? (a)
           : (b) == NPY_DATETIME_NAT ? (b)
           : (a) > (b)               ? (a)
                                     : (b);
}

/* 
 * 最小值计算函数，根据给定的标签调用 _NPY_MAX 函数，返回两个值中的较小值
 */
template <class Tag, class T = typename Tag::type>
T
_NPY_MIN(T const &a, T const &b)
{
    return _NPY_MIN(a, b, Tag{});
}

/* 
 * 最大值计算函数，根据给定的标签调用 _NPY_MAX 函数，返回两个值中的较大值
 */
template <class Tag, class T = typename Tag::type>
T
_NPY_MAX(T const &a, T const &b)
{
    return _NPY_MAX(a, b, Tag{});
}

/* 
 * 裁剪函数，根据给定的标签和最小/最大值对数据进行裁剪，确保数据在指定范围内
 */
template <class Tag, class T>
T
_NPY_CLIP(T x, T min, T max)
{
    return _NPY_MIN<Tag>(_NPY_MAX<Tag>((x), (min)), (max));
}

/* 
 * 针对非浮点数的裁剪函数实现，处理输入和输出数据的指针，确保数据在指定范围内
 */
template <class Tag, class T>
static inline void
_npy_clip_const_minmax_(
    char *ip, npy_intp is, char *op, npy_intp os, npy_intp n, T min_val, T max_val,
    std::false_type /* non-floating point */
)
{
    /* 连续内存，使用分支让编译器优化 */
    if (is == sizeof(T) && os == sizeof(T)) {
        for (npy_intp i = 0; i < n; i++, ip += sizeof(T), op += sizeof(T)) {
            *(T *)op = _NPY_CLIP<Tag>(*(T *)ip, min_val, max_val);
        }
    }
    else {
        for (npy_intp i = 0; i < n; i++, ip += is, op += os) {
            *(T *)op = _NPY_CLIP<Tag>(*(T *)ip, min_val, max_val);
        }
    }
}

/* 
 * 针对浮点数的裁剪函数实现，处理输入和输出数据的指针，确保数据在指定范围内，
 * 同时处理 NaN 值的情况
 */
template <class Tag, class T>
static inline void
_npy_clip_const_minmax_(
    char *ip, npy_intp is, char *op, npy_intp os, npy_intp n, T min_val, T max_val,
    std::true_type  /* floating point */
)
{
    if (!npy_isnan(min_val) && !npy_isnan(max_val)) {
        /*
         * min_val 和 max_val 都不是 NaN，因此下面的比较会正确处理 NaN 的传播
         */

        /* 连续内存，使用分支让编译器优化 */
        if (is == sizeof(T) && os == sizeof(T)) {
            for (npy_intp i = 0; i < n; i++, ip += sizeof(T), op += sizeof(T)) {
                T x = *(T *)ip;
                if (x < min_val) {
                    x = min_val;
                }
                if (x > max_val) {
                    x = max_val;
                }
                *(T *)op = x;
            }
        }
        else {
            for (npy_intp i = 0; i < n; i++, ip += is, op += os) {
                T x = *(T *)ip;
                if (x < min_val) {
                    x = min_val;
                }
                if (x > max_val) {
                    x = max_val;
                }
                *(T *)op = x;
            }
        }
    }
    else {
        /* min_val 和/或 max_val 是 NaN */
        T x = npy_isnan(min_val) ? min_val : max_val;
        for (npy_intp i = 0; i < n; i++, op += os) {
            *(T *)op = x;
        }
    }
}

/* 
 * 裁剪函数的入口点，调用裁剪函数处理输入参数和维度信息
 */
template <class Tag, class T = typename Tag::type>
static void
_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
    npy_intp n = dimensions[0];
    // 检查步长数组中的第二个和第三个元素是否为零
    if (steps[1] == 0 && steps[2] == 0) {
        /* min and max are constant throughout the loop, the most common case */
        // 获取循环期间常量的最小和最大值
        T min_val = *(T *)args[1];  // 获取最小值
        T max_val = *(T *)args[2];  // 获取最大值

        // 调用针对常量最小和最大值的剪切函数
        _npy_clip_const_minmax_<Tag, T>(
            args[0], steps[0], args[3], steps[3], n, min_val, max_val,
            std::is_base_of<npy::floating_point_tag, Tag>{}
        );
    }
    else {
        // 设置输入和输出指针及其对应的步长
        char *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
        npy_intp is1 = steps[0], is2 = steps[1],
                 is3 = steps[2], os1 = steps[3];
        // 迭代处理每个元素
        for (npy_intp i = 0; i < n;
             i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1)
        {
            // 调用通用剪切函数来计算并将结果存储到输出指针
            *(T *)op1 = _NPY_CLIP<Tag>(*(T *)ip1, *(T *)ip2, *(T *)ip3);
        }
    }
    // 清除浮点状态障碍
    npy_clear_floatstatus_barrier((char *)dimensions);
extern "C" {
// 定义一个 C 语言风格的外部函数接口块

NPY_NO_EXPORT void
BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理布尔类型数据
    return _npy_clip<npy::bool_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理布尔类型数据

NPY_NO_EXPORT void
BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理字节类型数据
    return _npy_clip<npy::byte_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理字节类型数据

NPY_NO_EXPORT void
UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理无符号字节类型数据
    return _npy_clip<npy::ubyte_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理无符号字节类型数据

NPY_NO_EXPORT void
SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理短整型数据
    return _npy_clip<npy::short_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理短整型数据

NPY_NO_EXPORT void
USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理无符号短整型数据
    return _npy_clip<npy::ushort_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理无符号短整型数据

NPY_NO_EXPORT void
INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
         void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理整型数据
    return _npy_clip<npy::int_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理整型数据

NPY_NO_EXPORT void
UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理无符号整型数据
    return _npy_clip<npy::uint_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理无符号整型数据

NPY_NO_EXPORT void
LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理长整型数据
    return _npy_clip<npy::long_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理长整型数据

NPY_NO_EXPORT void
ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理无符号长整型数据
    return _npy_clip<npy::ulong_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理无符号长整型数据

NPY_NO_EXPORT void
LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
              void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理长长整型数据
    return _npy_clip<npy::longlong_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理长长整型数据

NPY_NO_EXPORT void
ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
               void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理无符号长长整型数据
    return _npy_clip<npy::ulonglong_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理无符号长长整型数据

NPY_NO_EXPORT void
HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理半精度浮点数据
    return _npy_clip<npy::half_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理半精度浮点数据

NPY_NO_EXPORT void
FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理单精度浮点数据
    return _npy_clip<npy::float_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理单精度浮点数据

NPY_NO_EXPORT void
DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理双精度浮点数据
    return _npy_clip<npy::double_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理双精度浮点数据

NPY_NO_EXPORT void
LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
                void *NPY_UNUSED(func))
{
    // 调用 _npy_clip 函数处理长双精度浮点数据
    return _npy_clip<npy::longdouble_tag>(args, dimensions, steps);
}
// 定义一个不导出的函数，用于处理长双精度浮点数据

NPY_NO_EXPORT void
# 定义 CFLOAT_clip 函数，处理复数浮点数类型（cfloat）的数组剪切操作
CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func))
{
    # 调用模板函数 _npy_clip，传入复数浮点数标签类型（cfloat_tag），处理剪切操作
    return _npy_clip<npy::cfloat_tag>(args, dimensions, steps);
}

# 定义 CDOUBLE_clip 函数，处理双精度复数浮点数类型（cdouble）的数组剪切操作
NPY_NO_EXPORT void
CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
             void *NPY_UNUSED(func))
{
    # 调用模板函数 _npy_clip，传入双精度复数浮点数标签类型（cdouble_tag），处理剪切操作
    return _npy_clip<npy::cdouble_tag>(args, dimensions, steps);
}

# 定义 CLONGDOUBLE_clip 函数，处理长双精度复数浮点数类型（clongdouble）的数组剪切操作
NPY_NO_EXPORT void
CLONGDOUBLE_clip(char **args, npy_intp const *dimensions,
                 npy_intp const *steps, void *NPY_UNUSED(func))
{
    # 调用模板函数 _npy_clip，传入长双精度复数浮点数标签类型（clongdouble_tag），处理剪切操作
    return _npy_clip<npy::clongdouble_tag>(args, dimensions, steps);
}

# 定义 DATETIME_clip 函数，处理日期时间类型（datetime）的数组剪切操作
NPY_NO_EXPORT void
DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
              void *NPY_UNUSED(func))
{
    # 调用模板函数 _npy_clip，传入日期时间标签类型（datetime_tag），处理剪切操作
    return _npy_clip<npy::datetime_tag>(args, dimensions, steps);
}

# 定义 TIMEDELTA_clip 函数，处理时间差类型（timedelta）的数组剪切操作
NPY_NO_EXPORT void
TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
               void *NPY_UNUSED(func))
{
    # 调用模板函数 _npy_clip，传入时间差标签类型（timedelta_tag），处理剪切操作
    return _npy_clip<npy::timedelta_tag>(args, dimensions, steps);
}

`.\numpy\numpy\_core\src\umath\clip.h`

#ifndef _NPY_UMATH_CLIP_H_
#define _NPY_UMATH_CLIP_H_

#ifdef __cplusplus
extern "C" {
#endif

// 声明函数 BOOL_clip，用于对布尔类型数据进行截取
NPY_NO_EXPORT void
BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func));

// 声明函数 BYTE_clip，用于对字节类型数据进行截取
NPY_NO_EXPORT void
BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func));

// 声明函数 UBYTE_clip，用于对无符号字节类型数据进行截取
NPY_NO_EXPORT void
UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func));

// 声明函数 SHORT_clip，用于对短整型数据进行截取
NPY_NO_EXPORT void
SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func));

// 声明函数 USHORT_clip，用于对无符号短整型数据进行截取
NPY_NO_EXPORT void
USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func));

// 声明函数 INT_clip，用于对整型数据进行截取
NPY_NO_EXPORT void
INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
         void *NPY_UNUSED(func));

// 声明函数 UINT_clip，用于对无符号整型数据进行截取
NPY_NO_EXPORT void
UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func));

// 声明函数 LONG_clip，用于对长整型数据进行截取
NPY_NO_EXPORT void
LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func));

// 声明函数 ULONG_clip，用于对无符号长整型数据进行截取
NPY_NO_EXPORT void
ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func));

// 声明函数 LONGLONG_clip，用于对长长整型数据进行截取
NPY_NO_EXPORT void
LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
              void *NPY_UNUSED(func));

// 声明函数 ULONGLONG_clip，用于对无符号长长整型数据进行截取
NPY_NO_EXPORT void
ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
               void *NPY_UNUSED(func));

// 声明函数 HALF_clip，用于对半精度浮点型数据进行截取
NPY_NO_EXPORT void
HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
          void *NPY_UNUSED(func));

// 声明函数 FLOAT_clip，用于对单精度浮点型数据进行截取
NPY_NO_EXPORT void
FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
           void *NPY_UNUSED(func));

// 声明函数 DOUBLE_clip，用于对双精度浮点型数据进行截取
NPY_NO_EXPORT void
DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func));

// 声明函数 LONGDOUBLE_clip，用于对长双精度浮点型数据进行截取
NPY_NO_EXPORT void
LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
                void *NPY_UNUSED(func));

// 声明函数 CFLOAT_clip，用于对复数单精度浮点型数据进行截取
NPY_NO_EXPORT void
CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
            void *NPY_UNUSED(func));

// 声明函数 CDOUBLE_clip，用于对复数双精度浮点型数据进行截取
NPY_NO_EXPORT void
CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
             void *NPY_UNUSED(func));

// 声明函数 CLONGDOUBLE_clip，用于对复数长双精度浮点型数据进行截取
NPY_NO_EXPORT void
CLONGDOUBLE_clip(char **args, npy_intp const *dimensions,
                 npy_intp const *steps, void *NPY_UNUSED(func));

// 声明函数 DATETIME_clip，用于对日期时间类型数据进行截取
NPY_NO_EXPORT void
DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
              void *NPY_UNUSED(func));

// 声明函数 TIMEDELTA_clip，用于对时间间隔类型数据进行截取
NPY_NO_EXPORT void
TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
               void *NPY_UNUSED(func));

#ifdef __cplusplus
}
#endif

#endif

`.\numpy\numpy\_core\src\umath\dispatching.c`

/*
 * This file implements universal function dispatching and promotion (which
 * is necessary to happen before dispatching).
 * This is part of the UFunc object.  Promotion and dispatching uses the
 * following things:
 *
 * - operand_DTypes:  The datatypes as passed in by the user.
 * - signature: The DTypes fixed by the user with `dtype=` or `signature=`.
 * - ufunc._loops: A list of all ArrayMethods and promoters, it contains
 *   tuples `(dtypes, ArrayMethod)` or `(dtypes, promoter)`.
 * - ufunc._dispatch_cache: A cache to store previous promotion and/or
 *   dispatching results.
 * - The actual arrays are used to support the old code paths where necessary.
 *   (this includes any value-based casting/promotion logic)
 *
 * In general, `operand_DTypes` is always overridden by `signature`.  If a
 * DType is included in the `signature` it must match precisely.
 *
 * The process of dispatching and promotion can be summarized in the following
 * steps:
 *
 * 1. Override any `operand_DTypes` from `signature`.
 * 2. Check if the new `operand_Dtypes` is cached (if it is, go to 4.)
 * 3. Find the best matching "loop".  This is done using multiple dispatching
 *    on all `operand_DTypes` and loop `dtypes`.  A matching loop must be
 *    one whose DTypes are superclasses of the `operand_DTypes` (that are
 *    defined).  The best matching loop must be better than any other matching
 *    loop.  This result is cached.
 * 4. If the found loop is a promoter: We call the promoter. It can modify
 *    the `operand_DTypes` currently.  Then go back to step 2.
 *    (The promoter can call arbitrary code, so it could even add the matching
 *    loop first.)
 * 5. The final `ArrayMethod` is found, its registered `dtypes` is copied
 *    into the `signature` so that it is available to the ufunc loop.
 *
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#define _UMATHMODULE

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <convert_datatype.h>

#include "numpy/ndarraytypes.h"
#include "numpy/npy_3kcompat.h"
#include "common.h"
#include "npy_pycompat.h"

#include "dispatching.h"
#include "dtypemeta.h"
#include "npy_hashtable.h"
#include "legacy_array_method.h"
#include "ufunc_object.h"
#include "ufunc_type_resolution.h"


#define PROMOTION_DEBUG_TRACING 0


/* forward declaration */
static inline PyObject *
promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
        PyArrayObject *const ops[],
        PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *op_dtypes[],
        npy_bool allow_legacy_promotion);


/**
 * Function to add a new loop to the ufunc.  This mainly appends it to the
 * list (as it currently is just a list).
 *
 * @param ufunc The universal function to add the loop to.
 * @param info The tuple (dtype_tuple, ArrayMethod/promoter).
 * @param ignore_duplicate If 1 and a loop with the same `dtype_tuple` is
 *        found, the function does nothing.
 */
NPY_NO_EXPORT int
/*
 * PyUFunc_AddLoop: Add a loop implementation to a ufunc object based on provided info.
 * Validates the info object to ensure it meets expected format and content.
 */
PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate)
{
    /*
     * Validate the info object, this should likely move to a different
     * entry-point in the future (and is mostly unnecessary currently).
     */
    if (!PyTuple_CheckExact(info) || PyTuple_GET_SIZE(info) != 2) {
        PyErr_SetString(PyExc_TypeError,
                "Info must be a tuple: "
                "(tuple of DTypes or None, ArrayMethod or promoter)");
        return -1;
    }

    // Extract the DType tuple from info
    PyObject *DType_tuple = PyTuple_GetItem(info, 0);

    // Check if the length of DType tuple matches the number of operands in ufunc
    if (PyTuple_GET_SIZE(DType_tuple) != ufunc->nargs) {
        PyErr_SetString(PyExc_TypeError,
                "DType tuple length does not match ufunc number of operands");
        return -1;
    }

    // Validate each item in DType tuple
    for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(DType_tuple); i++) {
        PyObject *item = PyTuple_GET_ITEM(DType_tuple, i);
        if (item != Py_None
                && !PyObject_TypeCheck(item, &PyArrayDTypeMeta_Type)) {
            PyErr_SetString(PyExc_TypeError,
                    "DType tuple may only contain None and DType classes");
            return -1;
        }
    }

    // Validate the second argument (meth_or_promoter) in info
    PyObject *meth_or_promoter = PyTuple_GET_ITEM(info, 1);
    if (!PyObject_TypeCheck(meth_or_promoter, &PyArrayMethod_Type)
            && !PyCapsule_IsValid(meth_or_promoter, "numpy._ufunc_promoter")) {
        PyErr_SetString(PyExc_TypeError,
                "Second argument to info must be an ArrayMethod or promoter");
        return -1;
    }

    // Initialize ufunc->_loops if it's NULL
    if (ufunc->_loops == NULL) {
        ufunc->_loops = PyList_New(0);
        if (ufunc->_loops == NULL) {
            return -1;
        }
    }

    // Get the current list of loops registered in ufunc
    PyObject *loops = ufunc->_loops;
    Py_ssize_t length = PyList_Size(loops);

    // Iterate through existing loops to check for duplicates
    for (Py_ssize_t i = 0; i < length; i++) {
        PyObject *item = PyList_GetItemRef(loops, i);
        PyObject *cur_DType_tuple = PyTuple_GetItem(item, 0);
        Py_DECREF(item);

        // Compare current DType tuple with the one being added
        int cmp = PyObject_RichCompareBool(cur_DType_tuple, DType_tuple, Py_EQ);
        if (cmp < 0) {
            return -1;
        }
        if (cmp == 0) {
            continue;
        }
        // If ignore_duplicate is enabled, return success
        if (ignore_duplicate) {
            return 0;
        }
        // Otherwise, raise an error for duplicate registration
        PyErr_Format(PyExc_TypeError,
                "A loop/promoter has already been registered with '%s' for %R",
                ufunc_get_name_cstr(ufunc), DType_tuple);
        return -1;
    }

    // Append the new info (tuple) to the list of loops for ufunc
    if (PyList_Append(loops, info) < 0) {
        return -1;
    }

    // Return success
    return 0;
}
    // 检查传入的对象是否为 ufunc 对象，如果不是则设置类型错误并返回 -1
    if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) {
        PyErr_SetString(PyExc_TypeError,
                "ufunc object passed is not a ufunc!");
        return -1;
    }
    // 从给定的规格和私有数据创建一个 PyBoundArrayMethodObject 对象
    PyBoundArrayMethodObject *bmeth =
            (PyBoundArrayMethodObject *)PyArrayMethod_FromSpec_int(spec, priv);
    // 如果创建失败，则返回 -1
    if (bmeth == NULL) {
        return -1;
    }
    // 计算参数的个数，包括输入和输出参数
    int nargs = bmeth->method->nin + bmeth->method->nout;
    // 根据给定的数据类型数组创建一个元组对象
    PyObject *dtypes = PyArray_TupleFromItems(
            nargs, (PyObject **)bmeth->dtypes, 1);
    // 如果创建元组失败，则返回 -1
    if (dtypes == NULL) {
        return -1;
    }
    // 使用 dtypes 和 bmeth->method 创建一个元组对象
    PyObject *info = PyTuple_Pack(2, dtypes, bmeth->method);
    // 减少 bmeth 和 dtypes 的引用计数
    Py_DECREF(bmeth);
    Py_DECREF(dtypes);
    // 如果创建元组失败，则返回 -1
    if (info == NULL) {
        return -1;
    }
    // 调用 PyUFunc_AddLoop 将 info 传递给 ufunc 对象的添加循环方法，并返回结果
    return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
/**
 * Resolves the implementation to use, this uses typical multiple dispatching
 * methods of finding the best matching implementation or resolver.
 * (Based on `isinstance()`, the knowledge that non-abstract DTypes cannot
 * be subclassed is used, however.)
 *
 * NOTE: This currently does not take into account output dtypes which do not
 *       have to match.  The possible extension here is that if an output
 *       is given (and thus an output dtype), but not part of the signature
 *       we could ignore it for matching, but *prefer* a loop that matches
 *       better.
 *       Why is this not done currently?  First, it seems a niche feature that
 *       loops can only be distinguished based on the output dtype.  Second,
 *       there are some nasty theoretical things because:
 *
 *            np.add(f4, f4, out=f8)
 *            np.add(f4, f4, out=f8, dtype=f8)
 *
 *       are different, the first uses the f4 loop, the second the f8 loop.
 *       The problem is, that the current cache only uses the op_dtypes and
 *       both are `(f4, f4, f8)`.  The cache would need to store also which
 *       output was provided by `dtype=`/`signature=`.
 *
 * @param ufunc The universal function object representing the function to resolve.
 * @param op_dtypes The array of DTypeMeta objects representing operand types.
 * @param only_promoters Flag indicating whether to consider only promoters.
 * @param out_info Output parameter returning the best implementation information.
 *        WARNING: This is a borrowed reference!
 * @returns -1 on error, 0 on success. Note that the output can be NULL on success if nothing is found.
 */
static int
resolve_implementation_info(PyUFuncObject *ufunc,
        PyArray_DTypeMeta *op_dtypes[], npy_bool only_promoters,
        PyObject **out_info)
{
    int nin = ufunc->nin, nargs = ufunc->nargs;
    Py_ssize_t size = PySequence_Length(ufunc->_loops);
    PyObject *best_dtypes = NULL;
    PyObject *best_resolver_info = NULL;

#if PROMOTION_DEBUG_TRACING
    printf("Promoting for '%s' promoters only: %d\n",
            ufunc->name ? ufunc->name : "<unknown>", (int)only_promoters);
    printf("    DTypes: ");
    PyObject *tmp = PyArray_TupleFromItems(ufunc->nargs, op_dtypes, 1);
    PyObject_Print(tmp, stdout, 0);
    Py_DECREF(tmp);
    printf("\n");
#endif

    // Logic to resolve the best implementation goes here

    if (best_dtypes == NULL) {
        /* The non-legacy lookup failed */
        *out_info = NULL;
        return 0;
    }

    *out_info = best_resolver_info;
    return 0;
}
/*
 * 调用推广器并递归处理
 * 
 * ufunc: 要操作的通用函数对象
 * info: 一个元组，包含有关推广的信息；若为NULL，则表示进行减少特殊路径
 * op_dtypes: 操作数的数据类型元数据数组
 * signature: 签名的数据类型元数据数组
 * operands: 操作数数组
 * 返回值: 推广和解析后的信息及ufunc实现的解析结果，或者在出错时返回NULL
 */
call_promoter_and_recurse(PyUFuncObject *ufunc, PyObject *info,
        PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
        PyArrayObject *const operands[])
{
    int nargs = ufunc->nargs; // 获取ufunc对象的参数个数
    PyObject *resolved_info = NULL; // 初始化解析后的信息为NULL

    int promoter_result; // 推广器的结果
    PyArray_DTypeMeta *new_op_dtypes[NPY_MAXARGS]; // 新的操作数数据类型元数据数组

    if (info != NULL) { // 如果信息不为NULL
        PyObject *promoter = PyTuple_GET_ITEM(info, 1); // 获取元组中第二个元素作为推广器

        if (PyCapsule_CheckExact(promoter)) { // 检查推广器是否为PyCapsule类型
            /* 我们也可以选择包装Python函数... */
            PyArrayMethod_PromoterFunction *promoter_function = PyCapsule_GetPointer(
                    promoter, "numpy._ufunc_promoter"); // 获取推广器函数指针
            if (promoter_function == NULL) {
                return NULL; // 如果无法获取推广器函数指针，返回NULL
            }
            promoter_result = promoter_function((PyObject *)ufunc,
                    op_dtypes, signature, new_op_dtypes); // 调用推广器函数
        }
        else {
            PyErr_SetString(PyExc_NotImplementedError,
                    "Calling python functions for promotion is not implemented.");
            return NULL; // 如果推广器不是PyCapsule类型，返回NULL并设置错误信息
        }
        if (promoter_result < 0) { // 如果推广结果小于0，返回NULL
            return NULL;
        }

        /*
         * 如果没有数据类型发生变化，我们将会无限递归，终止。
         * （当然，仍然可能无限递归。）
         *
         * TODO: 我们可以允许用户直接信号这一点，并且还可以移动
         *       调用以几乎立即进行。这有时会不必要地调用它，
         *       但可能会增加灵活性。
         */
        int dtypes_changed = 0; // 标记数据类型是否发生变化
        for (int i = 0; i < nargs; i++) {
            if (new_op_dtypes[i] != op_dtypes[i]) { // 检查每个操作数的数据类型是否发生变化
                dtypes_changed = 1;
                break;
            }
        }
        if (!dtypes_changed) { // 如果数据类型没有发生变化，跳转到finish标签处
            goto finish;
        }
    }
    else {
        /* 减少特殊路径 */
        new_op_dtypes[0] = NPY_DT_NewRef(op_dtypes[1]); // 复制第二个操作数的数据类型为第一个
        new_op_dtypes[1] = NPY_DT_NewRef(op_dtypes[1]); // 复制第二个操作数的数据类型为第二个
        Py_XINCREF(op_dtypes[2]); // 增加第三个操作数的引用计数
        new_op_dtypes[2] = op_dtypes[2]; // 第三个操作数的数据类型保持不变
    }

    /*
     * 进行递归调用，推广函数必须确保新元组严格更精确
     * （从而保证最终完成）
     */
    if (Py_EnterRecursiveCall(" during ufunc promotion.") != 0) { // 进入递归调用检查
        goto finish; // 如果递归深度超过限制，跳转到finish标签处
    }
    resolved_info = promote_and_get_info_and_ufuncimpl(ufunc,
            operands, signature, new_op_dtypes,
            /* no legacy promotion */ NPY_FALSE); // 执行推广和获取信息及ufunc实现的解析

    Py_LeaveRecursiveCall(); // 离开递归调用

  finish: // 结束标签，释放资源
    for (int i = 0; i < nargs; i++) {
        Py_XDECREF(new_op_dtypes[i]); // 释放新操作数数据类型的引用
    }
    return resolved_info; // 返回解析后的信息
}

/*
 * 将DType 'signature'转换为旧ufunc类型解析器在'ufunc_type_resolution.c'中使用的描述符元组。
 *
 * 注意，当我们使用类型解析的旧路径而不是推广时，我们不需要传递类型元组，
 * 因为在这种情况下签名始终是正确的。
 */
/*
 * 创建一个新的类型元组，根据给定的签名数组和元组指针进行填充。
 * 如果创建过程中发生错误，将返回-1，否则返回0。
 */
static int
_make_new_typetup(
        int nop, PyArray_DTypeMeta *signature[], PyObject **out_typetup) {
    *out_typetup = PyTuple_New(nop);  // 创建一个新的元组，长度为nop，存储在out_typetup中
    if (*out_typetup == NULL) {  // 如果元组创建失败，返回-1
        return -1;
    }

    int none_count = 0;  // 记录为None的元素数量
    for (int i = 0; i < nop; i++) {  // 遍历每个元素
        PyObject *item;
        if (signature[i] == NULL) {  // 如果签名中的元素为空
            item = Py_None;  // 使用Python中的None来填充item
            none_count++;  // 计数加一
        }
        else {
            if (!NPY_DT_is_legacy(signature[i])) {  // 如果不是遗留类型
                /*
                 * 遗留类型解析无法处理这些情况。
                 * 在将来，这条路径将返回`None`或类似的值，
                 * 如果使用了遗留类型解析，则稍后设置错误。
                 */
                PyErr_SetString(PyExc_RuntimeError,
                        "Internal NumPy error: new DType in signature not yet "
                        "supported. (This should be unreachable code!)");
                Py_SETREF(*out_typetup, NULL);  // 将out_typetup设置为NULL
                return -1;  // 返回-1表示错误
            }
            item = (PyObject *)signature[i]->singleton;  // 否则使用签名对应的单例对象填充item
        }
        Py_INCREF(item);  // 增加item的引用计数
        PyTuple_SET_ITEM(*out_typetup, i, item);  // 将item放入元组的第i个位置
    }
    if (none_count == nop) {
        /* 整个签名都是None，简单地忽略类型元组 */
        Py_DECREF(*out_typetup);  // 释放类型元组的引用
        *out_typetup = NULL;  // 将类型元组设置为NULL
    }
    return 0;  // 返回0表示成功
}


/*
 * 使用遗留类型解析器填充操作的数据类型数组，并使用借用引用。这可能会更改内容，
 * 因为它将使用遗留类型解析，可以特殊处理0维数组（使用基于值的逻辑）。
 */
static int
legacy_promote_using_legacy_type_resolver(PyUFuncObject *ufunc,
        PyArrayObject *const *ops, PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *operation_DTypes[], int *out_cacheable,
        npy_bool check_only)
{
    int nargs = ufunc->nargs;  // 获取操作数的数量
    PyArray_Descr *out_descrs[NPY_MAXARGS] = {NULL};  // 创建描述符数组，初始化为NULL

    PyObject *type_tuple = NULL;  // 创建一个Python对象指针，初始化为NULL
    if (_make_new_typetup(nargs, signature, &type_tuple) < 0) {  // 创建新的类型元组
        return -1;  // 如果创建失败，直接返回-1
    }

    /*
     * 我们使用不安全的强制转换。当然，这不准确，但在这里没关系，
     * 因为对于提升/分派来说，强制转换的安全性没有影响。
     * 实际操作数是否可以强制转换必须在类型解析步骤中检查（这可能也会调用此函数！）。
     */
    if (ufunc->type_resolver(ufunc,
            NPY_UNSAFE_CASTING, (PyArrayObject **)ops, type_tuple,
            out_descrs) < 0) {
        Py_XDECREF(type_tuple);  // 释放类型元组的引用
        /* 不是所有的遗留解析器在失败时都会清理： */
        for (int i = 0; i < nargs; i++) {
            Py_CLEAR(out_descrs[i]);  // 清理描述符数组中的每个元素
        }
        return -1;  // 返回-1表示失败
    }
    Py_XDECREF(type_tuple);  // 释放类型元组的引用
    if (NPY_UNLIKELY(check_only)) {
        /*
         * 当启用警告时，我们不替换数据类型，而只检查旧结果是否与新结果相同。
         * 由于噪音的原因，我们只在*输出*数据类型上执行此操作，忽略浮点精度变化，
         * 如 `np.float32(3.1) < 3.1` 的比较。
         */
        for (int i = ufunc->nin; i < ufunc->nargs; i++) {
            /*
             * 如果提供了输出并且新的数据类型匹配，则可能会略微损失精度，例如:
             * `np.true_divide(float32_arr0d, 1, out=float32_arr0d)`
             * （在此之前操作的是 float64，尽管这种情况可能很少见）
             */
            if (ops[i] != NULL
                    && PyArray_EquivTypenums(
                            operation_DTypes[i]->type_num,
                            PyArray_DESCR(ops[i])->type_num)) {
                continue;
            }
            /* 否则，如果数据类型不匹配，则发出警告 */
            if (!PyArray_EquivTypenums(
                    operation_DTypes[i]->type_num, out_descrs[i]->type_num)) {
                if (PyErr_WarnFormat(PyExc_UserWarning, 1,
                        "result dtype changed due to the removal of value-based "
                        "promotion from NumPy. Changed from %S to %S.",
                        out_descrs[i], operation_DTypes[i]->singleton) < 0) {
                    return -1;
                }
                return 0;
            }
        }
        return 0;
    }

    for (int i = 0; i < nargs; i++) {
        Py_XSETREF(operation_DTypes[i], NPY_DTYPE(out_descrs[i]));
        Py_INCREF(operation_DTypes[i]);
        Py_DECREF(out_descrs[i]);
    }
    /*
     * 日期时间的传统解析器忽略了签名，这在使用时应该是警告/异常（如果使用）。
     * 在这种情况下，签名被（错误地）修改，缓存是不可能的。
     */
    for (int i = 0; i < nargs; i++) {
        if (signature[i] != NULL && signature[i] != operation_DTypes[i]) {
            Py_INCREF(operation_DTypes[i]);
            Py_SETREF(signature[i], operation_DTypes[i]);
            *out_cacheable = 0;
        }
    }
    return 0;
/*
 * 注意，此函数返回一个对 info 的借用引用，因为它将 info 添加到循环中。
 */
NPY_NO_EXPORT PyObject *
add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
        PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate)
{
    // 创建包含操作数据类型元组的 Python 元组对象
    PyObject *DType_tuple = PyArray_TupleFromItems(ufunc->nargs,
            (PyObject **)operation_dtypes, 0);
    if (DType_tuple == NULL) {
        return NULL;
    }

    // 创建新的遗留包装数组方法对象
    PyArrayMethodObject *method = PyArray_NewLegacyWrappingArrayMethod(
            ufunc, operation_dtypes);
    if (method == NULL) {
        Py_DECREF(DType_tuple);
        return NULL;
    }

    // 打包 DType 元组和方法对象为一个元组，作为 info
    PyObject *info = PyTuple_Pack(2, DType_tuple, method);
    Py_DECREF(DType_tuple);
    Py_DECREF(method);
    if (info == NULL) {
        return NULL;
    }

    // 将 info 添加到 ufunc 的循环列表中
    if (PyUFunc_AddLoop(ufunc, info, ignore_duplicate) < 0) {
        Py_DECREF(info);
        return NULL;
    }
    Py_DECREF(info);  /* 现在从 ufunc 的循环列表中借用引用 */
    return info;
}


/*
 * 这是查找正确的 DType 签名和 ArrayMethod 的主要实现函数，用于 ufunc。此函数可以
 * 在 `do_legacy_fallback` 设置为 False 时递归调用。
 *
 * 如果需要基于值的提升，会提前由 `promote_and_get_ufuncimpl` 处理。
 */
static inline PyObject *
promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
        PyArrayObject *const ops[],
        PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *op_dtypes[],
        npy_bool allow_legacy_promotion)
{
    /*
     * 获取分发信息，其中包含实现和 DType 签名元组。具体步骤如下：
     *
     * 1. 检查缓存。
     * 2. 检查所有注册的循环/提升器，找到最佳匹配。
     * 3. 如果找不到匹配项，则回退到遗留实现。
     */
    PyObject *info = PyArrayIdentityHash_GetItem(ufunc->_dispatch_cache,
                (PyObject **)op_dtypes);
    if (info != NULL && PyObject_TypeCheck(
            PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) {
        /* 找到了 ArrayMethod 并且不是提升器：返回它 */
        return info;
    }

    /*
     * 如果 `info == NULL`，从缓存加载失败，使用 `resolve_implementation_info`
     * 进行完整解析（成功时缓存其结果）。
     */
    if (info == NULL) {
        if (resolve_implementation_info(ufunc,
                op_dtypes, NPY_FALSE, &info) < 0) {
            return NULL;
        }
        if (info != NULL && PyObject_TypeCheck(
                PyTuple_GET_ITEM(info, 1), &PyArrayMethod_Type)) {
            /*
             * 找到了 ArrayMethod 并且不是提升器。在返回之前，将其添加到缓存
             * 中，以便将来更快地查找。
             */
            if (PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
                    (PyObject **)op_dtypes, info, 0) < 0) {
                return NULL;
            }
            return info;
        }
    /*
     * 此时，如果没有匹配的循环，则 `info` 为 NULL；如果有匹配的循环，它是一个需要使用/调用的推广器。
     * TODO: 可能需要找到更好的减少解决方案，但这种方式是一个真正的后备（未注册，因此优先级最低）。
     */
    if (info != NULL || op_dtypes[0] == NULL) {
        // 调用推广器并递归处理
        info = call_promoter_and_recurse(ufunc,
                info, op_dtypes, signature, ops);
        if (info == NULL && PyErr_Occurred()) {
            return NULL;
        }
        else if (info != NULL) {
            /* 将结果使用原始类型添加到缓存中： */
            if (PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
                    (PyObject **)op_dtypes, info, 0) < 0) {
                return NULL;
            }
            return info;
        }
    }

    /*
     * 即使使用推广器也找不到循环。
     * 推广失败，这通常应该是一个错误。
     * 但是，我们需要在这里给传统实现一个机会（它将修改 `op_dtypes`）。
     */
    if (!allow_legacy_promotion || ufunc->type_resolver == NULL ||
            (ufunc->ntypes == 0 && ufunc->userloops == NULL)) {
        /* 已经尝试过或不是“传统”的 ufunc（未找到循环，返回） */
        return NULL;
    }

    PyArray_DTypeMeta *new_op_dtypes[NPY_MAXARGS] = {NULL};
    int cacheable = 1;  /* TODO: 只有比较过时才需要这个 */
    if (legacy_promote_using_legacy_type_resolver(ufunc,
            ops, signature, new_op_dtypes, &cacheable, NPY_FALSE) < 0) {
        return NULL;
    }
    // 推广并获取信息和 ufunc 实现
    info = promote_and_get_info_and_ufuncimpl(ufunc,
            ops, signature, new_op_dtypes, NPY_FALSE);
    if (info == NULL) {
        /*
         * NOTE: This block exists solely to support numba's DUFuncs which add
         * new loops dynamically, so our list may get outdated.  Thus, we
         * have to make sure that the loop exists.
         *
         * Before adding a new loop, ensure that it actually exists. There
         * is a tiny chance that this would not work, but it would require an
         * extension additionally have a custom loop getter.
         * This check should ensure a the right error message, but in principle
         * we could try to call the loop getter here.
         */
        
        // 获取当前 ufunc 的类型字符串
        const char *types = ufunc->types;
        // 初始化循环是否存在的标志
        npy_bool loop_exists = NPY_FALSE;
        
        // 遍历 ufunc 的所有类型
        for (int i = 0; i < ufunc->ntypes; ++i) {
            loop_exists = NPY_TRUE;  /* 假设循环存在，如果不存在则中断 */
            
            // 检查每个参数类型是否与新操作的类型匹配
            for (int j = 0; j < ufunc->nargs; ++j) {
                if (types[j] != new_op_dtypes[j]->type_num) {
                    loop_exists = NPY_FALSE;
                    break;
                }
            }
            
            // 如果循环存在，则退出循环
            if (loop_exists) {
                break;
            }
            
            // 移动到下一组参数的类型字符串
            types += ufunc->nargs;
        }

        // 如果循环存在，则添加并返回旧的包装 ufunc 循环信息
        if (loop_exists) {
            info = add_and_return_legacy_wrapping_ufunc_loop(
                    ufunc, new_op_dtypes, 0);
        }
    }

    // 释放新操作数据类型的引用计数
    for (int i = 0; i < ufunc->nargs; i++) {
        Py_XDECREF(new_op_dtypes[i]);
    }

    /* 使用原始类型将此项添加到缓存中： */
    // 如果可缓存且添加到缓存失败，则返回 NULL
    if (cacheable && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
            (PyObject **)op_dtypes, info, 0) < 0) {
        return NULL;
    }
    
    // 返回操作信息
    return info;
/**
 * The central entry-point for the promotion and dispatching machinery.
 *
 * It currently may work with the operands (although it would be possible to
 * only work with DType (classes/types).  This is because it has to ensure
 * that legacy (value-based promotion) is used when necessary.
 *
 * NOTE: The machinery here currently ignores output arguments unless
 *       they are part of the signature.  This slightly limits unsafe loop
 *       specializations, which is important for the `ensure_reduce_compatible`
 *       fallback mode.
 *       To fix this, the caching mechanism (and dispatching) can be extended.
 *       When/if that happens, the `ensure_reduce_compatible` could be
 *       deprecated (it should never kick in because promotion kick in first).
 *
 * @param ufunc The ufunc object, used mainly for the fallback.
 * @param ops The array operands (used only for the fallback).
 * @param signature As input, the DType signature fixed explicitly by the user.
 *        The signature is *filled* in with the operation signature we end up
 *        using.
 * @param op_dtypes The operand DTypes (without casting) which are specified
 *        either by the `signature` or by an `operand`.
 *        (outputs and the second input can be NULL for reductions).
 *        NOTE: In some cases, the promotion machinery may currently modify
 *        these including clearing the output.
 * @param force_legacy_promotion If set, we have to use the old type resolution
 *        to implement value-based promotion/casting.
 * @param promoting_pyscalars Indication that some of the initial inputs were
 *        int, float, or complex.  In this case weak-scalar promotion is used
 *        which can lead to a lower result precision even when legacy promotion
 *        does not kick in: `np.int8(1) + 1` is the example.
 *        (Legacy promotion is skipped because `np.int8(1)` is also scalar)
 * @param ensure_reduce_compatible Must be set for reductions, in which case
 *        the found implementation is checked for reduce-like compatibility.
 *        If it is *not* compatible and `signature[2] != NULL`, we assume its
 *        output DType is correct (see NOTE above).
 *        If removed, promotion may require information about whether this
 *        is a reduction, so the more likely case is to always keep fixing this
 *        when necessary, but push down the handling so it can be cached.
 */
NPY_NO_EXPORT PyArrayMethodObject *
promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
        PyArrayObject *const ops[],
        PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *op_dtypes[],
        npy_bool force_legacy_promotion,
        npy_bool allow_legacy_promotion,
        npy_bool promoting_pyscalars,
        npy_bool ensure_reduce_compatible)
{
    // 获取 ufunc 的输入数量和参数数量
    int nin = ufunc->nin, nargs = ufunc->nargs;

    /*
     * Get the actual DTypes we operate with by setting op_dtypes[i] from
     * signature[i].
     */
    for (int i = 0; i < nargs; i++) {
        // 如果签名中的操作数不为空
        if (signature[i] != NULL) {
            /*
             * 忽略操作数输入，不能覆盖签名，因为它是固定的（不能被提升！）
             */
            Py_INCREF(signature[i]);  // 增加对签名对象的引用计数
            Py_XSETREF(op_dtypes[i], signature[i]);  // 设置操作数的数据类型为签名中对应的数据类型
            assert(i >= ufunc->nin || !NPY_DT_is_abstract(signature[i]));  // 断言，确保在输入数量之外或者签名不是抽象的
        }
        // 如果签名中的操作数为空，并且索引大于等于输入数量
        else if (i >= nin) {
            /*
             * 如果不在签名中，我们暂时忽略输出，这总是会得到正确的结果
             * （限制注册包含类型转换的专用循环）
             * （见 resolve_implementation_info 中的注释）
             */
            Py_CLEAR(op_dtypes[i]);  // 清除操作数的数据类型引用
        }
    }

    int current_promotion_state = get_npy_promotion_state();

    // 如果强制使用传统提升，并且当前提升状态是 NPY_USE_LEGACY_PROMOTION，并且有注册的类型或者用户自定义循环
    if (force_legacy_promotion
            && current_promotion_state == NPY_USE_LEGACY_PROMOTION
            && (ufunc->ntypes != 0 || ufunc->userloops != NULL)) {
        /*
         * 对于基于值的逻辑，我们必须使用传统提升。
         * 首先一次性调用旧解析器以获取“实际”循环数据类型。
         * 在此（额外的）提升之后，我们甚至可以使用正常的缓存。
         */
        int cacheable = 1;  // 未使用，因为我们修改了原始的 `op_dtypes`
        if (legacy_promote_using_legacy_type_resolver(ufunc,
                ops, signature, op_dtypes, &cacheable, NPY_FALSE) < 0) {
            goto handle_error;
        }
    }

    /* 暂停警告并始终使用“新”路径 */
    set_npy_promotion_state(NPY_USE_WEAK_PROMOTION);
    // 提升并获取信息及 ufuncimpl 对象
    PyObject *info = promote_and_get_info_and_ufuncimpl(ufunc,
            ops, signature, op_dtypes, allow_legacy_promotion);
    set_npy_promotion_state(current_promotion_state);

    // 如果获取信息失败，则处理错误
    if (info == NULL) {
        goto handle_error;
    }

    // 获取方法对象和所有数据类型元组
    PyArrayMethodObject *method = (PyArrayMethodObject *)PyTuple_GET_ITEM(info, 1);
    PyObject *all_dtypes = PyTuple_GET_ITEM(info, 0);

    /* 如果有必要，检查旧结果是否会有不同 */
    if (NPY_UNLIKELY(current_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN)
            && (force_legacy_promotion || promoting_pyscalars)
            && npy_give_promotion_warnings()) {
        PyArray_DTypeMeta *check_dtypes[NPY_MAXARGS];
        for (int i = 0; i < nargs; i++) {
            check_dtypes[i] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM(
                    all_dtypes, i);
        }
        /* 在调用传统提升之前，假装那是状态： */
        set_npy_promotion_state(NPY_USE_LEGACY_PROMOTION);
        // 调用传统类型解析器进行提升
        int res = legacy_promote_using_legacy_type_resolver(ufunc,
                ops, signature, check_dtypes, NULL, NPY_TRUE);
        /* 重置提升状态： */
        set_npy_promotion_state(NPY_USE_WEAK_PROMOTION_AND_WARN);
        if (res < 0) {
            goto handle_error;
        }
    }
    /*
     * 在某些情况下（仅逻辑 ufunc），我们找到的循环可能不兼容于 reduce 操作。
     * 因为机制无法区分带输出的 reduce 操作和普通的 ufunc 调用，
     * 所以我们必须假设结果的 DType 是正确的，并为输入强制指定（如果尚未强制）。
     * 注意：这假设所有的循环都是“安全的”，参见本注释中的注意事项。
     * 这一点可以放宽，如果放宽了，则可能需要缓存调用是否用于 reduce 操作。
     */
    if (ensure_reduce_compatible && signature[0] == NULL &&
            PyTuple_GET_ITEM(all_dtypes, 0) != PyTuple_GET_ITEM(all_dtypes, 2)) {
        signature[0] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM(all_dtypes, 2);
        Py_INCREF(signature[0]);
        return promote_and_get_ufuncimpl(ufunc,
                ops, signature, op_dtypes,
                force_legacy_promotion, allow_legacy_promotion,
                promoting_pyscalars, NPY_FALSE);
    }

    // 遍历所有参数，确保签名与传入的 dtype 相匹配
    for (int i = 0; i < nargs; i++) {
        // 如果当前参数的签名为空，则使用传入的 dtype 作为签名
        if (signature[i] == NULL) {
            signature[i] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM(all_dtypes, i);
            Py_INCREF(signature[i]);
        }
        // 否则，如果签名不匹配传入的 dtype，则处理错误
        else if ((PyObject *)signature[i] != PyTuple_GET_ITEM(all_dtypes, i)) {
            /*
             * 如果签名被强制，缓存中可能包含一个通过促进找到的不兼容循环（未强制签名）。
             * 在这种情况下，拒绝它。
             */
            goto handle_error;
        }
    }

    // 处理正常情况，返回方法
    return method;

  handle_error:
    /* 只在此处设置“未找到循环错误” */
    if (!PyErr_Occurred()) {
        raise_no_loop_found_error(ufunc, (PyObject **)op_dtypes);
    }
    /*
     * 否则，如果发生了错误，但如果错误是 DTypePromotionError，
     * 则将其链接，因为 DTypePromotionError 实际上意味着没有可用的循环。
     * （我们通过使用促进未能找到循环。）
     */
    else if (PyErr_ExceptionMatches(npy_static_pydata.DTypePromotionError)) {
        PyObject *err_type = NULL, *err_value = NULL, *err_traceback = NULL;
        PyErr_Fetch(&err_type, &err_value, &err_traceback);
        raise_no_loop_found_error(ufunc, (PyObject **)op_dtypes);
        npy_PyErr_ChainExceptionsCause(err_type, err_value, err_traceback);
    }
    // 返回空指针表示出现错误
    return NULL;
/*
 * Generic promoter used by as a final fallback on ufuncs.  Most operations are
 * homogeneous, so we can try to find the homogeneous dtype on the inputs
 * and use that.
 * We need to special case the reduction case, where op_dtypes[0] == NULL
 * is possible.
 */
NPY_NO_EXPORT int
default_ufunc_promoter(PyObject *ufunc,
        PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *new_op_dtypes[])
{
    /* If nin < 2 promotion is a no-op, so it should not be registered */
    PyUFuncObject *ufunc_obj = (PyUFuncObject *)ufunc;
    assert(ufunc_obj->nin > 1);  // Ensure the number of inputs is more than 1

    // Check if op_dtypes[0] is NULL, indicating a reduction operation
    if (op_dtypes[0] == NULL) {
        assert(ufunc_obj->nin == 2 && ufunc_obj->nout == 1);  /* must be reduction */
        
        // Increase reference count and set new_op_dtypes to op_dtypes[1]
        Py_INCREF(op_dtypes[1]);
        new_op_dtypes[0] = op_dtypes[1];
        Py_INCREF(op_dtypes[1]);
        new_op_dtypes[1] = op_dtypes[1];
        Py_INCREF(op_dtypes[1]);
        new_op_dtypes[2] = op_dtypes[1];

        return 0;  // Return success
    }

    PyArray_DTypeMeta *common = NULL;

    /*
     * If a signature is used and homogeneous in its outputs use that
     * (Could/should likely be rather applied to inputs also, although outs
     * only could have some advantage and input dtypes are rarely enforced.)
     */
    for (int i = ufunc_obj->nin; i < ufunc_obj->nargs; i++) {
        if (signature[i] != NULL) {
            // Set 'common' to the first non-NULL signature dtype
            if (common == NULL) {
                Py_INCREF(signature[i]);
                common = signature[i];
            } else if (common != signature[i]) {
                Py_CLEAR(common);  /* Not homogeneous, unset common */
                break;
            }
        }
    }

    /* Otherwise, use the common DType of all input operands */
    if (common == NULL) {
        // Find a common dtype among input operands
        common = PyArray_PromoteDTypeSequence(ufunc_obj->nin, op_dtypes);
        if (common == NULL) {
            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
                PyErr_Clear();  /* Do not propagate normal promotion errors */
            }
            return -1;  // Return error
        }
    }

    // Set new_op_dtypes to 'common' for all input operands
    for (int i = 0; i < ufunc_obj->nargs; i++) {
        PyArray_DTypeMeta *tmp = common;
        if (signature[i]) {
            tmp = signature[i];  /* never replace a fixed one. */
        }
        Py_INCREF(tmp);
        new_op_dtypes[i] = tmp;
    }

    // For output operands beyond inputs, increment reference and set new_op_dtypes
    for (int i = ufunc_obj->nin; i < ufunc_obj->nargs; i++) {
        Py_XINCREF(op_dtypes[i]);
        new_op_dtypes[i] = op_dtypes[i];
    }

    Py_DECREF(common);  // Decrease reference count for 'common'
    return 0;  // Return success
}
/*
 * 仅处理对象类型的通用函数促进器。
 * 这个函数用于设置通用函数的输入和输出数据类型，以便处理对象类型。
 */
object_only_ufunc_promoter(PyObject *ufunc,
        PyArray_DTypeMeta *NPY_UNUSED(op_dtypes[]),
        PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *new_op_dtypes[])
{
    // 指向对象类型的数据类型元数据
    PyArray_DTypeMeta *object_DType = &PyArray_ObjectDType;

    // 遍历通用函数的参数
    for (int i = 0; i < ((PyUFuncObject *)ufunc)->nargs; i++) {
        // 如果参数签名为空，则使用对象类型作为新的操作数据类型
        if (signature[i] == NULL) {
            Py_INCREF(object_DType);  // 增加对象类型的引用计数
            new_op_dtypes[i] = object_DType;  // 设置新的操作数据类型为对象类型
        }
    }

    return 0;  // 返回成功状态
}

/*
 * 逻辑通用函数的特殊促进器。逻辑通用函数可以始终使用??->?规则并正确输出结果
 * （只要输出不是对象类型）。
 */
static int
logical_ufunc_promoter(PyObject *NPY_UNUSED(ufunc),
        PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *new_op_dtypes[])
{
    /*
     * 如果我们找到任何对象类型的数据类型，我们目前会强制转换为对象类型。
     * 然而，如果输出已经指定且不是对象类型，则没有必要强制转换，
     * 最好是对输入进行安全的类型转换。
     */
    int force_object = 0;

    // 遍历三个可能的参数位置
    for (int i = 0; i < 3; i++) {
        PyArray_DTypeMeta *item;
        if (signature[i] != NULL) {
            item = signature[i];
            Py_INCREF(item);
            // 如果参数签名中的数据类型为对象类型，设置强制为对象类型标志
            if (item->type_num == NPY_OBJECT) {
                force_object = 1;
            }
        }
        else {
            /* 总是覆盖为布尔类型 */
            item = &PyArray_BoolDType;
            Py_INCREF(item);
            // 如果操作数据类型不为空且为对象类型，设置强制为对象类型标志
            if (op_dtypes[i] != NULL && op_dtypes[i]->type_num == NPY_OBJECT) {
                force_object = 1;
            }
        }
        new_op_dtypes[i] = item;  // 设置新的操作数据类型
    }

    // 如果不需要强制为对象类型或者输出数据类型不是对象类型，返回成功状态
    if (!force_object || (op_dtypes[2] != NULL
                          && op_dtypes[2]->type_num != NPY_OBJECT)) {
        return 0;
    }

    /*
     * 实际上，我们仍然必须使用对象类型循环，尽可能将所有内容设置为对象类型
     * （可能不起作用，但试试看）。
     *
     * 注意：更改此处以检查 `op_dtypes[0] == NULL`，停止为 `np.logical_and.reduce(obj_arr)` 返回 `object`，
     * 这也会影响 `np.all` 和 `np.any`！
     */
    for (int i = 0; i < 3; i++) {
        if (signature[i] != NULL) {
            continue;
        }
        // 设置新的操作数据类型为对象数据类型的引用
        Py_SETREF(new_op_dtypes[i], NPY_DT_NewRef(&PyArray_ObjectDType));
    }
    return 0;  // 返回成功状态
}

/*
 * 安装逻辑通用函数的促进器
 */
NPY_NO_EXPORT int
install_logical_ufunc_promoter(PyObject *ufunc)
{
    // 如果输入对象不是通用函数类型，则抛出运行时错误
    if (PyObject_Type(ufunc) != (PyObject *)&PyUFunc_Type) {
        PyErr_SetString(PyExc_RuntimeError,
                "internal numpy array, logical ufunc was not a ufunc?!");
        return -1;  // 返回错误状态
    }
    // 创建一个包含三个 PyArrayDescr_Type 的元组
    PyObject *dtype_tuple = PyTuple_Pack(3,
            &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type, NULL);
    if (dtype_tuple == NULL) {
        return -1;  // 返回错误状态
    }
    // 创建逻辑通用函数的促进器对象
    PyObject *promoter = PyCapsule_New(&logical_ufunc_promoter,
            "numpy._ufunc_promoter", NULL);

    /* 代码块结束 */
}
    // 检查 promoter 指针是否为空
    if (promoter == NULL) {
        // 如果为空，释放 dtype_tuple 对象的引用计数并返回错误码 -1
        Py_DECREF(dtype_tuple);
        return -1;
    }

    // 将 dtype_tuple 和 promoter 封装成一个元组对象 info
    PyObject *info = PyTuple_Pack(2, dtype_tuple, promoter);
    // 释放 dtype_tuple 和 promoter 对象的引用计数
    Py_DECREF(dtype_tuple);
    Py_DECREF(promoter);

    // 检查 info 是否创建成功
    if (info == NULL) {
        // 如果创建失败，返回错误码 -1
        return -1;
    }

    // 调用 PyUFunc_AddLoop 将 info 作为参数传递给 ufunc 的 AddLoop 方法
    // 并返回其返回值作为函数的返回值
    return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
/*
 * Return the PyArrayMethodObject or PyCapsule that matches a registered
 * tuple of identical dtypes. Return a borrowed ref of the first match.
 */
NPY_NO_EXPORT PyObject *
get_info_no_cast(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtype,
                 int ndtypes)
{
    // 创建一个新的元组，用于存放指定数量的数据类型对象
    PyObject *t_dtypes = PyTuple_New(ndtypes);
    if (t_dtypes == NULL) {
        return NULL;
    }
    // 将 op_dtype 数据类型对象复制到 t_dtypes 元组中的每一个位置
    for (int i=0; i < ndtypes; i++) {
        PyTuple_SetItem(t_dtypes, i, (PyObject *)op_dtype);
    }
    // 获取 ufunc 对象的 _loops 属性，该属性应该是一个列表对象
    PyObject *loops = ufunc->_loops;
    // 获取列表 loops 的长度
    Py_ssize_t length = PyList_Size(loops);
    // 遍历 loops 列表中的每一个元素
    for (Py_ssize_t i = 0; i < length; i++) {
        // 获取列表中第 i 个元素的引用
        PyObject *item = PyList_GetItemRef(loops, i);
        // 获取 item 元素中的第一个元组，即 DType 元组
        PyObject *cur_DType_tuple = PyTuple_GetItem(item, 0);
        // 减少 item 的引用计数，这里应该是为了安全起见，避免内存泄漏
        Py_DECREF(item);
        // 比较 cur_DType_tuple 和 t_dtypes 是否相等
        int cmp = PyObject_RichCompareBool(cur_DType_tuple,
                                           t_dtypes, Py_EQ);
        // 如果比较出错，释放 t_dtypes，并返回空指针
        if (cmp < 0) {
            Py_DECREF(t_dtypes);
            return NULL;
        }
        // 如果 cur_DType_tuple 和 t_dtypes 不相等，继续下一个循环
        if (cmp == 0) {
            continue;
        }
        /* 找到匹配项 */
        // 释放 t_dtypes，并返回匹配项元组的第二个元素
        Py_DECREF(t_dtypes);
        return PyTuple_GetItem(item, 1);
    }
    // 释放 t_dtypes，并返回 None 对象
    Py_DECREF(t_dtypes);
    Py_RETURN_NONE;
}

/* UFUNC_API
 *     Register a new promoter for a ufunc.  A promoter is a function stored
 *     in a PyCapsule (see in-line comments).  It is passed the operation and
 *     requested DType signatures and can mutate it to attempt a new search
 *     for a matching loop/promoter.
 *
 * @param ufunc The ufunc object to register the promoter with.
 * @param DType_tuple A Python tuple containing DTypes or None matching the
 *        number of inputs and outputs of the ufunc.
 * @param promoter A PyCapsule with name "numpy._ufunc_promoter" containing
 *        a pointer to a `PyArrayMethod_PromoterFunction`.
 */
NPY_NO_EXPORT int
PyUFunc_AddPromoter(
        PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter)
{
    // 检查 ufunc 是否为 PyUFuncObject 类型的对象
    if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) {
        PyErr_SetString(PyExc_TypeError,
                "ufunc object passed is not a ufunc!");
        return -1;
    }
    // 检查 promoter 是否为 PyCapsule 类型的对象
    if (!PyCapsule_CheckExact(promoter)) {
        PyErr_SetString(PyExc_TypeError,
                "promoter must (currently) be a PyCapsule.");
        return -1;
    }
    // 检查 promoter 是否包含 "numpy._ufunc_promoter" 的名称
    if (PyCapsule_GetPointer(promoter, "numpy._ufunc_promoter") == NULL) {
        return -1;
    }
    // 创建一个包含 DType_tuple 和 promoter 的元组 info
    PyObject *info = PyTuple_Pack(2, DType_tuple, promoter);
    if (info == NULL) {
        return -1;
    }
    // 将 info 添加到 ufunc 对象的循环列表中，返回添加的结果
    return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
}

`.\numpy\numpy\_core\src\umath\dispatching.h`

#ifndef _NPY_DISPATCHING_H
#define _NPY_DISPATCHING_H

// 如果 _NPY_DISPATCHING_H 宏未定义，则定义它，用于防止头文件重复包含


#define _UMATHMODULE

// 定义 _UMATHMODULE 宏，用于标记当前为 ufunc 模块


#include <numpy/ufuncobject.h>
#include "array_method.h"

// 引入必要的头文件：<numpy/ufuncobject.h> 是 NumPy 中 ufunc 对象的头文件，
// "array_method.h" 是本地头文件，可能包含了一些数组方法的声明和定义


#ifdef __cplusplus
extern "C" {
#endif

// 如果是 C++ 编译环境，则声明下面的内容是用 C 语言编写的


NPY_NO_EXPORT int
PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate);

// 声明一个不导出的函数 PyUFunc_AddLoop，用于向给定的 ufunc 添加一个循环


NPY_NO_EXPORT int
PyUFunc_AddLoopFromSpec_int(PyObject *ufunc, PyArrayMethod_Spec *spec, int priv);

// 声明一个不导出的函数 PyUFunc_AddLoopFromSpec_int，从给定的规范 spec 中添加一个循环到 ufunc


NPY_NO_EXPORT PyArrayMethodObject *
promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
        PyArrayObject *const ops[],
        PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *op_dtypes[],
        npy_bool force_legacy_promotion,
        npy_bool allow_legacy_promotion,
        npy_bool promote_pyscalars,
        npy_bool ensure_reduce_compatible);

// 声明一个不导出的函数 promote_and_get_ufuncimpl，用于推广并获取 ufunc 的实现对象


NPY_NO_EXPORT PyObject *
add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
        PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate);

// 声明一个不导出的函数 add_and_return_legacy_wrapping_ufunc_loop，用于添加并返回具有遗留包装的 ufunc 循环


NPY_NO_EXPORT int
default_ufunc_promoter(PyObject *ufunc,
        PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *new_op_dtypes[]);

// 声明一个不导出的函数 default_ufunc_promoter，用于设置默认的 ufunc 推广策略


NPY_NO_EXPORT int
object_only_ufunc_promoter(PyObject *ufunc,
        PyArray_DTypeMeta *NPY_UNUSED(op_dtypes[]),
        PyArray_DTypeMeta *signature[],
        PyArray_DTypeMeta *new_op_dtypes[]);

// 声明一个不导出的函数 object_only_ufunc_promoter，用于设置仅对象类型的 ufunc 推广策略


NPY_NO_EXPORT int
install_logical_ufunc_promoter(PyObject *ufunc);

// 声明一个不导出的函数 install_logical_ufunc_promoter，用于安装逻辑运算 ufunc 的推广策略


#ifdef __cplusplus
}
#endif

// 结束 C 语言函数声明的部分，如果是在 C++ 编译环境下，取消 C 风格的函数名修饰


#endif  /*_NPY_DISPATCHING_H */

// 结束 _NPY_DISPATCHING_H 头文件的条件编译部分

`.\numpy\numpy\_core\src\umath\extobj.c`

/* 定义宏，指定使用的 NumPy API 版本 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
/* 定义宏，标识当前编译环境为多维数组模块 */
#define _MULTIARRAYMODULE
/* 定义宏，标识当前编译环境为数学函数模块 */
#define _UMATHMODULE

/* 清除 PY_SSIZE_T 类型的旧定义 */
#define PY_SSIZE_T_CLEAN
/* 包含 Python 核心头文件 */
#include <Python.h>

/* 包含 NumPy 的配置文件 */
#include "npy_config.h"

/* 包含 NumPy 的命令行参数解析头文件 */
#include "npy_argparse.h"

/* 包含 NumPy 的类型转换工具函数 */
#include "conversion_utils.h"

/* 包含外部对象的头文件 */
#include "extobj.h"
/* 包含 NumPy 的通用函数对象头文件 */
#include "numpy/ufuncobject.h"

/* 包含通用的工具函数和宏定义 */
#include "common.h"

/* 定义错误处理的策略：忽略除零错误，其他错误警告 */
#define UFUNC_ERR_IGNORE 0
#define UFUNC_ERR_WARN   1
#define UFUNC_ERR_RAISE  2
#define UFUNC_ERR_CALL   3
#define UFUNC_ERR_PRINT  4
#define UFUNC_ERR_LOG    5

/* 定义错误掩码的位移和掩码值 */
#define UFUNC_MASK_DIVIDEBYZERO 0x07
#define UFUNC_MASK_OVERFLOW (0x07 << UFUNC_SHIFT_OVERFLOW)
#define UFUNC_MASK_UNDERFLOW (0x07 << UFUNC_SHIFT_UNDERFLOW)
#define UFUNC_MASK_INVALID (0x07 << UFUNC_SHIFT_INVALID)

/* 定义错误类型的位移值 */
#define UFUNC_SHIFT_DIVIDEBYZERO 0
#define UFUNC_SHIFT_OVERFLOW     3
#define UFUNC_SHIFT_UNDERFLOW    6
#define UFUNC_SHIFT_INVALID      9

/* 默认的用户错误处理模式：忽略下溢出错误，其他警告 */
#define UFUNC_ERR_DEFAULT                               \
        (UFUNC_ERR_WARN << UFUNC_SHIFT_DIVIDEBYZERO) +  \
        (UFUNC_ERR_WARN << UFUNC_SHIFT_OVERFLOW) +      \
        (UFUNC_ERR_WARN << UFUNC_SHIFT_INVALID)

/* 定义错误处理函数，根据错误类型和处理方法处理异常 */
static int
_error_handler(const char *name, int method, PyObject *pyfunc, char *errtype,
               int retstatus);

/* 定义宏，用于处理浮点数异常 */
#define HANDLEIT(NAME, str) {if (retstatus & NPY_FPE_##NAME) {          \
            handle = errmask & UFUNC_MASK_##NAME;                       \
            if (handle &&                                               \
                _error_handler(name, handle >> UFUNC_SHIFT_##NAME,      \
                               pyfunc, str, retstatus) < 0)      \
                return -1;                                              \
        }}

/* 处理浮点数异常的函数 */
static int
PyUFunc_handlefperr(
        const char *name, int errmask, PyObject *pyfunc, int retstatus)
{
    int handle;
    /* 如果有错误掩码和异常状态 */
    if (errmask && retstatus) {
        /* 处理除零错误 */
        HANDLEIT(DIVIDEBYZERO, "divide by zero");
        /* 处理溢出错误 */
        HANDLEIT(OVERFLOW, "overflow");
        /* 处理下溢出错误 */
        HANDLEIT(UNDERFLOW, "underflow");
        /* 处理无效数值错误 */
        HANDLEIT(INVALID, "invalid value");
    }
    return 0;
}

/* 取消 HANDLEIT 宏的定义 */
#undef HANDLEIT

/* 定义外部对象的析构函数，释放资源 */
static void
extobj_capsule_destructor(PyObject *capsule)
{
    /* 从 Capsule 中获取扩展对象指针 */
    npy_extobj *extobj = PyCapsule_GetPointer(capsule, "numpy.ufunc.extobj");
    /* 清理扩展对象 */
    npy_extobj_clear(extobj);
    /* 释放内存 */
    PyMem_FREE(extobj);
}

/* 创建扩展对象的 Capsule 对象 */
static PyObject *
make_extobj_capsule(npy_intp bufsize, int errmask, PyObject *pyfunc)
{
    /* 分配扩展对象的内存 */
    npy_extobj *extobj = PyMem_Malloc(sizeof(npy_extobj));
    /* 如果内存分配失败 */
    if (extobj == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    /* 设置扩展对象的属性 */
    extobj->bufsize = bufsize;
    extobj->errmask = errmask;
    Py_XINCREF(pyfunc);
    extobj->pyfunc = pyfunc;

    /* 创建并返回 Capsule 对象 */
    PyObject *capsule = PyCapsule_New(
            extobj, "numpy.ufunc.extobj",
            (destructor)&extobj_capsule_destructor);
    /* 如果创建 Capsule 失败 */
    if (capsule == NULL) {
        /* 清理扩展对象 */
        npy_extobj_clear(extobj);
        /* 释放内存 */
        PyMem_Free(extobj);
        return NULL;
    }
    return capsule;
}
/*
 * Fetch the current error/extobj state and fill it into `npy_extobj *extobj`.
 * On success, the filled `extobj` must be cleared using `npy_extobj_clear`.
 * Returns -1 on failure and 0 on success.
 */
static int
fetch_curr_extobj_state(npy_extobj *extobj)
{
    // 获取全局的 extobj 对象封装，使用默认的 extobj capsule
    PyObject *capsule;
    if (PyContextVar_Get(
            npy_static_pydata.npy_extobj_contextvar,
            npy_static_pydata.default_extobj_capsule, &capsule) < 0) {
        return -1; // 获取失败，返回错误状态
    }
    // 从 capsule 中获取指向 npy_extobj 结构体的指针
    npy_extobj *obj = PyCapsule_GetPointer(capsule, "numpy.ufunc.extobj");
    if (obj == NULL) {
        Py_DECREF(capsule);
        return -1; // 获取指针失败，返回错误状态
    }

    // 将获取到的 extobj 数据填充到传入的 extobj 结构体中
    extobj->bufsize = obj->bufsize;
    extobj->errmask = obj->errmask;
    extobj->pyfunc = obj->pyfunc;
    Py_INCREF(extobj->pyfunc); // 增加 Python 函数对象的引用计数，避免被释放

    Py_DECREF(capsule); // 释放 capsule 对象的引用
    return 0; // 返回成功状态
}


NPY_NO_EXPORT int
init_extobj(void)
{
    // 创建默认的 extobj capsule
    npy_static_pydata.default_extobj_capsule = make_extobj_capsule(
            NPY_BUFSIZE, UFUNC_ERR_DEFAULT, Py_None);
    if (npy_static_pydata.default_extobj_capsule == NULL) {
        return -1; // 创建失败，返回错误状态
    }
    // 创建并初始化 extobj 的上下文变量
    npy_static_pydata.npy_extobj_contextvar = PyContextVar_New(
            "numpy.ufunc.extobj", npy_static_pydata.default_extobj_capsule);
    if (npy_static_pydata.npy_extobj_contextvar == NULL) {
        Py_CLEAR(npy_static_pydata.default_extobj_capsule);
        return -1; // 创建失败，释放之前分配的 capsule 并返回错误状态
    }
    return 0; // 返回成功状态
}


/*
 * Parsing helper for extobj_seterrobj to extract the modes
 * "ignore", "raise", etc.
 */
static int
errmodeconverter(PyObject *obj, int *mode)
{
    if (obj == Py_None) {
        return 1; // 如果输入是 Py_None，返回成功状态，并将 mode 设置为 1
    }
    int i = 0;
    // 遍历预定义的错误模式字符串，与输入对象进行比较
    for (; i <= UFUNC_ERR_LOG; i++) {
        // 使用 PyObject_RichCompareBool 检查是否匹配错误模式字符串
        int eq = PyObject_RichCompareBool(
                obj, npy_interned_str.errmode_strings[i], Py_EQ);
        if (eq == -1) {
            return 0; // 比较失败，返回错误状态
        }
        else if (eq) {
            break; // 找到匹配的错误模式，跳出循环
        }
    }
    // 如果未找到匹配的错误模式，抛出 ValueError 异常
    if (i > UFUNC_ERR_LOG) {
        PyErr_Format(PyExc_ValueError, "invalid error mode %.100R", obj);
        return 0; // 返回错误状态
    }

    *mode = i; // 将匹配的错误模式索引赋值给 mode
    return 1; // 返回成功状态
 }


/*
 * This function is currently exposed as `umath._seterrobj()`, it is private
 * and returns a capsule representing the errstate.  This capsule is then
 * assigned to the `_extobj_contextvar` in Python.
 */
NPY_NO_EXPORT PyObject *
extobj_make_extobj(PyObject *NPY_UNUSED(mod),
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    // 初始化错误模式变量
    int all_mode = -1;
    int divide_mode = -1;
    int over_mode = -1;
    int under_mode = -1;
    int invalid_mode = -1;
    npy_intp bufsize = -1;
    PyObject *pyfunc = NULL;

    // 解析传入的参数和关键字参数
    NPY_PREPARE_ARGPARSER;

    // 函数体未提供完整，请参阅原始文档或源代码了解更多细节
}
    # 调用函数 npy_parse_arguments，解析传入的参数并设置错误处理模式和缓冲区大小
    if (npy_parse_arguments("_seterrobj", args, len_args, kwnames,
            "$all", &errmodeconverter, &all_mode,
            "$divide", &errmodeconverter, &divide_mode,
            "$over", &errmodeconverter, &over_mode,
            "$under", &errmodeconverter, &under_mode,
            "$invalid", &errmodeconverter, &invalid_mode,
            "$bufsize", &PyArray_IntpFromPyIntConverter, &bufsize,
            "$call", NULL, &pyfunc,
            NULL, NULL, NULL) < 0) {
        return NULL;
    }

    /* 检查新的缓冲区大小是否有效（负数表示不改变） */
    if (bufsize >= 0) {
        if (bufsize > 10e6) {
            PyErr_Format(PyExc_ValueError,
                    "Buffer size, %" NPY_INTP_FMT ", is too big",
                    bufsize);
            return NULL;
        }
        if (bufsize < 5) {
            PyErr_Format(PyExc_ValueError,
                    "Buffer size, %" NPY_INTP_FMT ", is too small",
                    bufsize);
            return NULL;
        }
        if (bufsize % 16 != 0) {
            PyErr_Format(PyExc_ValueError,
                    "Buffer size, %" NPY_INTP_FMT ", is not a multiple of 16",
                    bufsize);
            return NULL;
        }
    }

    /* 验证 pyfunc 是否为 None、可调用对象，或者具有可调用的 write 方法 */
    if (pyfunc != NULL && pyfunc != Py_None && !PyCallable_Check(pyfunc)) {
        PyObject *temp;
        temp = PyObject_GetAttrString(pyfunc, "write");
        if (temp == NULL || !PyCallable_Check(temp)) {
            PyErr_SetString(PyExc_TypeError,
                            "python object must be callable or have "
                            "a callable write method");
            Py_XDECREF(temp);
            return NULL;
        }
        Py_DECREF(temp);
    }

    /* 获取当前的 extobj 状态，如果获取失败则返回 NULL */
    npy_extobj extobj;
    if (fetch_curr_extobj_state(&extobj) < 0) {
        return NULL;
    }

    /* 根据 all_mode 设置默认错误处理模式 */
    if (all_mode != -1) {
        /* 如果传入了 all_mode，则用它来设置未明确指定的其他模式 */
        divide_mode = divide_mode == -1 ? all_mode : divide_mode;
        over_mode = over_mode == -1 ? all_mode : over_mode;
        under_mode = under_mode == -1 ? all_mode : under_mode;
        invalid_mode = invalid_mode == -1 ? all_mode : invalid_mode;
    }
    /* 根据 divide_mode 设置 extobj 的错误掩码 */
    if (divide_mode != -1) {
        extobj.errmask &= ~UFUNC_MASK_DIVIDEBYZERO;
        extobj.errmask |= divide_mode << UFUNC_SHIFT_DIVIDEBYZERO;
    }
    /* 根据 over_mode 设置 extobj 的错误掩码 */
    if (over_mode != -1) {
        extobj.errmask &= ~UFUNC_MASK_OVERFLOW;
        extobj.errmask |= over_mode << UFUNC_SHIFT_OVERFLOW;
    }
    /* 根据 under_mode 设置 extobj 的错误掩码 */
    if (under_mode != -1) {
        extobj.errmask &= ~UFUNC_MASK_UNDERFLOW;
        extobj.errmask |= under_mode << UFUNC_SHIFT_UNDERFLOW;
    }
    /* 根据 invalid_mode 设置 extobj 的错误掩码 */
    if (invalid_mode != -1) {
        extobj.errmask &= ~UFUNC_MASK_INVALID;
        extobj.errmask |= invalid_mode << UFUNC_SHIFT_INVALID;
    }

    /* 如果缓冲区大小大于 0，则设置 extobj 的缓冲区大小 */
    if (bufsize > 0) {
        extobj.bufsize = bufsize;
    }
    // 检查传入的 pyfunc 是否为非空指针
    if (pyfunc != NULL) {
        // 增加 pyfunc 的引用计数，防止其被释放
        Py_INCREF(pyfunc);
        // 将传入的 pyfunc 设置为 extobj 结构体中的 pyfunc 成员
        Py_SETREF(extobj.pyfunc, pyfunc);
    }
    // 使用 extobj 结构体中的 bufsize、errmask 和 pyfunc 成员创建一个新的 Capsule 对象
    PyObject *capsule = make_extobj_capsule(
            extobj.bufsize, extobj.errmask, extobj.pyfunc);
    // 清空 extobj 结构体的内容，准备返回 Capsule 对象
    npy_extobj_clear(&extobj);
    // 返回创建的 Capsule 对象
    return capsule;
/*
 * For inspection purposes, allow fetching a dictionary representing the
 * current extobj/errobj.
 */
NPY_NO_EXPORT PyObject *
extobj_get_extobj_dict(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(noarg))
{
    PyObject *result = NULL, *bufsize_obj = NULL;
    npy_extobj extobj;
    int mode;

    // 获取当前的 extobj 状态，如果获取失败则跳转到失败处理标签
    if (fetch_curr_extobj_state(&extobj) < 0) {
        goto fail;
    }

    // 创建一个新的空字典对象用于存储结果
    result = PyDict_New();
    if (result == NULL) {
        goto fail;
    }

    /* 设置所有的错误模式：*/

    // 设置除零错误处理模式
    mode = (extobj.errmask & UFUNC_MASK_DIVIDEBYZERO) >> UFUNC_SHIFT_DIVIDEBYZERO;
    if (PyDict_SetItemString(result, "divide",
                             npy_interned_str.errmode_strings[mode]) < 0) {
        goto fail;
    }

    // 设置溢出错误处理模式
    mode = (extobj.errmask & UFUNC_MASK_OVERFLOW) >> UFUNC_SHIFT_OVERFLOW;
    if (PyDict_SetItemString(result, "over",
                             npy_interned_str.errmode_strings[mode]) < 0) {
        goto fail;
    }

    // 设置下溢错误处理模式
    mode = (extobj.errmask & UFUNC_MASK_UNDERFLOW) >> UFUNC_SHIFT_UNDERFLOW;
    if (PyDict_SetItemString(result, "under",
                             npy_interned_str.errmode_strings[mode]) < 0) {
        goto fail;
    }

    // 设置无效操作错误处理模式
    mode = (extobj.errmask & UFUNC_MASK_INVALID) >> UFUNC_SHIFT_INVALID;
    if (PyDict_SetItemString(result, "invalid",
                             npy_interned_str.errmode_strings[mode]) < 0) {
        goto fail;
    }

    /* 设置可调用对象：*/

    // 将 extobj.pyfunc 设置为 "call" 字段的值
    if (PyDict_SetItemString(result, "call", extobj.pyfunc) < 0) {
        goto fail;
    }

    /* 设置 bufsize 字段：*/

    // 将 extobj.bufsize 转换为 Python 的长整型对象
    bufsize_obj = PyLong_FromSsize_t(extobj.bufsize);
    if (bufsize_obj == NULL) {
        goto fail;
    }
    // 将 bufsize_obj 设置为 "bufsize" 字段的值
    if (PyDict_SetItemString(result, "bufsize", bufsize_obj) < 0) {
        goto fail;
    }
    Py_DECREF(bufsize_obj);

    // 清理并释放 extobj 结构体
    npy_extobj_clear(&extobj);

    // 返回结果字典
    return result;

  fail:
    // 失败时释放申请的资源并返回 NULL
    Py_XDECREF(result);
    Py_XDECREF(bufsize_obj);
    npy_extobj_clear(&extobj);
    return NULL;
}
    # 如果发生错误但只是发出警告，使用 PyErr_Warn 发出 RuntimeWarning 警告
    case UFUNC_ERR_WARN:
        # 根据错误类型和函数名格式化错误消息
        PyOS_snprintf(msg, sizeof(msg), "%s encountered in %s", errtype, name);
        # 发出 RuntimeWarning 警告，如果发生错误返回负值则跳转到 fail 标签处
        if (PyErr_Warn(PyExc_RuntimeWarning, msg) < 0) {
            goto fail;
        }
        break;
        
    # 如果发生错误需要抛出异常，使用 PyErr_Format 抛出 FloatingPointError 异常
    case UFUNC_ERR_RAISE:
        PyErr_Format(PyExc_FloatingPointError, "%s encountered in %s",
                errtype, name);
        goto fail;
        
    # 如果发生错误需要调用 Python 回调函数，处理 UFUNC_ERR_CALL 情况
    case UFUNC_ERR_CALL:
        # 如果 pyfunc 是 None，则抛出 NameError 异常
        if (pyfunc == Py_None) {
            PyErr_Format(PyExc_NameError,
                    "python callback specified for %s (in " \
                    " %s) but no function found.",
                    errtype, name);
            goto fail;
        }
        # 构建参数 args，包含错误类型和返回状态的 Python 对象
        args = Py_BuildValue("NN", PyUnicode_FromString(errtype),
                PyLong_FromLong((long) retstatus));
        # 如果构建参数失败，则跳转到 fail 标签处
        if (args == NULL) {
            goto fail;
        }
        # 调用 pyfunc 函数，并传入参数 args，处理返回结果
        ret = PyObject_CallObject(pyfunc, args);
        Py_DECREF(args);
        # 如果调用失败，则跳转到 fail 标签处
        if (ret == NULL) {
            goto fail;
        }
        Py_DECREF(ret);
        break;
        
    # 如果发生错误需要记录日志，处理 UFUNC_ERR_LOG 情况
    case UFUNC_ERR_LOG:
        # 如果 pyfunc 是 None，则抛出 NameError 异常
        if (pyfunc == Py_None) {
            PyErr_Format(PyExc_NameError,
                    "log specified for %s (in %s) but no " \
                    "object with write method found.",
                    errtype, name);
            goto fail;
        }
        # 根据错误类型和函数名格式化警告消息
        PyOS_snprintf(msg, sizeof(msg),
                "Warning: %s encountered in %s\n", errtype, name);
        # 调用 pyfunc 对象的 write 方法，写入警告消息
        ret = PyObject_CallMethod(pyfunc, "write", "s", msg);
        # 如果调用失败，则跳转到 fail 标签处
        if (ret == NULL) {
            goto fail;
        }
        Py_DECREF(ret);
        break;
    }
    # 禁用 C API
    NPY_DISABLE_C_API;
    # 返回 0 表示处理完成
    return 0;
/*
 * Disable the C API and return -1 to indicate failure.
 */
fail:
    NPY_DISABLE_C_API;
    return -1;
}


/*
 * Extracts some values from the global pyvals tuple.
 * all destinations may be NULL, in which case they are not retrieved
 * ref - should hold the global tuple
 * name - is the name of the ufunc (ufuncobj->name)
 *
 * bufsize - receives the buffer size to use
 * errmask - receives the bitmask for error handling
 * pyfunc - receives the python object to call with the error,
 *          if an error handling method is 'call'
 */
static int
_extract_pyvals(int *bufsize, int *errmask, PyObject **pyfunc)
{
    npy_extobj extobj;
    // Fetches the current state of the extended objects (extobj)
    if (fetch_curr_extobj_state(&extobj) < 0) {
        return -1;
    }

    // Retrieve bufsize if not NULL
    if (bufsize != NULL) {
        *bufsize = extobj.bufsize;
    }

    // Retrieve errmask if not NULL
    if (errmask != NULL) {
        *errmask = extobj.errmask;
    }

    // Retrieve pyfunc if not NULL and increment its reference count
    if (pyfunc != NULL) {
        *pyfunc = extobj.pyfunc;
        Py_INCREF(*pyfunc);
    }
    // Clears the extobj structure
    npy_extobj_clear(&extobj);
    return 0;
}

/*UFUNC_API
 * Signal a floating point error respecting the error signaling setting in
 * the NumPy errstate. Takes the name of the operation to use in the error
 * message and an integer flag that is one of NPY_FPE_DIVIDEBYZERO,
 * NPY_FPE_OVERFLOW, NPY_FPE_UNDERFLOW, NPY_FPE_INVALID to indicate
 * which errors to check for.
 *
 * Returns -1 on failure (an error was raised) and 0 on success.
 */
NPY_NO_EXPORT int
PyUFunc_GiveFloatingpointErrors(const char *name, int fpe_errors)
{
    int bufsize, errmask;
    PyObject *pyfunc = NULL;

    // Extracts bufsize, errmask, and pyfunc from the global pyvals tuple
    if (_extract_pyvals(&bufsize, &errmask, &pyfunc) < 0) {
        Py_XDECREF(pyfunc);
        return -1;
    }
    // Handles floating point errors using extracted parameters
    if (PyUFunc_handlefperr(name, errmask, pyfunc, fpe_errors)) {
        Py_XDECREF(pyfunc);
        return -1;
    }
    Py_XDECREF(pyfunc);
    return 0;
}


/*
 * check the floating point status
 *  - errmask: mask of status to check
 *  - extobj: ufunc pyvals object
 *            may be null, in which case the thread global one is fetched
 *  - ufunc_name: name of ufunc
 */
NPY_NO_EXPORT int
_check_ufunc_fperr(int errmask, const char *ufunc_name) {
    int fperr;
    PyObject *pyfunc = NULL;
    int ret;

    // If errmask is 0, no error checking is needed
    if (!errmask) {
        return 0;
    }
    // Fetches the floating point status using the ufunc name
    fperr = npy_get_floatstatus_barrier((char*)ufunc_name);
    // If no floating point errors occurred, return success
    if (!fperr) {
        return 0;
    }

    // Retrieves pyvals tuple and handles floating point errors
    if (_extract_pyvals(NULL, NULL, &pyfunc) < 0) {
        Py_XDECREF(pyfunc);
        return -1;
    }

    ret = PyUFunc_handlefperr(ufunc_name, errmask, pyfunc, fperr);
    Py_XDECREF(pyfunc);

    return ret;
}


NPY_NO_EXPORT int
_get_bufsize_errmask(int *buffersize, int *errormask)
{
    // Retrieves bufsize and errmask using _extract_pyvals function
    return _extract_pyvals(buffersize, errormask, NULL);
}

`.\numpy\numpy\_core\src\umath\extobj.h`

#ifndef _NPY_PRIVATE__EXTOBJ_H_
#define _NPY_PRIVATE__EXTOBJ_H_

#include <numpy/ndarraytypes.h>  /* for NPY_NO_EXPORT */

/*
 * Represent the current ufunc error (and buffer) state.  we are using a
 * capsule for now to store this, but it could make sense to refactor it into
 * a proper (immutable) object.
 * NOTE: Part of this information should be integrated into the public API
 *       probably.  We expect extending it e.g. with a "fast" flag.
 *       (although the public only needs to know *if* errors are checked, not
 *       what we do with them, like warn, raise, ...).
 */
typedef struct {
    int errmask;                // 错误掩码，用于表示错误状态
    npy_intp bufsize;           // 缓冲区大小，用于表示缓冲区状态
    PyObject *pyfunc;           // Python 对象，用于存储相关函数对象
} npy_extobj;

/* Clearing is only `pyfunc` XDECREF, but could grow in principle */
static inline void
npy_extobj_clear(npy_extobj *extobj)
{
    Py_XDECREF(extobj->pyfunc); // 释放 extobj 结构体中的 pyfunc 对象
}

NPY_NO_EXPORT int
_check_ufunc_fperr(int errmask, const char *ufunc_name);

NPY_NO_EXPORT int
_get_bufsize_errmask(int *buffersize, int *errormask);

NPY_NO_EXPORT int
init_extobj(void);   // 初始化 extobj 结构体

/*
 * Private Python exposure of the extobj.
 */
NPY_NO_EXPORT PyObject *
extobj_make_extobj(PyObject *NPY_UNUSED(mod),
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames);

NPY_NO_EXPORT PyObject *
extobj_get_extobj_dict(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(noarg));

#endif


这段代码是一个 C 语言头文件，主要定义了一个结构体 `npy_extobj` 和若干函数。注释解释了结构体字段的含义，以及每个函数的作用和用途。

`.\numpy\numpy\_core\src\umath\fast_loop_macros.h`

/**
 * 宏定义，用于构建快速的ufunc内部循环。
 *
 * 这些宏期望能够访问典型ufunc循环的参数，
 *
 *     char **args
 *     npy_intp const *dimensions
 *     npy_intp const *steps
 */
#ifndef _NPY_UMATH_FAST_LOOP_MACROS_H_
#define _NPY_UMATH_FAST_LOOP_MACROS_H_

#include <assert.h>

#include "simd/simd.h"

/*
 * numpy支持的最大SIMD向量大小（字节）
 * 目前是一个非常大的值，因为它仅用于内存重叠检查
 */
#if NPY_SIMD > 0
    // 足够用于编译器展开
    #define AUTOVEC_OVERLAP_SIZE NPY_SIMD_WIDTH*4
#else
    #define AUTOVEC_OVERLAP_SIZE 1024
#endif

/*
 * MAX_STEP_SIZE用于确定是否需要使用ufunc的SIMD版本。
 * 非常大的步长可能比使用标量处理还要慢。选择2097152（= 2MB）的值是基于两方面的考虑：
 * 1）典型的Linux内核页面大小为4KB，但有时也可能是2MB，与步长大小这么大相比，
 *    可能会导致16个不同页面上的所有加载/存储散射指令变慢。
 * 2）它还满足MAX_STEP_SIZE*16/esize < NPY_MAX_INT32的条件，这使得我们可以使用i32版本的gather/scatter
 *    （而不是i64版本），因为步长大于NPY_MAX_INT32*esize/16将需要使用i64gather/scatter。
 *    esize = 元素大小 = 浮点数/双精度数的4/8字节。
 */
#define MAX_STEP_SIZE 2097152

/**
 * 计算两个指针之间的绝对偏移量。
 *
 * @param a 指针a
 * @param b 指针b
 * @return 两个指针之间的绝对偏移量
 */
static inline npy_uintp
abs_ptrdiff(char *a, char *b)
{
    return (a > b) ? (a - b) : (b - a);
}

/**
 * 简单的未优化循环宏，以并行方式迭代ufunc参数。
 * @{
 */

/** (<ignored>) -> (op1) */
#define OUTPUT_LOOP\
    char *op1 = args[1];\
    npy_intp os1 = steps[1];\
    npy_intp n = dimensions[0];\
    npy_intp i;\
    for(i = 0; i < n; i++, op1 += os1)

/** (ip1) -> (op1) */
#define UNARY_LOOP\
    char *ip1 = args[0], *op1 = args[1];\
    npy_intp is1 = steps[0], os1 = steps[1];\
    npy_intp n = dimensions[0];\
    npy_intp i;\
    for(i = 0; i < n; i++, ip1 += is1, op1 += os1)

/** (ip1) -> (op1, op2) */
#define UNARY_LOOP_TWO_OUT\
    char *ip1 = args[0], *op1 = args[1], *op2 = args[2];\
    npy_intp is1 = steps[0], os1 = steps[1], os2 = steps[2];\
    npy_intp n = dimensions[0];\
    npy_intp i;\
    for(i = 0; i < n; i++, ip1 += is1, op1 += os1, op2 += os2)

#define BINARY_DEFS\
    char *ip1 = args[0], *ip2 = args[1], *op1 = args[2];\
    npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];\
    npy_intp n = dimensions[0];\
    npy_intp i;\

#define BINARY_LOOP_SLIDING\
    for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1)

/** (ip1, ip2) -> (op1) */
#define BINARY_LOOP\
    BINARY_DEFS\
    BINARY_LOOP_SLIDING

/** (ip1, ip2) -> (op1, op2) */
#define BINARY_LOOP_TWO_OUT\
    char *ip1 = args[0], *ip2 = args[1], *op1 = args[2], *op2 = args[3];\
    // 定义并初始化四个变量，分别表示输入步长和输出步长
    npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2], os2 = steps[3];
    // 获取维度数组的第一个元素，表示循环的次数
    npy_intp n = dimensions[0];
    // 定义循环变量 i，并进行循环操作
    npy_intp i;
    // 循环迭代 n 次，每次迭代更新输入指针 ip1 和 ip2，输出指针 op1 和 op2
    for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1, op2 += os2)
/**
 * 定义一个三元循环宏，接受四个参数，分别是输入指针和步长，以及输出指针和步长。
 * 在循环中，使用指定的步长迭代输入指针，直到迭代完成所有给定的维度。
 */
#define TERNARY_LOOP\
    char *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];\
    npy_intp is1 = steps[0], is2 = steps[1], is3 = steps[2], os1 = steps[3];\
    npy_intp n = dimensions[0];\
    npy_intp i;\
    for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1)

/** @} */

/**
 * 检查一元循环是否输入和输出是连续的。
 * 这里要求输入和输出的步长应该分别等于输入类型和输出类型的大小。
 */
#define IS_UNARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \
                                  steps[1] == sizeof(tout))

/**
 * 检查输出是否是连续的。
 * 这里要求输出的步长应该等于输出类型的大小。
 */
#define IS_OUTPUT_CONT(tout) (steps[1] == sizeof(tout))

/*
 * 确保维度是非零的，使用断言来检查，以便后续代码可以忽略访问无效内存的问题。
 */
#define IS_BINARY_REDUCE (assert(dimensions[0] != 0), \
        (args[0] == args[2])\
        && (steps[0] == steps[2])\
        && (steps[0] == 0))

/**
 * 检查二元约简操作是否输入是连续的。
 * 这里要求第一个输入的步长应该等于第一个输入类型的大小。
 */
#define IS_BINARY_REDUCE_INPUT_CONT(tin) (assert(dimensions[0] != 0), \
         steps[1] == sizeof(tin))

/**
 * 检查二元循环是否输入和输出是连续的。
 * 这里要求第一个和第二个输入的步长应该等于输入类型的大小，输出的步长应该等于输出类型的大小。
 */
#define IS_BINARY_CONT(tin, tout) (steps[0] == sizeof(tin) && \
                                   steps[1] == sizeof(tin) && \
                                   steps[2] == sizeof(tout))

/**
 * 检查二元循环是否第一个输入是标量，而第二个输入和输出是连续的。
 * 这里要求第一个输入的步长为0，第二个输入的步长应该等于输入类型的大小，输出的步长应该等于输出类型的大小。
 */
#define IS_BINARY_CONT_S1(tin, tout) (steps[0] == 0 && \
                                   steps[1] == sizeof(tin) && \
                                   steps[2] == sizeof(tout))

/**
 * 检查二元循环是否第二个输入是标量，而第一个输入和输出是连续的。
 * 这里要求第一个输入的步长应该等于输入类型的大小，第二个输入的步长为0，输出的步长应该等于输出类型的大小。
 */
#define IS_BINARY_CONT_S2(tin, tout) (steps[0] == sizeof(tin) && \
                                   steps[1] == 0 && \
                                   steps[2] == sizeof(tout))

/*
 * 带有连续性特化的循环宏。
 * op 应该是处理输入类型为 `tin` 的 `in`，并将结果存储在 `tout *out` 中的代码。
 * 结合 NPY_GCC_OPT_3 可以允许自动向量化，应仅在值得避免代码膨胀时使用。
 */
#define BASE_UNARY_LOOP(tin, tout, op) \
    UNARY_LOOP { \
        const tin in = *(tin *)ip1; \
        tout *out = (tout *)op1; \
        op; \
    }

/**
 * 快速的一元循环宏。
 * 如果输入和输出是连续的，条件允许编译器优化通用宏。
 * 如果输入和输出是相同的，使用 BASE_UNARY_LOOP 处理。
 * 否则，使用 BASE_UNARY_LOOP 处理。
 */
#define UNARY_LOOP_FAST(tin, tout, op)          \
    do { \
        if (IS_UNARY_CONT(tin, tout)) { \
            if (args[0] == args[1]) { \
                BASE_UNARY_LOOP(tin, tout, op) \
            } \
            else { \
                BASE_UNARY_LOOP(tin, tout, op) \
            } \
        } \
        else { \
            BASE_UNARY_LOOP(tin, tout, op) \
        } \
    } \
    while (0)

/*
 * 带有连续性特化的循环宏。
 * op 应该是处理输入类型为 `tin` 的 `in1` 和 `in2`，并将结果存储在 `tout *out` 中的代码。
 * 结合 NPY_GCC_OPT_3 可以允许自动向量化，应仅在值得避免代码膨胀时使用。
 */
#define BASE_BINARY_LOOP(tin, tout, op) \
    BINARY_LOOP { \
        # 宏定义 BINARY_LOOP 的开始，这是一个多行宏
        const tin in1 = *(tin *)ip1; \
        # 定义并初始化变量 in1，类型为 tin，从指针 ip1 解引用得到值
        const tin in2 = *(tin *)ip2; \
        # 定义并初始化变量 in2，类型为 tin，从指针 ip2 解引用得到值
        tout *out = (tout *)op1; \
        # 定义输出指针 out，类型为 tout，指向 op1 所指向的地址
        op; \
        # 调用宏中的 op 操作，这里假设它会使用 in1、in2 和 out 进行某种运算
    }
/*
 * 定义了一个宏 `IVDEP_LOOP`，根据 GCC 编译器版本选择性地插入 GCC ivdep 声明，用于向编译器提示向量化优化信息。
 * 在 GCC 6 及以上版本中，使用 `_Pragma("GCC ivdep")` 实现向量化优化。
 * 在旧版本的 GCC 中，定义为空。
 */
#if __GNUC__ >= 6
#define IVDEP_LOOP _Pragma("GCC ivdep")
#else
#define IVDEP_LOOP
#endif

/*
 * 定义了一个宏 `BASE_BINARY_LOOP_INP`，用于执行基本的二进制操作循环，支持向量化优化。
 * 宏接受输入参数 `tin`（输入类型）、`tout`（输出类型）、`op`（操作），其中 `BINARY_DEFS` 是一个未定义的宏。
 * 使用 `IVDEP_LOOP` 声明以尝试向量化优化循环。
 * 循环遍历 `n` 次，依次处理 `ip1`、`ip2` 指针，以及 `op1` 指针，每次迭代更新指针位置。
 * 从 `ip1`、`ip2` 中读取 `tin` 类型的数据，并将结果存储为 `tout` 类型的数据。
 */
#define BASE_BINARY_LOOP_INP(tin, tout, op) \
    BINARY_DEFS\
    IVDEP_LOOP \
    for(i = 0; i < n; i++, ip1 += is1, ip2 += is2, op1 += os1) { \
        const tin in1 = *(tin *)ip1; \
        const tin in2 = *(tin *)ip2; \
        tout *out = (tout *)op1; \
        op; \
    }

/*
 * 定义了一个宏 `BASE_BINARY_LOOP_S`，用于执行基本的二进制操作循环，不支持向量化优化。
 * 宏接受输入参数 `tin`（输入类型）、`tout`（输出类型）、`cin`（常量输入）、`vin`（变量输入）、`op`（操作）。
 * 首先读取 `cin` 常量输入，然后执行循环处理 `vin` 变量输入。
 * 每次迭代更新 `op1` 指针，并从 `vinp` 中读取 `tin` 类型数据，最终将结果存储为 `tout` 类型数据。
 */
#define BASE_BINARY_LOOP_S(tin, tout, cin, cinp, vin, vinp, op) \
    const tin cin = *(tin *)cinp; \
    BINARY_LOOP { \
        const tin vin = *(tin *)vinp; \
        tout *out = (tout *)op1; \
        op; \
    }

/*
 * 定义了一个宏 `BASE_BINARY_LOOP_S_INP`，用于执行基本的二进制操作循环，不支持向量化优化。
 * 与 `BASE_BINARY_LOOP_S` 类似，不同之处在于输出的位置由 `vinp` 决定。
 * 每次迭代更新 `vinp` 指针，并从 `vinp` 中读取 `tin` 类型数据，最终将结果存储在 `vinp` 指向的位置。
 */
#define BASE_BINARY_LOOP_S_INP(tin, tout, cin, cinp, vin, vinp, op) \
    const tin cin = *(tin *)cinp; \
    BINARY_LOOP { \
        const tin vin = *(tin *)vinp; \
        tout *out = (tout *)vinp; \
        op; \
    }

/*
 * 定义了一个宏 `BINARY_LOOP_FAST`，用于根据输入的数据类型 `tin` 和 `tout` 执行二进制操作循环。
 * 宏接受输入参数 `tin`（输入类型）、`tout`（输出类型）、`op`（操作）。
 * 根据输入类型的连续性，选择最优的循环方式：向量化、标量操作或混合操作。
 * 如果输入类型 `tin` 和 `tout` 是连续的，则尝试使用 `BASE_BINARY_LOOP_INP` 进行向量化操作。
 * 如果 `tin` 和 `tout` 中只有一个是连续的，则使用 `BASE_BINARY_LOOP_S_INP` 或 `BASE_BINARY_LOOP_S` 进行操作。
 * 否则，使用普通的 `BASE_BINARY_LOOP` 进行操作。
 */
#define BINARY_LOOP_FAST(tin, tout, op)         \
    do { \
        /* condition allows compiler to optimize the generic macro */ \
        if (IS_BINARY_CONT(tin, tout)) { \
            if (abs_ptrdiff(args[2], args[0]) == 0 && \
                    abs_ptrdiff(args[2], args[1]) >= AUTOVEC_OVERLAP_SIZE) { \
                BASE_BINARY_LOOP_INP(tin, tout, op) \
            } \
            else if (abs_ptrdiff(args[2], args[1]) == 0 && \
                         abs_ptrdiff(args[2], args[0]) >= AUTOVEC_OVERLAP_SIZE) { \
                BASE_BINARY_LOOP_INP(tin, tout, op) \
            } \
            else { \
                BASE_BINARY_LOOP(tin, tout, op) \
            } \
        } \
        else if (IS_BINARY_CONT_S1(tin, tout)) { \
            if (abs_ptrdiff(args[2], args[1]) == 0) { \
                BASE_BINARY_LOOP_S_INP(tin, tout, in1, args[0], in2, ip2, op) \
            } \
            else { \
                BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \
            } \
        } \
        else if (IS_BINARY_CONT_S2(tin, tout)) { \
            if (abs_ptrdiff(args[2], args[0]) == 0) { \
                BASE_BINARY_LOOP_S_INP(tin, tout, in2, args[1], in1, ip1, op) \
            } \
            else { \
                BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \
            }\
        } \
        else { \
            BASE_BINARY_LOOP(tin, tout, op) \
        } \
    } \
    while (0)

/*
 * 定义了一个宏 `BINARY_REDUCE_LOOP_INNER`，用于执行二进制操作的内部循环。
 * 宏设置了 `ip2` 指向 `args[1]`，`is2` 为 `steps[1]`，`n` 为 `dimensions[0]`。
 * 循环处理 `n` 次，每次更新 `ip2` 指针位置。
 */
#define BINARY_REDUCE_LOOP_INNER\
    char *ip2 = args[1]; \
    npy_intp is2 = steps[1]; \
    npy_intp n = dimensions[0]; \
    npy_intp i; \
    for(i = 0; i < n; i++, ip2 += is2)

/*
 * 定义了一个宏 `BINARY_REDUCE_LOOP`，用于执行二进制操作的循环。
 * 宏设置了 `iop1` 指向 `args[0]`，并读取 `args[0]` 中的数据作为 `io1`。
 * 使用 `BINARY_REDUCE_LOOP_INNER` 执行二进制操作的内部循环。
 * 循环处理 `dimensions[0]` 次，每次更新 `iop1` 指针位置。
 */
#define BINARY_REDUCE_LOOP(TYPE)\
    char *iop1 = args[0]; \
    TYPE io1 = *(TYPE *)iop1; \
    BINARY_REDUCE_LOOP_INNER
/*
 * 定义一个基础的二元约简循环宏。
 * 宏接受两个参数：TYPE 是数据类型，op 是在循环内部执行的操作。
 * 在内部，从指针 ip2 中读取 TYPE 类型的数据，然后执行操作 op。
 */
#define BASE_BINARY_REDUCE_LOOP(TYPE, op) \
    BINARY_REDUCE_LOOP_INNER { \
        const TYPE in2 = *(TYPE *)ip2; \
        op; \
    }

/*
 * 定义一个快速版本的二元约简循环宏。
 * 宏接受两个参数：TYPE 是数据类型，op 是在循环内部执行的操作。
 * 根据 IS_BINARY_REDUCE_INPUT_CONT(TYPE) 的条件，选择性地调用 BASE_BINARY_REDUCE_LOOP 宏。
 */
#define BINARY_REDUCE_LOOP_FAST_INNER(TYPE, op)\
    /* 条件允许编译器优化通用宏 */ \
    if(IS_BINARY_REDUCE_INPUT_CONT(TYPE)) { \
        BASE_BINARY_REDUCE_LOOP(TYPE, op) \
    } \
    else { \
        BASE_BINARY_REDUCE_LOOP(TYPE, op) \
    }

/*
 * 定义一个快速版本的二元约简循环宏。
 * 宏接受两个参数：TYPE 是数据类型，op 是在循环内部执行的操作。
 * 使用 args 数组中的第一个元素作为指向数据的指针，执行循环内部操作，
 * 并在循环结束后将计算结果写回 args[0] 指向的位置。
 */
#define BINARY_REDUCE_LOOP_FAST(TYPE, op)\
    do { \
        char *iop1 = args[0]; \
        TYPE io1 = *(TYPE *)iop1; \
        BINARY_REDUCE_LOOP_FAST_INNER(TYPE, op); \
        *((TYPE *)iop1) = io1; \
    } \
    while (0)

/*
 * 检查步长是否为元素大小，并且输入和输出地址是否相等或者在寄存器内不重叠。
 * 通过检查 steps 数组元素与 esize 是否相等来保证 steps >= 0。
 */
#define IS_BINARY_STRIDE_ONE(esize, vsize) \
    ((steps[0] == esize) && \
     (steps[1] == esize) && \
     (steps[2] == esize) && \
     (abs_ptrdiff(args[2], args[0]) >= vsize) && \
     (abs_ptrdiff(args[2], args[1]) >= vsize))

/*
 * 检查是否可以对一元操作进行阻塞。
 * 检查条件包括：步长是否与元素大小相等、输入和输出地址是否对齐以及地址之间是否有足够的空间。
 */
#define IS_BLOCKABLE_UNARY(esize, vsize) \
    (steps[0] == (esize) && steps[0] == steps[1] && \
     (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \
     ((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \
      ((abs_ptrdiff(args[1], args[0]) == 0))))

/*
 * 避免对于非常大的步长使用 SIMD 操作的宏定义。
 * 主要原因包括：
 * 1) 支持大步长需要使用 i64gather/scatter_ps 指令，性能不佳。
 * 2) 当加载/存储操作跨越页面边界时，使用 gather 和 scatter 指令会变慢。
 * 因此，只依赖于 i32gather/scatter_ps 指令，确保索引 < INT_MAX 以避免溢出。
 * 同时要求输入和输出数组在内存中不重叠。
 */
#define IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP \
    ((labs(steps[0]) < MAX_STEP_SIZE)  && \
     (labs(steps[1]) < MAX_STEP_SIZE)  && \
     (labs(steps[2]) < MAX_STEP_SIZE)  && \
     (nomemoverlap(args[0], steps[0] * dimensions[0], args[2], steps[2] * dimensions[0])) && \
     (nomemoverlap(args[1], steps[1] * dimensions[0], args[2], steps[2] * dimensions[0])))

/*
 * 检查是否可以对两个输出的一元操作进行阻塞。
 * 条件包括：步长是否小于 MAX_STEP_SIZE、输入和输出地址是否在内存中不重叠。
 */
#define IS_UNARY_TWO_OUT_SMALL_STEPS_AND_NOMEMOVERLAP \
    ((labs(steps[0]) < MAX_STEP_SIZE)  && \
     (labs(steps[1]) < MAX_STEP_SIZE)  && \
     (labs(steps[2]) < MAX_STEP_SIZE)  && \
     (nomemoverlap(args[0], steps[0] * dimensions[0], args[2], steps[2] * dimensions[0])) && \
     (nomemoverlap(args[0], steps[0] * dimensions[0], args[1], steps[1] * dimensions[0])))
/*
 * 宏定义：检查是否可以对一元操作进行输出阻塞
 * 1) 第一个步长应该是输入元素大小的倍数，并且步长应小于最大步长以提高性能
 * 2) 输入和输出数组在内存中不应该有重叠
 */
#define IS_OUTPUT_BLOCKABLE_UNARY(esizein, esizeout, vsize) \
    ((steps[0] & (esizein-1)) == 0 && \
     steps[1] == (esizeout) && llabs(steps[0]) < MAX_STEP_SIZE && \
     (nomemoverlap(args[1], steps[1] * dimensions[0], args[0], steps[0] * dimensions[0])))

/*
 * 宏定义：检查是否可以对归约操作进行阻塞
 * 1) 第二个步长应该等于元素大小，并且输入和输出指针之间的距离应大于等于向量大小
 * 2) 输入和输出指针应该按元素大小对齐
 */
#define IS_BLOCKABLE_REDUCE(esize, vsize) \
    (steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \
     npy_is_aligned(args[1], (esize)) && \
     npy_is_aligned(args[0], (esize)))

/*
 * 宏定义：检查是否可以对二元操作进行阻塞
 * 1) 三个步长应该相等且等于元素大小，并且输入指针应该按元素大小对齐
 * 2) 输入指针之间的距离应大于等于向量大小或者为零
 */
#define IS_BLOCKABLE_BINARY(esize, vsize) \
    (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \
     npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \
     npy_is_aligned(args[0], (esize)) && \
     (abs_ptrdiff(args[2], args[0]) >= (vsize) || \
      abs_ptrdiff(args[2], args[0]) == 0) && \
     (abs_ptrdiff(args[2], args[1]) >= (vsize) || \
      abs_ptrdiff(args[2], args[1]) >= 0))

/*
 * 宏定义：检查是否可以对带有标量的二元操作进行阻塞（第一个标量）
 * 1) 第一个步长应为零，第二个和第三个步长应相等且等于元素大小，并且输入指针应按元素大小对齐
 * 2) 第一个标量指针与第二个标量指针之间的距离应大于等于向量大小，并且第一个标量指针与第三个指针之间的距离应大于等于元素大小
 */
#define IS_BLOCKABLE_BINARY_SCALAR1(esize, vsize) \
    (steps[0] == 0 && steps[1] == steps[2] && steps[2] == (esize) && \
     npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \
     ((abs_ptrdiff(args[2], args[1]) >= (vsize)) || \
      (abs_ptrdiff(args[2], args[1]) == 0)) && \
     abs_ptrdiff(args[2], args[0]) >= (esize))

/*
 * 宏定义：检查是否可以对带有标量的二元操作进行阻塞（第二个标量）
 * 1) 第二个步长应为零，第一个和第三个步长应相等且等于元素大小，并且输入指针应按元素大小对齐
 * 2) 第二个标量指针与第一个标量指针之间的距离应大于等于向量大小，并且第二个标量指针与第三个指针之间的距离应大于等于元素大小
 */
#define IS_BLOCKABLE_BINARY_SCALAR2(esize, vsize) \
    (steps[1] == 0 && steps[0] == steps[2] && steps[2] == (esize) && \
     npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[0], (esize)) && \
     ((abs_ptrdiff(args[2], args[0]) >= (vsize)) || \
      (abs_ptrdiff(args[2], args[0]) == 0)) && \
     abs_ptrdiff(args[2], args[1]) >= (esize))

#undef abs_ptrdiff

/*
 * 宏定义：将变量按指定对齐方式对齐
 * 1) 使用 npy_aligned_block_offset 函数计算对齐偏移量 peel
 * 2) 循环从 peel 开始，直到满足对齐条件结束
 */
#define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\
    npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\
                                                alignment, n);\
    for(i = 0; i < peel; i++)

/*
 * 宏定义：按块循环处理数组
 * 1) 循环处理被块分隔的数据块，每次增加一个块的大小
 */
#define LOOP_BLOCKED(type, vsize)\
    for(; i < npy_blocked_end(peel, sizeof(type), vsize, n);\
            i += (vsize / sizeof(type)))

/*
 * 宏定义：处理剩余的未被块处理的部分
 * 1) 处理剩余的不满足块处理条件的数据部分
 */
#define LOOP_BLOCKED_END\
    for (; i < n; i++)

#endif /* _NPY_UMATH_FAST_LOOP_MACROS_H_ */

`.\numpy\numpy\_core\src\umath\legacy_array_method.c`

/*
 * This file defines most of the machinery in order to wrap legacy style
 * ufunc loops into new style arraymethods.
 */

#define NPY_NO_DEPRECATED_API NPY_API_VERSION  // Define to use only non-deprecated NumPy API
#define _MULTIARRAYMODULE  // Define to include multiarray module functionality
#define _UMATHMODULE  // Define to include umath module functionality

#define PY_SSIZE_T_CLEAN  // Define to use Python 3's clean API for Py_ssize_t
#include <Python.h>  // Include Python's C API header

#include "numpy/ndarraytypes.h"  // Include NumPy's ndarraytypes header

#include "convert_datatype.h"  // Include header for datatype conversion
#include "array_method.h"  // Include header for array methods
#include "array_coercion.h"  // Include header for array coercion
#include "dtype_transfer.h"  // Include header for dtype transfer
#include "legacy_array_method.h"  // Include header for legacy array methods
#include "dtypemeta.h"  // Include header for dtype metadata

#include "ufunc_object.h"  // Include header for ufunc object
#include "ufunc_type_resolution.h"  // Include header for ufunc type resolution

// Define a structure for auxiliary data used in legacy array methods
typedef struct {
    NpyAuxData base;  // Base structure for auxiliary data
    PyUFuncGenericFunction loop;  // Legacy ufunc loop function pointer
    void *user_data;  // Additional user data associated with the loop
    int pyerr_check;  // Flag indicating whether to check PyErr_Occurred()
} legacy_array_method_auxdata;

// Use a free list for caching auxiliary data, only if GIL is enabled
#ifndef Py_GIL_DISABLED
#define NPY_LOOP_DATA_CACHE_SIZE 5  // Size of the loop data cache
static int loop_data_num_cached = 0;
static legacy_array_method_auxdata *loop_data_cache[NPY_LOOP_DATA_CACHE_SIZE];
#else
#define NPY_LOOP_DATA_CACHE_SIZE 0
#endif

// Free function for legacy array method auxiliary data
static void
legacy_array_method_auxdata_free(NpyAuxData *data)
{
#if NPY_LOOP_DATA_CACHE_SIZE > 0
    if (loop_data_num_cached < NPY_LOOP_DATA_CACHE_SIZE) {
        loop_data_cache[loop_data_num_cached] = (
                (legacy_array_method_auxdata *)data);
        loop_data_num_cached++;
    }
    else
#endif
    {
        PyMem_Free(data);  // Free the memory allocated for auxiliary data
    }
}

// Function to allocate new loop data
NpyAuxData *
get_new_loop_data(
        PyUFuncGenericFunction loop, void *user_data, int pyerr_check)
{
    legacy_array_method_auxdata *data;
#if NPY_LOOP_DATA_CACHE_SIZE > 0
    if (NPY_LIKELY(loop_data_num_cached > 0)) {
        loop_data_num_cached--;
        data = loop_data_cache[loop_data_num_cached];
    }
    else
#endif
    {
        data = PyMem_Malloc(sizeof(legacy_array_method_auxdata));  // Allocate memory for new auxiliary data
        if (data == NULL) {
            return NULL;  // Return NULL if memory allocation fails
        }
        data->base.free = legacy_array_method_auxdata_free;  // Set free function for the auxiliary data
        data->base.clone = NULL;  // No need for cloning (at least for now)
    }
    data->loop = loop;  // Assign legacy ufunc loop function pointer
    data->user_data = user_data;  // Assign additional user data
    data->pyerr_check = pyerr_check;  // Set flag for PyErr_Occurred() checking
    return (NpyAuxData *)data;  // Return the allocated auxiliary data
}

#undef NPY_LOOP_DATA_CACHE_SIZE

/*
 * This is a thin wrapper around the legacy loop signature.
 */
static int
generic_wrapped_legacy_loop(PyArrayMethod_Context *NPY_UNUSED(context),
        char *const *data, const npy_intp *dimensions, const npy_intp *strides,
        NpyAuxData *auxdata)
{
    legacy_array_method_auxdata *ldata = (legacy_array_method_auxdata *)auxdata;

    ldata->loop((char **)data, dimensions, strides, ldata->user_data);  // Call the legacy ufunc loop
    if (ldata->pyerr_check && PyErr_Occurred()) {  // Check for Python errors if flag is set
        return -1;  // Return -1 on error
    }
    return 0;  // Return 0 indicating success
}
/*
 * Signal that the old type-resolution function must be used to resolve
 * the descriptors (mainly/only used for datetimes due to the unit).
 *
 * ArrayMethod's are expected to implement this, but it is too tricky
 * to support properly.  So we simply set an error that should never be seen.
 */
NPY_NO_EXPORT NPY_CASTING
wrapped_legacy_resolve_descriptors(PyArrayMethodObject *NPY_UNUSED(self),
        PyArray_DTypeMeta *const NPY_UNUSED(dtypes[]),
        PyArray_Descr *const NPY_UNUSED(given_descrs[]),
        PyArray_Descr *NPY_UNUSED(loop_descrs[]),
        npy_intp *NPY_UNUSED(view_offset))
{
    // 设置运行时错误，指出不能使用旧的 ArrayMethod 包装而不调用 ufunc 本身
    PyErr_SetString(PyExc_RuntimeError,
            "cannot use legacy wrapping ArrayMethod without calling the ufunc "
            "itself.  If this error is hit, the solution will be to port the "
            "legacy ufunc loop implementation to the new API.");
    return -1; // 返回错误代码 -1
}

/*
 * Much the same as the default type resolver, but tries a bit harder to
 * preserve metadata.
 */
static NPY_CASTING
simple_legacy_resolve_descriptors(
        PyArrayMethodObject *method,
        PyArray_DTypeMeta *const *dtypes,
        PyArray_Descr *const *given_descrs,
        PyArray_Descr **output_descrs,
        npy_intp *NPY_UNUSED(view_offset))
{
    int i = 0;
    int nin = method->nin; // 获取输入数量
    int nout = method->nout; // 获取输出数量

    if (nin == 2 && nout == 1 && given_descrs[2] != NULL
            && dtypes[0] == dtypes[2]) {
        /*
         * Could be a reduction, which requires `descr[0] is descr[2]`
         * (identity) at least currently. This is because `op[0] is op[2]`.
         * (If the output descriptor is not passed, the below works.)
         */
        // 确保给定的描述符是规范的
        output_descrs[2] = NPY_DT_CALL_ensure_canonical(given_descrs[2]);
        if (output_descrs[2] == NULL) {
            Py_CLEAR(output_descrs[2]);
            return -1; // 失败，返回错误代码 -1
        }
        Py_INCREF(output_descrs[2]); // 增加引用计数
        output_descrs[0] = output_descrs[2]; // 设置第一个输出描述符为第三个描述符的规范版本
        if (dtypes[1] == dtypes[2]) {
            /* Same for the second one (accumulation is stricter) */
            Py_INCREF(output_descrs[2]); // 增加引用计数
            output_descrs[1] = output_descrs[2]; // 设置第二个输出描述符为第三个描述符的规范版本
        }
        else {
            // 确保给定的描述符是规范的
            output_descrs[1] = NPY_DT_CALL_ensure_canonical(given_descrs[1]);
            if (output_descrs[1] == NULL) {
                i = 2;
                goto fail; // 失败，跳转到失败处理标签
            }
        }
        return NPY_NO_CASTING; // 返回无需转换
    }

    for (; i < nin + nout; i++) {
        if (given_descrs[i] != NULL) {
            // 确保给定的描述符是规范的
            output_descrs[i] = NPY_DT_CALL_ensure_canonical(given_descrs[i]);
        }
        else if (dtypes[i] == dtypes[0] && i > 0) {
            /* Preserve metadata from the first operand if same dtype */
            Py_INCREF(output_descrs[0]); // 增加引用计数
            output_descrs[i] = output_descrs[0]; // 使用第一个操作数的元数据
        }
        else {
            // 使用默认的描述符
            output_descrs[i] = NPY_DT_CALL_default_descr(dtypes[i]);
        }
        if (output_descrs[i] == NULL) {
            goto fail; // 失败，跳转到失败处理标签
        }
    }

    return NPY_NO_CASTING; // 返回无需转换

  fail:
    // 失败处理
    # 逆向遍历循环，从 i 的初始值开始，每次递减直到 i 大于等于 0
    for (; i >= 0; i--) {
        # 使用 Py_CLEAR 宏来清空 output_descrs 数组中索引为 i 的元素，释放其引用计数
        Py_CLEAR(output_descrs[i]);
    }
    # 返回 -1，表示函数执行出错或异常
    return -1;
/*
 * This function retrieves the legacy inner-loop for a ufunc. If performance is
 * slow, caching might be considered.
 */
NPY_NO_EXPORT int
get_wrapped_legacy_ufunc_loop(PyArrayMethod_Context *context,
        int aligned, int move_references,
        const npy_intp *NPY_UNUSED(strides),
        PyArrayMethod_StridedLoop **out_loop,
        NpyAuxData **out_transferdata,
        NPY_ARRAYMETHOD_FLAGS *flags)
{
    assert(aligned);  // Ensure 'aligned' flag is true
    assert(!move_references);  // Ensure 'move_references' flag is false

    if (context->caller == NULL ||
            !PyObject_TypeCheck(context->caller, &PyUFunc_Type)) {
        PyErr_Format(PyExc_RuntimeError,
                "cannot call %s without its ufunc as caller context.",
                context->method->name);
        return -1;  // Return error if caller is not a valid PyUFunc_Type
    }

    PyUFuncObject *ufunc = (PyUFuncObject *)context->caller;
    void *user_data;
    int needs_api = 0;

    PyUFuncGenericFunction loop = NULL;
    /* Note that `needs_api` is not reliable (it was in fact unused normally) */
    // Attempt to select the default legacy inner loop for the ufunc
    if (PyUFunc_DefaultLegacyInnerLoopSelector(ufunc,
            context->descriptors, &loop, &user_data, &needs_api) < 0) {
        return -1;  // Return error if selecting the loop fails
    }
    *flags = context->method->flags & NPY_METH_RUNTIME_FLAGS;
    if (needs_api) {
        *flags |= NPY_METH_REQUIRES_PYAPI;  // Set flag if API access is needed
    }

    *out_loop = &generic_wrapped_legacy_loop;  // Assign the generic legacy loop
    // Obtain transfer data for the loop
    *out_transferdata = get_new_loop_data(
            loop, user_data, (*flags & NPY_METH_REQUIRES_PYAPI) != 0);
    if (*out_transferdata == NULL) {
        PyErr_NoMemory();  // Return error if memory allocation fails
        return -1;
    }
    return 0;  // Return success
}



/*
 * This function copies the cached initial value for the ufunc method.
 * It assumes all internal numeric types are trivially copied.
 */
static int
copy_cached_initial(
        PyArrayMethod_Context *context, npy_bool NPY_UNUSED(reduction_is_empty),
        void *initial)
{
    // Copy the legacy initial value from method context
    memcpy(initial, context->method->legacy_initial,
           context->descriptors[0]->elsize);
    return 1;  // Return success
}


/*
 * This function attempts to obtain the initial value from the calling ufunc.
 * It is only called when necessary, particularly for internal numeric dtypes.
 */
static int
get_initial_from_ufunc(
        PyArrayMethod_Context *context, npy_bool reduction_is_empty,
        void *initial)
{
    if (context->caller == NULL
            || !PyObject_TypeCheck(context->caller, &PyUFunc_Type)) {
        /* Impossible in NumPy 1.24;  guard in case it becomes possible. */
        PyErr_SetString(PyExc_ValueError,
                "getting initial failed because it can only done for legacy "
                "ufunc loops when the ufunc is provided.");
        return -1;  // Return error if caller is not a valid PyUFunc_Type
    }
    // Check if reordering is possible for the dtype
    npy_bool reorderable;
    // 获取当前上下文中的默认标识对象，用于指定的通用函数调用
    PyObject *identity_obj = PyUFunc_GetDefaultIdentity(
            (PyUFuncObject *)context->caller, &reorderable);
    // 如果获取失败，则返回错误代码
    if (identity_obj == NULL) {
        return -1;
    }
    // 如果默认标识对象是 Py_None，表示通用函数没有默认标识（理论上不应发生）
    if (identity_obj == Py_None) {
        /* UFunc has no identity (should not happen) */
        // 释放对 Py_None 的引用并返回 0
        Py_DECREF(identity_obj);
        return 0;
    }
    // 如果输入类型是无符号整数并且标识对象是 PyLong 对象
    if (PyTypeNum_ISUNSIGNED(context->descriptors[1]->type_num)
            && PyLong_CheckExact(identity_obj)) {
        /*
         * 这是一个小技巧，直到我们有真正的循环特定标识为止。
         * Python 中 -1 不能转换为无符号整数，因此将其转换为 NumPy 标量，
         * 但对于位运算函数，我们使用 -1 表示全部为 1。
         * （内置标识在这里不会溢出，尽管我们可能会不必要地转换 0 和 1。）
         */
        // 将标识对象转换为 NumPy 的数组标量对象
        Py_SETREF(identity_obj, PyObject_CallFunctionObjArgs(
                     (PyObject *)&PyLongArrType_Type, identity_obj, NULL));
        // 如果转换失败，则返回错误代码
        if (identity_obj == NULL) {
            return -1;
        }
    }
    // 如果输入类型是对象类型且归约不是空的
    else if (context->descriptors[0]->type_num == NPY_OBJECT
            && !reduction_is_empty) {
        /* 允许 `sum([object()])` 起作用，但在空时使用 0。 */
        // 释放对标识对象的引用并返回 0
        Py_DECREF(identity_obj);
        return 0;
    }

    // 将初始值和标识对象打包到数组中
    int res = PyArray_Pack(context->descriptors[0], initial, identity_obj);
    // 释放对标识对象的引用
    Py_DECREF(identity_obj);
    // 如果打包失败，则返回错误代码
    if (res < 0) {
        return -1;
    }

    // 如果输入类型是数字，可以缓存以避免通过 Python int 转换
    if (PyTypeNum_ISNUMBER(context->descriptors[0]->type_num)) {
        /* 对于数字，我们可以缓存以避免通过 Python int 转换 */
        // 将初始值的一部分复制到方法的缓存中，并设置获取初始值的方法
        memcpy(context->method->legacy_initial, initial,
               context->descriptors[0]->elsize);
        context->method->get_reduction_initial = &copy_cached_initial;
    }

    // 归约可以使用初始值
    // 返回 1 表示成功
    return 1;
/*
 * 结束函数 `PyArray_NewLegacyWrappingArrayMethod` 的定义。
 */
}


/*
 * 获取封装了 ufunc 实例的未绑定 ArrayMethod。
 * 注意，此函数将结果存储在 ufunc 上，然后只返回相同的结果。
 */
NPY_NO_EXPORT PyArrayMethodObject *
PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc,
        PyArray_DTypeMeta *signature[])
{
    // 创建方法名称字符串，格式为 "legacy_ufunc_wrapper_for_{ufunc->name}"，长度限制为 100
    char method_name[101];
    const char *name = ufunc->name ? ufunc->name : "<unknown>";
    snprintf(method_name, 100, "legacy_ufunc_wrapper_for_%s", name);

    /*
     * 假设我们在任何（传统）dtype 标记时需要 Python API。
     */
    int any_output_flexible = 0;
    NPY_ARRAYMETHOD_FLAGS flags = 0;
    if (ufunc->nargs == 3 &&
            signature[0]->type_num == NPY_BOOL &&
            signature[1]->type_num == NPY_BOOL &&
            signature[2]->type_num == NPY_BOOL && (
                strcmp(ufunc->name, "logical_or") == 0 ||
                strcmp(ufunc->name, "logical_and") == 0 ||
                strcmp(ufunc->name, "logical_xor") == 0)) {
        /*
         * 这是一个逻辑 ufunc，并且`??->?`循环。始终可以将任何输入强制转换为布尔值，
         * 因为这种转换由真值定义。
         * 这使得我们可以确保两件事：
         * 1. `np.all`/`np.any` 知道强制转换输入是可以的
         *    （它们必须这样做，因为没有 `?l->?` 等循环）
         * 2. 逻辑函数自动适用于任何实现到布尔值的 DType。
         */
        flags = _NPY_METH_FORCE_CAST_INPUTS;
    }

    // 获取归约初始值的函数指针
    PyArrayMethod_GetReductionInitial *get_reduction_intial = NULL;
    if (ufunc->nin == 2 && ufunc->nout == 1) {
        npy_bool reorderable = NPY_FALSE;
        // 获取默认的标识对象，可能返回 NULL
        PyObject *identity_obj = PyUFunc_GetDefaultIdentity(
                ufunc, &reorderable);
        if (identity_obj == NULL) {
            return NULL;
        }
        /*
         * TODO: 对于对象，"reorderable" 是必需的吗？因为否则我们会禁用多轴归约 `arr.sum(0, 1)`。
         *       但是对于 `arr = array([["a", "b"], ["c", "d"]], dtype="object")`，
         *       它实际上不可重新排序（顺序更改结果）。
         */
        if (reorderable) {
            flags |= NPY_METH_IS_REORDERABLE;
        }
        if (identity_obj != Py_None) {
            get_reduction_intial = &get_initial_from_ufunc;
        }
    }

    // 遍历所有输入和输出的签名，设置相应的标志
    for (int i = 0; i < ufunc->nin+ufunc->nout; i++) {
        if (signature[i]->singleton->flags & (
                NPY_ITEM_REFCOUNT | NPY_ITEM_IS_POINTER | NPY_NEEDS_PYAPI)) {
            flags |= NPY_METH_REQUIRES_PYAPI;
        }
        if (NPY_DT_is_parametric(signature[i])) {
            any_output_flexible = 1;
        }
    }

    // 定义 PyType_Slot 数组，指定不同的功能方法和对应的函数指针
    PyType_Slot slots[4] = {
        {NPY_METH_get_loop, &get_wrapped_legacy_ufunc_loop},
        {NPY_METH_resolve_descriptors, &simple_legacy_resolve_descriptors},
        {NPY_METH_get_reduction_initial, get_reduction_intial},
        {0, NULL},
    };
    // 如果存在任何输出灵活性，执行以下操作
    if (any_output_flexible) {
        // 设置函数指针以使用包装后的传统描述符解析器
        slots[1].pfunc = &wrapped_legacy_resolve_descriptors;
    }

    // 定义 PyArrayMethod_Spec 结构体并初始化
    PyArrayMethod_Spec spec = {
        .name = method_name,           // 方法名称
        .nin = ufunc->nin,             // 输入数量
        .nout = ufunc->nout,           // 输出数量
        .casting = NPY_NO_CASTING,     // 禁用类型转换
        .flags = flags,                // 标志位
        .dtypes = signature,           // 签名数据类型
        .slots = slots,                // 方法结构体数组
    };

    // 通过 PyArrayMethod_FromSpec_int 函数创建 PyBoundArrayMethodObject 对象
    PyBoundArrayMethodObject *bound_res = PyArrayMethod_FromSpec_int(&spec, 1);
    // 如果创建失败，返回空指针
    if (bound_res == NULL) {
        return NULL;
    }

    // 获取 PyArrayMethodObject 对象
    PyArrayMethodObject *res = bound_res->method;
    // 增加对象的引用计数
    Py_INCREF(res);
    // 减少 PyBoundArrayMethodObject 对象的引用计数
    Py_DECREF(bound_res);

    // 返回 PyArrayMethodObject 对象
    return res;
}



# 这是一个代码块的结束符号，表示一个代码块的结尾。

`.\numpy\numpy\_core\src\umath\legacy_array_method.h`

#ifndef _NPY_LEGACY_ARRAY_METHOD_H
#define _NPY_LEGACY_ARRAY_METHOD_H

#include "numpy/ndarraytypes.h"
#include "numpy/ufuncobject.h"
#include "array_method.h"

#ifdef __cplusplus
extern "C" {
#endif

// 定义了一个新的类型 PyArrayMethodObject，用于表示数组方法对象
NPY_NO_EXPORT PyArrayMethodObject *
PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc,
        PyArray_DTypeMeta *signature[]);

/*
 * 下面两个符号在头文件中定义，以便其他地方可以使用它们来探测特殊情况
 * （或者一个 ArrayMethod 是否为 "legacy" 类型）。
 */

// 获取被包装的 legacy ufunc 循环函数的信息，并返回相关参数
NPY_NO_EXPORT int
get_wrapped_legacy_ufunc_loop(PyArrayMethod_Context *context,
        int aligned, int move_references,
        const npy_intp *NPY_UNUSED(strides),
        PyArrayMethod_StridedLoop **out_loop,
        NpyAuxData **out_transferdata,
        NPY_ARRAYMETHOD_FLAGS *flags);

// 解析 legacy 类型数组方法的描述符，返回最终的数据类型和形状信息
NPY_NO_EXPORT NPY_CASTING
wrapped_legacy_resolve_descriptors(PyArrayMethodObject *,
        PyArray_DTypeMeta *const *, PyArray_Descr *const *, PyArray_Descr **, npy_intp *);

#ifdef __cplusplus
}
#endif

#endif  /*_NPY_LEGACY_ARRAY_METHOD_H */

`.\numpy\numpy\_core\src\umath\npy_simd_data.h`

#ifndef __NPY_SIMD_DATA_H_
#define __NPY_SIMD_DATA_H_

#if defined NPY_HAVE_AVX512F
#if !(defined(__clang__) && (__clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 1)))
/*
 * Constants used in vector implementation of float64 exp(x)
 */
#define NPY_RINT_CVT_MAGIC 0x1.8p52           // Magic number for rounding in floating-point operations
#define NPY_INV_LN2_MUL_32 0x1.71547652b82fep+5 // Precomputed constant for logarithm base 2
#define NPY_TANG_NEG_L1 -0x1.62e42fefp-6       // Constant coefficient for tangent polynomial approximation
#define NPY_TANG_NEG_L2 -0x1.473de6af278edp-39 // Constant coefficient for tangent polynomial approximation
#define NPY_TANG_A1 0x1p-1                      // Coefficient for tangent polynomial approximation
#define NPY_TANG_A2 0x1.5555555548f7cp-3        // Coefficient for tangent polynomial approximation
#define NPY_TANG_A3 0x1.5555555545d4ep-5        // Coefficient for tangent polynomial approximation
#define NPY_TANG_A4 0x1.11115b7aa905ep-7        // Coefficient for tangent polynomial approximation
#define NPY_TANG_A5 0x1.6c1728d739765p-10       // Coefficient for tangent polynomial approximation

#if !defined NPY_HAVE_AVX512_SKX || !defined NPY_CAN_LINK_SVML
/* Lookup table for 2^(j/32) */
static npy_uint64 EXP_Table_top[32] = {
    0x3FF0000000000000,    // 2^0
    0x3FF059B0D3158540,    // 2^(1/32)
    0x3FF0B5586CF98900,    // 2^(2/32)
    0x3FF11301D0125B40,    // 2^(3/32)
    0x3FF172B83C7D5140,    // 2^(4/32)
    0x3FF1D4873168B980,    // 2^(5/32)
    0x3FF2387A6E756200,    // 2^(6/32)
    0x3FF29E9DF51FDEC0,    // 2^(7/32)
    0x3FF306FE0A31B700,    // 2^(8/32)
    0x3FF371A7373AA9C0,    // 2^(9/32)
    0x3FF3DEA64C123400,    // 2^(10/32)
    0x3FF44E0860618900,    // 2^(11/32)
    0x3FF4BFDAD5362A00,    // 2^(12/32)
    0x3FF5342B569D4F80,    // 2^(13/32)
    0x3FF5AB07DD485400,    // 2^(14/32)
    0x3FF6247EB03A5580,    // 2^(15/32)
    0x3FF6A09E667F3BC0,    // 2^(16/32)
    0x3FF71F75E8EC5F40,    // 2^(17/32)
    0x3FF7A11473EB0180,    // 2^(18/32)
    0x3FF82589994CCE00,    // 2^(19/32)
    0x3FF8ACE5422AA0C0,    // 2^(20/32)
    0x3FF93737B0CDC5C0,    // 2^(21/32)
    0x3FF9C49182A3F080,    // 2^(22/32)
    0x3FFA5503B23E2540,    // 2^(23/32)
    0x3FFAE89F995AD380,    // 2^(24/32)
    0x3FFB7F76F2FB5E40,    // 2^(25/32)
    0x3FFC199BDD855280,    // 2^(26/32)
    0x3FFCB720DCEF9040,    // 2^(27/32)
    0x3FFD5818DCFBA480,    // 2^(28/32)
    0x3FFDFC97337B9B40,    // 2^(29/32)
    0x3FFEA4AFA2A490C0,    // 2^(30/32)
    0x3FFF50765B6E4540,    // 2^(31/32)
};

static npy_uint64 EXP_Table_tail[32] = {
    0x0000000000000000,    // 2^(-32)
    0x3D0A1D73E2A475B4,    // 2^(-31/32)
    0x3CEEC5317256E308,    // 2^(-30/32)
    0x3CF0A4EBBF1AED93,    // 2^(-29/32)
    0x3D0D6E6FBE462876,    // 2^(-28/32)
    0x3D053C02DC0144C8,    // 2^(-27/32)
    0x3D0C3360FD6D8E0B,    // 2^(-26/32)
    0x3D009612E8AFAD12,    // 2^(-25/32)
    0x3CF52DE8D5A46306,    // 2^(-24/32)
    0x3CE54E28AA05E8A9,    // 2^(-23/32)
    0x3D011ADA0911F09F,    // 2^(-22/32)
    0x3D068189B7A04EF8,    // 2^(-21/32)
    0x3D038EA1CBD7F621,    // 2^(-20/32)
    0x3CBDF0A83C49D86A,    // 2^(-19/32)
    0x3D04AC64980A8C8F,    // 2^(-18/32)
    0x3CD2C7C3E81BF4B7,    // 2^(-17/32)
    0x3CE921165F626CDD,    // 2^(-16/32)
    0x3D09EE91B8797785,    // 2^(-15/32)
    0x3CDB5F54408FDB37,    // 2^(-14/32)
    0x3CF28ACF88AFAB35,    // 2^(-13/32)
    0x3CFB5BA7C55A192D,    // 2^(-12/32)
    0x3D027A280E1F92A0,    // 2^(-11/32)
    0x3CF01C7C46B071F3,    // 2^(-10/32)
    0x3CFC8B424491CAF8,    // 2^(-9/32)
    0x3D06AF439A68BB99,    // 2^(-8/32)
    0x3CDBAA9EC206AD4F,    // 2^(-7/32)
    0x3CFC2220CB12A092,    // 2^(-6/32)
    0x3D048A81E5E8F4A5,    // 2^(-5/32)
    0x3CDC976816BAD9B8,    // 2^(-4/32)
    0x3CFEB968CAC39ED3,    // 2^(-3/32)
    0x3CF9858F73A18F5E,    // 2^(-2/32)
    0x3C99D3E12DD8A18B,    // 2^(-1/32)
};
#endif // #if !defined NPY_HAVE_AVX512_SKX || !defined NPY_CAN_LINK_SVML
#endif
#endif

/*
 * Constants used in vector implementation of exp(x)
 */
#define NPY_RINT_CVT_MAGICf 0x1.800000p+23f         // Magic number for rounding in floating-point operations
#define NPY_CODY_WAITE_LOGE_2_HIGHf -6.93145752e-1f // High part of Cody
#define NPY_COEFF_Q0_EXPf 1.000000000000000000000e+00f
#define NPY_COEFF_Q1_EXPf -2.742335390411667452936e-01f
#define NPY_COEFF_Q2_EXPf 2.159509375685829852307e-02f

/*
 * Constants used in vector implementation of log(x)
 */
#define NPY_COEFF_P0_LOGf 0.000000000000000000000e+00f
#define NPY_COEFF_P1_LOGf 9.999999999999998702752e-01f
#define NPY_COEFF_P2_LOGf 2.112677543073053063722e+00f
#define NPY_COEFF_P3_LOGf 1.480000633576506585156e+00f
#define NPY_COEFF_P4_LOGf 3.808837741388407920751e-01f
#define NPY_COEFF_P5_LOGf 2.589979117907922693523e-02f
#define NPY_COEFF_Q0_LOGf 1.000000000000000000000e+00f
#define NPY_COEFF_Q1_LOGf 2.612677543073109236779e+00f
#define NPY_COEFF_Q2_LOGf 2.453006071784736363091e+00f
#define NPY_COEFF_Q3_LOGf 9.864942958519418960339e-01f
#define NPY_COEFF_Q4_LOGf 1.546476374983906719538e-01f
#define NPY_COEFF_Q5_LOGf 5.875095403124574342950e-03f

/*
 * Lookup table of log(c_k)
 * Reference form: Tang, Ping-Tak Peter. "Table-driven implementation of the
 *     logarithm function in IEEE floating-point arithmetic." ACM Transactions
 *     on Mathematical Software (TOMS) 16.4 (1990): 378-400.
 */
#if defined NPY_HAVE_AVX512F
#if !(defined(__clang__) && (__clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 1)))
#if !defined NPY_HAVE_AVX512_SKX || !defined NPY_CAN_LINK_SVML
static npy_uint64 LOG_TABLE_TOP[64] = {
    // 64-bit integer array defining the upper 64 bits of each entry in the logarithm lookup table
    0x0000000000000000,  // First entry: 0x0000000000000000
    0x3F8FC0A8B1000000,  // Second entry: 0x3F8FC0A8B1000000
    0x3F9F829B0E780000,  // Third entry: 0x3F9F829B0E780000
    0x3FA77458F6340000,  // Fourth entry: 0x3FA77458F6340000
    0x3FAF0A30C0100000,  // Fifth entry: 0x3FAF0A30C0100000
    0x3FB341D7961C0000,  // Sixth entry: 0x3FB341D7961C0000
    0x3FB6F0D28AE60000,  // Seventh entry: 0x3FB6F0D28AE60000
    0x3FBA926D3A4A0000,  // Eighth entry: 0x3FBA926D3A4A0000
    0x3FBE27076E2A0000,  // Ninth entry: 0x3FBE27076E2A0000
    0x3FC0D77E7CD10000,  // Tenth entry: 0x3FC0D77E7CD10000
    0x3FC29552F8200000,  // Eleventh entry: 0x3FC29552F8200000
    0x3FC44D2B6CCB0000,  // Twelfth entry: 0x3FC44D2B6CCB0000
    0x3FC5FF3070A80000,  // Thirteenth entry: 0x3FC5FF3070A80000
    0x3FC7AB8902110000,  // Fourteenth entry: 0x3FC7AB8902110000
    0x3FC9525A9CF40000,  // Fifteenth entry: 0x3FC9525A9CF40000
    0x3FCAF3C94E810000,  // Sixteenth entry: 0x3FCAF3C94E810000
    0x3FCC8FF7C79B0000,  // Seventeenth entry: 0x3FCC8FF7C79B0000
    0x3FCE27076E2B0000,  // Eighteenth entry: 0x3FCE27076E2B0000
    0x3FCFB9186D5E0000,  // Nineteenth entry: 0x3FCFB9186D5E0000
    0x3FD0A324E2738000,  // Twentieth entry: 0x3FD0A324E2738000
    0x3FD1675CABAB8000,  // Twenty-first entry: 0x3FD1675CABAB8000
    0x3FD22941FBCF8000,  // Twenty-second entry: 0x3FD22941FBCF8000
    0x3FD2E8E2BAE10000,  // Twenty-third entry: 0x3FD2E8E2BAE10000
    0x3FD3A64C55698000,  // Twenty-fourth entry: 0x3FD3A64C55698000
    0x3FD4618BC21C8000,  // Twenty-fifth entry: 0x3FD4618BC21C8000
    0x3FD51AAD872E0000,  // Twenty-sixth entry: 0x3FD51AAD872E0000
    0x3FD5D1BDBF580000,  // Twenty-seventh entry: 0x3FD5D1BDBF580000
    0x3FD686C81E9B0000,  // Twenty-eighth entry: 0x3FD686C81E9B0000
    0x3FD739D7F6BC0000,  // Twenty-ninth entry: 0x3FD739D7F6BC0000
    0x3FD7EAF83B828000,  // Thirtieth entry: 0x3FD7EAF83B828000
    0x3FD89A3386C18000,  // Thirty-first entry: 0x3FD89A3386C18000
    0x3FD947941C210000,  // Thirty-second entry: 0x3FD947941C210000
    0x3FD9F323ECBF8000,  // Thirty-third entry: 0x3FD9F323ECBF8000
    0x3FDA9CEC9A9A0000,  // Thirty-fourth entry: 0x3FDA9CEC9A9A0000
    0x3FDB44F77BCC8000,  // Thirty-fifth entry: 0x3FDB44F77BCC8000
    0x3FDBEB4D9DA70000,  // Thirty-sixth entry: 0x3FDBEB4D9DA70000
    0x3FDC8FF7C79A8000,  // Thirty-seventh entry: 0x3FDC8FF7C79A8000
    0x3FDD32FE7E010000,  // Thirty-eighth entry: 0x3FDD32FE7E010000
    0x3FDDD46A04C20000,  // Thirty-ninth entry: 0x3FDDD46A04C20000
    0x3FDE744261D68000,  // Fortieth entry: 0x3FDE744261D68000
    0x3FDF128F5FAF0000,  // Forty-first entry: 0x3FDF128F5FAF0000
    0x3FDFAF588F790000,  // Forty-second entry: 0x3FDFAF588F790000
    0x3FE02552A5A5C000,  // Forty-third entry: 0x3FE02552A5A5C000
    0x3FE0723E5C1CC000,  // Forty-fourth entry: 0x3FE0723E5C1CC000
    0x3FE0BE72E425400
    # 定义一个包含多个64位整数的列表
    numbers = [
        0xBD5FE0E183092C59,
        0x3D2980267C7E09E4,
        0xBD62303B9CB0D5E1,
        0x3D662A6617CC9717,
        0xBD4717B6B33E44F8,
        0xBD62968C836CC8C2,
        0x3D6AAC6CA17A4554,
        0x3D6E5CBD3D50FFFC,
        0xBD6C69A65A23A170,
        0xBD35B967F4471DFC,
        0x3D6F4799F4F6543E,
        0xBD6B0B0DE3077D7E,
        0xBD537B720E4A694B,
        0x3D65AD1D904C1D4E,
        0xBD600349CC67F9B2,
        0xBD697794F689F843,
        0xBD3A342C2AF0003C,
        0x3D5F1546AAA3361C,
        0x3D50E35F73F7A018,
        0x3D630701CE63EAB9,
        0xBD3A6976F5EB0963,
        0x3D5D309C2CC91A85,
        0xBD6D0B1C68651946,
        0xBD609EC17A426426,
        0xBD3F4BD8DB0A7CC1,
        0x3D4394A11B1C1EE4,
        0x3D54AEC442BE1015,
        0xBD67FCB18ED9D603,
        0x3D67E1B259D2F3DA,
        0xBD6ED2A52C73BF78,
        0x3D56FABA4CDD147D,
        0x3D584BF2B68D766F,
        0x3D40931A909FEA5E,
        0x3D4EC5197DDB55D3,
        0x3D5B7BF7861D37AC,
        0x3D5A21AC25DB1EF3,
        0xBD542A9E21373414,
        0xBD6DAFA08CECADB1,
        0x3D3E1F8DF68DBCF3,
        0x3D3BB2CD720EC44C,
        0xBD49C24CA098362B,
        0x3D60FEC69C695D7F,
        0x3D6F404E57963891,
        0xBD657D49676844CC,
        0x3D592DFBC7D93617,
        0x3D65E9A98F33A396,
        0x3D52DD98B97BAEF0,
        0x3D1A07BD8B34BE7C,
        0xBD17AFA4392F1BA7,
        0xBD5DCA290F818480,
        0x3D5D1772F5386374,
        0x3D60BE1FB590A1F5,
        0xBD6E2CE9146D271A,
        0xBD65E6563BBD9FC9,
        0x3D66FAA404263D0B,
        0xBD5AA33736867A17,
        0x3D6EC27D0B7B37B3,
        0xBD244FDD840B8591,
        0x3D6BB09CB0985646,
        0x3D46ABB9DF22BC57,
        0xBD58CD7DC73BD194,
        0x3D6F2CFB29AAA5F0,
        0x3D66757006095FD2,
    ]
#define NPY_TANG_LOG_A1 0x1.55555555554e6p-4
// 定义常量 NPY_TANG_LOG_A1，表示浮点数 0x1.55555555554e6 乘以 2 的 -4 次方

#define NPY_TANG_LOG_A2 0x1.9999999bac6d4p-7
// 定义常量 NPY_TANG_LOG_A2，表示浮点数 0x1.9999999bac6d4 乘以 2 的 -7 次方

#define NPY_TANG_LOG_A3 0x1.2492307f1519fp-9
// 定义常量 NPY_TANG_LOG_A3，表示浮点数 0x1.2492307f1519f 乘以 2 的 -9 次方

#define NPY_TANG_LOG_A4 0x1.c8034c85dfffp-12
// 定义常量 NPY_TANG_LOG_A4，表示浮点数 0x1.c8034c85dfff 乘以 2 的 -12 次方

#define NPY_TANG_LOG_LN2HI 0x1.62e42fefa4p-1
// 定义常量 NPY_TANG_LOG_LN2HI，表示浮点数 0x1.62e42fefa4 乘以 2 的 -1 次方

#define NPY_TANG_LOG_LN2LO -0x1.8432a1b0e2634p-43
// 定义常量 NPY_TANG_LOG_LN2LO，表示浮点数 -0x1.8432a1b0e2634 乘以 2 的 -43 次方
#endif
#endif

#endif

`.\numpy\numpy\_core\src\umath\override.c`

/*
 * 定义 NPY_NO_DEPRECATED_API 为 NPY_API_VERSION，禁用过时 API
 * 定义 NO_IMPORT_ARRAY，禁止导入数组
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define NO_IMPORT_ARRAY

#include "numpy/ndarraytypes.h"    // 导入 NumPy 的 ndarray 类型定义
#include "numpy/ufuncobject.h"     // 导入 NumPy 的 ufunc 对象定义
#include "npy_import.h"            // 导入 NumPy 的导入函数
#include "npy_static_data.h"       // 导入 NumPy 的静态数据
#include "multiarraymodule.h"      // 导入 NumPy 的多维数组模块
#include "npy_pycompat.h"          // 导入 NumPy 的 Python 兼容性函数
#include "override.h"              // 导入 NumPy 的覆盖功能
#include "ufunc_override.h"        // 导入 NumPy 的ufunc覆盖功能

/*
 * 对于每个位置参数和可能的 "out" 关键字参数，查找标准 ufunc 行为的覆盖，
 * 即非默认的 __array_ufunc__ 方法。
 *
 * 返回覆盖数量，设置 PyObject 数组中的相应对象到 ``with_override``，
 * 并将对应的 __array_ufunc__ 方法设置到 ``methods`` 中（都使用新引用）。
 *
 * 对于给定类别的第一个覆盖只返回一次。
 *
 * 失败时返回 -1。
 */
static int
get_array_ufunc_overrides(PyObject *in_args, PyObject *out_args, PyObject *wheremask_obj,
                          PyObject **with_override, PyObject **methods)
{
    int i;
    int num_override_args = 0;
    int narg, nout, nwhere;

    narg = (int)PyTuple_GET_SIZE(in_args);  // 获取输入参数元组的大小
    /* out_args 可以为 NULL： */
    nout = (out_args != NULL) ? (int)PyTuple_GET_SIZE(out_args) : 0;  // 获取输出参数元组的大小
    nwhere = (wheremask_obj != NULL) ? 1 : 0;  // 检查是否有 wheremask_obj

    for (i = 0; i < narg + nout + nwhere; ++i) {
        PyObject *obj;
        int j;
        int new_class = 1;

        if (i < narg) {
            obj = PyTuple_GET_ITEM(in_args, i);  // 获取输入参数元组中的对象
        }
        else if (i < narg + nout){
            obj = PyTuple_GET_ITEM(out_args, i - narg);  // 获取输出参数元组中的对象
        }
        else {
            obj = wheremask_obj;  // 获取 wheremask_obj 对象
        }
        /*
         * 是否之前见过这个类？如果是，则忽略。
         */
        for (j = 0; j < num_override_args; j++) {
            new_class = (Py_TYPE(obj) != Py_TYPE(with_override[j]));  // 检查是否为新类
            if (!new_class) {
                break;
            }
        }
        if (new_class) {
            /*
             * 现在查看对象是否提供 __array_ufunc__ 方法。但是，我们应该
             * 忽略基本的 ndarray.__ufunc__，因此我们跳过任何 ndarray 以及
             * 未覆盖 __array_ufunc__ 的任何 ndarray 子类实例。
             */
            PyObject *method = PyUFuncOverride_GetNonDefaultArrayUfunc(obj);  // 获取非默认的 __array_ufunc__ 方法
            if (method == NULL) {
                continue;
            }
            if (method == Py_None) {
                PyErr_Format(PyExc_TypeError,
                             "operand '%.200s' does not support ufuncs "
                             "(__array_ufunc__=None)",
                             obj->ob_type->tp_name);  // 报告错误，操作数不支持 ufuncs
                Py_DECREF(method);
                goto fail;
            }
            Py_INCREF(obj);
            with_override[num_override_args] = obj;  // 设置覆盖对象
            methods[num_override_args] = method;    // 设置方法
            ++num_override_args;
        }
    }
    return num_override_args;

fail:
    for (i = 0; i < num_override_args; i++) {
        Py_DECREF(with_override[i]);
        Py_DECREF(methods[i]);
    }
    # 返回整数 -1，通常用于表示函数执行失败或未找到期望的结果
    return -1;
/*
 * Build a dictionary from the keyword arguments, but replace out with the
 * normalized version (and always pass it even if it was passed by position).
 */
static int
initialize_normal_kwds(PyObject *out_args,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames,
        PyObject *normal_kwds)
{
    // 如果传入了关键字参数的名称元组 kwnames
    if (kwnames != NULL) {
        // 遍历 kwnames 中的每个元素
        for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(kwnames); i++) {
            // 将 args[i + len_args] 作为值，以 PyTuple_GET_ITEM(kwnames, i) 作为键，存入 normal_kwds 字典中
            if (PyDict_SetItem(normal_kwds,
                    PyTuple_GET_ITEM(kwnames, i), args[i + len_args]) < 0) {
                return -1;  // 如果设置失败，返回 -1
            }
        }
    }

    // 如果传入了 out_args
    if (out_args != NULL) {
        /* Replace `out` argument with the normalized version */
        // 将 npy_interned_str.out 作为键，out_args 作为值，存入 normal_kwds 字典中
        int res = PyDict_SetItem(normal_kwds, npy_interned_str.out, out_args);
        if (res < 0) {
            return -1;  // 如果设置失败，返回 -1
        }
    }
    else {
        /* Ensure that `out` is not present. */
        // 确保 normal_kwds 字典中没有名为 npy_interned_str.out 的键
        int res = PyDict_Contains(normal_kwds, npy_interned_str.out);
        if (res < 0) {
            return -1;  // 如果操作失败，返回 -1
        }
        if (res) {
            // 如果存在 npy_interned_str.out 这个键，则从 normal_kwds 字典中删除它
            return PyDict_DelItem(normal_kwds, npy_interned_str.out);
        }
    }
    return 0;  // 函数执行成功，返回 0
}

/*
 * ufunc() and ufunc.outer() accept 'sig' or 'signature'.  We guarantee
 * that it is passed as 'signature' by renaming 'sig' if present.
 * Note that we have already validated that only one of them was passed
 * before checking for overrides.
 */
static int
normalize_signature_keyword(PyObject *normal_kwds)
{
    /* If the keywords include `sig` rename to `signature`. */
    // 如果 normal_kwds 字典中包含键为 "sig"
    PyObject* obj = NULL;
    int result = PyDict_GetItemStringRef(normal_kwds, "sig", &obj);
    if (result == -1) {
        return -1;  // 如果操作失败，返回 -1
    }
    if (result == 1) {
        // 如果找到了键为 "sig" 的项
        if (PyDict_SetItemString(normal_kwds, "signature", obj) < 0) {
            Py_DECREF(obj);  // 设置失败时释放 obj 对象
            return -1;  // 返回 -1 表示失败
        }
        Py_DECREF(obj);  // 释放 obj 对象
        // 成功将 "sig" 改名为 "signature"，现在从 normal_kwds 中删除 "sig"
        if (PyDict_DelItemString(normal_kwds, "sig") < 0) {
            return -1;  // 删除失败，返回 -1
        }
    }
    return 0;  // 函数执行成功，返回 0
}

static int
copy_positional_args_to_kwargs(const char **keywords,
        PyObject *const *args, Py_ssize_t len_args,
        PyObject *normal_kwds)
{
    // 遍历传入的位置参数和对应的关键字数组
    for (Py_ssize_t i = 0; i < len_args; i++) {
        // 如果关键字为 NULL，说明是输入或输出且未在此处设置
        if (keywords[i] == NULL) {
            continue;  // 跳过此次循环
        }
        // 对于 reduce 函数来说，只有 5 个关键字参数是相关的
        if (NPY_UNLIKELY(i == 5)) {
            /*
             * This is only relevant for reduce, which is the only one with
             * 5 keyword arguments.
             */
            assert(strcmp(keywords[i], "initial") == 0);  // 确保关键字为 "initial"
            if (args[i] == npy_static_pydata._NoValue) {
                continue;  // 如果是特殊值 _NoValue，继续下一轮循环
            }
        }

        // 将 args[i] 作为值，keywords[i] 作为键，存入 normal_kwds 字典中
        int res = PyDict_SetItemString(normal_kwds, keywords[i], args[i]);
        if (res < 0) {
            return -1;  // 如果设置失败，返回 -1
        }
    }
    return 0;  // 函数执行成功，返回 0
}
/*
 * Check a set of args for the `__array_ufunc__` method.  If more than one of
 * the input arguments implements `__array_ufunc__`, they are tried in the
 * order: subclasses before superclasses, otherwise left to right. The first
 * (non-None) routine returning something other than `NotImplemented`
 * determines the result. If all of the `__array_ufunc__` operations return
 * `NotImplemented` (or are None), a `TypeError` is raised.
 *
 * Returns 0 on success and 1 on exception. On success, *result contains the
 * result of the operation, if any. If *result is NULL, there is no override.
 */
NPY_NO_EXPORT int
PyUFunc_CheckOverride(PyUFuncObject *ufunc, char *method,
        PyObject *in_args, PyObject *out_args, PyObject *wheremask_obj,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames,
        PyObject **result)
{
    int status;

    int num_override_args;
    PyObject *with_override[NPY_MAXARGS];
    PyObject *array_ufunc_methods[NPY_MAXARGS];

    PyObject *method_name = NULL;
    PyObject *normal_kwds = NULL;

    PyObject *override_args = NULL;

    /*
     * Check inputs for overrides
     */
    num_override_args = get_array_ufunc_overrides(
           in_args, out_args, wheremask_obj, with_override, array_ufunc_methods);
    if (num_override_args == -1) {
        goto fail;
    }
    /* No overrides, bail out.*/
    if (num_override_args == 0) {
        *result = NULL;
        return 0;
    }

    /*
     * Normalize ufunc arguments, note that any input and output arguments
     * have already been stored in `in_args` and `out_args`.
     */
    normal_kwds = PyDict_New();
    if (normal_kwds == NULL) {
        goto fail;
    }
    if (initialize_normal_kwds(out_args,
            args, len_args, kwnames, normal_kwds) < 0) {
        goto fail;
    }

    /*
     * Reduce-like methods can pass keyword arguments also by position,
     * in which case the additional positional arguments have to be copied
     * into the keyword argument dictionary. The `__call__` and `__outer__`
     * method have to normalize sig and signature.
     */

    /* ufunc.__call__ */
    if (strcmp(method, "__call__") == 0) {
        status = normalize_signature_keyword(normal_kwds);
    }
    /* ufunc.reduce */
    else if (strcmp(method, "reduce") == 0) {
        static const char *keywords[] = {
                NULL, "axis", "dtype", NULL, "keepdims",
                "initial", "where"};
        status = copy_positional_args_to_kwargs(keywords,
                args, len_args, normal_kwds);
    }
    /* ufunc.accumulate */
    else if (strcmp(method, "accumulate") == 0) {
        static const char *keywords[] = {
                NULL, "axis", "dtype", NULL};
        status = copy_positional_args_to_kwargs(keywords,
                args, len_args, normal_kwds);
    }
    /* ufunc.reduceat */


注释：
    # 如果方法名与 "reduceat" 相同，则执行以下操作
    else if (strcmp(method, "reduceat") == 0) {
        # 关键字参数列表，仅包含 "axis" 和 "dtype"，其余为 NULL
        static const char *keywords[] = {
                NULL, NULL, "axis", "dtype", NULL};
        # 将位置参数转换为关键字参数
        status = copy_positional_args_to_kwargs(keywords,
                args, len_args, normal_kwds);
    }
    /* ufunc.outer (与调用相同) */
    else if (strcmp(method, "outer") == 0) {
        # 标准化签名中的关键字参数
        status = normalize_signature_keyword(normal_kwds);
    }
    /* ufunc.at */
    else if (strcmp(method, "at") == 0) {
        # 状态设置为 0
        status = 0;
    }
    /* 未知的方法 */
    else {
        # 抛出类型错误异常，指明未知的 ufunc 方法
        PyErr_Format(PyExc_TypeError,
                     "Internal Numpy error: unknown ufunc method '%s' in call "
                     "to PyUFunc_CheckOverride", method);
        # 状态设置为 -1，表示出错
        status = -1;
    }
    # 如果状态不为 0，则跳转到失败处理部分
    if (status != 0) {
        goto fail;
    }

    # 从方法名字符串创建 PyUnicode 对象
    method_name = PyUnicode_FromString(method);
    # 如果创建失败，则跳转到失败处理部分
    if (method_name == NULL) {
        goto fail;
    }

    # 获取输入参数元组的长度
    int len = (int)PyTuple_GET_SIZE(in_args);

    /* 按正确的顺序调用 __array_ufunc__ 函数 */
    }
    # 状态设置为 0
    status = 0;
    /* 找到覆盖，返回它 */
    goto cleanup;
fail:
    # 设置状态为 -1，表示操作失败
    status = -1;
cleanup:
    # 清理操作开始，循环处理所有的覆盖参数
    for (int i = 0; i < num_override_args; i++) {
        // 释放 Python 对象的引用，防止内存泄漏
        Py_XDECREF(with_override[i]);
        // 释放 Python 对象的引用，防止内存泄漏
        Py_XDECREF(array_ufunc_methods[i]);
    }
    // 释放 Python 对象的引用，防止内存泄漏
    Py_XDECREF(method_name);
    // 释放 Python 对象的引用，防止内存泄漏
    Py_XDECREF(normal_kwds);
    // 返回函数执行的状态值
    return status;
}

`.\numpy\numpy\_core\src\umath\override.h`

#ifndef _NPY_UMATH_OVERRIDE_H
#define _NPY_UMATH_OVERRIDE_H

定义了 `_NPY_UMATH_OVERRIDE_H` 宏，用于防止头文件的多重包含。


#include "npy_config.h"
#include "numpy/ufuncobject.h"

包含了两个头文件：`npy_config.h` 和 `numpy/ufuncobject.h`，这些文件包含了所需的配置信息和函数声明。


NPY_NO_EXPORT int
PyUFunc_CheckOverride(PyUFuncObject *ufunc, char *method,
        PyObject *in_args, PyObject *out_args, PyObject *wheremask_obj,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames,
        PyObject **result);

声明了一个名为 `PyUFunc_CheckOverride` 的函数，其参数包括一个指向 `PyUFuncObject` 结构的指针 `ufunc`，以及一系列用于函数调用和返回结果的参数。


#endif

结束了条件编译指令，确保头文件内容在多次包含时不会重复定义。

`.\numpy\numpy\_core\src\umath\reduction.c`

/*
 * This file implements generic methods for computing reductions on arrays.
 *
 * Written by Mark Wiebe (mwwiebe@gmail.com)
 * Copyright (c) 2011 by Enthought, Inc.
 *
 * See LICENSE.txt for the license.
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#define _UMATHMODULE

#define PY_SSIZE_T_CLEAN
#include <Python.h>

#include "npy_config.h"
#include "numpy/arrayobject.h"


#include "array_assign.h"
#include "array_coercion.h"
#include "array_method.h"
#include "ctors.h"
#include "refcount.h"

#include "numpy/ufuncobject.h"
#include "lowlevel_strided_loops.h"
#include "reduction.h"
#include "extobj.h"  /* for _check_ufunc_fperr */


/*
 * Count the number of dimensions selected in 'axis_flags'
 */
static int
count_axes(int ndim, const npy_bool *axis_flags)
{
    int idim;
    int naxes = 0;

    for (idim = 0; idim < ndim; ++idim) {
        if (axis_flags[idim]) {
            naxes++;
        }
    }
    return naxes;
}

/*
 * This function initializes a result array for a reduction operation
 * which has no identity. This means it needs to copy the first element
 * it sees along the reduction axes to result.
 *
 * If a reduction has an identity, such as 0 or 1, the result should be
 * fully initialized to the identity, because this function raises an
 * exception when there are no elements to reduce (which is appropriate if,
 * and only if, the reduction operation has no identity).
 *
 * This means it copies the subarray indexed at zero along each reduction axis
 * into 'result'.
 *
 * result  : The array into which the result is computed. This must have
 *           the same number of dimensions as 'operand', but for each
 *           axis i where 'axis_flags[i]' is True, it has a single element.
 * operand : The array being reduced.
 * axis_flags : An array of boolean flags, one for each axis of 'operand'.
 *              When a flag is True, it indicates to reduce along that axis.
 * funcname : The name of the reduction operation, for the purpose of
 *            better quality error messages. For example, "numpy.max"
 *            would be a good name for NumPy's max function.
 *
 * Returns -1 if an error occurred, and otherwise the reduce arrays size,
 * which is the number of elements already initialized.
 */
static npy_intp
PyArray_CopyInitialReduceValues(
                    PyArrayObject *result, PyArrayObject *operand,
                    const npy_bool *axis_flags, const char *funcname,
                    int keepdims)
{
    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    npy_intp *shape_orig = PyArray_SHAPE(operand);
    npy_intp *strides_orig = PyArray_STRIDES(operand);
    PyArrayObject *op_view = NULL;

    int ndim = PyArray_NDIM(operand);

    /*
     * Copy the subarray of the first element along each reduction axis.
     *
     * Adjust the shape to only look at the first element along
     * any of the reduction axes. If keepdims is False remove the axes
     * entirely.
     */
    # 初始化输出维度计数器为0
    int idim_out = 0;
    # 初始化总大小为1
    npy_intp size = 1;
    # 遍历输入数组的每一个维度
    for (int idim = 0; idim < ndim; idim++) {
        # 检查当前维度是否为归约轴
        if (axis_flags[idim]) {
            # 如果归约轴上的原始形状为0，则抛出异常并返回-1
            if (NPY_UNLIKELY(shape_orig[idim] == 0)) {
                PyErr_Format(PyExc_ValueError,
                        "zero-size array to reduction operation %s "
                        "which has no identity", funcname);
                return -1;
            }
            # 如果需要保持归约后的维度，则设置当前输出维度为1，并且步长为0
            if (keepdims) {
                shape[idim_out] = 1;
                strides[idim_out] = 0;
                idim_out++;
            }
        }
        else {
            # 如果当前维度不是归约轴，则计算总大小，并将当前形状和步长复制到输出数组中
            size *= shape_orig[idim];
            shape[idim_out] = shape_orig[idim];
            strides[idim_out] = strides_orig[idim];
            idim_out++;
        }
    }

    # 获取操作数的描述符
    PyArray_Descr *descr = PyArray_DESCR(operand);
    # 增加描述符的引用计数
    Py_INCREF(descr);
    # 根据描述符创建新的数组视图
    op_view = (PyArrayObject *)PyArray_NewFromDescr(
            &PyArray_Type, descr, idim_out, shape, strides,
            PyArray_DATA(operand), 0, NULL);
    # 如果创建数组视图失败，则返回-1
    if (op_view == NULL) {
        return -1;
    }

    /*
     * 将元素复制到结果数组中以便开始操作。
     */
    # 将操作视图中的元素复制到结果数组中
    int res = PyArray_CopyInto(result, op_view);
    # 减少操作视图的引用计数
    Py_DECREF(op_view);
    # 如果复制操作失败，则返回-1
    if (res < 0) {
        return -1;
    }

    /*
     * 如果没有归约轴，则已经完成。
     * 注意，如果只有一个归约轴，原则上可以在设置迭代器之前通过移除该轴来更有效地设置迭代（简化迭代，因为`skip_first_count`（返回的大小）可以设置为0）。
     */
    # 返回总的元素个数作为归约操作的结果大小
    return size;
/*
 * This function executes all the standard NumPy reduction function
 * boilerplate code, just calling the appropriate inner loop function where
 * necessary.
 *
 * context     : The ArrayMethod context (with ufunc, method, and descriptors).
 * operand     : The array to be reduced.
 * out         : NULL, or the array into which to place the result.
 * wheremask   : Reduction mask of valid values used for `where=`.
 * axis_flags  : Flags indicating the reduction axes of 'operand'.
 * keepdims    : If true, leaves the reduction dimensions in the result
 *               with size one.
 * subok       : If true, the result uses the subclass of operand, otherwise
 *               it is always a base class ndarray.
 * initial     : Initial value, if NULL the default is fetched from the
 *               ArrayMethod (typically as the default from the ufunc).
 * loop        : `reduce_loop` from `ufunc_object.c`.  TODO: Refactor
 * buffersize  : Buffer size for the iterator. For the default, pass in 0.
 * funcname    : The name of the reduction function, for error messages.
 * errormask   : forwarded from _get_bufsize_errmask
 *
 * TODO FIXME: if you squint, this is essentially an second independent
 * implementation of generalized ufuncs with signature (i)->(), plus a few
 * extra bells and whistles. (Indeed, as far as I can tell, it was originally
 * split out to support a fancy version of count_nonzero... which is not
 * actually a reduction function at all, it's just a (i)->() function!) So
 * probably these two implementation should be merged into one. (In fact it
 * would be quite nice to support axis= and keepdims etc. for arbitrary
 * generalized ufuncs!)
 */

NPY_NO_EXPORT PyArrayObject *
PyUFunc_ReduceWrapper(PyArrayMethod_Context *context,
        PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask,
        npy_bool *axis_flags, int keepdims,
        PyObject *initial, PyArray_ReduceLoopFunc *loop,
        npy_intp buffersize, const char *funcname, int errormask)
{
    // Ensure the loop function is not NULL
    assert(loop != NULL);

    // Initialize the result array object
    PyArrayObject *result = NULL;

    // Number of initial elements to skip in reduction
    npy_intp skip_first_count = 0;

    /* Iterator parameters */
    NpyIter *iter = NULL;           // Numpy iterator
    PyArrayObject *op[3];           // Array operands for the iterator
    PyArray_Descr *op_dtypes[3];    // Data types of the operands
    npy_uint32 it_flags, op_flags[3];   // Iterator and operand flags

    /* Loop auxdata (must be freed on error) */
    NpyAuxData *auxdata = NULL;     // Auxiliary data used during iteration

    /* Set up the iterator */
    op[0] = out;                    // Output array
    op[1] = operand;                // Input array
    op_dtypes[0] = context->descriptors[0];  // Data type of output
    op_dtypes[1] = context->descriptors[1];  // Data type of input

    /* Buffer to use when we need an initial value */
    char *initial_buf = NULL;       // Buffer for initial value storage

    /* More than one axis means multiple orders are possible */
    if (!(context->method->flags & NPY_METH_IS_REORDERABLE)
            && count_axes(PyArray_NDIM(operand), axis_flags) > 1) {
        // Error if the reduction operation is not reorderable and more than one axis is specified
        PyErr_Format(PyExc_ValueError,
                "reduction operation '%s' is not reorderable, "
                "so at most one axis may be specified",
                funcname);
        goto fail;  // Jump to fail label in case of error
    }
    // 定义迭代器的标志位，指定缓冲、外部循环、内增长、接受零尺寸、引用允许、延迟缓冲分配、重叠时复制
    it_flags = NPY_ITER_BUFFERED |
            NPY_ITER_EXTERNAL_LOOP |
            NPY_ITER_GROWINNER |
            NPY_ITER_ZEROSIZE_OK |
            NPY_ITER_REFS_OK |
            NPY_ITER_DELAY_BUFALLOC |
            NPY_ITER_COPY_IF_OVERLAP;
    
    // 如果方法不可重新排序，则设置不反转步幅标志位
    if (!(context->method->flags & NPY_METH_IS_REORDERABLE)) {
        it_flags |= NPY_ITER_DONT_NEGATE_STRIDES;
    }
    
    // 设置第一个操作数的标志位，读写、对齐、分配、无子类型
    op_flags[0] = NPY_ITER_READWRITE |
                  NPY_ITER_ALIGNED |
                  NPY_ITER_ALLOCATE |
                  NPY_ITER_NO_SUBTYPE;
    
    // 设置第二个操作数的标志位，只读、对齐、无广播
    op_flags[1] = NPY_ITER_READONLY |
                  NPY_ITER_ALIGNED |
                  NPY_ITER_NO_BROADCAST;

    // 如果存在 where 掩码
    if (wheremask != NULL) {
        // 设置第三个操作数为 where 掩码
        op[2] = wheremask;
        /* wheremask 被保证为 NPY_BOOL 类型，因此借用其引用 */
        op_dtypes[2] = PyArray_DESCR(wheremask);
        assert(op_dtypes[2]->type_num == NPY_BOOL);
        if (op_dtypes[2] == NULL) {
            goto fail;
        }
        // 设置第三个操作数的标志位为只读
        op_flags[2] = NPY_ITER_READONLY;
    }
    
    // 设置结果数组的轴映射，默认使用操作数和 where 掩码的默认轴
    int result_axes[NPY_MAXDIMS];
    int *op_axes[3] = {result_axes, NULL, NULL};

    // 当前轴索引
    int curr_axis = 0;
    // 遍历操作数的维度
    for (int i = 0; i < PyArray_NDIM(operand); i++) {
        // 如果轴标志存在
        if (axis_flags[i]) {
            // 如果保持维度
            if (keepdims) {
                result_axes[i] = NPY_ITER_REDUCTION_AXIS(curr_axis);
                curr_axis++;
            }
            else {
                result_axes[i] = NPY_ITER_REDUCTION_AXIS(-1);
            }
        }
        else {
            result_axes[i] = curr_axis;
            curr_axis++;
        }
    }
    
    // 如果输出数组存在
    if (out != NULL) {
        /* NpyIter 在这种常见情况下不会提供良好的错误消息。 */
        // 检查输出数组的维度是否匹配当前轴数量
        if (NPY_UNLIKELY(curr_axis != PyArray_NDIM(out))) {
            if (keepdims) {
                PyErr_Format(PyExc_ValueError,
                        "output parameter for reduction operation %s has the "
                        "wrong number of dimensions: Found %d but expected %d "
                        "(must match the operand's when keepdims=True)",
                        funcname, PyArray_NDIM(out), curr_axis);
            }
            else {
                PyErr_Format(PyExc_ValueError,
                        "output parameter for reduction operation %s has the "
                        "wrong number of dimensions: Found %d but expected %d",
                        funcname, PyArray_NDIM(out), curr_axis);
            }
            goto fail;
        }
    }
    
    // 使用 NpyIter_AdvancedNew 创建高级迭代器
    iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, it_flags,
                               NPY_KEEPORDER, NPY_UNSAFE_CASTING,
                               op_flags,
                               op_dtypes,
                               PyArray_NDIM(operand), op_axes, NULL, buffersize);
    if (iter == NULL) {
        goto fail;
    }

    // 检查迭代器是否为空迭代
    npy_bool empty_iteration = NpyIter_GetIterSize(iter) == 0;
    // 获取迭代器的第一个操作数数组作为结果
    result = NpyIter_GetOperandArray(iter)[0];
    /*
     * Get the initial value (if it exists).  If the iteration is empty
     * then we assume the reduction is also empty.  The reason is that when
     * the outer iteration is empty we just won't use the initial value
     * in any case.  (`np.sum(np.zeros((0, 3)), axis=0)` is a length 3
     * reduction but has an empty result.)
     */
    // 检查是否存在初始值。如果迭代为空，则假设归约也为空。
    // 当外部迭代为空时，无论如何都不会使用初始值。
    // (`np.sum(np.zeros((0, 3)), axis=0)` 是一个长度为 3 的归约，但结果为空。
    if ((initial == NULL && context->method->get_reduction_initial == NULL)
            || initial == Py_None) {
        /* There is no initial value, or initial value was explicitly unset */
        // 没有初始值，或者初始值被明确地取消设置
    }
    else {
        /* Not all functions will need initialization, but init always: */
        // 并非所有函数都需要初始化，但初始化始终需要：
        // 分配初始缓冲区
        initial_buf = PyMem_Calloc(1, op_dtypes[0]->elsize);
        if (initial_buf == NULL) {
            PyErr_NoMemory();  // 分配内存失败，抛出内存错误
            goto fail;
        }
        if (initial != NULL) {
            /* must use user provided initial value */
            // 必须使用用户提供的初始值
            if (PyArray_Pack(op_dtypes[0], initial_buf, initial) < 0) {
                goto fail;
            }
        }
        else {
            /*
             * Fetch initial from ArrayMethod, we pretend the reduction is
             * empty when the iteration is.  This may be wrong, but when it is,
             * we will not need the identity as the result is also empty.
             */
            // 从 ArrayMethod 获取初始值，当迭代为空时，我们假装归约也为空。
            // 这可能是错误的，但当这种情况发生时，由于结果也为空，我们不需要标识。
            int has_initial = context->method->get_reduction_initial(
                    context, empty_iteration, initial_buf);
            if (has_initial < 0) {
                goto fail;
            }
            if (!has_initial) {
                /* We have no initial value available, free buffer to indicate */
                // 没有可用的初始值，释放缓冲区以指示
                PyMem_FREE(initial_buf);
                initial_buf = NULL;
            }
        }
    }

    PyArrayMethod_StridedLoop *strided_loop;
    NPY_ARRAYMETHOD_FLAGS flags = 0;

    int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
    needs_api |= NpyIter_IterationNeedsAPI(iter);
    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        /* Start with the floating-point exception flags cleared */
        // 从清除浮点异常标志开始
        npy_clear_floatstatus_barrier((char*)&iter);
    }

    /*
     * Initialize the result to the reduction unit if possible,
     * otherwise copy the initial values and get a view to the rest.
     */
    // 如果可能的话，将结果初始化为归约单元，否则复制初始值并获取其余部分的视图。
    if (initial_buf != NULL) {
        /* Loop provided an identity or default value, assign to result. */
        // 循环提供了标识或默认值，将其分配给结果。
        int ret = raw_array_assign_scalar(
                PyArray_NDIM(result), PyArray_DIMS(result),
                PyArray_DESCR(result),
                PyArray_BYTES(result), PyArray_STRIDES(result),
                op_dtypes[0], initial_buf);
        if (ret < 0) {
            goto fail;
        }
    }
    else {
        /* 只能在有初始值（来自标识或参数）的情况下使用 */
        if (wheremask != NULL) {
            PyErr_Format(PyExc_ValueError,
                    "reduction operation '%s' does not have an identity, "
                    "so to use a where mask one has to specify 'initial'",
                    funcname);
            goto fail;
        }

        /*
         * 对于一维数组，skip_first_count 可以优化为 0，但无初始值的约简操作并不常见。
         * （见 CopyInitialReduceValues 中的注释）
         */
        skip_first_count = PyArray_CopyInitialReduceValues(
                result, operand, axis_flags, funcname, keepdims);
        if (skip_first_count < 0) {
            goto fail;
        }
    }

    if (!NpyIter_Reset(iter, NULL)) {
        goto fail;
    }

    /*
     * 需要确保在获取固定步长之前重置迭代器。（在此之前缓冲区信息是未初始化的。）
     */
    npy_intp fixed_strides[3];
    NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
    if (wheremask != NULL) {
        if (PyArrayMethod_GetMaskedStridedLoop(context,
                1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
            goto fail;
        }
    }
    else {
        if (context->method->get_strided_loop(context,
                1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
            goto fail;
        }
    }

    if (!empty_iteration) {
        NpyIter_IterNextFunc *iternext;
        char **dataptr;
        npy_intp *strideptr;
        npy_intp *countptr;

        iternext = NpyIter_GetIterNext(iter, NULL);
        if (iternext == NULL) {
            goto fail;
        }
        dataptr = NpyIter_GetDataPtrArray(iter);
        strideptr = NpyIter_GetInnerStrideArray(iter);
        countptr = NpyIter_GetInnerLoopSizePtr(iter);

        if (loop(context, strided_loop, auxdata,
                iter, dataptr, strideptr, countptr, iternext,
                needs_api, skip_first_count) < 0) {
            goto fail;
        }
    }

    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        /* 注意：即使在错误情况下，我们也可以检查浮点错误 */
        if (_check_ufunc_fperr(errormask, "reduce") < 0) {
            goto fail;
        }
    }

    if (out != NULL) {
        result = out;
    }
    Py_INCREF(result);

    if (initial_buf != NULL && PyDataType_REFCHK(PyArray_DESCR(result))) {
        PyArray_ClearBuffer(PyArray_DESCR(result), initial_buf, 0, 1, 1);
    }
    PyMem_FREE(initial_buf);
    NPY_AUXDATA_FREE(auxdata);
    if (!NpyIter_Deallocate(iter)) {
        Py_DECREF(result);
        return NULL;
    }
    return result;
fail:
    // 检查 initial_buf 是否非空，并且 op_dtypes[0] 是可引用的 PyDataType
    if (initial_buf != NULL && PyDataType_REFCHK(op_dtypes[0])) {
        // 清理 op_dtypes[0] 的缓冲区
        PyArray_ClearBuffer(op_dtypes[0], initial_buf, 0, 1, 1);
    }
    // 释放 initial_buf 占用的内存
    PyMem_FREE(initial_buf);
    // 释放所有辅助数据
    NPY_AUXDATA_FREE(auxdata);
    // 如果 iter 非空，释放迭代器资源
    if (iter != NULL) {
        NpyIter_Deallocate(iter);
    }

    // 返回 NULL，表示函数执行失败
    return NULL;
}

NumPy-源码解析-七十五-

NumPy 源码解析（七十五）

.\numpy\numpy\_core\src\npysort\x86_simd_argsort.dispatch.cpp

.\numpy\numpy\_core\src\npysort\x86_simd_qsort.dispatch.cpp

.\numpy\numpy\_core\src\npysort\x86_simd_qsort.hpp

.\numpy\numpy\_core\src\npysort\x86_simd_qsort_16bit.dispatch.cpp

.\numpy\numpy\_core\src\umath\clip.cpp

.\numpy\numpy\_core\src\umath\clip.h

.\numpy\numpy\_core\src\umath\dispatching.c

.\numpy\numpy\_core\src\umath\dispatching.h

.\numpy\numpy\_core\src\umath\extobj.c

.\numpy\numpy\_core\src\umath\extobj.h

.\numpy\numpy\_core\src\umath\fast_loop_macros.h

.\numpy\numpy\_core\src\umath\legacy_array_method.c

.\numpy\numpy\_core\src\umath\legacy_array_method.h

.\numpy\numpy\_core\src\umath\npy_simd_data.h

.\numpy\numpy\_core\src\umath\override.c

.\numpy\numpy\_core\src\umath\override.h

.\numpy\numpy\_core\src\umath\reduction.c

`.\numpy\numpy\_core\src\npysort\x86_simd_argsort.dispatch.cpp`

`.\numpy\numpy\_core\src\npysort\x86_simd_qsort.dispatch.cpp`

`.\numpy\numpy\_core\src\npysort\x86_simd_qsort.hpp`

`.\numpy\numpy\_core\src\npysort\x86_simd_qsort_16bit.dispatch.cpp`

`.\numpy\numpy\_core\src\umath\clip.cpp`

`.\numpy\numpy\_core\src\umath\clip.h`

`.\numpy\numpy\_core\src\umath\dispatching.c`

`.\numpy\numpy\_core\src\umath\dispatching.h`

`.\numpy\numpy\_core\src\umath\extobj.c`

`.\numpy\numpy\_core\src\umath\extobj.h`

`.\numpy\numpy\_core\src\umath\fast_loop_macros.h`

`.\numpy\numpy\_core\src\umath\legacy_array_method.c`

`.\numpy\numpy\_core\src\umath\legacy_array_method.h`

`.\numpy\numpy\_core\src\umath\npy_simd_data.h`

`.\numpy\numpy\_core\src\umath\override.c`

`.\numpy\numpy\_core\src\umath\override.h`

`.\numpy\numpy\_core\src\umath\reduction.c`