NumPy 源码解析（六十九）

`.\numpy\numpy\_core\src\multiarray\multiarraymodule.h`

#ifndef NUMPY_CORE_SRC_MULTIARRAY_MULTIARRAYMODULE_H_
#define NUMPY_CORE_SRC_MULTIARRAY_MULTIARRAYMODULE_H_

/*
 * A struct storing thread-unsafe global state for the _multiarray_umath
 * module. We should refactor so the global state is thread-safe,
 * e.g. by adding locking.
 */
typedef struct npy_thread_unsafe_state_struct {
    /*
     * Cached references to objects obtained via an import. All of these
     * can be initialized at any time by npy_cache_import.
     *
     * Currently these are not initialized in a thread-safe manner but the
     * failure mode is a reference leak for references to imported immortal
     * modules so it will never lead to a crash unless users are doing something
     * janky that we don't support like reloading.
     *
     * TODO: maybe make each entry a struct that looks like:
     *
     *      struct {
     *          atomic_int initialized;
     *          PyObject *value;
     *      }
     *
     * so the initialization is thread-safe and the only possible lock
     * contention happens before the cache is initialized, not on every single
     * read.
     */
    PyObject *_add_dtype_helper;
    PyObject *_all;
    PyObject *_amax;
    PyObject *_amin;
    PyObject *_any;
    PyObject *array_function_errmsg_formatter;
    PyObject *array_ufunc_errmsg_formatter;
    PyObject *_clip;
    PyObject *_commastring;
    PyObject *_convert_to_stringdtype_kwargs;
    PyObject *_default_array_repr;
    PyObject *_default_array_str;
    PyObject *_dump;
    PyObject *_dumps;
    PyObject *_getfield_is_safe;
    PyObject *internal_gcd_func;
    PyObject *_mean;
    PyObject *NO_NEP50_WARNING;
    PyObject *npy_ctypes_check;
    PyObject *numpy_matrix;
    PyObject *_prod;
    PyObject *_promote_fields;
    PyObject *_std;
    PyObject *_sum;
    PyObject *_ufunc_doc_signature_formatter;
    PyObject *_var;
    PyObject *_view_is_safe;
    PyObject *_void_scalar_to_string;

    /*
     * Used to test the internal-only scaled float test dtype
     */
    npy_bool get_sfloat_dtype_initialized;

    /*
     * controls the global madvise hugepage setting
     */
    int madvise_hugepage;

    /*
     * used to detect module reloading in the reload guard
     */
    int reload_guard_initialized;

    /*
     * global variable to determine if legacy printing is enabled,
     * accessible from C. For simplicity the mode is encoded as an
     * integer where INT_MAX means no legacy mode, and '113'/'121'
     * means 1.13/1.21 legacy mode; and 0 maps to INT_MAX. We can
     * upgrade this if we have more complex requirements in the future.
     */
    int legacy_print_mode;

    /*
     * Holds the user-defined setting for whether or not to warn
     * if there is no memory policy set
     */
    int warn_if_no_mem_policy;

} npy_thread_unsafe_state_struct;

/*
 * Declaration (extern, hidden visibility) of the npy_thread_unsafe_state
 * variable that holds the struct above; this header does not define it.
 */
NPY_VISIBILITY_HIDDEN extern npy_thread_unsafe_state_struct npy_thread_unsafe_state;

#endif  /* NUMPY_CORE_SRC_MULTIARRAY_MULTIARRAYMODULE_H_ */

`.\numpy\numpy\_core\src\multiarray\nditer_api.c`

/*
 * This file implements most of the main API functions of NumPy's nditer.
 * This excludes functions specialized using the templating system.
 *
 * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
 * The University of British Columbia
 *
 * Copyright (c) 2011 Enthought, Inc
 *
 * See LICENSE.txt for the license.
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION

/* Allow this .c file to include nditer_impl.h */
#define NPY_ITERATOR_IMPLEMENTATION_CODE

#include "nditer_impl.h"
#include "templ_common.h"
#include "ctors.h"
#include "refcount.h"

/* Internal helper functions private to this file */

/*
 * Forward declaration of a file-private helper; its definition is not
 * visible in this part of the file.
 *
 * NOTE(review): judging only by the name and signature, this appears to
 * validate/adjust the reduction sizes for a transfer of `count` elements and
 * to hand results back through `reduce_innersize` and `reduce_outerdim`.
 * Confirm the exact meaning of the parameters and of the returned npy_intp
 * against the definition before relying on this description.
 */
static npy_intp
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                        npy_intp *reduce_innersize,
                        npy_intp *reduce_outerdim);

/*NUMPY_API
 * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
 * was set for iterator creation, and does not work if buffering is
 * enabled. This function also resets the iterator to its initial state.
 *
 * `axis` is given in the caller's axis order and must satisfy
 * 0 <= axis < ndim.
 *
 * Errors: RuntimeError if no multi-index is tracked, if an index is
 * tracked, if the iterator is buffered, or if the axis cannot be found in
 * the permutation; ValueError if `axis` is out of bounds.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_RemoveAxis(NpyIter *iter, int axis)
{
    /*
     * NOTE(review): itflags/ndim/nop/sizeof_axisdata look like they are
     * also consumed implicitly by the NIT_* accessor macros; keep the names.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    /* Position of the removed axis within the iterator's own axis order */
    int xdim = 0;
    npy_int8 *perm = NIT_PERM(iter);
    NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
    char **resetdataptr = NIT_RESETDATAPTR(iter);

    if (!(itflags & NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator RemoveAxis may only be called "
                "if a multi-index is being tracked");
        return NPY_FAIL;
    }
    else if (itflags & NPY_ITFLAG_HASINDEX) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator RemoveAxis may not be called on "
                "an index is being tracked");
        return NPY_FAIL;
    }
    else if (itflags & NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator RemoveAxis may not be called on "
                "a buffered iterator");
        return NPY_FAIL;
    }
    else if (axis < 0 || axis >= ndim) {
        PyErr_SetString(PyExc_ValueError,
                "axis out of bounds in iterator RemoveAxis");
        return NPY_FAIL;
    }

    /* Reverse axis, since the iterator treats them that way */
    axis = ndim - 1 - axis;

    /* First find the axis in question */
    for (idim = 0; idim < ndim; ++idim) {
        /* If this is it, and it's iterated forward, done */
        if (perm[idim] == axis) {
            xdim = idim;
            break;
        }
        /* If this is it, but it's iterated backward, must reverse the axis */
        else if (-1 - perm[idim] == axis) {
            npy_intp *strides = NAD_STRIDES(axisdata_del);
            npy_intp shape = NAD_SHAPE(axisdata_del), offset;

            xdim = idim;

            /*
             * Adjust baseoffsets and the reset data pointers back to the
             * start of this axis.
             */
            for (iop = 0; iop < nop; ++iop) {
                offset = (shape-1)*strides[iop];
                baseoffsets[iop] += offset;
                resetdataptr[iop] += offset;
            }
            break;
        }

        NIT_ADVANCE_AXISDATA(axisdata_del, 1);
    }

    /* The loop ran to completion: the axis is not present in perm */
    if (idim == ndim) {
        PyErr_SetString(PyExc_RuntimeError,
                "internal error in iterator perm");
        return NPY_FAIL;
    }

    /*
     * Adjust the permutation: drop entry xdim and renumber the remaining
     * entries so they stay contiguous (negative entries encode reversed
     * axes as -1-axis).
     */
    for (idim = 0; idim < ndim-1; ++idim) {
        npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
        if (p >= 0) {
            if (p > axis) {
                --p;
            }
        }
        else {
            if (p < -1-axis) {
                ++p;
            }
        }
        perm[idim] = p;
    }

    /* Shift all the axisdata structures after the removed one down by one */
    axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
    memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);

    /* Adjust the iteration size and reset iterend (-1 marks overflow) */
    NIT_ITERSIZE(iter) = 1;
    axisdata = NIT_AXISDATA(iter);
    for (idim = 0; idim < ndim-1; ++idim) {
        if (npy_mul_sizes_with_overflow(&NIT_ITERSIZE(iter),
                    NIT_ITERSIZE(iter), NAD_SHAPE(axisdata))) {
            NIT_ITERSIZE(iter) = -1;
            break;
        }
        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }
    NIT_ITEREND(iter) = NIT_ITERSIZE(iter);

    /* Shrink the iterator */
    NIT_NDIM(iter) = ndim - 1;
    /* If it is now 0-d fill the singleton dimension */
    if (ndim == 1) {
        npy_intp *strides = NAD_STRIDES(axisdata_del);
        NAD_SHAPE(axisdata_del) = 1;
        for (iop = 0; iop < nop; ++iop) {
            strides[iop] = 0;
        }
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
    }

    return NpyIter_Reset(iter, NULL);
}
/*NUMPY_API
 * Drops multi-index tracking from an iterator.
 *
 * The iterator is reset first. When the multi-index flag is set, the flag
 * is cleared and the axes are coalesced; an iterator whose stored size is
 * negative is rejected with a ValueError instead. When the flag is not
 * set, nothing beyond the reset happens.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_RemoveMultiIndex(NpyIter *iter)
{
    npy_uint32 itflags;

    /* The iterator must be back at its start before flags are touched */
    if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
        return NPY_FAIL;
    }

    itflags = NIT_ITFLAGS(iter);

    /* No multi-index is tracked: there is nothing to remove */
    if ((itflags & NPY_ITFLAG_HASMULTIINDEX) == 0) {
        return NPY_SUCCEED;
    }

    /* A negative iteration size marks an iterator that is too large */
    if (NIT_ITERSIZE(iter) < 0) {
        PyErr_SetString(PyExc_ValueError, "iterator is too large");
        return NPY_FAIL;
    }

    /* Clear the flag, then merge axes now that they need not stay apart */
    NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
    npyiter_coalesce_axes(iter);

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Turns on the EXTERNAL_LOOP flag (so HasExternalLoop returns true) and
 * resets the iterator.
 *
 * Fails with a ValueError when an index or multi-index is tracked, or when
 * the iterator is already ranged + external-loop without buffering.
 *
 * Returns the result of NpyIter_Reset on success paths, NPY_FAIL otherwise.
 */
NPY_NO_EXPORT int
NpyIter_EnableExternalLoop(NpyIter *iter)
{
    /*
     * NOTE(review): itflags and nop are kept under these exact names since
     * the NIT_* accessor macros appear to reference them implicitly.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    /* Tracking any kind of index rules out an external loop */
    if ((itflags & (NPY_ITFLAG_HASINDEX | NPY_ITFLAG_HASMULTIINDEX)) != 0) {
        PyErr_SetString(PyExc_ValueError,
                        "Iterator flag EXTERNAL_LOOP cannot be used "
                        "if an index or multi-index is being tracked");
        return NPY_FAIL;
    }

    /* RANGE and EXLOOP both set while BUFFER is clear */
    if ((itflags & (NPY_ITFLAG_BUFFER | NPY_ITFLAG_RANGE | NPY_ITFLAG_EXLOOP))
        == (NPY_ITFLAG_RANGE | NPY_ITFLAG_EXLOOP)) {
        PyErr_SetString(PyExc_ValueError,
                        "Iterator flag EXTERNAL_LOOP cannot be used "
                        "with ranged iteration unless buffering is also enabled");
        return NPY_FAIL;
    }

    /* Only do the work when the flag is not set yet */
    if ((itflags & NPY_ITFLAG_EXLOOP) == 0) {
        itflags |= NPY_ITFLAG_EXLOOP;
        NIT_ITFLAGS(iter) = itflags;

        /*
         * Without buffering, an iteration whose total size equals the
         * shape of the first axisdata entry can use the
         * single-iteration optimization of iternext.
         */
        if ((itflags & NPY_ITFLAG_BUFFER) == 0) {
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);

            if (NAD_SHAPE(axisdata) == NIT_ITERSIZE(iter)) {
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
            }
        }
    }

    /* Put the iterator back at its initial state */
    return NpyIter_Reset(iter, NULL);
}


/*
 * Message handed back through `errmsg` by the reset functions in this file
 * when a buffer allocation or buffer copy step fails.
 */
static char *_reset_cast_error = (
        "Iterator reset failed due to a casting failure. "
        "This error is set as a Python error.");

/*NUMPY_API
 * Resets the iterator to its initial state
 *
 * The use of errmsg is discouraged, it cannot be guaranteed that the GIL
 * will not be grabbed on casting errors even when this is passed.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_Reset(NpyIter *iter, char **errmsg)
{
    /*
     * NOTE(review): itflags and nop are kept under these exact names since
     * the NIT_* accessor macros appear to reference them implicitly.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata;

        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                /*
                 * NOTE(review): anything npyiter_allocate_buffers stored
                 * through errmsg is replaced by the cast-error text here.
                 */
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            /*
             * If the iterindex is already right, no need to
             * do anything (and no cast error has previously occurred).
             */
            bufferdata = NIT_BUFFERDATA(iter);
            if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
                    NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
                    NBF_SIZE(bufferdata) > 0) {
                return NPY_SUCCEED;
            }
            /* Write the current buffer contents back before moving */
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    /* Move the iterator back to the start of its iteration range */
    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}
/*NUMPY_API
 * Resets the iterator to its initial state, with new base data pointers.
 * This function requires great caution.
 *
 * Each reset data pointer becomes baseptrs[iop] plus the stored base
 * offset for that operand, so `baseptrs` must hold one pointer per operand.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
{
    /*
     * NOTE(review): itflags and nop appear to be referenced implicitly by
     * the NIT_* accessor macros below; keep these names.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    /* Per-operand reset pointers and the offsets applied to new bases */
    char **resetdataptr = NIT_RESETDATAPTR(iter);
    npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                /* errmsg is left as npyiter_allocate_buffers set it */
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            /* Write the current buffer contents back first */
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    /* The new reset data pointers */
    for (iop = 0; iop < nop; ++iop) {
        resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
    }

    /* Move the iterator back to the start of its iteration range */
    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Resets the iterator to a new iterator index range
 *
 * The range is half-open, [istart, iend), and must satisfy
 * 0 <= istart <= iend <= itersize. The iterator must have been created
 * with ranged iteration support.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_ResetToIterIndexRange(NpyIter *iter,
                              npy_intp istart, npy_intp iend, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    /*int nop = NIT_NOP(iter);*/

    /* Ranged iteration has to be requested at construction time */
    if (!(itflags&NPY_ITFLAG_RANGE)) {
        if (errmsg == NULL) {
            PyErr_SetString(PyExc_ValueError,
                    "Cannot call ResetToIterIndexRange on an iterator without "
                    "requesting ranged iteration support in the constructor");
        }
        else {
            *errmsg = "Cannot call ResetToIterIndexRange on an iterator "
                      "without requesting ranged iteration support in the "
                    "constructor";
        }
        return NPY_FAIL;
    }

    if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
        /* A negative itersize marks an iterator that is too large */
        if (NIT_ITERSIZE(iter) < 0) {
            if (errmsg == NULL) {
                PyErr_SetString(PyExc_ValueError, "iterator is too large");
            }
            else {
                *errmsg = "iterator is too large";
            }
            return NPY_FAIL;
        }
        /* Otherwise the requested range is simply out of bounds */
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to "
                    "ResetToIterIndexRange", istart, iend);
        }
        else {
            *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }
    else if (iend < istart) {
        /* The end of the range lies before its start */
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange",
                    istart, iend);
        }
        else {
            *errmsg = "Invalid range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }

    /* Store the new range, then let the normal reset apply it */
    NIT_ITERSTART(iter) = istart;
    NIT_ITEREND(iter) = iend;

    return NpyIter_Reset(iter, errmsg);
}
/*NUMPY_API
 * Sets the iterator to the specified multi-index, which must have the
 * correct number of entries for 'ndim'.  It is only valid
 * when NPY_ITER_MULTI_INDEX was passed to the constructor.  This operation
 * fails if the multi-index is out of bounds.
 *
 * Errors: ValueError if no multi-index is tracked, if the iterator is
 * buffered, has the EXTERNAL_LOOP flag, or is too large; IndexError if the
 * multi-index is out of bounds or outside the restricted iteration range.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp const *multi_index)
{
    /*
     * NOTE(review): itflags/ndim/nop/sizeof_axisdata appear to be used
     * implicitly by the NIT_* accessor macros; keep these names.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    npy_int8 *perm;

    /* A multi-index must have been requested in the constructor */
    if (!(itflags & NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator without "
                "requesting a multi-index in the constructor");
        return NPY_FAIL;
    }

    /* Buffered iterators are rejected */
    if (itflags & NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    /* So are iterators with an external loop */
    if (itflags & NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    perm = NIT_PERM(iter);
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Compute the iterindex corresponding to the multi-index */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_int8 p = perm[idim];
        npy_intp i, shape;

        shape = NAD_SHAPE(axisdata);
        if (p < 0) {
            /* If the perm entry is negative, reverse the index */
            i = shape - multi_index[ndim + p] - 1;
        } else {
            i = multi_index[ndim - p - 1];
        }

        /* Bounds-check this index */
        if (i >= 0 && i < shape) {
            /* factor is the product of the shapes of the axes seen so far */
            iterindex += factor * i;
            factor *= shape;
        } else {
            PyErr_SetString(PyExc_IndexError,
                    "Iterator GotoMultiIndex called with an out-of-bounds "
                    "multi-index");
            return NPY_FAIL;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /* The target must lie inside the [iterstart, iterend) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoMultiIndex called with a multi-index outside the "
                "restricted iteration range");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}
/*NUMPY_API
 * If the iterator is tracking an index, sets the iterator
 * to the specified index.
 *
 * Errors: ValueError if no index is tracked, if the iterator is buffered
 * or has the EXTERNAL_LOOP flag; IndexError if `flat_index` is out of
 * bounds or maps outside the restricted iteration range.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
{
    /*
     * NOTE(review): itflags/ndim/nop/sizeof_axisdata appear to be used
     * implicitly by the NIT_* accessor macros; keep these names.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    /* A C or Fortran index must have been requested in the constructor */
    if (!(itflags&NPY_ITFLAG_HASINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator without "
                "requesting a C or Fortran index in the constructor");
        return NPY_FAIL;
    }

    /* Buffered iterators are rejected */
    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    /* So are iterators with an external loop */
    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    /* flat_index must lie in [0, itersize) */
    if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an out-of-bounds "
                "index");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Compute the iterindex corresponding to the flat_index */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_intp i, shape, iterstride;

        /* The stride entry at position nop is read as the index stride */
        iterstride = NAD_STRIDES(axisdata)[nop];
        shape = NAD_SHAPE(axisdata);

        /* Extract the index from the flat_index */
        if (iterstride == 0) {
            i = 0;
        }
        else if (iterstride < 0) {
            /* Negative stride: the coordinate counts down from shape-1 */
            i = shape - (flat_index/(-iterstride))%shape - 1;
        }
        else {
            i = (flat_index/iterstride)%shape;
        }

        /* Add its contribution to iterindex */
        iterindex += factor * i;
        factor *= shape;

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }


    /* The target must lie inside the [iterstart, iterend) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an index outside the "
                "restricted iteration range.");
        return NPY_FAIL;
    }

    /* Jump to the computed iteration position */
    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Sets the iterator position to the specified iterindex,
 * which matches the iteration order of the iterator.
 *
 * Errors: ValueError if the iterator has the EXTERNAL_LOOP flag or is too
 * large; IndexError if `iterindex` lies outside [iterstart, iterend).
 * Failures of the buffer copy helpers are passed on as NPY_FAIL.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
{
    /*
     * NOTE(review): itflags and nop are kept under these exact names since
     * the NIT_* accessor macros appear to reference them implicitly.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    /* Iterators with an external loop are rejected */
    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIterIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    /* The target must lie inside the [iterstart, iterend) range */
    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        /* A negative itersize marks an iterator that is too large */
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIterIndex called with an iterindex outside the "
                "iteration range.");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        npy_intp bufiterend, size;

        size = NBF_SIZE(bufferdata);
        bufiterend = NBF_BUFITEREND(bufferdata);

        /* Check if the new iterindex is already within the buffer */
        if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
                                        iterindex >= bufiterend - size) {
            npy_intp *strides, delta;
            char **ptrs;

            strides = NBF_STRIDES(bufferdata);
            ptrs = NBF_PTRS(bufferdata);
            /* Distance to move, measured in iteration steps */
            delta = iterindex - NIT_ITERINDEX(iter);

            /* Shift every operand pointer within the current buffer */
            for (iop = 0; iop < nop; ++iop) {
                ptrs[iop] += delta * strides[iop];
            }

            NIT_ITERINDEX(iter) = iterindex;
        }
        /* Start the buffer at the provided iterindex */
        else {
            /* Write back to the arrays */
            if (npyiter_copy_from_buffers(iter) < 0) {
                return NPY_FAIL;
            }

            npyiter_goto_iterindex(iter, iterindex);

            /* Prepare the next buffers and set iterend/size */
            if (npyiter_copy_to_buffers(iter, NULL) < 0) {
                return NPY_FAIL;
            }
        }
    }
    else {
        /* Unbuffered: simply move to the requested position */
        npyiter_goto_iterindex(iter, iterindex);
    }

    return NPY_SUCCEED;
}
/*NUMPY_API
 * Gets the current iteration index.
 *
 * For ranged or buffered iterators the stored iterindex is returned;
 * otherwise it is recomputed from the per-axis coordinates and shapes.
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetIterIndex(NpyIter *iter)
{
    /*
     * NOTE(review): itflags/ndim/nop/sizeof_axisdata appear to be used
     * implicitly by the NIT_* accessor macros; keep these names.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
    if (itflags & (NPY_ITFLAG_RANGE | NPY_ITFLAG_BUFFER)) {
        return NIT_ITERINDEX(iter);
    }
    else {
        npy_intp iterindex;
        NpyIter_AxisData *axisdata;
        npy_intp sizeof_axisdata;

        iterindex = 0;
        if (ndim == 0) {
            return 0;  /* a 0-d iterator is always at index 0 */
        }
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
        /* Start at the last axisdata entry and walk towards the first */
        axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim - 1);

        for (idim = ndim - 2; idim >= 0; --idim) {
            iterindex += NAD_INDEX(axisdata);  /* add this axis' coordinate */
            NIT_ADVANCE_AXISDATA(axisdata, -1);  /* step back to the previous entry */
            iterindex *= NAD_SHAPE(axisdata);  /* scale by that entry's shape */
        }
        iterindex += NAD_INDEX(axisdata);  /* coordinate of the first entry */

        return iterindex;
    }
}

/*NUMPY_API
 * Reports whether the iterator's DELAYBUF flag is set, i.e. whether
 * buffer allocation has been delayed and not yet carried out.
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasDelayedBufAlloc(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    if (flags & NPY_ITFLAG_DELAYBUF) {
        return 1;
    }
    return 0;
}

/*NUMPY_API
 * Reports whether the iterator's EXTERNAL_LOOP flag is set.
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasExternalLoop(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    if (flags & NPY_ITFLAG_EXLOOP) {
        return 1;
    }
    return 0;
}

/*NUMPY_API
 * Reports whether the iterator is tracking a multi-index.
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasMultiIndex(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    if (flags & NPY_ITFLAG_HASMULTIINDEX) {
        return 1;
    }
    return 0;
}

/*NUMPY_API
 * Reports whether the iterator is tracking an index.
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasIndex(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    if (flags & NPY_ITFLAG_HASINDEX) {
        return 1;
    }
    return 0;
}

/*NUMPY_API
 * Checks to see whether the elements of the specified reduction operand
 * which the iterator points at are being seen for the first time.
 * The function returns a reasonable answer for reduction operands and
 * when buffering is disabled. The answer may be incorrect for buffered
 * non-reduction operands.
 *
 * This function is intended to be used in EXTERNAL_LOOP mode only,
 * and will produce some wrong answers when that mode is not enabled.
 *
 * If this function returns true, the caller should also
 * check the inner loop stride of the operand, because if
 * that stride is 0, then only the first element of the innermost
 * external loop is being visited for the first time.
 *
 * WARNING: For performance reasons, 'iop' is not bounds-checked,
 *          it is not confirmed that 'iop' is actually a reduction
 *          operand, and it is not confirmed that EXTERNAL_LOOP
 *          mode is enabled. These checks are the responsibility of
 *          the caller, and should be done outside of any inner loops.
 *
 * Returns 1 for a first visit, 0 otherwise.
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsFirstVisit(NpyIter *iter, int iop)
{
    /*
     * NOTE(review): itflags/ndim/nop/sizeof_axisdata appear to be used
     * implicitly by the NIT_* accessor macros; keep these names.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    axisdata = NIT_AXISDATA(iter);

    for (idim = 0; idim < ndim; ++idim) {
        npy_intp coord = NAD_INDEX(axisdata);
        npy_intp stride = NAD_STRIDES(axisdata)[iop];

        /*
         * If this is a reduction dimension and the coordinate
         * is not at the start, it's definitely not the first visit
         */
        if (stride == 0 && coord != 0) {
            return 0;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /*
     * In reduction buffering mode, there's a double loop being
     * tracked in the buffer part of the iterator data structure.
     * We only need to check the outer level of this two-level loop,
     * because of the requirement that EXTERNAL_LOOP be enabled.
     */
    if (itflags & NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        /* The outer reduce loop */
        if (NBF_REDUCE_POS(bufferdata) != 0 &&
                NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop] == 0) {
            return 0;
        }
    }

    /* No reduction axis has advanced: this is a first visit */
    return 1;
}
/*NUMPY_API
 * Whether the iteration could be done with no buffering.
 *
 * Returns 0 when the iterator is not buffered, or when it is buffered but
 * no operand has the CAST flag set; returns 1 as soon as one operand
 * requires a cast.
 */
NPY_NO_EXPORT npy_bool
NpyIter_RequiresBuffering(NpyIter *iter)
{
    /*
     * Declared up front, before any NIT_* accessor is expanded.
     * NOTE(review): the accessor macros appear to reference the locals
     * itflags/nop by name, so they must be in scope at the point of use.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);
    npyiter_opitflags *op_itflags;

    /* An unbuffered iterator cannot require buffering */
    if (!(itflags&NPY_ITFLAG_BUFFER)) {
        return 0;
    }

    op_itflags = NIT_OPITFLAGS(iter);

    /* If any operand requires a cast, buffering is mandatory */
    for (iop = 0; iop < nop; ++iop) {
        if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
            return 1;
        }
    }

    /* Buffering was enabled but no operand actually needs it */
    return 0;
}

/*NUMPY_API
 * Whether the iteration loop, and in particular the iternext()
 * function, needs API access.  If this is true, the GIL must
 * be retained while iterating.
 *
 * NOTE: Internally (currently), `NpyIter_GetTransferFlags` will
 *       additionally provide information on whether floating point errors
 *       may be given during casts.  The flags only require the API use
 *       necessary for buffering though.  So an iterator which does not
 *       require buffering may indicate `NpyIter_IterationNeedsAPI`, but
 *       not include the flag in `NpyIter_GetTransferFlags`.
 */
NPY_NO_EXPORT npy_bool
NpyIter_IterationNeedsAPI(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    /* Collapse the NEEDSAPI bit to a 0/1 answer */
    return (flags & NPY_ITFLAG_NEEDSAPI) ? 1 : 0;
}

/*
 * Fetch the ArrayMethod (runtime) flags for all "transfer functions" (i.e.
 * copy to buffer/casts).
 *
 * TODO: This should be public API, but that only makes sense when the
 *       ArrayMethod API is made public.
 */
NPY_NO_EXPORT int
NpyIter_GetTransferFlags(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    /* The transfer flags are the iterator flag bits above the shift */
    return flags >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT;
}

/*NUMPY_API
 * Gets the number of dimensions being iterated
 */
NPY_NO_EXPORT int
NpyIter_GetNDim(NpyIter *iter)
{
    const int iter_ndim = NIT_NDIM(iter);

    return iter_ndim;
}

/*NUMPY_API
 * Gets the number of operands being iterated
 */
NPY_NO_EXPORT int
NpyIter_GetNOp(NpyIter *iter)
{
    const int operand_count = NIT_NOP(iter);

    return operand_count;
}

/*NUMPY_API
 * Gets the number of elements being iterated
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetIterSize(NpyIter *iter)
{
    const npy_intp element_count = NIT_ITERSIZE(iter);

    return element_count;
}

/*NUMPY_API
 * Whether the iterator is buffered
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsBuffered(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    /* Collapse the BUFFER bit to a 0/1 answer */
    return (flags & NPY_ITFLAG_BUFFER) ? 1 : 0;
}

/*NUMPY_API
 * Whether the inner loop can grow if buffering is unneeded
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsGrowInner(NpyIter *iter)
{
    const npy_uint32 flags = NIT_ITFLAGS(iter);

    /* Collapse the GROWINNER bit to a 0/1 answer */
    return (flags & NPY_ITFLAG_GROWINNER) ? 1 : 0;
}

/*NUMPY_API
 * Gets the size of the buffer, or 0 if buffering is not enabled
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetBufferSize(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*
     * Declared for consistency with the other accessors in this file,
     * which all define `nop` before calling NIT_BUFFERDATA.
     * NOTE(review): the macro presumably reads `nop` implicitly --
     * confirm against the macro definitions.
     */
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        return NBF_BUFFERSIZE(bufferdata);
    }
    else {
        return 0;
    }
}
/*NUMPY_API
 * Gets the broadcast shape if a multi-index is being tracked by the
 * iterator, otherwise gets the shape of the iteration in Fortran order
 * (fastest-changing index first).
 *
 * The reason Fortran order is returned when a multi-index is not
 * enabled is that this gives a direct view of how the iterator
 * traverses the n-dimensional space. The iterator organizes its memory
 * from fastest index to slowest index, and when a multi-index is
 * enabled, it uses a permutation to recover the original order.
 *
 * 'outshape' must have room for at least NpyIter_GetNDim(iter) entries.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    int idim, sizeof_axisdata;
    NpyIter_AxisData *axisdata;
    npy_int8 *perm;

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (itflags & NPY_ITFLAG_HASMULTIINDEX) {
        /* Map each iteration axis back to its original axis number */
        perm = NIT_PERM(iter);
        for(idim = 0; idim < ndim; ++idim) {
            int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
            outshape[axis] = NAD_SHAPE(axisdata);

            NIT_ADVANCE_AXISDATA(axisdata, 1);
        }
    }
    else {
        /* No multi-index: report the axes in the iterator's own order */
        for(idim = 0; idim < ndim; ++idim) {
            outshape[idim] = NAD_SHAPE(axisdata);
            NIT_ADVANCE_AXISDATA(axisdata, 1);
        }
    }

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Builds a set of strides for data packed contiguously in the order the
 * iterator traverses its axes: the first iteration axis receives
 * 'itemsize', and each following axis receives the previous stride
 * multiplied by the previous axis' shape. The result is written to
 * 'outstrides', indexed by original (un-permuted) axis number.
 *
 * Fails with a RuntimeError if no multi-index is being tracked, or if
 * any axis is iterated in reverse.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_CreateCompatibleStrides(NpyIter *iter,
                            npy_intp itemsize, npy_intp *outstrides)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp sizeof_axisdata;
    NpyIter_AxisData *axisdata;
    npy_int8 *perm;

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator CreateCompatibleStrides may only be called "
                "if a multi-index is being tracked");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    perm = NIT_PERM(iter);
    for(idim = 0; idim < ndim; ++idim) {
        npy_bool flipped;
        npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped);
        if (flipped) {
            PyErr_SetString(PyExc_RuntimeError,
                    "Iterator CreateCompatibleStrides may only be called "
                    "if DONT_NEGATE_STRIDES was used to prevent reverse "
                    "iteration of an axis");
            return NPY_FAIL;
        }
        else {
            outstrides[axis] = itemsize;
        }

        /* The next (slower) axis steps over this whole axis */
        itemsize *= NAD_SHAPE(axisdata);
        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    return NPY_SUCCEED;
}
/*NUMPY_API
 * Get the array of data pointers (1 per object being iterated)
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT char **
NpyIter_GetDataPtrArray(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*
     * NOTE(review): `nop` has no explicit use below; it is presumably
     * read by the accessor macros, so it must stay.
     */
    int nop = NIT_NOP(iter);

    /* Unbuffered: the pointers live in the first axisdata entry */
    if (!(itflags & NPY_ITFLAG_BUFFER)) {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        return NAD_PTRS(axisdata);
    }

    /* Buffered: the pointers live in the buffer data */
    NpyIter_BufferData *bufdata = NIT_BUFFERDATA(iter);
    return NBF_PTRS(bufdata);
}

/*NUMPY_API
 * Get the array of data pointers (1 per object being iterated),
 * directly into the arrays (never pointing to a buffer), for starting
 * unbuffered iteration. This always returns the addresses for the
 * iterator position as reset to iterator index 0.
 *
 * These pointers are different from the pointers accepted by
 * NpyIter_ResetBasePointers, because the direction along some
 * axes may have been reversed, requiring base offsets.
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT char **
NpyIter_GetInitialDataPtrArray(NpyIter *iter)
{
    /*
     * NOTE(review): `nop` has no explicit use below; it is presumably
     * read by the NIT_RESETDATAPTR macro, so it must stay.
     */
    int nop = NIT_NOP(iter);
    char **reset_ptrs = NIT_RESETDATAPTR(iter);

    return reset_ptrs;
}

/*NUMPY_API
 * Get the array of data type pointers (1 per object being iterated)
 */
NPY_NO_EXPORT PyArray_Descr **
NpyIter_GetDescrArray(NpyIter *iter)
{
    PyArray_Descr **descrs = NIT_DTYPES(iter);

    return descrs;
}

/*NUMPY_API
 * Get the array of objects being iterated
 */
NPY_NO_EXPORT PyArrayObject **
NpyIter_GetOperandArray(NpyIter *iter)
{
    /*
     * NOTE(review): `nop` has no explicit use below; it is presumably
     * read by the NIT_OPERANDS macro, so it must stay.
     */
    int nop = NIT_NOP(iter);
    PyArrayObject **operands = NIT_OPERANDS(iter);

    return operands;
}

/*NUMPY_API
 * Returns a view to the i-th object with the iterator's internal axes
 *
 * Returns a new array that uses the i-th operand as its base, or NULL
 * with an exception set: IndexError when 'i' is outside [0, nop), and
 * ValueError when the iterator is buffered.
 */
NPY_NO_EXPORT PyArrayObject *
NpyIter_GetIterView(NpyIter *iter, npy_intp i)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    PyArrayObject *obj, *view;
    PyArray_Descr *dtype;
    char *dataptr;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    int writeable;

    /*
     * Reject indices on both sides: 'i' indexes the per-operand arrays
     * below, so i >= nop would read past their end.
     */
    if (i < 0 || i >= nop) {
        PyErr_SetString(PyExc_IndexError,
                "index provided for an iterator view was out of bounds");
        return NULL;
    }

    /* Don't provide views if buffering is enabled */
    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "cannot provide an iterator view when buffering is enabled");
        return NULL;
    }

    obj = NIT_OPERANDS(iter)[i];
    dtype = PyArray_DESCR(obj);
    writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
    dataptr = NIT_RESETDATAPTR(iter)[i];
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Retrieve the shape and strides from the axisdata, in reverse order */
    for (idim = 0; idim < ndim; ++idim) {
        shape[ndim-idim-1] = NAD_SHAPE(axisdata);
        strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /*
     * The operand still owns its descriptor, so take a new reference
     * for the view (the constructor is documented to consume one).
     */
    Py_INCREF(dtype);
    view = (PyArrayObject *)PyArray_NewFromDescrAndBase(
            &PyArray_Type, dtype,
            ndim, shape, strides, dataptr,
            writeable ? NPY_ARRAY_WRITEABLE : 0, NULL, (PyObject *)obj);

    return view;
}
/*NUMPY_API
 * Get a pointer to the index, if it is being tracked
 *
 * Returns NULL when the iterator does not track an index.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetIndexPtr(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);

    if (!(itflags & NPY_ITFLAG_HASINDEX)) {
        return NULL;
    }

    /* The index is just after the data pointers */
    return (npy_intp*)NAD_PTRS(axisdata) + nop;
}

/*NUMPY_API
 * Gets an array of read flags (1 per object being iterated)
 *
 * 'outreadflags' receives one 0/1 entry per operand.
 */
NPY_NO_EXPORT void
NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
{
    int iop, nop = NIT_NOP(iter);

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);

    iop = 0;
    while (iop < nop) {
        /* 1 when the operand's READ bit is set, 0 otherwise */
        outreadflags[iop] = (op_itflags[iop] & NPY_OP_ITFLAG_READ) ? 1 : 0;
        ++iop;
    }
}

/*NUMPY_API
 * Gets an array of write flags (1 per object being iterated)
 *
 * 'outwriteflags' receives one 0/1 entry per operand.
 */
NPY_NO_EXPORT void
NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
{
    int iop, nop = NIT_NOP(iter);

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);

    iop = 0;
    while (iop < nop) {
        /* 1 when the operand's WRITE bit is set, 0 otherwise */
        outwriteflags[iop] = (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) ? 1 : 0;
        ++iop;
    }
}

/*NUMPY_API
 * Get the array of strides for the inner loop (when HasExternalLoop is true)
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerStrideArray(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*
     * NOTE(review): `nop` has no explicit use below; it is presumably
     * read by the accessor macros, so it must stay.
     */
    int nop = NIT_NOP(iter);

    /* Unbuffered: the strides come from the first axisdata entry */
    if (!(itflags & NPY_ITFLAG_BUFFER)) {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        return NAD_STRIDES(axisdata);
    }

    /* Buffered: the strides come from the buffer data */
    NpyIter_BufferData *bufdata = NIT_BUFFERDATA(iter);
    return NBF_STRIDES(bufdata);
}

/*NUMPY_API
 * Gets the array of strides for the specified axis.
 * If the iterator is tracking a multi-index, gets the strides
 * for the axis specified, otherwise gets the strides for
 * the iteration axis as Fortran order (fastest-changing axis first).
 *
 * Returns NULL if an error occurs.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_int8 *perm = NIT_PERM(iter);
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (axis < 0 || axis >= ndim) {
        PyErr_SetString(PyExc_ValueError,
                "axis out of bounds in iterator GetStrideAxisArray");
        return NULL;
    }

    if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
        /* Reverse axis, since the iterator treats them that way */
        axis = ndim-1-axis;

        /*
         * Find the axis in question. A perm entry matches either
         * directly or in its negated form (-1 - perm[idim]).
         */
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            if (perm[idim] == axis || -1 - perm[idim] == axis) {
                return NAD_STRIDES(axisdata);
            }
        }
    }
    else {
        /* No multi-index: 'axis' indexes the iteration axes directly */
        return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
    }

    /* Only reached when no perm entry matched the requested axis */
    PyErr_SetString(PyExc_RuntimeError,
            "internal error in iterator perm");
    return  NULL;
}
/*NUMPY_API
 * Get an array of strides which are fixed.  Any strides which may
 * change during iteration receive the value NPY_MAX_INTP.  Once
 * the iterator is ready to iterate, call this to get the strides
 * which will always be fixed in the inner loop, then choose optimized
 * inner loop functions which take advantage of those fixed strides.
 *
 * 'out_strides' receives one entry per operand.
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT void
NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
        npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
        npy_intp stride, *strides = NBF_STRIDES(data),
                *ad_strides = NAD_STRIDES(axisdata0);
        PyArray_Descr **dtypes = NIT_DTYPES(iter);

        for (iop = 0; iop < nop; ++iop) {
            stride = strides[iop];

            /*
             * Operands which are always/never buffered have fixed strides,
             * and everything has fixed strides when ndim is 0 or 1
             */
            if (ndim <= 1 || (op_itflags[iop]&
                            (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
                out_strides[iop] = stride;
            }
            /* If it's a reduction, 0-stride inner loop may have fixed stride */
            else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
                /* If it's a reduction operand, definitely fixed stride */
                if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
                    out_strides[iop] = stride;
                }
                /*
                 * Otherwise it's guaranteed to be a fixed stride if the
                 * stride is 0 for all the dimensions.
                 */
                else {
                    NpyIter_AxisData *axisdata = axisdata0;
                    int idim;
                    for (idim = 0; idim < ndim; ++idim) {
                        if (NAD_STRIDES(axisdata)[iop] != 0) {
                            break;
                        }
                        NIT_ADVANCE_AXISDATA(axisdata, 1);
                    }
                    /* If all the strides were 0, the stride won't change */
                    if (idim == ndim) {
                        out_strides[iop] = stride;
                    }
                    else {
                        out_strides[iop] = NPY_MAX_INTP;
                    }
                }
            }
            /*
             * Inner loop contiguous array means its stride won't change when
             * switching between buffering and not buffering
             */
            else if (ad_strides[iop] == dtypes[iop]->elsize) {
                out_strides[iop] = ad_strides[iop];
            }
            /*
             * Otherwise the strides can change if the operand is sometimes
             * buffered, sometimes not.
             */
            else {
                out_strides[iop] = NPY_MAX_INTP;
            }
        }
    }
    else {
        /* If there's no buffering, the strides are always fixed */
        memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
    }
}
/*NUMPY_API
 * Get a pointer to the size of the inner loop (when HasExternalLoop is true)
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*
     * NOTE(review): `nop` has no explicit use below; it is presumably
     * read by the accessor macros, so it must stay.
     */
    int nop = NIT_NOP(iter);

    /* Unbuffered: the inner size is the shape of the first axisdata entry */
    if (!(itflags & NPY_ITFLAG_BUFFER)) {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        return &NAD_SHAPE(axisdata);
    }

    /* Buffered: the inner size is tracked in the buffer data */
    NpyIter_BufferData *bufdata = NIT_BUFFERDATA(iter);
    return &NBF_SIZE(bufdata);
}

/*NUMPY_API
 * For debugging: dumps the iterator's internal state to stdout.
 *
 * Prints the iterator flags, sizes, permutation, per-operand dtypes,
 * pointers and flags, the buffer data (when buffering is enabled) and
 * every axisdata entry, then flushes stdout. Integer values are cast
 * to int for printing.
 */
NPY_NO_EXPORT void
NpyIter_DebugPrint(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    /* Paired with NPY_DISABLE_C_API at the end of the function */
    NPY_ALLOW_C_API_DEF
    NPY_ALLOW_C_API

    printf("\n------ BEGIN ITERATOR DUMP ------\n");
    printf("| Iterator Address: %p\n", (void *)iter);
    printf("| ItFlags: ");
    if (itflags & NPY_ITFLAG_IDENTPERM)
        printf("IDENTPERM ");
    if (itflags & NPY_ITFLAG_NEGPERM)
        printf("NEGPERM ");
    if (itflags & NPY_ITFLAG_HASINDEX)
        printf("HASINDEX ");
    if (itflags & NPY_ITFLAG_HASMULTIINDEX)
        printf("HASMULTIINDEX ");
    if (itflags & NPY_ITFLAG_FORCEDORDER)
        printf("FORCEDORDER ");
    if (itflags & NPY_ITFLAG_EXLOOP)
        printf("EXLOOP ");
    if (itflags & NPY_ITFLAG_RANGE)
        printf("RANGE ");
    if (itflags & NPY_ITFLAG_BUFFER)
        printf("BUFFER ");
    if (itflags & NPY_ITFLAG_GROWINNER)
        printf("GROWINNER ");
    if (itflags & NPY_ITFLAG_ONEITERATION)
        printf("ONEITERATION ");
    if (itflags & NPY_ITFLAG_DELAYBUF)
        printf("DELAYBUF ");
    if (itflags & NPY_ITFLAG_NEEDSAPI)
        printf("NEEDSAPI ");
    if (itflags & NPY_ITFLAG_REDUCE)
        printf("REDUCE ");
    if (itflags & NPY_ITFLAG_REUSE_REDUCE_LOOPS)
        printf("REUSE_REDUCE_LOOPS ");

    printf("\n");
    printf("| NDim: %d\n", ndim);
    printf("| NOp: %d\n", nop);
    /* The mask operand is only reported when one is set (index >= 0) */
    if (NIT_MASKOP(iter) >= 0) {
        printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
    }
    printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
    printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
    printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
    printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
    printf("| Iterator SizeOf: %d\n", (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
    printf("| BufferData SizeOf: %d\n", (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
    printf("| AxisData SizeOf: %d\n", (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
    printf("|\n");

    printf("| Perm: ");
    for (idim = 0; idim < ndim; ++idim) {
        printf("%d ", (int)NIT_PERM(iter)[idim]);
    }
    printf("\n");
    /* First "DTypes" row: the raw descriptor pointers */
    printf("| DTypes: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
    }
    printf("\n");
    /* Second "DTypes" row: the printed descriptors, or "(nil)" */
    printf("| DTypes: ");
    for (iop = 0; iop < nop; ++iop) {
        if (NIT_DTYPES(iter)[iop] != NULL)
            PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
        else
            printf("(nil) ");
        printf(" ");
    }
    printf("\n");
    printf("| InitDataPtrs: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
    }
    printf("\n");
    printf("| BaseOffsets: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
    }
    printf("\n");
    /* With HASINDEX, slot 'nop' of the reset pointers holds the index */
    if (itflags&NPY_ITFLAG_HASINDEX) {
        printf("| InitIndex: %d\n",
                        (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
    }
    printf("| Operands: ");
    for (iop = 0; iop < nop; ++iop) {
        printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
    }
    printf("\n");
    printf("| Operand DTypes: ");
    for (iop = 0; iop < nop; ++iop) {
        PyArray_Descr *dtype;
        if (NIT_OPERANDS(iter)[iop] != NULL) {
            dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
            if (dtype != NULL)
                PyObject_Print((PyObject *)dtype, stdout, 0);
            else
                printf("(nil) ");
        }
        else {
            printf("(op nil) ");
        }
        printf(" ");
    }
    printf("\n");
    printf("| OpItFlags:\n");
    for (iop = 0; iop < nop; ++iop) {
        printf("|   Flags[%d]: ", (int)iop);
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
            printf("READ ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
            printf("WRITE ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
            printf("CAST ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
            printf("BUFNEVER ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
            printf("ALIGNED ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
            printf("REDUCE ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_VIRTUAL)
            printf("VIRTUAL ");
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITEMASKED)
            printf("WRITEMASKED ");
        printf("\n");
    }
    printf("|\n");

    /* The buffer section exists only for buffered iterators */
    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);

        printf("| BufferData:\n");
        printf("|   BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
        printf("|   Size: %d\n", (int)NBF_SIZE(bufferdata));
        printf("|   BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
        if (itflags&NPY_ITFLAG_REDUCE) {
            printf("|   REDUCE Pos: %d\n", (int)NBF_REDUCE_POS(bufferdata));
            printf("|   REDUCE OuterSize: %d\n", (int)NBF_REDUCE_OUTERSIZE(bufferdata));
            printf("|   REDUCE OuterDim: %d\n", (int)NBF_REDUCE_OUTERDIM(bufferdata));
        }
        printf("|   Strides: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
        printf("\n");
        /* Fixed strides are only meaningful for EXTERNAL_LOOP */
        if (itflags&NPY_ITFLAG_EXLOOP) {
            /*
             * NOTE(review): this scratch array is indexed by operand but
             * sized with NPY_MAXDIMS -- confirm nop can never exceed it.
             */
            npy_intp fixedstrides[NPY_MAXDIMS];
            printf("|   Fixed Strides: ");
            NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
            for (iop = 0; iop < nop; ++iop)
                printf("%d ", (int)fixedstrides[iop]);
            printf("\n");
        }
        printf("|   Ptrs: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
        printf("\n");
        if (itflags&NPY_ITFLAG_REDUCE) {
            printf("|   REDUCE Outer Strides: ");
            for (iop = 0; iop < nop; ++iop)
                printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
            printf("\n");
            printf("|   REDUCE Outer Ptrs: ");
            for (iop = 0; iop < nop; ++iop)
                printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
            printf("\n");
        }
        printf("|   ReadTransferFn: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)transferinfo[iop].read.func);
        printf("\n");
        printf("|   ReadTransferData: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)transferinfo[iop].read.auxdata);
        printf("\n");
        printf("|   WriteTransferFn: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)transferinfo[iop].write.func);
        printf("\n");
        printf("|   WriteTransferData: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)transferinfo[iop].write.auxdata);
        printf("\n");
        printf("|   Buffers: ");
        for (iop = 0; iop < nop; ++iop)
            printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
        printf("\n");
        printf("|\n");
    }

    /* One section per axisdata entry, in the iterator's internal order */
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        printf("| AxisData[%d]:\n", (int)idim);
        printf("|   Shape: %d\n", (int)NAD_SHAPE(axisdata));
        printf("|   Index: %d\n", (int)NAD_INDEX(axisdata));
        printf("|   Strides: ");
        for (iop = 0; iop < nop; ++iop) {
            printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
        }
        printf("\n");
        /* With HASINDEX, slot 'nop' holds the index stride / value */
        if (itflags & NPY_ITFLAG_HASINDEX) {
            printf("|   Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
        }
        printf("|   Ptrs: ");
        for (iop = 0; iop < nop; ++iop) {
            printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
        }
        printf("\n");
        if (itflags & NPY_ITFLAG_HASINDEX) {
            printf("|   Index Value: %d\n",
                   (int)((npy_intp *)NAD_PTRS(axisdata))[nop]);
        }
    }

    printf("------- END ITERATOR DUMP -------\n");
    fflush(stdout);

    NPY_DISABLE_C_API
}
/*
 * Merges neighbouring iterator axes in place wherever every stride entry
 * allows the pair to be walked as one longer axis, then lowers the
 * iterator's dimension count accordingly.
 *
 * The IDENTPERM and HASMULTIINDEX flags are cleared unconditionally.  When
 * at least one pair of axes was merged, the permutation is rewritten as the
 * identity for the surviving axes.
 */
NPY_NO_EXPORT void
npyiter_coalesce_axes(NpyIter *iter)
{
    /* itflags, ndim, nop and sizeof_axisdata are read by the NIT_/NAD_ macros */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp k, nstrides = NAD_NSTRIDES();
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    /* Write cursor: the axis currently absorbing its successors */
    NpyIter_AxisData *merged = axisdata;
    npy_intp kept_ndim = 1;

    /* Neither flag is kept once axes may have been merged */
    NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);

    for (idim = 0; idim < ndim-1; ++idim) {
        NpyIter_AxisData *next = NIT_INDEX_AXISDATA(axisdata, 1);
        npy_intp merged_len = NAD_SHAPE(merged);
        npy_intp next_len = NAD_SHAPE(next);
        npy_intp *merged_strides = NAD_STRIDES(merged);
        npy_intp *next_strides = NAD_STRIDES(next);
        int mergeable = 1;

        for (k = 0; k < nstrides; ++k) {
            /* A length-1 axis with zero stride never blocks a merge */
            if (merged_len == 1 && merged_strides[k] == 0) {
                continue;
            }
            if (next_len == 1 && next_strides[k] == 0) {
                continue;
            }
            /* Otherwise the next axis must continue exactly where this ends */
            if (merged_strides[k]*merged_len != next_strides[k]) {
                mergeable = 0;
                break;
            }
        }

        /* The read cursor moves forward in either case */
        axisdata = next;

        if (mergeable) {
            /* Fold the next axis into the one being built */
            NAD_SHAPE(merged) *= NAD_SHAPE(axisdata);
            for (k = 0; k < nstrides; ++k) {
                if (merged_strides[k] == 0) {
                    merged_strides[k] = next_strides[k];
                }
            }
        }
        else {
            /* Start a new output axis, moving the data down if needed */
            NIT_ADVANCE_AXISDATA(merged, 1);
            if (merged != axisdata) {
                memcpy(merged, axisdata, sizeof_axisdata);
            }
            ++kept_ndim;
        }
    }

    /*
     * If the number of axes shrunk, reset the perm to the identity and
     * record the new dimension count.
     */
    if (kept_ndim < ndim) {
        npy_int8 *perm = NIT_PERM(iter);

        for (idim = 0; idim < kept_ndim; ++idim) {
            perm[idim] = (npy_int8)idim;
        }
        NIT_NDIM(iter) = kept_ndim;
    }
}

/*
 * Allocates one buffer of (element size * buffer size) bytes for every
 * operand whose per-operand flags do not include NPY_OP_ITFLAG_BUFNEVER,
 * and stores it in the iterator's buffer array.  Buffers for dtypes
 * flagged NPY_NEEDS_INIT are zero-filled.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL.
 *
 * Returns 1 on success.  On allocation failure returns 0 after freeing
 * (and NULLing) every non-NULL buffer slot of the operands processed
 * before the failing one.
 */
NPY_NO_EXPORT int
npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
{
    /* iop is function-scoped so the fail path knows how far the loop got */
    int iop = 0, nop = NIT_NOP(iter);

    npy_intp i;
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
    PyArray_Descr **op_dtype = NIT_DTYPES(iter);
    npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
    char *buffer, **buffers = NBF_BUFFERS(bufferdata);

    for (iop = 0; iop < nop; ++iop) {
        npyiter_opitflags flags = op_itflags[iop];

        /*
         * If we have determined that a buffer may be needed,
         * allocate one.
         */
        if (!(flags & NPY_OP_ITFLAG_BUFNEVER)) {
            npy_intp itemsize = op_dtype[iop]->elsize;
            buffer = PyArray_malloc(itemsize * buffersize);
            if (buffer == NULL) {
                /* Report via errmsg when given, else via a Python exception */
                if (errmsg == NULL) {
                    PyErr_NoMemory();
                } else {
                    *errmsg = "out of memory";
                }
                goto fail;
            }
            /* Zero the buffer for dtypes that require initialized memory */
            if (PyDataType_FLAGCHK(op_dtype[iop], NPY_NEEDS_INIT)) {
                memset(buffer, '\0', itemsize * buffersize);
            }
            buffers[iop] = buffer;
        }
    }

    return 1;

fail:
    /* Release whatever the operands before the failing one hold */
    for (i = 0; i < iop; ++i) {
        if (buffers[i] != NULL) {
            PyArray_free(buffers[i]);
            buffers[i] = NULL;
        }
    }
    /* 0 signals failure */
    return 0;
}

/*
 * Positions the AXISDATA portion of the iterator at the given flat
 * iterindex: stores the iterindex, fills in the per-axis index of every
 * axis and recomputes the per-axis data pointers from the reset data
 * pointers.  No validation of iterindex is performed.
 */
NPY_NO_EXPORT void
npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
{
    /* itflags, ndim, nop and sizeof_axisdata are read by the NIT_/NAD_ macros */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    npy_intp nstrides = NAD_NSTRIDES();
    npy_intp k;
    char **base;

    NIT_ITERINDEX(iter) = iterindex;

    /* A zero-dimensional iterator is treated as having one axis */
    if (ndim == 0) {
        ndim = 1;
    }

    if (iterindex == 0) {
        /* Fast path: every axis sits at index 0 on the reset pointers */
        base = NIT_RESETDATAPTR(iter);

        for (idim = ndim; idim > 0; --idim) {
            char **axis_ptrs = NAD_PTRS(axisdata);

            NAD_INDEX(axisdata) = 0;
            for (k = 0; k < nstrides; ++k) {
                axis_ptrs[k] = base[k];
            }

            NIT_ADVANCE_AXISDATA(axisdata, 1);
        }
        return;
    }

    /*
     * Set the multi-index, from the fastest-changing to the
     * slowest-changing.  The loop exits with axisdata on the last axis.
     */
    axisdata = NIT_AXISDATA(iter);
    for (idim = 0; ; ++idim) {
        npy_intp extent = NAD_SHAPE(axisdata);
        npy_intp quotient = iterindex / extent;

        /* Remainder of the division is this axis' coordinate */
        NAD_INDEX(axisdata) = iterindex - quotient * extent;
        iterindex = quotient;

        if (idim >= ndim-1) {
            break;
        }
        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    base = NIT_RESETDATAPTR(iter);

    /*
     * Accumulate the successive pointers with their
     * offsets in the opposite order, starting from the
     * original data pointers.
     */
    for (idim = ndim; idim > 0; --idim) {
        npy_intp *axis_strides = NAD_STRIDES(axisdata);
        char **axis_ptrs = NAD_PTRS(axisdata);
        npy_intp coord = NAD_INDEX(axisdata);

        for (k = 0; k < nstrides; ++k) {
            axis_ptrs[k] = base[k] + coord*axis_strides[k];
        }

        /* The next (faster) axis builds on the pointers just written */
        base = axis_ptrs;

        NIT_ADVANCE_AXISDATA(axisdata, -1);
    }
}

/*
 * This gets called after the buffers have been exhausted, and
 * their data needs to be written back to the arrays.  The multi-index
 * must be positioned for the beginning of the buffer.
 *
 * Returns 0 in every path visible here.
 *
 * NOTE(review): in this listing nothing happens between the two debug
 * prints below, and most of the locals are never used; the per-operand
 * write-back loop appears to have been dropped from the excerpt.  Restore
 * it from the upstream source before relying on this function.
 */
NPY_NO_EXPORT int
npyiter_copy_from_buffers(NpyIter *iter)
{
    /* itflags, ndim, nop and sizeof_axisdata are read by the NIT_/NAD_ macros */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);
    int maskop = NIT_MASKOP(iter);

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
                    *reduce_outeraxisdata = NULL;

    PyArray_Descr **dtypes = NIT_DTYPES(iter);
    /* Number of buffered elements recorded in the buffer data */
    npy_intp transfersize = NBF_SIZE(bufferdata);
    npy_intp *strides = NBF_STRIDES(bufferdata),
             *ad_strides = NAD_STRIDES(axisdata);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    char **ad_ptrs = NAD_PTRS(axisdata);
    char **buffers = NBF_BUFFERS(bufferdata);
    char *buffer;

    npy_intp reduce_outerdim = 0;
    npy_intp *reduce_outerstrides = NULL;

    /* Size of one axisdata entry, measured in npy_intp units */
    npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
                                NPY_SIZEOF_INTP;

    /* If we're past the end, nothing to copy */
    if (NBF_SIZE(bufferdata) == 0) {
        return 0;
    }

    NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");

    if (itflags & NPY_ITFLAG_REDUCE) {
        reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
        reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
        reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
        /* With a reduce double loop the buffer holds outersize inner runs */
        transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
    }

    NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);

    NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");

    return 0;
}

/*
 * This gets called after the iterator has been positioned to a multi-index
 * for the start of a buffer.  The code visible here works out how many
 * elements the next buffer fill covers (including the two-level reduce
 * loop sizes) and records that in the buffer data.
 *
 * prev_dataptrs: when NULL, a previously computed reduce loop structure
 *                is never reused.
 *
 * Returns 0 in every path visible here.
 *
 * NOTE(review): nothing in this listing ever sets any_buffered or reads
 * is_onestride after they are computed, and no data is copied; the
 * per-operand loop that fills the buffers appears to have been dropped
 * from the excerpt.  Restore it from the upstream source before relying
 * on this function.
 */
NPY_NO_EXPORT int
npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
{
    /* itflags, ndim, nop and sizeof_axisdata are read by the NIT_/NAD_ macros */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
                    *reduce_outeraxisdata = NULL;

    PyArray_Descr **dtypes = NIT_DTYPES(iter);
    PyArrayObject **operands = NIT_OPERANDS(iter);
    npy_intp *strides = NBF_STRIDES(bufferdata),
             *ad_strides = NAD_STRIDES(axisdata);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
    char **buffers = NBF_BUFFERS(bufferdata);
    npy_intp iterindex, iterend, transfersize,
            singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
    int is_onestride = 0, any_buffered = 0;

    npy_intp *reduce_outerstrides = NULL;
    char **reduce_outerptrs = NULL;

    /*
     * Have to get this flag before npyiter_checkreducesize sets
     * it for the next iteration.
     */
    npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
                    ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);

    /* Size of one axisdata entry, measured in npy_intp units */
    npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
                                NPY_SIZEOF_INTP;

    NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");

    /* Calculate the size if using any buffers */
    iterindex = NIT_ITERINDEX(iter);
    iterend = NIT_ITEREND(iter);
    transfersize = NBF_BUFFERSIZE(bufferdata);
    if (transfersize > iterend - iterindex) {
        transfersize = iterend - iterindex;
    }

    /* If last time around, the reduce loop structure was full, we reuse it */
    if (reuse_reduce_loops) {
        npy_intp full_transfersize, prev_reduce_outersize;

        prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
        reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
        reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
        reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
        reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
        reduce_innersize = NBF_SIZE(bufferdata);
        NBF_REDUCE_POS(bufferdata) = 0;
        /*
         * Try to do make the outersize as big as possible. This allows
         * it to shrink when processing the last bit of the outer reduce loop,
         * then grow again at the beginning of the next outer reduce loop.
         */
        NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata) -
                                            NAD_INDEX(reduce_outeraxisdata));
        full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata) * reduce_innersize;
        /* If the full transfer size doesn't fit in the buffer, truncate it */
        if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
            NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize / reduce_innersize;
            transfersize = NBF_REDUCE_OUTERSIZE(bufferdata) * reduce_innersize;
        }
        else {
            transfersize = full_transfersize;
        }
        if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
            /*
             * If the previous time around less data was copied it may not
             * be safe to reuse the buffers even if the pointers match.
             */
            reuse_reduce_loops = 0;
        }
        NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;

        NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
                        "itersize: %d\n",
                            (int)transfersize,
                            (int)reduce_innersize,
                            (int)NpyIter_GetIterSize(iter));
        NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
                            (int)NBF_REDUCE_OUTERSIZE(bufferdata));
    }
    /*
     * If there are any reduction operands, we may have to make
     * the size smaller so we don't copy the same value into
     * a buffer twice, as the buffering does not have a mechanism
     * to combine values itself.
     */
    else if (itflags&NPY_ITFLAG_REDUCE) {
        NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
        transfersize = npyiter_checkreducesize(iter, transfersize,
                                                &reduce_innersize,
                                                &reduce_outerdim);
        NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
                        "itersize: %d\n",
                            (int)transfersize,
                            (int)reduce_innersize,
                            (int)NpyIter_GetIterSize(iter));

        /* Record the freshly computed double-loop layout */
        reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
        reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
        reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
        NBF_SIZE(bufferdata) = reduce_innersize;
        NBF_REDUCE_POS(bufferdata) = 0;
        NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
        NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
        if (reduce_innersize == 0) {
            /* Nothing to transfer; also avoids dividing by zero below */
            NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
            return 0;
        }
        else {
            NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
        }
    }
    else {
        NBF_SIZE(bufferdata) = transfersize;
        NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
    }

    /* Calculate the maximum size if using a single stride and no buffers */
    singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
    if (singlestridesize > iterend - iterindex) {
        singlestridesize = iterend - iterindex;
    }
    if (singlestridesize >= transfersize) {
        is_onestride = 1;
    }

    NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);

    /*
     * If buffering wasn't needed, we can grow the inner
     * loop to as large as possible.
     *
     * TODO: Could grow REDUCE loop too with some more logic above.
     */
    if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
                        !(itflags&NPY_ITFLAG_REDUCE)) {
        if (singlestridesize > transfersize) {
            NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
                    "from %d to %d since buffering wasn't needed\n",
                    (int)NBF_SIZE(bufferdata), (int)singlestridesize);
            NBF_SIZE(bufferdata) = singlestridesize;
            NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
        }
    }

    NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);

    NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
                        "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
    return 0;
}

/**
 * This function clears any references still held by the buffers and should
 * only be used to discard buffers if an error occurred.
 *
 * Any Python exception pending on entry is fetched before the cleanup and
 * restored afterwards.  A failing clear function is reported through
 * PyErr_WriteUnraisable.  On return the buffer size is set to 0.
 *
 * @param iter Iterator object for which buffers are to be cleared
 */
NPY_NO_EXPORT void
npyiter_clear_buffers(NpyIter *iter)
{
    int nop = iter->nop;
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);

    if (NBF_SIZE(bufferdata) == 0) {
        /* if the buffers are empty already, there is nothing to do */
        return;
    }

    /*
     * The iterator may be using a dtype with references (as of writing this
     * means Python objects, but does not need to stay that way).
     * In that case, further cleanup may be necessary and we clear buffers
     * explicitly.  Stash the pending exception so the clear functions run
     * with a clean error state.
     */
    PyObject *type, *value, *traceback;
    PyErr_Fetch(&type,  &value, &traceback);

    /* Cleanup any buffers with references */
    char **buffers = NBF_BUFFERS(bufferdata);
    NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);
    PyArray_Descr **dtypes = NIT_DTYPES(iter);
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);

    /* buffers advances in step with iop, so *buffers is operand iop's slot */
    for (int iop = 0; iop < nop; ++iop, ++buffers) {
        /* Skip operands with no clear function or not currently buffered */
        if (transferinfo[iop].clear.func == NULL ||
                !(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER)) {
            continue;
        }
        /* Skip operands whose buffer slot is empty */
        if (*buffers == NULL) {
            continue;
        }
        int itemsize = dtypes[iop]->elsize;
        if (transferinfo[iop].clear.func(NULL, dtypes[iop], *buffers,
                NBF_SIZE(bufferdata), itemsize,
                transferinfo[iop].clear.auxdata) < 0) {
            /* This should never fail; if it does write it out */
            PyErr_WriteUnraisable(NULL);
        }
    }

    /* Signal that the buffers are empty */
    NBF_SIZE(bufferdata) = 0;
    PyErr_Restore(type, value, traceback);
}


/*
 * This checks how much space can be buffered without encountering the
 * same value twice, or for operands whose innermost stride is zero,
 * without encountering a different value.  By reducing the buffered
 * amount to this size, reductions can be safely buffered.
 *
 * Reductions are buffered with two levels of looping, to avoid
 * frequent copying to the buffers.  The return value is the overall
 * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
 * receives the size of the inner of the two levels of looping.
 *
 * The value placed in reduce_outerdim is the index into the AXISDATA
 * for where the second level of the double loop begins.
 *
 * The return value is always a multiple of the value placed in
 * reduce_innersize.
 *
 * As a side effect the NPY_ITFLAG_REUSE_REDUCE_LOOPS flag of the iterator
 * is set or cleared on every path except the early return for
 * ndim == 1 or count == 0.
 */
static npy_intp
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                        npy_intp *reduce_innersize,
                        npy_intp *reduce_outerdim)
{
    /* itflags, ndim, nop and sizeof_axisdata are read by the NIT_/NAD_ macros */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    npy_intp coord, shape, *strides;
    npy_intp reducespace = 1, factor;
    /* Becomes true once any axis visited so far has a non-zero index */
    npy_bool nonzerocoord;

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
    /* Per operand: is it a REDUCE operand with stride 0 at the current level */
    char stride0op[NPY_MAXARGS];

    /* Default to no outer axis */
    *reduce_outerdim = 0;

    /* If there's only one dimension, no need to calculate anything */
    if (ndim == 1 || count == 0) {
        *reduce_innersize = count;
        return count;
    }

    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    axisdata = NIT_AXISDATA(iter);

    /* Indicate which REDUCE operands have stride 0 in the inner loop */
    strides = NAD_STRIDES(axisdata);
    for (iop = 0; iop < nop; ++iop) {
        stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
                           (strides[iop] == 0);
        NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
                        "the inner loop? %d\n", iop, (int)stride0op[iop]);
    }

    /* Elements remaining on the first axis, counting the current one */
    shape = NAD_SHAPE(axisdata);
    coord = NAD_INDEX(axisdata);
    reducespace += (shape-coord-1);
    factor = shape;
    NIT_ADVANCE_AXISDATA(axisdata, 1);

    /* Initialize nonzerocoord based on the first coordinate */
    nonzerocoord = (coord != 0);

    /* Go forward through axisdata, calculating the space available */
    for (idim = 1; idim < ndim && reducespace < count;
                                ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
                                (int)reducespace, (int)count);

        strides = NAD_STRIDES(axisdata);
        for (iop = 0; iop < nop; ++iop) {
            /*
             * If a reduce stride switched from zero to non-zero, or
             * vice versa, that's the point where the data will stop
             * being the same element or will repeat, and if the
             * buffer starts with an all zero multi-index up to this
             * point, gives us the reduce_innersize.
             */
            if((stride0op[iop] && (strides[iop] != 0)) ||
                        (!stride0op[iop] &&
                         (strides[iop] == 0) &&
                         (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
                NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
                                    "buffer to %d\n", (int)reducespace);
                /*
                 * If we already found more elements than count, or
                 * the starting coordinate wasn't zero, the two-level
                 * looping is unnecessary/can't be done, so return.
                 */
                if (count <= reducespace) {
                    *reduce_innersize = count;
                    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
                    return count;
                }
                else if (nonzerocoord) {
                    if (reducespace < count) {
                        count = reducespace;
                    }
                    *reduce_innersize = count;
                    /* NOTE: This is similar to the (coord != 0) case below. */
                    NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
                    return count;
                }
                else {
                    *reduce_innersize = reducespace;
                    break;
                }
            }
        }
        /* If we broke out of the loop early, we found reduce_innersize */
        if (iop != nop) {
            NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
                            "reduce (%d of %d)\n", iop, nop);
            break;
        }

        shape = NAD_SHAPE(axisdata);
        coord = NAD_INDEX(axisdata);
        if (coord != 0) {
            nonzerocoord = 1;
        }
        reducespace += (shape-coord-1) * factor;
        factor *= shape;
    }

    /*
     * If there was any non-zero coordinate, the reduction inner
     * loop doesn't fit in the buffersize, or the reduction inner loop
     * covered the entire iteration size, can't do the double loop.
     */
    if (nonzerocoord || count < reducespace || idim == ndim) {
        if (reducespace < count) {
            count = reducespace;
        }
        *reduce_innersize = count;
        /* In this case, we can't reuse the reduce loops */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
        return count;
    }

    coord = NAD_INDEX(axisdata);
    if (coord != 0) {
        /*
         * In this case, it is only safe to reuse the buffer if the amount
         * of data copied is not more than the current axes, as is the
         * case when reuse_reduce_loops was active already.
         * It should be in principle OK when the idim loop returns immediately.
         */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
    }
    else {
        /* In this case, we can reuse the reduce loops */
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
    }

    /* From here on count is measured in whole inner loops */
    *reduce_innersize = reducespace;
    count /= reducespace;

    NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n",
                    (int)reducespace, (int)count);

    /*
     * Continue through the rest of the dimensions.  If there are
     * two separated reduction axes, we may have to cut the buffer
     * short again.
     */
    *reduce_outerdim = idim;
    reducespace = 1;
    factor = 1;
    /* Indicate which REDUCE operands have stride 0 at the current level */
    strides = NAD_STRIDES(axisdata);
    for (iop = 0; iop < nop; ++iop) {
        stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
                           (strides[iop] == 0);
        NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
                        "the outer loop? %d\n", iop, (int)stride0op[iop]);
    }
    shape = NAD_SHAPE(axisdata);
    reducespace += (shape-coord-1) * factor;
    factor *= shape;
    NIT_ADVANCE_AXISDATA(axisdata, 1);
    ++idim;

    for (; idim < ndim && reducespace < count;
                                ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
                                (int)reducespace, (int)count);

        strides = NAD_STRIDES(axisdata);
        for (iop = 0; iop < nop; ++iop) {
            /*
             * If a reduce stride switched from zero to non-zero, or
             * vice versa, that's the point where the data will stop
             * being the same element or will repeat, and if the
             * buffer starts with an all zero multi-index up to this
             * point, gives us the reduce_innersize.
             */
            if((stride0op[iop] && (strides[iop] != 0)) ||
                        (!stride0op[iop] &&
                         (strides[iop] == 0) &&
                         (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
                NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
                                    "buffer to %d\n", (int)reducespace);
                /*
                 * This terminates the outer level of our double loop.
                 */
                if (count <= reducespace) {
                    return count * (*reduce_innersize);
                }
                else {
                    return reducespace * (*reduce_innersize);
                }
            }
        }

        shape = NAD_SHAPE(axisdata);
        coord = NAD_INDEX(axisdata);
        if (coord != 0) {
            nonzerocoord = 1;
        }
        reducespace += (shape-coord-1) * factor;
        factor *= shape;
    }

    /* Cap the number of outer iterations at the space found */
    if (reducespace < count) {
        count = reducespace;
    }
    return count * (*reduce_innersize);
}

/*
 * Reports whether any operand of the iterator carries the
 * NPY_OP_ITFLAG_HAS_WRITEBACK flag.  A NULL iterator yields 0.
 */
NPY_NO_EXPORT npy_bool
npyiter_has_writeback(NpyIter *iter)
{
    /* nop keeps its name: the NIT_ macros may read it implicitly */
    int nop;
    int op_index = 0;
    npyiter_opitflags *op_itflags;

    if (iter == NULL) {
        return 0;
    }

    nop = NIT_NOP(iter);
    op_itflags = NIT_OPITFLAGS(iter);

    /* Stop at the first operand that has the flag set */
    while (op_index < nop) {
        if (op_itflags[op_index] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
            return NPY_TRUE;
        }
        ++op_index;
    }

    return NPY_FALSE;
}
#undef NPY_ITERATOR_IMPLEMENTATION_CODE

`.\numpy\numpy\_core\src\multiarray\nditer_constr.c`

/*
 * This file implements the construction, copying, and destruction
 * aspects of NumPy's nditer.
 *
 * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
 * The University of British Columbia
 *
 * Copyright (c) 2011 Enthought, Inc
 *
 * See LICENSE.txt for the license.
 */

#define NPY_NO_DEPRECATED_API NPY_API_VERSION

/* Allow this .c file to include nditer_impl.h */
#define NPY_ITERATOR_IMPLEMENTATION_CODE

#include "nditer_impl.h"
#include "arrayobject.h"
#include "array_coercion.h"
#include "templ_common.h"
#include "array_assign.h"
#include "dtype_traversal.h"


/* Internal helper functions private to this file */

/*
 * Validates the global iterator flags and ORs the matching internal
 * NPY_ITFLAG_* bits into '*itflags'.
 * Returns 1 on success, 0 on error (with a Python exception set).
 */
static int
npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);

/*
 * Validates the 'oa_ndim' / 'op_axes' / 'itershape' combination.
 * The constructor treats a zero return as failure.
 */
static int
npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
                        const npy_intp *itershape);

/*
 * Returns the number of dimensions the iterator should have: 'oa_ndim'
 * when it is non-negative, otherwise the largest ndim among the non-NULL
 * operands.
 */
static int
npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
                       int oa_ndim);

/*
 * Validates the flags of a single operand and fills in '*op_itflags'.
 * Returns 1 on success, 0 on failure.
 */
static int
npyiter_check_per_op_flags(npy_uint32 flags, npyiter_opitflags *op_itflags);

/*
 * Prepares one constructor operand, filling in 'op_dataptr' and
 * 'op_dtype'; may modify 'op_itflags'.
 * Returns 1 on success, 0 on failure.
 */
static int
npyiter_prepare_one_operand(PyArrayObject **op,
                        char **op_dataptr,
                        PyArray_Descr *op_request_dtype,
                        PyArray_Descr** op_dtype,
                        npy_uint32 flags,
                        npy_uint32 op_flags, npyiter_opitflags *op_itflags);

/*
 * Prepares all 'nop' operands for the constructor. The constructor treats
 * a zero return as failure and, in that case, frees the raw iterator
 * memory without any further cleanup.
 */
static int
npyiter_prepare_operands(int nop,
                    PyArrayObject **op_in,
                    PyArrayObject **op,
                    char **op_dataptr,
                    PyArray_Descr **op_request_dtypes,
                    PyArray_Descr **op_dtype,
                    npy_uint32 flags,
                    npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                    npy_int8 *out_maskop);

/*
 * Checks that the data type conversions follow the 'casting' rule.
 * The constructor treats a zero return as failure.
 */
static int
npyiter_check_casting(int nop, PyArrayObject **op,
                    PyArray_Descr **op_dtype,
                    NPY_CASTING casting,
                    npyiter_opitflags *op_itflags);

/*
 * Fills in the AXISDATA array and sets the ITERSIZE field.
 * The constructor treats a zero return as failure.
 */
static int
npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
                    char **op_dataptr,
                    const npy_uint32 *op_flags, int **op_axes,
                    npy_intp const *itershape);

/*
 * Decodes an 'op_axes' entry.
 * NOTE(review): presumably also reports through 'reduction_axis' whether
 * the entry marks a reduction axis -- confirm against the definition.
 */
static inline int
npyiter_get_op_axis(int axis, npy_bool *reduction_axis);

/*
 * Replaces the axis data of operand 'iop' with that of 'op'.
 * NOTE(review): definition not visible in this section -- confirm details.
 */
static void
npyiter_replace_axisdata(
        NpyIter *iter, int iop, PyArrayObject *op,
        int orig_op_ndim, const int *op_axes);

/*
 * Computes the strides of the tracked index, if one was requested.
 * Must be called before the axis order is changed.
 */
static void
npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags);

/* Applies 'order' to the iterator if it forces an iteration order */
static void
npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order);

/*
 * Flips axes with negative strides. The constructor only calls this when
 * no output is being allocated and NPY_ITER_DONT_NEGATE_STRIDES is unset.
 */
static void
npyiter_flip_negative_strides(NpyIter *iter);

/* Reverses the axis ordering of the iterator */
static void
npyiter_reverse_axis_ordering(NpyIter *iter);

/*
 * Reorders the axes to find the best iteration order. The constructor
 * only calls this when the order is not forced and ndim > 1.
 */
static void
npyiter_find_best_axis_ordering(NpyIter *iter);

/* Return a pointer to PyArray_Descr */
static PyArray_Descr *
    /* 定义函数：从多个数组对象创建高级迭代器，支持广播、形状和缓冲区大小控制 */
NPY_NO_EXPORT NpyIter *
NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
                 NPY_ORDER order, NPY_CASTING casting,
                 npy_uint32 *op_flags,
                 PyArray_Descr **op_request_dtypes,
                 int oa_ndim, int **op_axes, npy_intp *itershape,
                 npy_intp buffersize)
{
    /* 迭代器的标志 */
    npy_uint32 itflags = NPY_ITFLAG_IDENTPERM;
    /* 迭代器的维度和操作数的维度 */
    int idim, ndim;
    /* 操作数索引 */
    int iop;

    /* 正在构建的迭代器 */
    NpyIter *iter;

    /* 每个操作数的值 */
    PyArrayObject **op;
    PyArray_Descr **op_dtype;
    npyiter_opitflags *op_itflags;
    char **op_dataptr;

    /* 排列 */
    npy_int8 *perm;
    /* 缓冲区数据 */
    NpyIter_BufferData *bufferdata = NULL;
    /* 是否有任何分配 */
    int any_allocate = 0, any_missing_dtypes = 0, need_subtype = 0;

    /* 自动分配输出的子类型 */
    double subtype_priority = NPY_PRIORITY;
    PyTypeObject *subtype = &PyArray_Type;

    /* 如果定义了构建时间跟踪 */
#if NPY_IT_CONSTRUCTION_TIMING
    /* 构建时间点 */
    npy_intp c_temp,
            c_start,
            c_check_op_axes,
            c_check_global_flags,
            c_calculate_ndim,
            c_malloc,
            c_prepare_operands,
            c_fill_axisdata,
            c_compute_index_strides,
            c_apply_forced_iteration_order,
            c_find_best_axis_ordering,
            c_get_priority_subtype,
            c_find_output_common_dtype,
            c_check_casting,
            c_allocate_arrays,
            c_coalesce_axes,
            c_prepare_buffers;
#endif

    /* 如果操作数超过了最大允许数量 */
    if (nop > NPY_MAXARGS) {
        /* 报错：不能构造超过最大操作数数量的迭代器 */
        PyErr_Format(PyExc_ValueError,
            "Cannot construct an iterator with more than %d operands "
            "(%d were requested)", NPY_MAXARGS, nop);
        /* 返回空指针 */
        return NULL;
    }
    /*
     * 在1.8版本之前，如果 `oa_ndim == 0`，这意味着 `op_axes != NULL` 是一个错误。
     * 在1.8版本中，`oa_ndim == -1` 承担了这个角色，而在这种情况下，op_axes 强制成一个0维的迭代器。
     * 因此，在1.13版本之后，使用 `oa_ndim == 0` 且 `op_axes == NULL` 是一个错误（已废弃）。
     */
    if ((oa_ndim == 0) && (op_axes == NULL)) {
        PyErr_Format(PyExc_ValueError,
            "Using `oa_ndim == 0` when `op_axes` is NULL. "
            "Use `oa_ndim == -1` or the MultiNew "
            "iterator for NumPy <1.8 compatibility");
        return NULL;
    }

    /* 检查 'oa_ndim' 和 'op_axes' 是否一起使用 */
    if (!npyiter_check_op_axes(nop, oa_ndim, op_axes, itershape)) {
        return NULL;
    }

    NPY_IT_TIME_POINT(c_check_op_axes);

    /* 检查全局迭代器标志 */
    if (!npyiter_check_global_flags(flags, &itflags)) {
        return NULL;
    }

    NPY_IT_TIME_POINT(c_check_global_flags);

    /* 计算迭代器应该有多少维度 */
    ndim = npyiter_calculate_ndim(nop, op_in, oa_ndim);

    NPY_IT_TIME_POINT(c_calculate_ndim);

    /* 为迭代器分配内存空间 */
    iter = (NpyIter*)
                PyObject_Malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
    if (iter == NULL) {
        return NULL;
    }

    NPY_IT_TIME_POINT(c_malloc);

    /* 填充基本数据 */
    NIT_ITFLAGS(iter) = itflags;
    NIT_NDIM(iter) = ndim;
    NIT_NOP(iter) = nop;
    NIT_MASKOP(iter) = -1;
    NIT_ITERINDEX(iter) = 0;
    memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);

    op = NIT_OPERANDS(iter);
    op_dtype = NIT_DTYPES(iter);
    op_itflags = NIT_OPITFLAGS(iter);
    op_dataptr = NIT_RESETDATAPTR(iter);

    /* 准备所有操作数 */
    if (!npyiter_prepare_operands(nop, op_in, op, op_dataptr,
                        op_request_dtypes, op_dtype,
                        flags,
                        op_flags, op_itflags,
                        &NIT_MASKOP(iter))) {
        PyObject_Free(iter);
        return NULL;
    }
    /* 将 resetindex 设置为零（就在 resetdataptr 之后） */
    op_dataptr[nop] = 0;

    NPY_IT_TIME_POINT(c_prepare_operands);

    /*
     * 初始化缓冲区数据（在可能释放迭代器之前，必须将缓冲区和 transferdata 设置为 NULL）。
     */
    if (itflags & NPY_ITFLAG_BUFFER) {
        bufferdata = NIT_BUFFERDATA(iter);
        NBF_SIZE(bufferdata) = 0;
        memset(NBF_BUFFERS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
        memset(NBF_PTRS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
        /* 确保 transferdata/auxdata 被置为 NULL */
        memset(NBF_TRANSFERINFO(bufferdata), 0, nop * sizeof(NpyIter_TransferInfo));
    }

    /* 填充 AXISDATA 数组并设置 ITERSIZE 字段 */
    // 如果填充轴数据失败，则释放迭代器并返回空指针
    if (!npyiter_fill_axisdata(iter, flags, op_itflags, op_dataptr,
                                        op_flags, op_axes, itershape)) {
        NpyIter_Deallocate(iter);
        return NULL;
    }

    // 记录时间点：填充轴数据完成
    NPY_IT_TIME_POINT(c_fill_axisdata);

    // 如果启用了缓冲并且未指定缓冲区大小，则使用默认大小
    if (itflags & NPY_ITFLAG_BUFFER) {
        /*
         * If buffering is enabled and no buffersize was given, use a default
         * chosen to be big enough to get some amortization benefits, but
         * small enough to be cache-friendly.
         */
        if (buffersize <= 0) {
            buffersize = NPY_BUFSIZE;
        }
        /* No point in a buffer bigger than the iteration size */
        if (buffersize > NIT_ITERSIZE(iter)) {
            buffersize = NIT_ITERSIZE(iter);
        }
        NBF_BUFFERSIZE(bufferdata) = buffersize;

        /*
         * Initialize for use in FirstVisit, which may be called before
         * the buffers are filled and the reduce pos is updated.
         */
        NBF_REDUCE_POS(bufferdata) = 0;
    }

    /*
     * 如果请求了索引，则计算索引的步长。
     * 注意：在改变轴顺序之前必须执行此操作。
     */
    npyiter_compute_index_strides(iter, flags);

    // 记录时间点：计算索引步长完成
    NPY_IT_TIME_POINT(c_compute_index_strides);

    // 初始化轴置换为标识顺序
    perm = NIT_PERM(iter);
    for(idim = 0; idim < ndim; ++idim) {
        perm[idim] = (npy_int8)idim;
    }

    /*
     * 如果强制指定了迭代顺序，则应用它。
     */
    npyiter_apply_forced_iteration_order(iter, order);
    itflags = NIT_ITFLAGS(iter);

    // 记录时间点：应用强制迭代顺序完成
    NPY_IT_TIME_POINT(c_apply_forced_iteration_order);

    // 设置一些已分配输出的标志
    for (iop = 0; iop < nop; ++iop) {
        if (op[iop] == NULL) {
            /* Flag this so later we can avoid flipping axes */
            any_allocate = 1;
            /* If a subtype may be used, indicate so */
            if (!(op_flags[iop] & NPY_ITER_NO_SUBTYPE)) {
                need_subtype = 1;
            }
            /*
             * If the data type wasn't provided, will need to
             * calculate it.
             */
            if (op_dtype[iop] == NULL) {
                any_missing_dtypes = 1;
            }
        }
    }

    /*
     * 如果未强制指定顺序，则重新排序轴并翻转负步长以找到最佳顺序。
     */
    if (!(itflags & NPY_ITFLAG_FORCEDORDER)) {
        if (ndim > 1) {
            npyiter_find_best_axis_ordering(iter);
        }
        /*
         * If there's an output being allocated, we must not negate
         * any strides.
         */
        if (!any_allocate && !(flags & NPY_ITER_DONT_NEGATE_STRIDES)) {
            npyiter_flip_negative_strides(iter);
        }
        itflags = NIT_ITFLAGS(iter);
    }

    // 记录时间点：找到最佳轴顺序完成
    NPY_IT_TIME_POINT(c_find_best_axis_ordering);

    // 如果需要子类型，获取优先子类型
    if (need_subtype) {
        npyiter_get_priority_subtype(nop, op, op_itflags,
                                     &subtype_priority, &subtype);
    }
    NPY_IT_TIME_POINT(c_get_priority_subtype);
    # 记录当前时间点，用于性能分析和调试

    /*
     * If an automatically allocated output didn't have a specified
     * dtype, we need to figure it out now, before allocating the outputs.
     */
    # 如果自动分配的输出没有指定数据类型，需要在分配输出之前确定数据类型

    if (any_missing_dtypes || (flags & NPY_ITER_COMMON_DTYPE)) {
        # 如果存在缺失的数据类型或者设置了共同数据类型标志

        PyArray_Descr *dtype;
        # 声明一个 NumPy 数组描述符对象指针

        int only_inputs = !(flags & NPY_ITER_COMMON_DTYPE);
        # 只有输入参数没有共同数据类型标志

        op = NIT_OPERANDS(iter);
        # 获取迭代器的操作数

        op_dtype = NIT_DTYPES(iter);
        # 获取迭代器的数据类型数组

        dtype = npyiter_get_common_dtype(nop, op,
                                    op_itflags, op_dtype,
                                    op_request_dtypes,
                                    only_inputs);
        # 调用函数获取共同的数据类型

        if (dtype == NULL) {
            NpyIter_Deallocate(iter);
            return NULL;
        }
        # 如果未能获取到共同的数据类型，释放迭代器并返回空指针

        if (flags & NPY_ITER_COMMON_DTYPE) {
            NPY_IT_DBG_PRINT("Iterator: Replacing all data types\n");
            /* Replace all the data types */
            # 调试信息：替换所有数据类型

            for (iop = 0; iop < nop; ++iop) {
                if (op_dtype[iop] != dtype) {
                    Py_XDECREF(op_dtype[iop]);
                    Py_INCREF(dtype);
                    op_dtype[iop] = dtype;
                }
            }
            # 如果设置了共同数据类型标志，替换所有操作数的数据类型为共同数据类型
        }
        else {
            NPY_IT_DBG_PRINT("Iterator: Setting unset output data types\n");
            /* Replace the NULL data types */
            # 调试信息：设置未设置的输出数据类型

            for (iop = 0; iop < nop; ++iop) {
                if (op_dtype[iop] == NULL) {
                    Py_INCREF(dtype);
                    op_dtype[iop] = dtype;
                }
            }
            # 如果没有设置共同数据类型标志，设置操作数中未设置的数据类型为共同数据类型
        }

        Py_DECREF(dtype);
        # 减少共同数据类型的引用计数
    }

    NPY_IT_TIME_POINT(c_find_output_common_dtype);
    # 记录当前时间点，用于性能分析和调试

    /*
     * All of the data types have been settled, so it's time
     * to check that data type conversions are following the
     * casting rules.
     */
    # 所有数据类型都已确定，现在是检查数据类型转换是否遵循强制转换规则的时候

    if (!npyiter_check_casting(nop, op, op_dtype, casting, op_itflags)) {
        NpyIter_Deallocate(iter);
        return NULL;
    }
    # 如果数据类型转换不符合强制转换规则，释放迭代器并返回空指针

    NPY_IT_TIME_POINT(c_check_casting);
    # 记录当前时间点，用于性能分析和调试

    /*
     * At this point, the iteration order has been finalized. so
     * any allocation of ops that were NULL, or any temporary
     * copying due to casting/byte order/alignment can be
     * done now using a memory layout matching the iterator.
     */
    # 此时，迭代顺序已经最终确定。因此，可以使用与迭代器匹配的内存布局，现在执行任何空操作的分配或由于强制转换/字节顺序/对齐而产生的临时复制。

    if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags,
                            op_itflags, op_axes)) {
        NpyIter_Deallocate(iter);
        return NULL;
    }
    # 如果无法分配数组，释放迭代器并返回空指针

    NPY_IT_TIME_POINT(c_allocate_arrays);
    # 记录当前时间点，用于性能分析和调试

    /*
     * Finally, if a multi-index wasn't requested,
     * it may be possible to coalesce some axes together.
     */
    # 最后，如果没有请求多索引，可能可以将一些轴合并在一起。
    /*
     * 如果数组维度大于1且没有多重索引标志，执行以下操作：
     * 将迭代器的轴合并成更少的维度。
     */
    if (ndim > 1 && !(itflags & NPY_ITFLAG_HASMULTIINDEX)) {
        npyiter_coalesce_axes(iter);
        /*
         * 操作可能改变了布局，因此需要重新获取内部指针。
         */
        itflags = NIT_ITFLAGS(iter);
        ndim = NIT_NDIM(iter);
        op = NIT_OPERANDS(iter);
        op_dtype = NIT_DTYPES(iter);
        op_itflags = NIT_OPITFLAGS(iter);
        op_dataptr = NIT_RESETDATAPTR(iter);
    }

    NPY_IT_TIME_POINT(c_coalesce_axes);

    /*
     * 现在轴已经完成，检查是否可以对 iternext 函数应用单迭代优化。
     */
    if (!(itflags & NPY_ITFLAG_BUFFER)) {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        if (itflags & NPY_ITFLAG_EXLOOP) {
            if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
            }
        }
        else if (NIT_ITERSIZE(iter) == 1) {
            NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
        }
    }

    /*
     * 如果设置了 REFS_OK 标志，则检查是否存在任何引用数组，并进行标记。
     *
     * 注意：这实际上应该是不必要的，但很有可能有人依赖它。
     * 迭代器本身不需要此API，因为它仅对类型转换/缓冲需要。
     * 但在几乎所有用例中，API将被用于进行操作。
     */
    if (flags & NPY_ITER_REFS_OK) {
        for (iop = 0; iop < nop; ++iop) {
            PyArray_Descr *rdt = op_dtype[iop];
            if ((rdt->flags & (NPY_ITEM_REFCOUNT |
                                     NPY_ITEM_IS_POINTER |
                                     NPY_NEEDS_PYAPI)) != 0) {
                /* 迭代需要API访问 */
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
            }
        }
    }

    /* 如果设置了缓冲标志且没有延迟分配 */
    if (itflags & NPY_ITFLAG_BUFFER) {
        if (!npyiter_allocate_transfer_functions(iter)) {
            NpyIter_Deallocate(iter);
            return NULL;
        }
        if (!(itflags & NPY_ITFLAG_DELAYBUF)) {
            /* 分配缓冲区 */
            if (!npyiter_allocate_buffers(iter, NULL)) {
                NpyIter_Deallocate(iter);
                return NULL;
            }

            /* 准备下一个缓冲区并设置 iterend/size */
            if (npyiter_copy_to_buffers(iter, NULL) < 0) {
                NpyIter_Deallocate(iter);
                return NULL;
            }
        }
    }

    NPY_IT_TIME_POINT(c_prepare_buffers);
#if NPY_IT_CONSTRUCTION_TIMING
    // 如果定义了 NPY_IT_CONSTRUCTION_TIMING 宏，则打印迭代器构造时的时间信息
    printf("\nIterator construction timing:\n");
    // 打印各个阶段的时间信息
    NPY_IT_PRINT_TIME_START(c_start);
    NPY_IT_PRINT_TIME_VAR(c_check_op_axes);
    NPY_IT_PRINT_TIME_VAR(c_check_global_flags);
    NPY_IT_PRINT_TIME_VAR(c_calculate_ndim);
    NPY_IT_PRINT_TIME_VAR(c_malloc);
    NPY_IT_PRINT_TIME_VAR(c_prepare_operands);
    NPY_IT_PRINT_TIME_VAR(c_fill_axisdata);
    NPY_IT_PRINT_TIME_VAR(c_compute_index_strides);
    NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order);
    NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering);
    NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype);
    NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype);
    NPY_IT_PRINT_TIME_VAR(c_check_casting);
    NPY_IT_PRINT_TIME_VAR(c_allocate_arrays);
    NPY_IT_PRINT_TIME_VAR(c_coalesce_axes);
    NPY_IT_PRINT_TIME_VAR(c_prepare_buffers);
    // 打印完毕，换行
    printf("\n");
#endif

    // 返回迭代器对象
    return iter;
}

/*NUMPY_API
 * Allocate a new iterator for more than one array object, using
 * standard NumPy broadcasting rules and the default buffer size.
 *
 * This is NpyIter_AdvancedNew without 'op_axes' (oa_ndim == -1), without
 * an explicit 'itershape', and with a buffersize of 0.
 */
NPY_NO_EXPORT NpyIter *
NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
                 NPY_ORDER order, NPY_CASTING casting,
                 npy_uint32 *op_flags,
                 PyArray_Descr **op_request_dtypes)
{
    NpyIter *iter;

    iter = NpyIter_AdvancedNew(nop, op_in, flags, order, casting,
                               op_flags, op_request_dtypes,
                               -1,      /* oa_ndim: op_axes not used */
                               NULL,    /* op_axes */
                               NULL,    /* itershape */
                               0);      /* buffersize */
    return iter;
}

/*NUMPY_API
 * Allocate a new iterator for one array object.
 *
 * 'flags' carries both global and per-operand flags; it is split with
 * NPY_ITER_GLOBAL_FLAGS and NPY_ITER_PER_OP_FLAGS before being handed to
 * NpyIter_AdvancedNew for a single operand.
 */
NPY_NO_EXPORT NpyIter *
NpyIter_New(PyArrayObject *op, npy_uint32 flags,
                  NPY_ORDER order, NPY_CASTING casting,
                  PyArray_Descr* dtype)
{
    /* The per-operand part of the combined flags */
    npy_uint32 per_op_flags = flags & NPY_ITER_PER_OP_FLAGS;
    /* The global part of the combined flags */
    npy_uint32 global_flags = flags & NPY_ITER_GLOBAL_FLAGS;

    /* One operand, no op_axes, no itershape, default buffer size */
    return NpyIter_AdvancedNew(1, &op, global_flags, order, casting,
                               &per_op_flags, &dtype,
                               -1, NULL, NULL, 0);
}

/*NUMPY_API
 * Makes a copy of the iterator
 *
 * The raw iterator memory is duplicated, new references are taken on the
 * operands and dtypes, and, when buffering is enabled, fresh buffers are
 * allocated and the transfer information is copied.
 *
 * Returns the new iterator, or NULL with a Python exception set.
 */
NPY_NO_EXPORT NpyIter *
NpyIter_Copy(NpyIter *iter)
{
    /*
     * NOTE(review): 'itflags', 'ndim' and 'nop' are presumably referenced
     * by name from the NIT_* accessor macros; keep these locals.
     */
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);
    int out_of_memory = 0;

    npy_intp size;
    NpyIter *newiter;
    PyArrayObject **objects;
    PyArray_Descr **dtypes;

    /* Allocate memory for the new iterator */
    size = NIT_SIZEOF_ITERATOR(itflags, ndim, nop);
    newiter = (NpyIter*)PyObject_Malloc(size);
    if (newiter == NULL) {
        /* Must not memcpy into NULL; PyObject_Malloc sets no exception */
        PyErr_NoMemory();
        return NULL;
    }

    /* Copy the raw values to the new iterator */
    memcpy(newiter, iter, size);

    /*
     * Take ownership of references to the operands and dtypes.
     * The X variants are used because NpyIter_Deallocate releases these
     * slots with Py_XDECREF, i.e. they are allowed to be NULL.
     */
    objects = NIT_OPERANDS(newiter);
    dtypes = NIT_DTYPES(newiter);
    for (iop = 0; iop < nop; ++iop) {
        Py_XINCREF(objects[iop]);
        Py_XINCREF(dtypes[iop]);
    }

    /* Allocate buffers and make copies of the transfer data if necessary */
    if (itflags & NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata;
        npy_intp buffersize, itemsize;
        char **buffers;

        bufferdata = NIT_BUFFERDATA(newiter);
        buffers = NBF_BUFFERS(bufferdata);
        buffersize = NBF_BUFFERSIZE(bufferdata);
        NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);

        /*
         * Every slot still holds the source iterator's pointer (from the
         * memcpy). Once an allocation fails, the remaining slots are
         * NULLed so that deallocating 'newiter' never frees anything
         * owned by 'iter'.
         */
        for (iop = 0; iop < nop; ++iop) {
            if (buffers[iop] != NULL) {
                if (out_of_memory) {
                    buffers[iop] = NULL;
                }
                else {
                    itemsize = dtypes[iop]->elsize;
                    buffers[iop] = PyArray_malloc(itemsize*buffersize);
                    if (buffers[iop] == NULL) {
                        out_of_memory = 1;
                    }
                    else {
                        /* Zero-fill buffers of dtypes that need init */
                        if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
                            memset(buffers[iop], '\0', itemsize*buffersize);
                        }
                    }
                }
            }

            if (transferinfo[iop].read.func != NULL) {
                if (out_of_memory) {
                    transferinfo[iop].read.func = NULL;  /* No cleanup */
                }
                else {
                    /* Copied in place: the slot holds the source's info */
                    if (NPY_cast_info_copy(&transferinfo[iop].read,
                                           &transferinfo[iop].read) < 0) {
                        out_of_memory = 1;
                    }
                }
            }

            if (transferinfo[iop].write.func != NULL) {
                if (out_of_memory) {
                    transferinfo[iop].write.func = NULL;  /* No cleanup */
                }
                else {
                    if (NPY_cast_info_copy(&transferinfo[iop].write,
                                           &transferinfo[iop].write) < 0) {
                        out_of_memory = 1;
                    }
                }
            }

            if (transferinfo[iop].clear.func != NULL) {
                if (out_of_memory) {
                    transferinfo[iop].clear.func = NULL;  /* No cleanup */
                }
                else {
                    if (NPY_traverse_info_copy(&transferinfo[iop].clear,
                                               &transferinfo[iop].clear) < 0) {
                        out_of_memory = 1;
                    }
                }
            }
        }

        /* Initialize the buffers to the current iterindex */
        if (!out_of_memory && NBF_SIZE(bufferdata) > 0) {
            npyiter_goto_iterindex(newiter, NIT_ITERINDEX(newiter));

            /*
             * Prepare the next buffers and set iterend/size. A failure is
             * propagated the same way NpyIter_AdvancedNew does it.
             */
            if (npyiter_copy_to_buffers(newiter, NULL) < 0) {
                NpyIter_Deallocate(newiter);
                return NULL;
            }
        }
    }

    if (out_of_memory) {
        NpyIter_Deallocate(newiter);
        PyErr_NoMemory();
        return NULL;
    }

    return newiter;
}

/*NUMPY_API
 * Deallocate an iterator.
 *
 * To correctly work when an error is in progress, we have to check
 * `PyErr_Occurred()`. This is necessary when buffers are not finalized
 * or WritebackIfCopy is used. We could avoid that check by exposing a new
 * function which is passed in whether or not a Python error is already set.
 *
 * Returns nonzero when no Python error was pending on entry and all
 * buffer flushes and writebacks succeeded, zero otherwise. A NULL 'iter'
 * only reports whether an error was pending.
 */
NPY_NO_EXPORT int
NpyIter_Deallocate(NpyIter *iter)
{
    /* Sampled before anything else, including the NULL check */
    int success = PyErr_Occurred() == NULL;

    /*
     * NOTE(review): 'itflags' and 'nop' are presumably referenced by name
     * from the NIT_* accessor macros; keep these locals and set them
     * before the other accessors are used.
     */
    npy_uint32 itflags;
    int iop, nop;
    PyArray_Descr **dtypes;
    PyArrayObject **operands;
    npyiter_opitflags *op_itflags;

    if (iter == NULL) {
        return success;
    }

    itflags = NIT_ITFLAGS(iter);
    nop = NIT_NOP(iter);
    dtypes = NIT_DTYPES(iter);
    operands = NIT_OPERANDS(iter);
    op_itflags = NIT_OPITFLAGS(iter);

    /* Deallocate any buffers and buffering data */
    if (itflags & NPY_ITFLAG_BUFFER) {
        /*
         * Ensure no data is held by the buffers before they are cleared:
         * flush them when no error is pending, otherwise just clear them.
         */
        if (!success) {
            npyiter_clear_buffers(iter);
        }
        else if (npyiter_copy_from_buffers(iter) < 0) {
            success = NPY_FAIL;
        }

        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);

        /* Release the per-operand buffers */
        char **buffers = NBF_BUFFERS(bufferdata);
        for (iop = 0; iop < nop; ++iop) {
            PyArray_free(buffers[iop]);
        }

        /* Release the per-operand read/write/clear transfer info */
        NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);
        for (iop = 0; iop < nop; ++iop) {
            NPY_cast_info_xfree(&transferinfo[iop].read);
            NPY_cast_info_xfree(&transferinfo[iop].write);
            NPY_traverse_info_xfree(&transferinfo[iop].clear);
        }
    }

    /*
     * Deallocate all the dtypes and objects that were iterated and resolve
     * any writeback buffers created by the iterator.
     */
    for (iop = 0; iop < nop; ++iop) {
        PyArrayObject *operand = operands[iop];

        if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
            /*
             * Resolve is only attempted while still successful; in every
             * other case (including after a successful resolve) the
             * writeback is discarded, exactly as before.
             */
            if (success && PyArray_ResolveWritebackIfCopy(operand) < 0) {
                success = 0;
            }
            else {
                PyArray_DiscardWritebackIfCopy(operand);
            }
        }
        Py_XDECREF(dtypes[iop]);
        Py_XDECREF(operand);
    }

    /* Deallocate the iterator memory */
    PyObject_Free(iter);
    return success;
}


/* Checks 'flags' for (C|F)_ORDER_INDEX, MULTI_INDEX, and EXTERNAL_LOOP,
 * setting the appropriate internal flags in 'itflags'.
 *
 * The RANGED, BUFFERED, GROWINNER and DELAY_BUFALLOC flags are translated
 * to their internal counterparts here as well.
 *
 * Returns 1 on success, 0 on error.
 */
static int
npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
{
    /* Per-operand flags are not allowed among the global flags */
    if ((flags & NPY_ITER_PER_OP_FLAGS) != 0) {
        PyErr_SetString(PyExc_ValueError,
                    "A per-operand flag was passed as a global flag "
                    "to the iterator constructor");
        return 0;
    }

    /* Check for an index */
    if (flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
        /* Only one of the two index orders may be tracked */
        if ((flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) ==
                    (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
            PyErr_SetString(PyExc_ValueError,
                    "Iterator flags C_INDEX and "
                    "F_INDEX cannot both be specified");
            return 0;
        }
        (*itflags) |= NPY_ITFLAG_HASINDEX;
    }
    /* Check if a multi-index was requested */
    if (flags & NPY_ITER_MULTI_INDEX) {
        /*
         * This flag primarily disables dimension manipulations that
         * would produce an incorrect multi-index.
         */
        (*itflags) |= NPY_ITFLAG_HASMULTIINDEX;
    }
    /* Check if the caller wants to handle inner iteration */
    if (flags & NPY_ITER_EXTERNAL_LOOP) {
        /* An external loop cannot coexist with index tracking */
        if ((*itflags) & (NPY_ITFLAG_HASINDEX | NPY_ITFLAG_HASMULTIINDEX)) {
            PyErr_SetString(PyExc_ValueError,
                    "Iterator flag EXTERNAL_LOOP cannot be used "
                    "if an index or multi-index is being tracked");
            return 0;
        }
        (*itflags) |= NPY_ITFLAG_EXLOOP;
    }
    /* Ranged */
    if (flags & NPY_ITER_RANGED) {
        (*itflags) |= NPY_ITFLAG_RANGE;
        /* RANGED with EXTERNAL_LOOP is only valid when also BUFFERED */
        if ((flags & NPY_ITER_EXTERNAL_LOOP) &&
                                    !(flags & NPY_ITER_BUFFERED)) {
            PyErr_SetString(PyExc_ValueError,
                    "Iterator flag RANGED cannot be used with "
                    "the flag EXTERNAL_LOOP unless "
                    "BUFFERED is also enabled");
            return 0;
        }
    }
    /* Buffering */
    if (flags & NPY_ITER_BUFFERED) {
        (*itflags) |= NPY_ITFLAG_BUFFER;
        /* GROWINNER and DELAY_BUFALLOC only take effect with BUFFERED */
        if (flags & NPY_ITER_GROWINNER) {
            (*itflags) |= NPY_ITFLAG_GROWINNER;
        }
        if (flags & NPY_ITER_DELAY_BUFALLOC) {
            (*itflags) |= NPY_ITFLAG_DELAYBUF;
        }
    }

    return 1;
}

/*
 * Calculates the number of dimensions the iterator should have.
 *
 * When 'op_axes' is in use (oa_ndim >= 0), 'oa_ndim' is returned as is.
 * Otherwise the result is the largest ndim among the non-NULL entries of
 * 'op_in', or 0 when there is none.
 */
static int
npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
                       int oa_ndim)
{
    /* If 'op_axes' is being used, force 'ndim' */
    if (oa_ndim >= 0 ) {
        return oa_ndim;
    }
    /* Otherwise it's the maximum 'ndim' from the operands */
    else {
        int ndim = 0, iop;

        for (iop = 0; iop < nop; ++iop) {
            /* NULL operands contribute no dimensions */
            if (op_in[iop] != NULL) {
                int ondim = PyArray_NDIM(op_in[iop]);
                if (ondim > ndim) {
                    ndim = ondim;
                }
            }
        }

        return ndim;
    }
}

/*
 * Checks the per-operand input flags, and fills in op_itflags.
 *
 * On success '*op_itflags' holds the READ/WRITE bits derived from
 * READONLY / READWRITE / WRITEONLY, plus WRITEMASKED and VIRTUAL when
 * those were requested.
 *
 * Returns 1 on success, 0 on failure (with a ValueError set).
 */
static int
npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
{
    /* Global flags are not allowed among the per-operand flags */
    if ((op_flags & NPY_ITER_GLOBAL_FLAGS) != 0) {
        PyErr_SetString(PyExc_ValueError,
                    "A global iterator flag was passed as a per-operand flag "
                    "to the iterator constructor");
        return 0;
    }

    /* Check the read/write flags */
    if (op_flags & NPY_ITER_READONLY) {
        /* The read/write flags are mutually exclusive */
        if (op_flags & (NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) {
            PyErr_SetString(PyExc_ValueError,
                    "Only one of the iterator flags READWRITE, "
                    "READONLY, and WRITEONLY may be "
                    "specified for an operand");
            return 0;
        }

        *op_itflags = NPY_OP_ITFLAG_READ;
    }
    else if (op_flags & NPY_ITER_READWRITE) {
        /* The read/write flags are mutually exclusive */
        if (op_flags & NPY_ITER_WRITEONLY) {
            PyErr_SetString(PyExc_ValueError,
                    "Only one of the iterator flags READWRITE, "
                    "READONLY, and WRITEONLY may be "
                    "specified for an operand");
            return 0;
        }

        *op_itflags = NPY_OP_ITFLAG_READ|NPY_OP_ITFLAG_WRITE;
    }
    else if(op_flags & NPY_ITER_WRITEONLY) {
        *op_itflags = NPY_OP_ITFLAG_WRITE;
    }
    else {
        PyErr_SetString(PyExc_ValueError,
                "None of the iterator flags READWRITE, "
                "READONLY, or WRITEONLY were "
                "specified for an operand");
        return 0;
    }

    /*
     * Check the flags for temporary copies: a writeable operand that asks
     * for COPY without UPDATEIFCOPY is rejected.
     */
    if (((*op_itflags) & NPY_OP_ITFLAG_WRITE) &&
                (op_flags & (NPY_ITER_COPY |
                           NPY_ITER_UPDATEIFCOPY)) == NPY_ITER_COPY) {
        PyErr_SetString(PyExc_ValueError,
                "If an iterator operand is writeable, must use "
                "the flag UPDATEIFCOPY instead of "
                "COPY");
        return 0;
    }

    /* Check the flag for a write masked operand */
    if (op_flags & NPY_ITER_WRITEMASKED) {
        if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
            PyErr_SetString(PyExc_ValueError,
                "The iterator flag WRITEMASKED may only "
                "be used with READWRITE or WRITEONLY");
            return 0;
        }
        if ((op_flags & NPY_ITER_ARRAYMASK) != 0) {
            PyErr_SetString(PyExc_ValueError,
                "The iterator flag WRITEMASKED may not "
                "be used together with ARRAYMASK");
            return 0;
        }
        *op_itflags |= NPY_OP_ITFLAG_WRITEMASKED;
    }

    /* A virtual operand must also be flagged READWRITE */
    if ((op_flags & NPY_ITER_VIRTUAL) != 0) {
        if ((op_flags & NPY_ITER_READWRITE) == 0) {
            /*
             * NOTE(review): the message below reads "should be be used";
             * it is kept byte-for-byte because it is runtime text.
             */
            PyErr_SetString(PyExc_ValueError,
                "The iterator flag VIRTUAL should be "
                "be used together with READWRITE");
            return 0;
        }
        *op_itflags |= NPY_OP_ITFLAG_VIRTUAL;
    }

    return 1;
}

/*
 * Prepares a constructor operand.  Assumes a reference to 'op'
 * is owned, and that 'op' may be replaced.  Fills in 'op_dataptr',
 * 'op_dtype', and may modify 'op_itflags'.
 *
 * For a NULL operand, 'op_flags' must contain ALLOCATE or VIRTUAL.  With
 * ALLOCATE the operand must be flagged for writing and '*op_dtype' receives
 * a new reference to 'op_request_dtype' (which may be NULL); without it
 * '*op_dtype' starts out NULL.  A NULL ARRAYMASK operand that still has no
 * dtype gets the bool descriptor.  '*op_dataptr' is set to NULL.
 *
 * Returns 1 on success, 0 on failure.
 */
static int
npyiter_prepare_one_operand(PyArrayObject **op,
                        char **op_dataptr,
                        PyArray_Descr *op_request_dtype,
                        PyArray_Descr **op_dtype,
                        npy_uint32 flags,
                        npy_uint32 op_flags, npyiter_opitflags *op_itflags)
{
    /* NULL operands must be automatically allocated outputs */
    if (*op == NULL) {
        /* ALLOCATE or VIRTUAL should be enabled */
        if ((op_flags & (NPY_ITER_ALLOCATE|NPY_ITER_VIRTUAL)) == 0) {
            PyErr_SetString(PyExc_ValueError,
                    "Iterator operand was NULL, but neither the "
                    "ALLOCATE nor the VIRTUAL flag was specified");
            return 0;
        }

        if (op_flags & NPY_ITER_ALLOCATE) {
            /* Writing should be enabled */
            if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
                PyErr_SetString(PyExc_ValueError,
                        "Automatic allocation was requested for an iterator "
                        "operand, but it wasn't flagged for writing");
                return 0;
            }
            /*
             * Reading should be disabled if buffering is enabled without
             * also enabling NPY_ITER_DELAY_BUFALLOC.  In all other cases,
             * the caller may initialize the allocated operand to a value
             * before beginning iteration.
             */
            if (((flags & (NPY_ITER_BUFFERED |
                            NPY_ITER_DELAY_BUFALLOC)) == NPY_ITER_BUFFERED) &&
                    ((*op_itflags) & NPY_OP_ITFLAG_READ)) {
                PyErr_SetString(PyExc_ValueError,
                        "Automatic allocation was requested for an iterator "
                        "operand, and it was flagged as readable, but "
                        "buffering  without delayed allocation was enabled");
                return 0;
            }

            /* If a requested dtype was provided, use it, otherwise NULL */
            Py_XINCREF(op_request_dtype);
            *op_dtype = op_request_dtype;
        }
        else {
            *op_dtype = NULL;
        }

        /* Specify bool if no dtype was requested for the mask */
        if (op_flags & NPY_ITER_ARRAYMASK) {
            if (*op_dtype == NULL) {
                *op_dtype = PyArray_DescrFromType(NPY_BOOL);
                if (*op_dtype == NULL) {
                    return 0;
                }
            }
        }

        *op_dataptr = NULL;

        return 1;
    }

    /* VIRTUAL operands must be NULL */
    if (op_flags & NPY_ITER_VIRTUAL) {
        PyErr_SetString(PyExc_ValueError,
                "Iterator operand flag VIRTUAL was specified, "
                "but the operand was not NULL");
        return 0;
    }

    /*
     * NOTE(review): this excerpt has lost the opening of the branch that
     * handles a non-NULL operand, together with that branch's entire body;
     * only its closing brace and the 'else' below survive.  Judging by the
     * error message in the 'else', the missing condition presumably tests
     * that '*op' is an ndarray -- confirm against the upstream source and
     * restore the branch before compiling.
     */
    }
    else {
        /* Taken when the (missing) operand check above does not hold */
        PyErr_SetString(PyExc_ValueError,
                "Iterator inputs must be ndarrays");
        return 0;
    }

    return 1;
    /*
     * NOTE(review): the function's closing brace is also absent from this
     * excerpt; add it back together with the missing branch above.
     */
/*
 * Takes a new reference to every entry of 'op_in', stores it in 'op', and
 * prepares the operands one by one, so that later stages are free to swap
 * an array for a copy when one turns out to be necessary.
 *
 * For each operand the per-operand flags are validated (filling in
 * 'op_itflags'), and npyiter_prepare_one_operand fills in 'op_dataptr' and
 * 'op_dtype'.  'op_request_dtypes' may be NULL, in which case no dtype is
 * requested for any operand.  The index of the ARRAYMASK operand, if there
 * is one, is written to '*out_maskop'.
 *
 * Returns 1 on success.  On failure returns 0 after dropping the references
 * held in 'op' and 'op_dtype' for every operand touched so far.
 */
static int
npyiter_prepare_operands(int nop, PyArrayObject **op_in,
                    PyArrayObject **op,
                    char **op_dataptr,
                    PyArray_Descr **op_request_dtypes,
                    PyArray_Descr **op_dtype,
                    npy_uint32 flags,
                    npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                    npy_int8 *out_maskop)
{
    int iop, j;
    npy_int8 mask_index = -1;
    int saw_writemasked = 0;

    for (iop = 0; iop < nop; ++iop) {
        PyArray_Descr *wanted_dtype = NULL;

        /* Own a reference to the operand; its dtype is filled in below */
        op[iop] = op_in[iop];
        Py_XINCREF(op[iop]);
        op_dtype[iop] = NULL;

        /* Validate the read/write flags and translate them to op_itflags */
        if (!npyiter_check_per_op_flags(op_flags[iop], &op_itflags[iop])) {
            goto fail_partial;
        }

        /* Remember which operand supplies the mask; only one is allowed */
        if ((op_flags[iop] & NPY_ITER_ARRAYMASK) != 0) {
            if (mask_index != -1) {
                PyErr_SetString(PyExc_ValueError,
                        "Only one iterator operand may receive an "
                        "ARRAYMASK flag");
                goto fail_partial;
            }

            mask_index = iop;
            *out_maskop = iop;
        }

        if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
            saw_writemasked = 1;
        }

        /*
         * Prepare the operand.  On success this leaves a reference in
         * op_dtype[iop].
         */
        if (op_request_dtypes) {
            wanted_dtype = op_request_dtypes[iop];
        }
        if (!npyiter_prepare_one_operand(&op[iop],
                        &op_dataptr[iop],
                        wanted_dtype,
                        &op_dtype[iop],
                        flags,
                        op_flags[iop], &op_itflags[iop])) {
            goto fail_partial;
        }
    }

    /* WRITEMASKED operands and an ARRAYMASK operand must come together */
    if (saw_writemasked) {
        if (mask_index < 0) {
            PyErr_SetString(PyExc_ValueError,
                    "An iterator operand was flagged as WRITEMASKED, "
                    "but no ARRAYMASK operand was given to supply "
                    "the mask");
            goto fail_all;
        }
    }
    else if (mask_index >= 0) {
        PyErr_SetString(PyExc_ValueError,
                "An iterator operand was flagged as the ARRAYMASK, "
                "but no WRITEMASKED operands were given to use "
                "the mask");
        goto fail_all;
    }

    return 1;

  fail_all:
    /* Every operand was prepared, so all of them need releasing */
    iop = nop - 1;
  fail_partial:
    /* Release operands 0..iop inclusive, in ascending order */
    for (j = 0; j <= iop; ++j) {
        Py_XDECREF(op[j]);
        Py_XDECREF(op_dtype[j]);
    }
    return 0;
}
    /*
     * Maps a casting rule to the quoted name used in error messages; any
     * value not listed below yields "<unknown>".  Every result is a string
     * literal.
     *
     * NOTE(review): the enclosing function header and braces are missing
     * from this excerpt.  The callers further down invoke
     * npyiter_casting_to_string(casting) with an NPY_CASTING argument and
     * format the result with %s, so the header is presumably
     * 'static const char *npyiter_casting_to_string(NPY_CASTING casting)'
     * -- confirm against the upstream source.
     */
    switch (casting) {
        case NPY_NO_CASTING:
            return "'no'";
        case NPY_EQUIV_CASTING:
            return "'equiv'";
        case NPY_SAFE_CASTING:
            return "'safe'";
        case NPY_SAME_KIND_CASTING:
            return "'same_kind'";
        case NPY_UNSAFE_CASTING:
            return "'unsafe'";
        default:
            return "<unknown>";
    }
/*
 * Checks, for every operand, that the casts implied by its iteration dtype
 * are permitted under the rule 'casting'.
 *
 * A non-NULL operand whose dtype is not equivalent to 'op_dtype[iop]' must
 * be castable operand -> iteration dtype when it is read, and iteration
 * dtype -> operand when it is written.  Operands that pass are marked with
 * NPY_OP_ITFLAG_CAST in 'op_itflags'.  NULL operands are skipped.
 *
 * Returns 1 on success; on the first disallowed cast sets a TypeError and
 * returns 0.
 */
static int
npyiter_check_casting(int nop, PyArrayObject **op,
                    PyArray_Descr **op_dtype,
                    NPY_CASTING casting,
                    npyiter_opitflags *op_itflags)
{
    int iop;

    for(iop = 0; iop < nop; ++iop) {
        NPY_IT_DBG_PRINT1("Iterator: Checking casting for operand %d\n",
                            (int)iop);

        /* With tracing enabled, dump the operand's and iterator's dtypes */
#if NPY_IT_DBG_TRACING
        printf("op: ");
        if (op[iop] != NULL) {
            PyObject_Print((PyObject *)PyArray_DESCR(op[iop]), stdout, 0);
        }
        else {
            printf("<null>");
        }
        printf(", iter: ");
        PyObject_Print((PyObject *)op_dtype[iop], stdout, 0);
        printf("\n");
#endif

        /* If the types aren't equivalent, a cast is necessary */
        if (op[iop] != NULL && !PyArray_EquivTypes(PyArray_DESCR(op[iop]),
                                                     op_dtype[iop])) {
            /* Check read (op -> temp) casting */
            if ((op_itflags[iop] & NPY_OP_ITFLAG_READ) &&
                        !PyArray_CanCastArrayTo(op[iop],
                                          op_dtype[iop],
                                          casting)) {
                PyErr_Format(PyExc_TypeError,
                        "Iterator operand %d dtype could not be cast from "
                        "%R to %R according to the rule %s",
                        iop, PyArray_DESCR(op[iop]), op_dtype[iop],
                        npyiter_casting_to_string(casting));
                return 0;
            }
            /* Check write (temp -> op) casting */
            if ((op_itflags[iop] & NPY_OP_ITFLAG_WRITE) &&
                        !PyArray_CanCastTypeTo(op_dtype[iop],
                                          PyArray_DESCR(op[iop]),
                                          casting)) {
                PyErr_Format(PyExc_TypeError,
                        "Iterator requested dtype could not be cast from "
                        "%R to %R, the operand %d dtype, "
                        "according to the rule %s",
                        op_dtype[iop], PyArray_DESCR(op[iop]), iop,
                        npyiter_casting_to_string(casting));
                return 0;
            }

            NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
                                "because the types aren't equivalent\n");
            /* Indicate that this operand needs casting */
            op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
        }
    }

    return 1;
}

/*
 * Checks that the mask broadcasts to the WRITEMASKED REDUCE operand 'iop',
 * but that 'iop' never broadcasts to the mask.  If 'iop' broadcast to the
 * mask there would be more than one mask value per reduction element,
 * which is invalid.
 *
 * This check should only be called after all the operands have been
 * filled in.
 *
 * Returns 1 on success; on error sets a ValueError and returns 0.
 */
static int
check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);
    /* Index of the mask operand, not a count */
    int maskop = NIT_MASKOP(iter);

    NpyIter_AxisData *axisdata;
    /* Not referenced by name below; presumably read by NIT_ADVANCE_AXISDATA */
    npy_intp sizeof_axisdata;

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    for(idim = 0; idim < ndim; ++idim) {
        npy_intp maskstride, istride;

        istride = NAD_STRIDES(axisdata)[iop];
        maskstride = NAD_STRIDES(axisdata)[maskop];

        /*
         * The mask advances along this axis while the operand stays put:
         * one operand element would see several mask values.
         */
        if (maskstride != 0 && istride == 0) {
            PyErr_SetString(PyExc_ValueError,
                    "Iterator reduction operand is WRITEMASKED, "
                    "but also broadcasts to multiple mask values. "
                    "There can be only one mask value per WRITEMASKED "
                    "element.");
            return 0;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    return 1;
}
/*
 * Check whether a reduction is OK based on the flags and the operand being
 * readwrite.  This path is deprecated, since usually only specific axes
 * should be reduced; if axes are specified explicitly, the flag is
 * unnecessary.
 *
 * Only a writable operand is treated as a reduction.  For such an operand
 * NPY_ITER_REDUCE_OK must be present in 'flags', the operand must also be
 * readable, and it must not be the ARRAYMASK operand ('maskop').  When all
 * of that holds, NPY_ITFLAG_REDUCE is set on the iterator and
 * NPY_OP_ITFLAG_REDUCE on the operand.  'dim' is used only in the error
 * message.
 *
 * Returns 1 on success (including for non-writable operands, which are
 * left untouched); otherwise sets a ValueError and returns 0.
 */
static int
npyiter_check_reduce_ok_and_set_flags(
        NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
        int iop, int maskop, int dim) {
    /* If it's writable, that means a reduction; otherwise nothing to do */
    if (!(op_itflags[iop] & NPY_OP_ITFLAG_WRITE)) {
        return 1;
    }

    if (!(flags & NPY_ITER_REDUCE_OK)) {
        PyErr_Format(PyExc_ValueError,
                "output operand requires a reduction along dimension %d, "
                "but the reduction is not enabled. The dimension size of 1 "
                "does not match the expected output shape.", dim);
        return 0;
    }
    if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
        PyErr_SetString(PyExc_ValueError,
                "output operand requires a reduction, but is flagged as "
                "write-only, not read-write");
        return 0;
    }
    /*
     * The ARRAYMASK can't be a reduction, because it would be possible
     * to write back to the array once when the ARRAYMASK says 'True',
     * then have the reduction on the ARRAYMASK later flip to 'False',
     * indicating that the write back should never have been done, and
     * violating the strict masking semantics.
     */
    if (iop == maskop) {
        PyErr_SetString(PyExc_ValueError,
                "output operand requires a "
                "reduction, but is flagged as "
                "the ARRAYMASK operand which "
                "is not permitted to be the "
                "result of a reduction");
        return 0;
    }

    /* The two literals previously concatenated to "...isoccurring" */
    NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is "
                     "occurring\n");

    /* Record the reduction on both the iterator and the operand */
    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
    op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
    return 1;
}

/**
 * Strips the reduction marker (NPY_ITER_REDUCTION_AXIS) from an op_axes
 * entry and reports whether the marker was present.
 *
 * @param axis The op_axes[i] entry to normalize.
 * @param reduction_axis Optional output (may be NULL).  Set to 1 when
 *        'axis' is at least NPY_ITER_REDUCTION_AXIS(-1), i.e. it carries
 *        the reduction marker, and to 0 otherwise.
 * @returns The axis with the marker offset removed, or 'axis' unchanged
 *          when no marker was present.
 */
static inline int
npyiter_get_op_axis(int axis, npy_bool *reduction_axis) {
    npy_bool is_marked = axis >= NPY_ITER_REDUCTION_AXIS(-1);

    if (reduction_axis != NULL) {
        *reduction_axis = is_marked;
    }
    return is_marked ? axis - NPY_ITER_REDUCTION_AXIS(0) : axis;
}
/*
 * Fills in the AXISDATA for the 'nop' operands, broadcasting
 * the dimensions as necessary. Also fills
 * in the ITERSIZE data member.
 *
 * If op_axes is not NULL, it should point to an array of ndim-sized
 * arrays, one for each operand.
 *
 * If itershape is not NULL, it supplies the iteration shape; negative
 * entries are deduced from the operands, and an entry of 1 must not be
 * expanded by broadcasting.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): this excerpt is incomplete.  The loop that fills in the
 * per-dimension axisdata and the 'broadcast_error' handler are missing
 * (see the notes in the body); restore both from the upstream source
 * before compiling.
 */
static int
npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
                    char **op_dataptr,
                    const npy_uint32 *op_flags, int **op_axes,
                    npy_intp const *itershape)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);
    /* Index of the mask operand; not referenced in the surviving code */
    int maskop = NIT_MASKOP(iter);

    /* Number of dimensions of the operand currently being examined */
    int ondim;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    PyArrayObject **op = NIT_OPERANDS(iter), *op_cur;

    npy_intp broadcast_shape[NPY_MAXDIMS];

    /* First broadcast the shapes together */
    if (itershape == NULL) {
        for (idim = 0; idim < ndim; ++idim) {
            broadcast_shape[idim] = 1;
        }
    }
    else {
        for (idim = 0; idim < ndim; ++idim) {
            broadcast_shape[idim] = itershape[idim];
            /* Negative shape entries are deduced from the operands */
            if (broadcast_shape[idim] < 0) {
                broadcast_shape[idim] = 1;
            }
        }
    }
    for (iop = 0; iop < nop; ++iop) {
        op_cur = op[iop];
        /* NULL operands contribute nothing to the broadcast shape */
        if (op_cur != NULL) {
            npy_intp *shape = PyArray_DIMS(op_cur);
            ondim = PyArray_NDIM(op_cur);

            if (op_axes == NULL || op_axes[iop] == NULL) {
                /*
                 * Possible if op_axes are being used, but
                 * op_axes[iop] is NULL
                 */
                if (ondim > ndim) {
                    PyErr_SetString(PyExc_ValueError,
                            "input operand has more dimensions than allowed "
                            "by the axis remapping");
                    return 0;
                }
                /* Align the operand's axes with the trailing iterator axes */
                for (idim = 0; idim < ondim; ++idim) {
                    npy_intp bshape = broadcast_shape[idim+ndim-ondim];
                    npy_intp op_shape = shape[idim];

                    if (bshape == 1) {
                        broadcast_shape[idim+ndim-ondim] = op_shape;
                    }
                    else if (bshape != op_shape && op_shape != 1) {
                        goto broadcast_error;
                    }
                }
            }
            else {
                int *axes = op_axes[iop];
                for (idim = 0; idim < ndim; ++idim) {
                    /* Operand axis mapped to this iterator axis */
                    int i = npyiter_get_op_axis(axes[idim], NULL);

                    /* Negative entries are skipped */
                    if (i >= 0) {
                        if (i < ondim) {
                            npy_intp bshape = broadcast_shape[idim];
                            npy_intp op_shape = shape[i];

                            if (bshape == 1) {
                                broadcast_shape[idim] = op_shape;
                            }
                            else if (bshape != op_shape && op_shape != 1) {
                                goto broadcast_error;
                            }
                        }
                        else {
                            PyErr_Format(PyExc_ValueError,
                                    "Iterator input op_axes[%d][%d] (==%d) "
                                    "is not a valid axis of op[%d], which "
                                    "has %d dimensions ",
                                    iop, (ndim-idim-1), i,
                                    iop, ondim);
                            return 0;
                        }
                    }
                }
            }
        }
    }
    /*
     * If a shape was provided with a 1 entry, make sure that entry didn't
     * get expanded by broadcasting.
     */
    if (itershape != NULL) {
        for (idim = 0; idim < ndim; ++idim) {
            if (itershape[idim] == 1 && broadcast_shape[idim] != 1) {
                goto broadcast_error;
            }
        }
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Need to fill the first axisdata, even if the iterator is 0-d */
    if (ndim == 0) {
        NAD_SHAPE(axisdata) = 1;
        NAD_INDEX(axisdata) = 0;
        /* One pointer-sized entry per operand: NPY_SIZEOF_INTP*nop bytes */
        memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
        memset(NAD_STRIDES(axisdata), 0, NPY_SIZEOF_INTP*nop);
    }

    /*
     * NOTE(review): the loop that processes the operands and fills in the
     * axisdata for each dimension is missing from this excerpt; only its
     * leading comment and closing brace were left.  The
     * 'operand_different_than_broadcast' label below is never targeted by
     * the surviving code, so the corresponding goto presumably lived in
     * that loop.  Restore it from the upstream source.
     */

    /* Now fill in the ITERSIZE member */
    NIT_ITERSIZE(iter) = 1;
    for (idim = 0; idim < ndim; ++idim) {
        if (npy_mul_sizes_with_overflow(&NIT_ITERSIZE(iter),
                    NIT_ITERSIZE(iter), broadcast_shape[idim])) {
            /*
             * With a multi-index but neither an index nor buffering, the
             * size check is delayed until either the multi-index is
             * removed or GetIterNext is called.
             */
            if ((itflags & NPY_ITFLAG_HASMULTIINDEX) &&
                    !(itflags & NPY_ITFLAG_HASINDEX) &&
                    !(itflags & NPY_ITFLAG_BUFFER)) {
                NIT_ITERSIZE(iter) = -1;
                break;
            }
            else {
                PyErr_SetString(PyExc_ValueError, "iterator is too large");
                return 0;
            }
        }
    }
    /* The range defaults to everything */
    NIT_ITERSTART(iter) = 0;
    NIT_ITEREND(iter) = NIT_ITERSIZE(iter);

    return 1;

    /*
     * NOTE(review): the 'broadcast_error' label targeted by the gotos above,
     * along with its error-reporting block, is missing from this excerpt
     * (only its closing brace was left).  Restore it from the upstream
     * source; until then this function cannot compile.
     */

operand_different_than_broadcast: {
    /* operand shape */
    int ndims = PyArray_NDIM(op[iop]);
    npy_intp *dims = PyArray_DIMS(op[iop]);
    PyObject *shape1 = convert_shape_to_string(ndims, dims, "");
    if (shape1 == NULL) {
        return 0;
    }

    /* Broadcast shape */
    PyObject *shape2 = convert_shape_to_string(ndim, broadcast_shape, "");
    if (shape2 == NULL) {
        Py_DECREF(shape1);
        return 0;
    }

    if (op_axes == NULL || op_axes[iop] == NULL) {
        /* operand shape not remapped */

        if (op_flags[iop] & NPY_ITER_READONLY) {
            PyErr_Format(PyExc_ValueError,
                "non-broadcastable operand with shape %S doesn't "
                "match the broadcast shape %S", shape1, shape2);
        }
        else {
            PyErr_Format(PyExc_ValueError,
                "non-broadcastable output operand with shape %S doesn't "
                "match the broadcast shape %S", shape1, shape2);
        }
        Py_DECREF(shape1);
        Py_DECREF(shape2);
        return 0;
    }
    else {
        /* operand shape remapped */

        npy_intp remdims[NPY_MAXDIMS];
        int *axes = op_axes[iop];
        for (idim = 0; idim < ndim; ++idim) {
            npy_intp i = axes[ndim - idim - 1];
            if (i >= 0 && i < PyArray_NDIM(op[iop])) {
                remdims[idim] = PyArray_DIM(op[iop], i);
            }
            else {
                /* Entry does not name an axis of the operand */
                remdims[idim] = -1;
            }
        }

        PyObject *shape3 = convert_shape_to_string(ndim, remdims, "");
        if (shape3 == NULL) {
            Py_DECREF(shape1);
            Py_DECREF(shape2);
            return 0;
        }

        if (op_flags[iop] & NPY_ITER_READONLY) {
            PyErr_Format(PyExc_ValueError,
                "non-broadcastable operand with shape %S "
                "[remapped to %S] doesn't match the broadcast shape %S",
                shape1, shape3, shape2);
        }
        else {
            PyErr_Format(PyExc_ValueError,
                "non-broadcastable output operand with shape %S "
                "[remapped to %S] doesn't match the broadcast shape %S",
                shape1, shape3, shape2);
        }
        Py_DECREF(shape1);
        Py_DECREF(shape2);
        Py_DECREF(shape3);
        return 0;
    }
}
}
/*
 * NOTE(review): the header of this function is missing from the excerpt
 * (presumably npyiter_replace_axisdata -- confirm against the upstream
 * source).  The body reads 'iter', 'iop', 'op', 'orig_op_ndim' and
 * 'op_axes' from the enclosing scope.
 *
 * For operand 'iop' it rewrites that operand's stride on every iterator
 * axis where the array 'op' has an extent other than 1, computes the
 * resulting base offset, and stores the adjusted data pointer in the reset
 * pointers, the base offsets, and every axisdata entry.
 */
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);
    char *op_dataptr = PyArray_DATA(op);

    NpyIter_AxisData *axisdata0, *axisdata;
    npy_intp sizeof_axisdata;
    npy_int8 *perm;
    npy_intp baseoffset = 0;

    perm = NIT_PERM(iter);
    axisdata0 = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /*
     * Replace just the strides which were non-zero, and compute
     * the base data address.
     */
    axisdata = axisdata0;

    if (op_axes != NULL) {
        /* Operand axes are remapped through op_axes */
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            int i;
            npy_bool axis_flipped;
            npy_intp shape;

            /* Apply perm to get the original axis, and check if its flipped */
            i = npyiter_undo_iter_axis_perm(idim, ndim, perm, &axis_flipped);

            /* Translate the iterator axis into the operand's own axis */
            i = npyiter_get_op_axis(op_axes[i], NULL);
            assert(i < orig_op_ndim);
            /* Negative axes are skipped */
            if (i >= 0) {
                shape = PyArray_DIM(op, i);
                /* Axes of extent 1 keep whatever stride is already stored */
                if (shape != 1) {
                    npy_intp stride = PyArray_STRIDE(op, i);
                    if (axis_flipped) {
                        /*
                         * A flipped axis is walked backwards: negate the
                         * stride and start from the last element.
                         */
                        NAD_STRIDES(axisdata)[iop] = -stride;
                        baseoffset += stride * (shape - 1);
                    } else {
                        NAD_STRIDES(axisdata)[iop] = stride;
                    }
                }
            }
        }
    } else {
        /* No remapping: the un-permuted axis indexes 'op' directly */
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            int i;
            npy_bool axis_flipped;
            npy_intp shape;

            /* Undo the iterator's axis permutation, using orig_op_ndim */
            i = npyiter_undo_iter_axis_perm(idim, orig_op_ndim, perm, &axis_flipped);

            /* Negative axes are skipped */
            if (i >= 0) {
                shape = PyArray_DIM(op, i);
                /* Axes of extent 1 keep whatever stride is already stored */
                if (shape != 1) {
                    npy_intp stride = PyArray_STRIDE(op, i);
                    if (axis_flipped) {
                        /* Same backwards-walk handling as above */
                        NAD_STRIDES(axisdata)[iop] = -stride;
                        baseoffset += stride * (shape - 1);
                    } else {
                        NAD_STRIDES(axisdata)[iop] = stride;
                    }
                }
            }
        }
    }

    /* Shift the data pointer by the accumulated offset */
    op_dataptr += baseoffset;

    /* Now the base data pointer is calculated, set it everywhere it's needed */
    NIT_RESETDATAPTR(iter)[iop] = op_dataptr;
    NIT_BASEOFFSETS(iter)[iop] = baseoffset;
    axisdata = axisdata0;
    /* Fill at least one axisdata, for the 0-d case */
    NAD_PTRS(axisdata)[iop] = op_dataptr;
    NIT_ADVANCE_AXISDATA(axisdata, 1);
    /* Remaining axisdata entries get the same pointer */
    for (idim = 1; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        NAD_PTRS(axisdata)[iop] = op_dataptr;
    }
}
/*
 * Computes the iterator's index strides and initializes the index values
 * to zero.  The index occupies slot 'nop' of each axisdata's strides and
 * pointers.
 *
 * iter:  the iterator to update
 * flags: selects the index order; NPY_ITER_C_INDEX walks the axisdata
 *        forwards from the first entry, NPY_ITER_F_INDEX walks it
 *        backwards from the last one.  With neither flag set nothing is
 *        written (unless the iterator holds exactly one element).
 */
static void
npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp indexstride;
    NpyIter_AxisData *axisdata;
    /* Not referenced by name below; presumably read by the NIT_* macros */
    npy_intp sizeof_axisdata;

    /*
     * If there is only one element being iterated, we just have
     * to touch the first AXISDATA because nothing will ever be
     * incremented. This also initializes the data for the 0-d case.
     */
    if (NIT_ITERSIZE(iter) == 1) {
        if (itflags & NPY_ITFLAG_HASINDEX) {
            axisdata = NIT_AXISDATA(iter);
            NAD_PTRS(axisdata)[nop] = 0;
        }
        return;
    }

    if (flags & NPY_ITER_C_INDEX) {
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
        axisdata = NIT_AXISDATA(iter);
        indexstride = 1;
        for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            npy_intp shape = NAD_SHAPE(axisdata);

            /* An axis of extent 1 never advances the index */
            if (shape == 1) {
                NAD_STRIDES(axisdata)[nop] = 0;
            }
            else {
                NAD_STRIDES(axisdata)[nop] = indexstride;
            }
            NAD_PTRS(axisdata)[nop] = 0;
            /* Running product of the extents visited so far */
            indexstride *= shape;
        }
    }
    else if (flags & NPY_ITER_F_INDEX) {
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
        /* Start at the last axisdata entry and walk towards the first */
        axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
        indexstride = 1;
        for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, -1)) {
            npy_intp shape = NAD_SHAPE(axisdata);

            /* An axis of extent 1 never advances the index */
            if (shape == 1) {
                NAD_STRIDES(axisdata)[nop] = 0;
            }
            else {
                NAD_STRIDES(axisdata)[nop] = indexstride;
            }
            NAD_PTRS(axisdata)[nop] = 0;
            /* Running product of the extents visited so far */
            indexstride *= shape;
        }
    }
}

/*
 * If the order is NPY_KEEPORDER, lets the iterator find the best
 * iteration order, otherwise forces it.  Indicates in the itflags
 * whether the iteration order was forced.
 *
 * NPY_CORDER only sets the flag.  NPY_FORTRANORDER sets the flag and
 * reverses the axis ordering.  NPY_ANYORDER sets the flag and reverses the
 * axis ordering only when every non-NULL operand is F-contiguous.
 * Reversal is skipped when the iterator has at most one dimension.
 */
static void
npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order)
{
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    switch (order) {
    case NPY_CORDER:
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
        break;
    case NPY_FORTRANORDER:
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
        /* Only need to actually do something if there is more than 1 dim */
        if (ndim > 1) {
            npyiter_reverse_axis_ordering(iter);
        }
        break;
    case NPY_ANYORDER:
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
        /* Only need to actually do something if there is more than 1 dim */
        if (ndim > 1) {
            PyArrayObject **op = NIT_OPERANDS(iter);
            /* Assume Fortran order until an operand says otherwise */
            int forder = 1;

            /* Check that all the array inputs are fortran order */
            for (iop = 0; iop < nop; ++iop, ++op) {
                /* NULL operands are ignored */
                if (*op && !PyArray_CHKFLAGS(*op, NPY_ARRAY_F_CONTIGUOUS)) {
                    forder = 0;
                    break;
                }
            }

            if (forder) {
                npyiter_reverse_axis_ordering(iter);
            }
        }
        break;

    case NPY_KEEPORDER:
        /* Don't set the forced order flag here... */
        break;
    }
}
/*
 * This function negates any strides in the iterator
 * which are negative. When iterating over multiple operands,
 * it flips strides only if all are negative or zero.
 *
 * For every flipped axis the base offsets are moved to the last element
 * along that axis and the axis' perm entry is made negative.  If anything
 * was flipped, the reset data pointers and every AXISDATA pointer are
 * updated, NPY_ITFLAG_NEGPERM is set and NPY_ITFLAG_IDENTPERM is cleared.
 */
static void
npyiter_flip_negative_strides(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);  // iterator flag bits
    int idim, ndim = NIT_NDIM(iter);  // axis loop counter and number of iterator dimensions
    int iop, nop = NIT_NOP(iter);  // operand loop counter and number of operands

    npy_intp istrides, nstrides = NAD_NSTRIDES();  // number of per-axis stride slots (presumably nop plus an optional index slot -- confirm against the macro)
    NpyIter_AxisData *axisdata, *axisdata0;  // cursor over the AXISDATA entries, and the first entry
    npy_intp *baseoffsets;  // per-slot offsets accumulated while flipping
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);  // byte size of one AXISDATA entry
    int any_flipped = 0;  // set once at least one axis has been flipped

    axisdata0 = axisdata = NIT_AXISDATA(iter);  // start at the first AXISDATA entry
    baseoffsets = NIT_BASEOFFSETS(iter);  // the iterator's base offsets array
    for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        npy_intp *strides = NAD_STRIDES(axisdata);  // strides of this axis, one per slot
        int any_negative = 0;  // set if some operand has a negative stride on this axis

        /*
         * Check the signs of all the operand strides.
         */
        for (iop = 0; iop < nop; ++iop) {
            if (strides[iop] < 0) {
                any_negative = 1;  // found a negative stride
            }
            else if (strides[iop] != 0) {
                break;  // a positive stride: stop early, leaving iop < nop
            }
        }
        /*
         * If at least one stride is negative and none are positive,
         * flip all the strides for this dimension.
         */
        if (any_negative && iop == nop) {
            npy_intp shapem1 = NAD_SHAPE(axisdata) - 1;  // index of the last element along this axis

            for (istrides = 0; istrides < nstrides; ++istrides) {
                npy_intp stride = strides[istrides];

                /* Adjust the base pointers to start at the end */
                baseoffsets[istrides] += shapem1 * stride;  // offset of the last element along this axis
                /* Flip the stride */
                strides[istrides] = -stride;  // negate it (a negative stride becomes positive)
            }
            /*
             * Make the perm entry negative so get_multi_index
             * knows it's flipped
             */
            NIT_PERM(iter)[idim] = -1 - NIT_PERM(iter)[idim];  // encodes perm value p as -1 - p

            any_flipped = 1;  // remember that the pointers need fixing up below
        }
    }

    /*
     * If any strides were flipped, the base pointers were adjusted
     * in the first AXISDATA, and need to be copied to all the rest
     */
    if (any_flipped) {
        char **resetdataptr = NIT_RESETDATAPTR(iter);  // the iterator's reset data pointers

        for (istrides = 0; istrides < nstrides; ++istrides) {
            resetdataptr[istrides] += baseoffsets[istrides];  // shift each reset pointer by its accumulated offset
        }
        axisdata = axisdata0;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            char **ptrs = NAD_PTRS(axisdata);
            for (istrides = 0; istrides < nstrides; ++istrides) {
                ptrs[istrides] = resetdataptr[istrides];  // every axis starts from the adjusted reset pointer
            }
        }
        /*
         * Indicate that some of the perm entries are negative,
         * and that it's not (strictly speaking) the identity perm.
         */
        NIT_ITFLAGS(iter) = (NIT_ITFLAGS(iter) | NPY_ITFLAG_NEGPERM) & ~NPY_ITFLAG_IDENTPERM;  // set NEGPERM, clear IDENTPERM
    }
}

/*
 * Reverse the order of iteration over the axes in the iterator.
 *
 * The AXISDATA entries are swapped end-for-end, the perm array is
 * overwritten with ndim-1, ndim-2, ..., 0, and NPY_ITFLAG_IDENTPERM
 * is cleared.
 */
static void
npyiter_reverse_axis_ordering(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp i, temp, size;
    npy_intp *first, *last;
    npy_int8 *perm;

    /* Size of one AXISDATA entry, counted in npy_intp-sized words */
    size = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) / NPY_SIZEOF_INTP;
    /* First and last AXISDATA entries, viewed as arrays of npy_intp */
    first = (npy_intp*)NIT_AXISDATA(iter);
    last = first + (ndim - 1) * size;

    /* This loop reverses the order of the AXISDATA array */
    while (first < last) {
        /* Swap the two entries one word at a time */
        for (i = 0; i < size; ++i) {
            temp = first[i];
            first[i] = last[i];
            last[i] = temp;
        }
        first += size;
        last -= size;
    }

    /* Store the perm we applied */
    perm = NIT_PERM(iter);
    for (i = ndim - 1; i >= 0; --i, ++perm) {
        *perm = (npy_int8)i;
    }

    /* The perm is no longer the identity */
    NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
}

static inline npy_intp
intp_abs(npy_intp x)
{
    /* Magnitude of x: negate only when x is below zero */
    if (x < 0) {
        return -x;
    }
    return x;
}

/*
 * Sorts the iterator's axes by the magnitude of their strides and then
 * applies the resulting permutation to the AXISDATA array.
 *
 * The permutation is built in the iterator's perm array with a stable
 * insertion sort.  If the sort moved anything, the AXISDATA entries are
 * permuted in place and NPY_ITFLAG_IDENTPERM is cleared.
 */
static void
npyiter_find_best_axis_ordering(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    npy_intp ax_i0, ax_i1, ax_ipos;
    npy_int8 ax_j0, ax_j1;
    npy_int8 *perm;
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    /* Set to 1 as soon as the sort moves any perm entry */
    int permuted = 0;

    perm = NIT_PERM(iter);

    /*
     * Do a custom stable insertion sort.  Note that because
     * the AXISDATA has been reversed from C order, this
     * is sorting from smallest stride to biggest stride.
     */
    for (ax_i0 = 1; ax_i0 < ndim; ++ax_i0) {
        npy_intp *strides0;

        /* 'ax_ipos' is where perm[ax_i0] will get inserted */
        ax_ipos = ax_i0;
        ax_j0 = perm[ax_i0];

        strides0 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j0));
        for (ax_i1 = ax_i0-1; ax_i1 >= 0; --ax_i1) {
            int ambig = 1, shouldswap = 0;
            npy_intp *strides1;

            ax_j1 = perm[ax_i1];

            strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1));

            /* Only operands with non-zero strides on both axes get a vote */
            for (iop = 0; iop < nop; ++iop) {
                if (strides0[iop] != 0 && strides1[iop] != 0) {
                    if (intp_abs(strides1[iop]) <= intp_abs(strides0[iop])) {
                        /*
                         * Clear the swap even if an earlier operand asked
                         * for one: when operands conflict, the current
                         * (C-order) arrangement wins.
                         */
                        shouldswap = 0;
                    }
                    else {
                        /* Only set swap if it's still ambiguous */
                        if (ambig) {
                            shouldswap = 1;
                        }
                    }

                    /*
                     * A comparison has been done, so it's
                     * no longer ambiguous
                     */
                    ambig = 0;
                }
            }
            /*
             * If the comparison was unambiguous, either shift
             * 'ax_ipos' to 'ax_i1' or stop looking for an insertion
             * point
             */
            if (!ambig) {
                if (shouldswap) {
                    ax_ipos = ax_i1;
                }
                else {
                    break;
                }
            }
        }

        /* Insert perm[ax_i0] into the right place */
        if (ax_ipos != ax_i0) {
            for (ax_i1 = ax_i0; ax_i1 > ax_ipos; --ax_i1) {
                perm[ax_i1] = perm[ax_i1-1];
            }
            perm[ax_ipos] = ax_j0;
            permuted = 1;
        }
    }

    /* Apply the computed permutation to the AXISDATA array */
    if (permuted == 1) {
        /* 'size' is one AXISDATA entry counted in npy_intp-sized words */
        npy_intp i, size = sizeof_axisdata/NPY_SIZEOF_INTP;
        NpyIter_AxisData *ad_i;

        /* Use the index as a flag, set each to 1 */
        ad_i = axisdata;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(ad_i, 1)) {
            NAD_INDEX(ad_i) = 1;
        }
        /* Apply the permutation by following the cycles */
        for (idim = 0; idim < ndim; ++idim) {
            ad_i = NIT_INDEX_AXISDATA(axisdata, idim);

            /* If this axis hasn't been touched yet, process it */
            if (NAD_INDEX(ad_i) == 1) {
                npy_int8 pidim = perm[idim];
                npy_intp tmp;
                NpyIter_AxisData *ad_p, *ad_q;

                if (pidim != idim) {
                    /* Follow the cycle, copying the data */
                    for (i = 0; i < size; ++i) {
                        pidim = perm[idim];
                        ad_q = ad_i;
                        tmp = *((npy_intp*)ad_q + i);
                        while (pidim != idim) {
                            ad_p = NIT_INDEX_AXISDATA(axisdata, pidim);
                            *((npy_intp*)ad_q + i) = *((npy_intp*)ad_p + i);

                            ad_q = ad_p;
                            pidim = perm[(int)pidim];
                        }
                        *((npy_intp*)ad_q + i) = tmp;
                    }
                    /* Follow the cycle again, marking it as done */
                    pidim = perm[idim];

                    while (pidim != idim) {
                        NAD_INDEX(NIT_INDEX_AXISDATA(axisdata, pidim)) = 0;
                        pidim = perm[(int)pidim];
                    }
                }
                NAD_INDEX(ad_i) = 0;
            }
        }
        /* Clear the identity perm flag */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
    }
}

/*
 * Computes a dtype to which all the considered operands can be promoted,
 * using the ufunc rules.  When only_inputs is 1, operands that are not
 * flagged as read are left out of the calculation.
 *
 * Returns a new reference, or whatever PyArray_ResultType /
 * PyArray_DescrNewByteorder return when they are used.
 */
static PyArray_Descr *
npyiter_get_common_dtype(int nop, PyArrayObject **op,
                        const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
                        PyArray_Descr **op_request_dtypes,
                        int only_inputs)
{
    PyArrayObject *arrs[NPY_MAXARGS];
    PyArray_Descr *dtypes[NPY_MAXARGS];
    PyArray_Descr *first;
    npy_intp narrs = 0, ndtypes = 0;
    npy_intp k;
    int iop;

    NPY_IT_DBG_PRINT("Iterator: Getting a common data type from operands\n");

    /* Sort each participating operand into 'arrs' or 'dtypes' */
    for (iop = 0; iop < nop; ++iop) {
        int has_request;

        if (op_dtype[iop] == NULL) {
            continue;
        }
        if (only_inputs && !(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
            continue;
        }

        has_request = (op_request_dtypes != NULL &&
                       op_request_dtypes[iop] != NULL);
        if (!has_request && PyArray_NDIM(op[iop]) == 0) {
            /* No dtype requested and 0-d: hand over the array itself */
            arrs[narrs] = op[iop];
            ++narrs;
        }
        else {
            /* Otherwise only the dtype takes part */
            dtypes[ndtypes] = op_dtype[iop];
            ++ndtypes;
        }
    }

    /* Any 0-d array operand means full promotion is required */
    if (narrs != 0) {
        return PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
    }

    /* Find the first dtype that is not the very same object as dtypes[0] */
    first = dtypes[0];
    for (k = 1; k < ndtypes; ++k) {
        if (dtypes[k] != first) {
            break;
        }
    }
    if (k != ndtypes) {
        /* The dtypes are not all identical, so promote them */
        return PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
    }

    /* All identical: reuse it if it is the only one or native byte order */
    if (ndtypes == 1 || PyArray_ISNBO(first->byteorder)) {
        Py_INCREF(first);
        return first;
    }
    /* Otherwise hand back a native byte order version */
    return PyArray_DescrNewByteorder(first, NPY_NATIVE);
}

/*
 * Allocates a temporary array which can be used to replace op
 * in the iteration.  Its dtype will be op_dtype.
 *
 * The result array has a memory ordering which matches the iterator,
 * which may or may not match that of op.  The parameter 'shape' may be
 * NULL, in which case it is filled in from the iterator's shape.
 *
 * 'op_axes' may be NULL; otherwise it maps iterator axes to operand axes.
 *
 * Returns a new array, or NULL with a Python exception set on failure.
 *
 * This function must be called before any axes are coalesced.
 */
static PyArrayObject *
npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                npy_uint32 flags, npyiter_opitflags *op_itflags,
                int op_ndim, npy_intp const *shape,
                PyArray_Descr *op_dtype, const int *op_axes)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int used_op_ndim;
    int nop = NIT_NOP(iter);

    npy_int8 *perm = NIT_PERM(iter);
    npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    /* Running stride in bytes, starting at one element */
    npy_intp stride = op_dtype->elsize;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    int i;

    PyArrayObject *ret;

    /*
     * There is an interaction with array-dtypes here, which
     * generally works. Let's say you make an nditer with an
     * output dtype of a 2-double array. All-scalar inputs
     * will result in a 1-dimensional output with shape (2).
     * Everything still works out in the nditer, because the
     * new dimension is always added on the end, and it cares
     * about what happens at the beginning.
     */

    /* If it's a scalar, don't need to check the axes */
    if (op_ndim == 0) {
        /* PyArray_NewFromDescr steals a reference to the descriptor */
        Py_INCREF(op_dtype);
        ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0,
                               NULL, NULL, NULL, 0, NULL);

        return ret;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Initialize the strides to invalid values */
    for (i = 0; i < op_ndim; ++i) {
        strides[i] = NPY_MAX_INTP;
    }

    if (op_axes != NULL) {
        used_op_ndim = 0;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            npy_bool reduction_axis;

            /* Apply the perm to get the original axis */
            i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
            /* Map it to the operand's axis */
            i = npyiter_get_op_axis(op_axes[i], &reduction_axis);

            /*
             * If i < 0, this is a new axis (the operand does not have it)
             * so we can ignore it here.  The iterator setup will have
             * ensured already that a potential reduction/broadcast is valid.
             */
            if (i >= 0) {
                NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                    "for iterator dimension %d to %d\n", (int)i,
                                    (int)idim, (int)stride);
                /*
                 * Reject an out-of-range axis before it is used to index
                 * the local strides/new_shape arrays.
                 */
                if (shape == NULL && i >= ndim) {
                    PyErr_Format(PyExc_ValueError,
                            "automatically allocated output array "
                            "specified with an inconsistent axis mapping; "
                            "the axis mapping cannot include dimension %d "
                            "which is too large for the iterator dimension "
                            "of %d.", i, ndim);
                    return NULL;
                }
                used_op_ndim += 1;
                strides[i] = stride;
                if (shape == NULL) {
                    if (reduction_axis) {
                        /* reduction axes always have a length of 1 */
                        new_shape[i] = 1;
                    }
                    else {
                        new_shape[i] = NAD_SHAPE(axisdata);
                    }
                    stride *= new_shape[i];
                }
                else {
                    assert(!reduction_axis || shape[i] == 1);
                    stride *= shape[i];
                }
            }
        }
    }
    else {
        /* No axis mapping was given: every iterator axis is considered */
        used_op_ndim = ndim;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            /* Apply the perm to get the original axis */
            i = npyiter_undo_iter_axis_perm(idim, op_ndim, perm, NULL);

            if (i >= 0) {
                NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                    "for iterator dimension %d to %d\n", (int)i,
                                    (int)idim, (int)stride);
                strides[i] = stride;
                if (shape == NULL) {
                    /* Take the length from the iterator */
                    new_shape[i] = NAD_SHAPE(axisdata);
                    stride *= new_shape[i];
                }
                else {
                    /* Use the caller-supplied length */
                    stride *= shape[i];
                }
            }
        }
    }

    if (shape == NULL) {
        /* If shape was NULL, use the shape we calculated */
        op_ndim = used_op_ndim;
        shape = new_shape;
        /*
         * If there's a gap in the array's dimensions, it's an error.
         * For instance, if op_axes [0, 2] is specified, there will a place
         * in the strides array where the value is not set.
         */
        for (i = 0; i < op_ndim; i++) {
            if (strides[i] == NPY_MAX_INTP) {
                PyErr_Format(PyExc_ValueError,
                        "automatically allocated output array "
                        "specified with an inconsistent axis mapping; "
                        "the axis mapping is missing an entry for "
                        "dimension %d.", i);
                return NULL;
            }
        }
    }
    else if (used_op_ndim < op_ndim) {
        /*
         * If custom axes were specified, some dimensions may not have
         * been used. These are additional axes which are ignored in the
         * iterator but need to be handled here.
         */
        npy_intp factor, itemsize, new_strides[NPY_MAXDIMS];

        /* Fill in the missing strides in C order */
        factor = 1;
        itemsize = op_dtype->elsize;
        for (i = op_ndim-1; i >= 0; --i) {
            if (strides[i] == NPY_MAX_INTP) {
                new_strides[i] = factor * itemsize;
                factor *= shape[i];
            }
        }

        /*
         * Copy the missing strides, and multiply the existing strides
         * by the calculated factor.  This way, the missing strides
         * are tighter together in memory, which is good for nested
         * loops.
         */
        for (i = 0; i < op_ndim; ++i) {
            if (strides[i] == NPY_MAX_INTP) {
                strides[i] = new_strides[i];
            }
            else {
                strides[i] *= factor;
            }
        }
    }

    /* Allocate the temporary array (the call steals the dtype reference) */
    Py_INCREF(op_dtype);
    ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, op_ndim,
                               shape, strides, NULL, 0, NULL);
    if (ret == NULL) {
        return NULL;
    }

    /* Double-check that the subtype didn't mess with the dimensions */
    if (subtype != &PyArray_Type) {
        /*
         * TODO: the dtype could have a subarray, which adds new dimensions
         *       to `ret`, that should typically be fine, but will break
         *       in this branch.
         */
        if (PyArray_NDIM(ret) != op_ndim ||
                    !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) {
            PyErr_SetString(PyExc_RuntimeError,
                    "Iterator automatic output has an array subtype "
                    "which changed the dimensions of the output");
            Py_DECREF(ret);
            return NULL;
        }
    }

    return ret;
}

/*
 * Allocation step for the iterator's operand arrays.  Returns 1 on
 * success and 0 on failure.
 *
 * NOTE(review): only the prologue and the final writemasked-reduction
 * validation are visible in this excerpt.  The per-operand body appears
 * to have been elided, which is presumably where the two stray closing
 * braces below come from -- restore it from the upstream source before
 * compiling.
 */
static int
npyiter_allocate_arrays(NpyIter *iter,
                        npy_uint32 flags,
                        PyArray_Descr **op_dtype, PyTypeObject *subtype,
                        const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                        int **op_axes)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);  // iterator flag bits
    int idim, ndim = NIT_NDIM(iter);  // axis loop counter and number of iterator dimensions
    int iop, nop = NIT_NOP(iter);  // operand loop counter and number of operands

    int check_writemasked_reductions = 0;  // when non-zero, the validation loop at the end runs

    NpyIter_BufferData *bufferdata = NULL;  // stays NULL unless the iterator is buffered
    PyArrayObject **op = NIT_OPERANDS(iter);  // the iterator's operand array

    if (itflags & NPY_ITFLAG_BUFFER) {  // buffering is enabled
        bufferdata = NIT_BUFFERDATA(iter);  // fetch the iterator's buffer data
    }

    }

    }

    if (check_writemasked_reductions) {  // writemasked reductions need validating
        for (iop = 0; iop < nop; ++iop) {  // visit every operand
            /*
             * Check whether there are any WRITEMASKED REDUCE operands
             * which should be validated now that all the strides are
             * filled in.
             */
            if ((op_itflags[iop] &
                    (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
                /*
                 * If the ARRAYMASK has 'bigger' dimensions
                 * than this REDUCE WRITEMASKED operand,
                 * the result would be more than one mask
                 * value per reduction element, something which
                 * is invalid. This function provides validation
                 * for that.
                 */
                if (!check_mask_for_writemasked_reduction(iter, iop)) {
                    return 0;  // validation failed
                }
            }
        }
    }

    return 1;  // success
}

/*
 * The __array_priority__ attribute of the inputs determines
 * the subtype of any output arrays.  This function finds the
 * subtype of the input array with highest priority.
 *
 * *subtype_priority and *subtype are updated in place whenever a
 * readable, non-NULL operand has a strictly higher priority.
 */
static void
npyiter_get_priority_subtype(int nop, PyArrayObject **op,
                            const npyiter_opitflags *op_itflags,
                            double *subtype_priority,
                            PyTypeObject **subtype)
{
    int iop;

    for (iop = 0; iop < nop; ++iop) {
        PyArrayObject *candidate = op[iop];
        double candidate_priority;

        /* Only operands that exist and are read from take part */
        if (candidate == NULL || !(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
            continue;
        }

        candidate_priority = PyArray_GetPriority((PyObject *)candidate, 0.0);
        if (candidate_priority > *subtype_priority) {
            /* New highest priority: remember it and the operand's type */
            *subtype_priority = candidate_priority;
            *subtype = Py_TYPE(candidate);
        }
    }
}

/*
 * Sets up the iterator's dtype transfer (cast) information and records
 * the combined transfer flags on the iterator.  Returns 1 on success and
 * 0 on failure.
 */
static int
npyiter_allocate_transfer_functions(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop = 0, nop = NIT_NOP(iter);

    npy_intp i;
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    PyArrayObject **op = NIT_OPERANDS(iter);
    PyArray_Descr **op_dtype = NIT_DTYPES(iter);
    npy_intp *strides = NAD_STRIDES(axisdata), op_stride;
    /* Per-operand read/write cast info stored in the buffer data */
    NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);

    /* combined cast flags, the new cast flags for each cast: */
    NPY_ARRAYMETHOD_FLAGS cflags = PyArrayMethod_MINIMAL_FLAGS;
    NPY_ARRAYMETHOD_FLAGS nc_flags;

    /*
     * NOTE(review): the per-operand loop is NOT present in this excerpt.
     * It presumably fills transferinfo[iop], merges each cast's flags
     * into cflags and jumps to `fail` on error.  Restore it from the
     * upstream source before relying on this function.
     */

    /* Store the combined transfer flags on the iterator */
    NIT_ITFLAGS(iter) |= cflags << NPY_ITFLAG_TRANSFERFLAGS_SHIFT;
    /* The shifted flags must round-trip exactly */
    assert(NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT == cflags);

    /* If any of the dtype transfer functions needed the API, flag it. */
    if (cflags & NPY_METH_REQUIRES_PYAPI) {
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
    }

    return 1;

fail:
    /*
     * Release the cast info of every operand up to and including the
     * one that failed.  Index with the loop variable `i`: indexing with
     * `iop` would only ever release the failing operand's entry.
     */
    for (i = 0; i < iop+1; ++i) {
        NPY_cast_info_xfree(&transferinfo[i].read);
        NPY_cast_info_xfree(&transferinfo[i].write);
    }
    return 0;
}
/* Undefine NPY_ITERATOR_IMPLEMENTATION_CODE. */
#undef NPY_ITERATOR_IMPLEMENTATION_CODE

NumPy-源码解析-六十九-

NumPy 源码解析（六十九）

.\numpy\numpy\_core\src\multiarray\multiarraymodule.h

.\numpy\numpy\_core\src\multiarray\nditer_api.c

.\numpy\numpy\_core\src\multiarray\nditer_constr.c

`.\numpy\numpy\_core\src\multiarray\multiarraymodule.h`

`.\numpy\numpy\_core\src\multiarray\nditer_api.c`

`.\numpy\numpy\_core\src\multiarray\nditer_constr.c`