NumPy 源码解析（一）

Contributing to numpy

Reporting issues

When reporting issues please include as much detail as possible about your operating system, numpy version and python version. Whenever possible, please also include a brief, self-contained code example that demonstrates the problem.

If you are reporting a segfault please include a GDB traceback, which you can generate by following these instructions.

Contributing code

Thanks for your interest in contributing code to numpy!

If this is your first time contributing to a project on GitHub, please read through our guide to contributing to numpy.
If you have contributed to other projects on GitHub, you can go straight to our development workflow.

Either way, please be sure to follow our convention for commit messages.

If you are writing new C code, please follow the style described in doc/C_STYLE_GUIDE.

Suggested ways to work on your development version (compile and run the tests without interfering with system packages) are described in doc/source/dev/development_environment.rst.

A note on feature enhancements/API changes

If you are interested in adding a new feature to NumPy, consider submitting your feature proposal to the mailing list, which is the preferred forum for discussing new features and API changes.

`.\numpy\.spin\cmds.py`

# 导入必要的库
import os  # 导入操作系统接口模块
import shutil  # 导入高级文件操作模块
import pathlib  # 导入操作路径的模块
import shutil  # 再次导入高级文件操作模块，实际上不需要多次导入同一个模块
import pathlib  # 再次导入操作路径的模块，实际上不需要多次导入同一个模块
import importlib  # 导入导入模块的库
import subprocess  # 导入子进程管理模块

import click  # 导入命令行解析模块click
from spin import util  # 从spin包中导入util模块
from spin.cmds import meson  # 从spin.cmds包中导入meson模块

# Fail fast at import time when the vendored meson git submodule is missing.
curdir = pathlib.Path(__file__).parent  # directory that contains this file
# Expected location of the vendored meson sources, one level above `curdir`.
meson_import_dir = curdir.parent.joinpath(
    'vendored-meson', 'meson', 'mesonbuild'
)
if not meson_import_dir.exists():
    raise RuntimeError(
        'The `vendored-meson/meson` git submodule does not exist! '
        'Run `git submodule update --init` to fix this problem.'
    )


def _get_numpy_tools(filename):
    """Load a script from the ``tools`` directory and return it as a module.

    Parameters
    ----------
    filename : str or os.PathLike
        Path of the script relative to ``tools`` (itself relative to the
        current working directory), e.g. ``'linter.py'`` or
        ``pathlib.Path('ci', 'some_tool.py')``.

    Returns
    -------
    module
        The executed module; its name is the stem of `filename`.

    Raises
    ------
    ImportError
        If no import spec/loader can be derived for the file.
    """
    # `import importlib` alone does not guarantee that the `importlib.util`
    # submodule is loaded, so import it explicitly.
    import importlib.util

    # Accept plain strings as well as Path objects (``.stem`` is needed below).
    filename = pathlib.Path(filename)
    filepath = pathlib.Path('tools', filename)
    spec = importlib.util.spec_from_file_location(filename.stem, filepath)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load {str(filepath)!r} as a Python module")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


@click.command()
@click.argument(
    "token",
    required=True
)
@click.argument(
    "revision-range",
    required=True
)
@click.pass_context
def changelog(ctx, token, revision_range):
    """👩 Get change log for provided revision range

    \b
    Example:

    \b
    $ spin changelog $GH_TOKEN v1.25.0..v1.26.0
    """
    try:
        # Imported lazily so the other commands work without these packages.
        from github.GithubException import GithubException
        from git.exc import GitError
        # Named differently from this function to avoid shadowing it.
        changelog_tool = _get_numpy_tools(pathlib.Path('changelog.py'))
    except ModuleNotFoundError as e:
        raise click.ClickException(
            f"{e.msg}. Install the missing packages to use this command."
        ) from e
    click.secho(
        f"Generating change log for range {revision_range}",
        bold=True, fg="bright_green",
    )
    try:
        changelog_tool.main(token, revision_range)
    except GithubException as e:
        # Turn the GitHub API failure into a regular CLI error message.
        raise click.ClickException(
            f"GithubException raised with status: {e.status} "
            f"and message: {e.data['message']}"
        ) from e
    except GitError as e:
        # Same for failures of the underlying git command.
        raise click.ClickException(
            f"Git error in command `{' '.join(e.command)}` "
            f"with error message: {e.stderr}"
        ) from e


@click.command()
@click.option(
    "-j", "--jobs",
    help="Number of parallel tasks to launch",
    type=int
)
@click.option(
    "--clean", is_flag=True,
    help="Clean build directory before build"
)
@click.option(
    "-v", "--verbose", is_flag=True,
    help="Print all build output, even installation"
)
@click.option(
    "--with-scipy-openblas", type=click.Choice(["32", "64"]),
    default=None,
    help="Build with pre-installed scipy-openblas32 or scipy-openblas64 wheel"
)
@click.argument("meson_args", nargs=-1)
@click.pass_context
def build(ctx, meson_args, with_scipy_openblas, jobs=None, clean=False, verbose=False, quiet=False, *args, **kwargs):
    """🔧 Build package with Meson/ninja and install

    MESON_ARGS are passed through e.g.:

    spin build -- -Dpkg_config_path=/lib64/pkgconfig

    The package is installed to build-install

    By default builds for release, to be able to use a debugger set CFLAGS
    appropriately. For example, for linux use

    CFLAGS="-O0 -g" spin build
    """
    # XXX keep in sync with upstream build
    if with_scipy_openblas:
        # Generate the OpenBLAS pkg-config file and local distributor init.
        _config_openblas(with_scipy_openblas)
    # `meson.build` does not declare this option, so drop it before forwarding.
    ctx.params.pop("with_scipy_openblas", None)
    ctx.forward(meson.build)
# NOTE(review): in the source the option declarations of `docs` were missing
# and the decorators of `test` were attached to `docs` instead. The `docs`
# options below are reconstructed from its signature and are meant to mirror
# the parameters of spin's `meson.docs` (required for `ctx.forward`) --
# confirm names, defaults and help texts against upstream.
@click.command()
@click.argument("sphinx_target", default="html")
@click.option(
    "--clean", is_flag=True,
    default=False,
    help="Clean previously built docs before building"
)
@click.option(
    "--build/--no-build",
    "first_build",
    default=True,
    help="Build numpy before generating docs",
)
@click.option(
    '--jobs', '-j',
    metavar='N_JOBS',
    default="auto",
    help="Number of parallel build jobs"
)
@click.pass_context
def docs(ctx, sphinx_target, clean, first_build, jobs, *args, **kwargs):
    """📖 Build Sphinx documentation

    By default, SPHINXOPTS="-W", raising errors on warnings.
    To build without raising on warnings:

      SPHINXOPTS="" spin docs

    To list all Sphinx targets:

      spin docs targets

    To build another Sphinx target:

      spin docs TARGET

    E.g., to build a zipfile of the html docs for distribution:

      spin docs dist

    """
    meson.docs.ignore_unknown_options = True

    # See https://github.com/scientific-python/spin/pull/199
    # Can be changed when spin updates to 0.11, and moved to pyproject.toml
    if clean:
        # Output directories that are removed for a clean build.
        clean_dirs = [
            './doc/build/',
            './doc/source/reference/generated',
            './doc/source/reference/random/bit_generators/generated',
            './doc/source/reference/random/generated',
        ]

        for target_dir in clean_dirs:
            if os.path.isdir(target_dir):
                print(f"Removing {target_dir!r}")
                shutil.rmtree(target_dir)

    # Run towncrier without staging anything for commit. This is the way to get
    # release notes snippets included in a local doc build.
    cmd = ['towncrier', 'build', '--version', '2.x.y', '--keep', '--draft']
    p = subprocess.run(cmd, check=True, capture_output=True, text=True)
    outfile = curdir.parent / 'doc' / 'source' / 'release' / 'notes-towncrier.rst'
    with open(outfile, 'w') as f:
        # The draft release notes are whatever towncrier printed to stdout.
        f.write(p.stdout)

    ctx.forward(meson.docs)


@click.command()
@click.argument("pytest_args", nargs=-1)
@click.option(
    "-m",
    "markexpr",
    metavar='MARKEXPR',
    default="not slow",
    help="Run tests with the given markers"
)
@click.option(
    "-j",
    "n_jobs",
    metavar='N_JOBS',
    default="1",
    help=("Number of parallel jobs for testing. "
          "Can be set to `auto` to use all cores.")
)
@click.option(
    "--tests", "-t",
    metavar='TESTS',
    help=("""
Which tests to run. Can be a module, function, class, or method:

 \b
 numpy.random
 numpy.random.tests.test_generator_mt19937
 numpy.random.tests.test_generator_mt19937::TestMultivariateHypergeometric
 numpy.random.tests.test_generator_mt19937::TestMultivariateHypergeometric::test_edge_cases
 \b
""")
)
@click.option(
    '--verbose', '-v', is_flag=True, default=False
)
@click.pass_context
def test(ctx, pytest_args, markexpr, n_jobs, tests, verbose, *args, **kwargs):
    """🔧 Run tests

    PYTEST_ARGS are passed through directly to pytest, e.g.:

      spin test -- --pdb

    To run tests on a directory or file:

     \b
     spin test numpy/linalg
     spin test numpy/linalg/tests/test_linalg.py

    To report the durations of the N slowest tests:

      spin test -- --durations=N

    To run tests that match a given pattern:

     \b
     spin test -- -k "geometric"
     spin test -- -k "geometric and not rgeometric"

    By default, spin will run `-m 'not slow'`. To run the full test suite, use
    `spin test -m full`

    For more, see `pytest --help`.
    """  # noqa: E501
    # Other commands may forward a list here; normalise so that the tuple
    # concatenations below always work.
    pytest_args = tuple(pytest_args)

    # Default to the whole package when nothing was selected.
    if (not pytest_args) and (not tests):
        pytest_args = ('numpy',)

    # Apply the marker expression unless the user passed `-m` through to
    # pytest or asked for the "full" suite.
    if '-m' not in pytest_args:
        if markexpr != "full":
            pytest_args = ('-m', markexpr) + pytest_args

    # Only request parallel workers when asked for and not already given.
    if (n_jobs != "1") and ('-n' not in pytest_args):
        pytest_args = ('-n', str(n_jobs)) + pytest_args

    # `--tests` selects tests by import path via pytest's `--pyargs`.
    if tests and not ('--pyargs' in pytest_args):
        pytest_args = ('--pyargs', tests) + pytest_args

    if verbose:
        pytest_args = ('-v',) + pytest_args

    ctx.params['pytest_args'] = pytest_args

    # These options are consumed here; remove them before forwarding so that
    # `meson.test` only receives parameters it declares.
    for extra_param in ('markexpr', 'n_jobs', 'tests', 'verbose'):
        ctx.params.pop(extra_param, None)

    ctx.forward(meson.test)


# CLI command that runs the doctests of the public API
@click.command()
@click.argument("pytest_args", nargs=-1)
@click.option(
    "-j",
    "n_jobs",
    metavar='N_JOBS',
    default="1",
    help=("Number of parallel jobs for testing. "
          "Can be set to `auto` to use all cores.")
)
@click.option(
    '--verbose', '-v', is_flag=True, default=False
)
@click.pass_context
def check_docs(ctx, pytest_args, n_jobs, verbose, *args, **kwargs):
    """🔧 Run doctests of objects in the public API.

    PYTEST_ARGS are passed through directly to pytest, e.g.:

      spin check-docs -- --pdb

    To run tests on a directory:

     \b
     spin check-docs numpy/linalg

    To report the durations of the N slowest doctests:

      spin check-docs -- --durations=N

    To run doctests that match a given pattern:

     \b
     spin check-docs -- -k "slogdet"
     spin check-docs numpy/linalg -- -k "det and not slogdet"

    \b
    Note:
    -----

    \b
     - This command only runs doctests and skips everything under tests/
     - This command only doctests public objects: those which are accessible
       from the top-level `__init__.py` file.

    """  # noqa: E501

    try:
        # Probe for the plugin up front so a missing install is reported
        # with a clear message.
        import scipy_doctest
    except ModuleNotFoundError as e:
        raise ModuleNotFoundError("scipy-doctest not installed") from e

    # What to collect: the user's selection, or the whole package.
    targets = pytest_args if pytest_args else ('numpy',)

    # Parallel-worker flags, unless running serially or already passed through.
    if (n_jobs != "1") and ('-n' not in targets):
        parallel = ('-n', str(n_jobs))
    else:
        parallel = ()

    verbosity = ('-v',) if verbose else ()

    # turn doctesting on:
    doctest_flags = ('--doctest-modules', '--doctest-collect=api')

    ctx.params['pytest_args'] = verbosity + parallel + targets + doctest_flags

    # Options handled here must not be forwarded to `meson.test`.
    del ctx.params['n_jobs']
    del ctx.params['verbose']

    ctx.forward(meson.test)
def _set_mem_rlimit(max_mem=None):
    """Lower the soft address-space limit (``RLIMIT_AS``) of this process.

    Parameters
    ----------
    max_mem : int, optional
        Requested soft limit. When omitted, 70% of the total memory reported
        by ``psutil.virtual_memory()`` is used. The value is capped at the
        current soft limit whenever that limit is positive.
    """
    import resource
    import psutil

    total_memory = psutil.virtual_memory().total
    soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_AS)

    if max_mem is None:
        max_mem = int(total_memory * 0.7)
    if soft_limit > 0:
        # Never raise an existing soft limit, only tighten it.
        max_mem = min(max_mem, soft_limit)

    try:
        resource.setrlimit(resource.RLIMIT_AS, (max_mem, hard_limit))
    except ValueError:
        # on macOS may raise: current limit exceeds maximum limit
        pass


def _commit_to_sha(commit):
    """Return the output of ``git rev-parse <commit>`` as a stripped string.

    Raises ``click.ClickException`` when git exits with a non-zero status.
    """
    result = util.run(['git', 'rev-parse', commit], output=False, echo=False)

    if result.returncode == 0:
        # stdout is bytes here; decode and drop the trailing newline.
        return result.stdout.decode('ascii').strip()

    raise click.ClickException(
        f'Could not find SHA matching commit `{commit}`'
    )


def _dirty_git_working_dir():
    """Return True when either git cleanliness check exits non-zero."""
    checks = (
        # Changes in the working tree relative to the index
        ['git', 'diff-files', '--quiet'],
        # Changes staged in the index relative to HEAD
        ['git', 'diff-index', '--quiet', '--cached', 'HEAD'],
    )
    # Run both commands before looking at either result.
    return_codes = [util.run(check).returncode for check in checks]
    return any(code != 0 for code in return_codes)


def _run_asv(cmd):
    """Run an asv command line inside the ``benchmarks`` directory.

    Parameters
    ----------
    cmd : list of str
        The full command, e.g. ``['asv', 'run', ...]``.

    The command gets its own environment: ccache/f90cache directories are
    put in front of ``PATH`` and OpenBLAS/MKL are limited to one thread.
    """
    # Always use ccache, if installed
    extra_path = os.pathsep.join([
        '/usr/lib/ccache', '/usr/lib/f90cache',
        '/usr/local/lib/ccache', '/usr/local/lib/f90cache'
    ])

    # Work on a copy: mutating `os.environ` itself would leak these settings
    # into the rest of the process and lengthen PATH on every call.
    env = os.environ.copy()
    current_path = env.get('PATH', '')
    if current_path:
        env['PATH'] = f'{extra_path}{os.pathsep}{current_path}'
    else:
        env['PATH'] = extra_path

    # Control BLAS/LAPACK threads
    env['OPENBLAS_NUM_THREADS'] = '1'
    env['MKL_NUM_THREADS'] = '1'

    # Limit memory usage; best effort only, the helper's imports may be
    # unavailable on this platform/installation.
    try:
        _set_mem_rlimit()
    except (ImportError, RuntimeError):
        pass

    util.run(cmd, cwd='benchmarks', env=env)


# CLI command that runs the lint checks
@click.command()
@click.option(
    "-b", "--branch",
    metavar='branch',
    default="main",
)
@click.option(
    '--uncommitted',
    is_flag=True,
    default=False,
    required=False,
)
@click.pass_context
def lint(ctx, branch, uncommitted):
    """🔦 Run lint checks on diffs.
    Provide target branch name or `uncommitted` to check changes before committing:

    \b
    Examples:

    \b
    For lint checks of your development branch with `main` or a custom branch:

    \b
    $ spin lint # defaults to main
    $ spin lint --branch custom_branch

    \b
    To check just the uncommitted changes before committing

    \b
    $ spin lint --uncommitted
    """
    try:
        # Loading tools/linter.py fails with ModuleNotFoundError when one of
        # its own imports is not installed.
        linter = _get_numpy_tools(pathlib.Path('linter.py'))
    except ModuleNotFoundError as e:
        raise click.ClickException(
            f"{e.msg}. Install using requirements/linter_requirements.txt"
        ) from e

    linter.DiffLinter(branch).run_lint(uncommitted)


# CLI command that runs the asv benchmarks
@click.command()
@click.option(
    '--tests', '-t',
    default=None, metavar='TESTS', multiple=True,
    help="Which tests to run"
)
@click.option(
    '--compare', '-c',
    is_flag=True,
    default=False,
    help="Compare benchmarks between the current branch and main "
         "(unless other branches specified). "
         "The benchmarks are each executed in a new isolated "
         "environment."
)
@click.option(
    '--verbose', '-v', is_flag=True, default=False
)
@click.option(
    '--quick', '-q', is_flag=True, default=False,
    help="Run each benchmark only once (timings won't be accurate)"
)
@click.argument(
    'commits', metavar='',
    required=False,
    nargs=-1
)
@click.pass_context
def bench(ctx, tests, compare, verbose, quick, commits):
    """🏋 Run benchmarks.

    \b
    Examples:

    \b
    $ spin bench -t bench_core
    $ spin bench -t bench_clip -t bench_app

    Two revisions can be compared. By default, `HEAD` is compared to `main`;
    other revisions can be given explicitly:

    \b
    $ spin bench --compare
    $ spin bench --compare main
    $ spin bench --compare main HEAD
    """
    # Normalise COMMITS to exactly two revisions: (baseline, contender).
    if not commits:
        commits = ('main', 'HEAD')
    elif len(commits) == 1:
        commits = commits + ('HEAD',)
    elif len(commits) > 2:
        raise click.ClickException(
            'Need a maximum of two revisions to compare'
        )

    # Each selected test becomes its own `--bench` argument.
    bench_args = []
    for t in tests:
        bench_args += ['--bench', t]

    if verbose:
        bench_args = ['-v'] + bench_args

    if quick:
        bench_args = ['--quick'] + bench_args

    if not compare:
        # No comparison requested; we build and benchmark the current version
        # NOTE(review): the body of this branch was missing from the source;
        # the build step and the asv command line below are a reconstruction
        # -- confirm against upstream.
        click.secho(
            "Invoking `build` prior to running benchmarks:",
            bold=True, fg="bright_green"
        )
        ctx.invoke(build)

        meson._set_pythonpath()

        cmd = [
            'asv', 'run', '--dry-run', '--show-stderr', '--python=same'
        ] + bench_args
        _run_asv(cmd)
    else:
        # Ensure that we don't have uncommited changes
        commit_a, commit_b = [_commit_to_sha(c) for c in commits]

        # Test the name the user supplied: `commit_b` has already been
        # resolved to a SHA and therefore can never equal 'HEAD'.
        if commits[1] == 'HEAD' and _dirty_git_working_dir():
            click.secho(
                "WARNING: you have uncommitted changes --- "
                "these will NOT be benchmarked!",
                fg="red"
            )

        # NOTE(review): the asv invocation was missing from the source;
        # reconstructed -- confirm the options against upstream.
        cmd_compare = [
            'asv', 'continuous', '--factor', '1.05',
        ] + bench_args + [commit_a, commit_b]
        _run_asv(cmd_compare)

# NOTE(review): the `@click.pass_context` decorator, the `def` line and the
# start of the docstring were missing from the source; they are reconstructed
# here to mirror the `ipython` command -- confirm against upstream.
@click.command(context_settings={
    'ignore_unknown_options': True
})
@click.argument("python_args", metavar='', nargs=-1)
@click.pass_context
def python(ctx, python_args, *args, **kwargs):
    """🐍 Launch Python shell with PYTHONPATH set

    OPTIONS are passed through directly to Python, e.g.:

    spin python -c 'import sys; print(sys.path)'
    """
    env = os.environ
    # Show all warnings unless the user already configured PYTHONWARNINGS.
    env['PYTHONWARNINGS'] = env.get('PYTHONWARNINGS', 'all')
    ctx.forward(meson.python)
@click.command(context_settings={
    'ignore_unknown_options': True  # let unknown options through to IPython
})
@click.argument("ipython_args", metavar='', nargs=-1)
@click.pass_context
def ipython(ctx, ipython_args):
    """💻 Launch IPython shell with PYTHONPATH set

    OPTIONS are passed through directly to IPython, e.g.:

    spin ipython -i myscript.py
    """
    # Show all warnings unless the user already configured PYTHONWARNINGS.
    os.environ.setdefault('PYTHONWARNINGS', 'all')

    # Build first, then point PYTHONPATH at the result.
    ctx.invoke(build)
    ppath = meson._set_pythonpath()

    print(f'💻 Launching IPython with PYTHONPATH="{ppath}"')

    # Executed by IPython on start-up: import NumPy and report its version.
    preimport = (r"import numpy as np; "
                 r"print(f'\nPreimported NumPy {np.__version__} as np')")

    launch_cmd = [
        "ipython",
        "--ignore-cwd",
        f"--TerminalIPythonApp.exec_lines={preimport}",
    ]
    launch_cmd.extend(ipython_args)
    util.run(launch_cmd)


@click.command(context_settings={"ignore_unknown_options": True})
@click.pass_context
def mypy(ctx):
    """🦆 Run Mypy tests for NumPy
    """
    # Set the flag in the process environment before handing over to `test`.
    os.environ['NPY_RUN_MYPY_IN_TESTSUITE'] = '1'
    # Run `test` on numpy/typing with the "full" marker expression.
    ctx.params.update(
        pytest_args=[os.path.join('numpy', 'typing')],
        markexpr='full',
    )
    ctx.forward(test)


@click.command(context_settings={'ignore_unknown_options': True})
@click.option(
    "--with-scipy-openblas",
    type=click.Choice(["32", "64"]),
    default=None,
    required=True,
    help="Build with pre-installed scipy-openblas32 or scipy-openblas64 wheel"
)
def config_openblas(with_scipy_openblas):
    """🔧 Create .openblas/scipy-openblas.pc file

    Also create _distributor_init_local.py

    Requires a pre-installed scipy-openblas64 or scipy-openblas32
    """
    # All the work is done by the helper.
    _config_openblas(blas_variant=with_scipy_openblas)


def _config_openblas(blas_variant):
    """Write the files needed to build against a scipy-openblas wheel.

    Parameters
    ----------
    blas_variant : str or None
        Suffix of the ``scipy_openblas<variant>`` package to use (the CLI
        offers "32" and "64"). A falsy value makes this a no-op.

    Raises
    ------
    RuntimeError
        If the ``scipy_openblas<variant>`` package cannot be imported.

    Notes
    -----
    Relative to the current working directory this writes
    ``numpy/_distributor_init_local.py`` (importing the package) and
    ``.openblas/scipy-openblas.pc`` (the package's pkg-config text).
    """
    import importlib

    if not blas_variant:
        return

    module_name = f"scipy_openblas{blas_variant}"
    try:
        openblas = importlib.import_module(module_name)
    except ModuleNotFoundError as e:
        raise RuntimeError(f"'pip install {module_name}' first") from e

    basedir = os.getcwd()
    openblas_dir = os.path.join(basedir, ".openblas")
    pkg_config_fname = os.path.join(openblas_dir, "scipy-openblas.pc")

    local = os.path.join(basedir, "numpy", "_distributor_init_local.py")
    with open(local, "wt", encoding="utf8") as fid:
        fid.write(f"import {module_name}\n")

    os.makedirs(openblas_dir, exist_ok=True)
    with open(pkg_config_fname, "wt", encoding="utf8") as fid:
        fid.write(
            openblas.get_pkg_config(use_preloading=True)
        )


@click.command()
@click.option(
    "-v", "--version-override",
    help="NumPy version of release",
    required=False
)
@click.pass_context
def notes(ctx, version_override):
    """🎉 Generate release notes and validate

    \b
    Example:

    \b
    $ spin notes --version-override 2.0

    \b
    To automatically pick the version

    \b
    $ spin notes
    """
    project_config = util.get_config()
    # An explicit override wins over the version from the project config.
    version = version_override or project_config['project.version']

    click.secho(
        f"Generating release notes for NumPy {version}",
        bold=True, fg="bright_green",
    )

    # Check if `towncrier` is installed
    if not shutil.which("towncrier"):
        raise click.ClickException(
            "please install `towncrier` to use this command"
        )

    click.secho(
        f"Reading upcoming changes from {project_config['tool.towncrier.directory']}",
        bold=True, fg="bright_yellow"
    )

    # towncrier build --version 2.1 --yes
    cmd = ["towncrier", "build", "--version", version, "--yes"]
    # `sys_exit=False` so that a failure can be reported below.
    p = util.run(cmd=cmd, sys_exit=False, output=True, encoding="utf-8")
    if p.returncode != 0:
        raise click.ClickException(
            f"`towncrier` failed returned {p.returncode} with error `{p.stderr}`"
        )

    # The configured filename is a template with a `{version}` field.
    output_path = project_config['tool.towncrier.filename'].format(version=version)
    click.secho(
        f"Release notes successfully written to {output_path}",
        bold=True, fg="bright_yellow"
    )

    click.secho(
        "Verifying consumption of all news fragments",
        bold=True, fg="bright_green",
    )

    try:
        newsfragment_check = _get_numpy_tools(
            pathlib.Path('ci', 'test_all_newsfragments_used.py')
        )
    except ModuleNotFoundError as e:
        raise click.ClickException(
            f"{e.msg}. Install the missing packages to use this command."
        ) from e

    newsfragment_check.main()

`.\numpy\benchmarks\asv_pip_nopep517.py`

"""
This file is used by asv_compare.conf.json.tpl.
"""
import subprocess
import sys

# `pip wheel` run with the current interpreter, with PEP 517 builds disabled.
cmd = [sys.executable, '-mpip', 'wheel', '--no-use-pep517']

# Probe run without further arguments: only pip's message is of interest,
# so stderr is folded into the captured output.
try:
    output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, text=True)
except subprocess.CalledProcessError as e:
    # Only this exception carries the captured output; anything else (e.g.
    # the interpreter cannot be started) is a real error and propagates.
    output = e.output or ""

# A pip that reports "no such option" does not know `--no-use-pep517`.
if "no such option" in output:
    print("old version of pip, escape '--no-use-pep517'")
    cmd.pop()

# Real invocation, with the arguments given to this script appended.
# NOTE(review): the exit status of this run is not propagated -- confirm
# whether the caller relies on that.
subprocess.run(cmd + sys.argv[1:])

`.\numpy\benchmarks\benchmarks\bench_app.py`

from .common import Benchmark  # 导入Benchmark类，来自.common模块

import numpy as np  # 导入NumPy库，用于科学计算

class LaplaceInplace(Benchmark):
    """Time repeated Laplace-style grid updates, in place or via temporaries."""
    # One benchmark run per update strategy.
    params = ['inplace', 'normal']
    param_names = ['update']

    def setup(self, update):
        """Build the closure timed by `time_it` for the chosen `update`."""
        grid_size = 150
        sweeps = 1000
        step_x = 0.1
        step_y = 0.1
        dx2 = step_x * step_x
        dy2 = step_y * step_y

        def num_update(u, dx2, dy2):
            # Single expression: interior points are rebuilt from their four
            # neighbours, creating temporaries along the way.
            u[1:-1, 1:-1] = (((u[2:, 1:-1] + u[:-2, 1:-1]) * dy2 +
                              (u[1:-1, 2:] + u[1:-1, :-2]) * dx2)
                             / (2 * (dx2 + dy2)))

        def num_inplace(u, dx2, dy2):
            # Same update, spelled out with `out=` so that only two scratch
            # arrays are allocated per sweep.
            rows = u[:-2, 1:-1].copy()
            np.add(rows, u[2:, 1:-1], out=rows)
            np.multiply(rows, dy2, out=rows)
            cols = u[1:-1, 2:].copy()
            np.add(cols, u[1:-1, :-2], out=cols)
            np.multiply(cols, dx2, out=cols)
            np.add(rows, cols, out=rows)
            np.multiply(rows, (1.0 / (2.0 * (dx2 + dy2))),
                        out=u[1:-1, 1:-1])

        def laplace(N, Niter=100, func=num_update, args=()):
            # N x N grid of zeros whose first row is held at 1.
            grid = np.zeros([N, N], order='C')
            grid[0] = 1
            for _ in range(Niter):
                func(grid, *args)
            return grid

        strategies = {'inplace': num_inplace, 'normal': num_update}
        func = strategies[update]

        def run():
            laplace(grid_size, sweeps, func, args=(dx2, dy2))

        # Stored on the instance so the timed method is a bare call.
        self.run = run

    def time_it(self, update):
        self.run()


class MaxesOfDots(Benchmark):
    """Time dot products combined with axis-wise maxima."""

    def setup(self):
        # Fixed seed so every run sees the same data.
        np.random.seed(1)
        nsubj, nfeat, ntime = 5, 100, 200

        # One (ntime, nfeat) array of normal samples per subject.
        self.arrays = [np.random.normal(size=(ntime, nfeat))
                       for _ in range(nsubj)]

    def maxes_of_dots(self, arrays):
        """
        A feature score for each feature in each dataset
        :ref:`Haxby et al., Neuron (2011) <HGC+11>`.
        If arrays are column-wise zscore-d before computation it
        results in characterizing each column in each array with
        sum of maximal correlations of that column with columns
        in other arrays.

        Arrays must agree only on the first dimension.

        NumPy uses this as a simultaneous benchmark of 1) dot products
        and 2) max(<array>, axis=<int>).
        """
        feature_scores = [0] * len(arrays)
        for i, sd in enumerate(arrays):
            # Pair array i with every later array j exactly once.
            for j, sd2 in enumerate(arrays[i + 1:], start=i + 1):
                corr_temp = np.dot(sd.T, sd2)
                feature_scores[i] += np.max(corr_temp, axis=1)
                feature_scores[j] += np.max(corr_temp, axis=0)
        return feature_scores

    def time_it(self):
        self.maxes_of_dots(self.arrays)

`.\numpy\benchmarks\benchmarks\bench_array_coercion.py`

# 从.common模块导入Benchmark类，用于性能基准测试
from .common import Benchmark
# 导入NumPy库，用于数组操作
import numpy as np

# Benchmarks of array-coercion entry points on small inputs; every timing
# method below is run once per entry of `params`.
class ArrayCoercionSmall(Benchmark):
    # The inputs: a range, a list, a Python int, a one-element int64 array
    # and an int64 scalar.
    params = [[range(3), [1], 1, np.array([5], dtype=np.int64), np.int64(5)]]
    # Name of the single benchmark parameter.
    param_names = ['array_like']
    # dtype instance created once, so building it is not part of the timings.
    int64 = np.dtype(np.int64)

    # np.array with a non-integer `ndmin`; a TypeError is tolerated.
    def time_array_invalid_kwarg(self, array_like):
        try:
            np.array(array_like, ndmin="not-integer")
        except TypeError:
            pass

    # Plain np.array call.
    def time_array(self, array_like):
        np.array(array_like)

    # np.array with the dtype passed positionally.
    def time_array_dtype_not_kwargs(self, array_like):
        np.array(array_like, self.int64)

    # np.array with copy=None.
    def time_array_no_copy(self, array_like):
        np.array(array_like, copy=None)

    # np.array with subok=True.
    def time_array_subok(self, array_like):
        np.array(array_like, subok=True)

    # np.array with dtype, copy, order, subok and ndmin all given as keywords.
    def time_array_all_kwargs(self, array_like):
        np.array(array_like, dtype=self.int64, copy=None, order="F",
                 subok=False, ndmin=2)

    # Plain np.asarray call.
    def time_asarray(self, array_like):
        np.asarray(array_like)

    # np.asarray with a dtype keyword.
    def time_asarray_dtype(self, array_like):
        np.asarray(array_like, dtype=self.int64)

    # np.asarray with dtype and order="F".
    def time_asarray_dtype_order(self, array_like):
        np.asarray(array_like, dtype=self.int64, order="F")

    # Plain np.asanyarray call.
    def time_asanyarray(self, array_like):
        np.asanyarray(array_like)

    # np.asanyarray with a dtype keyword.
    def time_asanyarray_dtype(self, array_like):
        np.asanyarray(array_like, dtype=self.int64)

    # np.asanyarray with dtype and order="F".
    def time_asanyarray_dtype_order(self, array_like):
        np.asanyarray(array_like, dtype=self.int64, order="F")

    # Plain np.ascontiguousarray call.
    def time_ascontiguousarray(self, array_like):
        np.ascontiguousarray(array_like)

`.\numpy\benchmarks\benchmarks\bench_clip.py`

# 从.common模块中导入Benchmark类
from .common import Benchmark
# 导入NumPy库并使用别名np
import numpy as np

# Benchmark of np.clip on floating-point arrays
class ClipFloat(Benchmark):
    """Time `np.clip` writing into a preallocated float output array."""
    # Every dtype is combined with every size.
    param_names = ["dtype", "size"]
    params = [
        [np.float32, np.float64, np.longdouble],
        [100, 100_000]
    ]

    def setup(self, dtype, size):
        """Create the input and output arrays for one (dtype, size) pair."""
        # Fixed seed: identical data on every run.
        rng = np.random.RandomState(994584855)
        samples = rng.random(size=size)
        self.array = samples.astype(dtype)
        # Output buffer with the shape and dtype of the input, filled with 0.5.
        self.dataout = np.full_like(self.array, 0.5)

    def time_clip(self, dtype, size):
        # Bounds 0.125 and 0.875; the fourth positional argument is the
        # output array.
        np.clip(self.array, 0.125, 0.875, self.dataout)


# Benchmark of np.clip on integer arrays
class ClipInteger(Benchmark):
    """Time `np.clip` writing into a preallocated integer output array."""
    # Every dtype is combined with every size.
    param_names = ["dtype", "size"]
    params = [
        [np.int32, np.int64],
        [100, 100_000]
    ]

    def setup(self, dtype, size):
        """Create the input and output arrays for one (dtype, size) pair."""
        # Fixed seed: identical data on every run.
        rng = np.random.RandomState(1301109903)
        # Integers drawn with `randint(256)` in the requested dtype.
        self.array = rng.randint(256, size=size, dtype=dtype)
        # Output buffer with the shape and dtype of the input, filled with 128.
        self.dataout = np.full_like(self.array, 128)

    def time_clip(self, dtype, size):
        # Bounds 32 and 224; the fourth positional argument is the
        # output array.
        np.clip(self.array, 32, 224, self.dataout)

`.\numpy\benchmarks\benchmarks\bench_core.py`

# 从.common模块导入Benchmark类
from .common import Benchmark

# 导入NumPy库并使用np作为别名
import numpy as np

# 定义Core类，继承Benchmark类
class Core(Benchmark):
    """Micro-benchmarks for array creation, cast checks, stacking and
    diagonal/triangular helpers."""

    def setup(self):
        # Plain Python ranges of length 100 and 50.
        self.l100 = range(100)
        self.l50 = range(50)
        # 1000-element Python list of built-in floats.
        self.float_l1000 = [float(i) for i in range(1000)]
        # 1000-element Python list of np.float64 scalars.
        self.float64_l1000 = [np.float64(i) for i in range(1000)]
        # 1000-element Python list of built-in ints.
        self.int_l1000 = list(range(1000))
        # List holding two 1000-element integer arrays.
        self.l = [np.arange(1000), np.arange(1000)]
        # memoryview wrappers around the same two arrays.
        self.l_view = [memoryview(a) for a in self.l]
        # 10x10 array of ones.
        self.l10x10 = np.ones((10, 10))
        # float64 dtype instance, built once so lookups are not timed.
        self.float64_dtype = np.dtype(np.float64)

    # np.array from a Python scalar.
    def time_array_1(self):
        np.array(1)

    # np.array from an empty list.
    def time_array_empty(self):
        np.array([])

    # np.array from a one-element list literal (does not use self.l100).
    def time_array_l1(self):
        np.array([1])

    # np.array from a range of length 100.
    def time_array_l100(self):
        np.array(self.l100)

    # np.array from a list of 1000 Python floats.
    def time_array_float_l1000(self):
        np.array(self.float_l1000)

    # Same input, with the dtype given explicitly.
    def time_array_float_l1000_dtype(self):
        np.array(self.float_l1000, dtype=self.float64_dtype)

    # np.array from a list of 1000 np.float64 scalars.
    def time_array_float64_l1000(self):
        np.array(self.float64_l1000)

    # np.array from a list of 1000 Python ints.
    def time_array_int_l1000(self):
        np.array(self.int_l1000)

    # np.array from a list of two arrays.
    def time_array_l(self):
        np.array(self.l)

    # np.array from a list of two memoryviews.
    def time_array_l_view(self):
        np.array(self.l_view)

    # np.can_cast with the default casting rule.
    def time_can_cast(self):
        np.can_cast(self.l10x10, self.float64_dtype)

    # np.can_cast with casting="same_kind".
    def time_can_cast_same_kind(self):
        np.can_cast(self.l10x10, self.float64_dtype, casting="same_kind")

    # Stack the two arrays vertically.
    def time_vstack_l(self):
        np.vstack(self.l)

    # Stack the two arrays horizontally.
    def time_hstack_l(self):
        np.hstack(self.l)

    # Stack the two arrays depth-wise.
    def time_dstack_l(self):
        np.dstack(self.l)

    # np.arange with 100 elements.
    def time_arange_100(self):
        np.arange(100)

    # np.zeros with 100 elements.
    def time_zeros_100(self):
        np.zeros(100)

    # np.ones with 100 elements.
    def time_ones_100(self):
        np.ones(100)

    # np.empty with 100 elements.
    def time_empty_100(self):
        np.empty(100)

    # np.empty_like on the 10x10 array.
    def time_empty_like(self):
        np.empty_like(self.l10x10)

    # np.eye(100): 100x100 matrix with ones on the diagonal.
    def time_eye_100(self):
        np.eye(100)

    # np.identity(100): 100x100 identity matrix.
    def time_identity_100(self):
        np.identity(100)

    # np.eye(3000): 3000x3000 matrix with ones on the diagonal.
    def time_eye_3000(self):
        np.eye(3000)

    # np.identity(3000): 3000x3000 identity matrix.
    def time_identity_3000(self):
        np.identity(3000)

    # np.diag on a range of length 100.
    def time_diag_l100(self):
        np.diag(self.l100)

    # np.diagflat on a range of length 100.
    def time_diagflat_l100(self):
        np.diagflat(self.l100)

    # np.diagflat on a list of two ranges of length 50.
    def time_diagflat_l50_l50(self):
        np.diagflat([self.l50, self.l50])

    # Upper triangle of the 10x10 array.
    def time_triu_l10x10(self):
        np.triu(self.l10x10)

    # Lower triangle of the 10x10 array.
    def time_tril_l10x10(self):
        np.tril(self.l10x10)

    # Upper-triangle indices for n=500.
    def time_triu_indices_500(self):
        np.triu_indices(500)

    # Lower-triangle indices for n=500.
    def time_tril_indices_500(self):
        np.tril_indices(500)


# 定义Temporaries类，继承Benchmark类
class Temporaries(Benchmark):
    """Benchmark arithmetic expressions that allocate intermediate arrays."""

    def setup(self):
        # Two operand pairs, all ones: 50_000 and 1_000_000 elements.
        self.amid = np.ones(50000)
        self.bmid = np.ones(50000)
        self.alarge = np.ones(1000000)
        self.blarge = np.ones(1000000)

    def time_mid(self):
        # NOTE(review): the body of this method was truncated in the source
        # text, which left the class syntactically invalid. The expression
        # below is a reconstruction from the operands prepared in setup();
        # confirm it against the original benchmark. `alarge`/`blarge` are
        # not used by any benchmark that survived the truncation.
        (self.amid * 2) + self.bmid


class CorrConv(Benchmark):
    """Benchmark np.correlate and np.convolve over input sizes and modes."""
    # NOTE(review): the class header was lost in the source text (it read
    # `class ClassName:` nested inside Temporaries, with no Benchmark base).
    # The name is taken from the two functions benchmarked here; confirm the
    # intended name.

    # One list per parameter: length of x1, length of x2, and the `mode`
    # string passed to np.correlate / np.convolve.
    params = [[50, 1000, int(1e5)],
              [10, 100, 1000, int(1e4)],
              ['valid', 'same', 'full']]

    # Parameter names, in the same order as `params`.
    param_names = ['size1', 'size2', 'mode']

    def setup(self, size1, size2, mode):
        # x1: size1 evenly spaced samples over [0, 1].
        self.x1 = np.linspace(0, 1, num=size1)
        # x2: cosine evaluated at size2 evenly spaced samples over [0, 2*pi].
        self.x2 = np.cos(np.linspace(0, 2*np.pi, num=size2))

    def time_correlate(self, size1, size2, mode):
        # Cross-correlation of x1 and x2 with the selected mode.
        np.correlate(self.x1, self.x2, mode=mode)

    def time_convolve(self, size1, size2, mode):
        # Convolution of x1 and x2 with the selected mode.
        np.convolve(self.x1, self.x2, mode=mode)
class CountNonzero(Benchmark):
    """Benchmark np.count_nonzero over the whole array and along axes."""
    # Parameter names, in the order setup() and time_*() receive them.
    param_names = ['numaxes', 'size', 'dtype']
    # numaxes: number of rows; size: row length; dtype: element type the
    # data is cast to before counting.
    params = [
        [1, 2, 3],
        [100, 10000, 1000000],
        [bool, np.int8, np.int16, np.int32, np.int64, str, object]
    ]

    def setup(self, numaxes, size, dtype):
        # 2-D array of shape (numaxes, size) holding 0 .. numaxes*size - 1.
        self.x = np.arange(numaxes * size).reshape(numaxes, size)
        # Reduce the values modulo 3, then cast to the dtype under test.
        self.x = (self.x % 3).astype(dtype)

    # Count over the whole array.
    def time_count_nonzero(self, numaxes, size, dtype):
        np.count_nonzero(self.x)

    # Count along the last axis.
    def time_count_nonzero_axis(self, numaxes, size, dtype):
        np.count_nonzero(self.x, axis=self.x.ndim - 1)

    # Count along the last two axes; skipped when the array has fewer than
    # two dimensions.
    def time_count_nonzero_multi_axis(self, numaxes, size, dtype):
        if self.x.ndim >= 2:
            np.count_nonzero(self.x, axis=(
                self.x.ndim - 1, self.x.ndim - 2))


class PackBits(Benchmark):
    """Benchmark np.packbits on 1-D and 2-D inputs."""
    # Single parameter: the dtype of the input arrays.
    param_names = ['dtype']
    # dtype: bool or np.uintp.
    params = [[bool, np.uintp]]
    
    def setup(self, dtype):
        # 1-D input: 10000 ones of the dtype under test.
        self.d = np.ones(10000, dtype=dtype)
        # 2-D input: ones with shape (200, 1000).
        self.d2 = np.ones((200, 1000), dtype=dtype)

    # Pack the 1-D input with the default bit order.
    def time_packbits(self, dtype):
        np.packbits(self.d)

    # Pack the 1-D input with bitorder="little".
    def time_packbits_little(self, dtype):
        np.packbits(self.d, bitorder="little")

    # Pack the 2-D input along axis 0.
    def time_packbits_axis0(self, dtype):
        np.packbits(self.d2, axis=0)

    # Pack the 2-D input along axis 1.
    def time_packbits_axis1(self, dtype):
        np.packbits(self.d2, axis=1)


class UnpackBits(Benchmark):
    """Benchmark np.unpackbits on 1-D and 2-D uint8 inputs."""

    def setup(self):
        # 1-D input: 10000 uint8 ones.
        self.d = np.ones(10000, dtype=np.uint8)
        # 2-D input: uint8 ones with shape (200, 1000).
        self.d2 = np.ones((200, 1000), dtype=np.uint8)

    # Unpack the 1-D input with the default bit order.
    def time_unpackbits(self):
        np.unpackbits(self.d)

    # Unpack the 1-D input with bitorder="little".
    def time_unpackbits_little(self):
        np.unpackbits(self.d, bitorder="little")

    # Unpack the 2-D input along axis 0.
    def time_unpackbits_axis0(self):
        np.unpackbits(self.d2, axis=0)

    # Unpack the 2-D input along axis 1.
    def time_unpackbits_axis1(self):
        np.unpackbits(self.d2, axis=1)

    # Unpack the 2-D input along axis 1 with bitorder="little".
    def time_unpackbits_axis1_little(self):
        np.unpackbits(self.d2, bitorder="little", axis=1)


class Indices(Benchmark):
    """Benchmark np.indices."""

    # Build the index grid for a (1000, 500) shape.
    def time_indices(self):
        np.indices((1000, 500))


class StatsMethods(Benchmark):
    """Benchmark the ndarray reduction methods min, max, mean, std, prod,
    var and sum across dtypes and sizes."""
    # Parameter names, in the order setup() and time_*() receive them.
    param_names = ['dtype', 'size']
    # dtype: dtype name (a string) of the benchmarked array;
    # size: number of elements.
    params = [['int64', 'uint64', 'float32', 'float64',
               'complex64', 'bool_'],
              [100, 10000]]

    def setup(self, dtype, size):
        """Build the input array for one (dtype, size) combination.

        Complex dtypes get random real and imaginary parts; every other
        dtype gets an array of ones.
        """
        if dtype.startswith('complex'):
            # randn() yields float64, so the sum below is complex128. Cast
            # it so the benchmark measures the dtype it is labelled with
            # (the parameter list asks for complex64).
            data = np.random.randn(size) + 1j * np.random.randn(size)
            self.data = data.astype(dtype)
        else:
            self.data = np.ones(size, dtype=dtype)

    # Minimum of the array.
    def time_min(self, dtype, size):
        self.data.min()

    # Maximum of the array.
    def time_max(self, dtype, size):
        self.data.max()

    # Arithmetic mean of the array.
    def time_mean(self, dtype, size):
        self.data.mean()

    # Standard deviation of the array.
    def time_std(self, dtype, size):
        self.data.std()

    # Product of all elements.
    def time_prod(self, dtype, size):
        self.data.prod()

    # Variance of the array.
    def time_var(self, dtype, size):
        self.data.var()

    # Sum of all elements.
    def time_sum(self, dtype, size):
        self.data.sum()


class NumPyChar(Benchmark):
    """Benchmark np.char functions on few-long-strings and many-short-strings
    inputs."""

    def setup(self):
        # A: two strings of 100 characters each.
        self.A = np.array([100*'x', 100*'y'])
        # B: 1000 copies of the two-character string 'aa'.
        self.B = np.array(1000 * ['aa'])

        # C: three long strings; the first two contain a 'z' near the end,
        # the third contains none.
        self.C = np.array([100*'x' + 'z', 100*'y' + 'z' + 'y', 100*'x'])
        # D: 2000 two-character strings, 1000 'ab' followed by 1000 'ac'.
        self.D = np.array(1000 * ['ab'] + 1000 * ['ac'])

    # isalpha on few long strings.
    def time_isalpha_small_list_big_string(self):
        np.char.isalpha(self.A)

    # isalpha on many short strings.
    def time_isalpha_big_list_small_string(self):
        np.char.isalpha(self.B)

    # Element-wise concatenation of few long strings with themselves.
    def time_add_small_list_big_string(self):
        np.char.add(self.A, self.A)

    # Element-wise concatenation of many short strings with themselves.
    def time_add_big_list_small_string(self):
        np.char.add(self.B, self.B)

    # Search for 'z' in few long strings.
    def time_find_small_list_big_string(self):
        np.char.find(self.C, 'z')

    # Search for 'b' in many short strings.
    def time_find_big_list_small_string(self):
        np.char.find(self.D, 'b')

    # startswith('x') on few long strings.
    def time_startswith_small_list_big_string(self):
        np.char.startswith(self.A, 'x')

    # startswith('a') on many short strings.
    def time_startswith_big_list_small_string(self):
        np.char.startswith(self.B, 'a')

`.\numpy\benchmarks\benchmarks\bench_creation.py`

# 从当前包中导入Benchmark类、TYPES1常量和get_squares_函数
from .common import Benchmark, TYPES1, get_squares_

# 导入NumPy库，并使用np作为别名
import numpy as np

# Benchmark类的子类，用于测量meshgrid生成的性能
class MeshGrid(Benchmark):
    """Time ``np.meshgrid`` across grid sizes, dimension counts, indexing
    conventions and dtypes.
    """
    # Names of the benchmark parameters, in the order of `params` below.
    param_names = ['size', 'ndims', 'ind', 'ndtype']
    params = [
        [16, 32],       # size: points per coordinate vector
        [2, 3, 4],      # ndims: number of coordinate vectors
        ['ij', 'xy'],   # ind: value passed as `indexing`
        TYPES1,         # ndtype: dtype of the coordinate vectors
    ]
    # Benchmark timeout value.
    timeout = 10

    def setup(self, size, ndims, ind, ndtype):
        # Seeded generator, so the coordinate vectors are the same on
        # every run.
        rng = np.random.RandomState(1864768776)
        axes = []
        for _ in range(ndims):
            samples = rng.random_sample(size)
            axes.append(samples.astype(ndtype))
        self.grid_dims = axes

    def time_meshgrid(self, size, ndims, ind, ndtype):
        # Expand the coordinate vectors into full coordinate grids.
        np.meshgrid(*self.grid_dims, indexing=ind)


# Benchmark类的子类，用于测量创建函数的性能
class Create(Benchmark):
    """Benchmark the array creation functions full, ones, zeros, empty and
    their ``*_like`` variants.
    """
    # shape: requested output shape (two lengths and one 2-D shape);
    # npdtypes: dtype taken from TYPES1.
    params = [[16, 512, (32, 32)],
              TYPES1]
    # Parameter names, in the same order as `params`.
    param_names = ['shape', 'npdtypes']
    # Benchmark timeout value.
    timeout = 10

    def setup(self, shape, npdtypes):
        # get_squares_ is defined in .common; presumably it returns a
        # mapping from dtype to a sequence of arrays -- confirm there.
        values = get_squares_()
        # Keep the first entry stored for this dtype as the template array.
        self.xarg = values.get(npdtypes)[0]

    # np.full with the requested shape and dtype, filled with self.xarg[1].
    def time_full(self, shape, npdtypes):
        np.full(shape, self.xarg[1], dtype=npdtypes)

    # np.full_like on the template array (`shape` is not used here).
    def time_full_like(self, shape, npdtypes):
        np.full_like(self.xarg, self.xarg[0])

    # np.ones with the requested shape and dtype.
    def time_ones(self, shape, npdtypes):
        np.ones(shape, dtype=npdtypes)

    # np.ones_like on the template array (`shape` is not used here).
    def time_ones_like(self, shape, npdtypes):
        np.ones_like(self.xarg)

    # np.zeros with the requested shape and dtype.
    def time_zeros(self, shape, npdtypes):
        np.zeros(shape, dtype=npdtypes)

    # np.zeros_like on the template array (`shape` is not used here).
    def time_zeros_like(self, shape, npdtypes):
        np.zeros_like(self.xarg)

    # np.empty with the requested shape and dtype.
    def time_empty(self, shape, npdtypes):
        np.empty(shape, dtype=npdtypes)

    # np.empty_like on the template array (`shape` is not used here).
    def time_empty_like(self, shape, npdtypes):
        np.empty_like(self.xarg)


# Benchmark类的子类，用于测量从DLPack创建数组的性能
class UfuncsFromDLP(Benchmark):
    """Benchmark np.from_dlpack.
    """
    # shape: two lengths and two 2-D shapes (not used by the visible
    # setup or timing code);
    # npdtypes: dtype taken from TYPES1.
    params = [[16, 32, (16, 16), (64, 64)],
              TYPES1]
    # Parameter names, in the same order as `params`.
    param_names = ['shape', 'npdtypes']
    # Benchmark timeout value.
    timeout = 10

    def setup(self, shape, npdtypes):
        # get_squares_ is defined in .common; presumably it returns a
        # mapping from dtype to a sequence of arrays -- confirm there.
        values = get_squares_()
        # Keep the first entry stored for this dtype as the input array.
        self.xarg = values.get(npdtypes)[0]

    # Import the input array through the DLPack protocol.
    def time_from_dlpack(self, shape, npdtypes):
        np.from_dlpack(self.xarg)

`.\numpy\benchmarks\benchmarks\bench_function_base.py`

# 从 common 模块中导入 Benchmark 类
from .common import Benchmark

# 导入 numpy 库并使用别名 np
import numpy as np

try:
    # Prefer the SkipNotImplemented exception shipped with asv_runner.
    from asv_runner.benchmarks.mark import SkipNotImplemented
except ImportError:
    # asv_runner is not importable: fall back to NotImplementedError so the
    # name is always defined.
    SkipNotImplemented = NotImplementedError


# 创建 Linspace 类，继承自 Benchmark 类
class Linspace(Benchmark):
    """Benchmark np.linspace with scalar and array start values."""

    def setup(self):
        # Three-element array used as the `start` argument.
        self.d = np.array([1, 2, 3])

    def time_linspace_scalar(self):
        # Scalar endpoints, 2 samples.
        np.linspace(0, 10, 2)

    def time_linspace_array(self):
        # Array-valued start, scalar stop, 10 samples.
        np.linspace(self.d, 10, 10)


# 创建 Histogram1D 类，继承自 Benchmark 类
class Histogram1D(Benchmark):
    """Benchmark np.histogram on 100000 evenly spaced samples."""

    def setup(self):
        # 100000 evenly spaced values over [0, 100].
        self.d = np.linspace(0, 100, 100000)

    def time_full_coverage(self):
        # 200 bins over a range that spans all of the data.
        np.histogram(self.d, 200, (0, 100))

    def time_small_coverage(self):
        # 200 bins over the narrow range (50, 51).
        np.histogram(self.d, 200, (50, 51))

    def time_fine_binning(self):
        # 10000 bins over the full data range.
        np.histogram(self.d, 10000, (0, 100))


# 创建 Histogram2D 类，继承自 Benchmark 类
class Histogram2D(Benchmark):
    """Benchmark np.histogramdd on two-column sample data."""

    def setup(self):
        # 200000 evenly spaced values over [0, 100], reshaped into rows of
        # two values each.
        self.d = np.linspace(0, 100, 200000).reshape((-1,2))

    def time_full_coverage(self):
        # 200x200 bins over a range that spans all of the data.
        np.histogramdd(self.d, (200, 200), ((0, 100), (0, 100)))

    def time_small_coverage(self):
        # 200x200 bins over the narrow range (50, 51) on both axes.
        np.histogramdd(self.d, (200, 200), ((50, 51), (50, 51)))

    def time_fine_binning(self):
        # 10000x10000 bins over the full data range.
        np.histogramdd(self.d, (10000, 10000), ((0, 100), (0, 100)))


# 创建 Bincount 类，继承自 Benchmark 类
class Bincount(Benchmark):
    """Benchmark np.bincount with and without weights."""

    def setup(self):
        # 80000 distinct values 0 .. 79999 of dtype np.intp.
        self.d = np.arange(80000, dtype=np.intp)
        # The same values as float64, used as weights.
        self.e = self.d.astype(np.float64)

    def time_bincount(self):
        # Count the occurrences of each value.
        np.bincount(self.d)

    def time_weights(self):
        # Weighted count, with self.e as the weights.
        np.bincount(self.d, weights=self.e)


# 创建 Mean 类，继承自 Benchmark 类
class Mean(Benchmark):
    """Benchmark np.mean over a whole array and along one axis."""
    # Single parameter: the row length of the benchmarked array.
    param_names = ['size']
    params = [[1, 10, 100_000]]

    def setup(self, size):
        # Integers 0 .. 2*size - 1 arranged with shape (2, size).
        self.array = np.arange(2*size).reshape(2, size)

    # Mean over all elements.
    def time_mean(self, size):
        np.mean(self.array)

    # Mean along axis 1.
    def time_mean_axis(self, size):
        np.mean(self.array, axis=1)


# 创建 Median 类，继承自 Benchmark 类
class Median(Benchmark):
    """Benchmark np.median on even/odd-length 1-D arrays and on 2-D arrays."""

    def setup(self):
        # Even-length (10000) and odd-length (10001) float32 ramps.
        self.e = np.arange(10000, dtype=np.float32)
        self.o = np.arange(10001, dtype=np.float32)
        # Random 2-D arrays: many rows x few columns, and the transpose
        # shape. The global RNG is used without seeding here.
        self.tall = np.random.random((10000, 20))
        self.wide = np.random.random((20, 10000))

    # Median of the even-length array.
    def time_even(self):
        np.median(self.e)

    # Median of the odd-length array.
    def time_odd(self):
        np.median(self.o)

    # Even-length array, with overwrite_input=True.
    def time_even_inplace(self):
        np.median(self.e, overwrite_input=True)

    # Odd-length array, with overwrite_input=True.
    def time_odd_inplace(self):
        np.median(self.o, overwrite_input=True)

    # First 500 elements of the even-length array, overwrite_input=True.
    def time_even_small(self):
        np.median(self.e[:500], overwrite_input=True)

    # First 500 elements of the odd-length array, overwrite_input=True.
    def time_odd_small(self):
        np.median(self.o[:500], overwrite_input=True)

    # Median along the last axis of the tall array.
    def time_tall(self):
        np.median(self.tall, axis=-1)

    # Median along axis 0 of the wide array.
    def time_wide(self):
        np.median(self.wide, axis=0)


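# NOTE(review): the comment here was cut off after "Percent"; a benchmark
# class with that name prefix appears to be missing from this text.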
class Select(Benchmark):
    """Benchmark np.select with a short and a long condition list."""

    def setup(self):
        # 20000-element integer ramp and an independent copy of it.
        self.d = np.arange(20000)
        self.e = self.d.copy()
        # Two boolean conditions.
        self.cond = [(self.d > 4), (self.d < 2)]
        # The same two conditions repeated ten times (20 entries).
        self.cond_large = [(self.d > 4), (self.d < 2)] * 10

    def time_select(self):
        # Two conditions, two choice arrays.
        np.select(self.cond, [self.d, self.e])

    def time_select_larger(self):
        # Twenty conditions, twenty choice arrays.
        np.select(self.cond_large, ([self.d, self.e] * 10))


def memoize(f):
    """Cache the results of ``f``, keyed on its positional arguments.

    The first call with a given argument tuple invokes ``f`` and stores the
    result; later calls with the same arguments reuse it. Every call returns
    ``result.copy()``, so callers can mutate what they receive without
    corrupting the cache.

    Parameters
    ----------
    f : callable
        Function to wrap. All of its arguments must be hashable and passed
        positionally, and its return value must have a ``copy()`` method.

    Returns
    -------
    callable
        The caching wrapper around ``f``.
    """
    import functools

    _memoized = {}

    @functools.wraps(f)
    def wrapped(*args):
        # Compute and store the result only on the first call for `args`.
        if args not in _memoized:
            _memoized[args] = f(*args)

        # Return a copy so the cached value is never shared with callers.
        return _memoized[args].copy()

    # Return the wrapper: returning `f` itself would bypass the cache and
    # make the decorator a no-op.
    return wrapped


class SortGenerator:
    """Factories for the input arrays used by the sort/partition benchmarks.

    Every factory takes a trailing ``rnd`` argument. None of them draws
    random numbers from it; it is accepted so that all factories can be
    called with the same argument list.
    """
    # Size of the randomly unsorted area (not used by the factories below).
    AREA_SIZE = 100
    # Size of the partially ordered sub-arrays (not used by the factories
    # below).
    BUBBLE_SIZE = 100

    @staticmethod
    @memoize
    def random(size, dtype, rnd):
        """
        Returns a randomly-shuffled array.

        The shuffle uses a private generator with a fixed seed, so the
        result is identical on every call and the global NumPy RNG state is
        left untouched.
        """
        arr = np.arange(size, dtype=dtype)
        # Seeded local generator; the `rnd` argument is intentionally not
        # consulted (callers may pass None).
        rng = np.random.RandomState(1792364059)
        rng.shuffle(arr)
        return arr

    @staticmethod
    @memoize
    def ordered(size, dtype, rnd):
        """
        Returns an ordered array.
        """
        return np.arange(size, dtype=dtype)

    @staticmethod
    @memoize
    def reversed(size, dtype, rnd):
        """
        Returns an array that's in descending order.

        Raises SkipNotImplemented when ``size - 1`` cannot be represented
        in ``dtype``.
        """
        dtype = np.dtype(dtype)
        try:
            with np.errstate(over="raise"):
                # Constructed only to detect overflow of the largest value;
                # the scalar itself is discarded.
                dtype.type(size-1)
        except (OverflowError, FloatingPointError):
            raise SkipNotImplemented("Cannot construct arange for this size.")

        return np.arange(size-1, -1, -1, dtype=dtype)

    @staticmethod
    @memoize
    def uniform(size, dtype, rnd):
        """
        Returns an array that has the same value everywhere.
        """
        return np.ones(size, dtype=dtype)

    @staticmethod
    @memoize
    def sorted_block(size, dtype, block_size, rnd):
        """
        Returns an array with blocks that are all sorted.

        The ordered range is split into ``size // block_size`` interleaved
        slices, which are then laid out one after another. When ``size`` is
        smaller than ``block_size`` the ordered range is returned as is.
        """
        a = np.arange(size, dtype=dtype)
        if size < block_size:
            return a
        block_num = size // block_size
        # Concatenate the strided slices directly instead of extending a
        # Python list one scalar at a time.
        return np.concatenate([a[i::block_num] for i in range(block_num)])


class Sort(Benchmark):
    """
    Times ``np.sort`` and ``np.argsort`` over a matrix of sort kinds,
    dtypes and input layouts of the sort that tend to show up in
    real-world data.
    """
    # The size of the benchmarked arrays.
    ARRAY_SIZE = 10000

    # Names of the benchmark parameters, in the order of `params` below.
    param_names = ['kind', 'dtype', 'array_type']
    params = [
        # In NumPy 1.17 and newer, 'merge' can be one of several stable
        # sorts; it isn't necessarily merge sort.
        ['quick', 'merge', 'heap'],
        ['float64', 'int64', 'float32', 'uint32', 'int32', 'int16', 'float16'],
        # Each entry is (SortGenerator factory name, *extra factory args).
        [
            ('random',),
            ('ordered',),
            ('reversed',),
            ('uniform',),
            ('sorted_block', 10),
            ('sorted_block', 100),
            ('sorted_block', 1000),
        ],
    ]

    def setup(self, kind, dtype, array_type):
        # Seeded generator handed to the array factory.
        rnd = np.random.RandomState(507582308)
        # First element names the factory; the rest are its extra arguments.
        factory_name = array_type[0]
        extra_args = array_type[1:]
        make_array = getattr(SortGenerator, factory_name)
        self.arr = make_array(self.ARRAY_SIZE, dtype, *extra_args, rnd)

    def time_sort(self, kind, dtype, array_type):
        # np.sort(...) is used rather than arr.sort(...) because it works on
        # a copy: the data is prepared once per setup and reused across
        # repeated timing runs, so it must not be sorted in place.
        np.sort(self.arr, kind=kind)

    def time_argsort(self, kind, dtype, array_type):
        # Indirect sort: returns the sorting indices, input left untouched.
        np.argsort(self.arr, kind=kind)
class Partition(Benchmark):
    """Benchmark np.partition and np.argpartition over dtypes, input
    layouts and partition indices."""
    # dtype: element type; array_type: (SortGenerator factory name, *extra
    # factory args); k: index to partition around.
    params = [
        ['float64', 'int64', 'float32', 'int32', 'int16', 'float16'],
        [
            ('random',),
            ('ordered',),
            ('reversed',),
            ('uniform',),
            ('sorted_block', 10),
            ('sorted_block', 100),
            ('sorted_block', 1000),
        ],
        [10, 100, 1000],
    ]
    # Parameter names, in the same order as `params`.
    param_names = ['dtype', 'array_type', 'k']

    # The size of the benchmarked arrays.
    ARRAY_SIZE = 100000

    def setup(self, dtype, array_type, k):
        # Seed the global NumPy RNG so factories that draw from it produce
        # the same data on every run.
        np.random.seed(2136297818)
        # np.random.seed() returns None, so the factories have always been
        # handed rnd=None here; spell that out instead of binding the
        # return value of seed().
        rnd = None
        array_class = array_type[0]
        self.arr = getattr(SortGenerator, array_class)(
            self.ARRAY_SIZE, dtype, *array_type[1:], rnd)

    def time_partition(self, dtype, array_type, k):
        # Partition a copy of self.arr around index k.
        np.partition(self.arr, k)

    def time_argpartition(self, dtype, array_type, k):
        # Indirect partition: compute the partitioning indices for index k.
        np.argpartition(self.arr, k)


class SortWorst(Benchmark):
    """Benchmark np.sort on an input built as a quicksort worst case."""

    def setup(self):
        # quicksort median of 3 worst case
        self.worst = np.arange(1000000)
        # `x` is a view into self.worst, so the swaps below modify
        # self.worst in place.
        x = self.worst
        while x.size > 3:
            mid = x.size // 2
            # Swap the middle element with the second-to-last one, then
            # repeat on the view with the last two elements dropped.
            x[mid], x[-2] = x[-2], x[mid]
            x = x[:-2]

    def time_sort_worst(self):
        # np.sort returns a sorted copy, so self.worst keeps its layout
        # between timing runs.
        np.sort(self.worst)

    # Retain the old benchmark name for backward compatibility.
    time_sort_worst.benchmark_name = "bench_function_base.Sort.time_sort_worst"


class Where(Benchmark):
    """Times ``np.where`` in its one- and three-argument forms on a range
    of boolean masks: thresholded random data, constant masks and periodic
    patterns.
    """

    def setup(self):
        # Integer ramp plus an object-dtype version, each with its own copy.
        ramp = np.arange(20000)
        ramp_obj = ramp.astype(object)
        self.d = ramp
        self.d_o = ramp_obj
        self.e = ramp.copy()
        self.e_o = ramp_obj.copy()
        self.cond = (ramp > 5000)

        size = 1024 * 1024 // 8

        # Masks made by comparing one random sample against thresholds.
        rnd_array = np.random.rand(size)
        thresholds = (
            ('01', 0.01),
            ('20', 0.20),
            ('30', 0.30),
            ('40', 0.40),
            ('50', 0.50),
        )
        for label, threshold in thresholds:
            setattr(self, 'rand_cond_' + label, rnd_array > threshold)

        # Constant masks.
        self.all_zeros = np.zeros(size, dtype=bool)
        self.all_ones = np.ones(size, dtype=bool)

        # Periodic masks: rep_zeros_N is True at every N-th position,
        # rep_ones_N is True everywhere else.
        positions = np.arange(size)
        for period in (2, 4, 8):
            phase = positions % period
            setattr(self, 'rep_zeros_%d' % period, phase == 0)
            setattr(self, 'rep_ones_%d' % period, phase > 0)

    def time_1(self):
        # One-argument form: indices where the condition holds.
        np.where(self.cond)

    def time_2(self):
        # Three-argument form with two integer arrays.
        np.where(self.cond, self.d, self.e)

    def time_2_object(self):
        # Three-argument form with two object-dtype arrays.
        np.where(self.cond, self.d_o, self.e_o)

    def time_2_broadcast(self):
        # Three-argument form with a scalar that has to be broadcast.
        np.where(self.cond, self.d, 0)

    def time_all_zeros(self):
        # Mask that is False everywhere.
        np.where(self.all_zeros)

    def time_random_01_percent(self):
        # Random mask, threshold 0.01.
        np.where(self.rand_cond_01)

    def time_random_20_percent(self):
        # Random mask, threshold 0.20.
        np.where(self.rand_cond_20)

    def time_random_30_percent(self):
        # Random mask, threshold 0.30.
        np.where(self.rand_cond_30)

    def time_random_40_percent(self):
        # Random mask, threshold 0.40.
        np.where(self.rand_cond_40)

    def time_random_50_percent(self):
        # Random mask, threshold 0.50.
        np.where(self.rand_cond_50)

    def time_all_ones(self):
        # Mask that is True everywhere.
        np.where(self.all_ones)

    def time_interleaved_zeros_x2(self):
        # True at every 2nd position.
        np.where(self.rep_zeros_2)

    def time_interleaved_zeros_x4(self):
        # True at every 4th position.
        np.where(self.rep_zeros_4)

    def time_interleaved_zeros_x8(self):
        # True at every 8th position.
        np.where(self.rep_zeros_8)

    def time_interleaved_ones_x2(self):
        # False at every 2nd position.
        np.where(self.rep_ones_2)

    def time_interleaved_ones_x4(self):
        # False at every 4th position.
        np.where(self.rep_ones_4)

    def time_interleaved_ones_x8(self):
        # False at every 8th position.
        np.where(self.rep_ones_8)

`.\numpy\benchmarks\benchmarks\bench_import.py`

# 导入 subprocess 模块中的 call 函数，用于执行外部命令
# 导入 sys 模块中的 executable 变量，表示当前 Python 解释器的路径
# 导入 timeit 模块中的 default_timer 函数，用于获取当前时间
from subprocess import call
from sys import executable
from timeit import default_timer

# 导入 Benchmark 类，该类在 common 模块中定义
from .common import Benchmark

# 定义一个名为 Import 的类，继承自 Benchmark 类
class Import(Benchmark):
    """Benchmark the time needed to import numpy and its submodules, each
    in a freshly started interpreter."""
    # Use timeit's default timer for these benchmarks.
    timer = default_timer

    def execute(self, command):
        """Run ``command`` via ``<python> -c`` in a subprocess.

        Raises
        ------
        RuntimeError
            If the interpreter exits with a non-zero status. Without this
            check a failing import would be timed as if it had succeeded.
        """
        status = call((executable, '-c', command))
        if status != 0:
            raise RuntimeError(
                f"{command!r} exited with status {status}")

    # Import the top-level numpy package.
    def time_numpy(self):
        self.execute('import numpy')

    # Import numpy together with inspect.
    # What are the savings from avoiding the import of the inspect module?
    def time_numpy_inspect(self):
        self.execute('import numpy, inspect')

    # Import the numpy.fft submodule.
    def time_fft(self):
        self.execute('from numpy import fft')

    # Import the numpy.linalg submodule.
    def time_linalg(self):
        self.execute('from numpy import linalg')

    # Import the numpy.ma submodule.
    def time_ma(self):
        self.execute('from numpy import ma')

    # Import the numpy.matlib submodule.
    def time_matlib(self):
        self.execute('from numpy import matlib')

    # Import the numpy.random submodule.
    def time_random(self):
        self.execute('from numpy import random')

`.\numpy\benchmarks\benchmarks\bench_indexing.py`

# 从common模块导入Benchmark类和其他函数和变量
from .common import (
    Benchmark, get_square_, get_indexes_, get_indexes_rand_, TYPES1)

# 导入os.path模块中的join函数，并重命名为pjoin
from os.path import join as pjoin
# 导入shutil模块，用于文件和目录操作
import shutil
# 导入numpy模块中的memmap、float32和array函数
from numpy import memmap, float32, array
# 导入numpy模块，并重命名为np
import numpy as np
# 导入tempfile模块中的mkdtemp函数，用于创建临时目录
from tempfile import mkdtemp

# 定义Benchmark的子类Indexing，用于测试索引操作的性能
class Indexing(Benchmark):
    """Benchmarks for indexing reads and writes built from string templates.

    ``setup`` generates a tiny function ``run`` whose body is
    ``a[<selector>]<op>`` and the timed method only calls that function.
    """
    # Parameter grid, in order:
    #   dtype   - TYPES1 plus an object dtype and a structured "O,i" dtype
    #   indexes - name of the index array to use inside the selector
    #   sel     - selector template; 'I' is a placeholder for the index name
    #   op      - '' for a read, '=1' for an assignment
    params = [TYPES1 + ["object", "O,i"],
              ["indexes_", "indexes_rand_"],
              ['I', ':,I', 'np.ix_(I, I)'],
              ['', '=1']]
    param_names = ['dtype', 'indexes', 'sel', 'op']

    def setup(self, dtype, indexes, sel, op):
        """Compile the indexing statement and store it as ``self.func``."""
        # Replace the placeholder 'I' with the name of the chosen index array,
        # so the generated source refers to it through the namespace below.
        sel = sel.replace('I', indexes)

        # Globals for the generated function: the array under test, numpy
        # itself (needed for 'np.ix_') and both candidate index arrays.
        ns = {'a': get_square_(dtype),
              'np': np,
              'indexes_': get_indexes_(),
              'indexes_rand_': get_indexes_rand_()}

        # Source template for the generated function.
        code = "def run():\n    a[%s]%s"
        # Fill in the selector and the optional assignment suffix.
        code = code % (sel, op)

        # Compile once here so that the timed call pays neither for string
        # formatting nor for compilation. The source is built only from the
        # fixed ``params`` above.
        exec(code, ns)
        self.func = ns['run']

    def time_op(self, dtype, indexes, sel, op):
        # Run the pre-compiled indexing statement.
        self.func()

# 定义Benchmark的子类IndexingWith1DArr，用于测试使用1维数组的索引性能
class IndexingWith1DArr(Benchmark):
    """Benchmarks for indexing with a single ordered 1-D integer array."""
    # Parameter grid: array shapes (each has one axis of length 1000) and
    # dtypes (TYPES1 plus object and a structured "i,O" dtype).
    params = [
        [(1000,), (1000, 1), (1000, 2), (2, 1000, 1), (1000, 3)],
        TYPES1 + ["O", "i,O"]]
    param_names = ["shape", "dtype"]

    def setup(self, shape, dtype):
        """Create the array of ones and the ``arange(1000)`` index."""
        self.arr = np.ones(shape, dtype)
        self.index = np.arange(1000)
        # The only 3-D shape is (2, 1000, 1): its length-1000 axis is the
        # second one, so prepend a full slice to index along that axis.
        if len(shape) == 3:
            self.index = (slice(None), self.index)

    def time_getitem_ordered(self, shape, dtype):
        # Read using the ordered index.
        self.arr[self.index]

    def time_setitem_ordered(self, shape, dtype):
        # Assign a scalar through the ordered index.
        self.arr[self.index] = 0

# 定义Benchmark的子类ScalarIndexing，用于测试标量索引操作的性能
class ScalarIndexing(Benchmark):
    """Benchmarks for full integer (scalar) indexing of 0-, 1- and 2-D arrays."""
    # Number of dimensions of the array under test.
    params = [[0, 1, 2]]
    param_names = ["ndim"]

    def setup(self, ndim):
        """Create an array of ones with ``ndim`` axes of length 5."""
        self.array = np.ones((5,) * ndim)

    def time_index(self, ndim):
        # Read one element 100 times using a tuple of ``ndim`` ones
        # (the empty tuple when ndim == 0).
        arr = self.array
        indx = (1,) * ndim
        for i in range(100):
            arr[indx]

    def time_assign(self, ndim):
        # Assign a Python float to one element 100 times.
        arr = self.array
        indx = (1,) * ndim
        for i in range(100):
            arr[indx] = 5.

    def time_assign_cast(self, ndim):
        # Assign a np.int16 scalar into the array created by np.ones (which
        # was given no dtype), so each assignment converts the value.
        arr = self.array
        indx = (1,) * ndim
        val = np.int16(43)
        for i in range(100):
            arr[indx] = val

# 定义Benchmark的子类IndexingSeparate，用于测试内存映射文件的切片和花式索引操作的性能
class IndexingSeparate(Benchmark):
    """Benchmarks for slicing and fancy indexing of a ``np.memmap`` array."""

    def setup(self):
        """Create a temp directory holding a 50x60 float32 memmap file."""
        self.tmp_dir = mkdtemp()
        self.fp = memmap(pjoin(self.tmp_dir, 'tmp.dat'),
                         dtype=float32, mode='w+', shape=(50, 60))
        # Row indices used by the fancy-indexing benchmark.
        self.indexes = array([3, 4, 6, 10, 20])

    def teardown(self):
        """Drop the memmap and delete the temporary directory."""
        # Delete the memmap reference before removing its backing file;
        # presumably so the mapping is released first -- confirm on Windows.
        del self.fp
        shutil.rmtree(self.tmp_dir)

    def time_mmap_slicing(self):
        # Basic slice of rows 5..9, repeated 1000 times.
        for i in range(1000):
            self.fp[5:10]

    def time_mmap_fancy_indexing(self):
        # Integer-array indexing of rows, repeated 1000 times.
        for i in range(1000):
            self.fp[self.indexes]

# 定义Benchmark的子类IndexingStructured0D，用于测试0维结构化索引的性能
class IndexingStructured0D(Benchmark):
    """Benchmarks for field assignment on 0-d structured arrays and scalars.

    The structured dtype has a single field ``'a'`` holding 256 float32
    values.
    """

    def setup(self):
        """Create the 0-d arrays (``A``, ``B``) and scalars (``a``, ``b``)."""
        # One field 'a' that is a sub-array of 256 float32 values.
        self.dt = np.dtype([('a', 'f4', 256)])

        # 0-d structured arrays: source A and destination B.
        self.A = np.zeros((), self.dt)
        self.B = self.A.copy()

        # Structured scalars obtained by indexing a 1-element array:
        # source a and destination b.
        self.a = np.zeros(1, self.dt)[0]
        self.b = self.a.copy()

    def time_array_slice(self):
        # Copy field 'a' from A into B through a full-slice assignment.
        self.B['a'][:] = self.A['a']

    def time_array_all(self):
        # Copy field 'a' from A into B by assigning the field directly.
        self.B['a'] = self.A['a']

    def time_scalar_slice(self):
        # Copy field 'a' from scalar a into scalar b through a full slice.
        self.b['a'][:] = self.a['a']

    def time_scalar_all(self):
        # Copy field 'a' from scalar a into scalar b by assigning the field.
        self.b['a'] = self.a['a']
# 定义一个继承自Benchmark类的FlatIterIndexing类，用于性能基准测试
class FlatIterIndexing(Benchmark):
    """Benchmarks for boolean-mask indexing through ``ndarray.flat``."""

    def setup(self):
        """Create a (200, 50000) array and three flat boolean masks."""
        self.a = np.ones((200, 50000))
        # Mask selecting every element.
        self.m_all = np.repeat(True, 200 * 50000)
        # Mask selecting every other element (even positions are cleared).
        self.m_half = np.copy(self.m_all)
        self.m_half[::2] = False
        # Mask selecting no element.
        self.m_none = np.repeat(False, 200 * 50000)

    def time_flat_bool_index_none(self):
        # Index the flat iterator with the all-False mask.
        self.a.flat[self.m_none]

    def time_flat_bool_index_half(self):
        # Index the flat iterator with the half-True mask.
        self.a.flat[self.m_half]

    def time_flat_bool_index_all(self):
        # Index the flat iterator with the all-True mask.
        self.a.flat[self.m_all]

`.\numpy\benchmarks\benchmarks\bench_io.py`

from .common import Benchmark, get_squares, get_squares_
import numpy as np
from io import SEEK_SET, StringIO, BytesIO

class Copy(Benchmark):
    """Benchmarks for array copy and assignment paths over several dtypes."""
    params = ["int8", "int16", "float32", "float64",
              "complex64", "complex128"]
    param_names = ['type']

    def setup(self, typename):
        """Create the 25000-element arrays used by the timings."""
        dtype = np.dtype(typename)
        # arange(25000) laid out as 500 rows by 50 columns.
        self.d = np.arange((50 * 500), dtype=dtype).reshape((500, 50))
        # arange(25000) laid out as 50 rows by 500 columns.
        self.e = np.arange((50 * 500), dtype=dtype).reshape((50, 500))
        # e reshaped to d's shape, so it can be assigned to d element-wise.
        self.e_d = self.e.reshape(self.d.shape)
        # Flat 1-D array with the same 25000 elements.
        self.dflat = np.arange((50 * 500), dtype=dtype)

    def time_memcpy(self, typename):
        # Same-shape assignment of e_d into d (no broadcasting is involved).
        self.d[...] = self.e_d

    def time_memcpy_large_out_of_place(self, typename):
        # 1-D array of 1024**2 ones; note that allocating it is part of the
        # timed body, followed by an out-of-place copy.
        l = np.ones(1024**2, dtype=np.dtype(typename))
        l.copy()

    def time_cont_assign(self, typename):
        # Fill all of d with the scalar 1.
        self.d[...] = 1

    def time_strided_copy(self, typename):
        # Assign the transpose of e (shape 500x50) into d.
        self.d[...] = self.e.T

    def time_strided_assign(self, typename):
        # Assign a scalar to every second element of the flat array.
        self.dflat[::2] = 2


class CopyTo(Benchmark):
    """Benchmarks for ``np.copyto`` with and without a ``where`` mask."""

    def setup(self):
        """Create source/destination arrays and four boolean masks."""
        # Destination: 50000 ones; source: a copy of it.
        self.d = np.ones(50000)
        self.e = self.d.copy()
        # d is all ones, so m is all True ...
        self.m = (self.d == 1)
        # ... and its inverse im is all False.
        self.im = (~ self.m)
        # m8 is m with every 8th element flipped (those become False).
        self.m8 = self.m.copy()
        self.m8[::8] = (~ self.m[::8])
        # im8 is the inverse of m8 (only every 8th element is True).
        self.im8 = (~ self.m8)

    def time_copyto(self):
        # Unmasked copy of e into d.
        np.copyto(self.d, self.e)

    def time_copyto_sparse(self):
        # Masked copy using m.
        np.copyto(self.d, self.e, where=self.m)

    def time_copyto_dense(self):
        # Masked copy using im.
        np.copyto(self.d, self.e, where=self.im)

    def time_copyto_8_sparse(self):
        # Masked copy using m8.
        np.copyto(self.d, self.e, where=self.m8)

    def time_copyto_8_dense(self):
        # Masked copy using im8.
        np.copyto(self.d, self.e, where=self.im8)


class Savez(Benchmark):
    """Benchmark for ``np.savez`` on the mapping returned by ``get_squares``."""

    def setup(self):
        # Mapping that is unpacked as keyword arguments to np.savez below.
        self.squares = get_squares()

    def time_vb_savez_squares(self):
        # Writes 'tmp.npz' relative to the current working directory; this
        # class has no teardown that removes the file.
        np.savez('tmp.npz', **self.squares)


class LoadNpyOverhead(Benchmark):
    """Benchmark for ``np.load`` reading an ``.npy`` payload from memory."""

    def setup(self):
        # Serialise the 'float32' entry of get_squares_() into an in-memory
        # buffer, so the timing below involves no disk access.
        self.buffer = BytesIO()
        np.save(self.buffer, get_squares_()['float32'])

    def time_loadnpy_overhead(self):
        # Rewind first: np.save (and any previous load) left the position
        # past the start of the data.
        self.buffer.seek(0, SEEK_SET)
        np.load(self.buffer)


class LoadtxtCSVComments(Benchmark):
    """Benchmark for ``np.loadtxt`` on CSV lines that carry trailing comments."""
    # Number of lines in the generated CSV text.
    params = [10, int(1e2), int(1e4), int(1e5)]
    param_names = ['num_lines']

    def setup(self, num_lines):
        """Create ``num_lines`` identical rows, each ending in a comment."""
        data = ['1,2,3 # comment'] * num_lines
        # Wrap the text in a file-like object for np.loadtxt.
        self.data_comments = StringIO('\n'.join(data))
    def time_comment_loadtxt_csv(self, num_lines):
        # Benchmark handling of lines with comments when loading from CSV.

        # Inspired by a similar benchmark of read_csv in pandas.

        # The StringIO position has to be reset for every timed call, which
        # inflates the measured time a little.
        np.loadtxt(self.data_comments,
                   delimiter=',')
        # Rewind so the next call sees the data again.
        self.data_comments.seek(0)
class LoadtxtCSVdtypes(Benchmark):
    """Benchmark for ``np.loadtxt`` converting CSV text to various dtypes."""

    # Parameter grid: target dtype and number of CSV lines.
    params = (['float32', 'float64', 'int32', 'int64',
               'complex128', 'str', 'object'],
              [10, int(1e2), int(1e4), int(1e5)])
    param_names = ['dtype', 'num_lines']

    def setup(self, dtype, num_lines):
        """Create ``num_lines`` rows of the text ``'5, 7, 888'``."""
        data = ['5, 7, 888'] * num_lines
        self.csv_data = StringIO('\n'.join(data))

    def time_loadtxt_dtypes_csv(self, dtype, num_lines):
        # Parse the CSV text into an array of the requested dtype.

        # The StringIO object is stateful, so it is rewound after each read
        # to keep repeated timings valid.
        np.loadtxt(self.csv_data,
                   delimiter=',',
                   dtype=dtype)
        self.csv_data.seek(0)

class LoadtxtCSVStructured(Benchmark):
    """Benchmark for ``np.loadtxt`` parsing CSV text into a structured dtype."""

    def setup(self):
        """Create 50000 identical rows of ``"M, 21, 72, X, 155"``."""
        num_lines = 50000
        data = ["M, 21, 72, X, 155"] * num_lines
        self.csv_data = StringIO('\n'.join(data))

    def time_loadtxt_csv_struct_dtype(self):
        # The StringIO position is reset after the read so that repeated
        # timing iterations see the full data again.

        np.loadtxt(self.csv_data,
                   delimiter=',',
                   dtype=[('category_1', 'S1'),
                          ('category_2', 'i4'),
                          ('category_3', 'f8'),
                          ('category_4', 'S1'),
                          ('category_5', 'f8')])
        self.csv_data.seek(0)


class LoadtxtCSVSkipRows(Benchmark):
    """Benchmark for ``np.loadtxt`` with ``skiprows`` on a CSV file on disk.

    A similar benchmark exists in the pandas asv suite.
    """

    # Number of leading rows to skip.
    params = [0, 500, 10000]
    param_names = ['skiprows']

    def setup(self, skiprows):
        """Write a seeded random (100000, 3) array to ``test_array.csv``."""
        np.random.seed(123)
        test_array = np.random.rand(100000, 3)
        # Relative path: the file lands in the current working directory and
        # this class has no teardown that removes it.
        self.fname = 'test_array.csv'
        np.savetxt(fname=self.fname,
                   X=test_array,
                   delimiter=',')

    def time_skiprows_csv(self, skiprows):
        # Load the CSV file while skipping the first ``skiprows`` rows.

        np.loadtxt(self.fname,
                   delimiter=',',
                   skiprows=skiprows)

class LoadtxtReadUint64Integers(Benchmark):
    """Benchmark for ``np.loadtxt`` on text holding integers >= 2**63.

    Adapted from a similar CSV-reading benchmark in pandas.
    """

    # Number of integers in the generated text.
    params = [550, 1000, 10000]
    param_names = ['size']

    def setup(self, size):
        """Build two text buffers: all large values, and one with a -1."""
        # Values 2**63 .. 2**63 + size - 1, one per line.
        arr = np.arange(size).astype('uint64') + 2**63
        self.data1 = StringIO('\n'.join(arr.astype(str).tolist()))
        # Same values as Python objects, with element 500 replaced by -1
        # (every ``size`` in params is larger than 500).
        arr = arr.astype(object)
        arr[500] = -1
        self.data2 = StringIO('\n'.join(arr.astype(str).tolist()))

    def time_read_uint64(self, size):
        # The StringIO position is reset after the read so that repeated
        # timing iterations see the full data again.

        np.loadtxt(self.data1)
        self.data1.seek(0)
    def time_read_uint64_neg_values(self, size):
        # Same as above, for the buffer that contains a single negative value.
        np.loadtxt(self.data2)
        # Rewind for the next timing iteration.
        self.data2.seek(0)
class LoadtxtUseColsCSV(Benchmark):
    """Benchmark for ``np.loadtxt`` reading selected CSV columns."""
    # benchmark selective column reading from CSV files
    # using np.loadtxt

    # Column selections: a single column index or a list of indices.
    params = [2, [1, 3], [1, 3, 5, 7]]
    param_names = ['usecols']

    def setup(self, usecols):
        """Create 5000 identical rows with ten integer columns."""
        num_lines = 5000
        data = ['0, 1, 2, 3, 4, 5, 6, 7, 8, 9'] * num_lines
        self.csv_data = StringIO('\n'.join(data))

    def time_loadtxt_usecols_csv(self, usecols):
        # Reading is stateful, so the StringIO object is rewound after each
        # call.
        np.loadtxt(self.csv_data,
                   delimiter=',',
                   usecols=usecols)
        self.csv_data.seek(0)

class LoadtxtCSVDateTime(Benchmark):
    """Benchmarks for ``np.loadtxt`` reading datetime data from CSV text."""

    params = [20, 200, 2000, 20000]
    param_names = ['num_lines']

    def setup(self, num_lines):
        """Build a two-column (date, float) CSV text buffer.

        A small template of ``date,value`` rows is generated once and then
        repeated ``num_lines // 20`` times.
        """
        dates = np.arange('today', 20, dtype=np.datetime64)
        np.random.seed(123)
        values = np.random.rand(20)

        # One "date,value" line per pair; zip stops at the shorter input.
        rows = [str(day) + ',' + str(val) + '\n'
                for day, val in zip(dates, values)]
        template = ''.join(rows)

        # Repeat the template to reach the requested number of lines.
        self.csv_data = StringIO(template * (num_lines // 20))

    def time_loadtxt_csv_datetime(self, num_lines):
        # The StringIO object is stateful, so it is rewound after the read
        # to keep repeated timing iterations valid.
        parsed = np.loadtxt(self.csv_data,
                            delimiter=',',
                            dtype=([('dates', 'M8[us]'),
                                    ('values', 'float64')]))
        self.csv_data.seek(0)

`.\numpy\benchmarks\benchmarks\bench_itemselection.py`

# 从common模块中导入Benchmark类和TYPES1变量
from .common import Benchmark, TYPES1

# 导入numpy库并将其命名为np
import numpy as np

# 定义Take类，继承Benchmark类
class Take(Benchmark):
    """Benchmark for ``ndarray.take`` along the second-to-last axis."""
    # Parameter grid, in order: array shape, out-of-bounds mode, dtype.
    params = [
        [(1000, 1), (2, 1000, 1), (1000, 3)],  # array shapes
        ["raise", "wrap", "clip"],           # take modes
        TYPES1 + ["O", "i,O"]                # dtypes
    ]
    param_names = ["shape", "mode", "dtype"]

    def setup(self, shape, mode, dtype):
        """Create the array of ones and the ``arange(1000)`` indices."""
        self.arr = np.ones(shape, dtype)
        self.indices = np.arange(1000)

    def time_contiguous(self, shape, mode, dtype):
        # Every shape in params has length 1000 on axis -2, so the ordered
        # indices 0..999 address each position along that axis once.
        self.arr.take(self.indices, axis=-2, mode=mode)


# 定义PutMask类，继承Benchmark类
class PutMask(Benchmark):
    """Benchmarks for ``np.putmask`` with all-True and all-False masks."""
    # Parameter grid: whether the values are a 0-d array or a 1000-element
    # array, and the dtype of both the target and the values.
    params = [
        [True, False],            # values given as a 0-d array?
        TYPES1 + ["O", "i,O"]     # dtypes
    ]
    param_names = ["values_is_scalar", "dtype"]

    def setup(self, values_is_scalar, dtype):
        """Create the values, the 1000-element target and both masks."""
        # Either a 0-d array holding 1 or a full-length array of ones.
        self.vals = (np.array(1., dtype=dtype) if values_is_scalar
                     else np.ones(1000, dtype=dtype))

        # Target array that putmask writes into.
        self.arr = np.ones(1000, dtype=dtype)

        # dense_mask selects every element, sparse_mask selects none.
        self.dense_mask = np.ones(1000, dtype="bool")
        self.sparse_mask = np.zeros(1000, dtype="bool")

    def time_dense(self, values_is_scalar, dtype):
        # putmask where every position is selected.
        np.putmask(self.arr, self.dense_mask, self.vals)

    def time_sparse(self, values_is_scalar, dtype):
        # putmask where no position is selected.
        np.putmask(self.arr, self.sparse_mask, self.vals)


# 定义Put类，继承Benchmark类
class Put(Benchmark):
    """Benchmark for ``np.put`` with ordered indices."""
    # Parameter grid: whether the values are a 0-d array or a 1000-element
    # array, and the dtype of both the target and the values.
    params = [
        [True, False],            # values given as a 0-d array?
        TYPES1 + ["O", "i,O"]     # dtypes
    ]
    param_names = ["values_is_scalar", "dtype"]

    def setup(self, values_is_scalar, dtype):
        """Create the values, the 1000-element target and the indices."""
        # Either a 0-d array holding 1 or a full-length array of ones.
        self.vals = (np.array(1., dtype=dtype) if values_is_scalar
                     else np.ones(1000, dtype=dtype))

        # Target array that put writes into.
        self.arr = np.ones(1000, dtype=dtype)
        # Ordered indices 0..999 as intp.
        self.indx = np.arange(1000, dtype=np.intp)

    def time_ordered(self, values_is_scalar, dtype):
        # Write vals into arr at the ordered indices.
        np.put(self.arr, self.indx, self.vals)

`.\numpy\benchmarks\benchmarks\bench_lib.py`

"""Benchmarks for `numpy.lib`."""

# 从common模块导入Benchmark类
from .common import Benchmark
# 导入numpy库并简写为np
import numpy as np

# 定义Pad类，继承Benchmark类
class Pad(Benchmark):
    """Benchmarks for `numpy.pad`.

    When benchmarking the pad function it is useful to cover scenarios where
    the ratio between the size of the input array and the output array differs
    significantly (original area vs. padded area). This allows to evaluate for
    which scenario a padding algorithm is optimized. Furthermore involving
    large range of array sizes ensures that the effects of CPU-bound caching is
    visible.

    The table below shows the sizes of the arrays involved in this benchmark:

    +-----------------+----------+-----------+-----------+-----------------+
    | shape           | original | padded: 1 | padded: 8 | padded: (0, 32) |
    +=================+==========+===========+===========+=================+
    | (2 ** 22,)      | 32 MiB   | 32.0 MiB  | 32.0 MiB  | 32.0 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (1024, 1024)    | 8 MiB    | 8.03 MiB  | 8.25 MiB  | 8.51 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (256, 128, 1)   | 256 KiB  | 786 KiB   | 5.08 MiB  | 11.6 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (4, 4, 4, 4)    | 2 KiB    | 10.1 KiB  | 1.22 MiB  | 12.8 MiB        |
    +-----------------+----------+-----------+-----------+-----------------+
    | (1, 1, 1, 1, 1) | 8 B      | 1.90 MiB  | 10.8 MiB  | 299 MiB         |
    +-----------------+----------+-----------+-----------+-----------------+
    """

    param_names = ["shape", "pad_width", "mode"]
    params = [
        # Shape of the input arrays
        [(2 ** 22,), (1024, 1024), (256, 128, 1),
         (4, 4, 4, 4), (1, 1, 1, 1, 1)],
        # Tested pad widths
        [1, 8, (0, 32)],
        # Tested modes: mean, median, minimum & maximum use the same code path
        #               reflect & symmetric share a lot of their code path
        ["constant", "edge", "linear_ramp", "mean", "reflect", "wrap"],
    ]

    def setup(self, shape, pad_width, mode):
        # Fill the array here so that the OS page faults happen during setup
        # rather than inside the timed phase.
        self.array = np.full(shape, fill_value=1, dtype=np.float64)

    def time_pad(self, shape, pad_width, mode):
        # Pad the pre-filled array with the given width and mode.
        np.pad(self.array, pad_width, mode)


# 定义Nan类，继承Benchmark类
class Nan(Benchmark):
    """Benchmarks for nan functions"""

    param_names = ["array_size", "percent_nans"]
    params = [
            # sizes of the 1D arrays
            [200, int(2e5)],
            # percent of np.nan in arrays
            [0, 0.1, 2., 50., 90.],
            ]
    def setup(self, array_size, percent_nans):
        """Create a seeded uniform array with roughly ``percent_nans`` % NaN."""
        # Fixed seed so every run sees the same data.
        rnd = np.random.RandomState(1819780348)
        # Uniform samples in [0, 1); every sample below percent_nans / 100 is
        # replaced by NaN, giving approximately the requested percentage.
        base_array = rnd.uniform(size=array_size)
        base_array[base_array < percent_nans / 100.] = np.nan
        self.arr = base_array

    def time_nanmin(self, array_size, percent_nans):
        # Minimum, ignoring NaN.
        np.nanmin(self.arr)

    def time_nanmax(self, array_size, percent_nans):
        # Maximum, ignoring NaN.
        np.nanmax(self.arr)

    def time_nanargmin(self, array_size, percent_nans):
        # Index of the minimum, ignoring NaN.
        np.nanargmin(self.arr)

    def time_nanargmax(self, array_size, percent_nans):
        # Index of the maximum, ignoring NaN.
        np.nanargmax(self.arr)

    def time_nansum(self, array_size, percent_nans):
        # Sum, ignoring NaN.
        np.nansum(self.arr)

    def time_nanprod(self, array_size, percent_nans):
        # Product, ignoring NaN.
        np.nanprod(self.arr)

    def time_nancumsum(self, array_size, percent_nans):
        # Cumulative sum, ignoring NaN.
        np.nancumsum(self.arr)

    def time_nancumprod(self, array_size, percent_nans):
        # Cumulative product, ignoring NaN.
        np.nancumprod(self.arr)

    def time_nanmean(self, array_size, percent_nans):
        # Mean, ignoring NaN.
        np.nanmean(self.arr)

    def time_nanvar(self, array_size, percent_nans):
        # Variance, ignoring NaN.
        np.nanvar(self.arr)

    def time_nanstd(self, array_size, percent_nans):
        # Standard deviation, ignoring NaN.
        np.nanstd(self.arr)

    def time_nanmedian(self, array_size, percent_nans):
        # Median, ignoring NaN.
        np.nanmedian(self.arr)

    def time_nanquantile(self, array_size, percent_nans):
        # 0.2 quantile, ignoring NaN.
        np.nanquantile(self.arr, q=0.2)

    def time_nanpercentile(self, array_size, percent_nans):
        # 50th percentile, ignoring NaN.
        np.nanpercentile(self.arr, q=50)
class Unique(Benchmark):
    """Benchmarks for ``np.unique`` on arrays that contain ``np.nan``."""

    param_names = ["array_size", "percent_nans"]
    params = [
        # sizes of the 1D arrays
        [200, int(2e5)],
        # percent of np.nan in arrays
        [0, 0.1, 2., 50., 90.],
    ]

    def setup(self, array_size, percent_nans):
        """Create a seeded uniform array and overwrite some entries with NaN."""
        # Fixed seed so every run sees the same data.
        np.random.seed(123)
        base_array = np.random.uniform(size=array_size)
        # NOTE(review): percent_nans is multiplied in directly (not divided
        # by 100), and np.random.choice samples with replacement by default,
        # so the resulting NaN share is not exactly ``percent_nans`` percent.
        # Left unchanged to keep the benchmark data stable -- confirm intent.
        n_nan = int(percent_nans * array_size)
        nan_indices = np.random.choice(np.arange(array_size), size=n_nan)
        base_array[nan_indices] = np.nan
        self.arr = base_array

    def time_unique_values(self, array_size, percent_nans):
        # Unique values only: no index, inverse or counts output.
        np.unique(self.arr, return_index=False,
                  return_inverse=False, return_counts=False)

    def time_unique_counts(self, array_size, percent_nans):
        # Unique values together with their counts.
        np.unique(self.arr, return_index=False,
                  return_inverse=False, return_counts=True)

    def time_unique_inverse(self, array_size, percent_nans):
        # Unique values together with the inverse indices.
        np.unique(self.arr, return_index=False,
                  return_inverse=True, return_counts=False)

    def time_unique_all(self, array_size, percent_nans):
        # Unique values with index, inverse and counts outputs all enabled.
        np.unique(self.arr, return_index=True,
                  return_inverse=True, return_counts=True)


class Isin(Benchmark):
    """Benchmarks for ``numpy.isin``."""

    param_names = ["size", "highest_element"]
    params = [
        [10, 100000, 3000000],
        [10, 10000, int(1e8)]
    ]

    def setup(self, size, highest_element):
        """Create two random integer arrays of length ``size``."""
        # Elements to test, drawn from [0, highest_element).
        self.array = np.random.randint(
                low=0, high=highest_element, size=size)
        # Values to test membership against, drawn from the same range.
        self.in_array = np.random.randint(
                low=0, high=highest_element, size=size)

    def time_isin(self, size, highest_element):
        # Element-wise membership test of array against in_array.
        np.isin(self.array, self.in_array)

`.\numpy\benchmarks\benchmarks\bench_linalg.py`

# 导入必要的模块和函数
from .common import Benchmark, get_squares_, get_indexes_rand, TYPES1
import numpy as np

# 定义一个继承自Benchmark类的新类Eindot，用于测试矩阵运算的性能
class Eindot(Benchmark):
    """Benchmarks for dot, einsum, inner, matmul and tensordot products."""

    def setup(self):
        """Create the 2-D, 1-D and 3-D operands shared by all timings."""
        # a is 150x400; ac is a copy, at a transposed view and atc a copy of
        # the transpose.
        self.a = np.arange(60000.0).reshape(150, 400)
        self.ac = self.a.copy()
        self.at = self.a.T
        self.atc = self.a.T.copy()
        # b is 400x600; c and d are integer vectors matching b's two axes.
        self.b = np.arange(240000.0).reshape(400, 600)
        self.c = np.arange(600)
        self.d = np.arange(400)

        # 3-D operands for the three-index einsum and tensordot timings.
        self.a3 = np.arange(480000.).reshape(60, 80, 100)
        self.b3 = np.arange(192000.).reshape(80, 60, 40)

    def time_dot_a_b(self):
        # Matrix product a @ b via np.dot.
        np.dot(self.a, self.b)

    def time_dot_d_dot_b_c(self):
        # Nested dot: d . (b . c).
        np.dot(self.d, np.dot(self.b, self.c))

    def time_dot_trans_a_at(self):
        # a times its transposed view.
        np.dot(self.a, self.at)

    def time_dot_trans_a_atc(self):
        # a times the copied transpose.
        np.dot(self.a, self.atc)

    def time_dot_trans_at_a(self):
        # Transposed view times a.
        np.dot(self.at, self.a)

    def time_dot_trans_atc_a(self):
        # Copied transpose times a.
        np.dot(self.atc, self.a)

    def time_einsum_i_ij_j(self):
        # Three-operand einsum 'i,ij,j' over d, b and c.
        np.einsum('i,ij,j', self.d, self.b, self.c)

    def time_einsum_ij_jk_a_b(self):
        # Matrix product of a and b expressed as einsum 'ij,jk'.
        np.einsum('ij,jk', self.a, self.b)

    def time_einsum_ijk_jil_kl(self):
        # Contraction 'ijk,jil->kl' of the two 3-D operands.
        np.einsum('ijk,jil->kl', self.a3, self.b3)

    def time_inner_trans_a_a(self):
        # Inner product of a with itself.
        np.inner(self.a, self.a)

    def time_inner_trans_a_ac(self):
        # Inner product of a with its copy.
        np.inner(self.a, self.ac)

    def time_matmul_a_b(self):
        # Matrix product a @ b via np.matmul.
        np.matmul(self.a, self.b)

    def time_matmul_d_matmul_b_c(self):
        # Nested matmul: d @ (b @ c).
        np.matmul(self.d, np.matmul(self.b, self.c))

    def time_matmul_trans_a_at(self):
        # a times its transposed view.
        np.matmul(self.a, self.at)

    def time_matmul_trans_a_atc(self):
        # a times the copied transpose.
        np.matmul(self.a, self.atc)

    def time_matmul_trans_at_a(self):
        # Transposed view times a.
        np.matmul(self.at, self.a)

    def time_matmul_trans_atc_a(self):
        # Copied transpose times a.
        np.matmul(self.atc, self.a)

    def time_tensordot_a_b_axes_1_0_0_1(self):
        # tensordot contracting axes (1, 0) of a3 with axes (0, 1) of b3.
        np.tensordot(self.a3, self.b3, axes=([1, 0], [0, 1]))

# 定义一个继承自Benchmark类的新类Linalg，用于测试线性代数运算的性能
class Linalg(Benchmark):
    """Benchmarks for ``np.linalg`` svd, pinv and det per dtype."""
    # Every entry of TYPES1 except 'float16', sorted for a stable order.
    params = sorted(list(set(TYPES1) - set(['float16'])))
    param_names = ['dtype']

    def setup(self, typename):
        """Select the input array for ``typename``."""
        # Silence floating-point warnings. Note that np.seterr changes the
        # process-wide error state and it is not restored here.
        np.seterr(all='ignore')
        # Entry of the get_squares_() mapping for this dtype name.
        self.a = get_squares_()[typename]

    def time_svd(self, typename):
        # Singular value decomposition.
        np.linalg.svd(self.a)

    def time_pinv(self, typename):
        # Pseudo-inverse.
        np.linalg.pinv(self.a)

    def time_det(self, typename):
        # Determinant.
        np.linalg.det(self.a)

# 定义一个继承自Benchmark类的新类LinalgNorm，用于测试线性代数中的范数计算性能
class LinalgNorm(Benchmark):
    """Benchmark for ``np.linalg.norm`` per dtype in TYPES1."""
    params = TYPES1
    param_names = ['dtype']

    def setup(self, typename):
        # Entry of the get_squares_() mapping for this dtype name.
        self.a = get_squares_()[typename]

    def time_norm(self, typename):
        # Norm with default arguments.
        np.linalg.norm(self.a)

# 定义一个继承自Benchmark类的新类LinalgSmallArrays，用于测试小数组的线性代数运算性能
class LinalgSmallArrays(Benchmark):
    """ Test overhead of linalg methods for small arrays """

    def setup(self):
        # A length-5 vector and a 5x5 matrix of consecutive floats.
        self.array_5 = np.arange(5.)
        self.array_5_5 = np.reshape(np.arange(25.), (5, 5))

    def time_norm_small_array(self):
        # Norm of the length-5 vector.
        np.linalg.norm(self.array_5)

    def time_det_small_array(self):
        # Determinant of the 5x5 matrix.
        np.linalg.det(self.array_5_5)


class Lstsq(Benchmark):
    """Benchmark for ``np.linalg.lstsq`` on float64 inputs."""

    def setup(self):
        # Coefficient matrix: the 'float64' entry of get_squares_().
        self.a = get_squares_()['float64']
        # Right-hand side: the first 100 random indexes as float64.
        self.b = get_indexes_rand()[:100].astype(np.float64)

    def time_numpy_linalg_lstsq_a__b_float64(self):
        # Least-squares solve with an explicit rcond=-1.
        np.linalg.lstsq(self.a, self.b, rcond=-1)
# 定义一个继承自Benchmark类的Einsum类，用于进行基准测试
class Einsum(Benchmark):
    """Benchmarks for ``np.einsum`` (``optimize=True``) per float dtype."""
    param_names = ['dtype']
    params = [[np.float32, np.float64]]

    def setup(self, dtype):
        """Create the 1-D, 2-D and 3-D operands used by the timings."""
        # 1-D operands of 600, 3000 and 480000 elements.
        self.one_dim_small = np.arange(600, dtype=dtype)
        self.one_dim = np.arange(3000, dtype=dtype)
        self.one_dim_big = np.arange(480000, dtype=dtype)
        # 2-D operands of shape (30, 40) and (400, 600).
        self.two_dim_small = np.arange(1200, dtype=dtype).reshape(30, 40)
        self.two_dim = np.arange(240000, dtype=dtype).reshape(400, 600)
        # 3-D operands of shape (10, 100, 10) and (20, 30, 40).
        self.three_dim_small = np.arange(10000, dtype=dtype).reshape(10, 100, 10)
        self.three_dim = np.arange(24000, dtype=dtype).reshape(20, 30, 40)

        # "non_contiguous" operands: values generated with a step of 2.
        # NOTE(review): np.arange returns a freshly allocated array, so these
        # look memory-contiguous despite their names -- confirm intent.
        # 40 odd values 1..79.
        self.non_contiguous_dim1_small = np.arange(1, 80, 2, dtype=dtype)
        # 2000 odd values 1..3999.
        self.non_contiguous_dim1 = np.arange(1, 4000, 2, dtype=dtype)
        # 1200 odd values reshaped to (30, 40).
        self.non_contiguous_dim2 = np.arange(1, 2400, 2, dtype=dtype).reshape(30, 40)
        # 24000 odd values reshaped to (20, 30, 40).
        self.non_contiguous_dim3 = np.arange(1, 48000, 2, dtype=dtype).reshape(20, 30, 40)

    # outer(a, b): targets sum_of_products_contig_stride0_outcontig_two
    def time_einsum_outer(self, dtype):
        np.einsum("i,j", self.one_dim, self.one_dim, optimize=True)

    # broadcasting element-wise multiply(a, b): targets
    # sum_of_products_contig_two
    def time_einsum_multiply(self, dtype):
        np.einsum("..., ...", self.two_dim_small, self.three_dim, optimize=True)

    # scalar times array, fully summed: targets
    # sum_of_products_contig_stride0_outstride0_two
    def time_einsum_sum_mul(self, dtype):
        np.einsum(",i...->", 300, self.three_dim_small, optimize=True)

    # array times scalar, fully summed: targets
    # sum_of_products_stride0_contig_outstride0_two
    def time_einsum_sum_mul2(self, dtype):
        np.einsum("i...,->", self.three_dim_small, 300, optimize=True)

    # array times scalar, element-wise: targets
    # sum_of_products_stride0_contig_outcontig_two
    def time_einsum_mul(self, dtype):
        np.einsum("i,->i", self.one_dim_big, 300, optimize=True)

    # matrix times vector, fully summed: targets contig_contig_outstride0_two
    def time_einsum_contig_contig(self, dtype):
        np.einsum("ji,i->", self.two_dim, self.one_dim_small, optimize=True)

    # sum of a 1-D array: targets sum_of_products_contig_outstride0_one
    def time_einsum_contig_outstride0(self, dtype):
        np.einsum("i->", self.one_dim_big, optimize=True)

    # outer(a, b) on the "non_contiguous" operands
    def time_einsum_noncon_outer(self, dtype):
        np.einsum("i,j", self.non_contiguous_dim1, self.non_contiguous_dim1, optimize=True)

    # broadcasting element-wise multiply on the "non_contiguous" operands
    def time_einsum_noncon_multiply(self, dtype):
        np.einsum("..., ...", self.non_contiguous_dim2, self.non_contiguous_dim3, optimize=True)

    # scalar times array, fully summed, on the "non_contiguous" operands
    def time_einsum_noncon_sum_mul(self, dtype):
        np.einsum(",i...->", 300, self.non_contiguous_dim3, optimize=True)

    # array times scalar, fully summed, on the "non_contiguous" operands
    def time_einsum_noncon_sum_mul2(self, dtype):
        np.einsum("i...,->", self.non_contiguous_dim3, 300, optimize=True)
    # array times scalar, element-wise, on the "non_contiguous" operands
    def time_einsum_noncon_mul(self, dtype):
        # Element-wise product of non_contiguous_dim1 with the scalar 300.
        np.einsum("i,->i", self.non_contiguous_dim1, 300, optimize=True)

    # contig_contig_outstride0_two on the "non_contiguous" operands
    def time_einsum_noncon_contig_contig(self, dtype):
        # Fully summed product of non_contiguous_dim2 (30, 40) with the
        # 40-element non_contiguous_dim1_small.
        np.einsum("ji,i->", self.non_contiguous_dim2, self.non_contiguous_dim1_small, optimize=True)

    # sum_of_products_contig_outstride0_one on the "non_contiguous" operands
    def time_einsum_noncon_contig_outstride0(self, dtype):
        # Sum of all elements of non_contiguous_dim1.
        np.einsum("i->", self.non_contiguous_dim1, optimize=True)
class LinAlgTransposeVdot(Benchmark):
    """Time ``np.transpose`` and ``np.vdot`` on random matrices."""

    # Benchmark grid: matrix shape crossed with every dtype name in TYPES1.
    params = [[(16, 16), (32, 32),
               (64, 64)], TYPES1]
    param_names = ['shape', 'npdtypes']

    def setup(self, shape, npdtypes):
        """Build two random operands of the requested shape and dtype."""
        # np.dot on the two scalar extents yields the total element count.
        n_items = np.dot(*shape)
        first = np.random.uniform(-1, 1, n_items).reshape(shape)
        first = first.astype(npdtypes)
        second = np.random.uniform(-1, 1, n_items).reshape(shape)
        second = second.astype(npdtypes)
        if npdtypes.startswith('complex'):
            # Give complex operands an imaginary part taken from their transpose.
            first += first.T*1j
            second += second.T*1j
        self.xarg = first
        self.x2arg = second

    def time_transpose(self, shape, npdtypes):
        """Time transposing the first operand."""
        np.transpose(self.xarg)

    def time_vdot(self, shape, npdtypes):
        """Time ``np.vdot`` of the two operands."""
        np.vdot(self.xarg, self.x2arg)


`.\numpy\benchmarks\benchmarks\bench_ma.py`

# 导入Benchmark类从common模块中
from .common import Benchmark

# 导入NumPy库并用np作为别名
import numpy as np

# 定义MA类，继承Benchmark类
class MA(Benchmark):
    """Time construction of ``np.ma.masked_array`` from small inputs."""

    def setup(self):
        """Prepare a 100-element range and a 100-element all-True mask."""
        self.l100 = range(100)
        self.t100 = ([True] * 100)

    def time_masked_array(self):
        """Time the constructor called with no arguments."""
        np.ma.masked_array()

    def time_masked_array_l100(self):
        """Time the constructor with ``self.l100`` as data and no mask."""
        np.ma.masked_array(self.l100)

    def time_masked_array_l100_t100(self):
        """Time the constructor with ``self.l100`` as data and ``self.t100`` as mask."""
        np.ma.masked_array(self.l100, self.t100)


# 定义MACreation类，继承Benchmark类
class MACreation(Benchmark):
    """Time ``np.ma.array`` creation for several lengths and mask arguments."""

    param_names = ['data', 'mask']
    # 'data' is the array length; 'mask' is passed straight through as the mask.
    params = [[10, 100, 1000],
              [True, False, None]]

    def time_ma_creations(self, data, mask):
        """Create a masked array over ``data`` zeros using the given mask value."""
        np.ma.array(data=np.zeros(int(data)),
                    mask=mask)


# 定义Indexing类，继承Benchmark类
class Indexing(Benchmark):
    """Time indexing a masked array with scalar, 0-d and 1-d style indices."""

    param_names = ['masked', 'ndim', 'size']
    params = [[True, False],
              [1, 2],
              [10, 100, 1000]]

    def setup(self, masked, ndim, size):
        """Build the ``ndim``-dimensional array ``self.m`` and the index tuples."""
        # An array with `ndim` axes of length `size` holding 0 .. size**ndim - 1.
        x = np.arange(size**ndim).reshape(ndim * (size,))

        if masked:
            # Mask every even value.
            self.m = np.ma.array(x, mask=x % 2 == 0)
        else:
            self.m = np.ma.array(x)

        # Full index (one entry per axis), the same index followed by
        # Ellipsis, and an index that leaves the last axis unindexed.
        self.idx_scalar = (size // 2,) * ndim
        self.idx_0d = (size // 2,) * ndim + (Ellipsis,)
        self.idx_1d = (size // 2,) * (ndim - 1)

    def time_scalar(self, masked, ndim, size):
        """Time indexing ``self.m`` with ``self.idx_scalar``."""
        self.m[self.idx_scalar]

    def time_0d(self, masked, ndim, size):
        """Time indexing ``self.m`` with ``self.idx_0d``."""
        self.m[self.idx_0d]

    def time_1d(self, masked, ndim, size):
        """Time indexing ``self.m`` with ``self.idx_1d``."""
        self.m[self.idx_1d]


# 定义UFunc类，继承Benchmark类
class UFunc(Benchmark):
    """Time ``np.ma.add`` on scalar, 1-d and broadcasting 2-d operands."""

    param_names = ['a_masked', 'b_masked', 'size']
    params = [[True, False],
              [True, False],
              [10, 100, 1000]]

    def setup(self, a_masked, b_masked, size):
        """Build the scalar, 1-d and 2-d operands for both sides of the add."""
        x = np.arange(size).astype(np.uint8)

        # Scalar operands: the masked constant when requested, else a plain int.
        self.a_scalar = np.ma.masked if a_masked else 5
        self.b_scalar = np.ma.masked if b_masked else 3

        # 1-d operands: a real mask when requested, otherwise nomask.
        self.a_1d = np.ma.array(
            x, mask=(x % 2 == 0) if a_masked else np.ma.nomask)
        self.b_1d = np.ma.array(
            x, mask=(x % 3 == 0) if b_masked else np.ma.nomask)

        # 2-d operands shaped (1, n) and (n, 1) so that adding them broadcasts.
        # b_2d is derived from b_1d (it was previously built from a_1d, which
        # made the 2-d benchmark ignore the b_masked parameter).
        self.a_2d = self.a_1d.reshape(1, -1)
        self.b_2d = self.b_1d.reshape(-1, 1)

    def time_scalar(self, a_masked, b_masked, size):
        """Time adding the two scalar operands."""
        np.ma.add(self.a_scalar, self.b_scalar)

    def time_scalar_1d(self, a_masked, b_masked, size):
        """Time adding the scalar ``a`` operand to the 1-d ``b`` operand."""
        np.ma.add(self.a_scalar, self.b_1d)

    def time_1d(self, a_masked, b_masked, size):
        """Time adding the two 1-d operands."""
        np.ma.add(self.a_1d, self.b_1d)

    def time_2d(self, a_masked, b_masked, size):
        """Time adding the two 2-d operands."""
        np.ma.add(self.a_2d, self.b_2d)


# 定义Concatenate类，继承Benchmark类
class Concatenate(Benchmark):
    """Time ``np.ma.concatenate`` over ten (n, n) integer arrays."""

    param_names = ['mode', 'n']
    # 'mode' is a base array kind, optionally followed by '+masked'.
    params = [
        ['ndarray', 'unmasked',
         'ndarray+masked', 'unmasked+masked',
         'masked'],
        [2, 100, 2000]
    ]

    def setup(self, mode, n):
        """Assemble the tuple of ten arrays selected by ``mode``."""
        # np.full is used instead of np.zeros to avoid lazy allocation, so
        # page faults happen here in setup rather than during the benchmark.
        plain = np.full((n, n), 0, int)
        no_mask = np.ma.zeros((n, n), int)
        all_masked = np.ma.array(plain, mask=True)

        base, *extras = mode.split('+')
        by_base = {'ndarray': plain, 'unmasked': no_mask}
        operands = [by_base.get(base, all_masked)] * 10

        # With a '+masked' suffix the last operand becomes the masked array.
        if 'masked' in extras:
            operands[-1] = all_masked

        self.args = tuple(operands)

    def time_it(self, mode, n):
        """Time concatenating the prepared arrays."""
        np.ma.concatenate(self.args)
class MAFunctions1v(Benchmark):
    """Time one-argument functions from ``np`` and ``np.ma`` on masked arrays."""

    param_names = ['mtype', 'func', 'msize']
    params = [['np', 'np.ma'],
              ['sin', 'log', 'sqrt'],
              ['small', 'big']]

    def setup(self, mtype, func, msize):
        """Create a 2x3 and a 100x100 masked array with values in [1, 3)."""
        small = 2.0 + np.random.uniform(-1, 1, 6).reshape(2, 3)
        small_mask = [[True, False, False], [False, False, True]]
        large = 2.0 + np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        # Mask the entries of the large array that exceed 2.8.
        large_mask = large > 2.8
        self.nmxs = np.ma.array(small, mask=small_mask)
        self.nmxl = np.ma.array(large, mask=large_mask)

    def time_functions_1v(self, mtype, func, msize):
        """Resolve ``<mtype>.<func>`` and apply it to the array chosen by ``msize``."""
        # The callable is looked up by name on every call, e.g. "np.ma.sin".
        fun = eval(f"{mtype}.{func}")
        if msize == 'small':
            operand = self.nmxs
        elif msize == 'big':
            operand = self.nmxl
        else:
            return
        fun(operand)



class MAMethod0v(Benchmark):
    """Time zero-argument ``MaskedArray`` methods on small and large arrays."""

    param_names = ['method', 'msize']
    params = [['ravel', 'transpose', 'compressed', 'conjugate'],
              ['small', 'big']]

    def setup(self, method, msize):
        """Create a 2x3 and a 100x100 masked array with values in [-1, 1)."""
        small = np.random.uniform(-1, 1, 6).reshape(2, 3)
        small_mask = [[True, False, False], [False, False, True]]
        large = np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        # Mask the entries of the large array that exceed 0.8.
        large_mask = large > 0.8
        self.nmxs = np.ma.array(small, mask=small_mask)
        self.nmxl = np.ma.array(large, mask=large_mask)

    def time_methods_0v(self, method, msize):
        """Look up ``method`` by name on the selected array and call it."""
        if msize == 'small':
            target = self.nmxs
        elif msize == 'big':
            target = self.nmxl
        bound = getattr(target, method)
        bound()

        # 使用getattr函数根据method名称调用mdat对象的相应方法


class MAFunctions2v(Benchmark):
    """Time two-argument functions from ``np`` and ``np.ma`` on masked arrays."""

    param_names = ['mtype', 'func', 'msize']
    params = [['np', 'np.ma'],
              ['multiply', 'divide', 'power'],
              ['small', 'big']]

    def setup(self, mtype, func, msize):
        """Create paired 2x3 and 100x100 masked operands with values in [1, 3)."""
        # Small arrays
        xs = 2.0 + np.random.uniform(-1, 1, 6).reshape(2, 3)
        ys = 2.0 + np.random.uniform(-1, 1, 6).reshape(2, 3)
        m1 = [[True, False, False], [False, False, True]]
        m2 = [[True, False, True], [False, False, True]]
        self.nmxs = np.ma.array(xs, mask=m1)
        self.nmys = np.ma.array(ys, mask=m2)
        # Big arrays
        xl = 2.0 + np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        yl = 2.0 + np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        # Mask large x values above 2.8 and large y values below 1.8.
        maskx = xl > 2.8
        masky = yl < 1.8
        self.nmxl = np.ma.array(xl, mask=maskx)
        self.nmyl = np.ma.array(yl, mask=masky)

    def time_functions_2v(self, mtype, func, msize):
        """Resolve ``<mtype>.<func>`` and apply it to the pair chosen by ``msize``."""
        # The callable is looked up by name on every call, e.g. "np.ma.multiply".
        fun = eval(f"{mtype}.{func}")
        if msize == 'small':
            fun(self.nmxs, self.nmys)
        elif msize == 'big':
            # Same structure as the one-argument benchmark: 'big' uses the
            # 100x100 operands.
            fun(self.nmxl, self.nmyl)


class MAMethodGetItem(Benchmark):
    """Time ``MaskedArray.__getitem__`` on small and large masked arrays."""

    param_names = ['margs', 'msize']
    # NOTE(review): the 'margs' grid was missing from this listing; the indices
    # below are valid for both the 2x3 and the 100x100 array - confirm them
    # against the upstream benchmark before relying on historical comparisons.
    params = [[0, (0, 0), [0, -1]],
              ['small', 'big']]

    def setup(self, margs, msize):
        """Create a 2x3 and a 100x100 masked array with values in [-1, 1)."""
        xs = np.random.uniform(-1, 1, 6).reshape(2, 3)
        m1 = [[True, False, False], [False, False, True]]
        xl = np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        # Mask the entries of the large array that exceed 0.8.
        maskx = xl > 0.8
        self.nmxs = np.ma.array(xs, mask=m1)
        self.nmxl = np.ma.array(xl, mask=maskx)

    def time_methods_getitem(self, margs, msize):
        """Call ``__getitem__`` with ``margs`` on the array chosen by ``msize``."""
        if msize == 'small':
            mdat = self.nmxs
        elif msize == 'big':
            mdat = self.nmxl
        getattr(mdat, '__getitem__')(margs)


class MAMethodSetItem(Benchmark):
    """Time ``MaskedArray.__setitem__`` on small and large masked arrays."""

    param_names = ['margs', 'mset', 'msize']
    # Index to assign to, value to assign, and which array to use.
    params = [[0, (0, 0), (-1, 0)],
              [17, np.ma.masked],
              ['small', 'big']]

    def setup(self, margs, mset, msize):
        """Create a 2x3 and a 100x100 masked array with values in [-1, 1)."""
        small = np.random.uniform(-1, 1, 6).reshape(2, 3)
        small_mask = [[True, False, False], [False, False, True]]
        large = np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        # Mask the entries of the large array that exceed 0.8.
        large_mask = large > 0.8
        self.nmxs = np.ma.array(small, mask=small_mask)
        self.nmxl = np.ma.array(large, mask=large_mask)

    def time_methods_setitem(self, margs, mset, msize):
        """Call ``__setitem__(margs, mset)`` on the array chosen by ``msize``."""
        if msize == 'small':
            target = self.nmxs
        elif msize == 'big':
            target = self.nmxl
        setter = getattr(target, '__setitem__')
        setter(margs, mset)



class Where(Benchmark):
    """Time ``where`` from ``np`` and ``np.ma`` on masked operands."""

    param_names = ['mtype', 'msize']
    params = [['np', 'np.ma'],
              ['small', 'big']]

    def setup(self, mtype, msize):
        """Create paired 2x3 and 100x100 masked operands with values in [-1, 1)."""
        # Small pair with fixed masks.
        small_x = np.random.uniform(-1, 1, 6).reshape(2, 3)
        small_y = np.random.uniform(-1, 1, 6).reshape(2, 3)
        mask_x = [[True, False, False], [False, False, True]]
        mask_y = [[True, False, True], [False, False, True]]
        self.nmxs = np.ma.array(small_x, mask=mask_x)
        self.nmys = np.ma.array(small_y, mask=mask_y)
        # Large pair: x is masked above 0.8 and y is masked below -0.8.
        large_x = np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        large_y = np.random.uniform(-1, 1, 100*100).reshape(100, 100)
        self.nmxl = np.ma.array(large_x, mask=large_x > 0.8)
        self.nmyl = np.ma.array(large_y, mask=large_y < -0.8)

    def time_where(self, mtype, msize):
        """Resolve ``<mtype>.where`` and call it with condition ``x > 2``."""
        fun = eval(f"{mtype}.where")
        if msize == 'small':
            x, y = self.nmxs, self.nmys
        elif msize == 'big':
            x, y = self.nmxl, self.nmyl
        else:
            return
        fun(x > 2, x, y)



class Cov(Benchmark):
    """Time ``np.ma.cov`` on a 10x10 and a 100x100 masked float32 array."""

    param_names = ["size"]
    params = [["small", "large"]]

    def setup(self, size):
        """Create ``self.small`` and ``self.large``, masking values <= 0.2."""
        threshold = 0.2
        gen = np.random.default_rng()
        # The small array is drawn first, then the large one.
        for attr, side in (("small", 10), ("large", 100)):
            values = gen.random((side, side), dtype=np.float32)
            setattr(self, attr, np.ma.array(values, mask=(values <= threshold)))

    def time_cov(self, size):
        """Compute the covariance of the array selected by ``size``."""
        if size == "small":
            np.ma.cov(self.small)
        elif size == "large":
            np.ma.cov(self.large)



class Corrcoef(Benchmark):
    """Time ``np.ma.corrcoef`` on a 10x10 and a 100x100 masked float32 array."""

    param_names = ["size"]
    params = [["small", "large"]]

    def setup(self, size):
        """Create ``self.small`` and ``self.large``, masking values <= 0.2."""
        cutoff = 0.2
        gen = np.random.default_rng()
        small_values = gen.random((10, 10), dtype=np.float32)
        self.small = np.ma.array(small_values, mask=(small_values <= cutoff))
        large_values = gen.random((100, 100), dtype=np.float32)
        self.large = np.ma.array(large_values, mask=(large_values <= cutoff))

    def time_corrcoef(self, size):
        """Compute the correlation coefficients of the array selected by ``size``."""
        if size == "small":
            np.ma.corrcoef(self.small)
        elif size == "large":
            np.ma.corrcoef(self.large)

`.\numpy\benchmarks\benchmarks\bench_manipulate.py`

from .common import Benchmark, get_squares_, TYPES1, DLPACK_TYPES
# 导入必要的模块和函数

import numpy as np
# 导入 NumPy 库

from collections import deque
# 导入 deque 数据结构，用于特定的维度操作

class BroadcastArrays(Benchmark):
    """Time ``np.broadcast_arrays`` of a 2-D array against a length-1 array."""

    # Benchmark grid: array shape crossed with every dtype name in TYPES1.
    params = [[(16, 32), (128, 256), (512, 1024)],
              TYPES1]
    param_names = ['shape', 'ndtype']

    timeout = 10

    def setup(self, shape, ndtype):
        """Create the random operand ``self.xarg`` with the given shape and dtype."""
        rows, cols = shape[0], shape[1]
        values = np.random.ranf(rows*cols).reshape(shape)
        values = values.astype(ndtype)
        if ndtype.startswith('complex'):
            # Add one random imaginary offset to every element.
            values += np.random.ranf(1)*1j
        self.xarg = values

    def time_broadcast_arrays(self, shape, ndtype):
        """Broadcast the operand against ``np.ones(1)``."""
        np.broadcast_arrays(self.xarg, np.ones(1))



class BroadcastArraysTo(Benchmark):
    """Time ``np.broadcast_to`` of a length-``size`` array to (size, size)."""

    # Benchmark grid: array length crossed with every dtype name in TYPES1.
    params = [[16, 64, 512],
              TYPES1]
    param_names = ['size', 'ndtype']

    timeout = 10

    def setup(self, size, ndtype):
        """Create the generator ``self.rng`` and the random operand ``self.xarg``."""
        self.rng = np.random.default_rng()
        values = self.rng.random(size).astype(ndtype)
        if ndtype.startswith('complex'):
            # Add one random imaginary offset to every element.
            values += self.rng.random(1)*1j
        self.xarg = values

    def time_broadcast_to(self, size, ndtype):
        """Broadcast the operand to a square (size, size) shape."""
        target_shape = (size, size)
        np.broadcast_to(self.xarg, target_shape)

        # 使用 np.broadcast_to 函数进行广播操作


class ConcatenateStackArrays(Benchmark):
    """Time ``np.concatenate`` and ``np.stack`` over lists of random arrays."""

    # Benchmark grid: array shape, number of arrays, and dtype name.
    params = [[(16, 32), (32, 64)],
              [2, 5],
              TYPES1]
    param_names = ['shape', 'narrays', 'ndtype']

    timeout = 10

    def setup(self, shape, narrays, ndtype):
        """Create ``narrays`` random arrays of the given shape and dtype."""
        self.xarg = [np.random.ranf(shape[0]*shape[1]).reshape(shape)
                     for x in range(narrays)]
        self.xarg = [x.astype(ndtype) for x in self.xarg]
        if ndtype.startswith('complex'):
            # Add the imaginary offset in place. The previous list
            # comprehension built new arrays and threw them away, so complex
            # inputs never actually received an imaginary part. The in-place
            # add also keeps each array's dtype unchanged.
            for x in self.xarg:
                x += np.random.ranf(1)*1j

    def time_concatenate_ax0(self, shape, narrays, ndtype):
        """Concatenate the arrays along axis 0."""
        np.concatenate(self.xarg, axis=0)

    def time_concatenate_ax1(self, shape, narrays, ndtype):
        """Concatenate the arrays along axis 1."""
        np.concatenate(self.xarg, axis=1)

    def time_stack_ax0(self, shape, narrays, ndtype):
        """Stack the arrays along a new axis 0."""
        np.stack(self.xarg, axis=0)

    def time_stack_ax1(self, shape, narrays, ndtype):
        """Stack the arrays along a new axis 1."""
        np.stack(self.xarg, axis=1)

        # 使用 np.stack 函数在 axis=1 上堆叠数组


class ConcatenateNestedArrays(ConcatenateStackArrays):
    """Re-run the concatenate/stack timings with many (1, 1) arrays."""

    # Large number of small arrays to test GIL (non-)release
    params = [
        [(1, 1)],
        [1000, 100000],
        TYPES1,
    ]
    # 参数化设置：小数组数量大以测试 GIL（非）释放

class DimsManipulations(Benchmark):
    """Time shape- and axis-manipulation functions on small arrays of ones."""

    params = [
        [(2, 1, 4), (2, 1), (5, 2, 3, 1)],
    ]
    param_names = ['shape']

    timeout = 10

    def setup(self, shape):
        """Create the array ``self.xarg`` and the target shape ``self.reshaped``."""
        self.xarg = np.ones(shape=shape)
        # The reshape target is the input shape rotated by one position.
        rotated = deque(shape)
        rotated.rotate(1)
        self.reshaped = tuple(rotated)

    def time_expand_dims(self, shape):
        """Insert a new axis at position 1."""
        np.expand_dims(self.xarg, axis=1)

    def time_expand_dims_neg(self, shape):
        """Insert a new axis at position -1."""
        np.expand_dims(self.xarg, axis=-1)

    def time_squeeze_dims(self, shape):
        """Remove all length-one axes."""
        np.squeeze(self.xarg)

    def time_flip_all(self, shape):
        """Flip along every axis (``axis=None``)."""
        np.flip(self.xarg, axis=None)

    def time_flip_one(self, shape):
        """Flip along axis 1."""
        np.flip(self.xarg, axis=1)

    def time_flip_neg(self, shape):
        """Flip along the last axis."""
        np.flip(self.xarg, axis=-1)

    def time_moveaxis(self, shape):
        """Move axes 0 and 1 to positions -1 and -2."""
        sources, destinations = [0, 1], [-1, -2]
        np.moveaxis(self.xarg, sources, destinations)

    def time_roll(self, shape):
        """Roll the array by 3 positions with no axis given."""
        np.roll(self.xarg, 3)

    def time_reshape(self, shape):
        """Reshape the array to ``self.reshaped``."""
        np.reshape(self.xarg, self.reshaped)

`.\numpy\benchmarks\benchmarks\bench_overrides.py`

# 导入Benchmark类，用于性能基准测试
from .common import Benchmark

try:
    # Use the real dispatch decorator when this NumPy build provides it.
    from numpy._core.overrides import array_function_dispatch
except ImportError:
    # Fallback so that importing this module does not fail when the
    # decorator cannot be imported.
    def array_function_dispatch(*args, **kwargs):
        """Stand-in decorator factory; the decorator it returns yields None."""
        def _discard(*_args, **_kwargs):
            return None
        return _discard

# 导入Numpy库并使用np作为别名
import numpy as np


def _broadcast_to_dispatcher(array, shape, subok=None):
    return (array,)


# 使用array_function_dispatch装饰器将_broadcast_to_dispatcher注册为mock_broadcast_to的分发器
@array_function_dispatch(_broadcast_to_dispatcher)
def mock_broadcast_to(array, shape, subok=False):
    """Do nothing; only the dispatch wrapper around this function does work."""
    return None


def _concatenate_dispatcher(arrays, axis=None, out=None):
    if out is not None:
        # 如果提供了输出参数out，则将其追加到arrays列表中
        arrays = list(arrays)
        arrays.append(out)
    return arrays


# 使用array_function_dispatch装饰器将_concatenate_dispatcher注册为mock_concatenate的分发器
@array_function_dispatch(_concatenate_dispatcher)
def mock_concatenate(arrays, axis=0, out=None):
    """Do nothing; only the dispatch wrapper around this function does work."""
    return None


# 定义一个DuckArray类，用于模拟Numpy数组的行为
class DuckArray:
    """Minimal object that defines ``__array_function__`` and nothing else."""

    def __array_function__(self, func, types, args, kwargs):
        """Accept the call and return None without doing any work."""
        return None


# ArrayFunction类继承自Benchmark类，用于对数组操作的性能进行基准测试
class ArrayFunction(Benchmark):
    """Time calls to the mock functions with NumPy and duck-array arguments."""

    def setup(self):
        """Create the NumPy, duck-array and mixed arguments used by the timers."""
        pair = [np.array(1), np.array(2)]
        self.numpy_array = np.array(1)
        self.numpy_arrays = pair
        # The same two array objects repeated 500 times (1000 entries).
        self.many_arrays = 500 * pair
        self.duck_array = DuckArray()
        self.duck_arrays = [DuckArray(), DuckArray()]
        self.mixed_arrays = [np.array(1), DuckArray()]

    def time_mock_broadcast_to_numpy(self):
        """Call ``mock_broadcast_to`` with a NumPy array."""
        mock_broadcast_to(self.numpy_array, ())

    def time_mock_broadcast_to_duck(self):
        """Call ``mock_broadcast_to`` with a ``DuckArray``."""
        mock_broadcast_to(self.duck_array, ())

    def time_mock_concatenate_numpy(self):
        """Call ``mock_concatenate`` with two NumPy arrays."""
        mock_concatenate(self.numpy_arrays, axis=0)

    def time_mock_concatenate_many(self):
        """Call ``mock_concatenate`` with the 1000-entry list of NumPy arrays."""
        mock_concatenate(self.many_arrays, axis=0)

    def time_mock_concatenate_duck(self):
        """Call ``mock_concatenate`` with two ``DuckArray`` objects."""
        mock_concatenate(self.duck_arrays, axis=0)

    def time_mock_concatenate_mixed(self):
        """Call ``mock_concatenate`` with one NumPy array and one ``DuckArray``."""
        mock_concatenate(self.mixed_arrays, axis=0)

`.\numpy\benchmarks\benchmarks\bench_random.py`

from .common import Benchmark  # 导入 Benchmark 类

import numpy as np  # 导入 NumPy 库

try:
    from numpy.random import Generator  # 尝试导入 Generator 类
except ImportError:
    pass  # 如果导入失败则跳过

class Random(Benchmark):
    """Time legacy ``np.random`` distribution functions on a (100, 100) output."""

    # Each entry is a function name followed by its numeric arguments.
    params = ['normal', 'uniform', 'weibull 1', 'binomial 10 0.5',
              'poisson 10']

    def setup(self, name):
        """Parse ``name`` into the function to call and its argument tuple."""
        pieces = name.split()
        func_name, raw_args = pieces[0], pieces[1:]
        numeric_args = tuple(float(piece) for piece in raw_args)

        self.func = getattr(np.random, func_name)
        # The output shape is passed as the final positional argument.
        self.params = numeric_args + ((100, 100),)

    def time_rng(self, name):
        """Call the selected function with the prepared arguments."""
        self.func(*self.params)

class Shuffle(Benchmark):
    """Time ``np.random.shuffle`` on a 100000-element integer array."""

    def setup(self):
        """Create the array that gets shuffled in place."""
        n_items = 100000
        self.a = np.arange(n_items)

    def time_100000(self):
        """Shuffle the array in place."""
        np.random.shuffle(self.a)

class Randint(Benchmark):
    """Time ``np.random.randint`` for two upper bounds around 2**30."""

    def time_randint_fast(self):
        """Compare to uint32 below"""
        # Exclusive upper bound of exactly 2**30.
        upper = 2**30
        np.random.randint(0, upper, size=10**5)

    def time_randint_slow(self):
        """Compare to uint32 below"""
        # Exclusive upper bound one above 2**30.
        upper = 2**30 + 1
        np.random.randint(0, upper, size=10**5)

class Randint_dtype(Benchmark):
    """Time ``np.random.randint`` with an explicit output dtype."""

    # Exclusive upper bound used by the "fast" timer for each dtype name.
    high = {
        'bool': 1,
        'uint8': 2**7,
        'uint16': 2**15,
        'uint32': 2**31,
        'uint64': 2**63
    }

    param_names = ['dtype']
    params = ['bool', 'uint8', 'uint16', 'uint32', 'uint64']

    def setup(self, name):
        """Raise NotImplementedError on NumPy older than 1.11.0.dev0."""
        from numpy.lib import NumpyVersion
        too_old = NumpyVersion(np.__version__) < '1.11.0.dev0'
        if too_old:
            raise NotImplementedError

    def time_randint_fast(self, name):
        """Draw 10**5 values below the table's bound for this dtype."""
        bound = self.high[name]
        np.random.randint(0, bound, size=10**5, dtype=name)

    def time_randint_slow(self, name):
        """Draw 10**5 values below the table's bound plus one."""
        bound = self.high[name] + 1
        np.random.randint(0, bound, size=10**5, dtype=name)

class Permutation(Benchmark):
    """Time ``np.random.permutation`` on 1-D, 2-D and integer inputs."""

    def setup(self):
        """Create a length-10000 vector and a (10000, 2) matrix of random floats."""
        count = 10000
        self.n = count
        self.a_1d = np.random.random(count)
        self.a_2d = np.random.random((count, 2))

    def time_permutation_1d(self):
        """Permute the 1-D array."""
        np.random.permutation(self.a_1d)

    def time_permutation_2d(self):
        """Permute the 2-D array."""
        np.random.permutation(self.a_2d)

    def time_permutation_int(self):
        """Permute the integer ``self.n``."""
        np.random.permutation(self.n)

# Number of values requested per call by the RNG and Bounded benchmarks.
nom_size = 100000

class RNG(Benchmark):
    """Time raw, 32-bit, 64-bit and normal draws for several bit generators."""

    param_names = ['rng']
    # 'numpy' selects the legacy RandomState; the other names are looked up
    # on np.random and wrapped in a Generator.
    params = ['PCG64', 'MT19937', 'Philox', 'SFC64', 'numpy']

    def setup(self, bitgen):
        """Create ``self.rg`` and cache the integer limits used by the timers."""
        if bitgen != 'numpy':
            bit_generator = getattr(np.random, bitgen)()
            self.rg = Generator(bit_generator)
        else:
            self.rg = np.random.RandomState()
        # Draw once before any timing starts.
        self.rg.random()
        self.int32info = np.iinfo(np.int32)
        self.uint32info = np.iinfo(np.uint32)
        self.uint64info = np.iinfo(np.uint64)

    def time_raw(self, bitgen):
        """Draw integers with the int32 maximum as the bound argument."""
        upper = self.int32info.max
        if bitgen == 'numpy':
            self.rg.random_integers(upper, size=nom_size)
        else:
            self.rg.integers(upper, size=nom_size, endpoint=True)

    def time_32bit(self, bitgen):
        """Draw uint32 values over the full uint32 range."""
        low, high = self.uint32info.min, self.uint32info.max
        draw = self.rg.randint if bitgen == 'numpy' else self.rg.integers
        draw(low, high + 1, nom_size, dtype=np.uint32)

    def time_64bit(self, bitgen):
        """Draw uint64 values over the full uint64 range."""
        low, high = self.uint64info.min, self.uint64info.max
        draw = self.rg.randint if bitgen == 'numpy' else self.rg.integers
        draw(low, high + 1, nom_size, dtype=np.uint64)

    def time_normal_zig(self, bitgen):
        """Draw ``nom_size`` standard-normal values."""
        self.rg.standard_normal(nom_size)
# 定义一个继承自Benchmark的Bounded类
class Bounded(Benchmark):
    """Time bounded unsigned-integer draws for several dtypes and upper bounds."""

    # Short aliases for the output dtypes used in the parameter grid.
    u8 = np.uint8
    u16 = np.uint16
    u32 = np.uint32
    u64 = np.uint64
    param_names = ['rng', 'dt_max']
    params = [['PCG64', 'MT19937', 'Philox', 'SFC64', 'numpy'],
              [[u8,    95],  # 8-bit: worst case
               [u8,    64],  # 8-bit: worst case for legacy
               [u8,   127],  # 8-bit: best case for legacy
               [u16,   95],  # 16-bit: worst case
               [u16, 1024],  # 16-bit: worst case for legacy
               [u16, 1535],  # 16-bit: typical average case for legacy
               [u16, 2047],  # 16-bit: best case for legacy
               [u32, 1024],  # 32-bit: worst case for legacy
               [u32, 1535],  # 32-bit: typical average case for legacy
               [u32, 2047],  # 32-bit: best case for legacy
               [u64,   95],  # 64-bit: worst case
               [u64, 1024],  # 64-bit: worst case for legacy
               [u64, 1535],  # 64-bit: typical average case for legacy
               [u64, 2047],  # 64-bit: best case for legacy
             ]]

    def setup(self, bitgen, args):
        """Create ``self.rg`` with a fixed seed and draw from it once."""
        seed = 707250673
        if bitgen != 'numpy':
            bit_generator = getattr(np.random, bitgen)(seed)
            self.rg = Generator(bit_generator)
        else:
            self.rg = np.random.RandomState(seed)
        # Draw once before any timing starts.
        self.rg.random()

    def time_bounded(self, bitgen, args):
        """
        Timer for bounded integer values.

        Parameters (packed as args)
        ----------
        dt : {uint8, uint16, uint32, uint64}
            output dtype
        max : int
            Upper bound for range. Lower is always 0.  Must be <= 2**bits.
        """
        dt, upper = args
        # The legacy RandomState uses randint; Generator uses integers.
        draw = self.rg.randint if bitgen == 'numpy' else self.rg.integers
        draw(0, upper + 1, nom_size, dtype=dt)

# 定义一个继承自Benchmark的Choice类
class Choice(Benchmark):
    """Time drawing 1000 items without replacement from arrays of several sizes."""

    params = [1e3, 1e6, 1e8]

    def setup(self, v):
        """Create the source array ``np.arange(v)`` and a default Generator."""
        self.rng = np.random.default_rng()
        self.a = np.arange(v)

    def time_legacy_choice(self, v):
        """Draw with the legacy ``np.random.choice``."""
        np.random.choice(self.a, 1000, replace=False)

    def time_choice(self, v):
        """Draw with ``Generator.choice``."""
        self.rng.choice(self.a, 1000, replace=False)

`.\numpy\benchmarks\benchmarks\bench_records.py`

# 导入Benchmark类从common模块中
from .common import Benchmark

# 导入NumPy库并重命名为np
import numpy as np

# Benchmarks for constructing record arrays with ``np._core.records``.
class Records(Benchmark):
    """Time ``fromarrays`` and ``fromstring`` with a large number of fields."""

    def setup(self):
        """Build the shared inputs: columns, formats, dtype and raw buffer."""
        # A single 1000-element column, reused for every field.
        self.l50 = np.arange(1000)
        self.fields_number = 10000
        # The same array object repeated once per field.
        self.arrays = [self.l50 for _ in range(self.fields_number)]
        # Per-field format: the column's dtype string, once per field ...
        self.formats = [self.l50.dtype.str for _ in range(self.fields_number)]
        # ... and the same formats joined into one comma-separated string.
        self.formats_str = ','.join(self.formats)
        # Structured dtype with fields named 'field_0' .. 'field_9999'.
        self.dtype_ = np.dtype(
            [
                ('field_{}'.format(i), self.l50.dtype.str)
                for i in range(self.fields_number)
            ]
        )
        # Raw bytes of the column repeated once per field.  ``tobytes`` is
        # used because ``tostring`` is only a deprecated alias of it.
        self.buffer = self.l50.tobytes() * self.fields_number

    # fromarrays with an explicit structured dtype.
    def time_fromarrays_w_dtype(self):
        np._core.records.fromarrays(self.arrays, dtype=self.dtype_)

    # fromarrays with neither dtype nor formats given.
    def time_fromarrays_wo_dtype(self):
        np._core.records.fromarrays(self.arrays)

    # fromarrays with the formats given as a list of strings.
    def time_fromarrays_formats_as_list(self):
        np._core.records.fromarrays(self.arrays, formats=self.formats)

    # fromarrays with the formats given as one comma-separated string.
    def time_fromarrays_formats_as_string(self):
        np._core.records.fromarrays(self.arrays, formats=self.formats_str)

    # fromstring on the raw buffer with an explicit structured dtype.
    def time_fromstring_w_dtype(self):
        np._core.records.fromstring(self.buffer, dtype=self.dtype_)

    # fromstring on the raw buffer with the formats given as a list.
    def time_fromstring_formats_as_list(self):
        np._core.records.fromstring(self.buffer, formats=self.formats)

    # fromstring on the raw buffer with the formats given as one string.
    def time_fromstring_formats_as_string(self):
        np._core.records.fromstring(self.buffer, formats=self.formats_str)

`.\numpy\benchmarks\benchmarks\bench_reduce.py`

from .common import Benchmark, TYPES1, get_squares  # 导入所需模块和函数

import numpy as np  # 导入 NumPy 库


class AddReduce(Benchmark):
    """Time ``np.add.reduce`` along each axis for every array from ``get_squares()``."""

    def setup(self):
        # Values of the mapping returned by get_squares()
        # (presumably square 2-D arrays, one per dtype -- confirm in common.py).
        self.squares = get_squares().values()

    def time_axis_0(self):
        [np.add.reduce(a, axis=0) for a in self.squares]
        # Sum-reduce every array along axis 0.

    def time_axis_1(self):
        [np.add.reduce(a, axis=1) for a in self.squares]
        # Sum-reduce every array along axis 1.


class AddReduceSeparate(Benchmark):
    """Time ``np.add.reduce`` separately for each (axis, type) combination."""
    params = [[0, 1], TYPES1]  # axis is 0 or 1; type names come from TYPES1
    param_names = ['axis', 'type']  # names reported for the two parameters

    def setup(self, axis, typename):
        self.a = get_squares()[typename]  # the get_squares() entry for this type name

    def time_reduce(self, axis, typename):
        np.add.reduce(self.a, axis=axis)
        # Sum-reduce the selected array along the parametrized axis.


class AnyAll(Benchmark):
    """Time ``all`` / ``any`` on all-False and all-True boolean arrays."""

    def setup(self):
        # 100000-element boolean arrays: one all False, one all True.
        self.zeros = np.full(100000, 0, bool)
        self.ones = np.full(100000, 1, bool)

    def time_all_fast(self):
        self.zeros.all()
        # ``all`` on an all-False array (named "fast": presumably the result
        # is decided by the first element -- confirm).

    def time_all_slow(self):
        self.ones.all()
        # ``all`` on an all-True array (named "slow": presumably every
        # element must be inspected -- confirm).

    def time_any_fast(self):
        self.ones.any()
        # ``any`` on an all-True array.

    def time_any_slow(self):
        self.zeros.any()
        # ``any`` on an all-False array.


class StatsReductions(Benchmark):
    """Time basic statistical reductions on a 200-element array per dtype."""
    params = ['int64', 'uint64', 'float32', 'float64', 'complex64', 'bool_']  # dtype names under test
    param_names = ['dtype']  # name reported for the parameter

    def setup(self, dtype):
        self.data = np.ones(200, dtype=dtype)  # 200 ones of the requested dtype
        if dtype.startswith('complex'):
            self.data = self.data * self.data.T*1j  # complex dtypes: multiply by the transpose and by 1j

    def time_min(self, dtype):
        np.min(self.data)
        # Minimum of the array.

    def time_max(self, dtype):
        np.max(self.data)
        # Maximum of the array.

    def time_mean(self, dtype):
        np.mean(self.data)
        # Arithmetic mean of the array.

    def time_std(self, dtype):
        np.std(self.data)
        # Standard deviation of the array.

    def time_prod(self, dtype):
        np.prod(self.data)
        # Product of all elements.

    def time_var(self, dtype):
        np.var(self.data)
        # Variance of the array.


class FMinMax(Benchmark):
    """Time ``np.fmin.reduce`` / ``np.fmax.reduce`` on float arrays."""
    params = [np.float32, np.float64]  # floating-point dtypes under test
    param_names = ['dtype']  # name reported for the parameter

    def setup(self, dtype):
        self.d = np.ones(20000, dtype=dtype)  # 20000 ones of the requested dtype

    def time_min(self, dtype):
        np.fmin.reduce(self.d)
        # Reduce the array with the ``fmin`` ufunc.

    def time_max(self, dtype):
        np.fmax.reduce(self.d)
        # Reduce the array with the ``fmax`` ufunc.


class ArgMax(Benchmark):
    """Time ``np.argmax`` on an all-zeros array for each dtype."""
    params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
              np.int64, np.uint64, np.float32, np.float64, bool]  # dtypes under test
    param_names = ['dtype']  # name reported for the parameter

    def setup(self, dtype):
        self.d = np.zeros(200000, dtype=dtype)  # 200000 zeros of the requested dtype

    def time_argmax(self, dtype):
        np.argmax(self.d)
        # Index of the maximum element.


class ArgMin(Benchmark):
    """Time ``np.argmin`` on an all-ones array for each dtype."""
    params = [np.int8, np.uint8, np.int16, np.uint16, np.int32, np.uint32,
              np.int64, np.uint64, np.float32, np.float64, bool]  # dtypes under test
    param_names = ['dtype']  # name reported for the parameter

    def setup(self, dtype):
        self.d = np.ones(200000, dtype=dtype)  # 200000 ones of the requested dtype

    def time_argmin(self, dtype):
        np.argmin(self.d)
        # Index of the minimum element.


class SmallReduction(Benchmark):
    """Time ``np.sum`` on a small float32 array."""

    def setup(self):
        self.d = np.ones(100, dtype=np.float32)  # 100 float32 ones

    def time_small(self):
        np.sum(self.d)
        # Sum of all elements.

`.\numpy\benchmarks\benchmarks\bench_scalar.py`

# 从共享库中导入 Benchmark 类和 TYPES1 常量
from .common import Benchmark, TYPES1

# 导入 numpy 库，并将其命名为 np
import numpy as np

# Benchmarks for arithmetic on NumPy scalars.
class ScalarMath(Benchmark):
    # Test scalar math; each test repeats the operation several times to
    # offset the function-call overhead.
    params = [TYPES1]  # type names to benchmark
    param_names = ["type"]  # name reported for the parameter

    # Build the operands used by the timed methods.
    def setup(self, typename):
        # Scalar of the requested dtype holding the value 2.
        self.num = np.dtype(typename).type(2)
        # np.int32 scalar holding the value 2.
        self.int32 = np.int32(2)
        # 0-d int32 array holding the value 2.
        self.int32arr = np.array(2, dtype=np.int32)

    # Chained additions of the scalar with itself.
    def time_addition(self, typename):
        # Bind to a local so attribute lookup is not part of the chain.
        n = self.num
        # Nine additions in a single expression.
        res = n + n + n + n + n + n + n + n + n + n

    # Chained additions of the scalar with Python integers.
    def time_addition_pyint(self, typename):
        # Bind to a local so attribute lookup is not part of the chain.
        n = self.num
        # Nine additions of the Python int 1.
        res = n + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1

    # Chained multiplications of the scalar with itself.
    def time_multiplication(self, typename):
        # Bind to a local so attribute lookup is not part of the chain.
        n = self.num
        # Nine multiplications in a single expression.
        res = n * n * n * n * n * n * n * n * n * n

    # Squaring via the ``**`` operator.
    def time_power_of_two(self, typename):
        # Bind to a local so attribute lookup is not part of the expression.
        n = self.num
        # Ten independent ``n**2`` evaluations collected in a tuple.
        res = n**2, n**2, n**2, n**2, n**2, n**2, n**2, n**2, n**2, n**2

    # Nested calls of the builtin ``abs``.
    def time_abs(self, typename):
        # Bind to a local so attribute lookup is not part of the expression.
        n = self.num
        # Ten nested ``abs`` calls.
        res = abs(abs(abs(abs(abs(abs(abs(abs(abs(abs(n))))))))))

    # int32 scalar + scalar of the parametrized type.
    def time_add_int32_other(self, typename):
        # Some mixed cases are fast, some are slow; this documents these
        # differences.  (When writing, it was fast if the type of the result
        # is one of the inputs.)
        # Left operand: the int32 scalar.
        int32 = self.int32
        # Right operand: the scalar of the parametrized type.
        other = self.num
        # Five repetitions of the same addition.
        int32 + other
        int32 + other
        int32 + other
        int32 + other
        int32 + other

    # 0-d int32 array + scalar of the parametrized type.
    def time_add_int32arr_and_other(self, typename):
        # `arr + scalar` hits the normal ufunc (array) paths.
        # Left operand: the 0-d int32 array (local keeps the name ``int32``).
        int32 = self.int32arr
        # Right operand: the scalar of the parametrized type.
        other = self.num
        # Five repetitions of the same addition.
        int32 + other
        int32 + other
        int32 + other
        int32 + other
        int32 + other

    # Scalar of the parametrized type + 0-d int32 array.
    def time_add_other_and_int32arr(self, typename):
        # `scalar + arr` at some point hit scalar paths in some cases, and
        # these paths could be optimized more easily.
        # Right operand: the 0-d int32 array (local keeps the name ``int32``).
        int32 = self.int32arr
        # Left operand: the scalar of the parametrized type.
        other = self.num
        # Five repetitions of the same addition.
        other + int32
        other + int32
        other + int32
        other + int32
        other + int32


# Benchmark for converting NumPy scalars to strings.
class ScalarStr(Benchmark):
    # Test scalar to str conversion.
    params = [TYPES1]  # type names to benchmark
    param_names = ["type"]  # name reported for the parameter

    # Build the array whose elements are converted.
    def setup(self, typename):
        # 100 elements, each 100, stored with the requested dtype.
        self.a = np.array([100] * 100, dtype=typename)

    # Time ``str`` applied to every element of the array.
    def time_str_repr(self, typename):
        # Iterating the array yields NumPy scalars, which ``str`` converts.
        res = [str(x) for x in self.a]

`.\numpy\benchmarks\benchmarks\bench_shape_base.py`

# 导入自定义的Benchmark类，该类用于性能基准测试
from .common import Benchmark

# 导入NumPy库，并将其命名为np，用于处理数组和矩阵运算
import numpy as np


# Benchmarks for ``np.block`` on assorted 1-D and 2-D inputs.
class Block(Benchmark):
    # Scale factor ``n`` applied to the array extents built in ``setup``.
    params = [1, 10, 100]
    # Name reported for the parameter.
    param_names = ['size']

    # Build every input array used by the timed methods.
    def setup(self, n):
        # (2n, 2n) array of ones.
        self.a_2d = np.ones((2 * n, 2 * n))
        # Length-2n vector of ones.
        self.b_1d = np.ones(2 * n)
        # (2n, 2n) array of twos.
        self.b_2d = 2 * self.a_2d

        # Length-3n vector of ones.
        self.a = np.ones(3 * n)
        # Second length-3n vector of ones.
        self.b = np.ones(3 * n)

        # (n, 3n) array of ones.
        self.one_2d = np.ones((1 * n, 3 * n))
        # (n, 3n) array of ones.
        self.two_2d = np.ones((1 * n, 3 * n))
        # (n, 6n) array of ones.
        self.three_2d = np.ones((1 * n, 6 * n))
        # Length-6n vector of ones.
        self.four_1d = np.ones(6 * n)
        # Length-n vector of ones (1-D, despite the ``_0d`` suffix).
        self.five_0d = np.ones(1 * n)
        # Length-5n vector of ones.
        self.six_1d = np.ones(5 * n)
        
        # avoid np.zeros's lazy allocation that might cause
        # page faults during benchmark
        self.zero_2d = np.full((2 * n, 6 * n), 0)

        # Length-3n vector of ones.
        self.one = np.ones(3 * n)
        # (3, 3n) array of twos.
        self.two = 2 * np.ones((3, 3 * n))
        # Length-3n vector of threes.
        self.three = 3 * np.ones(3 * n)
        # Length-3n vector of fours.
        self.four = 4 * np.ones(3 * n)
        # Length-n vector of fives.
        self.five = 5 * np.ones(1 * n)
        # Length-5n vector of sixes.
        self.six = 6 * np.ones(5 * n)
        
        # avoid np.zeros's lazy allocation that might cause
        # page faults during benchmark
        self.zero = np.full((2 * n, 6 * n), 0)

    # Two 2-D arrays in a flat list (joined side by side).
    def time_block_simple_row_wise(self, n):
        np.block([self.a_2d, self.b_2d])

    # Two 2-D arrays in separate inner lists (stacked vertically).
    def time_block_simple_column_wise(self, n):
        np.block([[self.a_2d], [self.b_2d]])

    # Five rows mixing 1-D and 2-D inputs of differing widths.
    def time_block_complicated(self, n):
        np.block([[self.one_2d, self.two_2d],
                  [self.three_2d],
                  [self.four_1d],
                  [self.five_0d, self.six_1d],
                  [self.zero_2d]])

    # An ``np.block`` result used as an element of an outer ``np.block``.
    def time_nested(self, n):
        np.block([
            [
                np.block([
                   [self.one],
                   [self.three],
                   [self.four]
                ]),
                self.two
            ],
            [self.five, self.six],
            [self.zero]
        ])

    # ``np.block`` called with non-list arguments.
    def time_no_lists(self, n):
        np.block(1)  # a plain scalar
        np.block(np.eye(3 * n))  # a single (3n, 3n) identity matrix


# Benchmark for ``np.block`` assembling a 2-D grid of equally sized chunks.
class Block2D(Benchmark):
    """Time ``np.block`` on a grid of constant arrays.

    Parameters are the overall ``shape``, the element ``dtype`` and
    ``n_chunks``, the number of chunks along each of the two axes.
    """
    params = [[(16, 16), (64, 64), (256, 256), (1024, 1024)],
              ['uint8', 'uint16', 'uint32', 'uint64'],
              [(2, 2), (4, 4)]]
    param_names = ['shape', 'dtype', 'n_chunks']

    def setup(self, shape, dtype, n_chunks):
        """Build ``self.block_list``: ``n_chunks[0]`` rows of ``n_chunks[1]`` chunks."""
        # Extent of one chunk along each axis (floor division).
        chunk_shape = [extent // count
                       for extent, count in zip(shape, n_chunks)]
        n_rows, n_cols = n_chunks[0], n_chunks[1]
        grid = []
        for _ in range(n_rows):
            # A fresh array per chunk, every element equal to 1.
            row = [np.full(shape=chunk_shape, fill_value=1, dtype=dtype)
                   for _ in range(n_cols)]
            grid.append(row)
        self.block_list = grid

    def time_block2d(self, shape, dtype, n_chunks):
        """Assemble the prepared grid."""
        np.block(self.block_list)


# Benchmark for ``np.block`` assembling a 3-D array from a 2x2x2 grid.
class Block3D(Benchmark):
    """This benchmark concatenates an array of size ``(5n)^3``"""
    # Scale factor ``n`` and the operation to time ('block' or 'copy').
    params = [[1, 10, 100],
              ['block', 'copy']]
    param_names = ['n', 'mode']

    def setup(self, n, mode):
        # Slow setup method: hence separated from the others above
        # Shape (2n, 2n, 2n), every element 1.
        self.a000 = np.ones((2 * n, 2 * n, 2 * n), int) * 1

        # Shape (3n, 2n, 2n), every element 2.
        self.a100 = np.ones((3 * n, 2 * n, 2 * n), int) * 2
        # Shape (2n, 3n, 2n), every element 3.
        self.a010 = np.ones((2 * n, 3 * n, 2 * n), int) * 3
        # Shape (2n, 2n, 3n), every element 4.
        self.a001 = np.ones((2 * n, 2 * n, 3 * n), int) * 4

        # Two axes of length 3n and one of length 2n; filled with 5, 6 and 7.
        self.a011 = np.ones((2 * n, 3 * n, 3 * n), int) * 5
        self.a101 = np.ones((3 * n, 2 * n, 3 * n), int) * 6
        self.a110 = np.ones((3 * n, 3 * n, 2 * n), int) * 7

        # Shape (3n, 3n, 3n), every element 8.
        self.a111 = np.ones((3 * n, 3 * n, 3 * n), int) * 8

        # The eight arrays arranged as a 2x2x2 nested list for ``np.block``.
        self.block = [
            [
                [self.a000, self.a001],
                [self.a010, self.a011],
            ],
            [
                [self.a100, self.a101],
                [self.a110, self.a111],
            ]
        ]
        # The same eight arrays flattened into a single list.
        self.arr_list = [a
                         for two_d in self.block
                         for one_d in two_d
                         for a in one_d]

    def time_3d(self, n, mode):
        # ``mode`` selects which operation is timed.
        if mode == 'block':
            # Assemble the full 3-D array.
            np.block(self.block)
        else:  # mode == 'copy'
            # Copy each of the eight arrays individually.
            [arr.copy() for arr in self.arr_list]

    # Retain old benchmark name for backward compat
    time_3d.benchmark_name = "bench_shape_base.Block.time_3d"
# Benchmarks for ``np.kron``.
class Kron(Benchmark):
    """Benchmarks for Kronecker product of two arrays"""

    # Build the array, matrix and scalar operands.
    def setup(self):
        self.large_arr = np.random.random((10,) * 4)  # random array of shape (10, 10, 10, 10)
        self.large_mat = np.asmatrix(np.random.random((100, 100)))  # random (100, 100) matrix
        self.scalar = 7  # plain Python int operand

    # Kronecker product of the 4-D array with itself.
    def time_arr_kron(self):
        np.kron(self.large_arr, self.large_arr)

    # Kronecker product of the 4-D array with the scalar.
    def time_scalar_kron(self):
        np.kron(self.large_arr, self.scalar)

    # Kronecker product of the matrix with itself.
    def time_mat_kron(self):
        np.kron(self.large_mat, self.large_mat)

# Benchmarks for ``np.atleast_1d``.
class AtLeast1D(Benchmark):
    """Benchmarks for np.atleast_1d"""

    # Build a 1-D array and a float64 scalar.
    def setup(self):
        self.x = np.array([1, 2, 3])  # array of shape (3,)
        self.zero_d = np.float64(1.)  # NumPy float64 scalar

    # Three inputs that are already 1-D.
    def time_atleast_1d(self):
        np.atleast_1d(self.x, self.x, self.x)

    # Three scalar inputs.
    def time_atleast_1d_reshape(self):
        np.atleast_1d(self.zero_d, self.zero_d, self.zero_d)

    # A single input that is already 1-D.
    def time_atleast_1d_single_argument(self):
        np.atleast_1d(self.x)

`.\numpy\benchmarks\benchmarks\bench_strings.py`

# 导入Benchmark类，用于性能基准测试
from .common import Benchmark

# 导入numpy库，并将其命名为np，用于数值计算
import numpy as np

# 导入operator模块，用于快速访问比较运算符的函数
import operator

# Maps each comparison-operator symbol to the matching ``operator`` function.
_OPERATORS = {
    '==': operator.eq,
    '!=': operator.ne,
    '<': operator.lt,
    '<=': operator.le,
    '>': operator.gt,
    '>=': operator.ge,
}

# Benchmarks for element-wise comparison of string arrays.
class StringComparisons(Benchmark):
    """Time comparison operators on ``U`` and ``S`` arrays."""
    # Main part of the benchmark parametrization.
    params = [
        [100, 10000, (1000, 20)],           # array shape
        ['U', 'S'],                         # string dtype kind
        [True, False],                      # contiguous, or every other item
        ['==', '!=', '<', '<=', '>', '>=']  # comparison operator symbol
    ]
    param_names = ['shape', 'dtype', 'contig', 'operator']

    # dtype object for np.int64, kept as a class attribute.
    int64 = np.dtype(np.int64)

    def setup(self, shape, dtype, contig, operator):
        """Build the compared arrays and resolve the operator function."""
        n_items = np.prod(shape)
        # Consecutive integers converted to strings, in the requested shape.
        base = np.arange(n_items).astype(dtype).reshape(shape)
        same = base.copy()
        # Copy with the first axis reversed.
        flipped = base[::-1].copy()

        if not contig:
            # Keep every second item along the last axis.
            every_other = (..., slice(None, None, 2))
            base, same, flipped = (
                arr[every_other] for arr in (base, same, flipped)
            )

        self.arr = base
        self.arr_identical = same
        self.arr_different = flipped
        # Look up the comparison function for the operator symbol.
        self.operator = _OPERATORS[operator]

    def time_compare_identical(self, shape, dtype, contig, operator):
        """Compare the array with its identical copy."""
        self.operator(self.arr, self.arr_identical)

    def time_compare_different(self, shape, dtype, contig, operator):
        """Compare the array with the reversed copy."""
        self.operator(self.arr, self.arr_different)

`.\numpy\benchmarks\benchmarks\bench_trim_zeros.py`

# 从.common模块中导入Benchmark类，用于性能基准测试
from .common import Benchmark

# 导入NumPy库，并定义几种特定的数据类型
import numpy as np

# dtype objects used as the ``dtype`` parameter values of ``TrimZeros``.
_FLOAT = np.dtype('float64')
_COMPLEX = np.dtype('complex128')
_INT = np.dtype('int64')
_BOOL = np.dtype('bool')

# Benchmark for ``np.trim_zeros``.
class TrimZeros(Benchmark):
    """Time ``np.trim_zeros`` on arrays padded with zeros at both ends."""
    param_names = ["dtype", "size"]
    params = [
        [_INT, _FLOAT, _COMPLEX, _BOOL],  # element dtype
        [3000, 30_000, 300_000]           # nominal total length
    ]

    def setup(self, dtype, size):
        """Build ``self.array``: zeros, then random values, then zeros."""
        # Each of the three segments gets a third of the nominal size.
        third = size // 3
        leading_zeros = np.zeros(third)
        payload = np.random.uniform(size=third)
        trailing_zeros = np.zeros(third)
        stacked = np.hstack([leading_zeros, payload, trailing_zeros])
        # Convert to the parametrized dtype last.
        self.array = stacked.astype(dtype)

    def time_trim_zeros(self, dtype, size):
        """Trim the prepared array with default arguments."""
        np.trim_zeros(self.array)

`.\numpy\benchmarks\benchmarks\bench_ufunc.py`

# 导入从 common 模块中的 Benchmark、get_squares_、TYPES1 和 DLPACK_TYPES
# 注意：'.' 表示当前目录

from .common import Benchmark, get_squares_, TYPES1, DLPACK_TYPES

# 导入 numpy 库，并将其命名为 np
import numpy as np

# 导入 itertools 库，用于生成迭代器的函数
import itertools

# 导入 version 模块，用于处理版本号的类和函数
from packaging import version

# 导入 operator 模块，用于函数操作符的函数
import operator

# Names of the NumPy ufuncs used as parameters of the ``UFunc`` benchmark.
ufuncs = [
    'abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin',
    'arcsinh', 'arctan', 'arctan2', 'arctanh', 'bitwise_and', 'bitwise_count',
    'bitwise_not', 'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj',
    'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad', 'degrees',
    'divide', 'divmod', 'equal', 'exp', 'exp2', 'expm1',
    'fabs', 'float_power', 'floor', 'floor_divide', 'fmax', 'fmin',
    'fmod', 'frexp', 'gcd', 'greater', 'greater_equal', 'heaviside',
    'hypot', 'invert', 'isfinite', 'isinf', 'isnan', 'isnat',
    'lcm', 'ldexp', 'left_shift', 'less', 'less_equal', 'log',
    'log10', 'log1p', 'log2', 'logaddexp', 'logaddexp2', 'logical_and',
    'logical_not', 'logical_or', 'logical_xor', 'matmul', 'maximum', 'minimum',
    'mod', 'modf', 'multiply', 'negative', 'nextafter', 'not_equal',
    'positive', 'power', 'rad2deg', 'radians', 'reciprocal', 'remainder',
    'right_shift', 'rint', 'sign', 'signbit', 'sin', 'sinh',
    'spacing', 'sqrt', 'square', 'subtract', 'tan', 'tanh',
    'true_divide', 'trunc',
]

# Function names used as parameters of the ``ArrayFunctionDispatcher``
# benchmark.
arrayfuncdisp = ['real', 'round']

# Report every ufunc exposed on the ``np`` namespace that the list above
# does not mention.
for name in dir(np):
    if name in ufuncs or not isinstance(getattr(np, name, None), np.ufunc):
        continue
    print(f"Missing ufunc {name!r}")

# Benchmark for the functions named in ``arrayfuncdisp``.
class ArrayFunctionDispatcher(Benchmark):
    """Time each function in ``arrayfuncdisp`` on every input it accepts."""
    params = [arrayfuncdisp]
    param_names = ['func']
    timeout = 10

    def setup(self, ufuncname):
        """Resolve the function and collect the argument tuples it accepts."""
        np.seterr(all='ignore')
        try:
            func = getattr(np, ufuncname)
        except AttributeError:
            # No such attribute on ``np``: signal "not implemented".
            raise NotImplementedError()
        self.afdn = func

        accepted = self.args = []
        for sample in get_squares_().values():
            call_args = (sample,)  # always one argument; no ``nin`` here
            try:
                # Trial call: inputs rejected with TypeError are left out.
                func(*call_args)
            except TypeError:
                pass
            else:
                accepted.append(call_args)

    def time_afdn_types(self, ufuncname):
        """Call the function once for every accepted argument tuple."""
        [self.afdn(*arg) for arg in self.args]

# Benchmark for a broadcast subtraction.
class Broadcast(Benchmark):
    # Build a (50000, 100) and a (100,) float64 array of ones.
    def setup(self):
        self.d = np.ones((50000, 100), dtype=np.float64)
        self.e = np.ones((100,), dtype=np.float64)

    # Time ``d - e``, where the 1-D operand is broadcast across the rows.
    def time_broadcast(self):
        # The result is discarded; only the operation is timed.
        self.d - self.e

# Benchmarks for ``ufunc.at``.
class At(Benchmark):
    # Build the values, the target indices and the result buffer.
    def setup(self):
        rng = np.random.default_rng(1)
        # 10 million float64 values.
        self.vals = rng.random(10_000_000, dtype=np.float64)
        # 10 million indices below 1000, converted to intp.
        self.idx = rng.integers(1000, size=10_000_000).astype(np.intp)
        # 1000-element zeroed buffer with the same dtype as ``vals``.
        self.res = np.zeros(1000, dtype=self.vals.dtype)

    # Time ``np.add.at``.
    def time_sum_at(self):
        # Adds each value into ``res`` at the position given by its index.
        np.add.at(self.res, self.idx, self.vals)

    # Time ``np.maximum.at``.
    def time_maximum_at(self):
        # Applies ``maximum`` into ``res`` at the position given by each index.
        np.maximum.at(self.res, self.idx, self.vals)

# Benchmark for every ufunc named in ``ufuncs``.
class UFunc(Benchmark):
    """Time each ufunc in ``ufuncs`` on every input it accepts."""
    params = [ufuncs]
    param_names = ['ufunc']
    timeout = 10

    def setup(self, ufuncname):
        """Resolve the ufunc and collect the argument tuples it accepts."""
        np.seterr(all='ignore')
        try:
            ufn = getattr(np, ufuncname)
        except AttributeError:
            # No such attribute on ``np``: signal "not implemented".
            raise NotImplementedError()
        self.ufn = ufn

        usable = self.args = []
        for sample in get_squares_().values():
            # The same array repeated once per ufunc input.
            operands = (sample,) * ufn.nin
            try:
                # Trial call: inputs rejected with TypeError are left out.
                ufn(*operands)
            except TypeError:
                pass
            else:
                usable.append(operands)

    def time_ufunc_types(self, ufuncname):
        """Call the ufunc once for every accepted argument tuple."""
        [self.ufn(*arg) for arg in self.args]
class MethodsV0(Benchmark):
    """ Benchmark for the methods which do not take any arguments
    """
    params = [['__abs__', '__neg__', '__pos__'], TYPES1]  # unary dunder names x type names
    param_names = ['methods', 'npdtypes']  # names reported for the two parameters
    timeout = 10  # benchmark timeout in seconds

    def setup(self, methname, npdtypes):
        values = get_squares_()  # mapping from type name to sample data
        self.xarg = values.get(npdtypes)[0]  # first item of the entry for this type

    def time_ndarray_meth(self, methname, npdtypes):
        getattr(operator, methname)(self.xarg)  # look the function up on ``operator`` and apply it


class NDArrayLRShifts(Benchmark):
    """ Benchmark for the shift methods
    """
    params = [['__lshift__', '__rshift__'],  # shift dunder names, looked up on ``operator``
              ['intp', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64']]  # integer dtype names
    param_names = ['methods', 'npdtypes']  # names reported for the two parameters
    timeout = 10  # benchmark timeout in seconds

    def setup(self, methname, npdtypes):
        """Build ``self.vals``: 1000 copies of one random integer."""
        # 1000 ones of the requested dtype, scaled by a single integer drawn
        # from ``np.random.randint(9)``.  The expression is parenthesised
        # because a comment may not follow a backslash line continuation.
        self.vals = (np.ones(1000, dtype=getattr(np, npdtypes)) *
                     np.random.randint(9))

    def time_ndarray_meth(self, methname, npdtypes):
        """Shift every element of ``self.vals`` by two bits."""
        getattr(operator, methname)(*[self.vals, 2])


class Methods0DBoolComplex(Benchmark):
    """Zero dimension array methods
    """
    params = [['__bool__', '__complex__'],  # conversion dunder names
              TYPES1]  # type names
    param_names = ['methods', 'npdtypes']  # names reported for the two parameters
    timeout = 10  # benchmark timeout in seconds

    def setup(self, methname, npdtypes):
        self.xarg = np.array(3, dtype=npdtypes)  # 0-d array holding 3 in the requested dtype

    def time_ndarray__0d__(self, methname, npdtypes):
        meth = getattr(self.xarg, methname)  # bound method looked up on the array
        meth()  # call it with no arguments


class Methods0DFloatInt(Benchmark):
    """Zero dimension array methods
    """
    params = [['__int__', '__float__'],  # conversion dunder names
              [dt for dt in TYPES1 if not dt.startswith('complex')]]  # type names, complex ones excluded
    param_names = ['methods', 'npdtypes']  # names reported for the two parameters
    timeout = 10  # benchmark timeout in seconds

    def setup(self, methname, npdtypes):
        self.xarg = np.array(3, dtype=npdtypes)  # 0-d array holding 3 in the requested dtype

    def time_ndarray__0d__(self, methname, npdtypes):
        meth = getattr(self.xarg, methname)  # bound method looked up on the array
        meth()  # call it with no arguments


class Methods0DInvert(Benchmark):
    """Zero dimension array methods
    """
    params = ['int16', 'int32', 'int64']  # integer dtype names
    param_names = ['npdtypes']  # name reported for the parameter
    timeout = 10  # benchmark timeout in seconds

    def setup(self, npdtypes):
        self.xarg = np.array(3, dtype=npdtypes)  # 0-d array holding 3 in the requested dtype

    def time_ndarray__0d__(self, npdtypes):
        self.xarg.__invert__()  # bitwise inversion via the dunder method


class MethodsV1(Benchmark):
    """ Benchmark for the methods which take an argument
    """
    params = [['__add__', '__eq__', '__ge__', '__gt__', '__le__',  # binary dunder names, looked up on ``operator``
               '__lt__', '__matmul__', '__mul__', '__ne__', '__pow__',
               '__sub__', '__truediv__'],
              TYPES1]  # type names
    param_names = ['methods', 'npdtypes']  # names reported for the two parameters
    timeout = 10  # benchmark timeout in seconds

    def setup(self, methname, npdtypes):
        values = get_squares_().get(npdtypes)  # sample data for this type name
        self.xargs = [values[0], values[1]]  # first two items become the two operands
        if np.issubdtype(npdtypes, np.inexact):
            # avoid overflow in __pow__/__matmul__ for low-precision dtypes
            # NOTE(review): ``*=`` scales values[1] in place; if get_squares_()
            # hands out shared or cached data this mutates it -- confirm.
            self.xargs[1] *= 0.01

    def time_ndarray_meth(self, methname, npdtypes):
        getattr(operator, methname)(*self.xargs)  # apply the ``operator`` function to both operands
class MethodsV1IntOnly(Benchmark):
    """ Benchmark for the methods which take an argument
    """
    # Bitwise dunder names x integer dtype names.
    params = [['__and__', '__or__', '__xor__'],
              ['int16', 'int32', 'int64']]
    # Names reported for the two parameters.
    param_names = ['methods', 'npdtypes']
    # Benchmark timeout in seconds.
    timeout = 10

    def setup(self, methname, npdtypes):
        # Sample data for this dtype name.
        values = get_squares_().get(npdtypes)
        # First two items become the two operands.
        self.xargs = [values[0], values[1]]

    def time_ndarray_meth(self, methname, npdtypes):
        # Look the function up on ``operator`` and apply it to both operands.
        getattr(operator, methname)(*self.xargs)


class MethodsV1NoComplex(Benchmark):
    """ Benchmark for the methods which take an argument
    """
    # Floor-division / modulo dunder names x type names without complex ones.
    params = [['__floordiv__', '__mod__'],
              [dt for dt in TYPES1 if not dt.startswith('complex')]]
    # Names reported for the two parameters.
    param_names = ['methods', 'npdtypes']
    # Benchmark timeout in seconds.
    timeout = 10

    def setup(self, methname, npdtypes):
        # Sample data for this type name.
        values = get_squares_().get(npdtypes)
        # First two items become the two operands.
        self.xargs = [values[0], values[1]]

    def time_ndarray_meth(self, methname, npdtypes):
        # Look the function up on ``operator`` and apply it to both operands.
        getattr(operator, methname)(*self.xargs)


class NDArrayGetItem(Benchmark):
    """Time ``ndarray.__getitem__`` for several index forms and array sizes."""
    param_names = ['margs', 'msize']
    # Index expressions x which array to index.
    params = [[0, (0, 0), (-1, 0), [0, -1]],
              ['small', 'big']]

    def setup(self, margs, msize):
        # Both arrays are always built: a (2, 3) one and a (50, 50) one,
        # filled with uniform random values in [-1, 1).
        self.xs = np.random.uniform(-1, 1, 6).reshape(2, 3)
        self.xl = np.random.uniform(-1, 1, 50*50).reshape(50, 50)

    def time_methods_getitem(self, margs, msize):
        # Select the array by ``msize``, then index it with ``margs``.
        if msize == 'small':
            mdat = self.xs
        elif msize == 'big':
            mdat = self.xl
        getattr(mdat, '__getitem__')(margs)


class NDArraySetItem(Benchmark):
    """Time item assignment for several index forms and array sizes."""
    param_names = ['margs', 'msize']
    # Index expressions x which array to assign into.
    params = [[0, (0, 0), (-1, 0), [0, -1]],
              ['small', 'big']]

    def setup(self, margs, msize):
        """Build both target arrays; the timed method picks one by ``msize``."""
        # A (2, 3) and a (100, 100) array of uniform random values in [-1, 1).
        self.xs = np.random.uniform(-1, 1, 6).reshape(2, 3)
        self.xl = np.random.uniform(-1, 1, 100*100).reshape(100, 100)

    def time_methods_setitem(self, margs, msize):
        """Assign 17 at ``margs`` in the array selected by ``msize``."""
        if msize == 'small':
            mdat = self.xs
        elif msize == 'big':
            mdat = self.xl
        # The assignment is outside the branches so that it is timed for
        # 'small' as well as 'big'.
        mdat[margs] = 17


class DLPMethods(Benchmark):
    """ Benchmark for DLPACK helpers
    """
    # DLPack dunder names x dtype names from DLPACK_TYPES.
    params = [['__dlpack__', '__dlpack_device__'], DLPACK_TYPES]
    param_names = ['methods', 'npdtypes']
    timeout = 10

    def setup(self, methname, npdtypes):
        """Pick the array whose DLPack method is timed.

        Raises NotImplementedError for ``'bool'`` unless the NumPy version
        compares greater than 1.25.
        """
        squares = get_squares_()
        if npdtypes != 'bool':
            # Every non-bool parameter uses the first 'int16' item.
            self.xarg = squares.get('int16')[0]
            return
        if not version.parse(np.__version__) > version.parse("1.25"):
            raise NotImplementedError("Not supported before v1.25")
        # bool: the same 'int16' item converted to bool.
        self.xarg = squares.get('int16')[0].astype('bool')

    def time_ndarray_dlp(self, methname, npdtypes):
        """Look up the DLPack method on the array and call it."""
        meth = getattr(self.xarg, methname)
        meth()


class NDArrayAsType(Benchmark):
    """ Benchmark for type conversion
    """
    # Every 2-combination of TYPES1, each passed as a (source, target) pair.
    params = [list(itertools.combinations(TYPES1, 2))]
    # Name reported for the parameter.
    param_names = ['typeconv']
    # Benchmark timeout in seconds.
    timeout = 10
    # Select the source data for the (source, target) pair ``typeconv``.
    def setup(self, typeconv):
        # Identical source and target: signal "not implemented".
        if typeconv[0] == typeconv[1]:
            raise NotImplementedError(
                    "Skipping test for converting to the same dtype")
        # Sample data registered under the source type name.
        self.xarg = get_squares_().get(typeconv[0])

    # Time the conversion to the target type.
    def time_astype(self, typeconv):
        # ``typeconv[1]`` is the target type name.
        self.xarg.astype(typeconv[1])
# 创建一个继承自Benchmark的类，用于对小数组和标量上的一些ufunc进行基准测试
class UFuncSmall(Benchmark):
    """Benchmark a handful of ufuncs on tiny arrays and on scalars.

    With inputs this small the measured time is dominated by the overhead
    of the NumPy ufunc machinery rather than by the arithmetic itself.
    """

    # Names of the ufuncs under test, looked up on the ``np`` namespace.
    param_names = ['ufunc']
    params = ['abs', 'sqrt', 'cos']

    # Per-benchmark timeout, in seconds.
    timeout = 10

    def setup(self, ufuncname):
        """Resolve the ufunc by name and build the small inputs.

        Raises
        ------
        NotImplementedError
            If ``np`` has no attribute called ``ufuncname``.
        """
        # Silence floating-point error reporting for the benchmark runs.
        np.seterr(all='ignore')
        try:
            ufunc = getattr(np, ufuncname)
        except AttributeError:
            raise NotImplementedError()
        self.f = ufunc

        # Inputs: a 5-element float array, a 3-element int array, a NumPy
        # float64 scalar and a plain Python float.
        self.python_float = 1.1
        self.float64 = np.float64(1.1)
        self.array_int_3 = np.array([1, 2, 3])
        self.array_5 = np.array([1., 2., 10., 3., 4.])

    def time_ufunc_small_array(self, ufuncname):
        """Apply the ufunc to the 5-element float array."""
        self.f(self.array_5)

    def time_ufunc_small_array_inplace(self, ufuncname):
        """Apply the ufunc to the float array, writing back into it."""
        self.f(self.array_5, out=self.array_5)

    def time_ufunc_small_int_array(self, ufuncname):
        """Apply the ufunc to the 3-element integer array."""
        self.f(self.array_int_3)

    def time_ufunc_numpy_scalar(self, ufuncname):
        """Apply the ufunc to a ``np.float64`` scalar."""
        self.f(self.float64)

    def time_ufunc_python_float(self, ufuncname):
        """Apply the ufunc to a built-in Python float."""
        self.f(self.python_float)


# 自定义的基准测试类
class Custom(Benchmark):
    """Benchmarks of ``nonzero`` and bitwise operators on boolean arrays."""

    def setup(self):
        """Create an all-True array of 20000 elements and one of 3."""
        self.b_small = np.ones(3, dtype=bool)
        self.b = np.ones(20000, dtype=bool)

    def time_nonzero(self):
        """``np.nonzero`` on the large boolean array."""
        np.nonzero(self.b)

    def time_not_bool(self):
        """Unary ``~`` on the large boolean array."""
        ~self.b

    def time_and_bool(self):
        """``&`` of the large boolean array with itself."""
        self.b & self.b

    def time_or_bool(self):
        """``|`` of the large boolean array with itself."""
        self.b | self.b

    def time_and_bool_small(self):
        """``&`` of the 3-element boolean array with itself."""
        self.b_small & self.b_small


# 自定义的基准测试类
class CustomInplace(Benchmark):
    """Compare ``out=``-style in-place ufunc calls with operator chains.

    Every ``time_<x>`` benchmark writes its results back into the input
    array; the matching ``time_<x>_temp`` benchmark evaluates the same kind
    of operation through operators, which allocate intermediate results.
    """

    def setup(self):
        """Allocate the int8, int32, float32 and float64 work arrays."""
        self.c = np.ones(500000, dtype=np.int8)
        self.i = np.ones(150000, dtype=np.int32)
        self.f = np.zeros(150000, dtype=np.float32)
        self.d = np.zeros(75000, dtype=np.float64)
        # NOTE(review): presumably these no-op multiplications touch the
        # zero-initialised buffers so their memory is really committed
        # before the timed code runs -- confirm.
        self.f *= 1.0
        self.d *= 1.0

    def time_char_or(self):
        """In-place bitwise OR with 0 on the int8 array, both operand orders."""
        np.bitwise_or(self.c, 0, out=self.c)
        np.bitwise_or(0, self.c, out=self.c)

    def time_char_or_temp(self):
        """``0 | c | 0`` on the int8 array using operators."""
        0 | self.c | 0

    def time_int_or(self):
        """In-place bitwise OR with 0 on the int32 array, both operand orders."""
        np.bitwise_or(self.i, 0, out=self.i)
        np.bitwise_or(0, self.i, out=self.i)

    def time_int_or_temp(self):
        """``0 | i | 0`` on the int32 array using operators."""
        0 | self.i | 0

    def time_float_add(self):
        """In-place addition of 1.0 to the float32 array, both operand orders."""
        np.add(self.f, 1., out=self.f)
        np.add(1., self.f, out=self.f)

    def time_float_add_temp(self):
        """``1. + f + 1.`` on the float32 array using operators."""
        1. + self.f + 1.

    def time_double_add(self):
        """In-place addition of 1.0 to the float64 array, both operand orders."""
        np.add(self.d, 1., out=self.d)
        np.add(1., self.d, out=self.d)

    def time_double_add_temp(self):
        """``1. + d + 1.`` on the float64 array using operators."""
        1. + self.d + 1.


# 自定义的基准测试类
class CustomScalar(Benchmark):
    """Benchmarks of array-with-Python-int ufunc calls for float dtypes."""

    # The single benchmark parameter is the dtype of the array operand.
    param_names = ['dtype']
    params = [np.float32, np.float64]

    def setup(self, dtype):
        """Create a 20000-element array of ones with the given dtype."""
        self.d = np.ones(20000, dtype=dtype)

    def time_add_scalar2(self, dtype):
        """``np.add`` of the array and the integer 1."""
        np.add(self.d, 1)

    def time_divide_scalar2(self, dtype):
        """``np.divide`` of the array by the integer 1."""
        np.divide(self.d, 1)

    def time_divide_scalar2_inplace(self, dtype):
        """``np.divide`` by 1 with the result written back into the array."""
        np.divide(self.d, 1, out=self.d)


class CustomComparison(Benchmark):
    """Benchmarks of the ``<`` comparison across numeric and bool dtypes."""

    # Signed and unsigned integers, two float widths and bool.
    params = (
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.float32, np.float64,
        np.bool,
    )
    param_names = ['dtype']

    def setup(self, dtype):
        """Create the operands, all filled with ones of the given dtype.

        ``x`` and ``y`` hold 50000 elements each; ``s`` is a 1-element
        array that plays the role of the "scalar" operand.
        """
        self.s = np.ones(1, dtype=dtype)
        self.x = np.ones(50000, dtype=dtype)
        self.y = np.ones(50000, dtype=dtype)

    def time_less_than_binary(self, dtype):
        """Element-wise ``x < y`` on two arrays of equal length."""
        self.x < self.y

    def time_less_than_scalar1(self, dtype):
        """``s < x`` with the 1-element array on the left."""
        self.s < self.x

    def time_less_than_scalar2(self, dtype):
        """``x < s`` with the 1-element array on the right."""
        self.x < self.s
# 定义一个继承自 Benchmark 的自定义类 CustomScalarFloorDivideInt，用于执行整数类型的除法运算基准测试
class CustomScalarFloorDivideInt(Benchmark):
    """Benchmark ``array // python_int`` for the signed integer dtypes."""

    # Grid: every type in np._core.sctypes['int'] x four constant divisors
    # (two positive, two negative).
    params = (np._core.sctypes['int'],
              [8, -8, 43, -43])
    param_names = ['dtype', 'divisors']

    def setup(self, dtype, divisor):
        """Fill ``self.x`` (the dividend) with 10000 random integers.

        Values are drawn by ``np.random.randint`` between the dtype's
        minimum and maximum as reported by ``np.iinfo``.
        """
        limits = np.iinfo(dtype)
        self.x = np.random.randint(
            limits.min, limits.max, size=10000, dtype=dtype)

    def time_floor_divide_int(self, dtype, divisor):
        """Floor-divide the random array by the constant divisor."""
        self.x // divisor


# 定义一个继承自 Benchmark 的自定义类 CustomScalarFloorDivideUInt，用于执行无符号整数类型的除法运算基准测试
class CustomScalarFloorDivideUInt(Benchmark):
    """Benchmark ``array // python_int`` for the unsigned integer dtypes."""

    # Grid: every type in np._core.sctypes['uint'] x two constant divisors.
    params = (np._core.sctypes['uint'],
              [8, 43])
    param_names = ['dtype', 'divisors']

    def setup(self, dtype, divisor):
        """Fill ``self.x`` (the dividend) with 10000 random integers.

        Values are drawn by ``np.random.randint`` between the dtype's
        minimum and maximum as reported by ``np.iinfo``.
        """
        limits = np.iinfo(dtype)
        self.x = np.random.randint(
            limits.min, limits.max, size=10000, dtype=dtype)

    def time_floor_divide_uint(self, dtype, divisor):
        """Floor-divide the random array by the constant divisor."""
        self.x // divisor


# 定义一个继承自 Benchmark 的自定义类 CustomArrayFloorDivideInt，用于执行数组类型的整数除法运算基准测试
class CustomArrayFloorDivideInt(Benchmark):
    """Benchmark element-wise ``array // array`` for integer dtypes."""

    # Grid: all signed and unsigned integer types x three array lengths.
    params = (np._core.sctypes['int'] + np._core.sctypes['uint'],
              [100, 10000, 1000000])
    param_names = ['dtype', 'size']

    def setup(self, dtype, size):
        """Create the random dividend ``x`` and divisor ``y`` arrays.

        ``x`` is drawn between the dtype's minimum and maximum (from
        ``np.iinfo``); ``y`` is drawn with ``randint(2, 32)``, so it never
        contains zero.
        """
        limits = np.iinfo(dtype)
        # Dividend first, then divisor: the draw order is kept as is.
        self.x = np.random.randint(
            limits.min, limits.max, size=size, dtype=dtype)
        self.y = np.random.randint(2, 32, size=size, dtype=dtype)

    def time_floor_divide_int(self, dtype, size):
        """Floor-divide ``x`` by ``y`` element-wise."""
        self.x // self.y


# 定义一个继承自 Benchmark 的自定义类 Scalar，用于执行标量操作的基准测试
class Scalar(Benchmark):
    """Benchmarks of ``+`` on zero-dimensional arrays and Python scalars."""

    def setup(self):
        """Create a real 0-d array, a complex 0-d array and a Python complex."""
        self.z = complex(1.0, 1.0)
        self.y = np.asarray((1.0 + 1j))
        self.x = np.asarray(1.0)

    def time_add_scalar(self):
        """Add the real 0-d array to itself."""
        self.x + self.x

    def time_add_scalar_conv(self):
        """Add a Python float literal to the real 0-d array."""
        self.x + 1.0

    def time_add_scalar_conv_complex(self):
        """Add a Python complex to the complex 0-d array."""
        self.y + self.z


# 定义一个类 ArgPack，用于封装函数参数和关键字参数
class ArgPack:
    """Bundle positional and keyword arguments for a later call.

    The ``repr`` is a call-like argument list such as ``(1, 2, out=3)``:
    the positional values first, then ``name=value`` pairs, all rendered
    with ``repr`` and joined by ``', '``.
    """
    __slots__ = ['args', 'kwargs']

    def __init__(self, *args, **kwargs):
        # Stored exactly as received: a tuple of positionals and a dict of
        # keyword arguments.
        self.kwargs = kwargs
        self.args = args

    def __repr__(self):
        positional = [repr(value) for value in self.args]
        keyword = [f'{key}={value!r}' for key, value in self.kwargs.items()]
        return '(' + ', '.join(positional + keyword) + ')'


# 定义一个继承自 Benchmark 的自定义类 ArgParsing，用于执行参数解析速度的基准测试
class ArgParsing(Benchmark):
    """Benchmark the argument parsing of a binary ufunc call (``np.add``).

    All operands are 0-d arrays, so the variants differ mainly in how the
    arguments are passed: positionally, by keyword, or a mix of both.
    """
    # Shared 0-d operands and output buffer.
    x = np.array(1.)
    y = np.array(2.)
    out = np.array(3.)
    # One parameter: the packed call signature under test.
    param_names = ['arg_kwarg']
    params = [[
        ArgPack(x, y),                                   # inputs only
        ArgPack(x, y, out),                              # positional out
        ArgPack(x, y, out=out),                          # keyword out
        ArgPack(x, y, out=(out,)),                       # out as 1-tuple
        ArgPack(x, y, out=out, subok=True, where=True),  # keyword out + flags
        ArgPack(x, y, subok=True),                       # one flag
        ArgPack(x, y, subok=True, where=True),           # two flags
        ArgPack(x, y, out, subok=True, where=True)       # positional out + flags
    ]]

    def time_add_arg_parsing(self, arg_pack):
        """Call ``np.add`` with the packed positional and keyword arguments."""
        positional, keywords = arg_pack.args, arg_pack.kwargs
        np.add(*positional, **keywords)


# 定义一个继承自 Benchmark 的自定义类 ArgParsingReduce，用于执行参数解析速度的基准测试
class ArgParsingReduce(Benchmark):
    """Benchmark the argument parsing of ``np.add.reduce``.

    The input has only two elements, so the variants differ mainly in how
    the arguments are passed: positionally, by keyword, or a mix of both.
    """
    # Two-element float input ([0., 1.]) and a 0-d float output buffer.
    a = np.arange(2.)
    out = np.array(0.)
    # One parameter: the packed call signature under test.
    param_names = ['arg_kwarg']
    params = [[
        ArgPack(a,),                              # array only
        ArgPack(a, 0),                            # second positional: 0
        ArgPack(a, axis=0),                       # keyword axis
        ArgPack(a, 0, None),                      # positionals: 0, None
        ArgPack(a, axis=0, dtype=None),           # keyword axis and dtype
        ArgPack(a, 0, None, out),                 # positionals: 0, None, out
        ArgPack(a, axis=0, dtype=None, out=out),  # all three by keyword
        ArgPack(a, out=out)                       # keyword out only
    ]]

    def time_add_reduce_arg_parsing(self, arg_pack):
        """Call ``np.add.reduce`` with the packed arguments."""
        positional, keywords = arg_pack.args, arg_pack.kwargs
        np.add.reduce(*positional, **keywords)
# 定义一个继承自 Benchmark 类的二进制操作性能基准类 BinaryBench
class BinaryBench(Benchmark):
    """Benchmarks of ``np.power`` and ``np.arctan2`` on large float arrays."""

    # The single benchmark parameter is the floating-point dtype.
    param_names = ['dtype']
    params = [np.float32, np.float64]

    def setup(self, dtype):
        """Create two arrays of 1000000 ``np.random.rand`` samples.

        The samples are cast to ``dtype`` after being drawn.
        """
        length = 1000000
        # ``a`` is drawn before ``b``; the draw order is kept as is.
        self.a = np.random.rand(length).astype(dtype)
        self.b = np.random.rand(length).astype(dtype)

    def time_pow(self, dtype):
        """Element-wise ``a ** b`` with an array exponent."""
        np.power(self.a, self.b)

    def time_pow_2(self, dtype):
        """``a`` raised to the constant exponent 2.0."""
        np.power(self.a, 2.0)

    def time_pow_half(self, dtype):
        """``a`` raised to the constant exponent 0.5."""
        np.power(self.a, 0.5)

    def time_atan2(self, dtype):
        """Element-wise ``np.arctan2(a, b)``."""
        np.arctan2(self.a, self.b)

# 定义一个继承自 Benchmark 类的整数操作性能基准类 BinaryBenchInteger
class BinaryBenchInteger(Benchmark):
    """Benchmarks of ``np.power`` on large integer arrays."""

    # The single benchmark parameter is the integer dtype.
    param_names = ['dtype']
    params = [np.int32, np.int64]

    def setup(self, dtype):
        """Create the base and exponent arrays of 1000000 elements each.

        Bases come from ``randint(20)`` and exponents from ``randint(4)``;
        both are cast to ``dtype`` after being drawn.
        """
        length = 1000000
        # ``a`` is drawn before ``b``; the draw order is kept as is.
        self.a = np.random.randint(20, size=length).astype(dtype)
        self.b = np.random.randint(4, size=length).astype(dtype)

    def time_pow(self, dtype):
        """Element-wise ``a ** b`` with an array exponent."""
        np.power(self.a, self.b)

    def time_pow_two(self, dtype):
        """``a`` raised to the constant exponent 2."""
        np.power(self.a, 2)

    def time_pow_five(self, dtype):
        """``a`` raised to the constant exponent 5."""
        np.power(self.a, 5)

NumPy-源码解析-一-

NumPy 源码解析（一）

Contributing to numpy

Reporting issues

Contributing code

A note on feature enhancements/API changes

.\numpy\.spin\cmds.py

.\numpy\benchmarks\asv_pip_nopep517.py

.\numpy\benchmarks\benchmarks\bench_app.py

.\numpy\benchmarks\benchmarks\bench_array_coercion.py

.\numpy\benchmarks\benchmarks\bench_clip.py

.\numpy\benchmarks\benchmarks\bench_core.py

.\numpy\benchmarks\benchmarks\bench_creation.py

.\numpy\benchmarks\benchmarks\bench_function_base.py

.\numpy\benchmarks\benchmarks\bench_import.py

.\numpy\benchmarks\benchmarks\bench_indexing.py

.\numpy\benchmarks\benchmarks\bench_io.py

.\numpy\benchmarks\benchmarks\bench_itemselection.py

.\numpy\benchmarks\benchmarks\bench_lib.py

.\numpy\benchmarks\benchmarks\bench_linalg.py

.\numpy\benchmarks\benchmarks\bench_ma.py

.\numpy\benchmarks\benchmarks\bench_manipulate.py

.\numpy\benchmarks\benchmarks\bench_overrides.py

.\numpy\benchmarks\benchmarks\bench_random.py

.\numpy\benchmarks\benchmarks\bench_records.py

.\numpy\benchmarks\benchmarks\bench_reduce.py

.\numpy\benchmarks\benchmarks\bench_scalar.py

.\numpy\benchmarks\benchmarks\bench_shape_base.py

.\numpy\benchmarks\benchmarks\bench_strings.py

.\numpy\benchmarks\benchmarks\bench_trim_zeros.py

.\numpy\benchmarks\benchmarks\bench_ufunc.py