NumPy 源码解析(三)
.\numpy\numpy\distutils\ccompiler_opt.py
import atexit
import inspect
import os
import pprint
import re
import subprocess
import textwrap
class _Config:
"""An abstract class holds all configurable attributes of `CCompilerOpt`,
these class attributes can be used to change the default behavior
of `CCompilerOpt` in order to fit other requirements.
Attributes
----------
conf_nocache : bool
Set True to disable memory and file cache.
Default is False.
conf_noopt : bool
Set True to forces the optimization to be disabled,
in this case `CCompilerOpt` tends to generate all
expected headers in order to 'not' break the build.
Default is False.
conf_cache_factors : list
Add extra factors to the primary caching factors. The caching factors
are utilized to determine if there are changes had happened that
requires to discard the cache and re-updating it. The primary factors
are the arguments of `CCompilerOpt` and `CCompiler`'s properties(type, flags, etc).
Default is list of two items, containing the time of last modification
of `ccompiler_opt` and value of attribute "conf_noopt"
conf_tmp_path : str,
The path of temporary directory. Default is auto-created
temporary directory via ``tempfile.mkdtemp()``.
conf_check_path : str
The path of testing files. Each added CPU feature must have a
**C** source file contains at least one intrinsic or instruction that
related to this feature, so it can be tested against the compiler.
Default is ``./distutils/checks``.
conf_target_groups : dict
Extra tokens that can be reached from dispatch-able sources through
the special mark ``@targets``. Default is an empty dictionary.
**Notes**:
- case-insensitive for tokens and group names
- sign '#' must stick in the begin of group name and only within ``@targets``
**Example**:
.. code-block:: console
$ "@targets #avx_group other_tokens" > group_inside.c
>>> CCompilerOpt.conf_target_groups["avx_group"] = \\
"$werror $maxopt avx2 avx512f avx512_skx"
>>> cco = CCompilerOpt(cc_instance)
>>> cco.try_dispatch(["group_inside.c"])
conf_c_prefix : str
The prefix of public C definitions. Default is ``"NPY_"``.
"""
conf_c_prefix_ : str
The prefix of internal C definitions. Default is ``"NPY__"``.
conf_cc_flags : dict
Nested dictionaries defining several compiler flags
that linked to some major functions, the main key
represent the compiler name and sub-keys represent
flags names. Default is already covers all supported
**C** compilers.
"native": str or None
used by argument option `native`, to detect the current
machine support via the compiler.
"werror": str or None
utilized to treat warning as errors during testing CPU features
against the compiler and also for target's policy `$werror`
via dispatch-able sources.
"maxopt": str or None
utilized for target's policy '$maxopt' and the value should
contains the maximum acceptable optimization by the compiler.
e.g. in gcc ``'-O3'``
**Notes**:
* case-sensitive for compiler names and flags
* use space to separate multiple flags
* any flag will tested against the compiler and it will skipped
if it's not applicable.
# 定义用于参数选项 `'min'` 的 CPU 特性字典,键表示 CPU 架构名称,例如 `'x86'`。
# 默认值在广泛的用户平台上提供最佳支持。
# 注意:架构名称区分大小写。
conf_min_features : dict
A dictionary defines the used CPU features for
argument option ``'min'``, the key represent the CPU architecture
name e.g. ``'x86'``. Default values provide the best effort
on wide range of users platforms.
**Note**: case-sensitive for architecture names.
"""
# 禁用缓存的配置选项,默认为 False。
conf_nocache = False
# 禁用优化的配置选项,默认为 False。
conf_noopt = False
# 缓存因子的配置选项,默认为 None。
conf_cache_factors = None
# 临时路径的配置选项,默认为 None。
conf_tmp_path = None
# 检查路径的配置选项,设置为当前文件的 "checks" 子目录。
conf_check_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "checks"
)
# 目标组的配置选项,初始化为空字典。
conf_target_groups = {}
# C 前缀的配置选项,默认为 'NPY_'。
conf_c_prefix = 'NPY_'
# 重新定义 C 前缀的配置选项,此处为 'NPY__'。
conf_c_prefix_ = 'NPY__'
conf_cc_flags = dict(
gcc = dict(
# 对于 arm 和 ppc64 平台,native 应该总是失败,
# native 通常只在 x86 平台有效
native = '-march=native',
opt = '-O3',
werror = '-Werror',
),
clang = dict(
native = '-march=native',
opt = "-O3",
# 为了保证测试过程的健全性,Clang 需要适用以下某一个标志,
# 然而,在某些情况下,由于“未使用的参数”警告,-Werror 在可用性测试中被跳过。
# 参见 https://github.com/numpy/numpy/issues/19624
werror = '-Werror=switch -Werror',
),
icc = dict(
native = '-xHost',
opt = '-O3',
werror = '-Werror',
),
iccw = dict(
native = '/QxHost',
opt = '/O3',
werror = '/Werror',
),
msvc = dict(
native = None,
opt = '/O2',
werror = '/WX',
),
fcc = dict(
native = '-mcpu=a64fx',
opt = None,
werror = None,
)
)
conf_min_features = dict(
x86 = "SSE SSE2",
x64 = "SSE SSE2 SSE3",
ppc64 = '', # 保守起见
ppc64le = "VSX VSX2",
s390x = '',
armhf = '', # 保守起见
aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD"
)
def __init__(self):
# 如果临时路径为 None,则创建一个临时目录,并在程序退出时自动删除
if self.conf_tmp_path is None:
import shutil
import tempfile
tmp = tempfile.mkdtemp()
def rm_temp():
try:
shutil.rmtree(tmp)
except OSError:
pass
atexit.register(rm_temp)
self.conf_tmp_path = tmp
# 如果缓存因子为 None,则设置默认的缓存因子列表
if self.conf_cache_factors is None:
self.conf_cache_factors = [
os.path.getmtime(__file__),
self.conf_nocache
]
class _Distutils:
"""A helper class that provides a collection of fundamental methods
implemented on top of Python and NumPy Distutils.
The idea behind this class is to gather all methods that may
need to be overridden in case 'CCompilerOpt' is reused in an environment
different from what NumPy has.
Parameters
----------
ccompiler : `CCompiler`
The generated instance returned from `distutils.ccompiler.new_compiler()`.
"""
def __init__(self, ccompiler):
# 初始化方法,将传入的编译器实例保存在属性中
self._ccompiler = ccompiler
def dist_compile(self, sources, flags, ccompiler=None, **kwargs):
"""Wrap CCompiler.compile()"""
# 断言参数类型为列表
assert(isinstance(sources, list))
assert(isinstance(flags, list))
# 将额外的编译标志合并到 flags 中
flags = kwargs.pop("extra_postargs", []) + flags
# 如果未提供 ccompiler,则使用初始化时传入的编译器实例
if not ccompiler:
ccompiler = self._ccompiler
# 调用编译器实例的 compile 方法进行编译
return ccompiler.compile(sources, extra_postargs=flags, **kwargs)
def dist_test(self, source, flags, macros=[]):
"""Return True if 'CCompiler.compile()' is able to compile
a source file with certain flags.
"""
# 断言参数类型为字符串
assert(isinstance(source, str))
# 导入 CompileError 类
from distutils.errors import CompileError
# 获取当前保存的编译器实例
cc = self._ccompiler;
# 备份原始的 spawn 方法
bk_spawn = getattr(cc, 'spawn', None)
# 根据编译器类型设置不同的 spawn 方法
if bk_spawn:
cc_type = getattr(self._ccompiler, "compiler_type", "")
if cc_type in ("msvc",):
setattr(cc, 'spawn', self._dist_test_spawn_paths)
else:
setattr(cc, 'spawn', self._dist_test_spawn)
# 默认测试结果为 False
test = False
try:
# 调用 dist_compile 方法尝试编译源文件
self.dist_compile(
[source], flags, macros=macros, output_dir=self.conf_tmp_path
)
# 如果成功编译,则设置测试结果为 True
test = True
except CompileError as e:
# 捕获编译异常,记录错误信息
self.dist_log(str(e), stderr=True)
# 恢复原始的 spawn 方法
if bk_spawn:
setattr(cc, 'spawn', bk_spawn)
# 返回测试结果
return test
def dist_info(self):
"""
Return a tuple containing info about (platform, compiler, extra_args),
required by the abstract class '_CCompiler' for discovering the
platform environment. This is also used as a cache factor in order
to detect any changes happening from outside.
"""
# 如果已经计算过并缓存了信息,则直接返回缓存的结果
if hasattr(self, "_dist_info"):
return self._dist_info
# 获取当前编译器类型
cc_type = getattr(self._ccompiler, "compiler_type", '')
# 根据编译器类型确定平台
if cc_type in ("intelem", "intelemw"):
platform = "x86_64"
elif cc_type in ("intel", "intelw", "intele"):
platform = "x86"
else:
# 如果是 Unix 系统,通过 distutils 获取平台信息
from distutils.util import get_platform
platform = get_platform()
# 获取编译器信息
cc_info = getattr(self._ccompiler, "compiler", getattr(self._ccompiler, "compiler_so", ''))
# 如果编译器类型为空或者是 Unix 系统,则处理编译器信息
if not cc_type or cc_type == "unix":
if hasattr(cc_info, "__iter__"):
compiler = cc_info[0]
else:
compiler = str(cc_info)
else:
compiler = cc_type
# 获取额外的编译参数
if hasattr(cc_info, "__iter__") and len(cc_info) > 1:
extra_args = ' '.join(cc_info[1:])
else:
extra_args = os.environ.get("CFLAGS", "")
extra_args += os.environ.get("CPPFLAGS", "")
# 缓存计算结果并返回
self._dist_info = (platform, compiler, extra_args)
return self._dist_info
@staticmethod
def dist_error(*args):
"""Raise a compiler error"""
# 抛出编译错误异常
from distutils.errors import CompileError
raise CompileError(_Distutils._dist_str(*args))
@staticmethod
def dist_fatal(*args):
"""Raise a distutils error"""
# 抛出 distutils 错误异常
from distutils.errors import DistutilsError
raise DistutilsError(_Distutils._dist_str(*args))
@staticmethod
def dist_log(*args, stderr=False):
"""Print a console message"""
# 打印控制台消息,根据 stderr 参数选择打印级别
from numpy.distutils import log
out = _Distutils._dist_str(*args)
if stderr:
log.warn(out)
else:
log.info(out)
@staticmethod
def dist_load_module(name, path):
"""Load a module from file, required by the abstract class '_Cache'."""
# 从文件加载模块,用于抽象类 '_Cache' 所需
from .misc_util import exec_mod_from_location
try:
return exec_mod_from_location(name, path)
except Exception as e:
# 记录加载模块时出现的异常
_Distutils.dist_log(e, stderr=True)
return None
@staticmethod
def _dist_str(*args):
"""Return a string to print by log and errors."""
# 生成用于日志和错误打印的字符串
def to_str(arg):
if not isinstance(arg, str) and hasattr(arg, '__iter__'):
ret = []
for a in arg:
ret.append(to_str(a))
return '('+ ' '.join(ret) + ')'
return str(arg)
# 获取调用栈信息,生成打印起始信息
stack = inspect.stack()[2]
start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno)
# 将所有参数转换为字符串并拼接
out = ' '.join([
to_str(a)
for a in (*args,)
])
return start + out
def _dist_test_spawn_paths(self, cmd, display=None):
"""
Fix msvc SDK ENV path same as distutils do
without it we get c1: fatal error C1356: unable to find mspdbcore.dll
"""
# 检查 self._ccompiler 是否有 "_paths" 属性,若没有则调用 self._dist_test_spawn(cmd) 并返回
if not hasattr(self._ccompiler, "_paths"):
self._dist_test_spawn(cmd)
return
# 保存当前环境变量中的 PATH 到 old_path
old_path = os.getenv("path")
try:
# 设置环境变量中的 PATH 为 self._ccompiler._paths
os.environ["path"] = self._ccompiler._paths
# 调用 self._dist_test_spawn(cmd) 执行命令
self._dist_test_spawn(cmd)
finally:
# 恢复原来的环境变量中的 PATH
os.environ["path"] = old_path
_dist_warn_regex = re.compile(
# 编译警告正则表达式,匹配 Intel 和 MSVC 编译器的警告信息
".*("
"warning D9002|" # MSVC,应该适用于任何语言。
"invalid argument for option" # Intel
").*"
)
@staticmethod
def _dist_test_spawn(cmd, display=None):
try:
# 执行命令 cmd,捕获输出到 o,将 stderr 合并到 stdout
o = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
text=True)
# 如果输出 o 存在,并且匹配 _Distutils._dist_warn_regex 中定义的警告模式
if o and re.match(_Distutils._dist_warn_regex, o):
# 调用 _Distutils.dist_error 输出错误信息,指示编译器不支持命令中的标志
_Distutils.dist_error(
"Flags in command", cmd ,"aren't supported by the compiler"
", output -> \n%s" % o
)
except subprocess.CalledProcessError as exc:
# 捕获 subprocess 执行命令时的异常,将输出和返回码保存到 o 和 s
o = exc.output
s = exc.returncode
except OSError as e:
# 捕获 OS 错误,将错误信息保存到 o,返回码设置为 127
o = e
s = 127
else:
# 没有异常发生时返回 None
return None
# 调用 _Distutils.dist_error 输出命令执行失败的错误信息,包括返回码和输出内容
_Distutils.dist_error(
"Command", cmd, "failed with exit status %d output -> \n%s" % (
s, o
))
# Module-level shared cache, used to share cached data among all instances.
_share_cache = {}
# Cache class that handles caching functionality, providing two cache
# levels: in-memory and on-file.
class _Cache:
    """An abstract class handles caching functionality, provides two
    levels of caching, in-memory by share instances attributes among
    each other and by store attributes into files.
    **Note**:
        any attributes that start with ``_`` or ``conf_`` will be ignored.
    Parameters
    ----------
    cache_path : str or None
        The path of cache file, if None then cache in file will disabled.
    *factors :
        The caching factors that need to utilize next to `conf_cache_factors`.
    Attributes
    ----------
    cache_private : set
        Hold the attributes that need be skipped from "in-memory cache".
    cache_infile : bool
        Utilized during initializing this class, to determine if the cache was able
        to loaded from the specified cache path in 'cache_path'.
    """
    # Attributes matching this pattern are never cached.
    _cache_ignore = re.compile("^(_|conf_)")
    def __init__(self, cache_path=None, *factors):
        # In-memory cache dictionary used by the `_Cache.me` decorator.
        self.cache_me = {}
        # Attributes to be skipped by the in-memory (shared) cache.
        self.cache_private = set()
        # Whether the on-file cache was successfully loaded.
        self.cache_infile = False
        # Path of the cache file, if any.
        self._cache_path = None
        # Caching disabled by configuration: log it and bail out.
        if self.conf_nocache:
            self.dist_log("cache is disabled by `Config`")
            return
        # Hash combining the given factors with the configured ones; any
        # change in them invalidates previously stored caches.
        self._cache_hash = self.cache_hash(*factors, *self.conf_cache_factors)
        self._cache_path = cache_path
        # Try the on-file cache first, when a path was given.
        if cache_path:
            if os.path.exists(cache_path):
                self.dist_log("load cache from file ->", cache_path)
                # The cache file is a Python module exposing `hash` and `data`.
                cache_mod = self.dist_load_module("cache", cache_path)
                if not cache_mod:
                    self.dist_log(
                        "unable to load the cache file as a module",
                        stderr=True
                    )
                # Reject cache modules missing the required attributes.
                elif not hasattr(cache_mod, "hash") or \
                     not hasattr(cache_mod, "data"):
                    self.dist_log("invalid cache file", stderr=True)
                # Only accept the cache when the hash still matches.
                elif self._cache_hash == cache_mod.hash:
                    self.dist_log("hit the file cache")
                    # Replay the cached attributes onto this instance.
                    for attr, val in cache_mod.data.items():
                        setattr(self, attr, val)
                    # Mark the file cache as hit.
                    self.cache_infile = True
                else:
                    self.dist_log("miss the file cache")
        # Fall back to the in-memory shared cache when the file cache missed.
        if not self.cache_infile:
            other_cache = _share_cache.get(self._cache_hash)
            if other_cache:
                self.dist_log("hit the memory cache")
                # Copy the other instance's attributes, skipping private
                # and ignored ones.
                for attr, val in other_cache.__dict__.items():
                    if attr in other_cache.cache_private or \
                               re.match(self._cache_ignore, attr):
                        continue
                    setattr(self, attr, val)
        # Publish this instance in the shared cache.
        _share_cache[self._cache_hash] = self
        # Flush the cache to file at interpreter exit.
        atexit.register(self.cache_flush)
    def __del__(self):
        # Drop this instance from the shared cache on destruction.
        # NOTE: popping then breaking immediately keeps the dict iteration safe.
        for h, o in _share_cache.items():
            if o == self:
                _share_cache.pop(h)
                break
    def cache_flush(self):
        """
        Force update the cache.
        """
        # Nothing to do without a cache file path.
        if not self._cache_path:
            return
        # Log where the cache is written.
        self.dist_log("write cache to path ->", self._cache_path)
        # Copy the instance dict so ignored attributes can be stripped.
        cdict = self.__dict__.copy()
        # Drop every attribute matching the ignore pattern.
        for attr in self.__dict__.keys():
            if re.match(self._cache_ignore, attr):
                cdict.pop(attr)
        # Make sure the cache file's parent directory exists.
        d = os.path.dirname(self._cache_path)
        if not os.path.exists(d):
            os.makedirs(d)
        # Serialize the remaining attributes in a compact repr form.
        repr_dict = pprint.pformat(cdict, compact=True)
        # Write the cache header (hash) followed by the data dict; the file
        # is a loadable Python module (see __init__).
        with open(self._cache_path, "w") as f:
            f.write(textwrap.dedent("""\
            hash = {}
            data = \\
            """).format(self._cache_hash))
            f.write(repr_dict)
    def cache_hash(self, *factors):
        # Compute a 32-bit string-based hash over all the given factors.
        chash = 0
        for f in factors:
            for char in str(f):
                chash  = ord(char) + (chash << 6) + (chash << 16) - chash
                chash &= 0xFFFFFFFF
        return chash
    @staticmethod
    def me(cb):
        """
        A static method that can be treated as a decorator to
        dynamically cache certain methods.
        """
        def cache_wrap_me(self, *args, **kwargs):
            # Build a unique lookup key from the method name and arguments.
            cache_key = str((
                cb.__name__, *args, *kwargs.keys(), *kwargs.values()
            ))
            # Return the memoized result when available.
            if cache_key in self.cache_me:
                return self.cache_me[cache_key]
            # Otherwise invoke the wrapped method and memoize its result.
            ccb = cb(self, *args, **kwargs)
            self.cache_me[cache_key] = ccb
            return ccb
        return cache_wrap_me
class _CCompiler:
    """A helper class for `CCompilerOpt` containing all utilities that
    related to the fundamental compiler's functions.

    Attributes
    ----------
    cc_on_x86 : bool
        True when the target architecture is 32-bit x86
    cc_on_x64 : bool
        True when the target architecture is 64-bit x86
    cc_on_ppc64 : bool
        True when the target architecture is 64-bit big-endian powerpc
    cc_on_ppc64le : bool
        True when the target architecture is 64-bit little-endian powerpc
    cc_on_s390x : bool
        True when the target architecture is IBM/ZARCH on linux
    cc_on_armhf : bool
        True when the target architecture is 32-bit ARMv7+
    cc_on_aarch64 : bool
        True when the target architecture is 64-bit Armv8-a+
    cc_on_noarch : bool
        True when the target architecture is unknown or not supported
    cc_is_gcc : bool
        True if the compiler is GNU or
        if the compiler is unknown
    cc_is_clang : bool
        True if the compiler is Clang
    cc_is_icc : bool
        True if the compiler is Intel compiler (unix like)
    cc_is_iccw : bool
        True if the compiler is Intel compiler (msvc like)
    cc_is_nocc : bool
        True if the compiler isn't supported directly,
        Note: that cause a fail-back to gcc
    cc_has_debug : bool
        True if the compiler has debug flags
    cc_has_native : bool
        True if the compiler has native flags
    cc_noopt : bool
        True if the compiler has definition 'DISABLE_OPT*',
        or 'cc_on_noarch' is True
    cc_march : str
        The target architecture name, or "unknown" if
        the architecture isn't supported
    cc_name : str
        The compiler name, or "unknown" if the compiler isn't supported
    cc_flags : dict
        Dictionary containing the initialized flags of `_Config.conf_cc_flags`
    """
    @_Cache.me
    def cc_test_flags(self, flags):
        """
        Returns True if the compiler supports 'flags'.
        """
        assert(isinstance(flags, list))
        self.dist_log("testing flags", flags)
        # "test_flags.c" is a trivial source used purely to probe the flags.
        test_path = os.path.join(self.conf_check_path, "test_flags.c")
        test = self.dist_test(test_path, flags)
        if not test:
            self.dist_log("testing failed", stderr=True)
        return test

    @_Cache.me
    def cc_test_cexpr(self, cexpr, flags=[]):
        """
        Same as the above but supports compile-time expressions.
        """
        self.dist_log("testing compiler expression", cexpr)
        test_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c")
        with open(test_path, "w") as fd:
            # BUGFIX: the expression must actually be embedded in the
            # generated source — previously only "int dummy;" was written,
            # so every expression trivially "passed" regardless of compiler
            # support. The #if/#error guard makes the compile fail whenever
            # the preprocessor evaluates `cexpr` as false.
            fd.write(textwrap.dedent(f"""\
               #if !({cexpr})
                   #error "unsupported expression"
               #endif
               int dummy;
            """))
        test = self.dist_test(test_path, flags)
        if not test:
            self.dist_log("testing failed", stderr=True)
        return test

    def cc_normalize_flags(self, flags):
        """
        Remove the conflicts that caused due gathering implied features flags.

        Parameters
        ----------
        'flags' list, compiler flags
            flags should be sorted from the lowest to the highest interest.

        Returns
        -------
        list, filtered from any conflicts.

        Examples
        --------
        >>> self.cc_normalize_flags(['-march=armv8.2-a+fp16', '-march=armv8.2-a+dotprod'])
        ['armv8.2-a+fp16+dotprod']

        >>> self.cc_normalize_flags(
            ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-mavx', '-march=core-avx2']
        )
        ['-march=core-avx2']
        """
        assert(isinstance(flags, list))
        # unix-style flags (gcc, clang, icc) vs windows-style (msvc, iccw);
        # anything else is returned untouched.
        if self.cc_is_gcc or self.cc_is_clang or self.cc_is_icc:
            return self._cc_normalize_unix(flags)
        if self.cc_is_msvc or self.cc_is_iccw:
            return self._cc_normalize_win(flags)
        return flags

    # Matches arch-selecting flags: -mcpu=, -march= and icc's -x<ARCH>.
    _cc_normalize_unix_mrgx = re.compile(
        r"^(-mcpu=|-march=|-x[A-Z0-9\-])"
    )
    # Filters out arch-selecting and single -m<feature> flags (which are
    # superseded by the highest arch flag), while keeping -mzvector.
    _cc_normalize_unix_frgx = re.compile(
        r"^(?!(-mcpu=|-march=|-x[A-Z0-9\-]|-m[a-z0-9\-\.]*.$))|"
        r"(?:-mzvector)"
    )
    # Flags to deduplicate, keeping only the last occurrence.
    _cc_normalize_unix_krgx = re.compile(
        r"^(-mfpu|-mtune)"
    )
    # Extracts digits and dots, used to read the arch version out of
    # e.g. "-march=armv8.2-a".
    _cc_normalize_arch_ver = re.compile(
        r"[0-9.]"
    )

    def _cc_normalize_unix(self, flags):
        def ver_flags(f):
            # Split e.g. "-march=armv8.2-a+fp16fml" into
            # (version, base arch flag, [sub-flags]).
            tokens = f.split('+')
            ver = float('0' + ''.join(
                re.findall(self._cc_normalize_arch_ver, tokens[0])
            ))
            return ver, tokens[0], tokens[1:]

        if len(flags) <= 1:
            return flags
        # Find the highest-interest arch flag (scanning from the end).
        for i, cur_flag in enumerate(reversed(flags)):
            if not re.match(self._cc_normalize_unix_mrgx, cur_flag):
                continue
            lower_flags = flags[:-(i+1)]
            upper_flags = flags[-i:]
            # Drop lower flags that the arch flag supersedes.
            filtered = list(filter(
                self._cc_normalize_unix_frgx.search, lower_flags
            ))
            # Merge sub-flags of same-version arch flags into the current one
            # (e.g. two "-march=armv8.2-a+X" flags become one with X+Y).
            ver, arch, subflags = ver_flags(cur_flag)
            if ver > 0 and len(subflags) > 0:
                for xflag in lower_flags:
                    xver, _, xsubflags = ver_flags(xflag)
                    if ver == xver:
                        subflags = xsubflags + subflags
                cur_flag = arch + '+' + '+'.join(subflags)

            flags = filtered + [cur_flag]
            if i > 0:
                flags += upper_flags
            break

        # Remove duplicated -mfpu/-mtune flags, keeping the last one.
        final_flags = []
        matched = set()
        for f in reversed(flags):
            match = re.match(self._cc_normalize_unix_krgx, f)
            if not match:
                pass
            elif match[0] in matched:
                continue
            else:
                matched.add(match[0])
            final_flags.insert(0, f)
        return final_flags

    # Filters out windows arch-selecting flags (/arch:, /Qx:).
    _cc_normalize_win_frgx = re.compile(
        r"^(?!(/arch\:|/Qx\:))"
    )
    # Matches windows arch-selecting flags.
    _cc_normalize_win_mrgx = re.compile(
        r"^(/arch|/Qx:)"
    )

    def _cc_normalize_win(self, flags):
        # Keep only the highest arch flag plus everything that is not an
        # arch flag; scan from the end to find the highest-interest one.
        for i, f in enumerate(reversed(flags)):
            if not re.match(self._cc_normalize_win_mrgx, f):
                continue
            i += 1
            return list(filter(
                self._cc_normalize_win_frgx.search, flags[:-i]
            )) + flags[-i:]
        return flags
# 定义一个辅助类`_Feature`,用于管理 CPU 功能
class _Feature:
"""A helper class for `CCompilerOpt` that managing CPU features.
Attributes
----------
feature_supported : dict
Dictionary containing all CPU features that supported
by the platform, according to the specified values in attribute
`_Config.conf_features` and `_Config.conf_features_partial()`
feature_min : set
The minimum support of CPU features, according to
the specified values in attribute `_Config.conf_min_features`.
"""
def __init__(self):
# 如果已经有了`feature_is_cached`属性,则直接返回,不再执行下面的代码
if hasattr(self, "feature_is_cached"):
return
# 获取所有受平台支持的 CPU 功能,根据属性`_Config.conf_features`和`_Config.conf_features_partial()`
self.feature_supported = pfeatures = self.conf_features_partial()
# 遍历每个 CPU 功能
for feature_name in list(pfeatures.keys()):
# 获取 CPU 功能的详细信息
feature = pfeatures[feature_name]
# 获取针对当前 CPU 功能的配置信息
cfeature = self.conf_features[feature_name]
# 将配置信息中没有的部分添加到 CPU 功能的详细信息中
feature.update({
k:v for k,v in cfeature.items() if k not in feature
})
# 检查当前 CPU 功能是否被禁用
disabled = feature.get("disable")
if disabled is not None:
# 如果被禁用,从受支持的 CPU 功能中移除,同时记录日志
pfeatures.pop(feature_name)
self.dist_log(
"feature '%s' is disabled," % feature_name,
disabled, stderr=True
)
continue
# 内部使用列表的选项
for option in (
"implies", "group", "detect", "headers", "flags", "extra_checks"
) :
# 将字符串类型的选项转换为列表
oval = feature.get(option)
if isinstance(oval, str):
feature[option] = oval.split()
# 初始化最小支持的 CPU 功能集合
self.feature_min = set()
# 获取最小支持的 CPU 功能
min_f = self.conf_min_features.get(self.cc_march, "")
# 将最小支持的 CPU 功能转换为大写并分割成集合
for F in min_f.upper().split():
if F in self.feature_supported:
self.feature_min.add(F)
# 标记属性`feature_is_cached`已经被设置
self.feature_is_cached = True
def feature_names(self, names=None, force_flags=None, macros=[]):
"""
返回平台和 **C** 编译器支持的一组 CPU 特性名称
Parameters
----------
names : sequence or None, optional
指定要测试的特定 CPU 特性,以便与 **C** 编译器进行测试。
如果为 None(默认),将测试所有当前支持的特性。
**注意**: 特性名称必须是大写。
force_flags : list or None, optional
如果为 None(默认),将在测试期间使用每个 CPU 特性的默认编译器标志
macros : list of tuples, optional
一个 C 宏定义的列表。
"""
assert(
names is None or (
not isinstance(names, str) and
hasattr(names, "__iter__")
)
)
assert(force_flags is None or isinstance(force_flags, list))
if names is None:
names = self.feature_supported.keys()
supported_names = set()
for f in names:
if self.feature_is_supported(
f, force_flags=force_flags, macros=macros
):
supported_names.add(f)
return supported_names
def feature_is_exist(self, name):
"""
如果某个特性存在且在 ``_Config.conf_features`` 中有覆盖则返回 True。
Parameters
----------
'name': str
大写特性名称。
"""
assert(name.isupper())
return name in self.conf_features
def feature_sorted(self, names, reverse=False):
"""
按照最低兴趣排序 CPU 特性列表。
Parameters
----------
'names': sequence
大写支持的特性名称序列。
'reverse': bool, optional
如果为真,则倒序排列特性。(兴趣最高)
Returns
-------
list, 排序后的 CPU 特性列表
"""
def sort_cb(k):
if isinstance(k, str):
return self.feature_supported[k]["interest"]
# 多个特性
rank = max([self.feature_supported[f]["interest"] for f in k])
# FIXME: 这不是增加多个目标的等级的安全方法
rank += len(k) -1
return rank
return sorted(names, reverse=reverse, key=sort_cb)
# 定义一个方法,用于获取由给定CPU特性名字所暗示的一组CPU特性
def feature_implies(self, names, keep_origins=False):
"""
Return a set of CPU features that implied by 'names'
Parameters
----------
names : str or sequence of str
CPU feature name(s) in uppercase.
keep_origins : bool
if False(default) then the returned set will not contain any
features from 'names'. This case happens only when two features
imply each other.
Examples
--------
>>> self.feature_implies("SSE3")
{'SSE', 'SSE2'}
>>> self.feature_implies("SSE2")
{'SSE'}
>>> self.feature_implies("SSE2", keep_origins=True)
{'SSE', 'SSE2'}
"""
# 定义一个内部方法,用于获取暗示的CPU特性
def get_implies(name, _caller=set()):
implies = set()
d = self.feature_supported[name]
for i in d.get("implies", []):
# 添加暗示的CPU特性到结果集合
implies.add(i)
if i in _caller:
# 由于特性可以互相暗示,需要防止无限递归
continue
# 将当前特性加入调用堆栈,用于检查递归
_caller.add(name)
# 递归获取暗示的CPU特性并合并到结果集合
implies = implies.union(get_implies(i, _caller))
return implies
# 判断输入的特性名是否是字符串
if isinstance(names, str):
# 获取特性暗示的结果集合
implies = get_implies(names)
# 转换为列表方便后续操作
names = [names]
else:
# 如果输入是一个可迭代对象,则遍历获取特性暗示的结果集合
assert(hasattr(names, "__iter__"))
implies = set()
for n in names:
# 将每个特性暗示的结果集合合并到一起
implies = implies.union(get_implies(n))
# 如果不需要保留原始特性,从结果集合中删去输入的特性
if not keep_origins:
implies.difference_update(names)
# 返回最终的特性暗示结果集合
return implies
# 定义另一个方法,与上述方法类似,但是会将输入的特性名字集合合并后再获取暗示的结果集合
def feature_implies_c(self, names):
"""same as feature_implies() but combining 'names'"""
if isinstance(names, str):
names = set((names,))
else:
names = set(names)
# 返回合并后的特性暗示结果集合
return names.union(self.feature_implies(names))
# 定义一个方法,用于返回在给定名称中删除任何暗示特性后并保留原始特性的特性列表
def feature_ahead(self, names):
"""
Return list of features in 'names' after remove any
implied features and keep the origins.
Parameters
----------
'names': sequence
sequence of CPU feature names in uppercase.
Returns
-------
list of CPU features sorted as-is 'names'
Examples
--------
>>> self.feature_ahead(["SSE2", "SSE3", "SSE41"])
["SSE41"]
>>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
["AVX2"]
>>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"])
["AVX2", "FMA3"]
"""
# 检查输入是否为字符串,并且是否可迭代
assert(
not isinstance(names, str)
and hasattr(names, '__iter__')
)
# 获取暗示的特性,保留原始特性
implies = self.feature_implies(names, keep_origins=True)
# 获取不暗示的特性
ahead = [n for n in names if n not in implies]
if len(ahead) == 0:
# 如果所有特性都互相暗示,则返回最感兴趣的特性
ahead = self.feature_sorted(names, reverse=True)[:1]
return ahead
# 定义一个方法,与'feature_ahead()'相同,但如果两个特性互相暗示,保留最感兴趣的特性
def feature_untied(self, names):
"""
same as 'feature_ahead()' but if both features implied each other and keep the highest interest.
Parameters
----------
'names': sequence
sequence of CPU feature names in uppercase.
Returns
-------
list of CPU features sorted as-is 'names'
Examples
--------
>>> self.feature_untied(["SSE2", "SSE3", "SSE41"])
["SSE2", "SSE3", "SSE41"]
>>> self.feature_untied(["SSE2", "SSE3", "SSE41", "FMA3", "AVX2"])
["SSE2", "SSE3", "SSE41", "AVX2"]
"""
# 检查输入是否为字符串,并且是否可迭代
assert(
not isinstance(names, str)
and hasattr(names, '__iter__')
)
# 最终结果列表
final = []
for n in names:
# 获取暗示的特性
implies = self.feature_implies(n)
tied = [
nn for nn in final
if nn in implies and n in self.feature_implies(nn)
]
if tied:
# 根据最感兴趣的顺序排序
tied = self.feature_sorted(tied + [n])
# 如果n不在除第一个特性外的列表中,则继续下一个循环
if n not in tied[1:]:
continue
# 移除最感兴趣的特性
final.remove(tied[:1][0])
# 添加特性到最终结果列表
final.append(n)
return final
def feature_get_til(self, names, keyisfalse):
"""
same as `feature_implies_c()` but stop collecting implied
features when feature's option that provided through
parameter 'keyisfalse' is False, also sorting the returned
features.
"""
def til(tnames):
# 调用 feature_implies_c() 函数获取所有可能的特征
tnames = self.feature_implies_c(tnames)
# 根据兴趣从高到低对特征进行排序
tnames = self.feature_sorted(tnames, reverse=True)
# 如果 keyisfalse 参数对应的特征选项为 False,则截断列表
for i, n in enumerate(tnames):
if not self.feature_supported[n].get(keyisfalse, True):
tnames = tnames[:i+1]
break
return tnames
if isinstance(names, str) or len(names) <= 1:
# 对单个特征名称或短列表进行处理
names = til(names)
# 对排序进行归一化
names.reverse()
return names
# 处理包含多个特征名称的列表
names = self.feature_ahead(names)
# 获取所有特征名称的完整集合,并返回排序后的结果
names = {t for n in names for t in til(n)}
return self.feature_sorted(names)
def feature_detect(self, names):
"""
Return a list of CPU features that required to be detected
sorted from the lowest to highest interest.
"""
# 获取需要进行检测的特征列表,按照兴趣从低到高排序
names = self.feature_get_til(names, "implies_detect")
detect = []
for n in names:
# 获取特征 n 的支持信息字典
d = self.feature_supported[n]
# 将检测所需的特征添加到 detect 列表中
detect += d.get("detect", d.get("group", [n]))
return detect
@_Cache.me
def feature_flags(self, names):
"""
Return a list of CPU features flags sorted from the lowest
to highest interest.
"""
# 对特征列表进行排序,并且获取所有可能的特征
names = self.feature_sorted(self.feature_implies_c(names))
flags = []
for n in names:
# 获取特征 n 的支持信息字典
d = self.feature_supported[n]
# 获取特征的标志(flags),如果为空或不满足 cc_test_flags() 的条件则跳过
f = d.get("flags", [])
if not f or not self.cc_test_flags(f):
continue
# 将特征的标志添加到 flags 列表中
flags += f
return self.cc_normalize_flags(flags)
@_Cache.me
# 测试特定的 CPU 功能在编译器中的支持情况,通过其自身的检查文件
def feature_test(self, name, force_flags=None, macros=[]):
"""
Test a certain CPU feature against the compiler through its own
check file.
Parameters
----------
name : str
Supported CPU feature name.
force_flags : list or None, optional
If None(default), the returned flags from `feature_flags()`
will be used.
macros : list of tuples, optional
A list of C macro definitions.
"""
# 如果 force_flags 为 None,则使用 feature_flags() 返回的标志
if force_flags is None:
force_flags = self.feature_flags(name)
# 记录日志,测试特性与其使用的标志
self.dist_log(
"testing feature '%s' with flags (%s)" % (
name, ' '.join(force_flags)
))
# 每个 CPU 功能必须有包含至少一个与该功能相关的指令的 C 源代码
test_path = os.path.join(
self.conf_check_path, "cpu_%s.c" % name.lower()
)
# 检查测试文件是否存在
if not os.path.exists(test_path):
self.dist_fatal("feature test file is not exist", test_path)
# 进行测试
test = self.dist_test(
test_path, force_flags + self.cc_flags["werror"], macros=macros
)
# 如果测试失败则记录日志
if not test:
self.dist_log("testing failed", stderr=True)
return test
@_Cache.me
def feature_is_supported(self, name, force_flags=None, macros=[]):
"""
Check if a certain CPU feature is supported by the platform and compiler.
Parameters
----------
name : str
CPU feature name in uppercase.
force_flags : list or None, optional
If None(default), default compiler flags for every CPU feature will
be used during test.
macros : list of tuples, optional
A list of C macro definitions.
"""
# 断言 CPU 功能名为大写
assert(name.isupper())
assert(force_flags is None or isinstance(force_flags, list))
# 检查特定 CPU 功能是否在平台和编译器中受支持
supported = name in self.feature_supported
if supported:
# 对于每个实现,检查其依赖的功能
for impl in self.feature_implies(name):
if not self.feature_test(impl, force_flags, macros=macros):
return False
# 检查该功能
if not self.feature_test(name, force_flags, macros=macros):
return False
return supported
@_Cache.me
def feature_can_autovec(self, name):
"""
check if the feature can be auto-vectorized by the compiler
"""
# 断言参数为字符串
assert(isinstance(name, str))
d = self.feature_supported[name]
can = d.get("autovec", None)
if can is None:
# 检查是否有有效的标志支持自动向量化
valid_flags = [
self.cc_test_flags([f]) for f in d.get("flags", [])
]
can = valid_flags and any(valid_flags)
return can
@_Cache.me
def feature_extra_checks(self, name):
"""
Return a list of supported extra checks after testing them against
the compiler.
Parameters
----------
names : str
CPU feature name in uppercase.
"""
# 确保参数 name 是一个字符串
assert isinstance(name, str)
# 获取特定特性的字典
d = self.feature_supported[name]
# 获取额外检查的列表
extra_checks = d.get("extra_checks", [])
# 如果额外检查列表为空,返回空列表
if not extra_checks:
return []
# 执行额外检查的测试,并记录日志
self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks)
# 获取特定特性的编译器标志
flags = self.feature_flags(name)
# 存储支持的额外检查
available = []
# 存储不支持的额外检查
not_available = []
# 遍历额外检查列表
for chk in extra_checks:
# 构建额外检查文件的路径
test_path = os.path.join(
self.conf_check_path, "extra_%s.c" % chk.lower()
)
# 如果文件不存在,记录致命错误
if not os.path.exists(test_path):
self.dist_fatal("extra check file does not exist", test_path)
# 进行额外检查的测试,并判断是否支持
is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"])
if is_supported:
available.append(chk)
else:
not_available.append(chk)
# 如果有不支持的额外检查,记录日志
if not_available:
self.dist_log("testing failed for checks", not_available, stderr=True)
# 返回支持的额外检查列表
return available
def feature_c_preprocessor(self, feature_name, tabs=0):
    """
    Generate C preprocessor definitions and include headers of a CPU feature.

    Parameters
    ----------
    feature_name : str
        CPU feature name in uppercase.
    tabs : int
        if > 0, align the generated strings to the right depend on number of tabs.

    Returns
    -------
    str, generated C preprocessor

    Examples
    --------
    >>> self.feature_c_preprocessor("SSE3")
    /** SSE3 **/
    #define NPY_HAVE_SSE3 1
    #include <pmmintrin.h>
    """
    assert(feature_name.isupper())
    feature = self.feature_supported.get(feature_name)
    assert(feature is not None)

    prepr = [
        "/** %s **/" % feature_name,
        "#define %sHAVE_%s 1" % (self.conf_c_prefix, feature_name)
    ]
    prepr += [
        "#include <%s>" % h for h in feature.get("headers", [])
    ]
    # build a *new* list here: the previous implementation aliased
    # feature["group"] and extended it in place, silently appending the
    # extra checks to the stored feature dictionary on every call
    extra_defs = list(feature.get("group", []))
    extra_defs += self.feature_extra_checks(feature_name)
    for edef in extra_defs:
        # guard extra definitions to avoid clashing with other features
        prepr += [
            "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef),
            "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef),
            "#endif",
        ]
    if tabs > 0:
        prepr = [('\t'*tabs) + l for l in prepr]
    return '\n'.join(prepr)
# A helper mixin that parses the main arguments of `CCompilerOpt` and the
# configuration statements embedded in dispatch-able sources.
class _Parse:
    """A helper class that parsing main arguments of `CCompilerOpt`,
    also parsing configuration statements in dispatch-able sources.

    Parameters
    ----------
    cpu_baseline : str or None
        minimal set of required CPU features or special options.
    cpu_dispatch : str or None
        dispatched set of additional CPU features or special options.

    Special options can be:
        - **MIN**: Enables the minimum CPU features that utilized via
          `_Config.conf_min_features`
        - **MAX**: Enables all supported CPU features by the Compiler and platform.
        - **NATIVE**: Enables all CPU features that supported by the current machine.
        - **NONE**: Enables nothing
        - **Operand +/-**: remove or add features, useful with options
          **MAX**, **MIN** and **NATIVE**.
          NOTE: operand + is only added for nominal reason.

    NOTES:
        - Case-insensitive among all CPU features and special options.
        - Comma or space can be used as a separator.
        - If the CPU feature is not supported by the user platform or compiler,
          it will be skipped rather than raising a fatal error.
        - Any specified CPU features to 'cpu_dispatch' will be skipped if
          its part of CPU baseline features
        - 'cpu_baseline' force enables implied features.

    Attributes
    ----------
    parse_baseline_names : list
        Final CPU baseline's feature names(sorted from low to high)
    parse_baseline_flags : list
        Compiler flags of baseline features
    parse_dispatch_names : list
        Final CPU dispatch-able feature names(sorted from low to high)
    parse_target_groups : dict
        Dictionary containing initialized target groups that configured
        through class attribute `conf_target_groups`.
        The key is represent the group name and value is a tuple
        contains three items :
        - bool, True if group has the 'baseline' option.
        - list, list of CPU features.
        - list, list of extra compiler flags.
    """
# Parse the '@targets' configuration statement of a dispatch-able source
def parse_targets(self, source):
    """
    Fetch and parse configuration statements that required for
    defining the targeted CPU features, statements should be declared
    in the top of source in between **C** comment and start
    with a special mark **@targets**.

    Configuration statements are sort of keywords representing
    CPU features names, group of statements and policies, combined
    together to determine the required optimization.

    Parameters
    ----------
    source : str
        the path of **C** source file.

    Returns
    -------
    - bool, True if group has the 'baseline' option
    - list, list of CPU features
    - list, list of extra compiler flags
    """
    self.dist_log("looking for '@targets' inside -> ", source)
    with open(source) as fd:
        tokens = ""          # accumulated statement text
        max_to_reach = 1000  # give up after this many lines
        start_with = "@targets"
        start_pos = -1       # offset of the first char after '@targets'
        end_with = "*/"
        end_pos = -1         # offset of the closing '*/' within `tokens`
        for current_line, line in enumerate(fd):
            if current_line == max_to_reach:
                self.dist_fatal("reached the max of lines")
                break
            if start_pos == -1:
                start_pos = line.find(start_with)
                if start_pos == -1:
                    continue
                start_pos += len(start_with)
            tokens += line
            end_pos = line.find(end_with)
            if end_pos != -1:
                # translate the in-line offset into an offset within `tokens`
                end_pos += len(tokens) - len(line)
                break
    if start_pos == -1:
        self.dist_fatal("expected to find '%s' within a C comment" % start_with)
    if end_pos == -1:
        self.dist_fatal("expected to end with '%s'" % end_with)
    # slice out everything between '@targets' and the closing '*/'
    tokens = tokens[start_pos:end_pos]
    return self._parse_target_tokens(tokens)
# 定义正则表达式,用于分割目标 CPU 功能配置语句
_parse_regex_arg = re.compile(r'\s|,|([+-])')
# 解析参数特性,验证参数是否为字符串类型
def _parse_arg_features(self, arg_name, req_features):
if not isinstance(req_features, str):
self.dist_fatal("expected a string in '%s'" % arg_name)
final_features = set()
# 使用空格和逗号作为分隔符,将字符串分割成列表
tokens = list(filter(None, re.split(self._parse_regex_arg, req_features)))
append = True # 默认是追加操作
for tok in tokens:
# 检查是否以 '#' 或 '$' 开头,如果是则报错
if tok[0] in ("#", "$"):
self.dist_fatal(
arg_name, "target groups and policies "
"aren't allowed from arguments, "
"only from dispatch-able sources"
)
# 如果是 '+',则设置为追加操作并继续下一个循环
if tok == '+':
append = True
continue
# 如果是 '-',则设置为不追加操作并继续下一个循环
if tok == '-':
append = False
continue
TOK = tok.upper() # 内部使用大写
features_to = set()
# 如果是 "NONE",不做任何操作
if TOK == "NONE":
pass
# 如果是 "NATIVE",则获取本地支持的特性
elif TOK == "NATIVE":
native = self.cc_flags["native"]
if not native:
self.dist_fatal(arg_name,
"native option isn't supported by the compiler"
)
features_to = self.feature_names(
force_flags=native, macros=[("DETECT_FEATURES", 1)]
)
# 如果是 "MAX",则获取所有支持的特性
elif TOK == "MAX":
features_to = self.feature_supported.keys()
# 如果是 "MIN",则获取最低要求的特性
elif TOK == "MIN":
features_to = self.feature_min
else:
# 如果是已知的特性,则加入到特性集合中;否则报错
if TOK in self.feature_supported:
features_to.add(TOK)
else:
if not self.feature_is_exist(TOK):
self.dist_fatal(arg_name,
", '%s' isn't a known feature or option" % tok
)
# 根据追加操作,合并或移除最终的特性集合
if append:
final_features = final_features.union(features_to)
else:
final_features = final_features.difference(features_to)
append = True # 恢复默认值
return final_features
# 编译正则表达式,用于解析目标
_parse_regex_target = re.compile(r'\s|[*,/]|([()])')
# 解析策略令牌
def _parse_token_policy(self, token):
"""validate policy token"""
# 检查策略名称的有效性
if len(token) <= 1 or token[-1:] == token[0]:
self.dist_fatal("'$' must stuck in the begin of policy name")
token = token[1:] # 去掉开头的 '$'
# 如果策略名称不在已知的策略集合中,则报错
if token not in self._parse_policies:
self.dist_fatal(
"'%s' is an invalid policy name, available policies are" % token,
self._parse_policies.keys()
)
return token
# 验证并解析组合标记
def _parse_token_group(self, token, has_baseline, final_targets, extra_flags):
"""validate group token"""
# 如果标记长度小于等于1,或者最后一个字符与第一个字符相等,抛出错误
if len(token) <= 1 or token[-1:] == token[0]:
self.dist_fatal("'#' must stuck in the begin of group name")
# 去掉标记的第一个字符
token = token[1:]
# 获取标记对应的目标组、目标、额外标志
ghas_baseline, gtargets, gextra_flags = self.parse_target_groups.get(
token, (False, None, [])
)
# 如果目标为空,则抛出错误,并列出所有可用目标组
if gtargets is None:
self.dist_fatal(
"'%s' is an invalid target group name, " % token + \
"available target groups are",
self.parse_target_groups.keys()
)
# 如果该组有基准线,则设置 has_baseline 为真
if ghas_baseline:
has_baseline = True
# 将组内目标加入最终目标列表,保持原有排序
final_targets += [f for f in gtargets if f not in final_targets]
# 将组内额外标志加入额外标志列表,保持原有排序
extra_flags += [f for f in gextra_flags if f not in extra_flags]
# 返回处理后的结果
return has_baseline, final_targets, extra_flags
# 验证被括号包围的多个目标
def _parse_multi_target(self, targets):
"""validate multi targets that defined between parentheses()"""
# 移除任何暗含的特征,保留原始特征
if not targets:
self.dist_fatal("empty multi-target '()'")
# 如果目标列表中存在无效目标,则抛出错误
if not all([
self.feature_is_exist(tar) for tar in targets
]) :
self.dist_fatal("invalid target name in multi-target", targets)
# 如果目标列表中存在不是基准线或分发线的特征,则返回空
if not all([
(
tar in self.parse_baseline_names or
tar in self.parse_dispatch_names
)
for tar in targets
]) :
return None
# 将目标列表排序,使之可比较
targets = self.feature_ahead(targets)
# 如果目标列表为空,则返回空
if not targets:
return None
# 强制排序多个目标,使之可比较
targets = self.feature_sorted(targets)
targets = tuple(targets) # 可散列
return targets
# 跳过所有基准线特征
def _parse_policy_not_keepbase(self, has_baseline, final_targets, extra_flags):
"""skip all baseline features"""
skipped = []
for tar in final_targets[:]:
is_base = False
if isinstance(tar, str):
is_base = tar in self.parse_baseline_names
else:
# 多个目标
is_base = all([
f in self.parse_baseline_names
for f in tar
])
if is_base:
skipped.append(tar)
final_targets.remove(tar)
# 如果有跳过的基准线特征,则记录日志
if skipped:
self.dist_log("skip baseline features", skipped)
# 返回处理后的结果
return has_baseline, final_targets, extra_flags
# 解析保持排序策略,将通知记录在日志中,然后返回处理结果
def _parse_policy_keepsort(self, has_baseline, final_targets, extra_flags):
"""leave a notice that $keep_sort is on"""
self.dist_log(
"policy 'keep_sort' is on, dispatch-able targets", final_targets, "\n"
"are 'not' sorted depend on the highest interest but"
"as specified in the dispatch-able source or the extra group"
)
return has_baseline, final_targets, extra_flags
# 解析不保持排序策略,根据最高兴趣度对最终目标进行排序,然后返回处理结果
def _parse_policy_not_keepsort(self, has_baseline, final_targets, extra_flags):
"""sorted depend on the highest interest"""
final_targets = self.feature_sorted(final_targets, reverse=True)
return has_baseline, final_targets, extra_flags
# 解析最大优化策略,尝试追加编译器优化标志,最后返回处理结果
def _parse_policy_maxopt(self, has_baseline, final_targets, extra_flags):
"""append the compiler optimization flags"""
if self.cc_has_debug:
self.dist_log("debug mode is detected, policy 'maxopt' is skipped.")
elif self.cc_noopt:
self.dist_log("optimization is disabled, policy 'maxopt' is skipped.")
else:
flags = self.cc_flags["opt"]
if not flags:
self.dist_log(
"current compiler doesn't support optimization flags, "
"policy 'maxopt' is skipped", stderr=True
)
else:
extra_flags += flags
return has_baseline, final_targets, extra_flags
# 解析错误作为警告处理策略,尝试追加编译器错误作为警告标志,最后返回处理结果
def _parse_policy_werror(self, has_baseline, final_targets, extra_flags):
"""force warnings to treated as errors"""
flags = self.cc_flags["werror"]
if not flags:
self.dist_log(
"current compiler doesn't support werror flags, "
"warnings will 'not' treated as errors", stderr=True
)
else:
self.dist_log("compiler warnings are treated as errors")
extra_flags += flags
return has_baseline, final_targets, extra_flags
# 解析自动向量化支持策略,跳过编译器不支持的特性,最后返回处理结果
def _parse_policy_autovec(self, has_baseline, final_targets, extra_flags):
"""skip features that has no auto-vectorized support by compiler"""
skipped = []
for tar in final_targets[:]:
if isinstance(tar, str):
can = self.feature_can_autovec(tar)
else: # multiple target
can = all([
self.feature_can_autovec(t)
for t in tar
])
if not can:
final_targets.remove(tar)
skipped.append(tar)
if skipped:
self.dist_log("skip non auto-vectorized features", skipped)
return has_baseline, final_targets, extra_flags
# Binds all the mixin helpers together: configuration, distutils bridging,
# caching, compiler probing, feature testing and argument parsing.
class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
    """
    A helper class for `CCompiler` aims to provide extra build options
    to effectively control of compiler optimizations that are directly
    related to CPU features.
    """
# NOTE: the mixin initialization order below matters; `_Parse` runs last
# since it consumes attributes set up by the other bases.
def __init__(self, ccompiler, cpu_baseline="min", cpu_dispatch="max", cache_path=None):
    _Config.__init__(self)
    _Distutils.__init__(self, ccompiler)
    # the cache key includes the dist info plus the requested features
    _Cache.__init__(self, cache_path, self.dist_info(), cpu_baseline, cpu_dispatch)
    _CCompiler.__init__(self)
    _Feature.__init__(self)
    if not self.cc_noopt and self.cc_has_native:
        # a native flag arrived through environment variables; honor it by
        # forcing the baseline to 'native'
        self.dist_log(
            "native flag is specified through environment variables. "
            "force cpu-baseline='native'"
        )
        cpu_baseline = "native"
    _Parse.__init__(self, cpu_baseline, cpu_dispatch)
    # keep the requested args for reporting and tracking purposes
    self._requested_baseline = cpu_baseline
    self._requested_dispatch = cpu_dispatch
    # key: dispatch-able source path,
    # value: (has_baseline[bool], dispatched-features[list])
    self.sources_status = getattr(self, "sources_status", {})
    # each instance should have a separate cached copy of `sources_status`
    self.cache_private.add("sources_status")
    # set after the base classes so the attribute is written on the next
    # cache flush; True only when the state was loaded from a cache file
    self.hit_cache = hasattr(self, "hit_cache")
def is_cached(self):
    """
    Returns True if the class loaded from the cache file
    """
    # normalize to a strict bool as documented; `cache_infile` may be a
    # path string (truthy) or None, which previously leaked through
    return bool(self.cache_infile and self.hit_cache)
# Accessor for the final baseline compiler flags computed by `_Parse`
def cpu_baseline_flags(self):
    """
    Returns a list of final CPU baseline compiler flags
    """
    return self.parse_baseline_flags
# Accessor for the final baseline feature names computed by `_Parse`
def cpu_baseline_names(self):
    """
    return a list of final CPU baseline feature names
    """
    return self.parse_baseline_names
# Accessor for the final dispatch-able feature names computed by `_Parse`
def cpu_dispatch_names(self):
    """
    return a list of final CPU dispatch feature names
    """
    return self.parse_dispatch_names
# Generate a thin C wrapper that compiles `dispatch_src` for a single target
def _wrap_target(self, output_dir, dispatch_src, target, nochange=False):
    assert(isinstance(target, (str, tuple)))
    if isinstance(target, str):
        ext_name = target_name = target
    else:
        # multi-target: join the feature names
        ext_name = '.'.join(target)
        target_name = '__'.join(target)

    # insert the target name before the extension,
    # e.g. "foo.dispatch.c" -> "<output_dir>/foo.dispatch.avx2.c"
    wrap_path = os.path.join(output_dir, os.path.basename(dispatch_src))
    wrap_path = "{0}.{2}{1}".format(*os.path.splitext(wrap_path), ext_name.lower())
    if nochange and os.path.exists(wrap_path):
        # reuse the existing wrapper as-is
        return wrap_path

    self.dist_log("wrap dispatch-able target -> ", wrap_path)
    # sorting for readability
    features = self.feature_sorted(self.feature_implies_c(target))
    target_join = "#define %sCPU_TARGET_" % self.conf_c_prefix_
    target_defs = [target_join + f for f in features]
    target_defs = '\n'.join(target_defs)

    with open(wrap_path, "w") as fd:
        fd.write(textwrap.dedent("""\
        /**
         * AUTOGENERATED DON'T EDIT
         * Please make changes to the code generator (distutils/ccompiler_opt.py)
         */
        #define {pfx}CPU_TARGET_MODE
        #define {pfx}CPU_TARGET_CURRENT {target_name}
        {target_defs}
        #include "{path}"
        """).format(
            pfx=self.conf_c_prefix_, target_name=target_name,
            path=os.path.abspath(dispatch_src), target_defs=target_defs
        ))
    return wrap_path
# Generate (or reuse) the per-source dispatch configuration header
def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False):
    # derive the header name from the source: "<stem>.h" inside `output_dir`
    config_path = os.path.basename(dispatch_src)
    config_path = os.path.splitext(config_path)[0] + '.h'
    config_path = os.path.join(output_dir, config_path)
    # the header's first line stores a hash of (targets, has_baseline);
    # if it matches, the existing header is still valid
    cache_hash = self.cache_hash(targets, has_baseline)
    try:
        with open(config_path) as f:
            last_hash = f.readline().split("cache_hash:")
            if len(last_hash) == 2 and int(last_hash[1]) == cache_hash:
                # up to date, nothing to regenerate
                return True
    except OSError:
        pass

    os.makedirs(os.path.dirname(config_path), exist_ok=True)
    self.dist_log("generate dispatched config -> ", config_path)
    dispatch_calls = []
    for tar in targets:
        if isinstance(tar, str):
            target_name = tar
        else: # multi target: join the feature names with '__'
            target_name = '__'.join([t for t in tar])
        # build the runtime detection condition, e.g. "CHK(AVX)&&CHK(AVX2)"
        req_detect = self.feature_detect(tar)
        req_detect = '&&'.join([
            "CHK(%s)" % f for f in req_detect
        ])
        dispatch_calls.append(
            "\t%sCPU_DISPATCH_EXPAND_(CB((%s), %s, __VA_ARGS__))" % (
            self.conf_c_prefix_, req_detect, target_name
        ))
    # join as a C macro continuation list
    dispatch_calls = ' \\\n'.join(dispatch_calls)

    if has_baseline:
        baseline_calls = (
            "\t%sCPU_DISPATCH_EXPAND_(CB(__VA_ARGS__))"
        ) % self.conf_c_prefix_
    else:
        baseline_calls = ''

    with open(config_path, "w") as fd:
        fd.write(textwrap.dedent("""\
        // cache_hash:{cache_hash}
        /**
         * AUTOGENERATED DON'T EDIT
         * Please make changes to the code generator (distutils/ccompiler_opt.py)
         */
        {baseline_calls}
        {dispatch_calls}
        """).format(
            pfx=self.conf_c_prefix_, baseline_calls=baseline_calls,
            dispatch_calls=dispatch_calls, cache_hash=cache_hash
        ))
    # False signals that the config file had to be (re)generated
    return False
def new_ccompiler_opt(compiler, dispatch_hpath, **kwargs):
    """
    Create a new instance of 'CCompilerOpt' and generate the dispatch header
    which contains the
    the enabled CPU baseline and dispatch-able features.

    Parameters
    ----------
    compiler : CCompiler instance
    dispatch_hpath : str
        path of the dispatch header

    **kwargs: passed as-is to `CCompilerOpt(...)`

    Returns
    -------
    new instance of CCompilerOpt
    """
    opt = CCompilerOpt(compiler, **kwargs)
    # regenerate the header when it is missing or the cached state is stale
    if not (os.path.exists(dispatch_hpath) and opt.is_cached()):
        opt.generate_dispatch_header(dispatch_hpath)
    return opt
.\numpy\numpy\distutils\checks\cpu_asimd.c
int main(int argc, char **argv)
{
    /* read inputs through untraced argv pointers so constants can't be
       folded away and the intrinsics are really emitted */
    float *src = (float*)argv[argc-1];
    float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
    /* MAXMIN */
    int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
    ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
    /* ROUNDING */
    ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
    {
        /* float64 variants -- NOTE(review): this section looks like it was
           originally guarded by an AArch64-only #ifdef, and the guard plus
           the <arm_neon.h> include appear stripped from this copy; verify
           against the original cpu_asimd.c */
        double *src2 = (double*)argv[argc-1];
        float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
        /* MAXMIN */
        ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
        ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
        /* ROUNDING */
        ret += (int)vgetq_lane_f64(vrndq_f64(vd1), 0);
    }
    return ret;
}
.\numpy\numpy\distutils\checks\cpu_asimddp.c
// NOTE(review): the comments below describe an MSVC-guarded intrinsics
// include and an <arm_neon.h> include whose preprocessor lines appear
// stripped from this copy; verify against the original cpu_asimddp.c
int main(int argc, char **argv)
{
    /* read inputs through an untraced argv pointer */
    unsigned char *src = (unsigned char*)argv[argc-1];
    uint8x16_t v1 = vdupq_n_u8(src[0]),
               v2 = vdupq_n_u8(src[1]);
    uint32x4_t va = vdupq_n_u32(3);
    /* unsigned 8-bit dot product accumulated into 32-bit lanes (DotProd) */
    int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
    /* lane variant -- NOTE(review): originally AArch64-only per the
       surrounding comment; the #ifdef appears stripped */
    ret += (int)vgetq_lane_u32(vdotq_laneq_u32(va, v1, v2, 0), 0);
    return ret;
}
.\numpy\numpy\distutils\checks\cpu_asimdfhm.c
int main(int argc, char **argv)
{
    /* half- and single-precision inputs taken from argv to defeat folding */
    float16_t *src = (float16_t*)argv[argc-1];
    float *src2 = (float*)argv[argc-2];
    float16x8_t vhp  = vdupq_n_f16(src[0]);
    float16x4_t vlhp = vdup_n_f16(src[1]);
    float32x4_t vf   = vdupq_n_f32(src2[0]);
    float32x2_t vlf  = vdup_n_f32(src2[1]);
    /* FP16 widening multiply-accumulate into FP32 (FHM extension) */
    int ret = (int)vget_lane_f32(vfmlal_low_f16(vlf, vlhp, vlhp), 0);
    /* widening multiply-subtract, high halves */
    ret += (int)vgetq_lane_f32(vfmlslq_high_f16(vf, vhp, vhp), 0);
    return ret;
}
.\numpy\numpy\distutils\checks\cpu_asimdhp.c
int main(int argc, char **argv)
{
    /* half-precision inputs taken from argv to defeat constant folding */
    float16_t *src = (float16_t*)argv[argc-1];
    float16x8_t vhp  = vdupq_n_f16(src[0]);
    float16x4_t vlhp = vdup_n_f16(src[1]);
    /* half-precision absolute difference (ASIMD-HP extension) */
    int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
    ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
    return ret;
}
.\numpy\numpy\distutils\checks\cpu_avx.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: unlike GCC and CLANG,
 * the Intel compiler exposes all supported intrinsics whether or not the
 * build options for those features were specified.  Therefore the CPU-feature
 * #definitions must be tested when the native/host option is enabled through
 * `--cpu-baseline` or the environment variable `CFLAGS`, otherwise the test
 * breaks down and leads to enabling all possible features.
 */
int main(int argc, char **argv)
{
    /* add two unaligned AVX loads taken from argv */
    __m256 a = _mm256_add_ps(_mm256_loadu_ps((const float*)argv[argc-1]), _mm256_loadu_ps((const float*)argv[1]));
    /* return the lowest single-precision lane as an int */
    return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx2.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: unlike GCC and CLANG,
 * the Intel compiler exposes all supported intrinsics whether or not the
 * build options for those features were specified, so the CPU-feature
 * #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise all possible features get enabled.
 */
int main(int argc, char **argv)
{
    /* absolute value of 16-bit lanes from an unaligned 256-bit load */
    __m256i a = _mm256_abs_epi16(_mm256_loadu_si256((const __m256i*)argv[argc-1]));
    /* return the lowest 32-bit lane */
    return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512cd.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise the test breaks down and leads to
 * enabling all possible features.
 */
int main(int argc, char **argv)
{
    /* leading-zero count per 32-bit lane (AVX512CD) */
    __m512i a = _mm512_lzcnt_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
    /* return the lowest 32-bit lane */
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512f.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise all possible features get enabled.
 */
int main(int argc, char **argv)
{
    /* absolute value of 32-bit lanes from an unaligned 512-bit load */
    __m512i a = _mm512_abs_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
    /* return the lowest 32-bit lane */
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512_clx.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`.
 */
int main(int argc, char **argv)
{
    /* VNNI */
    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    /* unsigned/signed byte dot-product accumulate (AVX512-VNNI) */
    a = _mm512_dpbusd_epi32(a, _mm512_setzero_si512(), a);
    /* return the lowest 32-bit lane */
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512_cnl.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise the test breaks down and leads to
 * enabling all possible features.
 */
int main(int argc, char **argv)
{
    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    /* IFMA: 52-bit integer fused multiply-add, high half */
    a = _mm512_madd52hi_epu64(a, a, _mm512_setzero_si512());
    /* VBMI: two-source byte permute (comment typo "VMBI" in the original) */
    a = _mm512_permutex2var_epi8(a, _mm512_setzero_si512(), a);
    /* return the lowest 32-bit lane */
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512_icl.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`.
 */
int main(int argc, char **argv)
{
    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    /* VBMI2: variable shift-right-double */
    a = _mm512_shrdv_epi64(a, a, _mm512_setzero_si512());
    /* BITALG (comment typo "BITLAG" in the original): per-byte popcount */
    a = _mm512_popcnt_epi8(a);
    /* VPOPCNTDQ: per-qword popcount */
    a = _mm512_popcnt_epi64(a);
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512_knl.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise all possible features get enabled.
 */
int main(int argc, char **argv)
{
    /* zero-initialized scratch buffer for the prefetch-scatter result */
    int base[128]={};
    __m512d ad = _mm512_loadu_pd((const __m512d*)argv[argc-1]);
    /* ER: exponential approximation (2^x, 23-bit accuracy) */
    __m512i a = _mm512_castpd_si512(_mm512_exp2a23_pd(ad));
    /* PF: masked 64-bit scatter prefetch into `base` with hint T1 */
    _mm512_mask_prefetch_i64scatter_pd(base, _mm512_cmpeq_epi64_mask(a, a), a, 1, _MM_HINT_T1);
    return base[0];
}
.\numpy\numpy\distutils\checks\cpu_avx512_knm.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`.
 */
int main(int argc, char **argv)
{
    __m512i a = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    __m512 b = _mm512_loadu_ps((const __m512*)argv[argc-2]);
    /* 4FMAPS: packed single 4-iteration fused multiply-add */
    b = _mm512_4fmadd_ps(b, b, b, b, b, NULL);
    /* 4VNNIW: 4-iteration word dot-product accumulate */
    a = _mm512_4dpwssd_epi32(a, a, a, a, a, NULL);
    /* VPOPCNTDQ: per-qword popcount */
    a = _mm512_popcnt_epi64(a);
    /* combine both results so neither is optimized away */
    a = _mm512_add_epi32(a, _mm512_castps_si512(b));
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
}
.\numpy\numpy\distutils\checks\cpu_avx512_skx.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise all possible features get enabled.
 */
int main(int argc, char **argv)
{
    __m512i aa = _mm512_abs_epi32(_mm512_loadu_si512((const __m512i*)argv[argc-1]));
    /* VL: 256-bit form of an AVX-512 op */
    __m256i a = _mm256_abs_epi64(_mm512_extracti64x4_epi64(aa, 1));
    /* DQ: broadcast 8x32-bit lanes */
    __m512i b = _mm512_broadcast_i32x8(a);
    /* BW: 16-bit lane op on 512-bit vectors */
    b = _mm512_abs_epi16(b);
    /* return the lowest 32-bit lane */
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(b));
}
.\numpy\numpy\distutils\checks\cpu_avx512_spr.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`.
 */
int main(int argc, char **argv)
{
    /* clang has a bug on our spr code, see gh-23730 */
    /* FP16: load, fused multiply-add and store of packed half floats */
    __m512h a = _mm512_loadu_ph((void*)argv[argc-1]);
    __m512h temp = _mm512_fmadd_ph(a, a, a);
    _mm512_storeu_ph((void*)(argv[argc-1]), temp);
    return 0;
}
.\numpy\numpy\distutils\checks\cpu_f16c.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise the test breaks down and leads to
 * enabling all possible features.
 */
int main(int argc, char **argv)
{
    /* convert 4 packed half floats to single precision */
    __m128 a = _mm_cvtph_ps(_mm_loadu_si128((const __m128i*)argv[argc-1]));
    /* convert 8 packed half floats to single precision */
    __m256 a8 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*)argv[argc-2]));
    /* combine both so neither conversion is optimized away */
    return (int)(_mm_cvtss_f32(a) + _mm_cvtss_f32(_mm256_castps256_ps128(a8)));
}
.\numpy\numpy\distutils\checks\cpu_fma3.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise all possible features get enabled.
 */
int main(int argc, char **argv)
{
    __m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
    /* FMA3 fused multiply-add: a = a * a + a */
    a = _mm256_fmadd_ps(a, a, a);
    /* return the lowest single-precision lane as an int */
    return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}
.\numpy\numpy\distutils\checks\cpu_fma4.c
int main(int argc, char **argv)
{
    /* unaligned 256-bit load from an untraced argv pointer */
    __m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
    /* AMD FMA4 multiply-accumulate: a = a * a + a */
    a = _mm256_macc_ps(a, a, a);
    /* return the lowest single-precision lane as an int */
    return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}
.\numpy\numpy\distutils\checks\cpu_neon.c
int main(int argc, char **argv)
{
    // passing from untraced pointers to avoid optimizing out any constants
    // so we can test against the linker.
    float *src = (float*)argv[argc-1];
    float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
    int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
    // NOTE(review): the float64 section below was originally guarded by an
    // AArch64-only #ifdef per the surrounding comments; the guard and the
    // <arm_neon.h> include appear stripped from this copy
    double *src2 = (double*)argv[argc-2];
    float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
    ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
    return ret;
}
.\numpy\numpy\distutils\checks\cpu_neon_fp16.c
// NOTE(review): the comments below describe an MSVC-guarded intrinsics
// include and an <arm_neon.h> include whose preprocessor lines appear
// stripped from this copy; verify against the original cpu_neon_fp16.c
int main(int argc, char **argv)
{
    /* read inputs through an untraced argv pointer */
    short *src = (short*)argv[argc-1];
    /* load 4 half floats and widen them to float32 (NEON FP16 conversion) */
    float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src));
    /* return the first lane as an int */
    return (int)vgetq_lane_f32(v_z4, 0);
}
.\numpy\numpy\distutils\checks\cpu_neon_vfpv4.c
int main(int argc, char **argv)
{
    /* read inputs through untraced argv pointers to defeat folding */
    float *src = (float*)argv[argc-1];
    float32x4_t v1 = vdupq_n_f32(src[0]);
    float32x4_t v2 = vdupq_n_f32(src[1]);
    float32x4_t v3 = vdupq_n_f32(src[2]);
    /* VFPv4 fused multiply-accumulate: v1 + v2 * v3 */
    int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
    /* NOTE(review): the float64 section below was originally AArch64-only
       per the surrounding comment; the #ifdef appears stripped */
    double *src2 = (double*)argv[argc-2];
    float64x2_t vd1 = vdupq_n_f64(src2[0]);
    float64x2_t vd2 = vdupq_n_f64(src2[1]);
    float64x2_t vd3 = vdupq_n_f64(src2[2]);
    ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
    return ret;
}
.\numpy\numpy\distutils\checks\cpu_popcnt.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: the Intel compiler
 * exposes all supported intrinsics regardless of the build options, so the
 * CPU-feature #definitions must be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`.
 */
int main(int argc, char **argv)
{
    /* use untraced argv pointers so the popcnt instructions are really
       emitted and can be checked in the generated assembly */
    unsigned long long a = *((unsigned long long*)argv[argc-1]);
    unsigned int b = *((unsigned int*)argv[argc-2]);
    a = _mm_popcnt_u64(a);
    b = _mm_popcnt_u32(b);
    return (int)a + b;
}
.\numpy\numpy\distutils\checks\cpu_rvv.c
int main(void)
{
    /* query the maximum vector length for SEW=32, LMUL=1 */
    size_t vlmax = __riscv_vsetvlmax_e32m1();
    /* splat scalar 0 into a vector register */
    vuint32m1_t a = __riscv_vmv_v_x_u32m1(0, vlmax);
    /* vector-vector add */
    vuint32m1_t b = __riscv_vadd_vv_u32m1(a, a, vlmax);
    /* move the first element back out as a scalar */
    return __riscv_vmv_x_s_u32m1_u32(b);
}
.\numpy\numpy\distutils\checks\cpu_sse.c
/*
 * If DETECT_FEATURES and __INTEL_COMPILER are defined: unlike GCC and CLANG,
 * the Intel compiler exposes all supported intrinsics whether or not the
 * build options for those features were specified.  The CPU-feature
 * #definitions must therefore be tested when native/host is enabled via
 * `--cpu-baseline` or `CFLAGS`; otherwise the test breaks down and leads to
 * enabling all possible features.
 */
int main(void)
{
    /* minimal SSE op: packed single add of two zero vectors */
    __m128 a = _mm_add_ps(_mm_setzero_ps(), _mm_setzero_ps());
    return (int)_mm_cvtss_f32(a);
}
.\numpy\numpy\distutils\checks\cpu_sse2.c
/*
 * Note on the Intel compiler: unlike GCC/Clang it exposes every supported
 * intrinsic regardless of the build flags, so with `--cpu-baseline=native`
 * (or CFLAGS) the CPU feature macros must be tested instead.
 */
int main(void)
{
    /* 16-bit integer add on zeroed vectors proves SSE2 support. */
    const __m128i total = _mm_add_epi16(_mm_setzero_si128(), _mm_setzero_si128());
    /* Low 32 bits become the exit status. */
    return _mm_cvtsi128_si32(total);
}
.\numpy\numpy\distutils\checks\cpu_sse3.c
/*
* 如果定义了DETECT_FEATURES并且使用Intel编译器,
* Intel编译器与GCC和CLANG不同,它会暴露所有支持的内置函数,
* 无论是否指定了这些特性的构建选项。
* 因此,我们必须在启用`--cpu-baseline`或通过环境变量`CFLAGS`设置时测试CPU特性的
* 否则测试将失效并导致启用所有可能的特性。
*/
int main(void)
{
// 创建两个全零的单精度浮点数向量,并将它们按元素加和
__m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
// 将结果向量的第一个单精度浮点数转换为整数并返回
return (int)_mm_cvtss_f32(a);
}
.\numpy\numpy\distutils\checks\cpu_sse41.c
/*
* 如果定义了DETECT_FEATURES和__INTEL_COMPILER,说明正在使用Intel编译器,
* 与GCC和CLANG不同,Intel编译器会暴露所有支持的内部函数,
* 无论是否通过`--cpu-baseline`或环境变量`CFLAGS`启用了这些特性选项。
* 因此,我们必须测试CPU特性的
* 否则测试将会失败,并导致启用所有可能的特性。
*/
int main(void)
{
// 创建一个全0的SSE寄存器变量a,并对其执行向下取整操作
__m128 a = _mm_floor_ps(_mm_setzero_ps());
// 将SSE寄存器a中的值转换为单精度浮点数,并作为整数返回
return (int)_mm_cvtss_f32(a);
}
This snippet exercises CPU feature detection: an SSE4.1 floor operation followed by extraction of the first single-precision lane as an integer.
.\numpy\numpy\distutils\checks\cpu_sse42.c
/*
* 如果定义了DETECT_FEATURES和__INTEL_COMPILER,表示需要检测CPU特性并且是使用Intel Compiler
* 与GCC和CLANG不同,Intel编译器会暴露所有支持的内置函数,不管是否指定了这些特性的构建选项。
* 因此,当通过`--cpu-baseline`启用了本地/主机选项或者通过环境变量`CFLAGS`启用了这些选项,我们必须测试CPU特性的
* 否则,测试将失败并导致启用所有可能的特性。
*/
int main(void)
{
// 创建四个零的单精度浮点数的SSE寄存器
__m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
// 将SSE寄存器的值转换为整数返回
return (int)_mm_cvtss_f32(a);
}
.\numpy\numpy\distutils\checks\cpu_ssse3.c
/*
 * Note on the Intel compiler: it exposes all supported intrinsics whether
 * or not the build options (via `--cpu-baseline` or CFLAGS) enabled them,
 * so the CPU feature macros must be tested instead; otherwise every
 * feature would appear to be enabled.
 */
int main(void)
{
    /* Horizontal 16-bit integer add (phaddw) is an SSSE3 instruction. */
    const __m128i hsum = _mm_hadd_epi16(_mm_setzero_si128(), _mm_setzero_si128());
    /* Low 32 bits become the exit status. */
    return (int)_mm_cvtsi128_si32(hsum);
}
.\numpy\numpy\distutils\checks\cpu_sve.c
/* Horizontal sum over all lanes of a + a*b (SVE predicated MLA). */
int accumulate(svint64_t a, svint64_t b)
{
    svbool_t all_lanes = svptrue_b64();
    svint64_t fused = svmla_z(all_lanes, a, a, b);
    return svaddv(all_lanes, fused);
}
int main(void)
{
    /* Build two constant SVE vectors and reduce them via accumulate().
     * The predicate previously declared here was never used; with
     * -Wunused-variable/-Werror that warning would make this feature
     * probe fail spuriously, so it is removed. */
    svint64_t a = svdup_s64(1);
    svint64_t b = svdup_s64(2);
    return accumulate(a, b);
}
.\numpy\numpy\distutils\checks\cpu_vsx.c
int main(void)
{
    // Destination buffer for the vector store below.
    unsigned int zout[4];
    // All-zero source lanes.
    unsigned int z4[] = {0, 0, 0, 0};
    // vsx_ld/vsx_st are macros (vec_vsx_ld / vec_xl wrappers) defined in
    // the preprocessor prologue that this dump stripped — see
    // extra_vsx_asm.c below for the same macro block.
    __vector unsigned int v_z4 = vsx_ld(0, z4);
    // Store the loaded vector back out; load+store is enough to force
    // VSX instructions to be emitted.
    vsx_st(v_z4, 0, zout);
    // First stored lane (always 0 here) becomes the exit status.
    return zout[0];
}
.\numpy\numpy\distutils\checks\cpu_vsx2.c
// 定义了一个名为v_uint64x2的类型别名,代表一个包含两个无符号长整型元素的向量
typedef __vector unsigned long long v_uint64x2;
int main(void)
{
// 创建一个v_uint64x2类型的变量z2,并初始化为(0, 0)
v_uint64x2 z2 = (v_uint64x2){0, 0};
// 将z2与自身进行逐元素比较,将比较结果存储回z2中
z2 = (v_uint64x2)vec_cmpeq(z2, z2);
// 提取z2中索引为0的元素,并转换为int类型后返回
return (int)vec_extract(z2, 0);
}
.\numpy\numpy\distutils\checks\cpu_vsx3.c
// 定义一个 4 个无符号整数向量的类型
typedef __vector unsigned int v_uint32x4;
int main(void)
{
// 初始化一个 4 个无符号整数向量都为 0 的向量 z4
v_uint32x4 z4 = (v_uint32x4){0, 0, 0, 0};
// 计算 z4 的绝对差值,结果赋值给 z4
z4 = vec_absd(z4, z4);
// 提取 z4 向量中索引为 0 的值,并转换为整数返回
return (int)vec_extract(z4, 0);
}
.\numpy\numpy\distutils\checks\cpu_vsx4.c
// 定义一个 4 个无符号整数向量类型,用于操作 4 个 32 位整数
typedef __vector unsigned int v_uint32x4;
// 主函数入口
int main(void)
{
// 初始化一个包含 2, 4, 8, 16 的向量 v1
v_uint32x4 v1 = (v_uint32x4){2, 4, 8, 16};
// 初始化一个包含 2, 2, 2, 2 的向量 v2
v_uint32x4 v2 = (v_uint32x4){2, 2, 2, 2};
// 对 v1 和 v2 执行向量模运算,将结果存入 v3
v_uint32x4 v3 = vec_mod(v1, v2);
// 提取 v3 的有效位并转换为整数返回
return (int)vec_extractm(v3);
}
.\numpy\numpy\distutils\checks\cpu_vx.c
int main(int argc, char **argv)
{
    // Load doubles from argv and take the element-wise absolute value.
    __vector double x = vec_abs(vec_xl(argc, (double*)argv));
    // Length-bounded load (z/Arch VX vec_load_len).
    __vector double y = vec_load_len((double*)argv, (unsigned int)argc);
    // round(ceil(x) + floor(y)) exercises the rounding instructions.
    x = vec_round(vec_ceil(x) + vec_floor(y));
    // Per-lane mask: x >= y.
    __vector bool long long m = vec_cmpge(x, y);
    // Select between x and y per lane, then convert to signed 64-bit ints.
    __vector long long i = vec_signed(vec_sel(x, y, m));
    // First integer lane becomes the exit status.
    return (int)vec_extract(i, 0);
}
.\numpy\numpy\distutils\checks\cpu_vxe.c
int main(int argc, char **argv)
{
    /* Negative absolute value of floats loaded from argv. */
    __vector float x = vec_nabs(vec_xl(argc, (float*)argv));
    /* Length-bounded load. */
    __vector float y = vec_load_len((float*)argv, (unsigned int)argc);
    /* Exercise the VXE rounding instructions. */
    x = vec_round(vec_ceil(x) + vec_floor(y));
    /* Per-lane mask x >= y, then per-lane select. */
    __vector bool int m = vec_cmpge(x, y);
    x = vec_sel(x, y, m);
    /* Test for the "vflls" builtin directly because GCC's vec_doublee maps
     * to the wrong builtin "vfll", see
     * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100871
     * Both definitions of `i` were emitted unconditionally here, which is
     * a redefinition error; restore the compiler guard the comment above
     * describes. */
#if defined(__GNUC__) && !defined(__clang__)
    __vector long long i = vec_signed(__builtin_s390_vflls(x));
#else
    __vector long long i = vec_signed(vec_doublee(x));
#endif
    /* First integer lane becomes the exit status. */
    return (int)vec_extract(i, 0);
}
.\numpy\numpy\distutils\checks\cpu_vxe2.c
int main(int argc, char **argv)
{
    int val;
    // Haystack of 8 signed short "characters".
    __vector signed short large = { 'a', 'b', 'c', 'a', 'g', 'h', 'g', 'o' };
    // Needle to search for inside `large`.
    __vector signed short search = { 'g', 'h', 'g', 'o' };
    // Zero length vector: search the full string.
    __vector unsigned char len = { 0 };
    // VXE2 string-search instruction; the condition code lands in val.
    __vector unsigned char res = vec_search_string_cc(large, search, len, &val);
    // Load floats from argv and convert to signed 32-bit integers.
    __vector float x = vec_xl(argc, (float*)argv);
    __vector int i = vec_signed(x);
    // VXE2 double-vector shifts: shift left by 2, then right by 3.
    i = vec_srdb(vec_sldb(i, i, 2), i, 3);
    // Fold lane values into the exit status so nothing is optimized away.
    val += (int)vec_extract(res, 1);
    val += vec_extract(i, 0);
    return val;
}
.\numpy\numpy\distutils\checks\cpu_xop.c
int main(void)
{
    /* _mm_comge_epu32 is an AMD XOP-only comparison intrinsic. */
    const __m128i cmp = _mm_comge_epu32(_mm_setzero_si128(), _mm_setzero_si128());
    /* Low 32 bits become the exit status. */
    return _mm_cvtsi128_si32(cmp);
}
.\numpy\numpy\distutils\checks\extra_avx512bw_mask.c
/**
 * Test AVX-512BW mask operations:
 * - MSVC has supported them since vs2019, see
 *   https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
 * - Clang >= v8.0
 * - GCC >= v7.1
 */
int main(void)
{
    // Equal-compare of identical vectors yields an all-ones 64-bit mask.
    __mmask64 m64 = _mm512_cmpeq_epi8_mask(_mm512_set1_epi8((char)1), _mm512_set1_epi8((char)1));
    // OR the mask with itself (unchanged).
    m64 = _kor_mask64(m64, m64);
    // XOR with itself clears every bit.
    m64 = _kxor_mask64(m64, m64);
    // Round-trip the mask through a 64-bit integer.
    m64 = _cvtu64_mask64(_cvtmask64_u64(m64));
    // Concatenate the low 32-bit halves of two masks (kunpackd).
    m64 = _mm512_kunpackd(m64, m64);
    // Concatenate the low 16-bit halves of two 32-bit masks (kunpackw).
    m64 = (__mmask64)_mm512_kunpackw((__mmask32)m64, (__mmask32)m64);
    // Final mask value, as an integer, becomes the exit status.
    return (int)_cvtmask64_u64(m64);
}
.\numpy\numpy\distutils\checks\extra_avx512dq_mask.c
/**
 * Test DQ mask operations due to:
 * - MSVC has supported it since vs2019 see,
 *   https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
 * - Clang >= v8.0
 * - GCC >= v7.1
 */
int main(void)
{
    // Equal-compare of identical vectors yields an all-ones 8-bit mask.
    __mmask8 m8 = _mm512_cmpeq_epi64_mask(_mm512_set1_epi64(1), _mm512_set1_epi64(1));
    // OR with itself: unchanged.
    m8 = _kor_mask8(m8, m8);
    // XOR with itself: all bits cleared.
    m8 = _kxor_mask8(m8, m8);
    // Round-trip the (now zero) mask through a 32-bit integer.
    m8 = _cvtu32_mask8(_cvtmask8_u32(m8));
    // Final mask value becomes the exit status.
    return (int)_cvtmask8_u32(m8);
}
.\numpy\numpy\distutils\checks\extra_avx512f_reduce.c
/**
 * The following intrinsics don't have direct native support but compilers
 * tend to emulate them.
 * They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19
 */
int main(void)
{
    // All-ones inputs for each element type.
    __m512 one_ps = _mm512_set1_ps(1.0f);
    __m512d one_pd = _mm512_set1_pd(1.0);
    __m512i one_i64 = _mm512_set1_epi64(1);
    // reduce-add over float/double/int64/int32 lanes
    float sum_ps = _mm512_reduce_add_ps(one_ps);
    double sum_pd = _mm512_reduce_add_pd(one_pd);
    int sum_int = (int)_mm512_reduce_add_epi64(one_i64);
    sum_int += (int)_mm512_reduce_add_epi32(one_i64);
    // reduce-multiply
    sum_ps += _mm512_reduce_mul_ps(one_ps);
    sum_pd += _mm512_reduce_mul_pd(one_pd);
    sum_int += (int)_mm512_reduce_mul_epi64(one_i64);
    sum_int += (int)_mm512_reduce_mul_epi32(one_i64);
    // reduce-minimum (signed and unsigned integer variants)
    sum_ps += _mm512_reduce_min_ps(one_ps);
    sum_pd += _mm512_reduce_min_pd(one_pd);
    sum_int += (int)_mm512_reduce_min_epi32(one_i64);
    sum_int += (int)_mm512_reduce_min_epu32(one_i64);
    sum_int += (int)_mm512_reduce_min_epi64(one_i64);
    // reduce-maximum
    sum_ps += _mm512_reduce_max_ps(one_ps);
    sum_pd += _mm512_reduce_max_pd(one_pd);
    sum_int += (int)_mm512_reduce_max_epi32(one_i64);
    sum_int += (int)_mm512_reduce_max_epu32(one_i64);
    sum_int += (int)_mm512_reduce_max_epi64(one_i64);
    // bitwise AND / OR reductions
    sum_int += (int)_mm512_reduce_and_epi32(one_i64);
    sum_int += (int)_mm512_reduce_and_epi64(one_i64);
    sum_int += (int)_mm512_reduce_or_epi32(one_i64);
    sum_int += (int)_mm512_reduce_or_epi64(one_i64);
    // Fold all partial results into the exit status.
    return (int)sum_ps + (int)sum_pd + sum_int;
}
.\numpy\numpy\distutils\checks\extra_vsx3_half_double.c
/**
 * Probe the VSX3 (ISA 3.0) half <-> double precision conversion
 * instructions via inline assembly.
 */
int main(void)
{
    // 16-bit payload treated as an IEEE half-precision value.
    unsigned short bits = 0xFF;
    double f;
    // xscvhpdp: convert half precision -> double precision.
    __asm__ __volatile__("xscvhpdp %x0,%x1" : "=wa"(f) : "wa"(bits));
    // xscvdphp: convert double precision -> half precision.
    __asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits) : "wa" (f));
    // Round-tripped bits become the exit status.
    return bits;
}
.\numpy\numpy\distutils\checks\extra_vsx4_mma.c
typedef __vector float fv4sf_t;        // four-lane single-precision vector
typedef __vector unsigned char vec_t;  // raw byte vector used as MMA input
int main(void)
{
    // __vector_quad is the 512-bit MMA accumulator register type (POWER10).
    __vector_quad acc0;
    float a[4] = {0,1,2,3};
    float b[4] = {0,1,2,3};
    // Reinterpret the float arrays as byte vectors for the MMA builtin.
    vec_t *va = (vec_t *) a;
    vec_t *vb = (vec_t *) b;
    // Rank-1 update: acc0 = outer product of va[0] and vb[0].
    __builtin_mma_xvf32ger(&acc0, va[0], vb[0]);
    fv4sf_t result[4];
    // Spill the accumulator into four float vectors.
    __builtin_mma_disassemble_acc((void *)result, &acc0);
    fv4sf_t c0 = result[0];
    // First lane of the first result row becomes the exit status.
    return (int)((float*)&c0)[0];
}
.\numpy\numpy\distutils\checks\extra_vsx_asm.c
/**
 * Testing ASM VSX register number fixer '%x<n>'
 *
 * old versions of CLANG doesn't support %x<n> in the inline asm template
 * which fixes register number when using any of the register constraints wa, wd, wf.
 *
 * xref:
 * - https://bugs.llvm.org/show_bug.cgi?id=31837
 * - https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
 */
#ifndef __VSX__
    #error "VSX is not supported"
#endif
#include <altivec.h>

#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
    // Older GCC/Clang lack vec_xl/vec_xst; fall back to the vsx variants.
    #define vsx_ld  vec_vsx_ld
    #define vsx_st  vec_vsx_st
#else
    #define vsx_ld  vec_xl
    #define vsx_st  vec_xst
#endif

int main(void)
{
    float z4[] = {0, 0, 0, 0};
    signed int zout[] = {0, 0, 0, 0};

    // Load a float vector and an int vector via the VSX load macro.
    __vector float vz4 = vsx_ld(0, z4);
    __vector signed int asm_ret = vsx_ld(0, zout);

    // The %x0/%x1 operand modifiers are what is actually under test here;
    // the conversion result itself is irrelevant to the probe.
    __asm__ ("xvcvspsxws %x0,%x1" : "=wa" (vz4) : "wa" (asm_ret));

    // Store asm_ret back (NOTE(review): the converted value lands in vz4,
    // not asm_ret — acceptable for a compile-only probe; confirm upstream).
    vsx_st(asm_ret, 0, zout);
    return zout[0];
}
.\numpy\numpy\distutils\checks\test_flags.c
int test_flags;
.\numpy\numpy\distutils\command\autodist.py
"""This module implements additional tests ala autoconf which can be useful.
"""
import textwrap
def check_inline(cmd):
    """Return the inline identifier (may be empty)."""
    cmd._check_compiler()
    # C body exercising the keyword on both a static and a non-static
    # function; the C++ guard skips the probe where `inline` always works.
    template = textwrap.dedent("""
        #ifndef __cplusplus
        static %(inline)s int static_func (void)
        {
            return 0;
        }
        %(inline)s int nostatic_func (void)
        {
            return 0;
        }
        #endif""")
    # Try the standard spelling first, then the GNU/MSVC variants.
    for keyword in ('inline', '__inline__', '__inline'):
        if cmd.try_compile(template % {'inline': keyword}, None, None):
            return keyword
    return ''
def check_restrict(cmd):
    """Return the restrict identifier (may be empty)."""
    cmd._check_compiler()
    # Probe: a function taking a restrict-qualified pointer parameter.
    template = textwrap.dedent("""
        static int static_func (char * %(restrict)s a)
        {
            return 0;
        }
        """)
    # C99 spelling first, then the GNU/MSVC variants.
    for keyword in ('restrict', '__restrict__', '__restrict'):
        if cmd.try_compile(template % {'restrict': keyword}, None, None):
            return keyword
    return ''
def check_compiler_gcc(cmd):
    """Check if the compiler is GCC."""
    cmd._check_compiler()
    # A translation unit that only compiles when __GNUC__ is defined.
    probe = textwrap.dedent("""
        int
        main()
        {
        #if (! defined __GNUC__)
        #error gcc required
        #endif
            return 0;
        }
        """)
    return cmd.try_compile(probe, None, None)
def check_gcc_version_at_least(cmd, major, minor=0, patchlevel=0):
    """
    Check that the gcc version is at least the specified version.

    Parameters
    ----------
    cmd : distutils config command
        Must provide ``_check_compiler`` and ``try_compile``.
    major, minor, patchlevel : int
        Minimum required version components.

    Returns
    -------
    int
        Non-zero when the compiler is gcc >= major.minor.patchlevel.
    """
    cmd._check_compiler()
    version = '.'.join([str(major), str(minor), str(patchlevel)])
    # Compare the version tuple lexicographically.  The previous probe
    # required every component to be >= independently, which wrongly
    # rejected e.g. gcc 10.1 when gcc >= 8.4 was requested.
    body = textwrap.dedent("""
        int
        main()
        {
        #if (! defined __GNUC__) || (__GNUC__ < %(major)d) || \\
            (__GNUC__ == %(major)d && (__GNUC_MINOR__ < %(minor)d || \\
            (__GNUC_MINOR__ == %(minor)d && __GNUC_PATCHLEVEL__ < %(patchlevel)d)))
        #error gcc >= %(version)s required
        #endif
            return 0;
        }
        """)
    kw = {'version': version, 'major': major, 'minor': minor,
          'patchlevel': patchlevel}
    return cmd.try_compile(body % kw, None, None)
def check_gcc_function_attribute(cmd, attribute, name):
    """Return True if the given function attribute is supported."""
    cmd._check_compiler()
    # Promote attribute warnings to errors so an unsupported attribute
    # makes the probe fail to compile instead of merely warning.
    probe = textwrap.dedent("""
        #pragma GCC diagnostic error "-Wattributes"
        #pragma clang diagnostic error "-Wattributes"

        int %s %s(void* unused)
        {
            return 0;
        }

        int
        main()
        {
            return 0;
        }
        """) % (attribute, name)
    return cmd.try_compile(probe, None, None) != 0
def check_gcc_function_attribute_with_intrinsics(cmd, attribute, name, code,
                                                 include):
    """Return True if the given function attribute is supported with
    intrinsics.

    The generated C probe defines ``name`` with ``attribute`` applied,
    executes ``code`` in its body, and includes ``include`` so the
    intrinsics used by ``code`` resolve.

    The previous version had Chinese ``#``-style annotations embedded in
    the generated C source, which is not valid C and made the probe fail
    unconditionally; they are removed here.
    """
    cmd._check_compiler()
    body = textwrap.dedent("""
        #include<%s>
        int %s %s(void)
        {
            %s;
            return 0;
        }
        int
        main()
        {
            return 0;
        }
        """) % (include, attribute, name, code)
    return cmd.try_compile(body, None, None) != 0
def check_gcc_variable_attribute(cmd, attribute):
    """Return True if the given variable attribute is supported."""
    cmd._check_compiler()
    # Attribute warnings are promoted to errors so an unsupported
    # attribute causes a compile failure.
    probe = textwrap.dedent("""
        #pragma GCC diagnostic error "-Wattributes"
        #pragma clang diagnostic error "-Wattributes"

        int %s foo;

        int
        main()
        {
            return 0;
        }
        """) % (attribute, )
    return cmd.try_compile(probe, None, None) != 0
.\numpy\numpy\distutils\command\bdist_rpm.py
import os
import sys
if 'setuptools' in sys.modules:
from setuptools.command.bdist_rpm import bdist_rpm as old_bdist_rpm
else:
from distutils.command.bdist_rpm import bdist_rpm as old_bdist_rpm
class bdist_rpm(old_bdist_rpm):
    """bdist_rpm that tolerates a setup script not named ``setup.py``."""

    def _make_spec_file(self):
        # Generate the stock spec file first.
        spec_file = old_bdist_rpm._make_spec_file(self)
        setup_py = os.path.basename(sys.argv[0])
        if setup_py == 'setup.py':
            return spec_file
        # Rewrite every reference to 'setup.py' to the actual script name.
        return [line.replace('setup.py', setup_py) for line in spec_file]
.\numpy\numpy\distutils\command\build.py
import os
import sys
from distutils.command.build import build as old_build
from distutils.util import get_platform
from numpy.distutils.command.config_compiler import show_fortran_compilers
class build(old_build):
    """distutils 'build' command extended with numpy's Fortran and CPU
    optimization options."""

    # Run compiler-configuration and source-generation commands before the
    # stock distutils build steps.
    sub_commands = [('config_cc', lambda *args: True),
                    ('config_fc', lambda *args: True),
                    ('build_src', old_build.has_ext_modules),
                    ] + old_build.sub_commands

    # Extra command-line options on top of distutils' build command.
    user_options = old_build.user_options + [
        ('fcompiler=', None,
         "specify the Fortran compiler type"),
        ('warn-error', None,
         "turn all warnings into errors (-Werror)"),
        ('cpu-baseline=', None,
         "specify a list of enabled baseline CPU optimizations"),
        ('cpu-dispatch=', None,
         "specify a list of dispatched CPU optimizations"),
        ('disable-optimization', None,
         "disable CPU optimized code(dispatch,simd,fast...)"),
        ('simd-test=', None,
         "specify a list of CPU optimizations to be tested against NumPy SIMD interface"),
        ]

    help_options = old_build.help_options + [
        ('help-fcompiler', None, "list available Fortran compilers",
         show_fortran_compilers),
        ]

    def initialize_options(self):
        # numpy-specific defaults on top of distutils' ones.
        old_build.initialize_options(self)
        self.fcompiler = None
        self.warn_error = False
        self.cpu_baseline = "min"
        self.cpu_dispatch = "max -xop -fma4"
        self.disable_optimization = False
        """
        the '_simd' module is a very large. Adding more dispatched features
        will increase binary size and compile time. By default we minimize
        the targeted features to those most commonly used by the NumPy SIMD interface(NPYV),
        NOTE: any specified features will be ignored if they're:
            - part of the baseline(--cpu-baseline)
            - not part of dispatch-able features(--cpu-dispatch)
            - not supported by compiler or platform
        """
        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F " \
                         "AVX512_SKX VSX VSX2 VSX3 VSX4 NEON ASIMD VX VXE VXE2"

    def finalize_options(self):
        build_scripts = self.build_scripts
        old_build.finalize_options(self)
        # Built scripts go into a platform-and-version-specific directory,
        # e.g. build/scripts.linux-x86_64-3.11, unless the user set one.
        plat_specifier = ".{}-{}.{}".format(get_platform(), *sys.version_info[:2])
        if build_scripts is None:
            self.build_scripts = os.path.join(self.build_base,
                                              'scripts' + plat_specifier)

    def run(self):
        # No extra behavior; sub_commands above do the numpy-specific work.
        old_build.run(self)
.\numpy\numpy\distutils\command\build_clib.py
import os
from glob import glob
import shutil
from distutils.command.build_clib import build_clib as old_build_clib
from distutils.errors import DistutilsSetupError, DistutilsError, \
DistutilsFileError
from numpy.distutils import log
from distutils.dep_util import newer_group
from numpy.distutils.misc_util import (
filter_sources, get_lib_source_files, get_numpy_include_dirs,
has_cxx_sources, has_f_sources, is_sequence
)
from numpy.distutils.ccompiler_opt import new_ccompiler_opt
# distutils declares 'build-clib' and 'build-temp' without the trailing '='
# (i.e. as boolean flags); patch the inherited option table in place so both
# are recognized as options that take an argument.
_l = old_build_clib.user_options
for _i in range(len(_l)):
    if _l[_i][0] in ['build-clib', 'build-temp']:
        _l[_i] = (_l[_i][0] + '=',) + _l[_i][1:]
class build_clib(old_build_clib):
    """distutils build_clib extended with Fortran support and numpy's CPU
    optimization options."""

    description = "build C/C++/F libraries used by Python extensions"

    user_options = old_build_clib.user_options + [
        ('fcompiler=', None,
         "specify the Fortran compiler type"),
        ('inplace', 'i', 'Build in-place'),
        ('parallel=', 'j',
         "number of parallel jobs"),
        ('warn-error', None,
         "turn all warnings into errors (-Werror)"),
        ('cpu-baseline=', None,
         "specify a list of enabled baseline CPU optimizations"),
        ('cpu-dispatch=', None,
         "specify a list of dispatched CPU optimizations"),
        ('disable-optimization', None,
         "disable CPU optimized code(dispatch,simd,fast...)"),
        ]

    boolean_options = old_build_clib.boolean_options + \
        ['inplace', 'warn-error', 'disable-optimization']

    def initialize_options(self):
        old_build_clib.initialize_options(self)
        self.fcompiler = None
        self.inplace = 0
        self.parallel = None
        self.warn_error = None
        self.cpu_baseline = None
        self.cpu_dispatch = None
        self.disable_optimization = None

    def finalize_options(self):
        # --parallel must be an integer job count.
        if self.parallel:
            try:
                self.parallel = int(self.parallel)
            except ValueError as e:
                raise ValueError("--parallel/-j argument must be an integer") from e
        old_build_clib.finalize_options(self)
        # Inherit unset optimization options from the parent 'build' command.
        self.set_undefined_options('build',
                                   ('parallel', 'parallel'),
                                   ('warn_error', 'warn_error'),
                                   ('cpu_baseline', 'cpu_baseline'),
                                   ('cpu_dispatch', 'cpu_dispatch'),
                                   ('disable_optimization', 'disable_optimization')
                                   )

    def have_f_sources(self):
        # True when any configured library contains Fortran sources.
        for (lib_name, build_info) in self.libraries:
            if has_f_sources(build_info.get('sources', [])):
                return True
        return False

    def have_cxx_sources(self):
        # True when any configured library contains C++ sources.
        for (lib_name, build_info) in self.libraries:
            if has_cxx_sources(build_info.get('sources', [])):
                return True
        return False

    def get_source_files(self):
        self.check_library_list(self.libraries)
        filenames = []
        for lib in self.libraries:
            filenames.extend(get_lib_source_files(lib))
        return filenames

    def build_libraries(self, libraries):
        for (lib_name, build_info) in libraries:
            self.build_a_library(build_info, lib_name, libraries)

    def assemble_flags(self, in_flags):
        """Assemble flags from a flag list.

        Parameters
        ----------
        in_flags : None or sequence
            None corresponds to an empty list.  Sequence elements can be
            strings, or callables that return lists of strings; a callable
            takes `self` as its single parameter.

        Returns
        -------
        out_flags : list
        """
        if in_flags is None:
            return []
        out_flags = []
        for in_flag in in_flags:
            if callable(in_flag):
                # Callables expand to a list of flags computed from `self`.
                out_flags += in_flag(self)
            else:
                out_flags.append(in_flag)
        return out_flags
.\numpy\numpy\distutils\command\build_ext.py
"""build_ext command for numpy.distutils: builds C/C++/Fortran extensions."""
import os # 导入操作系统模块
import subprocess # 导入子进程管理模块
from glob import glob # 从 glob 模块中导入 glob 函数
from distutils.dep_util import newer_group # 从 distutils.dep_util 模块中导入 newer_group 函数
from distutils.command.build_ext import build_ext as old_build_ext # 从 distutils.command.build_ext 模块中导入 build_ext 类
from distutils.errors import DistutilsFileError, DistutilsSetupError, DistutilsError # 从 distutils.errors 模块中导入错误类
from distutils.file_util import copy_file # 从 distutils.file_util 模块中导入 copy_file 函数
from numpy.distutils import log # 从 numpy.distutils 模块中导入 log 模块
from numpy.distutils.exec_command import filepath_from_subprocess_output # 从 numpy.distutils.exec_command 模块中导入 filepath_from_subprocess_output 函数
from numpy.distutils.system_info import combine_paths # 从 numpy.distutils.system_info 模块中导入 combine_paths 函数
from numpy.distutils.misc_util import ( # 从 numpy.distutils.misc_util 模块中导入多个函数
filter_sources, get_ext_source_files, get_numpy_include_dirs,
has_cxx_sources, has_f_sources, is_sequence
)
from numpy.distutils.command.config_compiler import show_fortran_compilers # 从 numpy.distutils.command.config_compiler 模块中导入 show_fortran_compilers 函数
from numpy.distutils.ccompiler_opt import new_ccompiler_opt, CCompilerOpt # 从 numpy.distutils.ccompiler_opt 模块中导入 new_ccompiler_opt 函数和 CCompilerOpt 类
# 自定义 build_ext 类,继承自 old_build_ext 类
class build_ext (old_build_ext):
    """build C/C++/Fortran extension modules with numpy.distutils extras
    (Fortran support, CPU dispatch options, MSVC/gfortran interop)."""

    description = "build C/C++/F extensions (compile/link to build directory)"

    # Extra command-line options on top of distutils' build_ext.
    user_options = old_build_ext.user_options + [
        ('fcompiler=', None,
         "specify the Fortran compiler type"),
        ('parallel=', 'j',
         "number of parallel jobs"),
        ('warn-error', None,
         "turn all warnings into errors (-Werror)"),
        ('cpu-baseline=', None,
         "specify a list of enabled baseline CPU optimizations"),
        ('cpu-dispatch=', None,
         "specify a list of dispatched CPU optimizations"),
        ('disable-optimization', None,
         "disable CPU optimized code(dispatch,simd,fast...)"),
        ('simd-test=', None,
         "specify a list of CPU optimizations to be tested against NumPy SIMD interface"),
        ]

    help_options = old_build_ext.help_options + [
        ('help-fcompiler', None, "list available Fortran compilers",
         show_fortran_compilers),
        ]

    boolean_options = old_build_ext.boolean_options + ['warn-error', 'disable-optimization']

    def initialize_options(self):
        old_build_ext.initialize_options(self)
        self.fcompiler = None             # Fortran compiler type
        self.parallel = None              # number of parallel jobs
        self.warn_error = None            # promote warnings to errors
        self.cpu_baseline = None          # baseline CPU optimizations
        self.cpu_dispatch = None          # dispatched CPU optimizations
        self.disable_optimization = None  # disable optimized code paths
        self.simd_test = None             # SIMD interface test targets

    def finalize_options(self):
        # --parallel must be an integer job count.
        if self.parallel:
            try:
                self.parallel = int(self.parallel)
            except ValueError as e:
                raise ValueError("--parallel/-j argument must be an integer") from e

        # Ensure that self.include_dirs and self.distribution.include_dirs
        # refer to the same list object: finalize_options will modify
        # self.include_dirs, but the actual build uses
        # self.distribution.include_dirs.  Include paths are passed to the
        # compiler in this order: numpy paths, --include-dirs paths,
        # Python include paths.
        if isinstance(self.include_dirs, str):
            self.include_dirs = self.include_dirs.split(os.pathsep)
        incl_dirs = self.include_dirs or []
        if self.distribution.include_dirs is None:
            self.distribution.include_dirs = []
        self.include_dirs = self.distribution.include_dirs
        self.include_dirs.extend(incl_dirs)

        old_build_ext.finalize_options(self)
        # Inherit unset options from the parent 'build' command.
        self.set_undefined_options('build',
                                   ('parallel', 'parallel'),
                                   ('warn_error', 'warn_error'),
                                   ('cpu_baseline', 'cpu_baseline'),
                                   ('cpu_dispatch', 'cpu_dispatch'),
                                   ('disable_optimization', 'disable_optimization'),
                                   ('simd_test', 'simd_test')
                                   )
        # Expose the simd-test token list to dispatch-able sources.
        CCompilerOpt.conf_target_groups["simd_test"] = self.simd_test

    def swig_sources(self, sources, extensions=None):
        # Do nothing. Swig sources have already been handled by the
        # build_src command.
        return sources

    def _add_dummy_mingwex_sym(self, c_sources):
        # Compile gfortran_vs2003_hack.c and archive it as the static
        # library '_gfortran_workaround' so a MinGW/gfortran symbol is
        # satisfied when linking with MSVC.
        build_src = self.get_finalized_command("build_src").build_src
        build_clib = self.get_finalized_command("build_clib").build_clib
        objects = self.compiler.compile([os.path.join(build_src, "gfortran_vs2003_hack.c")],
                                        output_dir=self.build_temp)
        self.compiler.create_static_lib(
            objects, "_gfortran_workaround", output_dir=build_clib, debug=self.debug)

    def _process_unlinkable_fobjects(self, objects, libraries,
                                     fcompiler, library_dirs,
                                     unlinkable_fobjects):
        # Work on copies so the caller's sequences are not mutated.
        libraries = list(libraries)
        objects = list(objects)
        unlinkable_fobjects = list(unlinkable_fobjects)

        # Expand possible fake static libraries to object lists; iterate
        # over a copy of the list because "fake" libraries are removed as
        # they are encountered.
        for lib in libraries[:]:
            for libdir in library_dirs:
                fake_lib = os.path.join(libdir, lib + '.fobjects')
                if os.path.isfile(fake_lib):
                    # Replace the fake static library.
                    libraries.remove(lib)
                    with open(fake_lib) as f:
                        # Its contents are the unlinkable Fortran objects.
                        unlinkable_fobjects.extend(f.read().splitlines())
                    # Expand the companion C objects.
                    c_lib = os.path.join(libdir, lib + '.cobjects')
                    with open(c_lib) as f:
                        objects.extend(f.read().splitlines())

        # Wrap the unlinkable objects into linkable ones.
        if unlinkable_fobjects:
            fobjects = [os.path.abspath(obj) for obj in unlinkable_fobjects]
            wrapped = fcompiler.wrap_unlinkable_objects(
                fobjects, output_dir=self.build_temp,
                extra_dll_dir=self.extra_dll_dir)
            objects.extend(wrapped)

        return objects, libraries

    def _libs_with_msvc_and_fortran(self, fcompiler, c_libraries, c_library_dirs):
        # Make g77/gfortran-built static libraries usable by the MSVC
        # linker; no-op without a Fortran compiler.
        if fcompiler is None:
            return

        for libname in c_libraries:
            # MSVC runtime libraries are left alone.
            if libname.startswith('msvc'):
                continue
            fileexists = False
            for libdir in c_library_dirs or []:
                libfile = os.path.join(libdir, '%s.lib' % (libname))
                if os.path.isfile(libfile):
                    fileexists = True
                    break
            if fileexists:
                continue
            # Make g77-compiled static libs available to MSVC.
            fileexists = False
            for libdir in c_library_dirs:
                libfile = os.path.join(libdir, 'lib%s.a' % (libname))
                if os.path.isfile(libfile):
                    # Copy libname.a to name.lib so the MSVC linker finds it.
                    libfile2 = os.path.join(self.build_temp, libname + '.lib')
                    copy_file(libfile, libfile2)
                    if self.build_temp not in c_library_dirs:
                        c_library_dirs.append(self.build_temp)
                    fileexists = True
                    break
            if fileexists:
                continue
            log.warn('could not find library %r in directories %s' % (libname, c_library_dirs))

        # Always use the system linker when using the MSVC compiler.
        f_lib_dirs = []
        for dir in fcompiler.library_dirs:
            # Correct the path when compiling under Cygwin but with a
            # normal Windows Python.
            if dir.startswith('/usr/lib'):
                try:
                    dir = subprocess.check_output(['cygpath', '-w', dir])
                except (OSError, subprocess.CalledProcessError):
                    pass
                else:
                    dir = filepath_from_subprocess_output(dir)
            f_lib_dirs.append(dir)
        c_library_dirs.extend(f_lib_dirs)

        # Make g77-compiled static libs available to MSVC.
        for lib in fcompiler.libraries:
            if not lib.startswith('msvc'):
                c_libraries.append(lib)
                p = combine_paths(f_lib_dirs, 'lib' + lib + '.a')
                if p:
                    dst_name = os.path.join(self.build_temp, lib + '.lib')
                    if not os.path.isfile(dst_name):
                        copy_file(p[0], dst_name)
                    if self.build_temp not in c_library_dirs:
                        c_library_dirs.append(self.build_temp)

    def get_source_files(self):
        self.check_extensions_list(self.extensions)
        filenames = []
        for ext in self.extensions:
            filenames.extend(get_ext_source_files(ext))
        return filenames

    def get_outputs(self):
        self.check_extensions_list(self.extensions)
        outputs = []
        for ext in self.extensions:
            if not ext.sources:
                # Extensions without sources produce no output file.
                continue
            fullname = self.get_ext_fullname(ext.name)
            outputs.append(os.path.join(self.build_lib,
                                        self.get_ext_filename(fullname)))
        return outputs
.\numpy\numpy\distutils\command\build_py.py
from distutils.command.build_py import build_py as old_build_py
from numpy.distutils.misc_util import is_string
class build_py(old_build_py):
    """build_py that also picks up modules generated by build_src."""

    def run(self):
        # Register packages produced by generated python modules.
        build_src = self.get_finalized_command('build_src')
        if build_src.py_modules_dict and self.packages is None:
            self.packages = list(build_src.py_modules_dict.keys())
        old_build_py.run(self)

    def find_package_modules(self, package, package_dir):
        modules = old_build_py.find_package_modules(self, package, package_dir)
        # Append any generated modules recorded for this package.
        build_src = self.get_finalized_command('build_src')
        return modules + build_src.py_modules_dict.get(package, [])

    def find_modules(self):
        # Temporarily hide non-string entries so distutils can process the
        # remaining plain module names, then restore the full list.
        saved_modules = self.py_modules[:]
        self.py_modules[:] = [m for m in saved_modules if is_string(m)]
        found = old_build_py.find_modules(self)
        self.py_modules[:] = saved_modules
        return found
.\numpy\numpy\distutils\command\build_scripts.py
from distutils.command.build_scripts import build_scripts as old_build_scripts
from numpy.distutils import log
from numpy.distutils.misc_util import is_string
class build_scripts(old_build_scripts):
    """build_scripts that also accepts callables producing script paths."""

    def generate_scripts(self, scripts):
        # Separate literal script paths from generator callables.
        plain, factories = [], []
        for item in scripts:
            (plain if is_string(item) else factories).append(item)
        if not factories:
            return plain
        build_dir = self.build_dir
        self.mkpath(build_dir)
        for factory in factories:
            produced = factory(build_dir)
            if not produced:
                continue
            if is_string(produced):
                log.info(" adding '%s' to scripts" % (produced,))
                plain.append(produced)
            else:
                for s in produced:
                    log.info(" adding '%s' to scripts" % (s,))
                plain.extend(list(produced))
        return plain

    def run(self):
        if not self.scripts:
            return
        # Expand callables, then hand the final list to distutils.
        self.scripts = self.generate_scripts(self.scripts)
        self.distribution.scripts = self.scripts
        return old_build_scripts.run(self)

    def get_source_files(self):
        from numpy.distutils.misc_util import get_script_files
        return get_script_files(self.scripts)
.\numpy\numpy\distutils\command\build_src.py
import os
import re
import sys
import shlex
import copy
from distutils.command import build_ext
from distutils.dep_util import newer_group, newer
from distutils.util import get_platform
from distutils.errors import DistutilsError, DistutilsSetupError
from numpy.distutils import log
from numpy.distutils.misc_util import (
fortran_ext_match, appendpath, is_string, is_sequence, get_cmd
)
from numpy.distutils.from_template import process_file as process_f_file
from numpy.distutils.conv_template import process_file as process_c_file
def subst_vars(target, source, d):
    """Substitute any occurrence of @foo@ by d['foo'] from source file into
    target.

    Parameters
    ----------
    target : str
        Path of the output file to write.
    source : str
        Path of the template file to read.
    d : dict
        Maps variable names (the ``foo`` in ``@foo@``) to replacement text.

    Raises
    ------
    KeyError
        If the template references a variable missing from ``d``.
    """
    var = re.compile('@([a-zA-Z_]+)@')
    with open(source, 'r') as fs:
        with open(target, 'w') as ft:
            for l in fs:
                # re.sub with a callback replaces every @var@ occurrence on
                # the line, including lines referencing several distinct
                # variables (the previous implementation only substituted
                # the first variable name found on each line).
                ft.write(var.sub(lambda m: d[m.group(1)], l))
class build_src(build_ext.build_ext):
    """Command that materializes generated sources before the build proper:
    expands ``.src`` templates, runs SWIG/F2PY/Cython hooks, calls
    user-supplied source-generating callables, and registers the results
    with the distribution so build_py/build_ext can pick them up."""

    description = "build sources from SWIG, F2PY files or a function"

    user_options = [
        ('build-src=', 'd', "directory to \"build\" sources to"),
        ('f2py-opts=', None, "list of f2py command line options"),
        ('swig=', None, "path to the SWIG executable"),
        ('swig-opts=', None, "list of SWIG command line options"),
        ('swig-cpp', None, "make SWIG create C++ files (default is autodetected from sources)"),
        ('f2pyflags=', None, "additional flags to f2py (use --f2py-opts= instead)"),
        ('swigflags=', None, "additional flags to swig (use --swig-opts= instead)"),
        ('force', 'f', "forcibly build everything (ignore file timestamps)"),
        ('inplace', 'i',
         "ignore build-lib and put compiled extensions into the source " +
         "directory alongside your pure Python modules"),
        ('verbose-cfg', None,
         "change logging level from WARN to INFO which will show all " +
         "compiler output")
    ]

    boolean_options = ['force', 'inplace', 'verbose-cfg']

    help_options = []

    def initialize_options(self):
        # All options start undefined; finalize_options resolves them.
        self.extensions = None
        self.package = None
        self.py_modules = None
        self.py_modules_dict = None
        self.build_src = None
        self.build_lib = None
        self.build_base = None
        self.force = None
        self.inplace = None
        self.package_dir = None
        self.f2pyflags = None
        self.f2py_opts = None
        self.swigflags = None
        self.swig_opts = None
        self.swig_cpp = None
        self.swig = None
        self.verbose_cfg = None

    def finalize_options(self):
        """Resolve option values, inheriting from the 'build' and 'build_ext'
        commands and normalizing the deprecated flag aliases."""
        self.set_undefined_options('build',
                                   ('build_base', 'build_base'),
                                   ('build_lib', 'build_lib'),
                                   ('force', 'force'))
        if self.package is None:
            self.package = self.distribution.ext_package
        self.extensions = self.distribution.ext_modules
        self.libraries = self.distribution.libraries or []
        self.py_modules = self.distribution.py_modules or []
        self.data_files = self.distribution.data_files or []
        if self.build_src is None:
            # platform/version-specific build dir, e.g. build/src.linux-x86_64-3.10
            plat_specifier = ".{}-{}.{}".format(get_platform(), *sys.version_info[:2])
            self.build_src = os.path.join(self.build_base, 'src'+plat_specifier)
        # maps package name -> list of (package, module_base, source) triples;
        # consumed by the companion build_py command
        self.py_modules_dict = {}
        # --f2pyflags is a deprecated alias for --f2py-opts
        if self.f2pyflags:
            if self.f2py_opts:
                log.warn('ignoring --f2pyflags as --f2py-opts already used')
            else:
                self.f2py_opts = self.f2pyflags
            self.f2pyflags = None
        if self.f2py_opts is None:
            self.f2py_opts = []
        else:
            self.f2py_opts = shlex.split(self.f2py_opts)
        # --swigflags is a deprecated alias for --swig-opts
        if self.swigflags:
            if self.swig_opts:
                log.warn('ignoring --swigflags as --swig-opts already used')
            else:
                self.swig_opts = self.swigflags
            self.swigflags = None
        if self.swig_opts is None:
            self.swig_opts = []
        else:
            self.swig_opts = shlex.split(self.swig_opts)
        # inherit settings from the build_ext command; the local name
        # deliberately shadows the imported distutils.command.build_ext
        # module for the rest of this method
        build_ext = self.get_finalized_command('build_ext')
        if self.inplace is None:
            self.inplace = build_ext.inplace
        if self.swig_cpp is None:
            self.swig_cpp = build_ext.swig_cpp
        for c in ['swig', 'swig_opt']:
            o = '--'+c.replace('_', '-')
            v = getattr(build_ext, c, None)
            if v:
                if getattr(self, c):
                    log.warn('both build_src and build_ext define %s option' % (o))
                else:
                    log.info('using "%s=%s" option from build_ext command' % (o, v))
                    setattr(self, c, v)

    def run(self):
        """Entry point: generate sources if there is anything to build."""
        log.info("build_src")
        if not (self.extensions or self.libraries):
            return
        self.build_sources()

    def build_sources(self):
        """Drive all source-generation steps: py_modules, libraries,
        extensions, data files and npy-pkg config files."""
        if self.inplace:
            # for in-place builds generated files go next to the packages
            self.get_package_dir = \
                self.get_finalized_command('build_py').get_package_dir
        self.build_py_modules_sources()
        for libname_info in self.libraries:
            self.build_library_sources(*libname_info)
        if self.extensions:
            self.check_extensions_list(self.extensions)
            for ext in self.extensions:
                self.build_extension_sources(ext)
        self.build_data_files_sources()
        self.build_npy_pkg_config()

    def build_data_files_sources(self):
        """Resolve callable entries in ``self.data_files`` into concrete
        file names, rewriting the list in place."""
        if not self.data_files:
            return
        log.info('building data_files sources')
        from numpy.distutils.misc_util import get_data_files
        new_data_files = []
        for data in self.data_files:
            if isinstance(data, str):
                new_data_files.append(data)
            elif isinstance(data, tuple):
                d, files = data
                if self.inplace:
                    build_dir = self.get_package_dir('.'.join(d.split(os.sep)))
                else:
                    build_dir = os.path.join(self.build_src, d)
                # separate generator callables from plain file names
                funcs = [f for f in files if hasattr(f, '__call__')]
                files = [f for f in files if not hasattr(f, '__call__')]
                for f in funcs:
                    # one-argument generators receive the build directory
                    if f.__code__.co_argcount==1:
                        s = f(build_dir)
                    else:
                        s = f()
                    if s is not None:
                        if isinstance(s, list):
                            files.extend(s)
                        elif isinstance(s, str):
                            files.append(s)
                        else:
                            raise TypeError(repr(s))
                filenames = get_data_files((d, files))
                new_data_files.append((d, filenames))
            else:
                raise TypeError(repr(data))
        self.data_files[:] = new_data_files

    def _build_npy_pkg_config(self, info, gd):
        """Render one npy-pkg config template.

        ``info`` is a (template, install_dir, subst_dict) triple and ``gd``
        supplies extra substitution variables.  Returns the pair
        (full_install_dir, generated_path).
        """
        template, install_dir, subst_dict = info
        template_dir = os.path.dirname(template)
        for k, v in gd.items():
            subst_dict[k] = v
        if self.inplace == 1:
            generated_dir = os.path.join(template_dir, install_dir)
        else:
            generated_dir = os.path.join(self.build_src, template_dir,
                                         install_dir)
        # output name is the template name minus its extension
        generated = os.path.basename(os.path.splitext(template)[0])
        generated_path = os.path.join(generated_dir, generated)
        if not os.path.exists(generated_dir):
            os.makedirs(generated_dir)
        # substitute @var@ placeholders from subst_dict
        subst_vars(generated_path, template, subst_dict)
        full_install_dir = os.path.join(template_dir, install_dir)
        return full_install_dir, generated_path

    def build_npy_pkg_config(self):
        """Generate all registered npy-pkg config files and append them to
        the distribution's data_files."""
        log.info('build_src: building npy-pkg config files')
        # work on a copy so finalizing does not mutate the real install command
        install_cmd = copy.copy(get_cmd('install'))
        if not install_cmd.finalized == 1:
            install_cmd.finalize_options()
        build_npkg = False
        if self.inplace == 1:
            top_prefix = '.'
        build_npkg = True
        elif hasattr(install_cmd, 'install_libbase'):
            top_prefix = install_cmd.install_libbase
            build_npkg = True
        if build_npkg:
            for pkg, infos in self.distribution.installed_pkg_config.items():
                pkg_path = self.distribution.package_dir[pkg]
                prefix = os.path.join(os.path.abspath(top_prefix), pkg_path)
                d = {'prefix': prefix}
                for info in infos:
                    install_dir, generated = self._build_npy_pkg_config(info, d)
                    self.distribution.data_files.append((install_dir, [generated]))

    def build_py_modules_sources(self):
        """Resolve (package, module, callable) py_modules entries into real
        source files, recording them in ``self.py_modules_dict``."""
        if not self.py_modules:
            return
        log.info('building py_modules sources')
        new_py_modules = []
        for source in self.py_modules:
            # generated modules are (package, module_base, source) triples
            if is_sequence(source) and len(source)==3:
                package, module_base, source = source
                if self.inplace:
                    build_dir = self.get_package_dir(package)
                else:
                    build_dir = os.path.join(self.build_src, os.path.join(*package.split('.')))
                if hasattr(source, '__call__'):
                    target = os.path.join(build_dir, module_base + '.py')
                    source = source(target)
                if source is None:
                    # generator declined to produce the module
                    continue
                modules = [(package, module_base, source)]
                if package not in self.py_modules_dict:
                    self.py_modules_dict[package] = []
                self.py_modules_dict[package] += modules
            else:
                new_py_modules.append(source)
        self.py_modules[:] = new_py_modules

    def build_library_sources(self, lib_name, build_info):
        """Run the source generators and template expansion for one
        C/Fortran library entry, updating build_info['sources'] in place."""
        sources = list(build_info.get('sources', []))
        if not sources:
            return
        log.info('building library "%s" sources' % (lib_name))
        sources = self.generate_sources(sources, (lib_name, build_info))
        sources = self.template_sources(sources, (lib_name, build_info))
        sources, h_files = self.filter_h_files(sources)
        if h_files:
            log.info('%s - nothing done with h_files = %s',
                     self.package, h_files)
        build_info['sources'] = sources
        return

    def build_extension_sources(self, ext):
        """Run the full source pipeline (generators, templates, SWIG, f2py,
        pyrex) for one extension, updating ``ext.sources`` in place."""
        sources = list(ext.sources)
        log.info('building extension "%s" sources' % (ext.name))
        fullname = self.get_ext_fullname(ext.name)
        modpath = fullname.split('.')
        package = '.'.join(modpath[0:-1])
        if self.inplace:
            self.ext_target_dir = self.get_package_dir(package)
        # NOTE(review): swig_sources and f2py_sources are defined elsewhere
        # in the full module; not visible in this excerpt.
        sources = self.generate_sources(sources, ext)
        sources = self.template_sources(sources, ext)
        sources = self.swig_sources(sources, ext)
        sources = self.f2py_sources(sources, ext)
        sources = self.pyrex_sources(sources, ext)
        # generated .py files are routed to build_py via py_modules_dict
        sources, py_files = self.filter_py_files(sources)
        if package not in self.py_modules_dict:
            self.py_modules_dict[package] = []
        modules = []
        for f in py_files:
            module = os.path.splitext(os.path.basename(f))[0]
            modules.append((package, module, f))
        self.py_modules_dict[package] += modules
        sources, h_files = self.filter_h_files(sources)
        if h_files:
            log.info('%s - nothing done with h_files = %s',
                     package, h_files)
        ext.sources = sources

    def generate_sources(self, sources, extension):
        """Call every callable entry in *sources* (with the extension and a
        build directory) and collect the file names it returns.

        ``extension`` is either an Extension instance or a
        (lib_name, build_info) tuple.
        """
        new_sources = []
        func_sources = []
        for source in sources:
            if is_string(source):
                new_sources.append(source)
            else:
                func_sources.append(source)
        if not func_sources:
            return new_sources
        if self.inplace and not is_sequence(extension):
            build_dir = self.ext_target_dir
        else:
            if is_sequence(extension):
                name = extension[0]
            else:
                name = extension.name
            # mirror the package path under build_src
            build_dir = os.path.join(*([self.build_src]
                                       +name.split('.')[:-1]))
        self.mkpath(build_dir)
        # --verbose-cfg raises the log threshold so compiler output shows
        if self.verbose_cfg:
            new_level = log.INFO
        else:
            new_level = log.WARN
        old_level = log.set_threshold(new_level)
        for func in func_sources:
            source = func(extension, build_dir)
            if not source:
                continue
            if is_sequence(source):
                # NOTE(review): list comprehension used purely for the
                # logging side effect
                [log.info(" adding '%s' to sources." % (s,)) for s in source]
                new_sources.extend(source)
            else:
                log.info(" adding '%s' to sources." % (source,))
                new_sources.append(source)
        log.set_threshold(old_level)
        return new_sources

    def filter_py_files(self, sources):
        # split out generated .py files
        return self.filter_files(sources, ['.py'])

    def filter_h_files(self, sources):
        # split out header-like files
        return self.filter_files(sources, ['.h', '.hpp', '.inc'])

    def filter_files(self, sources, exts = []):
        """Partition *sources* into (other_sources, files_matching_exts).
        The mutable default is never mutated here, only read."""
        new_sources = []
        files = []
        for source in sources:
            (base, ext) = os.path.splitext(source)
            if ext in exts:
                files.append(source)
            else:
                new_sources.append(source)
        return new_sources, files

    def template_sources(self, sources, extension):
        """Expand ``.src`` template files: Fortran-ish templates go through
        from_template, everything else through conv_template."""
        new_sources = []
        if is_sequence(extension):
            depends = extension[1].get('depends')
            include_dirs = extension[1].get('include_dirs')
        else:
            depends = extension.depends
            include_dirs = extension.include_dirs
        # NOTE(review): 'depends' may be None for dict-style build_info;
        # newer_group would then fail — confirm callers always provide it.
        for source in sources:
            (base, ext) = os.path.splitext(source)
            if ext == '.src':
                if self.inplace:
                    target_dir = os.path.dirname(base)
                else:
                    target_dir = appendpath(self.build_src, os.path.dirname(base))
                self.mkpath(target_dir)
                target_file = os.path.join(target_dir, os.path.basename(base))
                # regenerate only when forced or when inputs are newer
                if (self.force or newer_group([source] + depends, target_file)):
                    if _f_pyf_ext_match(base):
                        log.info("from_template:> %s" % (target_file))
                        outstr = process_f_file(source)
                    else:
                        log.info("conv_template:> %s" % (target_file))
                        outstr = process_c_file(source)
                    with open(target_file, 'w') as fid:
                        fid.write(outstr)
                if _header_ext_match(target_file):
                    d = os.path.dirname(target_file)
                    if d not in include_dirs:
                        log.info(" adding '%s' to include_dirs." % (d))
                        include_dirs.append(d)
                new_sources.append(target_file)
            else:
                new_sources.append(source)
        return new_sources

    def pyrex_sources(self, sources, extension):
        """Pyrex not supported; this remains for Cython support (see below)"""
        new_sources = []
        ext_name = extension.name.split('.')[-1]
        for source in sources:
            (base, ext) = os.path.splitext(source)
            if ext == '.pyx':
                target_file = self.generate_a_pyrex_source(base, ext_name,
                                                           source,
                                                           extension)
                new_sources.append(target_file)
            else:
                new_sources.append(source)
        return new_sources

    def generate_a_pyrex_source(self, base, ext_name, source, extension):
        """Pyrex is not supported, but some projects monkeypatch this method.
        That allows compiling Cython code, see gh-6955.
        This method will remain here for compatibility reasons.
        """
        return []
# Matches Fortran/f2py source extensions (case-insensitive):
# .f90 .f95 .f77 .for .ftn .f .pyf
_f_pyf_ext_match = re.compile(r'.*\.(f90|f95|f77|for|ftn|f|pyf)\Z', re.I).match
# Matches header-like extensions: .inc .h .hpp
_header_ext_match = re.compile(r'.*\.(inc|h|hpp)\Z', re.I).match
# Parses a SWIG "%module" directive, optionally carrying a
# package="..." option; exposes named groups 'package' and 'name'.
_swig_module_name_match = re.compile(r'\s*%module\s*(.*\(\s*package\s*=\s*"(?P<package>[\w_]+)".*\)|)\s*(?P<name>[\w_]+)',
                                     re.I).match
# Detect emacs-style "-*- c -*-" / "-*- c++ -*-" mode lines used to tag
# SWIG interface files with their target language.
_has_c_header = re.compile(r'-\*-\s*c\s*-\*-', re.I).search
_has_cpp_header = re.compile(r'-\*-\s*c\+\+\s*-\*-', re.I).search
def get_swig_target(source):
    """Return 'c++' or 'c' according to the emacs-style mode line on the
    first line of a SWIG interface file, or None when neither tag appears.
    A 'c' tag takes precedence when both are present, matching the
    historical behavior."""
    with open(source) as f:
        first_line = f.readline()
        language = None
        if _has_cpp_header(first_line):
            language = 'c++'
        if _has_c_header(first_line):
            language = 'c'
    return language
def get_swig_modulename(source):
    """Return the module name declared by the first ``%module`` directive
    found in a SWIG interface file, or None when no directive exists."""
    name = None
    with open(source) as f:
        for line in f:
            match = _swig_module_name_match(line)
            if match is None:
                continue
            name = match.group('name')
            break
    return name
def _find_swig_target(target_dir, name):
for ext in ['.cpp', '.c']:
target = os.path.join(target_dir, '%s_wrap%s' % (name, ext))
if os.path.isfile(target):
break
return target
# Matches "python module <name>" declarations in .pyf signature files.
_f2py_module_name_match = re.compile(r'\s*python\s*module\s*(?P<name>[\w_]+)',
                                     re.I).match
# Matches f2py "__user__" callback modules, which must be skipped when
# looking for the real module name.
_f2py_user_module_name_match = re.compile(r'\s*python\s*module\s*(?P<name>[\w_]*?'
                                          r'__user__[\w_]*)', re.I).match
def get_f2py_modulename(source):
    """Return the first non-``__user__`` python module name declared in an
    f2py signature file, or None when no such declaration is found."""
    name = None
    with open(source) as f:
        for line in f:
            match = _f2py_module_name_match(line)
            if match is None:
                continue
            if _f2py_user_module_name_match(line):
                # skip f2py callback ("__user__") modules
                continue
            name = match.group('name')
            break
    return name
.\numpy\numpy\distutils\command\config.py
import os
import signal
import subprocess
import sys
import textwrap
import warnings
from distutils.command.config import config as old_config
from distutils.command.config import LANG_EXT
from distutils import log
from distutils.file_util import copy_file
from distutils.ccompiler import CompileError, LinkError
import distutils
from numpy.distutils.exec_command import filepath_from_subprocess_output
from numpy.distutils.mingw32ccompiler import generate_manifest
# The following functions come from the numpy.distutils.command.autodist module:
from numpy.distutils.command.autodist import (check_gcc_function_attribute,
check_gcc_function_attribute_with_intrinsics,
check_gcc_variable_attribute,
check_gcc_version_at_least,
check_inline,
check_restrict,
check_compiler_gcc)
# Register Fortran source extensions with distutils' config language map
# so _compile/_link can dispatch on lang='f77'/'f90'.
LANG_EXT['f77'] = '.f'
LANG_EXT['f90'] = '.f90'
class config(old_config):
    """Extended distutils ``config`` command: adds Fortran compiler support
    and the compile/link-based feature checks used by numpy's build
    (declarations, macros, type sizes, functions, GCC attributes)."""

    # NOTE: this appends to the *base class* option table at import time, so
    # the --fcompiler option is visible to the plain config command as well.
    old_config.user_options += [
        ('fcompiler=', None, "specify the Fortran compiler type"),
        ]

    def initialize_options(self):
        self.fcompiler = None
        old_config.initialize_options(self)

    def _check_compiler(self):
        """Make sure C (and, when requested, Fortran) compiler instances
        exist, are initialized and customized for this distribution."""
        old_config._check_compiler(self)
        from numpy.distutils.fcompiler import FCompiler, new_fcompiler

        if sys.platform == 'win32' and (self.compiler.compiler_type in
                                        ('msvc', 'intelw', 'intelemw')):
            # MSVC initialization fails with an unhelpful error when Visual
            # Studio is absent; catch it early and explain what to do.
            if not self.compiler.initialized:
                try:
                    self.compiler.initialize()
                except OSError as e:
                    msg = textwrap.dedent("""\
                        Could not initialize compiler instance: do you have Visual Studio
                        installed? If you are trying to build with MinGW, please use "python setup.py
                        build -c mingw32" instead. If you have Visual Studio installed, check it is
                        correctly installed, and the right version (VS 2015 as of this writing).
                        Original exception was: %s, and the Compiler class was %s
                        ============================================================================""") \
                        % (e, self.compiler.__class__.__name__)
                    print(textwrap.dedent("""\
                        ============================================================================"""))
                    raise distutils.errors.DistutilsPlatformError(msg) from e
            # MSVC 10+ no longer embeds the manifest automatically; ask the
            # linker for one explicitly.
            from distutils import msvc9compiler
            if msvc9compiler.get_build_version() >= 10:
                for ldflags in [self.compiler.ldflags_shared,
                                self.compiler.ldflags_shared_debug]:
                    if '/MANIFEST' not in ldflags:
                        ldflags.append('/MANIFEST')

        if not isinstance(self.fcompiler, FCompiler):
            self.fcompiler = new_fcompiler(compiler=self.fcompiler,
                                           dry_run=self.dry_run, force=1,
                                           c_compiler=self.compiler)
            if self.fcompiler is not None:
                self.fcompiler.customize(self.distribution)
                if self.fcompiler.get_version():
                    self.fcompiler.customize_cmd(self)
                    self.fcompiler.show_customization()

    def _wrap_method(self, mth, lang, args):
        """Invoke an old_config unbound method, temporarily swapping in the
        Fortran compiler for f77/f90, and normalize failures to
        CompileError so try_compile/try_link report them uniformly."""
        from distutils.ccompiler import CompileError
        from distutils.errors import DistutilsExecError
        save_compiler = self.compiler
        if lang in ['f77', 'f90']:
            self.compiler = self.fcompiler
            if self.compiler is None:
                # no Fortran compiler was found/configured
                raise CompileError('%s compiler is not set' % (lang,))
        try:
            ret = mth(*((self,)+args))
        except (DistutilsExecError, CompileError) as e:
            self.compiler = save_compiler
            raise CompileError from e
        self.compiler = save_compiler
        return ret

    def _compile(self, body, headers, include_dirs, lang):
        src, obj = self._wrap_method(old_config._compile, lang,
                                     (body, headers, include_dirs, lang))
        # some compilers emit a .d dependency file next to the object;
        # register it so _clean() removes it too
        self.temp_files.append(obj + '.d')
        return src, obj

    def _link(self, body,
              headers, include_dirs,
              libraries, library_dirs, lang):
        if self.compiler.compiler_type == 'msvc':
            # BUG FIX: was "(libraries or [:])" — a SyntaxError; the intent
            # is to work on shallow copies so the caller's lists are not
            # mutated below.
            libraries = (libraries or [])[:]
            library_dirs = (library_dirs or [])[:]
            if lang in ['f77', 'f90']:
                lang = 'c'  # always link in C for MSVC
                if self.fcompiler:
                    for d in self.fcompiler.library_dirs or []:
                        # correct path when compiling under Cygwin but with
                        # a normal Windows Python
                        if d.startswith('/usr/lib'):
                            try:
                                d = subprocess.check_output(['cygpath', '-w', d])
                            except (OSError, subprocess.CalledProcessError):
                                pass
                            else:
                                d = filepath_from_subprocess_output(d)
                        library_dirs.append(d)
                    for libname in self.fcompiler.libraries or []:
                        if libname not in libraries:
                            libraries.append(libname)
            for libname in libraries:
                if libname.startswith('msvc'):
                    continue
                fileexists = False
                for libdir in library_dirs or []:
                    libfile = os.path.join(libdir, '%s.lib' % (libname))
                    if os.path.isfile(libfile):
                        fileexists = True
                        break
                if fileexists:
                    continue
                # make g77-style static libs available to the MSVC linker by
                # copying libname.a to name.lib
                for libdir in library_dirs:
                    libfile = os.path.join(libdir, 'lib%s.a' % (libname))
                    if os.path.isfile(libfile):
                        libfile2 = os.path.join(libdir, '%s.lib' % (libname))
                        copy_file(libfile, libfile2)
                        self.temp_files.append(libfile2)
                        fileexists = True
                        break
                if fileexists:
                    continue
                log.warn('could not find library %r in directories %s'
                         % (libname, library_dirs))
        elif self.compiler.compiler_type == 'mingw32':
            generate_manifest(self)
        return self._wrap_method(old_config._link, lang,
                                 (body, headers, include_dirs,
                                  libraries, library_dirs, lang))

    def check_header(self, header, include_dirs=None, library_dirs=None, lang='c'):
        """Return True when *header* can be included successfully."""
        self._check_compiler()
        return self.try_compile(
                "/* we need a dummy line to make distutils happy */",
                [header], include_dirs)

    def check_decl(self, symbol,
                   headers=None, include_dirs=None):
        """Return True when *symbol* is declared (as a macro or an
        identifier) after including *headers*."""
        self._check_compiler()
        body = textwrap.dedent("""
            int main(void)
            {
            #ifndef %s
                (void) %s;
            #endif
                ;
                return 0;
            }""") % (symbol, symbol)
        return self.try_compile(body, headers, include_dirs)

    def check_macro_true(self, symbol,
                         headers=None, include_dirs=None):
        """Return True when the preprocessor evaluates *symbol* as true."""
        self._check_compiler()
        body = textwrap.dedent("""
            int main(void)
            {
            #if %s
            #else
            #error false or undefined macro
            #endif
                ;
                return 0;
            }""") % (symbol,)
        return self.try_compile(body, headers, include_dirs)

    def check_type(self, type_name, headers=None, include_dirs=None,
                   library_dirs=None):
        """Check type availability. Return True if the type can be compiled,
        False otherwise"""
        self._check_compiler()

        # body is fully formatted here; no second %-substitution is needed
        # (the old code re-applied "%" with an unused 'type' key)
        body = textwrap.dedent(r"""
            int main(void) {
              if ((%(name)s *) 0)
                return 0;
              if (sizeof (%(name)s))
                return 0;
            }
            """) % {'name': type_name}

        st = False
        try:
            try:
                self._compile(body,
                              headers, include_dirs, 'c')
                st = True
            except distutils.errors.CompileError:
                st = False
        finally:
            self._clean()
        return st

    def check_type_size(self, type_name, headers=None, include_dirs=None, library_dirs=None, expected=None):
        """Check size of a given type."""
        self._check_compiler()

        # first make sure the type can be compiled at all; the negative
        # array size trick makes compilation fail when the assertion is false
        body = textwrap.dedent(r"""
            typedef %(type)s npy_check_sizeof_type;
            int main (void)
            {
                static int test_array [1 - 2 * !(((long) (sizeof (npy_check_sizeof_type))) >= 0)];
                test_array [0] = 0
                ;
                return 0;
            }
            """)
        self._compile(body % {'type': type_name},
                      headers, include_dirs, 'c')
        self._clean()

        if expected:
            # try the caller-suggested sizes first — usually a single compile
            body = textwrap.dedent(r"""
                typedef %(type)s npy_check_sizeof_type;
                int main (void)
                {
                    static int test_array [1 - 2 * !(((long) (sizeof (npy_check_sizeof_type))) == %(size)s)];
                    test_array [0] = 0
                    ;
                    return 0;
                }
                """)
            for size in expected:
                try:
                    self._compile(body % {'type': type_name, 'size': size},
                                  headers, include_dirs, 'c')
                    self._clean()
                    return size
                except CompileError:
                    pass

        # otherwise: find an upper bound on a log scale, then binary search
        body = textwrap.dedent(r"""
            typedef %(type)s npy_check_sizeof_type;
            int main (void)
            {
                static int test_array [1 - 2 * !(((long) (sizeof (npy_check_sizeof_type))) <= %(size)s)];
                test_array [0] = 0
                ;
                return 0;
            }
            """)
        low = 0
        mid = 0
        while True:
            try:
                self._compile(body % {'type': type_name, 'size': mid},
                              headers, include_dirs, 'c')
                self._clean()
                break
            except CompileError:
                low = mid + 1
                mid = 2 * mid + 1
        high = mid
        # binary search between low and high
        while low != high:
            mid = (high - low) // 2 + low
            try:
                self._compile(body % {'type': type_name, 'size': mid},
                              headers, include_dirs, 'c')
                self._clean()
                high = mid
            except CompileError:
                low = mid + 1
        return low

    def check_func(self, func,
                   headers=None, include_dirs=None,
                   libraries=None, library_dirs=None,
                   decl=False, call=False, call_args=None):
        """Return True when *func* can be linked (and optionally called)."""
        self._check_compiler()
        body = []
        if decl:
            # a string decl is used verbatim; any other truthy value gets a
            # generic prototype
            if isinstance(decl, str):
                body.append(decl)
            else:
                body.append("int %s (void);" % func)
        # handle MSVC intrinsics: force MS compiler to make a function call.
        body.append("#ifdef _MSC_VER")
        body.append("#pragma function(%s)" % func)
        body.append("#endif")
        body.append("int main (void) {")
        if call:
            if call_args is None:
                call_args = ''
            body.append("  %s(%s);" % (func, call_args))
        else:
            # referencing the symbol is enough to force a link against it
            body.append("  %s;" % func)
        body.append("  return 0;")
        body.append("}")
        body = '\n'.join(body) + "\n"
        return self.try_link(body, headers, include_dirs,
                             libraries, library_dirs)

    def check_funcs_once(self, funcs,
                         headers=None, include_dirs=None,
                         libraries=None, library_dirs=None,
                         decl=False, call=False, call_args=None):
        """Check a list of functions at once.

        This is useful to speed up things, since all the functions in the
        funcs list will be put in one compilation unit.

        Arguments
        ---------
        funcs : seq
            list of functions to test
        include_dirs : seq
            list of header paths
        libraries : seq
            list of libraries to link the code snippet to
        library_dirs : seq
            list of library paths
        decl : dict
            for every (key, value), the declaration in the value will be
            used for function in key. If a function is not in the
            dictionary, no declaration will be used.
        call : dict
            for every item (f, value), if the value is True, a call will be
            done to the function f.
        """
        self._check_compiler()
        body = []
        if decl:
            for f, v in decl.items():
                if v:
                    body.append("int %s (void);" % f)
        # handle MSVC intrinsics: force MS compiler to make function calls.
        body.append("#ifdef _MSC_VER")
        for func in funcs:
            body.append("#pragma function(%s)" % func)
        body.append("#endif")
        body.append("int main (void) {")
        if call:
            for f in funcs:
                if f in call and call[f]:
                    if not (call_args and f in call_args and call_args[f]):
                        args = ''
                    else:
                        args = call_args[f]
                    body.append("  %s(%s);" % (f, args))
                else:
                    body.append("  %s;" % f)
        else:
            for f in funcs:
                body.append("  %s;" % f)
        body.append("  return 0;")
        body.append("}")
        body = '\n'.join(body) + "\n"
        return self.try_link(body, headers, include_dirs,
                             libraries, library_dirs)

    def check_inline(self):
        """Return the inline keyword recognized by the compiler, empty string
        otherwise."""
        return check_inline(self)

    def check_restrict(self):
        """Return the restrict keyword recognized by the compiler, empty string
        otherwise."""
        return check_restrict(self)

    def check_compiler_gcc(self):
        """Return True if the C compiler is gcc"""
        return check_compiler_gcc(self)

    def check_gcc_function_attribute(self, attribute, name):
        return check_gcc_function_attribute(self, attribute, name)

    def check_gcc_function_attribute_with_intrinsics(self, attribute, name,
                                                     code, include):
        return check_gcc_function_attribute_with_intrinsics(self, attribute,
                                                            name, code, include)

    def check_gcc_variable_attribute(self, attribute):
        return check_gcc_variable_attribute(self, attribute)

    def check_gcc_version_at_least(self, major, minor=0, patchlevel=0):
        """Return True if the GCC version is greater than or equal to the
        specified version."""
        return check_gcc_version_at_least(self, major, minor, patchlevel)

    def get_output(self, body, headers=None, include_dirs=None,
                   libraries=None, library_dirs=None,
                   lang="c", use_tee=None):
        """Try to compile, link to an executable, and run a program
        built from 'body' and 'headers'. Returns the exit status code
        of the program and its output.
        """
        warnings.warn("\n+++++++++++++++++++++++++++++++++++++++++++++++++\n"
                      "Usage of get_output is deprecated: please do not \n"
                      "use it anymore, and avoid configuration checks \n"
                      "involving running executable on the target machine.\n"
                      "+++++++++++++++++++++++++++++++++++++++++++++++++\n",
                      DeprecationWarning, stacklevel=2)
        self._check_compiler()
        exitcode, output = 255, ''
        try:
            grabber = GrabStdout()
            try:
                src, obj, exe = self._link(body, headers, include_dirs,
                                           libraries, library_dirs, lang)
                grabber.restore()
            except Exception:
                output = grabber.data
                grabber.restore()
                raise
            exe = os.path.join('.', exe)
            try:
                output = subprocess.check_output([exe], cwd='.')
            except subprocess.CalledProcessError as exc:
                exitstatus = exc.returncode
                output = ''
            except OSError:
                # preserve a shell-style "command not found" status
                exitstatus = 127
                output = ''
            else:
                output = filepath_from_subprocess_output(output)
                # BUG FIX: check_output only returns on exit status 0, but
                # exitstatus was previously left unbound on this path and
                # os.WEXITSTATUS(exitstatus) raised NameError.
                exitstatus = 0
            if hasattr(os, 'WEXITSTATUS'):
                # NOTE(review): returncode is not a raw wait status, so
                # WEXITSTATUS/WIFSIGNALED use is questionable but kept for
                # backward compatibility.
                exitcode = os.WEXITSTATUS(exitstatus)
                if os.WIFSIGNALED(exitstatus):
                    sig = os.WTERMSIG(exitstatus)
                    log.error('subprocess exited with signal %d' % (sig,))
                    if sig == signal.SIGINT:
                        # control-C
                        raise KeyboardInterrupt
            else:
                exitcode = exitstatus
            log.info("success!")
        except (CompileError, LinkError):
            log.info("failure.")
        self._clean()
        return exitcode, output
class GrabStdout:
    """Tee for ``sys.stdout``: while active, everything written to stdout
    is forwarded to the real stream and also accumulated in ``self.data``.
    Call :meth:`restore` to reinstall the original stream."""

    def __init__(self):
        # remember the real stream, then install ourselves in its place
        self.sys_stdout = sys.stdout
        self.data = ''
        sys.stdout = self

    def write(self, data):
        """Forward *data* to the real stdout and record a copy."""
        real_stream = self.sys_stdout
        real_stream.write(data)
        self.data += data

    def flush(self):
        """Flush the underlying real stdout."""
        self.sys_stdout.flush()

    def restore(self):
        """Reinstall the original ``sys.stdout``."""
        sys.stdout = self.sys_stdout