NumPy 源码解析(五十二)
.\numpy\numpy\_core\records.pyi
import os
from collections.abc import Sequence, Iterable
from typing import (
Any,
TypeVar,
overload,
Protocol,
SupportsIndex,
Literal
)
from numpy import (
ndarray,
dtype,
generic,
void,
_ByteOrder,
_SupportsBuffer,
_ShapeType,
_DType_co,
_OrderKACF,
)
from numpy._typing import (
ArrayLike,
DTypeLike,
NDArray,
_ShapeLike,
_ArrayLikeInt_co,
_ArrayLikeVoid_co,
_NestedSequence,
)
# Scalar element-type variable, bound to np.generic.
_SCT = TypeVar("_SCT", bound=generic)

# Shorthand: a recarray of arbitrary shape holding scalars of type `_SCT`.
_RecArray = recarray[Any, dtype[_SCT]]
class _SupportsReadInto(Protocol):
    """Protocol for seekable binary file-like objects readable via ``readinto``."""
    def seek(self, offset: int, whence: int, /) -> object: ...
    def tell(self, /) -> int: ...
    def readinto(self, buffer: memoryview, /) -> int: ...
class record(void):
    """Scalar type for recarray elements; fields are also reachable as attributes."""
    def __getattribute__(self, attr: str) -> Any: ...
    def __setattr__(self, attr: str, val: ArrayLike) -> None: ...
    def pprint(self) -> str: ...
    @overload
    def __getitem__(self, key: str | SupportsIndex) -> Any: ...
    @overload
    def __getitem__(self, key: list[str]) -> record: ...
class recarray(ndarray[_ShapeType, _DType_co]):
    """ndarray subclass exposing structured-dtype fields as attributes.

    Fix vs. the pasted source: the third ``__getitem__`` signature was
    missing its ``@overload`` decorator, which makes the overload chain
    invalid (a plain ``def`` in the middle of ``@overload`` definitions).
    """
    # With `formats` given (and `dtype=None`) the element type is `record`.
    @overload
    def __new__(
        subtype,
        shape: _ShapeLike,
        dtype: None = ...,
        buf: None | _SupportsBuffer = ...,
        offset: SupportsIndex = ...,
        strides: None | _ShapeLike = ...,
        *,
        formats: DTypeLike,
        names: None | str | Sequence[str] = ...,
        titles: None | str | Sequence[str] = ...,
        byteorder: None | _ByteOrder = ...,
        aligned: bool = ...,
        order: _OrderKACF = ...,
    ) -> recarray[Any, dtype[record]]: ...
    # With an explicit `dtype` the element type is unconstrained.
    @overload
    def __new__(
        subtype,
        shape: _ShapeLike,
        dtype: DTypeLike,
        buf: None | _SupportsBuffer = ...,
        offset: SupportsIndex = ...,
        strides: None | _ShapeLike = ...,
        formats: None = ...,
        names: None = ...,
        titles: None = ...,
        byteorder: None = ...,
        aligned: Literal[False] = ...,
        order: _OrderKACF = ...,
    ) -> recarray[Any, dtype[Any]]: ...
    def __array_finalize__(self, obj: object) -> None: ...
    def __getattribute__(self, attr: str) -> Any: ...
    def __setattr__(self, attr: str, val: ArrayLike) -> None: ...
    # Integer / integer-array indexing returns an element (Any).
    @overload
    def __getitem__(self, indx: (
        SupportsIndex
        | _ArrayLikeInt_co
        | tuple[SupportsIndex | _ArrayLikeInt_co, ...]
    )) -> Any: ...
    # Slicing a void-dtype recarray keeps the recarray type.
    @overload
    def __getitem__(self: recarray[Any, dtype[void]], indx: (
        None
        | slice
        | ellipsis
        | SupportsIndex
        | _ArrayLikeInt_co
        | tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...]
    )) -> recarray[Any, _DType_co]: ...
    # Slicing any other recarray degrades to a plain ndarray.
    @overload
    def __getitem__(self, indx: (
        None
        | slice
        | ellipsis
        | SupportsIndex
        | _ArrayLikeInt_co
        | tuple[None | slice | ellipsis | _ArrayLikeInt_co | SupportsIndex, ...]
    )) -> ndarray[Any, _DType_co]: ...
    @overload
    def __getitem__(self, indx: str) -> NDArray[Any]: ...
    @overload
    def __getitem__(self, indx: list[str]) -> recarray[_ShapeType, dtype[record]]: ...
    # `field` reads a field when `val` is None, writes it otherwise.
    @overload
    def field(self, attr: int | str, val: None = ...) -> Any: ...
    @overload
    def field(self, attr: int | str, val: ArrayLike) -> None: ...
class format_parser:
    """Converts `formats`, `names`, `titles` descriptions into a structured dtype."""
    # The structured dtype built by __init__.
    dtype: dtype[void]
    def __init__(
        self,
        formats: DTypeLike,
        names: None | str | Sequence[str],
        titles: None | str | Sequence[str],
        aligned: bool = ...,
        byteorder: None | _ByteOrder = ...,
    ) -> None: ...
__all__: list[str]
# Build a record array from a sequence of per-field arrays.  With an explicit
# `dtype` the scalar type is unconstrained; with `formats` given instead the
# result holds `record` scalars.
@overload
def fromarrays(
    arrayList: Iterable[ArrayLike],
    dtype: DTypeLike = ...,
    shape: None | _ShapeLike = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
) -> _RecArray[Any]: ...
@overload
def fromarrays(
    arrayList: Iterable[ArrayLike],
    dtype: None = ...,
    shape: None | _ShapeLike = ...,
    *,
    formats: DTypeLike,
    names: None | str | Sequence[str] = ...,
    titles: None | str | Sequence[str] = ...,
    aligned: bool = ...,
    byteorder: None | _ByteOrder = ...,
) -> _RecArray[record]: ...
# Build a record array from a (nested) sequence of record tuples; both
# overloads produce `record` scalars, differing only in how the layout is
# specified (`dtype` vs. `formats`).
@overload
def fromrecords(
    recList: _ArrayLikeVoid_co | tuple[Any, ...] | _NestedSequence[tuple[Any, ...]],
    dtype: DTypeLike = ...,
    shape: None | _ShapeLike = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
) -> _RecArray[record]: ...
@overload
def fromrecords(
    recList: _ArrayLikeVoid_co | tuple[Any, ...] | _NestedSequence[tuple[Any, ...]],
    dtype: None = ...,
    shape: None | _ShapeLike = ...,
    *,
    formats: DTypeLike = ...,
    names: None | str | Sequence[str] = ...,
    titles: None | str | Sequence[str] = ...,
    aligned: bool = ...,
    byteorder: None | _ByteOrder = ...,
) -> _RecArray[record]: ...
# Build a record array by reinterpreting a binary buffer; layout is given
# either by `dtype` or by `formats`/`names`/`titles`.
@overload
def fromstring(
    datastring: _SupportsBuffer,
    dtype: DTypeLike,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
) -> _RecArray[record]: ...
@overload
def fromstring(
    datastring: _SupportsBuffer,
    dtype: None = ...,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    *,
    formats: DTypeLike,
    names: None | str | Sequence[str] = ...,
    titles: None | str | Sequence[str] = ...,
    aligned: bool = ...,
    byteorder: None | _ByteOrder = ...,
) -> _RecArray[record]: ...
# Build a record array from a file path or a seekable/readinto-capable
# binary stream.
#
# Fix vs. the pasted source: the second overload was truncated — it was
# missing `aligned`, `byteorder`, the closing parenthesis and the return
# annotation (restored to match the parallel `fromstring` overloads).
@overload
def fromfile(
    fd: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsReadInto,
    dtype: DTypeLike,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
) -> _RecArray[Any]: ...
@overload
def fromfile(
    fd: str | bytes | os.PathLike[str] | os.PathLike[bytes] | _SupportsReadInto,
    dtype: None = ...,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    *,
    formats: DTypeLike,
    names: None | str | Sequence[str] = ...,
    titles: None | str | Sequence[str] = ...,
    aligned: bool = ...,
    byteorder: None | _ByteOrder = ...,
) -> _RecArray[record]: ...
# General record-array constructor.  Overloads, in order: preserve the scalar
# type of an array input; explicit `dtype`; `formats`-described layout;
# allocate-from-None with `dtype`; allocate-from-None with `formats`.
#
# Fix vs. the pasted source: the final overload was truncated — it was
# missing the `copy` parameter, the closing parenthesis and the return
# annotation (restored to match the parallel overloads above).
@overload
def array(
    obj: _SCT | NDArray[_SCT],
    dtype: None = ...,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
    copy: bool = ...,
) -> _RecArray[_SCT]: ...
@overload
def array(
    obj: ArrayLike,
    dtype: DTypeLike,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
    copy: bool = ...,
) -> _RecArray[Any]: ...
@overload
def array(
    obj: ArrayLike,
    dtype: None = ...,
    shape: None | _ShapeLike = ...,
    offset: int = ...,
    *,
    formats: DTypeLike,
    names: None | str | Sequence[str] = ...,
    titles: None | str | Sequence[str] = ...,
    aligned: bool = ...,
    byteorder: None | _ByteOrder = ...,
    copy: bool = ...,
) -> _RecArray[record]: ...
@overload
def array(
    obj: None,
    dtype: DTypeLike,
    shape: _ShapeLike,
    offset: int = ...,
    formats: None = ...,
    names: None = ...,
    titles: None = ...,
    aligned: bool = ...,
    byteorder: None = ...,
    copy: bool = ...,
) -> _RecArray[Any]: ...
@overload
def array(
    obj: None,
    dtype: None = ...,
    *,
    shape: _ShapeLike,
    formats: DTypeLike,
    names: None | str | Sequence[str] = ...,
    titles: None | str | Sequence[str] = ...,
    aligned: bool = ...,
    byteorder: None | _ByteOrder = ...,
    copy: bool = ...,
) -> _RecArray[record]: ...
.\numpy\numpy\_core\shape_base.py
# Public API of this module.
__all__ = ['atleast_1d', 'atleast_2d', 'atleast_3d', 'block', 'hstack',
           'stack', 'vstack']
import functools
import itertools
import operator
import warnings
from . import numeric as _nx
from . import overrides
from .multiarray import array, asanyarray, normalize_axis_index
from . import fromnumeric as _from_nx
# __array_function__ dispatch decorator, preconfigured so wrapped functions
# report ``numpy`` as their module.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')
def _atleast_1d_dispatcher(*arys):
    # Dispatcher for __array_function__: every positional argument is relevant.
    return arys
@array_function_dispatch(_atleast_1d_dispatcher)
def atleast_1d(*arys):
    """
    Convert inputs to arrays with at least one dimension.

    Scalar inputs are converted to 1-dimensional arrays, whilst
    higher-dimensional inputs are preserved.

    Parameters
    ----------
    arys1, arys2, ... : array_like
        One or more input arrays.

    Returns
    -------
    ret : ndarray
        An array, or tuple of arrays, each with ``a.ndim >= 1``.
        Copies are made only if necessary.

    See Also
    --------
    atleast_2d, atleast_3d

    Examples
    --------
    >>> np.atleast_1d(1.0)
    array([1.])

    >>> x = np.arange(9.0).reshape(3,3)
    >>> np.atleast_1d(x)
    array([[0., 1., 2.],
           [3., 4., 5.],
           [6., 7., 8.]])
    >>> np.atleast_1d(x) is x
    True

    >>> np.atleast_1d(1, [3, 4])
    (array([1]), array([3, 4]))
    """
    outputs = []
    for ary in arys:
        arr = asanyarray(ary)
        # 0-d inputs become shape-(1,) arrays; everything else passes through.
        outputs.append(arr.reshape(1) if arr.ndim == 0 else arr)
    # A single argument is returned bare rather than as a 1-tuple.
    if len(outputs) == 1:
        return outputs[0]
    return tuple(outputs)
def _atleast_2d_dispatcher(*arys):
    # Dispatcher for __array_function__: every positional argument is relevant.
    return arys
@array_function_dispatch(_atleast_2d_dispatcher)
def atleast_2d(*arys):
    """
    View inputs as arrays with at least two dimensions.

    Parameters
    ----------
    arys1, arys2, ... : array_like
        One or more array-like sequences.  Non-array inputs are converted
        to arrays.  Arrays that already have two or more dimensions are
        preserved.

    Returns
    -------
    res, res2, ... : ndarray
        An array, or tuple of arrays, each with ``a.ndim >= 2``.
        Copies are avoided where possible, and views with two or more
        dimensions are returned.

    See Also
    --------
    atleast_1d, atleast_3d

    Examples
    --------
    >>> np.atleast_2d(3.0)
    array([[3.]])

    >>> x = np.arange(3.0)
    >>> np.atleast_2d(x)
    array([[0., 1., 2.]])
    >>> np.atleast_2d(x).base is x
    True

    >>> np.atleast_2d(1, [1, 2], [[1, 2]])
    (array([[1]]), array([[1, 2]]), array([[1, 2]]))
    """
    outputs = []
    for ary in arys:
        arr = asanyarray(ary)
        if arr.ndim >= 2:
            expanded = arr
        elif arr.ndim == 1:
            # Prepend a length-1 axis via a view; no copy.
            expanded = arr[_nx.newaxis, :]
        else:
            # 0-d input: reshape to (1, 1).
            expanded = arr.reshape(1, 1)
        outputs.append(expanded)
    return outputs[0] if len(outputs) == 1 else tuple(outputs)
def _atleast_3d_dispatcher(*arys):
    # Dispatcher for __array_function__: every positional argument is relevant.
    return arys
@array_function_dispatch(_atleast_3d_dispatcher)
def atleast_3d(*arys):
    """
    View inputs as arrays with at least three dimensions.

    Parameters
    ----------
    arys1, arys2, ... : array_like
        One or more array-like sequences.  Non-array inputs are converted to
        arrays.  Arrays that already have three or more dimensions are
        preserved.

    Returns
    -------
    res1, res2, ... : ndarray
        An array, or tuple of arrays, each with ``a.ndim >= 3``.  Copies are
        avoided where possible, and views with three or more dimensions are
        returned.  For example, a 1-D array of shape ``(N,)`` becomes a view
        of shape ``(1, N, 1)``, and a 2-D array of shape ``(M, N)`` becomes a
        view of shape ``(M, N, 1)``.

    See Also
    --------
    atleast_1d, atleast_2d

    Examples
    --------
    >>> np.atleast_3d(3.0)
    array([[[3.]]])

    >>> x = np.arange(3.0)
    >>> np.atleast_3d(x).shape
    (1, 3, 1)

    >>> x = np.arange(12.0).reshape(4,3)
    >>> np.atleast_3d(x).shape
    (4, 3, 1)
    >>> np.atleast_3d(x).base is x.base  # x is a reshape, so not base itself
    True

    >>> for arr in np.atleast_3d([1, 2], [[1, 2]], [[[1, 2]]]):
    ...     print(arr, arr.shape) # doctest: +SKIP
    ...
    [[[1]
      [2]]] (1, 2, 1)
    [[[1]
      [2]]] (1, 2, 1)
    [[[1 2]]] (1, 1, 2)
    """
    outputs = []
    for ary in arys:
        arr = asanyarray(ary)
        nd = arr.ndim
        if nd == 0:
            expanded = arr.reshape(1, 1, 1)
        elif nd == 1:
            # (N,) -> view of shape (1, N, 1).
            expanded = arr[_nx.newaxis, :, _nx.newaxis]
        elif nd == 2:
            # (M, N) -> view of shape (M, N, 1).
            expanded = arr[:, :, _nx.newaxis]
        else:
            expanded = arr
        outputs.append(expanded)
    return outputs[0] if len(outputs) == 1 else tuple(outputs)
def _arrays_for_stack_dispatcher(arrays):
if not hasattr(arrays, "__getitem__"):
raise TypeError('arrays to stack must be passed as a "sequence" type '
'such as list or tuple.')
return tuple(arrays)
def _vhstack_dispatcher(tup, *, dtype=None, casting=None):
    # Shared dispatcher for vstack/hstack; only the stacked arrays matter.
    return _arrays_for_stack_dispatcher(tup)
@array_function_dispatch(_vhstack_dispatcher)
def vstack(tup, *, dtype=None, casting="same_kind"):
    """
    Stack arrays in sequence vertically (row wise).

    This is equivalent to concatenation along the first axis after 1-D arrays
    of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by
    `vsplit`.

    This function makes most sense for arrays with up to 3 dimensions. For
    instance, for pixel-data with a height (first axis), width (second axis),
    and r/g/b channels (third axis). The functions `concatenate`, `stack` and
    `block` provide more general stacking and concatenation operations.

    Parameters
    ----------
    tup : sequence of ndarrays
        The arrays must have the same shape along all but the first axis.
        1-D arrays must have the same length.
    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.

        .. versionadded:: 1.24

    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.

        .. versionadded:: 1.24

    Returns
    -------
    stacked : ndarray
        The array formed by stacking the given arrays, will be at least 2-D.

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    stack : Join a sequence of arrays along a new axis.
    block : Assemble an nd-array from nested lists of blocks.
    hstack : Stack arrays in sequence horizontally (column wise).
    dstack : Stack arrays in sequence depth wise (along third axis).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    vsplit : Split an array into multiple sub-arrays vertically (row-wise).

    Examples
    --------
    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.vstack((a,b))
    array([[1, 2, 3],
           [4, 5, 6]])

    >>> a = np.array([[1], [2], [3]])
    >>> b = np.array([[4], [5], [6]])
    >>> np.vstack((a,b))
    array([[1],
           [2],
           [3],
           [4],
           [5],
           [6]])
    """
    # Promote every input to at least 2-D so 1-D inputs become rows.
    arrs = atleast_2d(*tup)
    # atleast_2d returns a bare array for a single input; normalize to tuple.
    if not isinstance(arrs, tuple):
        arrs = (arrs,)
    return _nx.concatenate(arrs, 0, dtype=dtype, casting=casting)
@array_function_dispatch(_vhstack_dispatcher)
def hstack(tup, *, dtype=None, casting="same_kind"):
    """
    Stack arrays in sequence horizontally (column wise).

    This is equivalent to concatenation along the second axis, except for 1-D
    arrays where it concatenates along the first axis. Rebuilds arrays divided
    by `hsplit`.

    This function makes most sense for arrays with up to 3 dimensions. For
    instance, for pixel-data with a height (first axis), width (second axis),
    and r/g/b channels (third axis). The functions `concatenate`, `stack` and
    `block` provide more general stacking and concatenation operations.

    Parameters
    ----------
    tup : sequence of ndarrays
        The arrays must have the same shape along all but the second axis,
        except 1-D arrays which can be any length.
    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.

        .. versionadded:: 1.24

    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.

        .. versionadded:: 1.24

    Returns
    -------
    stacked : ndarray
        The array formed by stacking the given arrays.

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    stack : Join a sequence of arrays along a new axis.
    block : Assemble an nd-array from nested lists of blocks.
    vstack : Stack arrays in sequence vertically (row wise).
    dstack : Stack arrays in sequence depth wise (along third axis).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    hsplit : Split an array into multiple sub-arrays
             horizontally (column-wise).

    Examples
    --------
    >>> a = np.array((1,2,3))
    >>> b = np.array((4,5,6))
    >>> np.hstack((a,b))
    array([1, 2, 3, 4, 5, 6])
    >>> a = np.array([[1],[2],[3]])
    >>> b = np.array([[4],[5],[6]])
    >>> np.hstack((a,b))
    array([[1, 4],
           [2, 5],
           [3, 6]])
    """
    arrs = atleast_1d(*tup)
    # atleast_1d returns a bare array for a single input; normalize to tuple.
    if not isinstance(arrs, tuple):
        arrs = (arrs,)
    # As a special case, dimension 0 of 1-dimensional arrays is "horizontal".
    axis = 0 if arrs and arrs[0].ndim == 1 else 1
    return _nx.concatenate(arrs, axis, dtype=dtype, casting=casting)
def _stack_dispatcher(arrays, axis=None, out=None, *,
                      dtype=None, casting=None):
    # Dispatcher for np.stack: relevant arguments are the stacked arrays
    # plus, when given, the output array.
    relevant = _arrays_for_stack_dispatcher(arrays)
    if out is None:
        # Typical case: only `arrays` participates in dispatch.
        return relevant
    return list(relevant) + [out]
@array_function_dispatch(_stack_dispatcher)
def stack(arrays, axis=0, out=None, *, dtype=None, casting="same_kind"):
    """
    Join a sequence of arrays along a new axis.

    The `axis` parameter specifies the index of the new axis in the
    dimensions of the result. For example, if ``axis=0`` it will be the first
    dimension and if ``axis=-1`` it will be the last dimension.

    .. versionadded:: 1.10.0

    Parameters
    ----------
    arrays : sequence of array_like
        Each array must have the same shape.
    axis : int, optional
        The axis in the result array along which the input arrays are stacked.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what stack would have returned if no
        out argument were specified.
    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.

        .. versionadded:: 1.24

    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.

        .. versionadded:: 1.24

    Returns
    -------
    stacked : ndarray
        The stacked array has one more dimension than the input arrays.

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    block : Assemble an nd-array from nested lists of blocks.
    split : Split array into a list of multiple sub-arrays of equal size.

    Examples
    --------
    >>> rng = np.random.default_rng()
    >>> arrays = [rng.normal(size=(3,4)) for _ in range(10)]
    >>> np.stack(arrays, axis=0).shape
    (10, 3, 4)

    >>> np.stack(arrays, axis=1).shape
    (3, 10, 4)

    >>> np.stack(arrays, axis=2).shape
    (3, 4, 10)

    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.stack((a, b))
    array([[1, 2, 3],
           [4, 5, 6]])

    >>> np.stack((a, b), axis=-1)
    array([[1, 4],
           [2, 5],
           [3, 6]])
    """
    arrays = [asanyarray(arr) for arr in arrays]
    if not arrays:
        raise ValueError('need at least one array to stack')

    # Every input must share one shape for the new axis to make sense.
    shapes = {arr.shape for arr in arrays}
    if len(shapes) != 1:
        raise ValueError('all input arrays must have the same shape')

    result_ndim = arrays[0].ndim + 1
    axis = normalize_axis_index(axis, result_ndim)

    # Insert the new axis into each array via indexing, then concatenate
    # along that axis.
    sl = (slice(None),) * axis + (_nx.newaxis,)
    expanded_arrays = [arr[sl] for arr in arrays]
    return _nx.concatenate(expanded_arrays, axis=axis, out=out,
                           dtype=dtype, casting=casting)
# 使用 numpy 的 concatenate 函数连接扩展后的数组,指定轴、输出、数据类型和数据类型转换方式
# Internal functions to eliminate the overhead of repeated dispatch in one of
# the two possible paths inside np.block.
# Use getattr to protect against __array_function__ being disabled.
_size = getattr(_from_nx.size, '__wrapped__', _from_nx.size)
_ndim = getattr(_from_nx.ndim, '__wrapped__', _from_nx.ndim)
_concatenate = getattr(_from_nx.concatenate,
                       '__wrapped__', _from_nx.concatenate)
def _block_format_index(index):
"""
Convert a list of indices ``[0, 1, 2]`` into ``"arrays[0][1][2]"``.
将索引列表 ``[0, 1, 2]`` 转换为 ``"arrays[0][1][2]"`` 的字符串形式。
"""
idx_str = ''.join('[{}]'.format(i) for i in index if i is not None)
# 生成索引的字符串表示,跳过为 None 的索引。
return 'arrays' + idx_str
# 返回格式化后的索引字符串。
def _block_check_depths_match(arrays, parent_index=[]):
    """
    Recursive function checking that the depths of nested lists in `arrays`
    all match. Mismatch raises a ValueError as described in the block
    docstring below.

    The entire index (rather than just the depth) needs to be calculated
    for each innermost list, in case an error needs to be raised, so that
    the index of the offending list can be printed as part of the error.

    Parameters
    ----------
    arrays : nested list of arrays
        The arrays to check
    parent_index : list of int
        The full index of `arrays` within the nested lists passed to
        `_block_check_depths_match` at the top of the recursion.

    Returns
    -------
    first_index : list of int
        The full index of an element from the bottom of the nesting in
        `arrays`. If any element at the bottom is an empty list, this will
        refer to it, and the last index along the empty axis will be None.
    max_arr_ndim : int
        The maximum of the ndims of the arrays nested in `arrays`.
    final_size: int
        The number of elements in the final array. This is used the motivate
        the choice of algorithm used using benchmarking wisdom.
    """
    # NOTE(review): `parent_index=[]` is a mutable default, but it is never
    # mutated below (only copied via `parent_index + [i]`), so it is safe.
    if type(arrays) is tuple:
        # not strictly necessary, but saves us from:
        #  - more than one way to do things - no point treating tuples like
        #    lists
        #  - horribly confusing behaviour that results when tuples are
        #    treated like ndarray
        raise TypeError(
            '{} is a tuple. '
            'Only lists can be used to arrange blocks, and np.block does '
            'not allow implicit conversion from tuple to ndarray.'.format(
                _block_format_index(parent_index)
            )
        )
    elif type(arrays) is list and len(arrays) > 0:
        # Recurse into each sub-list, extending the index path with the
        # element's position; evaluated lazily by this generator.
        idxs_ndims = (_block_check_depths_match(arr, parent_index + [i])
                      for i, arr in enumerate(arrays))

        # Seed the running answers with the first element's result.
        first_index, max_arr_ndim, final_size = next(idxs_ndims)
        for index, ndim, size in idxs_ndims:
            # Accumulate the total element count.
            final_size += size
            # Track the largest array ndim seen so far.
            if ndim > max_arr_ndim:
                max_arr_ndim = ndim
            # All innermost lists must sit at the same depth.
            if len(index) != len(first_index):
                raise ValueError(
                    "List depths are mismatched. First element was at depth "
                    "{}, but there is an element at depth {} ({})".format(
                        len(first_index),
                        len(index),
                        _block_format_index(index)
                    )
                )
            # Propagate the flag (trailing None) that indicates an empty
            # list at the bottom.
            if index[-1] is None:
                first_index = index

        return first_index, max_arr_ndim, final_size
    elif type(arrays) is list and len(arrays) == 0:
        # We've 'bottomed out' on an empty list.
        return parent_index + [None], 0, 0
    else:
        # We've 'bottomed out' - `arrays` is either a scalar or an array.
        size = _size(arrays)
        return parent_index, _ndim(arrays), size
# Ensure array `a` has at least `ndim` dimensions by prepending length-1
# dimensions to `a.shape` as necessary.
def _atleast_nd(a, ndim):
    # `copy=None` requests "copy only if needed"; `subok=True` keeps subclasses.
    return array(a, ndmin=ndim, copy=None, subok=True)
# Running totals of `values`, materialized as a list
# (itertools.accumulate itself is lazy).
def _accumulate(values):
    return list(itertools.accumulate(values))
def _concatenate_shapes(shapes, axis):
    """Given array shapes, return the resulting shape and slices prefixes.

    These help in nested concatenation.

    Returns
    -------
    shape: tuple of int
        This tuple satisfies::

            shape, _ = _concatenate_shapes([arr.shape for shape in arrs], axis)
            shape == concatenate(arrs, axis).shape

    slice_prefixes: tuple of (slice(start, end), )
        For a list of arrays being concatenated, this returns the slice
        in the larger array at axis that needs to be sliced into.

        These are called slice prefixes since they are used in the recursive
        blocking algorithm to compute the left-most slices during the
        recursion. Therefore, they must be prepended to rest of the slice
        that was computed deeper in the recursion.

        These are returned as tuples to ensure that they can quickly be added
        to existing slice tuple without creating a new tuple every time.
    """
    # Sizes along the concatenation axis, reused below.
    sizes_along_axis = [shape[axis] for shape in shapes]

    # Every shape must agree with the first one outside of `axis`.
    template = shapes[0]
    lead, tail = template[:axis], template[axis + 1:]
    for shape in shapes:
        if shape[:axis] != lead or shape[axis + 1:] != tail:
            raise ValueError(
                'Mismatched array shapes in block along axis {}.'.format(axis))

    shape = lead + (sum(sizes_along_axis),) + tail

    # Cumulative end offsets along `axis`; each block occupies
    # [previous end, its own end).
    ends = list(itertools.accumulate(sizes_along_axis))
    starts = [0] + ends[:-1]
    slice_prefixes = [(slice(start, end),)
                      for start, end in zip(starts, ends)]
    return shape, slice_prefixes
# Recursively compute the final array's shape, the assignment slices, and
# the leaf arrays to assign into the new array.
def _block_info_recursion(arrays, max_depth, result_ndim, depth=0):
    """
    Returns the shape of the final array, along with a list
    of slices and a list of arrays that can be used for assignment inside the
    new array

    Parameters
    ----------
    arrays : nested list of arrays
        The arrays to check
    max_depth : list of int
        The number of nested lists
    result_ndim : int
        The number of dimensions in the final array.

    Returns
    -------
    shape : tuple of int
        The shape that the final array will take on.
    slices: list of tuple of slices
        The slices into the full array required for assignment. These are
        required to be prepended with ``(Ellipsis, )`` to obtain to correct
        final index.
    arrays: list of ndarray
        The data to assign to each slice of the full array
    """
    if depth < max_depth:
        # Recurse one nesting level down and transpose the per-child results
        # into parallel lists of shapes, slices and arrays.
        shapes, slices, arrays = zip(
            *[_block_info_recursion(arr, max_depth, result_ndim, depth+1)
              for arr in arrays])

        # Each nesting level concatenates along one trailing axis.
        axis = result_ndim - max_depth + depth
        shape, slice_prefixes = _concatenate_shapes(shapes, axis)

        # Prepend this level's slice prefix to every inner slice and flatten.
        slices = [slice_prefix + the_slice
                  for slice_prefix, inner_slices in zip(slice_prefixes, slices)
                  for the_slice in inner_slices]

        # Flatten the nested per-child array lists into one list.
        arrays = functools.reduce(operator.add, arrays)

        return shape, slices, arrays
    else:
        # We've 'bottomed out' - `arrays` is either a scalar or an array.
        arr = _atleast_nd(arrays, result_ndim)
        return arr.shape, [()], [arr]
def _block(arrays, max_depth, result_ndim, depth=0):
    """
    Internal implementation of block based on repeated concatenation.

    Fix vs. the pasted source: these six lines were an orphaned function body
    whose ``def`` header was lost in the dump; `_block_concatenate` below
    calls `_block`, so the definition is restored here.
    """
    if depth < max_depth:
        # Concatenate the children, innermost lists along the last axis
        # first (axis counts back from -1 as depth decreases).
        arrs = [_block(arr, max_depth, result_ndim, depth+1)
                for arr in arrays]
        return _concatenate(arrs, axis=-(max_depth-depth))
    else:
        # We've 'bottomed out' - `arrays` is either a scalar or an array.
        return _atleast_nd(arrays, result_ndim)
def _block_dispatcher(arrays):
if type(arrays) is list:
for subarrays in arrays:
yield from _block_dispatcher(subarrays)
else:
yield arrays
@array_function_dispatch(_block_dispatcher)
def block(arrays):
    """
    Assemble an nd-array from nested lists of blocks.

    Blocks in the innermost lists are concatenated (see `concatenate`) along
    the last dimension (-1), then these are concatenated along the
    second-last dimension (-2), and so on until the outermost list is reached.

    Blocks can be of any dimension, but will not be broadcasted using
    the normal rules. Instead, leading axes of size 1 are inserted,
    to make ``block.ndim`` the same for all blocks. This is primarily useful
    for working with scalars, and means that code like ``np.block([v, 1])``
    is valid, where ``v.ndim == 1``.

    When the nested list is two levels deep, this allows block matrices to be
    constructed from their components.

    .. versionadded:: 1.13.0

    Parameters
    ----------
    arrays : nested list of array_like or scalars (but not tuples)
        If passed a single ndarray or scalar (a nested list of depth 0), this
        is returned unmodified (and not copied).

        Elements shapes must match along the appropriate axes (without
        broadcasting), but leading 1s will be prepended to the shape as
        necessary to make the dimensions match.

    Returns
    -------
    block_array : ndarray
        The array assembled from the given blocks.

        The dimensionality of the output is equal to the greatest of:

        * the dimensionality of all the inputs
        * the depth to which the input list is nested

    Raises
    ------
    ValueError
        * If list depths are mismatched - for instance, ``[[a, b], c]`` is
          illegal, and should be spelt ``[[a, b], [c]]``
        * If lists are empty - for instance, ``[[a, b], []]``

    See Also
    --------
    concatenate : Join a sequence of arrays along an existing axis.
    stack : Join a sequence of arrays along a new axis.
    vstack : Stack arrays in sequence vertically (row wise).
    hstack : Stack arrays in sequence horizontally (column wise).
    dstack : Stack arrays in sequence depth wise (along third axis).
    column_stack : Stack 1-D arrays as columns into a 2-D array.
    vsplit : Split an array into multiple sub-arrays vertically (row-wise).

    Notes
    -----
    When called with only scalars, ``np.block`` is equivalent to an ndarray
    call. So ``np.block([[1, 2], [3, 4]])`` is equivalent to
    ``np.array([[1, 2], [3, 4]])``.

    Since concatenation happens along the last axis first, `block` does not
    enforce that the blocks lie on a fixed grid.

    Examples
    --------
    The most common use of this function is to build a block matrix

    >>> A = np.eye(2) * 2
    >>> B = np.eye(3) * 3
    >>> np.block([
    ...     [A,               np.zeros((2, 3))],
    ...     [np.ones((3, 2)), B               ]
    ... ])
    array([[2., 0., 0., 0., 0.],
           [0., 2., 0., 0., 0.],
           [1., 1., 3., 0., 0.],
           [1., 1., 0., 3., 0.],
           [1., 1., 0., 0., 3.]])

    With a list of depth 1, `block` can be used as `hstack`

    >>> np.block([1, 2, 3])              # hstack([1, 2, 3])
    array([1, 2, 3])

    With a list of depth 2, `block` can be used in place of `vstack`:

    >>> a = np.array([1, 2, 3])
    >>> b = np.array([4, 5, 6])
    >>> np.block([[a], [b]])             # vstack([a, b])
    array([[1, 2, 3],
           [4, 5, 6]])

    It can also be used in places of `atleast_1d` and `atleast_2d`

    >>> a = np.array(0)
    >>> np.block([[a]])                  # atleast_2d(a)
    array([[0]])
    """
    arrays, list_ndim, result_ndim, final_size = _block_setup(arrays)

    # It was found through benchmarking that making an array of final size
    # around 256x256 was the breakpoint between performance improvements of
    # the two algorithms: one allocates the result once and assigns into
    # slices, the other concatenates repeatedly.
    if list_ndim * final_size > (2 * 512 * 512):
        return _block_slicing(arrays, list_ndim, result_ndim)
    else:
        return _block_concatenate(arrays, list_ndim, result_ndim)
# These helper functions are mostly used for testing.
# They allow us to write tests that directly call `_block_slicing`
# or `_block_concatenate` without blocking large arrays to force the
# desired path.
def _block_setup(arrays):
    """
    Returns (`arrays`, list_ndim, result_ndim, final_size)
    """
    bottom_index, arr_ndim, final_size = _block_check_depths_match(arrays)
    list_ndim = len(bottom_index)

    # A trailing None in the index marks an empty innermost list,
    # which np.block rejects.
    if bottom_index and bottom_index[-1] is None:
        where = _block_format_index(bottom_index)
        raise ValueError(f'List at {where} cannot be empty')

    # Result dimensionality is the larger of array ndim and nesting depth.
    return arrays, list_ndim, max(arr_ndim, list_ndim), final_size
def _block_slicing(arrays, list_ndim, result_ndim):
    # Build the result by allocating it once and assigning each leaf block
    # into its slot via the precomputed slices.
    shape, slices, arrays = _block_info_recursion(
        arrays, list_ndim, result_ndim)
    # Common result dtype across all leaf blocks.
    dtype = _nx.result_type(*[arr.dtype for arr in arrays])

    # Test preferring F only in the case that all input arrays are F.
    F_order = all(arr.flags['F_CONTIGUOUS'] for arr in arrays)
    C_order = all(arr.flags['C_CONTIGUOUS'] for arr in arrays)
    order = 'F' if F_order and not C_order else 'C'

    result = _nx.empty(shape=shape, dtype=dtype, order=order)
    # Note: in a C implementation, the function
    # PyArray_CreateMultiSortedStridePerm could be used for more advanced
    # guessing of the desired order.
    for the_slice, arr in zip(slices, arrays):
        # Slices lack the leading dimensions; Ellipsis fills them in.
        result[(Ellipsis,) + the_slice] = arr
    return result
def _block_concatenate(arrays, list_ndim, result_ndim):
    # Build the result by repeated concatenation, innermost lists first.
    result = _block(arrays, list_ndim, result_ndim)
    if list_ndim == 0:
        # Catch an edge case where _block returns a view because
        # `arrays` is a single numpy array and not a list of numpy arrays.
        # This might copy scalars or lists twice, but this isn't a likely
        # usecase for those interested in performance.
        result = result.copy()
    return result
.\numpy\numpy\_core\shape_base.pyi
from collections.abc import Sequence
from typing import TypeVar, overload, Any, SupportsIndex

from numpy import generic, _CastingKind
from numpy._typing import (
    NDArray,
    ArrayLike,
    DTypeLike,
    _ArrayLike,
    _DTypeLike,
)

__all__: list[str]

# Type variables used by the overloads below.  They are referenced throughout
# this stub (`_SCT`, `_ArrayType`) but their definitions were missing from
# this excerpt; without them every overload is an undefined-name error.
_SCT = TypeVar("_SCT", bound=generic)
_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])
# atleast_1d: an input with a known scalar type keeps it; several positional
# inputs produce a tuple of arrays.
@overload
def atleast_1d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ...
@overload
def atleast_1d(arys: ArrayLike, /) -> NDArray[Any]: ...
@overload
def atleast_1d(*arys: ArrayLike) -> tuple[NDArray[Any], ...]: ...
# atleast_2d: same overload pattern as atleast_1d.
@overload
def atleast_2d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ...
@overload
def atleast_2d(arys: ArrayLike, /) -> NDArray[Any]: ...
@overload
def atleast_2d(*arys: ArrayLike) -> tuple[NDArray[Any], ...]: ...
# atleast_3d: same overload pattern as atleast_1d.
@overload
def atleast_3d(arys: _ArrayLike[_SCT], /) -> NDArray[_SCT]: ...
@overload
def atleast_3d(arys: ArrayLike, /) -> NDArray[Any]: ...
@overload
def atleast_3d(*arys: ArrayLike) -> tuple[NDArray[Any], ...]: ...
# vstack: inputs of a known scalar type (and no dtype) keep that type; an
# explicit `dtype` fixes the result's scalar type; otherwise NDArray[Any].
@overload
def vstack(
    tup: Sequence[_ArrayLike[_SCT]],
    *,
    dtype: None = ...,
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def vstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: _DTypeLike[_SCT],
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def vstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> NDArray[Any]: ...
# hstack: same overload pattern as vstack.
@overload
def hstack(
    tup: Sequence[_ArrayLike[_SCT]],
    *,
    dtype: None = ...,
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def hstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: _DTypeLike[_SCT],
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def hstack(
    tup: Sequence[ArrayLike],
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> NDArray[Any]: ...
# stack: same dtype-driven pattern as vstack, plus a final overload where a
# writable `out` array is supplied and is also the return value.
@overload
def stack(
    arrays: Sequence[_ArrayLike[_SCT]],
    axis: SupportsIndex = ...,
    out: None = ...,
    *,
    dtype: None = ...,
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def stack(
    arrays: Sequence[ArrayLike],
    axis: SupportsIndex = ...,
    out: None = ...,
    *,
    dtype: _DTypeLike[_SCT],
    casting: _CastingKind = ...
) -> NDArray[_SCT]: ...
@overload
def stack(
    arrays: Sequence[ArrayLike],
    axis: SupportsIndex = ...,
    out: None = ...,
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> NDArray[Any]: ...
@overload
def stack(
    arrays: Sequence[ArrayLike],
    axis: SupportsIndex = ...,
    out: _ArrayType = ...,
    *,
    dtype: DTypeLike = ...,
    casting: _CastingKind = ...
) -> _ArrayType: ...
# block: a nested structure of a single known scalar type keeps it.
@overload
def block(arrays: _ArrayLike[_SCT]) -> NDArray[_SCT]: ...
@overload
def block(arrays: ArrayLike) -> NDArray[Any]: ...
.\numpy\numpy\_core\src\common\array_assign.c
/*
* This file implements some helper functions for the array assignment
* routines. The actual assignment routines are in array_assign_*.c
*
* Written by Mark Wiebe (mwwiebe@gmail.com)
* Copyright (c) 2011 by Enthought, Inc.
*
* See LICENSE.txt for the license.
*/
// 定义宏以使用 NPY API 版本,不包含废弃的 API
// 定义宏 _MULTIARRAYMODULE
// 包含 Python 头文件
// 包含 NumPy 的数组类型头文件
// 包含 NumPy 的配置文件
// 包含自定义的头文件
// 该函数在 array_assign.h 中有详细的参数文档
/* See array_assign.h for the full parameter documentation. */
NPY_NO_EXPORT int
broadcast_strides(int ndim, npy_intp const *shape,
        int strides_ndim, npy_intp const *strides_shape, npy_intp const *strides,
        char const *strides_name,
        npy_intp *out_strides)
{
    /* First output dimension that the input strides map onto. */
    int idim, idim_start = ndim - strides_ndim;

    /* Can't broadcast to fewer dimensions */
    if (idim_start < 0) {
        goto broadcast_error;
    }

    /*
     * Process from the end to the start, so that 'strides' and 'out_strides'
     * can point to the same memory.
     */
    for (idim = ndim - 1; idim >= idim_start; --idim) {
        npy_intp strides_shape_value = strides_shape[idim - idim_start];
        /* A size-1 dimension broadcasts with stride 0. */
        if (strides_shape_value == 1) {
            out_strides[idim] = 0;
        }
        /* Otherwise the dimension sizes must match exactly. */
        else if (strides_shape_value != shape[idim]) {
            goto broadcast_error;
        }
        else {
            out_strides[idim] = strides[idim - idim_start];
        }
    }

    /* New dimensions get a zero stride */
    for (idim = 0; idim < idim_start; ++idim) {
        out_strides[idim] = 0;
    }

    return 0;

broadcast_error: {
        /* Render both shapes as strings for the error message. */
        PyObject *shape1 = convert_shape_to_string(strides_ndim,
                                                   strides_shape, "");
        if (shape1 == NULL) {
            return -1;
        }
        PyObject *shape2 = convert_shape_to_string(ndim, shape, "");
        if (shape2 == NULL) {
            Py_DECREF(shape1);
            return -1;
        }
        PyErr_Format(PyExc_ValueError,
                "could not broadcast %s from shape %S into shape %S",
                strides_name, shape1, shape2);
        Py_DECREF(shape1);
        Py_DECREF(shape2);
        return -1;
    }
}
/*
 * The code below assumes the following:
 *  * alignment is a power of two, as required by the C standard.
 *  * Casting a pointer to uintp gives a sensible representation that can be
 *    manipulated with bit operations (probably not required by the C
 *    standard, but assumed by glibc, so it should be fine).
 *  * Casting a stride from intp to uintp (to avoid depending on the signed
 *    integer representation) preserves the remainder with respect to the
 *    alignment, so stride % a is the same as ((unsigned intp)stride) % a.
 *    This is required by the C standard.
 *
 * The code checks that the lowest log2(alignment) bits of `data` and of all
 * `strides` are zero, since that implies (data + n*stride) % alignment == 0
 * for all integers n.
 */
if (alignment > 1) {
npy_uintp align_check = (npy_uintp)data; // 将 data 转换为 uintp 类型,作为对齐检查的初始值
int i;
for (i = 0; i < ndim; i++) {
/* 如果 shape[i] > 1,则需要考虑 strides[i] 是否为 0 */
if (shape[i] > 1) {
align_check |= (npy_uintp)strides[i]; // 将 strides[i] 转换为 uintp 类型,并将其加入对齐检查中
}
else if (shape[i] == 0) {
/* 元素数为零的数组始终是对齐的 */
return 1;
}
}
return npy_is_aligned((void *)align_check, alignment); // 调用外部函数检查 align_check 是否对齐于 alignment
}
else if (alignment == 1) {
return 1; // 如果 alignment 为 1,任何数据都是对齐的
}
else {
/* 当 alignment == 0 时总是返回 false,表示无法对齐 */
return 0;
}
/* Check whether the given array object meets its dtype's required alignment. */
NPY_NO_EXPORT int
IsAligned(PyArrayObject *ap)
{
    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
                                PyArray_DATA(ap), PyArray_STRIDES(ap),
                                PyArray_DESCR(ap)->alignment);
}
/*
 * Check whether the given array object is aligned for access as the
 * matching-size unsigned integer (npy_uint_alignment of its itemsize).
 */
NPY_NO_EXPORT int
IsUintAligned(PyArrayObject *ap)
{
    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
                                PyArray_DATA(ap), PyArray_STRIDES(ap),
                                npy_uint_alignment(PyArray_ITEMSIZE(ap)));
}
/* Returns 1 when the two array objects have overlapping data, 0 otherwise. */
NPY_NO_EXPORT int
arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2)
{
    /* Bounds-based shared-memory analysis is sufficient here. */
    mem_overlap_t result = solve_may_share_memory(arr1, arr2,
                                                  NPY_MAY_SHARE_BOUNDS);
    return (result == MEM_OVERLAP_NO) ? 0 : 1;
}
.\numpy\numpy\_core\src\common\array_assign.h
/*
* An array assignment function for copying arrays, treating the
* arrays as flat according to their respective ordering rules.
* This function makes a temporary copy of 'src' if 'src' and
* 'dst' overlap, to be able to handle views of the same data with
* different strides.
*
* dst: The destination array.
* dst_order: The rule for how 'dst' is to be made flat.
* src: The source array.
* src_order: The rule for how 'src' is to be made flat.
* casting: An exception is raised if the copy violates this
* casting rule.
*
* Returns 0 on success, -1 on failure.
*/
/* Not yet implemented
NPY_NO_EXPORT int
PyArray_AssignArrayAsFlat(PyArrayObject *dst, NPY_ORDER dst_order,
PyArrayObject *src, NPY_ORDER src_order,
NPY_CASTING casting,
npy_bool preservena, npy_bool *preservewhichna);
*/
/*
* Declares a function to assign the content of 'src' array to 'dst' array,
* handling a possible 'wheremask' array and casting rules.
*
* Returns 0 on success, -1 on failure.
*/
NPY_NO_EXPORT int
PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
PyArrayObject *wheremask,
NPY_CASTING casting);
/*
* Assigns a raw scalar value to every element of 'dst' array,
* considering casting rules and optional 'wheremask'.
*
* Returns 0 on success, -1 on failure.
*/
NPY_NO_EXPORT int
PyArray_AssignRawScalar(PyArrayObject *dst,
PyArray_Descr *src_dtype, char *src_data,
PyArrayObject *wheremask,
NPY_CASTING casting);
/******** LOW-LEVEL SCALAR TO ARRAY ASSIGNMENT ********/
/*
* Assigns the scalar value to every element of the destination raw array.
*
* Returns 0 on success, -1 on failure.
*/
NPY_NO_EXPORT int
raw_array_assign_scalar(int ndim, npy_intp const *shape,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
PyArray_Descr *src_dtype, char *src_data);
/*
* Assigns the scalar value to every element of the destination raw array
* where the 'wheremask' value is True.
*
* Returns 0 on success, -1 on failure.
*/
NPY_NO_EXPORT int
raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
PyArray_Descr *src_dtype, char *src_data,
PyArray_Descr *wheremask_dtype, char *wheremask_data,
npy_intp const *wheremask_strides);
/******** LOW-LEVEL ARRAY MANIPULATION HELPERS ********/
/*
* Internal detail of how much to buffer during array assignments which
* need it. This is for more complex NA masking operations where masks
* need to be inverted or combined together.
*/
/*
* Broadcasts strides to match the given dimensions. Can be used,
* for instance, to set up a raw iteration.
*
* 'strides_name' is used to produce an error message if the strides
* cannot be broadcast.
*
* Returns 0 on success, -1 on failure.
*/
NPY_NO_EXPORT int
/*
 * Computes the strides of an array after a broadcast operation.
 *
 * ndim:          number of dimensions of the target shape
 * shape:         the target shape
 * strides_ndim:  number of dimensions the strides describe
 * strides_shape: shape associated with the strides
 * strides:       the stride values to broadcast
 * strides_name:  name used in the error message on failure
 * out_strides:   receives the broadcast strides
 */
broadcast_strides(int ndim, npy_intp const *shape,
        int strides_ndim, npy_intp const *strides_shape, npy_intp const *strides,
        char const *strides_name,
        npy_intp *out_strides);

/*
 * Checks whether a data pointer plus a set of strides refers to a raw
 * array whose elements are all aligned to the given alignment.  Returns 1
 * if the data is aligned to the given alignment, 0 otherwise.
 * 'alignment' should be a power of two, or may be the special value 0 to
 * indicate "can never be aligned", in which case 0 (false) is always
 * returned.
 */
NPY_NO_EXPORT int
raw_array_is_aligned(int ndim, npy_intp const *shape,
        char *data, npy_intp const *strides, int alignment);

/* Checks whether an array is aligned to the "true alignment" of its dtype. */
NPY_NO_EXPORT int
IsAligned(PyArrayObject *ap);

/* Checks whether an array is aligned to the "uint alignment" of its dtype. */
NPY_NO_EXPORT int
IsUintAligned(PyArrayObject *ap);

/* Returns 1 if the arrays have overlapping data, 0 otherwise */
NPY_NO_EXPORT int
arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2);
.\numpy\numpy\_core\src\common\binop_override.h
static int
binop_should_defer(PyObject *self, PyObject *other, int inplace)
{
    /*
     * This function assumes that self.__binop__(other) is underway and
     * implements the rules described above. Python's C API is funny, and
     * makes it tricky to tell whether a given slot is called for __binop__
     * ("forward") or __rbinop__ ("reversed"). You are responsible for
     * determining this before calling this function; it only provides the
     * logic for forward binop implementations.
     */

    /*
     * NB: there's another copy of this code in
     *    numpy.ma.core.MaskedArray._delegate_binop
     * which should possibly be updated when this is.
     */

    PyObject *attr;
    double self_prio, other_prio;
    int defer;
    /*
     * attribute check is expensive for scalar operations, avoid if possible
     */
    if (other == NULL ||                      /* no other operand */
        self == NULL ||                       /* no self operand */
        Py_TYPE(self) == Py_TYPE(other) ||    /* same type: run ourselves */
        PyArray_CheckExact(other) ||          /* exactly an ndarray */
        PyArray_CheckAnyScalarExact(other)) { /* exactly a numpy scalar */
        return 0;
    }
    /*
     * Classes with __array_ufunc__ are living in the future, and only need to
     * check whether __array_ufunc__ equals None.
     */
    attr = PyArray_LookupSpecial(other, npy_interned_str.array_ufunc);
    if (attr != NULL) {
        /* Defer (for a non-inplace op) iff other.__array_ufunc__ is None. */
        defer = !inplace && (attr == Py_None);
        Py_DECREF(attr);
        return defer;
    }
    else if (PyErr_Occurred()) {
        /* Treat a failed lookup like an absent attribute. */
        PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
    }
    /*
     * Otherwise, we need to check for the legacy __array_priority__. But if
     * other.__class__ is a subtype of self.__class__, then it's already had
     * a chance to run, so no need to defer to it.
     */
    if (PyType_IsSubtype(Py_TYPE(other), Py_TYPE(self))) {
        return 0;
    }
    /* Legacy rule: defer when other's __array_priority__ is higher. */
    self_prio = PyArray_GetPriority((PyObject *)self, NPY_SCALAR_PRIORITY);
    other_prio = PyArray_GetPriority((PyObject *)other, NPY_SCALAR_PRIORITY);
    return self_prio < other_prio;
}
/* NOTE(review): a stray duplicate `#endif` from extraction was removed here;
   the header's real include-guard `#endif` appears at the end of the file. */
/*
 * BINOP_IS_FORWARD(m1, m2, SLOT_NAME, test_func):
 * evaluates to true when this call is a "forward" binary op, i.e. when m2's
 * type has a number protocol whose SLOT_NAME function pointer differs from
 * test_func (so dispatching to m2 would not re-enter the same slot).
 */
#define BINOP_IS_FORWARD(m1, m2, SLOT_NAME, test_func) \
    (Py_TYPE(m2)->tp_as_number != NULL && \
     (void*)(Py_TYPE(m2)->tp_as_number->SLOT_NAME) != (void*)(test_func))
/*
 * BINOP_GIVE_UP_IF_NEEDED(m1, m2, slot_expr, test_func):
 * in a forward binary op, when binop_should_defer() says m2 should get the
 * first chance, increment Py_NotImplemented's refcount and return it.
 */
#define BINOP_GIVE_UP_IF_NEEDED(m1, m2, slot_expr, test_func) \
    do { \
        if (BINOP_IS_FORWARD(m1, m2, slot_expr, test_func) && \
            binop_should_defer((PyObject*)m1, (PyObject*)m2, 0)) { \
            Py_INCREF(Py_NotImplemented); \
            return Py_NotImplemented; \
        } \
    } while (0)
/*
 * INPLACE_GIVE_UP_IF_NEEDED(m1, m2, slot_expr, test_func):
 * same as BINOP_GIVE_UP_IF_NEEDED but for augmented assignment (inplace=1),
 * returning NotImplemented when the operation should defer to m2.
 */
#define INPLACE_GIVE_UP_IF_NEEDED(m1, m2, slot_expr, test_func) \
    do { \
        if (BINOP_IS_FORWARD(m1, m2, slot_expr, test_func) && \
            binop_should_defer((PyObject*)m1, (PyObject*)m2, 1)) { \
            Py_INCREF(Py_NotImplemented); \
            return Py_NotImplemented; \
        } \
    } while (0)
/*
 * RICHCMP_GIVE_UP_IF_NEEDED(m1, m2):
 * for rich comparisons, return NotImplemented (refcount incremented) when
 * binop_should_defer() says m2 should get the first chance.  There is no
 * forward/reverse slot check here since comparisons share one slot.
 */
#define RICHCMP_GIVE_UP_IF_NEEDED(m1, m2) \
    do { \
        if (binop_should_defer((PyObject*)m1, (PyObject*)m2, 0)) { \
            Py_INCREF(Py_NotImplemented); \
            return Py_NotImplemented; \
        } \
    } while (0)
/* do-while(0) makes each macro behave like a single statement. */
#endif  /* NUMPY_CORE_SRC_COMMON_BINOP_OVERRIDE_H_ */
.\numpy\numpy\_core\src\common\cblasfuncs.c
/*
* This module provides a BLAS optimized matrix multiply,
* inner product and dot for numpy arrays
*/
/* Complex one/zero constants, passed by pointer to the cblas_[cz]* calls. */
static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
/*
 * Helper: dispatch to appropriate cblas_?gemm for typenum.
 *
 * Computes R = A @ B (alpha = 1, beta = 0) for the four BLAS dtypes.
 */
static void
gemm(int typenum, enum CBLAS_ORDER order,
     enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB,
     npy_intp m, npy_intp n, npy_intp k,
     PyArrayObject *A, npy_intp lda, PyArrayObject *B, npy_intp ldb, PyArrayObject *R)
{
    const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B);
    void *Rdata = PyArray_DATA(R);
    /* BLAS requires a leading dimension of at least 1. */
    npy_intp ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;

    switch (typenum) {
        case NPY_DOUBLE:
            CBLAS_FUNC(cblas_dgemm)(order, transA, transB, m, n, k, 1.,
                                    Adata, lda, Bdata, ldb, 0., Rdata, ldc);
            break;
        case NPY_FLOAT:
            CBLAS_FUNC(cblas_sgemm)(order, transA, transB, m, n, k, 1.f,
                                    Adata, lda, Bdata, ldb, 0.f, Rdata, ldc);
            break;
        case NPY_CDOUBLE:
            /* Complex routines take alpha/beta by pointer (oneD/zeroD). */
            CBLAS_FUNC(cblas_zgemm)(order, transA, transB, m, n, k, oneD,
                                    Adata, lda, Bdata, ldb, zeroD, Rdata, ldc);
            break;
        case NPY_CFLOAT:
            CBLAS_FUNC(cblas_cgemm)(order, transA, transB, m, n, k, oneF,
                                    Adata, lda, Bdata, ldb, zeroF, Rdata, ldc);
            break;
    }
}
/*
 * Helper: dispatch to appropriate cblas_?gemv for typenum.
 *
 * Computes R = op(A) @ X (alpha = 1, beta = 0), with X read at stride incX
 * and R written contiguously (stride 1).
 */
static void
gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
     PyArrayObject *A, npy_intp lda, PyArrayObject *X, npy_intp incX,
     PyArrayObject *R)
{
    const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X);
    void *Rdata = PyArray_DATA(R);
    npy_intp m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1);

    switch (typenum) {
        case NPY_DOUBLE:
            CBLAS_FUNC(cblas_dgemv)(order, trans, m, n, 1., Adata, lda, Xdata, incX,
                                    0., Rdata, 1);
            break;
        case NPY_FLOAT:
            CBLAS_FUNC(cblas_sgemv)(order, trans, m, n, 1.f, Adata, lda, Xdata, incX,
                                    0.f, Rdata, 1);
            break;
        case NPY_CDOUBLE:
            /* Complex routines take alpha/beta by pointer (oneD/zeroD). */
            CBLAS_FUNC(cblas_zgemv)(order, trans, m, n, oneD, Adata, lda, Xdata, incX,
                                    zeroD, Rdata, 1);
            break;
        case NPY_CFLOAT:
            CBLAS_FUNC(cblas_cgemv)(order, trans, m, n, oneF, Adata, lda, Xdata, incX,
                                    zeroF, Rdata, 1);
            break;
    }
}
/*
 * Perform a symmetric rank-k update operation using BLAS routines based on
 * the type of elements in the arrays A and R.  ?syrk only fills the upper
 * triangle of R, so the result is mirrored into the lower triangle.
 */
static void
syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
     npy_intp n, npy_intp k,
     PyArrayObject *A, npy_intp lda, PyArrayObject *R)
{
    const void *Adata = PyArray_DATA(A);
    void *Rdata = PyArray_DATA(R);
    /* BLAS requires a leading dimension of at least 1. */
    npy_intp ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
    npy_intp i;
    npy_intp j;

    switch (typenum) {
        case NPY_DOUBLE:
            CBLAS_FUNC(cblas_dsyrk)(order, CblasUpper, trans, n, k, 1.,
                                    Adata, lda, 0., Rdata, ldc);
            /* Mirror the upper triangle into the lower triangle. */
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_double*)PyArray_GETPTR2(R, j, i)) =
                        *((npy_double*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
        case NPY_FLOAT:
            CBLAS_FUNC(cblas_ssyrk)(order, CblasUpper, trans, n, k, 1.f,
                                    Adata, lda, 0.f, Rdata, ldc);
            /* Mirror the upper triangle into the lower triangle. */
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_float*)PyArray_GETPTR2(R, j, i)) =
                        *((npy_float*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
        case NPY_CDOUBLE:
            CBLAS_FUNC(cblas_zsyrk)(order, CblasUpper, trans, n, k, oneD,
                                    Adata, lda, zeroD, Rdata, ldc);
            /* Mirror the upper triangle into the lower triangle. */
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_cdouble*)PyArray_GETPTR2(R, j, i)) =
                        *((npy_cdouble*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
        case NPY_CFLOAT:
            CBLAS_FUNC(cblas_csyrk)(order, CblasUpper, trans, n, k, oneF,
                                    Adata, lda, zeroF, Rdata, ldc);
            /* Mirror the upper triangle into the lower triangle. */
            for (i = 0; i < n; i++) {
                for (j = i + 1; j < n; j++) {
                    *((npy_cfloat*)PyArray_GETPTR2(R, j, i)) =
                        *((npy_cfloat*)PyArray_GETPTR2(R, i, j));
                }
            }
            break;
    }
}
/*
 * Determine the shape of the matrix represented by the PyArrayObject.
 * Returns one of _scalar, _column, _row, or _matrix.
 */
static MatrixShape
_select_matrix_shape(PyArrayObject *array)
{
    switch (PyArray_NDIM(array)) {
        case 0:
            return _scalar;
        case 1:
            /* 1-d: length > 1 is a column vector, length 1 is a scalar. */
            if (PyArray_DIM(array, 0) > 1)
                return _column;
            return _scalar;
        case 2:
            /* 2-d: classify by which axes have extent greater than 1. */
            if (PyArray_DIM(array, 0) > 1) {
                if (PyArray_DIM(array, 1) == 1)
                    return _column;
                else
                    return _matrix;
            }
            if (PyArray_DIM(array, 1) == 1)
                return _scalar;
            return _row;
    }
    /* ndim > 2: treat as a general matrix. */
    return _matrix;
}
/*
 * Check if the array strides are unusable for BLAS and return 1 if so,
 * 0 otherwise.  "Bad" means: the data pointer is not aligned to an
 * itemsize boundary, any stride is negative or not a multiple of the
 * itemsize, or a zero stride spans a dimension of extent > 1.
 */
NPY_NO_EXPORT int
_bad_strides(PyArrayObject *ap)
{
    int itemsize = PyArray_ITEMSIZE(ap);
    int i, N = PyArray_NDIM(ap);
    npy_intp *strides = PyArray_STRIDES(ap);
    npy_intp *dims = PyArray_DIMS(ap);

    /* The data segment itself must start on an itemsize boundary. */
    if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) {
        return 1;
    }
    for (i = 0; i < N; i++) {
        if ((strides[i] < 0) || (strides[i] % itemsize) != 0) {
            return 1;
        }
        /* A zero stride over a dimension > 1 would alias elements. */
        if ((strides[i] == 0 && dims[i] > 1)) {
            return 1;
        }
    }
    return 0;
}
/*
 * dot(a,b)
 * Returns the dot product of a and b for arrays of floating point type.
 * Like the generic numpy equivalent, the product sum is over the last
 * dimension of a and the second-to-last dimension of b.
 * NB: The first argument is not conjugated.
 *
 * This is for use by PyArray_MatrixProduct2.  It is assumed on entry that
 * the arrays ap1 and ap2 have a common data type given by typenum that is
 * float, double, cfloat or cdouble, and have dimension <= 2.  The
 * __array_ufunc__ handling is assumed to have been done already.
 */
NPY_NO_EXPORT PyObject *
cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
                    PyArrayObject *out)
{
    PyArrayObject *result = NULL, *out_buf = NULL;
    npy_intp j, lda, ldb;
    npy_intp l;
    int nd;
    npy_intp ap1stride = 0;
    npy_intp dimensions[NPY_MAXDIMS];
    npy_intp numbytes;
    MatrixShape ap1shape, ap2shape;

    /* BLAS cannot use misaligned/negative strides: copy ap1 if needed. */
    if (_bad_strides(ap1)) {
        PyObject *op1 = PyArray_NewCopy(ap1, NPY_ANYORDER);
        Py_DECREF(ap1);
        ap1 = (PyArrayObject *)op1;
        if (ap1 == NULL) {
            goto fail;
        }
    }
    /* Likewise for ap2. */
    if (_bad_strides(ap2)) {
        PyObject *op2 = PyArray_NewCopy(ap2, NPY_ANYORDER);
        Py_DECREF(ap2);
        ap2 = (PyArrayObject *)op2;
        if (ap2 == NULL) {
            goto fail;
        }
    }
    ap1shape = _select_matrix_shape(ap1);
    ap2shape = _select_matrix_shape(ap2);

    if (ap1shape == _scalar || ap2shape == _scalar) {
        PyArrayObject *oap1, *oap2;
        oap1 = ap1; oap2 = ap2;
        /* One of ap1 or ap2 is a scalar */
        if (ap1shape == _scalar) {
            /* Make ap2 the scalar */
            PyArrayObject *t = ap1;
            ap1 = ap2;
            ap2 = t;
            ap1shape = ap2shape;
            ap2shape = _scalar;
        }
        if (ap1shape == _row) {
            ap1stride = PyArray_STRIDE(ap1, 1);
        }
        else if (PyArray_NDIM(ap1) > 0) {
            ap1stride = PyArray_STRIDE(ap1, 0);
        }
        if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
            npy_intp *thisdims;
            /* The result takes the shape of the non-0-d operand. */
            if (PyArray_NDIM(ap1) == 0) {
                nd = PyArray_NDIM(ap2);
                thisdims = PyArray_DIMS(ap2);
            }
            else {
                nd = PyArray_NDIM(ap1);
                thisdims = PyArray_DIMS(ap1);
            }
            l = 1;
            for (j = 0; j < nd; j++) {
                dimensions[j] = thisdims[j];
                l *= dimensions[j];
            }
        }
        else {
            l = PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1);
            /* The summation dimensions must agree. */
            if (PyArray_DIM(oap2, 0) != l) {
                dot_alignment_error(oap1, PyArray_NDIM(oap1) - 1, oap2, 0);
                goto fail;
            }
            nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
            /*
             * nd = 0 or 1 or 2. If nd == 0 do nothing ...
             */
            if (nd == 1) {
                /*
                 * Either PyArray_NDIM(ap1) is 1 dim or PyArray_NDIM(ap2) is
                 * 1 dim and the other is 2 dim
                 */
                dimensions[0] = (PyArray_NDIM(oap1) == 2) ?
                                PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1);
                l = dimensions[0];
                /*
                 * Fix it so that dot(shape=(N,1), shape=(1,))
                 * and dot(shape=(1,), shape=(1,N)) both return
                 * an (N,) array (but use the fast scalar code)
                 */
            }
            else if (nd == 2) {
                dimensions[0] = PyArray_DIM(oap1, 0);
                dimensions[1] = PyArray_DIM(oap2, 1);
                /*
                 * We need to make sure that dot(shape=(1,1), shape=(1,N))
                 * and dot(shape=(N,1),shape=(1,1)) uses
                 * scalar multiplication appropriately
                 */
                if (ap1shape == _row) {
                    l = dimensions[1];
                }
                else {
                    l = dimensions[0];
                }
            }
            /* Check if the summation dimension is 0-sized */
            if (PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1) == 0) {
                l = 0;
            }
        }
    }
    else {
        /*
         * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
         * Both ap1 and ap2 are vectors or matrices
         */
        l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1);
        if (PyArray_DIM(ap2, 0) != l) {
            dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, 0);
            goto fail;
        }
        nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
        if (nd == 1) {
            dimensions[0] = (PyArray_NDIM(ap1) == 2) ?
                            PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1);
        }
        else if (nd == 2) {
            dimensions[0] = PyArray_DIM(ap1, 0);
            dimensions[1] = PyArray_DIM(ap2, 1);
        }
    }

    /* Allocate the output buffer (or wrap the user-supplied `out`). */
    out_buf = new_array_for_sum(ap1, ap2, out, nd, dimensions, typenum, &result);
    if (out_buf == NULL) {
        goto fail;
    }
    numbytes = PyArray_NBYTES(out_buf);
    memset(PyArray_DATA(out_buf), 0, numbytes);
    /* Empty output or empty summation axis: the zeroed buffer is the answer. */
    if (numbytes == 0 || l == 0) {
        Py_DECREF(ap1);
        Py_DECREF(ap2);
        Py_DECREF(out_buf);
        return PyArray_Return(result);
    }
    /*
     * NOTE(review): upstream numpy has `if (ap2shape == _scalar) { ... }`
     * here (multiplication by a scalar, Level 1 BLAS).  That branch's
     * opening line and body appear to have been lost in extraction -- only
     * its closing brace below survives, so as written the braces do not
     * balance.  Restore the branch from the upstream source before use.
     */
    }
    else if ((ap2shape == _column) && (ap1shape != _matrix)) {
        NPY_BEGIN_ALLOW_THREADS;

        /* Dot product between two vectors -- Level 1 BLAS */
        PyDataType_GetArrFuncs(PyArray_DESCR(out_buf))->dotfunc(
                PyArray_DATA(ap1), PyArray_STRIDE(ap1, (ap1shape == _row)),
                PyArray_DATA(ap2), PyArray_STRIDE(ap2, 0),
                PyArray_DATA(out_buf), l, NULL);
        NPY_END_ALLOW_THREADS;
    }
    else if (ap1shape == _matrix && ap2shape != _matrix) {
        /* Matrix vector multiplication -- Level 2 BLAS */
        /* lda must be MAX(M,1) */
        enum CBLAS_ORDER Order;
        npy_intp ap2s;

        /* gemv needs a single memory segment; copy if necessary. */
        if (!PyArray_ISONESEGMENT(ap1)) {
            PyObject *new;
            new = PyArray_Copy(ap1);
            Py_DECREF(ap1);
            ap1 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        NPY_BEGIN_ALLOW_THREADS
        /* Pick the storage order matching ap1's contiguity. */
        if (PyArray_ISCONTIGUOUS(ap1)) {
            Order = CblasRowMajor;
            lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
        }
        else {
            Order = CblasColMajor;
            lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1);
        }
        /* Element (not byte) stride of the vector operand. */
        ap2s = PyArray_STRIDE(ap2, 0) / PyArray_ITEMSIZE(ap2);
        gemv(typenum, Order, CblasNoTrans, ap1, lda, ap2, ap2s, out_buf);
        NPY_END_ALLOW_THREADS;
    }
    else if (ap1shape != _matrix && ap2shape == _matrix) {
        /* Vector matrix multiplication -- Level 2 BLAS */
        enum CBLAS_ORDER Order;
        npy_intp ap1s;

        /* gemv needs a single memory segment; copy if necessary. */
        if (!PyArray_ISONESEGMENT(ap2)) {
            PyObject *new;
            new = PyArray_Copy(ap2);
            Py_DECREF(ap2);
            ap2 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        NPY_BEGIN_ALLOW_THREADS
        if (PyArray_ISCONTIGUOUS(ap2)) {
            Order = CblasRowMajor;
            lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
        }
        else {
            Order = CblasColMajor;
            lda = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1);
        }
        /* Element stride of the vector operand ap1. */
        if (ap1shape == _row) {
            ap1s = PyArray_STRIDE(ap1, 1) / PyArray_ITEMSIZE(ap1);
        }
        else {
            ap1s = PyArray_STRIDE(ap1, 0) / PyArray_ITEMSIZE(ap1);
        }
        gemv(typenum, Order, CblasTrans, ap2, lda, ap1, ap1s, out_buf);
        NPY_END_ALLOW_THREADS;
    }
    else {
        /*
         * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2)
         * Matrix matrix multiplication -- Level 3 BLAS
         *   L x M  multiplied by  M x N
         */
        enum CBLAS_ORDER Order;
        enum CBLAS_TRANSPOSE Trans1, Trans2;
        npy_intp M, N, L;

        /* Optimization possible: */
        /*
         * We may be able to handle single-segment arrays here
         * using appropriate values of Order, Trans1, and Trans2.
         */
        if (!PyArray_IS_C_CONTIGUOUS(ap2) && !PyArray_IS_F_CONTIGUOUS(ap2)) {
            PyObject *new = PyArray_Copy(ap2);
            Py_DECREF(ap2);
            ap2 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        if (!PyArray_IS_C_CONTIGUOUS(ap1) && !PyArray_IS_F_CONTIGUOUS(ap1)) {
            PyObject *new = PyArray_Copy(ap1);
            Py_DECREF(ap1);
            ap1 = (PyArrayObject *)new;
            if (new == NULL) {
                goto fail;
            }
        }
        NPY_BEGIN_ALLOW_THREADS;
        Order = CblasRowMajor;
        Trans1 = CblasNoTrans;
        Trans2 = CblasNoTrans;
        L = PyArray_DIM(ap1, 0);
        N = PyArray_DIM(ap2, 1);
        M = PyArray_DIM(ap2, 0);
        lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
        ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
        /*
         * Avoid temporary copies for arrays in Fortran order
         */
        if (PyArray_IS_F_CONTIGUOUS(ap1)) {
            Trans1 = CblasTrans;
            lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1);
        }
        if (PyArray_IS_F_CONTIGUOUS(ap2)) {
            Trans2 = CblasTrans;
            ldb = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1);
        }
        /*
         * Use syrk if we have a case of a matrix times its transpose.
         * Otherwise, use gemm for all other cases.
         */
        if (
            (PyArray_BYTES(ap1) == PyArray_BYTES(ap2)) &&
            (PyArray_DIM(ap1, 0) == PyArray_DIM(ap2, 1)) &&
            (PyArray_DIM(ap1, 1) == PyArray_DIM(ap2, 0)) &&
            (PyArray_STRIDE(ap1, 0) == PyArray_STRIDE(ap2, 1)) &&
            (PyArray_STRIDE(ap1, 1) == PyArray_STRIDE(ap2, 0)) &&
            ((Trans1 == CblasTrans) ^ (Trans2 == CblasTrans)) &&
            ((Trans1 == CblasNoTrans) ^ (Trans2 == CblasNoTrans))
        ) {
            if (Trans1 == CblasNoTrans) {
                syrk(typenum, Order, Trans1, N, M, ap1, lda, out_buf);
            }
            else {
                syrk(typenum, Order, Trans1, N, M, ap2, ldb, out_buf);
            }
        }
        else {
            gemm(typenum, Order, Trans1, Trans2, L, N, M, ap1, lda, ap2, ldb,
                 out_buf);
        }
        NPY_END_ALLOW_THREADS;
    }
    Py_DECREF(ap1);
    Py_DECREF(ap2);

    /* Trigger possible copyback into `result` */
    PyArray_ResolveWritebackIfCopy(out_buf);
    Py_DECREF(out_buf);

    return PyArray_Return(result);

    /*
     * NOTE(review): the `fail:` label targeted by the `goto fail` statements
     * above is missing here (lost in extraction), as is the
     * `Py_XDECREF(result);` that the original comment on the last cleanup
     * step described.  As written, this cleanup is unreachable and the
     * gotos do not compile; restore both from the upstream source.
     */
    Py_XDECREF(ap1);
    Py_XDECREF(ap2);
    Py_XDECREF(out_buf);
    return NULL;
}
.\numpy\numpy\_core\src\common\cblasfuncs.h
/*
 * NOTE(review): the original header's include guard was lost in extraction;
 * the comment this replaces described it.
 */
NPY_NO_EXPORT PyObject *
/* BLAS-optimized matrix product; defined in cblasfuncs.c. */
cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *);
.\numpy\numpy\_core\src\common\common.hpp
/*
* 下面的 C++ 头文件可以独立安全使用,但它们被汇集在一起是为了我们和未来对支持预编译头文件的需求方便。
*/
.\numpy\numpy\_core\src\common\dlpack\dlpack.h
// 根据以下链接获取的代码片段:
// https://github.com/dmlc/dlpack/blob/bbd2f4d32427e548797929af08cfe2a9cbb3cf12/include/dlpack/dlpack.h
// 并在其中为 DLManagedTensorVersioned 添加了 typedef
/*!
* 版权声明,版权归 2017 年 Contributors 所有
* \file dlpack.h
* \brief DLPack 的公共头文件
*/
/**
* \brief 与 C++ 的兼容性宏定义
*/
/*! \brief DLPack 当前的主版本号 */
/*! \brief DLPack 当前的次版本号 */
/*! \brief DLPACK_DLL 在 Windows 下的前缀定义 */
/* NOTE(review): upstream wraps this in #ifdef __cplusplus — confirm before
 * compiling as C. */
extern "C" {
/*!
 * \brief The DLPack version.
 *
 * A change in major version indicates that the ABI data layout
 * (DLManagedTensorVersioned) has changed.  A change in minor version
 * indicates additions (e.g. new device types) with an unchanged ABI.
 * If a consumer obtains a DLPack tensor whose major version differs from
 * the one in this header (major != DLPACK_MAJOR_VERSION), it must call the
 * deleter (which is safe to do) and must not access any other field, since
 * the memory layout has changed.  On a minor-version mismatch the tensor is
 * safe to use as long as the consumer knows how to interpret all fields;
 * minor-version updates indicate added enum values.
 */
typedef struct {
  /*! \brief DLPack major version. */
  uint32_t major;
  /*! \brief DLPack minor version. */
  uint32_t minor;
} DLPackVersion;
/*!
 * \brief The device type in DLDevice.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
/* C has no underlying-type syntax for enums; upstream guards the two
 * declarations with the preprocessor, which was lost in extraction. */
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*! \brief Pinned CUDA CPU memory by cudaMallocHost */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs */
  kDLROCM = 10,
  /*! \brief Pinned ROCm CPU memory allocated by hipMallocHost */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used for quickly test extension device
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*! \brief CUDA managed/unified memory allocated by cudaMallocManaged */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. Call to oneAPI runtime is required to determine the device
   * type, the USM allocation type and the sycl context it is bound to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for next generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
  /*! \brief Microsoft MAIA devices */
  kDLMAIA = 17,
} DLDeviceType;
/*!
 * \brief A Device for Tensor and operator.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;
/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be
   * well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*! \brief boolean */
  kDLBool = 6U,
} DLDataTypeCode;
/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow the
 * native endian-ness. An explicit error message should be raised when attempting to
 * export an array with non-native endianness
 *
 * Examples
 *  - float: type_code = 2, bits = 32, lanes = 1
 *  - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
 *  - int8: type_code = 0, bits = 8, lanes = 1
 *  - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *  - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *    convention, the underlying storage size of bool is 8 bits)
 */
typedef struct {
  /*!
   * \brief Type code of the base type.
   * Stored as uint8_t rather than DLDataTypeCode to keep the struct compact,
   * but the value should be one of the DLDataTypeCode enum values.
   */
  uint8_t code;
  /*! \brief Number of bits; common choices are 8, 16, 32. */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
/*!
 * \brief Plain C Tensor object; does not manage memory.
 */
typedef struct {
  /*!
   * \brief The data pointer points to the allocated data.  This will be a
   * device pointer on CUDA devices or a cl_mem handle in OpenCL, and may be
   * opaque on some device types.  This pointer is always aligned to 256
   * bytes as in CUDA; the `byte_offset` field should be used to point to
   * the beginning of the data.
   *
   * Note: as of Nov 2021 multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, ...) do not respect this 256-byte alignment on CPU/CUDA/ROCm and
   * always use `byte_offset=0`.  This must be fixed (and this note updated)
   * before the alignment can be relied upon.
   *
   * The memory required to store a given DLTensor is the product of its
   * shape entries times (dtype.bits * dtype.lanes + 7) / 8 bytes.
   * If the tensor has size zero, the data pointer should be set to NULL.
   */
  void* data;
  /*! \brief The device where the tensor resides */
  DLDevice device;
  /*! \brief Number of dimensions */
  int32_t ndim;
  /*! \brief The data type of the tensor */
  DLDataType dtype;
  /*! \brief The shape of the tensor */
  int64_t* shape;
  /*!
   * \brief Strides of the tensor (in number of elements, not bytes).
   * Can be NULL, indicating the tensor is compact and row-majored.
   */
  int64_t* strides;
  /*! \brief Byte offset from the data pointer to the start of the data */
  uint64_t byte_offset;
} DLTensor;
/*!
 * \brief C Tensor object that manages the memory of a DLTensor.
 * This data structure is intended to facilitate the borrowing of a DLTensor
 * by another framework.  It is not meant to transfer the tensor.  When the
 * borrowing framework no longer needs the tensor, it should call the deleter
 * to notify the host that the resource is no longer needed.
 *
 * \note This data structure is used as the Legacy DLManagedTensor in the
 * DLPack exchange and is deprecated after DLPack v0.8.
 * Use DLManagedTensorVersioned instead.
 * This data structure may be renamed or deleted in future versions.
 *
 * \sa DLManagedTensorVersioned
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief the context of the original host framework of DLManagedTensor in
   *   which DLManagedTensor is used in the framework. It can also be NULL.
   */
  void * manager_ctx;
  /*!
   * \brief Destructor - this should be called
   * to destruct the manager_ctx which backs the DLManagedTensor. It can be
   * NULL if there is no way for the caller to provide a reasonable destructor.
   * The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
// Bit masks used in the DLManagedTensorVersioned `flags` field.
// NOTE(review): the `#define` lines below were stripped during extraction,
// leaving only their doc comments; restored per the upstream DLPack header.
/*! \brief bit mask to indicate that the tensor is read only. */
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
/*!
 * \brief bit mask to indicate that the tensor is a copy made by the producer.
 *
 * If set, the tensor is considered solely owned throughout its lifetime by the
 * consumer, until the producer-provided deleter is invoked.
 */
#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
/*!
 * \brief A versioned and managed C Tensor object, managing the memory of a
 * DLTensor.
 *
 * This data structure is intended to facilitate the borrowing of DLTensor by
 * another framework. It is not meant to transfer the tensor. When the borrowing
 * framework doesn't need the tensor, it should call the deleter to notify the
 * host that the resource is no longer needed.
 *
 * \note This is the current standard DLPack exchange data structure.
 */
typedef struct DLManagedTensorVersioned {
/*!
 * \brief The API and ABI version of the current managed Tensor
 */
DLPackVersion version;
/*!
 * \brief The context of the original host framework.
 *
 * Stores the state of the framework in which this
 * DLManagedTensorVersioned is used. It can also be NULL.
 */
void *manager_ctx;
/*!
 * \brief Destructor.
 *
 * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned.
 * It can be NULL if there is no way for the caller to provide a reasonable
 * destructor. The destructor deletes the argument self as well.
 */
void (*deleter)(struct DLManagedTensorVersioned *self);
/*!
 * \brief Additional bitmask flags information about the tensor.
 *
 * By default the flags should be set to 0.
 *
 * \note Future ABI changes should keep everything up to this field
 * stable, to ensure that deleter can be correctly called.
 *
 * \sa DLPACK_FLAG_BITMASK_READ_ONLY
 * \sa DLPACK_FLAG_BITMASK_IS_COPIED
 */
uint64_t flags;
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
} DLManagedTensorVersioned;
#ifdef __cplusplus
} // DLPACK_EXTERN_C
#endif
#endif // DLPACK_DLPACK_H_
.\numpy\numpy\_core\src\common\float_status.hpp
namespace np {
/// @addtogroup cpp_core_utility
/// @{
/**
* Class wraps floating-point environment operations,
* provides lazy access to its functionality.
*/
class FloatStatus {
  public:
    /*
     * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
     * unsupported. In such cases NumPy will not report these correctly, but we
     * should still allow compiling (whether tests pass or not).
     * By defining them as 0 locally, we make them no-ops. Unlike these defines,
     * for example `musl` still defines all of the functions (as no-ops):
     * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
     * and does similar replacement in its tests:
     * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD
     */
// NOTE(review): each constant below was defined twice in the dump — the two
// arms of a stripped #ifdef/#else. Guards restored so each is defined once.
#ifdef FE_DIVBYZERO
    static constexpr int kDivideByZero = FE_DIVBYZERO;
#else
    static constexpr int kDivideByZero = 0;
#endif
#ifdef FE_INVALID
    static constexpr int kInvalid = FE_INVALID;
#else
    static constexpr int kInvalid = 0;
#endif
#ifdef FE_INEXACT
    static constexpr int kInexact = FE_INEXACT;
#else
    static constexpr int kInexact = 0;
#endif
#ifdef FE_OVERFLOW
    static constexpr int kOverflow = FE_OVERFLOW;
#else
    static constexpr int kOverflow = 0;
#endif
#ifdef FE_UNDERFLOW
    static constexpr int kUnderflow = FE_UNDERFLOW;
#else
    static constexpr int kUnderflow = 0;
#endif
    /// Bitwise OR of every exception flag supported on this platform;
    /// zero when <fenv.h> provides none of them.
    static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
                                       kOverflow | kUnderflow);

    /// Capture the currently raised floating-point exception flags.
    /// @param clear_on_dst when true, the captured flags are cleared by the
    ///        destructor ("dst" follows the original naming).
    FloatStatus(bool clear_on_dst=true)
        : clear_on_dst_(clear_on_dst)
    {
        // Fetch the current status only when some exception flag exists;
        // `if constexpr` makes this a compile-time no-op otherwise.
        if constexpr (kAllExcept != 0) {
            fpstatus_ = fetestexcept(kAllExcept);
        }
        else {
            fpstatus_ = 0;
        }
    }
    /// Clear any captured exception flags, if requested at construction.
    ~FloatStatus()
    {
        if constexpr (kAllExcept != 0) {
            if (fpstatus_ != 0 && clear_on_dst_) {
                feclearexcept(kAllExcept);
            }
        }
    }
    /// True when the divide-by-zero flag was set at construction time.
    constexpr bool IsDivideByZero() const
    {
        return (fpstatus_ & kDivideByZero) != 0;
    }
    /// True when the inexact flag was set at construction time.
    constexpr bool IsInexact() const
    {
        return (fpstatus_ & kInexact) != 0;
    }
    /// True when the invalid-operation flag was set at construction time.
    constexpr bool IsInvalid() const
    {
        return (fpstatus_ & kInvalid) != 0;
    }
    /// True when the overflow flag was set at construction time.
    constexpr bool IsOverFlow() const
    {
        return (fpstatus_ & kOverflow) != 0;
    }
    /// True when the underflow flag was set at construction time.
    constexpr bool IsUnderFlow() const
    {
        return (fpstatus_ & kUnderflow) != 0;
    }
    /// Raise the divide-by-zero exception (no-op when unsupported).
    static void RaiseDivideByZero()
    {
        if constexpr (kDivideByZero != 0) {
            feraiseexcept(kDivideByZero);
        }
    }
    /// Raise the inexact exception (no-op when unsupported).
    static void RaiseInexact()
    {
        if constexpr (kInexact != 0) {
            feraiseexcept(kInexact);
        }
    }
    /// Raise the invalid-operation exception (no-op when unsupported).
    static void RaiseInvalid()
    {
        if constexpr (kInvalid != 0) {
            feraiseexcept(kInvalid);
        }
    }
    /// Raise the overflow exception (no-op when unsupported).
    static void RaiseOverflow()
    {
        if constexpr (kOverflow != 0) {
            feraiseexcept(kOverflow);
        }
    }
    /// Raise the underflow exception (no-op when unsupported).
    static void RaiseUnderflow()
    {
        if constexpr (kUnderflow != 0) {
            feraiseexcept(kUnderflow);
        }
    }
  private:
    bool clear_on_dst_;  // clear captured flags in the destructor?
    int fpstatus_;       // flags captured at construction
};
/// @} cpp_core_utility
// 结束命名空间 np
} // namespace np
// 结束条件编译指令 NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
.\numpy\numpy\_core\src\common\get_attr_string.h
// 定义内联函数,用于检查是否为基本的 Python 类型
/*
 * Return NPY_TRUE when `tp` is one of the builtin Python types on which
 * numpy-specific special attributes never need to be looked up.
 */
static inline npy_bool
_is_basic_python_type(PyTypeObject *tp)
{
    /* Basic numeric types */
    if (tp == &PyBool_Type || tp == &PyLong_Type ||
            tp == &PyFloat_Type || tp == &PyComplex_Type) {
        return NPY_TRUE;
    }
    /* Basic sequence/container types */
    if (tp == &PyList_Type || tp == &PyTuple_Type ||
            tp == &PyDict_Type || tp == &PySet_Type ||
            tp == &PyFrozenSet_Type ||
            tp == &PyUnicode_Type || tp == &PyBytes_Type) {
        return NPY_TRUE;
    }
    /* Other builtins and singletons */
    if (tp == &PySlice_Type ||
            tp == Py_TYPE(Py_None) ||
            tp == Py_TYPE(Py_Ellipsis) ||
            tp == Py_TYPE(Py_NotImplemented)) {
        return NPY_TRUE;
    }
    /* TODO: ndarray, but we can't see PyArray_Type here */
    return NPY_FALSE;
}
/*
* 查找特殊方法,遵循 Python 的查找方式,查找类型对象而不是实例本身。
*
* 假设特殊方法是特定于 numpy 的,因此不查看内建类型。但会检查基本的 ndarray 和 numpy 标量类型。
*
* 未来可以更像 _Py_LookupSpecial 的实现。
*/
/*
 * Lookup a special method following the Python lookup convention: on the
 * type object, not the instance. Returns a new reference, or NULL (with a
 * cleared AttributeError) when the attribute does not exist.
 */
static inline PyObject *
PyArray_LookupSpecial(PyObject *obj, PyObject *name_unicode)
{
    PyTypeObject *tp = Py_TYPE(obj);

    /* Plain builtin types never carry numpy special attributes. */
    if (_is_basic_python_type(tp)) {
        return NULL;
    }
    /* Fetch the attribute from the type object. */
    PyObject *attr = PyObject_GetAttr((PyObject *)tp, name_unicode);
    if (attr == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
        /* A missing attribute is expected; swallow only that error. */
        PyErr_Clear();
    }
    return attr;
}
/*
* PyArray_LookupSpecial_OnInstance:
*
* 实现了不正确的特殊方法查找规则,违反了 Python 的约定,查找实例而不是类型。
*
* 为了向后兼容而保留。未来应该弃用此功能。
*/
/*
 * Implements incorrect special-method lookup rules: contrary to the Python
 * convention, the lookup is done on the instance rather than the type.
 * Kept for backward compatibility; should be deprecated in the future.
 * Returns a new reference, or NULL (with a cleared AttributeError).
 */
static inline PyObject *
PyArray_LookupSpecial_OnInstance(PyObject *obj, PyObject *name_unicode)
{
    PyTypeObject *tp = Py_TYPE(obj);

    /* Plain builtin types never carry numpy special attributes. */
    if (_is_basic_python_type(tp)) {
        return NULL;
    }
    /* Fetch the attribute from the instance itself. */
    PyObject *attr = PyObject_GetAttr(obj, name_unicode);
    if (attr == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
        /* A missing attribute is expected; swallow only that error. */
        PyErr_Clear();
    }
    return attr;
}
.\numpy\numpy\_core\src\common\gil_utils.c
// 定义宏:禁用已废弃的 NumPy API,并使用当前版本的 API
// 定义宏:用于多维数组模块
// 定义宏:启用 "PY_SSIZE_T_CLEAN",确保所有使用 Py_ssize_t 类型的 API 都能正确清除内存
// 引入 Python.h 头文件,包含了 Python C API 的核心功能
// 引入 numpy/ndarraytypes.h 头文件,该文件定义了 NumPy 的数组类型和相关的宏定义
// 引入 stdarg.h 头文件,提供了支持可变参数函数的宏定义和类型
/*
 * Set a Python exception from code that may run while the GIL is released.
 * Re-acquires C-API access, sets the error only if none is pending, and
 * releases C-API access again.
 *
 * NOTE(review): the dump contained BOTH arms of a stripped
 * `#if !defined(PYPY_VERSION)` here, which reused `va` after
 * PyErr_FormatV consumed it (undefined behavior) and set the exception
 * twice. The guard is restored: CPython uses PyErr_FormatV directly,
 * PyPy (which lacks it) formats the message first.
 */
NPY_NO_EXPORT void
npy_gil_error(PyObject *type, const char *format, ...)
{
    va_list va;
    va_start(va, format);
    NPY_ALLOW_C_API_DEF;
    NPY_ALLOW_C_API;
    if (!PyErr_Occurred()) {
#if !defined(PYPY_VERSION)
        PyErr_FormatV(type, format, va);
#else
        PyObject *exc_str = PyUnicode_FromFormatV(format, va);
        if (exc_str == NULL) {
            // no reason to have special handling for this error case, since
            // this function sets an error anyway
            NPY_DISABLE_C_API;
            va_end(va);
            return;
        }
        PyErr_SetObject(type, exc_str);
        Py_DECREF(exc_str);
#endif
    }
    NPY_DISABLE_C_API;
    va_end(va);
}
// Function: npy_gil_error — report an error raised while the GIL is released.
// NOTE(review): this is a verbatim repetition of the definition above (the
// dump duplicated it); it carries the same stripped-#if defect, fixed the
// same way here so the two copies stay in agreement.
NPY_NO_EXPORT void
npy_gil_error(PyObject *type, const char *format, ...)
{
    // Variadic argument list for the printf-style format.
    va_list va;
    va_start(va, format);
    // Declare and enable NumPy C-API access (re-acquires the GIL).
    NPY_ALLOW_C_API_DEF;
    NPY_ALLOW_C_API;
    // Only set an error if none is already pending.
    if (!PyErr_Occurred()) {
#if !defined(PYPY_VERSION)
        // CPython: format and set the exception in one call.
        PyErr_FormatV(type, format, va);
#else
        // PyPy: PyErr_FormatV is unavailable; build the message first.
        PyObject *exc_str = PyUnicode_FromFormatV(format, va);
        if (exc_str == NULL) {
            // No special handling needed: an error is set either way.
            NPY_DISABLE_C_API;
            va_end(va);
            return;
        }
        PyErr_SetObject(type, exc_str);
        Py_DECREF(exc_str);
#endif
    }
    // Release NumPy C-API access and finish the va_list.
    NPY_DISABLE_C_API;
    va_end(va);
}
.\numpy\numpy\_core\src\common\gil_utils.h
// C linkage so the declaration is usable from both C and C++ translation units.
extern "C" {
// Set a Python exception from code that may run without holding the GIL;
// NPY_NO_EXPORT keeps the symbol internal to the extension module.
NPY_NO_EXPORT void
npy_gil_error(PyObject *type, const char *format, ...);
}
.\numpy\numpy\_core\src\common\half.hpp
// TODO(@seiko2plus):
// - covers half-precision operations that being supported by numpy/halffloat.h
// - add support for arithmetic operations
// - enables __fp16 causes massive FP exceptions on aarch64,
// needs a deep investigation
namespace np {
/// @addtogroup cpp_core_types
/// @{
/// Provides a type that implements 16-bit floating point (half-precision).
/// This type is ensured to be 16-bit size.
/// Provides a type that implements 16-bit floating point (half-precision).
/// This type is ensured to be 16-bit in size. Software/emulated variant.
/// NOTE(review): the dump flattened several ISA-specific #if branches into
/// straight-line code (e.g. the bare `true`/`false` pair, and three bodies
/// in one constructor). Guards restored to match numpy's half.hpp — confirm
/// the exact macro names against upstream.
class Half final {
  public:
    /// Whether `Half` has a full native HW support.
    static constexpr bool kNative = false;
    /// Whether `Half` has a native HW support for single/double conversion.
    template<typename T>
    static constexpr bool kNativeConversion = (
        (
            std::is_same_v<T, float> &&
        #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
            true
        #else
            false
        #endif
        ) || (
            std::is_same_v<T, double> &&
        #if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3_HALF_DOUBLE)
            true
        #else
            false
        #endif
        )
    );
    /// Default constructor. initialize nothing.
    Half() = default;
    /// Construct from float.
    /// If there is no hardware optimization available, rounding will always
    /// be set to ties to even.
    explicit Half(float f)
    {
    #if defined(NPY_HAVE_FP16)
        // x86 F16C: convert through an SSE register.
        __m128 mf = _mm_load_ss(&f);
        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
        // POWER9 VSX3: convert a splatted vector, then extract one lane.
        __vector float vf32 = vec_splats(f);
        __vector unsigned short vf16;
        __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
    #if !defined(NPY_BIG_ENDIAN)
        bits_ = vec_extract(vf16, 1);  // lane index differs by endianness — TODO confirm guard
    #else
        bits_ = vec_extract(vf16, 0);
    #endif
    #else
        // Software fallback: ties-to-even rounding.
        bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
    #endif
    }
/// Construct from double.
/// If there are no hardware optimization available, rounding will always
/// be set to ties to even.
explicit Half(double f)
{
// Load double `f` into SSE register
__m128d md = _mm_load_sd(&f);
// Convert double to half-precision and store in bits_
bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
// Convert double `f` to half-precision using VSX3 instruction set
__asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits_) : "wa" (f));
// Fallback: convert double `f` to half-precision using software implementation
bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
}
/// Cast to float
explicit operator float() const
{
float ret;
// Convert half-precision to float
_mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
return ret;
```
// 如果定义了 NPY_HAVE_VSX3 和 vec_extract_fp_from_shorth,使用它们来提取特定位数的浮点数
return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
// 如果定义了 NPY_HAVE_VSX3 和 NPY_HAVE_VSX_ASM,使用内联汇编来执行类型转换操作
__vector float vf32;
__asm__ __volatile__("xvcvhpsp %x0,%x1"
: "=wa"(vf32)
: "wa"(vec_splats(bits_)));
return vec_extract(vf32, 0);
// 否则,使用 BitCast 进行类型转换并返回结果
return BitCast<float>(half_private::ToFloatBits(bits_));
}
/// Cast to double
explicit operator double() const
{
// 如果定义了 NPY_HAVE_AVX512FP16,使用 AVX512 指令集进行半精度到双精度的转换
double ret;
_mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
return ret;
// 如果定义了 NPY_HAVE_VSX3 和 NPY_HAVE_VSX3_HALF_DOUBLE,使用 VSX3 指令集进行半精度到双精度的转换
double f64;
__asm__ __volatile__("xscvhpdp %x0,%x1"
: "=wa"(f64)
: "wa"(bits_));
return f64;
// 否则,使用 BitCast 进行类型转换并返回结果
return BitCast<double>(half_private::ToDoubleBits(bits_));
}
/// Returns a new Half constructed from the IEEE 754 binary16.
static constexpr Half FromBits(uint16_t bits)
{
// 使用给定的 16 位表示构造一个新的 Half 类型对象
Half h{};
h.bits_ = bits;
return h;
}
/// Returns the IEEE 754 binary16 representation.
constexpr uint16_t Bits() const
{
// 返回当前 Half 对象的 16 位表示
return bits_;
}
/// @name Comparison operators (ordered)
/// @{
constexpr bool operator==(Half r) const
{
// 比较两个 Half 对象是否相等,NaN 时返回 false
return !(IsNaN() || r.IsNaN()) && Equal(r);
}
constexpr bool operator<(Half r) const
{
// 比较当前 Half 对象是否小于另一个 Half 对象,NaN 时返回 false
return !(IsNaN() || r.IsNaN()) && Less(r);
}
constexpr bool operator<=(Half r) const
{
// 比较当前 Half 对象是否小于等于另一个 Half 对象,NaN 时返回 false
return !(IsNaN() || r.IsNaN()) && LessEqual(r);
}
constexpr bool operator>(Half r) const
{
// 比较当前 Half 对象是否大于另一个 Half 对象,NaN 时返回通过反向比较结果
return r < *this;
}
constexpr bool operator>=(Half r) const
{
// 比较当前 Half 对象是否大于等于另一个 Half 对象,NaN 时返回通过反向比较结果
return r <= *this;
}
/// @}
/// @name Comparison operators (unordered)
/// @{
constexpr bool operator!=(Half r) const
{
// 比较两个 Half 对象是否不相等,NaN 时返回 true
return !(*this == r);
}
/// @} Comparison operators
/// @name Comparison with no guarantee of NaN behavior
/// @{
constexpr bool Less(Half r) const
{
// Compare two Half values as their raw bit patterns; NaN handling is the
// caller's responsibility.
uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
b = static_cast<uint_fast16_t>(r.bits_);
bool sign_a = (a & 0x8000u) == 0x8000u;
bool sign_b = (b & 0x8000u) == 0x8000u;
// Same sign:
//   negative `a`: true when a > b as unsigned bits (magnitude order flips),
//   excluding a == b (covers the +0/-0 pair);
//   positive `a`: true when a < b as unsigned bits.
// Different sign:
//   true when `a` is the negative one, unless both are +-0
//   (a|b == 0x8000u only for the {+0, -0} pair).
return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
: sign_a && ((a | b) != 0x8000u);
}
// Less-or-equal on raw bit patterns; NaN handling is the caller's job.
constexpr bool LessEqual(Half r) const
{
// Raw bit patterns of both operands.
uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
b = static_cast<uint_fast16_t>(r.bits_);
// Sign bits of both operands.
bool sign_a = (a & 0x8000u) == 0x8000u;
bool sign_b = (b & 0x8000u) == 0x8000u;
// Same sign:
//   negative `a`: a > b in unsigned bits, or a == b
//   positive `a`: a < b in unsigned bits, or a == b
//   (the explicit a == b also covers the +-0 pair).
// Different sign:
//   true when `a` is negative, or when both are +-0
//   (a|b == 0x8000u only for the {+0, -0} pair).
return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
: sign_a || ((a | b) == 0x8000u);
}
// Bit-level equality, treating +0 and -0 as equal; NaN is NOT special-cased.
constexpr bool Equal(Half r) const
{
// Deliberately plain uint16_t (no uint_fast16_t) — see original note.
uint16_t a = bits_, b = r.bits_;
// Equal bits, or the {+0, -0} pair (a|b == 0x8000u only then).
return a == b || ((a | b) == 0x8000u);
}
/// @} Comparison
/// @name Properties
// @{
// True when this value is a NaN (all-ones exponent, nonzero significand).
constexpr bool IsNaN() const
{
// Exponent must be 0x7c00 and the significand nonzero (else it is +-inf).
return ((bits_ & 0x7c00u) == 0x7c00u) &&
((bits_ & 0x03ffu) != 0);
}
/// @} Properties
private:
// Raw IEEE 754 binary16 bit pattern.
uint16_t bits_;
// Native variant of Half backed by the compiler's __fp16 type.
// NOTE(review): this was the other arm of a stripped preprocessor
// conditional selecting between the emulated class above and this native
// one (presumably guarded on IEEE-format ARM __fp16 — confirm upstream).
class Half final {
public:
// This variant has full native hardware support.
static constexpr bool kNative = true;
// Native float/double conversions are always available here.
template<typename T>
static constexpr bool kNativeConversion = (
std::is_same_v<T, float> || std::is_same_v<T, double>
);
// Default constructor. initialize nothing.
Half() = default;
// Implicit construction from the underlying __fp16 value.
constexpr Half(__fp16 h) : half_(h)
{}
// Implicit conversion back to __fp16.
constexpr operator __fp16() const
{ return half_; }
// Build a Half from a raw IEEE 754 binary16 bit pattern.
static Half FromBits(uint16_t bits)
{
Half h;
h.half_ = BitCast<__fp16>(bits);
return h;
}
// Raw IEEE 754 binary16 bit pattern of this value.
uint16_t Bits() const
{ return BitCast<uint16_t>(half_); }
// Native less-than (NaN behavior follows the hardware).
constexpr bool Less(Half r) const
{ return half_ < r.half_; }
// Native less-or-equal.
constexpr bool LessEqual(Half r) const
{ return half_ <= r.half_; }
// Native equality.
constexpr bool Equal(Half r) const
{ return half_ == r.half_; }
// NaN test via self-inequality.
constexpr bool IsNaN() const
{ return half_ != half_; }
private:
// Underlying native half-precision value.
__fp16 half_;
};
/// @} cpp_core_types
} // namespace np
.\numpy\numpy\_core\src\common\half_private.hpp
/*
* The following functions that emulating float/double/half conversions
* are copied from npymath without any changes to its functionality.
*/
namespace np { namespace half_private {
template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
/// Convert IEEE 754 binary32 bits to binary16 bits (ties-to-even by default).
/// NOTE(review): the function's closing brace was dropped during extraction;
/// restored at the end. Logic is unchanged.
inline uint16_t FromFloatBits(uint32_t f)
{
    uint32_t f_exp, f_sig;
    uint16_t h_sgn, h_exp, h_sig;
    // Sign bit: move from bit 31 to bit 15.
    h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
    // Exponent field of the float.
    f_exp = (f&0x7f800000u);
    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (f_exp >= 0x47800000u) {
        if (f_exp == 0x7f800000u) {
            /* Inf or NaN */
            f_sig = (f&0x007fffffu);
            if (f_sig != 0) {
                /* NaN - propagate the flag in the significand... */
                uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return h_sgn + ret;
            } else {
                /* signed inf */
                return (uint16_t) (h_sgn + 0x7c00u);
            }
        } else {
            if constexpr (gen_overflow) {
                /* overflow to signed inf */
                FloatStatus::RaiseOverflow();
            }
            return (uint16_t) (h_sgn + 0x7c00u);
        }
    }
    /* Exponent underflow converts to a subnormal half or signed zero */
    if (f_exp <= 0x38000000u) {
        /*
         * Signed zeros, subnormal floats, and floats with small
         * exponents all convert to signed zero half-floats.
         */
        if (f_exp < 0x33000000u) {
            if constexpr (gen_underflow) {
                /* If f != 0, it underflowed to 0 */
                if ((f&0x7fffffff) != 0) {
                    FloatStatus::RaiseUnderflow();
                }
            }
            // Result is the signed zero.
            return h_sgn;
        }
        /* Make the subnormal significand */
        f_exp >>= 23;
        f_sig = (0x00800000u + (f&0x007fffffu));
        if constexpr (gen_underflow) {
            /* If it's not exactly represented, it underflowed */
            if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
                FloatStatus::RaiseUnderflow();
            }
        }
        /*
         * Usually the significand is shifted by 13. For subnormals an
         * additional shift needs to occur. This shift is one for the largest
         * exponent giving a subnormal (`f_exp = 0x38000000 >> 23 = 112`),
         * which offsets the new first bit. At most the shift is 1+10 bits.
         */
        f_sig >>= (113 - f_exp);
        /* Handle rounding by adding 1 to the bit beyond half precision */
        if constexpr (round_even) {
            /*
             * If the last bit in the half significand is 0 (already even) and
             * the remaining bit pattern is 1000...0, then we do not add one to
             * the bit after the half significand. However, the (113 - f_exp)
             * shift can lose up to 11 bits, so the || checks them in the
             * original. In all other cases we can just add one.
             */
            if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
                f_sig += 0x00001000u;
            }
        }
        else {
            f_sig += 0x00001000u;
        }
        h_sig = (uint16_t) (f_sig >> 13);
        /*
         * If the rounding caused a bit to spill into h_exp, it will increment
         * h_exp from zero to one and h_sig will be zero. This is the correct
         * result.
         */
        return (uint16_t) (h_sgn + h_sig);
    }
    /* Regular case with no overflow or underflow */
    h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    f_sig = (f&0x007fffffu);
    if constexpr (round_even) {
        /*
         * If the last bit in the half significand is 0 (already even) and the
         * remaining bit pattern is 1000...0, then we do not add one to the bit
         * after the half significand. In all other cases we do.
         */
        if ((f_sig&0x00003fffu) != 0x00001000u) {
            f_sig += 0x00001000u;
        }
    }
    else {
        f_sig += 0x00001000u;
    }
    h_sig = (uint16_t) (f_sig >> 13);
    /*
     * If the rounding caused a bit to spill into h_exp, h_exp increments and
     * h_sig becomes zero — the correct result. h_exp can reach at most 15, in
     * which case the result overflows to signed infinity.
     */
    if constexpr (gen_overflow) {
        h_sig += h_exp;
        if (h_sig == 0x7c00u) {
            FloatStatus::RaiseOverflow();
        }
        return h_sgn + h_sig;
    }
    else {
        return h_sgn + h_exp + h_sig;
    }
}  // closing brace restored (dropped during extraction)
/// Convert IEEE 754 binary64 bits to binary16 bits (ties-to-even by default).
/// NOTE(review): the function's closing brace was dropped during extraction;
/// restored at the end. Logic is unchanged.
template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
inline uint16_t FromDoubleBits(uint64_t d)
{
    uint64_t d_exp, d_sig;
    uint16_t h_sgn, h_exp, h_sig;
    // Sign bit: move from bit 63 to bit 15.
    h_sgn = (d & 0x8000000000000000ULL) >> 48;
    // Exponent field of the double.
    d_exp = (d & 0x7ff0000000000000ULL);
    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (d_exp >= 0x40f0000000000000ULL) {
        if (d_exp == 0x7ff0000000000000ULL) {
            /* Inf or NaN */
            d_sig = (d & 0x000fffffffffffffULL);
            if (d_sig != 0) {
                /* NaN - propagate the flag into the significand... */
                uint16_t ret = (uint16_t)(0x7c00u + (d_sig >> 42));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return h_sgn + ret;
            } else {
                /* signed inf */
                return h_sgn + 0x7c00u;
            }
        } else {
            /* overflow to signed inf */
            if constexpr (gen_overflow) {
                FloatStatus::RaiseOverflow();
            }
            return h_sgn + 0x7c00u;
        }
    }
    /* Exponent underflow converts to subnormal half or signed zero */
    if (d_exp <= 0x3f00000000000000ULL) {
        /*
         * Signed zeros, subnormal doubles, and doubles with small exponents
         * all convert to signed zero half-floats.
         */
        if (d_exp < 0x3e60000000000000ULL) {
            if constexpr (gen_underflow) {
                /* If d != 0, it underflowed to 0 */
                if ((d&0x7fffffffffffffffULL) != 0) {
                    FloatStatus::RaiseUnderflow();
                }
            }
            // Result is the signed zero.
            return h_sgn;
        }
        /* Make the subnormal significand */
        d_exp >>= 52;
        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
        if constexpr (gen_underflow) {
            /* If it's not exactly represented, it underflowed */
            if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
                FloatStatus::RaiseUnderflow();
            }
        }
        /*
         * Unlike float, double has enough room to shift left to align the
         * subnormal significand without losing the last bits. The smallest
         * possible exponent giving a subnormal is
         * `d_exp = 0x3e60000000000000 >> 52 = 998`; all larger subnormals are
         * shifted relative to it. This adds a 10+1-bit shift compared with the
         * right shift in the normal branch.
         */
        assert(d_exp - 998 >= 0);
        d_sig <<= (d_exp - 998);
        /* Handle rounding by adding 1 to the bit beyond half precision */
        if constexpr (round_even) {
            /*
             * If the last bit in the half significand is 0 (already even) and
             * the remaining bit pattern is 1000...0, then we do not add one to
             * the bit after the half significand. In all other cases we do.
             */
            if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
                d_sig += 0x0010000000000000ULL;
            }
        }
        else {
            d_sig += 0x0010000000000000ULL;
        }
        h_sig = (uint16_t) (d_sig >> 53);
        /*
         * If the rounding caused a bit to spill into h_exp, it will increment
         * h_exp from zero to one and h_sig will be zero. This is the correct
         * result.
         */
        return h_sgn + h_sig;
    }
    /* Regular case with no overflow or underflow */
    h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    d_sig = (d&0x000fffffffffffffULL);
    if constexpr (round_even) {
        /*
         * If the last bit in the half significand is 0 (already even) and the
         * remaining bit pattern is 1000...0, then we do not add one to the bit
         * after the half significand. In all other cases we do.
         */
        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
            d_sig += 0x0000020000000000ULL;
        }
    }
    else {
        d_sig += 0x0000020000000000ULL;
    }
    h_sig = (uint16_t) (d_sig >> 42);
    /*
     * If the rounding caused a bit to spill into h_exp, h_exp increments and
     * h_sig becomes zero — the correct result. h_exp can reach at most 15, in
     * which case the result overflows to signed infinity.
     */
    if constexpr (gen_overflow) {
        h_sig += h_exp;
        if (h_sig == 0x7c00u) {
            FloatStatus::RaiseOverflow();
        }
        return h_sgn + h_sig;
    }
    else {
        return h_sgn + h_exp + h_sig;
    }
}  // closing brace restored (dropped during extraction)
/// Convert an IEEE 754 binary16 bit pattern to the equivalent binary32 bits.
constexpr uint32_t ToFloatBits(uint16_t h)
{
    uint16_t h_exp = (h&0x7c00u);
    // Sign bit moved from bit 15 to bit 31.
    uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
    if (h_exp == 0x0000u) {
        // Zero or subnormal.
        uint16_t h_sig = (h&0x03ffu);
        if (h_sig == 0) {
            // Signed zero.
            return f_sgn;
        }
        // Subnormal: shift the significand left until its leading bit is set,
        // counting the shifts into the exponent adjustment.
        h_sig <<= 1;
        while ((h_sig&0x0400u) == 0) {
            h_sig <<= 1;
            h_exp++;
        }
        uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
        uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
        return f_sgn + f_exp + f_sig;
    }
    if (h_exp == 0x7c00u) {
        // Inf or NaN: all-ones exponent, significand copied across.
        return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
    }
    // Normalized number: just rebias the exponent and shift into place.
    return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
}
/// Convert an IEEE 754 binary16 bit pattern to the equivalent binary64 bits.
constexpr uint64_t ToDoubleBits(uint16_t h)
{
    uint16_t h_exp = (h&0x7c00u);
    // Sign bit moved from bit 15 to bit 63.
    uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
    if (h_exp == 0x0000u) {
        // Zero or subnormal.
        uint16_t h_sig = (h&0x03ffu);
        if (h_sig == 0) {
            // Signed zero.
            return d_sgn;
        }
        // Subnormal: shift the significand left until its leading bit is set,
        // counting the shifts into the exponent adjustment.
        h_sig <<= 1;
        while ((h_sig&0x0400u) == 0) {
            h_sig <<= 1;
            h_exp++;
        }
        uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
        uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
        return d_sgn + d_exp + d_sig;
    }
    if (h_exp == 0x7c00u) {
        // Inf or NaN: all-ones exponent, significand copied across.
        return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
    }
    // Normalized number: just rebias the exponent and shift into place.
    return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
}
.\numpy\numpy\_core\src\common\lowlevel_strided_loops.h
// 检查是否定义了 NUMPY_CORE_SRC_COMMON_LOWLEVEL_STRIDED_LOOPS_H_,如果没有则定义
// 宏定义,用于导入相关头文件
// 导入通用头文件
// 导入 numpy 配置文件
// 导入数组方法头文件
// 导入数据类型转换头文件
// 导入内存重叠头文件
// 导入映射头文件
/* For PyArray_ macros used below */
// 导入 ndarray 对象头文件
/*
* 注意:此 API 目前应保持私有,以便进行进一步的细化。
* 我认为 'aligned' 机制需要进行更改,例如。
*
* 注意:2018 年进行了更新,以区分 "true" 和 "uint" 对齐。
*/
/*
* 此函数指针用于输入任意跨步的一维数组段并输出相同大小的任意跨步的数组段的一元操作。
* 当步长或项目大小具有特定已知值时,它可以是完全通用的函数或专用函数。
*
* 一元操作的示例包括直接复制、字节交换和强制转换操作,
*
* 'transferdata' 参数略微特殊,遵循在 ndarraytypes.h 中定义的通用辅助数据模式
* 使用 NPY_AUXDATA_CLONE 和 NPY_AUXDATA_FREE 处理这些数据。
*/
// TODO: FIX! That comment belongs to something now in array-method
/*
* 这是用于指向与 PyArrayMethod_StridedLoop 完全相同的函数指针,
* 但具有额外的掩码来控制被转换的值。
*
* TODO:我们应该将这个掩码“功能”移动到 ArrayMethod 本身去
* 可能。虽然对于 NumPy 内部的事情来说,这个方法运作良好,并且暴露它应该经过深思熟虑,
* 以便在可能的情况下有用于 NumPy 之外。
*
* 特别地,如果 mask[i*mask_stride] 为真,则对第 'i' 元素进行操作。
*/
/*
 * Function-pointer type identical to PyArrayMethod_StridedLoop, but taking an
 * additional mask (with its own stride): element `i` is operated on exactly
 * when mask[i*mask_stride] is true.
 */
typedef int (PyArray_MaskedStridedUnaryOp)(
PyArrayMethod_Context *context, char *const *args,
const npy_intp *dimensions, const npy_intp *strides,
npy_bool *mask, npy_intp mask_stride,
NpyAuxData *auxdata);
/*
* 返回一个指向用于复制步进内存的专用函数的函数指针。如果输入出现问题,则返回 NULL。
*
* aligned:
* 如果 src 和 dst 指针总是指向与 dtype->elsize 相等的 uint 对齐的位置,则为1,否则为0。
* src_stride:
* 如果 src 步幅总是相同的,则应为 src 步幅,否则为 NPY_MAX_INTP。
* dst_stride:
* 如果 dst 步幅总是相同的,则应为 dst 步幅,否则为 NPY_MAX_INTP。
* itemsize:
* 如果项目大小总是相同的,则应为项目大小,否则为0。
*
*/
// 导出函数
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopyFn(int aligned,
npy_intp src_stride, npy_intp dst_stride,
npy_intp itemsize);
/*
* 返回一个指向特定函数的函数指针,用于复制和交换步进内存。假设每个元素都是单个需要交换的值。
* 这个函数假设了步进内存是对齐的,并使用了给定的 src_stride 和 dst_stride 参数。
*
* 参数和 PyArray_GetStridedCopyFn 中描述的一样。
*/
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopySwapFn(int aligned,
npy_intp src_stride, npy_intp dst_stride,
npy_intp itemsize);
/*
* 返回一个指向特定函数的函数指针,用于复制和交换步进内存。假设每个元素是一对需要交换的值。
* 这个函数假设了步进内存是对齐的,并使用了给定的 src_stride 和 dst_stride 参数。
*
* 参数和 PyArray_GetStridedCopyFn 中描述的一样。
*/
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedCopySwapPairFn(int aligned,
npy_intp src_stride, npy_intp dst_stride,
npy_intp itemsize);
/*
* 返回一个传输函数和传输数据对,将数据从源复制到目标,如果数据不适合,则截断它,并且如果空间过多,则用零字节填充。
* 这个函数假设了步进内存是对齐的,并使用了给定的 src_stride 和 dst_stride 参数。
*
* 参数和 PyArray_GetStridedCopyFn 中描述的一样。
*
* 返回 NPY_SUCCEED 或 NPY_FAIL。
*/
NPY_NO_EXPORT int
PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap,
npy_intp src_stride, npy_intp dst_stride,
npy_intp src_itemsize, npy_intp dst_itemsize,
PyArrayMethod_StridedLoop **outstransfer,
NpyAuxData **outtransferdata);
/*
* 对于内置数值类型之间的转换,返回一个函数指针,用于从 src_type_num 转换到 dst_type_num。
* 如果不支持某个转换,则返回 NULL 而不设置 Python 异常。
*/
NPY_NO_EXPORT PyArrayMethod_StridedLoop *
PyArray_GetStridedNumericCastFn(int aligned,
npy_intp src_stride, npy_intp dst_stride,
int src_type_num, int dst_type_num);
/*
* 获取一个操作,该操作复制给定 dtype 的元素,如果 dtype 不是 NBO 则进行字节顺序交换。
*
* 返回 NPY_SUCCEED 或 NPY_FAIL。
*/
NPY_NO_EXPORT int
PyArray_GetDTypeCopySwapFn(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *dtype,
PyArrayMethod_StridedLoop **outstransfer,
NpyAuxData **outtransferdata);
/*
* If it's possible, gives back a transfer function which casts and/or
* byte swaps data with the dtype 'src_dtype' into data with the dtype
* 'dst_dtype'. If the outtransferdata is populated with a non-NULL value,
* it must be deallocated with the NPY_AUXDATA_FREE
* function when the transfer function is no longer required.
*
* aligned:
* Should be 1 if the src and dst pointers always point to
* locations at which a uint of equal size to dtype->elsize
* would be aligned, 0 otherwise.
* src_stride:
* Should be the src stride if it will always be the same,
* NPY_MAX_INTP otherwise.
* dst_stride:
* Should be the dst stride if it will always be the same,
* NPY_MAX_INTP otherwise.
* src_dtype:
* The data type of source data. Must not be NULL.
* dst_dtype:
* The data type of destination data. If this is NULL and
* move_references is 1, a transfer function which decrements
* source data references is produced.
* move_references:
* If 0, the destination data gets new reference ownership.
* If 1, the references from the source data are moved to
* the destination data.
* cast_info:
* A pointer to an (uninitialized) `NPY_cast_info` struct, the caller
* must call `NPY_cast_info_xfree` on it (except on error) and handle
* its memory livespan.
* out_needs_api:
* If this is non-NULL, and the transfer function produced needs
* to call into the (Python) API, this gets set to 1. This
* remains untouched if no API access is required.
*
* WARNING: If you set move_references to 1, it is best that src_stride is
* never zero when calling the transfer function. Otherwise, the
* first destination reference will get the value and all the rest
* will get NULL.
*
* Returns NPY_SUCCEED or NPY_FAIL.
*/
NPY_NO_EXPORT int
PyArray_GetDTypeTransferFunction(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
NPY_cast_info *cast_info,
NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
 * If it's possible, gives back a transfer function which copies fields
 * from one structured dtype to another, optionally moving references.
 *
 * aligned:
 * Should be 1 if the src and dst pointers always point to
 * locations at which a uint of equal size to dtype->elsize
 * would be aligned, 0 otherwise.
 * src_stride:
 * Should be the src stride if it will always be the same,
 * NPY_MAX_INTP otherwise.
 * dst_stride:
 * Should be the dst stride if it will always be the same,
 * NPY_MAX_INTP otherwise.
 * src_dtype:
 * The data type of source data. Must not be NULL.
 * dst_dtype:
 * The data type of destination data. If this is NULL and
 * move_references is 1, a transfer function which decrements
 * source data references is produced.
 * move_references:
 * If 0, the destination data gets new reference ownership.
 * If 1, the references from the source data are moved to
 * the destination data.
 * out_stransfer:
 * A pointer to a `PyArrayMethod_StridedLoop` struct pointer,
 * which will contain the transfer function upon successful return.
 * out_transferdata:
 * A pointer to an `NpyAuxData` struct pointer, which will
 * contain auxiliary data needed for the transfer function.
 * out_flags:
 * Pointer to `NPY_ARRAYMETHOD_FLAGS` where the flags describing
 * the transfer function will be stored.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
get_fields_transfer_function(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
 * If it's possible, gives back a transfer function which handles
 * subarray assignment from one structured dtype to another,
 * optionally moving references.
 *
 * aligned:
 * Should be 1 if the src and dst pointers always point to
 * locations at which a uint of equal size to dtype->elsize
 * would be aligned, 0 otherwise.
 * src_stride:
 * Should be the src stride if it will always be the same,
 * NPY_MAX_INTP otherwise.
 * dst_stride:
 * Should be the dst stride if it will always be the same,
 * NPY_MAX_INTP otherwise.
 * src_dtype:
 * The data type of source data. Must not be NULL.
 * dst_dtype:
 * The data type of destination data. If this is NULL and
 * move_references is 1, a transfer function which decrements
 * source data references is produced.
 * move_references:
 * If 0, the destination data gets new reference ownership.
 * If 1, the references from the source data are moved to
 * the destination data.
 * out_stransfer:
 * A pointer to a `PyArrayMethod_StridedLoop` struct pointer,
 * which will contain the transfer function upon successful return.
 * out_transferdata:
 * A pointer to an `NpyAuxData` struct pointer, which will
 * contain auxiliary data needed for the transfer function.
 * out_flags:
 * Pointer to `NPY_ARRAYMETHOD_FLAGS` where the flags describing
 * the transfer function will be stored.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
get_subarray_transfer_function(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
 * This is identical to PyArray_GetDTypeTransferFunction, but returns a
 * transfer function which also takes a mask as a parameter. The mask is used
 * to determine which values to copy, and data is transferred exactly when
 * mask[i*mask_stride] is true.
 *
 * If move_references is true, values which are not copied to the
 * destination will still have their source reference decremented.
 *
 * If mask_dtype is NPY_BOOL or NPY_UINT8, each full element is either
 * transferred or not according to the mask as described above. If
 * dst_dtype and mask_dtype are both struct dtypes, their names must
 * match exactly, and the dtype of each leaf field in mask_dtype must
 * be either NPY_BOOL or NPY_UINT8.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
PyArray_GetMaskedDTypeTransferFunction(int aligned,
npy_intp src_stride,
npy_intp dst_stride,
npy_intp mask_stride,
PyArray_Descr *src_dtype,
PyArray_Descr *dst_dtype,
PyArray_Descr *mask_dtype,
int move_references,
NPY_cast_info *cast_info,
NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
 * Casts the specified number of elements from 'src' with data type
 * 'src_dtype' to 'dst' with 'dst_dtype'. See
 * PyArray_GetDTypeTransferFunction for more details.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
PyArray_CastRawArrays(npy_intp count,
char *src, char *dst,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references);
/*
 * These two functions copy or convert the data of an n-dimensional array
 * to/from a one-dimensional strided buffer.  They only call 'stransfer'
 * with the provided dst_stride/src_stride and dst_strides[0]/src_strides[0],
 * so the caller can use those values to specialize the function.
 * Note that even for ndim == 0 everything must be set up as if ndim == 1.
 *
 * The return value is the number of elements it couldn't copy.  A return
 * value of 0 means all elements were copied, a larger value means the end
 * of the n-dimensional array was reached before 'count' elements were
 * copied.  A negative return value indicates an error occurred.
 *
 * ndim:
 *      The number of dimensions of the n-dimensional array.
 * dst/src/mask:
 *      The destination, source or mask starting pointer.
 * dst_stride/src_stride/mask_stride:
 *      The stride of the 1-dimensional strided buffer.
 * dst_strides/src_strides:
 *      The strides of the n-dimensional array.
 * dst_strides_inc/src_strides_inc:
 *      The increment to add to the ..._strides pointer to get to the
 *      next stride.
 * coords:
 *      The starting coordinates in the n-dimensional array.
 * coords_inc:
 *      The increment to add to the coords pointer to get to the next
 *      coordinate.
 * shape:
 *      The shape of the n-dimensional array.
 * shape_inc:
 *      The increment to add to the shape pointer to get to the next
 *      shape entry.
 * count:
 *      How many elements to transfer.
 * src_itemsize:
 *      How big each element is.  If transferring between elements of
 *      different sizes (e.g. a casting operation), 'stransfer' should be
 *      specialized for that, in which case it uses this parameter as the
 *      source item size.
 * cast_info:
 *      Pointer to the NPY_cast_info struct which summarizes all the
 *      information needed to perform the cast.
 */
NPY_NO_EXPORT npy_intp
PyArray_TransferNDimToStrided(npy_intp ndim,
char *dst, npy_intp dst_stride,
char *src, npy_intp const *src_strides, npy_intp src_strides_inc,
npy_intp const *coords, npy_intp coords_inc,
npy_intp const *shape, npy_intp shape_inc,
npy_intp count, npy_intp src_itemsize,
NPY_cast_info *cast_info);
/*
 * Transfers data from a one-dimensional strided buffer into an
 * n-dimensional array.  It only calls 'stransfer' with the provided
 * src_stride and dst_strides/dst_strides_inc, so the caller can use
 * those values to specialize the function.
 *
 * The return value is the number of elements it couldn't copy.  A return
 * value of 0 means all elements were copied, a larger value means the end
 * of the n-dimensional array was reached before 'count' elements were
 * copied.  A negative return value indicates an error occurred.
 *
 * ndim:
 *      The number of dimensions of the n-dimensional array.
 * dst/src/mask:
 *      The destination, source or mask starting pointer.
 * dst_strides/src_strides:
 *      The strides of the n-dimensional array.
 * dst_strides_inc:
 *      The increment to add to the dst_strides pointer to get to the
 *      next stride.
 * src_stride:
 *      The stride of the 1-dimensional strided buffer.
 * coords:
 *      The starting coordinates in the n-dimensional array.
 * coords_inc:
 *      The increment to add to the coords pointer to get to the next
 *      coordinate.
 * shape:
 *      The shape of the n-dimensional array.
 * shape_inc:
 *      The increment to add to the shape pointer to get to the next
 *      shape entry.
 * count:
 *      How many elements to transfer.
 * src_itemsize:
 *      How big each element is.  If transferring between elements of
 *      different sizes (e.g. a casting operation), 'stransfer' should be
 *      specialized for that, in which case it uses this parameter as the
 *      source item size.
 * cast_info:
 *      Pointer to the NPY_cast_info struct which summarizes all the
 *      information needed to perform the cast.
 */
NPY_NO_EXPORT npy_intp
PyArray_TransferStridedToNDim(npy_intp ndim,
char *dst, npy_intp const *dst_strides, npy_intp dst_strides_inc,
char *src, npy_intp src_stride,
npy_intp const *coords, npy_intp coords_inc,
npy_intp const *shape, npy_intp shape_inc,
npy_intp count, npy_intp src_itemsize,
NPY_cast_info *cast_info);
/*
* Transfers data from a strided array to an N-dimensional array with optional masking.
* Coordinates, strides, shapes, and casting information are provided to control the transfer.
* Returns void.
*/
PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
char *dst, npy_intp const *dst_strides, npy_intp dst_strides_inc,
char *src, npy_intp src_stride,
npy_bool *mask, npy_intp mask_stride,
npy_intp const *coords, npy_intp coords_inc,
npy_intp const *shape, npy_intp shape_inc,
npy_intp count, npy_intp src_itemsize,
NPY_cast_info *cast_info);
/*
 * Retrieves values from a simple mapping iterator at the indices given
 * by 'ind', storing the results into 'result'.  'is_aligned' and
 * 'cast_info' carry the alignment and casting information.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_trivial_get(
PyArrayObject *self, PyArrayObject *ind, PyArrayObject *result,
int is_aligned, NPY_cast_info *cast_info);
/*
 * Sets values into a simple mapping iterator at the indices given by
 * 'ind', using the data from 'result'.  'is_aligned' and 'cast_info'
 * carry the alignment and casting information.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_trivial_set(
PyArrayObject *self, PyArrayObject *ind, PyArrayObject *result,
int is_aligned, NPY_cast_info *cast_info);
/*
 * Retrieves values through a full mapping iterator object.
 * Handles casting and alignment considerations, along with the
 * specified array-method flags.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_get(
PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);
/*
 * Sets values through a full mapping iterator object.
 * Handles casting and alignment considerations, along with the
 * specified array-method flags.
 * Returns an integer indicating success (0) or failure (-1).
 */
NPY_NO_EXPORT int
mapiter_set(
PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);
/*
 * Prepares shape and strides for a simple raw array iteration.
 * Orders strides into FORTRAN order, reverses negative strides, and
 * coalesces axes where possible.  The results are written through the
 * out_* parameters.
 * Returns 0 on success, -1 on failure.
 *
 * Intended for lightweight iteration over raw arrays without
 * PyArrayObject buffering.
 * Used together with NPY_RAW_ITER_START and NPY_RAW_ITER_ONE_NEXT
 * for loop handling.
 */
NPY_NO_EXPORT int
PyArray_PrepareOneRawArrayIter(int ndim, npy_intp const *shape,
char *data, npy_intp const *strides,
int *out_ndim, npy_intp *out_shape,
char **out_data, npy_intp *out_strides);
/*
 * Prepares shape and strides for two operands' raw array iteration.
 * Only uses strides of the first operand for dimension reordering.
 * Returns 0 on success, -1 on failure.
 *
 * Assumes operands have already been broadcasted.
 * Used with NPY_RAW_ITER_START and NPY_RAW_ITER_TWO_NEXT for loop handling.
 *
 * NOTE(review): the extracted copy had dropped this signature, leaving
 * a dangling `NPY_NO_EXPORT int` that silently attached to the
 * three-operand prototype below; restored the two-operand declaration.
 */
NPY_NO_EXPORT int
PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp const *shape,
char *dataA, npy_intp const *stridesA,
char *dataB, npy_intp const *stridesB,
int *out_ndim, npy_intp *out_shape,
char **out_dataA, npy_intp *out_stridesA,
char **out_dataB, npy_intp *out_stridesB);
/*
 * Prepares shape and strides for three operands' raw array iteration.
 * The three operands must already be broadcast against each other,
 * since a single ndim/shape is given for all of them.
 *
 * Only the strides of the first operand are used to reorder the
 * dimensions; no attempt is made to consider all stride combinations
 * the way the NpyIter object does.
 *
 * Use together with NPY_RAW_ITER_START and NPY_RAW_ITER_THREE_NEXT
 * to handle every loop except the innermost one (idim == 0).
 *
 * Returns 0 on success, -1 on failure.
 */
NPY_NO_EXPORT int
PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp const *shape,
char *dataA, npy_intp const *stridesA,
char *dataB, npy_intp const *stridesB,
char *dataC, npy_intp const *stridesC,
int *out_ndim, npy_intp *out_shape,
char **out_dataA, npy_intp *out_stridesA,
char **out_dataB, npy_intp *out_stridesB,
char **out_dataC, npy_intp *out_stridesC);
/*
* 返回从地址'addr'开始必须从'nvals'个尺寸为'esize'的元素中剥离的元素数量,
* 以达到可块对齐。'alignment'参数传递所需的字节对齐,必须是2的幂。
* 此函数用于为块化准备数组。参见下面'numpy_blocked_end'函数的文档,了解此函数的使用示例。
*/
static inline npy_intp
npy_aligned_block_offset(const void * addr, const npy_uintp esize,
const npy_uintp alignment, const npy_uintp nvals)
{
npy_uintp offset, peel;
// 计算地址偏移量,使其达到块对齐
offset = (npy_uintp)addr & (alignment - 1);
// 计算需要剥离的元素数量
peel = offset ? (alignment - offset) / esize : 0;
// 如果需要剥离的元素数量超过了nvals,则限制为nvals
peel = (peel <= nvals) ? peel : nvals;
// 确保peel在合理范围内,不超过NPY_MAX_INTP
assert(peel <= NPY_MAX_INTP);
return (npy_intp)peel;
}
/*
* Calculate the upper loop bound for iterating over a raw array,
* handling peeling by 'offset' elements and blocking to a vector
* size of 'vsz' in bytes.
*
* Example usage:
* npy_intp i;
* double v[101];
* npy_intp esize = sizeof(v[0]);
* npy_intp peel = npy_aligned_block_offset(v, esize, 16, n);
* // peel to alignment 16
* for (i = 0; i < peel; i++)
* <scalar-op>
* // simd vectorized operation
* for (; i < npy_blocked_end(peel, esize, 16, n); i += 16 / esize)
* <blocked-op>
* // handle scalar rest
* for (; i < n; i++)
* <scalar-op>
*/
static inline npy_intp
npy_blocked_end(const npy_uintp peel, const npy_uintp esize,
const npy_uintp vsz, const npy_uintp nvals)
{
// Calculate the difference between total elements and peeled elements
npy_uintp ndiff = nvals - peel;
// Calculate the result for blocked end by rounding down to nearest multiple
npy_uintp res = (ndiff - ndiff % (vsz / esize));
// Ensure the total number of elements is at least as large as peeled elements
assert(nvals >= peel);
// Ensure the result fits within the maximum integer size
assert(res <= NPY_MAX_INTP);
// Return the calculated blocked end position
return (npy_intp)(res);
}
/* byte swapping functions */
/* Swap the two bytes of a 16-bit unsigned integer. */
static inline npy_uint16
npy_bswap2(npy_uint16 x)
{
    const npy_uint16 lo = x & 0xffu;
    const npy_uint16 hi = x >> 8;
    return (npy_uint16)((lo << 8) | hi);
}
/*
 * Treat a memory area as int16 and byte-swap unaligned memory,
 * handling CPUs that don't support unaligned access.
 */
static inline void
npy_bswap2_unaligned(char * x)
{
    /* exchange the two bytes through a temporary */
    const char tmp = x[1];
    x[1] = x[0];
    x[0] = tmp;
}
/* Swap the bytes of a 32-bit unsigned integer. */
static inline npy_uint32
npy_bswap4(npy_uint32 x)
{
#ifdef HAVE___BUILTIN_BSWAP32
    /* Use the compiler intrinsic when available. */
    return __builtin_bswap32(x);
#else
    /*
     * Portable fallback: swap the four bytes manually.
     * (The extracted copy had lost the #else/#endif, leaving the
     * fallback unreachable and the #ifdef unterminated.)
     */
    return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
    ((x & 0xff0000u) >> 8) | (x >> 24);
#endif
}
/*
 * Byte-swap unaligned memory for 32-bit values.
 * Swaps bytes to handle CPUs that don't support unaligned access.
 */
static inline void
npy_bswap4_unaligned(char * x)
{
    char tmp;
    /* outer byte pair */
    tmp = x[0]; x[0] = x[3]; x[3] = tmp;
    /* inner byte pair */
    tmp = x[1]; x[1] = x[2]; x[2] = tmp;
}
/* Swap the bytes of a 64-bit unsigned integer. */
static inline npy_uint64
npy_bswap8(npy_uint64 x)
{
#ifdef HAVE___BUILTIN_BSWAP64
    /* Use the compiler intrinsic when available. */
    return __builtin_bswap64(x);
#else
    /*
     * Portable fallback: swap the eight bytes manually.
     * (The extracted copy had lost the #else/#endif, leaving the
     * fallback unreachable and the #ifdef unterminated.)
     */
    return ((x & 0xffULL) << 56) |
    ((x & 0xff00ULL) << 40) |
    ((x & 0xff0000ULL) << 24) |
    ((x & 0xff000000ULL) << 8) |
    ((x & 0xff00000000ULL) >> 8) |
    ((x & 0xff0000000000ULL) >> 24) |
    ((x & 0xff000000000000ULL) >> 40) |
    (x >> 56);
#endif
}
/*
 * Byte-swap unaligned memory for 64-bit values.
 * Swaps bytes to handle CPUs that don't support unaligned access.
 */
static inline void
npy_bswap8_unaligned(char * x)
{
    int i;
    /* mirror the eight bytes around the centre */
    for (i = 0; i < 4; i++) {
        const char tmp = x[i];
        x[i] = x[7 - i];
        x[7 - i] = tmp;
    }
}
/*
 * Start raw iteration: zero the coordinate array and open the main
 * do-loop (closed by one of the NPY_RAW_ITER_*_NEXT macros).
 *
 * NOTE: `//` comments must not appear inside a line-continued macro —
 * they either terminate the macro early or swallow the following line —
 * so all commentary lives outside the #define.
 */
#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \
        memset((coord), 0, (ndim) * sizeof(coord[0])); \
        do {
/*
 * Increment to the next n-dimensional coordinate for one raw array.
 * Carries into higher dimensions, rewinding the data pointer when a
 * dimension overflows; the trailing `} while` closes the loop opened
 * by NPY_RAW_ITER_START.  (Interior `//` comments were removed: inside
 * a line-continued macro they break the definition.)
 */
#define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides) \
        for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
            if (++(coord)[idim] == (shape)[idim]) { \
                (coord)[idim] = 0; \
                (data) -= ((shape)[idim] - 1) * (strides)[idim]; \
            } \
            else { \
                (data) += (strides)[idim]; \
                break; \
            } \
        } \
        } while ((idim) < (ndim))
/*
 * Increment to the next n-dimensional coordinate for two raw arrays.
 * When a dimension's coordinate wraps, it is reset to 0 and both data
 * pointers are rewound across that dimension; otherwise both pointers
 * advance by their stride and the carry loop stops.  (Interior `//`
 * comments were removed: inside a line-continued macro they break the
 * definition.)
 */
#define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, dataB, stridesB) \
        for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
            if (++(coord)[idim] == (shape)[idim]) { \
                (coord)[idim] = 0; \
                (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
            } \
            else { \
                (dataA) += (stridesA)[idim]; \
                (dataB) += (stridesB)[idim]; \
                break; \
            } \
        } \
        } while ((idim) < (ndim))
/*
 * Increment to the next n-dimensional coordinate for three raw arrays.
 * Same carry/rewind behaviour as NPY_RAW_ITER_TWO_NEXT, applied to all
 * three data pointers.  (Interior `//` comments were removed: inside a
 * line-continued macro they break the definition.)
 */
#define NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, \
                              dataB, stridesB, \
                              dataC, stridesC) \
        for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
            if (++(coord)[idim] == (shape)[idim]) { \
                (coord)[idim] = 0; \
                (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \
            } \
            else { \
                (dataA) += (stridesA)[idim]; \
                (dataB) += (stridesB)[idim]; \
                (dataC) += (stridesC)[idim]; \
                break; \
            } \
        } \
        } while ((idim) < (ndim))
/*
 * Increment to the next n-dimensional coordinate for four raw arrays.
 * Same carry/rewind behaviour as NPY_RAW_ITER_TWO_NEXT, applied to all
 * four data pointers.  (Interior `//` comments were removed: inside a
 * line-continued macro they break the definition.)
 */
#define NPY_RAW_ITER_FOUR_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, \
                              dataB, stridesB, \
                              dataC, stridesC, \
                              dataD, stridesD) \
        for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
            if (++(coord)[idim] == (shape)[idim]) { \
                (coord)[idim] = 0; \
                (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \
                (dataD) -= ((shape)[idim] - 1) * (stridesD)[idim]; \
            } \
            else { \
                (dataA) += (stridesA)[idim]; \
                (dataB) += (stridesB)[idim]; \
                (dataC) += (stridesC)[idim]; \
                (dataD) += (stridesD)[idim]; \
                break; \
            } \
        } \
        } while ((idim) < (ndim))
/*
* TRIVIAL ITERATION
*
* In some cases when the iteration order isn't important, iteration over
* arrays is trivial. This is the case when:
* * The array has 0 or 1 dimensions.
* * The array is C or Fortran contiguous.
* Use of an iterator can be skipped when this occurs. These macros assist
* in detecting and taking advantage of the situation. Note that it may
* be worthwhile to further check if the stride is a contiguous stride
* and take advantage of that.
*
* Here is example code for a single array:
*
* if (PyArray_TRIVIALLY_ITERABLE(self)) {
* char *data;
* npy_intp count, stride;
*
* PyArray_PREPARE_TRIVIAL_ITERATION(self, count, data, stride);
*
* while (count--) {
* // Use the data pointer
*
* data += stride;
* }
* }
* else {
* // Create iterator, etc...
* }
*
*/
/*
* Note: Equivalently iterable macro requires one of arr1 or arr2 be
* trivially iterable to be valid.
*/
/*
 * Determine whether an array is safe for trivial iteration:
 * it has 0 or 1 dimensions, or it is C or Fortran contiguous.
 * (The `#define` header line was missing from the extracted copy,
 * leaving a dangling expression; restored.)
 */
#define PyArray_TRIVIALLY_ITERABLE(arr) ( \
        PyArray_NDIM(arr) <= 1 || \
        PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) || \
        PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS) \
        )
/*
 * Stride to use for trivially iterating over an array (alone or as one
 * member of a pair): 0 when the size is 1 (no stepping needed), the
 * only stride when the array is 1-D, and the itemsize (contiguous
 * case) otherwise.  Asserts the array is trivially iterable.
 * (The `#define` header line was missing from the extracted copy;
 * restored.)
 */
#define PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size, arr) ( \
        assert(PyArray_TRIVIALLY_ITERABLE(arr)), \
        size == 1 ? 0 : ((PyArray_NDIM(arr) == 1) ? \
        PyArray_STRIDE(arr, 0) : PyArray_ITEMSIZE(arr)))
/*
 * Check whether a pair of equal-shape operands can safely be iterated
 * over trivially even though their memory may overlap.
 *
 * Trivial iteration is safe when:
 * - both operands are only read, or
 * - the operands do not share overlapping memory, or
 * - for each operand that is read, its iteration runs at-or-ahead of
 *   the other operand in the traversal direction, so every element is
 *   consumed before it can be overwritten.
 *
 * arr1_read/arr2_read flag whether the corresponding operand is read
 * (as opposed to being write-only).
 *
 * Returns 1 if trivial iteration is safe, 0 otherwise.
 *
 * NOTE(review): the extracted copy of this function had lost its
 * closing brace; restored.
 */
static inline int
PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(PyArrayObject *arr1, PyArrayObject *arr2,
int arr1_read, int arr2_read)
{
    npy_intp size1, size2, stride1, stride2;
    int arr1_ahead = 0, arr2_ahead = 0;

    /* Two read-only operands may always overlap. */
    if (arr1_read && arr2_read) {
        return 1;
    }

    size1 = PyArray_SIZE(arr1);
    stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1);

    /*
     * arr1 == arr2 is common for in-place operations, so we fast-path it here.
     * TODO: The stride1 != 0 check rejects broadcast arrays. This may affect
     *       self-overlapping arrays, but seems only necessary due to
     *       `try_trivial_single_output_loop` not rejecting broadcast outputs.
     */
    if (arr1 == arr2 && stride1 != 0) {
        return 1;
    }

    /* Memory that provably does not overlap is always safe. */
    if (solve_may_share_memory(arr1, arr2, 1) == 0) {
        return 1;
    }

    size2 = PyArray_SIZE(arr2);
    stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2);

    /*
     * From the base addresses and the signed strides, determine whether
     * each operand's iteration stays at-or-ahead of the other in the
     * direction of traversal.
     */
    if (stride1 > 0) {
        arr1_ahead = (stride1 >= stride2 &&
                      PyArray_BYTES(arr1) >= PyArray_BYTES(arr2));
    }
    else if (stride1 < 0) {
        arr1_ahead = (stride1 <= stride2 &&
                      PyArray_BYTES(arr1) <= PyArray_BYTES(arr2));
    }

    if (stride2 > 0) {
        arr2_ahead = (stride2 >= stride1 &&
                      PyArray_BYTES(arr2) >= PyArray_BYTES(arr1));
    }
    else if (stride2 < 0) {
        arr2_ahead = (stride2 <= stride1 &&
                      PyArray_BYTES(arr2) <= PyArray_BYTES(arr1));
    }

    /*
     * An operand that is read must iterate at-or-ahead of the other
     * operand's writes so its data is consumed before being overwritten.
     */
    return (!arr1_read || arr1_ahead) && (!arr2_read || arr2_ahead);
}
/*
 * Two arrays are "equivalently iterable" when they have the same shape
 * and matching contiguity (both C or both Fortran contiguous).
 * (The `#define` header line was missing from the extracted copy;
 * restored.  The body is kept token-identical to the duplicated copy
 * below so that, if both are present, the redefinition is benign.)
 */
#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \
        PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \
        PyArray_CompareLists(PyArray_DIMS(arr1), \
                             PyArray_DIMS(arr2), \
                             PyArray_NDIM(arr1)) && \
        (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \
                              NPY_ARRAY_F_CONTIGUOUS)) & \
        (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \
                              NPY_ARRAY_F_CONTIGUOUS)) \
        )
/*
 * Macro PyArray_EQUIVALENTLY_ITERABLE_BASE: two arrays are
 * "equivalently iterable" when they have the same shape and matching
 * contiguity (both C or both Fortran contiguous).
 * (The `#define` header line was missing from this duplicated copy in
 * the extraction; restored with a replacement list token-identical to
 * the copy above, so the redefinition is benign.)
 */
#define PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) ( \
        PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \
        PyArray_CompareLists(PyArray_DIMS(arr1), \
                             PyArray_DIMS(arr2), \
                             PyArray_NDIM(arr1)) && \
        (PyArray_FLAGS(arr1)&(NPY_ARRAY_C_CONTIGUOUS| \
                              NPY_ARRAY_F_CONTIGUOUS)) & \
        (PyArray_FLAGS(arr2)&(NPY_ARRAY_C_CONTIGUOUS| \
                              NPY_ARRAY_F_CONTIGUOUS)) \
        )
/*
 * Full equivalently-iterable check: same shape/contiguity plus the
 * overlap analysis.  (The `#define` header line was missing from the
 * extracted copy; restored, token-identical to the duplicate below.)
 */
#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \
        PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \
        PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \
            arr1, arr2, arr1_read, arr2_read))
/*
 * Macro PyArray_EQUIVALENTLY_ITERABLE: full equivalently-iterable
 * check, including the overlap analysis.  (The `#define` header line
 * was missing from this duplicated copy; restored, token-identical to
 * the copy above so the redefinition is benign.)
 */
#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2, arr1_read, arr2_read) ( \
        PyArray_EQUIVALENTLY_ITERABLE_BASE(arr1, arr2) && \
        PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK( \
            arr1, arr2, arr1_read, arr2_read))
/*
 * Prepare count/data/stride for trivial iteration over a single array.
 * (The `#define` header line was missing from the extracted copy;
 * restored, token-identical to the duplicate below.)
 */
#define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) \
        count = PyArray_SIZE(arr); \
        data = PyArray_BYTES(arr); \
        stride = ((PyArray_NDIM(arr) == 0) ? 0 : \
                        ((PyArray_NDIM(arr) == 1) ? \
                                PyArray_STRIDE(arr, 0) : \
                                PyArray_ITEMSIZE(arr)));
/*
 * Macro PyArray_PREPARE_TRIVIAL_ITERATION: prepare count/data/stride
 * for trivial iteration over a single array.  (The `#define` header
 * line was missing from this duplicated copy; restored, token-identical
 * to the copy above so the redefinition is benign.)
 */
#define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) \
        count = PyArray_SIZE(arr); \
        data = PyArray_BYTES(arr); \
        stride = ((PyArray_NDIM(arr) == 0) ? 0 : \
                        ((PyArray_NDIM(arr) == 1) ? \
                                PyArray_STRIDE(arr, 0) : \
                                PyArray_ITEMSIZE(arr)));
/*
 * Prepare count/data/strides for trivial pair iteration over two
 * (possibly size-broadcast) arrays.  (The `#define` header line was
 * missing from the extracted copy; restored, token-identical to the
 * duplicate below.)
 */
#define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \
                                count, \
                                data1, data2, \
                                stride1, stride2) { \
        npy_intp size1 = PyArray_SIZE(arr1); \
        npy_intp size2 = PyArray_SIZE(arr2); \
        count = ((size1 > size2) || size1 == 0) ? size1 : size2; \
        data1 = PyArray_BYTES(arr1); \
        data2 = PyArray_BYTES(arr2); \
        stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \
        stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \
        }
/*
 * Macro PyArray_PREPARE_TRIVIAL_PAIR_ITERATION: prepare
 * count/data/strides for trivial pair iteration over two arrays.
 * (The `#define` header line was missing from this duplicated copy;
 * restored, token-identical to the copy above so the redefinition is
 * benign.)
 */
#define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \
                                count, \
                                data1, data2, \
                                stride1, stride2) { \
        npy_intp size1 = PyArray_SIZE(arr1); \
        npy_intp size2 = PyArray_SIZE(arr2); \
        count = ((size1 > size2) || size1 == 0) ? size1 : size2; \
        data1 = PyArray_BYTES(arr1); \
        data2 = PyArray_BYTES(arr2); \
        stride1 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size1, arr1); \
        stride2 = PyArray_TRIVIAL_PAIR_ITERATION_STRIDE(size2, arr2); \
        }