NumPy Source Code Walkthrough (Part 18)
.\numpy\numpy\lib\_function_base_impl.pyi
import sys
from collections.abc import Sequence, Iterator, Callable, Iterable
from typing import (
Literal as L,
Any,
TypeVar,
overload,
Protocol,
SupportsIndex,
SupportsInt,
)
if sys.version_info >= (3, 10):
from typing import TypeGuard
else:
from typing_extensions import TypeGuard
from numpy import (
vectorize as vectorize,
ufunc,
generic,
floating,
complexfloating,
intp,
float64,
complex128,
timedelta64,
datetime64,
object_,
_OrderKACF,
)
from numpy._typing import (
NDArray,
ArrayLike,
DTypeLike,
_ShapeLike,
_ScalarLike_co,
_DTypeLike,
_ArrayLike,
_ArrayLikeInt_co,
_ArrayLikeFloat_co,
_ArrayLikeComplex_co,
_ArrayLikeTD64_co,
_ArrayLikeDT64_co,
_ArrayLikeObject_co,
_FloatLike_co,
_ComplexLike_co,
)
from numpy._core.multiarray import (
bincount as bincount,
)
_T = TypeVar("_T")
_T_co = TypeVar("_T_co", covariant=True)
_SCT = TypeVar("_SCT", bound=generic)
_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])
_2Tuple = tuple[_T, _T]
class _TrimZerosSequence(Protocol[_T_co]):
    # Structural type accepted by `trim_zeros`: a sized, slice-indexable,
    # iterable sequence.  Slicing returns the sequence's own type (_T_co).
    def __len__(self) -> int: ...
    def __getitem__(self, key: slice, /) -> _T_co: ...
    def __iter__(self) -> Iterator[Any]: ...
class _SupportsWriteFlush(Protocol):
    # Structural type for `disp`'s `device` argument: any writable,
    # flushable text stream (e.g. sys.stdout, io.StringIO).
    def write(self, s: str, /) -> object: ...
    def flush(self) -> object: ...
__all__: list[str]
@overload
def rot90(
m: _ArrayLike[_SCT],
k: int = ...,
axes: tuple[int, int] = ...,
) -> NDArray[_SCT]: ...
@overload
def rot90(
m: ArrayLike,
k: int = ...,
axes: tuple[int, int] = ...,
) -> NDArray[Any]: ...
@overload
def flip(m: _SCT, axis: None = ...) -> _SCT: ...
@overload
def flip(m: _ScalarLike_co, axis: None = ...) -> Any: ...
@overload
def flip(m: _ArrayLike[_SCT], axis: None | _ShapeLike = ...) -> NDArray[_SCT]: ...
@overload
def flip(m: ArrayLike, axis: None | _ShapeLike = ...) -> NDArray[Any]: ...
def iterable(y: object) -> TypeGuard[Iterable[Any]]: ...
# Overloads for `np.average`.  With `axis=None` the result is a scalar
# (a 2-tuple of (average, sum_of_weights) when `returned=True`); with an
# explicit axis the result type cannot be narrowed further than `Any`.
# NOTE: a stray non-code line inside the complex `returned=True` overload
# made this group syntactically invalid; it has been removed.
@overload
def average(
    a: _ArrayLikeFloat_co,
    axis: None = ...,
    weights: None | _ArrayLikeFloat_co = ...,
    returned: L[False] = ...,
    keepdims: L[False] = ...,
) -> floating[Any]: ...
@overload
def average(
    a: _ArrayLikeComplex_co,
    axis: None = ...,
    weights: None | _ArrayLikeComplex_co = ...,
    returned: L[False] = ...,
    keepdims: L[False] = ...,
) -> complexfloating[Any, Any]: ...
@overload
def average(
    a: _ArrayLikeObject_co,
    axis: None = ...,
    weights: None | Any = ...,
    returned: L[False] = ...,
    keepdims: L[False] = ...,
) -> Any: ...
@overload
def average(
    a: _ArrayLikeFloat_co,
    axis: None = ...,
    weights: None | _ArrayLikeFloat_co = ...,
    returned: L[True] = ...,
    keepdims: L[False] = ...,
) -> _2Tuple[floating[Any]]: ...
@overload
def average(
    a: _ArrayLikeComplex_co,
    axis: None = ...,
    weights: None | _ArrayLikeComplex_co = ...,
    returned: L[True] = ...,
    keepdims: L[False] = ...,
) -> _2Tuple[complexfloating[Any, Any]]: ...
@overload
def average(
    a: _ArrayLikeObject_co,
    axis: None = ...,
    weights: None | Any = ...,
    returned: L[True] = ...,
    keepdims: L[False] = ...,
) -> _2Tuple[Any]: ...
@overload
def average(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    weights: None | Any = ...,
    returned: L[False] = ...,
    keepdims: bool = ...,
) -> Any: ...
@overload
def average(
    a: _ArrayLikeComplex_co | _ArrayLikeObject_co,
    axis: None | _ShapeLike = ...,
    weights: None | Any = ...,
    returned: L[True] = ...,
    keepdims: bool = ...,
) -> _2Tuple[Any]: ...
@overload
def asarray_chkfinite(
a: _ArrayLike[_SCT],
dtype: None = ...,
order: _OrderKACF = ...,
) -> NDArray[_SCT]: ...
@overload
def asarray_chkfinite(
a: object,
dtype: None = ...,
order: _OrderKACF = ...,
) -> NDArray[Any]: ...
@overload
def asarray_chkfinite(
a: Any,
dtype: _DTypeLike[_SCT],
order: _OrderKACF = ...,
) -> NDArray[_SCT]: ...
@overload
def asarray_chkfinite(
a: Any,
dtype: DTypeLike,
order: _OrderKACF = ...,
) -> NDArray[Any]: ...
@overload
def piecewise(
x: _ArrayLike[_SCT],
condlist: ArrayLike,
funclist: Sequence[Any | Callable[..., Any]],
*args: Any,
**kw: Any,
) -> NDArray[_SCT]: ...
@overload
def piecewise(
x: ArrayLike,
condlist: ArrayLike,
funclist: Sequence[Any | Callable[..., Any]],
*args: Any,
**kw: Any,
) -> NDArray[Any]: ...
def select(
condlist: Sequence[ArrayLike],
choicelist: Sequence[ArrayLike],
default: ArrayLike = ...,
) -> NDArray[Any]: ...
@overload
def copy(
a: _ArrayType,
order: _OrderKACF,
subok: L[True],
) -> _ArrayType: ...
@overload
def copy(
a: _ArrayType,
order: _OrderKACF = ...,
*,
subok: L[True],
) -> _ArrayType: ...
@overload
def copy(
a: _ArrayLike[_SCT],
order: _OrderKACF = ...,
subok: L[False] = ...,
) -> NDArray[_SCT]: ...
@overload
def copy(
a: ArrayLike,
order: _OrderKACF = ...,
subok: L[False] = ...,
) -> NDArray[Any]: ...
def gradient(
f: ArrayLike,
*varargs: ArrayLike,
axis: None | _ShapeLike = ...,
edge_order: L[1, 2] = ...,
) -> Any: ...
@overload
def diff(
a: _T,
n: L[0],
axis: SupportsIndex = ...,
prepend: ArrayLike = ...,
append: ArrayLike = ...,
) -> _T: ...
@overload
def diff(
a: ArrayLike,
n: int = ...,
axis: SupportsIndex = ...,
prepend: ArrayLike = ...,
append: ArrayLike = ...,
) -> NDArray[Any]: ...
# Overloads for `np.interp`: float64 output for real `fp`, complex128 when
# `fp` is complex.  NOTE: in the corrupted original the second overload was
# truncated and its return annotation was fused onto the first `angle`
# overload, shifting the `angle` return types; both are restored here.
@overload
def interp(
    x: _ArrayLikeFloat_co,
    xp: _ArrayLikeFloat_co,
    fp: _ArrayLikeFloat_co,
    left: None | _FloatLike_co = ...,
    right: None | _FloatLike_co = ...,
    period: None | _FloatLike_co = ...,
) -> NDArray[float64]: ...
@overload
def interp(
    x: _ArrayLikeFloat_co,
    xp: _ArrayLikeFloat_co,
    fp: _ArrayLikeComplex_co,
    left: None | _ComplexLike_co = ...,
    right: None | _ComplexLike_co = ...,
    period: None | _FloatLike_co = ...,
) -> NDArray[complex128]: ...

# Overloads for `np.angle`: scalar in -> scalar out, array-like -> array.
@overload
def angle(z: _ComplexLike_co, deg: bool = ...) -> floating[Any]: ...
@overload
def angle(z: object_, deg: bool = ...) -> Any: ...
@overload
def angle(z: _ArrayLikeComplex_co, deg: bool = ...) -> NDArray[floating[Any]]: ...
@overload
def angle(z: _ArrayLikeObject_co, deg: bool = ...) -> NDArray[object_]: ...
def sort_complex(a: ArrayLike) -> NDArray[complexfloating[Any, Any]]: ...
def trim_zeros(
filt: _TrimZerosSequence[_T],
trim: L["f", "b", "fb", "bf"] = ...,
) -> _T: ...
@overload
def extract(condition: ArrayLike, arr: _ArrayLike[_SCT]) -> NDArray[_SCT]: ...
@overload
def extract(condition: ArrayLike, arr: ArrayLike) -> NDArray[Any]: ...
def place(arr: NDArray[Any], mask: ArrayLike, vals: Any) -> None: ...
def disp(
mesg: object,
device: None | _SupportsWriteFlush = ...,
linefeed: bool = ...,
) -> None: ...
@overload
def cov(
m: _ArrayLikeFloat_co,
y: None | _ArrayLikeFloat_co = ...,
rowvar: bool = ...,
bias: bool = ...,
ddof: None | SupportsIndex | SupportsInt = ...,
fweights: None | ArrayLike = ...,
aweights: None | ArrayLike = ...,
*,
dtype: None = ...,
) -> NDArray[floating[Any]]: ...
@overload
def cov(
m: _ArrayLikeComplex_co,
y: None | _ArrayLikeComplex_co = ...,
rowvar: bool = ...,
bias: bool = ...,
ddof: None | SupportsIndex | SupportsInt = ...,
fweights: None | ArrayLike = ...,
aweights: None | ArrayLike = ...,
*,
dtype: None = ...,
) -> NDArray[complexfloating[Any, Any]]: ...
@overload
def cov(
m: _ArrayLikeComplex_co,
y: None | _ArrayLikeComplex_co = ...,
rowvar: bool = ...,
bias: bool = ...,
ddof: None | SupportsIndex | SupportsInt = ...,
fweights: None | ArrayLike = ...,
aweights: None | ArrayLike = ...,
*,
dtype: _DTypeLike[_SCT],
) -> NDArray[_SCT]: ...
@overload
def cov(
m: _ArrayLikeComplex_co,
y: None | _ArrayLikeComplex_co = ...,
rowvar: bool = ...,
bias: bool = ...,
ddof: None | SupportsIndex | SupportsInt = ...,
fweights: None | ArrayLike = ...,
aweights: None | ArrayLike = ...,
*,
dtype: DTypeLike,
) -> NDArray[Any]: ...
# Overloads for `np.corrcoef`, mirroring the `cov` overloads above: result
# dtype follows the inputs unless an explicit `dtype` is supplied.
# NOTE: the third overload's `@overload`/`def corrcoef(` header lines were
# missing in the corrupted original; they are restored here.
@overload
def corrcoef(
    m: _ArrayLikeFloat_co,
    y: None | _ArrayLikeFloat_co = ...,
    rowvar: bool = ...,
    *,
    dtype: None = ...,
) -> NDArray[floating[Any]]: ...
@overload
def corrcoef(
    m: _ArrayLikeComplex_co,
    y: None | _ArrayLikeComplex_co = ...,
    rowvar: bool = ...,
    *,
    dtype: None = ...,
) -> NDArray[complexfloating[Any, Any]]: ...
@overload
def corrcoef(
    m: _ArrayLikeComplex_co,
    y: None | _ArrayLikeComplex_co = ...,
    rowvar: bool = ...,
    *,
    dtype: _DTypeLike[_SCT],
) -> NDArray[_SCT]: ...
@overload
def corrcoef(
    m: _ArrayLikeComplex_co,
    y: None | _ArrayLikeComplex_co = ...,
    rowvar: bool = ...,
    *,
    dtype: DTypeLike,
) -> NDArray[Any]: ...
def blackman(M: _FloatLike_co) -> NDArray[floating[Any]]: ...
def bartlett(M: _FloatLike_co) -> NDArray[floating[Any]]: ...
def hanning(M: _FloatLike_co) -> NDArray[floating[Any]]: ...
def hamming(M: _FloatLike_co) -> NDArray[floating[Any]]: ...
def i0(x: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ...
def kaiser(
M: _FloatLike_co,
beta: _FloatLike_co,
) -> NDArray[floating[Any]]: ...
@overload
def sinc(x: _FloatLike_co) -> floating[Any]: ...
@overload
def sinc(x: _ComplexLike_co) -> complexfloating[Any, Any]: ...
@overload
def sinc(x: _ArrayLikeFloat_co) -> NDArray[floating[Any]]: ...
@overload
def sinc(x: _ArrayLikeComplex_co) -> NDArray[complexfloating[Any, Any]]: ...
@overload
def median(
a: _ArrayLikeFloat_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
keepdims: L[False] = ...,
) -> floating[Any]: ...
@overload
def median(
a: _ArrayLikeComplex_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
keepdims: L[False] = ...,
) -> complexfloating[Any, Any]: ...
@overload
def median(
a: _ArrayLikeTD64_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
keepdims: L[False] = ...,
) -> timedelta64: ...
@overload
def median(
a: _ArrayLikeObject_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
keepdims: L[False] = ...,
) -> Any: ...
@overload
def median(
a: _ArrayLikeFloat_co | _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co,
axis: None | _ShapeLike = ...,
out: None = ...,
overwrite_input: bool = ...,
keepdims: bool = ...,
) -> Any: ...
@overload
def median(
a: _ArrayLikeFloat_co | _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co,
axis: None | _ShapeLike = ...,
out: _ArrayType = ...,
overwrite_input: bool = ...,
keepdims: bool = ...,
) -> _ArrayType: ...
# Accepted values for the `method` keyword of `percentile`/`quantile`.
_MethodKind = L[
    "inverted_cdf",
    "averaged_inverted_cdf",
    "closest_observation",
    "interpolated_inverted_cdf",
    "hazen",
    "weibull",
    "linear",
    "median_unbiased",
    "normal_unbiased",
    "lower",
    "higher",
    "midpoint",
    "nearest",
]
@overload
def percentile(
a: _ArrayLikeFloat_co,
q: _FloatLike_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...,
) -> floating[Any]: ...
@overload
def percentile(
a: _ArrayLikeComplex_co,
q: _FloatLike_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...,
) -> complexfloating[Any, Any]: ...
# Scalar-q overload for timedelta64 input.  NOTE: the `@overload` decorator
# was missing in the corrupted original; without it this bare `def` would
# shadow all preceding `percentile` overloads.
@overload
def percentile(
    a: _ArrayLikeTD64_co,
    q: _FloatLike_co,
    axis: None = ...,
    out: None = ...,
    overwrite_input: bool = ...,
    method: _MethodKind = ...,
    keepdims: L[False] = ...,
    *,
    weights: None | _ArrayLikeFloat_co = ...,
) -> timedelta64: ...
@overload
def percentile(
a: _ArrayLikeDT64_co,
q: _FloatLike_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> datetime64:
...
# (The remaining overloads follow the same pattern for each element type.)
@overload
def percentile(
a: _ArrayLikeObject_co,
q: _FloatLike_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> Any:
...
@overload
def percentile(
a: _ArrayLikeFloat_co,
q: _ArrayLikeFloat_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> NDArray[floating[Any]]:
...
@overload
def percentile(
a: _ArrayLikeComplex_co,
q: _ArrayLikeFloat_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> NDArray[complexfloating[Any, Any]]:
...
@overload
def percentile(
a: _ArrayLikeTD64_co,
q: _ArrayLikeFloat_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> NDArray[timedelta64]:
...
@overload
def percentile(
a: _ArrayLikeDT64_co,
q: _ArrayLikeFloat_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> NDArray[datetime64]:
...
@overload
def percentile(
a: _ArrayLikeObject_co,
q: _ArrayLikeFloat_co,
axis: None = ...,
out: None = ...,
overwrite_input: bool = ...,
method: _MethodKind = ...,
keepdims: L[False] = ...,
*,
weights: None | _ArrayLikeFloat_co = ...
) -> NDArray[object_]:
...
# Catch-all overload for an explicit `axis`: result shape depends on runtime
# values, so the return type is `Any`.  NOTE: the input union repeated
# `_ArrayLikeTD64_co` and omitted `_ArrayLikeDT64_co`; fixed here so
# datetime64 inputs are accepted, matching the scalar overloads above.
@overload
def percentile(
    a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeDT64_co | _ArrayLikeObject_co,
    q: _ArrayLikeFloat_co,
    axis: None | _ShapeLike = ...,
    out: None = ...,
    overwrite_input: bool = ...,
    method: _MethodKind = ...,
    keepdims: bool = ...,
    *,
    weights: None | _ArrayLikeFloat_co = ...,
) -> Any: ...
# Overload for an explicit `out` array: the result is the `out` array itself,
# so the return type is `_ArrayType`, not `Any`.  NOTE: the corrupted
# original returned `Any` and was followed by orphaned duplicate parameter
# lines; both defects are repaired here.
@overload
def percentile(
    a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeDT64_co | _ArrayLikeObject_co,
    q: _ArrayLikeFloat_co,
    axis: None | _ShapeLike = ...,
    out: _ArrayType = ...,
    overwrite_input: bool = ...,
    method: _MethodKind = ...,
    keepdims: bool = ...,
    *,
    weights: None | _ArrayLikeFloat_co = ...,
) -> _ArrayType: ...
# `quantile` reuses `percentile`'s overload set unchanged.
quantile = percentile
def meshgrid(
*xi: ArrayLike,
copy: bool = ...,
sparse: bool = ...,
indexing: L["xy", "ij"] = ...,
) -> tuple[NDArray[Any], ...]: ...
@overload
def delete(
arr: _ArrayLike[_SCT],
obj: slice | _ArrayLikeInt_co,
axis: None | SupportsIndex = ...,
) -> NDArray[_SCT]: ...
@overload
def delete(
arr: ArrayLike,
obj: slice | _ArrayLikeInt_co,
axis: None | SupportsIndex = ...,
) -> NDArray[Any]: ...
@overload
def insert(
arr: _ArrayLike[_SCT],
obj: slice | _ArrayLikeInt_co,
values: ArrayLike,
axis: None | SupportsIndex = ...,
) -> NDArray[_SCT]: ...
@overload
def insert(
arr: ArrayLike,
obj: slice | _ArrayLikeInt_co,
values: ArrayLike,
axis: None | SupportsIndex = ...,
) -> NDArray[Any]: ...
def append(
arr: ArrayLike,
values: ArrayLike,
axis: None | SupportsIndex = ...,
) -> NDArray[Any]: ...
@overload
def digitize(
x: _FloatLike_co,
bins: _ArrayLikeFloat_co,
right: bool = ...,
) -> intp: ...
@overload
def digitize(
x: _ArrayLikeFloat_co,
bins: _ArrayLikeFloat_co,
right: bool = ...,
) -> NDArray[intp]: ...
.\numpy\numpy\lib\_histograms_impl.py
"""
Histogram-related functions
"""
import contextlib
import functools
import operator
import warnings
import numpy as np
from numpy._core import overrides
__all__ = ['histogram', 'histogramdd', 'histogram_bin_edges']
array_function_dispatch = functools.partial(
overrides.array_function_dispatch, module='numpy')
_range = range
def _ptp(x):
    """Peak-to-peak value of x.

    This implementation avoids the problem of signed integer arrays having a
    peak-to-peak value that cannot be represented with the array's data type.
    This function returns an unsigned value for signed integer arrays.
    """
    # NOTE(review): relies on the module-level helper `_unsigned_subtract`,
    # which is defined elsewhere in this file (not visible in this chunk).
    return _unsigned_subtract(x.max(), x.min())
def _hist_bin_sqrt(x, range):
    """
    Square root histogram bin estimator.

    Bin width is inversely proportional to the data size. Used by many
    programs for its simplicity.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    del range  # unused; kept for a uniform estimator signature
    return _ptp(x) / np.sqrt(x.size)
def _hist_bin_sturges(x, range):
    """
    Sturges histogram bin estimator.

    A very simplistic estimator based on the assumption of normality of
    the data. This estimator has poor performance for non-normal data,
    which becomes especially obvious for large data sets. The estimate
    depends only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    del range  # unused; kept for a uniform estimator signature
    # Sturges: number of bins = log2(n) + 1; width = ptp / nbins.
    return _ptp(x) / (np.log2(x.size) + 1.0)
def _hist_bin_rice(x, range):
    """
    Rice histogram bin estimator.

    Another simple estimator with no normality assumption. It has better
    performance for large data than Sturges, but tends to overestimate
    the number of bins. The number of bins is proportional to the cube
    root of data size (asymptotically optimal). The estimate depends
    only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    del range  # unused; kept for a uniform estimator signature
    # Rice: number of bins = 2 * n**(1/3); width = ptp / nbins.
    return _ptp(x) / (2.0 * x.size ** (1.0 / 3))
def _hist_bin_scott(x, range):
"""
Scott histogram bin estimator.
The binwidth is proportional to the standard deviation of the data
and inversely proportional to the cube root of data size
(asymptotically optimal).
Parameters
----------
x : array_like
Input data that is to be histogrammed, trimmed to range. May not
be empty.
Returns
-------
h : An estimate of the optimal bin width for the given data.
"""
del range
return _ptp(x) / (np.std(x) * x.size ** (1.0 / 3))
h : An estimate of the optimal bin width for the given data.
del range
return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x)
def _hist_bin_stone(x, range):
    """
    Histogram bin estimator based on minimizing the estimated integrated
    squared error (ISE).

    The number of bins is chosen by minimizing the estimated ISE against the
    unknown true distribution. The ISE is estimated using cross-validation
    and can be regarded as a generalization of Scott's rule.
    https://en.wikipedia.org/wiki/Histogram#Scott.27s_normal_reference_rule

    This paper by Stone appears to be the origination of this rule.
    https://digitalassets.lib.berkeley.edu/sdtr/ucb/text/34.pdf

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.
    range : (float, float)
        The lower and upper range of the bins.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    n = x.size
    ptp_x = _ptp(x)
    # Degenerate data (a single point, or all values equal): no meaningful
    # width can be estimated; 0 signals the caller to fall back to one bin.
    if n <= 1 or ptp_x == 0:
        return 0

    def jhat(nbins):
        # Leave-one-out cross-validation risk estimate for `nbins`
        # equal-width bins; lower is better.
        hh = ptp_x / nbins
        p_k = np.histogram(x, bins=nbins, range=range)[0] / n
        return (2 - (n + 1) * p_k.dot(p_k)) / hh

    # Search bin counts 1..max(100, sqrt(n)) for the minimal risk.
    nbins_upper_bound = max(100, int(np.sqrt(n)))
    nbins = min(_range(1, nbins_upper_bound + 1), key=jhat)
    # Hitting the search boundary suggests the true optimum lies beyond it.
    if nbins == nbins_upper_bound:
        warnings.warn("The number of bins estimated may be suboptimal.",
                      RuntimeWarning, stacklevel=3)
    return ptp_x / nbins
def _hist_bin_doane(x, range):
    """
    Doane's histogram bin estimator.

    Improved version of Sturges' formula which works better for
    non-normal data. See
    stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    del range  # unused; kept for a uniform estimator signature
    if x.size > 2:
        # sg1: standard error of the sample skewness under normality.
        sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3)))
        sigma = np.std(x)
        if sigma > 0.0:
            # Compute the skewness g1 in place to avoid extra temporaries.
            temp = x - np.mean(x)
            np.true_divide(temp, sigma, temp)
            np.power(temp, 3, temp)
            g1 = np.mean(temp)
            # Doane: nbins = 1 + log2(n) + log2(1 + |g1| / sg1).
            return _ptp(x) / (1.0 + np.log2(x.size) +
                              np.log2(1.0 + np.absolute(g1) / sg1))
    # Too few points or zero variance: no valid width (caller falls back).
    return 0.0
def _hist_bin_fd(x, range):
"""
The Freedman-Diaconis histogram bin estimator.
The Freedman-Diaconis rule uses interquartile range (IQR) to
estimate binwidth. It is considered a variation of the Scott rule
with more robustness as the IQR is less affected by outliers than
the standard deviation. However, the IQR depends on fewer points
than the standard deviation, so it is less accurate, especially for
long tailed distributions.
"""
del range
return _ptp(x) / (2.0 * np.subtract(*np.percentile(x, [75, 25])) * np.power(x.size, -1/3))
Parameters
----------
x : array_like
要制作直方图的输入数据,已经修剪到指定范围。不得为空。
Returns
-------
h : 给定数据的最佳箱宽度的估计值。
"""
# 删除变量 range,因为它未使用
del range # unused
# 计算数据的四分位距(IQR)
iqr = np.subtract(*np.percentile(x, [75, 25]))
# 根据数据大小计算并返回最佳箱宽度的估计值
return 2.0 * iqr * x.size ** (-1.0 / 3.0)
def _hist_bin_auto(x, range):
    """
    Histogram bin estimator that uses the minimum width of the
    Freedman-Diaconis and Sturges estimators if the FD bin width is non-zero.
    If the bin width from the FD estimator is 0, the Sturges estimator is
    used.

    The FD estimator is usually the most robust method, but its width
    estimate tends to be too large for small datasets.  The Sturges
    estimator is quite good for small (<1000) datasets and is the default
    in R, so this method provides good out-of-the-box behaviour.

    If there is limited variance the IQR can be 0, which results in the
    FD bin width also being 0; that is not a valid width, so in that case
    we fall back to the Sturges estimator, which only uses the size of
    the dataset.
    """
    fd_bw = _hist_bin_fd(x, range)            # width via Freedman-Diaconis
    sturges_bw = _hist_bin_sturges(x, range)  # width via Sturges
    del range  # unused after the calls above
    if fd_bw:
        return min(fd_bw, sturges_bw)
    else:
        # limited variance: use the purely size-based Sturges estimate
        return sturges_bw
# Private dict initialized at module load time, mapping each estimator name
# accepted by `bins=<str>` to its bin-width function.
_hist_bin_selectors = {'stone': _hist_bin_stone,
                       'auto': _hist_bin_auto,
                       'doane': _hist_bin_doane,
                       'fd': _hist_bin_fd,
                       'rice': _hist_bin_rice,
                       'scott': _hist_bin_scott,
                       'sqrt': _hist_bin_sqrt,
                       'sturges': _hist_bin_sturges}
def _ravel_and_check_weights(a, weights):
    """ Check a and weights have matching shapes, and ravel both """
    a = np.asarray(a)

    # Ensure that the array is a "subtractable" dtype: booleans cannot be
    # subtracted, so convert (with a warning) to uint8.
    if a.dtype == np.bool:
        warnings.warn("Converting input from {} to {} for compatibility."
                      .format(a.dtype, np.uint8),
                      RuntimeWarning, stacklevel=3)
        a = a.astype(np.uint8)

    if weights is not None:
        weights = np.asarray(weights)
        if weights.shape != a.shape:
            raise ValueError(
                'weights should have the same shape as a.')
        weights = weights.ravel()
    a = a.ravel()
    return a, weights
def _get_outer_edges(a, range):
    """
    Determine the outer bin edges to use, from either the data or the
    `range` argument.  Returns (first_edge, last_edge).
    """
    if range is not None:
        # Explicit range supplied: validate ordering and finiteness.
        first_edge, last_edge = range
        if first_edge > last_edge:
            raise ValueError(
                'max must be larger than min in range parameter.')
        if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
            raise ValueError(
                "supplied range of [{}, {}] is not finite".format(first_edge, last_edge))
    elif a.size == 0:
        # Empty array: no data to infer a range from; use [0, 1].
        first_edge, last_edge = 0, 1
    else:
        # Auto-detect the range from the data's min and max.
        first_edge, last_edge = a.min(), a.max()
        if not (np.isfinite(first_edge) and np.isfinite(last_edge)):
            raise ValueError(
                "autodetected range of [{}, {}] is not finite".format(first_edge, last_edge))

    # Expand an empty range symmetrically to avoid division by zero
    # downstream when computing bin widths.
    if first_edge == last_edge:
        first_edge = first_edge - 0.5
        last_edge = last_edge + 0.5

    return first_edge, last_edge
# 解析重载的 bins 参数
n_equal_bins = None
bin_edges = None
# 如果 bins 是字符串,表示使用自动方法确定的 bin 数量
if isinstance(bins, str):
bin_name = bins
# 如果 bin_name 不在 _hist_bin_selectors 中,抛出 ValueError 异常
if bin_name not in _hist_bin_selectors:
raise ValueError(
"{!r} is not a valid estimator for `bins`".format(bin_name))
# 如果 weights 不为 None,不支持带权数据的自动估计 bin 数量,抛出 TypeError 异常
if weights is not None:
raise TypeError("Automated estimation of the number of "
"bins is not supported for weighted data")
# 获取数据的外侧边界
first_edge, last_edge = _get_outer_edges(a, range)
# 如果指定了 range,根据范围截取数据
if range is not None:
keep = (a >= first_edge)
keep &= (a <= last_edge)
if not np.logical_and.reduce(keep):
a = a[keep]
# 如果数据为空数组,设置默认的 bin 数量为 1
if a.size == 0:
n_equal_bins = 1
else:
# 根据选择器计算 bin 的宽度
width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
if width:
# 对于整数类型的数组,确保宽度至少为 1
if np.issubdtype(a.dtype, np.integer) and width < 1:
width = 1
# 计算等宽 bin 的数量
n_equal_bins = int(np.ceil(_unsigned_subtract(last_edge, first_edge) / width))
else:
# 对于某些估计器,如 FD 当数据的 IQR 为零时,宽度可能为零
n_equal_bins = 1
# 如果 bins 是零维数组,将其解释为整数数量的 bin
elif np.ndim(bins) == 0:
try:
n_equal_bins = operator.index(bins)
except TypeError as e:
raise TypeError(
'`bins` must be an integer, a string, or an array') from e
# 如果指定的 bin 数量小于 1,抛出 ValueError 异常
if n_equal_bins < 1:
raise ValueError('`bins` must be positive, when an integer')
# 获取数据的外侧边界
first_edge, last_edge = _get_outer_edges(a, range)
# 如果 bins 是一维数组,将其解释为 bin 的边缘值
elif np.ndim(bins) == 1:
bin_edges = np.asarray(bins)
# 检查 bin 边缘值是否单调递增
if np.any(bin_edges[:-1] > bin_edges[1:]):
raise ValueError(
'`bins` must increase monotonically, when an array')
# 如果 bins 不符合上述条件,抛出 ValueError 异常
else:
raise ValueError('`bins` must be 1d, when an array')
# 如果成功确定了 bin 数量
if n_equal_bins is not None:
# 确定 bin 边缘值的数据类型,以确保类型一致性
bin_type = np.result_type(first_edge, last_edge, a)
if np.issubdtype(bin_type, np.integer):
bin_type = np.result_type(bin_type, float)
# 计算等间距的 bin 边缘值
bin_edges = np.linspace(
first_edge, last_edge, n_equal_bins + 1,
endpoint=True, dtype=bin_type)
# 返回计算得到的 bin 边缘值以及相关信息
return bin_edges, (first_edge, last_edge, n_equal_bins)
else:
# 如果无法确定 bin 数量,则返回已有的 bin 边缘值及空信息
return bin_edges, None
def _search_sorted_inclusive(a, v):
    """
    Like `a.searchsorted(v)`, but with the last element of `v` matched on
    the 'right' side.  In the context of a histogram this makes the last
    bin edge inclusive.
    """
    return np.concatenate((
        # all edges but the last: first index where a[i] >= v (left side)
        a.searchsorted(v[:-1], 'left'),
        # last edge only: first index where a[i] > v (right side, inclusive)
        a.searchsorted(v[-1:], 'right')
    ))
def _histogram_bin_edges_dispatcher(a, bins=None, range=None, weights=None):
    # __array_function__ dispatcher: only the array-like arguments
    # participate in dispatch.
    return (a, bins, weights)
@array_function_dispatch(_histogram_bin_edges_dispatcher)
def histogram_bin_edges(a, bins=10, range=None, weights=None):
    r"""
    Function to calculate only the edges of the bins used by the `histogram`
    function.

    NOTE(review): in the corrupted source the tail of this docstring had been
    turned into ``#`` comments and the closing quotes were lost, leaving the
    function syntactically invalid; the docstring is restored (condensed)
    here.

    Parameters
    ----------
    a : array_like
        Input data. The histogram is computed over the flattened array.
    bins : int or sequence of scalars or str, optional
        If an int, the number of equal-width bins in the given range
        (10 by default). If a sequence, the bin edges, including the
        rightmost edge, allowing for non-uniform bin widths. If a string
        ('auto', 'fd', 'doane', 'scott', 'stone', 'rice', 'sturges',
        'sqrt'), the method used to calculate the optimal bin width from
        the data that falls within the requested range. Weighted data is
        not supported for automated bin size selection.
    range : (float, float), optional
        The lower and upper range of the bins. If not provided, range is
        simply ``(a.min(), a.max())``. Values outside the range are
        ignored. The first element must be less than or equal to the
        second. `range` also affects the automatic bin computation: the
        bin width is computed from the data within `range`, but the bin
        count fills the entire range, including empty portions.
    weights : array_like, optional
        An array of weights, of the same shape as `a`. Currently not used
        by any of the bin estimators, but may be in the future.

    Returns
    -------
    bin_edges : array of dtype float
        The edges to pass into `histogram`.

    See Also
    --------
    histogram
    """
    # Flatten the input (and weights) and validate their shapes.
    a, weights = _ravel_and_check_weights(a, weights)
    # Resolve `bins`/`range` into concrete edge values; the second return
    # value (uniform-bin info) is only needed by `histogram` itself.
    bin_edges, _ = _get_bin_edges(a, bins, range, weights)
    return bin_edges
def _histogram_dispatcher(
        a, bins=None, range=None, density=None, weights=None):
    # __array_function__ dispatcher for `histogram`: only the array-like
    # arguments participate in dispatch.
    return (a, bins, weights)
# 使用装饰器 array_function_dispatch 将 _histogram_dispatcher 与 histogram 函数关联
@array_function_dispatch(_histogram_dispatcher)
# 定义函数 histogram,用于计算数据集的直方图
def histogram(a, bins=10, range=None, density=None, weights=None):
r"""
计算数据集的直方图。
Parameters
----------
a : array_like
输入数据。直方图计算将在扁平化的数组上进行。
bins : int or sequence of scalars or str, optional
如果 bins 是 int,则定义给定范围内的等宽 bins 的数量(默认为 10)。
如果 bins 是 sequence,则定义一个单调递增的 bin 边缘数组,包括最右边的边缘,允许非均匀的 bin 宽度。
.. versionadded:: 1.11.0
如果 bins 是 str,则定义用于计算最佳 bin 宽度的方法,由 histogram_bin_edges 定义。
range : (float, float), optional
bins 的下限和上限。如果未提供,则 range 简单地为 (a.min(), a.max())。
超出范围的值将被忽略。范围的第一个元素必须小于或等于第二个元素。range 也会影响自动 bin 计算。
虽然根据实际数据计算范围内的最佳 bin 宽度,但 bin 计数将填充整个范围,包括不包含数据的部分。
weights : array_like, optional
与 a 相同形状的权重数组。a 中的每个值仅对 bin 计数贡献其关联的权重(而不是 1)。
如果 density 为 True,则对权重进行归一化,使得范围内密度的积分保持为 1。
请注意,weights 的 dtype 也将成为返回的累加器(hist)的 dtype,因此必须足够大以容纳累积值。
density : bool, optional
如果为 False,则结果将包含每个 bin 中的样本数。如果为 True,则结果是 bin 处概率密度函数的值,
归一化使得范围内的积分为 1。请注意,直方图值的总和不会等于 1,除非选择单位宽度的 bins;它不是概率质量函数。
Returns
-------
hist : array
直方图的值。有关可能语义的描述,请参阅 density 和 weights。
如果给定了 weights,则 hist.dtype 将从 weights 中获取。
bin_edges : array of dtype float
返回 bin 边缘的数组(长度为 hist+1)。
See Also
--------
histogramdd, bincount, searchsorted, digitize, histogram_bin_edges
Notes
-----
所有但最后一个(最右侧的)bin 都是半开放的。换句话说,如果 bins 是::
[1, 2, 3, 4]
"""
a, weights = _ravel_and_check_weights(a, weights)
# 将输入数组 a 和权重 weights 展平并检查它们的格式
bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights)
# 根据输入数组 a、分bin规则 bins、范围 range 和权重 weights 获取 bin 边界和 uniform_bins
# uniform_bins 是一个布尔值,指示是否使用均匀分bin
# 根据权重的数据类型选择直方图数组的数据类型
if weights is None:
ntype = np.dtype(np.intp)
else:
ntype = weights.dtype
# 设置块大小,以便在计算直方图时能够迭代处理块,从而最小化内存使用
BLOCK = 65536
# 快速路径使用 bincount 函数,但仅适用于特定类型的权重
simple_weights = (
weights is None or
np.can_cast(weights.dtype, np.double) or
np.can_cast(weights.dtype, complex)
)
"""
# 如果 uniform_bins 和 simple_weights 都不为 None,则执行以下代码块
if uniform_bins is not None and simple_weights:
# 使用快速算法生成等宽的直方图
# 这里假设了等宽的箱体,基于这个假设转换 a 中的值为箱子的索引是有效的
# 从 uniform_bins 中解包出第一个边缘、最后一个边缘和等宽箱子的数量
first_edge, last_edge, n_equal_bins = uniform_bins
# 初始化一个空的直方图 n,长度为 n_equal_bins,数据类型为 ntype
n = np.zeros(n_equal_bins, ntype)
# 预先计算直方图的缩放因子
norm_numerator = n_equal_bins
norm_denom = _unsigned_subtract(last_edge, first_edge)
# 我们在这里迭代块的原因有两个:首先,对于大数组来说,这样做实际上更快(例如对于一个 10^8 大小的数组,快两倍),
# 其次,在大数组的极限情况下,它会降低内存占用 3 倍。
for i in _range(0, len(a), BLOCK):
# 从数组 a 中获取块大小为 BLOCK 的子数组 tmp_a
tmp_a = a[i:i+BLOCK]
# 如果 weights 为 None,则 tmp_w 也为 None;否则,获取对应的 weights 子数组 tmp_w
if weights is None:
tmp_w = None
else:
tmp_w = weights[i:i + BLOCK]
# 仅保留落在指定范围内的值
keep = (tmp_a >= first_edge)
keep &= (tmp_a <= last_edge)
if not np.logical_and.reduce(keep):
tmp_a = tmp_a[keep]
if tmp_w is not None:
tmp_w = tmp_w[keep]
# 确保类型转换在这里执行,以避免下面出现不可预测的精度错误
tmp_a = tmp_a.astype(bin_edges.dtype, copy=False)
# 计算箱子的索引,对于恰好在 last_edge 上的值,需要减去一个
f_indices = ((_unsigned_subtract(tmp_a, first_edge) / norm_denom)
* norm_numerator)
indices = f_indices.astype(np.intp)
indices[indices == n_equal_bins] -= 1
# 索引计算可能在箱子边缘附近 ±1 个单位内不一致
# 如果 tmp_a 小于 bin_edges[indices],则减 1
decrement = tmp_a < bin_edges[indices]
indices[decrement] -= 1
# 最后一个箱子包括右边缘,其它箱子不包括
increment = ((tmp_a >= bin_edges[indices + 1])
& (indices != n_equal_bins - 1))
indices[increment] += 1
# 使用 bincount 计算直方图
if ntype.kind == 'c':
# 如果 ntype 的种类是复数,则分别对实部和虚部进行加权求和
n.real += np.bincount(indices, weights=tmp_w.real,
minlength=n_equal_bins)
n.imag += np.bincount(indices, weights=tmp_w.imag,
minlength=n_equal_bins)
else:
# 否则,直接对 n 进行加权求和,并转换为 ntype 类型
n += np.bincount(indices, weights=tmp_w,
minlength=n_equal_bins).astype(ntype)
# 如果指定了 density 参数,则计算密度而不是直方图
if density:
# 计算直方图的累积分布
cum_n = np.zeros(bin_edges.shape, ntype)
# 如果未提供权重,则按块排序并计算累积直方图
if weights is None:
for i in _range(0, len(a), BLOCK):
# 对每个块的数据进行排序
sa = np.sort(a[i:i+BLOCK])
# 将排序后的数据加入累积直方图
cum_n += _search_sorted_inclusive(sa, bin_edges)
else:
zero = np.zeros(1, dtype=ntype)
for i in _range(0, len(a), BLOCK):
# 按块对数据和权重进行排序
tmp_a = a[i:i+BLOCK]
tmp_w = weights[i:i+BLOCK]
sorting_index = np.argsort(tmp_a)
sa = tmp_a[sorting_index]
sw = tmp_w[sorting_index]
# 计算权重的累积和
cw = np.concatenate((zero, sw.cumsum()))
# 找到排序后的数据在分 bin_edges 中的位置并加入累积直方图
bin_index = _search_sorted_inclusive(sa, bin_edges)
cum_n += cw[bin_index]
# 计算直方图的值,即累积分布的差值
n = np.diff(cum_n)
# 计算直方图密度,并返回密度和 bin_edges
db = np.array(np.diff(bin_edges), float)
return n/db/n.sum(), bin_edges
# 如果没有指定 density 参数,则计算普通直方图
# 计算直方图的累积分布
cum_n = np.zeros(bin_edges.shape, ntype)
# 如果未提供权重,则按块排序并计算累积直方图
if weights is None:
for i in _range(0, len(a), BLOCK):
# 对每个块的数据进行排序
sa = np.sort(a[i:i+BLOCK])
# 将排序后的数据加入累积直方图
cum_n += _search_sorted_inclusive(sa, bin_edges)
else:
zero = np.zeros(1, dtype=ntype)
for i in _range(0, len(a), BLOCK):
# 按块对数据和权重进行排序
tmp_a = a[i:i+BLOCK]
tmp_w = weights[i:i+BLOCK]
sorting_index = np.argsort(tmp_a)
sa = tmp_a[sorting_index]
sw = tmp_w[sorting_index]
# 计算权重的累积和
cw = np.concatenate((zero, sw.cumsum()))
# 找到排序后的数据在分 bin_edges 中的位置并加入累积直方图
bin_index = _search_sorted_inclusive(sa, bin_edges)
cum_n += cw[bin_index]
# 计算直方图的值,即累积分布的差值
n = np.diff(cum_n)
# 返回直方图和 bin_edges
return n, bin_edges
# 定义 _histogramdd_dispatcher 函数,用于根据输入参数的类型分派到合适的处理函数
def _histogramdd_dispatcher(sample, bins=None, range=None, density=None,
weights=None):
# 如果 sample 具有 shape 属性,则返回该样本数据(与 histogramdd 中的条件相同)
if hasattr(sample, 'shape'):
yield sample # 返回 sample
else:
yield from sample # 否则,逐个返回 sample 中的元素
# 使用上下文管理器忽略 TypeError 异常
with contextlib.suppress(TypeError):
yield from bins # 逐个返回 bins 中的元素
yield weights # 返回 weights
# 通过 array_function_dispatch 装饰器将 _histogramdd_dispatcher 函数与 histogramdd 函数关联起来
@array_function_dispatch(_histogramdd_dispatcher)
def histogramdd(sample, bins=10, range=None, density=None, weights=None):
"""
Compute the multidimensional histogram of some data.
Parameters
----------
sample : (N, D) array, or (N, D) array_like
The data to be histogrammed.
Note the unusual interpretation of sample when an array_like:
* When an array, each row is a coordinate in a D-dimensional space -
such as ``histogramdd(np.array([p1, p2, p3]))``.
* When an array_like, each element is the list of values for single
coordinate - such as ``histogramdd((X, Y, Z))``.
The first form should be preferred.
bins : sequence or int, optional
The bin specification:
* A sequence of arrays describing the monotonically increasing bin
edges along each dimension.
* The number of bins for each dimension (nx, ny, ... =bins)
* The number of bins for all dimensions (nx=ny=...=bins).
range : sequence, optional
A sequence of length D, each an optional (lower, upper) tuple giving
the outer bin edges to be used if the edges are not given explicitly in
`bins`.
An entry of None in the sequence results in the minimum and maximum
values being used for the corresponding dimension.
The default, None, is equivalent to passing a tuple of D None values.
density : bool, optional
If False, the default, returns the number of samples in each bin.
If True, returns the probability *density* function at the bin,
``bin_count / sample_count / bin_volume``.
weights : (N,) array_like, optional
An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
Weights are normalized to 1 if density is True. If density is False,
the values of the returned histogram are equal to the sum of the
weights belonging to the samples falling into each bin.
Returns
-------
H : ndarray
The multidimensional histogram of sample x. See density and weights
for the different possible semantics.
edges : tuple of ndarrays
A tuple of D arrays describing the bin edges for each dimension.
See Also
--------
histogram: 1-D histogram
histogram2d: 2-D histogram
Examples
--------
>>> rng = np.random.default_rng()
>>> r = rng.normal(size=(100,3))
>>> H, edges = np.histogramdd(r, bins = (5, 8, 4))
>>> H.shape, edges[0].size, edges[1].size, edges[2].size
((5, 8, 4), 6, 9, 5)
"""
try:
# 尝试获取样本数据的形状信息
N, D = sample.shape
except (AttributeError, ValueError):
# 如果捕获到 AttributeError 或者 ValueError 异常,则执行以下代码块
# 将 sample 转换为至少是二维数组
sample = np.atleast_2d(sample).T
# 获取样本的行数 N 和维度 D
nbin = np.empty(D, np.intp)
edges = D*[None]
dedges = D*[None]
if weights is not None:
# 如果 weights 不为 None,则将其转换为 NumPy 数组
weights = np.asarray(weights)
try:
M = len(bins)
if M != D:
# 如果 bins 的长度 M 不等于样本的维度 D,则抛出 ValueError 异常
raise ValueError(
'The dimension of bins must be equal to the dimension of the '
'sample x.')
except TypeError:
# 如果 bins 是一个整数,则将其扩展为包含 D 个元素的列表
bins = D*[bins]
# 标准化 range 参数
if range is None:
# 如果 range 为 None,则设置为 D 个 None 组成的元组
range = (None,) * D
elif len(range) != D:
# 如果 range 的长度不等于样本的维度 D,则抛出 ValueError 异常
raise ValueError('range argument must have one entry per dimension')
# 创建边缘数组
for i in _range(D):
if np.ndim(bins[i]) == 0:
# 如果 bins[i] 是标量
if bins[i] < 1:
# 如果 bins[i] 小于 1,则抛出 ValueError 异常
raise ValueError(
'`bins[{}]` must be positive, when an integer'.format(i))
# 获取 sample[:,i] 的最小值 smin 和最大值 smax
smin, smax = _get_outer_edges(sample[:,i], range[i])
try:
# 尝试将 bins[i] 转换为整数
n = operator.index(bins[i])
except TypeError as e:
# 如果失败,则抛出 TypeError 异常
raise TypeError(
"`bins[{}]` must be an integer, when a scalar".format(i)
) from e
# 使用 linspace 生成边缘数组 edges[i]
edges[i] = np.linspace(smin, smax, n + 1)
elif np.ndim(bins[i]) == 1:
# 如果 bins[i] 是一维数组,则直接赋值给 edges[i]
edges[i] = np.asarray(bins[i])
# 检查 edges[i] 是否严格单调递增
if np.any(edges[i][:-1] > edges[i][1:]):
# 如果不是单调递增,则抛出 ValueError 异常
raise ValueError(
'`bins[{}]` must be monotonically increasing, when an array'
.format(i))
else:
# 如果 bins[i] 不是标量或一维数组,则抛出 ValueError 异常
raise ValueError(
'`bins[{}]` must be a scalar or 1d array'.format(i))
# 计算 nbin[i],包括两个边界点
nbin[i] = len(edges[i]) + 1 # 包括每个边界上的一个点
# 计算 dedges[i],edges[i] 中每相邻两点的差值
dedges[i] = np.diff(edges[i])
# 计算每个样本落入的箱子编号
Ncount = tuple(
# 避免使用 np.digitize 来解决 gh-11022 的问题
np.searchsorted(edges[i], sample[:, i], side='right')
for i in _range(D)
)
# 使用 digitize 函数,将落在边缘上的值放入右侧的箱中
# 对于最右侧的箱子,希望等于右边界的值计入最后一个箱子,而不算作异常值。
for i in _range(D):
# 找出落在最右侧边界上的点
on_edge = (sample[:, i] == edges[i][-1])
# 将这些点向左移动一个箱子
Ncount[i][on_edge] -= 1
# 计算在扁平化直方图矩阵中的样本索引
# 如果数组过大,这会引发错误
xy = np.ravel_multi_index(Ncount, nbin)
# 计算 xy 中每个值的重复次数,并将其分配给扁平化的 histmat
hist = np.bincount(xy, weights, minlength=nbin.prod())
# 转换成正确形状的矩阵
hist = hist.reshape(nbin)
# 暂时保留 gh-7845 中观察到的(不良)行为
hist = hist.astype(float, casting='safe')
# 去除异常值(每个维度的第一个和最后一个索引)后的核心数据
core = D*(slice(1, -1),)
# 根据核心数据重新定义直方图
hist = hist[core]
if density:
# 计算概率密度函数
s = hist.sum()
for i in _range(D):
# 创建形状数组,用于除法操作
shape = np.ones(D, int)
shape[i] = nbin[i] - 2
hist = hist / dedges[i].reshape(shape)
# 归一化直方图以得到概率密度
hist /= s
if (hist.shape != nbin - 2).any():
# 如果直方图形状与预期形状不符,则抛出运行时错误
raise RuntimeError(
"Internal Shape Error")
# 返回处理后的直方图和边界
return hist, edges
.\numpy\numpy\lib\_histograms_impl.pyi
from collections.abc import Sequence
from typing import (
Literal as L,
Any,
SupportsIndex,
)
from numpy._typing import (
NDArray,
ArrayLike,
)
_BinKind = L[
"stone",
"auto",
"doane",
"fd",
"rice",
"scott",
"sqrt",
"sturges",
]
__all__: list[str]
# Stub: returns only the bin-edge array that `histogram` would use.
def histogram_bin_edges(
    a: ArrayLike,
    bins: _BinKind | SupportsIndex | ArrayLike = ...,
    range: None | tuple[float, float] = ...,
    weights: None | ArrayLike = ...,
) -> NDArray[Any]: ...
# Stub: 1-D histogram, returns (hist, bin_edges).
def histogram(
    a: ArrayLike,
    bins: _BinKind | SupportsIndex | ArrayLike = ...,
    range: None | tuple[float, float] = ...,
    density: bool = ...,
    weights: None | ArrayLike = ...,
) -> tuple[NDArray[Any], NDArray[Any]]: ...
# Stub: N-D histogram, returns (hist, tuple of per-dimension edge arrays).
def histogramdd(
    sample: ArrayLike,
    bins: SupportsIndex | ArrayLike = ...,
    range: Sequence[tuple[float, float]] = ...,
    density: None | bool = ...,
    weights: None | ArrayLike = ...,
) -> tuple[NDArray[Any], tuple[NDArray[Any], ...]]: ...
.\numpy\numpy\lib\_index_tricks_impl.py
import functools
import sys
import math
import warnings
import numpy as np
from .._utils import set_module
import numpy._core.numeric as _nx
from numpy._core.numeric import ScalarType, array
from numpy._core.numerictypes import issubdtype
import numpy.matrixlib as matrixlib
from numpy._core.multiarray import ravel_multi_index, unravel_index
from numpy._core import overrides, linspace
from numpy.lib.stride_tricks import as_strided
from numpy.lib._function_base_impl import diff
array_function_dispatch = functools.partial(
overrides.array_function_dispatch, module='numpy')
__all__ = [
'ravel_multi_index', 'unravel_index', 'mgrid', 'ogrid', 'r_', 'c_',
's_', 'index_exp', 'ix_', 'ndenumerate', 'ndindex', 'fill_diagonal',
'diag_indices', 'diag_indices_from'
]
def _ix__dispatcher(*args):
return args
@array_function_dispatch(_ix__dispatcher)
def ix_(*args):
"""
Construct an open mesh from multiple sequences.
This function takes N 1-D sequences and returns N outputs with N
dimensions each, such that the shape is 1 in all but one dimension
and the dimension with the non-unit shape value cycles through all
N dimensions.
Using `ix_` one can quickly construct index arrays that will index
the cross product. ``a[np.ix_([1,3],[2,5])]`` returns the array
``[[a[1,2] a[1,5]], [a[3,2] a[3,5]]]``.
Parameters
----------
args : 1-D sequences
Each sequence should be of integer or boolean type.
Boolean sequences will be interpreted as boolean masks for the
corresponding dimension (equivalent to passing in
``np.nonzero(boolean_sequence)``).
Returns
-------
out : tuple of ndarrays
N arrays with N dimensions each, with N the number of input
sequences. Together these arrays form an open mesh.
See Also
--------
ogrid, mgrid, meshgrid
Examples
--------
>>> a = np.arange(10).reshape(2, 5)
>>> a
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
>>> ixgrid = np.ix_([0, 1], [2, 4])
>>> ixgrid
(array([[0],
[1]]), array([[2, 4]]))
>>> ixgrid[0].shape, ixgrid[1].shape
((2, 1), (1, 2))
>>> a[ixgrid]
array([[2, 4],
[7, 9]])
>>> ixgrid = np.ix_([True, True], [2, 4])
>>> a[ixgrid]
array([[2, 4],
[7, 9]])
>>> ixgrid = np.ix_([True, True], [False, False, True, False, True])
>>> a[ixgrid]
array([[2, 4],
[7, 9]])
"""
out = []
nd = len(args)
for k, new in enumerate(args):
if not isinstance(new, _nx.ndarray):
new = np.asarray(new)
if new.size == 0:
new = new.astype(_nx.intp)
if new.ndim != 1:
raise ValueError("Cross index must be 1 dimensional")
if issubdtype(new.dtype, _nx.bool):
new, = new.nonzero()
new = new.reshape((1,)*k + (new.size,) + (1,)*(nd-k-1))
out.append(new)
return tuple(out)
class nd_grid:
    """
    Construct a multi-dimensional "meshgrid".
    ``grid = nd_grid()`` creates an instance which will return a mesh-grid
    when indexed. The dimension and number of the output arrays are equal
    to the number of indexing dimensions. If the step length is not a
    complex number, then the stop is not inclusive.
    However, if the step length is a **complex number** (e.g. 5j), then the
    integer part of its magnitude is interpreted as specifying the
    number of points to create between the start and stop values, where
    the stop value **is inclusive**.
    If instantiated with an argument of ``sparse=True``, the mesh-grid is
    open (or not fleshed out) so that only one-dimension of each returned
    argument is greater than 1.
    Parameters
    ----------
    sparse : bool, optional
        Whether the grid is sparse or not. Default is False.
    Notes
    -----
    Two instances of `nd_grid` are made available in the NumPy namespace,
    `mgrid` and `ogrid`, approximately defined as::
        mgrid = nd_grid(sparse=False)
        ogrid = nd_grid(sparse=True)
    Users should use these pre-defined instances instead of using `nd_grid`
    directly.
    """
    def __init__(self, sparse=False):
        # sparse=True -> open grid (ogrid-style); False -> dense (mgrid).
        self.sparse = sparse
    def __getitem__(self, key):
        try:
            # Tuple-of-slices path: build an N-dimensional grid.
            size = []
            # Collect 0 plus every start/stop/step so result_type can
            # promote over all of them below.
            num_list = [0]
            for k in range(len(key)):
                step = key[k].step
                start = key[k].start
                stop = key[k].stop
                if start is None:
                    start = 0
                if step is None:
                    step = 1
                if isinstance(step, (_nx.complexfloating, complex)):
                    # Complex step: its magnitude is a point count
                    # (linspace-style, stop inclusive).
                    step = abs(step)
                    size.append(int(step))
                else:
                    # Real step: arange-style, stop exclusive.
                    size.append(
                        int(math.ceil((stop - start) / (step*1.0))))
                num_list += [start, stop, step]
            typ = _nx.result_type(*num_list)
            if self.sparse:
                # One 1-D index array per axis; broadcast shapes are set
                # further down.
                nn = [_nx.arange(_x, dtype=_t)
                        for _x, _t in zip(size, (typ,)*len(size))]
            else:
                # Dense grid: a stacked array of index arrays.
                nn = _nx.indices(size, typ)
            for k, kk in enumerate(key):
                # Rescale the raw indices by each axis' start/step.
                step = kk.step
                start = kk.start
                if start is None:
                    start = 0
                if step is None:
                    step = 1
                if isinstance(step, (_nx.complexfloating, complex)):
                    # Turn a point count back into an actual step size.
                    step = int(abs(step))
                    if step != 1:
                        step = (kk.stop - start) / float(step - 1)
                nn[k] = (nn[k]*step+start)
            if self.sparse:
                # Insert length-1 axes so the per-axis arrays broadcast
                # against each other.
                slobj = [_nx.newaxis]*len(size)
                for k in range(len(size)):
                    slobj[k] = slice(None, None)
                    nn[k] = nn[k][tuple(slobj)]
                    slobj[k] = _nx.newaxis
                return tuple(nn)
            return nn
        except (IndexError, TypeError):
            # Single-slice path: return one 1-D array.
            step = key.step
            stop = key.stop
            start = key.start
            if start is None:
                start = 0
            if isinstance(step, (_nx.complexfloating, complex)):
                # Complex step: `length` points, stop inclusive.
                step_float = abs(step)
                step = length = int(step_float)
                if step != 1:
                    step = (key.stop-start)/float(step-1)
                typ = _nx.result_type(start, stop, step_float)
                return _nx.arange(0, length, 1, dtype=typ)*step + start
            else:
                return _nx.arange(start, stop, step)
class MGridClass(nd_grid):
"""
An instance which returns a dense multi-dimensional "meshgrid".
An instance which returns a dense (or fleshed out) mesh-grid
when indexed, so that each returned argument has the same shape.
The dimensions and number of the output arrays are equal to the
number of indexing dimensions. If the step length is not a complex
number, then the stop is not inclusive.
However, if the step length is a **complex number** (e.g. 5j), then
the integer part of its magnitude is interpreted as specifying the
number of points to create between the start and stop values, where
the stop value **is inclusive**.
Returns
-------
mesh-grid : ndarray
A single array, containing a set of `ndarray`\ s all of the same
dimensions stacked along the first axis.
See Also
--------
ogrid : like `mgrid` but returns open (not fleshed out) mesh grids
meshgrid: return coordinate matrices from coordinate vectors
r_ : array concatenator
:ref:`how-to-partition`
Examples
--------
>>> np.mgrid[0:5, 0:5]
array([[[0, 0, 0, 0, 0],
[1, 1, 1, 1, 1],
[2, 2, 2, 2, 2],
[3, 3, 3, 3, 3],
[4, 4, 4, 4, 4]],
[[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4]]])
>>> np.mgrid[-1:1:5j]
array([-1. , -0.5, 0. , 0.5, 1. ])
>>> np.mgrid[0:4].shape
(4,)
>>> np.mgrid[0:4, 0:5].shape
(2, 4, 5)
>>> np.mgrid[0:4, 0:5, 0:6].shape
(3, 4, 5, 6)
"""
def __init__(self):
super().__init__(sparse=False)
mgrid = MGridClass()
class OGridClass(nd_grid):
"""
An instance which returns an open multi-dimensional "meshgrid".
An instance which returns an open (i.e. not fleshed out) mesh-grid
when indexed, so that only one dimension of each returned array is
greater than 1. The dimension and number of the output arrays are
equal to the number of indexing dimensions. If the step length is
not a complex number, then the stop is not inclusive.
However, if the step length is a **complex number** (e.g. 5j), then
the integer part of its magnitude is interpreted as specifying the
number of points to create between the start and stop values, where
the stop value **is inclusive**.
Returns
-------
mesh-grid : ndarray or tuple of ndarrays
If the input is a single slice, returns an array.
If the input is multiple slices, returns a tuple of arrays, with
only one dimension not equal to 1.
See Also
--------
mgrid : like `ogrid` but returns dense (or fleshed out) mesh grids
meshgrid: return coordinate matrices from coordinate vectors
r_ : array concatenator
:ref:`how-to-partition`
Examples
--------
>>> from numpy import ogrid
>>> ogrid[-1:1:5j]
"""
array([-1. , -0.5, 0. , 0.5, 1. ])
>>> ogrid[0:5, 0:5]
(array([[0],
[1],
[2],
[3],
[4]]),
array([[0, 1, 2, 3, 4]]))
"""
# 创建一个包含指定值的一维数组
# 然后生成一个二维的 meshgrid 结果
# 第一个数组包含从 0 到 4 的行索引
# 第二个数组包含从 0 到 4 的列索引
# 这两个数组可以用来表示一个 5x5 的格点
def __init__(self):
# 调用父类的构造函数,并设置稀疏模式为 True
super().__init__(sparse=True)
# 创建一个名为ogrid的OGridClass类的实例对象
ogrid = OGridClass()
class AxisConcatenator:
    """
    Translates slice objects to concatenation along an axis.
    For detailed documentation on usage, see `r_`.
    """
    # Static helpers: kept as class attributes so subclasses can override
    # how concatenation and matrix construction are performed.
    concatenate = staticmethod(_nx.concatenate)
    makemat = staticmethod(matrixlib.matrix)
    def __init__(self, axis=0, matrix=False, ndmin=1, trans1d=-1):
        # axis: axis to concatenate along; matrix: whether to return a
        # np.matrix; ndmin: minimum dimensionality forced on each entry;
        # trans1d: where length-1 axes go when upgrading 1-D entries.
        self.axis = axis
        self.matrix = matrix
        self.trans1d = trans1d
        self.ndmin = ndmin
    def __len__(self):
        # A concatenator holds no items itself; len() is always 0.
        return 0
# Separate classes are used here (rather than simply e.g. ``r_ = AxisConcatenator(0)``) so that each instance's docstring displays correctly in the help system.
class RClass(AxisConcatenator):
"""
Translates slice objects to concatenation along the first axis.
This is a simple way to build up arrays quickly. There are two use cases.
1. If the index expression contains comma separated arrays, then stack
them along their first axis.
2. If the index expression contains slice notation or scalars then create
a 1-D array with a range indicated by the slice notation.
If slice notation is used, the syntax ``start:stop:step`` is equivalent
to ``np.arange(start, stop, step)`` inside of the brackets. However, if
``step`` is an imaginary number (i.e. 100j) then its integer portion is
interpreted as a number-of-points desired and the start and stop are
inclusive. In other words ``start:stop:stepj`` is interpreted as
``np.linspace(start, stop, step, endpoint=1)`` inside of the brackets.
After expansion of slice notation, all comma separated sequences are
concatenated together.
Optional character strings placed as the first element of the index
expression can be used to change the output. The strings 'r' or 'c' result
in matrix output. If the result is 1-D and 'r' is specified a 1 x N (row)
matrix is produced. If the result is 1-D and 'c' is specified, then a N x 1
(column) matrix is produced. If the result is 2-D then both provide the
same matrix result.
A string integer specifies which axis to stack multiple comma separated
arrays along. A string of two comma-separated integers allows indication
of the minimum number of dimensions to force each entry into as the
second integer (the axis to concatenate along is still the first integer).
A string with three comma-separated integers allows specification of the
axis to concatenate along, the minimum number of dimensions to force the
entries to, and which axis should contain the start of the arrays which
are less than the specified number of dimensions. In other words the third
integer allows you to specify where the 1's should be placed in the shape
of the arrays that have their shapes upgraded. By default, they are placed
in the front of the shape tuple. The third argument allows you to specify
"""
# 这里没有代码需要注释
"""
定义一个名为 `r_` 的类,用于数组或矩阵的连接操作。
"""
def __init__(self):
"""
类的初始化方法。
Parameters
----------
Not a function, so takes no parameters # 此方法没有参数
"""
# 调用父类 AxisConcatenator 的初始化方法,将 axis 参数设为 0
AxisConcatenator.__init__(self, 0)
`
# 创建一个空的 RClass 实例
r_ = RClass()
class CClass(AxisConcatenator):
"""
Translates slice objects to concatenation along the second axis.
This is short-hand for ``np.r_['-1,2,0', index expression]``, which is
useful because of its common occurrence. In particular, arrays will be
stacked along their last axis after being upgraded to at least 2-D with
1's post-pended to the shape (column vectors made out of 1-D arrays).
See Also
--------
column_stack : Stack 1-D arrays as columns into a 2-D array.
r_ : For more detailed documentation.
Examples
--------
>>> np.c_[np.array([1,2,3]), np.array([4,5,6])]
array([[1, 4],
[2, 5],
[3, 6]])
>>> np.c_[np.array([[1,2,3]]), 0, 0, np.array([[4,5,6]])]
array([[1, 2, 3, ..., 4, 5, 6]])
"""
def __init__(self):
# 使用 AxisConcatenator 的初始化方法初始化当前对象,指定连接的轴为 -1,至少为二维数组,1 维数组转换为列向量
AxisConcatenator.__init__(self, -1, ndmin=2, trans1d=0)
# 创建一个 CClass 的实例
c_ = CClass()
@set_module('numpy')
class ndenumerate:
"""
Multidimensional index iterator.
Return an iterator yielding pairs of array coordinates and values.
Parameters
----------
arr : ndarray
Input array.
See Also
--------
ndindex, flatiter
Examples
--------
>>> a = np.array([[1, 2], [3, 4]])
>>> for index, x in np.ndenumerate(a):
... print(index, x)
(0, 0) 1
(0, 1) 2
(1, 0) 3
(1, 1) 4
"""
def __init__(self, arr):
# 将输入数组转换为 ndarray,并使用 flat 属性创建迭代器
self.iter = np.asarray(arr).flat
def __next__(self):
"""
Standard iterator method, returns the index tuple and array value.
Returns
-------
coords : tuple of ints
The indices of the current iteration.
val : scalar
The array element of the current iteration.
"""
# 返回当前迭代的索引元组和数组值
return self.iter.coords, next(self.iter)
def __iter__(self):
# 返回迭代器本身
return self
@set_module('numpy')
class ndindex:
"""
An N-dimensional iterator object to index arrays.
Given the shape of an array, an `ndindex` instance iterates over
the N-dimensional index of the array. At each iteration a tuple
of indices is returned, the last dimension is iterated over first.
Parameters
----------
shape : ints, or a single tuple of ints
The size of each dimension of the array can be passed as
individual parameters or as the elements of a tuple.
See Also
--------
ndenumerate, flatiter
Examples
--------
Dimensions as individual arguments
>>> for index in np.ndindex(3, 2, 1):
... print(index)
(0, 0, 0)
(0, 1, 0)
(1, 0, 0)
(1, 1, 0)
(2, 0, 0)
(2, 1, 0)
Same dimensions - but in a tuple ``(3, 2, 1)``
>>> for index in np.ndindex((3, 2, 1)):
... print(index)
(0, 0, 0)
(0, 1, 0)
(1, 0, 0)
(1, 1, 0)
(2, 0, 0)
(2, 1, 0)
"""
# 初始化方法,接受任意数量的形状参数
def __init__(self, *shape):
# 如果参数长度为1且为元组,则将其解包为形状参数
if len(shape) == 1 and isinstance(shape[0], tuple):
shape = shape[0]
# 创建一个只含有一个元素的零数组,并根据给定形状和步幅创建一个视图
x = as_strided(_nx.zeros(1), shape=shape,
strides=_nx.zeros_like(shape))
# 使用 NumPy 的迭代器创建对象,并指定迭代器的属性和顺序
self._it = _nx.nditer(x, flags=['multi_index', 'zerosize_ok'],
order='C')
# 返回迭代器自身,用于迭代
def __iter__(self):
return self
# 递增多维索引的方法(已废弃,请勿使用)
def ndincr(self):
"""
Increment the multi-dimensional index by one.
This method is for backward compatibility only: do not use.
.. deprecated:: 1.20.0
This method has been advised against since numpy 1.8.0, but only
started emitting DeprecationWarning as of this version.
"""
# 发出警告信息表明此方法已废弃
warnings.warn(
"`ndindex.ndincr()` is deprecated, use `next(ndindex)` instead",
DeprecationWarning, stacklevel=2)
# 调用迭代器的下一个方法,更新索引
next(self)
# 标准迭代器方法,更新索引并返回当前迭代的索引元组
def __next__(self):
"""
Standard iterator method, updates the index and returns the index
tuple.
Returns
-------
val : tuple of ints
Returns a tuple containing the indices of the current
iteration.
"""
# 调用迭代器的下一个方法,更新索引
next(self._it)
# 返回当前迭代的多维索引
return self._it.multi_index
# 定义一个类 IndexExpression,用于构建数组的索引元组。
class IndexExpression:
    """
    A nicer way to build up index tuples for arrays.

    .. note::
       Use one of the two predefined instances ``index_exp`` or `s_`
       rather than directly using `IndexExpression`.

    For any index combination, including slicing and axis insertion,
    ``a[indices]`` is the same as ``a[np.index_exp[indices]]`` for any
    array `a`. However, ``np.index_exp[indices]`` can be used anywhere in
    Python code and returns a tuple of slice objects that can be used in
    the construction of complex index expressions.

    Parameters
    ----------
    maketuple : bool
        If True, always returns a tuple.

    See Also
    --------
    s_ : Predefined instance without tuple conversion:
        `s_ = IndexExpression(maketuple=False)`.
        The ``index_exp`` is another predefined instance that
        always returns a tuple:
        `index_exp = IndexExpression(maketuple=True)`.

    Examples
    --------
    >>> np.s_[2::2]
    slice(2, None, 2)
    >>> np.index_exp[2::2]
    (slice(2, None, 2),)
    >>> np.array([0, 1, 2, 3, 4])[np.s_[2::2]]
    array([2, 4])
    """

    def __init__(self, maketuple):
        # When True, non-tuple indices are wrapped in a 1-tuple.
        self.maketuple = maketuple

    def __getitem__(self, item):
        # Tuples pass through untouched; otherwise wrap iff requested.
        if isinstance(item, tuple) or not self.maketuple:
            return item
        return (item,)


index_exp = IndexExpression(maketuple=True)
s_ = IndexExpression(maketuple=False)
# End contribution from Konrad.
# The following functions complement those in twodim_base, but are
# applicable to N-dimensions.
# 定义函数 _fill_diagonal_dispatcher,用于返回一个元组 (a,)
def _fill_diagonal_dispatcher(a, val, wrap=None):
return (a,)
# 使用装饰器 array_function_dispatch 将 _fill_diagonal_dispatcher 函数与 fill_diagonal 关联起来
@array_function_dispatch(_fill_diagonal_dispatcher)
# 定义函数 fill_diagonal,用于填充任意维度数组的主对角线
def fill_diagonal(a, val, wrap=False):
"""Fill the main diagonal of the given array of any dimensionality.
For an array `a` with ``a.ndim >= 2``, the diagonal is the list of
values ``a[i, ..., i]`` with indices ``i`` all identical. This function
modifies the input array in-place without returning a value.
Parameters
----------
a : array, at least 2-D.
Array whose diagonal is to be filled in-place.
val : scalar or array_like
Value(s) to write on the diagonal. If `val` is scalar, the value is
written along the diagonal. If array-like, the flattened `val` is
written along the diagonal, repeating if necessary to fill all
diagonal entries.
"""
# 如果数组维度小于2,则抛出值错误异常,数组必须至少是二维的
if a.ndim < 2:
raise ValueError("array must be at least 2-d")
# 初始化结束索引为None
end = None
# 如果数组维度为2
if a.ndim == 2:
# 对于二维数组的常见情况,使用显式且快速的公式。
# 对于矩形数组,我们接受这种情况。
step = a.shape[1] + 1
# 如果wrap选项为False,计算结束索引,以避免对高矩阵进行对角线包装
if not wrap:
end = a.shape[1] * a.shape[1]
else:
# 对于维度大于2的情况,仅当数组的所有维度相等时,才适用步进公式,因此我们首先进行检查。
if not np.all(diff(a.shape) == 0):
raise ValueError("All dimensions of input must be of equal length")
# 计算步进值,通过累积乘积计算
step = 1 + (np.cumprod(a.shape[:-1])).sum()
# 将值写入对角线位置
a.flat[:end:step] = val
# 设置函数的模块名称为 'numpy'
@set_module('numpy')
# 定义函数 diag_indices,返回用于访问数组主对角线的索引
def diag_indices(n, ndim=2):
"""
Return the indices to access the main diagonal of an array.
This returns a tuple of indices that can be used to access the main
diagonal of an array `a` with ``a.ndim >= 2`` dimensions and shape
(n, n, ..., n). For ``a.ndim = 2`` this is the usual diagonal, for
``a.ndim > 2`` this is the set of indices to access ``a[i, i, ..., i]``
for ``i = [0..n-1]``.
Parameters
----------
n : int
The size, along each dimension, of the arrays for which the returned
indices can be used.
ndim : int, optional
The number of dimensions.
See Also
--------
diag_indices_from
Notes
-----
.. versionadded:: 1.4.0
Examples
--------
Create a set of indices to access the diagonal of a (4, 4) array:
>>> di = np.diag_indices(4)
>>> di
(array([0, 1, 2, 3]), array([0, 1, 2, 3]))
>>> a = np.arange(16).reshape(4, 4)
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
>>> a[di] = 100
>>> a
array([[100, 1, 2, 3],
[ 4, 100, 6, 7],
[ 8, 9, 100, 11],
[ 12, 13, 14, 100]])
Now, we create indices to manipulate a 3-D array:
>>> d3 = np.diag_indices(2, 3)
>>> d3
(array([0, 1]), array([0, 1]), array([0, 1]))
And use it to set the diagonal of an array of zeros to 1:
>>> a = np.zeros((2, 2, 2), dtype=int)
>>> a[d3] = 1
>>> a
array([[[1, 0],
[0, 0]],
[[0, 0],
[0, 1]]])
"""
# 创建一个包含 n 个元素的索引数组
idx = np.arange(n)
# 返回一个包含 ndim 维度的元组,每个维度都是 idx 数组的副本
return (idx,) * ndim
# 定义内部函数 _diag_indices_from,返回接收数组的对角线索引
def _diag_indices_from(arr):
return (arr,)
# 注册函数 diag_indices_from,使用数组函数分发装饰器
@array_function_dispatch(_diag_indices_from)
# 定义函数 diag_indices_from,返回访问 n 维数组主对角线的索引
def diag_indices_from(arr):
"""
Return the indices to access the main diagonal of an n-dimensional array.
See `diag_indices` for full details.
Parameters
----------
arr : array, at least 2-D
See Also
--------
diag_indices
Notes
-----
.. versionadded:: 1.4.0
Examples
--------
Create a 4 by 4 array.
>>> a = np.arange(16).reshape(4, 4)
>>> a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
Get the indices of the diagonal elements.
>>> di = np.diag_indices_from(a)
>>> di
(array([0, 1, 2, 3]), array([0, 1, 2, 3]))
>>> a[di]
array([ 0, 5, 10, 15])
This is simply syntactic sugar for diag_indices.
>>> np.diag_indices(a.shape[0])
(array([0, 1, 2, 3]), array([0, 1, 2, 3]))
"""
# 如果输入数组的维度小于 2,抛出 ValueError 异常
if not arr.ndim >= 2:
raise ValueError("input array must be at least 2-d")
# 对于超过 2 维的情况,只有所有维度长度相等的数组才能使用步幅公式,因此先进行检查
if not np.all(diff(arr.shape) == 0):
raise ValueError("All dimensions of input must be of equal length")
# 返回一个包含给定数组形状和维度的对角线索引的元组
return diag_indices(arr.shape[0], arr.ndim)
.\numpy\numpy\lib\_index_tricks_impl.pyi
from collections.abc import Sequence
from typing import (
Any,
TypeVar,
Generic,
overload,
Literal,
SupportsIndex,
)
import numpy as np
from numpy import (
matrix as _Matrix,
ndenumerate as ndenumerate,
ndindex as ndindex,
ndarray,
dtype,
str_,
bytes_,
int_,
float64,
complex128,
)
from numpy._typing import (
ArrayLike,
_NestedSequence,
_FiniteNestedSequence,
NDArray,
DTypeLike,
_SupportsDType,
)
from numpy._core.multiarray import (
unravel_index as unravel_index,
ravel_multi_index as ravel_multi_index,
)
# Type variables shared by the stub declarations below.
_T = TypeVar("_T")
_DType = TypeVar("_DType", bound=dtype[Any])
# Discriminates the sparse/dense variants of `nd_grid` / `IndexExpression`.
_BoolType = TypeVar("_BoolType", Literal[True], Literal[False])
_TupType = TypeVar("_TupType", bound=tuple[Any, ...])
_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])

__all__: list[str]
# `ix_` builds an open mesh from 1-D sequences; each overload maps the
# element type of the arguments onto the scalar type of the returned
# index arrays (the order is most-specific first).
@overload
def ix_(*args: _FiniteNestedSequence[_SupportsDType[_DType]]) -> tuple[ndarray[Any, _DType], ...]: ...
@overload
def ix_(*args: str | _NestedSequence[str]) -> tuple[NDArray[str_], ...]: ...
@overload
def ix_(*args: bytes | _NestedSequence[bytes]) -> tuple[NDArray[bytes_], ...]: ...
@overload
def ix_(*args: bool | _NestedSequence[bool]) -> tuple[NDArray[np.bool], ...]: ...
@overload
def ix_(*args: int | _NestedSequence[int]) -> tuple[NDArray[int_], ...]: ...
@overload
def ix_(*args: float | _NestedSequence[float]) -> tuple[NDArray[float64], ...]: ...
@overload
def ix_(*args: complex | _NestedSequence[complex]) -> tuple[NDArray[complex128], ...]: ...
class nd_grid(Generic[_BoolType]):
    # When True, `__getitem__` yields a tuple of open-mesh arrays; when
    # False, a single dense mesh-grid array.
    sparse: _BoolType
    def __init__(self, sparse: _BoolType = ...) -> None: ...
    @overload
    def __getitem__(
        self: nd_grid[Literal[False]],
        key: slice | Sequence[slice],
    ) -> NDArray[Any]: ...
    @overload
    def __getitem__(
        self: nd_grid[Literal[True]],
        key: slice | Sequence[slice],
    ) -> tuple[NDArray[Any], ...]: ...
class MGridClass(nd_grid[Literal[False]]):
    # Dense (fleshed-out) mesh-grid variant, exposed as `np.mgrid`.
    def __init__(self) -> None: ...

mgrid: MGridClass

class OGridClass(nd_grid[Literal[True]]):
    # Sparse (open) mesh-grid variant, exposed as `np.ogrid`.
    def __init__(self) -> None: ...

ogrid: OGridClass
class AxisConcatenator:
    # Axis along which the concatenation is performed.
    axis: int
    # Whether the result is returned as a np.matrix.
    matrix: bool
    # Minimum number of dimensions imposed on the result.
    ndmin: int
    # Placement spec for 1-D arrays in the result (see `numpy.r_`).
    trans1d: int
    def __init__(
        self,
        axis: int = ...,
        matrix: bool = ...,
        ndmin: int = ...,
        trans1d: int = ...,
    ) -> None: ...
    @staticmethod
    @overload
    def concatenate(
        *a: ArrayLike, axis: SupportsIndex = ..., out: None = ...
    ) -> NDArray[Any]: ...
    @staticmethod
    @overload
    def concatenate(
        *a: ArrayLike, axis: SupportsIndex = ..., out: _ArrayType = ...
    ) -> _ArrayType: ...
    @staticmethod
    def makemat(
        data: ArrayLike, dtype: DTypeLike = ..., copy: bool = ...
    ) -> _Matrix[Any, Any]: ...
    # Keys may be slices, strings or array-likes, hence the permissive
    # signature.
    def __getitem__(self, key: Any) -> Any: ...
class RClass(AxisConcatenator):
    # `np.r_`: concatenation along the first axis.
    axis: Literal[0]
    matrix: Literal[False]
    ndmin: Literal[1]
    trans1d: Literal[-1]
    def __init__(self) -> None: ...

r_: RClass

class CClass(AxisConcatenator):
    # `np.c_`: concatenation along the last axis, at least 2-D results.
    axis: Literal[-1]
    matrix: Literal[False]
    ndmin: Literal[2]
    trans1d: Literal[0]
    def __init__(self) -> None: ...

c_: CClass
class IndexExpression(Generic[_BoolType]):
    # When True, scalar items are wrapped into a 1-tuple (`np.index_exp`);
    # when False they are returned unchanged (`np.s_`).
    maketuple: _BoolType
    def __init__(self, maketuple: _BoolType) -> None: ...
    @overload
    def __getitem__(self, item: _TupType) -> _TupType: ...
    @overload
    def __getitem__(self: IndexExpression[Literal[True]], item: _T) -> tuple[_T]: ...
    @overload
    def __getitem__(self: IndexExpression[Literal[False]], item: _T) -> _T: ...

index_exp: IndexExpression[Literal[True]]
s_: IndexExpression[Literal[False]]
# Fill the main diagonal of `a` with `val`, in place.
def fill_diagonal(a: NDArray[Any], val: Any, wrap: bool = ...) -> None:
    ...
# Index arrays addressing the main diagonal of an `ndim`-dimensional
# array with side length `n`.
def diag_indices(n: int, ndim: int = ...) -> tuple[NDArray[int_], ...]:
    ...
# Same, with `n` and `ndim` taken from an existing array.
def diag_indices_from(arr: ArrayLike) -> tuple[NDArray[int_], ...]:
    ...
.\numpy\numpy\lib\_iotools.py
"""A collection of functions designed to help I/O with ascii files.
"""
__docformat__ = "restructuredtext en"
import numpy as np
import numpy._core.numeric as nx
from numpy._utils import asbytes, asunicode
def _decode_line(line, encoding=None):
"""Decode bytes from binary input streams.
Defaults to decoding from 'latin1'. That differs from the behavior of
np.compat.asunicode that decodes from 'ascii'.
Parameters
----------
line : str or bytes
Line to be decoded.
encoding : str
Encoding used to decode `line`.
Returns
-------
decoded_line : str
"""
if type(line) is bytes:
if encoding is None:
encoding = "latin1"
line = line.decode(encoding)
return line
def _is_string_like(obj):
"""
Check whether obj behaves like a string.
"""
try:
obj + ''
except (TypeError, ValueError):
return False
return True
def _is_bytes_like(obj):
"""
Check whether obj behaves like a bytes object.
"""
try:
obj + b''
except (TypeError, ValueError):
return False
return True
def has_nested_fields(ndtype):
    """
    Returns whether one or several fields of a dtype are nested.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False
    """
    # A field is nested when its own dtype is itself structured
    # (i.e. has a non-None `names`).
    return any(ndtype[name].names is not None
               for name in (ndtype.names or ()))
def flatten_dtype(ndtype, flatten_base=False):
    """
    Unpack a structured data-type by collapsing nested fields and/or fields
    with a shape.

    Note that the field names are lost.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, transform a field with a shape into several fields. Default is
        False.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]
    """
    if ndtype.names is None:
        # Leaf dtype: repeat the base once per element of the field's
        # shape when flattening, otherwise emit it a single time.
        repeat = int(np.prod(ndtype.shape)) if flatten_base else 1
        return [ndtype.base] * repeat
    # Structured dtype: recurse into every field, in declaration order.
    flattened = []
    for name in ndtype.names:
        field_dtype = ndtype.fields[name][0]
        flattened.extend(flatten_dtype(field_dtype, flatten_base))
    return flattened
"""
Object to split a string at a given delimiter or at given places.
Parameters
----------
delimiter : str, int, or sequence of ints, optional
If a string, character used to delimit consecutive fields.
If an integer or a sequence of integers, width(s) of each field.
comments : str, optional
Character used to mark the beginning of a comment. Default is '#'.
autostrip : bool, optional
Whether to strip each individual field. Default is True.
"""
def autostrip(self, method):
"""
Wrapper to strip each member of the output of `method`.
Parameters
----------
method : function
Function that takes a single argument and returns a sequence of
strings.
Returns
-------
wrapped : function
The result of wrapping `method`. `wrapped` takes a single input
argument and returns a list of strings that are stripped of
white-space.
"""
return lambda input: [_.strip() for _ in method(input)]
    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        # Normalize possible bytes input to str.
        delimiter = _decode_line(delimiter)
        comments = _decode_line(comments)
        self.comments = comments
        # Delimiter is a string (or None): split at that character,
        # with None meaning "any run of whitespace".
        if (delimiter is None) or isinstance(delimiter, str):
            delimiter = delimiter or None
            _handyman = self._delimited_splitter
        # Delimiter is a sequence of field widths: precompute the slice
        # for each field.
        elif hasattr(delimiter, '__iter__'):
            _handyman = self._variablewidth_splitter
            idx = np.cumsum([0] + list(delimiter))
            delimiter = [slice(i, j) for (i, j) in zip(idx[:-1], idx[1:])]
        # Delimiter is a single non-zero integer: constant field width.
        elif int(delimiter):
            (_handyman, delimiter) = (
                self._fixedwidth_splitter, int(delimiter))
        # Anything falsy: fall back to whitespace splitting.
        else:
            (_handyman, delimiter) = (self._delimited_splitter, None)
        self.delimiter = delimiter
        # Optionally wrap the splitter so each field is stripped.
        if autostrip:
            self._handyman = self.autostrip(_handyman)
        else:
            self._handyman = _handyman
        self.encoding = encoding
    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        if self.comments is not None:
            # Everything after the first comment marker is discarded.
            line = line.split(self.comments)[0]
        line = line.strip(" \r\n")
        if not line:
            return []
        return line.split(self.delimiter)
    def _fixedwidth_splitter(self, line):
        """Split `line` into consecutive fields of `self.delimiter` chars."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip("\r\n")
        if not line:
            return []
        fixed = self.delimiter
        # One slice per fixed-width field; the last one may be shorter.
        slices = [slice(i, i + fixed) for i in range(0, len(line), fixed)]
        return [line[s] for s in slices]
    def _variablewidth_splitter(self, line):
        """Split `line` using the slices precomputed in `self.delimiter`."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        if not line:
            return []
        slices = self.delimiter
        return [line[s] for s in slices]
    def __call__(self, line):
        """Decode `line` (if bytes) and split it into fields."""
        decoded_line = _decode_line(line, self.encoding)
        return self._handyman(decoded_line)
class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a list of valid names will be created. The `__call__`
    method accepts an optional keyword "default" that sets the default name
    in case of ambiguity. By default this is 'f', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return', 'file', 'print']. Excluded names are appended an
        underscore: for example, `file` becomes `file_` if supplied.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """
    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list. Work on a copy so that a
        # caller-supplied list is not mutated in place (the previous
        # implementation extended the caller's list directly).
        if excludelist is None:
            excludelist = []
        excludelist = list(excludelist)
        excludelist.extend(self.defaultexcludelist)
        self.excludelist = excludelist
        # Process the list of characters to delete; '"' is always removed.
        if deletechars is None:
            delete = self.defaultdeletechars
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option: identity, upper or lower conversion.
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)
        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : list of str
            The list of validated field names.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.
        """
        # Initial checks: no names at all is only acceptable when the
        # caller did not request a fixed number of fields either.
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        # Pad with empty names or truncate to reach exactly `nbfields`.
        if nbfields is not None:
            nbnames = len(names)
            if nbnames < nbfields:
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        validatednames = []
        seen = dict()
        nbempty = 0
        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Name reduced to nothing: synthesize one from defaultfmt,
                # skipping any value already present among the inputs.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate duplicates by appending a counter suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        # Calling the instance is equivalent to calling `validate`.
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
"""
Factory class for function transforming a string into another object
(int, float).
After initialization, an instance can be called to transform a string
into another object. If the string is recognized as representing a
missing value, a default value is returned.
Attributes
----------
func : function
Function used for the conversion.
default : any
Default value to return when the input corresponds to a missing
value.
type : type
Type of the output.
_status : int
Integer representing the order of the conversion.
_mapper : sequence of tuples
Sequence of tuples (dtype, function, default value) to evaluate in
order.
_locked : bool
Holds `locked` parameter.
Parameters
----------
dtype_or_func : {None, dtype, function}, optional
If a `dtype`, specifies the input data type, used to define a basic
function and a default value for missing data. For example, when
`dtype` is float, the `func` attribute is set to `float` and the
default value to `np.nan`. If a function, this function is used to
convert a string to another object. In this case, it is recommended
to give an associated default value as input.
default : any, optional
Value to return by default, that is, when the string to be
converted is flagged as missing. If not given, `StringConverter`
tries to supply a reasonable default value.
"""
class ConverterError(Exception):
    """Raised when an error occurs in a converter for string values."""
class ConverterLockError(ConverterError):
    """Raised when attempting to upgrade a locked converter."""
class ConversionWarning(UserWarning):
    """
    Warning issued when a string converter has a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.
    """
missing_values : {None, sequence of str}, optional
    Sequence of strings indicating a missing value. If ``None``, missing
    values are indicated by empty entries. Default is ``None``.
locked : bool, optional
    Whether the StringConverter should be locked to prevent automatic
    upgrade. Default is False.
"""
    # Ordered list of (dtype, conversion function, default value) candidates
    # tried in sequence when upgrading a converter.
    _mapper = [(nx.bool, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.str_, asunicode, '???'),
                    (nx.bytes_, asbytes, '???'),
                    ])
    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype
    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type
    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""
        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type
@classmethod
def upgrade_mapper(cls, func, default=None):
"""
Upgrade the mapper of a StringConverter by adding a new function and
its corresponding default.
The input function (or sequence of functions) and its associated
default value (if any) is inserted in penultimate position of the
mapper. The corresponding type is estimated from the dtype of the
default value.
Parameters
----------
func : var
Function, or sequence of functions
Examples
--------
>>> import dateutil.parser
>>> import datetime
>>> dateparser = dateutil.parser.parse
>>> defaultdate = datetime.date(2000, 1, 1)
>>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
"""
# Func is a single function
if hasattr(func, '__call__'):
# Insert a tuple containing dtype, func, and default into the penultimate position of _mapper
cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
return
elif hasattr(func, '__iter__'):
# Func is a sequence of functions
if isinstance(func[0], (tuple, list)):
# Insert each tuple in func into the penultimate position of _mapper
for _ in func:
cls._mapper.insert(-1, _)
return
# Determine default value if not provided
if default is None:
default = [None] * len(func)
else:
default = list(default)
default.append([None] * (len(func) - len(default)))
# Insert dtype, function, and default values into penultimate position of _mapper for each function in func
for fct, dft in zip(func, default):
cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
    @classmethod
    def _find_map_entry(cls, dtype):
        """Return (index, entry) of the mapper entry matching `dtype`."""
        # Look for an exact dtype match first...
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)
        # ...then fall back to a subdtype match.
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)
        # No entry of the mapper can handle this dtype.
        raise LookupError
    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization, defaulting to bool/str2bool.
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must then be a function...
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                self.func = dtype_or_func
                # If no default was given, try to guess one by converting
                # '0', or set it to None.
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # Find the best match in the mapper.
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # No match was found: use the input default and the last
                # (string) function of the mapper.
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # Use the mapper's default unless one was supplied.
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw.
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to a more
            # robust one that also accepts float strings like '1.0'.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))

        # Store the list of strings corresponding to missing values; the
        # empty string always counts as missing.
        if missing_values is None:
            self.missing_values = {''}
        else:
            # Comma-separated string: split it into individual values.
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        # Strict conversion by default; `_loose_call` exists as alternative.
        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default
    def _loose_call(self, value):
        """Best-effort conversion: fall back to the default on failure."""
        try:
            return self.func(value)
        except ValueError:
            return self.default
    def _strict_call(self, value):
        """Convert `value`, raising ValueError unless it is a missing value."""
        try:
            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                # The converter didn't do anything real yet; only reset the
                # checked flag while the converter has not been upgraded
                # (_status == 0).
                if not self._status:
                    self._checked = False
                return self.default
            # Not a missing value: report the conversion failure.
            raise ValueError("Cannot convert string '%s'" % value)
    def __call__(self, value):
        """Convert `value` via the current calling mode (strict or loose)."""
        return self._callingfunction(value)
    def _do_upgrade(self):
        """Move the converter to the next entry of the mapper."""
        # A locked converter cannot be upgraded.
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Once the max status is reached, there is nothing left to try.
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        # Update the type, function and default from the selected entry.
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # An explicitly supplied default takes precedence over the
        # mapper's default.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default
    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried. The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.
        """
        self._checked = True
        try:
            # Try the current conversion function first.
            return self._strict_call(value)
        except ValueError:
            # Conversion failed: move up the mapper and try again.
            self._do_upgrade()
            return self.upgrade(value)
    def iterupgrade(self, value):
        """Upgrade the converter until it can convert every item of `value`."""
        self._checked = True
        # A non-iterable value is treated as a one-element sequence.
        if not hasattr(value, '__iter__'):
            value = (value,)
        # Cache the bound method for the loop below.
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            # On the first failure, upgrade once and retry the whole
            # sequence from the start.
            self._do_upgrade()
            self.iterupgrade(value)
    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given, `StringConverter`
            tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``,
            the existing `missing_values` are cleared. Default is ``''``.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.
        """
        self.func = func
        self._locked = locked

        # If a default is given, keep it and derive the type from a test
        # conversion; otherwise derive the type from the test conversion
        # alone (falling back to None on failure).
        if default is not None:
            self.default = default
            tester = func(testing_value or '1')
            self.type = self._dtypeortype(self._getdtype(tester))
        else:
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set, or clear it.
        if missing_values is None:
            # Clear all missing values.
            self.missing_values = set()
        else:
            # Wrap a single non-iterable value into a list.
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)
# Convenience helper for building `np.dtype` objects with validated names.
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Convenience function to create a `np.dtype` object.

    The function processes the input `dtype` and matches it with the given
    names.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
    """
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # `ndtype` is a sequence of types: validate the names against its
        # length and build a structured dtype from (names, formats).
        validate = NameValidator(**validationargs)
        nbfields = len(ndtype)
        # No names given: use placeholders, one per field.
        if names is None:
            names = [''] * len(ndtype)
        # A comma-separated string of names: split it into a list.
        elif isinstance(names, str):
            names = names.split(",")
        # Validate the names, expanding/shrinking to `nbfields` and filling
        # the missing ones with `defaultfmt`.
        names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
        ndtype = np.dtype(dict(formats=ndtype, names=names))
    else:
        # Explicit names take precedence over names embedded in the dtype.
        if names is not None:
            validate = NameValidator(**validationargs)
            if isinstance(names, str):
                names = names.split(",")
            # Simple dtype: repeat the type once per requested name.
            if ndtype.names is None:
                formats = tuple([ndtype.type] * len(names))
                names = validate(names, defaultfmt=defaultfmt)
                ndtype = np.dtype(list(zip(names, formats)))
            # Structured dtype: just validate and re-assign the names.
            else:
                ndtype.names = validate(names, nbfields=len(ndtype.names),
                                        defaultfmt=defaultfmt)
        # No explicit names, but the dtype carries its own field names.
        elif ndtype.names is not None:
            validate = NameValidator(**validationargs)
            # Default initial names are numbered: ('f0', 'f1', ...).
        
            numbered_names = tuple("f%i" % i for i in range(len(ndtype.names)))
            # If the names are the default numbered ones and a different
            # default format was requested, regenerate them from defaultfmt.
            if ((ndtype.names == numbered_names) and (defaultfmt != "f%i")):
                ndtype.names = validate([''] * len(ndtype.names),
                                        defaultfmt=defaultfmt)
            # Explicit initial names: just validate them.
            else:
                ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt)
    return ndtype
.\numpy\numpy\lib\_nanfunctions_impl.py
"""
Functions that ignore NaN.
Functions
---------
- `nanmin` -- minimum non-NaN value
- `nanmax` -- maximum non-NaN value
- `nanargmin` -- index of minimum non-NaN value
- `nanargmax` -- index of maximum non-NaN value
- `nansum` -- sum of non-NaN values
- `nanprod` -- product of non-NaN values
- `nancumsum` -- cumulative sum of non-NaN values
- `nancumprod` -- cumulative product of non-NaN values
- `nanmean` -- mean of non-NaN values
- `nanvar` -- variance of non-NaN values
- `nanstd` -- standard deviation of non-NaN values
- `nanmedian` -- median of non-NaN values
- `nanquantile` -- qth quantile of non-NaN values
- `nanpercentile` -- qth percentile of non-NaN values
"""
import functools
import warnings
import numpy as np
import numpy._core.numeric as _nx
from numpy.lib import _function_base_impl as fnb
from numpy.lib._function_base_impl import _weights_are_valid
from numpy._core import overrides
array_function_dispatch = functools.partial(
overrides.array_function_dispatch, module='numpy')
__all__ = [
'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
'nancumsum', 'nancumprod', 'nanquantile'
]
def _nan_mask(a, out=None):
"""
Parameters
----------
a : array-like
Input array with at least 1 dimension.
out : ndarray, optional
Alternate output array in which to place the result. The default
is ``None``; if provided, it must have the same shape as the
expected output and will prevent the allocation of a new array.
Returns
-------
y : bool ndarray or True
A bool array where ``np.nan`` positions are marked with ``False``
and other positions are marked with ``True``. If the type of ``a``
is such that it can't possibly contain ``np.nan``, returns ``True``.
"""
if a.dtype.kind not in 'fc':
return True
y = np.isnan(a, out=out)
y = np.invert(y, out=y)
return y
def _replace_nan(a, val):
"""
If `a` is of inexact type, make a copy of `a`, replace NaNs with
the `val` value, and return the copy together with a boolean mask
marking the locations where NaNs were present. If `a` is not of
inexact type, do nothing and return `a` together with a mask of None.
Note that scalars will end up as array scalars, which is important
for using the result as the value of the out argument in some
operations.
Parameters
----------
a : array-like
Input array.
val : float
NaN values are set to val before doing the operation.
Returns
-------
y : ndarray
If `a` is of inexact type, return a copy of `a` with the NaNs
replaced by the fill value, otherwise return `a`.
mask: {bool, None}
If `a` is of inexact type, return a boolean mask marking locations of
NaNs, otherwise return None.
"""
if np.issubdtype(a.dtype, np.inexact):
y = np.array(a, copy=True)
mask = np.isnan(a)
y[mask] = val
return y, mask
else:
return a, None
a = np.asanyarray(a)
if a.dtype == np.object_:
mask = np.not_equal(a, a, dtype=bool)
elif issubclass(a.dtype.type, np.inexact):
mask = np.isnan(a)
else:
mask = None
if mask is not None:
a = np.array(a, subok=True, copy=True)
np.copyto(a, val, where=mask)
return a, mask
def _copyto(a, val, mask):
if isinstance(a, np.ndarray):
np.copyto(a, val, where=mask, casting='unsafe')
else:
a = a.dtype.type(val)
return a
def _remove_nan_1d(arr1d, second_arr1d=None, overwrite_input=False):
    """
    Remove the NaN entries of a 1-D array by compaction.

    NaN slots are overwritten with non-NaN values taken from the tail of
    the array and the shortened prefix is returned, so the surviving
    values are NOT kept in their original order.

    Parameters
    ----------
    arr1d : ndarray
        Array to remove NaNs from.
    second_arr1d : ndarray or None
        If given, compacted with exactly the same element moves as `arr1d`.
    overwrite_input : bool
        If True, the inputs may be modified in place instead of copied.

    Returns
    -------
    res : ndarray
        `arr1d` with the NaN entries removed.
    second_res : ndarray or None
        Compacted `second_arr1d`, or None if it was not supplied.
    overwrite_input : bool
        Whether the returned arrays may be modified in place by the caller.
    """
    if arr1d.dtype == object:
        # object arrays do not support `isnan` — NaN compares unequal to
        # itself, so use that as the test.
        c = np.not_equal(arr1d, arr1d, dtype=bool)
    else:
        c = np.isnan(arr1d)

    s = np.nonzero(c)[0]
    if s.size == arr1d.size:
        # Every entry is NaN: warn and return empty slices.
        warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=6)
        if second_arr1d is None:
            return arr1d[:0], None, True
        else:
            return arr1d[:0], second_arr1d[:0], True
    elif s.size == 0:
        # Nothing to remove; the inputs pass through untouched.
        return arr1d, second_arr1d, overwrite_input
    else:
        if not overwrite_input:
            arr1d = arr1d.copy()
        # Select the non-NaN values from the tail of the array...
        enonan = arr1d[-s.size:][~c[-s.size:]]
        # ...and move them into the leading NaN slots; the tail is then
        # sliced off below.
        arr1d[s[:enonan.size]] = enonan

        if second_arr1d is None:
            return arr1d[:-s.size], None, True
        else:
            if not overwrite_input:
                second_arr1d = second_arr1d.copy()
            # Apply the identical moves to the companion array.
            enonan = second_arr1d[-s.size:][~c[-s.size:]]
            second_arr1d[s[:enonan.size]] = enonan

            return arr1d[:-s.size], second_arr1d[:-s.size], True
def _divide_by_count(a, b, out=None):
"""
Compute a/b ignoring invalid results. If `a` is an array the division
is done in place. If `a` is a scalar, then its type is preserved in the
"""
with np.errstate(invalid='ignore', divide='ignore'):
if isinstance(a, np.ndarray):
if out is None:
return np.divide(a, b, out=a, casting='unsafe')
else:
return np.divide(a, b, out=out, casting='unsafe')
else:
if out is None:
try:
return a.dtype.type(a / b)
except AttributeError:
return a / b
else:
return np.divide(a, b, out=out, casting='unsafe')
def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None,
initial=None, where=None):
return (a, out)
@array_function_dispatch(_nanmin_dispatcher)
def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return minimum of an array or minimum along an axis, ignoring any NaNs.
    When all-NaN slices are encountered a ``RuntimeWarning`` is raised and
    Nan is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose minimum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the minimum is computed. The default is to
        compute the minimum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `min` method
        of sub-classes of `ndarray`. If the sub-classes methods
        does not implement `keepdims` any exceptions will be raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        The maximum value of an output element. Must be present to allow
        computation on empty slice. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the minimum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmin : ndarray
        An array with the same shape as `a`, with the specified axis
        removed. If `a` is a 0-d array, or if axis is None, an ndarray
        scalar is returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmax :
        The maximum value of an array along a given axis, ignoring any NaNs.
    amin :
        The minimum value of an array along a given axis, propagating any NaNs.
    fmin :
        Element-wise minimum of two arrays, ignoring any NaNs.
    minimum :
        Element-wise minimum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.
    amax, fmax, maximum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity.
    Positive infinity is treated as a very large number and negative
    infinity is treated as a very small (i.e. negative) number.

    If the input has a integer type the function is equivalent to np.min.

    Examples
    --------
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanmin(a)
    1.0
    >>> np.nanmin(a, axis=0)
    array([1., 2.])
    >>> np.nanmin(a, axis=1)
    array([1., 3.])

    When positive infinity and negative infinity are present:

    >>> np.nanmin([1, 2, np.nan, np.inf])
    1.0
    >>> np.nanmin([1, 2, np.nan, -np.inf])
    -inf

    """
    # Forward only the keyword arguments the caller actually supplied, so
    # that defaults are left to the underlying reduction.
    kwargs = {}
    if keepdims is not np._NoValue:
        kwargs['keepdims'] = keepdims
    if initial is not np._NoValue:
        kwargs['initial'] = initial
    if where is not np._NoValue:
        kwargs['where'] = where
    if type(a) is np.ndarray and a.dtype != np.object_:
        # Fast path for plain ndarrays (object dtype excluded): fmin ignores
        # NaN unless every element of a slice is NaN, so a NaN in the result
        # identifies an all-NaN slice.
        res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning,
                          stacklevel=2)
    else:
        # Slow path, safe for subclasses and object arrays: substitute +inf
        # for NaN and take an ordinary minimum.
        a, mask = _replace_nan(a, +np.inf)
        res = np.amin(a, axis=axis, out=out, **kwargs)
        if mask is None:
            return res
        # Detect all-NaN slices; `initial` does not apply to this reduction.
        kwargs.pop("initial", None)
        mask = np.all(mask, axis=axis, **kwargs)
        if np.any(mask):
            res = _copyto(res, np.nan, mask)
            warnings.warn("All-NaN axis encountered", RuntimeWarning,
                          stacklevel=2)
    return res
def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None,
initial=None, where=None):
return (a, out)
@array_function_dispatch(_nanmax_dispatcher)
def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
           where=np._NoValue):
    """
    Return the maximum of an array or maximum along an axis, ignoring any
    NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is
    raised and NaN is returned for that slice.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose maximum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the maximum is computed. The default is to
        compute the maximum of the flattened array.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `max` method
        of sub-classes of `ndarray`. If the sub-classes methods
        does not implement `keepdims` any exceptions will be raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        The minimum value of an output element. Must be present to allow
        computation on empty slice. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to compare for the maximum. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanmax : ndarray
        An array with the same shape as `a`, with the specified axis removed.
        If `a` is a 0-d array, or if axis is None, an ndarray scalar is
        returned. The same dtype as `a` is returned.

    See Also
    --------
    nanmin :
        The minimum value of an array along a given axis, ignoring any NaNs.
    amax :
        The maximum value of an array along a given axis, propagating any NaNs.
    fmax :
        Element-wise maximum of two arrays, ignoring any NaNs.
    maximum :
        Element-wise maximum of two arrays, propagating any NaNs.
    isnan :
        Shows which elements are Not a Number (NaN).
    isfinite:
        Shows which elements are neither NaN nor infinity.
    amin, fmin, minimum

    Notes
    -----
    NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
    (IEEE 754). This means that Not a Number is not equivalent to infinity;
    positive infinity is treated as a very large number and negative
    infinity as a very small (i.e. negative) number.
    """
    # Collect only the keyword arguments the caller actually supplied.
    kwargs = {name: value for name, value in
              (("keepdims", keepdims), ("initial", initial), ("where", where))
              if value is not np._NoValue}
    fast_path = type(a) is np.ndarray and a.dtype != np.object_
    if fast_path:
        # fmax ignores NaN unless a whole slice is NaN, so NaNs in the
        # result flag all-NaN slices.
        res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
        if np.isnan(res).any():
            warnings.warn("All-NaN slice encountered", RuntimeWarning, stacklevel=2)
        return res
    # Slow path, safe for subclasses and object arrays: substitute -inf for
    # NaN and take an ordinary maximum.
    a, mask = _replace_nan(a, -np.inf)
    res = np.amax(a, axis=axis, out=out, **kwargs)
    if mask is None:
        return res
    # Detect all-NaN slices; `initial` does not apply to this reduction.
    kwargs.pop("initial", None)
    mask = np.all(mask, axis=axis, **kwargs)
    if np.any(mask):
        res = _copyto(res, np.nan, mask)
        warnings.warn("All-NaN axis encountered", RuntimeWarning, stacklevel=2)
    return res
def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None):
return (a,)
@array_function_dispatch(_nanargmin_dispatcher)
def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the minimum values in the specified axis ignoring
    NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results
    cannot be trusted if a slice contains only NaNs and Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array. It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    See Also
    --------
    argmin, nanargmax

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmin(a)
    0
    >>> np.nanargmin(a)
    2
    >>> np.nanargmin(a, axis=0)
    array([1, 1])
    >>> np.nanargmin(a, axis=1)
    array([1, 0])
    """
    # NaNs can never be the minimum once replaced with +inf; `mask` records
    # where they were (None means `a` cannot hold NaNs).
    a, mask = _replace_nan(a, np.inf)
    if mask is not None and mask.size:
        # A slice is all-NaN iff its NaN mask is all True along `axis`.
        mask = np.all(mask, axis=axis)
        if np.any(mask):
            raise ValueError("All-NaN slice encountered")
    res = np.argmin(a, axis=axis, out=out, keepdims=keepdims)
    return res
def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None):
return (a,)
@array_function_dispatch(_nanargmax_dispatcher)
def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue):
    """
    Return the indices of the maximum values in the specified axis ignoring
    NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results
    cannot be trusted if a slice contains only NaNs and -Infs.

    Parameters
    ----------
    a : array_like
        Input data.
    axis : int, optional
        Axis along which to operate. By default flattened input is used.
    out : array, optional
        If provided, the result will be inserted into this array. It should
        be of the appropriate shape and dtype.

        .. versionadded:: 1.22.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the array.

        .. versionadded:: 1.22.0

    Returns
    -------
    index_array : ndarray
        An array of indices or a single index value.

    See Also
    --------
    argmax, nanargmin

    Examples
    --------
    >>> a = np.array([[np.nan, 4], [2, 3]])
    >>> np.argmax(a)
    0
    >>> np.nanargmax(a)
    1
    >>> np.nanargmax(a, axis=0)
    array([1, 0])
    >>> np.nanargmax(a, axis=1)
    array([1, 1])
    """
    # NaNs can never be the maximum once replaced with -inf; `mask` records
    # where they were (None means `a` cannot hold NaNs).
    a, mask = _replace_nan(a, -np.inf)
    if mask is not None and mask.size:
        # A slice is all-NaN iff its NaN mask is all True along `axis`.
        mask = np.all(mask, axis=axis)
        if np.any(mask):
            raise ValueError("All-NaN slice encountered")
    res = np.argmax(a, axis=axis, out=out, keepdims=keepdims)
    return res
# 定义一个分派函数 _nansum_dispatcher,接受多个参数并返回元组 (a, out)
def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
initial=None, where=None):
return (a, out)
# Register `nansum` for __array_function__ dispatch.
@array_function_dispatch(_nansum_dispatcher)
def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
           initial=np._NoValue, where=np._NoValue):
    """
    Return the sum of array elements over a given axis treating Not a
    Numbers (NaNs) as zero.

    In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN or
    empty. In later versions zero is returned.

    Parameters
    ----------
    a : array_like
        Array containing numbers whose sum is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the sum is computed. The default is to
        compute the sum of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are summed. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.

        .. versionadded:: 1.8.0
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN to
        integer can yield unexpected results.

        .. versionadded:: 1.8.0
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `mean` or `sum` methods
        of sub-classes of `ndarray`. If the sub-classes methods
        does not implement `keepdims` any exceptions will be raised.

        .. versionadded:: 1.8.0
    initial : scalar, optional
        Starting value for the sum. See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the sum. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nansum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which it is returned. The result has the same
        size as `a`, and the same shape as `a` if `axis` is not None
        or `a` is a 1-d array.

    See Also
    --------
    numpy.sum : Sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Notes
    -----
    If both positive and negative infinity are present, the sum will be Not
    A Number (NaN).

    Examples
    --------
    >>> np.nansum(1)
    1
    >>> np.nansum([1])
    1
    >>> np.nansum([1, np.nan])
    1.0
    >>> a = np.array([[1, 1], [1, np.nan]])
    >>> np.nansum(a)
    3.0
    >>> np.nansum(a, axis=0)
    array([2., 1.])
    >>> np.nansum([1, np.nan, np.inf])
    inf
    >>> np.nansum([1, np.nan, -np.inf])
    -inf
    >>> from numpy.testing import suppress_warnings
    >>> with np.errstate(invalid="ignore"):
    ...     np.nansum([1, np.nan, np.inf, -np.inf])
    np.float64(nan)

    """
    # NaNs contribute the additive identity, so replace them with 0 and
    # defer to the ordinary sum.
    a, mask = _replace_nan(a, 0)
    return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                  initial=initial, where=where)
# 创建一个分发函数 _nanprod_dispatcher,用于根据参数返回元组 (a, out)
def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
initial=None, where=None):
return (a, out)
# Associate `nanprod` with its dispatcher via array_function_dispatch.
@array_function_dispatch(_nanprod_dispatcher)
def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            initial=np._NoValue, where=np._NoValue):
    """
    Return the product of array elements over a given axis treating Not a
    Numbers (NaNs) as ones.

    One is returned for slices that are all-NaN or empty.

    .. versionadded:: 1.10.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose product is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the product is computed. The default is to
        compute the product of the flattened array.
    dtype : data-type, optional
        The type of the returned array and of the accumulator in which the
        elements are summed. By default, the dtype of `a` is used. An
        exception is when `a` has an integer type with less precision than
        the platform (u)intp. In that case, the default will be either
        (u)int32 or (u)int64 depending on whether the platform is 32 or 64
        bits. For inexact inputs, dtype must be inexact.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``. If provided, it must have the same shape as the
        expected output, but the type will be cast if necessary. See
        :ref:`ufuncs-output-type` for more details. The casting of NaN to
        integer can yield unexpected results.
    keepdims : bool, optional
        If True, the axes which are reduced are left in the result as
        dimensions with size one. With this option, the result will
        broadcast correctly against the original `arr`.
    initial : scalar, optional
        The starting value for this product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0
    where : array_like of bool, optional
        Elements to include in the product. See `~numpy.ufunc.reduce`
        for details.

        .. versionadded:: 1.22.0

    Returns
    -------
    nanprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.prod : Product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nanprod(1)
    1
    >>> np.nanprod([1])
    1
    >>> np.nanprod([1, np.nan])
    1.0
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nanprod(a)
    6.0
    >>> np.nanprod(a, axis=0)
    array([3., 2.])
    """
    # NaNs contribute the multiplicative identity, so replace them with 1
    # (the mask of former NaN positions is not needed here).
    a, mask = _replace_nan(a, 1)
    # Defer to the ordinary product with the requested reduction arguments.
    return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                   initial=initial, where=where)
# 创建一个分发函数 _nancumsum_dispatcher,用于根据参数返回适当的元组 (a, axis, dtype, out)
def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
# 返回一个元组,包含变量 a 和 out 的值
return (a, out)
# Register `nancumsum` for __array_function__ dispatch.
@array_function_dispatch(_nancumsum_dispatcher)
def nancumsum(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative sum of array elements over a given axis treating
    Not a Numbers (NaNs) as zero. The cumulative sum does not change when
    NaNs are encountered and leading NaNs are replaced by zeros.

    Zeros are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed. The default
        (None) is to compute the cumsum over the flattened array.
    dtype : dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed. If `dtype` is not specified, it defaults to
        the dtype of `a`, unless `a` has an integer dtype with a precision
        less than that of the default platform integer. In that case, the
        default platform integer is used.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output but
        the type will be cast if necessary. See :ref:`ufuncs-output-type`
        for more details.

    Returns
    -------
    nancumsum : ndarray.
        A new array holding the result is returned unless `out` is
        specified, in which it is returned. The result has the same size
        as `a`, and the same shape as `a` if `axis` is not None or `a` is
        a 1-d array.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumsum(1)
    array([1])
    >>> np.nancumsum([1])
    array([1])
    >>> np.nancumsum([1, np.nan])
    array([1., 1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumsum(a)
    array([1., 3., 6., 6.])
    >>> np.nancumsum(a, axis=0)
    array([[1., 2.],
           [4., 2.]])
    >>> np.nancumsum(a, axis=1)
    array([[1., 3.],
           [3., 3.]])
    """
    # NaNs contribute nothing to a running sum, so replace them with 0.
    a, mask = _replace_nan(a, 0)
    # Defer to the ordinary cumulative sum.
    return np.cumsum(a, axis=axis, dtype=dtype, out=out)
# 用于处理 nancumprod 函数分派的装饰器,使其可以根据不同输入类型调用相应的处理函数
def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
return (a, out)
@array_function_dispatch(_nancumprod_dispatcher)
def nancumprod(a, axis=None, dtype=None, out=None):
    """
    Return the cumulative product of array elements over a given axis
    treating Not a Numbers (NaNs) as one. The cumulative product does not
    change when NaNs are encountered and leading NaNs are replaced by ones.

    Ones are returned for slices that are all-NaN or empty.

    .. versionadded:: 1.12.0

    Parameters
    ----------
    a : array_like
        Input array.
    axis : int, optional
        Axis along which the cumulative product is computed. By default
        the input is flattened.
    dtype : dtype, optional
        Type of the returned array, as well as of the accumulator in which
        the elements are multiplied. If *dtype* is not specified, it
        defaults to the dtype of `a`, unless `a` has an integer dtype with
        a precision less than that of the default platform integer. In
        that case, the default platform integer is used.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output but
        the resulting values will be cast if necessary.

    Returns
    -------
    nancumprod : ndarray
        A new array holding the result is returned unless `out` is
        specified, in which case it is returned.

    See Also
    --------
    numpy.cumprod : Cumulative product across array propagating NaNs.
    isnan : Show which elements are NaN.

    Examples
    --------
    >>> np.nancumprod(1)
    array([1])
    >>> np.nancumprod([1])
    array([1])
    >>> np.nancumprod([1, np.nan])
    array([1., 1.])
    >>> a = np.array([[1, 2], [3, np.nan]])
    >>> np.nancumprod(a)
    array([1., 2., 6., 6.])
    >>> np.nancumprod(a, axis=0)
    array([[1., 2.],
           [3., 2.]])
    >>> np.nancumprod(a, axis=1)
    array([[1., 2.],
           [3., 3.]])
    """
    # NaNs contribute the multiplicative identity, so replace them with 1.
    a, mask = _replace_nan(a, 1)
    # Defer to the ordinary cumulative product.
    return np.cumprod(a, axis=axis, dtype=dtype, out=out)
# 定义一个调度函数 `_nanmean_dispatcher`,用于分派参数 `a` 和 `out`
def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
*, where=None):
# 返回参数 `a` 和 `out` 的元组
return (a, out)
# Register `nanmean` for __array_function__ dispatch.
@array_function_dispatch(_nanmean_dispatcher)
def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
            *, where=np._NoValue):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the array elements. The average is taken over
    the flattened array by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the means are computed. The default is to
        compute the mean of the flattened array.
    dtype : data-type, optional
        Type to use in computing the mean. For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : ndarray, optional
        Alternate output array in which to place the result. The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.
        See :ref:`ufuncs-output-type` for more details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `mean` or `sum` methods
        of sub-classes of `ndarray`. If the sub-classes methods
        does not implement `keepdims` any exceptions will be raised.
    where : array_like of bool, optional
        Elements to include in the mean. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0

    Returns
    -------
    m : ndarray, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. Nan is
        returned for slices that contain only NaNs.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`. Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.
    """
    # Replace NaNs with 0; `mask` marks their positions (None when `a` is
    # not of inexact type and thus cannot contain NaNs).
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No NaNs possible: defer entirely to the ordinary mean.
        return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                       where=where)
    if dtype is not None:
        dtype = np.dtype(dtype)
    if dtype is not None and not issubclass(dtype.type, np.inexact):
        # Inexact input requires an inexact accumulator dtype.
        raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        # Inexact input requires an inexact output array.
        raise TypeError("If a is inexact, then out must be inexact")
    # Number of non-NaN elements in each slice.
    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims,
                 where=where)
    # Sum over each slice; the NaN positions contribute 0.
    tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                 where=where)
    # Mean = sum of non-NaN values divided by their count.
    avg = _divide_by_count(tot, cnt, out=out)
    # Slices with zero non-NaN elements produce NaN and deserve a warning.
    isbad = (cnt == 0)
    if isbad.any():
        warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=2)
        # NaN is the only possible bad value, so no further action is needed.
    return avg
# Private function for rank-1 arrays: compute the median ignoring NaNs.
# See nanmedian for parameter usage.
def _nanmedian1d(arr1d, overwrite_input=False):
    """Median of a 1-D array ignoring NaNs (helper for `nanmedian`)."""
    # Compact the non-NaN values to the front and drop the NaN tail.
    arr1d_parsed, _, overwrite_input = _remove_nan_1d(
        arr1d, overwrite_input=overwrite_input,
    )
    if arr1d_parsed.size == 0:
        # Everything was NaN (warning already issued by _remove_nan_1d).
        # Return the last input element so the NaN-like scalar keeps the
        # input's type and unit, which matters for `timedelta64` and
        # `complexfloating` inputs.
        return arr1d[-1]
    return np.median(arr1d_parsed, overwrite_input=overwrite_input)
# Private function that does not support extended axis or keepdims;
# those features are layered on top of it via _ureduce.
# See nanmedian for parameter usage.
def _nanmedian(a, axis=None, out=None, overwrite_input=False):
    """NaN-ignoring median over one axis (helper for `nanmedian`)."""
    if axis is None or a.ndim == 1:
        # Flat reduction: operate on the raveled data directly.
        part = a.ravel()
        if out is None:
            return _nanmedian1d(part, overwrite_input)
        else:
            out[...] = _nanmedian1d(part, overwrite_input)
            return out
    else:
        # For small reductions the sort+index path is still faster than
        # apply_along_axis (per the original benchmark note: shuffled
        # (50, 50, x) data containing a few NaNs).
        if a.shape[axis] < 600:
            return _nanmedian_small(a, axis, out, overwrite_input)
        # General case: apply the 1-D helper along the requested axis.
        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
        if out is not None:
            out[...] = result
        return result
# 私有函数,用于较小的中位数,排序 + 索引中位数,
# 对于多个维度的小中位数由于 apply_along_axis 的高开销更快
# 查看 nanmedian 函数的参数用法
def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
# 创建一个掩码数组,将 a 中的 NaN 值掩盖起来
a = np.ma.masked_array(a, np.isnan(a))
# 计算掩码数组的中位数,可以指定轴和是否覆盖输入
m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input)
# 如果遇到全为 NaN 的切片,则发出警告
for i in range(np.count_nonzero(m.mask.ravel())):
warnings.warn("All-NaN slice encountered", RuntimeWarning,
stacklevel=5)
# 如果 m 的数据类型是时间间隔或复数浮点数,填充值为 NaT 或 NaN
fill_value = np.timedelta64("NaT") if m.dtype.kind == "m" else np.nan
# 如果提供了输出数组,则将填充后的 m 复制到输出数组中
if out is not None:
out[...] = m.filled(fill_value)
return out
# 否则返回填充后的 m
return m.filled(fill_value)
# 分发器函数,将参数传递给 _nanmedian 函数
def _nanmedian_dispatcher(
a, axis=None, out=None, overwrite_input=None, keepdims=None):
return (a, out)
# Bind _nanmedian_dispatcher to nanmedian via array_function_dispatch.
@array_function_dispatch(_nanmedian_dispatcher)
def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
    """
    Compute the median along the specified axis, while ignoring NaNs.

    Returns the median of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    axis : {int, sequence of int, None}, optional
        Axis or axes along which the medians are computed. The default is
        to compute the median along a flattened version of the array.
        A sequence of axes is supported since version 1.9.0.
    out : ndarray, optional
        Alternative output array in which to place the result.
    overwrite_input : bool, optional
        If True, allow `a` to be modified by intermediate calculations to
        save memory; its contents are undefined afterwards.
    keepdims : bool, optional
        If True, the reduced axes are left in the result as dimensions
        with size one.
    """
    a = np.asanyarray(a)
    # For an empty array, delegate to nanmean: it produces the expected
    # NaN result and emits the appropriate warning (same axis/out/keepdims
    # semantics).
    if a.size == 0:
        return np.nanmean(a, axis, out=out, keepdims=keepdims)
    # _ureduce supplies the extended-axis and keepdims handling on top of
    # the private _nanmedian reduction.
    return fnb._ureduce(a, func=_nanmedian, keepdims=keepdims,
                        axis=axis, out=out,
                        overwrite_input=overwrite_input)
# 定义一个分派函数,用于_nanpercentile_dispatcher,接收参数a, q, axis, out, overwrite_input, method, keepdims, *, weights, interpolation,并返回前三个参数和weights
def _nanpercentile_dispatcher(
a, q, axis=None, out=None, overwrite_input=None,
method=None, keepdims=None, *, weights=None, interpolation=None):
return (a, q, out, weights)
# Register `nanpercentile` for __array_function__ dispatch.
@array_function_dispatch(_nanpercentile_dispatcher)
def nanpercentile(
    a,
    q,
    axis=None,
    out=None,
    overwrite_input=False,
    method="linear",
    keepdims=np._NoValue,
    *,
    weights=None,
    interpolation=None,
):
    """
    Compute the qth percentile of the data along the specified axis,
    while ignoring nan values.

    Returns the qth percentile(s) of the array elements.

    .. versionadded:: 1.9.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be
        between 0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. The default
        is to compute the percentile(s) along a flattened version of the
        array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape and buffer length as the expected output, but the
        type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by
        intermediate calculations, to save memory. In this case, the
        contents of the input `a` after this function completes is
        undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        percentile. There are many different methods, some unique to NumPy.
        See the notes for explanation. The options sorted by their R type
        as summarized in the H&F paper [1]_ are:

        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear' (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'

        The first three methods are discontinuous. NumPy further defines the
        following discontinuous variations of the default 'linear' (7.)
        option:

        * 'lower'
        * 'higher',
        * 'midpoint'
        * 'nearest'

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in the
        result as dimensions with size one. With this option, the result
        will broadcast correctly against the original array `a`.

        If this is anything but the default value it will be passed through
        (in the special case of an empty array) to the `mean` function of
        the underlying array. If the array is a sub-class and `mean` does
        not have the kwarg `keepdims` this will raise a RuntimeError.
    weights : array_like, optional
        An array of weights associated with the values in `a`. Each value
        in `a` contributes to the percentile according to its associated
        weight. The weights array can either be 1-D (in which case its
        length must be the size of `a` along the given axis) or of the
        same shape as `a`. If `weights=None`, then all data in `a` are
        assumed to have a weight equal to one. Only ``method="inverted_cdf"``
        supports weights.

        .. versionadded:: 2.0.0
    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    percentile : scalar or ndarray
        If `q` is a single percentile and `axis=None`, then the result is a
        scalar. If multiple percentiles are given, the first axis of the
        result corresponds to the percentiles. The other axes are the axes
        that remain after the reduction of `a`. If the input contains
        integers or floats smaller than ``float64``, the output data-type
        is ``float64``. Otherwise, the output data-type is the same as that
        of the input. If `out` is specified, that array is returned
        instead.

    See Also
    --------
    nanmean
    nanmedian : equivalent to ``nanpercentile(..., 50)``
    percentile, median, mean
    nanquantile : equivalent to nanpercentile, except q in range [0, 1].

    Notes
    -----
    The behavior of `numpy.nanpercentile` with percentage `q` is that of
    `numpy.quantile` with argument ``q/100``, ignoring nan values. For more
    information, please see `numpy.quantile`.

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996

    """
    # "interpolation" is the deprecated alias of "method"; normalize it.
    if interpolation is not None:
        method = fnb._check_interpolation_as_method(
            method, interpolation, "nanpercentile")
    a = np.asanyarray(a)
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")
    # Convert percentages to fractions, using the array's own float type
    # when possible to avoid losing precision.
    q = np.true_divide(q, a.dtype.type(100) if a.dtype.kind == "f" else 100)
    # Undo any decay that the ufunc performed (see gh-13105).
    q = np.asanyarray(q)
    if not fnb._quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 100]")
    if weights is not None:
        # Weighted percentiles are only defined for the inverted-cdf method.
        if method != "inverted_cdf":
            msg = ("Only method 'inverted_cdf' supports weights. "
                   f"Got: {method}.")
            raise ValueError(msg)
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")
    # All inputs validated; perform the actual NaN-ignoring quantile.
    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, method, keepdims, weights)
# 定义一个调度器函数,用于 nanquantile 函数的分派
def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
method=None, keepdims=None, *, weights=None,
interpolation=None):
# 返回传入的参数元组,用于 nanquantile 函数的调用
return (a, q, out, weights)
# Declare `nanquantile` through array_function_dispatch so ndarray
# subclasses can override it via __array_function__.
@array_function_dispatch(_nanquantile_dispatcher)
def nanquantile(
    a,
    q,
    axis=None,
    out=None,
    overwrite_input=False,
    method="linear",
    keepdims=np._NoValue,
    *,
    weights=None,
    interpolation=None,
):
    """
    Compute the qth quantile of the data along the specified axis,
    while ignoring nan values.
    Returns the qth quantile(s) of the array elements.

    .. versionadded:: 1.15.0

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array, containing
        nan values to be ignored.
    q : array_like of float
        Probability or sequence of probabilities for the quantiles to compute.
        Values must be between 0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the quantiles are computed. The
        default is to compute the quantile(s) along a flattened
        version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must
        have the same shape and buffer length as the expected output,
        but the type (of the output) will be cast if necessary.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by intermediate
        calculations, to save memory. In this case, the contents of the input
        `a` after this function completes is undefined.
    method : str, optional
        This parameter specifies the method to use for estimating the
        quantile.  The options sorted by their R type as summarized in the
        H&F paper [1]_ are: 'inverted_cdf', 'averaged_inverted_cdf',
        'closest_observation', 'interpolated_inverted_cdf', 'hazen',
        'weibull', 'linear' (default), 'median_unbiased' and
        'normal_unbiased'.  NumPy further defines the discontinuous
        variations of the default 'linear' option: 'lower', 'higher',
        'midpoint' and 'nearest'.  See `numpy.quantile` for details.

        .. versionchanged:: 1.22.0
            This argument was previously called "interpolation" and only
            offered the "linear" default and last four options.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.
        If this is anything but the default value it will be passed
        through (in the special case of an empty array) to the `mean`
        function of the underlying array.  If the array is a sub-class and
        `mean` does not have the kwarg `keepdims` this will raise a
        RuntimeError.
    weights : array_like, optional
        An array of weights associated with the values in `a`. Each value in
        `a` contributes to the quantile according to its associated weight.
        The weights array can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.
        Only `method="inverted_cdf"` supports weights.

        .. versionadded:: 2.0.0
    interpolation : str, optional
        Deprecated name for the method keyword argument.

        .. deprecated:: 1.22.0

    Returns
    -------
    quantile : scalar or ndarray
        If `q` is a single probability and `axis=None`, then the result
        is a scalar. If multiple probability levels are given, first axis
        of the result corresponds to the quantiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    See Also
    --------
    quantile
    nanmean, nanmedian
    nanmedian : equivalent to ``nanquantile(..., 0.5)``
    nanpercentile : same as nanquantile, but with q in the range [0, 100].

    Notes
    -----
    `numpy.nanquantile` behaves the same as `numpy.quantile` (ignoring
    NaN values).  For more information, please see `numpy.quantile`.

    Examples
    --------
    >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
    >>> a[0][1] = np.nan
    >>> a
    array([[10., nan,  4.],
           [ 3.,  2.,  1.]])
    >>> np.quantile(a, 0.5)
    np.float64(nan)
    >>> np.nanquantile(a, 0.5)
    3.0
    >>> np.nanquantile(a, 0.5, axis=0)
    array([6.5, 2. , 2.5])
    >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
    array([[7.],
           [2.]])
    >>> m = np.nanquantile(a, 0.5, axis=0)
    >>> out = np.zeros_like(m)
    >>> np.nanquantile(a, 0.5, axis=0, out=out)
    array([6.5, 2. , 2.5])
    >>> m
    array([6.5, 2. , 2.5])
    >>> b = a.copy()
    >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(a==b)

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996
    """
    # Fold the deprecated ``interpolation`` keyword into ``method``
    # (the helper emits the deprecation handling).
    if interpolation is not None:
        method = fnb._check_interpolation_as_method(
            method, interpolation, "nanquantile")
    a = np.asanyarray(a)
    # Quantiles of complex data are not defined.
    if a.dtype.kind == "c":
        raise TypeError("a must be an array of real numbers")
    # When q is a Python scalar and `a` is floating, keep q in `a`'s
    # dtype so the computation introduces no extra precision.
    if isinstance(q, (int, float)) and a.dtype.kind == "f":
        q = np.asanyarray(q, dtype=a.dtype)
    else:
        q = np.asanyarray(q)
    # Validate that all quantile levels lie in [0, 1].
    if not fnb._quantile_is_valid(q):
        raise ValueError("Quantiles must be in the range [0, 1]")
    if weights is not None:
        # Weighted quantiles are only defined for the inverted-CDF method.
        if method != "inverted_cdf":
            msg = ("Only method 'inverted_cdf' supports weights. "
                   f"Got: {method}.")
            raise ValueError(msg)
        if axis is not None:
            axis = _nx.normalize_axis_tuple(axis, a.ndim, argname="axis")
        # Broadcast-check the weights and reject negative entries.
        weights = _weights_are_valid(weights=weights, a=a, axis=axis)
        if np.any(weights < 0):
            raise ValueError("Weights must be non-negative.")
    # Inputs are now validated; delegate the actual computation.
    return _nanquantile_unchecked(
        a, q, axis, out, overwrite_input, method, keepdims, weights)
def _nanquantile_unchecked(
a,
q,
axis=None,
out=None,
overwrite_input=False,
method="linear",
keepdims=np._NoValue,
weights=None,
):
"""Assumes that q is in [0, 1], and is an ndarray"""
# apply_along_axis in _nanpercentile doesn't handle empty arrays well,
# so deal them upfront
# 如果数组 a 是空的,则返回沿着指定轴的 NaN 均值
if a.size == 0:
return np.nanmean(a, axis, out=out, keepdims=keepdims)
# 否则调用 _ureduce 函数处理 a 数组,返回计算的结果
return fnb._ureduce(a,
func=_nanquantile_ureduce_func,
q=q,
weights=weights,
keepdims=keepdims,
axis=axis,
out=out,
overwrite_input=overwrite_input,
method=method)
def _nanquantile_ureduce_func(
        a: np.ndarray,
        q: np.ndarray,
        weights: np.ndarray,
        axis: int = None,
        out=None,
        overwrite_input: bool = False,
        method="linear",
):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanpercentile for parameter usage
    """
    # Flattened (axis=None) or already-1-D input: a single 1-D pass suffices.
    if axis is None or a.ndim == 1:
        part = a.ravel()
        wgt = None if weights is None else weights.ravel()
        result = _nanquantile_1d(part, q, overwrite_input, method, weights=wgt)
    else:
        # Note that this code could try to fill in `out` right away.
        if weights is None:
            # Unweighted: let apply_along_axis run _nanquantile_1d on each
            # 1-D slice along `axis`.
            result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
                                         overwrite_input, method, weights)
            # apply_along_axis fills in collapsed axis with results.
            # Move those axes to the beginning to match percentile's
            # convention.
            if q.ndim != 0:
                from_ax = [axis + i for i in range(q.ndim)]
                result = np.moveaxis(result, from_ax, list(range(q.ndim)))
        else:
            # We need to apply along axis over 2 arrays, a and weights.
            # For simplicity, move the operation axis to the end first.
            a = np.moveaxis(a, axis, -1)
            if weights is not None:
                weights = np.moveaxis(weights, axis, -1)
            if out is not None:
                result = out
            else:
                # weights are limited to `inverted_cdf` so the result dtype
                # matches `a`'s dtype:
                result = np.empty_like(a, shape=q.shape + a.shape[:-1])
            # Loop over all leading (non-reduced) indices, computing the
            # weighted 1-D quantile of each trailing slice.
            for ii in np.ndindex(a.shape[:-1]):
                result[(...,) + ii] = _nanquantile_1d(
                    a[ii], q, weights=weights[ii],
                    overwrite_input=overwrite_input, method=method,
                )
            # This path wrote into `out` directly, so return early here.
            return result
    # For the other paths, copy the computed result into `out` if given.
    if out is not None:
        out[...] = result
    return result
def _nanquantile_1d(
    arr1d,
    q,
    overwrite_input=False,
    method="linear",
    weights=None,
):
    """
    Private function for rank 1 arrays. Compute quantile ignoring NaNs.
    See nanpercentile for parameter usage
    """
    # TODO: What to do when arr1d = [1, np.nan] and weights = [0, 1]?
    # Strip NaN entries (and their weights); the helper also tells us
    # whether the returned buffer may now be mutated in place.
    cleaned, cleaned_wgt, overwrite_input = _remove_nan_1d(
        arr1d, second_arr1d=weights, overwrite_input=overwrite_input)
    # Everything was NaN: the quantile is undefined, so return NaN(s)
    # shaped like q ([()] collapses a 0-d array to a scalar).
    if cleaned.size == 0:
        return np.full(q.shape, np.nan, dtype=cleaned.dtype)[()]
    # Delegate to the core (already-validated) quantile routine.
    return fnb._quantile_unchecked(
        cleaned,
        q,
        overwrite_input=overwrite_input,
        method=method,
        weights=cleaned_wgt,
    )
def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
keepdims=None, *, where=None, mean=None,
correction=None):
# 返回包含参数 a 和 out 的元组
return (a, out)
@array_function_dispatch(_nanvar_dispatcher)
def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Compute the variance along the specified axis, while ignoring NaNs.

    Returns the variance of the array elements, a measure of the spread of
    a distribution. The variance is computed for the flattened array by
    default, otherwise over the specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Array containing numbers whose variance is desired. If `a` is not an
        array, a conversion is attempted.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the variance is computed. The default is to
        compute the variance of the flattened array.
    dtype : data-type, optional
        Type to use in computing the variance. For arrays of integer type
        the default is `float64`; for arrays of float types it is the same as
        the array type.
    out : ndarray, optional
        Alternate output array in which to place the result. It must have
        the same shape as the expected output, but the type is cast if
        necessary.
    ddof : {int, float}, optional
        "Delta Degrees of Freedom": the divisor used in the calculation is
        ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.
    where : array_like of bool, optional
        Elements to include in the variance. See `~numpy.ufunc.reduce` for
        details.

        .. versionadded:: 1.22.0
    mean : array_like, optional
        Provide the mean to prevent its recalculation. The mean should have
        a shape as if it was calculated with ``keepdims=True``.
        The axis for the calculation of the mean should be the same as used
        in the call to this var function.

        .. versionadded:: 1.26.0
    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of
        them can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    variance : ndarray, see dtype parameter above
        If `out` is None, return a new array containing the variance,
        otherwise return a reference to the output array. If ddof is >= the
        number of non-NaN elements in a slice or the slice contains only
        NaNs, then the result for that slice is NaN.

    See Also
    --------
    std : Standard deviation
    mean : Average
    var : Variance while not ignoring NaNs
    nanstd, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The variance is the average of the squared deviations from the mean,
    i.e., ``var = mean(abs(x - x.mean())**2)``.  The mean is normally
    calculated as ``x.sum() / N``, where ``N = len(x)``. If, however,
    `ddof` is specified, the divisor ``N - ddof`` is used instead.  In
    standard statistical practice, ``ddof=1`` provides an unbiased
    estimator of the variance of a hypothetical infinite population;
    ``ddof=0`` provides a maximum likelihood estimate of the variance for
    normally distributed variables.

    Note that, for complex numbers, the absolute value is taken before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the variance is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32` (see example
    below). Specifying a higher-accuracy accumulator using the ``dtype``
    keyword can alleviate this issue.

    For this function to work on sub-classes of ndarray, they must define
    `sum` with the kwarg `keepdims`.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanvar(a)
    1.5555555555555554
    >>> np.nanvar(a, axis=0)
    array([1., 0.])
    >>> np.nanvar(a, axis=1)
    array([0.  , 0.25])  # may vary
    """
    # NOTE(review): the NaN-masking prologue below was dropped in the
    # annotated copy (the body referenced `arr`/`mask` without binding
    # them); restored from the reference implementation — verify against
    # upstream NumPy.
    arr, mask = _replace_nan(a, 0)
    if mask is None:
        # No NaNs present: defer entirely to the regular variance.
        return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
                      keepdims=keepdims, where=where, mean=mean,
                      correction=correction)

    if dtype is not None:
        dtype = np.dtype(dtype)
    if dtype is not None and not issubclass(dtype.type, np.inexact):
        raise TypeError("If a is inexact, then dtype must be inexact")
    if out is not None and not issubclass(out.dtype.type, np.inexact):
        raise TypeError("If a is inexact, then out must be inexact")

    # ``correction`` is the array-API-compatible alias for ``ddof``; the
    # two are mutually exclusive.
    if correction != np._NoValue:
        if ddof != 0:
            raise ValueError(
                "ddof and correction can't be provided simultaneously."
            )
        else:
            ddof = correction

    # Compute mean
    if type(arr) is np.matrix:
        _keepdims = np._NoValue  # np.matrix cannot represent kept dims
    else:
        _keepdims = True
    # Number of non-NaN elements contributing to each reduction.
    cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims,
                 where=where)

    # Use the caller-supplied mean when given; otherwise compute it.
    if mean is not np._NoValue:
        avg = mean
    else:
        avg = np.sum(arr, axis=axis, dtype=dtype,
                     keepdims=_keepdims, where=where)
        avg = _divide_by_count(avg, cnt)

    # Compute squared deviation from mean. `arr` is mutated in place
    # (NaN positions were zeroed, then are re-zeroed via the mask).
    np.subtract(arr, avg, out=arr, casting='unsafe', where=where)
    arr = _copyto(arr, 0, mask)
    if issubclass(arr.dtype.type, np.complexfloating):
        # |z|^2 = z * conj(z); take .real so the result stays real.
        sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real
    else:
        sqr = np.multiply(arr, arr, out=arr, where=where)

    # Compute variance.
    var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
                 where=where)

    # Precaution against reduced object arrays (which may lack .ndim).
    try:
        var_ndim = var.ndim
    except AttributeError:
        var_ndim = np.ndim(var)
    if var_ndim < cnt.ndim:
        # Subclasses of ndarray may ignore keepdims, so check here.
        cnt = cnt.squeeze(axis)
    dof = cnt - ddof
    var = _divide_by_count(var, dof)

    # Slices with no remaining degrees of freedom yield NaN plus a warning.
    isbad = (dof <= 0)
    if np.any(isbad):
        warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
                      stacklevel=2)
        # NaN, inf, or negative numbers are all possible bad
        # values, so explicitly replace them with NaN.
        var = _copyto(var, np.nan, isbad)
    return var
# 分发器函数,接受多个参数并返回前两个参数
def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
keepdims=None, *, where=None, mean=None,
correction=None):
return (a, out)
# Register `nanstd` with the array-function dispatch machinery.
@array_function_dispatch(_nanstd_dispatcher)
def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
           *, where=np._NoValue, mean=np._NoValue, correction=np._NoValue):
    """
    Compute the standard deviation along the specified axis, while
    ignoring NaNs.

    Returns the standard deviation, a measure of the spread of a
    distribution, of the non-NaN array elements. The standard deviation is
    computed for the flattened array by default, otherwise over the
    specified axis.

    For all-NaN slices or slices with zero degrees of freedom, NaN is
    returned and a `RuntimeWarning` is raised.

    .. versionadded:: 1.8.0

    Parameters
    ----------
    a : array_like
        Calculate the standard deviation of the non-NaN values.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the standard deviation is computed. The
        default is to compute the standard deviation of the flattened array.
    dtype : dtype, optional
        Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it
        is the same as the array type.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape as the expected output but the type (of the
        calculated values) will be cast if necessary.
    ddof : {int, float}, optional
        Means Delta Degrees of Freedom. The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of non-NaN
        elements. By default `ddof` is zero.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.
        If this value is anything but the default it is passed through
        as-is to the relevant functions of the sub-classes. If these
        functions do not have a `keepdims` kwarg, a RuntimeError will
        be raised.
    where : array_like of bool, optional
        Elements to include in the standard deviation.
        See `~numpy.ufunc.reduce` for details.

        .. versionadded:: 1.22.0
    mean : array_like, optional
        Provide the mean to prevent its recalculation. The mean should have
        a shape as if it was calculated with ``keepdims=True``.
        The axis for the calculation of the mean should be the same as used
        in the call to this std function.

        .. versionadded:: 1.26.0
    correction : {int, float}, optional
        Array API compatible name for the ``ddof`` parameter. Only one of
        them can be provided at the same time.

        .. versionadded:: 2.0.0

    Returns
    -------
    standard_deviation : ndarray, see dtype parameter above.
        If `out` is None, return a new array containing the standard
        deviation, otherwise return a reference to the output array. If
        ddof is >= the number of non-NaN elements in a slice or the slice
        contains only NaNs, then the result for that slice is NaN.

    See Also
    --------
    var, mean, std
    nanvar, nanmean
    :ref:`ufuncs-output-type`

    Notes
    -----
    The standard deviation is the square root of the average of the squared
    deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.

    The average squared deviation is normally calculated as
    ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is
    specified, the divisor ``N - ddof`` is used instead. In standard
    statistical practice, ``ddof=1`` provides an unbiased estimator of the
    variance of the infinite population. ``ddof=0`` provides a maximum
    likelihood estimate of the variance for normally distributed variables.
    The standard deviation computed in this function is the square root of
    the estimated variance, so even with ``ddof=1``, it will not be an
    unbiased estimate of the standard deviation per se.

    Note that, for complex numbers, `std` takes the absolute value before
    squaring, so that the result is always real and nonnegative.

    For floating-point input, the *std* is computed using the same
    precision the input has. Depending on the input data, this can cause
    the results to be inaccurate, especially for float32 (see example
    below). Specifying a higher-accuracy accumulator using the `dtype`
    keyword can alleviate this issue.

    Examples
    --------
    >>> a = np.array([[1, np.nan], [3, 4]])
    >>> np.nanstd(a)
    1.247219128924647
    >>> np.nanstd(a, axis=0)
    array([1., 0.])
    >>> np.nanstd(a, axis=1)
    array([0. , 0.5]) # may vary
    """
    # std is simply the square root of the variance; all NaN handling and
    # argument validation happens inside nanvar.
    var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
                 keepdims=keepdims, where=where, mean=mean,
                 correction=correction)
    if isinstance(var, np.ndarray):
        # Take the sqrt in place so `out` (if given) keeps holding the result.
        std = np.sqrt(var, out=var)
    elif hasattr(var, 'dtype'):
        # 0-d / scalar with a dtype: preserve the exact scalar type.
        std = var.dtype.type(np.sqrt(var))
    else:
        # Plain Python scalar (e.g. from object arrays).
        std = np.sqrt(var)
    return std