You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
446 lines
16 KiB
446 lines
16 KiB
from __future__ import absolute_import, division, print_function
|
|
|
|
from distutils.version import LooseVersion
|
|
|
|
import numpy as np
|
|
import warnings
|
|
|
|
# Taken from scikit-learn:
|
|
# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/fixes.py#L84
|
|
try:
|
|
with warnings.catch_warnings():
|
|
if (not np.allclose(np.divide(.4, 1, casting="unsafe"),
|
|
np.divide(.4, 1, casting="unsafe", dtype=np.float)) or
|
|
not np.allclose(np.divide(1, .5, dtype='i8'), 2) or
|
|
not np.allclose(np.divide(.4, 1), .4)):
|
|
raise TypeError('Divide not working with dtype: '
|
|
'https://github.com/numpy/numpy/issues/3484')
|
|
divide = np.divide
|
|
ma_divide = np.ma.divide
|
|
|
|
except TypeError:
|
|
# Divide with dtype doesn't work on Python 3
|
|
def divide(x1, x2, out=None, dtype=None):
|
|
"""Implementation of numpy.divide that works with dtype kwarg.
|
|
|
|
Temporary compatibility fix for a bug in numpy's version. See
|
|
https://github.com/numpy/numpy/issues/3484 for the relevant issue."""
|
|
x = np.divide(x1, x2, out)
|
|
if dtype is not None:
|
|
x = x.astype(dtype)
|
|
return x
|
|
|
|
ma_divide = np.ma.core._DomainedBinaryOperation(divide,
|
|
np.ma.core._DomainSafeDivide(),
|
|
0, 1)
|
|
|
|
if LooseVersion(np.__version__) < '1.12.0':
|
|
# These functions were added in numpy 1.12.0. For previous versions they
|
|
# are duplicated here
|
|
|
|
# See https://github.com/numpy/numpy/blob/master/LICENSE.txt
|
|
# or NUMPY_LICENSE.txt within this directory
|
|
def _replace_nan(a, val):
|
|
"""
|
|
If `a` is of inexact type, make a copy of `a`, replace NaNs with
|
|
the `val` value, and return the copy together with a boolean mask
|
|
marking the locations where NaNs were present. If `a` is not of
|
|
inexact type, do nothing and return `a` together with a mask of None.
|
|
|
|
Note that scalars will end up as array scalars, which is important
|
|
for using the result as the value of the out argument in some
|
|
operations.
|
|
|
|
Parameters
|
|
----------
|
|
a : array-like
|
|
Input array.
|
|
val : float
|
|
NaN values are set to val before doing the operation.
|
|
|
|
Returns
|
|
-------
|
|
y : ndarray
|
|
If `a` is of inexact type, return a copy of `a` with the NaNs
|
|
replaced by the fill value, otherwise return `a`.
|
|
mask: {bool, None}
|
|
If `a` is of inexact type, return a boolean mask marking locations of
|
|
NaNs, otherwise return None.
|
|
|
|
"""
|
|
is_new = not isinstance(a, np.ndarray)
|
|
if is_new:
|
|
a = np.array(a)
|
|
if not issubclass(a.dtype.type, np.inexact):
|
|
return a, None
|
|
if not is_new:
|
|
# need copy
|
|
a = np.array(a, subok=True)
|
|
|
|
mask = np.isnan(a)
|
|
np.copyto(a, val, where=mask)
|
|
return a, mask
|
|
|
|
def nancumsum(a, axis=None, dtype=None, out=None):
|
|
"""
|
|
Return the cumulative sum of array elements over a given axis treating Not a
|
|
Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
|
|
encountered and leading NaNs are replaced by zeros.
|
|
|
|
Zeros are returned for slices that are all-NaN or empty.
|
|
|
|
.. versionadded:: 1.12.0
|
|
|
|
Parameters
|
|
----------
|
|
a : array_like
|
|
Input array.
|
|
axis : int, optional
|
|
Axis along which the cumulative sum is computed. The default
|
|
(None) is to compute the cumsum over the flattened array.
|
|
dtype : dtype, optional
|
|
Type of the returned array and of the accumulator in which the
|
|
elements are summed. If `dtype` is not specified, it defaults
|
|
to the dtype of `a`, unless `a` has an integer dtype with a
|
|
precision less than that of the default platform integer. In
|
|
that case, the default platform integer is used.
|
|
out : ndarray, optional
|
|
Alternative output array in which to place the result. It must
|
|
have the same shape and buffer length as the expected output
|
|
but the type will be cast if necessary. See `doc.ufuncs`
|
|
(Section "Output arguments") for more details.
|
|
|
|
Returns
|
|
-------
|
|
nancumsum : ndarray.
|
|
A new array holding the result is returned unless `out` is
|
|
specified, in which it is returned. The result has the same
|
|
size as `a`, and the same shape as `a` if `axis` is not None
|
|
or `a` is a 1-d array.
|
|
|
|
See Also
|
|
--------
|
|
:func:`numpy.cumsum` : Cumulative sum across array propagating NaNs.
|
|
isnan : Show which elements are NaN.
|
|
|
|
Examples
|
|
--------
|
|
>>> nancumsum(1)
|
|
array([1])
|
|
>>> nancumsum([1])
|
|
array([1])
|
|
>>> nancumsum([1, np.nan])
|
|
array([ 1., 1.])
|
|
>>> a = np.array([[1, 2], [3, np.nan]])
|
|
>>> nancumsum(a)
|
|
array([ 1., 3., 6., 6.])
|
|
>>> nancumsum(a, axis=0)
|
|
array([[ 1., 2.],
|
|
[ 4., 2.]])
|
|
>>> nancumsum(a, axis=1)
|
|
array([[ 1., 3.],
|
|
[ 3., 3.]])
|
|
|
|
"""
|
|
a, mask = _replace_nan(a, 0)
|
|
return np.cumsum(a, axis=axis, dtype=dtype, out=out)
|
|
|
|
def nancumprod(a, axis=None, dtype=None, out=None):
|
|
"""
|
|
Return the cumulative product of array elements over a given axis treating Not a
|
|
Numbers (NaNs) as one. The cumulative product does not change when NaNs are
|
|
encountered and leading NaNs are replaced by ones.
|
|
|
|
Ones are returned for slices that are all-NaN or empty.
|
|
|
|
.. versionadded:: 1.12.0
|
|
|
|
Parameters
|
|
----------
|
|
a : array_like
|
|
Input array.
|
|
axis : int, optional
|
|
Axis along which the cumulative product is computed. By default
|
|
the input is flattened.
|
|
dtype : dtype, optional
|
|
Type of the returned array, as well as of the accumulator in which
|
|
the elements are multiplied. If *dtype* is not specified, it
|
|
defaults to the dtype of `a`, unless `a` has an integer dtype with
|
|
a precision less than that of the default platform integer. In
|
|
that case, the default platform integer is used instead.
|
|
out : ndarray, optional
|
|
Alternative output array in which to place the result. It must
|
|
have the same shape and buffer length as the expected output
|
|
but the type of the resulting values will be cast if necessary.
|
|
|
|
Returns
|
|
-------
|
|
nancumprod : ndarray
|
|
A new array holding the result is returned unless `out` is
|
|
specified, in which case it is returned.
|
|
|
|
See Also
|
|
--------
|
|
:func:`numpy.cumprod` : Cumulative product across array propagating NaNs.
|
|
isnan : Show which elements are NaN.
|
|
|
|
Examples
|
|
--------
|
|
>>> nancumprod(1)
|
|
array([1])
|
|
>>> nancumprod([1])
|
|
array([1])
|
|
>>> nancumprod([1, np.nan])
|
|
array([ 1., 1.])
|
|
>>> a = np.array([[1, 2], [3, np.nan]])
|
|
>>> nancumprod(a)
|
|
array([ 1., 2., 6., 6.])
|
|
>>> nancumprod(a, axis=0)
|
|
array([[ 1., 2.],
|
|
[ 3., 2.]])
|
|
>>> nancumprod(a, axis=1)
|
|
array([[ 1., 2.],
|
|
[ 3., 3.]])
|
|
|
|
"""
|
|
a, mask = _replace_nan(a, 1)
|
|
return np.cumprod(a, axis=axis, dtype=dtype, out=out)
|
|
|
|
|
|
if LooseVersion(np.__version__) < '1.15.0':
|
|
# These functions were added in numpy 1.15.0. For previous versions they
|
|
# are duplicated here
|
|
|
|
def _make_along_axis_idx(arr_shape, indices, axis):
|
|
# compute dimensions to iterate over
|
|
if not np.issubdtype(indices.dtype, np.integer):
|
|
raise IndexError('`indices` must be an integer array')
|
|
if len(arr_shape) != indices.ndim:
|
|
raise ValueError(
|
|
"`indices` and `arr` must have the same number of dimensions")
|
|
shape_ones = (1,) * indices.ndim
|
|
dest_dims = list(range(axis)) + [None] + list(
|
|
range(axis + 1, indices.ndim))
|
|
|
|
# build a fancy index, consisting of orthogonal aranges, with the
|
|
# requested index inserted at the right location
|
|
fancy_index = []
|
|
for dim, n in zip(dest_dims, arr_shape):
|
|
if dim is None:
|
|
fancy_index.append(indices)
|
|
else:
|
|
ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim + 1:]
|
|
fancy_index.append(np.arange(n).reshape(ind_shape))
|
|
|
|
return tuple(fancy_index)
|
|
|
|
def take_along_axis(arr, indices, axis):
|
|
"""
|
|
Take values from the input array by matching 1d index and data slices.
|
|
This iterates over matching 1d slices oriented along the specified axis in
|
|
the index and data arrays, and uses the former to look up values in the
|
|
latter. These slices can be different lengths.
|
|
Functions returning an index along an axis, like `argsort` and
|
|
`argpartition`, produce suitable indices for this function.
|
|
.. versionadded:: 1.15.0
|
|
Parameters
|
|
----------
|
|
arr: ndarray (Ni..., M, Nk...)
|
|
Source array
|
|
indices: ndarray (Ni..., J, Nk...)
|
|
Indices to take along each 1d slice of `arr`. This must match the
|
|
dimension of arr, but dimensions Ni and Nj only need to broadcast
|
|
against `arr`.
|
|
axis: int
|
|
The axis to take 1d slices along. If axis is None, the input array is
|
|
treated as if it had first been flattened to 1d, for consistency with
|
|
`sort` and `argsort`.
|
|
Returns
|
|
-------
|
|
out: ndarray (Ni..., J, Nk...)
|
|
The indexed result.
|
|
Notes
|
|
-----
|
|
This is equivalent to (but faster than) the following use of `ndindex` and
|
|
`s_`, which sets each of ``ii`` and ``kk`` to a tuple of indices::
|
|
Ni, M, Nk = a.shape[:axis], a.shape[axis], a.shape[axis+1:]
|
|
J = indices.shape[axis] # Need not equal M
|
|
out = np.empty(Nk + (J,) + Nk)
|
|
for ii in ndindex(Ni):
|
|
for kk in ndindex(Nk):
|
|
a_1d = a [ii + s_[:,] + kk]
|
|
indices_1d = indices[ii + s_[:,] + kk]
|
|
out_1d = out [ii + s_[:,] + kk]
|
|
for j in range(J):
|
|
out_1d[j] = a_1d[indices_1d[j]]
|
|
Equivalently, eliminating the inner loop, the last two lines would be::
|
|
out_1d[:] = a_1d[indices_1d]
|
|
See Also
|
|
--------
|
|
take : Take along an axis, using the same indices for every 1d slice
|
|
put_along_axis :
|
|
Put values into the destination array by matching 1d index and data slices
|
|
Examples
|
|
--------
|
|
For this sample array
|
|
>>> a = np.array([[10, 30, 20], [60, 40, 50]])
|
|
|
|
We can sort either by using sort directly, or argsort and this function
|
|
>>> np.sort(a, axis=1)
|
|
array([[10, 20, 30],
|
|
[40, 50, 60]])
|
|
>>> ai = np.argsort(a, axis=1); ai
|
|
array([[0, 2, 1],
|
|
[1, 2, 0]])
|
|
>>> take_along_axis(a, ai, axis=1)
|
|
array([[10, 20, 30],
|
|
[40, 50, 60]])
|
|
|
|
The same works for max and min, if you expand the dimensions:
|
|
>>> np.expand_dims(np.max(a, axis=1), axis=1)
|
|
array([[30],
|
|
[60]])
|
|
>>> ai = np.expand_dims(np.argmax(a, axis=1), axis=1)
|
|
>>> ai
|
|
array([[1],
|
|
[0]])
|
|
>>> take_along_axis(a, ai, axis=1)
|
|
array([[30],
|
|
[60]])
|
|
|
|
If we want to get the max and min at the same time,
|
|
we can stack the indices first:
|
|
>>> ai_min = np.expand_dims(np.argmin(a, axis=1), axis=1)
|
|
>>> ai_max = np.expand_dims(np.argmax(a, axis=1), axis=1)
|
|
>>> ai = np.concatenate([ai_min, ai_max], axis=1)
|
|
>>> ai
|
|
array([[0, 1],
|
|
[1, 0]])
|
|
>>> take_along_axis(a, ai, axis=1)
|
|
array([[10, 30],
|
|
[40, 60]])
|
|
"""
|
|
# normalize inputs
|
|
if axis is None:
|
|
arr = arr.flat
|
|
arr_shape = (len(arr),) # flatiter has no .shape
|
|
axis = 0
|
|
else:
|
|
if axis < 0:
|
|
axis = arr.ndim + axis
|
|
arr_shape = arr.shape
|
|
|
|
# use the fancy index
|
|
return arr[_make_along_axis_idx(arr_shape, indices, axis)]
|
|
|
|
|
|
def _make_sliced_dtype_np_ge_16(dtype, index):
|
|
# This was briefly added in 1.14.0
|
|
# https://github.com/numpy/numpy/pull/6053, NumPy >= 1.14
|
|
# which was then reverted in 1.14.1 with
|
|
# https://github.com/numpy/numpy/pull/10411
|
|
# And then was finally released with
|
|
# https://github.com/numpy/numpy/pull/12447
|
|
# in version 1.16.0
|
|
new = {
|
|
'names': index,
|
|
'formats': [dtype.fields[name][0] for name in index],
|
|
'offsets': [dtype.fields[name][1] for name in index],
|
|
'itemsize': dtype.itemsize,
|
|
}
|
|
return np.dtype(new)
|
|
|
|
|
|
def _make_sliced_dtype_np_lt_14(dtype, index):
|
|
# For numpy < 1.14
|
|
dt = np.dtype([(name, dtype[name]) for name in index])
|
|
return dt
|
|
|
|
|
|
if LooseVersion(np.__version__) >= LooseVersion("1.16.0") or \
|
|
LooseVersion(np.__version__) == LooseVersion("1.14.0"):
|
|
_make_sliced_dtype = _make_sliced_dtype_np_ge_16
|
|
else:
|
|
_make_sliced_dtype = _make_sliced_dtype_np_lt_14
|
|
|
|
|
|
class _Recurser(object):
|
|
"""
|
|
Utility class for recursing over nested iterables
|
|
"""
|
|
|
|
# This was copied almost verbatim from numpy.core.shape_base._Recurser
|
|
# See numpy license at https://github.com/numpy/numpy/blob/master/LICENSE.txt
|
|
# or NUMPY_LICENSE.txt within this directory
|
|
|
|
def __init__(self, recurse_if):
|
|
self.recurse_if = recurse_if
|
|
|
|
def map_reduce(self,
|
|
x,
|
|
f_map=lambda x, **kwargs: x,
|
|
f_reduce=lambda x, **kwargs: x,
|
|
f_kwargs=lambda **kwargs: kwargs,
|
|
**kwargs):
|
|
"""
|
|
Iterate over the nested list, applying:
|
|
* ``f_map`` (T -> U) to items
|
|
* ``f_reduce`` (Iterable[U] -> U) to mapped items
|
|
|
|
For instance, ``map_reduce([[1, 2], 3, 4])`` is::
|
|
|
|
f_reduce([
|
|
f_reduce([
|
|
f_map(1),
|
|
f_map(2)
|
|
]),
|
|
f_map(3),
|
|
f_map(4)
|
|
]])
|
|
|
|
|
|
State can be passed down through the calls with `f_kwargs`,
|
|
to iterables of mapped items. When kwargs are passed, as in
|
|
``map_reduce([[1, 2], 3, 4], **kw)``, this becomes::
|
|
|
|
kw1 = f_kwargs(**kw)
|
|
kw2 = f_kwargs(**kw1)
|
|
f_reduce([
|
|
f_reduce([
|
|
f_map(1), **kw2)
|
|
f_map(2, **kw2)
|
|
], **kw1),
|
|
f_map(3, **kw1),
|
|
f_map(4, **kw1)
|
|
]], **kw)
|
|
"""
|
|
def f(x, **kwargs):
|
|
if not self.recurse_if(x):
|
|
return f_map(x, **kwargs)
|
|
else:
|
|
next_kwargs = f_kwargs(**kwargs)
|
|
return f_reduce((
|
|
f(xi, **next_kwargs)
|
|
for xi in x
|
|
), **kwargs)
|
|
return f(x, **kwargs)
|
|
|
|
def walk(self, x, index=()):
|
|
"""
|
|
Iterate over x, yielding (index, value, entering), where
|
|
|
|
* ``index``: a tuple of indices up to this point
|
|
* ``value``: equal to ``x[index[0]][...][index[-1]]``. On the first iteration, is
|
|
``x`` itself
|
|
* ``entering``: bool. The result of ``recurse_if(value)``
|
|
"""
|
|
do_recurse = self.recurse_if(x)
|
|
yield index, x, do_recurse
|
|
|
|
if not do_recurse:
|
|
return
|
|
for i, xi in enumerate(x):
|
|
# yield from ...
|
|
for v in self.walk(xi, index + (i,)):
|
|
yield v
|