# ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/array/chunk.py
""" A set of NumPy functions to apply per chunk """
from __future__ import absolute_import, division, print_function
from functools import wraps
from toolz import concat
import numpy as np
from . import numpy_compat as npcompat
from ..compatibility import Container, Iterable, Sequence
from ..core import flatten
from ..utils import ignoring
from numbers import Integral
try:
from numpy import take_along_axis
except ImportError: # pragma: no cover
take_along_axis = npcompat.take_along_axis


def keepdims_wrapper(a_callable):
    """
    A wrapper for functions that don't provide keepdims to ensure that they do.
    """
    @wraps(a_callable)
    def keepdims_wrapped_callable(x, axis=None, keepdims=None, *args, **kwargs):
        r = a_callable(x, axis=axis, *args, **kwargs)

        if not keepdims:
            return r

        axes = axis
        if axes is None:
            axes = range(x.ndim)
        if not isinstance(axes, (Container, Iterable, Sequence)):
            axes = [axes]

        # Reinsert each reduced axis as a length-1 dimension
        r_slice = tuple()
        for each_axis in range(x.ndim):
            if each_axis in axes:
                r_slice += (None,)
            else:
                r_slice += (slice(None),)

        r = r[r_slice]
        return r

    return keepdims_wrapped_callable
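

# Illustrative usage (an added sketch, not part of the original module):
# wrapping ``np.argmax`` makes it honour ``keepdims=True`` by reinserting the
# reduced axis as a length-1 dimension.
#
#   >>> wrapped_argmax = keepdims_wrapper(np.argmax)
#   >>> a = np.arange(6).reshape(2, 3)
#   >>> wrapped_argmax(a, axis=1, keepdims=True).shape
#   (2, 1)
#   >>> wrapped_argmax(a, axis=1).shape
#   (2,)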


# Re-export NumPy reductions for use on chunks.  The ``arg*`` functions lack a
# ``keepdims`` argument, so wrap them to ensure they provide it.
sum = np.sum
prod = np.prod
min = np.min
max = np.max
argmin = keepdims_wrapper(np.argmin)
nanargmin = keepdims_wrapper(np.nanargmin)
argmax = keepdims_wrapper(np.argmax)
nanargmax = keepdims_wrapper(np.nanargmax)
any = np.any
all = np.all
nansum = np.nansum
nanprod = np.nanprod

try:
    from numpy import nancumprod, nancumsum
except ImportError:  # pragma: no cover
    nancumprod = npcompat.nancumprod
    nancumsum = npcompat.nancumsum

nanmin = np.nanmin
nanmax = np.nanmax
mean = np.mean

with ignoring(AttributeError):
    nanmean = np.nanmean

var = np.var

with ignoring(AttributeError):
    nanvar = np.nanvar

std = np.std

with ignoring(AttributeError):
    nanstd = np.nanstd


def coarsen(reduction, x, axes, trim_excess=False):
    """ Coarsen array by applying reduction to fixed size neighborhoods

    Parameters
    ----------
    reduction: function
        Function like np.sum, np.mean, etc...
    x: np.ndarray
        Array to be coarsened
    axes: dict
        Mapping of axis to coarsening factor
    trim_excess: bool, optional
        Whether to trim trailing elements that do not fill a complete
        neighborhood (default False)

    Examples
    --------
    >>> x = np.array([1, 2, 3, 4, 5, 6])
    >>> coarsen(np.sum, x, {0: 2})
    array([ 3,  7, 11])
    >>> coarsen(np.max, x, {0: 3})
    array([3, 6])

    Provide dictionary of scale per dimension

    >>> x = np.arange(24).reshape((4, 6))
    >>> x
    array([[ 0,  1,  2,  3,  4,  5],
           [ 6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17],
           [18, 19, 20, 21, 22, 23]])
    >>> coarsen(np.min, x, {0: 2, 1: 3})
    array([[ 0,  3],
           [12, 15]])

    Excess elements that do not fill a complete neighborhood must be handled
    explicitly, e.g. with ``trim_excess=True``

    >>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
    >>> coarsen(np.min, x, {0: 3}, trim_excess=True)
    array([1, 4])
    """
    # Insert singleton dimensions if they don't exist already
    for i in range(x.ndim):
        if i not in axes:
            axes[i] = 1

    if trim_excess:
        ind = tuple(slice(0, -(d % axes[i]))
                    if d % axes[i] else
                    slice(None, None) for i, d in enumerate(x.shape))
        x = x[ind]

    # (10, 10) -> (5, 2, 5, 2)
    newshape = tuple(concat([(x.shape[i] // axes[i], axes[i])
                             for i in range(x.ndim)]))

    return reduction(x.reshape(newshape), axis=tuple(range(1, x.ndim * 2, 2)))
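

# Shape bookkeeping sketch (added illustration, not part of the original
# module): coarsening a (4, 6) array by factors {0: 2, 1: 3} reshapes it to
# (2, 2, 2, 3) and reduces over the interleaved block axes (1, 3).
#
#   >>> x = np.arange(24).reshape((4, 6))
#   >>> x.reshape((2, 2, 2, 3)).sum(axis=(1, 3)).shape
#   (2, 2)
#   >>> coarsen(np.sum, x, {0: 2, 1: 3})
#   array([[ 24,  42],
#          [ 96, 114]])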


def trim(x, axes=None):
    """ Trim boundaries off of array

    >>> x = np.arange(24).reshape((4, 6))
    >>> trim(x, axes={0: 0, 1: 1})
    array([[ 1,  2,  3,  4],
           [ 7,  8,  9, 10],
           [13, 14, 15, 16],
           [19, 20, 21, 22]])

    >>> trim(x, axes={0: 1, 1: 1})
    array([[ 7,  8,  9, 10],
           [13, 14, 15, 16]])
    """
    if isinstance(axes, Integral):
        axes = [axes] * x.ndim
    if isinstance(axes, dict):
        axes = [axes.get(i, 0) for i in range(x.ndim)]
    return x[tuple(slice(ax, -ax if ax else None) for ax in axes)]


def topk(a, k, axis, keepdims):
    """ Chunk and combine function of topk

    Extract the k largest elements from a on the given axis.
    If k is negative, extract the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]
    if abs(k) >= a.shape[axis]:
        return a

    a = np.partition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    return a[tuple(k_slice if i == axis else slice(None)
                   for i in range(a.ndim))]
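

# Illustrative sketch (added, not part of the original module): with k=2 the
# chunk-level topk keeps the two largest values per row, in no particular
# order; sorting happens later in ``topk_aggregate``.
#
#   >>> a = np.array([[5, 1, 4], [2, 9, 7]])
#   >>> np.sort(topk(a, 2, axis=(1,), keepdims=True), axis=1)
#   array([[4, 5],
#          [7, 9]])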


def topk_aggregate(a, k, axis, keepdims):
    """ Final aggregation function of topk

    Invoke topk one final time and then sort the results internally.
    """
    assert keepdims is True
    a = topk(a, k, axis, keepdims)
    axis = axis[0]
    a = np.sort(a, axis=axis)
    if k < 0:
        return a
    return a[tuple(slice(None, None, -1) if i == axis else slice(None)
                   for i in range(a.ndim))]
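

# Illustrative sketch (added, not part of the original module): the aggregate
# step sorts the surviving values, descending for positive k and ascending
# for negative k.
#
#   >>> a = np.array([[5, 1, 4], [2, 9, 7]])
#   >>> topk_aggregate(a, 2, axis=(1,), keepdims=True)
#   array([[5, 4],
#          [9, 7]])
#   >>> topk_aggregate(a, -2, axis=(1,), keepdims=True)
#   array([[1, 4],
#          [2, 7]])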


def argtopk_preprocess(a, idx):
    """ Preparatory step for argtopk

    Put data together with its original indices in a tuple.
    """
    return a, idx


def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk

    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]

    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate([np.broadcast_to(idxi, ai.shape)
                              for ai, idxi in a_plus_idx], axis)
    else:
        a, idx = a_plus_idx

    if abs(k) >= a.shape[axis]:
        return a_plus_idx

    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None)
                      for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
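

# Illustrative sketch (added, not part of the original module): argtopk works
# on the (values, indices) pairs produced by ``argtopk_preprocess`` and
# returns the pair restricted to the top-k entries (unsorted).
#
#   >>> a = np.array([3, 8, 1, 6])
#   >>> vals, inds = argtopk(argtopk_preprocess(a, np.arange(4)), 2,
#   ...                      axis=(0,), keepdims=True)
#   >>> np.sort(vals), np.sort(inds)
#   (array([6, 8]), array([1, 3]))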


def argtopk_aggregate(a_plus_idx, k, axis, keepdims):
    """ Final aggregation function of argtopk

    Invoke argtopk one final time, sort the results internally, drop the data
    and return the index only.
    """
    assert keepdims is True
    a, idx = argtopk(a_plus_idx, k, axis, keepdims)
    axis = axis[0]

    idx2 = np.argsort(a, axis=axis)
    idx = take_along_axis(idx, idx2, axis)
    if k < 0:
        return idx
    return idx[tuple(slice(None, None, -1) if i == axis else slice(None)
                     for i in range(idx.ndim))]
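

# Illustrative sketch (added, not part of the original module): the aggregate
# step drops the values and returns only the indices, ordered by descending
# value for positive k.
#
#   >>> a = np.array([3, 8, 1, 6])
#   >>> argtopk_aggregate((a, np.arange(4)), 2, axis=(0,), keepdims=True)
#   array([1, 3])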


def arange(start, stop, step, length, dtype):
    res = np.arange(start, stop, step, dtype)
    # Floating-point rounding can make np.arange produce one element more than
    # the chunk should hold; clip it back to the expected length.
    return res[:-1] if len(res) > length else res
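

# Illustrative sketch (added, not part of the original module): a case where
# floating-point rounding makes np.arange overshoot the expected chunk length.
#
#   >>> len(np.arange(0.5, 0.8, 0.1))
#   4
#   >>> len(arange(0.5, 0.8, 0.1, 3, float))
#   3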


def astype(x, astype_dtype=None, **kwargs):
    return x.astype(astype_dtype, **kwargs)


def view(x, dtype, order='C'):
    if order == 'C':
        # Reinterpretation acts on the last (contiguous) axis
        x = np.ascontiguousarray(x)
        return x.view(dtype)
    else:
        # For Fortran order, reinterpret the first axis by viewing the
        # transpose and transposing back
        x = np.asfortranarray(x)
        return x.T.view(dtype).T
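

# Illustrative sketch (added, not part of the original module): viewing int16
# data as int8 doubles the last axis for C order and the first axis for
# Fortran order.
#
#   >>> x = np.arange(6, dtype='i2').reshape(2, 3)
#   >>> view(x, 'i1', order='C').shape
#   (2, 6)
#   >>> view(x, 'i1', order='F').shape
#   (4, 3)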


def slice_with_int_dask_array(x, idx, offset, x_size, axis):
    """ Chunk function of `slice_with_int_dask_array_on_axis`.
    Slice one chunk of x by one chunk of idx.

    Parameters
    ----------
    x: ndarray, any dtype, any shape
        i-th chunk of x
    idx: ndarray, ndim=1, dtype=any integer
        j-th chunk of idx (cartesian product with the chunks of x)
    offset: ndarray, shape=(1, ), dtype=int64
        Index of the first element along axis of the current chunk of x
    x_size: int
        Total size of the x da.Array along axis
    axis: int
        normalized axis to take elements from (0 <= axis < x.ndim)

    Returns
    -------
    x sliced along axis, using only the elements of idx that fall inside the
    current chunk.
    """
    # Needed when idx is unsigned
    idx = idx.astype(np.int64)

    # Normalize negative indices
    idx = np.where(idx < 0, idx + x_size, idx)

    # A chunk of the offset dask Array is a numpy array with shape (1, ).
    # It indicates the index of the first element along axis of the current
    # chunk of x.
    idx = idx - offset

    # Drop elements of idx that do not fall inside the current chunk of x
    idx_filter = (idx >= 0) & (idx < x.shape[axis])
    idx = idx[idx_filter]

    # np.take does not support slice indices
    # return np.take(x, idx, axis)
    return x[tuple(
        idx if i == axis else slice(None)
        for i in range(x.ndim)
    )]
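

# Illustrative sketch (added, not part of the original module): for a chunk
# that covers positions 3..5 of a length-6 axis (offset 3), only the indices
# that fall inside the chunk are kept, shifted to chunk-local coordinates.
#
#   >>> x_chunk = np.array([30, 40, 50])  # global positions 3, 4, 5
#   >>> slice_with_int_dask_array(x_chunk, np.array([5, 0, 3]),
#   ...                           offset=np.array([3]), x_size=6, axis=0)
#   array([50, 30])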


def slice_with_int_dask_array_aggregate(idx, chunk_outputs, x_chunks, axis):
    """ Final aggregation function of `slice_with_int_dask_array_on_axis`.
    Aggregate all chunks of x by one chunk of idx, reordering the output of
    `slice_with_int_dask_array`.

    Note that there is no combine function, as a recursive aggregation (e.g.
    with split_every) would not give any benefit.

    Parameters
    ----------
    idx: ndarray, ndim=1, dtype=any integer
        j-th chunk of idx
    chunk_outputs: ndarray
        concatenation along axis of the outputs of `slice_with_int_dask_array`
        for all chunks of x and the j-th chunk of idx
    x_chunks: tuple
        dask chunks of the x da.Array along axis, e.g. ``(3, 3, 2)``
    axis: int
        normalized axis to take elements from (0 <= axis < x.ndim)

    Returns
    -------
    Selection from all chunks of x for the j-th chunk of idx, in the correct
    order
    """
    # Needed when idx is unsigned
    idx = idx.astype(np.int64)

    # Normalize negative indices
    idx = np.where(idx < 0, idx + sum(x_chunks), idx)

    x_chunk_offset = 0
    chunk_output_offset = 0

    # Assemble the final index that picks from the output of the previous
    # kernel by adding together one layer per chunk of x
    # FIXME: this could probably be reimplemented with a faster search-based
    # algorithm
    idx_final = np.zeros_like(idx)
    for x_chunk in x_chunks:
        idx_filter = (idx >= x_chunk_offset) & (idx < x_chunk_offset + x_chunk)
        idx_cum = np.cumsum(idx_filter)
        idx_final += np.where(idx_filter, idx_cum - 1 + chunk_output_offset, 0)
        x_chunk_offset += x_chunk
        if idx_cum.size > 0:
            chunk_output_offset += idx_cum[-1]

    # np.take does not support slice indices
    # return np.take(chunk_outputs, idx_final, axis)
    return chunk_outputs[tuple(
        idx_final if i == axis else slice(None)
        for i in range(chunk_outputs.ndim)
    )]
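

# Illustrative sketch (added, not part of the original module): reordering the
# concatenated per-chunk outputs back into the order requested by idx.
# Assume x = np.array([0, 10, 20, 30, 40, 50]) with chunks (3, 3) and
# idx = [5, 0, 3]; the concatenated outputs of the previous kernel are then
# [0] (from chunk 0) followed by [50, 30] (from chunk 1).
#
#   >>> chunk_outputs = np.array([0, 50, 30])
#   >>> slice_with_int_dask_array_aggregate(np.array([5, 0, 3]), chunk_outputs,
#   ...                                     x_chunks=(3, 3), axis=0)
#   array([50,  0, 30])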