You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1343 lines
39 KiB
1343 lines
39 KiB
6 years ago
|
from __future__ import division, print_function, absolute_import
|
||
|
|
||
|
import inspect
|
||
|
import math
|
||
|
import warnings
|
||
|
from distutils.version import LooseVersion
|
||
|
from functools import wraps, partial
|
||
|
from numbers import Real, Integral
|
||
|
|
||
|
import numpy as np
|
||
|
from toolz import concat, sliding_window, interleave
|
||
|
|
||
|
from ..compatibility import Iterable
|
||
|
from ..core import flatten
|
||
|
from ..base import tokenize
|
||
|
from ..highlevelgraph import HighLevelGraph
|
||
|
from ..utils import funcname
|
||
|
from . import chunk
|
||
|
from .creation import arange, diag, empty, indices
|
||
|
from .utils import safe_wraps, validate_axis
|
||
|
from .wrap import ones
|
||
|
from .ufunc import multiply, sqrt
|
||
|
|
||
|
from .core import (Array, map_blocks, elemwise, from_array, asarray,
|
||
|
asanyarray, concatenate, stack, blockwise, broadcast_shapes,
|
||
|
is_scalar_for_elemwise, broadcast_to, tensordot_lookup)
|
||
|
|
||
|
from .einsumfuncs import einsum # noqa
|
||
|
|
||
|
|
||
|
@wraps(np.array)
def array(x, dtype=None, ndmin=None):
    # Prepend length-1 axes until ``x`` has at least ``ndmin`` dimensions.
    # ``Ellipsis`` is used instead of ``:`` so that 0-d inputs, which cannot
    # be indexed with a slice, are promoted too (``x[None, :]`` raises an
    # IndexError for them).
    if ndmin is not None:
        while x.ndim < ndmin:
            x = x[None, ...]

    # Cast only when a dtype was requested and it differs from the current one.
    if dtype is not None and x.dtype != dtype:
        x = x.astype(dtype)
    return x
|
||
|
|
||
|
|
||
|
@wraps(np.result_type)
def result_type(*args):
    # Scalars (as judged by ``is_scalar_for_elemwise``) are forwarded
    # untouched; every other operand contributes only its ``dtype``.
    operands = []
    for arg in args:
        operands.append(arg if is_scalar_for_elemwise(arg) else arg.dtype)
    return np.result_type(*operands)
|
||
|
|
||
|
|
||
|
@wraps(np.atleast_3d)
def atleast_3d(*arys):
    # Index expression that pads an array of the given rank up to 3-D.
    # Arrays that already have three or more dimensions are left alone.
    pad_index = {
        0: (None, None, None),
        1: (None, slice(None), None),
        2: (slice(None), slice(None), None),
    }

    promoted = []
    for ary in arys:
        ary = asanyarray(ary)
        if ary.ndim in pad_index:
            ary = ary[pad_index[ary.ndim]]
        promoted.append(ary)

    # A single input yields a bare array, anything else a list.
    return promoted[0] if len(promoted) == 1 else promoted
|
||
|
|
||
|
|
||
|
@wraps(np.atleast_2d)
def atleast_2d(*arys):
    # Index expression that pads an array of the given rank up to 2-D.
    # Arrays that already have two or more dimensions are left alone.
    pad_index = {
        0: (None, None),
        1: (None, slice(None)),
    }

    promoted = []
    for ary in arys:
        ary = asanyarray(ary)
        if ary.ndim in pad_index:
            ary = ary[pad_index[ary.ndim]]
        promoted.append(ary)

    # A single input yields a bare array, anything else a list.
    return promoted[0] if len(promoted) == 1 else promoted
|
||
|
|
||
|
|
||
|
@wraps(np.atleast_1d)
def atleast_1d(*arys):
    promoted = []
    for ary in arys:
        ary = asanyarray(ary)
        # Only 0-d inputs need an extra leading axis.
        promoted.append(ary[None] if ary.ndim == 0 else ary)

    # A single input yields a bare array, anything else a list.
    return promoted[0] if len(promoted) == 1 else promoted
|
||
|
|
||
|
|
||
|
@wraps(np.vstack)
def vstack(tup, allow_unknown_chunksizes=False):
    # Promote every input to at least 2-D, then join along the first axis.
    promoted = tuple(map(atleast_2d, tup))
    return concatenate(
        promoted,
        axis=0,
        allow_unknown_chunksizes=allow_unknown_chunksizes,
    )
|
||
|
|
||
|
|
||
|
@wraps(np.hstack)
def hstack(tup, allow_unknown_chunksizes=False):
    # When every input is 1-D they are joined end to end (axis 0);
    # otherwise the join happens along the second axis.
    join_axis = 0 if all(x.ndim == 1 for x in tup) else 1
    return concatenate(
        tup,
        axis=join_axis,
        allow_unknown_chunksizes=allow_unknown_chunksizes,
    )
|
||
|
|
||
|
|
||
|
@wraps(np.dstack)
def dstack(tup, allow_unknown_chunksizes=False):
    # Promote every input to at least 3-D, then join along the third axis.
    promoted = tuple(map(atleast_3d, tup))
    return concatenate(
        promoted,
        axis=2,
        allow_unknown_chunksizes=allow_unknown_chunksizes,
    )
|
||
|
|
||
|
|
||
|
@wraps(np.swapaxes)
def swapaxes(a, axis1, axis2):
    # Swapping an axis with itself is a no-op: hand back the input unchanged.
    if axis1 == axis2:
        return a

    # Translate negative axes into non-negative positions.
    ndim = a.ndim
    axis1 = axis1 + ndim if axis1 < 0 else axis1
    axis2 = axis2 + ndim if axis2 < 0 else axis2

    # The output index is the input index with the two positions exchanged.
    in_index = list(range(ndim))
    out_index = in_index[:]
    out_index[axis1], out_index[axis2] = axis2, axis1

    return blockwise(
        np.swapaxes, out_index, a, in_index,
        axis1=axis1, axis2=axis2, dtype=a.dtype,
    )
|
||
|
|
||
|
|
||
|
@wraps(np.transpose)
def transpose(a, axes=None):
    ndim = a.ndim
    if not axes:
        # No (or empty) permutation given: reverse the order of the axes.
        axes = tuple(range(ndim))[::-1]
    elif len(axes) != ndim:
        raise ValueError("axes don't match array")

    # Normalise negative entries to non-negative axis numbers.
    axes = tuple(ax + ndim if ax < 0 else ax for ax in axes)
    return blockwise(
        np.transpose, axes, a, tuple(range(ndim)),
        dtype=a.dtype, axes=axes,
    )
|
||
|
|
||
|
|
||
|
def flip(m, axis):
    """
    Reverse element order along axis.

    Parameters
    ----------
    m : array_like
        Input; it is passed through ``asanyarray`` first.
    axis : int
        Axis to reverse element order of.

    Returns
    -------
    reversed array : ndarray

    Raises
    ------
    ValueError
        If ``axis`` is not a valid position for the dimensionality of ``m``.
    """
    m = asanyarray(m)

    # Start from a "take everything" index and reverse only the chosen axis.
    index = [slice(None) for _ in range(m.ndim)]
    try:
        index[axis] = slice(None, None, -1)
    except IndexError:
        raise ValueError(
            "`axis` of %s invalid for %s-D array" % (str(axis), str(m.ndim))
        )

    return m[tuple(index)]
|
||
|
|
||
|
|
||
|
@wraps(np.flipud)
def flipud(m):
    # Up/down flip: reverse the first axis.
    return flip(m, axis=0)
|
||
|
|
||
|
|
||
|
@wraps(np.fliplr)
def fliplr(m):
    # Left/right flip: reverse the second axis.
    return flip(m, axis=1)
|
||
|
|
||
|
|
||
|
# Upper- and lower-case ASCII letters, exposed as module-level constants.
ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
alphabet = ALPHABET.lower()
|
||
|
|
||
|
|
||
|
def _tensordot(a, b, axes):
    """
    Per-block kernel that ``tensordot`` hands to ``blockwise``.

    Contracts ``a`` and ``b`` over ``axes`` (a pair ``(axes_of_a, axes_of_b)``)
    and then re-inserts a length-1 axis at every contracted position of ``a``,
    so the result keeps a placeholder for each contracted axis.
    """
    left_axes = axes[0]
    right_axes = axes[1]

    # Dispatch on the operand with the higher ``__array_priority__``.
    preferred = max([a, b], key=lambda arr: arr.__array_priority__)
    contract = tensordot_lookup.dispatch(type(preferred))

    # workaround may be removed when numpy version (currently 1.13.0) is bumped
    # When the contracted dimensions match and one of them has length zero,
    # build the all-zeros result directly instead of calling the backend.
    a_dims = np.array([a.shape[i] for i in left_axes])
    b_dims = np.array([b.shape[i] for i in right_axes])
    if len(a_dims) > 0 and (a_dims == b_dims).all() and a_dims.min() == 0:
        kept = [s for i, s in enumerate(a.shape) if i not in left_axes]
        kept = kept + [s for i, s in enumerate(b.shape) if i not in right_axes]
        out = np.zeros(tuple(kept))
    else:
        out = contract(a, b, axes=axes)

    # Put a length-1 axis back where each contracted axis of ``a`` used to be.
    index = [slice(None, None)] * out.ndim
    for ax in sorted(left_axes):
        index.insert(ax, None)
    return out[tuple(index)]
|
||
|
|
||
|
|
||
|
@wraps(np.tensordot)
def tensordot(lhs, rhs, axes=2):
    if isinstance(axes, Iterable):
        left_axes, right_axes = axes
    else:
        # Integer ``axes=N``: contract the last N axes of ``lhs`` with the
        # first N axes of ``rhs`` *in order* (lhs axis -N with rhs axis 0,
        # ..., lhs axis -1 with rhs axis N-1), as numpy.tensordot does.
        # Building the lhs axes in reverse would pair them up incorrectly
        # whenever N > 1.
        left_axes = tuple(range(lhs.ndim - axes, lhs.ndim))
        right_axes = tuple(range(0, axes))

    # Normalise both axis specifications to tuples.
    if isinstance(left_axes, Integral):
        left_axes = (left_axes,)
    if isinstance(right_axes, Integral):
        right_axes = (right_axes,)
    if isinstance(left_axes, list):
        left_axes = tuple(left_axes)
    if isinstance(right_axes, list):
        right_axes = tuple(right_axes)

    dt = np.promote_types(lhs.dtype, rhs.dtype)

    # Give every axis of both operands its own index label to start with.
    left_index = list(range(lhs.ndim))
    right_index = list(range(lhs.ndim, lhs.ndim + rhs.ndim))
    out_index = left_index + right_index

    # A contracted rhs axis shares the label of its lhs partner and no longer
    # appears in the output on its own.
    for left_ax, right_ax in zip(left_axes, right_axes):
        out_index.remove(right_index[right_ax])
        right_index[right_ax] = left_index[left_ax]

    # ``_tensordot`` keeps a length-1 placeholder for each contracted lhs
    # axis; summing over those axes combines the per-block partial results.
    intermediate = blockwise(_tensordot, out_index,
                             lhs, left_index,
                             rhs, right_index, dtype=dt,
                             axes=(left_axes, right_axes))

    return intermediate.sum(axis=left_axes)
|
||
|
|
||
|
|
||
|
@wraps(np.dot)
def dot(a, b):
    # Contract the last axis of ``a`` with the second-to-last axis of ``b``
    # (for a 1-D ``b`` the latter evaluates to -1, i.e. its only axis).
    a_axes = (a.ndim - 1,)
    b_axes = (b.ndim - 2,)
    return tensordot(a, b, axes=(a_axes, b_axes))
|
||
|
|
||
|
|
||
|
@wraps(np.vdot)
def vdot(a, b):
    # Flatten both operands and conjugate the first before taking the dot
    # product.
    flat_a = a.conj().ravel()
    flat_b = b.ravel()
    return dot(flat_a, flat_b)
|
||
|
|
||
|
|
||
|
@safe_wraps(np.matmul)
def matmul(a, b):
    a = asanyarray(a)
    b = asanyarray(b)

    if a.ndim == 0 or b.ndim == 0:
        raise ValueError("`matmul` does not support scalars.")

    # Temporarily promote 1-D operands to matrices: a row vector on the left,
    # a column vector on the right. The extra axis is dropped again below.
    a_is_1d = a.ndim == 1
    if a_is_1d:
        a = a[np.newaxis, :]

    b_is_1d = b.ndim == 1
    if b_is_1d:
        b = b[:, np.newaxis]

    # Pad the lower-rank operand with leading length-1 axes so that both
    # operands have the same number of dimensions.
    rank_gap = b.ndim - a.ndim
    if rank_gap > 0:
        a = a[rank_gap * (np.newaxis,)]
    elif rank_gap < 0:
        b = b[-rank_gap * (np.newaxis,)]

    # Index 0 labels the contracted axis (last of ``a``, second-to-last of
    # ``b``); the leading axes share labels 1 .. ndim - 2.
    ndim = a.ndim
    leading = tuple(range(1, ndim - 1))
    out = blockwise(
        np.matmul, tuple(range(1, ndim + 1)),
        a, leading + (ndim - 1, 0,),
        b, leading + (0, ndim,),
        dtype=result_type(a, b),
        concatenate=True
    )

    # Remove the axes that were added for 1-D operands.
    if a_is_1d:
        out = out[..., 0, :]
    if b_is_1d:
        out = out[..., 0]

    return out
|
||
|
|
||
|
|
||
|
@wraps(np.outer)
def outer(a, b):
    flat_a = a.flatten()
    flat_b = b.flatten()

    # Ask NumPy which dtype the outer product of two default-constructed
    # scalars of these types has.
    out_dtype = np.outer(flat_a.dtype.type(), flat_b.dtype.type()).dtype

    return blockwise(np.outer, "ij", flat_a, "i", flat_b, "j", dtype=out_dtype)
|
||
|
|
||
|
|
||
|
def _inner_apply_along_axis(arr, func1d, func1d_axis, func1d_args, func1d_kwargs):
    """
    Call ``np.apply_along_axis`` with the array as the *first* argument.

    ``func1d_args`` and ``func1d_kwargs`` are unpacked and forwarded to
    ``func1d`` as its extra positional and keyword arguments.
    """
    return np.apply_along_axis(
        func1d,
        func1d_axis,
        arr,
        *func1d_args,
        **func1d_kwargs
    )
|
||
|
|
||
|
|
||
|
@wraps(np.apply_along_axis)
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
    arr = asarray(arr)

    # Validate and normalize axis: the lookup fails for an out-of-range axis,
    # and the length of the shape prefix is the non-negative axis number.
    arr.shape[axis]
    axis = len(arr.shape[:axis])

    # Probe ``func1d`` with a one-element array to learn the dtype and shape
    # of what it returns.
    probe = np.ones((1,), dtype=arr.dtype)
    probe_result = np.array(func1d(probe, *args, **kwargs))

    old_numpy = LooseVersion(np.__version__) < LooseVersion("1.13.0")
    if old_numpy and (np.array(probe_result.shape) > 1).sum(dtype=int) > 1:
        raise ValueError(
            "No more than one non-trivial dimension allowed in result. "
            "Need NumPy 1.13.0+ for this functionality."
        )

    # Rechunk so that func1d is applied over the full axis.
    leading = arr.chunks[:axis]
    whole_axis = (arr.shape[axis:axis + 1],)
    arr = arr.rechunk(leading + whole_axis + arr.chunks[axis + 1:])

    # Map func1d over the data to get the result.
    # Adds other axes as needed.
    out_chunks = arr.chunks[:axis] + probe_result.shape + arr.chunks[axis + 1:]
    added_axes = list(range(axis, axis + probe_result.ndim, 1))
    return arr.map_blocks(
        _inner_apply_along_axis,
        name=funcname(func1d) + '-along-axis',
        dtype=probe_result.dtype,
        chunks=out_chunks,
        drop_axis=axis,
        new_axis=added_axes,
        func1d=func1d,
        func1d_axis=axis,
        func1d_args=args,
        func1d_kwargs=kwargs,
    )
|
||
|
|
||
|
|
||
|
@wraps(np.apply_over_axes)
def apply_over_axes(func, a, axes):
    # Validate arguments: accept either one axis or an iterable of axes.
    a = asarray(a)
    try:
        axes = tuple(axes)
    except TypeError:
        axes = (axes,)

    everything = a.ndim * (slice(None),)

    # Compute using `apply_along_axis`, one axis after the other.
    result = a
    for ax in axes:
        result = apply_along_axis(func, ax, result, 0)

        # Restore original dimensionality or error.
        if result.ndim == a.ndim:
            continue
        if result.ndim != (a.ndim - 1):
            raise ValueError(
                "func must either preserve dimensionality of the input"
                " or reduce it by one."
            )
        # One axis was lost: re-insert a length-1 axis at its old position.
        result = result[everything[:ax] + (None,)]

    return result
|
||
|
|
||
|
|
||
|
@wraps(np.ptp)
def ptp(a, axis=None):
    # Peak-to-peak range: maximum minus minimum along ``axis``.
    largest = a.max(axis=axis)
    smallest = a.min(axis=axis)
    return largest - smallest
|
||
|
|
||
|
|
||
|
@wraps(np.diff)
def diff(a, n=1, axis=-1):
    a = asarray(a)
    n = int(n)
    axis = int(axis)

    # Two selections that differ only along ``axis``: every element but the
    # first, and every element but the last.
    later = [slice(None)] * a.ndim
    earlier = [slice(None)] * a.ndim
    later[axis] = slice(1, None)
    earlier[axis] = slice(None, -1)
    later = tuple(later)
    earlier = tuple(earlier)

    # Apply the first-order difference ``n`` times.
    out = a
    for _ in range(n):
        out = out[later] - out[earlier]

    return out
|
||
|
|
||
|
|
||
|
@wraps(np.ediff1d)
def ediff1d(ary, to_end=None, to_begin=None):
    flat = asarray(ary).flatten()

    # Differences between consecutive elements of the flattened input.
    pieces = [flat[1:] - flat[:-1]]

    # Optional values to place before / after the differences.
    if to_begin is not None:
        pieces.insert(0, asarray(to_begin).flatten())
    if to_end is not None:
        pieces.append(asarray(to_end).flatten())

    return concatenate(pieces)
|
||
|
|
||
|
|
||
|
def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs):
    """
    Compute ``np.gradient`` of a single block along one axis.

    x: nd-array
        array of one block
    block_id: sequence
        position of the block; only ``block_id[axis]`` is used
    coord: 1d-array or scalar
        coordinate along which the gradient is computed.
    axis: int
        axis along which the gradient is computed
    array_locs:
        pair ``(starts, stops)`` giving, per block, the slice of ``coord``
        that belongs to it. None if coordinate is scalar
    grad_kwargs:
        keyword to be passed to np.gradient
    """
    position = block_id[axis]
    if array_locs is not None:
        # Cut out the part of the coordinate array that covers this block.
        starts = array_locs[0]
        stops = array_locs[1]
        coord = coord[starts[position]:stops[position]]
    return np.gradient(x, coord, axis=axis, **grad_kwargs)
|
||
|
|
||
|
|
||
|
@wraps(np.gradient)
def gradient(f, *varargs, **kwargs):
    f = asarray(f)

    # ``edge_order`` is forwarded to np.gradient for every block; only
    # orders up to 2 are accepted here.
    kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1))
    if kwargs["edge_order"] > 2:
        raise ValueError("edge_order must be less than or equal to 2.")

    # A single integer axis yields a bare array instead of a list.
    drop_result_list = False
    axis = kwargs.pop("axis", None)
    if axis is None:
        axis = tuple(range(f.ndim))
    elif isinstance(axis, Integral):
        drop_result_list = True
        axis = (axis,)

    axis = validate_axis(axis, f.ndim)

    if len(axis) != len(set(axis)):
        raise ValueError("duplicate axes not allowed")

    axis = tuple(ax % f.ndim for ax in axis)

    # Spacing: default to 1, and broadcast a single spacing to every axis.
    if varargs == ():
        varargs = (1,)
    if len(varargs) == 1:
        varargs = len(axis) * varargs
    if len(varargs) != len(axis):
        raise TypeError(
            "Spacing must either be a single scalar, or a scalar / 1d-array "
            "per axis"
        )

    # Boolean/integer input and small (< 4 byte) real input is computed in
    # float. ``np.bool_`` is used because the ``np.bool8`` alias no longer
    # exists in NumPy 2.0.
    if issubclass(f.dtype.type, (np.bool_, Integral)):
        f = f.astype(float)
    elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4:
        f = f.astype(float)

    results = []
    for i, ax in enumerate(axis):
        # Every chunk along the axis must hold at least edge_order + 1 items.
        for c in f.chunks[ax]:
            if np.min(c) < kwargs["edge_order"] + 1:
                raise ValueError(
                    'Chunk size must be larger than edge_order + 1. '
                    'Minimum chunk for axis {} is {}. Rechunk to '
                    'proceed.'.format(ax, np.min(c)))

        if np.isscalar(varargs[i]):
            array_locs = None
        else:
            if isinstance(varargs[i], Array):
                raise NotImplementedError(
                    'dask array coordinated is not supported.')
            # coordinate position for each block taking overlap into account
            chunk_sizes = np.array(f.chunks[ax])
            array_loc_stop = np.cumsum(chunk_sizes) + 1
            array_loc_start = array_loc_stop - chunk_sizes - 2
            # The outermost blocks have no neighbour on their outer side.
            array_loc_stop[-1] -= 1
            array_loc_start[0] = 0
            array_locs = (array_loc_start, array_loc_stop)

        # Each block is extended by one element on either side along ``ax``.
        results.append(f.map_overlap(
            _gradient_kernel,
            dtype=f.dtype,
            depth={j: 1 if j == ax else 0 for j in range(f.ndim)},
            boundary="none",
            coord=varargs[i],
            axis=ax,
            array_locs=array_locs,
            grad_kwargs=kwargs,
        ))

    if drop_result_list:
        results = results[0]

    return results
|
||
|
|
||
|
|
||
|
@wraps(np.bincount)
def bincount(x, weights=None, minlength=None):
    if minlength is None:
        raise TypeError("Must specify minlength argument in da.bincount")
    assert x.ndim == 1
    weighted = weights is not None
    if weighted:
        assert weights.chunks == x.chunks

    # Call np.bincount on each block, possibly with weights
    token = tokenize(x, weights, minlength)
    block_name = 'bincount-' + token
    dsk = {}
    for i, _ in enumerate(x.__dask_keys__()):
        weight_key = (weights.name, i) if weighted else None
        dsk[(block_name, i)] = (np.bincount, (x.name, i), weight_key, minlength)

    # Ask NumPy for the result dtype of the matching call signature.
    if weighted:
        dtype = np.bincount([1], weights=[1]).dtype
    else:
        dtype = np.bincount([]).dtype

    # Sum up all of the intermediate bincounts per block
    name = 'bincount-sum-' + token
    dsk[(name, 0)] = (np.sum, list(dsk), 0)

    chunks = ((minlength,),)

    dependencies = [x, weights] if weighted else [x]
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)

    return Array(graph, name, chunks, dtype)
|
||
|
|
||
|
|
||
|
@wraps(np.digitize)
def digitize(a, bins, right=False):
    bin_edges = np.asarray(bins)
    # Probe NumPy with a dummy value to learn the dtype of the bin indices.
    index_dtype = np.digitize([0], bin_edges, right=False).dtype
    return a.map_blocks(
        np.digitize, dtype=index_dtype, bins=bin_edges, right=right
    )
|
||
|
|
||
|
|
||
|
def histogram(a, bins=None, range=None, normed=False, weights=None, density=None):
    """
    Blocked variant of :func:`numpy.histogram`.

    Follows the signature of :func:`numpy.histogram` exactly with the following
    exceptions:

    - Either an iterable specifying the ``bins`` or the number of ``bins``
      and a ``range`` argument is required as computing ``min`` and ``max``
      over blocked arrays is an expensive operation that must be performed
      explicitly.

    - ``weights`` must be a dask.array.Array with the same block structure
      as ``a``.

    A histogram is computed for every block and the per-block counts are
    summed. The bin edges are returned as a NumPy array.

    Examples
    --------
    Using number of bins and range:

    >>> import dask.array as da
    >>> import numpy as np
    >>> x = da.from_array(np.arange(10000), chunks=10)
    >>> h, bins = da.histogram(x, bins=10, range=[0, 10000])
    >>> bins
    array([    0.,  1000.,  2000.,  3000.,  4000.,  5000.,  6000.,  7000.,
            8000.,  9000., 10000.])
    >>> h.compute()
    array([1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000])

    Explicitly specifying the bins:

    >>> h, bins = da.histogram(x, bins=np.array([0, 5000, 10000]))
    >>> bins
    array([    0,  5000, 10000])
    >>> h.compute()
    array([5000, 5000])
    """
    explicit_edges = np.iterable(bins)
    if not explicit_edges and (range is None or bins is None):
        raise ValueError('dask.array.histogram requires either specifying '
                         'bins as an iterable or specifying both a range and '
                         'the number of bins')

    if weights is not None and weights.chunks != a.chunks:
        raise ValueError('Input array and weights must have the same '
                         'chunked structure')

    # The token is based on ``bins`` exactly as the caller supplied it.
    bin_token = bins
    if not explicit_edges:
        mn, mx = range
        if mn == mx:
            # Widen a degenerate range so that the edges are distinct.
            mn -= 0.5
            mx += 0.5

        bins = np.linspace(mn, mx, bins + 1, endpoint=True)
    token = tokenize(a, bin_token, range, normed, weights, density)

    a_keys = list(flatten(a.__dask_keys__()))
    nchunks = len(a_keys)
    chunks = ((1,) * nchunks, (len(bins) - 1,))

    name = 'histogram-sum-' + token

    # Map the histogram to all bins
    def block_hist(x, range=None, weights=None):
        return np.histogram(x, bins, range=range, weights=weights)[0][np.newaxis]

    if weights is None:
        dsk = {(name, i, 0): (block_hist, k, range)
               for i, k in enumerate(a_keys)}
        dtype = np.histogram([])[0].dtype
    else:
        w_keys = flatten(weights.__dask_keys__())
        dsk = {(name, i, 0): (block_hist, k, range, w)
               for i, (k, w) in enumerate(zip(a_keys, w_keys))}
        dtype = weights.dtype

    dependencies = [a] if weights is None else [a, weights]
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)

    # One row of counts per block; summing the rows gives the total counts.
    mapped = Array(graph, name, chunks, dtype=dtype)
    n = mapped.sum(axis=0)

    # We need to replicate normed and density options from numpy
    if density is None:
        # deprecated, will be removed from Numpy 2.0
        if not normed:
            return n, bins
        db = from_array(np.diff(bins).astype(float), chunks=n.chunks)
        return n / (n * db).sum(), bins

    if not density:
        return n, bins
    db = from_array(np.diff(bins).astype(float), chunks=n.chunks)
    return n / db / n.sum(), bins
|
||
|
|
||
|
|
||
|
@wraps(np.cov)
def cov(m, y=None, rowvar=1, bias=0, ddof=None):
    # This was copied almost verbatim from np.cov
    # See numpy license at https://github.com/numpy/numpy/blob/master/LICENSE.txt
    # or NUMPY_LICENSE.txt within this directory
    if ddof is not None and ddof != int(ddof):
        raise ValueError(
            "ddof must be integer")

    # Handles complex arrays too
    m = asarray(m)
    if y is None:
        dtype = np.result_type(m, np.float64)
    else:
        y = asarray(y)
        dtype = np.result_type(m, y, np.float64)
    X = array(m, ndmin=2, dtype=dtype)

    # A single row is always treated as one variable.
    if X.shape[0] == 1:
        rowvar = 1

    # ``N`` is the number of observations; ``axis`` is the axis along which
    # a second set of variables gets appended.
    if rowvar:
        N, axis = X.shape[1], 0
    else:
        N, axis = X.shape[0], 1

    # check ddof
    if ddof is None:
        ddof = 1 if bias == 0 else 0
    fact = float(N - ddof)
    if fact <= 0:
        warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning)
        fact = 0.0

    if y is not None:
        y = array(y, ndmin=2, dtype=dtype)
        X = concatenate((X, y), axis)

    # Remove the mean taken over the observations.
    X = X - X.mean(axis=1 - axis, keepdims=True)
    if rowvar:
        product = dot(X, X.T.conj())
    else:
        product = dot(X.T, X.conj())
    return (product / fact).squeeze()
|
||
|
|
||
|
|
||
|
@wraps(np.corrcoef)
def corrcoef(x, y=None, rowvar=1):
    covariance = cov(x, y, rowvar)

    # A 0-d covariance is normalised by itself.
    if covariance.shape == ():
        return covariance / covariance

    # Divide by the square roots of the diagonal entries, once as a column
    # vector and once (transposed) as a row vector.
    variances = diag(covariance)
    column = variances.reshape((variances.shape[0], 1))
    std = sqrt(column)
    return (covariance / std) / std.T
|
||
|
|
||
|
|
||
|
@wraps(np.round)
def round(a, decimals=0):
    # Apply np.round to every block; the dtype is declared unchanged.
    return a.map_blocks(
        np.round,
        decimals=decimals,
        dtype=a.dtype,
    )
|
||
|
|
||
|
|
||
|
def _unique_internal(ar, indices, counts, return_inverse=False):
    """
    Helper/wrapper function for :func:`numpy.unique`.

    Finds the unique values of the chunk ``ar`` with :func:`numpy.unique`.
    Because a chunk may be only part of the whole array, ``indices`` and
    ``counts`` (each in 1-to-1 correspondence with ``ar``, or ``None`` when
    not requested) are reduced the same way ``ar`` is: counts belonging to the
    same value are summed, and the smallest index belonging to the same value
    is kept.

    For the inverse mapping only an ``arange`` with one entry per unique
    value is stored. That is not the full inverse of the original array, but
    it is enough information for the caller to build it on the Dask side.

    All results are packed into one NumPy structured array with the fields
    ``"values"`` and, when requested, ``"indices"``, ``"inverse"`` and
    ``"counts"``, so that a single array is returned. The fields can be split
    apart again by the caller, and concatenated outputs of this function can
    be fed back into it for a further reduction step.

    Handled this way, it does not matter where a chunk sits in the larger
    array, how big it is, or whether it is itself the result of earlier runs
    of this function.
    """

    want_index = indices is not None
    want_counts = counts is not None

    uniq = np.unique(ar)

    # Describe the record layout of the packed result.
    fields = [("values", uniq.dtype)]
    if want_index:
        fields.append(("indices", np.intp))
    if return_inverse:
        fields.append(("inverse", np.intp))
    if want_counts:
        fields.append(("counts", np.intp))

    packed = np.empty(uniq.shape, dtype=fields)
    packed["values"] = uniq
    if return_inverse:
        packed["inverse"] = np.arange(len(packed), dtype=np.intp)

    if want_index or want_counts:
        for pos, value in enumerate(packed["values"]):
            mask = (ar == value)
            # Reduce straight into the one-element slot of the packed array.
            slot = slice(pos, pos + 1)
            if want_index:
                indices[mask].min(keepdims=True, out=packed["indices"][slot])
            if want_counts:
                counts[mask].sum(keepdims=True, out=packed["counts"][slot])

    return packed
|
||
|
|
||
|
|
||
|
@wraps(np.unique)
def unique(ar, return_index=False, return_inverse=False, return_counts=False):
    ar = ar.ravel()

    # Run unique on each chunk and collect results in a Dask Array of
    # unknown size.

    blockwise_args = [ar, "i"]
    partial_dtype = [("values", ar.dtype)]
    if return_index:
        # Position of every element within the flattened array.
        positions = arange(ar.shape[0], dtype=np.intp, chunks=ar.chunks[0])
        blockwise_args += [positions, "i"]
        partial_dtype.append(("indices", np.intp))
    else:
        blockwise_args += [None, None]
    if return_counts:
        # Every element starts out with a count of one.
        unit_counts = ones((ar.shape[0],), dtype=np.intp, chunks=ar.chunks[0])
        blockwise_args += [unit_counts, "i"]
        partial_dtype.append(("counts", np.intp))
    else:
        blockwise_args += [None, None]

    out = blockwise(
        _unique_internal, "i",
        *blockwise_args,
        dtype=partial_dtype,
        return_inverse=False
    )
    # The number of unique values per chunk is not known in advance.
    out._chunks = tuple((np.nan,) * len(c) for c in out.chunks)

    # Take the results from the unique chunks and do the following.
    #
    # 1. Collect all results as arguments.
    # 2. Concatenate each result into one big array.
    # 3. Pass all results as arguments to the internal unique again.
    #
    # TODO: This should be replaced with a tree reduction using this strategy.
    # xref: https://github.com/dask/dask/issues/2851

    out_parts = [
        out["values"],
        out["indices"] if return_index else None,
        out["counts"] if return_counts else None,
    ]

    name = 'unique-aggregate-' + out.name
    # Dask collections are replaced by a task concatenating their chunks;
    # ``None`` placeholders are passed through unchanged.
    task_args = tuple(
        (np.concatenate, part.__dask_keys__())
        if hasattr(part, "__dask_keys__") else part
        for part in out_parts
    )
    dsk = {(name, 0): (_unique_internal,) + task_args + (return_inverse,)}

    out_dtype = [("values", ar.dtype)]
    if return_index:
        out_dtype.append(("indices", np.intp))
    if return_inverse:
        out_dtype.append(("inverse", np.intp))
    if return_counts:
        out_dtype.append(("counts", np.intp))

    dependencies = [part for part in out_parts if hasattr(part, '__dask_keys__')]
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=dependencies)
    chunks = ((np.nan,),)
    out = Array(graph, name, chunks, out_dtype)

    # Split out all results to return to the user.

    result = [out["values"]]
    if return_index:
        result.append(out["indices"])
    if return_inverse:
        # Using the returned unique values and arange of unknown length, find
        # each value matching a unique value and replace it with its
        # corresponding index or `0`. There should be only one entry for this
        # index in axis `1` (the one of unknown length). Reduce axis `1`
        # through summing to get an array with known dimensionality and the
        # mapping of the original values.
        matches = (ar[:, None] == out["values"][None, :]).astype(np.intp)
        result.append((matches * out["inverse"]).sum(axis=1))
    if return_counts:
        result.append(out["counts"])

    # A lone result is returned bare, several results as a tuple.
    return result[0] if len(result) == 1 else tuple(result)
|
||
|
|
||
|
|
||
|
def _isin_kernel(element, test_elements, assume_unique=False):
    """
    Test, for one block pair, which items of ``element`` occur in
    ``test_elements``.

    Returns a boolean array shaped like ``element`` followed by one length-1
    axis per dimension of ``test_elements``.
    """
    # ``np.in1d`` is deprecated as of NumPy 2.0 in favour of ``np.isin``.
    # Prefer ``np.isin`` and fall back to ``np.in1d`` only on NumPy releases
    # that do not provide it yet.
    membership = getattr(np, 'isin', None)
    if membership is not None:
        values = membership(element, test_elements,
                            assume_unique=assume_unique)
    else:
        values = np.in1d(element.ravel(), test_elements,
                         assume_unique=assume_unique)
    return values.reshape(element.shape + (1,) * test_elements.ndim)
|
||
|
|
||
|
|
||
|
@safe_wraps(getattr(np, 'isin', None))
def isin(element, test_elements, assume_unique=False, invert=False):
    element = asarray(element)
    test_elements = asarray(test_elements)

    # Axes of ``element`` come first; those of ``test_elements`` are numbered
    # right after them.
    element_axes = tuple(range(element.ndim))
    test_axes = tuple(i + element.ndim for i in range(test_elements.ndim))

    # Each block pair produces a result with length-1 test axes.
    unit_chunks = {axis: lambda _: 1 for axis in test_axes}
    mapped = blockwise(
        _isin_kernel,
        element_axes + test_axes,
        element,
        element_axes,
        test_elements,
        test_axes,
        adjust_chunks=unit_chunks,
        dtype=bool,
        assume_unique=assume_unique
    )

    # An element is a member if it matched in any block of ``test_elements``.
    result = mapped.any(axis=test_axes)
    return ~result if invert else result
|
||
|
|
||
|
|
||
|
@wraps(np.roll)
def roll(array, shift, axis=None):
    result = array

    if axis is None:
        # Without an axis the flattened array is rolled and the original
        # shape is restored at the end.
        result = ravel(result)

        if not isinstance(shift, Integral):
            raise TypeError(
                "Expect `shift` to be an instance of Integral"
                " when `axis` is None."
            )

        shift = (shift,)
        axis = (0,)
    else:
        # Accept a scalar or a sequence for both ``shift`` and ``axis``.
        try:
            len(shift)
        except TypeError:
            shift = (shift,)
        try:
            len(axis)
        except TypeError:
            axis = (axis,)

    if len(shift) != len(axis):
        raise ValueError("Must have the same number of shifts as axes.")

    for i, s in zip(axis, shift):
        # Position at which the axis is cut. A zero-length axis has nothing
        # to roll, and taking the modulus would raise ZeroDivisionError.
        length = result.shape[i]
        s = -s % length if length != 0 else 0

        sl1 = result.ndim * [slice(None)]
        sl2 = result.ndim * [slice(None)]

        sl1[i] = slice(s, None)
        sl2[i] = slice(None, s)

        sl1 = tuple(sl1)
        sl2 = tuple(sl2)

        # Swap the two pieces around the cut.
        result = concatenate([result[sl1], result[sl2]], axis=i)

    result = result.reshape(array.shape)

    return result
|
||
|
|
||
|
|
||
|
@wraps(np.ravel)
def ravel(array):
    # A single -1 lets ``reshape`` infer the flattened length.
    flat_shape = (-1,)
    return array.reshape(flat_shape)
|
||
|
|
||
|
|
||
|
@wraps(np.squeeze)
def squeeze(a, axis=None):
    if axis is None:
        # Default: every axis of length one.
        axis = tuple(i for i, d in enumerate(a.shape) if d == 1)
    elif not isinstance(axis, tuple):
        axis = (axis,)

    for ax in axis:
        if a.shape[ax] != 1:
            raise ValueError("cannot squeeze axis with size other than one")

    axis = validate_axis(axis, a.ndim)

    # Index the squeezed axes with 0 and keep all the others whole.
    index = tuple(0 if i in axis else slice(None) for i in range(len(a.shape)))

    return a[index]
|
||
|
|
||
|
|
||
|
@wraps(np.compress)
def compress(condition, a, axis=None):
    # Without an axis the flattened array is compressed.
    if axis is None:
        a = a.ravel()
        axis = 0
    axis = validate_axis(axis, a.ndim)

    # Only coerce non-lazy values to numpy arrays
    if not isinstance(condition, Array):
        condition = np.array(condition, dtype=bool)
    if condition.ndim != 1:
        raise ValueError("Condition must be one dimensional")

    if isinstance(condition, Array):
        # A condition shorter than the axis only applies to its leading part.
        if len(condition) < a.shape[axis]:
            a = a[tuple(slice(None, len(condition))
                        if i == axis else slice(None)
                        for i in range(a.ndim))]
        inds = tuple(range(a.ndim))
        out = blockwise(np.compress, inds, condition, (inds[axis],), a, inds,
                        axis=axis, dtype=a.dtype)
        # The number of selected items per block is unknown until computed.
        # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in
        # NumPy 2.0.
        out._chunks = tuple((np.nan,) * len(c) if i == axis else c
                            for i, c in enumerate(out.chunks))
        return out
    else:
        # Optimized case when condition is known
        if len(condition) < a.shape[axis]:
            # Pad a copy with False up to the length of the axis.
            condition = condition.copy()
            condition.resize(a.shape[axis])

        slc = ((slice(None),) * axis + (condition, ) +
               (slice(None),) * (a.ndim - axis - 1))
        return a[slc]
|
||
|
|
||
|
|
||
|
@wraps(np.extract)
def extract(condition, arr):
    # Anything that is not a dask Array becomes a boolean NumPy array.
    if not isinstance(condition, Array):
        condition = np.array(condition, dtype=bool)
    flat_condition = condition.ravel()
    flat_arr = arr.ravel()
    return compress(flat_condition, flat_arr)
|
||
|
|
||
|
|
||
|
@wraps(np.take)
def take(a, indices, axis=0):
    axis = validate_axis(axis, a.ndim)

    # A numpy array indexed by a dask array goes through the block-wise helper.
    if isinstance(a, np.ndarray) and isinstance(indices, Array):
        return _take_dask_array_from_numpy(a, indices, axis)

    # Otherwise index directly: full slices before ``axis``, then ``indices``.
    leading = (slice(None),) * axis
    return a[leading + (indices,)]
|
||
|
|
||
|
|
||
|
def _take_dask_array_from_numpy(a, indices, axis):
    """Take from numpy array ``a`` at positions held in dask array ``indices``.

    ``np.take(a, block, axis)`` is mapped over every block of ``indices``.
    The result is declared with the chunks of ``indices`` and the dtype
    of ``a``.
    """
    assert isinstance(a, np.ndarray)
    assert isinstance(indices, Array)

    return indices.map_blocks(
        lambda block: np.take(a, block, axis),
        dtype=a.dtype,
        chunks=indices.chunks
    )
|
||
|
|
||
|
|
||
|
@wraps(np.around)
def around(x, decimals=0):
    # Bind ``decimals`` up front so the per-block function takes only the block.
    rounder = partial(np.around, decimals=decimals)
    return map_blocks(rounder, x, dtype=x.dtype)
|
||
|
|
||
|
|
||
|
def _asarray_isnull(values):
    """Return ``pandas.isnull(values)`` converted with ``np.asarray``."""
    # pandas is imported lazily so the module itself does not require it.
    import pandas as pd

    null_mask = pd.isnull(values)
    return np.asarray(null_mask)
|
||
|
|
||
|
|
||
|
def isnull(values):
    """Elementwise ``pandas.isnull`` for dask arrays; the result dtype is bool."""
    # Import pandas here so a missing installation raises ImportError
    # immediately rather than when the graph is computed.
    import pandas  # noqa

    null_mask = elemwise(_asarray_isnull, values, dtype='bool')
    return null_mask
|
||
|
|
||
|
|
||
|
def notnull(values):
    """Elementwise ``pandas.notnull`` for dask arrays (inverse of ``isnull``)."""
    null_mask = isnull(values)
    return ~null_mask
|
||
|
|
||
|
|
||
|
@wraps(np.isclose)
def isclose(arr1, arr2, rtol=1e-5, atol=1e-8, equal_nan=False):
    # Bind the tolerances first so the elementwise function takes only the
    # two arrays being compared.
    compare = partial(np.isclose, rtol=rtol, atol=atol, equal_nan=equal_nan)
    result = elemwise(compare, arr1, arr2, dtype='bool')
    return result
|
||
|
|
||
|
|
||
|
@wraps(np.allclose)
def allclose(arr1, arr2, rtol=1e-5, atol=1e-8, equal_nan=False):
    # Elementwise closeness first, then reduce with ``all``.
    close = isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=equal_nan)
    return close.all()
|
||
|
|
||
|
|
||
|
def variadic_choose(a, *choices):
    """Call ``np.choose`` with the choice arrays given as separate arguments."""
    picked = np.choose(a, choices)
    return picked
|
||
|
|
||
|
|
||
|
@wraps(np.choose)
def choose(a, choices):
    # The selector and each choice are passed as separate operands.
    operands = (a,) + tuple(choices)
    return elemwise(variadic_choose, *operands)
|
||
|
|
||
|
|
||
|
@partial(np.vectorize, otypes=[bool])
def _isnonzero_vec(v):
    """Report whether ``v`` is nonzero; vectorized with boolean output."""
    return np.count_nonzero(v) != 0
|
||
|
|
||
|
|
||
|
def isnonzero(a):
    """Return a boolean array marking which elements of ``a`` are nonzero."""
    try:
        # Probe whether a scalar of this dtype can be cast to bool at all.
        np.zeros((), dtype=a.dtype).astype(bool)
    except ValueError:
        # Special case: conversion to bool does not work correctly for this
        # dtype, so test each element with the vectorized helper instead.
        # xref: https://github.com/numpy/numpy/issues/9479
        return a.map_blocks(_isnonzero_vec, dtype=bool)

    return a.astype(bool)
|
||
|
|
||
|
|
||
|
@wraps(np.argwhere)
def argwhere(a):
    a = asarray(a)

    # Flat boolean mask of the nonzero elements.
    nonzero_mask = isnonzero(a).flatten()

    # Index arrays for the shape of ``a``, chunked like ``a``.
    coords = indices(a.shape, dtype=np.intp, chunks=a.chunks)
    if coords.ndim > 1:
        # Flatten each leading entry and stack them as columns.
        columns = [coords[dim].ravel() for dim in range(len(coords))]
        coords = stack(columns, axis=1)

    # Keep only the rows whose mask entry is true.
    return compress(nonzero_mask, coords, axis=0)
|
||
|
|
||
|
|
||
|
@wraps(np.where)
def where(condition, x=None, y=None):
    x_missing = x is None
    y_missing = y is None
    if x_missing != y_missing:
        raise ValueError("either both or neither of x and y should be given")
    if x_missing and y_missing:
        # Single-argument form is equivalent to ``nonzero``.
        return nonzero(condition)

    if not np.isscalar(condition):
        return elemwise(np.where, condition, x, y)

    # Scalar condition: select one operand outright, then broadcast it to
    # the joint shape and cast it to the joint dtype of both operands.
    dtype = result_type(x, y)
    x = asarray(x)
    y = asarray(y)

    shape = broadcast_shapes(x.shape, y.shape)
    if condition:
        out = x
    else:
        out = y

    return broadcast_to(out, shape).astype(dtype)
|
||
|
|
||
|
|
||
|
@wraps(np.count_nonzero)
def count_nonzero(a, axis=None):
    # Turn the nonzero mask into integers and sum it along ``axis``.
    mask = isnonzero(asarray(a))
    return mask.astype(np.intp).sum(axis=axis)
|
||
|
|
||
|
|
||
|
@wraps(np.flatnonzero)
def flatnonzero(a):
    # Run argwhere on the flattened input and return its first column.
    flat = asarray(a).ravel()
    positions = argwhere(flat)
    return positions[:, 0]
|
||
|
|
||
|
|
||
|
@wraps(np.nonzero)
def nonzero(a):
    ind = argwhere(a)
    if ind.ndim <= 1:
        return (ind,)
    # Return each column of the argwhere result as its own array.
    return tuple(ind[:, col] for col in range(ind.shape[1]))
|
||
|
|
||
|
|
||
|
def _int_piecewise(x, *condlist, **kwargs):
    """Call ``np.piecewise`` on ``x`` with conditions given positionally.

    ``kwargs`` must contain ``funclist``, ``func_args`` and ``func_kw``;
    the latter two are unpacked as extra positional and keyword arguments
    to ``np.piecewise``.
    """
    funclist = kwargs["funclist"]
    func_args = kwargs["func_args"]
    func_kw = kwargs["func_kw"]
    return np.piecewise(x, list(condlist), funclist, *func_args, **func_kw)
|
||
|
|
||
|
|
||
|
def _unravel_index_kernel(indices, func_kwargs):
    """Unravel one block of flat indices and stack the coordinate arrays.

    Parameters
    ----------
    indices : array of flat indices
    func_kwargs : dict
        Arguments for ``np.unravel_index``. The target shape may be given
        under either ``"dims"`` or ``"shape"``; any remaining entries
        (e.g. ``"order"``) are forwarded as keywords.

    Returns
    -------
    The result of ``np.stack`` applied to the tuple returned by
    ``np.unravel_index``.
    """
    # Work on a copy so the caller's dict is not mutated.
    kwargs = dict(func_kwargs)
    # NumPy renamed the ``dims`` keyword to ``shape`` (deprecated in 1.16,
    # later removed).  Passing the shape positionally works on every version.
    if "dims" in kwargs:
        shape = kwargs.pop("dims")
    else:
        shape = kwargs.pop("shape")
    return np.stack(np.unravel_index(indices, shape, **kwargs))
|
||
|
|
||
|
|
||
|
@wraps(np.unravel_index)
def unravel_index(indices, dims, order='C'):
    if not (dims and indices.size):
        # Nothing to unravel: return one empty index array per dimension.
        return tuple(empty((0,), dtype=np.intp, chunks=1) for _ in dims)

    # The kernel stacks the coordinate arrays along a new leading axis of
    # length ``len(dims)``.
    stacked = indices.map_blocks(
        _unravel_index_kernel,
        dtype=np.intp,
        chunks=(((len(dims),),) + indices.chunks),
        new_axis=0,
        func_kwargs={"dims": dims, "order": order}
    )
    return tuple(stacked)
|
||
|
|
||
|
|
||
|
@wraps(np.piecewise)
def piecewise(x, condlist, funclist, *args, **kw):
    # ``x`` and each condition are passed positionally; the function list
    # and the extra arguments travel as keywords for ``_int_piecewise``.
    forwarded = dict(funclist=funclist, func_args=args, func_kw=kw)
    return map_blocks(
        _int_piecewise,
        x, *condlist,
        dtype=x.dtype,
        name="piecewise",
        **forwarded
    )
|
||
|
|
||
|
|
||
|
@wraps(chunk.coarsen)
def coarsen(reduction, x, axes, trim_excess=False):
    if not trim_excess:
        # Without trimming, every block along a coarsened axis must be an
        # exact multiple of that axis's factor.
        misaligned = any(bd % div != 0
                         for i, div in axes.items()
                         for bd in x.chunks[i])
        if misaligned:
            msg = "Coarsening factor does not align with block dimensions"
            raise ValueError(msg)

    # A reduction whose source file path contains 'dask' is replaced by the
    # numpy function of the same name.
    if 'dask' in inspect.getfile(reduction):
        reduction = getattr(np, reduction.__name__)

    name = 'coarsen-' + tokenize(reduction, x, axes, trim_excess)

    # One coarsen task per input block, keyed by the same block position.
    dsk = {}
    for key in flatten(x.__dask_keys__()):
        dsk[(name,) + key[1:]] = (chunk.coarsen, reduction, key, axes,
                                  trim_excess)

    # Each block shrinks by its axis's factor (factor 1 for untouched axes).
    chunks = tuple(tuple(int(bd // axes.get(i, 1)) for bd in bds)
                   for i, bds in enumerate(x.chunks))

    # Take the output dtype from applying the reduction to a one-element array.
    probe = np.empty((1,) * x.ndim, dtype=x.dtype)
    dt = reduction(probe).dtype
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
    return Array(graph, name, chunks, dtype=dt)
|
||
|
|
||
|
|
||
|
def split_at_breaks(array, breaks, axis=0):
    """ Slice an array into consecutive pieces that meet at the given breaks

    The pieces are taken along ``axis`` and returned as a list.

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    # ``None`` at both ends leaves the first and last slices open.
    bounds = [None] + list(breaks) + [None]
    leading = (slice(None),) * axis

    pieces = []
    for start, stop in zip(bounds[:-1], bounds[1:]):
        pieces.append(array[leading + (slice(start, stop),)])
    return pieces
|
||
|
|
||
|
|
||
|
@wraps(np.insert)
def insert(arr, obj, values, axis):
    # axis is a required argument here to avoid needing to deal with the numpy
    # default case (which reshapes the array to make it flat)
    axis = validate_axis(axis,arr.ndim)

    # Normalise ``obj`` to a 1-d numpy array of insertion positions,
    # remembering whether it was given as a scalar.
    if isinstance(obj, slice):
        obj = np.arange(*obj.indices(arr.shape[axis]))
    obj = np.asarray(obj)
    scalar_obj = obj.ndim == 0
    if scalar_obj:
        obj = np.atleast_1d(obj)

    # Negative positions are offset by the length of ``axis``.
    obj = np.where(obj < 0, obj + arr.shape[axis], obj)
    if (np.diff(obj) < 0).any():
        raise NotImplementedError(
            'da.insert only implemented for monotonic ``obj`` argument')

    # Cut ``arr`` along ``axis`` at every distinct insertion position.
    split_arr = split_at_breaks(arr, np.unique(obj), axis)

    if getattr(values, 'ndim', 0) == 0:
        # we need to turn values into a dask array
        name = 'values-' + tokenize(values)
        dtype = getattr(values, 'dtype', type(values))
        values = Array({(name,): values}, name, chunks=(), dtype=dtype)

        # Broadcast the scalar to the shape of ``arr`` with ``len(obj)``
        # entries along ``axis``.
        values_shape = tuple(len(obj) if axis == n else s
                             for n, s in enumerate(arr.shape))
        values = broadcast_to(values, values_shape)
    elif scalar_obj:
        # Scalar position with array values: add a new axis at ``axis``.
        values = values[(slice(None),) * axis + (None,)]

    # Rechunk ``values`` to the chunks of ``arr`` on every axis except
    # ``axis``, where its own chunks are kept.
    values_chunks = tuple(values_bd if axis == n else arr_bd
                          for n, (arr_bd, values_bd)
                          in enumerate(zip(arr.chunks,
                                           values.chunks)))
    values = values.rechunk(values_chunks)

    # Occurrence counts of each position (the last bin is dropped); the
    # running total of the non-zero counts gives the points at which
    # ``values`` is split.
    counts = np.bincount(obj)[:-1]
    values_breaks = np.cumsum(counts[counts > 0])
    split_values = split_at_breaks(values, values_breaks, axis)

    # Alternate pieces of ``arr`` and ``values``, drop pieces with zero
    # bytes, and join the rest along ``axis``.
    interleaved = list(interleave([split_arr, split_values]))
    interleaved = [i for i in interleaved if i.nbytes]
    return concatenate(interleaved, axis=axis)
|
||
|
|
||
|
|
||
|
def _average(a, axis=None, weights=None, returned=False, is_masked=False):
    """Compute an optionally weighted average of ``a`` along ``axis``.

    Parameters
    ----------
    a : array-like, converted with ``asanyarray``
    axis : axis passed to the ``mean``/``sum`` reductions
    weights : optional weights; either the same shape as ``a`` or 1-D with
        length ``a.shape[axis]``
    returned : if true, return ``(avg, scl)`` instead of ``avg``, where
        ``scl`` is the divisor used (the sum of weights, or the element
        count per output when ``weights`` is None)
    is_masked : if true, weights are multiplied by the inverted mask array
        of ``a`` before use

    Raises
    ------
    TypeError
        If the shapes of ``a`` and ``weights`` differ and ``axis`` is None
        or ``weights`` is not 1-D.
    ValueError
        If 1-D ``weights`` has a length other than ``a.shape[axis]``.
    """
    # This was minimally modified from numpy.average
    # See numpy license at https://github.com/numpy/numpy/blob/master/LICENSE.txt
    # or NUMPY_LICENSE.txt within this directory
    # Wrapper used by da.average or da.ma.average.
    a = asanyarray(a)

    if weights is None:
        avg = a.mean(axis)
        # Number of input elements per output element.
        scl = avg.dtype.type(a.size / avg.size)
    else:
        wgt = asanyarray(weights)

        # Integer and boolean inputs also include 'f8' in the result dtype.
        if issubclass(a.dtype.type, (np.integer, np.bool_)):
            result_dtype = result_type(a.dtype, wgt.dtype, 'f8')
        else:
            result_dtype = result_type(a.dtype, wgt.dtype)

        # Sanity checks
        if a.shape != wgt.shape:
            if axis is None:
                raise TypeError(
                    "Axis must be specified when shapes of a and weights "
                    "differ.")
            if wgt.ndim != 1:
                raise TypeError(
                    "1D weights expected when shapes of a and weights differ.")
            if wgt.shape[0] != a.shape[axis]:
                raise ValueError(
                    "Length of weights not compatible with specified axis.")

            # setup wgt to broadcast along axis
            wgt = broadcast_to(wgt, (a.ndim - 1) * (1,) + wgt.shape)
            wgt = wgt.swapaxes(-1, axis)
        if is_masked:
            # Imported locally; only needed on the masked path.
            from .ma import getmaskarray
            wgt = wgt * (~getmaskarray(a))
        scl = wgt.sum(axis=axis, dtype=result_dtype)
        avg = multiply(a, wgt, dtype=result_dtype).sum(axis) / scl

    if returned:
        # Give the divisor the same shape as the average before returning it.
        if scl.shape != avg.shape:
            scl = broadcast_to(scl, avg.shape).copy()
        return avg, scl
    else:
        return avg
|
||
|
|
||
|
|
||
|
@wraps(np.average)
def average(a, axis=None, weights=None, returned=False):
    # Non-masked entry point for the shared ``_average`` implementation.
    return _average(a, axis=axis, weights=weights, returned=returned,
                    is_masked=False)
|