You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/array/creation.py

1072 lines
33 KiB

6 years ago
from __future__ import absolute_import, division, print_function
from functools import partial, wraps, reduce
from itertools import product
from operator import add, getitem
from numbers import Integral, Number
import numpy as np
from toolz import accumulate, sliding_window
from ..highlevelgraph import HighLevelGraph
from ..base import tokenize
from ..compatibility import Sequence
from . import chunk
from .core import (Array, asarray, normalize_chunks,
stack, concatenate, block,
broadcast_to, broadcast_arrays)
from .wrap import empty, ones, zeros, full
from .utils import AxisError
def empty_like(a, dtype=None, chunks=None):
    """
    Return a new array with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes of the
        returned array.
    dtype : data-type, optional
        Overrides the data type of the result.
    chunks : sequence of ints
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``. Defaults to the
        chunks of `a`.

    Returns
    -------
    out : array
        Array of uninitialized (arbitrary) data with the same
        shape and type as `a`, as built by `empty`.

    See Also
    --------
    ones_like : Return an array of ones with shape and type of input.
    zeros_like : Return an array of zeros with shape and type of input.
    empty : Return a new uninitialized array.
    ones : Return a new array setting values to one.
    zeros : Return a new array setting values to zero.

    Notes
    -----
    This function does *not* initialize the returned array; to do that use
    `zeros_like` or `ones_like` instead.  It may be marginally faster than
    the functions that do set the array values.
    """
    a = asarray(a, name=False)
    # Test against None explicitly: a non-structured ``np.dtype`` instance
    # can evaluate as falsy (its ``len()`` is 0), so ``dtype or a.dtype``
    # could silently discard an explicitly requested dtype.
    return empty(
        a.shape,
        dtype=(dtype if dtype is not None else a.dtype),
        chunks=(chunks if chunks is not None else a.chunks)
    )
def ones_like(a, dtype=None, chunks=None):
    """
    Return an array of ones with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes of
        the returned array.
    dtype : data-type, optional
        Overrides the data type of the result.
    chunks : sequence of ints
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``. Defaults to the
        chunks of `a`.

    Returns
    -------
    out : array
        Array of ones with the same shape and type as `a`, as built by
        `ones`.

    See Also
    --------
    zeros_like : Return an array of zeros with shape and type of input.
    empty_like : Return an empty array with shape and type of input.
    zeros : Return a new array setting values to zero.
    ones : Return a new array setting values to one.
    empty : Return a new uninitialized array.
    """
    a = asarray(a, name=False)
    # Test against None explicitly: a non-structured ``np.dtype`` instance
    # can evaluate as falsy (its ``len()`` is 0), so ``dtype or a.dtype``
    # could silently discard an explicitly requested dtype.
    return ones(
        a.shape,
        dtype=(dtype if dtype is not None else a.dtype),
        chunks=(chunks if chunks is not None else a.chunks)
    )
def zeros_like(a, dtype=None, chunks=None):
    """
    Return an array of zeros with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes of
        the returned array.
    dtype : data-type, optional
        Overrides the data type of the result.
    chunks : sequence of ints
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``. Defaults to the
        chunks of `a`.

    Returns
    -------
    out : array
        Array of zeros with the same shape and type as `a`, as built by
        `zeros`.

    See Also
    --------
    ones_like : Return an array of ones with shape and type of input.
    empty_like : Return an empty array with shape and type of input.
    zeros : Return a new array setting values to zero.
    ones : Return a new array setting values to one.
    empty : Return a new uninitialized array.
    """
    a = asarray(a, name=False)
    # Test against None explicitly: a non-structured ``np.dtype`` instance
    # can evaluate as falsy (its ``len()`` is 0), so ``dtype or a.dtype``
    # could silently discard an explicitly requested dtype.
    return zeros(
        a.shape,
        dtype=(dtype if dtype is not None else a.dtype),
        chunks=(chunks if chunks is not None else a.chunks)
    )
def full_like(a, fill_value, dtype=None, chunks=None):
    """
    Return a full array with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes of
        the returned array.
    fill_value : scalar
        Fill value.
    dtype : data-type, optional
        Overrides the data type of the result.
    chunks : sequence of ints
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``. Defaults to the
        chunks of `a`.

    Returns
    -------
    out : array
        Array of `fill_value` with the same shape and type as `a`, as built
        by `full`.

    See Also
    --------
    zeros_like : Return an array of zeros with shape and type of input.
    ones_like : Return an array of ones with shape and type of input.
    empty_like : Return an empty array with shape and type of input.
    zeros : Return a new array setting values to zero.
    ones : Return a new array setting values to one.
    empty : Return a new uninitialized array.
    full : Fill a new array.
    """
    a = asarray(a, name=False)
    # Test against None explicitly: a non-structured ``np.dtype`` instance
    # can evaluate as falsy (its ``len()`` is 0), so ``dtype or a.dtype``
    # could silently discard an explicitly requested dtype.
    return full(
        a.shape,
        fill_value,
        dtype=(dtype if dtype is not None else a.dtype),
        chunks=(chunks if chunks is not None else a.chunks)
    )
def linspace(start, stop, num=50, endpoint=True, retstep=False, chunks='auto',
             dtype=None):
    """
    Return `num` evenly spaced values over the closed interval [`start`,
    `stop`].

    Parameters
    ----------
    start : scalar
        The starting value of the sequence.
    stop : scalar
        The last value of the sequence.
    num : int, optional
        Number of samples to include in the returned dask array, including
        the endpoints. Default is 50.
    endpoint : bool, optional
        If True, ``stop`` is the last sample. Otherwise, it is not included.
        Default is True.
    retstep : bool, optional
        If True, return (samples, step), where step is the spacing between
        samples. Default is False.
    chunks : int
        The number of samples on each block. Note that the last block will
        have fewer samples if `num % blocksize != 0`
    dtype : dtype, optional
        The type of the output array. Default is the dtype that
        ``numpy.linspace`` produces.

    Returns
    -------
    samples : dask array
    step : float, optional
        Only returned if ``retstep`` is True. Size of spacing between samples.

    See Also
    --------
    dask.array.arange
    """
    num = int(num)

    # Resolve the dtype before normalising chunks so it can be forwarded, the
    # same way ``arange`` does; the default ``chunks='auto'`` needs it.
    if dtype is None:
        dtype = np.linspace(0, 1, 1).dtype

    chunks = normalize_chunks(chunks, (num,), dtype=dtype)

    range_ = stop - start

    div = (num - 1) if endpoint else num
    # A single sample with ``endpoint=True`` (or zero samples with
    # ``endpoint=False``) would otherwise divide by zero.
    if div == 0:
        div = 1

    step = float(range_) / div

    name = 'linspace-' + tokenize((start, stop, num, endpoint, chunks, dtype))

    dsk = {}
    blockstart = start

    for i, bs in enumerate(chunks[0]):
        # With endpoint=True each block's own last sample sits one step
        # before the start of the next block.
        bs_space = bs - 1 if endpoint else bs
        blockstop = blockstart + (bs_space * step)
        task = (partial(np.linspace, endpoint=endpoint, dtype=dtype),
                blockstart, blockstop, bs)
        blockstart = blockstart + (step * bs)
        dsk[(name, i)] = task

    if retstep:
        return Array(dsk, name, chunks, dtype=dtype), step
    else:
        return Array(dsk, name, chunks, dtype=dtype)
def arange(*args, **kwargs):
    """
    Build a dask array of evenly spaced values in ``[start, stop)``.

    Works like Python's ``range``: ``start`` is included and ``stop`` is
    excluded. With a non-integer step such as 0.1 the results are often
    inconsistent; prefer ``linspace`` in that case.

    Parameters
    ----------
    start : int, optional
        The starting value of the sequence. The default is 0.
    stop : int
        The end of the interval, this value is excluded from the interval.
    step : int, optional
        The spacing between the values. The default is 1 when not specified.
    chunks : int
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``.
    dtype : numpy.dtype
        Output dtype. Omit to infer it from start, stop, step

    Returns
    -------
    samples : dask array

    See Also
    --------
    dask.array.linspace
    """
    nargs = len(args)
    if nargs == 3:
        start, stop, step = args
    elif nargs == 2:
        (start, stop), step = args, 1
    elif nargs == 1:
        start, stop, step = 0, args[0], 1
    else:
        raise TypeError(
            '\narange takes 3 positional arguments: '
            'arange([start], stop, [step])\n'
        )

    chunks = kwargs.pop('chunks', 'auto')
    dtype = kwargs.pop('dtype', None)

    # Number of elements; never negative.
    num = int(max(np.ceil((stop - start) / step), 0))

    if dtype is None:
        dtype = np.arange(start, stop, step * num if num else step).dtype

    chunks = normalize_chunks(chunks, (num,), dtype=dtype)

    # Anything still left in kwargs was not a recognised option.
    if kwargs:
        raise TypeError("Unexpected keyword argument(s): %s" %
                        ",".join(kwargs))

    name = 'arange-' + tokenize((start, stop, step, chunks, dtype))

    graph = {}
    first = 0
    for block_id, length in enumerate(chunks[0]):
        last = first + length
        graph[(name, block_id)] = (
            chunk.arange,
            start + (first * step),
            start + (last * step),
            step,
            length,
            dtype,
        )
        first = last

    return Array(graph, name, chunks, dtype=dtype)
@wraps(np.meshgrid)
def meshgrid(*xi, **kwargs):
    # Only ``indexing`` and ``sparse`` are honoured; ``copy`` is rejected
    # explicitly and any other keyword is an error.
    indexing = kwargs.pop("indexing", "xy")
    sparse = bool(kwargs.pop("sparse", False))

    if "copy" in kwargs:
        raise NotImplementedError("`copy` not supported")

    if kwargs:
        raise TypeError("unsupported keyword argument(s) provided")

    if indexing not in ("ij", "xy"):
        raise ValueError("`indexing` must be `'ij'` or `'xy'`")

    vectors = [asarray(e) for e in xi]
    vectors = [e.flatten() for e in vectors]

    ndim = len(vectors)
    swap_xy = indexing == "xy" and ndim > 1

    # For "xy" indexing the first two inputs trade places while the grid is
    # built, and trade back on the way out.
    if swap_xy:
        vectors[0], vectors[1] = vectors[1], vectors[0]

    grid = []
    for axis, vec in enumerate(vectors):
        # Keep the data along ``axis``; insert new length-1 axes elsewhere.
        expander = [None] * ndim
        expander[axis] = slice(None)
        grid.append(vec[tuple(expander)])

    if not sparse:
        grid = broadcast_arrays(*grid)

    if swap_xy:
        grid[0], grid[1] = grid[1], grid[0]

    return grid
def indices(dimensions, dtype=int, chunks='auto'):
    """
    Implements NumPy's ``indices`` for Dask Arrays.

    Generates a grid of indices covering the dimensions provided.

    The final array has the shape ``(len(dimensions), *dimensions)``. The
    chunks are used to specify the chunking for axis 1 up to
    ``len(dimensions)``. The 0th axis always has chunks of length 1.

    Parameters
    ----------
    dimensions : sequence of ints
        The shape of the index grid.
    dtype : dtype, optional
        Type to use for the array. Default is ``int``.
    chunks : sequence of ints or str
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``. A string such
        as the default ``'auto'`` is resolved with ``normalize_chunks``.

    Returns
    -------
    grid : dask array

    Raises
    ------
    ValueError
        If the number of chunk entries differs from the number of
        dimensions.
    """
    dimensions = tuple(dimensions)
    dtype = np.dtype(dtype)

    if isinstance(chunks, str):
        # ``tuple('auto')`` would split the string into characters and the
        # length check below would reject the default; resolve string specs
        # into one chunk tuple per dimension instead.
        chunks = normalize_chunks(chunks, dimensions, dtype=dtype)
    else:
        chunks = tuple(chunks)

    if len(dimensions) != len(chunks):
        raise ValueError("Need same number of chunks as dimensions.")

    xi = [
        arange(dim, dtype=dtype, chunks=(chunk_spec,))
        for dim, chunk_spec in zip(dimensions, chunks)
    ]

    grid = []
    # Only build the grid when no dimension has length zero.
    if np.prod(dimensions):
        grid = meshgrid(*xi, indexing="ij")

    if grid:
        grid = stack(grid)
    else:
        grid = empty(
            (len(dimensions),) + dimensions, dtype=dtype, chunks=(1,) + chunks
        )

    return grid
def eye(N, chunks, M=None, k=0, dtype=float):
    """
    Return a 2-D Array with ones on the diagonal and zeros elsewhere.

    Parameters
    ----------
    N : int
      Number of rows in the output.
    chunks : int
        chunk size of resulting blocks
    M : int, optional
      Number of columns in the output. If None, defaults to `N`.
    k : int, optional
      Index of the diagonal: 0 (the default) refers to the main diagonal,
      a positive value refers to an upper diagonal, and a negative value
      to a lower diagonal.
    dtype : data-type, optional
      Data-type of the returned array.

    Returns
    -------
    I : Array of shape (N,M)
      An array where all elements are equal to zero, except for the `k`-th
      diagonal, whose values are equal to one.

    Raises
    ------
    ValueError
        If `chunks` is not an integer.
    """
    if not isinstance(chunks, Integral):
        raise ValueError('chunks must be an int')

    # Hash the ``chunks`` argument, not the imported ``chunk`` module:
    # otherwise two calls differing only in chunk size get the same name
    # even though their graphs differ.
    token = tokenize(N, chunks, M, k, dtype)
    name_eye = 'eye-' + token

    dsk = {}
    if M is None:
        M = N

    # Block sizes along rows and columns; the last block holds the remainder.
    vchunks = [chunks] * (N // chunks)
    if N % chunks != 0:
        vchunks.append(N % chunks)
    hchunks = [chunks] * (M // chunks)
    if M % chunks != 0:
        hchunks.append(M % chunks)

    for i, vchunk in enumerate(vchunks):
        for j, hchunk in enumerate(hchunks):
            # Blocks whose offset range includes ``k`` get a shifted np.eye;
            # all other blocks are plain zeros.
            if (j - i - 1) * chunks <= k <= (j - i + 1) * chunks:
                dsk[name_eye, i, j] = (np.eye, vchunk, hchunk,
                                       k - (j - i) * chunks, dtype)
            else:
                dsk[name_eye, i, j] = (np.zeros, (vchunk, hchunk), dtype)
    return Array(dsk, name_eye, shape=(N, M),
                 chunks=(chunks, chunks), dtype=dtype)
@wraps(np.diag)
def diag(v):
    # Accepts either a NumPy array (wrapped as a single-block dask array) or
    # a dask Array. 1-d input builds a 2-d array with `v` on the diagonal;
    # otherwise the diagonal is extracted block by block.
    name = 'diag-' + tokenize(v)

    if isinstance(v, np.ndarray):
        if v.ndim == 1:
            chunks = ((v.shape[0],), (v.shape[0],))
            dsk = {(name, 0, 0): (np.diag, v)}
        elif v.ndim == 2:
            chunks = ((min(v.shape),),)
            dsk = {(name, 0): (np.diag, v)}
        else:
            raise ValueError("Array must be 1d or 2d only")
        return Array(dsk, name, chunks, dtype=v.dtype)

    if not isinstance(v, Array):
        raise TypeError("v must be a dask array or numpy array, "
                        "got {0}".format(type(v)))

    if v.ndim != 1:
        # Extraction is only supported when row and column chunking match,
        # so each diagonal block is square.
        if v.chunks[0] == v.chunks[1]:
            dsk = {(name, i): (np.diag, row[i])
                   for i, row in enumerate(v.__dask_keys__())}
            graph = HighLevelGraph.from_collections(name, dsk,
                                                    dependencies=[v])
            return Array(graph, name, (v.chunks[0],), dtype=v.dtype)
        else:
            raise NotImplementedError("Extracting diagonals from non-square "
                                      "chunked arrays")

    chunks_1d = v.chunks[0]
    blocks = v.__dask_keys__()
    dsk = {}
    for i, m in enumerate(chunks_1d):
        for j, n in enumerate(chunks_1d):
            key = (name, i, j)
            if i == j:
                dsk[key] = (np.diag, blocks[i])
            else:
                # Pass the dtype explicitly: np.zeros defaults to float64,
                # which would not match the dtype declared on the result.
                dsk[key] = (np.zeros, (m, n), v.dtype)

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[v])
    return Array(graph, name, (chunks_1d, chunks_1d), dtype=v.dtype)
@wraps(np.diagonal)
def diagonal(a, offset=0, axis1=0, axis2=1):
    # Builds one ``np.diagonal`` task per pair of blocks along
    # (axis1, axis2); the diagonal becomes the last axis of the result.
    name = 'diagonal-' + tokenize(a, offset, axis1, axis2)

    if a.ndim < 2:
        # NumPy uses `diag` as we do here.
        raise ValueError("diag requires an array of at least two dimensions")

    def _axis_fmt(axis, name, ndim):
        # Convert a negative axis into its non-negative equivalent.
        # NOTE(review): only the negative side is range-checked here; an
        # axis >= ndim is returned unchanged.
        if axis < 0:
            t = ndim + axis
            if t < 0:
                msg = "{}: axis {} is out of bounds for array of dimension {}"
                raise AxisError(msg.format(name, axis, ndim))
            axis = t
        return axis

    axis1 = _axis_fmt(axis1, "axis1", a.ndim)
    axis2 = _axis_fmt(axis2, "axis2", a.ndim)

    if axis1 == axis2:
        raise ValueError("axis1 and axis2 cannot be the same")

    a = asarray(a)

    # Order the axes so that axis1 < axis2; swapping the two axes flips the
    # sign of the offset.
    if axis1 > axis2:
        axis1, axis2 = axis2, axis1
        offset = -offset

    def _diag_len(dim1, dim2, offset):
        # Length used for the diagonal of a (dim1, dim2) block at `offset`;
        # clamped to zero.
        return max(0, min(min(dim1, dim2), dim1 + offset, dim2 - offset))

    diag_chunks = []
    chunk_offsets = []
    # Starting index of every block along each of the two axes.
    cum1 = [0] + list(np.cumsum(a.chunks[axis1]))[:-1]
    cum2 = [0] + list(np.cumsum(a.chunks[axis2]))[:-1]
    for co1, c1 in zip(cum1, a.chunks[axis1]):
        chunk_offsets.append([])
        for co2, c2 in zip(cum2, a.chunks[axis2]):
            # Offset of the requested diagonal relative to this block.
            k = offset + co1 - co2
            diag_chunks.append(_diag_len(c1, c2, k))
            chunk_offsets[-1].append(k)

    dsk = {}
    # Axes other than the two diagonal axes.
    idx_set = set(range(a.ndim)) - set([axis1, axis2])
    n1 = len(a.chunks[axis1])
    n2 = len(a.chunks[axis2])
    for idx in product(*(range(len(a.chunks[i])) for i in idx_set)):
        for i, (i1, i2) in enumerate(product(range(n1), range(n2))):
            # Walk the nested key list: the remaining-axis indices in `idx`
            # are consumed around the axis1 and axis2 positions.
            tsk = reduce(getitem, idx[:axis1], a.__dask_keys__())[i1]
            tsk = reduce(getitem, idx[axis1:axis2 - 1], tsk)[i2]
            tsk = reduce(getitem, idx[axis2 - 1:], tsk)
            k = chunk_offsets[i1][i2]
            dsk[(name,) + idx + (i,)] = (np.diagonal, tsk, k, axis1, axis2)

    left_shape = tuple(a.shape[i] for i in idx_set)
    right_shape = (_diag_len(a.shape[axis1], a.shape[axis2], offset),)
    shape = left_shape + right_shape

    left_chunks = tuple(a.chunks[i] for i in idx_set)
    # NOTE: `right_shape` is reused here to hold the chunks of the diagonal
    # axis (one entry per block pair), not a shape.
    right_shape = (tuple(diag_chunks),)
    chunks = left_chunks + right_shape

    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[a])
    return Array(graph, name, shape=shape, chunks=chunks, dtype=a.dtype)
def triu(m, k=0):
    """
    Upper triangle of an array with elements below the `k`-th diagonal zeroed.

    Parameters
    ----------
    m : array_like, shape (M, N)
        Input array.
    k : int, optional
        Diagonal below which to zero elements.  `k = 0` (the default) is the
        main diagonal, `k < 0` is below it and `k > 0` is above.

    Returns
    -------
    triu : ndarray, shape (M, N)
        Upper triangle of `m`, of same shape and data-type as `m`.

    Raises
    ------
    ValueError
        If `m` is not 2 dimensional.
    NotImplementedError
        If the first row chunk and first column chunk differ in size.

    See Also
    --------
    tril : lower triangle of an array
    """
    if m.ndim != 2:
        raise ValueError('input must be 2 dimensional')
    if m.chunks[0][0] != m.chunks[1][0]:
        msg = ('chunks must be a square. '
               'Use .rechunk method to change the size of chunks.')
        raise NotImplementedError(msg)

    rdim = len(m.chunks[0])
    hdim = len(m.chunks[1])
    chunk_size = m.chunks[0][0]

    token = tokenize(m, k)
    name = 'triu-' + token

    dsk = {}
    for i in range(rdim):
        for j in range(hdim):
            if chunk_size * (j - i + 1) < k:
                # Entirely below the k-th diagonal. Pass the dtype so the
                # zero block matches m.dtype (np.zeros defaults to float64).
                dsk[(name, i, j)] = (
                    np.zeros, (m.chunks[0][i], m.chunks[1][j]), m.dtype
                )
            elif chunk_size * (j - i - 1) < k <= chunk_size * (j - i + 1):
                # The diagonal crosses this block: apply np.triu with the
                # offset expressed relative to the block.
                dsk[(name, i, j)] = (np.triu, (m.name, i, j),
                                     k - (chunk_size * (j - i)))
            else:
                # Entirely on or above the diagonal: reuse the input block.
                dsk[(name, i, j)] = (m.name, i, j)
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[m])
    return Array(graph, name, shape=m.shape, chunks=m.chunks, dtype=m.dtype)
def tril(m, k=0):
    """
    Lower triangle of an array with elements above the `k`-th diagonal zeroed.

    Parameters
    ----------
    m : array_like, shape (M, M)
        Input array.
    k : int, optional
        Diagonal above which to zero elements.  `k = 0` (the default) is the
        main diagonal, `k < 0` is below it and `k > 0` is above.

    Returns
    -------
    tril : ndarray, shape (M, M)
        Lower triangle of `m`, of same shape and data-type as `m`.

    Raises
    ------
    ValueError
        If `m` is not 2 dimensional, or if the row and column chunks are not
        all the same single size.

    See Also
    --------
    triu : upper triangle of an array
    """
    if m.ndim != 2:
        raise ValueError('input must be 2 dimensional')
    if not len(set(m.chunks[0] + m.chunks[1])) == 1:
        # NOTE(review): the message mentions "lu decomposition" although
        # this is `tril`; the text is kept as-is for callers matching on it.
        msg = ('All chunks must be a square matrix to perform lu decomposition. '
               'Use .rechunk method to change the size of chunks.')
        raise ValueError(msg)

    rdim = len(m.chunks[0])
    hdim = len(m.chunks[1])
    chunk_size = m.chunks[0][0]

    token = tokenize(m, k)
    name = 'tril-' + token

    dsk = {}
    for i in range(rdim):
        for j in range(hdim):
            if chunk_size * (j - i + 1) < k:
                # Entirely on or below the k-th diagonal: reuse the block.
                dsk[(name, i, j)] = (m.name, i, j)
            elif chunk_size * (j - i - 1) < k <= chunk_size * (j - i + 1):
                # The diagonal crosses this block: apply np.tril with the
                # offset expressed relative to the block.
                dsk[(name, i, j)] = (np.tril, (m.name, i, j),
                                     k - (chunk_size * (j - i)))
            else:
                # Entirely above the diagonal. Pass the dtype so the zero
                # block matches m.dtype (np.zeros defaults to float64).
                dsk[(name, i, j)] = (
                    np.zeros, (m.chunks[0][i], m.chunks[1][j]), m.dtype
                )
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[m])
    return Array(graph, name, shape=m.shape, chunks=m.chunks, dtype=m.dtype)
def _np_fromfunction(func, shape, dtype, offset, func_kwargs):
    """
    Evaluate ``np.fromfunction`` for one block, shifting the coordinates.

    Each coordinate array handed over by ``np.fromfunction`` has the
    matching entry of `offset` added before `func` is called.
    `func_kwargs` are forwarded through ``np.fromfunction``.
    """
    def shifted(*coords, **kw):
        moved = [coord + delta for coord, delta in zip(coords, offset)]
        return func(*moved, **kw)

    return np.fromfunction(shifted, shape, dtype=dtype, **func_kwargs)
@wraps(np.fromfunction)
def fromfunction(func, chunks='auto', shape=None, dtype=None, **kwargs):
    # Resolve the dtype first, with an explicit None test: a non-structured
    # np.dtype instance can be falsy, so ``dtype or float`` could discard it.
    # The resolved dtype is also forwarded to normalize_chunks (as ``arange``
    # does), which the default ``chunks='auto'`` needs.
    dtype = float if dtype is None else dtype
    chunks = normalize_chunks(chunks, shape, dtype=dtype)
    name = 'fromfunction-' + tokenize(func, chunks, shape, dtype, kwargs)
    keys = list(product([name], *[range(len(bd)) for bd in chunks]))
    # Starting coordinate of every block along each dimension.
    aggdims = [list(accumulate(add, (0,) + bd[:-1])) for bd in chunks]
    offsets = list(product(*aggdims))
    shapes = list(product(*chunks))

    # One task per block: evaluate ``func`` on coordinates shifted by the
    # block's offset.
    values = [(_np_fromfunction, func, shp, dtype, offset, kwargs)
              for offset, shp in zip(offsets, shapes)]

    dsk = dict(zip(keys, values))

    return Array(dsk, name, chunks, dtype=dtype)
@wraps(np.repeat)
def repeat(a, repeats, axis=None):
    # Only an integer `repeats` along a single axis is supported; `axis`
    # may be omitted only for 1-d input.
    if axis is None:
        if a.ndim != 1:
            raise NotImplementedError("Must supply an integer axis value")
        axis = 0

    if not isinstance(repeats, Integral):
        raise NotImplementedError("Only integer valued repeats supported")

    if -a.ndim <= axis < 0:
        axis += a.ndim
    elif not 0 <= axis <= a.ndim - 1:
        raise ValueError("axis(=%d) out of bounds" % axis)

    if repeats == 1:
        return a

    # Cumulative block boundaries along `axis`.
    boundaries = np.cumsum((0,) + a.chunks[axis])

    # Cut every existing block into sub-ranges, dropping empty ones.
    ranges = []
    for lo, hi in sliding_window(2, boundaries):
        cuts = np.linspace(lo, hi, repeats).round(0)
        for cut_lo, cut_hi in sliding_window(2, cuts):
            if cut_lo != cut_hi:
                ranges.append(slice(cut_lo, cut_hi))

    everything = slice(None, None, None)
    before = (everything,) * axis
    after = (everything,) * (a.ndim - axis - 1)
    slabs = [a[before + (rng,) + after] for rng in ranges]

    pieces = []
    for slab in slabs:
        new_chunks = list(slab.chunks)
        assert len(new_chunks[axis]) == 1
        new_chunks[axis] = (new_chunks[axis][0] * repeats,)
        pieces.append(
            slab.map_blocks(np.repeat, repeats, axis=axis,
                            chunks=tuple(new_chunks), dtype=slab.dtype)
        )

    return concatenate(pieces, axis=axis)
@wraps(np.tile)
def tile(A, reps):
    # Only an integer `reps` is supported; copies are joined along the last
    # axis.
    if not isinstance(reps, Integral):
        raise NotImplementedError("Only integer valued `reps` supported.")

    if reps < 0:
        raise ValueError("Negative `reps` are not allowed.")
    if reps == 0:
        # Zero-length slice along the last axis.
        return A[..., :0]
    if reps == 1:
        return A

    copies = reps * [A]
    return concatenate(copies, axis=-1)
def expand_pad_value(array, pad_value):
    """
    Expand a pad specification into one ``(before, after)`` pair per axis.

    Accepted forms are a single number, a sequence holding one number, a
    sequence holding two numbers, or a sequence of ``array.ndim`` two-number
    sequences. Anything else raises ``TypeError``.
    """
    ndim = array.ndim

    # A bare number applies to both sides of every axis.
    if isinstance(pad_value, Number):
        return ndim * ((pad_value, pad_value),)

    if isinstance(pad_value, Sequence):
        count = len(pad_value)

        # One or two plain numbers: the same pair is reused for every axis.
        if count in (1, 2) and all(
            isinstance(pw, Number) for pw in pad_value
        ):
            return ndim * ((pad_value[0], pad_value[-1]),)

        # One explicit (before, after) pair per axis.
        if (
            count == ndim
            and all(isinstance(pw, Sequence) for pw in pad_value)
            and all(len(pw) == 2 for pw in pad_value)
            and all(isinstance(w, Number) for pw in pad_value for w in pw)
        ):
            return tuple((pw[0], pw[1]) for pw in pad_value)

    raise TypeError(
        "`pad_value` must be composed of integral typed values."
    )
def get_pad_shapes_chunks(array, pad_width, axes):
    """
    Compute shapes and chunks of the leading and trailing pad blocks.

    Returns ``(pad_shapes, pad_chunks)``, each a two-element list (leading
    side first). Every entry copies ``array.shape`` / ``array.chunks`` and,
    for each axis in `axes`, replaces that axis with the matching pad width.
    """
    sides = (0, 1)
    shapes = [list(array.shape) for _ in sides]
    chunks = [list(array.chunks) for _ in sides]

    for axis in axes:
        for side in sides:
            width = pad_width[axis][side]
            shapes[side][axis] = width
            # The pad along this axis is a single chunk.
            chunks[side][axis] = (width,)

    return [tuple(s) for s in shapes], [tuple(c) for c in chunks]
def linear_ramp_chunk(start, stop, num, dim, step):
    """
    Build the linear ramp for one chunk.

    For every element of `start`, ``num + 1`` evenly spaced values from
    that element to `stop` are generated; the first sample is dropped and
    the remainder is written along axis `dim` of the result, strided by
    `step`.
    """
    out_shape = list(start.shape)
    out_shape[dim] = num

    dtype = np.dtype(start.dtype)
    ramp = np.empty(tuple(out_shape), dtype=dtype)

    n_samples = num + 1
    for pos in np.ndindex(start.shape):
        # Address the whole `dim` axis at this position.
        lane = list(pos)
        lane[dim] = slice(None)
        samples = np.linspace(start[pos], stop, n_samples, dtype=dtype)
        ramp[tuple(lane)] = samples[1:][::step]

    return ramp
def pad_edge(array, pad_width, mode, *args):
    """
    Pad using only information available at the array's edges.

    Handles the ``"constant"``, ``"edge"`` and ``"linear_ramp"`` modes. The
    array is padded one axis at a time; each pass concatenates a leading
    pad, the current result and a trailing pad along that axis.
    """
    args = tuple(expand_pad_value(array, e) for e in args)

    result = array
    for d in range(array.ndim):
        pad_shapes, pad_chunks = get_pad_shapes_chunks(result, pad_width, (d,))

        # Placeholder pair, replaced below for every recognised mode.
        pad_arrays = [result, result]

        if mode == "constant":
            fill = [asarray(c).astype(result.dtype) for c in args[0][d]]
            pad_arrays = [
                broadcast_to(value, shape, chunks)
                for value, shape, chunks in zip(fill, pad_shapes, pad_chunks)
            ]
        elif mode in ("edge", "linear_ramp"):
            # Select the first and the last slab along axis `d`.
            everything = (slice(None),) * result.ndim
            first = everything[:d] + (slice(None, 1, None),) + everything[d + 1:]
            last = everything[:d] + (slice(-1, None, None),) + everything[d + 1:]
            pad_arrays = [result[first], result[last]]

            if mode == "edge":
                pad_arrays = [
                    broadcast_to(edge, shape, chunks)
                    for edge, shape, chunks in zip(
                        pad_arrays, pad_shapes, pad_chunks
                    )
                ]
            elif mode == "linear_ramp":
                end_values = args[0][d]
                ramps = []
                # side 0 gets step -1, side 1 gets step +1.
                for side, (edge, end, width, chunks) in enumerate(
                    zip(pad_arrays, end_values, pad_width[d], pad_chunks)
                ):
                    ramps.append(
                        edge.map_blocks(
                            linear_ramp_chunk, end, width,
                            chunks=chunks, dtype=result.dtype,
                            dim=d, step=(2 * side - 1)
                        )
                    )
                pad_arrays = ramps

        result = concatenate([pad_arrays[0], result, pad_arrays[1]], axis=d)

    return result
def pad_reuse(array, pad_width, mode, *args):
    """
    Helper function for padding boundaries with values in the array.

    Handles the cases where the padding is constructed from values in
    the array. Namely by reflecting them or tiling them to create periodic
    boundary constraints.

    A grid of 3 positions per axis (before, centre, after) is filled with
    slices of `array` and then assembled with `block`.

    Raises
    ------
    NotImplementedError
        If `mode` is ``"reflect"`` or ``"symmetric"`` and ``"odd"`` appears
        in `args`.
    """
    if mode in ["reflect", "symmetric"] and "odd" in args:
        raise NotImplementedError(
            "`pad` does not support `reflect_type` of `odd`."
        )

    # One object cell per combination of (before, centre, after) per axis.
    result = np.empty(array.ndim * (3,), dtype=object)
    for idx in np.ndindex(result.shape):
        select = []
        orient = []
        for i, s, pw in zip(idx, array.shape, pad_width):
            # For "wrap" the two pad widths are swapped here, and the grid
            # position is mirrored further below.
            if mode == "wrap":
                pw = pw[::-1]

            if i < 1:
                # Leading position: take values from the start of the axis;
                # "reflect" skips the first element.
                if mode == "reflect":
                    select.append(slice(1, pw[0] + 1, None))
                else:
                    select.append(slice(None, pw[0], None))
            elif i > 1:
                # Trailing position: take values from the end of the axis;
                # "reflect" skips the last element.
                if mode == "reflect":
                    select.append(slice(s - pw[1] - 1, s - 1, None))
                else:
                    select.append(slice(s - pw[1], None, None))
            else:
                # Centre position: the whole axis.
                select.append(slice(None))

            # Reflected / symmetric pads are reversed along padded axes.
            if i != 1 and mode in ["reflect", "symmetric"]:
                orient.append(slice(None, None, -1))
            else:
                orient.append(slice(None))

        select = tuple(select)
        orient = tuple(orient)

        # Mirror the grid position (0 <-> 2) for "wrap".
        if mode == "wrap":
            idx = tuple(2 - i for i in idx)

        result[idx] = array[select][orient]

    result = block(result.tolist())

    return result
def pad_stats(array, pad_width, mode, *args):
    """
    Pad with a statistic computed from the array.

    Supports the ``"maximum"``, ``"mean"`` and ``"minimum"`` modes;
    ``"median"`` raises ``NotImplementedError``. ``args[0]`` is the
    ``stat_length`` specification, i.e. how many values at each edge feed
    the statistic. A grid of 3 positions per axis (before, centre, after)
    is filled and then assembled with `block`.
    """
    if mode == "median":
        raise NotImplementedError("`pad` does not support `mode` of `median`.")

    stat_length = expand_pad_value(array, args[0])

    grid = np.empty(array.ndim * (3,), dtype=object)
    for position in np.ndindex(grid.shape):
        reduce_axes = []
        selection = []
        out_shape = []
        out_chunks = []

        per_axis = zip(
            position, array.shape, array.chunks, pad_width, stat_length
        )
        for axis, (where, size, axis_chunks, width, length) in enumerate(
            per_axis
        ):
            if where < 1:
                # Leading pad: statistic over the first `length[0]` values.
                reduce_axes.append(axis)
                selection.append(slice(None, length[0], None))
                out_shape.append(width[0])
                out_chunks.append(width[0])
            elif where > 1:
                # Trailing pad: statistic over the last `length[1]` values.
                reduce_axes.append(axis)
                selection.append(slice(size - length[1], None, None))
                out_shape.append(width[1])
                out_chunks.append(width[1])
            else:
                # Centre: the axis is kept untouched.
                selection.append(slice(None))
                out_shape.append(size)
                out_chunks.append(axis_chunks)

        reduce_axes = tuple(reduce_axes)
        piece = array[tuple(selection)]

        if mode == "maximum":
            piece = piece.max(axis=reduce_axes, keepdims=True)
        elif mode == "mean":
            piece = piece.mean(axis=reduce_axes, keepdims=True)
        elif mode == "minimum":
            piece = piece.min(axis=reduce_axes, keepdims=True)

        grid[position] = broadcast_to(
            piece, tuple(out_shape), chunks=tuple(out_chunks)
        )

    return block(grid.tolist())
def wrapped_pad_func(array, pad_func, iaxis_pad_width, iaxis, pad_func_kwargs):
    """
    Apply `pad_func` to every 1-d lane of `array` along axis `iaxis`.

    `pad_func` is called as
    ``pad_func(lane, iaxis_pad_width, iaxis, pad_func_kwargs)`` and its
    return value is stored into the matching lane of a new array shaped
    like `array`.
    """
    out = np.empty_like(array)
    other_dims = array.shape[:iaxis] + array.shape[iaxis + 1:]
    whole_axis = (slice(None),)

    for pos in np.ndindex(other_dims):
        lane = pos[:iaxis] + whole_axis + pos[iaxis:]
        out[lane] = pad_func(
            array[lane], iaxis_pad_width, iaxis, pad_func_kwargs
        )

    return out
def pad_udf(array, pad_width, mode, **kwargs):
    """
    Pad using a user defined function.

    The array is first padded with zeros via ``pad_edge``. Then, one axis at
    a time, it is rechunked so that axis is a single chunk and `mode` (the
    user function) is applied through ``wrapped_pad_func``. The original
    chunking of the padded array is restored at the end.
    """
    padded = pad_edge(array, pad_width, "constant", 0)
    target_chunks = padded.chunks

    for axis in range(padded.ndim):
        # Collapse `axis` into one chunk; other axes keep their chunking.
        whole_axis = (padded.shape[axis:axis + 1],)
        single_axis_chunks = (
            target_chunks[:axis] + whole_axis + target_chunks[axis + 1:]
        )
        rechunked = padded.rechunk(single_axis_chunks)

        padded = rechunked.map_blocks(
            wrapped_pad_func,
            name="pad",
            dtype=rechunked.dtype,
            pad_func=mode,
            iaxis_pad_width=pad_width[axis],
            iaxis=axis,
            pad_func_kwargs=kwargs,
        )

    return padded.rechunk(target_chunks)
@wraps(np.pad)
def pad(array, pad_width, mode, **kwargs):
    # Validate `mode`, fill in the default keyword for that mode, then
    # dispatch to the matching helper.
    array = asarray(array)

    pad_width = expand_pad_value(array, pad_width)

    stat_modes = ("maximum", "mean", "median", "minimum")
    edge_modes = ("constant", "edge", "linear_ramp")
    reuse_modes = ("reflect", "symmetric", "wrap")

    # Each mode accepts at most one keyword; install its default.
    if mode in stat_modes:
        kwargs.setdefault("stat_length", array.shape)
    elif mode == "constant":
        kwargs.setdefault("constant_values", 0)
    elif mode == "linear_ramp":
        kwargs.setdefault("end_values", 0)
    elif mode in ("reflect", "symmetric"):
        kwargs.setdefault("reflect_type", "even")
    elif mode in ("edge", "wrap"):
        if kwargs:
            raise TypeError("Got unsupported keyword arguments.")
    elif callable(mode):
        kwargs.setdefault("kwargs", {})
    else:
        raise ValueError("Got an unsupported `mode`.")

    is_udf = callable(mode)
    if len(kwargs) > 1 and not is_udf:
        raise TypeError("Got too many keyword arguments.")

    if mode in stat_modes:
        extra = tuple(kwargs.values())
        return pad_stats(array, pad_width, mode, *extra)
    if mode in edge_modes:
        extra = tuple(kwargs.values())
        return pad_edge(array, pad_width, mode, *extra)
    if mode in reuse_modes:
        extra = tuple(kwargs.values())
        return pad_reuse(array, pad_width, mode, *extra)
    if is_udf:
        return pad_udf(array, pad_width, mode, **kwargs)

    raise ValueError("Unsupported mode selected.")