You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/array/slicing.py

1039 lines
33 KiB

from __future__ import absolute_import, division, print_function
from itertools import product
import math
from numbers import Integral, Number
from operator import getitem, itemgetter
import warnings
import numpy as np
from toolz import memoize, merge, pluck, concat
from .. import core
from ..highlevelgraph import HighLevelGraph
from ..base import tokenize, is_dask_collection
# Module-level "select everything" slice (what ``x[:]`` uses along one axis).
colon = slice(None, None, None)
def _sanitize_index_element(ind):
"""Sanitize a one-element index."""
if isinstance(ind, Number):
ind2 = int(ind)
if ind2 != ind:
raise IndexError("Bad index. Must be integer-like: %s" % ind)
else:
return ind2
elif ind is None:
return None
else:
raise TypeError("Invalid index type", type(ind), ind)
def sanitize_index(ind):
    """ Sanitize the elements for indexing along one axis

    ``None`` and dask collections are returned unchanged, slices and scalar
    numbers have their components coerced to ``int``, and everything else is
    converted to a NumPy array: boolean masks become the positions of their
    true entries, integer arrays are returned as they are, and float arrays
    are accepted only when every value is integer-like.

    >>> sanitize_index([2, 3, 5])
    array([2, 3, 5])
    >>> sanitize_index([True, False, True, False])
    array([0, 2])
    >>> sanitize_index(np.array([1, 2, 3]))
    array([1, 2, 3])
    >>> sanitize_index(np.array([False, True, True]))
    array([1, 2])
    >>> type(sanitize_index(np.int32(0)))
    <class 'int'>
    >>> sanitize_index(1.0)
    1
    >>> sanitize_index(0.5)
    Traceback (most recent call last):
    ...
    IndexError: Bad index. Must be integer-like: 0.5
    """
    if ind is None:
        return None
    if isinstance(ind, slice):
        bounds = (ind.start, ind.stop, ind.step)
        return slice(*map(_sanitize_index_element, bounds))
    if isinstance(ind, Number):
        return _sanitize_index_element(ind)
    if is_dask_collection(ind):
        return ind

    index_array = np.asanyarray(ind)
    dtype = index_array.dtype

    if dtype == bool:
        hits = np.nonzero(index_array)
        # np.nonzero returns one array per dimension; unwrap the 1-d case
        if len(hits) == 1:
            hits = hits[0]
        return np.asanyarray(hits)

    if np.issubdtype(dtype, np.integer):
        return index_array

    if not np.issubdtype(dtype, np.floating):
        raise TypeError("Invalid index type", type(ind), ind)

    int_index = index_array.astype(np.intp)
    if np.allclose(index_array, int_index):
        return int_index

    # Report the first offending (non integer-like) value
    mismatch = ~np.isclose(index_array, int_index)
    first_err = index_array.ravel()[np.flatnonzero(mismatch)[0]]
    raise IndexError("Bad index. Must be integer-like: %s" % first_err)
def slice_array(out_name, in_name, blockdims, index):
    """
    Master function for array slicing

    Builds a new dask graph that slices blocks along every dimension and
    combines (via cartesian product) the per-dimension slices so that the
    sliced blocks together give the same result as applying ``index`` to the
    original array.

    Index must be a tuple.  It may contain the following types

        int, slice, list (at most one list), None

    Parameters
    ----------
    out_name - string
      This is the dask variable output name
    in_name - string
      This is the dask variable name that will be used as input
    blockdims - iterable of iterables of integers
      Block lengths along each dimension, e.g. ``((10, 10), (20, 20))``
    index - tuple of integers, slices, lists, or None

    Returns
    -------
    Dict where the keys are tuples of

        (out_name, dim_index[, dim_index[, ...]])

    and the values are

        (function, (in_name, dim_index, dim_index, ...),
                   (slice(...), [slice()[,...]])

    Also new blockdims with shapes of each block

        ((10, 10, 10, 10), (20, 20))

    Examples
    --------
    >>> dsk, blockdims = slice_array('y', 'x', [(20, 20, 20, 20, 20)],
    ...                              (slice(10, 35),))  #  doctest: +SKIP
    >>> dsk  # doctest: +SKIP
    {('y', 0): (getitem, ('x', 0), (slice(10, 20),)),
     ('y', 1): (getitem, ('x', 1), (slice(0, 15),))}
    >>> blockdims  # doctest: +SKIP
    ((10, 15),)

    See Also
    --------
    This function works by successively unwrapping cases and passing down
    through a sequence of functions.

    slice_with_newaxes - handle None/newaxis case
    slice_wrap_lists - handle fancy indexing with lists
    slice_slices_and_integers - handle everything else
    """
    blockdims = tuple(tuple(bd) for bd in blockdims)
    full = slice(None, None, None)

    # x[:, :, :] - nothing to cut: alias every output block to its input block
    if all(isinstance(ind, slice) and ind == full for ind in index):
        dsk = {}
        for suffix in product(*(range(len(bd)) for bd in blockdims)):
            dsk[(out_name,) + suffix] = (in_name,) + suffix
        return dsk, blockdims

    # Pad with trailing colons as needed.  x[5] -> x[5, :, :]
    # (None entries add an axis and therefore do not consume a dimension.)
    consumed = sum(1 for ind in index if ind is not None)
    index += (full,) * (len(blockdims) - consumed)

    # Hand over to the next stage
    dsk_out, bd_out = slice_with_newaxes(out_name, in_name, blockdims, index)
    return dsk_out, tuple(tuple(bd) for bd in bd_out)
def slice_with_newaxes(out_name, in_name, blockdims, index):
    """
    Handle indexing with Nones

    Removes the ``None`` entries from ``index``, delegates the remaining
    index to ``slice_wrap_lists`` and then re-inserts the new axes into the
    output keys, the per-block slicers and the block dimensions.
    """
    # Positions of the Nones in the index as given
    none_positions = [pos for pos, ind in enumerate(index) if ind is None]

    # Positions of the Nones in the *output*: every integer index that
    # precedes a None removes one axis, shifting the None to the left
    shifted_positions = [
        pos - sum(isinstance(ind, Integral) for ind in index[:pos])
        for pos in none_positions
    ]

    # Pass down and do work without the Nones
    stripped = tuple(ind for ind in index if ind is not None)
    dsk, blockdims2 = slice_wrap_lists(out_name, in_name, blockdims, stripped)

    if not shifted_positions:
        return dsk, blockdims2

    expand = expander(shifted_positions)
    expand_orig = expander(none_positions)

    final = {}
    intermediate = {}
    for key, task in dsk.items():
        if key[0] == out_name:
            # Insert ",0" into the key:  ('x', 2, 3) -> ('x', 0, 2, 0, 3)
            # and a None into the block slicer at the original position
            new_key = (out_name,) + expand(key[1:], 0)
            final[new_key] = task[:2] + (expand_orig(task[2], None),)
        else:
            # Intermediate parts of the graph that are not the output
            intermediate[key] = task

    # Insert (1,) into blockdims:  ((2, 2), (3, 3)) -> ((2, 2), (1,), (3, 3))
    return merge(final, intermediate), expand(blockdims2, (1,))
def slice_wrap_lists(out_name, in_name, blockdims, index):
    """
    Fancy indexing along blocked array dasks

    Handles an index containing (at most) one array of positions.  Indexes
    without such an array are forwarded to ``slice_slices_and_integers``.

    Raises
    ------
    IndexError
        If ``index`` and ``blockdims`` have different lengths.
    NotImplementedError
        If more than one entry of ``index`` is an array with ``ndim > 0``.

    See Also
    --------
    take - handle slicing with lists ("fancy" indexing)
    slice_slices_and_integers - handle slicing with slices and integers
    """
    assert all(isinstance(i, (slice, list, Integral, np.ndarray))
               for i in index)
    if len(blockdims) != len(index):
        raise IndexError("Too many indices for array")

    # Which axes are indexed by an array?
    where_list = [pos for pos, ind in enumerate(index)
                  if isinstance(ind, np.ndarray) and ind.ndim > 0]
    if len(where_list) > 1:
        raise NotImplementedError("Don't yet support nd fancy indexing")

    # An empty array is just treated as a zero length slice
    if where_list and not index[where_list[0]].size:
        patched = list(index)
        patched[where_list.pop()] = slice(0, 0, 1)
        index = tuple(patched)

    # No arrays left: plain slices and integers only
    if not where_list:
        return slice_slices_and_integers(out_name, in_name, blockdims, index)

    axis = where_list[0]
    full = slice(None, None, None)

    # Only an array plus full slices: a single take does the job
    if all(isinstance(i, np.ndarray) or i == full for i in index):
        blockdims2, dsk3 = take(out_name, in_name, blockdims,
                                index[axis], axis=axis)
        return dsk3, blockdims2

    # Mixed case: first apply the slices/integers with the array replaced by
    # a full slice, then take along the (possibly shifted) axis
    index_without_list = tuple(full if isinstance(i, np.ndarray) else i
                               for i in index)
    tmp = 'slice-' + tokenize((out_name, in_name, blockdims, index))
    dsk, blockdims2 = slice_slices_and_integers(tmp, in_name, blockdims,
                                                index_without_list)

    # Integer indices before ``axis`` collapse an axis each
    axis2 = axis - sum(isinstance(ind, Integral) for ind in index[:axis])

    blockdims2, dsk2 = take(out_name, tmp, blockdims2, index[axis],
                            axis=axis2)
    return merge(dsk, dsk2), blockdims2
def slice_slices_and_integers(out_name, in_name, blockdims, index):
    """
    Dask array indexing with slices and integers

    Parameters
    ----------
    out_name, in_name : str
        Names used for the output and input keys of the graph.
    blockdims : sequence of sequences of numbers
        Block lengths along every dimension.
    index : sequence of slice or integer
        One entry per dimension.

    Returns
    -------
    (dsk_out, new_blockdims)
        ``dsk_out`` maps ``(out_name, i, j, ...)`` to
        ``(getitem, (in_name, i', j', ...), (slicer, slicer, ...))`` and
        ``new_blockdims`` lists the block lengths of every dimension that was
        not removed by an integer index.

    Raises
    ------
    ValueError
        If a dimension whose size is unknown (NaN) is indexed with anything
        other than a full slice.

    See Also
    --------
    _slice_1d
    """
    shape = tuple(map(sum, blockdims))

    for dim, ind in zip(shape, index):
        if np.isnan(dim) and ind != slice(None, None, None):
            # ``shape`` is a tuple, so wrap it to format it as one value
            raise ValueError("Arrays chunk sizes are unknown: %s" % (shape,))

    assert all(isinstance(ind, (slice, Integral)) for ind in index)
    assert len(index) == len(blockdims)

    # Get a list (for each dimension) of dicts{blocknum: slice()}
    block_slices = list(map(_slice_1d, shape, blockdims, index))
    sorted_block_slices = [sorted(i.items()) for i in block_slices]

    # (in_name, 1, 1, 2), (in_name, 1, 1, 4), (in_name, 2, 1, 2), ...
    in_names = list(product([in_name],
                            *[pluck(0, s) for s in sorted_block_slices]))

    # (out_name, 0, 0, 0), (out_name, 0, 0, 1), (out_name, 0, 1, 0), ...
    # Dimensions indexed by an integer vanish from the output key; for a
    # negative step the output blocks are numbered in reverse order.
    out_names = list(product(
        [out_name],
        *[range(len(d))[::-1] if i.step and i.step < 0 else range(len(d))
          for d, i in zip(block_slices, index)
          if not isinstance(i, Integral)]))

    all_slices = list(product(*[pluck(1, s) for s in sorted_block_slices]))

    dsk_out = {out_key: (getitem, in_key, slices)
               for out_key, in_key, slices
               in zip(out_names, in_names, all_slices)}

    new_blockdims = [new_blockdim(d, db, i)
                     for d, i, db in zip(shape, index, blockdims)
                     if not isinstance(i, Integral)]

    return dsk_out, new_blockdims
def _slice_1d(dim_shape, lengths, index):
    """Returns a dict of {blocknum: slice}

    This function figures out where each slice should start in each
    block for a single dimension. If the slice won't return any elements
    in the block, that block will not be in the output.  If no block is
    selected at all, the result is ``{0: slice(0, 0, 1)}``.

    Parameters
    ----------

    dim_shape - the number of elements in this dimension.
      This should be a positive, non-zero integer
    lengths - the number of elements of every block in this dimension,
      in order
    index - a description of the elements in this dimension that we want
      This might be an integer or a slice()

    Returns
    -------

    dictionary where the keys are the integer index of the blocks that
      should be sliced and the values are the slices (or, for an integer
      index, the integer position inside the block)

    Examples
    --------

    Trivial slicing

    >>> _slice_1d(100, [60, 40], slice(None, None, None))
    {0: slice(None, None, None), 1: slice(None, None, None)}

    100 length array cut into length 20 pieces, slice 0:35

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 35))
    {0: slice(None, None, None), 1: slice(0, 15, 1)}

    Support irregular blocks and various slices

    >>> _slice_1d(100, [20, 10, 10, 10, 25, 25], slice(10, 35))
    {0: slice(10, 20, 1), 1: slice(None, None, None), 2: slice(0, 5, 1)}

    Support step sizes

    >>> _slice_1d(100, [15, 14, 13], slice(10, 41, 3))
    {0: slice(10, 15, 3), 1: slice(1, 14, 3), 2: slice(2, 12, 3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(0, 100, 40))  # step > blocksize
    {0: slice(0, 20, 40), 2: slice(0, 20, 40), 4: slice(0, 20, 40)}

    Also support indexing single elements

    >>> _slice_1d(100, [20, 20, 20, 20, 20], 25)
    {1: 5}

    And negative slicing

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 0, -3)) # doctest: +NORMALIZE_WHITESPACE
    {4: slice(-1, -21, -3),
     3: slice(-2, -21, -3),
     2: slice(-3, -21, -3),
     1: slice(-1, -21, -3),
     0: slice(-2, -20, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, 12, -3)) # doctest: +NORMALIZE_WHITESPACE
    {4: slice(-1, -21, -3),
     3: slice(-2, -21, -3),
     2: slice(-3, -21, -3),
     1: slice(-1, -21, -3),
     0: slice(-2, -8, -3)}

    >>> _slice_1d(100, [20, 20, 20, 20, 20], slice(100, -12, -3))
    {4: slice(-1, -12, -3)}
    """
    # Cumulative (exclusive) end position of every block
    chunk_boundaries = np.cumsum(lengths, dtype=np.int64)

    if isinstance(index, Integral):
        # use right-side search to be consistent with previous result
        i = chunk_boundaries.searchsorted(index, side='right')
        if i > 0:
            # the very first chunk has no relative shift
            ind = index - chunk_boundaries[i - 1]
        else:
            ind = index
        return {int(i): int(ind)}

    assert isinstance(index, slice)

    # Full slice: every block is taken whole
    if index == colon:
        return {k: colon for k in range(len(lengths))}

    # Fill in defaults for missing start/stop depending on direction
    step = index.step or 1
    if step > 0:
        start = index.start or 0
        stop = index.stop if index.stop is not None else dim_shape
    else:
        start = index.start if index.start is not None else dim_shape - 1
        # clip a start beyond the end to the last element
        start = dim_shape - 1 if start >= dim_shape else start
        # becomes -1 after the posify step below, i.e. "run past element 0"
        stop = -(dim_shape + 1) if index.stop is None else index.stop

    # posify start and stop
    if start < 0:
        start += dim_shape
    if stop < 0:
        stop += dim_shape

    d = dict()
    if step > 0:
        # First and (one past) last block that can contain selected elements
        istart = chunk_boundaries.searchsorted(start, side='right')
        istop = chunk_boundaries.searchsorted(stop, side='left')

        # the bound is not exactly tight; make it tighter?
        istop = min(istop + 1, len(lengths))

        # jump directly to istart
        # (start/stop become relative to the beginning of block ``istart``)
        if istart > 0:
            start = start - chunk_boundaries[istart - 1]
            stop = stop - chunk_boundaries[istart - 1]

        for i in range(istart, istop):
            length = lengths[i]
            if start < length and stop > 0:
                d[i] = slice(start, min(stop, length), step)
                # offset of the first selected element in the next block
                start = (start - length) % step
            else:
                start = start - length
            # keep stop relative to the beginning of the next block
            stop -= length
    else:
        rstart = start  # running start

        istart = chunk_boundaries.searchsorted(start, side='left')
        istop = chunk_boundaries.searchsorted(stop, side='right')

        # the bound is not exactly tight; make it tighter?
        istart = min(istart + 1, len(chunk_boundaries) - 1)
        istop = max(istop - 1, -1)

        # walk the blocks from the highest one down
        for i in range(istart, istop, -1):
            chunk_stop = chunk_boundaries[i]
            # create a chunk start and stop
            if i == 0:
                chunk_start = 0
            else:
                chunk_start = chunk_boundaries[i - 1]

            # if our slice is in this chunk
            if (chunk_start <= rstart < chunk_stop) and (rstart > stop):
                # positions are expressed relative to the END of the block,
                # hence the negative numbers in the resulting slice
                d[i] = slice(rstart - chunk_stop,
                             max(chunk_start - chunk_stop - 1,
                                 stop - chunk_stop),
                             step)

                # compute the next running start point,
                offset = (rstart - (chunk_start - 1)) % step
                rstart = chunk_start + offset - 1

    # replace 0:20:1 with : if appropriate
    for k, v in d.items():
        if v == slice(0, lengths[k], 1):
            d[k] = slice(None, None, None)

    if not d:  # special case x[:0]
        d[0] = slice(0, 0, 1)

    return d
def partition_by_size(sizes, seq):
    """Split sorted positions ``seq`` into one array per block.

    ``sizes`` are the consecutive block lengths.  Each returned array holds
    the entries of ``seq`` that fall into the corresponding block, expressed
    relative to the start of that block.

    >>> partition_by_size([10, 20, 10], [1, 5, 9, 12, 29, 35])
    [array([1, 5, 9]), array([ 2, 19]), array([5])]
    """
    seq = np.asanyarray(seq)
    n_blocks = len(sizes)

    # edges[k] is the global position at which block k begins
    edges = np.empty(n_blocks + 1, dtype=int)
    edges[0] = 0
    np.cumsum(sizes, out=edges[1:])

    # cuts[k]:cuts[k + 1] is the run of ``seq`` belonging to block k
    cuts = np.empty(n_blocks + 1, dtype=int)
    cuts[0] = 0
    cuts[1:] = np.searchsorted(seq, edges[1:])

    pieces = []
    for lo, hi, offset in zip(cuts, cuts[1:], edges):
        pieces.append(seq[lo:hi] - offset)
    return pieces
def issorted(seq):
    """ Is sequence sorted (non-decreasing)?

    Accepts any sequence that NumPy can convert to an array.  The input is
    converted first because comparing two plain lists/tuples with ``<=`` is a
    lexicographic comparison rather than an element-wise one, which would
    report e.g. ``[1, 3, 2]`` as sorted.

    Returns
    -------
    bool

    >>> issorted([1, 2, 3])
    True
    >>> issorted([3, 1, 2])
    False
    >>> issorted([1, 3, 2])
    False
    """
    seq = np.asanyarray(seq)
    if len(seq) == 0:
        return True
    # every element must be <= its successor
    return bool(np.all(seq[:-1] <= seq[1:]))
def slicing_plan(chunks, index):
    """ Construct a plan to slice chunks with the given index

    Consecutive entries of ``index`` that fall into the same chunk are
    grouped together; every group becomes one output chunk.

    Parameters
    ----------
    chunks : Tuple[int]
        One dimensions worth of chunking information
    index : np.ndarray[int]
        The index passed to slice on that dimension

    Returns
    -------
    out : List[Tuple[int, np.ndarray]]
        A list of chunk/sub-index pairs corresponding to each output chunk.
        The sub-index is relative to the start of its chunk.
    """
    index = np.asanyarray(index)
    cum_chunks = np.cumsum(chunks)

    # Chunk number each requested position lives in
    chunk_locations = np.searchsorted(cum_chunks, index, side='right')

    # Group boundaries: wherever the chunk number changes between neighbours
    breaks = np.nonzero(np.diff(chunk_locations))[0] + 1
    bounds = np.concatenate([[0], breaks, [len(chunk_locations)]])

    plan = []
    for lo, hi in zip(bounds[:-1], bounds[1:]):
        chunk = chunk_locations[lo]
        sub_index = index[lo:hi]
        if chunk > 0:
            # make positions relative to the beginning of the chunk
            sub_index = sub_index - cum_chunks[chunk - 1]
        plan.append((chunk, sub_index))
    return plan
def take(outname, inname, chunks, index, axis=0):
    """ Index array with an iterable of index

    Handles a single index by a single list

    Mimics ``np.take``

    The index is cut into runs of consecutive positions that live in the
    same input chunk (see ``slicing_plan``); every run produces one output
    chunk along ``axis``.  A ``PerformanceWarning`` is emitted when this
    yields at least ten times as many chunks as the input had along ``axis``.

    Returns
    -------
    (chunks, dsk)
        The new chunks and a graph whose values have the form
        ``(getitem, (inname, ...), (slicer, ...))``.

    >>> chunks, dsk = take('y', 'x', [(20, 20, 20, 20)], [5, 1, 47, 3], axis=0)
    >>> chunks
    ((2, 1, 1),)

    When list is sorted we retain original block structure

    >>> chunks, dsk = take('y', 'x', [(20, 20, 20, 20)], [1, 3, 5, 47], axis=0)
    >>> chunks
    ((3, 1),)
    """
    plan = slicing_plan(chunks[axis], index)

    n_chunks = len(chunks[axis])
    if len(plan) >= n_chunks * 10:
        factor = math.ceil(len(plan) / n_chunks)
        from .core import PerformanceWarning
        warnings.warn("Slicing with an out-of-order index is generating %d "
                      "times more chunks" % factor, PerformanceWarning,
                      stacklevel=6)

    source_blocks = [blk for blk, _ in plan]
    sub_indices = [sub for _, sub in plan]

    block_ranges = [range(len(bd)) for bd in chunks]

    # Output blocks are simply numbered 0..len(plan)-1 along ``axis``
    out_ranges = list(block_ranges)
    out_ranges[axis] = list(range(len(plan)))

    # ... and each of them reads from the input block named by the plan
    in_ranges = list(block_ranges)
    in_ranges[axis] = source_blocks

    # Full slices everywhere except along ``axis``
    selectors = [[colon] * len(bd) for bd in chunks]
    selectors[axis] = sub_indices

    out_keys = product([outname], *out_ranges)
    in_keys = product([inname], *in_ranges)
    slicers = product(*selectors)

    dsk = {}
    for out_key, in_key, slc in zip(out_keys, in_keys, slicers):
        dsk[out_key] = (getitem, in_key, slc)

    chunks2 = list(chunks)
    chunks2[axis] = tuple(len(sub) for sub in sub_indices)
    return tuple(chunks2), dsk
def posify_index(shape, ind):
    """ Flip negative indices around to positive ones

    Tuples are handled element-wise against the matching entry of ``shape``.
    Indices along a dimension of unknown (NaN) size, and index types other
    than integers, lists and arrays, are returned unchanged.

    >>> posify_index(10, 3)
    3
    >>> posify_index(10, -3)
    7
    >>> posify_index(10, [3, -3])
    array([3, 7])

    >>> posify_index((10, 20), (3, -3))
    (3, 17)
    >>> posify_index((10, 20), (3, [3, 4, -3]))  # doctest: +NORMALIZE_WHITESPACE
    (3, array([ 3,  4, 17]))
    """
    if isinstance(ind, tuple):
        return tuple(posify_index(dim, sub) for dim, sub in zip(shape, ind))

    if isinstance(ind, Integral):
        # NaN is only tested for negative values (short-circuit)
        wraps = ind < 0 and not math.isnan(shape)
        return ind + shape if wraps else ind

    if isinstance(ind, (np.ndarray, list)) and not math.isnan(shape):
        arr = np.asanyarray(ind)
        return np.where(arr < 0, arr + shape, arr)

    return ind
@memoize
def _expander(where):
    """Build ``expand(seq, val)`` which inserts ``val`` at positions ``where``.

    ``where`` holds positions in the *result*.  For a non-empty ``where`` the
    function is generated as source code and compiled, and always returns a
    tuple; for an empty ``where`` the returned function hands ``seq`` back
    unchanged.
    """
    if not where:
        def expand(seq, val):
            return seq
        return expand

    pieces = []
    taken = 0  # how many items of ``seq`` have been consumed so far
    for pos in range(max(where) + 1):
        if pos in where:
            pieces.append("val, ")
        else:
            pieces.append("seq[%d], " % taken)
            taken += 1
    left = "".join(pieces)
    # whatever is left of ``seq`` after the last insertion is appended as is
    right = "seq[%d:]" % taken

    decl = """def expand(seq, val):
    return ({left}) + tuple({right})
""".format(left=left, right=right)

    ns = {}
    exec(compile(decl, "<dynamic>", "exec"), ns, ns)
    return ns['expand']
def expander(where):
    """Create a function to insert value at many locations in sequence.

    ``where`` lists the positions, in the result, at which the value is
    inserted.

    >>> expander([0, 2])(['a', 'b', 'c'], 'z')
    ('z', 'a', 'z', 'b', 'c')
    """
    # The builder is memoized, so hand it an immutable tuple
    positions = tuple(where)
    return _expander(positions)
def new_blockdim(dim_shape, lengths, index):
    """Block lengths along one dimension after applying ``index``.

    A full slice leaves ``lengths`` untouched, a list yields a single block
    with as many elements as the list, and any other slice is resolved block
    by block with ``_slice_1d``.

    >>> new_blockdim(100, [20, 10, 20, 10, 40], slice(0, 90, 2))
    [10, 5, 10, 5, 15]
    >>> new_blockdim(100, [20, 10, 20, 10, 40], [5, 1, 30, 22])
    [4]
    >>> new_blockdim(100, [20, 10, 20, 10, 40], slice(90, 10, -2))
    [16, 5, 10, 5, 4]
    """
    full = slice(None, None, None)
    if index == full:
        return lengths
    if isinstance(index, list):
        return [len(index)]
    assert not isinstance(index, Integral)

    per_block = _slice_1d(dim_shape, lengths, index)

    slices = []
    for blk in sorted(per_block):
        slc = per_block[blk]
        if slc == full:
            # spell a whole block out so its length can be computed below
            slc = slice(0, lengths[blk], 1)
        slices.append(slc)

    # With a negative step the output blocks come out in reverse order
    if isinstance(index, slice) and index.step and index.step < 0:
        slices.reverse()

    return [int(math.ceil((1. * slc.stop - slc.start) / slc.step))
            for slc in slices]
def replace_ellipsis(n, index):
    """ Replace ... with slices, :, : ,:

    ``n`` is the number of dimensions of the array being indexed.  Only the
    first ``Ellipsis`` found in ``index`` is expanded; ``None`` entries do not
    count towards the dimensions already covered.

    >>> replace_ellipsis(4, (3, Ellipsis, 2))
    (3, slice(None, None, None), slice(None, None, None), 2)

    >>> replace_ellipsis(2, (Ellipsis, None))
    (slice(None, None, None), slice(None, None, None), None)
    """
    # Compare by identity: index may contain arrays, so ``in`` / ``.index``
    # (which use ==) are not safe here
    loc = next((pos for pos, ind in enumerate(index) if ind is Ellipsis),
               None)
    if loc is None:
        return index

    n_none = sum(1 for ind in index if ind is None)
    extra_dimensions = n - (len(index) - n_none - 1)
    filler = (slice(None, None, None),) * extra_dimensions
    return index[:loc] + filler + index[loc + 1:]
def normalize_slice(idx, dim):
    """ Normalize slices to canonical form

    Bounds and step that merely restate the defaults are replaced by
    ``None``.  Anything that is not a slice, and any slice along a dimension
    of unknown (NaN) length, is returned unchanged.

    Parameters
    ----------
    idx: slice or other index
    dim: dimension length

    Examples
    --------
    >>> normalize_slice(slice(0, 10, 1), 10)
    slice(None, None, None)
    """
    if not isinstance(idx, slice) or math.isnan(dim):
        return idx

    start, stop, step = idx.indices(dim)
    if step > 0:
        start = None if start == 0 else start
        stop = None if stop >= dim else stop
        step = None if step == 1 else step
        # an empty selection is written as start:start
        if start is not None and stop is not None and stop < start:
            stop = start
    elif step < 0:
        start = None if start >= dim - 1 else start
        stop = None if stop < 0 else stop
    return slice(start, stop, step)
def normalize_index(idx, shape):
    """ Normalize slicing indexes

    1.  Wraps a non-tuple index into a tuple
    2.  Replaces ellipses with many full slices
    3.  Adds full slices to end of index
    4.  Checks bounding conditions
    5.  Sanitizes every element (see ``sanitize_index``)
    6.  Normalizes slices to canonical form
    7.  Posify's integers and lists

    Examples
    --------
    >>> normalize_index(1, (10,))
    (1,)
    >>> normalize_index(-1, (10,))
    (9,)
    >>> normalize_index([-1], (10,))
    (array([9]),)
    >>> normalize_index(slice(-3, 10, 1), (10,))
    (slice(7, None, None),)
    >>> normalize_index((Ellipsis, None), (10,))
    (slice(None, None, None), None)
    """
    if not isinstance(idx, tuple):
        idx = (idx,)
    ndim = len(shape)
    idx = replace_ellipsis(ndim, idx)

    # How many dimensions of the array does the index already address?
    consumed = 0
    for ind in idx:
        if hasattr(ind, 'ndim') and ind.ndim >= 1:
            consumed += ind.ndim
        elif ind is not None:
            consumed += 1
    idx = idx + (slice(None),) * (ndim - consumed)

    if sum(1 for ind in idx if ind is not None) > ndim:
        raise IndexError("Too many indices for array")

    # Pair every index element with the length of the dimension it addresses;
    # None (new axis) addresses no dimension
    dims = iter(shape)
    none_shape = [None if ind is None else next(dims) for ind in idx]

    for ind, dim in zip(idx, none_shape):
        if dim is not None:
            check_index(ind, dim)

    idx = tuple(sanitize_index(ind) for ind in idx)
    idx = tuple(normalize_slice(ind, dim)
                for ind, dim in zip(idx, none_shape))
    return posify_index(none_shape, idx)
def check_index(ind, dimension):
    """ Check validity of index for a given dimension

    Returns ``None`` when the index is acceptable and raises ``IndexError``
    otherwise.  Slices, ``None``, dask collections and any index along a
    dimension of unknown (NaN) length are accepted without further checks.

    Examples
    --------
    >>> check_index(3, 5)
    >>> check_index(5, 5)
    Traceback (most recent call last):
    ...
    IndexError: Index is not smaller than dimension 5 >= 5

    >>> check_index(6, 5)
    Traceback (most recent call last):
    ...
    IndexError: Index is not smaller than dimension 6 >= 5

    >>> check_index(-1, 5)
    >>> check_index(-6, 5)
    Traceback (most recent call last):
    ...
    IndexError: Negative index is not greater than negative dimension -6 <= -5

    >>> check_index([1, 2], 5)
    >>> check_index([6, 3], 5)
    Traceback (most recent call last):
    ...
    IndexError: Index out of bounds 5

    >>> check_index(slice(0, 3), 5)

    >>> check_index([True], 1)
    >>> check_index([True, True], 3)
    Traceback (most recent call last):
    ...
    IndexError: Boolean array length 2 doesn't equal dimension 3
    >>> check_index([True, True, True], 1)
    Traceback (most recent call last):
    ...
    IndexError: Boolean array length 3 doesn't equal dimension 1
    """
    # unknown dimension, assumed to be in bounds
    if np.isnan(dimension):
        return

    if isinstance(ind, (list, np.ndarray)):
        x = np.asanyarray(ind)
        if x.dtype == bool:
            # a mask must cover the dimension exactly
            if x.size != dimension:
                raise IndexError(
                    "Boolean array length %s doesn't equal dimension %s" %
                    (x.size, dimension))
        elif (x >= dimension).any() or (x < -dimension).any():
            raise IndexError("Index out of bounds %s" % dimension)
        return

    # index kinds that need no bounds check
    if isinstance(ind, slice) or is_dask_collection(ind) or ind is None:
        return

    # scalar index
    if ind >= dimension:
        raise IndexError("Index is not smaller than dimension %d >= %d" %
                         (ind, dimension))
    if ind < -dimension:
        msg = "Negative index is not greater than negative dimension %d <= -%d"
        raise IndexError(msg % (ind, dimension))
def slice_with_int_dask_array(x, index):
    """ Slice x with at most one 1D dask arrays of ints.

    This is a helper function of :meth:`Array.__getitem__`.

    Parameters
    ----------
    x: Array
    index: tuple with as many elements as x.ndim, among which there are
           one or more Array's with dtype=int

    Returns
    -------
    tuple of (sliced x, new index)

    where the new index is the same as the input, but with slice(None)
    replaced to the original slicer where a 1D filter has been applied and
    one less element where a zero-dimensional filter has been applied.

    Raises
    ------
    NotImplementedError
        If more than one fancy index is present, or if an integer dask array
        indexer has more than one dimension.
    """
    from .core import Array

    assert len(index) == x.ndim

    def _is_fancy(idx):
        # tuples/lists always count; arrays only with at least one dimension
        if isinstance(idx, (tuple, list)):
            return True
        return isinstance(idx, (np.ndarray, Array)) and idx.ndim > 0

    if sum(1 for idx in index if _is_fancy(idx)) > 1:
        raise NotImplementedError("Don't yet support nd fancy indexing)")

    remaining = []
    n_dropped = 0  # axes already removed by zero-dimensional indexers
    for in_axis, idx in enumerate(index):
        if not (isinstance(idx, Array) and idx.dtype.kind in 'iu'):
            # not an integer dask array: leave it for later stages
            remaining.append(idx)
            continue

        out_axis = in_axis - n_dropped
        if idx.ndim == 0:
            # Slice with a length-1 indexer, then drop the resulting axis
            idx = idx[np.newaxis]
            x = slice_with_int_dask_array_on_axis(x, idx, out_axis)
            drop = tuple(0 if ax == out_axis else slice(None)
                         for ax in range(x.ndim))
            x = x[drop]
            n_dropped += 1
        elif idx.ndim == 1:
            x = slice_with_int_dask_array_on_axis(x, idx, out_axis)
            remaining.append(slice(None))
        else:
            raise NotImplementedError(
                "Slicing with dask.array of ints only permitted when "
                "the indexer has zero or one dimensions")

    return x, tuple(remaining)
def slice_with_int_dask_array_on_axis(x, idx, axis):
    """ Slice a ND dask array with a 1D dask arrays of ints along the given
    axis.

    This is a helper function of :func:`slice_with_int_dask_array`.

    Raises
    ------
    NotImplementedError
        If the chunk sizes of ``x`` along ``axis`` are unknown (NaN).
    """
    from .core import Array, blockwise, from_array
    from . import chunk

    assert 0 <= axis < x.ndim

    axis_chunks = x.chunks[axis]
    if np.isnan(axis_chunks).any():
        raise NotImplementedError("Slicing an array with unknown chunks with "
                                  "a dask.array of ints is not supported")

    # Calculate the offset at which each chunk starts along axis
    # e.g. chunks=(..., (5, 3, 4), ...) -> offset=[0, 5, 8]
    starts = np.roll(np.cumsum(axis_chunks), 1)
    starts[0] = 0
    offset = from_array(starts, chunks=1)
    # Tamper with the declared chunks of offset to make blockwise align it with
    # x[axis]
    offset = Array(offset.dask, offset.name, (axis_chunks, ), offset.dtype)

    # Define axis labels for blockwise
    ndim = x.ndim
    x_axes = tuple(range(ndim))
    idx_axes = (ndim, )  # arbitrary index not already in x_axes
    offset_axes = (axis, )
    before, after = x_axes[:axis], x_axes[axis + 1:]
    p_axes = x_axes[:axis + 1] + idx_axes + after
    y_axes = before + idx_axes + after

    # Calculate the cartesian product of every chunk of x vs every chunk of idx
    p = blockwise(chunk.slice_with_int_dask_array,
                  p_axes, x, x_axes, idx, idx_axes, offset, offset_axes,
                  x_size=x.shape[axis], axis=axis, dtype=x.dtype)

    # Aggregate on the chunks of x along axis
    return blockwise(chunk.slice_with_int_dask_array_aggregate,
                     y_axes, idx, idx_axes, p, p_axes,
                     concatenate=True, x_chunks=axis_chunks, axis=axis,
                     dtype=x.dtype)
def slice_with_bool_dask_array(x, index):
    """ Slice x with one or more dask arrays of bools

    This is a helper function of `Array.__getitem__`.

    Parameters
    ----------
    x: Array
    index: tuple with as many elements as x.ndim, among which there are
           one or more Array's with dtype=bool

    Returns
    -------
    tuple of (sliced x, new index)

    where the new index is the same as the input, but with slice(None)
    replaced to the original slicer when a filter has been applied.

    Note: The sliced x will have nan chunks on the sliced axes.

    Raises
    ------
    NotImplementedError
        If a boolean dask array indexer is neither one-dimensional nor a
        single mask with the same number of dimensions as ``x``.
    """
    from .core import Array, blockwise, elemwise

    def _is_bool_array(ind):
        return isinstance(ind, Array) and ind.dtype == bool

    out_index = [slice(None) if _is_bool_array(ind) else ind
                 for ind in index]

    # A single mask with as many dimensions as x: the result is flat
    if len(index) == 1 and index[0].ndim == x.ndim:
        y = elemwise(getitem, x, *index, dtype=x.dtype)
        name = 'getitem-' + tokenize(x, index)
        dsk = {(name, i): k
               for i, k in enumerate(core.flatten(y.__dask_keys__()))}
        chunks = ((np.nan,) * y.npartitions,)
        graph = HighLevelGraph.from_collections(name, dsk, dependencies=[y])
        # NOTE(review): this path returns out_index as a list while the one
        # at the end of the function returns a tuple - confirm with callers
        return Array(graph, name, chunks, x.dtype), out_index

    if any(_is_bool_array(ind) and ind.ndim != 1 for ind in index):
        raise NotImplementedError("Slicing with dask.array of bools only permitted when "
                                  "the indexer has only one dimension or when "
                                  "it has the same dimension as the sliced "
                                  "array")

    # Flat (argument, axis labels) sequence for blockwise; non-mask positions
    # contribute a literal full slice
    arginds = []
    i = 0
    for ind in index:
        if _is_bool_array(ind):
            arginds.extend((ind, tuple(range(i, i + ind.ndim))))
            # NOTE(review): advances by x.ndim rather than ind.ndim - verify
            i += x.ndim
        else:
            arginds.extend((slice(None), None))
            i += 1

    x_axes = tuple(range(x.ndim))
    out = blockwise(getitem_variadic, x_axes, x, x_axes,
                    *arginds, dtype=x.dtype)

    # Chunk sizes along masked axes are unknown until computed
    chunks = []
    for ind, chunk in zip(index, out.chunks):
        if _is_bool_array(ind):
            chunks.append((np.nan,) * len(chunk))
        else:
            chunks.append(chunk)
    out._chunks = tuple(chunks)
    return out, tuple(out_index)
def getitem_variadic(x, *index):
    """Index ``x`` with all positional indexers packed into a single tuple."""
    return getitem(x, index)