You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3672 lines
109 KiB
3672 lines
109 KiB
from __future__ import absolute_import, division, print_function
|
|
import copy
|
|
|
|
import pytest
|
|
np = pytest.importorskip('numpy')
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
from distutils.version import LooseVersion
|
|
import operator
|
|
from operator import add, sub, getitem
|
|
from threading import Lock
|
|
import warnings
|
|
|
|
from toolz import merge, countby, concat
|
|
from toolz.curried import identity
|
|
|
|
import dask
|
|
import dask.array as da
|
|
from dask.base import tokenize, compute_as_if_collection
|
|
from dask.compatibility import PY2
|
|
from dask.delayed import Delayed, delayed
|
|
from dask.utils import ignoring, tmpfile, tmpdir, key_split
|
|
from dask.utils_test import inc, dec
|
|
|
|
from dask.array.core import (getem, getter, dotmany, concatenate3,
|
|
Array, stack, concatenate,
|
|
from_array, broadcast_shapes,
|
|
broadcast_to, blockdims_from_blockshape, store,
|
|
optimize, from_func, normalize_chunks,
|
|
broadcast_chunks, from_delayed,
|
|
common_blockdim, concatenate_axes)
|
|
from dask.blockwise import (make_blockwise_graph as top, broadcast_dimensions)
|
|
from dask.array.utils import assert_eq, same_keys
|
|
|
|
# temporary until numpy functions migrated
|
|
try:
|
|
from numpy import nancumsum, nancumprod
|
|
except ImportError: # pragma: no cover
|
|
import dask.array.numpy_compat as npcompat
|
|
nancumsum = npcompat.nancumsum
|
|
nancumprod = npcompat.nancumprod
|
|
|
|
|
|
def test_getem():
    """``getem`` emits one ``getter`` task per chunk of the named array."""
    expected = {
        ('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))),
        ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6))),
        ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))),
        ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))),
    }
    graph = getem('X', (2, 3), shape=(4, 6))
    assert graph == expected
|
|
|
|
|
|
def test_top():
    """Check the task graphs ``top`` produces for several index patterns."""
    grid = [(i, j) for i in range(2) for j in range(2)]

    # Unary elementwise: each output block depends on the matching input block.
    unary = top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)})
    assert unary == {('z', i, j): (inc, ('x', i, j)) for i, j in grid}

    # Binary elementwise over two identically blocked inputs.
    binary = top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij',
                 numblocks={'x': (2, 2), 'y': (2, 2)})
    assert binary == {('z', i, j): (add, ('x', i, j), ('y', i, j))
                      for i, j in grid}

    # Contraction over ``j``: each output block receives a row of ``x``
    # blocks and a column of ``y`` blocks.
    contracted = top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk',
                     numblocks={'x': (2, 2), 'y': (2, 2)})
    assert contracted == {
        ('z', i, k): (dotmany,
                      [('x', i, 0), ('x', i, 1)],
                      [('y', 0, k), ('y', 1, k)])
        for i, k in grid
    }

    # Empty output index: all input blocks end up in one nested list.
    collapsed = top(identity, 'z', '', 'x', 'ij', numblocks={'x': (2, 2)})
    assert collapsed == {('z',): (identity, [[('x', 0, 0), ('x', 0, 1)],
                                             [('x', 1, 0), ('x', 1, 1)]])}
|
|
|
|
|
|
def test_top_supports_broadcasting_rules():
    """A dimension with a single block is reused for every output block."""
    graph = top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij',
                numblocks={'x': (1, 2), 'y': (2, 1)})

    # ``x`` has one block along ``i`` and ``y`` has one block along ``j``,
    # so those indices stay at 0 in the expected keys.
    expected = {('z', i, j): (add, ('x', 0, j), ('y', i, 0))
                for i in range(2) for j in range(2)}
    assert graph == expected
|
|
|
|
|
|
def test_top_literals():
    """An argument paired with a ``None`` index is embedded verbatim in each task."""
    graph = top(add, 'z', 'ij', 'x', 'ij', 123, None, numblocks={'x': (2, 2)})

    expected = {('z', i, j): (add, ('x', i, j), 123)
                for i in range(2) for j in range(2)}
    assert graph == expected
|
|
|
|
|
|
def test_blockwise_literals():
    """``da.blockwise`` accepts literal operands given with a ``None`` index."""
    ones = da.ones((10, 10), chunks=(5, 5))

    plus_hundred = da.blockwise(add, 'ij', ones, 'ij', 100, None,
                                dtype=ones.dtype)
    assert_eq(plus_hundred, ones + 100)

    def affine(scale, block, offset):
        return scale * block + offset

    scaled = da.blockwise(affine, 'ij', 2, None, ones, 'ij', 100, None,
                          dtype=ones.dtype)
    assert_eq(scaled, 2 * ones + 100)

    sliced = da.blockwise(getitem, 'ij', ones, 'ij', slice(None), None,
                          dtype=ones.dtype)
    assert_eq(sliced, ones)
|
|
|
|
|
|
def test_concatenate3_on_scalars():
    """``concatenate3`` turns a flat list of scalars into a 1-d array."""
    combined = concatenate3([1, 2])
    assert_eq(combined, np.array([1, 2]))
|
|
|
|
|
|
def test_chunked_dot_product():
    """A blocked ``dotmany`` graph reproduces ``np.dot`` on 20x20 inputs."""
    lhs = np.arange(400).reshape((20, 20))
    rhs = np.ones((20, 20))

    graph = merge(
        {'x': lhs, 'o': rhs},
        getem('x', (5, 5), shape=(20, 20)),
        getem('o', (5, 5), shape=(20, 20)),
        top(dotmany, 'out', 'ik', 'x', 'ij', 'o', 'jk',
            numblocks={'x': (4, 4), 'o': (4, 4)}),
    )
    out_keys = [[('out', i, j) for j in range(4)] for i in range(4)]
    blocks = dask.get(graph, out_keys)

    assert_eq(np.dot(lhs, rhs), concatenate3(blocks))
|
|
|
|
|
|
def test_chunked_transpose_plus_one():
    """Transposing each block with swapped indices matches ``x.T + 1``."""
    source = np.arange(400).reshape((20, 20))

    def transpose_plus_one(block):
        return block.T + 1

    graph = merge(
        {'x': source},
        getem('x', (5, 5), shape=(20, 20)),
        top(transpose_plus_one, 'out', 'ij', 'x', 'ji',
            numblocks={'x': (4, 4)}),
    )
    out_keys = [[('out', i, j) for j in range(4)] for i in range(4)]
    blocks = dask.get(graph, out_keys)

    assert_eq(concatenate3(blocks), source.T + 1)
|
|
|
|
|
|
def test_broadcast_dimensions_works_with_singleton_dimensions():
    """A single argument with a singleton dimension passes through unchanged."""
    resolved = broadcast_dimensions([('x', 'i')], {'x': ((1,),)})
    assert resolved == {'i': (1,)}
|
|
|
|
|
|
def test_broadcast_dimensions():
    """Where one argument has ``1`` for an index, the other's entry is used."""
    pairs = [('x', 'ij'), ('y', 'ij')]
    dims = {'x': ('Hello', 1), 'y': (1, (2, 3))}

    resolved = broadcast_dimensions(pairs, dims)

    assert resolved == {'i': 'Hello', 'j': (2, 3)}
|
|
|
|
|
|
def test_Array():
    """Basic ``Array`` attributes derived from shape and chunk size."""
    shape, chunks, name = (1000, 1000), (100, 100), 'x'
    graph = merge({name: 'some-array'}, getem(name, chunks, shape=shape))
    arr = Array(graph, name, chunks, shape=shape, dtype='f8')

    assert arr.numblocks == (10, 10)

    expected_keys = [[('x', i, j) for j in range(10)] for i in range(10)]
    assert arr.__dask_keys__() == expected_keys

    assert arr.chunks == ((100,) * 10, (100,) * 10)
    assert arr.shape == shape
    assert len(arr) == shape[0]
|
|
|
|
|
|
def test_uneven_chunks():
    """A chunk size that does not divide the shape leaves a smaller last chunk."""
    arr = Array({}, 'x', chunks=(3, 3), shape=(10, 10), dtype='f8')
    per_axis = (3, 3, 3, 1)
    assert arr.chunks == (per_axis, per_axis)
|
|
|
|
|
|
def test_numblocks_suppoorts_singleton_block_dims():
    """Keys of a (100, 10) array chunked (10, 10) all have column index 0."""
    shape, chunks, name = (100, 10), (10, 10), 'x'
    graph = merge({name: 'some-array'},
                  getem(name, shape=shape, chunks=chunks))
    arr = Array(graph, name, chunks, shape=shape, dtype='f8')

    flat_keys = set(concat(arr.__dask_keys__()))
    assert flat_keys == {('x', i, 0) for i in range(10)}
|
|
|
|
|
|
def test_keys():
    """``__dask_keys__`` is cached and follows changes to ``name``."""
    def key_grid(name):
        return [[(name, i, j) for j in range(6)] for i in range(5)]

    graph = {('x', i, j): () for i in range(5) for j in range(6)}
    dx = Array(graph, 'x', chunks=(10, 10), shape=(50, 60), dtype='f8')
    assert dx.__dask_keys__() == key_grid(dx.name)

    # Cache works
    assert dx.__dask_keys__() is dx.__dask_keys__()

    # Test mutating names clears key cache
    dx.dask = {('y', i, j): () for i in range(5) for j in range(6)}
    dx.name = 'y'
    assert dx.__dask_keys__() == key_grid(dx.name)

    scalar = Array({}, 'x', (), shape=(), dtype='f8')
    assert scalar.__dask_keys__() == [('x',)]
|
|
|
|
|
|
def test_Array_computation():
    """A single-block ``Array`` converts to NumPy, computes, and casts to float."""
    identity3 = np.eye(3)
    arr = Array({('x', 0, 0): identity3}, 'x',
                shape=(3, 3), chunks=(3, 3), dtype='f8')

    assert_eq(np.array(arr), np.eye(3))
    assert isinstance(arr.compute(), np.ndarray)
    assert float(arr[0, 0]) == 1
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.14.0',
                    reason="NumPy doesn't have `np.linalg._umath_linalg` yet")
@pytest.mark.xfail(reason="Protect from `np.linalg._umath_linalg.inv` breaking")
def test_Array_numpy_gufunc_call__array_ufunc__01():
    """Calling NumPy's ``inv`` gufunc on a dask array matches the NumPy result."""
    lazy_input = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10))
    concrete_input = lazy_input.compute()

    inv = np.linalg._umath_linalg.inv
    expected = inv(concrete_input)
    lazy_result = inv(lazy_input, output_dtypes=float)

    assert_eq(expected, lazy_result.compute())
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.14.0',
                    reason="NumPy doesn't have `np.linalg._umath_linalg` yet")
@pytest.mark.xfail(reason="Protect from `np.linalg._umath_linalg.eig` breaking")
def test_Array_numpy_gufunc_call__array_ufunc__02():
    """Calling NumPy's two-output ``eig`` gufunc on a dask array matches NumPy."""
    lazy_input = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10))
    concrete_input = lazy_input.compute()

    eig = np.linalg._umath_linalg.eig
    expected_vals, expected_vecs = eig(concrete_input)
    lazy_vals, lazy_vecs = eig(lazy_input, output_dtypes=(float, float))

    computed_vals = lazy_vals.compute()
    computed_vecs = lazy_vecs.compute()
    assert_eq(expected_vals, computed_vals)
    assert_eq(expected_vecs, computed_vecs)
|
|
|
|
|
|
def test_stack():
    """``stack`` adds a new axis; check shape, chunks and graph tasks per axis."""
    a, b, c = (Array(getem(name, chunks=(2, 3), shape=(4, 6)),
                     name, chunks=(2, 3), dtype='f8', shape=(4, 6))
               for name in 'ABC')
    full = slice(None, None, None)

    # New leading axis.
    along0 = stack([a, b, c], axis=0)
    assert along0.shape == (3, 4, 6)
    assert along0.chunks == ((1, 1, 1), (2, 2), (3, 3))
    assert along0.chunksize == (1, 2, 3)
    assert along0.dask[(along0.name, 0, 1, 0)] == (getitem, ('A', 1, 0),
                                                   (None, full, full))
    assert along0.dask[(along0.name, 2, 1, 0)] == (getitem, ('C', 1, 0),
                                                   (None, full, full))
    assert same_keys(along0, stack([a, b, c], axis=0))

    # New middle axis.
    along1 = stack([a, b, c], axis=1)
    assert along1.shape == (4, 3, 6)
    assert along1.chunks == ((2, 2), (1, 1, 1), (3, 3))
    assert along1.chunksize == (2, 1, 3)
    assert along1.dask[(along1.name, 0, 1, 0)] == (getitem, ('B', 0, 0),
                                                   (full, None, full))
    assert along1.dask[(along1.name, 1, 1, 0)] == (getitem, ('B', 1, 0),
                                                   (full, None, full))
    assert same_keys(along1, stack([a, b, c], axis=1))

    # New trailing axis.
    along2 = stack([a, b, c], axis=2)
    assert along2.shape == (4, 6, 3)
    assert along2.chunks == ((2, 2), (3, 3), (1, 1, 1))
    assert along2.chunksize == (2, 3, 1)
    assert along2.dask[(along2.name, 0, 1, 0)] == (getitem, ('A', 0, 1),
                                                   (full, full, None))
    assert along2.dask[(along2.name, 1, 1, 2)] == (getitem, ('C', 1, 1),
                                                   (full, full, None))
    assert same_keys(along2, stack([a, b, c], axis=2))

    with pytest.raises(ValueError):
        stack([a, b, c], axis=3)

    assert set(b.dask.keys()).issubset(along2.dask.keys())

    negative = stack([a, b, c], axis=-1)
    positive = stack([a, b, c], axis=2)
    assert negative.chunks == positive.chunks
|
|
|
|
|
|
def test_short_stack():
    """Stacking one length-1 array yields a (1, 1) array and a (1, 1) block."""
    single = da.from_array(np.array([1]), chunks=(1,))
    stacked = da.stack([single])
    assert stacked.shape == (1, 1)

    blocks = compute_as_if_collection(Array, stacked.dask,
                                      stacked.__dask_keys__())
    assert blocks[0][0].shape == (1, 1)
|
|
|
|
|
|
def test_stack_scalars():
    """0-d reduction results can be stacked into a 1-d array."""
    lazy = da.arange(4, chunks=2)
    stacked = da.stack([lazy.mean(), lazy.sum()])

    reference = [np.arange(4).mean(), np.arange(4).sum()]
    assert stacked.compute().tolist() == reference
|
|
|
|
|
|
def test_stack_promote_type():
    """Stacking int32 and float32 arrays gives the same result as ``np.stack``."""
    ints = np.arange(10, dtype='i4')
    floats = np.arange(10, dtype='f4')

    lazy_ints = da.from_array(ints, chunks=5)
    lazy_floats = da.from_array(floats, chunks=5)

    stacked = da.stack([lazy_ints, lazy_floats])
    assert_eq(stacked, np.stack([ints, floats]))
|
|
|
|
|
|
def test_stack_rechunk():
    """Stacking differently chunked inputs unifies their chunks."""
    fives = da.random.random(10, chunks=5)
    fours = da.random.random(10, chunks=4)

    stacked = da.stack([fives, fours], axis=0)

    assert stacked.shape == (2, 10)
    assert stacked.chunks == ((1, 1), (4, 1, 3, 2))

    reference = np.stack([fives.compute(), fours.compute()], axis=0)
    assert_eq(stacked, reference)
|
|
|
|
|
|
def test_concatenate():
    """``concatenate`` joins along an existing axis and aliases input blocks."""
    a, b, c = (Array(getem(name, chunks=(2, 3), shape=(4, 6)),
                     name, chunks=(2, 3), dtype='f8', shape=(4, 6))
               for name in 'ABC')

    # Along rows.
    rows = concatenate([a, b, c], axis=0)
    assert rows.shape == (12, 6)
    assert rows.chunks == ((2, 2, 2, 2, 2, 2), (3, 3))
    assert rows.dask[(rows.name, 0, 1)] == ('A', 0, 1)
    assert rows.dask[(rows.name, 5, 0)] == ('C', 1, 0)
    assert same_keys(rows, concatenate([a, b, c], axis=0))

    # Along columns.
    cols = concatenate([a, b, c], axis=1)
    assert cols.shape == (4, 18)
    assert cols.chunks == ((2, 2), (3, 3, 3, 3, 3, 3))
    assert cols.dask[(cols.name, 1, 0)] == ('A', 1, 0)
    assert cols.dask[(cols.name, 1, 5)] == ('C', 1, 1)
    assert same_keys(cols, concatenate([a, b, c], axis=1))

    assert set(b.dask.keys()).issubset(cols.dask.keys())

    # A single-element sequence returns the input object itself.
    alone = concatenate([a], axis=0)
    assert alone.shape == a.shape
    assert alone.chunks == a.chunks
    assert alone.dask == a.dask
    assert alone is a

    negative = concatenate([a, b, c], axis=-1)
    positive = concatenate([a, b, c], axis=1)
    assert negative.chunks == positive.chunks

    with pytest.raises(ValueError):
        concatenate([a, b, c], axis=2)
|
|
|
|
|
|
@pytest.mark.parametrize('dtypes', [(('>f8', '>f8'), '>f8'),
                                    (('<f4', '<f8'), '<f8')])
def test_concatenate_types(dtypes):
    """The concatenated dtype equals the expected dtype for each input pair."""
    input_dtypes, expected_dtype = dtypes

    lazy_inputs = [from_array(np.zeros(4, dtype=dt), chunks=(2,))
                   for dt in input_dtypes]

    joined = concatenate(lazy_inputs, axis=0)
    assert joined.dtype == expected_dtype
|
|
|
|
|
|
def test_concatenate_unknown_axes():
    """Concatenating arrays with unknown (nan) chunk sizes along each axis."""
    dd = pytest.importorskip('dask.dataframe')
    pd = pytest.importorskip('pandas')

    a_df = pd.DataFrame({'x': np.arange(12)})
    b_df = pd.DataFrame({'y': np.arange(12) * 10})

    # ``.values`` of a dask DataFrame has an unknown length along axis 0.
    a_x = dd.from_pandas(a_df, sort=False, npartitions=3).values
    b_x = dd.from_pandas(b_df, sort=False, npartitions=3).values

    for unknown in (a_x, b_x):
        assert np.isnan(unknown.shape[0])

    da.concatenate([a_x, b_x], axis=0)  # works fine

    with pytest.raises(ValueError) as exc_info:
        da.concatenate([a_x, b_x], axis=1)  # unknown chunks

    message = str(exc_info.value)
    assert 'nan' in message
    assert 'allow_unknown_chunksize' in message

    # unknown chunks, explicitly allowed
    c_x = da.concatenate([a_x, b_x], axis=1, allow_unknown_chunksizes=True)

    assert_eq(c_x, np.concatenate([a_df.values, b_df.values], axis=1))
|
|
|
|
|
|
def test_concatenate_rechunk():
    """Concatenation unifies chunks along the axes that are not joined."""
    threes = da.random.random((6, 6), chunks=(3, 3))
    twos = da.random.random((6, 6), chunks=(2, 2))

    by_rows = da.concatenate([threes, twos], axis=0)
    assert by_rows.shape == (12, 6)
    assert by_rows.chunks == ((3, 3, 2, 2, 2), (2, 1, 1, 2))
    assert_eq(by_rows,
              np.concatenate([threes.compute(), twos.compute()], axis=0))

    by_cols = da.concatenate([threes, twos], axis=1)
    assert by_cols.shape == (6, 12)
    assert by_cols.chunks == ((2, 1, 1, 2), (3, 3, 2, 2, 2))
    assert_eq(by_cols,
              np.concatenate([threes.compute(), twos.compute()], axis=1))
|
|
|
|
|
|
def test_concatenate_fixlen_strings():
    """Fixed-length string arrays of different widths concatenate like NumPy."""
    short = np.array(['a', 'b', 'c'])
    wide = np.array(['aa', 'bb', 'cc'])

    lazy_short = da.from_array(short, chunks=(2,))
    lazy_wide = da.from_array(wide, chunks=(2,))

    expected = np.concatenate([short, wide])
    assert_eq(expected, da.concatenate([lazy_short, lazy_wide]))
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_simple_row_wise():
    """Two 2x2 blocks in a flat list match ``np.block``."""
    ones = np.ones((2, 2))
    twos = 2 * ones
    lazy_ones, lazy_twos = da.asarray(ones), da.asarray(twos)

    assert_eq(np.block([ones, twos]), da.block([lazy_ones, lazy_twos]))
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_simple_column_wise():
    """Two 2x2 blocks, one per nested row, match ``np.block``."""
    ones = np.ones((2, 2))
    twos = 2 * ones
    lazy_ones, lazy_twos = da.asarray(ones), da.asarray(twos)

    assert_eq(np.block([[ones], [twos]]),
              da.block([[lazy_ones], [lazy_twos]]))
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_with_1d_arrays_row_wise():
    """1-d vectors in a flat list match ``np.block``."""
    # # # 1-D vectors are treated as row arrays
    first = np.array([1, 2, 3])
    second = np.array([2, 3, 4])
    lazy_first, lazy_second = da.asarray(first), da.asarray(second)

    assert_eq(np.block([first, second]), da.block([lazy_first, lazy_second]))
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_with_1d_arrays_multiple_rows():
    """Two nested rows, each holding two 1-d vectors, match ``np.block``."""
    first = np.array([1, 2, 3])
    second = np.array([2, 3, 4])
    lazy_first, lazy_second = da.asarray(first), da.asarray(second)

    numpy_rows = [[first, second], [first, second]]
    dask_rows = [[lazy_first, lazy_second], [lazy_first, lazy_second]]

    assert_eq(np.block(numpy_rows), da.block(dask_rows))
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_with_1d_arrays_column_wise():
    """1-d vectors, one per nested row, match ``np.block``."""
    # # # 1-D vectors are treated as row arrays
    first = np.array([1, 2, 3])
    second = np.array([2, 3, 4])
    lazy_first, lazy_second = da.asarray(first), da.asarray(second)

    assert_eq(np.block([[first], [second]]),
              da.block([[lazy_first], [lazy_second]]))
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_mixed_1d_and_2d():
    """``da.block`` matches ``np.block`` when a 2-d block sits above a 1-d one."""
    a1 = np.ones((2, 2))
    a2 = np.array([2, 2])

    d1 = da.asarray(a1)
    d2 = da.asarray(a2)

    # The reference is NumPy applied to the NumPy inputs; the value under
    # test is dask applied to the dask inputs, so ``da.block`` is actually
    # exercised on dask arrays.
    expected = np.block([[a1], [a2]])
    result = da.block([[d1], [d2]])

    assert_eq(expected, result)
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_complicated():
    """A ragged nested layout mixing 0-d, 1-d and 2-d inputs matches ``np.block``."""
    # a bit more complicated
    numpy_parts = [
        np.array([[1, 1, 1]]),
        np.array([[2, 2, 2]]),
        np.array([[3, 3, 3, 3, 3, 3]]),
        np.array([4, 4, 4, 4, 4, 4]),
        np.array(5),
        np.array([6, 6, 6, 6, 6]),
        np.zeros((2, 6)),
    ]
    a1, a2, a3, a4, a5, a6, a7 = numpy_parts
    d1, d2, d3, d4, d5, d6, d7 = [da.asarray(part) for part in numpy_parts]

    expected = np.block([[a1, a2], [a3], [a4], [a5, a6], [a7]])
    result = da.block([[d1, d2], [d3], [d4], [d5, d6], [d7]])

    assert_eq(expected, result)
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_nested():
    """The result of one ``block`` call can be an element of another."""
    numpy_parts = [
        np.array([1, 1, 1]),
        np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]]),
        np.array([3, 3, 3]),
        np.array([4, 4, 4]),
        np.array(5),
        np.array([6, 6, 6, 6, 6]),
        np.zeros((2, 6)),
    ]
    a1, a2, a3, a4, a5, a6, a7 = numpy_parts
    d1, d2, d3, d4, d5, d6, d7 = [da.asarray(part) for part in numpy_parts]

    # The inner call stacks three vectors; its result sits beside the 3x3 block.
    numpy_inner = np.block([[a1], [a3], [a4]])
    expected = np.block([[numpy_inner, a2],
                         [a5, a6],
                         [a7]])

    dask_inner = da.block([[d1], [d3], [d4]])
    result = da.block([[dask_inner, d2],
                       [d5, d6],
                       [d7]])

    assert_eq(expected, result)
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_3d():
    """Eight constant 3-d blocks arranged 2x2x2 match ``np.block``."""
    def constant(shape, value):
        return np.ones(shape, int) * value

    a000 = constant((2, 2, 2), 1)

    a100 = constant((3, 2, 2), 2)
    a010 = constant((2, 3, 2), 3)
    a001 = constant((2, 2, 3), 4)

    a011 = constant((2, 3, 3), 5)
    a101 = constant((3, 2, 3), 6)
    a110 = constant((3, 3, 2), 7)

    a111 = constant((3, 3, 3), 8)

    numpy_layout = [
        [[a000, a001], [a010, a011]],
        [[a100, a101], [a110, a111]],
    ]
    # Same nesting, with every leaf wrapped as a dask array.
    dask_layout = [[[da.asarray(leaf) for leaf in row] for row in plane]
                   for plane in numpy_layout]

    expected = np.block(numpy_layout)
    result = da.block(dask_layout)

    assert_eq(expected, result)
|
|
|
|
|
|
def test_block_with_mismatched_shape():
    """A length-2 vector and a 2x2 matrix in one list raise ``ValueError``."""
    vector = np.array([0, 0])
    matrix = np.eye(2)

    for arrays in ([vector, matrix], [matrix, vector]):
        with pytest.raises(ValueError):
            da.block(arrays)
|
|
|
|
|
|
@pytest.mark.skipif(LooseVersion(np.__version__) < '1.13.0',
                    reason="NumPy doesn't support `block` yet")
def test_block_no_lists():
    """``block`` accepts a bare scalar or array that is not wrapped in a list."""
    assert_eq(da.block(1), np.block(1))

    eye = np.eye(3)
    assert_eq(da.block(eye), np.block(np.eye(3)))
|
|
|
|
|
|
def test_block_invalid_nesting():
    """Lists whose nesting depths disagree raise a 'depths are mismatched' error."""
    bad_layouts = [
        [1, [2]],
        [1, []],
        [[1], 2],
        [[], 2],
        [
            [[1], [2]],
            [[3, 4]],
            [5]  # missing brackets
        ],
    ]
    for arrays in bad_layouts:
        with pytest.raises(ValueError) as e:
            da.block(arrays)
        e.match(r'depths are mismatched')
|
|
|
|
|
|
def test_block_empty_lists():
    """Any empty list in the nesting raises an error mentioning 'empty'."""
    empty_layouts = [
        [],
        [[]],
        [[1], []],
    ]
    for arrays in empty_layouts:
        with pytest.raises(ValueError) as e:
            da.block(arrays)
        e.match(r'empty')
|
|
|
|
|
|
def test_block_tuple():
    """Tuples at any nesting level raise a ``TypeError`` mentioning 'tuple'."""
    tuple_layouts = [
        ([1, 2], [3, 4]),
        [(1, 2), (3, 4)],
    ]
    for arrays in tuple_layouts:
        with pytest.raises(TypeError) as e:
            da.block(arrays)
        e.match(r'tuple')
|
|
|
|
|
|
def test_broadcast_shapes():
    """``broadcast_shapes`` follows NumPy rules, tolerates nan, and emits no warnings."""
    nan = np.nan

    with warnings.catch_warnings(record=True) as record:
        assert broadcast_shapes() == ()
        assert broadcast_shapes((2, 5)) == (2, 5)
        assert broadcast_shapes((0, 1), (1, 5)) == (0, 5)

        # Unknown (nan) sizes are compared with ``equal_nan`` because
        # nan != nan under ordinary equality.
        assert np.allclose(
            (2, nan), broadcast_shapes((1, nan), (2, 1)), equal_nan=True
        )
        assert np.allclose(
            (2, nan), broadcast_shapes((2, 1), (1, nan)), equal_nan=True
        )

        assert broadcast_shapes((3, 4, 5), (4, 1), ()) == (3, 4, 5)
        assert broadcast_shapes((3, 1), (1, 4), (4,)) == (3, 4)
        assert (broadcast_shapes((3, 1), (), (5, 6, 7, 1, 4)) ==
                (5, 6, 7, 3, 4))

    assert not record

    incompatible = [
        ((3,), (3, 4)),
        ((2, 3), (2, 3, 1)),
        ((2, 3), (1, nan)),
    ]
    for shapes in incompatible:
        with pytest.raises(ValueError):
            broadcast_shapes(*shapes)
|
|
|
|
|
|
def test_elemwise_on_scalars():
    """Elementwise operations involving 0-d dask arrays, including dtypes."""
    wide = np.arange(10, dtype=np.int64)
    lazy_wide = from_array(wide, chunks=(5,))
    assert len(lazy_wide.__dask_keys__()) == 2
    assert_eq(lazy_wide.sum()**2, wide.sum()**2)

    narrow = np.arange(10, dtype=np.int32)
    lazy_narrow = from_array(narrow, chunks=(5,))
    product = lazy_wide.sum() * lazy_narrow

    # Dask 0-d arrays do not behave like numpy scalars for type promotion
    assert product.dtype == np.int64
    assert product.compute().dtype == np.int64

    numpy_product = wide.sum() * narrow
    assert numpy_product.dtype == np.int32
    assert_eq((wide.sum() * narrow).astype(np.int64), product)
|
|
|
|
|
|
def test_elemwise_with_ndarrays():
    """Dask and NumPy arrays can be mixed on either side of ``+``."""
    vec = np.arange(3)
    mat = np.arange(12).reshape(4, 3)
    lazy_vec = from_array(vec, chunks=(3,))
    lazy_mat = from_array(mat, chunks=(2, 3))

    doubled = 2 * vec
    assert_eq(vec + lazy_vec, doubled)
    assert_eq(lazy_vec + vec, doubled)

    assert_eq(vec + lazy_mat, vec + mat)
    assert_eq(lazy_mat + vec, vec + mat)
    assert_eq(lazy_vec + mat, vec + mat)
    assert_eq(mat + lazy_vec, vec + mat)

    # Error on shape mismatch
    with pytest.raises(ValueError):
        lazy_vec + mat.T
    with pytest.raises(ValueError):
        lazy_vec + np.arange(2)
|
|
|
|
|
|
def test_elemwise_differently_chunked():
    """Operands whose chunks do not line up still add correctly."""
    vec = np.arange(3)
    mat = np.arange(12).reshape(4, 3)
    lazy_vec = from_array(vec, chunks=(3,))
    lazy_mat = from_array(mat, chunks=(2, 2))

    expected = vec + mat
    assert_eq(lazy_vec + lazy_mat, expected)
    assert_eq(lazy_mat + lazy_vec, vec + mat)
|
|
|
|
|
|
def test_elemwise_dtype():
    """``da.maximum`` has the dtype ``da.result_type`` reports, for every operand pair."""
    operands = [
        da.from_array(np.ones(5, np.float32), chunks=3),
        da.from_array(np.ones(5, np.int16), chunks=3),
        da.from_array(np.ones(5, np.int64), chunks=3),
        da.from_array(np.ones((), np.float64), chunks=()) * 1e200,
        np.ones(5, np.float32),
        1, 1.0, 1e200, np.int64(1), np.ones((), np.int64),
    ]
    for lhs in operands:
        for rhs in operands:
            expected_dtype = da.result_type(lhs, rhs)
            assert da.maximum(lhs, rhs).dtype == expected_dtype
|
|
|
|
|
|
def test_operators():
    """Arithmetic, comparison and unary operators on dask arrays match NumPy."""
    row = np.arange(10)
    col = np.arange(10).reshape((10, 1))
    lazy_row = from_array(row, chunks=(5,))
    lazy_col = from_array(col, chunks=(5, 1))

    plus_one = lazy_row + 1
    assert_eq(plus_one, row + 1)

    outer_sum = lazy_row + lazy_col
    assert_eq(outer_sum, row + row.reshape((10, 1)))

    # ``row`` starts at 0, so ``3 / row`` divides by zero in its first entry.
    expr = (3 / lazy_row * lazy_col)**2 > 5
    with pytest.warns(None):  # ZeroDivisionWarning
        assert_eq(expr, (3 / row * col)**2 > 5)

    with pytest.warns(None):  # OverflowWarning
        exponent = da.exp(lazy_row)
        assert_eq(exponent, np.exp(row))

    assert_eq(abs(-lazy_row), lazy_row)
    assert_eq(lazy_row, +row)
|
|
|
|
|
|
def test_operator_dtype_promotion():
    """Adding scalars or arrays to a float32 dask array promotes like NumPy."""
    base = np.arange(10, dtype=np.float32)
    one_elem = np.array([1])
    lazy = from_array(base, chunks=(5,))

    assert_eq(base + 1, lazy + 1)  # still float32
    assert_eq(base + 1e50, lazy + 1e50)  # now float64
    assert_eq(base + one_elem, lazy + one_elem)  # also float64
|
|
|
|
|
|
def test_field_access():
    """Structured-array fields can be selected by name or by list of names."""
    record_dtype = [('a', 'i4'), ('b', 'f4')]
    records = np.array([(1, 1.0), (2, 2.0)], dtype=record_dtype)
    lazy = from_array(records, chunks=(1,))

    assert_eq(lazy['a'], records['a'])

    fields = ['b', 'a']
    assert_eq(lazy[fields], records[fields])
    assert same_keys(lazy[['b', 'a']], lazy[['b', 'a']])
|
|
|
|
|
|
def test_field_access_with_shape():
    """Field access works when the fields themselves have sub-array shapes."""
    record_dtype = [('col1', ('f4', (3, 2))), ('col2', ('f4', 3))]
    records = np.ones((100, 50), dtype=record_dtype)
    lazy = da.from_array(records, 10)

    for selector in ('col1', ['col1'], 'col2', ['col1', 'col2']):
        assert_eq(lazy[selector], records[selector])
|
|
|
|
|
|
@pytest.mark.skipif(sys.version_info < (3, 5),
                    reason="Matrix multiplication operator only after Py3.5")
def test_matmul():
    """``@`` between dask arrays, NumPy arrays and lists matches NumPy."""
    matmul = operator.matmul

    x = np.random.random((5, 5))
    y = np.random.random((5, 2))
    a = from_array(x, chunks=(1, 5))
    b = from_array(y, chunks=(5, 1))

    # dask @ dask, and mixed dask/NumPy operands
    assert_eq(matmul(a, b), a.dot(b))
    assert_eq(matmul(a, b), matmul(x, y))
    assert_eq(matmul(a, y), matmul(x, b))

    # plain Python list on either side
    list_vec = list(range(1, 6))
    assert_eq(matmul(list_vec, b), matmul(list_vec, y))
    assert_eq(matmul(x, list_vec), matmul(a, list_vec))

    # 3-d operand
    z = np.random.random((5, 5, 5))
    c = from_array(z, chunks=(1, 5, 1))
    assert_eq(matmul(a, z), matmul(x, c))
    assert_eq(matmul(z, a), matmul(c, x))
|
|
|
|
|
|
def test_matmul_array_ufunc():
    """``__array_ufunc__`` dispatch for ``np.matmul`` gives the dot product."""
    # regression test for https://github.com/dask/dask/issues/4353
    left = np.random.random((5, 5))
    right = np.random.random((5, 2))
    lazy_left = from_array(left, chunks=(1, 5))
    lazy_right = from_array(right, chunks=(5, 1))

    product = lazy_right.__array_ufunc__(np.matmul, '__call__',
                                         lazy_left, lazy_right)

    assert_eq(product, left.dot(right))
|
|
|
|
|
|
def test_T():
    """The ``.T`` attribute of a dask array matches NumPy's transpose."""
    source = np.arange(400).reshape((20, 20))
    lazy = from_array(source, chunks=(5, 5))

    assert_eq(source.T, lazy.T)
|
|
|
|
|
|
def test_broadcast_to():
    """``broadcast_to`` matches NumPy and returns the input when the shape is unchanged."""
    source = np.random.randint(10, size=(5, 1, 6))
    lazy = from_array(source, chunks=(3, 1, 3))

    targets = [lazy.shape, (5, 0, 6), (5, 4, 6), (2, 5, 1, 6),
               (3, 4, 5, 4, 6)]
    for shape in targets:
        expected = np.broadcast_to(source, shape)
        broadcasted = broadcast_to(lazy, shape)

        assert_eq(expected, broadcasted)

        if lazy.shape == broadcasted.shape:
            assert lazy is broadcasted

    for bad_shape in ((2, 1, 6), (3,)):
        with pytest.raises(ValueError):
            broadcast_to(lazy, bad_shape)
|
|
|
|
|
|
def test_broadcast_to_array():
    """``broadcast_to`` accepts a plain NumPy array as input."""
    source = np.random.randint(10, size=(5, 1, 6))

    targets = [(5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]
    for shape in targets:
        expected = np.broadcast_to(source, shape)
        broadcasted = broadcast_to(source, shape)

        assert_eq(expected, broadcasted)
|
|
|
|
|
|
def test_broadcast_to_scalar():
    """``broadcast_to`` accepts a Python scalar as input."""
    value = 5

    targets = [tuple(), (0,), (2, 3), (5, 4, 6), (2, 5, 1, 6),
               (3, 4, 5, 4, 6)]
    for shape in targets:
        expected = np.broadcast_to(value, shape)
        broadcasted = broadcast_to(value, shape)

        assert_eq(expected, broadcasted)
|
|
|
|
|
|
def test_broadcast_to_chunks():
    """The ``chunks`` argument of ``broadcast_to`` sets chunks on broadcast axes."""
    source = np.random.randint(10, size=(5, 1, 6))
    lazy = from_array(source, chunks=(3, 1, 3))

    # (target shape, requested chunks, resulting chunks)
    cases = [
        ((5, 3, 6), (3, -1, 3), ((3, 2), (3,), (3, 3))),
        ((5, 3, 6), (3, 1, 3), ((3, 2), (1, 1, 1,), (3, 3))),
        ((2, 5, 3, 6), (1, 3, 1, 3), ((1, 1), (3, 2), (1, 1, 1,), (3, 3))),
    ]
    for shape, chunks, expected_chunks in cases:
        expected = np.broadcast_to(source, shape)
        broadcasted = broadcast_to(lazy, shape, chunks=chunks)
        assert_eq(expected, broadcasted)
        assert broadcasted.chunks == expected_chunks

    # (target shape, requested chunks) combinations that must be rejected
    rejected = [
        (lazy.shape, ((2, 3), (1,), (3, 3))),
        (lazy.shape, ((3, 2), (3,), (3, 3))),
        ((5, 2, 6), ((3, 2), (3,), (3, 3))),
    ]
    for shape, chunks in rejected:
        with pytest.raises(ValueError):
            broadcast_to(lazy, shape, chunks=chunks)
|
|
|
|
|
|
def test_broadcast_arrays():
    """``da.broadcast_arrays`` returns a list of arrays matching NumPy's."""
    # Calling `broadcast_arrays` with no arguments only works in NumPy 1.13.0+.
    if LooseVersion(np.__version__) >= LooseVersion("1.13.0"):
        assert np.broadcast_arrays() == da.broadcast_arrays()

    base = np.arange(4)
    lazy_base = da.from_array(base,
                              chunks=tuple(s // 2 for s in base.shape))

    as_row = np.arange(4)[None, :]
    as_col = np.arange(4)[:, None]

    lazy_row = lazy_base[None, :]
    lazy_col = lazy_base[:, None]

    numpy_result = np.broadcast_arrays(as_row, as_col)
    dask_result = da.broadcast_arrays(lazy_row, lazy_col)

    assert isinstance(dask_result, list)
    assert len(numpy_result) == len(dask_result)

    for expected, actual in zip(numpy_result, dask_result):
        assert_eq(expected, actual)
|
|
|
|
|
|
@pytest.mark.parametrize('u_shape, v_shape', [
    [tuple(), (2, 3)],
    [(1,), (2, 3)],
    [(1, 1), (2, 3)],
    [(0, 3), (1, 3)],
    [(2, 0), (2, 1)],
    [(1, 0), (2, 1)],
    [(0, 1), (1, 3)],
])
def test_broadcast_operator(u_shape, v_shape):
    """Multiplying arrays of broadcastable shapes matches NumPy."""
    u = np.random.random(u_shape)
    v = np.random.random(v_shape)

    lazy_u = from_array(u, chunks=1)
    lazy_v = from_array(v, chunks=1)

    expected = u * v
    actual = lazy_u * lazy_v

    assert_eq(expected, actual)
|
|
|
|
|
|
@pytest.mark.parametrize('original_shape,new_shape,chunks', [
    ((10,), (10,), (3, 3, 4)),
    ((10,), (10, 1, 1), 5),
    ((10,), (1, 10,), 5),
    ((24,), (2, 3, 4), 12),
    ((1, 24,), (2, 3, 4), 12),
    ((2, 3, 4), (24,), (1, 3, 4)),
    ((2, 3, 4), (24,), 4),
    ((2, 3, 4), (24, 1), 4),
    ((2, 3, 4), (1, 24), 4),
    ((4, 4, 1), (4, 4), 2),
    ((4, 4), (4, 4, 1), 2),
    ((1, 4, 4), (4, 4), 2),
    ((1, 4, 4), (4, 4, 1), 2),
    ((1, 4, 4), (1, 1, 4, 4), 2),
    ((4, 4), (1, 4, 4, 1), 2),
    ((4, 4), (1, 4, 4), 2),
    ((2, 3), (2, 3), (1, 2)),
    ((2, 3), (3, 2), 3),
    ((4, 2, 3), (4, 6), 4),
    ((3, 4, 5, 6), (3, 4, 5, 6), (2, 3, 4, 5)),
    ((), (1,), 1),
    ((1,), (), 1),
    ((24,), (3, 8), 24),
    ((24,), (4, 6), 6),
    ((24,), (4, 3, 2), 6),
    ((24,), (4, 6, 1), 6),
    ((24,), (4, 6), (6, 12, 6)),
    ((64, 4), (8, 8, 4), (16, 2)),
    ((4, 64), (4, 8, 4, 2), (2, 16)),
    ((4, 8, 4, 2), (2, 1, 2, 32, 2), (2, 4, 2, 2)),
    ((4, 1, 4), (4, 4), (2, 1, 2)),
    ((0, 10), (0, 5, 2), (5, 5)),
    ((5, 0, 2), (0, 10), (5, 2, 2)),
    ((0,), (2, 0, 2), (4,)),
    ((2, 0, 2), (0,), (4, 4, 4)),
])
def test_reshape(original_shape, new_shape, chunks):
    """Reshape matches NumPy and returns the input when the shape is unchanged."""
    source = np.random.randint(10, size=original_shape)
    lazy = from_array(source, chunks=chunks)

    expected = source.reshape(new_shape)
    reshaped = lazy.reshape(new_shape)

    if lazy.shape == new_shape:
        assert lazy is reshaped

    assert_eq(expected, reshaped)
|
|
|
|
|
|
def test_reshape_exceptions():
    """Reshaping to a shape with a different number of elements raises."""
    lazy = from_array(np.random.randint(10, size=(5,)), chunks=(2,))

    with pytest.raises(ValueError):
        da.reshape(lazy, (100,))
|
|
|
|
|
|
def test_reshape_splat():
    """``reshape`` accepts the new shape as a tuple or as a bare integer."""
    ones = da.ones((5, 5), chunks=(2, 2))

    from_tuple = ones.reshape((25,))
    from_int = ones.reshape(25)
    assert_eq(from_tuple, from_int)
|
|
|
|
|
|
def test_reshape_fails_for_dask_only():
    """Reshapes that NumPy accepts but ``da.reshape`` rejects with ``ValueError``."""
    # (original shape, new shape, chunks)
    cases = [
        ((3, 4), (4, 3), 2),
    ]
    for original_shape, new_shape, chunks in cases:
        source = np.random.randint(10, size=original_shape)
        lazy = from_array(source, chunks=chunks)

        # NumPy handles this reshape without complaint ...
        assert source.reshape(new_shape).shape == new_shape

        # ... while dask refuses it.
        with pytest.raises(ValueError):
            da.reshape(lazy, new_shape)
|
|
|
|
|
|
def test_reshape_unknown_dimensions():
    """One ``-1`` entry in the new shape is inferred; two raise ``ValueError``."""
    original_shapes = [(24,), (2, 12), (2, 3, 4)]
    new_shapes = [(-1,), (2, -1), (-1, 3, 4)]

    for original_shape in original_shapes:
        for new_shape in new_shapes:
            source = np.random.randint(10, size=original_shape)
            lazy = from_array(source, 24)
            assert_eq(source.reshape(new_shape), lazy.reshape(new_shape))

    # ``lazy`` is the array left over from the last loop iteration.
    with pytest.raises(ValueError):
        da.reshape(lazy, (-1, -1))
|
|
|
|
|
|
def test_full():
    """``da.full`` keeps explicit chunks and matches ``np.full``."""
    requested_chunks = ((2, 1), (2, 2))
    filled = da.full((3, 4), 2, chunks=requested_chunks)

    assert filled.chunks == ((2, 1), (2, 2))
    assert_eq(filled, np.full((3, 4), 2))
|
|
|
|
|
|
def test_map_blocks():
    """``map_blocks`` with default chunks, custom names, and explicit output chunks."""
    source = np.arange(400).reshape((20, 20))

    def every_other(block):
        return block[::2, ::2]

    # Same-shape mapping keeps the input chunks.
    lazy = from_array(source, chunks=(7, 7))
    incremented = lazy.map_blocks(inc, dtype=lazy.dtype)
    assert lazy.chunks == incremented.chunks
    assert_eq(incremented, source + 1)

    # ``name`` becomes the prefix of the output name; the function is part
    # of the name too, so different functions give different names.
    named = lazy.map_blocks(inc, name='increment')
    assert named.name.startswith('increment-')
    assert (lazy.map_blocks(inc, name='foo').name !=
            lazy.map_blocks(dec, name='foo').name)

    # Uniform output chunks given as a block shape.
    lazy = from_array(source, chunks=(10, 10))
    halved = lazy.map_blocks(every_other, chunks=(5, 5), dtype=lazy.dtype)
    assert halved.chunks == ((5, 5), (5, 5))
    assert_eq(halved, source[::2, ::2])

    # Non-uniform output chunks given explicitly per axis.
    lazy = from_array(source, chunks=(8, 8))
    halved = lazy.map_blocks(every_other, chunks=((4, 4, 2), (4, 4, 2)),
                             dtype=lazy.dtype)
    assert_eq(halved, source[::2, ::2])
|
|
|
|
|
|
def test_map_blocks2():
    """``block_id`` and extra keyword arguments reach the mapped function."""
    data = np.arange(10, dtype='i8')
    darr = from_array(data, chunks=(2,))

    def func(block, block_id=None, c=0):
        # Fill the block with the sum of its block index, shifted by ``c``.
        return np.ones_like(block) * sum(block_id) + c

    expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype='i8')

    result = darr.map_blocks(func, dtype='i8')
    assert_eq(result, expected)
    # Repeating the call must produce the same keys.
    assert same_keys(darr.map_blocks(func, dtype='i8'), result)

    shifted = darr.map_blocks(func, dtype='i8', c=1)
    assert_eq(shifted, expected + 1)
    assert same_keys(darr.map_blocks(func, dtype='i8', c=1), shifted)
|
|
|
|
|
|
def test_map_blocks_block_info():
    """``block_info`` describes each array argument, keyed by its position."""
    x = da.arange(50, chunks=10)

    def func(a, b, c, block_info=None):
        # Positions 0 and 2 hold arrays; position 1 is the scalar 100.
        for position in (0, 2):
            info = block_info[position]
            assert info['shape'] == (50,)
            assert info['num-chunks'] == (5,)
            lo, hi = info['array-location'][0]
            assert hi - lo == 10
            assert 0 <= lo <= 40
            assert 10 <= hi <= 50

            assert 0 <= info['chunk-location'][0] <= 4

        return a + b + c

    result = da.map_blocks(func, x, 100, x + 1, dtype=x.dtype)
    assert_eq(result, x + x + 1 + 100)
|
|
|
|
|
|
def test_map_blocks_block_info_with_axis():
    """``block_info`` is still keyed by argument position when ``new_axis`` is used.

    Regression test for https://github.com/dask/dask/issues/4298
    """
    values = da.from_array(np.array(['a', 'a', 'b', 'c']), 2)

    def func(x, block_info=None):
        # Only one array argument is passed, so position 0 is the only key;
        # every per-array field therefore has to be read from block_info[0].
        assert set(block_info.keys()) == {0}
        info = block_info[0]
        assert info['shape'] == (4,)
        # Same hyphenated key as used in test_map_blocks_block_info.
        assert info['num-chunks'] == (2,)
        assert info['chunk-location'] in {(0,), (1,)}

        if info['chunk-location'] == (0,):
            assert info['array-location'] == [(0, 2)]
        elif info['chunk-location'] == (1,):
            assert info['array-location'] == [(2, 4)]

        return np.ones((len(x), 3))

    z = values.map_blocks(func, chunks=((2, 2), 3), new_axis=1, dtype='f8')
    # With an explicit dtype nothing runs until the graph is computed, so the
    # result must be evaluated for the assertions inside ``func`` to execute.
    assert_eq(z, np.ones((4, 3), dtype='f8'))
|
|
|
|
|
|
def test_map_blocks_with_constants():
    """Scalars may be passed positionally on either side of the array."""
    reference = np.arange(10)
    darr = da.arange(10, chunks=3)

    plus_hundred = darr.map_blocks(add, 100, dtype=darr.dtype)
    assert_eq(plus_hundred, reference + 100)

    array_first = da.map_blocks(sub, darr, 10, dtype=darr.dtype)
    assert_eq(array_first, reference - 10)

    scalar_first = da.map_blocks(sub, 10, darr, dtype=darr.dtype)
    assert_eq(scalar_first, 10 - reference)
|
|
|
|
|
|
def test_map_blocks_with_kwargs():
    """Keyword arguments such as ``axis``/``keepdims`` are passed to the function."""
    darr = da.arange(10, chunks=5)

    options = dict(axis=0, keepdims=True, dtype=darr.dtype, chunks=(1,))
    block_maxima = darr.map_blocks(np.max, **options)

    # Two blocks (0..4 and 5..9), each reduced to its maximum.
    assert_eq(block_maxima, np.array([4, 9]))
|
|
|
|
|
|
def test_map_blocks_with_chunks():
    """Two arrays can be combined while passing ``chunks`` explicitly."""
    lhs = da.ones((5, 3), chunks=(2, 2))
    rhs = da.ones((5, 3), chunks=(2, 2))
    total = da.map_blocks(np.add, lhs, rhs, chunks=lhs.chunks)
    assert_eq(total, np.ones((5, 3)) * 2)
|
|
|
|
|
|
def test_map_blocks_dtype_inference():
    """Without ``dtype`` the result dtype is inferred; failures are reported."""
    x = np.arange(50).reshape((5, 10))
    y = np.arange(10)
    dx = da.from_array(x, chunks=5)
    dy = da.from_array(y, chunks=5)

    def foo(x, *args, **kwargs):
        # ``cast`` selects the output dtype; other keywords are ignored.
        target = kwargs.pop('cast', 'i8')
        total = x + sum(args)
        return total.astype(target)

    assert_eq(dx.map_blocks(foo, dy, 1), foo(dx, dy, 1))
    assert_eq(dx.map_blocks(foo, dy, 1, cast='f8'),
              foo(dx, dy, 1, cast='f8'))
    assert_eq(dx.map_blocks(foo, dy, 1, cast='f8', dtype='f8'),
              foo(dx, dy, 1, cast='f8', dtype='f8'))

    def boom(x):
        raise RuntimeError("Woops")

    # The function's own error is wrapped in a ValueError that explains
    # that inference failed and names the original exception type.
    with pytest.raises(ValueError) as excinfo:
        dx.map_blocks(boom)
    message = str(excinfo.value)
    assert message.startswith("`dtype` inference failed")
    assert "Please specify the dtype explicitly" in message
    assert 'RuntimeError' in message
|
|
|
|
|
|
def test_from_function_requires_block_args():
    """Calling ``from_array`` with only the data raises an exception."""
    data = np.arange(10)
    with pytest.raises(Exception):
        from_array(data)
|
|
|
|
|
|
def test_repr():
    """The repr mentions name, shape and dtype, and the str form stays short."""
    small = da.ones((4, 4), chunks=(2, 2))
    text = repr(small)
    for fragment in (key_split(small.name), str(small.shape), str(small.dtype)):
        assert fragment in text

    # A large number of chunks must not inflate the string form.
    many_chunks = da.ones((4000, 4), chunks=(4, 2))
    assert len(str(many_chunks)) < 1000
|
|
|
|
|
|
def test_slicing_with_ellipsis():
    """Indexing with ``...`` matches NumPy on a 4-d array."""
    data = np.arange(256).reshape((4, 4, 4, 4))
    darr = da.from_array(data, chunks=(2, 2, 2, 2))

    for index in [(Ellipsis, 1), (0, Ellipsis, 1)]:
        assert_eq(darr[index], data[index])
|
|
|
|
|
|
def test_slicing_with_ndarray():
    """Integer and boolean NumPy arrays can be used as indexers."""
    data = np.arange(64).reshape((8, 8))
    darr = da.from_array(data, chunks=(4, 4))

    # Selecting every row, by position and by mask, returns the whole array.
    all_rows = np.arange(8)
    assert_eq(darr[all_rows], data)
    all_true = np.ones(8, dtype=bool)
    assert_eq(darr[all_true], data)

    single_row = np.array([1])
    assert_eq(darr[single_row], data[[1]])

    mask = np.array([True, False, True] + [False] * 5)
    assert_eq(darr[mask], data[[0, 2]])
|
|
|
|
|
|
def test_dtype():
    """The declared dtype of an expression matches the dtype it computes to."""
    d = da.ones((4, 4), chunks=(2, 2))

    assert d.dtype == d.compute().dtype
    # Compare each expression with its *own* computed result; the previous
    # check paired ``d * 1.0`` with the result of ``d + 1.0``.
    for expr in (d * 1.0, d + 1.0):
        assert expr.dtype == expr.compute().dtype
    assert d.sum().dtype == d.sum().compute().dtype  # no shape
|
|
|
|
|
|
def test_blockdims_from_blockshape():
    """Check the chunk tuples produced from a shape and a block shape."""
    expected = ((4, 4, 2), (3, 3, 3, 1))
    assert blockdims_from_blockshape((10, 10), (4, 3)) == expected

    with pytest.raises(TypeError):
        blockdims_from_blockshape((10,), None)

    # Float and NumPy-integer sizes are accepted as well.
    assert blockdims_from_blockshape((1e2, 3), [1e1, 3]) == ((10, ) * 10, (3, ))
    assert blockdims_from_blockshape((np.int8(10), ), (5, )) == ((5, 5), )
|
|
|
|
|
|
def test_coerce():
    """Single-element arrays convert to Python scalars; larger ones do not."""
    zero_dim = da.from_array(np.array(1), chunks=(1,))
    one_elem = da.from_array(np.array([1]), chunks=(1,))
    with dask.config.set(scheduler='sync'):
        for darr in (zero_dim, one_elem):
            assert bool(darr) is True
            assert int(darr) == 1
            assert float(darr) == 1.0
            assert complex(darr) == complex(1)

    pair = da.from_array(np.arange(2), chunks=(2,))
    for convert in (int, float, complex):
        with pytest.raises(TypeError):
            convert(pair)
|
|
|
|
|
|
def test_bool():
    """``bool()`` of a multi-element array raises ValueError."""
    arr = np.arange(100).reshape((10, 10))
    darr = da.from_array(arr, chunks=(10, 10))
    # Each call gets its own ``raises`` block: when both shared one block the
    # first call raised and the second was never executed.
    with pytest.raises(ValueError):
        bool(darr)
    with pytest.raises(ValueError):
        bool(darr == darr)
|
|
|
|
|
|
def test_store_kwargs():
    """Extra keyword arguments given to ``store`` reach the scheduler callable."""
    source = da.ones((10, 10), chunks=(2, 2)) + 1

    # One-element list so the nested function can flip the flag.
    invoked = [False]

    def get_func(*args, **kwargs):
        assert kwargs.pop("foo") == "test kwarg"
        result = dask.get(*args, **kwargs)
        invoked[0] = True
        return result

    def via_function(target):
        store([source], [target], scheduler=get_func, foo="test kwarg")

    def via_method(target):
        source.store(target, scheduler=get_func, foo="test kwarg")

    def via_function_keeping_result(target):
        store([source], [target], scheduler=get_func, return_stored=True,
              foo="test kwarg")

    for run_store in (via_function, via_method, via_function_keeping_result):
        invoked[0] = False
        run_store(np.zeros(shape=(10, 10)))
        assert invoked[0]
|
|
|
|
|
|
def test_store_delayed_target():
    """``store`` accepts delayed objects as targets."""
    from dask.delayed import delayed

    base = da.ones((4, 4), chunks=(2, 2))
    a, b = base + 1, base + 2

    # Buffers allocated by the delayed targets, recorded by key so the test
    # can inspect them once the store has run.
    created = {}

    def make_target(key):
        buf = np.empty((4, 4))
        created[key] = buf
        return buf

    # Delayed calls that allocate the targets.
    atd = delayed(make_target)('at')
    btd = delayed(make_target)('bt')

    # Not keeping the result: nothing is returned, both buffers are filled.
    st = store([a, b], [atd, btd])
    at, bt = created['at'], created['bt']

    assert st is None
    assert_eq(at, a)
    assert_eq(bt, b)

    # Keeping the result, with and without immediate computation.
    for st_compute in (False, True):
        created.clear()

        st = store([a, b], [atd, btd], return_stored=True, compute=st_compute)
        if st_compute:
            for stored in st:
                dependencies = dask.core.get_deps(stored.dask)[0]
                assert not any(dependencies.values())

        st = dask.compute(*st)
        at, bt = created['at'], created['bt']

        assert st is not None
        assert isinstance(st, tuple)
        assert all(isinstance(v, np.ndarray) for v in st)
        for actual, source in ((at, a), (bt, b), (st[0], a), (st[1], b)):
            assert_eq(actual, source)

        # Mismatched or non-dask sources are rejected.
        invalid_calls = (([a], [at, bt]), (at, at), ([at, bt], [at, bt]))
        for bad_sources, bad_targets in invalid_calls:
            with pytest.raises(ValueError):
                store(bad_sources, bad_targets)
|
|
|
|
|
|
def test_store():
    """``store`` fills the targets, returns None and validates its arguments."""
    base = da.ones((4, 4), chunks=(2, 2))
    a, b = base + 1, base + 2

    at = np.empty(shape=(4, 4))
    bt = np.empty(shape=(4, 4))

    outcome = store([a, b], [at, bt])
    assert outcome is None
    assert (at == 2).all()
    assert (bt == 3).all()

    # Mismatched lengths and non-dask sources are rejected.
    invalid_calls = (([a], [at, bt]), (at, at), ([at, bt], [at, bt]))
    for bad_sources, bad_targets in invalid_calls:
        with pytest.raises(ValueError):
            store(bad_sources, bad_targets)
|
|
|
|
|
|
def test_store_regions():
    """``store`` writes only into the given region(s) of each target."""
    d = da.ones((4, 4, 4), dtype=int, chunks=(2, 2, 2))
    a, b = d + 1, d + 2
    a = a[:, 1:, :].astype(float)

    region = (slice(None, None, 2), slice(None), [1, 2, 4, 5])

    def fresh_targets():
        # Zero-filled targets, larger than the sources.
        return np.zeros(shape=(8, 3, 6)), np.zeros(shape=(8, 4, 6))

    def assert_only_region_written(at, bt):
        assert (at[region] == 2).all() and (bt[region] == 3).all()
        assert not (bt == 3).all() and not (bt == 0).all()
        assert not (at == 2).all() and not (at == 0).all()

    # First a single region shared by both sources, then one region each.
    region_specs = (region, [region, region])

    # Deferred store, result discarded.
    for regions in region_specs:
        at, bt = fresh_targets()
        v = store([a, b], [at, bt], regions=regions, compute=False)
        assert isinstance(v, Delayed)
        # Nothing is written until the delayed object is computed.
        assert (at == 0).all() and (bt[region] == 0).all()
        assert all([ev is None for ev in v.compute()])
        assert_only_region_written(at, bt)

    # Same region forms, keeping the stored result.
    for regions in region_specs:
        for st_compute in [False, True]:
            at, bt = fresh_targets()
            v = store(
                [a, b], [at, bt], regions=regions,
                compute=st_compute, return_stored=True
            )
            assert isinstance(v, tuple)
            assert all([isinstance(e, da.Array) for e in v])
            if st_compute:
                for stored in v:
                    dependencies = dask.core.get_deps(stored.dask)[0]
                    assert not any(dependencies.values())
            else:
                assert (at == 0).all() and (bt[region] == 0).all()

            ar, br = v
            for stored, source in ((ar, a), (br, b)):
                assert stored.dtype == source.dtype
                assert stored.shape == source.shape
                assert stored.chunks == source.chunks

            ar, br = da.compute(ar, br)
            assert_only_region_written(at, bt)
            assert (br == 3).all()
            assert (ar == 2).all()
|
|
|
|
|
|
def test_store_compute_false():
    """With ``compute=False`` the targets stay untouched until computed."""
    base = da.ones((4, 4), chunks=(2, 2))
    a, b = base + 1, base + 2

    # Result discarded: a single Delayed object is returned.
    at = np.zeros(shape=(4, 4))
    bt = np.zeros(shape=(4, 4))

    pending = store([a, b], [at, bt], compute=False)
    assert isinstance(pending, Delayed)
    assert (at == 0).all()
    assert (bt == 0).all()
    assert all(ev is None for ev in pending.compute())
    assert (at == 2).all()
    assert (bt == 3).all()

    # Result kept: one dask Array per source is returned.
    at = np.zeros(shape=(4, 4))
    bt = np.zeros(shape=(4, 4))

    dat, dbt = store([a, b], [at, bt], compute=False, return_stored=True)
    assert isinstance(dat, Array)
    assert isinstance(dbt, Array)
    assert (at == 0).all()
    assert (bt == 0).all()
    assert (dat.compute() == at).all()
    assert (dbt.compute() == bt).all()
    assert (at == 2).all()
    assert (bt == 3).all()
|
|
|
|
|
|
def test_store_nocompute_regions():
    """Deferred stores into different regions get different keys."""
    x = da.ones(10, chunks=1)
    y = np.zeros((2, 10))
    first, second = [
        da.store(x, y, regions=(row,), compute=False) for row in (0, 1)
    ]
    assert first.key != second.key
|
|
|
|
|
|
class ThreadSafetyError(Exception):
    """Raised by ``NonthreadSafeStore`` when a write starts during another write."""
|
|
|
|
|
|
class NonthreadSafeStore(object):
    """Store target that raises if ``__setitem__`` is entered while in use."""

    def __init__(self):
        # True for the duration of a ``__setitem__`` call.
        self.in_use = False

    def __setitem__(self, key, value):
        """Discard the write, raising ThreadSafetyError on overlapping calls."""
        if self.in_use:
            raise ThreadSafetyError()
        self.in_use = True
        # Hold the flag briefly so an overlapping call has time to see it.
        time.sleep(0.001)
        self.in_use = False
|
|
|
|
|
|
class ThreadSafeStore(object):
    """Store target that records how many writes were in flight at once."""

    def __init__(self):
        # Highest value ``concurrent_uses`` has reached so far.
        self.max_concurrent_uses = 0
        # Number of ``__setitem__`` calls currently running.
        self.concurrent_uses = 0

    def __setitem__(self, key, value):
        """Discard the write while updating the concurrency counters."""
        self.concurrent_uses += 1
        self.max_concurrent_uses = max(self.concurrent_uses,
                                       self.max_concurrent_uses)
        # Stay "busy" for a moment so overlapping calls can be observed.
        time.sleep(0.01)
        self.concurrent_uses -= 1
|
|
|
|
|
|
class CounterLock(object):
    """Wrapper around ``threading.Lock`` that counts acquire/release calls."""

    def __init__(self, *args, **kwargs):
        # Counters are incremented on every call, whatever the outcome.
        self.acquire_count = 0
        self.release_count = 0

        self.lock = Lock(*args, **kwargs)

    def acquire(self, *args, **kwargs):
        """Count the call, then forward it to the wrapped lock."""
        self.acquire_count += 1
        outcome = self.lock.acquire(*args, **kwargs)
        return outcome

    def release(self, *args, **kwargs):
        """Count the call, then forward it to the wrapped lock."""
        self.release_count += 1
        outcome = self.lock.release(*args, **kwargs)
        return outcome
|
|
|
|
|
|
def test_store_locks():
    """Check how ``store`` embeds, shares, omits and uses locks."""
    lock_type = type(Lock())
    d = da.ones((10, 10), chunks=(2, 2))
    a, b = d + 1, d + 2

    at = np.zeros(shape=(10, 10))
    bt = np.zeros(shape=(10, 10))

    # The lock passed in is the only lock found in the graph.
    lock = Lock()
    v = store([a, b], [at, bt], compute=False, lock=lock)
    assert isinstance(v, Delayed)
    found = set()
    for task in v.dask.values():
        for item in task:
            if isinstance(item, lock_type):
                found.add(item)
    assert found == {lock}

    # Ensure same lock applies over multiple stores
    at = NonthreadSafeStore()
    v = store([a, b], [at, at], lock=lock,
              scheduler='threads', num_workers=10)
    assert v is None

    # Don't assume thread safety by default
    at = NonthreadSafeStore()
    assert store(a, at, scheduler='threads', num_workers=10) is None
    assert a.store(at, scheduler='threads', num_workers=10) is None

    # Ensure locks can be removed: retry up to ten times until at least two
    # writes are seen overlapping, and fail if that never happens.
    at = ThreadSafeStore()
    for _ in range(10):
        st = a.store(at, lock=False, scheduler='threads', num_workers=10)
        assert st is None
        if at.max_concurrent_uses > 1:
            break
    else:
        assert False

    # Verify number of lock calls
    nchunks = np.sum([np.prod([len(c) for c in e.chunks]) for e in [a, b]])
    for c in (False, True):
        at = np.zeros(shape=(10, 10))
        bt = np.zeros(shape=(10, 10))
        lock = CounterLock()

        v = store([a, b], [at, bt], lock=lock, compute=c, return_stored=True)
        assert all(isinstance(e, Array) for e in v)

        da.compute(v)

        # When `return_stored=True` and `compute=False`,
        # the lock should be acquired only once for store and load steps
        # as they are fused together into one step.
        assert lock.acquire_count == lock.release_count
        expected_acquires = 2 * nchunks if c else nchunks
        assert lock.acquire_count == expected_acquires
|
|
|
|
|
|
def test_store_method_return():
    """The return type of ``Array.store`` depends on ``compute``/``return_stored``."""
    source = da.ones((10, 10), chunks=(2, 2)) + 1

    for compute in (False, True):
        for return_stored in (False, True):
            target = np.zeros(shape=(10, 10))
            outcome = source.store(
                target, scheduler='threads',
                compute=compute, return_stored=return_stored
            )

            if return_stored:
                assert isinstance(outcome, Array)
                continue
            if compute:
                assert outcome is None
            else:
                assert isinstance(outcome, Delayed)
|
|
|
|
|
|
@pytest.mark.xfail(reason="can't lock with multiprocessing")
def test_store_multiprocessing_lock():
    """Store with the ``processes`` scheduler (marked as an expected failure)."""
    source = da.ones((10, 10), chunks=(2, 2)) + 1

    target = np.zeros(shape=(10, 10))
    outcome = source.store(target, scheduler='processes', num_workers=10)
    assert outcome is None
|
|
|
|
|
|
def test_to_hdf5():
    """``to_hdf5`` writes the data and honours the ``chunks`` argument.

    The files are reopened with an explicit ``mode='r'``: the checks only
    read, and the default mode of ``h5py.File`` differs between h5py
    releases.
    """
    h5py = pytest.importorskip('h5py')
    x = da.ones((4, 4), chunks=(2, 2))
    y = da.ones(4, chunks=2, dtype='i4')

    # Default: the HDF5 chunks follow the dask chunks.
    with tmpfile('.hdf5') as fn:
        x.to_hdf5(fn, '/x')
        with h5py.File(fn, mode='r') as f:
            d = f['/x']

            assert_eq(d[:], x)
            assert d.chunks == (2, 2)

    # chunks=None: the dataset is stored without chunking.
    with tmpfile('.hdf5') as fn:
        x.to_hdf5(fn, '/x', chunks=None)
        with h5py.File(fn, mode='r') as f:
            d = f['/x']

            assert_eq(d[:], x)
            assert d.chunks is None

    # Explicit chunk shape.
    with tmpfile('.hdf5') as fn:
        x.to_hdf5(fn, '/x', chunks=(1, 1))
        with h5py.File(fn, mode='r') as f:
            d = f['/x']

            assert_eq(d[:], x)
            assert d.chunks == (1, 1)

    # Several datasets written in one call via the module-level function.
    with tmpfile('.hdf5') as fn:
        da.to_hdf5(fn, {'/x': x, '/y': y})

        with h5py.File(fn, mode='r') as f:
            assert_eq(f['/x'][:], x)
            assert f['/x'].chunks == (2, 2)
            assert_eq(f['/y'][:], y)
            assert f['/y'].chunks == (2,)
|
|
|
|
|
|
def test_to_dask_dataframe():
    """1-d arrays convert to a Series, 2-d arrays to a DataFrame."""
    dd = pytest.importorskip('dask.dataframe')

    cases = [
        (da.ones((4,), chunks=(2,)), dd.Series),
        (da.ones((4, 4), chunks=(2, 2)), dd.DataFrame),
    ]
    for darr, expected_type in cases:
        converted = darr.to_dask_dataframe()
        assert isinstance(converted, expected_type)
|
|
|
|
|
|
def test_np_array_with_zero_dimensions():
    """``np.array`` of a zero-dimensional dask result matches NumPy."""
    darr = da.ones((4, 4), chunks=(2, 2))
    from_dask = np.array(darr.sum())
    from_numpy = np.array(darr.compute().sum())
    assert_eq(from_dask, from_numpy)
|
|
|
|
|
|
def test_dtype_complex():
    """dtypes declared by dask expressions match the NumPy equivalents."""
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def assert_dtype_eq(actual, expected):
        # The previous local helper only *returned* the comparison, so no
        # check in this test could ever fail. It also shadowed the
        # module-level ``assert_eq``; hence the distinct name.
        assert isinstance(actual, np.dtype)
        assert isinstance(expected, np.dtype)
        assert str(actual) == str(expected)

    assert_dtype_eq(a.dtype, x.dtype)
    assert_dtype_eq(b.dtype, y.dtype)

    assert_dtype_eq((a + 1).dtype, (x + 1).dtype)
    assert_dtype_eq((a + b).dtype, (x + y).dtype)
    assert_dtype_eq(a.T.dtype, x.T.dtype)
    assert_dtype_eq(a[:3].dtype, x[:3].dtype)
    assert_dtype_eq((a.dot(b.T)).dtype, (x.dot(y.T)).dtype)

    assert_dtype_eq(stack([a, b]).dtype, np.vstack([x, y]).dtype)
    assert_dtype_eq(concatenate([a, b]).dtype, np.concatenate([x, y]).dtype)

    # Reductions are compared against NumPy; three of these used to compare
    # a dask expression with itself (the duplicate ``std`` check is folded
    # into the one below).
    assert_dtype_eq(b.std().dtype, y.std().dtype)
    assert_dtype_eq(c.sum().dtype, z.sum().dtype)
    assert_dtype_eq(a.min().dtype, x.min().dtype)
    assert_dtype_eq(a.argmin(axis=0).dtype, x.argmin(axis=0).dtype)

    assert_dtype_eq(da.sin(c).dtype, np.sin(z).dtype)
    assert_dtype_eq(da.exp(b).dtype, np.exp(y).dtype)
    assert_dtype_eq(da.floor(a).dtype, np.floor(x).dtype)
    assert_dtype_eq(da.isnan(b).dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b).dtype == 'bool'
        assert da.notnull(b).dtype == 'bool'

    # Field access on a structured array.
    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert_dtype_eq(d['text'].dtype, x['text'].dtype)
    assert_dtype_eq(d[['numbers', 'text']].dtype,
                    x[['numbers', 'text']].dtype)
|
|
|
|
|
|
def test_astype():
    """``astype`` converts, validates its keywords and is a no-op when possible."""
    data = np.ones((5, 5), dtype='f8')
    darr = da.from_array(data, chunks=(2, 2))

    as_int = darr.astype('i8')
    assert as_int.dtype == 'i8'
    assert_eq(as_int, data.astype('i8'))
    assert same_keys(darr.astype('i8'), darr.astype('i8'))

    # Both an unsafe cast under casting='safe' and an unknown keyword
    # are expected to raise TypeError.
    for bad_kwargs in ({'casting': 'safe'}, {'not_a_real_kwarg': 'foo'}):
        with pytest.raises(TypeError):
            darr.astype('i8', **bad_kwargs)

    # smoketest with kwargs
    assert_eq(darr.astype('i8', copy=False), data.astype('i8', copy=False))

    # Check it's a noop
    assert darr.astype('f8') is darr
|
|
|
|
|
|
def test_arithmetic():
    """Compare dask operators and ufuncs against NumPy on small 1-d arrays."""
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2,))
    b = da.from_array(y, chunks=(2,))
    c = da.from_array(z, chunks=(2,))

    # Binary operators: array/array, array/scalar and scalar/array forms.
    # ``truediv`` is what ``/`` means here (``division`` is imported from
    # ``__future__`` at the top of the file).
    numeric_ops = [operator.add, operator.mul, operator.sub,
                   operator.truediv, operator.floordiv, operator.pow,
                   operator.mod, operator.gt, operator.lt, operator.ge,
                   operator.le, operator.eq, operator.ne]
    for op in numeric_ops:
        assert_eq(op(a, b), op(x, y))
        assert_eq(op(a, 2), op(x, 2))
        assert_eq(op(2, b), op(2, y))

    # Bitwise operators are only exercised on the integer array.
    for op in (operator.and_, operator.or_, operator.xor):
        assert_eq(op(b, b), op(y, y))
        assert_eq(op(b, True), op(y, True))
        assert_eq(op(True, b), op(True, y))

    assert_eq(-a, -x)
    assert_eq(abs(a), abs(x))
    # This check used to appear twice; the duplicate has been dropped.
    assert_eq(~(a == b), ~(x == y))

    assert_eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert_eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert_eq(da.log(a), np.log(x))
    assert_eq(da.log10(a), np.log10(x))
    assert_eq(da.log1p(a), np.log1p(x))
    assert_eq(da.sqrt(a), np.sqrt(x))
    assert_eq(da.square(a), np.square(x))

    # These calls were individually wrapped in ``pytest.warns(None)`` to
    # tolerate overflow warnings. That form is deprecated in pytest, so the
    # warnings are silenced with the standard library instead.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        assert_eq(da.exp(b), np.exp(y))
        assert_eq(da.expm1(b), np.expm1(y))
        assert_eq(da.cosh(b), np.cosh(y))
        assert_eq(da.ldexp(c, c), np.ldexp(z, z))

    assert_eq(da.sin(a), np.sin(x))
    assert_eq(da.cos(b), np.cos(y))
    assert_eq(da.tan(a), np.tan(x))
    assert_eq(da.arcsin(b / 10), np.arcsin(y / 10))
    assert_eq(da.arccos(b / 10), np.arccos(y / 10))
    assert_eq(da.arctan(b / 10), np.arctan(y / 10))
    assert_eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x))
    assert_eq(da.hypot(b, a), np.hypot(y, x))
    assert_eq(da.sinh(a), np.sinh(x))
    assert_eq(da.tanh(a), np.tanh(x))
    assert_eq(da.arcsinh(b * 10), np.arcsinh(y * 10))
    assert_eq(da.arccosh(b * 10), np.arccosh(y * 10))
    assert_eq(da.arctanh(b / 10), np.arctanh(y / 10))
    assert_eq(da.deg2rad(a), np.deg2rad(x))
    assert_eq(da.rad2deg(a), np.rad2deg(x))

    assert_eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert_eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert_eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert_eq(da.logical_not(a < 1), np.logical_not(x < 1))
    # The reference side now uses the NumPy array ``x``; it used to be built
    # from the dask array ``a`` rather than from NumPy data.
    assert_eq(da.maximum(a, 5 - a), np.maximum(x, 5 - x))
    assert_eq(da.minimum(a, 5 - a), np.minimum(x, 5 - x))
    assert_eq(da.fmax(a, 5 - a), np.fmax(x, 5 - x))
    assert_eq(da.fmin(a, 5 - a), np.fmin(x, 5 - x))

    assert_eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert_eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert_eq(da.isfinite(a), np.isfinite(x))
    assert_eq(da.isinf(a), np.isinf(x))
    assert_eq(da.isnan(a), np.isnan(x))
    assert_eq(da.signbit(a - 3), np.signbit(x - 3))
    assert_eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert_eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert_eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert_eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert_eq(da.ceil(a), np.ceil(x))
    assert_eq(da.trunc(a / 2), np.trunc(x / 2))

    assert_eq(da.degrees(b), np.degrees(y))
    assert_eq(da.radians(a), np.radians(x))

    assert_eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert_eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert_eq(da.angle(a + 1j), np.angle(x + 1j))
    assert_eq(da.real(a + 1j), np.real(x + 1j))
    assert_eq((a + 1j).real, np.real(x + 1j))
    assert_eq(da.imag(a + 1j), np.imag(x + 1j))
    assert_eq((a + 1j).imag, np.imag(x + 1j))
    assert_eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert_eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert_eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert_eq(b.clip(1, 4), y.clip(1, 4))
    assert_eq(da.fabs(b), np.fabs(y))
    assert_eq(da.sign(b - 2), np.sign(y - 2))
    assert_eq(da.absolute(b - 2), np.absolute(y - 2))
    assert_eq(da.absolute(b - 2 + 1j), np.absolute(y - 2 + 1j))

    # ufuncs with two outputs.
    for name in ('frexp', 'modf'):
        l1, l2 = getattr(da, name)(a)
        r1, r2 = getattr(np, name)(x)
        assert_eq(l1, r1)
        assert_eq(l2, r2)

    assert_eq(da.around(a, -1), np.around(x, -1))
|
|
|
|
|
|
def test_elemwise_consistent_names():
    """Building the same element-wise expression twice yields the same keys."""
    a = da.from_array(np.arange(5, dtype='f4'), chunks=(2,))
    b = da.from_array(np.arange(5, dtype='f4'), chunks=(2,))

    builders = [
        lambda: a + b,
        lambda: a + 2,
        lambda: da.exp(a),
        lambda: da.exp(a, dtype='f8'),
        lambda: da.maximum(a, b),
    ]
    for build in builders:
        assert same_keys(build(), build())
|
|
|
|
|
|
def test_optimize():
    """``optimize`` returns a plain dict that still holds the requested keys."""
    data = np.arange(5).astype('f4')
    darr = da.from_array(data, chunks=(2,))
    expr = darr[1:4] + 1

    keys = expr.__dask_keys__()
    result = optimize(expr.dask, keys)

    assert isinstance(result, dict)
    missing = [key for key in keys if key not in result]
    assert not missing
|
|
|
|
|
|
def test_slicing_with_non_ndarrays():
    """``from_array`` works on sources whose slices only offer ``__array__``."""

    class ARangeSlice(object):
        """Result of slicing: converts to ``np.arange(start, stop)``."""

        def __init__(self, start, stop):
            self.start, self.stop = start, stop

        def __array__(self):
            return np.arange(self.start, self.stop)

    class ARangeSlicable(object):
        """Array-like of length ``n`` exposing ``dtype``, ``shape`` and slicing."""

        dtype = np.dtype('i8')

        def __init__(self, n):
            self.n = n

        @property
        def shape(self):
            return (self.n,)

        def __getitem__(self, key):
            # ``key`` is indexed at 0 and read for ``start``/``stop``.
            first = key[0]
            return ARangeSlice(first.start, first.stop)

    x = da.from_array(ARangeSlicable(10), chunks=(4,))

    expected = (np.arange(10, dtype=x.dtype) + 1).sum()
    assert_eq((x + 1).sum(), expected)
|
|
|
|
|
|
def test_getter():
    """``getter`` returns ndarrays unless ``asarray=False`` is given."""
    # Any warnings raised while creating ``np.matrix`` are recorded, not shown.
    with warnings.catch_warnings(record=True):
        converted = getter(np.matrix([[1]]), 0)
        assert type(converted) is np.ndarray
        preserved = getter(np.matrix([[1]]), 0, asarray=False)
        assert type(preserved) is np.matrix
    assert_eq(getter([1, 2, 3, 4, 5], slice(1, 4)), np.array([2, 3, 4]))

    # ``None`` in the index adds a new axis, as in NumPy.
    with_new_axis = getter(np.arange(5), (None, slice(None, None)))
    assert_eq(with_new_axis, np.arange(5)[None, :])
|
|
|
|
|
|
def test_size():
    """``size`` matches NumPy and is a plain ``int``."""
    darr = da.ones((10, 2), chunks=(3, 1))
    size = darr.size
    assert size == np.array(darr).size
    assert isinstance(size, int)
|
|
|
|
|
|
def test_nbytes():
    """``nbytes`` matches the materialised NumPy array."""
    darr = da.ones((10, 2), chunks=(3, 1))
    materialised = np.array(darr)
    assert darr.nbytes == materialised.nbytes
|
|
|
|
|
|
def test_itemsize():
    """``itemsize`` of a default ``da.ones`` array is 8."""
    darr = da.ones((10, 2), chunks=(3, 1))
    item_bytes = darr.itemsize
    assert item_bytes == 8
|
|
|
|
|
|
def test_Array_normalizes_dtype():
    """A Python type passed as ``dtype`` is stored as an ``np.dtype``."""
    darr = da.ones((3,), chunks=(1,), dtype=int)
    stored = darr.dtype
    assert isinstance(stored, np.dtype)
|
|
|
|
|
|
def test_from_array_with_lock():
    """``lock=True`` puts one shared lock into every chunk task."""
    data = np.arange(10)
    darr = da.from_array(data, chunks=5, lock=True)

    chunk_tasks = [task for key, task in darr.dask.items()
                   if key[0] == darr.name]

    # Element 4 of each task is the lock, and it is the same object everywhere.
    assert hasattr(chunk_tasks[0][4], 'acquire')
    distinct_locks = {task[4] for task in chunk_tasks}
    assert len(distinct_locks) == 1

    assert_eq(darr, data)

    # A caller-supplied lock can be shared between several arrays.
    shared = Lock()
    first = da.from_array(data, chunks=5, lock=shared)
    second = da.from_array(data, chunks=5, lock=shared)

    assert_eq(first + second, data + data)
|
|
|
|
|
|
class MyArray(object):
    """Minimal array-like wrapper exposing ``dtype``, ``shape``, ``ndim`` and indexing."""

    def __init__(self, x):
        # Copy the wrapped object's metadata onto the wrapper.
        self.x = x
        self.shape = x.shape
        self.ndim = len(x.shape)
        self.dtype = x.dtype

    def __getitem__(self, i):
        """Forward indexing to the wrapped object."""
        wrapped = self.x
        return wrapped[i]
|
|
|
|
|
|
@pytest.mark.parametrize('x,chunks', [
    (np.arange(25).reshape((5, 5)), (5, 5)),
    (np.arange(25).reshape((5, 5)), -1),
    (np.array([[1]]), 1),
    (np.array(1), 1),
])
def test_from_array_tasks_always_call_getter(x, chunks):
    """A non-ndarray source wrapped with ``asarray=False`` still computes to ``x``."""
    wrapped = MyArray(x)
    dx = da.from_array(wrapped, chunks=chunks, asarray=False)
    assert_eq(x, dx)
|
|
|
|
|
|
def test_from_array_ndarray_onechunk():
    """An ndarray stored as a single chunk gives a one-key graph.

    The only value in the graph is the original array object itself.
    """
    data = np.array([[1, 2], [3, 4]])
    darr = da.from_array(data, chunks=-1)
    assert_eq(data, darr)

    graph = darr.dask
    assert len(graph) == 1
    assert graph[darr.name, 0, 0] is data
|
|
|
|
|
|
def test_from_array_ndarray_getitem():
    """Chunk tasks for an ndarray call ``operator.getitem``.

    That is, neither ``getter`` nor ``getter_nofancy`` is used.
    """
    data = np.array([[1, 2], [3, 4]])
    darr = da.from_array(data, chunks=(1, 2))
    assert_eq(data, darr)

    first_task = darr.dask[darr.name, 0, 0]
    assert first_task[0] == operator.getitem
|
|
|
|
|
|
@pytest.mark.parametrize(
    'x', [[1, 2], (1, 2), memoryview(b'abc')] +
    ([buffer(b'abc')] if PY2 else []))  # noqa: F821
def test_from_array_list(x):
    """Lists, tuples and memoryviews are stored in the graph as ndarrays."""
    expected = np.array(x)

    # Single chunk: the chunk key maps straight to an ndarray.
    whole = da.from_array(x, chunks=-1)
    assert_eq(expected, whole)
    assert isinstance(whole.dask[whole.name, 0], np.ndarray)

    # Several chunks: each chunk is a getitem task, and the converted data
    # is stored under the 'array-original' variant of the name.
    split = da.from_array(x, chunks=1)
    assert_eq(expected, split)
    assert split.dask[split.name, 0][0] == operator.getitem
    original_key = split.name.replace('array', 'array-original')
    assert isinstance(split.dask[original_key], np.ndarray)
|
|
|
|
|
|
@pytest.mark.parametrize(
    'type_', [t for t in np.ScalarType if t not in [memoryview] +
              ([buffer] if PY2 else [])])  # noqa: F821
def test_from_array_scalar(type_):
    """Python and NumPy scalars are stored in the graph as ndarrays."""
    # datetime64 is built from a date string here; other types from 1.
    x = np.datetime64('2000-01-01') if type_ == np.datetime64 else type_(1)

    dx = da.from_array(x, chunks=-1)
    assert_eq(np.array(x), dx)

    # A zero-dimensional array has the one-element key ``(name,)``.
    scalar_key = (dx.name,)
    assert isinstance(dx.dask[scalar_key], np.ndarray)
|
|
|
|
|
|
@pytest.mark.parametrize('asarray,cls', [
    (True, np.ndarray),
    (False, np.matrix),
])
def test_from_array_no_asarray(asarray, cls):
    """With ``asarray=False`` chunks keep their type, also after slicing."""

    def assert_chunks_are_of_type(x):
        computed = compute_as_if_collection(Array, x.dask, x.__dask_keys__())
        for chunk in concat(computed):
            assert type(chunk) is cls

    # Any warnings raised while creating ``np.matrix`` are recorded, not shown.
    with warnings.catch_warnings(record=True):
        x = np.matrix(np.arange(100).reshape((10, 10)))
    dx = da.from_array(x, chunks=(5, 5), asarray=asarray)
    for candidate in (dx, dx[0:5], dx[0:5][:, 0]):
        assert_chunks_are_of_type(candidate)
|
|
|
|
|
|
def test_from_array_getitem():
    """A custom ``getitem`` callable is the one used in the chunk tasks."""
    data = np.arange(10)

    def my_getitem(x, ind):
        return x[ind]

    darr = da.from_array(data, chunks=(5,), getitem=my_getitem)

    # Every task tuple in the graph must start with the custom callable.
    for task in darr.dask.values():
        if isinstance(task, tuple):
            assert task[0] is my_getitem

    assert_eq(data, darr)
|
|
|
|
|
|
def test_from_array_minus_one():
    """``chunks=-1`` produces a single chunk spanning the whole array."""
    data = np.arange(10)
    darr = da.from_array(data, -1)
    assert darr.chunks == ((10,),)
    assert_eq(data, darr)
|
|
|
|
|
|
def test_from_array_copy():
    """Copying a single-partition array gives a distinct array and result.

    Regression test for https://github.com/dask/dask/issues/3751
    """
    data = np.arange(10)
    original = da.from_array(data, -1)
    assert original.npartitions == 1

    duplicate = original.copy()
    assert original is not duplicate
    # The computed results must be different objects as well.
    assert original.compute() is not duplicate.compute()
|
|
|
|
|
|
def test_asarray():
    """``da.asarray`` converts lists and returns dask arrays unchanged."""
    values = [1, 2, 3]
    assert_eq(da.asarray(values), np.asarray(values))

    already_dask = da.asarray([1, 2, 3])
    assert da.asarray(already_dask) is already_dask
|
|
|
|
|
|
def test_asarray_dask_dataframe():
    """``da.asarray`` of a dask Series/DataFrame equals its ``.values``.

    See https://github.com/dask/dask/issues/3885
    """
    dd = pytest.importorskip('dask.dataframe')
    import pandas as pd

    series = dd.from_pandas(pd.Series([1, 2, 3, 4]), 2)
    frame = series.to_frame(name='s')

    for collection in (series, frame):
        result = da.asarray(collection)
        expected = collection.values
        assert_eq(result, expected)
|
|
|
|
|
|
def test_asarray_h5py():
    """``da.asarray`` keeps an h5py dataset in the graph without copying it."""
    h5py = pytest.importorskip('h5py')

    with tmpfile('.hdf5') as fn:
        # The file does not exist yet and a dataset is created in it, so it
        # must be opened writable. The mode is given explicitly because the
        # default mode of ``h5py.File`` differs between h5py releases.
        with h5py.File(fn, mode='a') as f:
            d = f.create_dataset('/x', shape=(2, 2), dtype=float)
            x = da.asarray(d)
            # The dataset object itself is a graph value, and no graph
            # value is an in-memory ndarray.
            assert d in x.dask.values()
            assert not any(isinstance(v, np.ndarray) for v in x.dask.values())
|
|
|
|
|
|
def test_asanyarray():
    """``da.asanyarray`` preserves ndarray subclasses in its chunks."""
    # Constructing the matrix is wrapped in catch_warnings, as in the original.
    with warnings.catch_warnings(record=True):
        matrix = np.matrix([1, 2, 3])

    wrapped = da.asanyarray(matrix)
    assert wrapped.numblocks == (1, 1)

    blocks = compute_as_if_collection(Array, wrapped.dask,
                                      wrapped.__dask_keys__())
    first_block = blocks[0][0]
    assert isinstance(first_block, np.matrix)

    # Already a dask array: returned unchanged.
    assert da.asanyarray(wrapped) is wrapped
|
|
|
|
|
|
def test_asanyarray_dataframe():
    """``da.asanyarray`` converts dask DataFrames and Series to dask arrays."""
    pd = pytest.importorskip('pandas')
    dd = pytest.importorskip('dask.dataframe')

    df = pd.DataFrame({'x': [1, 2, 3]})
    ddf = dd.from_pandas(df, npartitions=2)

    # Whole frame first, then a single column.
    expected = np.asanyarray(df)
    actual = da.asanyarray(ddf)
    assert isinstance(actual, da.Array)
    assert_eq(expected, actual)

    expected = np.asanyarray(df.x)
    actual = da.asanyarray(ddf.x)
    assert isinstance(actual, da.Array)
    assert_eq(expected, actual)
|
|
|
|
|
|
def test_from_func():
    """``from_func`` builds an array from a callable plus keyword arguments."""
    base = np.arange(10)
    f = lambda n: n * base

    built = from_func(f, (10,), base.dtype, kwargs={'n': 2})

    assert built.shape == base.shape
    assert built.dtype == base.dtype
    assert_eq(built.compute(), 2 * base)

    # Building again with identical inputs must give identical keys.
    rebuilt = from_func(f, (10,), base.dtype, kwargs={'n': 2})
    assert same_keys(built, rebuilt)
|
|
|
|
|
|
def test_concatenate3_2():
    """``concatenate3`` joins nested lists of 1-d, 2-d and 3-d blocks."""
    x = np.array([1, 2])
    assert_eq(concatenate3([x, x, x]), np.array([1, 2, 1, 2, 1, 2]))

    x = np.array([[1, 2]])
    two_by_three = concatenate3([[x, x, x], [x, x, x]])
    assert (two_by_three == np.array([[1, 2] * 3] * 2)).all()

    three_by_two = concatenate3([[x, x], [x, x], [x, x]])
    assert (three_by_two == np.array([[1, 2] * 2] * 3)).all()

    x = np.arange(12).reshape((2, 2, 3))
    # Each expected row is one row of ``x`` repeated three times.
    r0 = [0, 1, 2] * 3
    r3 = [3, 4, 5] * 3
    r6 = [6, 7, 8] * 3
    r9 = [9, 10, 11] * 3
    expected = np.array([[r0, r3, r0, r3],
                         [r6, r9, r6, r9],
                         [r0, r3, r0, r3],
                         [r6, r9, r6, r9]])
    assert_eq(concatenate3([[[x, x, x], [x, x, x]],
                            [[x, x, x], [x, x, x]]]),
              expected)
|
|
|
|
|
|
def test_map_blocks3():
    """``map_blocks`` with two array arguments, including broadcasting."""
    x = np.arange(10)
    y = np.arange(10) * 2

    dx = da.from_array(x, chunks=5)
    dy = da.from_array(y, chunks=5)

    summed = da.core.map_blocks(lambda a, b: a + 2 * b, dx, dy,
                                dtype=dx.dtype)
    assert_eq(summed, x + 2 * y)

    z = np.arange(100).reshape((10, 10))
    dz = da.from_array(z, chunks=5)

    func = lambda a, b: a + 2 * b
    res = da.core.map_blocks(func, dx, dz, dtype=dx.dtype)
    assert_eq(res, x + 2 * z)
    assert same_keys(da.core.map_blocks(func, dx, dz, dtype=dx.dtype), res)

    # Same function with the argument order swapped.
    assert_eq(da.map_blocks(func, dz, dx, dtype=dx.dtype), z + 2 * x)
|
|
|
|
|
|
def test_from_array_with_missing_chunks():
    """``None`` entries in ``chunks`` expand to the full axis length."""
    data = np.random.randn(2, 4, 3)
    partial = da.from_array(data, chunks=(None, 2, None))
    explicit = da.from_array(data, chunks=(2, 2, 3))
    assert partial.chunks == explicit.chunks
|
|
|
|
|
|
def test_normalize_chunks():
    """``normalize_chunks`` expands every supported chunk spec to tuples.

    Covers integers, nested tuples, ``-1``, dicts, lists, zero-length axes
    and ``"auto"``, plus two inputs that must raise ``ValueError``.
    """
    assert normalize_chunks(3, (4, 6)) == ((3, 1), (3, 3))
    assert normalize_chunks(((3, 3), (8,)), (6, 8)) == ((3, 3), (8, ))
    assert normalize_chunks((4, 5), (9,)) == ((4, 5), )
    assert normalize_chunks((4, 5), (9, 9)) == ((4, 4, 1), (5, 4))
    assert normalize_chunks(-1, (5, 5)) == ((5,), (5, ))
    assert normalize_chunks((3, -1), (5, 5)) == ((3, 2), (5, ))
    assert normalize_chunks({0: 3}, (5, 5)) == ((3, 2), (5,))
    assert normalize_chunks([[2, 2], [3, 3]]) == ((2, 2), (3, 3))
    # These three previously read ``assert call, expected``: the expected
    # value was used as the assertion message and never compared.
    assert normalize_chunks(10, (30, 5)) == ((10, 10, 10), (5,))
    assert normalize_chunks((), (0, 0)) == ((0,), (0,))
    assert normalize_chunks(-1, (0, 3)) == ((0,), (3,))
    assert normalize_chunks("auto", shape=(20,), limit=5, dtype='uint8') == \
        ((5, 5, 5, 5),)

    with pytest.raises(ValueError):
        normalize_chunks(((10,), ), (11, ))
    with pytest.raises(ValueError):
        normalize_chunks(((5, ), (5, )), (5, ))
|
|
|
|
|
|
def test_align_chunks_to_previous_chunks():
    """Auto-chunking returns chunk sizes aligned to ``previous_chunks``."""

    def auto_1d(previous, limit):
        return normalize_chunks('auto',
                                shape=(2000,),
                                previous_chunks=previous,
                                limit=limit, dtype=np.uint8)

    assert auto_1d((512,), '600 B') == ((512, 512, 512, 2000 - 512 * 3),)
    assert auto_1d((128,), '600 B') == ((512, 512, 512, 2000 - 512 * 3),)
    assert auto_1d((512,), '1200 B') == ((1024, 2000 - 1024),)

    chunks = normalize_chunks('auto',
                              shape=(3, 10211, 10376),
                              previous_chunks=(1, 512, 512),
                              limit='1MiB', dtype=np.float32)
    assert chunks[0] == (1, 1, 1)
    # The final chunk on each axis is the remainder, so it is excluded.
    for axis in (1, 2):
        assert all(c % 512 == 0 for c in chunks[axis][:-1])
|
|
|
|
|
|
def test_raise_on_no_chunks():
    """Constructing an ``Array`` with ``chunks=None`` raises ``ValueError``.

    The message is expected to mention "dask" and contain ".org".
    """
    x = da.ones(6, chunks=3)
    # pytest.raises replaces the manual try / ``assert False`` / except.
    with pytest.raises(ValueError) as excinfo:
        Array(x.dask, x.name, chunks=None, dtype=x.dtype, shape=None)

    message = str(excinfo.value)
    assert "dask" in message
    assert ".org" in message
|
|
|
|
|
|
def test_chunks_is_immutable():
    """Assigning to ``.chunks`` raises ``TypeError`` mentioning ``rechunk``."""
    x = da.ones(6, chunks=3)
    # pytest.raises replaces the manual try / ``assert False`` / except.
    with pytest.raises(TypeError) as excinfo:
        x.chunks = 2

    assert 'rechunk(2)' in str(excinfo.value)
|
|
|
|
|
|
def test_raise_on_bad_kwargs():
    """An unknown keyword to ``da.minimum`` raises a descriptive ``TypeError``.

    The original try/except had no failure path, so the test passed even if
    nothing was raised; ``pytest.raises`` makes the exception mandatory.
    """
    x = da.ones(5, chunks=3)
    with pytest.raises(TypeError) as excinfo:
        da.minimum(x, foo=None)

    message = str(excinfo.value)
    assert 'minimum' in message
    assert 'foo' in message
|
|
|
|
|
|
def test_long_slice():
    """Slicing across many single-element chunks matches NumPy."""
    data = np.arange(10000)
    lazy = da.from_array(data, chunks=1)

    window = slice(8000, 8200)
    assert_eq(lazy[window], data[window])
|
|
|
|
|
|
def test_h5py_newaxis():
    """Inserting ``None`` axes works on arrays backed by h5py datasets."""
    h5py = pytest.importorskip('h5py')

    with tmpfile('h5') as fn:
        # Explicit writable mode: the test creates a dataset, so it must not
        # depend on h5py's version-specific default mode.
        with h5py.File(fn, mode='a') as f:
            x = f.create_dataset('/x', shape=(10, 10), dtype='f8')
            d = da.from_array(x, chunks=(5, 5))
            assert d[None, :, :].compute(scheduler='sync').shape == (1, 10, 10)
            assert d[:, None, :].compute(scheduler='sync').shape == (10, 1, 10)
            assert d[:, :, None].compute(scheduler='sync').shape == (10, 10, 1)
            assert same_keys(d[:, :, None], d[:, :, None])
|
|
|
|
|
|
def test_ellipsis_slicing():
    """Indexing with ``...`` returns the whole array."""
    lazy_ones = da.ones(4, chunks=2)
    assert_eq(lazy_ones[...], np.ones(4))
|
|
|
|
|
|
def test_point_slicing():
    """``vindex`` with two index lists matches NumPy point-wise indexing."""
    grid = np.arange(56).reshape((7, 8))
    lazy = da.from_array(grid, chunks=(3, 4))

    picked = lazy.vindex[[1, 2, 5, 5], [3, 1, 6, 1]]
    assert_eq(picked, grid[[1, 2, 5, 5], [3, 1, 6, 1]])

    picked = lazy.vindex[[0, 1, 6, 0], [0, 1, 0, 7]]
    assert_eq(picked, grid[[0, 1, 6, 0], [0, 1, 0, 7]])
    again = lazy.vindex[[0, 1, 6, 0], [0, 1, 0, 7]]
    assert same_keys(picked, again)
|
|
|
|
|
|
def test_point_slicing_with_full_slice():
    """``vindex`` mixing point lists with full slices on a 4-d array."""
    from dask.array.core import _vindex_transpose, _get_axis
    x = np.arange(4 * 5 * 6 * 7).reshape((4, 5, 6, 7))
    d = da.from_array(x, chunks=(2, 3, 3, 4))

    # ``None`` entries stand for a full slice on that axis.
    inds = [[[1, 2, 3], None, [3, 2, 1], [5, 3, 4]],
            [[1, 2, 3], None, [4, 3, 2], None],
            [[1, 2, 3], [3, 2, 1]],
            [[1, 2, 3], [3, 2, 1], [3, 2, 1], [5, 3, 4]],
            [[], [], [], None],
            [np.array([1, 2, 3]), None, np.array([4, 3, 2]), None],
            [None, None, [1, 2, 3], [4, 3, 2]],
            [None, [0, 2, 3], None, [0, 3, 2]]]

    point_types = (np.ndarray, list)

    for ind in inds:
        key = tuple(i if isinstance(i, point_types) else slice(None, None)
                    for i in ind)
        result = d.vindex[key]

        # Rotate the expected result accordingly
        axis = _get_axis(ind)
        expected = _vindex_transpose(x[key], axis)

        assert_eq(result, expected)

        # Always have the first axis be the length of the points
        first_points = next(i for i in ind if isinstance(i, point_types))
        assert result.shape[0] == len(first_points)
|
|
|
|
|
|
def test_slice_with_floats():
    """Float scalars, slice bounds and list entries all raise ``IndexError``."""
    lazy = da.ones((5,), chunks=(3,))
    for bad_index in (1.5, slice(0, 1.5), [1, 1.5]):
        with pytest.raises(IndexError):
            lazy[bad_index]
|
|
|
|
|
|
def test_slice_with_integer_types():
    """Index arrays of several integer dtypes behave as in NumPy."""
    x = np.arange(10)
    dx = da.from_array(x, chunks=5)

    for dtype in ('u8', np.int64):
        inds = np.array([0, 3, 6], dtype=dtype)
        assert_eq(dx[inds], x[inds])
        assert_eq(dx[inds.astype('u4')], x[inds.astype('u4')])
|
|
|
|
|
|
def test_index_with_integer_types():
    """Scalar indexing accepts both Python ``int`` and ``np.int64``."""
    x = np.arange(10)
    dx = da.from_array(x, chunks=5)

    for position in (int(3), np.int64(3)):
        assert_eq(dx[position], x[position])
|
|
|
|
|
|
def test_vindex_basic():
    """``vindex`` agrees with NumPy where basic and advanced indexing match."""
    grid = np.arange(56).reshape((7, 8))
    lazy = da.from_array(grid, chunks=(3, 4))

    # cases where basic and advanced indexing coincide
    assert_eq(lazy.vindex[0], grid[0])
    assert_eq(lazy.vindex[0, 1], grid[0, 1])

    reversed_cols = lazy.vindex[[0, 1], ::-1]  # slices last
    assert_eq(reversed_cols, grid[:2, ::-1])
|
|
|
|
|
|
def test_vindex_nd():
    """``vindex`` accepts multi-dimensional and broadcasting index arrays."""
    grid = np.arange(56).reshape((7, 8))
    lazy = da.from_array(grid, chunks=(3, 4))

    picked = lazy.vindex[[[0, 1], [6, 0]], [[0, 1], [0, 7]]]
    assert_eq(picked, grid[[[0, 1], [6, 0]], [[0, 1], [0, 7]]])

    rows, cols = np.arange(7), np.arange(8)

    picked = lazy.vindex[rows[:, None], cols[None, :]]
    assert_eq(picked, grid)

    picked = lazy.vindex[rows[None, :], cols[:, None]]
    assert_eq(picked, grid.T)
|
|
|
|
|
|
def test_vindex_negative():
    """Negative positions in a ``vindex`` index array match NumPy."""
    data = np.arange(10)
    lazy = da.from_array(data, chunks=(5, 5))

    positions = np.array([0, -1])
    assert_eq(lazy.vindex[positions], data[np.array([0, -1])])
|
|
|
|
|
|
def test_vindex_errors():
    """Each of these ``vindex`` keys must raise ``IndexError``."""
    d = da.ones((5, 5, 5), chunks=(3, 3, 3))

    with pytest.raises(IndexError):
        d.vindex[np.newaxis]
    with pytest.raises(IndexError):
        d.vindex[[1, 2], [1, 2, 3]]
    with pytest.raises(IndexError):
        d.vindex[[True] * 5]
    with pytest.raises(IndexError):
        d.vindex[[0], [5]]
    with pytest.raises(IndexError):
        d.vindex[[0], [-6]]
|
|
|
|
|
|
def test_vindex_merge():
    """``_vindex_merge`` places each value row at its listed location."""
    from dask.array.core import _vindex_merge
    locations = [1], [2, 0]
    values = [np.array([[1, 2, 3]]),
              np.array([[10, 20, 30], [40, 50, 60]])]

    merged = _vindex_merge(locations, values)
    expected = np.array([[40, 50, 60],
                         [1, 2, 3],
                         [10, 20, 30]])
    assert (merged == expected).all()
|
|
|
|
|
|
def test_vindex_identity():
    """Full-slice ``vindex`` returns the same object; partial slices raise."""
    rng = da.random.RandomState(42)
    a, b = 10, 20

    x = rng.random(a, chunks=a // 2)
    assert x is x.vindex[:]
    assert x is x.vindex[:a]
    with pytest.raises(IndexError):
        x.vindex[:a - 1]
    with pytest.raises(IndexError):
        x.vindex[1:]
    with pytest.raises(IndexError):
        x.vindex[0:a:2]

    x = rng.random((a, b), chunks=(a // 2, b // 2))
    assert x is x.vindex[:, :]
    assert x is x.vindex[:a, :b]
    with pytest.raises(IndexError):
        x.vindex[:, :b - 1]
    with pytest.raises(IndexError):
        x.vindex[:, 1:]
    with pytest.raises(IndexError):
        x.vindex[:, 0:b:2]
|
|
|
|
|
|
def test_empty_array():
    """A zero-length ``da.arange`` equals a zero-length ``np.arange``."""
    expected = np.arange(0)
    assert_eq(expected, da.arange(0, chunks=5))
|
|
|
|
|
|
def test_memmap():
    """Store a dask array into a ``np.memmap`` and read it back via ``np.load``."""
    with tmpfile('npy') as fn_1:
        with tmpfile('npy') as fn_2:
            # Create ``target`` before the try block. If it were created
            # inside and creation failed, the ``finally`` clause would hit
            # an unbound name and hide the real error.
            x = da.arange(100, chunks=15)
            target = np.memmap(fn_1, shape=x.shape, mode='w+', dtype=x.dtype)
            try:
                x.store(target)

                assert_eq(target, x)

                np.save(fn_2, target)

                assert_eq(np.load(fn_2, mmap_mode='r'), x)
            finally:
                # Close the mapping so the temporary file can be removed.
                target._mmap.close()
|
|
|
|
|
|
def test_to_npy_stack():
    """Round-trip an array through ``to_npy_stack`` / ``from_npy_stack``."""
    data = np.arange(5 * 10 * 10).reshape((5, 10, 10))
    lazy = da.from_array(data, chunks=(2, 4, 4))

    with tmpdir() as dirname:
        stackdir = os.path.join(dirname, 'test')
        da.to_npy_stack(stackdir, lazy, axis=0)

        first_file = os.path.join(stackdir, '0.npy')
        assert os.path.exists(first_file)

        second_block = np.load(os.path.join(stackdir, '1.npy'))
        assert (second_block == data[2:4]).all()

        reloaded = da.from_npy_stack(stackdir)
        assert_eq(lazy, reloaded)
|
|
|
|
|
|
def test_view():
    """``Array.view`` matches NumPy and rejects bad chunking or ``order``."""
    x = np.arange(56).reshape((7, 8))
    d = da.from_array(x, chunks=(2, 3))

    for dtype in ('i4', 'i2'):
        assert_eq(x.view(dtype), d.view(dtype))
    assert all(isinstance(s, int) for s in d.shape)

    x = np.arange(8, dtype='i1')
    d = da.from_array(x, chunks=(4,))
    assert_eq(x.view('i4'), d.view('i4'))

    with pytest.raises(ValueError):
        x = np.arange(8, dtype='i1')
        d = da.from_array(x, chunks=(3,))
        d.view('i4')

    # ``d`` is still the array built inside the previous block.
    with pytest.raises(ValueError):
        d.view('i4', order='asdf')
|
|
|
|
|
|
def test_view_fortran():
    """``view(..., order='F')`` matches viewing the transposed NumPy array."""
    x = np.asfortranarray(np.arange(64).reshape((8, 8)))
    d = da.from_array(x, chunks=(2, 3))
    for dtype in ('i4', 'i2'):
        assert_eq(x.T.view(dtype).T, d.view(dtype, order='F'))
|
|
|
|
|
|
def test_h5py_tokenize():
    """Datasets with different contents in different files tokenize differently."""
    h5py = pytest.importorskip('h5py')
    with tmpfile('hdf5') as fn1:
        with tmpfile('hdf5') as fn2:
            # Context managers close both files before the temporary files
            # are removed; they were previously left open. The mode is
            # explicit because the test writes datasets.
            with h5py.File(fn1, mode='a') as f:
                with h5py.File(fn2, mode='a') as g:
                    f['x'] = np.arange(10).astype(float)
                    g['x'] = np.ones(10).astype(float)

                    x1 = f['x']
                    x2 = g['x']

                    assert tokenize(x1) != tokenize(x2)
|
|
|
|
|
|
def test_map_blocks_with_changed_dimension():
    """``map_blocks`` with ``drop_axis`` / ``new_axis`` changing dimensionality."""
    x = np.arange(56).reshape((7, 8))
    d = da.from_array(x, chunks=(7, 4))

    reduced = d.map_blocks(lambda b: b.sum(axis=0), chunks=(4,), drop_axis=0,
                           dtype=d.dtype)
    assert reduced.chunks == ((4, 4),)
    assert_eq(reduced, x.sum(axis=0))

    # Provided chunks have wrong shape
    for wrong_chunks in ((7, 4), ((4, 4, 4),)):
        with pytest.raises(ValueError):
            d.map_blocks(lambda b: b.sum(axis=0), chunks=wrong_chunks,
                         drop_axis=0)

    d = da.from_array(x, chunks=(4, 8))
    reduced = d.map_blocks(lambda b: b.sum(axis=1), drop_axis=1,
                           dtype=d.dtype)
    assert reduced.chunks == ((4, 3),)
    assert_eq(reduced, x.sum(axis=1))

    x = np.arange(64).reshape((8, 8))
    d = da.from_array(x, chunks=(4, 4))
    expanded = d.map_blocks(lambda b: b[None, :, :, None],
                            chunks=(1, 4, 4, 1), new_axis=[0, 3],
                            dtype=d.dtype)
    assert expanded.chunks == ((1,), (4, 4), (4, 4), (1,))
    assert_eq(expanded, x[None, :, :, None])

    # Same call without explicit chunks.
    expanded = d.map_blocks(lambda b: b[None, :, :, None],
                            new_axis=[0, 3], dtype=d.dtype)
    assert expanded.chunks == ((1,), (4, 4), (4, 4), (1,))
    assert_eq(expanded, x[None, :, :, None])

    # Adding axis with a gap
    with pytest.raises(ValueError):
        d.map_blocks(lambda b: b, new_axis=(3, 4))

    # Both new_axis and drop_axis
    d = da.from_array(x, chunks=(8, 4))
    both = d.map_blocks(lambda b: b.sum(axis=0)[:, None, None],
                        drop_axis=0, new_axis=(1, 2), dtype=d.dtype)
    assert both.chunks == ((4, 4), (1,), (1,))
    assert_eq(both, x.sum(axis=0)[:, None, None])

    d = da.from_array(x, chunks=(4, 8))
    both = d.map_blocks(lambda b: b.sum(axis=1)[:, None, None],
                        drop_axis=1, new_axis=(1, 2), dtype=d.dtype)
    assert both.chunks == ((4, 4), (1,), (1,))
    assert_eq(both, x.sum(axis=1)[:, None, None])
|
|
|
|
|
|
def test_map_blocks_with_changed_dimension_and_broadcast_chunks():
    """``map_blocks`` adds a 3-element array to a 6-element one, chunk by chunk."""
    # https://github.com/dask/dask/issues/4299
    short = da.from_array([1, 2, 3], 3)
    long_ = da.from_array(np.array([0, 1, 2, 0, 1, 2]), chunks=3)

    summed = da.map_blocks(operator.add, short, long_, chunks=long_.chunks)

    expected = da.from_array(np.array([1, 3, 5, 1, 3, 5]), chunks=3)
    assert_eq(summed, expected)
|
|
|
|
|
|
def test_broadcast_chunks():
    """``broadcast_chunks`` on matching, broadcastable, unknown and bad chunks."""
    assert broadcast_chunks() == ()
    assert broadcast_chunks(((2, 3),)) == ((2, 3),)
    assert broadcast_chunks(((5, 5),), ((5, 5),)) == ((5, 5),)

    # Lower-dimensional chunks broadcast, in either argument order.
    a = ((10, 10, 10), (5, 5),)
    b = ((5, 5),)
    assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5),)
    assert broadcast_chunks(b, a) == ((10, 10, 10), (5, 5),)

    a = ((10, 10, 10), (5, 5),)
    b = ((1,), (5, 5),)
    assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5),)

    a = ((10, 10, 10), (5, 5),)
    b = ((3, 3,), (5, 5),)
    with pytest.raises(ValueError):
        broadcast_chunks(a, b)

    a = ((1,), (5, 5),)
    b = ((1,), (5, 5),)
    assert broadcast_chunks(a, b) == a

    # Unknown (NaN) chunk sizes against a single length-1 chunk.
    a = ((1,), (np.nan, np.nan, np.nan),)
    b = ((3, 3), (1,),)
    merged = broadcast_chunks(a, b)
    assert merged[0] == b[0] and np.allclose(merged[1], a[1], equal_nan=True)

    a = ((3, 3), (1,),)
    b = ((1,), (np.nan, np.nan, np.nan),)
    merged = broadcast_chunks(a, b)
    assert merged[0] == a[0] and np.allclose(merged[1], b[1], equal_nan=True)

    a = ((3, 3,), (5, 5),)
    b = ((1,), (np.nan, np.nan, np.nan),)
    with pytest.raises(ValueError):
        broadcast_chunks(a, b)
|
|
|
|
|
|
def test_chunks_error():
    """A 1-tuple ``chunks`` for a 2-d array raises ``ValueError``."""
    two_d = np.ones((10, 10))
    with pytest.raises(ValueError):
        da.from_array(two_d, chunks=(5,))
|
|
|
|
|
|
def test_array_compute_forward_kwargs():
    """``compute`` accepts an unrecognised keyword argument without failing."""
    total = da.arange(10, chunks=2).sum()
    total.compute(bogus_keyword=10)
|
|
|
|
|
|
def test_dont_fuse_outputs():
    """An output key that another output depends on still computes correctly."""
    graph = {('x', 0): np.array([1, 2]),
             ('x', 1): (inc, ('x', 0))}

    arr = da.Array(graph, 'x', chunks=(2,), shape=(4,),
                   dtype=np.array([1]).dtype)
    assert_eq(arr, np.array([1, 2, 2, 3], dtype=arr.dtype))
|
|
|
|
|
|
def test_dont_dealias_outputs():
    """An output key that aliases another output key still computes correctly."""
    graph = {('x', 0, 0): np.ones((2, 2)),
             ('x', 0, 1): np.ones((2, 2)),
             ('x', 1, 0): np.ones((2, 2)),
             ('x', 1, 1): ('x', 0, 0)}

    arr = da.Array(graph, 'x', chunks=(2, 2), shape=(4, 4),
                   dtype=np.ones(1).dtype)
    assert_eq(arr, np.ones((4, 4)))
|
|
|
|
|
|
def test_timedelta_op():
    """Dividing a timedelta array by a timedelta scalar matches NumPy."""
    durations = np.array([np.timedelta64(10, 'h')])
    unit = np.timedelta64(1, 'h')
    ratio = da.from_array(durations, chunks=(1,)) / unit
    assert ratio.compute() == durations / unit
|
|
|
|
|
|
def test_to_delayed():
    """``to_delayed`` yields one Delayed per block, including for 0-d arrays."""
    base = da.random.random((4, 4), chunks=(2, 2))
    shifted = base + 10

    # Unpacking also checks that the result is a 2x2 grid of blocks.
    [[top_left, _top_right], [_bottom_left, _bottom_right]] = \
        shifted.to_delayed()
    assert_eq(top_left.compute(), shifted[:2, :2])

    scalar = 2
    zero_d = da.from_array(np.array(scalar), chunks=0)
    only_block = zero_d.to_delayed()[tuple()]
    assert only_block.compute() == scalar
|
|
|
|
|
|
def test_to_delayed_optimize_graph():
    """``to_delayed`` optimises the graph unless ``optimize_graph=False``."""
    x = da.ones((4, 4), chunks=(2, 2))
    y = x[1:][1:][1:][:, 1:][:, 1:][:, 1:]

    # optimizations
    optimized = y.to_delayed().flatten().tolist()[0]
    getitem_keys = [k for k in optimized.dask if k[0].startswith('getitem')]
    assert len(getitem_keys) == 1

    # no optimizations
    raw = y.to_delayed(optimize_graph=False).flatten().tolist()[0]
    assert dict(raw.dask) == dict(y.dask)

    assert (optimized.compute() == raw.compute()).all()
|
|
|
|
|
|
def test_cumulative():
    """Cumulative sums/products, plain and NaN-aware, agree with NumPy."""

    def check_plain(darr, arr, axis):
        assert_eq(darr.cumsum(axis=axis), arr.cumsum(axis=axis))
        assert_eq(darr.cumprod(axis=axis), arr.cumprod(axis=axis))

    def check_nan(darr, arr, axis):
        assert_eq(da.nancumsum(darr, axis=axis), nancumsum(arr, axis=axis))
        assert_eq(da.nancumprod(darr, axis=axis), nancumprod(arr, axis=axis))

    def with_nans(arr):
        # Set a fixed-seed selection of entries to NaN, in place.
        rs = np.random.RandomState(0)
        arr[rs.rand(*arr.shape) < 0.5] = np.nan
        return arr

    # 1-d integer range
    x = da.arange(20, chunks=5)
    assert_eq(x.cumsum(axis=0), np.arange(20).cumsum())
    assert_eq(x.cumprod(axis=0), np.arange(20).cumprod())

    assert_eq(da.nancumsum(x, axis=0), nancumsum(np.arange(20)))
    assert_eq(da.nancumprod(x, axis=0), nancumprod(np.arange(20)))

    # 1-d floats containing NaNs
    a = with_nans(np.random.random((20)))
    x = da.from_array(a, chunks=5)
    assert_eq(da.nancumsum(x, axis=0), nancumsum(a))
    assert_eq(da.nancumprod(x, axis=0), nancumprod(a))

    # 2-d floats
    a = np.random.random((20, 24))
    x = da.from_array(a, chunks=(6, 5))
    for axis in (0, 1):
        check_plain(x, a, axis)
    for axis in (0, 1):
        check_nan(x, a, axis)

    # 2-d floats containing NaNs
    a = with_nans(np.random.random((20, 24)))
    x = da.from_array(a, chunks=(6, 5))
    for axis in (0, 1):
        check_nan(x, a, axis)

    # 3-d floats, positive and negative axes
    a = np.random.random((20, 24, 13))
    x = da.from_array(a, chunks=(6, 5, 4))
    for axis in [0, 1, 2, -1, -2, -3]:
        check_plain(x, a, axis)
        check_nan(x, a, axis)

    # 3-d floats containing NaNs
    a = with_nans(np.random.random((20, 24, 13)))
    x = da.from_array(a, chunks=(6, 5, 4))
    for axis in [0, 1, 2, -1, -2, -3]:
        check_nan(x, a, axis)

    # Out-of-range axes
    for bad_axis in (3, -4):
        with pytest.raises(ValueError):
            x.cumsum(axis=bad_axis)
|
|
|
|
|
|
def test_from_delayed():
    """``from_delayed`` wraps a single Delayed value as a dask ``Array``."""
    shape = (5, 3)
    lazy_value = delayed(np.ones)(shape)
    arr = from_delayed(lazy_value, shape=(5, 3), dtype=np.ones(0).dtype)
    assert isinstance(arr, Array)
    assert_eq(arr, np.ones((5, 3)))
|
|
|
|
|
|
def test_A_property():
    """The ``.A`` attribute returns the array itself."""
    arr = da.ones(5, chunks=(2,))
    assert arr.A is arr
|
|
|
|
|
|
def test_copy_mutate():
    """Mutating an array leaves its ``copy()`` and ``deepcopy`` untouched."""
    x = da.arange(5, chunks=(2,))
    shallow = x.copy()
    memo = {}
    deep = copy.deepcopy(x, memo=memo)
    x[x % 2 == 0] = -1

    # Apply the same mutation to a NumPy reference.
    reference = np.arange(5)
    reference[reference % 2 == 0] = -1
    assert_eq(x, reference)

    untouched = np.arange(5)
    assert_eq(shallow, untouched)
    assert_eq(deep, np.arange(5))
    assert memo[id(x)] is deep
|
|
|
|
|
|
def test_npartitions():
    """``npartitions`` is 3 for a 5-element array in chunks of 2, 6 for 5x5 in (2, 3)."""
    one_d = da.ones(5, chunks=(2,))
    assert one_d.npartitions == 3

    two_d = da.ones((5, 5), chunks=(2, 3))
    assert two_d.npartitions == 6
|
|
|
|
|
|
def test_astype_gh1151():
    """``astype`` to a narrower integer type matches NumPy."""
    source = np.arange(5).astype(np.int32)
    lazy = da.from_array(source, (1,))
    assert_eq(source.astype(np.int16), lazy.astype(np.int16))
|
|
|
|
|
|
def test_elemwise_name():
    """The name of an addition result starts with ``add-``."""
    summed = da.ones(5, chunks=2) + 1
    assert summed.name.startswith('add-')
|
|
|
|
|
|
def test_map_blocks_name():
    """The name of a ``map_blocks(inc)`` result starts with ``inc-``."""
    mapped = da.ones(5, chunks=2).map_blocks(inc)
    assert mapped.name.startswith('inc-')
|
|
|
|
|
|
def test_from_array_names():
    """Grouping ``from_array`` graph keys by ``key_split`` gives counts 1 and 5."""
    pytest.importorskip('distributed')

    source = np.ones(10)
    lazy = da.from_array(source, chunks=2)

    counts_by_prefix = countby(key_split, lazy.dask)
    assert set(counts_by_prefix.values()) == {1, 5}
|
|
|
|
|
|
def test_array_picklable():
    """A dask array survives a pickle round trip."""
    from pickle import loads, dumps

    original = da.arange(100, chunks=25)
    payload = dumps(original)
    restored = loads(payload)
    assert_eq(original, restored)
|
|
|
|
|
|
def test_from_array_raises_on_bad_chunks():
    """These chunk specs for a 10-element 1-d array raise ``ValueError``."""
    source = np.ones(10)

    with pytest.raises(ValueError):
        da.from_array(source, chunks=(5, 5, 5))

    # Disabled in the original source:
    # with pytest.raises(ValueError):
    #     da.from_array(source, chunks=100)

    with pytest.raises(ValueError):
        da.from_array(source, chunks=((5, 5, 5),))
|
|
|
|
|
|
def test_concatenate_axes():
    """``concatenate_axes`` joins nested block lists along the given axes."""
    x = np.ones((2, 2, 2))

    # (nested blocks, axes, expected result shape)
    cases = [
        ([x, x], [0], (4, 2, 2)),
        ([x, x, x], [0], (6, 2, 2)),
        ([x, x], [1], (2, 4, 2)),
        ([[x, x], [x, x]], [0, 1], (4, 4, 2)),
        ([[x, x], [x, x]], [0, 2], (4, 2, 4)),
        ([[x, x, x], [x, x, x]], [1, 2], (2, 4, 6)),
    ]
    for blocks, axes, expected_shape in cases:
        assert_eq(concatenate_axes(blocks, axes=axes),
                  np.ones(expected_shape))

    with pytest.raises(ValueError):
        # not all nested lists accounted for
        concatenate_axes([[x, x], [x, x]], axes=[0])
    with pytest.raises(ValueError):
        # too many axes
        concatenate_axes([x, x], axes=[0, 1, 2, 3])
|
|
|
|
|
|
def test_blockwise_concatenate():
    """With ``concatenate=True`` the function gets whole contracted arrays."""
    cube = da.ones((4, 4, 4), chunks=(2, 2, 2))
    square = da.ones((4, 4), chunks=(2, 2))

    def f(a, b):
        for arg in (a, b):
            assert isinstance(arg, np.ndarray)

        assert a.shape == (2, 4, 4)
        assert b.shape == (4, 4)

        return (a + b).sum(axis=(1, 2))

    out = da.blockwise(f, 'i', cube, 'ijk', square, 'jk', concatenate=True,
                       dtype=cube.dtype)
    assert_eq(out, np.ones(4) * 32)

    out = da.blockwise(add, 'ij', square, 'ij', square, 'ij',
                       concatenate=True, dtype=cube.dtype)
    assert_eq(out, np.ones((4, 4)) * 2)

    def f(a, b, c):
        for arg in (a, b, c):
            assert isinstance(arg, np.ndarray)

        assert a.shape == (4, 2, 4)
        assert b.shape == (4, 4)
        assert c.shape == (4, 2)

        return np.ones(5)

    out = da.blockwise(f, 'j', cube, 'ijk', square, 'ki', square, 'ij',
                       concatenate=True, dtype=cube.dtype)
    assert_eq(out, np.ones(10), check_shape=False)
|
|
|
|
|
|
def test_common_blockdim():
    """``common_blockdim`` returns the expected common chunking for each input.

    The original repeated the ``[(5, 5), (2, 3, 5)]`` assertion verbatim;
    the redundant copy is removed.
    """
    assert common_blockdim([(5,), (5,)]) == (5,)
    assert common_blockdim([(5,), (2, 3,)]) == (2, 3)
    assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5)
    assert common_blockdim([(5, 2, 3), (2, 3, 5)]) == (2, 3, 2, 3)

    assert common_blockdim([(1, 2), (2, 1)]) == (1, 1, 1)
    assert common_blockdim([(1, 2, 2), (2, 1, 2), (2, 2, 1)]) == (1, 1, 1, 1, 1)
|
|
|
|
|
|
def test_uneven_chunks_that_fit_neatly():
    """Adding (5, 5)-chunked and (5, 2, 3)-chunked arrays gives (5, 2, 3) chunks."""
    coarse = da.arange(10, chunks=((5, 5),))
    fine = da.ones(10, chunks=((5, 2, 3),))

    assert_eq(coarse + fine, np.arange(10) + np.ones(10))

    total = coarse + fine
    assert total.chunks == ((5, 2, 3),)
|
|
|
|
|
|
def test_elemwise_uneven_chunks():
    """Elementwise ops on arrays whose chunk boundaries do not line up."""
    x = da.arange(10, chunks=((4, 6),))
    y = da.ones(10, chunks=((6, 4),))

    assert_eq(x + y, np.arange(10) + np.ones(10))

    total = x + y
    assert total.chunks == ((4, 2, 4),)

    # Same idea in higher dimensions, with broadcasting over a leading axis.
    x = da.random.random((10, 10), chunks=((4, 6), (5, 2, 3)))
    y = da.random.random((4, 10, 10), chunks=((2, 2), (6, 4), (2, 3, 5)))

    total = x + y
    assert_eq(x + y, x.compute() + y.compute())
    assert total.chunks == ((2, 2), (4, 2, 4), (2, 3, 2, 3))
|
|
|
|
|
|
def test_uneven_chunks_blockwise():
    """A ``blockwise`` dot product over operands with mismatched inner chunks."""
    left = da.random.random((10, 10), chunks=((2, 3, 2, 3), (5, 5)))
    right = da.random.random((10, 10), chunks=((4, 4, 2), (4, 2, 4)))
    product = da.blockwise(np.dot, 'ik', left, 'ij', right, 'jk',
                           dtype=left.dtype, concatenate=True)
    assert product.chunks == (left.chunks[0], right.chunks[1])

    assert_eq(product, left.compute().dot(right))
|
|
|
|
|
|
def test_warn_bad_rechunking():
    """Adding (20, 1)- and (1, 20)-chunked arrays emits a warning mentioning 20."""
    columns = da.ones((20, 20), chunks=(20, 1))
    rows = da.ones((20, 20), chunks=(1, 20))

    with warnings.catch_warnings(record=True) as record:
        columns + rows

    assert record
    first_message = record[0].message.args[0]
    assert '20' in first_message
|
|
|
|
|
|
def test_concatenate_stack_dont_warn():
    """Concatenating or stacking 62 small arrays records no warning."""
    for combine in (da.concatenate, da.stack):
        with warnings.catch_warnings(record=True) as record:
            combine([da.ones(2, chunks=1)] * 62)
        assert not record
|
|
|
|
|
|
def test_map_blocks_delayed():
    """``map_blocks`` accepts a Delayed argument and keeps its key in the graph."""
    lazy = da.ones((10, 10), chunks=(5, 5))
    block = np.ones((5, 5))

    eager_result = lazy.map_blocks(add, block, dtype=lazy.dtype)

    delayed_block = delayed(block)
    delayed_result = lazy.map_blocks(add, delayed_block, dtype=lazy.dtype)

    assert_eq(eager_result, delayed_result)

    assert delayed_block.key in delayed_result.dask
|
|
|
|
|
|
def test_no_chunks():
    """Elementwise ops and reductions work when chunk sizes are NaN."""
    X = np.arange(11)
    graph = {('x', 0): np.arange(5), ('x', 1): np.arange(5, 11)}
    x = Array(graph, 'x', ((np.nan, np.nan,),), np.arange(1).dtype)

    assert_eq(x + 1, X + 1)
    assert_eq(x.sum(), X.sum())
    assert_eq((x + 1).std(), (X + 1).std())

    doubled_lazy, doubled = x + x, X + X
    assert_eq(doubled_lazy.std(), doubled.std())
    assert_eq(doubled_lazy.std(keepdims=True), doubled.std(keepdims=True))
|
|
|
|
|
|
def test_no_chunks_2d():
    """2-d operations work when chunk sizes on both axes are NaN."""
    X = np.arange(24).reshape((4, 6))
    x = da.from_array(X, chunks=(2, 2))
    x._chunks = ((np.nan, np.nan), (np.nan, np.nan, np.nan))

    with pytest.warns(None):    # zero division warning
        assert_eq(da.log(x), np.log(X))
    assert_eq(x.T, X.T)
    for axis in (0, 1):
        assert_eq(x.sum(axis=axis, keepdims=True),
                  X.sum(axis=axis, keepdims=True))
    assert_eq(x.dot(x.T + 1), X.dot(X.T + 1))
|
|
|
|
|
|
def test_no_chunks_yes_chunks():
    """NaN chunk sizes on one axis propagate; sizes on the other are kept."""
    X = np.arange(24).reshape((4, 6))
    x = da.from_array(X, chunks=(2, 2))
    x._chunks = ((2, 2), (np.nan, np.nan, np.nan))

    incremented = x + 1
    assert incremented.chunks == ((2, 2), (np.nan, np.nan, np.nan))

    transposed = x.T
    assert transposed.chunks == ((np.nan, np.nan, np.nan), (2, 2))

    product = x.dot(x.T)
    assert product.chunks == ((2, 2), (2, 2))
|
|
|
|
|
|
def test_raise_informative_errors_no_chunks():
    """Operations needing known chunk sizes raise an informative ``ValueError``.

    The exception message must contain both "chunk" and "unknown".
    """
    X = np.arange(10)
    a = da.from_array(X, chunks=(5, 5))
    a._chunks = ((np.nan, np.nan),)

    b = da.from_array(X, chunks=(4, 4, 2))
    b._chunks = ((np.nan, np.nan, np.nan),)

    for op in [lambda: a + b,
               lambda: a[1],
               lambda: a[::2],
               lambda: a[-5],
               lambda: a.rechunk(3),
               lambda: a.reshape(2, 5)]:
        with pytest.raises(ValueError) as e:
            op()
        # Inspect the exception's own message (``e.value``). ``str(e)`` on
        # the ExceptionInfo can include file paths, which may themselves
        # contain "chunk" and let a bad message slip through.
        message = str(e.value)
        if 'chunk' not in message or 'unknown' not in message:
            # Re-run outside pytest.raises so the uninformative error is
            # reported with its full traceback.
            op()
|
|
|
|
|
|
def test_no_chunks_slicing_2d():
    """Slicing along an axis with NaN chunk sizes raises an informative error.

    Indexing only the axis with known chunks still works.
    """
    X = np.arange(24).reshape((4, 6))
    x = da.from_array(X, chunks=(2, 2))
    x._chunks = ((2, 2), (np.nan, np.nan, np.nan))

    assert_eq(x[0], X[0])

    for op in [lambda: x[:, 4],
               lambda: x[:, ::2],
               lambda: x[0, 2:4]]:
        with pytest.raises(ValueError) as e:
            op()
        # Check the exception's own message, not the ExceptionInfo wrapper,
        # whose string form can include file paths.
        message = str(e.value)
        assert 'chunk' in message and 'unknown' in message
|
|
|
|
|
|
def test_index_array_with_array_1d():
    """Boolean-mask indexing on a 1-d array with NaN chunk sizes."""
    x = np.arange(10)
    dx = da.from_array(x, chunks=(5,))
    dx._chunks = ((np.nan, np.nan),)

    assert_eq(x[x > 6], dx[dx > 6])
    assert_eq(x[x % 2 == 0], dx[dx % 2 == 0])

    # A mask built from an 11-element array must be rejected.
    other = da.ones(11, chunks=(3,))

    with pytest.raises(ValueError):
        dx[other > 5]
|
|
|
|
|
|
def test_index_array_with_array_2d():
    """Boolean-mask indexing on a 2-d array selects the same values as NumPy."""
    x = np.arange(24).reshape((4, 6))
    dx = da.from_array(x, chunks=(2, 2))
    dx._chunks = ((2, 2), (np.nan, np.nan, np.nan))

    # Compared after sorting, so element order is not checked.
    expected_even = sorted(x[x % 2 == 0].tolist())
    actual_even = sorted(dx[dx % 2 == 0].compute().tolist())
    assert expected_even == actual_even

    expected_large = sorted(x[x > 6].tolist())
    actual_large = sorted(dx[dx > 6].compute().tolist())
    assert expected_large == actual_large
|
|
|
|
|
|
@pytest.mark.xfail(reason='Chunking does not align well')
def test_index_array_with_array_3d_2d():
    """Index a 3-d array with a 2-d boolean mask (currently expected to fail)."""
    x = np.arange(4**3).reshape((4, 4, 4))
    dx = da.from_array(x, chunks=(2, 2, 2))

    # The original first assigned a random mask to ``ind`` and overwrote it
    # on the next line; only this deterministic mask was ever used.
    ind = np.arange(4 ** 2).reshape((4, 4)) % 2 == 0
    dind = da.from_array(ind, (2, 2))

    assert_eq(x[ind], dx[dind])
    assert_eq(x[:, ind], dx[:, dind])
|
|
|
|
|
|
def test_setitem_1d():
    """Boolean-mask assignment on a 1-d dask array matches NumPy."""
    reference = np.arange(10)
    lazy = da.from_array(reference.copy(), chunks=(5,))

    reference[reference > 6] = -1
    reference[reference % 2 == 0] = -2

    lazy[lazy > 6] = -1
    lazy[lazy % 2 == 0] = -2

    assert_eq(reference, lazy)
|
|
|
|
|
|
def test_setitem_2d():
    """Boolean-mask assignment on a 2-d dask array matches NumPy."""
    reference = np.arange(24).reshape((4, 6))
    lazy = da.from_array(reference.copy(), chunks=(2, 2))

    reference[reference > 6] = -1
    reference[reference % 2 == 0] = -2

    lazy[lazy > 6] = -1
    lazy[lazy % 2 == 0] = -2

    assert_eq(reference, lazy)
|
|
|
|
|
|
# Compare parsed versions, not strings: as strings '1.9.0' >= '1.13.0' is
# true, so the test would be wrongly skipped on NumPy releases before 1.13.
@pytest.mark.skipif(LooseVersion(np.__version__) >= LooseVersion('1.13.0'),
                    reason='boolean slicing rules changed')
def test_setitem_mixed_d():
    """Assign through a boolean mask with fewer rows than the array."""
    x = np.arange(24).reshape((4, 6))
    dx = da.from_array(x, chunks=(2, 2))

    x[x[0, None] > 2] = -1
    dx[dx[0, None] > 2] = -1
    assert_eq(x, dx)

    x[x[None, 0] > 2] = -1
    dx[dx[None, 0] > 2] = -1
    assert_eq(x, dx)
|
|
|
|
|
|
def test_setitem_errs():
    """Assigning the whole array through a boolean mask raises ``ValueError``."""
    arr = da.ones((4, 4), chunks=(2, 2))

    with pytest.raises(ValueError):
        arr[arr > 1] = arr
|
|
|
|
|
|
def test_zero_slice_dtypes():
    """Indexing with an empty list keeps the dtype and gives shape ``(0,)``."""
    full = da.arange(5, chunks=1)
    empty = full[[]]
    assert empty.dtype == full.dtype
    assert empty.shape == (0,)
    assert_eq(full[[]], np.arange(5)[[]])
|
|
|
|
|
|
def test_zero_sized_array_rechunk():
    """``blockwise`` with ``identity`` on a zero-length array returns an equal array."""
    empty = da.arange(5, chunks=1)[:0]
    passed_through = da.blockwise(identity, 'i', empty, 'i',
                                  dtype=empty.dtype)
    assert_eq(empty, passed_through)
|
|
|
|
|
|
def test_blockwise_zero_shape():
    """Building a ``blockwise`` graph with zero-length inputs must not raise."""
    regular = da.arange(10, chunks=10)
    empty_2d = da.from_array(np.ones((0, 2)), ((0,), 2))
    empty_1d = da.from_array(np.ones((0,)), ((0,),))

    da.blockwise(
        lambda x: x, 'i',
        regular, 'i',
        empty_2d, 'ab',
        empty_1d, 'a',
        dtype='float64'
    )
|
|
|
|
|
|
def test_blockwise_zero_shape_new_axes():
    """``blockwise`` with zero-length inputs and ``new_axes`` must not raise."""
    empty_2d = da.from_array(np.ones((0, 2)), ((0,), 2))
    empty_1d = da.from_array(np.ones((0,)), ((0,),))

    da.blockwise(
        lambda x: np.ones(42), 'i',
        empty_2d, 'ab',
        empty_1d, 'a',
        dtype='float64', new_axes={'i': 42}
    )
|
|
|
|
|
|
def test_broadcast_against_zero_shape():
    """Adding an int or float scalar to zero-sized slices matches NumPy."""
    for scalar in (0, 0.1):
        assert_eq(da.arange(1, chunks=1)[:0] + scalar,
                  np.arange(1)[:0] + scalar)

    for scalar in (0, 0.1):
        assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + scalar,
                  np.ones((5, 5))[:0] + scalar)

    for scalar in (0, 0.1):
        assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + scalar,
                  np.ones((5, 5))[:, :0] + scalar)
|
|
|
|
|
|
def test_from_array_name():
    """``from_array`` naming: default, explicit ``name``, and ``name=False``."""
    data = np.array([1, 2, 3, 4, 5])
    chunks = data.shape

    # Default is tokenize the array: the same input gives the same name
    hashed_name = da.from_array(data, chunks=chunks).name
    assert da.from_array(data, chunks=chunks).name == hashed_name

    # Specify name directly
    explicit = da.from_array(data, chunks=chunks, name='x')
    assert explicit.name == 'x'

    # False gives a random name: distinct from the hash and from each other
    first = da.from_array(data, chunks=chunks, name=False)
    second = da.from_array(data, chunks=chunks, name=False)
    assert first.name != hashed_name
    assert second.name != hashed_name
    assert first.name != second.name
|
|
|
|
|
|
def test_concatenate_errs():
    """``concatenate`` raises ``ValueError`` for mismatched shapes.

    The message of the first error is checked through ``e.value`` (the
    raised exception), matching ``test_stack_errs``; ``str(e)`` would
    stringify pytest's ``ExceptionInfo`` wrapper rather than the exception.
    """
    with pytest.raises(ValueError) as e:
        da.concatenate([da.zeros((2, 1), chunks=(2, 1)),
                        da.zeros((2, 3), chunks=(2, 3))])

    message = str(e.value)
    assert 'shape' in message.lower()
    assert '(2, 1)' in message

    # Mismatch along the non-concatenated axis when axis=1
    with pytest.raises(ValueError):
        da.concatenate([da.zeros((1, 2), chunks=(1, 2)),
                        da.zeros((3, 2), chunks=(3, 2))], axis=1)
|
|
|
|
|
|
def test_stack_errs():
    """``stack`` on mixed shapes raises a ``ValueError`` with a short message."""
    with pytest.raises(ValueError) as e:
        length_two = [da.zeros((2,), chunks=(2))] * 10
        length_three = [da.zeros((3,), chunks=(3))] * 10
        da.stack(length_two + length_three)

    message = str(e.value)
    assert 'shape' in message.lower()
    assert '(2,)' in message
    # The message stays short even with twenty input arrays.
    assert len(message) < 105
|
|
|
|
|
|
def test_blockwise_with_numpy_arrays():
    """NumPy arrays mix with dask arrays and are stored in the graph as-is."""
    eager = np.ones(10)
    lazy = da.ones(10, chunks=(5,))

    assert_eq(eager + lazy, eager + eager)

    # The very same ndarray object must appear among the graph values.
    total = da.sum(eager)
    graph_values = total.dask.values()
    assert any(eager is value for value in graph_values)
|
|
|
|
|
|
@pytest.mark.parametrize('chunks', (100, 6))
@pytest.mark.parametrize('other', [[0, 0, 1], [2, 1, 3], (0, 0, 1)])
def test_elemwise_with_lists(chunks, other):
    """Multiplying by a list or tuple gives the same result as NumPy."""
    expected_base = np.arange(12).reshape((4, 3))
    lazy_base = da.arange(12, chunks=chunks).reshape((4, 3))

    # Rebuild each array from its three columns, then transpose back.
    expected = np.vstack([expected_base[:, col] for col in range(3)]).T
    lazy = da.vstack([lazy_base[:, col] for col in range(3)]).T
    assert_eq(expected, lazy)

    assert_eq(expected * other, lazy * other)
|
|
|
|
|
|
def test_constructor_plugin():
    """``array_plugins`` callbacks see each new array and may replace it."""
    seen = []
    seen_again = []
    with dask.config.set(array_plugins=[seen.append, seen_again.append]):
        x = da.ones(10, chunks=5)
        y = x + 1

    # Both plugins were called with each array, in creation order.
    assert seen == seen_again == [x, y]

    with dask.config.set(array_plugins=[lambda arr: arr.compute()]):
        x = da.ones(10, chunks=5)
        y = x + 1

    # The plugin's return value replaced the array.
    assert isinstance(y, np.ndarray)
    # The first set of plugins recorded nothing further.
    assert len(seen) == 2
|
|
|
|
|
|
def test_no_warnings_on_metadata():
    """Building ``da.arccos`` of an array of ones records no warnings."""
    ones = da.ones(5, chunks=3)
    with warnings.catch_warnings(record=True) as caught:
        da.arccos(ones)

    assert not caught
|
|
|
|
|
|
def test_delayed_array_key_hygeine():
    """An array passed through ``delayed(identity)`` and back is unchanged."""
    original = da.zeros((1,), chunks=(1,))
    wrapped = delayed(identity)(original)
    restored = da.from_delayed(wrapped,
                               shape=original.shape,
                               dtype=original.dtype)
    assert_eq(original, restored)
|
|
|
|
|
|
def test_empty_chunks_in_array_len():
    """``len`` of a 0-d dask array raises ``TypeError`` with a set message."""
    scalar = da.ones((), chunks=())
    with pytest.raises(TypeError) as exc_info:
        len(scalar)

    assert 'len() of unsized object' in str(exc_info.value)
|
|
|
|
|
|
@pytest.mark.parametrize('dtype', [None, [('a', 'f4'), ('b', object)]])
def test_meta(dtype):
    """``Array._meta`` is a small ``ndarray`` whose dtype matches the array.

    ``dtype`` is forwarded to ``da.zeros`` so that both the default and the
    structured (object-containing) dtype are exercised; previously the
    parameter was ignored and both cases built the same array.
    """
    a = da.zeros((1,), dtype=dtype, chunks=(1,))
    assert a._meta.dtype == a.dtype
    assert isinstance(a._meta, np.ndarray)
    assert a.nbytes < 1000
|
|
|
|
|
|
@pytest.mark.parametrize('shape,limit,expected', [
    (100, 10, (10,) * 10),
    (20, 10, (10, 10)),
    (20, 5, (5, 5, 5, 5)),
    (24, 5, (4, 4, 4, 4, 4, 4)),  # common factor is close, use it
    (23, 5, (5, 5, 5, 5, 3)),  # relatively prime, don't use 1s
    (1000, 167, (125,) * 8),  # find close value
])
def test_normalize_chunks_auto_1d(shape, limit, expected):
    """``'auto'`` chunking of a 1-d float64 shape under a byte limit.

    ``limit`` is given in elements and scaled by the float64 item size
    (8 bytes) before being passed to ``normalize_chunks``.
    """
    byte_limit = limit * np.dtype(np.float64).itemsize
    chunks = normalize_chunks('auto', (shape,), limit=byte_limit,
                              dtype=np.float64)
    assert chunks == (expected,)
|
|
|
|
|
|
@pytest.mark.parametrize('shape,chunks,limit,expected', [
    ((20, 20), ('auto', 2), 20, ((10, 10), (2,) * 10)),
    ((20, 20), ('auto', (2, 2, 2, 2, 2, 5, 5)), 20,
     ((4, 4, 4, 4, 4), (2, 2, 2, 2, 2, 5, 5))),
    ((1, 20), 'auto', 10, ((1,), (10, 10))),
])
def test_normalize_chunks_auto_2d(shape, chunks, limit, expected):
    """``'auto'`` mixed with explicit chunks on 2-d uint8 shapes."""
    normalized = normalize_chunks(chunks, shape, limit=limit, dtype='uint8')
    assert normalized == expected
|
|
|
|
|
|
def test_normalize_chunks_auto_3d():
    """``'auto'`` chunking on a 20x20x20 uint8 shape, partial and full."""
    shape = (20, 20, 20)

    # Two automatic axes next to one fixed axis
    partial = normalize_chunks(('auto', 'auto', 2), shape,
                               limit=200, dtype='uint8')
    assert partial == ((10, 10), (10, 10), (2,) * 10)

    # Every axis automatic under a small limit
    full = normalize_chunks('auto', shape, limit=8, dtype='uint8')
    assert full == ((2,) * 10,) * 3
|
|
|
|
|
|
def test_constructors_chunks_dict():
    """``da.ones`` accepts ``chunks`` as an axis-to-chunk-spec dict."""
    explicit = da.ones((20, 20), chunks={0: 10, 1: 5})
    assert explicit.chunks == ((10, 10), (5, 5, 5, 5))

    with_auto = da.ones((20, 20), chunks={0: 10, 1: "auto"})
    assert with_auto.chunks == ((10, 10), (20,))
|
|
|
|
|
|
def test_from_array_chunks_dict():
    """A ``chunks`` dict with ``-1`` and ``'auto'`` matches explicit chunks.

    Runs with ``array.chunk-size`` set to ``'128kiB'``.
    """
    data = np.empty((100, 100, 100))
    with dask.config.set({'array.chunk-size': '128kiB'}):
        from_dict = da.from_array(data, chunks={0: 10, 1: -1, 2: 'auto'})
        from_tuple = da.from_array(data, chunks=(10, 100, 10))
        assert from_dict.chunks == from_tuple.chunks
|
|
|
|
|
|
@pytest.mark.parametrize('dtype', [object, [('a', object), ('b', int)]])
def test_normalize_chunks_object_dtype(dtype):
    """``chunks='auto'`` raises ``NotImplementedError`` for object dtypes.

    The array is built from the parametrized ``dtype`` so that the
    structured dtype with an object field is exercised too; previously the
    parameter was ignored and a plain object array was used in both cases.
    ``np.empty`` is used because the contents are irrelevant here.
    """
    x = np.empty(2, dtype=dtype)
    with pytest.raises(NotImplementedError):
        da.from_array(x, chunks='auto')
|
|
|
|
|
|
def test_normalize_chunks_tuples_of_tuples():
    """Explicit per-axis chunk tuples are kept next to an ``'auto'`` axis."""
    normalized = normalize_chunks(((2, 3, 5), 'auto'), (10, 10),
                                  limit=10, dtype=np.uint8)
    assert normalized == ((2, 3, 5), (2, 2, 2, 2, 2))
|
|
|
|
|
|
def test_normalize_chunks_nan():
    """``'auto'`` with NaN sizes raises ``ValueError`` mentioning "auto"."""
    # NaN in the shape itself
    with pytest.raises(ValueError) as info:
        normalize_chunks('auto', (np.nan,), limit=10, dtype=np.uint8)
    assert "auto" in str(info.value)

    # NaN chunk sizes on one axis, 'auto' on the other
    with pytest.raises(ValueError) as info:
        normalize_chunks(((np.nan, np.nan), 'auto'), (10, 10),
                         limit=10, dtype=np.uint8)
    assert "auto" in str(info.value)
|
|
|
|
|
|
def test_zarr_roundtrip():
    """Writing to a zarr directory and reading back keeps data and chunks."""
    pytest.importorskip('zarr')
    with tmpdir() as path:
        written = da.zeros((3, 3), chunks=(1, 1))
        written.to_zarr(path)
        loaded = da.from_zarr(path)
        assert_eq(written, loaded)
        assert loaded.chunks == written.chunks
|
|
|
|
|
|
@pytest.mark.parametrize('compute', [False, True])
def test_zarr_return_stored(compute):
    """``to_zarr(..., return_stored=True)`` returns an equivalent ``Array``."""
    pytest.importorskip('zarr')
    with tmpdir() as path:
        source = da.zeros((3, 3), chunks=(1, 1))
        stored = source.to_zarr(path, compute=compute, return_stored=True)
        assert isinstance(stored, Array)
        assert_eq(source, stored)
        assert stored.chunks == source.chunks
|
|
|
|
|
|
def test_zarr_existing_array():
    """``to_zarr`` / ``from_zarr`` accept an existing zarr array object."""
    zarr = pytest.importorskip('zarr')
    chunk_shape = (1, 1)
    source = da.ones((3, 3), chunks=chunk_shape)
    target = zarr.zeros_like(source, chunks=chunk_shape)

    source.to_zarr(target)
    loaded = da.from_zarr(target)

    assert_eq(source, loaded)
    assert loaded.chunks == source.chunks
|
|
|
|
|
|
def test_read_zarr_chunks():
    """``from_zarr(chunks=...)`` overrides the chunking used when writing."""
    pytest.importorskip('zarr')
    written = da.zeros((9, ), chunks=(3, ))
    with tmpdir() as path:
        written.to_zarr(path)
        loaded = da.from_zarr(path, chunks=(5, ))
        assert loaded.chunks == ((5, 4), )
|
|
|
|
|
|
def test_zarr_pass_mapper():
    """A zarr ``DirectoryStore`` can be passed instead of a path."""
    pytest.importorskip('zarr')
    import zarr.storage

    with tmpdir() as path:
        store = zarr.storage.DirectoryStore(path)
        written = da.zeros((3, 3), chunks=(1, 1))
        written.to_zarr(store)
        loaded = da.from_zarr(store)
        assert_eq(written, loaded)
        assert loaded.chunks == written.chunks
|
|
|
|
|
|
def test_zarr_group():
    """``component`` writes into a zarr group; ``overwrite`` guards reuse."""
    zarr = pytest.importorskip('zarr')
    with tmpdir() as path:
        source = da.zeros((3, 3), chunks=(1, 1))
        source.to_zarr(path, component='test')

        # Writing the same component again needs overwrite=True
        with pytest.raises((OSError, ValueError)):
            source.to_zarr(path, component='test', overwrite=False)
        source.to_zarr(path, component='test', overwrite=True)

        # second time is fine, group exists
        source.to_zarr(path, component='test2', overwrite=False)
        source.to_zarr(path, component='nested/test', overwrite=False)

        root = zarr.open_group(path, mode='r')
        assert list(root) == ['nested', 'test', 'test2']
        assert 'test' in root['nested']

        loaded = da.from_zarr(path, component='test')
        assert_eq(source, loaded)
        assert loaded.chunks == source.chunks
|
|
|
|
|
|
@pytest.mark.parametrize('data', [
    [(), True],
    [((1, ),), True],
    [((1, 1, 1),), True],
    [((1, ), (1, )), True],
    [((2, 2, 1), ), True],
    [((2, 2, 3), ), False],
    [((1, 1, 1), (2, 2, 3)), False],
    [((1, 2, 1), ), False],
])
def test_regular_chunks(data):
    """``_check_regular_chunks`` returns the expected flag for each chunk set.

    Each ``data`` entry is a ``[chunkset, expected]`` pair.
    """
    chunkset = data[0]
    expected = data[1]
    assert da.core._check_regular_chunks(chunkset) == expected
|
|
|
|
|
|
def test_zarr_nocompute():
    """``to_zarr(compute=False)`` returns a ``Delayed`` that writes on compute."""
    pytest.importorskip('zarr')
    with tmpdir() as path:
        source = da.zeros((3, 3), chunks=(1, 1))
        pending = source.to_zarr(path, compute=False)
        assert isinstance(pending, Delayed)

        # Run the delayed write, then read the data back.
        dask.compute(pending)
        loaded = da.from_zarr(path)
        assert_eq(source, loaded)
        assert loaded.chunks == source.chunks
|
|
|
|
|
|
def test_blocks_indexer():
    """``Array.blocks`` selects whole blocks and rejects unsupported indexers."""
    # 1-d array: ints, negative ints, slices and lists of block positions
    flat = da.arange(10, chunks=2)

    assert isinstance(flat.blocks[0], da.Array)

    assert_eq(flat.blocks[0], flat[:2])
    assert_eq(flat.blocks[-1], flat[-2:])
    assert_eq(flat.blocks[:3], flat[:6])
    assert_eq(flat.blocks[[0, 1, 2]], flat[:6])
    assert_eq(flat.blocks[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5]))

    # 2-d array: block indexing along one or both axes
    matrix = da.random.random((20, 20), chunks=(4, 5))
    assert_eq(matrix.blocks[0], matrix[:4])
    assert_eq(matrix.blocks[0, :3], matrix[:4, :15])
    assert_eq(matrix.blocks[:, :3], matrix[:, :15])

    # 3-d array: integer block indices keep the axis
    cube = da.ones((40, 40, 40), chunks=(10, 10, 10))
    assert_eq(cube.blocks[0, :, 0], np.ones((10, 40, 10)))

    # Error cases
    grid = da.ones((2, 2), chunks=1)
    with pytest.raises(ValueError):
        grid.blocks[[0, 1], [0, 1]]
    with pytest.raises(ValueError):
        grid.blocks[np.array([0, 1]), [0, 1]]
    with pytest.raises(ValueError) as info:
        grid.blocks[np.array([0, 1]), np.array([0, 1])]
    assert "list" in str(info.value)
    with pytest.raises(ValueError) as info:
        grid.blocks[None, :, :]
    newaxis_message = str(info.value)
    assert "newaxis" in newaxis_message and "not supported" in newaxis_message
    with pytest.raises(IndexError):
        grid.blocks[100, 100]
|
|
|
|
|
|
def test_dask_array_holds_scipy_sparse_containers():
    """Blocks mapped to ``scipy.sparse.csr_matrix`` compute to sparse results.

    Checks the individual blocks, the whole array, and the transposed
    array.  The final type check inspects ``zz`` (the transposed result);
    it previously re-checked ``yy`` by mistake.
    """
    pytest.importorskip('scipy.sparse')
    import scipy.sparse

    x = da.random.random((1000, 10), chunks=(100, 10))
    x[x < 0.9] = 0
    xx = x.compute()
    y = x.map_blocks(scipy.sparse.csr_matrix)

    # Every individual block is a csr_matrix
    vs = y.to_delayed().flatten().tolist()
    values = dask.compute(*vs, scheduler='single-threaded')
    assert all(isinstance(v, scipy.sparse.csr_matrix) for v in values)

    # The assembled result is sparse and equals the dense data
    yy = y.compute(scheduler='single-threaded')
    assert isinstance(yy, scipy.sparse.spmatrix)
    assert (yy == xx).all()

    # Same for the transposed array
    z = x.T.map_blocks(scipy.sparse.csr_matrix)
    zz = z.compute(scheduler='single-threaded')
    assert isinstance(zz, scipy.sparse.spmatrix)
    assert (zz == xx.T).all()
|
|
|
|
|
|
def test_3851():
    """``argmax`` over an auto-chunked random array records no warnings.

    ``record=True`` is required: without it ``catch_warnings`` yields
    ``None`` and ``assert not record`` could never fail.
    """
    with warnings.catch_warnings(record=True) as record:
        Y = da.random.random((10, 10), chunks='auto')
        da.argmax(Y, axis=0).compute()

    assert not record
|
|
|
|
|
|
def test_3925():
    """Comparing an element of an object-dtype array with itself is truthy."""
    letters = np.array(['a', 'b', 'c'], dtype=object)
    lazy = da.from_array(letters, chunks=-1)
    comparison = lazy[0] == lazy[0]
    assert comparison.compute(scheduler='sync')
|
|
|
|
|
|
def test_map_blocks_large_inputs_delayed():
    """A large ndarray passed to ``map_blocks`` is stored once in the graph.

    Covers both positional and keyword use.  The occurrence count for the
    keyword case inspects ``d.dask``; it previously re-checked ``c.dask``.
    """
    a = da.ones(10, chunks=(5,))
    b = np.ones(1000000)
    # Leading characters of b's repr, used to count its appearances
    marker = repr(b)[:10]

    # Positional argument
    c = a.map_blocks(add, b)
    assert any(b is v for v in c.dask.values())
    assert repr(dict(c.dask)).count(marker) == 1  # only one occurrence

    # Keyword argument
    d = a.map_blocks(lambda x, y: x + y.sum(), y=b)
    assert_eq(d, d)
    assert any(b is v for v in d.dask.values())
    assert repr(dict(d.dask)).count(marker) == 1  # only one occurrence
|
|
|
|
|
|
def test_blockwise_large_inputs_delayed():
    """A large ndarray passed to ``blockwise`` is stored once in the graph.

    Covers both positional and keyword use.  The occurrence count for the
    keyword case inspects ``d.dask``; it previously re-checked ``c.dask``.
    """
    a = da.ones(10, chunks=(5,))
    b = np.ones(1000000)
    # Leading characters of b's repr, used to count its appearances
    marker = repr(b)[:10]

    # Positional argument with no index (None)
    c = da.blockwise(add, 'i', a, 'i', b, None, dtype=a.dtype)
    assert any(b is v for v in c.dask.values())
    assert repr(dict(c.dask)).count(marker) == 1  # only one occurrence

    # Keyword argument
    d = da.blockwise(lambda x, y: x + y, 'i', a, 'i', y=b, dtype=a.dtype)
    assert any(b is v for v in d.dask.values())
    assert repr(dict(d.dask)).count(marker) == 1  # only one occurrence
|
|
|
|
|
|
def test_slice_reversed():
    """A slice whose start is past its stop yields an empty array."""
    ones = da.ones(10, chunks=-1)
    reversed_bounds = ones[6:3]

    assert_eq(reversed_bounds, np.ones(0))
|
|
|
|
|
|
def test_map_blocks_chunks():
    """``map_blocks`` with explicit ``chunks`` over arrays of unequal size."""
    large = da.arange(400, chunks=(100,))
    small = da.arange(40, chunks=(10,))

    def func(a, b):
        # Two values per block pair: the maximum of each input block
        return np.array([a.max(), b.max()])

    mapped = da.map_blocks(func, large, small, chunks=(2,), dtype=large.dtype)
    expected = np.array([99, 9, 199, 19, 299, 29, 399, 39])
    assert_eq(mapped, expected)
|
|
|
|
|
|
def test_nbytes_auto():
|
|
chunks = normalize_chunks("800B", shape=(500,), dtype='float64')
|
|
assert chunks == ((100, 100, 100, 100, 100),)
|
|
chunks = normalize_chunks("200B", shape=(10, 10), dtype='float64')
|
|
assert chunks == ((5, 5), (5, 5))
|
|
chunks = normalize_chunks((5,"200B"), shape=(10, 10), dtype='float64')
|
|
assert chunks == ((5, 5), (5, 5))
|
|
chunks = normalize_chunks("33B", shape=(10, 10), dtype='float64')
|
|
assert chunks == ((2, 2, 2, 2, 2), (2, 2, 2, 2, 2))
|
|
chunks = normalize_chunks("1800B", shape=(10, 20, 30), dtype='float64')
|
|
assert chunks == ((5, 5), (5, 5, 5, 5), (6, 6, 6, 6, 6))
|
|
|
|
with pytest.raises(ValueError):
|
|
normalize_chunks("10B", shape=(10,), limit=20, dtype='float64')
|
|
with pytest.raises(ValueError):
|
|
normalize_chunks("100B", shape=(10, 10), limit=20, dtype='float64')
|
|
with pytest.raises(ValueError):
|
|
normalize_chunks(("100B", "10B"), shape=(10, 10), dtype='float64')
|
|
with pytest.raises(ValueError):
|
|
normalize_chunks(("10B", "10B"), shape=(10, 10), limit=20, dtype='float64')
|