from __future__ import absolute_import, division, print_function import copy import pytest np = pytest.importorskip('numpy') import os import sys import time from distutils.version import LooseVersion import operator from operator import add, sub, getitem from threading import Lock import warnings from toolz import merge, countby, concat from toolz.curried import identity import dask import dask.array as da from dask.base import tokenize, compute_as_if_collection from dask.compatibility import PY2 from dask.delayed import Delayed, delayed from dask.utils import ignoring, tmpfile, tmpdir, key_split from dask.utils_test import inc, dec from dask.array.core import (getem, getter, dotmany, concatenate3, Array, stack, concatenate, from_array, broadcast_shapes, broadcast_to, blockdims_from_blockshape, store, optimize, from_func, normalize_chunks, broadcast_chunks, from_delayed, common_blockdim, concatenate_axes) from dask.blockwise import (make_blockwise_graph as top, broadcast_dimensions) from dask.array.utils import assert_eq, same_keys # temporary until numpy functions migrated try: from numpy import nancumsum, nancumprod except ImportError: # pragma: no cover import dask.array.numpy_compat as npcompat nancumsum = npcompat.nancumsum nancumprod = npcompat.nancumprod def test_getem(): sol = {('X', 0, 0): (getter, 'X', (slice(0, 2), slice(0, 3))), ('X', 1, 0): (getter, 'X', (slice(2, 4), slice(0, 3))), ('X', 1, 1): (getter, 'X', (slice(2, 4), slice(3, 6))), ('X', 0, 1): (getter, 'X', (slice(0, 2), slice(3, 6)))} assert getem('X', (2, 3), shape=(4, 6)) == sol def test_top(): assert top(inc, 'z', 'ij', 'x', 'ij', numblocks={'x': (2, 2)}) == \ {('z', 0, 0): (inc, ('x', 0, 0)), ('z', 0, 1): (inc, ('x', 0, 1)), ('z', 1, 0): (inc, ('x', 1, 0)), ('z', 1, 1): (inc, ('x', 1, 1))} assert top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (2, 2), 'y': (2, 2)}) == \ {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 1)), ('z', 1, 0): (add, ('x', 1, 0), ('y', 1, 0)), ('z', 1, 1): (add, ('x', 1, 1), ('y', 1, 1))} assert top(dotmany, 'z', 'ik', 'x', 'ij', 'y', 'jk', numblocks={'x': (2, 2), 'y': (2, 2)}) == \ {('z', 0, 0): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 0), ('y', 1, 0)]), ('z', 0, 1): (dotmany, [('x', 0, 0), ('x', 0, 1)], [('y', 0, 1), ('y', 1, 1)]), ('z', 1, 0): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 0), ('y', 1, 0)]), ('z', 1, 1): (dotmany, [('x', 1, 0), ('x', 1, 1)], [('y', 0, 1), ('y', 1, 1)])} assert top(identity, 'z', '', 'x', 'ij', numblocks={'x': (2, 2)}) ==\ {('z',): (identity, [[('x', 0, 0), ('x', 0, 1)], [('x', 1, 0), ('x', 1, 1)]])} def test_top_supports_broadcasting_rules(): assert top(add, 'z', 'ij', 'x', 'ij', 'y', 'ij', numblocks={'x': (1, 2), 'y': (2, 1)}) == \ {('z', 0, 0): (add, ('x', 0, 0), ('y', 0, 0)), ('z', 0, 1): (add, ('x', 0, 1), ('y', 0, 0)), ('z', 1, 0): (add, ('x', 0, 0), ('y', 1, 0)), ('z', 1, 1): (add, ('x', 0, 1), ('y', 1, 0))} def test_top_literals(): assert top(add, 'z', 'ij', 'x', 'ij', 123, None, numblocks={'x': (2, 2)}) == \ {('z', 0, 0): (add, ('x', 0, 0), 123), ('z', 0, 1): (add, ('x', 0, 1), 123), ('z', 1, 0): (add, ('x', 1, 0), 123), ('z', 1, 1): (add, ('x', 1, 1), 123)} def test_blockwise_literals(): x = da.ones((10, 10), chunks=(5, 5)) z = da.blockwise(add, 'ij', x, 'ij', 100, None, dtype=x.dtype) assert_eq(z, x + 100) z = da.blockwise(lambda x, y, z: x * y + z, 'ij', 2, None, x, 'ij', 100, None, dtype=x.dtype) assert_eq(z, 2 * x + 100) z = da.blockwise(getitem, 'ij', x, 'ij', slice(None), None, dtype=x.dtype) assert_eq(z, x) def test_concatenate3_on_scalars(): assert_eq(concatenate3([1, 2]), np.array([1, 2])) def test_chunked_dot_product(): x = np.arange(400).reshape((20, 20)) o = np.ones((20, 20)) d = {'x': x, 'o': o} getx = getem('x', (5, 5), shape=(20, 20)) geto = getem('o', (5, 5), shape=(20, 20)) result = top(dotmany, 'out', 'ik', 'x', 'ij', 'o', 'jk', numblocks={'x': (4, 4), 'o': (4, 4)}) dsk = merge(d, getx, geto, result) out = dask.get(dsk, [[('out', i, j) for j in range(4)] for i in range(4)]) assert_eq(np.dot(x, o), concatenate3(out)) def test_chunked_transpose_plus_one(): x = np.arange(400).reshape((20, 20)) d = {'x': x} getx = getem('x', (5, 5), shape=(20, 20)) f = lambda x: x.T + 1 comp = top(f, 'out', 'ij', 'x', 'ji', numblocks={'x': (4, 4)}) dsk = merge(d, getx, comp) out = dask.get(dsk, [[('out', i, j) for j in range(4)] for i in range(4)]) assert_eq(concatenate3(out), x.T + 1) def test_broadcast_dimensions_works_with_singleton_dimensions(): argpairs = [('x', 'i')] numblocks = {'x': ((1,),)} assert broadcast_dimensions(argpairs, numblocks) == {'i': (1,)} def test_broadcast_dimensions(): argpairs = [('x', 'ij'), ('y', 'ij')] d = {'x': ('Hello', 1), 'y': (1, (2, 3))} assert broadcast_dimensions(argpairs, d) == {'i': 'Hello', 'j': (2, 3)} def test_Array(): shape = (1000, 1000) chunks = (100, 100) name = 'x' dsk = merge({name: 'some-array'}, getem(name, chunks, shape=shape)) a = Array(dsk, name, chunks, shape=shape, dtype='f8') assert a.numblocks == (10, 10) assert a.__dask_keys__() == [[('x', i, j) for j in range(10)] for i in range(10)] assert a.chunks == ((100,) * 10, (100,) * 10) assert a.shape == shape assert len(a) == shape[0] def test_uneven_chunks(): a = Array({}, 'x', chunks=(3, 3), shape=(10, 10), dtype='f8') assert a.chunks == ((3, 3, 3, 1), (3, 3, 3, 1)) def test_numblocks_suppoorts_singleton_block_dims(): shape = (100, 10) chunks = (10, 10) name = 'x' dsk = merge({name: 'some-array'}, getem(name, shape=shape, chunks=chunks)) a = Array(dsk, name, chunks, shape=shape, dtype='f8') assert set(concat(a.__dask_keys__())) == {('x', i, 0) for i in range(10)} def test_keys(): dsk = dict((('x', i, j), ()) for i in range(5) for j in range(6)) dx = Array(dsk, 'x', chunks=(10, 10), shape=(50, 60), dtype='f8') assert dx.__dask_keys__() == [[(dx.name, i, j) for j in range(6)] for i in range(5)] # Cache works assert dx.__dask_keys__() is dx.__dask_keys__() # Test mutating names clears key cache dx.dask = {('y', i, j): () for i in range(5) for j in range(6)} dx.name = 'y' assert dx.__dask_keys__() == [[(dx.name, i, j) for j in range(6)] for i in range(5)] d = Array({}, 'x', (), shape=(), dtype='f8') assert d.__dask_keys__() == [('x',)] def test_Array_computation(): a = Array({('x', 0, 0): np.eye(3)}, 'x', shape=(3, 3), chunks=(3, 3), dtype='f8') assert_eq(np.array(a), np.eye(3)) assert isinstance(a.compute(), np.ndarray) assert float(a[0, 0]) == 1 @pytest.mark.skipif(LooseVersion(np.__version__) < '1.14.0', reason="NumPy doesn't have `np.linalg._umath_linalg` yet") @pytest.mark.xfail(reason="Protect from `np.linalg._umath_linalg.inv` breaking") def test_Array_numpy_gufunc_call__array_ufunc__01(): x = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10)) nx = x.compute() ny = np.linalg._umath_linalg.inv(nx) y = np.linalg._umath_linalg.inv(x, output_dtypes=float) vy = y.compute() assert_eq(ny, vy) @pytest.mark.skipif(LooseVersion(np.__version__) < '1.14.0', reason="NumPy doesn't have `np.linalg._umath_linalg` yet") @pytest.mark.xfail(reason="Protect from `np.linalg._umath_linalg.eig` breaking") def test_Array_numpy_gufunc_call__array_ufunc__02(): x = da.random.normal(size=(3, 10, 10), chunks=(2, 10, 10)) nx = x.compute() nw, nv = np.linalg._umath_linalg.eig(nx) w, v = np.linalg._umath_linalg.eig(x, output_dtypes=(float, float)) vw = w.compute() vv = v.compute() assert_eq(nw, vw) assert_eq(nv, vv) def test_stack(): a, b, c = [Array(getem(name, chunks=(2, 3), shape=(4, 6)), name, chunks=(2, 3), dtype='f8', shape=(4, 6)) for name in 'ABC'] s = stack([a, b, c], axis=0) colon = slice(None, None, None) assert s.shape == (3, 4, 6) assert s.chunks == ((1, 1, 1), (2, 2), (3, 3)) assert s.chunksize == (1, 2, 3) assert s.dask[(s.name, 0, 1, 0)] == (getitem, ('A', 1, 0), (None, colon, colon)) assert s.dask[(s.name, 2, 1, 0)] == (getitem, ('C', 1, 0), (None, colon, colon)) assert same_keys(s, stack([a, b, c], axis=0)) s2 = stack([a, b, c], axis=1) assert s2.shape == (4, 3, 6) assert s2.chunks == ((2, 2), (1, 1, 1), (3, 3)) assert s2.chunksize == (2, 1, 3) assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ('B', 0, 0), (colon, None, colon)) assert s2.dask[(s2.name, 1, 1, 0)] == (getitem, ('B', 1, 0), (colon, None, colon)) assert same_keys(s2, stack([a, b, c], axis=1)) s2 = stack([a, b, c], axis=2) assert s2.shape == (4, 6, 3) assert s2.chunks == ((2, 2), (3, 3), (1, 1, 1)) assert s2.chunksize == (2, 3, 1) assert s2.dask[(s2.name, 0, 1, 0)] == (getitem, ('A', 0, 1), (colon, colon, None)) assert s2.dask[(s2.name, 1, 1, 2)] == (getitem, ('C', 1, 1), (colon, colon, None)) assert same_keys(s2, stack([a, b, c], axis=2)) pytest.raises(ValueError, lambda: stack([a, b, c], axis=3)) assert set(b.dask.keys()).issubset(s2.dask.keys()) assert stack([a, b, c], axis=-1).chunks == stack([a, b, c], axis=2).chunks def test_short_stack(): x = np.array([1]) d = da.from_array(x, chunks=(1,)) s = da.stack([d]) assert s.shape == (1, 1) chunks = compute_as_if_collection(Array, s.dask, s.__dask_keys__()) assert chunks[0][0].shape == (1, 1) def test_stack_scalars(): d = da.arange(4, chunks=2) s = da.stack([d.mean(), d.sum()]) assert s.compute().tolist() == [np.arange(4).mean(), np.arange(4).sum()] def test_stack_promote_type(): i = np.arange(10, dtype='i4') f = np.arange(10, dtype='f4') di = da.from_array(i, chunks=5) df = da.from_array(f, chunks=5) res = da.stack([di, df]) assert_eq(res, np.stack([i, f])) def test_stack_rechunk(): x = da.random.random(10, chunks=5) y = da.random.random(10, chunks=4) z = da.stack([x, y], axis=0) assert z.shape == (2, 10) assert z.chunks == ((1, 1), (4, 1, 3, 2)) assert_eq(z, np.stack([x.compute(), y.compute()], axis=0)) def test_concatenate(): a, b, c = [Array(getem(name, chunks=(2, 3), shape=(4, 6)), name, chunks=(2, 3), dtype='f8', shape=(4, 6)) for name in 'ABC'] x = concatenate([a, b, c], axis=0) assert x.shape == (12, 6) assert x.chunks == ((2, 2, 2, 2, 2, 2), (3, 3)) assert x.dask[(x.name, 0, 1)] == ('A', 0, 1) assert x.dask[(x.name, 5, 0)] == ('C', 1, 0) assert same_keys(x, concatenate([a, b, c], axis=0)) y = concatenate([a, b, c], axis=1) assert y.shape == (4, 18) assert y.chunks == ((2, 2), (3, 3, 3, 3, 3, 3)) assert y.dask[(y.name, 1, 0)] == ('A', 1, 0) assert y.dask[(y.name, 1, 5)] == ('C', 1, 1) assert same_keys(y, concatenate([a, b, c], axis=1)) assert set(b.dask.keys()).issubset(y.dask.keys()) z = concatenate([a], axis=0) assert z.shape == a.shape assert z.chunks == a.chunks assert z.dask == a.dask assert z is a assert (concatenate([a, b, c], axis=-1).chunks == concatenate([a, b, c], axis=1).chunks) pytest.raises(ValueError, lambda: concatenate([a, b, c], axis=2)) @pytest.mark.parametrize('dtypes', [(('>f8', '>f8'), '>f8'), ((' 5 with pytest.warns(None): # ZeroDivisionWarning assert_eq(expr, (3 / x * y)**2 > 5) with pytest.warns(None): # OverflowWarning c = da.exp(a) assert_eq(c, np.exp(x)) assert_eq(abs(-a), a) assert_eq(a, +x) def test_operator_dtype_promotion(): x = np.arange(10, dtype=np.float32) y = np.array([1]) a = from_array(x, chunks=(5,)) assert_eq(x + 1, a + 1) # still float32 assert_eq(x + 1e50, a + 1e50) # now float64 assert_eq(x + y, a + y) # also float64 def test_field_access(): x = np.array([(1, 1.0), (2, 2.0)], dtype=[('a', 'i4'), ('b', 'f4')]) y = from_array(x, chunks=(1,)) assert_eq(y['a'], x['a']) assert_eq(y[['b', 'a']], x[['b', 'a']]) assert same_keys(y[['b', 'a']], y[['b', 'a']]) def test_field_access_with_shape(): dtype = [('col1', ('f4', (3, 2))), ('col2', ('f4', 3))] data = np.ones((100, 50), dtype=dtype) x = da.from_array(data, 10) assert_eq(x['col1'], data['col1']) assert_eq(x[['col1']], data[['col1']]) assert_eq(x['col2'], data['col2']) assert_eq(x[['col1', 'col2']], data[['col1', 'col2']]) @pytest.mark.skipif(sys.version_info < (3, 5), reason="Matrix multiplication operator only after Py3.5") def test_matmul(): x = np.random.random((5, 5)) y = np.random.random((5, 2)) a = from_array(x, chunks=(1, 5)) b = from_array(y, chunks=(5, 1)) assert_eq(operator.matmul(a, b), a.dot(b)) assert_eq(operator.matmul(a, b), operator.matmul(x, y)) assert_eq(operator.matmul(a, y), operator.matmul(x, b)) list_vec = list(range(1, 6)) assert_eq(operator.matmul(list_vec, b), operator.matmul(list_vec, y)) assert_eq(operator.matmul(x, list_vec), operator.matmul(a, list_vec)) z = np.random.random((5, 5, 5)) c = from_array(z, chunks=(1, 5, 1)) assert_eq(operator.matmul(a, z), operator.matmul(x, c)) assert_eq(operator.matmul(z, a), operator.matmul(c, x)) def test_matmul_array_ufunc(): # regression test for https://github.com/dask/dask/issues/4353 x = np.random.random((5, 5)) y = np.random.random((5, 2)) a = from_array(x, chunks=(1, 5)) b = from_array(y, chunks=(5, 1)) result = b.__array_ufunc__(np.matmul, '__call__', a, b) assert_eq(result, x.dot(y)) def test_T(): x = np.arange(400).reshape((20, 20)) a = from_array(x, chunks=(5, 5)) assert_eq(x.T, a.T) def test_broadcast_to(): x = np.random.randint(10, size=(5, 1, 6)) a = from_array(x, chunks=(3, 1, 3)) for shape in [a.shape, (5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: xb = np.broadcast_to(x, shape) ab = broadcast_to(a, shape) assert_eq(xb, ab) if a.shape == ab.shape: assert a is ab pytest.raises(ValueError, lambda: broadcast_to(a, (2, 1, 6))) pytest.raises(ValueError, lambda: broadcast_to(a, (3,))) def test_broadcast_to_array(): x = np.random.randint(10, size=(5, 1, 6)) for shape in [(5, 0, 6), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: a = np.broadcast_to(x, shape) d = broadcast_to(x, shape) assert_eq(a, d) def test_broadcast_to_scalar(): x = 5 for shape in [tuple(), (0,), (2, 3), (5, 4, 6), (2, 5, 1, 6), (3, 4, 5, 4, 6)]: a = np.broadcast_to(x, shape) d = broadcast_to(x, shape) assert_eq(a, d) def test_broadcast_to_chunks(): x = np.random.randint(10, size=(5, 1, 6)) a = from_array(x, chunks=(3, 1, 3)) for shape, chunks, expected_chunks in [ ((5, 3, 6), (3, -1, 3), ((3, 2), (3,), (3, 3))), ((5, 3, 6), (3, 1, 3), ((3, 2), (1, 1, 1,), (3, 3))), ((2, 5, 3, 6), (1, 3, 1, 3), ((1, 1), (3, 2), (1, 1, 1,), (3, 3)))]: xb = np.broadcast_to(x, shape) ab = broadcast_to(a, shape, chunks=chunks) assert_eq(xb, ab) assert ab.chunks == expected_chunks with pytest.raises(ValueError): broadcast_to(a, a.shape, chunks=((2, 3), (1,), (3, 3))) with pytest.raises(ValueError): broadcast_to(a, a.shape, chunks=((3, 2), (3,), (3, 3))) with pytest.raises(ValueError): broadcast_to(a, (5, 2, 6), chunks=((3, 2), (3,), (3, 3))) def test_broadcast_arrays(): # Calling `broadcast_arrays` with no arguments only works in NumPy 1.13.0+. if LooseVersion(np.__version__) >= LooseVersion("1.13.0"): assert np.broadcast_arrays() == da.broadcast_arrays() a = np.arange(4) d_a = da.from_array(a, chunks=tuple(s // 2 for s in a.shape)) a_0 = np.arange(4)[None, :] a_1 = np.arange(4)[:, None] d_a_0 = d_a[None, :] d_a_1 = d_a[:, None] a_r = np.broadcast_arrays(a_0, a_1) d_r = da.broadcast_arrays(d_a_0, d_a_1) assert isinstance(d_r, list) assert len(a_r) == len(d_r) for e_a_r, e_d_r in zip(a_r, d_r): assert_eq(e_a_r, e_d_r) @pytest.mark.parametrize('u_shape, v_shape', [ [tuple(), (2, 3)], [(1,), (2, 3)], [(1, 1), (2, 3)], [(0, 3), (1, 3)], [(2, 0), (2, 1)], [(1, 0), (2, 1)], [(0, 1), (1, 3)], ]) def test_broadcast_operator(u_shape, v_shape): u = np.random.random(u_shape) v = np.random.random(v_shape) d_u = from_array(u, chunks=1) d_v = from_array(v, chunks=1) w = u * v d_w = d_u * d_v assert_eq(w, d_w) @pytest.mark.parametrize('original_shape,new_shape,chunks', [ ((10,), (10,), (3, 3, 4)), ((10,), (10, 1, 1), 5), ((10,), (1, 10,), 5), ((24,), (2, 3, 4), 12), ((1, 24,), (2, 3, 4), 12), ((2, 3, 4), (24,), (1, 3, 4)), ((2, 3, 4), (24,), 4), ((2, 3, 4), (24, 1), 4), ((2, 3, 4), (1, 24), 4), ((4, 4, 1), (4, 4), 2), ((4, 4), (4, 4, 1), 2), ((1, 4, 4), (4, 4), 2), ((1, 4, 4), (4, 4, 1), 2), ((1, 4, 4), (1, 1, 4, 4), 2), ((4, 4), (1, 4, 4, 1), 2), ((4, 4), (1, 4, 4), 2), ((2, 3), (2, 3), (1, 2)), ((2, 3), (3, 2), 3), ((4, 2, 3), (4, 6), 4), ((3, 4, 5, 6), (3, 4, 5, 6), (2, 3, 4, 5)), ((), (1,), 1), ((1,), (), 1), ((24,), (3, 8), 24), ((24,), (4, 6), 6), ((24,), (4, 3, 2), 6), ((24,), (4, 6, 1), 6), ((24,), (4, 6), (6, 12, 6)), ((64, 4), (8, 8, 4), (16, 2)), ((4, 64), (4, 8, 4, 2), (2, 16)), ((4, 8, 4, 2), (2, 1, 2, 32, 2), (2, 4, 2, 2)), ((4, 1, 4), (4, 4), (2, 1, 2)), ((0, 10), (0, 5, 2), (5, 5)), ((5, 0, 2), (0, 10), (5, 2, 2)), ((0,), (2, 0, 2), (4,)), ((2, 0, 2), (0,), (4, 4, 4)), ]) def test_reshape(original_shape, new_shape, chunks): x = np.random.randint(10, size=original_shape) a = from_array(x, chunks=chunks) xr = x.reshape(new_shape) ar = a.reshape(new_shape) if a.shape == new_shape: assert a is ar assert_eq(xr, ar) def test_reshape_exceptions(): x = np.random.randint(10, size=(5,)) a = from_array(x, chunks=(2,)) with pytest.raises(ValueError): da.reshape(a, (100,)) def test_reshape_splat(): x = da.ones((5, 5), chunks=(2, 2)) assert_eq(x.reshape((25,)), x.reshape(25)) def test_reshape_fails_for_dask_only(): cases = [ ((3, 4), (4, 3), 2), ] for original_shape, new_shape, chunks in cases: x = np.random.randint(10, size=original_shape) a = from_array(x, chunks=chunks) assert x.reshape(new_shape).shape == new_shape with pytest.raises(ValueError): da.reshape(a, new_shape) def test_reshape_unknown_dimensions(): for original_shape in [(24,), (2, 12), (2, 3, 4)]: for new_shape in [(-1,), (2, -1), (-1, 3, 4)]: x = np.random.randint(10, size=original_shape) a = from_array(x, 24) assert_eq(x.reshape(new_shape), a.reshape(new_shape)) pytest.raises(ValueError, lambda: da.reshape(a, (-1, -1))) def test_full(): d = da.full((3, 4), 2, chunks=((2, 1), (2, 2))) assert d.chunks == ((2, 1), (2, 2)) assert_eq(d, np.full((3, 4), 2)) def test_map_blocks(): x = np.arange(400).reshape((20, 20)) d = from_array(x, chunks=(7, 7)) e = d.map_blocks(inc, dtype=d.dtype) assert d.chunks == e.chunks assert_eq(e, x + 1) e = d.map_blocks(inc, name='increment') assert e.name.startswith('increment-') assert d.map_blocks(inc, name='foo').name != d.map_blocks(dec, name='foo').name d = from_array(x, chunks=(10, 10)) e = d.map_blocks(lambda x: x[::2, ::2], chunks=(5, 5), dtype=d.dtype) assert e.chunks == ((5, 5), (5, 5)) assert_eq(e, x[::2, ::2]) d = from_array(x, chunks=(8, 8)) e = d.map_blocks(lambda x: x[::2, ::2], chunks=((4, 4, 2), (4, 4, 2)), dtype=d.dtype) assert_eq(e, x[::2, ::2]) def test_map_blocks2(): x = np.arange(10, dtype='i8') d = from_array(x, chunks=(2,)) def func(block, block_id=None, c=0): return np.ones_like(block) * sum(block_id) + c out = d.map_blocks(func, dtype='i8') expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype='i8') assert_eq(out, expected) assert same_keys(d.map_blocks(func, dtype='i8'), out) out = d.map_blocks(func, dtype='i8', c=1) expected = expected + 1 assert_eq(out, expected) assert same_keys(d.map_blocks(func, dtype='i8', c=1), out) def test_map_blocks_block_info(): x = da.arange(50, chunks=10) def func(a, b, c, block_info=None): for idx in [0, 2]: # positions in args assert block_info[idx]['shape'] == (50,) assert block_info[idx]['num-chunks'] == (5,) start, stop = block_info[idx]['array-location'][0] assert stop - start == 10 assert 0 <= start <= 40 assert 10 <= stop <= 50 assert 0 <= block_info[idx]['chunk-location'][0] <= 4 return a + b + c z = da.map_blocks(func, x, 100, x + 1, dtype=x.dtype) assert_eq(z, x + x + 1 + 100) def test_map_blocks_block_info_with_axis(): # https://github.com/dask/dask/issues/4298 values = da.from_array(np.array(['a', 'a', 'b', 'c']), 2) def func(x, block_info=None): assert set(block_info.keys()) == {0} assert block_info[0]['shape'] == (4,) assert block_info[0]['num_chunks'] == (2,) assert block_info['chunk-location'] in {(0,), (1,)} if block_info['chunk-location'] == (0,): assert block_info['array-location'] == [(0, 2)] elif block_info['chunk-location'] == (1,): assert block_info['array-location'] == [(2, 4)] return np.ones((len(x), 3)) values.map_blocks(func, chunks=((2, 2), 3), new_axis=1, dtype='f8') def test_map_blocks_with_constants(): d = da.arange(10, chunks=3) e = d.map_blocks(add, 100, dtype=d.dtype) assert_eq(e, np.arange(10) + 100) assert_eq(da.map_blocks(sub, d, 10, dtype=d.dtype), np.arange(10) - 10) assert_eq(da.map_blocks(sub, 10, d, dtype=d.dtype), 10 - np.arange(10)) def test_map_blocks_with_kwargs(): d = da.arange(10, chunks=5) result = d.map_blocks(np.max, axis=0, keepdims=True, dtype=d.dtype, chunks=(1,)) assert_eq(result, np.array([4, 9])) def test_map_blocks_with_chunks(): dx = da.ones((5, 3), chunks=(2, 2)) dy = da.ones((5, 3), chunks=(2, 2)) dz = da.map_blocks(np.add, dx, dy, chunks=dx.chunks) assert_eq(dz, np.ones((5, 3)) * 2) def test_map_blocks_dtype_inference(): x = np.arange(50).reshape((5, 10)) y = np.arange(10) dx = da.from_array(x, chunks=5) dy = da.from_array(y, chunks=5) def foo(x, *args, **kwargs): cast = kwargs.pop('cast', 'i8') return (x + sum(args)).astype(cast) assert_eq(dx.map_blocks(foo, dy, 1), foo(dx, dy, 1)) assert_eq(dx.map_blocks(foo, dy, 1, cast='f8'), foo(dx, dy, 1, cast='f8')) assert_eq(dx.map_blocks(foo, dy, 1, cast='f8', dtype='f8'), foo(dx, dy, 1, cast='f8', dtype='f8')) def foo(x): raise RuntimeError("Woops") with pytest.raises(ValueError) as e: dx.map_blocks(foo) msg = str(e.value) assert msg.startswith("`dtype` inference failed") assert "Please specify the dtype explicitly" in msg assert 'RuntimeError' in msg def test_from_function_requires_block_args(): x = np.arange(10) pytest.raises(Exception, lambda: from_array(x)) def test_repr(): d = da.ones((4, 4), chunks=(2, 2)) assert key_split(d.name) in repr(d) assert str(d.shape) in repr(d) assert str(d.dtype) in repr(d) d = da.ones((4000, 4), chunks=(4, 2)) assert len(str(d)) < 1000 def test_slicing_with_ellipsis(): x = np.arange(256).reshape((4, 4, 4, 4)) d = da.from_array(x, chunks=((2, 2, 2, 2))) assert_eq(d[..., 1], x[..., 1]) assert_eq(d[0, ..., 1], x[0, ..., 1]) def test_slicing_with_ndarray(): x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=((4, 4))) assert_eq(d[np.arange(8)], x) assert_eq(d[np.ones(8, dtype=bool)], x) assert_eq(d[np.array([1])], x[[1]]) assert_eq(d[np.array([True, False, True] + [False] * 5)], x[[0, 2]]) def test_dtype(): d = da.ones((4, 4), chunks=(2, 2)) assert d.dtype == d.compute().dtype assert (d * 1.0).dtype == (d + 1.0).compute().dtype assert d.sum().dtype == d.sum().compute().dtype # no shape def test_blockdims_from_blockshape(): assert blockdims_from_blockshape((10, 10), (4, 3)) == ((4, 4, 2), (3, 3, 3, 1)) pytest.raises(TypeError, lambda: blockdims_from_blockshape((10,), None)) assert blockdims_from_blockshape((1e2, 3), [1e1, 3]) == ((10, ) * 10, (3, )) assert blockdims_from_blockshape((np.int8(10), ), (5, )) == ((5, 5), ) def test_coerce(): d0 = da.from_array(np.array(1), chunks=(1,)) d1 = da.from_array(np.array([1]), chunks=(1,)) with dask.config.set(scheduler='sync'): for d in d0, d1: assert bool(d) is True assert int(d) == 1 assert float(d) == 1.0 assert complex(d) == complex(1) a2 = np.arange(2) d2 = da.from_array(a2, chunks=(2,)) for func in (int, float, complex): pytest.raises(TypeError, lambda :func(d2)) def test_bool(): arr = np.arange(100).reshape((10,10)) darr = da.from_array(arr, chunks=(10,10)) with pytest.raises(ValueError): bool(darr) bool(darr == darr) def test_store_kwargs(): d = da.ones((10, 10), chunks=(2, 2)) a = d + 1 called = [False] def get_func(*args, **kwargs): assert kwargs.pop("foo") == "test kwarg" r = dask.get(*args, **kwargs) called[0] = True return r called[0] = False at = np.zeros(shape=(10, 10)) store([a], [at], scheduler=get_func, foo="test kwarg") assert called[0] called[0] = False at = np.zeros(shape=(10, 10)) a.store(at, scheduler=get_func, foo="test kwarg") assert called[0] called[0] = False at = np.zeros(shape=(10, 10)) store([a], [at], scheduler=get_func, return_stored=True, foo="test kwarg") assert called[0] def test_store_delayed_target(): from dask.delayed import delayed d = da.ones((4, 4), chunks=(2, 2)) a, b = d + 1, d + 2 # empty buffers to be used as targets targs = {} def make_target(key): a = np.empty((4, 4)) targs[key] = a return a # delayed calls to these targets atd = delayed(make_target)('at') btd = delayed(make_target)('bt') # test not keeping result st = store([a, b], [atd, btd]) at = targs['at'] bt = targs['bt'] assert st is None assert_eq(at, a) assert_eq(bt, b) # test keeping result for st_compute in [False, True]: targs.clear() st = store([a, b], [atd, btd], return_stored=True, compute=st_compute) if st_compute: assert all( not any(dask.core.get_deps(e.dask)[0].values()) for e in st ) st = dask.compute(*st) at = targs['at'] bt = targs['bt'] assert st is not None assert isinstance(st, tuple) assert all([isinstance(v, np.ndarray) for v in st]) assert_eq(at, a) assert_eq(bt, b) assert_eq(st[0], a) assert_eq(st[1], b) pytest.raises(ValueError, lambda: store([a], [at, bt])) pytest.raises(ValueError, lambda: store(at, at)) pytest.raises(ValueError, lambda: store([at, bt], [at, bt])) def test_store(): d = da.ones((4, 4), chunks=(2, 2)) a, b = d + 1, d + 2 at = np.empty(shape=(4, 4)) bt = np.empty(shape=(4, 4)) st = store([a, b], [at, bt]) assert st is None assert (at == 2).all() assert (bt == 3).all() pytest.raises(ValueError, lambda: store([a], [at, bt])) pytest.raises(ValueError, lambda: store(at, at)) pytest.raises(ValueError, lambda: store([at, bt], [at, bt])) def test_store_regions(): d = da.ones((4, 4, 4), dtype=int, chunks=(2, 2, 2)) a, b = d + 1, d + 2 a = a[:, 1:, :].astype(float) region = (slice(None, None, 2), slice(None), [1, 2, 4, 5]) # Single region: at = np.zeros(shape=(8, 3, 6)) bt = np.zeros(shape=(8, 4, 6)) v = store([a, b], [at, bt], regions=region, compute=False) assert isinstance(v, Delayed) assert (at == 0).all() and (bt[region] == 0).all() assert all([ev is None for ev in v.compute()]) assert (at[region] == 2).all() and (bt[region] == 3).all() assert not (bt == 3).all() and not ( bt == 0 ).all() assert not (at == 2).all() and not ( at == 0 ).all() # Multiple regions: at = np.zeros(shape=(8, 3, 6)) bt = np.zeros(shape=(8, 4, 6)) v = store([a, b], [at, bt], regions=[region, region], compute=False) assert isinstance(v, Delayed) assert (at == 0).all() and (bt[region] == 0).all() assert all([ev is None for ev in v.compute()]) assert (at[region] == 2).all() and (bt[region] == 3).all() assert not (bt == 3).all() and not ( bt == 0 ).all() assert not (at == 2).all() and not ( at == 0 ).all() # Single region (keep result): for st_compute in [False, True]: at = np.zeros(shape=(8, 3, 6)) bt = np.zeros(shape=(8, 4, 6)) v = store( [a, b], [at, bt], regions=region, compute=st_compute, return_stored=True ) assert isinstance(v, tuple) assert all([isinstance(e, da.Array) for e in v]) if st_compute: assert all( not any(dask.core.get_deps(e.dask)[0].values()) for e in v ) else: assert (at == 0).all() and (bt[region] == 0).all() ar, br = v assert ar.dtype == a.dtype assert br.dtype == b.dtype assert ar.shape == a.shape assert br.shape == b.shape assert ar.chunks == a.chunks assert br.chunks == b.chunks ar, br = da.compute(ar, br) assert (at[region] == 2).all() and (bt[region] == 3).all() assert not (bt == 3).all() and not ( bt == 0 ).all() assert not (at == 2).all() and not ( at == 0 ).all() assert (br == 3).all() assert (ar == 2).all() # Multiple regions (keep result): for st_compute in [False, True]: at = np.zeros(shape=(8, 3, 6)) bt = np.zeros(shape=(8, 4, 6)) v = store( [a, b], [at, bt], regions=[region, region], compute=st_compute, return_stored=True ) assert isinstance(v, tuple) assert all([isinstance(e, da.Array) for e in v]) if st_compute: assert all( not any(dask.core.get_deps(e.dask)[0].values()) for e in v ) else: assert (at == 0).all() and (bt[region] == 0).all() ar, br = v assert ar.dtype == a.dtype assert br.dtype == b.dtype assert ar.shape == a.shape assert br.shape == b.shape assert ar.chunks == a.chunks assert br.chunks == b.chunks ar, br = da.compute(ar, br) assert (at[region] == 2).all() and (bt[region] == 3).all() assert not (bt == 3).all() and not ( bt == 0 ).all() assert not (at == 2).all() and not ( at == 0 ).all() assert (br == 3).all() assert (ar == 2).all() def test_store_compute_false(): d = da.ones((4, 4), chunks=(2, 2)) a, b = d + 1, d + 2 at = np.zeros(shape=(4, 4)) bt = np.zeros(shape=(4, 4)) v = store([a, b], [at, bt], compute=False) assert isinstance(v, Delayed) assert (at == 0).all() and (bt == 0).all() assert all([ev is None for ev in v.compute()]) assert (at == 2).all() and (bt == 3).all() at = np.zeros(shape=(4, 4)) bt = np.zeros(shape=(4, 4)) dat, dbt = store([a, b], [at, bt], compute=False, return_stored=True) assert isinstance(dat, Array) and isinstance(dbt, Array) assert (at == 0).all() and (bt == 0).all() assert (dat.compute() == at).all() and (dbt.compute() == bt).all() assert (at == 2).all() and (bt == 3).all() def test_store_nocompute_regions(): x = da.ones(10, chunks=1) y = np.zeros((2, 10)) d1 = da.store(x, y, regions=(0,), compute=False) d2 = da.store(x, y, regions=(1,), compute=False) assert d1.key != d2.key class ThreadSafetyError(Exception): pass class NonthreadSafeStore(object): def __init__(self): self.in_use = False def __setitem__(self, key, value): if self.in_use: raise ThreadSafetyError() self.in_use = True time.sleep(0.001) self.in_use = False class ThreadSafeStore(object): def __init__(self): self.concurrent_uses = 0 self.max_concurrent_uses = 0 def __setitem__(self, key, value): self.concurrent_uses += 1 self.max_concurrent_uses = max(self.concurrent_uses, self.max_concurrent_uses) time.sleep(0.01) self.concurrent_uses -= 1 class CounterLock(object): def __init__(self, *args, **kwargs): self.lock = Lock(*args, **kwargs) self.acquire_count = 0 self.release_count = 0 def acquire(self, *args, **kwargs): self.acquire_count += 1 return self.lock.acquire(*args, **kwargs) def release(self, *args, **kwargs): self.release_count += 1 return self.lock.release(*args, **kwargs) def test_store_locks(): _Lock = type(Lock()) d = da.ones((10, 10), chunks=(2, 2)) a, b = d + 1, d + 2 at = np.zeros(shape=(10, 10)) bt = np.zeros(shape=(10, 10)) lock = Lock() v = store([a, b], [at, bt], compute=False, lock=lock) assert isinstance(v, Delayed) dsk = v.dask locks = set(vv for v in dsk.values() for vv in v if isinstance(vv, _Lock)) assert locks == set([lock]) # Ensure same lock applies over multiple stores at = NonthreadSafeStore() v = store([a, b], [at, at], lock=lock, scheduler='threads', num_workers=10) assert v is None # Don't assume thread safety by default at = NonthreadSafeStore() assert store(a, at, scheduler='threads', num_workers=10) is None assert a.store(at, scheduler='threads', num_workers=10) is None # Ensure locks can be removed at = ThreadSafeStore() for i in range(10): st = a.store(at, lock=False, scheduler='threads', num_workers=10) assert st is None if at.max_concurrent_uses > 1: break if i == 9: assert False # Verify number of lock calls nchunks = np.sum([np.prod([len(c) for c in e.chunks]) for e in [a, b]]) for c in (False, True): at = np.zeros(shape=(10, 10)) bt = np.zeros(shape=(10, 10)) lock = CounterLock() v = store([a, b], [at, bt], lock=lock, compute=c, return_stored=True) assert all(isinstance(e, Array) for e in v) da.compute(v) # When `return_stored=True` and `compute=False`, # the lock should be acquired only once for store and load steps # as they are fused together into one step. assert lock.acquire_count == lock.release_count if c: assert lock.acquire_count == 2 * nchunks else: assert lock.acquire_count == nchunks def test_store_method_return(): d = da.ones((10, 10), chunks=(2, 2)) a = d + 1 for compute in [False, True]: for return_stored in [False, True]: at = np.zeros(shape=(10, 10)) r = a.store( at, scheduler='threads', compute=compute, return_stored=return_stored ) if return_stored: assert isinstance(r, Array) elif compute: assert r is None else: assert isinstance(r, Delayed) @pytest.mark.xfail(reason="can't lock with multiprocessing") def test_store_multiprocessing_lock(): d = da.ones((10, 10), chunks=(2, 2)) a = d + 1 at = np.zeros(shape=(10, 10)) st = a.store(at, scheduler='processes', num_workers=10) assert st is None def test_to_hdf5(): h5py = pytest.importorskip('h5py') x = da.ones((4, 4), chunks=(2, 2)) y = da.ones(4, chunks=2, dtype='i4') with tmpfile('.hdf5') as fn: x.to_hdf5(fn, '/x') with h5py.File(fn) as f: d = f['/x'] assert_eq(d[:], x) assert d.chunks == (2, 2) with tmpfile('.hdf5') as fn: x.to_hdf5(fn, '/x', chunks=None) with h5py.File(fn) as f: d = f['/x'] assert_eq(d[:], x) assert d.chunks is None with tmpfile('.hdf5') as fn: x.to_hdf5(fn, '/x', chunks=(1, 1)) with h5py.File(fn) as f: d = f['/x'] assert_eq(d[:], x) assert d.chunks == (1, 1) with tmpfile('.hdf5') as fn: da.to_hdf5(fn, {'/x': x, '/y': y}) with h5py.File(fn) as f: assert_eq(f['/x'][:], x) assert f['/x'].chunks == (2, 2) assert_eq(f['/y'][:], y) assert f['/y'].chunks == (2,) def test_to_dask_dataframe(): dd = pytest.importorskip('dask.dataframe') a = da.ones((4,), chunks=(2,)) d = a.to_dask_dataframe() assert isinstance(d, dd.Series) a = da.ones((4, 4), chunks=(2, 2)) d = a.to_dask_dataframe() assert isinstance(d, dd.DataFrame) def test_np_array_with_zero_dimensions(): d = da.ones((4, 4), chunks=(2, 2)) assert_eq(np.array(d.sum()), np.array(d.compute().sum())) def test_dtype_complex(): x = np.arange(24).reshape((4, 6)).astype('f4') y = np.arange(24).reshape((4, 6)).astype('i8') z = np.arange(24).reshape((4, 6)).astype('i2') a = da.from_array(x, chunks=(2, 3)) b = da.from_array(y, chunks=(2, 3)) c = da.from_array(z, chunks=(2, 3)) def assert_eq(a, b): return (isinstance(a, np.dtype) and isinstance(b, np.dtype) and str(a) == str(b)) assert_eq(a.dtype, x.dtype) assert_eq(b.dtype, y.dtype) assert_eq((a + 1).dtype, (x + 1).dtype) assert_eq((a + b).dtype, (x + y).dtype) assert_eq(a.T.dtype, x.T.dtype) assert_eq(a[:3].dtype, x[:3].dtype) assert_eq((a.dot(b.T)).dtype, (x.dot(y.T)).dtype) assert_eq(stack([a, b]).dtype, np.vstack([x, y]).dtype) assert_eq(concatenate([a, b]).dtype, np.concatenate([x, y]).dtype) assert_eq(b.std().dtype, y.std().dtype) assert_eq(c.sum().dtype, z.sum().dtype) assert_eq(a.min().dtype, a.min().dtype) assert_eq(b.std().dtype, b.std().dtype) assert_eq(a.argmin(axis=0).dtype, a.argmin(axis=0).dtype) assert_eq(da.sin(c).dtype, np.sin(z).dtype) assert_eq(da.exp(b).dtype, np.exp(y).dtype) assert_eq(da.floor(a).dtype, np.floor(x).dtype) assert_eq(da.isnan(b).dtype, np.isnan(y).dtype) with ignoring(ImportError): assert da.isnull(b).dtype == 'bool' assert da.notnull(b).dtype == 'bool' x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')]) d = da.from_array(x, chunks=(1,)) assert_eq(d['text'].dtype, x['text'].dtype) assert_eq(d[['numbers', 'text']].dtype, x[['numbers', 'text']].dtype) def test_astype(): x = np.ones((5, 5), dtype='f8') d = da.from_array(x, chunks=(2,2)) assert d.astype('i8').dtype == 'i8' assert_eq(d.astype('i8'), x.astype('i8')) assert same_keys(d.astype('i8'), d.astype('i8')) with pytest.raises(TypeError): d.astype('i8', casting='safe') with pytest.raises(TypeError): d.astype('i8', not_a_real_kwarg='foo') # smoketest with kwargs assert_eq(d.astype('i8', copy=False), x.astype('i8', copy=False)) # Check it's a noop assert d.astype('f8') is d def test_arithmetic(): x = np.arange(5).astype('f4') + 2 y = np.arange(5).astype('i8') + 2 z = np.arange(5).astype('i4') + 2 a = da.from_array(x, chunks=(2,)) b = da.from_array(y, chunks=(2,)) c = da.from_array(z, chunks=(2,)) assert_eq(a + b, x + y) assert_eq(a * b, x * y) assert_eq(a - b, x - y) assert_eq(a / b, x / y) assert_eq(b & b, y & y) assert_eq(b | b, y | y) assert_eq(b ^ b, y ^ y) assert_eq(a // b, x // y) assert_eq(a ** b, x ** y) assert_eq(a % b, x % y) assert_eq(a > b, x > y) assert_eq(a < b, x < y) assert_eq(a >= b, x >= y) assert_eq(a <= b, x <= y) assert_eq(a == b, x == y) assert_eq(a != b, x != y) assert_eq(a + 2, x + 2) assert_eq(a * 2, x * 2) assert_eq(a - 2, x - 2) assert_eq(a / 2, x / 2) assert_eq(b & True, y & True) assert_eq(b | True, y | True) assert_eq(b ^ True, y ^ True) assert_eq(a // 2, x // 2) assert_eq(a ** 2, x ** 2) assert_eq(a % 2, x % 2) assert_eq(a > 2, x > 2) assert_eq(a < 2, x < 2) assert_eq(a >= 2, x >= 2) assert_eq(a <= 2, x <= 2) assert_eq(a == 2, x == 2) assert_eq(a != 2, x != 2) assert_eq(2 + b, 2 + y) assert_eq(2 * b, 2 * y) assert_eq(2 - b, 2 - y) assert_eq(2 / b, 2 / y) assert_eq(True & b, True & y) assert_eq(True | b, True | y) assert_eq(True ^ b, True ^ y) assert_eq(2 // b, 2 // y) assert_eq(2 ** b, 2 ** y) assert_eq(2 % b, 2 % y) assert_eq(2 > b, 2 > y) assert_eq(2 < b, 2 < y) assert_eq(2 >= b, 2 >= y) assert_eq(2 <= b, 2 <= y) assert_eq(2 == b, 2 == y) assert_eq(2 != b, 2 != y) assert_eq(-a, -x) assert_eq(abs(a), abs(x)) assert_eq(~(a == b), ~(x == y)) assert_eq(~(a == b), ~(x == y)) assert_eq(da.logaddexp(a, b), np.logaddexp(x, y)) assert_eq(da.logaddexp2(a, b), np.logaddexp2(x, y)) with pytest.warns(None): # Overflow warning assert_eq(da.exp(b), np.exp(y)) assert_eq(da.log(a), np.log(x)) assert_eq(da.log10(a), np.log10(x)) assert_eq(da.log1p(a), np.log1p(x)) with pytest.warns(None): # Overflow warning assert_eq(da.expm1(b), np.expm1(y)) assert_eq(da.sqrt(a), np.sqrt(x)) assert_eq(da.square(a), np.square(x)) assert_eq(da.sin(a), np.sin(x)) assert_eq(da.cos(b), np.cos(y)) assert_eq(da.tan(a), np.tan(x)) assert_eq(da.arcsin(b / 10), np.arcsin(y / 10)) assert_eq(da.arccos(b / 10), np.arccos(y / 10)) assert_eq(da.arctan(b / 10), np.arctan(y / 10)) assert_eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x)) assert_eq(da.hypot(b, a), np.hypot(y, x)) assert_eq(da.sinh(a), np.sinh(x)) with pytest.warns(None): # Overflow warning assert_eq(da.cosh(b), np.cosh(y)) assert_eq(da.tanh(a), np.tanh(x)) assert_eq(da.arcsinh(b * 10), np.arcsinh(y * 10)) assert_eq(da.arccosh(b * 10), np.arccosh(y * 10)) assert_eq(da.arctanh(b / 10), np.arctanh(y / 10)) assert_eq(da.deg2rad(a), np.deg2rad(x)) assert_eq(da.rad2deg(a), np.rad2deg(x)) assert_eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4)) assert_eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4)) assert_eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4)) assert_eq(da.logical_not(a < 1), np.logical_not(x < 1)) assert_eq(da.maximum(a, 5 - a), np.maximum(a, 5 - a)) assert_eq(da.minimum(a, 5 - a), np.minimum(a, 5 - a)) assert_eq(da.fmax(a, 5 - a), np.fmax(a, 5 - a)) assert_eq(da.fmin(a, 5 - a), np.fmin(a, 5 - a)) assert_eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y)) assert_eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y)) assert_eq(da.isfinite(a), np.isfinite(x)) assert_eq(da.isinf(a), np.isinf(x)) assert_eq(da.isnan(a), np.isnan(x)) assert_eq(da.signbit(a - 3), np.signbit(x - 3)) assert_eq(da.copysign(a - 3, b), np.copysign(x - 3, y)) assert_eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y)) with pytest.warns(None): # overflow warning assert_eq(da.ldexp(c, c), np.ldexp(z, z)) assert_eq(da.fmod(a * 12, b), np.fmod(x * 12, y)) assert_eq(da.floor(a * 0.5), np.floor(x * 0.5)) assert_eq(da.ceil(a), np.ceil(x)) assert_eq(da.trunc(a / 2), np.trunc(x / 2)) assert_eq(da.degrees(b), np.degrees(y)) assert_eq(da.radians(a), np.radians(x)) assert_eq(da.rint(a + 0.3), np.rint(x + 0.3)) assert_eq(da.fix(a - 2.5), np.fix(x - 2.5)) assert_eq(da.angle(a + 1j), np.angle(x + 1j)) assert_eq(da.real(a + 1j), np.real(x + 1j)) assert_eq((a + 1j).real, np.real(x + 1j)) assert_eq(da.imag(a + 1j), np.imag(x + 1j)) assert_eq((a + 1j).imag, np.imag(x + 1j)) assert_eq(da.conj(a + 1j * b), np.conj(x + 1j * y)) assert_eq((a + 1j * b).conj(), (x + 1j * y).conj()) assert_eq(da.clip(b, 1, 4), np.clip(y, 1, 4)) assert_eq(b.clip(1, 4), y.clip(1, 4)) assert_eq(da.fabs(b), np.fabs(y)) assert_eq(da.sign(b - 2), np.sign(y - 2)) assert_eq(da.absolute(b - 2), np.absolute(y - 2)) assert_eq(da.absolute(b - 2 + 1j), np.absolute(y - 2 + 1j)) l1, l2 = da.frexp(a) r1, r2 = np.frexp(x) assert_eq(l1, r1) assert_eq(l2, r2) l1, l2 = da.modf(a) r1, r2 = np.modf(x) assert_eq(l1, r1) assert_eq(l2, r2) assert_eq(da.around(a, -1), np.around(x, -1)) def test_elemwise_consistent_names(): a = da.from_array(np.arange(5, dtype='f4'), chunks=(2,)) b = da.from_array(np.arange(5, dtype='f4'), chunks=(2,)) assert same_keys(a + b, a + b) assert same_keys(a + 2, a + 2) assert same_keys(da.exp(a), da.exp(a)) assert same_keys(da.exp(a, dtype='f8'), da.exp(a, dtype='f8')) assert same_keys(da.maximum(a, b), da.maximum(a, b)) def test_optimize(): x = np.arange(5).astype('f4') a = da.from_array(x, chunks=(2,)) expr = a[1:4] + 1 result = optimize(expr.dask, expr.__dask_keys__()) assert isinstance(result, dict) assert all(key in result for key in expr.__dask_keys__()) def test_slicing_with_non_ndarrays(): class ARangeSlice(object): def __init__(self, start, stop): self.start = start self.stop = stop def __array__(self): return np.arange(self.start, self.stop) class ARangeSlicable(object): dtype = np.dtype('i8') def __init__(self, n): self.n = n @property def shape(self): return (self.n,) def __getitem__(self, key): return ARangeSlice(key[0].start, key[0].stop) x = da.from_array(ARangeSlicable(10), chunks=(4,)) assert_eq((x + 1).sum(), (np.arange(10, dtype=x.dtype) + 1).sum()) def test_getter(): with warnings.catch_warnings(record=True): assert type(getter(np.matrix([[1]]), 0)) is np.ndarray assert type(getter(np.matrix([[1]]), 0, asarray=False)) is np.matrix assert_eq(getter([1, 2, 3, 4, 5], slice(1, 4)), np.array([2, 3, 4])) assert_eq(getter(np.arange(5), (None, slice(None, None))), np.arange(5)[None, :]) def test_size(): x = da.ones((10, 2), chunks=(3, 1)) assert x.size == np.array(x).size assert isinstance(x.size, int) def test_nbytes(): x = da.ones((10, 2), chunks=(3, 1)) assert x.nbytes == np.array(x).nbytes def test_itemsize(): x = da.ones((10, 2), chunks=(3, 1)) assert x.itemsize == 8 def test_Array_normalizes_dtype(): x = da.ones((3,), chunks=(1,), dtype=int) assert isinstance(x.dtype, np.dtype) def test_from_array_with_lock(): x = np.arange(10) d = da.from_array(x, chunks=5, lock=True) tasks = [v for k, v in d.dask.items() if k[0] == d.name] assert hasattr(tasks[0][4], 'acquire') assert len(set(task[4] for task in tasks)) == 1 assert_eq(d, x) lock = Lock() e = da.from_array(x, chunks=5, lock=lock) f = da.from_array(x, chunks=5, lock=lock) assert_eq(e + f, x + x) class MyArray(object): def __init__(self, x): self.x = x self.dtype = x.dtype self.shape = x.shape self.ndim = len(x.shape) def __getitem__(self, i): return self.x[i] @pytest.mark.parametrize('x,chunks', [ (np.arange(25).reshape((5, 5)), (5, 5)), (np.arange(25).reshape((5, 5)), -1), (np.array([[1]]), 1), (np.array(1), 1), ]) def test_from_array_tasks_always_call_getter(x, chunks): dx = da.from_array(MyArray(x), chunks=chunks, asarray=False) assert_eq(x, dx) def test_from_array_ndarray_onechunk(): """ndarray with a single chunk produces a minimal single key dict """ x = np.array([[1, 2], [3, 4]]) dx = da.from_array(x, chunks=-1) assert_eq(x, dx) assert len(dx.dask) == 1 assert dx.dask[dx.name, 0, 0] is x def test_from_array_ndarray_getitem(): """For ndarray, don't use getter / getter_nofancy; use the cleaner operator.getitem""" x = np.array([[1, 2], [3, 4]]) dx = da.from_array(x, chunks=(1, 2)) assert_eq(x, dx) assert dx.dask[dx.name, 0, 0][0] == operator.getitem @pytest.mark.parametrize( 'x', [[1, 2], (1, 2), memoryview(b'abc')] + ([buffer(b'abc')] if PY2 else [])) # noqa: F821 def test_from_array_list(x): """Lists, tuples, and memoryviews are automatically converted to ndarray """ dx = da.from_array(x, chunks=-1) assert_eq(np.array(x), dx) assert isinstance(dx.dask[dx.name, 0], np.ndarray) dx = da.from_array(x, chunks=1) assert_eq(np.array(x), dx) assert dx.dask[dx.name, 0][0] == operator.getitem assert isinstance(dx.dask[dx.name.replace('array', 'array-original')], np.ndarray) @pytest.mark.parametrize( 'type_', [t for t in np.ScalarType if t not in [memoryview] + ([buffer] if PY2 else [])]) # noqa: F821 def test_from_array_scalar(type_): """Python and numpy scalars are automatically converted to ndarray """ if type_ == np.datetime64: x = np.datetime64('2000-01-01') else: x = type_(1) dx = da.from_array(x, chunks=-1) assert_eq(np.array(x), dx) assert isinstance(dx.dask[dx.name, ], np.ndarray) @pytest.mark.parametrize('asarray,cls', [ (True, np.ndarray), (False, np.matrix), ]) def test_from_array_no_asarray(asarray, cls): def assert_chunks_are_of_type(x): chunks = compute_as_if_collection(Array, x.dask, x.__dask_keys__()) for c in concat(chunks): assert type(c) is cls with warnings.catch_warnings(record=True): x = np.matrix(np.arange(100).reshape((10, 10))) dx = da.from_array(x, chunks=(5, 5), asarray=asarray) assert_chunks_are_of_type(dx) assert_chunks_are_of_type(dx[0:5]) assert_chunks_are_of_type(dx[0:5][:, 0]) def test_from_array_getitem(): x = np.arange(10) def my_getitem(x, ind): return x[ind] y = da.from_array(x, chunks=(5,), getitem=my_getitem) for k, v in y.dask.items(): if isinstance(v, tuple): assert v[0] is my_getitem assert_eq(x, y) def test_from_array_minus_one(): x = np.arange(10) y = da.from_array(x, -1) assert y.chunks == ((10,),) assert_eq(x, y) def test_from_array_copy(): # Regression test for https://github.com/dask/dask/issues/3751 x = np.arange(10) y = da.from_array(x, -1) assert y.npartitions == 1 y_c = y.copy() assert y is not y_c assert y.compute() is not y_c.compute() def test_asarray(): assert_eq(da.asarray([1, 2, 3]), np.asarray([1, 2, 3])) x = da.asarray([1, 2, 3]) assert da.asarray(x) is x def test_asarray_dask_dataframe(): # https://github.com/dask/dask/issues/3885 dd = pytest.importorskip('dask.dataframe') import pandas as pd s = dd.from_pandas(pd.Series([1, 2, 3, 4]), 2) result = da.asarray(s) expected = s.values assert_eq(result, expected) df = s.to_frame(name='s') result = da.asarray(df) expected = df.values assert_eq(result, expected) def test_asarray_h5py(): h5py = pytest.importorskip('h5py') with tmpfile('.hdf5') as fn: with h5py.File(fn) as f: d = f.create_dataset('/x', shape=(2, 2), dtype=float) x = da.asarray(d) assert d in x.dask.values() assert not any(isinstance(v, np.ndarray) for v in x.dask.values()) def test_asanyarray(): with warnings.catch_warnings(record=True): x = np.matrix([1, 2, 3]) dx = da.asanyarray(x) assert dx.numblocks == (1, 1) chunks = compute_as_if_collection(Array, dx.dask, dx.__dask_keys__()) assert isinstance(chunks[0][0], np.matrix) assert da.asanyarray(dx) is dx def test_asanyarray_dataframe(): pd = pytest.importorskip('pandas') dd = pytest.importorskip('dask.dataframe') df = pd.DataFrame({'x': [1, 2, 3]}) ddf = dd.from_pandas(df, npartitions=2) x = np.asanyarray(df) dx = da.asanyarray(ddf) assert isinstance(dx, da.Array) assert_eq(x, dx) x = np.asanyarray(df.x) dx = da.asanyarray(ddf.x) assert isinstance(dx, da.Array) assert_eq(x, dx) def test_from_func(): x = np.arange(10) f = lambda n: n * x d = from_func(f, (10,), x.dtype, kwargs={'n': 2}) assert d.shape == x.shape assert d.dtype == x.dtype assert_eq(d.compute(), 2 * x) assert same_keys(d, from_func(f, (10,), x.dtype, kwargs={'n': 2})) def test_concatenate3_2(): x = np.array([1, 2]) assert_eq(concatenate3([x, x, x]), np.array([1, 2, 1, 2, 1, 2])) x = np.array([[1, 2]]) assert (concatenate3([[x, x, x], [x, x, x]]) == np.array([[1, 2, 1, 2, 1, 2], [1, 2, 1, 2, 1, 2]])).all() assert (concatenate3([[x, x], [x, x], [x, x]]) == np.array([[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]])).all() x = np.arange(12).reshape((2, 2, 3)) assert_eq(concatenate3([[[x, x, x], [x, x, x]], [[x, x, x], [x, x, x]]]), np.array([[[ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5], [ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5]], [[ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11], [ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11]], [[ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5], [ 0, 1, 2, 0, 1, 2, 0, 1, 2], [ 3, 4, 5, 3, 4, 5, 3, 4, 5]], [[ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11], [ 6, 7, 8, 6, 7, 8, 6, 7, 8], [ 9, 10, 11, 9, 10, 11, 9, 10, 11]]])) def test_map_blocks3(): x = np.arange(10) y = np.arange(10) * 2 d = da.from_array(x, chunks=5) e = da.from_array(y, chunks=5) assert_eq(da.core.map_blocks(lambda a, b: a + 2 * b, d, e, dtype=d.dtype), x + 2 * y) z = np.arange(100).reshape((10, 10)) f = da.from_array(z, chunks=5) func = lambda a, b: a + 2 * b res = da.core.map_blocks(func, d, f, dtype=d.dtype) assert_eq(res, x + 2 * z) assert same_keys(da.core.map_blocks(func, d, f, dtype=d.dtype), res) assert_eq(da.map_blocks(func, f, d, dtype=d.dtype), z + 2 * x) def test_from_array_with_missing_chunks(): x = np.random.randn(2, 4, 3) d = da.from_array(x, chunks=(None, 2, None)) assert d.chunks == da.from_array(x, chunks=(2, 2, 3)).chunks def test_normalize_chunks(): assert normalize_chunks(3, (4, 6)) == ((3, 1), (3, 3)) assert normalize_chunks(((3, 3), (8,)), (6, 8)) == ((3, 3), (8, )) assert normalize_chunks((4, 5), (9,)) == ((4, 5), ) assert normalize_chunks((4, 5), (9, 9)) == ((4, 4, 1), (5, 4)) assert normalize_chunks(-1, (5, 5)) == ((5,), (5, )) assert normalize_chunks((3, -1), (5, 5)) == ((3, 2), (5, )) assert normalize_chunks({0: 3}, (5, 5)) == ((3, 2), (5,)) assert normalize_chunks([[2, 2], [3, 3]]) == ((2, 2), (3, 3)) assert normalize_chunks(10, (30, 5)), ((10, 10, 10), (5,)) assert normalize_chunks((), (0, 0)), ((0,), (0,)) assert normalize_chunks(-1, (0, 3)), ((0,), (3,)) assert normalize_chunks("auto", shape=(20,), limit=5, dtype='uint8') == \ ((5, 5, 5, 5),) with pytest.raises(ValueError): normalize_chunks(((10,), ), (11, )) with pytest.raises(ValueError): normalize_chunks(((5, ), (5, )), (5, )) def test_align_chunks_to_previous_chunks(): chunks = normalize_chunks('auto', shape=(2000,), previous_chunks=(512,), limit='600 B', dtype=np.uint8) assert chunks == ((512, 512, 512, 2000 - 512 * 3),) chunks = normalize_chunks('auto', shape=(2000,), previous_chunks=(128,), limit='600 B', dtype=np.uint8) assert chunks == ((512, 512, 512, 2000 - 512 * 3),) chunks = normalize_chunks('auto', shape=(2000,), previous_chunks=(512,), limit='1200 B', dtype=np.uint8) assert chunks == ((1024, 2000 - 1024),) chunks = normalize_chunks('auto', shape=(3, 10211, 10376), previous_chunks=(1, 512, 512), limit='1MiB', dtype=np.float32) assert chunks[0] == (1, 1, 1) assert all(c % 512 == 0 for c in chunks[1][:-1]) assert all(c % 512 == 0 for c in chunks[2][:-1]) def test_raise_on_no_chunks(): x = da.ones(6, chunks=3) try: Array(x.dask, x.name, chunks=None, dtype=x.dtype, shape=None) assert False except ValueError as e: assert "dask" in str(e) assert ".org" in str(e) def test_chunks_is_immutable(): x = da.ones(6, chunks=3) try: x.chunks = 2 assert False except TypeError as e: assert 'rechunk(2)' in str(e) def test_raise_on_bad_kwargs(): x = da.ones(5, chunks=3) try: da.minimum(x, foo=None) except TypeError as e: assert 'minimum' in str(e) assert 'foo' in str(e) def test_long_slice(): x = np.arange(10000) d = da.from_array(x, chunks=1) assert_eq(d[8000:8200], x[8000:8200]) def test_h5py_newaxis(): h5py = pytest.importorskip('h5py') with tmpfile('h5') as fn: with h5py.File(fn) as f: x = f.create_dataset('/x', shape=(10, 10), dtype='f8') d = da.from_array(x, chunks=(5, 5)) assert d[None, :, :].compute(scheduler='sync').shape == (1, 10, 10) assert d[:, None, :].compute(scheduler='sync').shape == (10, 1, 10) assert d[:, :, None].compute(scheduler='sync').shape == (10, 10, 1) assert same_keys(d[:, :, None], d[:, :, None]) def test_ellipsis_slicing(): assert_eq(da.ones(4, chunks=2)[...], np.ones(4)) def test_point_slicing(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(3, 4)) result = d.vindex[[1, 2, 5, 5], [3, 1, 6, 1]] assert_eq(result, x[[1, 2, 5, 5], [3, 1, 6, 1]]) result = d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]] assert_eq(result, x[[0, 1, 6, 0], [0, 1, 0, 7]]) assert same_keys(result, d.vindex[[0, 1, 6, 0], [0, 1, 0, 7]]) def test_point_slicing_with_full_slice(): from dask.array.core import _vindex_transpose, _get_axis x = np.arange(4 * 5 * 6 * 7).reshape((4, 5, 6, 7)) d = da.from_array(x, chunks=(2, 3, 3, 4)) inds = [[[1, 2, 3], None, [3, 2, 1], [5, 3, 4]], [[1, 2, 3], None, [4, 3, 2], None], [[1, 2, 3], [3, 2, 1]], [[1, 2, 3], [3, 2, 1], [3, 2, 1], [5, 3, 4]], [[], [], [], None], [np.array([1, 2, 3]), None, np.array([4, 3, 2]), None], [None, None, [1, 2, 3], [4, 3, 2]], [None, [0, 2, 3], None, [0, 3, 2]]] for ind in inds: slc = [i if isinstance(i, (np.ndarray, list)) else slice(None, None) for i in ind] result = d.vindex[tuple(slc)] # Rotate the expected result accordingly axis = _get_axis(ind) expected = _vindex_transpose(x[tuple(slc)], axis) assert_eq(result, expected) # Always have the first axis be the length of the points k = len(next(i for i in ind if isinstance(i, (np.ndarray, list)))) assert result.shape[0] == k def test_slice_with_floats(): d = da.ones((5,), chunks=(3,)) with pytest.raises(IndexError): d[1.5] with pytest.raises(IndexError): d[0:1.5] with pytest.raises(IndexError): d[[1, 1.5]] def test_slice_with_integer_types(): x = np.arange(10) dx = da.from_array(x, chunks=5) inds = np.array([0, 3, 6], dtype='u8') assert_eq(dx[inds], x[inds]) assert_eq(dx[inds.astype('u4')], x[inds.astype('u4')]) inds = np.array([0, 3, 6], dtype=np.int64) assert_eq(dx[inds], x[inds]) assert_eq(dx[inds.astype('u4')], x[inds.astype('u4')]) def test_index_with_integer_types(): x = np.arange(10) dx = da.from_array(x, chunks=5) inds = int(3) assert_eq(dx[inds], x[inds]) inds = np.int64(3) assert_eq(dx[inds], x[inds]) def test_vindex_basic(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(3, 4)) # cases where basic and advanced indexing coincide result = d.vindex[0] assert_eq(result, x[0]) result = d.vindex[0, 1] assert_eq(result, x[0, 1]) result = d.vindex[[0, 1], ::-1] # slices last assert_eq(result, x[:2, ::-1]) def test_vindex_nd(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(3, 4)) result = d.vindex[[[0, 1], [6, 0]], [[0, 1], [0, 7]]] assert_eq(result, x[[[0, 1], [6, 0]], [[0, 1], [0, 7]]]) result = d.vindex[np.arange(7)[:, None], np.arange(8)[None, :]] assert_eq(result, x) result = d.vindex[np.arange(7)[None, :], np.arange(8)[:, None]] assert_eq(result, x.T) def test_vindex_negative(): x = np.arange(10) d = da.from_array(x, chunks=(5, 5)) result = d.vindex[np.array([0, -1])] assert_eq(result, x[np.array([0, -1])]) def test_vindex_errors(): d = da.ones((5, 5, 5), chunks=(3, 3, 3)) pytest.raises(IndexError, lambda: d.vindex[np.newaxis]) pytest.raises(IndexError, lambda: d.vindex[[1, 2], [1, 2, 3]]) pytest.raises(IndexError, lambda: d.vindex[[True] * 5]) pytest.raises(IndexError, lambda: d.vindex[[0], [5]]) pytest.raises(IndexError, lambda: d.vindex[[0], [-6]]) def test_vindex_merge(): from dask.array.core import _vindex_merge locations = [1], [2, 0] values = [np.array([[1, 2, 3]]), np.array([[10, 20, 30], [40, 50, 60]])] assert (_vindex_merge(locations, values) == np.array([[40, 50, 60], [1, 2, 3], [10, 20, 30]])).all() def test_vindex_identity(): rng = da.random.RandomState(42) a, b = 10, 20 x = rng.random(a, chunks=a // 2) assert x is x.vindex[:] assert x is x.vindex[:a] pytest.raises(IndexError, lambda: x.vindex[:a - 1]) pytest.raises(IndexError, lambda: x.vindex[1:]) pytest.raises(IndexError, lambda: x.vindex[0:a:2]) x = rng.random((a, b), chunks=(a // 2, b // 2)) assert x is x.vindex[:, :] assert x is x.vindex[:a, :b] pytest.raises(IndexError, lambda: x.vindex[:, :b - 1]) pytest.raises(IndexError, lambda: x.vindex[:, 1:]) pytest.raises(IndexError, lambda: x.vindex[:, 0:b:2]) def test_empty_array(): assert_eq(np.arange(0), da.arange(0, chunks=5)) def test_memmap(): with tmpfile('npy') as fn_1: with tmpfile('npy') as fn_2: try: x = da.arange(100, chunks=15) target = np.memmap(fn_1, shape=x.shape, mode='w+', dtype=x.dtype) x.store(target) assert_eq(target, x) np.save(fn_2, target) assert_eq(np.load(fn_2, mmap_mode='r'), x) finally: target._mmap.close() def test_to_npy_stack(): x = np.arange(5 * 10 * 10).reshape((5, 10, 10)) d = da.from_array(x, chunks=(2, 4, 4)) with tmpdir() as dirname: stackdir = os.path.join(dirname, 'test') da.to_npy_stack(stackdir, d, axis=0) assert os.path.exists(os.path.join(stackdir, '0.npy')) assert (np.load(os.path.join(stackdir, '1.npy')) == x[2:4]).all() e = da.from_npy_stack(stackdir) assert_eq(d, e) def test_view(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(2, 3)) assert_eq(x.view('i4'), d.view('i4')) assert_eq(x.view('i2'), d.view('i2')) assert all(isinstance(s, int) for s in d.shape) x = np.arange(8, dtype='i1') d = da.from_array(x, chunks=(4,)) assert_eq(x.view('i4'), d.view('i4')) with pytest.raises(ValueError): x = np.arange(8, dtype='i1') d = da.from_array(x, chunks=(3,)) d.view('i4') with pytest.raises(ValueError): d.view('i4', order='asdf') def test_view_fortran(): x = np.asfortranarray(np.arange(64).reshape((8, 8))) d = da.from_array(x, chunks=(2, 3)) assert_eq(x.T.view('i4').T, d.view('i4', order='F')) assert_eq(x.T.view('i2').T, d.view('i2', order='F')) def test_h5py_tokenize(): h5py = pytest.importorskip('h5py') with tmpfile('hdf5') as fn1: with tmpfile('hdf5') as fn2: f = h5py.File(fn1) g = h5py.File(fn2) f['x'] = np.arange(10).astype(float) g['x'] = np.ones(10).astype(float) x1 = f['x'] x2 = g['x'] assert tokenize(x1) != tokenize(x2) def test_map_blocks_with_changed_dimension(): x = np.arange(56).reshape((7, 8)) d = da.from_array(x, chunks=(7, 4)) e = d.map_blocks(lambda b: b.sum(axis=0), chunks=(4,), drop_axis=0, dtype=d.dtype) assert e.chunks == ((4, 4),) assert_eq(e, x.sum(axis=0)) # Provided chunks have wrong shape with pytest.raises(ValueError): d.map_blocks(lambda b: b.sum(axis=0), chunks=(7, 4), drop_axis=0) with pytest.raises(ValueError): d.map_blocks(lambda b: b.sum(axis=0), chunks=((4, 4, 4),), drop_axis=0) d = da.from_array(x, chunks=(4, 8)) e = d.map_blocks(lambda b: b.sum(axis=1), drop_axis=1, dtype=d.dtype) assert e.chunks == ((4, 3),) assert_eq(e, x.sum(axis=1)) x = np.arange(64).reshape((8, 8)) d = da.from_array(x, chunks=(4, 4)) e = d.map_blocks(lambda b: b[None, :, :, None], chunks=(1, 4, 4, 1), new_axis=[0, 3], dtype=d.dtype) assert e.chunks == ((1,), (4, 4), (4, 4), (1,)) assert_eq(e, x[None, :, :, None]) e = d.map_blocks(lambda b: b[None, :, :, None], new_axis=[0, 3], dtype=d.dtype) assert e.chunks == ((1,), (4, 4), (4, 4), (1,)) assert_eq(e, x[None, :, :, None]) # Adding axis with a gap with pytest.raises(ValueError): d.map_blocks(lambda b: b, new_axis=(3, 4)) # Both new_axis and drop_axis d = da.from_array(x, chunks=(8, 4)) e = d.map_blocks(lambda b: b.sum(axis=0)[:, None, None], drop_axis=0, new_axis=(1, 2), dtype=d.dtype) assert e.chunks == ((4, 4), (1,), (1,)) assert_eq(e, x.sum(axis=0)[:, None, None]) d = da.from_array(x, chunks=(4, 8)) e = d.map_blocks(lambda b: b.sum(axis=1)[:, None, None], drop_axis=1, new_axis=(1, 2), dtype=d.dtype) assert e.chunks == ((4, 4), (1,), (1,)) assert_eq(e, x.sum(axis=1)[:, None, None]) def test_map_blocks_with_changed_dimension_and_broadcast_chunks(): # https://github.com/dask/dask/issues/4299 a = da.from_array([1, 2, 3], 3) b = da.from_array(np.array([0, 1, 2, 0, 1, 2]), chunks=3) result = da.map_blocks(operator.add, a, b, chunks=b.chunks) expected = da.from_array(np.array([1, 3, 5, 1, 3, 5]), chunks=3) assert_eq(result, expected) def test_broadcast_chunks(): assert broadcast_chunks() == () assert broadcast_chunks(((2, 3),)) == ((2, 3),) assert broadcast_chunks(((5, 5),), ((5, 5),)) == ((5, 5),) a = ((10, 10, 10), (5, 5),) b = ((5, 5),) assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5),) assert broadcast_chunks(b, a) == ((10, 10, 10), (5, 5),) a = ((10, 10, 10), (5, 5),) b = ((1,), (5, 5),) assert broadcast_chunks(a, b) == ((10, 10, 10), (5, 5),) a = ((10, 10, 10), (5, 5),) b = ((3, 3,), (5, 5),) with pytest.raises(ValueError): broadcast_chunks(a, b) a = ((1,), (5, 5),) b = ((1,), (5, 5),) assert broadcast_chunks(a, b) == a a = ((1,), (np.nan, np.nan, np.nan),) b = ((3, 3), (1,),) r = broadcast_chunks(a, b) assert r[0] == b[0] and np.allclose(r[1], a[1], equal_nan=True) a = ((3, 3), (1,),) b = ((1,), (np.nan, np.nan, np.nan),) r = broadcast_chunks(a, b) assert r[0] == a[0] and np.allclose(r[1], b[1], equal_nan=True) a = ((3, 3,), (5, 5),) b = ((1,), (np.nan, np.nan, np.nan),) with pytest.raises(ValueError): broadcast_chunks(a, b) def test_chunks_error(): x = np.ones((10, 10)) with pytest.raises(ValueError): da.from_array(x, chunks=(5,)) def test_array_compute_forward_kwargs(): x = da.arange(10, chunks=2).sum() x.compute(bogus_keyword=10) def test_dont_fuse_outputs(): dsk = {('x', 0): np.array([1, 2]), ('x', 1): (inc, ('x', 0))} a = da.Array(dsk, 'x', chunks=(2,), shape=(4,), dtype=np.array([1]).dtype) assert_eq(a, np.array([1, 2, 2, 3], dtype=a.dtype)) def test_dont_dealias_outputs(): dsk = {('x', 0, 0): np.ones((2, 2)), ('x', 0, 1): np.ones((2, 2)), ('x', 1, 0): np.ones((2, 2)), ('x', 1, 1): ('x', 0, 0)} a = da.Array(dsk, 'x', chunks=(2, 2), shape=(4, 4), dtype=np.ones(1).dtype) assert_eq(a, np.ones((4, 4))) def test_timedelta_op(): x = np.array([np.timedelta64(10, 'h')]) y = np.timedelta64(1, 'h') a = da.from_array(x, chunks=(1,)) / y assert a.compute() == x / y def test_to_delayed(): x = da.random.random((4, 4), chunks=(2, 2)) y = x + 10 [[a, b], [c, d]] = y.to_delayed() assert_eq(a.compute(), y[:2, :2]) s = 2 x = da.from_array(np.array(s), chunks=0) a = x.to_delayed()[tuple()] assert a.compute() == s def test_to_delayed_optimize_graph(): x = da.ones((4, 4), chunks=(2, 2)) y = x[1:][1:][1:][:, 1:][:, 1:][:, 1:] # optimizations d = y.to_delayed().flatten().tolist()[0] assert len([k for k in d.dask if k[0].startswith('getitem')]) == 1 # no optimizations d2 = y.to_delayed(optimize_graph=False).flatten().tolist()[0] assert dict(d2.dask) == dict(y.dask) assert (d.compute() == d2.compute()).all() def test_cumulative(): x = da.arange(20, chunks=5) assert_eq(x.cumsum(axis=0), np.arange(20).cumsum()) assert_eq(x.cumprod(axis=0), np.arange(20).cumprod()) assert_eq(da.nancumsum(x, axis=0), nancumsum(np.arange(20))) assert_eq(da.nancumprod(x, axis=0), nancumprod(np.arange(20))) a = np.random.random((20)) rs = np.random.RandomState(0) a[rs.rand(*a.shape) < 0.5] = np.nan x = da.from_array(a, chunks=5) assert_eq(da.nancumsum(x, axis=0), nancumsum(a)) assert_eq(da.nancumprod(x, axis=0), nancumprod(a)) a = np.random.random((20, 24)) x = da.from_array(a, chunks=(6, 5)) assert_eq(x.cumsum(axis=0), a.cumsum(axis=0)) assert_eq(x.cumsum(axis=1), a.cumsum(axis=1)) assert_eq(x.cumprod(axis=0), a.cumprod(axis=0)) assert_eq(x.cumprod(axis=1), a.cumprod(axis=1)) assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0)) assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1)) assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0)) assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1)) a = np.random.random((20, 24)) rs = np.random.RandomState(0) a[rs.rand(*a.shape) < 0.5] = np.nan x = da.from_array(a, chunks=(6, 5)) assert_eq(da.nancumsum(x, axis=0), nancumsum(a, axis=0)) assert_eq(da.nancumsum(x, axis=1), nancumsum(a, axis=1)) assert_eq(da.nancumprod(x, axis=0), nancumprod(a, axis=0)) assert_eq(da.nancumprod(x, axis=1), nancumprod(a, axis=1)) a = np.random.random((20, 24, 13)) x = da.from_array(a, chunks=(6, 5, 4)) for axis in [0, 1, 2, -1, -2, -3]: assert_eq(x.cumsum(axis=axis), a.cumsum(axis=axis)) assert_eq(x.cumprod(axis=axis), a.cumprod(axis=axis)) assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis)) assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis)) a = np.random.random((20, 24, 13)) rs = np.random.RandomState(0) a[rs.rand(*a.shape) < 0.5] = np.nan x = da.from_array(a, chunks=(6, 5, 4)) for axis in [0, 1, 2, -1, -2, -3]: assert_eq(da.nancumsum(x, axis=axis), nancumsum(a, axis=axis)) assert_eq(da.nancumprod(x, axis=axis), nancumprod(a, axis=axis)) with pytest.raises(ValueError): x.cumsum(axis=3) with pytest.raises(ValueError): x.cumsum(axis=-4) def test_from_delayed(): v = delayed(np.ones)((5, 3)) x = from_delayed(v, shape=(5, 3), dtype=np.ones(0).dtype) assert isinstance(x, Array) assert_eq(x, np.ones((5, 3))) def test_A_property(): x = da.ones(5, chunks=(2,)) assert x.A is x def test_copy_mutate(): x = da.arange(5, chunks=(2,)) y = x.copy() memo = {} y2 = copy.deepcopy(x, memo=memo) x[x % 2 == 0] = -1 xx = np.arange(5) xx[xx % 2 == 0] = -1 assert_eq(x, xx) assert_eq(y, np.arange(5)) assert_eq(y2, np.arange(5)) assert memo[id(x)] is y2 def test_npartitions(): assert da.ones(5, chunks=(2,)).npartitions == 3 assert da.ones((5, 5), chunks=(2, 3)).npartitions == 6 def test_astype_gh1151(): a = np.arange(5).astype(np.int32) b = da.from_array(a, (1,)) assert_eq(a.astype(np.int16), b.astype(np.int16)) def test_elemwise_name(): assert (da.ones(5, chunks=2) + 1).name.startswith('add-') def test_map_blocks_name(): assert da.ones(5, chunks=2).map_blocks(inc).name.startswith('inc-') def test_from_array_names(): pytest.importorskip('distributed') x = np.ones(10) d = da.from_array(x, chunks=2) names = countby(key_split, d.dask) assert set(names.values()) == set([1, 5]) def test_array_picklable(): from pickle import loads, dumps a = da.arange(100, chunks=25) a2 = loads(dumps(a)) assert_eq(a, a2) def test_from_array_raises_on_bad_chunks(): x = np.ones(10) with pytest.raises(ValueError): da.from_array(x, chunks=(5, 5, 5)) # with pytest.raises(ValueError): # da.from_array(x, chunks=100) with pytest.raises(ValueError): da.from_array(x, chunks=((5, 5, 5),)) def test_concatenate_axes(): x = np.ones((2, 2, 2)) assert_eq(concatenate_axes([x, x], axes=[0]), np.ones((4, 2, 2))) assert_eq(concatenate_axes([x, x, x], axes=[0]), np.ones((6, 2, 2))) assert_eq(concatenate_axes([x, x], axes=[1]), np.ones((2, 4, 2))) assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 1]), np.ones((4, 4, 2))) assert_eq(concatenate_axes([[x, x], [x, x]], axes=[0, 2]), np.ones((4, 2, 4))) assert_eq(concatenate_axes([[x, x, x], [x, x, x]], axes=[1, 2]), np.ones((2, 4, 6))) with pytest.raises(ValueError): concatenate_axes([[x, x], [x, x]], axes=[0]) # not all nested lists accounted for with pytest.raises(ValueError): concatenate_axes([x, x], axes=[0, 1, 2, 3]) # too many axes def test_blockwise_concatenate(): x = da.ones((4, 4, 4), chunks=(2, 2, 2)) y = da.ones((4, 4), chunks=(2, 2)) def f(a, b): assert isinstance(a, np.ndarray) assert isinstance(b, np.ndarray) assert a.shape == (2, 4, 4) assert b.shape == (4, 4) return (a + b).sum(axis=(1, 2)) z = da.blockwise(f, 'i', x, 'ijk', y, 'jk', concatenate=True, dtype=x.dtype) assert_eq(z, np.ones(4) * 32) z = da.blockwise(add, 'ij', y, 'ij', y, 'ij', concatenate=True, dtype=x.dtype) assert_eq(z, np.ones((4, 4)) * 2) def f(a, b, c): assert isinstance(a, np.ndarray) assert isinstance(b, np.ndarray) assert isinstance(c, np.ndarray) assert a.shape == (4, 2, 4) assert b.shape == (4, 4) assert c.shape == (4, 2) return np.ones(5) z = da.blockwise(f, 'j', x, 'ijk', y, 'ki', y, 'ij', concatenate=True, dtype=x.dtype) assert_eq(z, np.ones(10), check_shape=False) def test_common_blockdim(): assert common_blockdim([(5,), (5,)]) == (5,) assert common_blockdim([(5,), (2, 3,)]) == (2, 3) assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5) assert common_blockdim([(5, 5), (2, 3, 5)]) == (2, 3, 5) assert common_blockdim([(5, 2, 3), (2, 3, 5)]) == (2, 3, 2, 3) assert common_blockdim([(1, 2), (2, 1)]) == (1, 1, 1) assert common_blockdim([(1, 2, 2), (2, 1, 2), (2, 2, 1)]) == (1, 1, 1, 1, 1) def test_uneven_chunks_that_fit_neatly(): x = da.arange(10, chunks=((5, 5),)) y = da.ones(10, chunks=((5, 2, 3),)) assert_eq(x + y, np.arange(10) + np.ones(10)) z = x + y assert z.chunks == ((5, 2, 3),) def test_elemwise_uneven_chunks(): x = da.arange(10, chunks=((4, 6),)) y = da.ones(10, chunks=((6, 4),)) assert_eq(x + y, np.arange(10) + np.ones(10)) z = x + y assert z.chunks == ((4, 2, 4),) x = da.random.random((10, 10), chunks=((4, 6), (5, 2, 3))) y = da.random.random((4, 10, 10), chunks=((2, 2), (6, 4), (2, 3, 5))) z = x + y assert_eq(x + y, x.compute() + y.compute()) assert z.chunks == ((2, 2), (4, 2, 4), (2, 3, 2, 3)) def test_uneven_chunks_blockwise(): x = da.random.random((10, 10), chunks=((2, 3, 2, 3), (5, 5))) y = da.random.random((10, 10), chunks=((4, 4, 2), (4, 2, 4))) z = da.blockwise(np.dot, 'ik', x, 'ij', y, 'jk', dtype=x.dtype, concatenate=True) assert z.chunks == (x.chunks[0], y.chunks[1]) assert_eq(z, x.compute().dot(y)) def test_warn_bad_rechunking(): x = da.ones((20, 20), chunks=(20, 1)) y = da.ones((20, 20), chunks=(1, 20)) with warnings.catch_warnings(record=True) as record: x + y assert record assert '20' in record[0].message.args[0] def test_concatenate_stack_dont_warn(): with warnings.catch_warnings(record=True) as record: da.concatenate([da.ones(2, chunks=1)] * 62) assert not record with warnings.catch_warnings(record=True) as record: da.stack([da.ones(2, chunks=1)] * 62) assert not record def test_map_blocks_delayed(): x = da.ones((10, 10), chunks=(5, 5)) y = np.ones((5, 5)) z = x.map_blocks(add, y, dtype=x.dtype) yy = delayed(y) zz = x.map_blocks(add, yy, dtype=x.dtype) assert_eq(z, zz) assert yy.key in zz.dask def test_no_chunks(): X = np.arange(11) dsk = {('x', 0): np.arange(5), ('x', 1): np.arange(5, 11)} x = Array(dsk, 'x', ((np.nan, np.nan,),), np.arange(1).dtype) assert_eq(x + 1, X + 1) assert_eq(x.sum(), X.sum()) assert_eq((x + 1).std(), (X + 1).std()) assert_eq((x + x).std(), (X + X).std()) assert_eq((x + x).std(keepdims=True), (X + X).std(keepdims=True)) def test_no_chunks_2d(): X = np.arange(24).reshape((4, 6)) x = da.from_array(X, chunks=(2, 2)) x._chunks = ((np.nan, np.nan), (np.nan, np.nan, np.nan)) with pytest.warns(None): # zero division warning assert_eq(da.log(x), np.log(X)) assert_eq(x.T, X.T) assert_eq(x.sum(axis=0, keepdims=True), X.sum(axis=0, keepdims=True)) assert_eq(x.sum(axis=1, keepdims=True), X.sum(axis=1, keepdims=True)) assert_eq(x.dot(x.T + 1), X.dot(X.T + 1)) def test_no_chunks_yes_chunks(): X = np.arange(24).reshape((4, 6)) x = da.from_array(X, chunks=(2, 2)) x._chunks = ((2, 2), (np.nan, np.nan, np.nan)) assert (x + 1).chunks == ((2, 2), (np.nan, np.nan, np.nan)) assert (x.T).chunks == ((np.nan, np.nan, np.nan), (2, 2)) assert (x.dot(x.T)).chunks == ((2, 2), (2, 2)) def test_raise_informative_errors_no_chunks(): X = np.arange(10) a = da.from_array(X, chunks=(5, 5)) a._chunks = ((np.nan, np.nan),) b = da.from_array(X, chunks=(4, 4, 2)) b._chunks = ((np.nan, np.nan, np.nan),) for op in [lambda: a + b, lambda: a[1], lambda: a[::2], lambda: a[-5], lambda: a.rechunk(3), lambda: a.reshape(2, 5)]: with pytest.raises(ValueError) as e: op() if 'chunk' not in str(e) or 'unknown' not in str(e): op() def test_no_chunks_slicing_2d(): X = np.arange(24).reshape((4, 6)) x = da.from_array(X, chunks=(2, 2)) x._chunks = ((2, 2), (np.nan, np.nan, np.nan)) assert_eq(x[0], X[0]) for op in [lambda: x[:, 4], lambda: x[:, ::2], lambda: x[0, 2:4]]: with pytest.raises(ValueError) as e: op() assert 'chunk' in str(e) and 'unknown' in str(e) def test_index_array_with_array_1d(): x = np.arange(10) dx = da.from_array(x, chunks=(5,)) dx._chunks = ((np.nan, np.nan),) assert_eq(x[x > 6], dx[dx > 6]) assert_eq(x[x % 2 == 0], dx[dx % 2 == 0]) dy = da.ones(11, chunks=(3,)) with pytest.raises(ValueError): dx[dy > 5] def test_index_array_with_array_2d(): x = np.arange(24).reshape((4, 6)) dx = da.from_array(x, chunks=(2, 2)) dx._chunks = ((2, 2), (np.nan, np.nan, np.nan)) assert (sorted(x[x % 2 == 0].tolist()) == sorted(dx[dx % 2 == 0].compute().tolist())) assert (sorted(x[x > 6].tolist()) == sorted(dx[dx > 6].compute().tolist())) @pytest.mark.xfail(reason='Chunking does not align well') def test_index_array_with_array_3d_2d(): x = np.arange(4**3).reshape((4, 4, 4)) dx = da.from_array(x, chunks=(2, 2, 2)) ind = np.random.random((4, 4)) > 0.5 ind = np.arange(4 ** 2).reshape((4, 4)) % 2 == 0 dind = da.from_array(ind, (2, 2)) assert_eq(x[ind], dx[dind]) assert_eq(x[:, ind], dx[:, dind]) def test_setitem_1d(): x = np.arange(10) dx = da.from_array(x.copy(), chunks=(5,)) x[x > 6] = -1 x[x % 2 == 0] = -2 dx[dx > 6] = -1 dx[dx % 2 == 0] = -2 assert_eq(x, dx) def test_setitem_2d(): x = np.arange(24).reshape((4, 6)) dx = da.from_array(x.copy(), chunks=(2, 2)) x[x > 6] = -1 x[x % 2 == 0] = -2 dx[dx > 6] = -1 dx[dx % 2 == 0] = -2 assert_eq(x, dx) @pytest.mark.skipif(np.__version__ >= '1.13.0', reason='boolean slicing rules changed') def test_setitem_mixed_d(): x = np.arange(24).reshape((4, 6)) dx = da.from_array(x, chunks=(2, 2)) x[x[0, None] > 2] = -1 dx[dx[0, None] > 2] = -1 assert_eq(x, dx) x[x[None, 0] > 2] = -1 dx[dx[None, 0] > 2] = -1 assert_eq(x, dx) def test_setitem_errs(): x = da.ones((4, 4), chunks=(2, 2)) with pytest.raises(ValueError): x[x > 1] = x def test_zero_slice_dtypes(): x = da.arange(5, chunks=1) y = x[[]] assert y.dtype == x.dtype assert y.shape == (0,) assert_eq(x[[]], np.arange(5)[[]]) def test_zero_sized_array_rechunk(): x = da.arange(5, chunks=1)[:0] y = da.blockwise(identity, 'i', x, 'i', dtype=x.dtype) assert_eq(x, y) def test_blockwise_zero_shape(): da.blockwise( lambda x: x, 'i', da.arange(10, chunks=10), 'i', da.from_array(np.ones((0, 2)), ((0,), 2)), 'ab', da.from_array(np.ones((0,)), ((0,),)), 'a', dtype='float64' ) def test_blockwise_zero_shape_new_axes(): da.blockwise( lambda x: np.ones(42), 'i', da.from_array(np.ones((0, 2)), ((0,), 2)), 'ab', da.from_array(np.ones((0,)), ((0,),)), 'a', dtype='float64', new_axes={'i': 42} ) def test_broadcast_against_zero_shape(): assert_eq(da.arange(1, chunks=1)[:0] + 0, np.arange(1)[:0] + 0) assert_eq(da.arange(1, chunks=1)[:0] + 0.1, np.arange(1)[:0] + 0.1) assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0, np.ones((5, 5))[:0] + 0) assert_eq(da.ones((5, 5), chunks=(2, 3))[:0] + 0.1, np.ones((5, 5))[:0] + 0.1) assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0, np.ones((5, 5))[:, :0] + 0) assert_eq(da.ones((5, 5), chunks=(2, 3))[:, :0] + 0.1, np.ones((5, 5))[:, :0] + 0.1) def test_from_array_name(): x = np.array([1, 2, 3, 4, 5]) chunks = x.shape # Default is tokenize the array dx = da.from_array(x, chunks=chunks) hashed_name = dx.name assert da.from_array(x, chunks=chunks).name == hashed_name # Specify name directly assert da.from_array(x, chunks=chunks, name='x').name == 'x' # False gives a random name dx2 = da.from_array(x, chunks=chunks, name=False) dx3 = da.from_array(x, chunks=chunks, name=False) assert dx2.name != hashed_name assert dx3.name != hashed_name assert dx2.name != dx3.name def test_concatenate_errs(): with pytest.raises(ValueError) as e: da.concatenate([da.zeros((2, 1), chunks=(2, 1)), da.zeros((2, 3), chunks=(2, 3))]) assert 'shape' in str(e).lower() assert '(2, 1)' in str(e) with pytest.raises(ValueError): da.concatenate([da.zeros((1, 2), chunks=(1, 2)), da.zeros((3, 2), chunks=(3, 2))], axis=1) def test_stack_errs(): with pytest.raises(ValueError) as e: da.stack([da.zeros((2,), chunks=(2))] * 10 + [da.zeros((3,), chunks=(3))] * 10) assert 'shape' in str(e.value).lower() assert '(2,)' in str(e.value) assert len(str(e.value)) < 105 def test_blockwise_with_numpy_arrays(): x = np.ones(10) y = da.ones(10, chunks=(5,)) assert_eq(x + y, x + x) s = da.sum(x) assert any(x is v for v in s.dask.values()) @pytest.mark.parametrize('chunks', (100, 6)) @pytest.mark.parametrize('other', [[0, 0, 1], [2, 1, 3], (0, 0, 1)]) def test_elemwise_with_lists(chunks, other): x = np.arange(12).reshape((4, 3)) d = da.arange(12, chunks=chunks).reshape((4, 3)) x2 = np.vstack([x[:, 0], x[:, 1], x[:, 2]]).T d2 = da.vstack([d[:, 0], d[:, 1], d[:, 2]]).T assert_eq(x2, d2) x3 = x2 * other d3 = d2 * other assert_eq(x3, d3) def test_constructor_plugin(): L = [] L2 = [] with dask.config.set(array_plugins=[L.append, L2.append]): x = da.ones(10, chunks=5) y = x + 1 assert L == L2 == [x, y] with dask.config.set(array_plugins=[lambda x: x.compute()]): x = da.ones(10, chunks=5) y = x + 1 assert isinstance(y, np.ndarray) assert len(L) == 2 def test_no_warnings_on_metadata(): x = da.ones(5, chunks=3) with warnings.catch_warnings(record=True) as record: da.arccos(x) assert not record def test_delayed_array_key_hygeine(): a = da.zeros((1,), chunks=(1,)) d = delayed(identity)(a) b = da.from_delayed(d, shape=a.shape, dtype=a.dtype) assert_eq(a, b) def test_empty_chunks_in_array_len(): x = da.ones((), chunks=()) with pytest.raises(TypeError) as exc_info: len(x) err_msg = 'len() of unsized object' assert err_msg in str(exc_info.value) @pytest.mark.parametrize('dtype', [None, [('a', 'f4'), ('b', object)]]) def test_meta(dtype): a = da.zeros((1,), chunks=(1,)) assert a._meta.dtype == a.dtype assert isinstance(a._meta, np.ndarray) assert a.nbytes < 1000 @pytest.mark.parametrize('shape,limit,expected', [ (100, 10, (10,) * 10), (20, 10, (10, 10)), (20, 5, (5, 5, 5, 5)), (24, 5, (4, 4, 4, 4, 4, 4)), # common factor is close, use it (23, 5, (5, 5, 5, 5, 3)), # relatively prime, don't use 1s (1000, 167, (125,) * 8), # find close value ]) def test_normalize_chunks_auto_1d(shape, limit, expected): result = normalize_chunks('auto', (shape,), limit=limit * 8, dtype=np.float64) assert result == (expected,) @pytest.mark.parametrize('shape,chunks,limit,expected', [ ((20, 20), ('auto', 2), 20, ((10, 10), (2,) * 10)), ((20, 20), ('auto', (2, 2, 2, 2, 2, 5, 5)), 20, ((4, 4, 4, 4, 4), (2, 2, 2, 2, 2, 5, 5))), ((1, 20), 'auto', 10, ((1,), (10, 10))), ]) def test_normalize_chunks_auto_2d(shape, chunks, limit, expected): result = normalize_chunks(chunks, shape, limit=limit, dtype='uint8') assert result == expected def test_normalize_chunks_auto_3d(): result = normalize_chunks(('auto', 'auto', 2), (20, 20, 20), limit=200, dtype='uint8') expected = ((10, 10), (10, 10), (2,) * 10) assert result == expected result = normalize_chunks('auto', (20, 20, 20), limit=8, dtype='uint8') expected = ((2,) * 10,) * 3 assert result == expected def test_constructors_chunks_dict(): x = da.ones((20, 20), chunks={0: 10, 1: 5}) assert x.chunks == ((10, 10), (5, 5, 5, 5)) x = da.ones((20, 20), chunks={0: 10, 1: "auto"}) assert x.chunks == ((10, 10), (20,)) def test_from_array_chunks_dict(): with dask.config.set({'array.chunk-size': '128kiB'}): x = np.empty((100, 100, 100)) y = da.from_array(x, chunks={0: 10, 1: -1, 2: 'auto'}) z = da.from_array(x, chunks=(10, 100, 10)) assert y.chunks == z.chunks @pytest.mark.parametrize('dtype', [object, [('a', object), ('b', int)]]) def test_normalize_chunks_object_dtype(dtype): x = np.array(['a', 'abc'], dtype=object) with pytest.raises(NotImplementedError): da.from_array(x, chunks='auto') def test_normalize_chunks_tuples_of_tuples(): result = normalize_chunks(((2, 3, 5), 'auto'), (10, 10), limit=10, dtype=np.uint8) expected = ((2, 3, 5), (2, 2, 2, 2, 2)) assert result == expected def test_normalize_chunks_nan(): with pytest.raises(ValueError) as info: normalize_chunks('auto', (np.nan,), limit=10, dtype=np.uint8) assert "auto" in str(info.value) with pytest.raises(ValueError) as info: normalize_chunks(((np.nan, np.nan), 'auto'), (10, 10), limit=10, dtype=np.uint8) assert "auto" in str(info.value) def test_zarr_roundtrip(): pytest.importorskip('zarr') with tmpdir() as d: a = da.zeros((3, 3), chunks=(1, 1)) a.to_zarr(d) a2 = da.from_zarr(d) assert_eq(a, a2) assert a2.chunks == a.chunks @pytest.mark.parametrize('compute', [False, True]) def test_zarr_return_stored(compute): pytest.importorskip('zarr') with tmpdir() as d: a = da.zeros((3, 3), chunks=(1, 1)) a2 = a.to_zarr(d, compute=compute, return_stored=True) assert isinstance(a2, Array) assert_eq(a, a2) assert a2.chunks == a.chunks def test_zarr_existing_array(): zarr = pytest.importorskip('zarr') c = (1, 1) a = da.ones((3, 3), chunks=c) z = zarr.zeros_like(a, chunks=c) a.to_zarr(z) a2 = da.from_zarr(z) assert_eq(a, a2) assert a2.chunks == a.chunks def test_read_zarr_chunks(): pytest.importorskip('zarr') a = da.zeros((9, ), chunks=(3, )) with tmpdir() as d: a.to_zarr(d) arr = da.from_zarr(d, chunks=(5, )) assert arr.chunks == ((5, 4), ) def test_zarr_pass_mapper(): pytest.importorskip('zarr') import zarr.storage with tmpdir() as d: mapper = zarr.storage.DirectoryStore(d) a = da.zeros((3, 3), chunks=(1, 1)) a.to_zarr(mapper) a2 = da.from_zarr(mapper) assert_eq(a, a2) assert a2.chunks == a.chunks def test_zarr_group(): zarr = pytest.importorskip('zarr') with tmpdir() as d: a = da.zeros((3, 3), chunks=(1, 1)) a.to_zarr(d, component='test') with pytest.raises((OSError, ValueError)): a.to_zarr(d, component='test', overwrite=False) a.to_zarr(d, component='test', overwrite=True) # second time is fine, group exists a.to_zarr(d, component='test2', overwrite=False) a.to_zarr(d, component='nested/test', overwrite=False) group = zarr.open_group(d, mode='r') assert list(group) == ['nested', 'test', 'test2'] assert 'test' in group['nested'] a2 = da.from_zarr(d, component='test') assert_eq(a, a2) assert a2.chunks == a.chunks @pytest.mark.parametrize('data', [[( ), True], [((1, ),), True], [((1, 1, 1),), True], [((1, ), (1, )), True], [((2, 2, 1), ), True], [((2, 2, 3), ), False], [((1, 1, 1), (2, 2, 3)), False], [((1, 2, 1), ), False] ]) def test_regular_chunks(data): chunkset, expected = data assert da.core._check_regular_chunks(chunkset) == expected def test_zarr_nocompute(): pytest.importorskip('zarr') with tmpdir() as d: a = da.zeros((3, 3), chunks=(1, 1)) out = a.to_zarr(d, compute=False) assert isinstance(out, Delayed) dask.compute(out) a2 = da.from_zarr(d) assert_eq(a, a2) assert a2.chunks == a.chunks def test_blocks_indexer(): x = da.arange(10, chunks=2) assert isinstance(x.blocks[0], da.Array) assert_eq(x.blocks[0], x[:2]) assert_eq(x.blocks[-1], x[-2:]) assert_eq(x.blocks[:3], x[:6]) assert_eq(x.blocks[[0, 1, 2]], x[:6]) assert_eq(x.blocks[[3, 0, 2]], np.array([6, 7, 0, 1, 4, 5])) x = da.random.random((20, 20), chunks=(4, 5)) assert_eq(x.blocks[0], x[:4]) assert_eq(x.blocks[0, :3], x[:4, :15]) assert_eq(x.blocks[:, :3], x[:, :15]) x = da.ones((40, 40, 40), chunks=(10, 10, 10)) assert_eq(x.blocks[0, :, 0], np.ones((10, 40, 10))) x = da.ones((2, 2), chunks=1) with pytest.raises(ValueError): x.blocks[[0, 1], [0, 1]] with pytest.raises(ValueError): x.blocks[np.array([0, 1]), [0, 1]] with pytest.raises(ValueError) as info: x.blocks[np.array([0, 1]), np.array([0, 1])] assert "list" in str(info.value) with pytest.raises(ValueError) as info: x.blocks[None, :, :] assert "newaxis" in str(info.value) and "not supported" in str(info.value) with pytest.raises(IndexError) as info: x.blocks[100, 100] def test_dask_array_holds_scipy_sparse_containers(): pytest.importorskip('scipy.sparse') import scipy.sparse x = da.random.random((1000, 10), chunks=(100, 10)) x[x < 0.9] = 0 xx = x.compute() y = x.map_blocks(scipy.sparse.csr_matrix) vs = y.to_delayed().flatten().tolist() values = dask.compute(*vs, scheduler='single-threaded') assert all(isinstance(v, scipy.sparse.csr_matrix) for v in values) yy = y.compute(scheduler='single-threaded') assert isinstance(yy, scipy.sparse.spmatrix) assert (yy == xx).all() z = x.T.map_blocks(scipy.sparse.csr_matrix) zz = z.compute(scheduler='single-threaded') assert isinstance(yy, scipy.sparse.spmatrix) assert (zz == xx.T).all() def test_3851(): with warnings.catch_warnings() as record: Y = da.random.random((10, 10), chunks='auto') da.argmax(Y, axis=0).compute() assert not record def test_3925(): x = da.from_array(np.array(['a', 'b', 'c'], dtype=object), chunks=-1) assert (x[0] == x[0]).compute(scheduler='sync') def test_map_blocks_large_inputs_delayed(): a = da.ones(10, chunks=(5,)) b = np.ones(1000000) c = a.map_blocks(add, b) assert any(b is v for v in c.dask.values()) assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence d = a.map_blocks(lambda x, y: x + y.sum(), y=b) assert_eq(d, d) assert any(b is v for v in d.dask.values()) assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence def test_blockwise_large_inputs_delayed(): a = da.ones(10, chunks=(5,)) b = np.ones(1000000) c = da.blockwise(add, 'i', a, 'i', b, None, dtype=a.dtype) assert any(b is v for v in c.dask.values()) assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence d = da.blockwise(lambda x, y: x + y, 'i', a, 'i', y=b, dtype=a.dtype) assert any(b is v for v in d.dask.values()) assert repr(dict(c.dask)).count(repr(b)[:10]) == 1 # only one occurrence def test_slice_reversed(): x = da.ones(10, chunks=-1) y = x[6:3] assert_eq(y, np.ones(0)) def test_map_blocks_chunks(): x = da.arange(400, chunks=(100,)) y = da.arange(40, chunks=(10,)) def func(a, b): return np.array([a.max(), b.max()]) assert_eq(da.map_blocks(func, x, y, chunks=(2,), dtype=x.dtype), np.array([99, 9, 199, 19, 299, 29, 399, 39])) def test_nbytes_auto(): chunks = normalize_chunks("800B", shape=(500,), dtype='float64') assert chunks == ((100, 100, 100, 100, 100),) chunks = normalize_chunks("200B", shape=(10, 10), dtype='float64') assert chunks == ((5, 5), (5, 5)) chunks = normalize_chunks((5,"200B"), shape=(10, 10), dtype='float64') assert chunks == ((5, 5), (5, 5)) chunks = normalize_chunks("33B", shape=(10, 10), dtype='float64') assert chunks == ((2, 2, 2, 2, 2), (2, 2, 2, 2, 2)) chunks = normalize_chunks("1800B", shape=(10, 20, 30), dtype='float64') assert chunks == ((5, 5), (5, 5, 5, 5), (6, 6, 6, 6, 6)) with pytest.raises(ValueError): normalize_chunks("10B", shape=(10,), limit=20, dtype='float64') with pytest.raises(ValueError): normalize_chunks("100B", shape=(10, 10), limit=20, dtype='float64') with pytest.raises(ValueError): normalize_chunks(("100B", "10B"), shape=(10, 10), dtype='float64') with pytest.raises(ValueError): normalize_chunks(("10B", "10B"), shape=(10, 10), limit=20, dtype='float64')