You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
ORPA-pyOpenRPA/Resources/WPy64-3720/python-3.7.2.amd64/Lib/site-packages/dask/array/tests/test_masked.py

365 lines
11 KiB

import random
from itertools import product
import numpy as np
import pytest
import dask.array as da
from dask.base import tokenize
from dask.array.utils import assert_eq
from copy import deepcopy
pytest.importorskip("dask.array.ma")
def test_tokenize_masked_array():
    """Tokens are repeatable per array and differ when fill value or mask differ."""
    data = [1, 2, 3]
    partial_mask = [True, True, False]
    base = np.ma.masked_array(data, mask=partial_mask, fill_value=10)
    other_fill = np.ma.masked_array(data, mask=partial_mask, fill_value=0)
    unmasked = np.ma.masked_array(data, mask=False, fill_value=10)

    # Tokenizing the same object twice must give the same token.
    for arr in (base, other_fill, unmasked):
        assert tokenize(arr) == tokenize(arr)

    # A different fill value or a different mask must change the token.
    assert tokenize(base) != tokenize(other_fill)
    assert tokenize(base) != tokenize(unmasked)
def test_from_array_masked_array():
    """A masked array wrapped via ``from_array(asarray=False)`` compares equal to its source."""
    source = np.ma.masked_array(
        [1, 2, 3], mask=[True, True, False], fill_value=10
    )
    wrapped = da.from_array(source, chunks=(2,), asarray=False)
    assert_eq(wrapped, source)
def test_copy_deepcopy():
    """``copy()`` and ``deepcopy()`` of a masked dask array still compute to masked arrays."""
    expected = np.ma.masked_array([1, 2], mask=[0, 1])
    original = da.from_array(expected, chunks=expected.shape, asarray=False)

    shallow = original.copy()
    deep = deepcopy(original, memo={})

    # The wrapped array matches one built directly with masked_where.
    assert_eq(original, da.ma.masked_where([False, True], [1, 2]))

    # Both kinds of copy keep the values and the masked-array result type.
    for duplicate in (shallow, deep):
        assert_eq(duplicate, expected)
        assert isinstance(duplicate.compute(), np.ma.masked_array)
# Array operations used to parametrize the tests below. Each entry takes one
# array and returns a transformed result; several entries index three axes,
# so they are meant to be applied to 3-d input.
functions = [
    # Identity and elementwise arithmetic.
    lambda arr: arr,
    lambda arr: da.expm1(arr),
    lambda arr: 2 * arr,
    lambda arr: arr / 2,
    lambda arr: arr**2,
    lambda arr: arr + arr,
    lambda arr: arr * arr,
    # Indexing and axis shuffling.
    lambda arr: arr[0],
    lambda arr: arr[:, 1],
    lambda arr: arr[:1, None, 1:3],
    lambda arr: arr.T,
    lambda arr: da.transpose(arr, (1, 2, 0)),
    # Reductions and contractions.
    lambda arr: arr.sum(),
    lambda arr: arr.dot(np.arange(arr.shape[-1])),
    lambda arr: arr.dot(np.eye(arr.shape[-1])),
    lambda arr: da.tensordot(arr, np.ones(arr.shape[:2]), axes=[(0, 1), (0, 1)]),
    lambda arr: arr.sum(axis=0),
    lambda arr: arr.max(axis=0),
    lambda arr: arr.sum(axis=(1, 2)),
    # Casting, block mapping, rounding, reshaping, comparison.
    lambda arr: arr.astype(np.complex128),
    lambda arr: arr.map_blocks(lambda block: block * 2),
    lambda arr: arr.round(1),
    lambda arr: arr.reshape((arr.shape[0] * arr.shape[1], arr.shape[2])),
    lambda arr: abs(arr),
    lambda arr: arr > 0.5,
    # Rechunking.
    lambda arr: arr.rechunk((4, 4, 4)),
    lambda arr: arr.rechunk((2, 2, 1)),
]
@pytest.mark.parametrize('func', functions)
def test_basic(func):
    """``func`` on a masked array, filled with 0, matches ``func`` on the zeroed dense array."""
    dense = da.random.random((2, 3, 4), chunks=(1, 2, 2))
    dense[dense < 0.4] = 0
    masked = da.ma.masked_equal(dense, 0)

    dense_result = func(dense)
    masked_result = func(masked)
    assert_eq(dense_result, da.ma.filled(masked_result, 0))

    # The result-type check only applies when the result has a non-empty shape.
    if not masked_result.shape:
        return
    assert isinstance(masked_result.compute(), np.ma.masked_array)
def test_tensordot():
    """``tensordot`` of two masked arrays, filled with 0, matches the zeroed dense product."""
    x = da.random.random((2, 3, 4), chunks=(1, 2, 2))
    x[x < 0.4] = 0
    y = da.random.random((4, 3, 2), chunks=(2, 2, 1))
    y[y < 0.4] = 0

    masked_x = da.ma.masked_equal(x, 0)
    masked_y = da.ma.masked_equal(y, 0)

    # Single-axis and multi-axis contractions.
    for axes in [(2, 0), (1, 1), ((1, 2), (1, 0))]:
        dense_product = da.tensordot(x, y, axes=axes)
        masked_product = da.tensordot(masked_x, masked_y, axes=axes)
        assert_eq(dense_product, da.ma.filled(masked_product, 0))
@pytest.mark.parametrize('func', functions)
def test_mixed_concatenate(func):
    """``func`` agrees on a dense concatenation and on one mixing dense and masked parts."""
    plain = da.random.random((2, 3, 4), chunks=(1, 2, 2))
    zeroed = da.random.random((2, 3, 4), chunks=(1, 2, 2))
    zeroed[zeroed < 0.4] = 0
    zeroed_masked = da.ma.masked_equal(zeroed, 0)

    dense_stack = da.concatenate([plain, zeroed], axis=0)
    mixed_stack = da.concatenate([plain, zeroed_masked], axis=0)

    dense_result = func(dense_stack)
    mixed_result = func(mixed_stack)
    assert_eq(dense_result, mixed_result)
@pytest.mark.parametrize('func', functions)
def test_mixed_random(func):
    """``func`` agrees when each block is masked or left dense at random."""
    dense = da.random.random((4, 3, 4), chunks=(1, 2, 2))
    dense[dense < 0.4] = 0

    # Each call masks the zeros of its block with probability one half.
    maybe_mask = lambda block: np.ma.masked_equal(block, 0) if random.random() < 0.5 else block
    mixed = dense.map_blocks(maybe_mask)

    dense_result = func(dense)
    mixed_result = func(mixed)
    assert_eq(dense_result, mixed_result)
def test_mixed_output_type():
    """Concatenating a dense array with a masked one computes to a masked array."""
    masked = da.random.random((10, 10), chunks=(5, 5))
    masked[masked < 0.4] = 0
    masked = da.ma.masked_equal(masked, 0)
    zeros_column = da.zeros((10, 1), chunks=(5, 1))

    joined = da.concatenate([zeros_column, masked], axis=1)
    assert joined.shape == (10, 11)

    computed = joined.compute()
    assert isinstance(computed, np.ma.masked_array)
def test_creation_functions():
    """Compare the ``da.ma.masked_*`` constructors with their ``np.ma`` counterparts."""
    x = np.array([-2, -1, 0, 1, 2] * 20).reshape((10, 10))
    y = np.array([-2, 0, 1, 1, 0] * 2)
    dx = da.from_array(x, chunks=5)
    dy = da.from_array(y, chunks=4)

    # Every dask/numpy combination of operands gives the numpy answer.
    expected = np.ma.masked_greater(x, y)
    for left, right in product([dx, x], [dy, y]):
        assert_eq(da.ma.masked_greater(left, right), expected)

    # These are all the same as masked_greater, just check for correct op
    comparison_ops = (
        'masked_greater',
        'masked_greater_equal',
        'masked_less',
        'masked_less_equal',
        'masked_equal',
        'masked_not_equal',
    )
    for op_name in comparison_ops:
        assert_eq(getattr(da.ma, op_name)(dx, 0), getattr(np.ma, op_name)(x, 0))

    # masked_where
    assert_eq(da.ma.masked_where(False, dx), np.ma.masked_where(False, x))
    assert_eq(da.ma.masked_where(dx > 2, dx), np.ma.masked_where(x > 2, x))

    # A 1-d condition for a 2-d array is expected to be rejected.
    with pytest.raises(IndexError):
        da.ma.masked_where((dx > 2)[:, 0], dx)

    for op_name in ('masked_inside', 'masked_outside'):
        assert_eq(getattr(da.ma, op_name)(dx, -1, 1),
                  getattr(np.ma, op_name)(x, -1, 1))
    assert_eq(da.ma.masked_values(dx, -1), np.ma.masked_values(x, -1))

    # masked_equal and masked_values in numpy sets the fill_value to `value`,
    # which can sometimes be an array. This is hard to support in dask, so we
    # forbid it. Check that this isn't supported:
    for rejecting in (da.ma.masked_equal, da.ma.masked_values):
        with pytest.raises(ValueError):
            rejecting(dx, dy)

    # Float data with NaNs for masked_invalid / fix_invalid.
    with_nans = x.astype('f8')
    with_nans[0, 0] = with_nans[7, 5] = np.nan
    d_with_nans = da.from_array(with_nans, chunks=5)

    assert_eq(da.ma.masked_invalid(d_with_nans), np.ma.masked_invalid(with_nans))

    masked_np = np.ma.masked_greater(with_nans, 0)
    masked_da = da.ma.masked_greater(d_with_nans, 0)
    assert_eq(da.ma.fix_invalid(masked_da, fill_value=0),
              np.ma.fix_invalid(masked_np, fill_value=0))
def test_filled():
    """``da.ma.filled`` matches ``np.ma.filled`` with and without an explicit fill value."""
    x = np.array([-2, -1, 0, 1, 2] * 20).reshape((10, 10))
    dx = da.from_array(x, chunks=5)

    masked_np = np.ma.masked_equal(x, 0)
    masked_da = da.ma.masked_equal(dx, 0)

    # First the default fill value, then an explicit one.
    for fill_args in ((), (-5,)):
        assert_eq(da.ma.filled(masked_da, *fill_args),
                  np.ma.filled(masked_np, *fill_args))
def assert_eq_ma(a, b):
    """Assert that dask array ``a`` computes to the same thing as ``b``.

    The computed result must have exactly the type of ``b``. When it carries
    a ``mask`` attribute, the masks are compared and both sides are filled
    before the final ``assert_eq`` (which treats NaNs as equal).
    """
    computed = a.compute()
    assert type(computed) == type(b)

    if hasattr(computed, 'mask'):
        np.testing.assert_equal(computed.mask, b.mask)
        # Compare filled data so masked slots do not affect the value check.
        a, b = da.ma.filled(a), np.ma.filled(b)

    assert_eq(a, b, equal_nan=True)
@pytest.mark.parametrize('dtype', ('i8', 'f8'))
@pytest.mark.parametrize('reduction', ['sum', 'prod', 'mean', 'var', 'std',
                                       'min', 'max', 'any', 'all'])
def test_reductions(dtype, reduction):
    """Dask reductions over a masked array match numpy for several axis/keepdims options."""
    x = (np.random.RandomState(42).rand(11, 11) * 10).astype(dtype)
    dx = da.from_array(x, chunks=(4, 4))

    mx = np.ma.masked_greater(x, 5)
    mdx = da.ma.masked_greater(dx, 5)

    dfunc = getattr(da, reduction)
    func = getattr(np, reduction)

    # (keyword arguments for dask, keyword arguments for numpy);
    # split_every is passed to the dask call only.
    cases = [
        ({}, {}),
        ({'axis': 0}, {'axis': 0}),
        ({'keepdims': True, 'split_every': 4}, {'keepdims': True}),
        ({'axis': 0, 'split_every': 2}, {'axis': 0}),
        ({'axis': 0, 'keepdims': True, 'split_every': 2},
         {'axis': 0, 'keepdims': True}),
        ({'axis': 1, 'split_every': 2}, {'axis': 1}),
        ({'axis': 1, 'keepdims': True, 'split_every': 2},
         {'axis': 1, 'keepdims': True}),
    ]
    for dask_kwargs, numpy_kwargs in cases:
        assert_eq_ma(dfunc(mdx, **dask_kwargs), func(mx, **numpy_kwargs))
@pytest.mark.parametrize('reduction', ['argmin', 'argmax'])
def test_arg_reductions(reduction):
    """``argmin``/``argmax`` on a masked dask array match numpy, flattened and per axis."""
    x = np.random.random((10, 10, 10))
    dx = da.from_array(x, chunks=(3, 4, 5))

    mx = np.ma.masked_greater(x, 0.4)
    dmx = da.ma.masked_greater(dx, 0.4)

    dfunc = getattr(da, reduction)
    func = getattr(np, reduction)

    # No axis argument first, then each axis in turn.
    assert_eq_ma(dfunc(dmx), func(mx))
    for axis in (0, 1, 2):
        assert_eq_ma(dfunc(dmx, axis), func(mx, axis))
def test_cumulative():
    """``cumsum`` and ``cumprod`` on a masked dask array match numpy along every axis."""
    x = np.random.RandomState(0).rand(20, 24, 13)
    dx = da.from_array(x, chunks=(6, 5, 4))

    mx = np.ma.masked_greater(x, 0.4)
    dmx = da.ma.masked_greater(dx, 0.4)

    for axis in range(3):
        for method in ('cumsum', 'cumprod'):
            assert_eq_ma(getattr(dmx, method)(axis=axis),
                         getattr(mx, method)(axis=axis))
def test_accessors():
    """``getmaskarray`` and ``getdata`` match numpy for masked and plain inputs."""
    x = np.random.random((10, 10))
    dx = da.from_array(x, chunks=(3, 4))

    mx = np.ma.masked_greater(x, 0.4)
    dmx = da.ma.masked_greater(dx, 0.4)

    # Each accessor is checked on the masked pair, then on the unmasked pair.
    for accessor in ('getmaskarray', 'getdata'):
        for dask_arr, numpy_arr in ((dmx, mx), (dx, x)):
            assert_eq(getattr(da.ma, accessor)(dask_arr),
                      getattr(np.ma, accessor)(numpy_arr))
def test_masked_array():
    """Check ``da.ma.masked_array`` against ``np.ma.masked_array``.

    Runs over numpy and dask input data and several fill values, covering
    the plain call, an explicit mask, and a dtype override. Then checks that
    a non-scalar ``fill_value`` raises ``ValueError`` and that a mask whose
    shape differs from the data raises ``np.ma.MaskError``.
    """
    x = np.random.random((10, 10)).astype('f4')
    dx = da.from_array(x, chunks=(3, 4))
    f1 = da.from_array(np.array(1), chunks=())

    # Each pair is (fill value passed to dask, fill value passed to numpy).
    # NOTE(review): the last pair gives the plain scalar to dask and the 0-d
    # dask array to numpy; confirm that direction is intended.
    fill_values = [(None, None), (0.5, 0.5), (1, f1)]
    for data, (df, f) in product([x, dx], fill_values):
        assert_eq(da.ma.masked_array(data, fill_value=df),
                  np.ma.masked_array(x, fill_value=f))
        # This check was previously repeated verbatim; one copy is enough.
        assert_eq(da.ma.masked_array(data, mask=data > 0.4, fill_value=df),
                  np.ma.masked_array(x, mask=x > 0.4, fill_value=f))
        assert_eq(da.ma.masked_array(data, fill_value=df, dtype='f8'),
                  np.ma.masked_array(x, fill_value=f, dtype='f8'))

    with pytest.raises(ValueError):
        da.ma.masked_array(dx, fill_value=dx)

    with pytest.raises(np.ma.MaskError):
        da.ma.masked_array(dx, mask=dx[:3, :3])
def test_set_fill_value():
    """``da.ma.set_fill_value`` mirrors ``np.ma.set_fill_value`` and rejects bad values."""
    x = np.random.randint(0, 10, (10, 10))
    dx = da.from_array(x, chunks=(3, 4))
    mx = np.ma.masked_greater(x, 3)
    dmx = da.ma.masked_greater(dx, 3)

    # Apply the same fill value on both sides, first for the masked pair and
    # then for the plain pair, comparing after each.
    for dask_arr, numpy_arr in ((dmx, mx), (dx, x)):
        da.ma.set_fill_value(dask_arr, -10)
        np.ma.set_fill_value(numpy_arr, -10)
        assert_eq_ma(dask_arr, numpy_arr)

    # Presumably rejected because 1e20 does not fit the integer dtype.
    with pytest.raises(TypeError):
        da.ma.set_fill_value(dmx, 1e20)

    # An array-valued fill value is expected to be rejected.
    with pytest.raises(ValueError):
        da.ma.set_fill_value(dmx, dx)
def test_average_weights_with_masked_array():
    """Weighted ``da.ma.average`` along axis 1 matches ``np.ma.average``."""
    mask = np.array([
        [True, False],
        [True, True],
        [False, True],
    ])
    data = np.arange(6).reshape((3, 2))
    weights = np.array([0.25, 0.75])

    masked_np = np.ma.array(data, mask=mask)
    masked_da = da.ma.masked_array(data=data, mask=mask, chunks=2)
    weights_da = da.from_array(weights, chunks=2)

    expected = np.ma.average(masked_np, weights=weights, axis=1)
    actual = da.ma.average(masked_da, weights=weights_da, axis=1)

    assert_eq(expected, actual)
def test_arithmetic_results_in_masked():
    """Adding a numpy masked array to a dense dask array computes to a masked array."""
    mask = np.array([
        [True, False],
        [True, True],
        [False, True],
    ])
    x = np.arange(6).reshape((3, 2))
    masked = np.ma.array(x, mask=mask)
    dx = da.from_array(x, chunks=(2, 2))

    result = dx + masked
    expected = x + masked

    assert_eq(result, expected)
    assert isinstance(result.compute(), np.ma.masked_array)