You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
2.3 KiB
75 lines
2.3 KiB
import pytest
|
|
pytest.importorskip('numpy')
|
|
|
|
import numpy as np
|
|
|
|
import dask.array as da
|
|
from dask.array.utils import assert_eq, same_keys
|
|
|
|
|
|
def test_percentile():
    """Exercise ``da.percentile`` on constant, integer and string arrays.

    Also checks that repeated calls with the same percentiles produce the
    same graph keys, and that a different percentile list does not.
    """
    qs = [0, 50, 100]

    # Constant input: every requested percentile is that constant.
    ones = da.ones((16,), chunks=(4,))
    expected_ones = np.array([1, 1, 1], dtype=ones.dtype)
    assert_eq(da.percentile(ones, qs), expected_ones)

    # Integer input whose chunking (3) does not divide its length (8).
    x = np.array([0, 0, 5, 5, 5, 5, 20, 20])
    d = da.from_array(x, chunks=(3,))
    # The expected dtype is taken from whatever dask itself reports.
    expected_ints = np.array([0, 5, 20], dtype=da.percentile(d, qs).dtype)
    assert_eq(da.percentile(d, qs), expected_ints)

    # Key names must be deterministic and must depend on the percentiles.
    first = da.percentile(d, qs)
    second = da.percentile(d, qs)
    assert same_keys(first, second)
    full = da.percentile(d, qs)
    shorter = da.percentile(d, [0, 50])
    assert not same_keys(full, shorter)

    # String input: the result keeps the input's dtype.
    x = np.array(['a', 'a', 'd', 'd', 'd', 'e'])
    d = da.from_array(x, chunks=(3,))
    expected_strs = np.array(['a', 'd', 'e'], dtype=x.dtype)
    assert_eq(da.percentile(d, qs), expected_strs)
|
|
|
|
|
|
@pytest.mark.skip
def test_percentile_with_categoricals():
    """Check ``da.percentile`` on chunks that are pandas ``Categorical`` objects.

    The computed result must carry the same categories as the input chunks,
    the 50th percentile must have code 0, and repeated calls must produce
    the same graph keys.

    The test is currently skipped unconditionally via ``pytest.mark.skip``.
    """
    # Report an explicit skip when pandas is unavailable; a bare ``return``
    # would be recorded by pytest as a passing test.
    pd = pytest.importorskip('pandas')

    names = ['Alice', 'Bob', 'Charlie', 'Dennis', 'Alice', 'Alice']
    x0 = pd.Categorical(names)
    x1 = pd.Categorical(names)

    # Assemble the dask array directly from a graph holding one
    # Categorical per chunk.
    dsk = {('x', 0): x0, ('x', 1): x1}
    x = da.Array(dsk, 'x', chunks=((6, 6),))

    p = da.percentile(x, [50])
    # Compute once and reuse the result for both checks.
    computed = p.compute()
    assert (computed.categories == x0.categories).all()
    assert (computed.codes == [0]).all()
    assert same_keys(da.percentile(x, [50]),
                     da.percentile(x, [50]))
|
|
|
|
|
|
def test_percentiles_with_empty_arrays():
    """``da.percentile`` copes with an array containing a zero-length chunk."""
    # The middle chunk is deliberately empty.
    arr = da.ones(10, chunks=((5, 0, 5),))
    got = da.percentile(arr, [10, 50, 90])
    want = np.array([1, 1, 1], dtype=arr.dtype)
    assert_eq(got, want)
|
|
|
|
|
|
@pytest.mark.parametrize('q', [5, 5.0, np.int64(5), np.float64(5)])
def test_percentiles_with_scaler_percentile(q):
    """Regression test (see #3020): ``da.percentile`` accepts a scalar ``q``.

    ``q`` is parametrized over Python and NumPy integer/float scalars.
    """
    ones = da.ones((16,), chunks=(4,))
    expected = np.array([1], dtype=ones.dtype)
    assert_eq(da.percentile(ones, q), expected)
|
|
|
|
|
|
def test_unknown_chunk_sizes():
    """``da.percentile`` still works when every chunk length is NaN."""
    x = da.random.random(1000, chunks=(100,))
    # Overwrite the chunk metadata so all ten chunk sizes are NaN.
    nan_sizes = (np.nan,) * 10
    x._chunks = (nan_sizes,)

    # Scalar percentile: the result should fall inside a loose band.
    median = da.percentile(x, 50).compute()
    assert 0.1 < median < 0.9

    # Two percentiles: both inside the band, and in increasing order.
    lower, upper = da.percentile(x, [40, 60]).compute()
    assert 0.1 < lower < 0.9
    assert 0.1 < upper < 0.9
    assert lower < upper
|