You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ORPA-pyOpenRPA/Resources/WPy64-3720/python-3.7.2.amd64/Lib/site-packages/dask/dataframe/tseries/tests/test_resample.py

96 lines
3.2 KiB

from itertools import product
import pandas as pd
import pytest
from dask.dataframe.utils import assert_eq
import dask.dataframe as dd
def resample(df, freq, how='mean', **kwargs):
return getattr(df.resample(freq, **kwargs), how)()
@pytest.mark.parametrize(['obj', 'method', 'npartitions', 'freq', 'closed', 'label'],
list(product(['series', 'frame'],
['count', 'mean', 'ohlc'],
[2, 5],
['30T', 'h', 'd', 'w', 'M'],
['right', 'left'],
['right', 'left'])))
def test_series_resample(obj, method, npartitions, freq, closed, label):
index = pd.date_range('1-1-2000', '2-15-2000', freq='h')
index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h'))
if obj == 'series':
ps = pd.Series(range(len(index)), index=index)
elif obj == 'frame':
ps = pd.DataFrame({'a':range(len(index))}, index=index)
ds = dd.from_pandas(ps, npartitions=npartitions)
# Series output
result = resample(ds, freq, how=method, closed=closed, label=label)
expected = resample(ps, freq, how=method, closed=closed, label=label)
assert_eq(result, expected, check_dtype=False)
divisions = result.divisions
assert expected.index[0] == divisions[0]
assert expected.index[-1] == divisions[-1]
def test_resample_agg():
index = pd.date_range('2000-01-01', '2000-02-15', freq='h')
ps = pd.Series(range(len(index)), index=index)
ds = dd.from_pandas(ps, npartitions=2)
assert_eq(ds.resample('10min').agg('mean'),
ps.resample('10min').agg('mean'))
assert_eq(ds.resample('10min').agg(['mean', 'min']),
ps.resample('10min').agg(['mean', 'min']))
def test_resample_agg_passes_kwargs():
index = pd.date_range('2000-01-01', '2000-02-15', freq='h')
ps = pd.Series(range(len(index)), index=index)
ds = dd.from_pandas(ps, npartitions=2)
def foo(series, bar=1, *args, **kwargs):
return bar
assert_eq(ds.resample('2h').agg(foo, bar=2),
ps.resample('2h').agg(foo, bar=2))
assert (ds.resample('2h').agg(foo, bar=2) == 2).compute().all()
def test_series_resample_not_implemented():
index = pd.date_range(start='2012-01-02', periods=100, freq='T')
s = pd.Series(range(len(index)), index=index)
ds = dd.from_pandas(s, npartitions=5)
# Frequency doesn't evenly divide day
pytest.raises(NotImplementedError, lambda: resample(ds, '57T'))
def test_unknown_divisions_error():
df = pd.DataFrame({'x': [1, 2, 3]})
ddf = dd.from_pandas(df, npartitions=2, sort=False)
try:
ddf.x.resample('1m').mean()
assert False
except ValueError as e:
assert 'divisions' in str(e)
def test_resample_index_name():
import numpy as np
from datetime import datetime, timedelta
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(20), freq='D')
data = np.random.randint(1, high=100, size=len(days))
df = pd.DataFrame({'date': days, 'values': data})
df = df.set_index('date')
ddf = dd.from_pandas(df, npartitions=4)
assert ddf.resample('D').mean().head().index.name == "date"