You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
ORPA-pyOpenRPA/Resources/WPy64-3720/python-3.7.2.amd64/Lib/site-packages/dask/dataframe/tests/test_optimize_dataframe.py

66 lines
2.0 KiB

import pytest
from operator import getitem
from toolz import merge
import dask
from dask.dataframe.io import dataframe_from_ctable
import dask.dataframe as dd
import pandas as pd
# Three small pandas frames keyed by ('x', n) tuples for n = 0, 1, 2.
# Each spec below is (values of column 'a', values of column 'b', index labels);
# note that the last frame repeats the index label 9 three times.
dsk = {
    ('x', part): pd.DataFrame({'a': col_a, 'b': col_b}, index=labels)
    for part, (col_a, col_b, labels) in enumerate([
        ([1, 2, 3], [4, 5, 6], [0, 1, 3]),
        ([4, 5, 6], [3, 2, 1], [5, 6, 8]),
        ([7, 8, 9], [0, 0, 0], [9, 9, 9]),
    ])
}
# The same frame objects as a plain list, in the mapping's insertion order.
dfs = [*dsk.values()]
def test_column_optimizations_with_bcolz_and_rewrite():
    """Check that ``dd.optimize`` folds a column selection into the ctable read.

    For each starting ``cols`` value (``None``, ``'abc'``, ``['abc']``) a raw
    graph is built in which every ``('x', i)`` key holds a
    ``dataframe_from_ctable`` task and every ``('y', i)`` key holds a
    ``getitem`` of columns ``['a', 'b']`` from ``('x', i)``.  After
    optimizing for the ``('y', i)`` keys, the graph must equal one that has
    only the ``('y', i)`` keys, each a ``dataframe_from_ctable`` task whose
    column argument is ``['a', 'b']``.

    The test is skipped when ``bcolz`` cannot be imported.
    """
    bcolz = pytest.importorskip('bcolz')

    table = bcolz.ctable([[1, 2, 3], [10, 20, 30]], names=['a', 'b'])
    parts = (1, 2, 3)

    for cols in [None, 'abc', ['abc']]:
        # Raw graph: one read task and one column-selection task per part.
        read_tasks = {
            ('x', i): (dataframe_from_ctable, table, slice(0, 2), cols, {})
            for i in parts
        }
        select_tasks = {
            ('y', i): (getitem, ('x', i), ['a', 'b'])
            for i in parts
        }
        dsk2 = merge(read_tasks, select_tasks)

        # Target graph: the selection has been absorbed into the read.
        expected = {
            ('y', i): (dataframe_from_ctable, table, slice(0, 2), ['a', 'b'], {})
            for i in parts
        }

        wanted = [('y', i) for i in parts]
        with dask.config.set(fuse_ave_width=0):
            result = dd.optimize(dsk2, wanted)

        assert result == expected
def test_fuse_ave_width():
    """Check the optimized graph size with and without ``fuse_ave_width=4``.

    The expression ``(x + 1) + (x + 2)`` over a five-partition dask series is
    optimized once inside a ``dask.config.set(fuse_ave_width=4)`` context and
    once outside it; each resulting graph must have a length of at most 15.
    """
    frame = pd.DataFrame({'x': range(10)})
    ddf = dd.from_pandas(frame, npartitions=5)

    s = ((ddf.x + 1) + (ddf.x + 2))

    def optimized():
        # Re-read the graph and keys on every call, as a fresh optimize would.
        return s.__dask_optimize__(s.dask, s.__dask_keys__())

    with dask.config.set(fuse_ave_width=4):
        graph_width4 = optimized()

    # Same optimization under whatever configuration is active outside the
    # context manager.
    graph_ambient = optimized()

    assert len(graph_width4) <= 15
    assert len(graph_ambient) <= 15
def test_optimize_blockwise():
    """Check that repeated column updates stay small after ``optimize_blockwise``.

    Column ``'x'`` of a two-partition dask dataframe is reassigned ten times
    to ``x + 1 + y``; the graph returned by ``optimize_blockwise`` must then
    have a length of at most 4.
    """
    from dask.array.optimization import optimize_blockwise

    frame = pd.DataFrame({'x': range(10), 'y': range(10)})
    ddf = dd.from_pandas(frame, npartitions=2)

    # Stack ten successive elementwise updates onto the same column.
    for _ in range(10):
        ddf['x'] = ddf.x + 1 + ddf.y

    optimized = optimize_blockwise(ddf.dask)

    assert len(optimized) <= 4