You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/dataframe/optimize.py

36 lines
1.1 KiB

6 years ago
""" Dataframe optimizations """
from __future__ import absolute_import, division, print_function
from ..optimization import cull, fuse_getitem, fuse
from .. import config, core
from ..highlevelgraph import HighLevelGraph
from ..utils import ensure_dict
from ..blockwise import optimize_blockwise
# fastparquet is an optional dependency. When it cannot be imported, the name
# is bound to False so that truthiness checks such as ``if fastparquet:`` in
# ``optimize`` skip the parquet-specific step.
try:
    import fastparquet  # noqa: F401
except ImportError:
    fastparquet = False
def optimize(dsk, keys, **kwargs):
    """Optimize a dataframe task graph for the requested output keys.

    The passes run in this order:

    1. If ``dsk`` is a ``HighLevelGraph``, run ``optimize_blockwise`` on it
       with the flattened ``keys``.
    2. Convert the graph with ``ensure_dict``.
    3. ``cull`` the graph against the requested keys, keeping the
       dependencies that ``cull`` returns.
    4. Apply ``fuse_getitem`` for ``dataframe_from_ctable`` and, when
       ``fastparquet`` is importable, for ``_read_parquet_row_group``.
    5. ``fuse`` the graph, reusing the dependencies from step 3, then
       ``cull`` once more.

    Parameters
    ----------
    dsk : HighLevelGraph or mapping
        The task graph to optimize.
    keys : list or single key
        Requested output keys.  A list is flattened with ``core.flatten``
        before the first ``cull``; any other value is treated as a single
        key.
    **kwargs
        Accepted and ignored; nothing in this function reads them.

    Returns
    -------
    The graph produced by the final ``cull`` call.
    """
    if isinstance(dsk, HighLevelGraph):
        flat_keys = list(core.flatten(keys))
        dsk = optimize_blockwise(dsk, keys=flat_keys)
    graph = ensure_dict(dsk)

    # Imported at call time rather than at module import time, as in the
    # original placement.
    from .io import dataframe_from_ctable

    # A list of keys may be nested, so flatten it; anything else is wrapped
    # as a single key.
    targets = list(core.flatten(keys)) if isinstance(keys, list) else [keys]
    graph, deps = cull(graph, targets)

    # NOTE(review): the numeric argument (3 / 4) is presumably the position
    # of the column selection in the task tuple; confirm against
    # ``fuse_getitem``.
    graph = fuse_getitem(graph, dataframe_from_ctable, 3)
    if fastparquet:
        from .io.parquet import _read_parquet_row_group
        graph = fuse_getitem(graph, _read_parquet_row_group, 4)

    subgraph_fusion = config.get('fuse_subgraphs', True)
    graph, deps = fuse(
        graph,
        keys,
        dependencies=deps,
        fuse_subgraphs=subgraph_fusion,
    )

    # Final cull after fusion; only the graph is returned.
    graph, _ = cull(graph, keys)
    return graph