You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
36 lines
1.1 KiB
36 lines
1.1 KiB
6 years ago
|
""" Dataframe optimizations """
|
||
|
from __future__ import absolute_import, division, print_function
|
||
|
|
||
|
from ..optimization import cull, fuse_getitem, fuse
|
||
|
from .. import config, core
|
||
|
from ..highlevelgraph import HighLevelGraph
|
||
|
from ..utils import ensure_dict
|
||
|
from ..blockwise import optimize_blockwise
|
||
|
|
||
|
try:
|
||
|
import fastparquet # noqa: F401
|
||
|
except ImportError:
|
||
|
fastparquet = False
|
||
|
|
||
|
|
||
|
def optimize(dsk, keys, **kwargs):
    """Optimize a dataframe task graph for the requested ``keys``.

    The passes run in this order:

    1. If ``dsk`` is a ``HighLevelGraph``, apply ``optimize_blockwise``.
    2. Convert the graph with ``ensure_dict`` and ``cull`` it to ``keys``.
    3. Apply ``fuse_getitem`` for ``dataframe_from_ctable`` and, when
       ``fastparquet`` could be imported, for ``_read_parquet_row_group``.
    4. ``fuse`` the graph, then ``cull`` it once more.

    Parameters
    ----------
    dsk : HighLevelGraph or mapping
        The task graph to optimize.
    keys : key or (possibly nested) list of keys
        The outputs that must remain computable from the result.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The graph produced by the final ``cull`` call.
    """
    if isinstance(dsk, HighLevelGraph):
        dsk = optimize_blockwise(dsk, keys=list(core.flatten(keys)))

    graph = ensure_dict(dsk)

    # Imported lazily inside the function, as in the original code.
    from .io import dataframe_from_ctable

    # The first cull always receives a flat list of keys.
    wanted = list(core.flatten(keys)) if isinstance(keys, list) else [keys]
    graph, deps = cull(graph, wanted)

    # NOTE(review): 3 (and 4 below) is presumably the argument position that
    # receives the fused selection -- confirm against fuse_getitem.
    graph = fuse_getitem(graph, dataframe_from_ctable, 3)

    # ``fastparquet`` is the module when importable, otherwise ``False``.
    if fastparquet:
        from .io.parquet import _read_parquet_row_group
        graph = fuse_getitem(graph, _read_parquet_row_group, 4)

    # NOTE(review): ``deps`` was computed before the fuse_getitem passes;
    # confirm it is still accurate for the rewritten graph.
    subgraph_fusion = config.get('fuse_subgraphs', True)
    graph, deps = fuse(
        graph,
        keys,
        dependencies=deps,
        fuse_subgraphs=subgraph_fusion,
    )

    # Only the graph from the last cull is needed; its dependencies are unused.
    graph, _ = cull(graph, keys)
    return graph
|