# ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/datasets.py

from __future__ import absolute_import, print_function, division

import random

from .utils import import_required


def timeseries(
    start='2000-01-01',
    end='2000-01-31',
    freq='1s',
    partition_freq='1d',
    dtypes=None,
    seed=None,
):
    """ Create timeseries dataframe with random data

    Parameters
    ----------
    start : datetime (or datetime-like string)
        Start of time series
    end : datetime (or datetime-like string)
        End of time series
    freq : string
        String like '2s' or '1H' or '12W' for the time series frequency
    partition_freq : string
        String like '1M' or '2Y' to divide the dataframe into partitions
    dtypes : dict (optional)
        Mapping of column names to types.
        Valid types include {float, int, str, 'category'}.
        When omitted, ``{'name': str, 'id': int, 'x': float, 'y': float}``
        is used.
    seed : int (optional)
        Randomstate seed

    Returns
    -------
    The object returned by ``dask.dataframe.io.demo.make_timeseries``
    for the given arguments.

    Examples
    --------
    >>> import dask
    >>> df = dask.datasets.timeseries()
    >>> df.head()  # doctest: +SKIP
              timestamp    id     name         x         y
    2000-01-01 00:00:00   967    Jerry -0.031348 -0.040633
    2000-01-01 00:00:01  1066  Michael -0.262136  0.307107
    2000-01-01 00:00:02   988    Wendy -0.526331  0.128641
    2000-01-01 00:00:03  1016   Yvonne  0.620456  0.767270
    2000-01-01 00:00:04   998   Ursula  0.684902 -0.463278
    """
    # Imported lazily so that importing this module does not pull in
    # dask.dataframe (and its dependencies) until it is actually needed.
    from dask.dataframe.io.demo import make_timeseries

    if dtypes is None:
        # Build the default mapping on every call instead of using a dict
        # literal as the parameter default: a default dict is created once
        # and shared by all calls, so any mutation would leak between them.
        dtypes = {'name': str, 'id': int, 'x': float, 'y': float}

    return make_timeseries(start=start, end=end, freq=freq,
                           partition_freq=partition_freq,
                           seed=seed, dtypes=dtypes)


def _generate_mimesis(field, schema_description, records_per_partition, seed):
    """ Lazily produce the records of one partition of a dask bag

    Parameters
    ----------
    field: dict
        keyword arguments forwarded to ``mimesis.schema.Field``
    schema_description: callable
        Called with the constructed ``Field``; its result is used as the
        mimesis schema
    records_per_partition: int
        Number of records to yield
    seed:
        Seed forwarded to ``mimesis.schema.Field``

    Yields
    ------
    The first element of ``Schema.create(iterations=1)``, once per record

    See Also
    --------
    _make_mimesis
    """
    from mimesis.schema import Schema, Field

    provider = Field(seed=seed, **field)

    def describe():
        # Schema wants a zero-argument callable; bind the seeded provider.
        return schema_description(provider)

    record_schema = Schema(schema=describe)
    for _ in range(records_per_partition):
        # Create records one at a time so the partition is streamed
        # rather than materialised in a single list.
        batch = record_schema.create(iterations=1)
        yield batch[0]


def _make_mimesis(field, schema, npartitions, records_per_partition, seed=None):
    """
    Build a Dask Bag whose records are randomly generated by mimesis

    Every partition is a single task that calls ``_generate_mimesis`` with
    its own entry of ``db.core.random_state_data_python(npartitions, seed)``.

    Parameters
    ----------
    field: dict
        keyword arguments to pass to ``mimesis.Field``; a falsy value is
        replaced by an empty dict
    schema: Callable[Field] -> dict
        The schema to use to generate the data
    npartitions: int
        Number of partitions in the resulting bag
    records_per_partition: int
        Number of records generated in each partition
    seed: int, None
        Seed for random data; ``random.random()`` is used when None

    Returns
    -------
    Dask Bag

    See Also
    --------
    make_people
    """
    import dask.bag as db
    from dask.base import tokenize

    if not field:
        field = {}

    if seed is None:
        seed = random.random()

    partition_seeds = db.core.random_state_data_python(npartitions, seed)

    # The collection name is derived from the top-level seed, not from the
    # per-partition seeds used inside the individual tasks.
    name = 'mimesis-' + tokenize(field, schema, npartitions, records_per_partition, seed)

    graph = {}
    for index, partition_seed in enumerate(partition_seeds):
        graph[(name, index)] = (_generate_mimesis, field, schema,
                                records_per_partition, partition_seed)

    return db.Bag(graph, name, npartitions)


def make_people(npartitions=10, records_per_partition=1000, seed=None, locale='en'):
    """ Make a dataset of random people

    This makes a Dask Bag with dictionary records of randomly generated people.
    This requires the optional library ``mimesis`` to generate records.

    Each record has the keys ``'age'``, ``'name'`` (a tuple of first name and
    surname), ``'occupation'``, ``'telephone'``, ``'address'`` (a dict with
    ``'address'`` and ``'city'``) and ``'credit-card'`` (a dict with
    ``'number'`` and ``'expiration-date'``).

    Parameters
    ----------
    npartitions : int
        Number of partitions
    records_per_partition : int
        Number of records in each partition
    seed : int, (optional)
        Random seed
    locale : str
        Language locale, like 'en', 'fr', 'zh', or 'ru'

    Returns
    -------
    b: Dask Bag
    """
    # Fail early with an installation hint if mimesis is not available.
    import_required('mimesis',
                    'The mimesis module is required for this function.  Try:\n'
                    '  pip install mimesis')

    def person_schema(field):
        # ``field`` is the mimesis Field; each call draws one random value.
        return {
            'age': field('person.age'),
            'name': (field('person.name'), field('person.surname')),
            'occupation': field('person.occupation'),
            'telephone': field('person.telephone'),
            'address': {'address': field('address.address'),
                        'city': field('address.city')},
            'credit-card': {'number': field('payment.credit_card_number'),
                            'expiration-date': field('payment.credit_card_expiration_date')},
        }

    return _make_mimesis({'locale': locale}, person_schema, npartitions,
                         records_per_partition, seed)