ORPA-pyOpenRPA/WPy32-3720/python-3.7.2/Lib/site-packages/dask/datasets.py

from __future__ import absolute_import, print_function, division

import random

from .utils import import_required


def timeseries(
    start='2000-01-01',
    end='2000-01-31',
    freq='1s',
    partition_freq='1d',
    dtypes=None,
    seed=None,
):
    """ Create timeseries dataframe with random data

    This is a thin wrapper that forwards all of its arguments to
    ``dask.dataframe.io.demo.make_timeseries``.

    Parameters
    ----------
    start : datetime (or datetime-like string)
        Start of time series
    end : datetime (or datetime-like string)
        End of time series
    freq : string
        String like '2s' or '1H' or '12W' for the time series frequency
    partition_freq : string
        String like '1M' or '2Y' to divide the dataframe into partitions
    dtypes : dict (optional)
        Mapping of column names to types.
        Valid types include {float, int, str, 'category'}
        When omitted (``None``), defaults to
        ``{'name': str, 'id': int, 'x': float, 'y': float}``.
    seed : int (optional)
        Randomstate seed

    Returns
    -------
    The object returned by ``make_timeseries`` for the given arguments

    Examples
    --------
    >>> import dask
    >>> df = dask.datasets.timeseries()
    >>> df.head()  # doctest: +SKIP
              timestamp    id     name         x         y
    2000-01-01 00:00:00   967    Jerry -0.031348 -0.040633
    2000-01-01 00:00:01  1066  Michael -0.262136  0.307107
    2000-01-01 00:00:02   988    Wendy -0.526331  0.128641
    2000-01-01 00:00:03  1016   Yvonne  0.620456  0.767270
    2000-01-01 00:00:04   998   Ursula  0.684902 -0.463278
    """
    # Imported lazily so that importing this module does not pull in
    # dask.dataframe until a timeseries is actually requested.
    from dask.dataframe.io.demo import make_timeseries

    # Build the default mapping per call instead of sharing one mutable dict
    # across every invocation.  Only ``None`` triggers the default, so an
    # explicitly passed empty dict is still forwarded unchanged.
    if dtypes is None:
        dtypes = {'name': str, 'id': int, 'x': float, 'y': float}

    return make_timeseries(start=start, end=end, freq=freq,
                           partition_freq=partition_freq,
                           seed=seed, dtypes=dtypes)


def _generate_mimesis(field, schema_description, records_per_partition, seed):
    """ Lazily produce the records for one partition of a dask bag

    Parameters
    ----------
    field: dict
        Keyword arguments forwarded to ``mimesis.schema.Field``
    schema_description: callable
        Called with the constructed ``Field``; its result is used as the
        schema of each record
    records_per_partition: int
        Number of records to yield
    seed:
        Passed to ``Field`` as its ``seed`` keyword

    Yields
    ------
    The first element of ``Schema.create(iterations=1)``, once per record

    See Also
    --------
    _make_mimesis
    """
    from mimesis.schema import Schema, Field

    provider = Field(seed=seed, **field)

    def describe():
        # Schema expects a zero-argument callable, so bind the provider here.
        return schema_description(provider)

    generator = Schema(schema=describe)

    for _ in range(records_per_partition):
        # Create one record at a time so the partition is produced lazily.
        batch = generator.create(iterations=1)
        yield batch[0]


def _make_mimesis(field, schema, npartitions, records_per_partition, seed=None):
    """
    Build a Dask Bag of records randomly generated by the mimesis project

    Each partition is a task that calls ``_generate_mimesis`` with its own
    seed, taken from ``db.core.random_state_data_python(npartitions, seed)``.

    Parameters
    ----------
    field: dict
        keyword arguments to pass to ``mimesis.Field``; a falsy value is
        treated as an empty dict
    schema: Callable[Field] -> dict
        The schema to use to generate the data
    npartitions: int
        Number of partitions in the resulting bag
    records_per_partition: int
        Number of records each partition generates
    seed: int, None
        Seed for random data.  When ``None``, ``random.random()`` supplies one

    Returns
    -------
    Dask Bag

    See Also
    --------
    make_people
    """
    import dask.bag as db
    from dask.base import tokenize

    field_kwargs = field or {}
    base_seed = random.random() if seed is None else seed

    # One derived seed per partition.
    partition_seeds = db.core.random_state_data_python(npartitions, base_seed)

    # The graph name is derived from every input, including the base seed.
    name = 'mimesis-' + tokenize(field_kwargs, schema, npartitions,
                                 records_per_partition, base_seed)

    dsk = {}
    for index, partition_seed in enumerate(partition_seeds):
        dsk[(name, index)] = (_generate_mimesis, field_kwargs, schema,
                              records_per_partition, partition_seed)

    return db.Bag(dsk, name, npartitions)


def make_people(npartitions=10, records_per_partition=1000, seed=None, locale='en'):
    """ Make a dataset of random people

    Builds a Dask Bag whose records are dictionaries describing randomly
    generated people.  The optional library ``mimesis`` must be installed.

    Parameters
    ----------
    npartitions : int
        Number of partitions
    records_per_partition : int
        Number of records in each partition
    seed : int, (optional)
        Random seed
    locale : str
        Language locale, like 'en', 'fr', 'zh', or 'ru'

    Returns
    -------
    b: Dask Bag
    """
    import_required(
        'mimesis',
        'The mimesis module is required for this function.  Try:\n'
        '  pip install mimesis',
    )

    def describe_person(field):
        # Keys are filled in a fixed order, and the field calls happen in that
        # same order; presumably the call order decides which random value
        # each key receives for a given seed, so do not reorder them.
        person = {}
        person['age'] = field('person.age')
        person['name'] = (field('person.name'), field('person.surname'))
        person['occupation'] = field('person.occupation')
        person['telephone'] = field('person.telephone')
        person['address'] = {
            'address': field('address.address'),
            'city': field('address.city'),
        }
        person['credit-card'] = {
            'number': field('payment.credit_card_number'),
            'expiration-date': field('payment.credit_card_expiration_date'),
        }
        return person

    field_kwargs = {'locale': locale}
    return _make_mimesis(field_kwargs, describe_person, npartitions,
                         records_per_partition, seed)
v1.0.0 prototype 6 years ago			`from __future__ import absolute_import, print_function, division`

			`import random`

			`from .utils import import_required`


			`def timeseries(`
			`start='2000-01-01',`
			`end='2000-01-31',`
			`freq='1s',`
			`partition_freq='1d',`
			`dtypes={'name': str, 'id': int, 'x': float, 'y': float},`
			`seed=None,`
			`):`
			`""" Create timeseries dataframe with random data`

			`Parameters`
			`----------`
			`start : datetime (or datetime-like string)`
			`Start of time series`
			`end : datetime (or datetime-like string)`
			`End of time series`
			`dtypes : dict`
			`Mapping of column names to types.`
			`Valid types include {float, int, str, 'category'}`
			`freq : string`
			`String like '2s' or '1H' or '12W' for the time series frequency`
			`partition_freq : string`
			`String like '1M' or '2Y' to divide the dataframe into partitions`
			`seed : int (optional)`
			`Randomstate seed`

			`Examples`
			`--------`
			`>>> import dask`
			`>>> df = dask.datasets.timeseries()`
			`>>> df.head() # doctest: +SKIP`
			`timestamp id name x y`
			`2000-01-01 00:00:00 967 Jerry -0.031348 -0.040633`
			`2000-01-01 00:00:01 1066 Michael -0.262136 0.307107`
			`2000-01-01 00:00:02 988 Wendy -0.526331 0.128641`
			`2000-01-01 00:00:03 1016 Yvonne 0.620456 0.767270`
			`2000-01-01 00:00:04 998 Ursula 0.684902 -0.463278`
			`"""`
			`from dask.dataframe.io.demo import make_timeseries`
			`return make_timeseries(start=start, end=end, freq=freq,`
			`partition_freq=partition_freq,`
			`seed=seed, dtypes=dtypes)`


			`def _generate_mimesis(field, schema_description, records_per_partition, seed):`
			`""" Generate data for a single partition of a dask bag`

			`See Also`
			`--------`
			`_make_mimesis`
			`"""`
			`from mimesis.schema import Schema, Field`
			`field = Field(seed=seed, **field)`
			`schema = Schema(schema=lambda: schema_description(field))`
			`for i in range(records_per_partition):`
			`yield schema.create(iterations=1)[0]`


			`def _make_mimesis(field, schema, npartitions, records_per_partition, seed=None):`
			`"""`
			`Make a Dask Bag filled with data randomly generated by the mimesis project`

			`Parameters`
			`----------`
			`field: dict`
			keyword arguments to pass to ``mimesis.Field``
			`schema: Callable[Field] -> dict`
			`The schema to use to generate the data`
			`npartitions: int`
			`records_per_partition: int`
			`seed: int, None`
			`Seed for random data`

			`Returns`
			`-------`
			`Dask Bag`

			`See Also`
			`--------`
			`make_people`
			`"""`
			`import dask.bag as db`
			`from dask.base import tokenize`

			`field = field or {}`

			`if seed is None:`
			`seed = random.random()`

			`seeds = db.core.random_state_data_python(npartitions, seed)`

			`name = 'mimesis-' + tokenize(field, schema, npartitions, records_per_partition, seed)`
			`dsk = {(name, i): (_generate_mimesis, field, schema, records_per_partition, seed)`
			`for i, seed in enumerate(seeds)}`

			`return db.Bag(dsk, name, npartitions)`


			`def make_people(npartitions=10, records_per_partition=1000, seed=None, locale='en'):`
			`""" Make a dataset of random people`

			`This makes a Dask Bag with dictionary records of randomly generated people.`
			This requires the optional library ``mimesis`` to generate records.

			`Parameters`
			`----------`
			`npartitions : int`
			`Number of partitions`
			`records_per_partition : int`
			`Number of records in each partition`
			`seed : int, (optional)`
			`Random seed`
			`locale : str`
			`Language locale, like 'en', 'fr', 'zh', or 'ru'`

			`Returns`
			`-------`
			`b: Dask Bag`
			`"""`
			`import_required('mimesis',`
			`'The mimesis module is required for this function. Try:\n'`
			`' pip install mimesis')`

			`schema = lambda field: {`
			`'age': field('person.age'),`
			`'name': (field('person.name'), field('person.surname')),`
			`'occupation': field('person.occupation'),`
			`'telephone': field('person.telephone'),`
			`'address': {'address': field('address.address'),`
			`'city': field('address.city')},`
			`'credit-card': {'number': field('payment.credit_card_number'),`
			`'expiration-date': field('payment.credit_card_expiration_date')},`
			`}`

			`return _make_mimesis({'locale': locale}, schema, npartitions, records_per_partition, seed)`