""" Store arrays We put arrays on disk as raw bytes, extending along the first dimension. Alongside each array x we ensure the value x.dtype which stores the string description of the array's dtype. """ from __future__ import absolute_import import numpy as np from toolz import valmap, identity, partial from .compatibility import pickle from .core import Interface from .file import File from .utils import frame, framesplit, suffix, ignoring def serialize_dtype(dt): """ Serialize dtype to bytes >>> serialize_dtype(np.dtype('i4')) '>> serialize_dtype(np.dtype('M8[us]')) '>> parse_dtype('i4') dtype('int32') >>> parse_dtype("[('a', 'i4')]") dtype([('a', '= (0, 5, 2): unpack_kwargs = {'raw': False} else: unpack_kwargs = {'encoding': 'utf-8'} blocks = [msgpack.unpackb(f, **unpack_kwargs) for f in framesplit(bytes)] except Exception: blocks = [pickle.loads(f) for f in framesplit(bytes)] result = np.empty(sum(map(len, blocks)), dtype='O') i = 0 for block in blocks: result[i:i + len(block)] = block i += len(block) return result else: result = np.frombuffer(bytes, dtype) if copy: result = result.copy() return result compress_text = identity decompress_text = identity compress_bytes = lambda bytes, itemsize: bytes decompress_bytes = identity with ignoring(ImportError): import blosc blosc.set_nthreads(1) compress_bytes = blosc.compress decompress_bytes = blosc.decompress compress_text = partial(blosc.compress, typesize=1) decompress_text = blosc.decompress with ignoring(ImportError): from snappy import compress as compress_text from snappy import decompress as decompress_text def compress(bytes, dtype): if dtype == 'O': return compress_text(bytes) else: return compress_bytes(bytes, dtype.itemsize) def decompress(bytes, dtype): if dtype == 'O': return decompress_text(bytes) else: return decompress_bytes(bytes)