125 lines
3.6 KiB

from .core import Interface
from threading import Lock
from toolz import merge_with, topk, accumulate, pluck
from operator import add
from bisect import bisect
from collections import defaultdict
from .compatibility import Queue, Empty
def zero():
return 0
class Buffer(Interface):
def __init__(self, fast, slow, available_memory=1e9):
self.lock = Lock()
self.fast = fast
self.slow = slow
self.available_memory = available_memory
self.lengths = defaultdict(zero)
self.memory_usage = 0
Interface.__init__(self)
def __getstate__(self):
return {'fast': self.fast,
'slow': self.slow,
'memory_usage': self.memory_usage,
'lengths': self.lengths,
'available_memory': self.available_memory}
def __setstate__(self, state):
Interface.__setstate__(self, state)
self.lock = Lock()
self.__dict__.update(state)
def append(self, data, lock=True, **kwargs):
if lock: self.lock.acquire()
try:
for k, v in data.items():
self.lengths[k] += len(v)
self.memory_usage += len(v)
self.fast.append(data, lock=False, **kwargs)
while self.memory_usage > self.available_memory:
keys = keys_to_flush(self.lengths, 0.1, maxcount=20)
self.flush(keys)
finally:
if lock: self.lock.release()
def _get(self, keys, lock=True, **kwargs):
if lock: self.lock.acquire()
try:
result = list(map(add, self.fast.get(keys, lock=False),
self.slow.get(keys, lock=False)))
finally:
if lock: self.lock.release()
return result
def _iset(self, key, value, lock=True):
""" Idempotent set """
if lock: self.lock.acquire()
try:
self.fast.iset(key, value, lock=False)
finally:
if lock: self.lock.release()
def _delete(self, keys, lock=True):
if lock: self.lock.acquire()
try:
self.fast.delete(keys, lock=False)
self.slow.delete(keys, lock=False)
finally:
if lock: self.lock.release()
def drop(self):
self._iset_seen.clear()
self.fast.drop()
self.slow.drop()
def __exit__(self, *args):
self.drop()
def flush(self, keys=None, block=None):
""" Flush keys to disk
Parameters
----------
keys: list or None
list of keys to flush
block: bool (defaults to None)
Whether or not to block until all writing is complete
If no keys are given then flush all keys
"""
if keys is None:
keys = list(self.lengths)
self.slow.append(dict(zip(keys, self.fast.get(keys))))
self.fast.delete(keys)
for key in keys:
self.memory_usage -= self.lengths[key]
del self.lengths[key]
def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
""" Which keys to remove
>>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
... 'e': 10, 'f': 25, 'g': 5}
>>> keys_to_flush(lengths, 0.5)
['f', 'a']
"""
top = topk(max(len(lengths) // 2, 1),
lengths.items(),
key=1)
total = sum(lengths.values())
cutoff = min(maxcount, max(1,
bisect(list(accumulate(add, pluck(1, top))),
total * fraction)))
result = [k for k, v in top[:cutoff]]
assert result
return result