You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
2.6 KiB
104 lines
2.6 KiB
6 years ago
|
from __future__ import absolute_import, division, print_function
|
||
|
|
||
|
import binascii
|
||
|
import hashlib
|
||
|
|
||
|
from .compatibility import PY2
|
||
|
|
||
|
|
||
|
# Registry of the hash functions usable at runtime. Entries are appended
# further down, each one only if its backing library imports successfully.
hashers = [] # In decreasing performance order
|
||
|
|
||
|
|
||
|
# Timings on a largish array:
|
||
|
# - CityHash is 2x faster than MurmurHash
|
||
|
# - xxHash is slightly slower than CityHash
|
||
|
# - MurmurHash is 8x faster than SHA1
|
||
|
# - SHA1 is significantly faster than all other hashlib algorithms
|
||
|
|
||
|
def _version_tuple(version):
    """
    Convert a dotted version string into a tuple of ints.

    Only the leading numeric components are kept: parsing stops at the
    first component that is empty or carries a non-digit suffix, so
    '0.4.0.post1' gives (0, 4, 0) and '0.2.2rc1' gives (0, 2, 2).
    """
    numbers = []
    for piece in version.split('.'):
        digits = ''
        for ch in piece:
            if ch not in '0123456789':
                break
            digits += ch
        if not digits:
            break
        numbers.append(int(digits))
        if digits != piece:
            # A suffix such as 'rc1' ends the numeric part of the version.
            break
    return tuple(numbers)


try:
    import cityhash  # `pip install cityhash`
except ImportError:
    pass
else:
    # CityHash disabled unless the reference leak in
    # https://github.com/escherba/python-cityhash/pull/16
    # is fixed.
    # The version is compared numerically: as plain strings, '0.10.0'
    # sorts before '0.2.2' and would wrongly keep the hasher disabled.
    if _version_tuple(cityhash.__version__) >= (0, 2, 2):
        def _hash_cityhash(buf):
            """
            Produce a 16-bytes hash of *buf* using CityHash.

            CityHash128 returns an integer, which is serialized here into
            a fixed-width 16-byte string.
            """
            h = cityhash.CityHash128(buf)
            if not PY2:
                return h.to_bytes(16, 'little')
            else:
                # Python 2 ints have no to_bytes(); go through a
                # zero-padded 32-digit hex representation instead.
                return binascii.a2b_hex('%032x' % h)

        hashers.append(_hash_cityhash)
|
||
|
|
||
|
try:
    import xxhash  # `pip install xxhash`
except ImportError:
    pass
else:
    def _hash_xxhash(buf):
        """
        Return the 8-byte xxHash (64-bit variant) digest of *buf*.
        """
        state = xxhash.xxh64(buf)
        return state.digest()

    # Only registered when the optional xxhash package is importable.
    hashers.append(_hash_xxhash)
|
||
|
|
||
|
try:
    import mmh3  # `pip install mmh3`
except ImportError:
    pass
else:
    def _hash_murmurhash(buf):
        """
        Return the 16-byte MurmurHash digest of *buf*.
        """
        digest = mmh3.hash_bytes(buf)
        return digest

    # Only registered when the optional mmh3 package is importable.
    hashers.append(_hash_murmurhash)
|
||
|
|
||
|
|
||
|
def _hash_sha1(buf):
    """
    Return the 20-byte SHA1 digest of *buf*.
    """
    sha = hashlib.sha1()
    sha.update(buf)
    return sha.digest()
|
||
|
|
||
|
|
||
|
# SHA1 comes from the standard library's hashlib, so it is registered
# unconditionally and last, as the slowest fallback.
hashers.append(_hash_sha1)
|
||
|
|
||
|
|
||
|
def hash_buffer(buf, hasher=None):
    """
    Compute a fixed-length bytes hash of a bytes-like (buffer-compatible)
    object. The result is a good quality hash, but it is not
    cryptographically secure.

    If *hasher* is given it is tried first. Should it reject *buf* with a
    TypeError or OverflowError, the registered hashers are tried in order
    (fastest first) until one accepts the buffer. A TypeError is raised
    when none of them does.
    """
    def candidates():
        # The caller's choice goes first, then the module-wide registry.
        if hasher is not None:
            yield hasher
        for fallback in hashers:
            yield fallback

    for candidate in candidates():
        try:
            return candidate(buf)
        except (TypeError, OverflowError):
            # Some hash libraries may have overly-strict type checking,
            # not accepting all buffers
            continue
    raise TypeError("unsupported type for hashing: %s" % (type(buf),))
|
||
|
|
||
|
|
||
|
def hash_buffer_hex(buf, hasher=None):
    """
    Hex-encoded variant of hash_buffer: hash *buf* (optionally trying
    *hasher* first) and return the digest as hexadecimal text.
    """
    hex_digest = binascii.hexlify(hash_buffer(buf, hasher))
    if PY2:
        # On Python 2 the hexlified result is returned unchanged.
        return hex_digest
    return hex_digest.decode()
|