PYTHON-977 - Fix __hash__ method on BSON types that inherit from Python builtin types.

In Python 2, objects automatically inherit the __hash__ of their parent
class. In Python 3, objects that override __eq__ do not automatically inherit
__hash__, so these objects were not hashable under Python 3.  Additionally,
mutable BSON types and types that override __eq__ but did not explicitly define
__hash__ had broken __hash__ methods under Python 2.  This commit unifies the
hashing behavior between Python versions and fixes the __hash__ methods such
that two BSON objects hash the same only if they are equal.

N.B.: bson.code.Code and bson.regex.Regex are no longer hashable under Python 2
because they are mutable.
This commit is contained in:
Luke Lovett 2015-09-03 14:48:50 -07:00
parent b31399b3b0
commit 18054a19fc
10 changed files with 40 additions and 0 deletions

View File

@ -187,6 +187,9 @@ class Binary(binary_type):
# subclass of str...
return False
def __hash__(self):
return super(Binary, self).__hash__() ^ hash(self.__subtype)
def __ne__(self, other):
return not self == other

View File

@ -76,5 +76,7 @@ class Code(str):
return (self.__scope, str(self)) == (other.__scope, str(other))
return False
__hash__ = None
def __ne__(self, other):
return not self == other

View File

@ -28,6 +28,9 @@ class MaxKey(object):
def __eq__(self, other):
return isinstance(other, MaxKey)
def __hash__(self):
return hash(self._type_marker)
def __ne__(self, other):
return not self == other

View File

@ -28,6 +28,9 @@ class MinKey(object):
def __eq__(self, other):
return isinstance(other, MinKey)
def __hash__(self):
return hash(self._type_marker)
def __ne__(self, other):
return not self == other

View File

@ -104,6 +104,8 @@ class Regex(object):
else:
return NotImplemented
__hash__ = None
def __ne__(self, other):
return not self == other

View File

@ -22,6 +22,7 @@ from bson.tz_util import utc
UPPERBOUND = 4294967296
class Timestamp(object):
"""MongoDB internal timestamps used in the opLog.
"""
@ -83,6 +84,9 @@ class Timestamp(object):
else:
return NotImplemented
def __hash__(self):
return hash(self.time) ^ hash(self.inc)
def __ne__(self, other):
return not self == other

View File

@ -104,6 +104,13 @@ class TestBinary(unittest.TestCase):
self.assertEqual(repr(five),
"Binary(%s, 100)" % (repr(b("test")),))
def test_hash(self):
one = Binary(b"hello world")
two = Binary(b"hello world", 42)
self.assertEqual(hash(Binary(b"hello world")), hash(one))
self.assertNotEqual(hash(one), hash(two))
self.assertEqual(hash(Binary(b"hello world", 42)), hash(two))
def test_legacy_java_uuid(self):
if not should_test_uuid:
raise SkipTest("No uuid module")

View File

@ -775,6 +775,9 @@ class TestBSON(unittest.TestCase):
unicode_regex = re.compile('', re.U)
self.assertEqual(re.U, Regex.from_native(unicode_regex).flags)
def test_regex_hash(self):
self.assertRaises(TypeError, hash, Regex('hello'))
def test_exception_wrapping(self):
# No matter what exception is raised while trying to decode BSON,
# the final exception always matches InvalidBSON and the original
@ -844,6 +847,11 @@ class TestBSON(unittest.TestCase):
self.assertTrue(MaxKey() != MinKey())
self.assertFalse(MaxKey() == MinKey())
def test_minkey_maxkey_hash(self):
self.assertEqual(hash(MaxKey()), hash(MaxKey()))
self.assertEqual(hash(MinKey()), hash(MinKey()))
self.assertNotEqual(hash(MaxKey()), hash(MinKey()))
def test_timestamp_comparison(self):
# Timestamp is initialized with time, inc. Time is the more
# significant comparand.

View File

@ -76,6 +76,9 @@ class TestCode(unittest.TestCase):
self.assertFalse(b != Code("hello"))
self.assertFalse(b != Code("hello", {}))
def test_hash(self):
self.assertRaises(TypeError, hash, Code("hello world"))
def test_scope_preserved(self):
a = Code("hello")
b = Code("hello", {"foo": 5})

View File

@ -69,6 +69,11 @@ class TestTimestamp(unittest.TestCase):
# Explicitly test inequality
self.assertFalse(t != Timestamp(1, 1))
def test_hash(self):
self.assertEqual(hash(Timestamp(1, 2)), hash(Timestamp(1, 2)))
self.assertNotEqual(hash(Timestamp(1, 2)), hash(Timestamp(1, 3)))
self.assertNotEqual(hash(Timestamp(1, 2)), hash(Timestamp(2, 2)))
def test_repr(self):
t = Timestamp(0, 0)
self.assertEqual(repr(t), "Timestamp(0, 0)")