From 18054a19fc693a0d7333b9c647dc4e4a1083e69b Mon Sep 17 00:00:00 2001 From: Luke Lovett Date: Thu, 3 Sep 2015 14:48:50 -0700 Subject: [PATCH] PYTHON-977 - Fix __hash__ method on BSON types that inherit from Python builtin types. In Python 2, objects automatically inherit the __hash__ of their parent class. In Python 3, objects that override __eq__ do not automatically inherit __hash__, so these objects were not hashable under Python 3. Additionally, mutable BSON types and types that override __eq__ but did not explicitly define __hash__ had broken __hash__ methods under Python 2. This commit unifies the hashing behavior between Python versions and fixes the __hash__ methods such that two BSON objects hash the same only if they are equal. N.B.: bson.code.Code and bson.regex.Regex are no longer hashable under Python 2 because they are mutable. --- bson/binary.py | 3 +++ bson/code.py | 2 ++ bson/max_key.py | 3 +++ bson/min_key.py | 3 +++ bson/regex.py | 2 ++ bson/timestamp.py | 4 ++++ test/test_binary.py | 7 +++++++ test/test_bson.py | 8 ++++++++ test/test_code.py | 3 +++ test/test_timestamp.py | 5 +++++ 10 files changed, 40 insertions(+) diff --git a/bson/binary.py b/bson/binary.py index 58626c0fb..59e4d0ba7 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -187,6 +187,9 @@ class Binary(binary_type): # subclass of str... 
return False + def __hash__(self): + return super(Binary, self).__hash__() ^ hash(self.__subtype) + def __ne__(self, other): return not self == other diff --git a/bson/code.py b/bson/code.py index 28dbceb37..4530adf99 100644 --- a/bson/code.py +++ b/bson/code.py @@ -76,5 +76,7 @@ class Code(str): return (self.__scope, str(self)) == (other.__scope, str(other)) return False + __hash__ = None + def __ne__(self, other): return not self == other diff --git a/bson/max_key.py b/bson/max_key.py index 9ed9ab565..4420b9049 100644 --- a/bson/max_key.py +++ b/bson/max_key.py @@ -28,6 +28,9 @@ class MaxKey(object): def __eq__(self, other): return isinstance(other, MaxKey) + def __hash__(self): + return hash(self._type_marker) + def __ne__(self, other): return not self == other diff --git a/bson/min_key.py b/bson/min_key.py index ee135aff0..ec40781b2 100644 --- a/bson/min_key.py +++ b/bson/min_key.py @@ -28,6 +28,9 @@ class MinKey(object): def __eq__(self, other): return isinstance(other, MinKey) + def __hash__(self): + return hash(self._type_marker) + def __ne__(self, other): return not self == other diff --git a/bson/regex.py b/bson/regex.py index c37210c25..99535e3b4 100644 --- a/bson/regex.py +++ b/bson/regex.py @@ -104,6 +104,8 @@ class Regex(object): else: return NotImplemented + __hash__ = None + def __ne__(self, other): return not self == other diff --git a/bson/timestamp.py b/bson/timestamp.py index edf3666c4..8d8dd114c 100644 --- a/bson/timestamp.py +++ b/bson/timestamp.py @@ -22,6 +22,7 @@ from bson.tz_util import utc UPPERBOUND = 4294967296 + class Timestamp(object): """MongoDB internal timestamps used in the opLog. 
""" @@ -83,6 +84,9 @@ class Timestamp(object): else: return NotImplemented + def __hash__(self): + return hash(self.time) ^ hash(self.inc) + def __ne__(self, other): return not self == other diff --git a/test/test_binary.py b/test/test_binary.py index c45e1f1d3..587bcdf91 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -104,6 +104,13 @@ class TestBinary(unittest.TestCase): self.assertEqual(repr(five), "Binary(%s, 100)" % (repr(b("test")),)) + def test_hash(self): + one = Binary(b"hello world") + two = Binary(b"hello world", 42) + self.assertEqual(hash(Binary(b"hello world")), hash(one)) + self.assertNotEqual(hash(one), hash(two)) + self.assertEqual(hash(Binary(b"hello world", 42)), hash(two)) + def test_legacy_java_uuid(self): if not should_test_uuid: raise SkipTest("No uuid module") diff --git a/test/test_bson.py b/test/test_bson.py index 00b48f9a1..086b5a2a9 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -775,6 +775,9 @@ class TestBSON(unittest.TestCase): unicode_regex = re.compile('', re.U) self.assertEqual(re.U, Regex.from_native(unicode_regex).flags) + def test_regex_hash(self): + self.assertRaises(TypeError, hash, Regex('hello')) + def test_exception_wrapping(self): # No matter what exception is raised while trying to decode BSON, # the final exception always matches InvalidBSON and the original @@ -844,6 +847,11 @@ class TestBSON(unittest.TestCase): self.assertTrue(MaxKey() != MinKey()) self.assertFalse(MaxKey() == MinKey()) + def test_minkey_maxkey_hash(self): + self.assertEqual(hash(MaxKey()), hash(MaxKey())) + self.assertEqual(hash(MinKey()), hash(MinKey())) + self.assertNotEqual(hash(MaxKey()), hash(MinKey())) + def test_timestamp_comparison(self): # Timestamp is initialized with time, inc. Time is the more # significant comparand. 
diff --git a/test/test_code.py b/test/test_code.py index 55c5759b7..1b3b9895e 100644 --- a/test/test_code.py +++ b/test/test_code.py @@ -76,6 +76,9 @@ class TestCode(unittest.TestCase): self.assertFalse(b != Code("hello")) self.assertFalse(b != Code("hello", {})) + def test_hash(self): + self.assertRaises(TypeError, hash, Code("hello world")) + def test_scope_preserved(self): a = Code("hello") b = Code("hello", {"foo": 5}) diff --git a/test/test_timestamp.py b/test/test_timestamp.py index 2eb67d907..06d343329 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -69,6 +69,11 @@ class TestTimestamp(unittest.TestCase): # Explicitly test inequality self.assertFalse(t != Timestamp(1, 1)) + def test_hash(self): + self.assertEqual(hash(Timestamp(1, 2)), hash(Timestamp(1, 2))) + self.assertNotEqual(hash(Timestamp(1, 2)), hash(Timestamp(1, 3))) + self.assertNotEqual(hash(Timestamp(1, 2)), hash(Timestamp(2, 2))) + def test_repr(self): t = Timestamp(0, 0) self.assertEqual(repr(t), "Timestamp(0, 0)")