From 3321b66d3404e5b6f92dcd5ef0da599bbb21fbf7 Mon Sep 17 00:00:00 2001 From: Luke Lovett Date: Thu, 3 Sep 2015 14:48:50 -0700 Subject: [PATCH] PYTHON-977 - Fix __hash__ method on BSON types that inherit from Python builtin types. In Python 2, objects automatically inherit the __hash__ of their parent class. In Python 3, objects that override __eq__ do not automatically inherit __hash__, so these objects were not hashable under Python 3. Additionally, mutable BSON types and types that override __eq__ but did not explicitly define __hash__ had broken __hash__ methods under Python 2. This commit unifies the hashing behavior between Python versions and fixes the __hash__ methods such that two BSON objects hash the same only if they are equal. N.B.: bson.code.Code and bson.regex.Regex are no longer hashable under Python 2 because they are mutable. --- bson/binary.py | 3 +++ bson/code.py | 2 ++ bson/max_key.py | 3 +++ bson/min_key.py | 3 +++ bson/regex.py | 2 ++ bson/timestamp.py | 4 ++++ test/test_binary.py | 7 +++++++ test/test_bson.py | 8 ++++++++ test/test_code.py | 3 +++ test/test_timestamp.py | 5 +++++ 10 files changed, 40 insertions(+) diff --git a/bson/binary.py b/bson/binary.py index 4620a1c81..e3b033bb1 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -171,6 +171,9 @@ class Binary(bytes): # subclass of str... 
return False + def __hash__(self): + return super(Binary, self).__hash__() ^ hash(self.__subtype) + def __ne__(self, other): return not self == other diff --git a/bson/code.py b/bson/code.py index 3a5c6b5d6..8a9c55824 100644 --- a/bson/code.py +++ b/bson/code.py @@ -77,5 +77,7 @@ class Code(str): return (self.__scope, str(self)) == (other.__scope, str(other)) return False + __hash__ = None + def __ne__(self, other): return not self == other diff --git a/bson/max_key.py b/bson/max_key.py index 9ed9ab565..4420b9049 100644 --- a/bson/max_key.py +++ b/bson/max_key.py @@ -28,6 +28,9 @@ class MaxKey(object): def __eq__(self, other): return isinstance(other, MaxKey) + def __hash__(self): + return hash(self._type_marker) + def __ne__(self, other): return not self == other diff --git a/bson/min_key.py b/bson/min_key.py index ee135aff0..ec40781b2 100644 --- a/bson/min_key.py +++ b/bson/min_key.py @@ -28,6 +28,9 @@ class MinKey(object): def __eq__(self, other): return isinstance(other, MinKey) + def __hash__(self): + return hash(self._type_marker) + def __ne__(self, other): return not self == other diff --git a/bson/regex.py b/bson/regex.py index 375489465..5fe7e9e21 100644 --- a/bson/regex.py +++ b/bson/regex.py @@ -104,6 +104,8 @@ class Regex(object): else: return NotImplemented + __hash__ = None + def __ne__(self, other): return not self == other diff --git a/bson/timestamp.py b/bson/timestamp.py index d00e29da0..7ea755117 100644 --- a/bson/timestamp.py +++ b/bson/timestamp.py @@ -23,6 +23,7 @@ from bson.tz_util import utc UPPERBOUND = 4294967296 + class Timestamp(object): """MongoDB internal timestamps used in the opLog. 
""" @@ -81,6 +82,9 @@ class Timestamp(object): else: return NotImplemented + def __hash__(self): + return hash(self.time) ^ hash(self.inc) + def __ne__(self, other): return not self == other diff --git a/test/test_binary.py b/test/test_binary.py index 90ff64471..dc1ffb888 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -129,6 +129,13 @@ class TestBinary(unittest.TestCase): self.assertEqual(repr(five), "Binary(%s, 100)" % (repr(b"test"),)) + def test_hash(self): + one = Binary(b"hello world") + two = Binary(b"hello world", 42) + self.assertEqual(hash(Binary(b"hello world")), hash(one)) + self.assertNotEqual(hash(one), hash(two)) + self.assertEqual(hash(Binary(b"hello world", 42)), hash(two)) + def test_legacy_java_uuid(self): # Test decoding data = self.java_data diff --git a/test/test_bson.py b/test/test_bson.py index eae4087c4..261ec0098 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -759,6 +759,9 @@ class TestBSON(unittest.TestCase): unicode_regex = re.compile('', re.U) self.assertEqual(re.U, Regex.from_native(unicode_regex).flags) + def test_regex_hash(self): + self.assertRaises(TypeError, hash, Regex('hello')) + def test_exception_wrapping(self): # No matter what exception is raised while trying to decode BSON, # the final exception always matches InvalidBSON. @@ -815,6 +818,11 @@ class TestBSON(unittest.TestCase): self.assertTrue(MaxKey() != MinKey()) self.assertFalse(MaxKey() == MinKey()) + def test_minkey_maxkey_hash(self): + self.assertEqual(hash(MaxKey()), hash(MaxKey())) + self.assertEqual(hash(MinKey()), hash(MinKey())) + self.assertNotEqual(hash(MaxKey()), hash(MinKey())) + def test_timestamp_comparison(self): # Timestamp is initialized with time, inc. Time is the more # significant comparand. 
diff --git a/test/test_code.py b/test/test_code.py index e439af7bf..aacca7208 100644 --- a/test/test_code.py +++ b/test/test_code.py @@ -77,6 +77,9 @@ class TestCode(unittest.TestCase): self.assertFalse(b != Code("hello")) self.assertFalse(b != Code("hello", {})) + def test_hash(self): + self.assertRaises(TypeError, hash, Code("hello world")) + def test_scope_preserved(self): a = Code("hello") b = Code("hello", {"foo": 5}) diff --git a/test/test_timestamp.py b/test/test_timestamp.py index b4def6c77..bb3358121 100644 --- a/test/test_timestamp.py +++ b/test/test_timestamp.py @@ -69,6 +69,11 @@ class TestTimestamp(unittest.TestCase): # Explicitly test inequality self.assertFalse(t != Timestamp(1, 1)) + def test_hash(self): + self.assertEqual(hash(Timestamp(1, 2)), hash(Timestamp(1, 2))) + self.assertNotEqual(hash(Timestamp(1, 2)), hash(Timestamp(1, 3))) + self.assertNotEqual(hash(Timestamp(1, 2)), hash(Timestamp(2, 2))) + def test_repr(self): t = Timestamp(0, 0) self.assertEqual(repr(t), "Timestamp(0, 0)")