From 6a11888a9cfccf1f564fa4abfab30e5174f3756f Mon Sep 17 00:00:00 2001 From: Shane Harvey Date: Wed, 19 Jul 2017 09:55:34 -0700 Subject: [PATCH] PYTHON-1330 Decode JSON binary type 0 to Python 3 bytes. --- bson/json_util.py | 2 ++ doc/changelog.rst | 3 +++ doc/python3.rst | 26 +++++++++++++++++++++++++- test/test_json_util.py | 15 +++++++++++++-- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/bson/json_util.py b/bson/json_util.py index df530cf3a..12fd73549 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -412,6 +412,8 @@ def _get_binary(doc, json_options): return uuid.UUID(bytes=data) if subtype == UUID_SUBTYPE: return uuid.UUID(bytes=data) + if PY3 and subtype == 0: + return data return Binary(data, subtype) diff --git a/doc/changelog.rst b/doc/changelog.rst index 709325862..6e3454de1 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -34,6 +34,9 @@ Changes and Deprecations: MongoDB >= 3.0. Instead, it returns an empty :class:`~pymongo.command_cursor.CommandCursor` to make the behavior consistent across all MongoDB versions. +- In Python 3, :meth:`~bson.json_util.loads` now automatically decodes JSON + binary with a subtype of 0 into :class:`bytes` instead of + :class:`~bson.binary.Binary`. See the :doc:`/python3` for more details. Changes in Version 3.4 ---------------------- diff --git a/doc/python3.rst b/doc/python3.rst index c3736346a..e55daddb7 100644 --- a/doc/python3.rst +++ b/doc/python3.rst @@ -14,7 +14,7 @@ Are there any PyMongo behavior changes with Python 3? Only one intentional change. Instances of :class:`bytes` are encoded as BSON type 5 (Binary data) with subtype 0. In Python 3 they are decoded back to :class:`bytes`. In -Python 2 they will be decoded to :class:`~bson.binary.Binary` +Python 2 they are decoded to :class:`~bson.binary.Binary` with subtype 0. 
For example, let's insert a :class:`bytes` instance using Python 3 then @@ -42,6 +42,30 @@ to :class:`~bson.binary.Binary`:: {u'binary': Binary('this is a byte string', 0), u'_id': ObjectId('4f9086b1fba5222021000000')} +There is a similar change in behavior in parsing JSON binary with subtype 0. +In Python 3 they are decoded into :class:`bytes`. In Python 2 they are +decoded to :class:`~bson.binary.Binary` with subtype 0. + +For example, let's decode a JSON binary subtype 0 using Python 3. Notice the +byte string is decoded to :class:`bytes`:: + + Python 3.6.1 (v3.6.1:69c0db5050, Mar 21 2017, 01:21:04) + [GCC 4.2.1 (Apple Inc. build 5666) (dot 3)] on darwin + Type "help", "copyright", "credits" or "license" for more information. + >>> from bson.json_util import loads + >>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}') + {'b': b'this is a byte string'} + +Now decode the same JSON in Python 2. Notice the byte string is decoded +to :class:`~bson.binary.Binary`:: + + Python 2.7.10 (default, Feb 7 2017, 00:08:15) + [GCC 4.2.1 Compatible Apple LLVM 8.0.0 (clang-800.0.34)] on darwin + Type "help", "copyright", "credits" or "license" for more information. + >>> from bson.json_util import loads + >>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}') + {u'b': Binary('this is a byte string', 0)} + Why can't I share pickled ObjectIds between some versions of Python 2 and 3? 
---------------------------------------------------------------------------- diff --git a/test/test_json_util.py b/test/test_json_util.py index 1a74e4abf..3a031232a 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -276,7 +276,10 @@ class TestJsonUtil(unittest.TestCase): strict_uuid=True, uuid_representation=CSHARP_LEGACY)) def test_binary(self): - bin_type_dict = {"bin": Binary(b"\x00\x01\x02\x03\x04")} + if PY3: + bin_type_dict = {"bin": b"\x00\x01\x02\x03\x04"} + else: + bin_type_dict = {"bin": Binary(b"\x00\x01\x02\x03\x04")} md5_type_dict = { "md5": Binary(b' n7\x18\xaf\t/\xd1\xd1/\x80\xca\xe7q\xcc\xac', MD5_SUBTYPE)} @@ -286,6 +289,14 @@ class TestJsonUtil(unittest.TestCase): self.round_trip(md5_type_dict) self.round_trip(custom_type_dict) + # Binary with subtype 0 is decoded into bytes in Python 3. + bin = json_util.loads( + '{"bin": {"$binary": "AAECAwQ=", "$type": "00"}}')['bin'] + if PY3: + self.assertEqual(type(bin), bytes) + else: + self.assertEqual(type(bin), Binary) + # PYTHON-443 ensure old type formats are supported json_bin_dump = json_util.dumps(bin_type_dict) self.assertTrue('"$type": "00"' in json_bin_dump) @@ -370,7 +381,7 @@ class TestJsonUtilRoundtrip(IntegrationTest): {'foo': [1, 2]}, {'bar': {'hello': 'world'}}, {'code': Code("function x() { return 1; }")}, - {'bin': Binary(b"\x00\x01\x02\x03\x04")}, + {'bin': Binary(b"\x00\x01\x02\x03\x04", USER_DEFINED_SUBTYPE)}, {'dbref': {'_ref': DBRef('simple', ObjectId('509b8db456c02c5ab7e63c34'))}} ]