PYTHON-1330 Decode JSON binary type 0 to Python 3 bytes.

This commit is contained in:
Shane Harvey 2017-07-19 09:55:34 -07:00
parent 75b2b1fc41
commit 6a11888a9c
4 changed files with 43 additions and 3 deletions

View File

@ -412,6 +412,8 @@ def _get_binary(doc, json_options):
return uuid.UUID(bytes=data)
if subtype == UUID_SUBTYPE:
return uuid.UUID(bytes=data)
if PY3 and subtype == 0:
return data
return Binary(data, subtype)

View File

@ -34,6 +34,9 @@ Changes and Deprecations:
MongoDB >= 3.0. Instead, it returns an empty
:class:`~pymongo.command_cursor.CommandCursor` to make the behavior
consistent across all MongoDB versions.
- In Python 3, :meth:`~bson.json_util.loads` now automatically decodes JSON
binary with a subtype of 0 into :class:`bytes` instead of
:class:`~bson.binary.Binary`. See the :doc:`/python3` for more details.
Changes in Version 3.4
----------------------

View File

@ -14,7 +14,7 @@ Are there any PyMongo behavior changes with Python 3?
Only one intentional change. Instances of :class:`bytes`
are encoded as BSON type 5 (Binary data) with subtype 0.
In Python 3 they are decoded back to :class:`bytes`. In
Python 2 they will be decoded to :class:`~bson.binary.Binary`
Python 2 they are decoded to :class:`~bson.binary.Binary`
with subtype 0.
For example, let's insert a :class:`bytes` instance using Python 3 then
@ -42,6 +42,30 @@ to :class:`~bson.binary.Binary`::
{u'binary': Binary('this is a byte string', 0), u'_id': ObjectId('4f9086b1fba5222021000000')}
There is a similar change in behavior when parsing JSON binary values with
subtype 0. In Python 3 they are decoded to :class:`bytes`. In Python 2 they
are decoded to :class:`~bson.binary.Binary` with subtype 0.
For example, let's decode a JSON binary value with subtype 0 using Python 3.
Notice the byte string is decoded to :class:`bytes`::
Python 3.6.1 (v3.6.1:69c0db5050, Mar 21 2017, 01:21:04)
[GCC 4.2.1 (Apple Inc. build 5666) (dot 3)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from bson.json_util import loads
>>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}')
{'b': b'this is a byte string'}
Now decode the same JSON in Python 2. Notice the byte string is decoded
to :class:`~bson.binary.Binary`::
Python 2.7.10 (default, Feb 7 2017, 00:08:15)
[GCC 4.2.1 Compatible Apple LLVM 8.0.0 (clang-800.0.34)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from bson.json_util import loads
>>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}')
{u'b': Binary('this is a byte string', 0)}
Why can't I share pickled ObjectIds between some versions of Python 2 and 3?
----------------------------------------------------------------------------

View File

@ -276,7 +276,10 @@ class TestJsonUtil(unittest.TestCase):
strict_uuid=True, uuid_representation=CSHARP_LEGACY))
def test_binary(self):
bin_type_dict = {"bin": Binary(b"\x00\x01\x02\x03\x04")}
if PY3:
bin_type_dict = {"bin": b"\x00\x01\x02\x03\x04"}
else:
bin_type_dict = {"bin": Binary(b"\x00\x01\x02\x03\x04")}
md5_type_dict = {
"md5": Binary(b' n7\x18\xaf\t/\xd1\xd1/\x80\xca\xe7q\xcc\xac',
MD5_SUBTYPE)}
@ -286,6 +289,14 @@ class TestJsonUtil(unittest.TestCase):
self.round_trip(md5_type_dict)
self.round_trip(custom_type_dict)
# Binary with subtype 0 is decoded into bytes in Python 3.
bin = json_util.loads(
'{"bin": {"$binary": "AAECAwQ=", "$type": "00"}}')['bin']
if PY3:
self.assertEqual(type(bin), bytes)
else:
self.assertEqual(type(bin), Binary)
# PYTHON-443 ensure old type formats are supported
json_bin_dump = json_util.dumps(bin_type_dict)
self.assertTrue('"$type": "00"' in json_bin_dump)
@ -370,7 +381,7 @@ class TestJsonUtilRoundtrip(IntegrationTest):
{'foo': [1, 2]},
{'bar': {'hello': 'world'}},
{'code': Code("function x() { return 1; }")},
{'bin': Binary(b"\x00\x01\x02\x03\x04")},
{'bin': Binary(b"\x00\x01\x02\x03\x04", USER_DEFINED_SUBTYPE)},
{'dbref': {'_ref': DBRef('simple',
ObjectId('509b8db456c02c5ab7e63c34'))}}
]