diff --git a/bson/__init__.py b/bson/__init__.py index 088ed0e99..3b013aab7 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -200,7 +200,7 @@ def _get_binary(data, position, obj_end, opts, dummy1): end = position + length if length < 0 or end > obj_end: raise InvalidBSON('bad binary object length') - if subtype in (3, 4): + if subtype == 3: # Java Legacy uuid_representation = opts.uuid_representation if uuid_representation == JAVA_LEGACY: @@ -213,6 +213,8 @@ def _get_binary(data, position, obj_end, opts, dummy1): else: value = uuid.UUID(bytes=data[position:end]) return value, end + if subtype == 4: + return uuid.UUID(bytes=data[position:end]), end # Python3 special case. Decode subtype 0 to 'bytes'. if PY3 and subtype == 0: value = data[position:end] diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index a2927425c..2efac1af6 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -1960,13 +1960,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, * From this point, we hold refs to args, kwargs, and data. * If anything fails, goto uuiderror to clean them up. */ - if (options->uuid_rep == CSHARP_LEGACY) { + if (subtype == 3 && options->uuid_rep == CSHARP_LEGACY) { /* Legacy C# byte order */ if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1) goto uuiderror; } else { - if (options->uuid_rep == JAVA_LEGACY) { + if (subtype == 3 && options->uuid_rep == JAVA_LEGACY) { /* Convert from legacy java byte order */ char big_endian[16]; _fix_java(buffer + *position, big_endian); diff --git a/bson/binary.py b/bson/binary.py index e3b033bb1..c78299b82 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -80,9 +80,11 @@ JAVA_LEGACY = 5 """The Java legacy UUID representation. :class:`uuid.UUID` instances will automatically be encoded to -and decoded from BSON binary, using the Java driver's legacy -byte order with binary subtype :data:`OLD_UUID_SUBTYPE`. +and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, +using the Java driver's legacy byte order. +.. versionchanged:: 3.6 + BSON binary subtype 4 is decoded using RFC-4122 byte order. .. versionadded:: 2.3 """ @@ -90,9 +92,11 @@ CSHARP_LEGACY = 6 """The C#/.net legacy UUID representation. :class:`uuid.UUID` instances will automatically be encoded to -and decoded from BSON binary, using the C# driver's legacy -byte order and binary subtype :data:`OLD_UUID_SUBTYPE`. +and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, +using the C# driver's legacy byte order. +.. versionchanged:: 3.6 + BSON binary subtype 4 is decoded using RFC-4122 byte order. .. versionadded:: 2.3 """ diff --git a/doc/changelog.rst b/doc/changelog.rst index c68ccd177..132ebd9e3 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -18,6 +18,16 @@ Highlights include: :meth:`~pymongo.collection.Collection.aggregate_raw_batches` for use with external libraries that can parse raw batches of BSON data. +Breaking changes include: + +- BSON binary subtype 4 is decoded using RFC-4122 byte order regardless + of the UUID representation. This is a change in behavior for applications + that use UUID representation :data:`bson.binary.JAVA_LEGACY` or + :data:`bson.binary.CSHARP_LEGACY` to decode BSON binary subtype 4. Other + UUID representations, :data:`bson.binary.PYTHON_LEGACY` (the default) and + :data:`bson.binary.STANDARD`, and the decoding of BSON binary subtype 3 + are unchanged. + Changes in Version 3.5.1 ------------------------ diff --git a/test/test_binary.py b/test/test_binary.py index d0aef95e9..2ff6bc3bc 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -135,6 +135,15 @@ class TestBinary(unittest.TestCase): self.assertNotEqual(hash(one), hash(two)) self.assertEqual(hash(Binary(b"hello world", 42)), hash(two)) + def test_uuid_subtype_4(self): + """uuid_representation should be ignored when decoding subtype 4.""" + expected_uuid = uuid.uuid4() + doc = {"uuid": Binary(expected_uuid.bytes, 4)} + encoded = bson.BSON.encode(doc) + for uuid_representation in ALL_UUID_REPRESENTATIONS: + options = CodecOptions(uuid_representation=uuid_representation) + self.assertEqual(expected_uuid, encoded.decode(options)["uuid"]) + def test_legacy_java_uuid(self): # Test decoding data = self.java_data diff --git a/test/test_change_stream.py b/test/test_change_stream.py index fd7bcb9f8..a23109fa1 100644 --- a/test/test_change_stream.py +++ b/test/test_change_stream.py @@ -24,9 +24,8 @@ import uuid sys.path[0:0] = [''] from bson import BSON, ObjectId, SON -from bson.binary import (Binary, - CSHARP_LEGACY, - JAVA_LEGACY, +from bson.binary import (ALL_UUID_REPRESENTATIONS, + Binary, STANDARD, PYTHON_LEGACY) from bson.raw_bson import DEFAULT_RAW_BSON_OPTIONS, RawBSONDocument @@ -314,8 +313,7 @@ class TestChangeStream(IntegrationTest): def test_uuid_representations(self): """Test with uuid document _ids and different uuid_representation.""" - for uuid_representation in (STANDARD, PYTHON_LEGACY, CSHARP_LEGACY, - JAVA_LEGACY): + for uuid_representation in ALL_UUID_REPRESENTATIONS: for id_subtype in (STANDARD, PYTHON_LEGACY): resume_token = None options = self.coll.codec_options.with_options( diff --git a/test/test_json_util.py b/test/test_json_util.py index 3a031232a..385a89915 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -33,8 +33,9 @@ from pymongo.errors import ConfigurationError from bson import json_util, EPOCH_AWARE, EPOCH_NAIVE, SON from bson.json_util import (DatetimeRepresentation, STRICT_JSON_OPTIONS) -from bson.binary import (Binary, MD5_SUBTYPE, USER_DEFINED_SUBTYPE, - JAVA_LEGACY, CSHARP_LEGACY, STANDARD) +from bson.binary import (ALL_UUID_REPRESENTATIONS, Binary, MD5_SUBTYPE, + USER_DEFINED_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY, + STANDARD) from bson.code import Code from bson.dbref import DBRef from bson.int64 import Int64 @@ -268,12 +269,14 @@ class TestJsonUtil(unittest.TestCase): strict_uuid=True, uuid_representation=STANDARD))) self.assertEqual(doc, json_util.loads( '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}')) - self.assertEqual(doc, json_util.loads( - '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}')) - self.round_trip(doc, json_options=json_util.JSONOptions( - strict_uuid=True, uuid_representation=JAVA_LEGACY)) - self.round_trip(doc, json_options=json_util.JSONOptions( - strict_uuid=True, uuid_representation=CSHARP_LEGACY)) + for uuid_representation in ALL_UUID_REPRESENTATIONS: + options = json_util.JSONOptions( + strict_uuid=True, uuid_representation=uuid_representation) + self.round_trip(doc, json_options=options) + # Ignore UUID representation when decoding BSON binary subtype 4. + self.assertEqual(doc, json_util.loads( + '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": ' + '"04"}}', json_options=options)) def test_binary(self): if PY3: