PYTHON-2245 Change default uuidRepresentation to UNSPECIFIED (ie disable UUID encoding by default) (#724)

This change also stops decoding both 3 and 4 subtypes as UUIDs.
With standard, only subtype 4 is decoded to UUID and subtype 3 is decoded to Binary.
With legacy representations, only subtype 3 is decoded to UUID and subtype 4 is decoded to Binary.
This commit is contained in:
Shane Harvey 2021-09-14 16:48:52 -07:00 committed by GitHub
parent 3b8961a76e
commit fb20975a1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 305 additions and 258 deletions

View File

@ -69,7 +69,7 @@ from collections import abc as _abc
from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
OLD_UUID_SUBTYPE,
JAVA_LEGACY, CSHARP_LEGACY,
JAVA_LEGACY, CSHARP_LEGACY, STANDARD,
UUID_SUBTYPE)
from bson.code import Code
from bson.codec_options import (
@ -265,20 +265,14 @@ def _get_binary(data, view, position, obj_end, opts, dummy1):
raise InvalidBSON('bad binary object length')
# Convert UUID subtypes to native UUIDs.
# TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0+
if subtype in ALL_UUID_SUBTYPES:
uuid_representation = opts.uuid_representation
uuid_rep = opts.uuid_representation
binary_value = Binary(data[position:end], subtype)
if uuid_representation == UuidRepresentation.UNSPECIFIED:
if ((uuid_rep == UuidRepresentation.UNSPECIFIED) or
(subtype == UUID_SUBTYPE and uuid_rep != STANDARD) or
(subtype == OLD_UUID_SUBTYPE and uuid_rep == STANDARD)):
return binary_value, end
if subtype == UUID_SUBTYPE:
# Legacy behavior: use STANDARD with binary subtype 4.
uuid_representation = UuidRepresentation.STANDARD
elif uuid_representation == UuidRepresentation.STANDARD:
# subtype == OLD_UUID_SUBTYPE
# Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
uuid_representation = UuidRepresentation.PYTHON_LEGACY
return binary_value.as_uuid(uuid_representation), end
return binary_value.as_uuid(uuid_rep), end
# Decode subtype 0 to 'bytes'.
if subtype == 0:

View File

@ -1760,8 +1760,7 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
if (!data) {
goto invalid;
}
/* Encode as UUID or Binary based on options->uuid_rep
* TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0 */
/* Decode as UUID or Binary based on options->uuid_rep */
if (subtype == 3 || subtype == 4) {
PyObject* binary_type = NULL;
PyObject* binary_value = NULL;
@ -1782,15 +1781,12 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
goto uuiderror;
}
if (uuid_rep == UNSPECIFIED) {
if ((uuid_rep == UNSPECIFIED) ||
(subtype == 4 && uuid_rep != STANDARD) ||
(subtype == 3 && uuid_rep == STANDARD)) {
value = binary_value;
Py_INCREF(value);
} else {
if (subtype == 4) {
uuid_rep = STANDARD;
} else if (uuid_rep == STANDARD) {
uuid_rep = PYTHON_LEGACY;
}
value = PyObject_CallMethod(binary_value, "as_uuid", "(i)", uuid_rep);
}

View File

@ -39,7 +39,10 @@ OLD_UUID_SUBTYPE = 3
"""Old BSON binary subtype for a UUID.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using this subtype.
by :mod:`bson` using this subtype when using
:data:`UuidRepresentation.PYTHON_LEGACY`,
:data:`UuidRepresentation.JAVA_LEGACY`, or
:data:`UuidRepresentation.CSHARP_LEGACY`.
.. versionadded:: 2.1
"""
@ -47,8 +50,10 @@ by :mod:`bson` using this subtype.
UUID_SUBTYPE = 4
"""BSON binary subtype for a UUID.
This is the new BSON binary subtype for UUIDs. The
current default is :data:`OLD_UUID_SUBTYPE`.
This is the standard BSON binary subtype for UUIDs.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using this subtype when using
:data:`UuidRepresentation.STANDARD`.
"""

View File

@ -219,7 +219,7 @@ class CodecOptions(_options_base):
naive. Defaults to ``False``.
- `uuid_representation`: The BSON representation to use when encoding
and decoding instances of :class:`~uuid.UUID`. Defaults to
:data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`. New
:data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New
applications should consider setting this to
:data:`~bson.binary.UuidRepresentation.STANDARD` for cross language
compatibility. See :ref:`handling-uuid-data-example` for details.
@ -233,6 +233,11 @@ class CodecOptions(_options_base):
- `type_registry`: Instance of :class:`TypeRegistry` used to customize
encoding and decoding behavior.
.. versionchanged:: 4.0
The default for `uuid_representation` was changed from
:const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
:const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.
.. versionadded:: 3.8
`type_registry` attribute.
@ -245,7 +250,7 @@ class CodecOptions(_options_base):
def __new__(cls, document_class=dict,
tz_aware=False,
uuid_representation=None,
uuid_representation=UuidRepresentation.UNSPECIFIED,
unicode_decode_error_handler="strict",
tzinfo=None, type_registry=None):
if not (issubclass(document_class, _MutableMapping) or
@ -255,9 +260,7 @@ class CodecOptions(_options_base):
"sublass of collections.abc.MutableMapping")
if not isinstance(tz_aware, bool):
raise TypeError("tz_aware must be True or False")
if uuid_representation is None:
uuid_representation = UuidRepresentation.PYTHON_LEGACY
elif uuid_representation not in ALL_UUID_REPRESENTATIONS:
if uuid_representation not in ALL_UUID_REPRESENTATIONS:
raise ValueError("uuid_representation must be a value "
"from bson.binary.UuidRepresentation")
if not isinstance(unicode_decode_error_handler, (str, None)):
@ -327,21 +330,18 @@ class CodecOptions(_options_base):
return CodecOptions(**opts)
DEFAULT_CODEC_OPTIONS = CodecOptions(
uuid_representation=UuidRepresentation.PYTHON_LEGACY)
DEFAULT_CODEC_OPTIONS = CodecOptions()
def _parse_codec_options(options):
"""Parse BSON codec options."""
return CodecOptions(
document_class=options.get(
'document_class', DEFAULT_CODEC_OPTIONS.document_class),
tz_aware=options.get(
'tz_aware', DEFAULT_CODEC_OPTIONS.tz_aware),
uuid_representation=options.get('uuidrepresentation'),
unicode_decode_error_handler=options.get(
'unicode_decode_error_handler',
DEFAULT_CODEC_OPTIONS.unicode_decode_error_handler),
tzinfo=options.get('tzinfo', DEFAULT_CODEC_OPTIONS.tzinfo),
type_registry=options.get(
'type_registry', DEFAULT_CODEC_OPTIONS.type_registry))
kwargs = {}
for k in set(options) & {'document_class', 'tz_aware',
'uuidrepresentation',
'unicode_decode_error_handler', 'tzinfo',
'type_registry'}:
if k == 'uuidrepresentation':
kwargs['uuid_representation'] = options[k]
else:
kwargs[k] = options[k]
return CodecOptions(**kwargs)

View File

@ -222,7 +222,7 @@ class JSONOptions(CodecOptions):
:class:`collections.MutableMapping`. Defaults to :class:`dict`.
- `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
to use when encoding and decoding instances of :class:`uuid.UUID`.
Defaults to :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`.
Defaults to :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.
- `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
`Date` will be decoded to timezone aware instances of
:class:`datetime.datetime`. Otherwise they will be naive. Defaults
@ -238,6 +238,9 @@ class JSONOptions(CodecOptions):
.. versionchanged:: 4.0
The default for `json_mode` was changed from :const:`JSONMode.LEGACY`
to :const:`JSONMode.RELAXED`.
The default for `uuid_representation` was changed from
:const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
:const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.
.. versionchanged:: 3.5
Accepts the optional parameter `json_mode`.

View File

@ -17,6 +17,13 @@ Breaking Changes in 4.0
.......................
- Removed support for Python 2.7, 3.4, and 3.5. Python 3.6+ is now required.
- The default uuid_representation for :class:`~bson.codec_options.CodecOptions`,
:class:`~bson.json_util.JSONOptions`, and
:class:`~pymongo.mongo_client.MongoClient` has been changed from
:data:`bson.binary.UuidRepresentation.PYTHON_LEGACY` to
:data:`bson.binary.UuidRepresentation.UNSPECIFIED`. Attempting to encode a
:class:`uuid.UUID` instance to BSON or JSON now produces an error by default.
See :ref:`handling-uuid-data-example` for details.
- Removed the ``waitQueueMultiple`` keyword argument to
:class:`~pymongo.mongo_client.MongoClient` and removed
:exc:`pymongo.errors.ExceededMaxWaiters`.

View File

@ -147,16 +147,15 @@ Consider the following situation::
collection.insert_one({'_id': 'foo', 'uuid': input_uuid})
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})['_id'] == 'foo'
# Retrieving this document using UuidRepresentation.STANDARD returns a native UUID
# Retrieving this document using UuidRepresentation.STANDARD returns a Binary instance
std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
std_collection = client.testdb.get_collection('test', codec_options=std_opts)
doc = std_collection.find_one({'_id': 'foo'})
assert doc['uuid'] == input_uuid
assert isinstance(doc['uuid'], Binary)
# Round-tripping the retrieved document silently changes the Binary subtype to 4
# Round-tripping the retrieved document yields the exact same document
std_collection.replace_one({'_id': 'foo'}, doc)
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None
round_tripped_doc = collection.find_one({'uuid': Binary(input_uuid.bytes, 4)})
round_tripped_doc = collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})
assert doc == round_tripped_doc
@ -230,7 +229,7 @@ Applications can set the UUID representation in one of the following ways:
#. At the ``MongoClient`` level using the ``uuidRepresentation`` URI option,
e.g.::
client = MongoClient("mongodb://a:27107/?uuidRepresentation=javaLegacy")
client = MongoClient("mongodb://a:27107/?uuidRepresentation=standard")
Valid values are:
@ -240,6 +239,12 @@ Applications can set the UUID representation in one of the following ways:
* - Value
- UUID Representation
* - ``unspecified``
- :ref:`unspecified-representation-details`
* - ``standard``
- :ref:`standard-representation-details`
* - ``pythonLegacy``
- :ref:`python-legacy-representation-details`
@ -249,17 +254,11 @@ Applications can set the UUID representation in one of the following ways:
* - ``csharpLegacy``
- :ref:`csharp-legacy-representation-details`
* - ``standard``
- :ref:`standard-representation-details`
* - ``unspecified``
- :ref:`unspecified-representation-details`
#. At the ``MongoClient`` level using the ``uuidRepresentation`` kwarg
option, e.g.::
from bson.binary import UuidRepresentation
client = MongoClient(uuidRepresentation=UuidRepresentation.PYTHON_LEGACY)
client = MongoClient(uuidRepresentation=UuidRepresentation.STANDARD)
#. At the ``Database`` or ``Collection`` level by supplying a suitable
:class:`~bson.codec_options.CodecOptions` instance, e.g.::
@ -288,186 +287,51 @@ Supported UUID Representations
- Decode :class:`~bson.binary.Binary` subtype 4 to
- Decode :class:`~bson.binary.Binary` subtype 3 to
* - :ref:`python-legacy-representation-details`
- Yes, in PyMongo>=2.9,<4
- :class:`~bson.binary.Binary` subtype 3 with standard byte-order
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4
- :class:`uuid.UUID`
* - :ref:`java-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with Java legacy byte-order
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4
- :class:`uuid.UUID`
* - :ref:`csharp-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with C# legacy byte-order
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4
- :class:`uuid.UUID`
* - :ref:`standard-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 3 in PyMongo>=4
- :class:`~bson.binary.Binary` subtype 3
* - :ref:`unspecified-representation-details`
- Yes, in PyMongo>=4
- Raise :exc:`ValueError`
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 3 in PyMongo>=4
- :class:`~bson.binary.Binary` subtype 3
* - :ref:`python-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with standard byte-order
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
* - :ref:`java-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with Java legacy byte-order
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
* - :ref:`csharp-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with C# legacy byte-order
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
We now detail the behavior and use-case for each supported UUID
representation.
.. _python-legacy-representation-details:
``PYTHON_LEGACY``
^^^^^^^^^^^^^^^^^
.. attention:: This uuid representation should be used when reading UUIDs
generated by existing applications that use the Python driver
but **don't** explicitly set a UUID representation.
.. attention:: :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`
has been the default uuid representation since PyMongo 2.9.
The :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` representation
corresponds to the legacy representation of UUIDs used by PyMongo. This
representation conforms with
`RFC 4122 Section 4.1.2 <https://tools.ietf.org/html/rfc4122#section-4.1.2>`_.
The following example illustrates the use of this representation::
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
from bson.binary import UuidRepresentation
# No configured UUID representation
collection = client.python_legacy.get_collection('test', codec_options=DEFAULT_CODEC_OPTIONS)
# Using UuidRepresentation.PYTHON_LEGACY
pylegacy_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
pylegacy_collection = client.python_legacy.get_collection('test', codec_options=pylegacy_opts)
# UUIDs written by PyMongo with no UuidRepresentation configured can be queried using PYTHON_LEGACY
uuid_1 = uuid4()
collection.insert_one({'uuid': uuid_1})
document = pylegacy_collection.find_one({'uuid': uuid_1})
# UUIDs written using PYTHON_LEGACY can be read by PyMongo with no UuidRepresentation configured
uuid_2 = uuid4()
pylegacy_collection.insert_one({'uuid': uuid_2})
document = collection.find_one({'uuid': uuid_2})
``PYTHON_LEGACY`` encodes native :class:`uuid.UUID` objects to
:class:`~bson.binary.Binary` subtype 3 objects, preserving the same
byte-order as :attr:`~uuid.UUID.bytes`::
from bson.binary import Binary
document = collection.find_one({'uuid': Binary(uuid_2.bytes, subtype=3)})
assert document['uuid'] == uuid_2
.. _java-legacy-representation-details:
``JAVA_LEGACY``
^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
written to MongoDB by the legacy applications (i.e. applications that don't
use the ``STANDARD`` representation) using the Java driver.
The :data:`~bson.binary.UuidRepresentation.JAVA_LEGACY` representation
corresponds to the legacy representation of UUIDs used by the MongoDB Java
Driver.
.. note:: The ``JAVA_LEGACY`` representation reverses the order of bytes 0-7,
and bytes 8-15.
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
Let us assume that an application used the Java driver without an explicitly
specified UUID representation to insert the example UUID
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
value using PyMongo with no UUID representation specified, we end up with an
entirely different UUID::
UUID('77665544-3322-1100-ffee-ddccbbaa9988')
However, if we explicitly set the representation to
:data:`~bson.binary.UuidRepresentation.JAVA_LEGACY`, we get the correct result::
UUID('00112233-4455-6677-8899-aabbccddeeff')
PyMongo uses the specified UUID representation to reorder the BSON bytes and
load them correctly. ``JAVA_LEGACY`` encodes native :class:`uuid.UUID` objects
to :class:`~bson.binary.Binary` subtype 3 objects, while performing the same
byte-reordering as the legacy Java driver's UUID to BSON encoder.
.. _csharp-legacy-representation-details:
``CSHARP_LEGACY``
^^^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
written to MongoDB by the legacy applications (i.e. applications that don't
use the ``STANDARD`` representation) using the C# driver.
The :data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation
corresponds to the legacy representation of UUIDs used by the MongoDB C#
Driver.
.. note:: The ``CSHARP_LEGACY`` representation reverses the order of bytes 0-3,
bytes 4-5, and bytes 6-7.
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
Let us assume that an application used the C# driver without an explicitly
specified UUID representation to insert the example UUID
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
value using PyMongo with no UUID representation specified, we end up with an
entirely different UUID::
UUID('33221100-5544-7766-8899-aabbccddeeff')
However, if we explicitly set the representation to
:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY`, we get the correct result::
UUID('00112233-4455-6677-8899-aabbccddeeff')
PyMongo uses the specified UUID representation to reorder the BSON bytes and
load them correctly. ``CSHARP_LEGACY`` encodes native :class:`uuid.UUID`
objects to :class:`~bson.binary.Binary` subtype 3 objects, while performing
the same byte-reordering as the legacy C# driver's UUID to BSON encoder.
.. _standard-representation-details:
``STANDARD``
^^^^^^^^^^^^
.. attention:: This UUID representation should be used by new applications
that have never stored UUIDs in MongoDB.
The :data:`~bson.binary.UuidRepresentation.STANDARD` representation
enables cross-language compatibility by ensuring the same byte-ordering
when encoding UUIDs from all drivers. UUIDs written by a driver with this
representation configured will be handled correctly by every other driver,
provided it is also configured with the ``STANDARD`` representation.
``STANDARD`` encodes native :class:`uuid.UUID` objects to
:class:`~bson.binary.Binary` subtype 4 objects.
.. _unspecified-representation-details:
``UNSPECIFIED``
^^^^^^^^^^^^^^^
.. attention:: Starting in PyMongo 4.0,
:data:`~bson.binary.UuidRepresentation.UNSPECIFIED` will be the default
:data:`~bson.binary.UuidRepresentation.UNSPECIFIED` is the default
UUID representation used by PyMongo.
The :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` representation
prevents the incorrect interpretation of UUID bytes by stopping short of
automatically converting UUID fields in BSON to native UUID types. Loading
automatically converting UUID fields in BSON to native UUID types. Decoding
a UUID when using this representation returns a :class:`~bson.binary.Binary`
object instead. If required, users can coerce the decoded
:class:`~bson.binary.Binary` objects into native UUIDs using the
@ -513,5 +377,135 @@ Instead, applications using :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`
must explicitly coerce a native UUID using the
:meth:`~bson.binary.Binary.from_uuid` method::
explicit_binary = Binary.from_uuid(uuid4(), UuidRepresentation.PYTHON_LEGACY)
explicit_binary = Binary.from_uuid(uuid4(), UuidRepresentation.STANDARD)
unspec_collection.insert_one({'_id': 'bar', 'uuid': explicit_binary})
.. _standard-representation-details:
``STANDARD``
^^^^^^^^^^^^
.. attention:: This UUID representation should be used by new applications or
applications that are encoding and/or decoding UUIDs in MongoDB for the
first time.
The :data:`~bson.binary.UuidRepresentation.STANDARD` representation
enables cross-language compatibility by ensuring the same byte-ordering
when encoding UUIDs from all drivers. UUIDs written by a driver with this
representation configured will be handled correctly by every other driver,
provided it is also configured with the ``STANDARD`` representation.
``STANDARD`` encodes native :class:`uuid.UUID` objects to
:class:`~bson.binary.Binary` subtype 4 objects.
.. _python-legacy-representation-details:
``PYTHON_LEGACY``
^^^^^^^^^^^^^^^^^
.. attention:: This uuid representation should be used when reading UUIDs
generated by existing applications that use the Python driver
but **don't** explicitly set a UUID representation.
.. attention:: :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`
was the default uuid representation in PyMongo 3.
The :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` representation
corresponds to the legacy representation of UUIDs used by PyMongo. This
representation conforms with
`RFC 4122 Section 4.1.2 <https://tools.ietf.org/html/rfc4122#section-4.1.2>`_.
The following example illustrates the use of this representation::
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
from bson.binary import Binary, UuidRepresentation
# No configured UUID representation
collection = client.python_legacy.get_collection('test', codec_options=DEFAULT_CODEC_OPTIONS)
# Using UuidRepresentation.PYTHON_LEGACY
pylegacy_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
pylegacy_collection = client.python_legacy.get_collection('test', codec_options=pylegacy_opts)
# UUIDs written by PyMongo 3 with no UuidRepresentation configured
# (or PyMongo 4.0 with PYTHON_LEGACY) can be queried using PYTHON_LEGACY
uuid_1 = uuid4()
pylegacy_collection.insert_one({'uuid': uuid_1})
document = pylegacy_collection.find_one({'uuid': uuid_1})
``PYTHON_LEGACY`` encodes native :class:`uuid.UUID` objects to
:class:`~bson.binary.Binary` subtype 3 objects, preserving the same
byte-order as :attr:`~uuid.UUID.bytes`::
from bson.binary import Binary
document = pylegacy_collection.find_one({'uuid': Binary(uuid_1.bytes, subtype=3)})
assert document['uuid'] == uuid_1
.. _java-legacy-representation-details:
``JAVA_LEGACY``
^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
written to MongoDB by legacy applications (i.e. applications that don't
use the ``STANDARD`` representation) using the Java driver.
The :data:`~bson.binary.UuidRepresentation.JAVA_LEGACY` representation
corresponds to the legacy representation of UUIDs used by the MongoDB Java
Driver.
.. note:: The ``JAVA_LEGACY`` representation reverses the order of bytes 0-7,
and bytes 8-15.
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
Let us assume that an application used the Java driver without an explicitly
specified UUID representation to insert the example UUID
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
value using ``PYTHON_LEGACY``, we end up with an entirely different UUID::
UUID('77665544-3322-1100-ffee-ddccbbaa9988')
However, if we explicitly set the representation to
:data:`~bson.binary.UuidRepresentation.JAVA_LEGACY`, we get the correct result::
UUID('00112233-4455-6677-8899-aabbccddeeff')
PyMongo uses the specified UUID representation to reorder the BSON bytes and
load them correctly. ``JAVA_LEGACY`` encodes native :class:`uuid.UUID` objects
to :class:`~bson.binary.Binary` subtype 3 objects, while performing the same
byte-reordering as the legacy Java driver's UUID to BSON encoder.
.. _csharp-legacy-representation-details:
``CSHARP_LEGACY``
^^^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
written to MongoDB by legacy applications (i.e. applications that don't
use the ``STANDARD`` representation) using the C# driver.
The :data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation
corresponds to the legacy representation of UUIDs used by the MongoDB C#
Driver.
.. note:: The ``CSHARP_LEGACY`` representation reverses the order of bytes 0-3,
bytes 4-5, and bytes 6-7.
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
Let us assume that an application used the C# driver without an explicitly
specified UUID representation to insert the example UUID
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
value using ``PYTHON_LEGACY``, we end up with an entirely different UUID::
UUID('33221100-5544-7766-8899-aabbccddeeff')
However, if we explicitly set the representation to
:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY`, we get the correct result::
UUID('00112233-4455-6677-8899-aabbccddeeff')
PyMongo uses the specified UUID representation to reorder the BSON bytes and
load them correctly. ``CSHARP_LEGACY`` encodes native :class:`uuid.UUID`
objects to :class:`~bson.binary.Binary` subtype 3 objects, while performing
the same byte-reordering as the legacy C# driver's UUID to BSON encoder.

View File

@ -266,7 +266,7 @@ collection, configured to use :class:`~bson.son.SON` instead of dict:
>>> opts
CodecOptions(document_class=<class 'bson.son.SON'>,
tz_aware=False,
uuid_representation=UuidRepresentation.PYTHON_LEGACY,
uuid_representation=UuidRepresentation.UNSPECIFIED,
unicode_decode_error_handler='strict',
tzinfo=None, type_registry=TypeRegistry(type_codecs=[],
fallback_encoder=None))

View File

@ -774,3 +774,14 @@ subdocument containing a ``$ref`` field would be decoded as a
:class:`~bson.dbref.DBRef`.
.. _DBRef specification: https://github.com/mongodb/specifications/blob/5a8c8d7/source/dbref.rst
Encoding a UUID raises an error by default
..........................................
The default uuid_representation for :class:`~bson.codec_options.CodecOptions`,
:class:`~bson.json_util.JSONOptions`, and
:class:`~pymongo.mongo_client.MongoClient` has been changed from
:data:`bson.binary.UuidRepresentation.PYTHON_LEGACY` to
:data:`bson.binary.UuidRepresentation.UNSPECIFIED`. Attempting to encode a
:class:`uuid.UUID` instance to BSON or JSON now produces an error by default.
See :ref:`handling-uuid-data-example` for details.

View File

@ -325,8 +325,8 @@ class MongoClient(common.BaseObject):
speed. 9 is best compression. Defaults to -1.
- `uuidRepresentation`: The BSON representation to use when encoding
from and decoding to instances of :class:`~uuid.UUID`. Valid
values are `pythonLegacy` (the default), `javaLegacy`,
`csharpLegacy`, `standard` and `unspecified`. New applications
values are `pythonLegacy`, `javaLegacy`, `csharpLegacy`, `standard`
and `unspecified` (the default). New applications
should consider setting this to `standard` for cross language
compatibility. See :ref:`handling-uuid-data-example` for details.
- `unicode_decode_error_handler`: The error handler to apply when
@ -501,6 +501,8 @@ class MongoClient(common.BaseObject):
.. versionchanged:: 4.0
Removed the ``waitQueueMultiple`` and ``socketKeepAlive`` keyword
arguments.
The default for `uuidRepresentation` was changed from
``pythonLegacy`` to ``unspecified``.
.. versionchanged:: 3.12
Added the ``server_api`` keyword argument.

View File

@ -145,15 +145,18 @@ class TestBinary(unittest.TestCase):
self.assertEqual(hash(Binary(b"hello world", 42)), hash(two))
def test_uuid_subtype_4(self):
"""uuid_representation should be ignored when decoding subtype 4 for
all UuidRepresentation values except UNSPECIFIED."""
"""Only STANDARD should decode subtype 4 as native uuid."""
expected_uuid = uuid.uuid4()
doc = {"uuid": Binary(expected_uuid.bytes, 4)}
expected_bin = Binary(expected_uuid.bytes, 4)
doc = {"uuid": expected_bin}
encoded = encode(doc)
for uuid_representation in (set(ALL_UUID_REPRESENTATIONS) -
{UuidRepresentation.UNSPECIFIED}):
options = CodecOptions(uuid_representation=uuid_representation)
self.assertEqual(expected_uuid, decode(encoded, options)["uuid"])
for uuid_rep in (UuidRepresentation.PYTHON_LEGACY,
UuidRepresentation.JAVA_LEGACY,
UuidRepresentation.CSHARP_LEGACY):
opts = CodecOptions(uuid_representation=uuid_rep)
self.assertEqual(expected_bin, decode(encoded, opts)["uuid"])
opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
self.assertEqual(expected_uuid, decode(encoded, opts)["uuid"])
def test_legacy_java_uuid(self):
# Test decoding
@ -522,29 +525,25 @@ class TestUuidSpecImplicitCoding(IntegrationTest):
# Implicit decoding prose test #1
def test_decoding_1(self):
# TODO: these assertions will change after PYTHON-2245. Specifically,
# the 'standard' field will be decoded as a Binary subtype 4.
binary_value = Binary.from_uuid(
self.uuid, UuidRepresentation.PYTHON_LEGACY)
standard_binary = Binary.from_uuid(
self.uuid, UuidRepresentation.STANDARD)
self._test_decoding(
"javaLegacy", UuidRepresentation.JAVA_LEGACY,
self.uuid, self.uuid)
standard_binary, self.uuid)
self._test_decoding(
"csharpLegacy", UuidRepresentation.CSHARP_LEGACY,
self.uuid, self.uuid)
standard_binary, self.uuid)
self._test_decoding(
"pythonLegacy", UuidRepresentation.PYTHON_LEGACY,
self.uuid, self.uuid)
standard_binary, self.uuid)
# Implicit decoding prose test #2
def test_decoding_2(self):
# TODO: these assertions will change after PYTHON-2245. Specifically,
# the 'legacy' field will be decoded as a Binary subtype 3.
binary_value = Binary.from_uuid(
legacy_binary = Binary.from_uuid(
self.uuid, UuidRepresentation.PYTHON_LEGACY)
self._test_decoding(
"standard", UuidRepresentation.PYTHON_LEGACY,
self.uuid, binary_value.as_uuid(UuidRepresentation.PYTHON_LEGACY))
self.uuid, legacy_binary)
# Implicit decoding prose test #3
def test_decoding_3(self):

View File

@ -638,8 +638,13 @@ class TestBSON(unittest.TestCase):
def test_uuid(self):
id = uuid.uuid4()
transformed_id = decode(encode({"id": id}))["id"]
# The default uuid_representation is UNSPECIFIED
with self.assertRaisesRegex(ValueError, 'cannot encode native uuid'):
bson.decode_all(encode({'uuid': id}))
opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
transformed_id = decode(encode({"id": id}, codec_options=opts),
codec_options=opts)["id"]
self.assertTrue(isinstance(transformed_id, uuid.UUID))
self.assertEqual(id, transformed_id)
self.assertNotEqual(uuid.uuid4(), transformed_id)
@ -648,8 +653,9 @@ class TestBSON(unittest.TestCase):
id = uuid.uuid4()
legacy = Binary.from_uuid(id, UuidRepresentation.PYTHON_LEGACY)
self.assertEqual(3, legacy.subtype)
transformed = decode(encode({"uuid": legacy}))["uuid"]
self.assertTrue(isinstance(transformed, uuid.UUID))
bin = decode(encode({"uuid": legacy}))["uuid"]
self.assertTrue(isinstance(bin, Binary))
transformed = bin.as_uuid(UuidRepresentation.PYTHON_LEGACY)
self.assertEqual(id, transformed)
# The C extension was segfaulting on unicode RegExs, so we have this test
@ -965,7 +971,7 @@ class TestCodecOptions(unittest.TestCase):
def test_codec_options_repr(self):
r = ("CodecOptions(document_class=dict, tz_aware=False, "
"uuid_representation=UuidRepresentation.PYTHON_LEGACY, "
"uuid_representation=UuidRepresentation.UNSPECIFIED, "
"unicode_decode_error_handler='strict', "
"tzinfo=None, type_registry=TypeRegistry(type_codecs=[], "
"fallback_encoder=None))")
@ -973,17 +979,16 @@ class TestCodecOptions(unittest.TestCase):
def test_decode_all_defaults(self):
# Test decode_all()'s default document_class is dict and tz_aware is
# False. The default uuid_representation is PYTHON_LEGACY but this
# decodes same as STANDARD, so all this test proves about UUID decoding
# is that it's not CSHARP_LEGACY or JAVA_LEGACY.
# False.
doc = {'sub_document': {},
'uuid': uuid.uuid4(),
'dt': datetime.datetime.utcnow()}
decoded = bson.decode_all(bson.encode(doc))[0]
self.assertIsInstance(decoded['sub_document'], dict)
self.assertEqual(decoded['uuid'], doc['uuid'])
self.assertIsNone(decoded['dt'].tzinfo)
# The default uuid_representation is UNSPECIFIED
with self.assertRaisesRegex(ValueError, 'cannot encode native uuid'):
bson.decode_all(bson.encode({'uuid': uuid.uuid4()}))
def test_unicode_decode_error_handler(self):
enc = encode({"keystr": "foobar"})

View File

@ -896,7 +896,8 @@ class TestDatabaseChangeStream(TestChangeStreamBase, APITestsMixin):
with self.change_stream() as change_stream:
for collname in collnames:
self._insert_and_check(
change_stream, collname, {'_id': uuid.uuid4()})
change_stream, collname,
{'_id': Binary.from_uuid(uuid.uuid4())})
def test_isolation(self):
# Ensure inserts to other dbs don't show up in our ChangeStream.
@ -905,9 +906,11 @@ class TestDatabaseChangeStream(TestChangeStreamBase, APITestsMixin):
other_db, self.db, msg="Isolation must be tested on separate DBs")
collname = self.id()
with self.change_stream() as change_stream:
other_db[collname].insert_one({'_id': uuid.uuid4()})
other_db[collname].insert_one(
{'_id': Binary.from_uuid(uuid.uuid4())})
self._insert_and_check(
change_stream, collname, {'_id': uuid.uuid4()})
change_stream, collname,
{'_id': Binary.from_uuid(uuid.uuid4())})
self.client.drop_database(other_db)

View File

@ -19,7 +19,7 @@ import uuid
sys.path[0:0] = [""]
from bson.binary import Binary, PYTHON_LEGACY, STANDARD
from bson.binary import Binary, PYTHON_LEGACY, STANDARD, UuidRepresentation
from bson.codec_options import CodecOptions
from bson.objectid import ObjectId
from pymongo.errors import OperationFailure
@ -40,12 +40,15 @@ class TestCommon(IntegrationTest):
coll.drop()
# Test property
self.assertEqual(PYTHON_LEGACY,
self.assertEqual(UuidRepresentation.UNSPECIFIED,
coll.codec_options.uuid_representation)
# Test basic query
uu = uuid.uuid4()
# Insert as binary subtype 3
coll = self.db.get_collection(
"uuid", CodecOptions(uuid_representation=PYTHON_LEGACY))
legacy_opts = coll.codec_options
coll.insert_one({'uu': uu})
self.assertEqual(uu, coll.find_one({'uu': uu})['uu'])
coll = self.db.get_collection(
@ -53,7 +56,7 @@ class TestCommon(IntegrationTest):
self.assertEqual(STANDARD, coll.codec_options.uuid_representation)
self.assertEqual(None, coll.find_one({'uu': uu}))
uul = Binary.from_uuid(uu, PYTHON_LEGACY)
self.assertEqual(uu, coll.find_one({'uu': uul})['uu'])
self.assertEqual(uul, coll.find_one({'uu': uul})['uu'])
# Test count_documents
self.assertEqual(0, coll.count_documents({'uu': uu}))
@ -98,9 +101,10 @@ class TestCommon(IntegrationTest):
self.assertEqual(5, coll.find_one({'_id': uu})['i'])
# Test command
self.assertEqual(5, self.db.command('findAndModify', 'uuid',
update={'$set': {'i': 6}},
query={'_id': uu})['value']['i'])
self.assertEqual(5, self.db.command(
'findAndModify', 'uuid',
update={'$set': {'i': 6}},
query={'_id': uu}, codec_options=legacy_opts)['value']['i'])
self.assertEqual(6, self.db.command(
'findAndModify', 'uuid',
update={'$set': {'i': 7}},

View File

@ -298,9 +298,22 @@ class TestJsonUtil(unittest.TestCase):
self.assertEqual(dct, rtdct)
self.assertEqual('{"ts": {"$timestamp": {"t": 4, "i": 13}}}', res)
def test_uuid_default(self):
    # Under the default uuid_representation, dumping a document that holds
    # a native uuid.UUID is expected to raise ValueError.
    payload = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
    self.assertRaisesRegex(
        ValueError, 'cannot encode native uuid', json_util.dumps, payload)

    # Loading the legacy "$uuid" JSON form with default options is expected
    # to produce a subtype 4 Binary rather than a native uuid.UUID.
    raw_bytes = b'\xf4z\xc1\x0bX\xccCr\xa5g\x0e\x02\xb2\xc3\xd4y'
    legacy_json = '{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}'
    loaded = json_util.loads(legacy_json)
    self.assertEqual(loaded, {'uuid': Binary(raw_bytes, 4)})
def test_uuid(self):
doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
self.round_trip(doc)
uuid_legacy_opts = LEGACY_JSON_OPTIONS.with_options(
uuid_representation=UuidRepresentation.PYTHON_LEGACY)
self.round_trip(doc, json_options=uuid_legacy_opts)
self.assertEqual(
'{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}',
json_util.dumps(doc, json_options=LEGACY_JSON_OPTIONS))
@ -308,7 +321,8 @@ class TestJsonUtil(unittest.TestCase):
'{"uuid": '
'{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}',
json_util.dumps(
doc, json_options=STRICT_JSON_OPTIONS))
doc, json_options=STRICT_JSON_OPTIONS.with_options(
uuid_representation=UuidRepresentation.PYTHON_LEGACY)))
self.assertEqual(
'{"uuid": '
'{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}',
@ -319,7 +333,8 @@ class TestJsonUtil(unittest.TestCase):
self.assertEqual(
doc, json_util.loads(
'{"uuid": '
'{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}'))
'{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}',
json_options=uuid_legacy_opts))
for uuid_representation in (set(ALL_UUID_REPRESENTATIONS) -
{UuidRepresentation.UNSPECIFIED}):
options = JSONOptions(

View File

@ -19,7 +19,7 @@ import uuid
sys.path[0:0] = [""]
from bson import decode, encode
from bson.binary import Binary, JAVA_LEGACY
from bson.binary import Binary, JAVA_LEGACY, UuidRepresentation
from bson.codec_options import CodecOptions
from bson.errors import InvalidBSON
from bson.raw_bson import RawBSONDocument, DEFAULT_RAW_BSON_OPTIONS
@ -92,7 +92,12 @@ class TestRawBSONDocument(IntegrationTest):
'bin3': Binary(uid.bytes, 3)}
raw = RawBSONDocument(encode(doc))
coll.insert_one(raw)
self.assertEqual(coll.find_one(), {'_id': 1, 'bin4': uid, 'bin3': uid})
self.assertEqual(coll.find_one(), doc)
uuid_coll = coll.with_options(
codec_options=coll.codec_options.with_options(
uuid_representation=UuidRepresentation.STANDARD))
self.assertEqual(uuid_coll.find_one(),
{'_id': 1, 'bin4': uid, 'bin3': Binary(uid.bytes, 3)})
# Test that the raw bytes haven't changed.
raw_coll = coll.with_options(codec_options=DEFAULT_RAW_BSON_OPTIONS)
@ -185,3 +190,7 @@ class TestRawBSONDocument(IntegrationTest):
for rkey, elt in zip(rawdoc, keyvaluepairs):
self.assertEqual(rkey, elt[0])
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()