PYTHON-2252 Add examples and documentation for new UUID behavior (#467)
This commit is contained in:
parent
de1e29305c
commit
ff327b3e31
@ -69,6 +69,8 @@ class UuidRepresentation:
|
||||
code. When decoding a BSON binary field with a UUID subtype, a
|
||||
:class:`~bson.binary.Binary` instance will be returned instead of a
|
||||
:class:`uuid.UUID` instance.
|
||||
|
||||
See :ref:`unspecified-representation-details` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
@ -79,6 +81,8 @@ class UuidRepresentation:
|
||||
:class:`uuid.UUID` instances will automatically be encoded to
|
||||
and decoded from BSON binary, using RFC-4122 byte order with
|
||||
binary subtype :data:`UUID_SUBTYPE`.
|
||||
|
||||
See :ref:`standard-representation-details` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
@ -89,6 +93,8 @@ class UuidRepresentation:
|
||||
:class:`uuid.UUID` instances will automatically be encoded to
|
||||
and decoded from BSON binary, using RFC-4122 byte order with
|
||||
binary subtype :data:`OLD_UUID_SUBTYPE`.
|
||||
|
||||
See :ref:`python-legacy-representation-details` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
@ -99,6 +105,8 @@ class UuidRepresentation:
|
||||
:class:`uuid.UUID` instances will automatically be encoded to
|
||||
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
|
||||
using the Java driver's legacy byte order.
|
||||
|
||||
See :ref:`java-legacy-representation-details` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
@ -109,6 +117,8 @@ class UuidRepresentation:
|
||||
:class:`uuid.UUID` instances will automatically be encoded to
|
||||
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
|
||||
using the C# driver's legacy byte order.
|
||||
|
||||
See :ref:`csharp-legacy-representation-details` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
@ -220,6 +230,7 @@ class Binary(bytes):
|
||||
- `uuid_representation`: A member of
|
||||
:class:`~bson.binary.UuidRepresentation`. Default:
|
||||
:const:`~bson.binary.UuidRepresentation.STANDARD`.
|
||||
See :ref:`handling-uuid-data-example` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
@ -236,7 +247,8 @@ class Binary(bytes):
|
||||
"UuidRepresentation.UNSPECIFIED. UUIDs can be manually "
|
||||
"converted to bson.Binary instances using "
|
||||
"bson.Binary.from_uuid() or a different UuidRepresentation "
|
||||
"can be configured.")
|
||||
"can be configured. See the documentation for "
|
||||
"UuidRepresentation for more information.")
|
||||
|
||||
subtype = OLD_UUID_SUBTYPE
|
||||
if uuid_representation == UuidRepresentation.PYTHON_LEGACY:
|
||||
@ -266,6 +278,7 @@ class Binary(bytes):
|
||||
- `uuid_representation`: A member of
|
||||
:class:`~bson.binary.UuidRepresentation`. Default:
|
||||
:const:`~bson.binary.UuidRepresentation.STANDARD`.
|
||||
See :ref:`handling-uuid-data-example` for details.
|
||||
|
||||
.. versionadded:: 3.11
|
||||
"""
|
||||
|
||||
@ -218,7 +218,10 @@ class CodecOptions(_options_base):
|
||||
naive. Defaults to ``False``.
|
||||
- `uuid_representation`: The BSON representation to use when encoding
|
||||
and decoding instances of :class:`~uuid.UUID`. Defaults to
|
||||
:data:`~bson.binary.PYTHON_LEGACY`.
|
||||
:data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`. New
|
||||
applications should consider setting this to
|
||||
:data:`~bson.binary.UuidRepresentation.STANDARD` for cross language
|
||||
compatibility. See :ref:`handling-uuid-data-example` for details.
|
||||
- `unicode_decode_error_handler`: The error handler to apply when
|
||||
a Unicode-related error occurs during BSON decoding that would
|
||||
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
|
||||
|
||||
@ -32,3 +32,4 @@ MongoDB, you can start it like so:
|
||||
tailable
|
||||
tls
|
||||
encryption
|
||||
uuid
|
||||
|
||||
509
doc/examples/uuid.rst
Normal file
509
doc/examples/uuid.rst
Normal file
@ -0,0 +1,509 @@
|
||||
.. _handling-uuid-data-example:
|
||||
|
||||
Handling UUID Data
|
||||
==================
|
||||
|
||||
PyMongo ships with built-in support for dealing with UUID types.
|
||||
It is straightforward to store native :class:`uuid.UUID` objects
|
||||
to MongoDB and retrieve them as native :class:`uuid.UUID` objects::
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson.binary import UuidRepresentation
|
||||
from uuid import uuid4
|
||||
|
||||
# use the 'standard' representation for cross-language compatibility.
|
||||
client = MongoClient(uuid_representation=UuidRepresentation.STANDARD)
|
||||
collection = client.get_database('uuid_db').get_collection('uuid_coll')
|
||||
|
||||
# remove all documents from collection
|
||||
collection.delete_many({})
|
||||
|
||||
# create a native uuid object
|
||||
uuid_obj = uuid4()
|
||||
|
||||
# save the native uuid object to MongoDB
|
||||
collection.insert_one({'uuid': uuid_obj})
|
||||
|
||||
# retrieve the stored uuid object from MongoDB
|
||||
document = collection.find_one({})
|
||||
|
||||
# check that the retrieved UUID matches the inserted UUID
|
||||
assert document['uuid'] == uuid_obj
|
||||
|
||||
Native :class:`uuid.UUID` objects can also be used as part of MongoDB
|
||||
queries::
|
||||
|
||||
document = collection.find_one({'uuid': uuid_obj})
|
||||
assert document['uuid'] == uuid_obj
|
||||
|
||||
The above examples illustrate the simplest of use-cases - one where the
|
||||
UUID is generated by, and used in, the same application. However,
|
||||
the situation can be significantly more complex when dealing with a MongoDB
|
||||
deployment that contains UUIDs created by other drivers as the Java and CSharp
|
||||
drivers have historically encoded UUIDs using a byte-order that is different
|
||||
from the one used by PyMongo. Applications that require interoperability across
|
||||
these drivers must specify the appropriate
|
||||
:class:`~bson.binary.UuidRepresentation`.
|
||||
|
||||
In the following sections, we describe how drivers have historically differed
|
||||
in their encoding of UUIDs, and how applications can use the
|
||||
:class:`~bson.binary.UuidRepresentation` configuration option to maintain
|
||||
cross-language compatibility.
|
||||
|
||||
.. attention:: New applications that do not share a MongoDB deployment with
|
||||
any other application and that have never stored UUIDs in MongoDB
|
||||
should use the ``standard`` UUID representation for cross-language
|
||||
compatibility. See :ref:`configuring-uuid-representation` for details
|
||||
on how to configure the :class:`~bson.binary.UuidRepresentation`.
|
||||
|
||||
.. _example-legacy-uuid:
|
||||
|
||||
Legacy Handling of UUID Data
|
||||
----------------------------
|
||||
|
||||
Historically, MongoDB Drivers have used different byte-ordering
|
||||
while serializing UUID types to :class:`~bson.binary.Binary`.
|
||||
Consider, for instance, a UUID with the following canonical textual
|
||||
representation::
|
||||
|
||||
00112233-4455-6677-8899-aabbccddeeff
|
||||
|
||||
This UUID would historically be serialized by the Python driver as::
|
||||
|
||||
00112233-4455-6677-8899-aabbccddeeff
|
||||
|
||||
The same UUID would historically be serialized by the C# driver as::
|
||||
|
||||
33221100-5544-7766-8899-aabbccddeeff
|
||||
|
||||
Finally, the same UUID would historically be serialized by the Java driver as::
|
||||
|
||||
77665544-3322-1100-ffee-ddccbbaa9988
|
||||
|
||||
.. note:: For in-depth information about the byte-order historically
|
||||
used by different drivers, see the `Handling of Native UUID Types
|
||||
Specification
|
||||
<https://github.com/mongodb/specifications/blob/master/source/uuid.rst>`_.
|
||||
|
||||
This difference in the byte-order of UUIDs encoded by different drivers can
|
||||
result in highly unintuitive behavior in some scenarios. We detail two such
|
||||
scenarios in the next sections.
|
||||
|
||||
Scenario 1: Applications Share a MongoDB Deployment
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Consider the following situation:
|
||||
|
||||
* Application ``C`` written in C# generates a UUID and uses it as the ``_id``
|
||||
of a document that it proceeds to insert into the ``uuid_test`` collection of
|
||||
the ``example_db`` database. Let's assume that the canonical textual
|
||||
representation of the generated UUID is::
|
||||
|
||||
00112233-4455-6677-8899-aabbccddeeff
|
||||
|
||||
* Application ``P`` written in Python attempts to ``find`` the document
|
||||
written by application ``C`` in the following manner::
|
||||
|
||||
from uuid import UUID
|
||||
collection = client.example_db.uuid_test
|
||||
result = collection.find_one({'_id': UUID('00112233-4455-6677-8899-aabbccddeeff')})
|
||||
|
||||
In this instance, ``result`` will never be the document that
|
||||
was inserted by application ``C`` in the previous step. This is because of
|
||||
the different byte-order used by the C# driver for representing UUIDs as
|
||||
BSON Binary. The following query, on the other hand, will successfully find
|
||||
this document::
|
||||
|
||||
result = collection.find_one({'_id': UUID('33221100-5544-7766-8899-aabbccddeeff')})
|
||||
|
||||
This example demonstrates how the differing byte-order used by different
|
||||
drivers can hamper interoperability. To work around this problem, users should
|
||||
configure their ``MongoClient`` with the appropriate
|
||||
:class:`~bson.binary.UuidRepresentation` (in this case, ``client`` in application
|
||||
``P`` can be configured to use the
|
||||
:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation to
|
||||
avoid the unintuitive behavior) as described in
|
||||
:ref:`configuring-uuid-representation`.
|
||||
|
||||
Scenario 2: Round-Tripping UUIDs
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In the following examples, we see how using a misconfigured
|
||||
:class:`~bson.binary.UuidRepresentation` can cause an application
|
||||
to inadvertently change the :class:`~bson.binary.Binary` subtype, and in some
|
||||
cases, the bytes of the :class:`~bson.binary.Binary` field itself when
|
||||
round-tripping documents containing UUIDs.
|
||||
|
||||
Consider the following situation::
|
||||
|
||||
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
|
||||
from bson.binary import Binary, UuidRepresentation
|
||||
from uuid import uuid4
|
||||
|
||||
# Using UuidRepresentation.PYTHON_LEGACY stores a Binary subtype-3 UUID
|
||||
python_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
|
||||
input_uuid = uuid4()
|
||||
collection = client.testdb.get_collection('test', codec_options=python_opts)
|
||||
collection.insert_one({'_id': 'foo', 'uuid': input_uuid})
|
||||
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})['_id'] == 'foo'
|
||||
|
||||
# Retrieving this document using UuidRepresentation.STANDARD returns a native UUID
|
||||
std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
|
||||
std_collection = client.testdb.get_collection('test', codec_options=std_opts)
|
||||
doc = std_collection.find_one({'_id': 'foo'})
|
||||
assert doc['uuid'] == input_uuid
|
||||
|
||||
# Round-tripping the retrieved document silently changes the Binary subtype to 4
|
||||
std_collection.replace_one({'_id': 'foo'}, doc)
|
||||
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None
|
||||
round_tripped_doc = collection.find_one({'uuid': Binary(input_uuid.bytes, 4)})
|
||||
assert doc == round_tripped_doc
|
||||
|
||||
|
||||
In this example, round-tripping the document using the incorrect
|
||||
:class:`~bson.binary.UuidRepresentation` (``STANDARD`` instead of
|
||||
``PYTHON_LEGACY``) changes the :class:`~bson.binary.Binary` subtype as a
|
||||
side-effect. **Note that this can also happen when the situation is reversed -
|
||||
i.e. when the original document is written using ``STANDARD`` representation
|
||||
and then round-tripped using the ``PYTHON_LEGACY`` representation.**
|
||||
|
||||
In the next example, we see the consequences of incorrectly using a
|
||||
representation that modifies byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``)
|
||||
when round-tripping documents::
|
||||
|
||||
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
|
||||
from bson.binary import Binary, UuidRepresentation
|
||||
from uuid import uuid4
|
||||
|
||||
# Using UuidRepresentation.STANDARD stores a Binary subtype-4 UUID
|
||||
std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
|
||||
input_uuid = uuid4()
|
||||
collection = client.testdb.get_collection('test', codec_options=std_opts)
|
||||
collection.insert_one({'_id': 'baz', 'uuid': input_uuid})
|
||||
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)})['_id'] == 'baz'
|
||||
|
||||
# Retrieving this document using UuidRepresentation.JAVA_LEGACY returns a native UUID
|
||||
# without modifying the UUID byte-order
|
||||
java_opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY)
|
||||
java_collection = client.testdb.get_collection('test', codec_options=java_opts)
|
||||
doc = java_collection.find_one({'_id': 'baz'})
|
||||
assert doc['uuid'] == input_uuid
|
||||
|
||||
# Round-tripping the retrieved document silently changes the Binary bytes and subtype
|
||||
java_collection.replace_one({'_id': 'baz'}, doc)
|
||||
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None
|
||||
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)}) is None
|
||||
round_tripped_doc = collection.find_one({'_id': 'baz'})
|
||||
assert round_tripped_doc['uuid'] == Binary(input_uuid.bytes, 3).as_uuid(UuidRepresentation.JAVA_LEGACY)
|
||||
|
||||
|
||||
In this case, using the incorrect :class:`~bson.binary.UuidRepresentation`
|
||||
(``JAVA_LEGACY`` instead of ``STANDARD``) changes the
|
||||
:class:`~bson.binary.Binary` bytes and subtype as a side-effect.
|
||||
**Note that this happens when any representation that
|
||||
manipulates byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``) is incorrectly
|
||||
used to round-trip UUIDs written with ``STANDARD``. When the situation is
|
||||
reversed - i.e. when the original document is written using ``CSHARP_LEGACY``
|
||||
or ``JAVA_LEGACY`` and then round-tripped using ``STANDARD`` -
|
||||
only the :class:`~bson.binary.Binary` subtype is changed.**
|
||||
|
||||
.. note:: Starting in PyMongo 4.0, these issues will be resolved as
|
||||
the ``STANDARD`` representation will decode Binary subtype 3 fields as
|
||||
:class:`~bson.binary.Binary` objects of subtype 3 (instead of
|
||||
:class:`uuid.UUID`), and each of the ``*_LEGACY`` representations will
|
||||
decode Binary subtype 4 fields to :class:`~bson.binary.Binary` objects of
|
||||
subtype 4 (instead of :class:`uuid.UUID`).
|
||||
|
||||
.. _configuring-uuid-representation:
|
||||
|
||||
Configuring a UUID Representation
|
||||
---------------------------------
|
||||
|
||||
Users can work around the problems described above by configuring their
|
||||
applications with the appropriate :class:`~bson.binary.UuidRepresentation`.
|
||||
Configuring the representation modifies PyMongo's behavior while
|
||||
encoding :class:`uuid.UUID` objects to BSON and decoding
|
||||
Binary subtype 3 and 4 fields from BSON.
|
||||
|
||||
Applications can set the UUID representation in one of the following ways:
|
||||
|
||||
#. At the ``MongoClient`` level using the ``uuidRepresentation`` URI option,
|
||||
e.g.::
|
||||
|
||||
client = MongoClient("mongodb://a:27017/?uuidRepresentation=javaLegacy")
|
||||
|
||||
Valid values are:
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - Value
|
||||
- UUID Representation
|
||||
|
||||
* - ``pythonLegacy``
|
||||
- :ref:`python-legacy-representation-details`
|
||||
|
||||
* - ``javaLegacy``
|
||||
- :ref:`java-legacy-representation-details`
|
||||
|
||||
* - ``csharpLegacy``
|
||||
- :ref:`csharp-legacy-representation-details`
|
||||
|
||||
* - ``standard``
|
||||
- :ref:`standard-representation-details`
|
||||
|
||||
* - ``unspecified``
|
||||
- :ref:`unspecified-representation-details`
|
||||
|
||||
#. Using the ``uuid_representation`` kwarg option, e.g.::
|
||||
|
||||
from bson.binary import UuidRepresentation
|
||||
client = MongoClient(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
|
||||
|
||||
#. By supplying a suitable :class:`~bson.codec_options.CodecOptions`
|
||||
instance, e.g.::
|
||||
|
||||
from bson.codec_options import CodecOptions
|
||||
csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY)
|
||||
csharp_database = client.get_database('csharp_db', codec_options=csharp_opts)
|
||||
csharp_collection = client.testdb.get_collection('csharp_coll', codec_options=csharp_opts)
|
||||
|
||||
Supported UUID Representations
|
||||
------------------------------
|
||||
|
||||
.. list-table::
|
||||
:header-rows: 1
|
||||
|
||||
* - UUID Representation
|
||||
- Default?
|
||||
- Encode :class:`uuid.UUID` to
|
||||
- Decode :class:`~bson.binary.Binary` subtype 4 to
|
||||
- Decode :class:`~bson.binary.Binary` subtype 3 to
|
||||
|
||||
* - :ref:`python-legacy-representation-details`
|
||||
- Yes, in PyMongo>=2.9,<4
|
||||
- :class:`~bson.binary.Binary` subtype 3 with standard byte-order
|
||||
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4
|
||||
- :class:`uuid.UUID`
|
||||
|
||||
* - :ref:`java-legacy-representation-details`
|
||||
- No
|
||||
- :class:`~bson.binary.Binary` subtype 3 with Java legacy byte-order
|
||||
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4
|
||||
- :class:`uuid.UUID`
|
||||
|
||||
* - :ref:`csharp-legacy-representation-details`
|
||||
- No
|
||||
- :class:`~bson.binary.Binary` subtype 3 with C# legacy byte-order
|
||||
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 4 in PyMongo>=4
|
||||
- :class:`uuid.UUID`
|
||||
|
||||
* - :ref:`standard-representation-details`
|
||||
- No
|
||||
- :class:`~bson.binary.Binary` subtype 4
|
||||
- :class:`uuid.UUID`
|
||||
- :class:`uuid.UUID` in PyMongo<4; :class:`~bson.binary.Binary` subtype 3 in PyMongo>=4
|
||||
|
||||
* - :ref:`unspecified-representation-details`
|
||||
- Yes, in PyMongo>=4
|
||||
- Raise :exc:`ValueError`
|
||||
- :class:`~bson.binary.Binary` subtype 4
|
||||
- :class:`~bson.binary.Binary` subtype 3
|
||||
|
||||
We now detail the behavior and use-case for each supported UUID
|
||||
representation.
|
||||
|
||||
.. _python-legacy-representation-details:
|
||||
|
||||
``PYTHON_LEGACY``
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. attention:: This UUID representation should be used when reading UUIDs
|
||||
generated by existing applications that use the Python driver
|
||||
but **don't** explicitly set a UUID representation.
|
||||
|
||||
.. attention:: :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`
|
||||
has been the default UUID representation since PyMongo 2.9.
|
||||
|
||||
The :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` representation
|
||||
corresponds to the legacy representation of UUIDs used by PyMongo. This
|
||||
representation conforms with
|
||||
`RFC 4122 Section 4.1.2 <https://tools.ietf.org/html/rfc4122#section-4.1.2>`_.
|
||||
|
||||
The following example illustrates the use of this representation::
|
||||
|
||||
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
|
||||
from bson.binary import UuidRepresentation
|
||||
|
||||
# No configured UUID representation
|
||||
collection = client.python_legacy.get_collection('test', codec_options=DEFAULT_CODEC_OPTIONS)
|
||||
|
||||
# Using UuidRepresentation.PYTHON_LEGACY
|
||||
pylegacy_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
|
||||
pylegacy_collection = client.python_legacy.get_collection('test', codec_options=pylegacy_opts)
|
||||
|
||||
# UUIDs written by PyMongo with no UuidRepresentation configured can be queried using PYTHON_LEGACY
|
||||
uuid_1 = uuid4()
|
||||
collection.insert_one({'uuid': uuid_1})
|
||||
document = pylegacy_collection.find_one({'uuid': uuid_1})
|
||||
|
||||
# UUIDs written using PYTHON_LEGACY can be read by PyMongo with no UuidRepresentation configured
|
||||
uuid_2 = uuid4()
|
||||
pylegacy_collection.insert_one({'uuid': uuid_2})
|
||||
document = collection.find_one({'uuid': uuid_2})
|
||||
|
||||
``PYTHON_LEGACY`` encodes native :class:`uuid.UUID` objects to
|
||||
:class:`~bson.binary.Binary` subtype 3 objects, preserving the same
|
||||
byte-order as :attr:`~uuid.UUID.bytes`::
|
||||
|
||||
from bson.binary import Binary
|
||||
|
||||
document = collection.find_one({'uuid': Binary(uuid_2.bytes, subtype=3)})
|
||||
assert document['uuid'] == uuid_2
|
||||
|
||||
.. _java-legacy-representation-details:
|
||||
|
||||
``JAVA_LEGACY``
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
.. attention:: This UUID representation should be used when reading UUIDs
|
||||
written to MongoDB by legacy applications (i.e. applications that don't
|
||||
use the ``STANDARD`` representation) using the Java driver.
|
||||
|
||||
The :data:`~bson.binary.UuidRepresentation.JAVA_LEGACY` representation
|
||||
corresponds to the legacy representation of UUIDs used by the MongoDB Java
|
||||
Driver.
|
||||
|
||||
.. note:: The ``JAVA_LEGACY`` representation reverses the order of bytes 0-7,
|
||||
and bytes 8-15.
|
||||
|
||||
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
|
||||
Let us assume that an application used the Java driver without an explicitly
|
||||
specified UUID representation to insert the example UUID
|
||||
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
|
||||
value using PyMongo with no UUID representation specified, we end up with an
|
||||
entirely different UUID::
|
||||
|
||||
UUID('77665544-3322-1100-ffee-ddccbbaa9988')
|
||||
|
||||
However, if we explicitly set the representation to
|
||||
:data:`~bson.binary.UuidRepresentation.JAVA_LEGACY`, we get the correct result::
|
||||
|
||||
UUID('00112233-4455-6677-8899-aabbccddeeff')
|
||||
|
||||
PyMongo uses the specified UUID representation to reorder the BSON bytes and
|
||||
load them correctly. ``JAVA_LEGACY`` encodes native :class:`uuid.UUID` objects
|
||||
to :class:`~bson.binary.Binary` subtype 3 objects, while performing the same
|
||||
byte-reordering as the legacy Java driver's UUID to BSON encoder.
|
||||
|
||||
.. _csharp-legacy-representation-details:
|
||||
|
||||
``CSHARP_LEGACY``
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. attention:: This UUID representation should be used when reading UUIDs
|
||||
written to MongoDB by legacy applications (i.e. applications that don't
|
||||
use the ``STANDARD`` representation) using the C# driver.
|
||||
|
||||
The :data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation
|
||||
corresponds to the legacy representation of UUIDs used by the MongoDB C#
|
||||
Driver.
|
||||
|
||||
.. note:: The ``CSHARP_LEGACY`` representation reverses the order of bytes 0-3,
|
||||
bytes 4-5, and bytes 6-7.
|
||||
|
||||
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
|
||||
Let us assume that an application used the C# driver without an explicitly
|
||||
specified UUID representation to insert the example UUID
|
||||
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
|
||||
value using PyMongo with no UUID representation specified, we end up with an
|
||||
entirely different UUID::
|
||||
|
||||
UUID('33221100-5544-7766-8899-aabbccddeeff')
|
||||
|
||||
However, if we explicitly set the representation to
|
||||
:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY`, we get the correct result::
|
||||
|
||||
UUID('00112233-4455-6677-8899-aabbccddeeff')
|
||||
|
||||
PyMongo uses the specified UUID representation to reorder the BSON bytes and
|
||||
load them correctly. ``CSHARP_LEGACY`` encodes native :class:`uuid.UUID`
|
||||
objects to :class:`~bson.binary.Binary` subtype 3 objects, while performing
|
||||
the same byte-reordering as the legacy C# driver's UUID to BSON encoder.
|
||||
|
||||
.. _standard-representation-details:
|
||||
|
||||
``STANDARD``
|
||||
^^^^^^^^^^^^
|
||||
|
||||
.. attention:: This UUID representation should be used by new applications
|
||||
that have never stored UUIDs in MongoDB.
|
||||
|
||||
The :data:`~bson.binary.UuidRepresentation.STANDARD` representation
|
||||
enables cross-language compatibility by ensuring the same byte-ordering
|
||||
when encoding UUIDs from all drivers. UUIDs written by a driver with this
|
||||
representation configured will be handled correctly by every other driver, provided
|
||||
it is also configured with the ``STANDARD`` representation.
|
||||
|
||||
``STANDARD`` encodes native :class:`uuid.UUID` objects to
|
||||
:class:`~bson.binary.Binary` subtype 4 objects.
|
||||
|
||||
.. _unspecified-representation-details:
|
||||
|
||||
``UNSPECIFIED``
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
.. attention:: Starting in PyMongo 4.0,
|
||||
:data:`~bson.binary.UuidRepresentation.UNSPECIFIED` will be the default
|
||||
UUID representation used by PyMongo.
|
||||
|
||||
The :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` representation
|
||||
prevents the incorrect interpretation of UUID bytes by stopping short of
|
||||
automatically converting UUID fields in BSON to native UUID types. Loading
|
||||
a UUID when using this representation returns a :class:`~bson.binary.Binary`
|
||||
object instead. If required, users can coerce the decoded
|
||||
:class:`~bson.binary.Binary` objects into native UUIDs using the
|
||||
:meth:`~bson.binary.Binary.as_uuid` method and specifying the appropriate
|
||||
representation format. The following example shows
|
||||
what this might look like for a UUID stored by the C# driver::
|
||||
|
||||
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
|
||||
from bson.binary import Binary, UuidRepresentation
|
||||
from uuid import uuid4
|
||||
|
||||
# Using UuidRepresentation.CSHARP_LEGACY
|
||||
csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY)
|
||||
|
||||
# Store a legacy C#-formatted UUID
|
||||
input_uuid = uuid4()
|
||||
collection = client.testdb.get_collection('test', codec_options=csharp_opts)
|
||||
collection.insert_one({'_id': 'foo', 'uuid': input_uuid})
|
||||
|
||||
# Using UuidRepresentation.UNSPECIFIED
|
||||
unspec_opts = CodecOptions(uuid_representation=UuidRepresentation.UNSPECIFIED)
|
||||
unspec_collection = client.testdb.get_collection('test', codec_options=unspec_opts)
|
||||
|
||||
# UUID fields are decoded as Binary when UuidRepresentation.UNSPECIFIED is configured
|
||||
document = unspec_collection.find_one({'_id': 'foo'})
|
||||
decoded_field = document['uuid']
|
||||
assert isinstance(decoded_field, Binary)
|
||||
|
||||
# Binary.as_uuid() can be used to coerce the decoded value to a native UUID
|
||||
decoded_uuid = decoded_field.as_uuid(UuidRepresentation.CSHARP_LEGACY)
|
||||
assert decoded_uuid == input_uuid
|
||||
|
||||
Native :class:`uuid.UUID` objects cannot directly be encoded to
|
||||
:class:`~bson.binary.Binary` when the UUID representation is ``UNSPECIFIED``
|
||||
and attempting to do so will result in an exception::
|
||||
|
||||
unspec_collection.insert_one({'_id': 'bar', 'uuid': uuid4()})
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: cannot encode native uuid.UUID with UuidRepresentation.UNSPECIFIED. UUIDs can be manually converted to bson.Binary instances using bson.Binary.from_uuid() or a different UuidRepresentation can be configured. See the documentation for UuidRepresentation for more information.
|
||||
|
||||
Instead, applications using :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`
|
||||
must explicitly coerce a native UUID using the
|
||||
:meth:`~bson.binary.Binary.from_uuid` method::
|
||||
|
||||
explicit_binary = Binary.from_uuid(uuid4(), UuidRepresentation.PYTHON_LEGACY)
|
||||
unspec_collection.insert_one({'_id': 'bar', 'uuid': explicit_binary})
|
||||
@ -341,8 +341,9 @@ class MongoClient(common.BaseObject):
|
||||
- `uuidRepresentation`: The BSON representation to use when encoding
|
||||
from and decoding to instances of :class:`~uuid.UUID`. Valid
|
||||
values are `pythonLegacy` (the default), `javaLegacy`,
|
||||
`csharpLegacy` and `standard`. New applications should consider
|
||||
setting this to `standard` for cross language compatibility.
|
||||
`csharpLegacy`, `standard` and `unspecified`. New applications
|
||||
should consider setting this to `standard` for cross language
|
||||
compatibility. See :ref:`handling-uuid-data-example` for details.
|
||||
|
||||
| **Write Concern options:**
|
||||
| (Only set if passed. No default values.)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user