PYTHON-2152 Expand native UUID handling support; Implement UUID specification

This commit is contained in:
Prashant Mital 2020-05-12 00:59:12 -07:00
parent 29960237dc
commit 4760d07815
No known key found for this signature in database
GPG Key ID: D5A4E9E5CFB4CBD7
11 changed files with 576 additions and 234 deletions

View File

@ -76,9 +76,10 @@ import uuid
from codecs import (utf_8_decode as _utf_8_decode,
utf_8_encode as _utf_8_encode)
from bson.binary import (Binary, OLD_UUID_SUBTYPE,
from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
OLD_UUID_SUBTYPE,
JAVA_LEGACY, CSHARP_LEGACY,
UUIDLegacy)
UUIDLegacy, UUID_SUBTYPE)
from bson.code import Code
from bson.codec_options import (
CodecOptions, DEFAULT_CODEC_OPTIONS, _raw_document_class)
@ -303,26 +304,29 @@ def _get_binary(data, view, position, obj_end, opts, dummy1):
end = position + length
if length < 0 or end > obj_end:
raise InvalidBSON('bad binary object length')
if subtype == 3:
# Java Legacy
# Convert UUID subtypes to native UUIDs.
# TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0+
if subtype in ALL_UUID_SUBTYPES:
uuid_representation = opts.uuid_representation
if uuid_representation == JAVA_LEGACY:
java = data[position:end]
value = uuid.UUID(bytes=java[0:8][::-1] + java[8:16][::-1])
# C# legacy
elif uuid_representation == CSHARP_LEGACY:
value = uuid.UUID(bytes_le=data[position:end])
# Python
else:
value = uuid.UUID(bytes=data[position:end])
return value, end
if subtype == 4:
return uuid.UUID(bytes=data[position:end]), end
binary_value = Binary(data[position:end], subtype)
if uuid_representation == UuidRepresentation.UNSPECIFIED:
return binary_value, end
if subtype == UUID_SUBTYPE:
# Legacy behavior: use STANDARD with binary subtype 4.
uuid_representation = UuidRepresentation.STANDARD
elif uuid_representation == UuidRepresentation.STANDARD:
# subtype == OLD_UUID_SUBTYPE
# Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
uuid_representation = UuidRepresentation.PYTHON_LEGACY
return binary_value.as_uuid(uuid_representation), end
# Python3 special case. Decode subtype 0 to 'bytes'.
if PY3 and subtype == 0:
value = data[position:end]
else:
value = Binary(data[position:end], subtype)
return value, end
@ -633,21 +637,8 @@ def _encode_binary(name, value, dummy0, dummy1):
def _encode_uuid(name, value, dummy, opts):
    """Encode a native :class:`uuid.UUID` as a BSON binary element.

    The UUID is converted to a :class:`~bson.binary.Binary` with
    :meth:`~bson.binary.Binary.from_uuid`, using the UUID representation
    configured on ``opts``, and the result is handed to
    :func:`_encode_binary`. Byte order and binary subtype are therefore
    decided in exactly one place instead of being duplicated here.

    :Parameters:
      - `name`: The element name, forwarded unchanged to
        :func:`_encode_binary`.
      - `value`: The :class:`uuid.UUID` instance to encode.
      - `dummy`: Unused by this function; forwarded to
        :func:`_encode_binary`.
      - `opts`: Codec options; only ``uuid_representation`` is read here.

    Raises :exc:`ValueError` (from ``Binary.from_uuid``) when the configured
    representation is ``UuidRepresentation.UNSPECIFIED`` or is not a known
    representation.
    """
    # Do not special-case any representation here: a hand-rolled chain that
    # falls through to "subtype 4" would silently encode UUIDs even when the
    # application asked for UNSPECIFIED.
    binary_value = Binary.from_uuid(
        value, uuid_representation=opts.uuid_representation)
    return _encode_binary(name, binary_value, dummy, opts)
def _encode_objectid(name, value, dummy0, dummy1):
"""Encode bson.objectid.ObjectId."""

View File

@ -78,6 +78,7 @@ static struct module_state _state;
#define STANDARD 4
#define JAVA_LEGACY 5
#define CSHARP_LEGACY 6
#define UNSPECIFIED 0
#define BSON_MAX_SIZE 2147483647
/* The smallest possible BSON document, i.e. "{}" */
@ -583,19 +584,6 @@ static int write_element_to_buffer(PyObject* self, buffer_t buffer,
return result;
}
/* Convert a 16-byte UUID between big-endian byte order and the legacy
 * Java driver byte order by reversing each 8-byte half independently.
 *
 * The transformation is its own inverse, so the same helper serves both
 * the encoding and the decoding call sites in this file.
 *
 * in:  the 16 source bytes.
 * out: receives the 16 converted bytes. Assumed not to overlap `in`; the
 *      callers visible in this file pass a separate 16-byte stack buffer.
 */
static void
_fix_java(const char* in, char* out) {
    int half;
    /* half is the offset of the 8-byte group being reversed: 0, then 8. */
    for (half = 0; half < 16; half += 8) {
        int k;
        for (k = 0; k < 8; k++) {
            out[half + k] = in[half + 7 - k];
        }
    }
}
static void
_set_cannot_encode(PyObject* value) {
PyObject* type = NULL;
@ -1276,14 +1264,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
uuid_type = _get_object(state->UUID, "uuid", "UUID");
if (uuid_type && PyObject_IsInstance(value, uuid_type)) {
/* Just a special case of Binary above, but
* simpler to do as a separate case. */
PyObject* bytes;
/* Could be bytes, bytearray, str... */
const char* data;
/* UUID is always 16 bytes */
int size = 16;
char subtype;
PyObject* binary_type = NULL;
PyObject* binary_value = NULL;
int result;
Py_DECREF(uuid_type);
/* PyObject_IsInstance returns -1 on error */
@ -1291,58 +1274,25 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
return 0;
}
if (options->uuid_rep == JAVA_LEGACY
|| options->uuid_rep == CSHARP_LEGACY) {
subtype = 3;
}
else {
subtype = options->uuid_rep;
}
*(buffer_get_buffer(buffer) + type_byte) = 0x05;
if (!buffer_write_int32(buffer, (int32_t)size)) {
return 0;
}
if (!buffer_write_bytes(buffer, &subtype, 1)) {
binary_type = _get_object(state->Binary, "bson", "Binary");
if (binary_type == NULL) {
return 0;
}
if (options->uuid_rep == CSHARP_LEGACY) {
/* Legacy C# byte order */
bytes = PyObject_GetAttrString(value, "bytes_le");
}
else {
bytes = PyObject_GetAttrString(value, "bytes");
}
if (!bytes) {
binary_value = PyObject_CallMethod(binary_type, "from_uuid", "(Oi)", value, options->uuid_rep);
if (binary_value == NULL) {
Py_DECREF(binary_type);
return 0;
}
#if PY_MAJOR_VERSION >= 3
data = PyBytes_AsString(bytes);
#else
data = PyString_AsString(bytes);
#endif
if (data == NULL) {
Py_DECREF(bytes);
return 0;
}
if (options->uuid_rep == JAVA_LEGACY) {
/* Store in legacy java byte order. */
char as_legacy_java[16];
_fix_java(data, as_legacy_java);
if (!buffer_write_bytes(buffer, as_legacy_java, size)) {
Py_DECREF(bytes);
return 0;
}
}
else {
if (!buffer_write_bytes(buffer, data, size)) {
Py_DECREF(bytes);
return 0;
}
}
Py_DECREF(bytes);
return 1;
result = _write_element_to_buffer(self, buffer,
type_byte, binary_value,
check_keys, options,
in_custom_call,
in_fallback_call);
Py_DECREF(binary_type);
Py_DECREF(binary_value);
return result;
}
Py_XDECREF(mapping_type);
Py_XDECREF(uuid_type);
@ -1823,7 +1773,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
unsigned* position, unsigned char type,
unsigned max, const codec_options_t* options) {
struct module_state *state = GETSTATE(self);
PyObject* value = NULL;
switch (type) {
case 1:
@ -2063,70 +2012,49 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
if (!data) {
goto invalid;
}
/* Encode as UUID, not Binary */
/* Encode as UUID or Binary based on options->uuid_rep
* TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0 */
if (subtype == 3 || subtype == 4) {
PyObject* kwargs;
PyObject* args = PyTuple_New(0);
PyObject* binary_type = NULL;
PyObject* binary_value = NULL;
char uuid_rep = options->uuid_rep;
/* UUID should always be 16 bytes */
if (!args || length != 16) {
Py_DECREF(data);
goto invalid;
}
kwargs = PyDict_New();
if (!kwargs) {
Py_DECREF(data);
Py_DECREF(args);
goto invalid;
if (length != 16) {
goto uuiderror;
}
/*
* From this point, we hold refs to args, kwargs, and data.
* If anything fails, goto uuiderror to clean them up.
*/
if (subtype == 3 && options->uuid_rep == CSHARP_LEGACY) {
/* Legacy C# byte order */
if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1)
goto uuiderror;
binary_type = _get_object(state->Binary, "bson", "Binary");
if (binary_type == NULL) {
goto uuiderror;
}
else {
if (subtype == 3 && options->uuid_rep == JAVA_LEGACY) {
/* Convert from legacy java byte order */
char big_endian[16];
_fix_java(buffer + *position, big_endian);
/* Free the previously created PyString object */
Py_DECREF(data);
#if PY_MAJOR_VERSION >= 3
data = PyBytes_FromStringAndSize(big_endian, length);
#else
data = PyString_FromStringAndSize(big_endian, length);
#endif
if (data == NULL)
goto uuiderror;
binary_value = PyObject_CallFunction(binary_type, "(Oi)", data, subtype);
if (binary_value == NULL) {
goto uuiderror;
}
if (uuid_rep == UNSPECIFIED) {
value = binary_value;
Py_INCREF(value);
} else {
if (subtype == 4) {
uuid_rep = STANDARD;
} else if (uuid_rep == STANDARD) {
uuid_rep = PYTHON_LEGACY;
}
if ((PyDict_SetItemString(kwargs, "bytes", data)) == -1)
goto uuiderror;
}
if ((type_to_create = _get_object(state->UUID, "uuid", "UUID"))) {
value = PyObject_Call(type_to_create, args, kwargs);
Py_DECREF(type_to_create);
value = PyObject_CallMethod(binary_value, "as_uuid", "(i)", uuid_rep);
}
Py_DECREF(args);
Py_DECREF(kwargs);
uuiderror:
Py_XDECREF(binary_type);
Py_XDECREF(binary_value);
Py_DECREF(data);
if (!value) {
goto invalid;
}
*position += length;
break;
uuiderror:
Py_DECREF(args);
Py_DECREF(kwargs);
Py_XDECREF(data);
goto invalid;
}
#if PY_MAJOR_VERSION >= 3

View File

@ -13,6 +13,7 @@
# limitations under the License.
from uuid import UUID
from warnings import warn
from bson.py3compat import PY3
@ -55,57 +56,104 @@ current default is :data:`OLD_UUID_SUBTYPE`.
Changed to subtype 4.
"""
STANDARD = UUID_SUBTYPE
"""The standard UUID representation.
:class:`uuid.UUID` instances will automatically be encoded to
and decoded from BSON binary, using RFC-4122 byte order with
binary subtype :data:`UUID_SUBTYPE`.
class UuidRepresentation:
    """Namespace for the UUID representation constants.

    Each attribute is a plain integer constant. These are the values that
    :meth:`Binary.from_uuid` and :meth:`Binary.as_uuid` accept for their
    ``uuid_representation`` argument; the string following each attribute
    documents how :class:`uuid.UUID` instances are handled under it.
    """
    # 0 is not a BSON binary subtype; it only marks "no representation".
    UNSPECIFIED = 0
    """An unspecified UUID representation.
    When configured, :class:`uuid.UUID` instances will **not** be
    automatically encoded to or decoded from :class:`~bson.binary.Binary`.
    When encoding a :class:`uuid.UUID` instance, an error will be raised.
    To encode a :class:`uuid.UUID` instance with this configuration, it must
    be wrapped in the :class:`~bson.binary.Binary` class by the application
    code. When decoding a BSON binary field with a UUID subtype, a
    :class:`~bson.binary.Binary` instance will be returned instead of a
    :class:`uuid.UUID` instance.
    .. versionadded:: 3.11
    """
    # Shares its numeric value with the binary subtype it encodes to.
    STANDARD = UUID_SUBTYPE
    """The standard UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary, using RFC-4122 byte order with
    binary subtype :data:`UUID_SUBTYPE`.
    .. versionadded:: 3.11
    """
    # Shares its numeric value with the legacy binary subtype it encodes to.
    PYTHON_LEGACY = OLD_UUID_SUBTYPE
    """The Python legacy UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary, using RFC-4122 byte order with
    binary subtype :data:`OLD_UUID_SUBTYPE`.
    .. versionadded:: 3.11
    """
    # Identifies a byte order only; the binary subtype used is
    # OLD_UUID_SUBTYPE, as the description below states.
    JAVA_LEGACY = 5
    """The Java legacy UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
    using the Java driver's legacy byte order.
    .. versionadded:: 3.11
    """
    # Identifies a byte order only; the binary subtype used is
    # OLD_UUID_SUBTYPE, as the description below states.
    CSHARP_LEGACY = 6
    """The C#/.net legacy UUID representation.
    :class:`uuid.UUID` instances will automatically be encoded to
    and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
    using the C# driver's legacy byte order.
    .. versionadded:: 3.11
    """
STANDARD = UuidRepresentation.STANDARD
"""An alias for :data:`UuidRepresentation.STANDARD`.
.. versionadded:: 3.0
"""
PYTHON_LEGACY = OLD_UUID_SUBTYPE
"""The Python legacy UUID representation.
:class:`uuid.UUID` instances will automatically be encoded to
and decoded from BSON binary, using RFC-4122 byte order with
binary subtype :data:`OLD_UUID_SUBTYPE`.
PYTHON_LEGACY = UuidRepresentation.PYTHON_LEGACY
"""An alias for :data:`UuidRepresentation.PYTHON_LEGACY`.
.. versionadded:: 3.0
"""
JAVA_LEGACY = 5
"""The Java legacy UUID representation.
:class:`uuid.UUID` instances will automatically be encoded to
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
using the Java driver's legacy byte order.
JAVA_LEGACY = UuidRepresentation.JAVA_LEGACY
"""An alias for :data:`UuidRepresentation.JAVA_LEGACY`.
.. versionchanged:: 3.6
BSON binary subtype 4 is decoded using RFC-4122 byte order.
BSON binary subtype 4 is decoded using RFC-4122 byte order.
.. versionadded:: 2.3
"""
CSHARP_LEGACY = 6
"""The C#/.net legacy UUID representation.
:class:`uuid.UUID` instances will automatically be encoded to
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
using the C# driver's legacy byte order.
CSHARP_LEGACY = UuidRepresentation.CSHARP_LEGACY
"""An alias for :data:`UuidRepresentation.CSHARP_LEGACY`.
.. versionchanged:: 3.6
BSON binary subtype 4 is decoded using RFC-4122 byte order.
BSON binary subtype 4 is decoded using RFC-4122 byte order.
.. versionadded:: 2.3
"""
ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE)
ALL_UUID_REPRESENTATIONS = (STANDARD, PYTHON_LEGACY, JAVA_LEGACY, CSHARP_LEGACY)
ALL_UUID_REPRESENTATIONS = (UuidRepresentation.UNSPECIFIED,
UuidRepresentation.STANDARD,
UuidRepresentation.PYTHON_LEGACY,
UuidRepresentation.JAVA_LEGACY,
UuidRepresentation.CSHARP_LEGACY)
UUID_REPRESENTATION_NAMES = {
PYTHON_LEGACY: 'PYTHON_LEGACY',
STANDARD: 'STANDARD',
JAVA_LEGACY: 'JAVA_LEGACY',
CSHARP_LEGACY: 'CSHARP_LEGACY'}
UuidRepresentation.UNSPECIFIED: 'UuidRepresentation.UNSPECIFIED',
UuidRepresentation.STANDARD: 'UuidRepresentation.STANDARD',
UuidRepresentation.PYTHON_LEGACY: 'UuidRepresentation.PYTHON_LEGACY',
UuidRepresentation.JAVA_LEGACY: 'UuidRepresentation.JAVA_LEGACY',
UuidRepresentation.CSHARP_LEGACY: 'UuidRepresentation.CSHARP_LEGACY'}
MD5_SUBTYPE = 5
"""BSON binary subtype for an MD5 hash.
@ -155,6 +203,99 @@ class Binary(bytes):
self.__subtype = subtype
return self
@classmethod
def from_uuid(cls, uuid, uuid_representation=UuidRepresentation.STANDARD):
    """Build a BSON Binary from a native Python UUID.

    Returns a new instance of this class holding the 16 bytes of `uuid`,
    laid out in the byte order and tagged with the binary subtype that
    ``uuid_representation`` calls for.

    Raises :exc:`TypeError` if `uuid` is not an instance of
    :class:`~uuid.UUID`. Raises :exc:`ValueError` if
    ``uuid_representation`` is not a known representation or is
    :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

    :Parameters:
      - `uuid`: A :class:`uuid.UUID` instance.
      - `uuid_representation`: A member of
        :class:`~bson.binary.UuidRepresentation`. Default:
        :const:`~bson.binary.UuidRepresentation.STANDARD`.

    .. versionadded:: 3.11
    """
    if not isinstance(uuid, UUID):
        raise TypeError("uuid must be an instance of uuid.UUID")

    if uuid_representation not in ALL_UUID_REPRESENTATIONS:
        raise ValueError("uuid_representation must be a value "
                         "from bson.binary.UuidRepresentation")

    if uuid_representation == UuidRepresentation.UNSPECIFIED:
        raise ValueError(
            "cannot encode native uuid.UUID with "
            "UuidRepresentation.UNSPECIFIED. UUIDs can be manually "
            "converted to bson.Binary instances using "
            "bson.Binary.from_uuid() or a different UuidRepresentation "
            "can be configured.")

    # Only the standard representation uses the new UUID subtype; every
    # legacy representation shares the old one and differs in byte order.
    if uuid_representation == UuidRepresentation.STANDARD:
        return cls(uuid.bytes, UUID_SUBTYPE)

    if uuid_representation == UuidRepresentation.CSHARP_LEGACY:
        return cls(uuid.bytes_le, OLD_UUID_SUBTYPE)

    if uuid_representation == UuidRepresentation.JAVA_LEGACY:
        raw = uuid.bytes
        # Reverse each 8-byte half independently.
        return cls(raw[7::-1] + raw[:7:-1], OLD_UUID_SUBTYPE)

    # uuid_representation == UuidRepresentation.PYTHON_LEGACY
    return cls(uuid.bytes, OLD_UUID_SUBTYPE)
def as_uuid(self, uuid_representation=UuidRepresentation.STANDARD):
    """Create a Python UUID from this BSON Binary object.

    Decodes this binary object as a native :class:`uuid.UUID` instance
    with the provided ``uuid_representation``.

    Raises :exc:`ValueError` if this :class:`~bson.binary.Binary` instance
    does not contain a UUID: its subtype is not a UUID subtype, its
    payload is not exactly 16 bytes long, or its subtype does not match
    the requested representation. Also raises :exc:`ValueError` if
    ``uuid_representation`` is unknown or is
    :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

    :Parameters:
      - `uuid_representation`: A member of
        :class:`~bson.binary.UuidRepresentation`. Default:
        :const:`~bson.binary.UuidRepresentation.STANDARD`.

    .. versionadded:: 3.11
    """
    if self.subtype not in ALL_UUID_SUBTYPES:
        raise ValueError("cannot decode subtype %s as a uuid" % (
            self.subtype,))

    if uuid_representation not in ALL_UUID_REPRESENTATIONS:
        raise ValueError("uuid_representation must be a value from "
                         "bson.binary.UuidRepresentation")

    if uuid_representation == UuidRepresentation.UNSPECIFIED:
        raise ValueError("uuid_representation cannot be UNSPECIFIED")

    # Reject bad lengths up front. Without this, the JAVA_LEGACY branch
    # below would slice away any bytes past the 16th and return a UUID for
    # a payload that the other representations refuse to decode.
    if len(self) != 16:
        raise ValueError(
            "cannot decode %d bytes as a uuid, expected 16" % (len(self),))

    if uuid_representation == UuidRepresentation.PYTHON_LEGACY:
        if self.subtype == OLD_UUID_SUBTYPE:
            return UUID(bytes=self)
    elif uuid_representation == UuidRepresentation.JAVA_LEGACY:
        if self.subtype == OLD_UUID_SUBTYPE:
            # Undo the per-half byte reversal applied when encoding.
            return UUID(bytes=self[0:8][::-1] + self[8:16][::-1])
    elif uuid_representation == UuidRepresentation.CSHARP_LEGACY:
        if self.subtype == OLD_UUID_SUBTYPE:
            return UUID(bytes_le=self)
    else:
        # uuid_representation == UuidRepresentation.STANDARD
        if self.subtype == UUID_SUBTYPE:
            return UUID(bytes=self)

    # Reached only when the subtype and the representation disagree.
    raise ValueError("cannot decode subtype %s to %s" % (
        self.subtype, UUID_REPRESENTATION_NAMES[uuid_representation]))
@property
def subtype(self):
"""Subtype of this binary data.
@ -188,7 +329,26 @@ class Binary(bytes):
class UUIDLegacy(Binary):
"""UUID wrapper to support working with UUIDs stored as PYTHON_LEGACY.
"""**DEPRECATED** - UUID wrapper to support working with UUIDs stored as
PYTHON_LEGACY.
.. note:: This class has been deprecated and will be removed in
PyMongo 4.0. Use :meth:`~bson.binary.Binary.from_uuid` and
:meth:`~bson.binary.Binary.as_uuid` with the appropriate
:class:`~bson.binary.UuidRepresentation` to handle legacy-formatted
UUIDs instead.::
from bson import Binary, UUIDLegacy, UuidRepresentation
import uuid
my_uuid = uuid.uuid4()
legacy_uuid = UUIDLegacy(my_uuid)
binary_uuid = Binary.from_uuid(
my_uuid, UuidRepresentation.PYTHON_LEGACY)
assert legacy_uuid == binary_uuid
assert legacy_uuid.uuid == binary_uuid.as_uuid(
UuidRepresentation.PYTHON_LEGACY)
.. doctest::
@ -218,13 +378,25 @@ class UUIDLegacy(Binary):
>>> coll.find_one({'uuid': my_uuid})['uuid']
UUID('...')
Raises TypeError if `obj` is not an instance of :class:`~uuid.UUID`.
Raises :exc:`TypeError` if `obj` is not an instance of :class:`~uuid.UUID`.
:Parameters:
- `obj`: An instance of :class:`~uuid.UUID`.
.. versionchanged:: 3.11
Deprecated. The same functionality can be replicated using the
:meth:`~Binary.from_uuid` and :meth:`~Binary.as_uuid` methods with
:data:`~UuidRepresentation.PYTHON_LEGACY`.
.. versionadded:: 2.1
"""
def __new__(cls, obj):
warn(
"The UUIDLegacy class has been deprecated and will be removed "
"in PyMongo 4.0. Use the Binary.from_uuid() and Binary.to_uuid() "
"with the appropriate UuidRepresentation to handle "
"legacy-formatted UUIDs instead.",
DeprecationWarning, stacklevel=2)
if not isinstance(obj, UUID):
raise TypeError("obj must be an instance of uuid.UUID")
self = Binary.__new__(cls, obj.bytes, OLD_UUID_SUBTYPE)

View File

@ -15,14 +15,15 @@
"""Tools for specifying BSON codec options."""
import datetime
import warnings
from abc import abstractmethod
from collections import namedtuple
from bson.py3compat import ABC, abc, abstractproperty, string_type
from bson.binary import (ALL_UUID_REPRESENTATIONS,
PYTHON_LEGACY,
from bson.binary import (UuidRepresentation,
ALL_UUID_REPRESENTATIONS,
UUID_REPRESENTATION_NAMES)
@ -239,7 +240,8 @@ class CodecOptions(_options_base):
"""
def __new__(cls, document_class=dict,
tz_aware=False, uuid_representation=PYTHON_LEGACY,
tz_aware=False,
uuid_representation=None,
unicode_decode_error_handler="strict",
tzinfo=None, type_registry=None):
if not (issubclass(document_class, abc.MutableMapping) or
@ -249,9 +251,17 @@ class CodecOptions(_options_base):
"sublass of collections.MutableMapping")
if not isinstance(tz_aware, bool):
raise TypeError("tz_aware must be True or False")
if uuid_representation not in ALL_UUID_REPRESENTATIONS:
if uuid_representation is None:
warnings.warn(
"Starting in PyMongo 4.0, the default uuidRepresentation "
"will be changed to 'unspecified'. Applications will need to "
"explicitly set 'uuidRepresentation=pythonLegacy' in the "
"connection string to preserve current behavior.",
DeprecationWarning, stacklevel=2)
uuid_representation = UuidRepresentation.PYTHON_LEGACY
elif uuid_representation not in ALL_UUID_REPRESENTATIONS:
raise ValueError("uuid_representation must be a value "
"from bson.binary.ALL_UUID_REPRESENTATIONS")
"from bson.binary.UuidRepresentation")
if not isinstance(unicode_decode_error_handler, (string_type, None)):
raise ValueError("unicode_decode_error_handler must be a string "
"or None")
@ -314,7 +324,8 @@ class CodecOptions(_options_base):
)
DEFAULT_CODEC_OPTIONS = CodecOptions()
DEFAULT_CODEC_OPTIONS = CodecOptions(
uuid_representation=UuidRepresentation.PYTHON_LEGACY)
def _parse_codec_options(options):
@ -324,8 +335,7 @@ def _parse_codec_options(options):
'document_class', DEFAULT_CODEC_OPTIONS.document_class),
tz_aware=options.get(
'tz_aware', DEFAULT_CODEC_OPTIONS.tz_aware),
uuid_representation=options.get(
'uuidrepresentation', DEFAULT_CODEC_OPTIONS.uuid_representation),
uuid_representation=options.get('uuidrepresentation'),
unicode_decode_error_handler=options.get(
'unicode_decode_error_handler',
DEFAULT_CODEC_OPTIONS.unicode_decode_error_handler),

View File

@ -110,14 +110,13 @@ import datetime
import json
import math
import re
import sys
import uuid
from pymongo.errors import ConfigurationError
import bson
from bson import EPOCH_AWARE, EPOCH_NAIVE, RE_TYPE, SON
from bson.binary import (Binary, JAVA_LEGACY, CSHARP_LEGACY, OLD_UUID_SUBTYPE,
from bson import EPOCH_AWARE, RE_TYPE, SON
from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES,
UUID_SUBTYPE)
from bson.code import Code
from bson.codec_options import CodecOptions
@ -245,9 +244,9 @@ class JSONOptions(CodecOptions):
- `document_class`: BSON documents returned by :func:`loads` will be
decoded to an instance of this class. Must be a subclass of
:class:`collections.MutableMapping`. Defaults to :class:`dict`.
- `uuid_representation`: The BSON representation to use when encoding
and decoding instances of :class:`uuid.UUID`. Defaults to
:const:`~bson.binary.PYTHON_LEGACY`.
- `uuid_representation`: The :class:`~bson.binary.UuidRepresentation`
to use when encoding and decoding instances of :class:`uuid.UUID`.
Defaults to :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`.
- `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
`Date` will be decoded to timezone aware instances of
:class:`datetime.datetime`. Otherwise they will be naive. Defaults
@ -494,14 +493,20 @@ def _parse_legacy_uuid(doc):
def _binary_or_uuid(data, subtype, json_options):
    """Turn decoded binary ``data`` into the matching Python value.

    :Parameters:
      - `data`: The raw payload of the binary value.
      - `subtype`: The binary subtype that accompanied ``data``.
      - `json_options`: Options object; only ``uuid_representation`` is
        read here.

    Returns, depending on ``subtype``:

    - a UUID subtype with ``UuidRepresentation.UNSPECIFIED`` configured:
      a :class:`~bson.binary.Binary`, left undecoded;
    - a UUID subtype otherwise: the result of
      :meth:`~bson.binary.Binary.as_uuid`;
    - subtype 0 on Python 3: ``data`` itself;
    - anything else: a :class:`~bson.binary.Binary`.
    """
    # special handling for UUID
    if subtype in ALL_UUID_SUBTYPES:
        uuid_representation = json_options.uuid_representation
        binary_value = Binary(data, subtype)
        # This check must come first: with UNSPECIFIED the application has
        # opted out of automatic UUID decoding for *both* UUID subtypes.
        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            return binary_value
        if subtype == UUID_SUBTYPE:
            # Legacy behavior: use STANDARD with binary subtype 4.
            uuid_representation = UuidRepresentation.STANDARD
        elif uuid_representation == UuidRepresentation.STANDARD:
            # subtype == OLD_UUID_SUBTYPE
            # Legacy behavior: STANDARD is the same as PYTHON_LEGACY.
            uuid_representation = UuidRepresentation.PYTHON_LEGACY
        return binary_value.as_uuid(uuid_representation)

    if PY3 and subtype == 0:
        return data
    return Binary(data, subtype)
@ -795,15 +800,9 @@ def default(obj, json_options=DEFAULT_JSON_OPTIONS):
return _encode_binary(obj, 0, json_options)
if isinstance(obj, uuid.UUID):
if json_options.strict_uuid:
data = obj.bytes
subtype = OLD_UUID_SUBTYPE
if json_options.uuid_representation == CSHARP_LEGACY:
data = obj.bytes_le
elif json_options.uuid_representation == JAVA_LEGACY:
data = data[7::-1] + data[:7:-1]
elif json_options.uuid_representation == UUID_SUBTYPE:
subtype = UUID_SUBTYPE
return _encode_binary(data, subtype, json_options)
binval = Binary.from_uuid(
obj, uuid_representation=json_options.uuid_representation)
return _encode_binary(binval, binval.subtype, json_options)
else:
return {"$uuid": obj.hex}
if isinstance(obj, Decimal128):

View File

@ -16,6 +16,9 @@
.. autodata:: MD5_SUBTYPE
.. autodata:: USER_DEFINED_SUBTYPE
.. autoclass:: UuidRepresentation
:members:
.. autoclass:: Binary(data, subtype=BINARY_SUBTYPE)
:members:
:show-inheritance:

View File

@ -19,8 +19,7 @@ import datetime
import warnings
from bson import SON
from bson.binary import (STANDARD, PYTHON_LEGACY,
JAVA_LEGACY, CSHARP_LEGACY)
from bson.binary import UuidRepresentation
from bson.codec_options import CodecOptions, TypeRegistry
from bson.py3compat import abc, integer_types, iteritems, string_type, PY3
from bson.raw_bson import RawBSONDocument
@ -149,10 +148,11 @@ def raise_config_error(key, dummy):
# Mapping of URI uuid representation options to valid subtypes.
_UUID_REPRESENTATIONS = {
'standard': STANDARD,
'pythonLegacy': PYTHON_LEGACY,
'javaLegacy': JAVA_LEGACY,
'csharpLegacy': CSHARP_LEGACY
'unspecified': UuidRepresentation.UNSPECIFIED,
'standard': UuidRepresentation.STANDARD,
'pythonLegacy': UuidRepresentation.PYTHON_LEGACY,
'javaLegacy': UuidRepresentation.JAVA_LEGACY,
'csharpLegacy': UuidRepresentation.CSHARP_LEGACY
}

View File

@ -31,8 +31,10 @@ from bson.binary import *
from bson.codec_options import CodecOptions
from bson.py3compat import PY3
from bson.son import SON
from pymongo.common import validate_uuid_representation
from pymongo.mongo_client import MongoClient
from test import client_context, unittest
from pymongo.write_concern import WriteConcern
from test import client_context, unittest, IntegrationTest
from test.utils import ignore_deprecations
@ -144,11 +146,13 @@ class TestBinary(unittest.TestCase):
self.assertEqual(hash(Binary(b"hello world", 42)), hash(two))
def test_uuid_subtype_4(self):
    """uuid_representation should be ignored when decoding subtype 4 for
    all UuidRepresentation values except UNSPECIFIED."""
    expected_uuid = uuid.uuid4()
    doc = {"uuid": Binary(expected_uuid.bytes, 4)}
    encoded = encode(doc)
    for uuid_representation in ALL_UUID_REPRESENTATIONS:
        # With UNSPECIFIED a UUID-subtype binary is returned as a Binary
        # rather than a uuid.UUID, so the equality below cannot hold.
        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            continue
        options = CodecOptions(uuid_representation=uuid_representation)
        self.assertEqual(expected_uuid, decode(encoded, options)["uuid"])
@ -296,8 +300,9 @@ class TestBinary(unittest.TestCase):
self.assertEqual(1, coll.count_documents({}))
# Test UUIDLegacy queries.
coll = db.get_collection("test",
CodecOptions(uuid_representation=STANDARD))
coll = db.get_collection(
"test", CodecOptions(
uuid_representation=UuidRepresentation.STANDARD))
self.assertEqual(0, coll.find({'uuid': uu}).count())
cur = coll.find({'uuid': UUIDLegacy(uu)})
self.assertEqual(1, cur.count())
@ -364,5 +369,219 @@ class TestBinary(unittest.TestCase):
self.assertEqual(b0, Binary(array.array('B', b'123'), 2))
class TestUuidSpecExplicitCoding(unittest.TestCase):
    """Explicit encoding/decoding prose tests for UUID handling.

    These tests call ``Binary.from_uuid`` and ``Binary.as_uuid`` directly
    and compare against hand-written byte strings.
    """

    @classmethod
    def setUpClass(cls):
        super(TestUuidSpecExplicitCoding, cls).setUpClass()
        # The single UUID every prose test encodes or expects back.
        cls.uuid = uuid.UUID("00112233445566778899AABBCCDDEEFF")

    @staticmethod
    def _hex_to_bytes(hexstring):
        """Return the raw bytes spelled out by ``hexstring``."""
        # Python 2 falls back to the "hex" codec instead of bytes.fromhex.
        return bytes.fromhex(hexstring) if PY3 else hexstring.decode("hex")

    # Explicit encoding prose test #1
    def test_encoding_1(self):
        encoded = Binary.from_uuid(self.uuid)
        self.assertEqual(
            encoded,
            Binary(self._hex_to_bytes("00112233445566778899AABBCCDDEEFF"), 4))

    def _test_encoding_w_uuid_rep(
            self, uuid_rep, expected_hexstring, expected_subtype):
        """Encode ``self.uuid`` with ``uuid_rep`` and check bytes + subtype."""
        encoded = Binary.from_uuid(self.uuid, uuid_rep)
        expected_payload = self._hex_to_bytes(expected_hexstring)
        self.assertEqual(encoded, Binary(expected_payload, expected_subtype))

    # Explicit encoding prose test #2
    def test_encoding_2(self):
        self._test_encoding_w_uuid_rep(
            UuidRepresentation.STANDARD,
            "00112233445566778899AABBCCDDEEFF", 4)

    # Explicit encoding prose test #3
    def test_encoding_3(self):
        self._test_encoding_w_uuid_rep(
            UuidRepresentation.JAVA_LEGACY,
            "7766554433221100FFEEDDCCBBAA9988", 3)

    # Explicit encoding prose test #4
    def test_encoding_4(self):
        self._test_encoding_w_uuid_rep(
            UuidRepresentation.CSHARP_LEGACY,
            "33221100554477668899AABBCCDDEEFF", 3)

    # Explicit encoding prose test #5
    def test_encoding_5(self):
        self._test_encoding_w_uuid_rep(
            UuidRepresentation.PYTHON_LEGACY,
            "00112233445566778899AABBCCDDEEFF", 3)

    # Explicit encoding prose test #6
    def test_encoding_6(self):
        self.assertRaises(
            ValueError,
            Binary.from_uuid, self.uuid, UuidRepresentation.UNSPECIFIED)

    # Explicit decoding prose test #1
    def test_decoding_1(self):
        standard_binary = Binary(
            self._hex_to_bytes("00112233445566778899AABBCCDDEEFF"), 4)

        # Case i:
        self.assertEqual(standard_binary.as_uuid(), self.uuid)

        # Case ii:
        self.assertEqual(
            standard_binary.as_uuid(UuidRepresentation.STANDARD), self.uuid)

        # Cases iii-vi:
        legacy_reps = (UuidRepresentation.JAVA_LEGACY,
                       UuidRepresentation.CSHARP_LEGACY,
                       UuidRepresentation.PYTHON_LEGACY)
        for legacy_rep in legacy_reps:
            self.assertRaises(ValueError, standard_binary.as_uuid, legacy_rep)

    def _test_decoding_legacy(self, hexstring, uuid_rep):
        """Check a subtype 3 payload decodes only with ``uuid_rep``."""
        legacy_binary = Binary(self._hex_to_bytes(hexstring), 3)

        # Case i:
        self.assertRaises(ValueError, legacy_binary.as_uuid)

        # Cases ii-iii:
        rejected_reps = (UuidRepresentation.STANDARD,
                         UuidRepresentation.UNSPECIFIED)
        for rejected_rep in rejected_reps:
            self.assertRaises(ValueError, legacy_binary.as_uuid, rejected_rep)

        # Case iv:
        self.assertEqual(legacy_binary.as_uuid(uuid_rep), self.uuid)

    # Explicit decoding prose test #2
    def test_decoding_2(self):
        self._test_decoding_legacy(
            "7766554433221100FFEEDDCCBBAA9988",
            UuidRepresentation.JAVA_LEGACY)

    # Explicit decoding prose test #3
    def test_decoding_3(self):
        self._test_decoding_legacy(
            "33221100554477668899AABBCCDDEEFF",
            UuidRepresentation.CSHARP_LEGACY)

    # Explicit decoding prose test #4
    def test_decoding_4(self):
        self._test_decoding_legacy(
            "00112233445566778899AABBCCDDEEFF",
            UuidRepresentation.PYTHON_LEGACY)
class TestUuidSpecImplicitCoding(IntegrationTest):
@classmethod
def setUpClass(cls):
super(TestUuidSpecImplicitCoding, cls).setUpClass()
cls.uuid = uuid.UUID("00112233445566778899AABBCCDDEEFF")
@staticmethod
def _hex_to_bytes(hexstring):
if PY3:
return bytes.fromhex(hexstring)
return hexstring.decode("hex")
def _get_coll_w_uuid_rep(self, uuid_rep):
codec_options = self.client.codec_options.with_options(
uuid_representation=validate_uuid_representation(None, uuid_rep))
coll = self.db.get_collection(
'pymongo_test', codec_options=codec_options,
write_concern=WriteConcern("majority"))
return coll
def _test_encoding(self, uuid_rep, expected_hexstring, expected_subtype):
coll = self._get_coll_w_uuid_rep(uuid_rep)
coll.delete_many({})
coll.insert_one({'_id': self.uuid})
self.assertTrue(
coll.find_one({"_id": Binary(
self._hex_to_bytes(expected_hexstring), expected_subtype)}))
# Implicit encoding prose test #1
def test_encoding_1(self):
self._test_encoding(
"javaLegacy", "7766554433221100FFEEDDCCBBAA9988", 3)
# Implicit encoding prose test #2
def test_encoding_2(self):
self._test_encoding(
"csharpLegacy", "33221100554477668899AABBCCDDEEFF", 3)
# Implicit encoding prose test #3
def test_encoding_3(self):
self._test_encoding(
"pythonLegacy", "00112233445566778899AABBCCDDEEFF", 3)
# Implicit encoding prose test #4
def test_encoding_4(self):
self._test_encoding(
"standard", "00112233445566778899AABBCCDDEEFF", 4)
# Implicit encoding prose test #5
def test_encoding_5(self):
with self.assertRaises(ValueError):
self._test_encoding(
"unspecifed", "dummy", -1)
def _test_decoding(self, client_uuid_representation_string,
legacy_field_uuid_representation,
expected_standard_field_value,
expected_legacy_field_value):
coll = self._get_coll_w_uuid_rep(client_uuid_representation_string)
coll.drop()
standard_val = Binary.from_uuid(self.uuid, UuidRepresentation.STANDARD)
legacy_val = Binary.from_uuid(self.uuid, legacy_field_uuid_representation)
coll.insert_one({'standard': standard_val, 'legacy': legacy_val})
doc = coll.find_one()
self.assertEqual(doc['standard'], expected_standard_field_value)
self.assertEqual(doc['legacy'], expected_legacy_field_value)
# Implicit decoding prose test #1
def test_decoding_1(self):
# TODO: these assertions will change after PYTHON-2245. Specifically,
# the 'standard' field will be decoded as a Binary subtype 4.
binary_value = Binary.from_uuid(
self.uuid, UuidRepresentation.PYTHON_LEGACY)
self._test_decoding(
"javaLegacy", UuidRepresentation.JAVA_LEGACY,
self.uuid, self.uuid)
self._test_decoding(
"csharpLegacy", UuidRepresentation.CSHARP_LEGACY,
self.uuid, self.uuid)
self._test_decoding(
"pythonLegacy", UuidRepresentation.PYTHON_LEGACY,
self.uuid, self.uuid)
# Implicit decoding pose test #2
def test_decoding_2(self):
    """Implicit decoding prose test #2: client configured as standard.

    The 'legacy' field is written with PYTHON_LEGACY. The 'standard'
    field is expected to decode to ``self.uuid``, and the 'legacy' field
    to the UUID obtained by converting the PYTHON_LEGACY Binary back with
    the same representation.
    """
    # TODO: these assertions will change after PYTHON-2245. Specifically,
    # the 'legacy' field will be decoded as a Binary subtype 3.
    legacy_binary = Binary.from_uuid(
        self.uuid, UuidRepresentation.PYTHON_LEGACY)
    expected_legacy = legacy_binary.as_uuid(
        UuidRepresentation.PYTHON_LEGACY)
    self._test_decoding(
        "standard", UuidRepresentation.PYTHON_LEGACY,
        self.uuid, expected_legacy)
# Implicit decoding prose test #3
def test_decoding_3(self):
    """Implicit decoding prose test #3: client configured as unspecified.

    For every legacy representation used to write the 'legacy' field,
    both fields are expected to come back as the same Binary values that
    were written rather than as native UUIDs.
    """
    standard_binary = Binary.from_uuid(
        self.uuid, UuidRepresentation.STANDARD)
    legacy_representations = [
        UuidRepresentation.PYTHON_LEGACY,
        UuidRepresentation.CSHARP_LEGACY,
        UuidRepresentation.JAVA_LEGACY,
    ]
    for representation in legacy_representations:
        legacy_binary = Binary.from_uuid(self.uuid, representation)
        self._test_decoding(
            "unspecified", representation,
            standard_binary, legacy_binary)
# Run this module's test cases when it is executed directly as a script.
if __name__ == "__main__":
unittest.main()

View File

@ -649,7 +649,6 @@ class TestBSON(unittest.TestCase):
decode(encode({"tuple": (1, 2)})))
def test_uuid(self):
id = uuid.uuid4()
transformed_id = decode(encode({"id": id}))["id"]
@ -991,7 +990,6 @@ class TestCodecOptions(unittest.TestCase):
self.assertTrue(CodecOptions(tz_aware=True).tz_aware)
def test_uuid_representation(self):
self.assertRaises(ValueError, CodecOptions, uuid_representation=None)
self.assertRaises(ValueError, CodecOptions, uuid_representation=7)
self.assertRaises(ValueError, CodecOptions, uuid_representation=2)
@ -1003,7 +1001,7 @@ class TestCodecOptions(unittest.TestCase):
def test_codec_options_repr(self):
r = ("CodecOptions(document_class=dict, tz_aware=False, "
"uuid_representation=PYTHON_LEGACY, "
"uuid_representation=UuidRepresentation.PYTHON_LEGACY, "
"unicode_decode_error_handler='strict', "
"tzinfo=None, type_registry=TypeRegistry(type_codecs=[], "
"fallback_encoder=None))")

View File

@ -25,7 +25,7 @@ from bson.codec_options import CodecOptions
from bson.objectid import ObjectId
from pymongo.errors import OperationFailure
from pymongo.write_concern import WriteConcern
from test import client_context, IntegrationTest
from test import client_context, unittest, IntegrationTest
from test.utils import connected, rs_or_single_client, single_client

View File

@ -22,14 +22,11 @@ import uuid
sys.path[0:0] = [""]
from pymongo.errors import ConfigurationError
from bson import json_util, EPOCH_AWARE, EPOCH_NAIVE, SON
from bson import json_util, EPOCH_AWARE, SON
from bson.json_util import (DatetimeRepresentation,
STRICT_JSON_OPTIONS)
from bson.binary import (ALL_UUID_REPRESENTATIONS, Binary, MD5_SUBTYPE,
USER_DEFINED_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY,
STANDARD)
USER_DEFINED_SUBTYPE, UuidRepresentation, STANDARD)
from bson.code import Code
from bson.dbref import DBRef
from bson.int64 import Int64
@ -271,7 +268,8 @@ class TestJsonUtil(unittest.TestCase):
doc, json_util.loads(
'{"uuid": '
'{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}'))
for uuid_representation in ALL_UUID_REPRESENTATIONS:
for uuid_representation in (set(ALL_UUID_REPRESENTATIONS) -
{UuidRepresentation.UNSPECIFIED}):
options = json_util.JSONOptions(
strict_uuid=True, uuid_representation=uuid_representation)
self.round_trip(doc, json_options=options)
@ -281,6 +279,30 @@ class TestJsonUtil(unittest.TestCase):
'{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}',
json_options=options))
def test_uuid_uuid_rep_unspecified(self):
    """Check JSON handling of UUIDs under UuidRepresentation.UNSPECIFIED.

    Dumping a native UUID with these options must raise ValueError, while
    Binary values of subtype 3 and subtype 4 dumped with default options
    must load back, under these options, equal to the original document.
    """
    native_uuid = uuid.uuid4()
    unspecified_options = json_util.JSONOptions(
        strict_uuid=True,
        uuid_representation=UuidRepresentation.UNSPECIFIED)

    # Cannot directly encode native UUIDs with UNSPECIFIED.
    self.assertRaises(
        ValueError, json_util.dumps, {'uuid': native_uuid},
        json_options=unspecified_options)

    # All UUID subtypes are decoded as Binary with UNSPECIFIED.
    for uuid_subtype in (3, 4):
        binary_doc = {
            'uuid': Binary(native_uuid.bytes, subtype=uuid_subtype)}
        serialized = json_util.dumps(binary_doc)
        reloaded = json_util.loads(
            serialized, json_options=unspecified_options)
        self.assertEqual(binary_doc, reloaded)
def test_binary(self):
if PY3:
bin_type_dict = {"bin": b"\x00\x01\x02\x03\x04"}