diff --git a/bson/__init__.py b/bson/__init__.py index c8ac12e46..37794c325 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -76,9 +76,10 @@ import uuid from codecs import (utf_8_decode as _utf_8_decode, utf_8_encode as _utf_8_encode) -from bson.binary import (Binary, OLD_UUID_SUBTYPE, +from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES, + OLD_UUID_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY, - UUIDLegacy) + UUIDLegacy, UUID_SUBTYPE) from bson.code import Code from bson.codec_options import ( CodecOptions, DEFAULT_CODEC_OPTIONS, _raw_document_class) @@ -303,26 +304,29 @@ def _get_binary(data, view, position, obj_end, opts, dummy1): end = position + length if length < 0 or end > obj_end: raise InvalidBSON('bad binary object length') - if subtype == 3: - # Java Legacy + + # Convert UUID subtypes to native UUIDs. + # TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0+ + if subtype in ALL_UUID_SUBTYPES: uuid_representation = opts.uuid_representation - if uuid_representation == JAVA_LEGACY: - java = data[position:end] - value = uuid.UUID(bytes=java[0:8][::-1] + java[8:16][::-1]) - # C# legacy - elif uuid_representation == CSHARP_LEGACY: - value = uuid.UUID(bytes_le=data[position:end]) - # Python - else: - value = uuid.UUID(bytes=data[position:end]) - return value, end - if subtype == 4: - return uuid.UUID(bytes=data[position:end]), end + binary_value = Binary(data[position:end], subtype) + if uuid_representation == UuidRepresentation.UNSPECIFIED: + return binary_value, end + if subtype == UUID_SUBTYPE: + # Legacy behavior: use STANDARD with binary subtype 4. + uuid_representation = UuidRepresentation.STANDARD + elif uuid_representation == UuidRepresentation.STANDARD: + # subtype == OLD_UUID_SUBTYPE + # Legacy behavior: STANDARD is the same as PYTHON_LEGACY. + uuid_representation = UuidRepresentation.PYTHON_LEGACY + return binary_value.as_uuid(uuid_representation), end + # Python3 special case. Decode subtype 0 to 'bytes'. 
if PY3 and subtype == 0: value = data[position:end] else: value = Binary(data[position:end], subtype) + return value, end @@ -633,21 +637,8 @@ def _encode_binary(name, value, dummy0, dummy1): def _encode_uuid(name, value, dummy, opts): """Encode uuid.UUID.""" uuid_representation = opts.uuid_representation - # Python Legacy Common Case - if uuid_representation == OLD_UUID_SUBTYPE: - return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes - # Java Legacy - elif uuid_representation == JAVA_LEGACY: - from_uuid = value.bytes - data = from_uuid[0:8][::-1] + from_uuid[8:16][::-1] - return b"\x05" + name + b'\x10\x00\x00\x00\x03' + data - # C# legacy - elif uuid_representation == CSHARP_LEGACY: - # Microsoft GUID representation. - return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes_le - # New - return b"\x05" + name + b'\x10\x00\x00\x00\x04' + value.bytes - + binval = Binary.from_uuid(value, uuid_representation=uuid_representation) + return _encode_binary(name, binval, dummy, opts) def _encode_objectid(name, value, dummy0, dummy1): """Encode bson.objectid.ObjectId.""" diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index ae28c1ba8..f457f96b0 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -78,6 +78,7 @@ static struct module_state _state; #define STANDARD 4 #define JAVA_LEGACY 5 #define CSHARP_LEGACY 6 +#define UNSPECIFIED 0 #define BSON_MAX_SIZE 2147483647 /* The smallest possible BSON document, i.e. 
"{}" */ @@ -583,19 +584,6 @@ static int write_element_to_buffer(PyObject* self, buffer_t buffer, return result; } -static void -_fix_java(const char* in, char* out) { - int i, j; - for (i = 0, j = 7; i < j; i++, j--) { - out[i] = in[j]; - out[j] = in[i]; - } - for (i = 8, j = 15; i < j; i++, j--) { - out[i] = in[j]; - out[j] = in[i]; - } -} - static void _set_cannot_encode(PyObject* value) { PyObject* type = NULL; @@ -1276,14 +1264,9 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, uuid_type = _get_object(state->UUID, "uuid", "UUID"); if (uuid_type && PyObject_IsInstance(value, uuid_type)) { - /* Just a special case of Binary above, but - * simpler to do as a separate case. */ - PyObject* bytes; - /* Could be bytes, bytearray, str... */ - const char* data; - /* UUID is always 16 bytes */ - int size = 16; - char subtype; + PyObject* binary_type = NULL; + PyObject* binary_value = NULL; + int result; Py_DECREF(uuid_type); /* PyObject_IsInstance returns -1 on error */ @@ -1291,58 +1274,25 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (options->uuid_rep == JAVA_LEGACY - || options->uuid_rep == CSHARP_LEGACY) { - subtype = 3; - } - else { - subtype = options->uuid_rep; - } - - *(buffer_get_buffer(buffer) + type_byte) = 0x05; - if (!buffer_write_int32(buffer, (int32_t)size)) { - return 0; - } - if (!buffer_write_bytes(buffer, &subtype, 1)) { + binary_type = _get_object(state->Binary, "bson", "Binary"); + if (binary_type == NULL) { return 0; } - if (options->uuid_rep == CSHARP_LEGACY) { - /* Legacy C# byte order */ - bytes = PyObject_GetAttrString(value, "bytes_le"); - } - else { - bytes = PyObject_GetAttrString(value, "bytes"); - } - if (!bytes) { + binary_value = PyObject_CallMethod(binary_type, "from_uuid", "(Oi)", value, options->uuid_rep); + if (binary_value == NULL) { + Py_DECREF(binary_type); return 0; } -#if PY_MAJOR_VERSION >= 3 - data = PyBytes_AsString(bytes); -#else - data = 
PyString_AsString(bytes); -#endif - if (data == NULL) { - Py_DECREF(bytes); - return 0; - } - if (options->uuid_rep == JAVA_LEGACY) { - /* Store in legacy java byte order. */ - char as_legacy_java[16]; - _fix_java(data, as_legacy_java); - if (!buffer_write_bytes(buffer, as_legacy_java, size)) { - Py_DECREF(bytes); - return 0; - } - } - else { - if (!buffer_write_bytes(buffer, data, size)) { - Py_DECREF(bytes); - return 0; - } - } - Py_DECREF(bytes); - return 1; + + result = _write_element_to_buffer(self, buffer, + type_byte, binary_value, + check_keys, options, + in_custom_call, + in_fallback_call); + Py_DECREF(binary_type); + Py_DECREF(binary_value); + return result; } Py_XDECREF(mapping_type); Py_XDECREF(uuid_type); @@ -1823,7 +1773,6 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, unsigned* position, unsigned char type, unsigned max, const codec_options_t* options) { struct module_state *state = GETSTATE(self); - PyObject* value = NULL; switch (type) { case 1: @@ -2063,70 +2012,49 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer, if (!data) { goto invalid; } - /* Encode as UUID, not Binary */ + /* Encode as UUID or Binary based on options->uuid_rep + * TODO: PYTHON-2245 Decoding should follow UUID spec in PyMongo 4.0 */ if (subtype == 3 || subtype == 4) { - PyObject* kwargs; - PyObject* args = PyTuple_New(0); + PyObject* binary_type = NULL; + PyObject* binary_value = NULL; + char uuid_rep = options->uuid_rep; + /* UUID should always be 16 bytes */ - if (!args || length != 16) { - Py_DECREF(data); - goto invalid; - } - kwargs = PyDict_New(); - if (!kwargs) { - Py_DECREF(data); - Py_DECREF(args); - goto invalid; + if (length != 16) { + goto uuiderror; } - /* - * From this point, we hold refs to args, kwargs, and data. - * If anything fails, goto uuiderror to clean them up. 
- */ - if (subtype == 3 && options->uuid_rep == CSHARP_LEGACY) { - /* Legacy C# byte order */ - if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1) - goto uuiderror; + binary_type = _get_object(state->Binary, "bson", "Binary"); + if (binary_type == NULL) { + goto uuiderror; } - else { - if (subtype == 3 && options->uuid_rep == JAVA_LEGACY) { - /* Convert from legacy java byte order */ - char big_endian[16]; - _fix_java(buffer + *position, big_endian); - /* Free the previously created PyString object */ - Py_DECREF(data); -#if PY_MAJOR_VERSION >= 3 - data = PyBytes_FromStringAndSize(big_endian, length); -#else - data = PyString_FromStringAndSize(big_endian, length); -#endif - if (data == NULL) - goto uuiderror; + + binary_value = PyObject_CallFunction(binary_type, "(Oi)", data, subtype); + if (binary_value == NULL) { + goto uuiderror; + } + + if (uuid_rep == UNSPECIFIED) { + value = binary_value; + Py_INCREF(value); + } else { + if (subtype == 4) { + uuid_rep = STANDARD; + } else if (uuid_rep == STANDARD) { + uuid_rep = PYTHON_LEGACY; } - if ((PyDict_SetItemString(kwargs, "bytes", data)) == -1) - goto uuiderror; - - } - if ((type_to_create = _get_object(state->UUID, "uuid", "UUID"))) { - value = PyObject_Call(type_to_create, args, kwargs); - Py_DECREF(type_to_create); + value = PyObject_CallMethod(binary_value, "as_uuid", "(i)", uuid_rep); } - Py_DECREF(args); - Py_DECREF(kwargs); + uuiderror: + Py_XDECREF(binary_type); + Py_XDECREF(binary_value); Py_DECREF(data); if (!value) { goto invalid; } - *position += length; break; - - uuiderror: - Py_DECREF(args); - Py_DECREF(kwargs); - Py_XDECREF(data); - goto invalid; } #if PY_MAJOR_VERSION >= 3 diff --git a/bson/binary.py b/bson/binary.py index 1c833b5a5..cb89c69da 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -13,6 +13,7 @@ # limitations under the License. from uuid import UUID +from warnings import warn from bson.py3compat import PY3 @@ -55,57 +56,104 @@ current default is :data:`OLD_UUID_SUBTYPE`. 
Changed to subtype 4. """ -STANDARD = UUID_SUBTYPE -"""The standard UUID representation. -:class:`uuid.UUID` instances will automatically be encoded to -and decoded from BSON binary, using RFC-4122 byte order with -binary subtype :data:`UUID_SUBTYPE`. +class UuidRepresentation: + UNSPECIFIED = 0 + """An unspecified UUID representation. + + When configured, :class:`uuid.UUID` instances will **not** be + automatically encoded to or decoded from :class:`~bson.binary.Binary`. + When encoding a :class:`uuid.UUID` instance, an error will be raised. + To encode a :class:`uuid.UUID` instance with this configuration, it must + be wrapped in the :class:`~bson.binary.Binary` class by the application + code. When decoding a BSON binary field with a UUID subtype, a + :class:`~bson.binary.Binary` instance will be returned instead of a + :class:`uuid.UUID` instance. + + .. versionadded:: 3.11 + """ + + STANDARD = UUID_SUBTYPE + """The standard UUID representation. + + :class:`uuid.UUID` instances will automatically be encoded to + and decoded from BSON binary, using RFC-4122 byte order with + binary subtype :data:`UUID_SUBTYPE`. + + .. versionadded:: 3.11 + """ + + PYTHON_LEGACY = OLD_UUID_SUBTYPE + """The Python legacy UUID representation. + + :class:`uuid.UUID` instances will automatically be encoded to + and decoded from BSON binary, using RFC-4122 byte order with + binary subtype :data:`OLD_UUID_SUBTYPE`. + + .. versionadded:: 3.11 + """ + + JAVA_LEGACY = 5 + """The Java legacy UUID representation. + + :class:`uuid.UUID` instances will automatically be encoded to + and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, + using the Java driver's legacy byte order. + + .. versionadded:: 3.11 + """ + + CSHARP_LEGACY = 6 + """The C#/.net legacy UUID representation. + + :class:`uuid.UUID` instances will automatically be encoded to + and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, + using the C# driver's legacy byte order. + + .. 
versionadded:: 3.11 + """ + + +STANDARD = UuidRepresentation.STANDARD +"""An alias for :data:`UuidRepresentation.STANDARD`. .. versionadded:: 3.0 """ -PYTHON_LEGACY = OLD_UUID_SUBTYPE -"""The Python legacy UUID representation. - -:class:`uuid.UUID` instances will automatically be encoded to -and decoded from BSON binary, using RFC-4122 byte order with -binary subtype :data:`OLD_UUID_SUBTYPE`. +PYTHON_LEGACY = UuidRepresentation.PYTHON_LEGACY +"""An alias for :data:`UuidRepresentation.PYTHON_LEGACY`. .. versionadded:: 3.0 """ -JAVA_LEGACY = 5 -"""The Java legacy UUID representation. - -:class:`uuid.UUID` instances will automatically be encoded to -and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, -using the Java driver's legacy byte order. +JAVA_LEGACY = UuidRepresentation.JAVA_LEGACY +"""An alias for :data:`UuidRepresentation.JAVA_LEGACY`. .. versionchanged:: 3.6 - BSON binary subtype 4 is decoded using RFC-4122 byte order. + BSON binary subtype 4 is decoded using RFC-4122 byte order. .. versionadded:: 2.3 """ -CSHARP_LEGACY = 6 -"""The C#/.net legacy UUID representation. - -:class:`uuid.UUID` instances will automatically be encoded to -and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`, -using the C# driver's legacy byte order. +CSHARP_LEGACY = UuidRepresentation.CSHARP_LEGACY +"""An alias for :data:`UuidRepresentation.CSHARP_LEGACY`. .. versionchanged:: 3.6 - BSON binary subtype 4 is decoded using RFC-4122 byte order. + BSON binary subtype 4 is decoded using RFC-4122 byte order. .. 
versionadded:: 2.3 """ ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE) -ALL_UUID_REPRESENTATIONS = (STANDARD, PYTHON_LEGACY, JAVA_LEGACY, CSHARP_LEGACY) +ALL_UUID_REPRESENTATIONS = (UuidRepresentation.UNSPECIFIED, + UuidRepresentation.STANDARD, + UuidRepresentation.PYTHON_LEGACY, + UuidRepresentation.JAVA_LEGACY, + UuidRepresentation.CSHARP_LEGACY) UUID_REPRESENTATION_NAMES = { - PYTHON_LEGACY: 'PYTHON_LEGACY', - STANDARD: 'STANDARD', - JAVA_LEGACY: 'JAVA_LEGACY', - CSHARP_LEGACY: 'CSHARP_LEGACY'} + UuidRepresentation.UNSPECIFIED: 'UuidRepresentation.UNSPECIFIED', + UuidRepresentation.STANDARD: 'UuidRepresentation.STANDARD', + UuidRepresentation.PYTHON_LEGACY: 'UuidRepresentation.PYTHON_LEGACY', + UuidRepresentation.JAVA_LEGACY: 'UuidRepresentation.JAVA_LEGACY', + UuidRepresentation.CSHARP_LEGACY: 'UuidRepresentation.CSHARP_LEGACY'} MD5_SUBTYPE = 5 """BSON binary subtype for an MD5 hash. @@ -155,6 +203,99 @@ class Binary(bytes): self.__subtype = subtype return self + @classmethod + def from_uuid(cls, uuid, uuid_representation=UuidRepresentation.STANDARD): + """Create a BSON Binary object from a Python UUID. + + Creates a :class:`~bson.binary.Binary` object from a + :class:`uuid.UUID` instance. Assumes that the native + :class:`uuid.UUID` instance uses the byte-order implied by the + provided ``uuid_representation``. + + Raises :exc:`TypeError` if `uuid` is not an instance of + :class:`~uuid.UUID`. + + :Parameters: + - `uuid`: A :class:`uuid.UUID` instance. + - `uuid_representation`: A member of + :class:`~bson.binary.UuidRepresentation`. Default: + :const:`~bson.binary.UuidRepresentation.STANDARD`. + + .. 
versionadded:: 3.11 + """ + if not isinstance(uuid, UUID): + raise TypeError("uuid must be an instance of uuid.UUID") + + if uuid_representation not in ALL_UUID_REPRESENTATIONS: + raise ValueError("uuid_representation must be a value " + "from bson.binary.UuidRepresentation") + + if uuid_representation == UuidRepresentation.UNSPECIFIED: + raise ValueError( + "cannot encode native uuid.UUID with " + "UuidRepresentation.UNSPECIFIED. UUIDs can be manually " + "converted to bson.Binary instances using " + "bson.Binary.from_uuid() or a different UuidRepresentation " + "can be configured.") + + subtype = OLD_UUID_SUBTYPE + if uuid_representation == UuidRepresentation.PYTHON_LEGACY: + payload = uuid.bytes + elif uuid_representation == UuidRepresentation.JAVA_LEGACY: + from_uuid = uuid.bytes + payload = from_uuid[0:8][::-1] + from_uuid[8:16][::-1] + elif uuid_representation == UuidRepresentation.CSHARP_LEGACY: + payload = uuid.bytes_le + else: + # uuid_representation == UuidRepresentation.STANDARD + subtype = UUID_SUBTYPE + payload = uuid.bytes + + return cls(payload, subtype) + + def as_uuid(self, uuid_representation=UuidRepresentation.STANDARD): + """Create a Python UUID from this BSON Binary object. + + Decodes this binary object as a native :class:`uuid.UUID` instance + with the provided ``uuid_representation``. + + Raises :exc:`ValueError` if this :class:`~bson.binary.Binary` instance + does not contain a UUID. + + :Parameters: + - `uuid_representation`: A member of + :class:`~bson.binary.UuidRepresentation`. Default: + :const:`~bson.binary.UuidRepresentation.STANDARD`. + + .. 
versionadded:: 3.11 + """ + if self.subtype not in ALL_UUID_SUBTYPES: + raise ValueError("cannot decode subtype %s as a uuid" % ( + self.subtype,)) + + if uuid_representation not in ALL_UUID_REPRESENTATIONS: + raise ValueError("uuid_representation must be a value from " + "bson.binary.UuidRepresentation") + + if uuid_representation == UuidRepresentation.UNSPECIFIED: + raise ValueError("uuid_representation cannot be UNSPECIFIED") + elif uuid_representation == UuidRepresentation.PYTHON_LEGACY: + if self.subtype == OLD_UUID_SUBTYPE: + return UUID(bytes=self) + elif uuid_representation == UuidRepresentation.JAVA_LEGACY: + if self.subtype == OLD_UUID_SUBTYPE: + return UUID(bytes=self[0:8][::-1] + self[8:16][::-1]) + elif uuid_representation == UuidRepresentation.CSHARP_LEGACY: + if self.subtype == OLD_UUID_SUBTYPE: + return UUID(bytes_le=self) + else: + # uuid_representation == UuidRepresentation.STANDARD + if self.subtype == UUID_SUBTYPE: + return UUID(bytes=self) + + raise ValueError("cannot decode subtype %s to %s" % ( + self.subtype, UUID_REPRESENTATION_NAMES[uuid_representation])) + @property def subtype(self): """Subtype of this binary data. @@ -188,7 +329,26 @@ class Binary(bytes): class UUIDLegacy(Binary): - """UUID wrapper to support working with UUIDs stored as PYTHON_LEGACY. + """**DEPRECATED** - UUID wrapper to support working with UUIDs stored as + PYTHON_LEGACY. + + .. note:: This class has been deprecated and will be removed in + PyMongo 4.0. 
Use :meth:`~bson.binary.Binary.from_uuid` and + :meth:`~bson.binary.Binary.as_uuid` with the appropriate + :class:`~bson.binary.UuidRepresentation` to handle legacy-formatted + UUIDs instead.:: + + from bson import Binary, UUIDLegacy, UuidRepresentation + import uuid + + my_uuid = uuid.uuid4() + legacy_uuid = UUIDLegacy(my_uuid) + binary_uuid = Binary.from_uuid( + my_uuid, UuidRepresentation.PYTHON_LEGACY) + + assert legacy_uuid == binary_uuid + assert legacy_uuid.uuid == binary_uuid.as_uuid( + UuidRepresentation.PYTHON_LEGACY) .. doctest:: @@ -218,13 +378,25 @@ class UUIDLegacy(Binary): >>> coll.find_one({'uuid': my_uuid})['uuid'] UUID('...') - Raises TypeError if `obj` is not an instance of :class:`~uuid.UUID`. + Raises :exc:`TypeError` if `obj` is not an instance of :class:`~uuid.UUID`. :Parameters: - `obj`: An instance of :class:`~uuid.UUID`. + + .. versionchanged:: 3.11 + Deprecated. The same functionality can be replicated using the + :meth:`~Binary.from_uuid` and :meth:`~Binary.as_uuid` methods with + :data:`~UuidRepresentation.PYTHON_LEGACY`. + .. versionadded:: 2.1 """ def __new__(cls, obj): + warn( + "The UUIDLegacy class has been deprecated and will be removed " + "in PyMongo 4.0. 
Use the Binary.from_uuid() and Binary.to_uuid() " + "with the appropriate UuidRepresentation to handle " + "legacy-formatted UUIDs instead.", + DeprecationWarning, stacklevel=2) if not isinstance(obj, UUID): raise TypeError("obj must be an instance of uuid.UUID") self = Binary.__new__(cls, obj.bytes, OLD_UUID_SUBTYPE) diff --git a/bson/codec_options.py b/bson/codec_options.py index 471d695a9..a514cc92d 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -15,14 +15,15 @@ """Tools for specifying BSON codec options.""" import datetime +import warnings from abc import abstractmethod from collections import namedtuple from bson.py3compat import ABC, abc, abstractproperty, string_type -from bson.binary import (ALL_UUID_REPRESENTATIONS, - PYTHON_LEGACY, +from bson.binary import (UuidRepresentation, + ALL_UUID_REPRESENTATIONS, UUID_REPRESENTATION_NAMES) @@ -239,7 +240,8 @@ class CodecOptions(_options_base): """ def __new__(cls, document_class=dict, - tz_aware=False, uuid_representation=PYTHON_LEGACY, + tz_aware=False, + uuid_representation=None, unicode_decode_error_handler="strict", tzinfo=None, type_registry=None): if not (issubclass(document_class, abc.MutableMapping) or @@ -249,9 +251,17 @@ class CodecOptions(_options_base): "sublass of collections.MutableMapping") if not isinstance(tz_aware, bool): raise TypeError("tz_aware must be True or False") - if uuid_representation not in ALL_UUID_REPRESENTATIONS: + if uuid_representation is None: + warnings.warn( + "Starting in PyMongo 4.0, the default uuidRepresentation " + "will be changed to 'unspecified'. 
Applications will need to " + "explicitly set 'uuidRepresentation=pythonLegacy' in the " + "connection string to preserve current behavior.", + DeprecationWarning, stacklevel=2) + uuid_representation = UuidRepresentation.PYTHON_LEGACY + elif uuid_representation not in ALL_UUID_REPRESENTATIONS: raise ValueError("uuid_representation must be a value " - "from bson.binary.ALL_UUID_REPRESENTATIONS") + "from bson.binary.UuidRepresentation") if not isinstance(unicode_decode_error_handler, (string_type, None)): raise ValueError("unicode_decode_error_handler must be a string " "or None") @@ -314,7 +324,8 @@ class CodecOptions(_options_base): ) -DEFAULT_CODEC_OPTIONS = CodecOptions() +DEFAULT_CODEC_OPTIONS = CodecOptions( + uuid_representation=UuidRepresentation.PYTHON_LEGACY) def _parse_codec_options(options): @@ -324,8 +335,7 @@ def _parse_codec_options(options): 'document_class', DEFAULT_CODEC_OPTIONS.document_class), tz_aware=options.get( 'tz_aware', DEFAULT_CODEC_OPTIONS.tz_aware), - uuid_representation=options.get( - 'uuidrepresentation', DEFAULT_CODEC_OPTIONS.uuid_representation), + uuid_representation=options.get('uuidrepresentation'), unicode_decode_error_handler=options.get( 'unicode_decode_error_handler', DEFAULT_CODEC_OPTIONS.unicode_decode_error_handler), diff --git a/bson/json_util.py b/bson/json_util.py index 35bdc3070..14c364e7f 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -110,14 +110,13 @@ import datetime import json import math import re -import sys import uuid from pymongo.errors import ConfigurationError import bson -from bson import EPOCH_AWARE, EPOCH_NAIVE, RE_TYPE, SON -from bson.binary import (Binary, JAVA_LEGACY, CSHARP_LEGACY, OLD_UUID_SUBTYPE, +from bson import EPOCH_AWARE, RE_TYPE, SON +from bson.binary import (Binary, UuidRepresentation, ALL_UUID_SUBTYPES, UUID_SUBTYPE) from bson.code import Code from bson.codec_options import CodecOptions @@ -245,9 +244,9 @@ class JSONOptions(CodecOptions): - `document_class`: BSON documents 
returned by :func:`loads` will be decoded to an instance of this class. Must be a subclass of :class:`collections.MutableMapping`. Defaults to :class:`dict`. - - `uuid_representation`: The BSON representation to use when encoding - and decoding instances of :class:`uuid.UUID`. Defaults to - :const:`~bson.binary.PYTHON_LEGACY`. + - `uuid_representation`: The :class:`~bson.binary.UuidRepresentation` + to use when encoding and decoding instances of :class:`uuid.UUID`. + Defaults to :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`. - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type `Date` will be decoded to timezone aware instances of :class:`datetime.datetime`. Otherwise they will be naive. Defaults @@ -494,14 +493,20 @@ def _parse_legacy_uuid(doc): def _binary_or_uuid(data, subtype, json_options): # special handling for UUID - if subtype == OLD_UUID_SUBTYPE: - if json_options.uuid_representation == CSHARP_LEGACY: - return uuid.UUID(bytes_le=data) - if json_options.uuid_representation == JAVA_LEGACY: - data = data[7::-1] + data[:7:-1] - return uuid.UUID(bytes=data) - if subtype == UUID_SUBTYPE: - return uuid.UUID(bytes=data) + if subtype in ALL_UUID_SUBTYPES: + uuid_representation = json_options.uuid_representation + binary_value = Binary(data, subtype) + if uuid_representation == UuidRepresentation.UNSPECIFIED: + return binary_value + if subtype == UUID_SUBTYPE: + # Legacy behavior: use STANDARD with binary subtype 4. + uuid_representation = UuidRepresentation.STANDARD + elif uuid_representation == UuidRepresentation.STANDARD: + # subtype == OLD_UUID_SUBTYPE + # Legacy behavior: STANDARD is the same as PYTHON_LEGACY. 
+ uuid_representation = UuidRepresentation.PYTHON_LEGACY + return binary_value.as_uuid(uuid_representation) + if PY3 and subtype == 0: return data return Binary(data, subtype) @@ -795,15 +800,9 @@ def default(obj, json_options=DEFAULT_JSON_OPTIONS): return _encode_binary(obj, 0, json_options) if isinstance(obj, uuid.UUID): if json_options.strict_uuid: - data = obj.bytes - subtype = OLD_UUID_SUBTYPE - if json_options.uuid_representation == CSHARP_LEGACY: - data = obj.bytes_le - elif json_options.uuid_representation == JAVA_LEGACY: - data = data[7::-1] + data[:7:-1] - elif json_options.uuid_representation == UUID_SUBTYPE: - subtype = UUID_SUBTYPE - return _encode_binary(data, subtype, json_options) + binval = Binary.from_uuid( + obj, uuid_representation=json_options.uuid_representation) + return _encode_binary(binval, binval.subtype, json_options) else: return {"$uuid": obj.hex} if isinstance(obj, Decimal128): diff --git a/doc/api/bson/binary.rst b/doc/api/bson/binary.rst index ab9d58f81..ab4d599f8 100644 --- a/doc/api/bson/binary.rst +++ b/doc/api/bson/binary.rst @@ -16,6 +16,9 @@ .. autodata:: MD5_SUBTYPE .. autodata:: USER_DEFINED_SUBTYPE + .. autoclass:: UuidRepresentation + :members: + .. autoclass:: Binary(data, subtype=BINARY_SUBTYPE) :members: :show-inheritance: diff --git a/pymongo/common.py b/pymongo/common.py index 945b1f692..d0177cea0 100644 --- a/pymongo/common.py +++ b/pymongo/common.py @@ -19,8 +19,7 @@ import datetime import warnings from bson import SON -from bson.binary import (STANDARD, PYTHON_LEGACY, - JAVA_LEGACY, CSHARP_LEGACY) +from bson.binary import UuidRepresentation from bson.codec_options import CodecOptions, TypeRegistry from bson.py3compat import abc, integer_types, iteritems, string_type, PY3 from bson.raw_bson import RawBSONDocument @@ -149,10 +148,11 @@ def raise_config_error(key, dummy): # Mapping of URI uuid representation options to valid subtypes. 
_UUID_REPRESENTATIONS = { - 'standard': STANDARD, - 'pythonLegacy': PYTHON_LEGACY, - 'javaLegacy': JAVA_LEGACY, - 'csharpLegacy': CSHARP_LEGACY + 'unspecified': UuidRepresentation.UNSPECIFIED, + 'standard': UuidRepresentation.STANDARD, + 'pythonLegacy': UuidRepresentation.PYTHON_LEGACY, + 'javaLegacy': UuidRepresentation.JAVA_LEGACY, + 'csharpLegacy': UuidRepresentation.CSHARP_LEGACY } diff --git a/test/test_binary.py b/test/test_binary.py index 392cd97c8..39de987c1 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -31,8 +31,10 @@ from bson.binary import * from bson.codec_options import CodecOptions from bson.py3compat import PY3 from bson.son import SON +from pymongo.common import validate_uuid_representation from pymongo.mongo_client import MongoClient -from test import client_context, unittest +from pymongo.write_concern import WriteConcern +from test import client_context, unittest, IntegrationTest from test.utils import ignore_deprecations @@ -144,11 +146,13 @@ class TestBinary(unittest.TestCase): self.assertEqual(hash(Binary(b"hello world", 42)), hash(two)) def test_uuid_subtype_4(self): - """uuid_representation should be ignored when decoding subtype 4.""" + """uuid_representation should be ignored when decoding subtype 4 for + all UuidRepresentation values except UNSPECIFIED.""" expected_uuid = uuid.uuid4() doc = {"uuid": Binary(expected_uuid.bytes, 4)} encoded = encode(doc) - for uuid_representation in ALL_UUID_REPRESENTATIONS: + for uuid_representation in (set(ALL_UUID_REPRESENTATIONS) - + {UuidRepresentation.UNSPECIFIED}): options = CodecOptions(uuid_representation=uuid_representation) self.assertEqual(expected_uuid, decode(encoded, options)["uuid"]) @@ -296,8 +300,9 @@ class TestBinary(unittest.TestCase): self.assertEqual(1, coll.count_documents({})) # Test UUIDLegacy queries. 
- coll = db.get_collection("test", - CodecOptions(uuid_representation=STANDARD)) + coll = db.get_collection( + "test", CodecOptions( + uuid_representation=UuidRepresentation.STANDARD)) self.assertEqual(0, coll.find({'uuid': uu}).count()) cur = coll.find({'uuid': UUIDLegacy(uu)}) self.assertEqual(1, cur.count()) @@ -364,5 +369,219 @@ class TestBinary(unittest.TestCase): self.assertEqual(b0, Binary(array.array('B', b'123'), 2)) +class TestUuidSpecExplicitCoding(unittest.TestCase): + @classmethod + def setUpClass(cls): + super(TestUuidSpecExplicitCoding, cls).setUpClass() + cls.uuid = uuid.UUID("00112233445566778899AABBCCDDEEFF") + + @staticmethod + def _hex_to_bytes(hexstring): + if PY3: + return bytes.fromhex(hexstring) + return hexstring.decode("hex") + + # Explicit encoding prose test #1 + def test_encoding_1(self): + obj = Binary.from_uuid(self.uuid) + expected_obj = Binary( + self._hex_to_bytes("00112233445566778899AABBCCDDEEFF"), 4) + self.assertEqual(obj, expected_obj) + + def _test_encoding_w_uuid_rep( + self, uuid_rep, expected_hexstring, expected_subtype): + obj = Binary.from_uuid(self.uuid, uuid_rep) + expected_obj = Binary( + self._hex_to_bytes(expected_hexstring), expected_subtype) + self.assertEqual(obj, expected_obj) + + # Explicit encoding prose test #2 + def test_encoding_2(self): + self._test_encoding_w_uuid_rep( + UuidRepresentation.STANDARD, + "00112233445566778899AABBCCDDEEFF", 4) + + # Explicit encoding prose test #3 + def test_encoding_3(self): + self._test_encoding_w_uuid_rep( + UuidRepresentation.JAVA_LEGACY, + "7766554433221100FFEEDDCCBBAA9988", 3) + + # Explicit encoding prose test #4 + def test_encoding_4(self): + self._test_encoding_w_uuid_rep( + UuidRepresentation.CSHARP_LEGACY, + "33221100554477668899AABBCCDDEEFF", 3) + + # Explicit encoding prose test #5 + def test_encoding_5(self): + self._test_encoding_w_uuid_rep( + UuidRepresentation.PYTHON_LEGACY, + "00112233445566778899AABBCCDDEEFF", 3) + + # Explicit encoding prose test #6 + def 
test_encoding_6(self): + with self.assertRaises(ValueError): + Binary.from_uuid(self.uuid, UuidRepresentation.UNSPECIFIED) + + # Explicit decoding prose test #1 + def test_decoding_1(self): + obj = Binary( + self._hex_to_bytes("00112233445566778899AABBCCDDEEFF"), 4) + + # Case i: + self.assertEqual(obj.as_uuid(), self.uuid) + # Case ii: + self.assertEqual(obj.as_uuid(UuidRepresentation.STANDARD), self.uuid) + # Cases iii-vi: + for uuid_rep in (UuidRepresentation.JAVA_LEGACY, + UuidRepresentation.CSHARP_LEGACY, + UuidRepresentation.PYTHON_LEGACY): + with self.assertRaises(ValueError): + obj.as_uuid(uuid_rep) + + def _test_decoding_legacy(self, hexstring, uuid_rep): + obj = Binary(self._hex_to_bytes(hexstring), 3) + + # Case i: + with self.assertRaises(ValueError): + obj.as_uuid() + # Cases ii-iii: + for rep in (UuidRepresentation.STANDARD, + UuidRepresentation.UNSPECIFIED): + with self.assertRaises(ValueError): + obj.as_uuid(rep) + # Case iv: + self.assertEqual(obj.as_uuid(uuid_rep), + self.uuid) + + # Explicit decoding prose test #2 + def test_decoding_2(self): + self._test_decoding_legacy( + "7766554433221100FFEEDDCCBBAA9988", + UuidRepresentation.JAVA_LEGACY) + + # Explicit decoding prose test #3 + def test_decoding_3(self): + self._test_decoding_legacy( + "33221100554477668899AABBCCDDEEFF", + UuidRepresentation.CSHARP_LEGACY) + + # Explicit decoding prose test #4 + def test_decoding_4(self): + self._test_decoding_legacy( + "00112233445566778899AABBCCDDEEFF", + UuidRepresentation.PYTHON_LEGACY) + + +class TestUuidSpecImplicitCoding(IntegrationTest): + @classmethod + def setUpClass(cls): + super(TestUuidSpecImplicitCoding, cls).setUpClass() + cls.uuid = uuid.UUID("00112233445566778899AABBCCDDEEFF") + + @staticmethod + def _hex_to_bytes(hexstring): + if PY3: + return bytes.fromhex(hexstring) + return hexstring.decode("hex") + + def _get_coll_w_uuid_rep(self, uuid_rep): + codec_options = self.client.codec_options.with_options( + 
uuid_representation=validate_uuid_representation(None, uuid_rep)) + coll = self.db.get_collection( + 'pymongo_test', codec_options=codec_options, + write_concern=WriteConcern("majority")) + return coll + + def _test_encoding(self, uuid_rep, expected_hexstring, expected_subtype): + coll = self._get_coll_w_uuid_rep(uuid_rep) + coll.delete_many({}) + coll.insert_one({'_id': self.uuid}) + self.assertTrue( + coll.find_one({"_id": Binary( + self._hex_to_bytes(expected_hexstring), expected_subtype)})) + + # Implicit encoding prose test #1 + def test_encoding_1(self): + self._test_encoding( + "javaLegacy", "7766554433221100FFEEDDCCBBAA9988", 3) + + # Implicit encoding prose test #2 + def test_encoding_2(self): + self._test_encoding( + "csharpLegacy", "33221100554477668899AABBCCDDEEFF", 3) + + # Implicit encoding prose test #3 + def test_encoding_3(self): + self._test_encoding( + "pythonLegacy", "00112233445566778899AABBCCDDEEFF", 3) + + # Implicit encoding prose test #4 + def test_encoding_4(self): + self._test_encoding( + "standard", "00112233445566778899AABBCCDDEEFF", 4) + + # Implicit encoding prose test #5 + def test_encoding_5(self): + with self.assertRaises(ValueError): + self._test_encoding( + "unspecifed", "dummy", -1) + + def _test_decoding(self, client_uuid_representation_string, + legacy_field_uuid_representation, + expected_standard_field_value, + expected_legacy_field_value): + coll = self._get_coll_w_uuid_rep(client_uuid_representation_string) + coll.drop() + + standard_val = Binary.from_uuid(self.uuid, UuidRepresentation.STANDARD) + legacy_val = Binary.from_uuid(self.uuid, legacy_field_uuid_representation) + coll.insert_one({'standard': standard_val, 'legacy': legacy_val}) + + doc = coll.find_one() + self.assertEqual(doc['standard'], expected_standard_field_value) + self.assertEqual(doc['legacy'], expected_legacy_field_value) + + # Implicit decoding prose test #1 + def test_decoding_1(self): + # TODO: these assertions will change after PYTHON-2245. 
Specifically, + # the 'standard' field will be decoded as a Binary subtype 4. + binary_value = Binary.from_uuid( + self.uuid, UuidRepresentation.PYTHON_LEGACY) + self._test_decoding( + "javaLegacy", UuidRepresentation.JAVA_LEGACY, + self.uuid, self.uuid) + self._test_decoding( + "csharpLegacy", UuidRepresentation.CSHARP_LEGACY, + self.uuid, self.uuid) + self._test_decoding( + "pythonLegacy", UuidRepresentation.PYTHON_LEGACY, + self.uuid, self.uuid) + + # Implicit decoding prose test #2 + def test_decoding_2(self): + # TODO: these assertions will change after PYTHON-2245. Specifically, + # the 'legacy' field will be decoded as a Binary subtype 3. + binary_value = Binary.from_uuid( + self.uuid, UuidRepresentation.PYTHON_LEGACY) + self._test_decoding( + "standard", UuidRepresentation.PYTHON_LEGACY, + self.uuid, binary_value.as_uuid(UuidRepresentation.PYTHON_LEGACY)) + + # Implicit decoding prose test #3 + def test_decoding_3(self): + expected_standard_value = Binary.from_uuid( + self.uuid, UuidRepresentation.STANDARD) + for legacy_uuid_rep in (UuidRepresentation.PYTHON_LEGACY, + UuidRepresentation.CSHARP_LEGACY, + UuidRepresentation.JAVA_LEGACY): + expected_legacy_value = Binary.from_uuid( + self.uuid, legacy_uuid_rep) + self._test_decoding( + "unspecified", legacy_uuid_rep, + expected_standard_value, expected_legacy_value) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_bson.py b/test/test_bson.py index dd604c738..ad726f71b 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -649,7 +649,6 @@ class TestBSON(unittest.TestCase): decode(encode({"tuple": (1, 2)}))) def test_uuid(self): - id = uuid.uuid4() transformed_id = decode(encode({"id": id}))["id"] @@ -991,7 +990,6 @@ class TestCodecOptions(unittest.TestCase): self.assertTrue(CodecOptions(tz_aware=True).tz_aware) def test_uuid_representation(self): - self.assertRaises(ValueError, CodecOptions, uuid_representation=None) self.assertRaises(ValueError, CodecOptions, uuid_representation=7) 
self.assertRaises(ValueError, CodecOptions, uuid_representation=2) @@ -1003,7 +1001,7 @@ class TestCodecOptions(unittest.TestCase): def test_codec_options_repr(self): r = ("CodecOptions(document_class=dict, tz_aware=False, " - "uuid_representation=PYTHON_LEGACY, " + "uuid_representation=UuidRepresentation.PYTHON_LEGACY, " "unicode_decode_error_handler='strict', " "tzinfo=None, type_registry=TypeRegistry(type_codecs=[], " "fallback_encoder=None))") diff --git a/test/test_common.py b/test/test_common.py index 5175dd8bf..5a35fd8bb 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -25,7 +25,7 @@ from bson.codec_options import CodecOptions from bson.objectid import ObjectId from pymongo.errors import OperationFailure from pymongo.write_concern import WriteConcern -from test import client_context, IntegrationTest +from test import client_context, unittest, IntegrationTest from test.utils import connected, rs_or_single_client, single_client diff --git a/test/test_json_util.py b/test/test_json_util.py index 75b177e44..6499818b2 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -22,14 +22,11 @@ import uuid sys.path[0:0] = [""] -from pymongo.errors import ConfigurationError - -from bson import json_util, EPOCH_AWARE, EPOCH_NAIVE, SON +from bson import json_util, EPOCH_AWARE, SON from bson.json_util import (DatetimeRepresentation, STRICT_JSON_OPTIONS) from bson.binary import (ALL_UUID_REPRESENTATIONS, Binary, MD5_SUBTYPE, - USER_DEFINED_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY, - STANDARD) + USER_DEFINED_SUBTYPE, UuidRepresentation, STANDARD) from bson.code import Code from bson.dbref import DBRef from bson.int64 import Int64 @@ -271,7 +268,8 @@ class TestJsonUtil(unittest.TestCase): doc, json_util.loads( '{"uuid": ' '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}')) - for uuid_representation in ALL_UUID_REPRESENTATIONS: + for uuid_representation in (set(ALL_UUID_REPRESENTATIONS) - + {UuidRepresentation.UNSPECIFIED}): options = 
json_util.JSONOptions( strict_uuid=True, uuid_representation=uuid_representation) self.round_trip(doc, json_options=options) @@ -281,6 +279,30 @@ class TestJsonUtil(unittest.TestCase): '{"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}', json_options=options)) + def test_uuid_uuid_rep_unspecified(self): + _uuid = uuid.uuid4() + options = json_util.JSONOptions( + strict_uuid=True, + uuid_representation=UuidRepresentation.UNSPECIFIED) + + # Cannot directly encode native UUIDs with UNSPECIFIED. + doc = {'uuid': _uuid} + with self.assertRaises(ValueError): + json_util.dumps(doc, json_options=options) + + # All UUID subtypes are decoded as Binary with UNSPECIFIED. + # subtype 3 + doc = {'uuid': Binary(_uuid.bytes, subtype=3)} + ext_json_str = json_util.dumps(doc) + self.assertEqual( + doc, json_util.loads(ext_json_str, json_options=options)) + # subtype 4 + doc = {'uuid': Binary(_uuid.bytes, subtype=4)} + ext_json_str = json_util.dumps(doc) + self.assertEqual( + doc, json_util.loads(ext_json_str, json_options=options)) + + def test_binary(self): if PY3: bin_type_dict = {"bin": b"\x00\x01\x02\x03\x04"}