From 9da835ab3d6407d366c3db09e1862c7587d65f7f Mon Sep 17 00:00:00 2001 From: "A. Jesse Jiryu Davis" Date: Mon, 2 Feb 2015 22:00:15 -0500 Subject: [PATCH] PYTHON-825 BSON API changes and internal options handling. This change resolves four issues: PYTHON-826 The new codec_options submodule is moved from pymongo to bson. PYTHON-827 Use codec_options in BSON APIs. Functions and methods of the bson module that accepted the options as_class, tz_aware, and uuid_subtype now accept a codec_options parameter instead. For example, the function definition for bson.decode_all changes from this: def decode_all(data, as_class=dict, tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE) to: def decode_all(data, codec_options=CodecOptions()) The following functions are changed: - decode_all - decode_iter - decode_file_iter The following methods are changed: - BSON.encode - BSON.decode This is a breaking change for any application that uses the BSON API directly and changes any of the named parameter defaults. No changes are required for applications that use the default values for these options. The behavior remains the same. PYTHON-828 Internal BSON module changes to support CodecOptions The pure Python BSON module passes around a CodecOptions instance instead of as_class, tz_aware, and uuid_subtype. C extensions pass these values around in a struct. PYTHON-801 Rename uuid_subtype to uuid_representation. 
--- bson/__init__.py | 202 ++++++++++++++++++++++---------------- bson/_cbsonmodule.c | 178 +++++++++++++++++++++------------ bson/_cbsonmodule.h | 30 +++++- bson/binary.py | 5 + bson/codec_options.py | 62 ++++++------ doc/changelog.rst | 16 +++ pymongo/_cmessagemodule.c | 150 ++++++++++++++++++---------- pymongo/bulk.py | 6 +- pymongo/collection.py | 17 ++-- pymongo/cursor.py | 2 +- pymongo/helpers.py | 7 +- pymongo/message.py | 41 ++++---- pymongo/monitor.py | 4 +- pymongo/network.py | 4 +- test/test_binary.py | 78 +++++++++------ test/test_bson.py | 62 ++++++++++-- test/test_common.py | 3 - 17 files changed, 546 insertions(+), 321 deletions(-) diff --git a/bson/__init__.py b/bson/__init__.py index 4a79539ec..947365aed 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -31,6 +31,7 @@ from bson.binary import (Binary, OLD_UUID_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY, UUIDLegacy) from bson.code import Code +from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS from bson.dbref import DBRef from bson.errors import (InvalidBSON, InvalidDocument, @@ -122,7 +123,7 @@ def _get_string(data, position, obj_end, dummy): def _get_object(data, position, obj_end, opts): - """Decode a BSON subdocument to as_class or bson.dbref.DBRef.""" + """Decode a BSON subdocument to opts.as_class or bson.dbref.DBRef.""" obj_size = _UNPACK_INT(data[position:position + 4])[0] end = position + obj_size - 1 if data[end:position + obj_size] != b"\x00": @@ -175,12 +176,12 @@ def _get_binary(data, position, dummy, opts): end = position + length if subtype in (3, 4): # Java Legacy - uuid_subtype = opts[2] - if uuid_subtype == JAVA_LEGACY: + uuid_representation = opts.uuid_representation + if uuid_representation == JAVA_LEGACY: java = data[position:end] value = uuid.UUID(bytes=java[0:8][::-1] + java[8:16][::-1]) # C# legacy - elif uuid_subtype == CSHARP_LEGACY: + elif uuid_representation == CSHARP_LEGACY: value = uuid.UUID(bytes_le=data[position:end]) # Python else: @@ -213,7 +214,7 @@ 
def _get_date(data, position, dummy, opts): diff = ((millis % 1000) + 1000) % 1000 seconds = (millis - diff) / 1000 micros = diff * 1000 - if opts[1]: + if opts.tz_aware: return EPOCH_AWARE + datetime.timedelta( seconds=seconds, microseconds=micros), end else: @@ -262,6 +263,11 @@ def _get_int64(data, position, dummy0, dummy1): return Int64(_UNPACK_LONG(data[position:end])[0]), end +# Each decoder function's signature is: +# - data: bytes +# - position: int, beginning of object in 'data' to decode +# - obj_end: int, end of object to decode in 'data' if variable-length type +# - opts: a CodecOptions _ELEMENT_GETTER = { BSONNUM: _get_float, BSONSTR: _get_string, @@ -297,7 +303,7 @@ def _element_to_dict(data, position, obj_end, opts): def _elements_to_dict(data, position, obj_end, opts): """Decode a BSON document.""" - result = opts[0]() + result = opts.as_class() end = obj_end - 1 while position < end: (key, value, position) = _element_to_dict(data, position, obj_end, opts) @@ -305,9 +311,8 @@ def _elements_to_dict(data, position, obj_end, opts): return result -def _bson_to_dict(data, as_class, tz_aware, uuid_subtype): +def _bson_to_dict(data, opts): """Decode a BSON string to as_class.""" - opts = (as_class, tz_aware, uuid_subtype) try: obj_size = _UNPACK_INT(data[:4])[0] except struct.error as e: @@ -417,38 +422,38 @@ else: return b"\x02" + name + _PACK_INT(len(value) + 1) + value + b"\x00" -def _encode_mapping(name, value, check_keys, uuid_subtype): +def _encode_mapping(name, value, check_keys, opts): """Encode a mapping type.""" - data = b"".join([_element_to_bson(key, val, check_keys, uuid_subtype) + data = b"".join([_element_to_bson(key, val, check_keys, opts) for key, val in iteritems(value)]) return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00" -def _encode_dbref(name, value, check_keys, uuid_subtype): +def _encode_dbref(name, value, check_keys, opts): """Encode bson.dbref.DBRef.""" buf = bytearray(b"\x03" + name + b"\x00\x00\x00\x00") begin = 
len(buf) - 4 buf += _name_value_to_bson(b"$ref\x00", - value.collection, check_keys, uuid_subtype) + value.collection, check_keys, opts) buf += _name_value_to_bson(b"$id\x00", - value.id, check_keys, uuid_subtype) + value.id, check_keys, opts) if value.database is not None: buf += _name_value_to_bson( - b"$db\x00", value.database, check_keys, uuid_subtype) + b"$db\x00", value.database, check_keys, opts) for key, val in iteritems(value._DBRef__kwargs): - buf += _element_to_bson(key, val, check_keys, uuid_subtype) + buf += _element_to_bson(key, val, check_keys, opts) buf += b"\x00" buf[begin:begin + 4] = _PACK_INT(len(buf) - begin) return bytes(buf) -def _encode_list(name, value, check_keys, uuid_subtype): +def _encode_list(name, value, check_keys, opts): """Encode a list/tuple.""" lname = gen_list_name() data = b"".join([_name_value_to_bson(next(lname), item, - check_keys, uuid_subtype) + check_keys, opts) for item in value]) return b"\x04" + name + _PACK_INT(len(data) + 5) + data + b"\x00" @@ -467,18 +472,19 @@ def _encode_binary(name, value, dummy0, dummy1): return b"\x05" + name + _PACK_LENGTH_SUBTYPE(len(value), subtype) + value -def _encode_uuid(name, value, dummy, uuid_subtype): +def _encode_uuid(name, value, dummy, opts): """Encode uuid.UUID.""" + uuid_representation = opts.uuid_representation # Python Legacy Common Case - if uuid_subtype == OLD_UUID_SUBTYPE: + if uuid_representation == OLD_UUID_SUBTYPE: return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes # Java Legacy - elif uuid_subtype == JAVA_LEGACY: + elif uuid_representation == JAVA_LEGACY: from_uuid = value.bytes data = from_uuid[0:8][::-1] + from_uuid[8:16][::-1] return b"\x05" + name + b'\x10\x00\x00\x00\x03' + data # C# legacy - elif uuid_subtype == CSHARP_LEGACY: + elif uuid_representation == CSHARP_LEGACY: # Microsoft GUID representation. 
return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes_le # New @@ -537,13 +543,13 @@ def _encode_regex(name, value, dummy0, dummy1): return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags -def _encode_code(name, value, dummy, uuid_subtype): +def _encode_code(name, value, dummy, opts): """Encode bson.code.Code.""" cstring = _make_c_string(value) cstrlen = len(cstring) if not value.scope: return b"\x0D" + name + _PACK_INT(cstrlen) + cstring - scope = _dict_to_bson(value.scope, False, uuid_subtype, False) + scope = _dict_to_bson(value.scope, False, opts, False) full_length = _PACK_INT(8 + cstrlen + len(scope)) return b"\x0F" + name + full_length + _PACK_INT(cstrlen) + cstring + scope @@ -582,6 +588,11 @@ def _encode_maxkey(name, dummy0, dummy1, dummy2): return b"\x7F" + name +# Each encoder function's signature is: +# - name: utf-8 bytes +# - value: a Python data type, e.g. a Python int for _encode_int +# - check_keys: bool, whether to check for invalid names +# - opts: a CodecOptions _ENCODERS = { bool: _encode_bool, bytes: _encode_bytes, @@ -628,13 +639,13 @@ if not PY3: _ENCODERS[long] = _encode_long -def _name_value_to_bson(name, value, check_keys, uuid_subtype): +def _name_value_to_bson(name, value, check_keys, opts): """Encode a single name, value pair.""" # First see if the type is already cached. KeyError will only ever # happen once per subtype. try: - return _ENCODERS[type(value)](name, value, check_keys, uuid_subtype) + return _ENCODERS[type(value)](name, value, check_keys, opts) except KeyError: pass @@ -646,7 +657,7 @@ def _name_value_to_bson(name, value, check_keys, uuid_subtype): func = _MARKERS[marker] # Cache this type for faster subsequent lookup. _ENCODERS[type(value)] = func - return func(name, value, check_keys, uuid_subtype) + return func(name, value, check_keys, opts) # If all else fails test each base type. This will only happen once for # a subtype of a supported base type. 
@@ -655,13 +666,13 @@ def _name_value_to_bson(name, value, check_keys, uuid_subtype): func = _ENCODERS[base] # Cache this type for faster subsequent lookup. _ENCODERS[type(value)] = func - return func(name, value, check_keys, uuid_subtype) + return func(name, value, check_keys, opts) raise InvalidDocument("cannot convert value of type %s to bson" % type(value)) -def _element_to_bson(key, value, check_keys, uuid_subtype): +def _element_to_bson(key, value, check_keys, opts): """Encode a single key, value pair.""" if not isinstance(key, string_type): raise InvalidDocument("documents must have only string keys, " @@ -673,20 +684,20 @@ def _element_to_bson(key, value, check_keys, uuid_subtype): raise InvalidDocument("key %r must not contain '.'" % (key,)) name = _make_name(key) - return _name_value_to_bson(name, value, check_keys, uuid_subtype) + return _name_value_to_bson(name, value, check_keys, opts) -def _dict_to_bson(doc, check_keys, uuid_subtype, top_level=True): +def _dict_to_bson(doc, check_keys, opts, top_level=True): """Encode a document to BSON.""" try: elements = [] if top_level and "_id" in doc: elements.append(_name_value_to_bson(b"_id\x00", doc["_id"], - check_keys, uuid_subtype)) + check_keys, opts)) for (key, value) in iteritems(doc): if not top_level or key != "_id": elements.append(_element_to_bson(key, value, - check_keys, uuid_subtype)) + check_keys, opts)) except AttributeError: raise TypeError("encoder expected a mapping type but got: %r" % (doc,)) @@ -696,8 +707,11 @@ if _USE_C: _dict_to_bson = _cbson._dict_to_bson -def decode_all(data, as_class=dict, - tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE): +_CODEC_OPTIONS_TYPE_ERROR = TypeError( + "codec_options must be an instance of CodecOptions") + + +def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): """Decode BSON data to multiple documents. 
`data` must be a string of concatenated, valid, BSON-encoded @@ -705,12 +719,8 @@ def decode_all(data, as_class=dict, :Parameters: - `data`: BSON data - - `as_class` (optional): the class to use for the resulting - documents - - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - - `uuid_subtype` (optional): The BSON representation to use for UUIDs. - See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. .. versionchanged:: 3.0 Removed `compile_re` option: PyMongo now always represents BSON regular @@ -718,6 +728,9 @@ def decode_all(data, as_class=dict, :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular expression object. + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. + .. versionchanged:: 2.7 Added `compile_re` option. If set to False, PyMongo represented BSON regular expressions as :class:`~bson.regex.Regex` objects instead of @@ -727,7 +740,9 @@ def decode_all(data, as_class=dict, .. 
_PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ - opts = (as_class, tz_aware, uuid_subtype) + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + docs = [] position = 0 end = len(data) - 1 @@ -739,7 +754,10 @@ def decode_all(data, as_class=dict, obj_end = position + obj_size - 1 if data[obj_end:position + obj_size] != b"\x00": raise InvalidBSON("bad eoo") - docs.append(_elements_to_dict(data, position + 4, obj_end, opts)) + docs.append(_elements_to_dict(data, + position + 4, + obj_end, + codec_options)) position += obj_size return docs except InvalidBSON: @@ -754,8 +772,7 @@ if _USE_C: decode_all = _cbson.decode_all -def decode_iter(data, as_class=dict, tz_aware=True, - uuid_subtype=OLD_UUID_SUBTYPE): +def decode_iter(data, codec_options=DEFAULT_CODEC_OPTIONS): """Decode BSON data to multiple documents as a generator. Works similarly to the decode_all function, but yields one document at a @@ -766,15 +783,18 @@ def decode_iter(data, as_class=dict, tz_aware=True, :Parameters: - `data`: BSON data - - `as_class` (optional): the class to use for the resulting - documents - - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - - `uuid_subtype` (optional): The BSON representation to use for UUIDs. - See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. + + .. versionchanged:: 3.0 + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. .. 
versionadded:: 2.8 """ + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + position = 0 end = len(data) - 1 while position < end: @@ -782,12 +802,10 @@ def decode_iter(data, as_class=dict, tz_aware=True, elements = data[position:position + obj_size] position += obj_size - yield _bson_to_dict(elements, as_class, - tz_aware, uuid_subtype) + yield _bson_to_dict(elements, codec_options) -def decode_file_iter(file_obj, as_class=dict, tz_aware=True, - uuid_subtype=OLD_UUID_SUBTYPE): +def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS): """Decode bson data from a file to multiple documents as a generator. Works similarly to the decode_all function, but reads from the file object @@ -795,12 +813,12 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True, :Parameters: - `file_obj`: A file object containing BSON data. - - `as_class` (optional): the class to use for the resulting - documents - - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - - `uuid_subtype` (optional): The BSON representation to use for UUIDs. - See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. + + .. versionchanged:: 3.0 + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. .. 
versionadded:: 2.8 """ @@ -813,8 +831,7 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True, raise InvalidBSON("cut off in middle of objsize") obj_size = _UNPACK_INT(size_data)[0] - 4 elements = size_data + file_obj.read(obj_size) - yield _bson_to_dict(elements, as_class, - tz_aware, uuid_subtype) + yield _bson_to_dict(elements, codec_options) def is_valid(bson): @@ -831,7 +848,7 @@ def is_valid(bson): raise TypeError("BSON data must be an instance of a subclass of bytes") try: - _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE) + _bson_to_dict(bson, DEFAULT_CODEC_OPTIONS) return True except Exception: return False @@ -842,7 +859,8 @@ class BSON(bytes): """ @classmethod - def encode(cls, document, check_keys=False, uuid_subtype=OLD_UUID_SUBTYPE): + def encode(cls, document, check_keys=False, + codec_options=DEFAULT_CODEC_OPTIONS): """Encode a document to a new :class:`BSON` instance. A document can be any mapping type (like :class:`dict`). @@ -858,34 +876,38 @@ class BSON(bytes): - `check_keys` (optional): check if keys start with '$' or contain '.', raising :class:`~bson.errors.InvalidDocument` in either case - - `uuid_subtype` (optional): The BSON representation to use for - UUIDs. See the :mod:`bson.binary` module for all options. - """ - return cls(_dict_to_bson(document, check_keys, uuid_subtype)) + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. - def decode(self, as_class=dict, - tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE): + .. versionchanged:: 3.0 + Replaced `uuid_subtype` option with `codec_options`. + """ + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + + return cls(_dict_to_bson(document, check_keys, codec_options)) + + def decode(self, codec_options=DEFAULT_CODEC_OPTIONS): """Decode this BSON data. - The default type to use for the resultant document is - :class:`dict`. 
Any other class that supports - :meth:`__setitem__` can be used instead by passing it as the - `as_class` parameter. + By default, returns a BSON document represented as a Python + :class:`dict`. To use a different :class:`MutableMapping` class, + configure a :class:`~bson.codec_options.CodecOptions`:: - If `tz_aware` is ``True`` (recommended), any - :class:`~datetime.datetime` instances returned will be - timezone-aware, with their timezone set to - :attr:`bson.tz_util.utc`. Otherwise (default), all - :class:`~datetime.datetime` instances will be naive (but - contain UTC). + >>> import collections # From Python standard library. + >>> import bson + >>> from bson.codec_options import CodecOptions + >>> data = bson.BSON.encode({'a': 1}) + >>> decoded_doc = bson.BSON.decode(data) + + >>> options = CodecOptions(as_class=collections.OrderedDict) + >>> decoded_doc = bson.BSON.decode(data, codec_options=options) + >>> type(decoded_doc) + <class 'collections.OrderedDict'> :Parameters: - `as_class` (optional): the class to use for the resulting - document - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - `uuid_subtype` (optional): The BSON representation to use for - UUIDs. See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. .. versionchanged:: 3.0 Removed `compile_re` option: PyMongo now always represents BSON @@ -893,6 +915,9 @@ class BSON(bytes): :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular expression object. + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. + .. versionchanged:: 2.7 Added `compile_re` option. If set to False, PyMongo represented BSON regular expressions as :class:`~bson.regex.Regex` objects instead of @@ -902,7 +927,10 @@ class BSON(bytes): .. 
_PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ - return _bson_to_dict(self, as_class, tz_aware, uuid_subtype) + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + + return _bson_to_dict(self, codec_options) def has_c(): diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 8129d3907..e367f3c3e 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -68,8 +68,14 @@ static struct module_state _state; /* Maximum number of regex flags */ #define FLAGS_SIZE 7 +/* Default UUID representation type code. */ +#define PYTHON_LEGACY 3 + +/* Other UUID representations. */ +#define STANDARD 4 #define JAVA_LEGACY 5 #define CSHARP_LEGACY 6 + #define BSON_MAX_SIZE 2147483647 /* The smallest possible BSON document, i.e. "{}" */ #define BSON_MIN_SIZE 5 @@ -104,15 +110,46 @@ _downcast_and_check(Py_ssize_t size, int extra) { return (int)size + extra; } +/* Fill out a codec_options_t* from a CodecOptions object. Use with the "O&" + * format spec in PyArg_ParseTuple. + * + * Return 1 on success. options->as_class is a new reference. + * Return 0 on failure. + */ +int convert_codec_options(PyObject* options_obj, void* p) { + codec_options_t* options = (codec_options_t*)p; + if (!PyArg_ParseTuple(options_obj, "Obb", + &options->as_class, + &options->tz_aware, + &options->uuid_rep)) { + return 0; + } + + Py_INCREF(options->as_class); + return 1; +} + +/* Fill out a codec_options_t* with default options. */ +void default_codec_options(codec_options_t* options) { + options->as_class = (PyObject*)&PyDict_Type; + Py_INCREF(options->as_class); + + // TODO: set to "1". PYTHON-526, setting tz_aware=True by default. 
+ options->tz_aware = 0; + options->uuid_rep = PYTHON_LEGACY; +} + +void destroy_codec_options(codec_options_t* options) { + Py_CLEAR(options->as_class); +} + static PyObject* elements_to_dict(PyObject* self, const char* string, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype); + unsigned max, const codec_options_t* options); static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype); + const codec_options_t* options); /* Date stuff */ static PyObject* datetime_from_millis(long long millis) { @@ -367,12 +404,12 @@ static int _load_python_objects(PyObject* module) { static int write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype) { + const codec_options_t* options) { int result; if(Py_EnterRecursiveCall(" while encoding an object to BSON ")) return 0; result = _write_element_to_buffer(self, buffer, type_byte, - value, check_keys, uuid_subtype); + value, check_keys, options); Py_LeaveRecursiveCall(); return result; } @@ -556,7 +593,7 @@ static int _write_regex_to_buffer( static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype) { + const codec_options_t* options) { struct module_state *state = GETSTATE(self); PyObject* type_marker = NULL; PyObject* mapping_type; @@ -730,7 +767,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (!write_dict(self, buffer, scope, 0, uuid_subtype, 0)) { + if (!write_dict(self, buffer, scope, 0, options, 0)) { Py_DECREF(scope); return 0; } @@ -799,7 +836,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, if (!as_doc) { return 0; } - if (!write_dict(self, buffer, as_doc, 0, uuid_subtype, 0)) { + if (!write_dict(self, buffer, as_doc, 0, options, 0)) { 
Py_DECREF(as_doc); return 0; } @@ -879,7 +916,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 1; } else if (PyDict_Check(value)) { *(buffer_get_buffer(buffer) + type_byte) = 0x03; - return write_dict(self, buffer, value, check_keys, uuid_subtype, 0); + return write_dict(self, buffer, value, check_keys, options, 0); } else if (PyList_Check(value) || PyTuple_Check(value)) { Py_ssize_t items, i; int start_position, @@ -923,7 +960,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, if (!(item_value = PySequence_GetItem(value, i))) return 0; if (!write_element_to_buffer(self, buffer, list_type_byte, - item_value, check_keys, uuid_subtype)) { + item_value, check_keys, options)) { Py_DECREF(item_value); return 0; } @@ -1041,7 +1078,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } *(buffer_get_buffer(buffer) + type_byte) = 0x03; - return write_dict(self, buffer, value, check_keys, uuid_subtype, 0); + return write_dict(self, buffer, value, check_keys, options, 0); } uuid_type = _get_object(state->UUID, "uuid", "UUID"); @@ -1061,11 +1098,12 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (uuid_subtype == JAVA_LEGACY || uuid_subtype == CSHARP_LEGACY) { + if (options->uuid_rep == JAVA_LEGACY + || options->uuid_rep == CSHARP_LEGACY) { subtype = 3; } else { - subtype = uuid_subtype; + subtype = options->uuid_rep; } *(buffer_get_buffer(buffer) + type_byte) = 0x05; @@ -1076,7 +1114,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (uuid_subtype == CSHARP_LEGACY) { + if (options->uuid_rep == CSHARP_LEGACY) { /* Legacy C# byte order */ bytes = PyObject_GetAttrString(value, "bytes_le"); } @@ -1095,7 +1133,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, Py_DECREF(bytes); return 0; } - if (uuid_subtype == JAVA_LEGACY) { + if (options->uuid_rep == JAVA_LEGACY) { /* Store in legacy java byte 
order. */ char as_legacy_java[16]; _fix_java(data, as_legacy_java); @@ -1166,7 +1204,7 @@ static int check_key_name(const char* name, int name_length) { * Returns 0 on failure */ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype, unsigned char allow_id) { + const codec_options_t* options, unsigned char allow_id) { int type_byte; /* Don't write any _id elements unless we're explicitly told to - @@ -1188,7 +1226,7 @@ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_lengt return 0; } if (!write_element_to_buffer(self, buffer, type_byte, - value, check_keys, uuid_subtype)) { + value, check_keys, options)) { return 0; } return 1; @@ -1197,7 +1235,8 @@ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_lengt int decode_and_write_pair(PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype, unsigned char top_level) { + const codec_options_t* options, + unsigned char top_level) { PyObject* encoded; const char* data; int size; @@ -1310,7 +1349,7 @@ int decode_and_write_pair(PyObject* self, buffer_t buffer, /* If top_level is True, don't allow writing _id here - it was already written. 
*/ if (!write_pair(self, buffer, data, - size - 1, value, check_keys, uuid_subtype, !top_level)) { + size - 1, value, check_keys, options, !top_level)) { Py_DECREF(encoded); return 0; } @@ -1322,7 +1361,7 @@ int decode_and_write_pair(PyObject* self, buffer_t buffer, /* returns 0 on failure */ int write_dict(PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, - unsigned char uuid_subtype, unsigned char top_level) { + const codec_options_t* options, unsigned char top_level) { PyObject* key; PyObject* iter; char zero = 0; @@ -1390,7 +1429,7 @@ int write_dict(PyObject* self, buffer_t buffer, return 0; } if (!write_pair(self, buffer, "_id", 3, - _id, check_keys, uuid_subtype, 1)) { + _id, check_keys, options, 1)) { Py_DECREF(_id); return 0; } @@ -1410,7 +1449,7 @@ int write_dict(PyObject* self, buffer_t buffer, return 0; } if (!decode_and_write_pair(self, buffer, key, value, - check_keys, uuid_subtype, top_level)) { + check_keys, options, top_level)) { Py_DECREF(key); Py_DECREF(value); Py_DECREF(iter); @@ -1434,22 +1473,23 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { PyObject* dict; PyObject* result; unsigned char check_keys; - unsigned char uuid_subtype; unsigned char top_level = 1; + codec_options_t options; buffer_t buffer; - if (!PyArg_ParseTuple(args, "Obb|b", &dict, - &check_keys, &uuid_subtype, &top_level)) { + if (!PyArg_ParseTuple(args, "ObO&|b", &dict, &check_keys, + convert_codec_options, &options, &top_level)) { return NULL; } - buffer = buffer_new(); if (!buffer) { + destroy_codec_options(&options); PyErr_NoMemory(); return NULL; } - if (!write_dict(self, buffer, dict, check_keys, uuid_subtype, top_level)) { + if (!write_dict(self, buffer, dict, check_keys, &options, top_level)) { + destroy_codec_options(&options); buffer_free(buffer); return NULL; } @@ -1462,15 +1502,14 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { result = Py_BuildValue("s#", buffer_get_buffer(buffer), 
buffer_get_position(buffer)); #endif + destroy_codec_options(&options); buffer_free(buffer); return result; } static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position, unsigned char type, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype) { + unsigned max, const codec_options_t* options) { struct module_state *state = GETSTATE(self); PyObject* value = NULL; @@ -1526,8 +1565,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, goto invalid; } value = elements_to_dict(self, buffer + *position + 4, - size - 5, as_class, tz_aware, - uuid_subtype); + size - 5, options); if (!value) { goto invalid; } @@ -1625,8 +1663,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, goto invalid; } to_append = get_value(self, buffer, position, bson_type, - max - (unsigned)key_size, - as_class, tz_aware, uuid_subtype); + max - (unsigned)key_size, options); Py_LeaveRecursiveCall(); if (!to_append) { Py_DECREF(value); @@ -1701,13 +1738,13 @@ static PyObject* get_value(PyObject* self, const char* buffer, * From this point, we hold refs to args, kwargs, and data. * If anything fails, goto uuiderror to clean them up. */ - if (uuid_subtype == CSHARP_LEGACY) { + if (options->uuid_rep == CSHARP_LEGACY) { /* Legacy C# byte order */ if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1) goto uuiderror; } else { - if (uuid_subtype == JAVA_LEGACY) { + if (options->uuid_rep == JAVA_LEGACY) { /* Convert from legacy java byte order */ char big_endian[16]; _fix_java(buffer + *position, big_endian); @@ -1812,7 +1849,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, memcpy(&millis, buffer + *position, 8); naive = datetime_from_millis(millis); *position += 8; - if (!tz_aware) { /* In the naive case, we're done here. */ + if (!options->tz_aware) { /* In the naive case, we're done here. 
*/ value = naive; break; } @@ -2035,8 +2072,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, goto invalid; } scope = elements_to_dict(self, buffer + *position + 4, - scope_size - 5, (PyObject*)&PyDict_Type, - tz_aware, uuid_subtype); + scope_size - 5, options); if (!scope) { Py_DECREF(code); goto invalid; @@ -2184,11 +2220,10 @@ static PyObject* get_value(PyObject* self, const char* buffer, } static PyObject* _elements_to_dict(PyObject* self, const char* string, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype) { + unsigned max, + const codec_options_t* options) { unsigned position = 0; - PyObject* dict = PyObject_CallObject(as_class, NULL); + PyObject* dict = PyObject_CallObject(options->as_class, NULL); if (!dict) { return NULL; } @@ -2214,7 +2249,7 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, } position += (unsigned)name_length + 1; value = get_value(self, string, &position, type, - max - position, as_class, tz_aware, uuid_subtype); + max - position, options); if (!value) { Py_DECREF(name); Py_DECREF(dict); @@ -2229,14 +2264,12 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, } static PyObject* elements_to_dict(PyObject* self, const char* string, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype) { + unsigned max, + const codec_options_t* options) { PyObject* result; if (Py_EnterRecursiveCall(" while decoding a BSON document")) return NULL; - result = _elements_to_dict(self, string, max, - as_class, tz_aware, uuid_subtype); + result = _elements_to_dict(self, string, max, options); Py_LeaveRecursiveCall(); return result; } @@ -2246,12 +2279,11 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { Py_ssize_t total_size; const char* string; PyObject* bson; - PyObject* as_class; - unsigned char tz_aware; - unsigned char uuid_subtype; + codec_options_t options; + PyObject* result; if 
(!PyArg_ParseTuple( - args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) { + args, "OO&", &bson, convert_codec_options, &options)) { return NULL; } @@ -2262,6 +2294,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { if (!PyString_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a string"); #endif + destroy_codec_options(&options); return NULL; } #if PY_MAJOR_VERSION >= 3 @@ -2276,6 +2309,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { "not enough data for a BSON document"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } @@ -2285,6 +2319,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { string = PyString_AsString(bson); #endif if (!string) { + destroy_codec_options(&options); return NULL; } @@ -2295,6 +2330,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "invalid message size"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } @@ -2304,6 +2340,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "objsize too large"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } @@ -2313,11 +2350,13 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "bad eoo"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } - return elements_to_dict(self, string + 4, (unsigned)size - 5, - as_class, tz_aware, uuid_subtype); + result = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); + destroy_codec_options(&options); + return result; } static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { @@ -2327,16 +2366,18 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyObject* bson; PyObject* dict; PyObject* result; - PyObject* as_class = (PyObject*)&PyDict_Type; - 
unsigned char tz_aware = 1; - unsigned char uuid_subtype = 3; + codec_options_t options; if (!PyArg_ParseTuple( - args, "O|Obb", - &bson, &as_class, &tz_aware, &uuid_subtype)) { + args, "O|O&", + &bson, convert_codec_options, &options)) { return NULL; } + if (PyTuple_GET_SIZE(args) < 2) { + default_codec_options(&options); + } + #if PY_MAJOR_VERSION >= 3 if (!PyBytes_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a bytes object"); @@ -2357,8 +2398,10 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { return NULL; } - if (!(result = PyList_New(0))) + if (!(result = PyList_New(0))) { + destroy_codec_options(&options); return NULL; + } while (total_size > 0) { if (total_size < BSON_MIN_SIZE) { @@ -2368,6 +2411,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { "not enough data for a BSON document"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } @@ -2379,6 +2423,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "invalid message size"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } @@ -2389,6 +2434,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "objsize too large"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } @@ -2399,14 +2445,15 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "bad eoo"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } - dict = elements_to_dict(self, string + 4, (unsigned)size - 5, - as_class, tz_aware, uuid_subtype); + dict = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); if (!dict) { Py_DECREF(result); + destroy_codec_options(&options); return NULL; } PyList_Append(result, dict); @@ -2415,6 +2462,7 @@ 
static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { total_size -= size; } + destroy_codec_options(&options); return result; } @@ -2494,6 +2542,8 @@ init_cbson(void) _cbson_API[_cbson_write_dict_INDEX] = (void *) write_dict; _cbson_API[_cbson_write_pair_INDEX] = (void *) write_pair; _cbson_API[_cbson_decode_and_write_pair_INDEX] = (void *) decode_and_write_pair; + _cbson_API[_cbson_convert_codec_options_INDEX] = (void *) convert_codec_options; + _cbson_API[_cbson_destroy_codec_options_INDEX] = (void *) destroy_codec_options; #if PY_VERSION_HEX >= 0x03010000 /* PyCapsule is new in python 3.1 */ diff --git a/bson/_cbsonmodule.h b/bson/_cbsonmodule.h index 0b207a32a..b363b926c 100644 --- a/bson/_cbsonmodule.h +++ b/bson/_cbsonmodule.h @@ -52,6 +52,12 @@ typedef int Py_ssize_t; #define STRCAT(dest, n, src) strcat((dest), (src)) #endif +typedef struct codec_options_t { + PyObject* as_class; + unsigned char tz_aware; + unsigned char uuid_rep; +} codec_options_t; + /* C API functions */ #define _cbson_buffer_write_bytes_INDEX 0 #define _cbson_buffer_write_bytes_RETURN int @@ -59,18 +65,26 @@ typedef int Py_ssize_t; #define _cbson_write_dict_INDEX 1 #define _cbson_write_dict_RETURN int -#define _cbson_write_dict_PROTO (PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) +#define _cbson_write_dict_PROTO (PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, const codec_options_t* options, unsigned char top_level) #define _cbson_write_pair_INDEX 2 #define _cbson_write_pair_RETURN int -#define _cbson_write_pair_PROTO (PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char allow_id) +#define _cbson_write_pair_PROTO (PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, const codec_options_t* options, unsigned char 
allow_id) #define _cbson_decode_and_write_pair_INDEX 3 #define _cbson_decode_and_write_pair_RETURN int -#define _cbson_decode_and_write_pair_PROTO (PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) +#define _cbson_decode_and_write_pair_PROTO (PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, const codec_options_t* options, unsigned char top_level) + +#define _cbson_convert_codec_options_INDEX 4 +#define _cbson_convert_codec_options_RETURN int +#define _cbson_convert_codec_options_PROTO (PyObject* options_obj, void* p) + +#define _cbson_destroy_codec_options_INDEX 5 +#define _cbson_destroy_codec_options_RETURN void +#define _cbson_destroy_codec_options_PROTO (codec_options_t* options) /* Total number of C API pointers */ -#define _cbson_API_POINTER_COUNT 4 +#define _cbson_API_POINTER_COUNT 6 #ifdef _CBSON_MODULE /* This section is used when compiling _cbsonmodule */ @@ -83,6 +97,10 @@ static _cbson_write_pair_RETURN write_pair _cbson_write_pair_PROTO; static _cbson_decode_and_write_pair_RETURN decode_and_write_pair _cbson_decode_and_write_pair_PROTO; +static _cbson_convert_codec_options_RETURN convert_codec_options _cbson_convert_codec_options_PROTO; + +static _cbson_destroy_codec_options_RETURN destroy_codec_options _cbson_destroy_codec_options_PROTO; + #else /* This section is used in modules that use _cbsonmodule's API */ @@ -96,6 +114,10 @@ static void **_cbson_API; #define decode_and_write_pair (*(_cbson_decode_and_write_pair_RETURN (*)_cbson_decode_and_write_pair_PROTO) _cbson_API[_cbson_decode_and_write_pair_INDEX]) +#define convert_codec_options (*(_cbson_convert_codec_options_RETURN (*)_cbson_convert_codec_options_PROTO) _cbson_API[_cbson_convert_codec_options_INDEX]) + +#define destroy_codec_options (*(_cbson_destroy_codec_options_RETURN (*)_cbson_destroy_codec_options_PROTO) _cbson_API[_cbson_destroy_codec_options_INDEX]) 
+ #define _cbson_IMPORT _cbson_API = (void **)PyCapsule_Import("_cbson._C_API", 0) #endif diff --git a/bson/binary.py b/bson/binary.py index 2ac1dfea9..679d0398d 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -98,6 +98,11 @@ byte order and binary subtype :data:`OLD_UUID_SUBTYPE`. ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE) ALL_UUID_REPRESENTATIONS = (STANDARD, PYTHON_LEGACY, JAVA_LEGACY, CSHARP_LEGACY) +UUID_REPRESENTATION_NAMES = { + PYTHON_LEGACY: 'PYTHON_LEGACY', + STANDARD: 'STANDARD', + JAVA_LEGACY: 'JAVA_LEGACY', + CSHARP_LEGACY: 'CSHARP_LEGACY'} MD5_SUBTYPE = 5 """BSON binary subtype for an MD5 hash. diff --git a/bson/codec_options.py b/bson/codec_options.py index e75ca07d0..2c6260373 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -14,12 +14,18 @@ """Tools for specifying BSON codec options.""" -from collections import MutableMapping +from collections import MutableMapping, namedtuple -from bson.binary import ALL_UUID_REPRESENTATIONS, PYTHON_LEGACY +from bson.binary import (ALL_UUID_REPRESENTATIONS, + PYTHON_LEGACY, + UUID_REPRESENTATION_NAMES) -class CodecOptions(object): +_options_base = namedtuple('CodecOptions', + ('as_class', 'tz_aware', 'uuid_representation')) + + +class CodecOptions(_options_base): """Encapsulates BSON options used in CRUD operations. :Parameters: @@ -34,10 +40,8 @@ class CodecOptions(object): :data:`~bson.binary.PYTHON_LEGACY`. 
""" - __slots__ = ("__as_class", "__tz_aware", "__uuid_rep") - - def __init__(self, as_class=dict, - tz_aware=False, uuid_representation=PYTHON_LEGACY): + def __new__(cls, as_class=dict, + tz_aware=False, uuid_representation=PYTHON_LEGACY): if not issubclass(as_class, MutableMapping): raise TypeError("document_class must be a " "subclass of MutableMapping") @@ -47,39 +51,29 @@ class CodecOptions(object): raise ValueError("uuid_representation must be a value " "from bson.binary.ALL_UUID_REPRESENTATIONS") - self.__as_class = as_class - self.__tz_aware = tz_aware - self.__uuid_rep = uuid_representation + return tuple.__new__(cls, (as_class, tz_aware, uuid_representation)) - @property - def as_class(self): - """Read only property for as_class.""" - return self.__as_class + def __repr__(self): + as_class_repr = ( + 'dict' if self.as_class is dict else repr(self.as_class)) - @property - def tz_aware(self): - """Read only property for tz_aware.""" - return self.__tz_aware + uuid_rep_repr = UUID_REPRESENTATION_NAMES.get(self.uuid_representation, + self.uuid_representation) - @property - def uuid_representation(self): - """Read only property for uuid_representation.""" - return self.__uuid_rep + return ( + 'CodecOptions(as_class=%s, tz_aware=%r, uuid_representation=%s)' + % (as_class_repr, self.tz_aware, uuid_rep_repr)) - def __eq__(self, other): - if isinstance(other, CodecOptions): - return (self.__as_class == other.as_class and - self.__tz_aware == other.tz_aware and - self.__uuid_rep == other.uuid_representation) - raise NotImplementedError - def __ne__(self, other): - return self != other +DEFAULT_CODEC_OPTIONS = CodecOptions() def _parse_codec_options(options): """Parse BSON codec options.""" - as_class = options.get('document_class', dict) - tz_aware = options.get('tz_aware', False) - uuid_rep = options.get('uuidrepresentation', PYTHON_LEGACY) - return CodecOptions(as_class, tz_aware, uuid_rep) + return CodecOptions( + as_class=options.get( + 'document_class', 
DEFAULT_CODEC_OPTIONS.as_class), + tz_aware=options.get( + 'tz_aware', DEFAULT_CODEC_OPTIONS.tz_aware), + uuid_representation=options.get( + 'uuidrepresentation', DEFAULT_CODEC_OPTIONS.uuid_representation)) diff --git a/doc/changelog.rst b/doc/changelog.rst index ceea0524d..df7d1ca90 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -111,6 +111,22 @@ patterns, see `PYTHON-500`_. Use :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular expression object. +The `as_class`, `tz_aware`, and `uuid_subtype` options are removed from all +BSON encoding and decoding methods. Use +:class:`~bson.codec_options.CodecOptions` to configure these options. The +APIs affected are: + +- :func:`~bson.decode_all` +- :func:`~bson.decode_iter` +- :func:`~bson.decode_file_iter` +- :meth:`~bson.BSON.encode` +- :meth:`~bson.BSON.decode` + +This is a breaking change for any application that uses the BSON API directly +and changes any of the named parameter defaults. No changes are required for +applications that use the default values for these options. The behavior +remains the same. + .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 Issues Resolved diff --git a/pymongo/_cmessagemodule.c b/pymongo/_cmessagemodule.c index 4df393d2b..1409108e5 100644 --- a/pymongo/_cmessagemodule.c +++ b/pymongo/_cmessagemodule.c @@ -64,7 +64,8 @@ static PyObject* _error(char* name) { /* add a lastError message on the end of the buffer. 
* returns 0 on failure */ static int add_last_error(PyObject* self, buffer_t buffer, - int request_id, char* ns, int nslen, PyObject* args) { + int request_id, char* ns, int nslen, + codec_options_t* options, PyObject* args) { struct module_state *state = GETSTATE(self); int message_start; @@ -110,7 +111,9 @@ static int add_last_error(PyObject* self, buffer_t buffer, /* getlasterror: 1 */ if (!(one = PyLong_FromLong(1))) return 0; - if (!write_pair(state->_cbson, buffer, "getlasterror", 12, one, 0, 4, 1)) { + + if (!write_pair(state->_cbson, buffer, "getlasterror", 12, one, 0, + options, 1)) { Py_DECREF(one); return 0; } @@ -118,7 +121,8 @@ static int add_last_error(PyObject* self, buffer_t buffer, /* getlasterror options */ while (PyDict_Next(args, &pos, &key, &value)) { - if (!decode_and_write_pair(state->_cbson, buffer, key, value, 0, 4, 0)) { + if (!decode_and_write_pair(state->_cbson, buffer, key, value, 0, + options, 0)) { return 0; } } @@ -170,42 +174,44 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { PyObject* doc; PyObject* iterator; int before, cur_size, max_size = 0; - int options = 0; + int flags = 0; unsigned char check_keys; unsigned char safe; unsigned char continue_on_error; - unsigned char uuid_subtype; + codec_options_t options; PyObject* last_error_args; buffer_t buffer; int length_location, message_length; PyObject* result; - if (!PyArg_ParseTuple(args, "et#ObbObb", + if (!PyArg_ParseTuple(args, "et#ObbObO&", "utf-8", &collection_name, &collection_name_length, &docs, &check_keys, &safe, &last_error_args, - &continue_on_error, &uuid_subtype)) { + &continue_on_error, + convert_codec_options, &options)) { return NULL; } if (continue_on_error) { - options += 1; + flags += 1; } - buffer = buffer_new(); if (!buffer) { PyErr_NoMemory(); + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } length_location = init_insert_buffer(buffer, request_id, - options, + flags, collection_name, 
collection_name_length); if (length_location == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); buffer_free(buffer); return NULL; @@ -218,15 +224,18 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { PyErr_SetString(InvalidOperation, "input is not iterable"); Py_DECREF(InvalidOperation); } + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; } while ((doc = PyIter_Next(iterator)) != NULL) { before = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, doc, check_keys, + &options, 1)) { Py_DECREF(doc); Py_DECREF(iterator); + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -238,6 +247,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { Py_DECREF(iterator); if (PyErr_Occurred()) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -249,6 +259,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { PyErr_SetString(InvalidOperation, "cannot do an empty bulk insert"); Py_DECREF(InvalidOperation); } + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -259,7 +270,8 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { if (safe) { if (!add_last_error(self, buffer, request_id, collection_name, - collection_name_length, last_error_args)) { + collection_name_length, &options, last_error_args)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -273,6 +285,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { buffer_get_buffer(buffer), buffer_get_position(buffer), max_size); + destroy_codec_options(&options); buffer_free(buffer); return result; } @@ -291,31 +304,33 @@ static PyObject* 
_cbson_update_message(PyObject* self, PyObject* args) { unsigned char upsert; unsigned char safe; unsigned char check_keys; - unsigned char uuid_subtype; + codec_options_t options; PyObject* last_error_args; - int options; + int flags; buffer_t buffer; int length_location, message_length; PyObject* result; - if (!PyArg_ParseTuple(args, "et#bbOObObb", + if (!PyArg_ParseTuple(args, "et#bbOObObO&", "utf-8", &collection_name, &collection_name_length, &upsert, &multi, &spec, &doc, &safe, - &last_error_args, &check_keys, &uuid_subtype)) { + &last_error_args, &check_keys, + convert_codec_options, &options)) { return NULL; } - options = 0; + flags = 0; if (upsert) { - options += 1; + flags += 1; } if (multi) { - options += 2; + flags += 2; } buffer = buffer_new(); if (!buffer) { + destroy_codec_options(&options); PyErr_NoMemory(); PyMem_Free(collection_name); return NULL; @@ -324,6 +339,7 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { // save space for message length length_location = buffer_save_space(buffer, 4); if (length_location == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); PyErr_NoMemory(); return NULL; @@ -337,14 +353,16 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { !buffer_write_bytes(buffer, collection_name, collection_name_length + 1) || - !buffer_write_bytes(buffer, (const char*)&options, 4)) { + !buffer_write_bytes(buffer, (const char*)&flags, 4)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; } before = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, spec, 0, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, spec, 0, &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -352,7 +370,9 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { max_size = buffer_get_position(buffer) - before; before = 
buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, doc, check_keys, + &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -365,7 +385,8 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { if (safe) { if (!add_last_error(self, buffer, request_id, collection_name, - collection_name_length, last_error_args)) { + collection_name_length, &options, last_error_args)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -379,6 +400,7 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { buffer_get_buffer(buffer), buffer_get_position(buffer), max_size); + destroy_codec_options(&options); buffer_free(buffer); return result; } @@ -388,31 +410,33 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { struct module_state *state = GETSTATE(self); int request_id = rand(); - unsigned int options; + unsigned int flags; char* collection_name = NULL; int collection_name_length; int begin, cur_size, max_size = 0; int num_to_skip; int num_to_return; PyObject* query; - PyObject* field_selector = Py_None; - unsigned char uuid_subtype = 3; + PyObject* field_selector; + codec_options_t options; buffer_t buffer; int length_location, message_length; PyObject* result; - if (!PyArg_ParseTuple(args, "Iet#iiO|Ob", - &options, + if (!PyArg_ParseTuple(args, "Iet#iiOOO&", + &flags, "utf-8", &collection_name, &collection_name_length, &num_to_skip, &num_to_return, - &query, &field_selector, &uuid_subtype)) { + &query, &field_selector, + convert_codec_options, &options)) { return NULL; } buffer = buffer_new(); if (!buffer) { PyErr_NoMemory(); + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } @@ -420,24 +444,27 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { // save space 
for message length length_location = buffer_save_space(buffer, 4); if (length_location == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); PyErr_NoMemory(); return NULL; } if (!buffer_write_bytes(buffer, (const char*)&request_id, 4) || !buffer_write_bytes(buffer, "\x00\x00\x00\x00\xd4\x07\x00\x00", 8) || - !buffer_write_bytes(buffer, (const char*)&options, 4) || + !buffer_write_bytes(buffer, (const char*)&flags, 4) || !buffer_write_bytes(buffer, collection_name, collection_name_length + 1) || !buffer_write_bytes(buffer, (const char*)&num_to_skip, 4) || !buffer_write_bytes(buffer, (const char*)&num_to_return, 4)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; } begin = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, query, 0, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, query, 0, &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -446,7 +473,9 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { if (field_selector != Py_None) { begin = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, field_selector, 0, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, field_selector, 0, + &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -465,6 +494,7 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { buffer_get_buffer(buffer), buffer_get_position(buffer), max_size); + destroy_codec_options(&options); buffer_free(buffer); return result; } @@ -550,12 +580,13 @@ _set_document_too_large(int size, long max) { static PyObject* _send_insert(PyObject* self, PyObject* client, PyObject* gle_args, buffer_t buffer, - char* coll_name, int coll_len, int request_id, int safe) { + char* coll_name, int coll_len, int request_id, int safe, + codec_options_t* options) { PyObject* 
result; if (safe) { if (!add_last_error(self, buffer, request_id, - coll_name, coll_len, gle_args)) { + coll_name, coll_len, options, gle_args)) { return NULL; } } @@ -573,7 +604,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { /* NOTE just using a random number as the request_id */ int request_id = rand(); - int send_safe, options = 0; + int send_safe, flags = 0; int length_location, message_length; int collection_name_length; char* collection_name = NULL; @@ -588,32 +619,32 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { unsigned char check_keys; unsigned char safe; unsigned char continue_on_error; - unsigned char uuid_subtype; + codec_options_t options; unsigned char empty = 1; long max_bson_size; long max_message_size; buffer_t buffer; PyObject *exc_type = NULL, *exc_value = NULL, *exc_trace = NULL; - if (!PyArg_ParseTuple(args, "et#ObbObbO", + if (!PyArg_ParseTuple(args, "et#ObbObO&O", "utf-8", &collection_name, &collection_name_length, &docs, &check_keys, &safe, &last_error_args, &continue_on_error, - &uuid_subtype, &client)) { + convert_codec_options, &options, + &client)) { return NULL; } if (continue_on_error) { - options += 1; + flags += 1; } /* * If we are doing unacknowledged writes *and* continue_on_error * is True it's pointless (and slower) to send GLE. 
*/ send_safe = (safe || !continue_on_error); - max_bson_size_obj = PyObject_GetAttrString(client, "max_bson_size"); #if PY_MAJOR_VERSION >= 3 max_bson_size = PyLong_AsLong(max_bson_size_obj); @@ -622,6 +653,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_bson_size_obj); if (max_bson_size == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } @@ -634,12 +666,14 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_message_size_obj); if (max_message_size == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } buffer = buffer_new(); if (!buffer) { + destroy_codec_options(&options); PyErr_NoMemory(); PyMem_Free(collection_name); return NULL; @@ -647,7 +681,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { length_location = init_insert_buffer(buffer, request_id, - options, + flags, collection_name, collection_name_length); if (length_location == -1) { @@ -666,7 +700,8 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { while ((doc = PyIter_Next(iterator)) != NULL) { int before = buffer_get_position(buffer); int cur_size; - if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, doc, check_keys, + &options, 1)) { Py_DECREF(doc); goto iterfail; } @@ -682,7 +717,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { &message_length, 4); result = _send_insert(self, client, last_error_args, buffer, collection_name, collection_name_length, - request_id, send_safe); + request_id, send_safe, &options); if (!result) goto iterfail; Py_DECREF(result); @@ -703,7 +738,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { } message_start = init_insert_buffer(new_buffer, new_request_id, - options, + flags, collection_name, collection_name_length); if 
(message_start == -1) { @@ -725,7 +760,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { result = _send_insert(self, client, last_error_args, buffer, collection_name, collection_name_length, - request_id, send_safe); + request_id, send_safe, &options); buffer_free(buffer); buffer = new_buffer; @@ -796,7 +831,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { /* Send the last (or only) batch */ result = _send_insert(self, client, last_error_args, buffer, collection_name, collection_name_length, - request_id, safe); + request_id, safe, &options); PyMem_Free(collection_name); buffer_free(buffer); @@ -877,7 +912,7 @@ _command_buffer_new(char* ns, int ns_len) { if (!buffer_write_bytes(buffer, "\x00\x00\x00\x00" /* responseTo */ "\xd4\x07\x00\x00" /* opcode */ - "\x00\x00\x00\x00", /* options */ + "\x00\x00\x00\x00", /* flags */ 12) || !buffer_write_bytes(buffer, ns, ns_len + 1) || /* namespace */ @@ -920,14 +955,15 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { PyObject* results; unsigned char op; unsigned char check_keys; - unsigned char uuid_subtype; + codec_options_t options; unsigned char empty = 1; unsigned char errors = 0; buffer_t buffer; - if (!PyArg_ParseTuple(args, "et#bOObbO", "utf-8", - &ns, &ns_len, &op, &command, &docs, - &check_keys, &uuid_subtype, &client)) { + if (!PyArg_ParseTuple(args, "et#bOObO&O", "utf-8", + &ns, &ns_len, &op, &command, &docs, &check_keys, + convert_codec_options, &options, + &client)) { return NULL; } @@ -939,6 +975,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_bson_size_obj); if (max_bson_size == -1) { + destroy_codec_options(&options); PyMem_Free(ns); return NULL; } @@ -956,6 +993,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_write_batch_size_obj); if (max_write_batch_size == -1) { + destroy_codec_options(&options); PyMem_Free(ns); return NULL; } @@ 
-964,11 +1002,13 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { ordered = !((PyDict_GetItemString(command, "ordered")) == Py_False); if (!(results = PyList_New(0))) { + destroy_codec_options(&options); PyMem_Free(ns); return NULL; } if (!(buffer = _command_buffer_new(ns, ns_len))) { + destroy_codec_options(&options); PyMem_Free(ns); Py_DECREF(results); return NULL; @@ -978,7 +1018,8 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { /* Position of command document length */ cmd_len_loc = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, command, 0, uuid_subtype, 0)) { + if (!write_dict(state->_cbson, buffer, command, 0, + &options, 0)) { goto cmdfail; } @@ -1051,7 +1092,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { } cur_doc_begin = buffer_get_position(buffer); if (!write_dict(state->_cbson, buffer, doc, - check_keys, uuid_subtype, 1)) { + check_keys, &options, 1)) { Py_DECREF(doc); goto cmditerfail; } @@ -1130,6 +1171,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { Py_DECREF(result); if (errors && ordered) { + destroy_codec_options(&options); Py_DECREF(iterator); buffer_free(buffer); return results; @@ -1174,11 +1216,13 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { PyList_Append(results, result); Py_DECREF(result); + destroy_codec_options(&options); return results; cmditerfail: Py_DECREF(iterator); cmdfail: + destroy_codec_options(&options); Py_DECREF(results); buffer_free(buffer); return NULL; diff --git a/pymongo/bulk.py b/pymongo/bulk.py index 571425cec..8e44bb06e 100644 --- a/pymongo/bulk.py +++ b/pymongo/bulk.py @@ -506,7 +506,6 @@ class _Bulk(object): def execute_command(self, generator, write_concern): """Execute using write commands. """ - uuid_representation = self.collection.codec_options.uuid_representation client = self.collection.database.connection # nModified is only reported for write commands, not legacy ops. 
full_result = { @@ -527,7 +526,7 @@ class _Bulk(object): results = _do_batched_write_command( self.namespace, run.op_type, cmd, - run.ops, True, uuid_representation, client) + run.ops, True, self.collection.codec_options, client) _merge_command(run, full_result, results) # We're supposed to continue if errors are @@ -584,10 +583,9 @@ class _Bulk(object): # We have to do this here since Collection.insert # throws away results and we need to check for jnote. client = self.collection.database.connection - uuid_representation = self.collection.codec_options.uuid_representation return client._send_message( insert(self.name, [operation], True, True, - write_concern, False, uuid_representation), True) + write_concern, False, self.collection.codec_options), True) def execute_legacy(self, generator, write_concern): """Execute using legacy wire protocol ops. diff --git a/pymongo/collection.py b/pymongo/collection.py index d03ac74b9..157e9f69b 100644 --- a/pymongo/collection.py +++ b/pymongo/collection.py @@ -470,7 +470,6 @@ class Collection(common.BaseObject): .. 
mongodoc:: insert """ client = self.database.connection - uuid_representation = self.codec_options.uuid_representation docs = doc_or_docs return_one = False if isinstance(docs, collections.MutableMapping): @@ -511,14 +510,14 @@ class Collection(common.BaseObject): command['writeConcern'] = concern results = message._do_batched_write_command( - self.database.name + ".$cmd", _INSERT, command, - gen(), check_keys, uuid_representation, client) + self.database.name + ".$cmd", _INSERT, command, + gen(), check_keys, self.codec_options, client) _check_write_command_response(results) else: # Legacy batched OP_INSERT message._do_batched_insert(self.__full_name, gen(), check_keys, safe, concern, continue_on_error, - uuid_representation, client) + self.codec_options, client) if return_one: return ids[0] @@ -641,7 +640,6 @@ class Collection(common.BaseObject): check_keys = False client = self.database.connection - uuid_representation = self.codec_options.uuid_representation if client._writable_max_wire_version() > 1 and safe: # Update command command = SON([('update', self.name)]) @@ -653,7 +651,7 @@ class Collection(common.BaseObject): results = message._do_batched_write_command( self.database.name + '.$cmd', _UPDATE, command, - docs, check_keys, uuid_representation, client) + docs, check_keys, self.codec_options, client) _check_write_command_response(results) _, result = results[0] @@ -674,7 +672,7 @@ class Collection(common.BaseObject): return client._send_message( message.update(self.__full_name, upsert, multi, spec, document, safe, concern, - check_keys, uuid_representation), safe) + check_keys, self.codec_options), safe) def drop(self): """Alias for :meth:`~pymongo.database.Database.drop_collection`. 
@@ -754,7 +752,6 @@ class Collection(common.BaseObject): safe = concern.get("w") != 0 client = self.database.connection - uuid_representation = self.codec_options.uuid_representation if client._writable_max_wire_version() > 1 and safe: # Delete command command = SON([('delete', self.name)]) @@ -765,7 +762,7 @@ class Collection(common.BaseObject): results = message._do_batched_write_command( self.database.name + '.$cmd', _DELETE, command, - docs, False, uuid_representation, client) + docs, False, self.codec_options, client) _check_write_command_response(results) _, result = results[0] @@ -775,7 +772,7 @@ class Collection(common.BaseObject): # Legacy OP_DELETE return client._send_message( message.delete(self.__full_name, spec_or_id, safe, - concern, uuid_representation, + concern, self.codec_options, int(not multi)), safe) def find_one(self, filter=None, *args, **kwargs): diff --git a/pymongo/cursor.py b/pymongo/cursor.py index 0c9bb2c09..b983ccde4 100644 --- a/pymongo/cursor.py +++ b/pymongo/cursor.py @@ -912,7 +912,7 @@ class Cursor(object): self.__collection.full_name, self.__skip, ntoreturn, self.__query_spec(), self.__projection, - self.__codec_options.uuid_representation)) + self.__codec_options)) if not self.__id: self.__killed = True elif self.__id: # Get More diff --git a/pymongo/helpers.py b/pymongo/helpers.py index 0d5564647..1d1cf15a5 100644 --- a/pymongo/helpers.py +++ b/pymongo/helpers.py @@ -115,10 +115,7 @@ def _unpack_response(response, cursor_id=None, codec_options=CodecOptions()): result["cursor_id"] = struct.unpack(" client.max_bson_size) @@ -274,7 +279,7 @@ if _use_c: def _do_batched_write_command(namespace, operation, command, - docs, check_keys, uuid_subtype, client): + docs, check_keys, opts, client): """Execute a batch of insert, update, or delete commands. 
""" max_bson_size = client.max_bson_size @@ -350,7 +355,7 @@ def _do_batched_write_command(namespace, operation, command, has_docs = True # Encode the current operation key = b(str(idx)) - value = bson.BSON.encode(doc, check_keys, uuid_subtype) + value = bson.BSON.encode(doc, check_keys, opts) # Send a batch? enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size enough_documents = (idx >= max_write_batch_size) diff --git a/pymongo/monitor.py b/pymongo/monitor.py index 8cbbd8d69..e0b3ff1ca 100644 --- a/pymongo/monitor.py +++ b/pymongo/monitor.py @@ -16,6 +16,7 @@ import weakref +from bson.codec_options import DEFAULT_CODEC_OPTIONS from pymongo import common, helpers, message, periodic_executor from pymongo.server_type import SERVER_TYPE from pymongo.ismaster import IsMaster @@ -154,7 +155,8 @@ class Monitor(object): """ start = _time() request_id, msg, _ = message.query( - 0, 'admin.$cmd', 0, -1, {'ismaster': 1}) + 0, 'admin.$cmd', 0, -1, {'ismaster': 1}, + None, DEFAULT_CODEC_OPTIONS) sock_info.send_message(msg) raw_response = sock_info.receive_message(1, request_id) diff --git a/pymongo/network.py b/pymongo/network.py index 29844e7fb..9848259c9 100644 --- a/pymongo/network.py +++ b/pymongo/network.py @@ -17,6 +17,7 @@ import select import struct +from bson.codec_options import DEFAULT_CODEC_OPTIONS from pymongo import helpers, message from pymongo.errors import AutoReconnect @@ -32,7 +33,8 @@ def command(sock, dbname, spec): - `spec`: a command document as a dict, SON, or mapping object """ ns = dbname + '.$cmd' - request_id, msg, _ = message.query(0, ns, 0, -1, spec) + request_id, msg, _ = message.query(0, ns, 0, -1, spec, + None, DEFAULT_CODEC_OPTIONS) sock.sendall(msg) response = receive_message(sock, 1, request_id) unpacked = helpers._unpack_response(response)['data'][0] diff --git a/test/test_binary.py b/test/test_binary.py index a77a5c6b0..cd94df048 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -132,44 +132,55 @@ class 
TestBinary(unittest.TestCase): def test_legacy_java_uuid(self): # Test decoding data = self.java_data - docs = bson.decode_all(data, SON, False, PYTHON_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, STANDARD) + docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, JAVA_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) for d in docs: self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) # Test encoding - encoded = b''.join([bson.BSON.encode(doc, - uuid_subtype=PYTHON_LEGACY) - for doc in docs]) + encoded = b''.join([ + bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=PYTHON_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=STANDARD) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=STANDARD)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=CSHARP_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=JAVA_LEGACY)) + for doc in docs]) self.assertEqual(data, encoded) @client_context.require_connection def 
test_legacy_java_uuid_roundtrip(self): data = self.java_data - docs = bson.decode_all(data, SON, False, JAVA_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) client_context.client.pymongo_test.drop_collection('java_uuid') db = client_context.client.pymongo_test @@ -191,44 +202,55 @@ class TestBinary(unittest.TestCase): data = self.csharp_data # Test decoding - docs = bson.decode_all(data, SON, False, PYTHON_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, STANDARD) + docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, JAVA_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) for d in docs: self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) # Test encoding - encoded = b''.join([bson.BSON.encode(doc, - uuid_subtype=PYTHON_LEGACY) - for doc in docs]) + encoded = b''.join([ + bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=PYTHON_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=STANDARD) - for doc in docs]) + encoded = b''.join([ + bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=STANDARD)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=JAVA_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = 
b''.join([bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=CSHARP_LEGACY)) + for doc in docs]) self.assertEqual(data, encoded) @client_context.require_connection def test_legacy_csharp_uuid_roundtrip(self): data = self.csharp_data - docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) client_context.client.pymongo_test.drop_collection('csharp_uuid') db = client_context.client.pymongo_test diff --git a/test/test_bson.py b/test/test_bson.py index 888875e12..c3903eb8b 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -33,6 +33,7 @@ from bson import (BSON, Regex) from bson.binary import Binary, UUIDLegacy from bson.code import Code +from bson.codec_options import CodecOptions from bson.int64 import Int64 from bson.objectid import ObjectId from bson.dbref import DBRef @@ -130,7 +131,8 @@ class TestBSON(unittest.TestCase): helper({"$field": Code("return function(){ return x; }", scope={'x': False})}) def encode_then_decode(doc): - return doc_class(doc) == BSON.encode(doc).decode(as_class=doc_class) + return doc_class(doc) == BSON.encode(doc).decode( + CodecOptions(as_class=doc_class)) qcheck.check_unittest(self, encode_then_decode, qcheck.gen_mongo_dict(3)) @@ -425,7 +427,8 @@ class TestBSON(unittest.TestCase): as_utc = (aware - aware.utcoffset()).replace(tzinfo=utc) self.assertEqual(datetime.datetime(1993, 4, 3, 16, 45, tzinfo=utc), as_utc) - after = BSON.encode({"date": aware}).decode(tz_aware=True)["date"] + after = BSON.encode({"date": aware}).decode( + CodecOptions(tz_aware=True))["date"] self.assertEqual(utc, after.tzinfo) self.assertEqual(as_utc, after) @@ -584,14 +587,19 @@ class TestBSON(unittest.TestCase): raise def test_custom_class(self): - self.assertTrue(isinstance(BSON.encode({}).decode(), dict)) - self.assertFalse(isinstance(BSON.encode({}).decode(), SON)) - 
self.assertTrue(isinstance(BSON.encode({}).decode(SON), SON)) + self.assertIsInstance(BSON.encode({}).decode(), dict) + self.assertNotIsInstance(BSON.encode({}).decode(), SON) + self.assertIsInstance( + BSON.encode({}).decode(CodecOptions(as_class=SON)), + SON) - self.assertEqual(1, BSON.encode({"x": 1}).decode(SON)["x"]) + self.assertEqual( + 1, + BSON.encode({"x": 1}).decode(CodecOptions(as_class=SON))["x"]) x = BSON.encode({"x": [{"y": 1}]}) - self.assertTrue(isinstance(x.decode(SON)["x"][0], SON)) + self.assertIsInstance(x.decode(CodecOptions(as_class=SON))["x"][0], + SON) def test_subclasses(self): # make sure we can serialize subclasses of native Python types. @@ -620,7 +628,9 @@ class TestBSON(unittest.TestCase): except ImportError: raise SkipTest("No OrderedDict") d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)]) - self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict)) + self.assertEqual( + d, + BSON.encode(d).decode(CodecOptions(as_class=OrderedDict))) def test_bson_regex(self): # Invalid Python regex, though valid PCRE. 
@@ -752,5 +762,41 @@ class TestBSON(unittest.TestCase): {"_id": {'$oid': "52d0b971b3ba219fdeb4170e"}}, True) BSON.encode({"_id": {'$oid': "52d0b971b3ba219fdeb4170e"}}) + +class TestCodecOptions(unittest.TestCase): + def test_as_class(self): + self.assertRaises(TypeError, CodecOptions, as_class=object) + self.assertIs(SON, CodecOptions(as_class=SON).as_class) + + def test_tz_aware(self): + self.assertRaises(TypeError, CodecOptions, tz_aware=1) + self.assertFalse(CodecOptions().tz_aware) + self.assertTrue(CodecOptions(tz_aware=True).tz_aware) + + def test_uuid_representation(self): + self.assertRaises(ValueError, CodecOptions, uuid_representation=None) + self.assertRaises(ValueError, CodecOptions, uuid_representation=7) + self.assertRaises(ValueError, CodecOptions, uuid_representation=2) + + def test_codec_options_repr(self): + r = ('CodecOptions(as_class=dict, tz_aware=False, ' + 'uuid_representation=PYTHON_LEGACY)') + self.assertEqual(r, repr(CodecOptions())) + + def test_decode_all_defaults(self): + # Test decode_all()'s default as_class is dict and tz_aware is False. + # The default uuid_representation is PYTHON_LEGACY but this decodes + # the same as STANDARD, so all this test proves about UUID decoding is + # that it's not CSHARP_LEGACY or JAVA_LEGACY. 
+ doc = {'sub_document': {}, + 'uuid': uuid.uuid4(), + 'dt': datetime.datetime.utcnow()} + + decoded = bson.decode_all(bson.BSON.encode(doc))[0] + self.assertIsInstance(decoded['sub_document'], dict) + self.assertEqual(decoded['uuid'], doc['uuid']) + self.assertIsNone(decoded['dt'].tzinfo) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_common.py b/test/test_common.py index a82a04e9d..bda81d8d6 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -41,9 +41,6 @@ class TestCommon(IntegrationTest): coll = self.db.uuid coll.drop() - self.assertRaises(ValueError, CodecOptions, uuid_representation=7) - self.assertRaises(ValueError, CodecOptions, uuid_representation=2) - # Test property self.assertEqual(PYTHON_LEGACY, coll.codec_options.uuid_representation)