diff --git a/bson/__init__.py b/bson/__init__.py index 4a79539ec..947365aed 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -31,6 +31,7 @@ from bson.binary import (Binary, OLD_UUID_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY, UUIDLegacy) from bson.code import Code +from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS from bson.dbref import DBRef from bson.errors import (InvalidBSON, InvalidDocument, @@ -122,7 +123,7 @@ def _get_string(data, position, obj_end, dummy): def _get_object(data, position, obj_end, opts): - """Decode a BSON subdocument to as_class or bson.dbref.DBRef.""" + """Decode a BSON subdocument to opts.as_class or bson.dbref.DBRef.""" obj_size = _UNPACK_INT(data[position:position + 4])[0] end = position + obj_size - 1 if data[end:position + obj_size] != b"\x00": @@ -175,12 +176,12 @@ def _get_binary(data, position, dummy, opts): end = position + length if subtype in (3, 4): # Java Legacy - uuid_subtype = opts[2] - if uuid_subtype == JAVA_LEGACY: + uuid_representation = opts.uuid_representation + if uuid_representation == JAVA_LEGACY: java = data[position:end] value = uuid.UUID(bytes=java[0:8][::-1] + java[8:16][::-1]) # C# legacy - elif uuid_subtype == CSHARP_LEGACY: + elif uuid_representation == CSHARP_LEGACY: value = uuid.UUID(bytes_le=data[position:end]) # Python else: @@ -213,7 +214,7 @@ def _get_date(data, position, dummy, opts): diff = ((millis % 1000) + 1000) % 1000 seconds = (millis - diff) / 1000 micros = diff * 1000 - if opts[1]: + if opts.tz_aware: return EPOCH_AWARE + datetime.timedelta( seconds=seconds, microseconds=micros), end else: @@ -262,6 +263,11 @@ def _get_int64(data, position, dummy0, dummy1): return Int64(_UNPACK_LONG(data[position:end])[0]), end +# Each decoder function's signature is: +# - data: bytes +# - position: int, beginning of object in 'data' to decode +# - obj_end: int, end of object to decode in 'data' if variable-length type +# - opts: a CodecOptions _ELEMENT_GETTER = { BSONNUM: _get_float, BSONSTR: 
_get_string, @@ -297,7 +303,7 @@ def _element_to_dict(data, position, obj_end, opts): def _elements_to_dict(data, position, obj_end, opts): """Decode a BSON document.""" - result = opts[0]() + result = opts.as_class() end = obj_end - 1 while position < end: (key, value, position) = _element_to_dict(data, position, obj_end, opts) @@ -305,9 +311,8 @@ def _elements_to_dict(data, position, obj_end, opts): return result -def _bson_to_dict(data, as_class, tz_aware, uuid_subtype): +def _bson_to_dict(data, opts): """Decode a BSON string to as_class.""" - opts = (as_class, tz_aware, uuid_subtype) try: obj_size = _UNPACK_INT(data[:4])[0] except struct.error as e: @@ -417,38 +422,38 @@ else: return b"\x02" + name + _PACK_INT(len(value) + 1) + value + b"\x00" -def _encode_mapping(name, value, check_keys, uuid_subtype): +def _encode_mapping(name, value, check_keys, opts): """Encode a mapping type.""" - data = b"".join([_element_to_bson(key, val, check_keys, uuid_subtype) + data = b"".join([_element_to_bson(key, val, check_keys, opts) for key, val in iteritems(value)]) return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00" -def _encode_dbref(name, value, check_keys, uuid_subtype): +def _encode_dbref(name, value, check_keys, opts): """Encode bson.dbref.DBRef.""" buf = bytearray(b"\x03" + name + b"\x00\x00\x00\x00") begin = len(buf) - 4 buf += _name_value_to_bson(b"$ref\x00", - value.collection, check_keys, uuid_subtype) + value.collection, check_keys, opts) buf += _name_value_to_bson(b"$id\x00", - value.id, check_keys, uuid_subtype) + value.id, check_keys, opts) if value.database is not None: buf += _name_value_to_bson( - b"$db\x00", value.database, check_keys, uuid_subtype) + b"$db\x00", value.database, check_keys, opts) for key, val in iteritems(value._DBRef__kwargs): - buf += _element_to_bson(key, val, check_keys, uuid_subtype) + buf += _element_to_bson(key, val, check_keys, opts) buf += b"\x00" buf[begin:begin + 4] = _PACK_INT(len(buf) - begin) return bytes(buf) 
-def _encode_list(name, value, check_keys, uuid_subtype): +def _encode_list(name, value, check_keys, opts): """Encode a list/tuple.""" lname = gen_list_name() data = b"".join([_name_value_to_bson(next(lname), item, - check_keys, uuid_subtype) + check_keys, opts) for item in value]) return b"\x04" + name + _PACK_INT(len(data) + 5) + data + b"\x00" @@ -467,18 +472,19 @@ def _encode_binary(name, value, dummy0, dummy1): return b"\x05" + name + _PACK_LENGTH_SUBTYPE(len(value), subtype) + value -def _encode_uuid(name, value, dummy, uuid_subtype): +def _encode_uuid(name, value, dummy, opts): """Encode uuid.UUID.""" + uuid_representation = opts.uuid_representation # Python Legacy Common Case - if uuid_subtype == OLD_UUID_SUBTYPE: + if uuid_representation == OLD_UUID_SUBTYPE: return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes # Java Legacy - elif uuid_subtype == JAVA_LEGACY: + elif uuid_representation == JAVA_LEGACY: from_uuid = value.bytes data = from_uuid[0:8][::-1] + from_uuid[8:16][::-1] return b"\x05" + name + b'\x10\x00\x00\x00\x03' + data # C# legacy - elif uuid_subtype == CSHARP_LEGACY: + elif uuid_representation == CSHARP_LEGACY: # Microsoft GUID representation. 
return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes_le # New @@ -537,13 +543,13 @@ def _encode_regex(name, value, dummy0, dummy1): return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags -def _encode_code(name, value, dummy, uuid_subtype): +def _encode_code(name, value, dummy, opts): """Encode bson.code.Code.""" cstring = _make_c_string(value) cstrlen = len(cstring) if not value.scope: return b"\x0D" + name + _PACK_INT(cstrlen) + cstring - scope = _dict_to_bson(value.scope, False, uuid_subtype, False) + scope = _dict_to_bson(value.scope, False, opts, False) full_length = _PACK_INT(8 + cstrlen + len(scope)) return b"\x0F" + name + full_length + _PACK_INT(cstrlen) + cstring + scope @@ -582,6 +588,11 @@ def _encode_maxkey(name, dummy0, dummy1, dummy2): return b"\x7F" + name +# Each encoder function's signature is: +# - name: utf-8 bytes +# - value: a Python data type, e.g. a Python int for _encode_int +# - check_keys: bool, whether to check for invalid names +# - opts: a CodecOptions _ENCODERS = { bool: _encode_bool, bytes: _encode_bytes, @@ -628,13 +639,13 @@ if not PY3: _ENCODERS[long] = _encode_long -def _name_value_to_bson(name, value, check_keys, uuid_subtype): +def _name_value_to_bson(name, value, check_keys, opts): """Encode a single name, value pair.""" # First see if the type is already cached. KeyError will only ever # happen once per subtype. try: - return _ENCODERS[type(value)](name, value, check_keys, uuid_subtype) + return _ENCODERS[type(value)](name, value, check_keys, opts) except KeyError: pass @@ -646,7 +657,7 @@ def _name_value_to_bson(name, value, check_keys, uuid_subtype): func = _MARKERS[marker] # Cache this type for faster subsequent lookup. _ENCODERS[type(value)] = func - return func(name, value, check_keys, uuid_subtype) + return func(name, value, check_keys, opts) # If all else fails test each base type. This will only happen once for # a subtype of a supported base type. 
@@ -655,13 +666,13 @@ def _name_value_to_bson(name, value, check_keys, uuid_subtype): func = _ENCODERS[base] # Cache this type for faster subsequent lookup. _ENCODERS[type(value)] = func - return func(name, value, check_keys, uuid_subtype) + return func(name, value, check_keys, opts) raise InvalidDocument("cannot convert value of type %s to bson" % type(value)) -def _element_to_bson(key, value, check_keys, uuid_subtype): +def _element_to_bson(key, value, check_keys, opts): """Encode a single key, value pair.""" if not isinstance(key, string_type): raise InvalidDocument("documents must have only string keys, " @@ -673,20 +684,20 @@ def _element_to_bson(key, value, check_keys, uuid_subtype): raise InvalidDocument("key %r must not contain '.'" % (key,)) name = _make_name(key) - return _name_value_to_bson(name, value, check_keys, uuid_subtype) + return _name_value_to_bson(name, value, check_keys, opts) -def _dict_to_bson(doc, check_keys, uuid_subtype, top_level=True): +def _dict_to_bson(doc, check_keys, opts, top_level=True): """Encode a document to BSON.""" try: elements = [] if top_level and "_id" in doc: elements.append(_name_value_to_bson(b"_id\x00", doc["_id"], - check_keys, uuid_subtype)) + check_keys, opts)) for (key, value) in iteritems(doc): if not top_level or key != "_id": elements.append(_element_to_bson(key, value, - check_keys, uuid_subtype)) + check_keys, opts)) except AttributeError: raise TypeError("encoder expected a mapping type but got: %r" % (doc,)) @@ -696,8 +707,11 @@ if _USE_C: _dict_to_bson = _cbson._dict_to_bson -def decode_all(data, as_class=dict, - tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE): +_CODEC_OPTIONS_TYPE_ERROR = TypeError( + "codec_options must be an instance of CodecOptions") + + +def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): """Decode BSON data to multiple documents. 
`data` must be a string of concatenated, valid, BSON-encoded @@ -705,12 +719,8 @@ def decode_all(data, as_class=dict, :Parameters: - `data`: BSON data - - `as_class` (optional): the class to use for the resulting - documents - - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - - `uuid_subtype` (optional): The BSON representation to use for UUIDs. - See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. .. versionchanged:: 3.0 Removed `compile_re` option: PyMongo now always represents BSON regular @@ -718,6 +728,9 @@ def decode_all(data, as_class=dict, :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular expression object. + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. + .. versionchanged:: 2.7 Added `compile_re` option. If set to False, PyMongo represented BSON regular expressions as :class:`~bson.regex.Regex` objects instead of @@ -727,7 +740,9 @@ def decode_all(data, as_class=dict, .. 
_PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ - opts = (as_class, tz_aware, uuid_subtype) + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + docs = [] position = 0 end = len(data) - 1 @@ -739,7 +754,10 @@ def decode_all(data, as_class=dict, obj_end = position + obj_size - 1 if data[obj_end:position + obj_size] != b"\x00": raise InvalidBSON("bad eoo") - docs.append(_elements_to_dict(data, position + 4, obj_end, opts)) + docs.append(_elements_to_dict(data, + position + 4, + obj_end, + codec_options)) position += obj_size return docs except InvalidBSON: @@ -754,8 +772,7 @@ if _USE_C: decode_all = _cbson.decode_all -def decode_iter(data, as_class=dict, tz_aware=True, - uuid_subtype=OLD_UUID_SUBTYPE): +def decode_iter(data, codec_options=DEFAULT_CODEC_OPTIONS): """Decode BSON data to multiple documents as a generator. Works similarly to the decode_all function, but yields one document at a @@ -766,15 +783,18 @@ def decode_iter(data, as_class=dict, tz_aware=True, :Parameters: - `data`: BSON data - - `as_class` (optional): the class to use for the resulting - documents - - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - - `uuid_subtype` (optional): The BSON representation to use for UUIDs. - See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. + + .. versionchanged:: 3.0 + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. .. 
versionadded:: 2.8 """ + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + position = 0 end = len(data) - 1 while position < end: @@ -782,12 +802,10 @@ def decode_iter(data, as_class=dict, tz_aware=True, elements = data[position:position + obj_size] position += obj_size - yield _bson_to_dict(elements, as_class, - tz_aware, uuid_subtype) + yield _bson_to_dict(elements, codec_options) -def decode_file_iter(file_obj, as_class=dict, tz_aware=True, - uuid_subtype=OLD_UUID_SUBTYPE): +def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS): """Decode bson data from a file to multiple documents as a generator. Works similarly to the decode_all function, but reads from the file object @@ -795,12 +813,12 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True, :Parameters: - `file_obj`: A file object containing BSON data. - - `as_class` (optional): the class to use for the resulting - documents - - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - - `uuid_subtype` (optional): The BSON representation to use for UUIDs. - See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. + + .. versionchanged:: 3.0 + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. .. 
versionadded:: 2.8 """ @@ -813,8 +831,7 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True, raise InvalidBSON("cut off in middle of objsize") obj_size = _UNPACK_INT(size_data)[0] - 4 elements = size_data + file_obj.read(obj_size) - yield _bson_to_dict(elements, as_class, - tz_aware, uuid_subtype) + yield _bson_to_dict(elements, codec_options) def is_valid(bson): @@ -831,7 +848,7 @@ def is_valid(bson): raise TypeError("BSON data must be an instance of a subclass of bytes") try: - _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE) + _bson_to_dict(bson, DEFAULT_CODEC_OPTIONS) return True except Exception: return False @@ -842,7 +859,8 @@ class BSON(bytes): """ @classmethod - def encode(cls, document, check_keys=False, uuid_subtype=OLD_UUID_SUBTYPE): + def encode(cls, document, check_keys=False, + codec_options=DEFAULT_CODEC_OPTIONS): """Encode a document to a new :class:`BSON` instance. A document can be any mapping type (like :class:`dict`). @@ -858,34 +876,38 @@ class BSON(bytes): - `check_keys` (optional): check if keys start with '$' or contain '.', raising :class:`~bson.errors.InvalidDocument` in either case - - `uuid_subtype` (optional): The BSON representation to use for - UUIDs. See the :mod:`bson.binary` module for all options. - """ - return cls(_dict_to_bson(document, check_keys, uuid_subtype)) + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. - def decode(self, as_class=dict, - tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE): + .. versionchanged:: 3.0 + Replaced `uuid_subtype` option with `codec_options`. + """ + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + + return cls(_dict_to_bson(document, check_keys, codec_options)) + + def decode(self, codec_options=DEFAULT_CODEC_OPTIONS): """Decode this BSON data. - The default type to use for the resultant document is - :class:`dict`. 
Any other class that supports - :meth:`__setitem__` can be used instead by passing it as the - `as_class` parameter. + By default, returns a BSON document represented as a Python + :class:`dict`. To use a different :class:`MutableMapping` class, + configure a :class:`~bson.codec_options.CodecOptions`:: - If `tz_aware` is ``True`` (recommended), any - :class:`~datetime.datetime` instances returned will be - timezone-aware, with their timezone set to - :attr:`bson.tz_util.utc`. Otherwise (default), all - :class:`~datetime.datetime` instances will be naive (but - contain UTC). + >>> import collections # From Python standard library. + >>> import bson + >>> from bson.codec_options import CodecOptions + >>> data = bson.BSON.encode({'a': 1}) + >>> decoded_doc = bson.BSON.decode(data) + + >>> options = CodecOptions(as_class=collections.OrderedDict) + >>> decoded_doc = bson.BSON.decode(data, codec_options=options) + >>> type(decoded_doc) + <class 'collections.OrderedDict'> :Parameters: - `as_class` (optional): the class to use for the resulting - document - `tz_aware` (optional): if ``True``, return timezone-aware - :class:`~datetime.datetime` instances - `uuid_subtype` (optional): The BSON representation to use for - UUIDs. See the :mod:`bson.binary` module for all options. + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. .. versionchanged:: 3.0 Removed `compile_re` option: PyMongo now always represents BSON @@ -893,6 +915,9 @@ class BSON(bytes): :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular expression object. + Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with + `codec_options`. + .. versionchanged:: 2.7 Added `compile_re` option. If set to False, PyMongo represented BSON regular expressions as :class:`~bson.regex.Regex` objects instead of @@ -902,7 +927,10 @@ class BSON(bytes): .. 
_PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ - return _bson_to_dict(self, as_class, tz_aware, uuid_subtype) + if not isinstance(codec_options, CodecOptions): + raise _CODEC_OPTIONS_TYPE_ERROR + + return _bson_to_dict(self, codec_options) def has_c(): diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 8129d3907..e367f3c3e 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -68,8 +68,14 @@ static struct module_state _state; /* Maximum number of regex flags */ #define FLAGS_SIZE 7 +/* Default UUID representation type code. */ +#define PYTHON_LEGACY 3 + +/* Other UUID representations. */ +#define STANDARD 4 #define JAVA_LEGACY 5 #define CSHARP_LEGACY 6 + #define BSON_MAX_SIZE 2147483647 /* The smallest possible BSON document, i.e. "{}" */ #define BSON_MIN_SIZE 5 @@ -104,15 +110,46 @@ _downcast_and_check(Py_ssize_t size, int extra) { return (int)size + extra; } +/* Fill out a codec_options_t* from a CodecOptions object. Use with the "O&" + * format spec in PyArg_ParseTuple. + * + * Return 1 on success. options->as_class is a new reference. + * Return 0 on failure. + */ +int convert_codec_options(PyObject* options_obj, void* p) { + codec_options_t* options = (codec_options_t*)p; + if (!PyArg_ParseTuple(options_obj, "Obb", + &options->as_class, + &options->tz_aware, + &options->uuid_rep)) { + return 0; + } + + Py_INCREF(options->as_class); + return 1; +} + +/* Fill out a codec_options_t* with default options. */ +void default_codec_options(codec_options_t* options) { + options->as_class = (PyObject*)&PyDict_Type; + Py_INCREF(options->as_class); + + // TODO: set to "1". PYTHON-526, setting tz_aware=True by default. 
+ options->tz_aware = 0; + options->uuid_rep = PYTHON_LEGACY; +} + +void destroy_codec_options(codec_options_t* options) { + Py_CLEAR(options->as_class); +} + static PyObject* elements_to_dict(PyObject* self, const char* string, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype); + unsigned max, const codec_options_t* options); static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype); + const codec_options_t* options); /* Date stuff */ static PyObject* datetime_from_millis(long long millis) { @@ -367,12 +404,12 @@ static int _load_python_objects(PyObject* module) { static int write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype) { + const codec_options_t* options) { int result; if(Py_EnterRecursiveCall(" while encoding an object to BSON ")) return 0; result = _write_element_to_buffer(self, buffer, type_byte, - value, check_keys, uuid_subtype); + value, check_keys, options); Py_LeaveRecursiveCall(); return result; } @@ -556,7 +593,7 @@ static int _write_regex_to_buffer( static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype) { + const codec_options_t* options) { struct module_state *state = GETSTATE(self); PyObject* type_marker = NULL; PyObject* mapping_type; @@ -730,7 +767,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (!write_dict(self, buffer, scope, 0, uuid_subtype, 0)) { + if (!write_dict(self, buffer, scope, 0, options, 0)) { Py_DECREF(scope); return 0; } @@ -799,7 +836,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, if (!as_doc) { return 0; } - if (!write_dict(self, buffer, as_doc, 0, uuid_subtype, 0)) { + if (!write_dict(self, buffer, as_doc, 0, options, 0)) { 
Py_DECREF(as_doc); return 0; } @@ -879,7 +916,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 1; } else if (PyDict_Check(value)) { *(buffer_get_buffer(buffer) + type_byte) = 0x03; - return write_dict(self, buffer, value, check_keys, uuid_subtype, 0); + return write_dict(self, buffer, value, check_keys, options, 0); } else if (PyList_Check(value) || PyTuple_Check(value)) { Py_ssize_t items, i; int start_position, @@ -923,7 +960,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, if (!(item_value = PySequence_GetItem(value, i))) return 0; if (!write_element_to_buffer(self, buffer, list_type_byte, - item_value, check_keys, uuid_subtype)) { + item_value, check_keys, options)) { Py_DECREF(item_value); return 0; } @@ -1041,7 +1078,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } *(buffer_get_buffer(buffer) + type_byte) = 0x03; - return write_dict(self, buffer, value, check_keys, uuid_subtype, 0); + return write_dict(self, buffer, value, check_keys, options, 0); } uuid_type = _get_object(state->UUID, "uuid", "UUID"); @@ -1061,11 +1098,12 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (uuid_subtype == JAVA_LEGACY || uuid_subtype == CSHARP_LEGACY) { + if (options->uuid_rep == JAVA_LEGACY + || options->uuid_rep == CSHARP_LEGACY) { subtype = 3; } else { - subtype = uuid_subtype; + subtype = options->uuid_rep; } *(buffer_get_buffer(buffer) + type_byte) = 0x05; @@ -1076,7 +1114,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, return 0; } - if (uuid_subtype == CSHARP_LEGACY) { + if (options->uuid_rep == CSHARP_LEGACY) { /* Legacy C# byte order */ bytes = PyObject_GetAttrString(value, "bytes_le"); } @@ -1095,7 +1133,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, Py_DECREF(bytes); return 0; } - if (uuid_subtype == JAVA_LEGACY) { + if (options->uuid_rep == JAVA_LEGACY) { /* Store in legacy java byte 
order. */ char as_legacy_java[16]; _fix_java(data, as_legacy_java); @@ -1166,7 +1204,7 @@ static int check_key_name(const char* name, int name_length) { * Returns 0 on failure */ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype, unsigned char allow_id) { + const codec_options_t* options, unsigned char allow_id) { int type_byte; /* Don't write any _id elements unless we're explicitly told to - @@ -1188,7 +1226,7 @@ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_lengt return 0; } if (!write_element_to_buffer(self, buffer, type_byte, - value, check_keys, uuid_subtype)) { + value, check_keys, options)) { return 0; } return 1; @@ -1197,7 +1235,8 @@ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_lengt int decode_and_write_pair(PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, - unsigned char uuid_subtype, unsigned char top_level) { + const codec_options_t* options, + unsigned char top_level) { PyObject* encoded; const char* data; int size; @@ -1310,7 +1349,7 @@ int decode_and_write_pair(PyObject* self, buffer_t buffer, /* If top_level is True, don't allow writing _id here - it was already written. 
*/ if (!write_pair(self, buffer, data, - size - 1, value, check_keys, uuid_subtype, !top_level)) { + size - 1, value, check_keys, options, !top_level)) { Py_DECREF(encoded); return 0; } @@ -1322,7 +1361,7 @@ int decode_and_write_pair(PyObject* self, buffer_t buffer, /* returns 0 on failure */ int write_dict(PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, - unsigned char uuid_subtype, unsigned char top_level) { + const codec_options_t* options, unsigned char top_level) { PyObject* key; PyObject* iter; char zero = 0; @@ -1390,7 +1429,7 @@ int write_dict(PyObject* self, buffer_t buffer, return 0; } if (!write_pair(self, buffer, "_id", 3, - _id, check_keys, uuid_subtype, 1)) { + _id, check_keys, options, 1)) { Py_DECREF(_id); return 0; } @@ -1410,7 +1449,7 @@ int write_dict(PyObject* self, buffer_t buffer, return 0; } if (!decode_and_write_pair(self, buffer, key, value, - check_keys, uuid_subtype, top_level)) { + check_keys, options, top_level)) { Py_DECREF(key); Py_DECREF(value); Py_DECREF(iter); @@ -1434,22 +1473,23 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { PyObject* dict; PyObject* result; unsigned char check_keys; - unsigned char uuid_subtype; unsigned char top_level = 1; + codec_options_t options; buffer_t buffer; - if (!PyArg_ParseTuple(args, "Obb|b", &dict, - &check_keys, &uuid_subtype, &top_level)) { + if (!PyArg_ParseTuple(args, "ObO&|b", &dict, &check_keys, + convert_codec_options, &options, &top_level)) { return NULL; } - buffer = buffer_new(); if (!buffer) { + destroy_codec_options(&options); PyErr_NoMemory(); return NULL; } - if (!write_dict(self, buffer, dict, check_keys, uuid_subtype, top_level)) { + if (!write_dict(self, buffer, dict, check_keys, &options, top_level)) { + destroy_codec_options(&options); buffer_free(buffer); return NULL; } @@ -1462,15 +1502,14 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { result = Py_BuildValue("s#", buffer_get_buffer(buffer), 
buffer_get_position(buffer)); #endif + destroy_codec_options(&options); buffer_free(buffer); return result; } static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position, unsigned char type, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype) { + unsigned max, const codec_options_t* options) { struct module_state *state = GETSTATE(self); PyObject* value = NULL; @@ -1526,8 +1565,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, goto invalid; } value = elements_to_dict(self, buffer + *position + 4, - size - 5, as_class, tz_aware, - uuid_subtype); + size - 5, options); if (!value) { goto invalid; } @@ -1625,8 +1663,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, goto invalid; } to_append = get_value(self, buffer, position, bson_type, - max - (unsigned)key_size, - as_class, tz_aware, uuid_subtype); + max - (unsigned)key_size, options); Py_LeaveRecursiveCall(); if (!to_append) { Py_DECREF(value); @@ -1701,13 +1738,13 @@ static PyObject* get_value(PyObject* self, const char* buffer, * From this point, we hold refs to args, kwargs, and data. * If anything fails, goto uuiderror to clean them up. */ - if (uuid_subtype == CSHARP_LEGACY) { + if (options->uuid_rep == CSHARP_LEGACY) { /* Legacy C# byte order */ if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1) goto uuiderror; } else { - if (uuid_subtype == JAVA_LEGACY) { + if (options->uuid_rep == JAVA_LEGACY) { /* Convert from legacy java byte order */ char big_endian[16]; _fix_java(buffer + *position, big_endian); @@ -1812,7 +1849,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, memcpy(&millis, buffer + *position, 8); naive = datetime_from_millis(millis); *position += 8; - if (!tz_aware) { /* In the naive case, we're done here. */ + if (!options->tz_aware) { /* In the naive case, we're done here. 
*/ value = naive; break; } @@ -2035,8 +2072,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, goto invalid; } scope = elements_to_dict(self, buffer + *position + 4, - scope_size - 5, (PyObject*)&PyDict_Type, - tz_aware, uuid_subtype); + scope_size - 5, options); if (!scope) { Py_DECREF(code); goto invalid; @@ -2184,11 +2220,10 @@ static PyObject* get_value(PyObject* self, const char* buffer, } static PyObject* _elements_to_dict(PyObject* self, const char* string, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype) { + unsigned max, + const codec_options_t* options) { unsigned position = 0; - PyObject* dict = PyObject_CallObject(as_class, NULL); + PyObject* dict = PyObject_CallObject(options->as_class, NULL); if (!dict) { return NULL; } @@ -2214,7 +2249,7 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, } position += (unsigned)name_length + 1; value = get_value(self, string, &position, type, - max - position, as_class, tz_aware, uuid_subtype); + max - position, options); if (!value) { Py_DECREF(name); Py_DECREF(dict); @@ -2229,14 +2264,12 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, } static PyObject* elements_to_dict(PyObject* self, const char* string, - unsigned max, PyObject* as_class, - unsigned char tz_aware, - unsigned char uuid_subtype) { + unsigned max, + const codec_options_t* options) { PyObject* result; if (Py_EnterRecursiveCall(" while decoding a BSON document")) return NULL; - result = _elements_to_dict(self, string, max, - as_class, tz_aware, uuid_subtype); + result = _elements_to_dict(self, string, max, options); Py_LeaveRecursiveCall(); return result; } @@ -2246,12 +2279,11 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { Py_ssize_t total_size; const char* string; PyObject* bson; - PyObject* as_class; - unsigned char tz_aware; - unsigned char uuid_subtype; + codec_options_t options; + PyObject* result; if 
(!PyArg_ParseTuple( - args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) { + args, "OO&", &bson, convert_codec_options, &options)) { return NULL; } @@ -2262,6 +2294,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { if (!PyString_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a string"); #endif + destroy_codec_options(&options); return NULL; } #if PY_MAJOR_VERSION >= 3 @@ -2276,6 +2309,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { "not enough data for a BSON document"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } @@ -2285,6 +2319,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { string = PyString_AsString(bson); #endif if (!string) { + destroy_codec_options(&options); return NULL; } @@ -2295,6 +2330,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "invalid message size"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } @@ -2304,6 +2340,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "objsize too large"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } @@ -2313,11 +2350,13 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "bad eoo"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); return NULL; } - return elements_to_dict(self, string + 4, (unsigned)size - 5, - as_class, tz_aware, uuid_subtype); + result = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); + destroy_codec_options(&options); + return result; } static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { @@ -2327,16 +2366,18 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyObject* bson; PyObject* dict; PyObject* result; - PyObject* as_class = (PyObject*)&PyDict_Type; - 
unsigned char tz_aware = 1; - unsigned char uuid_subtype = 3; + codec_options_t options; if (!PyArg_ParseTuple( - args, "O|Obb", - &bson, &as_class, &tz_aware, &uuid_subtype)) { + args, "O|O&", + &bson, convert_codec_options, &options)) { return NULL; } + if (PyTuple_GET_SIZE(args) < 2) { + default_codec_options(&options); + } + #if PY_MAJOR_VERSION >= 3 if (!PyBytes_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a bytes object"); @@ -2357,8 +2398,10 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { return NULL; } - if (!(result = PyList_New(0))) + if (!(result = PyList_New(0))) { + destroy_codec_options(&options); return NULL; + } while (total_size > 0) { if (total_size < BSON_MIN_SIZE) { @@ -2368,6 +2411,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { "not enough data for a BSON document"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } @@ -2379,6 +2423,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "invalid message size"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } @@ -2389,6 +2434,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "objsize too large"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } @@ -2399,14 +2445,15 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyErr_SetString(InvalidBSON, "bad eoo"); Py_DECREF(InvalidBSON); } + destroy_codec_options(&options); Py_DECREF(result); return NULL; } - dict = elements_to_dict(self, string + 4, (unsigned)size - 5, - as_class, tz_aware, uuid_subtype); + dict = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); if (!dict) { Py_DECREF(result); + destroy_codec_options(&options); return NULL; } PyList_Append(result, dict); @@ -2415,6 +2462,7 @@ 
static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { total_size -= size; } + destroy_codec_options(&options); return result; } @@ -2494,6 +2542,8 @@ init_cbson(void) _cbson_API[_cbson_write_dict_INDEX] = (void *) write_dict; _cbson_API[_cbson_write_pair_INDEX] = (void *) write_pair; _cbson_API[_cbson_decode_and_write_pair_INDEX] = (void *) decode_and_write_pair; + _cbson_API[_cbson_convert_codec_options_INDEX] = (void *) convert_codec_options; + _cbson_API[_cbson_destroy_codec_options_INDEX] = (void *) destroy_codec_options; #if PY_VERSION_HEX >= 0x03010000 /* PyCapsule is new in python 3.1 */ diff --git a/bson/_cbsonmodule.h b/bson/_cbsonmodule.h index 0b207a32a..b363b926c 100644 --- a/bson/_cbsonmodule.h +++ b/bson/_cbsonmodule.h @@ -52,6 +52,12 @@ typedef int Py_ssize_t; #define STRCAT(dest, n, src) strcat((dest), (src)) #endif +typedef struct codec_options_t { + PyObject* as_class; + unsigned char tz_aware; + unsigned char uuid_rep; +} codec_options_t; + /* C API functions */ #define _cbson_buffer_write_bytes_INDEX 0 #define _cbson_buffer_write_bytes_RETURN int @@ -59,18 +65,26 @@ typedef int Py_ssize_t; #define _cbson_write_dict_INDEX 1 #define _cbson_write_dict_RETURN int -#define _cbson_write_dict_PROTO (PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) +#define _cbson_write_dict_PROTO (PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, const codec_options_t* options, unsigned char top_level) #define _cbson_write_pair_INDEX 2 #define _cbson_write_pair_RETURN int -#define _cbson_write_pair_PROTO (PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char allow_id) +#define _cbson_write_pair_PROTO (PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, const codec_options_t* options, unsigned char 
allow_id) #define _cbson_decode_and_write_pair_INDEX 3 #define _cbson_decode_and_write_pair_RETURN int -#define _cbson_decode_and_write_pair_PROTO (PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) +#define _cbson_decode_and_write_pair_PROTO (PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, const codec_options_t* options, unsigned char top_level) + +#define _cbson_convert_codec_options_INDEX 4 +#define _cbson_convert_codec_options_RETURN int +#define _cbson_convert_codec_options_PROTO (PyObject* options_obj, void* p) + +#define _cbson_destroy_codec_options_INDEX 5 +#define _cbson_destroy_codec_options_RETURN void +#define _cbson_destroy_codec_options_PROTO (codec_options_t* options) /* Total number of C API pointers */ -#define _cbson_API_POINTER_COUNT 4 +#define _cbson_API_POINTER_COUNT 6 #ifdef _CBSON_MODULE /* This section is used when compiling _cbsonmodule */ @@ -83,6 +97,10 @@ static _cbson_write_pair_RETURN write_pair _cbson_write_pair_PROTO; static _cbson_decode_and_write_pair_RETURN decode_and_write_pair _cbson_decode_and_write_pair_PROTO; +static _cbson_convert_codec_options_RETURN convert_codec_options _cbson_convert_codec_options_PROTO; + +static _cbson_destroy_codec_options_RETURN destroy_codec_options _cbson_destroy_codec_options_PROTO; + #else /* This section is used in modules that use _cbsonmodule's API */ @@ -96,6 +114,10 @@ static void **_cbson_API; #define decode_and_write_pair (*(_cbson_decode_and_write_pair_RETURN (*)_cbson_decode_and_write_pair_PROTO) _cbson_API[_cbson_decode_and_write_pair_INDEX]) +#define convert_codec_options (*(_cbson_convert_codec_options_RETURN (*)_cbson_convert_codec_options_PROTO) _cbson_API[_cbson_convert_codec_options_INDEX]) + +#define destroy_codec_options (*(_cbson_destroy_codec_options_RETURN (*)_cbson_destroy_codec_options_PROTO) _cbson_API[_cbson_destroy_codec_options_INDEX]) 
+ #define _cbson_IMPORT _cbson_API = (void **)PyCapsule_Import("_cbson._C_API", 0) #endif diff --git a/bson/binary.py b/bson/binary.py index 2ac1dfea9..679d0398d 100644 --- a/bson/binary.py +++ b/bson/binary.py @@ -98,6 +98,11 @@ byte order and binary subtype :data:`OLD_UUID_SUBTYPE`. ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE) ALL_UUID_REPRESENTATIONS = (STANDARD, PYTHON_LEGACY, JAVA_LEGACY, CSHARP_LEGACY) +UUID_REPRESENTATION_NAMES = { + PYTHON_LEGACY: 'PYTHON_LEGACY', + STANDARD: 'STANDARD', + JAVA_LEGACY: 'JAVA_LEGACY', + CSHARP_LEGACY: 'CSHARP_LEGACY'} MD5_SUBTYPE = 5 """BSON binary subtype for an MD5 hash. diff --git a/bson/codec_options.py b/bson/codec_options.py index e75ca07d0..2c6260373 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -14,12 +14,18 @@ """Tools for specifying BSON codec options.""" -from collections import MutableMapping +from collections import MutableMapping, namedtuple -from bson.binary import ALL_UUID_REPRESENTATIONS, PYTHON_LEGACY +from bson.binary import (ALL_UUID_REPRESENTATIONS, + PYTHON_LEGACY, + UUID_REPRESENTATION_NAMES) -class CodecOptions(object): +_options_base = namedtuple('CodecOptions', + ('as_class', 'tz_aware', 'uuid_representation')) + + +class CodecOptions(_options_base): """Encapsulates BSON options used in CRUD operations. :Parameters: @@ -34,10 +40,8 @@ class CodecOptions(object): :data:`~bson.binary.PYTHON_LEGACY`. 
""" - __slots__ = ("__as_class", "__tz_aware", "__uuid_rep") - - def __init__(self, as_class=dict, - tz_aware=False, uuid_representation=PYTHON_LEGACY): + def __new__(cls, as_class=dict, + tz_aware=False, uuid_representation=PYTHON_LEGACY): if not issubclass(as_class, MutableMapping): raise TypeError("document_class must be a " "subclass of MutableMapping") @@ -47,39 +51,29 @@ class CodecOptions(object): raise ValueError("uuid_representation must be a value " "from bson.binary.ALL_UUID_REPRESENTATIONS") - self.__as_class = as_class - self.__tz_aware = tz_aware - self.__uuid_rep = uuid_representation + return tuple.__new__(cls, (as_class, tz_aware, uuid_representation)) - @property - def as_class(self): - """Read only property for as_class.""" - return self.__as_class + def __repr__(self): + as_class_repr = ( + 'dict' if self.as_class is dict else repr(self.as_class)) - @property - def tz_aware(self): - """Read only property for tz_aware.""" - return self.__tz_aware + uuid_rep_repr = UUID_REPRESENTATION_NAMES.get(self.uuid_representation, + self.uuid_representation) - @property - def uuid_representation(self): - """Read only property for uuid_representation.""" - return self.__uuid_rep + return ( + 'CodecOptions(as_class=%s, tz_aware=%r, uuid_representation=%s)' + % (as_class_repr, self.tz_aware, uuid_rep_repr)) - def __eq__(self, other): - if isinstance(other, CodecOptions): - return (self.__as_class == other.as_class and - self.__tz_aware == other.tz_aware and - self.__uuid_rep == other.uuid_representation) - raise NotImplementedError - def __ne__(self, other): - return self != other +DEFAULT_CODEC_OPTIONS = CodecOptions() def _parse_codec_options(options): """Parse BSON codec options.""" - as_class = options.get('document_class', dict) - tz_aware = options.get('tz_aware', False) - uuid_rep = options.get('uuidrepresentation', PYTHON_LEGACY) - return CodecOptions(as_class, tz_aware, uuid_rep) + return CodecOptions( + as_class=options.get( + 'document_class', 
DEFAULT_CODEC_OPTIONS.as_class), + tz_aware=options.get( + 'tz_aware', DEFAULT_CODEC_OPTIONS.tz_aware), + uuid_representation=options.get( + 'uuidrepresentation', DEFAULT_CODEC_OPTIONS.uuid_representation)) diff --git a/doc/changelog.rst b/doc/changelog.rst index ceea0524d..df7d1ca90 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -111,6 +111,22 @@ patterns, see `PYTHON-500`_. Use :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a BSON regular expression to a Python regular expression object. +The `as_class`, `tz_aware`, and `uuid_subtype` options are removed from all +BSON encoding and decoding methods. Use +:class:`~bson.codec_options.CodecOptions` to configure these options. The +APIs affected are: + +- :func:`~bson.decode_all` +- :func:`~bson.decode_iter` +- :func:`~bson.decode_file_iter` +- :meth:`~bson.BSON.encode` +- :meth:`~bson.BSON.decode` + +This is a breaking change for any application that uses the BSON API directly +and changes any of the named parameter defaults. No changes are required for +applications that use the default values for these options. The behavior +remains the same. + .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 Issues Resolved diff --git a/pymongo/_cmessagemodule.c b/pymongo/_cmessagemodule.c index 4df393d2b..1409108e5 100644 --- a/pymongo/_cmessagemodule.c +++ b/pymongo/_cmessagemodule.c @@ -64,7 +64,8 @@ static PyObject* _error(char* name) { /* add a lastError message on the end of the buffer. 
* returns 0 on failure */ static int add_last_error(PyObject* self, buffer_t buffer, - int request_id, char* ns, int nslen, PyObject* args) { + int request_id, char* ns, int nslen, + codec_options_t* options, PyObject* args) { struct module_state *state = GETSTATE(self); int message_start; @@ -110,7 +111,9 @@ static int add_last_error(PyObject* self, buffer_t buffer, /* getlasterror: 1 */ if (!(one = PyLong_FromLong(1))) return 0; - if (!write_pair(state->_cbson, buffer, "getlasterror", 12, one, 0, 4, 1)) { + + if (!write_pair(state->_cbson, buffer, "getlasterror", 12, one, 0, + options, 1)) { Py_DECREF(one); return 0; } @@ -118,7 +121,8 @@ static int add_last_error(PyObject* self, buffer_t buffer, /* getlasterror options */ while (PyDict_Next(args, &pos, &key, &value)) { - if (!decode_and_write_pair(state->_cbson, buffer, key, value, 0, 4, 0)) { + if (!decode_and_write_pair(state->_cbson, buffer, key, value, 0, + options, 0)) { return 0; } } @@ -170,42 +174,44 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { PyObject* doc; PyObject* iterator; int before, cur_size, max_size = 0; - int options = 0; + int flags = 0; unsigned char check_keys; unsigned char safe; unsigned char continue_on_error; - unsigned char uuid_subtype; + codec_options_t options; PyObject* last_error_args; buffer_t buffer; int length_location, message_length; PyObject* result; - if (!PyArg_ParseTuple(args, "et#ObbObb", + if (!PyArg_ParseTuple(args, "et#ObbObO&", "utf-8", &collection_name, &collection_name_length, &docs, &check_keys, &safe, &last_error_args, - &continue_on_error, &uuid_subtype)) { + &continue_on_error, + convert_codec_options, &options)) { return NULL; } if (continue_on_error) { - options += 1; + flags += 1; } - buffer = buffer_new(); if (!buffer) { PyErr_NoMemory(); + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } length_location = init_insert_buffer(buffer, request_id, - options, + flags, collection_name, 
collection_name_length); if (length_location == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); buffer_free(buffer); return NULL; @@ -218,15 +224,18 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { PyErr_SetString(InvalidOperation, "input is not iterable"); Py_DECREF(InvalidOperation); } + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; } while ((doc = PyIter_Next(iterator)) != NULL) { before = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, doc, check_keys, + &options, 1)) { Py_DECREF(doc); Py_DECREF(iterator); + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -238,6 +247,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { Py_DECREF(iterator); if (PyErr_Occurred()) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -249,6 +259,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { PyErr_SetString(InvalidOperation, "cannot do an empty bulk insert"); Py_DECREF(InvalidOperation); } + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -259,7 +270,8 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { if (safe) { if (!add_last_error(self, buffer, request_id, collection_name, - collection_name_length, last_error_args)) { + collection_name_length, &options, last_error_args)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -273,6 +285,7 @@ static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { buffer_get_buffer(buffer), buffer_get_position(buffer), max_size); + destroy_codec_options(&options); buffer_free(buffer); return result; } @@ -291,31 +304,33 @@ static PyObject* 
_cbson_update_message(PyObject* self, PyObject* args) { unsigned char upsert; unsigned char safe; unsigned char check_keys; - unsigned char uuid_subtype; + codec_options_t options; PyObject* last_error_args; - int options; + int flags; buffer_t buffer; int length_location, message_length; PyObject* result; - if (!PyArg_ParseTuple(args, "et#bbOObObb", + if (!PyArg_ParseTuple(args, "et#bbOObObO&", "utf-8", &collection_name, &collection_name_length, &upsert, &multi, &spec, &doc, &safe, - &last_error_args, &check_keys, &uuid_subtype)) { + &last_error_args, &check_keys, + convert_codec_options, &options)) { return NULL; } - options = 0; + flags = 0; if (upsert) { - options += 1; + flags += 1; } if (multi) { - options += 2; + flags += 2; } buffer = buffer_new(); if (!buffer) { + destroy_codec_options(&options); PyErr_NoMemory(); PyMem_Free(collection_name); return NULL; @@ -324,6 +339,7 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { // save space for message length length_location = buffer_save_space(buffer, 4); if (length_location == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); PyErr_NoMemory(); return NULL; @@ -337,14 +353,16 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { !buffer_write_bytes(buffer, collection_name, collection_name_length + 1) || - !buffer_write_bytes(buffer, (const char*)&options, 4)) { + !buffer_write_bytes(buffer, (const char*)&flags, 4)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; } before = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, spec, 0, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, spec, 0, &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -352,7 +370,9 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { max_size = buffer_get_position(buffer) - before; before = 
buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, doc, check_keys, + &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -365,7 +385,8 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { if (safe) { if (!add_last_error(self, buffer, request_id, collection_name, - collection_name_length, last_error_args)) { + collection_name_length, &options, last_error_args)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -379,6 +400,7 @@ static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { buffer_get_buffer(buffer), buffer_get_position(buffer), max_size); + destroy_codec_options(&options); buffer_free(buffer); return result; } @@ -388,31 +410,33 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { struct module_state *state = GETSTATE(self); int request_id = rand(); - unsigned int options; + unsigned int flags; char* collection_name = NULL; int collection_name_length; int begin, cur_size, max_size = 0; int num_to_skip; int num_to_return; PyObject* query; - PyObject* field_selector = Py_None; - unsigned char uuid_subtype = 3; + PyObject* field_selector; + codec_options_t options; buffer_t buffer; int length_location, message_length; PyObject* result; - if (!PyArg_ParseTuple(args, "Iet#iiO|Ob", - &options, + if (!PyArg_ParseTuple(args, "Iet#iiOOO&", + &flags, "utf-8", &collection_name, &collection_name_length, &num_to_skip, &num_to_return, - &query, &field_selector, &uuid_subtype)) { + &query, &field_selector, + convert_codec_options, &options)) { return NULL; } buffer = buffer_new(); if (!buffer) { PyErr_NoMemory(); + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } @@ -420,24 +444,27 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { // save space 
for message length length_location = buffer_save_space(buffer, 4); if (length_location == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); PyErr_NoMemory(); return NULL; } if (!buffer_write_bytes(buffer, (const char*)&request_id, 4) || !buffer_write_bytes(buffer, "\x00\x00\x00\x00\xd4\x07\x00\x00", 8) || - !buffer_write_bytes(buffer, (const char*)&options, 4) || + !buffer_write_bytes(buffer, (const char*)&flags, 4) || !buffer_write_bytes(buffer, collection_name, collection_name_length + 1) || !buffer_write_bytes(buffer, (const char*)&num_to_skip, 4) || !buffer_write_bytes(buffer, (const char*)&num_to_return, 4)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; } begin = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, query, 0, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, query, 0, &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -446,7 +473,9 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { if (field_selector != Py_None) { begin = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, field_selector, 0, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, field_selector, 0, + &options, 1)) { + destroy_codec_options(&options); buffer_free(buffer); PyMem_Free(collection_name); return NULL; @@ -465,6 +494,7 @@ static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { buffer_get_buffer(buffer), buffer_get_position(buffer), max_size); + destroy_codec_options(&options); buffer_free(buffer); return result; } @@ -550,12 +580,13 @@ _set_document_too_large(int size, long max) { static PyObject* _send_insert(PyObject* self, PyObject* client, PyObject* gle_args, buffer_t buffer, - char* coll_name, int coll_len, int request_id, int safe) { + char* coll_name, int coll_len, int request_id, int safe, + codec_options_t* options) { PyObject* 
result; if (safe) { if (!add_last_error(self, buffer, request_id, - coll_name, coll_len, gle_args)) { + coll_name, coll_len, options, gle_args)) { return NULL; } } @@ -573,7 +604,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { /* NOTE just using a random number as the request_id */ int request_id = rand(); - int send_safe, options = 0; + int send_safe, flags = 0; int length_location, message_length; int collection_name_length; char* collection_name = NULL; @@ -588,32 +619,32 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { unsigned char check_keys; unsigned char safe; unsigned char continue_on_error; - unsigned char uuid_subtype; + codec_options_t options; unsigned char empty = 1; long max_bson_size; long max_message_size; buffer_t buffer; PyObject *exc_type = NULL, *exc_value = NULL, *exc_trace = NULL; - if (!PyArg_ParseTuple(args, "et#ObbObbO", + if (!PyArg_ParseTuple(args, "et#ObbObO&O", "utf-8", &collection_name, &collection_name_length, &docs, &check_keys, &safe, &last_error_args, &continue_on_error, - &uuid_subtype, &client)) { + convert_codec_options, &options, + &client)) { return NULL; } if (continue_on_error) { - options += 1; + flags += 1; } /* * If we are doing unacknowledged writes *and* continue_on_error * is True it's pointless (and slower) to send GLE. 
*/ send_safe = (safe || !continue_on_error); - max_bson_size_obj = PyObject_GetAttrString(client, "max_bson_size"); #if PY_MAJOR_VERSION >= 3 max_bson_size = PyLong_AsLong(max_bson_size_obj); @@ -622,6 +653,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_bson_size_obj); if (max_bson_size == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } @@ -634,12 +666,14 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_message_size_obj); if (max_message_size == -1) { + destroy_codec_options(&options); PyMem_Free(collection_name); return NULL; } buffer = buffer_new(); if (!buffer) { + destroy_codec_options(&options); PyErr_NoMemory(); PyMem_Free(collection_name); return NULL; @@ -647,7 +681,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { length_location = init_insert_buffer(buffer, request_id, - options, + flags, collection_name, collection_name_length); if (length_location == -1) { @@ -666,7 +700,8 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { while ((doc = PyIter_Next(iterator)) != NULL) { int before = buffer_get_position(buffer); int cur_size; - if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + if (!write_dict(state->_cbson, buffer, doc, check_keys, + &options, 1)) { Py_DECREF(doc); goto iterfail; } @@ -682,7 +717,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { &message_length, 4); result = _send_insert(self, client, last_error_args, buffer, collection_name, collection_name_length, - request_id, send_safe); + request_id, send_safe, &options); if (!result) goto iterfail; Py_DECREF(result); @@ -703,7 +738,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { } message_start = init_insert_buffer(new_buffer, new_request_id, - options, + flags, collection_name, collection_name_length); if 
(message_start == -1) { @@ -725,7 +760,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { result = _send_insert(self, client, last_error_args, buffer, collection_name, collection_name_length, - request_id, send_safe); + request_id, send_safe, &options); buffer_free(buffer); buffer = new_buffer; @@ -796,7 +831,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { /* Send the last (or only) batch */ result = _send_insert(self, client, last_error_args, buffer, collection_name, collection_name_length, - request_id, safe); + request_id, safe, &options); PyMem_Free(collection_name); buffer_free(buffer); @@ -877,7 +912,7 @@ _command_buffer_new(char* ns, int ns_len) { if (!buffer_write_bytes(buffer, "\x00\x00\x00\x00" /* responseTo */ "\xd4\x07\x00\x00" /* opcode */ - "\x00\x00\x00\x00", /* options */ + "\x00\x00\x00\x00", /* flags */ 12) || !buffer_write_bytes(buffer, ns, ns_len + 1) || /* namespace */ @@ -920,14 +955,15 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { PyObject* results; unsigned char op; unsigned char check_keys; - unsigned char uuid_subtype; + codec_options_t options; unsigned char empty = 1; unsigned char errors = 0; buffer_t buffer; - if (!PyArg_ParseTuple(args, "et#bOObbO", "utf-8", - &ns, &ns_len, &op, &command, &docs, - &check_keys, &uuid_subtype, &client)) { + if (!PyArg_ParseTuple(args, "et#bOObO&O", "utf-8", + &ns, &ns_len, &op, &command, &docs, &check_keys, + convert_codec_options, &options, + &client)) { return NULL; } @@ -939,6 +975,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_bson_size_obj); if (max_bson_size == -1) { + destroy_codec_options(&options); PyMem_Free(ns); return NULL; } @@ -956,6 +993,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { #endif Py_XDECREF(max_write_batch_size_obj); if (max_write_batch_size == -1) { + destroy_codec_options(&options); PyMem_Free(ns); return NULL; } @@ 
-964,11 +1002,13 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { ordered = !((PyDict_GetItemString(command, "ordered")) == Py_False); if (!(results = PyList_New(0))) { + destroy_codec_options(&options); PyMem_Free(ns); return NULL; } if (!(buffer = _command_buffer_new(ns, ns_len))) { + destroy_codec_options(&options); PyMem_Free(ns); Py_DECREF(results); return NULL; @@ -978,7 +1018,8 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { /* Position of command document length */ cmd_len_loc = buffer_get_position(buffer); - if (!write_dict(state->_cbson, buffer, command, 0, uuid_subtype, 0)) { + if (!write_dict(state->_cbson, buffer, command, 0, + &options, 0)) { goto cmdfail; } @@ -1051,7 +1092,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { } cur_doc_begin = buffer_get_position(buffer); if (!write_dict(state->_cbson, buffer, doc, - check_keys, uuid_subtype, 1)) { + check_keys, &options, 1)) { Py_DECREF(doc); goto cmditerfail; } @@ -1130,6 +1171,7 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { Py_DECREF(result); if (errors && ordered) { + destroy_codec_options(&options); Py_DECREF(iterator); buffer_free(buffer); return results; @@ -1174,11 +1216,13 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) { PyList_Append(results, result); Py_DECREF(result); + destroy_codec_options(&options); return results; cmditerfail: Py_DECREF(iterator); cmdfail: + destroy_codec_options(&options); Py_DECREF(results); buffer_free(buffer); return NULL; diff --git a/pymongo/bulk.py b/pymongo/bulk.py index 571425cec..8e44bb06e 100644 --- a/pymongo/bulk.py +++ b/pymongo/bulk.py @@ -506,7 +506,6 @@ class _Bulk(object): def execute_command(self, generator, write_concern): """Execute using write commands. """ - uuid_representation = self.collection.codec_options.uuid_representation client = self.collection.database.connection # nModified is only reported for write commands, not legacy ops. 
full_result = { @@ -527,7 +526,7 @@ class _Bulk(object): results = _do_batched_write_command( self.namespace, run.op_type, cmd, - run.ops, True, uuid_representation, client) + run.ops, True, self.collection.codec_options, client) _merge_command(run, full_result, results) # We're supposed to continue if errors are @@ -584,10 +583,9 @@ class _Bulk(object): # We have to do this here since Collection.insert # throws away results and we need to check for jnote. client = self.collection.database.connection - uuid_representation = self.collection.codec_options.uuid_representation return client._send_message( insert(self.name, [operation], True, True, - write_concern, False, uuid_representation), True) + write_concern, False, self.collection.codec_options), True) def execute_legacy(self, generator, write_concern): """Execute using legacy wire protocol ops. diff --git a/pymongo/collection.py b/pymongo/collection.py index d03ac74b9..157e9f69b 100644 --- a/pymongo/collection.py +++ b/pymongo/collection.py @@ -470,7 +470,6 @@ class Collection(common.BaseObject): .. 
mongodoc:: insert """ client = self.database.connection - uuid_representation = self.codec_options.uuid_representation docs = doc_or_docs return_one = False if isinstance(docs, collections.MutableMapping): @@ -511,14 +510,14 @@ class Collection(common.BaseObject): command['writeConcern'] = concern results = message._do_batched_write_command( - self.database.name + ".$cmd", _INSERT, command, - gen(), check_keys, uuid_representation, client) + self.database.name + ".$cmd", _INSERT, command, + gen(), check_keys, self.codec_options, client) _check_write_command_response(results) else: # Legacy batched OP_INSERT message._do_batched_insert(self.__full_name, gen(), check_keys, safe, concern, continue_on_error, - uuid_representation, client) + self.codec_options, client) if return_one: return ids[0] @@ -641,7 +640,6 @@ class Collection(common.BaseObject): check_keys = False client = self.database.connection - uuid_representation = self.codec_options.uuid_representation if client._writable_max_wire_version() > 1 and safe: # Update command command = SON([('update', self.name)]) @@ -653,7 +651,7 @@ class Collection(common.BaseObject): results = message._do_batched_write_command( self.database.name + '.$cmd', _UPDATE, command, - docs, check_keys, uuid_representation, client) + docs, check_keys, self.codec_options, client) _check_write_command_response(results) _, result = results[0] @@ -674,7 +672,7 @@ class Collection(common.BaseObject): return client._send_message( message.update(self.__full_name, upsert, multi, spec, document, safe, concern, - check_keys, uuid_representation), safe) + check_keys, self.codec_options), safe) def drop(self): """Alias for :meth:`~pymongo.database.Database.drop_collection`. 
@@ -754,7 +752,6 @@ class Collection(common.BaseObject): safe = concern.get("w") != 0 client = self.database.connection - uuid_representation = self.codec_options.uuid_representation if client._writable_max_wire_version() > 1 and safe: # Delete command command = SON([('delete', self.name)]) @@ -765,7 +762,7 @@ class Collection(common.BaseObject): results = message._do_batched_write_command( self.database.name + '.$cmd', _DELETE, command, - docs, False, uuid_representation, client) + docs, False, self.codec_options, client) _check_write_command_response(results) _, result = results[0] @@ -775,7 +772,7 @@ class Collection(common.BaseObject): # Legacy OP_DELETE return client._send_message( message.delete(self.__full_name, spec_or_id, safe, - concern, uuid_representation, + concern, self.codec_options, int(not multi)), safe) def find_one(self, filter=None, *args, **kwargs): diff --git a/pymongo/cursor.py b/pymongo/cursor.py index 0c9bb2c09..b983ccde4 100644 --- a/pymongo/cursor.py +++ b/pymongo/cursor.py @@ -912,7 +912,7 @@ class Cursor(object): self.__collection.full_name, self.__skip, ntoreturn, self.__query_spec(), self.__projection, - self.__codec_options.uuid_representation)) + self.__codec_options)) if not self.__id: self.__killed = True elif self.__id: # Get More diff --git a/pymongo/helpers.py b/pymongo/helpers.py index 0d5564647..1d1cf15a5 100644 --- a/pymongo/helpers.py +++ b/pymongo/helpers.py @@ -115,10 +115,7 @@ def _unpack_response(response, cursor_id=None, codec_options=CodecOptions()): result["cursor_id"] = struct.unpack(" client.max_bson_size) @@ -274,7 +279,7 @@ if _use_c: def _do_batched_write_command(namespace, operation, command, - docs, check_keys, uuid_subtype, client): + docs, check_keys, opts, client): """Execute a batch of insert, update, or delete commands. 
""" max_bson_size = client.max_bson_size @@ -350,7 +355,7 @@ def _do_batched_write_command(namespace, operation, command, has_docs = True # Encode the current operation key = b(str(idx)) - value = bson.BSON.encode(doc, check_keys, uuid_subtype) + value = bson.BSON.encode(doc, check_keys, opts) # Send a batch? enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size enough_documents = (idx >= max_write_batch_size) diff --git a/pymongo/monitor.py b/pymongo/monitor.py index 8cbbd8d69..e0b3ff1ca 100644 --- a/pymongo/monitor.py +++ b/pymongo/monitor.py @@ -16,6 +16,7 @@ import weakref +from bson.codec_options import DEFAULT_CODEC_OPTIONS from pymongo import common, helpers, message, periodic_executor from pymongo.server_type import SERVER_TYPE from pymongo.ismaster import IsMaster @@ -154,7 +155,8 @@ class Monitor(object): """ start = _time() request_id, msg, _ = message.query( - 0, 'admin.$cmd', 0, -1, {'ismaster': 1}) + 0, 'admin.$cmd', 0, -1, {'ismaster': 1}, + None, DEFAULT_CODEC_OPTIONS) sock_info.send_message(msg) raw_response = sock_info.receive_message(1, request_id) diff --git a/pymongo/network.py b/pymongo/network.py index 29844e7fb..9848259c9 100644 --- a/pymongo/network.py +++ b/pymongo/network.py @@ -17,6 +17,7 @@ import select import struct +from bson.codec_options import DEFAULT_CODEC_OPTIONS from pymongo import helpers, message from pymongo.errors import AutoReconnect @@ -32,7 +33,8 @@ def command(sock, dbname, spec): - `spec`: a command document as a dict, SON, or mapping object """ ns = dbname + '.$cmd' - request_id, msg, _ = message.query(0, ns, 0, -1, spec) + request_id, msg, _ = message.query(0, ns, 0, -1, spec, + None, DEFAULT_CODEC_OPTIONS) sock.sendall(msg) response = receive_message(sock, 1, request_id) unpacked = helpers._unpack_response(response)['data'][0] diff --git a/test/test_binary.py b/test/test_binary.py index a77a5c6b0..cd94df048 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -132,44 +132,55 @@ class 
TestBinary(unittest.TestCase): def test_legacy_java_uuid(self): # Test decoding data = self.java_data - docs = bson.decode_all(data, SON, False, PYTHON_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, STANDARD) + docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, JAVA_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) for d in docs: self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) # Test encoding - encoded = b''.join([bson.BSON.encode(doc, - uuid_subtype=PYTHON_LEGACY) - for doc in docs]) + encoded = b''.join([ + bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=PYTHON_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=STANDARD) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=STANDARD)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=CSHARP_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=JAVA_LEGACY)) + for doc in docs]) self.assertEqual(data, encoded) @client_context.require_connection def 
test_legacy_java_uuid_roundtrip(self): data = self.java_data - docs = bson.decode_all(data, SON, False, JAVA_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) client_context.client.pymongo_test.drop_collection('java_uuid') db = client_context.client.pymongo_test @@ -191,44 +202,55 @@ class TestBinary(unittest.TestCase): data = self.csharp_data # Test decoding - docs = bson.decode_all(data, SON, False, PYTHON_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, STANDARD) + docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, JAVA_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) for d in docs: self.assertNotEqual(d['newguid'], uuid.UUID(d['newguidstring'])) - docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) for d in docs: self.assertEqual(d['newguid'], uuid.UUID(d['newguidstring'])) # Test encoding - encoded = b''.join([bson.BSON.encode(doc, - uuid_subtype=PYTHON_LEGACY) - for doc in docs]) + encoded = b''.join([ + bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=PYTHON_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=STANDARD) - for doc in docs]) + encoded = b''.join([ + bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=STANDARD)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = b''.join([bson.BSON.encode(doc, uuid_subtype=JAVA_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=JAVA_LEGACY)) + for doc in docs]) self.assertNotEqual(data, encoded) - encoded = 
b''.join([bson.BSON.encode(doc, uuid_subtype=CSHARP_LEGACY) - for doc in docs]) + encoded = b''.join( + [bson.BSON.encode(doc, + False, + CodecOptions(uuid_representation=CSHARP_LEGACY)) + for doc in docs]) self.assertEqual(data, encoded) @client_context.require_connection def test_legacy_csharp_uuid_roundtrip(self): data = self.csharp_data - docs = bson.decode_all(data, SON, False, CSHARP_LEGACY) + docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) client_context.client.pymongo_test.drop_collection('csharp_uuid') db = client_context.client.pymongo_test diff --git a/test/test_bson.py b/test/test_bson.py index 888875e12..c3903eb8b 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -33,6 +33,7 @@ from bson import (BSON, Regex) from bson.binary import Binary, UUIDLegacy from bson.code import Code +from bson.codec_options import CodecOptions from bson.int64 import Int64 from bson.objectid import ObjectId from bson.dbref import DBRef @@ -130,7 +131,8 @@ class TestBSON(unittest.TestCase): helper({"$field": Code("return function(){ return x; }", scope={'x': False})}) def encode_then_decode(doc): - return doc_class(doc) == BSON.encode(doc).decode(as_class=doc_class) + return doc_class(doc) == BSON.encode(doc).decode( + CodecOptions(as_class=doc_class)) qcheck.check_unittest(self, encode_then_decode, qcheck.gen_mongo_dict(3)) @@ -425,7 +427,8 @@ class TestBSON(unittest.TestCase): as_utc = (aware - aware.utcoffset()).replace(tzinfo=utc) self.assertEqual(datetime.datetime(1993, 4, 3, 16, 45, tzinfo=utc), as_utc) - after = BSON.encode({"date": aware}).decode(tz_aware=True)["date"] + after = BSON.encode({"date": aware}).decode( + CodecOptions(tz_aware=True))["date"] self.assertEqual(utc, after.tzinfo) self.assertEqual(as_utc, after) @@ -584,14 +587,19 @@ class TestBSON(unittest.TestCase): raise def test_custom_class(self): - self.assertTrue(isinstance(BSON.encode({}).decode(), dict)) - self.assertFalse(isinstance(BSON.encode({}).decode(), SON)) - 
self.assertTrue(isinstance(BSON.encode({}).decode(SON), SON)) + self.assertIsInstance(BSON.encode({}).decode(), dict) + self.assertNotIsInstance(BSON.encode({}).decode(), SON) + self.assertIsInstance( + BSON.encode({}).decode(CodecOptions(as_class=SON)), + SON) - self.assertEqual(1, BSON.encode({"x": 1}).decode(SON)["x"]) + self.assertEqual( + 1, + BSON.encode({"x": 1}).decode(CodecOptions(as_class=SON))["x"]) x = BSON.encode({"x": [{"y": 1}]}) - self.assertTrue(isinstance(x.decode(SON)["x"][0], SON)) + self.assertIsInstance(x.decode(CodecOptions(as_class=SON))["x"][0], + SON) def test_subclasses(self): # make sure we can serialize subclasses of native Python types. @@ -620,7 +628,9 @@ class TestBSON(unittest.TestCase): except ImportError: raise SkipTest("No OrderedDict") d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)]) - self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict)) + self.assertEqual( + d, + BSON.encode(d).decode(CodecOptions(as_class=OrderedDict))) def test_bson_regex(self): # Invalid Python regex, though valid PCRE. 
@@ -752,5 +762,41 @@ class TestBSON(unittest.TestCase): {"_id": {'$oid': "52d0b971b3ba219fdeb4170e"}}, True) BSON.encode({"_id": {'$oid': "52d0b971b3ba219fdeb4170e"}}) + +class TestCodecOptions(unittest.TestCase): + def test_as_class(self): + self.assertRaises(TypeError, CodecOptions, as_class=object) + self.assertIs(SON, CodecOptions(as_class=SON).as_class) + + def test_tz_aware(self): + self.assertRaises(TypeError, CodecOptions, tz_aware=1) + self.assertFalse(CodecOptions().tz_aware) + self.assertTrue(CodecOptions(tz_aware=True).tz_aware) + + def test_uuid_representation(self): + self.assertRaises(ValueError, CodecOptions, uuid_representation=None) + self.assertRaises(ValueError, CodecOptions, uuid_representation=7) + self.assertRaises(ValueError, CodecOptions, uuid_representation=2) + + def test_codec_options_repr(self): + r = ('CodecOptions(as_class=dict, tz_aware=False, ' + 'uuid_representation=PYTHON_LEGACY)') + self.assertEqual(r, repr(CodecOptions())) + + def test_decode_all_defaults(self): + # Test decode_all()'s default as_class is dict and tz_aware is False. + # The default uuid_representation is PYTHON_LEGACY but this decodes + # same as STANDARD, so all this test proves about UUID decoding is + # that it's not CSHARP_LEGACY or JAVA_LEGACY. 
+ doc = {'sub_document': {}, + 'uuid': uuid.uuid4(), + 'dt': datetime.datetime.utcnow()} + + decoded = bson.decode_all(bson.BSON.encode(doc))[0] + self.assertIsInstance(decoded['sub_document'], dict) + self.assertEqual(decoded['uuid'], doc['uuid']) + self.assertIsNone(decoded['dt'].tzinfo) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_common.py b/test/test_common.py index a82a04e9d..bda81d8d6 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -41,9 +41,6 @@ class TestCommon(IntegrationTest): coll = self.db.uuid coll.drop() - self.assertRaises(ValueError, CodecOptions, uuid_representation=7) - self.assertRaises(ValueError, CodecOptions, uuid_representation=2) - # Test property self.assertEqual(PYTHON_LEGACY, coll.codec_options.uuid_representation)