diff --git a/bson/__init__.py b/bson/__init__.py index a9e214523..3d8a416c3 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -31,7 +31,8 @@ from bson.binary import (Binary, OLD_UUID_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY, UUIDLegacy) from bson.code import Code -from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS +from bson.codec_options import ( + CodecOptions, DEFAULT_CODEC_OPTIONS, _raw_document_class) from bson.dbref import DBRef from bson.errors import (InvalidBSON, InvalidDocument, @@ -132,6 +133,10 @@ def _get_object(data, position, obj_end, opts): raise InvalidBSON("bad eoo") if end >= obj_end: raise InvalidBSON("invalid object length") + if _raw_document_class(opts.document_class): + return (opts.document_class(data[position:end + 1], opts), + position + obj_size) + obj = _elements_to_dict(data, position + 4, end, opts) position += obj_size @@ -147,6 +152,7 @@ def _get_array(data, position, obj_end, opts): end = position + size - 1 if data[end:end + 1] != b"\x00": raise InvalidBSON("bad eoo") + position += 4 end -= 1 result = [] @@ -304,14 +310,21 @@ def _element_to_dict(data, position, obj_end, opts): value, position = _ELEMENT_GETTER[element_type](data, position, obj_end, opts) return element_name, value, position +if _USE_C: + _element_to_dict = _cbson._element_to_dict + + +def _iterate_elements(data, position, obj_end, opts): + end = obj_end - 1 + while position < end: + (key, value, position) = _element_to_dict(data, position, obj_end, opts) + yield key, value def _elements_to_dict(data, position, obj_end, opts): """Decode a BSON document.""" result = opts.document_class() - end = obj_end - 1 - while position < end: - (key, value, position) = _element_to_dict(data, position, obj_end, opts) + for key, value in _iterate_elements(data, position, obj_end, opts): result[key] = value return result @@ -327,6 +340,8 @@ def _bson_to_dict(data, opts): if data[obj_size - 1:obj_size] != b"\x00": raise InvalidBSON("bad eoo") try: + if _raw_document_class(opts.document_class): + return opts.document_class(data, opts) return _elements_to_dict(data, 4, obj_size - 1, opts) except InvalidBSON: raise @@ -429,6 +444,8 @@ else: def _encode_mapping(name, value, check_keys, opts): """Encode a mapping type.""" + if _raw_document_class(value): + return b'\x03' + name + value.raw data = b"".join([_element_to_bson(key, val, check_keys, opts) for key, val in iteritems(value)]) return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00" @@ -694,6 +711,8 @@ def _element_to_bson(key, value, check_keys, opts): def _dict_to_bson(doc, check_keys, opts, top_level=True): """Encode a document to BSON.""" + if _raw_document_class(doc): + return doc.raw try: elements = [] if top_level and "_id" in doc: @@ -751,6 +770,7 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): docs = [] position = 0 end = len(data) - 1 + use_raw = _raw_document_class(codec_options.document_class) try: while position < end: obj_size = _UNPACK_INT(data[position:position + 4])[0] @@ -759,10 +779,15 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): obj_end = position + obj_size - 1 if data[obj_end:position + obj_size] != b"\x00": raise InvalidBSON("bad eoo") - docs.append(_elements_to_dict(data, - position + 4, - obj_end, - codec_options)) + if use_raw: + docs.append( + codec_options.document_class( + data[position:obj_end + 1], codec_options)) + else: + docs.append(_elements_to_dict(data, + position + 4, + obj_end, + codec_options)) position += obj_size return docs except InvalidBSON: diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index fd6baf0ce..c4ddd6529 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -51,6 +51,7 @@ struct module_state { PyTypeObject* REType; PyObject* BSONInt64; PyObject* Mapping; + PyObject* CodecOptions; }; /* The Py_TYPE macro was introduced in CPython 2.6 */ @@ -110,49 +111,9 @@ _downcast_and_check(Py_ssize_t size, int extra) { return (int)size + extra; } -/* Fill out a codec_options_t* from a CodecOptions object. Use with the "O&" - * format spec in PyArg_ParseTuple. - * - * Return 1 on success. options->document_class is a new reference. - * Return 0 on failure. - */ -int convert_codec_options(PyObject* options_obj, void* p) { - codec_options_t* options = (codec_options_t*)p; - options->unicode_decode_error_handler = NULL; - if (!PyArg_ParseTuple(options_obj, "ObbzO", - &options->document_class, - &options->tz_aware, - &options->uuid_rep, - &options->unicode_decode_error_handler, - &options->tzinfo)) { - return 0; - } - - Py_INCREF(options->document_class); - Py_INCREF(options->tzinfo); - return 1; -} - -/* Fill out a codec_options_t* with default options. */ -void default_codec_options(codec_options_t* options) { - options->document_class = (PyObject*)&PyDict_Type; - Py_INCREF(options->document_class); - - // TODO: set to "1". PYTHON-526, setting tz_aware=True by default. - options->tz_aware = 0; - options->uuid_rep = PYTHON_LEGACY; - options->unicode_decode_error_handler = NULL; - options->tzinfo = Py_None; - Py_INCREF(options->tzinfo); -} - -void destroy_codec_options(codec_options_t* options) { - Py_CLEAR(options->document_class); - Py_CLEAR(options->tzinfo); -} - static PyObject* elements_to_dict(PyObject* self, const char* string, - unsigned max, const codec_options_t* options); + unsigned max, + const codec_options_t* options); static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, @@ -377,7 +338,8 @@ static int _load_python_objects(PyObject* module) { _load_object(&state->Regex, "bson.regex", "Regex") || _load_object(&state->BSONInt64, "bson.int64", "Int64") || _load_object(&state->UUID, "uuid", "UUID") || - _load_object(&state->Mapping, "collections", "Mapping")) { + _load_object(&state->Mapping, "collections", "Mapping") || + _load_object(&state->CodecOptions, "bson.codec_options", "CodecOptions")) { return 1; } /* Reload our REType hack too. */ @@ -410,6 +372,101 @@ static int _load_python_objects(PyObject* module) { return 0; } +/* + * Get the _type_marker from an Object. + * + * Return the type marker, 0 if there is no marker, or -1 on failure. + */ +static long _type_marker(PyObject* object) { + PyObject* type_marker = NULL; + long type = 0; + + if (PyObject_HasAttrString(object, "_type_marker")) { + type_marker = PyObject_GetAttrString(object, "_type_marker"); + if (type_marker == NULL) { + return -1; + } + } + + /* + * Python objects with broken __getattr__ implementations could return + * arbitrary types for a call to PyObject_GetAttrString. For example + * pymongo.database.Database returns a new Collection instance for + * __getattr__ calls with names that don't match an existing attribute + * or method. In some cases "value" could be a subtype of something + * we know how to serialize. Make a best effort to encode these types. + */ +#if PY_MAJOR_VERSION >= 3 + if (type_marker && PyLong_CheckExact(type_marker)) { + type = PyLong_AsLong(type_marker); +#else + if (type_marker && PyInt_CheckExact(type_marker)) { + type = PyInt_AsLong(type_marker); +#endif + Py_DECREF(type_marker); + /* + * Py(Long|Int)_AsLong returns -1 for error but -1 is a valid value + * so we call PyErr_Occurred to differentiate. + */ + if (type == -1 && PyErr_Occurred()) { + return -1; + } + } else { + Py_XDECREF(type_marker); + } + + return type; +} + +/* Fill out a codec_options_t* from a CodecOptions object. Use with the "O&" + * format spec in PyArg_ParseTuple. + * + * Return 1 on success. options->document_class is a new reference. + * Return 0 on failure. + */ +int convert_codec_options(PyObject* options_obj, void* p) { + codec_options_t* options = (codec_options_t*)p; + long type_marker; + options->unicode_decode_error_handler = NULL; + if (!PyArg_ParseTuple(options_obj, "ObbzO", + &options->document_class, + &options->tz_aware, + &options->uuid_rep, + &options->unicode_decode_error_handler, + &options->tzinfo)) { + return 0; + } + + type_marker = _type_marker(options->document_class); + if (type_marker < 0) return 0; + + Py_INCREF(options->document_class); + Py_INCREF(options->tzinfo); + options->options_obj = options_obj; + Py_INCREF(options->options_obj); + options->is_raw_bson = (101 == type_marker); + return 1; +} + +/* Fill out a codec_options_t* with default options. + * + * Return 1 on success. + * Return 0 on failure. + */ +int default_codec_options(struct module_state* state, codec_options_t* options) { + PyObject* codec_options_func = _get_object( + state->CodecOptions, "bson.codec_options", "CodecOptions"); + PyObject* options_obj = PyObject_CallFunctionObjArgs( + codec_options_func, NULL); + return convert_codec_options(options_obj, options); +} + +void destroy_codec_options(codec_options_t* options) { + Py_CLEAR(options->document_class); + Py_CLEAR(options->tzinfo); + Py_CLEAR(options->options_obj); +} + static int write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, @@ -604,270 +661,279 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, unsigned char check_keys, const codec_options_t* options) { struct module_state *state = GETSTATE(self); - PyObject* type_marker = NULL; PyObject* mapping_type; PyObject* uuid_type; - /* * Don't use PyObject_IsInstance for our custom types. It causes * problems with python sub interpreters. Our custom types should * have a _type_marker attribute, which we can switch on instead. */ - if (PyObject_HasAttrString(value, "_type_marker")) { - type_marker = PyObject_GetAttrString(value, "_type_marker"); - if (type_marker == NULL) { - return 0; - } + long type = _type_marker(value); + if (type < 0) { + return 0; } - /* - * Python objects with broken __getattr__ implementations could return - * arbitrary types for a call to PyObject_GetAttrString. For example - * pymongo.database.Database returns a new Collection instance for - * __getattr__ calls with names that don't match an existing attribute - * or method. In some cases "value" could be a subtype of something - * we know how to serialize. Make a best effort to encode these types. - */ -#if PY_MAJOR_VERSION >= 3 - if (type_marker && PyLong_CheckExact(type_marker)) { - long type = PyLong_AsLong(type_marker); -#else - if (type_marker && PyInt_CheckExact(type_marker)) { - long type = PyInt_AsLong(type_marker); -#endif - Py_DECREF(type_marker); - /* - * Py(Long|Int)_AsLong returns -1 for error but -1 is a valid value - * so we call PyErr_Occurred to differentiate. - */ - if (type == -1 && PyErr_Occurred()) { - return 0; - } - switch (type) { - case 5: - { - /* Binary */ - PyObject* subtype_object; - long subtype; - const char* data; - int size; - *(buffer_get_buffer(buffer) + type_byte) = 0x05; - subtype_object = PyObject_GetAttrString(value, "subtype"); - if (!subtype_object) { - return 0; - } -#if PY_MAJOR_VERSION >= 3 - subtype = PyLong_AsLong(subtype_object); -#else - subtype = PyInt_AsLong(subtype_object); -#endif - if (subtype == -1) { - Py_DECREF(subtype_object); - return 0; - } -#if PY_MAJOR_VERSION >= 3 - size = _downcast_and_check(PyBytes_Size(value), 0); -#else - size = _downcast_and_check(PyString_Size(value), 0); -#endif - if (size == -1) { - Py_DECREF(subtype_object); - return 0; - } + switch (type) { + case 5: + { + /* Binary */ + PyObject* subtype_object; + long subtype; + const char* data; + int size; + *(buffer_get_buffer(buffer) + type_byte) = 0x05; + subtype_object = PyObject_GetAttrString(value, "subtype"); + if (!subtype_object) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + subtype = PyLong_AsLong(subtype_object); +#else + subtype = PyInt_AsLong(subtype_object); +#endif + if (subtype == -1) { Py_DECREF(subtype_object); - if (subtype == 2) { + return 0; + } #if PY_MAJOR_VERSION >= 3 - int other_size = _downcast_and_check(PyBytes_Size(value), 4); + size = _downcast_and_check(PyBytes_Size(value), 0); #else - int other_size = _downcast_and_check(PyString_Size(value), 4); + size = _downcast_and_check(PyString_Size(value), 0); #endif - if (other_size == -1) - return 0; - if (!buffer_write_bytes(buffer, (const char*)&other_size, 4)) { - return 0; - } - if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { - return 0; - } - } - if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { - return 0; - } - if (subtype != 2) { - if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { - return 0; - } - } + if (size == -1) { + Py_DECREF(subtype_object); + return 0; + } + + Py_DECREF(subtype_object); + if (subtype == 2) { #if PY_MAJOR_VERSION >= 3 - data = PyBytes_AsString(value); + int other_size = _downcast_and_check(PyBytes_Size(value), 4); #else - data = PyString_AsString(value); + int other_size = _downcast_and_check(PyString_Size(value), 4); #endif - if (!data) { + if (other_size == -1) + return 0; + if (!buffer_write_bytes(buffer, (const char*)&other_size, 4)) { return 0; } - if (!buffer_write_bytes(buffer, data, size)) { - return 0; + if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { + return 0; } - return 1; } - case 7: - { - /* ObjectId */ - const char* data; - PyObject* pystring = PyObject_GetAttrString(value, "_ObjectId__id"); - if (!pystring) { + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { + return 0; + } + if (subtype != 2) { + if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { return 0; } + } #if PY_MAJOR_VERSION >= 3 - data = PyBytes_AsString(pystring); + data = PyBytes_AsString(value); #else - data = PyString_AsString(pystring); + data = PyString_AsString(value); #endif - if (!data) { - Py_DECREF(pystring); - return 0; - } - if (!buffer_write_bytes(buffer, data, 12)) { - Py_DECREF(pystring); - return 0; - } - Py_DECREF(pystring); - *(buffer_get_buffer(buffer) + type_byte) = 0x07; - return 1; + if (!data) { + return 0; } - case 11: - { - /* Regex */ - return _write_regex_to_buffer(buffer, type_byte, value); - } - case 13: - { - /* Code */ - int start_position, - length_location, - length; - - PyObject* scope = PyObject_GetAttrString(value, "scope"); - if (!scope) { - return 0; - } - - if (!PyDict_Size(scope)) { - Py_DECREF(scope); - *(buffer_get_buffer(buffer) + type_byte) = 0x0D; - return write_string(buffer, value); - } - - *(buffer_get_buffer(buffer) + type_byte) = 0x0F; - - start_position = buffer_get_position(buffer); - /* save space for length */ - length_location = buffer_save_space(buffer, 4); - if (length_location == -1) { - PyErr_NoMemory(); - Py_DECREF(scope); - return 0; - } - - if (!write_string(buffer, value)) { - Py_DECREF(scope); - return 0; - } - - if (!write_dict(self, buffer, scope, 0, options, 0)) { - Py_DECREF(scope); - return 0; - } - Py_DECREF(scope); - - length = buffer_get_position(buffer) - start_position; - memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); - return 1; - } - case 17: - { - /* Timestamp */ - PyObject* obj; - long i; - - obj = PyObject_GetAttrString(value, "inc"); - if (!obj) { - return 0; - } -#if PY_MAJOR_VERSION >= 3 - i = PyLong_AsLong(obj); -#else - i = PyInt_AsLong(obj); -#endif - Py_DECREF(obj); - if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { - return 0; - } - - obj = PyObject_GetAttrString(value, "time"); - if (!obj) { - return 0; - } -#if PY_MAJOR_VERSION >= 3 - i = PyLong_AsLong(obj); -#else - i = PyInt_AsLong(obj); -#endif - Py_DECREF(obj); - if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { - return 0; - } - - *(buffer_get_buffer(buffer) + type_byte) = 0x11; - return 1; - } - case 18: - { - /* Int64 */ - const long long ll = PyLong_AsLongLong(value); - if (PyErr_Occurred()) { /* Overflow */ - PyErr_SetString(PyExc_OverflowError, - "MongoDB can only handle up to 8-byte ints"); - return 0; - } - if (!buffer_write_bytes(buffer, (const char*)&ll, 8)) { - return 0; - } - *(buffer_get_buffer(buffer) + type_byte) = 0x12; - return 1; - } - case 100: - { - /* DBRef */ - PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL); - if (!as_doc) { - return 0; - } - if (!write_dict(self, buffer, as_doc, 0, options, 0)) { - Py_DECREF(as_doc); - return 0; - } - Py_DECREF(as_doc); - *(buffer_get_buffer(buffer) + type_byte) = 0x03; - return 1; - } - case 255: - { - /* MinKey */ - *(buffer_get_buffer(buffer) + type_byte) = 0xFF; - return 1; - } - case 127: - { - /* MaxKey */ - *(buffer_get_buffer(buffer) + type_byte) = 0x7F; - return 1; + if (!buffer_write_bytes(buffer, data, size)) { + return 0; } + return 1; + } + case 7: + { + /* ObjectId */ + const char* data; + PyObject* pystring = PyObject_GetAttrString(value, "_ObjectId__id"); + if (!pystring) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + data = PyBytes_AsString(pystring); +#else + data = PyString_AsString(pystring); +#endif + if (!data) { + Py_DECREF(pystring); + return 0; + } + if (!buffer_write_bytes(buffer, data, 12)) { + Py_DECREF(pystring); + return 0; + } + Py_DECREF(pystring); + *(buffer_get_buffer(buffer) + type_byte) = 0x07; + return 1; + } + case 11: + { + /* Regex */ + return _write_regex_to_buffer(buffer, type_byte, value); + } + case 13: + { + /* Code */ + int start_position, + length_location, + length; + + PyObject* scope = PyObject_GetAttrString(value, "scope"); + if (!scope) { + return 0; + } + + if (!PyDict_Size(scope)) { + Py_DECREF(scope); + *(buffer_get_buffer(buffer) + type_byte) = 0x0D; + return write_string(buffer, value); + } + + *(buffer_get_buffer(buffer) + type_byte) = 0x0F; + + start_position = buffer_get_position(buffer); + /* save space for length */ + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyErr_NoMemory(); + Py_DECREF(scope); + return 0; + } + + if (!write_string(buffer, value)) { + Py_DECREF(scope); + return 0; + } + + if (!write_dict(self, buffer, scope, 0, options, 0)) { + Py_DECREF(scope); + return 0; + } + Py_DECREF(scope); + + length = buffer_get_position(buffer) - start_position; + memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); + return 1; + } + case 17: + { + /* Timestamp */ + PyObject* obj; + long i; + + obj = PyObject_GetAttrString(value, "inc"); + if (!obj) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + i = PyLong_AsLong(obj); +#else + i = PyInt_AsLong(obj); +#endif + Py_DECREF(obj); + if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { + return 0; + } + + obj = PyObject_GetAttrString(value, "time"); + if (!obj) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + i = PyLong_AsLong(obj); +#else + i = PyInt_AsLong(obj); +#endif + Py_DECREF(obj); + if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { + return 0; + } + + *(buffer_get_buffer(buffer) + type_byte) = 0x11; + return 1; + } + case 18: + { + /* Int64 */ + const long long ll = PyLong_AsLongLong(value); + if (PyErr_Occurred()) { /* Overflow */ + PyErr_SetString(PyExc_OverflowError, + "MongoDB can only handle up to 8-byte ints"); + return 0; + } + if (!buffer_write_bytes(buffer, (const char*)&ll, 8)) { + return 0; + } + *(buffer_get_buffer(buffer) + type_byte) = 0x12; + return 1; + } + case 100: + { + /* DBRef */ + PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL); + if (!as_doc) { + return 0; + } + if (!write_dict(self, buffer, as_doc, 0, options, 0)) { + Py_DECREF(as_doc); + return 0; + } + Py_DECREF(as_doc); + *(buffer_get_buffer(buffer) + type_byte) = 0x03; + return 1; + } + case 101: + { + /* RawBSONDocument */ + char* raw_bson_document_bytes; + Py_ssize_t raw_bson_document_bytes_len; + int raw_bson_document_bytes_len_int; + PyObject* raw_bson_document_bytes_obj = PyObject_GetAttrString(value, "raw"); + if (!raw_bson_document_bytes_obj) { + return 0; + } + +#if PY_MAJOR_VERSION >= 3 + if (-1 == PyBytes_AsStringAndSize(raw_bson_document_bytes_obj, + &raw_bson_document_bytes, + &raw_bson_document_bytes_len)) { +#else + if (-1 == PyString_AsStringAndSize(raw_bson_document_bytes_obj, + &raw_bson_document_bytes, + &raw_bson_document_bytes_len)) { +#endif + Py_DECREF(raw_bson_document_bytes_obj); + return 0; + } + raw_bson_document_bytes_len_int = _downcast_and_check( + raw_bson_document_bytes_len, 0); + if (-1 == raw_bson_document_bytes_len_int) { + Py_DECREF(raw_bson_document_bytes_obj); + return 0; + } + if(!buffer_write_bytes(buffer, raw_bson_document_bytes, + raw_bson_document_bytes_len_int)) { + Py_DECREF(raw_bson_document_bytes_obj); + return 0; + } + *(buffer_get_buffer(buffer) + type_byte) = 0x03; + Py_DECREF(raw_bson_document_bytes_obj); + return 1; + } + case 255: + { + /* MinKey */ + *(buffer_get_buffer(buffer) + type_byte) = 0xFF; + return 1; + } + case 127: + { + /* MaxKey */ + *(buffer_get_buffer(buffer) + type_byte) = 0x7F; + return 1; } - } else { - Py_XDECREF(type_marker); } /* No _type_marker attibute or not one of our types. */ @@ -1505,6 +1571,11 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { unsigned char top_level = 1; codec_options_t options; buffer_t buffer; + PyObject* raw_bson_document_bytes_obj; + char* raw_bson_document_bytes; + Py_ssize_t raw_bson_document_bytes_len; + int raw_bson_document_bytes_len_int; + long type_marker; if (!PyArg_ParseTuple(args, "ObO&|b", &dict, &check_keys, convert_codec_options, &options, &top_level)) { @@ -1517,7 +1588,45 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { return NULL; } - if (!write_dict(self, buffer, dict, check_keys, &options, top_level)) { + /* check for RawBSONDocument */ + type_marker = _type_marker(dict); + if (type_marker < 0) { + destroy_codec_options(&options); + return NULL; + } else if (101 == type_marker) { + raw_bson_document_bytes_obj = PyObject_GetAttrString(dict, "raw"); + if (NULL == raw_bson_document_bytes_obj) { + destroy_codec_options(&options); + buffer_free(buffer); + return NULL; + } +#if PY_MAJOR_VERSION >= 3 + if (-1 == PyBytes_AsStringAndSize(raw_bson_document_bytes_obj, + &raw_bson_document_bytes, + &raw_bson_document_bytes_len)) { +#else + if (-1 == PyString_AsStringAndSize(raw_bson_document_bytes_obj, + &raw_bson_document_bytes, + &raw_bson_document_bytes_len)) { +#endif + Py_DECREF(raw_bson_document_bytes_obj); + destroy_codec_options(&options); + buffer_free(buffer); + return NULL; + } + raw_bson_document_bytes_len_int = _downcast_and_check(raw_bson_document_bytes_len, 0); + if (raw_bson_document_bytes_len_int < 0 || + !buffer_write_bytes(buffer, + raw_bson_document_bytes, + raw_bson_document_bytes_len_int)) { + destroy_codec_options(&options); + buffer_free(buffer); + Py_DECREF(raw_bson_document_bytes_obj); + return NULL; + } + + Py_DECREF(raw_bson_document_bytes_obj); + } else if (!write_dict(self, buffer, dict, check_keys, &options, top_level)) { destroy_codec_options(&options); buffer_free(buffer); return NULL; @@ -1584,6 +1693,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, { PyObject* collection; unsigned size; + if (max < 4) { goto invalid; } @@ -1595,6 +1705,18 @@ static PyObject* get_value(PyObject* self, const char* buffer, if (buffer[*position + size - 1]) { goto invalid; } + + if (options->is_raw_bson) { + value = PyObject_CallFunction( + options->document_class, BYTES_FORMAT_STRING "O", + buffer + *position, size, options->options_obj); + if (!value) { + goto invalid; + } + *position += size; + break; + } + value = elements_to_dict(self, buffer + *position + 4, size - 5, options); if (!value) { @@ -2287,10 +2409,117 @@ static PyObject* get_value(PyObject* self, const char* buffer, return NULL; } +/* + * Get the next 'name' and 'value' from a document in a string, whose position + * is provided. + * + * Returns the position of the next element in the document, or -1 on error. + */ +static int _element_to_dict(PyObject* self, const char* string, + unsigned position, unsigned max, + const codec_options_t* options, + PyObject** name, PyObject** value) { + unsigned char type = (unsigned char)string[position++]; + size_t name_length = strlen(string + position); + if (name_length > BSON_MAX_SIZE || position + name_length >= max) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetNone(InvalidBSON); + Py_DECREF(InvalidBSON); + } + return -1; + } + *name = PyUnicode_DecodeUTF8( + string + position, name_length, + options->unicode_decode_error_handler); + if (!*name) { + /* If NULL is returned then wrap the UnicodeDecodeError + in an InvalidBSON error */ + PyObject *etype, *evalue, *etrace; + PyObject *InvalidBSON; + + PyErr_Fetch(&etype, &evalue, &etrace); + if (PyErr_GivenExceptionMatches(etype, PyExc_Exception)) { + InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + Py_DECREF(etype); + etype = InvalidBSON; + + if (evalue) { + PyObject *msg = PyObject_Str(evalue); + Py_DECREF(evalue); + evalue = msg; + } + PyErr_NormalizeException(&etype, &evalue, &etrace); + } + } + PyErr_Restore(etype, evalue, etrace); + return -1; + } + position += (unsigned)name_length + 1; + *value = get_value(self, string, &position, type, + max - position, options); + if (!*value) { + Py_DECREF(name); + return -1; + } + return position; +} + +static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) { + char* string; + PyObject* bson; + codec_options_t options; + unsigned position; + unsigned max; + int new_position; + PyObject* name; + PyObject* value; + PyObject* result_tuple; + + if (!PyArg_ParseTuple(args, "OII|O&", &bson, &position, &max, + convert_codec_options, &options)) { + return NULL; + } + if (PyTuple_GET_SIZE(args) < 4) { + default_codec_options(GETSTATE(self), &options); + } + +#if PY_MAJOR_VERSION >= 3 + if (!PyBytes_Check(bson)) { + PyErr_SetString(PyExc_TypeError, "argument to _element_to_dict must be a bytes object"); +#else + if (!PyString_Check(bson)) { + PyErr_SetString(PyExc_TypeError, "argument to _element_to_dict must be a string"); +#endif + return NULL; + } +#if PY_MAJOR_VERSION >= 3 + string = PyBytes_AsString(bson); +#else + string = PyString_AsString(bson); +#endif + + new_position = _element_to_dict(self, string, position, max, &options, + &name, &value); + if (new_position < 0) { + return NULL; + } + + result_tuple = Py_BuildValue("OOi", name, value, new_position); + if (!result_tuple) { + Py_DECREF(name); + Py_DECREF(value); + return NULL; + } + + return result_tuple; +} + static PyObject* _elements_to_dict(PyObject* self, const char* string, unsigned max, const codec_options_t* options) { - unsigned position = 0; + int position = 0; PyObject* dict = PyObject_CallObject(options->document_class, NULL); if (!dict) { return NULL; @@ -2299,50 +2528,9 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, PyObject* name; PyObject* value; - unsigned char type = (unsigned char)string[position++]; - size_t name_length = strlen(string + position); - if (name_length > BSON_MAX_SIZE || position + name_length >= max) { - PyObject* InvalidBSON = _error("InvalidBSON"); - if (InvalidBSON) { - PyErr_SetNone(InvalidBSON); - Py_DECREF(InvalidBSON); - } - Py_DECREF(dict); - return NULL; - } - name = PyUnicode_DecodeUTF8( - string + position, name_length, - options->unicode_decode_error_handler); - if (!name) { - /* If NULL is returned then wrap the UnicodeDecodeError - in an InvalidBSON error */ - PyObject *etype, *evalue, *etrace; - PyObject *InvalidBSON; - - PyErr_Fetch(&etype, &evalue, &etrace); - if (PyErr_GivenExceptionMatches(etype, PyExc_Exception)) { - InvalidBSON = _error("InvalidBSON"); - if (InvalidBSON) { - Py_DECREF(etype); - etype = InvalidBSON; - - if (evalue) { - PyObject *msg = PyObject_Str(evalue); - Py_DECREF(evalue); - evalue = msg; - } - PyErr_NormalizeException(&etype, &evalue, &etrace); - } - } - PyErr_Restore(etype, evalue, etrace); - Py_DECREF(dict); - return NULL; - } - position += (unsigned)name_length + 1; - value = get_value(self, string, &position, type, - max - position, options); - if (!value) { - Py_DECREF(name); + position = _element_to_dict(self, string, position, max, + options, &name, &value); + if (position < 0) { Py_DECREF(dict); return NULL; } @@ -2372,9 +2560,10 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyObject* bson; codec_options_t options; PyObject* result; + PyObject* options_obj; - if (!PyArg_ParseTuple( - args, "OO&", &bson, convert_codec_options, &options)) { + if (! (PyArg_ParseTuple(args, "OO", &bson, &options_obj) && + convert_codec_options(options_obj, &options))) { return NULL; } @@ -2445,6 +2634,13 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { return NULL; } + /* No need to decode fields if using RawBSONDocument */ + if (options.is_raw_bson) { + return PyObject_CallFunction( + options.document_class, BYTES_FORMAT_STRING "O", string, size, + options_obj); + } + result = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); destroy_codec_options(&options); return result; @@ -2458,15 +2654,15 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyObject* dict; PyObject* result; codec_options_t options; + PyObject* options_obj; - if (!PyArg_ParseTuple( - args, "O|O&", - &bson, convert_codec_options, &options)) { + if (!PyArg_ParseTuple(args, "O|O", &bson, &options_obj)) { return NULL; } - if (PyTuple_GET_SIZE(args) < 2) { - default_codec_options(&options); + default_codec_options(GETSTATE(self), &options); + } else if (!convert_codec_options(options_obj, &options)) { + return NULL; } #if PY_MAJOR_VERSION >= 3 @@ -2541,7 +2737,14 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { return NULL; } - dict = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); + /* No need to decode fields if using RawBSONDocument. */ + if (options.is_raw_bson) { + dict = PyObject_CallFunction( + options.document_class, BYTES_FORMAT_STRING "O", string, size, + options_obj); + } else { + dict = elements_to_dict(self, string + 4, (unsigned)size - 5, &options); + } if (!dict) { Py_DECREF(result); destroy_codec_options(&options); @@ -2569,6 +2772,8 @@ static PyMethodDef _CBSONMethods[] = { "convert a BSON string to a SON object."}, {"decode_all", _cbson_decode_all, METH_VARARGS, "convert binary data to a sequence of documents."}, + {"_element_to_dict", _cbson_element_to_dict, METH_VARARGS, + "Decode a single key, value pair."}, {NULL, NULL, 0, NULL} }; diff --git a/bson/_cbsonmodule.h b/bson/_cbsonmodule.h index 3f8c66be0..e76079198 100644 --- a/bson/_cbsonmodule.h +++ b/bson/_cbsonmodule.h @@ -52,12 +52,20 @@ typedef int Py_ssize_t; #define STRCAT(dest, n, src) strcat((dest), (src)) #endif +#if PY_MAJOR_VERSION >= 3 +#define BYTES_FORMAT_STRING "y#" +#else +#define BYTES_FORMAT_STRING "s#" +#endif + typedef struct codec_options_t { PyObject* document_class; unsigned char tz_aware; unsigned char uuid_rep; char* unicode_decode_error_handler; PyObject* tzinfo; + PyObject* options_obj; + unsigned char is_raw_bson; } codec_options_t; /* C API functions */ diff --git a/bson/codec_options.py b/bson/codec_options.py index e0e5e120f..036bb9382 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -23,6 +23,14 @@ from bson.binary import (ALL_UUID_REPRESENTATIONS, PYTHON_LEGACY, UUID_REPRESENTATION_NAMES) +_RAW_BSON_DOCUMENT_MARKER = 101 + + +def _raw_document_class(document_class): + """Determine if a document_class is a RawBSONDocument class.""" + marker = getattr(document_class, '_type_marker', None) + return marker == _RAW_BSON_DOCUMENT_MARKER + _options_base = namedtuple( 'CodecOptions', @@ -61,9 +69,11 @@ class CodecOptions(_options_base): tz_aware=False, uuid_representation=PYTHON_LEGACY, unicode_decode_error_handler="strict", tzinfo=None): - if not issubclass(document_class, MutableMapping): - raise TypeError("document_class must be dict, bson.son.SON, or " - "another subclass of collections.MutableMapping") + if not (issubclass(document_class, MutableMapping) or + _raw_document_class(document_class)): + raise TypeError("document_class must be dict, bson.son.SON, " + "bson.raw_bson_document.RawBSONDocument, or a " + "sublass of collections.MutableMapping") if not isinstance(tz_aware, bool): raise TypeError("tz_aware must be True or False") if uuid_representation not in ALL_UUID_REPRESENTATIONS: diff --git a/bson/raw_bson.py b/bson/raw_bson.py new file mode 100644 index 000000000..da00c8823 --- /dev/null +++ b/bson/raw_bson.py @@ -0,0 +1,92 @@ +# Copyright 2015 MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tools for representing raw BSON documents. +""" + +import collections + +from bson import _UNPACK_INT, _iterate_elements +from bson.py3compat import iteritems +from bson.codec_options import ( + CodecOptions, DEFAULT_CODEC_OPTIONS, _RAW_BSON_DOCUMENT_MARKER) + + +class RawBSONDocument(collections.Mapping): + """Representation for a MongoDB document that provides access to the raw + BSON bytes that compose it. + + Only when a field is accessed or modified within the document does + RawBSONDocument decode its bytes. + """ + + __slots__ = ('__raw', '__inflated_doc', '__codec_options') + _type_marker = _RAW_BSON_DOCUMENT_MARKER + + def __init__(self, bson_bytes, codec_options=DEFAULT_CODEC_OPTIONS): + """Create a new :class:`RawBSONDocument`. + + :Parameters: + - `bson_bytes`: the BSON bytes that compose this document + - `codec_options` (optional): An instance of + :class:`~bson.codec_options.CodecOptions`. + """ + self.__raw = bson_bytes + self.__inflated_doc = None + # Always decode documents to their lazy representations. + co = codec_options + self.__codec_options = CodecOptions( + tz_aware=co.tz_aware, + document_class=RawBSONDocument, + uuid_representation=co.uuid_representation, + unicode_decode_error_handler=co.unicode_decode_error_handler, + tzinfo=co.tzinfo) + + @property + def raw(self): + """The raw BSON bytes composing this document.""" + return self.__raw + + def items(self): + """Lazily decode and iterate elements in this document.""" + return iteritems(self.__inflated) + + @property + def __inflated(self): + if self.__inflated_doc is None: + # We already validated the object's size when this document was + # created, so no need to do that again. + self.__inflated_doc = dict( + element for element in _iterate_elements( + self.__raw, 4, _UNPACK_INT(self.__raw[:4])[0] - 1, + self.__codec_options)) + return self.__inflated_doc + + def __getitem__(self, item): + return self.__inflated[item] + + def __iter__(self): + return iter(self.__inflated) + + def __len__(self): + return len(self.__inflated) + + def __eq__(self, other): + if isinstance(other, RawBSONDocument): + return self.__raw == other.raw + return NotImplemented + + def __repr__(self): + return ("RawBSONDocument(%r, codec_options=%r)" + % (self.raw, self.__codec_options)) diff --git a/pymongo/bulk.py b/pymongo/bulk.py index bcedf6fd7..0f293f5d5 100644 --- a/pymongo/bulk.py +++ b/pymongo/bulk.py @@ -19,9 +19,10 @@ from bson.objectid import ObjectId from bson.py3compat import u +from bson.raw_bson import RawBSONDocument from bson.son import SON from pymongo.common import (validate_is_mapping, - validate_is_mutable_mapping, + validate_is_document_type, validate_ok_for_replace, validate_ok_for_update) from pymongo.errors import (BulkWriteError, @@ -213,9 +214,9 @@ class _Bulk(object): def add_insert(self, document): """Add an insert document to the list of ops. """ - validate_is_mutable_mapping("document", document) + validate_is_document_type("document", document) # Generate ObjectId client side. - if '_id' not in document: + if not (isinstance(document, RawBSONDocument) or '_id' in document): document['_id'] = ObjectId() self.ops.append((_INSERT, document)) diff --git a/pymongo/collection.py b/pymongo/collection.py index a3cf05851..9253fde9d 100644 --- a/pymongo/collection.py +++ b/pymongo/collection.py @@ -24,6 +24,7 @@ from bson.py3compat import (_unicode, integer_types, string_type, u) +from bson.raw_bson import RawBSONDocument from bson.codec_options import CodecOptions from bson.son import SON from pymongo import (common, @@ -489,7 +490,7 @@ class Collection(common.BaseObject): """Internal helper for inserting a single document.""" if manipulate: doc = self.__database._apply_incoming_manipulators(doc, self) - if '_id' not in doc: + if not isinstance(doc, RawBSONDocument) and '_id' not in doc: doc['_id'] = ObjectId() doc = self.__database._apply_incoming_copying_manipulators(doc, self) @@ -516,13 +517,14 @@ class Collection(common.BaseObject): sock_info, 'insert', command, acknowledged, op_id, bypass_doc_val, message.insert, self.__full_name, [doc], check_keys, acknowledged, concern, False, self.codec_options) - return doc.get('_id') + if not isinstance(doc, RawBSONDocument): + return doc.get('_id') def _insert(self, sock_info, docs, ordered=True, check_keys=True, manipulate=False, write_concern=None, op_id=None, bypass_doc_val=False): """Internal insert helper.""" - if isinstance(docs, collections.MutableMapping): + if isinstance(docs, collections.Mapping): return self._insert_one( sock_info, docs, ordered, check_keys, manipulate, write_concern, op_id, bypass_doc_val) @@ -540,7 +542,7 @@ class Collection(common.BaseObject): # operations is required for backwards compatibility, # see PYTHON-709. doc = _db._apply_incoming_manipulators(doc, self) - if '_id' not in doc: + if not (isinstance(doc, RawBSONDocument) or '_id' in doc): doc['_id'] = ObjectId() doc = _db._apply_incoming_copying_manipulators(doc, self) @@ -550,7 +552,9 @@ class Collection(common.BaseObject): def gen(): """Generator that only tracks existing _ids.""" for doc in docs: - ids.append(doc.get('_id')) + # Don't inflate RawBSONDocument by touching fields. + if not isinstance(doc, RawBSONDocument): + ids.append(doc.get('_id')) yield doc concern = (write_concern or self.write_concern).document @@ -612,8 +616,8 @@ class Collection(common.BaseObject): .. versionadded:: 3.0 """ - common.validate_is_mutable_mapping("document", document) - if "_id" not in document: + common.validate_is_document_type("document", document) + if not (isinstance(document, RawBSONDocument) or "_id" in document): document["_id"] = ObjectId() with self._socket_for_writes() as sock_info: return InsertOneResult( @@ -663,10 +667,11 @@ class Collection(common.BaseObject): def gen(): """A generator that validates documents and handles _ids.""" for document in documents: - common.validate_is_mutable_mapping("document", document) - if "_id" not in document: - document["_id"] = ObjectId() - inserted_ids.append(document["_id"]) + common.validate_is_document_type("document", document) + if not isinstance(document, RawBSONDocument): + if "_id" not in document: + document["_id"] = ObjectId() + inserted_ids.append(document["_id"]) yield (message._INSERT, document) blk = _Bulk(self, ordered, bypass_document_validation) @@ -1189,6 +1194,8 @@ class Collection(common.BaseObject): with self._socket_for_reads() as (sock_info, slave_ok): res = self._command(sock_info, cmd, slave_ok, allowable_errors=["ns missing"], + codec_options=self.codec_options._replace( + document_class=dict), read_concern=self.read_concern) if res.get("errmsg", "") == "ns missing": return 0 @@ -2158,14 +2165,14 @@ class Collection(common.BaseObject): """ warnings.warn("save is deprecated. Use insert_one or replace_one " "instead", DeprecationWarning, stacklevel=2) - common.validate_is_mutable_mapping("to_save", to_save) + common.validate_is_document_type("to_save", to_save) write_concern = None if kwargs: write_concern = WriteConcern(**kwargs) with self._socket_for_writes() as sock_info: - if "_id" not in to_save: + if not (isinstance(to_save, RawBSONDocument) or "_id" in to_save): return self._insert(sock_info, to_save, True, check_keys, manipulate, write_concern) else: diff --git a/pymongo/common.py b/pymongo/common.py index dc1af4c6f..daa22785c 100644 --- a/pymongo/common.py +++ b/pymongo/common.py @@ -22,6 +22,7 @@ from bson.binary import (STANDARD, PYTHON_LEGACY, JAVA_LEGACY, CSHARP_LEGACY) from bson.codec_options import CodecOptions from bson.py3compat import string_type, integer_types, iteritems +from bson.raw_bson import RawBSONDocument from pymongo.auth import MECHANISMS from pymongo.errors import ConfigurationError from pymongo.monitoring import _validate_event_listeners @@ -359,8 +360,9 @@ def validate_auth_mechanism_properties(option, value): def validate_document_class(option, value): """Validate the document_class option.""" - if not issubclass(value, collections.MutableMapping): - raise TypeError("%s must be dict, bson.son.SON, or another " + if not issubclass(value, (collections.MutableMapping, RawBSONDocument)): + raise TypeError("%s must be dict, bson.son.SON, " + "bson.raw_bson.RawBSONDocument, or a " "sublass of collections.MutableMapping" % (option,)) return value @@ -373,11 +375,12 @@ def validate_is_mapping(option, value): "collections.Mapping" % (option,)) -def validate_is_mutable_mapping(option, value): - """Validate the type of method arguments that expect a mutable document.""" - if not isinstance(value, collections.MutableMapping): - raise TypeError("%s must be an instance of dict, bson.son.SON, or " - "other type that inherits from " +def validate_is_document_type(option, value): + """Validate the type of method arguments that expect a MongoDB document.""" + if not isinstance(value, (collections.MutableMapping, RawBSONDocument)): + raise TypeError("%s must be an instance of dict, bson.son.SON, " + "bson.raw_bson.RawBSONDocument, or " + "a type that inherits from " "collections.MutableMapping" % (option,)) @@ -385,7 +388,7 @@ def validate_ok_for_replace(replacement): """Validate a replacement document.""" validate_is_mapping("replacement", replacement) # Replacement can be {} - if replacement: + if replacement and not isinstance(replacement, RawBSONDocument): first = next(iter(replacement)) if first.startswith('$'): raise ValueError('replacement can not include $ operators') diff --git a/pymongo/database.py b/pymongo/database.py index 9250cbb13..049ec1152 100644 --- a/pymongo/database.py +++ b/pymongo/database.py @@ -275,6 +275,15 @@ class Database(common.BaseObject): self, name, False, codec_options, read_preference, write_concern, read_concern) + def _collection_default_options(self, name, **kargs): + """Get a Collection instance with the default settings.""" + wc = (self.write_concern + if self.write_concern.acknowledged else WriteConcern()) + return self.get_collection( + name, codec_options=DEFAULT_CODEC_OPTIONS, + read_preference=ReadPreference.PRIMARY, + write_concern=wc) + def create_collection(self, name, codec_options=None, read_preference=None, write_concern=None, read_concern=None, **kwargs): @@ -824,7 +833,9 @@ class Database(common.BaseObject): def _legacy_add_user(self, name, password, read_only, **kwargs): """Uses v1 system to add users, i.e. saving to system.users. """ - user = self.system.users.find_one({"user": name}) or {"user": name} + # Use a Collection with the default codec_options. + system_users = self._collection_default_options('system.users') + user = system_users.find_one({"user": name}) or {"user": name} if password is not None: user["pwd"] = auth._password_digest(name, password) if read_only is not None: @@ -834,11 +845,8 @@ class Database(common.BaseObject): # We don't care what the _id is, only that it has one # for the replace_one call below. user.setdefault("_id", ObjectId()) - coll = self.system.users - if not self.write_concern.acknowledged: - coll = coll.with_options(write_concern=WriteConcern()) try: - coll.replace_one({"_id": user["_id"]}, user, True) + system_users.replace_one({"_id": user["_id"]}, user, True) except OperationFailure as exc: # First admin user add fails gle in MongoDB >= 2.1.2 # See SERVER-4225 for more information. @@ -930,9 +938,7 @@ class Database(common.BaseObject): except OperationFailure as exc: # See comment in add_user try / except above. if exc.code in common.COMMAND_NOT_FOUND_CODES: - coll = self.system.users - if not self.write_concern.acknowledged: - coll = coll.with_options(write_concern=WriteConcern()) + coll = self._collection_default_options('system.users') coll.delete_one({"user": name}) return raise diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index 428f2311f..55a7667e3 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -38,6 +38,7 @@ import warnings import weakref from collections import defaultdict +from bson.codec_options import DEFAULT_CODEC_OPTIONS from bson.py3compat import (integer_types, string_type) from bson.son import SON @@ -64,6 +65,7 @@ from pymongo.server_type import SERVER_TYPE from pymongo.topology import Topology from pymongo.topology_description import TOPOLOGY_TYPE from pymongo.settings import TopologySettings +from pymongo.write_concern import WriteConcern class MongoClient(common.BaseObject): @@ -1002,9 +1004,8 @@ class MongoClient(common.BaseObject): def database_names(self): """Get a list of the names of all databases on the connected server.""" return [db["name"] for db in - self.admin.command( - "listDatabases", - read_preference=ReadPreference.PRIMARY)["databases"]] + self._database_default_options('admin').command( + "listDatabases")["databases"]] def drop_database(self, name_or_database): """Drop a database. @@ -1089,13 +1090,20 @@ class MongoClient(common.BaseObject): self, name, codec_options, read_preference, write_concern, read_concern) + def _database_default_options(self, name): + """Get a Database instance with the default settings.""" + return self.get_database( + name, codec_options=DEFAULT_CODEC_OPTIONS, + read_preference=ReadPreference.PRIMARY, + write_concern=WriteConcern()) + @property def is_locked(self): """Is this server locked? While locked, all write operations are blocked, although read operations may still be allowed. Use :meth:`unlock` to unlock. """ - ops = self.admin.current_op() + ops = self._database_default_options('admin').current_op() return bool(ops.get('fsyncLock', 0)) def fsync(self, **kwargs): diff --git a/test/test_collection.py b/test/test_collection.py index 9d988cfa0..6eebeb99c 100644 --- a/test/test_collection.py +++ b/test/test_collection.py @@ -24,8 +24,12 @@ from collections import defaultdict sys.path[0:0] = [""] +import bson + +from bson.raw_bson import RawBSONDocument from bson.regex import Regex from bson.code import Code +from bson.codec_options import CodecOptions from bson.objectid import ObjectId from bson.py3compat import u, itervalues from bson.son import SON @@ -656,6 +660,12 @@ class TestCollection(IntegrationTest): # The insert failed duplicate key... wait_until(lambda: 2 == db.test.count(), 'forcing duplicate key error') + document = RawBSONDocument( + bson.BSON.encode({'_id': ObjectId(), 'foo': 'bar'})) + result = db.test.insert_one(document) + self.assertTrue(isinstance(result, InsertOneResult)) + self.assertEqual(result.inserted_id, None) + def test_insert_many(self): db = self.db db.test.drop() @@ -684,13 +694,20 @@ class TestCollection(IntegrationTest): self.assertEqual(1, db.test.count({"_id": _id})) self.assertTrue(result.acknowledged) + docs = [RawBSONDocument(bson.BSON.encode({"_id": i + 5})) + for i in range(5)] + result = db.test.insert_many(docs) + self.assertTrue(isinstance(result, InsertManyResult)) + self.assertTrue(isinstance(result.inserted_ids, list)) + self.assertEqual([], result.inserted_ids) + db = db.client.get_database(db.name, write_concern=WriteConcern(w=0)) docs = [{} for _ in range(5)] result = db.test.insert_many(docs) self.assertTrue(isinstance(result, InsertManyResult)) self.assertFalse(result.acknowledged) - self.assertEqual(15, db.test.count()) + self.assertEqual(20, db.test.count()) def test_delete_one(self): self.db.test.drop() @@ -1160,6 +1177,17 @@ class TestCollection(IntegrationTest): self.assertEqual(0, db.test.count({"x": 1})) self.assertEqual(db.test.find_one(id1)["y"], 1) + replacement = RawBSONDocument(bson.BSON.encode({"_id": id1, "z": 1})) + result = db.test.replace_one({"y": 1}, replacement, True) + self.assertTrue(isinstance(result, UpdateResult)) + self.assertEqual(1, result.matched_count) + self.assertTrue(result.modified_count in (None, 1)) + self.assertIsNone(result.upserted_id) + self.assertTrue(result.acknowledged) + self.assertEqual(1, db.test.count({"z": 1})) + self.assertEqual(0, db.test.count({"y": 1})) + self.assertEqual(db.test.find_one(id1)["z"], 1) + result = db.test.replace_one({"x": 2}, {"y": 2}, True) self.assertTrue(isinstance(result, UpdateResult)) self.assertEqual(0, result.matched_count) @@ -1354,6 +1382,23 @@ class TestCollection(IntegrationTest): self.assertTrue(isinstance(result, CommandCursor)) self.assertEqual([{'foo': [1, 2]}], list(result)) + def test_aggregate_raw_bson(self): + db = self.db + db.drop_collection("test") + db.test.insert_one({'foo': [1, 2]}) + + self.assertRaises(TypeError, db.test.aggregate, "wow") + + pipeline = {"$project": {"_id": False, "foo": True}} + result = db.get_collection( + 'test', + codec_options=CodecOptions(document_class=RawBSONDocument) + ).aggregate([pipeline], useCursor=False) + self.assertTrue(isinstance(result, CommandCursor)) + first_result = next(result) + self.assertIsInstance(first_result, RawBSONDocument) + self.assertEqual([1, 2], list(first_result['foo'])) + @client_context.require_version_min(2, 5, 1) def test_aggregation_cursor_validation(self): db = self.db diff --git a/test/test_raw_bson.py b/test/test_raw_bson.py new file mode 100644 index 000000000..2fa131aee --- /dev/null +++ b/test/test_raw_bson.py @@ -0,0 +1,109 @@ +import datetime +import uuid + +import pymongo + +from bson import BSON +from bson.binary import JAVA_LEGACY +from bson.codec_options import CodecOptions +from bson.raw_bson import RawBSONDocument +from test import client_context, unittest, pair + + +class TestRawBSONDocument(unittest.TestCase): + + # {u'_id': ObjectId('556df68b6e32ab21a95e0785'), + # u'name': u'Sherlock', + # u'addresses': [{u'street': u'Baker Street'}]} + bson_string = ( + b'Z\x00\x00\x00\x07_id\x00Um\xf6\x8bn2\xab!\xa9^\x07\x85\x02name\x00\t' + b'\x00\x00\x00Sherlock\x00\x04addresses\x00&\x00\x00\x00\x030\x00\x1e' + b'\x00\x00\x00\x02street\x00\r\x00\x00\x00Baker Street\x00\x00\x00\x00' + ) + document = RawBSONDocument(bson_string) + + def tearDown(self): + if client_context.connected: + client_context.client.pymongo_test.test_raw.drop() + + def test_decode(self): + self.assertEqual('Sherlock', self.document['name']) + first_address = self.document['addresses'][0] + self.assertIsInstance(first_address, RawBSONDocument) + self.assertEqual('Baker Street', first_address['street']) + + def test_raw(self): + self.assertEqual(self.bson_string, self.document.raw) + + @client_context.require_connection + def test_round_trip(self): + client = pymongo.MongoClient(pair, document_class=RawBSONDocument) + client.pymongo_test.test_raw.insert_one(self.document) + result = client.pymongo_test.test_raw.find_one(self.document['_id']) + self.assertIsInstance(result, RawBSONDocument) + self.assertEqual(dict(self.document.items()), dict(result.items())) + + def test_with_codec_options(self): + # {u'date': datetime.datetime(2015, 6, 3, 18, 40, 50, 826000), + # u'_id': UUID('026fab8f-975f-4965-9fbf-85ad874c60ff')} + # encoded with JAVA_LEGACY uuid representation. + bson_string = ( + b'-\x00\x00\x00\x05_id\x00\x10\x00\x00\x00\x03eI_\x97\x8f\xabo\x02' + b'\xff`L\x87\xad\x85\xbf\x9f\tdate\x00\x8a\xd6\xb9\xbaM' + b'\x01\x00\x00\x00' + ) + document = RawBSONDocument( + bson_string, + codec_options=CodecOptions(uuid_representation=JAVA_LEGACY)) + + self.assertEqual(uuid.UUID('026fab8f-975f-4965-9fbf-85ad874c60ff'), + document['_id']) + + @client_context.require_connection + def test_round_trip_codec_options(self): + doc = { + 'date': datetime.datetime(2015, 6, 3, 18, 40, 50, 826000), + '_id': uuid.UUID('026fab8f-975f-4965-9fbf-85ad874c60ff') + } + db = pymongo.MongoClient(pair).pymongo_test + coll = db.get_collection( + 'test_raw', + codec_options=CodecOptions(uuid_representation=JAVA_LEGACY)) + coll.insert_one(doc) + raw_java_legacy = CodecOptions(uuid_representation=JAVA_LEGACY, + document_class=RawBSONDocument) + coll = db.get_collection('test_raw', codec_options=raw_java_legacy) + self.assertEqual( + RawBSONDocument(BSON.encode(doc, codec_options=raw_java_legacy)), + coll.find_one()) + + @client_context.require_connection + def test_raw_bson_document_embedded(self): + doc = {'embedded': self.document} + db = client_context.client.pymongo_test + db.test_raw.insert_one(doc) + result = db.test_raw.find_one() + self.assertEqual(BSON(self.document.raw).decode(), result['embedded']) + + # Make sure that CodecOptions are preserved. + # {'embedded': [ + # {u'date': datetime.datetime(2015, 6, 3, 18, 40, 50, 826000), + # u'_id': UUID('026fab8f-975f-4965-9fbf-85ad874c60ff')} + # ]} + # encoded with JAVA_LEGACY uuid representation. + bson_string = ( + b'D\x00\x00\x00\x04embedded\x005\x00\x00\x00\x030\x00-\x00\x00\x00' + b'\tdate\x00\x8a\xd6\xb9\xbaM\x01\x00\x00\x05_id\x00\x10\x00\x00' + b'\x00\x03eI_\x97\x8f\xabo\x02\xff`L\x87\xad\x85\xbf\x9f\x00\x00' + b'\x00' + ) + rbd = RawBSONDocument( + bson_string, + codec_options=CodecOptions(uuid_representation=JAVA_LEGACY)) + + db.test_raw.drop() + db.test_raw.insert_one(rbd) + result = db.get_collection('test_raw', codec_options=CodecOptions( + uuid_representation=JAVA_LEGACY)).find_one() + self.assertEqual(rbd['embedded'][0]['_id'], + result['embedded'][0]['_id'])