From dca72b7884f7940498c0898cdaf7b041bc6386db Mon Sep 17 00:00:00 2001 From: Shane Harvey Date: Tue, 12 Apr 2022 17:18:23 -0700 Subject: [PATCH] PYTHON-3222 Fix memory leak in cbson decode_all (#927) Add decode_all keyword arg for codec_options. Make decode_all show up in docs. --- bson/__init__.py | 67 +++++++++++++++++++++++---------------------- bson/_cbsonmodule.c | 43 ++++------------------------- doc/changelog.rst | 26 ++++++++++++++++++ test/test_bson.py | 9 ++++++ 4 files changed, 76 insertions(+), 69 deletions(-) diff --git a/bson/__init__.py b/bson/__init__.py index 11a87bbe7..70aa6ae86 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -982,6 +982,40 @@ def decode( return _bson_to_dict(data, opts) +def _decode_all(data: _ReadableBuffer, opts: "CodecOptions[_DocumentType]") -> List[_DocumentType]: + """Decode a BSON data to multiple documents.""" + data, view = get_data_and_view(data) + data_len = len(data) + docs: List[_DocumentType] = [] + position = 0 + end = data_len - 1 + use_raw = _raw_document_class(opts.document_class) + try: + while position < end: + obj_size = _UNPACK_INT_FROM(data, position)[0] + if data_len - position < obj_size: + raise InvalidBSON("invalid object size") + obj_end = position + obj_size - 1 + if data[obj_end] != 0: + raise InvalidBSON("bad eoo") + if use_raw: + docs.append(opts.document_class(data[position : obj_end + 1], opts)) # type: ignore + else: + docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts)) + position += obj_size + return docs + except InvalidBSON: + raise + except Exception: + # Change exception type to InvalidBSON but preserve traceback. + _, exc_value, exc_tb = sys.exc_info() + raise InvalidBSON(str(exc_value)).with_traceback(exc_tb) + + +if _USE_C: + _decode_all = _cbson._decode_all # noqa: F811 + + def decode_all( data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None ) -> List[_DocumentType]: @@ -1008,41 +1042,10 @@ def decode_all( `codec_options`. """ opts = codec_options or DEFAULT_CODEC_OPTIONS - data, view = get_data_and_view(data) if not isinstance(opts, CodecOptions): raise _CODEC_OPTIONS_TYPE_ERROR - data_len = len(data) - docs: List[_DocumentType] = [] - position = 0 - end = data_len - 1 - use_raw = _raw_document_class(opts.document_class) - try: - while position < end: - obj_size = _UNPACK_INT_FROM(data, position)[0] - if data_len - position < obj_size: - raise InvalidBSON("invalid object size") - obj_end = position + obj_size - 1 - if data[obj_end] != 0: - raise InvalidBSON("bad eoo") - if use_raw: - docs.append( - opts.document_class(data[position : obj_end + 1], codec_options) # type: ignore - ) - else: - docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts)) - position += obj_size - return docs - except InvalidBSON: - raise - except Exception: - # Change exception type to InvalidBSON but preserve traceback. - _, exc_value, exc_tb = sys.exc_info() - raise InvalidBSON(str(exc_value)).with_traceback(exc_tb) - - -if _USE_C: - decode_all = _cbson.decode_all # noqa: F811 + return _decode_all(data, opts) # type: ignore[arg-type] def _decode_selective(rawdoc: Any, fields: Any, codec_options: Any) -> Mapping[Any, Any]: diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 8100e951c..1a296db52 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -53,7 +53,6 @@ struct module_state { PyObject* BSONInt64; PyObject* Decimal128; PyObject* Mapping; - PyObject* CodecOptions; }; #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) @@ -344,8 +343,7 @@ static int _load_python_objects(PyObject* module) { _load_object(&state->BSONInt64, "bson.int64", "Int64") || _load_object(&state->Decimal128, "bson.decimal128", "Decimal128") || _load_object(&state->UUID, "uuid", "UUID") || - _load_object(&state->Mapping, "collections.abc", "Mapping") || - _load_object(&state->CodecOptions, "bson.codec_options", "CodecOptions")) { + _load_object(&state->Mapping, "collections.abc", "Mapping")) { return 1; } /* Reload our REType hack too. */ @@ -498,26 +496,6 @@ int convert_codec_options(PyObject* options_obj, void* p) { return 1; } -/* Fill out a codec_options_t* with default options. - * - * Return 1 on success. - * Return 0 on failure. - */ -int default_codec_options(struct module_state* state, codec_options_t* options) { - PyObject* options_obj = NULL; - PyObject* codec_options_func = _get_object( - state->CodecOptions, "bson.codec_options", "CodecOptions"); - if (codec_options_func == NULL) { - return 0; - } - options_obj = PyObject_CallFunctionObjArgs(codec_options_func, NULL); - Py_DECREF(codec_options_func); - if (options_obj == NULL) { - return 0; - } - return convert_codec_options(options_obj, options); -} - void destroy_codec_options(codec_options_t* options) { Py_CLEAR(options->document_class); Py_CLEAR(options->tzinfo); @@ -2411,15 +2389,10 @@ static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) { PyObject* value; PyObject* result_tuple; - if (!PyArg_ParseTuple(args, "OII|O&", &bson, &position, &max, + if (!PyArg_ParseTuple(args, "OIIO&", &bson, &position, &max, convert_codec_options, &options)) { return NULL; } - if (PyTuple_GET_SIZE(args) < 4) { - if (!default_codec_options(GETSTATE(self), &options)) { - return NULL; - } - } if (!PyBytes_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to _element_to_dict must be a bytes object"); @@ -2594,17 +2567,13 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyObject* dict; PyObject* result = NULL; codec_options_t options; - PyObject* options_obj; + PyObject* options_obj = NULL; Py_buffer view = {0}; - if (!PyArg_ParseTuple(args, "O|O", &bson, &options_obj)) { + if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) { return NULL; } - if ((PyTuple_GET_SIZE(args) < 2) || (options_obj == Py_None)) { - if (!default_codec_options(GETSTATE(self), &options)) { - return NULL; - } - } else if (!convert_codec_options(options_obj, &options)) { + if (!convert_codec_options(options_obj, &options)) { return NULL; } @@ -2698,7 +2667,7 @@ static PyMethodDef _CBSONMethods[] = { "convert a dictionary to a string containing its BSON representation."}, {"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS, "convert a BSON string to a SON object."}, - {"decode_all", _cbson_decode_all, METH_VARARGS, + {"_decode_all", _cbson_decode_all, METH_VARARGS, "convert binary data to a sequence of documents."}, {"_element_to_dict", _cbson_element_to_dict, METH_VARARGS, "Decode a single key, value pair."}, diff --git a/doc/changelog.rst b/doc/changelog.rst index 28c467a29..1f8a146b3 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,32 @@ Changelog ========= +Changes in Version 4.1.1 +------------------------- + +Issues Resolved +............... + +Version 4.1.1 fixes a number of bugs: + +- Fixed a memory leak bug when calling :func:`~bson.decode_all` without a + ``codec_options`` argument (`PYTHON-3222`_). +- Fixed a bug where :func:`~bson.decode_all` did not accept ``codec_options`` + as a keyword argument (`PYTHON-3222`_). +- Fixed an oversight where type markers (py.typed files) were not included + in our release distributions (`PYTHON-3214`_). +- Fixed a bug where pymongo would raise a "NameError: name sys is not defined" + exception when attempting to parse a "mongodb+srv://" URI when the dnspython + dependency was not installed (`PYTHON-3198`_). + +See the `PyMongo 4.1.1 release notes in JIRA`_ for the list of resolved issues +in this release. + +.. _PYTHON-3198: https://jira.mongodb.org/browse/PYTHON-3198 +.. _PYTHON-3214: https://jira.mongodb.org/browse/PYTHON-3214 +.. _PYTHON-3222: https://jira.mongodb.org/browse/PYTHON-3222 +.. _PyMongo 4.1.1 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=33290 + Changes in Version 4.1 ---------------------- diff --git a/test/test_bson.py b/test/test_bson.py index b0dce7db4..8ad65f341 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -1006,6 +1006,15 @@ class TestCodecOptions(unittest.TestCase): decoded = bson.decode_all(bson.encode(doc2), None)[0] self.assertIsInstance(decoded["id"], Binary) + def test_decode_all_kwarg(self): + doc = {"a": uuid.uuid4()} + opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD) + encoded = encode(doc, codec_options=opts) + # Positional codec_options + self.assertEqual([doc], decode_all(encoded, opts)) + # Keyword codec_options + self.assertEqual([doc], decode_all(encoded, codec_options=opts)) + def test_unicode_decode_error_handler(self): enc = encode({"keystr": "foobar"})