PYTHON-3222 Fix memory leak in cbson decode_all (#927)

Add decode_all keyword arg for codec_options.
Make decode_all show up in docs.
This commit is contained in:
Shane Harvey 2022-04-12 17:18:23 -07:00 committed by GitHub
parent 5ccbb4d6d8
commit dca72b7884
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 69 deletions

View File

@ -982,6 +982,40 @@ def decode(
return _bson_to_dict(data, opts)
def _decode_all(data: _ReadableBuffer, opts: "CodecOptions[_DocumentType]") -> List[_DocumentType]:
"""Decode a BSON data to multiple documents."""
data, view = get_data_and_view(data)
data_len = len(data)
docs: List[_DocumentType] = []
position = 0
end = data_len - 1
use_raw = _raw_document_class(opts.document_class)
try:
while position < end:
obj_size = _UNPACK_INT_FROM(data, position)[0]
if data_len - position < obj_size:
raise InvalidBSON("invalid object size")
obj_end = position + obj_size - 1
if data[obj_end] != 0:
raise InvalidBSON("bad eoo")
if use_raw:
docs.append(opts.document_class(data[position : obj_end + 1], opts)) # type: ignore
else:
docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts))
position += obj_size
return docs
except InvalidBSON:
raise
except Exception:
# Change exception type to InvalidBSON but preserve traceback.
_, exc_value, exc_tb = sys.exc_info()
raise InvalidBSON(str(exc_value)).with_traceback(exc_tb)
if _USE_C:
_decode_all = _cbson._decode_all # noqa: F811
def decode_all(
data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> List[_DocumentType]:
@ -1008,41 +1042,10 @@ def decode_all(
`codec_options`.
"""
opts = codec_options or DEFAULT_CODEC_OPTIONS
data, view = get_data_and_view(data)
if not isinstance(opts, CodecOptions):
raise _CODEC_OPTIONS_TYPE_ERROR
data_len = len(data)
docs: List[_DocumentType] = []
position = 0
end = data_len - 1
use_raw = _raw_document_class(opts.document_class)
try:
while position < end:
obj_size = _UNPACK_INT_FROM(data, position)[0]
if data_len - position < obj_size:
raise InvalidBSON("invalid object size")
obj_end = position + obj_size - 1
if data[obj_end] != 0:
raise InvalidBSON("bad eoo")
if use_raw:
docs.append(
opts.document_class(data[position : obj_end + 1], codec_options) # type: ignore
)
else:
docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts))
position += obj_size
return docs
except InvalidBSON:
raise
except Exception:
# Change exception type to InvalidBSON but preserve traceback.
_, exc_value, exc_tb = sys.exc_info()
raise InvalidBSON(str(exc_value)).with_traceback(exc_tb)
if _USE_C:
decode_all = _cbson.decode_all # noqa: F811
return _decode_all(data, opts) # type: ignore[arg-type]
def _decode_selective(rawdoc: Any, fields: Any, codec_options: Any) -> Mapping[Any, Any]:

View File

@ -53,7 +53,6 @@ struct module_state {
PyObject* BSONInt64;
PyObject* Decimal128;
PyObject* Mapping;
PyObject* CodecOptions;
};
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
@ -344,8 +343,7 @@ static int _load_python_objects(PyObject* module) {
_load_object(&state->BSONInt64, "bson.int64", "Int64") ||
_load_object(&state->Decimal128, "bson.decimal128", "Decimal128") ||
_load_object(&state->UUID, "uuid", "UUID") ||
_load_object(&state->Mapping, "collections.abc", "Mapping") ||
_load_object(&state->CodecOptions, "bson.codec_options", "CodecOptions")) {
_load_object(&state->Mapping, "collections.abc", "Mapping")) {
return 1;
}
/* Reload our REType hack too. */
@ -498,26 +496,6 @@ int convert_codec_options(PyObject* options_obj, void* p) {
return 1;
}
/* Fill out a codec_options_t* with default options.
*
* Return 1 on success.
* Return 0 on failure.
*/
int default_codec_options(struct module_state* state, codec_options_t* options) {
PyObject* options_obj = NULL;
PyObject* codec_options_func = _get_object(
state->CodecOptions, "bson.codec_options", "CodecOptions");
if (codec_options_func == NULL) {
return 0;
}
options_obj = PyObject_CallFunctionObjArgs(codec_options_func, NULL);
Py_DECREF(codec_options_func);
if (options_obj == NULL) {
return 0;
}
return convert_codec_options(options_obj, options);
}
void destroy_codec_options(codec_options_t* options) {
Py_CLEAR(options->document_class);
Py_CLEAR(options->tzinfo);
@ -2411,15 +2389,10 @@ static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) {
PyObject* value;
PyObject* result_tuple;
if (!PyArg_ParseTuple(args, "OII|O&", &bson, &position, &max,
if (!PyArg_ParseTuple(args, "OIIO&", &bson, &position, &max,
convert_codec_options, &options)) {
return NULL;
}
if (PyTuple_GET_SIZE(args) < 4) {
if (!default_codec_options(GETSTATE(self), &options)) {
return NULL;
}
}
if (!PyBytes_Check(bson)) {
PyErr_SetString(PyExc_TypeError, "argument to _element_to_dict must be a bytes object");
@ -2594,17 +2567,13 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
PyObject* dict;
PyObject* result = NULL;
codec_options_t options;
PyObject* options_obj;
PyObject* options_obj = NULL;
Py_buffer view = {0};
if (!PyArg_ParseTuple(args, "O|O", &bson, &options_obj)) {
if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) {
return NULL;
}
if ((PyTuple_GET_SIZE(args) < 2) || (options_obj == Py_None)) {
if (!default_codec_options(GETSTATE(self), &options)) {
return NULL;
}
} else if (!convert_codec_options(options_obj, &options)) {
if (!convert_codec_options(options_obj, &options)) {
return NULL;
}
@ -2698,7 +2667,7 @@ static PyMethodDef _CBSONMethods[] = {
"convert a dictionary to a string containing its BSON representation."},
{"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS,
"convert a BSON string to a SON object."},
{"decode_all", _cbson_decode_all, METH_VARARGS,
{"_decode_all", _cbson_decode_all, METH_VARARGS,
"convert binary data to a sequence of documents."},
{"_element_to_dict", _cbson_element_to_dict, METH_VARARGS,
"Decode a single key, value pair."},

View File

@ -1,6 +1,32 @@
Changelog
=========
Changes in Version 4.1.1
-------------------------
Issues Resolved
...............
Version 4.1.1 fixes a number of bugs:
- Fixed a memory leak bug when calling :func:`~bson.decode_all` without a
``codec_options`` argument (`PYTHON-3222`_).
- Fixed a bug where :func:`~bson.decode_all` did not accept ``codec_options``
as a keyword argument (`PYTHON-3222`_).
- Fixed an oversight where type markers (py.typed files) were not included
in our release distributions (`PYTHON-3214`_).
- Fixed a bug where pymongo would raise a "NameError: name sys is not defined"
exception when attempting to parse a "mongodb+srv://" URI when the dnspython
dependency was not installed (`PYTHON-3198`_).
See the `PyMongo 4.1.1 release notes in JIRA`_ for the list of resolved issues
in this release.
.. _PYTHON-3198: https://jira.mongodb.org/browse/PYTHON-3198
.. _PYTHON-3214: https://jira.mongodb.org/browse/PYTHON-3214
.. _PYTHON-3222: https://jira.mongodb.org/browse/PYTHON-3222
.. _PyMongo 4.1.1 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=33290
Changes in Version 4.1
----------------------

View File

@ -1006,6 +1006,15 @@ class TestCodecOptions(unittest.TestCase):
decoded = bson.decode_all(bson.encode(doc2), None)[0]
self.assertIsInstance(decoded["id"], Binary)
def test_decode_all_kwarg(self):
doc = {"a": uuid.uuid4()}
opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
encoded = encode(doc, codec_options=opts)
# Positional codec_options
self.assertEqual([doc], decode_all(encoded, opts))
# Keyword codec_options
self.assertEqual([doc], decode_all(encoded, codec_options=opts))
def test_unicode_decode_error_handler(self):
enc = encode({"keystr": "foobar"})