From 2ba730722b74f123efb8375338df41eec133ecbc Mon Sep 17 00:00:00 2001 From: "A. Jesse Jiryu Davis" Date: Wed, 3 Dec 2014 17:10:34 -0500 Subject: [PATCH] PYTHON-526 Remove 'compile_re' option. PyMongo now never attempts to compile BSON regular expressions as Python native regular expressions. --- bson/__init__.py | 82 ++++++++++++++++------------------ bson/_cbsonmodule.c | 77 ++++++++++++++----------------- bson/json_util.py | 19 ++------ bson/son.py | 8 ++-- doc/api/pymongo/collection.rst | 2 +- doc/api/pymongo/cursor.rst | 2 +- doc/changelog.rst | 17 +++++++ gridfs/__init__.py | 3 -- gridfs/grid_file.py | 4 +- pymongo/collection.py | 31 ++++++------- pymongo/command_cursor.py | 8 ++-- pymongo/cursor.py | 10 ++--- pymongo/database.py | 28 +++++++----- pymongo/helpers.py | 6 +-- test/test_bson.py | 34 ++++---------- test/test_collection.py | 21 +-------- test/test_cursor.py | 3 -- test/test_database.py | 8 ++-- test/test_json_util.py | 13 +----- 19 files changed, 155 insertions(+), 221 deletions(-) diff --git a/bson/__init__.py b/bson/__init__.py index 365153cba..4a79539ec 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -239,10 +239,7 @@ def _get_regex(data, position, dummy, opts): pattern, position = _get_c_string(data, position) bson_flags, position = _get_c_string(data, position) bson_re = Regex(pattern, bson_flags) - if opts[3]: - return bson_re.try_compile(), position - else: - return bson_re, position + return bson_re, position def _get_ref(data, position, obj_end, opts): @@ -308,9 +305,9 @@ def _elements_to_dict(data, position, obj_end, opts): return result -def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re): +def _bson_to_dict(data, as_class, tz_aware, uuid_subtype): """Decode a BSON string to as_class.""" - opts = (as_class, tz_aware, uuid_subtype, compile_re) + opts = (as_class, tz_aware, uuid_subtype) try: obj_size = _UNPACK_INT(data[:4])[0] except struct.error as e: @@ -700,7 +697,7 @@ if _USE_C: def decode_all(data, as_class=dict, - tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True): + tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE): """Decode BSON data to multiple documents. `data` must be a string of concatenated, valid, BSON-encoded @@ -714,16 +711,23 @@ def decode_all(data, as_class=dict, :class:`~datetime.datetime` instances - `uuid_subtype` (optional): The BSON representation to use for UUIDs. See the :mod:`bson.binary` module for all options. - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regular expressions into Python regular expressions. Return - instances of :class:`~bson.regex.Regex` instead. Can avoid - :exc:`~bson.errors.InvalidBSON` errors when receiving - Python-incompatible regular expressions, for example from ``currentOp`` + + .. versionchanged:: 3.0 + Removed `compile_re` option: PyMongo now always represents BSON regular + expressions as :class:`~bson.regex.Regex` objects. Use + :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a + BSON regular expression to a Python regular expression object. .. versionchanged:: 2.7 - Added `compile_re` option. + Added `compile_re` option. If set to False, PyMongo represented BSON + regular expressions as :class:`~bson.regex.Regex` objects instead of + attempting to compile BSON regular expressions as Python native + regular expressions, thus preventing errors for some incompatible + patterns, see `PYTHON-500`_. + + .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ - opts = (as_class, tz_aware, uuid_subtype, compile_re) + opts = (as_class, tz_aware, uuid_subtype) docs = [] position = 0 end = len(data) - 1 @@ -751,7 +755,7 @@ if _USE_C: def decode_iter(data, as_class=dict, tz_aware=True, - uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True): + uuid_subtype=OLD_UUID_SUBTYPE): """Decode BSON data to multiple documents as a generator. Works similarly to the decode_all function, but yields one document at a @@ -768,13 +772,6 @@ def decode_iter(data, as_class=dict, tz_aware=True, :class:`~datetime.datetime` instances - `uuid_subtype` (optional): The BSON representation to use for UUIDs. See the :mod:`bson.binary` module for all options. - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regular expressions into Python regular expressions. Return - instances of - :class:`~bson.regex.Regex` instead. Can avoid - :exc:`~bson.errors.InvalidBSON` errors when receiving - Python-incompatible regular expressions, for example from - ``currentOp`` .. versionadded:: 2.8 """ @@ -786,11 +783,11 @@ def decode_iter(data, as_class=dict, tz_aware=True, position += obj_size yield _bson_to_dict(elements, as_class, - tz_aware, uuid_subtype, compile_re) + tz_aware, uuid_subtype) def decode_file_iter(file_obj, as_class=dict, tz_aware=True, - uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True): + uuid_subtype=OLD_UUID_SUBTYPE): """Decode bson data from a file to multiple documents as a generator. Works similarly to the decode_all function, but reads from the file object @@ -804,13 +801,6 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True, :class:`~datetime.datetime` instances - `uuid_subtype` (optional): The BSON representation to use for UUIDs. See the :mod:`bson.binary` module for all options. - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regular expressions into Python regular expressions. Return - instances of - :class:`~bson.regex.Regex` instead. Can avoid - :exc:`~bson.errors.InvalidBSON` errors when receiving - Python-incompatible regular expressions, for example from - ``currentOp`` .. versionadded:: 2.8 """ @@ -824,7 +814,7 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True, obj_size = _UNPACK_INT(size_data)[0] - 4 elements = size_data + file_obj.read(obj_size) yield _bson_to_dict(elements, as_class, - tz_aware, uuid_subtype, compile_re) + tz_aware, uuid_subtype) def is_valid(bson): @@ -841,7 +831,7 @@ def is_valid(bson): raise TypeError("BSON data must be an instance of a subclass of bytes") try: - _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE, True) + _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE) return True except Exception: return False @@ -874,7 +864,7 @@ class BSON(bytes): return cls(_dict_to_bson(document, check_keys, uuid_subtype)) def decode(self, as_class=dict, - tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True): + tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE): """Decode this BSON data. The default type to use for the resultant document is @@ -896,19 +886,23 @@ class BSON(bytes): :class:`~datetime.datetime` instances - `uuid_subtype` (optional): The BSON representation to use for UUIDs. See the :mod:`bson.binary` module for all options. - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regular expressions into Python regular expressions. Return - instances of - :class:`~bson.regex.Regex` instead. Can avoid - :exc:`~bson.errors.InvalidBSON` errors when receiving - Python-incompatible regular expressions, for example from - ``currentOp`` + + .. versionchanged:: 3.0 + Removed `compile_re` option: PyMongo now always represents BSON + regular expressions as :class:`~bson.regex.Regex` objects. Use + :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a + BSON regular expression to a Python regular expression object. .. versionchanged:: 2.7 - Added ``compile_re`` option. + Added `compile_re` option. If set to False, PyMongo represented BSON + regular expressions as :class:`~bson.regex.Regex` objects instead of + attempting to compile BSON regular expressions as Python native + regular expressions, thus preventing errors for some incompatible + patterns, see `PYTHON-500`_. + + .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ - return _bson_to_dict( - self, as_class, tz_aware, uuid_subtype, compile_re) + return _bson_to_dict(self, as_class, tz_aware, uuid_subtype) def has_c(): diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 13a143c8a..8129d3907 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -42,7 +42,6 @@ struct module_state { PyObject* Code; PyObject* ObjectId; PyObject* DBRef; - PyObject* RECompile; PyObject* Regex; PyObject* UUID; PyObject* Timestamp; @@ -108,8 +107,7 @@ _downcast_and_check(Py_ssize_t size, int extra) { static PyObject* elements_to_dict(PyObject* self, const char* string, unsigned max, PyObject* as_class, unsigned char tz_aware, - unsigned char uuid_subtype, - unsigned char compile_re); + unsigned char uuid_subtype); static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, @@ -319,6 +317,7 @@ static int _load_object(PyObject** object, char* module_name, char* object_name) * Returns non-zero on failure. */ static int _load_python_objects(PyObject* module) { PyObject* empty_string; + PyObject* re_compile; PyObject* compiled; struct module_state *state = GETSTATE(module); @@ -330,7 +329,6 @@ static int _load_python_objects(PyObject* module) { _load_object(&state->MinKey, "bson.min_key", "MinKey") || _load_object(&state->MaxKey, "bson.max_key", "MaxKey") || _load_object(&state->UTC, "bson.tz_util", "utc") || - _load_object(&state->RECompile, "re", "compile") || _load_object(&state->Regex, "bson.regex", "Regex") || _load_object(&state->BSONInt64, "bson.int64", "Int64") || _load_object(&state->UUID, "uuid", "UUID") || @@ -347,7 +345,13 @@ static int _load_python_objects(PyObject* module) { state->REType = NULL; return 1; } - compiled = PyObject_CallFunction(state->RECompile, "O", empty_string); + + if (_load_object(&re_compile, "re", "compile")) { + state->REType = NULL; + return 1; + } + + compiled = PyObject_CallFunction(re_compile, "O", empty_string); if (compiled == NULL) { state->REType = NULL; Py_DECREF(empty_string); @@ -1462,10 +1466,11 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { return result; } -static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position, - unsigned char type, unsigned max, PyObject* as_class, - unsigned char tz_aware, unsigned char uuid_subtype, - unsigned char compile_re) { +static PyObject* get_value(PyObject* self, const char* buffer, + unsigned* position, unsigned char type, + unsigned max, PyObject* as_class, + unsigned char tz_aware, + unsigned char uuid_subtype) { struct module_state *state = GETSTATE(self); PyObject* value = NULL; @@ -1521,8 +1526,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio goto invalid; } value = elements_to_dict(self, buffer + *position + 4, - size - 5, as_class, tz_aware, uuid_subtype, - compile_re); + size - 5, as_class, tz_aware, + uuid_subtype); if (!value) { goto invalid; } @@ -1621,8 +1626,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio } to_append = get_value(self, buffer, position, bson_type, max - (unsigned)key_size, - as_class, tz_aware, uuid_subtype, - compile_re); + as_class, tz_aware, uuid_subtype); Py_LeaveRecursiveCall(); if (!to_append) { Py_DECREF(value); @@ -1849,7 +1853,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio } case 11: { - PyObject* compile_func; + PyObject* regex_class; PyObject* pattern; int flags; size_t flags_length, i; @@ -1890,19 +1894,11 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio } *position += (unsigned)flags_length + 1; - /* - * Use re.compile() if we're configured to compile regular - * expressions, else create an instance of our Regex class. - */ - if (compile_re) { - compile_func = _get_object(state->RECompile, "re", "compile"); - } else { - compile_func = _get_object(state->Regex, "bson.regex", "Regex"); - } - - if (compile_func) { - value = PyObject_CallFunction(compile_func, "Oi", pattern, flags); - Py_DECREF(compile_func); + regex_class = _get_object(state->Regex, "bson.regex", "Regex"); + if (regex_class) { + value = PyObject_CallFunction(regex_class, + "Oi", pattern, flags); + Py_DECREF(regex_class); } Py_DECREF(pattern); break; @@ -2040,7 +2036,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio } scope = elements_to_dict(self, buffer + *position + 4, scope_size - 5, (PyObject*)&PyDict_Type, - tz_aware, uuid_subtype, compile_re); + tz_aware, uuid_subtype); if (!scope) { Py_DECREF(code); goto invalid; @@ -2190,8 +2186,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio static PyObject* _elements_to_dict(PyObject* self, const char* string, unsigned max, PyObject* as_class, unsigned char tz_aware, - unsigned char uuid_subtype, - unsigned char compile_re) { + unsigned char uuid_subtype) { unsigned position = 0; PyObject* dict = PyObject_CallObject(as_class, NULL); if (!dict) { @@ -2219,8 +2214,7 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, } position += (unsigned)name_length + 1; value = get_value(self, string, &position, type, - max - position, as_class, tz_aware, uuid_subtype, - compile_re); + max - position, as_class, tz_aware, uuid_subtype); if (!value) { Py_DECREF(name); Py_DECREF(dict); @@ -2237,13 +2231,12 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string, static PyObject* elements_to_dict(PyObject* self, const char* string, unsigned max, PyObject* as_class, unsigned char tz_aware, - unsigned char uuid_subtype, - unsigned char compile_re) { + unsigned char uuid_subtype) { PyObject* result; if (Py_EnterRecursiveCall(" while decoding a BSON document")) return NULL; result = _elements_to_dict(self, string, max, - as_class, tz_aware, uuid_subtype, compile_re); + as_class, tz_aware, uuid_subtype); Py_LeaveRecursiveCall(); return result; } @@ -2256,10 +2249,9 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { PyObject* as_class; unsigned char tz_aware; unsigned char uuid_subtype; - unsigned char compile_re; if (!PyArg_ParseTuple( - args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) { + args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) { return NULL; } @@ -2325,7 +2317,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { } return elements_to_dict(self, string + 4, (unsigned)size - 5, - as_class, tz_aware, uuid_subtype, compile_re); + as_class, tz_aware, uuid_subtype); } static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { @@ -2338,11 +2330,10 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { PyObject* as_class = (PyObject*)&PyDict_Type; unsigned char tz_aware = 1; unsigned char uuid_subtype = 3; - unsigned char compile_re = 1; if (!PyArg_ParseTuple( - args, "O|Obbb", - &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) { + args, "O|Obb", + &bson, &as_class, &tz_aware, &uuid_subtype)) { return NULL; } @@ -2413,7 +2404,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { } dict = elements_to_dict(self, string + 4, (unsigned)size - 5, - as_class, tz_aware, uuid_subtype, compile_re); + as_class, tz_aware, uuid_subtype); if (!dict) { Py_DECREF(result); return NULL; @@ -2444,7 +2435,6 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) { Py_VISIT(GETSTATE(m)->Code); Py_VISIT(GETSTATE(m)->ObjectId); Py_VISIT(GETSTATE(m)->DBRef); - Py_VISIT(GETSTATE(m)->RECompile); Py_VISIT(GETSTATE(m)->Regex); Py_VISIT(GETSTATE(m)->UUID); Py_VISIT(GETSTATE(m)->Timestamp); @@ -2460,7 +2450,6 @@ static int _cbson_clear(PyObject *m) { Py_CLEAR(GETSTATE(m)->Code); Py_CLEAR(GETSTATE(m)->ObjectId); Py_CLEAR(GETSTATE(m)->DBRef); - Py_CLEAR(GETSTATE(m)->RECompile); Py_CLEAR(GETSTATE(m)->Regex); Py_CLEAR(GETSTATE(m)->UUID); Py_CLEAR(GETSTATE(m)->Timestamp); diff --git a/bson/json_util.py b/bson/json_util.py index 04891e908..2c5a56f08 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -117,17 +117,8 @@ def loads(s, *args, **kwargs): """Helper function that wraps :class:`json.loads`. Automatically passes the object_hook for BSON type conversion. - - :Parameters: - - `compile_re` (optional): if ``False``, don't attempt to compile BSON - regular expressions into Python regular expressions. Return instances - of :class:`~bson.bsonregex.BSONRegex` instead. - - .. versionchanged:: 2.7 - Added ``compile_re`` option. """ - compile_re = kwargs.pop('compile_re', True) - kwargs['object_hook'] = lambda dct: object_hook(dct, compile_re) + kwargs['object_hook'] = lambda dct: object_hook(dct) return json.loads(s, *args, **kwargs) @@ -145,7 +136,7 @@ def _json_convert(obj): return obj -def object_hook(dct, compile_re=True): +def object_hook(dct): if "$oid" in dct: return ObjectId(str(dct["$oid"])) if "$ref" in dct: @@ -181,11 +172,7 @@ def object_hook(dct, compile_re=True): # PyMongo always adds $options but some other tools may not. for opt in dct.get("$options", ""): flags |= _RE_OPT_TABLE.get(opt, 0) - - if compile_re: - return re.compile(dct["$regex"], flags) - else: - return Regex(dct["$regex"], flags) + return Regex(dct["$regex"], flags) if "$minKey" in dct: return MinKey() if "$maxKey" in dct: diff --git a/bson/son.py b/bson/son.py index 3c0794069..a2ad43e62 100644 --- a/bson/son.py +++ b/bson/son.py @@ -53,7 +53,8 @@ class SON(dict): list array both dict / `SON` object both datetime.datetime [#dt]_ [#dt2]_ date both - `bson.regex.Regex` / compiled re [#re]_ regex both + `bson.regex.Regex` regex both + compiled re [#re]_ regex py -> bson `bson.binary.Binary` binary both `bson.objectid.ObjectId` oid both `bson.dbref.DBRef` dbref both @@ -77,9 +78,8 @@ class SON(dict): should always use UTC. .. [#re] :class:`~bson.regex.Regex` instances and regular expression objects from ``re.compile()`` are both saved as BSON regular expressions. - BSON regular expressions are decoded as Python regular expressions by - default, or as :class:`~bson.regex.Regex` instances if the ``compile_re`` - option is set to ``False``. + BSON regular expressions are decoded as :class:`~bson.regex.Regex` + instances. .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x it will be decoded to an instance of :class:`~bson.binary.Binary` with diff --git a/doc/api/pymongo/collection.rst b/doc/api/pymongo/collection.rst index 016ffc196..42f7a3795 100644 --- a/doc/api/pymongo/collection.rst +++ b/doc/api/pymongo/collection.rst @@ -34,7 +34,7 @@ .. automethod:: initialize_unordered_bulk_op .. automethod:: initialize_ordered_bulk_op .. automethod:: drop - .. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, await_data=False[, partial=False[, manipulate=True[, read_preference=None[, exhaust=False[, compile_re=True]]]]]]]]]]]]]]]]) + .. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, await_data=False[, partial=False[, manipulate=True[, read_preference=None[, exhaust=False]]]]]]]]]]]]]]]) .. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]]) .. automethod:: parallel_scan .. automethod:: count diff --git a/doc/api/pymongo/cursor.rst b/doc/api/pymongo/cursor.rst index 009c040d7..00c358d00 100644 --- a/doc/api/pymongo/cursor.rst +++ b/doc/api/pymongo/cursor.rst @@ -4,7 +4,7 @@ .. automodule:: pymongo.cursor :synopsis: Tools for iterating over MongoDB query results - .. autoclass:: pymongo.cursor.Cursor(collection, spec=None, fields=None, skip=0, limit=0, timeout=True, snapshot=False, tailable=False, sort=None, max_scan=None, as_class=None, await_data=False, partial=False, manipulate=True, read_preference=None, tag_sets=None, secondary_acceptable_latency_ms=None, exhaust=False, compile_re=True) + .. autoclass:: pymongo.cursor.Cursor(collection, spec=None, fields=None, skip=0, limit=0, timeout=True, snapshot=False, tailable=False, sort=None, max_scan=None, as_class=None, await_data=False, partial=False, manipulate=True, read_preference=None, tag_sets=None, secondary_acceptable_latency_ms=None, exhaust=False) :members: .. describe:: c[index] diff --git a/doc/changelog.rst b/doc/changelog.rst index d86d23986..f99e56291 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -96,6 +96,23 @@ Since PyMongo 1.6, methods ``open`` and ``close`` of :class:`~gridfs.GridFS` raised an ``UnsupportedAPI`` exception, as did the entire ``GridFile`` class. The unsupported methods, the class, and the exception are all deleted. +:mod:`~bson` Changes +.................... + +The `compile_re` option is removed from all methods +that accepted it in :mod:`~bson` and :mod:`~bson.json_util`. Additionally, it +is removed from :meth:`~pymongo.collection.Collection.find`, +:meth:`~pymongo.collection.Collection.find_one`, +:meth:`~pymongo.collection.Collection.aggregate`, +:meth:`~pymongo.database.Database.command`, and so on. +PyMongo now always represents BSON regular expressions as +:class:`~bson.regex.Regex` objects. This prevents errors for incompatible +patterns, see `PYTHON-500`_. Use :meth:`~bson.regex.Regex.try_compile` to +attempt to convert from a BSON regular expression to a Python regular +expression object. + +.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 + Issues Resolved ............... diff --git a/gridfs/__init__.py b/gridfs/__init__.py index 196479635..884c83c24 100644 --- a/gridfs/__init__.py +++ b/gridfs/__init__.py @@ -322,9 +322,6 @@ class GridFS(object): this query. - `tag_sets` **DEPRECATED** - `secondary_acceptable_latency_ms` **DEPRECATED** - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regex objects into Python regexes. Return instances of - :class:`~bson.regex.Regex` instead. Raises :class:`TypeError` if any of the arguments are of improper type. Returns an instance of diff --git a/gridfs/grid_file.py b/gridfs/grid_file.py index 993a8e788..b83fa318a 100644 --- a/gridfs/grid_file.py +++ b/gridfs/grid_file.py @@ -605,7 +605,7 @@ class GridOutCursor(Cursor): def __init__(self, collection, spec=None, skip=0, limit=0, timeout=True, sort=None, max_scan=None, read_preference=None, tag_sets=None, - secondary_acceptable_latency_ms=None, compile_re=True): + secondary_acceptable_latency_ms=None): """Create a new cursor, similar to the normal :class:`~pymongo.cursor.Cursor`. @@ -626,7 +626,7 @@ class GridOutCursor(Cursor): collection.files, spec, skip=skip, limit=limit, timeout=timeout, sort=sort, max_scan=max_scan, read_preference=read_preference, secondary_acceptable_latency_ms=secondary_acceptable_latency_ms, - tag_sets=tag_sets, compile_re=compile_re) + tag_sets=tag_sets) def next(self): """Get next GridOut object from cursor. diff --git a/pymongo/collection.py b/pymongo/collection.py index c0f47eccb..fd365a206 100644 --- a/pymongo/collection.py +++ b/pymongo/collection.py @@ -779,9 +779,6 @@ class Collection(common.BaseObject): this query. - `tag_sets` **DEPRECATED** - `secondary_acceptable_latency_ms` **DEPRECATED** - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regex objects into Python regexes. Return instances of - :class:`~bson.regex.Regex` instead. - `exhaust` (optional): If ``True`` create an "exhaust" cursor. MongoDB will stream batched results to the client without waiting for the client to request each batch, reducing latency. @@ -802,20 +799,30 @@ class Collection(common.BaseObject): :class:`~socket.socket` connection will be closed and discarded without being returned to the connection pool. - .. note:: The `manipulate` and `compile_re` parameters may default to - False in future releases. + .. note:: The `manipulate` parameter may default to False in a future + release. .. versionchanged:: 3.0 Removed the `network_timeout` parameter. Deprecated the `tag_sets`, and `secondary_acceptable_latency_ms` parameters. + Removed `compile_re` option: PyMongo now always represents BSON + regular expressions as :class:`~bson.regex.Regex` objects. Use + :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a + BSON regular expression to a Python regular expression object. - .. versionadded:: 2.7 - The ``compile_re`` parameter. + .. versionchanged:: 2.7 + Added `compile_re` option. If set to False, PyMongo represented BSON + regular expressions as :class:`~bson.regex.Regex` objects instead of + attempting to compile BSON regular expressions as Python native + regular expressions, thus preventing errors for some incompatible + patterns, see `PYTHON-500`_. .. versionadded:: 2.3 The `tag_sets` and `secondary_acceptable_latency_ms` parameters. + .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 + .. mongodoc:: find """ return Cursor(self, *args, **kwargs) @@ -863,8 +870,6 @@ class Collection(common.BaseObject): .. note:: Requires server version **>= 2.5.5**. """ - compile_re = kwargs.get('compile_re', False) - cmd = SON([('parallelCollectionScan', self.__name), ('numCursors', num_cursors)]) @@ -875,8 +880,7 @@ class Collection(common.BaseObject): return [CommandCursor(self, cursor['cursor'], - address, - compile_re) for cursor in result['cursors']] + address) for cursor in result['cursors']] def count(self): """Get the number of documents in this collection. @@ -1279,8 +1283,6 @@ class Collection(common.BaseObject): cmd = SON([("aggregate", self.__name), ("pipeline", pipeline)]) - compile_re = kwargs.get('compile_re', True) - mode = read_preference or self.read_preference result, address = self.__database._command( cmd, uuid_subtype=self.uuid_subtype, @@ -1290,8 +1292,7 @@ class Collection(common.BaseObject): return CommandCursor( self, result['cursor'], - address, - compile_re) + address) else: return result diff --git a/pymongo/command_cursor.py b/pymongo/command_cursor.py index 52dc9ff78..19f911daf 100644 --- a/pymongo/command_cursor.py +++ b/pymongo/command_cursor.py @@ -25,8 +25,7 @@ class CommandCursor(object): """A cursor / iterator over command cursors. """ - def __init__(self, collection, cursor_info, - address, compile_re=True, retrieved=0): + def __init__(self, collection, cursor_info, address, retrieved=0): """Create a new command cursor. """ self.__collection = collection @@ -36,9 +35,8 @@ class CommandCursor(object): self.__decode_opts = ( collection.database.connection.document_class, collection.database.connection.tz_aware, - collection.uuid_subtype, - compile_re - ) + collection.uuid_subtype) + self.__retrieved = retrieved self.__batch_size = 0 self.__killed = False diff --git a/pymongo/cursor.py b/pymongo/cursor.py index 7c036d530..afac7d01d 100644 --- a/pymongo/cursor.py +++ b/pymongo/cursor.py @@ -80,7 +80,7 @@ class Cursor(object): await_data=False, partial=False, manipulate=True, read_preference=None, tag_sets=None, secondary_acceptable_latency_ms=None, - exhaust=False, compile_re=True, _uuid_subtype=None): + exhaust=False, _uuid_subtype=None): """Create a new cursor. Should not be called directly by application developers - see @@ -157,7 +157,6 @@ class Cursor(object): self.__as_class = as_class self.__manipulate = manipulate self.__tz_aware = collection.database.connection.tz_aware - self.__compile_re = compile_re self.__uuid_subtype = _uuid_subtype or collection.uuid_subtype self.__data = deque() @@ -241,7 +240,7 @@ class Cursor(object): "snapshot", "ordering", "explain", "hint", "batch_size", "max_scan", "as_class", "manipulate", "read_preference", - "uuid_subtype", "compile_re", "query_flags") + "uuid_subtype", "query_flags") data = dict((k, v) for k, v in iteritems(self.__dict__) if k.startswith('_Cursor__') and k[9:] in values_to_clone) if deepcopy: @@ -708,7 +707,6 @@ class Cursor(object): r = database.command("count", self.__collection.name, allowable_errors=["ns missing"], uuid_subtype=self.__uuid_subtype, - compile_re=self.__compile_re, read_preference=self.__read_preference, **command) if r.get("errmsg", "") == "ns missing": @@ -749,7 +747,6 @@ class Cursor(object): return database.command("distinct", self.__collection.name, uuid_subtype=self.__uuid_subtype, - compile_re=self.__compile_re, read_preference=self.__read_preference, **options)["values"] @@ -890,8 +887,7 @@ class Cursor(object): cursor_id=self.__id, as_class=self.__as_class, tz_aware=self.__tz_aware, - uuid_subtype=self.__uuid_subtype, - compile_re=self.__compile_re) + uuid_subtype=self.__uuid_subtype) except OperationFailure: self.__killed = True diff --git a/pymongo/database.py b/pymongo/database.py index 48abc0790..d95a9550c 100644 --- a/pymongo/database.py +++ b/pymongo/database.py @@ -285,7 +285,7 @@ class Database(common.BaseObject): def _command(self, command, value=1, check=True, allowable_errors=None, - uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, + uuid_subtype=OLD_UUID_SUBTYPE, read_preference=None, **kwargs): """Internal command helper. """ @@ -343,7 +343,6 @@ class Database(common.BaseObject): limit=-1, as_class=as_class, read_preference=pref, - compile_re=compile_re, _uuid_subtype=uuid_subtype) result = {} @@ -363,7 +362,7 @@ class Database(common.BaseObject): def command(self, command, value=1, check=True, allowable_errors=[], - uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, + uuid_subtype=OLD_UUID_SUBTYPE, read_preference=None, **kwargs): """Issue a MongoDB command. @@ -409,12 +408,6 @@ class Database(common.BaseObject): in this list will be ignored by error-checking - `uuid_subtype` (optional): The BSON binary subtype to use for a UUID used in this command. - - `compile_re` (optional): if ``False``, don't attempt to compile - BSON regular expressions into Python regular expressions. Return - instances of :class:`~bson.regex.Regex` instead. Can avoid - :exc:`~bson.errors.InvalidBSON` errors when receiving - Python-incompatible regular expressions, for example from - ``currentOp`` - `read_preference`: The read preference for this operation. - `tag_sets` **DEPRECATED** - `secondary_acceptable_latency_ms` **DEPRECATED** @@ -424,19 +417,30 @@ class Database(common.BaseObject): .. versionchanged:: 3.0 Deprecated the `tag_sets` and `secondary_acceptable_latency_ms` options. + Removed `compile_re` option: PyMongo now always represents BSON + regular expressions as :class:`~bson.regex.Regex` objects. Use + :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a + BSON regular expression to a Python regular expression object. + .. versionchanged:: 2.7 - Added ``compile_re`` option. + Added `compile_re` option. If set to False, PyMongo represented BSON + regular expressions as :class:`~bson.regex.Regex` objects instead of + attempting to compile BSON regular expressions as Python native + regular expressions, thus preventing errors for some incompatible + patterns, see `PYTHON-500`_. + .. versionchanged:: 2.3 Added `tag_sets` and `secondary_acceptable_latency_ms` options. .. versionchanged:: 2.2 Added support for `as_class` - the class you want to use for the resulting documents + .. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 + .. mongodoc:: commands """ return self._command(command, value, check, allowable_errors, - uuid_subtype, compile_re, - read_preference, **kwargs)[0] + uuid_subtype, read_preference, **kwargs)[0] def collection_names(self, include_system_collections=True): """Get a list of all the collection names in this database. diff --git a/pymongo/helpers.py b/pymongo/helpers.py index d38e8a292..f7d976490 100644 --- a/pymongo/helpers.py +++ b/pymongo/helpers.py @@ -75,8 +75,7 @@ def _index_document(index_list): def _unpack_response(response, cursor_id=None, as_class=dict, - tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, - compile_re=True): + tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE): """Unpack a response from the database. Check the response for errors and unpack, returning a dictionary @@ -117,8 +116,7 @@ def _unpack_response(response, cursor_id=None, as_class=dict, result["starting_from"] = struct.unpack(", >=, !=, and ==. self.assertTrue(MinKey() < None) diff --git a/test/test_collection.py b/test/test_collection.py index a3c67eee1..7e310afc1 100644 --- a/test/test_collection.py +++ b/test/test_collection.py @@ -1289,16 +1289,6 @@ class TestCollection(IntegrationTest): self.assertEqual(1.0, result['ok']) self.assertEqual([{'foo': [1, 2]}], result['result']) - @client_context.require_version_min(2, 3, 2) # See SERVER-6470. - def test_aggregate_with_compile_re(self): - self.db.test.drop() - self.db.test.insert({'r': re.compile('.*')}) - - result = self.db.test.aggregate([]) - self.assertTrue(isinstance(result['result'][0]['r'], RE_TYPE)) - result = self.db.test.aggregate([], compile_re=False) - self.assertTrue(isinstance(result['result'][0]['r'], Regex)) - @client_context.require_version_min(2, 5, 1) def test_aggregation_cursor_validation(self): db = self.db @@ -2200,20 +2190,13 @@ class TestCollection(IntegrationTest): self.assertEqual(2, c.find_one(manipulate=True)['foo']) c.remove({}) - def test_compile_re(self): + def test_find_regex(self): c = self.db.test c.drop() c.insert({'r': re.compile('.*')}) - # Test find_one with compile_re. - self.assertTrue(isinstance(c.find_one()['r'], RE_TYPE)) - self.assertTrue(isinstance(c.find_one(compile_re=False)['r'], Regex)) - - # Test find with compile_re. + self.assertTrue(isinstance(c.find_one()['r'], Regex)) for doc in c.find(): - self.assertTrue(isinstance(doc['r'], RE_TYPE)) - - for doc in c.find(compile_re=False): self.assertTrue(isinstance(doc['r'], Regex)) def test_find_and_modify_with_manipulator(self): diff --git a/test/test_cursor.py b/test/test_cursor.py index 787889824..a0979b0c9 100644 --- a/test/test_cursor.py +++ b/test/test_cursor.py @@ -754,7 +754,6 @@ class TestCursor(IntegrationTest): await_data=True, partial=True, manipulate=False, - compile_re=False, fields={'_id': False}).limit(2) cursor.min([('a', 1)]).max([('b', 3)]) cursor.add_option(128) @@ -768,8 +767,6 @@ class TestCursor(IntegrationTest): type(cursor2._Cursor__as_class)) self.assertEqual(cursor._Cursor__manipulate, cursor2._Cursor__manipulate) - self.assertEqual(cursor._Cursor__compile_re, - cursor2._Cursor__compile_re) self.assertEqual(cursor._Cursor__query_flags, cursor2._Cursor__query_flags) self.assertEqual(cursor._Cursor__comment, diff --git a/test/test_database.py b/test/test_database.py index 8f426cfe1..46ca9bcf8 100644 --- a/test/test_database.py +++ b/test/test_database.py @@ -327,15 +327,15 @@ class TestDatabase(IntegrationTest): # retrieve a BSON regex from a collection using a command. But until # MongoDB 2.3.2, aggregation turned regexes into strings: SERVER-6470. @client_context.require_version_min(2, 3, 2) - def test_command_with_compile_re(self): + def test_command_with_regex(self): db = self.client.pymongo_test db.test.drop() db.test.insert({'r': re.compile('.*')}) + db.test.insert({'r': Regex('.*')}) result = db.command('aggregate', 'test', pipeline=[]) - self.assertTrue(isinstance(result['result'][0]['r'], RE_TYPE)) - result = db.command('aggregate', 'test', pipeline=[], compile_re=False) - self.assertTrue(isinstance(result['result'][0]['r'], Regex)) + for doc in result['result']: + self.assertTrue(isinstance(doc['r'], Regex)) def test_last_status(self): # We must call getlasterror on the same socket as the last operation. diff --git a/test/test_json_util.py b/test/test_json_util.py index 0cf05d8f8..b253b9062 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -96,11 +96,6 @@ class TestJsonUtil(unittest.TestCase): pat = 'a*b' json_re = '{"$regex": "%s", "$options": "u"}' % pat loaded = json_util.object_hook(json.loads(json_re)) - self.assertTrue(isinstance(loaded, RE_TYPE)) - self.assertEqual(pat, loaded.pattern) - self.assertEqual(re.U, loaded.flags) - - loaded = json_util.object_hook(json.loads(json_re), compile_re=False) self.assertTrue(isinstance(loaded, Regex)) self.assertEqual(pat, loaded.pattern) self.assertEqual(re.U, loaded.flags) @@ -127,16 +122,12 @@ class TestJsonUtil(unittest.TestCase): # Some tools may not add $options if no flags are set. res = json_util.loads('{"r": {"$regex": "a*b"}}')['r'] - expected_flags = 0 - if PY3: - expected_flags = re.U - self.assertEqual(expected_flags, res.flags) + self.assertEqual(0, res.flags) self.assertEqual( Regex('.*', 'ilm'), json_util.loads( - '{"r": {"$regex": ".*", "$options": "ilm"}}', - compile_re=False)['r']) + '{"r": {"$regex": ".*", "$options": "ilm"}}')['r']) # Check order. self.assertEqual(