PYTHON-526 Remove 'compile_re' option.

PyMongo now never attempts to compile BSON regular expressions as Python native
regular expressions.
This commit is contained in:
A. Jesse Jiryu Davis 2014-12-03 17:10:34 -05:00
parent 25e5bca03b
commit 2ba730722b
19 changed files with 155 additions and 221 deletions

View File

@ -239,10 +239,7 @@ def _get_regex(data, position, dummy, opts):
pattern, position = _get_c_string(data, position)
bson_flags, position = _get_c_string(data, position)
bson_re = Regex(pattern, bson_flags)
if opts[3]:
return bson_re.try_compile(), position
else:
return bson_re, position
return bson_re, position
def _get_ref(data, position, obj_end, opts):
@ -308,9 +305,9 @@ def _elements_to_dict(data, position, obj_end, opts):
return result
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
"""Decode a BSON string to as_class."""
opts = (as_class, tz_aware, uuid_subtype, compile_re)
opts = (as_class, tz_aware, uuid_subtype)
try:
obj_size = _UNPACK_INT(data[:4])[0]
except struct.error as e:
@ -700,7 +697,7 @@ if _USE_C:
def decode_all(data, as_class=dict,
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
"""Decode BSON data to multiple documents.
`data` must be a string of concatenated, valid, BSON-encoded
@ -714,16 +711,23 @@ def decode_all(data, as_class=dict,
:class:`~datetime.datetime` instances
- `uuid_subtype` (optional): The BSON representation to use for UUIDs.
See the :mod:`bson.binary` module for all options.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of :class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from ``currentOp``
.. versionchanged:: 3.0
Removed `compile_re` option: PyMongo now always represents BSON regular
expressions as :class:`~bson.regex.Regex` objects. Use
:meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
BSON regular expression to a Python regular expression object.
.. versionchanged:: 2.7
Added `compile_re` option.
Added `compile_re` option. If set to False, PyMongo represented BSON
regular expressions as :class:`~bson.regex.Regex` objects instead of
attempting to compile BSON regular expressions as Python native
regular expressions, thus preventing errors for some incompatible
patterns, see `PYTHON-500`_.
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
"""
opts = (as_class, tz_aware, uuid_subtype, compile_re)
opts = (as_class, tz_aware, uuid_subtype)
docs = []
position = 0
end = len(data) - 1
@ -751,7 +755,7 @@ if _USE_C:
def decode_iter(data, as_class=dict, tz_aware=True,
uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
uuid_subtype=OLD_UUID_SUBTYPE):
"""Decode BSON data to multiple documents as a generator.
Works similarly to the decode_all function, but yields one document at a
@ -768,13 +772,6 @@ def decode_iter(data, as_class=dict, tz_aware=True,
:class:`~datetime.datetime` instances
- `uuid_subtype` (optional): The BSON representation to use for UUIDs.
See the :mod:`bson.binary` module for all options.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of
:class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from
``currentOp``
.. versionadded:: 2.8
"""
@ -786,11 +783,11 @@ def decode_iter(data, as_class=dict, tz_aware=True,
position += obj_size
yield _bson_to_dict(elements, as_class,
tz_aware, uuid_subtype, compile_re)
tz_aware, uuid_subtype)
def decode_file_iter(file_obj, as_class=dict, tz_aware=True,
uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
uuid_subtype=OLD_UUID_SUBTYPE):
"""Decode bson data from a file to multiple documents as a generator.
Works similarly to the decode_all function, but reads from the file object
@ -804,13 +801,6 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True,
:class:`~datetime.datetime` instances
- `uuid_subtype` (optional): The BSON representation to use for UUIDs.
See the :mod:`bson.binary` module for all options.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of
:class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from
``currentOp``
.. versionadded:: 2.8
"""
@ -824,7 +814,7 @@ def decode_file_iter(file_obj, as_class=dict, tz_aware=True,
obj_size = _UNPACK_INT(size_data)[0] - 4
elements = size_data + file_obj.read(obj_size)
yield _bson_to_dict(elements, as_class,
tz_aware, uuid_subtype, compile_re)
tz_aware, uuid_subtype)
def is_valid(bson):
@ -841,7 +831,7 @@ def is_valid(bson):
raise TypeError("BSON data must be an instance of a subclass of bytes")
try:
_bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE, True)
_bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
return True
except Exception:
return False
@ -874,7 +864,7 @@ class BSON(bytes):
return cls(_dict_to_bson(document, check_keys, uuid_subtype))
def decode(self, as_class=dict,
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
"""Decode this BSON data.
The default type to use for the resultant document is
@ -896,19 +886,23 @@ class BSON(bytes):
:class:`~datetime.datetime` instances
- `uuid_subtype` (optional): The BSON representation to use for
UUIDs. See the :mod:`bson.binary` module for all options.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of
:class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from
``currentOp``
.. versionchanged:: 3.0
Removed `compile_re` option: PyMongo now always represents BSON
regular expressions as :class:`~bson.regex.Regex` objects. Use
:meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
BSON regular expression to a Python regular expression object.
.. versionchanged:: 2.7
Added ``compile_re`` option.
Added `compile_re` option. If set to False, PyMongo represented BSON
regular expressions as :class:`~bson.regex.Regex` objects instead of
attempting to compile BSON regular expressions as Python native
regular expressions, thus preventing errors for some incompatible
patterns, see `PYTHON-500`_.
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
"""
return _bson_to_dict(
self, as_class, tz_aware, uuid_subtype, compile_re)
return _bson_to_dict(self, as_class, tz_aware, uuid_subtype)
def has_c():

View File

@ -42,7 +42,6 @@ struct module_state {
PyObject* Code;
PyObject* ObjectId;
PyObject* DBRef;
PyObject* RECompile;
PyObject* Regex;
PyObject* UUID;
PyObject* Timestamp;
@ -108,8 +107,7 @@ _downcast_and_check(Py_ssize_t size, int extra) {
static PyObject* elements_to_dict(PyObject* self, const char* string,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype,
unsigned char compile_re);
unsigned char uuid_subtype);
static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
int type_byte, PyObject* value,
@ -319,6 +317,7 @@ static int _load_object(PyObject** object, char* module_name, char* object_name)
* Returns non-zero on failure. */
static int _load_python_objects(PyObject* module) {
PyObject* empty_string;
PyObject* re_compile;
PyObject* compiled;
struct module_state *state = GETSTATE(module);
@ -330,7 +329,6 @@ static int _load_python_objects(PyObject* module) {
_load_object(&state->MinKey, "bson.min_key", "MinKey") ||
_load_object(&state->MaxKey, "bson.max_key", "MaxKey") ||
_load_object(&state->UTC, "bson.tz_util", "utc") ||
_load_object(&state->RECompile, "re", "compile") ||
_load_object(&state->Regex, "bson.regex", "Regex") ||
_load_object(&state->BSONInt64, "bson.int64", "Int64") ||
_load_object(&state->UUID, "uuid", "UUID") ||
@ -347,7 +345,13 @@ static int _load_python_objects(PyObject* module) {
state->REType = NULL;
return 1;
}
compiled = PyObject_CallFunction(state->RECompile, "O", empty_string);
if (_load_object(&re_compile, "re", "compile")) {
state->REType = NULL;
return 1;
}
compiled = PyObject_CallFunction(re_compile, "O", empty_string);
if (compiled == NULL) {
state->REType = NULL;
Py_DECREF(empty_string);
@ -1462,10 +1466,11 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
return result;
}
static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position,
unsigned char type, unsigned max, PyObject* as_class,
unsigned char tz_aware, unsigned char uuid_subtype,
unsigned char compile_re) {
static PyObject* get_value(PyObject* self, const char* buffer,
unsigned* position, unsigned char type,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype) {
struct module_state *state = GETSTATE(self);
PyObject* value = NULL;
@ -1521,8 +1526,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
goto invalid;
}
value = elements_to_dict(self, buffer + *position + 4,
size - 5, as_class, tz_aware, uuid_subtype,
compile_re);
size - 5, as_class, tz_aware,
uuid_subtype);
if (!value) {
goto invalid;
}
@ -1621,8 +1626,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
to_append = get_value(self, buffer, position, bson_type,
max - (unsigned)key_size,
as_class, tz_aware, uuid_subtype,
compile_re);
as_class, tz_aware, uuid_subtype);
Py_LeaveRecursiveCall();
if (!to_append) {
Py_DECREF(value);
@ -1849,7 +1853,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
case 11:
{
PyObject* compile_func;
PyObject* regex_class;
PyObject* pattern;
int flags;
size_t flags_length, i;
@ -1890,19 +1894,11 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
*position += (unsigned)flags_length + 1;
/*
* Use re.compile() if we're configured to compile regular
* expressions, else create an instance of our Regex class.
*/
if (compile_re) {
compile_func = _get_object(state->RECompile, "re", "compile");
} else {
compile_func = _get_object(state->Regex, "bson.regex", "Regex");
}
if (compile_func) {
value = PyObject_CallFunction(compile_func, "Oi", pattern, flags);
Py_DECREF(compile_func);
regex_class = _get_object(state->Regex, "bson.regex", "Regex");
if (regex_class) {
value = PyObject_CallFunction(regex_class,
"Oi", pattern, flags);
Py_DECREF(regex_class);
}
Py_DECREF(pattern);
break;
@ -2040,7 +2036,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
scope = elements_to_dict(self, buffer + *position + 4,
scope_size - 5, (PyObject*)&PyDict_Type,
tz_aware, uuid_subtype, compile_re);
tz_aware, uuid_subtype);
if (!scope) {
Py_DECREF(code);
goto invalid;
@ -2190,8 +2186,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
static PyObject* _elements_to_dict(PyObject* self, const char* string,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype,
unsigned char compile_re) {
unsigned char uuid_subtype) {
unsigned position = 0;
PyObject* dict = PyObject_CallObject(as_class, NULL);
if (!dict) {
@ -2219,8 +2214,7 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
}
position += (unsigned)name_length + 1;
value = get_value(self, string, &position, type,
max - position, as_class, tz_aware, uuid_subtype,
compile_re);
max - position, as_class, tz_aware, uuid_subtype);
if (!value) {
Py_DECREF(name);
Py_DECREF(dict);
@ -2237,13 +2231,12 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
static PyObject* elements_to_dict(PyObject* self, const char* string,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype,
unsigned char compile_re) {
unsigned char uuid_subtype) {
PyObject* result;
if (Py_EnterRecursiveCall(" while decoding a BSON document"))
return NULL;
result = _elements_to_dict(self, string, max,
as_class, tz_aware, uuid_subtype, compile_re);
as_class, tz_aware, uuid_subtype);
Py_LeaveRecursiveCall();
return result;
}
@ -2256,10 +2249,9 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
PyObject* as_class;
unsigned char tz_aware;
unsigned char uuid_subtype;
unsigned char compile_re;
if (!PyArg_ParseTuple(
args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
return NULL;
}
@ -2325,7 +2317,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
}
return elements_to_dict(self, string + 4, (unsigned)size - 5,
as_class, tz_aware, uuid_subtype, compile_re);
as_class, tz_aware, uuid_subtype);
}
static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
@ -2338,11 +2330,10 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
PyObject* as_class = (PyObject*)&PyDict_Type;
unsigned char tz_aware = 1;
unsigned char uuid_subtype = 3;
unsigned char compile_re = 1;
if (!PyArg_ParseTuple(
args, "O|Obbb",
&bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
args, "O|Obb",
&bson, &as_class, &tz_aware, &uuid_subtype)) {
return NULL;
}
@ -2413,7 +2404,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
}
dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
as_class, tz_aware, uuid_subtype, compile_re);
as_class, tz_aware, uuid_subtype);
if (!dict) {
Py_DECREF(result);
return NULL;
@ -2444,7 +2435,6 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
Py_VISIT(GETSTATE(m)->Code);
Py_VISIT(GETSTATE(m)->ObjectId);
Py_VISIT(GETSTATE(m)->DBRef);
Py_VISIT(GETSTATE(m)->RECompile);
Py_VISIT(GETSTATE(m)->Regex);
Py_VISIT(GETSTATE(m)->UUID);
Py_VISIT(GETSTATE(m)->Timestamp);
@ -2460,7 +2450,6 @@ static int _cbson_clear(PyObject *m) {
Py_CLEAR(GETSTATE(m)->Code);
Py_CLEAR(GETSTATE(m)->ObjectId);
Py_CLEAR(GETSTATE(m)->DBRef);
Py_CLEAR(GETSTATE(m)->RECompile);
Py_CLEAR(GETSTATE(m)->Regex);
Py_CLEAR(GETSTATE(m)->UUID);
Py_CLEAR(GETSTATE(m)->Timestamp);

View File

@ -117,17 +117,8 @@ def loads(s, *args, **kwargs):
"""Helper function that wraps :class:`json.loads`.
Automatically passes the object_hook for BSON type conversion.
:Parameters:
- `compile_re` (optional): if ``False``, don't attempt to compile BSON
regular expressions into Python regular expressions. Return instances
of :class:`~bson.bsonregex.BSONRegex` instead.
.. versionchanged:: 2.7
Added ``compile_re`` option.
"""
compile_re = kwargs.pop('compile_re', True)
kwargs['object_hook'] = lambda dct: object_hook(dct, compile_re)
kwargs['object_hook'] = lambda dct: object_hook(dct)
return json.loads(s, *args, **kwargs)
@ -145,7 +136,7 @@ def _json_convert(obj):
return obj
def object_hook(dct, compile_re=True):
def object_hook(dct):
if "$oid" in dct:
return ObjectId(str(dct["$oid"]))
if "$ref" in dct:
@ -181,11 +172,7 @@ def object_hook(dct, compile_re=True):
# PyMongo always adds $options but some other tools may not.
for opt in dct.get("$options", ""):
flags |= _RE_OPT_TABLE.get(opt, 0)
if compile_re:
return re.compile(dct["$regex"], flags)
else:
return Regex(dct["$regex"], flags)
return Regex(dct["$regex"], flags)
if "$minKey" in dct:
return MinKey()
if "$maxKey" in dct:

View File

@ -53,7 +53,8 @@ class SON(dict):
list array both
dict / `SON` object both
datetime.datetime [#dt]_ [#dt2]_ date both
`bson.regex.Regex` / compiled re [#re]_ regex both
`bson.regex.Regex` regex both
compiled re [#re]_ regex py -> bson
`bson.binary.Binary` binary both
`bson.objectid.ObjectId` oid both
`bson.dbref.DBRef` dbref both
@ -77,9 +78,8 @@ class SON(dict):
should always use UTC.
.. [#re] :class:`~bson.regex.Regex` instances and regular expression
objects from ``re.compile()`` are both saved as BSON regular expressions.
BSON regular expressions are decoded as Python regular expressions by
default, or as :class:`~bson.regex.Regex` instances if the ``compile_re``
option is set to ``False``.
BSON regular expressions are decoded as :class:`~bson.regex.Regex`
instances.
.. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
it will be decoded to an instance of :class:`~bson.binary.Binary` with

View File

@ -34,7 +34,7 @@
.. automethod:: initialize_unordered_bulk_op
.. automethod:: initialize_ordered_bulk_op
.. automethod:: drop
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, await_data=False[, partial=False[, manipulate=True[, read_preference=None[, exhaust=False[, compile_re=True]]]]]]]]]]]]]]]])
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, await_data=False[, partial=False[, manipulate=True[, read_preference=None[, exhaust=False]]]]]]]]]]]]]]])
.. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
.. automethod:: parallel_scan
.. automethod:: count

View File

@ -4,7 +4,7 @@
.. automodule:: pymongo.cursor
:synopsis: Tools for iterating over MongoDB query results
.. autoclass:: pymongo.cursor.Cursor(collection, spec=None, fields=None, skip=0, limit=0, timeout=True, snapshot=False, tailable=False, sort=None, max_scan=None, as_class=None, await_data=False, partial=False, manipulate=True, read_preference=None, tag_sets=None, secondary_acceptable_latency_ms=None, exhaust=False, compile_re=True)
.. autoclass:: pymongo.cursor.Cursor(collection, spec=None, fields=None, skip=0, limit=0, timeout=True, snapshot=False, tailable=False, sort=None, max_scan=None, as_class=None, await_data=False, partial=False, manipulate=True, read_preference=None, tag_sets=None, secondary_acceptable_latency_ms=None, exhaust=False)
:members:
.. describe:: c[index]

View File

@ -96,6 +96,23 @@ Since PyMongo 1.6, methods ``open`` and ``close`` of :class:`~gridfs.GridFS`
raised an ``UnsupportedAPI`` exception, as did the entire ``GridFile`` class.
The unsupported methods, the class, and the exception are all deleted.
:mod:`~bson` Changes
....................
The `compile_re` option is removed from all methods
that accepted it in :mod:`~bson` and :mod:`~bson.json_util`. Additionally, it
is removed from :meth:`~pymongo.collection.Collection.find`,
:meth:`~pymongo.collection.Collection.find_one`,
:meth:`~pymongo.collection.Collection.aggregate`,
:meth:`~pymongo.database.Database.command`, and so on.
PyMongo now always represents BSON regular expressions as
:class:`~bson.regex.Regex` objects. This prevents errors for incompatible
patterns; see `PYTHON-500`_. Use :meth:`~bson.regex.Regex.try_compile` to
attempt to convert from a BSON regular expression to a Python regular
expression object.
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
Issues Resolved
...............

View File

@ -322,9 +322,6 @@ class GridFS(object):
this query.
- `tag_sets` **DEPRECATED**
- `secondary_acceptable_latency_ms` **DEPRECATED**
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regex objects into Python regexes. Return instances of
:class:`~bson.regex.Regex` instead.
Raises :class:`TypeError` if any of the arguments are of
improper type. Returns an instance of

View File

@ -605,7 +605,7 @@ class GridOutCursor(Cursor):
def __init__(self, collection, spec=None, skip=0, limit=0,
timeout=True, sort=None, max_scan=None,
read_preference=None, tag_sets=None,
secondary_acceptable_latency_ms=None, compile_re=True):
secondary_acceptable_latency_ms=None):
"""Create a new cursor, similar to the normal
:class:`~pymongo.cursor.Cursor`.
@ -626,7 +626,7 @@ class GridOutCursor(Cursor):
collection.files, spec, skip=skip, limit=limit, timeout=timeout,
sort=sort, max_scan=max_scan, read_preference=read_preference,
secondary_acceptable_latency_ms=secondary_acceptable_latency_ms,
tag_sets=tag_sets, compile_re=compile_re)
tag_sets=tag_sets)
def next(self):
"""Get next GridOut object from cursor.

View File

@ -779,9 +779,6 @@ class Collection(common.BaseObject):
this query.
- `tag_sets` **DEPRECATED**
- `secondary_acceptable_latency_ms` **DEPRECATED**
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regex objects into Python regexes. Return instances of
:class:`~bson.regex.Regex` instead.
- `exhaust` (optional): If ``True`` create an "exhaust" cursor.
MongoDB will stream batched results to the client without waiting
for the client to request each batch, reducing latency.
@ -802,20 +799,30 @@ class Collection(common.BaseObject):
:class:`~socket.socket` connection will be closed and discarded
without being returned to the connection pool.
.. note:: The `manipulate` and `compile_re` parameters may default to
False in future releases.
.. note:: The `manipulate` parameter may default to False in a future
release.
.. versionchanged:: 3.0
Removed the `network_timeout` parameter.
Deprecated the `tag_sets` and
`secondary_acceptable_latency_ms` parameters.
Removed `compile_re` option: PyMongo now always represents BSON
regular expressions as :class:`~bson.regex.Regex` objects. Use
:meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
BSON regular expression to a Python regular expression object.
.. versionadded:: 2.7
The ``compile_re`` parameter.
.. versionchanged:: 2.7
Added `compile_re` option. If set to False, PyMongo represented BSON
regular expressions as :class:`~bson.regex.Regex` objects instead of
attempting to compile BSON regular expressions as Python native
regular expressions, thus preventing errors for some incompatible
patterns, see `PYTHON-500`_.
.. versionadded:: 2.3
The `tag_sets` and `secondary_acceptable_latency_ms` parameters.
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
.. mongodoc:: find
"""
return Cursor(self, *args, **kwargs)
@ -863,8 +870,6 @@ class Collection(common.BaseObject):
.. note:: Requires server version **>= 2.5.5**.
"""
compile_re = kwargs.get('compile_re', False)
cmd = SON([('parallelCollectionScan', self.__name),
('numCursors', num_cursors)])
@ -875,8 +880,7 @@ class Collection(common.BaseObject):
return [CommandCursor(self,
cursor['cursor'],
address,
compile_re) for cursor in result['cursors']]
address) for cursor in result['cursors']]
def count(self):
"""Get the number of documents in this collection.
@ -1279,8 +1283,6 @@ class Collection(common.BaseObject):
cmd = SON([("aggregate", self.__name),
("pipeline", pipeline)])
compile_re = kwargs.get('compile_re', True)
mode = read_preference or self.read_preference
result, address = self.__database._command(
cmd, uuid_subtype=self.uuid_subtype,
@ -1290,8 +1292,7 @@ class Collection(common.BaseObject):
return CommandCursor(
self,
result['cursor'],
address,
compile_re)
address)
else:
return result

View File

@ -25,8 +25,7 @@ class CommandCursor(object):
"""A cursor / iterator over command cursors.
"""
def __init__(self, collection, cursor_info,
address, compile_re=True, retrieved=0):
def __init__(self, collection, cursor_info, address, retrieved=0):
"""Create a new command cursor.
"""
self.__collection = collection
@ -36,9 +35,8 @@ class CommandCursor(object):
self.__decode_opts = (
collection.database.connection.document_class,
collection.database.connection.tz_aware,
collection.uuid_subtype,
compile_re
)
collection.uuid_subtype)
self.__retrieved = retrieved
self.__batch_size = 0
self.__killed = False

View File

@ -80,7 +80,7 @@ class Cursor(object):
await_data=False, partial=False, manipulate=True,
read_preference=None, tag_sets=None,
secondary_acceptable_latency_ms=None,
exhaust=False, compile_re=True, _uuid_subtype=None):
exhaust=False, _uuid_subtype=None):
"""Create a new cursor.
Should not be called directly by application developers - see
@ -157,7 +157,6 @@ class Cursor(object):
self.__as_class = as_class
self.__manipulate = manipulate
self.__tz_aware = collection.database.connection.tz_aware
self.__compile_re = compile_re
self.__uuid_subtype = _uuid_subtype or collection.uuid_subtype
self.__data = deque()
@ -241,7 +240,7 @@ class Cursor(object):
"snapshot", "ordering", "explain", "hint",
"batch_size", "max_scan", "as_class",
"manipulate", "read_preference",
"uuid_subtype", "compile_re", "query_flags")
"uuid_subtype", "query_flags")
data = dict((k, v) for k, v in iteritems(self.__dict__)
if k.startswith('_Cursor__') and k[9:] in values_to_clone)
if deepcopy:
@ -708,7 +707,6 @@ class Cursor(object):
r = database.command("count", self.__collection.name,
allowable_errors=["ns missing"],
uuid_subtype=self.__uuid_subtype,
compile_re=self.__compile_re,
read_preference=self.__read_preference,
**command)
if r.get("errmsg", "") == "ns missing":
@ -749,7 +747,6 @@ class Cursor(object):
return database.command("distinct",
self.__collection.name,
uuid_subtype=self.__uuid_subtype,
compile_re=self.__compile_re,
read_preference=self.__read_preference,
**options)["values"]
@ -890,8 +887,7 @@ class Cursor(object):
cursor_id=self.__id,
as_class=self.__as_class,
tz_aware=self.__tz_aware,
uuid_subtype=self.__uuid_subtype,
compile_re=self.__compile_re)
uuid_subtype=self.__uuid_subtype)
except OperationFailure:
self.__killed = True

View File

@ -285,7 +285,7 @@ class Database(common.BaseObject):
def _command(self, command, value=1,
check=True, allowable_errors=None,
uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True,
uuid_subtype=OLD_UUID_SUBTYPE,
read_preference=None, **kwargs):
"""Internal command helper.
"""
@ -343,7 +343,6 @@ class Database(common.BaseObject):
limit=-1,
as_class=as_class,
read_preference=pref,
compile_re=compile_re,
_uuid_subtype=uuid_subtype)
result = {}
@ -363,7 +362,7 @@ class Database(common.BaseObject):
def command(self, command, value=1,
check=True, allowable_errors=[],
uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True,
uuid_subtype=OLD_UUID_SUBTYPE,
read_preference=None, **kwargs):
"""Issue a MongoDB command.
@ -409,12 +408,6 @@ class Database(common.BaseObject):
in this list will be ignored by error-checking
- `uuid_subtype` (optional): The BSON binary subtype to use
for a UUID used in this command.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of :class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from
``currentOp``
- `read_preference`: The read preference for this operation.
- `tag_sets` **DEPRECATED**
- `secondary_acceptable_latency_ms` **DEPRECATED**
@ -424,19 +417,30 @@ class Database(common.BaseObject):
.. versionchanged:: 3.0
Deprecated the `tag_sets` and `secondary_acceptable_latency_ms`
options.
Removed `compile_re` option: PyMongo now always represents BSON
regular expressions as :class:`~bson.regex.Regex` objects. Use
:meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
BSON regular expression to a Python regular expression object.
.. versionchanged:: 2.7
Added ``compile_re`` option.
Added `compile_re` option. If set to False, PyMongo represented BSON
regular expressions as :class:`~bson.regex.Regex` objects instead of
attempting to compile BSON regular expressions as Python native
regular expressions, thus preventing errors for some incompatible
patterns, see `PYTHON-500`_.
.. versionchanged:: 2.3
Added `tag_sets` and `secondary_acceptable_latency_ms` options.
.. versionchanged:: 2.2
Added support for `as_class` - the class you want to use for
the resulting documents
.. _PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500
.. mongodoc:: commands
"""
return self._command(command, value, check, allowable_errors,
uuid_subtype, compile_re,
read_preference, **kwargs)[0]
uuid_subtype, read_preference, **kwargs)[0]
def collection_names(self, include_system_collections=True):
"""Get a list of all the collection names in this database.

View File

@ -75,8 +75,7 @@ def _index_document(index_list):
def _unpack_response(response, cursor_id=None, as_class=dict,
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
compile_re=True):
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
"""Unpack a response from the database.
Check the response for errors and unpack, returning a dictionary
@ -117,8 +116,7 @@ def _unpack_response(response, cursor_id=None, as_class=dict,
result["starting_from"] = struct.unpack("<i", response[12:16])[0]
result["number_returned"] = struct.unpack("<i", response[16:20])[0]
result["data"] = bson.decode_all(response[20:],
as_class, tz_aware, uuid_subtype,
compile_re)
as_class, tz_aware, uuid_subtype)
assert len(result["data"]) == result["number_returned"]
return result

View File

@ -20,7 +20,6 @@ import collections
import datetime
import re
import sys
import traceback
import uuid
sys.path[0:0] = [""]
@ -636,7 +635,7 @@ class TestBSON(unittest.TestCase):
b'\x00') # document terminator
self.assertEqual(doc1_bson, BSON.encode(doc1))
self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False))
self.assertEqual(doc1, BSON(doc1_bson).decode())
# Valid Python regex, with flags.
re2 = re.compile('.*', re.I | re.L | re.M | re.S | re.U | re.X)
@ -652,13 +651,9 @@ class TestBSON(unittest.TestCase):
self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))
# Built-in re objects don't support ==. Compare pattern and flags.
self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
self.assertEqual(
doc2_with_bson_re, BSON(doc2_bson).decode(compile_re=False))
def test_regex_from_native(self):
self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern)
self.assertEqual(0, Regex.from_native(re.compile(b'')).flags)
@ -673,30 +668,17 @@ class TestBSON(unittest.TestCase):
def test_exception_wrapping(self):
# No matter what exception is raised while trying to decode BSON,
# the final exception always matches InvalidBSON and the original
# traceback is preserved.
# the final exception always matches InvalidBSON.
# Invalid Python regex, though valid PCRE.
# Causes an error in re.compile().
bad_doc = BSON.encode({'r': Regex(r'[\w-\.]')})
# {'s': '\xff'}, will throw attempting to decode utf-8.
bad_doc = b'\x0f\x00\x00\x00\x02s\x00\x03\x00\x00\x00\xff\x00\x00\x00'
try:
with self.assertRaises(InvalidBSON) as context:
decode_all(bad_doc)
except InvalidBSON:
exc_type, exc_value, exc_tb = sys.exc_info()
# Original re error was captured and wrapped in InvalidBSON.
self.assertEqual(exc_value.args[0], 'bad character range')
# Traceback includes bson module's call into re module.
for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
if filename.endswith('re.py') and fname == 'compile':
# Traceback was correctly preserved.
break
else:
self.fail('Traceback not captured')
else:
self.fail('InvalidBSON not raised')
self.assertIn("codec can't decode byte 0xff",
str(context.exception))
def test_minkey_maxkey_comparison(self):
# MinKey's <, <=, >, >=, !=, and ==.
self.assertTrue(MinKey() < None)

View File

@ -1289,16 +1289,6 @@ class TestCollection(IntegrationTest):
self.assertEqual(1.0, result['ok'])
self.assertEqual([{'foo': [1, 2]}], result['result'])
@client_context.require_version_min(2, 3, 2) # See SERVER-6470.
def test_aggregate_with_compile_re(self):
self.db.test.drop()
self.db.test.insert({'r': re.compile('.*')})
result = self.db.test.aggregate([])
self.assertTrue(isinstance(result['result'][0]['r'], RE_TYPE))
result = self.db.test.aggregate([], compile_re=False)
self.assertTrue(isinstance(result['result'][0]['r'], Regex))
@client_context.require_version_min(2, 5, 1)
def test_aggregation_cursor_validation(self):
db = self.db
@ -2200,20 +2190,13 @@ class TestCollection(IntegrationTest):
self.assertEqual(2, c.find_one(manipulate=True)['foo'])
c.remove({})
def test_compile_re(self):
def test_find_regex(self):
c = self.db.test
c.drop()
c.insert({'r': re.compile('.*')})
# Test find_one with compile_re.
self.assertTrue(isinstance(c.find_one()['r'], RE_TYPE))
self.assertTrue(isinstance(c.find_one(compile_re=False)['r'], Regex))
# Test find with compile_re.
self.assertTrue(isinstance(c.find_one()['r'], Regex))
for doc in c.find():
self.assertTrue(isinstance(doc['r'], RE_TYPE))
for doc in c.find(compile_re=False):
self.assertTrue(isinstance(doc['r'], Regex))
def test_find_and_modify_with_manipulator(self):

View File

@ -754,7 +754,6 @@ class TestCursor(IntegrationTest):
await_data=True,
partial=True,
manipulate=False,
compile_re=False,
fields={'_id': False}).limit(2)
cursor.min([('a', 1)]).max([('b', 3)])
cursor.add_option(128)
@ -768,8 +767,6 @@ class TestCursor(IntegrationTest):
type(cursor2._Cursor__as_class))
self.assertEqual(cursor._Cursor__manipulate,
cursor2._Cursor__manipulate)
self.assertEqual(cursor._Cursor__compile_re,
cursor2._Cursor__compile_re)
self.assertEqual(cursor._Cursor__query_flags,
cursor2._Cursor__query_flags)
self.assertEqual(cursor._Cursor__comment,

View File

@ -327,15 +327,15 @@ class TestDatabase(IntegrationTest):
# retrieve a BSON regex from a collection using a command. But until
# MongoDB 2.3.2, aggregation turned regexes into strings: SERVER-6470.
@client_context.require_version_min(2, 3, 2)
def test_command_with_compile_re(self):
def test_command_with_regex(self):
db = self.client.pymongo_test
db.test.drop()
db.test.insert({'r': re.compile('.*')})
db.test.insert({'r': Regex('.*')})
result = db.command('aggregate', 'test', pipeline=[])
self.assertTrue(isinstance(result['result'][0]['r'], RE_TYPE))
result = db.command('aggregate', 'test', pipeline=[], compile_re=False)
self.assertTrue(isinstance(result['result'][0]['r'], Regex))
for doc in result['result']:
self.assertTrue(isinstance(doc['r'], Regex))
def test_last_status(self):
# We must call getlasterror on the same socket as the last operation.

View File

@ -96,11 +96,6 @@ class TestJsonUtil(unittest.TestCase):
pat = 'a*b'
json_re = '{"$regex": "%s", "$options": "u"}' % pat
loaded = json_util.object_hook(json.loads(json_re))
self.assertTrue(isinstance(loaded, RE_TYPE))
self.assertEqual(pat, loaded.pattern)
self.assertEqual(re.U, loaded.flags)
loaded = json_util.object_hook(json.loads(json_re), compile_re=False)
self.assertTrue(isinstance(loaded, Regex))
self.assertEqual(pat, loaded.pattern)
self.assertEqual(re.U, loaded.flags)
@ -127,16 +122,12 @@ class TestJsonUtil(unittest.TestCase):
# Some tools may not add $options if no flags are set.
res = json_util.loads('{"r": {"$regex": "a*b"}}')['r']
expected_flags = 0
if PY3:
expected_flags = re.U
self.assertEqual(expected_flags, res.flags)
self.assertEqual(0, res.flags)
self.assertEqual(
Regex('.*', 'ilm'),
json_util.loads(
'{"r": {"$regex": ".*", "$options": "ilm"}}',
compile_re=False)['r'])
'{"r": {"$regex": ".*", "$options": "ilm"}}')['r'])
# Check order.
self.assertEqual(