Option to not compile BSON regexes. PYTHON-500

Add a 'compile_re' parameter to Collection.find, Collection.find_one, and
json_util.loads. If it's False, regular expressions are decoded as instances
of a new class, Regex, instead of being passed to re.compile(). This allows
PyMongo to handle regular expressions that don't compile in Python but are
valid in other contexts like MongoDB queries.
This commit is contained in:
A. Jesse Jiryu Davis 2013-08-06 18:36:33 -04:00 committed by A. Jesse Jiryu Davis
parent f0a419e5ff
commit e4cf504559
17 changed files with 530 additions and 218 deletions

View File

@ -32,6 +32,7 @@ from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.py3compat import b, binary_type
from bson.regex import Regex
from bson.son import SON, RE_TYPE
from bson.timestamp import Timestamp
from bson.tz_util import utc
@ -90,7 +91,8 @@ BSONMAX = b("\x7F") # Max key
def _get_int(data, position, as_class=None,
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, unsigned=False):
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
compile_re=True, unsigned=False):
format = unsigned and "I" or "i"
try:
value = struct.unpack("<%s" % format, data[position:position + 4])[0]
@ -132,13 +134,13 @@ def _make_c_string(string, check_null=False):
"UTF-8: %r" % string)
def _get_number(data, position, as_class, tz_aware, uuid_subtype):
def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re):
num = struct.unpack("<d", data[position:position + 8])[0]
position += 8
return num, position
def _get_string(data, position, as_class, tz_aware, uuid_subtype):
def _get_string(data, position, as_class, tz_aware, uuid_subtype, compile_re):
length = struct.unpack("<i", data[position:position + 4])[0]
if length <= 0 or (len(data) - position - 4) < length:
raise InvalidBSON("invalid string length")
@ -148,12 +150,14 @@ def _get_string(data, position, as_class, tz_aware, uuid_subtype):
return _get_c_string(data, position, length - 1)
def _get_object(data, position, as_class, tz_aware, uuid_subtype):
def _get_object(data, position, as_class, tz_aware, uuid_subtype, compile_re):
obj_size = struct.unpack("<i", data[position:position + 4])[0]
if data[position + obj_size - 1:position + obj_size] != ZERO:
raise InvalidBSON("bad eoo")
encoded = data[position + 4:position + obj_size - 1]
object = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype)
object = _elements_to_dict(
encoded, as_class, tz_aware, uuid_subtype, compile_re)
position += obj_size
if "$ref" in object:
return (DBRef(object.pop("$ref"), object.pop("$id", None),
@ -161,9 +165,9 @@ def _get_object(data, position, as_class, tz_aware, uuid_subtype):
return object, position
def _get_array(data, position, as_class, tz_aware, uuid_subtype):
def _get_array(data, position, as_class, tz_aware, uuid_subtype, compile_re):
obj, position = _get_object(data, position,
as_class, tz_aware, uuid_subtype)
as_class, tz_aware, uuid_subtype, compile_re)
result = []
i = 0
while True:
@ -175,7 +179,7 @@ def _get_array(data, position, as_class, tz_aware, uuid_subtype):
return result, position
def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
def _get_binary(data, position, as_class, tz_aware, uuid_subtype, compile_re):
length, position = _get_int(data, position)
subtype = ord(data[position:position + 1])
position += 1
@ -207,19 +211,19 @@ def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
def _get_oid(data, position, as_class=None,
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
value = ObjectId(data[position:position + 12])
position += 12
return value, position
def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
def _get_boolean(data, position, as_class, tz_aware, uuid_subtype, compile_re):
value = data[position:position + 1] == ONE
position += 1
return value, position
def _get_date(data, position, as_class, tz_aware, uuid_subtype):
def _get_date(data, position, as_class, tz_aware, uuid_subtype, compile_re):
millis = struct.unpack("<q", data[position:position + 8])[0]
diff = millis % 1000
seconds = (millis - diff) / 1000
@ -231,58 +235,51 @@ def _get_date(data, position, as_class, tz_aware, uuid_subtype):
return dt.replace(microsecond=diff * 1000), position
def _get_code(data, position, as_class, tz_aware, uuid_subtype):
def _get_code(data, position, as_class, tz_aware, uuid_subtype, compile_re):
code, position = _get_string(data, position,
as_class, tz_aware, uuid_subtype)
as_class, tz_aware, uuid_subtype, compile_re)
return Code(code), position
def _get_code_w_scope(data, position, as_class, tz_aware, uuid_subtype):
def _get_code_w_scope(
data, position, as_class, tz_aware, uuid_subtype, compile_re):
_, position = _get_int(data, position)
code, position = _get_string(data, position,
as_class, tz_aware, uuid_subtype)
as_class, tz_aware, uuid_subtype, compile_re)
scope, position = _get_object(data, position,
as_class, tz_aware, uuid_subtype)
as_class, tz_aware, uuid_subtype, compile_re)
return Code(code, scope), position
def _get_null(data, position, as_class, tz_aware, uuid_subtype):
def _get_null(data, position, as_class, tz_aware, uuid_subtype, compile_re):
return None, position
def _get_regex(data, position, as_class, tz_aware, uuid_subtype):
def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re):
pattern, position = _get_c_string(data, position)
bson_flags, position = _get_c_string(data, position)
flags = 0
if "i" in bson_flags:
flags |= re.IGNORECASE
if "l" in bson_flags:
flags |= re.LOCALE
if "m" in bson_flags:
flags |= re.MULTILINE
if "s" in bson_flags:
flags |= re.DOTALL
if "u" in bson_flags:
flags |= re.UNICODE
if "x" in bson_flags:
flags |= re.VERBOSE
return re.compile(pattern, flags), position
bson_re = Regex(pattern, bson_flags)
if compile_re:
return bson_re.compile(), position
else:
return bson_re, position
def _get_ref(data, position, as_class, tz_aware, uuid_subtype):
collection, position = _get_string(data, position,
as_class, tz_aware, uuid_subtype)
def _get_ref(data, position, as_class, tz_aware, uuid_subtype, compile_re):
collection, position = _get_string(data, position, as_class, tz_aware,
uuid_subtype, compile_re)
oid, position = _get_oid(data, position)
return DBRef(collection, oid), position
def _get_timestamp(data, position, as_class, tz_aware, uuid_subtype):
def _get_timestamp(
data, position, as_class, tz_aware, uuid_subtype, compile_re):
inc, position = _get_int(data, position, unsigned=True)
timestamp, position = _get_int(data, position, unsigned=True)
return Timestamp(timestamp, inc), position
def _get_long(data, position, as_class, tz_aware, uuid_subtype):
def _get_long(data, position, as_class, tz_aware, uuid_subtype, compile_re):
# Have to cast to long; on 32-bit unpack may return an int.
# 2to3 will change long to int. That's fine since long doesn't
# exist in python3.
@ -310,30 +307,32 @@ _element_getter = {
BSONINT: _get_int, # number_int
BSONTIM: _get_timestamp,
BSONLON: _get_long, # Same as _get_int after 2to3 runs.
BSONMIN: lambda v, w, x, y, z: (MinKey(), w),
BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)}
BSONMIN: lambda u, v, w, x, y, z: (MinKey(), v),
BSONMAX: lambda u, v, w, x, y, z: (MaxKey(), v)}
def _element_to_dict(data, position, as_class, tz_aware, uuid_subtype):
def _element_to_dict(
data, position, as_class, tz_aware, uuid_subtype, compile_re):
element_type = data[position:position + 1]
position += 1
element_name, position = _get_c_string(data, position)
value, position = _element_getter[element_type](data, position, as_class,
tz_aware, uuid_subtype)
value, position = _element_getter[element_type](
data, position, as_class, tz_aware, uuid_subtype, compile_re)
return element_name, value, position
def _elements_to_dict(data, as_class, tz_aware, uuid_subtype):
def _elements_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
result = as_class()
position = 0
end = len(data) - 1
while position < end:
(key, value, position) = _element_to_dict(data, position, as_class,
tz_aware, uuid_subtype)
(key, value, position) = _element_to_dict(
data, position, as_class, tz_aware, uuid_subtype, compile_re)
result[key] = value
return result
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
obj_size = struct.unpack("<i", data[:4])[0]
length = len(data)
if length < obj_size:
@ -341,8 +340,10 @@ def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
if obj_size != length or data[obj_size - 1:obj_size] != ZERO:
raise InvalidBSON("bad eoo")
elements = data[4:obj_size - 1]
return (_elements_to_dict(elements, as_class,
tz_aware, uuid_subtype), data[obj_size:])
dct = _elements_to_dict(
elements, as_class, tz_aware, uuid_subtype, compile_re)
return dct, data[obj_size:]
if _use_c:
_bson_to_dict = _cbson._bson_to_dict
@ -444,7 +445,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
return BSONTIM + name + inc + time
if value is None:
return BSONNUL + name
if isinstance(value, RE_TYPE):
if isinstance(value, (RE_TYPE, Regex)):
pattern = value.pattern
flags = ""
if value.flags & re.IGNORECASE:
@ -492,7 +493,7 @@ if _use_c:
def decode_all(data, as_class=dict,
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
"""Decode BSON data to multiple documents.
`data` must be a string of concatenated, valid, BSON-encoded
@ -504,7 +505,14 @@ def decode_all(data, as_class=dict,
documents
- `tz_aware` (optional): if ``True``, return timezone-aware
:class:`~datetime.datetime` instances
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of :class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from ``currentOp``
.. versionchanged:: 2.7
Added `compile_re` option.
.. versionadded:: 1.9
"""
docs = []
@ -519,7 +527,7 @@ def decode_all(data, as_class=dict,
elements = data[position + 4:position + obj_size - 1]
position += obj_size
docs.append(_elements_to_dict(elements, as_class,
tz_aware, uuid_subtype))
tz_aware, uuid_subtype, compile_re))
return docs
if _use_c:
decode_all = _cbson.decode_all
@ -540,7 +548,7 @@ def is_valid(bson):
"of a subclass of %s" % (binary_type.__name__,))
try:
(_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
(_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE, True)
return remainder == EMPTY
except:
return False
@ -573,7 +581,7 @@ class BSON(binary_type):
return cls(_dict_to_bson(document, check_keys, uuid_subtype))
def decode(self, as_class=dict,
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
"""Decode this BSON data.
The default type to use for the resultant document is
@ -593,10 +601,21 @@ class BSON(binary_type):
document
- `tz_aware` (optional): if ``True``, return timezone-aware
:class:`~datetime.datetime` instances
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of
:class:`~bson.regex.Regex` instead. Can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from
``currentOp``
.. versionchanged:: 2.7
Added ``compile_re`` option.
.. versionadded:: 1.9
"""
(document, _) = _bson_to_dict(self, as_class, tz_aware, uuid_subtype)
(document, _) = _bson_to_dict(
self, as_class, tz_aware, uuid_subtype, compile_re)
return document

View File

@ -43,6 +43,7 @@ struct module_state {
PyObject* ObjectId;
PyObject* DBRef;
PyObject* RECompile;
PyObject* Regex;
PyObject* UUID;
PyObject* Timestamp;
PyObject* MinKey;
@ -133,7 +134,8 @@ _downcast_and_check(Py_ssize_t size, int extra) {
static PyObject* elements_to_dict(PyObject* self, const char* string,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype);
unsigned char uuid_subtype,
unsigned char compile_re);
static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
int type_byte, PyObject* value,
@ -348,7 +350,8 @@ static int _load_python_objects(PyObject* module) {
_load_object(&state->MinKey, "bson.min_key", "MinKey") ||
_load_object(&state->MaxKey, "bson.max_key", "MaxKey") ||
_load_object(&state->UTC, "bson.tz_util", "utc") ||
_load_object(&state->RECompile, "re", "compile")) {
_load_object(&state->RECompile, "re", "compile") ||
_load_object(&state->Regex, "bson.regex", "Regex")) {
return 1;
}
/* If we couldn't import uuid then we must be on 2.4. Just ignore. */
@ -440,6 +443,130 @@ _set_cannot_encode(PyObject* value) {
}
}
/*
 * Encode a builtin Python regular expression or our custom Regex class.
 *
 * Reads the "flags" and "pattern" attributes of `value`, appends the pattern
 * and then the BSON flag letters to `buffer` (each followed by a NUL byte),
 * and finally stores the BSON regex type byte (0x0B) at offset `type_byte`
 * in the buffer.
 *
 * Sets exception and returns 0 on failure, returns 1 on success.
 */
static int _write_regex_to_buffer(
    buffer_t buffer, int type_byte, PyObject* value) {

    PyObject* py_flags;
    PyObject* py_pattern;
    PyObject* encoded_pattern;
    long int_flags;
    char flags[FLAGS_SIZE];
    char check_utf8 = 0;
    const char* pattern_data;
    int pattern_length, flags_length;
    result_t status;

    /*
     * Both the builtin re type and our Regex class have attributes
     * "flags" and "pattern".
     *
     * NOTE: no module state is needed here, and this function has no `self`
     * parameter, so it must not call GETSTATE(self).
     */
    py_flags = PyObject_GetAttrString(value, "flags");
    if (!py_flags) {
        return 0;
    }
#if PY_MAJOR_VERSION >= 3
    int_flags = PyLong_AsLong(py_flags);
#else
    int_flags = PyInt_AsLong(py_flags);
#endif
    Py_DECREF(py_flags);
    /* -1 is also the error return of the conversion; don't encode garbage
     * flags when "flags" was not an integer. */
    if (int_flags == -1 && PyErr_Occurred()) {
        return 0;
    }

    py_pattern = PyObject_GetAttrString(value, "pattern");
    if (!py_pattern) {
        return 0;
    }

    if (PyUnicode_Check(py_pattern)) {
        /* Unicode pattern: encode to UTF-8 ourselves and drop the original. */
        encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
        Py_DECREF(py_pattern);
        if (!encoded_pattern) {
            return 0;
        }
    } else {
        /* Byte-string pattern: we take over the reference and must verify
         * that the caller-supplied bytes are valid UTF-8. */
        encoded_pattern = py_pattern;
        check_utf8 = 1;
    }

#if PY_MAJOR_VERSION >= 3
    if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
        Py_DECREF(encoded_pattern);
        return 0;
    }
    if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
        Py_DECREF(encoded_pattern);
        return 0;
    }
#else
    if (!(pattern_data = PyString_AsString(encoded_pattern))) {
        Py_DECREF(encoded_pattern);
        return 0;
    }
    if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
        Py_DECREF(encoded_pattern);
        return 0;
    }
#endif

    /* Last argument presumably enables the embedded-NUL check, given the
     * HAS_NULL status handled below -- confirm against check_string(). */
    status = check_string((const unsigned char*)pattern_data,
                          pattern_length, check_utf8, 1);
    if (status == NOT_UTF_8) {
        PyObject* InvalidStringData = _error("InvalidStringData");
        if (InvalidStringData) {
            PyErr_SetString(InvalidStringData,
                            "regex patterns must be valid UTF-8");
            Py_DECREF(InvalidStringData);
        }
        Py_DECREF(encoded_pattern);
        return 0;
    } else if (status == HAS_NULL) {
        PyObject* InvalidDocument = _error("InvalidDocument");
        if (InvalidDocument) {
            PyErr_SetString(InvalidDocument,
                            "regex patterns must not contain the NULL byte");
            Py_DECREF(InvalidDocument);
        }
        Py_DECREF(encoded_pattern);
        return 0;
    }

    /* + 1 so the byte following the pattern data (its NUL terminator) is
     * written as well. */
    if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
        Py_DECREF(encoded_pattern);
        return 0;
    }
    Py_DECREF(encoded_pattern);

    /*
     * Translate the integer bitmask into BSON flag letters, appended in
     * alphabetical order.
     * TODO don't hardcode these: the values are those of the re module's
     * IGNORECASE (2), LOCALE (4), MULTILINE (8), DOTALL (16), UNICODE (32)
     * and VERBOSE (64) constants.
     */
    flags[0] = 0;
    if (int_flags & 2) {
        STRCAT(flags, FLAGS_SIZE, "i");
    }
    if (int_flags & 4) {
        STRCAT(flags, FLAGS_SIZE, "l");
    }
    if (int_flags & 8) {
        STRCAT(flags, FLAGS_SIZE, "m");
    }
    if (int_flags & 16) {
        STRCAT(flags, FLAGS_SIZE, "s");
    }
    if (int_flags & 32) {
        STRCAT(flags, FLAGS_SIZE, "u");
    }
    if (int_flags & 64) {
        STRCAT(flags, FLAGS_SIZE, "x");
    }
    flags_length = (int)strlen(flags) + 1;
    if (!buffer_write_bytes(buffer, flags, flags_length)) {
        return 0;
    }

    /* Fill in the element's type byte only once everything was written. */
    *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
    return 1;
}
/* TODO our platform better be little-endian w/ 4-byte ints! */
/* Write a single value to the buffer (also write its type_byte, for which
* space has already been reserved.
@ -574,6 +701,11 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
*(buffer_get_buffer(buffer) + type_byte) = 0x07;
return 1;
}
case 11:
{
/* Regex */
return _write_regex_to_buffer(buffer, type_byte, value);
}
case 13:
{
/* Code */
@ -890,115 +1022,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
*(buffer_get_buffer(buffer) + type_byte) = 0x09;
return buffer_write_bytes(buffer, (const char*)&millis, 8);
} else if (PyObject_TypeCheck(value, state->REType)) {
PyObject* py_flags;
PyObject* py_pattern;
PyObject* encoded_pattern;
long int_flags;
char flags[FLAGS_SIZE];
char check_utf8 = 0;
const char* pattern_data;
int pattern_length, flags_length;
result_t status;
py_flags = PyObject_GetAttrString(value, "flags");
if (!py_flags) {
return 0;
}
#if PY_MAJOR_VERSION >= 3
int_flags = PyLong_AsLong(py_flags);
#else
int_flags = PyInt_AsLong(py_flags);
#endif
Py_DECREF(py_flags);
py_pattern = PyObject_GetAttrString(value, "pattern");
if (!py_pattern) {
return 0;
}
if (PyUnicode_Check(py_pattern)) {
encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
Py_DECREF(py_pattern);
if (!encoded_pattern) {
return 0;
}
} else {
encoded_pattern = py_pattern;
check_utf8 = 1;
}
#if PY_MAJOR_VERSION >= 3
if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
Py_DECREF(encoded_pattern);
return 0;
}
if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
Py_DECREF(encoded_pattern);
return 0;
}
#else
if (!(pattern_data = PyString_AsString(encoded_pattern))) {
Py_DECREF(encoded_pattern);
return 0;
}
if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
Py_DECREF(encoded_pattern);
return 0;
}
#endif
status = check_string((const unsigned char*)pattern_data,
pattern_length, check_utf8, 1);
if (status == NOT_UTF_8) {
PyObject* InvalidStringData = _error("InvalidStringData");
if (InvalidStringData) {
PyErr_SetString(InvalidStringData,
"regex patterns must be valid UTF-8");
Py_DECREF(InvalidStringData);
}
Py_DECREF(encoded_pattern);
return 0;
} else if (status == HAS_NULL) {
PyObject* InvalidDocument = _error("InvalidDocument");
if (InvalidDocument) {
PyErr_SetString(InvalidDocument,
"regex patterns must not contain the NULL byte");
Py_DECREF(InvalidDocument);
}
Py_DECREF(encoded_pattern);
return 0;
}
if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
Py_DECREF(encoded_pattern);
return 0;
}
Py_DECREF(encoded_pattern);
flags[0] = 0;
/* TODO don't hardcode these */
if (int_flags & 2) {
STRCAT(flags, FLAGS_SIZE, "i");
}
if (int_flags & 4) {
STRCAT(flags, FLAGS_SIZE, "l");
}
if (int_flags & 8) {
STRCAT(flags, FLAGS_SIZE, "m");
}
if (int_flags & 16) {
STRCAT(flags, FLAGS_SIZE, "s");
}
if (int_flags & 32) {
STRCAT(flags, FLAGS_SIZE, "u");
}
if (int_flags & 64) {
STRCAT(flags, FLAGS_SIZE, "x");
}
flags_length = (int)strlen(flags) + 1;
if (!buffer_write_bytes(buffer, flags, flags_length)) {
return 0;
}
*(buffer_get_buffer(buffer) + type_byte) = 0x0B;
return 1;
return _write_regex_to_buffer(buffer, type_byte, value);
}
/*
@ -1435,7 +1459,8 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position,
unsigned char type, unsigned max, PyObject* as_class,
unsigned char tz_aware, unsigned char uuid_subtype) {
unsigned char tz_aware, unsigned char uuid_subtype,
unsigned char compile_re) {
struct module_state *state = GETSTATE(self);
PyObject* value = NULL;
@ -1495,7 +1520,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
goto invalid;
}
value = elements_to_dict(self, buffer + *position + 4,
size - 5, as_class, tz_aware, uuid_subtype);
size - 5, as_class, tz_aware, uuid_subtype,
compile_re);
if (!value) {
return NULL;
}
@ -1587,7 +1613,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
to_append = get_value(self, buffer, position, bson_type,
max - (unsigned)key_size,
as_class, tz_aware, uuid_subtype);
as_class, tz_aware, uuid_subtype,
compile_re);
Py_LeaveRecursiveCall();
if (!to_append) {
Py_DECREF(value);
@ -1850,7 +1877,18 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
}
*position += (unsigned)flags_length + 1;
if ((compile_func = _get_object(state->RECompile, "re", "compile"))) {
/*
* Use re.compile() if we're configured to compile regular
* expressions, else create an instance of our Regex class.
*/
if (compile_re) {
compile_func = _get_object(state->RECompile, "re", "compile");
} else {
compile_func = _get_object(state->Regex, "bson.regex", "Regex");
}
if (compile_func) {
value = PyObject_CallFunction(compile_func, "Oi", pattern, flags);
Py_DECREF(compile_func);
}
@ -1990,7 +2028,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
}
scope = elements_to_dict(self, buffer + *position + 4,
scope_size - 5, (PyObject*)&PyDict_Type,
tz_aware, uuid_subtype);
tz_aware, uuid_subtype, compile_re);
if (!scope) {
Py_DECREF(code);
return NULL;
@ -2098,7 +2136,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
static PyObject* _elements_to_dict(PyObject* self, const char* string,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype) {
unsigned char uuid_subtype,
unsigned char compile_re) {
unsigned position = 0;
PyObject* dict = PyObject_CallObject(as_class, NULL);
if (!dict) {
@ -2126,7 +2165,8 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
}
position += (unsigned)name_length + 1;
value = get_value(self, string, &position, type,
max - position, as_class, tz_aware, uuid_subtype);
max - position, as_class, tz_aware, uuid_subtype,
compile_re);
if (!value) {
Py_DECREF(name);
Py_DECREF(dict);
@ -2143,12 +2183,13 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
static PyObject* elements_to_dict(PyObject* self, const char* string,
unsigned max, PyObject* as_class,
unsigned char tz_aware,
unsigned char uuid_subtype) {
unsigned char uuid_subtype,
unsigned char compile_re) {
PyObject* result;
if (Py_EnterRecursiveCall(" while decoding a BSON document"))
return NULL;
result = _elements_to_dict(self, string, max,
as_class, tz_aware, uuid_subtype);
as_class, tz_aware, uuid_subtype, compile_re);
Py_LeaveRecursiveCall();
return result;
}
@ -2161,11 +2202,14 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
PyObject* as_class;
unsigned char tz_aware;
unsigned char uuid_subtype;
unsigned char compile_re;
PyObject* dict;
PyObject* remainder;
PyObject* result;
if (!PyArg_ParseTuple(args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
if (!PyArg_ParseTuple(
args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
return NULL;
}
@ -2231,7 +2275,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
}
dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
as_class, tz_aware, uuid_subtype);
as_class, tz_aware, uuid_subtype, compile_re);
if (!dict) {
return NULL;
}
@ -2260,8 +2304,11 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
PyObject* as_class = (PyObject*)&PyDict_Type;
unsigned char tz_aware = 1;
unsigned char uuid_subtype = 3;
unsigned char compile_re;
if (!PyArg_ParseTuple(args, "O|Obb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
if (!PyArg_ParseTuple(
args, "O|Obbb",
&bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
return NULL;
}
@ -2332,7 +2379,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
}
dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
as_class, tz_aware, uuid_subtype);
as_class, tz_aware, uuid_subtype, compile_re);
if (!dict) {
Py_DECREF(result);
return NULL;
@ -2364,6 +2411,7 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
Py_VISIT(GETSTATE(m)->ObjectId);
Py_VISIT(GETSTATE(m)->DBRef);
Py_VISIT(GETSTATE(m)->RECompile);
Py_VISIT(GETSTATE(m)->Regex);
Py_VISIT(GETSTATE(m)->UUID);
Py_VISIT(GETSTATE(m)->Timestamp);
Py_VISIT(GETSTATE(m)->MinKey);
@ -2379,6 +2427,7 @@ static int _cbson_clear(PyObject *m) {
Py_CLEAR(GETSTATE(m)->ObjectId);
Py_CLEAR(GETSTATE(m)->DBRef);
Py_CLEAR(GETSTATE(m)->RECompile);
Py_CLEAR(GETSTATE(m)->Regex);
Py_CLEAR(GETSTATE(m)->UUID);
Py_CLEAR(GETSTATE(m)->Timestamp);
Py_CLEAR(GETSTATE(m)->MinKey);

View File

@ -90,6 +90,7 @@ from bson.dbref import DBRef
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from bson.py3compat import PY3, binary_type, string_types
@ -120,10 +121,20 @@ def loads(s, *args, **kwargs):
"""Helper function that wraps :class:`json.loads`.
Automatically passes the object_hook for BSON type conversion.
:Parameters:
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of :class:`~bson.regex.Regex` instead.
.. versionchanged:: 2.7
Added ``compile_re`` option.
"""
if not json_lib:
raise Exception("No json library available")
kwargs['object_hook'] = object_hook
compile_re = kwargs.pop('compile_re', True)
kwargs['object_hook'] = lambda dct: object_hook(dct, compile_re)
return json.loads(s, *args, **kwargs)
@ -141,7 +152,7 @@ def _json_convert(obj):
return obj
def object_hook(dct):
def object_hook(dct, compile_re=True):
if "$oid" in dct:
return ObjectId(str(dct["$oid"]))
if "$ref" in dct:
@ -154,7 +165,11 @@ def object_hook(dct):
# PyMongo always adds $options but some other tools may not.
for opt in dct.get("$options", ""):
flags |= _RE_OPT_TABLE.get(opt, 0)
return re.compile(dct["$regex"], flags)
if compile_re:
return re.compile(dct["$regex"], flags)
else:
return Regex(dct["$regex"], flags)
if "$minKey" in dct:
return MinKey()
if "$maxKey" in dct:

83
bson/regex.py Normal file
View File

@ -0,0 +1,83 @@
# Copyright 2013 MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing MongoDB regular expressions.
"""
import re
from bson.py3compat import string_types
def str_flags_to_int(str_flags):
    """Convert a string of BSON regex flag letters to an ``re`` bitmask.

    Each of the letters ``i``, ``l``, ``m``, ``s``, ``u`` and ``x`` found in
    `str_flags` switches on the matching :mod:`re` flag. Any other
    characters are ignored, and an empty string yields ``0``.
    """
    letter_to_flag = (
        ("i", re.IGNORECASE),
        ("l", re.LOCALE),
        ("m", re.MULTILINE),
        ("s", re.DOTALL),
        ("u", re.UNICODE),
        ("x", re.VERBOSE),
    )
    bitmask = 0
    for letter, re_flag in letter_to_flag:
        if letter in str_flags:
            bitmask |= re_flag
    return bitmask
class Regex(object):
    """BSON regular expression data."""

    # Type marker for BSON regular expressions (presumably what the encoder
    # dispatches on -- confirm against the encoder).
    _type_marker = 11

    def __init__(self, pattern, flags=0):
        """BSON regular expression data.

        This class is useful to store and retrieve regular expressions that are
        incompatible with Python's regular expression dialect.

        :Parameters:
          - `pattern`: string
          - `flags`: (optional) an integer bitmask, or a string of flag
            characters like "im" for IGNORECASE and MULTILINE

        Raises :exc:`TypeError` if `pattern` is not a string or if `flags`
        is neither a string nor an int.
        """
        if not isinstance(pattern, string_types):
            raise TypeError("pattern must be a string, not %s" % type(pattern))
        self.pattern = pattern

        # Flags are always stored as an integer bitmask, whatever form the
        # caller supplied them in.
        if isinstance(flags, string_types):
            self.flags = str_flags_to_int(flags)
        elif isinstance(flags, int):
            self.flags = flags
        else:
            raise TypeError(
                "flags must be a string or int, not %s" % type(flags))

    def __eq__(self, other):
        """Two ``Regex`` instances are equal when both their pattern and
        their flags are equal. Other types are left to the other operand.
        """
        if isinstance(other, Regex):
            # Compare against the *other* instance's pattern; comparing
            # self.pattern with itself made any two instances with equal
            # flags compare equal.
            return self.pattern == other.pattern and self.flags == other.flags
        else:
            return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "Regex(%r, %r)" % (self.pattern, self.flags)

    def compile(self):
        """Compile this ``Regex`` as a Python regular expression.

        Returns the result of ``re.compile(pattern, flags)``, so any error
        raised by :func:`re.compile` for a pattern Python cannot handle
        propagates to the caller.
        """
        return re.compile(self.pattern, self.flags)

View File

@ -35,29 +35,29 @@ class SON(dict):
The mapping from Python types to BSON types is as follows:
=================================== ============= ===================
Python Type BSON Type Supported Direction
=================================== ============= ===================
None null both
bool boolean both
int [#int]_ int32 / int64 py -> bson
long int64 both
float number (real) both
string string py -> bson
unicode string both
list array both
dict / `SON` object both
datetime.datetime [#dt]_ [#dt2]_ date both
compiled re regex both
`bson.binary.Binary` binary both
`bson.objectid.ObjectId` oid both
`bson.dbref.DBRef` dbref both
None undefined bson -> py
unicode code bson -> py
`bson.code.Code` code py -> bson
unicode symbol bson -> py
bytes (Python 3) [#bytes]_ binary both
=================================== ============= ===================
======================================= ============= ===================
Python Type BSON Type Supported Direction
======================================= ============= ===================
None null both
bool boolean both
int [#int]_ int32 / int64 py -> bson
long int64 both
float number (real) both
string string py -> bson
unicode string both
list array both
dict / `SON` object both
datetime.datetime [#dt]_ [#dt2]_ date both
`bson.regex.Regex` / compiled re [#re]_ regex both
`bson.binary.Binary` binary both
`bson.objectid.ObjectId` oid both
`bson.dbref.DBRef` dbref both
None undefined bson -> py
unicode code bson -> py
`bson.code.Code` code py -> bson
unicode symbol bson -> py
bytes (Python 3) [#bytes]_ binary both
======================================= ============= ===================
Note that to save binary data it must be wrapped as an instance of
`bson.binary.Binary`. Otherwise it will be saved as a BSON string
@ -71,6 +71,11 @@ class SON(dict):
millisecond when saved
.. [#dt2] all datetime.datetime instances are treated as *naive*. clients
should always use UTC.
.. [#re] :class:`~bson.regex.Regex` instances and regular expression
objects from ``re.compile()`` are both saved as BSON regular expressions.
BSON regular expressions are decoded as Python regular expressions by
default, or as :class:`~bson.regex.Regex` instances if the ``compile_re``
option is set to ``False``.
.. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
it will be decoded to an instance of :class:`~bson.binary.Binary` with

View File

@ -11,6 +11,7 @@ Sub-modules:
:maxdepth: 2
binary
regex
code
dbref
errors

7
doc/api/bson/regex.rst Normal file
View File

@ -0,0 +1,7 @@
:mod:`regex` -- Tools for representing MongoDB regular expressions
==================================================================
.. versionadded:: 2.7
.. automodule:: bson.regex
:synopsis: Tools for representing MongoDB regular expressions
:members:

View File

@ -33,7 +33,7 @@
.. automethod:: update(spec, document[, upsert=False[, manipulate=False[, safe=None[, multi=False[, check_keys=True[, **kwargs]]]]]])
.. automethod:: remove([spec_or_id=None[, safe=None[, **kwargs]]])
.. automethod:: drop
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[,**kwargs]]]]]]]]]]]]]]]]])
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[, compile_re=True[, **kwargs]]]]]]]]]]]]]]]]]])
.. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
.. automethod:: count
.. automethod:: create_index

View File

@ -690,6 +690,9 @@ class Collection(common.BaseObject):
the nearest member may accept reads. Default 15 milliseconds.
**Ignored by mongos** and must be configured on the command line.
See the localThreshold_ option for more information.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regex objects into Python regexes. Return instances of
:class:`~bson.regex.Regex` instead.
- `exhaust` (optional): If ``True`` create an "exhaust" cursor.
MongoDB will stream batched results to the client without waiting
for the client to request each batch, reducing latency.
@ -717,12 +720,15 @@ class Collection(common.BaseObject):
5. The `network_timeout` option is ignored when using the
`exhaust` option.
.. note:: The `manipulate` parameter may default to False in
a future release.
.. note:: The `manipulate` and `compile_re` parameters may default to
False in future releases.
.. note:: The `max_scan` parameter requires server
version **>= 1.5.1**
.. versionadded:: 2.7
The ``compile_re`` parameter.
.. versionadded:: 2.3
The `tag_sets` and `secondary_acceptable_latency_ms` parameters.

View File

@ -69,8 +69,8 @@ class Cursor(object):
await_data=False, partial=False, manipulate=True,
read_preference=ReadPreference.PRIMARY,
tag_sets=[{}], secondary_acceptable_latency_ms=None,
exhaust=False, _must_use_master=False, _uuid_subtype=None,
_first_batch=None, _cursor_id=None,
exhaust=False, compile_re=True, _must_use_master=False,
_uuid_subtype=None, _first_batch=None, _cursor_id=None,
**kwargs):
"""Create a new cursor.
@ -152,6 +152,7 @@ class Cursor(object):
self.__tag_sets = tag_sets
self.__secondary_acceptable_latency_ms = secondary_acceptable_latency_ms
self.__tz_aware = collection.database.connection.tz_aware
self.__compile_re = compile_re
self.__must_use_master = _must_use_master
self.__uuid_subtype = _uuid_subtype or collection.uuid_subtype
@ -225,8 +226,8 @@ class Cursor(object):
"batch_size", "max_scan", "as_class", "slave_okay",
"manipulate", "read_preference", "tag_sets",
"secondary_acceptable_latency_ms",
"must_use_master", "uuid_subtype", "query_flags",
"kwargs")
"must_use_master", "uuid_subtype", "compile_re",
"query_flags", "kwargs")
data = dict((k, v) for k, v in self.__dict__.iteritems()
if k.startswith('_Cursor__') and k[9:] in values_to_clone)
if deepcopy:
@ -667,6 +668,7 @@ class Cursor(object):
r = database.command("count", self.__collection.name,
allowable_errors=["ns missing"],
uuid_subtype=self.__uuid_subtype,
compile_re=self.__compile_re,
**command)
if r.get("errmsg", "") == "ns missing":
return 0
@ -718,6 +720,7 @@ class Cursor(object):
return database.command("distinct",
self.__collection.name,
uuid_subtype=self.__uuid_subtype,
compile_re=self.__compile_re,
**options)["values"]
def explain(self):
@ -829,7 +832,8 @@ class Cursor(object):
response = helpers._unpack_response(response, self.__id,
self.__as_class,
self.__tz_aware,
self.__uuid_subtype)
self.__uuid_subtype,
self.__compile_re)
except AutoReconnect:
# Don't send kill cursors to another server after a "not master"
# error. It's completely pointless.

View File

@ -273,7 +273,7 @@ class Database(common.BaseObject):
def command(self, command, value=1,
check=True, allowable_errors=[],
uuid_subtype=OLD_UUID_SUBTYPE, **kwargs):
uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, **kwargs):
"""Issue a MongoDB command.
Send command `command` to the database and return the
@ -318,6 +318,12 @@ class Database(common.BaseObject):
in this list will be ignored by error-checking
- `uuid_subtype` (optional): The BSON binary subtype to use
for a UUID used in this command.
- `compile_re` (optional): if ``False``, don't attempt to compile
BSON regular expressions into Python regular expressions. Return
instances of :class:`~bson.regex.Regex` instead. This can avoid
:exc:`~bson.errors.InvalidBSON` errors when receiving
Python-incompatible regular expressions, for example from
``currentOp``.
- `read_preference`: The read preference for this connection.
See :class:`~pymongo.read_preferences.ReadPreference` for available
options.
@ -337,6 +343,8 @@ class Database(common.BaseObject):
.. note:: ``command`` ignores the ``network_timeout`` parameter.
.. versionchanged:: 2.7
Added ``compile_re`` option.
.. versionchanged:: 2.3
Added `tag_sets` and `secondary_acceptable_latency_ms` options.
.. versionchanged:: 2.2
@ -390,6 +398,7 @@ class Database(common.BaseObject):
extra_opts['secondary_acceptable_latency_ms'] = kwargs.pop(
'secondary_acceptable_latency_ms',
self.secondary_acceptable_latency_ms)
extra_opts['compile_re'] = compile_re
fields = kwargs.get('fields')
if fields is not None and not isinstance(fields, dict):

View File

@ -73,7 +73,8 @@ def _index_document(index_list):
def _unpack_response(response, cursor_id=None, as_class=dict,
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
compile_re=True):
"""Unpack a response from the database.
Check the response for errors and unpack, returning a dictionary
@ -108,7 +109,8 @@ def _unpack_response(response, cursor_id=None, as_class=dict,
result["starting_from"] = struct.unpack("<i", response[12:16])[0]
result["number_returned"] = struct.unpack("<i", response[16:20])[0]
result["data"] = bson.decode_all(response[20:],
as_class, tz_aware, uuid_subtype)
as_class, tz_aware, uuid_subtype,
compile_re)
assert len(result["data"]) == result["number_returned"]
return result

View File

@ -32,7 +32,8 @@ from nose.plugins.skip import SkipTest
import bson
from bson import (BSON,
decode_all,
is_valid)
is_valid,
Regex)
from bson.binary import Binary, UUIDLegacy
from bson.code import Code
from bson.objectid import ObjectId
@ -534,5 +535,42 @@ class TestBSON(unittest.TestCase):
d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)])
self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict))
def test_bson_regex(self):
# Check that bson.Regex round-trips through BSON encoding, and that
# decoding with compile_re=False yields Regex instances rather than
# compiled Python patterns.
#
# Case 1: Invalid Python regex, though valid PCRE.
bson_re1 = Regex(r'[\w-\.]')
self.assertEqual(r'[\w-\.]', bson_re1.pattern)
# No flags were passed to the constructor, so flags is expected to be 0.
self.assertEqual(0, bson_re1.flags)
doc1 = {'r': bson_re1}
# Hand-built expected encoding of doc1; the options string is empty.
doc1_bson = b(
'\x11\x00\x00\x00' # document length
'\x0br\x00[\\w-\\.]\x00\x00' # r: regex
'\x00') # document terminator
self.assertEqual(doc1_bson, BSON.encode(doc1))
# Decoding is only attempted with compile_re=False here, because the
# pattern is not a valid Python regex (see the comment above).
self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False))
# Case 2: Valid Python regex, with flags.
re2 = re.compile('.*', re.IGNORECASE | re.MULTILINE | re.UNICODE)
bson_re2 = Regex('.*', re.IGNORECASE | re.MULTILINE | re.UNICODE)
doc2_with_re = {'r': re2}
doc2_with_bson_re = {'r': bson_re2}
# Hand-built expected encoding; the three flags appear as options "imu".
doc2_bson = b(
"\x0f\x00\x00\x00" # document length
"\x0br\x00.*\x00imu\x00" # r: regex
"\x00") # document terminator
# A compiled pattern and a Regex with the same pattern and flags must
# produce the same bytes.
self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))
# Built-in re objects don't support ==. Compare pattern and flags.
self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
# With compile_re=False the decoded value compares equal to the Regex.
self.assertEqual(
doc2_with_bson_re, BSON(doc2_bson).decode(compile_re=False))
if __name__ == "__main__":
unittest.main()

View File

@ -29,11 +29,12 @@ from nose.plugins.skip import SkipTest
sys.path[0:0] = [""]
from bson.binary import Binary
from bson.regex import Regex
from bson.code import Code
from bson.dbref import DBRef
from bson.objectid import ObjectId
from bson.py3compat import b
from bson.son import SON
from bson.son import SON, RE_TYPE
from pymongo import (ASCENDING, DESCENDING, GEO2D,
GEOHAYSTACK, GEOSPHERE, HASHED)
from pymongo import message as message_module
@ -1267,6 +1268,19 @@ class TestCollection(unittest.TestCase):
self.assertEqual(expected, db.test.aggregate([pipeline]))
self.assertEqual(expected, db.test.aggregate((pipeline,)))
def test_aggregate_with_compile_re(self):
    """Check the type of a stored regex returned by ``aggregate``.

    By default the value must be an instance of RE_TYPE; with
    ``compile_re=False`` it must be a :class:`~bson.regex.Regex`.
    """
    server_supports_aggregate = version.at_least(
        self.db.connection, (2, 1, 0))
    if not server_supports_aggregate:
        raise SkipTest("The aggregate command requires MongoDB >= 2.1.0")

    database = self.client.pymongo_test
    database.test.drop()
    database.test.insert({'r': re.compile('.*')})

    # Default behaviour.
    default_reply = database.test.aggregate([])
    default_value = default_reply['result'][0]['r']
    self.assertTrue(isinstance(default_value, RE_TYPE))

    # Compilation explicitly disabled.
    raw_reply = database.test.aggregate([], compile_re=False)
    raw_value = raw_reply['result'][0]['r']
    self.assertTrue(isinstance(raw_value, Regex))
def test_aggregation_cursor_validation(self):
if not version.at_least(self.db.connection, (2, 5, 1)):
raise SkipTest("Aggregation cursor requires MongoDB >= 2.5.1")
@ -2148,6 +2162,22 @@ class TestCollection(unittest.TestCase):
self.assertEqual(2, c.find_one(manipulate=True)['foo'])
c.remove({})
def test_compile_re(self):
    """Check the regex type returned by ``find_one`` and ``find``.

    Each is called once with default arguments (expects RE_TYPE) and once
    with ``compile_re=False`` (expects :class:`~bson.regex.Regex`).
    """
    coll = self.client.pymongo_test.test
    coll.drop()
    coll.insert({'r': re.compile('.*')})

    # Pairs of (extra keyword arguments, type expected for field 'r').
    cases = (
        ({}, RE_TYPE),
        ({'compile_re': False}, Regex),
    )

    # Test find_one with compile_re.
    for options, expected_type in cases:
        found = coll.find_one(**options)
        self.assertTrue(isinstance(found['r'], expected_type))

    # Test find with compile_re.
    for options, expected_type in cases:
        for found in coll.find(**options):
            self.assertTrue(isinstance(found['r'], expected_type))
if __name__ == "__main__":
unittest.main()

View File

@ -552,6 +552,7 @@ class TestCursor(unittest.TestCase):
await_data=True,
partial=True,
manipulate=False,
compile_re=False,
fields={'_id': False}).limit(2)
cursor.add_option(128)
@ -565,6 +566,8 @@ class TestCursor(unittest.TestCase):
cursor2._Cursor__slave_okay)
self.assertEqual(cursor._Cursor__manipulate,
cursor2._Cursor__manipulate)
self.assertEqual(cursor._Cursor__compile_re,
cursor2._Cursor__compile_re)
self.assertEqual(cursor._Cursor__query_flags,
cursor2._Cursor__query_flags)

View File

@ -16,6 +16,7 @@
import datetime
import os
import re
import sys
import warnings
@ -25,9 +26,10 @@ import unittest
from nose.plugins.skip import SkipTest
from bson.code import Code
from bson.regex import Regex
from bson.dbref import DBRef
from bson.objectid import ObjectId
from bson.son import SON
from bson.son import SON, RE_TYPE
from pymongo import (ALL,
auth,
OFF,
@ -303,6 +305,21 @@ class TestDatabase(unittest.TestCase):
if not is_mongos(self.client):
db.command('eval', 'sleep(100)', network_timeout=0.001)
def test_command_with_compile_re(self):
    """Check the regex type in a ``Database.command`` reply.

    By default the value must be an instance of RE_TYPE; with
    ``compile_re=False`` it must be a :class:`~bson.regex.Regex`.
    """
    # 'aggregate' serves as the example command: it is an easy way to
    # retrieve a BSON regex from a collection using a command.
    if not version.at_least(self.client, (2, 1, 0)):
        raise SkipTest('Need aggregation to test compile_re')

    database = self.client.pymongo_test
    database.test.drop()
    database.test.insert({'r': re.compile('.*')})

    reply = database.command('aggregate', 'test', pipeline=[])
    first_doc = reply['result'][0]
    self.assertTrue(isinstance(first_doc['r'], RE_TYPE))

    reply = database.command(
        'aggregate', 'test', pipeline=[], compile_re=False)
    first_doc = reply['result'][0]
    self.assertTrue(isinstance(first_doc['r'], Regex))
def test_last_status(self):
db = self.client.pymongo_test

View File

@ -32,6 +32,8 @@ from bson.dbref import DBRef
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.son import RE_TYPE
from bson.timestamp import Timestamp
from bson.tz_util import utc
@ -74,6 +76,22 @@ class TestJsonUtil(unittest.TestCase):
self.round_trip({"date": datetime.datetime(2009, 12, 9, 15,
49, 45, 191000, utc)})
def test_regex_object_hook(self):
    """Check ``json_util.object_hook`` on an extended-JSON regex.

    The result must be an instance of RE_TYPE by default and a
    :class:`~bson.regex.Regex` with ``compile_re=False``; pattern and
    flags must match in both cases.
    """
    import json

    # Extended JSON format regular expression.
    pat = 'a*b'
    json_re = '{"$regex": "%s", "$options": "u"}' % pat

    # Pairs of (extra keyword arguments, type expected from object_hook).
    cases = (
        ({}, RE_TYPE),
        ({'compile_re': False}, Regex),
    )
    for options, expected_type in cases:
        # Parse afresh for every case so each call gets its own dict.
        loaded = json_util.object_hook(json.loads(json_re), **options)
        self.assertTrue(isinstance(loaded, expected_type))
        self.assertEqual(pat, loaded.pattern)
        self.assertEqual(re.U, loaded.flags)
def test_regex(self):
res = self.round_tripped({"r": re.compile("a*b", re.IGNORECASE)})["r"]
self.assertEqual("a*b", res.pattern)
@ -95,6 +113,12 @@ class TestJsonUtil(unittest.TestCase):
expected_flags = re.U
self.assertEqual(expected_flags, res.flags)
self.assertEqual(
Regex('.*', 'ilm'),
json_util.loads(
'{"r": {"$regex": ".*", "$options": "ilm"}}',
compile_re=False)['r'])
def test_minkey(self):
self.round_trip({"m": MinKey()})