Option to not compile BSON regexes. PYTHON-500

Add a 'compile_re' parameter to Collection.find, Collection.find_one, and json_util.loads. If it's False, regular expressions are represented as instances of a new class, Regex, instead of being passed to re.compile(). This allows PyMongo to handle regular expressions that don't compile in Python but are valid in other contexts like MongoDB queries.
2013-08-06 18:36:33 -04:00 · 2013-08-06 18:36:33 -04:00 · e4cf504559
commit e4cf504559
parent f0a419e5ff
17 changed files with 530 additions and 218 deletions
--- a/bson/init.py
+++ b/bson/init.py
@ -32,6 +32,7 @@ from bson.max_key import MaxKey
 from bson.min_key import MinKey
 from bson.objectid import ObjectId
 from bson.py3compat import b, binary_type
+from bson.regex import Regex
 from bson.son import SON, RE_TYPE
 from bson.timestamp import Timestamp
 from bson.tz_util import utc
@ -90,7 +91,8 @@ BSONMAX = b("\x7F") # Max key


 def _get_int(data, position, as_class=None,
-             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, unsigned=False):
+             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
+             compile_re=True, unsigned=False):
    format = unsigned and "I" or "i"
    try:
        value = struct.unpack("<%s" % format, data[position:position + 4])[0]
@ -132,13 +134,13 @@ def _make_c_string(string, check_null=False):
                                    "UTF-8: %r" % string)


-def _get_number(data, position, as_class, tz_aware, uuid_subtype):
+def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    num = struct.unpack("<d", data[position:position + 8])[0]
    position += 8
    return num, position


-def _get_string(data, position, as_class, tz_aware, uuid_subtype):
+def _get_string(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    length = struct.unpack("<i", data[position:position + 4])[0]
    if length <= 0 or (len(data) - position - 4) < length:
        raise InvalidBSON("invalid string length")
@ -148,12 +150,14 @@ def _get_string(data, position, as_class, tz_aware, uuid_subtype):
    return _get_c_string(data, position, length - 1)


-def _get_object(data, position, as_class, tz_aware, uuid_subtype):
+def _get_object(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    obj_size = struct.unpack("<i", data[position:position + 4])[0]
    if data[position + obj_size - 1:position + obj_size] != ZERO:
        raise InvalidBSON("bad eoo")
    encoded = data[position + 4:position + obj_size - 1]
-    object = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype)
+    object = _elements_to_dict(
+        encoded, as_class, tz_aware, uuid_subtype, compile_re)
+
    position += obj_size
    if "$ref" in object:
        return (DBRef(object.pop("$ref"), object.pop("$id", None),
@ -161,9 +165,9 @@ def _get_object(data, position, as_class, tz_aware, uuid_subtype):
    return object, position


-def _get_array(data, position, as_class, tz_aware, uuid_subtype):
+def _get_array(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    obj, position = _get_object(data, position,
-                                as_class, tz_aware, uuid_subtype)
+                                as_class, tz_aware, uuid_subtype, compile_re)
    result = []
    i = 0
    while True:
@ -175,7 +179,7 @@ def _get_array(data, position, as_class, tz_aware, uuid_subtype):
    return result, position


-def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
+def _get_binary(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    length, position = _get_int(data, position)
    subtype = ord(data[position:position + 1])
    position += 1
@ -207,19 +211,19 @@ def _get_binary(data, position, as_class, tz_aware, uuid_subtype):


 def _get_oid(data, position, as_class=None,
-             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
    value = ObjectId(data[position:position + 12])
    position += 12
    return value, position


-def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
+def _get_boolean(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    value = data[position:position + 1] == ONE
    position += 1
    return value, position


-def _get_date(data, position, as_class, tz_aware, uuid_subtype):
+def _get_date(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    millis = struct.unpack("<q", data[position:position + 8])[0]
    diff = millis % 1000
    seconds = (millis - diff) / 1000
@ -231,58 +235,51 @@ def _get_date(data, position, as_class, tz_aware, uuid_subtype):
    return dt.replace(microsecond=diff * 1000), position


-def _get_code(data, position, as_class, tz_aware, uuid_subtype):
+def _get_code(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    code, position = _get_string(data, position,
-                                 as_class, tz_aware, uuid_subtype)
+                                 as_class, tz_aware, uuid_subtype, compile_re)
    return Code(code), position


-def _get_code_w_scope(data, position, as_class, tz_aware, uuid_subtype):
+def _get_code_w_scope(
+        data, position, as_class, tz_aware, uuid_subtype, compile_re):
    _, position = _get_int(data, position)
    code, position = _get_string(data, position,
-                                 as_class, tz_aware, uuid_subtype)
+                                 as_class, tz_aware, uuid_subtype, compile_re)
    scope, position = _get_object(data, position,
-                                  as_class, tz_aware, uuid_subtype)
+                                  as_class, tz_aware, uuid_subtype, compile_re)
    return Code(code, scope), position


-def _get_null(data, position, as_class, tz_aware, uuid_subtype):
+def _get_null(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    return None, position


-def _get_regex(data, position, as_class, tz_aware, uuid_subtype):
+def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    pattern, position = _get_c_string(data, position)
    bson_flags, position = _get_c_string(data, position)
-    flags = 0
-    if "i" in bson_flags:
-        flags |= re.IGNORECASE
-    if "l" in bson_flags:
-        flags |= re.LOCALE
-    if "m" in bson_flags:
-        flags |= re.MULTILINE
-    if "s" in bson_flags:
-        flags |= re.DOTALL
-    if "u" in bson_flags:
-        flags |= re.UNICODE
-    if "x" in bson_flags:
-        flags |= re.VERBOSE
-    return re.compile(pattern, flags), position
+    bson_re = Regex(pattern, bson_flags)
+    if compile_re:
+        return bson_re.compile(), position
+    else:
+        return bson_re, position


-def _get_ref(data, position, as_class, tz_aware, uuid_subtype):
-    collection, position = _get_string(data, position,
-                                       as_class, tz_aware, uuid_subtype)
+def _get_ref(data, position, as_class, tz_aware, uuid_subtype, compile_re):
+    collection, position = _get_string(data, position, as_class, tz_aware,
+                                       uuid_subtype, compile_re)
    oid, position = _get_oid(data, position)
    return DBRef(collection, oid), position


-def _get_timestamp(data, position, as_class, tz_aware, uuid_subtype):
+def _get_timestamp(
+        data, position, as_class, tz_aware, uuid_subtype, compile_re):
    inc, position = _get_int(data, position, unsigned=True)
    timestamp, position = _get_int(data, position, unsigned=True)
    return Timestamp(timestamp, inc), position


-def _get_long(data, position, as_class, tz_aware, uuid_subtype):
+def _get_long(data, position, as_class, tz_aware, uuid_subtype, compile_re):
    # Have to cast to long; on 32-bit unpack may return an int.
    # 2to3 will change long to int. That's fine since long doesn't
    # exist in python3.
@ -310,30 +307,32 @@ _element_getter = {
    BSONINT: _get_int,  # number_int
    BSONTIM: _get_timestamp,
    BSONLON: _get_long, # Same as _get_int after 2to3 runs.
-    BSONMIN: lambda v, w, x, y, z: (MinKey(), w),
-    BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)}
+    BSONMIN: lambda u, v, w, x, y, z: (MinKey(), v),
+    BSONMAX: lambda u, v, w, x, y, z: (MaxKey(), v)}


-def _element_to_dict(data, position, as_class, tz_aware, uuid_subtype):
+def _element_to_dict(
+        data, position, as_class, tz_aware, uuid_subtype, compile_re):
    element_type = data[position:position + 1]
    position += 1
    element_name, position = _get_c_string(data, position)
-    value, position = _element_getter[element_type](data, position, as_class,
-                                                    tz_aware, uuid_subtype)
+    value, position = _element_getter[element_type](
+        data, position, as_class, tz_aware, uuid_subtype, compile_re)
+
    return element_name, value, position


-def _elements_to_dict(data, as_class, tz_aware, uuid_subtype):
+def _elements_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
    result = as_class()
    position = 0
    end = len(data) - 1
    while position < end:
-        (key, value, position) = _element_to_dict(data, position, as_class,
-                                                  tz_aware, uuid_subtype)
+        (key, value, position) = _element_to_dict(
+            data, position, as_class, tz_aware, uuid_subtype, compile_re)
        result[key] = value
    return result

-def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
+def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
    obj_size = struct.unpack("<i", data[:4])[0]
    length = len(data)
    if length < obj_size:
@ -341,8 +340,10 @@ def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
    if obj_size != length or data[obj_size - 1:obj_size] != ZERO:
        raise InvalidBSON("bad eoo")
    elements = data[4:obj_size - 1]
-    return (_elements_to_dict(elements, as_class,
-                              tz_aware, uuid_subtype), data[obj_size:])
+    dct = _elements_to_dict(
+        elements, as_class, tz_aware, uuid_subtype, compile_re)
+
+    return dct, data[obj_size:]
 if _use_c:
    _bson_to_dict = _cbson._bson_to_dict

@ -444,7 +445,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
        return BSONTIM + name + inc + time
    if value is None:
        return BSONNUL + name
-    if isinstance(value, RE_TYPE):
+    if isinstance(value, (RE_TYPE, Regex)):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
@ -492,7 +493,7 @@ if _use_c:


 def decode_all(data, as_class=dict,
-               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
+               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
    """Decode BSON data to multiple documents.

    `data` must be a string of concatenated, valid, BSON-encoded
@ -504,7 +505,14 @@ def decode_all(data, as_class=dict,
        documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
+      - `compile_re` (optional): if ``False``, don't attempt to compile
+        BSON regular expressions into Python regular expressions. Return
+        instances of :class:`~bson.regex.Regex` instead. Can avoid
+        :exc:`~bson.errors.InvalidBSON` errors when receiving
+        Python-incompatible regular expressions, for example from ``currentOp``

+    .. versionchanged:: 2.7
+       Added `compile_re` option.
    .. versionadded:: 1.9
    """
    docs = []
@ -519,7 +527,7 @@ def decode_all(data, as_class=dict,
        elements = data[position + 4:position + obj_size - 1]
        position += obj_size
        docs.append(_elements_to_dict(elements, as_class,
-                                      tz_aware, uuid_subtype))
+                                      tz_aware, uuid_subtype, compile_re))
    return docs
 if _use_c:
    decode_all = _cbson.decode_all
@ -540,7 +548,7 @@ def is_valid(bson):
                        "of a subclass of %s" % (binary_type.__name__,))

    try:
-        (_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
+        (_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE, True)
        return remainder == EMPTY
    except:
        return False
@ -573,7 +581,7 @@ class BSON(binary_type):
        return cls(_dict_to_bson(document, check_keys, uuid_subtype))

    def decode(self, as_class=dict,
-               tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+               tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
        """Decode this BSON data.

        The default type to use for the resultant document is
@ -593,10 +601,21 @@ class BSON(binary_type):
            document
          - `tz_aware` (optional): if ``True``, return timezone-aware
            :class:`~datetime.datetime` instances
+          - `compile_re` (optional): if ``False``, don't attempt to compile
+            BSON regular expressions into Python regular expressions. Return
+            instances of
+            :class:`~bson.regex.Regex` instead. Can avoid
+            :exc:`~bson.errors.InvalidBSON` errors when receiving
+            Python-incompatible regular expressions, for example from
+            ``currentOp``

+        .. versionchanged:: 2.7
+           Added ``compile_re`` option.
        .. versionadded:: 1.9
        """
-        (document, _) = _bson_to_dict(self, as_class, tz_aware, uuid_subtype)
+        (document, _) = _bson_to_dict(
+            self, as_class, tz_aware, uuid_subtype, compile_re)
+
        return document


--- a/bson/_cbsonmodule.c
+++ b/bson/_cbsonmodule.c
@ -43,6 +43,7 @@ struct module_state {
    PyObject* ObjectId;
    PyObject* DBRef;
    PyObject* RECompile;
+    PyObject* Regex;
    PyObject* UUID;
    PyObject* Timestamp;
    PyObject* MinKey;
@ -133,7 +134,8 @@ _downcast_and_check(Py_ssize_t size, int extra) {
 static PyObject* elements_to_dict(PyObject* self, const char* string,
                                  unsigned max, PyObject* as_class,
                                  unsigned char tz_aware,
-                                  unsigned char uuid_subtype);
+                                  unsigned char uuid_subtype,
+                                  unsigned char compile_re);

 static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
                                    int type_byte, PyObject* value,
@ -348,7 +350,8 @@ static int _load_python_objects(PyObject* module) {
        _load_object(&state->MinKey, "bson.min_key", "MinKey") ||
        _load_object(&state->MaxKey, "bson.max_key", "MaxKey") ||
        _load_object(&state->UTC, "bson.tz_util", "utc") ||
-        _load_object(&state->RECompile, "re", "compile")) {
+        _load_object(&state->RECompile, "re", "compile") ||
+        _load_object(&state->Regex, "bson.regex", "Regex")) {
        return 1;
    }
    /* If we couldn't import uuid then we must be on 2.4. Just ignore. */
@ -440,6 +443,130 @@ _set_cannot_encode(PyObject* value) {
    }
 }

+/*
+ * Encode a builtin Python regular expression or our custom Regex class.
+ *
+ * Sets exception and returns 0 on failure.
+ */
+static int _write_regex_to_buffer(
+    buffer_t buffer, int type_byte, PyObject* value) {
+
+    struct module_state *state = GETSTATE(self);
+    PyObject* py_flags;
+    PyObject* py_pattern;
+    PyObject* encoded_pattern;
+    long int_flags;
+    char flags[FLAGS_SIZE];
+    char check_utf8 = 0;
+    const char* pattern_data;
+    int pattern_length, flags_length;
+    result_t status;
+
+    /*
+     * Both the builtin re type and our Regex class have attributes
+     * "flags" and "pattern".
+     */
+    py_flags = PyObject_GetAttrString(value, "flags");
+    if (!py_flags) {
+        return 0;
+    }
+#if PY_MAJOR_VERSION >= 3
+    int_flags = PyLong_AsLong(py_flags);
+#else
+    int_flags = PyInt_AsLong(py_flags);
+#endif
+    Py_DECREF(py_flags);
+    py_pattern = PyObject_GetAttrString(value, "pattern");
+    if (!py_pattern) {
+        return 0;
+    }
+
+    if (PyUnicode_Check(py_pattern)) {
+        encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
+        Py_DECREF(py_pattern);
+        if (!encoded_pattern) {
+            return 0;
+        }
+    } else {
+        encoded_pattern = py_pattern;
+        check_utf8 = 1;
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+    if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+#else
+    if (!(pattern_data = PyString_AsString(encoded_pattern))) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+    if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+#endif
+    status = check_string((const unsigned char*)pattern_data,
+                          pattern_length, check_utf8, 1);
+    if (status == NOT_UTF_8) {
+        PyObject* InvalidStringData = _error("InvalidStringData");
+        if (InvalidStringData) {
+            PyErr_SetString(InvalidStringData,
+                            "regex patterns must be valid UTF-8");
+            Py_DECREF(InvalidStringData);
+        }
+        Py_DECREF(encoded_pattern);
+        return 0;
+    } else if (status == HAS_NULL) {
+        PyObject* InvalidDocument = _error("InvalidDocument");
+        if (InvalidDocument) {
+            PyErr_SetString(InvalidDocument,
+                            "regex patterns must not contain the NULL byte");
+            Py_DECREF(InvalidDocument);
+        }
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+
+    if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+    Py_DECREF(encoded_pattern);
+
+    flags[0] = 0;
+
+    if (int_flags & 2) {
+        STRCAT(flags, FLAGS_SIZE, "i");
+    }
+    if (int_flags & 4) {
+        STRCAT(flags, FLAGS_SIZE, "l");
+    }
+    if (int_flags & 8) {
+        STRCAT(flags, FLAGS_SIZE, "m");
+    }
+    if (int_flags & 16) {
+        STRCAT(flags, FLAGS_SIZE, "s");
+    }
+    if (int_flags & 32) {
+        STRCAT(flags, FLAGS_SIZE, "u");
+    }
+    if (int_flags & 64) {
+        STRCAT(flags, FLAGS_SIZE, "x");
+    }
+    flags_length = (int)strlen(flags) + 1;
+    if (!buffer_write_bytes(buffer, flags, flags_length)) {
+        return 0;
+    }
+    *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
+    return 1;
+}
+
 /* TODO our platform better be little-endian w/ 4-byte ints! */
 /* Write a single value to the buffer (also write its type_byte, for which
 * space has already been reserved.
@ -574,6 +701,11 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
                *(buffer_get_buffer(buffer) + type_byte) = 0x07;
                return 1;
            }
+        case 11:
+            {
+                /* Regex */
+                return _write_regex_to_buffer(buffer, type_byte, value);
+            }
        case 13:
            {
                /* Code */
@ -890,115 +1022,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
        *(buffer_get_buffer(buffer) + type_byte) = 0x09;
        return buffer_write_bytes(buffer, (const char*)&millis, 8);
    } else if (PyObject_TypeCheck(value, state->REType)) {
-        PyObject* py_flags;
-        PyObject* py_pattern;
-        PyObject* encoded_pattern;
-        long int_flags;
-        char flags[FLAGS_SIZE];
-        char check_utf8 = 0;
-        const char* pattern_data;
-        int pattern_length, flags_length;
-        result_t status;
-
-        py_flags = PyObject_GetAttrString(value, "flags");
-        if (!py_flags) {
-            return 0;
-        }
-#if PY_MAJOR_VERSION >= 3
-        int_flags = PyLong_AsLong(py_flags);
-#else
-        int_flags = PyInt_AsLong(py_flags);
-#endif
-        Py_DECREF(py_flags);
-        py_pattern = PyObject_GetAttrString(value, "pattern");
-        if (!py_pattern) {
-            return 0;
-        }
-
-        if (PyUnicode_Check(py_pattern)) {
-            encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
-            Py_DECREF(py_pattern);
-            if (!encoded_pattern) {
-                return 0;
-            }
-        } else {
-            encoded_pattern = py_pattern;
-            check_utf8 = 1;
-        }
-
-#if PY_MAJOR_VERSION >= 3
-        if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-        if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-#else
-        if (!(pattern_data = PyString_AsString(encoded_pattern))) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-        if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-#endif
-        status = check_string((const unsigned char*)pattern_data,
-                              pattern_length, check_utf8, 1);
-        if (status == NOT_UTF_8) {
-            PyObject* InvalidStringData = _error("InvalidStringData");
-            if (InvalidStringData) {
-                PyErr_SetString(InvalidStringData,
-                                "regex patterns must be valid UTF-8");
-                Py_DECREF(InvalidStringData);
-            }
-            Py_DECREF(encoded_pattern);
-            return 0;
-        } else if (status == HAS_NULL) {
-            PyObject* InvalidDocument = _error("InvalidDocument");
-            if (InvalidDocument) {
-                PyErr_SetString(InvalidDocument,
-                                "regex patterns must not contain the NULL byte");
-                Py_DECREF(InvalidDocument);
-            }
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-
-        if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-        Py_DECREF(encoded_pattern);
-
-        flags[0] = 0;
-        /* TODO don't hardcode these */
-        if (int_flags & 2) {
-            STRCAT(flags, FLAGS_SIZE, "i");
-        }
-        if (int_flags & 4) {
-            STRCAT(flags, FLAGS_SIZE, "l");
-        }
-        if (int_flags & 8) {
-            STRCAT(flags, FLAGS_SIZE, "m");
-        }
-        if (int_flags & 16) {
-            STRCAT(flags, FLAGS_SIZE, "s");
-        }
-        if (int_flags & 32) {
-            STRCAT(flags, FLAGS_SIZE, "u");
-        }
-        if (int_flags & 64) {
-            STRCAT(flags, FLAGS_SIZE, "x");
-        }
-        flags_length = (int)strlen(flags) + 1;
-        if (!buffer_write_bytes(buffer, flags, flags_length)) {
-            return 0;
-        }
-        *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
-        return 1;
+        return _write_regex_to_buffer(buffer, type_byte, value);
    }
    
    /* 
@ -1435,7 +1459,8 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {

 static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position,
                           unsigned char type, unsigned max, PyObject* as_class,
-                           unsigned char tz_aware, unsigned char uuid_subtype) {
+                           unsigned char tz_aware, unsigned char uuid_subtype,
+                           unsigned char compile_re) {
    struct module_state *state = GETSTATE(self);

    PyObject* value = NULL;
@ -1495,7 +1520,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
                goto invalid;
            }
            value = elements_to_dict(self, buffer + *position + 4,
-                                     size - 5, as_class, tz_aware, uuid_subtype);
+                                     size - 5, as_class, tz_aware, uuid_subtype,
+                                     compile_re);
            if (!value) {
                return NULL;
            }
@ -1587,7 +1613,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
                }
                to_append = get_value(self, buffer, position, bson_type,
                                      max - (unsigned)key_size,
-                                      as_class, tz_aware, uuid_subtype);
+                                      as_class, tz_aware, uuid_subtype,
+                                      compile_re);
                Py_LeaveRecursiveCall();
                if (!to_append) {
                    Py_DECREF(value);
@ -1850,7 +1877,18 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
                }
            }
            *position += (unsigned)flags_length + 1;
-            if ((compile_func = _get_object(state->RECompile, "re", "compile"))) {
+
+            /*
+             * Use re.compile() if we're configured to compile regular
+             * expressions, else create an instance of our Regex class.
+             */
+            if (compile_re) {
+                compile_func = _get_object(state->RECompile, "re", "compile");
+            } else {
+                compile_func = _get_object(state->Regex, "bson.regex", "Regex");
+            }
+
+            if (compile_func) {
                value = PyObject_CallFunction(compile_func, "Oi", pattern, flags);
                Py_DECREF(compile_func);
            }
@ -1990,7 +2028,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
            }
            scope = elements_to_dict(self, buffer + *position + 4,
                                     scope_size - 5, (PyObject*)&PyDict_Type,
-                                     tz_aware, uuid_subtype);
+                                     tz_aware, uuid_subtype, compile_re);
            if (!scope) {
                Py_DECREF(code);
                return NULL;
@ -2098,7 +2136,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
 static PyObject* _elements_to_dict(PyObject* self, const char* string,
                                   unsigned max, PyObject* as_class,
                                   unsigned char tz_aware,
-                                   unsigned char uuid_subtype) {
+                                   unsigned char uuid_subtype,
+                                   unsigned char compile_re) {
    unsigned position = 0;
    PyObject* dict = PyObject_CallObject(as_class, NULL);
    if (!dict) {
@ -2126,7 +2165,8 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
        }
        position += (unsigned)name_length + 1;
        value = get_value(self, string, &position, type,
-                          max - position, as_class, tz_aware, uuid_subtype);
+                          max - position, as_class, tz_aware, uuid_subtype,
+                          compile_re);
        if (!value) {
            Py_DECREF(name);
            Py_DECREF(dict);
@ -2143,12 +2183,13 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
 static PyObject* elements_to_dict(PyObject* self, const char* string,
                                  unsigned max, PyObject* as_class,
                                  unsigned char tz_aware,
-                                  unsigned char uuid_subtype) {
+                                  unsigned char uuid_subtype,
+                                  unsigned char compile_re) {
    PyObject* result;
    if (Py_EnterRecursiveCall(" while decoding a BSON document"))
        return NULL;
    result = _elements_to_dict(self, string, max,
-                               as_class, tz_aware, uuid_subtype);
+                               as_class, tz_aware, uuid_subtype, compile_re);
    Py_LeaveRecursiveCall();
    return result;
 }
@ -2161,11 +2202,14 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
    PyObject* as_class;
    unsigned char tz_aware;
    unsigned char uuid_subtype;
+    unsigned char compile_re;
+
    PyObject* dict;
    PyObject* remainder;
    PyObject* result;

-    if (!PyArg_ParseTuple(args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
+    if (!PyArg_ParseTuple(
+            args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
        return NULL;
    }

@ -2231,7 +2275,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
    }

    dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
-                            as_class, tz_aware, uuid_subtype);
+                            as_class, tz_aware, uuid_subtype, compile_re);
    if (!dict) {
        return NULL;
    }
@ -2260,8 +2304,11 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
    PyObject* as_class = (PyObject*)&PyDict_Type;
    unsigned char tz_aware = 1;
    unsigned char uuid_subtype = 3;
+    unsigned char compile_re;

-    if (!PyArg_ParseTuple(args, "O|Obb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
+    if (!PyArg_ParseTuple(
+            args, "O|Obbb",
+            &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
        return NULL;
    }

@ -2332,7 +2379,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
        }

        dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
-                                as_class, tz_aware, uuid_subtype);
+                                as_class, tz_aware, uuid_subtype, compile_re);
        if (!dict) {
            Py_DECREF(result);
            return NULL;
@ -2364,6 +2411,7 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
    Py_VISIT(GETSTATE(m)->ObjectId);
    Py_VISIT(GETSTATE(m)->DBRef);
    Py_VISIT(GETSTATE(m)->RECompile);
+    Py_VISIT(GETSTATE(m)->Regex);
    Py_VISIT(GETSTATE(m)->UUID);
    Py_VISIT(GETSTATE(m)->Timestamp);
    Py_VISIT(GETSTATE(m)->MinKey);
@ -2379,6 +2427,7 @@ static int _cbson_clear(PyObject *m) {
    Py_CLEAR(GETSTATE(m)->ObjectId);
    Py_CLEAR(GETSTATE(m)->DBRef);
    Py_CLEAR(GETSTATE(m)->RECompile);
+    Py_CLEAR(GETSTATE(m)->Regex);
    Py_CLEAR(GETSTATE(m)->UUID);
    Py_CLEAR(GETSTATE(m)->Timestamp);
    Py_CLEAR(GETSTATE(m)->MinKey);
--- a/bson/json_util.py
+++ b/bson/json_util.py
@ -90,6 +90,7 @@ from bson.dbref import DBRef
 from bson.max_key import MaxKey
 from bson.min_key import MinKey
 from bson.objectid import ObjectId
+from bson.regex import Regex
 from bson.timestamp import Timestamp

 from bson.py3compat import PY3, binary_type, string_types
@ -120,10 +121,20 @@ def loads(s, *args, **kwargs):
    """Helper function that wraps :class:`json.loads`.

    Automatically passes the object_hook for BSON type conversion.
+
+    :Parameters:
+      - `compile_re` (optional): if ``False``, don't attempt to compile
+        BSON regular expressions into Python regular expressions. Return
+        instances of :class:`~bson.regex.Regex` instead.
+
+    .. versionchanged:: 2.7
+       Added ``compile_re`` option.
    """
    if not json_lib:
        raise Exception("No json library available")
-    kwargs['object_hook'] = object_hook
+
+    compile_re = kwargs.pop('compile_re', True)
+    kwargs['object_hook'] = lambda dct: object_hook(dct, compile_re)
    return json.loads(s, *args, **kwargs)


@ -141,7 +152,7 @@ def _json_convert(obj):
        return obj


-def object_hook(dct):
+def object_hook(dct, compile_re=True):
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
@ -154,7 +165,11 @@ def object_hook(dct):
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            flags |= _RE_OPT_TABLE.get(opt, 0)
-        return re.compile(dct["$regex"], flags)
+
+        if compile_re:
+            return re.compile(dct["$regex"], flags)
+        else:
+            return Regex(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
--- a/bson/regex.py
+++ b/bson/regex.py
@ -0,0 +1,83 @@
+# Copyright 2013 MongoDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for representing MongoDB regular expressions.
+"""
+
+import re
+
+from bson.py3compat import string_types
+
+
+def str_flags_to_int(str_flags):
+    """Translate a string of regex flag characters into an ``re`` bitmask.
+
+    Each of the characters "i", "l", "m", "s", "u" and "x" found in
+    `str_flags` turns on the matching ``re`` module flag; any other
+    character is ignored.
+    """
+    flag_for_char = (
+        ("i", re.IGNORECASE),
+        ("l", re.LOCALE),
+        ("m", re.MULTILINE),
+        ("s", re.DOTALL),
+        ("u", re.UNICODE),
+        ("x", re.VERBOSE),
+    )
+
+    bitmask = 0
+    for char, flag in flag_for_char:
+        if char in str_flags:
+            bitmask |= flag
+
+    return bitmask
+
+
+class Regex(object):
+    """BSON regular expression data."""
+    # 11 == 0x0B, the element type byte used for regexes in encoded BSON.
+    _type_marker = 11
+
+    def __init__(self, pattern, flags=0):
+        """BSON regular expression data.
+
+        This class is useful to store and retrieve regular expressions that are
+        incompatible with Python's regular expression dialect.
+
+        :Parameters:
+          - `pattern`: string
+          - `flags`: (optional) an integer bitmask, or a string of flag
+            characters like "im" for IGNORECASE and MULTILINE
+
+        Raises :exc:`TypeError` if `pattern` is not a string, or if `flags`
+        is neither a string nor an int.
+        """
+        if not isinstance(pattern, string_types):
+            raise TypeError("pattern must be a string, not %s" % type(pattern))
+        self.pattern = pattern
+
+        if isinstance(flags, string_types):
+            self.flags = str_flags_to_int(flags)
+        elif isinstance(flags, int):
+            self.flags = flags
+        else:
+            raise TypeError(
+                "flags must be a string or int, not %s" % type(flags))
+
+    def __eq__(self, other):
+        """Equal only to another ``Regex`` with the same pattern and flags."""
+        if isinstance(other, Regex):
+            # Both fields must be checked against `other`.
+            return self.pattern == other.pattern and self.flags == other.flags
+        else:
+            # Let Python try the reflected comparison for foreign types.
+            return NotImplemented
+
+    def __ne__(self, other):
+        """Inverse of ``==``."""
+        return not self == other
+
+    def __repr__(self):
+        return "Regex(%r, %r)" % (self.pattern, self.flags)
+
+    def compile(self):
+        """Compile this ``Regex`` as a Python regular expression.
+
+        Passes the stored pattern and flags to :func:`re.compile` and
+        returns the result.
+        """
+        return re.compile(self.pattern, self.flags)
--- a/bson/son.py
+++ b/bson/son.py
@ -35,29 +35,29 @@ class SON(dict):

    The mapping from Python types to BSON types is as follows:

-    ===================================  =============  ===================
-    Python Type                          BSON Type      Supported Direction
-    ===================================  =============  ===================
-    None                                 null           both
-    bool                                 boolean        both
-    int [#int]_                          int32 / int64  py -> bson
-    long                                 int64          both
-    float                                number (real)  both
-    string                               string         py -> bson
-    unicode                              string         both
-    list                                 array          both
-    dict / `SON`                         object         both
-    datetime.datetime [#dt]_ [#dt2]_     date           both
-    compiled re                          regex          both
-    `bson.binary.Binary`                 binary         both
-    `bson.objectid.ObjectId`             oid            both
-    `bson.dbref.DBRef`                   dbref          both
-    None                                 undefined      bson -> py
-    unicode                              code           bson -> py
-    `bson.code.Code`                     code           py -> bson
-    unicode                              symbol         bson -> py
-    bytes (Python 3) [#bytes]_           binary         both
-    ===================================  =============  ===================
+    =======================================  =============  ===================
+    Python Type                              BSON Type      Supported Direction
+    =======================================  =============  ===================
+    None                                     null           both
+    bool                                     boolean        both
+    int [#int]_                              int32 / int64  py -> bson
+    long                                     int64          both
+    float                                    number (real)  both
+    string                                   string         py -> bson
+    unicode                                  string         both
+    list                                     array          both
+    dict / `SON`                             object         both
+    datetime.datetime [#dt]_ [#dt2]_         date           both
+    `bson.regex.Regex` / compiled re [#re]_  regex          both
+    `bson.binary.Binary`                     binary         both
+    `bson.objectid.ObjectId`                 oid            both
+    `bson.dbref.DBRef`                       dbref          both
+    None                                     undefined      bson -> py
+    unicode                                  code           bson -> py
+    `bson.code.Code`                         code           py -> bson
+    unicode                                  symbol         bson -> py
+    bytes (Python 3) [#bytes]_               binary         both
+    =======================================  =============  ===================

    Note that to save binary data it must be wrapped as an instance of
    `bson.binary.Binary`. Otherwise it will be saved as a BSON string
@ -71,6 +71,11 @@ class SON(dict):
       millisecond when saved
    .. [#dt2] all datetime.datetime instances are treated as *naive*. clients
       should always use UTC.
+    .. [#re] :class:`~bson.regex.Regex` instances and regular expression
+       objects from ``re.compile()`` are both saved as BSON regular expressions.
+       BSON regular expressions are decoded as Python regular expressions by
+       default, or as :class:`~bson.regex.Regex` instances if the ``compile_re``
+       option is set to ``False``.
    .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
       subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
       it will be decoded to an instance of :class:`~bson.binary.Binary` with
--- a/doc/api/bson/index.rst
+++ b/doc/api/bson/index.rst
@ -11,6 +11,7 @@ Sub-modules:
   :maxdepth: 2

   binary
+   regex
   code
   dbref
   errors
--- a/doc/api/bson/regex.rst
+++ b/doc/api/bson/regex.rst
@ -0,0 +1,7 @@
+:mod:`regex` -- Tools for representing MongoDB regular expressions
+==================================================================
+.. versionadded:: 2.7
+
+.. automodule:: bson.regex
+   :synopsis: Tools for representing MongoDB regular expressions
+   :members:
--- a/doc/api/pymongo/collection.rst
+++ b/doc/api/pymongo/collection.rst
@ -33,7 +33,7 @@
      .. automethod:: update(spec, document[, upsert=False[, manipulate=False[, safe=None[, multi=False[, check_keys=True[, **kwargs]]]]]])
      .. automethod:: remove([spec_or_id=None[, safe=None[, **kwargs]]])
      .. automethod:: drop
-      .. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[,**kwargs]]]]]]]]]]]]]]]]])
+      .. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[, compile_re=True[, **kwargs]]]]]]]]]]]]]]]]]])
      .. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
      .. automethod:: count
      .. automethod:: create_index
--- a/pymongo/collection.py
+++ b/pymongo/collection.py
@ -690,6 +690,9 @@ class Collection(common.BaseObject):
            the nearest member may accept reads. Default 15 milliseconds.
            **Ignored by mongos** and must be configured on the command line.
            See the localThreshold_ option for more information.
+          - `compile_re` (optional): if ``False``, don't attempt to compile
+            BSON regex objects into Python regexes. Return instances of
+            :class:`~bson.regex.Regex` instead.
          - `exhaust` (optional): If ``True`` create an "exhaust" cursor.
            MongoDB will stream batched results to the client without waiting
            for the client to request each batch, reducing latency.
@ -717,12 +720,15 @@ class Collection(common.BaseObject):
            5. The `network_timeout` option is ignored when using the
            `exhaust` option.

-        .. note:: The `manipulate` parameter may default to False in
-           a future release.
+        .. note:: The `manipulate` and `compile_re` parameters may default to
+           False in future releases.

        .. note:: The `max_scan` parameter requires server
           version **>= 1.5.1**

+        .. versionadded:: 2.7
+           The ``compile_re`` parameter.
+
        .. versionadded:: 2.3
           The `tag_sets` and `secondary_acceptable_latency_ms` parameters.

--- a/pymongo/cursor.py
+++ b/pymongo/cursor.py
@ -69,8 +69,8 @@ class Cursor(object):
                 await_data=False, partial=False, manipulate=True,
                 read_preference=ReadPreference.PRIMARY,
                 tag_sets=[{}], secondary_acceptable_latency_ms=None,
-                 exhaust=False, _must_use_master=False, _uuid_subtype=None,
-                 _first_batch=None, _cursor_id=None,
+                 exhaust=False, compile_re=True, _must_use_master=False,
+                 _uuid_subtype=None, _first_batch=None, _cursor_id=None,
                 **kwargs):
        """Create a new cursor.

@ -152,6 +152,7 @@ class Cursor(object):
        self.__tag_sets = tag_sets
        self.__secondary_acceptable_latency_ms = secondary_acceptable_latency_ms
        self.__tz_aware = collection.database.connection.tz_aware
+        self.__compile_re = compile_re
        self.__must_use_master = _must_use_master
        self.__uuid_subtype = _uuid_subtype or collection.uuid_subtype

@ -225,8 +226,8 @@ class Cursor(object):
                           "batch_size", "max_scan", "as_class", "slave_okay",
                           "manipulate", "read_preference", "tag_sets",
                           "secondary_acceptable_latency_ms",
-                           "must_use_master", "uuid_subtype", "query_flags",
-                           "kwargs")
+                           "must_use_master", "uuid_subtype", "compile_re",
+                           "query_flags", "kwargs")
        data = dict((k, v) for k, v in self.__dict__.iteritems()
                    if k.startswith('_Cursor__') and k[9:] in values_to_clone)
        if deepcopy:
@ -667,6 +668,7 @@ class Cursor(object):
        r = database.command("count", self.__collection.name,
                             allowable_errors=["ns missing"],
                             uuid_subtype=self.__uuid_subtype,
+                             compile_re=self.__compile_re,
                             **command)
        if r.get("errmsg", "") == "ns missing":
            return 0
@ -718,6 +720,7 @@ class Cursor(object):
        return database.command("distinct",
                                self.__collection.name,
                                uuid_subtype=self.__uuid_subtype,
+                                compile_re=self.__compile_re,
                                **options)["values"]

    def explain(self):
@ -829,7 +832,8 @@ class Cursor(object):
            response = helpers._unpack_response(response, self.__id,
                                                self.__as_class,
                                                self.__tz_aware,
-                                                self.__uuid_subtype)
+                                                self.__uuid_subtype,
+                                                self.__compile_re)
        except AutoReconnect:
            # Don't send kill cursors to another server after a "not master"
            # error. It's completely pointless.
--- a/pymongo/database.py
+++ b/pymongo/database.py
@ -273,7 +273,7 @@ class Database(common.BaseObject):

    def command(self, command, value=1,
                check=True, allowable_errors=[],
-                uuid_subtype=OLD_UUID_SUBTYPE, **kwargs):
+                uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, **kwargs):
        """Issue a MongoDB command.

        Send command `command` to the database and return the
@ -318,6 +318,12 @@ class Database(common.BaseObject):
            in this list will be ignored by error-checking
          - `uuid_subtype` (optional): The BSON binary subtype to use
            for a UUID used in this command.
+          - `compile_re` (optional): if ``False``, don't attempt to compile
+            BSON regular expressions into Python regular expressions. Return
+            instances of :class:`~bson.regex.Regex` instead. Can avoid
+            :exc:`~bson.errors.InvalidBSON` errors when receiving
+            Python-incompatible regular expressions, for example from
+            ``currentOp``.
          - `read_preference`: The read preference for this connection.
            See :class:`~pymongo.read_preferences.ReadPreference` for available
            options.
@ -337,6 +343,8 @@ class Database(common.BaseObject):

        .. note:: ``command`` ignores the ``network_timeout`` parameter.

+        .. versionchanged:: 2.7
+           Added ``compile_re`` option.
        .. versionchanged:: 2.3
           Added `tag_sets` and `secondary_acceptable_latency_ms` options.
        .. versionchanged:: 2.2
@ -390,6 +398,7 @@ class Database(common.BaseObject):
        extra_opts['secondary_acceptable_latency_ms'] = kwargs.pop(
            'secondary_acceptable_latency_ms',
            self.secondary_acceptable_latency_ms)
+        extra_opts['compile_re'] = compile_re

        fields = kwargs.get('fields')
        if fields is not None and not isinstance(fields, dict):
--- a/pymongo/helpers.py
+++ b/pymongo/helpers.py
@ -73,7 +73,8 @@ def _index_document(index_list):


 def _unpack_response(response, cursor_id=None, as_class=dict,
-                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
+                     compile_re=True):
    """Unpack a response from the database.

    Check the response for errors and unpack, returning a dictionary
@ -108,7 +109,8 @@ def _unpack_response(response, cursor_id=None, as_class=dict,
    result["starting_from"] = struct.unpack("<i", response[12:16])[0]
    result["number_returned"] = struct.unpack("<i", response[16:20])[0]
    result["data"] = bson.decode_all(response[20:],
-                                     as_class, tz_aware, uuid_subtype)
+                                     as_class, tz_aware, uuid_subtype,
+                                     compile_re)
    assert len(result["data"]) == result["number_returned"]
    return result

--- a/test/test_bson.py
+++ b/test/test_bson.py
@ -32,7 +32,8 @@ from nose.plugins.skip import SkipTest
 import bson
 from bson import (BSON,
                  decode_all,
-                  is_valid)
+                  is_valid,
+                  Regex)
 from bson.binary import Binary, UUIDLegacy
 from bson.code import Code
 from bson.objectid import ObjectId
@ -534,5 +535,42 @@ class TestBSON(unittest.TestCase):
        d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)])
        self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict))

+    def test_bson_regex(self):
+        """Round-trip ``Regex`` instances through the BSON encoder/decoder."""
+        # Invalid Python regex, though valid PCRE.
+        pcre_pattern = r'[\w-\.]'
+        uncompilable = Regex(pcre_pattern)
+        self.assertEqual(pcre_pattern, uncompilable.pattern)
+        self.assertEqual(0, uncompilable.flags)
+
+        uncompilable_doc = {'r': uncompilable}
+        uncompilable_bson = b(
+            '\x11\x00\x00\x00'              # document length
+            '\x0br\x00[\\w-\\.]\x00\x00'    # r: regex
+            '\x00')                         # document terminator
+
+        self.assertEqual(uncompilable_bson, BSON.encode(uncompilable_doc))
+        self.assertEqual(
+            uncompilable_doc,
+            BSON(uncompilable_bson).decode(compile_re=False))
+
+        # Valid Python regex, with flags.
+        flags = re.IGNORECASE | re.MULTILINE | re.UNICODE
+        compiled = re.compile('.*', flags)
+        wrapped = Regex('.*', flags)
+
+        flagged_bson = b(
+            "\x0f\x00\x00\x00"          # document length
+            "\x0br\x00.*\x00imu\x00"    # r: regex
+            "\x00")                     # document terminator
+
+        # A compiled pattern and a Regex must encode to the same bytes.
+        self.assertEqual(flagged_bson, BSON.encode({'r': compiled}))
+        self.assertEqual(flagged_bson, BSON.encode({'r': wrapped}))
+
+        # Built-in re objects don't support ==. Compare pattern and flags.
+        self.assertEqual(
+            compiled.pattern, BSON(flagged_bson).decode()['r'].pattern)
+        self.assertEqual(
+            compiled.flags, BSON(flagged_bson).decode()['r'].flags)
+
+        self.assertEqual(
+            {'r': wrapped}, BSON(flagged_bson).decode(compile_re=False))
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/test/test_collection.py
+++ b/test/test_collection.py
@ -29,11 +29,12 @@ from nose.plugins.skip import SkipTest
 sys.path[0:0] = [""]

 from bson.binary import Binary
+from bson.regex import Regex
 from bson.code import Code
 from bson.dbref import DBRef
 from bson.objectid import ObjectId
 from bson.py3compat import b
-from bson.son import SON
+from bson.son import SON, RE_TYPE
 from pymongo import (ASCENDING, DESCENDING, GEO2D,
                     GEOHAYSTACK, GEOSPHERE, HASHED)
 from pymongo import message as message_module
@ -1267,6 +1268,19 @@ class TestCollection(unittest.TestCase):
        self.assertEqual(expected, db.test.aggregate([pipeline]))
        self.assertEqual(expected, db.test.aggregate((pipeline,)))

+    def test_aggregate_with_compile_re(self):
+        """aggregate() yields compiled regexes unless compile_re is False."""
+        if not version.at_least(self.db.connection, (2, 1, 0)):
+            raise SkipTest("The aggregate command requires MongoDB >= 2.1.0")
+
+        database = self.client.pymongo_test
+        database.test.drop()
+        database.test.insert({'r': re.compile('.*')})
+
+        # Default behaviour: the stored regex comes back compiled.
+        compiled = database.test.aggregate([])
+        self.assertTrue(isinstance(compiled['result'][0]['r'], RE_TYPE))
+
+        # With compile_re=False a Regex instance is returned instead.
+        uncompiled = database.test.aggregate([], compile_re=False)
+        self.assertTrue(isinstance(uncompiled['result'][0]['r'], Regex))
+
    def test_aggregation_cursor_validation(self):
        if not version.at_least(self.db.connection, (2, 5, 1)):
            raise SkipTest("Aggregation cursor requires MongoDB >= 2.5.1")
@ -2148,6 +2162,22 @@ class TestCollection(unittest.TestCase):
        self.assertEqual(2, c.find_one(manipulate=True)['foo'])
        c.remove({})

+    def test_compile_re(self):
+        """find() and find_one() honor the compile_re option."""
+        coll = self.client.pymongo_test.test
+        coll.drop()
+        coll.insert({'r': re.compile('.*')})
+
+        # Test find_one with compile_re.
+        default_doc = coll.find_one()
+        self.assertTrue(isinstance(default_doc['r'], RE_TYPE))
+        raw_doc = coll.find_one(compile_re=False)
+        self.assertTrue(isinstance(raw_doc['r'], Regex))
+
+        # Test find with compile_re.
+        for default_doc in coll.find():
+            self.assertTrue(isinstance(default_doc['r'], RE_TYPE))
+
+        for raw_doc in coll.find(compile_re=False):
+            self.assertTrue(isinstance(raw_doc['r'], Regex))
+

 if __name__ == "__main__":
    unittest.main()
--- a/test/test_cursor.py
+++ b/test/test_cursor.py
@ -552,6 +552,7 @@ class TestCursor(unittest.TestCase):
                                   await_data=True,
                                   partial=True,
                                   manipulate=False,
+                                   compile_re=False,
                                   fields={'_id': False}).limit(2)
        cursor.add_option(128)

@ -565,6 +566,8 @@ class TestCursor(unittest.TestCase):
                         cursor2._Cursor__slave_okay)
        self.assertEqual(cursor._Cursor__manipulate,
                         cursor2._Cursor__manipulate)
+        self.assertEqual(cursor._Cursor__compile_re,
+                         cursor2._Cursor__compile_re)
        self.assertEqual(cursor._Cursor__query_flags,
                         cursor2._Cursor__query_flags)

--- a/test/test_database.py
+++ b/test/test_database.py
@ -16,6 +16,7 @@

 import datetime
 import os
+import re
 import sys
 import warnings

@ -25,9 +26,10 @@ import unittest
 from nose.plugins.skip import SkipTest

 from bson.code import Code
+from bson.regex import Regex
 from bson.dbref import DBRef
 from bson.objectid import ObjectId
-from bson.son import SON
+from bson.son import SON, RE_TYPE
 from pymongo import (ALL,
                     auth,
                     OFF,
@ -303,6 +305,21 @@ class TestDatabase(unittest.TestCase):
        if not is_mongos(self.client):
            db.command('eval', 'sleep(100)', network_timeout=0.001)

+    def test_command_with_compile_re(self):
+        """Database.command() honors the compile_re option."""
+        # 'aggregate' is the example command here, since it's an easy way
+        # to retrieve a BSON regex from a collection using a command.
+        if not version.at_least(self.client, (2, 1, 0)):
+            raise SkipTest('Need aggregation to test compile_re')
+
+        database = self.client.pymongo_test
+        database.test.drop()
+        database.test.insert({'r': re.compile('.*')})
+
+        compiled = database.command('aggregate', 'test', pipeline=[])
+        self.assertTrue(isinstance(compiled['result'][0]['r'], RE_TYPE))
+
+        uncompiled = database.command(
+            'aggregate', 'test', pipeline=[], compile_re=False)
+        self.assertTrue(isinstance(uncompiled['result'][0]['r'], Regex))
+
    def test_last_status(self):
        db = self.client.pymongo_test

--- a/test/test_json_util.py
+++ b/test/test_json_util.py
@ -32,6 +32,8 @@ from bson.dbref import DBRef
 from bson.max_key import MaxKey
 from bson.min_key import MinKey
 from bson.objectid import ObjectId
+from bson.regex import Regex
+from bson.son import RE_TYPE
 from bson.timestamp import Timestamp
 from bson.tz_util import utc

@ -74,6 +76,22 @@ class TestJsonUtil(unittest.TestCase):
        self.round_trip({"date": datetime.datetime(2009, 12, 9, 15,
                                                   49, 45, 191000, utc)})

+    def test_regex_object_hook(self):
+        """object_hook returns a compiled regex or a Regex per compile_re."""
+        import json
+
+        # Extended JSON format regular expression.
+        pattern = 'a*b'
+        extended_json = '{"$regex": "%s", "$options": "u"}' % pattern
+
+        compiled = json_util.object_hook(json.loads(extended_json))
+        self.assertTrue(isinstance(compiled, RE_TYPE))
+        self.assertEqual(pattern, compiled.pattern)
+        self.assertEqual(re.U, compiled.flags)
+
+        uncompiled = json_util.object_hook(
+            json.loads(extended_json), compile_re=False)
+        self.assertTrue(isinstance(uncompiled, Regex))
+        self.assertEqual(pattern, uncompiled.pattern)
+        self.assertEqual(re.U, uncompiled.flags)
+
    def test_regex(self):
        res = self.round_tripped({"r": re.compile("a*b", re.IGNORECASE)})["r"]
        self.assertEqual("a*b", res.pattern)
@ -95,6 +113,12 @@ class TestJsonUtil(unittest.TestCase):
            expected_flags = re.U
        self.assertEqual(expected_flags, res.flags)

+        self.assertEqual(
+            Regex('.*', 'ilm'),
+            json_util.loads(
+                '{"r": {"$regex": ".*", "$options": "ilm"}}',
+                compile_re=False)['r'])
+
    def test_minkey(self):
        self.round_trip({"m": MinKey()})