From e4cf504559a7b9cb9e7faabf4d54be01e9a3a352 Mon Sep 17 00:00:00 2001
From: "A. Jesse Jiryu Davis" <jesse@10gen.com>
Date: Tue, 6 Aug 2013 18:36:33 -0400
Subject: [PATCH] Option to not compile BSON regexes. PYTHON-500

Add a 'compile_re' parameter to Collection.find, Collection.find_one, and
json_util.loads. If it's False, BSON regular expressions are decoded as
instances of a new class, Regex, instead of being passed to re.compile().
This allows PyMongo to handle regular expressions that don't compile in
Python but are valid in other contexts like MongoDB queries.
---
 bson/__init__.py               | 127 ++++++++------
 bson/_cbsonmodule.c            | 297 +++++++++++++++++++--------------
 bson/json_util.py              |  21 ++-
 bson/regex.py                  |  83 +++++++++
 bson/son.py                    |  51 +++---
 doc/api/bson/index.rst         |   1 +
 doc/api/bson/regex.rst         |   7 +
 doc/api/pymongo/collection.rst |   2 +-
 pymongo/collection.py          |  10 +-
 pymongo/cursor.py              |  14 +-
 pymongo/database.py            |  11 +-
 pymongo/helpers.py             |   6 +-
 test/test_bson.py              |  40 ++++-
 test/test_collection.py        |  32 +++-
 test/test_cursor.py            |   3 +
 test/test_database.py          |  19 ++-
 test/test_json_util.py         |  24 +++
 17 files changed, 530 insertions(+), 218 deletions(-)
 create mode 100644 bson/regex.py
 create mode 100644 doc/api/bson/regex.rst

diff --git a/bson/__init__.py b/bson/__init__.py
index 3ad2cd3f9..a6f993db5 100644
--- a/bson/__init__.py
+++ b/bson/__init__.py
@@ -32,6 +32,7 @@ from bson.max_key import MaxKey
 from bson.min_key import MinKey
 from bson.objectid import ObjectId
 from bson.py3compat import b, binary_type
+from bson.regex import Regex
 from bson.son import SON, RE_TYPE
 from bson.timestamp import Timestamp
 from bson.tz_util import utc
@@ -90,7 +91,8 @@ BSONMAX = b("\x7F") # Max key
 
 
 def _get_int(data, position, as_class=None,
-             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, unsigned=False):
+             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
+             compile_re=True, unsigned=False):
     format = unsigned and "I" or "i"
     try:
         value = struct.unpack("<%s" % format, data[position:position + 4])[0]
@@ -132,13 +134,13 @@ def _make_c_string(string, check_null=False):
                                     "UTF-8: %r" % string)
 
 
-def _get_number(data, position, as_class, tz_aware, uuid_subtype):
+def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     num = struct.unpack("<d", data[position:position + 8])[0]
     position += 8
     return num, position
 
 
-def _get_string(data, position, as_class, tz_aware, uuid_subtype):
+def _get_string(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     length = struct.unpack("<i", data[position:position + 4])[0]
     if length <= 0 or (len(data) - position - 4) < length:
         raise InvalidBSON("invalid string length")
@@ -148,12 +150,14 @@ def _get_string(data, position, as_class, tz_aware, uuid_subtype):
     return _get_c_string(data, position, length - 1)
 
 
-def _get_object(data, position, as_class, tz_aware, uuid_subtype):
+def _get_object(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     obj_size = struct.unpack("<i", data[position:position + 4])[0]
     if data[position + obj_size - 1:position + obj_size] != ZERO:
         raise InvalidBSON("bad eoo")
     encoded = data[position + 4:position + obj_size - 1]
-    object = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype)
+    object = _elements_to_dict(
+        encoded, as_class, tz_aware, uuid_subtype, compile_re)
+
     position += obj_size
     if "$ref" in object:
         return (DBRef(object.pop("$ref"), object.pop("$id", None),
@@ -161,9 +165,9 @@ def _get_object(data, position, as_class, tz_aware, uuid_subtype):
     return object, position
 
 
-def _get_array(data, position, as_class, tz_aware, uuid_subtype):
+def _get_array(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     obj, position = _get_object(data, position,
-                                as_class, tz_aware, uuid_subtype)
+                                as_class, tz_aware, uuid_subtype, compile_re)
     result = []
     i = 0
     while True:
@@ -175,7 +179,7 @@ def _get_array(data, position, as_class, tz_aware, uuid_subtype):
     return result, position
 
 
-def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
+def _get_binary(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     length, position = _get_int(data, position)
     subtype = ord(data[position:position + 1])
     position += 1
@@ -207,19 +211,19 @@ def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
 
 
 def _get_oid(data, position, as_class=None,
-             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
     value = ObjectId(data[position:position + 12])
     position += 12
     return value, position
 
 
-def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
+def _get_boolean(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     value = data[position:position + 1] == ONE
     position += 1
     return value, position
 
 
-def _get_date(data, position, as_class, tz_aware, uuid_subtype):
+def _get_date(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     millis = struct.unpack("<q", data[position:position + 8])[0]
     diff = millis % 1000
     seconds = (millis - diff) / 1000
@@ -231,58 +235,51 @@ def _get_date(data, position, as_class, tz_aware, uuid_subtype):
     return dt.replace(microsecond=diff * 1000), position
 
 
-def _get_code(data, position, as_class, tz_aware, uuid_subtype):
+def _get_code(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     code, position = _get_string(data, position,
-                                 as_class, tz_aware, uuid_subtype)
+                                 as_class, tz_aware, uuid_subtype, compile_re)
     return Code(code), position
 
 
-def _get_code_w_scope(data, position, as_class, tz_aware, uuid_subtype):
+def _get_code_w_scope(
+        data, position, as_class, tz_aware, uuid_subtype, compile_re):
     _, position = _get_int(data, position)
     code, position = _get_string(data, position,
-                                 as_class, tz_aware, uuid_subtype)
+                                 as_class, tz_aware, uuid_subtype, compile_re)
     scope, position = _get_object(data, position,
-                                  as_class, tz_aware, uuid_subtype)
+                                  as_class, tz_aware, uuid_subtype, compile_re)
     return Code(code, scope), position
 
 
-def _get_null(data, position, as_class, tz_aware, uuid_subtype):
+def _get_null(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     return None, position
 
 
-def _get_regex(data, position, as_class, tz_aware, uuid_subtype):
+def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     pattern, position = _get_c_string(data, position)
     bson_flags, position = _get_c_string(data, position)
-    flags = 0
-    if "i" in bson_flags:
-        flags |= re.IGNORECASE
-    if "l" in bson_flags:
-        flags |= re.LOCALE
-    if "m" in bson_flags:
-        flags |= re.MULTILINE
-    if "s" in bson_flags:
-        flags |= re.DOTALL
-    if "u" in bson_flags:
-        flags |= re.UNICODE
-    if "x" in bson_flags:
-        flags |= re.VERBOSE
-    return re.compile(pattern, flags), position
+    bson_re = Regex(pattern, bson_flags)
+    if compile_re:
+        return bson_re.compile(), position
+    else:
+        return bson_re, position
 
 
-def _get_ref(data, position, as_class, tz_aware, uuid_subtype):
-    collection, position = _get_string(data, position,
-                                       as_class, tz_aware, uuid_subtype)
+def _get_ref(data, position, as_class, tz_aware, uuid_subtype, compile_re):
+    collection, position = _get_string(data, position, as_class, tz_aware,
+                                       uuid_subtype, compile_re)
     oid, position = _get_oid(data, position)
     return DBRef(collection, oid), position
 
 
-def _get_timestamp(data, position, as_class, tz_aware, uuid_subtype):
+def _get_timestamp(
+        data, position, as_class, tz_aware, uuid_subtype, compile_re):
     inc, position = _get_int(data, position, unsigned=True)
     timestamp, position = _get_int(data, position, unsigned=True)
     return Timestamp(timestamp, inc), position
 
 
-def _get_long(data, position, as_class, tz_aware, uuid_subtype):
+def _get_long(data, position, as_class, tz_aware, uuid_subtype, compile_re):
     # Have to cast to long; on 32-bit unpack may return an int.
     # 2to3 will change long to int. That's fine since long doesn't
     # exist in python3.
@@ -310,30 +307,32 @@ _element_getter = {
     BSONINT: _get_int,  # number_int
     BSONTIM: _get_timestamp,
     BSONLON: _get_long, # Same as _get_int after 2to3 runs.
-    BSONMIN: lambda v, w, x, y, z: (MinKey(), w),
-    BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)}
+    BSONMIN: lambda u, v, w, x, y, z: (MinKey(), v),
+    BSONMAX: lambda u, v, w, x, y, z: (MaxKey(), v)}
 
 
-def _element_to_dict(data, position, as_class, tz_aware, uuid_subtype):
+def _element_to_dict(
+        data, position, as_class, tz_aware, uuid_subtype, compile_re):
     element_type = data[position:position + 1]
     position += 1
     element_name, position = _get_c_string(data, position)
-    value, position = _element_getter[element_type](data, position, as_class,
-                                                    tz_aware, uuid_subtype)
+    value, position = _element_getter[element_type](
+        data, position, as_class, tz_aware, uuid_subtype, compile_re)
+
     return element_name, value, position
 
 
-def _elements_to_dict(data, as_class, tz_aware, uuid_subtype):
+def _elements_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
     result = as_class()
     position = 0
     end = len(data) - 1
     while position < end:
-        (key, value, position) = _element_to_dict(data, position, as_class,
-                                                  tz_aware, uuid_subtype)
+        (key, value, position) = _element_to_dict(
+            data, position, as_class, tz_aware, uuid_subtype, compile_re)
         result[key] = value
     return result
 
-def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
+def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
     obj_size = struct.unpack("<i", data[:4])[0]
     length = len(data)
     if length < obj_size:
@@ -341,8 +340,10 @@ def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
     if obj_size != length or data[obj_size - 1:obj_size] != ZERO:
         raise InvalidBSON("bad eoo")
     elements = data[4:obj_size - 1]
-    return (_elements_to_dict(elements, as_class,
-                              tz_aware, uuid_subtype), data[obj_size:])
+    dct = _elements_to_dict(
+        elements, as_class, tz_aware, uuid_subtype, compile_re)
+
+    return dct, data[obj_size:]
 if _use_c:
     _bson_to_dict = _cbson._bson_to_dict
 
@@ -444,7 +445,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
         return BSONTIM + name + inc + time
     if value is None:
         return BSONNUL + name
-    if isinstance(value, RE_TYPE):
+    if isinstance(value, (RE_TYPE, Regex)):
         pattern = value.pattern
         flags = ""
         if value.flags & re.IGNORECASE:
@@ -492,7 +493,7 @@ if _use_c:
 
 
 def decode_all(data, as_class=dict,
-               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
+               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
     """Decode BSON data to multiple documents.
 
     `data` must be a string of concatenated, valid, BSON-encoded
@@ -504,7 +505,14 @@ def decode_all(data, as_class=dict,
         documents
       - `tz_aware` (optional): if ``True``, return timezone-aware
         :class:`~datetime.datetime` instances
+      - `compile_re` (optional): if ``False``, don't attempt to compile
+        BSON regular expressions into Python regular expressions. Return
+        instances of :class:`~bson.regex.Regex` instead. Can avoid
+        :exc:`~bson.errors.InvalidBSON` errors when receiving
+        Python-incompatible regular expressions, for example from ``currentOp``.
 
+    .. versionchanged:: 2.7
+       Added `compile_re` option.
     .. versionadded:: 1.9
     """
     docs = []
@@ -519,7 +527,7 @@ def decode_all(data, as_class=dict,
         elements = data[position + 4:position + obj_size - 1]
         position += obj_size
         docs.append(_elements_to_dict(elements, as_class,
-                                      tz_aware, uuid_subtype))
+                                      tz_aware, uuid_subtype, compile_re))
     return docs
 if _use_c:
     decode_all = _cbson.decode_all
@@ -540,7 +548,7 @@ def is_valid(bson):
                         "of a subclass of %s" % (binary_type.__name__,))
 
     try:
-        (_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
+        (_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE, True)
         return remainder == EMPTY
     except:
         return False
@@ -573,7 +581,7 @@ class BSON(binary_type):
         return cls(_dict_to_bson(document, check_keys, uuid_subtype))
 
     def decode(self, as_class=dict,
-               tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+               tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
         """Decode this BSON data.
 
         The default type to use for the resultant document is
@@ -593,10 +601,21 @@ class BSON(binary_type):
             document
           - `tz_aware` (optional): if ``True``, return timezone-aware
             :class:`~datetime.datetime` instances
+          - `compile_re` (optional): if ``False``, don't attempt to compile
+            BSON regular expressions into Python regular expressions. Return
+            instances of
+            :class:`~bson.regex.Regex` instead. Can avoid
+            :exc:`~bson.errors.InvalidBSON` errors when receiving
+            Python-incompatible regular expressions, for example from
+            ``currentOp``.
 
+        .. versionchanged:: 2.7
+           Added ``compile_re`` option.
         .. versionadded:: 1.9
         """
-        (document, _) = _bson_to_dict(self, as_class, tz_aware, uuid_subtype)
+        (document, _) = _bson_to_dict(
+            self, as_class, tz_aware, uuid_subtype, compile_re)
+
         return document
 
 
diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c
index fe936553b..4e3441271 100644
--- a/bson/_cbsonmodule.c
+++ b/bson/_cbsonmodule.c
@@ -43,6 +43,7 @@ struct module_state {
     PyObject* ObjectId;
     PyObject* DBRef;
     PyObject* RECompile;
+    PyObject* Regex;
     PyObject* UUID;
     PyObject* Timestamp;
     PyObject* MinKey;
@@ -133,7 +134,8 @@ _downcast_and_check(Py_ssize_t size, int extra) {
 static PyObject* elements_to_dict(PyObject* self, const char* string,
                                   unsigned max, PyObject* as_class,
                                   unsigned char tz_aware,
-                                  unsigned char uuid_subtype);
+                                  unsigned char uuid_subtype,
+                                  unsigned char compile_re);
 
 static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
                                     int type_byte, PyObject* value,
@@ -348,7 +350,8 @@ static int _load_python_objects(PyObject* module) {
         _load_object(&state->MinKey, "bson.min_key", "MinKey") ||
         _load_object(&state->MaxKey, "bson.max_key", "MaxKey") ||
         _load_object(&state->UTC, "bson.tz_util", "utc") ||
-        _load_object(&state->RECompile, "re", "compile")) {
+        _load_object(&state->RECompile, "re", "compile") ||
+        _load_object(&state->Regex, "bson.regex", "Regex")) {
         return 1;
     }
     /* If we couldn't import uuid then we must be on 2.4. Just ignore. */
@@ -440,6 +443,130 @@ _set_cannot_encode(PyObject* value) {
     }
 }
 
+/*
+ * Encode a builtin Python regular expression or our custom Regex class.
+ * Writes the pattern and flags as C strings, then sets the type byte to 0x0B.
+ *
+ * Returns 1 on success. Sets exception and returns 0 on failure.
+ */
+static int _write_regex_to_buffer(
+    buffer_t buffer, int type_byte, PyObject* value) {
+
+    PyObject* py_flags;
+    PyObject* py_pattern;
+    PyObject* encoded_pattern;
+    long int_flags;
+    char flags[FLAGS_SIZE];
+    char check_utf8 = 0;
+    const char* pattern_data;
+    int pattern_length, flags_length;
+    result_t status;
+
+    /*
+     * Both the builtin re type and our Regex class have attributes
+     * "flags" and "pattern".
+     */
+    py_flags = PyObject_GetAttrString(value, "flags");
+    if (!py_flags) {
+        return 0;
+    }
+#if PY_MAJOR_VERSION >= 3
+    int_flags = PyLong_AsLong(py_flags);
+#else
+    int_flags = PyInt_AsLong(py_flags);
+#endif
+    Py_DECREF(py_flags);
+    py_pattern = PyObject_GetAttrString(value, "pattern");
+    if (!py_pattern) {
+        return 0;
+    }
+
+    if (PyUnicode_Check(py_pattern)) {
+        encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
+        Py_DECREF(py_pattern);
+        if (!encoded_pattern) {
+            return 0;
+        }
+    } else {
+        encoded_pattern = py_pattern;
+        check_utf8 = 1;
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+    if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+#else
+    if (!(pattern_data = PyString_AsString(encoded_pattern))) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+    if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+#endif
+    status = check_string((const unsigned char*)pattern_data,
+                          pattern_length, check_utf8, 1);
+    if (status == NOT_UTF_8) {
+        PyObject* InvalidStringData = _error("InvalidStringData");
+        if (InvalidStringData) {
+            PyErr_SetString(InvalidStringData,
+                            "regex patterns must be valid UTF-8");
+            Py_DECREF(InvalidStringData);
+        }
+        Py_DECREF(encoded_pattern);
+        return 0;
+    } else if (status == HAS_NULL) {
+        PyObject* InvalidDocument = _error("InvalidDocument");
+        if (InvalidDocument) {
+            PyErr_SetString(InvalidDocument,
+                            "regex patterns must not contain the NULL byte");
+            Py_DECREF(InvalidDocument);
+        }
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+
+    if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
+        Py_DECREF(encoded_pattern);
+        return 0;
+    }
+    Py_DECREF(encoded_pattern);
+
+    flags[0] = 0;
+    /* Hardcoded bit values of re.I, re.L, re.M, re.S, re.U and re.X. */
+    if (int_flags & 2) {
+        STRCAT(flags, FLAGS_SIZE, "i");
+    }
+    if (int_flags & 4) {
+        STRCAT(flags, FLAGS_SIZE, "l");
+    }
+    if (int_flags & 8) {
+        STRCAT(flags, FLAGS_SIZE, "m");
+    }
+    if (int_flags & 16) {
+        STRCAT(flags, FLAGS_SIZE, "s");
+    }
+    if (int_flags & 32) {
+        STRCAT(flags, FLAGS_SIZE, "u");
+    }
+    if (int_flags & 64) {
+        STRCAT(flags, FLAGS_SIZE, "x");
+    }
+    flags_length = (int)strlen(flags) + 1;
+    if (!buffer_write_bytes(buffer, flags, flags_length)) {
+        return 0;
+    }
+    *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
+    return 1;
+}
+
 /* TODO our platform better be little-endian w/ 4-byte ints! */
 /* Write a single value to the buffer (also write its type_byte, for which
  * space has already been reserved.
@@ -574,6 +701,11 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
                 *(buffer_get_buffer(buffer) + type_byte) = 0x07;
                 return 1;
             }
+        case 11:
+            {
+                /* Regex */
+                return _write_regex_to_buffer(buffer, type_byte, value);
+            }
         case 13:
             {
                 /* Code */
@@ -890,115 +1022,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
         *(buffer_get_buffer(buffer) + type_byte) = 0x09;
         return buffer_write_bytes(buffer, (const char*)&millis, 8);
     } else if (PyObject_TypeCheck(value, state->REType)) {
-        PyObject* py_flags;
-        PyObject* py_pattern;
-        PyObject* encoded_pattern;
-        long int_flags;
-        char flags[FLAGS_SIZE];
-        char check_utf8 = 0;
-        const char* pattern_data;
-        int pattern_length, flags_length;
-        result_t status;
-
-        py_flags = PyObject_GetAttrString(value, "flags");
-        if (!py_flags) {
-            return 0;
-        }
-#if PY_MAJOR_VERSION >= 3
-        int_flags = PyLong_AsLong(py_flags);
-#else
-        int_flags = PyInt_AsLong(py_flags);
-#endif
-        Py_DECREF(py_flags);
-        py_pattern = PyObject_GetAttrString(value, "pattern");
-        if (!py_pattern) {
-            return 0;
-        }
-
-        if (PyUnicode_Check(py_pattern)) {
-            encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
-            Py_DECREF(py_pattern);
-            if (!encoded_pattern) {
-                return 0;
-            }
-        } else {
-            encoded_pattern = py_pattern;
-            check_utf8 = 1;
-        }
-
-#if PY_MAJOR_VERSION >= 3
-        if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-        if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-#else
-        if (!(pattern_data = PyString_AsString(encoded_pattern))) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-        if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-#endif
-        status = check_string((const unsigned char*)pattern_data,
-                              pattern_length, check_utf8, 1);
-        if (status == NOT_UTF_8) {
-            PyObject* InvalidStringData = _error("InvalidStringData");
-            if (InvalidStringData) {
-                PyErr_SetString(InvalidStringData,
-                                "regex patterns must be valid UTF-8");
-                Py_DECREF(InvalidStringData);
-            }
-            Py_DECREF(encoded_pattern);
-            return 0;
-        } else if (status == HAS_NULL) {
-            PyObject* InvalidDocument = _error("InvalidDocument");
-            if (InvalidDocument) {
-                PyErr_SetString(InvalidDocument,
-                                "regex patterns must not contain the NULL byte");
-                Py_DECREF(InvalidDocument);
-            }
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-
-        if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
-            Py_DECREF(encoded_pattern);
-            return 0;
-        }
-        Py_DECREF(encoded_pattern);
-
-        flags[0] = 0;
-        /* TODO don't hardcode these */
-        if (int_flags & 2) {
-            STRCAT(flags, FLAGS_SIZE, "i");
-        }
-        if (int_flags & 4) {
-            STRCAT(flags, FLAGS_SIZE, "l");
-        }
-        if (int_flags & 8) {
-            STRCAT(flags, FLAGS_SIZE, "m");
-        }
-        if (int_flags & 16) {
-            STRCAT(flags, FLAGS_SIZE, "s");
-        }
-        if (int_flags & 32) {
-            STRCAT(flags, FLAGS_SIZE, "u");
-        }
-        if (int_flags & 64) {
-            STRCAT(flags, FLAGS_SIZE, "x");
-        }
-        flags_length = (int)strlen(flags) + 1;
-        if (!buffer_write_bytes(buffer, flags, flags_length)) {
-            return 0;
-        }
-        *(buffer_get_buffer(buffer) + type_byte) = 0x0B;
-        return 1;
+        return _write_regex_to_buffer(buffer, type_byte, value);
     }
     
     /* 
@@ -1435,7 +1459,8 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
 
 static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position,
                            unsigned char type, unsigned max, PyObject* as_class,
-                           unsigned char tz_aware, unsigned char uuid_subtype) {
+                           unsigned char tz_aware, unsigned char uuid_subtype,
+                           unsigned char compile_re) {
     struct module_state *state = GETSTATE(self);
 
     PyObject* value = NULL;
@@ -1495,7 +1520,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
                 goto invalid;
             }
             value = elements_to_dict(self, buffer + *position + 4,
-                                     size - 5, as_class, tz_aware, uuid_subtype);
+                                     size - 5, as_class, tz_aware, uuid_subtype,
+                                     compile_re);
             if (!value) {
                 return NULL;
             }
@@ -1587,7 +1613,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
                 }
                 to_append = get_value(self, buffer, position, bson_type,
                                       max - (unsigned)key_size,
-                                      as_class, tz_aware, uuid_subtype);
+                                      as_class, tz_aware, uuid_subtype,
+                                      compile_re);
                 Py_LeaveRecursiveCall();
                 if (!to_append) {
                     Py_DECREF(value);
@@ -1850,7 +1877,18 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
                 }
             }
             *position += (unsigned)flags_length + 1;
-            if ((compile_func = _get_object(state->RECompile, "re", "compile"))) {
+
+            /*
+             * Use re.compile() if we're configured to compile regular
+             * expressions, else create an instance of our Regex class.
+             */
+            if (compile_re) {
+                compile_func = _get_object(state->RECompile, "re", "compile");
+            } else {
+                compile_func = _get_object(state->Regex, "bson.regex", "Regex");
+            }
+
+            if (compile_func) {
                 value = PyObject_CallFunction(compile_func, "Oi", pattern, flags);
                 Py_DECREF(compile_func);
             }
@@ -1990,7 +2028,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
             }
             scope = elements_to_dict(self, buffer + *position + 4,
                                      scope_size - 5, (PyObject*)&PyDict_Type,
-                                     tz_aware, uuid_subtype);
+                                     tz_aware, uuid_subtype, compile_re);
             if (!scope) {
                 Py_DECREF(code);
                 return NULL;
@@ -2098,7 +2136,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
 static PyObject* _elements_to_dict(PyObject* self, const char* string,
                                    unsigned max, PyObject* as_class,
                                    unsigned char tz_aware,
-                                   unsigned char uuid_subtype) {
+                                   unsigned char uuid_subtype,
+                                   unsigned char compile_re) {
     unsigned position = 0;
     PyObject* dict = PyObject_CallObject(as_class, NULL);
     if (!dict) {
@@ -2126,7 +2165,8 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
         }
         position += (unsigned)name_length + 1;
         value = get_value(self, string, &position, type,
-                          max - position, as_class, tz_aware, uuid_subtype);
+                          max - position, as_class, tz_aware, uuid_subtype,
+                          compile_re);
         if (!value) {
             Py_DECREF(name);
             Py_DECREF(dict);
@@ -2143,12 +2183,13 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
 static PyObject* elements_to_dict(PyObject* self, const char* string,
                                   unsigned max, PyObject* as_class,
                                   unsigned char tz_aware,
-                                  unsigned char uuid_subtype) {
+                                  unsigned char uuid_subtype,
+                                  unsigned char compile_re) {
     PyObject* result;
     if (Py_EnterRecursiveCall(" while decoding a BSON document"))
         return NULL;
     result = _elements_to_dict(self, string, max,
-                               as_class, tz_aware, uuid_subtype);
+                               as_class, tz_aware, uuid_subtype, compile_re);
     Py_LeaveRecursiveCall();
     return result;
 }
@@ -2161,11 +2202,14 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
     PyObject* as_class;
     unsigned char tz_aware;
     unsigned char uuid_subtype;
+    unsigned char compile_re;
+
     PyObject* dict;
     PyObject* remainder;
     PyObject* result;
 
-    if (!PyArg_ParseTuple(args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
+    if (!PyArg_ParseTuple(
+            args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
         return NULL;
     }
 
@@ -2231,7 +2275,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
     }
 
     dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
-                            as_class, tz_aware, uuid_subtype);
+                            as_class, tz_aware, uuid_subtype, compile_re);
     if (!dict) {
         return NULL;
     }
@@ -2260,8 +2304,11 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
     PyObject* as_class = (PyObject*)&PyDict_Type;
     unsigned char tz_aware = 1;
     unsigned char uuid_subtype = 3;
+    unsigned char compile_re = 1; /* optional arg; default matches Python's True */
 
-    if (!PyArg_ParseTuple(args, "O|Obb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
+    if (!PyArg_ParseTuple(
+            args, "O|Obbb",
+            &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
         return NULL;
     }
     }
 
@@ -2332,7 +2379,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
         }
 
         dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
-                                as_class, tz_aware, uuid_subtype);
+                                as_class, tz_aware, uuid_subtype, compile_re);
         if (!dict) {
             Py_DECREF(result);
             return NULL;
@@ -2364,6 +2411,7 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
     Py_VISIT(GETSTATE(m)->ObjectId);
     Py_VISIT(GETSTATE(m)->DBRef);
     Py_VISIT(GETSTATE(m)->RECompile);
+    Py_VISIT(GETSTATE(m)->Regex);
     Py_VISIT(GETSTATE(m)->UUID);
     Py_VISIT(GETSTATE(m)->Timestamp);
     Py_VISIT(GETSTATE(m)->MinKey);
@@ -2379,6 +2427,7 @@ static int _cbson_clear(PyObject *m) {
     Py_CLEAR(GETSTATE(m)->ObjectId);
     Py_CLEAR(GETSTATE(m)->DBRef);
     Py_CLEAR(GETSTATE(m)->RECompile);
+    Py_CLEAR(GETSTATE(m)->Regex);
     Py_CLEAR(GETSTATE(m)->UUID);
     Py_CLEAR(GETSTATE(m)->Timestamp);
     Py_CLEAR(GETSTATE(m)->MinKey);
diff --git a/bson/json_util.py b/bson/json_util.py
index e5056d895..9439c4b8d 100644
--- a/bson/json_util.py
+++ b/bson/json_util.py
@@ -90,6 +90,7 @@ from bson.dbref import DBRef
 from bson.max_key import MaxKey
 from bson.min_key import MinKey
 from bson.objectid import ObjectId
+from bson.regex import Regex
 from bson.timestamp import Timestamp
 
 from bson.py3compat import PY3, binary_type, string_types
@@ -120,10 +121,20 @@ def loads(s, *args, **kwargs):
     """Helper function that wraps :class:`json.loads`.
 
     Automatically passes the object_hook for BSON type conversion.
+
+    :Parameters:
+      - `compile_re` (optional): if ``False``, don't attempt to compile
+        BSON regular expressions into Python regular expressions. Return
+        instances of :class:`~bson.regex.Regex` instead.
+
+    .. versionchanged:: 2.7
+       Added ``compile_re`` option.
     """
     if not json_lib:
         raise Exception("No json library available")
-    kwargs['object_hook'] = object_hook
+
+    compile_re = kwargs.pop('compile_re', True)
+    kwargs['object_hook'] = lambda dct: object_hook(dct, compile_re)
     return json.loads(s, *args, **kwargs)
 
 
@@ -141,7 +152,7 @@ def _json_convert(obj):
         return obj
 
 
-def object_hook(dct):
+def object_hook(dct, compile_re=True):
     if "$oid" in dct:
         return ObjectId(str(dct["$oid"]))
     if "$ref" in dct:
@@ -154,7 +165,11 @@ def object_hook(dct):
         # PyMongo always adds $options but some other tools may not.
         for opt in dct.get("$options", ""):
             flags |= _RE_OPT_TABLE.get(opt, 0)
-        return re.compile(dct["$regex"], flags)
+
+        if compile_re:
+            return re.compile(dct["$regex"], flags)
+        else:
+            return Regex(dct["$regex"], flags)
     if "$minKey" in dct:
         return MinKey()
     if "$maxKey" in dct:
diff --git a/bson/regex.py b/bson/regex.py
new file mode 100644
index 000000000..f790ddc3c
--- /dev/null
+++ b/bson/regex.py
@@ -0,0 +1,83 @@
+# Copyright 2013 MongoDB, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for representing MongoDB regular expressions.
+"""
+
+import re
+
+from bson.py3compat import string_types
+
+
+def str_flags_to_int(str_flags):
+    flags = 0
+    if "i" in str_flags:
+        flags |= re.IGNORECASE
+    if "l" in str_flags:
+        flags |= re.LOCALE
+    if "m" in str_flags:
+        flags |= re.MULTILINE
+    if "s" in str_flags:
+        flags |= re.DOTALL
+    if "u" in str_flags:
+        flags |= re.UNICODE
+    if "x" in str_flags:
+        flags |= re.VERBOSE
+
+    return flags
+
+
+class Regex(object):
+    """BSON regular expression data."""
+    _type_marker = 11
+
+    def __init__(self, pattern, flags=0):
+        """BSON regular expression data.
+
+        This class is useful to store and retrieve regular expressions that are
+        incompatible with Python's regular expression dialect.
+
+        :Parameters:
+          - `pattern`: string
+          - `flags`: (optional) an integer bitmask, or a string of flag
+            characters like "im" for IGNORECASE and MULTILINE
+        """
+        if not isinstance(pattern, string_types):
+            raise TypeError("pattern must be a string, not %s" % type(pattern))
+        self.pattern = pattern
+
+        if isinstance(flags, string_types):
+            self.flags = str_flags_to_int(flags)
+        elif isinstance(flags, int):
+            self.flags = flags
+        else:
+            raise TypeError(
+                "flags must be a string or int, not %s" % type(flags))
+
+    def __eq__(self, other):
+        # Equal only when both the pattern and the flags of `other` match.
+        if not isinstance(other, Regex):
+            return NotImplemented
+        return self.pattern == other.pattern and self.flags == other.flags
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __repr__(self):
+        return "Regex(%r, %r)" % (self.pattern, self.flags)
+
+    def compile(self):
+        """Compile this ``Regex`` as a Python regular expression.
+        """
+        return re.compile(self.pattern, self.flags)
diff --git a/bson/son.py b/bson/son.py
index 7390e30db..29ada2ffb 100644
--- a/bson/son.py
+++ b/bson/son.py
@@ -35,29 +35,29 @@ class SON(dict):
 
     The mapping from Python types to BSON types is as follows:
 
-    ===================================  =============  ===================
-    Python Type                          BSON Type      Supported Direction
-    ===================================  =============  ===================
-    None                                 null           both
-    bool                                 boolean        both
-    int [#int]_                          int32 / int64  py -> bson
-    long                                 int64          both
-    float                                number (real)  both
-    string                               string         py -> bson
-    unicode                              string         both
-    list                                 array          both
-    dict / `SON`                         object         both
-    datetime.datetime [#dt]_ [#dt2]_     date           both
-    compiled re                          regex          both
-    `bson.binary.Binary`                 binary         both
-    `bson.objectid.ObjectId`             oid            both
-    `bson.dbref.DBRef`                   dbref          both
-    None                                 undefined      bson -> py
-    unicode                              code           bson -> py
-    `bson.code.Code`                     code           py -> bson
-    unicode                              symbol         bson -> py
-    bytes (Python 3) [#bytes]_           binary         both
-    ===================================  =============  ===================
+    =======================================  =============  ===================
+    Python Type                              BSON Type      Supported Direction
+    =======================================  =============  ===================
+    None                                     null           both
+    bool                                     boolean        both
+    int [#int]_                              int32 / int64  py -> bson
+    long                                     int64          both
+    float                                    number (real)  both
+    string                                   string         py -> bson
+    unicode                                  string         both
+    list                                     array          both
+    dict / `SON`                             object         both
+    datetime.datetime [#dt]_ [#dt2]_         date           both
+    `bson.regex.Regex` / compiled re [#re]_  regex          both
+    `bson.binary.Binary`                     binary         both
+    `bson.objectid.ObjectId`                 oid            both
+    `bson.dbref.DBRef`                       dbref          both
+    None                                     undefined      bson -> py
+    unicode                                  code           bson -> py
+    `bson.code.Code`                         code           py -> bson
+    unicode                                  symbol         bson -> py
+    bytes (Python 3) [#bytes]_               binary         both
+    =======================================  =============  ===================
 
     Note that to save binary data it must be wrapped as an instance of
     `bson.binary.Binary`. Otherwise it will be saved as a BSON string
@@ -71,6 +71,11 @@ class SON(dict):
        millisecond when saved
     .. [#dt2] all datetime.datetime instances are treated as *naive*. clients
        should always use UTC.
+    .. [#re] :class:`~bson.regex.Regex` instances and regular expression
+       objects from ``re.compile()`` are both saved as BSON regular expressions.
+       BSON regular expressions are decoded as Python regular expressions by
+       default, or as :class:`~bson.regex.Regex` instances if the ``compile_re``
+       option is set to ``False``.
     .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
        subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
        it will be decoded to an instance of :class:`~bson.binary.Binary` with
diff --git a/doc/api/bson/index.rst b/doc/api/bson/index.rst
index 745dc0dd6..8b8a90504 100644
--- a/doc/api/bson/index.rst
+++ b/doc/api/bson/index.rst
@@ -11,6 +11,7 @@ Sub-modules:
    :maxdepth: 2
 
    binary
+   regex
    code
    dbref
    errors
diff --git a/doc/api/bson/regex.rst b/doc/api/bson/regex.rst
new file mode 100644
index 000000000..3a5603774
--- /dev/null
+++ b/doc/api/bson/regex.rst
@@ -0,0 +1,7 @@
+:mod:`regex` -- Tools for representing MongoDB regular expressions
+==================================================================
+.. versionadded:: 2.7
+
+.. automodule:: bson.regex
+   :synopsis: Tools for representing MongoDB regular expressions
+   :members:
diff --git a/doc/api/pymongo/collection.rst b/doc/api/pymongo/collection.rst
index 3288d8916..ea6c915b3 100644
--- a/doc/api/pymongo/collection.rst
+++ b/doc/api/pymongo/collection.rst
@@ -33,7 +33,7 @@
       .. automethod:: update(spec, document[, upsert=False[, manipulate=False[, safe=None[, multi=False[, check_keys=True[, **kwargs]]]]]])
       .. automethod:: remove([spec_or_id=None[, safe=None[, **kwargs]]])
       .. automethod:: drop
-      .. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[,**kwargs]]]]]]]]]]]]]]]]])
+      .. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[, compile_re=True[, **kwargs]]]]]]]]]]]]]]]]]])
       .. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
       .. automethod:: count
       .. automethod:: create_index
diff --git a/pymongo/collection.py b/pymongo/collection.py
index 466095bf5..ab87c4682 100644
--- a/pymongo/collection.py
+++ b/pymongo/collection.py
@@ -690,6 +690,9 @@ class Collection(common.BaseObject):
             the nearest member may accept reads. Default 15 milliseconds.
             **Ignored by mongos** and must be configured on the command line.
             See the localThreshold_ option for more information.
+          - `compile_re` (optional): if ``False``, don't attempt to compile
+            BSON regex objects into Python regexes. Return instances of
+            :class:`~bson.regex.Regex` instead.
           - `exhaust` (optional): If ``True`` create an "exhaust" cursor.
             MongoDB will stream batched results to the client without waiting
             for the client to request each batch, reducing latency.
@@ -717,12 +720,15 @@ class Collection(common.BaseObject):
             5. The `network_timeout` option is ignored when using the
             `exhaust` option.
 
-        .. note:: The `manipulate` parameter may default to False in
-           a future release.
+        .. note:: The `manipulate` and `compile_re` parameters may default to
+           False in future releases.
 
         .. note:: The `max_scan` parameter requires server
            version **>= 1.5.1**
 
+        .. versionadded:: 2.7
+           The ``compile_re`` parameter.
+
         .. versionadded:: 2.3
            The `tag_sets` and `secondary_acceptable_latency_ms` parameters.
 
diff --git a/pymongo/cursor.py b/pymongo/cursor.py
index de999af06..7e145bc1b 100644
--- a/pymongo/cursor.py
+++ b/pymongo/cursor.py
@@ -69,8 +69,8 @@ class Cursor(object):
                  await_data=False, partial=False, manipulate=True,
                  read_preference=ReadPreference.PRIMARY,
                  tag_sets=[{}], secondary_acceptable_latency_ms=None,
-                 exhaust=False, _must_use_master=False, _uuid_subtype=None,
-                 _first_batch=None, _cursor_id=None,
+                 exhaust=False, compile_re=True, _must_use_master=False,
+                 _uuid_subtype=None, _first_batch=None, _cursor_id=None,
                  **kwargs):
         """Create a new cursor.
 
@@ -152,6 +152,7 @@ class Cursor(object):
         self.__tag_sets = tag_sets
         self.__secondary_acceptable_latency_ms = secondary_acceptable_latency_ms
         self.__tz_aware = collection.database.connection.tz_aware
+        self.__compile_re = compile_re
         self.__must_use_master = _must_use_master
         self.__uuid_subtype = _uuid_subtype or collection.uuid_subtype
 
@@ -225,8 +226,8 @@ class Cursor(object):
                            "batch_size", "max_scan", "as_class", "slave_okay",
                            "manipulate", "read_preference", "tag_sets",
                            "secondary_acceptable_latency_ms",
-                           "must_use_master", "uuid_subtype", "query_flags",
-                           "kwargs")
+                           "must_use_master", "uuid_subtype", "compile_re",
+                           "query_flags", "kwargs")
         data = dict((k, v) for k, v in self.__dict__.iteritems()
                     if k.startswith('_Cursor__') and k[9:] in values_to_clone)
         if deepcopy:
@@ -667,6 +668,7 @@ class Cursor(object):
         r = database.command("count", self.__collection.name,
                              allowable_errors=["ns missing"],
                              uuid_subtype=self.__uuid_subtype,
+                             compile_re=self.__compile_re,
                              **command)
         if r.get("errmsg", "") == "ns missing":
             return 0
@@ -718,6 +720,7 @@ class Cursor(object):
         return database.command("distinct",
                                 self.__collection.name,
                                 uuid_subtype=self.__uuid_subtype,
+                                compile_re=self.__compile_re,
                                 **options)["values"]
 
     def explain(self):
@@ -829,7 +832,8 @@ class Cursor(object):
             response = helpers._unpack_response(response, self.__id,
                                                 self.__as_class,
                                                 self.__tz_aware,
-                                                self.__uuid_subtype)
+                                                self.__uuid_subtype,
+                                                self.__compile_re)
         except AutoReconnect:
             # Don't send kill cursors to another server after a "not master"
             # error. It's completely pointless.
diff --git a/pymongo/database.py b/pymongo/database.py
index ae26dee4c..5832f1d09 100644
--- a/pymongo/database.py
+++ b/pymongo/database.py
@@ -273,7 +273,7 @@ class Database(common.BaseObject):
 
     def command(self, command, value=1,
                 check=True, allowable_errors=[],
-                uuid_subtype=OLD_UUID_SUBTYPE, **kwargs):
+                uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, **kwargs):
         """Issue a MongoDB command.
 
         Send command `command` to the database and return the
@@ -318,6 +318,12 @@ class Database(common.BaseObject):
             in this list will be ignored by error-checking
           - `uuid_subtype` (optional): The BSON binary subtype to use
             for a UUID used in this command.
+          - `compile_re` (optional): if ``False``, don't attempt to compile
+            BSON regular expressions into Python regular expressions. Return
+            instances of :class:`~bson.regex.Regex` instead. Can avoid
+            :exc:`~bson.errors.InvalidBSON` errors when receiving
+            Python-incompatible regular expressions, for example from
+            ``currentOp``
           - `read_preference`: The read preference for this connection.
             See :class:`~pymongo.read_preferences.ReadPreference` for available
             options.
@@ -337,6 +343,8 @@ class Database(common.BaseObject):
 
         .. note:: ``command`` ignores the ``network_timeout`` parameter.
 
+        .. versionchanged:: 2.7
+           Added ``compile_re`` option.
         .. versionchanged:: 2.3
            Added `tag_sets` and `secondary_acceptable_latency_ms` options.
         .. versionchanged:: 2.2
@@ -390,6 +398,7 @@ class Database(common.BaseObject):
         extra_opts['secondary_acceptable_latency_ms'] = kwargs.pop(
             'secondary_acceptable_latency_ms',
             self.secondary_acceptable_latency_ms)
+        extra_opts['compile_re'] = compile_re
 
         fields = kwargs.get('fields')
         if fields is not None and not isinstance(fields, dict):
diff --git a/pymongo/helpers.py b/pymongo/helpers.py
index 38d88b904..904c5913b 100644
--- a/pymongo/helpers.py
+++ b/pymongo/helpers.py
@@ -73,7 +73,8 @@ def _index_document(index_list):
 
 
 def _unpack_response(response, cursor_id=None, as_class=dict,
-                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+                     tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
+                     compile_re=True):
     """Unpack a response from the database.
 
     Check the response for errors and unpack, returning a dictionary
@@ -108,7 +109,8 @@ def _unpack_response(response, cursor_id=None, as_class=dict,
     result["starting_from"] = struct.unpack("<i", response[12:16])[0]
     result["number_returned"] = struct.unpack("<i", response[16:20])[0]
     result["data"] = bson.decode_all(response[20:],
-                                     as_class, tz_aware, uuid_subtype)
+                                     as_class, tz_aware, uuid_subtype,
+                                     compile_re)
     assert len(result["data"]) == result["number_returned"]
     return result
 
diff --git a/test/test_bson.py b/test/test_bson.py
index 392f6f541..0b8ac1b40 100644
--- a/test/test_bson.py
+++ b/test/test_bson.py
@@ -32,7 +32,8 @@ from nose.plugins.skip import SkipTest
 import bson
 from bson import (BSON,
                   decode_all,
-                  is_valid)
+                  is_valid,
+                  Regex)
 from bson.binary import Binary, UUIDLegacy
 from bson.code import Code
 from bson.objectid import ObjectId
@@ -534,5 +535,42 @@ class TestBSON(unittest.TestCase):
         d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)])
         self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict))
 
+    def test_bson_regex(self):
+        # Invalid Python regex, though valid PCRE.
+        bson_re1 = Regex(r'[\w-\.]')
+        self.assertEqual(r'[\w-\.]', bson_re1.pattern)
+        self.assertEqual(0, bson_re1.flags)
+
+        doc1 = {'r': bson_re1}
+        doc1_bson = b(
+            '\x11\x00\x00\x00'              # document length
+            '\x0br\x00[\\w-\\.]\x00\x00'    # r: regex
+            '\x00')                         # document terminator
+
+        self.assertEqual(doc1_bson, BSON.encode(doc1))
+        self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False))
+
+        # Valid Python regex, with flags.
+        re2 = re.compile('.*', re.IGNORECASE | re.MULTILINE | re.UNICODE)
+        bson_re2 = Regex('.*', re.IGNORECASE | re.MULTILINE | re.UNICODE)
+
+        doc2_with_re = {'r': re2}
+        doc2_with_bson_re = {'r': bson_re2}
+        doc2_bson = b(
+            "\x0f\x00\x00\x00"          # document length
+            "\x0br\x00.*\x00imu\x00"    # r: regex
+            "\x00")                     # document terminator
+
+        self.assertEqual(doc2_bson, BSON.encode(doc2_with_re))
+        self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re))
+
+        # Built-in re objects don't support ==. Compare pattern and flags.
+        self.assertEqual(re2.pattern, BSON(doc2_bson).decode()['r'].pattern)
+        self.assertEqual(re2.flags, BSON(doc2_bson).decode()['r'].flags)
+
+        self.assertEqual(
+            doc2_with_bson_re, BSON(doc2_bson).decode(compile_re=False))
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_collection.py b/test/test_collection.py
index caa4016cc..d9717eac4 100644
--- a/test/test_collection.py
+++ b/test/test_collection.py
@@ -29,11 +29,12 @@ from nose.plugins.skip import SkipTest
 sys.path[0:0] = [""]
 
 from bson.binary import Binary
+from bson.regex import Regex
 from bson.code import Code
 from bson.dbref import DBRef
 from bson.objectid import ObjectId
 from bson.py3compat import b
-from bson.son import SON
+from bson.son import SON, RE_TYPE
 from pymongo import (ASCENDING, DESCENDING, GEO2D,
                      GEOHAYSTACK, GEOSPHERE, HASHED)
 from pymongo import message as message_module
@@ -1267,6 +1268,19 @@ class TestCollection(unittest.TestCase):
         self.assertEqual(expected, db.test.aggregate([pipeline]))
         self.assertEqual(expected, db.test.aggregate((pipeline,)))
 
+    def test_aggregate_with_compile_re(self):
+        if not version.at_least(self.db.connection, (2, 1, 0)):
+            raise SkipTest("The aggregate command requires MongoDB >= 2.1.0")
+
+        db = self.client.pymongo_test
+        db.test.drop()
+        db.test.insert({'r': re.compile('.*')})
+
+        result = db.test.aggregate([])
+        self.assertTrue(isinstance(result['result'][0]['r'], RE_TYPE))
+        result = db.test.aggregate([], compile_re=False)
+        self.assertTrue(isinstance(result['result'][0]['r'], Regex))
+
     def test_aggregation_cursor_validation(self):
         if not version.at_least(self.db.connection, (2, 5, 1)):
             raise SkipTest("Aggregation cursor requires MongoDB >= 2.5.1")
@@ -2148,6 +2162,22 @@ class TestCollection(unittest.TestCase):
         self.assertEqual(2, c.find_one(manipulate=True)['foo'])
         c.remove({})
 
+    def test_compile_re(self):
+        c = self.client.pymongo_test.test
+        c.drop()
+        c.insert({'r': re.compile('.*')})
+
+        # Test find_one with compile_re.
+        self.assertTrue(isinstance(c.find_one()['r'], RE_TYPE))
+        self.assertTrue(isinstance(c.find_one(compile_re=False)['r'], Regex))
+
+        # Test find with compile_re.
+        for doc in c.find():
+            self.assertTrue(isinstance(doc['r'], RE_TYPE))
+
+        for doc in c.find(compile_re=False):
+            self.assertTrue(isinstance(doc['r'], Regex))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_cursor.py b/test/test_cursor.py
index f02b326c3..8e9901ccf 100644
--- a/test/test_cursor.py
+++ b/test/test_cursor.py
@@ -552,6 +552,7 @@ class TestCursor(unittest.TestCase):
                                    await_data=True,
                                    partial=True,
                                    manipulate=False,
+                                   compile_re=False,
                                    fields={'_id': False}).limit(2)
         cursor.add_option(128)
 
@@ -565,6 +566,8 @@ class TestCursor(unittest.TestCase):
                          cursor2._Cursor__slave_okay)
         self.assertEqual(cursor._Cursor__manipulate,
                          cursor2._Cursor__manipulate)
+        self.assertEqual(cursor._Cursor__compile_re,
+                         cursor2._Cursor__compile_re)
         self.assertEqual(cursor._Cursor__query_flags,
                          cursor2._Cursor__query_flags)
 
diff --git a/test/test_database.py b/test/test_database.py
index b3fdf9bda..d69cc6738 100644
--- a/test/test_database.py
+++ b/test/test_database.py
@@ -16,6 +16,7 @@
 
 import datetime
 import os
+import re
 import sys
 import warnings
 
@@ -25,9 +26,10 @@ import unittest
 from nose.plugins.skip import SkipTest
 
 from bson.code import Code
+from bson.regex import Regex
 from bson.dbref import DBRef
 from bson.objectid import ObjectId
-from bson.son import SON
+from bson.son import SON, RE_TYPE
 from pymongo import (ALL,
                      auth,
                      OFF,
@@ -303,6 +305,21 @@ class TestDatabase(unittest.TestCase):
         if not is_mongos(self.client):
             db.command('eval', 'sleep(100)', network_timeout=0.001)
 
+    def test_command_with_compile_re(self):
+        # Using 'aggregate' as our example command, since it's an easy way to
+        # retrieve a BSON regex from a collection using a command.
+        if not version.at_least(self.client, (2, 1, 0)):
+            raise SkipTest('Need aggregation to test compile_re')
+
+        db = self.client.pymongo_test
+        db.test.drop()
+        db.test.insert({'r': re.compile('.*')})
+
+        result = db.command('aggregate', 'test', pipeline=[])
+        self.assertTrue(isinstance(result['result'][0]['r'], RE_TYPE))
+        result = db.command('aggregate', 'test', pipeline=[], compile_re=False)
+        self.assertTrue(isinstance(result['result'][0]['r'], Regex))
+
     def test_last_status(self):
         db = self.client.pymongo_test
 
diff --git a/test/test_json_util.py b/test/test_json_util.py
index 6a21ba154..cc299e28b 100644
--- a/test/test_json_util.py
+++ b/test/test_json_util.py
@@ -32,6 +32,8 @@ from bson.dbref import DBRef
 from bson.max_key import MaxKey
 from bson.min_key import MinKey
 from bson.objectid import ObjectId
+from bson.regex import Regex
+from bson.son import RE_TYPE
 from bson.timestamp import Timestamp
 from bson.tz_util import utc
 
@@ -74,6 +76,22 @@ class TestJsonUtil(unittest.TestCase):
         self.round_trip({"date": datetime.datetime(2009, 12, 9, 15,
                                                    49, 45, 191000, utc)})
 
+    def test_regex_object_hook(self):
+        import json
+
+        # Extended JSON format regular expression.
+        pat = 'a*b'
+        json_re = '{"$regex": "%s", "$options": "u"}' % pat
+        loaded = json_util.object_hook(json.loads(json_re))
+        self.assertTrue(isinstance(loaded, RE_TYPE))
+        self.assertEqual(pat, loaded.pattern)
+        self.assertEqual(re.U, loaded.flags)
+
+        loaded = json_util.object_hook(json.loads(json_re), compile_re=False)
+        self.assertTrue(isinstance(loaded, Regex))
+        self.assertEqual(pat, loaded.pattern)
+        self.assertEqual(re.U, loaded.flags)
+
     def test_regex(self):
         res = self.round_tripped({"r": re.compile("a*b", re.IGNORECASE)})["r"]
         self.assertEqual("a*b", res.pattern)
@@ -95,6 +113,12 @@ class TestJsonUtil(unittest.TestCase):
             expected_flags = re.U
         self.assertEqual(expected_flags, res.flags)
 
+        self.assertEqual(
+            Regex('.*', 'ilm'),
+            json_util.loads(
+                '{"r": {"$regex": ".*", "$options": "ilm"}}',
+                compile_re=False)['r'])
+
     def test_minkey(self):
         self.round_trip({"m": MinKey()})