Option to not compile BSON regexes. PYTHON-500
Add a 'compile_re' parameter to Collection.find, Collection.find_one, and json_util.loads. If it's False, regular expressions are decoded as instances of a new class, Regex, instead of being passed to re.compile(). This allows PyMongo to handle regular expressions that don't compile in Python but are valid in other contexts, such as MongoDB queries.
This commit is contained in:
parent
f0a419e5ff
commit
e4cf504559
127
bson/__init__.py
127
bson/__init__.py
@ -32,6 +32,7 @@ from bson.max_key import MaxKey
|
||||
from bson.min_key import MinKey
|
||||
from bson.objectid import ObjectId
|
||||
from bson.py3compat import b, binary_type
|
||||
from bson.regex import Regex
|
||||
from bson.son import SON, RE_TYPE
|
||||
from bson.timestamp import Timestamp
|
||||
from bson.tz_util import utc
|
||||
@ -90,7 +91,8 @@ BSONMAX = b("\x7F") # Max key
|
||||
|
||||
|
||||
def _get_int(data, position, as_class=None,
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, unsigned=False):
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
|
||||
compile_re=True, unsigned=False):
|
||||
format = unsigned and "I" or "i"
|
||||
try:
|
||||
value = struct.unpack("<%s" % format, data[position:position + 4])[0]
|
||||
@ -132,13 +134,13 @@ def _make_c_string(string, check_null=False):
|
||||
"UTF-8: %r" % string)
|
||||
|
||||
|
||||
def _get_number(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
num = struct.unpack("<d", data[position:position + 8])[0]
|
||||
position += 8
|
||||
return num, position
|
||||
|
||||
|
||||
def _get_string(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_string(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
length = struct.unpack("<i", data[position:position + 4])[0]
|
||||
if length <= 0 or (len(data) - position - 4) < length:
|
||||
raise InvalidBSON("invalid string length")
|
||||
@ -148,12 +150,14 @@ def _get_string(data, position, as_class, tz_aware, uuid_subtype):
|
||||
return _get_c_string(data, position, length - 1)
|
||||
|
||||
|
||||
def _get_object(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_object(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
obj_size = struct.unpack("<i", data[position:position + 4])[0]
|
||||
if data[position + obj_size - 1:position + obj_size] != ZERO:
|
||||
raise InvalidBSON("bad eoo")
|
||||
encoded = data[position + 4:position + obj_size - 1]
|
||||
object = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype)
|
||||
object = _elements_to_dict(
|
||||
encoded, as_class, tz_aware, uuid_subtype, compile_re)
|
||||
|
||||
position += obj_size
|
||||
if "$ref" in object:
|
||||
return (DBRef(object.pop("$ref"), object.pop("$id", None),
|
||||
@ -161,9 +165,9 @@ def _get_object(data, position, as_class, tz_aware, uuid_subtype):
|
||||
return object, position
|
||||
|
||||
|
||||
def _get_array(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_array(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
obj, position = _get_object(data, position,
|
||||
as_class, tz_aware, uuid_subtype)
|
||||
as_class, tz_aware, uuid_subtype, compile_re)
|
||||
result = []
|
||||
i = 0
|
||||
while True:
|
||||
@ -175,7 +179,7 @@ def _get_array(data, position, as_class, tz_aware, uuid_subtype):
|
||||
return result, position
|
||||
|
||||
|
||||
def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_binary(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
length, position = _get_int(data, position)
|
||||
subtype = ord(data[position:position + 1])
|
||||
position += 1
|
||||
@ -207,19 +211,19 @@ def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
|
||||
|
||||
|
||||
def _get_oid(data, position, as_class=None,
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
|
||||
value = ObjectId(data[position:position + 12])
|
||||
position += 12
|
||||
return value, position
|
||||
|
||||
|
||||
def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_boolean(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
value = data[position:position + 1] == ONE
|
||||
position += 1
|
||||
return value, position
|
||||
|
||||
|
||||
def _get_date(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_date(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
millis = struct.unpack("<q", data[position:position + 8])[0]
|
||||
diff = millis % 1000
|
||||
seconds = (millis - diff) / 1000
|
||||
@ -231,58 +235,51 @@ def _get_date(data, position, as_class, tz_aware, uuid_subtype):
|
||||
return dt.replace(microsecond=diff * 1000), position
|
||||
|
||||
|
||||
def _get_code(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_code(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
code, position = _get_string(data, position,
|
||||
as_class, tz_aware, uuid_subtype)
|
||||
as_class, tz_aware, uuid_subtype, compile_re)
|
||||
return Code(code), position
|
||||
|
||||
|
||||
def _get_code_w_scope(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_code_w_scope(
|
||||
data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
_, position = _get_int(data, position)
|
||||
code, position = _get_string(data, position,
|
||||
as_class, tz_aware, uuid_subtype)
|
||||
as_class, tz_aware, uuid_subtype, compile_re)
|
||||
scope, position = _get_object(data, position,
|
||||
as_class, tz_aware, uuid_subtype)
|
||||
as_class, tz_aware, uuid_subtype, compile_re)
|
||||
return Code(code, scope), position
|
||||
|
||||
|
||||
def _get_null(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_null(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
return None, position
|
||||
|
||||
|
||||
def _get_regex(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
pattern, position = _get_c_string(data, position)
|
||||
bson_flags, position = _get_c_string(data, position)
|
||||
flags = 0
|
||||
if "i" in bson_flags:
|
||||
flags |= re.IGNORECASE
|
||||
if "l" in bson_flags:
|
||||
flags |= re.LOCALE
|
||||
if "m" in bson_flags:
|
||||
flags |= re.MULTILINE
|
||||
if "s" in bson_flags:
|
||||
flags |= re.DOTALL
|
||||
if "u" in bson_flags:
|
||||
flags |= re.UNICODE
|
||||
if "x" in bson_flags:
|
||||
flags |= re.VERBOSE
|
||||
return re.compile(pattern, flags), position
|
||||
bson_re = Regex(pattern, bson_flags)
|
||||
if compile_re:
|
||||
return bson_re.compile(), position
|
||||
else:
|
||||
return bson_re, position
|
||||
|
||||
|
||||
def _get_ref(data, position, as_class, tz_aware, uuid_subtype):
|
||||
collection, position = _get_string(data, position,
|
||||
as_class, tz_aware, uuid_subtype)
|
||||
def _get_ref(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
collection, position = _get_string(data, position, as_class, tz_aware,
|
||||
uuid_subtype, compile_re)
|
||||
oid, position = _get_oid(data, position)
|
||||
return DBRef(collection, oid), position
|
||||
|
||||
|
||||
def _get_timestamp(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_timestamp(
|
||||
data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
inc, position = _get_int(data, position, unsigned=True)
|
||||
timestamp, position = _get_int(data, position, unsigned=True)
|
||||
return Timestamp(timestamp, inc), position
|
||||
|
||||
|
||||
def _get_long(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _get_long(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
# Have to cast to long; on 32-bit unpack may return an int.
|
||||
# 2to3 will change long to int. That's fine since long doesn't
|
||||
# exist in python3.
|
||||
@ -310,30 +307,32 @@ _element_getter = {
|
||||
BSONINT: _get_int, # number_int
|
||||
BSONTIM: _get_timestamp,
|
||||
BSONLON: _get_long, # Same as _get_int after 2to3 runs.
|
||||
BSONMIN: lambda v, w, x, y, z: (MinKey(), w),
|
||||
BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)}
|
||||
BSONMIN: lambda u, v, w, x, y, z: (MinKey(), v),
|
||||
BSONMAX: lambda u, v, w, x, y, z: (MaxKey(), v)}
|
||||
|
||||
|
||||
def _element_to_dict(data, position, as_class, tz_aware, uuid_subtype):
|
||||
def _element_to_dict(
|
||||
data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
element_type = data[position:position + 1]
|
||||
position += 1
|
||||
element_name, position = _get_c_string(data, position)
|
||||
value, position = _element_getter[element_type](data, position, as_class,
|
||||
tz_aware, uuid_subtype)
|
||||
value, position = _element_getter[element_type](
|
||||
data, position, as_class, tz_aware, uuid_subtype, compile_re)
|
||||
|
||||
return element_name, value, position
|
||||
|
||||
|
||||
def _elements_to_dict(data, as_class, tz_aware, uuid_subtype):
|
||||
def _elements_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
result = as_class()
|
||||
position = 0
|
||||
end = len(data) - 1
|
||||
while position < end:
|
||||
(key, value, position) = _element_to_dict(data, position, as_class,
|
||||
tz_aware, uuid_subtype)
|
||||
(key, value, position) = _element_to_dict(
|
||||
data, position, as_class, tz_aware, uuid_subtype, compile_re)
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
|
||||
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
obj_size = struct.unpack("<i", data[:4])[0]
|
||||
length = len(data)
|
||||
if length < obj_size:
|
||||
@ -341,8 +340,10 @@ def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
|
||||
if obj_size != length or data[obj_size - 1:obj_size] != ZERO:
|
||||
raise InvalidBSON("bad eoo")
|
||||
elements = data[4:obj_size - 1]
|
||||
return (_elements_to_dict(elements, as_class,
|
||||
tz_aware, uuid_subtype), data[obj_size:])
|
||||
dct = _elements_to_dict(
|
||||
elements, as_class, tz_aware, uuid_subtype, compile_re)
|
||||
|
||||
return dct, data[obj_size:]
|
||||
if _use_c:
|
||||
_bson_to_dict = _cbson._bson_to_dict
|
||||
|
||||
@ -444,7 +445,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
|
||||
return BSONTIM + name + inc + time
|
||||
if value is None:
|
||||
return BSONNUL + name
|
||||
if isinstance(value, RE_TYPE):
|
||||
if isinstance(value, (RE_TYPE, Regex)):
|
||||
pattern = value.pattern
|
||||
flags = ""
|
||||
if value.flags & re.IGNORECASE:
|
||||
@ -492,7 +493,7 @@ if _use_c:
|
||||
|
||||
|
||||
def decode_all(data, as_class=dict,
|
||||
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
|
||||
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
|
||||
"""Decode BSON data to multiple documents.
|
||||
|
||||
`data` must be a string of concatenated, valid, BSON-encoded
|
||||
@ -504,7 +505,14 @@ def decode_all(data, as_class=dict,
|
||||
documents
|
||||
- `tz_aware` (optional): if ``True``, return timezone-aware
|
||||
:class:`~datetime.datetime` instances
|
||||
- `compile_re` (optional): if ``False``, don't attempt to compile
|
||||
BSON regular expressions into Python regular expressions. Return
|
||||
instances of :class:`~bson.regex.Regex` instead. Can avoid
|
||||
:exc:`~bson.errors.InvalidBSON` errors when receiving
|
||||
Python-incompatible regular expressions, for example from ``currentOp``
|
||||
|
||||
.. versionchanged:: 2.7
|
||||
Added `compile_re` option.
|
||||
.. versionadded:: 1.9
|
||||
"""
|
||||
docs = []
|
||||
@ -519,7 +527,7 @@ def decode_all(data, as_class=dict,
|
||||
elements = data[position + 4:position + obj_size - 1]
|
||||
position += obj_size
|
||||
docs.append(_elements_to_dict(elements, as_class,
|
||||
tz_aware, uuid_subtype))
|
||||
tz_aware, uuid_subtype, compile_re))
|
||||
return docs
|
||||
if _use_c:
|
||||
decode_all = _cbson.decode_all
|
||||
@ -540,7 +548,7 @@ def is_valid(bson):
|
||||
"of a subclass of %s" % (binary_type.__name__,))
|
||||
|
||||
try:
|
||||
(_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
|
||||
(_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE, True)
|
||||
return remainder == EMPTY
|
||||
except:
|
||||
return False
|
||||
@ -573,7 +581,7 @@ class BSON(binary_type):
|
||||
return cls(_dict_to_bson(document, check_keys, uuid_subtype))
|
||||
|
||||
def decode(self, as_class=dict,
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True):
|
||||
"""Decode this BSON data.
|
||||
|
||||
The default type to use for the resultant document is
|
||||
@ -593,10 +601,21 @@ class BSON(binary_type):
|
||||
document
|
||||
- `tz_aware` (optional): if ``True``, return timezone-aware
|
||||
:class:`~datetime.datetime` instances
|
||||
- `compile_re` (optional): if ``False``, don't attempt to compile
|
||||
BSON regular expressions into Python regular expressions. Return
|
||||
instances of
|
||||
:class:`~bson.regex.Regex` instead. Can avoid
|
||||
:exc:`~bson.errors.InvalidBSON` errors when receiving
|
||||
Python-incompatible regular expressions, for example from
|
||||
``currentOp``
|
||||
|
||||
.. versionchanged:: 2.7
|
||||
Added ``compile_re`` option.
|
||||
.. versionadded:: 1.9
|
||||
"""
|
||||
(document, _) = _bson_to_dict(self, as_class, tz_aware, uuid_subtype)
|
||||
(document, _) = _bson_to_dict(
|
||||
self, as_class, tz_aware, uuid_subtype, compile_re)
|
||||
|
||||
return document
|
||||
|
||||
|
||||
|
||||
@ -43,6 +43,7 @@ struct module_state {
|
||||
PyObject* ObjectId;
|
||||
PyObject* DBRef;
|
||||
PyObject* RECompile;
|
||||
PyObject* Regex;
|
||||
PyObject* UUID;
|
||||
PyObject* Timestamp;
|
||||
PyObject* MinKey;
|
||||
@ -133,7 +134,8 @@ _downcast_and_check(Py_ssize_t size, int extra) {
|
||||
static PyObject* elements_to_dict(PyObject* self, const char* string,
|
||||
unsigned max, PyObject* as_class,
|
||||
unsigned char tz_aware,
|
||||
unsigned char uuid_subtype);
|
||||
unsigned char uuid_subtype,
|
||||
unsigned char compile_re);
|
||||
|
||||
static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
int type_byte, PyObject* value,
|
||||
@ -348,7 +350,8 @@ static int _load_python_objects(PyObject* module) {
|
||||
_load_object(&state->MinKey, "bson.min_key", "MinKey") ||
|
||||
_load_object(&state->MaxKey, "bson.max_key", "MaxKey") ||
|
||||
_load_object(&state->UTC, "bson.tz_util", "utc") ||
|
||||
_load_object(&state->RECompile, "re", "compile")) {
|
||||
_load_object(&state->RECompile, "re", "compile") ||
|
||||
_load_object(&state->Regex, "bson.regex", "Regex")) {
|
||||
return 1;
|
||||
}
|
||||
/* If we couldn't import uuid then we must be on 2.4. Just ignore. */
|
||||
@ -440,6 +443,130 @@ _set_cannot_encode(PyObject* value) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Encode a builtin Python regular expression or our custom Regex class.
|
||||
*
|
||||
* Sets exception and returns 0 on failure.
|
||||
*/
|
||||
static int _write_regex_to_buffer(
|
||||
buffer_t buffer, int type_byte, PyObject* value) {
|
||||
|
||||
struct module_state *state = GETSTATE(self);
|
||||
PyObject* py_flags;
|
||||
PyObject* py_pattern;
|
||||
PyObject* encoded_pattern;
|
||||
long int_flags;
|
||||
char flags[FLAGS_SIZE];
|
||||
char check_utf8 = 0;
|
||||
const char* pattern_data;
|
||||
int pattern_length, flags_length;
|
||||
result_t status;
|
||||
|
||||
/*
|
||||
* Both the builtin re type and our Regex class have attributes
|
||||
* "flags" and "pattern".
|
||||
*/
|
||||
py_flags = PyObject_GetAttrString(value, "flags");
|
||||
if (!py_flags) {
|
||||
return 0;
|
||||
}
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
int_flags = PyLong_AsLong(py_flags);
|
||||
#else
|
||||
int_flags = PyInt_AsLong(py_flags);
|
||||
#endif
|
||||
Py_DECREF(py_flags);
|
||||
py_pattern = PyObject_GetAttrString(value, "pattern");
|
||||
if (!py_pattern) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (PyUnicode_Check(py_pattern)) {
|
||||
encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
|
||||
Py_DECREF(py_pattern);
|
||||
if (!encoded_pattern) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
encoded_pattern = py_pattern;
|
||||
check_utf8 = 1;
|
||||
}
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
if (!(pattern_data = PyString_AsString(encoded_pattern))) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
status = check_string((const unsigned char*)pattern_data,
|
||||
pattern_length, check_utf8, 1);
|
||||
if (status == NOT_UTF_8) {
|
||||
PyObject* InvalidStringData = _error("InvalidStringData");
|
||||
if (InvalidStringData) {
|
||||
PyErr_SetString(InvalidStringData,
|
||||
"regex patterns must be valid UTF-8");
|
||||
Py_DECREF(InvalidStringData);
|
||||
}
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
} else if (status == HAS_NULL) {
|
||||
PyObject* InvalidDocument = _error("InvalidDocument");
|
||||
if (InvalidDocument) {
|
||||
PyErr_SetString(InvalidDocument,
|
||||
"regex patterns must not contain the NULL byte");
|
||||
Py_DECREF(InvalidDocument);
|
||||
}
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
Py_DECREF(encoded_pattern);
|
||||
|
||||
flags[0] = 0;
|
||||
|
||||
if (int_flags & 2) {
|
||||
STRCAT(flags, FLAGS_SIZE, "i");
|
||||
}
|
||||
if (int_flags & 4) {
|
||||
STRCAT(flags, FLAGS_SIZE, "l");
|
||||
}
|
||||
if (int_flags & 8) {
|
||||
STRCAT(flags, FLAGS_SIZE, "m");
|
||||
}
|
||||
if (int_flags & 16) {
|
||||
STRCAT(flags, FLAGS_SIZE, "s");
|
||||
}
|
||||
if (int_flags & 32) {
|
||||
STRCAT(flags, FLAGS_SIZE, "u");
|
||||
}
|
||||
if (int_flags & 64) {
|
||||
STRCAT(flags, FLAGS_SIZE, "x");
|
||||
}
|
||||
flags_length = (int)strlen(flags) + 1;
|
||||
if (!buffer_write_bytes(buffer, flags, flags_length)) {
|
||||
return 0;
|
||||
}
|
||||
*(buffer_get_buffer(buffer) + type_byte) = 0x0B;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* TODO our platform better be little-endian w/ 4-byte ints! */
|
||||
/* Write a single value to the buffer (also write its type_byte, for which
|
||||
* space has already been reserved.
|
||||
@ -574,6 +701,11 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
*(buffer_get_buffer(buffer) + type_byte) = 0x07;
|
||||
return 1;
|
||||
}
|
||||
case 11:
|
||||
{
|
||||
/* Regex */
|
||||
return _write_regex_to_buffer(buffer, type_byte, value);
|
||||
}
|
||||
case 13:
|
||||
{
|
||||
/* Code */
|
||||
@ -890,115 +1022,7 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
|
||||
*(buffer_get_buffer(buffer) + type_byte) = 0x09;
|
||||
return buffer_write_bytes(buffer, (const char*)&millis, 8);
|
||||
} else if (PyObject_TypeCheck(value, state->REType)) {
|
||||
PyObject* py_flags;
|
||||
PyObject* py_pattern;
|
||||
PyObject* encoded_pattern;
|
||||
long int_flags;
|
||||
char flags[FLAGS_SIZE];
|
||||
char check_utf8 = 0;
|
||||
const char* pattern_data;
|
||||
int pattern_length, flags_length;
|
||||
result_t status;
|
||||
|
||||
py_flags = PyObject_GetAttrString(value, "flags");
|
||||
if (!py_flags) {
|
||||
return 0;
|
||||
}
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
int_flags = PyLong_AsLong(py_flags);
|
||||
#else
|
||||
int_flags = PyInt_AsLong(py_flags);
|
||||
#endif
|
||||
Py_DECREF(py_flags);
|
||||
py_pattern = PyObject_GetAttrString(value, "pattern");
|
||||
if (!py_pattern) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (PyUnicode_Check(py_pattern)) {
|
||||
encoded_pattern = PyUnicode_AsUTF8String(py_pattern);
|
||||
Py_DECREF(py_pattern);
|
||||
if (!encoded_pattern) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
encoded_pattern = py_pattern;
|
||||
check_utf8 = 1;
|
||||
}
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
if (!(pattern_data = PyBytes_AsString(encoded_pattern))) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
if (!(pattern_data = PyString_AsString(encoded_pattern))) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
status = check_string((const unsigned char*)pattern_data,
|
||||
pattern_length, check_utf8, 1);
|
||||
if (status == NOT_UTF_8) {
|
||||
PyObject* InvalidStringData = _error("InvalidStringData");
|
||||
if (InvalidStringData) {
|
||||
PyErr_SetString(InvalidStringData,
|
||||
"regex patterns must be valid UTF-8");
|
||||
Py_DECREF(InvalidStringData);
|
||||
}
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
} else if (status == HAS_NULL) {
|
||||
PyObject* InvalidDocument = _error("InvalidDocument");
|
||||
if (InvalidDocument) {
|
||||
PyErr_SetString(InvalidDocument,
|
||||
"regex patterns must not contain the NULL byte");
|
||||
Py_DECREF(InvalidDocument);
|
||||
}
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) {
|
||||
Py_DECREF(encoded_pattern);
|
||||
return 0;
|
||||
}
|
||||
Py_DECREF(encoded_pattern);
|
||||
|
||||
flags[0] = 0;
|
||||
/* TODO don't hardcode these */
|
||||
if (int_flags & 2) {
|
||||
STRCAT(flags, FLAGS_SIZE, "i");
|
||||
}
|
||||
if (int_flags & 4) {
|
||||
STRCAT(flags, FLAGS_SIZE, "l");
|
||||
}
|
||||
if (int_flags & 8) {
|
||||
STRCAT(flags, FLAGS_SIZE, "m");
|
||||
}
|
||||
if (int_flags & 16) {
|
||||
STRCAT(flags, FLAGS_SIZE, "s");
|
||||
}
|
||||
if (int_flags & 32) {
|
||||
STRCAT(flags, FLAGS_SIZE, "u");
|
||||
}
|
||||
if (int_flags & 64) {
|
||||
STRCAT(flags, FLAGS_SIZE, "x");
|
||||
}
|
||||
flags_length = (int)strlen(flags) + 1;
|
||||
if (!buffer_write_bytes(buffer, flags, flags_length)) {
|
||||
return 0;
|
||||
}
|
||||
*(buffer_get_buffer(buffer) + type_byte) = 0x0B;
|
||||
return 1;
|
||||
return _write_regex_to_buffer(buffer, type_byte, value);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1435,7 +1459,8 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
|
||||
|
||||
static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position,
|
||||
unsigned char type, unsigned max, PyObject* as_class,
|
||||
unsigned char tz_aware, unsigned char uuid_subtype) {
|
||||
unsigned char tz_aware, unsigned char uuid_subtype,
|
||||
unsigned char compile_re) {
|
||||
struct module_state *state = GETSTATE(self);
|
||||
|
||||
PyObject* value = NULL;
|
||||
@ -1495,7 +1520,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
|
||||
goto invalid;
|
||||
}
|
||||
value = elements_to_dict(self, buffer + *position + 4,
|
||||
size - 5, as_class, tz_aware, uuid_subtype);
|
||||
size - 5, as_class, tz_aware, uuid_subtype,
|
||||
compile_re);
|
||||
if (!value) {
|
||||
return NULL;
|
||||
}
|
||||
@ -1587,7 +1613,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
|
||||
}
|
||||
to_append = get_value(self, buffer, position, bson_type,
|
||||
max - (unsigned)key_size,
|
||||
as_class, tz_aware, uuid_subtype);
|
||||
as_class, tz_aware, uuid_subtype,
|
||||
compile_re);
|
||||
Py_LeaveRecursiveCall();
|
||||
if (!to_append) {
|
||||
Py_DECREF(value);
|
||||
@ -1850,7 +1877,18 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
|
||||
}
|
||||
}
|
||||
*position += (unsigned)flags_length + 1;
|
||||
if ((compile_func = _get_object(state->RECompile, "re", "compile"))) {
|
||||
|
||||
/*
|
||||
* Use re.compile() if we're configured to compile regular
|
||||
* expressions, else create an instance of our Regex class.
|
||||
*/
|
||||
if (compile_re) {
|
||||
compile_func = _get_object(state->RECompile, "re", "compile");
|
||||
} else {
|
||||
compile_func = _get_object(state->Regex, "bson.regex", "Regex");
|
||||
}
|
||||
|
||||
if (compile_func) {
|
||||
value = PyObject_CallFunction(compile_func, "Oi", pattern, flags);
|
||||
Py_DECREF(compile_func);
|
||||
}
|
||||
@ -1990,7 +2028,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
|
||||
}
|
||||
scope = elements_to_dict(self, buffer + *position + 4,
|
||||
scope_size - 5, (PyObject*)&PyDict_Type,
|
||||
tz_aware, uuid_subtype);
|
||||
tz_aware, uuid_subtype, compile_re);
|
||||
if (!scope) {
|
||||
Py_DECREF(code);
|
||||
return NULL;
|
||||
@ -2098,7 +2136,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, unsigned* positio
|
||||
static PyObject* _elements_to_dict(PyObject* self, const char* string,
|
||||
unsigned max, PyObject* as_class,
|
||||
unsigned char tz_aware,
|
||||
unsigned char uuid_subtype) {
|
||||
unsigned char uuid_subtype,
|
||||
unsigned char compile_re) {
|
||||
unsigned position = 0;
|
||||
PyObject* dict = PyObject_CallObject(as_class, NULL);
|
||||
if (!dict) {
|
||||
@ -2126,7 +2165,8 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
|
||||
}
|
||||
position += (unsigned)name_length + 1;
|
||||
value = get_value(self, string, &position, type,
|
||||
max - position, as_class, tz_aware, uuid_subtype);
|
||||
max - position, as_class, tz_aware, uuid_subtype,
|
||||
compile_re);
|
||||
if (!value) {
|
||||
Py_DECREF(name);
|
||||
Py_DECREF(dict);
|
||||
@ -2143,12 +2183,13 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
|
||||
static PyObject* elements_to_dict(PyObject* self, const char* string,
|
||||
unsigned max, PyObject* as_class,
|
||||
unsigned char tz_aware,
|
||||
unsigned char uuid_subtype) {
|
||||
unsigned char uuid_subtype,
|
||||
unsigned char compile_re) {
|
||||
PyObject* result;
|
||||
if (Py_EnterRecursiveCall(" while decoding a BSON document"))
|
||||
return NULL;
|
||||
result = _elements_to_dict(self, string, max,
|
||||
as_class, tz_aware, uuid_subtype);
|
||||
as_class, tz_aware, uuid_subtype, compile_re);
|
||||
Py_LeaveRecursiveCall();
|
||||
return result;
|
||||
}
|
||||
@ -2161,11 +2202,14 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
|
||||
PyObject* as_class;
|
||||
unsigned char tz_aware;
|
||||
unsigned char uuid_subtype;
|
||||
unsigned char compile_re;
|
||||
|
||||
PyObject* dict;
|
||||
PyObject* remainder;
|
||||
PyObject* result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OObb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
|
||||
if (!PyArg_ParseTuple(
|
||||
args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -2231,7 +2275,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
|
||||
}
|
||||
|
||||
dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
|
||||
as_class, tz_aware, uuid_subtype);
|
||||
as_class, tz_aware, uuid_subtype, compile_re);
|
||||
if (!dict) {
|
||||
return NULL;
|
||||
}
|
||||
@ -2260,8 +2304,11 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
|
||||
PyObject* as_class = (PyObject*)&PyDict_Type;
|
||||
unsigned char tz_aware = 1;
|
||||
unsigned char uuid_subtype = 3;
|
||||
unsigned char compile_re;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|Obb", &bson, &as_class, &tz_aware, &uuid_subtype)) {
|
||||
if (!PyArg_ParseTuple(
|
||||
args, "O|Obbb",
|
||||
&bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -2332,7 +2379,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
|
||||
}
|
||||
|
||||
dict = elements_to_dict(self, string + 4, (unsigned)size - 5,
|
||||
as_class, tz_aware, uuid_subtype);
|
||||
as_class, tz_aware, uuid_subtype, compile_re);
|
||||
if (!dict) {
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
@ -2364,6 +2411,7 @@ static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) {
|
||||
Py_VISIT(GETSTATE(m)->ObjectId);
|
||||
Py_VISIT(GETSTATE(m)->DBRef);
|
||||
Py_VISIT(GETSTATE(m)->RECompile);
|
||||
Py_VISIT(GETSTATE(m)->Regex);
|
||||
Py_VISIT(GETSTATE(m)->UUID);
|
||||
Py_VISIT(GETSTATE(m)->Timestamp);
|
||||
Py_VISIT(GETSTATE(m)->MinKey);
|
||||
@ -2379,6 +2427,7 @@ static int _cbson_clear(PyObject *m) {
|
||||
Py_CLEAR(GETSTATE(m)->ObjectId);
|
||||
Py_CLEAR(GETSTATE(m)->DBRef);
|
||||
Py_CLEAR(GETSTATE(m)->RECompile);
|
||||
Py_CLEAR(GETSTATE(m)->Regex);
|
||||
Py_CLEAR(GETSTATE(m)->UUID);
|
||||
Py_CLEAR(GETSTATE(m)->Timestamp);
|
||||
Py_CLEAR(GETSTATE(m)->MinKey);
|
||||
|
||||
@ -90,6 +90,7 @@ from bson.dbref import DBRef
|
||||
from bson.max_key import MaxKey
|
||||
from bson.min_key import MinKey
|
||||
from bson.objectid import ObjectId
|
||||
from bson.regex import Regex
|
||||
from bson.timestamp import Timestamp
|
||||
|
||||
from bson.py3compat import PY3, binary_type, string_types
|
||||
@ -120,10 +121,20 @@ def loads(s, *args, **kwargs):
|
||||
"""Helper function that wraps :class:`json.loads`.
|
||||
|
||||
Automatically passes the object_hook for BSON type conversion.
|
||||
|
||||
:Parameters:
|
||||
- `compile_re` (optional): if ``False``, don't attempt to compile
|
||||
BSON regular expressions into Python regular expressions. Return
|
||||
instances of :class:`~bson.regex.Regex` instead.
|
||||
|
||||
.. versionchanged:: 2.7
|
||||
Added ``compile_re`` option.
|
||||
"""
|
||||
if not json_lib:
|
||||
raise Exception("No json library available")
|
||||
kwargs['object_hook'] = object_hook
|
||||
|
||||
compile_re = kwargs.pop('compile_re', True)
|
||||
kwargs['object_hook'] = lambda dct: object_hook(dct, compile_re)
|
||||
return json.loads(s, *args, **kwargs)
|
||||
|
||||
|
||||
@ -141,7 +152,7 @@ def _json_convert(obj):
|
||||
return obj
|
||||
|
||||
|
||||
def object_hook(dct):
|
||||
def object_hook(dct, compile_re=True):
|
||||
if "$oid" in dct:
|
||||
return ObjectId(str(dct["$oid"]))
|
||||
if "$ref" in dct:
|
||||
@ -154,7 +165,11 @@ def object_hook(dct):
|
||||
# PyMongo always adds $options but some other tools may not.
|
||||
for opt in dct.get("$options", ""):
|
||||
flags |= _RE_OPT_TABLE.get(opt, 0)
|
||||
return re.compile(dct["$regex"], flags)
|
||||
|
||||
if compile_re:
|
||||
return re.compile(dct["$regex"], flags)
|
||||
else:
|
||||
return Regex(dct["$regex"], flags)
|
||||
if "$minKey" in dct:
|
||||
return MinKey()
|
||||
if "$maxKey" in dct:
|
||||
|
||||
83
bson/regex.py
Normal file
83
bson/regex.py
Normal file
@ -0,0 +1,83 @@
|
||||
# Copyright 2013 MongoDB, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tools for representing MongoDB regular expressions.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from bson.py3compat import string_types
|
||||
|
||||
|
||||
def str_flags_to_int(str_flags):
    """Translate a string of regex option letters into an ``re`` bitmask.

    Each recognized letter contained in `str_flags` contributes the
    matching :mod:`re` flag to the result; any other characters are
    ignored.

    :Parameters:
      - `str_flags`: a string of option letters, e.g. ``"im"``

    Returns the combined flags (``0`` if no known letter is present).
    """
    # Option letter -> Python ``re`` flag it switches on.
    letter_to_flag = (
        ("i", re.IGNORECASE),
        ("l", re.LOCALE),
        ("m", re.MULTILINE),
        ("s", re.DOTALL),
        ("u", re.UNICODE),
        ("x", re.VERBOSE),
    )

    bitmask = 0
    for letter, flag in letter_to_flag:
        if letter in str_flags:
            bitmask |= flag

    return bitmask
|
||||
|
||||
|
||||
class Regex(object):
    """BSON regular expression data."""

    # 11 (0x0B) is the element type byte for regular expressions in the
    # BSON specification; presumably the encoder dispatches on this marker.
    _type_marker = 11

    def __init__(self, pattern, flags=0):
        """BSON regular expression data.

        This class is useful to store and retrieve regular expressions that are
        incompatible with Python's regular expression dialect.

        :Parameters:
          - `pattern`: string
          - `flags`: (optional) an integer bitmask, or a string of flag
            characters like "im" for IGNORECASE and MULTILINE

        Raises :exc:`TypeError` if `pattern` is not a string, or if `flags`
        is neither a string nor an int.
        """
        if not isinstance(pattern, string_types):
            raise TypeError("pattern must be a string, not %s" % type(pattern))
        self.pattern = pattern

        # Flags are always stored as an integer bitmask, whichever form
        # the caller supplied.
        if isinstance(flags, string_types):
            self.flags = str_flags_to_int(flags)
        elif isinstance(flags, int):
            self.flags = flags
        else:
            raise TypeError(
                "flags must be a string or int, not %s" % type(flags))

    def __eq__(self, other):
        """Two ``Regex`` instances are equal when both their patterns and
        their flags match. Comparison with any other type is deferred to
        the other operand by returning ``NotImplemented``.
        """
        if isinstance(other, Regex):
            # Compare against *other's* pattern; comparing self.pattern with
            # itself would make every pair with equal flags look equal.
            return (self.pattern == other.pattern
                    and self.flags == other.flags)
        return NotImplemented

    def __ne__(self, other):
        """Inverse of ``==``."""
        return not self == other

    def __repr__(self):
        return "Regex(%r, %r)" % (self.pattern, self.flags)

    def compile(self):
        """Compile this ``Regex`` as a Python regular expression.

        Returns the result of ``re.compile(pattern, flags)``; any error
        raised by :func:`re.compile` for a pattern Python cannot handle
        propagates to the caller.
        """
        return re.compile(self.pattern, self.flags)
|
||||
51
bson/son.py
51
bson/son.py
@ -35,29 +35,29 @@ class SON(dict):
|
||||
|
||||
The mapping from Python types to BSON types is as follows:
|
||||
|
||||
=================================== ============= ===================
|
||||
Python Type BSON Type Supported Direction
|
||||
=================================== ============= ===================
|
||||
None null both
|
||||
bool boolean both
|
||||
int [#int]_ int32 / int64 py -> bson
|
||||
long int64 both
|
||||
float number (real) both
|
||||
string string py -> bson
|
||||
unicode string both
|
||||
list array both
|
||||
dict / `SON` object both
|
||||
datetime.datetime [#dt]_ [#dt2]_ date both
|
||||
compiled re regex both
|
||||
`bson.binary.Binary` binary both
|
||||
`bson.objectid.ObjectId` oid both
|
||||
`bson.dbref.DBRef` dbref both
|
||||
None undefined bson -> py
|
||||
unicode code bson -> py
|
||||
`bson.code.Code` code py -> bson
|
||||
unicode symbol bson -> py
|
||||
bytes (Python 3) [#bytes]_ binary both
|
||||
=================================== ============= ===================
|
||||
======================================= ============= ===================
|
||||
Python Type BSON Type Supported Direction
|
||||
======================================= ============= ===================
|
||||
None null both
|
||||
bool boolean both
|
||||
int [#int]_ int32 / int64 py -> bson
|
||||
long int64 both
|
||||
float number (real) both
|
||||
string string py -> bson
|
||||
unicode string both
|
||||
list array both
|
||||
dict / `SON` object both
|
||||
datetime.datetime [#dt]_ [#dt2]_ date both
|
||||
`bson.regex.Regex` / compiled re [#re]_ regex both
|
||||
`bson.binary.Binary` binary both
|
||||
`bson.objectid.ObjectId` oid both
|
||||
`bson.dbref.DBRef` dbref both
|
||||
None undefined bson -> py
|
||||
unicode code bson -> py
|
||||
`bson.code.Code` code py -> bson
|
||||
unicode symbol bson -> py
|
||||
bytes (Python 3) [#bytes]_ binary both
|
||||
======================================= ============= ===================
|
||||
|
||||
Note that to save binary data it must be wrapped as an instance of
|
||||
`bson.binary.Binary`. Otherwise it will be saved as a BSON string
|
||||
@ -71,6 +71,11 @@ class SON(dict):
|
||||
millisecond when saved
|
||||
.. [#dt2] all datetime.datetime instances are treated as *naive*. clients
|
||||
should always use UTC.
|
||||
.. [#re] :class:`~bson.regex.Regex` instances and regular expression
|
||||
objects from ``re.compile()`` are both saved as BSON regular expressions.
|
||||
BSON regular expressions are decoded as Python regular expressions by
|
||||
default, or as :class:`~bson.regex.Regex` instances if the ``compile_re``
|
||||
option is set to ``False``.
|
||||
.. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
|
||||
subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
|
||||
it will be decoded to an instance of :class:`~bson.binary.Binary` with
|
||||
|
||||
@ -11,6 +11,7 @@ Sub-modules:
|
||||
:maxdepth: 2
|
||||
|
||||
binary
|
||||
regex
|
||||
code
|
||||
dbref
|
||||
errors
|
||||
|
||||
7
doc/api/bson/regex.rst
Normal file
7
doc/api/bson/regex.rst
Normal file
@ -0,0 +1,7 @@
|
||||
:mod:`regex` -- Tools for representing MongoDB regular expressions
|
||||
==================================================================
|
||||
.. versionadded:: 2.7
|
||||
|
||||
.. automodule:: bson.regex
|
||||
:synopsis: Tools for representing MongoDB regular expressions
|
||||
:members:
|
||||
@ -33,7 +33,7 @@
|
||||
.. automethod:: update(spec, document[, upsert=False[, manipulate=False[, safe=None[, multi=False[, check_keys=True[, **kwargs]]]]]])
|
||||
.. automethod:: remove([spec_or_id=None[, safe=None[, **kwargs]]])
|
||||
.. automethod:: drop
|
||||
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[,**kwargs]]]]]]]]]]]]]]]]])
|
||||
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[, compile_re=True[, **kwargs]]]]]]]]]]]]]]]]]])
|
||||
.. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
|
||||
.. automethod:: count
|
||||
.. automethod:: create_index
|
||||
|
||||
@ -690,6 +690,9 @@ class Collection(common.BaseObject):
|
||||
the nearest member may accept reads. Default 15 milliseconds.
|
||||
**Ignored by mongos** and must be configured on the command line.
|
||||
See the localThreshold_ option for more information.
|
||||
- `compile_re` (optional): if ``False``, don't attempt to compile
|
||||
BSON regex objects into Python regexes. Return instances of
|
||||
:class:`~bson.regex.Regex` instead.
|
||||
- `exhaust` (optional): If ``True`` create an "exhaust" cursor.
|
||||
MongoDB will stream batched results to the client without waiting
|
||||
for the client to request each batch, reducing latency.
|
||||
@ -717,12 +720,15 @@ class Collection(common.BaseObject):
|
||||
5. The `network_timeout` option is ignored when using the
|
||||
`exhaust` option.
|
||||
|
||||
.. note:: The `manipulate` parameter may default to False in
|
||||
a future release.
|
||||
.. note:: The `manipulate` and `compile_re` parameters may default to
|
||||
False in future releases.
|
||||
|
||||
.. note:: The `max_scan` parameter requires server
|
||||
version **>= 1.5.1**
|
||||
|
||||
.. versionadded:: 2.7
|
||||
The ``compile_re`` parameter.
|
||||
|
||||
.. versionadded:: 2.3
|
||||
The `tag_sets` and `secondary_acceptable_latency_ms` parameters.
|
||||
|
||||
|
||||
@ -69,8 +69,8 @@ class Cursor(object):
|
||||
await_data=False, partial=False, manipulate=True,
|
||||
read_preference=ReadPreference.PRIMARY,
|
||||
tag_sets=[{}], secondary_acceptable_latency_ms=None,
|
||||
exhaust=False, _must_use_master=False, _uuid_subtype=None,
|
||||
_first_batch=None, _cursor_id=None,
|
||||
exhaust=False, compile_re=True, _must_use_master=False,
|
||||
_uuid_subtype=None, _first_batch=None, _cursor_id=None,
|
||||
**kwargs):
|
||||
"""Create a new cursor.
|
||||
|
||||
@ -152,6 +152,7 @@ class Cursor(object):
|
||||
self.__tag_sets = tag_sets
|
||||
self.__secondary_acceptable_latency_ms = secondary_acceptable_latency_ms
|
||||
self.__tz_aware = collection.database.connection.tz_aware
|
||||
self.__compile_re = compile_re
|
||||
self.__must_use_master = _must_use_master
|
||||
self.__uuid_subtype = _uuid_subtype or collection.uuid_subtype
|
||||
|
||||
@ -225,8 +226,8 @@ class Cursor(object):
|
||||
"batch_size", "max_scan", "as_class", "slave_okay",
|
||||
"manipulate", "read_preference", "tag_sets",
|
||||
"secondary_acceptable_latency_ms",
|
||||
"must_use_master", "uuid_subtype", "query_flags",
|
||||
"kwargs")
|
||||
"must_use_master", "uuid_subtype", "compile_re",
|
||||
"query_flags", "kwargs")
|
||||
data = dict((k, v) for k, v in self.__dict__.iteritems()
|
||||
if k.startswith('_Cursor__') and k[9:] in values_to_clone)
|
||||
if deepcopy:
|
||||
@ -667,6 +668,7 @@ class Cursor(object):
|
||||
r = database.command("count", self.__collection.name,
|
||||
allowable_errors=["ns missing"],
|
||||
uuid_subtype=self.__uuid_subtype,
|
||||
compile_re=self.__compile_re,
|
||||
**command)
|
||||
if r.get("errmsg", "") == "ns missing":
|
||||
return 0
|
||||
@ -718,6 +720,7 @@ class Cursor(object):
|
||||
return database.command("distinct",
|
||||
self.__collection.name,
|
||||
uuid_subtype=self.__uuid_subtype,
|
||||
compile_re=self.__compile_re,
|
||||
**options)["values"]
|
||||
|
||||
def explain(self):
|
||||
@ -829,7 +832,8 @@ class Cursor(object):
|
||||
response = helpers._unpack_response(response, self.__id,
|
||||
self.__as_class,
|
||||
self.__tz_aware,
|
||||
self.__uuid_subtype)
|
||||
self.__uuid_subtype,
|
||||
self.__compile_re)
|
||||
except AutoReconnect:
|
||||
# Don't send kill cursors to another server after a "not master"
|
||||
# error. It's completely pointless.
|
||||
|
||||
@ -273,7 +273,7 @@ class Database(common.BaseObject):
|
||||
|
||||
def command(self, command, value=1,
|
||||
check=True, allowable_errors=[],
|
||||
uuid_subtype=OLD_UUID_SUBTYPE, **kwargs):
|
||||
uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, **kwargs):
|
||||
"""Issue a MongoDB command.
|
||||
|
||||
Send command `command` to the database and return the
|
||||
@ -318,6 +318,12 @@ class Database(common.BaseObject):
|
||||
in this list will be ignored by error-checking
|
||||
- `uuid_subtype` (optional): The BSON binary subtype to use
|
||||
for a UUID used in this command.
|
||||
- `compile_re` (optional): if ``False``, don't attempt to compile
|
||||
BSON regular expressions into Python regular expressions. Return
|
||||
instances of :class:`~bson.regex.Regex` instead. Can avoid
|
||||
:exc:`~bson.errors.InvalidBSON` errors when receiving
|
||||
Python-incompatible regular expressions, for example from
|
||||
``currentOp``
|
||||
- `read_preference`: The read preference for this connection.
|
||||
See :class:`~pymongo.read_preferences.ReadPreference` for available
|
||||
options.
|
||||
@ -337,6 +343,8 @@ class Database(common.BaseObject):
|
||||
|
||||
.. note:: ``command`` ignores the ``network_timeout`` parameter.
|
||||
|
||||
.. versionchanged:: 2.7
|
||||
Added ``compile_re`` option.
|
||||
.. versionchanged:: 2.3
|
||||
Added `tag_sets` and `secondary_acceptable_latency_ms` options.
|
||||
.. versionchanged:: 2.2
|
||||
@ -390,6 +398,7 @@ class Database(common.BaseObject):
|
||||
extra_opts['secondary_acceptable_latency_ms'] = kwargs.pop(
|
||||
'secondary_acceptable_latency_ms',
|
||||
self.secondary_acceptable_latency_ms)
|
||||
extra_opts['compile_re'] = compile_re
|
||||
|
||||
fields = kwargs.get('fields')
|
||||
if fields is not None and not isinstance(fields, dict):
|
||||
|
||||
@ -73,7 +73,8 @@ def _index_document(index_list):
|
||||
|
||||
|
||||
def _unpack_response(response, cursor_id=None, as_class=dict,
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
|
||||
tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE,
|
||||
compile_re=True):
|
||||
"""Unpack a response from the database.
|
||||
|
||||
Check the response for errors and unpack, returning a dictionary
|
||||
@ -108,7 +109,8 @@ def _unpack_response(response, cursor_id=None, as_class=dict,
|
||||
result["starting_from"] = struct.unpack("<i", response[12:16])[0]
|
||||
result["number_returned"] = struct.unpack("<i", response[16:20])[0]
|
||||
result["data"] = bson.decode_all(response[20:],
|
||||
as_class, tz_aware, uuid_subtype)
|
||||
as_class, tz_aware, uuid_subtype,
|
||||
compile_re)
|
||||
assert len(result["data"]) == result["number_returned"]
|
||||
return result
|
||||
|
||||
|
||||
@ -32,7 +32,8 @@ from nose.plugins.skip import SkipTest
|
||||
import bson
|
||||
from bson import (BSON,
|
||||
decode_all,
|
||||
is_valid)
|
||||
is_valid,
|
||||
Regex)
|
||||
from bson.binary import Binary, UUIDLegacy
|
||||
from bson.code import Code
|
||||
from bson.objectid import ObjectId
|
||||
@ -534,5 +535,42 @@ class TestBSON(unittest.TestCase):
|
||||
d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)])
|
||||
self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict))
|
||||
|
||||
def test_bson_regex(self):
    # Regex must encode to a BSON regex element and, with
    # compile_re=False, decode back to an equal Regex.

    # Invalid Python regex, though valid PCRE.
    pcre_pattern = r'[\w-\.]'
    plain_regex = Regex(pcre_pattern)
    self.assertEqual(pcre_pattern, plain_regex.pattern)
    self.assertEqual(0, plain_regex.flags)

    plain_doc = {'r': plain_regex}
    plain_doc_bytes = b(
        '\x11\x00\x00\x00'  # document length
        '\x0br\x00[\\w-\\.]\x00\x00'  # r: regex
        '\x00')  # document terminator

    self.assertEqual(plain_doc_bytes, BSON.encode(plain_doc))
    self.assertEqual(
        plain_doc, BSON(plain_doc_bytes).decode(compile_re=False))

    # Valid Python regex, with flags.
    flag_bits = re.IGNORECASE | re.MULTILINE | re.UNICODE
    compiled = re.compile('.*', flag_bits)
    uncompiled = Regex('.*', flag_bits)

    compiled_doc = {'r': compiled}
    uncompiled_doc = {'r': uncompiled}
    flagged_doc_bytes = b(
        "\x0f\x00\x00\x00"  # document length
        "\x0br\x00.*\x00imu\x00"  # r: regex
        "\x00")  # document terminator

    # Both representations must serialize to the same bytes.
    for document in (compiled_doc, uncompiled_doc):
        self.assertEqual(flagged_doc_bytes, BSON.encode(document))

    # Built-in re objects don't support ==. Compare pattern and flags.
    for attribute in ('pattern', 'flags'):
        decoded_re = BSON(flagged_doc_bytes).decode()['r']
        self.assertEqual(getattr(compiled, attribute),
                         getattr(decoded_re, attribute))

    self.assertEqual(
        uncompiled_doc, BSON(flagged_doc_bytes).decode(compile_re=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@ -29,11 +29,12 @@ from nose.plugins.skip import SkipTest
|
||||
sys.path[0:0] = [""]
|
||||
|
||||
from bson.binary import Binary
|
||||
from bson.regex import Regex
|
||||
from bson.code import Code
|
||||
from bson.dbref import DBRef
|
||||
from bson.objectid import ObjectId
|
||||
from bson.py3compat import b
|
||||
from bson.son import SON
|
||||
from bson.son import SON, RE_TYPE
|
||||
from pymongo import (ASCENDING, DESCENDING, GEO2D,
|
||||
GEOHAYSTACK, GEOSPHERE, HASHED)
|
||||
from pymongo import message as message_module
|
||||
@ -1267,6 +1268,19 @@ class TestCollection(unittest.TestCase):
|
||||
self.assertEqual(expected, db.test.aggregate([pipeline]))
|
||||
self.assertEqual(expected, db.test.aggregate((pipeline,)))
|
||||
|
||||
def test_aggregate_with_compile_re(self):
    # aggregate() should hand back compiled patterns by default and
    # Regex instances when compile_re=False is passed.
    server_has_aggregate = version.at_least(self.db.connection, (2, 1, 0))
    if not server_has_aggregate:
        raise SkipTest("The aggregate command requires MongoDB >= 2.1.0")

    db = self.client.pymongo_test
    db.test.drop()
    db.test.insert({'r': re.compile('.*')})

    # (extra keyword arguments, type the stored regex should decode to)
    scenarios = (
        ({}, RE_TYPE),
        ({'compile_re': False}, Regex),
    )
    for extra_kwargs, expected_type in scenarios:
        result = db.test.aggregate([], **extra_kwargs)
        decoded = result['result'][0]['r']
        self.assertTrue(isinstance(decoded, expected_type))
|
||||
|
||||
def test_aggregation_cursor_validation(self):
|
||||
if not version.at_least(self.db.connection, (2, 5, 1)):
|
||||
raise SkipTest("Aggregation cursor requires MongoDB >= 2.5.1")
|
||||
@ -2148,6 +2162,22 @@ class TestCollection(unittest.TestCase):
|
||||
self.assertEqual(2, c.find_one(manipulate=True)['foo'])
|
||||
c.remove({})
|
||||
|
||||
def test_compile_re(self):
    # find_one() and find() should decode a stored regex as a compiled
    # pattern by default, and as a Regex when compile_re=False.
    coll = self.client.pymongo_test.test
    coll.drop()
    coll.insert({'r': re.compile('.*')})

    # Test find_one with compile_re.
    default_doc = coll.find_one()
    self.assertTrue(isinstance(default_doc['r'], RE_TYPE))
    raw_doc = coll.find_one(compile_re=False)
    self.assertTrue(isinstance(raw_doc['r'], Regex))

    # Test find with compile_re.
    scenarios = (
        ({}, RE_TYPE),
        ({'compile_re': False}, Regex),
    )
    for extra_kwargs, expected_type in scenarios:
        for found in coll.find(**extra_kwargs):
            self.assertTrue(isinstance(found['r'], expected_type))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
@ -552,6 +552,7 @@ class TestCursor(unittest.TestCase):
|
||||
await_data=True,
|
||||
partial=True,
|
||||
manipulate=False,
|
||||
compile_re=False,
|
||||
fields={'_id': False}).limit(2)
|
||||
cursor.add_option(128)
|
||||
|
||||
@ -565,6 +566,8 @@ class TestCursor(unittest.TestCase):
|
||||
cursor2._Cursor__slave_okay)
|
||||
self.assertEqual(cursor._Cursor__manipulate,
|
||||
cursor2._Cursor__manipulate)
|
||||
self.assertEqual(cursor._Cursor__compile_re,
|
||||
cursor2._Cursor__compile_re)
|
||||
self.assertEqual(cursor._Cursor__query_flags,
|
||||
cursor2._Cursor__query_flags)
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
@ -25,9 +26,10 @@ import unittest
|
||||
from nose.plugins.skip import SkipTest
|
||||
|
||||
from bson.code import Code
|
||||
from bson.regex import Regex
|
||||
from bson.dbref import DBRef
|
||||
from bson.objectid import ObjectId
|
||||
from bson.son import SON
|
||||
from bson.son import SON, RE_TYPE
|
||||
from pymongo import (ALL,
|
||||
auth,
|
||||
OFF,
|
||||
@ -303,6 +305,21 @@ class TestDatabase(unittest.TestCase):
|
||||
if not is_mongos(self.client):
|
||||
db.command('eval', 'sleep(100)', network_timeout=0.001)
|
||||
|
||||
def test_command_with_compile_re(self):
    # Using 'aggregate' as our example command, since it's an easy way to
    # retrieve a BSON regex from a collection using a command.
    aggregation_available = version.at_least(self.client, (2, 1, 0))
    if not aggregation_available:
        raise SkipTest('Need aggregation to test compile_re')

    db = self.client.pymongo_test
    db.test.drop()
    db.test.insert({'r': re.compile('.*')})

    # (extra keyword arguments, type the stored regex should decode to)
    scenarios = (
        ({}, RE_TYPE),
        ({'compile_re': False}, Regex),
    )
    for extra_kwargs, expected_type in scenarios:
        result = db.command(
            'aggregate', 'test', pipeline=[], **extra_kwargs)
        decoded = result['result'][0]['r']
        self.assertTrue(isinstance(decoded, expected_type))
|
||||
|
||||
def test_last_status(self):
|
||||
db = self.client.pymongo_test
|
||||
|
||||
|
||||
@ -32,6 +32,8 @@ from bson.dbref import DBRef
|
||||
from bson.max_key import MaxKey
|
||||
from bson.min_key import MinKey
|
||||
from bson.objectid import ObjectId
|
||||
from bson.regex import Regex
|
||||
from bson.son import RE_TYPE
|
||||
from bson.timestamp import Timestamp
|
||||
from bson.tz_util import utc
|
||||
|
||||
@ -74,6 +76,22 @@ class TestJsonUtil(unittest.TestCase):
|
||||
self.round_trip({"date": datetime.datetime(2009, 12, 9, 15,
|
||||
49, 45, 191000, utc)})
|
||||
|
||||
def test_regex_object_hook(self):
    # object_hook should yield a compiled pattern by default and a Regex
    # when compile_re=False, with identical pattern and flags either way.
    import json

    # Extended JSON format regular expression.
    pat = 'a*b'
    json_re = '{"$regex": "%s", "$options": "u"}' % pat

    # (extra keyword arguments, type object_hook should produce)
    scenarios = (
        ({}, RE_TYPE),
        ({'compile_re': False}, Regex),
    )
    for extra_kwargs, expected_type in scenarios:
        # Parse afresh for each scenario so the hook gets its own dict.
        parsed = json.loads(json_re)
        loaded = json_util.object_hook(parsed, **extra_kwargs)
        self.assertTrue(isinstance(loaded, expected_type))
        self.assertEqual(pat, loaded.pattern)
        self.assertEqual(re.U, loaded.flags)
|
||||
|
||||
def test_regex(self):
|
||||
res = self.round_tripped({"r": re.compile("a*b", re.IGNORECASE)})["r"]
|
||||
self.assertEqual("a*b", res.pattern)
|
||||
@ -95,6 +113,12 @@ class TestJsonUtil(unittest.TestCase):
|
||||
expected_flags = re.U
|
||||
self.assertEqual(expected_flags, res.flags)
|
||||
|
||||
self.assertEqual(
|
||||
Regex('.*', 'ilm'),
|
||||
json_util.loads(
|
||||
'{"r": {"$regex": ".*", "$options": "ilm"}}',
|
||||
compile_re=False)['r'])
|
||||
|
||||
def test_minkey(self):
|
||||
self.round_trip({"m": MinKey()})
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user