From d625ff2bc9b90a1142afddd0d30dce654d76a45b Mon Sep 17 00:00:00 2001 From: "A. Jesse Jiryu Davis" Date: Fri, 6 Dec 2013 16:44:11 -0500 Subject: [PATCH] Rename Regex.compile() to try_compile() and add a warning, add from_native(), PYTHON-500. --- bson/__init__.py | 2 +- bson/regex.py | 38 ++++++++++++++++++++++++++++++++++++-- test/test_bson.py | 19 ++++++++++++++----- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/bson/__init__.py b/bson/__init__.py index c68892247..7c3ac4a7e 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -260,7 +260,7 @@ def _get_regex(data, position, as_class, tz_aware, uuid_subtype, compile_re): bson_flags, position = _get_c_string(data, position) bson_re = Regex(pattern, bson_flags) if compile_re: - return bson_re.compile(), position + return bson_re.try_compile(), position else: return bson_re, position diff --git a/bson/regex.py b/bson/regex.py index f790ddc3c..43d807a35 100644 --- a/bson/regex.py +++ b/bson/regex.py @@ -17,6 +17,7 @@ import re +from bson.son import RE_TYPE from bson.py3compat import string_types @@ -42,6 +43,29 @@ class Regex(object): """BSON regular expression data.""" _type_marker = 11 + @classmethod + def from_native(cls, regex): + """Convert a Python regular expression into a ``Regex`` instance. + + :Parameters: + - `regex`: A regular expression object from ``re.compile()``. + + .. warning:: + Python regular expressions use a different syntax and different + set of flags than MongoDB, which uses `PCRE`_. A regular + expression retrieved from the server may not compile in + Python, or may match a different set of strings in Python than + when used in a MongoDB query. + + .. _PCRE: http://www.pcre.org/ + """ + if not isinstance(regex, RE_TYPE): + raise TypeError( + "regex must be a compiled regular expression, not %s" + % type(regex)) + + return Regex(regex.pattern, regex.flags) + def __init__(self, pattern, flags=0): """BSON regular expression data. @@ -77,7 +101,17 @@ class Regex(object): def __repr__(self): return "Regex(%r, %r)" % (self.pattern, self.flags) - def compile(self): - """Compile this ``Regex`` as a Python regular expression. + def try_compile(self): + """Compile this :class:`Regex` as a Python regular expression. + + .. warning:: + Python regular expressions use a different syntax and different + set of flags than MongoDB, which uses `PCRE`_. A regular + expression retrieved from the server may not compile in + Python, or may match a different set of strings in Python than + when used in a MongoDB query. ``try_compile()`` may raise + :exc:`re.error`. + + .. _PCRE: http://www.pcre.org/ """ return re.compile(self.pattern, self.flags) diff --git a/test/test_bson.py b/test/test_bson.py index cebd675e2..886c23ea2 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -552,15 +552,15 @@ class TestBSON(unittest.TestCase): self.assertEqual(doc1, BSON(doc1_bson).decode(compile_re=False)) # Valid Python regex, with flags. - re2 = re.compile('.*', re.IGNORECASE | re.MULTILINE | re.UNICODE) - bson_re2 = Regex('.*', re.IGNORECASE | re.MULTILINE | re.UNICODE) + re2 = re.compile('.*', re.I | re.L | re.M | re.S | re.U | re.X) + bson_re2 = Regex('.*', re.I | re.L | re.M | re.S | re.U | re.X) doc2_with_re = {'r': re2} doc2_with_bson_re = {'r': bson_re2} doc2_bson = b( - "\x0f\x00\x00\x00" # document length - "\x0br\x00.*\x00imu\x00" # r: regex - "\x00") # document terminator + "\x12\x00\x00\x00" # document length + "\x0br\x00.*\x00ilmsux\x00" # r: regex + "\x00") # document terminator self.assertEqual(doc2_bson, BSON.encode(doc2_with_re)) self.assertEqual(doc2_bson, BSON.encode(doc2_with_bson_re)) @@ -572,6 +572,15 @@ class TestBSON(unittest.TestCase): self.assertEqual( doc2_with_bson_re, BSON(doc2_bson).decode(compile_re=False)) + def test_regex_from_native(self): + self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern) + self.assertEqual(0, Regex.from_native(re.compile('')).flags) + + regex = re.compile('', re.I | re.L | re.M | re.S | re.U | re.X) + self.assertEqual( + re.I | re.L | re.M | re.S | re.U | re.X, + Regex.from_native(regex).flags) + def test_exception_wrapping(self): # No matter what exception is raised while trying to decode BSON, # the final exception always matches InvalidBSON and the original