diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 6bfa148c0..a4d7f2797 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -1160,7 +1160,10 @@ buildvariants: # OSes that support versions of MongoDB>=2.6 and <3.6 with SSL. - ubuntu-12.04 auth-ssl: "*" - display_name: "${platform} ${auth-ssl}" + # Ubuntu 12 ships Python 2.7.3. We want to test that version with + # and without C extensions + c-extensions: "*" + display_name: "${platform} ${auth-ssl} ${c-extensions}" tasks: - ".3.4" - ".3.2" @@ -1173,7 +1176,10 @@ buildvariants: # OSes that support versions of MongoDB>=2.6 and <4.0 with SSL. - debian71 auth-ssl: "*" - display_name: "${platform} ${auth-ssl}" + # Debian 7 ships Python 2.7.3. We want to test that version with + # and without C extensions + c-extensions: "*" + display_name: "${platform} ${auth-ssl} ${c-extensions}" tasks: - ".3.6" - ".3.4" @@ -1331,6 +1337,21 @@ buildvariants: display_name: "${c-extensions} ${python-version} ${platform} ${auth} ${ssl} ${coverage}" tasks: *all-server-versions +- matrix_name: "tests-python-version-ubuntu1604-without-c-extensions" + matrix_spec: + platform: ubuntu-16.04 + python-version: &openssl-102-plus-pythons ["3.7"] + c-extensions: without-c-extensions + auth-ssl: noauth-nossl + display_name: "${c-extensions} ${python-version} ${platform} ${auth} ${ssl} ${coverage}" + tasks: + - ".latest" + - ".4.2" + - ".4.0" + - ".3.6" + - ".3.4" + - ".3.2" + - matrix_name: "tests-python-version-ubuntu16-compression" matrix_spec: # Ubuntu 16.04 images have libsnappy-dev installed, and provides OpenSSL 1.0.2 for testing Python 3.7 diff --git a/bson/__init__.py b/bson/__init__.py index c802cbfcb..892246a7c 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -67,6 +67,7 @@ type. 
import calendar import datetime import itertools +import platform import re import struct import sys @@ -137,60 +138,98 @@ BSONMIN = b"\xFF" # Min key BSONMAX = b"\x7F" # Max key -_UNPACK_FLOAT = struct.Struct("= obj_end: raise InvalidBSON("invalid object length") @@ -200,14 +239,14 @@ def _get_object_size(data, position, obj_end): return obj_size, end -def _get_object(data, position, obj_end, opts, dummy): +def _get_object(data, view, position, obj_end, opts, dummy): """Decode a BSON subdocument to opts.document_class or bson.dbref.DBRef.""" obj_size, end = _get_object_size(data, position, obj_end) if _raw_document_class(opts.document_class): return (opts.document_class(data[position:end + 1], opts), position + obj_size) - obj = _elements_to_dict(data, position + 4, end, opts) + obj = _elements_to_dict(data, view, position + 4, end, opts) position += obj_size if "$ref" in obj: @@ -216,11 +255,11 @@ def _get_object(data, position, obj_end, opts, dummy): return obj, position -def _get_array(data, position, obj_end, opts, element_name): +def _get_array(data, view, position, obj_end, opts, element_name): """Decode a BSON array to python list.""" - size = _UNPACK_INT(data[position:position + 4])[0] + size = _UNPACK_INT_FROM(data, position)[0] end = position + size - 1 - if data[end:end + 1] != b"\x00": + if data[end] != _OBJEND: raise InvalidBSON("bad eoo") position += 4 @@ -234,12 +273,12 @@ def _get_array(data, position, obj_end, opts, element_name): decoder_map = opts.type_registry._decoder_map while position < end: - element_type = data[position:position + 1] + element_type = data[position] # Just skip the keys. 
position = index(b'\x00', position) + 1 try: value, position = getter[element_type]( - data, position, obj_end, opts, element_name) + data, view, position, obj_end, opts, element_name) except KeyError: _raise_unknown_type(element_type, element_name) @@ -255,12 +294,12 @@ def _get_array(data, position, obj_end, opts, element_name): return result, position + 1 -def _get_binary(data, position, obj_end, opts, dummy1): +def _get_binary(data, view, position, obj_end, opts, dummy1): """Decode a BSON binary to bson.binary.Binary or python UUID.""" - length, subtype = _UNPACK_LENGTH_SUBTYPE(data[position:position + 5]) + length, subtype = _UNPACK_LENGTH_SUBTYPE_FROM(data, position) position += 5 if subtype == 2: - length2 = _UNPACK_INT(data[position:position + 4])[0] + length2 = _UNPACK_INT_FROM(data, position)[0] position += 4 if length2 != length - 4: raise InvalidBSON("invalid binary (st 2) - lengths don't match!") @@ -291,13 +330,13 @@ def _get_binary(data, position, obj_end, opts, dummy1): return value, end -def _get_oid(data, position, dummy0, dummy1, dummy2): +def _get_oid(data, view, position, dummy0, dummy1, dummy2): """Decode a BSON ObjectId to bson.objectid.ObjectId.""" end = position + 12 return ObjectId(data[position:end]), end -def _get_boolean(data, position, dummy0, dummy1, dummy2): +def _get_boolean(data, view, position, dummy0, dummy1, dummy2): """Decode a BSON true/false to python True/False.""" end = position + 1 boolean_byte = data[position:end] @@ -308,60 +347,57 @@ def _get_boolean(data, position, dummy0, dummy1, dummy2): raise InvalidBSON('invalid boolean value: %r' % boolean_byte) -def _get_date(data, position, dummy0, opts, dummy1): +def _get_date(data, view, position, dummy0, opts, dummy1): """Decode a BSON datetime to python datetime.datetime.""" - end = position + 8 - millis = _UNPACK_LONG(data[position:end])[0] - return _millis_to_datetime(millis, opts), end + return _millis_to_datetime( + _UNPACK_LONG_FROM(data, position)[0], opts), position + 
8 -def _get_code(data, position, obj_end, opts, element_name): +def _get_code(data, view, position, obj_end, opts, element_name): """Decode a BSON code to bson.code.Code.""" - code, position = _get_string(data, position, obj_end, opts, element_name) + code, position = _get_string(data, view, position, obj_end, opts, element_name) return Code(code), position -def _get_code_w_scope(data, position, obj_end, opts, element_name): +def _get_code_w_scope(data, view, position, obj_end, opts, element_name): """Decode a BSON code_w_scope to bson.code.Code.""" - code_end = position + _UNPACK_INT(data[position:position + 4])[0] + code_end = position + _UNPACK_INT_FROM(data, position)[0] code, position = _get_string( - data, position + 4, code_end, opts, element_name) - scope, position = _get_object(data, position, code_end, opts, element_name) + data, view, position + 4, code_end, opts, element_name) + scope, position = _get_object(data, view, position, code_end, opts, element_name) if position != code_end: raise InvalidBSON('scope outside of javascript code boundaries') return Code(code, scope), position -def _get_regex(data, position, dummy0, opts, dummy1): +def _get_regex(data, view, position, dummy0, opts, dummy1): """Decode a BSON regex to bson.regex.Regex or a python pattern object.""" - pattern, position = _get_c_string(data, position, opts) - bson_flags, position = _get_c_string(data, position, opts) + pattern, position = _get_c_string(data, view, position, opts) + bson_flags, position = _get_c_string(data, view, position, opts) bson_re = Regex(pattern, bson_flags) return bson_re, position -def _get_ref(data, position, obj_end, opts, element_name): +def _get_ref(data, view, position, obj_end, opts, element_name): """Decode (deprecated) BSON DBPointer to bson.dbref.DBRef.""" collection, position = _get_string( - data, position, obj_end, opts, element_name) - oid, position = _get_oid(data, position, obj_end, opts, element_name) + data, view, position, obj_end, opts, 
element_name) + oid, position = _get_oid(data, view, position, obj_end, opts, element_name) return DBRef(collection, oid), position -def _get_timestamp(data, position, dummy0, dummy1, dummy2): +def _get_timestamp(data, view, position, dummy0, dummy1, dummy2): """Decode a BSON timestamp to bson.timestamp.Timestamp.""" - end = position + 8 - inc, timestamp = _UNPACK_TIMESTAMP(data[position:end]) - return Timestamp(timestamp, inc), end + inc, timestamp = _UNPACK_TIMESTAMP_FROM(data, position) + return Timestamp(timestamp, inc), position + 8 -def _get_int64(data, position, dummy0, dummy1, dummy2): +def _get_int64(data, view, position, dummy0, dummy1, dummy2): """Decode a BSON int64 to bson.int64.Int64.""" - end = position + 8 - return Int64(_UNPACK_LONG(data[position:end])[0]), end + return Int64(_UNPACK_LONG_FROM(data, position)[0]), position + 8 -def _get_decimal128(data, position, dummy0, dummy1, dummy2): +def _get_decimal128(data, view, position, dummy0, dummy1, dummy2): """Decode a BSON decimal128 to bson.decimal128.Decimal128.""" end = position + 16 return Decimal128.from_bid(data[position:end]), end @@ -369,62 +405,70 @@ def _get_decimal128(data, position, dummy0, dummy1, dummy2): # Each decoder function's signature is: # - data: bytes +# - view: memoryview that references `data` # - position: int, beginning of object in 'data' to decode # - obj_end: int, end of object to decode in 'data' if variable-length type # - opts: a CodecOptions _ELEMENT_GETTER = { - BSONNUM: _get_float, - BSONSTR: _get_string, - BSONOBJ: _get_object, - BSONARR: _get_array, - BSONBIN: _get_binary, - BSONUND: lambda v, w, x, y, z: (None, w), # Deprecated undefined - BSONOID: _get_oid, - BSONBOO: _get_boolean, - BSONDAT: _get_date, - BSONNUL: lambda v, w, x, y, z: (None, w), - BSONRGX: _get_regex, - BSONREF: _get_ref, # Deprecated DBPointer - BSONCOD: _get_code, - BSONSYM: _get_string, # Deprecated symbol - BSONCWS: _get_code_w_scope, - BSONINT: _get_int, - BSONTIM: _get_timestamp, - 
BSONLON: _get_int64, - BSONDEC: _get_decimal128, - BSONMIN: lambda v, w, x, y, z: (MinKey(), w), - BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)} + _maybe_ord(BSONNUM): _get_float, + _maybe_ord(BSONSTR): _get_string, + _maybe_ord(BSONOBJ): _get_object, + _maybe_ord(BSONARR): _get_array, + _maybe_ord(BSONBIN): _get_binary, + _maybe_ord(BSONUND): lambda u, v, w, x, y, z: (None, w), # Deprecated undefined + _maybe_ord(BSONOID): _get_oid, + _maybe_ord(BSONBOO): _get_boolean, + _maybe_ord(BSONDAT): _get_date, + _maybe_ord(BSONNUL): lambda u, v, w, x, y, z: (None, w), + _maybe_ord(BSONRGX): _get_regex, + _maybe_ord(BSONREF): _get_ref, # Deprecated DBPointer + _maybe_ord(BSONCOD): _get_code, + _maybe_ord(BSONSYM): _get_string, # Deprecated symbol + _maybe_ord(BSONCWS): _get_code_w_scope, + _maybe_ord(BSONINT): _get_int, + _maybe_ord(BSONTIM): _get_timestamp, + _maybe_ord(BSONLON): _get_int64, + _maybe_ord(BSONDEC): _get_decimal128, + _maybe_ord(BSONMIN): lambda u, v, w, x, y, z: (MinKey(), w), + _maybe_ord(BSONMAX): lambda u, v, w, x, y, z: (MaxKey(), w)} -def _element_to_dict(data, position, obj_end, opts): - """Decode a single key, value pair.""" - element_type = data[position:position + 1] - position += 1 - element_name, position = _get_c_string(data, position, opts) - try: - value, position = _ELEMENT_GETTER[element_type](data, position, - obj_end, opts, - element_name) - except KeyError: - _raise_unknown_type(element_type, element_name) - - if opts.type_registry._decoder_map: - custom_decoder = opts.type_registry._decoder_map.get(type(value)) - if custom_decoder is not None: - value = custom_decoder(value) - - return element_name, value, position if _USE_C: - _element_to_dict = _cbson._element_to_dict + def _element_to_dict(data, view, position, obj_end, opts): + return _cbson._element_to_dict(data, position, obj_end, opts) +else: + def _element_to_dict(data, view, position, obj_end, opts): + """Decode a single key, value pair.""" + element_type = data[position] + 
position += 1 + element_name, position = _get_c_string(data, view, position, opts) + try: + value, position = _ELEMENT_GETTER[element_type](data, view, position, + obj_end, opts, + element_name) + except KeyError: + _raise_unknown_type(element_type, element_name) + + if opts.type_registry._decoder_map: + custom_decoder = opts.type_registry._decoder_map.get(type(value)) + if custom_decoder is not None: + value = custom_decoder(value) + + return element_name, value, position -def _elements_to_dict(data, position, obj_end, opts, result=None): +def _raw_to_dict(data, position, obj_end, opts, result): + data, view = get_data_and_view(data) + return _elements_to_dict(data, view, position, obj_end, opts, result) + + +def _elements_to_dict(data, view, position, obj_end, opts, result=None): """Decode a BSON document into result.""" if result is None: result = opts.document_class() end = obj_end - 1 while position < end: - key, value, position = _element_to_dict(data, position, obj_end, opts) + key, value, position = _element_to_dict(data, view, position, obj_end, opts) result[key] = value if position != obj_end: raise InvalidBSON('bad object or element length') @@ -433,11 +477,12 @@ def _elements_to_dict(data, position, obj_end, opts, result=None): def _bson_to_dict(data, opts): """Decode a BSON string to document_class.""" + data, view = get_data_and_view(data) try: if _raw_document_class(opts.document_class): return opts.document_class(data, opts) _, end = _get_object_size(data, 0, len(data)) - return _elements_to_dict(data, 4, end, opts) + return _elements_to_dict(data, view, 4, end, opts) except InvalidBSON: raise except Exception: @@ -605,8 +650,7 @@ def _encode_uuid(name, value, dummy, opts): # Microsoft GUID representation. 
return b"\x05" + name + b'\x10\x00\x00\x00\x03' + value.bytes_le # New - else: - return b"\x05" + name + b'\x10\x00\x00\x00\x04' + value.bytes + return b"\x05" + name + b'\x10\x00\x00\x00\x04' + value.bytes def _encode_objectid(name, value, dummy0, dummy1): @@ -859,7 +903,7 @@ def _millis_to_datetime(millis, opts): micros = diff * 1000 if opts.tz_aware: dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds, - microseconds=micros) + microseconds=micros) if opts.tzinfo: dt = dt.astimezone(opts.tzinfo) return dt @@ -883,14 +927,17 @@ _CODEC_OPTIONS_TYPE_ERROR = TypeError( def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): """Decode BSON data to multiple documents. - `data` must be a string of concatenated, valid, BSON-encoded - documents. + `data` must be a bytes-like object implementing the buffer protocol that + provides concatenated, valid, BSON-encoded documents. :Parameters: - `data`: BSON data - `codec_options` (optional): An instance of :class:`~bson.codec_options.CodecOptions`. + .. versionchanged:: 3.9 + Supports bytes-like objects that implement the buffer protocol. + .. versionchanged:: 3.0 Removed `compile_re` option: PyMongo now always represents BSON regular expressions as :class:`~bson.regex.Regex` objects. Use @@ -909,20 +956,22 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): .. 
_PYTHON-500: https://jira.mongodb.org/browse/PYTHON-500 """ + data, view = get_data_and_view(data) if not isinstance(codec_options, CodecOptions): raise _CODEC_OPTIONS_TYPE_ERROR + data_len = len(data) docs = [] position = 0 - end = len(data) - 1 + end = data_len - 1 use_raw = _raw_document_class(codec_options.document_class) try: while position < end: - obj_size = _UNPACK_INT(data[position:position + 4])[0] - if len(data) - position < obj_size: + obj_size = _UNPACK_INT_FROM(data, position)[0] + if data_len - position < obj_size: raise InvalidBSON("invalid object size") obj_end = position + obj_size - 1 - if data[obj_end:position + obj_size] != b"\x00": + if data[obj_end] != _OBJEND: raise InvalidBSON("bad eoo") if use_raw: docs.append( @@ -930,6 +979,7 @@ def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS): data[position:obj_end + 1], codec_options)) else: docs.append(_elements_to_dict(data, + view, position + 4, obj_end, codec_options)) @@ -1029,7 +1079,7 @@ def decode_iter(data, codec_options=DEFAULT_CODEC_OPTIONS): position = 0 end = len(data) - 1 while position < end: - obj_size = _UNPACK_INT(data[position:position + 4])[0] + obj_size = _UNPACK_INT_FROM(data, position)[0] elements = data[position:position + obj_size] position += obj_size @@ -1056,11 +1106,11 @@ def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS): while True: # Read size of next object. size_data = file_obj.read(4) - if len(size_data) == 0: + if not size_data: break # Finished with file normaly. 
elif len(size_data) != 4: raise InvalidBSON("cut off in middle of objsize") - obj_size = _UNPACK_INT(size_data)[0] - 4 + obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4 elements = size_data + file_obj.read(obj_size) yield _bson_to_dict(elements, codec_options) diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 8efaf820c..e82e1bb34 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -2757,6 +2757,7 @@ static int _element_to_dict(PyObject* self, const char* string, } static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) { + /* TODO: Support buffer protocol */ char* string; PyObject* bson; codec_options_t options; diff --git a/bson/raw_bson.py b/bson/raw_bson.py index 429b2acc3..fdf93081c 100644 --- a/bson/raw_bson.py +++ b/bson/raw_bson.py @@ -15,7 +15,7 @@ """Tools for representing raw BSON documents. """ -from bson import _elements_to_dict, _get_object_size +from bson import _raw_to_dict, _get_object_size from bson.py3compat import abc, iteritems from bson.codec_options import ( DEFAULT_CODEC_OPTIONS as DEFAULT, _RAW_BSON_DOCUMENT_MARKER) @@ -95,7 +95,7 @@ class RawBSONDocument(abc.Mapping): # We already validated the object's size when this document was # created, so no need to do that again. # Use SON to preserve ordering of elements. 
- self.__inflated_doc = _elements_to_dict( + self.__inflated_doc = _raw_to_dict( self.__raw, 4, len(self.__raw)-1, self.__codec_options, SON()) return self.__inflated_doc diff --git a/test/test_bson.py b/test/test_bson.py index f03f4092c..bea144f18 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -283,6 +283,21 @@ class TestBSON(unittest.TestCase): b"\x6f\x20\x77\x6F\x72\x6C\x64\x00\x00" b"\x05\x00\x00\x00\x00")))) + def test_buffer_protocol(self): + docs = [{'foo': 'bar'}, {}] + bs = b"".join(map(BSON.encode, docs)) + self.assertEqual(docs, decode_all(bytearray(bs))) + self.assertEqual(docs, decode_all(memoryview(bs))) + if PY3: + import array + import mmap + self.assertEqual(docs, decode_all(array.array('B', bs))) + with mmap.mmap(-1, len(bs)) as mm: + mm.write(bs) + mm.seek(0) + self.assertEqual(docs, decode_all(mm)) + + def test_invalid_decodes(self): # Invalid object size (not enough bytes in document for even # an object size of first object. diff --git a/test/test_raw_bson.py b/test/test_raw_bson.py index f262d003c..82a4a331d 100644 --- a/test/test_raw_bson.py +++ b/test/test_raw_bson.py @@ -22,9 +22,10 @@ from bson.errors import InvalidBSON from bson.raw_bson import RawBSONDocument, DEFAULT_RAW_BSON_OPTIONS from bson.son import SON from test import client_context, unittest +from test.test_client import IntegrationTest -class TestRawBSONDocument(unittest.TestCase): +class TestRawBSONDocument(IntegrationTest): # {u'_id': ObjectId('556df68b6e32ab21a95e0785'), # u'name': u'Sherlock', @@ -38,6 +39,7 @@ class TestRawBSONDocument(unittest.TestCase): @classmethod def setUpClass(cls): + super(TestRawBSONDocument, cls).setUpClass() cls.client = client_context.client def tearDown(self): @@ -179,4 +181,4 @@ class TestRawBSONDocument(unittest.TestCase): rawdoc = RawBSONDocument(BSON.encode(SON(keyvaluepairs))) for rkey, elt in zip(rawdoc, keyvaluepairs): - self.assertEqual(rkey, elt[0]) \ No newline at end of file + self.assertEqual(rkey, elt[0])