diff --git a/bson/__init__.py b/bson/__init__.py index 0f4937d40..8e09b807e 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -17,6 +17,7 @@ import calendar import datetime +import itertools import re import struct import sys @@ -114,23 +115,47 @@ def _get_c_string(data, position, length=None): return value, position - -def _make_c_string(string, check_null=False): +def _make_c_string_check(string): + """Make a 'C' string, checking for embedded NUL characters.""" if isinstance(string, bytes): - if check_null and ZERO in string: + if b"\x00" in string: raise InvalidDocument("BSON keys / regex patterns must not " - "contain a NULL character") + "contain a NUL character") try: string.decode("utf-8") - return string + ZERO + return string + b"\x00" except UnicodeError: raise InvalidStringData("strings in documents must be valid " "UTF-8: %r" % string) else: - if check_null and "\x00" in string: + if "\x00" in string: raise InvalidDocument("BSON keys / regex patterns must not " - "contain a NULL character") - return string.encode("utf-8") + ZERO + "contain a NUL character") + return string.encode("utf-8") + b"\x00" + +def _make_c_string(string): + """Make a 'C' string.""" + if isinstance(string, bytes): + try: + string.decode("utf-8") + return string + b"\x00" + except UnicodeError: + raise InvalidStringData("strings in documents must be valid " + "UTF-8: %r" % string) + else: + return string.encode("utf-8") + b"\x00" + +if PY3: + def _make_name(string): + """Make a 'C' string suitable for a BSON key.""" + # Keys can only be text in python 3. + if "\x00" in string: + raise InvalidDocument("BSON keys / regex patterns must not " + "contain a NUL character") + return string.encode("utf-8") + b"\x00" +else: + # Keys can be unicode or bytes in python 2. + _make_name = _make_c_string_check def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re): @@ -346,151 +371,298 @@ if _use_c: _bson_to_dict = _cbson._bson_to_dict -def _element_to_bson(key, value, check_keys, uuid_subtype): - if not isinstance(key, string_type): - raise InvalidDocument("documents must have only string keys, " - "key was %r" % key) +_PACK_FLOAT = struct.Struct(" MAX_INT64 or value < MIN_INT64: - raise OverflowError("BSON can only handle up to 8-byte ints") - if value > MAX_INT32 or value < MIN_INT32: - return BSONLON + name + struct.pack(" MAX_INT64 or value < MIN_INT64: - raise OverflowError("BSON can only handle up to 8-byte ints") - return BSONLON + name + struct.pack("