PYTHON-673 python 2/3 single-source for the bson module

This commit is contained in:
Luke Lovett 2014-04-17 21:29:15 +00:00
parent e0b52baf8e
commit d4a94d30fd
9 changed files with 107 additions and 73 deletions

View File

@ -31,7 +31,13 @@ from bson.errors import (InvalidBSON,
from bson.max_key import MaxKey
from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.py3compat import b, binary_type
from bson.py3compat import (b,
PY3,
binary_type,
iteritems,
text_type,
string_type,
reraise)
from bson.regex import Regex
from bson.son import SON, RE_TYPE
from bson.timestamp import Timestamp
@ -50,8 +56,8 @@ try:
except ImportError:
_use_uuid = False
PY3 = sys.version_info[0] == 3
if PY3:
long = int
MAX_INT32 = 2147483647
MIN_INT32 = -2147483648
@ -61,33 +67,30 @@ MIN_INT64 = -9223372036854775808
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
# Create constants compatible with all versions of
# python from 2.4 forward. In 2.x b("foo") is just
# "foo". In 3.x it becomes b"foo".
EMPTY = b("")
ZERO = b("\x00")
ONE = b("\x01")
EMPTY = b""
ZERO = b"\x00"
ONE = b"\x01"
BSONNUM = b("\x01") # Floating point
BSONSTR = b("\x02") # UTF-8 string
BSONOBJ = b("\x03") # Embedded document
BSONARR = b("\x04") # Array
BSONBIN = b("\x05") # Binary
BSONUND = b("\x06") # Undefined
BSONOID = b("\x07") # ObjectId
BSONBOO = b("\x08") # Boolean
BSONDAT = b("\x09") # UTC Datetime
BSONNUL = b("\x0A") # Null
BSONRGX = b("\x0B") # Regex
BSONREF = b("\x0C") # DBRef
BSONCOD = b("\x0D") # Javascript code
BSONSYM = b("\x0E") # Symbol
BSONCWS = b("\x0F") # Javascript code with scope
BSONINT = b("\x10") # 32bit int
BSONTIM = b("\x11") # Timestamp
BSONLON = b("\x12") # 64bit int
BSONMIN = b("\xFF") # Min key
BSONMAX = b("\x7F") # Max key
BSONNUM = b"\x01" # Floating point
BSONSTR = b"\x02" # UTF-8 string
BSONOBJ = b"\x03" # Embedded document
BSONARR = b"\x04" # Array
BSONBIN = b"\x05" # Binary
BSONUND = b"\x06" # Undefined
BSONOID = b"\x07" # ObjectId
BSONBOO = b"\x08" # Boolean
BSONDAT = b"\x09" # UTC Datetime
BSONNUL = b"\x0A" # Null
BSONRGX = b"\x0B" # Regex
BSONREF = b"\x0C" # DBRef
BSONCOD = b"\x0D" # Javascript code
BSONSYM = b"\x0E" # Symbol
BSONCWS = b"\x0F" # Javascript code with scope
BSONINT = b"\x10" # 32bit int
BSONTIM = b"\x11" # Timestamp
BSONLON = b"\x12" # 64bit int
BSONMIN = b"\xFF" # Min key
BSONMAX = b"\x7F" # Max key
def _get_int(data, position, as_class=None,
@ -117,12 +120,7 @@ def _get_c_string(data, position, length=None):
def _make_c_string(string, check_null=False):
if isinstance(string, unicode):
if check_null and "\x00" in string:
raise InvalidDocument("BSON keys / regex patterns must not "
"contain a NULL character")
return string.encode("utf-8") + ZERO
else:
if isinstance(string, bytes):
if check_null and ZERO in string:
raise InvalidDocument("BSON keys / regex patterns must not "
"contain a NULL character")
@ -132,6 +130,11 @@ def _make_c_string(string, check_null=False):
except UnicodeError:
raise InvalidStringData("strings in documents must be valid "
"UTF-8: %r" % string)
else:
if check_null and "\x00" in string:
raise InvalidDocument("BSON keys / regex patterns must not "
"contain a NULL character")
return string.encode("utf-8") + ZERO
def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re):
@ -349,7 +352,7 @@ if _use_c:
def _element_to_bson(key, value, check_keys, uuid_subtype):
if not isinstance(key, basestring):
if not isinstance(key, string_type):
raise InvalidDocument("documents must have only string keys, "
"key was %r" % key)
@ -405,7 +408,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
cstring = _make_c_string(value)
length = struct.pack("<i", len(cstring))
return BSONSTR + name + length + cstring
if isinstance(value, unicode):
if isinstance(value, text_type):
cstring = _make_c_string(value)
length = struct.pack("<i", len(cstring))
return BSONSTR + name + length + cstring
@ -429,7 +432,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
return BSONINT + name + struct.pack("<i", value)
# Python 3 has no separate long type (long is aliased to int there), so this
# branch only applies to Python 2. On Python 3 the previous if block will
# match instead.
if isinstance(value, long):
if not PY3 and isinstance(value, long):
if value > MAX_INT64 or value < MIN_INT64:
raise OverflowError("BSON can only handle up to 8-byte ints")
return BSONLON + name + struct.pack("<q", value)
@ -479,7 +482,7 @@ def _dict_to_bson(dict, check_keys, uuid_subtype, top_level=True):
if top_level and "_id" in dict:
elements.append(_element_to_bson("_id", dict["_id"],
check_keys, uuid_subtype))
for (key, value) in dict.iteritems():
for (key, value) in iteritems(dict):
if not top_level or key != "_id":
elements.append(_element_to_bson(key, value,
check_keys, uuid_subtype))
@ -537,7 +540,9 @@ def decode_all(data, as_class=dict,
except Exception:
# Change exception type to InvalidBSON but preserve traceback.
exc_type, exc_value, exc_tb = sys.exc_info()
raise InvalidBSON, str(exc_value), exc_tb
reraise(InvalidBSON, exc_value, exc_tb)
if _use_c:
decode_all = _cbson.decode_all

View File

@ -14,6 +14,8 @@
"""Tools for representing JavaScript code in BSON.
"""
from bson.py3compat import string_type
class Code(str):
"""BSON's JavaScript code type.
@ -42,9 +44,9 @@ class Code(str):
_type_marker = 13
def __new__(cls, code, scope=None, **kwargs):
if not isinstance(code, basestring):
if not isinstance(code, string_type):
raise TypeError("code must be an "
"instance of %s" % (basestring.__name__,))
"instance of %s" % (string_type.__name__))
self = str.__new__(cls, code)

View File

@ -16,6 +16,7 @@
from copy import deepcopy
from bson.py3compat import iteritems, string_type
from bson.son import SON
@ -49,12 +50,12 @@ class DBRef(object):
.. mongodoc:: dbrefs
"""
if not isinstance(collection, basestring):
if not isinstance(collection, string_type):
raise TypeError("collection must be an "
"instance of %s" % (basestring.__name__,))
if database is not None and not isinstance(database, basestring):
"instance of %s" % string_type.__name__)
if database is not None and not isinstance(database, string_type):
raise TypeError("database must be an "
"instance of %s" % (basestring.__name__,))
"instance of %s" % string_type.__name__)
self.__collection = collection
self.__id = id
@ -110,7 +111,7 @@ class DBRef(object):
def __repr__(self):
extra = "".join([", %s=%r" % (k, v)
for k, v in self.__kwargs.iteritems()])
for k, v in iteritems(self.__kwargs)])
if self.database is None:
return "DBRef(%r, %r%s)" % (self.collection, self.id, extra)
return "DBRef(%r, %r, %r%s)" % (self.collection, self.id,

View File

@ -77,6 +77,8 @@ import calendar
import datetime
import re
from bson.py3compat import iteritems, text_type
json_lib = True
try:
import json
@ -97,7 +99,7 @@ from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from bson.py3compat import PY3, binary_type, string_types
from bson.py3compat import PY3, binary_type, string_type
_RE_OPT_TABLE = {
@ -151,8 +153,8 @@ def _json_convert(obj):
converted into json.
"""
if hasattr(obj, 'iteritems') or hasattr(obj, 'items'): # PY3 support
return SON(((k, _json_convert(v)) for k, v in obj.iteritems()))
elif hasattr(obj, '__iter__') and not isinstance(obj, string_types):
return SON(((k, _json_convert(v)) for k, v in iteritems(obj)))
elif hasattr(obj, '__iter__') and not isinstance(obj, string_type):
return list((_json_convert(v) for v in obj))
try:
return default(obj)
@ -230,7 +232,7 @@ def default(obj):
flags += "u"
if obj.flags & re.VERBOSE:
flags += "x"
if isinstance(obj.pattern, unicode):
if isinstance(obj.pattern, text_type):
pattern = obj.pattern
else:
pattern = obj.pattern.decode('utf-8')

View File

@ -33,12 +33,11 @@ import threading
import time
from bson.errors import InvalidId
from bson.py3compat import (PY3, b, binary_type, text_type,
bytes_from_hex, string_types)
from bson.py3compat import PY3, binary_type, text_type, bytes_from_hex
from bson.tz_util import utc
EMPTY = b("")
ZERO = b("\x00")
EMPTY = b""
ZERO = b"\x00"
def _machine_bytes():
"""Get the machine portion of an ObjectId.
@ -181,7 +180,7 @@ class ObjectId(object):
"""
if isinstance(oid, ObjectId):
self.__id = oid.__id
elif isinstance(oid, string_types):
elif isinstance(oid, (text_type, binary_type)):
if len(oid) == 12:
if isinstance(oid, binary_type):
self.__id = oid

View File

@ -20,7 +20,6 @@ PY3 = sys.version_info[0] == 3
if PY3:
import codecs
from io import BytesIO as StringIO
def b(s):
@ -33,12 +32,23 @@ if PY3:
# See http://python3porting.com/problems.html#nicer-solutions
return codecs.latin_1_encode(s)[0]
def u(s):
# In Python 3, str literals are already unicode, so s is returned unchanged.
return s
def bytes_from_hex(h):
return bytes.fromhex(h)
def iteritems(d):
return d.items()
def reraise(exctype, value, trace=None):
raise exctype(str(value)).with_traceback(trace)
binary_type = bytes
text_type = str
next_item = "__next__"
text_type = str
string_type = str
integer_types = int
else:
try:
@ -50,13 +60,24 @@ else:
# See comments above. In python 2.x b('foo') is just 'foo'.
return s
import codecs
def u(s):
"""Replacement for unicode literal prefix."""
return unicode(s.replace('\\', '\\\\'), 'unicode_escape')
def bytes_from_hex(h):
return h.decode('hex')
binary_type = str
# 2to3 will convert this to "str". That's okay
# since we won't ever get here under python3.
text_type = unicode
next_item = "next"
def iteritems(d):
return d.iteritems()
string_types = (binary_type, text_type)
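# The three-argument form "raise x, y, z" is a SyntaxError in Python 3, so the
# Python 2 version of reraise is defined through exec to keep it out of the
# module's own syntax.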
exec("""def reraise(exctype, value, trace=None):
raise exctype, str(value), trace
""")
binary_type = str
string_type = basestring
text_type = unicode
integer_types = (int, long)

View File

@ -18,7 +18,7 @@
import re
from bson.son import RE_TYPE
from bson.py3compat import string_types
from bson.py3compat import text_type, binary_type, string_type
def str_flags_to_int(str_flags):
@ -86,11 +86,11 @@ class Regex(object):
- `flags`: (optional) an integer bitmask, or a string of flag
characters like "im" for IGNORECASE and MULTILINE
"""
if not isinstance(pattern, string_types):
if not isinstance(pattern, (text_type, binary_type)):
raise TypeError("pattern must be a string, not %s" % type(pattern))
self.pattern = pattern
if isinstance(flags, string_types):
if isinstance(flags, string_type):
self.flags = str_flags_to_int(flags)
elif isinstance(flags, int):
self.flags = flags

View File

@ -21,6 +21,9 @@ dictionary."""
import copy
import re
from bson.py3compat import iteritems
# This sort of sucks, but seems to be as good as it gets...
# This is essentially the same as re._pattern_type
RE_TYPE = type(re.compile(""))
@ -174,7 +177,7 @@ class SON(dict):
def popitem(self):
try:
k, v = self.iteritems().next()
k, v = next(self.iteritems())
except StopIteration:
raise KeyError('container is empty')
del self[k]
@ -214,7 +217,7 @@ class SON(dict):
return not self == other
def __len__(self):
return len(self.keys())
return len(self.__keys)
def to_dict(self):
"""Convert a SON document to a normal Python dictionary instance.
@ -229,7 +232,7 @@ class SON(dict):
if isinstance(value, SON):
value = dict(value)
if isinstance(value, dict):
for k, v in value.iteritems():
for k, v in iteritems(value):
value[k] = transform_value(v)
return value

View File

@ -18,6 +18,7 @@
import calendar
import datetime
from bson.py3compat import integer_types
from bson.tz_util import utc
UPPERBOUND = 4294967296
@ -53,9 +54,9 @@ class Timestamp(object):
if time.utcoffset() is not None:
time = time - time.utcoffset()
time = int(calendar.timegm(time.timetuple()))
if not isinstance(time, (int, long)):
if not isinstance(time, integer_types):
raise TypeError("time must be an instance of int")
if not isinstance(inc, (int, long)):
if not isinstance(inc, integer_types):
raise TypeError("inc must be an instance of int")
if not 0 <= time < UPPERBOUND:
raise ValueError("time must be contained in [0, 2**32)")