PYTHON-673 python 2/3 single-source for the bson module
This commit is contained in:
parent
e0b52baf8e
commit
d4a94d30fd
@ -31,7 +31,13 @@ from bson.errors import (InvalidBSON,
|
||||
from bson.max_key import MaxKey
|
||||
from bson.min_key import MinKey
|
||||
from bson.objectid import ObjectId
|
||||
from bson.py3compat import b, binary_type
|
||||
from bson.py3compat import (b,
|
||||
PY3,
|
||||
binary_type,
|
||||
iteritems,
|
||||
text_type,
|
||||
string_type,
|
||||
reraise)
|
||||
from bson.regex import Regex
|
||||
from bson.son import SON, RE_TYPE
|
||||
from bson.timestamp import Timestamp
|
||||
@ -50,8 +56,8 @@ try:
|
||||
except ImportError:
|
||||
_use_uuid = False
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
if PY3:
|
||||
long = int
|
||||
|
||||
MAX_INT32 = 2147483647
|
||||
MIN_INT32 = -2147483648
|
||||
@ -61,33 +67,30 @@ MIN_INT64 = -9223372036854775808
|
||||
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
|
||||
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
|
||||
|
||||
# Create constants compatible with all versions of
|
||||
# python from 2.4 forward. In 2.x b("foo") is just
|
||||
# "foo". In 3.x it becomes b"foo".
|
||||
EMPTY = b("")
|
||||
ZERO = b("\x00")
|
||||
ONE = b("\x01")
|
||||
EMPTY = b""
|
||||
ZERO = b"\x00"
|
||||
ONE = b"\x01"
|
||||
|
||||
BSONNUM = b("\x01") # Floating point
|
||||
BSONSTR = b("\x02") # UTF-8 string
|
||||
BSONOBJ = b("\x03") # Embedded document
|
||||
BSONARR = b("\x04") # Array
|
||||
BSONBIN = b("\x05") # Binary
|
||||
BSONUND = b("\x06") # Undefined
|
||||
BSONOID = b("\x07") # ObjectId
|
||||
BSONBOO = b("\x08") # Boolean
|
||||
BSONDAT = b("\x09") # UTC Datetime
|
||||
BSONNUL = b("\x0A") # Null
|
||||
BSONRGX = b("\x0B") # Regex
|
||||
BSONREF = b("\x0C") # DBRef
|
||||
BSONCOD = b("\x0D") # Javascript code
|
||||
BSONSYM = b("\x0E") # Symbol
|
||||
BSONCWS = b("\x0F") # Javascript code with scope
|
||||
BSONINT = b("\x10") # 32bit int
|
||||
BSONTIM = b("\x11") # Timestamp
|
||||
BSONLON = b("\x12") # 64bit int
|
||||
BSONMIN = b("\xFF") # Min key
|
||||
BSONMAX = b("\x7F") # Max key
|
||||
BSONNUM = b"\x01" # Floating point
|
||||
BSONSTR = b"\x02" # UTF-8 string
|
||||
BSONOBJ = b"\x03" # Embedded document
|
||||
BSONARR = b"\x04" # Array
|
||||
BSONBIN = b"\x05" # Binary
|
||||
BSONUND = b"\x06" # Undefined
|
||||
BSONOID = b"\x07" # ObjectId
|
||||
BSONBOO = b"\x08" # Boolean
|
||||
BSONDAT = b"\x09" # UTC Datetime
|
||||
BSONNUL = b"\x0A" # Null
|
||||
BSONRGX = b"\x0B" # Regex
|
||||
BSONREF = b"\x0C" # DBRef
|
||||
BSONCOD = b"\x0D" # Javascript code
|
||||
BSONSYM = b"\x0E" # Symbol
|
||||
BSONCWS = b"\x0F" # Javascript code with scope
|
||||
BSONINT = b"\x10" # 32bit int
|
||||
BSONTIM = b"\x11" # Timestamp
|
||||
BSONLON = b"\x12" # 64bit int
|
||||
BSONMIN = b"\xFF" # Min key
|
||||
BSONMAX = b"\x7F" # Max key
|
||||
|
||||
|
||||
def _get_int(data, position, as_class=None,
|
||||
@ -117,12 +120,7 @@ def _get_c_string(data, position, length=None):
|
||||
|
||||
|
||||
def _make_c_string(string, check_null=False):
|
||||
if isinstance(string, unicode):
|
||||
if check_null and "\x00" in string:
|
||||
raise InvalidDocument("BSON keys / regex patterns must not "
|
||||
"contain a NULL character")
|
||||
return string.encode("utf-8") + ZERO
|
||||
else:
|
||||
if isinstance(string, bytes):
|
||||
if check_null and ZERO in string:
|
||||
raise InvalidDocument("BSON keys / regex patterns must not "
|
||||
"contain a NULL character")
|
||||
@ -132,6 +130,11 @@ def _make_c_string(string, check_null=False):
|
||||
except UnicodeError:
|
||||
raise InvalidStringData("strings in documents must be valid "
|
||||
"UTF-8: %r" % string)
|
||||
else:
|
||||
if check_null and "\x00" in string:
|
||||
raise InvalidDocument("BSON keys / regex patterns must not "
|
||||
"contain a NULL character")
|
||||
return string.encode("utf-8") + ZERO
|
||||
|
||||
|
||||
def _get_number(data, position, as_class, tz_aware, uuid_subtype, compile_re):
|
||||
@ -349,7 +352,7 @@ if _use_c:
|
||||
|
||||
|
||||
def _element_to_bson(key, value, check_keys, uuid_subtype):
|
||||
if not isinstance(key, basestring):
|
||||
if not isinstance(key, string_type):
|
||||
raise InvalidDocument("documents must have only string keys, "
|
||||
"key was %r" % key)
|
||||
|
||||
@ -405,7 +408,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
|
||||
cstring = _make_c_string(value)
|
||||
length = struct.pack("<i", len(cstring))
|
||||
return BSONSTR + name + length + cstring
|
||||
if isinstance(value, unicode):
|
||||
if isinstance(value, text_type):
|
||||
cstring = _make_c_string(value)
|
||||
length = struct.pack("<i", len(cstring))
|
||||
return BSONSTR + name + length + cstring
|
||||
@ -429,7 +432,7 @@ def _element_to_bson(key, value, check_keys, uuid_subtype):
|
||||
return BSONINT + name + struct.pack("<i", value)
|
||||
# There is no long in python3, so this check only applies under python 2.
|
||||
# That's OK. The previous if block will match instead.
|
||||
if isinstance(value, long):
|
||||
if not PY3 and isinstance(value, long):
|
||||
if value > MAX_INT64 or value < MIN_INT64:
|
||||
raise OverflowError("BSON can only handle up to 8-byte ints")
|
||||
return BSONLON + name + struct.pack("<q", value)
|
||||
@ -479,7 +482,7 @@ def _dict_to_bson(dict, check_keys, uuid_subtype, top_level=True):
|
||||
if top_level and "_id" in dict:
|
||||
elements.append(_element_to_bson("_id", dict["_id"],
|
||||
check_keys, uuid_subtype))
|
||||
for (key, value) in dict.iteritems():
|
||||
for (key, value) in iteritems(dict):
|
||||
if not top_level or key != "_id":
|
||||
elements.append(_element_to_bson(key, value,
|
||||
check_keys, uuid_subtype))
|
||||
@ -537,7 +540,9 @@ def decode_all(data, as_class=dict,
|
||||
except Exception:
|
||||
# Change exception type to InvalidBSON but preserve traceback.
|
||||
exc_type, exc_value, exc_tb = sys.exc_info()
|
||||
raise InvalidBSON, str(exc_value), exc_tb
|
||||
reraise(InvalidBSON, exc_value, exc_tb)
|
||||
|
||||
|
||||
if _use_c:
|
||||
decode_all = _cbson.decode_all
|
||||
|
||||
|
||||
@ -14,6 +14,8 @@
|
||||
|
||||
"""Tools for representing JavaScript code in BSON.
|
||||
"""
|
||||
from bson.py3compat import string_type
|
||||
|
||||
|
||||
class Code(str):
|
||||
"""BSON's JavaScript code type.
|
||||
@ -42,9 +44,9 @@ class Code(str):
|
||||
_type_marker = 13
|
||||
|
||||
def __new__(cls, code, scope=None, **kwargs):
|
||||
if not isinstance(code, basestring):
|
||||
if not isinstance(code, string_type):
|
||||
raise TypeError("code must be an "
|
||||
"instance of %s" % (basestring.__name__,))
|
||||
"instance of %s" % (string_type.__name__))
|
||||
|
||||
self = str.__new__(cls, code)
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
from copy import deepcopy
|
||||
|
||||
from bson.py3compat import iteritems, string_type
|
||||
from bson.son import SON
|
||||
|
||||
|
||||
@ -49,12 +50,12 @@ class DBRef(object):
|
||||
|
||||
.. mongodoc:: dbrefs
|
||||
"""
|
||||
if not isinstance(collection, basestring):
|
||||
if not isinstance(collection, string_type):
|
||||
raise TypeError("collection must be an "
|
||||
"instance of %s" % (basestring.__name__,))
|
||||
if database is not None and not isinstance(database, basestring):
|
||||
"instance of %s" % string_type.__name__)
|
||||
if database is not None and not isinstance(database, string_type):
|
||||
raise TypeError("database must be an "
|
||||
"instance of %s" % (basestring.__name__,))
|
||||
"instance of %s" % string_type.__name__)
|
||||
|
||||
self.__collection = collection
|
||||
self.__id = id
|
||||
@ -110,7 +111,7 @@ class DBRef(object):
|
||||
|
||||
def __repr__(self):
|
||||
extra = "".join([", %s=%r" % (k, v)
|
||||
for k, v in self.__kwargs.iteritems()])
|
||||
for k, v in iteritems(self.__kwargs)])
|
||||
if self.database is None:
|
||||
return "DBRef(%r, %r%s)" % (self.collection, self.id, extra)
|
||||
return "DBRef(%r, %r, %r%s)" % (self.collection, self.id,
|
||||
|
||||
@ -77,6 +77,8 @@ import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from bson.py3compat import iteritems, text_type
|
||||
|
||||
json_lib = True
|
||||
try:
|
||||
import json
|
||||
@ -97,7 +99,7 @@ from bson.objectid import ObjectId
|
||||
from bson.regex import Regex
|
||||
from bson.timestamp import Timestamp
|
||||
|
||||
from bson.py3compat import PY3, binary_type, string_types
|
||||
from bson.py3compat import PY3, binary_type, string_type
|
||||
|
||||
|
||||
_RE_OPT_TABLE = {
|
||||
@ -151,8 +153,8 @@ def _json_convert(obj):
|
||||
converted into json.
|
||||
"""
|
||||
if hasattr(obj, 'iteritems') or hasattr(obj, 'items'): # PY3 support
|
||||
return SON(((k, _json_convert(v)) for k, v in obj.iteritems()))
|
||||
elif hasattr(obj, '__iter__') and not isinstance(obj, string_types):
|
||||
return SON(((k, _json_convert(v)) for k, v in iteritems(obj)))
|
||||
elif hasattr(obj, '__iter__') and not isinstance(obj, string_type):
|
||||
return list((_json_convert(v) for v in obj))
|
||||
try:
|
||||
return default(obj)
|
||||
@ -230,7 +232,7 @@ def default(obj):
|
||||
flags += "u"
|
||||
if obj.flags & re.VERBOSE:
|
||||
flags += "x"
|
||||
if isinstance(obj.pattern, unicode):
|
||||
if isinstance(obj.pattern, text_type):
|
||||
pattern = obj.pattern
|
||||
else:
|
||||
pattern = obj.pattern.decode('utf-8')
|
||||
|
||||
@ -33,12 +33,11 @@ import threading
|
||||
import time
|
||||
|
||||
from bson.errors import InvalidId
|
||||
from bson.py3compat import (PY3, b, binary_type, text_type,
|
||||
bytes_from_hex, string_types)
|
||||
from bson.py3compat import PY3, binary_type, text_type, bytes_from_hex
|
||||
from bson.tz_util import utc
|
||||
|
||||
EMPTY = b("")
|
||||
ZERO = b("\x00")
|
||||
EMPTY = b""
|
||||
ZERO = b"\x00"
|
||||
|
||||
def _machine_bytes():
|
||||
"""Get the machine portion of an ObjectId.
|
||||
@ -181,7 +180,7 @@ class ObjectId(object):
|
||||
"""
|
||||
if isinstance(oid, ObjectId):
|
||||
self.__id = oid.__id
|
||||
elif isinstance(oid, string_types):
|
||||
elif isinstance(oid, (text_type, binary_type)):
|
||||
if len(oid) == 12:
|
||||
if isinstance(oid, binary_type):
|
||||
self.__id = oid
|
||||
|
||||
@ -20,7 +20,6 @@ PY3 = sys.version_info[0] == 3
|
||||
|
||||
if PY3:
|
||||
import codecs
|
||||
|
||||
from io import BytesIO as StringIO
|
||||
|
||||
def b(s):
|
||||
@ -33,12 +32,23 @@ if PY3:
|
||||
# See http://python3porting.com/problems.html#nicer-solutions
|
||||
return codecs.latin_1_encode(s)[0]
|
||||
|
||||
# Python 3 variant of the u() helper: returns its argument as-is.
def u(s):
|
||||
# Python 3 str literals are already unicode, so s is returned unchanged.
|
||||
return s
|
||||
|
||||
# Python 3 variant: build a bytes object from the hex-digit string h
# using bytes.fromhex().
def bytes_from_hex(h):
|
||||
return bytes.fromhex(h)
|
||||
|
||||
def iteritems(d):
    """Return an iterator over the (key, value) pairs of mapping ``d``.

    In Python 3 ``d.items()`` is a view object rather than an iterator, so
    it is wrapped in ``iter()``. This keeps the return type in line with the
    Python 2 implementation of this helper (``d.iteritems()``), which means
    the result can be handed to ``next()`` as well as used in a ``for`` loop
    on either major version.
    """
    return iter(d.items())
|
||||
|
||||
# Python 3 variant: raise a new exctype instance built from str(value),
# with trace attached as its traceback (trace defaults to None).
def reraise(exctype, value, trace=None):
|
||||
raise exctype(str(value)).with_traceback(trace)
|
||||
|
||||
binary_type = bytes
|
||||
text_type = str
|
||||
next_item = "__next__"
|
||||
text_type = str
|
||||
string_type = str
|
||||
integer_types = int
|
||||
|
||||
else:
|
||||
try:
|
||||
@ -50,13 +60,24 @@ else:
|
||||
# See comments above. In python 2.x b('foo') is just 'foo'.
|
||||
return s
|
||||
|
||||
import codecs
|
||||
|
||||
def u(s):
|
||||
"""Replacement for unicode literal prefix."""
|
||||
# Each backslash in s is doubled first, then the result is decoded to
# unicode with the 'unicode_escape' codec.
return unicode(s.replace('\\', '\\\\'), 'unicode_escape')
|
||||
|
||||
# Python 2 variant: decode the hex-digit string h with the 'hex' codec.
def bytes_from_hex(h):
|
||||
return h.decode('hex')
|
||||
|
||||
binary_type = str
|
||||
# 2to3 will convert this to "str". That's okay
|
||||
# since we won't ever get here under python3.
|
||||
text_type = unicode
|
||||
next_item = "next"
|
||||
# Python 2 variant: delegate to the mapping's own iteritems() method.
def iteritems(d):
|
||||
return d.iteritems()
|
||||
|
||||
string_types = (binary_type, text_type)
|
||||
# "raise x, y, z" raises SyntaxError in Python 3
|
||||
# reraise() is defined through exec() so that the three-argument raise
# statement lives inside a string literal instead of inline module code.
# The resulting function raises exctype with str(value) and traceback trace.
exec("""def reraise(exctype, value, trace=None):
|
||||
raise exctype, str(value), trace
|
||||
""")
|
||||
|
||||
binary_type = str
|
||||
string_type = basestring
|
||||
text_type = unicode
|
||||
integer_types = (int, long)
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
import re
|
||||
|
||||
from bson.son import RE_TYPE
|
||||
from bson.py3compat import string_types
|
||||
from bson.py3compat import text_type, binary_type, string_type
|
||||
|
||||
|
||||
def str_flags_to_int(str_flags):
|
||||
@ -86,11 +86,11 @@ class Regex(object):
|
||||
- `flags`: (optional) an integer bitmask, or a string of flag
|
||||
characters like "im" for IGNORECASE and MULTILINE
|
||||
"""
|
||||
if not isinstance(pattern, string_types):
|
||||
if not isinstance(pattern, (text_type, binary_type)):
|
||||
raise TypeError("pattern must be a string, not %s" % type(pattern))
|
||||
self.pattern = pattern
|
||||
|
||||
if isinstance(flags, string_types):
|
||||
if isinstance(flags, string_type):
|
||||
self.flags = str_flags_to_int(flags)
|
||||
elif isinstance(flags, int):
|
||||
self.flags = flags
|
||||
|
||||
@ -21,6 +21,9 @@ dictionary."""
|
||||
import copy
|
||||
import re
|
||||
|
||||
from bson.py3compat import iteritems
|
||||
|
||||
|
||||
# This sort of sucks, but seems to be as good as it gets...
|
||||
# This is essentially the same as re._pattern_type
|
||||
RE_TYPE = type(re.compile(""))
|
||||
@ -174,7 +177,7 @@ class SON(dict):
|
||||
|
||||
def popitem(self):
|
||||
try:
|
||||
k, v = self.iteritems().next()
|
||||
k, v = next(self.iteritems())
|
||||
except StopIteration:
|
||||
raise KeyError('container is empty')
|
||||
del self[k]
|
||||
@ -214,7 +217,7 @@ class SON(dict):
|
||||
return not self == other
|
||||
|
||||
def __len__(self):
|
||||
return len(self.keys())
|
||||
return len(self.__keys)
|
||||
|
||||
def to_dict(self):
|
||||
"""Convert a SON document to a normal Python dictionary instance.
|
||||
@ -229,7 +232,7 @@ class SON(dict):
|
||||
if isinstance(value, SON):
|
||||
value = dict(value)
|
||||
if isinstance(value, dict):
|
||||
for k, v in value.iteritems():
|
||||
for k, v in iteritems(value):
|
||||
value[k] = transform_value(v)
|
||||
return value
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
import calendar
|
||||
import datetime
|
||||
|
||||
from bson.py3compat import integer_types
|
||||
from bson.tz_util import utc
|
||||
|
||||
UPPERBOUND = 4294967296
|
||||
@ -53,9 +54,9 @@ class Timestamp(object):
|
||||
if time.utcoffset() is not None:
|
||||
time = time - time.utcoffset()
|
||||
time = int(calendar.timegm(time.timetuple()))
|
||||
if not isinstance(time, (int, long)):
|
||||
if not isinstance(time, integer_types):
|
||||
raise TypeError("time must be an instance of int")
|
||||
if not isinstance(inc, (int, long)):
|
||||
if not isinstance(inc, integer_types):
|
||||
raise TypeError("inc must be an instance of int")
|
||||
if not 0 <= time < UPPERBOUND:
|
||||
raise ValueError("time must be contained in [0, 2**32)")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user