PYTHON-1111 Add JSONOptions for dumps and loads

PYTHON-767 Support JSON strict mode $date output
PYTHON-1039 Support JSON strict mode $numberLong output
PYTHON-1103 Support JSON strict mode UUID output
PYTHON-1111 Support custom document class in loads
PYTHON-1111 Support tz_aware and tzinfo in loads
Refactor milliseconds to datetime conversions
This commit is contained in:
Shane Harvey 2016-07-22 11:14:24 -07:00
parent 84875ed38f
commit 57d1ccde2f
6 changed files with 280 additions and 55 deletions

View File

@ -230,18 +230,7 @@ def _get_date(data, position, dummy0, opts, dummy1):
"""Decode a BSON datetime to python datetime.datetime."""
end = position + 8
millis = _UNPACK_LONG(data[position:end])[0]
diff = ((millis % 1000) + 1000) % 1000
seconds = (millis - diff) / 1000
micros = diff * 1000
if opts.tz_aware:
dt = EPOCH_AWARE + datetime.timedelta(
seconds=seconds, microseconds=micros)
if opts.tzinfo:
dt = dt.astimezone(opts.tzinfo)
else:
dt = EPOCH_NAIVE + datetime.timedelta(
seconds=seconds, microseconds=micros)
return dt, end
return _millis_to_datetime(millis, opts), end
def _get_code(data, position, obj_end, opts, element_name):
@ -543,10 +532,7 @@ def _encode_bool(name, value, dummy0, dummy1):
def _encode_datetime(name, value, dummy0, dummy1):
"""Encode datetime.datetime."""
if value.utcoffset() is not None:
value = value - value.utcoffset()
millis = int(calendar.timegm(value.timetuple()) * 1000 +
value.microsecond / 1000)
millis = _datetime_to_millis(value)
return b"\x09" + name + _PACK_LONG(millis)
@ -748,6 +734,30 @@ if _USE_C:
_dict_to_bson = _cbson._dict_to_bson
def _millis_to_datetime(millis, opts):
    """Convert milliseconds since epoch UTC to datetime.

    :Parameters:
      - `millis`: Integer number of milliseconds since the Unix epoch; may
        be negative for instants before the epoch.
      - `opts`: An options object providing `tz_aware` and `tzinfo`.

    Returns a timezone aware datetime when `opts.tz_aware` is true
    (converted to `opts.tzinfo` if one is set), otherwise a naive datetime.
    """
    # divmod() floors, so the remainder is always in [0, 1000) even for
    # negative input, and the second count stays an exact integer instead
    # of going through float division.
    seconds, remainder_ms = divmod(millis, 1000)
    delta = datetime.timedelta(seconds=seconds,
                               microseconds=remainder_ms * 1000)
    if opts.tz_aware:
        dt = EPOCH_AWARE + delta
        if opts.tzinfo:
            dt = dt.astimezone(opts.tzinfo)
        return dt
    return EPOCH_NAIVE + delta
def _datetime_to_millis(dtm):
"""Convert datetime to milliseconds since epoch UTC."""
if dtm.utcoffset() is not None:
dtm = dtm - dtm.utcoffset()
return int(calendar.timegm(dtm.timetuple()) * 1000 +
dtm.microsecond / 1000)
_CODEC_OPTIONS_TYPE_ERROR = TypeError(
"codec_options must be an instance of CodecOptions")

View File

@ -109,7 +109,7 @@ class CodecOptions(_options_base):
self.unicode_decode_error_handler, self.tzinfo))
def __repr__(self):
return 'CodecOptions(%s)' % (self._arguments_repr(),)
return '%s(%s)' % (self.__class__.__name__, self._arguments_repr())
DEFAULT_CODEC_OPTIONS = CodecOptions()

View File

@ -38,3 +38,16 @@ class InvalidDocument(BSONError):
class InvalidId(BSONError):
"""Raised when trying to create an ObjectId from invalid data.
"""
class JSONError(Exception):
"""Base class for all JSON exceptions.
"""
class InvalidDatetime(JSONError):
"""Raised when trying to encode a datetime without a timezone.
Only raised when
:class:`JSONOptions.strict_date <bson.json_util.JSONOptions>` is ``True``.
"""

View File

@ -17,9 +17,9 @@
This module provides two helper methods `dumps` and `loads` that wrap the
native :mod:`json` methods and provide explicit BSON conversion to and from
json. This allows for specialized encoding and decoding of BSON documents
into `Mongo Extended JSON
<http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON>`_'s *Strict*
mode. This lets you encode / decode BSON documents to JSON even when
into `MongoDB Extended JSON
<http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON>`_'s *Strict
mode*. This lets you encode / decode BSON documents to JSON even when
they use special BSON types.
Example usage (serialization):
@ -68,17 +68,19 @@ but it will be faster as there is less recursion.
"""
import base64
import calendar
import collections
import datetime
import json
import re
import uuid
import bson
from bson import EPOCH_AWARE, RE_TYPE, SON
from bson.binary import Binary
from bson.binary import (Binary, JAVA_LEGACY, CSHARP_LEGACY, OLD_UUID_SUBTYPE,
UUID_SUBTYPE)
from bson.code import Code
from bson.codec_options import CodecOptions
from bson.errors import InvalidDatetime
from bson.dbref import DBRef
from bson.int64 import Int64
from bson.max_key import MaxKey
@ -105,18 +107,68 @@ class JSONOptions(CodecOptions):
"""Encapsulates JSON options for :func:`dumps` and :func:`loads`.
:Parameters:
- `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects
are encoded to MongoDB Extended JSON's *Strict mode* type
`NumberLong`, ie ``'{"$numberLong": "<number>" }'``. Otherwise they
will be encoded as an `int`. Defaults to ``False``.
- `strict_date`: If ``True``, `datetime.datetime` objects are encoded to
MongoDB Extended JSON's *Strict mode* type `Date`. Otherwise they will
be encoded as milliseconds since the Unix epoch. Defaults to ``False``.
- `strict_uuid`: If ``True``, :class:`uuid.UUID` objects are encoded to
MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise they
will be encoded as ``'{"$uuid": "<hex>" }'``. Defaults to ``False``.
- `document_class`: BSON documents returned by :func:`loads` will be
decoded to an instance of this class. Must be a subclass of
:class:`collections.MutableMapping`. Defaults to :class:`dict`.
- `uuid_representation`: The BSON representation to use when encoding
and decoding instances of :class:`uuid.UUID`. Defaults to
:const:`~bson.binary.PYTHON_LEGACY`.
- `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type
`Date` will be decoded to timezone aware instances of
:class:`datetime.datetime`. Otherwise they will be naive. Defaults
to ``True``.
- `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
timezone from which :class:`~datetime.datetime` objects should be
decoded. Defaults to :const:`~bson.tz_util.utc`.
- `args`: arguments to :class:`~bson.codec_options.CodecOptions`
- `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`
.. seealso:: The documentation for `MongoDB Extended JSON
<http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON>`_.
.. versionadded:: 3.4
"""
def __new__(cls, *args, **kwargs):
def __new__(cls, strict_number_long=False, strict_date=False,
strict_uuid=False, *args, **kwargs):
kwargs["tz_aware"] = kwargs.get("tz_aware", True)
if kwargs["tz_aware"]:
kwargs["tzinfo"] = kwargs.get("tzinfo", utc)
self = super(JSONOptions, cls).__new__(cls, *args, **kwargs)
self.strict_number_long = strict_number_long
self.strict_date = strict_date
self.strict_uuid = strict_uuid
return self
def __repr__(self):
return 'JSONOptions(%s)' % (self._arguments_repr(),)
def _arguments_repr(self):
return 'strict_number_long=%r, strict_date=%r, strict_uuid=%r, %s' % (
self.strict_number_long,
self.strict_date,
self.strict_uuid,
super(JSONOptions, self)._arguments_repr())
DEFAULT_JSON_OPTIONS = JSONOptions()
"""The default :class:`JSONOptions` for JSON encoding/decoding.
.. versionadded:: 3.4
"""
STRICT_JSON_OPTIONS = JSONOptions(strict_number_long=True, strict_date=True,
strict_uuid=True)
""":class:`JSONOptions` for MongoDB Extended JSON's *Strict mode* encoding.
.. versionadded:: 3.4
"""
def dumps(obj, *args, **kwargs):
@ -125,6 +177,18 @@ def dumps(obj, *args, **kwargs):
Recursive function that handles all BSON types including
:class:`~bson.binary.Binary` and :class:`~bson.code.Code`.
Raises :class:`~bson.errors.InvalidDatetime` if `obj` contains a
:class:`datetime.datetime` without a timezone and
`json_options.strict_date` is ``True``.
:Parameters:
- `json_options`: A :class:`JSONOptions` instance used to modify the
encoding of MongoDB Extended JSON types. Defaults to
:const:`DEFAULT_JSON_OPTIONS`.
.. versionchanged:: 3.4
Accepts optional parameter `json_options`. See :class:`JSONOptions`.
.. versionchanged:: 2.7
Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef
instances.
@ -137,9 +201,18 @@ def loads(s, *args, **kwargs):
"""Helper function that wraps :func:`json.loads`.
Automatically passes the object_hook for BSON type conversion.
:Parameters:
- `json_options`: A :class:`JSONOptions` instance used to modify the
decoding of MongoDB Extended JSON types. Defaults to
:const:`DEFAULT_JSON_OPTIONS`.
.. versionchanged:: 3.4
Accepts optional parameter `json_options`. See :class:`JSONOptions`.
"""
json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS)
kwargs["object_hook"] = lambda dct: object_hook(dct, json_options)
kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(pairs,
json_options)
return json.loads(s, *args, **kwargs)
@ -158,6 +231,10 @@ def _json_convert(obj, json_options=DEFAULT_JSON_OPTIONS):
return obj
def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS):
    """Build a document from `pairs` and apply :func:`object_hook` to it.

    `pairs` is handed to ``json_options.document_class`` to construct the
    document, which is then passed through :func:`object_hook` together
    with the same `json_options`.
    """
    document = json_options.document_class(pairs)
    return object_hook(document, json_options)
def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
if "$oid" in dct:
return ObjectId(str(dct["$oid"]))
@ -190,10 +267,7 @@ def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
aware = datetime.datetime.strptime(
dt, "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc)
if not offset or offset == 'Z':
# UTC
return aware
else:
if offset and offset != 'Z':
if len(offset) == 6:
hours, minutes = offset[1:].split(':')
secs = (int(hours) * 3600 + int(minutes) * 60)
@ -203,14 +277,21 @@ def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
secs = int(offset[1:3]) * 3600
if offset[0] == "-":
secs *= -1
return aware - datetime.timedelta(seconds=secs)
aware = aware - datetime.timedelta(seconds=secs)
if json_options.tz_aware:
if json_options.tzinfo:
aware = aware.astimezone(json_options.tzinfo)
return aware
else:
return aware.replace(tzinfo=None)
# mongoexport 2.6 and newer, time before the epoch (SERVER-15275)
elif isinstance(dtm, collections.Mapping):
secs = float(dtm["$numberLong"]) / 1000.0
millis = int(dtm["$numberLong"])
# mongoexport before 2.6
else:
secs = float(dtm) / 1000.0
return EPOCH_AWARE + datetime.timedelta(seconds=secs)
millis = int(dtm)
return bson._millis_to_datetime(millis, json_options)
if "$regex" in dct:
flags = 0
# PyMongo always adds $options but some other tools may not.
@ -227,7 +308,17 @@ def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS):
subtype = int(dct["$type"], 16)
if subtype >= 0xffffff80: # Handle mongoexport values
subtype = int(dct["$type"][6:], 16)
return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
data = base64.b64decode(dct["$binary"].encode())
# special handling for UUID
if subtype == OLD_UUID_SUBTYPE:
if json_options.uuid_representation == CSHARP_LEGACY:
return uuid.UUID(bytes_le=data)
if json_options.uuid_representation == JAVA_LEGACY:
data = data[7::-1] + data[:7:-1]
return uuid.UUID(bytes=data)
if subtype == UUID_SUBTYPE:
return uuid.UUID(bytes=data)
return Binary(data, subtype)
if "$code" in dct:
return Code(dct["$code"], dct.get("$scope"))
if "$uuid" in dct:
@ -250,12 +341,22 @@ def default(obj, json_options=DEFAULT_JSON_OPTIONS):
if isinstance(obj, DBRef):
return _json_convert(obj.as_doc())
if isinstance(obj, datetime.datetime):
# TODO share this code w/ bson.py?
if obj.utcoffset() is not None:
obj = obj - obj.utcoffset()
millis = int(calendar.timegm(obj.timetuple()) * 1000 +
obj.microsecond / 1000)
return {"$date": millis}
if json_options.strict_date:
if not obj.tzinfo:
raise InvalidDatetime("datetime is not timezone aware", obj)
if obj >= EPOCH_AWARE:
return {"$date": "%s.%03d%s" % (
obj.strftime("%Y-%m-%dT%H:%M:%S"),
int(obj.microsecond / 1000),
obj.strftime("%z"))}
millis = bson._datetime_to_millis(obj)
if json_options.strict_date:
return {"$date": {"$numberLong": str(millis)}}
else:
return {"$date": millis}
if json_options.strict_number_long and isinstance(obj, Int64):
return {"$numberLong": str(obj)}
if isinstance(obj, (RE_TYPE, Regex)):
flags = ""
if obj.flags & re.IGNORECASE:
@ -292,5 +393,18 @@ def default(obj, json_options=DEFAULT_JSON_OPTIONS):
('$binary', base64.b64encode(obj).decode()),
('$type', "00")])
if isinstance(obj, uuid.UUID):
return {"$uuid": obj.hex}
if json_options.strict_uuid:
data = obj.bytes
subtype = OLD_UUID_SUBTYPE
if json_options.uuid_representation == CSHARP_LEGACY:
data = obj.bytes_le
elif json_options.uuid_representation == JAVA_LEGACY:
data = data[7::-1] + data[:7:-1]
elif json_options.uuid_representation == UUID_SUBTYPE:
subtype = UUID_SUBTYPE
return SON([
('$binary', base64.b64encode(data).decode()),
('$type', "%02x" % subtype)])
else:
return {"$uuid": obj.hex}
raise TypeError("%r is not JSON serializable" % obj)

View File

@ -4,3 +4,4 @@
:synopsis: Tools for using Python's json module with BSON documents
:members:
:undoc-members:
:member-order: bysource

View File

@ -22,9 +22,11 @@ import uuid
sys.path[0:0] = [""]
from bson import json_util, EPOCH_AWARE
from bson.binary import Binary, MD5_SUBTYPE, USER_DEFINED_SUBTYPE
from bson import json_util, EPOCH_AWARE, EPOCH_NAIVE, SON
from bson.binary import (Binary, MD5_SUBTYPE, USER_DEFINED_SUBTYPE,
JAVA_LEGACY, CSHARP_LEGACY, STANDARD)
from bson.code import Code
from bson.errors import InvalidDatetime
from bson.dbref import DBRef
from bson.int64 import Int64
from bson.max_key import MaxKey
@ -32,7 +34,7 @@ from bson.min_key import MinKey
from bson.objectid import ObjectId
from bson.regex import Regex
from bson.timestamp import Timestamp
from bson.tz_util import utc
from bson.tz_util import FixedOffset, utc
from test import unittest, IntegrationTest
@ -40,11 +42,11 @@ PY3 = sys.version_info[0] == 3
class TestJsonUtil(unittest.TestCase):
def round_tripped(self, doc):
return json_util.loads(json_util.dumps(doc))
def round_tripped(self, doc, **kwargs):
return json_util.loads(json_util.dumps(doc, **kwargs), **kwargs)
def round_trip(self, doc):
self.assertEqual(doc, self.round_tripped(doc))
def round_trip(self, doc, **kwargs):
self.assertEqual(doc, self.round_tripped(doc, **kwargs))
def test_basic(self):
self.round_trip({"hello": "world"})
@ -115,6 +117,62 @@ class TestJsonUtil(unittest.TestCase):
jsn = '{"dt": {"$date": {"$numberLong": "-62135593139000"}}}'
self.assertEqual(dtm, json_util.loads(jsn)["dt"])
# Test dumps format
pre_epoch = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000, utc)}
post_epoch = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc)}
json_options = json_util.JSONOptions(strict_date=True)
self.assertEqual(
'{"dt": {"$date": -62135593138990}}',
json_util.dumps(pre_epoch))
self.assertEqual(
'{"dt": {"$date": 63075661010}}',
json_util.dumps(post_epoch))
self.assertEqual(
'{"dt": {"$date": {"$numberLong": "-62135593138990"}}}',
json_util.dumps(pre_epoch, json_options=json_options))
self.assertEqual(
'{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
json_util.dumps(post_epoch, json_options=json_options))
# Strict mode requires dates to have a timezone
pre_epoch_naive = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 1000)}
post_epoch_naive = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 1000)}
self.assertRaises(InvalidDatetime, json_util.dumps, pre_epoch_naive,
json_options=json_options)
self.assertRaises(InvalidDatetime, json_util.dumps, post_epoch_naive,
json_options=json_options)
# Test tz_aware and tzinfo options
self.assertEqual(
datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
json_util.loads(
'{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}')["dt"])
self.assertEqual(
datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc),
json_util.loads(
'{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
json_options=json_util.JSONOptions(tz_aware=True,
tzinfo=utc))["dt"])
self.assertEqual(
datetime.datetime(1972, 1, 1, 1, 1, 1, 10000),
json_util.loads(
'{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}',
json_options=json_util.JSONOptions(tz_aware=False))["dt"])
self.round_trip(pre_epoch_naive, json_options=json_util.JSONOptions(
tz_aware=False))
# Test a non-utc timezone
pacific = FixedOffset(-8 * 60, 'US/Pacific')
aware_datetime = {"dt": datetime.datetime(2002, 10, 27, 6, 0, 0, 10000,
pacific)}
self.assertEqual(
'{"dt": {"$date": "2002-10-27T06:00:00.010-0800"}}',
json_util.dumps(aware_datetime, json_options=json_options))
self.round_trip(aware_datetime, json_options=json_util.JSONOptions(
tz_aware=True, tzinfo=pacific))
self.round_trip(aware_datetime, json_options=json_util.JSONOptions(
strict_date=True, tz_aware=True, tzinfo=pacific))
def test_regex_object_hook(self):
# Extended JSON format regular expression.
pat = 'a*b'
@ -173,8 +231,26 @@ class TestJsonUtil(unittest.TestCase):
self.assertEqual(dct, rtdct)
def test_uuid(self):
self.round_trip(
{'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')})
doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}
self.round_trip(doc)
self.assertEqual(
'{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}',
json_util.dumps(doc))
self.assertEqual(
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}',
json_util.dumps(doc, json_options=json_util.STRICT_JSON_OPTIONS))
self.assertEqual(
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}',
json_util.dumps(doc, json_options=json_util.JSONOptions(
strict_uuid=True, uuid_representation=STANDARD)))
self.assertEqual(doc, json_util.loads(
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}'))
self.assertEqual(doc, json_util.loads(
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}'))
self.round_trip(doc, json_options=json_util.JSONOptions(
strict_uuid=True, uuid_representation=JAVA_LEGACY))
self.round_trip(doc, json_options=json_util.JSONOptions(
strict_uuid=True, uuid_representation=CSHARP_LEGACY))
def test_binary(self):
bin_type_dict = {"bin": Binary(b"\x00\x01\x02\x03\x04")}
@ -230,13 +306,24 @@ class TestJsonUtil(unittest.TestCase):
self.assertEqual('{"$code": "return z", "$scope": {"z": 2}}', res)
def test_undefined(self):
json = '{"name": {"$undefined": true}}'
self.assertIsNone(json_util.loads(json)['name'])
jsn = '{"name": {"$undefined": true}}'
self.assertIsNone(json_util.loads(jsn)['name'])
def test_numberlong(self):
json = '{"weight": {"$numberLong": 65535}}'
self.assertEqual(json_util.loads(json)['weight'],
jsn = '{"weight": {"$numberLong": "65535"}}'
self.assertEqual(json_util.loads(jsn)['weight'],
Int64(65535))
self.assertEqual(json_util.dumps({"weight": Int64(65535)}),
'{"weight": 65535}')
json_options = json_util.JSONOptions(strict_number_long=True)
self.assertEqual(json_util.dumps({"weight": Int64(65535)},
json_options=json_options),
jsn)
def test_loads_document_class(self):
self.assertEqual(SON([("foo", "bar"), ("b", 1)]), json_util.loads(
'{"foo": "bar", "b": 1}',
json_options=json_util.JSONOptions(document_class=SON)))
class TestJsonUtilRoundtrip(IntegrationTest):