diff --git a/bson/__init__.py b/bson/__init__.py index 7a186279b..acac701ab 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -230,18 +230,7 @@ def _get_date(data, position, dummy0, opts, dummy1): """Decode a BSON datetime to python datetime.datetime.""" end = position + 8 millis = _UNPACK_LONG(data[position:end])[0] - diff = ((millis % 1000) + 1000) % 1000 - seconds = (millis - diff) / 1000 - micros = diff * 1000 - if opts.tz_aware: - dt = EPOCH_AWARE + datetime.timedelta( - seconds=seconds, microseconds=micros) - if opts.tzinfo: - dt = dt.astimezone(opts.tzinfo) - else: - dt = EPOCH_NAIVE + datetime.timedelta( - seconds=seconds, microseconds=micros) - return dt, end + return _millis_to_datetime(millis, opts), end def _get_code(data, position, obj_end, opts, element_name): @@ -543,10 +532,7 @@ def _encode_bool(name, value, dummy0, dummy1): def _encode_datetime(name, value, dummy0, dummy1): """Encode datetime.datetime.""" - if value.utcoffset() is not None: - value = value - value.utcoffset() - millis = int(calendar.timegm(value.timetuple()) * 1000 + - value.microsecond / 1000) + millis = _datetime_to_millis(value) return b"\x09" + name + _PACK_LONG(millis) @@ -748,6 +734,30 @@ if _USE_C: _dict_to_bson = _cbson._dict_to_bson +def _millis_to_datetime(millis, opts): + """Convert milliseconds since epoch UTC to datetime.""" + diff = ((millis % 1000) + 1000) % 1000 + seconds = (millis - diff) / 1000 + micros = diff * 1000 + if opts.tz_aware: + dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds, + microseconds=micros) + if opts.tzinfo: + dt = dt.astimezone(opts.tzinfo) + return dt + else: + return EPOCH_NAIVE + datetime.timedelta(seconds=seconds, + microseconds=micros) + + +def _datetime_to_millis(dtm): + """Convert datetime to milliseconds since epoch UTC.""" + if dtm.utcoffset() is not None: + dtm = dtm - dtm.utcoffset() + return int(calendar.timegm(dtm.timetuple()) * 1000 + + dtm.microsecond / 1000) + + _CODEC_OPTIONS_TYPE_ERROR = TypeError( "codec_options must be an instance of CodecOptions") diff --git a/bson/codec_options.py b/bson/codec_options.py index 0dbcd43da..81b894bc0 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -109,7 +109,7 @@ class CodecOptions(_options_base): self.unicode_decode_error_handler, self.tzinfo)) def __repr__(self): - return 'CodecOptions(%s)' % (self._arguments_repr(),) + return '%s(%s)' % (self.__class__.__name__, self._arguments_repr()) DEFAULT_CODEC_OPTIONS = CodecOptions() diff --git a/bson/errors.py b/bson/errors.py index b6c3864aa..8c9af8135 100644 --- a/bson/errors.py +++ b/bson/errors.py @@ -38,3 +38,16 @@ class InvalidDocument(BSONError): class InvalidId(BSONError): """Raised when trying to create an ObjectId from invalid data. """ + + +class JSONError(Exception): + """Base class for all JSON exceptions. + """ + + +class InvalidDatetime(JSONError): + """Raised when trying to encode a datetime without a timezone. + + Only raised when + :class:`JSONOptions.strict_date ` is ``True``. + """ diff --git a/bson/json_util.py b/bson/json_util.py index adfed4c4b..cfa67ae7c 100644 --- a/bson/json_util.py +++ b/bson/json_util.py @@ -17,9 +17,9 @@ This module provides two helper methods `dumps` and `loads` that wrap the native :mod:`json` methods and provide explicit BSON conversion to and from json. This allows for specialized encoding and decoding of BSON documents -into `Mongo Extended JSON -`_'s *Strict* -mode. This lets you encode / decode BSON documents to JSON even when +into `MongoDB Extended JSON +`_'s *Strict +mode*. This lets you encode / decode BSON documents to JSON even when they use special BSON types. Example usage (serialization): @@ -68,17 +68,19 @@ but it will be faster as there is less recursion. """ import base64 -import calendar import collections import datetime import json import re import uuid +import bson from bson import EPOCH_AWARE, RE_TYPE, SON -from bson.binary import Binary +from bson.binary import (Binary, JAVA_LEGACY, CSHARP_LEGACY, OLD_UUID_SUBTYPE, + UUID_SUBTYPE) from bson.code import Code from bson.codec_options import CodecOptions +from bson.errors import InvalidDatetime from bson.dbref import DBRef from bson.int64 import Int64 from bson.max_key import MaxKey @@ -105,18 +107,68 @@ class JSONOptions(CodecOptions): """Encapsulates JSON options for :func:`dumps` and :func:`loads`. :Parameters: + - `strict_number_long`: If ``True``, :class:`~bson.int64.Int64` objects + are encoded to MongoDB Extended JSON's *Strict mode* type + `NumberLong`, ie ``'{"$numberLong": "" }'``. Otherwise they + will be encoded as an `int`. Defaults to ``False``. + - `strict_date`: If ``True``, `datetime.datetime` objects are encoded to + MongoDB Extended JSON's *Strict mode* type `Date`. Otherwise it will + be encoded as milliseconds since Unix epoch. Defaults to ``False``. + - `strict_uuid`: If ``True``, :class:`uuid.UUID` object are encoded to + MongoDB Extended JSON's *Strict mode* type `Binary`. Otherwise it + will be encoded as ``'{"$uuid": "" }'``. Defaults to ``False``. + - `document_class`: BSON documents returned by :func:`loads` will be + decoded to an instance of this class. Must be a subclass of + :class:`collections.MutableMapping`. Defaults to :class:`dict`. + - `uuid_representation`: The BSON representation to use when encoding + and decoding instances of :class:`uuid.UUID`. Defaults to + :const:`~bson.binary.PYTHON_LEGACY`. + - `tz_aware`: If ``True``, MongoDB Extended JSON's *Strict mode* type + `Date` will be decoded to timezone aware instances of + :class:`datetime.datetime`. Otherwise they will be naive. Defaults + to ``True``. + - `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the + timezone from which :class:`~datetime.datetime` objects should be + decoded. Defaults to :const:`~bson.tz_util.utc`. + - `args`: arguments to :class:`~bson.codec_options.CodecOptions` - `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions` + + .. seealso:: The documentation for `MongoDB Extended JSON + `_. + + .. versionadded:: 3.4 """ - def __new__(cls, *args, **kwargs): + def __new__(cls, strict_number_long=False, strict_date=False, + strict_uuid=False, *args, **kwargs): + kwargs["tz_aware"] = kwargs.get("tz_aware", True) + if kwargs["tz_aware"]: + kwargs["tzinfo"] = kwargs.get("tzinfo", utc) self = super(JSONOptions, cls).__new__(cls, *args, **kwargs) + self.strict_number_long = strict_number_long + self.strict_date = strict_date + self.strict_uuid = strict_uuid return self - def __repr__(self): - return 'JSONOptions(%s)' % (self._arguments_repr(),) + def _arguments_repr(self): + return 'strict_number_long=%r, strict_date=%r, strict_uuid=%r, %s' % ( + self.strict_number_long, + self.strict_date, + self.strict_uuid, + super(JSONOptions, self)._arguments_repr()) DEFAULT_JSON_OPTIONS = JSONOptions() +"""The default :class:`JSONOptions` for JSON encoding/decoding. + +.. versionadded:: 3.4 +""" +STRICT_JSON_OPTIONS = JSONOptions(strict_number_long=True, strict_date=True, + strict_uuid=True) +""":class:`JSONOptions` for MongoDB Extended JSON's *Strict mode* encoding. + +.. versionadded:: 3.4 +""" def dumps(obj, *args, **kwargs): @@ -125,6 +177,18 @@ def dumps(obj, *args, **kwargs): Recursive function that handles all BSON types including :class:`~bson.binary.Binary` and :class:`~bson.code.Code`. + Raises :class:`~bson.errors.InvalidDatetime` if `obj` contains a + :class:`datetime.datetime` without a timezone and + `json_options.strict_date` is ``True``. + + :Parameters: + - `json_options`: A :class:`JSONOptions` instance used to modify the + encoding of MongoDB Extended JSON types. Defaults to + :const:`DEFAULT_JSON_OPTIONS`. + + .. versionchanged:: 3.4 + Accepts optional parameter `json_options`. See :class:`JSONOptions`. + .. versionchanged:: 2.7 Preserves order when rendering SON, Timestamp, Code, Binary, and DBRef instances. @@ -137,9 +201,18 @@ def loads(s, *args, **kwargs): """Helper function that wraps :func:`json.loads`. Automatically passes the object_hook for BSON type conversion. + + :Parameters: + - `json_options`: A :class:`JSONOptions` instance used to modify the + decoding of MongoDB Extended JSON types. Defaults to + :const:`DEFAULT_JSON_OPTIONS`. + + .. versionchanged:: 3.4 + Accepts optional parameter `json_options`. See :class:`JSONOptions`. """ json_options = kwargs.pop("json_options", DEFAULT_JSON_OPTIONS) - kwargs["object_hook"] = lambda dct: object_hook(dct, json_options) + kwargs["object_pairs_hook"] = lambda pairs: object_pairs_hook(pairs, + json_options) return json.loads(s, *args, **kwargs) @@ -158,6 +231,10 @@ def _json_convert(obj, json_options=DEFAULT_JSON_OPTIONS): return obj +def object_pairs_hook(pairs, json_options=DEFAULT_JSON_OPTIONS): + return object_hook(json_options.document_class(pairs), json_options) + + def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS): if "$oid" in dct: return ObjectId(str(dct["$oid"])) @@ -190,10 +267,7 @@ def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS): aware = datetime.datetime.strptime( dt, "%Y-%m-%dT%H:%M:%S.%f").replace(tzinfo=utc) - if not offset or offset == 'Z': - # UTC - return aware - else: + if offset and offset != 'Z': if len(offset) == 6: hours, minutes = offset[1:].split(':') secs = (int(hours) * 3600 + int(minutes) * 60) @@ -203,14 +277,21 @@ def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS): secs = int(offset[1:3]) * 3600 if offset[0] == "-": secs *= -1 - return aware - datetime.timedelta(seconds=secs) + aware = aware - datetime.timedelta(seconds=secs) + + if json_options.tz_aware: + if json_options.tzinfo: + aware = aware.astimezone(json_options.tzinfo) + return aware + else: + return aware.replace(tzinfo=None) # mongoexport 2.6 and newer, time before the epoch (SERVER-15275) elif isinstance(dtm, collections.Mapping): - secs = float(dtm["$numberLong"]) / 1000.0 + millis = int(dtm["$numberLong"]) # mongoexport before 2.6 else: - secs = float(dtm) / 1000.0 - return EPOCH_AWARE + datetime.timedelta(seconds=secs) + millis = int(dtm) + return bson._millis_to_datetime(millis, json_options) if "$regex" in dct: flags = 0 # PyMongo always adds $options but some other tools may not. @@ -227,7 +308,17 @@ def object_hook(dct, json_options=DEFAULT_JSON_OPTIONS): subtype = int(dct["$type"], 16) if subtype >= 0xffffff80: # Handle mongoexport values subtype = int(dct["$type"][6:], 16) - return Binary(base64.b64decode(dct["$binary"].encode()), subtype) + data = base64.b64decode(dct["$binary"].encode()) + # special handling for UUID + if subtype == OLD_UUID_SUBTYPE: + if json_options.uuid_representation == CSHARP_LEGACY: + return uuid.UUID(bytes_le=data) + if json_options.uuid_representation == JAVA_LEGACY: + data = data[7::-1] + data[:7:-1] + return uuid.UUID(bytes=data) + if subtype == UUID_SUBTYPE: + return uuid.UUID(bytes=data) + return Binary(data, subtype) if "$code" in dct: return Code(dct["$code"], dct.get("$scope")) if "$uuid" in dct: @@ -250,12 +341,22 @@ def default(obj, json_options=DEFAULT_JSON_OPTIONS): if isinstance(obj, DBRef): return _json_convert(obj.as_doc()) if isinstance(obj, datetime.datetime): - # TODO share this code w/ bson.py? - if obj.utcoffset() is not None: - obj = obj - obj.utcoffset() - millis = int(calendar.timegm(obj.timetuple()) * 1000 + - obj.microsecond / 1000) - return {"$date": millis} + if json_options.strict_date: + if not obj.tzinfo: + raise InvalidDatetime("datetime is not timezone aware", obj) + if obj >= EPOCH_AWARE: + return {"$date": "%s.%03d%s" % ( + obj.strftime("%Y-%m-%dT%H:%M:%S"), + int(obj.microsecond / 1000), + obj.strftime("%z"))} + + millis = bson._datetime_to_millis(obj) + if json_options.strict_date: + return {"$date": {"$numberLong": str(millis)}} + else: + return {"$date": millis} + if json_options.strict_number_long and isinstance(obj, Int64): + return {"$numberLong": str(obj)} if isinstance(obj, (RE_TYPE, Regex)): flags = "" if obj.flags & re.IGNORECASE: @@ -292,5 +393,18 @@ def default(obj, json_options=DEFAULT_JSON_OPTIONS): ('$binary', base64.b64encode(obj).decode()), ('$type', "00")]) if isinstance(obj, uuid.UUID): - return {"$uuid": obj.hex} + if json_options.strict_uuid: + data = obj.bytes + subtype = OLD_UUID_SUBTYPE + if json_options.uuid_representation == CSHARP_LEGACY: + data = obj.bytes_le + elif json_options.uuid_representation == JAVA_LEGACY: + data = data[7::-1] + data[:7:-1] + elif json_options.uuid_representation == UUID_SUBTYPE: + subtype = UUID_SUBTYPE + return SON([ + ('$binary', base64.b64encode(data).decode()), + ('$type', "%02x" % subtype)]) + else: + return {"$uuid": obj.hex} raise TypeError("%r is not JSON serializable" % obj) diff --git a/doc/api/bson/json_util.rst b/doc/api/bson/json_util.rst index 8664261f8..851972d0c 100644 --- a/doc/api/bson/json_util.rst +++ b/doc/api/bson/json_util.rst @@ -4,3 +4,4 @@ :synopsis: Tools for using Python's json module with BSON documents :members: :undoc-members: + :member-order: bysource diff --git a/test/test_json_util.py b/test/test_json_util.py index fc12b5a0f..1df9bb3ce 100644 --- a/test/test_json_util.py +++ b/test/test_json_util.py @@ -22,9 +22,11 @@ import uuid sys.path[0:0] = [""] -from bson import json_util, EPOCH_AWARE -from bson.binary import Binary, MD5_SUBTYPE, USER_DEFINED_SUBTYPE +from bson import json_util, EPOCH_AWARE, EPOCH_NAIVE, SON +from bson.binary import (Binary, MD5_SUBTYPE, USER_DEFINED_SUBTYPE, + JAVA_LEGACY, CSHARP_LEGACY, STANDARD) from bson.code import Code +from bson.errors import InvalidDatetime from bson.dbref import DBRef from bson.int64 import Int64 from bson.max_key import MaxKey @@ -32,7 +34,7 @@ from bson.min_key import MinKey from bson.objectid import ObjectId from bson.regex import Regex from bson.timestamp import Timestamp -from bson.tz_util import utc +from bson.tz_util import FixedOffset, utc from test import unittest, IntegrationTest @@ -40,11 +42,11 @@ PY3 = sys.version_info[0] == 3 class TestJsonUtil(unittest.TestCase): - def round_tripped(self, doc): - return json_util.loads(json_util.dumps(doc)) + def round_tripped(self, doc, **kwargs): + return json_util.loads(json_util.dumps(doc, **kwargs), **kwargs) - def round_trip(self, doc): - self.assertEqual(doc, self.round_tripped(doc)) + def round_trip(self, doc, **kwargs): + self.assertEqual(doc, self.round_tripped(doc, **kwargs)) def test_basic(self): self.round_trip({"hello": "world"}) @@ -115,6 +117,62 @@ class TestJsonUtil(unittest.TestCase): jsn = '{"dt": {"$date": {"$numberLong": "-62135593139000"}}}' self.assertEqual(dtm, json_util.loads(jsn)["dt"]) + # Test dumps format + pre_epoch = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 10000, utc)} + post_epoch = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc)} + json_options = json_util.JSONOptions(strict_date=True) + self.assertEqual( + '{"dt": {"$date": -62135593138990}}', + json_util.dumps(pre_epoch)) + self.assertEqual( + '{"dt": {"$date": 63075661010}}', + json_util.dumps(post_epoch)) + self.assertEqual( + '{"dt": {"$date": {"$numberLong": "-62135593138990"}}}', + json_util.dumps(pre_epoch, json_options=json_options)) + self.assertEqual( + '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}', + json_util.dumps(post_epoch, json_options=json_options)) + + # Strict mode requires dates to have a timezone + pre_epoch_naive = {"dt": datetime.datetime(1, 1, 1, 1, 1, 1, 1000)} + post_epoch_naive = {"dt": datetime.datetime(1972, 1, 1, 1, 1, 1, 1000)} + self.assertRaises(InvalidDatetime, json_util.dumps, pre_epoch_naive, + json_options=json_options) + self.assertRaises(InvalidDatetime, json_util.dumps, post_epoch_naive, + json_options=json_options) + + # Test tz_aware and tzinfo options + self.assertEqual( + datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc), + json_util.loads( + '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}')["dt"]) + self.assertEqual( + datetime.datetime(1972, 1, 1, 1, 1, 1, 10000, utc), + json_util.loads( + '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}', + json_options=json_util.JSONOptions(tz_aware=True, + tzinfo=utc))["dt"]) + self.assertEqual( + datetime.datetime(1972, 1, 1, 1, 1, 1, 10000), + json_util.loads( + '{"dt": {"$date": "1972-01-01T01:01:01.010+0000"}}', + json_options=json_util.JSONOptions(tz_aware=False))["dt"]) + self.round_trip(pre_epoch_naive, json_options=json_util.JSONOptions( + tz_aware=False)) + + # Test a non-utc timezone + pacific = FixedOffset(-8 * 60, 'US/Pacific') + aware_datetime = {"dt": datetime.datetime(2002, 10, 27, 6, 0, 0, 10000, + pacific)} + self.assertEqual( + '{"dt": {"$date": "2002-10-27T06:00:00.010-0800"}}', + json_util.dumps(aware_datetime, json_options=json_options)) + self.round_trip(aware_datetime, json_options=json_util.JSONOptions( + tz_aware=True, tzinfo=pacific)) + self.round_trip(aware_datetime, json_options=json_util.JSONOptions( + strict_date=True, tz_aware=True, tzinfo=pacific)) + def test_regex_object_hook(self): # Extended JSON format regular expression. pat = 'a*b' @@ -173,8 +231,26 @@ class TestJsonUtil(unittest.TestCase): self.assertEqual(dct, rtdct) def test_uuid(self): - self.round_trip( - {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')}) + doc = {'uuid': uuid.UUID('f47ac10b-58cc-4372-a567-0e02b2c3d479')} + self.round_trip(doc) + self.assertEqual( + '{"uuid": {"$uuid": "f47ac10b58cc4372a5670e02b2c3d479"}}', + json_util.dumps(doc)) + self.assertEqual( + '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}', + json_util.dumps(doc, json_options=json_util.STRICT_JSON_OPTIONS)) + self.assertEqual( + '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}', + json_util.dumps(doc, json_options=json_util.JSONOptions( + strict_uuid=True, uuid_representation=STANDARD))) + self.assertEqual(doc, json_util.loads( + '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}')) + self.assertEqual(doc, json_util.loads( + '{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}')) + self.round_trip(doc, json_options=json_util.JSONOptions( + strict_uuid=True, uuid_representation=JAVA_LEGACY)) + self.round_trip(doc, json_options=json_util.JSONOptions( + strict_uuid=True, uuid_representation=CSHARP_LEGACY)) def test_binary(self): bin_type_dict = {"bin": Binary(b"\x00\x01\x02\x03\x04")} @@ -230,13 +306,24 @@ class TestJsonUtil(unittest.TestCase): self.assertEqual('{"$code": "return z", "$scope": {"z": 2}}', res) def test_undefined(self): - json = '{"name": {"$undefined": true}}' - self.assertIsNone(json_util.loads(json)['name']) + jsn = '{"name": {"$undefined": true}}' + self.assertIsNone(json_util.loads(jsn)['name']) def test_numberlong(self): - json = '{"weight": {"$numberLong": 65535}}' - self.assertEqual(json_util.loads(json)['weight'], + jsn = '{"weight": {"$numberLong": "65535"}}' + self.assertEqual(json_util.loads(jsn)['weight'], Int64(65535)) + self.assertEqual(json_util.dumps({"weight": Int64(65535)}), + '{"weight": 65535}') + json_options = json_util.JSONOptions(strict_number_long=True) + self.assertEqual(json_util.dumps({"weight": Int64(65535)}, + json_options=json_options), + jsn) + + def test_loads_document_class(self): + self.assertEqual(SON([("foo", "bar"), ("b", 1)]), json_util.loads( + '{"foo": "bar", "b": 1}', + json_options=json_util.JSONOptions(document_class=SON))) class TestJsonUtilRoundtrip(IntegrationTest):