PYTHON-1824 Allow encoding/decoding out-of-range datetimes via DatetimeMS and datetime_conversion (#981)

https://jira.mongodb.org/browse/PYTHON-1824

Co-authored-by: Ben Warner <ben.warner@mongodb.com>
This commit is contained in:
Ben Warner 2022-07-27 16:53:52 -07:00 committed by GitHub
parent e96f112d84
commit 14002a5a0d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 721 additions and 46 deletions

View File

@ -54,7 +54,6 @@ bytes [#bytes]_ binary both
subtype 0. It will be decoded back to bytes.
"""
import calendar
import datetime
import itertools
import re
@ -100,9 +99,18 @@ from bson.code import Code
from bson.codec_options import (
DEFAULT_CODEC_OPTIONS,
CodecOptions,
DatetimeConversionOpts,
_DocumentType,
_raw_document_class,
)
from bson.datetime_ms import (
EPOCH_AWARE,
EPOCH_NAIVE,
DatetimeMS,
_datetime_to_millis,
_millis_to_datetime,
utc,
)
from bson.dbref import DBRef
from bson.decimal128 import Decimal128
from bson.errors import InvalidBSON, InvalidDocument, InvalidStringData
@ -113,7 +121,6 @@ from bson.objectid import ObjectId
from bson.regex import Regex
from bson.son import RE_TYPE, SON
from bson.timestamp import Timestamp
from bson.tz_util import utc
# Import some modules for type-checking only.
if TYPE_CHECKING:
@ -187,12 +194,10 @@ __all__ = [
"is_valid",
"BSON",
"has_c",
"DatetimeConversionOpts",
"DatetimeMS",
]
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
BSONNUM = b"\x01" # Floating point
BSONSTR = b"\x02" # UTF-8 string
BSONOBJ = b"\x03" # Embedded document
@ -413,7 +418,7 @@ def _get_boolean(
def _get_date(
data: Any, view: Any, position: int, dummy0: int, opts: CodecOptions, dummy1: Any
) -> Tuple[datetime.datetime, int]:
) -> Tuple[Union[datetime.datetime, DatetimeMS], int]:
"""Decode a BSON datetime to python datetime.datetime."""
return _millis_to_datetime(_UNPACK_LONG_FROM(data, position)[0], opts), position + 8
@ -724,6 +729,12 @@ def _encode_datetime(name: bytes, value: datetime.datetime, dummy0: Any, dummy1:
return b"\x09" + name + _PACK_LONG(millis)
def _encode_datetime_ms(name: bytes, value: DatetimeMS, dummy0: Any, dummy1: Any) -> bytes:
    """Encode a DatetimeMS as a BSON UTC datetime element.

    The element is the type byte 0x09, followed by the element name and
    the value's millisecond count packed with ``_PACK_LONG``.
    """
    # int() goes through DatetimeMS.__int__, which yields the raw millisecond count.
    packed_millis = _PACK_LONG(int(value))
    return b"\x09" + name + packed_millis
def _encode_none(name: bytes, dummy0: Any, dummy1: Any, dummy2: Any) -> bytes:
"""Encode python None."""
return b"\x0A" + name
@ -814,6 +825,7 @@ _ENCODERS = {
bool: _encode_bool,
bytes: _encode_bytes,
datetime.datetime: _encode_datetime,
DatetimeMS: _encode_datetime_ms,
dict: _encode_mapping,
float: _encode_float,
int: _encode_int,
@ -948,27 +960,6 @@ if _USE_C:
_dict_to_bson = _cbson._dict_to_bson # noqa: F811
def _millis_to_datetime(millis: int, opts: CodecOptions) -> datetime.datetime:
"""Convert milliseconds since epoch UTC to datetime."""
diff = ((millis % 1000) + 1000) % 1000
seconds = (millis - diff) // 1000
micros = diff * 1000
if opts.tz_aware:
dt = EPOCH_AWARE + datetime.timedelta(seconds=seconds, microseconds=micros)
if opts.tzinfo:
dt = dt.astimezone(opts.tzinfo)
return dt
else:
return EPOCH_NAIVE + datetime.timedelta(seconds=seconds, microseconds=micros)
def _datetime_to_millis(dtm: datetime.datetime) -> int:
"""Convert datetime to milliseconds since epoch UTC."""
if dtm.utcoffset() is not None:
dtm = dtm - dtm.utcoffset() # type: ignore
return int(calendar.timegm(dtm.timetuple()) * 1000 + dtm.microsecond // 1000)
_CODEC_OPTIONS_TYPE_ERROR = TypeError("codec_options must be an instance of CodecOptions")

View File

@ -52,6 +52,9 @@ struct module_state {
PyObject* BSONInt64;
PyObject* Decimal128;
PyObject* Mapping;
PyObject* DatetimeMS;
PyObject* _min_datetime_ms;
PyObject* _max_datetime_ms;
};
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
@ -72,6 +75,12 @@ struct module_state {
/* The smallest possible BSON document, i.e. "{}" */
#define BSON_MIN_SIZE 5
/* Datetime codec options */
#define DATETIME 1
#define DATETIME_CLAMP 2
#define DATETIME_MS 3
#define DATETIME_AUTO 4
/* Get an error class from the bson.errors module.
*
* Returns a new ref */
@ -179,6 +188,45 @@ static long long millis_from_datetime(PyObject* datetime) {
return millis;
}
/* Extended-range datetime, returns a DatetimeMS object with millis.
 *
 * Wraps `millis` in a Python int and calls the DatetimeMS class cached in the
 * module state with it. Returns the result of that call, or NULL if the int
 * could not be created or the call itself failed. */
static PyObject* datetime_ms_from_millis(PyObject* self, long long millis){
    struct module_state *state = GETSTATE(self);
    PyObject* millis_obj;
    PyObject* result;

    millis_obj = PyLong_FromLongLong(millis);
    if (millis_obj == NULL) {
        return NULL;
    }
    /* Equivalent to calling DatetimeMS(millis) from Python. */
    result = PyObject_CallFunctionObjArgs(state->DatetimeMS, millis_obj, NULL);
    /* The temporary int is released whether or not the call succeeded. */
    Py_DECREF(millis_obj);
    return result;
}
/* Extended-range datetime, takes a DatetimeMS object and extracts the long long value.
 *
 * Converts `dt` to a Python int with PyNumber_Long and stores its value in
 * `*out`.
 *
 * Returns 1 on success. Returns 0 with a Python exception set on failure:
 * the error raised by PyNumber_Long if the conversion fails, or OverflowError
 * if the integer does not fit in a long long. `*out` is left untouched on
 * failure. */
static int millis_from_datetime_ms(PyObject* dt, long long* out){
    PyObject* ll_millis;
    long long millis;

    if (!(ll_millis = PyNumber_Long(dt))){
        /* Conversion failed (e.g. TypeError). Bail out unconditionally so a
         * NULL pointer is never handed to PyLong_AsLongLong below. */
        return 0;
    }
    millis = PyLong_AsLongLong(ll_millis);
    /* Drop the temporary before any early return so the overflow path does
     * not leak a reference. */
    Py_DECREF(ll_millis);
    if (millis == -1 && PyErr_Occurred()) { /* Overflow */
        PyErr_SetString(PyExc_OverflowError,
                        "MongoDB datetimes can only handle up to 8-byte ints");
        return 0;
    }
    *out = millis;
    return 1;
}
/* Just make this compatible w/ the old API. */
int buffer_write_bytes(buffer_t buffer, const char* data, int size) {
if (pymongo_buffer_write(buffer, data, size)) {
@ -342,7 +390,10 @@ static int _load_python_objects(PyObject* module) {
_load_object(&state->BSONInt64, "bson.int64", "Int64") ||
_load_object(&state->Decimal128, "bson.decimal128", "Decimal128") ||
_load_object(&state->UUID, "uuid", "UUID") ||
_load_object(&state->Mapping, "collections.abc", "Mapping")) {
_load_object(&state->Mapping, "collections.abc", "Mapping") ||
_load_object(&state->DatetimeMS, "bson.datetime_ms", "DatetimeMS") ||
_load_object(&state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms") ||
_load_object(&state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms")) {
return 1;
}
/* Reload our REType hack too. */
@ -466,13 +517,14 @@ int convert_codec_options(PyObject* options_obj, void* p) {
options->unicode_decode_error_handler = NULL;
if (!PyArg_ParseTuple(options_obj, "ObbzOO",
if (!PyArg_ParseTuple(options_obj, "ObbzOOb",
&options->document_class,
&options->tz_aware,
&options->uuid_rep,
&options->unicode_decode_error_handler,
&options->tzinfo,
&type_registry_obj))
&type_registry_obj,
&options->datetime_conversion))
return 0;
type_marker = _type_marker(options->document_class);
@ -1049,6 +1101,13 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
}
*(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
return buffer_write_int64(buffer, (int64_t)millis);
} else if (PyObject_TypeCheck(value, (PyTypeObject *) state->DatetimeMS)) {
long long millis;
if (!millis_from_datetime_ms(value, &millis)) {
return 0;
}
*(pymongo_buffer_get_buffer(buffer) + type_byte) = 0x09;
return buffer_write_int64(buffer, (int64_t)millis);
} else if (PyObject_TypeCheck(value, state->REType)) {
return _write_regex_to_buffer(buffer, type_byte, value);
}
@ -1854,8 +1913,79 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
}
memcpy(&millis, buffer + *position, 8);
millis = (int64_t)BSON_UINT64_FROM_LE(millis);
naive = datetime_from_millis(millis);
*position += 8;
if (options->datetime_conversion == DATETIME_MS){
value = datetime_ms_from_millis(self, millis);
break;
}
int dt_clamp = options->datetime_conversion == DATETIME_CLAMP;
int dt_auto = options->datetime_conversion == DATETIME_AUTO;
if (dt_clamp || dt_auto){
PyObject *min_millis_fn = _get_object(state->_min_datetime_ms, "bson.datetime_ms", "_min_datetime_ms");
PyObject *max_millis_fn = _get_object(state->_max_datetime_ms, "bson.datetime_ms", "_max_datetime_ms");
PyObject *min_millis_fn_res;
PyObject *max_millis_fn_res;
int64_t min_millis;
int64_t max_millis;
if (min_millis_fn == NULL || max_millis_fn == NULL) {
Py_XDECREF(min_millis_fn);
Py_XDECREF(max_millis_fn);
goto invalid;
}
if (options->tz_aware){
PyObject* tzinfo = options->tzinfo;
if (tzinfo == Py_None) {
// Default to UTC.
utc_type = _get_object(state->UTC, "bson.tz_util", "utc");
tzinfo = utc_type;
}
min_millis_fn_res = PyObject_CallFunctionObjArgs(min_millis_fn, tzinfo, NULL);
max_millis_fn_res = PyObject_CallFunctionObjArgs(max_millis_fn, tzinfo, NULL);
} else {
min_millis_fn_res = PyObject_CallObject(min_millis_fn, NULL);
max_millis_fn_res = PyObject_CallObject(max_millis_fn, NULL);
}
Py_DECREF(min_millis_fn);
Py_DECREF(max_millis_fn);
if (!min_millis_fn_res || !max_millis_fn_res){
Py_XDECREF(min_millis_fn_res);
Py_XDECREF(max_millis_fn_res);
goto invalid;
}
min_millis = PyLong_AsLongLong(min_millis_fn_res);
max_millis = PyLong_AsLongLong(max_millis_fn_res);
if ((min_millis == -1 || max_millis == -1) && PyErr_Occurred())
{
// min/max_millis check
goto invalid;
}
if (dt_clamp) {
if (millis < min_millis) {
millis = min_millis;
} else if (millis > max_millis) {
millis = max_millis;
}
// Continues from here to return a datetime.
} else if (dt_auto) {
if (millis < min_millis || millis > max_millis){
value = datetime_ms_from_millis(self, millis);
break; // Out-of-range so done.
}
}
}
naive = datetime_from_millis(millis);
if (!options->tz_aware) { /* In the naive case, we're done here. */
value = naive;
break;

View File

@ -62,6 +62,7 @@ typedef struct codec_options_t {
char* unicode_decode_error_handler;
PyObject* tzinfo;
type_registry_t type_registry;
unsigned char datetime_conversion;
PyObject* options_obj;
unsigned char is_raw_bson;
} codec_options_t;

View File

@ -16,6 +16,7 @@
import abc
import datetime
import enum
from collections.abc import MutableMapping as _MutableMapping
from typing import (
Any,
@ -198,6 +199,16 @@ class TypeRegistry(object):
)
class DatetimeConversionOpts(enum.IntEnum):
    """Integer-valued options for the ``datetime_conversion`` codec option."""

    DATETIME = 1
    DATETIME_CLAMP = 2
    DATETIME_MS = 3
    DATETIME_AUTO = 4

    def __repr__(self):
        # The repr is the bare integer value (e.g. "1"), not the usual
        # "<DatetimeConversionOpts.DATETIME: 1>" form.
        return str(self.value)
class _BaseCodecOptions(NamedTuple):
document_class: Type[Mapping[str, Any]]
tz_aware: bool
@ -205,6 +216,7 @@ class _BaseCodecOptions(NamedTuple):
unicode_decode_error_handler: str
tzinfo: Optional[datetime.tzinfo]
type_registry: TypeRegistry
datetime_conversion: Optional[DatetimeConversionOpts]
class CodecOptions(_BaseCodecOptions):
@ -268,7 +280,13 @@ class CodecOptions(_BaseCodecOptions):
encoded/decoded.
- `type_registry`: Instance of :class:`TypeRegistry` used to customize
encoding and decoding behavior.
- `datetime_conversion`: Specifies how UTC datetimes should be decoded
within BSON. Valid options include 'datetime_ms' to return as a
DatetimeMS, 'datetime' to return as a datetime.datetime and
raising a ValueError for out-of-range values, 'datetime_auto' to
return DatetimeMS objects when the underlying datetime is
out-of-range and 'datetime_clamp' to clamp to the minimum and
maximum possible datetimes. Defaults to 'datetime'.
.. versionchanged:: 4.0
The default for `uuid_representation` was changed from
:const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
@ -292,6 +310,7 @@ class CodecOptions(_BaseCodecOptions):
unicode_decode_error_handler: str = "strict",
tzinfo: Optional[datetime.tzinfo] = None,
type_registry: Optional[TypeRegistry] = None,
datetime_conversion: Optional[DatetimeConversionOpts] = DatetimeConversionOpts.DATETIME,
) -> "CodecOptions":
doc_class = document_class or dict
# issubclass can raise TypeError for generic aliases like SON[str, Any].
@ -336,6 +355,7 @@ class CodecOptions(_BaseCodecOptions):
unicode_decode_error_handler,
tzinfo,
type_registry,
datetime_conversion,
),
)
@ -350,7 +370,7 @@ class CodecOptions(_BaseCodecOptions):
return (
"document_class=%s, tz_aware=%r, uuid_representation=%s, "
"unicode_decode_error_handler=%r, tzinfo=%r, "
"type_registry=%r"
"type_registry=%r, datetime_conversion=%r"
% (
document_class_repr,
self.tz_aware,
@ -358,6 +378,7 @@ class CodecOptions(_BaseCodecOptions):
self.unicode_decode_error_handler,
self.tzinfo,
self.type_registry,
self.datetime_conversion,
)
)
@ -371,6 +392,7 @@ class CodecOptions(_BaseCodecOptions):
"unicode_decode_error_handler": self.unicode_decode_error_handler,
"tzinfo": self.tzinfo,
"type_registry": self.type_registry,
"datetime_conversion": self.datetime_conversion,
}
def __repr__(self):
@ -406,6 +428,7 @@ def _parse_codec_options(options: Any) -> CodecOptions:
"unicode_decode_error_handler",
"tzinfo",
"type_registry",
"datetime_conversion",
}:
if k == "uuidrepresentation":
kwargs["uuid_representation"] = options[k]

View File

@ -21,6 +21,7 @@ you get the error: "TypeError: 'type' object is not subscriptable".
import datetime
import abc
import enum
from typing import Tuple, Generic, Optional, Mapping, Any, TypeVar, Type, Dict, Iterable, Tuple, MutableMapping, Callable, Union
@ -54,6 +55,11 @@ class TypeRegistry:
_DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any])
class DatetimeConversionOpts(int, enum.Enum):
DATETIME = ...
DATETIME_CLAMP = ...
DATETIME_MS = ...
DATETIME_AUTO = ...
class CodecOptions(Tuple, Generic[_DocumentType]):
document_class: Type[_DocumentType]
@ -62,6 +68,7 @@ class CodecOptions(Tuple, Generic[_DocumentType]):
unicode_decode_error_handler: Optional[str]
tzinfo: Optional[datetime.tzinfo]
type_registry: TypeRegistry
datetime_conversion: Optional[int]
def __new__(
cls: Type[CodecOptions],
@ -71,6 +78,7 @@ class CodecOptions(Tuple, Generic[_DocumentType]):
unicode_decode_error_handler: Optional[str] = ...,
tzinfo: Optional[datetime.tzinfo] = ...,
type_registry: Optional[TypeRegistry] = ...,
datetime_conversion: Optional[int] = ...,
) -> CodecOptions[_DocumentType]: ...
# CodecOptions API

157
bson/datetime_ms.py Normal file
View File

@ -0,0 +1,157 @@
# Copyright 2022-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Tools for representing the BSON datetime type."""
import calendar
import datetime
import functools
from typing import Any, Union, cast
from bson.codec_options import (
DEFAULT_CODEC_OPTIONS,
CodecOptions,
DatetimeConversionOpts,
)
from bson.tz_util import utc
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
class DatetimeMS:
    __slots__ = ("_value",)

    # BSON type marker shared by all instances.
    _type_marker = 9

    def __init__(self, value: Union[int, datetime.datetime]):
        """Represents a BSON UTC datetime.

        BSON UTC datetimes are defined as an int64 of milliseconds since the
        Unix epoch. The principal use of DatetimeMS is to represent datetimes
        outside the range of the Python builtin :class:`~datetime.datetime`
        class when encoding/decoding BSON.

        To decode UTC datetimes as a ``DatetimeMS``, `datetime_conversion` in
        :class:`~bson.CodecOptions` must be set to 'datetime_ms' or
        'datetime_auto'. See :ref:`handling-out-of-range-datetimes` for
        details.

        :Parameters:
          - `value`: An instance of :class:`datetime.datetime` to be
            represented as milliseconds since the Unix epoch, or int of
            milliseconds since the Unix epoch.

        Raises :exc:`OverflowError` if an int `value` does not fit in a signed
        64-bit integer and :exc:`TypeError` if `value` is neither an int nor
        a :class:`datetime.datetime`.

        .. versionadded:: 4.3
        """
        if isinstance(value, datetime.datetime):
            self._value = _datetime_to_millis(value)
            return
        if not isinstance(value, int):
            raise TypeError(f"{type(value)} is not a valid type for DatetimeMS")
        if value < -(2**63) or value > 2**63 - 1:
            raise OverflowError("Must be a 64-bit integer of milliseconds")
        self._value = value

    def __int__(self) -> int:
        return self._value

    def __hash__(self) -> int:
        return hash(self._value)

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._value!s})"

    # Equality is only defined between DatetimeMS instances; any other type
    # compares unequal.
    def __eq__(self, other: Any) -> bool:
        return isinstance(other, DatetimeMS) and self._value == other._value

    def __ne__(self, other: Any) -> bool:
        return not isinstance(other, DatetimeMS) or self._value != other._value

    # Ordering compares the stored millisecond count directly against
    # ``other``.
    def __lt__(self, other: Union["DatetimeMS", int]) -> bool:
        return self._value < other

    def __le__(self, other: Union["DatetimeMS", int]) -> bool:
        return self._value <= other

    def __gt__(self, other: Union["DatetimeMS", int]) -> bool:
        return self._value > other

    def __ge__(self, other: Union["DatetimeMS", int]) -> bool:
        return self._value >= other

    def as_datetime(self, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS) -> datetime.datetime:
        """Create a Python :class:`~datetime.datetime` from this DatetimeMS object.

        :Parameters:
          - `codec_options`: A CodecOptions instance for specifying how the
            resulting :class:`~datetime.datetime` will be formatted using
            ``tz_aware`` and ``tzinfo``. Defaults to
            :const:`~bson.codec_options.DEFAULT_CODEC_OPTIONS`.
        """
        converted = _millis_to_datetime(self._value, codec_options)
        return cast(datetime.datetime, converted)
# Inclusive and exclusive min and max for timezones.
# Timezones are hashed by their offset, which is a timedelta
# and therefore there are more than 24 possible timezones.
@functools.lru_cache(maxsize=None)
def _min_datetime_ms(tz=datetime.timezone.utc):
return _datetime_to_millis(datetime.datetime.min.replace(tzinfo=tz))
@functools.lru_cache(maxsize=None)
def _max_datetime_ms(tz=datetime.timezone.utc):
return _datetime_to_millis(datetime.datetime.max.replace(tzinfo=tz))
def _millis_to_datetime(millis: int, opts: CodecOptions) -> Union[datetime.datetime, DatetimeMS]:
    """Convert milliseconds since epoch UTC to datetime.

    The result depends on ``opts.datetime_conversion``:

    - DATETIME_MS: always return ``DatetimeMS(millis)``.
    - DATETIME: return a :class:`datetime.datetime`.
    - DATETIME_CLAMP: clamp ``millis`` to the min/max for the timezone,
      then return a :class:`datetime.datetime`.
    - DATETIME_AUTO: return a :class:`datetime.datetime` when ``millis`` is
      within the min/max for the timezone, otherwise ``DatetimeMS(millis)``.

    Raises :exc:`ValueError` for any other ``datetime_conversion`` value.
    """
    conversion = opts.datetime_conversion
    if conversion == DatetimeConversionOpts.DATETIME_MS:
        return DatetimeMS(millis)
    if conversion not in (
        DatetimeConversionOpts.DATETIME,
        DatetimeConversionOpts.DATETIME_CLAMP,
        DatetimeConversionOpts.DATETIME_AUTO,
    ):
        raise ValueError("datetime_conversion must be an element of DatetimeConversionOpts")

    # Range checks use the configured timezone, falling back to UTC.
    tz = opts.tzinfo or datetime.timezone.utc
    if conversion == DatetimeConversionOpts.DATETIME_CLAMP:
        lower_bound = _min_datetime_ms(tz)
        upper_bound = _max_datetime_ms(tz)
        millis = max(lower_bound, min(millis, upper_bound))
    elif conversion == DatetimeConversionOpts.DATETIME_AUTO:
        if millis < _min_datetime_ms(tz) or millis > _max_datetime_ms(tz):
            return DatetimeMS(millis)

    # Floor division keeps the sub-second remainder non-negative for dates
    # before the epoch.
    seconds, remainder_ms = divmod(millis, 1000)
    since_epoch = datetime.timedelta(seconds=seconds, microseconds=remainder_ms * 1000)
    if not opts.tz_aware:
        return EPOCH_NAIVE + since_epoch
    aware = EPOCH_AWARE + since_epoch
    if opts.tzinfo:
        aware = aware.astimezone(tz)
    return aware
def _datetime_to_millis(dtm: datetime.datetime) -> int:
"""Convert datetime to milliseconds since epoch UTC."""
if dtm.utcoffset() is not None:
dtm = dtm - dtm.utcoffset() # type: ignore
return int(calendar.timegm(dtm.timetuple()) * 1000 + dtm.microsecond // 1000)

View File

@ -94,11 +94,16 @@ import re
import uuid
from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Type, Union, cast
import bson
from bson import EPOCH_AWARE
from bson.binary import ALL_UUID_SUBTYPES, UUID_SUBTYPE, Binary, UuidRepresentation
from bson.code import Code
from bson.codec_options import CodecOptions
from bson.codec_options import CodecOptions, DatetimeConversionOpts
from bson.datetime_ms import (
EPOCH_AWARE,
DatetimeMS,
_datetime_to_millis,
_max_datetime_ms,
_millis_to_datetime,
)
from bson.dbref import DBRef
from bson.decimal128 import Decimal128
from bson.int64 import Int64
@ -228,6 +233,14 @@ class JSONOptions(CodecOptions):
- `tzinfo`: A :class:`datetime.tzinfo` subclass that specifies the
timezone from which :class:`~datetime.datetime` objects should be
decoded. Defaults to :const:`~bson.tz_util.utc`.
- `datetime_conversion`: Specifies how UTC datetimes should be decoded
within BSON. Valid options include 'datetime_ms' to return as a
DatetimeMS, 'datetime' to return as a datetime.datetime and
raising a ValueError for out-of-range values, 'datetime_auto' to
return DatetimeMS objects when the underlying datetime is
out-of-range and 'datetime_clamp' to clamp to the minimum and
maximum possible datetimes. Defaults to 'datetime'. See
:ref:`handling-out-of-range-datetimes` for details.
- `args`: arguments to :class:`~bson.codec_options.CodecOptions`
- `kwargs`: arguments to :class:`~bson.codec_options.CodecOptions`
@ -594,7 +607,9 @@ def _parse_canonical_binary(doc: Any, json_options: JSONOptions) -> Union[Binary
return _binary_or_uuid(data, int(subtype, 16), json_options)
def _parse_canonical_datetime(doc: Any, json_options: JSONOptions) -> datetime.datetime:
def _parse_canonical_datetime(
doc: Any, json_options: JSONOptions
) -> Union[datetime.datetime, DatetimeMS]:
"""Decode a JSON datetime to python datetime.datetime."""
dtm = doc["$date"]
if len(doc) != 1:
@ -647,10 +662,15 @@ def _parse_canonical_datetime(doc: Any, json_options: JSONOptions) -> datetime.d
if json_options.tz_aware:
if json_options.tzinfo:
aware = aware.astimezone(json_options.tzinfo)
if json_options.datetime_conversion == DatetimeConversionOpts.DATETIME_MS:
return DatetimeMS(aware)
return aware
else:
return aware.replace(tzinfo=None)
return bson._millis_to_datetime(int(dtm), json_options)
aware_tzinfo_none = aware.replace(tzinfo=None)
if json_options.datetime_conversion == DatetimeConversionOpts.DATETIME_MS:
return DatetimeMS(aware_tzinfo_none)
return aware_tzinfo_none
return _millis_to_datetime(int(dtm), json_options)
def _parse_canonical_oid(doc: Any) -> ObjectId:
@ -806,10 +826,19 @@ def default(obj: Any, json_options: JSONOptions = DEFAULT_JSON_OPTIONS) -> Any:
"$date": "%s%s%s" % (obj.strftime("%Y-%m-%dT%H:%M:%S"), fracsecs, tz_string)
}
millis = bson._datetime_to_millis(obj)
millis = _datetime_to_millis(obj)
if json_options.datetime_representation == DatetimeRepresentation.LEGACY:
return {"$date": millis}
return {"$date": {"$numberLong": str(millis)}}
if isinstance(obj, DatetimeMS):
if (
json_options.datetime_representation == DatetimeRepresentation.ISO8601
and 0 <= int(obj) <= _max_datetime_ms()
):
return default(obj.as_datetime(), json_options)
elif json_options.datetime_representation == DatetimeRepresentation.LEGACY:
return {"$date": str(int(obj))}
return {"$date": {"$numberLong": str(int(obj))}}
if json_options.strict_number_long and isinstance(obj, Int64):
return {"$numberLong": str(obj)}
if isinstance(obj, (RE_TYPE, Regex)):

View File

@ -0,0 +1,4 @@
:mod:`datetime_ms` -- Support for BSON UTC Datetime
===================================================
.. automodule:: bson.datetime_ms
:members:

View File

@ -13,6 +13,7 @@ Sub-modules:
binary
code
codec_options
datetime_ms
dbref
decimal128
errors

View File

@ -102,3 +102,57 @@ out of MongoDB in US/Pacific time:
>>> result = aware_times.find_one()
datetime.datetime(2002, 10, 27, 6, 0, # doctest: +NORMALIZE_WHITESPACE
tzinfo=<DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>)
.. _handling-out-of-range-datetimes:
Handling out of range datetimes
-------------------------------
Python's :class:`~datetime.datetime` can only represent datetimes within the
range allowed by
:attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`, whereas
the range of datetimes allowed in BSON can represent any 64-bit number
of milliseconds from the Unix epoch. To deal with this, we can use the
:class:`bson.datetime_ms.DatetimeMS` object, which is a wrapper for the
:class:`int` built-in.
To decode UTC datetime values as :class:`~bson.datetime_ms.DatetimeMS`,
:class:`~bson.codec_options.CodecOptions` should have its
``datetime_conversion`` parameter set to one of the options available in
:class:`bson.codec_options.DatetimeConversionOpts`. These include
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME`,
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME_MS`,
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME_AUTO`, and
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME_CLAMP`.
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME` is the default
option and has the behavior of raising an exception upon attempting to
decode an out-of-range date.
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME_MS` will only return
:class:`~bson.datetime_ms.DatetimeMS` objects, regardless of whether the
represented datetime is in- or out-of-range.
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME_AUTO` will return
:class:`~datetime.datetime` if the underlying UTC datetime is within range,
or :class:`~bson.datetime_ms.DatetimeMS` if the underlying datetime
cannot be represented using the builtin Python :class:`~datetime.datetime`.
:attr:`~bson.codec_options.DatetimeConversionOpts.DATETIME_CLAMP` will clamp
resulting :class:`~datetime.datetime` objects to be within
:attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`
(trimmed to `999000` microseconds).
An example of encoding and decoding using `DATETIME_MS` is as follows:
.. doctest::
>>> from datetime import datetime
>>> from bson import encode, decode
>>> from bson.datetime_ms import DatetimeMS
>>> from bson.codec_options import CodecOptions,DatetimeConversionOpts
>>> x = encode({"x": datetime(1970, 1, 1)})
>>> x
b'\x10\x00\x00\x00\tx\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
>>> decode(x, codec_options=CodecOptions(datetime_conversion=DatetimeConversionOpts.DATETIME_MS))
{'x': DatetimeMS(0)}
:class:`~bson.datetime_ms.DatetimeMS` objects have support for rich comparison
methods against other instances of :class:`~bson.datetime_ms.DatetimeMS`.
They can also be converted to :class:`~datetime.datetime` objects with
:meth:`~bson.datetime_ms.DatetimeMS.as_datetime()`.

View File

@ -36,7 +36,7 @@ from urllib.parse import unquote_plus
from bson import SON
from bson.binary import UuidRepresentation
from bson.codec_options import CodecOptions, TypeRegistry
from bson.codec_options import CodecOptions, DatetimeConversionOpts, TypeRegistry
from bson.raw_bson import RawBSONDocument
from pymongo.auth import MECHANISMS
from pymongo.compression_support import (
@ -620,6 +620,21 @@ def validate_auto_encryption_opts_or_none(option: Any, value: Any) -> Optional[A
return value
def validate_datetime_conversion(option: Any, value: Any) -> Optional[DatetimeConversionOpts]:
    """Validate a DatetimeConversionOpts value.

    Accepts None (mapped to ``DatetimeConversionOpts.DATETIME``), an int
    member value, a string of digits naming a member value, or a string
    naming a member. Anything else raises :exc:`TypeError`.
    """
    if value is None:
        return DatetimeConversionOpts.DATETIME

    if isinstance(value, str):
        # Digit strings are looked up by value, other strings by member name.
        if value.isdigit():
            return DatetimeConversionOpts(int(value))
        return DatetimeConversionOpts[value]

    if isinstance(value, int):
        return DatetimeConversionOpts(value)

    raise TypeError("%s must be a str or int representing DatetimeConversionOpts" % (option,))
# Dictionary where keys are the names of public URI options, and values
# are lists of aliases for that option.
URI_OPTIONS_ALIAS_MAP: Dict[str, List[str]] = {
@ -684,6 +699,7 @@ NONSPEC_OPTIONS_VALIDATOR_MAP: Dict[str, Callable[[Any, Any], Any]] = {
"uuidrepresentation": validate_uuid_representation,
"waitqueuemultiple": validate_non_negative_integer_or_none,
"waitqueuetimeoutms": validate_timeout_or_none,
"datetime_conversion": validate_datetime_conversion,
}
# Dictionary where keys are the names of keyword-only options for the

View File

@ -239,6 +239,14 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
- `type_registry` (optional): instance of
:class:`~bson.codec_options.TypeRegistry` to enable encoding
and decoding of custom types.
- `datetime_conversion`: Specifies how UTC datetimes should be decoded
within BSON. Valid options include 'datetime_ms' to return as a
DatetimeMS, 'datetime' to return as a datetime.datetime and
raising a ValueError for out-of-range values, 'datetime_auto' to
return DatetimeMS objects when the underlying datetime is
out-of-range and 'datetime_clamp' to clamp to the minimum and
maximum possible datetimes. Defaults to 'datetime'. See
:ref:`handling-out-of-range-datetimes` for details.
| **Other optional parameters can be passed as keyword arguments:**

View File

@ -38,7 +38,9 @@ import bson
from bson import (
BSON,
EPOCH_AWARE,
DatetimeMS,
Regex,
_datetime_to_millis,
decode,
decode_all,
decode_file_iter,
@ -48,7 +50,7 @@ from bson import (
)
from bson.binary import Binary, UuidRepresentation
from bson.code import Code
from bson.codec_options import CodecOptions
from bson.codec_options import CodecOptions, DatetimeConversionOpts
from bson.dbref import DBRef
from bson.errors import InvalidBSON, InvalidDocument
from bson.int64 import Int64
@ -978,7 +980,7 @@ class TestCodecOptions(unittest.TestCase):
"uuid_representation=UuidRepresentation.UNSPECIFIED, "
"unicode_decode_error_handler='strict', "
"tzinfo=None, type_registry=TypeRegistry(type_codecs=[], "
"fallback_encoder=None))"
"fallback_encoder=None), datetime_conversion=1)"
)
self.assertEqual(r, repr(CodecOptions()))
@ -1153,5 +1155,169 @@ class TestCodecOptions(unittest.TestCase):
self.assertTrue(decoded["_id"].generation_time)
class TestDatetimeConversion(unittest.TestCase):
def test_comps(self):
# Tests other timestamp formats.
# Test each of the rich comparison methods.
pairs = [
(DatetimeMS(-1), DatetimeMS(1)),
(DatetimeMS(0), DatetimeMS(0)),
(DatetimeMS(1), DatetimeMS(-1)),
]
comp_ops = ["__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__"]
for lh, rh in pairs:
for op in comp_ops:
self.assertEqual(getattr(lh, op)(rh), getattr(lh._value, op)(rh._value))
def test_class_conversions(self):
# Test class conversions.
dtr1 = DatetimeMS(1234)
dt1 = dtr1.as_datetime()
self.assertEqual(dtr1, DatetimeMS(dt1))
dt2 = datetime.datetime(1969, 1, 1)
dtr2 = DatetimeMS(dt2)
self.assertEqual(dtr2.as_datetime(), dt2)
# Test encode and decode without codec options. Expect: DatetimeMS => datetime
dtr1 = DatetimeMS(0)
enc1 = encode({"x": dtr1})
dec1 = decode(enc1)
self.assertEqual(dec1["x"], datetime.datetime(1970, 1, 1))
self.assertNotEqual(type(dtr1), type(dec1["x"]))
# Test encode and decode with codec options. Expect: UTCDateimteRaw => DatetimeMS
opts1 = CodecOptions(datetime_conversion=DatetimeConversionOpts.DATETIME_MS)
enc1 = encode({"x": dtr1})
dec1 = decode(enc1, opts1)
self.assertEqual(type(dtr1), type(dec1["x"]))
self.assertEqual(dtr1, dec1["x"])
# Expect: datetime => DatetimeMS
opts1 = CodecOptions(datetime_conversion=DatetimeConversionOpts.DATETIME_MS)
dt1 = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
enc1 = encode({"x": dt1})
dec1 = decode(enc1, opts1)
self.assertEqual(dec1["x"], DatetimeMS(0))
self.assertNotEqual(dt1, type(dec1["x"]))
def test_clamping(self):
# Test clamping from below and above.
opts1 = CodecOptions(
datetime_conversion=DatetimeConversionOpts.DATETIME_CLAMP,
tz_aware=True,
tzinfo=datetime.timezone.utc,
)
below = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.min) - 1)})
dec_below = decode(below, opts1)
self.assertEqual(
dec_below["x"], datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
)
above = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.max) + 1)})
dec_above = decode(above, opts1)
self.assertEqual(
dec_above["x"],
datetime.datetime.max.replace(tzinfo=datetime.timezone.utc, microsecond=999000),
)
def test_tz_clamping(self):
# Naive clamping to local tz.
opts1 = CodecOptions(
datetime_conversion=DatetimeConversionOpts.DATETIME_CLAMP, tz_aware=False
)
below = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.min) - 24 * 60 * 60)})
dec_below = decode(below, opts1)
self.assertEqual(dec_below["x"], datetime.datetime.min)
above = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.max) + 24 * 60 * 60)})
dec_above = decode(above, opts1)
self.assertEqual(
dec_above["x"],
datetime.datetime.max.replace(microsecond=999000),
)
# Aware clamping.
opts2 = CodecOptions(
datetime_conversion=DatetimeConversionOpts.DATETIME_CLAMP, tz_aware=True
)
below = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.min) - 24 * 60 * 60)})
dec_below = decode(below, opts2)
self.assertEqual(
dec_below["x"], datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
)
above = encode({"x": DatetimeMS(_datetime_to_millis(datetime.datetime.max) + 24 * 60 * 60)})
dec_above = decode(above, opts2)
self.assertEqual(
dec_above["x"],
datetime.datetime.max.replace(tzinfo=datetime.timezone.utc, microsecond=999000),
)
def test_datetime_auto(self):
    # DATETIME_AUTO: in-range values decode to datetime.datetime, while
    # out-of-range values come back as DatetimeMS, for both naive and
    # tz-aware codec options.
    #
    # The in-range documents are decoded with the options under test.
    # Decoding them with the default options (as this test used to do)
    # never exercises DATETIME_AUTO, because datetime_conversion only
    # matters when decoding.
    epoch = datetime.datetime(1970, 1, 1)
    offset = 24 * 60 * 60
    below_ms = _datetime_to_millis(datetime.datetime.min) - offset
    above_ms = _datetime_to_millis(datetime.datetime.max) + offset

    naive_opts = CodecOptions(datetime_conversion=DatetimeConversionOpts.DATETIME_AUTO)
    aware_opts = CodecOptions(
        datetime_conversion=DatetimeConversionOpts.DATETIME_AUTO,
        tz_aware=True,
        tzinfo=datetime.timezone.utc,
    )
    cases = (
        # Naive auto: the in-range value decodes to a naive datetime.
        (naive_opts, epoch),
        # Aware auto: the in-range value decodes to a UTC-aware datetime.
        (aware_opts, epoch.replace(tzinfo=datetime.timezone.utc)),
    )

    for opts, expected_in_range in cases:
        with self.subTest(tz_aware=opts.tz_aware):
            # In range.
            inr = encode({"x": epoch}, codec_options=opts)
            dec_inr = decode(inr, opts)
            self.assertEqual(dec_inr["x"], expected_in_range)

            # Below range.
            below = encode({"x": DatetimeMS(below_ms)})
            dec_below = decode(below, opts)
            self.assertEqual(dec_below["x"], DatetimeMS(below_ms))

            # Above range.
            above = encode({"x": DatetimeMS(above_ms)})
            dec_above = decode(above, opts)
            self.assertEqual(dec_above["x"], DatetimeMS(above_ms))
def test_millis_from_datetime_ms(self):
    # A millisecond count too wide for a 64-bit integer must make encode()
    # raise OverflowError.
    with self.assertRaises(OverflowError):
        encode({"x": DatetimeMS(2**65)})

    # A DatetimeMS subclass whose __int__ hands back a float instead of an
    # int must make encode() raise TypeError.
    class FloatReturningDatetimeMS(DatetimeMS):
        def __int__(self):
            return float(self._value)

    bad_value = FloatReturningDatetimeMS(2)
    with self.assertRaises(TypeError):
        encode({"x": bad_value})
if __name__ == "__main__":
unittest.main()

View File

@ -65,7 +65,12 @@ from test.utils import (
import pymongo
from bson import encode
from bson.codec_options import CodecOptions, TypeEncoder, TypeRegistry
from bson.codec_options import (
CodecOptions,
DatetimeConversionOpts,
TypeEncoder,
TypeRegistry,
)
from bson.son import SON
from bson.tz_util import utc
from pymongo import event_loggers, message, monitoring
@ -386,14 +391,17 @@ class ClientUnitTest(unittest.TestCase):
# Ensure codec options are passed in correctly
uuid_representation_label = "javaLegacy"
unicode_decode_error_handler = "ignore"
datetime_conversion = "DATETIME_CLAMP"
uri = (
"mongodb://%s:%d/foo?tz_aware=true&uuidrepresentation="
"%s&unicode_decode_error_handler=%s"
"&datetime_conversion=%s"
% (
client_context.host,
client_context.port,
uuid_representation_label,
unicode_decode_error_handler,
datetime_conversion,
)
)
c = MongoClient(uri, connect=False)
@ -403,6 +411,19 @@ class ClientUnitTest(unittest.TestCase):
c.codec_options.uuid_representation, _UUID_REPRESENTATIONS[uuid_representation_label]
)
self.assertEqual(c.codec_options.unicode_decode_error_handler, unicode_decode_error_handler)
self.assertEqual(
c.codec_options.datetime_conversion, DatetimeConversionOpts[datetime_conversion]
)
# Change the passed datetime_conversion to a number and re-assert.
uri = uri.replace(
datetime_conversion, f"{int(DatetimeConversionOpts[datetime_conversion])}"
)
c = MongoClient(uri, connect=False)
self.assertEqual(
c.codec_options.datetime_conversion, DatetimeConversionOpts[datetime_conversion]
)
def test_uri_option_precedence(self):
# Ensure kwarg options override connection string options.

View File

@ -21,11 +21,13 @@ import sys
import uuid
from typing import Any, List, MutableMapping
from bson.codec_options import CodecOptions, DatetimeConversionOpts
sys.path[0:0] = [""]
from test import IntegrationTest, unittest
from bson import EPOCH_AWARE, EPOCH_NAIVE, SON, json_util
from bson import EPOCH_AWARE, EPOCH_NAIVE, SON, DatetimeMS, json_util
from bson.binary import (
ALL_UUID_REPRESENTATIONS,
MD5_SUBTYPE,
@ -35,6 +37,7 @@ from bson.binary import (
UuidRepresentation,
)
from bson.code import Code
from bson.datetime_ms import _max_datetime_ms
from bson.dbref import DBRef
from bson.int64 import Int64
from bson.json_util import (
@ -241,6 +244,69 @@ class TestJsonUtil(unittest.TestCase):
),
)
def test_datetime_ms(self):
    # Exercise json_util's handling of DatetimeMS under each datetime
    # representation, plus decoding of dumped datetimes into DatetimeMS.
    naive = CodecOptions(tz_aware=False)
    max_ms = _max_datetime_ms()

    # Test ISO8601 in-range: DatetimeMS values from 0 up to the maximum
    # round-trip to the equivalent naive datetime. The ISO8601 options are
    # passed explicitly so this section does not depend on the module
    # defaults (they were previously built but never used).
    iso_opts = JSONOptions(datetime_representation=DatetimeRepresentation.ISO8601)
    for in_range in (DatetimeMS(0), DatetimeMS(max_ms)):
        self.assertEqual(
            in_range.as_datetime(naive),
            json_util.loads(json_util.dumps({"x": in_range}, json_options=iso_opts))["x"],
        )

    # Test ISO8601 out-of-range: values outside [0, max] are dumped as
    # $numberLong milliseconds.
    dat_min = {"x": DatetimeMS(-1)}
    dat_max = {"x": DatetimeMS(max_ms + 1)}
    max_str = str(int(dat_max["x"]))
    self.assertEqual(
        '{"x": {"$date": {"$numberLong": "-1"}}}',
        json_util.dumps(dat_min, json_options=iso_opts),
    )
    self.assertEqual(
        '{"x": {"$date": {"$numberLong": "' + max_str + '"}}}',
        json_util.dumps(dat_max, json_options=iso_opts),
    )

    # Test legacy: milliseconds are dumped as a bare string under $date.
    legacy_opts = JSONOptions(
        datetime_representation=DatetimeRepresentation.LEGACY, json_mode=JSONMode.LEGACY
    )
    self.assertEqual(
        '{"x": {"$date": "-1"}}', json_util.dumps(dat_min, json_options=legacy_opts)
    )
    self.assertEqual(
        '{"x": {"$date": "' + max_str + '"}}',
        json_util.dumps(dat_max, json_options=legacy_opts),
    )

    # Test regular: NUMBERLONG representation in legacy JSON mode.
    numberlong_opts = JSONOptions(
        datetime_representation=DatetimeRepresentation.NUMBERLONG, json_mode=JSONMode.LEGACY
    )
    self.assertEqual(
        '{"x": {"$date": {"$numberLong": "-1"}}}',
        json_util.dumps(dat_min, json_options=numberlong_opts),
    )
    self.assertEqual(
        '{"x": {"$date": {"$numberLong": "' + max_str + '"}}}',
        json_util.dumps(dat_max, json_options=numberlong_opts),
    )

    # Test decode from datetime.datetime to DatetimeMS: with
    # DATETIME_MS conversion, loads() yields DatetimeMS instead of datetime.
    ms_opts = JSONOptions(
        datetime_representation=DatetimeRepresentation.ISO8601,
        datetime_conversion=DatetimeConversionOpts.DATETIME_MS,
    )
    for as_dt in (datetime.datetime.min, DatetimeMS(max_ms).as_datetime(naive)):
        self.assertEqual(
            DatetimeMS(as_dt),
            json_util.loads(json_util.dumps({"x": as_dt}), json_options=ms_opts)["x"],
        )
def test_regex_object_hook(self):
# Extended JSON format regular expression.
pat = "a*b"