From 474420b2e5b8318c58f596a9f5b4d3ed6a871ccd Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 16 Mar 2022 05:56:07 -0500 Subject: [PATCH] PYTHON-3085 Audit consistent and correct types for _DocumentOut (#893) --- .evergreen/config.yml | 2 +- .github/workflows/test-python.yml | 2 + README.rst | 2 +- bson/__init__.py | 111 +++++++------ bson/_cbsonmodule.c | 2 +- bson/codec_options.py | 63 +++---- bson/codec_options.pyi | 100 +++++++++++ doc/changelog.rst | 6 +- doc/examples/tls.rst | 2 +- doc/faq.rst | 2 +- doc/installation.rst | 4 +- doc/migrate-to-pymongo4.rst | 2 +- doc/python3.rst | 18 +- pymongo/collection.py | 2 +- pymongo/database.py | 13 +- pymongo/encryption.py | 4 +- pymongo/message.py | 8 +- pymongo/mongo_client.py | 5 +- pymongo/monitoring.py | 11 +- setup.py | 6 +- test/test_binary.py | 20 +-- test/test_bson.py | 23 ++- test/test_bson_corpus.py | 4 +- test/test_custom_types.py | 4 +- test/test_mypy.py | 267 +++++++++++++++++++++++++----- 25 files changed, 504 insertions(+), 179 deletions(-) create mode 100644 bson/codec_options.pyi diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 2e3c12f3f..ef60eaf7d 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -1726,7 +1726,7 @@ tasks: vars: VERSION: "latest" TOPOLOGY: "server" - PYTHON_BINARY: "/opt/mongodbtoolchain/v2/bin/python3" + PYTHON_BINARY: "/opt/mongodbtoolchain/v3/bin/python3" - func: "run tests" # }}} - name: "coverage-report" diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 8eec9d9bf..ba9b99e06 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -64,6 +64,8 @@ jobs: - name: Run mypy run: | mypy --install-types --non-interactive bson gridfs tools pymongo + # Test overshadowed codec_options.py file + mypy --install-types --non-interactive bson/codec_options.py mypy --install-types --non-interactive --disable-error-code var-annotated --disable-error-code attr-defined --disable-error-code union-attr --disable-error-code assignment --disable-error-code no-redef --disable-error-code index --allow-redefinition --allow-untyped-globals --exclude "test/mypy_fails/*.*" test linkcheck: diff --git a/README.rst b/README.rst index 390599a6c..fedb9e14d 100644 --- a/README.rst +++ b/README.rst @@ -88,7 +88,7 @@ is incompatible with PyMongo. Dependencies ============ -PyMongo supports CPython 3.6+ and PyPy3.6+. +PyMongo supports CPython 3.6.2+ and PyPy3.6+. Optional dependencies: diff --git a/bson/__init__.py b/bson/__init__.py index a287db180..343fbecb2 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -76,6 +76,7 @@ from typing import ( List, Mapping, MutableMapping, + Optional, Sequence, Tuple, Type, @@ -95,7 +96,12 @@ from bson.binary import ( # noqa: F401 UuidRepresentation, ) from bson.code import Code -from bson.codec_options import DEFAULT_CODEC_OPTIONS, CodecOptions, _raw_document_class +from bson.codec_options import ( + DEFAULT_CODEC_OPTIONS, + CodecOptions, + _DocumentType, + _raw_document_class, +) from bson.dbref import DBRef from bson.decimal128 import Decimal128 from bson.errors import InvalidBSON, InvalidDocument, InvalidStringData @@ -108,13 +114,11 @@ from bson.son import RE_TYPE, SON from bson.timestamp import Timestamp from bson.tz_util import utc -# Import RawBSONDocument for type-checking only to avoid circular dependency. +# Import some modules for type-checking only. if TYPE_CHECKING: from array import array from mmap import mmap - from bson.raw_bson import RawBSONDocument - try: from bson import _cbson # type: ignore[attr-defined] @@ -181,7 +185,7 @@ def _get_int( return _UNPACK_INT_FROM(data, position)[0], position + 4 -def _get_c_string(data: Any, view: Any, position: int, opts: Any) -> Tuple[str, int]: +def _get_c_string(data: Any, view: Any, position: int, opts: CodecOptions) -> Tuple[str, int]: """Decode a BSON 'C' string to python str.""" end = data.index(b"\x00", position) return _utf_8_decode(view[position:end], opts.unicode_decode_error_handler, True)[0], end + 1 @@ -195,7 +199,7 @@ def _get_float( def _get_string( - data: Any, view: Any, position: int, obj_end: int, opts: Any, dummy: Any + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy: Any ) -> Tuple[str, int]: """Decode a BSON string to python str.""" length = _UNPACK_INT_FROM(data, position)[0] @@ -226,7 +230,7 @@ def _get_object_size(data: Any, position: int, obj_end: int) -> Tuple[int, int]: def _get_object( - data: Any, view: Any, position: int, obj_end: int, opts: Any, dummy: Any + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy: Any ) -> Tuple[Any, int]: """Decode a BSON subdocument to opts.document_class or bson.dbref.DBRef.""" obj_size, end = _get_object_size(data, position, obj_end) @@ -247,7 +251,7 @@ def _get_object( def _get_array( - data: Any, view: Any, position: int, obj_end: int, opts: Any, element_name: str + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str ) -> Tuple[Any, int]: """Decode a BSON array to python list.""" size = _UNPACK_INT_FROM(data, position)[0] @@ -289,7 +293,7 @@ def _get_array( def _get_binary( - data: Any, view: Any, position: int, obj_end: int, opts: Any, dummy1: Any + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy1: Any ) -> Tuple[Union[Binary, uuid.UUID], int]: """Decode a BSON binary to bson.binary.Binary or python UUID.""" length, subtype = _UNPACK_LENGTH_SUBTYPE_FROM(data, position) @@ -347,14 +351,14 @@ def _get_boolean( def _get_date( - data: Any, view: Any, position: int, dummy0: int, opts: Any, dummy1: Any + data: Any, view: Any, position: int, dummy0: int, opts: CodecOptions, dummy1: Any ) -> Tuple[datetime.datetime, int]: """Decode a BSON datetime to python datetime.datetime.""" return _millis_to_datetime(_UNPACK_LONG_FROM(data, position)[0], opts), position + 8 def _get_code( - data: Any, view: Any, position: int, obj_end: int, opts: Any, element_name: str + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str ) -> Tuple[Code, int]: """Decode a BSON code to bson.code.Code.""" code, position = _get_string(data, view, position, obj_end, opts, element_name) @@ -362,7 +366,7 @@ def _get_code( def _get_code_w_scope( - data: Any, view: Any, position: int, obj_end: int, opts: Any, element_name: str + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str ) -> Tuple[Code, int]: """Decode a BSON code_w_scope to bson.code.Code.""" code_end = position + _UNPACK_INT_FROM(data, position)[0] @@ -374,7 +378,7 @@ def _get_code_w_scope( def _get_regex( - data: Any, view: Any, position: int, dummy0: Any, opts: Any, dummy1: Any + data: Any, view: Any, position: int, dummy0: Any, opts: CodecOptions, dummy1: Any ) -> Tuple[Regex, int]: """Decode a BSON regex to bson.regex.Regex or a python pattern object.""" pattern, position = _get_c_string(data, view, position, opts) @@ -384,7 +388,7 @@ def _get_regex( def _get_ref( - data: Any, view: Any, position: int, obj_end: int, opts: Any, element_name: str + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str ) -> Tuple[DBRef, int]: """Decode (deprecated) BSON DBPointer to bson.dbref.DBRef.""" collection, position = _get_string(data, view, position, obj_end, opts, element_name) @@ -448,12 +452,16 @@ _ELEMENT_GETTER: Dict[int, Callable[..., Tuple[Any, int]]] = { if _USE_C: - def _element_to_dict(data: Any, view: Any, position: int, obj_end: int, opts: Any) -> Any: + def _element_to_dict( + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions + ) -> Any: return _cbson._element_to_dict(data, position, obj_end, opts) else: - def _element_to_dict(data: Any, view: Any, position: int, obj_end: int, opts: Any) -> Any: + def _element_to_dict( + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions + ) -> Any: """Decode a single key, value pair.""" element_type = data[position] position += 1 @@ -476,13 +484,13 @@ else: _T = TypeVar("_T", bound=MutableMapping[Any, Any]) -def _raw_to_dict(data: Any, position: int, obj_end: int, opts: Any, result: _T) -> _T: +def _raw_to_dict(data: Any, position: int, obj_end: int, opts: CodecOptions, result: _T) -> _T: data, view = get_data_and_view(data) return _elements_to_dict(data, view, position, obj_end, opts, result) def _elements_to_dict( - data: Any, view: Any, position: int, obj_end: int, opts: Any, result: Any = None + data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, result: Any = None ) -> Any: """Decode a BSON document into result.""" if result is None: @@ -496,7 +504,7 @@ def _elements_to_dict( return result -def _bson_to_dict(data: Any, opts: Any) -> Any: +def _bson_to_dict(data: Any, opts: CodecOptions) -> Any: """Decode a BSON string to document_class.""" data, view = get_data_and_view(data) try: @@ -586,7 +594,7 @@ def _encode_bytes(name: bytes, value: bytes, dummy0: Any, dummy1: Any) -> bytes: return b"\x05" + name + _PACK_INT(len(value)) + b"\x00" + value -def _encode_mapping(name: bytes, value: Any, check_keys: bool, opts: Any) -> bytes: +def _encode_mapping(name: bytes, value: Any, check_keys: bool, opts: CodecOptions) -> bytes: """Encode a mapping type.""" if _raw_document_class(value): return b"\x03" + name + value.raw @@ -594,7 +602,7 @@ def _encode_mapping(name: bytes, value: Any, check_keys: bool, opts: Any) -> byt return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00" -def _encode_dbref(name: bytes, value: DBRef, check_keys: bool, opts: Any) -> bytes: +def _encode_dbref(name: bytes, value: DBRef, check_keys: bool, opts: CodecOptions) -> bytes: """Encode bson.dbref.DBRef.""" buf = bytearray(b"\x03" + name + b"\x00\x00\x00\x00") begin = len(buf) - 4 @@ -611,7 +619,7 @@ def _encode_dbref(name: bytes, value: DBRef, check_keys: bool, opts: Any) -> byt return bytes(buf) -def _encode_list(name: bytes, value: Sequence[Any], check_keys: bool, opts: Any) -> bytes: +def _encode_list(name: bytes, value: Sequence[Any], check_keys: bool, opts: CodecOptions) -> bytes: """Encode a list/tuple.""" lname = gen_list_name() data = b"".join([_name_value_to_bson(next(lname), item, check_keys, opts) for item in value]) @@ -620,8 +628,8 @@ def _encode_list(name: bytes, value: Sequence[Any], check_keys: bool, opts: Any) def _encode_text(name: bytes, value: str, dummy0: Any, dummy1: Any) -> bytes: """Encode a python str.""" - value = _utf_8_encode(value)[0] - return b"\x02" + name + _PACK_INT(len(value) + 1) + value + b"\x00" # type: ignore + bvalue = _utf_8_encode(value)[0] + return b"\x02" + name + _PACK_INT(len(bvalue) + 1) + bvalue + b"\x00" def _encode_binary(name: bytes, value: Binary, dummy0: Any, dummy1: Any) -> bytes: @@ -632,7 +640,7 @@ def _encode_binary(name: bytes, value: Binary, dummy0: Any, dummy1: Any) -> byte return b"\x05" + name + _PACK_LENGTH_SUBTYPE(len(value), subtype) + value -def _encode_uuid(name: bytes, value: uuid.UUID, dummy: Any, opts: Any) -> bytes: +def _encode_uuid(name: bytes, value: uuid.UUID, dummy: Any, opts: CodecOptions) -> bytes: """Encode uuid.UUID.""" uuid_representation = opts.uuid_representation binval = Binary.from_uuid(value, uuid_representation=uuid_representation) @@ -686,7 +694,7 @@ def _encode_regex(name: bytes, value: Regex, dummy0: Any, dummy1: Any) -> bytes: return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags -def _encode_code(name: bytes, value: Code, dummy: Any, opts: Any) -> bytes: +def _encode_code(name: bytes, value: Code, dummy: Any, opts: CodecOptions) -> bytes: """Encode bson.code.Code.""" cstring = _make_c_string(value) cstrlen = len(cstring) @@ -790,7 +798,7 @@ def _name_value_to_bson( name: bytes, value: Any, check_keys: bool, - opts: Any, + opts: CodecOptions, in_custom_call: bool = False, in_fallback_call: bool = False, ) -> bytes: @@ -843,7 +851,7 @@ def _name_value_to_bson( raise InvalidDocument("cannot encode object: %r, of type: %r" % (value, type(value))) -def _element_to_bson(key: Any, value: Any, check_keys: bool, opts: Any) -> bytes: +def _element_to_bson(key: Any, value: Any, check_keys: bool, opts: CodecOptions) -> bytes: """Encode a single key, value pair.""" if not isinstance(key, str): raise InvalidDocument("documents must have only string keys, key was %r" % (key,)) @@ -857,7 +865,7 @@ def _element_to_bson(key: Any, value: Any, check_keys: bool, opts: Any) -> bytes return _name_value_to_bson(name, value, check_keys, opts) -def _dict_to_bson(doc: Any, check_keys: bool, opts: Any, top_level: bool = True) -> bytes: +def _dict_to_bson(doc: Any, check_keys: bool, opts: CodecOptions, top_level: bool = True) -> bytes: """Encode a document to BSON.""" if _raw_document_class(doc): return cast(bytes, doc.raw) @@ -879,7 +887,7 @@ if _USE_C: _dict_to_bson = _cbson._dict_to_bson # noqa: F811 -def _millis_to_datetime(millis: int, opts: Any) -> datetime.datetime: +def _millis_to_datetime(millis: int, opts: CodecOptions) -> datetime.datetime: """Convert milliseconds since epoch UTC to datetime.""" diff = ((millis % 1000) + 1000) % 1000 seconds = (millis - diff) // 1000 @@ -904,7 +912,6 @@ _CODEC_OPTIONS_TYPE_ERROR = TypeError("codec_options must be an instance of Code _DocumentIn = Mapping[str, Any] -_DocumentOut = Union[MutableMapping[str, Any], "RawBSONDocument"] _ReadableBuffer = Union[bytes, memoryview, "mmap", "array"] @@ -940,8 +947,8 @@ def encode( def decode( - data: _ReadableBuffer, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS -) -> Dict[str, Any]: + data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None +) -> _DocumentType: """Decode BSON to a document. By default, returns a BSON document represented as a Python @@ -967,15 +974,16 @@ def decode( .. versionadded:: 3.9 """ - if not isinstance(codec_options, CodecOptions): + opts: CodecOptions = codec_options or DEFAULT_CODEC_OPTIONS + if not isinstance(opts, CodecOptions): raise _CODEC_OPTIONS_TYPE_ERROR - return _bson_to_dict(data, codec_options) + return _bson_to_dict(data, opts) def decode_all( - data: _ReadableBuffer, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS -) -> List[Dict[str, Any]]: + data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None +) -> List[_DocumentType]: """Decode BSON data to multiple documents. `data` must be a bytes-like object implementing the buffer protocol that @@ -998,15 +1006,16 @@ def decode_all( Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with `codec_options`. """ + opts = codec_options or DEFAULT_CODEC_OPTIONS data, view = get_data_and_view(data) - if not isinstance(codec_options, CodecOptions): + if not isinstance(opts, CodecOptions): raise _CODEC_OPTIONS_TYPE_ERROR data_len = len(data) - docs = [] + docs: List[_DocumentType] = [] position = 0 end = data_len - 1 - use_raw = _raw_document_class(codec_options.document_class) + use_raw = _raw_document_class(opts.document_class) try: while position < end: obj_size = _UNPACK_INT_FROM(data, position)[0] @@ -1017,10 +1026,10 @@ def decode_all( raise InvalidBSON("bad eoo") if use_raw: docs.append( - codec_options.document_class(data[position : obj_end + 1], codec_options) + opts.document_class(data[position : obj_end + 1], codec_options) # type: ignore ) else: - docs.append(_elements_to_dict(data, view, position + 4, obj_end, codec_options)) + docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts)) position += obj_size return docs except InvalidBSON: @@ -1110,8 +1119,8 @@ def _decode_all_selective(data: Any, codec_options: CodecOptions, fields: Any) - def decode_iter( - data: bytes, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS -) -> Iterator[_DocumentOut]: + data: bytes, codec_options: "Optional[CodecOptions[_DocumentType]]" = None +) -> Iterator[_DocumentType]: """Decode BSON data to multiple documents as a generator. Works similarly to the decode_all function, but yields one document at a @@ -1131,7 +1140,8 @@ def decode_iter( .. versionadded:: 2.8 """ - if not isinstance(codec_options, CodecOptions): + opts = codec_options or DEFAULT_CODEC_OPTIONS + if not isinstance(opts, CodecOptions): raise _CODEC_OPTIONS_TYPE_ERROR position = 0 @@ -1141,12 +1151,12 @@ def decode_iter( elements = data[position : position + obj_size] position += obj_size - yield _bson_to_dict(elements, codec_options) + yield _bson_to_dict(elements, opts) def decode_file_iter( - file_obj: Union[BinaryIO, IO], codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS -) -> Iterator[_DocumentOut]: + file_obj: Union[BinaryIO, IO], codec_options: "Optional[CodecOptions[_DocumentType]]" = None +) -> Iterator[_DocumentType]: """Decode bson data from a file to multiple documents as a generator. Works similarly to the decode_all function, but reads from the file object @@ -1163,6 +1173,7 @@ def decode_file_iter( .. versionadded:: 2.8 """ + opts = codec_options or DEFAULT_CODEC_OPTIONS while True: # Read size of next object. size_data = file_obj.read(4) @@ -1172,7 +1183,7 @@ def decode_file_iter( raise InvalidBSON("cut off in middle of objsize") obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4 elements = size_data + file_obj.read(max(0, obj_size)) - yield _bson_to_dict(elements, codec_options) + yield _bson_to_dict(elements, opts) def is_valid(bson: bytes) -> bool: @@ -1233,7 +1244,7 @@ class BSON(bytes): """ return cls(encode(document, check_keys, codec_options)) - def decode(self, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS) -> Dict[str, Any]: # type: ignore[override] + def decode(self, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS) -> _DocumentType: # type: ignore[override] """Decode this BSON data. By default, returns a BSON document represented as a Python diff --git a/bson/_cbsonmodule.c b/bson/_cbsonmodule.c index 93610f7c5..8100e951c 100644 --- a/bson/_cbsonmodule.c +++ b/bson/_cbsonmodule.c @@ -2600,7 +2600,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { if (!PyArg_ParseTuple(args, "O|O", &bson, &options_obj)) { return NULL; } - if (PyTuple_GET_SIZE(args) < 2) { + if ((PyTuple_GET_SIZE(args) < 2) || (options_obj == Py_None)) { if (!default_codec_options(GETSTATE(self), &options)) { return NULL; } diff --git a/bson/codec_options.py b/bson/codec_options.py index b4436dfdb..4eaff59ea 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -16,17 +16,17 @@ import abc import datetime -from collections import namedtuple from collections.abc import MutableMapping as _MutableMapping from typing import ( - TYPE_CHECKING, Any, Callable, Dict, Iterable, - MutableMapping, + Mapping, + NamedTuple, Optional, Type, + TypeVar, Union, cast, ) @@ -37,10 +37,6 @@ from bson.binary import ( UuidRepresentation, ) -# Import RawBSONDocument for type-checking only to avoid circular dependency. -if TYPE_CHECKING: - from bson.raw_bson import RawBSONDocument - def _abstractproperty(func: Callable[..., Any]) -> property: return property(abc.abstractmethod(func)) @@ -115,7 +111,7 @@ class TypeCodec(TypeEncoder, TypeDecoder): _Codec = Union[TypeEncoder, TypeDecoder, TypeCodec] _Fallback = Callable[[Any], Any] -_DocumentClass = Union[Type[MutableMapping], Type["RawBSONDocument"]] +_DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any]) class TypeRegistry(object): @@ -152,8 +148,8 @@ class TypeRegistry(object): ) -> None: self.__type_codecs = list(type_codecs or []) self._fallback_encoder = fallback_encoder - self._encoder_map = {} - self._decoder_map = {} + self._encoder_map: Dict[Any, Any] = {} + self._decoder_map: Dict[Any, Any] = {} if self._fallback_encoder is not None: if not callable(fallback_encoder): @@ -202,20 +198,16 @@ class TypeRegistry(object): ) -_options_base = namedtuple( # type: ignore - "CodecOptions", - ( - "document_class", - "tz_aware", - "uuid_representation", - "unicode_decode_error_handler", - "tzinfo", - "type_registry", - ), -) +class _BaseCodecOptions(NamedTuple): + document_class: Type[Mapping[str, Any]] + tz_aware: bool + uuid_representation: int + unicode_decode_error_handler: str + tzinfo: Optional[datetime.tzinfo] + type_registry: TypeRegistry -class CodecOptions(_options_base): +class CodecOptions(_BaseCodecOptions): """Encapsulates options used encoding and / or decoding BSON. The `document_class` option is used to define a custom type for use @@ -250,7 +242,7 @@ class CodecOptions(_options_base): See :doc:`/examples/datetimes` for examples using the `tz_aware` and `tzinfo` options. - See :doc:`examples/uuid` for examples using the `uuid_representation` + See :doc:`/examples/uuid` for examples using the `uuid_representation` option. :Parameters: @@ -294,18 +286,27 @@ class CodecOptions(_options_base): def __new__( cls: Type["CodecOptions"], - document_class: _DocumentClass = dict, + document_class: Optional[Type[Mapping[str, Any]]] = None, tz_aware: bool = False, uuid_representation: Optional[int] = UuidRepresentation.UNSPECIFIED, - unicode_decode_error_handler: Optional[str] = "strict", + unicode_decode_error_handler: str = "strict", tzinfo: Optional[datetime.tzinfo] = None, type_registry: Optional[TypeRegistry] = None, ) -> "CodecOptions": - if not (issubclass(document_class, _MutableMapping) or _raw_document_class(document_class)): + doc_class = document_class or dict + # issubclass can raise TypeError for generic aliases like SON[str, Any]. + # In that case we can use the base class for the comparison. + is_mapping = False + try: + is_mapping = issubclass(doc_class, _MutableMapping) + except TypeError: + if hasattr(doc_class, "__origin__"): + is_mapping = issubclass(doc_class.__origin__, _MutableMapping) # type: ignore[union-attr] + if not (is_mapping or _raw_document_class(doc_class)): raise TypeError( "document_class must be dict, bson.son.SON, " "bson.raw_bson.RawBSONDocument, or a " - "sublass of collections.abc.MutableMapping" + "subclass of collections.abc.MutableMapping" ) if not isinstance(tz_aware, bool): raise TypeError("tz_aware must be True or False") @@ -313,8 +314,8 @@ class CodecOptions(_options_base): raise ValueError( "uuid_representation must be a value from bson.binary.UuidRepresentation" ) - if not isinstance(unicode_decode_error_handler, (str, None)): # type: ignore - raise ValueError("unicode_decode_error_handler must be a string or None") + if not isinstance(unicode_decode_error_handler, str): + raise ValueError("unicode_decode_error_handler must be a string") if tzinfo is not None: if not isinstance(tzinfo, datetime.tzinfo): raise TypeError("tzinfo must be an instance of datetime.tzinfo") @@ -329,7 +330,7 @@ class CodecOptions(_options_base): return tuple.__new__( cls, ( - document_class, + doc_class, tz_aware, uuid_representation, unicode_decode_error_handler, @@ -392,7 +393,7 @@ class CodecOptions(_options_base): return CodecOptions(**opts) -DEFAULT_CODEC_OPTIONS: CodecOptions = CodecOptions() +DEFAULT_CODEC_OPTIONS = CodecOptions() def _parse_codec_options(options: Any) -> CodecOptions: diff --git a/bson/codec_options.pyi b/bson/codec_options.pyi new file mode 100644 index 000000000..9d5f5c265 --- /dev/null +++ b/bson/codec_options.pyi @@ -0,0 +1,100 @@ +# Copyright 2022-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Workaround for https://bugs.python.org/issue43923. +Ideally we would have done this with a single class, but +generic subclasses *must* take a parameter, and prior to Python 3.9 +or in Python 3.7 and 3.8 with `from __future__ import annotations`, +you get the error: "TypeError: 'type' object is not subscriptable". +""" + +import datetime +import abc +from typing import Tuple, Generic, Optional, Mapping, Any, TypeVar, Type, Dict, Iterable, Tuple, MutableMapping, Callable, Union + + +class TypeEncoder(abc.ABC, metaclass=abc.ABCMeta): + @property + @abc.abstractmethod + def python_type(self) -> Any: ... + @abc.abstractmethod + def transform_python(self, value: Any) -> Any: ... + +class TypeDecoder(abc.ABC, metaclass=abc.ABCMeta): + @property + @abc.abstractmethod + def bson_type(self) -> Any: ... + @abc.abstractmethod + def transform_bson(self, value: Any) -> Any: ... + +class TypeCodec(TypeEncoder, TypeDecoder, metaclass=abc.ABCMeta): ... + +Codec = Union[TypeEncoder, TypeDecoder, TypeCodec] +Fallback = Callable[[Any], Any] + +class TypeRegistry: + _decoder_map: Dict[Any, Any] + _encoder_map: Dict[Any, Any] + _fallback_encoder: Optional[Fallback] + + def __init__(self, type_codecs: Optional[Iterable[Codec]] = ..., fallback_encoder: Optional[Fallback] = ...) -> None: ... + def __eq__(self, other: Any) -> Any: ... + + +_DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any]) + + +class CodecOptions(Tuple, Generic[_DocumentType]): + document_class: Type[_DocumentType] + tz_aware: bool + uuid_representation: int + unicode_decode_error_handler: Optional[str] + tzinfo: Optional[datetime.tzinfo] + type_registry: TypeRegistry + + def __new__( + cls: Type[CodecOptions], + document_class: Optional[Type[_DocumentType]] = ..., + tz_aware: bool = ..., + uuid_representation: Optional[int] = ..., + unicode_decode_error_handler: Optional[str] = ..., + tzinfo: Optional[datetime.tzinfo] = ..., + type_registry: Optional[TypeRegistry] = ..., + ) -> CodecOptions[_DocumentType]: ... + + # CodecOptions API + def with_options(self, **kwargs: Any) -> CodecOptions[_DocumentType]: ... + + def _arguments_repr(self) -> str: ... + + def _options_dict(self) -> Dict[Any, Any]: ... + + # NamedTuple API + @classmethod + def _make(cls, obj: Iterable) -> CodecOptions[_DocumentType]: ... + + def _asdict(self) -> Dict[str, Any]: ... + + def _replace(self, **kwargs: Any) -> CodecOptions[_DocumentType]: ... + + _source: str + _fields: Tuple[str] + + +DEFAULT_CODEC_OPTIONS: CodecOptions[MutableMapping[str, Any]] +_RAW_BSON_DOCUMENT_MARKER: int + +def _raw_document_class(document_class: Any) -> bool: ... + +def _parse_codec_options(options: Any) -> CodecOptions: ... diff --git a/doc/changelog.rst b/doc/changelog.rst index 73e2ea9ba..d326c24b3 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -26,6 +26,10 @@ PyMongo 4.1 brings a number of improvements including: - :meth:`gridfs.GridOut.seek` now returns the new position in the file, to conform to the behavior of :meth:`io.IOBase.seek`. +Breaking Changes in 4.1 +....................... +- Removed support for Python 3.6.0 and 3.6.1, Python 3.6.2+ is now required. + Bug fixes ......... @@ -57,7 +61,7 @@ before upgrading from PyMongo 3.x. Breaking Changes in 4.0 ....................... -- Removed support for Python 2.7, 3.4, and 3.5. Python 3.6+ is now required. +- Removed support for Python 2.7, 3.4, and 3.5. Python 3.6.2+ is now required. - The default uuid_representation for :class:`~bson.codec_options.CodecOptions`, :class:`~bson.json_util.JSONOptions`, and :class:`~pymongo.mongo_client.MongoClient` has been changed from diff --git a/doc/examples/tls.rst b/doc/examples/tls.rst index f6920ad27..9c3c2c829 100644 --- a/doc/examples/tls.rst +++ b/doc/examples/tls.rst @@ -181,7 +181,7 @@ server's certificate:: This often occurs because OpenSSL does not have access to the system's root certificates or the certificates are out of date. Linux users should ensure that they have the latest root certificate updates installed from -their Linux vendor. macOS users using Python 3.6.0 or newer downloaded +their Linux vendor. macOS users using Python 3.6.2 or newer downloaded from python.org `may have to run a script included with python `_ to install root certificates:: diff --git a/doc/faq.rst b/doc/faq.rst index a7f7c87bd..0d045f762 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -145,7 +145,7 @@ they are returned to the pool. Does PyMongo support Python 3? ------------------------------ -PyMongo supports CPython 3.6+ and PyPy3.6+. See the :doc:`python3` for details. +PyMongo supports CPython 3.6.2+ and PyPy3.6+. See the :doc:`python3` for details. Does PyMongo support asynchronous frameworks like Gevent, asyncio, Tornado, or Twisted? --------------------------------------------------------------------------------------- diff --git a/doc/installation.rst b/doc/installation.rst index 9c9d80c7a..4f14b3112 100644 --- a/doc/installation.rst +++ b/doc/installation.rst @@ -28,7 +28,7 @@ To upgrade using pip:: Dependencies ------------ -PyMongo supports CPython 3.6+ and PyPy3.6+. +PyMongo supports CPython 3.6.2+ and PyPy3.6+. Optional dependencies: @@ -133,7 +133,7 @@ See `http://bugs.python.org/issue11623 `_ for a more detailed explanation. **Lion (10.7) and newer** - PyMongo's C extensions can be built against -versions of Python 3.6+ downloaded from python.org. In all cases Xcode must be +versions of Python 3.6.2+ downloaded from python.org. In all cases Xcode must be installed with 'UNIX Development Support'. **Xcode 5.1**: Starting with version 5.1 the version of clang that ships with diff --git a/doc/migrate-to-pymongo4.rst b/doc/migrate-to-pymongo4.rst index 6fcbdf501..6d290dd51 100644 --- a/doc/migrate-to-pymongo4.rst +++ b/doc/migrate-to-pymongo4.rst @@ -34,7 +34,7 @@ Python 3.6+ ----------- PyMongo 4.0 drops support for Python 2.7, 3.4, and 3.5. Users who wish to -upgrade to 4.x must first upgrade to Python 3.6+. Users upgrading from +upgrade to 4.x must first upgrade to Python 3.6.2+. Users upgrading from Python 2 should consult the :doc:`python3`. Enable Deprecation Warnings diff --git a/doc/python3.rst b/doc/python3.rst index e001c55c8..c14224166 100644 --- a/doc/python3.rst +++ b/doc/python3.rst @@ -6,7 +6,7 @@ Python 3 FAQ What Python 3 versions are supported? ------------------------------------- -PyMongo supports CPython 3.6+ and PyPy3.6+. +PyMongo supports CPython 3.6.2+ and PyPy3.6+. Are there any PyMongo behavior changes with Python 3? ----------------------------------------------------- @@ -20,8 +20,8 @@ with subtype 0. For example, let's insert a :class:`bytes` instance using Python 3 then read it back. Notice the byte string is decoded back to :class:`bytes`:: - Python 3.6.1 (v3.6.1:69c0db5050, Mar 21 2017, 01:21:04) - [GCC 4.9.3] on linux + Python 3.6.8 (v3.6.8:3c6b436a57, Dec 24 2018, 02:04:31) + [GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import pymongo >>> c = pymongo.MongoClient() @@ -49,8 +49,8 @@ decoded to :class:`~bson.binary.Binary` with subtype 0. For example, let's decode a JSON binary subtype 0 using Python 3. Notice the byte string is decoded to :class:`bytes`:: - Python 3.6.1 (v3.6.1:69c0db5050, Mar 21 2017, 01:21:04) - [GCC 4.2.1 (Apple Inc. build 5666) (dot 3)] on darwin + Python 3.6.8 (v3.6.8:3c6b436a57, Dec 24 2018, 02:04:31) + [GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> from bson.json_util import loads >>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}') @@ -86,8 +86,8 @@ Python 3 you must pass ``encoding='latin-1'`` to pickle.loads:: >>> pickle.dumps(oid) 'ccopy_reg\n_reconstructor\np0\n(cbson.objectid\...' - Python 3.6.1 (v3.6.1:69c0db5050, Mar 21 2017, 01:21:04) - [GCC 4.9.3] on linux + Python 3.6.8 (v3.6.8:3c6b436a57, Dec 24 2018, 02:04:31) + [GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import pickle >>> pickle.loads(b'ccopy_reg\n_reconstructor\np0\n(cbson.objectid\...', encoding='latin-1') @@ -97,8 +97,8 @@ Python 3 you must pass ``encoding='latin-1'`` to pickle.loads:: If you need to pickle ObjectIds using Python 3 and unpickle them using Python 2 you must use ``protocol <= 2``:: - Python 3.6.5 (default, Jun 21 2018, 15:09:09) - [GCC 7.3.0] on linux + Python 3.6.8 (v3.6.8:3c6b436a57, Dec 24 2018, 02:04:31) + [GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600.0.57)] on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import pickle >>> from bson.objectid import ObjectId diff --git a/pymongo/collection.py b/pymongo/collection.py index ad75fb760..dc344b640 100644 --- a/pymongo/collection.py +++ b/pymongo/collection.py @@ -2156,7 +2156,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]): .. versionadded:: 3.0 """ - codec_options = CodecOptions(SON) + codec_options: CodecOptions = CodecOptions(SON) coll = self.with_options( codec_options=codec_options, read_preference=ReadPreference.PRIMARY ) diff --git a/pymongo/database.py b/pymongo/database.py index 934b50219..6f2d0fd5c 100644 --- a/pymongo/database.py +++ b/pymongo/database.py @@ -23,6 +23,7 @@ from typing import ( MutableMapping, Optional, Sequence, + TypeVar, Union, cast, ) @@ -38,7 +39,7 @@ from pymongo.collection import Collection from pymongo.command_cursor import CommandCursor from pymongo.errors import CollectionInvalid, InvalidName from pymongo.read_preferences import ReadPreference, _ServerMode -from pymongo.typings import _CollationIn, _DocumentOut, _DocumentType, _Pipeline +from pymongo.typings import _CollationIn, _DocumentType, _Pipeline def _check_name(name): @@ -58,6 +59,9 @@ if TYPE_CHECKING: from pymongo.write_concern import WriteConcern +_CodecDocumentType = TypeVar("_CodecDocumentType", bound=Mapping[str, Any]) + + class Database(common.BaseObject, Generic[_DocumentType]): """A Mongo database.""" @@ -617,11 +621,11 @@ class Database(common.BaseObject, Generic[_DocumentType]): check: bool = True, allowable_errors: Optional[Sequence[Union[str, int]]] = None, read_preference: Optional[_ServerMode] = None, - codec_options: Optional[CodecOptions] = DEFAULT_CODEC_OPTIONS, + codec_options: "Optional[CodecOptions[_CodecDocumentType]]" = None, session: Optional["ClientSession"] = None, comment: Optional[Any] = None, **kwargs: Any, - ) -> _DocumentOut: + ) -> _CodecDocumentType: """Issue a MongoDB command. Send command `command` to the database and return the @@ -707,6 +711,7 @@ class Database(common.BaseObject, Generic[_DocumentType]): .. seealso:: The MongoDB documentation on `commands `_. """ + opts = codec_options or DEFAULT_CODEC_OPTIONS if comment is not None: kwargs["comment"] = comment @@ -723,7 +728,7 @@ class Database(common.BaseObject, Generic[_DocumentType]): check, allowable_errors, read_preference, - codec_options, + opts, session=session, **kwargs, ) diff --git a/pymongo/encryption.py b/pymongo/encryption.py index 502c83e47..1e06f7062 100644 --- a/pymongo/encryption.py +++ b/pymongo/encryption.py @@ -56,7 +56,7 @@ _HTTPS_PORT = 443 _KMS_CONNECT_TIMEOUT = 10 # TODO: CDRIVER-3262 will define this value. _MONGOCRYPTD_TIMEOUT_MS = 10000 -_DATA_KEY_OPTS = CodecOptions(document_class=SON, uuid_representation=STANDARD) +_DATA_KEY_OPTS: CodecOptions = CodecOptions(document_class=SON, uuid_representation=STANDARD) # Use RawBSONDocument codec options to avoid needlessly decoding # documents from the key vault. _KEY_VAULT_OPTS = CodecOptions(document_class=RawBSONDocument, uuid_representation=STANDARD) @@ -572,7 +572,7 @@ class ClientEncryption(object): encrypted_doc = self._encryption.encrypt( doc, algorithm, key_id=key_id, key_alt_name=key_alt_name ) - return decode(encrypted_doc)["v"] + return decode(encrypted_doc)["v"] # type: ignore[index] def decrypt(self, value: Binary) -> Any: """Decrypt an encrypted value. diff --git a/pymongo/message.py b/pymongo/message.py index 92d59c3eb..58f71629d 100644 --- a/pymongo/message.py +++ b/pymongo/message.py @@ -24,7 +24,7 @@ import datetime import random import struct from io import BytesIO as _BytesIO -from typing import Any +from typing import Any, Dict import bson from bson import CodecOptions, _decode_selective, _dict_to_bson, _make_c_string, encode @@ -76,7 +76,9 @@ _OP_MAP = { } _FIELD_MAP = {"insert": "documents", "update": "updates", "delete": "deletes"} -_UNICODE_REPLACE_CODEC_OPTIONS = CodecOptions(unicode_decode_error_handler="replace") +_UNICODE_REPLACE_CODEC_OPTIONS: "CodecOptions[Dict[str, Any]]" = CodecOptions( + unicode_decode_error_handler="replace" +) def _randint(): @@ -1259,7 +1261,7 @@ class _OpReply(object): errobj = {"ok": 0, "errmsg": msg, "code": 43} raise CursorNotFound(msg, 43, errobj) elif self.flags & 2: - error_object = bson.BSON(self.documents).decode() + error_object: dict = bson.BSON(self.documents).decode() # Fake the ok field if it doesn't exist. error_object.setdefault("ok", 0) if error_object["$err"].startswith(HelloCompat.LEGACY_ERROR): diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index ee8927981..4231db95a 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -649,10 +649,11 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]): client.__my_database__ """ + doc_class = document_class or dict self.__init_kwargs: Dict[str, Any] = { "host": host, "port": port, - "document_class": document_class or dict, + "document_class": doc_class, "tz_aware": tz_aware, "connect": connect, "type_registry": type_registry, @@ -676,7 +677,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]): # Parse options passed as kwargs. keyword_opts = common._CaseInsensitiveDictionary(kwargs) - keyword_opts["document_class"] = document_class or dict + keyword_opts["document_class"] = doc_class seeds = set() username = None diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py index 4798542dc..ad604f3f1 100644 --- a/pymongo/monitoring.py +++ b/pymongo/monitoring.py @@ -182,12 +182,12 @@ will not add that listener to existing client instances. import datetime from collections import abc, namedtuple -from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional +from typing import TYPE_CHECKING, Any, Dict, Optional from bson.objectid import ObjectId from pymongo.hello import Hello, HelloCompat from pymongo.helpers import _handle_exception -from pymongo.typings import _Address +from pymongo.typings import _Address, _DocumentOut if TYPE_CHECKING: from pymongo.server_description import ServerDescription @@ -208,9 +208,6 @@ _Listeners = namedtuple( _LISTENERS = _Listeners([], [], [], [], []) -_DocumentOut = Mapping[str, Any] - - class _EventListener(object): """Abstract base class for all event listeners.""" @@ -635,7 +632,7 @@ class CommandStartedEvent(_CommandEvent): ) cmd_name = command_name.lower() if cmd_name in _SENSITIVE_COMMANDS or _is_speculative_authenticate(cmd_name, command): - self.__cmd: Mapping[str, Any] = {} + self.__cmd: _DocumentOut = {} else: self.__cmd = command self.__db = database_name @@ -693,7 +690,7 @@ class CommandSucceededEvent(_CommandEvent): self.__duration_micros = _to_micros(duration) cmd_name = command_name.lower() if cmd_name in _SENSITIVE_COMMANDS or _is_speculative_authenticate(cmd_name, reply): - self.__reply: Mapping[str, Any] = {} + self.__reply: _DocumentOut = {} else: self.__reply = reply diff --git a/setup.py b/setup.py index 699ced1f8..5bae7dc21 100755 --- a/setup.py +++ b/setup.py @@ -4,8 +4,8 @@ import re import sys import warnings -if sys.version_info[:2] < (3, 6): - raise RuntimeError("Python version >= 3.6 required.") +if sys.version_info[:3] < (3, 6, 2): + raise RuntimeError("Python version >= 3.6.2 required.") # Hack to silence atexit traceback in some Python versions @@ -321,7 +321,7 @@ setup( keywords=["mongo", "mongodb", "pymongo", "gridfs", "bson"], install_requires=[], license="Apache License, Version 2.0", - python_requires=">=3.6", + python_requires=">=3.6.2", classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", diff --git a/test/test_binary.py b/test/test_binary.py index 7d0ef2ce2..65abdca79 100644 --- a/test/test_binary.py +++ b/test/test_binary.py @@ -158,19 +158,19 @@ class TestBinary(unittest.TestCase): def test_legacy_java_uuid(self): # Test decoding data = self.java_data - docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, PYTHON_LEGACY)) for d in docs: self.assertNotEqual(d["newguid"], uuid.UUID(d["newguidstring"])) - docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, STANDARD)) for d in docs: self.assertNotEqual(d["newguid"], uuid.UUID(d["newguidstring"])) - docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, CSHARP_LEGACY)) for d in docs: self.assertNotEqual(d["newguid"], uuid.UUID(d["newguidstring"])) - docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, JAVA_LEGACY)) for d in docs: self.assertEqual(d["newguid"], uuid.UUID(d["newguidstring"])) @@ -198,7 +198,7 @@ class TestBinary(unittest.TestCase): @client_context.require_connection def test_legacy_java_uuid_roundtrip(self): data = self.java_data - docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, JAVA_LEGACY)) client_context.client.pymongo_test.drop_collection("java_uuid") db = client_context.client.pymongo_test @@ -218,19 +218,19 @@ class TestBinary(unittest.TestCase): data = self.csharp_data # Test decoding - docs = bson.decode_all(data, CodecOptions(SON, False, PYTHON_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, PYTHON_LEGACY)) for d in docs: self.assertNotEqual(d["newguid"], uuid.UUID(d["newguidstring"])) - docs = bson.decode_all(data, CodecOptions(SON, False, STANDARD)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, STANDARD)) for d in docs: self.assertNotEqual(d["newguid"], uuid.UUID(d["newguidstring"])) - docs = bson.decode_all(data, CodecOptions(SON, False, JAVA_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, JAVA_LEGACY)) for d in docs: self.assertNotEqual(d["newguid"], uuid.UUID(d["newguidstring"])) - docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, CSHARP_LEGACY)) for d in docs: self.assertEqual(d["newguid"], uuid.UUID(d["newguidstring"])) @@ -258,7 +258,7 @@ class TestBinary(unittest.TestCase): @client_context.require_connection def test_legacy_csharp_uuid_roundtrip(self): data = self.csharp_data - docs = bson.decode_all(data, CodecOptions(SON, False, CSHARP_LEGACY)) + docs = bson.decode_all(data, CodecOptions(SON[str, Any], False, CSHARP_LEGACY)) client_context.client.pymongo_test.drop_collection("csharp_uuid") db = client_context.client.pymongo_test diff --git a/test/test_bson.py b/test/test_bson.py index 9bf8df897..b0dce7db4 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -744,12 +744,12 @@ class TestBSON(unittest.TestCase): def test_custom_class(self): self.assertIsInstance(decode(encode({})), dict) self.assertNotIsInstance(decode(encode({})), SON) - self.assertIsInstance(decode(encode({}), CodecOptions(document_class=SON)), SON) + self.assertIsInstance(decode(encode({}), CodecOptions(document_class=SON)), SON) # type: ignore[type-var] - self.assertEqual(1, decode(encode({"x": 1}), CodecOptions(document_class=SON))["x"]) + self.assertEqual(1, decode(encode({"x": 1}), CodecOptions(document_class=SON))["x"]) # type: ignore[type-var] x = encode({"x": [{"y": 1}]}) - self.assertIsInstance(decode(x, CodecOptions(document_class=SON))["x"][0], SON) + self.assertIsInstance(decode(x, CodecOptions(document_class=SON))["x"][0], SON) # type: ignore[type-var] def test_subclasses(self): # make sure we can serialize subclasses of native Python types. @@ -772,7 +772,7 @@ class TestBSON(unittest.TestCase): def test_ordered_dict(self): d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)]) - self.assertEqual(d, decode(encode(d), CodecOptions(document_class=OrderedDict))) + self.assertEqual(d, decode(encode(d), CodecOptions(document_class=OrderedDict))) # type: ignore[type-var] def test_bson_regex(self): # Invalid Python regex, though valid PCRE. @@ -954,7 +954,7 @@ class TestBSON(unittest.TestCase): class TestCodecOptions(unittest.TestCase): def test_document_class(self): self.assertRaises(TypeError, CodecOptions, document_class=object) - self.assertIs(SON, CodecOptions(document_class=SON).document_class) + self.assertIs(SON, CodecOptions(document_class=SON).document_class) # type: ignore[type-var] def test_tz_aware(self): self.assertRaises(TypeError, CodecOptions, tz_aware=1) @@ -993,6 +993,19 @@ class TestCodecOptions(unittest.TestCase): with self.assertRaisesRegex(ValueError, "cannot encode native uuid"): bson.decode_all(bson.encode({"uuid": uuid.uuid4()})) + def test_decode_all_no_options(self): + # Test decode_all()'s default document_class is dict and tz_aware is + # False. + doc = {"sub_document": {}, "dt": datetime.datetime.utcnow()} + + decoded = bson.decode_all(bson.encode(doc), None)[0] + self.assertIsInstance(decoded["sub_document"], dict) + self.assertIsNone(decoded["dt"].tzinfo) + + doc2 = {"id": Binary.from_uuid(uuid.uuid4())} + decoded = bson.decode_all(bson.encode(doc2), None)[0] + self.assertIsInstance(decoded["id"], Binary) + def test_unicode_decode_error_handler(self): enc = encode({"keystr": "foobar"}) diff --git a/test/test_bson_corpus.py b/test/test_bson_corpus.py index 4f8fc7413..193a6dff3 100644 --- a/test/test_bson_corpus.py +++ b/test/test_bson_corpus.py @@ -71,8 +71,8 @@ _DEPRECATED_BSON_TYPES = { # Need to set tz_aware=True in order to use "strict" dates in extended JSON. -codec_options = CodecOptions(tz_aware=True, document_class=SON) -codec_options_no_tzaware = CodecOptions(document_class=SON) +codec_options: CodecOptions = CodecOptions(tz_aware=True, document_class=SON) +codec_options_no_tzaware: CodecOptions = CodecOptions(document_class=SON) # We normally encode UUID as binary subtype 0x03, # but we'll need to encode to subtype 0x04 for one of the tests. codec_options_uuid_04 = codec_options._replace(uuid_representation=STANDARD) diff --git a/test/test_custom_types.py b/test/test_custom_types.py index a7073cde9..e11b5ebe0 100644 --- a/test/test_custom_types.py +++ b/test/test_custom_types.py @@ -538,10 +538,10 @@ class TestTypeRegistry(unittest.TestCase): self.assertEqual( type_registry._encoder_map, - {MyIntEncoder.python_type: codec_instances[1].transform_python}, # type: ignore[has-type] + {MyIntEncoder.python_type: codec_instances[1].transform_python}, ) self.assertEqual( - type_registry._decoder_map, {MyIntDecoder.bson_type: codec_instances[0].transform_bson} # type: ignore[has-type] + type_registry._decoder_map, {MyIntDecoder.bson_type: codec_instances[0].transform_bson} ) def test_initialize_fail(self): diff --git a/test/test_mypy.py b/test/test_mypy.py index 55794e138..6cf3eb2c8 100644 --- a/test/test_mypy.py +++ b/test/test_mypy.py @@ -16,8 +16,9 @@ sample client code that uses PyMongo typings.""" import os +import tempfile import unittest -from typing import TYPE_CHECKING, Any, Dict, Iterable, List +from typing import TYPE_CHECKING, Any, Dict, Iterable, Iterator, List try: from typing import TypedDict # type: ignore[attr-defined] @@ -39,6 +40,7 @@ except ImportError: from test import IntegrationTest from test.utils import rs_or_single_client +from bson import CodecOptions, decode, decode_all, decode_file_iter, decode_iter, encode from bson.raw_bson import RawBSONDocument from bson.son import SON from pymongo.collection import Collection @@ -54,6 +56,15 @@ def get_tests() -> Iterable[str]: yield os.path.join(dirpath, filename) +def only_type_check(func): + def inner(*args, **kwargs): + if not TYPE_CHECKING: + raise unittest.SkipTest("Used for Type Checking Only") + func(*args, **kwargs) + + return inner + + class TestMypyFails(unittest.TestCase): def ensure_mypy_fails(self, filename: str) -> None: if api is None: @@ -105,7 +116,7 @@ class TestPymongo(IntegrationTest): self.assertTrue(result.acknowledged) def test_command(self) -> None: - result = self.client.admin.command("ping") + result: Dict = self.client.admin.command("ping") items = result.items() def test_list_collections(self) -> None: @@ -127,43 +138,6 @@ class TestPymongo(IntegrationTest): assert retreived is not None retreived["a"] = 1 - def test_explicit_document_type(self) -> None: - if not TYPE_CHECKING: - raise unittest.SkipTest("Do not use raw MongoClient") - client: MongoClient[Dict[str, Any]] = MongoClient() - coll = client.test.test - retreived = coll.find_one({"_id": "foo"}) - assert retreived is not None - retreived["a"] = 1 - - def test_typeddict_document_type(self) -> None: - if not TYPE_CHECKING: - raise unittest.SkipTest("Do not use raw MongoClient") - client: MongoClient[Movie] = MongoClient() - coll = client.test.test - retreived = coll.find_one({"_id": "foo"}) - assert retreived is not None - assert retreived["year"] == 1 - assert retreived["name"] == "a" - - def test_raw_bson_document_type(self) -> None: - if not TYPE_CHECKING: - raise unittest.SkipTest("Do not use raw MongoClient") - client = MongoClient(document_class=RawBSONDocument) - coll = client.test.test - retreived = coll.find_one({"_id": "foo"}) - assert retreived is not None - assert len(retreived.raw) > 0 - - def test_son_document_type(self) -> None: - if not TYPE_CHECKING: - raise unittest.SkipTest("Do not use raw MongoClient") - client = MongoClient(document_class=SON[str, Any]) - coll = client.test.test - retreived = coll.find_one({"_id": "foo"}) - assert retreived is not None - retreived["a"] = 1 - def test_aggregate_pipeline(self) -> None: coll3 = self.client.test.test3 coll3.insert_many( @@ -188,5 +162,220 @@ class TestPymongo(IntegrationTest): self.assertTrue(len(list(result))) +class TestDecode(unittest.TestCase): + def test_bson_decode(self) -> None: + doc = {"_id": 1} + bsonbytes = encode(doc) + rt_document: Dict[str, Any] = decode(bsonbytes) + assert rt_document["_id"] == 1 + rt_document["foo"] = "bar" + + class MyDict(Dict[str, Any]): + def foo(self): + return "bar" + + codec_options = CodecOptions(document_class=MyDict) + bsonbytes2 = encode(doc, codec_options=codec_options) + rt_document2 = decode(bsonbytes2, codec_options=codec_options) + assert rt_document2.foo() == "bar" + + codec_options2 = CodecOptions(document_class=RawBSONDocument) + bsonbytes3 = encode(doc, codec_options=codec_options2) + rt_document3 = decode(bsonbytes2, codec_options=codec_options2) + assert rt_document3.raw + + def test_bson_decode_all(self) -> None: + doc = {"_id": 1} + bsonbytes = encode(doc) + bsonbytes += encode(doc) + rt_documents: List[Dict[str, Any]] = decode_all(bsonbytes) + assert rt_documents[0]["_id"] == 1 + rt_documents[0]["foo"] = "bar" + + class MyDict(Dict[str, Any]): + def foo(self): + return "bar" + + codec_options2 = CodecOptions(MyDict) + bsonbytes2 = encode(doc, codec_options=codec_options2) + bsonbytes2 += encode(doc, codec_options=codec_options2) + rt_documents2 = decode_all(bsonbytes2, codec_options2) + assert rt_documents2[0].foo() == "bar" + + codec_options3 = CodecOptions(RawBSONDocument) + bsonbytes3 = encode(doc, codec_options=codec_options3) + bsonbytes3 += encode(doc, codec_options=codec_options3) + rt_documents3 = decode_all(bsonbytes3, codec_options3) + assert rt_documents3[0].raw + + def test_bson_decode_iter(self) -> None: + doc = {"_id": 1} + bsonbytes = encode(doc) + bsonbytes += encode(doc) + rt_documents: Iterator[Dict[str, Any]] = decode_iter(bsonbytes) + assert next(rt_documents)["_id"] == 1 + next(rt_documents)["foo"] = "bar" + + class MyDict(Dict[str, Any]): + def foo(self): + return "bar" + + codec_options2 = CodecOptions(MyDict) + bsonbytes2 = encode(doc, codec_options=codec_options2) + bsonbytes2 += encode(doc, codec_options=codec_options2) + rt_documents2 = decode_iter(bsonbytes2, codec_options2) + assert next(rt_documents2).foo() == "bar" + + codec_options3 = CodecOptions(RawBSONDocument) + bsonbytes3 = encode(doc, codec_options=codec_options3) + bsonbytes3 += encode(doc, codec_options=codec_options3) + rt_documents3 = decode_iter(bsonbytes3, codec_options3) + assert next(rt_documents3).raw + + def make_tempfile(self, content: bytes) -> Any: + fileobj = tempfile.TemporaryFile() + fileobj.write(content) + fileobj.seek(0) + self.addCleanup(fileobj.close) + return fileobj + + def test_bson_decode_file_iter(self) -> None: + doc = {"_id": 1} + bsonbytes = encode(doc) + bsonbytes += encode(doc) + fileobj = self.make_tempfile(bsonbytes) + rt_documents: Iterator[Dict[str, Any]] = decode_file_iter(fileobj) + assert next(rt_documents)["_id"] == 1 + next(rt_documents)["foo"] = "bar" + + class MyDict(Dict[str, Any]): + def foo(self): + return "bar" + + codec_options2 = CodecOptions(MyDict) + bsonbytes2 = encode(doc, codec_options=codec_options2) + bsonbytes2 += encode(doc, codec_options=codec_options2) + fileobj2 = self.make_tempfile(bsonbytes2) + rt_documents2 = decode_file_iter(fileobj2, codec_options2) + assert next(rt_documents2).foo() == "bar" + + codec_options3 = CodecOptions(RawBSONDocument) + bsonbytes3 = encode(doc, codec_options=codec_options3) + bsonbytes3 += encode(doc, codec_options=codec_options3) + fileobj3 = self.make_tempfile(bsonbytes3) + rt_documents3 = decode_file_iter(fileobj3, codec_options3) + assert next(rt_documents3).raw + + +class TestDocumentType(unittest.TestCase): + @only_type_check + def test_default(self) -> None: + client: MongoClient = MongoClient() + coll = client.test.test + retreived = coll.find_one({"_id": "foo"}) + assert retreived is not None + retreived["a"] = 1 + + @only_type_check + def test_explicit_document_type(self) -> None: + client: MongoClient[Dict[str, Any]] = MongoClient() + coll = client.test.test + retreived = coll.find_one({"_id": "foo"}) + assert retreived is not None + retreived["a"] = 1 + + @only_type_check + def test_typeddict_document_type(self) -> None: + client: MongoClient[Movie] = MongoClient() + coll = client.test.test + retreived = coll.find_one({"_id": "foo"}) + assert retreived is not None + assert retreived["year"] == 1 + assert retreived["name"] == "a" + + @only_type_check + def test_raw_bson_document_type(self) -> None: + client = MongoClient(document_class=RawBSONDocument) + coll = client.test.test + retreived = coll.find_one({"_id": "foo"}) + assert retreived is not None + assert len(retreived.raw) > 0 + + @only_type_check + def test_son_document_type(self) -> None: + client = MongoClient(document_class=SON[str, Any]) + coll = client.test.test + retreived = coll.find_one({"_id": "foo"}) + assert retreived is not None + retreived["a"] = 1 + + +class TestCommandDocumentType(unittest.TestCase): + @only_type_check + def test_default(self) -> None: + client: MongoClient = MongoClient() + result: Dict = client.admin.command("ping") + result["a"] = 1 + + @only_type_check + def test_explicit_document_type(self) -> None: + client: MongoClient = MongoClient() + codec_options: CodecOptions[Dict[str, Any]] = CodecOptions() + result = client.admin.command("ping", codec_options=codec_options) + result["a"] = 1 + + @only_type_check + def test_typeddict_document_type(self) -> None: + client: MongoClient = MongoClient() + codec_options: CodecOptions[Movie] = CodecOptions() + result = client.admin.command("ping", codec_options=codec_options) + assert result["year"] == 1 + assert result["name"] == "a" + + @only_type_check + def test_raw_bson_document_type(self) -> None: + client: MongoClient = MongoClient() + codec_options = CodecOptions(RawBSONDocument) + result = client.admin.command("ping", codec_options=codec_options) + assert len(result.raw) > 0 + + @only_type_check + def test_son_document_type(self) -> None: + client = MongoClient(document_class=SON[str, Any]) + codec_options = CodecOptions(SON[str, Any]) + result = client.admin.command("ping", codec_options=codec_options) + result["a"] = 1 + + +class TestCodecOptionsDocumentType(unittest.TestCase): + def test_default(self) -> None: + options: CodecOptions = CodecOptions() + obj = options.document_class() + obj["a"] = 1 + + def test_explicit_document_type(self) -> None: + options: CodecOptions[Dict[str, Any]] = CodecOptions() + obj = options.document_class() + obj["a"] = 1 + + def test_typeddict_document_type(self) -> None: + options: CodecOptions[Movie] = CodecOptions() + # Suppress: Cannot instantiate type "Type[Movie]". + obj = options.document_class(name="a", year=1) # type: ignore[misc] + assert obj["year"] == 1 + assert obj["name"] == "a" + + def test_raw_bson_document_type(self) -> None: + options = CodecOptions(RawBSONDocument) + doc_bson = b"\x10\x00\x00\x00\x11a\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00" + obj = options.document_class(doc_bson) + assert len(obj.raw) > 0 + + def test_son_document_type(self) -> None: + options = CodecOptions(SON[str, Any]) + obj = options.document_class() + obj["a"] = 1 + + if __name__ == "__main__": unittest.main()