From 824b58ac608f95bd9cc46308377b8112a8cb0fa7 Mon Sep 17 00:00:00 2001 From: Prashant Mital Date: Tue, 16 Apr 2019 12:28:45 -0700 Subject: [PATCH] PYTHON-1819 Documentation & examples for custom type encoding/decoding functionality (cherry picked from commit 0ea5a1542e3237c7f1ef6697a68f9426338d3685) --- bson/codec_options.py | 24 +++- doc/examples/custom_type.rst | 231 +++++++++++++++++++++++++++-------- pymongo/database.py | 3 + 3 files changed, 201 insertions(+), 57 deletions(-) diff --git a/bson/codec_options.py b/bson/codec_options.py index 1a4f0876e..471d695a9 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -41,6 +41,8 @@ class TypeEncoder(ABC): Codec classes must implement the ``python_type`` attribute, and the ``transform_python`` method to support encoding. + + See :ref:`custom-type-type-codec` documentation for an example. """ @abstractproperty def python_type(self): @@ -59,6 +61,8 @@ class TypeDecoder(ABC): Codec classes must implement the ``bson_type`` attribute, and the ``transform_bson`` method to support decoding. + + See :ref:`custom-type-type-codec` documentation for an example. """ @abstractproperty def bson_type(self): @@ -73,13 +77,15 @@ class TypeDecoder(ABC): class TypeCodec(TypeEncoder, TypeDecoder): """Base class for defining type codec classes which describe how a - custom type can be transformed to/from one of the types BSON already - understands, and can encode/decode. + custom type can be transformed to/from one of the types :mod:`bson` + can already encode/decode. Codec classes must implement the ``python_type`` attribute, and the ``transform_python`` method to support encoding, as well as the ``bson_type`` attribute, and the ``transform_bson`` method to support decoding. + + See :ref:`custom-type-type-codec` documentation for an example. """ pass @@ -96,14 +102,19 @@ class TypeRegistry(object): >>> type_registry = TypeRegistry([Codec1, Codec2, Codec3, ...], ... 
fallback_encoder) + See :ref:`custom-type-type-registry` documentation for an example. + :Parameters: - `type_codecs` (optional): iterable of type codec instances. If ``type_codecs`` contains multiple codecs that transform a single python or BSON type, the transformation specified by the type codec - occurring last prevails. + occurring last prevails. A TypeError will be raised if one or more + type codecs modify the encoding behavior of a built-in :mod:`bson` + type. - `fallback_encoder` (optional): callable that accepts a single, - unencodable python value and transforms it into a type that BSON can - encode. + unencodable python value and transforms it into a type that + :mod:`bson` can encode. See :ref:`fallback-encoder-callable` + documentation for an example. """ def __init__(self, type_codecs=None, fallback_encoder=None): self.__type_codecs = list(type_codecs or []) @@ -217,6 +228,9 @@ class CodecOptions(_options_base): - `type_registry`: Instance of :class:`TypeRegistry` used to customize encoding and decoding behavior. + .. versionadded:: 3.8 + `type_registry` attribute. + .. warning:: Care must be taken when changing `unicode_decode_error_handler` from its default value ('strict'). The 'replace' and 'ignore' modes should not be used when documents diff --git a/doc/examples/custom_type.rst b/doc/examples/custom_type.rst index 2cdf22cb3..464f7bfde 100644 --- a/doc/examples/custom_type.rst +++ b/doc/examples/custom_type.rst @@ -7,11 +7,11 @@ codec, which is used to populate a :class:`~bson.codec_options.TypeRegistry`. The type registry can then be used to create a custom-type-aware :class:`~pymongo.collection.Collection`. Read and write operations issued against the resulting collection object transparently manipulate -documents as they are saved or retrieved from MongoDB. +documents as they are saved to or retrieved from MongoDB. 
-Setup ------ +Setting Up +---------- We'll start by getting a clean database to use for the example: @@ -26,10 +26,10 @@ We'll start by getting a clean database to use for the example: Since the purpose of the example is to demonstrate working with custom types, we'll need a custom data type to use. For this example, we will be working with the :py:class:`~decimal.Decimal` type from Python's standard library. Since the -BSON library has a :class:`~bson.decimal128.Decimal128` type (that implements -the IEEE 754 decimal128 decimal-based floating-point numbering format) which -is distinct from Python's built-in :py:class:`~decimal.Decimal` type, when we -try to save an instance of ``Decimal`` with PyMongo, we get an +BSON library's :class:`~bson.decimal128.Decimal128` type (that implements +the IEEE 754 decimal128 decimal-based floating-point numbering format) is +distinct from Python's built-in :py:class:`~decimal.Decimal` type, attempting +to save an instance of ``Decimal`` with PyMongo results in an :exc:`~bson.errors.InvalidDocument` exception. .. doctest:: @@ -44,13 +44,13 @@ try to save an instance of ``Decimal`` with PyMongo, we get an .. _custom-type-type-codec: -The Type Codec --------------- +The :class:`~bson.codec_options.TypeCodec` Class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. versionadded:: 3.8 -In order to encode custom types, we must first define a **type codec** for our -type. A type codec describes how an instance of a custom type can be +In order to encode a custom type, we must first define a **type codec** for +that type. A type codec describes how an instance of a custom type can be *transformed* to and/or from one of the types :mod:`~bson` already understands. Depending on the desired functionality, users must choose from the following base classes when defining type codecs: @@ -62,7 +62,7 @@ base classes when defining type codecs: decodes a specified BSON type into a custom Python type. 
Users must implement the ``bson_type`` property/attribute and the ``transform_bson`` method. * :class:`~bson.codec_options.TypeCodec`: subclass this to define a codec that - can both encode from and decode to a custom type. Users must implement the + can both encode and decode a custom type. Users must implement the ``python_type`` and ``bson_type`` properties/attributes, as well as the ``transform_python`` and ``transform_bson`` methods. @@ -93,14 +93,14 @@ interested in both encoding and decoding our custom type, we use the .. _custom-type-type-registry: -The Type Registry ------------------ +The :class:`~bson.codec_options.TypeRegistry` Class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. versionadded:: 3.8 Before we can begin encoding and decoding our custom type objects, we must -first inform PyMongo about our type codec. This is done by creating a -:class:`~bson.codec_options.TypeRegistry` instance: +first inform PyMongo about the corresponding codec. This is done by creating +a :class:`~bson.codec_options.TypeRegistry` instance: .. doctest:: @@ -113,7 +113,7 @@ Once instantiated, registries are immutable and the only way to add codecs to a registry is to create a new one. -Putting it together +Putting It Together ------------------- Finally, we can define a :class:`~bson.codec_options.CodecOptions` instance @@ -201,11 +201,11 @@ This is trivial to do since the same transformation as the one used for information, it is impossible to discern which incoming :class:`~bson.decimal128.Decimal128` value needs to be decoded as ``Decimal`` and which needs to be decoded as ``DecimalInt``. This example only considers - the situation where a user wants to *encode* documents containing one or both + the situation where a user wants to *encode* documents containing either of these types. 
-Now, we can create a new codec options object and use it to get a collection -object: +After creating a new codec options object and using it to get a collection +object, we can seamlessly encode instances of ``DecimalInt``: .. doctest:: @@ -213,23 +213,67 @@ object: >>> codec_options = CodecOptions(type_registry=type_registry) >>> collection = db.get_collection('test', codec_options=codec_options) >>> collection.drop() - - -We can now seamlessly encode instances of ``DecimalInt``. Note that the -``transform_bson`` method of the base codec class results in these values -being decoded as ``Decimal`` (and not ``DecimalInt``): - -.. doctest:: - >>> collection.insert_one({'num': DecimalInt("45.321")}) >>> mydoc = collection.find_one() >>> pprint.pprint(mydoc) {u'_id': ObjectId('...'), u'num': Decimal('45.321')} +Note that the ``transform_bson`` method of the base codec class results in +these values being decoded as ``Decimal`` (and not ``DecimalInt``). -The Fallback Encoder --------------------- + +.. _decoding-binary-types: + +Decoding :class:`~bson.binary.Binary` Types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The decoding treatment of :class:`~bson.binary.Binary` types having +``subtype = 0`` by the :mod:`bson` module varies slightly depending on the +version of the Python runtime in use. This must be taken into account while +writing a ``TypeDecoder`` that modifies how this datatype is decoded. + +On Python 3.x, :class:`~bson.binary.Binary` data (``subtype = 0``) is decoded +as a ``bytes`` instance: + +.. code-block:: python + + >>> # On Python 3.x. + >>> from bson.binary import Binary + >>> newcoll = db.get_collection('new') + >>> newcoll.insert_one({'_id': 1, 'data': Binary(b"123", subtype=0)}) + >>> doc = newcoll.find_one() + >>> type(doc['data']) + bytes + + +On Python 2.7.x, the same data is decoded as a :class:`~bson.binary.Binary` +instance: + +.. 
code-block:: python + + >>> # On Python 2.7.x + >>> newcoll = db.get_collection('new') + >>> doc = newcoll.find_one() + >>> type(doc['data']) + bson.binary.Binary + + +As a consequence of this disparity, users must set the ``bson_type`` attribute +on their :class:`~bson.codec_options.TypeDecoder` classes differently, +depending on the Python version in use. + + +.. note:: + + For codebases requiring compatibility with both Python 2 and 3, type + decoders will have to be registered for both possible ``bson_type`` values. + + +.. _fallback-encoder-callable: + +The ``fallback_encoder`` Callable +--------------------------------- .. versionadded:: 3.8 @@ -268,27 +312,110 @@ We can now seamlessly encode instances of :py:class:`~decimal.Decimal`: >>> pprint.pprint(mydoc) {u'_id': ObjectId('...'), u'num': Decimal128('45.321')} -As you can tell, fallback encoders are a compelling alternative to type codecs -when we only want to encode custom types due to their much simpler API. -Users should note however, that fallback encoders cannot be used to modify the -encoding of types that PyMongo already understands, as illustrated by the -following example: - >>> def fallback_encoder(value): - ... """Encoder that converts floats to int.""" - ... if isinstance(value, float): - ... return int(value) - ... return value - >>> type_registry = TypeRegistry(fallback_encoder=fallback_encoder) - >>> codec_options = CodecOptions(type_registry=type_registry) - >>> collection = db.get_collection('test', codec_options=codec_options) - >>> collection.drop() - >>> collection.insert_one({'num': 45.321}) - - >>> mydoc = collection.find_one() - >>> pprint.pprint(mydoc) - {u'_id': ObjectId('...'), u'num': 45.321} +.. note:: -This is due to the fact that fallback encoders are invoked only after -an attempt to encode the value with type codecs and standard BSON encoding -routines has been unsuccessful. 
\ No newline at end of file + Fallback encoders are invoked *after* attempts to encode the given value + with standard BSON encoders and any configured type encoders have failed. + Therefore, in a type registry configured with a type encoder and fallback + encoder that both target the same custom type, the behavior specified in + the type encoder will prevail. + + +Because fallback encoders don't need to declare the types that they encode +beforehand, they can be used to support interesting use-cases that cannot be +serviced by ``TypeEncoder``. One such use-case is described in the next +section. + + +Encoding Unknown Types +^^^^^^^^^^^^^^^^^^^^^^ + +In this example, we demonstrate how a fallback encoder can be used to save +arbitrary objects to the database. We will use the standard library's +:py:mod:`pickle` module to serialize the unknown types and so naturally, this +approach only works for types that are picklable. + +We start by defining some arbitrary custom types: + +.. code-block:: python + + class MyStringType(object): + def __init__(self, value): + self.__value = value + def __repr__(self): + return "MyStringType('%s')" % (self.__value,) + + class MyNumberType(object): + def __init__(self, value): + self.__value = value + def __repr__(self): + return "MyNumberType(%s)" % (self.__value,) + +We also define a fallback encoder that pickles whatever objects it receives +and returns them as :class:`~bson.binary.Binary` instances with a custom +subtype. The custom subtype, in turn, allows us to write a TypeDecoder that +identifies pickled artifacts upon retrieval and transparently decodes them +back into Python objects: + +.. 
code-block:: python + + import pickle + from bson.binary import Binary, USER_DEFINED_SUBTYPE + def fallback_pickle_encoder(value): + return Binary(pickle.dumps(value), USER_DEFINED_SUBTYPE) + + class PickledBinaryDecoder(TypeDecoder): + bson_type = Binary + def transform_bson(self, value): + if value.subtype == USER_DEFINED_SUBTYPE: + return pickle.loads(value) + return value + + +.. note:: + + The above example is written assuming the use of Python 3. If you are using + Python 2, ``bson_type`` must be set to ``Binary``. See the + :ref:`decoding-binary-types` section for a detailed explanation. + + +Finally, we create a ``CodecOptions`` instance: + +.. code-block:: python + + codec_options = CodecOptions(type_registry=TypeRegistry( + [PickledBinaryDecoder()], fallback_encoder=fallback_pickle_encoder)) + +We can now round trip our custom objects to MongoDB: + +.. code-block:: python + + collection = db.get_collection('test_fe', codec_options=codec_options) + collection.insert_one({'_id': 1, 'str': MyStringType("hello world"), + 'num': MyNumberType(2)}) + mydoc = collection.find_one() + assert isinstance(mydoc['str'], MyStringType) + assert isinstance(mydoc['num'], MyNumberType) + + +Limitations +----------- + +PyMongo's type codec and fallback encoder features have the following +limitations: + +#. Users cannot customize the encoding behavior of Python types that PyMongo + already understands like ``int`` and ``str`` (the 'built-in types'). + Attempting to instantiate a type registry with one or more codecs that act + upon a built-in type results in a ``TypeError``. This limitation extends + to all subtypes of the standard types. +#. Chaining type encoders is not supported. 
A custom type value, once + transformed by a codec's ``transform_python`` method, *must* result in a + type that is either BSON-encodable by default, or can be + transformed by the fallback encoder into something BSON-encodable--it + *cannot* be transformed a second time by a different type codec. +#. The :meth:`~pymongo.database.Database.command` method does not apply the + user's TypeDecoders while decoding the command response document. +#. :mod:`gridfs` does not apply custom type encoding or decoding to any + documents received from or returned to the user. diff --git a/pymongo/database.py b/pymongo/database.py index 7a3eca56c..c4f7f47f8 100644 --- a/pymongo/database.py +++ b/pymongo/database.py @@ -616,6 +616,9 @@ class Database(common.BaseObject): :attr:`read_preference` or :attr:`codec_options`. You must use the `read_preference` and `codec_options` parameters instead. + .. note:: :meth:`command` does **not** apply any custom TypeDecoders + when decoding the command response. + .. versionchanged:: 3.6 Added ``session`` parameter.