From 768858eed61625f715c95b09abec2daa0334de49 Mon Sep 17 00:00:00 2001 From: Shruti Sridhar <77828382+shruti-sridhar@users.noreply.github.com> Date: Fri, 16 Aug 2024 15:55:30 -0700 Subject: [PATCH] PYTHON-4630 Add documentation for MongoClient.bulk_write (#1794) --- .github/workflows/test-python.yml | 2 +- doc/api/pymongo/asynchronous/mongo_client.rst | 1 + doc/api/pymongo/mongo_client.rst | 1 + doc/changelog.rst | 14 +- doc/examples/client_bulk.rst | 188 ++++++++++++++++++ doc/examples/index.rst | 1 + pymongo/asynchronous/mongo_client.py | 6 +- pymongo/errors.py | 4 +- pymongo/synchronous/mongo_client.py | 6 +- 9 files changed, 215 insertions(+), 8 deletions(-) create mode 100644 doc/examples/client_bulk.rst diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index ba04e8e41..036b2c4b7 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -96,7 +96,7 @@ jobs: - name: Start MongoDB uses: supercharge/mongodb-github-action@1.10.0 with: - mongodb-version: 4.4 + mongodb-version: '8.0.0-rc4' - name: Run tests run: | hatch run doctest:test diff --git a/doc/api/pymongo/asynchronous/mongo_client.rst b/doc/api/pymongo/asynchronous/mongo_client.rst index afbd802ff..75952f1b6 100644 --- a/doc/api/pymongo/asynchronous/mongo_client.rst +++ b/doc/api/pymongo/asynchronous/mongo_client.rst @@ -35,5 +35,6 @@ .. automethod:: get_database .. automethod:: server_info .. automethod:: watch + .. automethod:: bulk_write .. automethod:: __getitem__ .. automethod:: __getattr__ diff --git a/doc/api/pymongo/mongo_client.rst b/doc/api/pymongo/mongo_client.rst index 37ec8ae00..0409e7ef6 100644 --- a/doc/api/pymongo/mongo_client.rst +++ b/doc/api/pymongo/mongo_client.rst @@ -35,5 +35,6 @@ .. automethod:: get_database .. automethod:: server_info .. automethod:: watch + .. automethod:: bulk_write .. automethod:: __getitem__ .. 
automethod:: __getattr__ diff --git a/doc/changelog.rst b/doc/changelog.rst index d14a466cd..6a9744cfd 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -16,7 +16,19 @@ PyMongo 4.9 brings a number of improvements including: :class:`~pymongo.asynchronous.cursor.AsyncCursor`, and :class:`~pymongo.asynchronous.command_cursor.AsyncCommandCursor` as an asynchronous-friendly alternative to ``list(cursor)``. - +- Added :meth:`~pymongo.mongo_client.MongoClient.bulk_write` to :class:`~pymongo.mongo_client.MongoClient` + and :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient`, + enabling users to perform insert, update, and delete operations + against mixed namespaces in a minimized number of round trips. + Please see :doc:`examples/client_bulk` for more information. +- Added support for the ``namespace`` parameter to the + :class:`~pymongo.operations.InsertOne`, + :class:`~pymongo.operations.ReplaceOne`, + :class:`~pymongo.operations.UpdateOne`, + :class:`~pymongo.operations.UpdateMany`, + :class:`~pymongo.operations.DeleteOne`, and + :class:`~pymongo.operations.DeleteMany` operations, so + they can be used in the new :meth:`~pymongo.mongo_client.MongoClient.bulk_write`. Issues Resolved ............... diff --git a/doc/examples/client_bulk.rst b/doc/examples/client_bulk.rst new file mode 100644 index 000000000..d2b4a70e2 --- /dev/null +++ b/doc/examples/client_bulk.rst @@ -0,0 +1,188 @@ +Client Bulk Write Operations +============================= + +.. 
testsetup:: + + from pymongo import MongoClient + + client = MongoClient() + client.drop_database("client_bulk_example") + db = client.client_bulk_example + client.db.drop_collection("test_one") + client.db.drop_collection("test_two") + client.db.drop_collection("test_three") + client.db.drop_collection("test_four") + client.db.drop_collection("test_five") + client.db.drop_collection("test_six") + +The :meth:`~pymongo.mongo_client.MongoClient.bulk_write` +method has been added to :class:`~pymongo.mongo_client.MongoClient` in PyMongo 4.9. +This method enables users to perform batches of write operations **across +multiple namespaces** in a minimized number of round trips, and +to receive detailed results for each operation performed. + +.. note:: This method requires MongoDB server version 8.0+. + +Basic Usage +------------ + +A list of insert, update, and delete operations can be passed into the +:meth:`~pymongo.mongo_client.MongoClient.bulk_write` method. Each request +must include the namespace on which to perform the operation. + +PyMongo will automatically split the given requests into smaller sub-batches based on +the maximum message size accepted by MongoDB, supporting very large bulk write operations. + +The return value is an instance of +:class:`~pymongo.results.ClientBulkWriteResult`. + +.. _summary_client_bulk: + +Summary Results +................. + +By default, the returned :class:`~pymongo.results.ClientBulkWriteResult` instance will contain a +summary of the types of operations performed in the bulk write, along with their respective counts. + +.. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from pymongo import InsertOne, DeleteOne, UpdateOne + >>> models = [ + ... InsertOne(namespace="db.test_one", document={"_id": 1}), + ... InsertOne(namespace="db.test_two", document={"_id": 2}), + ... DeleteOne(namespace="db.test_one", filter={"_id": 1}), + ... UpdateOne( + ... namespace="db.test_two", + ... filter={"_id": 4}, + ... 
update={"$inc": {"j": 1}}, + ... upsert=True, + ... ), + ... ] + >>> result = client.bulk_write(models) + >>> result.inserted_count + 2 + >>> result.deleted_count + 1 + >>> result.modified_count + 0 + >>> result.upserted_count + 1 + +.. _verbose_client_bulk: + +Verbose Results +................. + +If the ``verbose_results`` parameter is set to True, the returned :class:`~pymongo.results.ClientBulkWriteResult` +instance will also include detailed results about each successful operation performed as part of the bulk write. + +.. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateMany + >>> models = [ + ... DeleteMany( + ... namespace="db.test_two", filter={} + ... ), # Delete all documents from the previous example + ... InsertOne(namespace="db.test_one", document={"_id": 1}), + ... InsertOne(namespace="db.test_one", document={"_id": 2}), + ... InsertOne(namespace="db.test_two", document={"_id": 3}), + ... UpdateMany(namespace="db.test_one", filter={}, update={"$set": {"foo": "bar"}}), + ... ReplaceOne( + ... namespace="db.test_two", filter={"j": 1}, replacement={"_id": 4}, upsert=True + ... ), + ... ] + >>> result = client.bulk_write(models, verbose_results=True) + >>> result.delete_results + {0: DeleteResult({'ok': 1.0, 'idx': 0, 'n': 2}, ...)} + >>> result.insert_results + {1: InsertOneResult(1, ...), + 2: InsertOneResult(2, ...), + 3: InsertOneResult(3, ...)} + >>> result.update_results + {4: UpdateResult({'ok': 1.0, 'idx': 4, 'n': 2, 'nModified': 2}, ...), + 5: UpdateResult({'ok': 1.0, 'idx': 5, 'n': 1, 'nModified': 0, 'upserted': {'_id': 4}}, ...)} + + +Handling Errors +---------------- + +If any errors occur during the bulk write, a :class:`~pymongo.errors.ClientBulkWriteException` will be raised. +If a server, connection, or network error occurs, the ``error`` field of the exception will contain +that error.
+ +Individual write errors or write concern errors are recorded in the ``write_errors`` and ``write_concern_errors`` fields of the exception. +The ``partial_result`` field is populated with the results of any operations that were successfully completed before the exception was raised. + +.. _ordered_client_bulk: + +Ordered Operations +.................... + +In an ordered bulk write (the default), if an individual write fails, no further operations will be executed. +For example, a duplicate key error on the third operation below aborts the remaining two operations. + +.. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from pymongo import InsertOne, DeleteOne + >>> from pymongo.errors import ClientBulkWriteException + >>> models = [ + ... InsertOne(namespace="db.test_three", document={"_id": 3}), + ... InsertOne(namespace="db.test_four", document={"_id": 4}), + ... InsertOne(namespace="db.test_three", document={"_id": 3}), # Duplicate _id + ... InsertOne(namespace="db.test_four", document={"_id": 5}), + ... DeleteOne(namespace="db.test_three", filter={"_id": 3}), + ... ] + >>> try: + ... client.bulk_write(models) + ... except ClientBulkWriteException as cbwe: + ... exception = cbwe + ... + >>> exception.write_errors + [{'ok': 0.0, + 'idx': 2, + 'code': 11000, + 'errmsg': 'E11000 duplicate key error ... dup key: { _id: 3 }', ... + 'op': {'insert': 'db.test_three', 'document': {'_id': 3}}}] + >>> exception.partial_result.inserted_count + 2 + >>> exception.partial_result.deleted_count + 0 + +.. _unordered_client_bulk: + +Unordered Operations +..................... + +If the ``ordered`` parameter is set to False, all operations in the bulk write will be attempted, regardless of any individual write errors that occur. +For example, the fourth and fifth write operations below are executed successfully, despite the duplicate key error on the third operation. + +..
doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from pymongo import InsertOne, DeleteOne + >>> from pymongo.errors import ClientBulkWriteException + >>> models = [ + ... InsertOne(namespace="db.test_five", document={"_id": 5}), + ... InsertOne(namespace="db.test_six", document={"_id": 6}), + ... InsertOne(namespace="db.test_five", document={"_id": 5}), # Duplicate _id + ... InsertOne(namespace="db.test_six", document={"_id": 7}), + ... DeleteOne(namespace="db.test_five", filter={"_id": 5}), + ... ] + >>> try: + ... client.bulk_write(models, ordered=False) + ... except ClientBulkWriteException as cbwe: + ... exception = cbwe + ... + >>> exception.write_errors + [{'ok': 0.0, + 'idx': 2, + 'code': 11000, + 'errmsg': 'E11000 duplicate key error ... dup key: { _id: 5 }', ... + 'op': {'insert': 'db.test_five', 'document': {'_id': 5}}}] + >>> exception.partial_result.inserted_count + 3 + >>> exception.partial_result.deleted_count + 1 diff --git a/doc/examples/index.rst b/doc/examples/index.rst index 75d208f20..ac450470e 100644 --- a/doc/examples/index.rst +++ b/doc/examples/index.rst @@ -22,6 +22,7 @@ MongoDB, you can start it like so: copydb custom_type bulk + client_bulk datetimes geo gevent diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 8848fa4fd..e1a9d7735 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -2274,8 +2274,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]): 1 >>> result.modified_count 0 - >>> result.upserted_ids - {3: ObjectId('54f62ee28891e756a6e1abd5')} + >>> result.upserted_count + 1 >>> async for doc in db.test.find({}): ... print(doc) ... @@ -2312,6 +2312,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]): :return: An instance of :class:`~pymongo.results.ClientBulkWriteResult`. + .. seealso:: For more info, see :doc:`/examples/client_bulk`. + .. seealso:: :ref:`writes-and-ids` .. 
note:: requires MongoDB server version 8.0+. diff --git a/pymongo/errors.py b/pymongo/errors.py index 1c51708c7..2cd1081e3 100644 --- a/pymongo/errors.py +++ b/pymongo/errors.py @@ -342,10 +342,10 @@ class ClientBulkWriteException(OperationFailure): return self.details.get("writeConcernErrors", []) @property - def write_errors(self) -> Optional[Mapping[int, WriteError]]: + def write_errors(self) -> Optional[list[WriteError]]: """Errors that occurred during the execution of individual write operations. - This map will contain at most one entry if the bulk write was ordered. + This list will contain at most one entry if the bulk write was ordered. """ return self.details.get("writeErrors", {}) diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 4aff3b5ee..287ad6af7 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -2263,8 +2263,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]): 1 >>> result.modified_count 0 - >>> result.upserted_ids - {3: ObjectId('54f62ee28891e756a6e1abd5')} + >>> result.upserted_count + 1 >>> for doc in db.test.find({}): ... print(doc) ... @@ -2301,6 +2301,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]): :return: An instance of :class:`~pymongo.results.ClientBulkWriteResult`. + .. seealso:: For more info, see :doc:`/examples/client_bulk`. + .. seealso:: :ref:`writes-and-ids` .. note:: requires MongoDB server version 8.0+.