Merge branch 'master' of github.com:mongodb/mongo-python-driver

This commit is contained in:
Steven Silvester 2025-08-07 13:21:01 -05:00
commit bd33b8d9e9
No known key found for this signature in database
GPG Key ID: B1BF5EC3A8B32F91
102 changed files with 529 additions and 7629 deletions

View File

@ -16,6 +16,13 @@ implementation on top of `pymongo`.
PyMongo supports MongoDB 4.0, 4.2, 4.4, 5.0, 6.0, 7.0, and 8.0. PyMongo follows [semantic versioning](https://semver.org/spec/v2.0.0.html) for its releases.
## Documentation
Documentation is available at
[mongodb.com](https://www.mongodb.com/docs/languages/python/pymongo-driver/current/).
[API documentation](https://pymongo.readthedocs.io/en/stable/api/) and the [full changelog](https://pymongo.readthedocs.io/en/stable/changelog.html) for each release are available at [readthedocs.io](https://pymongo.readthedocs.io/en/stable/index.html).
## Support / Feedback
For issues with, questions about, or feedback for PyMongo, please look
@ -191,13 +198,6 @@ ObjectId('4aba160ee23f6b543e000002')
[8, 11]
```
## Documentation
Documentation is available at
[pymongo.readthedocs.io](https://pymongo.readthedocs.io/en/stable/).
See the [contributing guide](./CONTRIBUTING.md#documentation) for how to build the documentation.
## Learning Resources
- MongoDB Learn - [Python

View File

@ -58,10 +58,10 @@ bytes [#bytes]_ binary both
the microsecond field is truncated.
.. [#dt2] all datetime.datetime instances are encoded as UTC. By default, they
are decoded as *naive* but timezone aware datetimes are also supported.
See :doc:`/examples/datetimes` for examples.
See `Dates and Times <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#dates-and-times>`_ for examples.
.. [#dt3] To enable decoding a bson UTC datetime to a :class:`~bson.datetime_ms.DatetimeMS`
instance see :ref:`handling-out-of-range-datetimes`.
.. [#uuid] For :py:class:`uuid.UUID` encoding and decoding behavior see :doc:`/examples/uuid`.
instance see `handling out of range datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_.
.. [#uuid] For :py:class:`uuid.UUID` encoding and decoding behavior see `<https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_.
.. [#re] :class:`~bson.regex.Regex` instances and regular expression
objects from ``re.compile()`` are both saved as BSON regular expressions.
BSON regular expressions are decoded as :class:`~bson.regex.Regex`

View File

@ -79,7 +79,7 @@ class UuidRepresentation:
:class:`~bson.binary.Binary` instance will be returned instead of a
:class:`uuid.UUID` instance.
See :ref:`unspecified-representation-details` for details.
See `unspecified representation details <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#unspecified>`_ for details.
.. versionadded:: 3.11
"""
@ -91,7 +91,7 @@ class UuidRepresentation:
and decoded from BSON binary, using RFC-4122 byte order with
binary subtype :data:`UUID_SUBTYPE`.
See :ref:`standard-representation-details` for details.
See `standard representation details <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#standard>`_ for details.
.. versionadded:: 3.11
"""
@ -103,7 +103,7 @@ class UuidRepresentation:
and decoded from BSON binary, using RFC-4122 byte order with
binary subtype :data:`OLD_UUID_SUBTYPE`.
See :ref:`python-legacy-representation-details` for details.
See `python legacy representation details <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#python_legacy>`_ for details.
.. versionadded:: 3.11
"""
@ -115,7 +115,7 @@ class UuidRepresentation:
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
using the Java driver's legacy byte order.
See :ref:`java-legacy-representation-details` for details.
See `Java Legacy UUID <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#java_legacy>`_ for details.
.. versionadded:: 3.11
"""
@ -127,7 +127,7 @@ class UuidRepresentation:
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
using the C# driver's legacy byte order.
See :ref:`csharp-legacy-representation-details` for details.
See `C# Legacy UUID <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#csharp_legacy>`_ for details.
.. versionadded:: 3.11
"""
@ -328,7 +328,7 @@ class Binary(bytes):
:param uuid_representation: A member of
:class:`~bson.binary.UuidRepresentation`. Default:
:const:`~bson.binary.UuidRepresentation.STANDARD`.
See :ref:`handling-uuid-data-example` for details.
See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
.. versionadded:: 3.11
"""
@ -377,7 +377,7 @@ class Binary(bytes):
:param uuid_representation: A member of
:class:`~bson.binary.UuidRepresentation`. Default:
:const:`~bson.binary.UuidRepresentation.STANDARD`.
See :ref:`handling-uuid-data-example` for details.
See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
.. versionadded:: 3.11
"""

View File

@ -57,7 +57,7 @@ class TypeEncoder(abc.ABC):
Codec classes must implement the ``python_type`` attribute, and the
``transform_python`` method to support encoding.
See :ref:`custom-type-type-codec` documentation for an example.
See `encode data with type codecs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_ documentation for an example.
"""
@abc.abstractproperty
@ -76,7 +76,7 @@ class TypeDecoder(abc.ABC):
Codec classes must implement the ``bson_type`` attribute, and the
``transform_bson`` method to support decoding.
See :ref:`custom-type-type-codec` documentation for an example.
See `encode data with type codecs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_ documentation for an example.
"""
@abc.abstractproperty
@ -98,7 +98,7 @@ class TypeCodec(TypeEncoder, TypeDecoder):
``bson_type`` attribute, and the ``transform_bson`` method to support
decoding.
See :ref:`custom-type-type-codec` documentation for an example.
See `encode data with type codecs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_ documentation for an example.
"""
@ -118,7 +118,7 @@ class TypeRegistry:
>>> type_registry = TypeRegistry([Codec1, Codec2, Codec3, ...],
... fallback_encoder)
See :ref:`custom-type-type-registry` documentation for an example.
See `add codec to the type registry <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#add-codec-to-the-type-registry>`_ documentation for an example.
:param type_codecs: iterable of type codec instances. If
``type_codecs`` contains multiple codecs that transform a single
@ -128,7 +128,7 @@ class TypeRegistry:
type.
:param fallback_encoder: callable that accepts a single,
unencodable python value and transforms it into a type that
:mod:`bson` can encode. See :ref:`fallback-encoder-callable`
:mod:`bson` can encode. See `define a fallback encoder <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#define-a-fallback-encoder>`_
documentation for an example.
"""
@ -327,10 +327,10 @@ else:
>>> doc._id
ObjectId('5b3016359110ea14e8c58b93')
See :doc:`/examples/datetimes` for examples using the `tz_aware` and
See `Dates and Times <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#dates-and-times>`_ for examples using the `tz_aware` and
`tzinfo` options.
See :doc:`/examples/uuid` for examples using the `uuid_representation`
See `UUID <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for examples using the `uuid_representation`
option.
:param document_class: BSON documents returned in queries will be decoded
@ -344,7 +344,7 @@ else:
:data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New
applications should consider setting this to
:data:`~bson.binary.UuidRepresentation.STANDARD` for cross language
compatibility. See :ref:`handling-uuid-data-example` for details.
compatibility. See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
:param unicode_decode_error_handler: The error handler to apply when
a Unicode-related error occurs during BSON decoding that would
otherwise raise :exc:`UnicodeDecodeError`. Valid options include

View File

@ -51,7 +51,7 @@ class DatetimeMS:
To decode UTC datetimes as a ``DatetimeMS``, `datetime_conversion` in
:class:`~bson.codec_options.CodecOptions` must be set to 'datetime_ms' or
'datetime_auto'. See :ref:`handling-out-of-range-datetimes` for
'datetime_auto'. See `handling out of range datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_ for
details.
:param value: An instance of :class:`datetime.datetime` to be

View File

@ -281,7 +281,7 @@ class JSONOptions(_BASE_CLASS):
return DatetimeMS objects when the underlying datetime is
out-of-range and 'datetime_clamp' to clamp to the minimum and
maximum possible datetimes. Defaults to 'datetime'. See
:ref:`handling-out-of-range-datetimes` for details.
`handling out of range datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_ for details.
:param args: arguments to :class:`~bson.codec_options.CodecOptions`
:param kwargs: arguments to :class:`~bson.codec_options.CodecOptions`
@ -844,7 +844,7 @@ def _encode_binary(data: bytes, subtype: int, json_options: JSONOptions) -> Any:
return {"$binary": {"base64": base64.b64encode(data).decode(), "subType": "%02x" % subtype}}
def _encode_datetimems(obj: Any, json_options: JSONOptions) -> dict:
def _encode_datetimems(obj: Any, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
if (
json_options.datetime_representation == DatetimeRepresentation.ISO8601
and 0 <= int(obj) <= _MAX_UTC_MS
@ -855,7 +855,7 @@ def _encode_datetimems(obj: Any, json_options: JSONOptions) -> dict:
return {"$date": {"$numberLong": str(int(obj))}}
def _encode_code(obj: Code, json_options: JSONOptions) -> dict:
def _encode_code(obj: Code, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
if obj.scope is None:
return {"$code": str(obj)}
else:
@ -873,7 +873,7 @@ def _encode_noop(obj: Any, dummy0: Any) -> Any:
return obj
def _encode_regex(obj: Any, json_options: JSONOptions) -> dict:
def _encode_regex(obj: Any, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
flags = ""
if obj.flags & re.IGNORECASE:
flags += "i"
@ -918,7 +918,7 @@ def _encode_float(obj: float, json_options: JSONOptions) -> Any:
return obj
def _encode_datetime(obj: datetime.datetime, json_options: JSONOptions) -> dict:
def _encode_datetime(obj: datetime.datetime, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
if json_options.datetime_representation == DatetimeRepresentation.ISO8601:
if not obj.tzinfo:
obj = obj.replace(tzinfo=utc)
@ -941,15 +941,15 @@ def _encode_datetime(obj: datetime.datetime, json_options: JSONOptions) -> dict:
return {"$date": {"$numberLong": str(millis)}}
def _encode_bytes(obj: bytes, json_options: JSONOptions) -> dict:
def _encode_bytes(obj: bytes, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
return _encode_binary(obj, 0, json_options)
def _encode_binary_obj(obj: Binary, json_options: JSONOptions) -> dict:
def _encode_binary_obj(obj: Binary, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
return _encode_binary(obj, obj.subtype, json_options)
def _encode_uuid(obj: uuid.UUID, json_options: JSONOptions) -> dict:
def _encode_uuid(obj: uuid.UUID, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
if json_options.strict_uuid:
binval = Binary.from_uuid(obj, uuid_representation=json_options.uuid_representation)
return _encode_binary(binval, binval.subtype, json_options)
@ -957,27 +957,27 @@ def _encode_uuid(obj: uuid.UUID, json_options: JSONOptions) -> dict:
return {"$uuid": obj.hex}
def _encode_objectid(obj: ObjectId, dummy0: Any) -> dict:
def _encode_objectid(obj: ObjectId, dummy0: Any) -> dict: # type: ignore[type-arg]
return {"$oid": str(obj)}
def _encode_timestamp(obj: Timestamp, dummy0: Any) -> dict:
def _encode_timestamp(obj: Timestamp, dummy0: Any) -> dict: # type: ignore[type-arg]
return {"$timestamp": {"t": obj.time, "i": obj.inc}}
def _encode_decimal128(obj: Timestamp, dummy0: Any) -> dict:
def _encode_decimal128(obj: Timestamp, dummy0: Any) -> dict: # type: ignore[type-arg]
return {"$numberDecimal": str(obj)}
def _encode_dbref(obj: DBRef, json_options: JSONOptions) -> dict:
def _encode_dbref(obj: DBRef, json_options: JSONOptions) -> dict: # type: ignore[type-arg]
return _json_convert(obj.as_doc(), json_options=json_options)
def _encode_minkey(dummy0: Any, dummy1: Any) -> dict:
def _encode_minkey(dummy0: Any, dummy1: Any) -> dict: # type: ignore[type-arg]
return {"$minKey": 1}
def _encode_maxkey(dummy0: Any, dummy1: Any) -> dict:
def _encode_maxkey(dummy0: Any, dummy1: Any) -> dict: # type: ignore[type-arg]
return {"$maxKey": 1}
@ -985,7 +985,7 @@ def _encode_maxkey(dummy0: Any, dummy1: Any) -> dict:
# Each encoder function's signature is:
# - obj: a Python data type, e.g. a Python int for _encode_int
# - json_options: a JSONOptions
_ENCODERS: dict[Type, Callable[[Any, JSONOptions], Any]] = {
_ENCODERS: dict[Type, Callable[[Any, JSONOptions], Any]] = { # type: ignore[type-arg]
bool: _encode_noop,
bytes: _encode_bytes,
datetime.datetime: _encode_datetime,
@ -1056,7 +1056,7 @@ def _get_datetime_size(obj: datetime.datetime) -> int:
return 5 + len(str(obj.time()))
def _get_regex_size(obj: Regex) -> int:
def _get_regex_size(obj: Regex) -> int: # type: ignore[type-arg]
return 18 + len(obj.pattern)

View File

@ -28,4 +28,4 @@ if TYPE_CHECKING:
_DocumentOut = Union[MutableMapping[str, Any], "RawBSONDocument"]
_DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any])
_DocumentTypeArg = TypeVar("_DocumentTypeArg", bound=Mapping[str, Any])
_ReadableBuffer = Union[bytes, memoryview, "mmap", "array"]
_ReadableBuffer = Union[bytes, memoryview, "mmap", "array"] # type: ignore[type-arg]

View File

@ -1,425 +0,0 @@
Async Tutorial
==============
.. code-block:: pycon
from pymongo import AsyncMongoClient
client = AsyncMongoClient()
await client.drop_database("test-database")
This tutorial is intended as an introduction to working with
**MongoDB** and **PyMongo** using the asynchronous API.
Prerequisites
-------------
Before we start, make sure that you have the **PyMongo** distribution
:doc:`installed <installation>`. In the Python shell, the following
should run without raising an exception:
.. code-block:: pycon
>>> import pymongo
This tutorial also assumes that a MongoDB instance is running on the
default host and port. Assuming you have `downloaded and installed
<https://www.mongodb.com/docs/manual/installation/>`_ MongoDB, you
can start it like so:
.. code-block:: bash
$ mongod
Making a Connection with AsyncMongoClient
-----------------------------------------
The first step when working with **PyMongo** is to create a
:class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` to the running **mongod**
instance. Doing so is easy:
.. code-block:: pycon
>>> from pymongo import AsyncMongoClient
>>> client = AsyncMongoClient()
The above code will connect on the default host and port. We can also
specify the host and port explicitly, as follows:
.. code-block:: pycon
>>> client = AsyncMongoClient("localhost", 27017)
Or use the MongoDB URI format:
.. code-block:: pycon
>>> client = AsyncMongoClient("mongodb://localhost:27017/")
By default, :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` only connects to the database on its first operation.
To explicitly connect before performing an operation, use :meth:`~pymongo.asynchronous.mongo_client.AsyncMongoClient.aconnect`:
.. code-block:: pycon
>>> client = await AsyncMongoClient().aconnect()
Getting a Database
------------------
A single instance of MongoDB can support multiple independent
`databases <https://www.mongodb.com/docs/manual/core/databases-and-collections>`_. When
working with PyMongo you access databases using attribute style access
on :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` instances:
.. code-block:: pycon
>>> db = client.test_database
If your database name is such that using attribute style access won't
work (like ``test-database``), you can use dictionary style access
instead:
.. code-block:: pycon
>>> db = client["test-database"]
Getting a Collection
--------------------
A `collection <https://www.mongodb.com/docs/manual/core/databases-and-collections>`_ is a
group of documents stored in MongoDB, and can be thought of as roughly
the equivalent of a table in a relational database. Getting a
collection in PyMongo works the same as getting a database:
.. code-block:: pycon
>>> collection = db.test_collection
or (using dictionary style access):
.. code-block:: pycon
>>> collection = db["test-collection"]
An important note about collections (and databases) in MongoDB is that
they are created lazily - none of the above commands have actually
performed any operations on the MongoDB server. Collections and
databases are created when the first document is inserted into them.
Documents
---------
Data in MongoDB is represented (and stored) using JSON-style
documents. In PyMongo we use dictionaries to represent documents. As
an example, the following dictionary might be used to represent a blog
post:
.. code-block:: pycon
>>> import datetime
>>> post = {
... "author": "Mike",
... "text": "My first blog post!",
... "tags": ["mongodb", "python", "pymongo"],
... "date": datetime.datetime.now(tz=datetime.timezone.utc),
... }
Note that documents can contain native Python types (like
:class:`datetime.datetime` instances) which will be automatically
converted to and from the appropriate `BSON
<https://bsonspec.org/>`_ types.
Inserting a Document
--------------------
To insert a document into a collection we can use the
:meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_one` method:
.. code-block:: pycon
>>> posts = db.posts
>>> post_id = (await posts.insert_one(post)).inserted_id
>>> post_id
ObjectId('...')
When a document is inserted a special key, ``"_id"``, is automatically
added if the document doesn't already contain an ``"_id"`` key. The value
of ``"_id"`` must be unique across the
collection. :meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_one` returns an
instance of :class:`~pymongo.results.InsertOneResult`. For more information
on ``"_id"``, see the `documentation on _id
<https://www.mongodb.com/docs/manual/reference/method/ObjectId/>`_.
After inserting the first document, the *posts* collection has
actually been created on the server. We can verify this by listing all
of the collections in our database:
.. code-block:: pycon
>>> await db.list_collection_names()
['posts']
Getting a Single Document With :meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one`
------------------------------------------------------------------------------------------------
The most basic type of query that can be performed in MongoDB is
:meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one`. This method returns a
single document matching a query (or ``None`` if there are no
matches). It is useful when you know there is only one matching
document, or are only interested in the first match. Here we use
:meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one` to get the first
document from the posts collection:
.. code-block:: pycon
>>> import pprint
>>> pprint.pprint(await posts.find_one())
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
The result is a dictionary matching the one that we inserted previously.
.. note:: The returned document contains an ``"_id"``, which was
automatically added on insert.
:meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one` also supports querying
on specific elements that the resulting document must match. To limit
our results to a document with author "Mike" we do:
.. code-block:: pycon
>>> pprint.pprint(await posts.find_one({"author": "Mike"}))
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
If we try with a different author, like "Eliot", we'll get no result:
.. code-block:: pycon
>>> await posts.find_one({"author": "Eliot"})
>>>
.. _async-querying-by-objectid:
Querying By ObjectId
--------------------
We can also find a post by its ``_id``, which in our example is an ObjectId:
.. code-block:: pycon
>>> post_id
ObjectId(...)
>>> pprint.pprint(await posts.find_one({"_id": post_id}))
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
Note that an ObjectId is not the same as its string representation:
.. code-block:: pycon
>>> post_id_as_str = str(post_id)
>>> await posts.find_one({"_id": post_id_as_str}) # No result
>>>
A common task in web applications is to get an ObjectId from the
request URL and find the matching document. It's necessary in this
case to **convert the ObjectId from a string** before passing it to
``find_one``::
from bson.objectid import ObjectId
# The web framework gets post_id from the URL and passes it as a string
async def get(post_id):
# Convert from string to ObjectId:
document = await client.db.collection.find_one({'_id': ObjectId(post_id)})
.. seealso:: :ref:`web-application-querying-by-objectid`
Bulk Inserts
------------
In order to make querying a little more interesting, let's insert a
few more documents. In addition to inserting a single document, we can
also perform *bulk insert* operations, by passing a list as the
first argument to :meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_many`.
This will insert each document in the list, sending only a single
command to the server:
.. code-block:: pycon
>>> new_posts = [
... {
... "author": "Mike",
... "text": "Another post!",
... "tags": ["bulk", "insert"],
... "date": datetime.datetime(2009, 11, 12, 11, 14),
... },
... {
... "author": "Eliot",
... "title": "MongoDB is fun",
... "text": "and pretty easy too!",
... "date": datetime.datetime(2009, 11, 10, 10, 45),
... },
... ]
>>> result = await posts.insert_many(new_posts)
>>> result.inserted_ids
[ObjectId('...'), ObjectId('...')]
There are a couple of interesting things to note about this example:
- The result from :meth:`~pymongo.asynchronous.collection.AsyncCollection.insert_many` now
returns two :class:`~bson.objectid.ObjectId` instances, one for
each inserted document.
- ``new_posts[1]`` has a different "shape" than the other posts -
there is no ``"tags"`` field and we've added a new field,
``"title"``. This is what we mean when we say that MongoDB is
*schema-free*.
Querying for More Than One Document
-----------------------------------
To get more than a single document as the result of a query we use the
:meth:`~pymongo.asynchronous.collection.AsyncCollection.find`
method. :meth:`~pymongo.asynchronous.collection.AsyncCollection.find` returns a
:class:`~pymongo.asynchronous.cursor.AsyncCursor` instance, which allows us to iterate
over all matching documents. For example, we can iterate over every
document in the ``posts`` collection:
.. code-block:: pycon
>>> async for post in posts.find():
... pprint.pprint(post)
...
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['bulk', 'insert'],
'text': 'Another post!'}
{'_id': ObjectId('...'),
'author': 'Eliot',
'date': datetime.datetime(...),
'text': 'and pretty easy too!',
'title': 'MongoDB is fun'}
Just like we did with :meth:`~pymongo.asynchronous.collection.AsyncCollection.find_one`,
we can pass a document to :meth:`~pymongo.asynchronous.collection.AsyncCollection.find`
to limit the returned results. Here, we get only those documents whose
author is "Mike":
.. code-block:: pycon
>>> async for post in posts.find({"author": "Mike"}):
... pprint.pprint(post)
...
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['bulk', 'insert'],
'text': 'Another post!'}
Counting
--------
If we just want to know how many documents match a query we can
perform a :meth:`~pymongo.asynchronous.collection.AsyncCollection.count_documents` operation
instead of a full query. We can get a count of all of the documents
in a collection:
.. code-block:: pycon
>>> await posts.count_documents({})
3
or just of those documents that match a specific query:
.. code-block:: pycon
>>> await posts.count_documents({"author": "Mike"})
2
Range Queries
-------------
MongoDB supports many different types of `advanced queries
<https://www.mongodb.com/docs/manual/reference/operator/>`_. As an
example, let's perform a query where we limit results to posts older
than a certain date, but also sort the results by author:
.. code-block:: pycon
>>> d = datetime.datetime(2009, 11, 12, 12)
>>> async for post in posts.find({"date": {"$lt": d}}).sort("author"):
... pprint.pprint(post)
...
{'_id': ObjectId('...'),
'author': 'Eliot',
'date': datetime.datetime(...),
'text': 'and pretty easy too!',
'title': 'MongoDB is fun'}
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['bulk', 'insert'],
'text': 'Another post!'}
Here we use the special ``"$lt"`` operator to do a range query, and
also call :meth:`~pymongo.asynchronous.cursor.AsyncCursor.sort` to sort the results
by author.
Indexing
--------
Adding indexes can help accelerate certain queries and can also add additional
functionality to querying and storing documents. In this example, we'll
demonstrate how to create a `unique index
<https://mongodb.com/docs/manual/core/index-unique/>`_ on a key that rejects
documents whose value for that key already exists in the index.
First, we'll need to create the index:
.. code-block:: pycon
>>> result = await db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True)
>>> sorted(list(await db.profiles.index_information()))
['_id_', 'user_id_1']
Notice that we have two indexes now: one is the index on ``_id`` that MongoDB
creates automatically, and the other is the index on ``user_id`` we just
created.
Now let's set up some user profiles:
.. code-block:: pycon
>>> user_profiles = [{"user_id": 211, "name": "Luke"}, {"user_id": 212, "name": "Ziltoid"}]
>>> result = await db.profiles.insert_many(user_profiles)
The index prevents us from inserting a document whose ``user_id`` is already in
the collection:
.. code-block:: pycon
>>> new_profile = {"user_id": 213, "name": "Drew"}
>>> duplicate_profile = {"user_id": 212, "name": "Tommy"}
>>> result = await db.profiles.insert_one(new_profile) # This is fine.
>>> result = await db.profiles.insert_one(duplicate_profile)
Traceback (most recent call last):
DuplicateKeyError: E11000 duplicate key error index: test_database.profiles.$user_id_1 dup key: { : 212 }
.. seealso:: The MongoDB documentation on `indexes <https://www.mongodb.com/docs/manual/indexes/>`_
Task Cancellation
-----------------
`Cancelling <https://docs.python.org/3/library/asyncio-task.html#task-cancellation>`_ an asyncio Task
that is running a PyMongo operation is treated as a fatal interrupt. Any connections, cursors, and transactions
involved in a cancelled Task will be safely closed and cleaned up as part of the cancellation. If those resources are
also used elsewhere, attempting to utilize them after the cancellation will result in an error.

View File

@ -1,43 +0,0 @@
Using PyMongo with MongoDB Atlas
================================
`Atlas <https://www.mongodb.com/cloud>`_ is MongoDB, Inc.'s hosted MongoDB as a
service offering. To connect to Atlas, pass the connection string provided by
Atlas to :class:`~pymongo.mongo_client.MongoClient`::
client = pymongo.MongoClient(<Atlas connection string>)
Connections to Atlas require TLS/SSL.
.. warning:: Industry best practices recommend, and some regulations require,
the use of TLS 1.1 or newer. Though no application changes are required for
PyMongo to make use of the newest protocols, some operating systems or
versions may not provide an OpenSSL version new enough to support them.
Users of macOS older than 10.13 (High Sierra) will need to install Python
from `python.org`_, `homebrew`_, `macports`_, or another similar source.
Users of Linux or other non-macOS Unix can check their OpenSSL version like
this::
$ openssl version
If the version number is less than 1.0.1 support for TLS 1.1 or newer is not
available. Contact your operating system vendor for a solution or upgrade to
a newer distribution.
You can check your Python interpreter by installing the `requests`_ module
and executing the following command::
python -c "import requests; print(requests.get('https://www.howsmyssl.com/a/check', verify=False).json()['tls_version'])"
You should see "TLS 1.X" where X is >= 1.
You can read more about TLS versions and their security implications here:
`<https://cheatsheetseries.owasp.org/cheatsheets/Transport_Layer_Security_Cheat_Sheet.html#only-support-strong-protocols>`_
.. _python.org: https://www.python.org/downloads/
.. _homebrew: https://brew.sh/
.. _macports: https://www.macports.org/
.. _requests: https://pypi.python.org/pypi/requests

View File

@ -1,25 +1,38 @@
Changelog
=========
Changes in Version 4.14.0 (XXXX/XX/XX)
Changes in Version 4.14.0 (2025/08/06)
--------------------------------------
.. warning:: PyMongo 4.14 drops support for MongoDB 4.0. PyMongo now supports
MongoDB 4.2+.
PyMongo 4.14 brings a number of changes including:
- Added preliminary support for Python 3.14 and 3.14 with free-threading. We do not yet support the following with Python 3.14:
- Subinterpreters (``concurrent.interpreters``)
- Free-threading with Encryption
- mod_wsgi
- Removed experimental support for free-threading support in Python 3.13.
- Added :attr:`bson.codec_options.TypeRegistry.codecs` and :attr:`bson.codec_options.TypeRegistry.fallback_encoder` properties
to allow users to directly access the type codecs and fallback encoder for a given :class:`bson.codec_options.TypeRegistry`.
- Added :meth:`pymongo.asynchronous.mongo_client.AsyncMongoClient.append_metadata` and
:meth:`pymongo.mongo_client.MongoClient.append_metadata` to allow instantiated MongoClients to send client metadata
on-demand
- Improved performance of selecting a server with the Primary selector.
- Dropped support for MongoDB 4.0.
- Added preliminary support for Python 3.14 and 3.14 with free-threading. We do
not yet support the following with Python 3.14:
- Introduces a minor breaking change. When encoding :class:`bson.binary.BinaryVector`, a ``ValueError`` will be raised
if the 'padding' metadata field is < 0 or > 7, or non-zero for any type other than PACKED_BIT.
- Changed :meth:`~pymongo.uri_parser.parse_uri`'s ``options`` parameter to be type ``dict`` instead of ``_CaseInsensitiveDictionary``.
- Subinterpreters (``concurrent.interpreters``)
- Free-threading with Encryption
- mod_wsgi
- Removed experimental support for free-threading in Python 3.13.
- Added :attr:`bson.codec_options.TypeRegistry.codecs` and
:attr:`bson.codec_options.TypeRegistry.fallback_encoder` properties
to allow users to directly access the type codecs and fallback encoder for a
given :class:`bson.codec_options.TypeRegistry`.
- Added
:meth:`pymongo.asynchronous.mongo_client.AsyncMongoClient.append_metadata` and
:meth:`pymongo.mongo_client.MongoClient.append_metadata` to allow instantiated
MongoClients to send client metadata on demand.
- Improved performance of selecting a server with the Primary selector.
- Introduces a minor breaking change. When encoding
:class:`bson.binary.BinaryVector`, a ``ValueError`` will be raised if the
'padding' metadata field is < 0 or > 7, or non-zero for any type other than
PACKED_BIT.
- Changed :meth:`~pymongo.uri_parser.parse_uri`'s ``options`` return value to be
type ``dict`` instead of ``_CaseInsensitiveDictionary``.
Changes in Version 4.13.2 (2025/06/17)
--------------------------------------
@ -115,7 +128,7 @@ PyMongo 4.12 brings a number of changes including:
- Support for configuring DEK cache lifetime via the ``key_expiration_ms`` argument to
:class:`~pymongo.encryption_options.AutoEncryptionOpts`.
- Support for $lookup in CSFLE and QE supported on MongoDB 8.1+.
- pymongocrypt>=1.13 is now required for :ref:`In-Use Encryption` support.
- pymongocrypt>=1.13 is now required for `In-Use Encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#in-use-encryption>`_ support.
- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.rename_by_name` and :meth:`gridfs.grid_file.GridFSBucket.rename_by_name`
for more performant renaming of a file with multiple revisions.
- Added :meth:`gridfs.asynchronous.grid_file.AsyncGridFSBucket.delete_by_name` and :meth:`gridfs.grid_file.GridFSBucket.delete_by_name`
@ -177,7 +190,7 @@ PyMongo 4.11 brings a number of changes including:
- Dropped support for Python 3.8 and PyPy 3.9.
- Dropped support for MongoDB 3.6.
- Dropped support for the MONGODB-CR authenticate mechanism, which is no longer supported by MongoDB 4.0+.
- pymongocrypt>=1.12 is now required for :ref:`In-Use Encryption` support.
- pymongocrypt>=1.12 is now required for `In-Use Encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#in-use-encryption>`_ support.
- Added support for free-threaded Python with the GIL disabled. For more information see:
`Free-threaded CPython <https://docs.python.org/3.13/whatsnew/3.13.html#whatsnew313-free-threaded-cpython>`_.
We do not yet support free-threaded Python on Windows (`PYTHON-5027`_) or with In-Use Encryption (`PYTHON-5024`_).
@ -299,7 +312,7 @@ PyMongo 4.9 brings a number of improvements including:
``sparsity`` and ``trim_factor`` are now optional in :class:`~pymongo.encryption_options.RangeOpts`.
- Added support for the "delegated" option for the KMIP ``master_key`` in
:meth:`~pymongo.encryption.ClientEncryption.create_data_key`.
- pymongocrypt>=1.10 is now required for :ref:`In-Use Encryption` support.
- pymongocrypt>=1.10 is now required for `In-Use Encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#in-use-encryption>`_ support.
- Added :meth:`~pymongo.cursor.Cursor.to_list` to :class:`~pymongo.cursor.Cursor`,
:class:`~pymongo.command_cursor.CommandCursor`,
:class:`~pymongo.asynchronous.cursor.AsyncCursor`,
@ -309,7 +322,7 @@ PyMongo 4.9 brings a number of improvements including:
and :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient`,
enabling users to perform insert, update, and delete operations
against mixed namespaces in a minimized number of round trips.
Please see :doc:`examples/client_bulk` for more information.
Please see `Client Bulk Write <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/bulk-write/#client-bulk-write-example>`_ for more information.
- Added support for the ``namespace`` parameter to the
:class:`~pymongo.operations.InsertOne`,
:class:`~pymongo.operations.ReplaceOne`,
@ -339,7 +352,7 @@ PyMongo 4.9 brings a number of improvements including:
function-as-a-service (FaaS) like AWS Lambda, Google Cloud Functions, and Microsoft Azure Functions.
On some FaaS systems, there is a ``fork()`` operation at function
startup. By delaying the connection to the first operation, we avoid a deadlock. See
:ref:`pymongo-fork-safe` for more information.
`multiple forks <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/mongoclient/#multiple-forks>`_ for more information.
Issues Resolved
@ -446,10 +459,10 @@ PyMongo 4.7 brings a number of improvements including:
using an OpenID Connect (OIDC) access token.
The driver supports OIDC for workload identity, defined as an identity you assign to a software workload
(such as an application, service, script, or container) to authenticate and access other services and resources.
Please see :doc:`examples/authentication` for more information.
Please see `Authentication <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/#authentication-mechanisms>`_ for more information.
- Added support for Python's `native logging library <https://docs.python.org/3/howto/logging.html>`_,
enabling developers to customize the verbosity of log messages for their applications.
Please see :doc:`examples/logging` for more information.
Please see `Logging <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/monitoring-and-logging/logging/#logging>`_ for more information.
- Significantly improved the performance of encoding BSON documents to JSON.
- Added support for named KMS providers for client side field level encryption.
Previously supported KMS providers were only: aws, azure, gcp, kmip, and local.
@ -608,7 +621,7 @@ PyMongo 4.6 brings a number of improvements including:
"mongodb://example.com?tls=true" is now a valid URI.
- Fixed a bug where PyMongo would incorrectly promote all cursors to exhaust cursors
when connected to load balanced MongoDB clusters or Serverless clusters.
- Added the :ref:`network-compression-example` documentation page.
- Added the `network compression <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/network-compression/#compress-network-traffic>`_ documentation page.
- Added more timeout information to network errors.
Issues Resolved
@ -633,7 +646,7 @@ PyMongo 4.5 brings a number of improvements including:
- Added :meth:`~pymongo.database.Database.cursor_command`
and :meth:`~pymongo.command_cursor.CommandCursor.try_next` to support
executing an arbitrary command that returns a cursor.
- ``cryptography`` 2.5 or later is now required for :ref:`OCSP` support.
- ``cryptography`` 2.5 or later is now required for `OCSP <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/tls/#ocsp>`_ support.
- Improved bson encoding and decoding performance by up to 134% (`PYTHON-3729`_, `PYTHON-3797`_, `PYTHON-3816`_, `PYTHON-3817`_, `PYTHON-3820`_, `PYTHON-3824`_, and `PYTHON-3846`_).
.. warning:: PyMongo no longer supports PyPy3 versions older than 3.8. Users
@ -694,7 +707,7 @@ PyMongo 4.4 brings a number of improvements including:
:class:`~pymongo.encryption_options.RangeOpts`,
and :attr:`~pymongo.encryption.Algorithm.RANGEPREVIEW` as part of the experimental
Queryable Encryption beta.
- pymongocrypt 1.6.0 or later is now required for :ref:`In-Use Encryption` support. MongoDB
- pymongocrypt 1.6.0 or later is now required for `In-Use Encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#in-use-encryption>`_ support. MongoDB
Server 7.0 introduced a backwards breaking change to the QE protocol. Users taking
advantage of the Queryable Encryption beta must now upgrade to MongoDB 7.0+ and
PyMongo 4.4+.
@ -722,9 +735,9 @@ Changes in Version 4.3.3 (2022/11/17)
Version 4.3.3 documents support for the following:
- :ref:`CSFLE on-demand credentials` for cloud KMS providers.
- Authentication support for :ref:`EKS Clusters`.
- Added the :ref:`timeout-example` example page to improve the documentation
- `CSFLE on-demand credentials <https://www.mongodb.com/docs/v7.0/core/csfle/tutorials/aws/aws-automatic/?interface=driver&language=python#use-automatic-client-side-field-level-encryption-with-aws>`_ for cloud KMS providers.
- Authentication support for `EKS Clusters <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/aws-iam/#assumerolewithwebidentity>`_.
- Added the `timeout <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/csot/#limit-server-execution-time>`_ example page to improve the documentation
for :func:`pymongo.timeout`.
Bug Fixes
@ -759,7 +772,7 @@ PyMongo 4.3 brings a number of improvements including:
- Added support for decoding BSON datetimes outside of the range supported
by Python's :class:`~datetime.datetime` builtin. See
:ref:`handling-out-of-range-datetimes` for examples, as well as
`handling out of range datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_ for examples, as well as
:class:`bson.datetime_ms.DatetimeMS`,
:class:`bson.codec_options.DatetimeConversion`, and
:class:`bson.codec_options.CodecOptions`'s ``datetime_conversion``
@ -768,7 +781,7 @@ PyMongo 4.3 brings a number of improvements including:
after a :py:func:`os.fork` to reduce the frequency of deadlocks. Note that
deadlocks are still possible because libraries that PyMongo depends on, like
OpenSSL, cannot be made fork() safe in multithreaded applications.
(`PYTHON-2484`_). For more info see :ref:`pymongo-fork-safe`.
(`PYTHON-2484`_). For more info see `multiple forks <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/mongoclient/#multiple-forks>`_.
- When used with MongoDB 6.0+, :class:`~pymongo.change_stream.ChangeStream` s
now allow for new types of events (such as DDL and C2C replication events)
to be recorded with the new parameter ``show_expanded_events``
@ -778,7 +791,7 @@ PyMongo 4.3 brings a number of improvements including:
credentials expire or an error is encountered.
- When using the ``MONGODB-AWS`` authentication mechanism with the
``aws`` extra, the behavior of credential fetching has changed with
``pymongo_auth_aws>=1.1.0``. Please see :doc:`examples/authentication` for
``pymongo_auth_aws>=1.1.0``. Please see `Authentication <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/#authentication-mechanisms>`_ for
more information.
Bug fixes
@ -811,9 +824,9 @@ PyMongo 4.2 brings a number of improvements including:
- Support for MongoDB 6.0.
- Support for the Queryable Encryption beta with MongoDB 6.0. Note that backwards-breaking
changes may be made before the final release. See :ref:`automatic-queryable-client-side-encryption` for example usage.
changes may be made before the final release. See `automatic queryable client-side encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#queryable-encryption>`_ for example usage.
- Provisional (beta) support for :func:`pymongo.timeout` to apply a single timeout
to an entire block of pymongo operations. See :ref:`timeout-example` for examples.
to an entire block of pymongo operations. See `timeout <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/csot/#limit-server-execution-time>`_ for examples.
- Added the ``timeoutMS`` URI and keyword argument to :class:`~pymongo.mongo_client.MongoClient`.
- Added the :attr:`pymongo.errors.PyMongoError.timeout` property which is ``True`` when
the error was caused by a timeout.
@ -861,7 +874,7 @@ Unavoidable breaking changes
encryption support.
- :meth:`~pymongo.collection.Collection.estimated_document_count` now always uses
the `count`_ command. Due to an oversight in versions 5.0.0-5.0.8 of MongoDB,
the count command was not included in V1 of the :ref:`versioned-api-ref`.
the count command was not included in V1 of the `Stable API <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/stable-api/#stable-api>`_.
Users of the Stable API with estimated_document_count are recommended to upgrade
their server version to 5.0.9+ or set :attr:`pymongo.server_api.ServerApi.strict`
to ``False`` to avoid encountering errors (`PYTHON-3167`_).
@ -924,7 +937,7 @@ Changes in Version 4.1 (2021/12/07)
PyMongo 4.1 brings a number of improvements including:
- Type Hinting support (formerly provided by `pymongo-stubs`_). See :doc:`examples/type_hints` for more information.
- Type Hinting support (formerly provided by `pymongo-stubs`_). See `Type Hints <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/run-command/#type-hints>`_ for more information.
- Added support for the ``comment`` parameter to all helpers. For example see
:meth:`~pymongo.collection.Collection.insert_one`.
- Added support for the ``let`` parameter to
@ -1013,7 +1026,7 @@ Breaking Changes in 4.0
:data:`bson.binary.UuidRepresentation.PYTHON_LEGACY` to
:data:`bson.binary.UuidRepresentation.UNSPECIFIED`. Attempting to encode a
:class:`uuid.UUID` instance to BSON or JSON now produces an error by default.
See :ref:`handling-uuid-data-example` for details.
See `UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
- Removed the ``waitQueueMultiple`` keyword argument to
:class:`~pymongo.mongo_client.MongoClient` and removed
:exc:`pymongo.errors.ExceededMaxWaiters`.
@ -1352,7 +1365,7 @@ Notable improvements
- Added support for MongoDB 5.0.
- Support for MongoDB Stable API, see :class:`~pymongo.server_api.ServerApi`.
- Support for snapshot reads on secondaries (see :ref:`snapshot-reads-ref`).
- Support for snapshot reads on secondaries (see `snapshot reads <https://www.mongodb.com/docs/manual/reference/read-concern-snapshot/#read-concern--snapshot->`_).
- Support for Azure and GCP KMS providers for client side field level
encryption. See the docstring for :class:`~pymongo.mongo_client.MongoClient`,
:class:`~pymongo.encryption_options.AutoEncryptionOpts`,
@ -1409,7 +1422,7 @@ Deprecations
same API.
- Deprecated the :mod:`pymongo.message` module.
- Deprecated the ``ssl_keyfile`` and ``ssl_certfile`` URI options in favor
of ``tlsCertificateKeyFile`` (see :doc:`examples/tls`).
of ``tlsCertificateKeyFile`` (see `TLS <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/tls/#configure-transport-layer-security--tls->`_).
.. _PYTHON-2466: https://jira.mongodb.org/browse/PYTHON-2466
.. _PYTHON-1690: https://jira.mongodb.org/browse/PYTHON-1690
@ -1507,12 +1520,12 @@ Changes in Version 3.11.0 (2020/07/30)
Version 3.11 adds support for MongoDB 4.4 and includes a number of bug fixes.
Highlights include:
- Support for :ref:`OCSP` (Online Certificate Status Protocol).
- Support for `OCSP <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/tls/#ocsp>`_ (Online Certificate Status Protocol).
- Support for `PyOpenSSL <https://pypi.org/project/pyOpenSSL/>`_ as an
alternative TLS implementation. PyOpenSSL is required for :ref:`OCSP`
alternative TLS implementation. PyOpenSSL is required for `OCSP <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/tls/#ocsp>`_
support. It will also be installed when using the "tls" extra if the
version of Python in use is older than 2.7.9.
- Support for the :ref:`MONGODB-AWS` authentication mechanism.
- Support for the `MONGODB-AWS <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/aws-iam/#aws-identity-and-access-management>`_ authentication mechanism.
- Support for the ``directConnection`` URI option and kwarg to
:class:`~pymongo.mongo_client.MongoClient`.
- Support for speculative authentication attempts in connection handshakes
@ -1538,7 +1551,7 @@ Highlights include:
- Added support for :data:`bson.binary.UuidRepresentation.UNSPECIFIED` and
``MongoClient(uuidRepresentation='unspecified')`` which will become the
default UUID representation starting in PyMongo 4.0. See
:ref:`handling-uuid-data-example` for details.
`UUID representations <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
- New methods :meth:`bson.binary.Binary.from_uuid` and
:meth:`bson.binary.Binary.as_uuid`.
- Added the ``background`` parameter to
@ -1622,7 +1635,7 @@ Version 3.10 includes a number of improvements and bug fixes. Highlights
include:
- Support for Client-Side Field Level Encryption with MongoDB 4.2. See
:doc:`examples/encryption` for examples.
`Client-Side Field Level Encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#client-side-field-level-encryption>`_ for examples.
- Support for Python 3.8.
- Added :attr:`pymongo.client_session.ClientSession.in_transaction`.
- Do not hold the Topology lock while creating connections in a MongoClient's
@ -1648,7 +1661,7 @@ Changes in Version 3.9.0 (2019/08/13)
Version 3.9 adds support for MongoDB 4.2. Highlights include:
- Support for MongoDB 4.2 sharded transactions. Sharded transactions have
the same API as replica set transactions. See :ref:`transactions-ref`.
the same API as replica set transactions. See `Transactions <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/transactions/#transactions>`_.
- New method :meth:`pymongo.client_session.ClientSession.with_transaction` to
support conveniently running a transaction in a session with automatic
retries and at-most-once semantics.
@ -1776,7 +1789,7 @@ Changes in Version 3.8.0 (2019/04/22)
- Custom types can now be directly encoded to, and decoded from MongoDB using
the :class:`~bson.codec_options.TypeCodec` and
:class:`~bson.codec_options.TypeRegistry` APIs. For more information, see
the :doc:`custom type example <examples/custom_type>`.
`Custom Types <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_.
- Attempting a multi-document transaction on a sharded cluster now raises a
:exc:`~pymongo.errors.ConfigurationError`.
- :meth:`pymongo.cursor.Cursor.distinct` and
@ -1806,7 +1819,7 @@ Changes in Version 3.8.0 (2019/04/22)
- Iterating over a :class:`~bson.raw_bson.RawBSONDocument` now maintains the
same field order of the underlying raw BSON document.
- Applications can now register a custom server selector. For more information
see the :doc:`server selector example <examples/server_selection>`.
see `Customize Server Selection <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/server-selection/#customize-server-selection>`_.
- The connection pool now implements a LIFO policy.
Unavoidable breaking changes:
@ -1874,9 +1887,9 @@ Changes in Version 3.7.0 (2018/06/26)
Version 3.7 adds support for MongoDB 4.0. Highlights include:
- Support for single replica set multi-document ACID transactions.
See :ref:`transactions-ref`.
See `transactions <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/transactions/#transactions>`_.
- Support for wire protocol compression via the new ``compressors`` URI and keyword argument to
:meth:`~pymongo.mongo_client.MongoClient`. See :ref:`network-compression-example` for details.
:meth:`~pymongo.mongo_client.MongoClient`. See `network compression <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/network-compression/#compress-network-traffic>`_ for details.
- Support for Python 3.7.
- New count methods, :meth:`~pymongo.collection.Collection.count_documents`
and :meth:`~pymongo.collection.Collection.estimated_document_count`.
@ -1897,9 +1910,9 @@ Version 3.7 adds support for MongoDB 4.0. Highlights include:
the following features and changes allow PyMongo to function when MD5 support
is disabled in OpenSSL by the FIPS Object Module:
- Support for the :ref:`SCRAM-SHA-256 <scram_sha_256>`
authentication mechanism. The :ref:`GSSAPI <gssapi>`,
:ref:`PLAIN <sasl_plain>`, and :ref:`MONGODB-X509 <mongodb_x509>`
- Support for the `SCRAM-SHA-256 <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/scram/#scram>`_
authentication mechanism. The `GSSAPI <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/kerberos/#kerberos--gssapi->`_,
`PLAIN <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/ldap/#overview>`_, and `MONGODB-X509 <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/x509/#x.509>`_
mechanisms can also be used to avoid issues with OpenSSL in FIPS
environments.
- MD5 checksums are now optional in GridFS. See the ``disable_md5`` option
@ -1917,7 +1930,7 @@ Version 3.7 adds support for MongoDB 4.0. Highlights include:
class which is a subclass of :class:`~pymongo.change_stream.ChangeStream`.
- SCRAM client and server keys are cached for improved performance, following
`RFC 5802 <https://tools.ietf.org/html/rfc5802>`_.
- If not specified, the authSource for the :ref:`PLAIN <sasl_plain>`
- If not specified, the authSource for the `PLAIN <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/ldap/#overview>`_
authentication mechanism defaults to $external.
- wtimeoutMS is once again supported as a URI option.
- When using unacknowledged write concern and connected to MongoDB server
@ -2167,7 +2180,7 @@ Changes and Deprecations:
consistent across all MongoDB versions.
- In Python 3, :meth:`~bson.json_util.loads` now automatically decodes JSON
$binary with a subtype of 0 into :class:`bytes` instead of
:class:`~bson.binary.Binary`. See the :doc:`/python3` for more details.
:class:`~bson.binary.Binary`.
- :meth:`~bson.json_util.loads` now raises ``TypeError`` or ``ValueError``
when parsing JSON type wrappers with values of the wrong type or any
extra keys.
@ -2196,7 +2209,7 @@ Highlights include:
- Complete support for MongoDB 3.4:
- Unicode aware string comparison using :doc:`examples/collations`.
- Unicode aware string comparison using `Collation <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/configure/#collation>`_.
- Support for the new :class:`~bson.decimal128.Decimal128` BSON type.
- A new maxStalenessSeconds read preference option.
- A username is no longer required for the MONGODB-X509 authentication
@ -2534,7 +2547,7 @@ In PyMongo 3.0, the ``use_greenlets`` option is gone. To use PyMongo with
Gevent simply call ``gevent.monkey.patch_all()``.
For more information,
see :doc:`PyMongo's Gevent documentation <examples/gevent>`.
see `Gevent <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/integrations/#gevent>`_.
:class:`~pymongo.mongo_client.MongoClient` changes
..................................................
@ -2578,7 +2591,7 @@ the list, and used it until a network error prompted it to re-evaluate all
mongoses' latencies and reconnect to one of them. In PyMongo 3, the client
monitors its network latency to all the mongoses continuously, and distributes
operations evenly among those with the lowest latency.
See :ref:`mongos-load-balancing` for more information.
See `load balancing <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-targets/#replica-sets>`_ for more information.
The client methods ``start_request``, ``in_request``, and ``end_request``
are removed, and so is the ``auto_start_request`` option. Requests were
@ -2586,7 +2599,7 @@ designed to make read-your-writes consistency more likely with the ``w=0``
write concern. Additionally, a thread in a request used the same member for
all secondary reads in a replica set. To ensure read-your-writes consistency
in PyMongo 3.0, do not override the default write concern with ``w=0``, and
do not override the default :ref:`read preference <secondary-reads>` of
do not override the default `read preference <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/configure/#read-and-write-settings>`_ of
PRIMARY.
Support for the ``slaveOk`` (or ``slave_okay``), ``safe``, and
@ -2600,8 +2613,7 @@ The ``max_pool_size`` option has been removed. It is replaced by the
``maxPoolSize`` MongoDB URI option. ``maxPoolSize`` is now a supported URI
option in PyMongo and can be passed as a keyword argument.
The ``copy_database`` method is removed, see the
:doc:`copy_database examples </examples/copydb>` for alternatives.
The ``copy_database`` method is removed, see `Copy and Clone Databases <https://www.mongodb.com/docs/database-tools/mongodump/mongodump-examples/#copy-and-clone-databases>`_ for alternatives.
The ``disconnect`` method is removed. Use
:meth:`~pymongo.mongo_client.MongoClient.close` instead.
@ -2938,7 +2950,7 @@ Version 2.9.4 fixes issues reported since the release of 2.9.3.
- Fixed :class:`~pymongo.mongo_replica_set_client.MongoReplicaSetClient` handling of
uuidRepresentation.
- Fixed building and testing the documentation with python 3.x.
- New documentation for :doc:`examples/tls` and :doc:`atlas`.
- New documentation for `TLS <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/tls/#configure-transport-layer-security--tls->`_ and `Atlas <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-targets/#atlas>`_.
Issues Resolved
...............
@ -3177,7 +3189,7 @@ PyMongo 2.7 is a major release with a large number of new features and bug
fixes. Highlights include:
- Full support for MongoDB 2.6.
- A new :doc:`bulk write operations API </examples/bulk>`.
- A new `bulk write operations API <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/bulk-write/#collection-bulk-write-example>`_.
- Support for server side query timeouts using
:meth:`~pymongo.cursor.Cursor.max_time_ms`.
- Support for writing :meth:`~pymongo.collection.Collection.aggregate`
@ -3188,7 +3200,7 @@ fixes. Highlights include:
error details from the server.
- A new GridFS :meth:`~gridfs.GridFS.find` method that returns a
:class:`~gridfs.grid_file.GridOutCursor`.
- Greatly improved :doc:`support for mod_wsgi </examples/mod_wsgi>` when using
- Greatly improved `support for mod_wsgi <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/integrations/#mod_wsgi>`_ when using
PyMongo's C extensions. Read `Jesse's blog post
<https://emptysqua.re/blog/python-c-extensions-and-mod-wsgi/>`_ for details.
- Improved C extension support for ARM little endian.
@ -3268,14 +3280,14 @@ Important new features:
``waitQueueTimeoutMS`` is set, an operation that blocks waiting for a socket
will raise :exc:`~pymongo.errors.ConnectionFailure` after the timeout. By
default ``waitQueueTimeoutMS`` is not set.
See :ref:`connection-pooling` for more information.
See `connection pooling <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/connection-pools/#connection-pools>`_ for more information.
- The :meth:`~pymongo.collection.Collection.insert` method automatically splits
large batches of documents into multiple insert messages based on
:attr:`~pymongo.mongo_client.MongoClient.max_message_size`
- Support for the exhaust cursor flag.
See :meth:`~pymongo.collection.Collection.find` for details and caveats.
- Support for the PLAIN and MONGODB-X509 authentication mechanisms.
See :doc:`the authentication docs </examples/authentication>` for more
See `the authentication docs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/#authentication-mechanisms>`_ for more
information.
- Support aggregation output as a :class:`~pymongo.cursor.Cursor`. See
:meth:`~pymongo.collection.Collection.aggregate` for details.
@ -3288,7 +3300,7 @@ Important new features:
to having a ``max_pool_size`` larger than necessary. Err towards a larger
value.) If your application accepts the default, continue to do so.
See :ref:`connection-pooling` for more information.
See `connection pooling <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/connection-pools/#connection-pools>`_ for more information.
Issues Resolved
...............
@ -3334,7 +3346,7 @@ Version 2.5 includes changes to support new features in MongoDB 2.4.
Important new features:
- Support for :ref:`GSSAPI (Kerberos) authentication <gssapi>`.
- Support for `GSSAPI (Kerberos) <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/kerberos/#kerberos--gssapi->`_ authentication.
- Support for SSL certificate validation with hostname matching.
- Support for delegated and role based authentication.
- New GEOSPHERE (2dsphere) and HASHED index constants.
@ -3441,7 +3453,7 @@ Version 2.3 adds support for new features and behavior changes in MongoDB
Important New Features:
- Support for expanded read preferences including directing reads to tagged
servers - See :ref:`secondary-reads` for more information.
servers - See `secondary reads <https://www.mongodb.com/docs/manual/core/read-preference/#mongodb-readmode-secondary>`_ for more information.
- Support for mongos failover.
- A new :meth:`~pymongo.collection.Collection.aggregate` method to support
MongoDB's new `aggregation framework
@ -3495,10 +3507,10 @@ to this release.
Important New Features:
- Support for Python 3 -
See the :doc:`python3` for more information.
- Support for Python 3.
See `Python 3 <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/reference/upgrade/#upgrade-pymongo-versions>`_ for more information.
- Support for Gevent -
See :doc:`examples/gevent` for more information.
See `Gevent <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/integrations/#gevent>`_ for more information.
- Improved connection pooling.
See `PYTHON-287 <https://jira.mongodb.org/browse/PYTHON-287>`_.
@ -4104,7 +4116,7 @@ Other changes:
- clean up all cases where :class:`~pymongo.errors.ConnectionFailure`
is raised.
- simplification of connection pooling - makes driver ~2x faster for
simple benchmarks. see :ref:`connection-pooling` for more information.
simple benchmarks. see `connection pooling <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/connection-pools/#connection-pools>`_ for more information.
- DEPRECATED ``pool_size``, ``auto_start_request`` and ``timeout``
parameters to :class:`~pymongo.connection.Connection`. DEPRECATED
:meth:`~pymongo.connection.Connection.start_request`.
@ -4171,7 +4183,7 @@ Changes in Version 1.2 (2009/12/09)
get around some issues with queries on fields named ``query``
- enforce 4MB document limit on the client side
- added :meth:`~pymongo.collection.Collection.map_reduce` helper - see
:doc:`example <examples/aggregation>`
`Aggregation <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/aggregation/#transform-your-data-with-aggregation>`_
- added :meth:`~pymongo.cursor.Cursor.distinct` method on
:class:`~pymongo.cursor.Cursor` instances to allow distinct with
queries

View File

@ -1,96 +0,0 @@
Frequently Encountered Issues
=============================
Also see the :ref:`TLSErrors` section.
Server reports wire version X, PyMongo requires Y
-------------------------------------------------
When one attempts to connect to a <=3.6 version server, PyMongo will throw the following error::
>>> client.admin.command('ping')
...
pymongo.errors.ConfigurationError: Server at localhost:27017 reports wire version 6, but this version of PyMongo requires at least 7 (MongoDB 4.0).
This is caused by the driver being too new for the server it is being run against.
To resolve this issue, either upgrade your database to version >= 4.0 or downgrade to an earlier version of PyMongo that supports MongoDB < 4.0.
'Cursor' object has no attribute '_Cursor__killed'
--------------------------------------------------
On versions of PyMongo <3.9, when supplying invalid arguments to the constructor of Cursor,
there will be a TypeError raised, and an AttributeError printed to ``stderr``. The AttributeError is not relevant;
instead, look at the TypeError for debugging information::
>>> coll.find(wrong=1)
Exception ignored in: <function Cursor.__del__ at 0x1048129d8>
...
AttributeError: 'Cursor' object has no attribute '_Cursor__killed'
...
TypeError: __init__() got an unexpected keyword argument 'wrong'
To fix this, make sure that you are supplying the correct keyword arguments.
In addition, you can also upgrade to PyMongo >=3.9, which will remove the spurious error.
MongoClient fails ConfigurationError
------------------------------------
This is a common issue stemming from using incorrect keyword argument names.
>>> client = MongoClient(wrong=1)
...
pymongo.errors.ConfigurationError: Unknown option wrong
To fix this, check your spelling and make sure that the keyword argument you are specifying exists.
DeprecationWarning: count is deprecated
---------------------------------------
PyMongo no longer supports :meth:`pymongo.cursor.Cursor.count`.
Instead, use :meth:`pymongo.collection.Collection.count_documents`::
>>> client = MongoClient()
>>> d = datetime.datetime(2009, 11, 12, 12)
>>> list(client.db.coll.find({"date": {"$lt": d}}, limit=2))
[{'_id': ObjectId('6247b058cebb8b179b7039f8'), 'date': datetime.datetime(1, 1, 1, 0, 0)}, {'_id': ObjectId('6247b059cebb8b179b7039f9'), 'date': datetime.datetime(1, 1, 1, 0, 0)}]
>>> client.db.coll.count_documents({"date": {"$lt": d}}, limit=2)
2
Note that this is NOT the same as ``Cursor.count_documents`` (which does not exist).
It is a method of the Collection class, so you must call it on a collection object
or you will receive the following error::
>>> Cursor(MongoClient().db.coll).count()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'Cursor' object has no attribute 'count'
>>>
Timeout when accessing MongoDB from PyMongo with tunneling
----------------------------------------------------------
When attempting to connect to a replica set MongoDB instance over an SSH tunnel you
will receive the following error::
File "/Library/Python/2.7/site-packages/pymongo/collection.py", line 1560, in count
return self._count(cmd, collation, session)
File "/Library/Python/2.7/site-packages/pymongo/collection.py", line 1504, in _count
with self._socket_for_reads() as (connection, slave_ok):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/contextlib.py", line 17, in __enter__
return self.gen.next()
File "/Library/Python/2.7/site-packages/pymongo/mongo_client.py", line 982, in _socket_for_reads
server = topology.select_server(read_preference)
File "/Library/Python/2.7/site-packages/pymongo/topology.py", line 224, in select_server
address))
File "/Library/Python/2.7/site-packages/pymongo/topology.py", line 183, in select_servers
selector, server_timeout, address)
File "/Library/Python/2.7/site-packages/pymongo/topology.py", line 199, in _select_servers_loop
self._error_message(selector))
pymongo.errors.ServerSelectionTimeoutError: localhost:27017: timed out
This happens because PyMongo discovers replica set members using the response from the isMaster command, which
contains the addresses and ports of the other members. However, these addresses and ports will not be accessible through the SSH tunnel. Thus, this behavior is unsupported.
You can, however, connect directly to a single MongoDB node using the directConnection=True option with SSH tunneling.

View File

@ -1,62 +0,0 @@
Compatibility Policy
====================
Semantic Versioning
-------------------
PyMongo's version numbers follow `semantic versioning`_: each version number
is structured "major.minor.patch". Patch releases fix bugs, minor releases
add features (and may fix bugs), and major releases include API changes that
break backwards compatibility (and may add features and fix bugs).
Deprecation
-----------
Before we remove a feature in a major release, PyMongo's maintainers make an
effort to release at least one minor version that *deprecates* it. We add
"**DEPRECATED**" to the feature's documentation, and update the code to raise a
`DeprecationWarning`_. You can ensure your code is future-proof by running
your code with the latest PyMongo release and looking for DeprecationWarnings.
The interpreter silences DeprecationWarnings by default. For example, the
following code uses the deprecated ``insert`` method but does not raise any
warning:
.. code-block:: python
# "insert.py" (with PyMongo 3.X)
from pymongo import MongoClient
client = MongoClient()
client.test.test.insert({})
To print deprecation warnings to stderr, run python with "-Wd"::
$ python3 -Wd insert.py
insert.py:4: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.
client.test.test.insert({})
You can turn warnings into exceptions with "python -We"::
$ python3 -We insert.py
Traceback (most recent call last):
File "insert.py", line 4, in <module>
client.test.test.insert({})
File "/home/durin/work/mongo-python-driver/pymongo/collection.py", line 2906, in insert
"instead.", DeprecationWarning, stacklevel=2)
DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.
If your own code's test suite passes with "python -We" then it uses no
deprecated PyMongo features.
.. seealso:: The Python documentation on `the warnings module`_,
and `the -W command line option`_.
.. _semantic versioning: https://semver.org/
.. _DeprecationWarning:
https://docs.python.org/3/library/exceptions.html#DeprecationWarning
.. _the warnings module: https://docs.python.org/3/library/warnings.html
.. _the -W command line option: https://docs.python.org/3/using/cmdline.html#cmdoption-W

View File

@ -1,9 +0,0 @@
Developer Guide
===============
Technical guide for contributors to PyMongo.
.. toctree::
:maxdepth: 1
periodic_executor

View File

@ -1,113 +0,0 @@
Periodic Executors
==================
.. currentmodule:: pymongo
PyMongo implements a :class:`~periodic_executor.PeriodicExecutor` for two
purposes: as the background thread for :class:`~monitor.Monitor`, and to
regularly check if there are ``OP_KILL_CURSORS`` messages that must be sent to the server.
Killing Cursors
---------------
An incompletely iterated :class:`~cursor.Cursor` on the client represents an
open cursor object on the server. In code like this, we lose a reference to
the cursor before finishing iteration::
for doc in collection.find():
raise Exception()
We try to send an ``OP_KILL_CURSORS`` to the server to tell it to clean up the
server-side cursor. But we must not take any locks directly from the cursor's
destructor (see `PYTHON-799`_), so we cannot safely use the PyMongo data
structures required to send a message. The solution is to add the cursor's id
to an array on the :class:`~mongo_client.MongoClient` without taking any locks.
Each client has a :class:`~periodic_executor.PeriodicExecutor` devoted to
checking the array for cursor ids. Any it sees are the result of cursors that
were freed while the server-side cursor was still open. The executor can safely
take the locks it needs in order to send the ``OP_KILL_CURSORS`` message.
.. _PYTHON-799: https://jira.mongodb.org/browse/PYTHON-799
Stopping Executors
------------------
Just as :class:`~cursor.Cursor` must not take any locks from its destructor,
neither can :class:`~mongo_client.MongoClient` and :class:`~topology.Topology`.
Thus, although the client calls :meth:`close` on its kill-cursors thread, and
the topology calls :meth:`close` on all its monitor threads, the :meth:`close`
method cannot actually call :meth:`wake` on the executor, since :meth:`wake`
takes a lock.
Instead, executors wake periodically to check if ``self.close`` is set,
and if so they exit.
A thread can log spurious errors if it wakes late in the Python interpreter's
shutdown sequence, so we try to join threads before then. Each periodic
executor (either a monitor or a kill-cursors thread) adds a weakref to itself
to a set called ``_EXECUTORS``, in the ``periodic_executor`` module.
An `exit handler`_ runs on shutdown and tells all executors to stop, then
tries (with a short timeout) to join all executor threads.
.. _exit handler: https://docs.python.org/3/library/atexit.html
Monitoring
----------
For each server in the topology, :class:`~topology.Topology` uses a periodic
executor to launch a monitor thread. This thread must not prevent the topology
from being freed, so it weakrefs the topology. Furthermore, it uses a weakref
callback to terminate itself soon after the topology is freed.
Solid lines represent strong references, dashed lines weak ones:
.. generated with graphviz: "dot -Tpng periodic-executor-refs.dot > periodic-executor-refs.png"
.. image:: ../static/periodic-executor-refs.png
See `Stopping Executors`_ above for an explanation of the ``_EXECUTORS`` set.
It is a requirement of the `Server Discovery And Monitoring Spec`_ that a
sleeping monitor can be awakened early. Aside from infrequent wakeups to do
their appointed chores, and occasional interruptions, periodic executors also
wake periodically to check if they should terminate.
Our first implementation of this idea was the obvious one: use the Python
standard library's threading.Condition.wait with a timeout. Another thread
wakes the executor early by signaling the condition variable.
A topology cannot signal the condition variable to tell the executor to
terminate, because it would risk a deadlock in the garbage collector: no
destructor or weakref callback can take a lock to signal the condition variable
(see `PYTHON-863`_); thus the only way for a dying object to terminate a
periodic executor is to set its "stopped" flag and let the executor see the
flag next time it wakes.
We erred on the side of prompt cleanup, and set the check interval at 100ms. We
assumed that checking a flag and going back to sleep 10 times a second was
cheap on modern machines.
Starting in Python 3.2, the builtin C implementation of lock.acquire takes a
timeout parameter, so Python 3.2+ Condition variables sleep simply by calling
lock.acquire; they are implemented as efficiently as expected.
But in Python 2, lock.acquire has no timeout. To wait with a timeout, a Python
2 condition variable sleeps a millisecond, tries to acquire the lock, sleeps
twice as long, and tries again. This exponential backoff reaches a maximum
sleep time of 50ms.
If PyMongo calls the condition variable's "wait" method with a short timeout,
the exponential backoff is restarted frequently. Overall, the condition variable
is not waking a few times a second, but hundreds of times. (See `PYTHON-983`_.)
Thus the current design of periodic executors is surprisingly simple: they
do a simple ``time.sleep`` for a half-second, check if it is time to wake or
terminate, and sleep again.
.. _Server Discovery And Monitoring Spec: https://github.com/mongodb/specifications/blob/master/source/server-discovery-and-monitoring/server-monitoring.md#requesting-an-immediate-check
.. _PYTHON-863: https://jira.mongodb.org/browse/PYTHON-863
.. _PYTHON-983: https://jira.mongodb.org/browse/PYTHON-983

View File

@ -1,90 +0,0 @@
Aggregation Examples
====================
There are several methods of performing aggregations in MongoDB. These
examples cover the new aggregation framework, using map reduce and using the
group method.
.. testsetup::
from pymongo import MongoClient
client = MongoClient()
client.drop_database("aggregation_example")
Setup
-----
To start, we'll insert some example data which we can perform
aggregations on:
.. doctest::
>>> from pymongo import MongoClient
>>> db = MongoClient().aggregation_example
>>> result = db.things.insert_many(
... [
... {"x": 1, "tags": ["dog", "cat"]},
... {"x": 2, "tags": ["cat"]},
... {"x": 2, "tags": ["mouse", "cat", "dog"]},
... {"x": 3, "tags": []},
... ]
... )
>>> result.inserted_ids
[ObjectId('...'), ObjectId('...'), ObjectId('...'), ObjectId('...')]
.. _aggregate-examples:
Aggregation Framework
---------------------
This example shows how to use the
:meth:`~pymongo.collection.Collection.aggregate` method to use the aggregation
framework. We'll perform a simple aggregation to count the number of
occurrences for each tag in the ``tags`` array, across the entire collection.
To achieve this we need to pass in three operations to the pipeline.
First, we need to unwind the ``tags`` array, then group by the tags and
sum them up, finally we sort by count.
Python dictionaries prior to Python 3.7 don't maintain insertion order. You should use :class:`~bson.son.SON`
or :class:`collections.OrderedDict` where explicit ordering is required on an older Python version,
e.g. for "$sort":
.. note::
aggregate requires server version **>= 2.1.0**.
.. doctest::
>>> from bson.son import SON
>>> pipeline = [
... {"$unwind": "$tags"},
... {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
... {"$sort": SON([("count", -1), ("_id", -1)])},
... ]
>>> import pprint
>>> pprint.pprint(list(db.things.aggregate(pipeline)))
[{'_id': 'cat', 'count': 3},
{'_id': 'dog', 'count': 2},
{'_id': 'mouse', 'count': 1}]
To run an explain plan for this aggregation use
`PyMongoExplain <https://pypi.org/project/pymongoexplain/>`_,
a companion library for PyMongo. It allows you to explain any CRUD operation
by providing a few convenience classes::
>>> from pymongoexplain import ExplainableCollection
>>> ExplainableCollection(collection).aggregate(pipeline)
{'ok': 1.0, 'queryPlanner': [...]}
Or, use the :meth:`~pymongo.database.Database.command` method::
>>> db.command('aggregate', 'things', pipeline=pipeline, explain=True)
{'ok': 1.0, 'stages': [...]}
As well as simple aggregations the aggregation framework provides projection
capabilities to reshape the returned data. Using projections and aggregation,
you can add computed fields, create new virtual sub-objects, and extract
sub-fields into the top-level of results.
.. seealso:: The full documentation for MongoDB's `aggregation framework
<https://mongodb.com/docs/manual/applications/aggregation>`_

View File

@ -1,528 +0,0 @@
Authentication Examples
=======================
MongoDB supports several different authentication mechanisms. These examples
cover all authentication methods currently supported by PyMongo, documenting
Python module and MongoDB version dependencies.
.. _percent escaped:
Percent-Escaping Username and Password
--------------------------------------
Username and password must be percent-escaped with
:py:func:`urllib.parse.quote_plus`, to be used in a MongoDB URI. For example::
>>> from pymongo import MongoClient
>>> import urllib.parse
>>> username = urllib.parse.quote_plus('user')
>>> username
'user'
>>> password = urllib.parse.quote_plus('pass/word')
>>> password
'pass%2Fword'
>>> MongoClient('mongodb://%s:%s@127.0.0.1' % (username, password))
...
.. _scram_sha_256:
SCRAM-SHA-256 (RFC 7677)
------------------------
.. versionadded:: 3.7
SCRAM-SHA-256 is the default authentication mechanism supported by a cluster
configured for authentication with MongoDB 4.0 or later. Authentication
requires a username, a password, and a database name. The default database
name is "admin"; this can be overridden with the ``authSource`` option.
Credentials can be specified as arguments to
:class:`~pymongo.mongo_client.MongoClient`::
>>> from pymongo import MongoClient
>>> client = MongoClient('example.com',
... username='user',
... password='password',
... authSource='the_database',
... authMechanism='SCRAM-SHA-256')
Or through the MongoDB URI::
>>> uri = "mongodb://user:password@example.com/?authSource=the_database&authMechanism=SCRAM-SHA-256"
>>> client = MongoClient(uri)
SCRAM-SHA-1 (RFC 5802)
----------------------
.. versionadded:: 2.8
SCRAM-SHA-1 is the default authentication mechanism supported by a cluster
configured for authentication with MongoDB 3.0 or later. Authentication
requires a username, a password, and a database name. The default database
name is "admin"; this can be overridden with the ``authSource`` option.
Credentials can be specified as arguments to
:class:`~pymongo.mongo_client.MongoClient`::
>>> from pymongo import MongoClient
>>> client = MongoClient('example.com',
... username='user',
... password='password',
... authSource='the_database',
... authMechanism='SCRAM-SHA-1')
Or through the MongoDB URI::
>>> uri = "mongodb://user:password@example.com/?authSource=the_database&authMechanism=SCRAM-SHA-1"
>>> client = MongoClient(uri)
For best performance on Python versions older than 2.7.8 install `backports.pbkdf2`_.
.. _backports.pbkdf2: https://pypi.python.org/pypi/backports.pbkdf2/
Default Authentication Mechanism
--------------------------------
If no mechanism is specified, PyMongo automatically negotiates the mechanism to use (SCRAM-SHA-1
or SCRAM-SHA-256) with the MongoDB server.
Default Database and "authSource"
---------------------------------
You can specify both a default database and the authentication database in the
URI::
>>> uri = "mongodb://user:password@example.com/default_db?authSource=admin"
>>> client = MongoClient(uri)
PyMongo will authenticate on the "admin" database, but the default database
will be "default_db"::
>>> # get_database with no "name" argument chooses the DB from the URI
>>> db = MongoClient(uri).get_database()
>>> print(db.name)
default_db
.. _mongodb_x509:
MONGODB-X509
------------
.. versionadded:: 2.6
The MONGODB-X509 mechanism authenticates via the X.509 certificate presented
by the driver during TLS/SSL negotiation. This authentication method requires
the use of TLS/SSL connections with certificate validation::
>>> from pymongo import MongoClient
>>> client = MongoClient('example.com',
... authMechanism="MONGODB-X509",
... tls=True,
... tlsCertificateKeyFile='/path/to/client.pem',
... tlsCAFile='/path/to/ca.pem')
MONGODB-X509 authenticates against the $external virtual database, so you
do not have to specify a database in the URI::
>>> uri = "mongodb://example.com/?authMechanism=MONGODB-X509"
>>> client = MongoClient(uri,
... tls=True,
... tlsCertificateKeyFile='/path/to/client.pem',
... tlsCAFile='/path/to/ca.pem')
>>>
.. _gssapi:
GSSAPI (Kerberos)
-----------------
.. versionadded:: 2.5
GSSAPI (Kerberos) authentication is available in the Enterprise Edition of
MongoDB.
Unix
~~~~
To authenticate using GSSAPI you must first install the python `kerberos`_ or
`pykerberos`_ module using pip. Make sure you run kinit before
using the following authentication methods::
$ kinit mongodbuser@EXAMPLE.COM
mongodbuser@EXAMPLE.COM's Password:
$ klist
Credentials cache: FILE:/tmp/krb5cc_1000
Principal: mongodbuser@EXAMPLE.COM
Issued Expires Principal
Feb 9 13:48:51 2013 Feb 9 23:48:51 2013 krbtgt/EXAMPLE.COM@EXAMPLE.COM
Now authenticate using the MongoDB URI. GSSAPI authenticates against the
$external virtual database so you do not have to specify a database in the
URI::
>>> # Note: the kerberos principal must be url encoded.
>>> from pymongo import MongoClient
>>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@mongo-server.example.com/?authMechanism=GSSAPI"
>>> client = MongoClient(uri)
>>>
The default service name used by MongoDB and PyMongo is ``mongodb``. You can
specify a custom service name with the ``authMechanismProperties`` option::
>>> from pymongo import MongoClient
>>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@mongo-server.example.com/?authMechanism=GSSAPI&authMechanismProperties=SERVICE_NAME:myservicename"
>>> client = MongoClient(uri)
Windows (SSPI)
~~~~~~~~~~~~~~
.. versionadded:: 3.3
First install the `winkerberos`_ module. Unlike authentication on Unix, kinit is
not used. If the user to authenticate is different from the user that owns the
application process, provide a password to authenticate::
>>> uri = "mongodb://mongodbuser%40EXAMPLE.COM:mongodbuserpassword@example.com/?authMechanism=GSSAPI"
Two extra ``authMechanismProperties`` are supported on Windows platforms:
- CANONICALIZE_HOST_NAME - Uses the fully qualified domain name (FQDN) of the
MongoDB host for the server principal (GSSAPI libraries on Unix do this by
default)::
>>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@example.com/?authMechanism=GSSAPI&authMechanismProperties=CANONICALIZE_HOST_NAME:true"
- SERVICE_REALM - This is used when the user's realm is different from the service's realm::
>>> uri = "mongodb://mongodbuser%40EXAMPLE.COM@example.com/?authMechanism=GSSAPI&authMechanismProperties=SERVICE_REALM:otherrealm"
.. _kerberos: https://pypi.python.org/pypi/kerberos
.. _pykerberos: https://pypi.python.org/pypi/pykerberos
.. _winkerberos: https://pypi.python.org/pypi/winkerberos/
.. _sasl_plain:
SASL PLAIN (RFC 4616)
---------------------
.. versionadded:: 2.6
MongoDB Enterprise Edition version 2.6 and newer support the SASL PLAIN
authentication mechanism, initially intended for delegating authentication
to an LDAP server. These examples use the $external virtual database for LDAP support::
>>> from pymongo import MongoClient
>>> uri = "mongodb://user:password@example.com/?authMechanism=PLAIN"
>>> client = MongoClient(uri)
>>>
SASL PLAIN is a clear-text authentication mechanism. We **strongly** recommend
that you connect to MongoDB using TLS/SSL with certificate validation when
using the SASL PLAIN mechanism::
>>> from pymongo import MongoClient
>>> uri = "mongodb://user:password@example.com/?authMechanism=PLAIN"
>>> client = MongoClient(uri,
... tls=True,
... tlsCertificateKeyFile='/path/to/client.pem',
... tlsCAFile='/path/to/ca.pem')
>>>
.. _MONGODB-AWS:
MONGODB-AWS
-----------
.. versionadded:: 3.11
The MONGODB-AWS authentication mechanism is available in MongoDB 4.4+ and
requires extra pymongo dependencies. To use it, install pymongo with the
``aws`` extra::
$ python -m pip install 'pymongo[aws]'
The MONGODB-AWS mechanism authenticates using AWS IAM credentials (an access
key ID and a secret access key), `temporary AWS IAM credentials`_ obtained
from an `AWS Security Token Service (STS)`_ `Assume Role`_ request,
AWS Lambda `environment variables`_, or temporary AWS IAM credentials assigned
to an `EC2 instance`_ or ECS task. The use of temporary credentials, in
addition to an access key ID and a secret access key, also requires a
security (or session) token.
Credentials can be configured through the MongoDB URI, environment variables,
or the local EC2 or ECS endpoint. The order in which the client searches for
`credentials`_ is the same as the one used by the AWS ``boto3`` library
when using ``pymongo_auth_aws>=1.1.0``.
Because we are now using ``boto3`` to handle credentials, the order and
locations of credentials are slightly different from before. Particularly,
if you have a shared AWS credentials or config file,
then those credentials will be used by default if AWS auth environment
variables are not set. To override this behavior, set
``AWS_SHARED_CREDENTIALS_FILE=""`` in your shell or add
``os.environ["AWS_SHARED_CREDENTIALS_FILE"] = ""`` to your script or
application. Alternatively, you can create an AWS profile specifically for
your MongoDB credentials and set ``AWS_PROFILE`` to that profile name.
MONGODB-AWS authenticates against the "$external" virtual database, so none of
the URIs in this section need to include the ``authSource`` URI option.
.. _credentials: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
AWS IAM credentials
~~~~~~~~~~~~~~~~~~~
Applications can authenticate using AWS IAM credentials by providing a valid
access key id and secret access key pair as the username and password,
respectively, in the MongoDB URI. A sample URI would be::
>>> from pymongo import MongoClient
>>> uri = "mongodb+srv://<access_key_id>:<secret_access_key>@example.mongodb.net/?authMechanism=MONGODB-AWS"
>>> client = MongoClient(uri)
.. note:: The access_key_id and secret_access_key passed into the URI MUST
be `percent escaped`_.
AssumeRole
~~~~~~~~~~
Applications can authenticate using temporary credentials returned from an
assume role request. These temporary credentials consist of an access key
ID, a secret access key, and a security token passed into the URI.
A sample URI would be::
>>> from pymongo import MongoClient
>>> uri = "mongodb+srv://<access_key_id>:<secret_access_key>@example.mongodb.net/?authMechanism=MONGODB-AWS&authMechanismProperties=AWS_SESSION_TOKEN:<session_token>"
>>> client = MongoClient(uri)
.. note:: The access_key_id, secret_access_key, and session_token passed into
the URI MUST be `percent escaped`_.
AWS Lambda (Environment Variables)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the username and password are not provided and the MONGODB-AWS mechanism
is set, the client will fall back to using the `environment variables`_
``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``, and ``AWS_SESSION_TOKEN``
for the access key ID, secret access key, and session token, respectively::
$ export AWS_ACCESS_KEY_ID=<access_key_id>
$ export AWS_SECRET_ACCESS_KEY=<secret_access_key>
$ export AWS_SESSION_TOKEN=<session_token>
$ python
>>> from pymongo import MongoClient
>>> uri = "mongodb+srv://example.mongodb.net/?authMechanism=MONGODB-AWS"
>>> client = MongoClient(uri)
.. note:: No username, password, or session token is passed into the URI.
PyMongo will use credentials set via the environment variables.
These environment variables MUST NOT be `percent escaped`_.
.. _EKS Clusters:
EKS Clusters
~~~~~~~~~~~~
Applications using the `Authenticating users for your cluster from an OpenID Connect identity provider <https://docs.aws.amazon.com/eks/latest/userguide/authenticate-oidc-identity-provider.html>`_ capability on EKS can now
use the provided credentials, by giving the associated IAM User
`sts:AssumeRoleWithWebIdentity <https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html>`_
permission.
When the username and password are not provided, the MONGODB-AWS mechanism
is set, and ``AWS_WEB_IDENTITY_TOKEN_FILE``, ``AWS_ROLE_ARN``, and
optional ``AWS_ROLE_SESSION_NAME`` are available, the driver will use
an ``AssumeRoleWithWebIdentity`` call to retrieve temporary credentials.
The application must be using ``pymongo_auth_aws`` >= 1.1.0 for EKS support.
ECS Container
~~~~~~~~~~~~~
Applications can authenticate from an ECS container via temporary
credentials assigned to the machine. A sample URI on an ECS container
would be::
>>> from pymongo import MongoClient
>>> uri = "mongodb+srv://example.mongodb.com/?authMechanism=MONGODB-AWS"
>>> client = MongoClient(uri)
.. note:: No username, password, or session token is passed into the URI.
PyMongo will query the ECS container endpoint to obtain these
credentials.
EC2 Instance
~~~~~~~~~~~~
Applications can authenticate from an EC2 instance via temporary
credentials assigned to the machine. A sample URI on an EC2 machine
would be::
>>> from pymongo import MongoClient
>>> uri = "mongodb+srv://example.mongodb.com/?authMechanism=MONGODB-AWS"
>>> client = MongoClient(uri)
.. note:: No username, password, or session token is passed into the URI.
PyMongo will query the EC2 instance endpoint to obtain these
credentials.
.. _temporary AWS IAM credentials: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp.html
.. _AWS Security Token Service (STS): https://docs.aws.amazon.com/STS/latest/APIReference/Welcome.html
.. _Assume Role: https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html
.. _EC2 instance: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2.html
.. _environment variables: https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html#configuration-envvars-runtime
MONGODB-OIDC
------------
.. versionadded:: 4.7
The `MONGODB-OIDC authentication mechanism`_ is available in MongoDB 7.0+ on Linux platforms.
The MONGODB-OIDC mechanism authenticates using an OpenID Connect (OIDC) access token.
The driver supports OIDC for workload identity, defined as an identity you assign to a software workload
(such as an application, service, script, or container) to authenticate and access other services and resources.
Credentials can be configured through the MongoDB URI or as arguments to
:class:`~pymongo.mongo_client.MongoClient`.
Built-in Support
~~~~~~~~~~~~~~~~
The driver has built-in support for Azure IMDS and GCP IMDS environments. Other environments
are supported with `Custom Callbacks`_.
Azure IMDS
^^^^^^^^^^
For an application running on an Azure VM or otherwise using the `Azure Internal Metadata Service`_,
you can use the built-in support for Azure. If using an Azure managed identity, the "<client_id>" is
the client ID. If using a service principal to represent an enterprise application, the "<client_id>" is
the application ID of the service principal. The ``<audience>`` value is the ``audience``
`configured on your MongoDB deployment`_.
.. code-block:: python
import os
uri = os.environ["MONGODB_URI"]
props = {"ENVIRONMENT": "azure", "TOKEN_RESOURCE": "<audience>"}
c = MongoClient(
uri,
username="<client_id>",
authMechanism="MONGODB-OIDC",
authMechanismProperties=props,
)
c.test.test.insert_one({})
c.close()
If the application is running on an Azure VM and only one managed identity is associated with the
VM, ``username`` can be omitted.
If providing the ``TOKEN_RESOURCE`` as part of a connection string, it can be given as follows.
If the ``TOKEN_RESOURCE`` contains any of the following characters [``,``, ``+``, ``&``], then
it MUST be url-encoded.
.. code-block:: python
import os
uri = f'{os.environ["MONGODB_URI"]}?authMechanism=MONGODB-OIDC&authMechanismProperties=ENVIRONMENT:azure,TOKEN_RESOURCE:<audience>'
c = MongoClient(uri)
c.test.test.insert_one({})
c.close()
GCP IMDS
^^^^^^^^
For an application running on a GCP VM or otherwise using the `GCP Internal Metadata Service`_,
you can use the built-in support for GCP, where ``<audience>`` below is the ``audience``
`configured on your MongoDB deployment`_.
.. code-block:: python
import os
uri = os.environ["MONGODB_URI"]
props = {"ENVIRONMENT": "gcp", "TOKEN_RESOURCE": "<audience>"}
c = MongoClient(uri, authMechanism="MONGODB-OIDC", authMechanismProperties=props)
c.test.test.insert_one({})
c.close()
If providing the ``TOKEN_RESOURCE`` as part of a connection string, it can be given as follows.
If the ``TOKEN_RESOURCE`` contains any of the following characters [``,``, ``+``, ``&``], then
it MUST be url-encoded.
.. code-block:: python
import os
uri = f'{os.environ["MONGODB_URI"]}?authMechanism=MONGODB-OIDC&authMechanismProperties=ENVIRONMENT:gcp,TOKEN_RESOURCE:<audience>'
c = MongoClient(uri)
c.test.test.insert_one({})
c.close()
Custom Callbacks
~~~~~~~~~~~~~~~~
For environments that are not directly supported by the driver, you can use :class:`~pymongo.auth_oidc.OIDCCallback`.
Some examples are given below.
Other Azure Environments
^^^^^^^^^^^^^^^^^^^^^^^^
For applications running on Azure Functions, App Service Environment (ASE), or
Azure Kubernetes Service (AKS), you can use the `azure-identity package`_
to fetch the credentials. This example assumes you have set environment variables for
the ``audience`` `configured on your MongoDB deployment`_, and for the client id of the Azure
managed identity.
.. code-block:: python
import os
from azure.identity import DefaultAzureCredential
from pymongo import MongoClient
from pymongo.auth_oidc import OIDCCallback, OIDCCallbackContext, OIDCCallbackResult
audience = os.environ["AZURE_AUDIENCE"]
client_id = os.environ["AZURE_IDENTITY_CLIENT_ID"]
uri = os.environ["MONGODB_URI"]
class MyCallback(OIDCCallback):
def fetch(self, context: OIDCCallbackContext) -> OIDCCallbackResult:
credential = DefaultAzureCredential(managed_identity_client_id=client_id)
token = credential.get_token(f"{audience}/.default").token
return OIDCCallbackResult(access_token=token)
props = {"OIDC_CALLBACK": MyCallback()}
c = MongoClient(uri, authMechanism="MONGODB-OIDC", authMechanismProperties=props)
c.test.test.insert_one({})
c.close()
GCP GKE
^^^^^^^
For a Google Kubernetes Engine cluster with a `configured service account`_, the token can be read from the standard
service account token file location.
.. code-block:: python
import os
from pymongo.auth_oidc import OIDCCallback, OIDCCallbackContext, OIDCCallbackResult
class MyCallback(OIDCCallback):
def fetch(self, context: OIDCCallbackContext) -> OIDCCallbackResult:
with open("/var/run/secrets/kubernetes.io/serviceaccount/token") as fid:
token = fid.read()
return OIDCCallbackResult(access_token=token)
uri = os.environ["MONGODB_URI"]
props = {"OIDC_CALLBACK": MyCallback()}
c = MongoClient(uri, authMechanism="MONGODB-OIDC", authMechanismProperties=props)
c.test.test.insert_one({})
c.close()
.. _MONGODB-OIDC authentication mechanism: https://www.mongodb.com/docs/manual/core/security-oidc/
.. _Azure Internal Metadata Service: https://learn.microsoft.com/en-us/azure/virtual-machines/instance-metadata-service
.. _configured on your MongoDB deployment: https://www.mongodb.com/docs/manual/reference/parameters/#mongodb-parameter-param.oidcIdentityProviders
.. _GCP Internal Metadata Service: https://cloud.google.com/compute/docs/metadata/querying-metadata
.. _azure-identity package: https://pypi.org/project/azure-identity/
.. _configured service account: https://cloud.google.com/kubernetes-engine/docs/how-to/service-accounts

View File

@ -1,184 +0,0 @@
Bulk Write Operations
=====================
.. testsetup::
from pymongo import MongoClient
client = MongoClient()
client.drop_database("bulk_example")
This tutorial explains how to take advantage of PyMongo's bulk
write operation features. Executing write operations in batches
reduces the number of network round trips, increasing write
throughput.
Bulk Insert
-----------
.. versionadded:: 2.6
A batch of documents can be inserted by passing a list to the
:meth:`~pymongo.collection.Collection.insert_many` method. PyMongo
will automatically split the batch into smaller sub-batches based on
the maximum message size accepted by MongoDB, supporting very large
bulk insert operations.
.. doctest::
>>> import pymongo
>>> db = pymongo.MongoClient().bulk_example
>>> db.test.insert_many([{"i": i} for i in range(10000)]).inserted_ids
[...]
>>> db.test.count_documents({})
10000
Mixed Bulk Write Operations
---------------------------
.. versionadded:: 2.7
PyMongo also supports executing mixed bulk write operations. A batch
of insert, update, and remove operations can be executed together using
the bulk write operations API.
.. _ordered_bulk:
Ordered Bulk Write Operations
.............................
Ordered bulk write operations are batched and sent to the server in the
order provided for serial execution. The return value is an instance of
:class:`~pymongo.results.BulkWriteResult` describing the type and count
of operations performed.
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> from pprint import pprint
>>> from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateOne
>>> result = db.test.bulk_write(
... [
... DeleteMany({}), # Remove all documents from the previous example.
... InsertOne({"_id": 1}),
... InsertOne({"_id": 2}),
... InsertOne({"_id": 3}),
... UpdateOne({"_id": 1}, {"$set": {"foo": "bar"}}),
... UpdateOne({"_id": 4}, {"$inc": {"j": 1}}, upsert=True),
... ReplaceOne({"j": 1}, {"j": 2}),
... ]
... )
>>> pprint(result.bulk_api_result)
{'nInserted': 3,
'nMatched': 2,
'nModified': 2,
'nRemoved': 10000,
'nUpserted': 1,
'upserted': [{'_id': 4, 'index': 5}],
'writeConcernErrors': [],
'writeErrors': []}
The first write failure that occurs (e.g. duplicate key error) aborts the
remaining operations, and PyMongo raises
:class:`~pymongo.errors.BulkWriteError`. The :attr:`details` attribute of
the exception instance provides the execution results up until the failure
occurred and details about the failure - including the operation that caused
the failure.
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> from pymongo import InsertOne, DeleteOne, ReplaceOne
>>> from pymongo.errors import BulkWriteError
>>> requests = [
... ReplaceOne({"j": 2}, {"i": 5}),
... InsertOne({"_id": 4}), # Violates the unique key constraint on _id.
... DeleteOne({"i": 5}),
... ]
>>> try:
... db.test.bulk_write(requests)
... except BulkWriteError as bwe:
... pprint(bwe.details)
...
{'nInserted': 0,
'nMatched': 1,
'nModified': 1,
'nRemoved': 0,
'nUpserted': 0,
'upserted': [],
'writeConcernErrors': [],
'writeErrors': [{'code': 11000,
'errmsg': '...E11000...duplicate key error...',
'index': 1,...
'op': {'_id': 4}}]}
.. _unordered_bulk:
Unordered Bulk Write Operations
...............................
Unordered bulk write operations are batched and sent to the server in
**arbitrary order** where they may be executed in parallel. Any errors
that occur are reported after all operations are attempted.
In the next example the first and third operations fail due to the unique
constraint on ``_id``. Since we are doing unordered execution, the second
and fourth operations succeed.
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> requests = [
... InsertOne({"_id": 1}),
... DeleteOne({"_id": 2}),
... InsertOne({"_id": 3}),
... ReplaceOne({"_id": 4}, {"i": 1}),
... ]
>>> try:
... db.test.bulk_write(requests, ordered=False)
... except BulkWriteError as bwe:
... pprint(bwe.details)
...
{'nInserted': 0,
'nMatched': 1,
'nModified': 1,
'nRemoved': 1,
'nUpserted': 0,
'upserted': [],
'writeConcernErrors': [],
'writeErrors': [{'code': 11000,
'errmsg': '...E11000...duplicate key error...',
'index': 0,...
'op': {'_id': 1}},
{'code': 11000,
'errmsg': '...',
'index': 2,...
'op': {'_id': 3}}]}
Write Concern
.............
Bulk operations are executed with the
:attr:`~pymongo.collection.Collection.write_concern` of the collection they
are executed against. Write concern errors (e.g. wtimeout) will be reported
after all operations are attempted, regardless of execution order.
::
>>> from pymongo import WriteConcern
>>> coll = db.get_collection(
... 'test', write_concern=WriteConcern(w=3, wtimeout=1))
>>> try:
... coll.bulk_write([InsertOne({'a': i}) for i in range(4)])
... except BulkWriteError as bwe:
... pprint(bwe.details)
...
{'nInserted': 4,
'nMatched': 0,
'nModified': 0,
'nRemoved': 0,
'nUpserted': 0,
'upserted': [],
'writeConcernErrors': [{'code': 64...
'errInfo': {'wtimeout': True},
'errmsg': 'waiting for replication timed out'}],
'writeErrors': []}

View File

@ -1,192 +0,0 @@
Client Bulk Write Operations
=============================
.. testsetup::
from pymongo import MongoClient
client = MongoClient()
client.drop_database("client_bulk_example")
db = client.client_bulk_example
client.db.drop_collection("test_one")
client.db.drop_collection("test_two")
client.db.drop_collection("test_three")
client.db.drop_collection("test_four")
client.db.drop_collection("test_five")
client.db.drop_collection("test_six")
The :meth:`~pymongo.mongo_client.MongoClient.bulk_write`
method has been added to :class:`~pymongo.mongo_client.MongoClient` in PyMongo 4.9.
This method enables users to perform batches of write operations **across
multiple namespaces** in a minimized number of round trips, and
to receive detailed results for each operation performed.
.. note:: This method requires MongoDB server version 8.0+.
Basic Usage
------------
A list of insert, update, and delete operations can be passed into the
:meth:`~pymongo.mongo_client.MongoClient.bulk_write` method. Each request
must include the namespace on which to perform the operation.
PyMongo will automatically split the given requests into smaller sub-batches based on
the maximum message size accepted by MongoDB, supporting very large bulk write operations.
The return value is an instance of
:class:`~pymongo.results.ClientBulkWriteResult`.
.. _summary_client_bulk:
Summary Results
.................
By default, the returned :class:`~pymongo.results.ClientBulkWriteResult` instance will contain a
summary of the types of operations performed in the bulk write, along with their respective counts.
.. doctest::
:options: +NORMALIZE_WHITESPACE
:skipif: server_major_version < 8
>>> from pymongo import InsertOne, DeleteOne, UpdateOne
>>> models = [
... InsertOne(namespace="db.test_one", document={"_id": 1}),
... InsertOne(namespace="db.test_two", document={"_id": 2}),
... DeleteOne(namespace="db.test_one", filter={"_id": 1}),
... UpdateOne(
... namespace="db.test_two",
... filter={"_id": 4},
... update={"$inc": {"j": 1}},
... upsert=True,
... ),
... ]
>>> result = client.bulk_write(models)
>>> result.inserted_count
2
>>> result.deleted_count
1
>>> result.modified_count
0
>>> result.upserted_count
1
.. _verbose_client_bulk:
Verbose Results
.................
If the ``verbose_results`` parameter is set to True, the returned :class:`~pymongo.results.ClientBulkWriteResult`
instance will also include detailed results about each successful operation performed as part of the bulk write.
.. doctest::
:options: +NORMALIZE_WHITESPACE
:skipif: server_major_version < 8
>>> from pymongo import InsertOne, DeleteMany, ReplaceOne, UpdateMany
>>> models = [
... DeleteMany(
... namespace="db.test_two", filter={}
... ), # Delete all documents from the previous example
... InsertOne(namespace="db.test_one", document={"_id": 1}),
... InsertOne(namespace="db.test_one", document={"_id": 2}),
... InsertOne(namespace="db.test_two", document={"_id": 3}),
... UpdateMany(namespace="db.test_one", filter={}, update={"$set": {"foo": "bar"}}),
... ReplaceOne(
... namespace="db.test_two", filter={"j": 1}, replacement={"_id": 4}, upsert=True
... ),
... ]
>>> result = client.bulk_write(models, verbose_results=True)
>>> result.delete_results
{0: DeleteResult({'ok': 1.0, 'idx': 0, 'n': 2}, ...)}
>>> result.insert_results
{1: InsertOneResult(1, ...),
2: InsertOneResult(2, ...),
3: InsertOneResult(3, ...)}
>>> result.update_results
{4: UpdateResult({'ok': 1.0, 'idx': 4, 'n': 2, 'nModified': 2}, ...),
5: UpdateResult({'ok': 1.0, 'idx': 5, 'n': 1, 'nModified': 0, 'upserted': {'_id': 4}}, ...)}
Handling Errors
----------------
If any errors occur during the bulk write, a :class:`~pymongo.errors.ClientBulkWriteException` will be raised.
If a server, connection, or network error occurred, the ``error`` field of the exception will contain
that error.
Individual write errors or write concern errors get recorded in the ``write_errors`` and ``write_concern_errors`` fields of the exception.
The ``partial_result`` field gets populated with the results of any operations that were successfully completed before the exception was raised.
.. _ordered_client_bulk:
Ordered Operations
....................
In an ordered bulk write (the default), if an individual write fails, no further operations will get executed.
For example, a duplicate key error on the third operation below aborts the remaining two operations.
.. doctest::
:options: +NORMALIZE_WHITESPACE
:skipif: server_major_version < 8
>>> from pymongo import InsertOne, DeleteOne
>>> from pymongo.errors import ClientBulkWriteException
>>> models = [
... InsertOne(namespace="db.test_three", document={"_id": 3}),
... InsertOne(namespace="db.test_four", document={"_id": 4}),
... InsertOne(namespace="db.test_three", document={"_id": 3}), # Duplicate _id
... InsertOne(namespace="db.test_four", document={"_id": 5}),
... DeleteOne(namespace="db.test_three", filter={"_id": 3}),
... ]
>>> try:
... client.bulk_write(models)
... except ClientBulkWriteException as cbwe:
... exception = cbwe
...
>>> exception.write_errors
[{'ok': 0.0,
'idx': 2,
'code': 11000,
'errmsg': 'E11000 duplicate key error ... dup key: { _id: 3 }', ...
'op': {'insert': 0, 'document': {'_id': 3}}}]
>>> exception.partial_result.inserted_count
2
>>> exception.partial_result.deleted_count
0
.. _unordered_client_bulk:
Unordered Operations
.....................
If the ``ordered`` parameter is set to False, all operations in the bulk write will be attempted, regardless of any individual write errors that occur.
For example, the fourth and fifth write operations below get executed successfully, despite the duplicate key error on the third operation.
.. doctest::
:options: +NORMALIZE_WHITESPACE
:skipif: server_major_version < 8
>>> from pymongo import InsertOne, DeleteOne
>>> from pymongo.errors import ClientBulkWriteException
>>> models = [
... InsertOne(namespace="db.test_five", document={"_id": 5}),
... InsertOne(namespace="db.test_six", document={"_id": 6}),
... InsertOne(namespace="db.test_five", document={"_id": 5}), # Duplicate _id
... InsertOne(namespace="db.test_six", document={"_id": 7}),
... DeleteOne(namespace="db.test_five", filter={"_id": 5}),
... ]
>>> try:
... client.bulk_write(models, ordered=False)
... except ClientBulkWriteException as cbwe:
... exception = cbwe
...
>>> exception.write_errors
[{'ok': 0.0,
'idx': 2,
'code': 11000,
'errmsg': 'E11000 duplicate key error ... dup key: { _id: 5 }', ...
'op': {'insert': 0, 'document': {'_id': 5}}}]
>>> exception.partial_result.inserted_count
3
>>> exception.partial_result.deleted_count
1

View File

@ -1,134 +0,0 @@
Collations
==========
.. seealso:: The API docs for :mod:`~pymongo.collation`.
Collations were introduced in MongoDB version 3.4. They provide a set of rules
to use when comparing strings that comply with the conventions of a particular
language, such as Spanish or German. If no collation is specified, the server
sorts strings based on a binary comparison. Many languages have specific
ordering rules, and collations allow users to build applications that adhere to
language-specific comparison rules.
In French, for example, the last accent in a given word determines the sorting
order. The correct sorting order for the following four words in French is::
cote < côte < coté < côté
Specifying a French collation allows users to sort string fields using the
French sort order.
Usage
-----
Users can specify a collation for a
:ref:`collection<collation-on-collection>`, an
:ref:`index<collation-on-index>`, or a
:ref:`CRUD command <collation-on-operation>`.
Collation Parameters:
~~~~~~~~~~~~~~~~~~~~~
Collations can be specified with the :class:`~pymongo.collation.Collation` model
or with plain Python dictionaries. The structure is the same::
Collation(locale=<string>,
caseLevel=<bool>,
caseFirst=<string>,
strength=<int>,
numericOrdering=<bool>,
alternate=<string>,
maxVariable=<string>,
backwards=<bool>)
The only required parameter is ``locale``, which the server parses as
an `ICU format locale ID <https://www.mongodb.com/docs/manual/reference/collation-locales-defaults/>`_.
For example, set ``locale`` to ``en_US`` to represent US English
or ``fr_CA`` to represent Canadian French.
For a complete description of the available parameters, see the MongoDB `manual
<https://www.mongodb.com/docs/manual/reference/collation/>`_.
.. _collation-on-collection:
Assign a Default Collation to a Collection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The following example demonstrates how to create a new collection called
``contacts`` and assign a default collation with the ``fr_CA`` locale. This
operation ensures that all queries that are run against the ``contacts``
collection use the ``fr_CA`` collation unless another collation is explicitly
specified::
from pymongo import MongoClient
from pymongo.collation import Collation
db = MongoClient().test
collection = db.create_collection('contacts',
collation=Collation(locale='fr_CA'))
.. _collation-on-index:
Assign a Default Collation to an Index
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When creating a new index, you can specify a default collation.
The following example shows how to create an index on the ``name``
field of the ``contacts`` collection, with the ``unique`` parameter
enabled and a default collation with ``locale`` set to ``fr_CA``::
from pymongo import MongoClient
from pymongo.collation import Collation
contacts = MongoClient().test.contacts
contacts.create_index('name',
unique=True,
collation=Collation(locale='fr_CA'))
.. _collation-on-operation:
Specify a Collation for a Query
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Individual queries can specify a collation to use when sorting
results. The following example demonstrates a query that runs on the
``contacts`` collection in database ``test``. It matches on
documents that contain ``New York`` in the ``city`` field,
and sorts on the ``name`` field with the ``fr_CA`` collation::
from pymongo import MongoClient
from pymongo.collation import Collation
collection = MongoClient().test.contacts
docs = collection.find({'city': 'New York'}).sort('name').collation(
Collation(locale='fr_CA'))
Other Query Types
~~~~~~~~~~~~~~~~~
You can use collations to control document matching rules for several different
types of queries. All the various update and delete methods
(:meth:`~pymongo.collection.Collection.update_one`,
:meth:`~pymongo.collection.Collection.update_many`,
:meth:`~pymongo.collection.Collection.delete_one`, etc.) support collation, and
you can create query filters which employ collations to comply with any of the
languages and variants available to the ``locale`` parameter.
The following example uses a collation with ``strength`` set to
:const:`~pymongo.collation.CollationStrength.SECONDARY`, which considers only
the base character and character accents in string comparisons, but not case
sensitivity, for example. All documents in the ``contacts`` collection with
``jürgen`` (case-insensitive) in the ``first_name`` field are updated::
from pymongo import MongoClient
from pymongo.collation import Collation, CollationStrength
contacts = MongoClient().test.contacts
result = contacts.update_many(
{'first_name': 'jürgen'},
{'$set': {'verified': 1}},
collation=Collation(locale='de',
strength=CollationStrength.SECONDARY))

View File

@ -1,73 +0,0 @@
Copying a Database
==================
MongoDB >= 4.2
--------------
Starting in MongoDB version 4.2, the server removes the deprecated ``copydb`` command.
As an alternative, users can use ``mongodump`` and ``mongorestore`` (with the ``mongorestore``
options ``--nsFrom`` and ``--nsTo``).
For example, to copy the ``test`` database from a local instance running on the
default port 27017 to the ``examples`` database on the same instance, you can:
#. Use ``mongodump`` to dump the test database to an archive ``mongodump-test-db``::
mongodump --archive="mongodump-test-db" --db=test
#. Use ``mongorestore`` with ``--nsFrom`` and ``--nsTo`` to restore (with database name change)
from the archive::
mongorestore --archive="mongodump-test-db" --nsFrom='test.*' --nsTo='examples.*'
Include additional options as necessary, such as the URI or host, username,
password, and authentication database.
For more info about using ``mongodump`` and ``mongorestore`` see the `Copy a Database`_ example
in the official ``mongodump`` documentation.
MongoDB <= 4.0
--------------
When using MongoDB <= 4.0, it is possible to use the deprecated ``copydb`` command
to copy a database. To copy a database within a single ``mongod`` process, or
between ``mongod`` servers, connect to the target ``mongod`` and use the
:meth:`~pymongo.database.Database.command` method::
>>> from pymongo import MongoClient
>>> client = MongoClient('target.example.com')
>>> client.admin.command('copydb',
fromdb='source_db_name',
todb='target_db_name')
To copy from a different mongod server that is not password-protected::
>>> client.admin.command('copydb',
fromdb='source_db_name',
todb='target_db_name',
fromhost='source.example.com')
If the target server is password-protected, authenticate to the "admin"
database::
>>> client = MongoClient('target.example.com',
... username='administrator',
... password='pwd')
>>> client.admin.command('copydb',
fromdb='source_db_name',
todb='target_db_name',
fromhost='source.example.com')
See the :doc:`authentication examples </examples/authentication>`.
If the **source** server is password-protected, use the `copyDatabase
function in the mongo shell`_.
Versions of PyMongo before 3.0 included a ``copy_database`` helper method,
but it has been removed.
.. _copyDatabase function in the mongo shell:
https://mongodb.com/docs/manual/reference/method/db.copyDatabase/
.. _Copy a Database:
https://www.mongodb.com/docs/database-tools/mongodump/mongodump-examples/#copy-and-clone-databases

View File

@ -1,436 +0,0 @@
Custom Type Example
===================
This is an example of using a custom type with PyMongo. The example here shows
how to subclass :class:`~bson.codec_options.TypeCodec` to write a type
codec, which is used to populate a :class:`~bson.codec_options.TypeRegistry`.
The type registry can then be used to create a custom-type-aware
:class:`~pymongo.collection.Collection`. Read and write operations
issued against the resulting collection object transparently manipulate
documents as they are saved to or retrieved from MongoDB.
Setting Up
----------
We'll start by getting a clean database to use for the example:
.. doctest::
>>> from pymongo import MongoClient
>>> client = MongoClient()
>>> client.drop_database("custom_type_example")
>>> db = client.custom_type_example
Since the purpose of the example is to demonstrate working with custom types,
we'll need a custom data type to use. For this example, we will be working with
the :py:class:`~decimal.Decimal` type from Python's standard library. Since the
BSON library's :class:`~bson.decimal128.Decimal128` type (that implements
the IEEE 754 decimal128 decimal-based floating-point numbering format) is
distinct from Python's built-in :py:class:`~decimal.Decimal` type, attempting
to save an instance of ``Decimal`` with PyMongo results in an
:exc:`~bson.errors.InvalidDocument` exception.
.. doctest::
>>> from decimal import Decimal
>>> num = Decimal("45.321")
>>> db.test.insert_one({"num": num})
Traceback (most recent call last):
...
bson.errors.InvalidDocument: cannot encode object: Decimal('45.321'), of type: <class 'decimal.Decimal'>
.. _custom-type-type-codec:
The :class:`~bson.codec_options.TypeCodec` Class
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionadded:: 3.8
In order to encode a custom type, we must first define a **type codec** for
that type. A type codec describes how an instance of a custom type can be
*transformed* to and/or from one of the types :mod:`~bson` already understands.
Depending on the desired functionality, users must choose from the following
base classes when defining type codecs:
* :class:`~bson.codec_options.TypeEncoder`: subclass this to define a codec that
encodes a custom Python type to a known BSON type. Users must implement the
``python_type`` property/attribute and the ``transform_python`` method.
* :class:`~bson.codec_options.TypeDecoder`: subclass this to define a codec that
decodes a specified BSON type into a custom Python type. Users must implement
the ``bson_type`` property/attribute and the ``transform_bson`` method.
* :class:`~bson.codec_options.TypeCodec`: subclass this to define a codec that
can both encode and decode a custom type. Users must implement the
``python_type`` and ``bson_type`` properties/attributes, as well as the
``transform_python`` and ``transform_bson`` methods.
The type codec for our custom type simply needs to define how a
:py:class:`~decimal.Decimal` instance can be converted into a
:class:`~bson.decimal128.Decimal128` instance and vice-versa. Since we are
interested in both encoding and decoding our custom type, we use the
``TypeCodec`` base class to define our codec:
.. doctest::
>>> from bson.decimal128 import Decimal128
>>> from bson.codec_options import TypeCodec
>>> class DecimalCodec(TypeCodec):
... python_type = Decimal # the Python type acted upon by this type codec
... bson_type = Decimal128 # the BSON type acted upon by this type codec
... def transform_python(self, value):
... """Function that transforms a custom type value into a type
... that BSON can encode."""
... return Decimal128(value)
... def transform_bson(self, value):
... """Function that transforms a vanilla BSON type value into our
... custom type."""
... return value.to_decimal()
...
>>> decimal_codec = DecimalCodec()
.. _custom-type-type-registry:
The :class:`~bson.codec_options.TypeRegistry` Class
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. versionadded:: 3.8
Before we can begin encoding and decoding our custom type objects, we must
first inform PyMongo about the corresponding codec. This is done by creating
a :class:`~bson.codec_options.TypeRegistry` instance:
.. doctest::
>>> from bson.codec_options import TypeRegistry
>>> type_registry = TypeRegistry([decimal_codec])
Note that type registries can be instantiated with any number of type codecs.
Once instantiated, registries are immutable and the only way to add codecs
to a registry is to create a new one.
Putting It Together
-------------------
Finally, we can define a :class:`~bson.codec_options.CodecOptions` instance
with our ``type_registry`` and use it to get a
:class:`~pymongo.collection.Collection` object that understands the
:py:class:`~decimal.Decimal` data type:
.. doctest::
>>> from bson.codec_options import CodecOptions
>>> codec_options = CodecOptions(type_registry=type_registry)
>>> collection = db.get_collection("test", codec_options=codec_options)
Now, we can seamlessly encode and decode instances of
:py:class:`~decimal.Decimal`:
.. doctest::
>>> collection.insert_one({"num": Decimal("45.321")})
InsertOneResult(ObjectId('...'), acknowledged=True)
>>> mydoc = collection.find_one()
>>> import pprint
>>> pprint.pprint(mydoc)
{'_id': ObjectId('...'), 'num': Decimal('45.321')}
We can see what's actually being saved to the database by creating a fresh
collection object without the customized codec options and using that to query
MongoDB:
.. doctest::
>>> vanilla_collection = db.get_collection("test")
>>> pprint.pprint(vanilla_collection.find_one())
{'_id': ObjectId('...'), 'num': Decimal128('45.321')}
Encoding Subtypes
^^^^^^^^^^^^^^^^^
Consider the situation where, in addition to encoding
:py:class:`~decimal.Decimal`, we also need to encode a type that subclasses
``Decimal``. PyMongo does this automatically for types that inherit from
Python types that are BSON-encodable by default, but the type codec system
described above does not offer the same flexibility.
Consider this subtype of ``Decimal`` that has a method to return its value as
an integer:
.. doctest::
>>> class DecimalInt(Decimal):
... def my_method(self):
... """Method implementing some custom logic."""
... return int(self)
...
If we try to save an instance of this type without first registering a type
codec for it, we get an error:
.. doctest::
>>> collection.insert_one({"num": DecimalInt("45.321")})
Traceback (most recent call last):
...
bson.errors.InvalidDocument: cannot encode object: Decimal('45.321'), of type: <class 'decimal.Decimal'>
In order to proceed further, we must define a type codec for ``DecimalInt``.
This is trivial to do since the same transformation as the one used for
``Decimal`` is adequate for encoding ``DecimalInt`` as well:
.. doctest::
>>> class DecimalIntCodec(DecimalCodec):
... @property
... def python_type(self):
... """The Python type acted upon by this type codec."""
... return DecimalInt
...
>>> decimalint_codec = DecimalIntCodec()
.. note::
No attempt is made to modify decoding behavior because without additional
information, it is impossible to discern which incoming
:class:`~bson.decimal128.Decimal128` value needs to be decoded as ``Decimal``
and which needs to be decoded as ``DecimalInt``. This example only considers
the situation where a user wants to *encode* documents containing either
of these types.
After creating a new codec options object and using it to get a collection
object, we can seamlessly encode instances of ``DecimalInt``:
.. doctest::
>>> type_registry = TypeRegistry([decimal_codec, decimalint_codec])
>>> codec_options = CodecOptions(type_registry=type_registry)
>>> collection = db.get_collection("test", codec_options=codec_options)
>>> collection.drop()
>>> collection.insert_one({"num": DecimalInt("45.321")})
InsertOneResult(ObjectId('...'), acknowledged=True)
>>> mydoc = collection.find_one()
>>> pprint.pprint(mydoc)
{'_id': ObjectId('...'), 'num': Decimal('45.321')}
Note that the ``transform_bson`` method of the base codec class results in
these values being decoded as ``Decimal`` (and not ``DecimalInt``).
.. _decoding-binary-types:
Decoding :class:`~bson.binary.Binary` Types
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The decoding treatment of :class:`~bson.binary.Binary` types having
``subtype = 0`` by the :mod:`bson` module varies slightly depending on the
version of the Python runtime in use. This must be taken into account while
writing a ``TypeDecoder`` that modifies how this datatype is decoded.
On Python 3.x, :class:`~bson.binary.Binary` data (``subtype = 0``) is decoded
as a ``bytes`` instance:
.. code-block:: pycon
>>> # On Python 3.x.
>>> from bson.binary import Binary
>>> newcoll = db.get_collection("new")
>>> newcoll.insert_one({"_id": 1, "data": Binary(b"123", subtype=0)})
>>> doc = newcoll.find_one()
>>> type(doc["data"])
bytes
On Python 2.7.x, the same data is decoded as a :class:`~bson.binary.Binary`
instance:
.. code-block:: pycon
>>> # On Python 2.7.x
>>> newcoll = db.get_collection("new")
>>> doc = newcoll.find_one()
>>> type(doc["data"])
bson.binary.Binary
As a consequence of this disparity, users must set the ``bson_type`` attribute
on their :class:`~bson.codec_options.TypeDecoder` classes differently,
depending on the python version in use.
.. note::
For codebases requiring compatibility with both Python 2 and 3, type
decoders will have to be registered for both possible ``bson_type`` values.
.. _fallback-encoder-callable:
The ``fallback_encoder`` Callable
---------------------------------
.. versionadded:: 3.8
In addition to type codecs, users can also register a callable to encode types
that BSON doesn't recognize and for which no type codec has been registered.
This callable is the **fallback encoder** and like the ``transform_python``
method, it accepts an unencodable value as a parameter and returns a
BSON-encodable value. The following fallback encoder encodes python's
:py:class:`~decimal.Decimal` type to a :class:`~bson.decimal128.Decimal128`:
.. doctest::
>>> def fallback_encoder(value):
... if isinstance(value, Decimal):
... return Decimal128(value)
... return value
...
After declaring the callback, we must create a type registry and codec options
with this fallback encoder before it can be used for initializing a collection:
.. doctest::
>>> type_registry = TypeRegistry(fallback_encoder=fallback_encoder)
>>> codec_options = CodecOptions(type_registry=type_registry)
>>> collection = db.get_collection("test", codec_options=codec_options)
>>> collection.drop()
We can now seamlessly encode instances of :py:class:`~decimal.Decimal`:
.. doctest::
>>> collection.insert_one({"num": Decimal("45.321")})
InsertOneResult(ObjectId('...'), acknowledged=True)
>>> mydoc = collection.find_one()
>>> pprint.pprint(mydoc)
{'_id': ObjectId('...'), 'num': Decimal128('45.321')}
.. note::
Fallback encoders are invoked *after* attempts to encode the given value
with standard BSON encoders and any configured type encoders have failed.
Therefore, in a type registry configured with a type encoder and fallback
encoder that both target the same custom type, the behavior specified in
the type encoder will prevail.
Because fallback encoders don't need to declare the types that they encode
beforehand, they can be used to support interesting use-cases that cannot be
serviced by ``TypeEncoder``. One such use-case is described in the next
section.
Encoding Unknown Types
^^^^^^^^^^^^^^^^^^^^^^
In this example, we demonstrate how a fallback encoder can be used to save
arbitrary objects to the database. We will use the standard library's
:py:mod:`pickle` module to serialize the unknown types and so naturally, this
approach only works for types that are picklable.
We start by defining some arbitrary custom types:
.. code-block:: python
class MyStringType(object):
def __init__(self, value):
self.__value = value
def __repr__(self):
return "MyStringType('%s')" % (self.__value,)
class MyNumberType(object):
def __init__(self, value):
self.__value = value
def __repr__(self):
return "MyNumberType(%s)" % (self.__value,)
We also define a fallback encoder that pickles whatever objects it receives
and returns them as :class:`~bson.binary.Binary` instances with a custom
subtype. The custom subtype, in turn, allows us to write a TypeDecoder that
identifies pickled artifacts upon retrieval and transparently decodes them
back into Python objects:
.. code-block:: python
import pickle
from bson.binary import Binary, USER_DEFINED_SUBTYPE
def fallback_pickle_encoder(value):
return Binary(pickle.dumps(value), USER_DEFINED_SUBTYPE)
class PickledBinaryDecoder(TypeDecoder):
bson_type = Binary
def transform_bson(self, value):
if value.subtype == USER_DEFINED_SUBTYPE:
return pickle.loads(value)
return value
.. note::
The above example is written assuming the use of Python 3. If you are using
Python 2, ``bson_type`` must be set to ``Binary``. See the
:ref:`decoding-binary-types` section for a detailed explanation.
Finally, we create a ``CodecOptions`` instance:
.. code-block:: python
codec_options = CodecOptions(
type_registry=TypeRegistry(
[PickledBinaryDecoder()], fallback_encoder=fallback_pickle_encoder
)
)
We can now round trip our custom objects to MongoDB:
.. code-block:: python
collection = db.get_collection("test_fe", codec_options=codec_options)
collection.insert_one(
{"_id": 1, "str": MyStringType("hello world"), "num": MyNumberType(2)}
)
mydoc = collection.find_one()
assert isinstance(mydoc["str"], MyStringType)
assert isinstance(mydoc["num"], MyNumberType)
Limitations
-----------
PyMongo's type codec and fallback encoder features have the following
limitations:
#. Users cannot customize the encoding behavior of Python types that PyMongo
already understands like ``int`` and ``str`` (the 'built-in types').
Attempting to instantiate a type registry with one or more codecs that act
upon a built-in type results in a ``TypeError``. This limitation extends
to all subtypes of the standard types.
#. Chaining type encoders is not supported. A custom type value, once
transformed by a codec's ``transform_python`` method, *must* result in a
type that is either BSON-encodable by default, or can be
transformed by the fallback encoder into something BSON-encodable--it
*cannot* be transformed a second time by a different type codec.
#. The :meth:`~pymongo.database.Database.command` method does not apply the
user's TypeDecoders while decoding the command response document.
#. :mod:`gridfs` does not apply custom type encoding or decoding to any
documents received from or returned to the user.

View File

@ -1,177 +0,0 @@
Datetimes and Timezones
=======================
.. testsetup::
import datetime
from pymongo import MongoClient
from bson.codec_options import CodecOptions
client = MongoClient()
client.drop_database("dt_example")
db = client.dt_example
These examples show how to handle Python :class:`datetime.datetime` objects
correctly in PyMongo.
Basic Usage
-----------
PyMongo uses :class:`datetime.datetime` objects for representing dates and times
in MongoDB documents. Because MongoDB assumes that dates and times are in UTC,
care should be taken to ensure that dates and times written to the database
reflect UTC. For example, the following code stores the current UTC date and
time into MongoDB:
.. doctest::
>>> result = db.objects.insert_one(
... {"last_modified": datetime.datetime.now(tz=datetime.timezone.utc)}
... )
Always use :meth:`datetime.datetime.now(tz=datetime.timezone.utc)`, which explicitly returns the current time in
UTC, instead of :meth:`datetime.datetime.now`, with no arguments, which returns the current local
time. Avoid doing this:
.. doctest::
>>> result = db.objects.insert_one({"last_modified": datetime.datetime.now()})
The value for ``last_modified`` is very different between these two examples, even
though both documents were stored at around the same local time. This will be
confusing to the application that reads them:
.. doctest::
>>> [doc["last_modified"] for doc in db.objects.find()] # doctest: +SKIP
[datetime.datetime(2015, 7, 8, 18, 17, 28, 324000),
datetime.datetime(2015, 7, 8, 11, 17, 42, 911000)]
:class:`bson.codec_options.CodecOptions` has a ``tz_aware`` option that enables
"aware" :class:`datetime.datetime` objects, i.e., datetimes that know what
timezone they're in. By default, PyMongo retrieves naive datetimes:
.. doctest::
>>> result = db.tzdemo.insert_one({"date": datetime.datetime(2002, 10, 27, 6, 0, 0)})
>>> db.tzdemo.find_one()["date"]
datetime.datetime(2002, 10, 27, 6, 0)
>>> options = CodecOptions(tz_aware=True)
>>> db.get_collection("tzdemo", codec_options=options).find_one()["date"] # doctest: +SKIP
datetime.datetime(2002, 10, 27, 6, 0,
tzinfo=<bson.tz_util.FixedOffset object at 0x10583a050>)
Saving Datetimes with Timezones
-------------------------------
When storing :class:`datetime.datetime` objects that specify a timezone
(i.e. they have a ``tzinfo`` property that isn't ``None``), PyMongo will convert
those datetimes to UTC automatically:
.. doctest::
>>> from zoneinfo import ZoneInfo
>>> from datetime import datetime
>>> aware_datetime = datetime(2002, 10, 27, 6, 0, 0, tzinfo=ZoneInfo("US/Pacific"))
>>> result = db.times.insert_one({"date": aware_datetime})
>>> db.times.find_one()["date"]
datetime.datetime(2002, 10, 27, 14, 0)
Reading Time
------------
As previously mentioned, by default all :class:`datetime.datetime` objects
returned by PyMongo will be naive but reflect UTC (i.e. the time as stored in
MongoDB). By setting the ``tz_aware`` option on
:class:`~bson.codec_options.CodecOptions`, :class:`datetime.datetime` objects
will be timezone-aware and have a ``tzinfo`` property that reflects the UTC
timezone.
PyMongo 3.1 introduced a ``tzinfo`` property that can be set on
:class:`~bson.codec_options.CodecOptions` to convert :class:`datetime.datetime`
objects to local time automatically. For example, if we wanted to read all times
out of MongoDB in US/Pacific time:
>>> from bson.codec_options import CodecOptions
>>> db.times.find_one()['date']
datetime.datetime(2002, 10, 27, 14, 0)
>>> aware_times = db.times.with_options(codec_options=CodecOptions(
... tz_aware=True,
... tzinfo=ZoneInfo("US/Pacific")))
>>> aware_times.find_one()['date']  # doctest: +NORMALIZE_WHITESPACE
datetime.datetime(2002, 10, 27, 6, 0,
tzinfo=zoneinfo.ZoneInfo(key='US/Pacific'))
.. _handling-out-of-range-datetimes:
Handling out of range datetimes
-------------------------------
Python's :class:`~datetime.datetime` can only represent datetimes within the
range allowed by
:attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`, whereas
the range of datetimes allowed in BSON can represent any 64-bit number
of milliseconds from the Unix epoch. To deal with this, we can use the
:class:`bson.datetime_ms.DatetimeMS` object, which is a wrapper for the
:class:`int` built-in.
To decode UTC datetime values as :class:`~bson.datetime_ms.DatetimeMS`,
:class:`~bson.codec_options.CodecOptions` should have its
``datetime_conversion`` parameter set to one of the options available in
:class:`bson.datetime_ms.DatetimeConversion`. These include
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME`,
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_MS`,
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_AUTO`,
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_CLAMP`.
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME` is the default
option and has the behavior of raising an :class:`OverflowError` upon
attempting to decode an out-of-range date.
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_MS` will only return
:class:`~bson.datetime_ms.DatetimeMS` objects, regardless of whether the
represented datetime is in- or out-of-range:
.. doctest::
>>> from datetime import datetime
>>> from bson import encode, decode
>>> from bson.datetime_ms import DatetimeMS
>>> from bson.codec_options import CodecOptions, DatetimeConversion
>>> x = encode({"x": datetime(1970, 1, 1)})
>>> codec_ms = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_MS)
>>> decode(x, codec_options=codec_ms)
{'x': DatetimeMS(0)}
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_AUTO` will return
:class:`~datetime.datetime` if the underlying UTC datetime is within range,
or :class:`~bson.datetime_ms.DatetimeMS` if the underlying datetime
cannot be represented using the builtin Python :class:`~datetime.datetime`:
.. doctest::
>>> x = encode({"x": datetime(1970, 1, 1)})
>>> y = encode({"x": DatetimeMS(-(2**62))})
>>> codec_auto = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_AUTO)
>>> decode(x, codec_options=codec_auto)
{'x': datetime.datetime(1970, 1, 1, 0, 0)}
>>> decode(y, codec_options=codec_auto)
{'x': DatetimeMS(-4611686018427387904)}
:attr:`~bson.datetime_ms.DatetimeConversion.DATETIME_CLAMP` will clamp
resulting :class:`~datetime.datetime` objects to be within
:attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`
(trimmed to ``999000`` microseconds):
.. doctest::
>>> x = encode({"x": DatetimeMS(2**62)})
>>> y = encode({"x": DatetimeMS(-(2**62))})
>>> codec_clamp = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_CLAMP)
>>> decode(x, codec_options=codec_clamp)
{'x': datetime.datetime(9999, 12, 31, 23, 59, 59, 999000)}
>>> decode(y, codec_options=codec_clamp)
{'x': datetime.datetime(1, 1, 1, 0, 0)}
:class:`~bson.datetime_ms.DatetimeMS` objects have support for rich comparison
methods against other instances of :class:`~bson.datetime_ms.DatetimeMS`.
They can also be converted to :class:`~datetime.datetime` objects with
:meth:`~bson.datetime_ms.DatetimeMS.to_datetime()`.

View File

@ -1,840 +0,0 @@
.. _In-Use Encryption:
In-Use Encryption
=================
.. _Client-Side Field Level Encryption:
Client-Side Field Level Encryption
----------------------------------
New in MongoDB 4.2, client-side field level encryption allows an application
to encrypt specific data fields in addition to pre-existing MongoDB
encryption features such as `Encryption at Rest
<https://dochub.mongodb.org/core/security-encryption-at-rest>`_ and
`TLS/SSL (Transport Encryption)
<https://dochub.mongodb.org/core/security-tls-transport-encryption>`_.
With field level encryption, applications can encrypt fields in documents
*prior* to transmitting data over the wire to the server. Client-side field
level encryption supports workloads where applications must guarantee that
unauthorized parties, including server administrators, cannot read the
encrypted data.
.. seealso:: The MongoDB documentation on `Client Side Field Level Encryption <https://dochub.mongodb.org/core/client-side-field-level-encryption>`_.
Dependencies
~~~~~~~~~~~~
To get started using client-side field level encryption in your project,
you will need to install the
`pymongocrypt <https://pypi.org/project/pymongocrypt/>`_ and
`pymongo-auth-aws <https://pypi.org/project/pymongo-auth-aws/>`_ libraries
as well as the driver itself. Install both the driver and a compatible
version of the dependencies like this::
$ python -m pip install 'pymongo[encryption]'
Note that installing on Linux requires pip 19 or later for manylinux2010 wheel
support. For more information about installing pymongocrypt see
`the installation instructions on the project's PyPI page
<https://pypi.org/project/pymongocrypt/>`_.
Additionally, either `crypt_shared`_ or `mongocryptd`_ is required in order
to use *automatic* client-side encryption.
crypt_shared
````````````
The Automatic Encryption Shared Library (crypt_shared) provides the same
functionality as `mongocryptd`_, but does not require you to spawn another
process to perform automatic encryption.
By default, pymongo attempts to load crypt_shared from the system and if
found uses it automatically. To load crypt_shared from another location,
use the ``crypt_shared_lib_path`` argument to
:class:`~pymongo.encryption_options.AutoEncryptionOpts`.
If pymongo cannot load crypt_shared it will attempt to fall back to using
`mongocryptd`_ by default. Set ``crypt_shared_lib_required=True`` to make
the app always use crypt_shared and fail if it could not be loaded.
For detailed installation instructions see
`the MongoDB documentation on Automatic Encryption Shared Library
<https://www.mongodb.com/docs/manual/core/queryable-encryption/reference/shared-library>`_.
mongocryptd
```````````
The ``mongocryptd`` binary is required for automatic client-side encryption
and is included as a component in the `MongoDB Enterprise Server package
<https://dochub.mongodb.org/core/install-mongodb-enterprise>`_.
For detailed installation instructions see
`the MongoDB documentation on mongocryptd
<https://dochub.mongodb.org/core/client-side-field-level-encryption-mongocryptd>`_.
``mongocryptd`` performs the following:
- Parses the automatic encryption rules specified to the database connection.
If the JSON schema contains invalid automatic encryption syntax or any
document validation syntax, ``mongocryptd`` returns an error.
- Uses the specified automatic encryption rules to mark fields in read and
write operations for encryption.
- Rejects read/write operations that may return unexpected or incorrect results
when applied to an encrypted field. For supported and unsupported operations,
see `Read/Write Support with Automatic Field Level Encryption
<https://dochub.mongodb.org/core/client-side-field-level-encryption-read-write-support>`_.
A MongoClient configured with auto encryption will automatically spawn the
``mongocryptd`` process from the application's ``PATH``. Applications can
control the spawning behavior as part of the automatic encryption options.
For example to set the path to the ``mongocryptd`` process::
auto_encryption_opts = AutoEncryptionOpts(
...,
mongocryptd_spawn_path='/path/to/mongocryptd')
To control the logging output of ``mongocryptd`` pass options using
``mongocryptd_spawn_args``::
auto_encryption_opts = AutoEncryptionOpts(
...,
mongocryptd_spawn_args=['--logpath=/path/to/mongocryptd.log', '--logappend'])
If your application wishes to manage the ``mongocryptd`` process manually,
it is possible to disable spawning ``mongocryptd``::
auto_encryption_opts = AutoEncryptionOpts(
...,
mongocryptd_bypass_spawn=True,
# URI of the local ``mongocryptd`` process.
mongocryptd_uri='mongodb://localhost:27020')
``mongocryptd`` is only responsible for supporting automatic client-side field
level encryption and does not itself perform any encryption or decryption.
.. _automatic-client-side-encryption:
Automatic Client-Side Field Level Encryption
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Automatic client-side field level encryption is enabled by creating a
:class:`~pymongo.mongo_client.MongoClient` with the ``auto_encryption_opts``
option set to an instance of
:class:`~pymongo.encryption_options.AutoEncryptionOpts`. The following
examples show how to set up automatic client-side field level encryption
using :class:`~pymongo.encryption.ClientEncryption` to create a new
encryption data key.
.. note:: Automatic client-side field level encryption requires MongoDB >=4.2
enterprise or a MongoDB >=4.2 Atlas cluster. The community version of the
server supports automatic decryption as well as
:ref:`explicit-client-side-encryption`.
Providing Local Automatic Encryption Rules
``````````````````````````````````````````
The following example shows how to specify automatic encryption rules via the
``schema_map`` option. The automatic encryption rules are expressed using a
`strict subset of the JSON Schema syntax
<https://dochub.mongodb.org/core/client-side-field-level-encryption-automatic-encryption-rules>`_.
Supplying a ``schema_map`` provides more security than relying on
JSON Schemas obtained from the server. It protects against a
malicious server advertising a false JSON Schema, which could trick
the client into sending unencrypted data that should be encrypted.
JSON Schemas supplied in the ``schema_map`` only apply to configuring
automatic client-side field level encryption. Other validation
rules in the JSON schema will not be enforced by the driver and
will result in an error.
.. code-block:: python
import os
from bson.codec_options import CodecOptions
from bson import json_util
from pymongo import MongoClient
from pymongo.encryption import Algorithm, ClientEncryption
from pymongo.encryption_options import AutoEncryptionOpts
def create_json_schema_file(kms_providers, key_vault_namespace, key_vault_client):
client_encryption = ClientEncryption(
kms_providers,
key_vault_namespace,
key_vault_client,
# The CodecOptions class used for encrypting and decrypting.
# This should be the same CodecOptions instance you have configured
# on MongoClient, Database, or Collection. We will not be calling
# encrypt() or decrypt() in this example so we can use any
# CodecOptions.
CodecOptions(),
)
# Create a new data key and json schema for the encryptedField.
# https://dochub.mongodb.org/core/client-side-field-level-encryption-automatic-encryption-rules
data_key_id = client_encryption.create_data_key(
"local", key_alt_names=["pymongo_encryption_example_1"]
)
schema = {
"properties": {
"encryptedField": {
"encrypt": {
"keyId": [data_key_id],
"bsonType": "string",
"algorithm": Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
}
}
},
"bsonType": "object",
}
# Use CANONICAL_JSON_OPTIONS so that other drivers and tools will be
# able to parse the MongoDB extended JSON file.
json_schema_string = json_util.dumps(
schema, json_options=json_util.CANONICAL_JSON_OPTIONS
)
with open("jsonSchema.json", "w") as file:
file.write(json_schema_string)
def main():
# The MongoDB namespace (db.collection) used to store the
# encrypted documents in this example.
encrypted_namespace = "test.coll"
# This must be the same master key that was used to create
# the encryption key.
local_master_key = os.urandom(96)
kms_providers = {"local": {"key": local_master_key}}
# The MongoDB namespace (db.collection) used to store
# the encryption data keys.
key_vault_namespace = "encryption.__pymongoTestKeyVault"
key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1)
# The MongoClient used to access the key vault (key_vault_namespace).
key_vault_client = MongoClient()
key_vault = key_vault_client[key_vault_db_name][key_vault_coll_name]
# Ensure that two data keys cannot share the same keyAltName.
key_vault.drop()
key_vault.create_index(
"keyAltNames",
unique=True,
partialFilterExpression={"keyAltNames": {"$exists": True}},
)
create_json_schema_file(kms_providers, key_vault_namespace, key_vault_client)
# Load the JSON Schema and construct the local schema_map option.
with open("jsonSchema.json", "r") as file:
json_schema_string = file.read()
json_schema = json_util.loads(json_schema_string)
schema_map = {encrypted_namespace: json_schema}
auto_encryption_opts = AutoEncryptionOpts(
kms_providers, key_vault_namespace, schema_map=schema_map
)
client = MongoClient(auto_encryption_opts=auto_encryption_opts)
db_name, coll_name = encrypted_namespace.split(".", 1)
coll = client[db_name][coll_name]
# Clear old data
coll.drop()
coll.insert_one({"encryptedField": "123456789"})
print("Decrypted document: %s" % (coll.find_one(),))
unencrypted_coll = MongoClient()[db_name][coll_name]
print("Encrypted document: %s" % (unencrypted_coll.find_one(),))
if __name__ == "__main__":
main()
Server-Side Field Level Encryption Enforcement
``````````````````````````````````````````````
MongoDB >=4.2 servers support using schema validation to enforce encryption
of specific fields in a collection. This schema validation will prevent an
application from inserting unencrypted values for any fields marked with the
``"encrypt"`` JSON schema keyword.
The following example shows how to set up automatic client-side field level
encryption using
:class:`~pymongo.encryption.ClientEncryption` to create a new encryption
data key and create a collection with the
`Automatic Encryption JSON Schema Syntax
<https://dochub.mongodb.org/core/client-side-field-level-encryption-automatic-encryption-rules>`_:
.. code-block:: python
import os
from bson.codec_options import CodecOptions
from bson.binary import STANDARD
from pymongo import MongoClient
from pymongo.encryption import Algorithm, ClientEncryption
from pymongo.encryption_options import AutoEncryptionOpts
from pymongo.errors import OperationFailure
from pymongo.write_concern import WriteConcern
def main():
# The MongoDB namespace (db.collection) used to store the
# encrypted documents in this example.
encrypted_namespace = "test.coll"
# This must be the same master key that was used to create
# the encryption key.
local_master_key = os.urandom(96)
kms_providers = {"local": {"key": local_master_key}}
# The MongoDB namespace (db.collection) used to store
# the encryption data keys.
key_vault_namespace = "encryption.__pymongoTestKeyVault"
key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1)
# The MongoClient used to access the key vault (key_vault_namespace).
key_vault_client = MongoClient()
key_vault = key_vault_client[key_vault_db_name][key_vault_coll_name]
# Ensure that two data keys cannot share the same keyAltName.
key_vault.drop()
key_vault.create_index(
"keyAltNames",
unique=True,
partialFilterExpression={"keyAltNames": {"$exists": True}},
)
client_encryption = ClientEncryption(
kms_providers,
key_vault_namespace,
key_vault_client,
# The CodecOptions class used for encrypting and decrypting.
# This should be the same CodecOptions instance you have configured
# on MongoClient, Database, or Collection. We will not be calling
# encrypt() or decrypt() in this example so we can use any
# CodecOptions.
CodecOptions(),
)
# Create a new data key and json schema for the encryptedField.
data_key_id = client_encryption.create_data_key(
"local", key_alt_names=["pymongo_encryption_example_2"]
)
json_schema = {
"properties": {
"encryptedField": {
"encrypt": {
"keyId": [data_key_id],
"bsonType": "string",
"algorithm": Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
}
}
},
"bsonType": "object",
}
auto_encryption_opts = AutoEncryptionOpts(kms_providers, key_vault_namespace)
client = MongoClient(auto_encryption_opts=auto_encryption_opts)
db_name, coll_name = encrypted_namespace.split(".", 1)
db = client[db_name]
# Clear old data
db.drop_collection(coll_name)
# Create the collection with the encryption JSON Schema.
db.create_collection(
coll_name,
# uuid_representation=STANDARD is required to ensure that any
# UUIDs in the $jsonSchema document are encoded to BSON Binary
# with the standard UUID subtype 4. This is only needed when
# running the "create" collection command with an encryption
# JSON Schema.
codec_options=CodecOptions(uuid_representation=STANDARD),
write_concern=WriteConcern(w="majority"),
validator={"$jsonSchema": json_schema},
)
coll = client[db_name][coll_name]
coll.insert_one({"encryptedField": "123456789"})
print("Decrypted document: %s" % (coll.find_one(),))
unencrypted_coll = MongoClient()[db_name][coll_name]
print("Encrypted document: %s" % (unencrypted_coll.find_one(),))
try:
unencrypted_coll.insert_one({"encryptedField": "123456789"})
except OperationFailure as exc:
print("Unencrypted insert failed: %s" % (exc.details,))
if __name__ == "__main__":
main()
.. _explicit-client-side-encryption:
Explicit Encryption
~~~~~~~~~~~~~~~~~~~
Explicit encryption is a MongoDB community feature and does not use the
``mongocryptd`` process. Explicit encryption is provided by the
:class:`~pymongo.encryption.ClientEncryption` class, for example:
.. code-block:: python
import os
from pymongo import MongoClient
from pymongo.encryption import Algorithm, ClientEncryption
def main():
# This must be the same master key that was used to create
# the encryption key.
local_master_key = os.urandom(96)
kms_providers = {"local": {"key": local_master_key}}
# The MongoDB namespace (db.collection) used to store
# the encryption data keys.
key_vault_namespace = "encryption.__pymongoTestKeyVault"
key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1)
# The MongoClient used to read/write application data.
client = MongoClient()
coll = client.test.coll
# Clear old data
coll.drop()
# Set up the key vault (key_vault_namespace) for this example.
key_vault = client[key_vault_db_name][key_vault_coll_name]
# Ensure that two data keys cannot share the same keyAltName.
key_vault.drop()
key_vault.create_index(
"keyAltNames",
unique=True,
partialFilterExpression={"keyAltNames": {"$exists": True}},
)
client_encryption = ClientEncryption(
kms_providers,
key_vault_namespace,
# The MongoClient to use for reading/writing to the key vault.
# This can be the same MongoClient used by the main application.
client,
# The CodecOptions class used for encrypting and decrypting.
# This should be the same CodecOptions instance you have configured
# on MongoClient, Database, or Collection.
coll.codec_options,
)
# Create a new data key for the encryptedField.
data_key_id = client_encryption.create_data_key(
"local", key_alt_names=["pymongo_encryption_example_3"]
)
# Explicitly encrypt a field:
encrypted_field = client_encryption.encrypt(
"123456789",
Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
key_id=data_key_id,
)
coll.insert_one({"encryptedField": encrypted_field})
doc = coll.find_one()
print("Encrypted document: %s" % (doc,))
# Explicitly decrypt the field:
doc["encryptedField"] = client_encryption.decrypt(doc["encryptedField"])
print("Decrypted document: %s" % (doc,))
# Cleanup resources.
client_encryption.close()
client.close()
if __name__ == "__main__":
main()
Explicit Encryption with Automatic Decryption
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Although automatic encryption requires MongoDB >=4.2 enterprise or a
MongoDB >=4.2 Atlas cluster, automatic *decryption* is supported for all users.
To configure automatic *decryption* without automatic *encryption* set
``bypass_auto_encryption=True`` in
:class:`~pymongo.encryption_options.AutoEncryptionOpts`:
.. code-block:: python
import os
from pymongo import MongoClient
from pymongo.encryption import Algorithm, ClientEncryption
from pymongo.encryption_options import AutoEncryptionOpts
def main():
# This must be the same master key that was used to create
# the encryption key.
local_master_key = os.urandom(96)
kms_providers = {"local": {"key": local_master_key}}
# The MongoDB namespace (db.collection) used to store
# the encryption data keys.
key_vault_namespace = "encryption.__pymongoTestKeyVault"
key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1)
# bypass_auto_encryption=True disables automatic encryption but keeps
# the automatic _decryption_ behavior. bypass_auto_encryption will
# also disable spawning mongocryptd.
auto_encryption_opts = AutoEncryptionOpts(
kms_providers, key_vault_namespace, bypass_auto_encryption=True
)
client = MongoClient(auto_encryption_opts=auto_encryption_opts)
coll = client.test.coll
# Clear old data
coll.drop()
# Set up the key vault (key_vault_namespace) for this example.
key_vault = client[key_vault_db_name][key_vault_coll_name]
# Ensure that two data keys cannot share the same keyAltName.
key_vault.drop()
key_vault.create_index(
"keyAltNames",
unique=True,
partialFilterExpression={"keyAltNames": {"$exists": True}},
)
client_encryption = ClientEncryption(
kms_providers,
key_vault_namespace,
# The MongoClient to use for reading/writing to the key vault.
# This can be the same MongoClient used by the main application.
client,
# The CodecOptions class used for encrypting and decrypting.
# This should be the same CodecOptions instance you have configured
# on MongoClient, Database, or Collection.
coll.codec_options,
)
# Create a new data key for the encryptedField.
data_key_id = client_encryption.create_data_key(
"local", key_alt_names=["pymongo_encryption_example_4"]
)
# Explicitly encrypt a field:
encrypted_field = client_encryption.encrypt(
"123456789",
Algorithm.AEAD_AES_256_CBC_HMAC_SHA_512_Deterministic,
key_alt_name="pymongo_encryption_example_4",
)
coll.insert_one({"encryptedField": encrypted_field})
# Automatically decrypts any encrypted fields.
doc = coll.find_one()
print("Decrypted document: %s" % (doc,))
unencrypted_coll = MongoClient().test.coll
print("Encrypted document: %s" % (unencrypted_coll.find_one(),))
# Cleanup resources.
client_encryption.close()
client.close()
if __name__ == "__main__":
main()
.. _CSFLE on-demand credentials:
CSFLE on-demand credentials
~~~~~~~~~~~~~~~~~~~~~~~~~~~
``pymongocrypt`` 1.4 adds support for fetching on-demand KMS credentials for
AWS, GCP, and Azure cloud environments.
To enable the driver's behavior to obtain credentials from the environment, add the appropriate key ("aws", "gcp", or "azure") with an empty map to
"kms_providers" in either :class:`~pymongo.encryption_options.AutoEncryptionOpts` or :class:`~pymongo.encryption.ClientEncryption` options.
An application using AWS credentials would look like:
.. code-block:: python
from pymongo import MongoClient
from pymongo.encryption import ClientEncryption
client = MongoClient()
client_encryption = ClientEncryption(
# The empty dictionary enables on-demand credentials.
kms_providers={"aws": {}},
key_vault_namespace="keyvault.datakeys",
key_vault_client=client,
codec_options=client.codec_options,
)
master_key = {
"region": "us-east-1",
"key": ("arn:aws:kms:us-east-1:123456789:key/89fcc2c4-08b0-4bd9-9f25-e30687b580d0"),
}
client_encryption.create_data_key("aws", master_key)
The above will enable the same behavior of obtaining AWS credentials from the environment as is used for :ref:`MONGODB-AWS` authentication, including the
caching to avoid rate limiting.
An application using GCP credentials would look like:
.. code-block:: python
from pymongo import MongoClient
from pymongo.encryption import ClientEncryption
client = MongoClient()
client_encryption = ClientEncryption(
# The empty dictionary enables on-demand credentials.
kms_providers={"gcp": {}},
key_vault_namespace="keyvault.datakeys",
key_vault_client=client,
codec_options=client.codec_options,
)
master_key = {
"projectId": "my-project",
"location": "global",
"keyRing": "key-ring-csfle",
"keyName": "key-name-csfle",
}
client_encryption.create_data_key("gcp", master_key)
The driver will query the `VM instance metadata <https://cloud.google.com/compute/docs/metadata/querying-metadata>`_ to obtain credentials.
An application using Azure credentials would look like the following, this time using
:class:`~pymongo.encryption_options.AutoEncryptionOpts`:
.. code-block:: python
from pymongo import MongoClient
from pymongo.encryption_options import AutoEncryptionOpts
# The empty dictionary enables on-demand credentials.
kms_providers = {"azure": {}}
key_vault_namespace = "keyvault.datakeys"
auto_encryption_opts = AutoEncryptionOpts(kms_providers, key_vault_namespace)
client = MongoClient(auto_encryption_opts=auto_encryption_opts)
coll = client.test.coll
coll.insert_one({"encryptedField": "123456789"})
The driver will `acquire an access token <https://learn.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/how-to-use-vm-token>`_ from the Azure VM.
.. _Queryable Encryption:
Queryable Encryption
--------------------
.. _automatic-queryable-client-side-encryption:
Automatic Queryable Encryption
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Automatic Queryable Encryption requires MongoDB 7.0+ Enterprise or a MongoDB 7.0+ Atlas cluster.
Queryable Encryption is the second version of Client-Side Field Level Encryption.
Data is encrypted client-side. Queryable Encryption supports indexed encrypted fields,
which are further processed server-side.
Automatic encryption in Queryable Encryption is configured with an ``encrypted_fields`` mapping,
as demonstrated by the following example:
.. code-block:: python
import os
from bson.codec_options import CodecOptions
from pymongo import MongoClient
from pymongo.encryption import ClientEncryption
from pymongo.encryption_options import AutoEncryptionOpts
local_master_key = os.urandom(96)
kms_providers = {"local": {"key": local_master_key}}
key_vault_namespace = "keyvault.datakeys"
key_vault_client = MongoClient()
client_encryption = ClientEncryption(
kms_providers, key_vault_namespace, key_vault_client, CodecOptions()
)
key_vault = key_vault_client["keyvault"]["datakeys"]
key_vault.drop()
# Ensure that two data keys cannot share the same keyAltName.
key_vault.create_index(
"keyAltNames",
unique=True,
partialFilterExpression={"keyAltNames": {"$exists": True}},
)
key1_id = client_encryption.create_data_key("local", key_alt_names=["firstName"])
key2_id = client_encryption.create_data_key("local", key_alt_names=["lastName"])
encrypted_fields_map = {
"default.encryptedCollection": {
"fields": [
{
"path": "firstName",
"bsonType": "string",
"keyId": key1_id,
"queries": [{"queryType": "equality"}],
},
{
"path": "lastName",
"bsonType": "string",
"keyId": key2_id,
},
],
}
}
auto_encryption_opts = AutoEncryptionOpts(
kms_providers,
key_vault_namespace,
encrypted_fields_map=encrypted_fields_map,
)
client = MongoClient(auto_encryption_opts=auto_encryption_opts)
client.default.drop_collection("encryptedCollection")
coll = client.default.create_collection("encryptedCollection")
coll.insert_one({"_id": 1, "firstName": "Jane", "lastName": "Doe"})
docs = list(coll.find({"firstName": "Jane"}))
print(docs)
In the above example, the ``firstName`` and ``lastName`` fields are
automatically encrypted and decrypted.
Explicit Queryable Encryption
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Explicit Queryable Encryption requires MongoDB 7.0+.
Queryable Encryption is the second version of Client-Side Field Level Encryption.
Data is encrypted client-side. Queryable Encryption supports indexed encrypted fields,
which are further processed server-side.
Explicit encryption in Queryable Encryption is performed using the ``encrypt`` and ``decrypt``
methods. Automatic encryption (to allow the ``find_one`` to automatically decrypt) is configured
using an ``encrypted_fields`` mapping, as demonstrated by the following example:
.. code-block:: python
import os
from pymongo import MongoClient
from pymongo.encryption import (
Algorithm,
AutoEncryptionOpts,
ClientEncryption,
QueryType,
)
def main():
# This must be the same master key that was used to create
# the encryption key.
local_master_key = os.urandom(96)
kms_providers = {"local": {"key": local_master_key}}
# The MongoDB namespace (db.collection) used to store
# the encryption data keys.
key_vault_namespace = "encryption.__pymongoTestKeyVault"
key_vault_db_name, key_vault_coll_name = key_vault_namespace.split(".", 1)
# Set up the key vault (key_vault_namespace) for this example.
client = MongoClient()
key_vault = client[key_vault_db_name][key_vault_coll_name]
# Ensure that two data keys cannot share the same keyAltName.
key_vault.drop()
key_vault.create_index(
"keyAltNames",
unique=True,
partialFilterExpression={"keyAltNames": {"$exists": True}},
)
client_encryption = ClientEncryption(
kms_providers,
key_vault_namespace,
# The MongoClient to use for reading/writing to the key vault.
# This can be the same MongoClient used by the main application.
client,
# The CodecOptions class used for encrypting and decrypting.
# This should be the same CodecOptions instance you have configured
# on MongoClient, Database, or Collection.
client.codec_options,
)
# Create new data keys for the indexed and unindexed encrypted fields.
indexed_key_id = client_encryption.create_data_key("local")
unindexed_key_id = client_encryption.create_data_key("local")
encrypted_fields = {
"fields": [
{
"keyId": indexed_key_id,
"path": "encryptedIndexed",
"bsonType": "string",
"queries": {"queryType": "equality"},
},
{
"keyId": unindexed_key_id,
"path": "encryptedUnindexed",
"bsonType": "string",
},
],
}
opts = AutoEncryptionOpts(
{"local": {"key": local_master_key}},
key_vault.full_name,
bypass_query_analysis=True,
key_vault_client=client,
)
# The MongoClient used to read/write application data.
encrypted_client = MongoClient(auto_encryption_opts=opts)
encrypted_client.drop_database("test")
db = encrypted_client.test
# Create the collection with encrypted fields.
coll = db.create_collection("coll", encryptedFields=encrypted_fields)
# Create and encrypt an indexed and unindexed value.
val = "encrypted indexed value"
unindexed_val = "encrypted unindexed value"
insert_payload_indexed = client_encryption.encrypt(
val, Algorithm.INDEXED, indexed_key_id, contention_factor=1
)
insert_payload_unindexed = client_encryption.encrypt(
unindexed_val, Algorithm.UNINDEXED, unindexed_key_id
)
# Insert the payloads.
coll.insert_one(
{
"encryptedIndexed": insert_payload_indexed,
"encryptedUnindexed": insert_payload_unindexed,
}
)
# Encrypt our find payload using QueryType.EQUALITY.
# The value of "indexed_key_id" must be the same as used to encrypt
# the values above.
find_payload = client_encryption.encrypt(
val,
Algorithm.INDEXED,
indexed_key_id,
query_type=QueryType.EQUALITY,
contention_factor=1,
)
# Find the document we inserted using the encrypted payload.
# The returned document is automatically decrypted.
doc = coll.find_one({"encryptedIndexed": find_payload})
print("Returned document: %s" % (doc,))
# Cleanup resources.
client_encryption.close()
encrypted_client.close()
client.close()
if __name__ == "__main__":
main()

View File

@ -1,109 +0,0 @@
Geospatial Indexing Example
===========================
.. testsetup::
from pymongo import MongoClient
client = MongoClient()
client.drop_database("geo_example")
This example shows how to create and use a :data:`~pymongo.GEO2D`
index in PyMongo. To create a spherical (earth-like) geospatial index use :data:`~pymongo.GEOSPHERE` instead.
.. seealso:: The MongoDB documentation on `Geospatial Indexes <https://dochub.mongodb.org/core/geo>`_.
Creating a Geospatial Index
---------------------------
Creating a geospatial index in pymongo is easy:
.. doctest::
>>> from pymongo import MongoClient, GEO2D
>>> db = MongoClient().geo_example
>>> db.places.create_index([("loc", GEO2D)])
'loc_2d'
Inserting Places
----------------
Locations in MongoDB are represented using either embedded documents
or lists where the first two elements are coordinates. Here, we'll
insert a couple of example locations:
.. doctest::
>>> result = db.places.insert_many(
... [{"loc": [2, 5]}, {"loc": [30, 5]}, {"loc": [1, 2]}, {"loc": [4, 4]}]
... )
>>> result.inserted_ids
[ObjectId('...'), ObjectId('...'), ObjectId('...'), ObjectId('...')]
.. note:: If specifying latitude and longitude coordinates in :data:`~pymongo.GEOSPHERE`, list the **longitude** first and then **latitude**.
Querying
--------
Using the geospatial index we can find documents near another point:
.. doctest::
>>> import pprint
>>> for doc in db.places.find({"loc": {"$near": [3, 6]}}).limit(3):
... pprint.pprint(doc)
...
{'_id': ObjectId('...'), 'loc': [2, 5]}
{'_id': ObjectId('...'), 'loc': [4, 4]}
{'_id': ObjectId('...'), 'loc': [1, 2]}
.. note:: If using :data:`pymongo.GEOSPHERE`, using $nearSphere is recommended.
The $maxDistance operator requires the use of :class:`~bson.son.SON`:
.. doctest::
>>> from bson.son import SON
>>> query = {"loc": SON([("$near", [3, 6]), ("$maxDistance", 100)])}
>>> for doc in db.places.find(query).limit(3):
... pprint.pprint(doc)
...
{'_id': ObjectId('...'), 'loc': [2, 5]}
{'_id': ObjectId('...'), 'loc': [4, 4]}
{'_id': ObjectId('...'), 'loc': [1, 2]}
It's also possible to query for all items within a given rectangle
(specified by lower-left and upper-right coordinates):
.. doctest::
>>> query = {"loc": {"$within": {"$box": [[2, 2], [5, 6]]}}}
>>> for doc in db.places.find(query).sort("_id"):
... pprint.pprint(doc)
...
{'_id': ObjectId('...'), 'loc': [2, 5]}
{'_id': ObjectId('...'), 'loc': [4, 4]}
Or circle (specified by center point and radius):
.. doctest::
>>> query = {"loc": {"$within": {"$center": [[0, 0], 6]}}}
>>> for doc in db.places.find(query).sort("_id"):
... pprint.pprint(doc)
...
{'_id': ObjectId('...'), 'loc': [2, 5]}
{'_id': ObjectId('...'), 'loc': [1, 2]}
{'_id': ObjectId('...'), 'loc': [4, 4]}
geoNear queries are also supported using :class:`~bson.son.SON`::
>>> from bson.son import SON
>>> db.command(SON([('geoNear', 'places'), ('near', [1, 2])]))
{'ok': 1.0, 'stats': ...}
.. warning:: Starting in MongoDB version 4.0, MongoDB deprecates the **geoNear** command. Use one of the following operations instead.
* $geoNear - aggregation stage.
* $near - query operator.
* $nearSphere - query operator.

View File

@ -1,52 +0,0 @@
Gevent
======
PyMongo supports `Gevent <https://www.gevent.org/>`_. Simply call Gevent's
``monkey.patch_all()`` before loading any other modules:
.. code-block:: pycon
>>> # You must call patch_all() *before* importing any other modules
>>> from gevent import monkey
>>> _ = monkey.patch_all()
>>> from pymongo import MongoClient
>>> client = MongoClient()
PyMongo uses thread and socket functions from the Python standard library.
Gevent's monkey-patching replaces those standard functions so that PyMongo
does asynchronous I/O with non-blocking sockets, and schedules operations
on greenlets instead of threads.
Avoid blocking in Hub.join
--------------------------
By default, PyMongo uses threads to discover and monitor your servers' topology
(see :ref:`health-monitoring`). If you execute ``monkey.patch_all()`` when
your application first begins, PyMongo automatically uses greenlets instead
of threads.
When shutting down, if your application calls :meth:`~gevent.hub.Hub.join` on
Gevent's :class:`~gevent.hub.Hub` without first terminating these background
greenlets, the call to :meth:`~gevent.hub.Hub.join` blocks indefinitely. You
therefore **must close or dereference** any active
:class:`~pymongo.mongo_client.MongoClient` before exiting.
An example solution to this issue in some application frameworks is a signal
handler to end background greenlets when your application receives SIGHUP:
.. code-block:: python
import signal
def graceful_reload(signum, traceback):
"""Explicitly close some global MongoClient object."""
client.close()
signal.signal(signal.SIGHUP, graceful_reload)
Applications using uWSGI prior to 1.9.16 are affected by this issue,
as are newer uWSGI versions run with the ``--gevent-wait-for-hub`` option.
See `the uWSGI changelog for details
<https://uwsgi-docs.readthedocs.io/en/latest/Changelog-1.9.16.html#important-change-in-the-gevent-plugin-shutdown-reload-procedure>`_.

View File

@ -1,84 +0,0 @@
GridFS Example
==============
.. testsetup::
from pymongo import MongoClient
client = MongoClient()
client.drop_database("gridfs_example")
This example shows how to use :mod:`gridfs` to store large binary
objects (e.g. files) in MongoDB.
.. seealso:: The API docs for :mod:`gridfs`.
.. seealso:: `This blog post
<https://dirolf.com/2010/03/29/new-gridfs-implementation-for-pymongo.html>`_
for some motivation behind this API.
Setup
-----
We start by creating a :class:`~gridfs.GridFS` instance to use:
.. doctest::
>>> from pymongo import MongoClient
>>> import gridfs
>>>
>>> db = MongoClient().gridfs_example
>>> fs = gridfs.GridFS(db)
Every :class:`~gridfs.GridFS` instance is created with and will
operate on a specific :class:`~pymongo.database.Database` instance.
Saving and Retrieving Data
--------------------------
The simplest way to work with :mod:`gridfs` is to use its key/value
interface (the :meth:`~gridfs.GridFS.put` and
:meth:`~gridfs.GridFS.get` methods). To write data to GridFS, use
:meth:`~gridfs.GridFS.put`:
.. doctest::
>>> a = fs.put(b"hello world")
:meth:`~gridfs.GridFS.put` creates a new file in GridFS, and returns
the value of the file document's ``"_id"`` key. Given that ``"_id"``
we can use :meth:`~gridfs.GridFS.get` to get back the contents of the
file:
.. doctest::
>>> fs.get(a).read()
b'hello world'
:meth:`~gridfs.GridFS.get` returns a file-like object, so we get the
file's contents by calling :meth:`~gridfs.grid_file.GridOut.read`.
In addition to putting a :class:`bytes` object as a GridFS file, we can also
put any file-like object (an object with a :meth:`read`
method). GridFS will handle reading the file in chunk-sized segments
automatically. We can also add additional attributes to the file as
keyword arguments:
.. doctest::
>>> b = fs.put(fs.get(a), filename="foo", bar="baz")
>>> out = fs.get(b)
>>> out.read()
b'hello world'
>>> out.filename
'foo'
>>> out.bar
'baz'
>>> out.upload_date
datetime.datetime(...)
The attributes we set in :meth:`~gridfs.GridFS.put` are stored in the
file document, and retrievable after calling
:meth:`~gridfs.GridFS.get`. Some attributes (like ``"filename"``) are
special and are defined in the GridFS specification - see that
document for more details.

View File

@ -1,367 +0,0 @@
High Availability and PyMongo
=============================
PyMongo makes it easy to write highly available applications whether
you use a `single replica set <https://dochub.mongodb.org/core/rs>`_
or a `large sharded cluster
<https://www.mongodb.com/docs/manual/sharding/>`_.
Connecting to a Replica Set
---------------------------
PyMongo makes working with `replica sets
<https://dochub.mongodb.org/core/rs>`_ easy. Here we'll launch a new
replica set and show how to handle both initialization and normal
connections with PyMongo.
.. seealso:: The MongoDB documentation on `replication <https://dochub.mongodb.org/core/rs>`_.
Starting a Replica Set
~~~~~~~~~~~~~~~~~~~~~~
The main `replica set documentation
<https://dochub.mongodb.org/core/rs>`_ contains extensive information
about setting up a new replica set or migrating an existing MongoDB
setup; be sure to check that out. Here, we'll just do the bare minimum
to get a three-node replica set set up locally.
.. warning:: Replica sets should always use multiple nodes in
production - putting all set members on the same physical node is
only recommended for testing and development.
We start three ``mongod`` processes, each on a different port and with
a different dbpath, but all using the same replica set name "foo".
.. code-block:: bash
$ mkdir -p /data/db0 /data/db1 /data/db2
$ mongod --port 27017 --dbpath /data/db0 --replSet foo
.. code-block:: bash
$ mongod --port 27018 --dbpath /data/db1 --replSet foo
.. code-block:: bash
$ mongod --port 27019 --dbpath /data/db2 --replSet foo
Initializing the Set
~~~~~~~~~~~~~~~~~~~~
At this point all of our nodes are up and running, but the set has yet
to be initialized. Until the set is initialized no node will become
the primary, and things are essentially "offline".
To initialize the set we need to connect directly to a single node and run the
initiate command using the ``directConnection`` option::
>>> from pymongo import MongoClient
>>> c = MongoClient('localhost', 27017, directConnection=True)
.. note:: We could have connected to any of the other nodes instead,
but only the node we initiate from is allowed to contain any
initial data.
After connecting, we run the initiate command to get things started::
>>> config = {'_id': 'foo', 'members': [
... {'_id': 0, 'host': 'localhost:27017'},
... {'_id': 1, 'host': 'localhost:27018'},
... {'_id': 2, 'host': 'localhost:27019'}]}
>>> c.admin.command("replSetInitiate", config)
{'ok': 1.0, ...}
The three ``mongod`` servers we started earlier will now coordinate
and come online as a replica set.
Connecting to a Replica Set
~~~~~~~~~~~~~~~~~~~~~~~~~~~
The initial connection as made above is a special case for an
uninitialized replica set. Normally we'll want to connect
differently. A connection to a replica set can be made using the
:meth:`~pymongo.mongo_client.MongoClient` constructor, specifying
one or more members of the set and optionally the replica set name.
Any of the following connects to the replica set we just created::
>>> MongoClient('localhost')
MongoClient(host=['localhost:27017'], ...)
>>> MongoClient('localhost', replicaset='foo')
MongoClient(host=['localhost:27017'], replicaset='foo', ...)
>>> MongoClient('localhost:27018', replicaset='foo')
MongoClient(['localhost:27018'], replicaset='foo', ...)
>>> MongoClient('localhost', 27019, replicaset='foo')
MongoClient(['localhost:27019'], replicaset='foo', ...)
>>> MongoClient('mongodb://localhost:27017,localhost:27018/')
MongoClient(['localhost:27017', 'localhost:27018'], ...)
>>> MongoClient('mongodb://localhost:27017,localhost:27018/?replicaSet=foo')
MongoClient(['localhost:27017', 'localhost:27018'], replicaset='foo', ...)
The addresses passed to :meth:`~pymongo.mongo_client.MongoClient` are called
the *seeds*. As long as at least one of the seeds is online, MongoClient
discovers all the members in the replica set, and determines which is the
current primary and which are secondaries or arbiters. Each seed must be the
address of a single mongod. Multihomed and round robin DNS addresses are
**not** supported.
The :class:`~pymongo.mongo_client.MongoClient` constructor is non-blocking:
the constructor returns immediately while the client connects to the replica
set using background threads. Note how, if you create a client and immediately
print the string representation of its
:attr:`~pymongo.mongo_client.MongoClient.nodes` attribute, the list may be
empty initially. If you wait a moment, MongoClient discovers the whole replica
set::
>>> from time import sleep
>>> c = MongoClient(replicaset='foo'); print(c.nodes); sleep(0.1); print(c.nodes)
frozenset([])
frozenset([('localhost', 27019), ('localhost', 27017), ('localhost', 27018)])
You need not wait for replica set discovery in your application, however.
If you need to do any operation with a MongoClient, such as a
:meth:`~pymongo.collection.Collection.find` or an
:meth:`~pymongo.collection.Collection.insert_one`, the client waits to discover
a suitable member before it attempts the operation.
Handling Failover
~~~~~~~~~~~~~~~~~
When a failover occurs, PyMongo will automatically attempt to find the
new primary node and perform subsequent operations on that node. This
can't happen completely transparently, however. Here we'll perform an
example failover to illustrate how everything behaves. First, we'll
connect to the replica set and perform a couple of basic operations::
>>> db = MongoClient("localhost", replicaSet='foo').test
>>> db.test.insert_one({"x": 1}).inserted_id
ObjectId('...')
>>> db.test.find_one()
{'x': 1, '_id': ObjectId('...')}
By checking the host and port, we can see that we're connected to
*localhost:27017*, which is the current primary::
>>> db.client.address
('localhost', 27017)
Now let's bring down that node and see what happens when we run our
query again::
>>> db.test.find_one()
Traceback (most recent call last):
pymongo.errors.AutoReconnect: ...
We get an :class:`~pymongo.errors.AutoReconnect` exception. This means
that the driver was not able to connect to the old primary (which
makes sense, as we killed the server), but that it will attempt to
automatically reconnect on subsequent operations. When this exception
is raised our application code needs to decide whether to retry the
operation or to simply continue, accepting the fact that the operation
might have failed.
On subsequent attempts to run the query we might continue to see this
exception. Eventually, however, the replica set will fail over and
elect a new primary (this should take no more than a couple of seconds in
general). At that point the driver will connect to the new primary and
the operation will succeed::
>>> db.test.find_one()
{'x': 1, '_id': ObjectId('...')}
>>> db.client.address
('localhost', 27018)
Bring the former primary back up. It will rejoin the set as a secondary.
Now we can move to the next section: distributing reads to secondaries.
.. _secondary-reads:
Secondary Reads
~~~~~~~~~~~~~~~
By default an instance of MongoClient sends queries to
the primary member of the replica set. To use secondaries for queries
we have to change the read preference::
>>> client = MongoClient(
... 'localhost:27017',
... replicaSet='foo',
... readPreference='secondaryPreferred')
>>> client.read_preference
SecondaryPreferred(tag_sets=None)
Now all queries will be sent to the secondary members of the set. If there are
no secondary members the primary will be used as a fallback. If you have
queries you would prefer to never send to the primary you can specify that
using the ``secondary`` read preference.
By default the read preference of a :class:`~pymongo.database.Database` is
inherited from its MongoClient, and the read preference of a
:class:`~pymongo.collection.Collection` is inherited from its Database. To use
a different read preference use the
:meth:`~pymongo.mongo_client.MongoClient.get_database` method, or the
:meth:`~pymongo.database.Database.get_collection` method::
>>> from pymongo import ReadPreference
>>> client.read_preference
SecondaryPreferred(tag_sets=None)
>>> db = client.get_database('test', read_preference=ReadPreference.SECONDARY)
>>> db.read_preference
Secondary(tag_sets=None)
>>> coll = db.get_collection('test', read_preference=ReadPreference.PRIMARY)
>>> coll.read_preference
Primary()
You can also change the read preference of an existing
:class:`~pymongo.collection.Collection` with the
:meth:`~pymongo.collection.Collection.with_options` method::
>>> coll2 = coll.with_options(read_preference=ReadPreference.NEAREST)
>>> coll.read_preference
Primary()
>>> coll2.read_preference
Nearest(tag_sets=None)
Note that since most database commands can only be sent to the primary of a
replica set, the :meth:`~pymongo.database.Database.command` method does not obey
the Database's :attr:`~pymongo.database.Database.read_preference`, but you can
pass an explicit read preference to the method::
>>> db.command('dbstats', read_preference=ReadPreference.NEAREST)
{...}
Reads are configured using three options: **read preference**, **tag sets**,
and **local threshold**.
**Read preference**:
Read preference is configured using one of the classes from
:mod:`~pymongo.read_preferences` (:class:`~pymongo.read_preferences.Primary`,
:class:`~pymongo.read_preferences.PrimaryPreferred`,
:class:`~pymongo.read_preferences.Secondary`,
:class:`~pymongo.read_preferences.SecondaryPreferred`, or
:class:`~pymongo.read_preferences.Nearest`). For convenience, we also provide
:class:`~pymongo.read_preferences.ReadPreference` with the following
attributes:
- ``PRIMARY``: Read from the primary. This is the default read preference,
and provides the strongest consistency. If no primary is available, raise
:class:`~pymongo.errors.AutoReconnect`.
- ``PRIMARY_PREFERRED``: Read from the primary if available, otherwise read
from a secondary.
- ``SECONDARY``: Read from a secondary. If no matching secondary is available,
raise :class:`~pymongo.errors.AutoReconnect`.
- ``SECONDARY_PREFERRED``: Read from a secondary if available, otherwise
from the primary.
- ``NEAREST``: Read from any available member.
**Tag sets**:
Replica-set members can be `tagged
<https://www.mongodb.com/docs/manual/data-center-awareness/>`_ according to any
criteria you choose. By default, PyMongo ignores tags when
choosing a member to read from, but your read preference can be configured with
a ``tag_sets`` parameter. ``tag_sets`` must be a list of dictionaries, each
dict providing tag values that the replica set member must match.
PyMongo tries each set of tags in turn until it finds a set of
tags with at least one matching member. For example, to prefer reads from the
New York data center, but fall back to the San Francisco data center, tag your
replica set members according to their location and create a
MongoClient like so::
>>> from pymongo.read_preferences import Secondary
>>> db = client.get_database(
... 'test', read_preference=Secondary([{'dc': 'ny'}, {'dc': 'sf'}]))
>>> db.read_preference
Secondary(tag_sets=[{'dc': 'ny'}, {'dc': 'sf'}])
MongoClient tries to find secondaries in New York, then San Francisco,
and raises :class:`~pymongo.errors.AutoReconnect` if none are available. As an
additional fallback, specify a final, empty tag set, ``{}``, which means "read
from any member that matches the mode, ignoring tags."
See :mod:`~pymongo.read_preferences` for more information.
.. _distributes reads to secondaries:
**Local threshold**:
If multiple members match the read preference and tag sets, PyMongo reads
from among the nearest members, chosen according to ping time. By default,
only members whose ping times are within 15 milliseconds of the nearest
are used for queries. You can choose to distribute reads among members with
higher latencies by setting ``localThresholdMS`` to a larger
number::
>>> client = pymongo.MongoClient(
... replicaSet='repl0',
... readPreference='secondaryPreferred',
... localThresholdMS=35)
In this case, PyMongo distributes reads among matching members within 35
milliseconds of the closest member's ping time.
.. note:: ``localThresholdMS`` is ignored when talking to a
replica set *through* a mongos. The equivalent is the localThreshold_ command
line option.
.. _localThreshold: https://mongodb.com/docs/manual/reference/program/mongos/#std-option-mongos.--localThreshold
.. _health-monitoring:
Health Monitoring
'''''''''''''''''
When MongoClient is initialized it launches background threads to
monitor the replica set for changes in:
* Health: detect when a member goes down or comes up, or if a different member
becomes primary
* Configuration: detect when members are added or removed, and detect changes
in members' tags
* Latency: track a moving average of each member's ping time
Replica-set monitoring ensures queries are continually routed to the proper
members as the state of the replica set changes.
.. _mongos-load-balancing:
mongos Load Balancing
---------------------
An instance of :class:`~pymongo.mongo_client.MongoClient` can be configured
with a list of addresses of mongos servers:
>>> client = MongoClient('mongodb://host1,host2,host3')
Each member of the list must be a single mongos server. Multihomed and round
robin DNS addresses are **not** supported. The client continuously
monitors all the mongoses' availability, and its network latency to each.
PyMongo distributes operations evenly among the set of mongoses within its
``localThresholdMS`` (similar to how it `distributes reads to secondaries`_
in a replica set). By default the threshold is 15 ms.
The lowest-latency server, and all servers with latencies no more than
``localThresholdMS`` beyond the lowest-latency server's, receive
operations equally. For example, if we have three mongoses:
- host1: 20 ms
- host2: 35 ms
- host3: 40 ms
By default the ``localThresholdMS`` is 15 ms, so PyMongo uses host1 and host2
evenly. It uses host1 because its network latency to the driver is shortest. It
uses host2 because its latency is within 15 ms of the lowest-latency server's.
But it excludes host3: host3 is 20ms beyond the lowest-latency server.
If we set ``localThresholdMS`` to 30 ms all servers are within the threshold:
>>> client = MongoClient('mongodb://host1,host2,host3/?localThresholdMS=30')
.. warning:: Do **not** connect PyMongo to a pool of mongos instances through a
load balancer. A single socket connection must always be routed to the same
mongos instance for proper cursor support.

View File

@ -1,40 +0,0 @@
Examples
========
The examples in this section are intended to give in-depth overviews
of how to accomplish specific tasks with MongoDB and PyMongo.
Unless otherwise noted, all examples assume that a MongoDB instance is
running on the default host and port. Assuming you have `downloaded
and installed <https://www.mongodb.org/display/DOCS/Getting+Started>`_
MongoDB, you can start it like so:
.. code-block:: bash
$ mongod
.. toctree::
:maxdepth: 1
aggregation
authentication
collations
copydb
custom_type
bulk
client_bulk
datetimes
geo
gevent
gridfs
high_availability
logging
mod_wsgi
network_compression
server_selection
tailable
timeouts
tls
type_hints
encryption
uuid

View File

@ -1,63 +0,0 @@
Logging
========
Starting in 4.8, **PyMongo** supports `Python's native logging library <https://docs.python.org/3/howto/logging.html>`_,
enabling developers to customize the verbosity of log messages for their applications.
Components
-------------
There are currently three different **PyMongo** components with logging support: ``pymongo.command``, ``pymongo.connection``, and ``pymongo.serverSelection``.
These components deal with command operations, connection management, and server selection, respectively.
Each can be configured separately or they can all be configured together.
Configuration
-------------
Currently, the above components each support ``DEBUG`` logging. To enable a single component, do the following::
import logging
logging.getLogger('pymongo.<componentName>').setLevel(logging.DEBUG)
For example, to enable command logging::
import logging
logging.getLogger('pymongo.command').setLevel(logging.DEBUG)
You can also enable all ``DEBUG`` logs at once::
import logging
logging.getLogger('pymongo').setLevel(logging.DEBUG)
Truncation
-------------
When ``pymongo.command`` debug logs are enabled, every command sent to the server and every response sent back will be included as part of the logs.
By default, these command and response documents are truncated after 1000 bytes.
You can configure a higher truncation limit by setting the ``MONGOB_LOG_MAX_DOCUMENT_LENGTH`` environment variable to your desired length.
Note that by default, only sensitive authentication command contents are redacted.
All commands containing user data will be logged, including the actual contents of your queries.
To prevent this behavior, set ``MONGOB_LOG_MAX_DOCUMENT_LENGTH`` to 0. This will omit the command and response bodies from the logs.
Example
-------------
Here's a simple example that enables ``pymongo.command`` debug logs and performs two database operations::
import logging
import pymongo
# Automatically writes all logs to stdout
logging.basicConfig()
logging.getLogger('pymongo.command').setLevel(logging.DEBUG)
client = pymongo.MongoClient()
client.db.test.insert_one({"x": 1})
client.db.test.find_one({"x": 1})
---------------------------------
DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command started", "command": "{\"insert\": \"test\", \"ordered\": true, \"lsid\": {\"id\": {\"$binary\": {\"base64\": \"GI7ubVhPSsWd7+OwHEFx6Q==\", \"subType\": \"04\"}}}, \"$db\": \"db\", \"documents\": [{\"x\": 1, \"_id\": {\"$oid\": \"65cbe82614be1fc2beb4e4aa\"}}]}", "commandName": "insert", "databaseName": "db", "requestId": 1144108930, "operationId": 1144108930, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017}
DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command succeeded", "durationMS": 0.515, "reply": "{\"n\": 1, \"ok\": 1.0}", "commandName": "insert", "databaseName": "db", "requestId": 1144108930, "operationId": 1144108930, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017}
DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command started", "command": "{\"find\": \"test\", \"filter\": {\"x\": 1}, \"limit\": 1, \"singleBatch\": true, \"lsid\": {\"id\": {\"$binary\": {\"base64\": \"GI7ubVhPSsWd7+OwHEFx6Q==\", \"subType\": \"04\"}}}, \"$db\": \"db\"}", "commandName": "find", "databaseName": "db", "requestId": 470211272, "operationId": 470211272, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017}
DEBUG:pymongo.command:{"clientId": {"$oid": "65cbe82614be1fc2beb4e4a9"}, "message": "Command succeeded", "durationMS": 0.621, "reply": "{\"cursor\": {\"firstBatch\": [{\"_id\": {\"$oid\": \"65cbdf391a957ed280001417\"}, \"x\": 1}], \"ns\": \"db.test\"}, \"ok\": 1.0}", "commandName": "find", "databaseName": "db", "requestId": 470211272, "operationId": 470211272, "driverConnectionId": 1, "serverConnectionId": 3554, "serverHost": "localhost", "serverPort": 27017}

View File

@ -1,64 +0,0 @@
.. _pymongo-and-mod_wsgi:
PyMongo and mod_wsgi
====================
To run your application under `mod_wsgi <https://github.com/GrahamDumpleton/mod_wsgi/>`_,
follow these guidelines:
* Run ``mod_wsgi`` in daemon mode with the ``WSGIDaemonProcess`` directive.
* Assign each application to a separate daemon with ``WSGIProcessGroup``.
* Use ``WSGIApplicationGroup %{GLOBAL}`` to ensure your application is running
in the daemon's main Python interpreter, not a sub interpreter.
For example, this ``mod_wsgi`` configuration ensures an application runs in the
main interpreter::
<VirtualHost *>
WSGIDaemonProcess my_process
WSGIScriptAlias /my_app /path/to/app.wsgi
WSGIProcessGroup my_process
WSGIApplicationGroup %{GLOBAL}
</VirtualHost>
If you have multiple applications that use PyMongo, put each in a separate
daemon, still in the global application group::
<VirtualHost *>
WSGIDaemonProcess my_process
WSGIScriptAlias /my_app /path/to/app.wsgi
<Location /my_app>
WSGIProcessGroup my_process
</Location>
WSGIDaemonProcess my_other_process
WSGIScriptAlias /my_other_app /path/to/other_app.wsgi
<Location /my_other_app>
WSGIProcessGroup my_other_process
</Location>
WSGIApplicationGroup %{GLOBAL}
</VirtualHost>
Background: ``mod_wsgi`` can run in "embedded" mode when only WSGIScriptAlias
is set, or "daemon" mode with WSGIDaemonProcess. In daemon mode, ``mod_wsgi``
can run your application in the Python main interpreter, or in sub interpreters.
The correct way to run a PyMongo application is in daemon mode, using the main
interpreter.
Python C extensions in general have issues running in multiple
Python sub interpreters. These difficulties are explained in the documentation for
`Py_NewInterpreter <https://docs.python.org/3/c-api/init.html#c.Py_NewInterpreter>`_
and in the `Multiple Python Sub Interpreters
<https://modwsgi.readthedocs.io/en/master/user-guides/application-issues.html#multiple-python-sub-interpreters>`_
section of the ``mod_wsgi`` documentation.
Beginning with PyMongo 2.7, the C extension for BSON detects when it is running
in a sub interpreter and activates a workaround, which adds a small cost to
BSON decoding. To avoid this cost, use ``WSGIApplicationGroup %{GLOBAL}`` to
ensure your application runs in the main interpreter.
Since your program runs in the main interpreter it should not share its
process with any other applications, lest they interfere with each other's
state. Each application should have its own daemon process, as shown in the
example above.

View File

@ -1,39 +0,0 @@
.. _network-compression-example:
Network Compression
===================
PyMongo supports network compression, where network traffic between the client
and MongoDB server is compressed, which reduces the amount of data passed
over the network. By default no compression is used.
The driver supports the following algorithms:
- `snappy <https://pypi.org/project/python-snappy>`_ available in MongoDB 3.4 and later.
- :mod:`zlib` available in MongoDB 3.6 and later.
- `zstandard <https://pypi.org/project/zstandard/>`_ available in MongoDB 4.2 and later.
.. note:: snappy and zstandard compression require additional dependencies. See :ref:`optional-deps`.
Applications can enable wire protocol compression via the ``compressors`` URI and
keyword argument to :class:`~pymongo.mongo_client.MongoClient`. For example::
>>> client = MongoClient(compressors='zlib')
When multiple compression algorithms are given, the driver selects the first one in the
list supported by the MongoDB instance to which it is connected. For example::
>>> client = MongoClient(compressors='snappy,zstandard,zlib')
The ``compressors`` option can also be set via the URI::
>>> client = MongoClient('mongodb://example.com/?compressors=snappy,zstandard,zlib')
Additionally, zlib compression allows specifying a compression level with supported values from -1 to 9::
>>> client = MongoClient(compressors='zlib', zlibCompressionLevel=-1)
The ``zlibCompressionLevel`` is passed as the ``level`` argument to :func:`zlib.compress`.
.. seealso:: The MongoDB documentation on `network compression URI options <https://dochub.mongodb.org/core/compression-options>`_.

View File

@ -1,108 +0,0 @@
Server Selector Example
=======================
Users can exert fine-grained control over the `server selection algorithm`_
by setting the ``server_selector`` option on the :class:`~pymongo.MongoClient`
to an appropriate callable. This example shows how to use this functionality
to prefer servers running on ``localhost``.
.. warning::
Use of custom server selector functions is a power user feature. Misusing
custom server selectors can have unintended consequences such as degraded
read/write performance.
.. testsetup::
from pymongo import MongoClient
.. _server selection algorithm: https://mongodb.com/docs/manual/core/read-preference-mechanics/
Example: Selecting Servers Running on ``localhost``
---------------------------------------------------
To start, we need to write the server selector function that will be used.
The server selector function should accept a list of
:class:`~pymongo.server_description.ServerDescription` objects and return a
list of server descriptions that are suitable for the read or write operation.
A server selector must not create or modify
:class:`~pymongo.server_description.ServerDescription` objects, and must return
the selected instances unchanged.
In this example, we write a server selector that prioritizes servers running on
``localhost``. This can be desirable when using a sharded cluster with multiple
``mongos``, as locally run queries are likely to see lower latency and higher
throughput. Please note, however, that whether preferring ``localhost`` is
beneficial depends heavily on the application.
In addition to comparing the hostname with ``localhost``, our server selector
function accounts for the edge case when no servers are running on
``localhost``. In this case, we allow the default server selection logic to
prevail by passing through the received server description list unchanged.
Failure to do this would render the client unable to communicate with MongoDB
in the event that no servers were running on ``localhost``.
The described server selection logic is implemented in the following server
selector function:
.. doctest::
>>> def server_selector(server_descriptions):
... servers = [
... server for server in server_descriptions if server.address[0] == "localhost"
... ]
... if not servers:
... return server_descriptions
... return servers
...
Finally, we can create a :class:`~pymongo.MongoClient` instance with this
server selector.
.. doctest::
>>> client = MongoClient(server_selector=server_selector)
Server Selection Process
------------------------
This section dives deeper into the server selection process for reads and
writes. In the case of a write, the driver performs the following operations
(in order) during the selection process:
#. Select all writeable servers from the list of known hosts. For a replica set
this is the primary, while for a sharded cluster this is all the known mongoses.
#. Apply the user-defined server selector function. Note that the custom server
selector is **not** called if there are no servers left from the previous
filtering stage.
#. Apply the ``localThresholdMS`` setting to the list of remaining hosts. This
whittles the host list down to only contain servers whose latency is at most
``localThresholdMS`` milliseconds higher than the lowest observed latency.
#. Select a server at random from the remaining host list. The desired
operation is then performed against the selected server.
In the case of **reads** the process is identical except for the first step.
Here, instead of selecting all writeable servers, we select all servers
matching the user's :class:`~pymongo.read_preferences.ReadPreference` from the
list of known hosts. As an example, for a 3-member replica set with a
:class:`~pymongo.read_preferences.Secondary` read preference, we would select
all available secondaries.
.. _server selection algorithm: https://mongodb.com/docs/manual/core/read-preference-mechanics/

View File

@ -1,42 +0,0 @@
Tailable Cursors
================
By default, MongoDB will automatically close a cursor when the client has
exhausted all results in the cursor. However, for `capped collections
<https://mongodb.com/docs/manual/core/capped-collections/>`_ you may
use a `tailable cursor
<https://mongodb.com/docs/manual/core/tailable-cursors/>`_
that remains open after the client exhausts the results in the initial cursor.
The following is a basic example of using a tailable cursor to tail the oplog
of a replica set member::
import time
import pymongo
client = pymongo.MongoClient()
oplog = client.local.oplog.rs
first = oplog.find().sort('$natural', pymongo.ASCENDING).limit(-1).next()
print(first)
ts = first['ts']
while True:
# For a regular capped collection CursorType.TAILABLE_AWAIT is the
# only option required to create a tailable cursor. When querying the
# oplog, the oplog_replay option enables an optimization to quickly
# find the 'ts' value we're looking for. The oplog_replay option
# can only be used when querying the oplog. Starting in MongoDB 4.4
# this option is ignored by the server as queries against the oplog
# are optimized automatically by the MongoDB query engine.
cursor = oplog.find({'ts': {'$gt': ts}},
cursor_type=pymongo.CursorType.TAILABLE_AWAIT,
oplog_replay=True)
while cursor.alive:
for doc in cursor:
ts = doc['ts']
print(doc)
# We end up here if the find() returned no documents or if the
# tailable cursor timed out (no new documents were added to the
# collection for more than 1 second).
time.sleep(1)

View File

@ -1,162 +0,0 @@
.. _timeout-example:
Client Side Operation Timeout
=============================
PyMongo 4.2 introduced :meth:`~pymongo.timeout` and the ``timeoutMS``
URI and keyword argument to :class:`~pymongo.mongo_client.MongoClient`.
These features allow applications to more easily limit the amount of time that
one or more operations can execute before control is returned to the app. This
timeout applies to all of the work done to execute the operation, including
but not limited to server selection, connection checkout, serialization, and
server-side execution.
Basic Usage
-----------
The following example uses :meth:`~pymongo.timeout` to configure a 10-second
timeout for an :meth:`~pymongo.collection.Collection.insert_one` operation::
import pymongo
with pymongo.timeout(10):
coll.insert_one({"name": "Nunu"})
The :meth:`~pymongo.timeout` applies to all pymongo operations within the block.
The following example ensures that both the ``insert`` and the ``find`` complete
within 10 seconds total, or raise a timeout error::
with pymongo.timeout(10):
coll.insert_one({"name": "Nunu"})
coll.find_one({"name": "Nunu"})
When nesting :func:`~pymongo.timeout`, the nested deadline is capped by the outer
deadline. The deadline can only be shortened, not extended.
When exiting the block, the previous deadline is restored::
with pymongo.timeout(5):
coll.find_one() # Uses the 5 second deadline.
with pymongo.timeout(3):
coll.find_one() # Uses the 3 second deadline.
coll.find_one() # Uses the original 5 second deadline.
with pymongo.timeout(10):
coll.find_one() # Still uses the original 5 second deadline.
coll.find_one() # Uses the original 5 second deadline.
Timeout errors
--------------
When the :meth:`~pymongo.timeout` with-statement is entered, a deadline is set
for the entire block. When that deadline is exceeded, any blocking pymongo operation
will raise a timeout exception. For example::
try:
with pymongo.timeout(10):
coll.insert_one({"name": "Nunu"})
time.sleep(10)
# The deadline has now expired, the next operation will raise
# a timeout exception.
coll.find_one({"name": "Nunu"})
except PyMongoError as exc:
if exc.timeout:
print(f"block timed out: {exc!r}")
else:
print(f"failed with non-timeout error: {exc!r}")
The :attr:`pymongo.errors.PyMongoError.timeout` property (added in PyMongo 4.2)
will be ``True`` when the error was caused by a timeout and ``False`` otherwise.
The timeoutMS URI option
------------------------
PyMongo 4.2 also added support for the ``timeoutMS`` URI and keyword argument to
:class:`~pymongo.mongo_client.MongoClient`. When this option is configured, the
client will automatically apply the timeout to each API call. For example::
client = MongoClient("mongodb://localhost/?timeoutMS=10000")
coll = client.test.test
coll.insert_one({"name": "Nunu"}) # Uses a 10-second timeout.
coll.find_one({"name": "Nunu"}) # Also uses a 10-second timeout.
The above is roughly equivalent to::
client = MongoClient()
coll = client.test.test
with pymongo.timeout(10):
coll.insert_one({"name": "Nunu"})
with pymongo.timeout(10):
coll.find_one({"name": "Nunu"})
pymongo.timeout overrides timeoutMS
-----------------------------------
:meth:`~pymongo.timeout` overrides ``timeoutMS``; within a
:meth:`~pymongo.timeout` block a client's ``timeoutMS`` option is ignored::
client = MongoClient("mongodb://localhost/?timeoutMS=10000")
coll = client.test.test
coll.insert_one({"name": "Nunu"}) # Uses the client's 10-second timeout.
# pymongo.timeout overrides the client's timeoutMS.
with pymongo.timeout(20):
coll.insert_one({"name": "Nunu"}) # Uses the 20-second timeout.
with pymongo.timeout(5):
coll.find_one({"name": "Nunu"}) # Uses the 5-second timeout.
pymongo.timeout is thread safe
------------------------------
:meth:`~pymongo.timeout` is thread safe; the timeout only applies to the current
thread and multiple threads can configure different timeouts in parallel.
pymongo.timeout is asyncio safe
-------------------------------
:meth:`~pymongo.timeout` is asyncio safe; the timeout only applies to the current
Task and multiple Tasks can configure different timeouts concurrently.
:meth:`~pymongo.timeout` can be used identically in
`Motor <https://github.com/mongodb/motor>`_, for example::
import motor.motor_asyncio
client = motor.motor_asyncio.AsyncIOMotorClient()
coll = client.test.test
with pymongo.timeout(10):
await coll.insert_one({"name": "Nunu"})
await coll.find_one({"name": "Nunu"})
Troubleshooting
---------------
There are many timeout errors that can be raised depending on when the timeout
expires. In code, these can be identified with the :attr:`pymongo.errors.PyMongoError.timeout`
property. Some specific timeout error examples are described below.
When the client was unable to find an available server to run the operation
within the given timeout::
pymongo.errors.ServerSelectionTimeoutError: No servers found yet, Timeout: -0.00202266700216569s, Topology Description: <TopologyDescription id: 63698e87cebfd22ab1bd2ae0, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None>]>
When either the client was unable to establish a connection within the given
timeout or the operation was sent but the server was not able to respond in time::
pymongo.errors.NetworkTimeout: localhost:27017: timed out
When the server cancelled the operation because it exceeded the given timeout.
Note that the operation may have partially completed on the server (depending
on the operation)::
pymongo.errors.ExecutionTimeout: operation exceeded time limit, full error: {'ok': 0.0, 'errmsg': 'operation exceeded time limit', 'code': 50, 'codeName': 'MaxTimeMSExpired'}
When the client cancelled the operation because it was not possible to complete
within the given timeout::
pymongo.errors.ExecutionTimeout: operation would exceed time limit, remaining timeout:0.00196 <= network round trip time:0.00427
When the client attempted a write operation but the server could not replicate
that write (according to the configured write concern) within the given timeout::
pymongo.errors.WTimeoutError: operation exceeded time limit, full error: {'code': 50, 'codeName': 'MaxTimeMSExpired', 'errmsg': 'operation exceeded time limit', 'errInfo': {'writeConcern': {'w': 1, 'wtimeout': 0}}}
The same error as above but for :meth:`~pymongo.collection.Collection.insert_many`
or :meth:`~pymongo.collection.Collection.bulk_write`::
pymongo.errors.BulkWriteError: batch op errors occurred, full error: {'writeErrors': [], 'writeConcernErrors': [{'code': 50, 'codeName': 'MaxTimeMSExpired', 'errmsg': 'operation exceeded time limit', 'errInfo': {'writeConcern': {'w': 1, 'wtimeout': 0}}}], 'nInserted': 2, 'nUpserted': 0, 'nMatched': 0, 'nModified': 0, 'nRemoved': 0, 'upserted': []}

View File

@ -1,234 +0,0 @@
TLS/SSL and PyMongo
===================
PyMongo supports connecting to MongoDB over TLS/SSL. This guide covers the
configuration options supported by PyMongo. See `the server documentation
<https://mongodb.com/docs/manual/tutorial/configure-ssl/>`_ to configure
MongoDB.
.. warning:: Industry best practices recommend, and some regulations require,
the use of TLS 1.1 or newer. Though no application changes are required for
PyMongo to make use of the newest protocols, some operating systems or
versions may not provide an OpenSSL version new enough to support them.
Users of macOS older than 10.13 (High Sierra) will need to install Python
from `python.org`_, `homebrew`_, `macports`_, or another similar source.
Users of Linux or other non-macOS Unix can check their OpenSSL version like
this::
$ openssl version
If the version number is less than 1.0.1 support for TLS 1.1 or newer is not
available. Contact your operating system vendor for a solution or upgrade to
a newer distribution.
You can check your Python interpreter by installing the `requests`_ module
and executing the following command::
python -c "import requests; print(requests.get('https://www.howsmyssl.com/a/check', verify=False).json()['tls_version'])"
You should see "TLS 1.X" where X is >= 1.
You can read more about TLS versions and their security implications here:
`<https://cheatsheetseries.owasp.org/cheatsheets/Transport_Layer_Security_Cheat_Sheet.html#only-support-strong-protocols>`_
.. _python.org: https://www.python.org/downloads/
.. _homebrew: https://brew.sh/
.. _macports: https://www.macports.org/
.. _requests: https://pypi.python.org/pypi/requests
Basic configuration
...................
In many cases connecting to MongoDB over TLS/SSL requires nothing more than
passing ``tls=True`` as a keyword argument to
:class:`~pymongo.mongo_client.MongoClient`::
>>> client = pymongo.MongoClient('example.com', tls=True)
Or passing ``tls=true`` in the URI::
>>> client = pymongo.MongoClient('mongodb://example.com/?tls=true')
This configures PyMongo to connect to the server using TLS, verify the server's
certificate and verify that the host you are attempting to connect to is listed
by that certificate.
Certificate verification policy
...............................
By default, PyMongo is configured to require a certificate from the server when
TLS is enabled. This is configurable using the ``tlsAllowInvalidCertificates``
option. To disable this requirement pass ``tlsAllowInvalidCertificates=True``
as a keyword parameter::
>>> client = pymongo.MongoClient('example.com',
... tls=True,
... tlsAllowInvalidCertificates=True)
Or, in the URI::
>>> uri = 'mongodb://example.com/?tls=true&tlsAllowInvalidCertificates=true'
>>> client = pymongo.MongoClient(uri)
Specifying a CA file
....................
In some cases you may want to configure PyMongo to use a specific set of CA
certificates. This is most often the case when you are acting as your own
certificate authority rather than using server certificates signed by a well
known authority. The ``tlsCAFile`` option takes a path to a CA file. It can be
passed as a keyword argument::
>>> client = pymongo.MongoClient('example.com',
... tls=True,
... tlsCAFile='/path/to/ca.pem')
Or, in the URI::
>>> uri = 'mongodb://example.com/?tls=true&tlsCAFile=/path/to/ca.pem'
>>> client = pymongo.MongoClient(uri)
Specifying a certificate revocation list
........................................
The ``tlsCRLFile`` option takes a path to a CRL file. It can be passed
as a keyword argument::
>>> client = pymongo.MongoClient('example.com',
... tls=True,
... tlsCRLFile='/path/to/crl.pem')
Or, in the URI::
>>> uri = 'mongodb://example.com/?tls=true&tlsCRLFile=/path/to/crl.pem'
>>> client = pymongo.MongoClient(uri)
.. note:: Certificate revocation lists and :ref:`OCSP` cannot be used together.
Client certificates
...................
PyMongo can be configured to present a client certificate using the
``tlsCertificateKeyFile`` option::
>>> client = pymongo.MongoClient('example.com',
... tls=True,
... tlsCertificateKeyFile='/path/to/client.pem')
If the private key for the client certificate is stored in a separate file,
it should be concatenated with the certificate file. For example, to
concatenate a PEM-formatted certificate file ``cert.pem`` and a PEM-formatted
keyfile ``key.pem`` into a single file ``combined.pem``, on Unix systems,
users can run::
$ cat key.pem cert.pem > combined.pem
PyMongo can be configured with the concatenated certificate keyfile using the
``tlsCertificateKeyFile`` option::
>>> client = pymongo.MongoClient('example.com',
... tls=True,
... tlsCertificateKeyFile='/path/to/combined.pem')
If the private key contained in the certificate keyfile is encrypted, users
can provide a password or passphrase to decrypt the encrypted private keys
using the ``tlsCertificateKeyFilePassword`` option::
>>> client = pymongo.MongoClient('example.com',
... tls=True,
... tlsCertificateKeyFile='/path/to/combined.pem',
... tlsCertificateKeyFilePassword=<passphrase>)
These options can also be passed as part of the MongoDB URI.
.. _OCSP:
OCSP
....
Starting with PyMongo 3.11, if PyMongo was installed with the "ocsp" extra::
python -m pip install pymongo[ocsp]
certificate revocation checking is enabled by way of `OCSP (Online Certification
Status Protocol) <https://en.wikipedia.org/wiki/Online_Certificate_Status_Protocol>`_.
MongoDB 4.4+ `staples OCSP responses <https://en.wikipedia.org/wiki/OCSP_stapling>`_
to the TLS handshake which PyMongo will verify, failing the TLS handshake if
the stapled OCSP response is invalid or indicates that the peer certificate is
revoked.
When connecting to a server version older than 4.4, or when a 4.4+ version of
MongoDB does not staple an OCSP response, PyMongo will attempt to connect
directly to an OCSP endpoint if the peer certificate specified one. The TLS
handshake will only fail in this case if the response indicates that the
certificate is revoked. Invalid or malformed responses will be ignored,
favoring availability over maximum security.
.. _TLSErrors:
Troubleshooting TLS Errors
..........................
TLS errors often fall into three categories - certificate verification failure,
protocol version mismatch or certificate revocation checking failure. An error
message similar to the following means that OpenSSL was not able to verify the
server's certificate::
[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed
This often occurs because OpenSSL does not have access to the system's
root certificates or the certificates are out of date. Linux users should
ensure that they have the latest root certificate updates installed from
their Linux vendor. macOS users using Python 3.7 or newer downloaded
from python.org `may have to run a script included with python
<https://bugs.python.org/issue29065#msg283984>`_ to install
root certificates::
open "/Applications/Python <YOUR PYTHON VERSION>/Install Certificates.command"
Users of older PyPy portable versions may have to `set an environment
variable <https://github.com/squeaky-pl/portable-pypy/issues/15>`_ to tell
OpenSSL where to find root certificates. This is easily done using the `certifi
module <https://pypi.org/project/certifi/>`_ from pypi::
$ pypy -m pip install certifi
$ export SSL_CERT_FILE=$(pypy -c "import certifi; print(certifi.where())")
An error message similar to the following message means that the OpenSSL
version used by Python does not support a new enough TLS protocol to connect
to the server::
[SSL: TLSV1_ALERT_PROTOCOL_VERSION] tlsv1 alert protocol version
Industry best practices recommend, and some regulations require, that older
TLS protocols be disabled in some MongoDB deployments. Some deployments may
disable TLS 1.0, others may disable TLS 1.0 and TLS 1.1. See the warning
earlier in this document for troubleshooting steps and solutions.
An error message similar to the following message means that certificate
revocation checking failed::
[('SSL routines', 'tls_process_initial_server_flight', 'invalid status response')]
See :ref:`OCSP` for more details.
Python 3.10+ incompatibilities with TLS/SSL on MongoDB <= 4.0
.............................................................
Note that `changes made to the ssl module in Python 3.10+
<https://docs.python.org/3/whatsnew/3.10.html#ssl>`_ may cause incompatibilities
with MongoDB <= 4.0. The following are some example errors that may occur with this
combination::
SSL handshake failed: localhost:27017: [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:997)
SSL handshake failed: localhost:27017: EOF occurred in violation of protocol (_ssl.c:997)
The MongoDB server logs may show the following error::
2021-06-30T21:22:44.917+0100 E NETWORK [conn16] SSL: error:1408A0C1:SSL routines:ssl3_get_client_hello:no shared cipher
To resolve this issue, use Python <3.10, upgrade to MongoDB 4.2+, or install
pymongo with the :ref:`OCSP` extra which relies on PyOpenSSL.

View File

@ -1,332 +0,0 @@
.. _type_hints-example:
Type Hints
==========
As of version 4.1, PyMongo ships with `type hints`_. With type hints, Python
type checkers can easily find bugs before they reveal themselves in your code.
If your IDE is configured to use type hints,
it can suggest more appropriate completions and highlight errors in your code.
Some examples include `PyCharm`_, `Sublime Text`_, and `Visual Studio Code`_.
You can also use the `mypy`_ tool from your command line or in Continuous Integration tests.
All of the public APIs in PyMongo are fully type hinted, and
several of them support generic parameters for the
type of document object returned when decoding BSON documents.
Due to `limitations in mypy`_, the default
values for generic document types are not yet provided (they will eventually be ``Dict[str, Any]``).
For a larger set of examples that use types, see the PyMongo `test_typing module`_.
If you would like to opt out of using the provided types, add the following to
your `mypy config`_: ::
[mypy-pymongo]
follow_imports = False
Basic Usage
-----------
Note that a type for :class:`~pymongo.mongo_client.MongoClient` must be specified. Here we use the
default, unspecified document type:
.. doctest::
>>> from pymongo import MongoClient
>>> client: MongoClient = MongoClient()
>>> collection = client.test.test
>>> inserted = collection.insert_one({"x": 1, "tags": ["dog", "cat"]})
>>> retrieved = collection.find_one({"x": 1})
>>> assert isinstance(retrieved, dict)
For a more accurate typing for document type you can use:
.. doctest::
>>> from typing import Any, Dict
>>> from pymongo import MongoClient
>>> client: MongoClient[Dict[str, Any]] = MongoClient()
>>> collection = client.test.test
>>> inserted = collection.insert_one({"x": 1, "tags": ["dog", "cat"]})
>>> retrieved = collection.find_one({"x": 1})
>>> assert isinstance(retrieved, dict)
Typed Client
------------
:class:`~pymongo.mongo_client.MongoClient` is generic on the document type used to decode BSON documents.
You can specify a :class:`~bson.raw_bson.RawBSONDocument` document type:
.. doctest::
>>> from pymongo import MongoClient
>>> from bson.raw_bson import RawBSONDocument
>>> client = MongoClient(document_class=RawBSONDocument)
>>> collection = client.test.test
>>> inserted = collection.insert_one({"x": 1, "tags": ["dog", "cat"]})
>>> result = collection.find_one({"x": 1})
>>> assert isinstance(result, RawBSONDocument)
Subclasses of :py:class:`collections.abc.Mapping` can also be used, such as :class:`~bson.son.SON`:
.. doctest::
>>> from bson import SON
>>> from pymongo import MongoClient
>>> client = MongoClient(document_class=SON[str, int])
>>> collection = client.test.test
>>> inserted = collection.insert_one({"x": 1, "y": 2})
>>> result = collection.find_one({"x": 1})
>>> assert result is not None
>>> assert result["x"] == 1
Note that when using :class:`~bson.son.SON`, the key and value types must be given, e.g. ``SON[str, Any]``.
Typed Collection
----------------
You can use :py:class:`~typing.TypedDict` (Python 3.8+) when using a well-defined schema for the data in a
:class:`~pymongo.collection.Collection`. Note that all `schema validation`_ for inserts and updates is done on the server.
These methods automatically add an "_id" field.
.. doctest::
:pyversion: >= 3.8
>>> from typing import TypedDict
>>> from pymongo import MongoClient
>>> from pymongo.collection import Collection
>>> class Movie(TypedDict):
... name: str
... year: int
...
>>> client: MongoClient = MongoClient()
>>> collection: Collection[Movie] = client.test.test
>>> inserted = collection.insert_one(Movie(name="Jurassic Park", year=1993))
>>> result = collection.find_one({"name": "Jurassic Park"})
>>> assert result is not None
>>> assert result["year"] == 1993
>>> # This will raise a type-checking error, despite being present, because it is added by PyMongo.
>>> assert result["_id"] # type:ignore[typeddict-item]
This same typing scheme works for all of the insert methods (:meth:`~pymongo.collection.Collection.insert_one`,
:meth:`~pymongo.collection.Collection.insert_many`, and :meth:`~pymongo.collection.Collection.bulk_write`).
For ``bulk_write`` both :class:`~pymongo.operations.InsertOne` and :class:`~pymongo.operations.ReplaceOne` operators are generic.
.. doctest::
:pyversion: >= 3.8
>>> from typing import TypedDict
>>> from pymongo import MongoClient
>>> from pymongo.operations import InsertOne
>>> from pymongo.collection import Collection
>>> client: MongoClient = MongoClient()
>>> collection: Collection[Movie] = client.test.test
>>> inserted = collection.bulk_write([InsertOne(Movie(name="Jurassic Park", year=1993))])
>>> result = collection.find_one({"name": "Jurassic Park"})
>>> assert result is not None
>>> assert result["year"] == 1993
>>> # This will raise a type-checking error, despite being present, because it is added by PyMongo.
>>> assert result["_id"] # type:ignore[typeddict-item]
Modeling Document Types with TypedDict
--------------------------------------
You can use :py:class:`~typing.TypedDict` (Python 3.8+) to model structured data.
As noted above, PyMongo will automatically add an ``_id`` field if it is not present. This also applies to TypedDict.
There are three approaches to this:
1. Do not specify ``_id`` at all. It will be inserted automatically, and can be retrieved at run-time, but will yield a type-checking error unless explicitly ignored.
2. Specify ``_id`` explicitly. This will mean that every instance of your custom TypedDict class will have to pass a value for ``_id``.
3. Make use of :py:class:`~typing.NotRequired`. This has the flexibility of option 1, but with the ability to access the ``_id`` field without causing a type-checking error.
Note: to use :py:class:`~typing.TypedDict` and :py:class:`~typing.NotRequired` in earlier versions of Python (<3.8, <3.11), use the ``typing_extensions`` package.
.. doctest:: typed-dict-example
:pyversion: >= 3.11
>>> from typing import TypedDict, NotRequired
>>> from pymongo import MongoClient
>>> from pymongo.collection import Collection
>>> from bson import ObjectId
>>> class Movie(TypedDict):
... name: str
... year: int
...
>>> class ExplicitMovie(TypedDict):
... _id: ObjectId
... name: str
... year: int
...
>>> class NotRequiredMovie(TypedDict):
... _id: NotRequired[ObjectId]
... name: str
... year: int
...
>>> client: MongoClient = MongoClient()
>>> collection: Collection[Movie] = client.test.test
>>> inserted = collection.insert_one(Movie(name="Jurassic Park", year=1993))
>>> result = collection.find_one({"name": "Jurassic Park"})
>>> assert result is not None
>>> # This will yield a type-checking error, despite being present, because it is added by PyMongo.
>>> assert result["_id"] # type:ignore[typeddict-item]
>>> collection: Collection[ExplicitMovie] = client.test.test
>>> # Note that the _id keyword argument must be supplied
>>> inserted = collection.insert_one(
... ExplicitMovie(_id=ObjectId(), name="Jurassic Park", year=1993)
... )
>>> result = collection.find_one({"name": "Jurassic Park"})
>>> assert result is not None
>>> # This will not raise a type-checking error.
>>> assert result["_id"]
>>> collection: Collection[NotRequiredMovie] = client.test.test
>>> # Note the lack of _id, similar to the first example
>>> inserted = collection.insert_one(NotRequiredMovie(name="Jurassic Park", year=1993))
>>> result = collection.find_one({"name": "Jurassic Park"})
>>> assert result is not None
>>> # This will not raise a type-checking error, despite not being provided explicitly.
>>> assert result["_id"]
Typed Database
--------------
While less common, you could specify that the documents in an entire database
match a well-defined schema using :py:class:`~typing.TypedDict` (Python 3.8+).
.. doctest::
>>> from typing import TypedDict
>>> from pymongo import MongoClient
>>> from pymongo.database import Database
>>> class Movie(TypedDict):
... name: str
... year: int
...
>>> client: MongoClient = MongoClient()
>>> db: Database[Movie] = client.test
>>> collection = db.test
>>> inserted = collection.insert_one({"name": "Jurassic Park", "year": 1993})
>>> result = collection.find_one({"name": "Jurassic Park"})
>>> assert result is not None
>>> assert result["year"] == 1993
Typed Command
-------------
When using the :meth:`~pymongo.database.Database.command`, you can specify the document type by providing a custom :class:`~bson.codec_options.CodecOptions`:
.. doctest::
>>> from pymongo import MongoClient
>>> from bson.raw_bson import RawBSONDocument
>>> from bson import CodecOptions
>>> client: MongoClient = MongoClient()
>>> options = CodecOptions(RawBSONDocument)
>>> result = client.admin.command("ping", codec_options=options)
>>> assert isinstance(result, RawBSONDocument)
Custom :py:class:`collections.abc.Mapping` subclasses and :py:class:`~typing.TypedDict` (Python 3.8+) are also supported.
For :py:class:`~typing.TypedDict`, use the form: ``options: CodecOptions[MyTypedDict] = CodecOptions(...)``.
Typed BSON Decoding
-------------------
You can specify the document type returned by :mod:`bson` decoding functions by providing :class:`~bson.codec_options.CodecOptions`:
.. doctest::
>>> from typing import Any, Dict
>>> from bson import CodecOptions, encode, decode
>>> class MyDict(Dict[str, Any]):
... def foo(self):
... return "bar"
...
>>> options = CodecOptions(document_class=MyDict)
>>> doc = {"x": 1, "y": 2}
>>> bsonbytes = encode(doc, codec_options=options)
>>> rt_document = decode(bsonbytes, codec_options=options)
>>> assert rt_document.foo() == "bar"
:class:`~bson.raw_bson.RawBSONDocument` and :py:class:`~typing.TypedDict` (Python 3.8+) are also supported.
For :py:class:`~typing.TypedDict`, use the form: ``options: CodecOptions[MyTypedDict] = CodecOptions(...)``.
Troubleshooting
---------------
Client Type Annotation
~~~~~~~~~~~~~~~~~~~~~~
If you forget to add a type annotation for a :class:`~pymongo.mongo_client.MongoClient` object you may get the following ``mypy`` error::
from pymongo import MongoClient
client = MongoClient() # error: Need type annotation for "client"
The solution is to annotate the type as ``client: MongoClient`` or ``client: MongoClient[Dict[str, Any]]``. See `Basic Usage`_.
Incompatible Types
~~~~~~~~~~~~~~~~~~
If you use the generic form of :class:`~pymongo.mongo_client.MongoClient` you
may encounter a ``mypy`` error like::
from pymongo import MongoClient
client: MongoClient = MongoClient()
client.test.test.insert_many(
{"a": 1}
) # error: Dict entry 0 has incompatible type "str": "int";
# expected "Mapping[str, Any]": "int"
The solution is to use ``client: MongoClient[Dict[str, Any]]`` as used in
`Basic Usage`_ .
Actual Type Errors
~~~~~~~~~~~~~~~~~~
Other times ``mypy`` will catch an actual error, like the following code::
from pymongo import MongoClient
from typing import Mapping
client: MongoClient = MongoClient()
client.test.test.insert_one(
[{}]
) # error: Argument 1 to "insert_one" of "Collection" has
# incompatible type "List[Dict[<nothing>, <nothing>]]";
# expected "Mapping[str, Any]"
In this case the solution is to use ``insert_one({})``, passing a document instead of a list.
Another example is trying to set a value on a :class:`~bson.raw_bson.RawBSONDocument`, which is read-only::
from bson.raw_bson import RawBSONDocument
from pymongo import MongoClient
client = MongoClient(document_class=RawBSONDocument)
coll = client.test.test
doc = {"my": "doc"}
coll.insert_one(doc)
retrieved = coll.find_one({"_id": doc["_id"]})
assert retrieved is not None
assert len(retrieved.raw) > 0
retrieved[
"foo"
] = "bar" # error: Unsupported target for indexed assignment
# ("RawBSONDocument") [index]
.. _PyCharm: https://www.jetbrains.com/help/pycharm/type-hinting-in-product.html
.. _Visual Studio Code: https://code.visualstudio.com/docs/languages/python
.. _Sublime Text: https://github.com/sublimelsp/LSP-pyright
.. _type hints: https://docs.python.org/3/library/typing.html
.. _mypy: https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html
.. _limitations in mypy: https://github.com/python/mypy/issues/3737
.. _mypy config: https://mypy.readthedocs.io/en/stable/config_file.html
.. _test_typing module: https://github.com/mongodb/mongo-python-driver/blob/master/test/test_typing.py
.. _schema validation: https://www.mongodb.com/docs/manual/core/schema-validation/#when-to-use-schema-validation

View File

@ -1,512 +0,0 @@
.. _handling-uuid-data-example:
Handling UUID Data
==================
PyMongo ships with built-in support for dealing with UUID types.
It is straightforward to store native :class:`uuid.UUID` objects
to MongoDB and retrieve them as native :class:`uuid.UUID` objects::
from pymongo import MongoClient
from bson.binary import UuidRepresentation
from uuid import uuid4
# use the 'standard' representation for cross-language compatibility.
client = MongoClient(uuidRepresentation='standard')
collection = client.get_database('uuid_db').get_collection('uuid_coll')
# remove all documents from collection
collection.delete_many({})
# create a native uuid object
uuid_obj = uuid4()
# save the native uuid object to MongoDB
collection.insert_one({'uuid': uuid_obj})
# retrieve the stored uuid object from MongoDB
document = collection.find_one({})
# check that the retrieved UUID matches the inserted UUID
assert document['uuid'] == uuid_obj
Native :class:`uuid.UUID` objects can also be used as part of MongoDB
queries::
document = collection.find_one({'uuid': uuid_obj})
assert document['uuid'] == uuid_obj
The above examples illustrate the simplest of use-cases - one where the
UUID is generated by, and used in the same application. However,
the situation can be significantly more complex when dealing with a MongoDB
deployment that contains UUIDs created by other drivers as the Java and CSharp
drivers have historically encoded UUIDs using a byte-order that is different
from the one used by PyMongo. Applications that require interoperability across
these drivers must specify the appropriate
:class:`~bson.binary.UuidRepresentation`.
In the following sections, we describe how drivers have historically differed
in their encoding of UUIDs, and how applications can use the
:class:`~bson.binary.UuidRepresentation` configuration option to maintain
cross-language compatibility.
.. attention:: New applications that do not share a MongoDB deployment with
any other application and that have never stored UUIDs in MongoDB
should use the ``standard`` UUID representation for cross-language
compatibility. See :ref:`configuring-uuid-representation` for details
on how to configure the :class:`~bson.binary.UuidRepresentation`.
.. _example-legacy-uuid:
Legacy Handling of UUID Data
----------------------------
Historically, MongoDB Drivers have used different byte-ordering
while serializing UUID types to :class:`~bson.binary.Binary`.
Consider, for instance, a UUID with the following canonical textual
representation::
00112233-4455-6677-8899-aabbccddeeff
This UUID would historically be serialized by the Python driver as::
00112233-4455-6677-8899-aabbccddeeff
The same UUID would historically be serialized by the C# driver as::
33221100-5544-7766-8899-aabbccddeeff
Finally, the same UUID would historically be serialized by the Java driver as::
77665544-3322-1100-ffee-ddccbbaa9988
.. note:: For in-depth information about the byte-order historically
used by different drivers, see the `Handling of Native UUID Types
Specification
<https://github.com/mongodb/specifications/blob/master/source/bson-binary-uuid/uuid.md>`_.
This difference in the byte-order of UUIDs encoded by different drivers can
result in highly unintuitive behavior in some scenarios. We detail two such
scenarios in the next sections.
Scenario 1: Applications Share a MongoDB Deployment
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Consider the following situation:
* Application ``C`` written in C# generates a UUID and uses it as the ``_id``
of a document that it proceeds to insert into the ``uuid_test`` collection of
the ``example_db`` database. Let's assume that the canonical textual
representation of the generated UUID is::
00112233-4455-6677-8899-aabbccddeeff
* Application ``P`` written in Python attempts to ``find`` the document
written by application ``C`` in the following manner::
from uuid import UUID
collection = client.example_db.uuid_test
result = collection.find_one({'_id': UUID('00112233-4455-6677-8899-aabbccddeeff')})
In this instance, ``result`` will never be the document that
was inserted by application ``C`` in the previous step. This is because of
the different byte-order used by the C# driver for representing UUIDs as
BSON Binary. The following query, on the other hand, will successfully find
this document::
result = collection.find_one({'_id': UUID('33221100-5544-7766-8899-aabbccddeeff')})
This example demonstrates how the differing byte-order used by different
drivers can hamper interoperability. To work around this problem, users should
configure their ``MongoClient`` with the appropriate
:class:`~bson.binary.UuidRepresentation` (in this case, ``client`` in application
``P`` can be configured to use the
:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation to
avoid the unintuitive behavior) as described in
:ref:`configuring-uuid-representation`.
Scenario 2: Round-Tripping UUIDs
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In the following examples, we see how using a misconfigured
:class:`~bson.binary.UuidRepresentation` can cause an application
to inadvertently change the :class:`~bson.binary.Binary` subtype, and in some
cases, the bytes of the :class:`~bson.binary.Binary` field itself when
round-tripping documents containing UUIDs.
Consider the following situation::
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
from bson.binary import Binary, UuidRepresentation
from uuid import uuid4
# Using UuidRepresentation.PYTHON_LEGACY stores a Binary subtype-3 UUID
python_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
input_uuid = uuid4()
collection = client.testdb.get_collection('test', codec_options=python_opts)
collection.insert_one({'_id': 'foo', 'uuid': input_uuid})
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})['_id'] == 'foo'
# Retrieving this document using UuidRepresentation.STANDARD returns a Binary instance
std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
std_collection = client.testdb.get_collection('test', codec_options=std_opts)
doc = std_collection.find_one({'_id': 'foo'})
assert isinstance(doc['uuid'], Binary)
# Round-tripping the retrieved document yields the exact same document
std_collection.replace_one({'_id': 'foo'}, doc)
round_tripped_doc = collection.find_one({'uuid': Binary(input_uuid.bytes, 3)})
assert doc == round_tripped_doc
In this example, round-tripping the document using the incorrect
:class:`~bson.binary.UuidRepresentation` (``STANDARD`` instead of
``PYTHON_LEGACY``) changes the :class:`~bson.binary.Binary` subtype as a
side-effect. **Note that this can also happen when the situation is reversed -
i.e. when the original document is written using ``STANDARD`` representation
and then round-tripped using the ``PYTHON_LEGACY`` representation.**
In the next example, we see the consequences of incorrectly using a
representation that modifies byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``)
when round-tripping documents::
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
from bson.binary import Binary, UuidRepresentation
from uuid import uuid4
# Using UuidRepresentation.STANDARD stores a Binary subtype-4 UUID
std_opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
input_uuid = uuid4()
collection = client.testdb.get_collection('test', codec_options=std_opts)
collection.insert_one({'_id': 'baz', 'uuid': input_uuid})
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)})['_id'] == 'baz'
# Retrieving this document using UuidRepresentation.JAVA_LEGACY returns a native UUID
# without modifying the UUID byte-order
java_opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY)
java_collection = client.testdb.get_collection('test', codec_options=java_opts)
doc = java_collection.find_one({'_id': 'baz'})
assert doc['uuid'] == input_uuid
# Round-tripping the retrieved document silently changes the Binary bytes and subtype
java_collection.replace_one({'_id': 'baz'}, doc)
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 3)}) is None
assert collection.find_one({'uuid': Binary(input_uuid.bytes, 4)}) is None
round_tripped_doc = collection.find_one({'_id': 'baz'})
assert round_tripped_doc['uuid'] == Binary(input_uuid.bytes, 3).as_uuid(UuidRepresentation.JAVA_LEGACY)
In this case, using the incorrect :class:`~bson.binary.UuidRepresentation`
(``JAVA_LEGACY`` instead of ``STANDARD``) changes the
:class:`~bson.binary.Binary` bytes and subtype as a side-effect.
**Note that this happens when any representation that
manipulates byte-order (``CSHARP_LEGACY`` or ``JAVA_LEGACY``) is incorrectly
used to round-trip UUIDs written with ``STANDARD``. When the situation is
reversed - i.e. when the original document is written using ``CSHARP_LEGACY``
or ``JAVA_LEGACY`` and then round-tripped using ``STANDARD`` -
only the :class:`~bson.binary.Binary` subtype is changed.**
.. note:: Starting in PyMongo 4.0, these issues will be resolved as
the ``STANDARD`` representation will decode Binary subtype 3 fields as
:class:`~bson.binary.Binary` objects of subtype 3 (instead of
:class:`uuid.UUID`), and each of the ``*_LEGACY`` representations will
decode Binary subtype 4 fields to :class:`~bson.binary.Binary` objects of
subtype 4 (instead of :class:`uuid.UUID`).
.. _configuring-uuid-representation:
Configuring a UUID Representation
---------------------------------
Users can work around the problems described above by configuring their
applications with the appropriate :class:`~bson.binary.UuidRepresentation`.
Configuring the representation modifies PyMongo's behavior while
encoding :class:`uuid.UUID` objects to BSON and decoding
Binary subtype 3 and 4 fields from BSON.
Applications can set the UUID representation in one of the following ways:
#. At the ``MongoClient`` level using the ``uuidRepresentation`` URI option,
e.g.::
client = MongoClient("mongodb://a:27107/?uuidRepresentation=standard")
Valid values are:
.. list-table::
:header-rows: 1
* - Value
- UUID Representation
* - ``unspecified``
- :ref:`unspecified-representation-details`
* - ``standard``
- :ref:`standard-representation-details`
* - ``pythonLegacy``
- :ref:`python-legacy-representation-details`
* - ``javaLegacy``
- :ref:`java-legacy-representation-details`
* - ``csharpLegacy``
- :ref:`csharp-legacy-representation-details`
#. At the ``MongoClient`` level using the ``uuidRepresentation`` kwarg
option, e.g.::
from bson.binary import UuidRepresentation
client = MongoClient(uuidRepresentation=UuidRepresentation.STANDARD)
#. At the ``Database`` or ``Collection`` level by supplying a suitable
:class:`~bson.codec_options.CodecOptions` instance, e.g.::
from bson.codec_options import CodecOptions
csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY)
java_opts = CodecOptions(uuid_representation=UuidRepresentation.JAVA_LEGACY)
# Get database/collection from client with csharpLegacy UUID representation
csharp_database = client.get_database('csharp_db', codec_options=csharp_opts)
csharp_collection = client.testdb.get_collection('csharp_coll', codec_options=csharp_opts)
# Get database/collection from existing database/collection with javaLegacy UUID representation
java_database = csharp_database.with_options(codec_options=java_opts)
java_collection = csharp_collection.with_options(codec_options=java_opts)
Supported UUID Representations
------------------------------
.. list-table::
:header-rows: 1
* - UUID Representation
- Default?
- Encode :class:`uuid.UUID` to
- Decode :class:`~bson.binary.Binary` subtype 4 to
- Decode :class:`~bson.binary.Binary` subtype 3 to
* - :ref:`standard-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
- :class:`~bson.binary.Binary` subtype 3
* - :ref:`unspecified-representation-details`
- Yes, in PyMongo>=4
- Raise :exc:`ValueError`
- :class:`~bson.binary.Binary` subtype 4
- :class:`~bson.binary.Binary` subtype 3
* - :ref:`python-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with standard byte-order
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
* - :ref:`java-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with Java legacy byte-order
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
* - :ref:`csharp-legacy-representation-details`
- No
- :class:`~bson.binary.Binary` subtype 3 with C# legacy byte-order
- :class:`~bson.binary.Binary` subtype 4
- :class:`uuid.UUID`
We now detail the behavior and use-case for each supported UUID
representation.
.. _unspecified-representation-details:
``UNSPECIFIED``
^^^^^^^^^^^^^^^
.. attention:: Starting in PyMongo 4.0,
:data:`~bson.binary.UuidRepresentation.UNSPECIFIED` is the default
UUID representation used by PyMongo.
The :data:`~bson.binary.UuidRepresentation.UNSPECIFIED` representation
prevents the incorrect interpretation of UUID bytes by stopping short of
automatically converting UUID fields in BSON to native UUID types. Decoding
a UUID when using this representation returns a :class:`~bson.binary.Binary`
object instead. If required, users can coerce the decoded
:class:`~bson.binary.Binary` objects into native UUIDs using the
:meth:`~bson.binary.Binary.as_uuid` method and specifying the appropriate
representation format. The following example shows
what this might look like for a UUID stored by the C# driver::
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
from bson.binary import Binary, UuidRepresentation
from uuid import uuid4
# Using UuidRepresentation.CSHARP_LEGACY
csharp_opts = CodecOptions(uuid_representation=UuidRepresentation.CSHARP_LEGACY)
# Store a legacy C#-formatted UUID
input_uuid = uuid4()
collection = client.testdb.get_collection('test', codec_options=csharp_opts)
collection.insert_one({'_id': 'foo', 'uuid': input_uuid})
# Using UuidRepresentation.UNSPECIFIED
unspec_opts = CodecOptions(uuid_representation=UuidRepresentation.UNSPECIFIED)
unspec_collection = client.testdb.get_collection('test', codec_options=unspec_opts)
# UUID fields are decoded as Binary when UuidRepresentation.UNSPECIFIED is configured
document = unspec_collection.find_one({'_id': 'foo'})
decoded_field = document['uuid']
assert isinstance(decoded_field, Binary)
# Binary.as_uuid() can be used to coerce the decoded value to a native UUID
decoded_uuid = decoded_field.as_uuid(UuidRepresentation.CSHARP_LEGACY)
assert decoded_uuid == input_uuid
Native :class:`uuid.UUID` objects cannot directly be encoded to
:class:`~bson.binary.Binary` when the UUID representation is ``UNSPECIFIED``
and attempting to do so will result in an exception::
unspec_collection.insert_one({'_id': 'bar', 'uuid': uuid4()})
Traceback (most recent call last):
...
ValueError: cannot encode native uuid.UUID with UuidRepresentation.UNSPECIFIED. UUIDs can be manually converted to bson.Binary instances using bson.Binary.from_uuid() or a different UuidRepresentation can be configured. See the documentation for UuidRepresentation for more information.
Instead, applications using :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`
must explicitly coerce a native UUID using the
:meth:`~bson.binary.Binary.from_uuid` method::
explicit_binary = Binary.from_uuid(uuid4(), UuidRepresentation.STANDARD)
unspec_collection.insert_one({'_id': 'bar', 'uuid': explicit_binary})
.. _standard-representation-details:
``STANDARD``
^^^^^^^^^^^^
.. attention:: This UUID representation should be used by new applications or
applications that are encoding and/or decoding UUIDs in MongoDB for the
first time.
The :data:`~bson.binary.UuidRepresentation.STANDARD` representation
enables cross-language compatibility by ensuring the same byte-ordering
when encoding UUIDs from all drivers. UUIDs written by a driver with this
representation configured will be handled correctly by every other driver provided
it is also configured with the ``STANDARD`` representation.
``STANDARD`` encodes native :class:`uuid.UUID` objects to
:class:`~bson.binary.Binary` subtype 4 objects.
.. _python-legacy-representation-details:
``PYTHON_LEGACY``
^^^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
generated by existing applications that use the Python driver
but **don't** explicitly set a UUID representation.
.. attention:: :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY`
was the default UUID representation in PyMongo 3.
The :data:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` representation
corresponds to the legacy representation of UUIDs used by PyMongo. This
representation conforms with
`RFC 4122 Section 4.1.2 <https://tools.ietf.org/html/rfc4122#section-4.1.2>`_.
The following example illustrates the use of this representation::
from bson.codec_options import CodecOptions, DEFAULT_CODEC_OPTIONS
from bson.binary import Binary, UuidRepresentation
# No configured UUID representation
collection = client.python_legacy.get_collection('test', codec_options=DEFAULT_CODEC_OPTIONS)
# Using UuidRepresentation.PYTHON_LEGACY
pylegacy_opts = CodecOptions(uuid_representation=UuidRepresentation.PYTHON_LEGACY)
pylegacy_collection = client.python_legacy.get_collection('test', codec_options=pylegacy_opts)
# UUIDs written by PyMongo 3 with no UuidRepresentation configured
# (or PyMongo 4.0 with PYTHON_LEGACY) can be queried using PYTHON_LEGACY
uuid_1 = uuid4()
pylegacy_collection.insert_one({'uuid': uuid_1})
document = pylegacy_collection.find_one({'uuid': uuid_1})
``PYTHON_LEGACY`` encodes native :class:`uuid.UUID` objects to
:class:`~bson.binary.Binary` subtype 3 objects, preserving the same
byte-order as :attr:`~uuid.UUID.bytes`::
from bson.binary import Binary
document = collection.find_one({'uuid': Binary(uuid_2.bytes, subtype=3)})
assert document['uuid'] == uuid_2
.. _java-legacy-representation-details:
``JAVA_LEGACY``
^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
written to MongoDB by legacy applications (i.e. applications that don't
use the ``STANDARD`` representation) using the Java driver.
The :data:`~bson.binary.UuidRepresentation.JAVA_LEGACY` representation
corresponds to the legacy representation of UUIDs used by the MongoDB Java
Driver.
.. note:: The ``JAVA_LEGACY`` representation reverses the order of bytes 0-7,
and bytes 8-15.
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
Let us assume that an application used the Java driver without an explicitly
specified UUID representation to insert the example UUID
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
value using ``PYTHON_LEGACY``, we end up with an entirely different UUID::
UUID('77665544-3322-1100-ffee-ddccbbaa9988')
However, if we explicitly set the representation to
:data:`~bson.binary.UuidRepresentation.JAVA_LEGACY`, we get the correct result::
UUID('00112233-4455-6677-8899-aabbccddeeff')
PyMongo uses the specified UUID representation to reorder the BSON bytes and
load them correctly. ``JAVA_LEGACY`` encodes native :class:`uuid.UUID` objects
to :class:`~bson.binary.Binary` subtype 3 objects, while performing the same
byte-reordering as the legacy Java driver's UUID to BSON encoder.
.. _csharp-legacy-representation-details:
``CSHARP_LEGACY``
^^^^^^^^^^^^^^^^^
.. attention:: This UUID representation should be used when reading UUIDs
written to MongoDB by legacy applications (i.e. applications that don't
use the ``STANDARD`` representation) using the C# driver.
The :data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY` representation
corresponds to the legacy representation of UUIDs used by the MongoDB C#
Driver.
.. note:: The ``CSHARP_LEGACY`` representation reverses the order of bytes 0-3,
bytes 4-5, and bytes 6-7.
As an example, consider the same UUID described in :ref:`example-legacy-uuid`.
Let us assume that an application used the C# driver without an explicitly
specified UUID representation to insert the example UUID
``00112233-4455-6677-8899-aabbccddeeff`` into MongoDB. If we try to read this
value using ``PYTHON_LEGACY``, we end up with an entirely different UUID::
UUID('33221100-5544-7766-8899-aabbccddeeff')
However, if we explicitly set the representation to
:data:`~bson.binary.UuidRepresentation.CSHARP_LEGACY`, we get the correct result::
UUID('00112233-4455-6677-8899-aabbccddeeff')
PyMongo uses the specified UUID representation to reorder the BSON bytes and
load them correctly. ``CSHARP_LEGACY`` encodes native :class:`uuid.UUID`
objects to :class:`~bson.binary.Binary` subtype 3 objects, while performing
the same byte-reordering as the legacy C# driver's UUID to BSON encoder.

View File

@ -1,595 +0,0 @@
Frequently Asked Questions
==========================
Is PyMongo thread-safe?
-----------------------
PyMongo is thread-safe and provides built-in connection pooling
for threaded applications.
.. _pymongo-fork-safe:
Is PyMongo fork-safe?
---------------------
PyMongo is not fork-safe. Care must be taken when using instances of
:class:`~pymongo.mongo_client.MongoClient` with ``fork()``. Specifically,
instances of MongoClient must not be copied from a parent process to
a child process. Instead, the parent process and each child process must
create their own instances of MongoClient. Instances of MongoClient copied from
the parent process have a high probability of deadlock in the child process due
to the inherent incompatibilities between ``fork()``, threads, and locks
described :ref:`below <pymongo-fork-safe-details>`. PyMongo will attempt to
issue a warning if there is a chance of this deadlock occurring.
.. _pymongo-fork-safe-details:
MongoClient spawns multiple threads to run background tasks such as monitoring
connected servers. These threads share state that is protected by instances of
:class:`~threading.Lock`, which are themselves `not fork-safe`_. The
driver is therefore subject to the same limitations as any other multithreaded
code that uses :class:`~threading.Lock` (and mutexes in general). One of these
limitations is that the locks become useless after ``fork()``. During the fork,
all locks are copied over to the child process in the same state as they were
in the parent: if they were locked, the copied locks are also locked. The child
created by ``fork()`` only has one thread, so any locks that were taken out by
other threads in the parent will never be released in the child. The next time
the child process attempts to acquire one of these locks, deadlock occurs.
Starting in version 4.3, PyMongo utilizes :py:func:`os.register_at_fork` to
reset its locks and other shared state in the child process after a
:py:func:`os.fork` to reduce the frequency of deadlocks. However deadlocks
are still possible because libraries that PyMongo depends on, like `OpenSSL`_
and `getaddrinfo(3)`_ (on some platforms), are not fork() safe in a
multithreaded application. Linux also imposes the restriction that:
After a `fork()`_ in a multithreaded program, the child can
safely call only async-signal-safe functions (see
`signal-safety(7)`_) until such time as it calls `execve(2)`_.
PyMongo relies on functions that are *not* `async-signal-safe`_ and hence the
child process can experience deadlocks or crashes when attempting to call
a non `async-signal-safe`_ function. For examples of deadlocks or crashes
that could occur see `PYTHON-3406`_.
For a long but interesting read about the problems of Python locks in
multithreaded contexts with ``fork()``, see https://bugs.python.org/issue6721.
.. _not fork-safe: https://bugs.python.org/issue6721
.. _OpenSSL: https://github.com/openssl/openssl/issues/19066
.. _fork(): https://man7.org/linux/man-pages/man2/fork.2.html
.. _signal-safety(7): https://man7.org/linux/man-pages/man7/signal-safety.7.html
.. _async-signal-safe: https://man7.org/linux/man-pages/man7/signal-safety.7.html
.. _execve(2): https://man7.org/linux/man-pages/man2/execve.2.html
.. _getaddrinfo(3): https://man7.org/linux/man-pages/man3/gai_strerror.3.html
.. _PYTHON-3406: https://jira.mongodb.org/browse/PYTHON-3406
.. _connection-pooling:
Can PyMongo help me load the results of my query as a Pandas ``DataFrame``?
---------------------------------------------------------------------------
While PyMongo itself does not provide any APIs for working with
numerical or columnar data,
`PyMongoArrow <https://mongo-arrow.readthedocs.io/en/pymongoarrow-0.1.1/>`_
is a companion library to PyMongo that makes it easy to load MongoDB query result sets as
`Pandas DataFrames <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html>`_,
`NumPy ndarrays <https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html>`_, or
`Apache Arrow Tables <https://arrow.apache.org/docs/python/generated/pyarrow.Table.html>`_.
How does connection pooling work in PyMongo?
--------------------------------------------
Every :class:`~pymongo.mongo_client.MongoClient` instance has a built-in
connection pool per server in your MongoDB topology. These pools open sockets
on demand to support the number of concurrent MongoDB operations that your
multi-threaded application requires. There is no thread-affinity for sockets.
The size of each connection pool is capped at ``maxPoolSize``, which defaults
to 100. If there are ``maxPoolSize`` connections to a server and all are in
use, the next request to that server will wait until one of the connections
becomes available.
The client instance opens two additional sockets per server in your MongoDB
topology for monitoring the server's state.
For example, a client connected to a 3-node replica set opens 6 monitoring
sockets. It also opens as many sockets as needed to support a multi-threaded
application's concurrent operations on each server, up to ``maxPoolSize``. With
a ``maxPoolSize`` of 100, if the application only uses the primary (the
default), then only the primary connection pool grows and the total connections
is at most 106. If the application uses a
:class:`~pymongo.read_preferences.ReadPreference` to query the secondaries,
their pools also grow and the total connections can reach 306.
Additionally, the pools are rate limited such that each connection pool can
only create at most 2 connections in parallel at any time. The connection
creation covers all the work required to set up a new connection
including DNS, TCP, SSL/TLS, MongoDB handshake, and MongoDB authentication.
For example, if three threads concurrently attempt to check out a connection
from an empty pool, the first two threads will begin creating new connections
while the third thread will wait. The third thread stops waiting when either:
- one of the first two threads finishes creating a connection, or
- an existing connection is checked back into the pool.
Rate limiting concurrent connection creation reduces the likelihood of
connection storms and improves the driver's ability to reuse existing
connections.
It is possible to set the minimum number of concurrent connections to each
server with ``minPoolSize``, which defaults to 0. The connection pool will be
initialized with this number of sockets. If sockets are closed due to any
network errors, causing the total number of sockets (both in use and idle) to
drop below the minimum, more sockets are opened until the minimum is reached.
The maximum number of milliseconds that a connection can remain idle in the
pool before being removed and replaced can be set with ``maxIdleTimeMS``, which
defaults to ``None`` (no limit).
The default configuration for a :class:`~pymongo.mongo_client.MongoClient`
works for most applications::
client = MongoClient(host, port)
Create this client **once** for each process, and reuse it for all
operations. It is a common mistake to create a new client for each request,
which is very inefficient.
To support extremely high numbers of concurrent MongoDB operations within one
process, increase ``maxPoolSize``::
client = MongoClient(host, port, maxPoolSize=200)
... or make it unbounded::
client = MongoClient(host, port, maxPoolSize=None)
Once the pool reaches its maximum size, additional threads have to wait for
sockets to become available. PyMongo does not limit the number of threads
that can wait for sockets to become available and it is the application's
responsibility to limit the size of its thread pool to bound queuing during a
load spike. Threads are allowed to wait for any length of time unless
``waitQueueTimeoutMS`` is defined::
client = MongoClient(host, port, waitQueueTimeoutMS=100)
A thread that waits more than 100ms (in this example) for a socket raises
:exc:`~pymongo.errors.ConnectionFailure`. Use this option if it is more
important to bound the duration of operations during a load spike than it is to
complete every operation.
When :meth:`~pymongo.mongo_client.MongoClient.close` is called by any thread,
all idle sockets are closed, and all sockets that are in use will be closed as
they are returned to the pool.
Does PyMongo support Python 3?
------------------------------
PyMongo supports CPython 3.9+ and PyPy3.10+. See the :doc:`python3` for details.
Does PyMongo support asynchronous frameworks like Gevent, asyncio, Tornado, or Twisted?
---------------------------------------------------------------------------------------
As of PyMongo v4.13, PyMongo fully supports asyncio and `Tornado <https://www.tornadoweb.org/>`_. See `the official docs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/reference/migration/>`_ for more details.
PyMongo also fully supports :doc:`Gevent <examples/gevent>`.
For `Twisted <https://twistedmatrix.com/>`_, see `TxMongo
<https://github.com/twisted/txmongo>`_. Its stated mission is to keep feature
parity with PyMongo.
.. _writes-and-ids:
Why does PyMongo add an _id field to all of my documents?
---------------------------------------------------------
When a document is inserted to MongoDB using
:meth:`~pymongo.collection.Collection.insert_one`,
:meth:`~pymongo.collection.Collection.insert_many`, or
:meth:`~pymongo.collection.Collection.bulk_write`, and that document does not
include an ``_id`` field, PyMongo automatically adds one for you, set to an
instance of :class:`~bson.objectid.ObjectId`. For example::
>>> my_doc = {'x': 1}
>>> collection.insert_one(my_doc)
InsertOneResult(ObjectId('560db337fba522189f171720'), acknowledged=True)
>>> my_doc
{'x': 1, '_id': ObjectId('560db337fba522189f171720')}
Users often discover this behavior when calling
:meth:`~pymongo.collection.Collection.insert_many` with a list of references
to a single document, which raises :exc:`~pymongo.errors.BulkWriteError`. Several
Python idioms lead to this pitfall::
>>> doc = {}
>>> collection.insert_many(doc for _ in range(10))
Traceback (most recent call last):
...
pymongo.errors.BulkWriteError: batch op errors occurred
>>> doc
{'_id': ObjectId('560f171cfba52279f0b0da0c')}
>>> docs = [{}]
>>> collection.insert_many(docs * 10)
Traceback (most recent call last):
...
pymongo.errors.BulkWriteError: batch op errors occurred
>>> docs
[{'_id': ObjectId('560f1933fba52279f0b0da0e')}]
PyMongo adds an ``_id`` field in this manner for a few reasons:
- All MongoDB documents are required to have an ``_id`` field.
- If PyMongo were to insert a document without an ``_id`` MongoDB would add one
itself, but it would not report the value back to PyMongo.
- Copying the document to insert before adding the ``_id`` field would be
prohibitively expensive for most high write volume applications.
If you don't want PyMongo to add an ``_id`` to your documents, insert only
documents that already have an ``_id`` field, added by your application.
Key order in subdocuments -- why does my query work in the shell but not PyMongo?
---------------------------------------------------------------------------------
..
Note: We should rework this section now that Python 3.6+ has ordered dict.
.. testsetup:: key-order
from bson.son import SON
from pymongo.mongo_client import MongoClient
collection = MongoClient().test.collection
collection.drop()
collection.insert_one({"_id": 1.0, "subdocument": SON([("b", 1.0), ("a", 1.0)])})
The key-value pairs in a BSON document can have any order (except that ``_id``
is always first). The mongo shell preserves key order when reading and writing
data. Observe that "b" comes before "a" when we create the document and when it
is displayed:
.. code-block:: javascript
> // mongo shell.
> db.collection.insertOne( { "_id" : 1, "subdocument" : { "b" : 1, "a" : 1 } } )
WriteResult({ "nInserted" : 1 })
> db.collection.findOne()
{ "_id" : 1, "subdocument" : { "b" : 1, "a" : 1 } }
PyMongo represents BSON documents as Python dicts by default, and the order
of keys in dicts is not defined. That is, a dict declared with the "a" key
first is the same, to Python, as one with "b" first:
>>> print({'a': 1.0, 'b': 1.0})
{'a': 1.0, 'b': 1.0}
>>> print({'b': 1.0, 'a': 1.0})
{'a': 1.0, 'b': 1.0}
Therefore, Python dicts are not guaranteed to show keys in the order they are
stored in BSON. Here, "a" is shown before "b":
>>> print(collection.find_one())
{'_id': 1.0, 'subdocument': {'a': 1.0, 'b': 1.0}}
To preserve order when reading BSON, use the :class:`~bson.son.SON` class,
which is a dict that remembers its key order. First, get a handle to the
collection, configured to use :class:`~bson.son.SON` instead of dict:
.. doctest:: key-order
:options: +NORMALIZE_WHITESPACE
>>> from bson import CodecOptions, SON
>>> opts = CodecOptions(document_class=SON)
>>> opts
CodecOptions(document_class=...SON..., tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME)
>>> collection_son = collection.with_options(codec_options=opts)
Now, documents and subdocuments in query results are represented with
:class:`~bson.son.SON` objects:
.. doctest:: key-order
>>> print(collection_son.find_one())
SON([('_id', 1.0), ('subdocument', SON([('b', 1.0), ('a', 1.0)]))])
The subdocument's actual storage layout is now visible: "b" is before "a".
Because a dict's key order is not defined, you cannot predict how it will be
serialized **to** BSON. But MongoDB considers subdocuments equal only if their
keys have the same order. So if you use a dict to query on a subdocument it may
not match:
>>> collection.find_one({'subdocument': {'a': 1.0, 'b': 1.0}}) is None
True
Swapping the key order in your query makes no difference:
>>> collection.find_one({'subdocument': {'b': 1.0, 'a': 1.0}}) is None
True
... because, as we saw above, Python considers the two dicts the same.
There are two solutions. First, you can match the subdocument field-by-field:
>>> collection.find_one({'subdocument.a': 1.0,
... 'subdocument.b': 1.0})
{'_id': 1.0, 'subdocument': {'a': 1.0, 'b': 1.0}}
The query matches any subdocument with an "a" of 1.0 and a "b" of 1.0,
regardless of the order you specify them in Python or the order they are stored
in BSON. Additionally, this query now matches subdocuments with additional
keys besides "a" and "b", whereas the previous query required an exact match.
The second solution is to use a :class:`~bson.son.SON` to specify the key order:
>>> query = {'subdocument': SON([('b', 1.0), ('a', 1.0)])}
>>> collection.find_one(query)
{'_id': 1.0, 'subdocument': {'a': 1.0, 'b': 1.0}}
The key order you use when you create a :class:`~bson.son.SON` is preserved
when it is serialized to BSON and used as a query. Thus you can create a
subdocument that exactly matches the subdocument in the collection.
.. seealso:: `MongoDB Manual entry on subdocument matching
<https://mongodb.com/docs/manual/tutorial/query-embedded-documents/>`_.
What does *CursorNotFound* cursor id not valid at server mean?
--------------------------------------------------------------
Cursors in MongoDB can time out on the server if they've been open for
a long time without any operations being performed on them. This can
lead to an :class:`~pymongo.errors.CursorNotFound` exception being
raised when attempting to iterate the cursor.
How do I change the timeout value for cursors?
----------------------------------------------
MongoDB doesn't support custom timeouts for cursors, but cursor
timeouts can be turned off entirely. Pass ``no_cursor_timeout=True`` to
:meth:`~pymongo.collection.Collection.find`.
How can I store :mod:`decimal.Decimal` instances?
-------------------------------------------------
PyMongo >= 3.4 supports the Decimal128 BSON type introduced in MongoDB 3.4.
See :mod:`~bson.decimal128` for more information.
MongoDB <= 3.2 only supports IEEE 754 floating points - the same as the
Python float type. The only way PyMongo could store Decimal instances to
these versions of MongoDB would be to convert them to this standard, so
you'd really only be storing floats anyway - we force users to do this
conversion explicitly so that they are aware that it is happening.
I'm saving ``9.99`` but when I query my document contains ``9.9900000000000002`` - what's going on here?
--------------------------------------------------------------------------------------------------------
The database representation is ``9.99`` as an IEEE floating point (which
is common to MongoDB and Python as well as most other modern
languages). The problem is that ``9.99`` cannot be represented exactly
with a double precision floating point - this is true in some versions of
Python as well:
>>> 9.99
9.9900000000000002
The result that you get when you save ``9.99`` with PyMongo is exactly the
same as the result you'd get saving it with the JavaScript shell or
any of the other languages (and as the data you're working with when
you type ``9.99`` into a Python program).
Can you add attribute style access for documents?
-------------------------------------------------
This request has come up a number of times but we've decided not to
implement anything like this. The relevant `jira case
<https://jira.mongodb.org/browse/PYTHON-35>`_ has some information
about the decision, but here is a brief summary:
1. This will pollute the attribute namespace for documents, so could
lead to subtle bugs / confusing errors when using a key with the
same name as a dictionary method.
2. The only reason we even use SON objects instead of regular
dictionaries is to maintain key ordering, since the server
requires this for certain operations. So we're hesitant to
needlessly complicate SON (at some point it's hypothetically
possible we might want to revert back to using dictionaries alone,
without breaking backwards compatibility for everyone).
3. It's easy (and Pythonic) for new users to deal with documents,
since they behave just like dictionaries. If we start changing
their behavior it adds a barrier to entry for new users - another
class to learn.
What is the correct way to handle time zones with PyMongo?
----------------------------------------------------------
See :doc:`examples/datetimes` for examples on how to handle
:class:`~datetime.datetime` objects correctly.
How can I save a :mod:`datetime.date` instance?
-----------------------------------------------
PyMongo doesn't support saving :mod:`datetime.date` instances, since
there is no BSON type for dates without times. Rather than having the
driver enforce a convention for converting :mod:`datetime.date`
instances to :mod:`datetime.datetime` instances for you, any
conversion should be performed in your client code.
.. _web-application-querying-by-objectid:
When I query for a document by ObjectId in my web application I get no result
-----------------------------------------------------------------------------
It's common in web applications to encode documents' ObjectIds in URLs, like::
"/posts/50b3bda58a02fb9a84d8991e"
Your web framework will pass the ObjectId portion of the URL to your request
handler as a string, so it must be converted to :class:`~bson.objectid.ObjectId`
before it is passed to :meth:`~pymongo.collection.Collection.find_one`. It is a
common mistake to forget to do this conversion. Here's how to do it correctly
in Flask_ (other web frameworks are similar)::
from pymongo import MongoClient
from bson.objectid import ObjectId
from flask import Flask, render_template
client = MongoClient()
app = Flask(__name__)
@app.route("/posts/<_id>")
def show_post(_id):
# NOTE!: converting _id from string to ObjectId before passing to find_one
post = client.db.posts.find_one({'_id': ObjectId(_id)})
return render_template('post.html', post=post)
if __name__ == "__main__":
app.run()
.. _Flask: http://flask.pocoo.org/
.. seealso:: :ref:`querying-by-objectid`
How can I use PyMongo from Django?
----------------------------------
`Django <https://www.djangoproject.com/>`_ is a popular Python web
framework. Django includes an ORM, :mod:`django.db`. Currently,
there's no official MongoDB backend for Django.
`django-mongodb-engine <https://django-mongodb-engine.readthedocs.io/>`_
is an unofficial MongoDB backend that supports Django aggregations, (atomic)
updates, embedded objects, Map/Reduce and GridFS. It allows you to use most
of Django's built-in features, including the ORM, admin, authentication, site
and session frameworks and caching.
However, it's easy to use MongoDB (and PyMongo) from Django
without using a Django backend. Certain features of Django that require
:mod:`django.db` (admin, authentication and sessions) will not work
using just MongoDB, but most of what Django provides can still be
used.
One project which should make working with MongoDB and Django easier
is `mango <https://github.com/vpulim/mango>`_. Mango is a set of
MongoDB backends for Django sessions and authentication (bypassing
:mod:`django.db` entirely).
.. _using-with-mod-wsgi:
Does PyMongo work with **mod_wsgi**?
------------------------------------
Yes. See the configuration guide for :ref:`pymongo-and-mod_wsgi`.
Does PyMongo work with PythonAnywhere?
--------------------------------------
No. PyMongo creates Python threads which
`PythonAnywhere <https://www.pythonanywhere.com>`_ does not support. For more
information see `PYTHON-1495 <https://jira.mongodb.org/browse/PYTHON-1495>`_.
How can I use something like Python's ``json`` module to encode my documents to JSON?
-------------------------------------------------------------------------------------
:mod:`~bson.json_util` is PyMongo's built in, flexible tool for using
Python's :mod:`json` module with BSON documents and `MongoDB Extended JSON
<https://mongodb.com/docs/manual/reference/mongodb-extended-json/>`_. The
:mod:`json` module won't work out of the box with all documents from PyMongo
as PyMongo supports some special types (like :class:`~bson.objectid.ObjectId`
and :class:`~bson.dbref.DBRef`) that are not supported in JSON.
`python-bsonjs <https://pypi.python.org/pypi/python-bsonjs>`_ is a fast
BSON to MongoDB Extended JSON converter built on top of
`libbson <https://github.com/mongodb/libbson>`_. ``python-bsonjs`` does not
depend on PyMongo and can offer a nice performance improvement over
:mod:`~bson.json_util`. ``python-bsonjs`` works best with PyMongo when using
:class:`~bson.raw_bson.RawBSONDocument`.
Why do I get OverflowError decoding dates stored by another language's driver?
------------------------------------------------------------------------------
PyMongo decodes BSON datetime values to instances of Python's
:class:`datetime.datetime`. Instances of :class:`datetime.datetime` are
limited to years between :data:`datetime.MINYEAR` (usually 1) and
:data:`datetime.MAXYEAR` (usually 9999). Some MongoDB drivers (e.g. the PHP
driver) can store BSON datetimes with year values far outside those supported
by :class:`datetime.datetime`.
There are a few ways to work around this issue. Starting with PyMongo 4.3,
:func:`bson.decode` can decode BSON datetimes in one of four ways; the
behavior is selected with the ``datetime_conversion`` parameter of
:class:`~bson.codec_options.CodecOptions`.
The default option is
:attr:`~bson.codec_options.DatetimeConversion.DATETIME`, which will
attempt to decode as a :class:`datetime.datetime`, allowing
:exc:`OverflowError` to occur upon out-of-range dates.
:attr:`~bson.codec_options.DatetimeConversion.DATETIME_AUTO` alters
this behavior to instead return :class:`~bson.datetime_ms.DatetimeMS` when
representations are out-of-range, while returning :class:`~datetime.datetime`
objects as before:
.. doctest::
>>> from datetime import datetime
>>> from bson.datetime_ms import DatetimeMS
>>> from bson.codec_options import DatetimeConversion
>>> from pymongo import MongoClient
>>> client = MongoClient(datetime_conversion=DatetimeConversion.DATETIME_AUTO)
>>> client.db.collection.insert_one({"x": datetime(1970, 1, 1)})
InsertOneResult(ObjectId('...'), acknowledged=True)
>>> client.db.collection.insert_one({"x": DatetimeMS(2**62)})
InsertOneResult(ObjectId('...'), acknowledged=True)
>>> for x in client.db.collection.find():
... print(x)
...
{'_id': ObjectId('...'), 'x': datetime.datetime(1970, 1, 1, 0, 0)}
{'_id': ObjectId('...'), 'x': DatetimeMS(4611686018427387904)}
For other options, please refer to
:class:`~bson.codec_options.DatetimeConversion`.
Another option that does not involve setting ``datetime_conversion`` is to
filter out document values outside of the range supported by
:class:`~datetime.datetime`:
>>> from datetime import datetime
>>> coll = client.test.dates
>>> cur = coll.find({'dt': {'$gte': datetime.min, '$lte': datetime.max}})
Another option, assuming you don't need the datetime field, is to filter out
just that field::
>>> cur = coll.find({}, projection={'dt': False})
.. _multiprocessing:
Using PyMongo with Multiprocessing
----------------------------------
On Unix systems the multiprocessing module spawns processes using ``fork()``.
Care must be taken when using instances of
:class:`~pymongo.mongo_client.MongoClient` with ``fork()``. Specifically,
instances of MongoClient must not be copied from a parent process to a child
process. Instead, the parent process and each child process must create their
own instances of MongoClient. For example::
# Each process creates its own instance of MongoClient.
def func():
db = pymongo.MongoClient().mydb
# Do something with db.
proc = multiprocessing.Process(target=func)
proc.start()
**Never do this**::
client = pymongo.MongoClient()
# Each child process attempts to copy a global MongoClient
# created in the parent process. Never do this.
def func():
db = client.mydb
# Do something with db.
proc = multiprocessing.Process(target=func)
proc.start()
Instances of MongoClient copied from the parent process have a high probability
of deadlock in the child process due to
:ref:`inherent incompatibilities between fork(), threads, and locks
<pymongo-fork-safe-details>`. PyMongo will attempt to issue a warning if there
is a chance of this deadlock occurring.
.. seealso:: :ref:`pymongo-fork-safe`

View File

@ -3,70 +3,21 @@ PyMongo |release| Documentation
.. note:: The PyMongo documentation has been migrated to the
`MongoDB Documentation site <https://www.mongodb.com/docs/languages/python/pymongo-driver/current>`_.
As of PyMongo 4.10, the ReadTheDocs site will contain the detailed changelog and API docs, while the
rest of the documentation will only appear on the MongoDB Documentation site.
This site contains only the detailed changelog and API docs, while the
rest of the documentation appears on the MongoDB Documentation site.
Overview
--------
**PyMongo** is a Python distribution containing tools for working with
`MongoDB <https://www.mongodb.org>`_, and is the recommended way to
work with MongoDB from Python. This documentation attempts to explain
everything you need to know to use **PyMongo**.
.. todo:: a list of PyMongo's features
:doc:`installation`
Instructions on how to get the distribution.
:doc:`tutorial`
Start here for a quick overview.
:doc:`async-tutorial`
Start here for a quick overview of the asynchronous API.
:doc:`examples/index`
Examples of how to perform specific tasks.
:doc:`atlas`
Using PyMongo with MongoDB Atlas.
:doc:`examples/tls`
Using PyMongo with TLS / SSL.
:doc:`examples/encryption`
Using PyMongo with In-Use Encryption.
:doc:`examples/type_hints`
Using PyMongo with type hints.
:doc:`examples/logging`
Using PyMongo's logging capabilities.
:doc:`faq`
Some questions that come up often.
:doc:`migrate-to-pymongo4`
A PyMongo 3.x to 4.x migration guide.
:doc:`python3`
Frequently asked questions about python 3 support.
:doc:`compatibility-policy`
Explanation of deprecations, and how to keep pace with changes in PyMongo's
API.
work with MongoDB from Python.
:doc:`api/index`
The complete API documentation, organized by module.
:doc:`tools`
A listing of Python tools and libraries that have been written for
MongoDB.
:doc:`changelog`
A full list of changes to PyMongo.
:doc:`developer/index`
Developer guide for contributors to PyMongo.
:doc:`common-issues`
Common issues encountered when using PyMongo.
Getting Help
------------
@ -97,10 +48,6 @@ minor tweaks to this documentation. To contribute, fork the project on
`GitHub <https://github.com/mongodb/mongo-python-driver/>`_ and send a
pull request.
Changes
-------
See the :doc:`changelog` for a full list of changes to PyMongo.
About This Documentation
------------------------
This documentation is generated using the `Sphinx
@ -119,18 +66,6 @@ Indices and tables
.. toctree::
:hidden:
atlas
installation
tutorial
async-tutorial
examples/index
faq
compatibility-policy
api/index
tools
contributors
changelog
python3
migrate-to-pymongo4
developer/index
common-issues
contributors

View File

@ -1,197 +0,0 @@
Installing / Upgrading
======================
.. highlight:: bash
**PyMongo** is in the `Python Package Index
<https://pypi.python.org/pypi/pymongo/>`_.
.. warning:: **Do not install the "bson" package from pypi.** PyMongo comes
with its own bson package; doing "pip install bson"
installs a third-party package that is incompatible with PyMongo.
Installing with pip
-------------------
We recommend using `pip <https://pypi.python.org/pypi/pip>`_
to install pymongo on all platforms::
$ python3 -m pip install pymongo
To get a specific version of pymongo::
$ python3 -m pip install pymongo==3.5.1
To upgrade using pip::
$ python3 -m pip install --upgrade pymongo
Dependencies
------------
PyMongo supports CPython 3.9+ and PyPy3.10+.
Required dependencies
.....................
Support for mongodb+srv:// URIs requires `dnspython
<https://pypi.python.org/pypi/dnspython>`_.
.. _optional-deps:
Optional dependencies
.....................
GSSAPI authentication requires `pykerberos
<https://pypi.python.org/pypi/pykerberos>`_ on Unix or `WinKerberos
<https://pypi.python.org/pypi/winkerberos>`_ on Windows. The correct
dependency can be installed automatically along with PyMongo::
$ python3 -m pip install "pymongo[gssapi]"
:ref:`MONGODB-AWS` authentication requires `pymongo-auth-aws
<https://pypi.org/project/pymongo-auth-aws/>`_::
$ python3 -m pip install "pymongo[aws]"
:ref:`OCSP` requires `PyOpenSSL
<https://pypi.org/project/pyOpenSSL/>`_, `requests
<https://pypi.org/project/requests/>`_ and `service_identity
<https://pypi.org/project/service_identity/>`_::
$ python3 -m pip install "pymongo[ocsp]"
Wire protocol compression with snappy requires `python-snappy
<https://pypi.org/project/python-snappy>`_::
$ python3 -m pip install "pymongo[snappy]"
Wire protocol compression with zstandard requires `zstandard
<https://pypi.org/project/zstandard>`_::
$ python3 -m pip install "pymongo[zstd]"
:ref:`Client-Side Field Level Encryption` requires `pymongocrypt
<https://pypi.org/project/pymongocrypt/>`_ and
`pymongo-auth-aws <https://pypi.org/project/pymongo-auth-aws/>`_::
$ python3 -m pip install "pymongo[encryption]"
You can install all dependencies automatically with the following
command::
$ python3 -m pip install "pymongo[gssapi,aws,ocsp,snappy,zstd,encryption]"
Installing from source
----------------------
If you'd rather install directly from the source (i.e. to stay on the
bleeding edge), install the C extension dependencies then check out the
latest source from GitHub and install the driver from the resulting tree::
$ git clone https://github.com/mongodb/mongo-python-driver.git pymongo
$ cd pymongo/
$ pip install .
Installing from source on Unix
..............................
To build the optional C extensions on Linux or another non-macOS Unix you must
have the GNU C compiler (gcc) installed. Depending on your flavor of Unix
(or Linux distribution) you may also need a python development package that
provides the necessary header files for your version of Python. The package
name may vary from distro to distro.
Debian and Ubuntu users should issue the following command::
$ sudo apt-get install build-essential python-dev
Users of Red Hat based distributions (RHEL, CentOS, Amazon Linux, Oracle Linux,
Fedora, etc.) should issue the following command::
$ sudo yum install gcc python-devel
Installing from source on macOS / OSX
.....................................
If you want to install PyMongo with C extensions from source you will need
the command line developer tools. On modern versions of macOS they can be
installed by running the following in Terminal (found in
/Applications/Utilities/)::
xcode-select --install
For older versions of OSX you may need Xcode. See the notes below for various
OSX and Xcode versions.
**Snow Leopard (10.6)** - Xcode 3 with 'UNIX Development Support'.
**Snow Leopard Xcode 4**: The Python versions shipped with OSX 10.6.x
are universal binaries. They support i386, PPC, and x86_64. Xcode 4 removed
support for PPC, causing the distutils version shipped with Apple's builds of
Python to fail to build the C extensions if you have Xcode 4 installed. There
is a workaround::
# For some Python builds from python.org
$ env ARCHFLAGS='-arch i386 -arch x86_64' python -m pip install pymongo
See `https://bugs.python.org/issue11623 <https://bugs.python.org/issue11623>`_
for a more detailed explanation.
**Lion (10.7) and newer** - PyMongo's C extensions can be built against
versions of Python 3.9+ downloaded from python.org. In all cases Xcode must be
installed with 'UNIX Development Support'.
**Xcode 5.1**: Starting with version 5.1 the version of clang that ships with
Xcode throws an error when it encounters compiler flags it doesn't recognize.
This may cause C extension builds to fail with an error similar to::
clang: error: unknown argument: '-mno-fused-madd' [-Wunused-command-line-argument-hard-error-in-future]
There are workarounds::
# Apple specified workaround for Xcode 5.1
$ ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future pip install pymongo
# Alternative workaround using CFLAGS
$ CFLAGS=-Qunused-arguments pip install pymongo
Installing from source on Windows
.................................
If you want to install PyMongo with C extensions from source the following
requirements apply to both CPython and ActiveState's ActivePython:
Windows
~~~~~~~
Install Visual Studio 2015+.
.. _install-no-c:
Installing Without C Extensions
-------------------------------
By default, the driver attempts to build and install optional C
extensions (used for increasing performance) when it is installed. If
any extension fails to build the driver will be installed anyway but a
warning will be printed.
If you wish to install PyMongo without the C extensions, even if the
extensions build properly, it can be done by setting the ``NO_EXT``
environment variable when running *pip install*::
$ NO_EXT=1 python -m pip install .
Installing a beta or release candidate
--------------------------------------
MongoDB, Inc. may occasionally tag a beta or release candidate for testing by
the community before final release. These releases will not be uploaded to pypi
but can be found on the
`GitHub tags page <https://github.com/mongodb/mongo-python-driver/tags>`_.
They can be installed by passing the full URL for the tag to pip::
$ python3 -m pip install https://github.com/mongodb/mongo-python-driver/archive/4.4.0b0.tar.gz

View File

@ -1,3 +1,5 @@
:orphan:
.. _pymongo4-migration-guide:
PyMongo 4 Migration Guide
@ -34,7 +36,7 @@ Python 3.6+
PyMongo 4.0 drops support for Python 2.7, 3.4, and 3.5. Users who wish to
upgrade to 4.x must first upgrade to Python 3.6.2+. Users upgrading from
Python 2 should consult the :doc:`python3`.
Python 2 should consult `Python 3 <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/reference/upgrade/#upgrade-pymongo-versions>`_.
Enable Deprecation Warnings
---------------------------
@ -796,8 +798,7 @@ incoming documents after receiving them from PyMongo.
Alternatively, if your application uses the ``SONManipulator`` API to convert
custom types to BSON, the :class:`~bson.codec_options.TypeCodec` and
:class:`~bson.codec_options.TypeRegistry` APIs may be a suitable alternative.
For more information, see the
:doc:`custom type example <examples/custom_type>`.
For more information, see `Custom Types <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/custom-types/type-codecs/#encode-data-with-type-codecs>`_.
``SON().items()`` now returns ``dict_items`` object.
----------------------------------------------------
@ -982,7 +983,7 @@ you will receive an error like this when attempting to encode a :class:`uuid.UUI
ValueError: cannot encode native uuid.UUID with UuidRepresentation.UNSPECIFIED. UUIDs can be manually converted...
See :ref:`handling-uuid-data-example` for details.
See `Handling UUIDs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#universally-unique-ids--uuids->`_ for details.
Additional BSON classes implement ``__slots__``
...............................................

View File

@ -1,114 +0,0 @@
Python 3 FAQ
============
What Python 3 versions are supported?
-------------------------------------
PyMongo supports CPython 3.9+ and PyPy3.10+.
Are there any PyMongo behavior changes with Python 3?
-----------------------------------------------------
Only one intentional change. Instances of :class:`bytes`
are encoded as BSON type 5 (Binary data) with subtype 0.
In Python 3 they are decoded back to :class:`bytes`. In
Python 2 they are decoded to :class:`~bson.binary.Binary`
with subtype 0.
For example, let's insert a :class:`bytes` instance using Python 3 then
read it back. Notice the byte string is decoded back to :class:`bytes`::
Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08)
[Clang 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pymongo
>>> c = pymongo.MongoClient()
>>> c.test.bintest.insert_one({'binary': b'this is a byte string'}).inserted_id
ObjectId('4f9086b1fba5222021000000')
>>> c.test.bintest.find_one()
{'binary': b'this is a byte string', '_id': ObjectId('4f9086b1fba5222021000000')}
Now retrieve the same document in Python 2. Notice the byte string is decoded
to :class:`~bson.binary.Binary`::
Python 2.7.6 (default, Feb 26 2014, 10:36:22)
[GCC 4.7.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pymongo
>>> c = pymongo.MongoClient()
>>> c.test.bintest.find_one()
{u'binary': Binary('this is a byte string', 0), u'_id': ObjectId('4f9086b1fba5222021000000')}
There is a similar change in behavior in parsing JSON binary with subtype 0.
In Python 3 they are decoded into :class:`bytes`. In Python 2 they are
decoded to :class:`~bson.binary.Binary` with subtype 0.
For example, let's decode a JSON binary subtype 0 using Python 3. Notice the
byte string is decoded to :class:`bytes`::
Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08)
[Clang 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from bson.json_util import loads
>>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}')
{'b': b'this is a byte string'}
Now decode the same JSON in Python 2. Notice the byte string is decoded
to :class:`~bson.binary.Binary`::
Python 2.7.10 (default, Feb 7 2017, 00:08:15)
[GCC 4.2.1 Compatible Apple LLVM 8.0.0 (clang-800.0.34)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from bson.json_util import loads
>>> loads('{"b": {"$binary": "dGhpcyBpcyBhIGJ5dGUgc3RyaW5n", "$type": "00"}}')
{u'b': Binary('this is a byte string', 0)}
Why can't I share pickled ObjectIds between some versions of Python 2 and 3?
----------------------------------------------------------------------------
Instances of :class:`~bson.objectid.ObjectId` pickled using Python 2
can always be unpickled using Python 3.
If you pickled an ObjectId using Python 2 and want to unpickle it using
Python 3 you must pass ``encoding='latin-1'`` to pickle.loads::
Python 2.7.6 (default, Feb 26 2014, 10:36:22)
[GCC 4.7.3] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pickle
>>> from bson.objectid import ObjectId
>>> oid = ObjectId()
>>> oid
ObjectId('4f919ba2fba5225b84000000')
>>> pickle.dumps(oid)
'ccopy_reg\n_reconstructor\np0\n(cbson.objectid\...'
Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08)
[Clang 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pickle
>>> pickle.loads(b'ccopy_reg\n_reconstructor\np0\n(cbson.objectid\...', encoding='latin-1')
ObjectId('4f919ba2fba5225b84000000')
If you need to pickle ObjectIds using Python 3 and unpickle them using Python 2
you must use ``protocol <= 2``::
Python 3.7.9 (v3.7.9:13c94747c7, Aug 15 2020, 01:31:08)
[Clang 6.0 (clang-600.0.57)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pickle
>>> from bson.objectid import ObjectId
>>> oid = ObjectId()
>>> oid
ObjectId('4f96f20c430ee6bd06000000')
>>> pickle.dumps(oid, protocol=2)
b'\x80\x02cbson.objectid\nObjectId\nq\x00)\x81q\x01c_codecs\nencode\...'
Python 2.7.15 (default, Jun 21 2018, 15:00:48)
[GCC 7.3.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import pickle
>>> pickle.loads('\x80\x02cbson.objectid\nObjectId\nq\x00)\x81q\x01c_codecs\nencode\...')
ObjectId('4f96f20c430ee6bd06000000')

View File

@ -1,173 +0,0 @@
Tools
=====
Many tools have been written for working with **PyMongo**. If you know
of or have created a tool for working with MongoDB from Python please
list it here.
.. note:: We try to keep this list current. As such, projects that
have not been updated recently or appear to be unmaintained will
occasionally be removed from the list or moved to the back (to keep
the list from becoming too intimidating).
If a project gets removed that is still being developed or is in active use
please let us know or add it back.
ORM-like Layers
---------------
Some people have found that they prefer to work with a layer that
has more features than PyMongo provides. Often, things like models and
validation are desired. To that end, several different ORM-like layers
have been written by various authors.
It is our recommendation that new users begin by working directly with
PyMongo, as described in the rest of this documentation. Many people
have found that the features of PyMongo are enough for their
needs. Even if you eventually come to the decision to use one of these
layers, the time spent working directly with the driver will have
increased your understanding of how MongoDB actually works.
MongoEngine
`MongoEngine <http://mongoengine.org/>`_ is an ORM-like
layer on top of PyMongo. It allows you to define schemas for
documents and query collections using syntax inspired by the Django
ORM. The code is available on `GitHub
<https://github.com/mongoengine/mongoengine>`_; for more information, see
the `tutorial <https://docs.mongoengine.org/tutorial.html>`_.
MincePy
`MincePy <https://mincepy.readthedocs.io/en/latest/>`_ is an
object-document mapper (ODM) designed to make any Python object storable
and queryable in a MongoDB database. It is designed with machine learning
and big-data computational and experimental science applications in mind
but is entirely general and can be useful to anyone looking to organise,
share, or process large amounts of data with as little change to their current
workflow as possible.
Ming
`Ming <https://ming.readthedocs.io/en/latest/>`_ is a
library that allows you to enforce schemas on a MongoDB database in
your Python application. It was developed by `SourceForge
<https://sourceforge.net/>`_ in the course of their migration to
MongoDB.
MotorEngine
`MotorEngine <https://motorengine.readthedocs.io/>`_ is a port of
MongoEngine to Motor, for asynchronous access with Tornado.
It implements the same modeling APIs to be data-portable, meaning that a
model defined in MongoEngine can be read in MotorEngine. The source is
`available on GitHub <https://github.com/heynemann/motorengine>`_.
uMongo
`uMongo <https://umongo.readthedocs.io/>`_ is a Python MongoDB ODM.
Its inception comes from two needs: the lack of an async ODM and the
difficulty of doing document (un)serialization with existing ODMs.
Works with multiple drivers: PyMongo, TxMongo, motor_asyncio, and
mongomock. The source `is available on GitHub
<https://github.com/Scille/umongo>`_.
Django MongoDB Backend
`Django MongoDB Backend <https://django-mongodb-backend.readthedocs.io>`_ is a
database backend library specifically made for Django. The integration takes
advantage of MongoDB's unique document model capabilities, which align
naturally with Django's philosophy of simplified data modeling and
reduced development complexity. The source is available
`on GitHub <https://github.com/mongodb-labs/django-mongodb-backend>`_.
No longer maintained
""""""""""""""""""""
PyMODM
`PyMODM <https://pypi.python.org/pypi/pymodm>`_ is an ORM-like framework on top
of PyMongo. PyMODM is maintained by engineers at MongoDB, Inc. and is quick
to adopt new MongoDB features. PyMODM is a "core" ODM, meaning that it
provides simple, extensible functionality that can be leveraged by other
libraries to target platforms like Django. At the same time, PyMODM is
powerful enough to be used for developing applications on its own. Complete
documentation is available on `readthedocs
<https://pymodm.readthedocs.io/en/stable/>`_.
MongoKit
The `MongoKit <https://github.com/namlook/mongokit>`_ framework
is an ORM-like layer on top of PyMongo. There is also a MongoKit
`google group <https://groups.google.com/group/mongokit>`_.
Minimongo
`minimongo <https://pypi.python.org/pypi/minimongo>`_ is a lightweight,
pythonic interface to MongoDB. It retains pymongo's query and update API,
and provides a number of additional features, including a simple
document-oriented interface, connection pooling, index management, and
collection & database naming helpers. The `source is on GitHub
<https://github.com/MiniMongo/minimongo>`_.
Manga
`Manga <https://pypi.python.org/pypi/manga>`_ aims to be a simpler ORM-like
layer on top of PyMongo. The syntax for defining schema is inspired by the
Django ORM, but Pymongo's query language is maintained. The source `is on
GitHub <https://github.com/wladston/manga>`_.
Humongolus
`Humongolus <https://github.com/entone/Humongolus>`_ is a lightweight ORM
framework for Python and MongoDB. The name comes from the combination of
MongoDB and `Homunculus <https://en.wikipedia.org/wiki/Homunculus>`_ (the
concept of a miniature though fully formed human body). Humongolus allows
you to create models/schemas with robust validation. It attempts to be as
pythonic as possible and exposes the pymongo cursor objects whenever
possible. The code is available for download
`at GitHub <https://github.com/entone/Humongolus>`_. Tutorials and usage
examples are also available at GitHub.
Framework Tools
---------------
This section lists tools and adapters that have been designed to work with
various Python frameworks and libraries.
* `Djongo <https://www.djongomapper.com/>`_ is a connector for using
Django with MongoDB as the database backend. Use the Django Admin GUI to add and
modify documents in MongoDB.
The `Djongo Source Code <https://github.com/doableware/djongo>`_ is hosted on GitHub
and the `Djongo package <https://pypi.python.org/pypi/djongo>`_ is on PyPI.
* `Django MongoDB Engine
<https://django-mongodb-engine.readthedocs.io/en/latest/>`_ is a MongoDB
database backend for Django that completely integrates with its ORM.
For more information `see the tutorial
<https://django-mongodb-engine.readthedocs.io/en/latest/tutorial.html>`_.
* `mango <https://github.com/vpulim/mango>`_ provides MongoDB backends for
Django sessions and authentication (bypassing :mod:`django.db` entirely).
* `Django MongoEngine
<https://github.com/MongoEngine/django-mongoengine>`_ is a MongoDB backend for
Django; see this `example
<https://github.com/MongoEngine/django-mongoengine/tree/master/example/tumblelog>`_.
For more information see `<https://django-mongoengine.readthedocs.io/en/latest/>`_.
* `mongodb_beaker <https://pypi.python.org/pypi/mongodb_beaker>`_ is a
project to enable using MongoDB as a backend for `Beaker's <https://beaker.readthedocs.io/en/latest/>`_ caching / session system.
`The source is on GitHub <https://github.com/bwmcadams/mongodb_beaker>`_.
* `Log4Mongo <https://github.com/log4mongo/log4mongo-python>`_ is a flexible
Python logging handler that can store logs in MongoDB using normal and capped
collections.
* `MongoLog <https://github.com/puentesarrin/mongodb-log/>`_ is a Python logging
handler that stores logs in MongoDB using a capped collection.
* `rod.recipe.mongodb <https://pypi.python.org/pypi/rod.recipe.mongodb/>`_ is a
ZC Buildout recipe for downloading and installing MongoDB.
* `mongobox <https://github.com/theorm/mongobox>`_ is a tool to run a sandboxed
MongoDB instance from within a python app.
* `Flask-MongoAlchemy <https://github.com/cobrateam/flask-mongoalchemy/>`_ adds
Flask support for MongoDB using MongoAlchemy.
* `Flask-MongoKit <https://github.com/jarus/flask-mongokit/>`_ Flask extension
to better integrate MongoKit into Flask.
* `Flask-PyMongo <https://github.com/dcrosta/flask-pymongo/>`_ Flask-PyMongo
bridges Flask and PyMongo.
Alternative Drivers
-------------------
These are alternatives to PyMongo.
* `Motor <https://github.com/mongodb/motor>`_ is a full-featured, non-blocking
MongoDB driver for Python Tornado applications.
As of PyMongo v4.13, Motor's features have been merged into PyMongo via the new AsyncMongoClient API.
As a result of this merger, Motor will be officially deprecated on May 14th, 2026.
For more information, see `the official PyMongo docs <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/reference/migration/>`_.
* `TxMongo <https://github.com/twisted/txmongo>`_ is an asynchronous Twisted
Python driver for MongoDB.
* `MongoMock <https://github.com/mongomock/mongomock>`_ is a small
library to help testing Python code that interacts with MongoDB via
Pymongo.

View File

@ -1,413 +0,0 @@
Tutorial
========
.. testsetup::
from pymongo import MongoClient
client = MongoClient()
client.drop_database("test-database")
This tutorial is intended as an introduction to working with
**MongoDB** and **PyMongo**.
Prerequisites
-------------
Before we start, make sure that you have the **PyMongo** distribution
:doc:`installed <installation>`. In the Python shell, the following
should run without raising an exception:
.. doctest::
>>> import pymongo
This tutorial also assumes that a MongoDB instance is running on the
default host and port. Assuming you have `downloaded and installed
<https://www.mongodb.com/docs/manual/installation/>`_ MongoDB, you
can start it like so:
.. code-block:: bash
$ mongod
Making a Connection with MongoClient
------------------------------------
The first step when working with **PyMongo** is to create a
:class:`~pymongo.mongo_client.MongoClient` to the running **mongod**
instance. Doing so is easy:
.. doctest::
>>> from pymongo import MongoClient
>>> client = MongoClient()
The above code will connect on the default host and port. We can also
specify the host and port explicitly, as follows:
.. doctest::
>>> client = MongoClient("localhost", 27017)
Or use the MongoDB URI format:
.. doctest::
>>> client = MongoClient("mongodb://localhost:27017/")
Getting a Database
------------------
A single instance of MongoDB can support multiple independent
`databases <https://www.mongodb.com/docs/manual/core/databases-and-collections>`_. When
working with PyMongo you access databases using attribute style access
on :class:`~pymongo.mongo_client.MongoClient` instances:
.. doctest::
>>> db = client.test_database
If your database name is such that using attribute style access won't
work (like ``test-database``), you can use dictionary style access
instead:
.. doctest::
>>> db = client["test-database"]
Getting a Collection
--------------------
A `collection <https://www.mongodb.com/docs/manual/core/databases-and-collections>`_ is a
group of documents stored in MongoDB, and can be thought of as roughly
the equivalent of a table in a relational database. Getting a
collection in PyMongo works the same as getting a database:
.. doctest::
>>> collection = db.test_collection
or (using dictionary style access):
.. doctest::
>>> collection = db["test-collection"]
An important note about collections (and databases) in MongoDB is that
they are created lazily - none of the above commands have actually
performed any operations on the MongoDB server. Collections and
databases are created when the first document is inserted into them.
Documents
---------
Data in MongoDB is represented (and stored) using JSON-style
documents. In PyMongo we use dictionaries to represent documents. As
an example, the following dictionary might be used to represent a blog
post:
.. doctest::
>>> import datetime
>>> post = {
... "author": "Mike",
... "text": "My first blog post!",
... "tags": ["mongodb", "python", "pymongo"],
... "date": datetime.datetime.now(tz=datetime.timezone.utc),
... }
Note that documents can contain native Python types (like
:class:`datetime.datetime` instances) which will be automatically
converted to and from the appropriate `BSON
<https://bsonspec.org/>`_ types.
.. todo:: link to table of Python <-> BSON types
Inserting a Document
--------------------
To insert a document into a collection we can use the
:meth:`~pymongo.collection.Collection.insert_one` method:
.. doctest::
>>> posts = db.posts
>>> post_id = posts.insert_one(post).inserted_id
>>> post_id
ObjectId('...')
When a document is inserted a special key, ``"_id"``, is automatically
added if the document doesn't already contain an ``"_id"`` key. The value
of ``"_id"`` must be unique across the
collection. :meth:`~pymongo.collection.Collection.insert_one` returns an
instance of :class:`~pymongo.results.InsertOneResult`. For more information
on ``"_id"``, see the `documentation on _id
<https://www.mongodb.com/docs/manual/reference/method/ObjectId/>`_.
After inserting the first document, the *posts* collection has
actually been created on the server. We can verify this by listing all
of the collections in our database:
.. doctest::
>>> db.list_collection_names()
['posts']
Getting a Single Document With :meth:`~pymongo.collection.Collection.find_one`
------------------------------------------------------------------------------
The most basic type of query that can be performed in MongoDB is
:meth:`~pymongo.collection.Collection.find_one`. This method returns a
single document matching a query (or ``None`` if there are no
matches). It is useful when you know there is only one matching
document, or are only interested in the first match. Here we use
:meth:`~pymongo.collection.Collection.find_one` to get the first
document from the posts collection:
.. doctest::
>>> import pprint
>>> pprint.pprint(posts.find_one())
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
The result is a dictionary matching the one that we inserted previously.
.. note:: The returned document contains an ``"_id"``, which was
automatically added on insert.
:meth:`~pymongo.collection.Collection.find_one` also supports querying
on specific elements that the resulting document must match. To limit
our results to a document with author "Mike" we do:
.. doctest::
>>> pprint.pprint(posts.find_one({"author": "Mike"}))
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
If we try with a different author, like "Eliot", we'll get no result:
.. doctest::
>>> posts.find_one({"author": "Eliot"})
>>>
.. _querying-by-objectid:
Querying By ObjectId
--------------------
We can also find a post by its ``_id``, which in our example is an ObjectId:
.. doctest::
>>> post_id
ObjectId(...)
>>> pprint.pprint(posts.find_one({"_id": post_id}))
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
Note that an ObjectId is not the same as its string representation:
.. doctest::
>>> post_id_as_str = str(post_id)
>>> posts.find_one({"_id": post_id_as_str}) # No result
>>>
A common task in web applications is to get an ObjectId from the
request URL and find the matching document. It's necessary in this
case to **convert the ObjectId from a string** before passing it to
``find_one``::
from bson.objectid import ObjectId
# The web framework gets post_id from the URL and passes it as a string
def get(post_id):
# Convert from string to ObjectId:
document = client.db.collection.find_one({'_id': ObjectId(post_id)})
.. seealso:: :ref:`web-application-querying-by-objectid`
Bulk Inserts
------------
In order to make querying a little more interesting, let's insert a
few more documents. In addition to inserting a single document, we can
also perform *bulk insert* operations, by passing a list as the
first argument to :meth:`~pymongo.collection.Collection.insert_many`.
This will insert each document in the list, sending only a single
command to the server:
.. doctest::
>>> new_posts = [
... {
... "author": "Mike",
... "text": "Another post!",
... "tags": ["bulk", "insert"],
... "date": datetime.datetime(2009, 11, 12, 11, 14),
... },
... {
... "author": "Eliot",
... "title": "MongoDB is fun",
... "text": "and pretty easy too!",
... "date": datetime.datetime(2009, 11, 10, 10, 45),
... },
... ]
>>> result = posts.insert_many(new_posts)
>>> result.inserted_ids
[ObjectId('...'), ObjectId('...')]
There are a couple of interesting things to note about this example:
- The result from :meth:`~pymongo.collection.Collection.insert_many` now
returns two :class:`~bson.objectid.ObjectId` instances, one for
each inserted document.
- ``new_posts[1]`` has a different "shape" than the other posts -
there is no ``"tags"`` field and we've added a new field,
``"title"``. This is what we mean when we say that MongoDB is
*schema-free*.
Querying for More Than One Document
-----------------------------------
To get more than a single document as the result of a query we use the
:meth:`~pymongo.collection.Collection.find`
method. :meth:`~pymongo.collection.Collection.find` returns a
:class:`~pymongo.cursor.Cursor` instance, which allows us to iterate
over all matching documents. For example, we can iterate over every
document in the ``posts`` collection:
.. doctest::
>>> for post in posts.find():
... pprint.pprint(post)
...
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['bulk', 'insert'],
'text': 'Another post!'}
{'_id': ObjectId('...'),
'author': 'Eliot',
'date': datetime.datetime(...),
'text': 'and pretty easy too!',
'title': 'MongoDB is fun'}
Just like we did with :meth:`~pymongo.collection.Collection.find_one`,
we can pass a document to :meth:`~pymongo.collection.Collection.find`
to limit the returned results. Here, we get only those documents whose
author is "Mike":
.. doctest::
>>> for post in posts.find({"author": "Mike"}):
... pprint.pprint(post)
...
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['mongodb', 'python', 'pymongo'],
'text': 'My first blog post!'}
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['bulk', 'insert'],
'text': 'Another post!'}
Counting
--------
If we just want to know how many documents match a query we can
perform a :meth:`~pymongo.collection.Collection.count_documents` operation
instead of a full query. We can get a count of all of the documents
in a collection:
.. doctest::
>>> posts.count_documents({})
3
or just of those documents that match a specific query:
.. doctest::
>>> posts.count_documents({"author": "Mike"})
2
Range Queries
-------------
MongoDB supports many different types of `advanced queries
<https://www.mongodb.com/docs/manual/reference/operator/>`_. As an
example, let's perform a query where we limit results to posts older
than a certain date, but also sort the results by author:
.. doctest::
>>> d = datetime.datetime(2009, 11, 12, 12)
>>> for post in posts.find({"date": {"$lt": d}}).sort("author"):
... pprint.pprint(post)
...
{'_id': ObjectId('...'),
'author': 'Eliot',
'date': datetime.datetime(...),
'text': 'and pretty easy too!',
'title': 'MongoDB is fun'}
{'_id': ObjectId('...'),
'author': 'Mike',
'date': datetime.datetime(...),
'tags': ['bulk', 'insert'],
'text': 'Another post!'}
Here we use the special ``"$lt"`` operator to do a range query, and
also call :meth:`~pymongo.cursor.Cursor.sort` to sort the results
by author.
Indexing
--------
Adding indexes can help accelerate certain queries and can also add additional
functionality to querying and storing documents. In this example, we'll
demonstrate how to create a `unique index
<https://mongodb.com/docs/manual/core/index-unique/>`_ on a key that rejects
documents whose value for that key already exists in the index.
First, we'll need to create the index:
.. doctest::
>>> result = db.profiles.create_index([("user_id", pymongo.ASCENDING)], unique=True)
>>> sorted(list(db.profiles.index_information()))
['_id_', 'user_id_1']
Notice that we have two indexes now: one is the index on ``_id`` that MongoDB
creates automatically, and the other is the index on ``user_id`` we just
created.
Now let's set up some user profiles:
.. doctest::
>>> user_profiles = [{"user_id": 211, "name": "Luke"}, {"user_id": 212, "name": "Ziltoid"}]
>>> result = db.profiles.insert_many(user_profiles)
The index prevents us from inserting a document whose ``user_id`` is already in
the collection:
.. doctest::
:options: +IGNORE_EXCEPTION_DETAIL
>>> new_profile = {"user_id": 213, "name": "Drew"}
>>> duplicate_profile = {"user_id": 212, "name": "Tommy"}
>>> result = db.profiles.insert_one(new_profile) # This is fine.
>>> result = db.profiles.insert_one(duplicate_profile)
Traceback (most recent call last):
DuplicateKeyError: E11000 duplicate key error index: test_database.profiles.$user_id_1 dup key: { : 212 }
.. seealso:: The MongoDB documentation on `indexes <https://www.mongodb.com/docs/manual/indexes/>`_

View File

@ -70,7 +70,7 @@ def _disallow_transactions(session: Optional[AsyncClientSession]) -> None:
class AsyncGridFS:
"""An instance of GridFS on top of a single Database."""
def __init__(self, database: AsyncDatabase, collection: str = "fs"):
def __init__(self, database: AsyncDatabase[Any], collection: str = "fs"):
"""Create a new instance of :class:`GridFS`.
Raises :class:`TypeError` if `database` is not an instance of
@ -463,7 +463,7 @@ class AsyncGridFSBucket:
def __init__(
self,
db: AsyncDatabase,
db: AsyncDatabase[Any],
bucket_name: str = "fs",
chunk_size_bytes: int = DEFAULT_CHUNK_SIZE,
write_concern: Optional[WriteConcern] = None,
@ -513,11 +513,11 @@ class AsyncGridFSBucket:
self._bucket_name = bucket_name
self._collection = db[bucket_name]
self._chunks: AsyncCollection = self._collection.chunks.with_options(
self._chunks: AsyncCollection[Any] = self._collection.chunks.with_options(
write_concern=write_concern, read_preference=read_preference
)
self._files: AsyncCollection = self._collection.files.with_options(
self._files: AsyncCollection[Any] = self._collection.files.with_options(
write_concern=write_concern, read_preference=read_preference
)
@ -1085,7 +1085,7 @@ class AsyncGridIn:
def __init__(
self,
root_collection: AsyncCollection,
root_collection: AsyncCollection[Any],
session: Optional[AsyncClientSession] = None,
**kwargs: Any,
) -> None:
@ -1172,7 +1172,7 @@ class AsyncGridIn:
object.__setattr__(self, "_buffered_docs_size", 0)
async def _create_index(
self, collection: AsyncCollection, index_key: Any, unique: bool
self, collection: AsyncCollection[Any], index_key: Any, unique: bool
) -> None:
doc = await collection.find_one(projection={"_id": 1}, session=self._session)
if doc is None:
@ -1456,7 +1456,7 @@ class AsyncGridOut(GRIDOUT_BASE_CLASS): # type: ignore
def __init__(
self,
root_collection: AsyncCollection,
root_collection: AsyncCollection[Any],
file_id: Optional[int] = None,
file_document: Optional[Any] = None,
session: Optional[AsyncClientSession] = None,
@ -1829,7 +1829,7 @@ class _AsyncGridOutChunkIterator:
def __init__(
self,
grid_out: AsyncGridOut,
chunks: AsyncCollection,
chunks: AsyncCollection[Any],
session: Optional[AsyncClientSession],
next_chunk: Any,
) -> None:
@ -1842,7 +1842,7 @@ class _AsyncGridOutChunkIterator:
self._num_chunks = math.ceil(float(self._length) / self._chunk_size)
self._cursor = None
_cursor: Optional[AsyncCursor]
_cursor: Optional[AsyncCursor[Any]]
def expected_chunk_length(self, chunk_n: int) -> int:
if chunk_n < self._num_chunks - 1:
@ -1921,7 +1921,7 @@ class _AsyncGridOutChunkIterator:
class AsyncGridOutIterator:
def __init__(
self, grid_out: AsyncGridOut, chunks: AsyncCollection, session: AsyncClientSession
self, grid_out: AsyncGridOut, chunks: AsyncCollection[Any], session: AsyncClientSession
):
self._chunk_iter = _AsyncGridOutChunkIterator(grid_out, chunks, session, 0)
@ -1935,14 +1935,14 @@ class AsyncGridOutIterator:
__anext__ = next
class AsyncGridOutCursor(AsyncCursor):
class AsyncGridOutCursor(AsyncCursor): # type: ignore[type-arg]
"""A cursor / iterator for returning GridOut objects as the result
of an arbitrary query against the GridFS files collection.
"""
def __init__(
self,
collection: AsyncCollection,
collection: AsyncCollection[Any],
filter: Optional[Mapping[str, Any]] = None,
skip: int = 0,
limit: int = 0,

View File

@ -70,7 +70,7 @@ def _disallow_transactions(session: Optional[ClientSession]) -> None:
class GridFS:
"""An instance of GridFS on top of a single Database."""
def __init__(self, database: Database, collection: str = "fs"):
def __init__(self, database: Database[Any], collection: str = "fs"):
"""Create a new instance of :class:`GridFS`.
Raises :class:`TypeError` if `database` is not an instance of
@ -461,7 +461,7 @@ class GridFSBucket:
def __init__(
self,
db: Database,
db: Database[Any],
bucket_name: str = "fs",
chunk_size_bytes: int = DEFAULT_CHUNK_SIZE,
write_concern: Optional[WriteConcern] = None,
@ -511,11 +511,11 @@ class GridFSBucket:
self._bucket_name = bucket_name
self._collection = db[bucket_name]
self._chunks: Collection = self._collection.chunks.with_options(
self._chunks: Collection[Any] = self._collection.chunks.with_options(
write_concern=write_concern, read_preference=read_preference
)
self._files: Collection = self._collection.files.with_options(
self._files: Collection[Any] = self._collection.files.with_options(
write_concern=write_concern, read_preference=read_preference
)
@ -1077,7 +1077,7 @@ class GridIn:
def __init__(
self,
root_collection: Collection,
root_collection: Collection[Any],
session: Optional[ClientSession] = None,
**kwargs: Any,
) -> None:
@ -1163,7 +1163,7 @@ class GridIn:
object.__setattr__(self, "_buffered_docs", [])
object.__setattr__(self, "_buffered_docs_size", 0)
def _create_index(self, collection: Collection, index_key: Any, unique: bool) -> None:
def _create_index(self, collection: Collection[Any], index_key: Any, unique: bool) -> None:
doc = collection.find_one(projection={"_id": 1}, session=self._session)
if doc is None:
try:
@ -1444,7 +1444,7 @@ class GridOut(GRIDOUT_BASE_CLASS): # type: ignore
def __init__(
self,
root_collection: Collection,
root_collection: Collection[Any],
file_id: Optional[int] = None,
file_document: Optional[Any] = None,
session: Optional[ClientSession] = None,
@ -1817,7 +1817,7 @@ class GridOutChunkIterator:
def __init__(
self,
grid_out: GridOut,
chunks: Collection,
chunks: Collection[Any],
session: Optional[ClientSession],
next_chunk: Any,
) -> None:
@ -1830,7 +1830,7 @@ class GridOutChunkIterator:
self._num_chunks = math.ceil(float(self._length) / self._chunk_size)
self._cursor = None
_cursor: Optional[Cursor]
_cursor: Optional[Cursor[Any]]
def expected_chunk_length(self, chunk_n: int) -> int:
if chunk_n < self._num_chunks - 1:
@ -1908,7 +1908,7 @@ class GridOutChunkIterator:
class GridOutIterator:
def __init__(self, grid_out: GridOut, chunks: Collection, session: ClientSession):
def __init__(self, grid_out: GridOut, chunks: Collection[Any], session: ClientSession):
self._chunk_iter = GridOutChunkIterator(grid_out, chunks, session, 0)
def __iter__(self) -> GridOutIterator:
@ -1921,14 +1921,14 @@ class GridOutIterator:
__next__ = next
class GridOutCursor(Cursor):
class GridOutCursor(Cursor): # type: ignore[type-arg]
"""A cursor / iterator for returning GridOut objects as the result
of an arbitrary query against the GridFS files collection.
"""
def __init__(
self,
collection: Collection,
collection: Collection[Any],
filter: Optional[Mapping[str, Any]] = None,
skip: int = 0,
limit: int = 0,

View File

@ -165,7 +165,7 @@ def timeout(seconds: Optional[float]) -> ContextManager[None]:
:raises: :py:class:`ValueError`: When `seconds` is negative.
See :ref:`timeout-example` for more examples.
See `Limit Server Execution Time <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/csot/#overview>`_ for more examples.
.. versionadded:: 4.2
"""

View File

@ -93,7 +93,7 @@ class Lock(_ContextManagerMixin, _LoopBoundMixin):
"""
def __init__(self) -> None:
self._waiters: Optional[collections.deque] = None
self._waiters: Optional[collections.deque[Any]] = None
self._locked = False
def __repr__(self) -> str:
@ -196,7 +196,7 @@ class Condition(_ContextManagerMixin, _LoopBoundMixin):
self.acquire = lock.acquire
self.release = lock.release
self._waiters: collections.deque = collections.deque()
self._waiters: collections.deque[Any] = collections.deque()
def __repr__(self) -> str:
res = super().__repr__()
@ -260,7 +260,7 @@ class Condition(_ContextManagerMixin, _LoopBoundMixin):
self._notify(1)
raise
async def wait_for(self, predicate: Any) -> Coroutine:
async def wait_for(self, predicate: Any) -> Coroutine[Any, Any, Any]:
"""Wait until a predicate becomes true.
The predicate should be a callable whose result will be

View File

@ -24,7 +24,7 @@ from typing import Any, Coroutine, Optional
# TODO (https://jira.mongodb.org/browse/PYTHON-4981): Revisit once the underlying cause of the swallowed cancellations is uncovered
class _Task(asyncio.Task):
class _Task(asyncio.Task[Any]):
def __init__(self, coro: Coroutine[Any, Any, Any], *, name: Optional[str] = None) -> None:
super().__init__(coro, name=name)
self._cancel_requests = 0
@ -43,7 +43,7 @@ class _Task(asyncio.Task):
return self._cancel_requests
def create_task(coro: Coroutine[Any, Any, Any], *, name: Optional[str] = None) -> asyncio.Task:
def create_task(coro: Coroutine[Any, Any, Any], *, name: Optional[str] = None) -> asyncio.Task[Any]:
if sys.version_info >= (3, 11):
return asyncio.create_task(coro, name=name)
return _Task(coro, name=name)

View File

@ -68,7 +68,7 @@ def clamp_remaining(max_timeout: float) -> float:
return min(timeout, max_timeout)
class _TimeoutContext(AbstractContextManager):
class _TimeoutContext(AbstractContextManager[Any]):
"""Internal timeout context manager.
Use :func:`pymongo.timeout` instead::

View File

@ -18,7 +18,7 @@ from __future__ import annotations
import re
from typing import List, Tuple, Union
__version__ = "4.14.0.dev0"
__version__ = "4.15.0.dev0"
def get_version_tuple(version: str) -> Tuple[Union[int, str], ...]:

View File

@ -46,8 +46,8 @@ class _AggregationCommand:
def __init__(
self,
target: Union[AsyncDatabase, AsyncCollection],
cursor_class: type[AsyncCommandCursor],
target: Union[AsyncDatabase[Any], AsyncCollection[Any]],
cursor_class: type[AsyncCommandCursor[Any]],
pipeline: _Pipeline,
options: MutableMapping[str, Any],
explicit_session: bool,
@ -111,12 +111,12 @@ class _AggregationCommand:
"""The namespace in which the aggregate command is run."""
raise NotImplementedError
def _cursor_collection(self, cursor_doc: Mapping[str, Any]) -> AsyncCollection:
def _cursor_collection(self, cursor_doc: Mapping[str, Any]) -> AsyncCollection[Any]:
"""The AsyncCollection used for the aggregate command cursor."""
raise NotImplementedError
@property
def _database(self) -> AsyncDatabase:
def _database(self) -> AsyncDatabase[Any]:
"""The database against which the aggregation command is run."""
raise NotImplementedError
@ -205,7 +205,7 @@ class _AggregationCommand:
class _CollectionAggregationCommand(_AggregationCommand):
_target: AsyncCollection
_target: AsyncCollection[Any]
@property
def _aggregation_target(self) -> str:
@ -215,12 +215,12 @@ class _CollectionAggregationCommand(_AggregationCommand):
def _cursor_namespace(self) -> str:
return self._target.full_name
def _cursor_collection(self, cursor: Mapping[str, Any]) -> AsyncCollection:
def _cursor_collection(self, cursor: Mapping[str, Any]) -> AsyncCollection[Any]:
"""The AsyncCollection used for the aggregate command cursor."""
return self._target
@property
def _database(self) -> AsyncDatabase:
def _database(self) -> AsyncDatabase[Any]:
return self._target.database
@ -234,7 +234,7 @@ class _CollectionRawAggregationCommand(_CollectionAggregationCommand):
class _DatabaseAggregationCommand(_AggregationCommand):
_target: AsyncDatabase
_target: AsyncDatabase[Any]
@property
def _aggregation_target(self) -> int:
@ -245,10 +245,10 @@ class _DatabaseAggregationCommand(_AggregationCommand):
return f"{self._target.name}.$cmd.aggregate"
@property
def _database(self) -> AsyncDatabase:
def _database(self) -> AsyncDatabase[Any]:
return self._target
def _cursor_collection(self, cursor: Mapping[str, Any]) -> AsyncCollection:
def _cursor_collection(self, cursor: Mapping[str, Any]) -> AsyncCollection[Any]:
"""The AsyncCollection used for the aggregate command cursor."""
# AsyncCollection level aggregate may not always return the "ns" field
# according to our MockupDB tests. Let's handle that case for db level

View File

@ -259,7 +259,7 @@ class _OIDCAuthenticator:
) -> Mapping[str, Any]:
self.access_token = None
self.refresh_token = None
start_payload: dict = bson.decode(start_resp["payload"])
start_payload: dict[str, Any] = bson.decode(start_resp["payload"])
if "issuer" in start_payload:
self.idp_info = OIDCIdPInfo(**start_payload)
access_token = await self._get_access_token()

View File

@ -248,7 +248,7 @@ class _AsyncBulk:
request_id: int,
msg: bytes,
docs: list[Mapping[str, Any]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> dict[str, Any]:
"""A proxy for AsyncConnection.write_command that handles event publishing."""
cmd[bwc.field] = docs
@ -334,7 +334,7 @@ class _AsyncBulk:
msg: bytes,
max_doc_size: int,
docs: list[Mapping[str, Any]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> Optional[Mapping[str, Any]]:
"""A proxy for AsyncConnection.unack_write that handles event publishing."""
if _COMMAND_LOGGER.isEnabledFor(logging.DEBUG):
@ -419,7 +419,7 @@ class _AsyncBulk:
bwc: Union[_BulkWriteContext, _EncryptedBulkWriteContext],
cmd: dict[str, Any],
ops: list[Mapping[str, Any]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> list[Mapping[str, Any]]:
if self.is_encrypted:
_, batched_cmd, to_send = bwc.batch_command(cmd, ops)
@ -446,7 +446,7 @@ class _AsyncBulk:
bwc: Union[_BulkWriteContext, _EncryptedBulkWriteContext],
cmd: dict[str, Any],
ops: list[Mapping[str, Any]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> tuple[dict[str, Any], list[Mapping[str, Any]]]:
if self.is_encrypted:
_, batched_cmd, to_send = bwc.batch_command(cmd, ops)

View File

@ -164,7 +164,7 @@ class AsyncChangeStream(Generic[_DocumentType]):
raise NotImplementedError
@property
def _client(self) -> AsyncMongoClient:
def _client(self) -> AsyncMongoClient: # type: ignore[type-arg]
"""The client against which the aggregation commands for
this AsyncChangeStream will be run.
"""
@ -206,7 +206,7 @@ class AsyncChangeStream(Generic[_DocumentType]):
def _aggregation_pipeline(self) -> list[dict[str, Any]]:
"""Return the full aggregation pipeline for this AsyncChangeStream."""
options = self._change_stream_options()
full_pipeline: list = [{"$changeStream": options}]
full_pipeline: list[dict[str, Any]] = [{"$changeStream": options}]
full_pipeline.extend(self._pipeline)
return full_pipeline
@ -237,7 +237,7 @@ class AsyncChangeStream(Generic[_DocumentType]):
async def _run_aggregation_cmd(
self, session: Optional[AsyncClientSession], explicit_session: bool
) -> AsyncCommandCursor:
) -> AsyncCommandCursor: # type: ignore[type-arg]
"""Run the full aggregation pipeline for this AsyncChangeStream and return
the corresponding AsyncCommandCursor.
"""
@ -257,7 +257,7 @@ class AsyncChangeStream(Generic[_DocumentType]):
operation=_Op.AGGREGATE,
)
async def _create_cursor(self) -> AsyncCommandCursor:
async def _create_cursor(self) -> AsyncCommandCursor: # type: ignore[type-arg]
async with self._client._tmp_session(self._session, close=False) as s:
return await self._run_aggregation_cmd(
session=s, explicit_session=self._session is not None

View File

@ -88,7 +88,7 @@ class _AsyncClientBulk:
def __init__(
self,
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
write_concern: WriteConcern,
ordered: bool = True,
bypass_document_validation: Optional[bool] = None,
@ -233,7 +233,7 @@ class _AsyncClientBulk:
msg: Union[bytes, dict[str, Any]],
op_docs: list[Mapping[str, Any]],
ns_docs: list[Mapping[str, Any]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> dict[str, Any]:
"""A proxy for AsyncConnection.write_command that handles event publishing."""
cmd["ops"] = op_docs
@ -324,7 +324,7 @@ class _AsyncClientBulk:
msg: bytes,
op_docs: list[Mapping[str, Any]],
ns_docs: list[Mapping[str, Any]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> Optional[Mapping[str, Any]]:
"""A proxy for AsyncConnection.unack_write that handles event publishing."""
if _COMMAND_LOGGER.isEnabledFor(logging.DEBUG):

View File

@ -396,7 +396,7 @@ class _TxnState:
class _Transaction:
"""Internal class to hold transaction information in an AsyncClientSession."""
def __init__(self, opts: Optional[TransactionOptions], client: AsyncMongoClient):
def __init__(self, opts: Optional[TransactionOptions], client: AsyncMongoClient[Any]):
self.opts = opts
self.state = _TxnState.NONE
self.sharded = False
@ -459,7 +459,7 @@ def _max_time_expired_error(exc: PyMongoError) -> bool:
# From the transactions spec, all the retryable writes errors plus
# WriteConcernTimeout.
_UNKNOWN_COMMIT_ERROR_CODES: frozenset = _RETRYABLE_ERROR_CODES | frozenset(
_UNKNOWN_COMMIT_ERROR_CODES: frozenset = _RETRYABLE_ERROR_CODES | frozenset( # type: ignore[type-arg]
[
64, # WriteConcernTimeout
50, # MaxTimeMSExpired
@ -499,13 +499,13 @@ class AsyncClientSession:
def __init__(
self,
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
server_session: Any,
options: SessionOptions,
implicit: bool,
) -> None:
# An AsyncMongoClient, a _ServerSession, a SessionOptions, and a bool.
self._client: AsyncMongoClient = client
self._client: AsyncMongoClient[Any] = client
self._server_session = server_session
self._options = options
self._cluster_time: Optional[Mapping[str, Any]] = None
@ -551,7 +551,7 @@ class AsyncClientSession:
await self._end_session(lock=True)
@property
def client(self) -> AsyncMongoClient:
def client(self) -> AsyncMongoClient[Any]:
"""The :class:`~pymongo.asynchronous.mongo_client.AsyncMongoClient` this session was
created from.
"""
@ -751,7 +751,7 @@ class AsyncClientSession:
write_concern: Optional[WriteConcern] = None,
read_preference: Optional[_ServerMode] = None,
max_commit_time_ms: Optional[int] = None,
) -> AsyncContextManager:
) -> AsyncContextManager[Any]:
"""Start a multi-statement transaction.
Takes the same arguments as :class:`TransactionOptions`.
@ -1123,7 +1123,7 @@ class _ServerSession:
self._transaction_id += 1
class _ServerSessionPool(collections.deque):
class _ServerSessionPool(collections.deque): # type: ignore[type-arg]
"""Pool of _ServerSession objects.
This class is thread-safe.

View File

@ -581,7 +581,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
conn: AsyncConnection,
command: MutableMapping[str, Any],
read_preference: Optional[_ServerMode] = None,
codec_options: Optional[CodecOptions] = None,
codec_options: Optional[CodecOptions[Mapping[str, Any]]] = None,
check: bool = True,
allowable_errors: Optional[Sequence[Union[str, int]]] = None,
read_concern: Optional[ReadConcern] = None,
@ -704,7 +704,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
bypass_document_validation: Optional[bool] = None,
session: Optional[AsyncClientSession] = None,
comment: Optional[Any] = None,
let: Optional[Mapping] = None,
let: Optional[Mapping[str, Any]] = None,
) -> BulkWriteResult:
"""Send a batch of write operations to the server.
@ -762,7 +762,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
:return: An instance of :class:`~pymongo.results.BulkWriteResult`.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: `bypass_document_validation` requires server version
**>= 3.2**
@ -867,7 +867,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
:return: - An instance of :class:`~pymongo.results.InsertOneResult`.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: `bypass_document_validation` requires server version
**>= 3.2**
@ -936,7 +936,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
:return: An instance of :class:`~pymongo.results.InsertManyResult`.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: `bypass_document_validation` requires server version
**>= 3.2**
@ -2041,7 +2041,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
.. versionchanged:: 4.2
This method now always uses the `count`_ command. Due to an oversight in versions
5.0.0-5.0.8 of MongoDB, the count command was not included in V1 of the
:ref:`versioned-api-ref`. Users of the Stable API with estimated_document_count are
`versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_. Users of the Stable API with estimated_document_count are
recommended to upgrade their server version to 5.0.9+ or set
:attr:`pymongo.server_api.ServerApi.strict` to ``False`` to avoid encountering errors.
@ -2525,7 +2525,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
session: Optional[AsyncClientSession] = None,
comment: Optional[Any] = None,
) -> AsyncCommandCursor[MutableMapping[str, Any]]:
codec_options: CodecOptions = CodecOptions(SON)
codec_options: CodecOptions[Mapping[str, Any]] = CodecOptions(SON)
coll = cast(
AsyncCollection[MutableMapping[str, Any]],
self.with_options(codec_options=codec_options, read_preference=ReadPreference.PRIMARY),
@ -2871,7 +2871,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
self,
aggregation_command: Type[_AggregationCommand],
pipeline: _Pipeline,
cursor_class: Type[AsyncCommandCursor],
cursor_class: Type[AsyncCommandCursor], # type: ignore[type-arg]
session: Optional[AsyncClientSession],
explicit_session: bool,
let: Optional[Mapping[str, Any]] = None,
@ -2916,7 +2916,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
.. note:: This method does not support the 'explain' option. Please
use `PyMongoExplain <https://pypi.org/project/pymongoexplain/>`_
instead. An example is included in the :ref:`aggregate-examples`
instead. An example is included in the `aggregation example <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/aggregation/#aggregation-example>`_
documentation.
.. note:: The :attr:`~pymongo.asynchronous.collection.AsyncCollection.write_concern` of
@ -2977,7 +2977,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
The :meth:`aggregate` method always returns an AsyncCommandCursor. The
pipeline argument must be a list.
.. seealso:: :doc:`/examples/aggregation`
.. seealso:: `Aggregation <https://mongodb.com/docs/manual/applications/aggregation/>`_
.. _aggregate command:
https://mongodb.com/docs/manual/reference/command/aggregate
@ -3114,7 +3114,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
comment: Optional[Any] = None,
hint: Optional[_IndexKeyHint] = None,
**kwargs: Any,
) -> list:
) -> list[str]:
"""Get a list of distinct values for `key` among all documents
in this collection.
@ -3177,7 +3177,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
_server: Server,
conn: AsyncConnection,
read_preference: Optional[_ServerMode],
) -> list:
) -> list: # type: ignore[type-arg]
return (
await self._command(
conn,
@ -3202,7 +3202,7 @@ class AsyncCollection(common.BaseObject, Generic[_DocumentType]):
array_filters: Optional[Sequence[Mapping[str, Any]]] = None,
hint: Optional[_IndexKeyHint] = None,
session: Optional[AsyncClientSession] = None,
let: Optional[Mapping] = None,
let: Optional[Mapping[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Internal findAndModify helper."""

View File

@ -350,7 +350,7 @@ class AsyncCommandCursor(Generic[_DocumentType]):
else:
return None
async def _next_batch(self, result: list, total: Optional[int] = None) -> bool:
async def _next_batch(self, result: list, total: Optional[int] = None) -> bool: # type: ignore[type-arg]
"""Get all or some available documents from the cursor."""
if not len(self._data) and not self._killed:
await self._refresh()
@ -457,7 +457,7 @@ class AsyncRawBatchCommandCursor(AsyncCommandCursor[_DocumentType]):
self,
response: Union[_OpReply, _OpMsg],
cursor_id: Optional[int],
codec_options: CodecOptions,
codec_options: CodecOptions[dict[str, Any]],
user_fields: Optional[Mapping[str, Any]] = None,
legacy_response: bool = False,
) -> list[Mapping[str, Any]]:

View File

@ -216,7 +216,7 @@ class AsyncCursor(Generic[_DocumentType]):
# it anytime we change __limit.
self._empty = False
self._data: deque = deque()
self._data: deque = deque() # type: ignore[type-arg]
self._address: Optional[_Address] = None
self._retrieved = 0
@ -280,7 +280,7 @@ class AsyncCursor(Generic[_DocumentType]):
"""
return self._clone(True)
def _clone(self, deepcopy: bool = True, base: Optional[AsyncCursor] = None) -> AsyncCursor:
def _clone(self, deepcopy: bool = True, base: Optional[AsyncCursor] = None) -> AsyncCursor: # type: ignore[type-arg]
"""Internal clone helper."""
if not base:
if self._explicit_session:
@ -322,7 +322,7 @@ class AsyncCursor(Generic[_DocumentType]):
base.__dict__.update(data)
return base
def _clone_base(self, session: Optional[AsyncClientSession]) -> AsyncCursor:
def _clone_base(self, session: Optional[AsyncClientSession]) -> AsyncCursor: # type: ignore[type-arg]
"""Creates an empty AsyncCursor object for information to be copied into."""
return self.__class__(self._collection, session=session)
@ -864,7 +864,7 @@ class AsyncCursor(Generic[_DocumentType]):
if self._has_filter:
spec = dict(self._spec)
else:
spec = cast(dict, self._spec)
spec = cast(dict, self._spec) # type: ignore[type-arg]
spec["$where"] = code
self._spec = spec
return self
@ -888,7 +888,7 @@ class AsyncCursor(Generic[_DocumentType]):
self,
response: Union[_OpReply, _OpMsg],
cursor_id: Optional[int],
codec_options: CodecOptions,
codec_options: CodecOptions, # type: ignore[type-arg]
user_fields: Optional[Mapping[str, Any]] = None,
legacy_response: bool = False,
) -> Sequence[_DocumentOut]:
@ -964,29 +964,33 @@ class AsyncCursor(Generic[_DocumentType]):
return self._clone(deepcopy=True)
@overload
def _deepcopy(self, x: Iterable, memo: Optional[dict[int, Union[list, dict]]] = None) -> list:
def _deepcopy(self, x: Iterable, memo: Optional[dict[int, Union[list, dict]]] = None) -> list: # type: ignore[type-arg]
...
@overload
def _deepcopy(
self, x: SupportsItems, memo: Optional[dict[int, Union[list, dict]]] = None
) -> dict:
self,
x: SupportsItems, # type: ignore[type-arg]
memo: Optional[dict[int, Union[list, dict]]] = None, # type: ignore[type-arg]
) -> dict: # type: ignore[type-arg]
...
def _deepcopy(
self, x: Union[Iterable, SupportsItems], memo: Optional[dict[int, Union[list, dict]]] = None
) -> Union[list, dict]:
self,
x: Union[Iterable, SupportsItems], # type: ignore[type-arg]
memo: Optional[dict[int, Union[list, dict]]] = None, # type: ignore[type-arg]
) -> Union[list[Any], dict[str, Any]]:
"""Deepcopy helper for the data dictionary or list.
Regular expressions cannot be deep copied but as they are immutable we
don't have to copy them when cloning.
"""
y: Union[list, dict]
y: Union[list[Any], dict[str, Any]]
iterator: Iterable[tuple[Any, Any]]
if not hasattr(x, "items"):
y, is_list, iterator = [], True, enumerate(x)
else:
y, is_list, iterator = {}, False, cast("SupportsItems", x).items()
y, is_list, iterator = {}, False, cast("SupportsItems", x).items() # type: ignore[type-arg]
if memo is None:
memo = {}
val_id = id(x)
@ -1060,7 +1064,7 @@ class AsyncCursor(Generic[_DocumentType]):
"""Explicitly close / kill this cursor."""
await self._die_lock()
async def distinct(self, key: str) -> list:
async def distinct(self, key: str) -> list[str]:
"""Get a list of distinct values for `key` among all documents
in the result set of this query.
@ -1265,7 +1269,7 @@ class AsyncCursor(Generic[_DocumentType]):
else:
raise StopAsyncIteration
async def _next_batch(self, result: list, total: Optional[int] = None) -> bool:
async def _next_batch(self, result: list, total: Optional[int] = None) -> bool: # type: ignore[type-arg]
"""Get all or some documents from the cursor."""
if not self._exhaust_checked:
self._exhaust_checked = True
@ -1325,7 +1329,7 @@ class AsyncCursor(Generic[_DocumentType]):
return res
class AsyncRawBatchCursor(AsyncCursor, Generic[_DocumentType]):
class AsyncRawBatchCursor(AsyncCursor, Generic[_DocumentType]): # type: ignore[type-arg]
"""An asynchronous cursor / iterator over raw batches of BSON data from a query result."""
_query_class = _RawBatchQuery

View File

@ -771,7 +771,7 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
self._name,
command,
read_preference,
codec_options,
codec_options, # type: ignore[arg-type]
check,
allowable_errors,
write_concern=write_concern,
@ -893,7 +893,7 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
when decoding the command response.
.. note:: If this client has been configured to use MongoDB Stable
API (see :ref:`versioned-api-ref`), then :meth:`command` will
API (see `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_), then :meth:`command` will
automatically add API versioning options to the given command.
Explicitly adding API versioning options in the command and
declaring an API version on the client is not supported.
@ -994,7 +994,7 @@ class AsyncDatabase(common.BaseObject, Generic[_DocumentType]):
when decoding the command response.
.. note:: If this client has been configured to use MongoDB Stable
API (see :ref:`versioned-api-ref`), then :meth:`command` will
API (see `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_), then :meth:`command` will
automatically add API versioning options to the given command.
Explicitly adding API versioning options in the command and
declaring an API version on the client is not supported.

View File

@ -579,7 +579,7 @@ class AsyncClientEncryption(Generic[_DocumentType]):
creating data keys. It does not provide an API to query keys from the
key vault collection, as this can be done directly on the AsyncMongoClient.
See :ref:`explicit-client-side-encryption` for an example.
See `explicit client-side encryption <https://www.mongodb.com/docs/manual/core/csfle/fundamentals/manual-encryption/#csfle-explicit-encryption>`_ for an example.
:param kms_providers: Map of KMS provider options. The `kms_providers`
map values differ by provider:
@ -608,7 +608,7 @@ class AsyncClientEncryption(Generic[_DocumentType]):
KMS providers may be specified with an optional name suffix
separated by a colon, for example "kmip:name" or "aws:name".
Named KMS providers do not support :ref:`CSFLE on-demand credentials`.
Named KMS providers do not support `CSFLE on-demand credentials <https://www.mongodb.com/docs/manual/core/csfle/tutorials/aws/aws-automatic/?interface=driver&language=python#use-automatic-client-side-field-level-encryption-with-aws>`_.
:param key_vault_namespace: The namespace for the key vault collection.
The key vault collection contains all data keys used for encryption
and decryption. Data keys are stored as documents in this MongoDB

View File

@ -14,7 +14,7 @@
"""Tools for connecting to MongoDB.
.. seealso:: :doc:`/examples/high_availability` for examples of connecting
.. seealso:: `Read and Write Settings <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/configure/#read-and-write-settings>`_ for examples of connecting
to replica sets or sets of mongos servers.
To get a :class:`~pymongo.asynchronous.database.AsyncDatabase` instance from a
@ -161,10 +161,10 @@ _ReadCall = Callable[
_IS_SYNC = False
_WriteOp = Union[
InsertOne,
InsertOne, # type: ignore[type-arg]
DeleteOne,
DeleteMany,
ReplaceOne,
ReplaceOne, # type: ignore[type-arg]
UpdateOne,
UpdateMany,
]
@ -176,7 +176,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
# Define order to retrieve options from ClientOptions for __repr__.
# No host/port; these are retrieved from TopologySettings.
_constructor_args = ("document_class", "tz_aware", "connect")
_clients: weakref.WeakValueDictionary = weakref.WeakValueDictionary()
_clients: weakref.WeakValueDictionary = weakref.WeakValueDictionary() # type: ignore[type-arg]
def __init__(
self,
@ -263,7 +263,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
print("Server not available")
.. warning:: When using PyMongo in a multiprocessing context, please
read :ref:`multiprocessing` first.
read `PyMongo multiprocessing <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/mongoclient/#multiprocessing>`_ first.
.. note:: Many of the following options can be passed using a MongoDB
URI or keyword parameters. If the same option is passed in a URI and
@ -296,7 +296,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
return DatetimeMS objects when the underlying datetime is
out-of-range and 'datetime_clamp' to clamp to the minimum and
maximum possible datetimes. Defaults to 'datetime'. See
:ref:`handling-out-of-range-datetimes` for details.
`handling out of range datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_ for details.
- `directConnection` (optional): if ``True``, forces this client to
connect directly to the specified MongoDB host as a standalone.
If ``False``, the client connects to the entire replica set of
@ -421,7 +421,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
package. By default no compression is used. Compression support
must also be enabled on the server. MongoDB 3.6+ supports snappy
and zlib compression. MongoDB 4.2+ adds support for zstd.
See :ref:`network-compression-example` for details.
See `compress network traffic <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/network-compression/#compress-network-traffic>`_ for details.
- `zlibCompressionLevel`: (int) The zlib compression level to use
when zlib is used as the wire protocol compressor. Supported values
are -1 through 9. -1 tells the zlib library to use its default
@ -432,7 +432,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
values are the strings: "standard", "pythonLegacy", "javaLegacy",
"csharpLegacy", and "unspecified" (the default). New applications
should consider setting this to "standard" for cross language
compatibility. See :ref:`handling-uuid-data-example` for details.
compatibility. See `handling UUID data <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#unspecified>`_ for details.
- `unicode_decode_error_handler`: The error handler to apply when
a Unicode-related error occurs during BSON decoding that would
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
@ -496,7 +496,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
is set, it must be a positive integer greater than or equal to
90 seconds.
.. seealso:: :doc:`/examples/server_selection`
.. seealso:: `Customize Server Selection <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/server-selection/#customize-server-selection>`_
| **Authentication:**
@ -522,7 +522,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
To specify the session token for MONGODB-AWS authentication pass
``authMechanismProperties='AWS_SESSION_TOKEN:<session token>'``.
.. seealso:: :doc:`/examples/authentication`
.. seealso:: `Authentication <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/#authentication-mechanisms>`_
| **TLS/SSL configuration:**
@ -585,7 +585,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
:class:`~pymongo.encryption_options.AutoEncryptionOpts` which
configures this client to automatically encrypt collection commands
and automatically decrypt results. See
:ref:`automatic-client-side-encryption` for an example.
`client-side field level encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#client-side-field-level-encryption>`_ for an example.
If a :class:`AsyncMongoClient` is configured with
``auto_encryption_opts`` and a non-None ``maxPoolSize``, a
separate internal ``AsyncMongoClient`` is created if any of the
@ -601,7 +601,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
- `server_api`: A
:class:`~pymongo.server_api.ServerApi` which configures this
client to use Stable API. See :ref:`versioned-api-ref` for
client to use Stable API. See `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_ for
details.
.. seealso:: The MongoDB documentation on `connections <https://dochub.mongodb.org/core/connections>`_.
@ -712,15 +712,15 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
reconnect to one of them. In PyMongo 3, the client monitors its
network latency to all the mongoses continuously, and distributes
operations evenly among those with the lowest latency. See
:ref:`mongos-load-balancing` for more information.
`load balancing <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-targets/#replica-sets>`_ for more information.
The ``connect`` option is added.
The ``start_request``, ``in_request``, and ``end_request`` methods
are removed, as well as the ``auto_start_request`` option.
The ``copy_database`` method is removed, see the
:doc:`copy_database examples </examples/copydb>` for alternatives.
The ``copy_database`` method is removed, see
`Copy and Clone Databases <https://www.mongodb.com/docs/database-tools/mongodump/mongodump-examples/#copy-and-clone-databases>`_ for alternatives.
The :meth:`AsyncMongoClient.disconnect` method is removed; it was a
synonym for :meth:`~pymongo.asynchronous.mongo_client.AsyncMongoClient.close`.
@ -847,7 +847,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
self._default_database_name = dbase
self._lock = _async_create_lock()
self._kill_cursors_queue: list = []
self._kill_cursors_queue: list = [] # type: ignore[type-arg]
self._encrypter: Optional[_Encrypter] = None
@ -1064,7 +1064,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
# Reset the session pool to avoid duplicate sessions in the child process.
self._topology._session_pool.reset()
def _duplicate(self, **kwargs: Any) -> AsyncMongoClient:
def _duplicate(self, **kwargs: Any) -> AsyncMongoClient: # type: ignore[type-arg]
args = self._init_kwargs.copy()
args.update(kwargs)
return AsyncMongoClient(**args)
@ -1548,7 +1548,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
self, name, codec_options, read_preference, write_concern, read_concern
)
def _database_default_options(self, name: str) -> database.AsyncDatabase:
def _database_default_options(self, name: str) -> database.AsyncDatabase: # type: ignore[type-arg]
"""Get an AsyncDatabase instance with the default settings."""
return self.get_database(
name,
@ -1887,7 +1887,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
async def _run_operation(
self,
operation: Union[_Query, _GetMore],
unpack_res: Callable,
unpack_res: Callable, # type: ignore[type-arg]
address: Optional[_Address] = None,
) -> Response:
"""Run a _Query/_GetMore operation and return a Response.
@ -2261,7 +2261,7 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
@contextlib.asynccontextmanager
async def _tmp_session(
self, session: Optional[client_session.AsyncClientSession], close: bool = True
) -> AsyncGenerator[Optional[client_session.AsyncClientSession], None, None]:
) -> AsyncGenerator[Optional[client_session.AsyncClientSession], None]:
"""If provided session is None, lend a temporary session."""
if session is not None:
if not isinstance(session, client_session.AsyncClientSession):
@ -2308,8 +2308,8 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
.. versionchanged:: 3.6
Added ``session`` parameter.
"""
return cast(
dict,
return cast( # type: ignore[redundant-cast]
dict[str, Any],
await self.admin.command(
"buildinfo", read_preference=ReadPreference.PRIMARY, session=session
),
@ -2438,13 +2438,13 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
@_csot.apply
async def bulk_write(
self,
models: Sequence[_WriteOp[_DocumentType]],
models: Sequence[_WriteOp],
session: Optional[AsyncClientSession] = None,
ordered: bool = True,
verbose_results: bool = False,
bypass_document_validation: Optional[bool] = None,
comment: Optional[Any] = None,
let: Optional[Mapping] = None,
let: Optional[Mapping[str, Any]] = None,
write_concern: Optional[WriteConcern] = None,
) -> ClientBulkWriteResult:
"""Send a batch of write operations, potentially across multiple namespaces, to the server.
@ -2519,9 +2519,9 @@ class AsyncMongoClient(common.BaseObject, Generic[_DocumentType]):
:return: An instance of :class:`~pymongo.results.ClientBulkWriteResult`.
.. seealso:: For more info, see :doc:`/examples/client_bulk`.
.. seealso:: For more info, see `Client Bulk Write <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/bulk-write/#client-bulk-write-example>`_.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: requires MongoDB server version 8.0+.
@ -2631,7 +2631,10 @@ class _MongoClientErrorHandler:
)
def __init__(
self, client: AsyncMongoClient, server: Server, session: Optional[AsyncClientSession]
self,
client: AsyncMongoClient, # type: ignore[type-arg]
server: Server,
session: Optional[AsyncClientSession],
):
if not isinstance(client, AsyncMongoClient):
# This is for compatibility with mocked and subclassed types, such as in Motor.
@ -2704,7 +2707,7 @@ class _ClientConnectionRetryable(Generic[T]):
def __init__(
self,
mongo_client: AsyncMongoClient,
mongo_client: AsyncMongoClient, # type: ignore[type-arg]
func: _WriteCall[T] | _ReadCall[T],
bulk: Optional[Union[_AsyncBulk, _AsyncClientBulk]],
operation: str,

View File

@ -351,7 +351,7 @@ class Monitor(MonitorBase):
)
return sd
async def _check_with_socket(self, conn: AsyncConnection) -> tuple[Hello, float]:
async def _check_with_socket(self, conn: AsyncConnection) -> tuple[Hello, float]: # type: ignore[type-arg]
"""Return (Hello, round_trip_time).
Can raise ConnectionFailure or OperationFailure.
@ -423,12 +423,13 @@ class SrvMonitor(MonitorBase):
if len(seedlist) == 0:
# As per the spec: this should be treated as a failure.
raise Exception
except Exception:
except Exception as exc:
# As per the spec, upon encountering an error:
# - An error must not be raised
# - SRV records must be rescanned every heartbeatFrequencyMS
# - Topology must be left unchanged
self.request_check()
_debug_log(_SDAM_LOGGER, message="SRV monitor check failed", failure=repr(exc))
return None
else:
self._executor.update_interval(max(ttl, common.MIN_SRV_RESCAN_INTERVAL))

View File

@ -66,7 +66,7 @@ async def command(
read_preference: Optional[_ServerMode],
codec_options: CodecOptions[_DocumentType],
session: Optional[AsyncClientSession],
client: Optional[AsyncMongoClient],
client: Optional[AsyncMongoClient[Any]],
check: bool = True,
allowable_errors: Optional[Sequence[Union[str, int]]] = None,
address: Optional[_Address] = None,

View File

@ -201,7 +201,7 @@ class AsyncConnection:
self.conn.get_conn.settimeout(timeout)
def apply_timeout(
self, client: AsyncMongoClient, cmd: Optional[MutableMapping[str, Any]]
self, client: AsyncMongoClient[Any], cmd: Optional[MutableMapping[str, Any]]
) -> Optional[float]:
# CSOT: use remaining timeout when set.
timeout = _csot.remaining()
@ -255,7 +255,7 @@ class AsyncConnection:
else:
return {HelloCompat.LEGACY_CMD: 1, "helloOk": True}
async def hello(self) -> Hello:
async def hello(self) -> Hello[dict[str, Any]]:
return await self._hello(None, None)
async def _hello(
@ -357,7 +357,7 @@ class AsyncConnection:
dbname: str,
spec: MutableMapping[str, Any],
read_preference: _ServerMode = ReadPreference.PRIMARY,
codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
codec_options: CodecOptions[Mapping[str, Any]] = DEFAULT_CODEC_OPTIONS, # type: ignore[assignment]
check: bool = True,
allowable_errors: Optional[Sequence[Union[str, int]]] = None,
read_concern: Optional[ReadConcern] = None,
@ -365,7 +365,7 @@ class AsyncConnection:
parse_write_concern_error: bool = False,
collation: Optional[_CollationIn] = None,
session: Optional[AsyncClientSession] = None,
client: Optional[AsyncMongoClient] = None,
client: Optional[AsyncMongoClient[Any]] = None,
retryable_write: bool = False,
publish_events: bool = True,
user_fields: Optional[Mapping[str, Any]] = None,
@ -417,7 +417,7 @@ class AsyncConnection:
spec,
self.is_mongos,
read_preference,
codec_options,
codec_options, # type: ignore[arg-type]
session,
client,
check,
@ -489,7 +489,7 @@ class AsyncConnection:
await self.send_message(msg, max_doc_size)
async def write_command(
self, request_id: int, msg: bytes, codec_options: CodecOptions
self, request_id: int, msg: bytes, codec_options: CodecOptions[Mapping[str, Any]]
) -> dict[str, Any]:
"""Send "insert" etc. command, returning response as a dict.
@ -541,7 +541,7 @@ class AsyncConnection:
)
def validate_session(
self, client: Optional[AsyncMongoClient], session: Optional[AsyncClientSession]
self, client: Optional[AsyncMongoClient[Any]], session: Optional[AsyncClientSession]
) -> None:
"""Validate this session before use with client.
@ -598,7 +598,7 @@ class AsyncConnection:
self,
command: MutableMapping[str, Any],
session: Optional[AsyncClientSession],
client: Optional[AsyncMongoClient],
client: Optional[AsyncMongoClient[Any]],
) -> None:
"""Add $clusterTime."""
if client:
@ -732,7 +732,7 @@ class Pool:
# LIFO pool. Sockets are ordered on idle time. Sockets claimed
# and returned to pool from the left side. Stale sockets removed
# from the right side.
self.conns: collections.deque = collections.deque()
self.conns: collections.deque[AsyncConnection] = collections.deque()
self.active_contexts: set[_CancellationContext] = set()
self.lock = _async_create_lock()
self._max_connecting_cond = _async_create_condition(self.lock)
@ -839,8 +839,8 @@ class Pool:
if service_id is None:
sockets, self.conns = self.conns, collections.deque()
else:
discard: collections.deque = collections.deque()
keep: collections.deque = collections.deque()
discard: collections.deque = collections.deque() # type: ignore[type-arg]
keep: collections.deque = collections.deque() # type: ignore[type-arg]
for conn in self.conns:
if conn.service_id == service_id:
discard.append(conn)
@ -866,7 +866,7 @@ class Pool:
if close:
if not _IS_SYNC:
await asyncio.gather(
*[conn.close_conn(ConnectionClosedReason.POOL_CLOSED) for conn in sockets],
*[conn.close_conn(ConnectionClosedReason.POOL_CLOSED) for conn in sockets], # type: ignore[func-returns-value]
return_exceptions=True,
)
else:
@ -903,7 +903,7 @@ class Pool:
)
if not _IS_SYNC:
await asyncio.gather(
*[conn.close_conn(ConnectionClosedReason.STALE) for conn in sockets],
*[conn.close_conn(ConnectionClosedReason.STALE) for conn in sockets], # type: ignore[func-returns-value]
return_exceptions=True,
)
else:
@ -917,7 +917,7 @@ class Pool:
self.is_writable = is_writable
async with self.lock:
for _socket in self.conns:
_socket.update_is_writable(self.is_writable)
_socket.update_is_writable(self.is_writable) # type: ignore[arg-type]
async def reset(
self, service_id: Optional[ObjectId] = None, interrupt_connections: bool = False
@ -956,7 +956,7 @@ class Pool:
close_conns.append(self.conns.pop())
if not _IS_SYNC:
await asyncio.gather(
*[conn.close_conn(ConnectionClosedReason.IDLE) for conn in close_conns],
*[conn.close_conn(ConnectionClosedReason.IDLE) for conn in close_conns], # type: ignore[func-returns-value]
return_exceptions=True,
)
else:
@ -1477,4 +1477,4 @@ class Pool:
# not safe to acquire a lock in __del__.
if _IS_SYNC:
for conn in self.conns:
conn.close_conn(None)
conn.close_conn(None) # type: ignore[unused-coroutine]

View File

@ -66,7 +66,7 @@ class Server:
monitor: Monitor,
topology_id: Optional[ObjectId] = None,
listeners: Optional[_EventListeners] = None,
events: Optional[ReferenceType[Queue]] = None,
events: Optional[ReferenceType[Queue[Any]]] = None,
) -> None:
"""Represent one MongoDB server."""
self._description = server_description
@ -142,7 +142,7 @@ class Server:
read_preference: _ServerMode,
listeners: Optional[_EventListeners],
unpack_res: Callable[..., list[_DocumentOut]],
client: AsyncMongoClient,
client: AsyncMongoClient[Any],
) -> Response:
"""Run a _Query or _GetMore operation and return a Response object.

View File

@ -84,7 +84,7 @@ _IS_SYNC = False
_pymongo_dir = str(Path(__file__).parent)
def process_events_queue(queue_ref: weakref.ReferenceType[queue.Queue]) -> bool:
def process_events_queue(queue_ref: weakref.ReferenceType[queue.Queue]) -> bool: # type: ignore[type-arg]
q = queue_ref()
if not q:
return False # Cancel PeriodicExecutor.
@ -186,7 +186,7 @@ class Topology:
if self._publish_server or self._publish_tp:
assert self._events is not None
weak: weakref.ReferenceType[queue.Queue]
weak: weakref.ReferenceType[queue.Queue[Any]]
async def target() -> bool:
return process_events_queue(weak)

View File

@ -247,7 +247,7 @@ class ClientOptions:
return self.__connect
@property
def codec_options(self) -> CodecOptions:
def codec_options(self) -> CodecOptions[Any]:
"""A :class:`~bson.codec_options.CodecOptions` instance."""
return self.__codec_options

View File

@ -56,7 +56,7 @@ if TYPE_CHECKING:
from pymongo.typings import _AgnosticClientSession
ORDERED_TYPES: Sequence[Type] = (SON, OrderedDict)
ORDERED_TYPES: Sequence[Type[Any]] = (SON, OrderedDict)
# Defaults until we connect to a server and get updated limits.
MAX_BSON_SIZE = 16 * (1024**2)
@ -166,7 +166,7 @@ def clean_node(node: str) -> tuple[str, int]:
return host.lower(), port
def raise_config_error(key: str, suggestions: Optional[list] = None) -> NoReturn:
def raise_config_error(key: str, suggestions: Optional[list[str]] = None) -> NoReturn:
"""Raise ConfigurationError with the given key name."""
msg = f"Unknown option: {key}."
if suggestions:
@ -411,7 +411,7 @@ def validate_read_preference_tags(name: str, value: Any) -> list[dict[str, str]]
if not isinstance(value, list):
value = [value]
tag_sets: list = []
tag_sets: list[dict[str, Any]] = []
for tag_set in value:
if tag_set == "":
tag_sets.append({})
@ -497,7 +497,7 @@ def validate_auth_mechanism_properties(option: str, value: Any) -> dict[str, Uni
def validate_document_class(
option: str, value: Any
) -> Union[Type[MutableMapping], Type[RawBSONDocument]]:
) -> Union[Type[MutableMapping[str, Any]], Type[RawBSONDocument]]:
"""Validate the document_class option."""
# issubclass can raise TypeError for generic aliases like SON[str, Any].
# In that case we can use the base class for the comparison.
@ -523,14 +523,14 @@ def validate_type_registry(option: Any, value: Any) -> Optional[TypeRegistry]:
return value
def validate_list(option: str, value: Any) -> list:
def validate_list(option: str, value: Any) -> list[Any]:
"""Validates that 'value' is a list."""
if not isinstance(value, list):
raise TypeError(f"{option} must be a list, not {type(value)}")
return value
def validate_list_or_none(option: Any, value: Any) -> Optional[list]:
def validate_list_or_none(option: Any, value: Any) -> Optional[list[Any]]:
"""Validates that 'value' is a list or None."""
if value is None:
return value
@ -597,7 +597,7 @@ def validate_server_api_or_none(option: Any, value: Any) -> Optional[ServerApi]:
return value
def validate_is_callable_or_none(option: Any, value: Any) -> Optional[Callable]:
def validate_is_callable_or_none(option: Any, value: Any) -> Optional[Callable[..., Any]]:
"""Validates that 'value' is a callable."""
if value is None:
return value
@ -829,7 +829,7 @@ def validate_auth_option(option: str, value: Any) -> tuple[str, Any]:
def _get_validator(
key: str, validators: dict[str, Callable[[Any, Any], Any]], normed_key: Optional[str] = None
) -> Callable:
) -> Callable[[Any, Any], Any]:
normed_key = normed_key or key
try:
return validators[normed_key]
@ -917,7 +917,7 @@ class BaseObject:
def __init__(
self,
codec_options: CodecOptions,
codec_options: CodecOptions[Any],
read_preference: _ServerMode,
write_concern: WriteConcern,
read_concern: ReadConcern,
@ -947,7 +947,7 @@ class BaseObject:
self._read_concern = read_concern
@property
def codec_options(self) -> CodecOptions:
def codec_options(self) -> CodecOptions[Any]:
"""Read only access to the :class:`~bson.codec_options.CodecOptions`
of this instance.
"""

View File

@ -16,7 +16,7 @@
PyMongo only attempts to spawn the mongocryptd daemon process when automatic
client-side field level encryption is enabled. See
:ref:`automatic-client-side-encryption` for more info.
`Client-side Field Level Encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#client-side-field-level-encryption>`_ for more info.
"""
from __future__ import annotations

View File

@ -37,7 +37,7 @@ from pymongo.errors import ConfigurationError
if TYPE_CHECKING:
from pymongo.pyopenssl_context import SSLContext
from pymongo.typings import _AgnosticMongoClient, _DocumentTypeArg
from pymongo.typings import _AgnosticMongoClient
class AutoEncryptionOpts:
@ -47,7 +47,7 @@ class AutoEncryptionOpts:
self,
kms_providers: Mapping[str, Any],
key_vault_namespace: str,
key_vault_client: Optional[_AgnosticMongoClient[_DocumentTypeArg]] = None,
key_vault_client: Optional[_AgnosticMongoClient] = None,
schema_map: Optional[Mapping[str, Any]] = None,
bypass_auto_encryption: bool = False,
mongocryptd_uri: str = "mongodb://localhost:27020",
@ -75,7 +75,7 @@ class AutoEncryptionOpts:
encryption and explicit decryption is also supported for all users
with the :class:`~pymongo.asynchronous.encryption.AsyncClientEncryption` and :class:`~pymongo.encryption.ClientEncryption` classes.
See :ref:`automatic-client-side-encryption` for an example.
See `client-side field level encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#client-side-field-level-encryption>`_ for an example.
:param kms_providers: Map of KMS provider options. The `kms_providers`
map values differ by provider:
@ -104,7 +104,7 @@ class AutoEncryptionOpts:
KMS providers may be specified with an optional name suffix
separated by a colon, for example "kmip:name" or "aws:name".
Named KMS providers do not support :ref:`CSFLE on-demand credentials`.
Named KMS providers do not support `CSFLE on-demand credentials <https://www.mongodb.com/docs/manual/core/csfle/tutorials/aws/aws-automatic/?interface=driver&language=python#use-automatic-client-side-field-level-encryption-with-aws>`_.
Named KMS providers enable more than one of each KMS provider type to be configured.
For example, to configure multiple local KMS providers::

View File

@ -52,7 +52,7 @@ if TYPE_CHECKING:
# From the SDAM spec, the "node is shutting down" codes.
_SHUTDOWN_CODES: frozenset = frozenset(
_SHUTDOWN_CODES: frozenset[int] = frozenset(
[
11600, # InterruptedAtShutdown
91, # ShutdownInProgress
@ -61,7 +61,7 @@ _SHUTDOWN_CODES: frozenset = frozenset(
# From the SDAM spec, the "not primary" error codes are combined with the
# "node is recovering" error codes (of which the "node is shutting down"
# errors are a subset).
_NOT_PRIMARY_CODES: frozenset = (
_NOT_PRIMARY_CODES: frozenset[int] = (
frozenset(
[
10058, # LegacyNotPrimary <=3.2 "not primary" error code
@ -75,7 +75,7 @@ _NOT_PRIMARY_CODES: frozenset = (
| _SHUTDOWN_CODES
)
# From the retryable writes spec.
_RETRYABLE_ERROR_CODES: frozenset = _NOT_PRIMARY_CODES | frozenset(
_RETRYABLE_ERROR_CODES: frozenset[int] = _NOT_PRIMARY_CODES | frozenset(
[
7, # HostNotFound
6, # HostUnreachable
@ -95,7 +95,7 @@ _AUTHENTICATION_FAILURE_CODE: int = 18
# Note - to avoid bugs from forgetting which of these are all lowercase and
# which are camelCase, and at the same time avoid having to add a test for
# every command, use all lowercase here and test against command_name.lower().
_SENSITIVE_COMMANDS: set = {
_SENSITIVE_COMMANDS: set[str] = {
"authenticate",
"saslstart",
"saslcontinue",

View File

@ -333,7 +333,7 @@ def _op_msg_no_header(
command: Mapping[str, Any],
identifier: str,
docs: Optional[list[Mapping[str, Any]]],
opts: CodecOptions,
opts: CodecOptions[Any],
) -> tuple[bytes, int, int]:
"""Get a OP_MSG message.
@ -365,7 +365,7 @@ def _op_msg_compressed(
command: Mapping[str, Any],
identifier: str,
docs: Optional[list[Mapping[str, Any]]],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: Union[SnappyContext, ZlibContext, ZstdContext],
) -> tuple[int, bytes, int, int]:
"""Internal OP_MSG message helper."""
@ -379,7 +379,7 @@ def _op_msg_uncompressed(
command: Mapping[str, Any],
identifier: str,
docs: Optional[list[Mapping[str, Any]]],
opts: CodecOptions,
opts: CodecOptions[Any],
) -> tuple[int, bytes, int, int]:
"""Internal uncompressed OP_MSG message helper."""
data, total_size, max_bson_size = _op_msg_no_header(flags, command, identifier, docs, opts)
@ -396,7 +396,7 @@ def _op_msg(
command: MutableMapping[str, Any],
dbname: str,
read_preference: Optional[_ServerMode],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: Union[SnappyContext, ZlibContext, ZstdContext, None] = None,
) -> tuple[int, bytes, int, int]:
"""Get a OP_MSG message."""
@ -430,7 +430,7 @@ def _query_impl(
num_to_return: int,
query: Mapping[str, Any],
field_selector: Optional[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
) -> tuple[bytes, int]:
"""Get an OP_QUERY message."""
encoded = _dict_to_bson(query, False, opts)
@ -461,7 +461,7 @@ def _query_compressed(
num_to_return: int,
query: Mapping[str, Any],
field_selector: Optional[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: Union[SnappyContext, ZlibContext, ZstdContext],
) -> tuple[int, bytes, int]:
"""Internal compressed query message helper."""
@ -479,7 +479,7 @@ def _query_uncompressed(
num_to_return: int,
query: Mapping[str, Any],
field_selector: Optional[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
) -> tuple[int, bytes, int]:
"""Internal query message helper."""
op_query, max_bson_size = _query_impl(
@ -500,7 +500,7 @@ def _query(
num_to_return: int,
query: Mapping[str, Any],
field_selector: Optional[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: Union[SnappyContext, ZlibContext, ZstdContext, None] = None,
) -> tuple[int, bytes, int]:
"""Get a **query** message."""
@ -598,7 +598,7 @@ class _BulkWriteContextBase:
listeners: _EventListeners,
session: Optional[_AgnosticClientSession],
op_type: int,
codec: CodecOptions,
codec: CodecOptions[Any],
):
self.db_name = database_name
self.conn = conn
@ -679,7 +679,7 @@ class _BulkWriteContext(_BulkWriteContextBase):
listeners: _EventListeners,
session: Optional[_AgnosticClientSession],
op_type: int,
codec: CodecOptions,
codec: CodecOptions[Any],
):
super().__init__(
database_name,
@ -771,7 +771,7 @@ def _batched_op_msg_impl(
command: Mapping[str, Any],
docs: list[Mapping[str, Any]],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
buf: _BytesIO,
) -> tuple[list[Mapping[str, Any]], int]:
@ -839,7 +839,7 @@ def _encode_batched_op_msg(
command: Mapping[str, Any],
docs: list[Mapping[str, Any]],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
) -> tuple[bytes, list[Mapping[str, Any]]]:
"""Encode the next batched insert, update, or delete operation
@ -860,7 +860,7 @@ def _batched_op_msg_compressed(
command: Mapping[str, Any],
docs: list[Mapping[str, Any]],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
) -> tuple[int, bytes, list[Mapping[str, Any]]]:
"""Create the next batched insert, update, or delete operation
@ -878,7 +878,7 @@ def _batched_op_msg(
command: Mapping[str, Any],
docs: list[Mapping[str, Any]],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
) -> tuple[int, bytes, list[Mapping[str, Any]]]:
"""OP_MSG implementation entry point."""
@ -910,7 +910,7 @@ def _do_batched_op_msg(
operation: int,
command: MutableMapping[str, Any],
docs: list[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
) -> tuple[int, bytes, list[Mapping[str, Any]]]:
"""Create the next batched insert, update, or delete operation
@ -939,7 +939,7 @@ class _ClientBulkWriteContext(_BulkWriteContextBase):
operation_id: int,
listeners: _EventListeners,
session: Optional[_AgnosticClientSession],
codec: CodecOptions,
codec: CodecOptions[Any],
):
super().__init__(
database_name,
@ -1043,7 +1043,7 @@ def _client_batched_op_msg_impl(
operations: list[tuple[str, Mapping[str, Any]]],
namespaces: list[str],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _ClientBulkWriteContext,
buf: _BytesIO,
) -> tuple[list[Mapping[str, Any]], list[Mapping[str, Any]], int]:
@ -1161,7 +1161,7 @@ def _client_encode_batched_op_msg(
operations: list[tuple[str, Mapping[str, Any]]],
namespaces: list[str],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _ClientBulkWriteContext,
) -> tuple[bytes, list[Mapping[str, Any]], list[Mapping[str, Any]]]:
"""Encode the next batched client-level bulkWrite
@ -1180,7 +1180,7 @@ def _client_batched_op_msg_compressed(
operations: list[tuple[str, Mapping[str, Any]]],
namespaces: list[str],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _ClientBulkWriteContext,
) -> tuple[int, bytes, list[Mapping[str, Any]], list[Mapping[str, Any]]]:
"""Create the next batched client-level bulkWrite operation
@ -1200,7 +1200,7 @@ def _client_batched_op_msg(
operations: list[tuple[str, Mapping[str, Any]]],
namespaces: list[str],
ack: bool,
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _ClientBulkWriteContext,
) -> tuple[int, bytes, list[Mapping[str, Any]], list[Mapping[str, Any]]]:
"""OP_MSG implementation entry point for client-level bulkWrite."""
@ -1229,7 +1229,7 @@ def _client_do_batched_op_msg(
command: MutableMapping[str, Any],
operations: list[tuple[str, Mapping[str, Any]]],
namespaces: list[str],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _ClientBulkWriteContext,
) -> tuple[int, bytes, list[Mapping[str, Any]], list[Mapping[str, Any]]]:
"""Create the next batched client-level bulkWrite
@ -1253,7 +1253,7 @@ def _encode_batched_write_command(
operation: int,
command: MutableMapping[str, Any],
docs: list[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
) -> tuple[bytes, list[Mapping[str, Any]]]:
"""Encode the next batched insert, update, or delete command."""
@ -1272,7 +1272,7 @@ def _batched_write_command_impl(
operation: int,
command: MutableMapping[str, Any],
docs: list[Mapping[str, Any]],
opts: CodecOptions,
opts: CodecOptions[Any],
ctx: _BulkWriteContext,
buf: _BytesIO,
) -> tuple[list[Mapping[str, Any]], int]:
@ -1383,7 +1383,7 @@ class _OpReply:
errobj = {"ok": 0, "errmsg": msg, "code": 43}
raise CursorNotFound(msg, 43, errobj)
elif self.flags & 2:
error_object: dict = bson.BSON(self.documents).decode()
error_object: dict[str, Any] = bson.BSON(self.documents).decode()
# Fake the ok field if it doesn't exist.
error_object.setdefault("ok", 0)
if error_object["$err"].startswith(HelloCompat.LEGACY_ERROR):
@ -1405,7 +1405,7 @@ class _OpReply:
def unpack_response(
self,
cursor_id: Optional[int] = None,
codec_options: CodecOptions = _UNICODE_REPLACE_CODEC_OPTIONS,
codec_options: CodecOptions[Any] = _UNICODE_REPLACE_CODEC_OPTIONS,
user_fields: Optional[Mapping[str, Any]] = None,
legacy_response: bool = False,
) -> list[dict[str, Any]]:
@ -1431,7 +1431,7 @@ class _OpReply:
return bson.decode_all(self.documents, codec_options)
return bson._decode_all_selective(self.documents, codec_options, user_fields)
def command_response(self, codec_options: CodecOptions) -> dict[str, Any]:
def command_response(self, codec_options: CodecOptions[Any]) -> dict[str, Any]:
"""Unpack a command response."""
docs = self.unpack_response(codec_options=codec_options)
assert self.number_returned == 1
@ -1491,7 +1491,7 @@ class _OpMsg:
def unpack_response(
self,
cursor_id: Optional[int] = None,
codec_options: CodecOptions = _UNICODE_REPLACE_CODEC_OPTIONS,
codec_options: CodecOptions[Any] = _UNICODE_REPLACE_CODEC_OPTIONS,
user_fields: Optional[Mapping[str, Any]] = None,
legacy_response: bool = False,
) -> list[dict[str, Any]]:
@ -1508,7 +1508,7 @@ class _OpMsg:
assert not legacy_response
return bson._decode_all_selective(self.payload_document, codec_options, user_fields)
def command_response(self, codec_options: CodecOptions) -> dict[str, Any]:
def command_response(self, codec_options: CodecOptions[Any]) -> dict[str, Any]:
"""Unpack a command response."""
return self.unpack_response(codec_options=codec_options)[0]
@ -1583,7 +1583,7 @@ class _Query:
ntoskip: int,
spec: Mapping[str, Any],
fields: Optional[Mapping[str, Any]],
codec_options: CodecOptions,
codec_options: CodecOptions[Any],
read_preference: _ServerMode,
limit: int,
batch_size: int,
@ -1757,7 +1757,7 @@ class _GetMore:
coll: str,
ntoreturn: int,
cursor_id: int,
codec_options: CodecOptions,
codec_options: CodecOptions[Any],
read_preference: _ServerMode,
session: Optional[_AgnosticClientSession],
client: _AgnosticMongoClient,
@ -1871,7 +1871,7 @@ class _RawBatchGetMore(_GetMore):
return False
class _CursorAddress(tuple):
class _CursorAddress(tuple[Any, ...]):
"""The server address (host, port) of a cursor, with namespace property."""
__namespace: Any

View File

@ -1347,7 +1347,11 @@ class ServerHeartbeatSucceededEvent(_ServerHeartbeatEvent):
__slots__ = ("__duration", "__reply")
def __init__(
self, duration: float, reply: Hello, connection_id: _Address, awaited: bool = False
self,
duration: float,
reply: Hello[dict[str, Any]],
connection_id: _Address,
awaited: bool = False,
) -> None:
super().__init__(connection_id, awaited)
self.__duration = duration
@ -1359,7 +1363,7 @@ class ServerHeartbeatSucceededEvent(_ServerHeartbeatEvent):
return self.__duration
@property
def reply(self) -> Hello:
def reply(self) -> Hello[dict[str, Any]]:
"""An instance of :class:`~pymongo.hello.Hello`."""
return self.__reply
@ -1647,7 +1651,7 @@ class _EventListeners:
_handle_exception()
def publish_server_heartbeat_succeeded(
self, connection_id: _Address, duration: float, reply: Hello, awaited: bool
self, connection_id: _Address, duration: float, reply: Hello[dict[str, Any]], awaited: bool
) -> None:
"""Publish a ServerHeartbeatSucceededEvent to all server heartbeat
listeners.

View File

@ -96,7 +96,7 @@ if sys.platform != "win32":
view = memoryview(buf)
sent = 0
def _is_ready(fut: Future) -> None:
def _is_ready(fut: Future[Any]) -> None:
if fut.done():
return
fut.set_result(None)
@ -139,7 +139,7 @@ if sys.platform != "win32":
mv = memoryview(bytearray(length))
total_read = 0
def _is_ready(fut: Future) -> None:
def _is_ready(fut: Future[Any]) -> None:
if fut.done():
return
fut.set_result(None)
@ -486,15 +486,15 @@ class PyMongoProtocol(BufferedProtocol):
self._message_size = 0
self._op_code = 0
self._connection_lost = False
self._read_waiter: Optional[Future] = None
self._read_waiter: Optional[Future[Any]] = None
self._timeout = timeout
self._is_compressed = False
self._compressor_id: Optional[int] = None
self._max_message_size = MAX_MESSAGE_SIZE
self._response_to: Optional[int] = None
self._closed = asyncio.get_running_loop().create_future()
self._pending_messages: collections.deque[Future] = collections.deque()
self._done_messages: collections.deque[Future] = collections.deque()
self._pending_messages: collections.deque[Future[Any]] = collections.deque()
self._done_messages: collections.deque[Future[Any]] = collections.deque()
def settimeout(self, timeout: float | None) -> None:
self._timeout = timeout

View File

@ -53,7 +53,7 @@ class AsyncPeriodicExecutor:
self._min_interval = min_interval
self._target = target
self._stopped = False
self._task: Optional[asyncio.Task] = None
self._task: Optional[asyncio.Task[Any]] = None
self._name = name
self._skip_sleep = False

View File

@ -551,7 +551,7 @@ class ReadPreference:
Nearest(tag_sets=[{"node":"analytics"}])
See :doc:`/examples/high_availability` for code examples.
See `Read and Write Settings <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/configure/#read-and-write-settings>`_ for code examples.
A read preference is used in three cases:

View File

@ -69,7 +69,7 @@ class ServerDescription:
def __init__(
self,
address: _Address,
hello: Optional[Hello] = None,
hello: Optional[Hello[dict[str, Any]]] = None,
round_trip_time: Optional[float] = None,
error: Optional[Exception] = None,
min_round_trip_time: float = 0.0,
@ -299,4 +299,4 @@ class ServerDescription:
)
# For unittesting only. Use under no circumstances!
_host_to_round_trip_time: dict = {}
_host_to_round_trip_time: dict = {} # type: ignore[type-arg]

View File

@ -56,17 +56,22 @@ if HAVE_SSL:
if HAVE_PYSSL:
PYSSLError: Any = _pyssl.SSLError
BLOCKING_IO_ERRORS: tuple = _ssl.BLOCKING_IO_ERRORS + _pyssl.BLOCKING_IO_ERRORS
BLOCKING_IO_READ_ERROR: tuple = (_pyssl.BLOCKING_IO_READ_ERROR, _ssl.BLOCKING_IO_READ_ERROR)
BLOCKING_IO_WRITE_ERROR: tuple = (
BLOCKING_IO_ERRORS: tuple = ( # type: ignore[type-arg]
_ssl.BLOCKING_IO_ERRORS + _pyssl.BLOCKING_IO_ERRORS
)
BLOCKING_IO_READ_ERROR: tuple = ( # type: ignore[type-arg]
_pyssl.BLOCKING_IO_READ_ERROR,
_ssl.BLOCKING_IO_READ_ERROR,
)
BLOCKING_IO_WRITE_ERROR: tuple = ( # type: ignore[type-arg]
_pyssl.BLOCKING_IO_WRITE_ERROR,
_ssl.BLOCKING_IO_WRITE_ERROR,
)
else:
PYSSLError = _ssl.SSLError
BLOCKING_IO_ERRORS = _ssl.BLOCKING_IO_ERRORS
BLOCKING_IO_READ_ERROR = (_ssl.BLOCKING_IO_READ_ERROR,)
BLOCKING_IO_WRITE_ERROR = (_ssl.BLOCKING_IO_WRITE_ERROR,)
BLOCKING_IO_ERRORS: tuple = _ssl.BLOCKING_IO_ERRORS # type: ignore[type-arg, no-redef]
BLOCKING_IO_READ_ERROR: tuple = (_ssl.BLOCKING_IO_READ_ERROR,) # type: ignore[type-arg, no-redef]
BLOCKING_IO_WRITE_ERROR: tuple = (_ssl.BLOCKING_IO_WRITE_ERROR,) # type: ignore[type-arg, no-redef]
SSLError = _ssl.SSLError
BLOCKING_IO_LOOKUP_ERROR = BLOCKING_IO_READ_ERROR
@ -131,7 +136,7 @@ else:
pass
IPADDR_SAFE = False
BLOCKING_IO_ERRORS = ()
BLOCKING_IO_ERRORS: tuple = () # type: ignore[type-arg, no-redef]
def _has_sni(is_sync: bool) -> bool: # noqa: ARG001
return False

View File

@ -46,8 +46,8 @@ class _AggregationCommand:
def __init__(
self,
target: Union[Database, Collection],
cursor_class: type[CommandCursor],
target: Union[Database[Any], Collection[Any]],
cursor_class: type[CommandCursor[Any]],
pipeline: _Pipeline,
options: MutableMapping[str, Any],
explicit_session: bool,
@ -111,12 +111,12 @@ class _AggregationCommand:
"""The namespace in which the aggregate command is run."""
raise NotImplementedError
def _cursor_collection(self, cursor_doc: Mapping[str, Any]) -> Collection:
def _cursor_collection(self, cursor_doc: Mapping[str, Any]) -> Collection[Any]:
"""The Collection used for the aggregate command cursor."""
raise NotImplementedError
@property
def _database(self) -> Database:
def _database(self) -> Database[Any]:
"""The database against which the aggregation command is run."""
raise NotImplementedError
@ -205,7 +205,7 @@ class _AggregationCommand:
class _CollectionAggregationCommand(_AggregationCommand):
_target: Collection
_target: Collection[Any]
@property
def _aggregation_target(self) -> str:
@ -215,12 +215,12 @@ class _CollectionAggregationCommand(_AggregationCommand):
def _cursor_namespace(self) -> str:
return self._target.full_name
def _cursor_collection(self, cursor: Mapping[str, Any]) -> Collection:
def _cursor_collection(self, cursor: Mapping[str, Any]) -> Collection[Any]:
"""The Collection used for the aggregate command cursor."""
return self._target
@property
def _database(self) -> Database:
def _database(self) -> Database[Any]:
return self._target.database
@ -234,7 +234,7 @@ class _CollectionRawAggregationCommand(_CollectionAggregationCommand):
class _DatabaseAggregationCommand(_AggregationCommand):
_target: Database
_target: Database[Any]
@property
def _aggregation_target(self) -> int:
@ -245,10 +245,10 @@ class _DatabaseAggregationCommand(_AggregationCommand):
return f"{self._target.name}.$cmd.aggregate"
@property
def _database(self) -> Database:
def _database(self) -> Database[Any]:
return self._target
def _cursor_collection(self, cursor: Mapping[str, Any]) -> Collection:
def _cursor_collection(self, cursor: Mapping[str, Any]) -> Collection[Any]:
"""The Collection used for the aggregate command cursor."""
# Collection level aggregate may not always return the "ns" field
# according to our MockupDB tests. Let's handle that case for db level

View File

@ -257,7 +257,7 @@ class _OIDCAuthenticator:
) -> Mapping[str, Any]:
self.access_token = None
self.refresh_token = None
start_payload: dict = bson.decode(start_resp["payload"])
start_payload: dict[str, Any] = bson.decode(start_resp["payload"])
if "issuer" in start_payload:
self.idp_info = OIDCIdPInfo(**start_payload)
access_token = self._get_access_token()

View File

@ -248,7 +248,7 @@ class _Bulk:
request_id: int,
msg: bytes,
docs: list[Mapping[str, Any]],
client: MongoClient,
client: MongoClient[Any],
) -> dict[str, Any]:
"""A proxy for Connection.write_command that handles event publishing."""
cmd[bwc.field] = docs
@ -334,7 +334,7 @@ class _Bulk:
msg: bytes,
max_doc_size: int,
docs: list[Mapping[str, Any]],
client: MongoClient,
client: MongoClient[Any],
) -> Optional[Mapping[str, Any]]:
"""A proxy for Connection.unack_write that handles event publishing."""
if _COMMAND_LOGGER.isEnabledFor(logging.DEBUG):
@ -419,7 +419,7 @@ class _Bulk:
bwc: Union[_BulkWriteContext, _EncryptedBulkWriteContext],
cmd: dict[str, Any],
ops: list[Mapping[str, Any]],
client: MongoClient,
client: MongoClient[Any],
) -> list[Mapping[str, Any]]:
if self.is_encrypted:
_, batched_cmd, to_send = bwc.batch_command(cmd, ops)
@ -446,7 +446,7 @@ class _Bulk:
bwc: Union[_BulkWriteContext, _EncryptedBulkWriteContext],
cmd: dict[str, Any],
ops: list[Mapping[str, Any]],
client: MongoClient,
client: MongoClient[Any],
) -> tuple[dict[str, Any], list[Mapping[str, Any]]]:
if self.is_encrypted:
_, batched_cmd, to_send = bwc.batch_command(cmd, ops)

View File

@ -164,7 +164,7 @@ class ChangeStream(Generic[_DocumentType]):
raise NotImplementedError
@property
def _client(self) -> MongoClient:
def _client(self) -> MongoClient: # type: ignore[type-arg]
"""The client against which the aggregation commands for
this ChangeStream will be run.
"""
@ -206,7 +206,7 @@ class ChangeStream(Generic[_DocumentType]):
def _aggregation_pipeline(self) -> list[dict[str, Any]]:
"""Return the full aggregation pipeline for this ChangeStream."""
options = self._change_stream_options()
full_pipeline: list = [{"$changeStream": options}]
full_pipeline: list[dict[str, Any]] = [{"$changeStream": options}]
full_pipeline.extend(self._pipeline)
return full_pipeline
@ -237,7 +237,7 @@ class ChangeStream(Generic[_DocumentType]):
def _run_aggregation_cmd(
self, session: Optional[ClientSession], explicit_session: bool
) -> CommandCursor:
) -> CommandCursor: # type: ignore[type-arg]
"""Run the full aggregation pipeline for this ChangeStream and return
the corresponding CommandCursor.
"""
@ -257,7 +257,7 @@ class ChangeStream(Generic[_DocumentType]):
operation=_Op.AGGREGATE,
)
def _create_cursor(self) -> CommandCursor:
def _create_cursor(self) -> CommandCursor: # type: ignore[type-arg]
with self._client._tmp_session(self._session, close=False) as s:
return self._run_aggregation_cmd(session=s, explicit_session=self._session is not None)

View File

@ -88,7 +88,7 @@ class _ClientBulk:
def __init__(
self,
client: MongoClient,
client: MongoClient[Any],
write_concern: WriteConcern,
ordered: bool = True,
bypass_document_validation: Optional[bool] = None,
@ -233,7 +233,7 @@ class _ClientBulk:
msg: Union[bytes, dict[str, Any]],
op_docs: list[Mapping[str, Any]],
ns_docs: list[Mapping[str, Any]],
client: MongoClient,
client: MongoClient[Any],
) -> dict[str, Any]:
"""A proxy for Connection.write_command that handles event publishing."""
cmd["ops"] = op_docs
@ -324,7 +324,7 @@ class _ClientBulk:
msg: bytes,
op_docs: list[Mapping[str, Any]],
ns_docs: list[Mapping[str, Any]],
client: MongoClient,
client: MongoClient[Any],
) -> Optional[Mapping[str, Any]]:
"""A proxy for Connection.unack_write that handles event publishing."""
if _COMMAND_LOGGER.isEnabledFor(logging.DEBUG):

View File

@ -395,7 +395,7 @@ class _TxnState:
class _Transaction:
"""Internal class to hold transaction information in a ClientSession."""
def __init__(self, opts: Optional[TransactionOptions], client: MongoClient):
def __init__(self, opts: Optional[TransactionOptions], client: MongoClient[Any]):
self.opts = opts
self.state = _TxnState.NONE
self.sharded = False
@ -458,7 +458,7 @@ def _max_time_expired_error(exc: PyMongoError) -> bool:
# From the transactions spec, all the retryable writes errors plus
# WriteConcernTimeout.
_UNKNOWN_COMMIT_ERROR_CODES: frozenset = _RETRYABLE_ERROR_CODES | frozenset(
_UNKNOWN_COMMIT_ERROR_CODES: frozenset = _RETRYABLE_ERROR_CODES | frozenset( # type: ignore[type-arg]
[
64, # WriteConcernTimeout
50, # MaxTimeMSExpired
@ -498,13 +498,13 @@ class ClientSession:
def __init__(
self,
client: MongoClient,
client: MongoClient[Any],
server_session: Any,
options: SessionOptions,
implicit: bool,
) -> None:
# A MongoClient, a _ServerSession, a SessionOptions, and a set.
self._client: MongoClient = client
self._client: MongoClient[Any] = client
self._server_session = server_session
self._options = options
self._cluster_time: Optional[Mapping[str, Any]] = None
@ -550,7 +550,7 @@ class ClientSession:
self._end_session(lock=True)
@property
def client(self) -> MongoClient:
def client(self) -> MongoClient[Any]:
"""The :class:`~pymongo.mongo_client.MongoClient` this session was
created from.
"""
@ -748,7 +748,7 @@ class ClientSession:
write_concern: Optional[WriteConcern] = None,
read_preference: Optional[_ServerMode] = None,
max_commit_time_ms: Optional[int] = None,
) -> ContextManager:
) -> ContextManager[Any]:
"""Start a multi-statement transaction.
Takes the same arguments as :class:`TransactionOptions`.
@ -1118,7 +1118,7 @@ class _ServerSession:
self._transaction_id += 1
class _ServerSessionPool(collections.deque):
class _ServerSessionPool(collections.deque): # type: ignore[type-arg]
"""Pool of _ServerSession objects.
This class is thread-safe.

View File

@ -582,7 +582,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
conn: Connection,
command: MutableMapping[str, Any],
read_preference: Optional[_ServerMode] = None,
codec_options: Optional[CodecOptions] = None,
codec_options: Optional[CodecOptions[Mapping[str, Any]]] = None,
check: bool = True,
allowable_errors: Optional[Sequence[Union[str, int]]] = None,
read_concern: Optional[ReadConcern] = None,
@ -703,7 +703,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
bypass_document_validation: Optional[bool] = None,
session: Optional[ClientSession] = None,
comment: Optional[Any] = None,
let: Optional[Mapping] = None,
let: Optional[Mapping[str, Any]] = None,
) -> BulkWriteResult:
"""Send a batch of write operations to the server.
@ -761,7 +761,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
:return: An instance of :class:`~pymongo.results.BulkWriteResult`.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: `bypass_document_validation` requires server version
**>= 3.2**
@ -866,7 +866,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
:return: - An instance of :class:`~pymongo.results.InsertOneResult`.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: `bypass_document_validation` requires server version
**>= 3.2**
@ -935,7 +935,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
:return: An instance of :class:`~pymongo.results.InsertManyResult`.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: `bypass_document_validation` requires server version
**>= 3.2**
@ -2040,7 +2040,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
.. versionchanged:: 4.2
This method now always uses the `count`_ command. Due to an oversight in versions
5.0.0-5.0.8 of MongoDB, the count command was not included in V1 of the
:ref:`versioned-api-ref`. Users of the Stable API with estimated_document_count are
`versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_. Users of the Stable API with estimated_document_count are
recommended to upgrade their server version to 5.0.9+ or set
:attr:`pymongo.server_api.ServerApi.strict` to ``False`` to avoid encountering errors.
@ -2522,7 +2522,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
session: Optional[ClientSession] = None,
comment: Optional[Any] = None,
) -> CommandCursor[MutableMapping[str, Any]]:
codec_options: CodecOptions = CodecOptions(SON)
codec_options: CodecOptions[Mapping[str, Any]] = CodecOptions(SON)
coll = cast(
Collection[MutableMapping[str, Any]],
self.with_options(codec_options=codec_options, read_preference=ReadPreference.PRIMARY),
@ -2864,7 +2864,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
self,
aggregation_command: Type[_AggregationCommand],
pipeline: _Pipeline,
cursor_class: Type[CommandCursor],
cursor_class: Type[CommandCursor], # type: ignore[type-arg]
session: Optional[ClientSession],
explicit_session: bool,
let: Optional[Mapping[str, Any]] = None,
@ -2909,7 +2909,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
.. note:: This method does not support the 'explain' option. Please
use `PyMongoExplain <https://pypi.org/project/pymongoexplain/>`_
instead. An example is included in the :ref:`aggregate-examples`
instead. An example is included in the `aggregation example <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/aggregation/#aggregation-example>`_
documentation.
.. note:: The :attr:`~pymongo.collection.Collection.write_concern` of
@ -2970,7 +2970,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
The :meth:`aggregate` method always returns a CommandCursor. The
pipeline argument must be a list.
.. seealso:: :doc:`/examples/aggregation`
.. seealso:: `Aggregation <https://mongodb.com/docs/manual/applications/aggregation/>`_
.. _aggregate command:
https://mongodb.com/docs/manual/reference/command/aggregate
@ -3107,7 +3107,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
comment: Optional[Any] = None,
hint: Optional[_IndexKeyHint] = None,
**kwargs: Any,
) -> list:
) -> list[str]:
"""Get a list of distinct values for `key` among all documents
in this collection.
@ -3170,7 +3170,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
_server: Server,
conn: Connection,
read_preference: Optional[_ServerMode],
) -> list:
) -> list: # type: ignore[type-arg]
return (
self._command(
conn,
@ -3195,7 +3195,7 @@ class Collection(common.BaseObject, Generic[_DocumentType]):
array_filters: Optional[Sequence[Mapping[str, Any]]] = None,
hint: Optional[_IndexKeyHint] = None,
session: Optional[ClientSession] = None,
let: Optional[Mapping] = None,
let: Optional[Mapping[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Internal findAndModify helper."""

View File

@ -350,7 +350,7 @@ class CommandCursor(Generic[_DocumentType]):
else:
return None
def _next_batch(self, result: list, total: Optional[int] = None) -> bool:
def _next_batch(self, result: list, total: Optional[int] = None) -> bool: # type: ignore[type-arg]
"""Get all or some available documents from the cursor."""
if not len(self._data) and not self._killed:
self._refresh()
@ -457,7 +457,7 @@ class RawBatchCommandCursor(CommandCursor[_DocumentType]):
self,
response: Union[_OpReply, _OpMsg],
cursor_id: Optional[int],
codec_options: CodecOptions,
codec_options: CodecOptions[dict[str, Any]],
user_fields: Optional[Mapping[str, Any]] = None,
legacy_response: bool = False,
) -> list[Mapping[str, Any]]:

View File

@ -216,7 +216,7 @@ class Cursor(Generic[_DocumentType]):
# it anytime we change __limit.
self._empty = False
self._data: deque = deque()
self._data: deque = deque() # type: ignore[type-arg]
self._address: Optional[_Address] = None
self._retrieved = 0
@ -280,7 +280,7 @@ class Cursor(Generic[_DocumentType]):
"""
return self._clone(True)
def _clone(self, deepcopy: bool = True, base: Optional[Cursor] = None) -> Cursor:
def _clone(self, deepcopy: bool = True, base: Optional[Cursor] = None) -> Cursor: # type: ignore[type-arg]
"""Internal clone helper."""
if not base:
if self._explicit_session:
@ -322,7 +322,7 @@ class Cursor(Generic[_DocumentType]):
base.__dict__.update(data)
return base
def _clone_base(self, session: Optional[ClientSession]) -> Cursor:
def _clone_base(self, session: Optional[ClientSession]) -> Cursor: # type: ignore[type-arg]
"""Creates an empty Cursor object for information to be copied into."""
return self.__class__(self._collection, session=session)
@ -862,7 +862,7 @@ class Cursor(Generic[_DocumentType]):
if self._has_filter:
spec = dict(self._spec)
else:
spec = cast(dict, self._spec)
spec = cast(dict, self._spec) # type: ignore[type-arg]
spec["$where"] = code
self._spec = spec
return self
@ -886,7 +886,7 @@ class Cursor(Generic[_DocumentType]):
self,
response: Union[_OpReply, _OpMsg],
cursor_id: Optional[int],
codec_options: CodecOptions,
codec_options: CodecOptions, # type: ignore[type-arg]
user_fields: Optional[Mapping[str, Any]] = None,
legacy_response: bool = False,
) -> Sequence[_DocumentOut]:
@ -962,29 +962,33 @@ class Cursor(Generic[_DocumentType]):
return self._clone(deepcopy=True)
@overload
def _deepcopy(self, x: Iterable, memo: Optional[dict[int, Union[list, dict]]] = None) -> list:
def _deepcopy(self, x: Iterable, memo: Optional[dict[int, Union[list, dict]]] = None) -> list: # type: ignore[type-arg]
...
@overload
def _deepcopy(
self, x: SupportsItems, memo: Optional[dict[int, Union[list, dict]]] = None
) -> dict:
self,
x: SupportsItems, # type: ignore[type-arg]
memo: Optional[dict[int, Union[list, dict]]] = None, # type: ignore[type-arg]
) -> dict: # type: ignore[type-arg]
...
def _deepcopy(
self, x: Union[Iterable, SupportsItems], memo: Optional[dict[int, Union[list, dict]]] = None
) -> Union[list, dict]:
self,
x: Union[Iterable, SupportsItems], # type: ignore[type-arg]
memo: Optional[dict[int, Union[list, dict]]] = None, # type: ignore[type-arg]
) -> Union[list[Any], dict[str, Any]]:
"""Deepcopy helper for the data dictionary or list.
Regular expressions cannot be deep copied but as they are immutable we
don't have to copy them when cloning.
"""
y: Union[list, dict]
y: Union[list[Any], dict[str, Any]]
iterator: Iterable[tuple[Any, Any]]
if not hasattr(x, "items"):
y, is_list, iterator = [], True, enumerate(x)
else:
y, is_list, iterator = {}, False, cast("SupportsItems", x).items()
y, is_list, iterator = {}, False, cast("SupportsItems", x).items() # type: ignore[type-arg]
if memo is None:
memo = {}
val_id = id(x)
@ -1058,7 +1062,7 @@ class Cursor(Generic[_DocumentType]):
"""Explicitly close / kill this cursor."""
self._die_lock()
def distinct(self, key: str) -> list:
def distinct(self, key: str) -> list[str]:
"""Get a list of distinct values for `key` among all documents
in the result set of this query.
@ -1263,7 +1267,7 @@ class Cursor(Generic[_DocumentType]):
else:
raise StopIteration
def _next_batch(self, result: list, total: Optional[int] = None) -> bool:
def _next_batch(self, result: list, total: Optional[int] = None) -> bool: # type: ignore[type-arg]
"""Get all or some documents from the cursor."""
if not self._exhaust_checked:
self._exhaust_checked = True
@ -1323,7 +1327,7 @@ class Cursor(Generic[_DocumentType]):
return res
class RawBatchCursor(Cursor, Generic[_DocumentType]):
class RawBatchCursor(Cursor, Generic[_DocumentType]): # type: ignore[type-arg]
"""A cursor / iterator over raw batches of BSON data from a query result."""
_query_class = _RawBatchQuery

View File

@ -771,7 +771,7 @@ class Database(common.BaseObject, Generic[_DocumentType]):
self._name,
command,
read_preference,
codec_options,
codec_options, # type: ignore[arg-type]
check,
allowable_errors,
write_concern=write_concern,
@ -893,7 +893,7 @@ class Database(common.BaseObject, Generic[_DocumentType]):
when decoding the command response.
.. note:: If this client has been configured to use MongoDB Stable
API (see :ref:`versioned-api-ref`), then :meth:`command` will
API (see `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_), then :meth:`command` will
automatically add API versioning options to the given command.
Explicitly adding API versioning options in the command and
declaring an API version on the client is not supported.
@ -992,7 +992,7 @@ class Database(common.BaseObject, Generic[_DocumentType]):
when decoding the command response.
.. note:: If this client has been configured to use MongoDB Stable
API (see :ref:`versioned-api-ref`), then :meth:`command` will
API (see `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_), then :meth:`command` will
automatically add API versioning options to the given command.
Explicitly adding API versioning options in the command and
declaring an API version on the client is not supported.

View File

@ -576,7 +576,7 @@ class ClientEncryption(Generic[_DocumentType]):
creating data keys. It does not provide an API to query keys from the
key vault collection, as this can be done directly on the MongoClient.
See :ref:`explicit-client-side-encryption` for an example.
See `explicit client-side encryption <https://www.mongodb.com/docs/manual/core/csfle/fundamentals/manual-encryption/#csfle-explicit-encryption>`_ for an example.
:param kms_providers: Map of KMS provider options. The `kms_providers`
map values differ by provider:
@ -605,7 +605,7 @@ class ClientEncryption(Generic[_DocumentType]):
KMS providers may be specified with an optional name suffix
separated by a colon, for example "kmip:name" or "aws:name".
Named KMS providers do not support :ref:`CSFLE on-demand credentials`.
Named KMS providers do not support `CSFLE on-demand credentials <https://www.mongodb.com/docs/manual/core/csfle/tutorials/aws/aws-automatic/?interface=driver&language=python#use-automatic-client-side-field-level-encryption-with-aws>`_.
:param key_vault_namespace: The namespace for the key vault collection.
The key vault collection contains all data keys used for encryption
and decryption. Data keys are stored as documents in this MongoDB

View File

@ -14,7 +14,7 @@
"""Tools for connecting to MongoDB.
.. seealso:: :doc:`/examples/high_availability` for examples of connecting
.. seealso:: `Read and Write Settings <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/configure/#read-and-write-settings>`_ for examples of connecting
to replica sets or sets of mongos servers.
To get a :class:`~pymongo.database.Database` instance from a
@ -158,10 +158,10 @@ _ReadCall = Callable[
_IS_SYNC = True
_WriteOp = Union[
InsertOne,
InsertOne, # type: ignore[type-arg]
DeleteOne,
DeleteMany,
ReplaceOne,
ReplaceOne, # type: ignore[type-arg]
UpdateOne,
UpdateMany,
]
@ -173,7 +173,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
# Define order to retrieve options from ClientOptions for __repr__.
# No host/port; these are retrieved from TopologySettings.
_constructor_args = ("document_class", "tz_aware", "connect")
_clients: weakref.WeakValueDictionary = weakref.WeakValueDictionary()
_clients: weakref.WeakValueDictionary = weakref.WeakValueDictionary() # type: ignore[type-arg]
def __init__(
self,
@ -260,7 +260,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
print("Server not available")
.. warning:: When using PyMongo in a multiprocessing context, please
read :ref:`multiprocessing` first.
read `PyMongo multiprocessing <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/mongoclient/#multiprocessing>`_ first.
.. note:: Many of the following options can be passed using a MongoDB
URI or keyword parameters. If the same option is passed in a URI and
@ -296,7 +296,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
return DatetimeMS objects when the underlying datetime is
out-of-range and 'datetime_clamp' to clamp to the minimum and
maximum possible datetimes. Defaults to 'datetime'. See
:ref:`handling-out-of-range-datetimes` for details.
`handling out of range datetimes <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/dates-and-times/#handling-out-of-range-datetimes>`_ for details.
- `directConnection` (optional): if ``True``, forces this client to
connect directly to the specified MongoDB host as a standalone.
If ``false``, the client connects to the entire replica set of
@ -421,7 +421,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
package. By default no compression is used. Compression support
must also be enabled on the server. MongoDB 3.6+ supports snappy
and zlib compression. MongoDB 4.2+ adds support for zstd.
See :ref:`network-compression-example` for details.
See `compress network traffic <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/network-compression/#compress-network-traffic>`_ for details.
- `zlibCompressionLevel`: (int) The zlib compression level to use
when zlib is used as the wire protocol compressor. Supported values
are -1 through 9. -1 tells the zlib library to use its default
@ -432,7 +432,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
values are the strings: "standard", "pythonLegacy", "javaLegacy",
"csharpLegacy", and "unspecified" (the default). New applications
should consider setting this to "standard" for cross language
compatibility. See :ref:`handling-uuid-data-example` for details.
compatibility. See `handling UUID data <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/data-formats/uuid/#unspecified>`_ for details.
- `unicode_decode_error_handler`: The error handler to apply when
a Unicode-related error occurs during BSON decoding that would
otherwise raise :exc:`UnicodeDecodeError`. Valid options include
@ -496,7 +496,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
is set, it must be a positive integer greater than or equal to
90 seconds.
.. seealso:: :doc:`/examples/server_selection`
.. seealso:: `Customize Server Selection <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-options/server-selection/#customize-server-selection>`_
| **Authentication:**
@ -522,7 +522,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
To specify the session token for MONGODB-AWS authentication pass
``authMechanismProperties='AWS_SESSION_TOKEN:<session token>'``.
.. seealso:: :doc:`/examples/authentication`
.. seealso:: `Authentication <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/authentication/#authentication-mechanisms>`_
| **TLS/SSL configuration:**
@ -585,7 +585,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
:class:`~pymongo.encryption_options.AutoEncryptionOpts` which
configures this client to automatically encrypt collection commands
and automatically decrypt results. See
:ref:`automatic-client-side-encryption` for an example.
`client-side field level encryption <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/security/in-use-encryption/#client-side-field-level-encryption>`_ for an example.
If a :class:`MongoClient` is configured with
``auto_encryption_opts`` and a non-None ``maxPoolSize``, a
separate internal ``MongoClient`` is created if any of the
@ -601,7 +601,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
- `server_api`: A
:class:`~pymongo.server_api.ServerApi` which configures this
client to use Stable API. See :ref:`versioned-api-ref` for
client to use Stable API. See `versioned API <https://www.mongodb.com/docs/manual/reference/stable-api/#what-is-the-stable-api--and-should-you-use-it->`_ for
details.
.. seealso:: The MongoDB documentation on `connections <https://dochub.mongodb.org/core/connections>`_.
@ -712,15 +712,15 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
reconnect to one of them. In PyMongo 3, the client monitors its
network latency to all the mongoses continuously, and distributes
operations evenly among those with the lowest latency. See
:ref:`mongos-load-balancing` for more information.
`load balancing <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/connect/connection-targets/#replica-sets>`_ for more information.
The ``connect`` option is added.
The ``start_request``, ``in_request``, and ``end_request`` methods
are removed, as well as the ``auto_start_request`` option.
The ``copy_database`` method is removed, see the
:doc:`copy_database examples </examples/copydb>` for alternatives.
The ``copy_database`` method is removed, see
`Copy and Clone Databases <https://www.mongodb.com/docs/database-tools/mongodump/mongodump-examples/#copy-and-clone-databases>`_ for alternatives.
The :meth:`MongoClient.disconnect` method is removed; it was a
synonym for :meth:`~pymongo.MongoClient.close`.
@ -847,7 +847,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
self._default_database_name = dbase
self._lock = _create_lock()
self._kill_cursors_queue: list = []
self._kill_cursors_queue: list = [] # type: ignore[type-arg]
self._encrypter: Optional[_Encrypter] = None
@ -1064,7 +1064,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
# Reset the session pool to avoid duplicate sessions in the child process.
self._topology._session_pool.reset()
def _duplicate(self, **kwargs: Any) -> MongoClient:
def _duplicate(self, **kwargs: Any) -> MongoClient: # type: ignore[type-arg]
args = self._init_kwargs.copy()
args.update(kwargs)
return MongoClient(**args)
@ -1546,7 +1546,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
self, name, codec_options, read_preference, write_concern, read_concern
)
def _database_default_options(self, name: str) -> database.Database:
def _database_default_options(self, name: str) -> database.Database: # type: ignore[type-arg]
"""Get a Database instance with the default settings."""
return self.get_database(
name,
@ -1883,7 +1883,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
def _run_operation(
self,
operation: Union[_Query, _GetMore],
unpack_res: Callable,
unpack_res: Callable, # type: ignore[type-arg]
address: Optional[_Address] = None,
) -> Response:
"""Run a _Query/_GetMore operation and return a Response.
@ -2257,7 +2257,7 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
@contextlib.contextmanager
def _tmp_session(
self, session: Optional[client_session.ClientSession], close: bool = True
) -> Generator[Optional[client_session.ClientSession], None, None]:
) -> Generator[Optional[client_session.ClientSession], None]:
"""If provided session is None, lend a temporary session."""
if session is not None:
if not isinstance(session, client_session.ClientSession):
@ -2300,8 +2300,8 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
.. versionchanged:: 3.6
Added ``session`` parameter.
"""
return cast(
dict,
return cast( # type: ignore[redundant-cast]
dict[str, Any],
self.admin.command(
"buildinfo", read_preference=ReadPreference.PRIMARY, session=session
),
@ -2428,13 +2428,13 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
@_csot.apply
def bulk_write(
self,
models: Sequence[_WriteOp[_DocumentType]],
models: Sequence[_WriteOp],
session: Optional[ClientSession] = None,
ordered: bool = True,
verbose_results: bool = False,
bypass_document_validation: Optional[bool] = None,
comment: Optional[Any] = None,
let: Optional[Mapping] = None,
let: Optional[Mapping[str, Any]] = None,
write_concern: Optional[WriteConcern] = None,
) -> ClientBulkWriteResult:
"""Send a batch of write operations, potentially across multiple namespaces, to the server.
@ -2509,9 +2509,9 @@ class MongoClient(common.BaseObject, Generic[_DocumentType]):
:return: An instance of :class:`~pymongo.results.ClientBulkWriteResult`.
.. seealso:: For more info, see :doc:`/examples/client_bulk`.
.. seealso:: For more info, see `Client Bulk Write <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/bulk-write/#client-bulk-write-example>`_.
.. seealso:: :ref:`writes-and-ids`
.. seealso:: `Writes and ids <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/crud/insert/#overview>`_
.. note:: requires MongoDB server version 8.0+.
@ -2620,7 +2620,12 @@ class _MongoClientErrorHandler:
"handled",
)
def __init__(self, client: MongoClient, server: Server, session: Optional[ClientSession]):
def __init__(
self,
client: MongoClient, # type: ignore[type-arg]
server: Server,
session: Optional[ClientSession],
):
if not isinstance(client, MongoClient):
# This is for compatibility with mocked and subclassed types, such as in Motor.
if not any(cls.__name__ == "MongoClient" for cls in type(client).__mro__):
@ -2692,7 +2697,7 @@ class _ClientConnectionRetryable(Generic[T]):
def __init__(
self,
mongo_client: MongoClient,
mongo_client: MongoClient, # type: ignore[type-arg]
func: _WriteCall[T] | _ReadCall[T],
bulk: Optional[Union[_Bulk, _ClientBulk]],
operation: str,

View File

@ -349,7 +349,7 @@ class Monitor(MonitorBase):
)
return sd
def _check_with_socket(self, conn: Connection) -> tuple[Hello, float]:
def _check_with_socket(self, conn: Connection) -> tuple[Hello, float]: # type: ignore[type-arg]
"""Return (Hello, round_trip_time).
Can raise ConnectionFailure or OperationFailure.
@ -421,12 +421,13 @@ class SrvMonitor(MonitorBase):
if len(seedlist) == 0:
# As per the spec: this should be treated as a failure.
raise Exception
except Exception:
except Exception as exc:
# As per the spec, upon encountering an error:
# - An error must not be raised
# - SRV records must be rescanned every heartbeatFrequencyMS
# - Topology must be left unchanged
self.request_check()
_debug_log(_SDAM_LOGGER, message="SRV monitor check failed", failure=repr(exc))
return None
else:
self._executor.update_interval(max(ttl, common.MIN_SRV_RESCAN_INTERVAL))

View File

@ -66,7 +66,7 @@ def command(
read_preference: Optional[_ServerMode],
codec_options: CodecOptions[_DocumentType],
session: Optional[ClientSession],
client: Optional[MongoClient],
client: Optional[MongoClient[Any]],
check: bool = True,
allowable_errors: Optional[Sequence[Union[str, int]]] = None,
address: Optional[_Address] = None,

View File

@ -201,7 +201,7 @@ class Connection:
self.conn.get_conn.settimeout(timeout)
def apply_timeout(
self, client: MongoClient, cmd: Optional[MutableMapping[str, Any]]
self, client: MongoClient[Any], cmd: Optional[MutableMapping[str, Any]]
) -> Optional[float]:
# CSOT: use remaining timeout when set.
timeout = _csot.remaining()
@ -255,7 +255,7 @@ class Connection:
else:
return {HelloCompat.LEGACY_CMD: 1, "helloOk": True}
def hello(self) -> Hello:
def hello(self) -> Hello[dict[str, Any]]:
return self._hello(None, None)
def _hello(
@ -357,7 +357,7 @@ class Connection:
dbname: str,
spec: MutableMapping[str, Any],
read_preference: _ServerMode = ReadPreference.PRIMARY,
codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
codec_options: CodecOptions[Mapping[str, Any]] = DEFAULT_CODEC_OPTIONS, # type: ignore[assignment]
check: bool = True,
allowable_errors: Optional[Sequence[Union[str, int]]] = None,
read_concern: Optional[ReadConcern] = None,
@ -365,7 +365,7 @@ class Connection:
parse_write_concern_error: bool = False,
collation: Optional[_CollationIn] = None,
session: Optional[ClientSession] = None,
client: Optional[MongoClient] = None,
client: Optional[MongoClient[Any]] = None,
retryable_write: bool = False,
publish_events: bool = True,
user_fields: Optional[Mapping[str, Any]] = None,
@ -417,7 +417,7 @@ class Connection:
spec,
self.is_mongos,
read_preference,
codec_options,
codec_options, # type: ignore[arg-type]
session,
client,
check,
@ -489,7 +489,7 @@ class Connection:
self.send_message(msg, max_doc_size)
def write_command(
self, request_id: int, msg: bytes, codec_options: CodecOptions
self, request_id: int, msg: bytes, codec_options: CodecOptions[Mapping[str, Any]]
) -> dict[str, Any]:
"""Send "insert" etc. command, returning response as a dict.
@ -541,7 +541,7 @@ class Connection:
)
def validate_session(
self, client: Optional[MongoClient], session: Optional[ClientSession]
self, client: Optional[MongoClient[Any]], session: Optional[ClientSession]
) -> None:
"""Validate this session before use with client.
@ -596,7 +596,7 @@ class Connection:
self,
command: MutableMapping[str, Any],
session: Optional[ClientSession],
client: Optional[MongoClient],
client: Optional[MongoClient[Any]],
) -> None:
"""Add $clusterTime."""
if client:
@ -730,7 +730,7 @@ class Pool:
# LIFO pool. Sockets are ordered on idle time. Sockets claimed
# and returned to pool from the left side. Stale sockets removed
# from the right side.
self.conns: collections.deque = collections.deque()
self.conns: collections.deque[Connection] = collections.deque()
self.active_contexts: set[_CancellationContext] = set()
self.lock = _create_lock()
self._max_connecting_cond = _create_condition(self.lock)
@ -837,8 +837,8 @@ class Pool:
if service_id is None:
sockets, self.conns = self.conns, collections.deque()
else:
discard: collections.deque = collections.deque()
keep: collections.deque = collections.deque()
discard: collections.deque = collections.deque() # type: ignore[type-arg]
keep: collections.deque = collections.deque() # type: ignore[type-arg]
for conn in self.conns:
if conn.service_id == service_id:
discard.append(conn)
@ -864,7 +864,7 @@ class Pool:
if close:
if not _IS_SYNC:
asyncio.gather(
*[conn.close_conn(ConnectionClosedReason.POOL_CLOSED) for conn in sockets],
*[conn.close_conn(ConnectionClosedReason.POOL_CLOSED) for conn in sockets], # type: ignore[func-returns-value]
return_exceptions=True,
)
else:
@ -901,7 +901,7 @@ class Pool:
)
if not _IS_SYNC:
asyncio.gather(
*[conn.close_conn(ConnectionClosedReason.STALE) for conn in sockets],
*[conn.close_conn(ConnectionClosedReason.STALE) for conn in sockets], # type: ignore[func-returns-value]
return_exceptions=True,
)
else:
@ -915,7 +915,7 @@ class Pool:
self.is_writable = is_writable
with self.lock:
for _socket in self.conns:
_socket.update_is_writable(self.is_writable)
_socket.update_is_writable(self.is_writable) # type: ignore[arg-type]
def reset(
self, service_id: Optional[ObjectId] = None, interrupt_connections: bool = False
@ -952,7 +952,7 @@ class Pool:
close_conns.append(self.conns.pop())
if not _IS_SYNC:
asyncio.gather(
*[conn.close_conn(ConnectionClosedReason.IDLE) for conn in close_conns],
*[conn.close_conn(ConnectionClosedReason.IDLE) for conn in close_conns], # type: ignore[func-returns-value]
return_exceptions=True,
)
else:
@ -1473,4 +1473,4 @@ class Pool:
# not safe to acquire a lock in __del__.
if _IS_SYNC:
for conn in self.conns:
conn.close_conn(None)
conn.close_conn(None) # type: ignore[unused-coroutine]

View File

@ -66,7 +66,7 @@ class Server:
monitor: Monitor,
topology_id: Optional[ObjectId] = None,
listeners: Optional[_EventListeners] = None,
events: Optional[ReferenceType[Queue]] = None,
events: Optional[ReferenceType[Queue[Any]]] = None,
) -> None:
"""Represent one MongoDB server."""
self._description = server_description
@ -142,7 +142,7 @@ class Server:
read_preference: _ServerMode,
listeners: Optional[_EventListeners],
unpack_res: Callable[..., list[_DocumentOut]],
client: MongoClient,
client: MongoClient[Any],
) -> Response:
"""Run a _Query or _GetMore operation and return a Response object.

View File

@ -84,7 +84,7 @@ _IS_SYNC = True
_pymongo_dir = str(Path(__file__).parent)
def process_events_queue(queue_ref: weakref.ReferenceType[queue.Queue]) -> bool:
def process_events_queue(queue_ref: weakref.ReferenceType[queue.Queue]) -> bool: # type: ignore[type-arg]
q = queue_ref()
if not q:
return False # Cancel PeriodicExecutor.
@ -186,7 +186,7 @@ class Topology:
if self._publish_server or self._publish_tp:
assert self._events is not None
weak: weakref.ReferenceType[queue.Queue]
weak: weakref.ReferenceType[queue.Queue[Any]]
def target() -> bool:
return process_events_queue(weak)

View File

@ -569,8 +569,8 @@ def _update_rs_from_primary(
return _check_has_primary(sds), replica_set_name, max_set_version, max_election_id
if server_description.max_wire_version is None or server_description.max_wire_version < 17:
new_election_tuple: tuple = (server_description.set_version, server_description.election_id)
max_election_tuple: tuple = (max_set_version, max_election_id)
new_election_tuple: tuple = (server_description.set_version, server_description.election_id) # type: ignore[type-arg]
max_election_tuple: tuple = (max_set_version, max_election_id) # type: ignore[type-arg]
if None not in new_election_tuple:
if None not in max_election_tuple and new_election_tuple < max_election_tuple:
# Stale primary, set to type Unknown.

View File

@ -51,7 +51,7 @@ ClusterTime = Mapping[str, Any]
_T = TypeVar("_T")
# Type hinting types for compatibility between async and sync classes
_AgnosticMongoClient = Union["AsyncMongoClient", "MongoClient"]
_AgnosticMongoClient = Union["AsyncMongoClient", "MongoClient"] # type: ignore[type-arg]
_AgnosticConnection = Union["AsyncConnection", "Connection"]
_AgnosticClientSession = Union["AsyncClientSession", "ClientSession"]
_AgnosticBulk = Union["_AsyncBulk", "_Bulk"]

View File

@ -149,11 +149,12 @@ markers = [
strict = true
show_error_codes = true
pretty = true
disable_error_code = ["type-arg", "no-any-return"]
disable_error_code = ["no-any-return"]
disallow_any_generics = true
[[tool.mypy.overrides]]
module = ["test.*"]
disable_error_code = ["no-untyped-def", "no-untyped-call"]
disable_error_code = ["type-arg", "no-untyped-def", "no-untyped-call"]
[[tool.mypy.overrides]]
module = ["service_identity.*"]

Some files were not shown because too many files have changed in this diff Show More