PYTHON-961 - Add warning to prevent deadlock during getaddrinfo.

This commit is contained in:
aherlihy 2015-07-21 17:07:55 -04:00
parent 6b644054b7
commit 07ff7ea721
3 changed files with 40 additions and 0 deletions

View File

@ -375,3 +375,20 @@ just that field::
>>> cur = coll.find({}, projection={'dt': False})
.. _multiprocessing:
Using PyMongo with Multiprocessing
----------------------------------
There are a few things to be aware of when using multiprocessing with PyMongo.
On certain platforms (`defined here <https://hg.python.org/cpython/file/d2b8354e87f5/Modules/socketmodule.c#l187>`_)
a :class:`~pymongo.mongo_client.MongoClient` that is created before forking and then used in a
child process MUST be initialized with ``connect=False``. If ``connect`` cannot be ``False``,
then the :class:`~pymongo.mongo_client.MongoClient` must be created AFTER forking.
This is because CPython must acquire a lock before calling
`getaddrinfo() <https://hg.python.org/cpython/file/d2b8354e87f5/Modules/socketmodule.c#l4203>`_.
A deadlock will occur if the process that owns the :class:`~pymongo.mongo_client.MongoClient` forks (on the main
thread) while the client's monitor thread is inside the ``getaddrinfo()`` call.
PyMongo will issue a warning if there is a chance of this deadlock occurring.

View File

@ -98,6 +98,9 @@ class MongoClient(common.BaseObject):
passwords reserved characters like ':', '/', '+' and '@' must be
escaped following RFC 2396.
.. warning:: When using PyMongo in a multiprocessing context, please
read :ref:`multiprocessing` first.
:Parameters:
- `host` (optional): hostname or IP address of the
instance to connect to, or a mongodb URI, or a list of

View File

@ -14,8 +14,10 @@
"""Internal class to monitor a topology of one or more servers."""
import os
import random
import threading
import warnings
from bson.py3compat import itervalues
from pymongo import common
@ -50,13 +52,31 @@ class Topology(object):
self._lock = threading.Lock()
self._condition = self._settings.condition_class(self._lock)
self._servers = {}
self._pid = None
def open(self):
    """Start monitoring, or restart after a fork.

    No effect if called multiple times.

    The process id is recorded the first time open() is called. Every
    later call compares the current process id against the recorded one.

    .. warning:: To help avoid a deadlock during Python's getaddrinfo
      call, open() issues a warning when it is called from a different
      process than the one that first opened the Topology. To prevent
      this from happening, MongoClient must be created after any
      forking OR MongoClient must be started with connect=False.
    """
    with self._lock:
        if self._pid is None:
            # First open(): remember which process owns this topology.
            self._pid = os.getpid()
        elif os.getpid() != self._pid:
            # Opened in one process and reopened in another, i.e. the
            # client was connected before a fork.
            warnings.warn(
                "MongoClient opened before fork. Create MongoClient "
                "with connect=False, or create client after forking. "
                "See PyMongo's documentation for details: http://api."
                "mongodb.org/python/current/faq.html#using-pymongo-"
                "with-multiprocessing")
        self._ensure_opened()
def select_servers(self,