From 07ff7ea721cda71e6adaa2f5dbc78928f116501b Mon Sep 17 00:00:00 2001 From: aherlihy Date: Tue, 21 Jul 2015 17:07:55 -0400 Subject: [PATCH] PYTHON-961 - Add warning to prevent deadlock during getaddrinfo. --- doc/faq.rst | 17 +++++++++++++++++ pymongo/mongo_client.py | 3 +++ pymongo/topology.py | 20 ++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/doc/faq.rst b/doc/faq.rst index db5494fa7..2f7a90604 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -375,3 +375,20 @@ just that field:: >>> cur = coll.find({}, projection={'dt': False}) +.. _multiprocessing: + +Using PyMongo with Multiprocessing +---------------------------------- +There are a few things to be aware of when using multiprocessing with PyMongo. +On certain platforms (`defined here <https://hg.python.org/cpython/file/d2b8354e87f5/Modules/socketmodule.c#l187>`_) +:class:`~pymongo.mongo_client.MongoClient` MUST be initialized with ``connect=False`` if a :class:`~pymongo.mongo_client.MongoClient` used in a +child process is initialized before forking. If ``connect`` cannot be False, +then :class:`~pymongo.mongo_client.MongoClient` must be initialized AFTER forking. + +This is because CPython must acquire a lock before calling +`getaddrinfo() <https://docs.python.org/2/library/socket.html#socket.getaddrinfo>`_. +A deadlock will occur if the :class:`~pymongo.mongo_client.MongoClient`'s parent process forks (on the main +thread) while its monitor thread is in the getaddrinfo() system call. + +PyMongo will issue a warning if there is a chance of this deadlock occurring. + diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index ab1878602..2b9180c4c 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -98,6 +98,9 @@ class MongoClient(common.BaseObject): passwords reserved characters like ':', '/', '+' and '@' must be escaped following RFC 2396. + .. warning:: When using PyMongo in a multiprocessing context, please + read :ref:`multiprocessing` first. 
+ :Parameters: - `host` (optional): hostname or IP address of the instance to connect to, or a mongodb URI, or a list of diff --git a/pymongo/topology.py b/pymongo/topology.py index de3b34782..72be9f800 100644 --- a/pymongo/topology.py +++ b/pymongo/topology.py @@ -14,8 +14,10 @@ """Internal class to monitor a topology of one or more servers.""" +import os import random import threading +import warnings from bson.py3compat import itervalues from pymongo import common @@ -50,13 +52,31 @@ class Topology(object): self._lock = threading.Lock() self._condition = self._settings.condition_class(self._lock) self._servers = {} + self._pid = None def open(self): """Start monitoring, or restart after a fork. No effect if called multiple times. + + .. warning:: To avoid a deadlock during Python's getaddrinfo call, + this method will generate a warning if open() is called from a different + process than the one that initialized the Topology. To prevent this + from happening, MongoClient must be created after any forking OR + MongoClient must be started with connect=False. """ with self._lock: + if self._pid is None: + self._pid = os.getpid() + else: + if os.getpid() != self._pid: + warnings.warn( + "MongoClient opened before fork. Create MongoClient " + "with connect=False, or create client after forking. " + "See PyMongo's documentation for details: http://api." + "mongodb.org/python/current/faq.html#using-pymongo-" + "with-multiprocessing") + self._ensure_opened() def select_servers(self,