499 lines
17 KiB
Python
499 lines
17 KiB
Python
# Copyright 2011-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
|
|
|
|
import os
|
|
import socket
|
|
import sys
|
|
import thread
|
|
import time
|
|
import threading
|
|
import weakref
|
|
|
|
from pymongo.errors import ConnectionFailure
|
|
|
|
|
|
# Optional dependency: the ssl module. have_ssl records whether the import
# succeeded.
try:
    import ssl
except ImportError:
    have_ssl = False
else:
    have_ssl = True


# PyMongo does not use greenlet-aware connection pools by default, but it will
# attempt to do so if you pass use_greenlets=True to Connection or
# ReplicaSetConnection. have_greenlet records whether greenlet is importable.
try:
    import greenlet
except ImportError:
    have_greenlet = False
else:
    have_greenlet = True


# Sentinel request states: NO_REQUEST means the current thread is not in a
# request; NO_SOCKET_YET means a request was started but no socket has been
# assigned to it yet.
NO_REQUEST = None
NO_SOCKET_YET = -1


# On Jython the select module exposes a CPython-compatible variant under a
# different name; bind whichever applies to the name `select`.
if not sys.platform.startswith('java'):
    from select import select
else:
    from select import cpython_compatible_select as select
|
|
|
|
|
|
def _closed(sock):
|
|
"""Return True if we know socket has been closed, False otherwise.
|
|
"""
|
|
try:
|
|
rd, _, _ = select([sock], [], [], 0)
|
|
# Any exception here is equally bad (select.error, ValueError, etc.).
|
|
except:
|
|
return True
|
|
return len(rd) > 0
|
|
|
|
|
|
class SocketInfo(object):
    """Store a socket with some metadata.

    Two SocketInfo objects compare equal (and hash alike) when they wrap
    the same underlying socket, regardless of their other attributes.
    """
    def __init__(self, sock, pool_id):
        """
        :Parameters:
          - `sock`: the socket object to wrap
          - `pool_id`: the owning pool's pool_id at creation time
        """
        self.sock = sock
        # Starts empty; nothing in this class adds to it.
        self.authset = set()
        self.closed = False
        self.last_checkout = time.time()

        # The pool's pool_id changes with each reset() so we can close sockets
        # created before the last reset.
        self.pool_id = pool_id

    def close(self):
        """Mark this SocketInfo closed and close the wrapped socket.

        Errors raised by the socket's own close() are ignored.
        """
        self.closed = True
        # Avoid exceptions on interpreter shutdown.
        try:
            self.sock.close()
        except:
            pass

    def __eq__(self, other):
        # Need to check if other is NO_REQUEST or NO_SOCKET_YET, and then check
        # if its sock is the same as ours
        return hasattr(other, 'sock') and self.sock == other.sock

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this, two
        # SocketInfos wrapping the same socket would be both == and !=.
        return not self == other

    def __hash__(self):
        return hash(self.sock)

    def __repr__(self):
        return "SocketInfo(%s)%s at %s" % (
            repr(self.sock),
            self.closed and " CLOSED" or "",
            id(self)
        )
|
|
|
|
|
|
class BasePool(object):
    """Base class for socket pools with per-thread (or per-greenlet) requests.

    Idle sockets are kept in ``self.sockets``. A "request" pins one socket
    to the current thread ident until end_request() is called or the thread
    is detected as dead. Subclasses must implement _get_thread_ident() and
    _watch_current_thread().
    """
    def __init__(self, pair, max_size, net_timeout, conn_timeout, use_ssl):
        """
        :Parameters:
          - `pair`: a (hostname, port) tuple
          - `max_size`: approximate number of idle connections to keep open
          - `net_timeout`: timeout in seconds for operations on open connection
          - `conn_timeout`: timeout in seconds for establishing connection
          - `use_ssl`: bool, if True use an encrypted connection
        """
        self.sockets = set()
        self.lock = threading.Lock()

        # Keep track of resets, so we notice sockets created before the most
        # recent reset and close them.
        self.pool_id = 0
        self.pid = os.getpid()
        self.pair = pair
        self.max_size = max_size
        self.net_timeout = net_timeout
        self.conn_timeout = conn_timeout
        self.use_ssl = use_ssl

        # Map self._get_thread_ident() -> request socket
        self._tid_to_sock = {}

        # Weakrefs used by subclasses to watch for dead threads or greenlets.
        # We must keep a reference to the weakref to keep it alive for at least
        # as long as what it references, otherwise its delete-callback won't
        # fire.
        self._refs = {}

    def reset(self):
        """Bump pool_id, record the current pid and close all idle sockets."""
        # Ignore this race condition -- if many threads are resetting at once,
        # the pool_id will definitely change, which is all we care about.
        self.pool_id += 1
        self.pid = os.getpid()

        # Swapping variables is not atomic. We need to ensure no other
        # thread is modifying self.sockets, or replacing it, in this
        # critical section. The lock is acquired *before* the try so that
        # release() only runs if acquire() succeeded.
        self.lock.acquire()
        try:
            sockets, self.sockets = self.sockets, set()
        finally:
            self.lock.release()

        for sock_info in sockets:
            sock_info.close()

    def create_connection(self, pair):
        """Connect to *pair* and return the socket object.

        This is a modified version of create_connection from
        CPython >=2.6.

        If *pair* is falsy, self.pair is used instead. Every address
        returned by getaddrinfo is tried in order; the last socket.error
        seen is re-raised if none succeeds.
        """
        host, port = pair or self.pair

        # Don't try IPv6 if we don't support it. Also skip it if host
        # is 'localhost' (::1 is fine). Avoids slow connect issues
        # like PYTHON-356.
        family = socket.AF_INET
        if socket.has_ipv6 and host != 'localhost':
            family = socket.AF_UNSPEC

        err = None
        for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
            af, socktype, proto, dummy, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                sock.settimeout(self.conn_timeout or 20.0)
                sock.connect(sa)
                return sock
            except socket.error:
                # sys.exc_info() instead of "except socket.error, e" so the
                # same source is valid on every Python version.
                err = sys.exc_info()[1]
                if sock is not None:
                    sock.close()

        if err is not None:
            raise err
        else:
            # This likely means we tried to connect to an IPv6 only
            # host with an OS/kernel or Python interpreter that doesn't
            # support IPv6. The test case is Jython2.5.1 which doesn't
            # support IPv6 at all.
            raise socket.error('getaddrinfo failed')

    def connect(self, pair):
        """Connect to Mongo and return a new (connected) socket. Note that the
        pool does not keep a reference to the socket -- you must call
        return_socket() when you're done with it.

        The result is a SocketInfo stamped with the current pool_id. Raises
        ConnectionFailure if use_ssl is set and the SSL handshake fails.
        """
        sock = self.create_connection(pair)

        if self.use_ssl:
            try:
                sock = ssl.wrap_socket(sock)
            except ssl.SSLError:
                sock.close()
                raise ConnectionFailure("SSL handshake failed. MongoDB may "
                                        "not be configured with SSL support.")

        sock.settimeout(self.net_timeout)
        return SocketInfo(sock, self.pool_id)

    def get_socket(self, pair=None):
        """Get a socket from the pool.

        Returns a :class:`SocketInfo` object wrapping a connected
        :class:`socket.socket`. If the current thread is in a request, the
        request's socket is returned (and assigned first if necessary).

        :Parameters:
          - `pair`: optional (hostname, port) tuple
        """
        # We use the pid here to avoid issues with fork / multiprocessing.
        # See test.test_connection:TestConnection.test_fork for an example of
        # what could go wrong otherwise
        if self.pid != os.getpid():
            self.reset()

        # Have we opened a socket for this request?
        req_state = self._get_request_state()
        if req_state not in (NO_SOCKET_YET, NO_REQUEST):
            # There's a socket for this request, check it and return it
            checked_sock = self._check(req_state, pair)
            if checked_sock != req_state:
                self._set_request_state(checked_sock)

            checked_sock.last_checkout = time.time()
            return checked_sock

        # We're not in a request, just get any free socket or create one
        sock_info, from_pool = None, None
        try:
            # set.pop() isn't atomic in Jython less than 2.7, see
            # http://bugs.jython.org/issue1854
            self.lock.acquire()
            try:
                sock_info, from_pool = self.sockets.pop(), True
            finally:
                self.lock.release()
        except KeyError:
            # Pool was empty; the lock is already released at this point.
            sock_info, from_pool = self.connect(pair), False

        if from_pool:
            sock_info = self._check(sock_info, pair)

        if req_state == NO_SOCKET_YET:
            # start_request has been called but we haven't assigned a socket to
            # the request yet. Let's use this socket for this request until
            # end_request.
            self._set_request_state(sock_info)

        sock_info.last_checkout = time.time()
        return sock_info

    def start_request(self):
        """Put the current thread in a request, unless it already is in one."""
        if self._get_request_state() == NO_REQUEST:
            # Add a placeholder value so we know we're in a request, but we
            # have no socket assigned to the request yet.
            self._set_request_state(NO_SOCKET_YET)

    def in_request(self):
        """Return True if the current thread is in a request."""
        return self._get_request_state() != NO_REQUEST

    def end_request(self):
        """End the current thread's request and return its socket, if any,
        to the pool.
        """
        sock_info = self._get_request_state()
        self._set_request_state(NO_REQUEST)
        if sock_info not in (NO_REQUEST, NO_SOCKET_YET):
            self._return_socket(sock_info)

    def discard_socket(self, sock_info):
        """Close and discard the active socket.
        """
        if sock_info not in (NO_REQUEST, NO_SOCKET_YET):
            sock_info.close()

            if sock_info == self._get_request_state():
                # Discarding request socket; prepare to use a new request
                # socket on next get_socket().
                self._set_request_state(NO_SOCKET_YET)

    def maybe_return_socket(self, sock_info):
        """Return the socket to the pool unless it's the request socket.

        If the process id has changed since the pool last recorded it, the
        pool is reset instead. Closed sockets are never returned.
        """
        if self.pid != os.getpid():
            self.reset()
        elif sock_info not in (NO_REQUEST, NO_SOCKET_YET):
            if sock_info.closed:
                return

            if sock_info != self._get_request_state():
                self._return_socket(sock_info)

    def _return_socket(self, sock_info):
        """Return socket to the pool. If pool is full the socket is discarded.
        """
        # Acquire before the try so release() only runs on a held lock.
        self.lock.acquire()
        try:
            if len(self.sockets) < self.max_size:
                self.sockets.add(sock_info)
            else:
                sock_info.close()
        finally:
            self.lock.release()

    def _check(self, sock_info, pair):
        """This side-effecty function checks if this pool has been reset since
        the last time this socket was used, or if the socket has been closed by
        some external network error, and if so, attempts to create a new socket.
        If this connection attempt fails we reset the pool and reraise the
        error.

        Checking sockets lets us avoid seeing *some*
        :class:`~pymongo.errors.AutoReconnect` exceptions on server
        hiccups, etc. We only do this if it's been > 1 second since
        the last socket checkout, to keep performance reasonable - we
        can't avoid AutoReconnects completely anyway.
        """
        error = False

        if sock_info.closed:
            error = True

        elif self.pool_id != sock_info.pool_id:
            # Socket predates the most recent reset().
            sock_info.close()
            error = True

        elif time.time() - sock_info.last_checkout > 1:
            if _closed(sock_info.sock):
                sock_info.close()
                error = True

        if not error:
            return sock_info
        else:
            try:
                return self.connect(pair)
            except socket.error:
                self.reset()
                raise

    def _set_request_state(self, sock_info):
        """Record *sock_info* (a SocketInfo, NO_SOCKET_YET or NO_REQUEST) as
        the current thread's request state.

        Setting NO_REQUEST forgets the thread entirely. Otherwise the state
        is stored and, the first time for this thread ident, a death-watch
        callback is registered through _watch_current_thread().
        """
        tid = self._get_thread_ident()

        if sock_info == NO_REQUEST:
            # Ending a request
            self._refs.pop(tid, None)
            self._tid_to_sock.pop(tid, None)
        else:
            self._tid_to_sock[tid] = sock_info

            if tid not in self._refs:
                # Closure over tid and poolref. Don't refer directly to self,
                # otherwise there's a cycle.

                # Do not access threadlocals in this function, or any
                # function it calls! In the case of the Pool subclass and
                # mod_wsgi 2.x, on_thread_died() is triggered when mod_wsgi
                # calls PyThreadState_Clear(), which dereferences the
                # ThreadVigil and triggers the weakref callback. Accessing
                # thread locals in this function, while PyThreadState_Clear()
                # is in progress can cause leaks, see PYTHON-353.
                poolref = weakref.ref(self)

                def on_thread_died(ref):
                    try:
                        pool = poolref()
                        if pool:
                            # End the request
                            pool._refs.pop(tid, None)
                            request_sock = pool._tid_to_sock.pop(tid, None)

                            # Was thread ever assigned a socket before it died?
                            if request_sock not in (NO_REQUEST, NO_SOCKET_YET):
                                pool._return_socket(request_sock)
                    except:
                        # Random exceptions on interpreter shutdown.
                        pass

                self._watch_current_thread(on_thread_died)

    def _get_request_state(self):
        """Return the current thread's request state: its SocketInfo,
        NO_SOCKET_YET, or NO_REQUEST if it is not in a request.
        """
        tid = self._get_thread_ident()
        return self._tid_to_sock.get(tid, NO_REQUEST)

    # Overridable methods for pools.
    def _get_thread_ident(self):
        """Return an identifier for the current thread. Subclasses must
        override.
        """
        raise NotImplementedError

    def _watch_current_thread(self, callback):
        """Arrange for *callback* to run when the current thread dies.
        Subclasses must override.
        """
        raise NotImplementedError

    def __del__(self):
        # Avoid ResourceWarnings in Python 3. getattr() guards against a
        # partially constructed pool whose __init__ never assigned these.
        for sock_info in getattr(self, 'sockets', ()):
            sock_info.close()

        for request_sock in getattr(self, '_tid_to_sock', {}).values():
            if request_sock not in (NO_REQUEST, NO_SOCKET_YET):
                request_sock.close()
|
|
|
|
|
|
class Pool(BasePool):
    """Thread-based connection pool.

    A thread that calls start_request() is given a socket of its own, which
    goes back to the pool when that thread calls end_request() or dies.
    """

    # Once a thread has called start_request() and been assigned a socket, we
    # need to learn if it dies without calling end_request, so its socket can
    # go back to the idle pool, self.sockets. Thread death is detected with a
    # weakref callback on an object stored in a thread local. Weakrefs are
    # allowed on instances of object subclasses but not on object() itself,
    # hence this empty class.
    class ThreadVigil(object):
        pass

    def __init__(self, *args, **kwargs):
        super(Pool, self).__init__(*args, **kwargs)
        self._local = threading.local()

    # Overrides
    def _get_thread_ident(self):
        return thread.get_ident()

    def _get_request_state(self):
        # In Python <= 2.6, a dead thread's locals aren't cleaned up until the
        # next access. That opens a nasty race: a new thread that happens to
        # reuse a previous thread's ident calls _get_request_state() and
        # believes it is still inside the previous thread's request. Only when
        # some thread next touches self._local.vigil is the dead thread's
        # vigil destroyed, triggering on_thread_died and returning the request
        # socket to self.sockets. From then on another thread can acquire that
        # socket, and two threads sharing one socket will read each other's
        # data. A symptom is an AssertionError in
        # Connection.__receive_message_on_socket().

        # Touching the thread local first guarantees a previous thread's
        # locals are cleaned up before request state is looked up, so even a
        # thread with a recycled ident does not think it is in the old
        # request.
        getattr(self._local, 'vigil', None)
        return super(Pool, self)._get_request_state()

    def _watch_current_thread(self, callback):
        ident = self._get_thread_ident()
        watcher = Pool.ThreadVigil()
        # The thread local keeps the vigil alive for as long as the thread's
        # locals exist.
        self._local.vigil = watcher
        self._refs[ident] = weakref.ref(watcher, callback)
|
|
|
|
|
|
class GreenletPool(BasePool):
    """Greenlet-based connection pool.

    A greenlet that calls start_request() is given a socket of its own, which
    goes back to the pool when that greenlet calls end_request() or dies.
    """
    # Overrides
    def _get_thread_ident(self):
        return id(greenlet.getcurrent())

    def _watch_current_thread(self, callback):
        running = greenlet.getcurrent()
        ident = self._get_thread_ident()

        if not hasattr(running, 'link'):
            # A non-Gevent greenlet (small g), or the main greenlet: watch
            # it through a weakref callback.
            watcher = weakref.ref(running, callback)
        else:
            # A Gevent Greenlet (capital G), which inherits from greenlet
            # and offers a 'link' method for detecting when the Greenlet
            # exits. No weakref is needed, but the ident is still recorded.
            running.link(callback)
            watcher = None

        self._refs[ident] = watcher
|
|
|
|
|
|
class Request(object):
    """
    Context manager handed back by Connection.start_request(), which lets you
    write `with connection.start_request(): do_something()` in Python 2.5+.
    Leaving the block ends the request on the wrapped connection.
    """
    def __init__(self, connection):
        # The object whose end_request() is called when the request ends.
        self.connection = connection

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end()
        # False tells Python not to suppress an exception raised inside the
        # with-block.
        return False

    def end(self):
        """End the request by calling end_request() on the connection."""
        self.connection.end_request()
|