mongo-python-driver/test/asynchronous/test_retryable_reads.py
Noah Stapp 912ef337f9
PYTHON-5798 - Overload retargeting prose tests do not ensure that sec… (#2760)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-04-16 13:32:50 -04:00

526 lines
21 KiB
Python

# Copyright 2019-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test retryable reads spec."""
from __future__ import annotations
import os
import pprint
import sys
import threading
from test.asynchronous.utils import async_ensure_all_connected, async_set_fail_point
from unittest import mock
from pymongo import MongoClient
from pymongo.common import MAX_ADAPTIVE_RETRIES
from pymongo.errors import OperationFailure, PyMongoError
sys.path[0:0] = [""]
from test.asynchronous import (
AsyncIntegrationTest,
AsyncPyMongoTestCase,
async_client_context,
client_knobs,
unittest,
)
from test.utils_shared import (
CMAPListener,
OvertCommandListener,
)
from pymongo.monitoring import (
CommandFailedEvent,
ConnectionCheckedOutEvent,
ConnectionCheckOutFailedEvent,
ConnectionCheckOutFailedReason,
PoolClearedEvent,
)
_IS_SYNC = False
class TestClientOptions(AsyncPyMongoTestCase):
async def test_default(self):
client = self.simple_client(connect=False)
self.assertEqual(client.options.retry_reads, True)
async def test_kwargs(self):
client = self.simple_client(retryReads=True, connect=False)
self.assertEqual(client.options.retry_reads, True)
client = self.simple_client(retryReads=False, connect=False)
self.assertEqual(client.options.retry_reads, False)
async def test_uri(self):
client = self.simple_client("mongodb://h/?retryReads=true", connect=False)
self.assertEqual(client.options.retry_reads, True)
client = self.simple_client("mongodb://h/?retryReads=false", connect=False)
self.assertEqual(client.options.retry_reads, False)
class FindThread(threading.Thread):
def __init__(self, collection):
super().__init__()
self.daemon = True
self.collection = collection
self.passed = False
async def run(self):
await self.collection.find_one({})
self.passed = True
class TestPoolPausedError(AsyncIntegrationTest):
# Pools don't get paused in load balanced mode.
RUN_ON_LOAD_BALANCER = False
@async_client_context.require_sync
@async_client_context.require_failCommand_blockConnection
@client_knobs(heartbeat_frequency=0.05, min_heartbeat_interval=0.05)
async def test_pool_paused_error_is_retryable(self):
if "PyPy" in sys.version:
# Tracked in PYTHON-3519
self.skipTest("Test is flaky on PyPy")
cmap_listener = CMAPListener()
cmd_listener = OvertCommandListener()
client = await self.async_rs_or_single_client(
maxPoolSize=1, event_listeners=[cmap_listener, cmd_listener]
)
for _ in range(10):
cmap_listener.reset()
cmd_listener.reset()
threads = [FindThread(client.pymongo_test.test) for _ in range(2)]
fail_command = {
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"blockConnection": True,
"blockTimeMS": 1000,
"errorCode": 91,
},
}
async with self.fail_point(fail_command):
for thread in threads:
thread.start()
for thread in threads:
thread.join()
for thread in threads:
self.assertTrue(thread.passed)
# It's possible that SDAM can rediscover the server and mark the
# pool ready before the thread in the wait queue has a chance
# to run. Repeat the test until the thread actually encounters
# a PoolClearedError.
if cmap_listener.event_count(ConnectionCheckOutFailedEvent):
break
# Via CMAP monitoring, assert that the first check out succeeds.
cmap_events = cmap_listener.events_by_type(
(ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, PoolClearedEvent)
)
msg = pprint.pformat(cmap_listener.events)
self.assertIsInstance(cmap_events[0], ConnectionCheckedOutEvent, msg)
self.assertIsInstance(cmap_events[1], PoolClearedEvent, msg)
self.assertIsInstance(cmap_events[2], ConnectionCheckOutFailedEvent, msg)
self.assertEqual(cmap_events[2].reason, ConnectionCheckOutFailedReason.CONN_ERROR, msg)
self.assertIsInstance(cmap_events[3], ConnectionCheckedOutEvent, msg)
# Connection check out failures are not reflected in command
# monitoring because we only publish command events _after_ checking
# out a connection.
started = cmd_listener.started_events
msg = pprint.pformat(cmd_listener.results)
self.assertEqual(3, len(started), msg)
succeeded = cmd_listener.succeeded_events
self.assertEqual(2, len(succeeded), msg)
failed = cmd_listener.failed_events
self.assertEqual(1, len(failed), msg)
class TestRetryableReads(AsyncIntegrationTest):
async def asyncSetUp(self) -> None:
await super().asyncSetUp()
self.setup_client = MongoClient(**async_client_context.client_options)
self.addCleanup(self.setup_client.close)
# TODO: After PYTHON-4595 we can use async event handlers and remove this workaround.
def configure_fail_point_sync(self, command_args, off=False) -> None:
cmd = {"configureFailPoint": "failCommand", **command_args}
if off:
cmd["mode"] = "off"
cmd.pop("data", None)
self.setup_client.admin.command(cmd)
@async_client_context.require_multiple_mongoses
@async_client_context.require_failCommand_fail_point
async def test_retryable_reads_are_retried_on_a_different_mongos_when_one_is_available(self):
fail_command = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {"failCommands": ["find"], "errorCode": 6},
}
mongos_clients = []
for mongos in async_client_context.mongos_seeds().split(","):
client = await self.async_rs_or_single_client(mongos)
await async_set_fail_point(client, fail_command)
mongos_clients.append(client)
listener = OvertCommandListener()
client = await self.async_rs_or_single_client(
async_client_context.mongos_seeds(),
event_listeners=[listener],
retryReads=True,
)
with self.assertRaises(OperationFailure):
await client.t.t.find_one({})
# Disable failpoints on each mongos
for client in mongos_clients:
fail_command["mode"] = "off"
await async_set_fail_point(client, fail_command)
self.assertEqual(len(listener.failed_events), 2)
self.assertEqual(len(listener.succeeded_events), 0)
# Assert that both events occurred on different mongos.
assert listener.failed_events[0].connection_id != listener.failed_events[1].connection_id
@async_client_context.require_multiple_mongoses
@async_client_context.require_failCommand_fail_point
async def test_retryable_reads_are_retried_on_the_same_mongos_when_no_others_are_available(
self
):
fail_command = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {"failCommands": ["find"], "errorCode": 6},
}
host = async_client_context.mongos_seeds().split(",")[0]
mongos_client = await self.async_rs_or_single_client(host)
await async_set_fail_point(mongos_client, fail_command)
listener = OvertCommandListener()
client = await self.async_rs_or_single_client(
host,
directConnection=False,
event_listeners=[listener],
retryReads=True,
)
await client.t.t.find_one({})
# Disable failpoint.
fail_command["mode"] = "off"
await async_set_fail_point(mongos_client, fail_command)
# Assert that exactly one failed command event and one succeeded command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# Assert that both events occurred on the same mongos.
assert listener.succeeded_events[0].connection_id == listener.failed_events[0].connection_id
@async_client_context.require_failCommand_fail_point
async def test_retryable_reads_are_retried_on_the_same_implicit_session(self):
listener = OvertCommandListener()
client = await self.async_rs_or_single_client(
directConnection=False,
event_listeners=[listener],
retryReads=True,
)
await client.t.t.insert_one({"x": 1})
commands = [
("aggregate", lambda: client.t.t.count_documents({})),
("aggregate", lambda: client.t.t.aggregate([{"$match": {}}])),
("count", lambda: client.t.t.estimated_document_count()),
("distinct", lambda: client.t.t.distinct("x")),
("find", lambda: client.t.t.find_one({})),
("listDatabases", lambda: client.list_databases()),
("listCollections", lambda: client.t.list_collections()),
("listIndexes", lambda: client.t.t.list_indexes()),
]
for command_name, operation in commands:
listener.reset()
fail_command = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {"failCommands": [command_name], "errorCode": 6},
}
async with self.fail_point(fail_command):
await operation()
# Assert that both events occurred on the same session.
command_docs = [
event.command
for event in listener.started_events
if event.command_name == command_name
]
self.assertEqual(len(command_docs), 2)
self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"])
self.assertIsNot(command_docs[0], command_docs[1])
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available_and_overload_retargeting_is_enabled(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, `enableOverloadRetargeting=True`, and command event monitoring enabled.
client = await self.async_rs_or_single_client(
event_listeners=[listener],
retryReads=True,
readPreference="primaryPreferred",
enableOverloadRetargeting=True,
)
# Ensure the client has discovered all nodes.
await async_ensure_all_connected(client)
# 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 6,
},
}
await async_set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
await client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred on different servers.
assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
client = await self.async_rs_or_single_client(
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
)
# Ensure the client has discovered all nodes.
await async_ensure_all_connected(client)
# 2. Configure a fail point with the RetryableError error label.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError"],
"errorCode": 6,
},
}
await async_set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
await client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred the same server.
assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_03_retryable_reads_caused_by_overload_errors_are_retried_on_the_same_replicaset_server_when_one_is_available_and_overload_retargeting_is_disabled(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
client = await self.async_rs_or_single_client(
event_listeners=[listener],
retryReads=True,
readPreference="primaryPreferred",
)
# Ensure the client has discovered all nodes.
await async_ensure_all_connected(client)
# 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 6,
},
}
await async_set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
await client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred on the same server.
assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0) # type:ignore[untyped-decorator]
async def test_overload_then_nonoverload_retries_increased_reads(self) -> None:
# Create a client.
listener = OvertCommandListener()
# Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error
# code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels.
overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Configure a fail point with error code `91` (ShutdownInProgress) with only the `RetryableError` error label.
non_overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["find"],
"errorCode": 91,
"errorLabels": ["RetryableError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Configure the fail point command only if the failed event is for the 91 error configured in step 2.
if listener.failed_events:
return
assert event.failure["code"] == 91
self.configure_fail_point_sync(non_overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
listener.failed = failed
client = await self.async_rs_client(event_listeners=[listener])
await client.test.test.insert_one({})
self.configure_fail_point_sync(overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
with self.assertRaises(PyMongoError):
await client.test.test.find_one()
started_finds = [e for e in listener.started_events if e.command_name == "find"]
self.assertEqual(len(started_finds), MAX_ADAPTIVE_RETRIES + 1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0) # type:ignore[untyped-decorator]
async def test_backoff_is_not_applied_for_non_overload_errors(self):
if _IS_SYNC:
mock_target = "pymongo.synchronous.helpers._RetryPolicy.backoff"
else:
mock_target = "pymongo.asynchronous.helpers._RetryPolicy.backoff"
# Create a client.
listener = OvertCommandListener()
# Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error
# code `91` (ShutdownInProgress) and `RetryableError` and `SystemOverloadedError` labels.
overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 91,
},
}
# Configure a fail point with error code `91` (ShutdownInProgress) with only the `RetryableError` error label.
non_overload_fail_point = {
"configureFailPoint": "failCommand",
"mode": "alwaysOn",
"data": {
"failCommands": ["find"],
"errorCode": 91,
"errorLabels": ["RetryableError"],
},
}
def failed(event: CommandFailedEvent) -> None:
# Configure the fail point command only if the failed event is for the 91 error configured in step 2.
if listener.failed_events:
return
assert event.failure["code"] == 91
self.configure_fail_point_sync(non_overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
listener.failed_events.append(event)
listener.failed = failed
client = await self.async_rs_client(event_listeners=[listener])
await client.test.test.insert_one({})
self.configure_fail_point_sync(overload_fail_point)
self.addCleanup(self.configure_fail_point_sync, {}, off=True)
# Perform a findOne operation with coll. Expect the operation to fail.
with mock.patch(mock_target, return_value=0) as mock_backoff:
with self.assertRaises(PyMongoError):
await client.test.test.find_one()
# Assert that backoff was applied only once for the initial overload error and not for the subsequent non-overload retryable errors.
self.assertEqual(mock_backoff.call_count, 1)
if __name__ == "__main__":
unittest.main()