PYTHON-5731 - Server selection deprioritization only for overload errors on replica sets (#2710)

This commit is contained in:
Noah Stapp 2026-02-23 13:18:24 -05:00 committed by GitHub
parent 908102d776
commit 84814b2a72
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 166 additions and 2 deletions

View File

@ -2825,7 +2825,11 @@ class _ClientConnectionRetryable(Generic[T]):
if self._last_error is None:
self._last_error = exc
if self._server is not None:
if (
self._server is not None
and self._client.topology_description.topology_type_name == "Sharded"
or exc.has_error_label("SystemOverloadedError")
):
self._deprioritized_servers.append(self._server)
def _is_not_eligible_for_retry(self) -> bool:

View File

@ -2815,7 +2815,11 @@ class _ClientConnectionRetryable(Generic[T]):
if self._last_error is None:
self._last_error = exc
if self._server is not None:
if (
self._server is not None
and self._client.topology_description.topology_type_name == "Sharded"
or exc.has_error_label("SystemOverloadedError")
):
self._deprioritized_servers.append(self._server)
def _is_not_eligible_for_retry(self) -> bool:

View File

@ -261,6 +261,84 @@ class TestRetryableReads(AsyncIntegrationTest):
self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"])
self.assertIsNot(command_docs[0], command_docs[1])
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
client = await self.async_rs_or_single_client(
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
)
# 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 6,
},
}
await async_set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
await client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred on different servers.
assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id
@async_client_context.require_replica_set
@async_client_context.require_secondaries_count(1)
@async_client_context.require_failCommand_fail_point
@async_client_context.require_version_min(4, 4, 0)
async def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
client = await self.async_rs_or_single_client(
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
)
# 2. Configure a fail point with the RetryableError error label.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError"],
"errorCode": 6,
},
}
await async_set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
await client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred the same server.
assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id
if __name__ == "__main__":
unittest.main()

View File

@ -259,6 +259,84 @@ class TestRetryableReads(IntegrationTest):
self.assertEqual(command_docs[0]["lsid"], command_docs[1]["lsid"])
self.assertIsNot(command_docs[0], command_docs[1])
@client_context.require_replica_set
@client_context.require_secondaries_count(1)
@client_context.require_failCommand_fail_point
@client_context.require_version_min(4, 4, 0)
def test_03_01_retryable_reads_caused_by_overload_errors_are_retried_on_a_different_replicaset_server_when_one_is_available(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
client = self.rs_or_single_client(
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
)
# 2. Configure a fail point with the RetryableError and SystemOverloadedError error labels.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError", "SystemOverloadedError"],
"errorCode": 6,
},
}
set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred on different servers.
assert listener.failed_events[0].connection_id != listener.succeeded_events[0].connection_id
@client_context.require_replica_set
@client_context.require_secondaries_count(1)
@client_context.require_failCommand_fail_point
@client_context.require_version_min(4, 4, 0)
def test_03_02_retryable_reads_caused_by_non_overload_errors_are_retried_on_the_same_replicaset_server(
self
):
listener = OvertCommandListener()
# 1. Create a client `client` with `retryReads=true`, `readPreference=primaryPreferred`, and command event monitoring enabled.
client = self.rs_or_single_client(
event_listeners=[listener], retryReads=True, readPreference="primaryPreferred"
)
# 2. Configure a fail point with the RetryableError error label.
command_args = {
"configureFailPoint": "failCommand",
"mode": {"times": 1},
"data": {
"failCommands": ["find"],
"errorLabels": ["RetryableError"],
"errorCode": 6,
},
}
set_fail_point(client, command_args)
# 3. Reset the command event monitor to clear the fail point command from its stored events.
listener.reset()
# 4. Execute a `find` command with `client`.
client.t.t.find_one({})
# 5. Assert that one failed command event and one successful command event occurred.
self.assertEqual(len(listener.failed_events), 1)
self.assertEqual(len(listener.succeeded_events), 1)
# 6. Assert that both events occurred the same server.
assert listener.failed_events[0].connection_id == listener.succeeded_events[0].connection_id
if __name__ == "__main__":
unittest.main()