SERVER-74993 Create a hook to test for leaked cursors at the end of concurrency test (#42548)

GitOrigin-RevId: 9dbc2557d3b2e2b5c20dba19648d8380369825cf
This commit is contained in:
Denis Grebennicov 2026-02-12 17:42:04 +01:00 committed by MongoDB Bot
parent 7fe9a0e3a6
commit eb0eb0f4f3
44 changed files with 196 additions and 18 deletions

View File

@ -42,6 +42,7 @@ executor:
mongod_options:
oplogSize: 1024
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -44,6 +44,7 @@ executor:
mongod_options:
oplogSize: 1024
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -44,6 +44,7 @@ executor:
mongod_options:
oplogSize: 1024
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -45,6 +45,7 @@ executor:
mongod_options:
oplogSize: 1024
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -10,6 +10,7 @@ executor:
archive:
hooks:
- ContinuousStepdown
- CheckIdleCursors
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
@ -74,6 +75,7 @@ executor:
hooks:
- class: ContinuousStepdown
randomize_kill: true
- class: CheckIdleCursors
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections

View File

@ -10,6 +10,7 @@ executor:
archive:
hooks:
- ContinuousStepdown
- CheckIdleCursors
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
@ -74,6 +75,7 @@ executor:
hooks:
- class: ContinuousStepdown
randomize_kill: true
- class: CheckIdleCursors
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections

View File

@ -9,6 +9,7 @@
executor:
archive:
hooks:
- CheckIdleCursors
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
@ -70,6 +71,7 @@ executor:
hooks:
- class: ContinuousStepdown
randomize_kill: true
- class: CheckIdleCursors
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections

View File

@ -287,6 +287,7 @@
# too many network errors occur when re-running a transaction we will run out of retries.
- class: ContinuousStepdown
randomize_kill: true
- class: CheckIdleCursors
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections
@ -306,6 +307,7 @@
test: true
hooks:
- ContinuousStepdown
- CheckIdleCursors
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
@ -384,6 +386,7 @@
archive:
test: true
hooks:
- CheckIdleCursors
- CheckReplDBHash
- CheckMetadataConsistencyInBackground
- ValidateCollections
@ -392,6 +395,7 @@
hooks:
- class: ContinuousStepdown
randomize_kill: true
- class: CheckIdleCursors
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground
- class: ValidateCollections

View File

@ -55,6 +55,7 @@ executor:
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -69,6 +69,7 @@ executor:
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -131,6 +131,7 @@ executor:
- class: ContinuousInitialSync
is_fsm_workload: true
sync_interval_secs: 15
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground

View File

@ -106,6 +106,7 @@ executor:
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -121,6 +121,7 @@ executor:
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -96,6 +96,7 @@ executor:
traceExceptions: false
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -173,6 +173,7 @@ executor:
shard_stepdown: true
stepdown_interval_ms: 15000
is_fsm_workload: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground

View File

@ -101,6 +101,7 @@ executor:
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -142,6 +142,7 @@ executor:
add_remove_random_shards: false # TODO(SERVER-91474): turn this to true when fixing the ticket
move_primary_comment: *movePrimaryComment
is_fsm_workload: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -35,6 +35,7 @@ executor:
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
# TODO (SERVER-63855): remove 'RunDBCheckInBackground' or put it back.
# - class: RunDBCheckInBackground

View File

@ -71,6 +71,7 @@ executor:
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -102,6 +102,7 @@ executor:
transition_configsvr: true
move_primary_comment: *movePrimaryComment
is_fsm_workload: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -138,6 +138,7 @@ executor:
transition_configsvr: true
add_remove_random_shards: true
is_fsm_workload: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
# Suites that shut down nodes are not compatible with the CheckReplDBHashInBackground hook, so
# this suite does not include that hook.

View File

@ -103,6 +103,7 @@ executor:
shard_stepdown: true
is_fsm_workload: true
randomize_kill: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground

View File

@ -133,6 +133,7 @@ executor:
add_remove_random_shards: true
move_primary_comment: *movePrimaryComment
is_fsm_workload: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground

View File

@ -70,6 +70,7 @@ executor:
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHashInBackground
- class: CheckReplDBHash

View File

@ -57,6 +57,7 @@ executor:
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
hooks:
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
# TODO (SERVER-63855): remove 'RunDBCheckInBackground' or put it back.
# - class: RunDBCheckInBackground

View File

@ -94,6 +94,7 @@ executor:
config_stepdown: true
shard_stepdown: true
is_fsm_workload: true
- class: CheckIdleCursors
- class: CheckShardFilteringMetadata
- class: CheckReplDBHash
- class: CheckMetadataConsistencyInBackground

View File

@ -60,6 +60,9 @@ filters:
- "drop_sessions_collection.py":
approvers:
- 10gen/server-catalog-and-routing-ddl
- "check_idle_cursors.py":
approvers:
- 10gen/server-jstest-infra
- "rotate_execution_control_params.py":
approvers:
- 10gen/server-workload-resilience

View File

@ -0,0 +1,29 @@
import os.path
from buildscripts.resmokelib.testing.hooks import jsfile
from buildscripts.resmokelib.utils import jscomment
class CheckIdleCursors(jsfile.JSHook):
    """Run the idle-cursor check script after each test.

    Before the script runs, `TestData.shouldKillIdleCursors` is set to whether the
    test that just finished carries the `ALLOW_LEAK_TAG` tag. The script reads that
    flag to decide whether leftover idle cursors are killed before asserting.
    """

    IS_BACKGROUND = False

    # Tag you'll set in the JS test files that are allowed to leak cursors.
    ALLOW_LEAK_TAG = "can_leak_idle_cursors"

    def __init__(self, hook_logger, fixture, shell_options=None):
        """Initialize the hook.

        Args:
            hook_logger: logger forwarded to the base hook.
            fixture: fixture forwarded to the base hook.
            shell_options: optional dict of shell options; a fresh dict is used when
                omitted or None.
        """
        # A `{}` default would be a single dict shared by every instance created
        # without explicit options. after_test() mutates the options, so sharing it
        # would let one hook instance overwrite another's TestData.
        if shell_options is None:
            shell_options = {}

        description = "Checking for idle cursors in $currentOp"
        js_filename = os.path.join("jstests", "hooks", "jstest_infra", "run_check_idle_cursors.js")
        super().__init__(
            hook_logger, fixture, js_filename, description, shell_options=shell_options
        )

    def after_test(self, test, test_report):
        """Record whether `test` may leak idle cursors, then run the base after-test step."""
        # NOTE(review): relies on the base class storing the options dict as
        # `self._shell_options` — confirm in jsfile.JSHook.
        global_vars = self._shell_options.setdefault("global_vars", {})
        test_data = global_vars.setdefault("TestData", {})
        test_data["shouldKillIdleCursors"] = self._does_test_allow_leaking_idle_cursors(test)
        super().after_test(test, test_report)

    def _does_test_allow_leaking_idle_cursors(self, test):
        """Return True when the tags read from `test.test_name` include ALLOW_LEAK_TAG."""
        tags = jscomment.get_tags(test.test_name)
        return self.ALLOW_LEAK_TAG in tags

View File

@ -287,19 +287,6 @@ export const workerThread = (function () {
};
}
} finally {
// Kill this worker thread's session to ensure any possible idle cursors left open by
// the workload are closed.
// TODO SERVER-74993: Remove this.
try {
let session = myDB.getSession();
if (session) {
myDB.runCommand({killSessions: [session.getSessionId()]});
}
} catch (e) {
// Ignore errors from killSessions.
jsTest.log("Error running killSessions: " + e);
}
// Avoid retention of connection object
configs = null;
myDB = null;

View File

@ -7,6 +7,8 @@
* requires_sharding,
* # killOp does not support stepdowns.
* does_not_support_stepdowns,
* # TODO: SERVER-114503 Investigate DDL commands FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/

View File

@ -2,6 +2,10 @@
* agg_lookup.js
*
* Runs a $lookup aggregation simultaneously with updates.
* @tags: [
* # TODO: SERVER-114502 Investigate agg_lookup and long_secondary_reads fsm tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/
import {interruptedQueryErrors} from "jstests/concurrency/fsm_libs/assert.js";
import {FeatureFlagUtil} from "jstests/libs/feature_flag_util.js";

View File

@ -13,7 +13,9 @@
* # Tests which expect commands to fail and catch the error can cause transactions to abort and
* # retry indefinitely.
* catches_command_failures,
* requires_fcv_82
* requires_fcv_82,
* # TODO: SERVER-114502 Investigate agg_lookup and long_secondary_reads fsm tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/

View File

@ -13,6 +13,8 @@
* # TODO (SERVER-91251): Run this with stepdowns on sanitizers.
* tsan_incompatible,
* incompatible_aubsan,
* # TODO: SERVER-114503 Investigate DDL commands FSM tests leaking cursors.
* can_leak_idle_cursors,
* ];
*/
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";

View File

@ -13,6 +13,8 @@
* # TODO (SERVER-91251): Run this with stepdowns on sanitizers.
* tsan_incompatible,
* incompatible_aubsan,
* # TODO: SERVER-114503 Investigate DDL commands FSM tests leaking cursors.
* can_leak_idle_cursors,
* ];
*/
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";

View File

@ -1,7 +1,12 @@
/**
* Same as the base workload, but refines to a nested shard key.
*
* @tags: [requires_persistence, requires_sharding]
* @tags: [
* requires_persistence,
* requires_sharding,
* # TODO: SERVER-114503 Investigate DDL commands FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";

View File

@ -1,7 +1,13 @@
/**
* Same as the base workload, but refines to a nested shard key.
*
* @tags: [requires_persistence, requires_sharding, assumes_stable_shard_list]
* @tags: [
* requires_persistence,
* requires_sharding,
* assumes_stable_shard_list,
* # TODO: SERVER-114503 Investigate DDL commands FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";

View File

@ -13,7 +13,13 @@
* - refineCollectionShardKey - Refines the collection's shard key and decreases the latch count
* such that the next latch collection will be targeted by the test.
*
* @tags: [requires_persistence, requires_sharding, assumes_stable_shard_list]
* @tags: [
* requires_persistence,
* requires_sharding,
* assumes_stable_shard_list,
* # TODO: SERVER-114503 Investigate DDL commands FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/
import "jstests/libs/parallelTester.js";

View File

@ -5,7 +5,12 @@
* once the read has begun, catalog operations with conflicting locks will block until the read is
* finished.
*
* @tags: [creates_background_indexes, uses_transactions]
* @tags: [
* creates_background_indexes,
* uses_transactions,
* # TODO: SERVER-114500 Investigate snapshot_read* FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/
import {doSnapshotFind, doSnapshotGetMore} from "jstests/concurrency/fsm_workload_helpers/snapshot_read_utils.js";

View File

@ -10,6 +10,8 @@
* requires_getmore,
* # This test relies on query commands returning specific batch-sized responses.
* assumes_no_implicit_cursor_exhaustion,
* # TODO: SERVER-114500 Investigate snapshot_read* FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/

View File

@ -8,6 +8,8 @@
* requires_getmore,
* # This test relies on query commands returning specific batch-sized responses.
* assumes_no_implicit_cursor_exhaustion,
* # TODO: SERVER-114500 Investigate snapshot_read* FSM tests leaking cursors.
* can_leak_idle_cursors,
* ]
*/

View File

@ -7,6 +7,7 @@
*
* @tags: [
* requires_fcv_51,
* can_leak_idle_cursors,
* ]
*/

View File

@ -0,0 +1,12 @@
load("//bazel:mongo_js_rules.bzl", "all_subpackage_javascript_files", "mongo_js_library")
# Every target declared in this package is visible to all other packages.
package(default_visibility = ["//visibility:public"])

# Collects the JavaScript files that live directly in this directory.
mongo_js_library(
    name = "all_javascript_files",
    srcs = glob(["*.js"]),
)

# NOTE(review): presumably exposes the JavaScript files of subpackages — confirm against
# //bazel:mongo_js_rules.bzl.
all_subpackage_javascript_files()

View File

@ -0,0 +1,5 @@
version: 1.0.0
filters:
- "*":
approvers:
- 10gen/server-jstest-infra

View File

@ -0,0 +1,71 @@
// This script checks for leaked cursors in the MongoDB system.
import {DiscoverTopology} from "jstests/libs/discover_topology.js";
/**
 * Lists the idle cursors that $currentOp reports on `conn`, leaving out cursors on a couple of
 * system namespaces and cursors opened by catalog/introspection commands and stages.
 *
 * @param conn connection on which the "admin" database is queried.
 * @returns {Array} the matching $currentOp documents, or an empty array when the query throws.
 */
function findIdleCursors(conn) {
    try {
        // Namespaces whose idle cursors are not counted.
        const ignoredNamespaces = ["local.oplog.rs", "config.system.sessions"];

        // Commands and leading aggregation stages whose cursors are not counted.
        const ignoredCommands = ["listDatabases", "listCollections", "listIndexes"];
        const ignoredFirstStages = ["$listCatalog", "$indexStats", "$collStats"];

        const originatedBy = (path) => ({[`cursor.originatingCommand.${path}`]: {$exists: true}});
        const introspectionFilter = {
            $nor: [
                ...ignoredCommands.map((commandName) => originatedBy(commandName)),
                ...ignoredFirstStages.map((stageName) => originatedBy(`pipeline.0.${stageName}`)),
            ],
        };

        const pipeline = [
            {$currentOp: {localOps: true, idleCursors: true}},
            // The aggregation is tagged with this comment below, so this drops it from its own output.
            {$match: {"command.comment": {$ne: "$currentOp"}}},
            {$match: {"type": "idleCursor"}},
            {$match: {"ns": {$nin: ignoredNamespaces}}},
            {$match: introspectionFilter},
        ];

        const adminDb = conn.getDB("admin");
        const resultCursor = adminDb.aggregate(pipeline, {comment: "$currentOp"});
        return resultCursor.toArray();
    } catch (error) {
        // Best effort: a failed lookup is logged and treated as "nothing found".
        jsTest.log.info(`Failed to read currentOps for connection`, {conn, error});
        return [];
    }
}
/**
 * Asserts that `conn` has no idle cursors left over.
 *
 * When TestData.shouldKillIdleCursors is set, every idle cursor found is first killed with a
 * killCursors command and the list is read again before the assertion.
 *
 * @param conn connection to inspect.
 */
function assertNoIdleCursors(conn) {
    let leftover = findIdleCursors(conn);
    jsTest.log.info("Idle cursor for connection", {conn, idleCursors: leftover});

    // Currently some tests leak idle cursors. In order to not fail those tests we kill the leaked cursors.
    if (TestData && TestData.shouldKillIdleCursors) {
        jsTest.log.info("Killing idle cursors for connection", {conn, idleCursors: leftover});
        for (const leaked of leftover) {
            const reply = conn
                .getDB("admin")
                .runCommand({killCursors: leaked.ns, cursors: [leaked.cursor.cursorId]});
            assert.commandWorked(reply);
        }
        // Read the list again so the assertion below reflects the state after the kills.
        leftover = findIdleCursors(conn);
    }

    assert.eq(0, leftover.length, `Expected no idleCursors left over on connection ${conn}`);
}
// Entry point: work out which nodes are reachable from the shell's current connection, open a
// connection to each one, and assert that none of them has idle cursors left over.
(function () {
    const nodesToConnectTo = (function () {
        const topology = DiscoverTopology.findConnectedNodes(db.getMongo());
        if (topology.mongos) {
            // Sharded cluster: check the mongos nodes and every node of every shard.
            const shardConnStrings = Object.values(topology.shards).flatMap((shardInfo) => shardInfo.nodes);
            return topology.mongos.nodes.concat(shardConnStrings);
        }
        // Not sharded. `[topology.nodes]` would nest a host list inside another array and hand
        // `new Mongo()` an array instead of a host string, so flatten it. flat() leaves a single
        // host string untouched.
        // NOTE(review): `topology.nodes` is presumably an array of hosts for a replica set —
        // confirm against DiscoverTopology.
        const hosts = [topology.nodes].flat().filter((host) => host !== undefined && host !== null);
        // With no node list reported, fall back to the node this shell is connected to.
        return hosts.length > 0 ? hosts : [db.getMongo().host];
    })();

    jsTest.log.info("Nodes to check for leaked cursors", {nodesToConnectTo});

    for (const connString of nodesToConnectTo) {
        assertNoIdleCursors(new Mongo(connString));
    }
})();