SERVER-122438 Prevent setFCV CurOp state leak from collMod sub-operations (#52198)

Signed-off-by: Daniele Alessandrelli <daniele.alessandrelli@mongodb.com>
GitOrigin-RevId: 16dcd7f8b41f4d5580a76edbdb43e22fc8f572b7
This commit is contained in:
Daniele Alessandrelli 2026-04-28 20:14:00 +01:00 committed by MongoDB Bot
parent ec43b0daa8
commit 2d41cfb611
2 changed files with 88 additions and 11 deletions

View File

@ -0,0 +1,50 @@
/**
* Verifies that a collMod issued by the FCV upgrade path does not leak its
* CurOp namespace onto the parent setFCV CurOp. Without the nested-CurOp
* shield, each sub-op's AutoStatsTracker mutates the parent CurOp's NSS via
* CurOp::enter(); with the shield, the parent's NSS stays at "admin.$cmd".
* Regression test for SERVER-122438.
*/
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
const rst = new ReplSetTest({nodes: 1});
rst.startSet();
rst.initiate();
const primary = rst.getPrimary();
const adminDB = primary.getDB("admin");
// Downgrade so the subsequent upgrade has real work to do (the
// _cleanUpIndexCatalogMetadataOnUpgrade step iterates every collection via collMod).
assert.commandWorked(adminDB.runCommand({setFeatureCompatibilityVersion: lastLTSFCV, confirm: true}));
// Pause setFCV right after _upgradeServerMetadata completes.
const fp = configureFailPoint(primary, "hangWhileUpgrading");
const awaitSetFCV = startParallelShell(
funWithArgs(function (fcv) {
assert.commandWorked(db.getSiblingDB("admin").runCommand({setFeatureCompatibilityVersion: fcv, confirm: true}));
}, latestFCV),
primary.port,
);
try {
fp.wait();
const ops = adminDB
.aggregate([
{$currentOp: {allUsers: true, idleConnections: true}},
{$match: {"command.setFeatureCompatibilityVersion": {$exists: true}}},
])
.toArray();
assert.eq(1, ops.length, `expected exactly one setFCV op in progress: ${tojson(ops)}`);
assert.eq("admin.$cmd", ops[0].ns, "setFCV CurOp namespace leaked from a collMod sub-operation: " + tojson(ops[0]));
} finally {
fp.off();
awaitSetFCV();
}
rst.stopSet();

View File

@ -31,6 +31,7 @@
#include "mongo/crypto/fle_crypto.h"
#include "mongo/db/client.h"
#include "mongo/db/commands/set_feature_compatibility_version_steps/fcv_step.h"
#include "mongo/db/curop.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/generic_argument_util.h"
@ -76,10 +77,13 @@
#include "mongo/s/migration_blocking_operation/migration_blocking_operation_feature_flags_gen.h"
#include "mongo/util/fail_point.h"
#include <mutex>
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
namespace mongo {
namespace {
MONGO_FAIL_POINT_DEFINE(setFCVPauseAfterReadingConfigDropPendingDBs);
@ -283,6 +287,32 @@ void dropAuthoritativeDatabaseCollectionOnShards(OperationContext* opCtx) {
}
}
// Helper function for running processCollModCommand() under a nested CurOp so the sub-operation's
// namespace and raised dbProfileLevel (via AutoStatsTracker) don't leak onto the parent setFCV
// CurOp. Without this, a slow setFCV can write profile entries to system.profile of
// databases the user never enabled profiling on.
//
// See SERVER-85925 (original fix) / SERVER-122438 (regression).
Status processCollModCommandWithNestedCurOp(OperationContext* opCtx,
const NamespaceStringOrUUID& nsOrUUID,
const CollMod& cmd,
BSONObjBuilder* result) {
auto parentCurOp = CurOp::get(opCtx);
// Pushed onto the CurOp stack; pops itself on scope exit via ~CurOp.
CurOp nestedCurOp;
nestedCurOp.push(opCtx);
{
std::lock_guard<Client> lk(*opCtx->getClient());
nestedCurOp.setGenericOpRequestDetails(
lk, cmd.getNamespace(), parentCurOp->getCommand(), cmd.toBSON(), NetworkOp::dbMsg);
}
return processCollModCommand(opCtx, nsOrUUID, cmd, nullptr, result);
}
} // namespace
class LegacyFCVStep : public mongo::FCVStep {
public:
static LegacyFCVStep* get(ServiceContext* serviceContext);
@ -326,8 +356,8 @@ private:
collModCmd.set_removeLegacyTimeseriesBucketingParametersHaveChanged(true);
BSONObjBuilder responseBuilder;
uassertStatusOK(processCollModCommand(
opCtx, collection->ns(), collModCmd, nullptr, &responseBuilder));
uassertStatusOK(processCollModCommandWithNestedCurOp(
opCtx, collection->ns(), collModCmd, &responseBuilder));
},
[&](const Collection* collection) {
return collection->shouldRemoveLegacyTimeseriesBucketingParametersHaveChanged();
@ -412,8 +442,8 @@ private:
// deprecated catalog metadata and to correct any invalid value types previously
// allowed in metadata.
BSONObjBuilder responseBuilder;
uassertStatusOK(processCollModCommand(
opCtx, collection->ns(), CollMod{collection->ns()}, nullptr, &responseBuilder));
uassertStatusOK(processCollModCommandWithNestedCurOp(
opCtx, collection->ns(), CollMod{collection->ns()}, &responseBuilder));
return true;
});
}
@ -882,11 +912,8 @@ private:
// storage, issue the "collMod" command with none of the parameters
// set.
BSONObjBuilder responseBuilder;
uassertStatusOK(processCollModCommand(opCtx,
collection->ns(),
CollMod{collection->ns()},
nullptr,
&responseBuilder));
uassertStatusOK(processCollModCommandWithNestedCurOp(
opCtx, collection->ns(), CollMod{collection->ns()}, &responseBuilder));
},
[&](const Collection* collection) {
// Only remove the catalog entry flag if it exists. It could've been
@ -972,8 +999,8 @@ private:
BSONObjBuilder responseBuilder;
auto collMod = CollMod{collection->ns()};
collMod.setRecordIdsReplicated(false);
uassertStatusOK(processCollModCommand(
opCtx, collection->ns(), collMod, nullptr, &responseBuilder));
uassertStatusOK(processCollModCommandWithNestedCurOp(
opCtx, collection->ns(), collMod, &responseBuilder));
},
[&](const Collection* collection) { return collection->areRecordIdsReplicated(); });
}