SERVER-123190 Disable shard registry and catalog cache when --configOnly is specified (#51029)

GitOrigin-RevId: bf27d03b207f066eb37d81195d7745f5eeadc5d9
This commit is contained in:
Allison Easton 2026-04-28 09:52:53 +02:00 committed by MongoDB Bot
parent 42cdf8a67f
commit 259e1f4564
11 changed files with 346 additions and 7 deletions

View File

@ -339,6 +339,7 @@ mongo_install(
"//src/mongo/s:mongos",
"//src/mongo/shell:mongo",
"//src/mongo/tools/mongobridge_tool:mongobridge",
"//src/mongo/tools/mongosentry_tool:mongosentry",
"//src/mongo/util:pretty_printer_test_program",
"//src/third_party/wiredtiger:wt",
] + select({

View File

@ -41,6 +41,13 @@ export class StandbyClusterTestFixture {
*/
this.standbyRS = null;
/**
* Array of {pid, port} objects for mongosentry processes guarding retired ports.
* Available after transitionToStandby().
* @type {Array<{pid: number, port: number}>|null}
*/
this._sentries = null;
this._setup();
}
@ -65,6 +72,9 @@ export class StandbyClusterTestFixture {
const configRS = this.st.configRS;
const numNodes = configRS.nodes.length;
// Capture shard ports before stopping shards so we can guard them with sentries.
const shardPorts = this.st._rs.flatMap((shard) => shard.test.ports);
// Stop all mongos processes.
this.st.stopAllMongos();
@ -106,6 +116,7 @@ export class StandbyClusterTestFixture {
const hostParts = newMember.host.split(":");
hostParts[hostParts.length - 1] = String(newPorts[idx]);
newMember.host = hostParts.join(":");
newMember.priority = idx === 0 ? 1 : 0;
return newMember;
});
@ -114,6 +125,14 @@ export class StandbyClusterTestFixture {
newConfig.members = newMembers;
delete newConfig.configsvr;
// We use a single electable member (priority set in newMembers above) and a very long
// cluster-wide election timeout so that after the forced stepdown no new primary is elected
// for the duration of the test.
if (!newConfig.settings) {
newConfig.settings = {};
}
newConfig.settings.electionTimeoutMillis = ReplSetTest.kForeverMillis;
// Replace the hosts in each individual replSetConfig.
for (let i = 0; i < numNodes; i++) {
// Start as a standalone (no --replSet, no --configsvr) on the old port so we can
@ -148,18 +167,66 @@ export class StandbyClusterTestFixture {
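// startSet() launches the processes with --replSet standby but does NOT call
// replSetInitiate. The nodes reuse the config already in local.system.replset from the
// previous step. That config sets a very long election timeout, so rather than waiting for
// a self-election we explicitly step up the first (only electable) node below.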
this.standbyRS.startSet();
this.standbyRS.awaitNodesAgreeOnPrimary();
let nodes = this.standbyRS.startSet();
this.standbyRS.asCluster(nodes, () => {
this.standbyRS.stepUp(nodes[0], {awaitReplicationBeforeStepUp: false});
this.standbyRS.awaitNodesAgreeOnPrimary();
// Write a majority-committed noop before stepping down to ensure the committed snapshot
// is established. The primary's drain-completion noop may not yet be journaled, and
// without a journaled entry the commit point (and thus the committed snapshot) never
// advances. Once the node steps down to secondary, the commit point can no longer
// advance, so majority reads would be permanently blocked.
const primary = this.standbyRS.getPrimary();
assert.commandWorked(
primary.adminCommand({
appendOplogNote: 1,
data: {msg: "standby transition commit point advance"},
writeConcern: {w: "majority", wtimeout: ReplSetTest.kDefaultTimeoutMS},
}),
);
assert.commandWorked(primary.adminCommand({replSetStepDown: 1, force: true}));
});
// Start a mongosentry on every retired port (old shard ports and old config server ports).
// Any traffic to these ports indicates a bug - the sentry will invariant if it receives a
// MongoDB wire protocol message.
const retiredPorts = [...shardPorts, ...oldPorts];
this._sentries = retiredPorts.map((port) => {
const pid = _startMongoProgram("mongosentry", "--port", port.toString());
return {pid, port};
});
// Wait for each sentry process to be alive before continuing.
for (const {pid, port} of this._sentries) {
assert.soon(() => checkProgram(pid).alive, `mongosentry failed to start on retired port ${port}`);
}
}
/**
* Shuts down the standby replica set (if transitioned) and cleans up.
*
* Checks that no mongosentry process received traffic on a retired port before stopping
* them. Throws if any sentry hit its invariant (i.e. received a command it should not have).
*/
teardown() {
const failedPorts = [];
for (const {pid, port} of this._sentries || []) {
const {alive} = checkProgram(pid);
if (!alive) {
jsTest.log.info(`mongosentry on retired port ${port} received a command and hit an invariant`);
failedPorts.push(port);
} else {
stopMongoProgramByPid(pid);
}
}
if (this.standbyRS) {
this.standbyRS.stopSet();
} else {
this.st.stop();
}
if (failedPorts.length > 0) {
throw new Error(`Commands were sent to retired ports after standby transition: ` + failedPorts.join(", "));
}
}
}

View File

@ -25,6 +25,12 @@ const fixture = new StandbyClusterTestFixture({
fixture.st.s.getDB("admin").createUser({user: ADMIN_USER, pwd: ADMIN_PWD, roles: ["root"]});
fixture.st.s.getDB("admin").auth(ADMIN_USER, ADMIN_PWD);
// Create both a sharded and unsharded collection to ensure that the catalog cache is correctly
// disabled.
assert.commandWorked(fixture.st.s.getDB("unshardedDB").getCollection("test").insertOne({x: 1}));
assert.commandWorked(fixture.st.s.getDB("shardedDB").getCollection("test").insertOne({x: 1}));
assert.commandWorked(fixture.st.s.adminCommand({shardCollection: "shardedDB.test", key: {_id: 1}}));
fixture.transitionToStandby();
const mongos = MongoRunner.runMongos({
@ -39,5 +45,9 @@ mongos.getDB("admin").auth(ADMIN_USER, ADMIN_PWD);
assert.commandWorked(mongos.adminCommand({hello: 1}));
// Test that operations against both sharded and unsharded collections fail due to configOnly mode.
assert.commandFailedWithCode(mongos.getDB("unshardedDB").runCommand({find: "test"}), 12319007);
assert.commandFailedWithCode(mongos.getDB("shardedDB").runCommand({find: "test"}), 12319007);
MongoRunner.stopMongos(mongos);
fixture.teardown();

View File

@ -351,6 +351,9 @@ catalog_and_routing.topology:
# change streams:
- src/mongo/s/change_streams*
# standby clusters:
- src/mongo/tools/mongosentry_tool/
transactions:
meta:
slack: server-replication

View File

@ -491,6 +491,15 @@ DatabaseType ShardingCatalogClientImpl::getDatabase(OperationContext* opCtx,
return DatabaseType(dbName, ShardId::kConfigServerId, DatabaseVersion::makeFixed());
}
// If config only mode is enabled, we are not allowed to access any databases other than the
// fixed databases above.
// TODO (SERVER-124191): add dochub link.
if (MONGO_unlikely(serverGlobalParams.configOnly)) {
uasserted(12319007,
str::stream() << "Cannot access database " << dbName.toStringForErrorMsg()
<< " while configOnly mode is enabled");
}
auto result =
_fetchDatabaseMetadata(opCtx, dbName, getConfigReadPreference(opCtx), readConcernLevel);
if (result == ErrorCodes::NamespaceNotFound) {
@ -953,6 +962,15 @@ std::pair<CollectionType, std::vector<ChunkType>> ShardingCatalogClientImpl::get
const NamespaceString& nss,
const ChunkVersion& sinceVersion,
const repl::ReadConcernArgs& readConcern) {
// If config only mode is enabled, we are not allowed to access any collections other than
// those which are unsharded and in the fixed databases.
// TODO (SERVER-124191): add dochub link.
if (MONGO_unlikely(serverGlobalParams.configOnly)) {
uasserted(12319005,
str::stream() << "Cannot access collection " << nss.toStringForErrorMsg()
<< " while configOnly mode is enabled");
}
auto aggRequest = makeCollectionAndChunksAggregation(opCtx, nss, sinceVersion);
std::vector<BSONObj> aggResult = runCatalogAggregation(
@ -1098,6 +1116,11 @@ std::vector<NamespaceString> ShardingCatalogClientImpl::getAllNssThatHaveZonesFo
repl::OpTimeWith<std::vector<ShardType>> ShardingCatalogClientImpl::getAllShards(
OperationContext* opCtx, repl::ReadConcernLevel readConcern, BSONObj filter) {
// TODO (SERVER-124191): add dochub link.
if (MONGO_unlikely(serverGlobalParams.configOnly)) {
uasserted(12319006,
str::stream() << "Cannot retrieve shard list while configOnly mode is enabled");
}
const auto& findRes =
uassertStatusOK(_exhaustiveFindOnConfig(opCtx,

View File

@ -365,8 +365,8 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
if (responseStatus.isOK()) {
networkTime = cbData.response.elapsed.value_or(Microseconds{0});
// TODO(sz) Because the term is duplicated in ReplSetMetaData, we can get rid of this
// and update tests.
// TODO (SERVER-123321): Because the term is duplicated in ReplSetMetaData, we can get rid
// of this and update tests.
const auto& hbResponse = hbStatusResponse.getValue();
_updateTerm(lk, hbResponse.getTerm());
// Postpone election timeout if we have a successful heartbeat response from the primary.
@ -1301,7 +1301,8 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(StartElectionReason
void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(WithLock lk,
StartElectionReasonEnum reason) {
// If it is not a single node replica set, no need to start an election after stepdown timeout.
// If it is not a single node replica set, no need to start an election after stepdown
// timeout.
if (reason == StartElectionReasonEnum::kSingleNodePromptElection &&
!_topCoord->isElectableNodeInSingleNodeReplicaSet()) {
LOGV2_FOR_ELECTION(

View File

@ -359,6 +359,10 @@ ShardRegistry::Cache::LookupResult ShardRegistry::_lookup(OperationContext* opCt
}
void ShardRegistry::startupPeriodicReloader(OperationContext* opCtx) {
if (MONGO_unlikely(serverGlobalParams.configOnly)) {
return;
}
// startupPeriodicReloader() must be called only once
invariant(!_executor);

View File

@ -442,8 +442,9 @@ ProgramRunner::ProgramRunner(BSONObj args,
parseArgs(args, isMongo, isMongodProgram);
loadEnvironmentVariables(env);
bool needsPort =
isMongo && (isMongodProgram || isMongosProgram || (programName == "mongobridge"));
bool needsPort = isMongo &&
(isMongodProgram || isMongosProgram || (programName == "mongobridge") ||
(programName == "mongosentry"));
if (!needsPort) {
_port = -1;
}

View File

@ -0,0 +1,23 @@
load("//bazel:mongo_src_rules.bzl", "mongo_cc_binary")
package(default_visibility = ["//visibility:public"])
# Builds the `mongosentry` executable from mongosentry.cpp. Per that source file's header
# comment, this is a test-only binary that listens on a port and invariants if any MongoDB wire
# protocol message is received.
mongo_cc_binary(
name = "mongosentry",
srcs = [
"mongosentry.cpp",
],
# NOTE(review): presumably `dist_test` ships the binary alongside the other test artifacts so
# jstests can launch it - confirm against the tag's definition.
tags = [
"dist_test",
],
# Transport, RPC, service-context and signal-handling libraries used by mongosentry.cpp.
deps = [
"//src/mongo/db:dbmessage",
"//src/mongo/db/shard_role:service_context_non_d",
"//src/mongo/rpc",
"//src/mongo/transport:service_executor",
"//src/mongo/transport:session_manager",
"//src/mongo/transport:transport_layer_manager",
"//src/mongo/util:signal_handlers",
"//src/mongo/util/options_parser:options_parser_init",
],
)

View File

@ -0,0 +1,5 @@
version: 1.0.0
filters:
- "*":
approvers:
- 10gen/server-catalog-and-routing-routing-and-topology

View File

@ -0,0 +1,201 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
/**
* Test-only binary that listens on a port and invariants if any MongoDB wire protocol message
* is received. Used to detect accidental traffic to retired server ports after a standby
* cluster transition.
*/
#include "mongo/base/initializer.h"
#include "mongo/base/parse_number.h"
#include "mongo/db/client.h"
#include "mongo/db/dbmessage.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/rpc/factory.h"
#include "mongo/rpc/message.h"
#include "mongo/transport/asio/asio_session_manager.h"
#include "mongo/transport/asio/asio_transport_layer.h"
#include "mongo/transport/service_entry_point.h"
#include "mongo/transport/service_executor.h"
#include "mongo/transport/transport_layer_manager_impl.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/exit.h"
#include "mongo/util/exit_code.h"
#include "mongo/util/future.h"
#include "mongo/util/quick_exit.h"
#include "mongo/util/signal_handlers.h"
#include "mongo/util/str.h"
#include "mongo/util/text.h" // IWYU pragma: keep
#include "mongo/util/time_support.h"
#include <memory>
#include <string>
#include <string_view>
#include <vector>
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
namespace mongo {
/**
 * ServiceEntryPoint installed by mongosentry on a retired port.
 *
 * Every incoming message is inspected: a request whose body carries an `internalClient` field
 * is treated as coming from a server process and trips an invariant, while any other request is
 * logged and answered with an IllegalOperation error. This guarantees that server traffic
 * reaching a retired port after a standby cluster transition is detected.
 */
class ServiceEntryPointSentry final : public ServiceEntryPoint {
public:
    explicit ServiceEntryPointSentry(int port) : _port(port) {}

    /**
     * Classifies `request` as server or non-server traffic and reacts as described on the class.
     * Never returns for server traffic; otherwise returns a ready future holding an error status.
     */
    Future<DbResponse> handleRequest(OperationContext* opCtx,
                                     const Message& request,
                                     Date_t started) final {
        auto* client = opCtx->getClient();

        // Best-effort description of the peer, used only for diagnostics.
        std::string remote("unknown");
        if (client && client->hasRemote()) {
            remote = client->getRemote().toString();
        }

        // Parsing is best-effort as well: if the message cannot be decoded we keep the
        // placeholder command name and treat the sender as a non-server client.
        std::string commandName("unknown");
        bool isFromServer = false;
        try {
            auto parsedRequest = rpc::opMsgRequestFromAnyProtocol(request, client);
            commandName = std::string(parsedRequest.getCommandName());
            isFromServer = !parsedRequest.body["internalClient"].eoo();
        } catch (...) {
        }

        if (isFromServer) {
            invariant(false,
                      str::stream() << "mongosentry: received server command '" << commandName
                                    << "' from " << remote << " on retired port " << _port
                                    << "; this port should not be receiving any server traffic "
                                       "after the standby cluster transition");
            MONGO_UNREACHABLE;
        }

        // We allow requests from non-server processes (e.g. the mongo shell) because
        // ReplicaSetMonitor::drop() does not explicitly close pooled connections to monitored
        // hosts. After _forgetReplSet removes the monitor, the connection pool may still attempt
        // to refresh idle connections to old hosts, sending a hello to the sentry.
        LOGV2_WARNING(12319004,
                      "mongosentry: ignoring non-server command on retired port",
                      "command"_attr = commandName,
                      "remote"_attr = remote,
                      "port"_attr = _port);
        return Future<DbResponse>::makeReady(
            Status{ErrorCodes::IllegalOperation,
                   "mongosentry: this port is retired and should not receive traffic"});
    }

private:
    // Port this sentry listens on; reported in log and invariant messages.
    int _port;
};
/**
 * Entry point of the mongosentry binary.
 *
 * Parses the listening port from the command line (`--port <n>` or `--port=<n>`), forwards all
 * other arguments to the global initializers, installs a ServiceEntryPointSentry on a transport
 * layer bound to that port and then blocks until shutdown is requested.
 *
 * Returns ExitCode::netError if the transport layer cannot be set up or started, otherwise the
 * exit code produced by waitForShutdown(). Invariants if the port is missing, malformed or
 * outside the range 1-65535.
 */
int mongosentry_main(int argc, char** argv) {
    // Extract --port from argv before passing remaining args to the global initializers, since
    // --port is not registered as a global option in mongosentry. This binary only has the port
    // as an option, so linking the server global params or adding a separate IDL file was
    // unnecessary.
    constexpr int kMaxPort = 65535;
    constexpr std::string_view kPortFlag = "--port";
    constexpr std::string_view kPortAssignPrefix = "--port=";

    int port = -1;
    const auto parsePort = [&port](const char* text) {
        int parsedPort = 0;
        auto status = NumberParser{}.base(10)(text, &parsedPort);
        invariant(
            status.isOK(),
            "mongosentry: --port must be followed by a valid integer with no extra characters");
        port = parsedPort;
    };

    std::vector<std::string> initArgs{argv[0]};
    for (int i = 1; i < argc; ++i) {
        const std::string_view arg(argv[i]);
        if (arg == kPortFlag && i + 1 < argc) {
            parsePort(argv[++i]);
        } else if (arg.substr(0, kPortAssignPrefix.size()) == kPortAssignPrefix) {
            // Accept the "--port=<n>" spelling as well as "--port <n>".
            parsePort(argv[i] + kPortAssignPrefix.size());
        } else {
            initArgs.emplace_back(argv[i]);
        }
    }
    invariant(port > 0, "mongosentry: --port <port> is required");
    // Reject values that cannot be a TCP port instead of handing them to the transport layer.
    invariant(port <= kMaxPort, "mongosentry: --port must not exceed 65535");

    // On shutdown, close every session and stop the transport layer if one was installed.
    registerShutdownTask([] {
        if (hasGlobalServiceContext()) {
            if (auto* tl = getGlobalServiceContext()->getTransportLayerManager()) {
                tl->endAllSessions(Client::kEmptyTagMask);
                tl->shutdown();
            }
        }
    });

    setupSignalHandlers();
    runGlobalInitializersOrDie(initArgs);
    startSignalProcessingThread(LogFileStatus::kNoLogFileToRotate);

    auto serviceContextHolder = ServiceContext::make();
    setGlobalServiceContext(std::move(serviceContextHolder));
    auto serviceContext = getGlobalServiceContext();

    // All requests arriving on the port are routed to the sentry entry point.
    serviceContext->getService()->setServiceEntryPoint(
        std::make_unique<ServiceEntryPointSentry>(port));

    {
        transport::AsioTransportLayer::Options opts;
        opts.ipList.emplace_back("0.0.0.0");
        opts.port = port;

        auto sm = std::make_unique<transport::AsioSessionManager>(serviceContext);
        auto tl = std::make_unique<transport::AsioTransportLayer>(opts, std::move(sm));
        serviceContext->setTransportLayerManager(
            std::make_unique<transport::TransportLayerManagerImpl>(std::move(tl)));
    }

    transport::ServiceExecutor::startupAll(serviceContext);

    if (auto status = serviceContext->getTransportLayerManager()->setup(); !status.isOK()) {
        LOGV2(12319001, "mongosentry: error setting up transport layer", "error"_attr = status);
        return static_cast<int>(ExitCode::netError);
    }

    if (auto status = serviceContext->getTransportLayerManager()->start(); !status.isOK()) {
        LOGV2(12319002, "mongosentry: error starting transport layer", "error"_attr = status);
        return static_cast<int>(ExitCode::netError);
    }

    serviceContext->notifyStorageStartupRecoveryComplete();
    return static_cast<int>(waitForShutdown());
}
} // namespace mongo
// Platform entry points. Both hand the command line to mongo::mongosentry_main() and pass its
// return value to mongo::quickExit().
#if defined(_WIN32)
// Windows: the wide-character arguments are wrapped in a WindowsCommandLine, whose argv() is
// what mongosentry_main() receives.
int wmain(int argc, wchar_t* argvW[]) {
    mongo::WindowsCommandLine commandLine(argc, argvW);
    const int exitCode = mongo::mongosentry_main(argc, commandLine.argv());
    mongo::quickExit(exitCode);
}
#else
int main(int argc, char* argv[]) {
    const int exitCode = mongo::mongosentry_main(argc, argv);
    mongo::quickExit(exitCode);
}
#endif