SERVER-112250 Test shard mongod starting without --shardsvr. (#42502)

GitOrigin-RevId: 8628e85ba1654f55397dee6c8973fae2f3c5bbc1
This commit is contained in:
Max Hirschhorn 2025-10-13 09:39:35 -04:00 committed by MongoDB Bot
parent e0ed889918
commit 5d2f275cff
3 changed files with 45 additions and 2 deletions

1
.github/CODEOWNERS vendored
View File

@ -1532,6 +1532,7 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
/jstests/sharding/**/read_write_concern_defaults_application.js @10gen/server-catalog-and-routing @svc-auto-approve-bot
/jstests/sharding/**/safe_secondary_reads_drop_recreate.js @10gen/server-catalog-and-routing @svc-auto-approve-bot
/jstests/sharding/**/safe_secondary_reads_single_migration_waitForDelete.js @10gen/server-catalog-and-routing @svc-auto-approve-bot
/jstests/sharding/**/startup_with_all_configs_down.js @10gen/server-catalog-and-routing-routing-and-topology @svc-auto-approve-bot
# The following patterns are parsed from ./jstests/sharding/analyze_shard_key/OWNERS.yml
/jstests/sharding/analyze_shard_key/**/* @10gen/server-cluster-scalability @svc-auto-approve-bot

View File

@ -124,3 +124,6 @@ filters:
- "safe_secondary_reads_single_migration_waitForDelete.js":
approvers:
- 10gen/server-catalog-and-routing
- "startup_with_all_configs_down.js":
approvers:
- 10gen/server-catalog-and-routing-routing-and-topology

View File

@ -2,11 +2,15 @@
// server, and that they will wait until there is a config server online before handling any
// sharding operations.
//
// However, if a shard mongod is restarted without --shardsvr mode then the shard mongod
// will NOT wait until there is a config server online before continuing replication.
//
// This test involves restarting a standalone shard, so cannot be run on ephemeral storage engines.
// A restarted standalone will lose all data when using an ephemeral storage engine.
// @tags: [requires_persistence]
import {ShardingTest} from "jstests/libs/shardingtest.js";
import {stopServerReplication} from "jstests/libs/write_concern_util.js";
// The following checks use connections to shards cached on the ShardingTest object, but this test
// restarts a shard, so the cached connection is not usable.
@ -20,6 +24,7 @@ let st = new ShardingTest({
// an election when they do not detect an active primary. Therefore, we are setting the
// electionTimeoutMillis to its default value.
initiateWithDefaultElectionTimeout: true,
rs1: {nodes: [{}, {rsConfig: {votes: 0, priority: 0}}]},
});
jsTestLog("Setting up initial data");
@ -35,6 +40,16 @@ assert.commandWorked(st.s0.adminCommand({moveChunk: "test.foo", find: {_id: 75},
// Make sure the pre-existing mongos already has the routing information loaded into memory
assert.eq(100, st.s.getDB("test").foo.find().itcount());
// We pause replication on one of the replica set shard secondaries and perform some additional
// writes to later verify the secondary will catch up when --shardsvr is omitted.
const secondary = st.rs1.getSecondary();
stopServerReplication(secondary);
const collection = st.s0.getCollection("test.foo");
for (let i = 75; i < 100; ++i) {
assert.commandWorked(collection.update({_id: i}, {$inc: {x: 1}}, {writeConcern: {w: "majority"}}));
}
jsTestLog("Shutting down all config servers");
st.configRS.nodes.forEach((config) => {
st.stopConfigServer(config);
@ -48,8 +63,32 @@ assert.throws(function () {
new Mongo(newMongosInfo.host);
});
jsTestLog("Restarting a shard while there are no config servers up");
st.rs1.stopSet(undefined, true);
jsTestLog("Restarting a shard WITHOUT --shardsvr while there are no config servers up");
// We won't be able to compare dbHashes because server replication was stopped on the secondary.
st.rs1.stopSet(undefined, true, {skipCheckDBHashes: true, skipValidation: true});
for (let node of st.rs1.nodes) {
delete node.fullOptions.shardsvr;
st.rs1.start(node.nodeId, undefined, /*restart=*/ true, /*waitForHealth=*/ false);
if (node.nodeId === 0) {
// We prevent the node from being or becoming primary because we want to exercise the case
// where a secondary mongod is syncing from another secondary with the config server replica
// set being entirely unavailable. This is also why we must restart the shard mongod
// processes individually.
st.rs1.freeze(node);
}
}
// Verify the non-voting shard mongod successfully syncs from the former primary.
st.rs1.awaitReplication(undefined, undefined, undefined, undefined, st.rs1.nodes[0]);
jsTestLog("Restarting a shard WITH --shardsvr while there are no config servers up");
// Since there is intentionally no primary for the replica set shard, we won't be able to compare
// dbHashes still.
st.rs1.stopSet(undefined, true, {skipCheckDBHashes: true, skipValidation: true});
st.rs1.nodes.forEach((node) => (node.fullOptions.shardsvr = ""));
st.rs1.startSet({waitForConnect: false}, true);
jsTestLog("Queries should fail because the shard can't initialize sharding state");