SERVER-83435 Extend sharding concurrency suites with unsharded collections state (#53299)

GitOrigin-RevId: ac895885b45801434dfa5fbd6b792aa372484468
This commit is contained in:
Enrico Golfieri 2026-05-26 13:06:08 +02:00 committed by MongoDB Bot
parent 26d3dfa093
commit 10787747e6
25 changed files with 125 additions and 18 deletions

View File

@ -21,6 +21,7 @@ executor:
TestData:
implicitlyShardOnCreateCollectionOnly: true
runningWithBalancer: false
shardCollectionProbability: 0.5
fixture:
class: ShardedClusterFixture
enable_balancer: false

View File

@ -22,6 +22,7 @@ executor:
enableOTELTracing: false
implicitlyShardOnCreateCollectionOnly: true
runningWithBalancer: false
shardCollectionProbability: 0.5
fixture:
class: ShardedClusterFixture
enable_balancer: false

View File

@ -22,6 +22,7 @@ executor:
enableOTELTracing: false
implicitlyShardOnCreateCollectionOnly: true
runningWithBalancer: false
shardCollectionProbability: 0.5
fixture:
class: ShardedClusterFixture
enable_balancer: false

View File

@ -21,6 +21,7 @@ executor:
TestData:
implicitlyShardOnCreateCollectionOnly: true
runningWithBalancer: false
shardCollectionProbability: 0.5
fixture:
class: ShardedClusterFixture
enable_balancer: false

View File

@ -54,6 +54,7 @@ executor:
runningWithCausalConsistency: true
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.
# - class: CheckIdleCursors

View File

@ -67,6 +67,7 @@ executor:
runningWithCausalConsistency: true
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.

View File

@ -126,6 +126,7 @@ executor:
killShards: true
implicitlyShardOnCreateCollectionOnly: true
useActionPermittedFile: [ContinuousInitialSync]
shardCollectionProbability: 0.5
hooks:
- class: ContinuousInitialSync

View File

@ -104,6 +104,7 @@ executor:
runningWithSessions: true
traceExceptions: false
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.

View File

@ -119,6 +119,7 @@ executor:
traceExceptions: false
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.

View File

@ -95,6 +95,7 @@ executor:
runningWithSessions: true
traceExceptions: false
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.
# - class: CheckIdleCursors

View File

@ -163,6 +163,7 @@ executor:
traceExceptions: false
implicitlyShardOnCreateCollectionOnly: true
useActionPermittedFile: [ContinuousStepdown]
shardCollectionProbability: 0.5
hooks:
# We use a stepdown interval of 15 seconds because we will retry all commands in a transaction
# so we need to allow time for at most 10 operations to be re-run and then re-committed. If

View File

@ -99,6 +99,7 @@ executor:
traceExceptions: false
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.

View File

@ -34,6 +34,7 @@ executor:
TestData:
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.
# - class: CheckIdleCursors

View File

@ -70,6 +70,7 @@ executor:
TestData:
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.
# - class: CheckIdleCursors

View File

@ -97,6 +97,7 @@ executor:
useActionPermittedFile: [ContinuousStepdown]
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
- class: ContinuousStepdown
config_stepdown: true

View File

@ -68,6 +68,7 @@ executor:
TestData:
runningWithBalancer: true
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.

View File

@ -56,6 +56,7 @@ executor:
TestData:
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
shardCollectionProbability: 0.5
hooks:
# TODO SERVER-120162: Re-enable once all the false positives have been addressed and the test is stable.
# - class: CheckIdleCursors

View File

@ -89,6 +89,7 @@ executor:
runningWithBalancer: false
implicitlyShardOnCreateCollectionOnly: true
useActionPermittedFile: [ContinuousStepdown]
shardCollectionProbability: 0.5
hooks:
- class: ContinuousStepdown
config_stepdown: true

View File

@ -14,6 +14,7 @@
*/
import {ShardingTopologyHelpers} from "jstests/concurrency/fsm_workload_helpers/catalog_and_routing/sharding_topology_helpers.js";
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
export const $config = (function () {
const kTotalWorkingDocuments = 500;
@ -55,6 +56,8 @@ export const $config = (function () {
};
let currentShard = currentShardFn();
let shards = ShardingTopologyHelpers.getShardNames(mongosConn);
// If the collection was never tracked, currentShard is undefined and any shard is a valid
// moveCollection destination.
let destinationShards = shards.filter(function (shard) {
if (shard !== currentShard) {
return shard;
@ -136,13 +139,24 @@ export const $config = (function () {
function setup(db, collName, _cluster) {
const ns = db + "." + collName;
print(`Started unshardCollection on ${ns}`);
assert.commandWorked(db.adminCommand({unshardCollection: ns}));
print(`Finished unshardCollection on ${ns}`);
// Calculate the primary shard
let unshardedColl = db.getSiblingDB("config").collections.findOne({_id: ns});
let chunk = db.getSiblingDB("config").chunks.findOne({uuid: unshardedColl.uuid});
this.primaryShard = chunk.shard;
const result = db.adminCommand({unshardCollection: ns});
if (result.ok) {
print(`Finished unshardCollection on ${ns}`);
let unshardedColl = db.getSiblingDB("config").collections.findOne({_id: ns});
let chunk = db.getSiblingDB("config").chunks.findOne({uuid: unshardedColl.uuid});
this.primaryShard = chunk.shard;
} else if (
result.code === ErrorCodes.NamespaceNotFound &&
FixtureHelpers.maySkipImplicitSharding() &&
FixtureHelpers.isUntracked(db.getCollection(collName))
) {
// When implicit sharding may be skipped, the collection may not have been sharded, so
// the database may not be registered in the cluster catalog.
this.primaryShard = undefined;
print(`unshardCollection skipped for ${ns}: collection is not tracked in cluster catalog`);
} else {
assert.commandWorked(result);
}
const coll = db.getCollection(collName);
assert.commandWorked(coll.insert(createDocuments(kTotalWorkingDocuments)));

View File

@ -49,6 +49,9 @@ const runCommandInLoop = function (db, namespace, cmdName, cmdObj, data, expecte
ErrorCodes.BackgroundOperationInProgressForNamespace,
ErrorCodes.ReshardCollectionInProgress,
ErrorCodes.QueryPlanKilled,
ErrorCodes.MaxTimeMSExpired,
// A rename can starve behind another concurrent DDL operation holding the namespace lock.
ErrorCodes.LockBusy,
// StaleConfig is usually retried by the mongos, but in situations where multiple errors
// have occurred on the same batch and MultipleErrorsOccurred is returned, one of the errors
// could be StaleConfig and the other could be one that mongos does not retry the batch on.
@ -273,9 +276,12 @@ export const $config = (function () {
let otherDbShard;
if (numShards > 1) {
// currDb may be null if implicit sharding is skipped, leaving the database
// unregistered in config.databases (no sharded collection created yet).
const currDb = db.getSiblingDB("config")["databases"].findOne({_id: db.getName()});
const primaryShard = currDb ? currDb.primary : null;
shardNames.some((shard) => {
if (shard != currDb.primary) {
if (shard != primaryShard) {
otherDbShard = shard;
return false;
}

View File

@ -16,6 +16,7 @@ import {
$config as $baseConfig,
testCommand,
} from "jstests/concurrency/fsm_workloads/ddl/rename_collection/collection_uuid.js";
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
export const $config = extendWorkload($baseConfig, function ($config, $super) {
const origStates = Object.keys($config.states);
@ -24,6 +25,12 @@ export const $config = extendWorkload($baseConfig, function ($config, $super) {
shardingCommands: function shardingCommands(db, collName) {
const namespace = db.getName() + "." + collName;
// When implicit sharding may be skipped, the collection may be unsharded and absent
// from the cluster catalog. All commands below require it to be tracked.
if (FixtureHelpers.maySkipImplicitSharding() && FixtureHelpers.isUntracked(db, this.collUUID)) {
return;
}
// ShardCollection should fail as the collection is already sharded.
let shardCollectionCmd = {
shardCollection: namespace,

View File

@ -25,7 +25,9 @@ import {$config as $baseConfig} from "jstests/concurrency/fsm_workloads/query/ag
import {
areViewlessTimeseriesEnabled,
getTimeseriesBucketsColl,
isTrackedTimeseries,
} from "jstests/core/timeseries/libs/viewless_timeseries_util.js";
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
export const $config = extendWorkload($baseConfig, function ($config, $super) {
const timeFieldName = "time";
@ -39,6 +41,8 @@ export const $config = extendWorkload($baseConfig, function ($config, $super) {
*/
$config.states.query = function query(db, collName) {
jsTestLog(`Running query: coll=${collName} out=${this.outputCollName}`);
const sourceCollectionWasUntrackedBeforeQuery =
FixtureHelpers.maySkipImplicitSharding() && !isTrackedTimeseries(db.getCollection(collName));
const res = db[collName].runCommand({
aggregate: collName,
pipeline: [
@ -79,7 +83,7 @@ export const $config = extendWorkload($baseConfig, function ($config, $super) {
// TODO (SERVER-88275) a moveCollection can cause the original collection to be dropped and
// re-created with a different uuid, causing the aggregation to fail with QueryPlanKilled
// when the mongos is fetching data from the shard using getMore(). Remove
// the interruptedQueryErrors from allowedErrorCodes once this bug is being addressed
// the interruptedQueryErrors from allowedErrorCodes once this bug is being addressed.
if (TestData.runningWithBalancer) {
allowedErrorCodes = allowedErrorCodes.concat(interruptedQueryErrors);
// On slow builds with the balancer enabled, it is possible for the router to exhaust
@ -87,6 +91,13 @@ export const $config = extendWorkload($baseConfig, function ($config, $super) {
// to the client.
allowedErrorCodes.push(ErrorCodes.StaleConfig);
}
// When the source collection is untracked, we might encounter a concurrent movePrimary
// which drops and recreates the collection on the donor shard, triggering the same class of
// interruption (QueryPlanKilled / NamespaceNotFound).
if (sourceCollectionWasUntrackedBeforeQuery) {
allowedErrorCodes.push(ErrorCodes.NamespaceNotFound);
allowedErrorCodes.push(ErrorCodes.QueryPlanKilled);
}
assert.commandWorkedOrFailedWithCode(res, allowedErrorCodes);
if (res.ok) {
@ -165,6 +176,13 @@ export const $config = extendWorkload($baseConfig, function ($config, $super) {
if (TestData.runningWithBalancer) {
allowedErrorCodes.push(ErrorCodes.StaleConfig);
}
if (FixtureHelpers.maySkipImplicitSharding()) {
// In suites that only sometimes implicitly track the timeseries collection, the router
// could exhaust the retry attempts on StaleConfig because we repeatedly drop and
// re-create the collection, causing its shard version to change continuously.
allowedErrorCodes.push(ErrorCodes.StaleConfig);
}
assert.commandWorkedOrFailedWithCode(
db.createCollection(this.outputCollName, {
timeseries: {timeField: timeFieldName, metaField: metaFieldName},

View File

@ -20,6 +20,7 @@
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";
import {ShardingTopologyHelpers} from "jstests/concurrency/fsm_workload_helpers/catalog_and_routing/sharding_topology_helpers.js";
import {randomUpdateDelete} from "jstests/concurrency/fsm_workload_modifiers/random_update_delete.js";
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
const $baseConfig = {
threadCount: 5,
@ -43,7 +44,21 @@ const $baseConfig = {
// The runner will implicitly shard the collection if we are in a sharded cluster, so
// unshard it.
assert.commandWorked(db.adminCommand({unshardCollection: `${db}.${collName}`}));
const ns = `${db}.${collName}`;
const result = db.adminCommand({unshardCollection: ns});
if (!result.ok) {
// When implicit sharding is skipped, the collection may not have been sharded,
// so the database may not be registered in the cluster catalog. In that case the
// collection is already effectively unsharded and no action is needed.
const collectionWasAlreadyUntracked =
result.code === ErrorCodes.NamespaceNotFound &&
FixtureHelpers.maySkipImplicitSharding() &&
FixtureHelpers.isUntracked(db.getCollection(collName));
// Ignore the error if the collection was already untracked. Otherwise, force the test to fail.
if (!collectionWasAlreadyUntracked) {
assert.commandWorked(result);
}
}
const bulk = db[collName].initializeUnorderedBulkOp();
for (let i = 0; i < this.threadCount * 200; ++i) {
bulk.insert({_id: i});

View File

@ -19,6 +19,7 @@
*/
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";
import {randomUpdateDelete} from "jstests/concurrency/fsm_workload_modifiers/random_update_delete.js";
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
const $baseConfig = {
threadCount: 5,
@ -79,14 +80,23 @@ export const $config = extendWorkload($partialConfig, function ($config, $super)
$config.states.reshardCollection = function reshardCollection(db, collName, connCache) {
const namespace = `${db}.${collName}`;
jsTestLog(`Attempting to reshard collection ${namespace}`);
const result = assert.commandWorked(
db.adminCommand({
reshardCollection: namespace,
key: this.getShardKey(collName),
numInitialChunks: 1,
forceRedistribution: true,
}),
);
const result = db.adminCommand({
reshardCollection: namespace,
key: this.getShardKey(collName),
numInitialChunks: 1,
forceRedistribution: true,
});
if (
result.code === ErrorCodes.NamespaceNotFound &&
FixtureHelpers.maySkipImplicitSharding() &&
FixtureHelpers.isUntracked(db.getCollection(collName))
) {
// When implicit sharding is skipped, the collection may not have been sharded at
// setup time, making reshardCollection legitimately fail with NamespaceNotFound.
jsTestLog(`reshardCollection skipped for ${namespace}: collection is not sharded`);
return;
}
assert.commandWorked(result);
jsTestLog(`Reshard collection result for ${namespace}: ${tojson(result)}`);
};

View File

@ -107,6 +107,23 @@ export var FixtureHelpers = (function () {
return isSharded(coll) || isUnsplittable(coll);
}
/**
 * Reports whether the sharding catalog has no entry for a collection.
 *
 * When 'collUUID' is supplied, 'collOrDb' only needs to expose getSiblingDB(): the function looks
 * up config.collections by that UUID and returns true when no document is found. When 'collUUID'
 * is omitted, 'collOrDb' is passed to isTracked() and the negated result is returned.
 *
 * TODO (SERVER-86443): remove this utility once all collections are tracked.
 */
function isUntracked(collOrDb, collUUID = undefined) {
    // Without a UUID there is nothing to query by; defer to the existing tracked check.
    if (collUUID === undefined) {
        return !isTracked(collOrDb);
    }
    const catalogEntry = collOrDb.getSiblingDB("config").collections.findOne({uuid: collUUID});
    return catalogEntry === null;
}
/**
 * Returns true when TestData.shardCollectionProbability is defined and strictly less than 1.
 *
 * NOTE(review): presumably a probability below 1 means the test runner may leave some
 * collections unsharded on creation; confirm against the runner that consumes this option.
 */
function maySkipImplicitSharding() {
    const probability = TestData.shardCollectionProbability;
    return probability !== undefined && probability < 1;
}
/**
* Returns an array with the shardIds that own data for the given collection.
*/
@ -290,6 +307,8 @@ export var FixtureHelpers = (function () {
isSharded: isSharded,
isUnsplittable: isUnsplittable,
isTracked: isTracked,
isUntracked: isUntracked,
maySkipImplicitSharding: maySkipImplicitSharding,
areCollectionsColocated: areCollectionsColocated,
getShardsOwningDataForCollection: getShardsOwningDataForCollection,
getTopologyTime: getTopologyTime,