SERVER-117382 Make acquisitions fail with specific error when restoring after timeseries upgrade/downgrade (#46599)

Co-authored-by: Pierlauro Sciarelli <pierlauro.sciarelli@mongodb.com>
GitOrigin-RevId: b5c2d1f09182a0016391707b4f2a0ecc9833e943
This commit is contained in:
Tommaso Tocci 2026-01-28 15:39:22 +01:00 committed by MongoDB Bot
parent da6b5d42de
commit 41e699dfce
5 changed files with 218 additions and 12 deletions

View File

@ -0,0 +1,171 @@
/**
* Tests that CRUD operations on timeseries collections work correctly
* when executed concurrently with FCV upgrade/downgrade.
*
* TODO SERVER-117477: remove this test once 9.0 becomes lastLTS;
* by then we will no longer perform any timeseries transformation on FCV upgrade/downgrade.
*
* @tags: [
* requires_timeseries,
* # Requires all nodes to be running the latest binary.
* multiversion_incompatible,
* # Runs setFCV, which can interfere with other tests.
* incompatible_with_concurrency_simultaneous,
* # Suites with balancer don't support retriable commands outside of non-retriable sessions (e.g. delete)
* assumes_balancer_off,
* # Suites with stepdowns don't support retriable commands outside of non-retriable writes (e.g. delete)
* does_not_support_stepdowns,
* # This test performs FCV upgrade/downgrade, and config fuzzer
* # may set cluster/server parameters incompatible with the current/target FCV
* does_not_support_config_fuzzer,
* ]
*/
import {uniformDistTransitions} from "jstests/concurrency/fsm_workload_helpers/state_transition_utils.js";
// Name of the time field passed as `timeField` when the workload creates its timeseries
// collections; also used as the key of the timestamp in every inserted document.
const timeFieldName = "t_field";
// Name of the meta field passed as `metaField` when the workload creates its timeseries
// collections; also used as the key in inserted documents and in delete/aggregate filters.
const metaFieldName = "m_field";
/**
 * FSM workload definition.
 *
 * Every thread randomly moves between FCV upgrade/downgrade states and CRUD states that target
 * one of two timeseries collections ("A_coll" / "B_coll"). CRUD states tolerate
 * `InterruptedDueToTimeseriesUpgradeDowngrade`; any other error fails the workload.
 */
export const $config = (function () {
    /**
     * Returns both collection names in a random order, so that callers taking the first
     * element pick one of the two collections at random.
     */
    const getCollNames = function () {
        if (Random.randInt(2)) {
            return ["A_coll", "B_coll"];
        }
        return ["B_coll", "A_coll"];
    };

    /** Returns the handle of a randomly chosen workload collection. */
    const pickCollection = function (db) {
        return db[getCollNames()[0]];
    };

    /** Returns a random meta value among "x", "y" and "z". */
    const rndMeta = function () {
        const metaValues = ["x", "y", "z"];
        return metaValues[Random.randInt(metaValues.length)];
    };

    /**
     * Runs `operation` and swallows the errors that are expected while an FCV transition is
     * concurrently changing the timeseries collection format. Any other error (including errors
     * without a `code`) is rethrown unchanged.
     *
     * @param {function(): void} operation - the CRUD operation to execute.
     */
    const runToleratingFCVInterruption = function (operation) {
        try {
            operation();
        } catch (e) {
            // Resolved lazily so that `ErrorCodes` is only needed when an error is actually caught.
            const acceptedErrors = [ErrorCodes.InterruptedDueToTimeseriesUpgradeDowngrade];
            if (e.code && acceptedErrors.includes(e.code)) {
                return;
            }
            throw e;
        }
    };

    const states = {
        // Moves the cluster to the latest FCV.
        upgrade: function (db, collName) {
            jsTestLog(`Upgrade`);
            assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: latestFCV, confirm: true}));
        },
        // Moves the cluster to the last-LTS FCV.
        downgrade: function (db, collName) {
            jsTestLog(`Downgrade`);
            assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV, confirm: true}));
        },
        insertOne: function (db, collName) {
            const coll = pickCollection(db);
            runToleratingFCVInterruption(() => {
                const res = assert.commandWorked(
                    coll.insertOne({"op": "insertOne", [metaFieldName]: rndMeta(), [timeFieldName]: ISODate()}),
                );
                jsTest.log(`${coll.getName()} insertOne: ${tojsononeline(res)}`);
            });
        },
        insertMany: function (db, collName) {
            const coll = pickCollection(db);
            // The batch is built outside of the tolerated section: only the server round trip
            // is expected to be interrupted.
            const docs = [];
            for (let i = 0; i < 1000; i++) {
                docs.push({"op": "insertMany", [metaFieldName]: rndMeta(), [timeFieldName]: ISODate()});
            }
            runToleratingFCVInterruption(() => {
                const res = assert.commandWorked(coll.insertMany(docs));
                jsTest.log(`${coll.getName()} insertMany: ${tojsononeline(res)}`);
            });
        },
        deleteOne: function (db, collName) {
            const coll = pickCollection(db);
            runToleratingFCVInterruption(() => {
                const res = assert.commandWorked(coll.deleteOne({[metaFieldName]: rndMeta()}));
                jsTest.log(`${coll.getName()} deleteOne: ${tojsononeline(res)}`);
            });
        },
        deleteMany: function (db, collName) {
            const coll = pickCollection(db);
            runToleratingFCVInterruption(() => {
                const res = assert.commandWorked(coll.deleteMany({[metaFieldName]: rndMeta()}));
                jsTest.log(`${coll.getName()} deleteMany: ${tojsononeline(res)}`);
            });
        },
        find: function (db, collName) {
            const coll = pickCollection(db);
            runToleratingFCVInterruption(() => {
                const res = coll.find().itcount();
                jsTest.log(`${coll.getName()} find ${res} docs`);
            });
        },
        countDocuments: function (db, collName) {
            const coll = pickCollection(db);
            runToleratingFCVInterruption(() => {
                const res = coll.countDocuments({});
                jsTest.log(`${coll.getName()} counted ${res} docs`);
            });
        },
        aggregate: function (db, collName) {
            const coll = pickCollection(db);
            runToleratingFCVInterruption(() => {
                const res = coll.aggregate([{"$match": {[metaFieldName]: rndMeta()}}]).toArray();
                jsTest.log(`${coll.getName()} aggregate found ${res.length}`);
            });
        },
    };

    /**
     * Creates both workload collections as timeseries collections.
     *
     * The creation result is asserted: if it failed silently, the CRUD states would implicitly
     * create regular collections and the workload would no longer exercise timeseries at all.
     */
    const setup = function (db, collName) {
        for (const name of getCollNames()) {
            assert.commandWorked(
                db.createCollection(name, {timeseries: {timeField: timeFieldName, metaField: metaFieldName}}),
            );
        }
    };

    /** Restores the latest FCV, whatever state the workload ended in. */
    const teardown = function (db, collName) {
        assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: latestFCV, confirm: true}));
    };

    return {
        threadCount: 12,
        iterations: 300,
        startState: "upgrade",
        states: states,
        transitions: uniformDistTransitions(states),
        setup: setup,
        teardown: teardown,
    };
})();

View File

@ -133,6 +133,8 @@ const exceptionFilteredBackgroundDbCheck = function (newMongoWithRetry, hosts) {
ErrorCodes.NamespaceNotFound,
ErrorCodes.LockTimeout,
ErrorCodes.Interrupted,
// TODO SERVER-117477: remove this error code once 9.0 becomes last LTS and all timeseries collections are viewless.
ErrorCodes.InterruptedDueToTimeseriesUpgradeDowngrade,
ErrorCodes.CommandNotSupportedOnView,
],
onDrop,

View File

@ -27,7 +27,9 @@ const isIgnorableError = function ignorableError(codeName) {
codeName == "CommandNotSupportedOnView" ||
codeName == "InterruptedAtShutdown" ||
codeName == "InvalidViewDefinition" ||
codeName == "CommandNotSupported"
codeName == "CommandNotSupported" ||
// TODO SERVER-117477: remove this error code once 9.0 becomes last LTS and all timeseries collections are viewless.
codeName == "InterruptedDueToTimeseriesUpgradeDowngrade"
) {
return true;
}

View File

@ -180,7 +180,6 @@ error_codes:
name: InterruptedDueToFCVChange,
categories: [Interruption, RetriableError],
}
- {code: 10171600, name: ReadThroughCacheTimeMonotonicityViolation}
# Add new codes at the bottom of this file, not here. They are to be
@ -1156,5 +1155,11 @@ error_codes:
}
- {code: 484, name: KeyExists}
- {
code: 485,
name: InterruptedDueToTimeseriesUpgradeDowngrade,
categories: [Interruption, RetriableError],
}
# ^^^^
# Add new codes, sequentially numbered, above.

View File

@ -365,10 +365,11 @@ void assertReadConcernSupported(OperationContext* opCtx,
std::variant<CollectionPtr, std::shared_ptr<const ViewDefinition>> acquireLocalCollectionOrView(
OperationContext* opCtx,
const CollectionCatalog& catalog,
const AcquisitionPrerequisites& prerequisites) {
const AcquisitionPrerequisites& prerequisites,
const bool forRestore = false) {
const auto& nss = prerequisites.nss;
auto coll = [&]() {
auto getCollection = [&](const NamespaceString& nss) {
if (prerequisites.useConsistentCatalog) {
auto readTimestamp =
shard_role_details::getRecoveryUnit(opCtx)->getPointInTimeReadTimestamp();
@ -376,9 +377,29 @@ std::variant<CollectionPtr, std::shared_ptr<const ViewDefinition>> acquireLocalC
opCtx, NamespaceStringOrUUID(nss), readTimestamp));
} else {
return CollectionPtr::CollectionPtr_UNSAFE(
catalog.lookupCollectionByNamespace(opCtx, prerequisites.nss));
catalog.lookupCollectionByNamespace(opCtx, nss));
}
}();
};
auto coll = getCollection(nss);
if (!coll && forRestore) {
// Throw `InterruptedDueToTimeseriesUpgradeDowngrade` if this is a timeseries collection
// that has been concurrently upgraded/downgraded.
// TODO SERVER-117477: remove this logic once 9.0 becomes last LTS and all timeseries
// collections are viewless.
const auto otherTimeseriesNss = nss.isTimeseriesBucketsCollection()
? nss.getTimeseriesViewNamespace()
: nss.makeTimeseriesBucketsNamespace();
auto otherTimeseriesColl = getCollection(otherTimeseriesNss);
if (otherTimeseriesColl && otherTimeseriesColl->isTimeseriesCollection()) {
uasserted(
ErrorCodes::InterruptedDueToTimeseriesUpgradeDowngrade,
fmt::format("Operation on collection '{}' was interrupted due to a time-series "
"metadata change during FCV transition. Retry the operation.",
nss.toStringForErrorMsg()));
}
}
checkCollectionUUIDMismatch(opCtx, catalog, nss, coll, prerequisites.uuid);
@ -418,16 +439,19 @@ struct SnapshotedServices {
SnapshotedServices acquireServicesSnapshot(OperationContext* opCtx,
const CollectionCatalog& catalog,
const AcquisitionPrerequisites& prerequisites) {
const AcquisitionPrerequisites& prerequisites,
const bool forRestore = false) {
if (holds_alternative<AcquisitionPrerequisites::PlacementConcernPlaceholder>(
prerequisites.placementConcern)) {
return SnapshotedServices{
acquireLocalCollectionOrView(opCtx, catalog, prerequisites), boost::none, boost::none};
acquireLocalCollectionOrView(opCtx, catalog, prerequisites, forRestore),
boost::none,
boost::none};
}
const auto& placementConcern = get<PlacementConcern>(prerequisites.placementConcern);
auto collOrView = acquireLocalCollectionOrView(opCtx, catalog, prerequisites);
auto collOrView = acquireLocalCollectionOrView(opCtx, catalog, prerequisites, forRestore);
const auto& nss = prerequisites.nss;
const auto scopedCSS = CollectionShardingState::acquire(opCtx, nss);
@ -1943,7 +1967,8 @@ void restoreTransactionResourcesToOperationContext(
// Just reacquire the CollectionPtr. Reads don't care about placement changes
// because they have already established a ScopedCollectionFilter that acts as
// RangePreserver.
auto collOrView = acquireLocalCollectionOrView(opCtx, *catalog, prerequisites);
auto collOrView = acquireLocalCollectionOrView(
opCtx, *catalog, prerequisites, true /* forRestore */);
if (!holds_alternative<CollectionPtr>(collOrView)) {
uassertedCollectionIsAViewAfterRestore();
@ -1963,7 +1988,7 @@ void restoreTransactionResourcesToOperationContext(
}
auto reacquiredServicesSnapshot =
acquireServicesSnapshot(opCtx, *catalog, prerequisites);
acquireServicesSnapshot(opCtx, *catalog, prerequisites, true /* forRestore */);
if (!holds_alternative<CollectionPtr>(
reacquiredServicesSnapshot.collectionPtrOrView)) {
@ -2000,7 +2025,8 @@ void restoreTransactionResourcesToOperationContext(
for (auto& acquiredView : transactionResources.acquiredViews) {
const auto& prerequisites = acquiredView.prerequisites;
auto collOrView = acquireLocalCollectionOrView(opCtx, *catalog, prerequisites);
auto collOrView =
acquireLocalCollectionOrView(opCtx, *catalog, prerequisites, true /* forRestore */);
uassert(ErrorCodes::QueryPlanKilled,
str::stream() << "Namespace '" << prerequisites.nss.toStringForErrorMsg()