SERVER-117612 Prevent convertToCapped to hold the critical section after failing to clean up (#53234)
GitOrigin-RevId: 15931a285de918ab317afa0016a3a41821130add
This commit is contained in:
parent
59ff43bce4
commit
e842bbac7f
@ -1,10 +1,14 @@
|
||||
/**
|
||||
* Tests the convertToCapped coordinator behavior when there is an hypothetical failure during the
|
||||
* local convertToCapped operation.
|
||||
* @tags: [
|
||||
* # TODO SERVER-126244: Remove once 9.0 becomes last LTS.
|
||||
* # In the newest version convertToCapped will also retry errors in untracked collections.
|
||||
* multiversion_incompatible,
|
||||
* ]
|
||||
*/
|
||||
|
||||
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
||||
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
|
||||
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
|
||||
import {ShardingTest} from "jstests/libs/shardingtest.js";
|
||||
|
||||
@ -29,27 +33,13 @@ assert.commandWorked(st.s.adminCommand({enableSharding: db.getName(), primarySha
|
||||
|
||||
const waitForConvertToCapped = startParallelShell(
|
||||
funWithArgs(
|
||||
function (dbName, collName, isTracked) {
|
||||
function (dbName, collName) {
|
||||
const testDB = db.getSiblingDB(dbName);
|
||||
if (isTracked) {
|
||||
// The coordinator will eventually succeed since once the collection has been
|
||||
// locally capped, the sharding catalog has to be updated.
|
||||
assert.commandWorked(testDB.runCommand({convertToCapped: collName, size: 1000}));
|
||||
} else {
|
||||
// The coordinator will fail the first time because it doesn't retry on error if the
|
||||
// collection is not tracked (since the sharding catalog doesn't need to be
|
||||
// updated). So let's retry the operation once the failpoint has been disabled to
|
||||
// make sure it eventually succeeds.
|
||||
assert.commandFailedWithCode(
|
||||
testDB.runCommand({convertToCapped: collName, size: 1000}),
|
||||
ErrorCodes.InternalError,
|
||||
);
|
||||
assert.commandWorked(testDB.runCommand({convertToCapped: collName, size: 1000}));
|
||||
}
|
||||
// An error on the coordinator right after the collection is locally capped should trigger the retry logic, so the operation eventually succeeds.
|
||||
assert.commandWorked(testDB.runCommand({convertToCapped: collName, size: 1000}));
|
||||
},
|
||||
db.getName(),
|
||||
coll.getName(),
|
||||
FixtureHelpers.isTracked(coll),
|
||||
),
|
||||
st.s.port,
|
||||
);
|
||||
|
||||
@ -522,16 +522,14 @@ ExecutorFuture<void> ConvertToCappedCoordinator::_runImpl(
|
||||
}
|
||||
}
|
||||
|
||||
// If the coordinator succeeded to convert the collection to capped and the
|
||||
// collection is tracked, the sharding catalog must be updated. Thus throw the error
|
||||
// and retry relying on _mustAlwaysMakeProgress that will always be true reached
|
||||
// this phase.
|
||||
if (_mustAlwaysMakeProgress() || _isRetriableErrorForDDLCoordinator(status)) {
|
||||
// Retry the operation.
|
||||
return status;
|
||||
}
|
||||
|
||||
if (_doc.getPhase() >= Phase::kAcquireCriticalSectionOnCoordinator) {
|
||||
// Trigger cleanup for non-retriable errors in phases where the critical section is
|
||||
// held but the collection hasn't been capped yet — the operation can safely be
|
||||
// aborted. _mustAlwaysMakeProgress() returning true from
|
||||
// kAcquireCriticalSectionOnCoordinator ensures that if triggerCleanup() fails
|
||||
// transiently, the base class retries and this block is reached again.
|
||||
if (!_isRetriableErrorForDDLCoordinator(status) &&
|
||||
_doc.getPhase() >= Phase::kAcquireCriticalSectionOnCoordinator &&
|
||||
_doc.getPhase() < Phase::kConvertCollectionToCappedOnDataShard) {
|
||||
triggerCleanup(opCtx, status);
|
||||
MONGO_UNREACHABLE_TASSERT(10083519);
|
||||
}
|
||||
@ -546,12 +544,11 @@ bool ConvertToCappedCoordinator::isInCriticalSection(Phase phase) const {
|
||||
}
|
||||
|
||||
bool ConvertToCappedCoordinator::_mustAlwaysMakeProgress() {
|
||||
// If the collection was originally tracked on the sharding catalog, the coodinator must always
|
||||
// make forward progress after converting the collection to capped in order to align local and
|
||||
// sharding catalog.
|
||||
const bool isCollectionTrackedOnTheShardingCatalog = _doc.getOriginalCollection().has_value();
|
||||
return isCollectionTrackedOnTheShardingCatalog &&
|
||||
_doc.getPhase() >= Phase::kConvertCollectionToCappedOnDataShard;
|
||||
// Once the critical section is acquired, the coordinator must always make forward progress
|
||||
// so cleanup (critical section release) cannot be skipped. _mustAlwaysMakeProgress()
|
||||
// returning true ensures that even if triggerCleanup() fails transiently, the base class
|
||||
// retries _runImpl and calls triggerCleanup again.
|
||||
return _doc.getPhase() >= Phase::kAcquireCriticalSectionOnCoordinator;
|
||||
}
|
||||
|
||||
ExecutorFuture<void> ConvertToCappedCoordinator::_cleanupOnAbort(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user