SERVER-92857 Skip observer wait on resharding coordinator abort (#52330)

GitOrigin-RevId: 8a6d6ee53942904583b6a408e178f7fcb0910cb2
This commit is contained in:
Kruti Shah 2026-04-22 11:48:38 -04:00 committed by MongoDB Bot
parent b8b4646792
commit b883600ee1
2 changed files with 64 additions and 8 deletions

View File

@ -1878,20 +1878,41 @@ void ReshardingCoordinator::_generatePlacementChangeNotificationForChangeStreams
ExecutorFuture<void> ReshardingCoordinator::_awaitAllParticipantShardsDone(
const std::shared_ptr<executor::ScopedTaskExecutor>& executor) {
std::vector<ExecutorFuture<ReshardingCoordinatorDocument>> futures;
futures.emplace_back(
_reshardingCoordinatorObserver->awaitAllRecipientsDone().thenRunOn(**executor));
futures.emplace_back(
_reshardingCoordinatorObserver->awaitAllDonorsDone().thenRunOn(**executor));
auto coordinatorDocFuture = [&]() -> ExecutorFuture<ReshardingCoordinatorDocument> {
if (_coordinatorDoc.getAbortReason() &&
resharding::gFeatureFlagReshardingInitNoRefresh.isEnabled(
resharding::getVersionContextOrDefault(_forwardableOpMetadata),
serverGlobalParams.featureCompatibility.acquireFCVSnapshot())) {
// Under featureFlagReshardingInitNoRefresh, all participants are guaranteed to be done
// at this point. AbortReshardCollection ensures that all initialized participants have
// completed, and since the command carries a higher txnNumber (via OSI), no new
// participants can initialize after it is sent. Therefore, it is safe to skip the
// observer wait.
LOGV2_DEBUG(9285700,
1,
"Skipping participant observer wait: command OSI ordering guarantees "
"participants will self-abort.",
"reshardingUUID"_attr = _coordinatorDoc.getReshardingUUID());
return ExecutorFuture<ReshardingCoordinatorDocument>(**executor, _coordinatorDoc);
}
std::vector<ExecutorFuture<ReshardingCoordinatorDocument>> futures;
futures.emplace_back(
_reshardingCoordinatorObserver->awaitAllRecipientsDone().thenRunOn(**executor));
futures.emplace_back(
_reshardingCoordinatorObserver->awaitAllDonorsDone().thenRunOn(**executor));
return whenAllSucceed(std::move(futures)).thenRunOn(**executor).then([](const auto& docs) {
return docs[1];
});
}();
// We only allow the stepdown token to cancel operations after progressing past
// kCommitting.
return future_util::withCancellation(whenAllSucceed(std::move(futures)),
return future_util::withCancellation(std::move(coordinatorDocFuture),
_ctHolder->getStepdownToken())
.thenRunOn(**executor)
.then([this, executor](const auto& coordinatorDocsChangedOnDisk) {
.then([this, executor](const ReshardingCoordinatorDocument& coordinatorDoc) {
auto opCtx = _makeOperationContext();
auto& coordinatorDoc = coordinatorDocsChangedOnDisk[1];
boost::optional<Status> abortReason;
if (coordinatorDoc.getAbortReason()) {

View File

@ -2766,5 +2766,40 @@ TEST_F(ReshardingCoordinatorServiceTest, FeatureFlagReshardingInitNoRefreshSends
runReshardingToCompletion();
}
/**
 * With featureFlagReshardingInitNoRefresh enabled, aborting the coordinator while every donor and
 * recipient is still in its kUnused state must let the coordinator's completion future resolve
 * with ReshardCollectionAborted. See SERVER-92857.
 */
TEST_F(ReshardingCoordinatorServiceTest,
       FeatureFlagReshardingInitNoRefreshSkipsParticipantWaitOnAbort) {
    RAIIServerParameterControllerForTest initNoRefreshFlag("featureFlagReshardingInitNoRefresh",
                                                           true);

    // Hold the coordinator at the kPreparingToDonate transition.
    PauseDuringStateTransitions pauseGuard{controller(), CoordinatorStateEnum::kPreparingToDonate};

    auto opCtx = operationContext();
    auto coordinator = initializeAndGetCoordinator();

    // Release the pause, then block until kPreparingToDonate is committed, so that the
    // coordinator sends abort commands to the participants.
    pauseGuard.wait(CoordinatorStateEnum::kPreparingToDonate);
    pauseGuard.unset(CoordinatorStateEnum::kPreparingToDonate);
    waitUntilCommittedCoordinatorDocReach(opCtx, CoordinatorStateEnum::kPreparingToDonate);

    // Precondition: no donor or recipient has moved past kUnused in the coordinator document.
    {
        const auto persistedDoc = getCoordinatorDoc(opCtx);
        for (const auto& donorEntry : persistedDoc.getDonorShards()) {
            ASSERT_EQ(donorEntry.getMutableState().getState(), DonorStateEnum::kUnused);
        }
        for (const auto& recipientEntry : persistedDoc.getRecipientShards()) {
            ASSERT_EQ(recipientEntry.getMutableState().getState(), RecipientStateEnum::kUnused);
        }
    }

    // Abort while all participants are still kUnused. Under featureFlagReshardingInitNoRefresh
    // the coordinator's completion must not hang on this abort (SERVER-92857).
    coordinator->abort({resharding::kUserAbortReason, resharding::AbortType::kAbortSkipQuiesce});

    const auto completionStatus = coordinator->getCompletionFuture().getNoThrow();
    ASSERT_EQ(completionStatus, ErrorCodes::ReshardCollectionAborted);
}
} // namespace
} // namespace mongo