diff --git a/src/mongo/db/s/resharding/resharding_coordinator.inl b/src/mongo/db/s/resharding/resharding_coordinator.inl index 8f8fbc004cb..8d8db002b0f 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator.inl +++ b/src/mongo/db/s/resharding/resharding_coordinator.inl @@ -1878,20 +1878,41 @@ void ReshardingCoordinator::_generatePlacementChangeNotificationForChangeStreams ExecutorFuture ReshardingCoordinator::_awaitAllParticipantShardsDone( const std::shared_ptr& executor) { - std::vector> futures; - futures.emplace_back( - _reshardingCoordinatorObserver->awaitAllRecipientsDone().thenRunOn(**executor)); - futures.emplace_back( - _reshardingCoordinatorObserver->awaitAllDonorsDone().thenRunOn(**executor)); + auto coordinatorDocFuture = [&]() -> ExecutorFuture { + if (_coordinatorDoc.getAbortReason() && + resharding::gFeatureFlagReshardingInitNoRefresh.isEnabled( + resharding::getVersionContextOrDefault(_forwardableOpMetadata), + serverGlobalParams.featureCompatibility.acquireFCVSnapshot())) { + // Under featureFlagReshardingInitNoRefresh, all participants are guaranteed to be done + // at this point. AbortReshardCollection ensures that all initialized participants have + // completed, and since the command carries a higher txnNumber (via OSI), no new + // participants can initialize after it is sent. Therefore, it is safe to skip the + // observer wait. + LOGV2_DEBUG(9285700, + 1, + "Skipping participant observer wait: command OSI ordering guarantees " + "participants will self-abort.", + "reshardingUUID"_attr = _coordinatorDoc.getReshardingUUID()); + return ExecutorFuture(**executor, _coordinatorDoc); + } + + std::vector> futures; + futures.emplace_back( + _reshardingCoordinatorObserver->awaitAllRecipientsDone().thenRunOn(**executor)); + futures.emplace_back( + _reshardingCoordinatorObserver->awaitAllDonorsDone().thenRunOn(**executor)); + return whenAllSucceed(std::move(futures)).thenRunOn(**executor).then([](const auto& docs) { + return docs[1]; + }); + }(); // We only allow the stepdown token to cancel operations after progressing past // kCommitting. - return future_util::withCancellation(whenAllSucceed(std::move(futures)), + return future_util::withCancellation(std::move(coordinatorDocFuture), _ctHolder->getStepdownToken()) .thenRunOn(**executor) - .then([this, executor](const auto& coordinatorDocsChangedOnDisk) { + .then([this, executor](const ReshardingCoordinatorDocument& coordinatorDoc) { auto opCtx = _makeOperationContext(); - auto& coordinatorDoc = coordinatorDocsChangedOnDisk[1]; boost::optional abortReason; if (coordinatorDoc.getAbortReason()) { diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp index 05838f43b22..1a57845143e 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp @@ -2766,5 +2766,40 @@ TEST_F(ReshardingCoordinatorServiceTest, FeatureFlagReshardingInitNoRefreshSends runReshardingToCompletion(); } +TEST_F(ReshardingCoordinatorServiceTest, + FeatureFlagReshardingInitNoRefreshSkipsParticipantWaitOnAbort) { + RAIIServerParameterControllerForTest noRefreshFeatureFlagController( + "featureFlagReshardingInitNoRefresh", true); + + PauseDuringStateTransitions stateTransitionsGuard{controller(), + CoordinatorStateEnum::kPreparingToDonate}; + + auto opCtx = operationContext(); + auto coordinator = initializeAndGetCoordinator(); + + // Wait until kPreparingToDonate is committed to ensure coordinator sends abort commands to + // participants. + stateTransitionsGuard.wait(CoordinatorStateEnum::kPreparingToDonate); + stateTransitionsGuard.unset(CoordinatorStateEnum::kPreparingToDonate); + waitUntilCommittedCoordinatorDocReach(opCtx, CoordinatorStateEnum::kPreparingToDonate); + + { + auto coordDoc = getCoordinatorDoc(opCtx); + for (const auto& donor : coordDoc.getDonorShards()) { + ASSERT_EQ(donor.getMutableState().getState(), DonorStateEnum::kUnused); + } + for (const auto& recipient : coordDoc.getRecipientShards()) { + ASSERT_EQ(recipient.getMutableState().getState(), RecipientStateEnum::kUnused); + } + } + + // Abort while all participants are still in kUnused. With featureFlagReshardingInitNoRefresh, + // coordinator completion will not hang on abort. See SERVER-92857. + coordinator->abort({resharding::kUserAbortReason, resharding::AbortType::kAbortSkipQuiesce}); + + ASSERT_EQ(coordinator->getCompletionFuture().getNoThrow(), + ErrorCodes::ReshardCollectionAborted); +} + } // namespace } // namespace mongo