SERVER-99623 Do not immediately retry when range deleter encounters an index not found error (#42274)

Co-authored-by: Randolph Tan <randolph@10gen.com>
GitOrigin-RevId: 3593ec907940bdb17ba6be2bda2a2701e5327049
This commit is contained in:
Silvia Surroca 2025-10-09 21:30:38 +02:00 committed by MongoDB Bot
parent 0048f016e8
commit 9e04e35005
3 changed files with 30 additions and 10 deletions

View File

@ -90,6 +90,7 @@ namespace {
// Declares a RangeDeleterService decoration on the ServiceContext.
const auto rangeDeleterServiceDecorator = ServiceContext::declareDecoration<RangeDeleterService>();
// NOTE(review): presumably how often the service re-checks whether it is enabled; the use site
// is outside this excerpt, so confirm before relying on this description.
const Seconds kCheckForEnabledServiceInterval(10);
// Delay applied before a range deletion task that failed with a missing shard key index is put
// back on the queue of ready range deletions, so the task is not retried immediately.
const Seconds kMissingIndexRetryInterval(10);
BSONObj getShardKeyPattern(OperationContext* opCtx,
const DatabaseName& dbName,
@ -131,8 +132,10 @@ RangeDeleterService* RangeDeleterService::get(OperationContext* opCtx) {
}
RangeDeleterService::ReadyRangeDeletionsProcessor::ReadyRangeDeletionsProcessor(
OperationContext* opCtx)
: _service(opCtx->getServiceContext()), _thread([this] { _runRangeDeletions(); }) {}
OperationContext* opCtx, std::shared_ptr<executor::TaskExecutor> executor)
: _service(opCtx->getServiceContext()),
_thread([this] { _runRangeDeletions(); }),
_executor(executor) {}
RangeDeleterService::ReadyRangeDeletionsProcessor::~ReadyRangeDeletionsProcessor() {
shutdown();
@ -345,7 +348,21 @@ void RangeDeleterService::ReadyRangeDeletionsProcessor::_runRangeDeletions() {
// recoverable for a range shard key. This index may be rebuilt in the future, so
// reschedule the task at the end of the queue.
_completedRangeDeletion();
emplaceRangeDeletion(task);
sleepFor(_executor, kMissingIndexRetryInterval)
.getAsync([this, task](Status status) {
if (!status.isOK()) {
LOGV2_WARNING(9962300,
"Encountered an error while retrying a range deletion "
"task that previously failed due to missing index",
"status"_attr = status,
"task"_attr = task.toBSON());
return;
}
emplaceRangeDeletion(task);
});
break;
} catch (const DBException&) {
// Release the thread only in case the operation context has been interrupted, as
@ -388,7 +405,8 @@ void RangeDeleterService::onStepUpComplete(OperationContext* opCtx, long long te
_executor->startup();
// Initialize the range deletion processor to allow enqueueing ready task
_readyRangeDeletionsProcessorPtr = std::make_unique<ReadyRangeDeletionsProcessor>(opCtx);
_readyRangeDeletionsProcessorPtr =
std::make_unique<ReadyRangeDeletionsProcessor>(opCtx, _executor);
_recoverRangeDeletionsOnStepUp(opCtx);
}

View File

@ -136,7 +136,8 @@ private:
*/
class ReadyRangeDeletionsProcessor {
public:
ReadyRangeDeletionsProcessor(OperationContext* opCtx);
ReadyRangeDeletionsProcessor(OperationContext* opCtx,
std::shared_ptr<executor::TaskExecutor> executor);
~ReadyRangeDeletionsProcessor();
/*
@ -189,6 +190,12 @@ private:
/* Thread consuming the range deletions queue */
stdx::thread _thread;
/*
* An executor whose lifecycle (startup & shutdown) is managed by the RangeDeleterService. It is
* used, for example, to schedule a later retry of a task that failed.
*/
std::shared_ptr<executor::TaskExecutor> _executor;
};
// Keeping track of per-collection registered range deletion tasks

View File

@ -123,11 +123,6 @@ StatusWith<std::pair<int, int>> deleteNextBatch(OperationContext* opCtx,
"Unable to find range shard key index",
"keyPattern"_attr = keyPattern,
logAttrs(nss));
// When a shard key index is not found, the range deleter moves the task to the bottom
// of the range deletion queue. This sleep avoids logging too aggressively, which prevents
// log files from growing too much in size.
opCtx->sleepFor(Seconds(5));
}
iasserted(ErrorCodes::IndexNotFound,