SERVER-99623 Do not immediately retry when range deleter encounters an index not found error (#42274)
Co-authored-by: Randolph Tan <randolph@10gen.com> GitOrigin-RevId: 3593ec907940bdb17ba6be2bda2a2701e5327049
This commit is contained in:
parent
0048f016e8
commit
9e04e35005
@ -90,6 +90,7 @@ namespace {
|
||||
const auto rangeDeleterServiceDecorator = ServiceContext::declareDecoration<RangeDeleterService>();
|
||||
|
||||
const Seconds kCheckForEnabledServiceInterval(10);
|
||||
const Seconds kMissingIndexRetryInterval(10);
|
||||
|
||||
BSONObj getShardKeyPattern(OperationContext* opCtx,
|
||||
const DatabaseName& dbName,
|
||||
@ -131,8 +132,10 @@ RangeDeleterService* RangeDeleterService::get(OperationContext* opCtx) {
|
||||
}
|
||||
|
||||
RangeDeleterService::ReadyRangeDeletionsProcessor::ReadyRangeDeletionsProcessor(
|
||||
OperationContext* opCtx)
|
||||
: _service(opCtx->getServiceContext()), _thread([this] { _runRangeDeletions(); }) {}
|
||||
OperationContext* opCtx, std::shared_ptr<executor::TaskExecutor> executor)
|
||||
: _service(opCtx->getServiceContext()),
|
||||
_thread([this] { _runRangeDeletions(); }),
|
||||
_executor(executor) {}
|
||||
|
||||
RangeDeleterService::ReadyRangeDeletionsProcessor::~ReadyRangeDeletionsProcessor() {
|
||||
shutdown();
|
||||
@ -345,7 +348,21 @@ void RangeDeleterService::ReadyRangeDeletionsProcessor::_runRangeDeletions() {
|
||||
// recoverable for a range shard key. This index may be rebuilt in the future, so
|
||||
// reschedule the task at the end of the queue.
|
||||
_completedRangeDeletion();
|
||||
emplaceRangeDeletion(task);
|
||||
|
||||
sleepFor(_executor, kMissingIndexRetryInterval)
|
||||
.getAsync([this, task](Status status) {
|
||||
if (!status.isOK()) {
|
||||
LOGV2_WARNING(9962300,
|
||||
"Encountered an error while retrying a range deletion "
|
||||
"task that previously failed due to missing index",
|
||||
"status"_attr = status,
|
||||
"task"_attr = task.toBSON());
|
||||
return;
|
||||
}
|
||||
|
||||
emplaceRangeDeletion(task);
|
||||
});
|
||||
|
||||
break;
|
||||
} catch (const DBException&) {
|
||||
// Release the thread only in case the operation context has been interrupted, as
|
||||
@ -388,7 +405,8 @@ void RangeDeleterService::onStepUpComplete(OperationContext* opCtx, long long te
|
||||
_executor->startup();
|
||||
|
||||
// Initialize the range deletion processor to allow enqueueing ready tasks
|
||||
_readyRangeDeletionsProcessorPtr = std::make_unique<ReadyRangeDeletionsProcessor>(opCtx);
|
||||
_readyRangeDeletionsProcessorPtr =
|
||||
std::make_unique<ReadyRangeDeletionsProcessor>(opCtx, _executor);
|
||||
|
||||
_recoverRangeDeletionsOnStepUp(opCtx);
|
||||
}
|
||||
|
||||
@ -136,7 +136,8 @@ private:
|
||||
*/
|
||||
class ReadyRangeDeletionsProcessor {
|
||||
public:
|
||||
ReadyRangeDeletionsProcessor(OperationContext* opCtx);
|
||||
ReadyRangeDeletionsProcessor(OperationContext* opCtx,
|
||||
std::shared_ptr<executor::TaskExecutor> executor);
|
||||
~ReadyRangeDeletionsProcessor();
|
||||
|
||||
/*
|
||||
@ -189,6 +190,12 @@ private:
|
||||
|
||||
/* Thread consuming the range deletions queue */
|
||||
stdx::thread _thread;
|
||||
|
||||
/*
|
||||
* An executor that is managed (startup & shutdown) by the RangeDeleterService. An example
|
||||
* use of this is to schedule, at a later time, a retry of a task that errored.
|
||||
*/
|
||||
std::shared_ptr<executor::TaskExecutor> _executor;
|
||||
};
|
||||
|
||||
// Keeping track of per-collection registered range deletion tasks
|
||||
|
||||
@ -123,11 +123,6 @@ StatusWith<std::pair<int, int>> deleteNextBatch(OperationContext* opCtx,
|
||||
"Unable to find range shard key index",
|
||||
"keyPattern"_attr = keyPattern,
|
||||
logAttrs(nss));
|
||||
|
||||
// When a shard key index is not found, the range deleter moves the task to the bottom
|
||||
// of the range deletion queue. This sleep is aimed at avoiding logging too aggressively
|
||||
// in order to prevent log files to increase too much in size.
|
||||
opCtx->sleepFor(Seconds(5));
|
||||
}
|
||||
|
||||
iasserted(ErrorCodes::IndexNotFound,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user