From 5fb893bc95010ab9987d7ee63cd41cb2ff9150dd Mon Sep 17 00:00:00 2001
From: Matt Kneiser
Date: Tue, 27 Jan 2026 10:23:48 -0800
Subject: [PATCH] SERVER-117510 Add replicated fastcount validate check skeleton (#46981)

GitOrigin-RevId: b5154dba54f04bc9eecd1c601c8793c8e1a615e6
---
Review note: relative to the GitOrigin revision above, the
FastCountType::invalid branch in validate_adaptor.cpp now asserts with
"Invalid FastCount type detected" instead of repeating the
"No FastCount tables found" text used by the FastCountType::none branch.
Hunk line counts are unchanged; the validate_adaptor.cpp post-image blob id
on the index line is still the upstream one.

 src/mongo/db/validate/validate_adaptor.cpp | 35 +++++++--
 src/mongo/db/validate/validate_state.cpp   | 82 ++++++++++++++++++++++
 src/mongo/db/validate/validate_state.h     | 24 ++++++-
 3 files changed, 133 insertions(+), 8 deletions(-)

diff --git a/src/mongo/db/validate/validate_adaptor.cpp b/src/mongo/db/validate/validate_adaptor.cpp
index 5c96e6583d3..30eb0792b5c 100644
--- a/src/mongo/db/validate/validate_adaptor.cpp
+++ b/src/mongo/db/validate/validate_adaptor.cpp
@@ -1298,12 +1298,35 @@ void ValidateAdaptor::traverseRecordStore(OperationContext* opCtx,
                           << " invalid documents.");
     }
 
-    const auto fastCount = coll->numRecords(opCtx);
-    if (_validateState->shouldEnforceFastCount() && fastCount != _numRecords) {
-        results->addError(str::stream()
-                          << "fast count (" << fastCount << ") does not match number of records ("
-                          << _numRecords << ") for collection '" << coll->ns().toStringForErrorMsg()
-                          << "'");
+    if (_validateState->shouldEnforceFastCount()) {
+        auto fastCountType = _validateState->getDetectedFastCountType(opCtx);
+        switch (fastCountType) {
+            case CollectionValidation::FastCountType::legacySizeStorer:
+                if (const auto fastCount = coll->numRecords(opCtx); fastCount != _numRecords) {
+                    results->addError(str::stream() << "fast count (" << fastCount
+                                                    << ") does not match number of records ("
+                                                    << _numRecords << ") for collection '"
+                                                    << coll->ns().toStringForErrorMsg() << "'");
+                }
+                break;
+            case CollectionValidation::FastCountType::replicated:
+                if (const auto fastCount = coll->numRecords(opCtx); fastCount != _numRecords) {
+                    results->addError(str::stream() << "replicated fast count (" << fastCount
+                                                    << ") does not match number of records ("
+                                                    << _numRecords << ") for collection '"
+                                                    << coll->ns().toStringForErrorMsg() << "'");
+                }
+                break;
+            case CollectionValidation::FastCountType::both:
+                uasserted(ErrorCodes::InvalidOptions, "Both FastCount tables found");
+                break;
+            case CollectionValidation::FastCountType::none:
+                uasserted(ErrorCodes::InvalidOptions, "No FastCount tables found");
+                break;
+            case CollectionValidation::FastCountType::invalid:
+                uasserted(ErrorCodes::InvalidOptions, "Invalid FastCount type detected");
+                break;
+        }
     }
 
     // Do not update the record store stats if we're in the background as we've validated a
diff --git a/src/mongo/db/validate/validate_state.cpp b/src/mongo/db/validate/validate_state.cpp
index bd85eeac841..563a2eaf818 100644
--- a/src/mongo/db/validate/validate_state.cpp
+++ b/src/mongo/db/validate/validate_state.cpp
@@ -114,6 +114,72 @@ ValidateState::ValidateState(OperationContext* opCtx,
     if (adjustMultikey()) {
         invariant(!isBackground());
     }
+
+    if (false) {  // TODO(SERVER-117795): Check feature flag here
+        if (enforceFastCountRequested()) {
+            auto fastCountType = getDetectedFastCountType(opCtx);
+            uassert(ErrorCodes::InvalidOptions,
+                    "Both FastCount tables found",
+                    fastCountType != FastCountType::both);
+        }
+    }
+}
+
+
+// TODO(SERVER-117795)
+Status ValidateState::_getReplicatedFastCountCollection(OperationContext* opCtx) const {
+    // try {
+    //     auto fastCountNss = NamespaceString::makeGlobalConfigCollection(
+    //         NamespaceString::kSystemReplicatedSizeAndCountMetadataStore);
+    //     boost::optional acquisition =
+    //         acquireCollectionOrViewMaybeLockFree(
+    //             opCtx,
+    //             CollectionOrViewAcquisitionRequest::fromOpCtx(
+    //                 opCtx, fastCountNss, AcquisitionPrerequisites::OperationType::kRead));
+
+    //     if (!acquisition || !acquisition->collectionExists()) {
+    //         return Status(
+    //             ErrorCodes::NamespaceNotFound,
+    //             str::stream()
+    //                 << "Internal FastCount Collection '" << fastCountNss.toStringForErrorMsg()
+    //                 << "' does not exist to validate. Required for enforcing fast count.");
+    //     }
+
+    //     // TODO(SERVER-117795): Is this needed? Unused at the moment, might be needed?
+    //     // _fastCountCollection = std::move(acquisition);
+    // } catch (const ExceptionFor&) {
+    //     // TODO(SERVER-117795): Is this the right exception? Is it relevant?
+    //     if (isBackground()) {
+    //         // This will throw SnapshotTooOld to indicate we cannot find an available snapshot at
+    //         // the provided timestamp. This is likely because minSnapshotHistoryWindowInSeconds
+    //         has
+    //         // been changed to a lower value from the default of 5 minutes.
+    //         return Status(
+    //             ErrorCodes::NamespaceNotFound,
+    //             fmt::format("Cannot run background validation on collection {} because the "
+    //                         "snapshot history is no longer available",
+    //                         _nss.toStringForErrorMsg()));
+    //     }
+    //     throw;
+    // }
+    return Status::OK();
+}
+
+// TODO(SERVER-117795): Get state from the storage engine.
+Status ValidateState::_getUnreplicatedFastCountCollection(OperationContext* opCtx) const {
+    try {
+        // std::string filename = ident::kSizeStorer + ".wt";
+        // boost::filesystem::path sizeStorerAbsoluteFilePath =
+        //     boost::filesystem::path(storageGlobalParams.dbpath) / filename;
+        // if (boost::filesystem::exists(sizeStorerAbsoluteFilePath)) {
+        //     return Status::OK();
+        // }
+
+        return Status::OK();
+        // return Status(ErrorCodes::NonExistentPath, "SizeStorer doesn't exist");
+    } catch (...) {
+        return exceptionToStatus();
+    }
 }
 
 bool ValidateState::shouldEnforceFastCount() const {
@@ -150,6 +216,22 @@ bool ValidateState::shouldEnforceFastCount() const {
     return false;
 }
 
+FastCountType ValidateState::getDetectedFastCountType(OperationContext* opCtx) const {
+    // TODO(SERVER-117795): Uncomment.
+    return FastCountType::legacySizeStorer;
+    // auto replicatedFastCountStatus = _getReplicatedFastCountCollection(opCtx);
+    // auto legacyFastCountStatus = _getUnreplicatedFastCountCollection(opCtx);
+    // if (replicatedFastCountStatus.isOK() && legacyFastCountStatus.isOK()) {
+    //     return FastCountType::both;
+    // } else if (replicatedFastCountStatus.isOK()) {
+    //     return FastCountType::replicated;
+    // } else if (legacyFastCountStatus.isOK()) {
+    //     return FastCountType::legacySizeStorer;
+    // } else {
+    //     return FastCountType::none;
+    // }
+}
+
 void ValidateState::yieldCursors(OperationContext* opCtx) {
     // Save all the cursors.
     for (const auto& indexCursor : _indexCursors) {
diff --git a/src/mongo/db/validate/validate_state.h b/src/mongo/db/validate/validate_state.h
index 9759707d58a..14c2c5e43d5 100644
--- a/src/mongo/db/validate/validate_state.h
+++ b/src/mongo/db/validate/validate_state.h
@@ -73,10 +73,18 @@ namespace CollectionValidation {
  */
 MONGO_MOD_PUBLIC Lock::ExclusiveLock obtainExclusiveValidationLock(OperationContext* opCtx);
 
+enum struct FastCountType {
+    legacySizeStorer,
+    replicated,
+    both,
+    none,
+    invalid,
+};
+
 /**
  * Contains information about the collection being validated and the user provided validation
- * options. Additionally it maintains the state of shared objects throughtout the validation, such
- * as locking, cursors and data throttling.
+ * options. Additionally it maintains the state of shared objects throughout the validation, such
+ * as locking, cursors, and data throttling.
  */
 class ValidateState : public ValidationOptions {
     ValidateState(const ValidateState&) = delete;
@@ -89,8 +97,14 @@ public:
         return _nss;
     }
 
+    /**
+     * Returns true if fast count is being validated, and the collection
+     * supports fast count. Certain internal collections are not supported by fast count.
+     */
     bool shouldEnforceFastCount() const;
 
+    FastCountType getDetectedFastCountType(OperationContext* opCtx) const;
+
     BSONValidateModeEnum getBSONValidateMode() const {
         return isBSONConformanceValidation() ? BSONValidateModeEnum::kFull
                                              : BSONValidateModeEnum::kExtended;
@@ -151,6 +165,12 @@ public:
 private:
     ValidateState() = delete;
 
+    /**
+     * These functions use catalog and on-disk state to determine which system is being used.
+     */
+    Status _getReplicatedFastCountCollection(OperationContext* opCtx) const;
+    Status _getUnreplicatedFastCountCollection(OperationContext* opCtx) const;
+
     // This lock needs to be obtained before the global lock. Initialise in the validation
     // constructor. Oplog Batch Applier takes this lock in exclusive mode when applying the batch.
     // Foreground validation waits on this lock to begin.