diff --git a/src/mongo/db/storage/collection_truncate_markers.cpp b/src/mongo/db/storage/collection_truncate_markers.cpp index bcb31aff7a0..116be437a65 100644 --- a/src/mongo/db/storage/collection_truncate_markers.cpp +++ b/src/mongo/db/storage/collection_truncate_markers.cpp @@ -192,11 +192,14 @@ CollectionTruncateMarkers::InitialSetOfMarkers CollectionTruncateMarkers::create OperationContext* opCtx, CollectionIterator& collectionIterator, int64_t minBytesPerMarker, - std::function getRecordIdAndWallTime) { + std::function getRecordIdAndWallTime, + TickSource* tickSource) { auto startTime = curTimeMicros64(); + const int64_t numRecordsTotal = collectionIterator.numRecords(); LOGV2_INFO(7393212, "Scanning collection to determine where to place markers for truncation", - "uuid"_attr = collectionIterator.getRecordStore()->uuid()); + "uuid"_attr = collectionIterator.getRecordStore()->uuid(), + "numRecords"_attr = numRecordsTotal); int64_t numRecords = 0; int64_t dataSize = 0; @@ -204,6 +207,7 @@ CollectionTruncateMarkers::InitialSetOfMarkers CollectionTruncateMarkers::create int64_t currentBytes = 0; std::deque markers; + Timer lastProgressTimer(tickSource); while (auto nextRecord = collectionIterator.getNext()) { const auto& [rId, doc] = *nextRecord; @@ -225,6 +229,17 @@ CollectionTruncateMarkers::InitialSetOfMarkers CollectionTruncateMarkers::create numRecords++; dataSize += doc.objsize(); + const int samplingLogIntervalSeconds = gCollectionSamplingLogIntervalSeconds.load(); + if (samplingLogIntervalSeconds > 0 && + lastProgressTimer.elapsed() >= Seconds(samplingLogIntervalSeconds)) { + LOGV2(11212203, + "Collection scanning progress", + "uuid"_attr = collectionIterator.getRecordStore()->uuid(), + "completed"_attr = numRecords, + "total"_attr = numRecordsTotal); + lastProgressTimer.reset(); + } + // Force a call to next() only every ~ 1 second to simulate slowness. if (MONGO_unlikely(gUseSlowCollectionTruncateMarkerScanning)) { sleepFor(Seconds(1)); diff --git a/src/mongo/db/storage/collection_truncate_markers.h b/src/mongo/db/storage/collection_truncate_markers.h index 70daf731e06..0349ee935fd 100644 --- a/src/mongo/db/storage/collection_truncate_markers.h +++ b/src/mongo/db/storage/collection_truncate_markers.h @@ -242,7 +242,8 @@ public: OperationContext* opCtx, CollectionIterator& collIterator, int64_t minBytesPerMarker, - std::function getRecordIdAndWallTime); + std::function getRecordIdAndWallTime, + TickSource* tickSource = globalSystemTickSource()); // Creates the initial set of markers by sampling the collection. The set of markers // returned will have approximate metrics. The metrics of each marker will be equal and contain diff --git a/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp b/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp index 296d0e5d5b1..0bede6aadb7 100644 --- a/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp +++ b/src/mongo/db/storage/oplog_cap_maintainer_thread.cpp @@ -69,7 +69,11 @@ AtomicWord totalTimeTruncating; // Cumulative number of truncates of the oplog. AtomicWord truncateCount; +// Cumulative number of times the thread has been interrupted +AtomicWord interruptCount; + MONGO_FAIL_POINT_DEFINE(hangOplogCapMaintainerThread); +MONGO_FAIL_POINT_DEFINE(hangBeforeOplogSampling); class OplogTruncateMarkersServerStatusSection : public ServerStatusSection { public: @@ -122,6 +126,7 @@ public: builder.append("totalTimeTruncatingMicros", totalTimeTruncating.load()); builder.append("truncateCount", truncateCount.load()); + builder.append("interruptCount", interruptCount.load()); return builder.obj(); } @@ -261,6 +266,11 @@ void OplogCapMaintainerThread::run() { } } + if (MONGO_unlikely(hangBeforeOplogSampling.shouldFail())) { + LOGV2(11212200, "Hanging due to 'hangBeforeSampling' fail point"); + hangBeforeOplogSampling.pauseWhileSet(_uniqueCtx->get()); + } + // Need the oplog to have been created first before we proceed. do { // Create the initial set of truncate markers as part of this thread before we @@ -293,16 +303,9 @@ void OplogCapMaintainerThread::run() { sleepFor(Milliseconds(100)); LOGV2_DEBUG(10621109, 1, "OplogCapMaintainerThread is active"); } while (true); - } catch (const ExceptionFor& e) { - LOGV2_DEBUG(9468100, - 1, - "Interrupted due to shutdown. OplogCapMaintainerThread Exiting!", - "error"_attr = e.what()); - return; - } catch (const ExceptionFor&) { - LOGV2_DEBUG(10167201, - 1, - "Interrupted due to storage change. OplogCapMaintainerThread Exiting!"); + } catch (ExceptionFor& ex) { + LOGV2(11212201, "OplogCapMaintainerThread interrupted", "reason"_attr = ex.reason()); + interruptCount.fetchAndAdd(1); return; } } @@ -329,16 +332,9 @@ void OplogCapMaintainerThread::run() { _uniqueCtx->get()->sleepFor( Seconds(1)); // Back off in case there were problems deleting. - } catch (const ExceptionFor& e) { - LOGV2_DEBUG(9259900, - 1, - "Interrupted due to shutdown. OplogCapMaintainerThread Exiting", - "error"_attr = e); - return; - } catch (const ExceptionFor&) { - LOGV2_DEBUG(10167202, - 1, - "Interrupted due to storage change. OplogCapMaintainerThread Exiting!"); + } catch (ExceptionFor& ex) { + LOGV2(11212204, "OplogCapMaintainerThread interrupted", "reason"_attr = ex.reason()); + interruptCount.fetchAndAdd(1); return; } catch (...) { const auto& err = mongo::exceptionToStatus(); @@ -362,7 +358,7 @@ void OplogCapMaintainerThread::run() { } void OplogCapMaintainerThread::shutdown(const Status& reason) { - LOGV2_INFO(7474902, "Shutting down oplog cap maintainer thread"); + LOGV2_INFO(7474902, "Shutting down oplog cap maintainer thread", "reason"_attr = reason); { stdx::lock_guard lk(_opCtxMutex); if (_uniqueCtx) {