SERVER-106211 Adding metrics to oplog sampling (#38137)

Co-authored-by: Clarisse Cheah <clarisse.cheah@mongodb.com>
GitOrigin-RevId: d6cda34bbbebab5982898cc989ce67f77b9949ce
This commit is contained in:
adelinexchen 2025-08-26 22:54:28 +10:00 committed by MongoDB Bot
parent 85f50fb465
commit f1ef2c7dae
3 changed files with 19 additions and 7 deletions

View File

@ -379,12 +379,13 @@ CollectionTruncateMarkers::InitialSetOfMarkers CollectionTruncateMarkers::create
auto currentRecords =
collectionIterator.numRecords() - estimatedRecordsPerMarker * wholeMarkers;
auto currentBytes = collectionIterator.dataSize() - estimatedBytesPerMarker * wholeMarkers;
return CollectionTruncateMarkers::InitialSetOfMarkers{
std::move(markers),
currentRecords,
currentBytes,
Microseconds{static_cast<int64_t>(curTimeMicros64() - startTime)},
MarkersCreationMethod::Sampling};
auto duration = static_cast<int64_t>(curTimeMicros64() - startTime);
LOGV2_DEBUG(10621100, 1, "createMarkersBySampling finished", "durationMicros"_attr = duration);
return CollectionTruncateMarkers::InitialSetOfMarkers{std::move(markers),
currentRecords,
currentBytes,
Microseconds{duration},
MarkersCreationMethod::Sampling};
}
CollectionTruncateMarkers::MarkersCreationMethod

View File

@ -152,6 +152,7 @@ bool OplogCapMaintainerThread::_deleteExcessDocuments(OperationContext* opCtx) {
LOGV2_DEBUG(4562600, 2, "oplog collection does not exist");
return false;
}
LOGV2(10621107, "Deleting excess documents", "Oplog size (in bytes)"_attr = rs->dataSize());
// Create another reference to the oplog truncate markers while holding a lock on
// the collection to prevent it from being destructed.
@ -212,7 +213,8 @@ void OplogCapMaintainerThread::_run() {
toStringForLogging(NamespaceString::kRsOplogNamespace);
setThreadName(name);
LOGV2_DEBUG(5295000, 1, "Oplog cap maintainer thread started", "threadName"_attr = name);
LOGV2_DEBUG(
5295000, 1, "Oplog cap maintainer thread started and active", "threadName"_attr = name);
ThreadClient tc(name,
getGlobalServiceContext()->getService(ClusterRole::ShardServer),
Client::noSession(),
@ -245,9 +247,11 @@ void OplogCapMaintainerThread::_run() {
}
// Wait a bit to give the oplog a chance to be created.
MONGO_IDLE_THREAD_BLOCK;
LOGV2_DEBUG(10621101, 1, "OplogCapMaintainerThread is idle");
// Reset the oplogRead so we don't hold a lock while we sleep.
oplogRead.reset();
sleepFor(Milliseconds(100));
LOGV2_DEBUG(10621109, 1, "OplogCapMaintainerThread is active");
} while (!rs);
// Initial sampling and marker creation.

View File

@ -104,6 +104,8 @@ std::shared_ptr<OplogTruncateMarkers> OplogTruncateMarkers::sampleAndUpdate(Oper
LOGV2(22382,
"Record store oplog processing finished",
"duration"_attr = duration_cast<Milliseconds>(initialSetOfMarkers.timeTaken));
LOGV2(
10621110, "Initial set of markers created.", "Oplog size (in bytes)"_attr = rs.dataSize());
// This value will eventually replace the empty OplogTruncateMarker object with this newly
// populated object now that initial sampling has finished.
@ -207,9 +209,11 @@ bool OplogTruncateMarkers::awaitHasExcessMarkersOrDead(OperationContext* opCtx)
// Wait until kill() is called or there are too many collection markers.
stdx::unique_lock<stdx::mutex> lock(_reclaimMutex);
MONGO_IDLE_THREAD_BLOCK;
LOGV2_DEBUG(10621102, 1, "OplogCapMaintainerThread is idle");
auto isWaitConditionSatisfied = opCtx->waitForConditionOrInterruptFor(
_reclaimCv, lock, Seconds(gOplogTruncationCheckPeriodSeconds), [this, opCtx] {
if (_isDead) {
LOGV2_DEBUG(10621103, 1, "OplogCapMaintainerThread is active");
return true;
}
@ -221,12 +225,15 @@ bool OplogTruncateMarkers::awaitHasExcessMarkersOrDead(OperationContext* opCtx)
"Collection has excess markers",
"lastRecord"_attr = marker->lastRecord,
"wallTime"_attr = marker->wallTime);
LOGV2_DEBUG(10621104, 1, "OplogCapMaintainerThread is active");
return true;
}
LOGV2_DEBUG(10621105, 1, "OplogCapMaintainerThread is active");
return false;
});
LOGV2_DEBUG(10621106, 1, "OplogCapMaintainerThread is active");
// Return true only when we have detected excess markers, not because the record store
// is being destroyed (_isDead) or we timed out waiting on the condition variable.
return !(_isDead || !isWaitConditionSatisfied);