SERVER-120069 Instrument the storage layer with OpenTelemetry Metrics (#53848)

GitOrigin-RevId: 4f19ab5bf8733201b10d1ded3d6988faba5a99c3
This commit is contained in:
DennisSHCheung 2026-05-27 10:08:03 +10:00 committed by MongoDB Bot
parent 005ea9599c
commit 837806ab05
13 changed files with 1126 additions and 1 deletions

View File

@ -2810,6 +2810,8 @@ mongo_cc_library(
"startup_warnings_mongod",
"//src/mongo/db/storage:backup_cursor_hooks",
"//src/mongo/db/storage:disk_space_monitor",
"//src/mongo/otel/metrics/instrumentation:disk_metrics",
"//src/mongo/otel/metrics/instrumentation:system_mount_metrics",
"//src/mongo/db/storage:oplog_truncation",
"//src/mongo/db/storage:storage_control",
"system_index",

View File

@ -230,6 +230,8 @@
#include "mongo/executor/task_executor_pool.h"
#include "mongo/executor/thread_pool_task_executor.h"
#include "mongo/logv2/log.h"
#include "mongo/otel/metrics/instrumentation/disk_metrics.h"
#include "mongo/otel/metrics/instrumentation/system_mount_metrics.h"
#include "mongo/otel/metrics/metrics_initialization.h"
#include "mongo/otel/traces/trace_initialization.h"
#include "mongo/platform/atomic_word.h"
@ -1011,6 +1013,8 @@ ExitCode _initAndListen(ServiceContext* serviceContext) {
startFLECrud(serviceContext);
DiskSpaceMonitor::start(serviceContext);
installSystemMountOtelMetrics(serviceContext);
installDiskOtelMetrics(serviceContext);
if (!storageEngine->storesFilesInDbPath()) {
LOGV2(7333400,
"The index builds DiskSpaceMonitor action which periodically checks if we "

View File

@ -381,5 +381,7 @@ test_suite(
":otel_metric_name_validation_test",
":otel_metric_server_status_adapter_test",
":server_status_metric_name_validation_test",
"//src/mongo/otel/metrics/instrumentation:disk_metrics_test",
"//src/mongo/otel/metrics/instrumentation:system_mount_metrics_test",
],
)

View File

@ -0,0 +1,71 @@
load("//bazel:mongo_src_rules.bzl", "mongo_cc_library", "mongo_cc_unit_test")
package(default_visibility = ["//visibility:public"])
exports_files(
glob([
"*.h",
]),
)
# OpenTelemetry disk I/O instrumentation. The source file is selected per platform: Linux
# builds compile disk_metrics_linux.cpp, every other platform compiles disk_metrics_noop.cpp.
mongo_cc_library(
name = "disk_metrics",
srcs_select = [{
"@platforms//os:linux": ["disk_metrics_linux.cpp"],
"//conditions:default": ["disk_metrics_noop.cpp"],
}],
deps = [
"//src/mongo/db:service_context",
] + select({
# These dependencies are only added when the Linux source file is built.
"@platforms//os:linux": [
"//src/mongo/otel/metrics:otel_metrics_service",
"//src/mongo/util:periodic_runner",
"//src/mongo/util:procparser",
],
"//conditions:default": [],
}),
)
# Unit tests for the disk_metrics library (disk_metrics_test.cpp).
mongo_cc_unit_test(
name = "disk_metrics_test",
srcs = ["disk_metrics_test.cpp"],
tags = ["mongo_unittest_fourth_group"],
deps = [
":disk_metrics",
"//src/mongo/db:service_context_test_fixture",
"//src/mongo/db/auth:authmocks",
"//src/mongo/db/shard_role:service_context_non_d",
"//src/mongo/otel/metrics:metrics_test_util",
],
)
# OpenTelemetry filesystem-mount instrumentation. The source file is selected per platform:
# Linux builds compile system_mount_metrics_linux.cpp, every other platform compiles
# system_mount_metrics_noop.cpp.
mongo_cc_library(
name = "system_mount_metrics",
srcs_select = [{
"@platforms//os:linux": ["system_mount_metrics_linux.cpp"],
"//conditions:default": ["system_mount_metrics_noop.cpp"],
}],
deps = [
"//src/mongo/db:service_context",
] + select({
# These dependencies are only added when the Linux source file is built.
"@platforms//os:linux": [
"//src/mongo/otel/metrics:otel_metrics_service",
"//src/mongo/util:periodic_runner",
"//src/mongo/util:procparser",
],
"//conditions:default": [],
}),
)
# Unit tests for the system_mount_metrics library (system_mount_metrics_test.cpp).
mongo_cc_unit_test(
name = "system_mount_metrics_test",
srcs = ["system_mount_metrics_test.cpp"],
tags = ["mongo_unittest_fifth_group"],
deps = [
":system_mount_metrics",
"//src/mongo/db:service_context_test_fixture",
"//src/mongo/db/auth:authmocks",
"//src/mongo/db/shard_role:service_context_non_d",
"//src/mongo/otel/metrics:metrics_test_util",
],
)

View File

@ -0,0 +1,72 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/bson/bsonobj.h"
#include "mongo/util/modules.h"
#include <memory>
#include <string>
#include <vector>
namespace mongo {
class ServiceContext;
/**
* Owns the OpenTelemetry instruments for disk I/O metrics. The set of devices is fixed at
* construction time: disks that appear after startup are not tracked.
*
* The implementation is hidden behind a pimpl ('Impl'), which is defined per platform. The
* Linux implementation registers counters named "systemMetrics.disks.<device>.<field>"; the
* implementation used on other platforms registers nothing.
*/
class DiskMetrics {
public:
/**
* Registers per-device counters for each of the provided disk devices.
*/
explicit DiskMetrics(std::vector<std::string> disks);
// Declared here and defined out of line, where Impl is a complete type.
~DiskMetrics();
/**
* Walks the BSON and adds deltas to the registered counters. Devices not declared at
* construction time are ignored.
*
* 'disksBson' is expected to hold one sub-object per device, keyed by device name. In the
* Linux implementation the first call only records a baseline; each later call adds the
* difference against the previous sample, with negative differences clamped to zero.
*/
void update(BSONObj disksBson);
private:
class Impl;
std::unique_ptr<Impl> _impl;
};
/**
* Registers OpenTelemetry disk I/O counters and starts a periodic job that samples
* once per second. No-op on unsupported platforms.
*
* On Linux the device list is discovered once, at the time of the call; if no physical disk
* is found nothing is registered and no job is started. The job and the instruments are kept
* in a decoration on 'svcCtx'.
*/
MONGO_MOD_PUBLIC void installDiskOtelMetrics(ServiceContext* svcCtx);
} // namespace mongo

View File

@ -0,0 +1,209 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_severity_suppressor.h"
#include "mongo/otel/metrics/instrumentation/disk_metrics.h"
#include "mongo/otel/metrics/metric_unit.h"
#include "mongo/otel/metrics/metrics_counter.h"
#include "mongo/otel/metrics/metrics_service.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/duration.h"
#include "mongo/util/periodic_runner.h"
#include "mongo/util/procparser.h"
#include <algorithm>
#include <utility>
#include <vector>
#include <boost/optional.hpp>
#include <fmt/format.h>
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl
namespace mongo {
namespace {
using otel::metrics::Counter;
using otel::metrics::DynamicMetricNameMaker;
using otel::metrics::MetricsService;
using otel::metrics::MetricUnit;
// File parsed on every sample to obtain the per-device I/O statistics.
constexpr StringData kDiskStatsPath = "/proc/diskstats"_sd;
// Directory scanned once, at install time, to discover the physical disks to track.
constexpr StringData kSysBlockPath = "/sys/block"_sd;
/**
 * The eight counters tracked for a single disk device. Every pointer starts out null and is
 * assigned the address of an instrument obtained from MetricsService; nothing in this file
 * deletes them.
 */
struct DiskCounters {
    // Read side.
    Counter<int64_t>* reads = nullptr;        // read operations completed
    Counter<int64_t>* readSectors = nullptr;  // sectors read
    Counter<int64_t>* readTimeMs = nullptr;   // time spent reading

    // Write side.
    Counter<int64_t>* writes = nullptr;        // write operations completed
    Counter<int64_t>* writeSectors = nullptr;  // sectors written
    Counter<int64_t>* writeTimeMs = nullptr;   // time spent writing

    // Device-wide I/O time.
    Counter<int64_t>* ioTimeMs = nullptr;    // time the disk was busy doing I/O
    Counter<int64_t>* ioQueuedMs = nullptr;  // weighted time spent in the disk I/O queue
};
// Per-ServiceContext state for the disk sampler: the instruments, the tracked device names and
// the periodic job that feeds them.
struct DiskMetricsState {
// Null until installDiskOtelMetrics() finds at least one disk.
std::unique_ptr<DiskMetrics> metrics;
// Owns the device names that 'diskViews' points into.
std::vector<std::string> diskNames;
// Non-owning views of 'diskNames', in the form passed to procparser::parseProcDiskStatsFile().
std::vector<StringData> diskViews;
// Declared last, so it is destroyed before the members above that the job's callback reads.
// NOTE(review): presumably destroying the anchor stops the job — confirm against
// PeriodicJobAnchor.
PeriodicJobAnchor job;
};
// Accessor for the state above, stored as a decoration on the ServiceContext.
const auto getDiskMetricsState = ServiceContext::declareDecoration<DiskMetricsState>();
} // namespace
/**
 * Linux implementation behind DiskMetrics.
 *
 * Keeps one DiskCounters entry per tracked device ('_instruments[i]' belongs to '_disks[i]')
 * together with the previous sample, so that each new sample can be turned into increments.
 */
class DiskMetrics::Impl {
public:
    /**
     * Registers eight Int64 counters, named "systemMetrics.disks.<disk>.<field>", for every
     * entry of 'disks'. Anything thrown while creating a metric name or counter propagates to
     * the caller.
     */
    explicit Impl(std::vector<std::string> disks) : _disks(std::move(disks)) {
        _instruments.resize(_disks.size());
        for (size_t i = 0; i < _disks.size(); ++i) {
            const std::string& disk = _disks[i];
            // Builds the full metric name for 'field' on this disk and registers the counter.
            const auto makeCounter =
                [&](StringData field, std::string desc, MetricUnit unit) -> Counter<int64_t>* {
                std::string fullName = fmt::format("systemMetrics.disks.{}.{}", disk, field);
                return &MetricsService::instance().createInt64Counter(
                    DynamicMetricNameMaker::make(StringData{fullName}), std::move(desc), unit);
            };
            _instruments[i].reads = makeCounter(
                "reads", "Number of read operations completed", MetricUnit::kOperations);
            _instruments[i].readSectors =
                makeCounter("read_sectors", "Number of sectors read", MetricUnit::kCount);
            _instruments[i].readTimeMs =
                makeCounter("read_time_ms", "Time spent reading", MetricUnit::kMilliseconds);
            _instruments[i].writes = makeCounter(
                "writes", "Number of write operations completed", MetricUnit::kOperations);
            _instruments[i].writeSectors =
                makeCounter("write_sectors", "Number of sectors written", MetricUnit::kCount);
            _instruments[i].writeTimeMs =
                makeCounter("write_time_ms", "Time spent writing", MetricUnit::kMilliseconds);
            _instruments[i].ioTimeMs = makeCounter(
                "io_time_ms", "Time disk was busy doing I/O", MetricUnit::kMilliseconds);
            _instruments[i].ioQueuedMs = makeCounter("io_queued_ms",
                                                     "Weighted time spent in the disk I/O queue",
                                                     MetricUnit::kMilliseconds);
        }
    }

    /**
     * Consumes one sample holding a sub-object per device, keyed by device name.
     *
     * The first sample only becomes the baseline. For every later sample, each tracked device
     * that is present as an object in both the previous and the current sample has its field
     * differences added to the counters. Devices missing from either sample are skipped, and
     * devices that were not declared at construction are never looked at.
     */
    void update(BSONObj disksBson) {
        if (_previousBson) {
            for (size_t i = 0; i < _disks.size(); ++i) {
                const BSONElement prev = (*_previousBson)[_disks[i]];
                const BSONElement curr = disksBson[_disks[i]];
                if (prev.type() != BSONType::object || curr.type() != BSONType::object) {
                    continue;
                }
                addDeltas(_instruments[i], prev.Obj(), curr.Obj());
            }
        }
        // The sample is kept until the next call, so it must own its buffer: a caller may hand
        // in a BSONObj that is only a view over memory it releases after this call returns.
        _previousBson = disksBson.getOwned();
    }

private:
    /**
     * Adds 'curr' - 'prev' to each counter in 'instr'. A negative difference is clamped to
     * zero, so a counter is never asked to go backwards.
     */
    void addDeltas(DiskCounters& instr, const BSONObj& prev, const BSONObj& curr) {
        const auto delta = [&](StringData field) {
            return std::max(0LL, curr[field].safeNumberLong() - prev[field].safeNumberLong());
        };
        instr.reads->add(delta("reads"));
        instr.readSectors->add(delta("read_sectors"));
        instr.readTimeMs->add(delta("read_time_ms"));
        instr.writes->add(delta("writes"));
        instr.writeSectors->add(delta("write_sectors"));
        instr.writeTimeMs->add(delta("write_time_ms"));
        instr.ioTimeMs->add(delta("io_time_ms"));
        instr.ioQueuedMs->add(delta("io_queued_ms"));
    }

    // Tracked device names; fixed after construction.
    std::vector<std::string> _disks;
    // Counters for each device, index-aligned with '_disks'.
    std::vector<DiskCounters> _instruments;
    // Owned copy of the most recent sample; empty until the first update().
    boost::optional<BSONObj> _previousBson;
};
// Forwards the device list to the platform implementation, which registers the counters.
DiskMetrics::DiskMetrics(std::vector<std::string> disks)
: _impl(std::make_unique<Impl>(std::move(disks))) {}
// Defined in this file, where Impl is a complete type, so std::unique_ptr<Impl> can destroy it.
DiskMetrics::~DiskMetrics() = default;
// Hands the sample over to the implementation.
void DiskMetrics::update(BSONObj disksBson) {
_impl->update(std::move(disksBson));
}
/**
 * Discovers the physical disks under /sys/block, registers their OpenTelemetry counters and
 * starts the "DiskOtelMetrics" periodic job, which parses /proc/diskstats once per second and
 * feeds each sample to DiskMetrics::update().
 *
 * Nothing is registered and no job is started when no disk is found. If registering the
 * counters throws a DBException, the error is logged and the job is not started; this mirrors
 * how the system mount metrics treat a registration failure, so the failure does not propagate
 * into server startup.
 */
void installDiskOtelMetrics(ServiceContext* svcCtx) {
    auto& state = getDiskMetricsState(svcCtx);
    state.diskNames = procparser::findPhysicalDisks(kSysBlockPath);
    if (state.diskNames.empty()) {
        return;
    }

    // The parser takes StringData views; they point into 'state.diskNames', which is not
    // modified again below.
    state.diskViews.reserve(state.diskNames.size());
    for (const auto& name : state.diskNames) {
        state.diskViews.push_back(name);
    }

    try {
        state.metrics = std::make_unique<DiskMetrics>(state.diskNames);
    } catch (const DBException& ex) {
        LOGV2_ERROR(12006911, "Failed to register OTel disk metrics", "error"_attr = ex.toStatus());
        return;
    }

    state.job = svcCtx->getPeriodicRunner()->makeJob(PeriodicRunner::PeriodicJob{
        "DiskOtelMetrics",
        [&state](Client*) {
            BSONObjBuilder builder;
            Status s =
                procparser::parseProcDiskStatsFile(kDiskStatsPath, state.diskViews, &builder);
            if (s.isOK()) {
                state.metrics->update(builder.obj());
            } else {
                // Constructed with Minutes{1}, Warning and Debug(3): repeated failures are
                // logged at the severity the suppressor returns instead of always at Warning.
                static logv2::SeveritySuppressor suppressor(
                    Minutes{1}, logv2::LogSeverity::Warning(), logv2::LogSeverity::Debug(3));
                if (auto sev = suppressor(); shouldLog(MONGO_LOGV2_DEFAULT_COMPONENT, sev)) {
                    LOGV2_DEBUG(
                        12006910, sev.toInt(), "Failed to collect disk stats", "error"_attr = s);
                }
            }
        },
        Seconds(1),
        false /*isKillableByStepdown*/});
    state.job.start();
}
} // namespace mongo

View File

@ -0,0 +1,44 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/otel/metrics/instrumentation/disk_metrics.h"
namespace mongo {
// Implementation compiled on platforms other than Linux: no instruments are registered and
// nothing is sampled.
class DiskMetrics::Impl {};
// Ignores the device list.
DiskMetrics::DiskMetrics(std::vector<std::string>) : _impl(std::make_unique<Impl>()) {}
DiskMetrics::~DiskMetrics() = default;
// Discards the sample.
void DiskMetrics::update(BSONObj) {}
// Does nothing: no periodic job is started.
void installDiskOtelMetrics(ServiceContext*) {}
} // namespace mongo

View File

@ -0,0 +1,250 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/otel/metrics/instrumentation/disk_metrics.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/otel/metrics/metrics_test_util.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
namespace {
using otel::metrics::DynamicMetricNameMaker;
using otel::metrics::OtelMetricsCapturer;
// Device names used by the tests. sda and sdb are registered; sdc is deliberately left out so
// the tests can check that undeclared devices are ignored.
constexpr StringData kSda = "sda"_sd;
constexpr StringData kSdb = "sdb"_sd;
constexpr StringData kSdc = "sdc"_sd;
// All tests register DiskMetrics with this fixed device set. The global MetricsService retains
// registrations across tests within the same binary, so every test must use the same device set.
const std::vector<std::string> kTestDevices = {std::string(kSda), std::string(kSdb)};
// Full metric names, in the "systemMetrics.disks.<device>.<field>" form the tests read back.
constexpr StringData kSdaReads = "systemMetrics.disks.sda.reads"_sd;
constexpr StringData kSdaReadSectors = "systemMetrics.disks.sda.read_sectors"_sd;
constexpr StringData kSdaReadTimeMs = "systemMetrics.disks.sda.read_time_ms"_sd;
constexpr StringData kSdaWrites = "systemMetrics.disks.sda.writes"_sd;
constexpr StringData kSdaWriteSectors = "systemMetrics.disks.sda.write_sectors"_sd;
constexpr StringData kSdaWriteTimeMs = "systemMetrics.disks.sda.write_time_ms"_sd;
constexpr StringData kSdaIoTimeMs = "systemMetrics.disks.sda.io_time_ms"_sd;
constexpr StringData kSdaIoQueuedMs = "systemMetrics.disks.sda.io_queued_ms"_sd;
constexpr StringData kSdbWrites = "systemMetrics.disks.sdb.writes"_sd;
// Never registered; reading it is expected to fail.
constexpr StringData kSdcWrites = "systemMetrics.disks.sdc.writes"_sd;
/**
 * Builds a sample of the form { <device>: { reads: ..., read_sectors: ..., ... } } holding the
 * eight per-device statistics that DiskMetrics::update() consumes.
 */
BSONObj makeDiskBson(StringData device,
                     long long reads,
                     long long readSectors,
                     long long readTimeMs,
                     long long writes,
                     long long writeSectors,
                     long long writeTimeMs,
                     long long ioTimeMs,
                     long long ioQueuedMs) {
    struct Stat {
        StringData name;
        long long value;
    };
    // Listed in the order in which the fields are appended to the sub-object.
    const Stat stats[] = {
        {"reads"_sd, reads},
        {"read_sectors"_sd, readSectors},
        {"read_time_ms"_sd, readTimeMs},
        {"writes"_sd, writes},
        {"write_sectors"_sd, writeSectors},
        {"write_time_ms"_sd, writeTimeMs},
        {"io_time_ms"_sd, ioTimeMs},
        {"io_queued_ms"_sd, ioQueuedMs},
    };

    BSONObjBuilder sample;
    {
        // The nested builder must go out of scope before the parent object is finished.
        BSONObjBuilder deviceStats(sample.subobjStart(device));
        for (const Stat& stat : stats) {
            deviceStats.appendNumber(stat.name, stat.value);
        }
    }
    return sample.obj();
}
// Fixture that gives every test a metrics capturer and a DiskMetrics instance registered for
// kTestDevices.
class DiskOtelMetricsTest : public unittest::Test {
protected:
void setUp() override {
// Skip, rather than fail, where the capturer reports that metrics cannot be read back.
if (!OtelMetricsCapturer::canReadMetrics()) {
GTEST_SKIP() << "Skipping test: OTel metrics unavailable on this platform";
}
}
// Declared before '_metrics', so it is constructed first.
// NOTE(review): presumably the capturer has to exist before the counters are registered —
// confirm against OtelMetricsCapturer.
OtelMetricsCapturer _capturer;
DiskMetrics _metrics{kTestDevices};
};
// The very first sample has nothing to be compared against; it must simply be accepted.
TEST_F(DiskOtelMetricsTest, FirstUpdateSetsBaseline) {
    const BSONObj firstSample = makeDiskBson(kSda, 10, 20, 50, 50, 100, 300, 1000, 1200);
    ASSERT_DOES_NOT_THROW(_metrics.update(firstSample));
}
// After a baseline and one further sample, every counter holds the difference between the two.
TEST_F(DiskOtelMetricsTest, SecondUpdateEmitsDeltas) {
    const BSONObj baseline = makeDiskBson(kSda,
                                          /*reads=*/10,
                                          /*readSectors=*/20,
                                          /*readTimeMs=*/50,
                                          /*writes=*/50,
                                          /*writeSectors=*/100,
                                          /*writeTimeMs=*/300,
                                          /*ioTimeMs=*/1000,
                                          /*ioQueuedMs=*/1200);
    const BSONObj followUp = makeDiskBson(kSda,
                                          /*reads=*/25,
                                          /*readSectors=*/50,
                                          /*readTimeMs=*/120,
                                          /*writes=*/70,
                                          /*writeSectors=*/130,
                                          /*writeTimeMs=*/500,
                                          /*ioTimeMs=*/1500,
                                          /*ioQueuedMs=*/1800);
    _metrics.update(baseline);
    _metrics.update(followUp);

    const auto counterValue = [this](StringData metricName) {
        return _capturer.readInt64Counter(DynamicMetricNameMaker::make(metricName));
    };
    ASSERT_EQ(counterValue(kSdaReads), 15);
    ASSERT_EQ(counterValue(kSdaReadSectors), 30);
    ASSERT_EQ(counterValue(kSdaReadTimeMs), 70);
    ASSERT_EQ(counterValue(kSdaWrites), 20);
    ASSERT_EQ(counterValue(kSdaWriteSectors), 30);
    ASSERT_EQ(counterValue(kSdaWriteTimeMs), 200);
    ASSERT_EQ(counterValue(kSdaIoTimeMs), 500);
    ASSERT_EQ(counterValue(kSdaIoQueuedMs), 600);
}
// Deltas from consecutive samples add up: after three samples each counter holds the sum of
// the two successive differences.
TEST_F(DiskOtelMetricsTest, MultipleDeltasAccumulate) {
    const BSONObj samples[] = {
        makeDiskBson(kSda, 50, 100, 200, 100, 200, 500, 1000, 1500),
        makeDiskBson(kSda, 60, 120, 260, 105, 210, 560, 1100, 1650),
        makeDiskBson(kSda, 80, 160, 380, 115, 230, 680, 1300, 1950),
    };
    for (const BSONObj& sample : samples) {
        _metrics.update(sample);
    }

    const auto counterValue = [this](StringData metricName) {
        return _capturer.readInt64Counter(DynamicMetricNameMaker::make(metricName));
    };
    // reads: (60-50) + (80-60) = 10 + 20 = 30
    ASSERT_EQ(counterValue(kSdaReads), 30);
    // read_sectors: (120-100) + (160-120) = 20 + 40 = 60
    ASSERT_EQ(counterValue(kSdaReadSectors), 60);
    // read_time_ms: (260-200) + (380-260) = 60 + 120 = 180
    ASSERT_EQ(counterValue(kSdaReadTimeMs), 180);
    // writes: (105-100) + (115-105) = 5 + 10 = 15
    ASSERT_EQ(counterValue(kSdaWrites), 15);
    // write_sectors: (210-200) + (230-210) = 10 + 20 = 30
    ASSERT_EQ(counterValue(kSdaWriteSectors), 30);
    // write_time_ms: (560-500) + (680-560) = 60 + 120 = 180
    ASSERT_EQ(counterValue(kSdaWriteTimeMs), 180);
    // io_time_ms: (1100-1000) + (1300-1100) = 100 + 200 = 300
    ASSERT_EQ(counterValue(kSdaIoTimeMs), 300);
    // io_queued_ms: (1650-1500) + (1950-1650) = 150 + 300 = 450
    ASSERT_EQ(counterValue(kSdaIoQueuedMs), 450);
}
// A sample may mention devices that were never registered; their statistics must not reach any
// counter, and no metric may exist for them.
TEST_F(DiskOtelMetricsTest, UndeclaredDevicesAreIgnored) {
    // Appends one { <device>: {...} } sub-object to 'parent'.
    const auto appendDevice = [](BSONObjBuilder& parent,
                                 StringData device,
                                 long long reads,
                                 long long readSectors,
                                 long long readTimeMs,
                                 long long writes,
                                 long long writeSectors,
                                 long long writeTimeMs,
                                 long long ioTimeMs,
                                 long long ioQueuedMs) {
        BSONObjBuilder sub(parent.subobjStart(device));
        sub.appendNumber("reads", reads);
        sub.appendNumber("read_sectors", readSectors);
        sub.appendNumber("read_time_ms", readTimeMs);
        sub.appendNumber("writes", writes);
        sub.appendNumber("write_sectors", writeSectors);
        sub.appendNumber("write_time_ms", writeTimeMs);
        sub.appendNumber("io_time_ms", ioTimeMs);
        sub.appendNumber("io_queued_ms", ioQueuedMs);
    };

    // Baseline for sda with every statistic at zero.
    _metrics.update(makeDiskBson(kSda, 0, 0, 0, 0, 0, 0, 0, 0));

    BSONObjBuilder both;
    appendDevice(both, kSda, 10LL, 20LL, 50LL, 20LL, 40LL, 100LL, 200LL, 300LL);
    // sdc was never registered — its data must be ignored.
    appendDevice(both, kSdc, 100LL, 200LL, 500LL, 500LL, 1000LL, 3000LL, 5000LL, 7000LL);
    _metrics.update(both.obj());

    ASSERT_EQ(_capturer.readInt64Counter(DynamicMetricNameMaker::make(kSdaWrites)), 20);
    // sdc was never registered, so its metric name does not exist in the service.
    ASSERT_THROWS_CODE(_capturer.readInt64Counter(DynamicMetricNameMaker::make(kSdcWrites)),
                       DBException,
                       ErrorCodes::KeyNotFound);
}
// Two registered devices in the same sample each get their own deltas.
TEST_F(DiskOtelMetricsTest, MultipleRegisteredDevicesTrackedIndependently) {
    // Appends one { <device>: {...} } sub-object to 'parent'.
    const auto appendDevice = [](BSONObjBuilder& parent,
                                 StringData device,
                                 long long reads,
                                 long long readSectors,
                                 long long readTimeMs,
                                 long long writes,
                                 long long writeSectors,
                                 long long writeTimeMs,
                                 long long ioTimeMs,
                                 long long ioQueuedMs) {
        BSONObjBuilder sub(parent.subobjStart(device));
        sub.appendNumber("reads", reads);
        sub.appendNumber("read_sectors", readSectors);
        sub.appendNumber("read_time_ms", readTimeMs);
        sub.appendNumber("writes", writes);
        sub.appendNumber("write_sectors", writeSectors);
        sub.appendNumber("write_time_ms", writeTimeMs);
        sub.appendNumber("io_time_ms", ioTimeMs);
        sub.appendNumber("io_queued_ms", ioQueuedMs);
    };

    BSONObjBuilder baseline;
    appendDevice(baseline, kSda, 40LL, 80LL, 200LL, 100LL, 200LL, 500LL, 1000LL, 1500LL);
    appendDevice(baseline, kSdb, 20LL, 40LL, 100LL, 50LL, 80LL, 200LL, 400LL, 600LL);
    _metrics.update(baseline.obj());

    BSONObjBuilder next;
    appendDevice(next, kSda, 50LL, 100LL, 250LL, 110LL, 220LL, 560LL, 1100LL, 1650LL);
    appendDevice(next, kSdb, 23LL, 46LL, 115LL, 55LL, 90LL, 230LL, 440LL, 660LL);
    _metrics.update(next.obj());

    const auto counterValue = [this](StringData metricName) {
        return _capturer.readInt64Counter(DynamicMetricNameMaker::make(metricName));
    };
    ASSERT_EQ(counterValue(kSdaReads), 10);
    ASSERT_EQ(counterValue(kSdaWrites), 10);
    ASSERT_EQ(counterValue(kSdbWrites), 5);
}
} // namespace
} // namespace mongo

View File

@ -0,0 +1,72 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/bson/bsonobj.h"
#include "mongo/util/modules.h"
#include <memory>
#include <string>
#include <vector>
namespace mongo {
class ServiceContext;
/**
* Owns the OpenTelemetry instruments for filesystem mount metrics. The set of mountpoints is
* fixed at construction time and mounts that appear after startup are not tracked.
*
* The implementation is hidden behind a pimpl ('Impl'). The Linux implementation registers
* three gauges per mountpoint (capacity, available and free, all in bytes) named
* "systemMetrics.mounts.<sanitized mountpoint>.<field>".
*/
class SystemMountMetrics {
public:
/**
* Registers per-mountpoint gauges for each of the provided mountpoints.
*/
explicit SystemMountMetrics(std::vector<std::string> mountpoints);
// Declared here and defined out of line, where Impl is a complete type.
~SystemMountMetrics();
/**
* Walks the BSON and pushes the values to the registered gauges. Mountpoints not declared at
* construction time are ignored.
*
* 'mountsBson' is expected to hold one sub-object per mountpoint, keyed by mount path. In the
* Linux implementation a declared mountpoint that is absent from the sample keeps the values
* its gauges were last set to.
*/
void update(const BSONObj& mountsBson);
private:
class Impl;
std::unique_ptr<Impl> _impl;
};
/**
* Registers OpenTelemetry mount filesystem gauges and starts a periodic job that samples
* OS-level mount stats once per second. No-op on unsupported platforms.
*
* On Linux the mountpoints are enumerated once, at the time of the call. If enumeration fails,
* yields no mountpoint, or registering the gauges throws, nothing is installed and no job is
* started. The job and the instruments are kept in a decoration on 'svcCtx'.
*/
MONGO_MOD_PUBLIC void installSystemMountOtelMetrics(ServiceContext* svcCtx);
} // namespace mongo

View File

@ -0,0 +1,218 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_severity_suppressor.h"
#include "mongo/otel/metrics/instrumentation/system_mount_metrics.h"
#include "mongo/otel/metrics/metric_unit.h"
#include "mongo/otel/metrics/metrics_gauge.h"
#include "mongo/otel/metrics/metrics_service.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/duration.h"
#include "mongo/util/periodic_runner.h"
#include "mongo/util/procparser.h"
#include <algorithm>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
#include <fmt/format.h>
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl
namespace mongo {
namespace {
using otel::metrics::DynamicMetricNameMaker;
using otel::metrics::Gauge;
using otel::metrics::MetricsService;
using otel::metrics::MetricUnit;
/**
 * The three gauges tracked for a single mountpoint, all expressed in bytes. Every pointer
 * starts out null and is assigned the address of an instrument obtained from MetricsService;
 * nothing in this file deletes them.
 */
struct MountGauges {
    Gauge<int64_t>* capacity = nullptr;   // total filesystem capacity
    Gauge<int64_t>* available = nullptr;  // filesystem space available
    Gauge<int64_t>* free = nullptr;       // total free filesystem space
};
// Per-ServiceContext state for the mount sampler: the instruments and the periodic job that
// feeds them.
struct MountOtelMetricsState {
// Null until installSystemMountOtelMetrics() has registered the gauges.
std::unique_ptr<SystemMountMetrics> metrics;
// Declared last, so it is destroyed before 'metrics', which the job's callback uses.
// NOTE(review): presumably destroying the anchor stops the job — confirm against
// PeriodicJobAnchor.
PeriodicJobAnchor job;
};
// Accessor for the state above, stored as a decoration on the ServiceContext.
const auto getMountOtelMetricsState = ServiceContext::declareDecoration<MountOtelMetricsState>();
// File parsed both to enumerate the mountpoints at install time and on every sample.
constexpr StringData kMountInfoPath = "/proc/self/mountinfo"_sd;
// Sanitize a mount path for use as a metric name segment:
//   "/"         -> "root"
//   "/data"     -> "data"
//   "/boot/efi" -> "boot.efi"
//
// Only a leading slash is stripped. A path that does not start with '/' keeps its first
// character, and an empty path yields an empty string instead of indexing past the end of it.
//
// NOTE(review): the mapping is not injective. "/" and "/root" both become "root", and
// "/a/b" and "/a.b" both become "a.b", so two such mountpoints would ask for the same metric
// names. Left unchanged here because resolving it would rename existing metrics.
std::string sanitizeMountpoint(StringData path) {
    if (path == "/") {
        return "root";
    }
    const bool hasLeadingSlash = !path.empty() && path[0] == '/';
    // Drop the leading slash when there is one, then turn the remaining slashes into dots.
    std::string result(hasLeadingSlash ? path.substr(1) : path);
    std::replace(result.begin(), result.end(), '/', '.');
    return result;
}
std::vector<std::string> enumerateMountpoints() {
BSONObjBuilder probe;
const Status status = procparser::parseProcSelfMountStatsFile(kMountInfoPath, &probe);
if (!status.isOK()) {
LOGV2_WARNING(12006900,
"Failed to enumerate mountpoints for OTel system mount metrics",
"error"_attr = status);
return {};
}
const BSONObj probeObj = probe.obj();
std::vector<std::string> mountpoints;
std::ranges::transform(probeObj, std::back_inserter(mountpoints), [](const BSONElement& entry) {
return std::string(entry.fieldName());
});
std::ranges::sort(mountpoints);
mountpoints.erase(std::ranges::unique(mountpoints).begin(), mountpoints.end());
return mountpoints;
}
/**
 * Enumerates the mountpoints and registers their gauges. Returns null when there is no
 * mountpoint to track, or when registration throws a DBException (which is logged).
 */
std::unique_ptr<SystemMountMetrics> makeMetrics() {
    std::unique_ptr<SystemMountMetrics> created;
    if (auto mountpoints = enumerateMountpoints(); !mountpoints.empty()) {
        try {
            created = std::make_unique<SystemMountMetrics>(std::move(mountpoints));
        } catch (const DBException& ex) {
            LOGV2_ERROR(12006901,
                        "Failed to register OTel system mount metrics",
                        "error"_attr = ex.toStatus());
        }
    }
    return created;
}
} // namespace
/**
 * Linux implementation behind SystemMountMetrics. Keeps one MountGauges entry per tracked
 * mountpoint; '_instruments[i]' belongs to '_mountpoints[i]'.
 */
class SystemMountMetrics::Impl {
public:
    /**
     * Registers the capacity, available and free gauges for every mountpoint, named
     * "systemMetrics.mounts.<sanitized mountpoint>.<field>".
     */
    explicit Impl(std::vector<std::string> mountpoints) : _mountpoints(std::move(mountpoints)) {
        _instruments.reserve(_mountpoints.size());
        for (const std::string& mountpoint : _mountpoints) {
            const std::string segment = sanitizeMountpoint(mountpoint);
            // Every mount gauge is expressed in bytes; only the field and description vary.
            const auto registerGauge = [&segment](StringData field,
                                                  std::string desc) -> Gauge<int64_t>* {
                std::string metricName =
                    fmt::format("systemMetrics.mounts.{}.{}", segment, field);
                return &MetricsService::instance().createInt64Gauge(
                    DynamicMetricNameMaker::make(StringData{metricName}),
                    std::move(desc),
                    MetricUnit::kBytes);
            };

            // Braced initialization evaluates left to right, so the gauges are registered in
            // the order capacity, available, free.
            _instruments.push_back(
                MountGauges{registerGauge("capacity", "Total filesystem capacity in bytes"),
                            registerGauge("available", "Filesystem space available in bytes"),
                            registerGauge("free", "Total free filesystem space in bytes")});
        }
    }

    /**
     * Sets the gauges of every tracked mountpoint that appears in 'mountsBson' as an object.
     * Tracked mountpoints missing from the sample are left untouched.
     */
    void update(const BSONObj& mountsBson) {
        for (size_t idx = 0; idx != _mountpoints.size(); ++idx) {
            const BSONElement entry = mountsBson[_mountpoints[idx]];
            if (entry.type() == BSONType::object) {
                const BSONObj stats = entry.Obj();
                MountGauges& gauges = _instruments[idx];
                gauges.capacity->set(stats["capacity"].safeNumberLong());
                gauges.available->set(stats["available"].safeNumberLong());
                gauges.free->set(stats["free"].safeNumberLong());
            }
        }
    }

private:
    // Tracked mount paths; fixed after construction.
    std::vector<std::string> _mountpoints;
    // Gauges for each mountpoint, index-aligned with '_mountpoints'.
    std::vector<MountGauges> _instruments;
};
// Public SystemMountMetrics members: thin forwarders to the private Impl.

// Hands 'mountpoints' to a newly allocated Impl, whose constructor creates the per-mountpoint
// gauges.
SystemMountMetrics::SystemMountMetrics(std::vector<std::string> mountpoints)
: _impl(std::make_unique<Impl>(std::move(mountpoints))) {}
// Defaulted here, in the translation unit that defines Impl.
SystemMountMetrics::~SystemMountMetrics() = default;
// Forwards the parsed mount-stats document to Impl::update().
void SystemMountMetrics::update(const BSONObj& mountsBson) {
_impl->update(mountsBson);
}
/**
 * Creates the system mount gauges and starts a periodic job named "SystemMountOtelMetrics" that
 * re-parses kMountInfoPath every second and pushes the result into the gauges.
 *
 * Does nothing when makeMetrics() returns nullptr. A failed parse inside the job is logged through
 * a SeveritySuppressor (Warning at most once per minute, Debug(3) otherwise) and leaves the gauges
 * untouched.
 */
void installSystemMountOtelMetrics(ServiceContext* svcCtx) {
    auto mountMetrics = makeMetrics();
    if (mountMetrics == nullptr) {
        return;
    }

    auto& state = getMountOtelMetricsState(svcCtx);
    state.metrics = std::move(mountMetrics);

    // 'state' is captured by reference.
    // NOTE(review): presumably 'state' lives at least as long as the job it owns -- confirm.
    auto refreshGauges = [&state](Client*) {
        BSONObjBuilder parsed;
        const Status parseStatus =
            procparser::parseProcSelfMountStatsFile(kMountInfoPath, &parsed);
        if (parseStatus.isOK()) {
            state.metrics->update(parsed.obj());
            return;
        }

        static logv2::SeveritySuppressor suppressor(
            Minutes{1}, logv2::LogSeverity::Warning(), logv2::LogSeverity::Debug(3));
        const auto sev = suppressor();
        if (shouldLog(MONGO_LOGV2_DEFAULT_COMPONENT, sev)) {
            LOGV2_DEBUG(12006902,
                        sev.toInt(),
                        "Failed to collect mount stats",
                        "error"_attr = parseStatus);
        }
    };

    state.job = svcCtx->getPeriodicRunner()->makeJob(
        PeriodicRunner::PeriodicJob{"SystemMountOtelMetrics",
                                    std::move(refreshGauges),
                                    Seconds(1),
                                    false /*isKillableByStepdown*/});
    state.job.start();
}
} // namespace mongo

View File

@ -0,0 +1,45 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/otel/metrics/instrumentation/system_mount_metrics.h"
// No-op implementation of the SystemMountMetrics API: nothing is registered, updated or
// scheduled by any function in this file.
// NOTE(review): presumably compiled on platforms where mount stats are not collected -- confirm
// against the build rules.
namespace mongo {
// Empty placeholder so that the pimpl member has a complete type.
class SystemMountMetrics::Impl {};
// The mountpoint list is accepted for interface compatibility and ignored.
SystemMountMetrics::SystemMountMetrics(std::vector<std::string>)
: _impl(std::make_unique<Impl>()) {}
SystemMountMetrics::~SystemMountMetrics() = default;
// Ignores the supplied mount-stats document.
void SystemMountMetrics::update(const BSONObj&) {}
// Does not create metrics or start any job.
void installSystemMountOtelMetrics(ServiceContext*) {}
} // namespace mongo

View File

@ -0,0 +1,124 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/otel/metrics/instrumentation/system_mount_metrics.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/otel/metrics/metrics_test_util.h"
#include "mongo/unittest/unittest.h"
namespace mongo {
namespace {
using otel::metrics::DynamicMetricNameMaker;
using otel::metrics::OtelMetricsCapturer;
// Mountpoint that the fixture registers gauges for.
constexpr StringData kDataMount = "/data"_sd;
// Mountpoint that is deliberately never registered; used to check that unknown entries are ignored.
constexpr StringData kTmpMount = "/tmp"_sd;
// All tests register SystemMountMetrics with this fixed mountpoint set. The global MetricsService
// retains registrations across tests within the same binary, so every test must use the same set.
const std::vector<std::string> kTestMountpoints = {std::string(kDataMount)};
// Metric names the tests expect for the "/data" gauges.
constexpr StringData kDataCapacity = "systemMetrics.mounts.data.capacity"_sd;
constexpr StringData kDataAvailable = "systemMetrics.mounts.data.available"_sd;
constexpr StringData kDataFree = "systemMetrics.mounts.data.free"_sd;
// Metric name a "/tmp" capacity gauge would have; the tests expect it not to exist.
constexpr StringData kTmpCapacity = "systemMetrics.mounts.tmp.capacity"_sd;
/**
 * Builds a mount-stats document holding a single mountpoint:
 *   { <mountpoint>: { capacity: <capacity>, available: <available>, free: <free> } }
 */
BSONObj makeMountsBson(StringData mountpoint,
                       long long capacity,
                       long long available,
                       long long free) {
    BSONObjBuilder root;

    BSONObjBuilder stats(root.subobjStart(mountpoint));
    stats.appendNumber("capacity", capacity);
    stats.appendNumber("available", available);
    stats.appendNumber("free", free);
    // Close the sub-object explicitly before the enclosing document is finalized.
    stats.doneFast();

    return root.obj();
}
// Fixture shared by all tests in this file: owns a metrics capturer and a SystemMountMetrics
// registered for kTestMountpoints.
class SystemMountOtelMetricsTest : public unittest::Test {
protected:
// Skips the test when OtelMetricsCapturer reports that metrics cannot be read back.
void setUp() override {
if (!OtelMetricsCapturer::canReadMetrics()) {
GTEST_SKIP() << "Skipping test: OTel metrics unavailable on this platform";
}
}
// Declared before '_metrics', so it is constructed first.
// NOTE(review): presumably the capturer must exist before the gauges are created -- confirm.
OtelMetricsCapturer _capturer;
// Object under test; constructed with the fixed mountpoint set.
SystemMountMetrics _metrics{kTestMountpoints};
};
// A single update() must publish all three values of the registered mountpoint.
TEST_F(SystemMountOtelMetricsTest, UpdateSetsGaugeValues) {
    // Reads the current value of the int64 gauge with the given metric name.
    const auto gaugeValue = [this](StringData metricName) {
        return _capturer.readInt64Gauge(DynamicMetricNameMaker::make(metricName));
    };

    _metrics.update(makeMountsBson(kDataMount, 1000, 400, 500));

    ASSERT_EQ(gaugeValue(kDataCapacity), 1000);
    ASSERT_EQ(gaugeValue(kDataAvailable), 400);
    ASSERT_EQ(gaugeValue(kDataFree), 500);
}
// Entries for mountpoints that were not passed to the constructor must not create or touch any
// gauge, while registered mountpoints in the same document are still updated.
TEST_F(SystemMountOtelMetricsTest, UpdateIgnoresUndeclaredMountpoints) {
    // Appends { <mount>: { capacity, available, free } } to 'parent'.
    const auto appendMount = [](BSONObjBuilder& parent,
                                StringData mount,
                                long long capacity,
                                long long available,
                                long long freeBytes) {
        BSONObjBuilder stats(parent.subobjStart(mount));
        stats.appendNumber("capacity", capacity);
        stats.appendNumber("available", available);
        stats.appendNumber("free", freeBytes);
    };

    _metrics.update(makeMountsBson(kDataMount, 1000, 400, 500));

    BSONObjBuilder both;
    appendMount(both, kDataMount, 2000LL, 800LL, 1000LL);
    // /tmp was never registered — its data must be ignored.
    appendMount(both, kTmpMount, 500LL, 200LL, 250LL);
    _metrics.update(both.obj());

    ASSERT_EQ(_capturer.readInt64Gauge(DynamicMetricNameMaker::make(kDataCapacity)), 2000);
    // /tmp was never registered, so its metric name does not exist in the service.
    ASSERT_THROWS_CODE(_capturer.readInt64Gauge(DynamicMetricNameMaker::make(kTmpCapacity)),
                       DBException,
                       ErrorCodes::KeyNotFound);
}
// Applying the same document twice must leave the gauge at the value from that document.
TEST_F(SystemMountOtelMetricsTest, UpdateIsIdempotentForSameValues) {
    const BSONObj mounts = makeMountsBson(kDataMount, 8000, 3000, 4000);

    for (int pass = 0; pass < 2; ++pass) {
        _metrics.update(mounts);
    }

    ASSERT_EQ(_capturer.readInt64Gauge(DynamicMetricNameMaker::make(kDataCapacity)), 8000);
}
} // namespace
} // namespace mongo

View File

@ -60,7 +60,6 @@ public:
*/
constexpr MetricName(StringData name, Passkey<MetricNameMaker>) : _name(name) {}
constexpr MetricName(StringData name, Passkey<disagg::MetricNameMaker>) : _name(name) {}
constexpr StringData getName() const {
return _name;
}
@ -79,6 +78,19 @@ class MONGO_MOD_FILE_PRIVATE MetricNameMaker{public : static constexpr MetricNam
} // namespace otel::metrics
}; // namespace mongo
/**
 * Helper to create MetricName instances with runtime-constructed names (e.g. names that embed
 * device names or mount paths discovered at startup). Requires N&O review since dynamic names
 * cannot be audited at compile time.
 *
 * make() simply forwards 'name' to MetricNameMaker::make().
 *
 * NOTE(review): MetricName appears to keep 'name' as a StringData rather than an owned copy, so
 * callers presumably must keep the underlying characters alive for as long as the returned
 * MetricName is used -- confirm.
 *
 * TODO(SERVER-127521): Ensure ServerStatusOptions is boost::none for any runtime metric.
 */
class MONGO_MOD_PUBLIC DynamicMetricNameMaker{
public : static MetricName make(StringData name){return MetricNameMaker::make(name);
}
}
;
/**
* Central registry of OpenTelemetry metric names used in the server. When adding a new metric to
* the server, please add an entry to MetricNames grouped under your team name.