SERVER-110442 SERVER-114337 Capture duration for last committed index build (#43531) (#45104)

Co-authored-by: Cedric Sirianni <cedric@sirianni.dev>
GitOrigin-RevId: 4cd153494e158413ec76b3b9b8838174aa0991d0
This commit is contained in:
Damian Wasilewicz 2025-12-11 09:00:31 -08:00 committed by MongoDB Bot
parent 73dadb8ec4
commit 9a7c840349
5 changed files with 237 additions and 10 deletions

View File

@ -0,0 +1,177 @@
/**
* Tests that serverStatus contains an indexBuilds section with lastCommittedMillis.
*
* @tags: [
* # Primary-driven index builds aren't resumable.
* primary_driven_index_builds_incompatible,
* requires_persistence,
* requires_replication,
* ]
*/
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {afterEach, before, beforeEach, describe, it} from "jstests/libs/mochalite.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {extractUUIDFromObject} from "jstests/libs/uuid_util.js";
import {
IndexBuildTest,
ResumableIndexBuildTest
} from "jstests/noPassthrough/libs/index_builds/index_build.js";
const collName = "t";
const dbName = "test";
function getLastCommittedMillis(conn) {
const serverStatus = conn.serverStatus();
assert(serverStatus.hasOwnProperty("indexBuilds"),
"indexBuilds section missing: " + tojson(serverStatus));
const indexBuilds = serverStatus.indexBuilds;
assert(indexBuilds.hasOwnProperty("phases"), "phases missing: " + tojson(indexBuilds));
const phases = indexBuilds.phases;
assert(phases.hasOwnProperty("lastCommittedMillis"),
"lastCommittedMillis missing: " + tojson(phases));
return phases.lastCommittedMillis;
}
describe("indexBuilds serverStatus metrics", function() {
before(() => {
this.rst = new ReplSetTest({
nodes: 1,
});
});
beforeEach(() => {
this.rst.startSet();
this.rst.initiate();
this.primary = this.rst.getPrimary();
this.db = this.primary.getDB(dbName);
this.coll = this.db.getCollection(collName);
assert.commandWorked(this.coll.insertMany(Array.from({length: 10}, () => ({a: "foo"}))));
});
afterEach(() => {
this.rst.stopSet();
});
it("during steady state", () => {
let duration = getLastCommittedMillis(this.db);
assert.eq(
duration, 0, `Expected lastCommittedMillis to be 0 on startup, but got ${duration}`);
// Hang index build before completion to extend duration.
const fp = configureFailPoint(this.primary, "hangIndexBuildBeforeCommit");
const awaitCreateIndex =
IndexBuildTest.startIndexBuild(this.primary, this.coll.getFullName(), {a: 1});
// Initiate the failpoint and then sleep for 1000ms to ensure duration >= 1000ms.
fp.wait();
sleep(1000);
fp.off();
// Wait for the parallel shell to exit.
awaitCreateIndex();
duration = getLastCommittedMillis(this.db);
assert.gt(
duration,
1000,
`Expected lastCommittedMillis to be > 1000 after index build, but got ${duration}`);
});
it("on resume", () => {
let duration = getLastCommittedMillis(this.db);
assert.eq(
duration, 0, `Expected lastCommittedMillis to be 0 on startup, but got ${duration}`);
// Hang index build before completion to require resume.
const fp = configureFailPoint(this.primary, "hangIndexBuildBeforeCommit");
const awaitCreateIndex = IndexBuildTest.startIndexBuild(
this.primary,
this.coll.getFullName(),
{a: 1},
{} /* options */,
[ErrorCodes.InterruptedDueToReplStateChange],
);
// Ensure index build is in-progress before restarting, else it is not resumed.
fp.wait();
const buildUUID = extractUUIDFromObject(
IndexBuildTest
.assertIndexes(this.coll, 2, ["_id_"], ["a_1"], {includeBuildUUIDs: true})["a_1"]
.buildUUID,
);
// Restart to trigger resume.
this.rst.restart(0);
// Wait for the parallel shell to exit.
awaitCreateIndex();
// Connect to new primary.
this.primary = this.rst.getPrimary();
this.db = this.primary.getDB(dbName);
this.coll = this.db.getCollection(collName);
// Ensure index build is completed before reading metrics.
ResumableIndexBuildTest.assertCompleted(this.primary, this.coll, [buildUUID], ["a_1"]);
duration = getLastCommittedMillis(this.db);
assert.gt(duration,
0,
`Expected lastCommittedMillis to be > 0 after index build, but got ${duration}`);
});
it("on restart", function() {
let duration = getLastCommittedMillis(this.db);
assert.eq(
duration, 0, `Expected lastCommittedMillis to be 0 on startup, but got ${duration}`);
// Hang index build before completion to require restart.
const fp = configureFailPoint(this.primary, "hangIndexBuildBeforeCommit");
const awaitCreateIndex = IndexBuildTest.startIndexBuild(
this.primary,
this.coll.getFullName(),
{a: 1},
{} /* options */,
[ErrorCodes.InterruptedDueToReplStateChange],
);
// Ensure index build is in-progress before stopping and starting, else it is not restarted.
fp.wait();
const buildUUID = extractUUIDFromObject(
IndexBuildTest
.assertIndexes(this.coll, 2, ["_id_"], ["a_1"], {includeBuildUUIDs: true})["a_1"]
.buildUUID,
);
// Kill process to require index build to start over. The `forRestart` flag is required to
// preserve state between stop and start.
this.rst.stop(
0, 9 /* signal */, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true});
// Wait for the parallel shell to exit.
awaitCreateIndex({checkExitStatus: false});
this.rst.start(0, {} /* options */, true /* restart */);
// Connect to new primary.
this.primary = this.rst.getPrimary();
this.db = this.primary.getDB(dbName);
this.coll = this.db.getCollection(collName);
// Ensure index build is completed before reading metrics.
ResumableIndexBuildTest.assertCompleted(this.primary, this.coll, [buildUUID], ["a_1"]);
duration = getLastCommittedMillis(this.db);
assert.gt(duration,
0,
`Expected lastCommittedMillis to be > 0 after index build, but got ${duration}`);
});
});

View File

@ -1,5 +1,5 @@
/**
* Tests that serverStatus contains an indexBuilder section. This section reports
* Tests that serverStatus contains an indexBulkBuilder section. This section reports
* globally-aggregated statistics about index builds and the external sorter.
*
* @tags: [

View File

@ -159,6 +159,7 @@ public:
phases.append("processConstraintsViolatonTableOnCommit",
processConstraintsViolatonTableOnCommit.loadRelaxed());
phases.append("commit", commit.loadRelaxed());
phases.append("lastCommittedMillis", lastCommittedMillis.loadRelaxed());
phases.done();
return indexBuilds.obj();
@ -173,6 +174,8 @@ public:
AtomicWord<int> drainSideWritesTableOnCommit{0};
AtomicWord<int> processConstraintsViolatonTableOnCommit{0};
AtomicWord<int> commit{0};
// The duration of the last committed index build.
AtomicWord<int64_t> lastCommittedMillis{0};
};
auto& indexBuildsSSS = *ServerStatusSectionBuilder<IndexBuildsSSS>("indexBuilds").forShard();
@ -523,6 +526,17 @@ AutoGetCollection::Options makeAutoGetCollectionOptions(
Lock::GlobalLockOptions{.skipRSTLLock = skipRSTL, .explicitIntent = explicitIntent});
}
/**
* Stores the duration of the most recently committed index build in the indexBuilds server
* status section.
*/
void storeLastCommittedDuration(const ReplIndexBuildState& replState) {
const auto metrics = replState.getIndexBuildMetrics();
const auto now = Date_t::now();
const auto elapsedTime = (now - metrics.startTime).count();
indexBuildsSSS.lastCommittedMillis.store(elapsedTime);
}
} // namespace
const auto getIndexBuildsCoord =
@ -703,8 +717,13 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
}
}
auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(
buildUUID, collWriter->uuid(), collWriter->ns().dbName(), specs, indexIdents, protocol);
auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(buildUUID,
collWriter->uuid(),
collWriter->ns().dbName(),
specs,
indexIdents,
protocol,
Date_t::now());
Status status = activeIndexBuilds.registerIndexBuild(replIndexBuildState);
if (!status.isOK()) {
@ -841,8 +860,13 @@ Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx,
}
auto protocol = IndexBuildProtocol::kTwoPhase;
auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(
buildUUID, collection->uuid(), dbName, specs, std::vector<std::string>{}, protocol);
auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(buildUUID,
collection->uuid(),
dbName,
specs,
std::vector<std::string>{},
protocol,
Date_t::now());
Status status = activeIndexBuilds.registerIndexBuild(replIndexBuildState);
if (!status.isOK()) {
@ -2400,7 +2424,7 @@ IndexBuildsCoordinator::_filterSpecsAndRegisterBuild(OperationContext* opCtx,
}
auto replIndexBuildState = std::make_shared<ReplIndexBuildState>(
buildUUID, collectionUUID, dbName, filteredSpecs, filteredIdents, protocol);
buildUUID, collectionUUID, dbName, filteredSpecs, filteredIdents, protocol, Date_t::now());
replIndexBuildState->stats.numIndexesBefore = getNumIndexesTotal(opCtx, collection.get());
auto status = activeIndexBuilds.registerIndexBuild(replIndexBuildState);
@ -3199,7 +3223,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
if (MONGO_unlikely(hangIndexBuildBeforeCommit.shouldFail())) {
LOGV2(4841706, "Hanging before committing index build");
hangIndexBuildBeforeCommit.pauseWhileSet();
hangIndexBuildBeforeCommit.pauseWhileSet(opCtx);
}
// Need to return the collection lock back to exclusive mode to complete the index build.
@ -3295,7 +3319,9 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
opCtx, collection.get(), replState->buildUUID));
}
}
indexBuildsSSS.commit.addAndFetch(1);
storeLastCommittedDuration(*replState);
// If two phase index builds is enabled, index build will be coordinated using
// startIndexBuild and commitIndexBuild oplog entries.

View File

@ -204,14 +204,16 @@ ReplIndexBuildState::ReplIndexBuildState(const UUID& indexBuildUUID,
const DatabaseName& dbName,
const std::vector<BSONObj>& specs,
const std::vector<std::string>& idents,
IndexBuildProtocol protocol)
IndexBuildProtocol protocol,
Date_t startTime)
: buildUUID(indexBuildUUID),
collectionUUID(collUUID),
dbName(dbName),
indexNames(extractIndexNames(specs)),
indexSpecs(specs),
indexIdents(idents),
protocol(protocol) {
protocol(protocol),
_metrics(IndexBuildMetrics{.startTime = startTime}) {
_waitForNextAction = std::make_unique<SharedPromise<IndexBuildAction>>();
if (protocol == IndexBuildProtocol::kTwoPhase)
commitQuorumLock.emplace(indexBuildUUID.toString());
@ -722,6 +724,11 @@ void ReplIndexBuildState::appendBuildInfo(BSONObjBuilder* builder) const {
_indexBuildState.appendBuildInfo(builder);
}
IndexBuildMetrics ReplIndexBuildState::getIndexBuildMetrics() const {
stdx::lock_guard lk(_mutex);
return _metrics;
}
bool ReplIndexBuildState::_shouldSkipIndexBuildStateTransitionCheck(OperationContext* opCtx) const {
const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
if (replCoord->getSettings().isReplSet() && protocol == IndexBuildProtocol::kTwoPhase) {

View File

@ -295,6 +295,14 @@ private:
Status _abortStatus = Status::OK();
};
/**
* Tracks metrics for an index build of one or more indexes.
*/
struct IndexBuildMetrics {
// The time at which the index build begins.
Date_t startTime;
};
/**
* Tracks the cross replica set progress of a particular index build identified by a build UUID.
*
@ -312,7 +320,8 @@ public:
const DatabaseName& dbName,
const std::vector<BSONObj>& specs,
const std::vector<std::string>& idents,
IndexBuildProtocol protocol);
IndexBuildProtocol protocol,
Date_t startTime);
/**
* The index build thread has been scheduled, from now on it should be possible to interrupt the
@ -527,6 +536,11 @@ public:
*/
void appendBuildInfo(BSONObjBuilder* builder) const;
/**
* Returns the metrics for this index build.
*/
IndexBuildMetrics getIndexBuildMetrics() const;
// Uniquely identifies this index build across replica set members.
const UUID buildUUID;
@ -632,6 +646,9 @@ private:
// Set once before attempting to vote for commit readiness.
bool _votedForCommitReadiness = false;
// Metrics for this index build. Used for server status reporting.
IndexBuildMetrics _metrics;
};
} // namespace mongo