mongo/jstests/replsets/initial_sync_replSetGetStatus.js
Denis Trailin 266ab6d110 SERVER-121218: Re-enable Initial Sync stats in FTDC (#51017)
GitOrigin-RevId: 29cd2eb832e7029939c9ec5fdbd5b19207f440d4
2026-04-21 21:03:51 +00:00

250 lines
11 KiB
JavaScript

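/**
 * Tests that replSetGetStatus returns initial sync stats ('initialSyncStatus') while initial sync
 * is in progress, including the 'initialSync: 2' summary mode, and that the stats are no longer
 * returned once initial sync has finished.
 */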
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {FeatureFlagUtil} from "jstests/libs/feature_flag_util.js";
// Bring up a one-node replica set and fetch its primary.
const name = "initial_sync_replSetGetStatus";
const replSet = new ReplSetTest({name, nodes: 1});
replSet.startSet();
replSet.initiate();
const primary = replSet.getPrimary();

// Seed 'pretest.bar' with three documents ({a: 1} .. {a: 3}).
const barColl = primary.getDB("pretest").bar;
for (const a of [1, 2, 3]) {
    assert.commandWorked(barColl.insert({a}));
}

// Seed 'test.foo' with two documents; more are inserted later while initial sync is paused.
const coll = primary.getDB("test").foo;
for (const a of [1, 2]) {
    assert.commandWorked(coll.insert({a}));
}

// Add a non-voting, priority-0 secondary with a collection cloner batch size of 2.
const secondaryOptions = {
    rsConfig: {votes: 0, priority: 0},
    setParameter: {"collectionClonerBatchSize": 2},
};
const secondary = replSet.add(secondaryOptions);
secondary.setSecondaryOk();

// Configure every failpoint the test relies on before reInitiate() is called, so the secondary
// can be paused before copying databases, part-way through cloning 'pretest.bar', right before
// initial sync finishes, and right after it finishes.
const failPointBeforeCopying = configureFailPoint(secondary, "initialSyncHangBeforeCopyingDatabases");
const failPointBeforeFinish = configureFailPoint(secondary, "initialSyncHangBeforeFinish");
const failPointAfterFinish = configureFailPoint(secondary, "initialSyncHangAfterFinish");
const cloneHangOptions = {
    namespace: barColl.getFullName(),
    numDocsToClone: 2,
};
const failPointAfterNumDocsCopied = configureFailPoint(
    secondary,
    "initialSyncHangDuringCollectionClone",
    cloneHangOptions,
);

replSet.reInitiate();
// Block until the secondary is parked at the failpoint that precedes database copying.
failPointBeforeCopying.wait();

// With no 'initialSync' argument the stats are included while initial sync is running.
let res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
assert(res.initialSyncStatus, () => "Response should have an 'initialSyncStatus' field: " + tojson(res));

// 'initialSync: 0' leaves the stats out.
res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 0}));
assert(!res.initialSyncStatus, () => "Response should not have an 'initialSyncStatus' field: " + tojson(res));

// A string value for 'initialSync' is rejected with TypeMismatch.
const stringArgReply = secondary.adminCommand({replSetGetStatus: 1, initialSync: "t"});
assert.commandFailedWithCode(stringArgReply, ErrorCodes.TypeMismatch);

// 'initialSync: 2' (summary mode) still returns 'initialSyncStatus'.
res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 2}));
assert(
    res.initialSyncStatus,
    () => "Response with initialSync: 2 should have 'initialSyncStatus' field: " + tojson(res),
);

// This early (before any database is copied) the 'databases' section, when present, must carry
// no per-database sub-object; a 'pretest' entry here would be a bug.
const earlyDbStats = res.initialSyncStatus.databases;
if (earlyDbStats) {
    assert(
        !earlyDbStats.hasOwnProperty("pretest"),
        "Summary should not have per-database 'pretest' sub-object before cloning: " + tojson(earlyDbStats),
    );
}

// Write two more documents to 'test.foo' while the secondary is still paused.
for (const a of [3, 4]) {
    assert.commandWorked(coll.insert({a}));
}

// Release the failpoint so initial sync can carry on.
failPointBeforeCopying.off();
// Block until initial sync is paused part-way through cloning 'pretest.bar'.
failPointAfterNumDocsCopied.wait();

// Full (default) stats captured while the clone is paused. 'pretestDbRes' is reused by the later
// end-of-cloning checks as the baseline for "metrics moved in the right direction".
const pretestDbRes = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
const midCloneStatus = pretestDbRes.initialSyncStatus;
assert.gt(midCloneStatus.totalInitialSyncElapsedMillis, 0);
assert.gt(midCloneStatus.remainingInitialSyncEstimatedMillis, 0);
assert.gt(midCloneStatus.approxTotalDataSize, 0);

// 'pretest' has one collection, and it has not finished cloning yet.
const midCloneDbs = midCloneStatus.databases;
const midClonePretest = midCloneDbs.pretest;
assert.eq(midClonePretest.collections, 1);
assert.eq(midClonePretest.clonedCollections, 0);

let barCollRes = midClonePretest["pretest.bar"];
assert.eq(barCollRes.documentsToCopy, 3);
// collectionClonerBatchSize is 2, but a batch may come back with a single document. In that case
// the failpoint is hit on the following batch, so as many as 3 documents may already be copied.
assert.lte(barCollRes.documentsCopied, 3);
assert.gt(barCollRes.bytesToCopy, 0);
assert.gt(barCollRes.approxBytesCopied, 0);
assert.lte(barCollRes.approxBytesCopied, barCollRes.bytesToCopy);
assert.lt(barCollRes.approxBytesCopied, midCloneStatus.approxTotalDataSize);

// Bytes attributed to the two 'admin' collections; reused by the end-of-cloning checks.
const adminDbStats = midCloneDbs.admin;
const bytesCopiedAdminDb =
    adminDbStats["admin.system.version"].approxBytesCopied + adminDbStats["admin.system.keys"].approxBytesCopied;

// Skip size assertions when the replicated size and count feature is enabled since size
// accounting is different.
const usesReplicatedFastCount = FeatureFlagUtil.isPresentAndEnabled(primary.getDB("test"), "ReplicatedFastCount");
if (!usesReplicatedFastCount) {
    assert.eq(midCloneStatus.approxTotalBytesCopied, bytesCopiedAdminDb + barCollRes.approxBytesCopied);
    assert.gt(midCloneStatus.approxTotalBytesCopied, 0);
}

// Two databases are done; 'pretest' and 'test' are still left to finish.
assert.eq(midCloneDbs.databasesCloned, 2);
assert.eq(midCloneDbs.databasesToClone, 2);

// Same moment, but in summary mode (initialSync: 2).
const summaryRes = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 2}));
const summaryStatus = summaryRes.initialSyncStatus;
assert(summaryStatus, () => "Summary response should have 'initialSyncStatus' field: " + tojson(summaryRes));

const summaryDbs = summaryStatus.databases;
const summaryDbsJson = tojson(summaryDbs);

// The summary keeps the aggregate counters...
assert(summaryDbs.hasOwnProperty("databasesToClone"), "Should have databasesToClone: " + summaryDbsJson);
assert(summaryDbs.hasOwnProperty("databasesCloned"), "Should have databasesCloned: " + summaryDbsJson);
assert(summaryDbs.hasOwnProperty("collectionsToClone"), "Should have collectionsToClone: " + summaryDbsJson);
assert(summaryDbs.hasOwnProperty("collectionsCloned"), "Should have collectionsCloned: " + summaryDbsJson);

// ...but drops the per-database sub-objects.
assert(
    !summaryDbs.hasOwnProperty("pretest"),
    "Summary should not have per-database 'pretest' sub-object: " + summaryDbsJson,
);
assert(!summaryDbs.hasOwnProperty("admin"), "Summary should not have per-database 'admin' sub-object: " + summaryDbsJson);

// The full response, by contrast, carries per-database and per-collection detail.
assert(
    midCloneDbs.hasOwnProperty("pretest"),
    "Full response should have per-database 'pretest' sub-object: " + tojson(midCloneDbs),
);
assert(
    midClonePretest.hasOwnProperty("pretest.bar"),
    "Full response should have per-collection detail: " + tojson(midClonePretest),
);

// Top-level fields must agree between the summary and the full response.
for (const field of ["failedInitialSyncAttempts", "maxFailedInitialSyncAttempts"]) {
    assert.eq(summaryStatus[field], midCloneStatus[field]);
}

// Let the collection clone resume.
failPointAfterNumDocsCopied.off();
// Wait for initial sync to pause right before it finishes.
failPointBeforeFinish.wait();

// Test that replSetGetStatus returns the correct results when initial sync is at the very end.
const endOfCloningRes = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
// On failure, report the response that was actually checked ('endOfCloningRes'), not the stale
// 'res' left over from an earlier phase of the test.
assert(
    endOfCloningRes.initialSyncStatus,
    () => "Response should have an 'initialSyncStatus' field: " + tojson(endOfCloningRes),
);

// It is possible that we update the config document after going through a reconfig. So make sure
// we account for this by only requiring a lower bound on the number of applied ops.
assert.gte(endOfCloningRes.initialSyncStatus.appliedOps, 3);

// Assert metrics have progressed in the right direction since the mid-clone snapshot
// ('pretestDbRes'): more time elapsed, less time estimated to remain, more bytes copied.
assert.gt(
    endOfCloningRes.initialSyncStatus.totalInitialSyncElapsedMillis,
    pretestDbRes.initialSyncStatus.totalInitialSyncElapsedMillis,
);
assert.lt(
    endOfCloningRes.initialSyncStatus.remainingInitialSyncEstimatedMillis,
    pretestDbRes.initialSyncStatus.remainingInitialSyncEstimatedMillis,
);
assert.gt(
    endOfCloningRes.initialSyncStatus.approxTotalBytesCopied,
    pretestDbRes.initialSyncStatus.approxTotalBytesCopied,
);
// The estimated total data size is expected to be unchanged since the mid-clone snapshot.
assert.eq(endOfCloningRes.initialSyncStatus.approxTotalDataSize, pretestDbRes.initialSyncStatus.approxTotalDataSize);
assert.eq(endOfCloningRes.initialSyncStatus.failedInitialSyncAttempts, 0);
assert.eq(endOfCloningRes.initialSyncStatus.maxFailedInitialSyncAttempts, 10);

// All four databases are reported as cloned and none remain.
assert.eq(endOfCloningRes.initialSyncStatus.databases.databasesCloned, 4);
assert.eq(endOfCloningRes.initialSyncStatus.databases.databasesToClone, 0);

// 'pretest.bar': all 3 documents copied and the byte count fully caught up.
assert.eq(endOfCloningRes.initialSyncStatus.databases.pretest.collections, 1);
assert.eq(endOfCloningRes.initialSyncStatus.databases.pretest.clonedCollections, 1);
barCollRes = endOfCloningRes.initialSyncStatus.databases.pretest["pretest.bar"];
assert.eq(barCollRes.documentsToCopy, 3);
assert.eq(barCollRes.documentsCopied, 3);
assert.eq(barCollRes.indexes, 1);
assert.eq(barCollRes.fetchedBatches, 2);
assert.gt(barCollRes.bytesToCopy, 0);
assert.eq(barCollRes.approxBytesCopied, barCollRes.bytesToCopy);

// 'test.foo': 4 documents, i.e. the two seeded up front plus the two inserted while initial sync
// was paused before copying databases.
const fooCollRes = endOfCloningRes.initialSyncStatus.databases.test["test.foo"];
assert.eq(endOfCloningRes.initialSyncStatus.databases.test.collections, 1);
assert.eq(endOfCloningRes.initialSyncStatus.databases.test.clonedCollections, 1);
assert.eq(fooCollRes.documentsToCopy, 4);
assert.eq(fooCollRes.documentsCopied, 4);
assert.eq(fooCollRes.indexes, 1);
assert.eq(fooCollRes.fetchedBatches, 2);
assert.gt(fooCollRes.bytesToCopy, 0);
assert.eq(fooCollRes.approxBytesCopied, fooCollRes.bytesToCopy);

// Skip size assertions when the replicated size and count feature is enabled since size
// accounting is different.
if (!FeatureFlagUtil.isPresentAndEnabled(primary.getDB("test"), "ReplicatedFastCount")) {
    // Everything that was estimated has been copied...
    assert.eq(
        endOfCloningRes.initialSyncStatus.approxTotalDataSize,
        endOfCloningRes.initialSyncStatus.approxTotalBytesCopied,
    );
    // ...and the total is exactly the sum of the user collections plus the 'admin' collections
    // measured at the mid-clone snapshot ('bytesCopiedAdminDb').
    assert.eq(
        endOfCloningRes.initialSyncStatus.approxTotalBytesCopied,
        fooCollRes.approxBytesCopied + barCollRes.approxBytesCopied + bytesCopiedAdminDb,
    );
}

// Let initial sync proceed past the "before finish" pause.
failPointBeforeFinish.off();
// Hold at the "after finish" failpoint so that the 'initialSync' field has already been cleared
// by the time 'replSetGetStatus' is issued.
failPointAfterFinish.wait();

// Once initial sync is done, the default response no longer carries 'initialSyncStatus'.
res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
assert(!res.initialSyncStatus, () => "Response should not have an 'initialSyncStatus' field: " + tojson(res));

// A string value for 'initialSync' is still rejected with TypeMismatch.
const stringArgReplyAfterSync = secondary.adminCommand({replSetGetStatus: 1, initialSync: "m"});
assert.commandFailedWithCode(stringArgReplyAfterSync, ErrorCodes.TypeMismatch);

// Summary mode likewise omits 'initialSyncStatus' after completion.
res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 2}));
assert(
    !res.initialSyncStatus,
    () => "After initial sync, response with initialSync: 2 should not have 'initialSyncStatus' field: " + tojson(res),
);

// Release the last failpoint and wait for the node to reach secondary state.
failPointAfterFinish.off();
replSet.awaitSecondaryNodes(60 * 1000);

// 'local.temp_oplog_buffer' must be empty once initial sync is over.
const leftoverOplogBufferDocs = secondary.getDB("local")["temp_oplog_buffer"].find().itcount();
assert.eq(0, leftoverOplogBufferDocs, "Oplog buffer was not dropped after initial sync");

replSet.stopSet();