SERVER-116091 Explicitly replicate wildcard multikeyness in txns (#51970)

GitOrigin-RevId: 9f85874cd282b9fc3273b01c00b2522de64dc9d8
This commit is contained in:
Yujin Kang Park 2026-05-26 12:48:04 +02:00 committed by MongoDB Bot
parent 1adc42dcf2
commit 26d3dfa093
24 changed files with 2746 additions and 153 deletions

View File

@ -3,3 +3,6 @@ filters:
- "*":
approvers:
- 10gen/server-catalog-and-routing-ddl
- "/multikey_timestamp_consistency.js":
approvers:
- 10gen/server-catalog-and-routing-shard-catalog

View File

@ -0,0 +1,958 @@
/**
* Tests timestamp consistency of multikey metadata across replicas under concurrent load.
*
* Several kinds of multikey-triggering operations run concurrently:
*
* Transactional (expected to be timestamp-consistent):
* 1. createCollAndInsertWildcardMkInTxn: Creates a NEW collection + wildcard index + multikey
* insert in one transaction. Exercises the path where a side txn can't see the index.
* 2. createCollAndInsertRegularMkInTxn: Same but with a regular {a: 1} index.
* 3. createCollAndUpdateWildcardMkInTxn: Creates a NEW collection + wildcard index + scalar
* insert + update-to-array in one transaction. Exercises the update-path multikey trigger
* when the index was created in the same txn (side txn can't see the index).
* 4. createCollAndUpdateRegularMkInTxn: Same but with a regular {a: 1} index.
* 5. insertWildcardMkInTxn: N (1-5) multikey inserts on distinct fields (a shuffled subset of
* INDEXED_FIELDS) into a pre-existing wildcard-indexed collection in a single transaction.
* Exercises both single-statement and multi-statement side-write accumulation across
* distinct multikey paths.
* 6. insertRegularMkInTxn: N (1-5) multikey inserts into a pre-existing regular-indexed
* collection that has a separate {field: 1} index per field in INDEXED_FIELDS. Each
* statement targets a distinct field so each insert exercises a distinct index's multikey
* path.
* 7. updateWildcardMkInTxn: N (1-5) updates of pre-inserted scalar fields to arrays inside a
* transaction against a pre-existing wildcard-indexed collection. Exercises the update-path
* multikey transition across distinct paths.
* 8. updateRegularMkInTxn: N (1-5) updates of pre-inserted scalars to arrays inside a
* transaction against the same multi-index regular collection as state 6, with each
* statement targeting a distinct field.
*
* Transactional states 5-8 randomly prepare the transaction before commit with probability
* `prepareProbability`, exercising both unprepared and prepared commit paths.
*
* TODO (SERVER-110904): Amend comment below and remove 'shouldAssert' parameter to always assert.
* Non-transactional (NOT expected to be timestamp-consistent yet):
* 9. insertWildcardMkDirect: N (1-5) non-transactional multikey inserts on distinct fields into
* a pre-existing wildcard-indexed collection. Adjacent inserts may be batched together
* during oplog application on the secondary, exercising batched multikey-metadata apply.
* Logs inconsistencies but does not assert.
* 10. insertRegularMkDirect: N (1-5) non-transactional multikey inserts into a pre-existing
* regular-indexed collection that has a separate {field: 1} index per field in
* INDEXED_FIELDS, with each insert targeting a distinct field. Adjacent inserts may
* be batched together during oplog application on the secondary. Logs inconsistencies but
* does not assert.
* 11. updateWildcardMkDirect: N (1-5) non-transactional updates of pre-inserted scalar fields
* to arrays against a pre-existing wildcard-indexed collection. Adjacent updates may be
* batched together during oplog application on the secondary. Logs inconsistencies but
* does not assert.
* 12. updateRegularMkDirect: N (1-5) non-transactional updates of pre-inserted scalars to
* arrays against the same multi-index regular collection as state 10. Adjacent updates may
* be batched together during oplog application on the secondary. Logs inconsistencies but
* does not assert.
* 13. dropAndRecreateIndex: Non-transactionally drops a random index (wildcard or one of the
* regular per-field indexes) on a random pool collection and immediately recreates it.
* Stresses the interaction between index lifecycle and concurrent multikey side-writes
* from other threads. Tolerates IndexNotFound (another thread may have dropped first).
* 14. dropAndRecreateColl: Non-transactionally drops a random pool collection and recreates it
* with the full set of wildcard + regular indexes. Stresses catalog churn against
* concurrent multikey side-writes. Other pool-using states tolerate NamespaceNotFound and
* CollectionUUIDMismatch via isCollDropError when their writes race with this state.
*
* 15. insertNoise: Non-transactional insert to an unrelated collection for batch pressure.
*
* After each operation, the thread verifies that the catalog multikey flag (and index-specific
* multikey paths) are consistent between primary and secondary at the operation's timestamp
* and the preceding timestamp.
*
* @tags: [
* uses_transactions,
* featureFlagReplicateMultikeynessInTransactions,
* requires_replication,
* does_not_support_stepdowns,
* # Suites converting to txns would break our assumption about oplog format of inserts.
* does_not_support_transactions,
* # The workload's internal contention plus a per-test runtime in the tens of minutes exceeds
* # the burn-in budget when layered under simultaneous FSM workloads.
* incompatible_with_concurrency_simultaneous,
* ]
*/
import {withTxnAndAutoRetry} from "jstests/concurrency/fsm_workload_helpers/auto_retry_transaction.js";
import {PersistenceProviderUtil} from "jstests/libs/server-rss/persistence_provider_util.js";
export const $config = (function () {
const collPrefix = "mk_ts_";
function poolCollName(idx) {
return collPrefix + "pool_" + idx;
}
function pickPoolColl() {
return poolCollName(Random.randInt(POOL_SIZE));
}
// Returns true if `e` is a transient error caused by a concurrent dropAndRecreateColl on
// the target collection. Callers tolerate it by returning early without verification.
// NamespaceNotFound: a non-txn write hit the post-drop, pre-recreate window.
// CollectionUUIDMismatch: a non-txn write referenced the pre-drop UUID after recreate.
// OperationNotSupportedInTransaction: an in-txn write implicitly created the dropped
// collection, then prepare/commit rejected because distributed txns disallow implicit
// collection creation.
function isCollDropError(e) {
return (
e.code === ErrorCodes.NamespaceNotFound ||
e.code === ErrorCodes.CollectionUUIDMismatch ||
e.code === ErrorCodes.OperationNotSupportedInTransaction
);
}
// Returns true and best-effort aborts any leftover transaction on the session if `e` is a
// tolerable coll-drop race; returns false otherwise. withTxnAndAutoRetry skips the abort
// path when an error is raised by prepare/commit (treated as a commit error of unknown
// outcome), which leaves the session with an active transaction marker. Aborting here
// guarantees the next state can start a fresh transaction. NoSuchTransaction is expected
// when there was no leftover transaction (e.g. for non-txn states).
function tolerateCollDropError(state, e) {
if (!isCollDropError(e)) return false;
assert.commandWorkedOrFailedWithCode(state.session.abortTransaction_forTesting(), ErrorCodes.NoSuchTransaction);
return true;
}
const IndexType = Object.freeze({REGULAR: "regular", WILDCARD: "wildcard"});
// Number of pre-existing collections in the shared pool. Each pool collection has both a
// wildcard {"$**": 1} index and one {field: 1} regular index per field in
// REGULAR_INDEX_FIELDS. States pick a random pool collection on each invocation so threads
// contend on the same multikey metadata, exercising concurrent setMultikey paths.
const POOL_SIZE = 2;
// Fields exercised by regular-index states. Each gets its own {field: 1} index on every
// pool collection.
const REGULAR_INDEX_FIELDS = ["a", "b", "c", "d", "e"];
// Fields exercised by wildcard-index states. Overlap with REGULAR_INDEX_FIELDS on "d" and
// "e" so a single op can trigger multikey transitions on both index types simultaneously.
// The wildcard {"$**": 1} index physically covers any field, so f/g/h hit only the
// wildcard index while a/b/c hit only the regular indexes.
const WILDCARD_FIELDS = ["d", "e", "f", "g", "h"];
// --- Shared helpers ---
function getCatalogMetadata(node, dbName, collName, ts) {
try {
const catalog = node
.getDB("admin")
.aggregate([{$listCatalog: {}}, {$match: {db: dbName, name: collName}}], {
readConcern: {level: "snapshot", atClusterTime: ts},
})
.toArray();
if (catalog.length === 0) return null;
return catalog[0]["md"]["indexes"];
} catch (e) {
return undefined;
}
}
// Locates the index entry that covers `fieldName` for the given index type.
// Wildcard: matches the unique index with keyPattern {"$**": 1}.
// Regular: matches a single-field index whose keyPattern is exactly {fieldName: 1}.
function findIndexForField(indexes, indexType, fieldName) {
if (indexType === IndexType.WILDCARD) {
return indexes.find((idx) => idx.spec && idx.spec.key && idx.spec.key["$**"] === 1);
}
return indexes.find((idx) => {
const key = idx.spec && idx.spec.key;
if (!key) return false;
const keys = Object.keys(key);
return keys.length === 1 && key[fieldName] === 1;
});
}
function getWildcardMultikeyPaths(node, dbName, collName, ts, hint, fieldName) {
try {
const filter = {};
filter[fieldName] = 1;
const explain = node.getDB(dbName).runCommand({
explain: {find: collName, filter: filter, hint: hint},
readConcern: {level: "snapshot", atClusterTime: ts},
verbosity: "queryPlanner",
});
if (!explain.ok) return undefined;
// Walks the explain tree to find the wildcard IXSCAN. Handles both engines:
// - Classic: winningPlan.{stage:..., inputStage:{...}}
// - SBE: winningPlan.queryPlan.{stage:..., inputStage:{...}}
// SBE wraps the QuerySolutionNode tree under a 'queryPlan' field; recurse into it.
function findWcIxscan(plan) {
if (!plan) return null;
if (plan.stage === "IXSCAN" && plan.keyPattern && plan.keyPattern["$_path"]) return plan;
if (plan.queryPlan) {
const r = findWcIxscan(plan.queryPlan);
if (r) return r;
}
if (plan.inputStage) return findWcIxscan(plan.inputStage);
if (plan.inputStages) {
for (const s of plan.inputStages) {
const r = findWcIxscan(s);
if (r) return r;
}
}
return null;
}
const ixscan = findWcIxscan(explain.queryPlanner.winningPlan);
if (!ixscan) return [];
return ixscan.multiKeyPaths && ixscan.multiKeyPaths[fieldName] ? ixscan.multiKeyPaths[fieldName] : [];
} catch (e) {
return undefined;
}
}
const OPLOG_SCAN_LOST_MSG =
"Skipping multikey consistency check: oplog scan lost capped position (concurrent oplog rotation)";
// Returns the applyOps oplog entry whose `o.applyOps` array contains an element matching
// `elemMatch`, or null if the entry is absent (e.g. the txn committed with a no-op update
// because dropAndRecreateColl removed its target doc) or the oplog got truncated
// (CappedPositionLost). Callers handle null by skipping verification. The outer `op: "c"`
// restricts the match to applyOps txn entries and excludes any standalone CRUD entry.
function findTxnOplogEntry(primary, elemMatch) {
try {
return primary.getDB("local").oplog.rs.findOne({
op: "c",
"o.applyOps": {$elemMatch: elemMatch},
});
} catch (e) {
if (e.code === ErrorCodes.CappedPositionLost) {
jsTestLog(OPLOG_SCAN_LOST_MSG + ": " + tojson(e));
return null;
}
throw e;
}
}
// Returns the oplog entry matching `match`, or null if the entry is absent (e.g. the op
// was a no-op update because dropAndRecreateColl removed its target doc) or the oplog got
// truncated (CappedPositionLost). Callers handle null by skipping verification.
function findOplogEntry(primary, match) {
try {
return primary.getDB("local").oplog.rs.findOne(match);
} catch (e) {
if (e.code === ErrorCodes.CappedPositionLost) {
jsTestLog(OPLOG_SCAN_LOST_MSG + ": " + tojson(e));
return null;
}
throw e;
}
}
// Returns the timestamp of the most recent oplog entry strictly before `ts`, or null if the
// oplog truncated (CappedPositionLost).
function findPrevTimestamp(primary, ts) {
let prev;
try {
prev = primary
.getDB("local")
.oplog.rs.find({ts: {$lt: ts}})
.sort({ts: -1})
.limit(1)
.toArray()[0];
assert(prev, "Could not find a previous oplog entry before ts: " + tojson(ts));
} catch (e) {
if (e.code === ErrorCodes.CappedPositionLost) {
jsTestLog(OPLOG_SCAN_LOST_MSG + ": " + tojson(e));
return null;
}
throw e;
}
return prev ? prev.ts : null;
}
// Checks multikey metadata consistency. If shouldAssert is false, logs mismatches
// instead of asserting (for non-txn paths where consistency is not yet guaranteed).
function verifyMultikeyConsistency(
primary,
secondary,
dbName,
collName,
indexType,
wildcardHint,
fieldNames,
ts1,
ts2,
shouldAssert,
) {
for (const ts of [ts1, ts2]) {
const pIndexes = getCatalogMetadata(primary, dbName, collName, ts);
const sIndexes = getCatalogMetadata(secondary, dbName, collName, ts);
if (pIndexes === undefined || sIndexes === undefined) continue;
// Both must agree on collection existence.
if (!pIndexes || !sIndexes) {
if (shouldAssert) {
assert.eq(
!pIndexes,
!sIndexes,
"Catalog entry presence mismatch at ts " + tojson(ts) + " for " + collName,
);
} else if (!!pIndexes !== !!sIndexes) {
jsTestLog(
"NON-TXN inconsistency (expected): catalog presence mismatch at " +
tojson(ts) +
" for " +
collName,
);
}
continue;
}
// Compare per-index multikey flag and paths for each field.
for (const fieldName of fieldNames) {
const pIdx = findIndexForField(pIndexes, indexType, fieldName);
const sIdx = findIndexForField(sIndexes, indexType, fieldName);
// Index may not exist at this ts yet (e.g., prevTs before createIndex). Skip.
if (!pIdx || !sIdx) continue;
if (pIdx.multikey !== sIdx.multikey) {
if (shouldAssert) {
assert.eq(
pIdx.multikey,
sIdx.multikey,
"Index multikey flag mismatch at ts " +
tojson(ts) +
" for " +
collName +
" field " +
fieldName,
);
} else {
jsTestLog(
"NON-TXN inconsistency (expected): index flag mismatch at " +
tojson(ts) +
" for " +
collName +
" field " +
fieldName +
": primary=" +
pIdx.multikey +
", secondary=" +
sIdx.multikey,
);
}
continue;
}
if (indexType === IndexType.REGULAR) {
const pPaths = pIdx.multikeyPaths[fieldName];
const sPaths = sIdx.multikeyPaths[fieldName];
if (shouldAssert) {
assert.eq(
pPaths,
sPaths,
"Regular multikeyPaths mismatch at ts " +
tojson(ts) +
" for " +
collName +
" field " +
fieldName,
);
} else if (bsonWoCompare(pPaths, sPaths) !== 0) {
jsTestLog(
"NON-TXN inconsistency (expected): regular paths mismatch at " +
tojson(ts) +
" for " +
collName +
" field " +
fieldName,
);
}
} else {
const pPaths = getWildcardMultikeyPaths(primary, dbName, collName, ts, wildcardHint, fieldName);
const sPaths = getWildcardMultikeyPaths(secondary, dbName, collName, ts, wildcardHint, fieldName);
if (pPaths === undefined || sPaths === undefined) continue;
if (shouldAssert) {
assert.sameMembers(
pPaths,
sPaths,
"Wildcard multiKeyPaths mismatch at ts " +
tojson(ts) +
" for " +
collName +
" field " +
fieldName,
);
} else {
try {
assert.sameMembers(pPaths, sPaths);
} catch (e) {
jsTestLog(
"NON-TXN inconsistency (expected): wildcard paths mismatch at " +
tojson(ts) +
" for " +
collName +
" field " +
fieldName,
);
}
}
}
}
}
}
// Returns a shuffled subset of `fields` of size 1..fields.length.
function pickFieldSubset(fields) {
const numOps = 1 + Random.randInt(fields.length);
return Array.shuffle(fields.slice()).slice(0, numOps);
}
// Returns `count` unique ids of the form `${prefix}${tid}_${insertCount}` and bumps
// `state.insertCount` for each.
function nextIds(state, prefix, count) {
const ids = [];
for (let i = 0; i < count; i++) {
ids.push(prefix + state.tid + "_" + state.insertCount);
state.insertCount++;
}
return ids;
}
// After a transaction commits, locates the applyOps oplog entry matching `oplogElemMatch`,
// computes the prior timestamp, and asserts multikey consistency between primary and
// secondary at both timestamps for the given fields.
function verifyTxnMultikey(state, db, myCollName, oplogElemMatch, indexType, wildcardHint, fields) {
const primary = db.getMongo();
const dbName = db.getName();
const txnEntry = findTxnOplogEntry(primary, oplogElemMatch);
if (!txnEntry) return;
const prevTs = findPrevTimestamp(primary, txnEntry.ts);
if (!prevTs || !state.secondaryConn) return;
verifyMultikeyConsistency(
primary,
state.secondaryConn,
dbName,
myCollName,
indexType,
wildcardHint,
fields,
txnEntry.ts,
prevTs,
true /* shouldAssert */,
);
}
// After a sequence of non-transactional ops, locates the first and last oplog entries
// matching `firstMatch` / `lastMatch`, computes the prior timestamp, and logs (without
// asserting) any multikey inconsistency between primary and secondary.
function verifyDirectMultikey(state, db, myCollName, firstMatch, lastMatch, indexType, wildcardHint, fields) {
const primary = db.getMongo();
const dbName = db.getName();
const firstEntry = findOplogEntry(primary, firstMatch);
const lastEntry = findOplogEntry(primary, lastMatch);
if (!firstEntry || !lastEntry) return;
const prevTs = findPrevTimestamp(primary, firstEntry.ts);
if (!prevTs || !state.secondaryConn) return;
verifyMultikeyConsistency(
primary,
state.secondaryConn,
dbName,
myCollName,
indexType,
wildcardHint,
fields,
lastEntry.ts,
prevTs,
false /* shouldAssert */,
);
}
// --- FSM states ---
let states = {
init: function init(db, collName) {
this.session = db.getMongo().startSession({causalConsistency: false});
this.sessionDb = this.session.getDatabase(db.getName());
this.insertCount = 0;
if (this.secondaryHost) {
this.secondaryConn = new Mongo(this.secondaryHost);
this.secondaryConn.setSecondaryOk();
}
},
// --- Transactional states (timestamp-consistent, assert on mismatch) ---
createCollAndInsertWildcardMkInTxn: function createCollAndInsertWildcardMkInTxn(db, collName) {
const myCollName = collPrefix + "wc_txn_" + this.tid + "_" + this.insertCount;
this.insertCount++;
this.session.startTransaction({writeConcern: {w: "majority"}});
try {
assert.commandWorked(this.sessionDb.createCollection(myCollName));
assert.commandWorked(this.sessionDb[myCollName].createIndex({"$**": 1}));
assert.commandWorked(this.sessionDb[myCollName].insert({a: [1, 2]}));
assert.commandWorked(this.session.commitTransaction_forTesting());
} catch (e) {
this.session.abortTransaction_forTesting();
if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return;
throw e;
}
const dbName = db.getName();
verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.WILDCARD, {"$**": 1}, [
"a",
]);
},
createCollAndInsertRegularMkInTxn: function createCollAndInsertRegularMkInTxn(db, collName) {
const myCollName = collPrefix + "reg_txn_" + this.tid + "_" + this.insertCount;
this.insertCount++;
this.session.startTransaction({writeConcern: {w: "majority"}});
try {
assert.commandWorked(this.sessionDb.createCollection(myCollName));
assert.commandWorked(this.sessionDb[myCollName].createIndex({a: 1}));
assert.commandWorked(this.sessionDb[myCollName].insert({a: [1, 2]}));
assert.commandWorked(this.session.commitTransaction_forTesting());
} catch (e) {
this.session.abortTransaction_forTesting();
if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return;
throw e;
}
const dbName = db.getName();
verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.REGULAR, null, ["a"]);
},
createCollAndUpdateWildcardMkInTxn: function createCollAndUpdateWildcardMkInTxn(db, collName) {
const myCollName = collPrefix + "wc_txn_upd_" + this.tid + "_" + this.insertCount;
this.insertCount++;
this.session.startTransaction({writeConcern: {w: "majority"}});
try {
assert.commandWorked(this.sessionDb.createCollection(myCollName));
assert.commandWorked(this.sessionDb[myCollName].createIndex({"$**": 1}));
assert.commandWorked(this.sessionDb[myCollName].insert({_id: 1, a: 1}));
assert.commandWorked(this.sessionDb[myCollName].update({_id: 1}, {$set: {a: [1, 2]}}));
assert.commandWorked(this.session.commitTransaction_forTesting());
} catch (e) {
this.session.abortTransaction_forTesting();
if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return;
throw e;
}
const dbName = db.getName();
verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.WILDCARD, {"$**": 1}, [
"a",
]);
},
createCollAndUpdateRegularMkInTxn: function createCollAndUpdateRegularMkInTxn(db, collName) {
const myCollName = collPrefix + "reg_txn_upd_" + this.tid + "_" + this.insertCount;
this.insertCount++;
this.session.startTransaction({writeConcern: {w: "majority"}});
try {
assert.commandWorked(this.sessionDb.createCollection(myCollName));
assert.commandWorked(this.sessionDb[myCollName].createIndex({a: 1}));
assert.commandWorked(this.sessionDb[myCollName].insert({_id: 1, a: 1}));
assert.commandWorked(this.sessionDb[myCollName].update({_id: 1}, {$set: {a: [1, 2]}}));
assert.commandWorked(this.session.commitTransaction_forTesting());
} catch (e) {
this.session.abortTransaction_forTesting();
if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return;
throw e;
}
const dbName = db.getName();
verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.REGULAR, null, ["a"]);
},
insertWildcardMkInTxn: function insertWildcardMkInTxn(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(WILDCARD_FIELDS);
const markers = nextIds(this, "wc_txn_", fields.length);
try {
withTxnAndAutoRetry(
this.session,
() => {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(
this.sessionDb[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]}),
);
}
},
{
retryOnKilledSession: this.retryOnKilledSession,
prepareProbability: this.prepareProbability,
},
);
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyTxnMultikey(
this,
db,
myCollName,
{"op": "i", "ns": fullNs, "o._marker": markers[0]},
IndexType.WILDCARD,
{"$**": 1},
fields,
);
},
insertRegularMkInTxn: function insertRegularMkInTxn(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(REGULAR_INDEX_FIELDS);
const markers = nextIds(this, "r", fields.length);
try {
withTxnAndAutoRetry(
this.session,
() => {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(
this.sessionDb[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]}),
);
}
},
{
retryOnKilledSession: this.retryOnKilledSession,
prepareProbability: this.prepareProbability,
},
);
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyTxnMultikey(
this,
db,
myCollName,
{"op": "i", "ns": fullNs, "o._marker": markers[0]},
IndexType.REGULAR,
null,
fields,
);
},
updateWildcardMkInTxn: function updateWildcardMkInTxn(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(WILDCARD_FIELDS);
const docIds = nextIds(this, "u_wc_", fields.length);
try {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1}));
}
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
try {
withTxnAndAutoRetry(
this.session,
() => {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(
this.sessionDb[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}}),
);
}
},
{
retryOnKilledSession: this.retryOnKilledSession,
prepareProbability: this.prepareProbability,
},
);
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyTxnMultikey(
this,
db,
myCollName,
{"op": "u", "ns": fullNs, "o2._id": docIds[0]},
IndexType.WILDCARD,
{"$**": 1},
fields,
);
},
updateRegularMkInTxn: function updateRegularMkInTxn(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(REGULAR_INDEX_FIELDS);
const docIds = nextIds(this, "u_reg_", fields.length);
try {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1}));
}
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
try {
withTxnAndAutoRetry(
this.session,
() => {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(
this.sessionDb[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}}),
);
}
},
{
retryOnKilledSession: this.retryOnKilledSession,
prepareProbability: this.prepareProbability,
},
);
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyTxnMultikey(
this,
db,
myCollName,
{"op": "u", "ns": fullNs, "o2._id": docIds[0]},
IndexType.REGULAR,
null,
fields,
);
},
// --- Non-transactional states (NOT timestamp-consistent, log only) ---
insertWildcardMkDirect: function insertWildcardMkDirect(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(WILDCARD_FIELDS);
const markers = nextIds(this, "wc_direct_", fields.length);
try {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]}));
}
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyDirectMultikey(
this,
db,
myCollName,
{"ns": fullNs, "op": "i", "o._marker": markers[0]},
{"ns": fullNs, "op": "i", "o._marker": markers[fields.length - 1]},
IndexType.WILDCARD,
{"$**": 1},
fields,
);
},
insertRegularMkDirect: function insertRegularMkDirect(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(REGULAR_INDEX_FIELDS);
const markers = nextIds(this, "m", fields.length);
try {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]}));
}
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyDirectMultikey(
this,
db,
myCollName,
{"ns": fullNs, "op": "i", "o._marker": markers[0]},
{"ns": fullNs, "op": "i", "o._marker": markers[fields.length - 1]},
IndexType.REGULAR,
null,
fields,
);
},
updateWildcardMkDirect: function updateWildcardMkDirect(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(WILDCARD_FIELDS);
const docIds = nextIds(this, "u_wc_direct_", fields.length);
try {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1}));
}
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}}));
}
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyDirectMultikey(
this,
db,
myCollName,
{"ns": fullNs, "op": "u", "o2._id": docIds[0]},
{"ns": fullNs, "op": "u", "o2._id": docIds[fields.length - 1]},
IndexType.WILDCARD,
{"$**": 1},
fields,
);
},
updateRegularMkDirect: function updateRegularMkDirect(db, collName) {
const myCollName = pickPoolColl();
const fields = pickFieldSubset(REGULAR_INDEX_FIELDS);
const docIds = nextIds(this, "u_reg_direct_", fields.length);
try {
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1}));
}
for (let i = 0; i < fields.length; i++) {
assert.commandWorked(db[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}}));
}
} catch (e) {
if (tolerateCollDropError(this, e)) return;
throw e;
}
const fullNs = db.getName() + "." + myCollName;
verifyDirectMultikey(
this,
db,
myCollName,
{"ns": fullNs, "op": "u", "o2._id": docIds[0]},
{"ns": fullNs, "op": "u", "o2._id": docIds[fields.length - 1]},
IndexType.REGULAR,
null,
fields,
);
},
dropAndRecreateColl: function dropAndRecreateColl(db, collName) {
// SERVER-126285 only affects fixtures without primary-driven index builds.
if (!this.usesPrimaryDrivenIndexBuilds) return;
const myCollName = pickPoolColl();
db[myCollName].drop();
// Tolerate IndexBuildAborted: another thread may drop the coll again while these
// index builds are running.
assert.commandWorkedOrFailedWithCode(db[myCollName].createIndex({"$**": 1}), ErrorCodes.IndexBuildAborted);
for (const f of REGULAR_INDEX_FIELDS) {
assert.commandWorkedOrFailedWithCode(
db[myCollName].createIndex({[f]: 1}),
ErrorCodes.IndexBuildAborted,
);
}
},
dropAndRecreateIndex: function dropAndRecreateIndex(db, collName) {
// SERVER-126285 only affects fixtures without primary-driven index builds.
if (!this.usesPrimaryDrivenIndexBuilds) return;
const myCollName = pickPoolColl();
const targets = [{"$**": 1}].concat(REGULAR_INDEX_FIELDS.map((f) => ({[f]: 1})));
const target = targets[Random.randInt(targets.length)];
// Tolerate races with other threads dropping or recreating the same index, and
// with dropAndRecreateColl removing the entire collection:
// IndexNotFound: another thread already dropped this index.
// NamespaceNotFound: dropAndRecreateColl dropped the collection.
// IndexBuildAborted: a concurrent dropIndexes/dropCollection aborted our build.
assert.commandWorkedOrFailedWithCode(db[myCollName].dropIndex(target), [
ErrorCodes.IndexNotFound,
ErrorCodes.NamespaceNotFound,
]);
assert.commandWorkedOrFailedWithCode(db[myCollName].createIndex(target), ErrorCodes.IndexBuildAborted);
},
// Batch pressure.
insertNoise: function insertNoise(db, collName) {
assert.commandWorked(db["padding_noise"].insert({noise: Random.randInt(100000), tid: this.tid}));
},
};
function setup(db, collName, cluster) {
if (!cluster.isReplication()) return;
for (let i = 0; i < POOL_SIZE; i++) {
const coll = poolCollName(i);
assert.commandWorked(db[coll].createIndex({"$**": 1}));
for (const f of REGULAR_INDEX_FIELDS) {
assert.commandWorked(db[coll].createIndex({[f]: 1}));
}
}
this.secondaryHost = cluster.getSecondaryHost(db.getName());
// TODO (SERVER-126285): alwasy allow the dropAndRecreate* states. Currently only enabled
// for primary-driven index builds, which avoid the IndexBuildInterceptor multikey-path leak
// (SERVER-126285) that makes multikeyness inconsistent between primary and secondaries.
this.usesPrimaryDrivenIndexBuilds = PersistenceProviderUtil.allNodesHavePropertyWithValue(
db,
"mustUsePrimaryDrivenIndexBuilds",
true,
);
cluster.awaitReplication();
}
function teardown(db, collName, cluster) {
if (cluster.isReplication()) {
cluster.awaitReplication();
}
}
const allTransitions = {
insertNoise: 0.3,
createCollAndInsertWildcardMkInTxn: 0.05,
createCollAndInsertRegularMkInTxn: 0.05,
createCollAndUpdateWildcardMkInTxn: 0.05,
createCollAndUpdateRegularMkInTxn: 0.05,
insertWildcardMkInTxn: 0.05,
insertRegularMkInTxn: 0.05,
updateWildcardMkInTxn: 0.05,
updateRegularMkInTxn: 0.05,
insertWildcardMkDirect: 0.05,
insertRegularMkDirect: 0.05,
updateWildcardMkDirect: 0.05,
updateRegularMkDirect: 0.05,
// Gated at runtime in the state bodies on `mustUsePrimaryDrivenIndexBuilds`: no-op on
// fixtures that don't use primary-driven index builds, see SERVER-126285.
dropAndRecreateIndex: 0.05,
dropAndRecreateColl: 0.05,
};
let transitions = {init: allTransitions};
for (const state of Object.keys(allTransitions)) {
transitions[state] = allTransitions;
}
return {
threadCount: 10,
iterations: 100,
startState: "init",
states: states,
transitions: transitions,
data: {retryOnKilledSession: false, prepareProbability: 0.5},
setup: setup,
teardown: teardown,
};
})();

View File

@ -1,31 +1,205 @@
// Tests that multikey updates made inside a transaction are visible to that transaction's reads.
// @tags: [assumes_unsharded_collection, uses_transactions]
// @tags: [
// assumes_unsharded_collection,
// uses_transactions,
// ]
const dbName = "test";
const collName = "testReadOwnMultikeyWrites";
// Use majority write concern to clear the drop-pending that can cause lock conflicts with
// transactions.
db.getSiblingDB(dbName)
.getCollection(collName)
.drop({writeConcern: {w: "majority"}});
const session = db.getMongo().startSession({causalConsistency: false});
const sessionDb = session.getDatabase(dbName);
const sessionColl = sessionDb.getCollection(collName);
assert.commandWorked(sessionDb.runCommand({create: collName}));
(function testSimpleCase() {
// Use majority write concern to clear the drop-pending that can cause lock conflicts with
// transactions.
db.getSiblingDB(dbName)
.getCollection(collName)
.drop({writeConcern: {w: "majority"}});
assert.commandWorked(sessionColl.insert({a: 1}));
assert.commandWorked(
sessionDb.runCommand({
createIndexes: collName,
indexes: [{name: "a_1", key: {a: 1}}],
writeConcern: {w: "majority"},
}),
const sessionColl = sessionDb.getCollection(collName);
assert.commandWorked(sessionDb.runCommand({create: collName}));
assert.commandWorked(sessionColl.insert({a: 1}));
assert.commandWorked(
sessionDb.runCommand({
createIndexes: collName,
indexes: [{name: "a_1", key: {a: 1}}],
writeConcern: {w: "majority"},
}),
);
session.startTransaction();
assert.commandWorked(sessionColl.update({}, {$set: {a: [1, 2, 3]}}));
assert.eq(1, sessionColl.find({}, {_id: 0, a: 1}).sort({a: 1}).itcount());
assert.commandWorked(session.commitTransaction_forTesting());
assert.eq(1, db.getSiblingDB(dbName).getCollection(collName).find({}, {_id: 0, a: 1}).sort({a: 1}).itcount());
})();
// === Wildcard index multikey RYOW cases ===
//
// These cases verify that wildcard index multikey state written inside a multi-document
// transaction is visible to subsequent reads in the same transaction.
//
// The query pattern uses contradictory range bounds: {field: {$gt: high, $lt: low}}.
// - Multikey-aware planner: generates a union of ($gt: high) and ($lt: low) bounds and
// post-filters; matches a document whose array contains elements satisfying each
// predicate independently.
// - Non-multikey planner: intersects the bounds into an empty range; returns nothing.
function runWildcardCase(collName, setupFn, queryHint, query, expectedCount, message) {
db.getSiblingDB(dbName)
.getCollection(collName)
.drop({writeConcern: {w: "majority"}});
assert.commandWorked(sessionDb.runCommand({create: collName}));
assert.commandWorked(
sessionDb.runCommand({
createIndexes: collName,
indexes: [{name: "wildcard", key: {"$**": 1}}],
writeConcern: {w: "majority"},
}),
);
const wcColl = sessionDb.getCollection(collName);
session.startTransaction();
setupFn(wcColl);
const results = wcColl.find(query).hint(queryHint).toArray();
assert.eq(expectedCount, results.length, message + " got: " + tojson(results));
assert.commandWorked(session.commitTransaction_forTesting());
}
// Sanity: a non-multikey scalar field must NOT match contradictory bounds.
runWildcardCase(
"testReadOwnWildcardMultikeyWrites_sanity",
(coll) => assert.commandWorked(coll.insert({a: 1})),
{"$**": 1},
{a: {$gt: 8, $lt: 3}},
0,
"wildcard sanity (non-multikey scalar):",
);
session.startTransaction();
assert.commandWorked(sessionColl.update({}, {$set: {a: [1, 2, 3]}}));
assert.eq(1, sessionColl.find({}, {_id: 0, a: 1}).sort({a: 1}).itcount());
assert.commandWorked(session.commitTransaction_forTesting());
// Single multikey field.
runWildcardCase(
"testReadOwnWildcardMultikeyWrites_single",
(coll) => assert.commandWorked(coll.insert({a: [1, 10]})),
{"$**": 1},
{a: {$gt: 8, $lt: 3}},
1,
"wildcard RYOW single field:",
);
assert.eq(1, db.getSiblingDB(dbName).getCollection(collName).find({}, {_id: 0, a: 1}).sort({a: 1}).itcount());
// Multiple multikey fields accumulate within a transaction.
(function testWildcardMultipleFields() {
const collName = "testReadOwnWildcardMultikeyWrites_multi";
db.getSiblingDB(dbName)
.getCollection(collName)
.drop({writeConcern: {w: "majority"}});
assert.commandWorked(sessionDb.runCommand({create: collName}));
assert.commandWorked(
sessionDb.runCommand({
createIndexes: collName,
indexes: [{name: "wildcard", key: {"$**": 1}}],
writeConcern: {w: "majority"},
}),
);
const wcColl = sessionDb.getCollection(collName);
session.startTransaction();
assert.commandWorked(wcColl.insert({a: [1, 10]}));
assert.commandWorked(wcColl.insert({b: [2, 20]}));
const resultsA = wcColl
.find({a: {$gt: 8, $lt: 3}})
.hint({"$**": 1})
.toArray();
assert.eq(1, resultsA.length, "wildcard RYOW field 'a': " + tojson(resultsA));
const resultsB = wcColl
.find({b: {$gt: 15, $lt: 5}})
.hint({"$**": 1})
.toArray();
assert.eq(1, resultsB.length, "wildcard RYOW field 'b': " + tojson(resultsB));
assert.commandWorked(session.commitTransaction_forTesting());
})();
// Write, read, write, read — multikey state from later writes must be visible to subsequent
// reads in the same transaction.
(function testWildcardWriteReadWriteRead() {
const collName = "testReadOwnWildcardMultikeyWrites_wrwr";
db.getSiblingDB(dbName)
.getCollection(collName)
.drop({writeConcern: {w: "majority"}});
assert.commandWorked(sessionDb.runCommand({create: collName}));
assert.commandWorked(
sessionDb.runCommand({
createIndexes: collName,
indexes: [{name: "wildcard", key: {"$**": 1}}],
writeConcern: {w: "majority"},
}),
);
const wcColl = sessionDb.getCollection(collName);
session.startTransaction();
assert.commandWorked(wcColl.insert({_id: 1, a: [1, 10]}));
const r1 = wcColl
.find({a: {$gt: 8, $lt: 3}})
.hint({"$**": 1})
.toArray();
assert.eq(1, r1.length, "wildcard RYOW W/R/W/R read 1: " + tojson(r1));
assert.commandWorked(wcColl.insert({_id: 2, b: [2, 20]}));
const r2 = wcColl
.find({b: {$gt: 15, $lt: 5}})
.hint({"$**": 1})
.toArray();
assert.eq(1, r2.length, "wildcard RYOW W/R/W/R read 2: " + tojson(r2));
assert.commandWorked(session.commitTransaction_forTesting());
})();
// $or across two distinct wildcard indexes within one parent transaction.
(function testWildcardOrAcrossTwoIndexes() {
const collName = "testReadOwnWildcardMultikeyWrites_or";
db.getSiblingDB(dbName)
.getCollection(collName)
.drop({writeConcern: {w: "majority"}});
assert.commandWorked(sessionDb.runCommand({create: collName}));
// Two wildcard indexes with disjoint projections so each branch of the $or has exactly
// one usable wildcard index.
assert.commandWorked(
sessionDb.runCommand({
createIndexes: collName,
indexes: [
{name: "wc_a", key: {"$**": 1}, wildcardProjection: {a: 1}},
{name: "wc_c", key: {"$**": 1}, wildcardProjection: {c: 1}},
],
writeConcern: {w: "majority"},
}),
);
const wcColl = sessionDb.getCollection(collName);
session.startTransaction();
assert.commandWorked(wcColl.insert({_id: 1, a: [1, 10]}));
assert.commandWorked(wcColl.insert({_id: 2, c: [2, 20]}));
// Validate each wildcard index individually with hint by name to force IXSCAN.
const resultsA = wcColl
.find({a: {$gt: 8, $lt: 3}})
.hint("wc_a")
.toArray();
assert.eq(1, resultsA.length, "wildcard $or per-index wc_a: " + tojson(resultsA));
const resultsC = wcColl
.find({c: {$gt: 15, $lt: 5}})
.hint("wc_c")
.toArray();
assert.eq(1, resultsC.length, "wildcard $or per-index wc_c: " + tojson(resultsC));
// $or query touching both wildcard indexes simultaneously.
const results = wcColl.find({$or: [{a: {$gt: 8, $lt: 3}}, {c: {$gt: 15, $lt: 5}}]}).toArray();
assert.eq(2, results.length, "wildcard $or across 2 wildcard indexes: " + tojson(results));
assert.commandWorked(session.commitTransaction_forTesting());
})();

View File

@ -2,6 +2,9 @@
* Tests that the multikey metadata set by a CRUD operation in a multi-document transaction is
* timestamp-consistent among replicas, for both a committed transaction and an aborted transaction.
*
* Covers both regular indexes (multikey paths tracked in the catalog) and wildcard indexes
* (multikey paths tracked as metadata key entries in the index itself).
*
* Note that the timestamp consistency cannot be guaranteed after a logical initial sync.
*
* TODO (WT-15476): consider removing this test once timestamp consistency is turned on.
@ -16,27 +19,109 @@
import {ReplSetTest} from "jstests/libs/replsettest.js";
function getMultikeyMetadata(node, dbName, collName, indexId, multiPathsKey, ts) {
return node
.getDB("admin")
.aggregate([{$listCatalog: {}}, {$match: {db: dbName, name: collName}}], {
const IndexType = Object.freeze({
REGULAR: "regular",
WILDCARD: "wildcard",
});
/**
* Returns multikey metadata for the given index at the given timestamp, or null if the
* collection/index does not yet exist at that snapshot.
*
* For regular indexes, reads the multikeyPaths bitset from the catalog ($listCatalog).
* For wildcard indexes, reads the multiKeyPaths from the query planner (explain with atClusterTime),
* which reflects the actual metadata key entries in the index.
*/
function getMultikeyMetadata(node, indexType, dbName, collName, indexId, multiPathsKey, wildcardHint, ts) {
if (indexType == IndexType.REGULAR) {
const catalog = node
.getDB("admin")
.aggregate([{$listCatalog: {}}, {$match: {db: dbName, name: collName}}], {
readConcern: {level: "snapshot", atClusterTime: ts},
})
.toArray();
// Collection does not exist at this snapshot (same-txn create-then-insert case walks
// timestamps earlier than the collection's creation).
if (catalog.length === 0) {
return null;
}
const idx = catalog[0]["md"]["indexes"][indexId];
if (!idx) {
return null;
}
return idx["multikeyPaths"][multiPathsKey];
} else {
const explain = node.getDB(dbName).runCommand({
explain: {find: collName, filter: {[multiPathsKey]: 1}, hint: wildcardHint},
readConcern: {level: "snapshot", atClusterTime: ts},
})
.toArray()[0]["md"]["indexes"][indexId]["multikeyPaths"][multiPathsKey];
verbosity: "queryPlanner",
});
// If the collection does not yet exist at this snapshot, explain returns an error.
if (!explain.ok) {
return null;
}
// Walks the explain tree to find the wildcard IXSCAN. Handles both engines:
// - Classic: winningPlan.{stage:..., inputStage:{...}}
// - SBE: winningPlan.queryPlan.{stage:..., inputStage:{...}}
// SBE wraps the QuerySolutionNode tree under a 'queryPlan' field; recurse into it.
function findWildcardIxscan(plan) {
if (!plan) {
return null;
}
if (plan.stage === "IXSCAN" && plan.keyPattern && plan.keyPattern["$_path"]) {
return plan;
}
if (plan.queryPlan) {
const r = findWildcardIxscan(plan.queryPlan);
if (r) {
return r;
}
}
if (plan.inputStage) {
return findWildcardIxscan(plan.inputStage);
}
if (plan.inputStages) {
for (const s of plan.inputStages) {
const result = findWildcardIxscan(s);
if (result) {
return result;
}
}
}
return null;
}
const ixscan = findWildcardIxscan(explain.queryPlanner.winningPlan);
return ixscan?.multiKeyPaths?.[multiPathsKey] ?? [];
}
}
// Returns the highest timestamp present in the oplog that is less than the given timestamp.
/**
* Returns true if the multikey metadata indicates "not multikey" for the given index type, or the
* collection/index does not exist yet at this snapshot.
*/
function absentMultikeyMetadata(multikeyMetadata, indexType) {
if (multikeyMetadata === null) {
// Collection/index not yet created at this snapshot.
return true;
}
if (indexType == IndexType.REGULAR) {
return bsonBinaryEqual(multikeyMetadata, BinData(0, "AA==")); // byte [0]
} else {
return multikeyMetadata.length === 0;
}
}
/**
* Returns the highest timestamp present in the oplog that is less than the given timestamp.
*/
function decrementTimestamp(node, ts) {
// Query the oplog to find the highest timestamp entry that is lower than the given timestamp.
const oplogEntry = node
.getDB("local")
.oplog.rs.find({ts: {$lt: ts}})
.sort({ts: -1})
.limit(1)
.toArray()[0];
assert(oplogEntry, "No earlier oplog entry found for timestamp: " + tojson(ts));
return oplogEntry.ts;
}
@ -54,30 +139,114 @@ const TxnCommitOrAbort = Object.freeze({
ABORT: "abort",
});
function verifyTimestampConsistency(commitOrAbort, collName) {
jsTestLog(
"Verifying multikey timestamp consistency with transaction that will " +
commitOrAbort +
" against collection: " +
/**
* Walks the oplog backwards from the latest timestamp. At each timestamp, fetches multikey
* metadata on both primary and secondary and asserts they are identical. Stops when it finds a
* timestamp at which multikey metadata is absent (i.e. before it was first recorded). Asserts that
* at least `expectedMinTimestamps` timestamps had multikey metadata.
*/
function walkAndAssertReplicaMultikeyConsistency({
indexType,
collName,
indexId,
multiPathsKey,
wildcardHint,
expectedMinTimestamps,
context,
}) {
// Ensure primaries have caught up before walking.
rst.awaitReplication();
const latestTs = primary.getDB("local").oplog.rs.find().sort({$natural: -1}).limit(1).toArray()[0].ts;
jsTestLog("Latest oplog timestamp for " + context + ": " + tojson(latestTs));
let currentTs = latestTs;
let timestampsWithMultikeyness = 0;
while (true) {
jsTestLog(
"Checking replica set multikey consistency at timestamp: " + tojson(currentTs) + " (" + context + ")",
);
const primaryMultikeyMetadata = getMultikeyMetadata(
primary,
indexType,
dbName,
collName,
indexId,
multiPathsKey,
wildcardHint,
currentTs,
);
const secondaryMultikeyMetadata = getMultikeyMetadata(
secondary,
indexType,
dbName,
collName,
indexId,
multiPathsKey,
wildcardHint,
currentTs,
);
assert.eq(
primaryMultikeyMetadata,
secondaryMultikeyMetadata,
"Primary and secondary multikey metadata differ at timestamp " +
tojson(currentTs) +
" (" +
context +
"), iteration: " +
timestampsWithMultikeyness,
);
if (absentMultikeyMetadata(primaryMultikeyMetadata, indexType)) {
assert.gte(
timestampsWithMultikeyness,
expectedMinTimestamps,
"Expected at least " + expectedMinTimestamps + " timestamp(s) with multikey metadata for " + context,
);
break;
}
timestampsWithMultikeyness++;
currentTs = decrementTimestamp(primary, currentTs);
}
}
/**
* Verifies timestamp-consistency of multikey metadata between primary and secondary after a
* multi-document transaction that inserts into a pre-existing indexed collection.
*
* Expected minimum timestamps:
* - Txn commit: 2 (setMultikeyMetadata entry + commit entry)
* - Txn abort: 1 (setMultikeyMetadata entry committed by the side txn survives abort)
*/
function verifyTimestampConsistency(commitOrAbort, indexType, collName) {
jsTestLog(
"Verifying multikey timestamp consistency for " +
indexType +
" index against collection '" +
collName +
"' with transaction that will " +
commitOrAbort,
);
const testColl = testDB[collName];
assert.commandWorked(testDB.runCommand({drop: collName}));
// Create collection and index.
assert.commandWorked(testColl.createIndex({a: 1}));
if (indexType == IndexType.REGULAR) {
assert.commandWorked(testColl.createIndex({a: 1}));
} else {
assert.commandWorked(testColl.createIndex({"$**": 1}));
}
// Start a multi-document transaction.
const session = testDB.getMongo().startSession();
const sessionDB = session.getDatabase(dbName);
const sessionColl = sessionDB[collName];
session.startTransaction();
// Insert a document with an array value for field 'a', making the index multikey.
assert.commandWorked(sessionColl.insert({a: [1, 2]}));
// Commit the transaction.
if (commitOrAbort == TxnCommitOrAbort.COMMIT) {
assert.commandWorked(session.commitTransaction_forTesting());
jsTestLog("Transaction committed as expected");
@ -87,44 +256,79 @@ function verifyTimestampConsistency(commitOrAbort, collName) {
}
session.endSession();
const latestTs = primary.getDB("local").oplog.rs.find().sort({$natural: -1}).limit(1).toArray()[0].ts;
jsTestLog("Latest oplog timestamp after transaction " + commitOrAbort + ": " + tojson(latestTs));
// Commit: both the setMultikeyMetadata side-txn oplog entry and the parent commit's insert
// emit multikey state updates — 2 distinct timestamps show multikey metadata.
// Abort: only the setMultikeyMetadata side-txn oplog entry survives the aborted parent —
// 1 timestamp shows multikey metadata.
const expectedMinTimestamps = commitOrAbort == TxnCommitOrAbort.COMMIT ? 2 : 1;
let currentTs = latestTs;
let timestampsWithMultikeyness = 0;
const absentMultikeyMetadata = BinData(0, "AA=="); // Base64 encoding of byte [0]
while (true) {
jsTestLog("Checking replica set consistency of multikeyness at timestamp: " + tojson(currentTs));
const primaryMultikeyMetadata = getMultikeyMetadata(primary, dbName, collName, 1, "a", currentTs);
const secondaryMultikeyMetadata = getMultikeyMetadata(secondary, dbName, collName, 1, "a", currentTs);
// Verify primary and secondary have identical multikeyness.
assert.eq(
primaryMultikeyMetadata,
secondaryMultikeyMetadata,
"Primary and secondary multikey paths differ at timestamp " +
tojson(currentTs) +
", iteration: " +
timestampsWithMultikeyness,
);
if (bsonBinaryEqual(primaryMultikeyMetadata, absentMultikeyMetadata)) {
// If the transaction committed, we expect at least two timestamps with multikeyness:
// the setMultikeyMetadata timestamp, and the transaction's commit timestamp.
// If the transaction aborted, we expect only one timestamp with multikeyness:
// the setMultikeyMetadata timestamp.
const expectedMinimumMultikeyTimestamps = commitOrAbort == TxnCommitOrAbort.COMMIT ? 2 : 1;
assert.gte(timestampsWithMultikeyness, expectedMinimumMultikeyTimestamps);
break;
}
timestampsWithMultikeyness++;
currentTs = decrementTimestamp(primary, currentTs);
}
walkAndAssertReplicaMultikeyConsistency({
indexType,
collName,
indexId: 1, // index 0 is _id, index 1 is the user index.
multiPathsKey: "a",
wildcardHint: {"$**": 1},
expectedMinTimestamps,
context: indexType + "/" + commitOrAbort + "/" + collName,
});
}
verifyTimestampConsistency(TxnCommitOrAbort.COMMIT, "coll_commit");
verifyTimestampConsistency(TxnCommitOrAbort.ABORT, "coll_abort");
verifyTimestampConsistency(TxnCommitOrAbort.COMMIT, IndexType.REGULAR, "coll_regular_commit");
verifyTimestampConsistency(TxnCommitOrAbort.ABORT, IndexType.REGULAR, "coll_regular_abort");
verifyTimestampConsistency(TxnCommitOrAbort.COMMIT, IndexType.WILDCARD, "coll_wildcard_commit");
verifyTimestampConsistency(TxnCommitOrAbort.ABORT, IndexType.WILDCARD, "coll_wildcard_abort");
/**
* Same as `verifyTimestampConsistency`, but creates the collection + index inside the same
* transaction as the multikey-triggering insert. Exercises the path where the side transaction
* cannot see the index (it was created in the parent transaction), so metadata keys are written in
* the parent transaction instead.
*
* Reuses the shared walker to assert primary/secondary multikey metadata consistency at every
* oplog timestamp. For this path, multikey metadata becomes visible starting at the commit
* timestamp, so the minimum expected count is 1.
*/
function verifyIndexCreatedInSameTransaction(indexType, collName) {
jsTestLog(
"Verifying multikey timestamp consistency for " +
indexType +
" index created in same transaction against collection '" +
collName +
"'",
);
assert.commandWorked(testDB.runCommand({drop: collName}));
const session = testDB.getMongo().startSession();
const sessionDB = session.getDatabase(dbName);
session.startTransaction();
// Create collection and index inside the transaction.
assert.commandWorked(sessionDB.createCollection(collName));
if (indexType == IndexType.REGULAR) {
assert.commandWorked(sessionDB[collName].createIndex({a: 1}));
} else {
assert.commandWorked(sessionDB[collName].createIndex({"$**": 1}));
}
// Insert a document that makes the index multikey.
assert.commandWorked(sessionDB[collName].insert({a: [1, 2]}));
assert.commandWorked(session.commitTransaction_forTesting());
session.endSession();
walkAndAssertReplicaMultikeyConsistency({
indexType,
collName,
indexId: 1, // index 0 is _id, index 1 is the user index.
multiPathsKey: "a",
wildcardHint: {"$**": 1},
expectedMinTimestamps: 1,
context: indexType + "/same-txn-create/" + collName,
});
}
verifyIndexCreatedInSameTransaction(IndexType.REGULAR, "coll_regular_same_txn");
verifyIndexCreatedInSameTransaction(IndexType.WILDCARD, "coll_wildcard_same_txn");
rst.stopSet();

View File

@ -96,14 +96,6 @@ void appendToKeyString(const std::vector<BSONElement>& elems,
}
}
// Append 'MinKey' to 'keyString'. Multikey path keys use 'MinKey' for non-wildcard fields.
void appendToMultiKeyString(const std::vector<BSONElement>& elems,
key_string::PooledBuilder* keyString) {
for (size_t i = 0; i < elems.size(); i++) {
keyString->appendBSONElement(kMinBSONKey.firstElement());
}
}
/**
* A helper class for generating all the various types of keys for a wildcard index.
*
@ -232,22 +224,14 @@ void SingleDocumentKeyEncoder::_addMultiKey(const FieldRef& fullPath) {
// 'multikeyPaths' may be nullptr if the access method is being used in an operation which does
// not require multikey path generation.
if (_multikeyPaths) {
key_string::PooledBuilder keyString(_pooledBufferBuilder, _keyStringVersion, _ordering);
if (!_preElems.empty()) {
appendToMultiKeyString(_preElems, &keyString);
}
for (auto elem : BSON("" << 1 << "" << fullPath.dottedField())) {
keyString.appendBSONElement(elem);
}
if (!_postElems.empty()) {
appendToMultiKeyString(_postElems, &keyString);
}
keyString.appendRecordId(record_id_helpers::reservedIdFor(
record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, *_rsKeyFormat));
_multikeyPaths->push_back(keyString.release());
_multikeyPaths->push_back(
WildcardKeyGenerator::makeMultikeyMetadataKey(fullPath.dottedField(),
_preElems.size(),
_postElems.size(),
_keyStringVersion,
_ordering,
*_rsKeyFormat,
_pooledBufferBuilder));
}
}
@ -433,4 +417,29 @@ void WildcardKeyGenerator::generateKeys(SharedBufferFragmentBuilder& pooledBuffe
multikeyPaths->adopt_sequence(std::move(multikeyPathsSequence));
keys->adopt_sequence(std::move(keysSequence));
}
key_string::Value WildcardKeyGenerator::makeMultikeyMetadataKey(
StringData fieldPath,
size_t prefixFieldCount,
size_t suffixFieldCount,
key_string::Version version,
Ordering ordering,
KeyFormat rsKeyFormat,
SharedBufferFragmentBuilder& pooledBuilder) {
key_string::PooledBuilder keyString(pooledBuilder, version, ordering);
for (size_t i = 0; i < prefixFieldCount; ++i) {
keyString.appendBSONElement(kMinBSONKey.firstElement());
}
for (auto elem : BSON("" << 1 << "" << fieldPath)) {
keyString.appendBSONElement(elem);
}
for (size_t i = 0; i < suffixFieldCount; ++i) {
keyString.appendBSONElement(kMinBSONKey.firstElement());
}
keyString.appendRecordId(record_id_helpers::reservedIdFor(
record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, rsKeyFormat));
return keyString.release();
}
} // namespace mongo

View File

@ -99,6 +99,19 @@ public:
KeyStringSet* multikeyPaths,
const boost::optional<RecordId>& id = boost::none) const;
/**
* Builds a single wildcard multikey metadata KeyString for 'fieldPath'. Used both during
* document-driven key generation on the primary and when regenerating keys from a
* `setMultikeyMetadata` oplog entry on the secondary.
*/
static key_string::Value makeMultikeyMetadataKey(StringData fieldPath,
size_t prefixFieldCount,
size_t suffixFieldCount,
key_string::Version version,
Ordering ordering,
KeyFormat rsKeyFormat,
SharedBufferFragmentBuilder& pooledBuilder);
private:
WildcardProjection _proj;
const CollatorInterface* _collator;

View File

@ -105,7 +105,7 @@ public:
/**
* Contain utilities to work with wildcard fields used for Wildcard indexes.
*/
struct WildcardNames {
struct MONGO_MOD_PUBLIC WildcardNames {
static constexpr StringData WILDCARD_FIELD_NAME = "$**"_sd;
static constexpr StringData WILDCARD_FIELD_NAME_SUFFIX = ".$**"_sd;

View File

@ -92,6 +92,13 @@ OperationContext::~OperationContext() {
releaseOperationKey();
}
RecoveryUnit& OperationContext::getOrCreateWildcardMultikeyReadRecoveryUnit() {
if (!_wildcardMultikeyReadRu) {
_wildcardMultikeyReadRu = getServiceContext()->getStorageEngine()->newRecoveryUnit();
}
return *_wildcardMultikeyReadRu;
}
void OperationContext::setDeadlineAndMaxTime(Date_t when,
Microseconds maxTime,
ErrorCodes::Error timeoutError) {

View File

@ -356,6 +356,33 @@ public:
}
}
/**
* Returns true if a side transaction has committed wildcard multikey metadata keys
* during this transaction. Set by the side transaction commit path and propagated
* to each new opCtx via unstashTransactionResources().
*/
bool hasSideCommittedWildcardKeys() const {
return _hasSideCommittedWildcardKeys;
}
void setHasSideCommittedWildcardKeys() {
_hasSideCommittedWildcardKeys = true;
// Defensive invalidation. As each statement in a multi-document transaction runs in its
// own operation context, but we don't want to rely on that assumption holding forever.
_wildcardMultikeyReadRu.reset();
}
/**
* Returns the side recovery unit used to read wildcard multikey metadata keys committed
* by side transactions during this multi-doc transaction. Lazily creates a fresh RU on
* first call so the snapshot reflects state at "now" (after side commits). Subsequent
* calls within the same planning operation reuse the same RU, providing a consistent
* snapshot across all wildcard indexes referenced by the query (e.g. $or over multiple
* wildcard indexes). Invalidated by setHasSideCommittedWildcardKeys() so newly side-
* committed keys are visible on the next read (RYOW).
*/
RecoveryUnit& getOrCreateWildcardMultikeyReadRecoveryUnit();
bool isRetryableWrite() const {
return _txnNumber &&
(!_inMultiDocumentTransaction ||
@ -645,6 +672,8 @@ public:
_txnNumber = boost::none;
_txnRetryCounter = boost::none;
_killOpsExempt = false;
_hasSideCommittedWildcardKeys = false;
_wildcardMultikeyReadRu.reset();
}
/**
@ -1140,6 +1169,14 @@ private:
bool _inMultiDocumentTransaction = false;
bool _isStartingMultiDocumentTransaction = false;
bool _isActiveTransactionParticipant = false;
// TODO (SERVER-77213): Move multi-statement transaction state out of operation context.
bool _hasSideCommittedWildcardKeys = false;
// TODO (SERVER-77213): Move multi-statement transaction state out of operation context.
// Cached side recovery unit used by getOrCreateWildcardMultikeyReadRecoveryUnit() to
// read side-committed wildcard multikey metadata keys during query planning. Lazily
// created and invalidated when a new side commit occurs, so reads always see the
// latest side-committed state across all wildcard indexes referenced by a query.
std::unique_ptr<RecoveryUnit> _wildcardMultikeyReadRu;
bool _isCommandForwardedFromRouter = false;
// Commands from user applications must run validations and enforce constraints. Operations from
// a trusted source, such as initial sync or consuming an oplog entry generated by a primary

View File

@ -34,6 +34,7 @@
#include <boost/container/small_vector.hpp>
// IWYU pragma: no_include "boost/intrusive/detail/iterator.hpp"
#include "mongo/base/checked_cast.h"
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsonmisc.h"
@ -53,12 +54,15 @@
#include "mongo/db/query/wildcard_multikey_paths.h"
#include "mongo/db/record_id.h"
#include "mongo/db/record_id_helpers.h"
#include "mongo/db/service_context.h"
#include "mongo/db/shard_role/lock_manager/exception_util.h"
#include "mongo/db/shard_role/shard_catalog/index_catalog_entry.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/storage/index_entry_comparison.h"
#include "mongo/db/storage/key_format.h"
#include "mongo/db/storage/recovery_unit.h"
#include "mongo/db/storage/sorted_data_interface.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/str.h"
@ -161,30 +165,25 @@ static BSONObj buildIndexBoundsKeyPattern(const BSONObj& wiKeyPattern) {
}
/**
* Retrieves from the wildcard index the set of multikey path metadata keys bounded by
* 'indexBounds'. Returns the set of multikey paths represented by the keys.
* Scans wildcard multikey metadata keys from the index using the given recovery unit and
* index bounds. Adds discovered multikey paths to 'multikeyPaths' and updates 'stats'.
*/
static std::set<FieldRef> getWildcardMultikeyPathSetHelper(OperationContext* opCtx,
const IndexCatalogEntry* index,
const IndexBounds& indexBounds,
MultikeyMetadataAccessStats* stats) {
const WildcardAccessMethod* wam =
static_cast<const WildcardAccessMethod*>(index->accessMethod());
stats->numSeeks = 0;
stats->keysExamined = 0;
auto& ru = *shard_role_details::getRecoveryUnit(opCtx);
static void scanWildcardMetadataKeys(OperationContext* opCtx,
const WildcardAccessMethod* wam,
RecoveryUnit& ru,
const IndexBounds& indexBounds,
const BSONObj& keyPattern,
std::set<FieldRef>* multikeyPaths,
MultikeyMetadataAccessStats* stats) {
auto cursor = wam->newCursor(opCtx, ru);
constexpr int kForward = 1;
const auto keyPattern = buildIndexBoundsKeyPattern(index->descriptor()->keyPattern());
IndexBoundsChecker checker(&indexBounds, keyPattern, kForward);
IndexSeekPoint seekPoint;
if (!checker.getStartSeekPoint(&seekPoint)) {
return {};
return;
}
std::set<FieldRef> multikeyPaths{};
key_string::Builder builder(wam->getSortedDataInterface()->getKeyStringVersion(),
wam->getSortedDataInterface()->getOrdering());
@ -197,7 +196,7 @@ static std::set<FieldRef> getWildcardMultikeyPathSetHelper(OperationContext* opC
switch (checker.checkKey(entry->key, &seekPoint)) {
case IndexBoundsChecker::VALID:
multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry));
multikeyPaths->emplace(extractMultikeyPathFromIndexKey(*entry));
entry = cursor->next(ru);
break;
@ -219,6 +218,52 @@ static std::set<FieldRef> getWildcardMultikeyPathSetHelper(OperationContext* opC
MONGO_UNREACHABLE;
}
}
}
/**
* Retrieves from the wildcard index the set of multikey path metadata keys bounded by
* 'indexBounds'. Returns the set of multikey paths represented by the keys.
*/
static std::set<FieldRef> getWildcardMultikeyPathSetHelper(OperationContext* opCtx,
const IndexCatalogEntry* index,
const IndexBounds& indexBounds,
MultikeyMetadataAccessStats* stats) {
const WildcardAccessMethod* wam =
checked_cast<const WildcardAccessMethod*>(index->accessMethod());
stats->numSeeks = 0;
stats->keysExamined = 0;
const auto keyPattern = buildIndexBoundsKeyPattern(index->descriptor()->keyPattern());
auto& parentRu = *shard_role_details::getRecoveryUnit(opCtx);
std::set<FieldRef> multikeyPaths{};
// Always read from the parent transaction's RU first. This covers the fallback case
// where the side transaction was abandoned (because the index was created in the same
// transaction and is not visible to the side transaction's snapshot). In that case,
// metadata keys were written directly in the parent transaction and are only visible
// through the parent's RU — the fresh snapshot below cannot see uncommitted parent writes.
scanWildcardMetadataKeys(opCtx, wam, parentRu, indexBounds, keyPattern, &multikeyPaths, stats);
// In active multi-document transactions, also read from a fresh snapshot to see
// side-committed metadata keys — but only if a side transaction actually committed
// wildcard metadata keys during this transaction. The parent transaction's WT snapshot
// predates the side transaction's commit, so it can't see those keys. The union of both
// scans covers both code paths:
// - Normal case (side txn succeeded): fresh RU sees the side-committed keys.
// - Fallback case (side txn abandoned): parent RU sees its own uncommitted writes.
// Safe because multikey is monotonic — the union is always correct.
if (opCtx->inMultiDocumentTransaction() && parentRu.isActive() &&
opCtx->hasSideCommittedWildcardKeys()) {
// Cached on opCtx so a query touching multiple wildcard indexes (e.g. $or over two
// wildcard indexes) reads them through one consistent snapshot. The cache is reset
// by setHasSideCommittedWildcardKeys() so the next read after a new side commit
// opens a fresh snapshot that sees the just-committed keys (RYOW).
auto& freshRu = opCtx->getOrCreateWildcardMultikeyReadRecoveryUnit();
scanWildcardMetadataKeys(
opCtx, wam, freshRu, indexBounds, keyPattern, &multikeyPaths, stats);
}
return multikeyPaths;
}

View File

@ -859,9 +859,11 @@ mongo_cc_library(
"//src/mongo/db:dbhelpers",
"//src/mongo/db:import_collection_oplog_entry",
"//src/mongo/db:multitenancy",
"//src/mongo/db:record_id_helpers",
"//src/mongo/db:server_base",
"//src/mongo/db/collection_crud",
"//src/mongo/db/commands:txn_cmd_request",
"//src/mongo/db/index:index_access_method",
"//src/mongo/db/index_builds:index_build_oplog_entry",
"//src/mongo/db/index_builds:index_builds_coordinator",
"//src/mongo/db/index_builds/primary_driven:primary_driven_index_builds",
@ -883,6 +885,7 @@ mongo_cc_library(
"//src/mongo/db/shard_role/ddl:index_commands_idl",
"//src/mongo/db/shard_role/lock_manager:exception_util",
"//src/mongo/db/shard_role/shard_catalog:catalog_helpers",
"//src/mongo/db/shard_role/shard_catalog:set_multikey_metadata_oplog_helpers",
"//src/mongo/db/stats:counters",
"//src/mongo/db/stats:server_read_concern_write_concern_metrics",
"//src/mongo/db/storage:storage_engine_direct_crud",

View File

@ -49,11 +49,14 @@
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/import_collection_oplog_entry_gen.h"
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/index/index_constants.h"
#include "mongo/db/index/wildcard_access_method.h"
#include "mongo/db/index_builds/index_build_oplog_entry.h"
#include "mongo/db/index_builds/index_builds_coordinator.h"
#include "mongo/db/index_builds/index_builds_manager.h"
#include "mongo/db/index_builds/primary_driven/util.h"
#include "mongo/db/index_names.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/namespace_string_util.h"
#include "mongo/db/op_observer/batched_write_context.h"
@ -68,6 +71,7 @@
#include "mongo/db/query/write_ops/update_request.h"
#include "mongo/db/query/write_ops/update_result.h"
#include "mongo/db/query/write_ops/write_ops_parsers.h"
#include "mongo/db/record_id_helpers.h"
#include "mongo/db/repl/always_allow_non_local_writes.h"
#include "mongo/db/repl/apply_ops.h"
#include "mongo/db/repl/container_oplog_entry_gen.h"
@ -117,6 +121,7 @@
#include "mongo/db/shard_role/shard_catalog/index_catalog.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/shard_role/shard_catalog/rename_collection.h"
#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h"
#include "mongo/db/shard_role/shard_catalog/uncommitted_catalog_updates.h"
#include "mongo/db/shard_role/shard_role.h"
#include "mongo/db/shard_role/transaction_resources.h"
@ -125,10 +130,12 @@
#include "mongo/db/stats/counters.h"
#include "mongo/db/stats/server_write_concern_metrics.h"
#include "mongo/db/storage/ident.h"
#include "mongo/db/storage/key_string/key_string.h"
#include "mongo/db/storage/kv/kv_engine.h"
#include "mongo/db/storage/oplog_truncate_marker_parameters_gen.h"
#include "mongo/db/storage/record_data.h"
#include "mongo/db/storage/recovery_unit.h"
#include "mongo/db/storage/sorted_data_interface.h"
#include "mongo/db/storage/storage_engine.h"
#include "mongo/db/storage/storage_engine_direct_crud.h"
#include "mongo/db/storage/storage_options.h"
@ -1428,7 +1435,7 @@ const StringMap<ApplyOpMetadata> kOpsMap = {
const auto setMkEntry = SetMultikeyMetadataOplogEntry::parse(cmd);
const auto idxName = setMkEntry.getIdxName();
const auto paths = uassertStatusOK(multikey_paths::parse(setMkEntry.getPaths()));
const auto pathsObj = setMkEntry.getPaths();
// Ignore setMultikeyMetadata when in initial sync, to avoid having to deal with
// idempotency. This is acceptable because by design, the resynced node is not expected to
@ -1463,8 +1470,30 @@ const StringMap<ApplyOpMetadata> kOpsMap = {
<< "Failed to set multikey paths due to missing index: " << idxName,
idxEntry);
// Determine if this is a wildcard index from the catalog, then either regenerate
// wildcard metadata KeyStrings or parse regular multikey paths.
const bool isWildcard =
idxEntry->descriptor()->getAccessMethodName() == IndexNames::WILDCARD;
KeyStringSet metadataKeys;
MultikeyPaths multikeyPaths;
if (isWildcard) {
const auto* sdi =
idxEntry->accessMethod()->asSortedData()->getSortedDataInterface();
metadataKeys =
set_multikey_metadata_oplog_helpers::regenerateMetadataKeysFromFieldPaths(
pathsObj,
sdi->getKeyStringVersion(),
sdi->getOrdering(),
sdi->rsKeyFormat(),
idxEntry->descriptor()->keyPattern());
} else {
multikeyPaths = uassertStatusOK(multikey_paths::parse(pathsObj));
}
WriteUnitOfWork wuow(opCtx);
idxEntry->setMultikeyForApplyOps(opCtx, coll.getCollectionPtr(), paths);
idxEntry->setMultikeyForApplyOps(
opCtx, coll.getCollectionPtr(), metadataKeys, multikeyPaths);
wuow.commit();
});
return Status::OK();

View File

@ -83,6 +83,7 @@
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/replication_recovery.h"
#include "mongo/db/repl/replication_recovery_mock.h"
#include "mongo/db/repl/set_multikey_metadata_oplog_entry_gen.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/timestamp_block.h"
@ -111,6 +112,7 @@
#include "mongo/db/shard_role/shard_catalog/index_catalog.h"
#include "mongo/db/shard_role/shard_catalog/index_catalog_entry.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h"
#include "mongo/db/shard_role/shard_role.h"
#include "mongo/db/shard_role/transaction_resources.h"
#include "mongo/db/storage/damage_vector.h"
@ -1780,6 +1782,612 @@ TEST_F(StorageTimestampTest, PrimarySetsMultikeyInsideMultiDocumentTransaction)
assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {{0}});
}
TEST_F(StorageTimestampTest, PrimarySetsWildcardMultikeyInsideMultiDocumentTransaction) {
auto service = _opCtx->getServiceContext();
auto sessionCatalog = SessionCatalog::get(service);
sessionCatalog->reset_forTest();
auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx);
mongoDSessionCatalog->onStepUp(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.PrimarySetsWildcardMultikeyInsideMultiDocumentTransaction");
create(nss);
auto indexName = "wildcard";
auto indexSpec = BSON("name" << indexName << "ns" << nss.ns_forTest() << "key"
<< BSON("$**" << 1) << "v" << static_cast<int>(kIndexVersion));
auto doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
auto storageEngine = cc().getServiceContext()->getStorageEngine();
storageEngine->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTime = _clock->tickClusterTime(1);
auto beforeTxnTs = beforeTxnTime.asTimestamp();
const auto sessionId = makeLogicalSessionIdForTest();
_opCtx->setLogicalSessionId(sessionId);
_opCtx->setTxnNumber(1);
_opCtx->setInMultiDocumentTransaction();
auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx);
auto txnParticipant = TransactionParticipant::get(_opCtx);
ASSERT(txnParticipant);
txnParticipant.beginOrContinue(_opCtx,
{*_opCtx->getTxnNumber()},
false /* autocommit */,
TransactionParticipant::TransactionActions::kStart);
txnParticipant.unstashTransactionResources(_opCtx, "insert");
{
auto collAcq = acquireCollection(
_opCtx,
CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite),
MODE_IX);
insertDocument(collAcq.getCollectionPtr(), InsertStatement(doc));
}
txnParticipant.commitUnpreparedTransaction(_opCtx);
txnParticipant.stashTransactionResources(_opCtx);
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
// Verify the document was committed.
assertDocumentAtTimestamp(collPtr, presentTs, BSONObj());
assertDocumentAtTimestamp(collPtr, _nullTs, doc);
// Verify the catalog multikey flag is set at the latest timestamp.
assertMultikeyPaths(_opCtx, collPtr, indexName, presentTs, false /* shouldBeMultikey */, {});
assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {});
// Verify the wildcard metadata keys are present at the latest timestamp.
// The field "a" is an array, so it should be recorded as a multikey path.
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a"};
{
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(1, paths.size());
ASSERT_EQUALS("a", paths.begin()->dottedField());
}
{
// Before the transaction started, no metadata keys should be visible.
OneOffRead oor(_opCtx, beforeTxnTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(0, paths.size());
}
// Before the transaction, the catalog multikey flag should not be set.
assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {});
}
TEST_F(StorageTimestampTest,
PrimarySetsWildcardMultikeyInsideMultiDocumentTransactionMultiplePaths) {
auto service = _opCtx->getServiceContext();
auto sessionCatalog = SessionCatalog::get(service);
sessionCatalog->reset_forTest();
auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx);
mongoDSessionCatalog->onStepUp(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.PrimarySetsWildcardMultikeyInsideMultiDocTxnMultiplePaths");
create(nss);
auto indexName = "wildcard";
auto indexSpec = BSON("name" << indexName << "ns" << nss.ns_forTest() << "key"
<< BSON("$**" << 1) << "v" << static_cast<int>(kIndexVersion));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
auto storageEngine = cc().getServiceContext()->getStorageEngine();
storageEngine->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTime = _clock->tickClusterTime(1);
auto beforeTxnTs = beforeTxnTime.asTimestamp();
const auto sessionId = makeLogicalSessionIdForTest();
_opCtx->setLogicalSessionId(sessionId);
_opCtx->setTxnNumber(1);
_opCtx->setInMultiDocumentTransaction();
auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx);
auto txnParticipant = TransactionParticipant::get(_opCtx);
ASSERT(txnParticipant);
txnParticipant.beginOrContinue(_opCtx,
{*_opCtx->getTxnNumber()},
false /* autocommit */,
TransactionParticipant::TransactionActions::kStart);
txnParticipant.unstashTransactionResources(_opCtx, "insert");
{
auto collAcq = acquireCollection(
_opCtx,
CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite),
MODE_IX);
// Insert documents that make multiple paths multikey.
insertDocument(collAcq.getCollectionPtr(),
InsertStatement(BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2))));
insertDocument(collAcq.getCollectionPtr(),
InsertStatement(BSON("_id" << 2 << "b" << BSON_ARRAY(3 << 4))));
insertDocument(collAcq.getCollectionPtr(),
InsertStatement(BSON("_id" << 3 << "c" << BSON_ARRAY(5 << 6))));
}
txnParticipant.commitUnpreparedTransaction(_opCtx);
txnParticipant.stashTransactionResources(_opCtx);
// Verify all three multikey paths are present in the wildcard index.
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a", "b", "c"};
{
// Before the transaction started, no metadata keys should be visible.
OneOffRead oor(_opCtx, beforeTxnTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(0, paths.size());
}
{
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(3, paths.size());
std::set<std::string> pathStrings;
for (const auto& p : paths) {
pathStrings.insert(std::string(p.dottedField()));
}
ASSERT(pathStrings.count("a"));
ASSERT(pathStrings.count("b"));
ASSERT(pathStrings.count("c"));
}
}
TEST_F(StorageTimestampTest, PrimarySetsWildcardMultikeyInsideMultiDocumentTransactionPrepared) {
auto service = _opCtx->getServiceContext();
auto sessionCatalog = SessionCatalog::get(service);
sessionCatalog->reset_forTest();
auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx);
mongoDSessionCatalog->onStepUp(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.PrimarySetsWildcardMultikeyInsideMultiDocTxnPrepared");
create(nss);
auto indexName = "wildcard";
auto indexSpec = BSON("name" << indexName << "ns" << nss.ns_forTest() << "key"
<< BSON("$**" << 1) << "v" << static_cast<int>(kIndexVersion));
auto doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
auto storageEngine = cc().getServiceContext()->getStorageEngine();
storageEngine->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTime = _clock->tickClusterTime(1);
auto beforeTxnTs = beforeTxnTime.asTimestamp();
const auto sessionId = makeLogicalSessionIdForTest();
_opCtx->setLogicalSessionId(sessionId);
_opCtx->setTxnNumber(1);
_opCtx->setInMultiDocumentTransaction();
auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx);
auto txnParticipant = TransactionParticipant::get(_opCtx);
ASSERT(txnParticipant);
txnParticipant.beginOrContinue(_opCtx,
{*_opCtx->getTxnNumber()},
false /* autocommit */,
TransactionParticipant::TransactionActions::kStart);
txnParticipant.unstashTransactionResources(_opCtx, "insert");
{
auto collAcq = acquireCollection(
_opCtx,
CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite),
MODE_IX);
insertDocument(collAcq.getCollectionPtr(), InsertStatement(doc));
}
// Prepare the transaction.
txnParticipant.prepareTransaction(_opCtx, {});
txnParticipant.stashTransactionResources(_opCtx);
// Commit the prepared transaction.
const auto commitTime = _clock->getTime();
const auto commitClusterTime = commitTime.clusterTime();
const auto commitTs = commitClusterTime.addTicks(1).asTimestamp();
txnParticipant.unstashTransactionResources(_opCtx, "commitTransaction");
{
FailPointEnableBlock failPointBlock("skipCommitTxnCheckPrepareMajorityCommitted");
txnParticipant.commitPreparedTransaction(_opCtx, commitTs, {});
}
txnParticipant.stashTransactionResources(_opCtx);
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
// Verify the document was committed.
assertDocumentAtTimestamp(collPtr, _nullTs, doc);
// Verify the catalog multikey flag is set at the latest timestamp.
assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {});
// Before the transaction, multikey should not be set.
assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {});
// Verify the wildcard metadata keys are present at the latest timestamp.
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a"};
{
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(1, paths.size());
ASSERT_EQUALS("a", paths.begin()->dottedField());
}
{
// Before the transaction started, no metadata keys should be visible.
OneOffRead oor(_opCtx, beforeTxnTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(0, paths.size());
}
}
TEST_F(StorageTimestampTest, PrimarySetsCompoundWildcardMultikeyInsideMultiDocumentTransaction) {
auto service = _opCtx->getServiceContext();
auto sessionCatalog = SessionCatalog::get(service);
sessionCatalog->reset_forTest();
auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx);
mongoDSessionCatalog->onStepUp(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.PrimarySetsCompoundWildcardMultikeyInsideMultiDocTxn");
create(nss);
auto indexName = "compound_wildcard";
auto indexSpec =
BSON("name" << indexName << "ns" << nss.ns_forTest() << "key"
<< BSON("x" << 1 << "$**" << 1) << "v" << static_cast<int>(kIndexVersion));
auto doc = BSON("_id" << 1 << "x" << 5 << "a" << BSON_ARRAY(1 << 2));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
auto storageEngine = cc().getServiceContext()->getStorageEngine();
storageEngine->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTime = _clock->tickClusterTime(1);
auto beforeTxnTs = beforeTxnTime.asTimestamp();
const auto sessionId = makeLogicalSessionIdForTest();
_opCtx->setLogicalSessionId(sessionId);
_opCtx->setTxnNumber(1);
_opCtx->setInMultiDocumentTransaction();
auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx);
auto txnParticipant = TransactionParticipant::get(_opCtx);
ASSERT(txnParticipant);
txnParticipant.beginOrContinue(_opCtx,
{*_opCtx->getTxnNumber()},
false /* autocommit */,
TransactionParticipant::TransactionActions::kStart);
txnParticipant.unstashTransactionResources(_opCtx, "insert");
{
auto collAcq = acquireCollection(
_opCtx,
CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite),
MODE_IX);
insertDocument(collAcq.getCollectionPtr(), InsertStatement(doc));
}
txnParticipant.commitUnpreparedTransaction(_opCtx);
txnParticipant.stashTransactionResources(_opCtx);
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {});
// Before the transaction, the catalog multikey flag should not be set.
assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {});
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a"};
{
// Before the transaction started, no metadata keys should be visible.
OneOffRead oor(_opCtx, beforeTxnTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(0, paths.size());
}
{
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(1, paths.size());
ASSERT_EQUALS("a", paths.begin()->dottedField());
}
}
// Builds a setMultikeyMetadata op:'c' oplog entry that mirrors what
// OpObserverImpl::onSetMultikeyMetadata writes on the primary.
namespace {
repl::OplogEntry makeSetMultikeyMetadataOplogEntry(const NamespaceString& nss,
const std::string& indexName,
const BSONObj& paths,
Timestamp ts,
long long term = 1) {
SetMultikeyMetadataOplogEntry objectEntry(nss, indexName, paths);
return repl::OplogEntry(BSON("ts" << ts << "t" << term << "v" << 2 << "op"
<< "c"
<< "ns" << nss.getCommandNS().ns_forTest() << "wall"
<< Date_t() << "o" << objectEntry.toBSON()));
}
} // namespace
TEST_F(StorageTimestampTest, SecondaryAppliesWildcardMultikeyMetadataFromTransaction) {
repl::UnreplicatedWritesBlock uwb(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.SecondaryAppliesWildcardMultikeyMetadataFromTransaction");
ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll())));
auto indexName = "wildcard";
auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v"
<< static_cast<int>(kIndexVersion));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
_coordinatorMock->alwaysAllowWrites(false);
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp();
const auto applyTs = _clock->tickClusterTime(1).asTimestamp();
auto setMkOp = makeSetMultikeyMetadataOplogEntry(
nss, indexName, set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a"}), applyTs);
std::vector<repl::OplogEntry> ops = {setMkOp};
DoNothingOplogApplierObserver observer;
auto storageInterface = repl::StorageInterface::get(_opCtx);
auto workerPool = repl::makeReplWorkerPool();
repl::OplogApplierImpl oplogApplier(
nullptr,
nullptr,
&observer,
_coordinatorMock,
_consistencyMarkers,
storageInterface,
repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
workerPool.get());
uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
// Catalog flag set after apply, not before.
assertMultikeyPaths(_opCtx, collPtr, indexName, applyTs, true /* shouldBeMultikey */, {});
assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {});
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a"};
{
// At the apply timestamp, the regenerated metadata keys are visible.
OneOffRead oor(_opCtx, applyTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(1, paths.size());
ASSERT_EQUALS("a", paths.begin()->dottedField());
}
{
// Before the transaction started, no metadata keys should be visible.
OneOffRead oor(_opCtx, beforeTxnTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(0, paths.size());
}
}
TEST_F(StorageTimestampTest, SecondaryAppliesWildcardMultikeyMetadataFromTransactionMultiplePaths) {
repl::UnreplicatedWritesBlock uwb(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.SecondaryAppliesWildcardMkMetaFromTxnMultiplePaths");
ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll())));
auto indexName = "wildcard";
auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v"
<< static_cast<int>(kIndexVersion));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
_coordinatorMock->alwaysAllowWrites(false);
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp();
const auto applyTs = _clock->tickClusterTime(1).asTimestamp();
// A single setMultikeyMetadata entry carries all multikey paths from one insert.
auto setMkOp = makeSetMultikeyMetadataOplogEntry(
nss,
indexName,
set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a", "b", "c"}),
applyTs);
DoNothingOplogApplierObserver observer;
auto storageInterface = repl::StorageInterface::get(_opCtx);
auto workerPool = repl::makeReplWorkerPool();
repl::OplogApplierImpl oplogApplier(
nullptr,
nullptr,
&observer,
_coordinatorMock,
_consistencyMarkers,
storageInterface,
repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
workerPool.get());
uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, {setMkOp}));
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a", "b", "c"};
{
// At the apply timestamp, all three regenerated metadata keys are visible.
OneOffRead oor(_opCtx, applyTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(3, paths.size());
std::set<std::string> pathStrings;
for (const auto& p : paths) {
pathStrings.insert(std::string(p.dottedField()));
}
ASSERT(pathStrings.count("a"));
ASSERT(pathStrings.count("b"));
ASSERT(pathStrings.count("c"));
}
{
// Before the transaction started, no metadata keys should be visible.
OneOffRead oor(_opCtx, beforeTxnTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(0, paths.size());
}
}
TEST_F(StorageTimestampTest, SecondaryAppliesCompoundWildcardMultikeyMetadataFromTransaction) {
repl::UnreplicatedWritesBlock uwb(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.SecondaryAppliesCompoundWildcardMkMetaFromTxn");
ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll())));
auto indexName = "compound_wildcard";
auto indexSpec = BSON("name" << indexName << "key" << BSON("x" << 1 << "$**" << 1) << "v"
<< static_cast<int>(kIndexVersion));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
_coordinatorMock->alwaysAllowWrites(false);
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp();
const auto applyTs = _clock->tickClusterTime(1).asTimestamp();
auto setMkOp = makeSetMultikeyMetadataOplogEntry(
nss, indexName, set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a"}), applyTs);
std::vector<repl::OplogEntry> ops = {setMkOp};
DoNothingOplogApplierObserver observer;
auto storageInterface = repl::StorageInterface::get(_opCtx);
auto workerPool = repl::makeReplWorkerPool();
repl::OplogApplierImpl oplogApplier(
nullptr,
nullptr,
&observer,
_coordinatorMock,
_consistencyMarkers,
storageInterface,
repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
workerPool.get());
uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
assertMultikeyPaths(_opCtx, collPtr, indexName, applyTs, true /* shouldBeMultikey */, {});
assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {});
auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName);
ASSERT(entry);
stdx::unordered_set<std::string> fieldSet = {"a"};
{
// At the apply timestamp, the regenerated metadata key is visible.
OneOffRead oor(_opCtx, applyTs);
MultikeyMetadataAccessStats stats;
auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats);
ASSERT_EQUALS(1, paths.size());
ASSERT_EQUALS("a", paths.begin()->dottedField());
}
}
TEST_F(StorageTimestampTest, SecondaryAppliesRegularMultikeyMetadataFromTransaction) {
repl::UnreplicatedWritesBlock uwb(_opCtx);
NamespaceString nss = NamespaceString::createNamespaceString_forTest(
"unittests.SecondaryAppliesRegularMkMetaFromTxn");
ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll())));
auto indexName = "a_1";
auto keyPattern = BSON("a" << 1);
auto indexSpec =
BSON("name" << indexName << "key" << keyPattern << "v" << static_cast<int>(kIndexVersion));
ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec));
_coordinatorMock->alwaysAllowWrites(false);
const auto currentTime = _clock->getTime();
const auto presentTs = currentTime.clusterTime().asTimestamp();
cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs);
const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp();
const auto applyTs = _clock->tickClusterTime(1).asTimestamp();
// Regular index uses multikeyPaths bitset (per-field array of multikey component indexes)
// instead of wildcard's field-path list.
auto setMkOp = makeSetMultikeyMetadataOplogEntry(
nss, indexName, multikey_paths::serialize(keyPattern, {{0}}), applyTs);
std::vector<repl::OplogEntry> ops = {setMkOp};
DoNothingOplogApplierObserver observer;
auto storageInterface = repl::StorageInterface::get(_opCtx);
auto workerPool = repl::makeReplWorkerPool();
repl::OplogApplierImpl oplogApplier(
nullptr,
nullptr,
&observer,
_coordinatorMock,
_consistencyMarkers,
storageInterface,
repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
workerPool.get());
uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops));
const auto collAcq = acquireCollForRead(_opCtx, nss);
const auto& collPtr = collAcq.getCollectionPtr();
// Regular index: catalog flag set + multikey path component is recorded.
assertMultikeyPaths(_opCtx, collPtr, indexName, applyTs, true /* shouldBeMultikey */, {{0}});
assertMultikeyPaths(
_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {{}});
}
TEST_F(StorageTimestampTest, InitializeMinValid) {
NamespaceString nss = NamespaceString::kDefaultMinValidNamespace;
create(nss);

View File

@ -327,6 +327,18 @@ mongo_cc_library(
],
)
mongo_cc_library(
name = "set_multikey_metadata_oplog_helpers",
srcs = [
"set_multikey_metadata_oplog_helpers.cpp",
],
deps = [
"//src/mongo:base",
"//src/mongo/db/index:index_access_method",
"//src/mongo/db/storage/key_string",
],
)
mongo_cc_library(
name = "catalog_impl",
srcs = [
@ -345,6 +357,7 @@ mongo_cc_library(
"document_validation",
"durable_catalog",
"index_catalog",
"set_multikey_metadata_oplog_helpers",
"//src/mongo/db:audit",
"//src/mongo/db:collection_index_usage_tracker",
"//src/mongo/db:dbcommands_idl",
@ -474,6 +487,7 @@ mongo_cc_unit_test(
"index_catalog_impl_test.cpp",
"index_signature_test.cpp",
"rename_collection_test.cpp",
"set_multikey_metadata_oplog_helpers_test.cpp",
"shard_catalog_history_cleanup_test.cpp",
],
tags = ["mongo_unittest_eighth_group"],

View File

@ -138,6 +138,7 @@ public:
virtual void setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const = 0;
/**

View File

@ -61,6 +61,7 @@
#include "mongo/db/shard_role/shard_catalog/durable_catalog.h"
#include "mongo/db/shard_role/shard_catalog/index_catalog_entry_helpers.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h"
#include "mongo/db/shard_role/transaction_resources.h"
#include "mongo/db/storage/mdb_catalog.h"
#include "mongo/db/storage/recovery_unit.h"
@ -74,6 +75,7 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/decorable.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/str.h"
#include <algorithm>
#include <cstddef>
@ -281,39 +283,102 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx,
return;
}
// If multikeyMetadataKeys is non-empty, we must insert these keys into the index itself. We do
// not have to account for potential dupes, since all metadata keys are indexed against a single
// RecordId. An attempt to write a duplicate key will therefore be ignored.
if (!multikeyMetadataKeys.empty()) {
uassertStatusOK(
accessMethod()->asSortedData()->insertKeys(opCtx,
*shard_role_details::getRecoveryUnit(opCtx),
collection,
this,
multikeyMetadataKeys,
{},
{},
nullptr));
// `multikeyMetadataKeys` is wildcard-only by construction: only wildcard indexes store
// multikey-path information as keys inside the index itself. Other index types track multikey
// paths exclusively in the catalog and always pass an empty set here. Violating this would
// attempt to insert metadata keys into a non-wildcard index, which has no reserved RecordId for
// them and risks index corruption.
invariant(multikeyMetadataKeys.empty() ||
_descriptor.getIndexType() == IndexType::INDEX_WILDCARD,
"Non-empty multikeyMetadataKeys must come from a wildcard index");
const auto insertWildcardMultikeyMetadataKeysIfNotEmpty = [&]() {
if (!multikeyMetadataKeys.empty()) {
// If multikeyMetadataKeys is non-empty, we must insert these keys into the index
// itself. We do not have to account for potential dupes, since all metadata keys are
// indexed against a single RecordId. An attempt to write a duplicate key will therefore
// be ignored.
uassertStatusOK(accessMethod()->asSortedData()->insertKeys(
opCtx,
*shard_role_details::getRecoveryUnit(opCtx),
collection,
this,
multikeyMetadataKeys,
{},
{},
nullptr));
}
};
if (!opCtx->inMultiDocumentTransaction()) {
insertWildcardMultikeyMetadataKeysIfNotEmpty();
_catalogSetMultikey(opCtx, collection, paths);
return;
}
// Mark the catalog as multikey, and record the multikey paths if applicable.
if (opCtx->inMultiDocumentTransaction()) {
auto status = _setMultikeyInMultiDocumentTransaction(opCtx, collection, paths);
// Retry without side transaction.
if (!status.isOK()) {
_catalogSetMultikey(opCtx, collection, paths);
VersionContext::FixedOperationFCVRegion fixedOfcvRegion(opCtx);
const bool replicateMultikeyness = gFeatureFlagReplicateMultikeynessInTransactions.isEnabled(
VersionContext::getDecoration(opCtx));
// Feature off, insert metadata keys in transaction itself. Otherwise the keys are written in
// the side txn.
if (!replicateMultikeyness) {
insertWildcardMultikeyMetadataKeysIfNotEmpty();
}
// replicateMultikeyness==false: writes catalog changes in a side txn, uses no-op entry.
// replicateMultikeyness==true: writes catalog changes + wildcard metadata keys in a side txn,
// uses setMultikeyMetadata entry.
auto status = _setMultikeyInMultiDocumentTransaction(
opCtx, collection, multikeyMetadataKeys, paths, replicateMultikeyness);
if (!status.isOK()) {
// The only expected failure is IndexNotFound — the index was created in the same
// parent transaction and is not yet visible to the side transaction's snapshot.
// Any other error is unexpected and should not be silently swallowed.
tassert(11609105,
str::stream() << "Unexpected error from side transaction for multikey update: "
<< status.toString(),
status.code() == ErrorCodes::IndexNotFound);
// Fallback: the side transaction could not see the index because it was created (but not
// yet committed) in the parent transaction. This happens when a user creates a collection
// with a wildcard index and inserts data in the same multi-document transaction. Since the
// side transaction was abandoned, we insert the metadata keys directly in the parent
// transaction instead.
//
// No setMultikeyMetadata oplog entry is emitted. On the secondary, the MultikeyPathTracker
// collects multikey info during oplog application and flushes it at the batch timestamp
// (firstTimeInBatch). This is still timestamp-consistent because transactions containing
// DDL commands (create, createIndexes) are always processed in their own oplog applier
// batch, so firstTimeInBatch == T_commit for these transactions.
if (replicateMultikeyness) {
insertWildcardMultikeyMetadataKeysIfNotEmpty();
}
} else {
_catalogSetMultikey(opCtx, collection, paths);
}
}
void IndexCatalogEntryImpl::setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const {
invariant(shard_role_details::getLocker(opCtx)->isCollectionLockedForMode(coll->ns(), MODE_IX));
invariant(shard_role_details::getLocker(opCtx)->inAWriteUnitOfWork());
// Insert wildcard metadata keys into the index if provided.
if (!multikeyMetadataKeys.empty()) {
uassertStatusOK(
accessMethod()->asSortedData()->insertKeys(opCtx,
*shard_role_details::getRecoveryUnit(opCtx),
coll,
this,
multikeyMetadataKeys,
{},
{},
nullptr));
}
opCtx->getClient()->getServiceContext()->getOpObserver()->onSetMultikeyMetadata(
opCtx,
coll->ns(),
@ -348,7 +413,9 @@ void IndexCatalogEntryImpl::forceSetMultikey(OperationContext* const opCtx,
Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction(
OperationContext* opCtx,
const CollectionPtr& collection,
const MultikeyPaths& multikeyPaths) const {
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths,
bool replicateMultikeyness) const {
// If we are inside a multi-document transaction, we write the on-disk multikey update in a
// separate transaction so that it will not generate prepare conflicts with other operations
// that try to set the multikey flag. In general, it should always be safe to update the
@ -359,16 +426,15 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction(
TransactionParticipant::SideTransactionBlock sideTxn(opCtx);
VersionContext::FixedOperationFCVRegion fixedOfcvRegion(opCtx);
const bool replicateMultikeyness = gFeatureFlagReplicateMultikeynessInTransactions.isEnabled(
VersionContext::getDecoration(opCtx));
// If the index is not visible within the side transaction, the index may have been created,
// but not committed, in the parent transaction. Therefore, we abandon the side transaction
// and set the multikey flag in the parent transaction.
if (!durable_catalog::isIndexPresent(
opCtx, _shared->_catalogId, _descriptor.indexName(), MDBCatalog::get(opCtx))) {
return {ErrorCodes::SnapshotUnavailable, "index not visible in side transaction"};
return {ErrorCodes::IndexNotFound,
str::stream() << "index '" << _descriptor.indexName()
<< "' not visible in side transaction — likely created in the "
"same multi-document transaction"};
}
writeConflictRetry(opCtx, "set index multikey", collection->ns(), [&] {
@ -406,7 +472,7 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction(
// During initial sync, we may not have a stable timestamp. Therefore, we need to round
// up the multi-key write timestamp to the max of the three so that we don't write
// behind the oldest/stable timestamp. This code path is only hit during initial
// sync (and recovery if FeatureFlagReplicateMultikeynessInTransactions is enabled)
// sync (and recovery if FeatureFlagReplicateMultikeynessInTransactions is disabled)
// when reconstructing prepared transactions, and so we don't expect the oldest/stable
// timestamp to advance concurrently.
//
@ -431,11 +497,29 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction(
invariant(opCtx->writesAreReplicated());
if (replicateMultikeyness) {
BSONObj pathsObj;
if (!multikeyMetadataKeys.empty()) {
// Wildcard index: serialize field path names extracted from metadata keys.
auto fieldPaths =
set_multikey_metadata_oplog_helpers::extractFieldPathsFromMetadataKeys(
multikeyMetadataKeys, descriptor()->ordering());
pathsObj = set_multikey_metadata_oplog_helpers::fieldPathsToBSON(fieldPaths);
uassertStatusOK(accessMethod()->asSortedData()->insertKeys(
opCtx,
*shard_role_details::getRecoveryUnit(opCtx),
collection,
this,
multikeyMetadataKeys,
{},
{},
nullptr));
} else {
// Regular index: existing multikey paths format.
pathsObj = multikeyPathsToBSON(descriptor()->keyPattern(), multikeyPaths);
}
opCtx->getClient()->getServiceContext()->getOpObserver()->onSetMultikeyMetadata(
opCtx,
collection->ns(),
descriptor()->indexName(),
multikeyPathsToBSON(descriptor()->keyPattern(), multikeyPaths));
opCtx, collection->ns(), descriptor()->indexName(), pathsObj);
} else {
// Write a noop oplog entry to get a properly ordered timestamp.
auto msg = BSON("msg" << "Setting index to multikey"
@ -449,6 +533,11 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction(
_catalogSetMultikey(opCtx, collection, multikeyPaths);
wuow.commit();
if (replicateMultikeyness && !multikeyMetadataKeys.empty()) {
txnParticipant.setHasSideCommittedWildcardKeys();
opCtx->setHasSideCommittedWildcardKeys();
}
});
return Status::OK();
@ -585,8 +674,9 @@ public:
void setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const final {
return _original->setMultikeyForApplyOps(opCtx, coll, multikeyPaths);
return _original->setMultikeyForApplyOps(opCtx, coll, multikeyMetadataKeys, multikeyPaths);
}
void forceSetMultikey(OperationContext* opCtx,
@ -715,8 +805,9 @@ public:
void setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const final {
return _original->setMultikeyForApplyOps(opCtx, coll, multikeyPaths);
return _original->setMultikeyForApplyOps(opCtx, coll, multikeyMetadataKeys, multikeyPaths);
}
void forceSetMultikey(OperationContext* opCtx,

View File

@ -167,6 +167,7 @@ public:
*/
void setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const final;
void forceSetMultikey(OperationContext* opCtx,
@ -199,7 +200,9 @@ private:
*/
Status _setMultikeyInMultiDocumentTransaction(OperationContext* opCtx,
const CollectionPtr& collection,
const MultikeyPaths& multikeyPaths) const;
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths,
bool replicateMultikeyness) const;
/**
* Retrieves the multikey information associated with this index from '_collection',

View File

@ -120,6 +120,7 @@ public:
void setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const override {
MONGO_UNREACHABLE;
}

View File

@ -133,6 +133,7 @@ public:
void setMultikeyForApplyOps(OperationContext* opCtx,
const CollectionPtr& coll,
const KeyStringSet& multikeyMetadataKeys,
const MultikeyPaths& multikeyPaths) const final {
MONGO_UNREACHABLE;
}

View File

@ -0,0 +1,139 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/index/wildcard_key_generator.h"
#include "mongo/db/index_names.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/shared_buffer_fragment.h"
#include "mongo/util/str.h"
namespace mongo::set_multikey_metadata_oplog_helpers {
std::vector<std::string> extractFieldPathsFromMetadataKeys(const KeyStringSet& metadataKeys,
const Ordering& ordering) {
std::vector<std::string> fieldPaths;
fieldPaths.reserve(metadataKeys.size());
for (const auto& ks : metadataKeys) {
auto bsonKey = key_string::toBson(ks, ordering);
BSONObjIterator iter(bsonKey);
bool foundMarker = false;
while (iter.more()) {
auto elem = iter.next();
if (elem.type() != BSONType::minKey) {
tassert(11609101,
"Expected integer 1 marker in wildcard metadata key",
elem.isNumber() && elem.numberInt() == 1);
tassert(11609102,
"Expected field path string after marker in wildcard metadata key",
iter.more());
auto pathElem = iter.next();
tassert(11609103,
"Expected string type for field path in wildcard metadata key",
pathElem.type() == BSONType::string);
fieldPaths.emplace_back(pathElem.valueStringData());
foundMarker = true;
break;
}
}
tassert(11609104,
"Wildcard metadata key contained no non-MinKey marker; cannot extract field path",
foundMarker);
}
return fieldPaths;
}
BSONObj fieldPathsToBSON(const std::vector<std::string>& fieldPaths) {
BSONArrayBuilder arrBuilder;
for (const auto& path : fieldPaths) {
arrBuilder.append(path);
}
return arrBuilder.arr();
}
/*
* Regeneration for each path string:
* 1. Count 'prefixFields' and 'suffixFields' around the "$**" position in 'keyPattern'.
* E.g. {x: 1, $**: 1, y: 1, z: 1} prefix=1, suffix=2.
* 2. Build a KeyString matching the encoding written by the primary's `WildcardKeyGenerator`:
* MinKey × prefixFields, integer 1, "<path>", MinKey × suffixFields, reservedRecordId.
* Delegates to `WildcardKeyGenerator::makeMultikeyMetadataKey` so the encoding stays
* shared between primary (doc-walk path) and secondary (oplog-apply path).
*/
KeyStringSet regenerateMetadataKeysFromFieldPaths(const BSONObj& pathsObj,
key_string::Version version,
Ordering ordering,
KeyFormat rsKeyFormat,
const BSONObj& keyPattern) {
size_t prefixFields = 0;
size_t suffixFields = 0;
bool foundWildcard = false;
for (const auto& field : keyPattern) {
if (WildcardNames::isWildcardFieldName(field.fieldNameStringData())) {
tassert(11609106,
str::stream() << "keyPattern contains multiple wildcard fields: " << keyPattern,
!foundWildcard);
foundWildcard = true;
} else if (foundWildcard) {
++suffixFields;
} else {
++prefixFields;
}
}
tassert(11609107,
str::stream() << "Expected wildcard ($**) field in keyPattern: " << keyPattern,
foundWildcard);
KeyStringSet result;
SharedBufferFragmentBuilder pooledBufferBuilder(
key_string::HeapBuilder::kHeapAllocatorDefaultBytes);
for (const auto& elem : pathsObj) {
tassert(11609108,
str::stream() << "Expected string elements in wildcard multikey paths array in "
"setMultikeyMetadata oplog entry, got type "
<< typeName(elem.type()),
elem.type() == BSONType::string);
result.insert(WildcardKeyGenerator::makeMultikeyMetadataKey(elem.valueStringData(),
prefixFields,
suffixFields,
version,
ordering,
rsKeyFormat,
pooledBufferBuilder));
}
return result;
}
} // namespace mongo::set_multikey_metadata_oplog_helpers

View File

@ -0,0 +1,83 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/ordering.h"
#include "mongo/db/storage/key_string/key_string.h"
#include "mongo/db/storage/sorted_data_interface.h"
#include "mongo/util/modules.h"
#include <string>
#include <vector>
MONGO_MOD_PUBLIC;
/**
* Helpers shared between the primary-side serializer (index_catalog_entry_impl.cpp) and the
* secondary-side apply handler (repl/oplog.cpp) for the wildcard variant of the
* `setMultikeyMetadata` oplog entry.
*
* Format of the oplog 'paths' field for wildcard indexes: an array of field path strings,
* e.g. ["a", "b.c"]. The primary extracts the paths from the in-index metadata KeyStrings;
* the secondary rebuilds KeyStrings from those paths using the target index's properties.
*/
namespace mongo::set_multikey_metadata_oplog_helpers {
/**
* Extracts field path strings from wildcard metadata KeyStrings.
* Metadata keys encode: { "": MinKey, ..., "": 1, "": "field.path", "": MinKey, ... }.
*/
std::vector<std::string> extractFieldPathsFromMetadataKeys(const KeyStringSet& metadataKeys,
const Ordering& ordering);
/**
* Serializes wildcard field path strings into a BSON array suitable for the 'paths' field of
* the `setMultikeyMetadata` oplog entry.
*/
BSONObj fieldPathsToBSON(const std::vector<std::string>& fieldPaths);
/**
* Regenerates wildcard multikey metadata KeyStrings from the serialized field paths in a
* `setMultikeyMetadata` oplog entry.
*
* Input format 'pathsObj' is the oplog entry's 'paths' field: a BSON array of field-path
* strings, one per multikey path. Example: `["a", "b.c"]`.
*
* 'version', 'ordering', 'rsKeyFormat' come from the target index's SortedDataInterface so
* regenerated keys are identical to the keys produced by the primary at write time.
*/
KeyStringSet regenerateMetadataKeysFromFieldPaths(const BSONObj& pathsObj,
key_string::Version version,
Ordering ordering,
KeyFormat rsKeyFormat,
const BSONObj& keyPattern);
} // namespace mongo::set_multikey_metadata_oplog_helpers

View File

@ -0,0 +1,144 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h"
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/index/wildcard_access_method.h"
#include "mongo/db/index/wildcard_key_generator.h"
#include "mongo/db/storage/key_string/key_string.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/shared_buffer_fragment.h"
namespace mongo {
namespace {
constexpr auto kVersion = key_string::Version::kLatestVersion;
constexpr auto kRsKeyFormat = KeyFormat::Long;
KeyStringSet generateMultikeyMetadataIndexKeys(const BSONObj& keyPattern,
const BSONObj& doc,
KeyFormat rsKeyFormat = kRsKeyFormat) {
const auto ordering = WildcardAccessMethod::makeOrdering(keyPattern);
WildcardKeyGenerator gen(keyPattern, BSONObj(), nullptr, kVersion, ordering, rsKeyFormat);
SharedBufferFragmentBuilder buf(key_string::HeapBuilder::kHeapAllocatorDefaultBytes);
KeyStringSet indexKeys;
KeyStringSet metadataKeys;
gen.generateKeys(buf, doc, &indexKeys, &metadataKeys);
return metadataKeys;
}
void assertKeyStringSetsEqual(const KeyStringSet& a, const KeyStringSet& b, StringData context) {
ASSERT_EQ(a.size(), b.size()) << context;
auto itA = a.begin();
auto itB = b.begin();
for (; itA != a.end(); ++itA, ++itB) {
ASSERT_EQ(0, itA->compare(*itB)) << "KeyString mismatch in: " << context;
}
}
void verifyRoundTrip(const BSONObj& keyPattern,
const KeyStringSet& originalKeys,
KeyFormat rsKeyFormat,
StringData context) {
const auto ordering = WildcardAccessMethod::makeOrdering(keyPattern);
auto fieldPaths = set_multikey_metadata_oplog_helpers::extractFieldPathsFromMetadataKeys(
originalKeys, ordering);
auto pathsObj = set_multikey_metadata_oplog_helpers::fieldPathsToBSON(fieldPaths);
auto regeneratedKeys =
set_multikey_metadata_oplog_helpers::regenerateMetadataKeysFromFieldPaths(
pathsObj, kVersion, ordering, rsKeyFormat, keyPattern);
assertKeyStringSetsEqual(originalKeys, regeneratedKeys, context);
}
TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripSimple) {
const auto keyPattern = BSON("$**" << 1);
KeyStringSet originalKeys;
for (const auto& doc : {BSON("a" << BSON_ARRAY(1 << 2)),
BSON("b" << BSON("c" << BSON_ARRAY(1 << 2))),
BSON("d" << BSON("e" << BSON("f" << BSON_ARRAY(1))))}) {
auto docKeys = generateMultikeyMetadataIndexKeys(keyPattern, doc);
originalKeys.insert(docKeys.begin(), docKeys.end());
}
ASSERT_EQ(3u, originalKeys.size());
verifyRoundTrip(keyPattern, originalKeys, kRsKeyFormat, "simple wildcard");
}
TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripCompoundWildcard) {
const auto keyPattern = BSON("x" << 1 << "$**" << 1 << "y" << 1);
KeyStringSet originalKeys;
for (const auto& doc : {BSON("x" << 10 << "a" << BSON_ARRAY(1 << 2) << "y" << 20),
BSON("x" << 30 << "b" << BSON("c" << BSON_ARRAY(3)) << "y" << 40)}) {
auto docKeys = generateMultikeyMetadataIndexKeys(keyPattern, doc);
originalKeys.insert(docKeys.begin(), docKeys.end());
}
ASSERT_EQ(2u, originalKeys.size());
verifyRoundTrip(keyPattern, originalKeys, kRsKeyFormat, "compound wildcard");
}
TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripDescendingCompoundWildcard) {
const auto keyPattern = BSON("x" << -1 << "$**" << 1 << "y" << -1);
auto originalKeys = generateMultikeyMetadataIndexKeys(
keyPattern, BSON("x" << 1 << "path" << BSON_ARRAY(1 << 2) << "y" << 2));
ASSERT_EQ(1u, originalKeys.size());
verifyRoundTrip(keyPattern, originalKeys, kRsKeyFormat, "descending compound wildcard");
}
TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripEmpty) {
const auto keyPattern = BSON("$**" << 1);
KeyStringSet emptyKeys;
verifyRoundTrip(keyPattern, emptyKeys, kRsKeyFormat, "empty");
}
TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripStringKeyFormat) {
const auto keyPattern = BSON("$**" << 1);
auto originalKeys = generateMultikeyMetadataIndexKeys(
keyPattern, BSON("field" << BSON_ARRAY(1 << 2)), KeyFormat::String);
ASSERT_EQ(1u, originalKeys.size());
verifyRoundTrip(keyPattern, originalKeys, KeyFormat::String, "string key format");
}
TEST(SetMultikeyMetadataOplogHelpersTest, FieldPathsToBSONFormat) {
auto pathsObj = set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a", "b.c"});
ASSERT_EQ(BSON_ARRAY("a" << "b.c").woCompare(pathsObj), 0);
}
} // namespace
} // namespace mongo

View File

@ -1817,6 +1817,13 @@ void TransactionParticipant::Participant::unstashTransactionResources(
}
_releaseTransactionResourcesToOpCtx(opCtx, maxLockTimeout);
// Propagate session-scoped flag to this opCtx so the query planner can check it without
// depending on TransactionParticipant.
if (p().hasSideCommittedWildcardKeys) {
opCtx->setHasSideCommittedWildcardKeys();
}
std::lock_guard<Client> lg(*opCtx->getClient());
o(lg).transactionMetricsObserver.onUnstash(ServerTransactionsMetrics::get(opCtx),
opCtx->getServiceContext()->getTickSource());
@ -3737,6 +3744,7 @@ void TransactionParticipant::Participant::_resetTransactionStateAndUnlock(
o(*lk).transactionRuntimeContext = boost::none;
p().autoCommit = boost::none;
p().needToWriteAbortEntry = false;
p().hasSideCommittedWildcardKeys = false;
// Swap out txnResourceStash while holding the Client lock, then release any locks held by this
// participant and abort the storage transaction after releasing the lock. The transaction

View File

@ -876,6 +876,19 @@ public:
*/
void setLastWriteOpTime(OperationContext* opCtx, const repl::OpTime& lastWriteOpTime);
/**
* Returns true if a side transaction has committed wildcard multikey metadata keys
* during this transaction. Used by the query planner to decide whether a fresh
* RecoveryUnit scan is needed to see side-committed metadata keys.
*/
bool hasSideCommittedWildcardKeys() const {
return p().hasSideCommittedWildcardKeys;
}
void setHasSideCommittedWildcardKeys() {
p().hasSideCommittedWildcardKeys = true;
}
//
// Methods for use in C++ unit tests, only. Beware: these methods may not adhere to the
// concurrency control rules.
@ -1435,6 +1448,11 @@ private:
// transaction.
bool needToWriteAbortEntry{false};
// Set to true when a side transaction commits wildcard multikey metadata keys into an
// index. This allows the query planner to skip creating a fresh RecoveryUnit to scan
// for side-committed metadata keys when no such keys exist.
bool hasSideCommittedWildcardKeys{false};
// Caches the per-collection size and count deltas across a prepared transaction. Aligns
// with the `sizeMetadata` persisted to the `config.transactions` entry once prepared.
boost::optional<std::vector<MultiOpSizeMetadata>> preparedSizeMetadata;