From 26d3dfa093773f8b76de0d2607bc6840d99ec4b7 Mon Sep 17 00:00:00 2001 From: Yujin Kang Park <36410031+ykangpark@users.noreply.github.com> Date: Tue, 26 May 2026 12:48:04 +0200 Subject: [PATCH] SERVER-116091 Explicitly replicate wildcard multikeyness in txns (#51970) GitOrigin-RevId: 9f85874cd282b9fc3273b01c00b2522de64dc9d8 --- .../concurrency/fsm_workloads/ddl/OWNERS.yml | 3 + .../ddl/multikey_timestamp_consistency.js | 958 ++++++++++++++++++ jstests/core/txns/read_own_multikey_writes.js | 214 +++- .../txn_multikey_timestamping.js | 316 +++++- src/mongo/db/index/wildcard_key_generator.cpp | 57 +- src/mongo/db/index/wildcard_key_generator.h | 13 + src/mongo/db/index_names.h | 2 +- src/mongo/db/operation_context.cpp | 7 + src/mongo/db/operation_context.h | 37 + .../db/query/wildcard_multikey_paths.cpp | 77 +- src/mongo/db/repl/BUILD.bazel | 3 + src/mongo/db/repl/oplog.cpp | 33 +- src/mongo/db/repl/storage_timestamp_test.cpp | 608 +++++++++++ .../db/shard_role/shard_catalog/BUILD.bazel | 14 + .../shard_catalog/index_catalog_entry.h | 1 + .../index_catalog_entry_impl.cpp | 157 ++- .../shard_catalog/index_catalog_entry_impl.h | 5 +- .../index_catalog_entry_impl_test.cpp | 1 + .../shard_catalog/index_catalog_entry_mock.h | 1 + .../set_multikey_metadata_oplog_helpers.cpp | 139 +++ .../set_multikey_metadata_oplog_helpers.h | 83 ++ ...t_multikey_metadata_oplog_helpers_test.cpp | 144 +++ .../transaction/transaction_participant.cpp | 8 + .../db/transaction/transaction_participant.h | 18 + 24 files changed, 2746 insertions(+), 153 deletions(-) create mode 100644 jstests/concurrency/fsm_workloads/ddl/multikey_timestamp_consistency.js create mode 100644 src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.cpp create mode 100644 src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h create mode 100644 src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers_test.cpp diff --git a/jstests/concurrency/fsm_workloads/ddl/OWNERS.yml b/jstests/concurrency/fsm_workloads/ddl/OWNERS.yml index b7aba4fc416..bb6774f2d20 100644 --- a/jstests/concurrency/fsm_workloads/ddl/OWNERS.yml +++ b/jstests/concurrency/fsm_workloads/ddl/OWNERS.yml @@ -3,3 +3,6 @@ filters: - "*": approvers: - 10gen/server-catalog-and-routing-ddl + - "/multikey_timestamp_consistency.js": + approvers: + - 10gen/server-catalog-and-routing-shard-catalog diff --git a/jstests/concurrency/fsm_workloads/ddl/multikey_timestamp_consistency.js b/jstests/concurrency/fsm_workloads/ddl/multikey_timestamp_consistency.js new file mode 100644 index 00000000000..6b45e3f8a3d --- /dev/null +++ b/jstests/concurrency/fsm_workloads/ddl/multikey_timestamp_consistency.js @@ -0,0 +1,958 @@ +/** + * Tests timestamp consistency of multikey metadata across replicas under concurrent load. + * + * Several kinds of multikey-triggering operations run concurrently: + * + * Transactional (expected to be timestamp-consistent): + * 1. createCollAndInsertWildcardMkInTxn: Creates a NEW collection + wildcard index + multikey + * insert in one transaction. Exercises the path where a side txn can't see the index. + * 2. createCollAndInsertRegularMkInTxn: Same but with a regular {a: 1} index. + * 3. createCollAndUpdateWildcardMkInTxn: Creates a NEW collection + wildcard index + scalar + * insert + update-to-array in one transaction. Exercises the update-path multikey trigger + * when the index was created in the same txn (side txn can't see the index). + * 4. createCollAndUpdateRegularMkInTxn: Same but with a regular {a: 1} index. + * 5. insertWildcardMkInTxn: N (1-5) multikey inserts on distinct fields (a shuffled subset of + * INDEXED_FIELDS) into a pre-existing wildcard-indexed collection in a single transaction. + * Exercises both single-statement and multi-statement side-write accumulation across + * distinct multikey paths. + * 6. insertRegularMkInTxn: N (1-5) multikey inserts into a pre-existing regular-indexed + * collection that has a separate {field: 1} index per field in INDEXED_FIELDS. Each + * statement targets a distinct field so each insert exercises a distinct index's multikey + * path. + * 7. updateWildcardMkInTxn: N (1-5) updates of pre-inserted scalar fields to arrays inside a + * transaction against a pre-existing wildcard-indexed collection. Exercises the update-path + * multikey transition across distinct paths. + * 8. updateRegularMkInTxn: N (1-5) updates of pre-inserted scalars to arrays inside a + * transaction against the same multi-index regular collection as state 6, with each + * statement targeting a distinct field. + * + * Transactional states 5-8 randomly prepare the transaction before commit with probability + * `prepareProbability`, exercising both unprepared and prepared commit paths. + * + * TODO (SERVER-110904): Amend comment below and remove 'shouldAssert' parameter to always assert. + * Non-transactional (NOT expected to be timestamp-consistent yet): + * 9. insertWildcardMkDirect: N (1-5) non-transactional multikey inserts on distinct fields into + * a pre-existing wildcard-indexed collection. Adjacent inserts may be batched together + * during oplog application on the secondary, exercising batched multikey-metadata apply. + * Logs inconsistencies but does not assert. + * 10. insertRegularMkDirect: N (1-5) non-transactional multikey inserts into a pre-existing + * regular-indexed collection that has a separate {field: 1} index per field in + * INDEXED_FIELDS, with each insert targeting a distinct field. Adjacent inserts may + * be batched together during oplog application on the secondary. Logs inconsistencies but + * does not assert. + * 11. updateWildcardMkDirect: N (1-5) non-transactional updates of pre-inserted scalar fields + * to arrays against a pre-existing wildcard-indexed collection. Adjacent updates may be + * batched together during oplog application on the secondary. Logs inconsistencies but + * does not assert. + * 12. updateRegularMkDirect: N (1-5) non-transactional updates of pre-inserted scalars to + * arrays against the same multi-index regular collection as state 10. Adjacent updates may + * be batched together during oplog application on the secondary. Logs inconsistencies but + * does not assert. + * 13. dropAndRecreateIndex: Non-transactionally drops a random index (wildcard or one of the + * regular per-field indexes) on a random pool collection and immediately recreates it. + * Stresses the interaction between index lifecycle and concurrent multikey side-writes + * from other threads. Tolerates IndexNotFound (another thread may have dropped first). + * 14. dropAndRecreateColl: Non-transactionally drops a random pool collection and recreates it + * with the full set of wildcard + regular indexes. Stresses catalog churn against + * concurrent multikey side-writes. Other pool-using states tolerate NamespaceNotFound and + * CollectionUUIDMismatch via isCollDropError when their writes race with this state. + * + * 15. insertNoise: Non-transactional insert to an unrelated collection for batch pressure. + * + * After each operation, the thread verifies that the catalog multikey flag (and index-specific + * multikey paths) are consistent between primary and secondary at the operation's timestamp + * and the preceding timestamp. + * + * @tags: [ + * uses_transactions, + * featureFlagReplicateMultikeynessInTransactions, + * requires_replication, + * does_not_support_stepdowns, + * # Suites converting to txns would break our assumption about oplog format of inserts. + * does_not_support_transactions, + * # The workload's internal contention plus a per-test runtime in the tens of minutes exceeds + * # the burn-in budget when layered under simultaneous FSM workloads. + * incompatible_with_concurrency_simultaneous, + * ] + */ + +import {withTxnAndAutoRetry} from "jstests/concurrency/fsm_workload_helpers/auto_retry_transaction.js"; +import {PersistenceProviderUtil} from "jstests/libs/server-rss/persistence_provider_util.js"; + +export const $config = (function () { + const collPrefix = "mk_ts_"; + + function poolCollName(idx) { + return collPrefix + "pool_" + idx; + } + function pickPoolColl() { + return poolCollName(Random.randInt(POOL_SIZE)); + } + + // Returns true if `e` is a transient error caused by a concurrent dropAndRecreateColl on + // the target collection. Callers tolerate it by returning early without verification. + // NamespaceNotFound: a non-txn write hit the post-drop, pre-recreate window. + // CollectionUUIDMismatch: a non-txn write referenced the pre-drop UUID after recreate. + // OperationNotSupportedInTransaction: an in-txn write implicitly created the dropped + // collection, then prepare/commit rejected because distributed txns disallow implicit + // collection creation. + function isCollDropError(e) { + return ( + e.code === ErrorCodes.NamespaceNotFound || + e.code === ErrorCodes.CollectionUUIDMismatch || + e.code === ErrorCodes.OperationNotSupportedInTransaction + ); + } + + // Returns true and best-effort aborts any leftover transaction on the session if `e` is a + // tolerable coll-drop race; returns false otherwise. withTxnAndAutoRetry skips the abort + // path when an error is raised by prepare/commit (treated as a commit error of unknown + // outcome), which leaves the session with an active transaction marker. Aborting here + // guarantees the next state can start a fresh transaction. NoSuchTransaction is expected + // when there was no leftover transaction (e.g. for non-txn states). + function tolerateCollDropError(state, e) { + if (!isCollDropError(e)) return false; + assert.commandWorkedOrFailedWithCode(state.session.abortTransaction_forTesting(), ErrorCodes.NoSuchTransaction); + return true; + } + const IndexType = Object.freeze({REGULAR: "regular", WILDCARD: "wildcard"}); + + // Number of pre-existing collections in the shared pool. Each pool collection has both a + // wildcard {"$**": 1} index and one {field: 1} regular index per field in + // REGULAR_INDEX_FIELDS. States pick a random pool collection on each invocation so threads + // contend on the same multikey metadata, exercising concurrent setMultikey paths. + const POOL_SIZE = 2; + + // Fields exercised by regular-index states. Each gets its own {field: 1} index on every + // pool collection. + const REGULAR_INDEX_FIELDS = ["a", "b", "c", "d", "e"]; + + // Fields exercised by wildcard-index states. Overlap with REGULAR_INDEX_FIELDS on "d" and + // "e" so a single op can trigger multikey transitions on both index types simultaneously. + // The wildcard {"$**": 1} index physically covers any field, so f/g/h hit only the + // wildcard index while a/b/c hit only the regular indexes. + const WILDCARD_FIELDS = ["d", "e", "f", "g", "h"]; + + // --- Shared helpers --- + + function getCatalogMetadata(node, dbName, collName, ts) { + try { + const catalog = node + .getDB("admin") + .aggregate([{$listCatalog: {}}, {$match: {db: dbName, name: collName}}], { + readConcern: {level: "snapshot", atClusterTime: ts}, + }) + .toArray(); + if (catalog.length === 0) return null; + return catalog[0]["md"]["indexes"]; + } catch (e) { + return undefined; + } + } + + // Locates the index entry that covers `fieldName` for the given index type. + // Wildcard: matches the unique index with keyPattern {"$**": 1}. + // Regular: matches a single-field index whose keyPattern is exactly {fieldName: 1}. + function findIndexForField(indexes, indexType, fieldName) { + if (indexType === IndexType.WILDCARD) { + return indexes.find((idx) => idx.spec && idx.spec.key && idx.spec.key["$**"] === 1); + } + return indexes.find((idx) => { + const key = idx.spec && idx.spec.key; + if (!key) return false; + const keys = Object.keys(key); + return keys.length === 1 && key[fieldName] === 1; + }); + } + + function getWildcardMultikeyPaths(node, dbName, collName, ts, hint, fieldName) { + try { + const filter = {}; + filter[fieldName] = 1; + const explain = node.getDB(dbName).runCommand({ + explain: {find: collName, filter: filter, hint: hint}, + readConcern: {level: "snapshot", atClusterTime: ts}, + verbosity: "queryPlanner", + }); + if (!explain.ok) return undefined; + // Walks the explain tree to find the wildcard IXSCAN. Handles both engines: + // - Classic: winningPlan.{stage:..., inputStage:{...}} + // - SBE: winningPlan.queryPlan.{stage:..., inputStage:{...}} + // SBE wraps the QuerySolutionNode tree under a 'queryPlan' field; recurse into it. + function findWcIxscan(plan) { + if (!plan) return null; + if (plan.stage === "IXSCAN" && plan.keyPattern && plan.keyPattern["$_path"]) return plan; + if (plan.queryPlan) { + const r = findWcIxscan(plan.queryPlan); + if (r) return r; + } + if (plan.inputStage) return findWcIxscan(plan.inputStage); + if (plan.inputStages) { + for (const s of plan.inputStages) { + const r = findWcIxscan(s); + if (r) return r; + } + } + return null; + } + const ixscan = findWcIxscan(explain.queryPlanner.winningPlan); + if (!ixscan) return []; + return ixscan.multiKeyPaths && ixscan.multiKeyPaths[fieldName] ? ixscan.multiKeyPaths[fieldName] : []; + } catch (e) { + return undefined; + } + } + + const OPLOG_SCAN_LOST_MSG = + "Skipping multikey consistency check: oplog scan lost capped position (concurrent oplog rotation)"; + + // Returns the applyOps oplog entry whose `o.applyOps` array contains an element matching + // `elemMatch`, or null if the entry is absent (e.g. the txn committed with a no-op update + // because dropAndRecreateColl removed its target doc) or the oplog got truncated + // (CappedPositionLost). Callers handle null by skipping verification. The outer `op: "c"` + // restricts the match to applyOps txn entries and excludes any standalone CRUD entry. + function findTxnOplogEntry(primary, elemMatch) { + try { + return primary.getDB("local").oplog.rs.findOne({ + op: "c", + "o.applyOps": {$elemMatch: elemMatch}, + }); + } catch (e) { + if (e.code === ErrorCodes.CappedPositionLost) { + jsTestLog(OPLOG_SCAN_LOST_MSG + ": " + tojson(e)); + return null; + } + throw e; + } + } + + // Returns the oplog entry matching `match`, or null if the entry is absent (e.g. the op + // was a no-op update because dropAndRecreateColl removed its target doc) or the oplog got + // truncated (CappedPositionLost). Callers handle null by skipping verification. + function findOplogEntry(primary, match) { + try { + return primary.getDB("local").oplog.rs.findOne(match); + } catch (e) { + if (e.code === ErrorCodes.CappedPositionLost) { + jsTestLog(OPLOG_SCAN_LOST_MSG + ": " + tojson(e)); + return null; + } + throw e; + } + } + + // Returns the timestamp of the most recent oplog entry strictly before `ts`, or null if the + // oplog truncated (CappedPositionLost). + function findPrevTimestamp(primary, ts) { + let prev; + try { + prev = primary + .getDB("local") + .oplog.rs.find({ts: {$lt: ts}}) + .sort({ts: -1}) + .limit(1) + .toArray()[0]; + assert(prev, "Could not find a previous oplog entry before ts: " + tojson(ts)); + } catch (e) { + if (e.code === ErrorCodes.CappedPositionLost) { + jsTestLog(OPLOG_SCAN_LOST_MSG + ": " + tojson(e)); + return null; + } + throw e; + } + return prev ? prev.ts : null; + } + + // Checks multikey metadata consistency. If shouldAssert is false, logs mismatches + // instead of asserting (for non-txn paths where consistency is not yet guaranteed). + function verifyMultikeyConsistency( + primary, + secondary, + dbName, + collName, + indexType, + wildcardHint, + fieldNames, + ts1, + ts2, + shouldAssert, + ) { + for (const ts of [ts1, ts2]) { + const pIndexes = getCatalogMetadata(primary, dbName, collName, ts); + const sIndexes = getCatalogMetadata(secondary, dbName, collName, ts); + if (pIndexes === undefined || sIndexes === undefined) continue; + + // Both must agree on collection existence. + if (!pIndexes || !sIndexes) { + if (shouldAssert) { + assert.eq( + !pIndexes, + !sIndexes, + "Catalog entry presence mismatch at ts " + tojson(ts) + " for " + collName, + ); + } else if (!!pIndexes !== !!sIndexes) { + jsTestLog( + "NON-TXN inconsistency (expected): catalog presence mismatch at " + + tojson(ts) + + " for " + + collName, + ); + } + continue; + } + + // Compare per-index multikey flag and paths for each field. + for (const fieldName of fieldNames) { + const pIdx = findIndexForField(pIndexes, indexType, fieldName); + const sIdx = findIndexForField(sIndexes, indexType, fieldName); + // Index may not exist at this ts yet (e.g., prevTs before createIndex). Skip. + if (!pIdx || !sIdx) continue; + + if (pIdx.multikey !== sIdx.multikey) { + if (shouldAssert) { + assert.eq( + pIdx.multikey, + sIdx.multikey, + "Index multikey flag mismatch at ts " + + tojson(ts) + + " for " + + collName + + " field " + + fieldName, + ); + } else { + jsTestLog( + "NON-TXN inconsistency (expected): index flag mismatch at " + + tojson(ts) + + " for " + + collName + + " field " + + fieldName + + ": primary=" + + pIdx.multikey + + ", secondary=" + + sIdx.multikey, + ); + } + continue; + } + + if (indexType === IndexType.REGULAR) { + const pPaths = pIdx.multikeyPaths[fieldName]; + const sPaths = sIdx.multikeyPaths[fieldName]; + if (shouldAssert) { + assert.eq( + pPaths, + sPaths, + "Regular multikeyPaths mismatch at ts " + + tojson(ts) + + " for " + + collName + + " field " + + fieldName, + ); + } else if (bsonWoCompare(pPaths, sPaths) !== 0) { + jsTestLog( + "NON-TXN inconsistency (expected): regular paths mismatch at " + + tojson(ts) + + " for " + + collName + + " field " + + fieldName, + ); + } + } else { + const pPaths = getWildcardMultikeyPaths(primary, dbName, collName, ts, wildcardHint, fieldName); + const sPaths = getWildcardMultikeyPaths(secondary, dbName, collName, ts, wildcardHint, fieldName); + if (pPaths === undefined || sPaths === undefined) continue; + if (shouldAssert) { + assert.sameMembers( + pPaths, + sPaths, + "Wildcard multiKeyPaths mismatch at ts " + + tojson(ts) + + " for " + + collName + + " field " + + fieldName, + ); + } else { + try { + assert.sameMembers(pPaths, sPaths); + } catch (e) { + jsTestLog( + "NON-TXN inconsistency (expected): wildcard paths mismatch at " + + tojson(ts) + + " for " + + collName + + " field " + + fieldName, + ); + } + } + } + } + } + } + + // Returns a shuffled subset of `fields` of size 1..fields.length. + function pickFieldSubset(fields) { + const numOps = 1 + Random.randInt(fields.length); + return Array.shuffle(fields.slice()).slice(0, numOps); + } + + // Returns `count` unique ids of the form `${prefix}${tid}_${insertCount}` and bumps + // `state.insertCount` for each. + function nextIds(state, prefix, count) { + const ids = []; + for (let i = 0; i < count; i++) { + ids.push(prefix + state.tid + "_" + state.insertCount); + state.insertCount++; + } + return ids; + } + + // After a transaction commits, locates the applyOps oplog entry matching `oplogElemMatch`, + // computes the prior timestamp, and asserts multikey consistency between primary and + // secondary at both timestamps for the given fields. + function verifyTxnMultikey(state, db, myCollName, oplogElemMatch, indexType, wildcardHint, fields) { + const primary = db.getMongo(); + const dbName = db.getName(); + const txnEntry = findTxnOplogEntry(primary, oplogElemMatch); + if (!txnEntry) return; + const prevTs = findPrevTimestamp(primary, txnEntry.ts); + if (!prevTs || !state.secondaryConn) return; + + verifyMultikeyConsistency( + primary, + state.secondaryConn, + dbName, + myCollName, + indexType, + wildcardHint, + fields, + txnEntry.ts, + prevTs, + true /* shouldAssert */, + ); + } + + // After a sequence of non-transactional ops, locates the first and last oplog entries + // matching `firstMatch` / `lastMatch`, computes the prior timestamp, and logs (without + // asserting) any multikey inconsistency between primary and secondary. + function verifyDirectMultikey(state, db, myCollName, firstMatch, lastMatch, indexType, wildcardHint, fields) { + const primary = db.getMongo(); + const dbName = db.getName(); + const firstEntry = findOplogEntry(primary, firstMatch); + const lastEntry = findOplogEntry(primary, lastMatch); + if (!firstEntry || !lastEntry) return; + const prevTs = findPrevTimestamp(primary, firstEntry.ts); + if (!prevTs || !state.secondaryConn) return; + + verifyMultikeyConsistency( + primary, + state.secondaryConn, + dbName, + myCollName, + indexType, + wildcardHint, + fields, + lastEntry.ts, + prevTs, + false /* shouldAssert */, + ); + } + + // --- FSM states --- + + let states = { + init: function init(db, collName) { + this.session = db.getMongo().startSession({causalConsistency: false}); + this.sessionDb = this.session.getDatabase(db.getName()); + this.insertCount = 0; + if (this.secondaryHost) { + this.secondaryConn = new Mongo(this.secondaryHost); + this.secondaryConn.setSecondaryOk(); + } + }, + + // --- Transactional states (timestamp-consistent, assert on mismatch) --- + + createCollAndInsertWildcardMkInTxn: function createCollAndInsertWildcardMkInTxn(db, collName) { + const myCollName = collPrefix + "wc_txn_" + this.tid + "_" + this.insertCount; + this.insertCount++; + + this.session.startTransaction({writeConcern: {w: "majority"}}); + try { + assert.commandWorked(this.sessionDb.createCollection(myCollName)); + assert.commandWorked(this.sessionDb[myCollName].createIndex({"$**": 1})); + assert.commandWorked(this.sessionDb[myCollName].insert({a: [1, 2]})); + assert.commandWorked(this.session.commitTransaction_forTesting()); + } catch (e) { + this.session.abortTransaction_forTesting(); + if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return; + throw e; + } + + const dbName = db.getName(); + verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.WILDCARD, {"$**": 1}, [ + "a", + ]); + }, + + createCollAndInsertRegularMkInTxn: function createCollAndInsertRegularMkInTxn(db, collName) { + const myCollName = collPrefix + "reg_txn_" + this.tid + "_" + this.insertCount; + this.insertCount++; + + this.session.startTransaction({writeConcern: {w: "majority"}}); + try { + assert.commandWorked(this.sessionDb.createCollection(myCollName)); + assert.commandWorked(this.sessionDb[myCollName].createIndex({a: 1})); + assert.commandWorked(this.sessionDb[myCollName].insert({a: [1, 2]})); + assert.commandWorked(this.session.commitTransaction_forTesting()); + } catch (e) { + this.session.abortTransaction_forTesting(); + if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return; + throw e; + } + + const dbName = db.getName(); + verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.REGULAR, null, ["a"]); + }, + + createCollAndUpdateWildcardMkInTxn: function createCollAndUpdateWildcardMkInTxn(db, collName) { + const myCollName = collPrefix + "wc_txn_upd_" + this.tid + "_" + this.insertCount; + this.insertCount++; + + this.session.startTransaction({writeConcern: {w: "majority"}}); + try { + assert.commandWorked(this.sessionDb.createCollection(myCollName)); + assert.commandWorked(this.sessionDb[myCollName].createIndex({"$**": 1})); + assert.commandWorked(this.sessionDb[myCollName].insert({_id: 1, a: 1})); + assert.commandWorked(this.sessionDb[myCollName].update({_id: 1}, {$set: {a: [1, 2]}})); + assert.commandWorked(this.session.commitTransaction_forTesting()); + } catch (e) { + this.session.abortTransaction_forTesting(); + if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return; + throw e; + } + + const dbName = db.getName(); + verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.WILDCARD, {"$**": 1}, [ + "a", + ]); + }, + + createCollAndUpdateRegularMkInTxn: function createCollAndUpdateRegularMkInTxn(db, collName) { + const myCollName = collPrefix + "reg_txn_upd_" + this.tid + "_" + this.insertCount; + this.insertCount++; + + this.session.startTransaction({writeConcern: {w: "majority"}}); + try { + assert.commandWorked(this.sessionDb.createCollection(myCollName)); + assert.commandWorked(this.sessionDb[myCollName].createIndex({a: 1})); + assert.commandWorked(this.sessionDb[myCollName].insert({_id: 1, a: 1})); + assert.commandWorked(this.sessionDb[myCollName].update({_id: 1}, {$set: {a: [1, 2]}})); + assert.commandWorked(this.session.commitTransaction_forTesting()); + } catch (e) { + this.session.abortTransaction_forTesting(); + if (e.hasOwnProperty("errorLabels") && e.errorLabels.includes("TransientTransactionError")) return; + throw e; + } + + const dbName = db.getName(); + verifyTxnMultikey(this, db, myCollName, {ns: dbName + "." + myCollName}, IndexType.REGULAR, null, ["a"]); + }, + + insertWildcardMkInTxn: function insertWildcardMkInTxn(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(WILDCARD_FIELDS); + const markers = nextIds(this, "wc_txn_", fields.length); + + try { + withTxnAndAutoRetry( + this.session, + () => { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked( + this.sessionDb[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]}), + ); + } + }, + { + retryOnKilledSession: this.retryOnKilledSession, + prepareProbability: this.prepareProbability, + }, + ); + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyTxnMultikey( + this, + db, + myCollName, + {"op": "i", "ns": fullNs, "o._marker": markers[0]}, + IndexType.WILDCARD, + {"$**": 1}, + fields, + ); + }, + + insertRegularMkInTxn: function insertRegularMkInTxn(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(REGULAR_INDEX_FIELDS); + const markers = nextIds(this, "r", fields.length); + + try { + withTxnAndAutoRetry( + this.session, + () => { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked( + this.sessionDb[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]}), + ); + } + }, + { + retryOnKilledSession: this.retryOnKilledSession, + prepareProbability: this.prepareProbability, + }, + ); + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyTxnMultikey( + this, + db, + myCollName, + {"op": "i", "ns": fullNs, "o._marker": markers[0]}, + IndexType.REGULAR, + null, + fields, + ); + }, + + updateWildcardMkInTxn: function updateWildcardMkInTxn(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(WILDCARD_FIELDS); + const docIds = nextIds(this, "u_wc_", fields.length); + + try { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1})); + } + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + try { + withTxnAndAutoRetry( + this.session, + () => { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked( + this.sessionDb[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}}), + ); + } + }, + { + retryOnKilledSession: this.retryOnKilledSession, + prepareProbability: this.prepareProbability, + }, + ); + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyTxnMultikey( + this, + db, + myCollName, + {"op": "u", "ns": fullNs, "o2._id": docIds[0]}, + IndexType.WILDCARD, + {"$**": 1}, + fields, + ); + }, + + updateRegularMkInTxn: function updateRegularMkInTxn(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(REGULAR_INDEX_FIELDS); + const docIds = nextIds(this, "u_reg_", fields.length); + + try { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1})); + } + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + try { + withTxnAndAutoRetry( + this.session, + () => { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked( + this.sessionDb[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}}), + ); + } + }, + { + retryOnKilledSession: this.retryOnKilledSession, + prepareProbability: this.prepareProbability, + }, + ); + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyTxnMultikey( + this, + db, + myCollName, + {"op": "u", "ns": fullNs, "o2._id": docIds[0]}, + IndexType.REGULAR, + null, + fields, + ); + }, + + // --- Non-transactional states (NOT timestamp-consistent, log only) --- + + insertWildcardMkDirect: function insertWildcardMkDirect(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(WILDCARD_FIELDS); + const markers = nextIds(this, "wc_direct_", fields.length); + + try { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]})); + } + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyDirectMultikey( + this, + db, + myCollName, + {"ns": fullNs, "op": "i", "o._marker": markers[0]}, + {"ns": fullNs, "op": "i", "o._marker": markers[fields.length - 1]}, + IndexType.WILDCARD, + {"$**": 1}, + fields, + ); + }, + + insertRegularMkDirect: function insertRegularMkDirect(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(REGULAR_INDEX_FIELDS); + const markers = nextIds(this, "m", fields.length); + + try { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].insert({[fields[i]]: [1, 2], _marker: markers[i]})); + } + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyDirectMultikey( + this, + db, + myCollName, + {"ns": fullNs, "op": "i", "o._marker": markers[0]}, + {"ns": fullNs, "op": "i", "o._marker": markers[fields.length - 1]}, + IndexType.REGULAR, + null, + fields, + ); + }, + + updateWildcardMkDirect: function updateWildcardMkDirect(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(WILDCARD_FIELDS); + const docIds = nextIds(this, "u_wc_direct_", fields.length); + + try { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1})); + } + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}})); + } + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyDirectMultikey( + this, + db, + myCollName, + {"ns": fullNs, "op": "u", "o2._id": docIds[0]}, + {"ns": fullNs, "op": "u", "o2._id": docIds[fields.length - 1]}, + IndexType.WILDCARD, + {"$**": 1}, + fields, + ); + }, + + updateRegularMkDirect: function updateRegularMkDirect(db, collName) { + const myCollName = pickPoolColl(); + const fields = pickFieldSubset(REGULAR_INDEX_FIELDS); + const docIds = nextIds(this, "u_reg_direct_", fields.length); + + try { + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].insert({_id: docIds[i], [fields[i]]: 1})); + } + for (let i = 0; i < fields.length; i++) { + assert.commandWorked(db[myCollName].update({_id: docIds[i]}, {$set: {[fields[i]]: [1, 2]}})); + } + } catch (e) { + if (tolerateCollDropError(this, e)) return; + throw e; + } + + const fullNs = db.getName() + "." + myCollName; + verifyDirectMultikey( + this, + db, + myCollName, + {"ns": fullNs, "op": "u", "o2._id": docIds[0]}, + {"ns": fullNs, "op": "u", "o2._id": docIds[fields.length - 1]}, + IndexType.REGULAR, + null, + fields, + ); + }, + + dropAndRecreateColl: function dropAndRecreateColl(db, collName) { + // SERVER-126285 only affects fixtures without primary-driven index builds. + if (!this.usesPrimaryDrivenIndexBuilds) return; + + const myCollName = pickPoolColl(); + db[myCollName].drop(); + + // Tolerate IndexBuildAborted: another thread may drop the coll again while these + // index builds are running. + assert.commandWorkedOrFailedWithCode(db[myCollName].createIndex({"$**": 1}), ErrorCodes.IndexBuildAborted); + for (const f of REGULAR_INDEX_FIELDS) { + assert.commandWorkedOrFailedWithCode( + db[myCollName].createIndex({[f]: 1}), + ErrorCodes.IndexBuildAborted, + ); + } + }, + + dropAndRecreateIndex: function dropAndRecreateIndex(db, collName) { + // SERVER-126285 only affects fixtures without primary-driven index builds. + if (!this.usesPrimaryDrivenIndexBuilds) return; + + const myCollName = pickPoolColl(); + const targets = [{"$**": 1}].concat(REGULAR_INDEX_FIELDS.map((f) => ({[f]: 1}))); + const target = targets[Random.randInt(targets.length)]; + + // Tolerate races with other threads dropping or recreating the same index, and + // with dropAndRecreateColl removing the entire collection: + // IndexNotFound: another thread already dropped this index. + // NamespaceNotFound: dropAndRecreateColl dropped the collection. + // IndexBuildAborted: a concurrent dropIndexes/dropCollection aborted our build. + assert.commandWorkedOrFailedWithCode(db[myCollName].dropIndex(target), [ + ErrorCodes.IndexNotFound, + ErrorCodes.NamespaceNotFound, + ]); + assert.commandWorkedOrFailedWithCode(db[myCollName].createIndex(target), ErrorCodes.IndexBuildAborted); + }, + + // Batch pressure. + insertNoise: function insertNoise(db, collName) { + assert.commandWorked(db["padding_noise"].insert({noise: Random.randInt(100000), tid: this.tid})); + }, + }; + + function setup(db, collName, cluster) { + if (!cluster.isReplication()) return; + + for (let i = 0; i < POOL_SIZE; i++) { + const coll = poolCollName(i); + assert.commandWorked(db[coll].createIndex({"$**": 1})); + for (const f of REGULAR_INDEX_FIELDS) { + assert.commandWorked(db[coll].createIndex({[f]: 1})); + } + } + this.secondaryHost = cluster.getSecondaryHost(db.getName()); + + // TODO (SERVER-126285): alwasy allow the dropAndRecreate* states. Currently only enabled + // for primary-driven index builds, which avoid the IndexBuildInterceptor multikey-path leak + // (SERVER-126285) that makes multikeyness inconsistent between primary and secondaries. + this.usesPrimaryDrivenIndexBuilds = PersistenceProviderUtil.allNodesHavePropertyWithValue( + db, + "mustUsePrimaryDrivenIndexBuilds", + true, + ); + + cluster.awaitReplication(); + } + + function teardown(db, collName, cluster) { + if (cluster.isReplication()) { + cluster.awaitReplication(); + } + } + + const allTransitions = { + insertNoise: 0.3, + createCollAndInsertWildcardMkInTxn: 0.05, + createCollAndInsertRegularMkInTxn: 0.05, + createCollAndUpdateWildcardMkInTxn: 0.05, + createCollAndUpdateRegularMkInTxn: 0.05, + insertWildcardMkInTxn: 0.05, + insertRegularMkInTxn: 0.05, + updateWildcardMkInTxn: 0.05, + updateRegularMkInTxn: 0.05, + insertWildcardMkDirect: 0.05, + insertRegularMkDirect: 0.05, + updateWildcardMkDirect: 0.05, + updateRegularMkDirect: 0.05, + // Gated at runtime in the state bodies on `mustUsePrimaryDrivenIndexBuilds`: no-op on + // fixtures that don't use primary-driven index builds, see SERVER-126285. + dropAndRecreateIndex: 0.05, + dropAndRecreateColl: 0.05, + }; + + let transitions = {init: allTransitions}; + for (const state of Object.keys(allTransitions)) { + transitions[state] = allTransitions; + } + + return { + threadCount: 10, + iterations: 100, + startState: "init", + states: states, + transitions: transitions, + data: {retryOnKilledSession: false, prepareProbability: 0.5}, + setup: setup, + teardown: teardown, + }; +})(); diff --git a/jstests/core/txns/read_own_multikey_writes.js b/jstests/core/txns/read_own_multikey_writes.js index a46be82077f..2b947686586 100644 --- a/jstests/core/txns/read_own_multikey_writes.js +++ b/jstests/core/txns/read_own_multikey_writes.js @@ -1,31 +1,205 @@ // Tests that multikey updates made inside a transaction are visible to that transaction's reads. -// @tags: [assumes_unsharded_collection, uses_transactions] +// @tags: [ +// assumes_unsharded_collection, +// uses_transactions, +// ] const dbName = "test"; const collName = "testReadOwnMultikeyWrites"; -// Use majority write concern to clear the drop-pending that can cause lock conflicts with -// transactions. -db.getSiblingDB(dbName) - .getCollection(collName) - .drop({writeConcern: {w: "majority"}}); const session = db.getMongo().startSession({causalConsistency: false}); const sessionDb = session.getDatabase(dbName); -const sessionColl = sessionDb.getCollection(collName); -assert.commandWorked(sessionDb.runCommand({create: collName})); +(function testSimpleCase() { + // Use majority write concern to clear the drop-pending that can cause lock conflicts with + // transactions. + db.getSiblingDB(dbName) + .getCollection(collName) + .drop({writeConcern: {w: "majority"}}); -assert.commandWorked(sessionColl.insert({a: 1})); -assert.commandWorked( - sessionDb.runCommand({ - createIndexes: collName, - indexes: [{name: "a_1", key: {a: 1}}], - writeConcern: {w: "majority"}, - }), + const sessionColl = sessionDb.getCollection(collName); + + assert.commandWorked(sessionDb.runCommand({create: collName})); + + assert.commandWorked(sessionColl.insert({a: 1})); + assert.commandWorked( + sessionDb.runCommand({ + createIndexes: collName, + indexes: [{name: "a_1", key: {a: 1}}], + writeConcern: {w: "majority"}, + }), + ); + + session.startTransaction(); + assert.commandWorked(sessionColl.update({}, {$set: {a: [1, 2, 3]}})); + assert.eq(1, sessionColl.find({}, {_id: 0, a: 1}).sort({a: 1}).itcount()); + assert.commandWorked(session.commitTransaction_forTesting()); + + assert.eq(1, db.getSiblingDB(dbName).getCollection(collName).find({}, {_id: 0, a: 1}).sort({a: 1}).itcount()); +})(); + +// === Wildcard index multikey RYOW cases === +// +// These cases verify that wildcard index multikey state written inside a multi-document +// transaction is visible to subsequent reads in the same transaction. +// +// The query pattern uses contradictory range bounds: {field: {$gt: high, $lt: low}}. +// - Multikey-aware planner: generates a union of ($gt: high) and ($lt: low) bounds and +// post-filters; matches a document whose array contains elements satisfying each +// predicate independently. +// - Non-multikey planner: intersects the bounds into an empty range; returns nothing. +function runWildcardCase(collName, setupFn, queryHint, query, expectedCount, message) { + db.getSiblingDB(dbName) + .getCollection(collName) + .drop({writeConcern: {w: "majority"}}); + assert.commandWorked(sessionDb.runCommand({create: collName})); + assert.commandWorked( + sessionDb.runCommand({ + createIndexes: collName, + indexes: [{name: "wildcard", key: {"$**": 1}}], + writeConcern: {w: "majority"}, + }), + ); + + const wcColl = sessionDb.getCollection(collName); + session.startTransaction(); + setupFn(wcColl); + const results = wcColl.find(query).hint(queryHint).toArray(); + assert.eq(expectedCount, results.length, message + " got: " + tojson(results)); + assert.commandWorked(session.commitTransaction_forTesting()); +} + +// Sanity: a non-multikey scalar field must NOT match contradictory bounds. +runWildcardCase( + "testReadOwnWildcardMultikeyWrites_sanity", + (coll) => assert.commandWorked(coll.insert({a: 1})), + {"$**": 1}, + {a: {$gt: 8, $lt: 3}}, + 0, + "wildcard sanity (non-multikey scalar):", ); -session.startTransaction(); -assert.commandWorked(sessionColl.update({}, {$set: {a: [1, 2, 3]}})); -assert.eq(1, sessionColl.find({}, {_id: 0, a: 1}).sort({a: 1}).itcount()); -assert.commandWorked(session.commitTransaction_forTesting()); +// Single multikey field. +runWildcardCase( + "testReadOwnWildcardMultikeyWrites_single", + (coll) => assert.commandWorked(coll.insert({a: [1, 10]})), + {"$**": 1}, + {a: {$gt: 8, $lt: 3}}, + 1, + "wildcard RYOW single field:", +); -assert.eq(1, db.getSiblingDB(dbName).getCollection(collName).find({}, {_id: 0, a: 1}).sort({a: 1}).itcount()); +// Multiple multikey fields accumulate within a transaction. +(function testWildcardMultipleFields() { + const collName = "testReadOwnWildcardMultikeyWrites_multi"; + db.getSiblingDB(dbName) + .getCollection(collName) + .drop({writeConcern: {w: "majority"}}); + assert.commandWorked(sessionDb.runCommand({create: collName})); + assert.commandWorked( + sessionDb.runCommand({ + createIndexes: collName, + indexes: [{name: "wildcard", key: {"$**": 1}}], + writeConcern: {w: "majority"}, + }), + ); + + const wcColl = sessionDb.getCollection(collName); + session.startTransaction(); + assert.commandWorked(wcColl.insert({a: [1, 10]})); + assert.commandWorked(wcColl.insert({b: [2, 20]})); + + const resultsA = wcColl + .find({a: {$gt: 8, $lt: 3}}) + .hint({"$**": 1}) + .toArray(); + assert.eq(1, resultsA.length, "wildcard RYOW field 'a': " + tojson(resultsA)); + + const resultsB = wcColl + .find({b: {$gt: 15, $lt: 5}}) + .hint({"$**": 1}) + .toArray(); + assert.eq(1, resultsB.length, "wildcard RYOW field 'b': " + tojson(resultsB)); + + assert.commandWorked(session.commitTransaction_forTesting()); +})(); + +// Write, read, write, read — multikey state from later writes must be visible to subsequent +// reads in the same transaction. +(function testWildcardWriteReadWriteRead() { + const collName = "testReadOwnWildcardMultikeyWrites_wrwr"; + db.getSiblingDB(dbName) + .getCollection(collName) + .drop({writeConcern: {w: "majority"}}); + assert.commandWorked(sessionDb.runCommand({create: collName})); + assert.commandWorked( + sessionDb.runCommand({ + createIndexes: collName, + indexes: [{name: "wildcard", key: {"$**": 1}}], + writeConcern: {w: "majority"}, + }), + ); + + const wcColl = sessionDb.getCollection(collName); + session.startTransaction(); + + assert.commandWorked(wcColl.insert({_id: 1, a: [1, 10]})); + const r1 = wcColl + .find({a: {$gt: 8, $lt: 3}}) + .hint({"$**": 1}) + .toArray(); + assert.eq(1, r1.length, "wildcard RYOW W/R/W/R read 1: " + tojson(r1)); + + assert.commandWorked(wcColl.insert({_id: 2, b: [2, 20]})); + const r2 = wcColl + .find({b: {$gt: 15, $lt: 5}}) + .hint({"$**": 1}) + .toArray(); + assert.eq(1, r2.length, "wildcard RYOW W/R/W/R read 2: " + tojson(r2)); + + assert.commandWorked(session.commitTransaction_forTesting()); +})(); + +// $or across two distinct wildcard indexes within one parent transaction. +(function testWildcardOrAcrossTwoIndexes() { + const collName = "testReadOwnWildcardMultikeyWrites_or"; + db.getSiblingDB(dbName) + .getCollection(collName) + .drop({writeConcern: {w: "majority"}}); + assert.commandWorked(sessionDb.runCommand({create: collName})); + // Two wildcard indexes with disjoint projections so each branch of the $or has exactly + // one usable wildcard index. + assert.commandWorked( + sessionDb.runCommand({ + createIndexes: collName, + indexes: [ + {name: "wc_a", key: {"$**": 1}, wildcardProjection: {a: 1}}, + {name: "wc_c", key: {"$**": 1}, wildcardProjection: {c: 1}}, + ], + writeConcern: {w: "majority"}, + }), + ); + + const wcColl = sessionDb.getCollection(collName); + session.startTransaction(); + assert.commandWorked(wcColl.insert({_id: 1, a: [1, 10]})); + assert.commandWorked(wcColl.insert({_id: 2, c: [2, 20]})); + + // Validate each wildcard index individually with hint by name to force IXSCAN. + const resultsA = wcColl + .find({a: {$gt: 8, $lt: 3}}) + .hint("wc_a") + .toArray(); + assert.eq(1, resultsA.length, "wildcard $or per-index wc_a: " + tojson(resultsA)); + + const resultsC = wcColl + .find({c: {$gt: 15, $lt: 5}}) + .hint("wc_c") + .toArray(); + assert.eq(1, resultsC.length, "wildcard $or per-index wc_c: " + tojson(resultsC)); + + // $or query touching both wildcard indexes simultaneously. + const results = wcColl.find({$or: [{a: {$gt: 8, $lt: 3}}, {c: {$gt: 15, $lt: 5}}]}).toArray(); + assert.eq(2, results.length, "wildcard $or across 2 wildcard indexes: " + tojson(results)); + + assert.commandWorked(session.commitTransaction_forTesting()); +})(); diff --git a/jstests/noPassthrough/local_catalog/txn_multikey_timestamping.js b/jstests/noPassthrough/local_catalog/txn_multikey_timestamping.js index a58e8b105d2..2aadfa89ee7 100644 --- a/jstests/noPassthrough/local_catalog/txn_multikey_timestamping.js +++ b/jstests/noPassthrough/local_catalog/txn_multikey_timestamping.js @@ -2,6 +2,9 @@ * Tests that the multikey metadata set by a CRUD operation in a multi-document transaction is * timestamp-consistent among replicas, for both a committed transaction and an aborted transaction. * + * Covers both regular indexes (multikey paths tracked in the catalog) and wildcard indexes + * (multikey paths tracked as metadata key entries in the index itself). + * * Note that the timestamp consistency cannot be guaranteed after a logical initial sync. * * TODO (WT-15476): consider removing this test once timestamp consistency is turned on. @@ -16,27 +19,109 @@ import {ReplSetTest} from "jstests/libs/replsettest.js"; -function getMultikeyMetadata(node, dbName, collName, indexId, multiPathsKey, ts) { - return node - .getDB("admin") - .aggregate([{$listCatalog: {}}, {$match: {db: dbName, name: collName}}], { +const IndexType = Object.freeze({ + REGULAR: "regular", + WILDCARD: "wildcard", +}); + +/** + * Returns multikey metadata for the given index at the given timestamp, or null if the + * collection/index does not yet exist at that snapshot. + * + * For regular indexes, reads the multikeyPaths bitset from the catalog ($listCatalog). + * For wildcard indexes, reads the multiKeyPaths from the query planner (explain with atClusterTime), + * which reflects the actual metadata key entries in the index. + */ +function getMultikeyMetadata(node, indexType, dbName, collName, indexId, multiPathsKey, wildcardHint, ts) { + if (indexType == IndexType.REGULAR) { + const catalog = node + .getDB("admin") + .aggregate([{$listCatalog: {}}, {$match: {db: dbName, name: collName}}], { + readConcern: {level: "snapshot", atClusterTime: ts}, + }) + .toArray(); + // Collection does not exist at this snapshot (same-txn create-then-insert case walks + // timestamps earlier than the collection's creation). + if (catalog.length === 0) { + return null; + } + const idx = catalog[0]["md"]["indexes"][indexId]; + if (!idx) { + return null; + } + return idx["multikeyPaths"][multiPathsKey]; + } else { + const explain = node.getDB(dbName).runCommand({ + explain: {find: collName, filter: {[multiPathsKey]: 1}, hint: wildcardHint}, readConcern: {level: "snapshot", atClusterTime: ts}, - }) - .toArray()[0]["md"]["indexes"][indexId]["multikeyPaths"][multiPathsKey]; + verbosity: "queryPlanner", + }); + // If the collection does not yet exist at this snapshot, explain returns an error. + if (!explain.ok) { + return null; + } + // Walks the explain tree to find the wildcard IXSCAN. Handles both engines: + // - Classic: winningPlan.{stage:..., inputStage:{...}} + // - SBE: winningPlan.queryPlan.{stage:..., inputStage:{...}} + // SBE wraps the QuerySolutionNode tree under a 'queryPlan' field; recurse into it. + function findWildcardIxscan(plan) { + if (!plan) { + return null; + } + if (plan.stage === "IXSCAN" && plan.keyPattern && plan.keyPattern["$_path"]) { + return plan; + } + if (plan.queryPlan) { + const r = findWildcardIxscan(plan.queryPlan); + if (r) { + return r; + } + } + if (plan.inputStage) { + return findWildcardIxscan(plan.inputStage); + } + if (plan.inputStages) { + for (const s of plan.inputStages) { + const result = findWildcardIxscan(s); + if (result) { + return result; + } + } + } + return null; + } + const ixscan = findWildcardIxscan(explain.queryPlanner.winningPlan); + return ixscan?.multiKeyPaths?.[multiPathsKey] ?? []; + } } -// Returns the highest timestamp present in the oplog that is less than the given timestamp. +/** + * Returns true if the multikey metadata indicates "not multikey" for the given index type, or the + * collection/index does not exist yet at this snapshot. + */ +function absentMultikeyMetadata(multikeyMetadata, indexType) { + if (multikeyMetadata === null) { + // Collection/index not yet created at this snapshot. + return true; + } + if (indexType == IndexType.REGULAR) { + return bsonBinaryEqual(multikeyMetadata, BinData(0, "AA==")); // byte [0] + } else { + return multikeyMetadata.length === 0; + } +} + +/** + * Returns the highest timestamp present in the oplog that is less than the given timestamp. + */ function decrementTimestamp(node, ts) { - // Query the oplog to find the highest timestamp entry that is lower than the given timestamp. const oplogEntry = node .getDB("local") .oplog.rs.find({ts: {$lt: ts}}) .sort({ts: -1}) .limit(1) .toArray()[0]; - assert(oplogEntry, "No earlier oplog entry found for timestamp: " + tojson(ts)); - return oplogEntry.ts; } @@ -54,30 +139,114 @@ const TxnCommitOrAbort = Object.freeze({ ABORT: "abort", }); -function verifyTimestampConsistency(commitOrAbort, collName) { - jsTestLog( - "Verifying multikey timestamp consistency with transaction that will " + - commitOrAbort + - " against collection: " + +/** + * Walks the oplog backwards from the latest timestamp. At each timestamp, fetches multikey + * metadata on both primary and secondary and asserts they are identical. Stops when it finds a + * timestamp at which multikey metadata is absent (i.e. before it was first recorded). Asserts that + * at least `expectedMinTimestamps` timestamps had multikey metadata. + */ +function walkAndAssertReplicaMultikeyConsistency({ + indexType, + collName, + indexId, + multiPathsKey, + wildcardHint, + expectedMinTimestamps, + context, +}) { + // Ensure primaries have caught up before walking. + rst.awaitReplication(); + + const latestTs = primary.getDB("local").oplog.rs.find().sort({$natural: -1}).limit(1).toArray()[0].ts; + jsTestLog("Latest oplog timestamp for " + context + ": " + tojson(latestTs)); + + let currentTs = latestTs; + let timestampsWithMultikeyness = 0; + + while (true) { + jsTestLog( + "Checking replica set multikey consistency at timestamp: " + tojson(currentTs) + " (" + context + ")", + ); + const primaryMultikeyMetadata = getMultikeyMetadata( + primary, + indexType, + dbName, collName, + indexId, + multiPathsKey, + wildcardHint, + currentTs, + ); + const secondaryMultikeyMetadata = getMultikeyMetadata( + secondary, + indexType, + dbName, + collName, + indexId, + multiPathsKey, + wildcardHint, + currentTs, + ); + + assert.eq( + primaryMultikeyMetadata, + secondaryMultikeyMetadata, + "Primary and secondary multikey metadata differ at timestamp " + + tojson(currentTs) + + " (" + + context + + "), iteration: " + + timestampsWithMultikeyness, + ); + + if (absentMultikeyMetadata(primaryMultikeyMetadata, indexType)) { + assert.gte( + timestampsWithMultikeyness, + expectedMinTimestamps, + "Expected at least " + expectedMinTimestamps + " timestamp(s) with multikey metadata for " + context, + ); + break; + } + + timestampsWithMultikeyness++; + currentTs = decrementTimestamp(primary, currentTs); + } +} + +/** + * Verifies timestamp-consistency of multikey metadata between primary and secondary after a + * multi-document transaction that inserts into a pre-existing indexed collection. + * + * Expected minimum timestamps: + * - Txn commit: 2 (setMultikeyMetadata entry + commit entry) + * - Txn abort: 1 (setMultikeyMetadata entry committed by the side txn survives abort) + */ +function verifyTimestampConsistency(commitOrAbort, indexType, collName) { + jsTestLog( + "Verifying multikey timestamp consistency for " + + indexType + + " index against collection '" + + collName + + "' with transaction that will " + + commitOrAbort, ); const testColl = testDB[collName]; assert.commandWorked(testDB.runCommand({drop: collName})); - // Create collection and index. - assert.commandWorked(testColl.createIndex({a: 1})); + if (indexType == IndexType.REGULAR) { + assert.commandWorked(testColl.createIndex({a: 1})); + } else { + assert.commandWorked(testColl.createIndex({"$**": 1})); + } - // Start a multi-document transaction. const session = testDB.getMongo().startSession(); const sessionDB = session.getDatabase(dbName); const sessionColl = sessionDB[collName]; session.startTransaction(); - // Insert a document with an array value for field 'a', making the index multikey. assert.commandWorked(sessionColl.insert({a: [1, 2]})); - // Commit the transaction. if (commitOrAbort == TxnCommitOrAbort.COMMIT) { assert.commandWorked(session.commitTransaction_forTesting()); jsTestLog("Transaction committed as expected"); @@ -87,44 +256,79 @@ function verifyTimestampConsistency(commitOrAbort, collName) { } session.endSession(); - const latestTs = primary.getDB("local").oplog.rs.find().sort({$natural: -1}).limit(1).toArray()[0].ts; - jsTestLog("Latest oplog timestamp after transaction " + commitOrAbort + ": " + tojson(latestTs)); + // Commit: both the setMultikeyMetadata side-txn oplog entry and the parent commit's insert + // emit multikey state updates — 2 distinct timestamps show multikey metadata. + // Abort: only the setMultikeyMetadata side-txn oplog entry survives the aborted parent — + // 1 timestamp shows multikey metadata. + const expectedMinTimestamps = commitOrAbort == TxnCommitOrAbort.COMMIT ? 2 : 1; - let currentTs = latestTs; - let timestampsWithMultikeyness = 0; - const absentMultikeyMetadata = BinData(0, "AA=="); // Base64 encoding of byte [0] - - while (true) { - jsTestLog("Checking replica set consistency of multikeyness at timestamp: " + tojson(currentTs)); - const primaryMultikeyMetadata = getMultikeyMetadata(primary, dbName, collName, 1, "a", currentTs); - const secondaryMultikeyMetadata = getMultikeyMetadata(secondary, dbName, collName, 1, "a", currentTs); - - // Verify primary and secondary have identical multikeyness. - assert.eq( - primaryMultikeyMetadata, - secondaryMultikeyMetadata, - "Primary and secondary multikey paths differ at timestamp " + - tojson(currentTs) + - ", iteration: " + - timestampsWithMultikeyness, - ); - - if (bsonBinaryEqual(primaryMultikeyMetadata, absentMultikeyMetadata)) { - // If the transaction committed, we expect at least two timestamps with multikeyness: - // the setMultikeyMetadata timestamp, and the transaction's commit timestamp. - // If the transaction aborted, we expect only one timestamp with multikeyness: - // the setMultikeyMetadata timestamp. - const expectedMinimumMultikeyTimestamps = commitOrAbort == TxnCommitOrAbort.COMMIT ? 2 : 1; - assert.gte(timestampsWithMultikeyness, expectedMinimumMultikeyTimestamps); - break; - } - - timestampsWithMultikeyness++; - currentTs = decrementTimestamp(primary, currentTs); - } + walkAndAssertReplicaMultikeyConsistency({ + indexType, + collName, + indexId: 1, // index 0 is _id, index 1 is the user index. + multiPathsKey: "a", + wildcardHint: {"$**": 1}, + expectedMinTimestamps, + context: indexType + "/" + commitOrAbort + "/" + collName, + }); } -verifyTimestampConsistency(TxnCommitOrAbort.COMMIT, "coll_commit"); -verifyTimestampConsistency(TxnCommitOrAbort.ABORT, "coll_abort"); +verifyTimestampConsistency(TxnCommitOrAbort.COMMIT, IndexType.REGULAR, "coll_regular_commit"); +verifyTimestampConsistency(TxnCommitOrAbort.ABORT, IndexType.REGULAR, "coll_regular_abort"); +verifyTimestampConsistency(TxnCommitOrAbort.COMMIT, IndexType.WILDCARD, "coll_wildcard_commit"); +verifyTimestampConsistency(TxnCommitOrAbort.ABORT, IndexType.WILDCARD, "coll_wildcard_abort"); + +/** + * Same as `verifyTimestampConsistency`, but creates the collection + index inside the same + * transaction as the multikey-triggering insert. Exercises the path where the side transaction + * cannot see the index (it was created in the parent transaction), so metadata keys are written in + * the parent transaction instead. + * + * Reuses the shared walker to assert primary/secondary multikey metadata consistency at every + * oplog timestamp. For this path, multikey metadata becomes visible starting at the commit + * timestamp, so the minimum expected count is 1. + */ +function verifyIndexCreatedInSameTransaction(indexType, collName) { + jsTestLog( + "Verifying multikey timestamp consistency for " + + indexType + + " index created in same transaction against collection '" + + collName + + "'", + ); + + assert.commandWorked(testDB.runCommand({drop: collName})); + + const session = testDB.getMongo().startSession(); + const sessionDB = session.getDatabase(dbName); + session.startTransaction(); + + // Create collection and index inside the transaction. + assert.commandWorked(sessionDB.createCollection(collName)); + if (indexType == IndexType.REGULAR) { + assert.commandWorked(sessionDB[collName].createIndex({a: 1})); + } else { + assert.commandWorked(sessionDB[collName].createIndex({"$**": 1})); + } + + // Insert a document that makes the index multikey. + assert.commandWorked(sessionDB[collName].insert({a: [1, 2]})); + + assert.commandWorked(session.commitTransaction_forTesting()); + session.endSession(); + + walkAndAssertReplicaMultikeyConsistency({ + indexType, + collName, + indexId: 1, // index 0 is _id, index 1 is the user index. + multiPathsKey: "a", + wildcardHint: {"$**": 1}, + expectedMinTimestamps: 1, + context: indexType + "/same-txn-create/" + collName, + }); +} + +verifyIndexCreatedInSameTransaction(IndexType.REGULAR, "coll_regular_same_txn"); +verifyIndexCreatedInSameTransaction(IndexType.WILDCARD, "coll_wildcard_same_txn"); rst.stopSet(); diff --git a/src/mongo/db/index/wildcard_key_generator.cpp b/src/mongo/db/index/wildcard_key_generator.cpp index c72d791cf39..f3a0e066d89 100644 --- a/src/mongo/db/index/wildcard_key_generator.cpp +++ b/src/mongo/db/index/wildcard_key_generator.cpp @@ -96,14 +96,6 @@ void appendToKeyString(const std::vector& elems, } } -// Append 'MinKey' to 'keyString'. Multikey path keys use 'MinKey' for non-wildcard fields. -void appendToMultiKeyString(const std::vector& elems, - key_string::PooledBuilder* keyString) { - for (size_t i = 0; i < elems.size(); i++) { - keyString->appendBSONElement(kMinBSONKey.firstElement()); - } -} - /** * A helper class for generating all the various types of keys for a wildcard index. * @@ -232,22 +224,14 @@ void SingleDocumentKeyEncoder::_addMultiKey(const FieldRef& fullPath) { // 'multikeyPaths' may be nullptr if the access method is being used in an operation which does // not require multikey path generation. if (_multikeyPaths) { - key_string::PooledBuilder keyString(_pooledBufferBuilder, _keyStringVersion, _ordering); - - if (!_preElems.empty()) { - appendToMultiKeyString(_preElems, &keyString); - } - for (auto elem : BSON("" << 1 << "" << fullPath.dottedField())) { - keyString.appendBSONElement(elem); - } - if (!_postElems.empty()) { - appendToMultiKeyString(_postElems, &keyString); - } - - keyString.appendRecordId(record_id_helpers::reservedIdFor( - record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, *_rsKeyFormat)); - - _multikeyPaths->push_back(keyString.release()); + _multikeyPaths->push_back( + WildcardKeyGenerator::makeMultikeyMetadataKey(fullPath.dottedField(), + _preElems.size(), + _postElems.size(), + _keyStringVersion, + _ordering, + *_rsKeyFormat, + _pooledBufferBuilder)); } } @@ -433,4 +417,29 @@ void WildcardKeyGenerator::generateKeys(SharedBufferFragmentBuilder& pooledBuffe multikeyPaths->adopt_sequence(std::move(multikeyPathsSequence)); keys->adopt_sequence(std::move(keysSequence)); } + +key_string::Value WildcardKeyGenerator::makeMultikeyMetadataKey( + StringData fieldPath, + size_t prefixFieldCount, + size_t suffixFieldCount, + key_string::Version version, + Ordering ordering, + KeyFormat rsKeyFormat, + SharedBufferFragmentBuilder& pooledBuilder) { + key_string::PooledBuilder keyString(pooledBuilder, version, ordering); + + for (size_t i = 0; i < prefixFieldCount; ++i) { + keyString.appendBSONElement(kMinBSONKey.firstElement()); + } + for (auto elem : BSON("" << 1 << "" << fieldPath)) { + keyString.appendBSONElement(elem); + } + for (size_t i = 0; i < suffixFieldCount; ++i) { + keyString.appendBSONElement(kMinBSONKey.firstElement()); + } + keyString.appendRecordId(record_id_helpers::reservedIdFor( + record_id_helpers::ReservationId::kWildcardMultikeyMetadataId, rsKeyFormat)); + + return keyString.release(); +} } // namespace mongo diff --git a/src/mongo/db/index/wildcard_key_generator.h b/src/mongo/db/index/wildcard_key_generator.h index 26b81015ca4..cd582962aea 100644 --- a/src/mongo/db/index/wildcard_key_generator.h +++ b/src/mongo/db/index/wildcard_key_generator.h @@ -99,6 +99,19 @@ public: KeyStringSet* multikeyPaths, const boost::optional& id = boost::none) const; + /** + * Builds a single wildcard multikey metadata KeyString for 'fieldPath'. Used both during + * document-driven key generation on the primary and when regenerating keys from a + * `setMultikeyMetadata` oplog entry on the secondary. + */ + static key_string::Value makeMultikeyMetadataKey(StringData fieldPath, + size_t prefixFieldCount, + size_t suffixFieldCount, + key_string::Version version, + Ordering ordering, + KeyFormat rsKeyFormat, + SharedBufferFragmentBuilder& pooledBuilder); + private: WildcardProjection _proj; const CollatorInterface* _collator; diff --git a/src/mongo/db/index_names.h b/src/mongo/db/index_names.h index c068b1be329..ae7d03f52da 100644 --- a/src/mongo/db/index_names.h +++ b/src/mongo/db/index_names.h @@ -105,7 +105,7 @@ public: /** * Contain utilities to work with wildcard fields used for Wildcard indexes. */ -struct WildcardNames { +struct MONGO_MOD_PUBLIC WildcardNames { static constexpr StringData WILDCARD_FIELD_NAME = "$**"_sd; static constexpr StringData WILDCARD_FIELD_NAME_SUFFIX = ".$**"_sd; diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index c4b8164c443..83543653923 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -92,6 +92,13 @@ OperationContext::~OperationContext() { releaseOperationKey(); } +RecoveryUnit& OperationContext::getOrCreateWildcardMultikeyReadRecoveryUnit() { + if (!_wildcardMultikeyReadRu) { + _wildcardMultikeyReadRu = getServiceContext()->getStorageEngine()->newRecoveryUnit(); + } + return *_wildcardMultikeyReadRu; +} + void OperationContext::setDeadlineAndMaxTime(Date_t when, Microseconds maxTime, ErrorCodes::Error timeoutError) { diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index 968a1675282..2e5f6d23c5d 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -356,6 +356,33 @@ public: } } + /** + * Returns true if a side transaction has committed wildcard multikey metadata keys + * during this transaction. Set by the side transaction commit path and propagated + * to each new opCtx via unstashTransactionResources(). + */ + bool hasSideCommittedWildcardKeys() const { + return _hasSideCommittedWildcardKeys; + } + + void setHasSideCommittedWildcardKeys() { + _hasSideCommittedWildcardKeys = true; + // Defensive invalidation. As each statement in a multi-document transaction runs in its + // own operation context, but we don't want to rely on that assumption holding forever. + _wildcardMultikeyReadRu.reset(); + } + + /** + * Returns the side recovery unit used to read wildcard multikey metadata keys committed + * by side transactions during this multi-doc transaction. Lazily creates a fresh RU on + * first call so the snapshot reflects state at "now" (after side commits). Subsequent + * calls within the same planning operation reuse the same RU, providing a consistent + * snapshot across all wildcard indexes referenced by the query (e.g. $or over multiple + * wildcard indexes). Invalidated by setHasSideCommittedWildcardKeys() so newly side- + * committed keys are visible on the next read (RYOW). + */ + RecoveryUnit& getOrCreateWildcardMultikeyReadRecoveryUnit(); + bool isRetryableWrite() const { return _txnNumber && (!_inMultiDocumentTransaction || @@ -645,6 +672,8 @@ public: _txnNumber = boost::none; _txnRetryCounter = boost::none; _killOpsExempt = false; + _hasSideCommittedWildcardKeys = false; + _wildcardMultikeyReadRu.reset(); } /** @@ -1140,6 +1169,14 @@ private: bool _inMultiDocumentTransaction = false; bool _isStartingMultiDocumentTransaction = false; bool _isActiveTransactionParticipant = false; + // TODO (SERVER-77213): Move multi-statement transaction state out of operation context. + bool _hasSideCommittedWildcardKeys = false; + // TODO (SERVER-77213): Move multi-statement transaction state out of operation context. + // Cached side recovery unit used by getOrCreateWildcardMultikeyReadRecoveryUnit() to + // read side-committed wildcard multikey metadata keys during query planning. Lazily + // created and invalidated when a new side commit occurs, so reads always see the + // latest side-committed state across all wildcard indexes referenced by a query. + std::unique_ptr _wildcardMultikeyReadRu; bool _isCommandForwardedFromRouter = false; // Commands from user applications must run validations and enforce constraints. Operations from // a trusted source, such as initial sync or consuming an oplog entry generated by a primary diff --git a/src/mongo/db/query/wildcard_multikey_paths.cpp b/src/mongo/db/query/wildcard_multikey_paths.cpp index c222bec46ea..5c71efb7c3d 100644 --- a/src/mongo/db/query/wildcard_multikey_paths.cpp +++ b/src/mongo/db/query/wildcard_multikey_paths.cpp @@ -34,6 +34,7 @@ #include // IWYU pragma: no_include "boost/intrusive/detail/iterator.hpp" +#include "mongo/base/checked_cast.h" #include "mongo/base/string_data.h" #include "mongo/bson/bsonelement.h" #include "mongo/bson/bsonmisc.h" @@ -53,12 +54,15 @@ #include "mongo/db/query/wildcard_multikey_paths.h" #include "mongo/db/record_id.h" #include "mongo/db/record_id_helpers.h" +#include "mongo/db/service_context.h" #include "mongo/db/shard_role/lock_manager/exception_util.h" #include "mongo/db/shard_role/shard_catalog/index_catalog_entry.h" #include "mongo/db/shard_role/shard_catalog/index_descriptor.h" #include "mongo/db/storage/index_entry_comparison.h" #include "mongo/db/storage/key_format.h" +#include "mongo/db/storage/recovery_unit.h" #include "mongo/db/storage/sorted_data_interface.h" +#include "mongo/db/storage/storage_engine.h" #include "mongo/util/assert_util.h" #include "mongo/util/str.h" @@ -161,30 +165,25 @@ static BSONObj buildIndexBoundsKeyPattern(const BSONObj& wiKeyPattern) { } /** - * Retrieves from the wildcard index the set of multikey path metadata keys bounded by - * 'indexBounds'. Returns the set of multikey paths represented by the keys. + * Scans wildcard multikey metadata keys from the index using the given recovery unit and + * index bounds. Adds discovered multikey paths to 'multikeyPaths' and updates 'stats'. */ -static std::set getWildcardMultikeyPathSetHelper(OperationContext* opCtx, - const IndexCatalogEntry* index, - const IndexBounds& indexBounds, - MultikeyMetadataAccessStats* stats) { - const WildcardAccessMethod* wam = - static_cast(index->accessMethod()); - - stats->numSeeks = 0; - stats->keysExamined = 0; - auto& ru = *shard_role_details::getRecoveryUnit(opCtx); +static void scanWildcardMetadataKeys(OperationContext* opCtx, + const WildcardAccessMethod* wam, + RecoveryUnit& ru, + const IndexBounds& indexBounds, + const BSONObj& keyPattern, + std::set* multikeyPaths, + MultikeyMetadataAccessStats* stats) { auto cursor = wam->newCursor(opCtx, ru); constexpr int kForward = 1; - const auto keyPattern = buildIndexBoundsKeyPattern(index->descriptor()->keyPattern()); IndexBoundsChecker checker(&indexBounds, keyPattern, kForward); IndexSeekPoint seekPoint; if (!checker.getStartSeekPoint(&seekPoint)) { - return {}; + return; } - std::set multikeyPaths{}; key_string::Builder builder(wam->getSortedDataInterface()->getKeyStringVersion(), wam->getSortedDataInterface()->getOrdering()); @@ -197,7 +196,7 @@ static std::set getWildcardMultikeyPathSetHelper(OperationContext* opC switch (checker.checkKey(entry->key, &seekPoint)) { case IndexBoundsChecker::VALID: - multikeyPaths.emplace(extractMultikeyPathFromIndexKey(*entry)); + multikeyPaths->emplace(extractMultikeyPathFromIndexKey(*entry)); entry = cursor->next(ru); break; @@ -219,6 +218,52 @@ static std::set getWildcardMultikeyPathSetHelper(OperationContext* opC MONGO_UNREACHABLE; } } +} + +/** + * Retrieves from the wildcard index the set of multikey path metadata keys bounded by + * 'indexBounds'. Returns the set of multikey paths represented by the keys. + */ +static std::set getWildcardMultikeyPathSetHelper(OperationContext* opCtx, + const IndexCatalogEntry* index, + const IndexBounds& indexBounds, + MultikeyMetadataAccessStats* stats) { + const WildcardAccessMethod* wam = + checked_cast(index->accessMethod()); + + stats->numSeeks = 0; + stats->keysExamined = 0; + + const auto keyPattern = buildIndexBoundsKeyPattern(index->descriptor()->keyPattern()); + auto& parentRu = *shard_role_details::getRecoveryUnit(opCtx); + + std::set multikeyPaths{}; + + // Always read from the parent transaction's RU first. This covers the fallback case + // where the side transaction was abandoned (because the index was created in the same + // transaction and is not visible to the side transaction's snapshot). In that case, + // metadata keys were written directly in the parent transaction and are only visible + // through the parent's RU — the fresh snapshot below cannot see uncommitted parent writes. + scanWildcardMetadataKeys(opCtx, wam, parentRu, indexBounds, keyPattern, &multikeyPaths, stats); + + // In active multi-document transactions, also read from a fresh snapshot to see + // side-committed metadata keys — but only if a side transaction actually committed + // wildcard metadata keys during this transaction. The parent transaction's WT snapshot + // predates the side transaction's commit, so it can't see those keys. The union of both + // scans covers both code paths: + // - Normal case (side txn succeeded): fresh RU sees the side-committed keys. + // - Fallback case (side txn abandoned): parent RU sees its own uncommitted writes. + // Safe because multikey is monotonic — the union is always correct. + if (opCtx->inMultiDocumentTransaction() && parentRu.isActive() && + opCtx->hasSideCommittedWildcardKeys()) { + // Cached on opCtx so a query touching multiple wildcard indexes (e.g. $or over two + // wildcard indexes) reads them through one consistent snapshot. The cache is reset + // by setHasSideCommittedWildcardKeys() so the next read after a new side commit + // opens a fresh snapshot that sees the just-committed keys (RYOW). + auto& freshRu = opCtx->getOrCreateWildcardMultikeyReadRecoveryUnit(); + scanWildcardMetadataKeys( + opCtx, wam, freshRu, indexBounds, keyPattern, &multikeyPaths, stats); + } return multikeyPaths; } diff --git a/src/mongo/db/repl/BUILD.bazel b/src/mongo/db/repl/BUILD.bazel index 60c20bcf60d..edd77176281 100644 --- a/src/mongo/db/repl/BUILD.bazel +++ b/src/mongo/db/repl/BUILD.bazel @@ -859,9 +859,11 @@ mongo_cc_library( "//src/mongo/db:dbhelpers", "//src/mongo/db:import_collection_oplog_entry", "//src/mongo/db:multitenancy", + "//src/mongo/db:record_id_helpers", "//src/mongo/db:server_base", "//src/mongo/db/collection_crud", "//src/mongo/db/commands:txn_cmd_request", + "//src/mongo/db/index:index_access_method", "//src/mongo/db/index_builds:index_build_oplog_entry", "//src/mongo/db/index_builds:index_builds_coordinator", "//src/mongo/db/index_builds/primary_driven:primary_driven_index_builds", @@ -883,6 +885,7 @@ mongo_cc_library( "//src/mongo/db/shard_role/ddl:index_commands_idl", "//src/mongo/db/shard_role/lock_manager:exception_util", "//src/mongo/db/shard_role/shard_catalog:catalog_helpers", + "//src/mongo/db/shard_role/shard_catalog:set_multikey_metadata_oplog_helpers", "//src/mongo/db/stats:counters", "//src/mongo/db/stats:server_read_concern_write_concern_metrics", "//src/mongo/db/storage:storage_engine_direct_crud", diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp index bbe767ae86a..db5b827fe2e 100644 --- a/src/mongo/db/repl/oplog.cpp +++ b/src/mongo/db/repl/oplog.cpp @@ -49,11 +49,14 @@ #include "mongo/db/dbdirectclient.h" #include "mongo/db/dbhelpers.h" #include "mongo/db/import_collection_oplog_entry_gen.h" +#include "mongo/db/index/index_access_method.h" #include "mongo/db/index/index_constants.h" +#include "mongo/db/index/wildcard_access_method.h" #include "mongo/db/index_builds/index_build_oplog_entry.h" #include "mongo/db/index_builds/index_builds_coordinator.h" #include "mongo/db/index_builds/index_builds_manager.h" #include "mongo/db/index_builds/primary_driven/util.h" +#include "mongo/db/index_names.h" #include "mongo/db/namespace_string.h" #include "mongo/db/namespace_string_util.h" #include "mongo/db/op_observer/batched_write_context.h" @@ -68,6 +71,7 @@ #include "mongo/db/query/write_ops/update_request.h" #include "mongo/db/query/write_ops/update_result.h" #include "mongo/db/query/write_ops/write_ops_parsers.h" +#include "mongo/db/record_id_helpers.h" #include "mongo/db/repl/always_allow_non_local_writes.h" #include "mongo/db/repl/apply_ops.h" #include "mongo/db/repl/container_oplog_entry_gen.h" @@ -117,6 +121,7 @@ #include "mongo/db/shard_role/shard_catalog/index_catalog.h" #include "mongo/db/shard_role/shard_catalog/index_descriptor.h" #include "mongo/db/shard_role/shard_catalog/rename_collection.h" +#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h" #include "mongo/db/shard_role/shard_catalog/uncommitted_catalog_updates.h" #include "mongo/db/shard_role/shard_role.h" #include "mongo/db/shard_role/transaction_resources.h" @@ -125,10 +130,12 @@ #include "mongo/db/stats/counters.h" #include "mongo/db/stats/server_write_concern_metrics.h" #include "mongo/db/storage/ident.h" +#include "mongo/db/storage/key_string/key_string.h" #include "mongo/db/storage/kv/kv_engine.h" #include "mongo/db/storage/oplog_truncate_marker_parameters_gen.h" #include "mongo/db/storage/record_data.h" #include "mongo/db/storage/recovery_unit.h" +#include "mongo/db/storage/sorted_data_interface.h" #include "mongo/db/storage/storage_engine.h" #include "mongo/db/storage/storage_engine_direct_crud.h" #include "mongo/db/storage/storage_options.h" @@ -1428,7 +1435,7 @@ const StringMap kOpsMap = { const auto setMkEntry = SetMultikeyMetadataOplogEntry::parse(cmd); const auto idxName = setMkEntry.getIdxName(); - const auto paths = uassertStatusOK(multikey_paths::parse(setMkEntry.getPaths())); + const auto pathsObj = setMkEntry.getPaths(); // Ignore setMultikeyMetadata when in initial sync, to avoid having to deal with // idempotency. This is acceptable because by design, the resynced node is not expected to @@ -1463,8 +1470,30 @@ const StringMap kOpsMap = { << "Failed to set multikey paths due to missing index: " << idxName, idxEntry); + // Determine if this is a wildcard index from the catalog, then either regenerate + // wildcard metadata KeyStrings or parse regular multikey paths. + const bool isWildcard = + idxEntry->descriptor()->getAccessMethodName() == IndexNames::WILDCARD; + + KeyStringSet metadataKeys; + MultikeyPaths multikeyPaths; + if (isWildcard) { + const auto* sdi = + idxEntry->accessMethod()->asSortedData()->getSortedDataInterface(); + metadataKeys = + set_multikey_metadata_oplog_helpers::regenerateMetadataKeysFromFieldPaths( + pathsObj, + sdi->getKeyStringVersion(), + sdi->getOrdering(), + sdi->rsKeyFormat(), + idxEntry->descriptor()->keyPattern()); + } else { + multikeyPaths = uassertStatusOK(multikey_paths::parse(pathsObj)); + } + WriteUnitOfWork wuow(opCtx); - idxEntry->setMultikeyForApplyOps(opCtx, coll.getCollectionPtr(), paths); + idxEntry->setMultikeyForApplyOps( + opCtx, coll.getCollectionPtr(), metadataKeys, multikeyPaths); wuow.commit(); }); return Status::OK(); diff --git a/src/mongo/db/repl/storage_timestamp_test.cpp b/src/mongo/db/repl/storage_timestamp_test.cpp index 9a797fba5b0..8cf877d3683 100644 --- a/src/mongo/db/repl/storage_timestamp_test.cpp +++ b/src/mongo/db/repl/storage_timestamp_test.cpp @@ -83,6 +83,7 @@ #include "mongo/db/repl/replication_process.h" #include "mongo/db/repl/replication_recovery.h" #include "mongo/db/repl/replication_recovery_mock.h" +#include "mongo/db/repl/set_multikey_metadata_oplog_entry_gen.h" #include "mongo/db/repl/storage_interface.h" #include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/repl/timestamp_block.h" @@ -111,6 +112,7 @@ #include "mongo/db/shard_role/shard_catalog/index_catalog.h" #include "mongo/db/shard_role/shard_catalog/index_catalog_entry.h" #include "mongo/db/shard_role/shard_catalog/index_descriptor.h" +#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h" #include "mongo/db/shard_role/shard_role.h" #include "mongo/db/shard_role/transaction_resources.h" #include "mongo/db/storage/damage_vector.h" @@ -1780,6 +1782,612 @@ TEST_F(StorageTimestampTest, PrimarySetsMultikeyInsideMultiDocumentTransaction) assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {{0}}); } +TEST_F(StorageTimestampTest, PrimarySetsWildcardMultikeyInsideMultiDocumentTransaction) { + auto service = _opCtx->getServiceContext(); + auto sessionCatalog = SessionCatalog::get(service); + sessionCatalog->reset_forTest(); + auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx); + mongoDSessionCatalog->onStepUp(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.PrimarySetsWildcardMultikeyInsideMultiDocumentTransaction"); + create(nss); + + auto indexName = "wildcard"; + auto indexSpec = BSON("name" << indexName << "ns" << nss.ns_forTest() << "key" + << BSON("$**" << 1) << "v" << static_cast(kIndexVersion)); + auto doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2)); + + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + + auto storageEngine = cc().getServiceContext()->getStorageEngine(); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); + + const auto beforeTxnTime = _clock->tickClusterTime(1); + auto beforeTxnTs = beforeTxnTime.asTimestamp(); + + const auto sessionId = makeLogicalSessionIdForTest(); + _opCtx->setLogicalSessionId(sessionId); + _opCtx->setTxnNumber(1); + _opCtx->setInMultiDocumentTransaction(); + + auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx); + auto txnParticipant = TransactionParticipant::get(_opCtx); + ASSERT(txnParticipant); + + txnParticipant.beginOrContinue(_opCtx, + {*_opCtx->getTxnNumber()}, + false /* autocommit */, + TransactionParticipant::TransactionActions::kStart); + txnParticipant.unstashTransactionResources(_opCtx, "insert"); + { + auto collAcq = acquireCollection( + _opCtx, + CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite), + MODE_IX); + insertDocument(collAcq.getCollectionPtr(), InsertStatement(doc)); + } + + txnParticipant.commitUnpreparedTransaction(_opCtx); + txnParticipant.stashTransactionResources(_opCtx); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + // Verify the document was committed. + assertDocumentAtTimestamp(collPtr, presentTs, BSONObj()); + assertDocumentAtTimestamp(collPtr, _nullTs, doc); + + // Verify the catalog multikey flag is set at the latest timestamp. + assertMultikeyPaths(_opCtx, collPtr, indexName, presentTs, false /* shouldBeMultikey */, {}); + assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {}); + + // Verify the wildcard metadata keys are present at the latest timestamp. + // The field "a" is an array, so it should be recorded as a multikey path. + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + stdx::unordered_set fieldSet = {"a"}; + { + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + { + // Before the transaction started, no metadata keys should be visible. + OneOffRead oor(_opCtx, beforeTxnTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(0, paths.size()); + } + + // Before the transaction, the catalog multikey flag should not be set. + assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {}); +} + +TEST_F(StorageTimestampTest, + PrimarySetsWildcardMultikeyInsideMultiDocumentTransactionMultiplePaths) { + auto service = _opCtx->getServiceContext(); + auto sessionCatalog = SessionCatalog::get(service); + sessionCatalog->reset_forTest(); + auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx); + mongoDSessionCatalog->onStepUp(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.PrimarySetsWildcardMultikeyInsideMultiDocTxnMultiplePaths"); + create(nss); + + auto indexName = "wildcard"; + auto indexSpec = BSON("name" << indexName << "ns" << nss.ns_forTest() << "key" + << BSON("$**" << 1) << "v" << static_cast(kIndexVersion)); + + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + + auto storageEngine = cc().getServiceContext()->getStorageEngine(); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); + + const auto beforeTxnTime = _clock->tickClusterTime(1); + auto beforeTxnTs = beforeTxnTime.asTimestamp(); + + const auto sessionId = makeLogicalSessionIdForTest(); + _opCtx->setLogicalSessionId(sessionId); + _opCtx->setTxnNumber(1); + _opCtx->setInMultiDocumentTransaction(); + + auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx); + auto txnParticipant = TransactionParticipant::get(_opCtx); + ASSERT(txnParticipant); + + txnParticipant.beginOrContinue(_opCtx, + {*_opCtx->getTxnNumber()}, + false /* autocommit */, + TransactionParticipant::TransactionActions::kStart); + txnParticipant.unstashTransactionResources(_opCtx, "insert"); + { + auto collAcq = acquireCollection( + _opCtx, + CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite), + MODE_IX); + // Insert documents that make multiple paths multikey. + insertDocument(collAcq.getCollectionPtr(), + InsertStatement(BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2)))); + insertDocument(collAcq.getCollectionPtr(), + InsertStatement(BSON("_id" << 2 << "b" << BSON_ARRAY(3 << 4)))); + insertDocument(collAcq.getCollectionPtr(), + InsertStatement(BSON("_id" << 3 << "c" << BSON_ARRAY(5 << 6)))); + } + + txnParticipant.commitUnpreparedTransaction(_opCtx); + txnParticipant.stashTransactionResources(_opCtx); + + // Verify all three multikey paths are present in the wildcard index. + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + + stdx::unordered_set fieldSet = {"a", "b", "c"}; + { + // Before the transaction started, no metadata keys should be visible. + OneOffRead oor(_opCtx, beforeTxnTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(0, paths.size()); + } + { + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(3, paths.size()); + std::set pathStrings; + for (const auto& p : paths) { + pathStrings.insert(std::string(p.dottedField())); + } + ASSERT(pathStrings.count("a")); + ASSERT(pathStrings.count("b")); + ASSERT(pathStrings.count("c")); + } +} + +TEST_F(StorageTimestampTest, PrimarySetsWildcardMultikeyInsideMultiDocumentTransactionPrepared) { + auto service = _opCtx->getServiceContext(); + auto sessionCatalog = SessionCatalog::get(service); + sessionCatalog->reset_forTest(); + auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx); + mongoDSessionCatalog->onStepUp(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.PrimarySetsWildcardMultikeyInsideMultiDocTxnPrepared"); + create(nss); + + auto indexName = "wildcard"; + auto indexSpec = BSON("name" << indexName << "ns" << nss.ns_forTest() << "key" + << BSON("$**" << 1) << "v" << static_cast(kIndexVersion)); + auto doc = BSON("_id" << 1 << "a" << BSON_ARRAY(1 << 2)); + + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + + auto storageEngine = cc().getServiceContext()->getStorageEngine(); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); + + const auto beforeTxnTime = _clock->tickClusterTime(1); + auto beforeTxnTs = beforeTxnTime.asTimestamp(); + + const auto sessionId = makeLogicalSessionIdForTest(); + _opCtx->setLogicalSessionId(sessionId); + _opCtx->setTxnNumber(1); + _opCtx->setInMultiDocumentTransaction(); + + auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx); + auto txnParticipant = TransactionParticipant::get(_opCtx); + ASSERT(txnParticipant); + + txnParticipant.beginOrContinue(_opCtx, + {*_opCtx->getTxnNumber()}, + false /* autocommit */, + TransactionParticipant::TransactionActions::kStart); + txnParticipant.unstashTransactionResources(_opCtx, "insert"); + { + auto collAcq = acquireCollection( + _opCtx, + CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite), + MODE_IX); + insertDocument(collAcq.getCollectionPtr(), InsertStatement(doc)); + } + + // Prepare the transaction. + txnParticipant.prepareTransaction(_opCtx, {}); + txnParticipant.stashTransactionResources(_opCtx); + + // Commit the prepared transaction. + const auto commitTime = _clock->getTime(); + const auto commitClusterTime = commitTime.clusterTime(); + const auto commitTs = commitClusterTime.addTicks(1).asTimestamp(); + txnParticipant.unstashTransactionResources(_opCtx, "commitTransaction"); + { + FailPointEnableBlock failPointBlock("skipCommitTxnCheckPrepareMajorityCommitted"); + txnParticipant.commitPreparedTransaction(_opCtx, commitTs, {}); + } + txnParticipant.stashTransactionResources(_opCtx); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + // Verify the document was committed. + assertDocumentAtTimestamp(collPtr, _nullTs, doc); + + // Verify the catalog multikey flag is set at the latest timestamp. + assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {}); + // Before the transaction, multikey should not be set. + assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {}); + + // Verify the wildcard metadata keys are present at the latest timestamp. + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + stdx::unordered_set fieldSet = {"a"}; + { + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + { + // Before the transaction started, no metadata keys should be visible. + OneOffRead oor(_opCtx, beforeTxnTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(0, paths.size()); + } +} + +TEST_F(StorageTimestampTest, PrimarySetsCompoundWildcardMultikeyInsideMultiDocumentTransaction) { + auto service = _opCtx->getServiceContext(); + auto sessionCatalog = SessionCatalog::get(service); + sessionCatalog->reset_forTest(); + auto mongoDSessionCatalog = MongoDSessionCatalog::get(_opCtx); + mongoDSessionCatalog->onStepUp(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.PrimarySetsCompoundWildcardMultikeyInsideMultiDocTxn"); + create(nss); + + auto indexName = "compound_wildcard"; + auto indexSpec = + BSON("name" << indexName << "ns" << nss.ns_forTest() << "key" + << BSON("x" << 1 << "$**" << 1) << "v" << static_cast(kIndexVersion)); + auto doc = BSON("_id" << 1 << "x" << 5 << "a" << BSON_ARRAY(1 << 2)); + + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + + auto storageEngine = cc().getServiceContext()->getStorageEngine(); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); + + const auto beforeTxnTime = _clock->tickClusterTime(1); + auto beforeTxnTs = beforeTxnTime.asTimestamp(); + + const auto sessionId = makeLogicalSessionIdForTest(); + _opCtx->setLogicalSessionId(sessionId); + _opCtx->setTxnNumber(1); + _opCtx->setInMultiDocumentTransaction(); + + auto ocs = mongoDSessionCatalog->checkOutSession(_opCtx); + auto txnParticipant = TransactionParticipant::get(_opCtx); + ASSERT(txnParticipant); + + txnParticipant.beginOrContinue(_opCtx, + {*_opCtx->getTxnNumber()}, + false /* autocommit */, + TransactionParticipant::TransactionActions::kStart); + txnParticipant.unstashTransactionResources(_opCtx, "insert"); + { + auto collAcq = acquireCollection( + _opCtx, + CollectionAcquisitionRequest::fromOpCtx(_opCtx, nss, AcquisitionPrerequisites::kWrite), + MODE_IX); + insertDocument(collAcq.getCollectionPtr(), InsertStatement(doc)); + } + + txnParticipant.commitUnpreparedTransaction(_opCtx); + txnParticipant.stashTransactionResources(_opCtx); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + assertMultikeyPaths(_opCtx, collPtr, indexName, _nullTs, true /* shouldBeMultikey */, {}); + + // Before the transaction, the catalog multikey flag should not be set. + assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {}); + + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + stdx::unordered_set fieldSet = {"a"}; + { + // Before the transaction started, no metadata keys should be visible. + OneOffRead oor(_opCtx, beforeTxnTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(0, paths.size()); + } + { + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } +} + +// Builds a setMultikeyMetadata op:'c' oplog entry that mirrors what +// OpObserverImpl::onSetMultikeyMetadata writes on the primary. +namespace { +repl::OplogEntry makeSetMultikeyMetadataOplogEntry(const NamespaceString& nss, + const std::string& indexName, + const BSONObj& paths, + Timestamp ts, + long long term = 1) { + SetMultikeyMetadataOplogEntry objectEntry(nss, indexName, paths); + return repl::OplogEntry(BSON("ts" << ts << "t" << term << "v" << 2 << "op" + << "c" + << "ns" << nss.getCommandNS().ns_forTest() << "wall" + << Date_t() << "o" << objectEntry.toBSON())); +} +} // namespace + +TEST_F(StorageTimestampTest, SecondaryAppliesWildcardMultikeyMetadataFromTransaction) { + repl::UnreplicatedWritesBlock uwb(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.SecondaryAppliesWildcardMultikeyMetadataFromTransaction"); + ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll()))); + + auto indexName = "wildcard"; + auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v" + << static_cast(kIndexVersion)); + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + _coordinatorMock->alwaysAllowWrites(false); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs); + const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp(); + const auto applyTs = _clock->tickClusterTime(1).asTimestamp(); + + auto setMkOp = makeSetMultikeyMetadataOplogEntry( + nss, indexName, set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a"}), applyTs); + + std::vector ops = {setMkOp}; + + DoNothingOplogApplierObserver observer; + auto storageInterface = repl::StorageInterface::get(_opCtx); + auto workerPool = repl::makeReplWorkerPool(); + repl::OplogApplierImpl oplogApplier( + nullptr, + nullptr, + &observer, + _coordinatorMock, + _consistencyMarkers, + storageInterface, + repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary), + workerPool.get()); + uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + // Catalog flag set after apply, not before. + assertMultikeyPaths(_opCtx, collPtr, indexName, applyTs, true /* shouldBeMultikey */, {}); + assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {}); + + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + stdx::unordered_set fieldSet = {"a"}; + { + // At the apply timestamp, the regenerated metadata keys are visible. + OneOffRead oor(_opCtx, applyTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } + { + // Before the transaction started, no metadata keys should be visible. + OneOffRead oor(_opCtx, beforeTxnTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(0, paths.size()); + } +} + +TEST_F(StorageTimestampTest, SecondaryAppliesWildcardMultikeyMetadataFromTransactionMultiplePaths) { + repl::UnreplicatedWritesBlock uwb(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.SecondaryAppliesWildcardMkMetaFromTxnMultiplePaths"); + ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll()))); + + auto indexName = "wildcard"; + auto indexSpec = BSON("name" << indexName << "key" << BSON("$**" << 1) << "v" + << static_cast(kIndexVersion)); + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + _coordinatorMock->alwaysAllowWrites(false); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs); + const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp(); + const auto applyTs = _clock->tickClusterTime(1).asTimestamp(); + + // A single setMultikeyMetadata entry carries all multikey paths from one insert. + auto setMkOp = makeSetMultikeyMetadataOplogEntry( + nss, + indexName, + set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a", "b", "c"}), + applyTs); + + DoNothingOplogApplierObserver observer; + auto storageInterface = repl::StorageInterface::get(_opCtx); + auto workerPool = repl::makeReplWorkerPool(); + repl::OplogApplierImpl oplogApplier( + nullptr, + nullptr, + &observer, + _coordinatorMock, + _consistencyMarkers, + storageInterface, + repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary), + workerPool.get()); + uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, {setMkOp})); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + stdx::unordered_set fieldSet = {"a", "b", "c"}; + { + // At the apply timestamp, all three regenerated metadata keys are visible. + OneOffRead oor(_opCtx, applyTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(3, paths.size()); + std::set pathStrings; + for (const auto& p : paths) { + pathStrings.insert(std::string(p.dottedField())); + } + ASSERT(pathStrings.count("a")); + ASSERT(pathStrings.count("b")); + ASSERT(pathStrings.count("c")); + } + { + // Before the transaction started, no metadata keys should be visible. + OneOffRead oor(_opCtx, beforeTxnTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(0, paths.size()); + } +} + +TEST_F(StorageTimestampTest, SecondaryAppliesCompoundWildcardMultikeyMetadataFromTransaction) { + repl::UnreplicatedWritesBlock uwb(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.SecondaryAppliesCompoundWildcardMkMetaFromTxn"); + ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll()))); + + auto indexName = "compound_wildcard"; + auto indexSpec = BSON("name" << indexName << "key" << BSON("x" << 1 << "$**" << 1) << "v" + << static_cast(kIndexVersion)); + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + _coordinatorMock->alwaysAllowWrites(false); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs); + const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp(); + const auto applyTs = _clock->tickClusterTime(1).asTimestamp(); + + auto setMkOp = makeSetMultikeyMetadataOplogEntry( + nss, indexName, set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a"}), applyTs); + + std::vector ops = {setMkOp}; + + DoNothingOplogApplierObserver observer; + auto storageInterface = repl::StorageInterface::get(_opCtx); + auto workerPool = repl::makeReplWorkerPool(); + repl::OplogApplierImpl oplogApplier( + nullptr, + nullptr, + &observer, + _coordinatorMock, + _consistencyMarkers, + storageInterface, + repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary), + workerPool.get()); + uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + assertMultikeyPaths(_opCtx, collPtr, indexName, applyTs, true /* shouldBeMultikey */, {}); + assertMultikeyPaths(_opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {}); + + auto entry = collPtr->getIndexCatalog()->findIndexByName(_opCtx, indexName); + ASSERT(entry); + stdx::unordered_set fieldSet = {"a"}; + { + // At the apply timestamp, the regenerated metadata key is visible. + OneOffRead oor(_opCtx, applyTs); + MultikeyMetadataAccessStats stats; + auto paths = getWildcardMultikeyPathSet(_opCtx, entry, fieldSet, &stats); + ASSERT_EQUALS(1, paths.size()); + ASSERT_EQUALS("a", paths.begin()->dottedField()); + } +} + +TEST_F(StorageTimestampTest, SecondaryAppliesRegularMultikeyMetadataFromTransaction) { + repl::UnreplicatedWritesBlock uwb(_opCtx); + + NamespaceString nss = NamespaceString::createNamespaceString_forTest( + "unittests.SecondaryAppliesRegularMkMetaFromTxn"); + ASSERT_OK(createCollection(_opCtx, nss.dbName(), BSON("create" << nss.coll()))); + + auto indexName = "a_1"; + auto keyPattern = BSON("a" << 1); + auto indexSpec = + BSON("name" << indexName << "key" << keyPattern << "v" << static_cast(kIndexVersion)); + ASSERT_OK(createIndexFromSpec(_opCtx, _clock, nss.ns_forTest(), indexSpec)); + + _coordinatorMock->alwaysAllowWrites(false); + + const auto currentTime = _clock->getTime(); + const auto presentTs = currentTime.clusterTime().asTimestamp(); + cc().getServiceContext()->getStorageEngine()->getSnapshotManager()->setLastApplied(presentTs); + const auto beforeTxnTs = _clock->tickClusterTime(1).asTimestamp(); + const auto applyTs = _clock->tickClusterTime(1).asTimestamp(); + + // Regular index uses multikeyPaths bitset (per-field array of multikey component indexes) + // instead of wildcard's field-path list. + auto setMkOp = makeSetMultikeyMetadataOplogEntry( + nss, indexName, multikey_paths::serialize(keyPattern, {{0}}), applyTs); + + std::vector ops = {setMkOp}; + + DoNothingOplogApplierObserver observer; + auto storageInterface = repl::StorageInterface::get(_opCtx); + auto workerPool = repl::makeReplWorkerPool(); + repl::OplogApplierImpl oplogApplier( + nullptr, + nullptr, + &observer, + _coordinatorMock, + _consistencyMarkers, + storageInterface, + repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary), + workerPool.get()); + uassertStatusOK(oplogApplier.applyOplogBatch(_opCtx, ops)); + + const auto collAcq = acquireCollForRead(_opCtx, nss); + const auto& collPtr = collAcq.getCollectionPtr(); + + // Regular index: catalog flag set + multikey path component is recorded. + assertMultikeyPaths(_opCtx, collPtr, indexName, applyTs, true /* shouldBeMultikey */, {{0}}); + assertMultikeyPaths( + _opCtx, collPtr, indexName, beforeTxnTs, false /* shouldBeMultikey */, {{}}); +} + TEST_F(StorageTimestampTest, InitializeMinValid) { NamespaceString nss = NamespaceString::kDefaultMinValidNamespace; create(nss); diff --git a/src/mongo/db/shard_role/shard_catalog/BUILD.bazel b/src/mongo/db/shard_role/shard_catalog/BUILD.bazel index 28130322f8d..41991ef226d 100644 --- a/src/mongo/db/shard_role/shard_catalog/BUILD.bazel +++ b/src/mongo/db/shard_role/shard_catalog/BUILD.bazel @@ -327,6 +327,18 @@ mongo_cc_library( ], ) +mongo_cc_library( + name = "set_multikey_metadata_oplog_helpers", + srcs = [ + "set_multikey_metadata_oplog_helpers.cpp", + ], + deps = [ + "//src/mongo:base", + "//src/mongo/db/index:index_access_method", + "//src/mongo/db/storage/key_string", + ], +) + mongo_cc_library( name = "catalog_impl", srcs = [ @@ -345,6 +357,7 @@ mongo_cc_library( "document_validation", "durable_catalog", "index_catalog", + "set_multikey_metadata_oplog_helpers", "//src/mongo/db:audit", "//src/mongo/db:collection_index_usage_tracker", "//src/mongo/db:dbcommands_idl", @@ -474,6 +487,7 @@ mongo_cc_unit_test( "index_catalog_impl_test.cpp", "index_signature_test.cpp", "rename_collection_test.cpp", + "set_multikey_metadata_oplog_helpers_test.cpp", "shard_catalog_history_cleanup_test.cpp", ], tags = ["mongo_unittest_eighth_group"], diff --git a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry.h b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry.h index 5ffddf5a015..0a328d342a0 100644 --- a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry.h +++ b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry.h @@ -138,6 +138,7 @@ public: virtual void setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const = 0; /** diff --git a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.cpp b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.cpp index b284f92dc60..951e9945977 100644 --- a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.cpp @@ -61,6 +61,7 @@ #include "mongo/db/shard_role/shard_catalog/durable_catalog.h" #include "mongo/db/shard_role/shard_catalog/index_catalog_entry_helpers.h" #include "mongo/db/shard_role/shard_catalog/index_descriptor.h" +#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h" #include "mongo/db/shard_role/transaction_resources.h" #include "mongo/db/storage/mdb_catalog.h" #include "mongo/db/storage/recovery_unit.h" @@ -74,6 +75,7 @@ #include "mongo/util/assert_util.h" #include "mongo/util/decorable.h" #include "mongo/util/fail_point.h" +#include "mongo/util/str.h" #include #include @@ -281,39 +283,102 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, return; } - // If multikeyMetadataKeys is non-empty, we must insert these keys into the index itself. We do - // not have to account for potential dupes, since all metadata keys are indexed against a single - // RecordId. An attempt to write a duplicate key will therefore be ignored. - if (!multikeyMetadataKeys.empty()) { - uassertStatusOK( - accessMethod()->asSortedData()->insertKeys(opCtx, - *shard_role_details::getRecoveryUnit(opCtx), - collection, - this, - multikeyMetadataKeys, - {}, - {}, - nullptr)); + // `multikeyMetadataKeys` is wildcard-only by construction: only wildcard indexes store + // multikey-path information as keys inside the index itself. Other index types track multikey + // paths exclusively in the catalog and always pass an empty set here. Violating this would + // attempt to insert metadata keys into a non-wildcard index, which has no reserved RecordId for + // them and risks index corruption. + invariant(multikeyMetadataKeys.empty() || + _descriptor.getIndexType() == IndexType::INDEX_WILDCARD, + "Non-empty multikeyMetadataKeys must come from a wildcard index"); + + const auto insertWildcardMultikeyMetadataKeysIfNotEmpty = [&]() { + if (!multikeyMetadataKeys.empty()) { + // If multikeyMetadataKeys is non-empty, we must insert these keys into the index + // itself. We do not have to account for potential dupes, since all metadata keys are + // indexed against a single RecordId. An attempt to write a duplicate key will therefore + // be ignored. + uassertStatusOK(accessMethod()->asSortedData()->insertKeys( + opCtx, + *shard_role_details::getRecoveryUnit(opCtx), + collection, + this, + multikeyMetadataKeys, + {}, + {}, + nullptr)); + } + }; + + if (!opCtx->inMultiDocumentTransaction()) { + insertWildcardMultikeyMetadataKeysIfNotEmpty(); + _catalogSetMultikey(opCtx, collection, paths); + return; } - // Mark the catalog as multikey, and record the multikey paths if applicable. - if (opCtx->inMultiDocumentTransaction()) { - auto status = _setMultikeyInMultiDocumentTransaction(opCtx, collection, paths); - // Retry without side transaction. - if (!status.isOK()) { - _catalogSetMultikey(opCtx, collection, paths); + VersionContext::FixedOperationFCVRegion fixedOfcvRegion(opCtx); + const bool replicateMultikeyness = gFeatureFlagReplicateMultikeynessInTransactions.isEnabled( + VersionContext::getDecoration(opCtx)); + + // Feature off, insert metadata keys in transaction itself. Otherwise the keys are written in + // the side txn. + if (!replicateMultikeyness) { + insertWildcardMultikeyMetadataKeysIfNotEmpty(); + } + + // replicateMultikeyness==false: writes catalog changes in a side txn, uses no-op entry. + // replicateMultikeyness==true: writes catalog changes + wildcard metadata keys in a side txn, + // uses setMultikeyMetadata entry. + auto status = _setMultikeyInMultiDocumentTransaction( + opCtx, collection, multikeyMetadataKeys, paths, replicateMultikeyness); + + if (!status.isOK()) { + // The only expected failure is IndexNotFound — the index was created in the same + // parent transaction and is not yet visible to the side transaction's snapshot. + // Any other error is unexpected and should not be silently swallowed. + tassert(11609105, + str::stream() << "Unexpected error from side transaction for multikey update: " + << status.toString(), + status.code() == ErrorCodes::IndexNotFound); + + // Fallback: the side transaction could not see the index because it was created (but not + // yet committed) in the parent transaction. This happens when a user creates a collection + // with a wildcard index and inserts data in the same multi-document transaction. Since the + // side transaction was abandoned, we insert the metadata keys directly in the parent + // transaction instead. + // + // No setMultikeyMetadata oplog entry is emitted. On the secondary, the MultikeyPathTracker + // collects multikey info during oplog application and flushes it at the batch timestamp + // (firstTimeInBatch). This is still timestamp-consistent because transactions containing + // DDL commands (create, createIndexes) are always processed in their own oplog applier + // batch, so firstTimeInBatch == T_commit for these transactions. + if (replicateMultikeyness) { + insertWildcardMultikeyMetadataKeysIfNotEmpty(); } - } else { _catalogSetMultikey(opCtx, collection, paths); } } void IndexCatalogEntryImpl::setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const { invariant(shard_role_details::getLocker(opCtx)->isCollectionLockedForMode(coll->ns(), MODE_IX)); invariant(shard_role_details::getLocker(opCtx)->inAWriteUnitOfWork()); + // Insert wildcard metadata keys into the index if provided. + if (!multikeyMetadataKeys.empty()) { + uassertStatusOK( + accessMethod()->asSortedData()->insertKeys(opCtx, + *shard_role_details::getRecoveryUnit(opCtx), + coll, + this, + multikeyMetadataKeys, + {}, + {}, + nullptr)); + } + opCtx->getClient()->getServiceContext()->getOpObserver()->onSetMultikeyMetadata( opCtx, coll->ns(), @@ -348,7 +413,9 @@ void IndexCatalogEntryImpl::forceSetMultikey(OperationContext* const opCtx, Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction( OperationContext* opCtx, const CollectionPtr& collection, - const MultikeyPaths& multikeyPaths) const { + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths, + bool replicateMultikeyness) const { // If we are inside a multi-document transaction, we write the on-disk multikey update in a // separate transaction so that it will not generate prepare conflicts with other operations // that try to set the multikey flag. In general, it should always be safe to update the @@ -359,16 +426,15 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction( TransactionParticipant::SideTransactionBlock sideTxn(opCtx); - VersionContext::FixedOperationFCVRegion fixedOfcvRegion(opCtx); - const bool replicateMultikeyness = gFeatureFlagReplicateMultikeynessInTransactions.isEnabled( - VersionContext::getDecoration(opCtx)); - // If the index is not visible within the side transaction, the index may have been created, // but not committed, in the parent transaction. Therefore, we abandon the side transaction // and set the multikey flag in the parent transaction. if (!durable_catalog::isIndexPresent( opCtx, _shared->_catalogId, _descriptor.indexName(), MDBCatalog::get(opCtx))) { - return {ErrorCodes::SnapshotUnavailable, "index not visible in side transaction"}; + return {ErrorCodes::IndexNotFound, + str::stream() << "index '" << _descriptor.indexName() + << "' not visible in side transaction — likely created in the " + "same multi-document transaction"}; } writeConflictRetry(opCtx, "set index multikey", collection->ns(), [&] { @@ -406,7 +472,7 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction( // During initial sync, we may not have a stable timestamp. Therefore, we need to round // up the multi-key write timestamp to the max of the three so that we don't write // behind the oldest/stable timestamp. This code path is only hit during initial - // sync (and recovery if FeatureFlagReplicateMultikeynessInTransactions is enabled) + // sync (and recovery if FeatureFlagReplicateMultikeynessInTransactions is disabled) // when reconstructing prepared transactions, and so we don't expect the oldest/stable // timestamp to advance concurrently. // @@ -431,11 +497,29 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction( invariant(opCtx->writesAreReplicated()); if (replicateMultikeyness) { + BSONObj pathsObj; + if (!multikeyMetadataKeys.empty()) { + // Wildcard index: serialize field path names extracted from metadata keys. + auto fieldPaths = + set_multikey_metadata_oplog_helpers::extractFieldPathsFromMetadataKeys( + multikeyMetadataKeys, descriptor()->ordering()); + pathsObj = set_multikey_metadata_oplog_helpers::fieldPathsToBSON(fieldPaths); + uassertStatusOK(accessMethod()->asSortedData()->insertKeys( + opCtx, + *shard_role_details::getRecoveryUnit(opCtx), + collection, + this, + multikeyMetadataKeys, + {}, + {}, + nullptr)); + } else { + // Regular index: existing multikey paths format. + pathsObj = multikeyPathsToBSON(descriptor()->keyPattern(), multikeyPaths); + } + opCtx->getClient()->getServiceContext()->getOpObserver()->onSetMultikeyMetadata( - opCtx, - collection->ns(), - descriptor()->indexName(), - multikeyPathsToBSON(descriptor()->keyPattern(), multikeyPaths)); + opCtx, collection->ns(), descriptor()->indexName(), pathsObj); } else { // Write a noop oplog entry to get a properly ordered timestamp. auto msg = BSON("msg" << "Setting index to multikey" @@ -449,6 +533,11 @@ Status IndexCatalogEntryImpl::_setMultikeyInMultiDocumentTransaction( _catalogSetMultikey(opCtx, collection, multikeyPaths); wuow.commit(); + + if (replicateMultikeyness && !multikeyMetadataKeys.empty()) { + txnParticipant.setHasSideCommittedWildcardKeys(); + opCtx->setHasSideCommittedWildcardKeys(); + } }); return Status::OK(); @@ -585,8 +674,9 @@ public: void setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const final { - return _original->setMultikeyForApplyOps(opCtx, coll, multikeyPaths); + return _original->setMultikeyForApplyOps(opCtx, coll, multikeyMetadataKeys, multikeyPaths); } void forceSetMultikey(OperationContext* opCtx, @@ -715,8 +805,9 @@ public: void setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const final { - return _original->setMultikeyForApplyOps(opCtx, coll, multikeyPaths); + return _original->setMultikeyForApplyOps(opCtx, coll, multikeyMetadataKeys, multikeyPaths); } void forceSetMultikey(OperationContext* opCtx, diff --git a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.h b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.h index 9c3ab302c0c..31b8b5bed9f 100644 --- a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.h +++ b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl.h @@ -167,6 +167,7 @@ public: */ void setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const final; void forceSetMultikey(OperationContext* opCtx, @@ -199,7 +200,9 @@ private: */ Status _setMultikeyInMultiDocumentTransaction(OperationContext* opCtx, const CollectionPtr& collection, - const MultikeyPaths& multikeyPaths) const; + const KeyStringSet& multikeyMetadataKeys, + const MultikeyPaths& multikeyPaths, + bool replicateMultikeyness) const; /** * Retrieves the multikey information associated with this index from '_collection', diff --git a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl_test.cpp b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl_test.cpp index 2cfdd52bd33..3b3facce31c 100644 --- a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl_test.cpp +++ b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_impl_test.cpp @@ -120,6 +120,7 @@ public: void setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const override { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_mock.h b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_mock.h index ec9bca1da9b..84c19bb90eb 100644 --- a/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_mock.h +++ b/src/mongo/db/shard_role/shard_catalog/index_catalog_entry_mock.h @@ -133,6 +133,7 @@ public: void setMultikeyForApplyOps(OperationContext* opCtx, const CollectionPtr& coll, + const KeyStringSet& multikeyMetadataKeys, const MultikeyPaths& multikeyPaths) const final { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.cpp b/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.cpp new file mode 100644 index 00000000000..44429507c1f --- /dev/null +++ b/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.cpp @@ -0,0 +1,139 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h" + +#include "mongo/bson/bsonelement.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/index/wildcard_key_generator.h" +#include "mongo/db/index_names.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/shared_buffer_fragment.h" +#include "mongo/util/str.h" + +namespace mongo::set_multikey_metadata_oplog_helpers { + +std::vector extractFieldPathsFromMetadataKeys(const KeyStringSet& metadataKeys, + const Ordering& ordering) { + std::vector fieldPaths; + fieldPaths.reserve(metadataKeys.size()); + + for (const auto& ks : metadataKeys) { + auto bsonKey = key_string::toBson(ks, ordering); + + BSONObjIterator iter(bsonKey); + bool foundMarker = false; + while (iter.more()) { + auto elem = iter.next(); + if (elem.type() != BSONType::minKey) { + tassert(11609101, + "Expected integer 1 marker in wildcard metadata key", + elem.isNumber() && elem.numberInt() == 1); + tassert(11609102, + "Expected field path string after marker in wildcard metadata key", + iter.more()); + auto pathElem = iter.next(); + tassert(11609103, + "Expected string type for field path in wildcard metadata key", + pathElem.type() == BSONType::string); + fieldPaths.emplace_back(pathElem.valueStringData()); + foundMarker = true; + break; + } + } + tassert(11609104, + "Wildcard metadata key contained no non-MinKey marker; cannot extract field path", + foundMarker); + } + return fieldPaths; +} + +BSONObj fieldPathsToBSON(const std::vector& fieldPaths) { + BSONArrayBuilder arrBuilder; + for (const auto& path : fieldPaths) { + arrBuilder.append(path); + } + return arrBuilder.arr(); +} + +/* + * Regeneration — for each path string: + * 1. Count 'prefixFields' and 'suffixFields' around the "$**" position in 'keyPattern'. + * E.g. {x: 1, $**: 1, y: 1, z: 1} → prefix=1, suffix=2. + * 2. Build a KeyString matching the encoding written by the primary's `WildcardKeyGenerator`: + * MinKey × prefixFields, integer 1, "", MinKey × suffixFields, reservedRecordId. + * Delegates to `WildcardKeyGenerator::makeMultikeyMetadataKey` so the encoding stays + * shared between primary (doc-walk path) and secondary (oplog-apply path). + */ +KeyStringSet regenerateMetadataKeysFromFieldPaths(const BSONObj& pathsObj, + key_string::Version version, + Ordering ordering, + KeyFormat rsKeyFormat, + const BSONObj& keyPattern) { + size_t prefixFields = 0; + size_t suffixFields = 0; + bool foundWildcard = false; + for (const auto& field : keyPattern) { + if (WildcardNames::isWildcardFieldName(field.fieldNameStringData())) { + tassert(11609106, + str::stream() << "keyPattern contains multiple wildcard fields: " << keyPattern, + !foundWildcard); + foundWildcard = true; + } else if (foundWildcard) { + ++suffixFields; + } else { + ++prefixFields; + } + } + tassert(11609107, + str::stream() << "Expected wildcard ($**) field in keyPattern: " << keyPattern, + foundWildcard); + + KeyStringSet result; + SharedBufferFragmentBuilder pooledBufferBuilder( + key_string::HeapBuilder::kHeapAllocatorDefaultBytes); + + for (const auto& elem : pathsObj) { + tassert(11609108, + str::stream() << "Expected string elements in wildcard multikey paths array in " + "setMultikeyMetadata oplog entry, got type " + << typeName(elem.type()), + elem.type() == BSONType::string); + result.insert(WildcardKeyGenerator::makeMultikeyMetadataKey(elem.valueStringData(), + prefixFields, + suffixFields, + version, + ordering, + rsKeyFormat, + pooledBufferBuilder)); + } + return result; +} + +} // namespace mongo::set_multikey_metadata_oplog_helpers diff --git a/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h b/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h new file mode 100644 index 00000000000..acce19e52c3 --- /dev/null +++ b/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h @@ -0,0 +1,83 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/ordering.h" +#include "mongo/db/storage/key_string/key_string.h" +#include "mongo/db/storage/sorted_data_interface.h" +#include "mongo/util/modules.h" + +#include +#include + +MONGO_MOD_PUBLIC; + +/** + * Helpers shared between the primary-side serializer (index_catalog_entry_impl.cpp) and the + * secondary-side apply handler (repl/oplog.cpp) for the wildcard variant of the + * `setMultikeyMetadata` oplog entry. + * + * Format of the oplog 'paths' field for wildcard indexes: an array of field path strings, + * e.g. ["a", "b.c"]. The primary extracts the paths from the in-index metadata KeyStrings; + * the secondary rebuilds KeyStrings from those paths using the target index's properties. + */ +namespace mongo::set_multikey_metadata_oplog_helpers { + +/** + * Extracts field path strings from wildcard metadata KeyStrings. + * Metadata keys encode: { "": MinKey, ..., "": 1, "": "field.path", "": MinKey, ... }. + */ +std::vector extractFieldPathsFromMetadataKeys(const KeyStringSet& metadataKeys, + const Ordering& ordering); + +/** + * Serializes wildcard field path strings into a BSON array suitable for the 'paths' field of + * the `setMultikeyMetadata` oplog entry. + */ +BSONObj fieldPathsToBSON(const std::vector& fieldPaths); + +/** + * Regenerates wildcard multikey metadata KeyStrings from the serialized field paths in a + * `setMultikeyMetadata` oplog entry. + * + * Input format — 'pathsObj' is the oplog entry's 'paths' field: a BSON array of field-path + * strings, one per multikey path. Example: `["a", "b.c"]`. + * + * 'version', 'ordering', 'rsKeyFormat' come from the target index's SortedDataInterface so + * regenerated keys are identical to the keys produced by the primary at write time. + */ +KeyStringSet regenerateMetadataKeysFromFieldPaths(const BSONObj& pathsObj, + key_string::Version version, + Ordering ordering, + KeyFormat rsKeyFormat, + const BSONObj& keyPattern); + +} // namespace mongo::set_multikey_metadata_oplog_helpers diff --git a/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers_test.cpp b/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers_test.cpp new file mode 100644 index 00000000000..1c5699b91f4 --- /dev/null +++ b/src/mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers_test.cpp @@ -0,0 +1,144 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/shard_role/shard_catalog/set_multikey_metadata_oplog_helpers.h" + +#include "mongo/bson/bsonmisc.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/index/wildcard_access_method.h" +#include "mongo/db/index/wildcard_key_generator.h" +#include "mongo/db/storage/key_string/key_string.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/shared_buffer_fragment.h" + +namespace mongo { +namespace { + +constexpr auto kVersion = key_string::Version::kLatestVersion; +constexpr auto kRsKeyFormat = KeyFormat::Long; + +KeyStringSet generateMultikeyMetadataIndexKeys(const BSONObj& keyPattern, + const BSONObj& doc, + KeyFormat rsKeyFormat = kRsKeyFormat) { + const auto ordering = WildcardAccessMethod::makeOrdering(keyPattern); + WildcardKeyGenerator gen(keyPattern, BSONObj(), nullptr, kVersion, ordering, rsKeyFormat); + SharedBufferFragmentBuilder buf(key_string::HeapBuilder::kHeapAllocatorDefaultBytes); + KeyStringSet indexKeys; + KeyStringSet metadataKeys; + gen.generateKeys(buf, doc, &indexKeys, &metadataKeys); + return metadataKeys; +} + +void assertKeyStringSetsEqual(const KeyStringSet& a, const KeyStringSet& b, StringData context) { + ASSERT_EQ(a.size(), b.size()) << context; + auto itA = a.begin(); + auto itB = b.begin(); + for (; itA != a.end(); ++itA, ++itB) { + ASSERT_EQ(0, itA->compare(*itB)) << "KeyString mismatch in: " << context; + } +} + +void verifyRoundTrip(const BSONObj& keyPattern, + const KeyStringSet& originalKeys, + KeyFormat rsKeyFormat, + StringData context) { + const auto ordering = WildcardAccessMethod::makeOrdering(keyPattern); + + auto fieldPaths = set_multikey_metadata_oplog_helpers::extractFieldPathsFromMetadataKeys( + originalKeys, ordering); + auto pathsObj = set_multikey_metadata_oplog_helpers::fieldPathsToBSON(fieldPaths); + auto regeneratedKeys = + set_multikey_metadata_oplog_helpers::regenerateMetadataKeysFromFieldPaths( + pathsObj, kVersion, ordering, rsKeyFormat, keyPattern); + + assertKeyStringSetsEqual(originalKeys, regeneratedKeys, context); +} + +TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripSimple) { + const auto keyPattern = BSON("$**" << 1); + + KeyStringSet originalKeys; + for (const auto& doc : {BSON("a" << BSON_ARRAY(1 << 2)), + BSON("b" << BSON("c" << BSON_ARRAY(1 << 2))), + BSON("d" << BSON("e" << BSON("f" << BSON_ARRAY(1))))}) { + auto docKeys = generateMultikeyMetadataIndexKeys(keyPattern, doc); + originalKeys.insert(docKeys.begin(), docKeys.end()); + } + ASSERT_EQ(3u, originalKeys.size()); + + verifyRoundTrip(keyPattern, originalKeys, kRsKeyFormat, "simple wildcard"); +} + +TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripCompoundWildcard) { + const auto keyPattern = BSON("x" << 1 << "$**" << 1 << "y" << 1); + + KeyStringSet originalKeys; + for (const auto& doc : {BSON("x" << 10 << "a" << BSON_ARRAY(1 << 2) << "y" << 20), + BSON("x" << 30 << "b" << BSON("c" << BSON_ARRAY(3)) << "y" << 40)}) { + auto docKeys = generateMultikeyMetadataIndexKeys(keyPattern, doc); + originalKeys.insert(docKeys.begin(), docKeys.end()); + } + ASSERT_EQ(2u, originalKeys.size()); + + verifyRoundTrip(keyPattern, originalKeys, kRsKeyFormat, "compound wildcard"); +} + +TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripDescendingCompoundWildcard) { + const auto keyPattern = BSON("x" << -1 << "$**" << 1 << "y" << -1); + + auto originalKeys = generateMultikeyMetadataIndexKeys( + keyPattern, BSON("x" << 1 << "path" << BSON_ARRAY(1 << 2) << "y" << 2)); + ASSERT_EQ(1u, originalKeys.size()); + + verifyRoundTrip(keyPattern, originalKeys, kRsKeyFormat, "descending compound wildcard"); +} + +TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripEmpty) { + const auto keyPattern = BSON("$**" << 1); + + KeyStringSet emptyKeys; + verifyRoundTrip(keyPattern, emptyKeys, kRsKeyFormat, "empty"); +} + +TEST(SetMultikeyMetadataOplogHelpersTest, RoundTripStringKeyFormat) { + const auto keyPattern = BSON("$**" << 1); + auto originalKeys = generateMultikeyMetadataIndexKeys( + keyPattern, BSON("field" << BSON_ARRAY(1 << 2)), KeyFormat::String); + ASSERT_EQ(1u, originalKeys.size()); + + verifyRoundTrip(keyPattern, originalKeys, KeyFormat::String, "string key format"); +} + +TEST(SetMultikeyMetadataOplogHelpersTest, FieldPathsToBSONFormat) { + auto pathsObj = set_multikey_metadata_oplog_helpers::fieldPathsToBSON({"a", "b.c"}); + ASSERT_EQ(BSON_ARRAY("a" << "b.c").woCompare(pathsObj), 0); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/transaction/transaction_participant.cpp b/src/mongo/db/transaction/transaction_participant.cpp index 41dd50ad29f..a98638534b6 100644 --- a/src/mongo/db/transaction/transaction_participant.cpp +++ b/src/mongo/db/transaction/transaction_participant.cpp @@ -1817,6 +1817,13 @@ void TransactionParticipant::Participant::unstashTransactionResources( } _releaseTransactionResourcesToOpCtx(opCtx, maxLockTimeout); + + // Propagate session-scoped flag to this opCtx so the query planner can check it without + // depending on TransactionParticipant. + if (p().hasSideCommittedWildcardKeys) { + opCtx->setHasSideCommittedWildcardKeys(); + } + std::lock_guard lg(*opCtx->getClient()); o(lg).transactionMetricsObserver.onUnstash(ServerTransactionsMetrics::get(opCtx), opCtx->getServiceContext()->getTickSource()); @@ -3737,6 +3744,7 @@ void TransactionParticipant::Participant::_resetTransactionStateAndUnlock( o(*lk).transactionRuntimeContext = boost::none; p().autoCommit = boost::none; p().needToWriteAbortEntry = false; + p().hasSideCommittedWildcardKeys = false; // Swap out txnResourceStash while holding the Client lock, then release any locks held by this // participant and abort the storage transaction after releasing the lock. The transaction diff --git a/src/mongo/db/transaction/transaction_participant.h b/src/mongo/db/transaction/transaction_participant.h index 42041d4151b..e55ad2667db 100644 --- a/src/mongo/db/transaction/transaction_participant.h +++ b/src/mongo/db/transaction/transaction_participant.h @@ -876,6 +876,19 @@ public: */ void setLastWriteOpTime(OperationContext* opCtx, const repl::OpTime& lastWriteOpTime); + /** + * Returns true if a side transaction has committed wildcard multikey metadata keys + * during this transaction. Used by the query planner to decide whether a fresh + * RecoveryUnit scan is needed to see side-committed metadata keys. + */ + bool hasSideCommittedWildcardKeys() const { + return p().hasSideCommittedWildcardKeys; + } + + void setHasSideCommittedWildcardKeys() { + p().hasSideCommittedWildcardKeys = true; + } + // // Methods for use in C++ unit tests, only. Beware: these methods may not adhere to the // concurrency control rules. @@ -1435,6 +1448,11 @@ private: // transaction. bool needToWriteAbortEntry{false}; + // Set to true when a side transaction commits wildcard multikey metadata keys into an + // index. This allows the query planner to skip creating a fresh RecoveryUnit to scan + // for side-committed metadata keys when no such keys exist. + bool hasSideCommittedWildcardKeys{false}; + // Caches the per-collection size and count deltas across a prepared transaction. Aligns // with the `sizeMetadata` persisted to the `config.transactions` entry once prepared. boost::optional> preparedSizeMetadata;