From fc0636057c07c4589bafb1bf4e86401c7a4b481d Mon Sep 17 00:00:00 2001 From: HanaPearlman Date: Tue, 7 Oct 2025 12:13:00 -0400 Subject: [PATCH] SERVER-110254: Support swapping $match before "complex" renames when a flag says there are no arrays (#41196) Co-authored-by: Andi Wang andi.wang@mongodb.com Co-authored-by: David Storch david.storch@mongodb.com GitOrigin-RevId: 16a355ba0654dfbc527cb301dee154ca59fc06e1 --- .../complex_rename_match_swapping_pbt.js | 82 ++ jstests/core/query/partial_index_pbt.js | 4 +- .../core/query/query_knob_correctness_pbt.js | 97 +- jstests/libs/property_test_helpers/README.md | 2 +- .../common_properties.js | 84 ++ .../models/collection_models.js | 17 +- .../models/document_models.js | 34 +- .../property_testing_utils.js | 6 +- jstests/query_golden/complex_match_swap_md.js | 235 +++ .../expected_output/complex_match_swap.md | 1274 +++++++++++++++++ .../lookup_unwind_complex_match_swap.md | 846 +++++++++++ .../lookup_unwind_complex_match_swap_md.js | 195 +++ .../db/pipeline/document_source_match.cpp | 27 +- src/mongo/db/query/query_knobs.idl | 15 + 14 files changed, 2815 insertions(+), 103 deletions(-) create mode 100644 jstests/core/query/complex_rename_match_swapping_pbt.js create mode 100644 jstests/query_golden/complex_match_swap_md.js create mode 100644 jstests/query_golden/expected_output/complex_match_swap.md create mode 100644 jstests/query_golden/expected_output/lookup_unwind_complex_match_swap.md create mode 100644 jstests/query_golden/lookup_unwind_complex_match_swap_md.js diff --git a/jstests/core/query/complex_rename_match_swapping_pbt.js b/jstests/core/query/complex_rename_match_swapping_pbt.js new file mode 100644 index 00000000000..bd0d7acb650 --- /dev/null +++ b/jstests/core/query/complex_rename_match_swapping_pbt.js @@ -0,0 +1,82 @@ +/** + * A property-based test that runs queries with "internalQueryPermitMatchSwappingForComplexRenames" + * enabled and asserts the correctness by comparing results with the knob disabled. 
+ * + * @tags: [ + * query_intensive_pbt, + * # This test runs commands that are not allowed with security token: setParameter. + * not_allowed_with_signed_security_token, + * config_shard_incompatible, + * # Incompatible with setParameter + * does_not_support_stepdowns, + * # Runs queries that may return many results, requiring getmores + * requires_getmore, + * # Some query knobs may not exist on older versions. + * multiversion_incompatible + * ] + */ + +import { + createQueriesWithKnobsSetAreSameAsControlCollScanProperty +} from "jstests/libs/property_test_helpers/common_properties.js"; +import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js"; +import { + getDocsModel, + getNestedDocModelNoArray +} from "jstests/libs/property_test_helpers/models/document_models.js"; +import {groupArb} from "jstests/libs/property_test_helpers/models/group_models.js"; +import {getMatchArb} from "jstests/libs/property_test_helpers/models/match_models.js"; +import { + addFieldsVarArb, + computedProjectArb +} from "jstests/libs/property_test_helpers/models/query_models.js"; +import {testProperty} from "jstests/libs/property_test_helpers/property_testing_utils.js"; +import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js"; +import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js"; + +if (isSlowBuild(db)) { + jsTestLog("Exiting early because debug is on, opt is off, or a sanitizer is enabled."); + quit(); +} + +const numRuns = 30; +const numQueriesPerRun = 50; + +const controlColl = db.query_knob_correctness_pbt_control; +const experimentColl = db.query_knob_correctness_pbt_experiment; + +const knobCorrectnessProperty = + createQueriesWithKnobsSetAreSameAsControlCollScanProperty(controlColl, experimentColl); + +// The property only holds when the docs don't contain arrays and pipelines don't generate nested +// arrays. 
+function getWorkloadModelForComplexRenameMatchSwap() { + // Aggregations are 'renaming' stage followed by a match stage. + const renamingArb = fc.oneof(computedProjectArb, addFieldsVarArb, groupArb); + const aggModel = fc.tuple(renamingArb, getMatchArb()); + + // This document model generates very nested objects that do not contain any arrays. + const docModel = getNestedDocModelNoArray(); + const docsModel = getDocsModel({docModel}); + + // Because we don't have as much control over types here, we need to remove the indexes because + // otherwise they are likely to fail to build. Comparing results with collection scans only is + // sufficient for detecting an incorrect rewrite here. + const indexesModel = fc.constant([]); + + return fc + .record({ + collSpec: getCollectionModel({docsModel, indexesModel}), + queries: fc.array(aggModel, {minLength: 1, maxLength: numQueriesPerRun}), + knobToVal: fc.constant({internalQueryPermitMatchSwappingForComplexRenames: true}), + }) + .map(({collSpec, queries, knobToVal}) => { + return {collSpec, queries, extraParams: {knobToVal}}; + }); +} +testProperty( + knobCorrectnessProperty, + {controlColl, experimentColl}, + getWorkloadModelForComplexRenameMatchSwap(), + numRuns, +); diff --git a/jstests/core/query/partial_index_pbt.js b/jstests/core/query/partial_index_pbt.js index 203334161f0..bb237f50c97 100644 --- a/jstests/core/query/partial_index_pbt.js +++ b/jstests/core/query/partial_index_pbt.js @@ -55,7 +55,7 @@ const workloadModel = // This filter will be used for the partial index filter, and to prefix queries with // {$match: filter} so that every query is eligible to use the partial indexes. 
partialFilterPredShape: getPartialFilterPredicateArb(), - docs: getDocsModel(false /* isTS */), + docs: getDocsModel(), indexes: fc.array(getIndexModel({allowPartialIndexes: false, allowSparse: false}), {minLength: 0, maxLength: 15, size: '+2'}), pipelines: fc.array(getAggPipelineModel(), @@ -88,4 +88,4 @@ testProperty(correctnessProperty, workloadModel, numRuns, partialIndexCounterexamples); -// TODO SERVER-103381 extend this test to use time-series collections. \ No newline at end of file +// TODO SERVER-103381 extend this test to use time-series collections. diff --git a/jstests/core/query/query_knob_correctness_pbt.js b/jstests/core/query/query_knob_correctness_pbt.js index f80783be354..596cb507d6f 100644 --- a/jstests/core/query/query_knob_correctness_pbt.js +++ b/jstests/core/query/query_knob_correctness_pbt.js @@ -16,15 +16,13 @@ * multiversion_incompatible * ] */ -import {FixtureHelpers} from "jstests/libs/fixture_helpers.js"; -import {getDifferentlyShapedQueries} from "jstests/libs/property_test_helpers/common_properties.js"; +import { + createQueriesWithKnobsSetAreSameAsControlCollScanProperty +} from "jstests/libs/property_test_helpers/common_properties.js"; import {getCollectionModel} from "jstests/libs/property_test_helpers/models/collection_models.js"; import {queryKnobsModel} from "jstests/libs/property_test_helpers/models/query_knob_models.js"; import {getAggPipelineModel} from "jstests/libs/property_test_helpers/models/query_models.js"; -import { - runDeoptimized, - testProperty -} from "jstests/libs/property_test_helpers/property_testing_utils.js"; +import {testProperty} from "jstests/libs/property_test_helpers/property_testing_utils.js"; import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js"; import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js"; @@ -39,78 +37,6 @@ const numQueriesPerRun = 50; const controlColl = db.query_knob_correctness_pbt_control; const experimentColl = db.query_knob_correctness_pbt_experiment; 
-function runSetParamCommand(cmd) { - FixtureHelpers.runCommandOnAllShards({db: db.getSiblingDB("admin"), cmdObj: cmd}); -} - -/* - * Runs the given function with the query knobs set, then sets the query knobs back to their - * original state. - * It's important that each run of the property is independent from one another, so we'll always - * reset the knobs to their original state even if the function throws an exception. - */ -function runWithKnobs(knobToVal, fn) { - const knobNames = Object.keys(knobToVal); - // If there are no knobs to change, return the result of the function since there's no other - // work to do. - if (knobNames.length === 0) { - return fn(); - } - - // Get the previous knob settings, so we can undo our changes after setting the knobs from - // `knobToVal`. - const getParamObj = {getParameter: 1}; - for (const key of knobNames) { - getParamObj[key] = 1; - } - const getParamResult = assert.commandWorked(db.adminCommand(getParamObj)); - // Copy only the knob key/vals into the new object. - const priorSettings = {}; - for (const key of knobNames) { - priorSettings[key] = getParamResult[key]; - } - - // Set the requested knobs. - runSetParamCommand({setParameter: 1, ...knobToVal}); - - // With the finally block, we'll always revert the parameters back to their original settings, - // even if an exception is thrown. - try { - return fn(); - } finally { - // Reset to the original settings. - runSetParamCommand({setParameter: 1, ...priorSettings}); - } -} - -function queriesWithKnobsSetAreSameAsControlCollScan(getQuery, testHelpers, knobToVal) { - const queries = getDifferentlyShapedQueries(getQuery, testHelpers); - - // Compute the control results all at once. 
- const resultMap = runDeoptimized(controlColl, queries); - - return runWithKnobs(knobToVal, () => { - for (let i = 0; i < queries.length; i++) { - const query = queries[i]; - const controlResults = resultMap[i]; - const experimentResults = experimentColl.aggregate(query).toArray(); - if (!testHelpers.comp(controlResults, experimentResults)) { - return { - passed: false, - message: - 'A query with different knobs set has returned incorrect results compared to a collection scan query with no knobs set.', - query, - explain: experimentColl.explain().aggregate(query), - controlResults, - experimentResults, - knobToVal - }; - } - } - return {passed: true}; - }); -} - function getWorkloadModel(isTS, aggModel) { return fc .record({ @@ -119,15 +45,20 @@ function getWorkloadModel(isTS, aggModel) { knobToVal: queryKnobsModel }) .map(({collSpec, queries, knobToVal}) => { - return {collSpec, queries, extraParams: [knobToVal]}; + return {collSpec, queries, extraParams: {knobToVal}}; }); } +const knobCorrectnessProperty = + createQueriesWithKnobsSetAreSameAsControlCollScanProperty(controlColl, experimentColl); + // Test with a regular collection. -testProperty(queriesWithKnobsSetAreSameAsControlCollScan, - {controlColl, experimentColl}, - getWorkloadModel(false /* isTS */, getAggPipelineModel()), - numRuns); +testProperty( + knobCorrectnessProperty, + {controlColl, experimentColl}, + getWorkloadModel(false /* isTS */, getAggPipelineModel()), + numRuns, +); // TODO SERVER-103381 re-enable timeseries PBT testing. // Test with a TS collection. 
diff --git a/jstests/libs/property_test_helpers/README.md b/jstests/libs/property_test_helpers/README.md index 942794afc0d..1ddaa820fb7 100644 --- a/jstests/libs/property_test_helpers/README.md +++ b/jstests/libs/property_test_helpers/README.md @@ -58,7 +58,7 @@ A workload consists of a collection model and an aggregation model, in the follo indexes: a list of indexes }, queries: a list of aggregation pipelines, - extraParams: an optional list of extra values to be passed to the property function + extraParams: an optional object of extra values to be passed to the property function } ``` diff --git a/jstests/libs/property_test_helpers/common_properties.js b/jstests/libs/property_test_helpers/common_properties.js index b87020f34ec..5de7ba28da5 100644 --- a/jstests/libs/property_test_helpers/common_properties.js +++ b/jstests/libs/property_test_helpers/common_properties.js @@ -2,7 +2,15 @@ * Common properties our property-based tests may use. Intended to be paired with the `testProperty` * interface in property_testing_utils.js. */ +import {FixtureHelpers} from "jstests/libs/fixture_helpers.js"; import {runDeoptimized} from "jstests/libs/property_test_helpers/property_testing_utils.js"; +import { + getAllPlans, + getAllPlanStages, + getPlanStages, + getRejectedPlans, + getWinningPlanFromExplain, +} from "jstests/libs/query/analyze_plan.js"; // Returns different query shapes using the first parameters plugged in. export function getDifferentlyShapedQueries(getQuery, testHelpers) { @@ -117,3 +125,79 @@ export function createCacheCorrectnessProperty(controlColl, experimentColl, stat return {passed: true}; }; } + +function runSetParamCommand(db, cmd) { + FixtureHelpers.runCommandOnAllShards({db: db.getSiblingDB("admin"), cmdObj: cmd}); +} + +/* + * Runs the given function with the query knobs set, then sets the query knobs back to their + * original state. 
+ * It's important that runs of the property are independent of one another, so we'll always + * reset the knobs to their original state even if the function throws an exception. + */ +function runWithKnobs(db, knobToVal, fn) { + const knobNames = Object.keys(knobToVal); + // If there are no knobs to change, return the result of the function since there's no other + // work to do. + if (knobNames.length === 0) { + return fn(); + } + + // Get the previous knob settings, so we can undo our changes after setting the knobs from + // `knobToVal`. + const getParamObj = {getParameter: 1}; + for (const key of knobNames) { + getParamObj[key] = 1; + } + const getParamResult = assert.commandWorked(db.adminCommand(getParamObj)); + // Copy only the knob key/vals into the new object. + const priorSettings = {}; + for (const key of knobNames) { + priorSettings[key] = getParamResult[key]; + } + + // Set the requested knobs. + runSetParamCommand(db, {setParameter: 1, ...knobToVal}); + + // With the finally block, we'll always revert the parameters back to their original settings, + // even if an exception is thrown. + try { + return fn(); + } finally { + // Reset to the original settings. + runSetParamCommand(db, {setParameter: 1, ...priorSettings}); + } +} + +export function createQueriesWithKnobsSetAreSameAsControlCollScanProperty(controlColl, + experimentColl) { + return function queriesWithKnobsSetAreSameAsControlCollScan( + getQuery, testHelpers, {knobToVal}) { + const queries = getDifferentlyShapedQueries(getQuery, testHelpers); + + // Compute the control results all at once.
+ const resultMap = runDeoptimized(controlColl, queries); + + return runWithKnobs(experimentColl.getDB(), knobToVal, () => { + for (let i = 0; i < queries.length; i++) { + const query = queries[i]; + const controlResults = resultMap[i]; + const experimentResults = experimentColl.aggregate(query).toArray(); + if (!testHelpers.comp(controlResults, experimentResults)) { + return { + passed: false, + message: + "A query with different knobs set has returned incorrect results compared to a collection scan query with no knobs set.", + query, + explain: experimentColl.explain().aggregate(query), + controlResults, + experimentResults, + knobToVal, + }; + } + } + return {passed: true}; + }); + }; +} diff --git a/jstests/libs/property_test_helpers/models/collection_models.js b/jstests/libs/property_test_helpers/models/collection_models.js index ebb6d98cd7c..c2c02aab5d8 100644 --- a/jstests/libs/property_test_helpers/models/collection_models.js +++ b/jstests/libs/property_test_helpers/models/collection_models.js @@ -9,10 +9,17 @@ import { } from "jstests/libs/property_test_helpers/models/index_models.js"; import {fc} from "jstests/third_party/fast_check/fc-3.1.0.js"; -export function getCollectionModel({isTS = false, allowPartialIndexes = false} = {}) { - const indexModel = isTS ? getTimeSeriesIndexModel({allowPartialIndexes}) - : getIndexModel({allowPartialIndexes}); - const indexesModel = fc.array(indexModel, {minLength: 0, maxLength: 15, size: '+2'}); +export function getCollectionModel( + {isTS = false, allowPartialIndexes = false, indexesModel, docsModel} = {}) { + // If no documents model or index model is provided, assume the default. + if (!docsModel) { + docsModel = getDocsModel({isTS}); + } + if (!indexesModel) { + const indexModel = isTS ? 
getTimeSeriesIndexModel({allowPartialIndexes}) + : getIndexModel({allowPartialIndexes}); + indexesModel = fc.array(indexModel, {minLength: 0, maxLength: 15, size: '+2'}); + } - return fc.record({isTS: fc.constant(isTS), docs: getDocsModel(isTS), indexes: indexesModel}); + return fc.record({isTS: fc.constant(isTS), docs: docsModel, indexes: indexesModel}); } diff --git a/jstests/libs/property_test_helpers/models/document_models.js b/jstests/libs/property_test_helpers/models/document_models.js index 8257cfb7f1b..fcadf6c7592 100644 --- a/jstests/libs/property_test_helpers/models/document_models.js +++ b/jstests/libs/property_test_helpers/models/document_models.js @@ -16,6 +16,7 @@ */ import { dateArb, + fieldArb, intArb, scalarArb } from "jstests/libs/property_test_helpers/models/basic_models.js"; @@ -48,8 +49,11 @@ for (let i = 0; i < kMaxNumDocs; i++) { } const uniqueIdsArb = fc.shuffledSubarray(docIds, {minLength: kMaxNumDocs, maxLength: kMaxNumDocs}); -export function getDocsModel(isTS) { - const docModel = isTS ? timeseriesDocModel : defaultDocModel; +export function getDocsModel({isTS = false, docModel} = {}) { + if (!docModel) { + docModel = isTS ? timeseriesDocModel : defaultDocModel; + } + // The size=+2 argument tells fc.array to generate array sizes closer to the max than the min. // This way the average number of documents produced is >100, which means our queries will be // less likely to produce empty results. The size argument does not affect minimization. On @@ -69,3 +73,29 @@ export function getDocsModel(isTS) { }); }); } + +/** + * Similar to getDocModel(), but generates more deeply nested data, and does not allow arrays. + * + * 'keyArb' is the arbitrary used to generate object keys. It's not a strict guarantee that objects + * produced will have nesting depth at most 'approxMaxDepth' (hence "approx"); see note below. 
+ */ +export function getNestedDocModelNoArray({keyArb, approxMaxDepth, maxObjectKeys} = {}) { + if (!keyArb) { + // Re-use the standard field arbitrary if keyArb is not provided. Note that some of these + // keys could be dotted, so in reality we may end up with an object slightly more nested + // than 'approxMaxDepth'. + keyArb = fieldArb; + } + if (!maxObjectKeys) { + maxObjectKeys = 5; + } + + return fc + .letrec((tie) => ({ + // A value in an object can be our leaf arbitrary, or it can be a nested object. + value: fc.oneof({maxDepth: approxMaxDepth}, scalarArb, tie("object")), + object: fc.dictionary(keyArb, tie("value"), {maxKeys: maxObjectKeys}), + })) + .object; +} diff --git a/jstests/libs/property_test_helpers/property_testing_utils.js b/jstests/libs/property_test_helpers/property_testing_utils.js index e8a1c1b3543..06d94b6bf95 100644 --- a/jstests/libs/property_test_helpers/property_testing_utils.js +++ b/jstests/libs/property_test_helpers/property_testing_utils.js @@ -87,10 +87,6 @@ const okIndexCreationErrorCodes = [ function runProperty(propertyFn, namespaces, workload) { let {collSpec, queries, extraParams} = workload; const {controlColl, experimentColl} = namespaces; - // `extraParams` is an optional field in a workload model. - if (!extraParams) { - extraParams = []; - } // Setup the control/experiment collections, define the helper functions, then run the property. 
if (controlColl) { @@ -121,7 +117,7 @@ function runProperty(propertyFn, namespaces, workload) { return concreteQueryFromFamily(query, paramIx); } - return propertyFn(getQuery, testHelpers, ...extraParams); + return propertyFn(getQuery, testHelpers, extraParams); } /* diff --git a/jstests/query_golden/complex_match_swap_md.js b/jstests/query_golden/complex_match_swap_md.js new file mode 100644 index 00000000000..7620ea2433c --- /dev/null +++ b/jstests/query_golden/complex_match_swap_md.js @@ -0,0 +1,235 @@ +/** + * Tests that when the parameter internalQueryPermitMatchSwappingForComplexRenames is set, + * then match will swap with complex renames. + */ + +import {normalizeArray} from "jstests/libs/golden_test.js"; +import {code, linebreak, section, subSection} from "jstests/libs/pretty_md.js"; + +try { + assert.commandWorked(db.adminCommand( + {setParameter: 1, internalQueryPermitMatchSwappingForComplexRenames: true})); + const coll = db.complex_match_swap; + coll.drop(); + + section("Inserting docs:"); + const docs = [ + {_id: 1, z: 11, h: {i: 11}, b: {c: 42}}, + {_id: 2, z: 12, h: {i: 12}, b: {}}, + {_id: 3, z: 13, h: {i: 13}, b: {c: null}}, + {_id: 4, z: 14, h: {i: 14}, b: {c: 42, d: "foo"}}, + {_id: 5, z: 15, h: {i: 15}, b: {c: {e: 42, f: "bar"}}}, + {_id: 6, z: 16, h: {i: 16}, b: {c: {e: 42, f: {g: 9}}, d: "foo"}}, + ]; + code(tojson(docs)); + + assert.commandWorked(coll.insert(docs)); + + function runPipeline(testCaseName, pipeline) { + section(testCaseName); + subSection("Pipeline"); + code(tojsononeline(pipeline)); + + // Prepend {$_internalInhibitOptimization: {}} to the pipeline. This prevents + // pushdown into the find layer, which means that we can just print the pipeline (without + // $cursor) to the golden file. + pipeline.unshift({$_internalInhibitOptimization: {}}); + + // Print the results of the query to the golden file.
+ subSection("Results"); + code(normalizeArray(coll.aggregate(pipeline).toArray())); + + let explain = coll.explain("queryPlanner").aggregate(pipeline); + + // Since we prevented pushdown into the find layer, we expect an array of pipeline stages to + // be present in the explain output. + assert(explain.hasOwnProperty("stages"), explain); + + // Drop the first two stages, since we don't need to see the $cursor or + // $_internalInhibitOptimization in the golden output. + let stages = explain.stages; + assert.gte(stages.length, 3, explain); + stages = stages.slice(2); + subSection("Explain"); + code(tojson(stages)); + linebreak(); + } + + let testCaseName = "Basic inclusion projection"; + let pipeline = [{$project: {_id: 1, a: "$b.c", z: 1}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Basic inclusion projection with excluded _id (variation 1)"; + pipeline = [{$project: {_id: 0, a: "$b.c", z: 1}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Basic inclusion projection with excluded _id (variation 2)"; + pipeline = [{$project: {_id: 0, a: "$b.c"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Exclusion projection followed by inclusion projection"; + pipeline = [{$project: {_id: 0, z: 0}}, {$project: {a: "$b.c"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Basic $addFields"; + pipeline = [{$addFields: {a: "$b.c"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Basic $set"; + pipeline = [{$set: {a: "$b.c"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Inclusion projection with a match on a subpath of the renamed path (variation 1)"; + pipeline = [{$project: {_id: 1, a: "$b.c", z: 1}}, {$match: {"a.e": {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Inclusion projection with a match on a subpath of the
renamed path (variation 2)"; + pipeline = [{$project: {_id: 0, a: "$b.c", z: 1}}, {$match: {"a.e": {$gte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Inclusion projection with a match on a subpath of the renamed path (variation 3)"; + pipeline = [{$project: {_id: 0, a: "$b.c"}}, {$match: {"a.e": {$type: "number"}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Exclusion/inclusion projection with a match on a subpath of the renamed path"; + pipeline = + [{$project: {_id: 0, z: 0}}, {$project: {a: "$b.c"}}, {$match: {"a.e": {$mod: [7, 0]}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$addFields with a match on a subpath of the renamed path"; + pipeline = [{$addFields: {a: "$b.c"}}, {$match: {"a.e": {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$set with a match on a subpath of the renamed path"; + pipeline = [{$set: {a: "$b.c"}}, {$match: {"a.e": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Chain of complex renames"; + pipeline = [ + {$project: {_id: 0, n: "$b.c"}}, + {$addFields: {q: "$n.f"}}, + {$set: {r: "$q.g"}}, + {$match: {r: {$eq: 9}}}, + ]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Multiple complex renames"; + pipeline = + [{$project: {n: "$b.c", q: "$h.i"}}, {$match: {$or: [{n: {$gt: 15}}, {q: {$lt: 13}}]}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Multiple complex renames as successive pipeline stages"; + pipeline = [ + {$project: {n: "$b.c", h: 1}}, + {$addFields: {q: "$h.i"}}, + {$project: {h: 0}}, + {$match: {$or: [{n: {$gt: 15}}, {q: {$lt: 13}}]}}, + ]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$match swaps past rename due to group"; + pipeline = [{$group: {_id: {z: "$z"}}}, {$match: {"_id.z": {$lte: 14}}}]; + runPipeline(testCaseName, pipeline); + + // Here is a case that demonstrates one danger of pushing $match past a complex rename. 
Even + // when the data doesn't have arrays, the pipeline itself can introduce arrays. + testCaseName = "$match swaps past rename in the presence of arrays created by the pipeline"; + pipeline = [ + {$lookup: {from: "complex_match_swap", pipeline: [{$group: {_id: "$a", b: {$push: "$b"}}}], as: "arr"}}, + {$project: {c: "$arr.b"}}, + {$match: {c: {$eq: {}}}}, + ]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$match with $exists swaps past rename"; + pipeline = [{$project: {_id: 0, a: "$b.c", z: 1}}, {$match: {a: {$exists: true}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$match with $expr swaps past rename"; + pipeline = [{$project: {_id: 0, a: "$b.c", z: 1}}, {$match: {$expr: {$eq: ["$a", 42]}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$match can be pushed beneath $replaceRoot"; + pipeline = [{$replaceRoot: {newRoot: "$b"}}, {$match: {c: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "$match can be pushed beneath $replaceWith"; + pipeline = [{$replaceWith: "$b"}, {$match: {c: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + // + // The remaining test cases are negative tests, meaning that we do not expect the $match to be + // pushed down. 
+ // + testCaseName = "Negative case: Dotted path on the left and the right"; + pipeline = [{$project: {_id: 0, "x.y": "$b.c", z: 1}}, {$match: {"x.y": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Negative case: Dotted path on the left and the right with match on a subpath of the renamed path"; + pipeline = [{$project: {_id: 0, "x.y": "$b.c", z: 1}}, {$match: {"x.y.e": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: Dotted path of length 3 on the left"; + pipeline = [{$project: {_id: 0, "n.q.r": "$b.c", z: 1}}, {$match: {"n.q.r.e": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Negative case: Dotted path of length 3 on the left, expressed with nested objects"; + pipeline = [{$project: {_id: 0, n: {q: {r: "$b.c"}}, z: 1}}, {$match: {"n.q.r.e": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Negative case: Dotted path of length 3 on the left, expressed with nested objects and $addFields"; + pipeline = [{$addFields: {n: {q: {r: "$b.c"}}}}, {$match: {"n.q.r.e": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: conditional projection"; + pipeline = [ + {$project: {a: {$cond: {if: {$eq: [null, "$b.c"]}, then: "$$REMOVE", else: "$b.c"}}}}, + {$match: {a: {$eq: 42}}}, + ]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: field path of length 3"; + pipeline = [{$project: {_id: 1, a: "$b.c.e", z: 1}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: field path of length 3 with _id excluded (variation 1)"; + pipeline = [{$project: {_id: 0, a: "$b.c.e", z: 1}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: field path of length 3 with _id excluded (variation 2)"; + pipeline = [{$project: {_id: 0, a: "$b.c.e"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + 
testCaseName = "Negative case: $addFields with field path of length 3"; + pipeline = [{$addFields: {a: "$b.c.e"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: $set with field path of length 3"; + pipeline = [{$set: {a: "$b.c.e"}}, {$match: {a: {$eq: 42}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = "Negative case: field path of length 4"; + pipeline = [{$project: {a: "$b.c.f.g", z: 1}}, {$match: {a: {$eq: 9}}}]; + runPipeline(testCaseName, pipeline); + + testCaseName = + "Negative case: $match cannot swap past complex rename when matching on subfield of $group key"; + pipeline = [{$group: {_id: {x: "$b.c"}}}, {$match: {"_id.x.e": {$lte: 42}}}]; + runPipeline(testCaseName, pipeline); + + // The dotted path on the left makes it so that "a" is always an object after the $addFields, + // which impacts the results of the $match stage. + testCaseName = "Negative case: dotted path on the left followed by equals-null $match"; + pipeline = [{$addFields: {"a.d": "$c"}}, {$match: {"a.e": null}}]; + runPipeline(testCaseName, pipeline); +} finally { + // Reset the parameter to its default value. + assert.commandWorked(db.adminCommand( + {setParameter: 1, internalQueryPermitMatchSwappingForComplexRenames: false})); +} diff --git a/jstests/query_golden/expected_output/complex_match_swap.md b/jstests/query_golden/expected_output/complex_match_swap.md new file mode 100644 index 00000000000..504ead4ea14 --- /dev/null +++ b/jstests/query_golden/expected_output/complex_match_swap.md @@ -0,0 +1,1274 @@ +## 1. 
Inserting docs: +```json +[ + { + "_id" : 1, + "z" : 11, + "h" : { + "i" : 11 + }, + "b" : { + "c" : 42 + } + }, + { + "_id" : 2, + "z" : 12, + "h" : { + "i" : 12 + }, + "b" : { + + } + }, + { + "_id" : 3, + "z" : 13, + "h" : { + "i" : 13 + }, + "b" : { + "c" : null + } + }, + { + "_id" : 4, + "z" : 14, + "h" : { + "i" : 14 + }, + "b" : { + "c" : 42, + "d" : "foo" + } + }, + { + "_id" : 5, + "z" : 15, + "h" : { + "i" : 15 + }, + "b" : { + "c" : { + "e" : 42, + "f" : "bar" + } + } + }, + { + "_id" : 6, + "z" : 16, + "h" : { + "i" : 16 + }, + "b" : { + "c" : { + "e" : 42, + "f" : { + "g" : 9 + } + }, + "d" : "foo" + } + } +] +``` +## 2. Basic inclusion projection +### Pipeline +```json +[ { "$project" : { "_id" : 1, "a" : "$b.c", "z" : 1 } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 1, "a" : 42, "z" : 11 } +{ "_id" : 4, "a" : 42, "z" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$eq" : 42 + } + } + }, + { + "$project" : { + "_id" : true, + "z" : true, + "a" : "$b.c" + } + } +] +``` + +## 3. Basic inclusion projection with excluded _id (variation 1) +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c", "z" : 1 } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "a" : 42, "z" : 11 } +{ "a" : 42, "z" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$eq" : 42 + } + } + }, + { + "$project" : { + "z" : true, + "a" : "$b.c", + "_id" : false + } + } +] +``` + +## 4. Basic inclusion projection with excluded _id (variation 2) +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "a" : 42 } +{ "a" : 42 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$eq" : 42 + } + } + }, + { + "$project" : { + "a" : "$b.c", + "_id" : false + } + } +] +``` + +## 5. 
Exclusion projection followed by inclusion projection +### Pipeline +```json +[ { "$project" : { "_id" : 0, "z" : 0 } }, { "$project" : { "a" : "$b.c" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "a" : 42 } +{ "a" : 42 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$eq" : 42 + } + } + }, + { + "$project" : { + "_id" : false, + "z" : false + } + }, + { + "$project" : { + "_id" : true, + "a" : "$b.c" + } + } +] +``` + +## 6. Basic $addFields +### Pipeline +```json +[ { "$addFields" : { "a" : "$b.c" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 1, "a" : 42, "b" : { "c" : 42 }, "h" : { "i" : 11 }, "z" : 11 } +{ "_id" : 4, "a" : 42, "b" : { "c" : 42, "d" : "foo" }, "h" : { "i" : 14 }, "z" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$eq" : 42 + } + } + }, + { + "$addFields" : { + "a" : "$b.c" + } + } +] +``` + +## 7. Basic $set +### Pipeline +```json +[ { "$set" : { "a" : "$b.c" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 1, "a" : 42, "b" : { "c" : 42 }, "h" : { "i" : 11 }, "z" : 11 } +{ "_id" : 4, "a" : 42, "b" : { "c" : 42, "d" : "foo" }, "h" : { "i" : 14 }, "z" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$eq" : 42 + } + } + }, + { + "$set" : { + "a" : "$b.c" + } + } +] +``` + +## 8. Inclusion projection with a match on a subpath of the renamed path (variation 1) +### Pipeline +```json +[ { "$project" : { "_id" : 1, "a" : "$b.c", "z" : 1 } }, { "$match" : { "a.e" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "a" : { "e" : 42, "f" : "bar" }, "z" : 15 } +{ "_id" : 6, "a" : { "e" : 42, "f" : { "g" : 9 } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.e" : { + "$eq" : 42 + } + } + }, + { + "$project" : { + "_id" : true, + "z" : true, + "a" : "$b.c" + } + } +] +``` + +## 9. 
Inclusion projection with a match on a subpath of the renamed path (variation 2) +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c", "z" : 1 } }, { "$match" : { "a.e" : { "$gte" : 42 } } } ] +``` +### Results +```json +{ "a" : { "e" : 42, "f" : "bar" }, "z" : 15 } +{ "a" : { "e" : 42, "f" : { "g" : 9 } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.e" : { + "$gte" : 42 + } + } + }, + { + "$project" : { + "z" : true, + "a" : "$b.c", + "_id" : false + } + } +] +``` + +## 10. Inclusion projection with a match on a subpath of the renamed path (variation 3) +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c" } }, { "$match" : { "a.e" : { "$type" : "number" } } } ] +``` +### Results +```json +{ "a" : { "e" : 42, "f" : "bar" } } +{ "a" : { "e" : 42, "f" : { "g" : 9 } } } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.e" : { + "$type" : [ + "number" + ] + } + } + }, + { + "$project" : { + "a" : "$b.c", + "_id" : false + } + } +] +``` + +## 11. Exclusion/inclusion projection with a match on a subpath of the renamed path +### Pipeline +```json +[ { "$project" : { "_id" : 0, "z" : 0 } }, { "$project" : { "a" : "$b.c" } }, { "$match" : { "a.e" : { "$mod" : [ 7, 0 ] } } } ] +``` +### Results +```json +{ "a" : { "e" : 42, "f" : "bar" } } +{ "a" : { "e" : 42, "f" : { "g" : 9 } } } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.e" : { + "$mod" : [ + NumberLong(7), + NumberLong(0) + ] + } + } + }, + { + "$project" : { + "_id" : false, + "z" : false + } + }, + { + "$project" : { + "_id" : true, + "a" : "$b.c" + } + } +] +``` + +## 12. 
$addFields with a match on a subpath of the renamed path +### Pipeline +```json +[ { "$addFields" : { "a" : "$b.c" } }, { "$match" : { "a.e" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "a" : { "e" : 42, "f" : "bar" }, "b" : { "c" : { "e" : 42, "f" : "bar" } }, "h" : { "i" : 15 }, "z" : 15 } +{ "_id" : 6, "a" : { "e" : 42, "f" : { "g" : 9 } }, "b" : { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" }, "h" : { "i" : 16 }, "z" : 16 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.e" : { + "$eq" : 42 + } + } + }, + { + "$addFields" : { + "a" : "$b.c" + } + } +] +``` + +## 13. $set with a match on a subpath of the renamed path +### Pipeline +```json +[ { "$set" : { "a" : "$b.c" } }, { "$match" : { "a.e" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "a" : { "e" : 42, "f" : "bar" }, "b" : { "c" : { "e" : 42, "f" : "bar" } }, "h" : { "i" : 15 }, "z" : 15 } +{ "_id" : 6, "a" : { "e" : 42, "f" : { "g" : 9 } }, "b" : { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" }, "h" : { "i" : 16 }, "z" : 16 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.e" : { + "$lte" : 42 + } + } + }, + { + "$set" : { + "a" : "$b.c" + } + } +] +``` + +## 14. Chain of complex renames +### Pipeline +```json +[ { "$project" : { "_id" : 0, "n" : "$b.c" } }, { "$addFields" : { "q" : "$n.f" } }, { "$set" : { "r" : "$q.g" } }, { "$match" : { "r" : { "$eq" : 9 } } } ] +``` +### Results +```json +{ "n" : { "e" : 42, "f" : { "g" : 9 } }, "q" : { "g" : 9 }, "r" : 9 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c.f.g" : { + "$eq" : 9 + } + } + }, + { + "$project" : { + "n" : "$b.c", + "_id" : false + } + }, + { + "$addFields" : { + "q" : "$n.f" + } + }, + { + "$set" : { + "r" : "$q.g" + } + } +] +``` + +## 15. 
Multiple complex renames +### Pipeline +```json +[ { "$project" : { "n" : "$b.c", "q" : "$h.i" } }, { "$match" : { "$or" : [ { "n" : { "$gt" : 15 } }, { "q" : { "$lt" : 13 } } ] } } ] +``` +### Results +```json +{ "_id" : 1, "n" : 42, "q" : 11 } +{ "_id" : 2, "q" : 12 } +{ "_id" : 4, "n" : 42, "q" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "$or" : [ + { + "b.c" : { + "$gt" : 15 + } + }, + { + "h.i" : { + "$lt" : 13 + } + } + ] + } + }, + { + "$project" : { + "_id" : true, + "n" : "$b.c", + "q" : "$h.i" + } + } +] +``` + +## 16. Multiple complex renames as successive pipeline stages +### Pipeline +```json +[ { "$project" : { "n" : "$b.c", "h" : 1 } }, { "$addFields" : { "q" : "$h.i" } }, { "$project" : { "h" : 0 } }, { "$match" : { "$or" : [ { "n" : { "$gt" : 15 } }, { "q" : { "$lt" : 13 } } ] } } ] +``` +### Results +```json +{ "_id" : 1, "n" : 42, "q" : 11 } +{ "_id" : 2, "q" : 12 } +{ "_id" : 4, "n" : 42, "q" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "$or" : [ + { + "b.c" : { + "$gt" : 15 + } + }, + { + "h.i" : { + "$lt" : 13 + } + } + ] + } + }, + { + "$project" : { + "_id" : true, + "h" : true, + "n" : "$b.c" + } + }, + { + "$addFields" : { + "q" : "$h.i" + } + }, + { + "$project" : { + "h" : false, + "_id" : true + } + } +] +``` + +## 17. $match swaps past rename due to group +### Pipeline +```json +[ { "$group" : { "_id" : { "z" : "$z" } } }, { "$match" : { "_id.z" : { "$lte" : 14 } } } ] +``` +### Results +```json +{ "_id" : { "z" : 11 } } +{ "_id" : { "z" : 12 } } +{ "_id" : { "z" : 13 } } +{ "_id" : { "z" : 14 } } +``` +### Explain +```json +[ + { + "$match" : { + "z" : { + "$lte" : 14 + } + } + }, + { + "$group" : { + "_id" : { + "z" : "$z" + }, + "$willBeMerged" : false + } + } +] +``` + +## 18. 
$match swaps past rename in the presence of arrays created by the pipeline +### Pipeline +```json +[ { "$lookup" : { "from" : "complex_match_swap", "pipeline" : [ { "$group" : { "_id" : "$a", "b" : { "$push" : "$b" } } } ], "as" : "arr" } }, { "$project" : { "c" : "$arr.b" } }, { "$match" : { "c" : { "$eq" : { } } } } ] +``` +### Results +```json +{ "_id" : 1, "c" : [ [ { "c" : 42 }, { }, { "c" : null }, { "c" : 42, "d" : "foo" }, { "c" : { "e" : 42, "f" : "bar" } }, { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" } ] ] } +{ "_id" : 2, "c" : [ [ { "c" : 42 }, { }, { "c" : null }, { "c" : 42, "d" : "foo" }, { "c" : { "e" : 42, "f" : "bar" } }, { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" } ] ] } +{ "_id" : 3, "c" : [ [ { "c" : 42 }, { }, { "c" : null }, { "c" : 42, "d" : "foo" }, { "c" : { "e" : 42, "f" : "bar" } }, { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" } ] ] } +{ "_id" : 4, "c" : [ [ { "c" : 42 }, { }, { "c" : null }, { "c" : 42, "d" : "foo" }, { "c" : { "e" : 42, "f" : "bar" } }, { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" } ] ] } +{ "_id" : 5, "c" : [ [ { "c" : 42 }, { }, { "c" : null }, { "c" : 42, "d" : "foo" }, { "c" : { "e" : 42, "f" : "bar" } }, { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" } ] ] } +{ "_id" : 6, "c" : [ [ { "c" : 42 }, { }, { "c" : null }, { "c" : 42, "d" : "foo" }, { "c" : { "e" : 42, "f" : "bar" } }, { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" } ] ] } +``` +### Explain +```json +[ + { + "$lookup" : { + "from" : "complex_match_swap", + "as" : "arr", + "let" : { + + }, + "pipeline" : [ + { + "$group" : { + "_id" : "$a", + "b" : { + "$push" : "$b" + } + } + } + ] + } + }, + { + "$match" : { + "arr.b" : { + "$eq" : { + + } + } + } + }, + { + "$project" : { + "_id" : true, + "c" : "$arr.b" + } + } +] +``` + +## 19. 
$match with $exists swaps past rename +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c", "z" : 1 } }, { "$match" : { "a" : { "$exists" : true } } } ] +``` +### Results +```json +{ "a" : 42, "z" : 11 } +{ "a" : 42, "z" : 14 } +{ "a" : null, "z" : 13 } +{ "a" : { "e" : 42, "f" : "bar" }, "z" : 15 } +{ "a" : { "e" : 42, "f" : { "g" : 9 } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$match" : { + "b.c" : { + "$exists" : true + } + } + }, + { + "$project" : { + "z" : true, + "a" : "$b.c", + "_id" : false + } + } +] +``` + +## 20. $match with $expr swaps past rename +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c", "z" : 1 } }, { "$match" : { "$expr" : { "$eq" : [ "$a", 42 ] } } } ] +``` +### Results +```json +{ "a" : 42, "z" : 11 } +{ "a" : 42, "z" : 14 } +``` +### Explain +```json +[ + { + "$match" : { + "$and" : [ + { + "b.c" : { + "$_internalExprEq" : 42 + } + }, + { + "$expr" : { + "$eq" : [ + "$b.c", + { + "$const" : 42 + } + ] + } + } + ] + } + }, + { + "$project" : { + "z" : true, + "a" : "$b.c", + "_id" : false + } + } +] +``` + +## 21. $match can be pushed beneath $replaceRoot +### Pipeline +```json +[ { "$replaceRoot" : { "newRoot" : "$b" } }, { "$match" : { "c" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "c" : 42 } +{ "c" : 42, "d" : "foo" } +``` +### Explain +```json +[ + { + "$match" : { + "$or" : [ + { + "b" : { + "$type" : [ + 4 + ] + } + }, + { + "b" : { + "$not" : { + "$type" : [ + 3 + ] + } + } + }, + { + "b.c" : { + "$eq" : 42 + } + } + ] + } + }, + { + "$replaceRoot" : { + "newRoot" : "$b" + } + } +] +``` + +## 22. 
$match can be pushed beneath $replaceWith +### Pipeline +```json +[ { "$replaceWith" : "$b" }, { "$match" : { "c" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "c" : 42 } +{ "c" : 42, "d" : "foo" } +``` +### Explain +```json +[ + { + "$match" : { + "$or" : [ + { + "b" : { + "$type" : [ + 4 + ] + } + }, + { + "b" : { + "$not" : { + "$type" : [ + 3 + ] + } + } + }, + { + "b.c" : { + "$eq" : 42 + } + } + ] + } + }, + { + "$replaceRoot" : { + "newRoot" : "$b" + } + } +] +``` + +## 23. Negative case: Dotted path on the left and the right +### Pipeline +```json +[ { "$project" : { "_id" : 0, "x.y" : "$b.c", "z" : 1 } }, { "$match" : { "x.y" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "x" : { "y" : 42 }, "z" : 11 } +{ "x" : { "y" : 42 }, "z" : 14 } +``` +### Explain +```json +[ + { + "$project" : { + "z" : true, + "x" : { + "y" : "$b.c" + }, + "_id" : false + } + }, + { + "$match" : { + "x.y" : { + "$lte" : 42 + } + } + } +] +``` + +## 24. Negative case: Dotted path on the left and the right with match on a subpath of the renamed path +### Pipeline +```json +[ { "$project" : { "_id" : 0, "x.y" : "$b.c", "z" : 1 } }, { "$match" : { "x.y.e" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "x" : { "y" : { "e" : 42, "f" : "bar" } }, "z" : 15 } +{ "x" : { "y" : { "e" : 42, "f" : { "g" : 9 } } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$project" : { + "z" : true, + "x" : { + "y" : "$b.c" + }, + "_id" : false + } + }, + { + "$match" : { + "x.y.e" : { + "$lte" : 42 + } + } + } +] +``` + +## 25. 
Negative case: Dotted path of length 3 on the left +### Pipeline +```json +[ { "$project" : { "_id" : 0, "n.q.r" : "$b.c", "z" : 1 } }, { "$match" : { "n.q.r.e" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "n" : { "q" : { "r" : { "e" : 42, "f" : "bar" } } }, "z" : 15 } +{ "n" : { "q" : { "r" : { "e" : 42, "f" : { "g" : 9 } } } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$project" : { + "z" : true, + "n" : { + "q" : { + "r" : "$b.c" + } + }, + "_id" : false + } + }, + { + "$match" : { + "n.q.r.e" : { + "$lte" : 42 + } + } + } +] +``` + +## 26. Negative case: Dotted path of length 3 on the left, expressed with nested objects +### Pipeline +```json +[ { "$project" : { "_id" : 0, "n" : { "q" : { "r" : "$b.c" } }, "z" : 1 } }, { "$match" : { "n.q.r.e" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "n" : { "q" : { "r" : { "e" : 42, "f" : "bar" } } }, "z" : 15 } +{ "n" : { "q" : { "r" : { "e" : 42, "f" : { "g" : 9 } } } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$project" : { + "z" : true, + "n" : { + "q" : { + "r" : "$b.c" + } + }, + "_id" : false + } + }, + { + "$match" : { + "n.q.r.e" : { + "$lte" : 42 + } + } + } +] +``` + +## 27. Negative case: Dotted path of length 3 on the left, expressed with nested objects and $addFields +### Pipeline +```json +[ { "$addFields" : { "n" : { "q" : { "r" : "$b.c" } } } }, { "$match" : { "n.q.r.e" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "b" : { "c" : { "e" : 42, "f" : "bar" } }, "h" : { "i" : 15 }, "n" : { "q" : { "r" : { "e" : 42, "f" : "bar" } } }, "z" : 15 } +{ "_id" : 6, "b" : { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" }, "h" : { "i" : 16 }, "n" : { "q" : { "r" : { "e" : 42, "f" : { "g" : 9 } } } }, "z" : 16 } +``` +### Explain +```json +[ + { + "$addFields" : { + "n" : { + "q" : { + "r" : "$b.c" + } + } + } + }, + { + "$match" : { + "n.q.r.e" : { + "$lte" : 42 + } + } + } +] +``` + +## 28. 
Negative case: conditional projection +### Pipeline +```json +[ { "$project" : { "a" : { "$cond" : { "if" : { "$eq" : [ null, "$b.c" ] }, "then" : "$$REMOVE", "else" : "$b.c" } } } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 1, "a" : 42 } +{ "_id" : 4, "a" : 42 } +``` +### Explain +```json +[ + { + "$project" : { + "_id" : true, + "a" : { + "$cond" : [ + { + "$eq" : [ + { + "$const" : null + }, + "$b.c" + ] + }, + "$$REMOVE", + "$b.c" + ] + } + } + }, + { + "$match" : { + "a" : { + "$eq" : 42 + } + } + } +] +``` + +## 29. Negative case: field path of length 3 +### Pipeline +```json +[ { "$project" : { "_id" : 1, "a" : "$b.c.e", "z" : 1 } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "a" : 42, "z" : 15 } +{ "_id" : 6, "a" : 42, "z" : 16 } +``` +### Explain +```json +[ + { + "$project" : { + "_id" : true, + "z" : true, + "a" : "$b.c.e" + } + }, + { + "$match" : { + "a" : { + "$eq" : 42 + } + } + } +] +``` + +## 30. Negative case: field path of length 3 with _id excluded (variation 1) +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c.e", "z" : 1 } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "a" : 42, "z" : 15 } +{ "a" : 42, "z" : 16 } +``` +### Explain +```json +[ + { + "$project" : { + "z" : true, + "a" : "$b.c.e", + "_id" : false + } + }, + { + "$match" : { + "a" : { + "$eq" : 42 + } + } + } +] +``` + +## 31. Negative case: field path of length 3 with _id excluded (variation 2) +### Pipeline +```json +[ { "$project" : { "_id" : 0, "a" : "$b.c.e" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "a" : 42 } +{ "a" : 42 } +``` +### Explain +```json +[ + { + "$project" : { + "a" : "$b.c.e", + "_id" : false + } + }, + { + "$match" : { + "a" : { + "$eq" : 42 + } + } + } +] +``` + +## 32. 
Negative case: $addFields with field path of length 3 +### Pipeline +```json +[ { "$addFields" : { "a" : "$b.c.e" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "a" : 42, "b" : { "c" : { "e" : 42, "f" : "bar" } }, "h" : { "i" : 15 }, "z" : 15 } +{ "_id" : 6, "a" : 42, "b" : { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" }, "h" : { "i" : 16 }, "z" : 16 } +``` +### Explain +```json +[ + { + "$addFields" : { + "a" : "$b.c.e" + } + }, + { + "$match" : { + "a" : { + "$eq" : 42 + } + } + } +] +``` + +## 33. Negative case: $set with field path of length 3 +### Pipeline +```json +[ { "$set" : { "a" : "$b.c.e" } }, { "$match" : { "a" : { "$eq" : 42 } } } ] +``` +### Results +```json +{ "_id" : 5, "a" : 42, "b" : { "c" : { "e" : 42, "f" : "bar" } }, "h" : { "i" : 15 }, "z" : 15 } +{ "_id" : 6, "a" : 42, "b" : { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" }, "h" : { "i" : 16 }, "z" : 16 } +``` +### Explain +```json +[ + { + "$set" : { + "a" : "$b.c.e" + } + }, + { + "$match" : { + "a" : { + "$eq" : 42 + } + } + } +] +``` + +## 34. Negative case: field path of length 4 +### Pipeline +```json +[ { "$project" : { "a" : "$b.c.f.g", "z" : 1 } }, { "$match" : { "a" : { "$eq" : 9 } } } ] +``` +### Results +```json +{ "_id" : 6, "a" : 9, "z" : 16 } +``` +### Explain +```json +[ + { + "$project" : { + "_id" : true, + "z" : true, + "a" : "$b.c.f.g" + } + }, + { + "$match" : { + "a" : { + "$eq" : 9 + } + } + } +] +``` + +## 35. 
Negative case: $match cannot swap past complex rename when matching on subfield of $group key +### Pipeline +```json +[ { "$group" : { "_id" : { "x" : "$b.c" } } }, { "$match" : { "_id.x.e" : { "$lte" : 42 } } } ] +``` +### Results +```json +{ "_id" : { "x" : { "e" : 42, "f" : "bar" } } } +{ "_id" : { "x" : { "e" : 42, "f" : { "g" : 9 } } } } +``` +### Explain +```json +[ + { + "$group" : { + "_id" : { + "x" : "$b.c" + }, + "$willBeMerged" : false + } + }, + { + "$match" : { + "_id.x.e" : { + "$lte" : 42 + } + } + } +] +``` + +## 36. Negative case: dotted path on the left followed by equals-null $match +### Pipeline +```json +[ { "$addFields" : { "a.d" : "$c" } }, { "$match" : { "a.e" : null } } ] +``` +### Results +```json +{ "_id" : 1, "a" : { }, "b" : { "c" : 42 }, "h" : { "i" : 11 }, "z" : 11 } +{ "_id" : 2, "a" : { }, "b" : { }, "h" : { "i" : 12 }, "z" : 12 } +{ "_id" : 3, "a" : { }, "b" : { "c" : null }, "h" : { "i" : 13 }, "z" : 13 } +{ "_id" : 4, "a" : { }, "b" : { "c" : 42, "d" : "foo" }, "h" : { "i" : 14 }, "z" : 14 } +{ "_id" : 5, "a" : { }, "b" : { "c" : { "e" : 42, "f" : "bar" } }, "h" : { "i" : 15 }, "z" : 15 } +{ "_id" : 6, "a" : { }, "b" : { "c" : { "e" : 42, "f" : { "g" : 9 } }, "d" : "foo" }, "h" : { "i" : 16 }, "z" : 16 } +``` +### Explain +```json +[ + { + "$addFields" : { + "a" : { + "d" : "$c" + } + } + }, + { + "$match" : { + "a.e" : { + "$eq" : null + } + } + } +] +``` + diff --git a/jstests/query_golden/expected_output/lookup_unwind_complex_match_swap.md b/jstests/query_golden/expected_output/lookup_unwind_complex_match_swap.md new file mode 100644 index 00000000000..879aa3a9ef5 --- /dev/null +++ b/jstests/query_golden/expected_output/lookup_unwind_complex_match_swap.md @@ -0,0 +1,846 @@ +## 1. 
Inserting docs into collection "a": +```json +[ + { + "_id" : 1, + "b" : 4, + "my_id" : 100, + "m" : { + "c" : 42 + } + }, + { + "_id" : 2, + "b" : 4, + "my_id" : 101, + "m" : { + + } + }, + { + "_id" : 3, + "b" : 4, + "my_id" : 100 + }, + { + "_id" : 4, + "b" : 4, + "m" : { + "c" : null + } + }, + { + "_id" : 5, + "b" : 4, + "m" : { + "c" : 42, + "d" : "foo" + } + } +] +``` +## 2. Inserting docs into collection "b": +```json +[ + { + "_id" : 1, + "b" : 4, + "indicator" : "X" + }, + { + "_id" : 2, + "b" : 4, + "indicator" : "Y" + }, + { + "_id" : 3, + "b" : 4 + }, + { + "_id" : 4, + "b" : 4, + "indicator" : { + "Z" : "Y" + } + }, + { + "_id" : 5, + "b" : 4, + "indicator" : "Z" + } +] +``` +## 3. Inserting docs into collection "c": +```json +[ + { + "_id" : 1, + "b" : 4, + "code" : "X" + }, + { + "_id" : 2, + "b" : 4, + "other_id" : 42, + "code" : "bar" + }, + { + "_id" : 3, + "b" : 4, + "other_id" : 42 + }, + { + "_id" : 4, + "b" : 4, + "code" : "blah" + }, + { + "_id" : 5, + "b" : 4, + "other_id" : 20, + "code" : "foo" + }, + { + "_id" : 6, + "b" : 4, + "other_id" : { + "zip" : 42, + "zap" : 20 + }, + "code" : "bar" + }, + { + "_id" : 7, + "b" : 4, + "other_id" : { + "zip" : 20, + "zap" : 42 + } + } +] +``` +## 4. 
View pipeline +```json +[ + { + "$match" : { + "my_id" : 100 + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$B_data" + }, + { + "$match" : { + "B_data.indicator" : "Y" + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$C_data" + }, + { + "$addFields" : { + "other_id" : "$C_data.other_id" + } + } +] +``` +### Query +```json +{ "other_id" : 42 } +``` +### Results +```json +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 2, "b" : 4, "code" : "bar", "other_id" : 42 }, "_id" : 1, "b" : 4, "m" : { "c" : 42 }, "my_id" : 100, "other_id" : 42 } +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 2, "b" : 4, "code" : "bar", "other_id" : 42 }, "_id" : 3, "b" : 4, "my_id" : 100, "other_id" : 42 } +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 3, "b" : 4, "other_id" : 42 }, "_id" : 1, "b" : 4, "m" : { "c" : 42 }, "my_id" : 100, "other_id" : 42 } +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 3, "b" : 4, "other_id" : 42 }, "_id" : 3, "b" : 4, "my_id" : 100, "other_id" : 42 } +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "indicator" : { + "$eq" : "Y" + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "other_id" : { + "$eq" : 42 + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$addFields" : { + 
"other_id" : "$C_data.other_id" + } + } +] +``` + +### Query +```json +{ "other_id.zip" : 42 } +``` +### Results +```json +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 6, "b" : 4, "code" : "bar", "other_id" : { "zap" : 20, "zip" : 42 } }, "_id" : 1, "b" : 4, "m" : { "c" : 42 }, "my_id" : 100, "other_id" : { "zap" : 20, "zip" : 42 } } +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 6, "b" : 4, "code" : "bar", "other_id" : { "zap" : 20, "zip" : 42 } }, "_id" : 3, "b" : 4, "my_id" : 100, "other_id" : { "zap" : 20, "zip" : 42 } } +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "indicator" : { + "$eq" : "Y" + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "other_id.zip" : { + "$eq" : 42 + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$addFields" : { + "other_id" : "$C_data.other_id" + } + } +] +``` + +## 5. 
View pipeline +```json +[ + { + "$match" : { + "my_id" : 100 + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$B_data" + }, + { + "$match" : { + "B_data.indicator" : "Y" + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$C_data" + }, + { + "$addFields" : { + "zip" : "$C_data.other_id.zip" + } + } +] +``` +### Query +```json +{ "zip" : 42 } +``` +### Results +```json +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 6, "b" : 4, "code" : "bar", "other_id" : { "zap" : 20, "zip" : 42 } }, "_id" : 1, "b" : 4, "m" : { "c" : 42 }, "my_id" : 100, "zip" : 42 } +{ "B_data" : { "_id" : 2, "b" : 4, "indicator" : "Y" }, "C_data" : { "_id" : 6, "b" : 4, "code" : "bar", "other_id" : { "zap" : 20, "zip" : 42 } }, "_id" : 3, "b" : 4, "my_id" : 100, "zip" : 42 } +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "indicator" : { + "$eq" : "Y" + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$addFields" : { + "zip" : "$C_data.other_id.zip" + } + }, + { + "$match" : { + "zip" : { + "$eq" : 42 + } + } + } +] +``` + +## 6. 
View pipeline +```json +[ + { + "$match" : { + "my_id" : 100 + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$B_data" + }, + { + "$match" : { + "B_data.indicator" : "Y" + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$C_data" + }, + { + "$project" : { + "_id" : 1, + "other_id" : "$C_data.other_id", + "code" : 1 + } + } +] +``` +### Query +```json +{ "other_id" : 42 } +``` +### Results +```json +{ "_id" : 1, "other_id" : 42 } +{ "_id" : 1, "other_id" : 42 } +{ "_id" : 3, "other_id" : 42 } +{ "_id" : 3, "other_id" : 42 } +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "indicator" : { + "$eq" : "Y" + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "other_id" : { + "$eq" : 42 + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$project" : { + "_id" : true, + "code" : true, + "other_id" : "$C_data.other_id" + } + } +] +``` + +### Query +```json +{ "other_id.zip" : 42 } +``` +### Results +```json +{ "_id" : 1, "other_id" : { "zap" : 20, "zip" : 42 } } +{ "_id" : 3, "other_id" : { "zap" : 20, "zip" : 42 } } +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "indicator" : { + "$eq" : "Y" + } + } + 
} + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "other_id.zip" : { + "$eq" : 42 + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$project" : { + "_id" : true, + "code" : true, + "other_id" : "$C_data.other_id" + } + } +] +``` + +## 7. View pipeline +```json +[ + { + "$match" : { + "my_id" : 100 + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$B_data" + }, + { + "$match" : { + "B_data.indicator" : "Y" + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$C_data" + }, + { + "$project" : { + "_id" : 1, + "zip" : "$C_data.other_id.zip", + "code" : 1 + } + } +] +``` +### Query +```json +{ "zip" : 42 } +``` +### Results +```json +{ "_id" : 1, "zip" : 42 } +{ "_id" : 3, "zip" : 42 } +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "indicator" : { + "$eq" : "Y" + } + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$project" : { + "_id" : true, + "code" : true, + "zip" : "$C_data.other_id.zip" + } + }, + { + "$match" : { + "zip" : { + "$eq" : 42 + } + } + } +] +``` + +## 8. 
View pipeline +```json +[ + { + "$match" : { + "my_id" : 100 + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$B_data" + }, + { + "$match" : { + "B_data.indicator" : "Y" + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b" + } + }, + { + "$unwind" : "$C_data" + }, + { + "$project" : { + "_id" : 0, + "indicator" : "$B_data.indicator", + "code" : "$C_data.code" + } + } +] +``` +### Query +```json +{ "indicator.Z" : "Y" } +``` +### Results +```json + +``` +### Explain +```json +[ + { + "$match" : { + "my_id" : { + "$eq" : 100 + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_b", + "as" : "B_data", + "localField" : "b", + "foreignField" : "b", + "let" : { + + }, + "pipeline" : [ + { + "$match" : { + "$and" : [ + { + "indicator" : { + "$eq" : "Y" + } + }, + { + "indicator.Z" : { + "$eq" : "Y" + } + } + ] + } + } + ], + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$lookup" : { + "from" : "lu_complex_swap_c", + "as" : "C_data", + "localField" : "b", + "foreignField" : "b", + "unwinding" : { + "preserveNullAndEmptyArrays" : false + } + } + }, + { + "$project" : { + "indicator" : "$B_data.indicator", + "code" : "$C_data.code", + "_id" : false + } + } +] +``` + diff --git a/jstests/query_golden/lookup_unwind_complex_match_swap_md.js b/jstests/query_golden/lookup_unwind_complex_match_swap_md.js new file mode 100644 index 00000000000..7046d0a214c --- /dev/null +++ b/jstests/query_golden/lookup_unwind_complex_match_swap_md.js @@ -0,0 +1,195 @@ +/** + * Tests that when the parameter internalQueryPermitMatchSwappingForComplexRenames is set, + * then match will get pushed down into $lookup/$unwind. + * + * This emulates a use case in which an application with a relational schema defines views which + * use $lookup-$unwind to join several tables. 
Then predicates may be applied on top of the view. + * In this case, we want to make sure that the predicates are pushed down as far as possible to + * the appropriate base collections of the view. + */ + +import {normalizeArray} from "jstests/libs/golden_test.js"; +import {code, linebreak, section, subSection} from "jstests/libs/pretty_md.js"; + +try { + assert.commandWorked(db.adminCommand( + {setParameter: 1, internalQueryPermitMatchSwappingForComplexRenames: true})); + + const coll_a = db.lu_complex_swap_a; + const coll_b = db.lu_complex_swap_b; + const coll_c = db.lu_complex_swap_c; + const view = db.lu_complex_swap_view; + + coll_a.drop(); + coll_b.drop(); + coll_c.drop(); + view.drop(); + + const coll_a_name = coll_a.getName(); + const coll_b_name = coll_b.getName(); + const coll_c_name = coll_c.getName(); + const view_name = view.getName(); + + section('Inserting docs into collection "a":'); + + const a_docs = [ + {_id: 1, b: 4, my_id: 100, m: {c: 42}}, + {_id: 2, b: 4, my_id: 101, m: {}}, + {_id: 3, b: 4, my_id: 100}, + {_id: 4, b: 4, m: {c: null}}, + {_id: 5, b: 4, m: {c: 42, d: "foo"}}, + ]; + code(tojson(a_docs)); + + assert.commandWorked(coll_a.insert(a_docs)); + + section('Inserting docs into collection "b":'); + + const b_docs = [ + {_id: 1, b: 4, indicator: "X"}, + {_id: 2, b: 4, indicator: "Y"}, + {_id: 3, b: 4}, + {_id: 4, b: 4, indicator: {"Z": "Y"}}, + {_id: 5, b: 4, indicator: "Z"}, + ]; + code(tojson(b_docs)); + + assert.commandWorked(coll_b.insert(b_docs)); + + section('Inserting docs into collection "c":'); + + const c_docs = [ + {_id: 1, b: 4, code: "X"}, + {_id: 2, b: 4, other_id: 42, code: "bar"}, + {_id: 3, b: 4, other_id: 42}, + {_id: 4, b: 4, code: "blah"}, + {_id: 5, b: 4, other_id: 20, code: "foo"}, + {_id: 6, b: 4, other_id: {zip: 42, zap: 20}, code: "bar"}, + {_id: 7, b: 4, other_id: {zip: 20, zap: 42}}, + ]; + code(tojson(c_docs)); + + assert.commandWorked(coll_c.insert(c_docs)); + + /** + * Recreates the view over collection "a" using 'pipeline', then runs each find filter in + * 'queries' against that view. For every filter, writes the filter, its results, and the + * pipeline stages reported by explain (minus the first two) to the golden output. + * + * Note: 'pipeline' is modified in place; an $_internalInhibitOptimization stage is inserted at + * its front. + */ + function runFindOnPipeline(pipeline, queries) { 
+ section("View pipeline"); + code(tojson(pipeline)); + + // Prepend {$_internalInhibitOptimization: {}} to the pipeline. This prevents + // pushdown into the find layer, which means that we can just print the pipeline (without + // $cursor) to the golden file. + pipeline.unshift({$_internalInhibitOptimization: {}}); + + view.drop(); + assert.commandWorked(db.createView(view_name, coll_a_name, pipeline)); + + for (let query of queries) { + subSection("Query"); + code(tojsononeline(query)); + + // Print the results of the query to the golden file. + subSection("Results"); + code(normalizeArray(view.find(query).toArray())); + + let explain = view.find(query).explain("queryPlanner"); + // Since we prevented pushdown into the find layer, we expect an array of pipeline + // stages to be present in the explain output. + assert(explain.hasOwnProperty("stages"), explain); + + // Drop the first two stages, since we don't need to see the $cursor or + // $_internalInhibitOptimization in the golden output. 
+ let stages = explain.stages; + assert.gte(stages.length, 3, explain); + stages = stages.slice(2); + subSection("Explain"); + code(tojson(stages)); + linebreak(); + } + } + + let pipeline = [ + {$match: {my_id: 100}}, + {$lookup: {from: coll_b_name, as: "B_data", localField: "b", foreignField: "b"}}, + {$unwind: "$B_data"}, + {$match: {"B_data.indicator": "Y"}}, + {$lookup: {from: coll_c_name, as: "C_data", localField: "b", foreignField: "b"}}, + {$unwind: "$C_data"}, + { + $addFields: { + other_id: "$C_data.other_id", + }, + }, + ]; + + let queries = [{other_id: 42}, {"other_id.zip": 42}]; + + runFindOnPipeline(pipeline, queries); + + pipeline = [ + {$match: {my_id: 100}}, + {$lookup: {from: coll_b_name, as: "B_data", localField: "b", foreignField: "b"}}, + {$unwind: "$B_data"}, + {$match: {"B_data.indicator": "Y"}}, + {$lookup: {from: coll_c_name, as: "C_data", localField: "b", foreignField: "b"}}, + {$unwind: "$C_data"}, + { + $addFields: { + zip: "$C_data.other_id.zip", + }, + }, + ]; + + // We only support "complex renames" where the field path is 2 components long. In this case, + // the field path has three components, so we don't expect the match to be pushed down. 
+ queries = [{zip: 42}]; + runFindOnPipeline(pipeline, queries); + + pipeline = [ + {$match: {my_id: 100}}, + {$lookup: {from: coll_b_name, as: "B_data", localField: "b", foreignField: "b"}}, + {$unwind: "$B_data"}, + {$match: {"B_data.indicator": "Y"}}, + {$lookup: {from: coll_c_name, as: "C_data", localField: "b", foreignField: "b"}}, + {$unwind: "$C_data"}, + {$project: {_id: 1, other_id: "$C_data.other_id", code: 1}}, + ]; + + queries = [{other_id: 42}, {"other_id.zip": 42}]; + + runFindOnPipeline(pipeline, queries); + + pipeline = [ + {$match: {my_id: 100}}, + {$lookup: {from: coll_b_name, as: "B_data", localField: "b", foreignField: "b"}}, + {$unwind: "$B_data"}, + {$match: {"B_data.indicator": "Y"}}, + {$lookup: {from: coll_c_name, as: "C_data", localField: "b", foreignField: "b"}}, + {$unwind: "$C_data"}, + {$project: {_id: 1, zip: "$C_data.other_id.zip", code: 1}}, + ]; + + // Like above, the renamed path is 3 components long, so we don't expect the match to be pushed + // down. + queries = [{zip: 42}]; + runFindOnPipeline(pipeline, queries); + + pipeline = [ + {$match: {my_id: 100}}, + {$lookup: {from: coll_b_name, as: "B_data", localField: "b", foreignField: "b"}}, + {$unwind: "$B_data"}, + {$match: {"B_data.indicator": "Y"}}, + {$lookup: {from: coll_c_name, as: "C_data", localField: "b", foreignField: "b"}}, + {$unwind: "$C_data"}, + {$project: {_id: 0, indicator: "$B_data.indicator", code: "$C_data.code"}}, + ]; + + // In this case, the match should be pushed down through the rename done by the $project. Then + // it should be pushed down past the second $lookup-$unwind pair and into the subpipeline + // of the first $lookup-$unwind pair. + queries = [{"indicator.Z": "Y"}]; + runFindOnPipeline(pipeline, queries); +} finally { + // Reset the parameter to its default value. 
+ assert.commandWorked(db.adminCommand( + {setParameter: 1, internalQueryPermitMatchSwappingForComplexRenames: false})); +} diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index b82b00c0404..613f9dd606b 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -44,6 +44,7 @@ #include "mongo/db/pipeline/lite_parsed_document_source.h" #include "mongo/db/pipeline/semantic_analysis.h" #include "mongo/db/query/allowed_contexts.h" +#include "mongo/db/query/query_knobs_gen.h" #include "mongo/util/assert_util.h" #include "mongo/util/str.h" @@ -550,7 +551,24 @@ DocumentSourceMatch::splitMatchByModifiedFields( const boost::intrusive_ptr& match, const DocumentSource::GetModPathsReturn& modifiedPathsRet) { // Attempt to move some or all of this $match before this stage. - OrderedPathSet modifiedPaths; + OrderedPathSet modifiedPaths = modifiedPathsRet.paths; + auto renames = modifiedPathsRet.renames; + + // A "complex rename" is a rename-like operation which involves a dotted path, such as + // "a":"$b.c". If "b" is an array, then this is not a rename but a reshaping operation. + // Therefore, the typical behavior of getModifiedPaths() is to report "a" as a modified path and + // "a" -> "b.c" as a complex rename. + // + // When match swapping is permitted for complex renames we must reclassify "a":"$b.c" as a + // regular rename. This is done by removing "a" from the set of modified paths and adding "a" -> + // "b.c" to the renames map. + if (internalQueryPermitMatchSwappingForComplexRenames.load()) { + for (auto&& complexRename : modifiedPathsRet.complexRenames) { + renames[complexRename.first] = complexRename.second; + modifiedPaths.erase(complexRename.first); + } + } + switch (modifiedPathsRet.type) { case DocumentSource::GetModPathsReturn::Type::kNotSupported: // We don't know what paths this stage might modify, so refrain from swapping. 
@@ -559,14 +577,13 @@ DocumentSourceMatch::splitMatchByModifiedFields( // This stage modifies all paths, so cannot be swapped with a $match at all. return {nullptr, match}; case DocumentSource::GetModPathsReturn::Type::kFiniteSet: - modifiedPaths = modifiedPathsRet.paths; break; case DocumentSource::GetModPathsReturn::Type::kAllExcept: { DepsTracker depsTracker; match->getDependencies(&depsTracker); - auto preservedPaths = modifiedPathsRet.paths; - for (auto&& rename : modifiedPathsRet.renames) { + auto preservedPaths = modifiedPaths; + for (auto&& rename : renames) { preservedPaths.insert(rename.first); } modifiedPaths = @@ -574,7 +591,7 @@ DocumentSourceMatch::splitMatchByModifiedFields( .modified; } } - return std::move(*match).splitSourceBy(modifiedPaths, modifiedPathsRet.renames); + return std::move(*match).splitSourceBy(modifiedPaths, renames); } intrusive_ptr DocumentSourceMatch::create( diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index aeaa1b36055..be2a09ef0f5 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -1743,6 +1743,21 @@ server_parameters: redact: false on_update: plan_cache_util::clearSbeCacheOnParameterChange + internalQueryPermitMatchSwappingForComplexRenames: + description: + "When enabled, the system assumes that a projection like a:'$b.c' is a renaming + operation. In the absence of this flag, it is possible for 'b' to be an array -- in + which case this projection does not just rename the field but reshapes the structure + of the document. This should be used with caution, as it will cause an incorrect rewrite + for queries that actually wish to perform this reshaping operation. The rewrite is + limited to field paths of length 2; a projection like a:'$b.c.d' is always treated as a + reshaping operation." 
+ set_at: [startup, runtime] + cpp_varname: "internalQueryPermitMatchSwappingForComplexRenames" + cpp_vartype: AtomicWord + default: false + redact: false + # TODO SERVER-85426 Remove this knob. bypassRankFusionFCVGate: description: "If enabled, bypasses FCV-gating for featureFlagRankFusionBasic and featureFlagRankFusionFull."