diff --git a/buildscripts/resmokeconfig/suites/query_join_optimization_passthrough.yml b/buildscripts/resmokeconfig/suites/query_join_optimization_passthrough.yml index 1abd05a14c5..b2ca1d6e686 100644 --- a/buildscripts/resmokeconfig/suites/query_join_optimization_passthrough.yml +++ b/buildscripts/resmokeconfig/suites/query_join_optimization_passthrough.yml @@ -43,10 +43,6 @@ selector: - jstests/core/timeseries/**/*.js # TODO SERVER-114739 Join optimization: collation disregarded when joining - jstests/sharding/query/collation/collation_lookup.js - # TODO SERVER-113718 Do not use join optimizer path if join predicates/fields operate over arrays - - jstests/core/query/release_memory/hash_lookup_unwind.js - - jstests/aggregation/sources/match/sbe_non_leading_match_pbt.js - - jstests/aggregation/sources/lookup/lookup_unwind_equijoin.js # TODO SERVER-118416 Fix join reordering tripwire assertions for lookup+unwind pipelines and re-enable lookup_unwind_pbt PBT - jstests/aggregation/sources/lookup/lookup_unwind_pbt.js # Shard level user writes block is not supported on standalone. diff --git a/jstests/auth/lib/commands_lib.js b/jstests/auth/lib/commands_lib.js index cf12306e3eb..4b70325aefa 100644 --- a/jstests/auth/lib/commands_lib.js +++ b/jstests/auth/lib/commands_lib.js @@ -8645,8 +8645,12 @@ export const authCommandsLib = { testcases: testcases_transformationOnly, skipTest: (conn) => { // Can't run on mongos. Also, $_internalJoinHint requires join optimization which - // is unavailable when the classic engine is forced. - return !isStandalone(conn) || isForceClassicEngine(conn); + // is unavailable when the classic engine is forced, or the path arrayness feature is disabled. + return ( + !isStandalone(conn) || + isForceClassicEngine(conn) || + !isFeatureEnabled(conn, "featureFlagPathArrayness") + ); }, setup: function (db) { // Only works with join optimization enabled. 
@@ -8656,6 +8660,8 @@ export const authCommandsLib = { internalEnableJoinOptimization: true, }), ); + // Need an index for multikeyness info. + assert.commandWorked(db.foo.createIndex({dummy: -1, i: 1})); // Add a document to collection "foo". assert.commandWorked(db.foo.insertOne({_id: 0, i: 0})); }, @@ -8666,7 +8672,8 @@ export const authCommandsLib = { internalEnableJoinOptimization: false, }), ); - // Clean up doc. + // Clean up doc & index. + assert.commandWorked(db.foo.dropIndex({dummy: -1, i: 1})); assert.commandWorked(db.foo.deleteOne({_id: 0, i: 0})); }, }, diff --git a/jstests/noPassthrough/query/joins/arrayness.js b/jstests/noPassthrough/query/joins/arrayness.js new file mode 100644 index 00000000000..cd644024d11 --- /dev/null +++ b/jstests/noPassthrough/query/joins/arrayness.js @@ -0,0 +1,978 @@ +/** + * End to end test for join optimization being enabled iff no join predicate fields may contain arrays. + * + * @tags: [ + * requires_fcv_90, + * requires_sbe + * ] + */ + +import {runTestWithUnorderedComparison, joinTestWrapper} from "jstests/libs/query/join_utils.js"; + +// Must enable path arrayness tracking for this test. 
+const conn = MongoRunner.runMongod({setParameter: "featureFlagPathArrayness=true"}); +const db = conn.getDB(`${jsTestName()}_db`); + +joinTestWrapper(db, function runArraynessTest() { + assert.commandWorked( + db.adminCommand({setParameter: 1, internalEnableJoinOptimization: true, internalEnablePathArrayness: true}), + ); + + const c1 = db.c1; + const c2 = db.c2; + const c3 = db.c3; + + c1.drop(); + c2.drop(); + c3.drop(); + + assert.commandWorked( + c1.insertMany([ + { + _id: 0, + alwaysArray: [], + sometimesArray: 3, + neverArray: 1, + obj: {array: [1, 2, 3], scalar: 1}, + }, + { + _id: 1, + alwaysArray: [1, 2, 3], + sometimesArray: 2, + neverArray: 1, + obj: {}, + }, + { + _id: 2, + alwaysArray: [2, 3], + sometimesArray: [3, 4], + neverArray: 1, + obj: {array: [], scalar: 2}, + }, + ]), + ); + + assert.commandWorked( + c2.insertMany([ + {_id: 0, a: 1}, + {_id: 1, a: 2}, + {_id: 2, a: 3}, + ]), + ); + + assert.commandWorked( + c3.insertMany([ + {_id: 0, a: 1, obj: {array: [1, 2, 3], scalar: 1}}, + {_id: 1, a: 2, obj: {array: [], scalar: 2}}, + {_id: 2, a: 3, obj: {}}, + ]), + ); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness (no indexes) => no joinopt (2 node, no suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "neverArray", + foreignField: "a", + as: "x", + }, + }, + {$unwind: "$x"}, + ], + expectedResults: [ + { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "x": {"_id": 0, "a": 1}, + obj: {array: [1, 2, 3], scalar: 1}, + }, + { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + "x": {"_id": 0, "a": 1}, + obj: {}, + }, + { + "_id": 2, + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + "x": {"_id": 0, "a": 1}, + obj: {array: [], scalar: 2}, + }, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness => no joinopt ($expr, 2 node, no suffix)", + 
coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "x", + let: {neverArray: "$neverArray"}, + pipeline: [{$match: {$expr: {$eq: ["$a", "$$neverArray"]}}}], + }, + }, + {$unwind: "$x"}, + ], + expectedResults: [ + { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + "x": { + "_id": 0, + "a": 1, + }, + }, + { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + "obj": {}, + "x": { + "_id": 0, + "a": 1, + }, + }, + { + "_id": 2, + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + "obj": { + "array": [], + "scalar": 2, + }, + "x": { + "_id": 0, + "a": 1, + }, + }, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness => no joinopt (2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "neverArray", + foreignField: "a", + as: "x", + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness => no joinopt ($expr, 2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "x", + let: {neverArray: "$neverArray"}, + pipeline: [{$match: {$expr: {$eq: ["$$neverArray", "$a"]}}}], + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], 
"sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: false, + }); + + assert.commandWorked(c1.createIndex({neverArray: 1})); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness on foreign field => no joinopt (2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "neverArray", + foreignField: "a", + as: "x", + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness on foreign field => no joinopt ($expr, 2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "x", + let: {neverArray: "$neverArray"}, + pipeline: [{$match: {$expr: {$eq: ["$$neverArray", "$a"]}}}], + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: false, + }); + + assert.commandWorked(c2.createIndex({a: 1})); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness on local field => no joinopt (2 node, no suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "sometimesArray", + foreignField: "a", + as: "x", + }, + }, + {$unwind: "$x"}, + ], + expectedResults: [ + { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { 
+ "array": [1, 2, 3], + "scalar": 1, + }, + "x": { + "_id": 2, + "a": 3, + }, + }, + { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + "obj": {}, + "x": { + "_id": 1, + "a": 2, + }, + }, + { + "_id": 2, + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + "obj": { + "array": [], + "scalar": 2, + }, + "x": { + "_id": 2, + "a": 3, + }, + }, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "No arrayness on local field => no joinopt ($expr, 2 node, no suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "x", + let: {sometimesArray: "$sometimesArray"}, + pipeline: [{$match: {$expr: {$eq: ["$$sometimesArray", "$a"]}}}], + }, + }, + {$unwind: "$x"}, + ], + expectedResults: [ + { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + "x": { + "_id": 2, + "a": 3, + }, + }, + { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + "obj": {}, + "x": { + "_id": 1, + "a": 2, + }, + }, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields => join opt (2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "neverArray", + foreignField: "a", + as: "x", + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 1, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields => join opt ($expr, 2 node, suffix)", + coll: c1, + 
pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "x", + let: {na: "$neverArray"}, + pipeline: [{$match: {$expr: {$eq: ["$$na", "$a"]}}}], + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 1, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields => join opt (2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "neverArray", + foreignField: "a", + as: "sometimesArray", // Arrayness of "as" field doesn't matter. + }, + }, + {$unwind: "$sometimesArray"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1}, + {"alwaysArray": [1, 2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1}, + {"alwaysArray": [2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1}, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 1, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields => join opt ($expr, 2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "sometimesArray", // Arrayness of "as" field doesn't matter. 
+ let: {na: "$neverArray"}, + pipeline: [{$match: {$expr: {$eq: ["$$na", "$a"]}}}], + }, + }, + {$unwind: "$sometimesArray"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1}, + {"alwaysArray": [1, 2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1}, + {"alwaysArray": [2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1}, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 1, + }); + + assert.commandWorked(c1.createIndexes([{sometimesArray: -1}, {alwaysArray: 1}])); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields, multikey localField => no join opt (2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "sometimesArray", + foreignField: "a", + as: "y", + }, + }, + {$unwind: "$y"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "y": {"_id": 2, "a": 3}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "y": {"_id": 1, "a": 2}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "y": {"_id": 2, "a": 3}}, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields, multikey localField => no join opt ($expr, 2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + as: "y", + let: {sa: "$sometimesArray"}, + pipeline: [{$match: {$expr: {$eq: ["$a", "$$sa"]}}}], + }, + }, + {$unwind: "$y"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "y": {"_id": 2, "a": 3}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "y": {"_id": 1, "a": 2}}, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields, multikey 
foreignField => no join opt (2 node, suffix)", + coll: c2, + pipeline: [ + { + $lookup: { + from: c1.getName(), + localField: "a", + foreignField: "alwaysArray", + as: "y", + }, + }, + {$unwind: "$y"}, + {$project: {_id: 0, "y.obj": 0}}, + ], + expectedResults: [ + {"a": 1, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}}, + {"a": 2, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}}, + {"a": 2, "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}}, + {"a": 3, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}}, + {"a": 3, "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}}, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields, multikey foreignField => no join opt ($expr, 2 node, suffix)", + coll: c2, + pipeline: [ + { + $lookup: { + from: c1.getName(), + as: "y", + let: {aaa: "$a"}, + pipeline: [{$match: {$expr: {$eq: ["$alwaysArray", "$$aaa"]}}}], + }, + }, + {$unwind: "$y"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields, multikey foreignField/localField => no join opt (2 node, suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c1.getName(), + localField: "sometimesArray", + foreignField: "alwaysArray", + as: "y", + }, + }, + {$unwind: "$y"}, + {$project: {_id: 0, obj: 0, "y.obj": 0}}, + ], + expectedResults: [ + { + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}, + }, + { + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}, + }, + { + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 
1, + "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}, + }, + { + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}, + }, + { + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}, + }, + { + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}, + }, + ], + expectedUsedJoinOptimization: false, + }); + + runTestWithUnorderedComparison({ + db, + description: "Arrayness on all fields, multikey foreignField/localField => no join opt ($expr, 2 node, no suffix)", + coll: c1, + pipeline: [ + { + $lookup: { + from: c1.getName(), + as: "y", + let: {sa: "$sometimesArray"}, + pipeline: [{$match: {$expr: {$eq: ["$alwaysArray", "$$sa"]}}}], + }, + }, + {$unwind: "$y"}, + ], + expectedResults: [], + expectedUsedJoinOptimization: false, + }); + + // Ensure we have arrayness info for c3 & obj field in c1. + assert.commandWorked(c3.createIndex({a: -1, obj: 1})); + assert.commandWorked(c1.createIndex({obj: 1})); + + runTestWithUnorderedComparison({ + db, + description: "As field has an array subfield, used in subsequent join => no join opt in suffix", + coll: c2, + pipeline: [ + // This is ok, should use join opt. + { + $lookup: { + from: c1.getName(), + localField: "a", + foreignField: "neverArray", + as: "y", + }, + }, + {$unwind: "$y"}, + // Prefix should end here: next predicate involves an array. 
+ { + $lookup: { + from: c3.getName(), + localField: "y.sometimesArray", + foreignField: "a", + as: "z", + }, + }, + {$unwind: "$z"}, + {$project: {"y.obj": 0, "z.obj": 0}}, + ], + expectedResults: [ + { + "_id": 0, + "a": 1, + "y": { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + }, + "z": { + "_id": 2, + "a": 3, + }, + }, + { + "_id": 0, + "a": 1, + "y": { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + }, + "z": { + "_id": 1, + "a": 2, + }, + }, + { + "_id": 0, + "a": 1, + "y": { + "_id": 2, + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + }, + "z": { + "_id": 2, + "a": 3, + }, + }, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 1, // We should not see a second! + }); + + runTestWithUnorderedComparison({ + db, + description: "As field has scalar subfield, used in subsequent join => join opt covers both joins", + coll: c2, + pipeline: [ + // This is ok, should use join opt. + { + $lookup: { + from: c1.getName(), + as: "y", + let: {aaa: "$a"}, + pipeline: [{$match: {$expr: {$eq: ["$$aaa", "$neverArray"]}}}], + }, + }, + {$unwind: "$y"}, + // The following is as well! We should have the whole pipeline in our eligible prefix. 
+ { + $lookup: { + from: c3.getName(), + localField: "y.neverArray", + foreignField: "a", + as: "z", + }, + }, + {$unwind: "$z"}, + {$project: {"y.obj": 0, "z.obj": 0}}, + ], + expectedResults: [ + { + "_id": 0, + "a": 1, + "y": { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + }, + "z": { + "_id": 0, + "a": 1, + }, + }, + { + "_id": 0, + "a": 1, + "y": { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + }, + "z": { + "_id": 0, + "a": 1, + }, + }, + { + "_id": 0, + "a": 1, + "y": { + "_id": 2, + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + }, + "z": { + "_id": 0, + "a": 1, + }, + }, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 2, // Both $lookups should be pushed down! + }); + + runTestWithUnorderedComparison({ + db, + description: "Test arrayness check works for subfields + compound join predicates.", + coll: c2, + pipeline: [ + // This is ok, should use join opt. + { + $lookup: { + from: c1.getName(), + as: "y", + let: {aaa: "$a"}, + pipeline: [ + { + $match: { + $expr: { + $and: [ + {$eq: ["$$aaa", "$neverArray"]}, + {$gt: ["$sometimesArray", 0]}, // Residual predicate, should still be ok. + ], + }, + }, + }, + ], + }, + }, + {$unwind: "$y"}, + // The following should be ok as well. + { + $lookup: { + from: c3.getName(), + as: "z", + let: {ooo: "$y.obj"}, + pipeline: [{$match: {$expr: {$eq: ["$obj", "$$ooo"]}}}], + }, + }, + {$unwind: "$z"}, + // But not this (since we don't have arrayness for obj.scalar). 
+ { + $lookup: { + from: c1.getName(), + as: "w", + let: {ooo: "$z.obj.scalar"}, + pipeline: [{$match: {$expr: {$eq: ["$neverArray", "$$ooo"]}}}], + }, + }, + {$unwind: "$w"}, + ], + expectedResults: [ + { + "_id": 0, + "a": 1, + "y": { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + "z": { + "_id": 0, + "a": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + "w": { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + }, + { + "_id": 0, + "a": 1, + "y": { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + "z": { + "_id": 0, + "a": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + "w": { + "_id": 1, + "alwaysArray": [1, 2, 3], + "sometimesArray": 2, + "neverArray": 1, + "obj": {}, + }, + }, + { + "_id": 0, + "a": 1, + "y": { + "_id": 0, + "alwaysArray": [], + "sometimesArray": 3, + "neverArray": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + "z": { + "_id": 0, + "a": 1, + "obj": { + "array": [1, 2, 3], + "scalar": 1, + }, + }, + "w": { + "_id": 2, + "alwaysArray": [2, 3], + "sometimesArray": [3, 4], + "neverArray": 1, + "obj": { + "array": [], + "scalar": 2, + }, + }, + }, + ], + expectedUsedJoinOptimization: true, + expectedNumJoinStages: 2, + }); + + // Disabling internalEnablePathArrayness should prevent join optimization from using arrayness + // info, so a query that previously qualified for joinopt must no longer qualify. 
+ assert.commandWorked(db.adminCommand({setParameter: 1, internalEnablePathArrayness: false})); + + runTestWithUnorderedComparison({ + db, + description: "internalEnablePathArrayness=false => no joinopt even when arrayness is known", + coll: c1, + pipeline: [ + { + $lookup: { + from: c2.getName(), + localField: "neverArray", + foreignField: "a", + as: "x", + }, + }, + {$unwind: "$x"}, + {$project: {_id: 0, obj: 0}}, + ], + expectedResults: [ + {"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}}, + {"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}}, + ], + expectedUsedJoinOptimization: false, + }); +}); + +MongoRunner.stopMongod(conn); diff --git a/jstests/noPassthrough/query/lookup/lookup_sharded_no_join_opt.js b/jstests/noPassthrough/query/lookup/lookup_sharded_no_join_opt.js index 2f841e54c08..b6e15367af5 100644 --- a/jstests/noPassthrough/query/lookup/lookup_sharded_no_join_opt.js +++ b/jstests/noPassthrough/query/lookup/lookup_sharded_no_join_opt.js @@ -2,8 +2,9 @@ // Validate that join optimization does not run on sharded collections. 
// // @tags: [ -// requires_fcv_83, -// requires_sbe +// requires_fcv_90, +// requires_sbe, +// featureFlagPathArrayness // ] // import {joinOptUsed} from "jstests/libs/query/join_utils.js"; @@ -30,10 +31,10 @@ const db = sharded.getDB("test"); sharded.shard0.getDB("test").setLogLevel(5, "query"); sharded.shard1.getDB("test").setLogLevel(5, "query"); const docs = [{f1: "aaa", f2: 123}, {f1: "bbb", f2: 0}, {f2: -1}, {f1: "zzz"}]; -assert.commandWorked(db["coll1"].insertMany(docs)); -assert.commandWorked(db["coll2"].insertMany(docs)); -assert.commandWorked(db["coll3"].insertMany(docs)); -assert.commandWorked(db["coll4"].insertMany(docs)); +for (const coll of ["coll1", "coll2", "coll3", "coll4"]) { + assert.commandWorked(db[coll].insertMany(docs)); + assert.commandWorked(db[coll].createIndex({"dummy": 1, "f1": 1, "f2": -1})); +} // Ensure join optimization is disabled. assert(sharded.shard0.getDB("test").adminCommand({setParameter: 1, internalEnableJoinOptimization: false})); diff --git a/jstests/query_golden/join_cardinality_estimation_md.js b/jstests/query_golden/join_cardinality_estimation_md.js index c5ccbeed889..dcfa89305cf 100644 --- a/jstests/query_golden/join_cardinality_estimation_md.js +++ b/jstests/query_golden/join_cardinality_estimation_md.js @@ -1,7 +1,9 @@ // // Test that the cardinality estimates for two-table joins approximate reality // @tags: [ -// requires_sbe +// requires_sbe, +// featureFlagPathArrayness, +// requires_fcv_90 // ] // @@ -33,6 +35,7 @@ function populate() { }); } + db.many_rows.drop(); db.many_rows.insertMany(documents); db.many_rows.createIndex({i_idx: 1}); db.many_rows.createIndex({i_idx_offset: 1}); @@ -40,16 +43,23 @@ function populate() { db.many_rows.createIndex({c_idx: 1}); db.many_rows.createIndex({d_idx: 1}); db.many_rows.createIndex({n_idx: 1}); + // Not used in planning, but needed for multikeyness info. 
+ db.many_rows.createIndex({dummy: 1, i_noidx: -1, missing_field: 1}); // An empty collection + db.no_rows.drop(); db.no_rows.createIndex({i_idx: 1}); + // Not used in planning, but needed for multikeyness info. + db.no_rows.createIndex({dummy: 1, i_idx_offset: -1}); // Collection with a single row + db.one_row.drop(); db.one_row.insert({i_idx: 1}); db.one_row.createIndex({i_idx: 1}); // Collection with 1 non-null document const nullDocuments = []; + db.mostly_nulls.drop(); db.mostly_nulls.insert({i_idx: 1}); for (let i = 0; i < collSize; i++) { nullDocuments.push({ diff --git a/jstests/query_golden/join_opt/hint_md.js b/jstests/query_golden/join_opt/hint_md.js index afeeb9d47e4..dc70cd4fc77 100644 --- a/jstests/query_golden/join_opt/hint_md.js +++ b/jstests/query_golden/join_opt/hint_md.js @@ -2,8 +2,9 @@ * Tests hinting joins orders. * * @tags: [ - * requires_fcv_83, - * requires_sbe + * requires_fcv_90, + * requires_sbe, + * featureFlagPathArrayness * ] */ import {normalizeArray} from "jstests/libs/query_optimization/golden_test.js"; diff --git a/jstests/query_golden/join_opt/null_semantics_md.js b/jstests/query_golden/join_opt/null_semantics_md.js index 452ff777134..101a13464fa 100644 --- a/jstests/query_golden/join_opt/null_semantics_md.js +++ b/jstests/query_golden/join_opt/null_semantics_md.js @@ -37,7 +37,7 @@ const thirdColl = db[jsTestName() + "_third"]; thirdColl.drop(); assert.commandWorked(thirdColl.insertMany(testDocs)); // Add index for multikeyness info for path arrayness. 
-assert.commandWorked(thirdColl.createIndex({dummy: 1, "key.foo": 1})); +assert.commandWorked(thirdColl.createIndex({dummy: 1, "foo": 1, "key.foo": 1})); const testCases = [ { diff --git a/jstests/query_golden/logs_spilling_md.js b/jstests/query_golden/logs_spilling_md.js index e033d50c63b..e2989864ee2 100644 --- a/jstests/query_golden/logs_spilling_md.js +++ b/jstests/query_golden/logs_spilling_md.js @@ -316,6 +316,9 @@ const animalsDocs = [ assert.commandWorked(locations.insertMany(locationsDocs)); assert.commandWorked(animals.insertMany(animalsDocs)); +// Dummy indexes used for multikeyness info by join opt. +assert.commandWorked(animals.createIndex({"dummy": -1, "locationName": -1})); +assert.commandWorked(locations.createIndex({"dummy": 1, "name": -1})); outputPipelineAndSlowQueryLog( animals, [ diff --git a/src/mongo/db/BUILD.bazel b/src/mongo/db/BUILD.bazel index 1ef80f3a3b6..578e16d9d77 100644 --- a/src/mongo/db/BUILD.bazel +++ b/src/mongo/db/BUILD.bazel @@ -974,6 +974,7 @@ mongo_cc_library( "//src/mongo/db/query/compiler/ce/sampling:ce_multikey_dotted_path_support", "//src/mongo/db/query/compiler/ce/sampling:sampling_estimator_interface", "//src/mongo/db/query/compiler/ce/sampling:sampling_math", + "//src/mongo/db/query/compiler/dependency_analysis:pipeline_dependency_graph", "//src/mongo/db/query/compiler/optimizer/cost_based_ranker:estimates", "//src/mongo/db/query/compiler/optimizer/join:plan_enumerator", "//src/mongo/db/query/compiler/optimizer/join:reorder_joins", diff --git a/src/mongo/db/query/compiler/optimizer/join/agg_join_model.cpp b/src/mongo/db/query/compiler/optimizer/join/agg_join_model.cpp index 80033f53cd9..67a4aa059d5 100644 --- a/src/mongo/db/query/compiler/optimizer/join/agg_join_model.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/agg_join_model.cpp @@ -34,10 +34,10 @@ #include "mongo/db/pipeline/document_source_geo_near.h" #include "mongo/db/pipeline/document_source_internal_join_hint.h" #include 
"mongo/db/pipeline/document_source_lookup.h" -#include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/expression_context_builder.h" #include "mongo/db/pipeline/pipeline_d.h" #include "mongo/db/pipeline/pipeline_factory.h" +#include "mongo/db/query/compiler/dependency_analysis/pipeline_dependency_graph.h" #include "mongo/db/query/compiler/optimizer/join/path_resolver.h" #include "mongo/db/query/compiler/optimizer/join/predicate_extractor.h" #include "mongo/db/query/util/disjoint_set.h" @@ -222,6 +222,67 @@ Status addExprJoinPredicates(MutableJoinGraph& graph, return Status::OK(); } +/** + * Helper function to determine the arrayness of a field that may have been modified by the pipeline + * while tracking "as" path arrayness. Note: 'expCtx' must be non-const since the arrayness check + * updates state that provides a non-multikey guarantee for any field we check the arrayness of. + * + * TODO SERVER-123929: replace this function once dependency analysis supports tracking arrayness of + * lookup "as" fields. + */ +bool canPipelinePathBeArray(const pipeline::dependency_graph::DependencyGraph& pipelineBaseCollDeps, + ExpressionContext* expCtx, + DocumentSource* ds, + const FieldPath& fp) { + auto path = fp.fullPath(); + auto* declStage = pipelineBaseCollDeps.getDeclaringStage(ds, path).get(); + tassert(11371801, "Expected stage to differ", declStage != ds); + if (auto* originLookup = dynamic_cast<DocumentSourceLookUp*>(declStage); originLookup) { + // The "as" field produced by a previous $lookup cannot be an array, since any previous + // $lookup must have an $unwind + be eligible for join-optimization (i.e. be part of the + // prefix). + auto asField = originLookup->getAsField(); + if (fp == asField) { + return false; + } + + if (asField.isPrefixOf(fp)) { + // This is a sub-field of the $lookup's "as" field; we need to look at the secondary + // collection to learn about its arrayness. 
+ // TODO SERVER-123953: We will need to actually look at a dependency graph here the + // second we support any subpipeline more complex than a single $match stage. + return expCtx->canPathBeArrayForNss(fp.subtractPrefix(asField.getPathLength()), + originLookup->getFromNs()); + } + + tassert(11371800, + "It should not be possible for a $lookup to modify a field unrelated to its " + "'as' field", + fp.isPrefixOf(asField)); + // We're in a scenario where our "as" field is something like "a.b", vs the join predicate + // field we're looking at is in fact field "a". We should verify the arrayness of field "a" + // at the point when it was last modified. + return canPipelinePathBeArray(pipelineBaseCollDeps, expCtx, declStage, fp); + } + + // If this path doesn't originate from a $lookup, we can just check the base coll deps. + return pipelineBaseCollDeps.canPathBeArray(ds, path); +}; + +/** + * Validates that neither field in the join predicate can include arrays. + * TODO SERVER-123953: Use a dependency graph instead of directly accessing foreign path arrayness. + */ +bool canJoinPredicateIncludeArrays(const pipeline::dependency_graph::DependencyGraph& baseCollDeps, + ExpressionContext* expCtx, + DocumentSource* ds, + const FieldPath& localField, + const NamespaceString& foreignNs, + const FieldPath& foreignField) { + return canPipelinePathBeArray(baseCollDeps, expCtx, ds, localField) || + expCtx->canPathBeArrayForNss(foreignField, foreignNs); +} + } // namespace bool AggJoinModel::pipelineEligibleForJoinReordering(const Pipeline& pipeline) { @@ -257,6 +318,15 @@ StatusWith AggJoinModel::constructJoinModel(const Pipeline& pipeli hint = suffix->popFront(); } + // Initialize deps after popping the $hint stage, but BEFORE we try to push a pipeline prefix + // into our base collection CQ. This is important so we don't miss (for instance) $projects at + // the start of the pipeline that might rename fields. 
+ auto canMainCollPathBeArray = [clonedExpCtx, &nss](StringData path) { + return clonedExpCtx->canPathBeArrayForNss(FieldRef(path), nss); + }; + pipeline::dependency_graph::DependencyGraph mainCollDeps(suffix->getSources(), + canMainCollPathBeArray); + ExpressionContext::PlanCacheOptions oldPlanCache = expCtx->getPlanCache(); expCtx->setPlanCache(ExpressionContext::PlanCacheOptions::kDisablePlanCache); auto swCQ = createCanonicalQuery(expCtx, nss, *suffix); @@ -307,6 +377,18 @@ StatusWith AggJoinModel::constructJoinModel(const Pipeline& pipeli break; } + // Ensure that neither local nor foreign field can include arrays (if present). + if (lookup->hasLocalFieldForeignFieldJoin() && + canJoinPredicateIncludeArrays(mainCollDeps, + clonedExpCtx.get(), + lookup, + *lookup->getLocalField(), + lookup->getFromNs(), + *lookup->getForeignField())) { + // End prefix here, this join predicate might include arrays. + break; + } + // Attempt to extract join predicates and single table predicates from the $lookup // expressed as $expr in $match stage. If there is no subpipeline, this returns no join // predicates and a CanonicalQuery with empty predicate. If this returns a bad status, @@ -315,10 +397,30 @@ StatusWith AggJoinModel::constructJoinModel(const Pipeline& pipeli if (!swPreds.isOK()) { break; } + auto preds = std::move(swPreds.getValue()); - auto foreignNodeId = graph.addNode(lookup->getFromNs(), - std::move(swPreds.getValue().canonicalQuery), - lookup->getAsField()); + // Similar check as above, but now for predicates extracted from the sub-pipeline. + if (std::any_of( + preds.joinPredicates.begin(), preds.joinPredicates.end(), [&](auto&& jp) { + return canJoinPredicateIncludeArrays(mainCollDeps, + clonedExpCtx.get(), + lookup, + jp.localField(), + lookup->getFromNs(), + jp.foreignField()); + })) { + // Some field in a join predicate introduced by a $expr $match in a sub-pipeline + // might have array values. End prefix here. 
+ break; + } + + // If we get here, it means we're ready to modify the join graph to include this + // $lookup. Once the join graph has been modified, any failure case should cause us to + // bail out of join optimization completely, rather than just ending the prefix here + // (since we've already partially incorporated the current join). + + auto foreignNodeId = graph.addNode( + lookup->getFromNs(), std::move(preds.canonicalQuery), lookup->getAsField()); if (!foreignNodeId) { return Status(ErrorCodes::BadValue, "Graph is too big: too many nodes"); @@ -351,7 +453,7 @@ StatusWith AggJoinModel::constructJoinModel(const Pipeline& pipeli // Add join predicates expressed as $expr in subpipelines to join graph. auto status = addExprJoinPredicates( - graph, swPreds.getValue().joinPredicates, pathResolver, *foreignNodeId); + graph, std::move(preds.joinPredicates), pathResolver, *foreignNodeId); if (!status.isOK()) { return status; } diff --git a/src/mongo/db/query/compiler/optimizer/join/agg_join_model_fixture.h b/src/mongo/db/query/compiler/optimizer/join/agg_join_model_fixture.h index 792cc6109c9..e6d4ee16080 100644 --- a/src/mongo/db/query/compiler/optimizer/join/agg_join_model_fixture.h +++ b/src/mongo/db/query/compiler/optimizer/join/agg_join_model_fixture.h @@ -30,7 +30,9 @@ #pragma once #include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/query/compiler/metadata/path_arrayness.h" #include "mongo/db/query/compiler/optimizer/join/agg_join_model.h" +#include "mongo/idl/server_parameter_test_controller.h" #include "mongo/util/modules.h" namespace mongo::join_ordering { @@ -54,7 +56,41 @@ public: std::unique_ptr makePipelineOfSize(size_t numJoins); + /** + * Marks the given fields as non-array (scalar) in the pipeline's ExpressionContext. + * 'mainCollFields' are fields on the main collection; 'secondaryCollFieldMap' maps secondary + * collection names to their fields. 
+ */ + static void markFieldsAsScalar( + Pipeline& pipeline, + const std::vector& mainCollFields, + const StringMap>& secondaryCollFieldMap) { + auto expCtx = pipeline.getContext(); + + auto mainPathArrayness = std::make_shared(); + for (const auto& field : mainCollFields) { + mainPathArrayness->addPath( + FieldPath(field), MultikeyComponents{}, /*isFullRebuild=*/true); + } + expCtx->setPathArraynessForNss(expCtx->getNamespaceString(), std::move(mainPathArrayness)); + + for (const auto& [collName, fields] : secondaryCollFieldMap) { + auto pathArrayness = std::make_shared(); + for (const auto& field : fields) { + pathArrayness->addPath( + FieldPath(field), MultikeyComponents{}, /*isFullRebuild=*/true); + } + expCtx->setPathArraynessForNss( + NamespaceString::createNamespaceString_forTest("test", collName), + std::move(pathArrayness)); + } + } + const AggModelBuildParams defaultBuildParams{.maxNumberNodesConsideredForImplicitEdges = kMaxNumberNodesConsideredForImplicitEdges}; + +private: + // Ensure path arrayness is enabled for all tests. 
+ RAIIServerParameterControllerForTest queryKnobController{"featureFlagPathArrayness", true}; }; } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/agg_join_model_golden_test.cpp b/src/mongo/db/query/compiler/optimizer/join/agg_join_model_golden_test.cpp index 6ea5dabceb4..e5d4f8871b3 100644 --- a/src/mongo/db/query/compiler/optimizer/join/agg_join_model_golden_test.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/agg_join_model_golden_test.cpp @@ -84,12 +84,14 @@ TEST_F(AggJoinModelGoldenTest, longPrefix) { {$unwind: "$fromB"} ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "longPrefix"); ASSERT_OK(joinModel); } TEST_F(AggJoinModelGoldenTest, veryLargePipeline) { auto pipeline = makePipelineOfSize(/*numJoins*/ kHardMaxNodesInJoin + 3); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "veryLargePipeline"); ASSERT_OK(joinModel); } @@ -106,6 +108,7 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_OneImplictEdge) { {$unwind: "$fromB"} ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_OneImplictEdge"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 3); @@ -126,6 +129,7 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_MultipleImplictEdges) { {$unwind: "$fromC"} ])"; auto pipeline = makePipeline(query, {"A", "B", "C"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"a"_sd}}, {"B", {"b"_sd}}, {"C", {"c"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_MultipleImplictEdges"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 4); @@ -154,6 +158,13 @@ TEST_F(AggJoinModelGoldenTest, 
addImplicitEdges_TwoConnectedComponents) { {$unwind: "$fromE"} ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D", "E"}); + markFieldsAsScalar(*pipeline, + {"a"_sd}, + {{"A", {"a"_sd}}, + {"B", {"b"_sd}}, + {"C", {"c"_sd, "d"_sd}}, + {"D", {"d"_sd}}, + {"E", {"e"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_TwoConnectedComponents"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 6); @@ -179,6 +190,13 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_NoImplicitEdges) { {$unwind: "$fromE"} ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D", "E"}); + markFieldsAsScalar(*pipeline, + {"a"_sd}, + {{"A", {"a"_sd, "b"_sd}}, + {"B", {"b"_sd, "c"_sd}}, + {"C", {"c"_sd, "d"_sd}}, + {"D", {"d"_sd, "e"_sd}}, + {"E", {"e"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_NoImplicitEdges"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 6); @@ -209,6 +227,9 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_predicatesAtEnd) { } ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D"}); + markFieldsAsScalar(*pipeline, + {"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd}, + {{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_predicatesAtEnd"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5); @@ -236,6 +257,9 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_predicatesInBetween) { {$match: {$expr: {$eq: ["$fromD.d", "$fromA.d"]}}} ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D"}); + markFieldsAsScalar(*pipeline, + {"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd}, + {{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_predicatesInBetween"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5); @@ -265,6 +289,9 @@ 
TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_earlyEnd) { {$unwind: "$fromD"} ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D"}); + markFieldsAsScalar(*pipeline, + {"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd}, + {{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_earlyEnd"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 3); @@ -294,6 +321,12 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_addImplicitEdge) { {$match: {$expr: {$eq: ["$fromB.b", "$fromC.c"]}}} ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D"}); + markFieldsAsScalar(*pipeline, + {"a"_sd}, + {{"A", {"a"_sd, "b"_sd}}, + {"B", {"b"_sd, "s"_sd}}, + {"C", {"s"_sd, "c"_sd}}, + {"D", {"d"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_addImplicitEdge"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5); @@ -336,6 +369,12 @@ TEST_F(AggJoinModelGoldenTest, subPipelineEdge_addImplicitEdge) { {$unwind: "$fromD"} ])"; auto pipeline = makePipeline(query, {"A", "B", "C", "D"}); + markFieldsAsScalar(*pipeline, + {"a"_sd}, + {{"A", {"a"_sd, "b"_sd}}, + {"B", {"b"_sd, "s"_sd}}, + {"C", {"s"_sd, "c"_sd}}, + {"D", {"d"_sd, "a"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "subPipelineEdge_addImplicitEdge"); ASSERT_OK(joinModel); ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5); @@ -369,6 +408,10 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_subPipelineEdge_addImplicitEdge) {$match: {$expr: {$eq: ["$fromA.a", "$fromB.a"]}}} ])"; auto pipeline = makePipeline(query, {"A", "B", "C"}); + markFieldsAsScalar( + *pipeline, + {"a"_sd}, + {{"A", {"a"_sd, "b"_sd}}, {"B", {"b"_sd, "c"_sd, "a"_sd}}, {"C", {"c"_sd, "a"_sd}}}); auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_subPipelineEdge_addImplicitEdge"); ASSERT_OK(joinModel); diff --git 
a/src/mongo/db/query/compiler/optimizer/join/agg_join_model_test.cpp b/src/mongo/db/query/compiler/optimizer/join/agg_join_model_test.cpp index 9be141e1cc0..10f2b90e93c 100644 --- a/src/mongo/db/query/compiler/optimizer/join/agg_join_model_test.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/agg_join_model_test.cpp @@ -65,6 +65,7 @@ TEST_F(PipelineAnalyzerTest, PipelinePrefixEligibleForJoinReorderingNoLocalForei ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}}); // This pipeline's prefix is eligible for reordering. ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -82,6 +83,7 @@ TEST_F(PipelineAnalyzerTest, PipelineEligibleForJoinReorderingSingleLookupUnwind ])"; auto pipeline = makePipeline(query, {"A"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}}); // This pipeline is eligible for reordering. ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -123,6 +125,7 @@ TEST_F(PipelineAnalyzerTest, TwoLookupUnwinds) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -142,6 +145,7 @@ TEST_F(PipelineAnalyzerTest, MatchOnMainCollection) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -163,6 +167,7 @@ TEST_F(PipelineAnalyzerTest, MatchInSubPipeline) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -187,6 +192,7 @@ TEST_F(PipelineAnalyzerTest, GroupOnMainCollection) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, 
{{"A", {"b"_sd}}, {"B", {"b"_sd}}}); // We don't detect ineligibility here. ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -207,6 +213,7 @@ TEST_F(PipelineAnalyzerTest, ConflictingLocalFields) { ])"; auto pipeline = makePipeline(query, {"B", "C"}); + markFieldsAsScalar(*pipeline, {"x"_sd, "a"_sd}, {{"B", {"y"_sd}}, {"C", {"z"_sd}}}); // We don't detect ineligibility of local path fields here. ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams); @@ -241,6 +248,8 @@ TEST_F(PipelineAnalyzerTest, ConflictingLocalFieldExprSyntax) { ])"; auto pipeline = makePipeline(query, {"B", "A"}); + markFieldsAsScalar( + *pipeline, {"x"_sd, "foo"_sd, "bar"_sd}, {{"B", {"y"_sd}}, {"A", {"foo"_sd, "bar"_sd}}}); // We don't detect ineligibility of local path fields here. ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams); @@ -256,6 +265,7 @@ TEST_F(PipelineAnalyzerTest, CompatibleAsFields) { {$unwind: "$x.z"} ])"; auto pipeline = makePipeline(query, {"B", "C"}); + markFieldsAsScalar(*pipeline, {"x.c"_sd}, {{"B", {"c"_sd, "d"_sd}}, {"C", {"d"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams); @@ -273,6 +283,7 @@ TEST_F(PipelineAnalyzerTest, GroupInMiddleIneligible) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}}); // We don't detect ineligibility here. 
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -295,6 +306,7 @@ TEST_F(PipelineAnalyzerTest, GroupInSubPipeline) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -367,6 +379,7 @@ TEST_F(PipelineAnalyzerTest, IneligibleSubPipelineStage) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -439,6 +452,7 @@ TEST_F(PipelineAnalyzerTest, LongPrefix) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -460,6 +474,7 @@ TEST_F(PipelineAnalyzerTest, PipelineInEligibleForSortStage) { ])"; auto pipeline = makePipeline(sortPrefixQuery, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); // This is not where we examine the pipeline for a $sort stage. 
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); auto status = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams).getStatus(); @@ -492,6 +507,7 @@ TEST_F(PipelineAnalyzerTest, LocalFieldOverride) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"a"_sd, "b"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -504,6 +520,7 @@ TEST_F(PipelineAnalyzerTest, LocalFieldOverride) { TEST_F(PipelineAnalyzerTest, tooManyNodes) { static constexpr size_t numJoins = 5; auto pipeline = makePipelineOfSize(numJoins); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}}); // Configure the buildParams that one $lookup/$unwind pair is forced to the suffix because the // maximum number of nodes is hit. AggModelBuildParams buildParams{ @@ -519,6 +536,7 @@ TEST_F(PipelineAnalyzerTest, tooManyNodes) { TEST_F(PipelineAnalyzerTest, tooManyEdges) { static constexpr size_t numJoins = 5; auto pipeline = makePipelineOfSize(numJoins); + markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}}); // Configure the buildParams that one $lookup/$unwind pair is forced to the suffix because the // maximum number of edges is hit. 
AggModelBuildParams buildParams{ @@ -557,6 +575,7 @@ TEST_F(PipelineAnalyzerTest, SingleJoinCompoundPredicate) { ])"; auto pipeline = makePipeline(query, {"A"}); + markFieldsAsScalar(*pipeline, {"foo"_sd, "bar"_sd}, {{"A", {"foo"_sd, "bar"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -617,6 +636,9 @@ TEST_F(PipelineAnalyzerTest, CompoundJoinKeyWithLocalForeignSyntax) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, + {"foo"_sd, "bar"_sd}, + {{"A", {"foo"_sd, "bar"_sd}}, {"B", {"foo"_sd, "bar"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -676,6 +698,8 @@ TEST_F(PipelineAnalyzerTest, DuplicateExprEqAndEqEdges) { ])"; auto pipeline = makePipeline(query, {"A", "B", "C"}); + markFieldsAsScalar( + *pipeline, {"bar"_sd}, {{"A", {"bar"_sd}}, {"B", {"bar"_sd}}, {"C", {"bar"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -721,6 +745,7 @@ TEST_F(PipelineAnalyzerTest, ExprOnlyImplicitEdges) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"bar"_sd}, {{"A", {"bar"_sd}}, {"B", {"bar"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -752,6 +777,7 @@ TEST_F(PipelineAnalyzerTest, PipelineIneligibleWithCorrelatedNonJoinPredicate) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"foo"_sd}, {{"A", {"foo"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline)); @@ -781,6 +807,7 @@ TEST_F(PipelineAnalyzerTest, PipelineIneligibleWithNonFieldPathVariable) { ])"; auto pipeline = makePipeline(query, {"A", "B"}); + markFieldsAsScalar(*pipeline, {"foo"_sd}, {{"A", {"foo"_sd}}}); ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));