SERVER-113718 Fallback on join opt for non-scalar predicates (#52564)

GitOrigin-RevId: b937852641032a8cb8ce0a93f5ac7a622e5478e8
This commit is contained in:
Alya Carina Berciu 2026-04-24 15:40:47 +02:00 committed by MongoDB Bot
parent 5027083cb6
commit a6ff860685
13 changed files with 1227 additions and 22 deletions

View File

@ -43,10 +43,6 @@ selector:
- jstests/core/timeseries/**/*.js
# TODO SERVER-114739 Join optimization: collation disregarded when joining
- jstests/sharding/query/collation/collation_lookup.js
# TODO SERVER-113718 Do not use join optimizer path if join predicates/fields operate over arrays
- jstests/core/query/release_memory/hash_lookup_unwind.js
- jstests/aggregation/sources/match/sbe_non_leading_match_pbt.js
- jstests/aggregation/sources/lookup/lookup_unwind_equijoin.js
# TODO SERVER-118416 Fix join reordering tripwire assertions for lookup+unwind pipelines and re-enable lookup_unwind_pbt PBT
- jstests/aggregation/sources/lookup/lookup_unwind_pbt.js
# Shard level user writes block is not supported on standalone.

View File

@ -8645,8 +8645,12 @@ export const authCommandsLib = {
testcases: testcases_transformationOnly,
skipTest: (conn) => {
// Can't run on mongos. Also, $_internalJoinHint requires join optimization which
// is unavailable when the classic engine is forced.
return !isStandalone(conn) || isForceClassicEngine(conn);
// is unavailable when the classic engine is forced, or the path arrayness feature is disabled.
return (
!isStandalone(conn) ||
isForceClassicEngine(conn) ||
!isFeatureEnabled(conn, "featureFlagPathArrayness")
);
},
setup: function (db) {
// Only works with join optimization enabled.
@ -8656,6 +8660,8 @@ export const authCommandsLib = {
internalEnableJoinOptimization: true,
}),
);
// Need an index for multikeyness info.
assert.commandWorked(db.foo.createIndex({dummy: -1, i: 1}));
// Add a document to collection "foo".
assert.commandWorked(db.foo.insertOne({_id: 0, i: 0}));
},
@ -8666,7 +8672,8 @@ export const authCommandsLib = {
internalEnableJoinOptimization: false,
}),
);
// Clean up doc.
// Clean up doc & index.
assert.commandWorked(db.foo.dropIndex({dummy: -1, i: 1}));
assert.commandWorked(db.foo.deleteOne({_id: 0, i: 0}));
},
},

View File

@ -0,0 +1,978 @@
/**
* End-to-end test verifying that join optimization is used iff no join predicate field may contain arrays.
*
* @tags: [
* requires_fcv_90,
* requires_sbe
* ]
*/
import {runTestWithUnorderedComparison, joinTestWrapper} from "jstests/libs/query/join_utils.js";
// Must enable path arrayness tracking for this test.
const conn = MongoRunner.runMongod({setParameter: "featureFlagPathArrayness=true"});
const db = conn.getDB(`${jsTestName()}_db`);
joinTestWrapper(db, function runArraynessTest() {
assert.commandWorked(
db.adminCommand({setParameter: 1, internalEnableJoinOptimization: true, internalEnablePathArrayness: true}),
);
const c1 = db.c1;
const c2 = db.c2;
const c3 = db.c3;
c1.drop();
c2.drop();
c3.drop();
assert.commandWorked(
c1.insertMany([
{
_id: 0,
alwaysArray: [],
sometimesArray: 3,
neverArray: 1,
obj: {array: [1, 2, 3], scalar: 1},
},
{
_id: 1,
alwaysArray: [1, 2, 3],
sometimesArray: 2,
neverArray: 1,
obj: {},
},
{
_id: 2,
alwaysArray: [2, 3],
sometimesArray: [3, 4],
neverArray: 1,
obj: {array: [], scalar: 2},
},
]),
);
assert.commandWorked(
c2.insertMany([
{_id: 0, a: 1},
{_id: 1, a: 2},
{_id: 2, a: 3},
]),
);
assert.commandWorked(
c3.insertMany([
{_id: 0, a: 1, obj: {array: [1, 2, 3], scalar: 1}},
{_id: 1, a: 2, obj: {array: [], scalar: 2}},
{_id: 2, a: 3, obj: {}},
]),
);
runTestWithUnorderedComparison({
db,
description: "No arrayness (no indexes) => no joinopt (2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"x": {"_id": 0, "a": 1},
obj: {array: [1, 2, 3], scalar: 1},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"x": {"_id": 0, "a": 1},
obj: {},
},
{
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"x": {"_id": 0, "a": 1},
obj: {array: [], scalar: 2},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness => no joinopt ($expr, 2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {neverArray: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$a", "$$neverArray"]}}}],
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
"x": {
"_id": 0,
"a": 1,
},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
"x": {
"_id": 0,
"a": 1,
},
},
{
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"obj": {
"array": [],
"scalar": 2,
},
"x": {
"_id": 0,
"a": 1,
},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness => no joinopt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness => no joinopt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {neverArray: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$neverArray", "$a"]}}}],
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
assert.commandWorked(c1.createIndex({neverArray: 1}));
runTestWithUnorderedComparison({
db,
description: "No arrayness on foreign field => no joinopt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness on foreign field => no joinopt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {neverArray: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$neverArray", "$a"]}}}],
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
assert.commandWorked(c2.createIndex({a: 1}));
runTestWithUnorderedComparison({
db,
description: "No arrayness on local field => no joinopt (2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "sometimesArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
"x": {
"_id": 2,
"a": 3,
},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
"x": {
"_id": 1,
"a": 2,
},
},
{
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"obj": {
"array": [],
"scalar": 2,
},
"x": {
"_id": 2,
"a": 3,
},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness on local field => no joinopt ($expr, 2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {sometimesArray: "$sometimesArray"},
pipeline: [{$match: {$expr: {$eq: ["$$sometimesArray", "$a"]}}}],
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
"x": {
"_id": 2,
"a": 3,
},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
"x": {
"_id": 1,
"a": 2,
},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields => join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields => join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {na: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$na", "$a"]}}}],
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1,
});
// The next two cases reuse an existing, sometimes-array field name ("sometimesArray") as the
// $lookup "as" field. Only the arrayness of the join predicate fields matters, so join
// optimization must still apply. The descriptions are kept distinct from the earlier
// "Arrayness on all fields" cases so that a failure identifies the right test.
runTestWithUnorderedComparison({
    db,
    description: "Arrayness on all fields, 'as' overwrites sometimes-array field => join opt (2 node, suffix)",
    coll: c1,
    pipeline: [
        {
            $lookup: {
                from: c2.getName(),
                localField: "neverArray",
                foreignField: "a",
                as: "sometimesArray", // Arrayness of "as" field doesn't matter.
            },
        },
        {$unwind: "$sometimesArray"},
        {$project: {_id: 0, obj: 0}},
    ],
    expectedResults: [
        {"alwaysArray": [], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
        {"alwaysArray": [1, 2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
        {"alwaysArray": [2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
    ],
    expectedUsedJoinOptimization: true,
    expectedNumJoinStages: 1,
});
runTestWithUnorderedComparison({
    db,
    description: "Arrayness on all fields, 'as' overwrites sometimes-array field => join opt ($expr, 2 node, suffix)",
    coll: c1,
    pipeline: [
        {
            $lookup: {
                from: c2.getName(),
                as: "sometimesArray", // Arrayness of "as" field doesn't matter.
                let: {na: "$neverArray"},
                pipeline: [{$match: {$expr: {$eq: ["$$na", "$a"]}}}],
            },
        },
        {$unwind: "$sometimesArray"},
        {$project: {_id: 0, obj: 0}},
    ],
    expectedResults: [
        {"alwaysArray": [], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
        {"alwaysArray": [1, 2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
        {"alwaysArray": [2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
    ],
    expectedUsedJoinOptimization: true,
    expectedNumJoinStages: 1,
});
assert.commandWorked(c1.createIndexes([{sometimesArray: -1}, {alwaysArray: 1}]));
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey localField => no join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "sometimesArray",
foreignField: "a",
as: "y",
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "y": {"_id": 2, "a": 3}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "y": {"_id": 1, "a": 2}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "y": {"_id": 2, "a": 3}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey localField => no join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "y",
let: {sa: "$sometimesArray"},
pipeline: [{$match: {$expr: {$eq: ["$a", "$$sa"]}}}],
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "y": {"_id": 2, "a": 3}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "y": {"_id": 1, "a": 2}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField => no join opt (2 node, suffix)",
coll: c2,
pipeline: [
{
$lookup: {
from: c1.getName(),
localField: "a",
foreignField: "alwaysArray",
as: "y",
},
},
{$unwind: "$y"},
{$project: {_id: 0, "y.obj": 0}},
],
expectedResults: [
{"a": 1, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}},
{"a": 2, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}},
{"a": 2, "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}},
{"a": 3, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}},
{"a": 3, "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField => no join opt ($expr, 2 node, suffix)",
coll: c2,
pipeline: [
{
$lookup: {
from: c1.getName(),
as: "y",
let: {aaa: "$a"},
pipeline: [{$match: {$expr: {$eq: ["$alwaysArray", "$$aaa"]}}}],
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField/localField => no join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c1.getName(),
localField: "sometimesArray",
foreignField: "alwaysArray",
as: "y",
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0, "y.obj": 0}},
],
expectedResults: [
{
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1},
},
{
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1},
},
{
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1},
},
{
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1},
},
{
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1},
},
{
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField/localField => no join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c1.getName(),
as: "y",
let: {sa: "$sometimesArray"},
pipeline: [{$match: {$expr: {$eq: ["$alwaysArray", "$$sa"]}}}],
},
},
{$unwind: "$y"},
],
expectedResults: [],
expectedUsedJoinOptimization: false,
});
// Ensure we have arrayness info for c3 & obj field in c1.
assert.commandWorked(c3.createIndex({a: -1, obj: 1}));
assert.commandWorked(c1.createIndex({obj: 1}));
runTestWithUnorderedComparison({
db,
description: "As field has an array subfield, used in subsequent join => no join opt in suffix",
coll: c2,
pipeline: [
// This is ok, should use join opt.
{
$lookup: {
from: c1.getName(),
localField: "a",
foreignField: "neverArray",
as: "y",
},
},
{$unwind: "$y"},
// Prefix should end here: next predicate involves an array.
{
$lookup: {
from: c3.getName(),
localField: "y.sometimesArray",
foreignField: "a",
as: "z",
},
},
{$unwind: "$z"},
{$project: {"y.obj": 0, "z.obj": 0}},
],
expectedResults: [
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
},
"z": {
"_id": 2,
"a": 3,
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
},
"z": {
"_id": 1,
"a": 2,
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
},
"z": {
"_id": 2,
"a": 3,
},
},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1, // We should not see a second!
});
// Counterpart of the preceding case: the second $lookup joins on "y.neverArray", a subfield of
// the first $lookup's "as" field that is never an array, so both $lookups are expected to be
// handled by join optimization (see expectedNumJoinStages below).
runTestWithUnorderedComparison({
    db,
    description: "As field has scalar subfield, used in subsequent join => join opt for both $lookups",
    coll: c2,
    pipeline: [
        // This is ok, should use join opt.
        {
            $lookup: {
                from: c1.getName(),
                as: "y",
                let: {aaa: "$a"},
                pipeline: [{$match: {$expr: {$eq: ["$$aaa", "$neverArray"]}}}],
            },
        },
        {$unwind: "$y"},
        // The following is as well! We should have the whole pipeline in our eligible prefix.
        {
            $lookup: {
                from: c3.getName(),
                localField: "y.neverArray",
                foreignField: "a",
                as: "z",
            },
        },
        {$unwind: "$z"},
        {$project: {"y.obj": 0, "z.obj": 0}},
    ],
    expectedResults: [
        {
            "_id": 0,
            "a": 1,
            "y": {
                "_id": 0,
                "alwaysArray": [],
                "sometimesArray": 3,
                "neverArray": 1,
            },
            "z": {
                "_id": 0,
                "a": 1,
            },
        },
        {
            "_id": 0,
            "a": 1,
            "y": {
                "_id": 1,
                "alwaysArray": [1, 2, 3],
                "sometimesArray": 2,
                "neverArray": 1,
            },
            "z": {
                "_id": 0,
                "a": 1,
            },
        },
        {
            "_id": 0,
            "a": 1,
            "y": {
                "_id": 2,
                "alwaysArray": [2, 3],
                "sometimesArray": [3, 4],
                "neverArray": 1,
            },
            "z": {
                "_id": 0,
                "a": 1,
            },
        },
    ],
    expectedUsedJoinOptimization: true,
    expectedNumJoinStages: 2, // Both $lookups should be pushed down!
});
runTestWithUnorderedComparison({
db,
description: "Test arrayness check works for subfields + compound join predicates.",
coll: c2,
pipeline: [
// This is ok, should use join opt.
{
$lookup: {
from: c1.getName(),
as: "y",
let: {aaa: "$a"},
pipeline: [
{
$match: {
$expr: {
$and: [
{$eq: ["$$aaa", "$neverArray"]},
{$gt: ["$sometimesArray", 0]}, // Residual predicate, should still be ok.
],
},
},
},
],
},
},
{$unwind: "$y"},
// The following should be ok as well.
{
$lookup: {
from: c3.getName(),
as: "z",
let: {ooo: "$y.obj"},
pipeline: [{$match: {$expr: {$eq: ["$obj", "$$ooo"]}}}],
},
},
{$unwind: "$z"},
// But not this (since we don't have arrayness for obj.scalar).
{
$lookup: {
from: c1.getName(),
as: "w",
let: {ooo: "$z.obj.scalar"},
pipeline: [{$match: {$expr: {$eq: ["$neverArray", "$$ooo"]}}}],
},
},
{$unwind: "$w"},
],
expectedResults: [
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"z": {
"_id": 0,
"a": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"w": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"z": {
"_id": 0,
"a": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"w": {
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"z": {
"_id": 0,
"a": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"w": {
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"obj": {
"array": [],
"scalar": 2,
},
},
},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 2,
});
// Disabling internalEnablePathArrayness should prevent join optimization from using arrayness
// info, so a query that previously qualified for joinopt must no longer qualify.
assert.commandWorked(db.adminCommand({setParameter: 1, internalEnablePathArrayness: false}));
runTestWithUnorderedComparison({
db,
description: "internalEnablePathArrayness=false => no joinopt even when arrayness is known",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
});
MongoRunner.stopMongod(conn);

View File

@ -2,8 +2,9 @@
// Validate that join optimization does not run on sharded collections.
//
// @tags: [
// requires_fcv_83,
// requires_sbe
// requires_fcv_90,
// requires_sbe,
// featureFlagPathArrayness
// ]
//
import {joinOptUsed} from "jstests/libs/query/join_utils.js";
@ -30,10 +31,10 @@ const db = sharded.getDB("test");
sharded.shard0.getDB("test").setLogLevel(5, "query");
sharded.shard1.getDB("test").setLogLevel(5, "query");
const docs = [{f1: "aaa", f2: 123}, {f1: "bbb", f2: 0}, {f2: -1}, {f1: "zzz"}];
assert.commandWorked(db["coll1"].insertMany(docs));
assert.commandWorked(db["coll2"].insertMany(docs));
assert.commandWorked(db["coll3"].insertMany(docs));
assert.commandWorked(db["coll4"].insertMany(docs));
for (const coll of ["coll1", "coll2", "coll3", "coll4"]) {
assert.commandWorked(db[coll].insertMany(docs));
assert.commandWorked(db[coll].createIndex({"dummy": 1, "f1": 1, "f2": -1}));
}
// Ensure join optimization is disabled. Use assert.commandWorked() rather than a bare assert():
// the command reply is an object and therefore always truthy, so a bare assert() would not
// detect a failed setParameter.
assert.commandWorked(
    sharded.shard0.getDB("test").adminCommand({setParameter: 1, internalEnableJoinOptimization: false}),
);

View File

@ -1,7 +1,9 @@
//
// Test that the cardinality estimates for two-table joins approximate reality
// @tags: [
// requires_sbe
// requires_sbe,
// featureFlagPathArrayness,
// requires_fcv_90
// ]
//
@ -33,6 +35,7 @@ function populate() {
});
}
db.many_rows.drop();
db.many_rows.insertMany(documents);
db.many_rows.createIndex({i_idx: 1});
db.many_rows.createIndex({i_idx_offset: 1});
@ -40,16 +43,23 @@ function populate() {
db.many_rows.createIndex({c_idx: 1});
db.many_rows.createIndex({d_idx: 1});
db.many_rows.createIndex({n_idx: 1});
// Not used in planning, but needed for multikeyness info.
db.many_rows.createIndex({dummy: 1, i_noidx: -1, missing_field: 1});
// An empty collection
db.no_rows.drop();
db.no_rows.createIndex({i_idx: 1});
// Not used in planning, but needed for multikeyness info.
db.no_rows.createIndex({dummy: 1, i_idx_offset: -1});
// Collection with a single row
db.one_row.drop();
db.one_row.insert({i_idx: 1});
db.one_row.createIndex({i_idx: 1});
// Collection with 1 non-null document
const nullDocuments = [];
db.mostly_nulls.drop();
db.mostly_nulls.insert({i_idx: 1});
for (let i = 0; i < collSize; i++) {
nullDocuments.push({

View File

@ -2,8 +2,9 @@
* Tests hinting joins orders.
*
* @tags: [
* requires_fcv_83,
* requires_sbe
* requires_fcv_90,
* requires_sbe,
* featureFlagPathArrayness
* ]
*/
import {normalizeArray} from "jstests/libs/query_optimization/golden_test.js";

View File

@ -37,7 +37,7 @@ const thirdColl = db[jsTestName() + "_third"];
thirdColl.drop();
assert.commandWorked(thirdColl.insertMany(testDocs));
// Add index for multikeyness info for path arrayness.
assert.commandWorked(thirdColl.createIndex({dummy: 1, "key.foo": 1}));
assert.commandWorked(thirdColl.createIndex({dummy: 1, "foo": 1, "key.foo": 1}));
const testCases = [
{

View File

@ -316,6 +316,9 @@ const animalsDocs = [
assert.commandWorked(locations.insertMany(locationsDocs));
assert.commandWorked(animals.insertMany(animalsDocs));
// Dummy indexes used for multikeyness info by join opt.
assert.commandWorked(animals.createIndex({"dummy": -1, "locationName": -1}));
assert.commandWorked(locations.createIndex({"dummy": 1, "name": -1}));
outputPipelineAndSlowQueryLog(
animals,
[

View File

@ -974,6 +974,7 @@ mongo_cc_library(
"//src/mongo/db/query/compiler/ce/sampling:ce_multikey_dotted_path_support",
"//src/mongo/db/query/compiler/ce/sampling:sampling_estimator_interface",
"//src/mongo/db/query/compiler/ce/sampling:sampling_math",
"//src/mongo/db/query/compiler/dependency_analysis:pipeline_dependency_graph",
"//src/mongo/db/query/compiler/optimizer/cost_based_ranker:estimates",
"//src/mongo/db/query/compiler/optimizer/join:plan_enumerator",
"//src/mongo/db/query/compiler/optimizer/join:reorder_joins",

View File

@ -34,10 +34,10 @@
#include "mongo/db/pipeline/document_source_geo_near.h"
#include "mongo/db/pipeline/document_source_internal_join_hint.h"
#include "mongo/db/pipeline/document_source_lookup.h"
#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/pipeline/expression_context_builder.h"
#include "mongo/db/pipeline/pipeline_d.h"
#include "mongo/db/pipeline/pipeline_factory.h"
#include "mongo/db/query/compiler/dependency_analysis/pipeline_dependency_graph.h"
#include "mongo/db/query/compiler/optimizer/join/path_resolver.h"
#include "mongo/db/query/compiler/optimizer/join/predicate_extractor.h"
#include "mongo/db/query/util/disjoint_set.h"
@ -222,6 +222,67 @@ Status addExprJoinPredicates(MutableJoinGraph& graph,
return Status::OK();
}
/**
 * Helper function to determine the arrayness of a field that may have been modified by the pipeline
 * while tracking "as" path arrayness. Returns true if 'fp', as observed by stage 'ds', may hold an
 * array value. Note: 'expCtx' must be non-const since the arrayness check updates state that
 * provides a non-multikey guarantee for any field we check the arrayness of.
 *
 * 'pipelineBaseCollDeps' is the dependency graph built over the pipeline for the base collection;
 * it is used both to find the stage that last declared 'fp' and to answer arrayness questions for
 * paths that do not originate from a $lookup.
 *
 * TODO SERVER-123929: replace this function once dependency analysis supports tracking arrayness of
 * lookup "as" fields.
 */
bool canPipelinePathBeArray(const pipeline::dependency_graph::DependencyGraph& pipelineBaseCollDeps,
                            ExpressionContext* expCtx,
                            DocumentSource* ds,
                            const FieldPath& fp) {
    auto path = fp.fullPath();
    auto* declStage = pipelineBaseCollDeps.getDeclaringStage(ds, path).get();
    tassert(11371801, "Expected stage to differ", declStage != ds);

    auto* originLookup = dynamic_cast<DocumentSourceLookUp*>(declStage);
    if (!originLookup) {
        // If this path doesn't originate from a $lookup, we can just check the base coll deps.
        return pipelineBaseCollDeps.canPathBeArray(ds, path);
    }

    // The "as" field produced by a previous $lookup cannot be an array, since any previous
    // $lookup must have an $unwind + be eligible for join-optimization (i.e. be part of the
    // prefix).
    const auto& asField = originLookup->getAsField();
    if (fp == asField) {
        return false;
    }

    if (asField.isPrefixOf(fp)) {
        // This is a sub-field of the $lookup's "as" field; we need to look at the secondary
        // collection to learn about its arrayness.
        // TODO SERVER-123953: We will need to actually look at a dependency graph here the
        // second we support any subpipeline more complex than a single $match stage.
        return expCtx->canPathBeArrayForNss(fp.subtractPrefix(asField.getPathLength()),
                                            originLookup->getFromNs());
    }

    tassert(11371800,
            "It should not be possible for a $lookup to modify a field unrelated to its "
            "'as' field",
            fp.isPrefixOf(asField));

    // We're in a scenario where our "as" field is something like "a.b", vs the join predicate
    // field we're looking at is in fact field "a". We should verify the arrayness of field "a"
    // at the point when it was last modified, i.e. as seen by the declaring $lookup itself.
    return canPipelinePathBeArray(pipelineBaseCollDeps, expCtx, declStage, fp);
}
/**
 * Returns true if either side of a join predicate may contain arrays: the local field as seen by
 * stage 'ds' in the main pipeline, or the foreign field in namespace 'foreignNs'. The foreign side
 * is only consulted when the local side cannot be an array.
 * TODO SERVER-123953: Use a dependency graph instead of directly accessing foreign path arrayness.
 */
bool canJoinPredicateIncludeArrays(const pipeline::dependency_graph::DependencyGraph& baseCollDeps,
                                   ExpressionContext* expCtx,
                                   DocumentSource* ds,
                                   const FieldPath& localField,
                                   const NamespaceString& foreignNs,
                                   const FieldPath& foreignField) {
    // Local side first: if it may be an array there is no need to look at the foreign side.
    if (canPipelinePathBeArray(baseCollDeps, expCtx, ds, localField)) {
        return true;
    }
    return expCtx->canPathBeArrayForNss(foreignField, foreignNs);
}
} // namespace
bool AggJoinModel::pipelineEligibleForJoinReordering(const Pipeline& pipeline) {
@ -257,6 +318,15 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
hint = suffix->popFront();
}
// Initialize deps after popping the $hint stage, but BEFORE we try to push a pipeline prefix
// into our base collection CQ. This is important so we don't miss (for instance) $projects at
// the start of the pipeline that might rename fields.
auto canMainCollPathBeArray = [clonedExpCtx, &nss](StringData path) {
return clonedExpCtx->canPathBeArrayForNss(FieldRef(path), nss);
};
pipeline::dependency_graph::DependencyGraph mainCollDeps(suffix->getSources(),
canMainCollPathBeArray);
ExpressionContext::PlanCacheOptions oldPlanCache = expCtx->getPlanCache();
expCtx->setPlanCache(ExpressionContext::PlanCacheOptions::kDisablePlanCache);
auto swCQ = createCanonicalQuery(expCtx, nss, *suffix);
@ -307,6 +377,18 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
break;
}
// Ensure that neither local nor foreign field can include arrays (if present).
if (lookup->hasLocalFieldForeignFieldJoin() &&
canJoinPredicateIncludeArrays(mainCollDeps,
clonedExpCtx.get(),
lookup,
*lookup->getLocalField(),
lookup->getFromNs(),
*lookup->getForeignField())) {
// End prefix here, this join predicate might include arrays.
break;
}
// Attempt to extract join predicates and single table predicates from the $lookup
// expressed as $expr in $match stage. If there is no subpipeline, this returns no join
// predicates and a CanonicalQuery with empty predicate. If this returns a bad status,
@ -315,10 +397,30 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
if (!swPreds.isOK()) {
break;
}
auto preds = std::move(swPreds.getValue());
auto foreignNodeId = graph.addNode(lookup->getFromNs(),
std::move(swPreds.getValue().canonicalQuery),
lookup->getAsField());
// Similar check as above, but now for predicates extracted from the sub-pipeline.
if (std::any_of(
preds.joinPredicates.begin(), preds.joinPredicates.end(), [&](auto&& jp) {
return canJoinPredicateIncludeArrays(mainCollDeps,
clonedExpCtx.get(),
lookup,
jp.localField(),
lookup->getFromNs(),
jp.foreignField());
})) {
// Some field in a join predicate introduced by a $expr $match in a sub-pipeline
// might have array values. End prefix here.
break;
}
// If we get here, it means we're ready to modify the join graph to include this
// $lookup. Once the join graph has been modified, any failure case should cause us to
// bail out of join optimization completely, rather than just ending the prefix here
// (since we've already partially incorporated the current join).
auto foreignNodeId = graph.addNode(
lookup->getFromNs(), std::move(preds.canonicalQuery), lookup->getAsField());
if (!foreignNodeId) {
return Status(ErrorCodes::BadValue, "Graph is too big: too many nodes");
@ -351,7 +453,7 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
// Add join predicates expressed as $expr in subpipelines to join graph.
auto status = addExprJoinPredicates(
graph, swPreds.getValue().joinPredicates, pathResolver, *foreignNodeId);
graph, std::move(preds.joinPredicates), pathResolver, *foreignNodeId);
if (!status.isOK()) {
return status;
}

View File

@ -30,7 +30,9 @@
#pragma once
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/query/compiler/metadata/path_arrayness.h"
#include "mongo/db/query/compiler/optimizer/join/agg_join_model.h"
#include "mongo/idl/server_parameter_test_controller.h"
#include "mongo/util/modules.h"
namespace mongo::join_ordering {
@ -54,7 +56,41 @@ public:
std::unique_ptr<Pipeline> makePipelineOfSize(size_t numJoins);
/**
 * Test helper that registers path-arrayness metadata on the pipeline's ExpressionContext such
 * that every listed field is recorded as non-array (scalar).
 *
 * 'mainCollFields' lists fields of the main collection; they are registered under the
 * ExpressionContext's own namespace. 'secondaryCollFieldMap' maps each secondary collection
 * name to its fields; they are registered under the namespace made of database "test" and that
 * collection name.
 */
static void markFieldsAsScalar(
    Pipeline& pipeline,
    const std::vector<StringData>& mainCollFields,
    const StringMap<std::vector<StringData>>& secondaryCollFieldMap) {
    // Builds a PathArrayness in which each of 'paths' is added with no multikey components.
    const auto makeScalarArrayness = [](const std::vector<StringData>& paths) {
        auto arrayness = std::make_shared<PathArrayness>();
        for (const auto& path : paths) {
            arrayness->addPath(FieldPath(path), MultikeyComponents{}, /*isFullRebuild=*/true);
        }
        return arrayness;
    };

    auto expCtx = pipeline.getContext();

    // Main collection: keyed by the namespace the ExpressionContext already carries.
    auto mainArrayness = makeScalarArrayness(mainCollFields);
    expCtx->setPathArraynessForNss(expCtx->getNamespaceString(), std::move(mainArrayness));

    // Secondary collections: one PathArrayness per collection name in the map.
    for (const auto& entry : secondaryCollFieldMap) {
        auto secondaryArrayness = makeScalarArrayness(entry.second);
        expCtx->setPathArraynessForNss(
            NamespaceString::createNamespaceString_forTest("test", entry.first),
            std::move(secondaryArrayness));
    }
}
const AggModelBuildParams defaultBuildParams{.maxNumberNodesConsideredForImplicitEdges =
kMaxNumberNodesConsideredForImplicitEdges};
private:
// Ensure path arrayness is enabled for all tests.
RAIIServerParameterControllerForTest queryKnobController{"featureFlagPathArrayness", true};
};
} // namespace mongo::join_ordering

View File

@ -84,12 +84,14 @@ TEST_F(AggJoinModelGoldenTest, longPrefix) {
{$unwind: "$fromB"}
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "longPrefix");
ASSERT_OK(joinModel);
}
// Golden test: builds a join model for a generated pipeline with 'kHardMaxNodesInJoin + 3' joins
// and expects construction to succeed.
TEST_F(AggJoinModelGoldenTest, veryLargePipeline) {
    const auto numJoins = kHardMaxNodesInJoin + 3;
    auto largePipeline = makePipelineOfSize(numJoins);
    // Record 'a' on the main collection and 'b' on collection "A" as scalar fields.
    markFieldsAsScalar(*largePipeline, {"a"_sd}, {{"A", {"b"_sd}}});
    auto swJoinModel = runVariation(std::move(largePipeline), "veryLargePipeline");
    ASSERT_OK(swJoinModel);
}
@ -106,6 +108,7 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_OneImplictEdge) {
{$unwind: "$fromB"}
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_OneImplictEdge");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 3);
@ -126,6 +129,7 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_MultipleImplictEdges) {
{$unwind: "$fromC"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"a"_sd}}, {"B", {"b"_sd}}, {"C", {"c"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_MultipleImplictEdges");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 4);
@ -154,6 +158,13 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_TwoConnectedComponents) {
{$unwind: "$fromE"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D", "E"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd}},
{"B", {"b"_sd}},
{"C", {"c"_sd, "d"_sd}},
{"D", {"d"_sd}},
{"E", {"e"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_TwoConnectedComponents");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 6);
@ -179,6 +190,13 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_NoImplicitEdges) {
{$unwind: "$fromE"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D", "E"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}},
{"B", {"b"_sd, "c"_sd}},
{"C", {"c"_sd, "d"_sd}},
{"D", {"d"_sd, "e"_sd}},
{"E", {"e"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_NoImplicitEdges");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 6);
@ -209,6 +227,9 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_predicatesAtEnd) {
}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd},
{{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_predicatesAtEnd");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -236,6 +257,9 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_predicatesInBetween) {
{$match: {$expr: {$eq: ["$fromD.d", "$fromA.d"]}}}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd},
{{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_predicatesInBetween");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -265,6 +289,9 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_earlyEnd) {
{$unwind: "$fromD"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd},
{{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_earlyEnd");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 3);
@ -294,6 +321,12 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_addImplicitEdge) {
{$match: {$expr: {$eq: ["$fromB.b", "$fromC.c"]}}}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}},
{"B", {"b"_sd, "s"_sd}},
{"C", {"s"_sd, "c"_sd}},
{"D", {"d"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_addImplicitEdge");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -336,6 +369,12 @@ TEST_F(AggJoinModelGoldenTest, subPipelineEdge_addImplicitEdge) {
{$unwind: "$fromD"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}},
{"B", {"b"_sd, "s"_sd}},
{"C", {"s"_sd, "c"_sd}},
{"D", {"d"_sd, "a"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "subPipelineEdge_addImplicitEdge");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -369,6 +408,10 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_subPipelineEdge_addImplicitEdge)
{$match: {$expr: {$eq: ["$fromA.a", "$fromB.a"]}}}
])";
auto pipeline = makePipeline(query, {"A", "B", "C"});
markFieldsAsScalar(
*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}}, {"B", {"b"_sd, "c"_sd, "a"_sd}}, {"C", {"c"_sd, "a"_sd}}});
auto joinModel =
runVariation(std::move(pipeline), "addEdgesFromExpr_subPipelineEdge_addImplicitEdge");
ASSERT_OK(joinModel);

View File

@ -65,6 +65,7 @@ TEST_F(PipelineAnalyzerTest, PipelinePrefixEligibleForJoinReorderingNoLocalForei
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// This pipeline's prefix is eligible for reordering.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -82,6 +83,7 @@ TEST_F(PipelineAnalyzerTest, PipelineEligibleForJoinReorderingSingleLookupUnwind
])";
auto pipeline = makePipeline(query, {"A"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// This pipeline is eligible for reordering.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -123,6 +125,7 @@ TEST_F(PipelineAnalyzerTest, TwoLookupUnwinds) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -142,6 +145,7 @@ TEST_F(PipelineAnalyzerTest, MatchOnMainCollection) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -163,6 +167,7 @@ TEST_F(PipelineAnalyzerTest, MatchInSubPipeline) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -187,6 +192,7 @@ TEST_F(PipelineAnalyzerTest, GroupOnMainCollection) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
// We don't detect ineligibility here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -207,6 +213,7 @@ TEST_F(PipelineAnalyzerTest, ConflictingLocalFields) {
])";
auto pipeline = makePipeline(query, {"B", "C"});
markFieldsAsScalar(*pipeline, {"x"_sd, "a"_sd}, {{"B", {"y"_sd}}, {"C", {"z"_sd}}});
// We don't detect ineligibility of local path fields here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams);
@ -241,6 +248,8 @@ TEST_F(PipelineAnalyzerTest, ConflictingLocalFieldExprSyntax) {
])";
auto pipeline = makePipeline(query, {"B", "A"});
markFieldsAsScalar(
*pipeline, {"x"_sd, "foo"_sd, "bar"_sd}, {{"B", {"y"_sd}}, {"A", {"foo"_sd, "bar"_sd}}});
// We don't detect ineligibility of local path fields here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams);
@ -256,6 +265,7 @@ TEST_F(PipelineAnalyzerTest, CompatibleAsFields) {
{$unwind: "$x.z"}
])";
auto pipeline = makePipeline(query, {"B", "C"});
markFieldsAsScalar(*pipeline, {"x.c"_sd}, {{"B", {"c"_sd, "d"_sd}}, {"C", {"d"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams);
@ -273,6 +283,7 @@ TEST_F(PipelineAnalyzerTest, GroupInMiddleIneligible) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// We don't detect ineligibility here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -295,6 +306,7 @@ TEST_F(PipelineAnalyzerTest, GroupInSubPipeline) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -367,6 +379,7 @@ TEST_F(PipelineAnalyzerTest, IneligibleSubPipelineStage) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -439,6 +452,7 @@ TEST_F(PipelineAnalyzerTest, LongPrefix) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -460,6 +474,7 @@ TEST_F(PipelineAnalyzerTest, PipelineInEligibleForSortStage) {
])";
auto pipeline = makePipeline(sortPrefixQuery, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
// This is not where we examine the pipeline for a $sort stage.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto status = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams).getStatus();
@ -492,6 +507,7 @@ TEST_F(PipelineAnalyzerTest, LocalFieldOverride) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd, "b"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -504,6 +520,7 @@ TEST_F(PipelineAnalyzerTest, LocalFieldOverride) {
TEST_F(PipelineAnalyzerTest, tooManyNodes) {
static constexpr size_t numJoins = 5;
auto pipeline = makePipelineOfSize(numJoins);
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// Configure the buildParams that one $lookup/$unwind pair is forced to the suffix because the
// maximum number of nodes is hit.
AggModelBuildParams buildParams{
@ -519,6 +536,7 @@ TEST_F(PipelineAnalyzerTest, tooManyNodes) {
TEST_F(PipelineAnalyzerTest, tooManyEdges) {
static constexpr size_t numJoins = 5;
auto pipeline = makePipelineOfSize(numJoins);
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// Configure the buildParams that one $lookup/$unwind pair is forced to the suffix because the
// maximum number of edges is hit.
AggModelBuildParams buildParams{
@ -557,6 +575,7 @@ TEST_F(PipelineAnalyzerTest, SingleJoinCompoundPredicate) {
])";
auto pipeline = makePipeline(query, {"A"});
markFieldsAsScalar(*pipeline, {"foo"_sd, "bar"_sd}, {{"A", {"foo"_sd, "bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -617,6 +636,9 @@ TEST_F(PipelineAnalyzerTest, CompoundJoinKeyWithLocalForeignSyntax) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline,
{"foo"_sd, "bar"_sd},
{{"A", {"foo"_sd, "bar"_sd}}, {"B", {"foo"_sd, "bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -676,6 +698,8 @@ TEST_F(PipelineAnalyzerTest, DuplicateExprEqAndEqEdges) {
])";
auto pipeline = makePipeline(query, {"A", "B", "C"});
markFieldsAsScalar(
*pipeline, {"bar"_sd}, {{"A", {"bar"_sd}}, {"B", {"bar"_sd}}, {"C", {"bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -721,6 +745,7 @@ TEST_F(PipelineAnalyzerTest, ExprOnlyImplicitEdges) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"bar"_sd}, {{"A", {"bar"_sd}}, {"B", {"bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -752,6 +777,7 @@ TEST_F(PipelineAnalyzerTest, PipelineIneligibleWithCorrelatedNonJoinPredicate) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"foo"_sd}, {{"A", {"foo"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -781,6 +807,7 @@ TEST_F(PipelineAnalyzerTest, PipelineIneligibleWithNonFieldPathVariable) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"foo"_sd}, {{"A", {"foo"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));