diff --git a/jstests/noPassthroughWithMongod/query/cbr/cbr_fallback.js b/jstests/noPassthroughWithMongod/query/cbr/cbr_fallback.js index 472dda69ddc..dc5604a6f8b 100644 --- a/jstests/noPassthroughWithMongod/query/cbr/cbr_fallback.js +++ b/jstests/noPassthroughWithMongod/query/cbr/cbr_fallback.js @@ -1,8 +1,15 @@ /** * Verify that collection, index and query types unsupported by CBR fallback to multiplanning. */ -import {getAllPlans, getPlanStages, isExpress} from "jstests/libs/query/analyze_plan.js"; -import {assertPlanNotCosted} from "jstests/libs/query/cbr_utils.js"; +import { + getAllPlans, + getPlanStages, + getWinningPlanFromExplain, + isExpress, + isSubplannerCompositePlan, + planHasStage, +} from "jstests/libs/query/analyze_plan.js"; +import {assertPlanCosted, assertPlanNotCosted} from "jstests/libs/query/cbr_utils.js"; import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js"; // TODO SERVER-92589: Remove this exemption @@ -212,6 +219,89 @@ function testSortKeyGenerator() { assert.commandWorked(coll.dropIndexes()); } +// Maximum $in-list size that CBR will estimate. Must match plan_ranking::kMaxInListSize. +const kMaxInListSize = 2048; + +function testLargeInList() { + const bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < kMaxInListSize + 1000; i++) { + bulk.insert({a: i, b: i % 100}); + } + assert.commandWorked(bulk.execute()); + + // Two indexes on different fields so the planner enumerates at least two plans + // (one per index) when both fields appear in the query predicate. + assert.commandWorked(coll.createIndexes([{a: 1}, {b: 1}])); + + // Use samplingCE so the test does not depend on histograms. + const prevCEMode = assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryCBRCEMode: "samplingCE"})); + + // Two non-overlapping small $in-lists and one large $in-list reused across all sub-tests. 
+ const smallIn1 = Array.from({length: 100}, (_, i) => i); + const smallIn2 = Array.from({length: 100}, (_, i) => i + 100); + const largeIn = Array.from({length: kMaxInListSize + 1}, (_, i) => i); + + // Runs a find query and asserts that all enumerated plans are costed (or not costed). + function testQuery(query, cbrExpected) { + const explain = coll.find(query).explain(); + const plans = getAllPlans(explain); + assert.gte(plans.length, 2, "Expected at least two plans"); + plans.forEach(cbrExpected ? assertPlanCosted : assertPlanNotCosted); + } + + // Small $in-list: CBR should be able to estimate it. + testQuery({a: {$in: smallIn1}, b: {$lt: 50}}, true); + + // Large $in-list (> kMaxInListSize elements): CBR should fall back to multiplanning. + testQuery({a: {$in: largeIn}, b: {$lt: 50}}, false); + + // $or with small $in in both branches: CBR should be used. + // The $or is combined with a top-level predicate on 'b' so the query goes through the regular + // planner (not subplanning). With indexes {a: 1} and {b: 1}, the planner enumerates at + // least two candidate plans. containsLargeInList walks the full expression tree, so it detects + // $in-lists inside $or branches. + testQuery({b: {$lt: 50}, $or: [{a: {$in: smallIn1}}, {a: {$in: smallIn2}}]}, true); + + // $or where the second branch has a large $in-list: should fall back to multiplanning. + testQuery({b: {$lt: 50}, $or: [{a: {$in: smallIn1}}, {a: {$in: largeIn}}]}, false); + + // Rooted $or (subplanner path): each branch has predicates on 'a' and 'b' so that with + // indexes {a: 1} and {b: 1} each branch independently has at least two candidate plans. + // The subplanner combines per-branch winners into a single composite plan. We verify the + // subplanner is used and the plan structure is correct. 
Note: the subplanner composite + // explain does not expose costEstimate on per-branch plans, so we can only exercise the code + // path, verify it via other means, and verify structural plan properties here. + function testRootedOrQuery(query) { + const explain = coll.find(query).explain(); + assert(isSubplannerCompositePlan(explain), "Expected subplanner composite plan"); + const winningPlan = getWinningPlanFromExplain(explain); + assert(planHasStage(db, winningPlan, "OR"), "Expected OR stage in subplanner composite plan"); + const ixscans = getPlanStages(winningPlan, "IXSCAN"); + assert.eq(ixscans.length, 2, "Expected one IXSCAN per $or branch"); + } + + // Rooted $or with small $in in both branches: exercises the CBR-per-branch code path. + testRootedOrQuery({ + $or: [ + {a: {$in: smallIn1}, b: {$lt: 50}}, + {a: {$in: smallIn2}, b: {$lt: 50}}, + ], + }); + + // Rooted $or where the second branch has a large $in: exercises the multiplanning-per-branch + // fallback code path in the subplanner. + testRootedOrQuery({ + $or: [ + {a: {$in: smallIn1}, b: {$lt: 50}}, + {a: {$in: largeIn}, b: {$lt: 50}}, + ], + }); + + // Restore CE mode for the remaining tests. + assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryCBRCEMode: prevCEMode.was})); + assert.commandWorked(coll.dropIndexes()); +} + function testDistictScan() { assert.commandWorked(coll.createIndex({a: 1, b: 1})); const explain = coll.explain().aggregate([{$sort: {a: 1, b: 1}}, {$group: {_id: "$a", f: {$first: "$b"}}}]); @@ -244,6 +334,7 @@ try { testReturnKey(); testSortKeyGenerator(); testDistictScan(); + testLargeInList(); } finally { // Ensure that query knob doesn't leak into other testcases in the suite. 
assert.commandWorked(db.adminCommand({setParameter: 1, featureFlagCostBasedRanker: false})); diff --git a/src/mongo/db/exec/classic/subplan.cpp b/src/mongo/db/exec/classic/subplan.cpp index 7c15d96af85..635e1855749 100644 --- a/src/mongo/db/exec/classic/subplan.cpp +++ b/src/mongo/db/exec/classic/subplan.cpp @@ -34,6 +34,7 @@ #include "mongo/bson/bsonobj.h" #include "mongo/db/exec/plan_cache_util.h" #include "mongo/db/matcher/expression.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/query/collection_query_info.h" #include "mongo/db/query/compiler/ce/exact/exact_cardinality_impl.h" #include "mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h" @@ -41,6 +42,7 @@ #include "mongo/db/query/plan_cache/classic_plan_cache.h" #include "mongo/db/query/plan_cache/plan_cache.h" #include "mongo/db/query/plan_cache/plan_cache_key_factory.h" +#include "mongo/db/query/plan_ranking/cbr_plan_ranking.h" #include "mongo/db/query/query_planner.h" #include "mongo/db/query/stage_builder/stage_builder_util.h" #include "mongo/util/assert_util.h" @@ -237,6 +239,8 @@ Status SubplanStage::pickBestPlan(const QueryPlannerParams& plannerParams, } } + // Run the plan enumerator for each of the $or branches thus enumerating all plans for each + // $or branch. auto subplanningStatus = samplingEstimator ? QueryPlanner::planSubqueries(expCtx()->getOperationContext(), getSolutionCachedData, @@ -258,7 +262,10 @@ Status SubplanStage::pickBestPlan(const QueryPlannerParams& plannerParams, // If the plan ranking is a CBR strategy, plan each branch of the $or using the respective // cost-based ranking. Multiplanning and automaticCE strategy plan each branch // of the $or using multiplanning as defined in the multiplanCallback below. - bool useMultiplanner = !cbrEnabled || rankerMode == QueryPlanRankerModeEnum::kAutomaticCE; + // Disable CBR for queries with large $in lists. 
+ bool useMultiplanner = !cbrEnabled || rankerMode == QueryPlanRankerModeEnum::kAutomaticCE || + expression::containsLargeInList(*_query->getPrimaryMatchExpression(), + plan_ranking::kMaxInListSize); if (!useMultiplanner && subplanningStatus.isOK()) { if (rankerMode == QueryPlanRankerModeEnum::kSamplingCE) { // If we do not have any fields that we want to sample then we just include all the diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 108179fbb2d..1f97b78baca 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -47,6 +47,8 @@ #include "mongo/db/matcher/expression_path.h" #include "mongo/db/matcher/expression_tree.h" #include "mongo/db/matcher/expression_type.h" +#include "mongo/db/matcher/expression_visitor.h" +#include "mongo/db/matcher/match_expression_walker.h" #include "mongo/db/matcher/matcher_type_set.h" #include "mongo/db/query/collation/collation_index_key.h" #include "mongo/db/query/collation/collator_interface.h" @@ -914,6 +916,27 @@ bool hasPredicateOnPaths(const MatchExpression& expr, return hasPredicateOnPathsHelper(expr, searchType, paths, boost::none /* parentPath */); } +bool containsLargeInList(const MatchExpression& expr, size_t maxInListSize) { + struct Visitor : public SelectiveMatchExpressionVisitorBase { + using SelectiveMatchExpressionVisitorBase::visit; + size_t maxSize; + bool found = false; + + explicit Visitor(size_t maxSize) : maxSize(maxSize) {} + + void visit(const InMatchExpression* expr) final { + if (expr->getEqualities().size() > maxSize) { + found = true; + } + } + }; + + Visitor visitor(maxInListSize); + MatchExpressionWalker walker(&visitor, nullptr, nullptr); + tree_walker::walk(&expr, &walker); + return visitor.found; +} + bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) { // lhs is the query and rhs is the index. 
tassert(11052402, "lhs must not be null", lhs); diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index 6a675d91cb9..c748e956f07 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -66,6 +66,12 @@ bool hasPredicateOnPaths(const MatchExpression& expr, mongo::MatchExpression::MatchType searchType, const OrderedPathSet& paths); +/** + * Return true if the expression tree contains an $in-list whose equalities vector size exceeds + * 'maxInListSize'. + */ +bool containsLargeInList(const MatchExpression& expr, size_t maxInListSize); + using PathOrExprMatchExpression = std::variant; /** diff --git a/src/mongo/db/query/plan_ranking/cbr_plan_ranking.h b/src/mongo/db/query/plan_ranking/cbr_plan_ranking.h index 5ef13992ee5..219bfa52582 100644 --- a/src/mongo/db/query/plan_ranking/cbr_plan_ranking.h +++ b/src/mongo/db/query/plan_ranking/cbr_plan_ranking.h @@ -40,6 +40,10 @@ namespace mongo { namespace plan_ranking { +// Maximum number of elements in an $in-list that CBR will attempt to estimate. Queries with larger +// $in-lists fall back to multiplanning. 
+inline constexpr size_t kMaxInListSize = 2048; + // SERVER-118020: Investigate a more distinctive name to contrast with CostBasedPlanRankingStrategy class CBRPlanRankingStrategy : public PlanRankingStrategy { public: diff --git a/src/mongo/db/query/plan_ranking/plan_ranker.cpp b/src/mongo/db/query/plan_ranking/plan_ranker.cpp index 84abad57a4e..10e006f58de 100644 --- a/src/mongo/db/query/plan_ranking/plan_ranker.cpp +++ b/src/mongo/db/query/plan_ranking/plan_ranker.cpp @@ -29,6 +29,7 @@ #include "mongo/db/query/plan_ranking/plan_ranker.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/query/canonical_query.h" #include "mongo/db/query/multiple_collection_accessor.h" #include "mongo/db/query/plan_ranking/cbr_for_no_mp_results.h" @@ -88,7 +89,9 @@ StatusWith PlanRanker::rankPlans(OperationContext* opCtx, bool isClassic) { auto rankerMode = plannerParams.planRankerMode; - const bool canUseCBR = plannerParams.cbrEnabled && isClassic; + const bool canUseCBR = plannerParams.cbrEnabled && isClassic && + // Disable CBR for queries with large $in lists. + !expression::containsLargeInList(*query.getPrimaryMatchExpression(), kMaxInListSize); std::unique_ptr strategy; if (!canUseCBR) { strategy = std::make_unique();