SERVER-122868 Disable CBR for large in-lists (#50739)

GitOrigin-RevId: 4b30228acb3c6273ac272703d365fda66fb2d55f
This commit is contained in:
Timour Katchaounov 2026-03-31 14:01:06 +03:00 committed by MongoDB Bot
parent b66da4ef3d
commit 402fed2bda
6 changed files with 138 additions and 4 deletions

View File

@ -1,8 +1,15 @@
/**
* Verify that collection, index and query types unsupported by CBR fall back to multiplanning.
*/
import {getAllPlans, getPlanStages, isExpress} from "jstests/libs/query/analyze_plan.js";
import {assertPlanNotCosted} from "jstests/libs/query/cbr_utils.js";
import {
getAllPlans,
getPlanStages,
getWinningPlanFromExplain,
isExpress,
isSubplannerCompositePlan,
planHasStage,
} from "jstests/libs/query/analyze_plan.js";
import {assertPlanCosted, assertPlanNotCosted} from "jstests/libs/query/cbr_utils.js";
import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
// TODO SERVER-92589: Remove this exemption
@ -212,6 +219,89 @@ function testSortKeyGenerator() {
assert.commandWorked(coll.dropIndexes());
}
// Maximum $in-list size that CBR will estimate; queries containing a longer $in-list are expected
// to fall back to multiplanning. Must match the server-side constant
// plan_ranking::kMaxInListSize, so update both together.
const kMaxInListSize = 2048;
// Verifies that queries with small $in-lists are ranked by CBR, while queries containing an
// $in-list with more than 'kMaxInListSize' elements fall back to multiplanning. Both the regular
// planner path and the rooted-$or (subplanner) path are exercised.
function testLargeInList() {
    const bulk = coll.initializeUnorderedBulkOp();
    for (let i = 0; i < kMaxInListSize + 1000; i++) {
        bulk.insert({a: i, b: i % 100});
    }
    assert.commandWorked(bulk.execute());

    // Two indexes on different fields so the planner enumerates at least two plans
    // (one per index) when both fields appear in the query predicate.
    assert.commandWorked(coll.createIndexes([{a: 1}, {b: 1}]));

    // Use samplingCE so the test does not depend on histograms.
    const prevCEMode = assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryCBRCEMode: "samplingCE"}));

    try {
        // Two non-overlapping small $in-lists and one large $in-list reused across all sub-tests.
        const smallIn1 = Array.from({length: 100}, (_, i) => i);
        const smallIn2 = Array.from({length: 100}, (_, i) => i + 100);
        const largeIn = Array.from({length: kMaxInListSize + 1}, (_, i) => i);

        // Runs a find query and asserts that all enumerated plans are costed (or not costed).
        //   query       - the find predicate to explain.
        //   cbrExpected - true if every plan must carry a CBR cost, false if none may.
        function testQuery(query, cbrExpected) {
            const explain = coll.find(query).explain();
            const plans = getAllPlans(explain);
            assert.gte(plans.length, 2, "Expected at least two plans");
            const assertPlan = cbrExpected ? assertPlanCosted : assertPlanNotCosted;
            // Call through a lambda so that forEach's extra (index, array) arguments are not
            // forwarded to the assertion helper.
            plans.forEach((plan) => assertPlan(plan));
        }

        // Small $in-list: CBR should be able to estimate it.
        testQuery({a: {$in: smallIn1}, b: {$lt: 50}}, true);

        // Large $in-list (> kMaxInListSize elements): CBR should fall back to multiplanning.
        testQuery({a: {$in: largeIn}, b: {$lt: 50}}, false);

        // $or with small $in in both branches: CBR should be used.
        // The $or is combined with a top-level predicate on 'b' so the query goes through the
        // regular planner (not subplanning). With indexes {a: 1} and {b: 1}, the planner
        // enumerates at least two candidate plans. containsLargeInList walks the full expression
        // tree, so it detects $in-lists inside $or branches.
        testQuery({b: {$lt: 50}, $or: [{a: {$in: smallIn1}}, {a: {$in: smallIn2}}]}, true);

        // $or where the second branch has a large $in-list: should fall back to multiplanning.
        testQuery({b: {$lt: 50}, $or: [{a: {$in: smallIn1}}, {a: {$in: largeIn}}]}, false);

        // Rooted $or (subplanner path): each branch has predicates on 'a' and 'b' so that with
        // indexes {a: 1} and {b: 1} each branch independently has at least two candidate plans.
        // The subplanner combines per-branch winners into a single composite plan. We verify the
        // subplanner is used and the plan structure is correct. Note: the subplanner composite
        // explain does not expose costEstimate on per-branch plans, so we can only exercise the
        // code path, verify it via other means, and verify structural plan properties here.
        function testRootedOrQuery(query) {
            const explain = coll.find(query).explain();
            assert(isSubplannerCompositePlan(explain), "Expected subplanner composite plan");
            const winningPlan = getWinningPlanFromExplain(explain);
            assert(planHasStage(db, winningPlan, "OR"), "Expected OR stage in subplanner composite plan");
            const ixscans = getPlanStages(winningPlan, "IXSCAN");
            assert.eq(ixscans.length, 2, "Expected one IXSCAN per $or branch");
        }

        // Rooted $or with small $in in both branches: exercises the CBR-per-branch code path.
        testRootedOrQuery({
            $or: [
                {a: {$in: smallIn1}, b: {$lt: 50}},
                {a: {$in: smallIn2}, b: {$lt: 50}},
            ],
        });

        // Rooted $or where the second branch has a large $in: exercises the
        // multiplanning-per-branch fallback code path in the subplanner.
        testRootedOrQuery({
            $or: [
                {a: {$in: smallIn1}, b: {$lt: 50}},
                {a: {$in: largeIn}, b: {$lt: 50}},
            ],
        });
    } finally {
        // Restore the CE mode and drop the test indexes even when an assertion above fails, so
        // that neither the knob nor the indexes leak into the remaining tests.
        assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryCBRCEMode: prevCEMode.was}));
        assert.commandWorked(coll.dropIndexes());
    }
}
function testDistictScan() {
assert.commandWorked(coll.createIndex({a: 1, b: 1}));
const explain = coll.explain().aggregate([{$sort: {a: 1, b: 1}}, {$group: {_id: "$a", f: {$first: "$b"}}}]);
@ -244,6 +334,7 @@ try {
testReturnKey();
testSortKeyGenerator();
testDistictScan();
testLargeInList();
} finally {
// Ensure that query knob doesn't leak into other testcases in the suite.
assert.commandWorked(db.adminCommand({setParameter: 1, featureFlagCostBasedRanker: false}));

View File

@ -34,6 +34,7 @@
#include "mongo/bson/bsonobj.h"
#include "mongo/db/exec/plan_cache_util.h"
#include "mongo/db/matcher/expression.h"
#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/query/collection_query_info.h"
#include "mongo/db/query/compiler/ce/exact/exact_cardinality_impl.h"
#include "mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h"
@ -41,6 +42,7 @@
#include "mongo/db/query/plan_cache/classic_plan_cache.h"
#include "mongo/db/query/plan_cache/plan_cache.h"
#include "mongo/db/query/plan_cache/plan_cache_key_factory.h"
#include "mongo/db/query/plan_ranking/cbr_plan_ranking.h"
#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/stage_builder/stage_builder_util.h"
#include "mongo/util/assert_util.h"
@ -237,6 +239,8 @@ Status SubplanStage::pickBestPlan(const QueryPlannerParams& plannerParams,
}
}
// Run the plan enumerator for each of the $or branches thus enumerating all plans for each
// $or branch.
auto subplanningStatus = samplingEstimator
? QueryPlanner::planSubqueries(expCtx()->getOperationContext(),
getSolutionCachedData,
@ -258,7 +262,10 @@ Status SubplanStage::pickBestPlan(const QueryPlannerParams& plannerParams,
// If the plan ranking is a CBR strategy, plan each branch of the $or using the respective
// cost-based ranking. Multiplanning and automaticCE strategy plan each branch
// of the $or using multiplanning as defined in the multiplanCallback below.
bool useMultiplanner = !cbrEnabled || rankerMode == QueryPlanRankerModeEnum::kAutomaticCE;
// Disable CBR for queries with large $in lists.
bool useMultiplanner = !cbrEnabled || rankerMode == QueryPlanRankerModeEnum::kAutomaticCE ||
expression::containsLargeInList(*_query->getPrimaryMatchExpression(),
plan_ranking::kMaxInListSize);
if (!useMultiplanner && subplanningStatus.isOK()) {
if (rankerMode == QueryPlanRankerModeEnum::kSamplingCE) {
// If we do not have any fields that we want to sample then we just include all the

View File

@ -47,6 +47,8 @@
#include "mongo/db/matcher/expression_path.h"
#include "mongo/db/matcher/expression_tree.h"
#include "mongo/db/matcher/expression_type.h"
#include "mongo/db/matcher/expression_visitor.h"
#include "mongo/db/matcher/match_expression_walker.h"
#include "mongo/db/matcher/matcher_type_set.h"
#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/query/collation/collator_interface.h"
@ -914,6 +916,27 @@ bool hasPredicateOnPaths(const MatchExpression& expr,
return hasPredicateOnPathsHelper(expr, searchType, paths, boost::none /* parentPath */);
}
bool containsLargeInList(const MatchExpression& expr, size_t maxInListSize) {
struct Visitor : public SelectiveMatchExpressionVisitorBase<true> {
using SelectiveMatchExpressionVisitorBase<true>::visit;
size_t maxSize;
bool found = false;
explicit Visitor(size_t maxSize) : maxSize(maxSize) {}
void visit(const InMatchExpression* expr) final {
if (expr->getEqualities().size() > maxSize) {
found = true;
}
}
};
Visitor visitor(maxInListSize);
MatchExpressionWalker walker(&visitor, nullptr, nullptr);
tree_walker::walk<true, MatchExpression>(&expr, &walker);
return visitor.found;
}
bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
// lhs is the query and rhs is the index.
tassert(11052402, "lhs must not be null", lhs);

View File

@ -66,6 +66,12 @@ bool hasPredicateOnPaths(const MatchExpression& expr,
mongo::MatchExpression::MatchType searchType,
const OrderedPathSet& paths);
/**
 * Returns true if the expression tree rooted at 'expr' contains at least one $in-list whose
 * equalities vector has strictly more than 'maxInListSize' elements, and false otherwise. The
 * whole tree is walked, so $in-lists nested under other operators (e.g. inside $or branches) are
 * taken into account as well.
 */
bool containsLargeInList(const MatchExpression& expr, size_t maxInListSize);
using PathOrExprMatchExpression = std::variant<PathMatchExpression*, ExprMatchExpression*>;
/**

View File

@ -40,6 +40,10 @@
namespace mongo {
namespace plan_ranking {
// Maximum number of elements in an $in-list that CBR will attempt to estimate. Queries with larger
// $in-lists (strictly more than this many elements) fall back to multiplanning.
// NOTE: a jstest hard-codes the same value in its own 'kMaxInListSize' constant; keep the two in
// sync when changing this limit.
inline constexpr size_t kMaxInListSize = 2048;
// SERVER-118020: Investigate a more distinctive name to contrast with CostBasedPlanRankingStrategy
class CBRPlanRankingStrategy : public PlanRankingStrategy {
public:

View File

@ -29,6 +29,7 @@
#include "mongo/db/query/plan_ranking/plan_ranker.h"
#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_ranking/cbr_for_no_mp_results.h"
@ -88,7 +89,9 @@ StatusWith<PlanRankingResult> PlanRanker::rankPlans(OperationContext* opCtx,
bool isClassic) {
auto rankerMode = plannerParams.planRankerMode;
const bool canUseCBR = plannerParams.cbrEnabled && isClassic;
const bool canUseCBR = plannerParams.cbrEnabled && isClassic &&
// Disable CBR for queries with large $in lists.
!expression::containsLargeInList(*query.getPrimaryMatchExpression(), kMaxInListSize);
std::unique_ptr<PlanRankingStrategy> strategy;
if (!canUseCBR) {
strategy = std::make_unique<MPPlanRankingStrategy>();