SERVER-122868 Disable CBR for large in-lists (#50739)
GitOrigin-RevId: 4b30228acb3c6273ac272703d365fda66fb2d55f
This commit is contained in:
parent
b66da4ef3d
commit
402fed2bda
@ -1,8 +1,15 @@
|
||||
/**
|
||||
* Verify that collection, index and query types unsupported by CBR fall back to multiplanning.
|
||||
*/
|
||||
import {getAllPlans, getPlanStages, isExpress} from "jstests/libs/query/analyze_plan.js";
|
||||
import {assertPlanNotCosted} from "jstests/libs/query/cbr_utils.js";
|
||||
import {
|
||||
getAllPlans,
|
||||
getPlanStages,
|
||||
getWinningPlanFromExplain,
|
||||
isExpress,
|
||||
isSubplannerCompositePlan,
|
||||
planHasStage,
|
||||
} from "jstests/libs/query/analyze_plan.js";
|
||||
import {assertPlanCosted, assertPlanNotCosted} from "jstests/libs/query/cbr_utils.js";
|
||||
import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
|
||||
|
||||
// TODO SERVER-92589: Remove this exemption
|
||||
@ -212,6 +219,89 @@ function testSortKeyGenerator() {
|
||||
assert.commandWorked(coll.dropIndexes());
|
||||
}
|
||||
|
||||
// Maximum $in-list size that CBR will estimate. Must match plan_ranking::kMaxInListSize.
|
||||
const kMaxInListSize = 2048;
|
||||
|
||||
/**
 * Checks how plans are ranked for queries with small and large $in-lists.
 *
 * Inserts kMaxInListSize + 1000 documents of the form {a: i, b: i % 100} into the outer 'coll',
 * builds the indexes {a: 1} and {b: 1}, and sets internalQueryCBRCEMode to "samplingCE". It then
 * runs find().explain() for non-rooted queries ($in alone and $in nested in $or) and asserts that
 * every enumerated plan is costed for small lists and not costed for a list of
 * kMaxInListSize + 1 elements. For rooted $or queries it only asserts the shape of the winning
 * plan. On the success path the previous CE mode is restored and the indexes are dropped.
 */
function testLargeInList() {
|
||||
const bulk = coll.initializeUnorderedBulkOp();
|
||||
for (let i = 0; i < kMaxInListSize + 1000; i++) {
|
||||
bulk.insert({a: i, b: i % 100});
|
||||
}
|
||||
assert.commandWorked(bulk.execute());
|
||||
|
||||
// Two indexes on different fields so the planner enumerates at least two plans
|
||||
// (one per index) when both fields appear in the query predicate.
|
||||
assert.commandWorked(coll.createIndexes([{a: 1}, {b: 1}]));
|
||||
|
||||
// Use samplingCE so the test does not depend on histograms.
|
||||
const prevCEMode = assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryCBRCEMode: "samplingCE"}));
|
||||
|
||||
// Two non-overlapping small $in-lists and one large $in-list reused across all sub-tests.
|
||||
const smallIn1 = Array.from({length: 100}, (_, i) => i);
|
||||
const smallIn2 = Array.from({length: 100}, (_, i) => i + 100);
|
||||
const largeIn = Array.from({length: kMaxInListSize + 1}, (_, i) => i);
|
||||
|
||||
// Runs a find query and asserts that all enumerated plans are costed (or not costed).
|
||||
function testQuery(query, cbrExpected) {
|
||||
const explain = coll.find(query).explain();
|
||||
const plans = getAllPlans(explain);
|
||||
assert.gte(plans.length, 2, "Expected at least two plans");
|
||||
plans.forEach(cbrExpected ? assertPlanCosted : assertPlanNotCosted);
|
||||
}
|
||||
|
||||
// Small $in-list: CBR should be able to estimate it.
|
||||
testQuery({a: {$in: smallIn1}, b: {$lt: 50}}, true);
|
||||
|
||||
// Large $in-list (> kMaxInListSize elements): CBR should fall back to multiplanning.
|
||||
testQuery({a: {$in: largeIn}, b: {$lt: 50}}, false);
|
||||
|
||||
// $or with small $in in both branches: CBR should be used.
|
||||
// The $or is combined with a top-level predicate on 'b' so the query goes through the regular
|
||||
// planner (not subplanning). With indexes {a: 1} and {b: 1}, the planner enumerates at
|
||||
// least two candidate plans. containsLargeInList walks the full expression tree, so it detects
|
||||
// $in-lists inside $or branches.
|
||||
testQuery({b: {$lt: 50}, $or: [{a: {$in: smallIn1}}, {a: {$in: smallIn2}}]}, true);
|
||||
|
||||
// $or where the second branch has a large $in-list: should fall back to multiplanning.
|
||||
testQuery({b: {$lt: 50}, $or: [{a: {$in: smallIn1}}, {a: {$in: largeIn}}]}, false);
|
||||
|
||||
// Rooted $or (subplanner path): each branch has predicates on 'a' and 'b' so that with
|
||||
// indexes {a: 1} and {b: 1} each branch independently has at least two candidate plans.
|
||||
// The subplanner combines per-branch winners into a single composite plan. We verify the
|
||||
// subplanner is used and the plan structure is correct. Note: the subplanner composite
|
||||
// explain does not expose costEstimate on per-branch plans, so we can only exercise the code
|
||||
// path, verify it via other means, and verify structural plan properties here.
|
||||
function testRootedOrQuery(query) {
|
||||
const explain = coll.find(query).explain();
|
||||
assert(isSubplannerCompositePlan(explain), "Expected subplanner composite plan");
|
||||
const winningPlan = getWinningPlanFromExplain(explain);
|
||||
assert(planHasStage(db, winningPlan, "OR"), "Expected OR stage in subplanner composite plan");
|
||||
const ixscans = getPlanStages(winningPlan, "IXSCAN");
|
||||
assert.eq(ixscans.length, 2, "Expected one IXSCAN per $or branch");
|
||||
}
|
||||
|
||||
// Rooted $or with small $in in both branches: exercises the CBR-per-branch code path.
|
||||
testRootedOrQuery({
|
||||
$or: [
|
||||
{a: {$in: smallIn1}, b: {$lt: 50}},
|
||||
{a: {$in: smallIn2}, b: {$lt: 50}},
|
||||
],
|
||||
});
|
||||
|
||||
// Rooted $or where the second branch has a large $in: exercises the multiplanning-per-branch
|
||||
// fallback code path in the subplanner.
|
||||
testRootedOrQuery({
|
||||
$or: [
|
||||
{a: {$in: smallIn1}, b: {$lt: 50}},
|
||||
{a: {$in: largeIn}, b: {$lt: 50}},
|
||||
],
|
||||
});
|
||||
|
||||
// NOTE: nothing in this function is wrapped in try/finally, so the two cleanup calls below are
// skipped if any assertion above throws.
// Restore CE mode for the remaining tests.
|
||||
assert.commandWorked(db.adminCommand({setParameter: 1, internalQueryCBRCEMode: prevCEMode.was}));
|
||||
assert.commandWorked(coll.dropIndexes());
|
||||
}
|
||||
|
||||
function testDistictScan() {
|
||||
assert.commandWorked(coll.createIndex({a: 1, b: 1}));
|
||||
const explain = coll.explain().aggregate([{$sort: {a: 1, b: 1}}, {$group: {_id: "$a", f: {$first: "$b"}}}]);
|
||||
@ -244,6 +334,7 @@ try {
|
||||
testReturnKey();
|
||||
testSortKeyGenerator();
|
||||
testDistictScan();
|
||||
testLargeInList();
|
||||
} finally {
|
||||
// Ensure that query knob doesn't leak into other testcases in the suite.
|
||||
assert.commandWorked(db.adminCommand({setParameter: 1, featureFlagCostBasedRanker: false}));
|
||||
|
||||
@ -34,6 +34,7 @@
|
||||
#include "mongo/bson/bsonobj.h"
|
||||
#include "mongo/db/exec/plan_cache_util.h"
|
||||
#include "mongo/db/matcher/expression.h"
|
||||
#include "mongo/db/matcher/expression_algo.h"
|
||||
#include "mongo/db/query/collection_query_info.h"
|
||||
#include "mongo/db/query/compiler/ce/exact/exact_cardinality_impl.h"
|
||||
#include "mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h"
|
||||
@ -41,6 +42,7 @@
|
||||
#include "mongo/db/query/plan_cache/classic_plan_cache.h"
|
||||
#include "mongo/db/query/plan_cache/plan_cache.h"
|
||||
#include "mongo/db/query/plan_cache/plan_cache_key_factory.h"
|
||||
#include "mongo/db/query/plan_ranking/cbr_plan_ranking.h"
|
||||
#include "mongo/db/query/query_planner.h"
|
||||
#include "mongo/db/query/stage_builder/stage_builder_util.h"
|
||||
#include "mongo/util/assert_util.h"
|
||||
@ -237,6 +239,8 @@ Status SubplanStage::pickBestPlan(const QueryPlannerParams& plannerParams,
|
||||
}
|
||||
}
|
||||
|
||||
// Run the plan enumerator for each of the $or branches thus enumerating all plans for each
|
||||
// $or branch.
|
||||
auto subplanningStatus = samplingEstimator
|
||||
? QueryPlanner::planSubqueries(expCtx()->getOperationContext(),
|
||||
getSolutionCachedData,
|
||||
@ -258,7 +262,10 @@ Status SubplanStage::pickBestPlan(const QueryPlannerParams& plannerParams,
|
||||
// If the plan ranking is a CBR strategy, plan each branch of the $or using the respective
|
||||
// cost-based ranking. Multiplanning and automaticCE strategy plan each branch
|
||||
// of the $or using multiplanning as defined in the multiplanCallback below.
|
||||
bool useMultiplanner = !cbrEnabled || rankerMode == QueryPlanRankerModeEnum::kAutomaticCE;
|
||||
// Disable CBR for queries with large $in lists.
|
||||
bool useMultiplanner = !cbrEnabled || rankerMode == QueryPlanRankerModeEnum::kAutomaticCE ||
|
||||
expression::containsLargeInList(*_query->getPrimaryMatchExpression(),
|
||||
plan_ranking::kMaxInListSize);
|
||||
if (!useMultiplanner && subplanningStatus.isOK()) {
|
||||
if (rankerMode == QueryPlanRankerModeEnum::kSamplingCE) {
|
||||
// If we do not have any fields that we want to sample then we just include all the
|
||||
|
||||
@ -47,6 +47,8 @@
|
||||
#include "mongo/db/matcher/expression_path.h"
|
||||
#include "mongo/db/matcher/expression_tree.h"
|
||||
#include "mongo/db/matcher/expression_type.h"
|
||||
#include "mongo/db/matcher/expression_visitor.h"
|
||||
#include "mongo/db/matcher/match_expression_walker.h"
|
||||
#include "mongo/db/matcher/matcher_type_set.h"
|
||||
#include "mongo/db/query/collation/collation_index_key.h"
|
||||
#include "mongo/db/query/collation/collator_interface.h"
|
||||
@ -914,6 +916,27 @@ bool hasPredicateOnPaths(const MatchExpression& expr,
|
||||
return hasPredicateOnPathsHelper(expr, searchType, paths, boost::none /* parentPath */);
|
||||
}
|
||||
|
||||
// Reports whether any InMatchExpression visited while walking 'expr' has more than
// 'maxInListSize' entries in getEqualities(). The comparison is strict, so an $in-list with
// exactly 'maxInListSize' equalities is not considered large.
bool containsLargeInList(const MatchExpression& expr, size_t maxInListSize) {
|
||||
// Local visitor that overrides only the InMatchExpression overload. Every other node type is
// handled by the base-class visit() overloads (presumably no-ops; confirm in
// expression_visitor.h).
struct Visitor : public SelectiveMatchExpressionVisitorBase<true> {
|
||||
// Re-export the base overloads so the override below does not hide them.
using SelectiveMatchExpressionVisitorBase<true>::visit;
|
||||
// Threshold an $in-list's equality count must exceed to be flagged.
size_t maxSize;
|
||||
// Sticky result: set to true by the first oversized $in-list and never cleared.
bool found = false;
|
||||
|
||||
explicit Visitor(size_t maxSize) : maxSize(maxSize) {}
|
||||
|
||||
void visit(const InMatchExpression* expr) final {
|
||||
if (expr->getEqualities().size() > maxSize) {
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Visitor visitor(maxInListSize);
|
||||
// Only the first visitor slot is supplied; the other two are null.
// NOTE(review): the first slot is presumably the pre-order visitor; confirm against
// match_expression_walker.h.
MatchExpressionWalker walker(&visitor, nullptr, nullptr);
|
||||
tree_walker::walk<true, MatchExpression>(&expr, &walker);
|
||||
return visitor.found;
|
||||
}
|
||||
|
||||
bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
|
||||
// lhs is the query and rhs is the index.
|
||||
tassert(11052402, "lhs must not be null", lhs);
|
||||
|
||||
@ -66,6 +66,12 @@ bool hasPredicateOnPaths(const MatchExpression& expr,
|
||||
mongo::MatchExpression::MatchType searchType,
|
||||
const OrderedPathSet& paths);
|
||||
|
||||
/**
|
||||
* Return true if the expression tree contains an $in-list whose equalities vector exceeds
|
||||
* 'maxInListSize'.
|
||||
*/
|
||||
bool containsLargeInList(const MatchExpression& expr, size_t maxInListSize);
|
||||
|
||||
using PathOrExprMatchExpression = std::variant<PathMatchExpression*, ExprMatchExpression*>;
|
||||
|
||||
/**
|
||||
|
||||
@ -40,6 +40,10 @@
|
||||
namespace mongo {
|
||||
namespace plan_ranking {
|
||||
|
||||
// Maximum number of elements in an $in-list that CBR will attempt to estimate. Queries with larger
|
||||
// $in-lists fall back to multiplanning.
|
||||
inline constexpr size_t kMaxInListSize = 2048;
|
||||
|
||||
// SERVER-118020: Investigate a more distinctive name to contrast with CostBasedPlanRankingStrategy
|
||||
class CBRPlanRankingStrategy : public PlanRankingStrategy {
|
||||
public:
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
|
||||
#include "mongo/db/query/plan_ranking/plan_ranker.h"
|
||||
|
||||
#include "mongo/db/matcher/expression_algo.h"
|
||||
#include "mongo/db/query/canonical_query.h"
|
||||
#include "mongo/db/query/multiple_collection_accessor.h"
|
||||
#include "mongo/db/query/plan_ranking/cbr_for_no_mp_results.h"
|
||||
@ -88,7 +89,9 @@ StatusWith<PlanRankingResult> PlanRanker::rankPlans(OperationContext* opCtx,
|
||||
bool isClassic) {
|
||||
auto rankerMode = plannerParams.planRankerMode;
|
||||
|
||||
const bool canUseCBR = plannerParams.cbrEnabled && isClassic;
|
||||
const bool canUseCBR = plannerParams.cbrEnabled && isClassic &&
|
||||
// Disable CBR for queries with large $in lists.
|
||||
!expression::containsLargeInList(*query.getPrimaryMatchExpression(), kMaxInListSize);
|
||||
std::unique_ptr<PlanRankingStrategy> strategy;
|
||||
if (!canUseCBR) {
|
||||
strategy = std::make_unique<MPPlanRankingStrategy>();
|
||||
|
||||
Loading…
Reference in New Issue
Block a user