SERVER-109309 Fix multi-planning issue when cached plan has decisionReads=0 (#40191)

GitOrigin-RevId: d7f2b5c545c81a6d71a9b37a9926dd604fd349b9
This commit is contained in:
Drew Paroski 2025-09-18 11:40:11 -04:00 committed by MongoDB Bot
parent d3f99007aa
commit 07bfb33e54
6 changed files with 174 additions and 30 deletions

View File

@ -0,0 +1,96 @@
/**
* For the scenario where the multi-planner cached a winning plan that hit EOF immediately (with
* totalKeysExamined=0 and totalDocsExamined=0) during its trial run, this test verifies that
* replanning will be triggered if the cached plans starts performing signficantly worse.
*
* This test was adapted from a repro for a bug fixed by SERVER-109309.
*
* @tags: [
* requires_profiling,
* ]
*/
import {getLatestProfilerEntry} from "jstests/libs/profiler.js";
const conn = MongoRunner.runMongod();
const db = conn.getDB("test");
const coll = db[jsTestName()];
coll.drop();
// Define two batches of docs that will be inserted into the collection during this test.
let docs1 = [];
let docs2 = [];
for (let i = 0; i < 200; ++i) {
docs1.push({x: i % 2, y: Math.floor(i / 4)});
}
for (let i = 0; i < 400; ++i) {
docs2.push({x: (i % 2) + 2, y: Math.floor(i / 8)});
}
// Create an index on "x" and an index on "y".
assert.commandWorked(coll.createIndex({x: 1}));
assert.commandWorked(coll.createIndex({y: 1}));
// Enable profiling.
assert.commandWorked(db.setProfilingLevel(2));
// Insert the first batch of documents into the collection.
assert.commandWorked(coll.insert(docs1));
// Define the pipeline that will be used for this test.
//
// It's important that the "x" values in the $match's filter are greater than the "x" values of all
// of the documents that are currently in the collection, so that WiredTigerIndexCursorBase::seek()
// will return boost::none when MongoDB attempts to seek x=2 and x=3 in the {x: 1} index. This
// ensures that "totalKeysExamined" will be 0 (which is important to ensure this test covers the
// code paths relevant to SERVER-109309).
const pipeline =
[{$match: {x: {$in: [2, 3]}, y: 47}}, {$group: {_id: null, num: {$sum: NumberInt(1)}}}];
// Run the aggregate() command twice so that it's entry in the plan cache gets marked as "active".
for (let i = 0; i < 2; ++i) {
assert.eq(0, coll.aggregate(pipeline).itcount());
}
// Run explain and get the winning plan from "allPlansExecution".
let explain = coll.explain("allPlansExecution").aggregate(pipeline);
let executionStats = explain.hasOwnProperty("executionStats")
? explain.executionStats
: explain.stages[0].$cursor.executionStats;
let winningPlan = executionStats.allPlansExecution[0];
// The plan using index {x: 1} should be the winning plan here. Because there are currently no
// documents in the collection with x >= 2, this plan will hit EOF immediately (and both calls
// to WiredTigerIndexCursorBase::seek() on index {x: 1} will return boost::none).
//
// Verify that "totalKeysExamined" and "totalDocsExamined" for the winning plan are both zero.
// It's important that we use explain("allPlansExecution") here because we specifically want the
// stats from the trial run (because the trial run's stats are used to compute "decisionReads"),
// and the stats from the profiler ("keysExamined" and "docsExamined") could be slightly different
// (due to differences in SBE vs classic).
assert.eq(0, winningPlan.totalKeysExamined, () => tojson(explain));
assert.eq(0, winningPlan.totalDocsExamined, () => tojson(explain));
// Insert the second batch of documents into the collection.
assert.commandWorked(coll.insert(docs2));
// Run the aggregate() command one more time.
assert.eq(1, coll.aggregate(pipeline).itcount());
// Get the profile entry for the most recent command.
let entry = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
// Re-planning should have occurred (because the old plan should perform much worse now that we've
// inserted hundreds of documents with x=2 and x=3) and the plan using index {y: 1} should be the
// winning plan now.
assert.eq(entry.replanned, true, () => tojson(entry));
assert.includes(
entry.replanReason, "cached plan was less efficient than expected", () => tojson(entry));
// Verify that the query examined fewer than 100 keys and 100 docs. In the event that re-planning
// hasn't happened yet, we don't want to force it to happen by running explain("allPlansExecution"),
// so for these checks we use the stats reported by the profiler instead.
assert.lt(entry.keysExamined, 100, () => tojson(entry));
assert.lt(entry.docsExamined, 100, () => tojson(entry));
MongoRunner.stopMongod(conn);

View File

@ -404,7 +404,7 @@ void ClassicPlanCacheWriter::operator()(const CanonicalQuery& cq,
if (_executeInSbe) {
auto stats = mps.getStats();
auto nReads = computeNumReadsFromWorks(*stats, *ranking);
auto nReads = computeNumReadsFromStats(*stats, *ranking);
updateClassicPlanCacheFromClassicCandidatesForSbeExecution(
_opCtx, _collection.getCollectionPtr(), cq, nReads, std::move(ranking), candidates);
@ -476,11 +476,19 @@ bool ConditionalClassicPlanCacheWriter::shouldCacheBasedOnCachingMode(
MONGO_UNREACHABLE;
}
NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
NumReads computeNumReadsFromStats(const PlanStageStats& stats,
const plan_ranker::PlanRankingDecision& ranking) {
auto winnerIdx = ranking.candidateOrder[0];
auto summary = collectExecutionStatsSummary(&stats, winnerIdx);
return NumReads{summary.totalKeysExamined + summary.totalDocsExamined};
// The original "all classic" multiplanner uses the "works" stat as its unit of measure for
// tracking how much work a plan has done, while this multiplanner (the "CRP SBE" multiplanner)
// uses the "reads" metric (totalKeysExamined + totalDocsExamined) as its unit of measure.
//
// The "works" stat is always greater than zero. To play it safe and make it easier for the "all
// classic" and "CRP SBE" multiplanners to coexist, this function makes sure to always return
// a positive "reads" value.
return NumReads{std::max<size_t>(summary.totalKeysExamined + summary.totalDocsExamined, 1)};
}
} // namespace plan_cache_util

View File

@ -212,6 +212,8 @@ protected:
const Mode _planCachingMode;
};
NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
// This function computes the value of the "reads" metric for the winning plan using the specified
// 'stats'. This function will always return a positive value.
NumReads computeNumReadsFromStats(const PlanStageStats& stats,
const plan_ranker::PlanRankingDecision& ranking);
} // namespace mongo::plan_cache_util

View File

@ -136,7 +136,7 @@ void MultiPlanner::_buildSbePlanAndMaybeCache(
boost::optional<NumReads> numReads;
if (_shouldWriteToPlanCache) {
auto stats = _multiPlanStage->getStats();
numReads = plan_cache_util::computeNumReadsFromWorks(*stats, *ranking);
numReads = plan_cache_util::computeNumReadsFromStats(*stats, *ranking);
}
// If classic plan cache is enabled, write to it. We need to do this before we extend the QSN

View File

@ -90,7 +90,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToStreamingStage) {
kEmptyPlanNodeId /* nodeId */,
ScanCallbacks());
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
ON_BLOCK_EXIT([&]() { scanStage->detachFromTrialRunTracker(); });
auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
@ -112,7 +112,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBlockingStage) {
nullptr /* yieldPolicy */,
kEmptyPlanNodeId);
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
ON_BLOCK_EXIT([&]() { sortStage->detachFromTrialRunTracker(); });
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
@ -150,7 +150,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBothBlockingAndStreamingStages) {
nullptr /* yieldPolicy */,
kEmptyPlanNodeId);
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });
auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get());
@ -181,7 +181,7 @@ TEST_F(TrialRunTrackerTest, TrialRunTrackingCanBeDisabled) {
ScanCallbacks());
scanStage->disableTrialRunTracking();
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
}
@ -220,8 +220,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForChildDoesNotInhibitTrackingForPa
nullptr /* yieldPolicy */,
kEmptyPlanNodeId);
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });
auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
@ -284,7 +283,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForAChildStagePreventsEarlyExit) {
kEmptyPlanNodeId);
// We expect the TrialRunTracker to attach to _only_ the left child.
auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, boost::none);
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
// Note: A scan is a streaming stage, but the "virtual scan" used here does not attach to the
@ -318,7 +317,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
nullptr /* yieldPolicy */,
PlanNodeId{3});
auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, size_t{0});
auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, boost::none);
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
@ -328,4 +327,43 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
prepareTree(ctx.get(), sortStage.get(), unwindSlot);
ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 0);
}
TEST_F(TrialRunTrackerTest, TrackerCanTrackMetricWithMaxMetricSetToZero) {
auto ctx = makeCompileCtx();
auto [inputTag, inputVal] = stage_builder::makeValue(
BSON_ARRAY(BSON("a" << 1) << BSON("a" << 2) << BSON("a" << 3) << BSON("a" << 4)));
auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal, PlanNodeId{1});
auto sortStage =
makeS<SortStage>(std::move(scanStage),
makeSV(scanSlot),
std::vector<value::SortDirection>{value::SortDirection::Ascending},
makeSV(),
nullptr /*limit*/,
1024 * 1024,
false,
nullptr /* yieldPolicy */,
PlanNodeId{3});
auto onMetricReached = [&](TrialRunTracker::TrialRunMetric metric) {
// This callback should get called for the kNumResults metric.
ASSERT_EQ(metric, TrialRunTracker::kNumResults);
// Detach the TrialRunTracker and allow the plan to continue running.
sortStage->detachFromTrialRunTracker();
return false;
};
auto tracker =
std::make_unique<TrialRunTracker>(std::move(onMetricReached), size_t{0}, boost::none);
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::TrackResults);
// The 'prepareTree()' function opens the SortStage, causing it to read documents from its
// child. The scan stage (which is marked as the planning root) should return 4 documents.
// The tracker should only observe the first document, because the callback we supplied should
// detach the tracker after the first document is observed (because that is precisely when
// 'tracker->getMetric<TrialRunTracker::kNumResults>()' becomes greater than the maximum of 0.)
prepareTree(ctx.get(), sortStage.get(), scanSlot);
ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 1);
}
} // namespace mongo::sbe

View File

@ -53,20 +53,21 @@ public:
// Number of physical reads performed during a trial run. Once a storage cursor advances,
// it counts as a single physical read.
kNumReads,
// Must always be the last element to hold the number of element in the enum.
// Must always be the last element to hold the number of elements in the enum.
kLastElem
};
static constexpr size_t numTrialRunMetrics = static_cast<size_t>(TrialRunMetric::kLastElem);
/**
* Constructs a 'TrialRunTracker' which indicates that the trial period is over when any
* 'TrialRunMetric' exceeds the maximum provided at construction.
*
* Callers can also pass a value of zero to indicate that the given metric should not be
* tracked.
* Callers can also pass a value of boost::none to indicate that the given metric should not
* be tracked.
*/
template <typename... MaxMetrics,
std::enable_if_t<sizeof...(MaxMetrics) == TrialRunMetric::kLastElem, int> = 0>
TrialRunTracker(MaxMetrics... maxMetrics) : _maxMetrics{maxMetrics...} {}
TrialRunTracker(boost::optional<size_t> maxNumResults, boost::optional<size_t> maxNumReads)
: _maxMetrics{maxNumResults, maxNumReads} {}
/**
* Constructs a 'TrialRunTracker' that also has an '_onMetricReached' function, which gets
@ -75,11 +76,10 @@ public:
* '_onMetricReached' can prevent tracking from halting plan execution, thereby upgrading a
* trial run to a normal run.
*/
template <typename... MaxMetrics>
TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached, MaxMetrics... maxMetrics)
: TrialRunTracker{maxMetrics...} {
_onMetricReached = std::move(onMetricReached);
}
TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached,
boost::optional<size_t> maxNumResults,
boost::optional<size_t> maxNumReads)
: _maxMetrics{maxNumResults, maxNumReads}, _onMetricReached(std::move(onMetricReached)) {}
/**
* Increments the trial run metric specified as a template parameter 'metric' by the
@ -97,7 +97,7 @@ public:
bool trackProgress(size_t metricIncrement) {
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
if (_maxMetrics[metric] == 0) {
if (!_maxMetrics[metric].has_value()) {
// This metric is not being tracked.
return false;
}
@ -126,24 +126,24 @@ public:
template <TrialRunMetric metric>
bool metricReached() const {
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
return metricTracked<metric>() && _metrics[metric] > _maxMetrics[metric];
return _maxMetrics[metric].has_value() && _metrics[metric] > *_maxMetrics[metric];
}
template <TrialRunMetric metric>
bool metricTracked() const {
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
return _maxMetrics[metric] != 0;
return _maxMetrics[metric].has_value();
}
template <TrialRunMetric metric>
void updateMaxMetric(size_t newMaxMetric) {
void updateMaxMetric(boost::optional<size_t> newMaxMetric) {
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
_maxMetrics[metric] = newMaxMetric;
}
private:
size_t _maxMetrics[TrialRunMetric::kLastElem];
size_t _metrics[TrialRunMetric::kLastElem]{0};
boost::optional<size_t> _maxMetrics[numTrialRunMetrics];
size_t _metrics[numTrialRunMetrics]{0};
bool _done{false};
std::function<bool(TrialRunMetric)> _onMetricReached{};
};