From 07bfb33e5462fe62b9c19b77af788a4da2f7b413 Mon Sep 17 00:00:00 2001 From: Drew Paroski Date: Thu, 18 Sep 2025 11:40:11 -0400 Subject: [PATCH] SERVER-109309 Fix multi-planning issue when cached plan has decisionReads=0 (#40191) GitOrigin-RevId: d7f2b5c545c81a6d71a9b37a9926dd604fd349b9 --- ...plan_cache_replan_decisionReads_is_zero.js | 96 +++++++++++++++++++ src/mongo/db/exec/plan_cache_util.cpp | 14 ++- src/mongo/db/exec/plan_cache_util.h | 4 +- .../multi_planner.cpp | 2 +- .../exec/sbe/sbe_trial_run_tracker_test.cpp | 54 +++++++++-- src/mongo/db/exec/trial_run_tracker.h | 34 +++---- 6 files changed, 174 insertions(+), 30 deletions(-) create mode 100644 jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js diff --git a/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js b/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js new file mode 100644 index 00000000000..75d872ec53c --- /dev/null +++ b/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js @@ -0,0 +1,96 @@ +/** + * For the scenario where the multi-planner cached a winning plan that hit EOF immediately (with + * totalKeysExamined=0 and totalDocsExamined=0) during its trial run, this test verifies that + * replanning will be triggered if the cached plans starts performing signficantly worse. + * + * This test was adapted from a repro for a bug fixed by SERVER-109309. + * + * @tags: [ + * requires_profiling, + * ] + */ +import {getLatestProfilerEntry} from "jstests/libs/profiler.js"; + +const conn = MongoRunner.runMongod(); +const db = conn.getDB("test"); +const coll = db[jsTestName()]; +coll.drop(); + +// Define two batches of docs that will be inserted into the collection during this test. +let docs1 = []; +let docs2 = []; +for (let i = 0; i < 200; ++i) { + docs1.push({x: i % 2, y: Math.floor(i / 4)}); +} +for (let i = 0; i < 400; ++i) { + docs2.push({x: (i % 2) + 2, y: Math.floor(i / 8)}); +} + +// Create an index on "x" and an index on "y". +assert.commandWorked(coll.createIndex({x: 1})); +assert.commandWorked(coll.createIndex({y: 1})); + +// Enable profiling. +assert.commandWorked(db.setProfilingLevel(2)); + +// Insert the first batch of documents into the collection. +assert.commandWorked(coll.insert(docs1)); + +// Define the pipeline that will be used for this test. +// +// It's important that the "x" values in the $match's filter are greater than the "x" values of all +// of the documents that are currently in the collection, so that WiredTigerIndexCursorBase::seek() +// will return boost::none when MongoDB attempts to seek x=2 and x=3 in the {x: 1} index. This +// ensures that "totalKeysExamined" will be 0 (which is important to ensure this test covers the +// code paths relevant to SERVER-109309). +const pipeline = + [{$match: {x: {$in: [2, 3]}, y: 47}}, {$group: {_id: null, num: {$sum: NumberInt(1)}}}]; + +// Run the aggregate() command twice so that it's entry in the plan cache gets marked as "active". +for (let i = 0; i < 2; ++i) { + assert.eq(0, coll.aggregate(pipeline).itcount()); +} + +// Run explain and get the winning plan from "allPlansExecution". +let explain = coll.explain("allPlansExecution").aggregate(pipeline); +let executionStats = explain.hasOwnProperty("executionStats") + ? explain.executionStats + : explain.stages[0].$cursor.executionStats; + +let winningPlan = executionStats.allPlansExecution[0]; + +// The plan using index {x: 1} should be the winning plan here. Because there are currently no +// documents in the collection with x >= 2, this plan will hit EOF immediately (and both calls +// to WiredTigerIndexCursorBase::seek() on index {x: 1} will return boost::none). +// +// Verify that "totalKeysExamined" and "totalDocsExamined" for the winning plan are both zero. +// It's important that we use explain("allPlansExecution") here because we specifically want the +// stats from the trial run (because the trial run's stats are used to compute "decisionReads"), +// and the stats from the profiler ("keysExamined" and "docsExamined") could be slightly different +// (due to differences in SBE vs classic). +assert.eq(0, winningPlan.totalKeysExamined, () => tojson(explain)); +assert.eq(0, winningPlan.totalDocsExamined, () => tojson(explain)); + +// Insert the second batch of documents into the collection. +assert.commandWorked(coll.insert(docs2)); + +// Run the aggregate() command one more time. +assert.eq(1, coll.aggregate(pipeline).itcount()); + +// Get the profile entry for the most recent command. +let entry = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()}); + +// Re-planning should have occurred (because the old plan should perform much worse now that we've +// inserted hundreds of documents with x=2 and x=3) and the plan using index {y: 1} should be the +// winning plan now. +assert.eq(entry.replanned, true, () => tojson(entry)); +assert.includes( + entry.replanReason, "cached plan was less efficient than expected", () => tojson(entry)); + +// Verify that the query examined fewer than 100 keys and 100 docs. In the event that re-planning +// hasn't happened yet, we don't want to force it to happen by running explain("allPlansExecution"), +// so for these checks we use the stats reported by the profiler instead. +assert.lt(entry.keysExamined, 100, () => tojson(entry)); +assert.lt(entry.docsExamined, 100, () => tojson(entry)); + +MongoRunner.stopMongod(conn); diff --git a/src/mongo/db/exec/plan_cache_util.cpp b/src/mongo/db/exec/plan_cache_util.cpp index 2398d57e630..1f7f96034de 100644 --- a/src/mongo/db/exec/plan_cache_util.cpp +++ b/src/mongo/db/exec/plan_cache_util.cpp @@ -404,7 +404,7 @@ void ClassicPlanCacheWriter::operator()(const CanonicalQuery& cq, if (_executeInSbe) { auto stats = mps.getStats(); - auto nReads = computeNumReadsFromWorks(*stats, *ranking); + auto nReads = computeNumReadsFromStats(*stats, *ranking); updateClassicPlanCacheFromClassicCandidatesForSbeExecution( _opCtx, _collection.getCollectionPtr(), cq, nReads, std::move(ranking), candidates); @@ -476,11 +476,19 @@ bool ConditionalClassicPlanCacheWriter::shouldCacheBasedOnCachingMode( MONGO_UNREACHABLE; } -NumReads computeNumReadsFromWorks(const PlanStageStats& stats, +NumReads computeNumReadsFromStats(const PlanStageStats& stats, const plan_ranker::PlanRankingDecision& ranking) { auto winnerIdx = ranking.candidateOrder[0]; auto summary = collectExecutionStatsSummary(&stats, winnerIdx); - return NumReads{summary.totalKeysExamined + summary.totalDocsExamined}; + + // The original "all classic" multiplanner uses the "works" stat as its unit of measure for + // tracking how much work a plan has done, while this multiplanner (the "CRP SBE" multiplanner) + // uses the "reads" metric (totalKeysExamined + totalDocsExamined) as its unit of measure. + // + // The "works" stat is always greater than zero. To play it safe and make it easier for the "all + // classic" and "CRP SBE" multiplanners to coexist, this function makes sure to always return + // a positive "reads" value. + return NumReads{std::max(summary.totalKeysExamined + summary.totalDocsExamined, 1)}; } } // namespace plan_cache_util diff --git a/src/mongo/db/exec/plan_cache_util.h b/src/mongo/db/exec/plan_cache_util.h index b7683b64d32..00c9e940e63 100644 --- a/src/mongo/db/exec/plan_cache_util.h +++ b/src/mongo/db/exec/plan_cache_util.h @@ -212,6 +212,8 @@ protected: const Mode _planCachingMode; }; -NumReads computeNumReadsFromWorks(const PlanStageStats& stats, +// This function computes the value of the "reads" metric for the winning plan using the specified +// 'stats'. This function will always return a positive value. +NumReads computeNumReadsFromStats(const PlanStageStats& stats, const plan_ranker::PlanRankingDecision& ranking); } // namespace mongo::plan_cache_util diff --git a/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp b/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp index b54fe4c489c..86ffe6bf4c4 100644 --- a/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp +++ b/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp @@ -136,7 +136,7 @@ void MultiPlanner::_buildSbePlanAndMaybeCache( boost::optional numReads; if (_shouldWriteToPlanCache) { auto stats = _multiPlanStage->getStats(); - numReads = plan_cache_util::computeNumReadsFromWorks(*stats, *ranking); + numReads = plan_cache_util::computeNumReadsFromStats(*stats, *ranking); } // If classic plan cache is enabled, write to it. We need to do this before we extend the QSN diff --git a/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp b/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp index cec146c9e08..ba5b1fef358 100644 --- a/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp +++ b/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp @@ -90,7 +90,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToStreamingStage) { kEmptyPlanNodeId /* nodeId */, ScanCallbacks()); - auto tracker = std::make_unique(size_t{0}, size_t{0}); + auto tracker = std::make_unique(boost::none, boost::none); ON_BLOCK_EXIT([&]() { scanStage->detachFromTrialRunTracker(); }); auto attachResult = scanStage->attachToTrialRunTracker(tracker.get()); @@ -112,7 +112,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBlockingStage) { nullptr /* yieldPolicy */, kEmptyPlanNodeId); - auto tracker = std::make_unique(size_t{0}, size_t{0}); + auto tracker = std::make_unique(boost::none, boost::none); ON_BLOCK_EXIT([&]() { sortStage->detachFromTrialRunTracker(); }); auto attachResult = sortStage->attachToTrialRunTracker(tracker.get()); @@ -150,7 +150,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBothBlockingAndStreamingStages) { nullptr /* yieldPolicy */, kEmptyPlanNodeId); - auto tracker = std::make_unique(size_t{0}, size_t{0}); + auto tracker = std::make_unique(boost::none, boost::none); ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); }); auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get()); @@ -181,7 +181,7 @@ TEST_F(TrialRunTrackerTest, TrialRunTrackingCanBeDisabled) { ScanCallbacks()); scanStage->disableTrialRunTracking(); - auto tracker = std::make_unique(size_t{0}, size_t{0}); + auto tracker = std::make_unique(boost::none, boost::none); auto attachResult = scanStage->attachToTrialRunTracker(tracker.get()); ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking); } @@ -220,8 +220,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForChildDoesNotInhibitTrackingForPa nullptr /* yieldPolicy */, kEmptyPlanNodeId); - - auto tracker = std::make_unique(size_t{0}, size_t{0}); + auto tracker = std::make_unique(boost::none, boost::none); ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); }); auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1}); @@ -284,7 +283,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForAChildStagePreventsEarlyExit) { kEmptyPlanNodeId); // We expect the TrialRunTracker to attach to _only_ the left child. - auto tracker = std::make_unique(size_t{9}, size_t{0}); + auto tracker = std::make_unique(size_t{9}, boost::none); auto attachResult = sortStage->attachToTrialRunTracker(tracker.get()); // Note: A scan is a streaming stage, but the "virtual scan" used here does not attach to the @@ -318,7 +317,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume nullptr /* yieldPolicy */, PlanNodeId{3}); - auto tracker = std::make_unique(size_t{3}, size_t{0}); + auto tracker = std::make_unique(size_t{3}, boost::none); auto attachResult = sortStage->attachToTrialRunTracker(tracker.get()); ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking); @@ -328,4 +327,43 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume prepareTree(ctx.get(), sortStage.get(), unwindSlot); ASSERT_EQ(tracker->getMetric(), 0); } + +TEST_F(TrialRunTrackerTest, TrackerCanTrackMetricWithMaxMetricSetToZero) { + auto ctx = makeCompileCtx(); + + auto [inputTag, inputVal] = stage_builder::makeValue( + BSON_ARRAY(BSON("a" << 1) << BSON("a" << 2) << BSON("a" << 3) << BSON("a" << 4))); + auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal, PlanNodeId{1}); + auto sortStage = + makeS(std::move(scanStage), + makeSV(scanSlot), + std::vector{value::SortDirection::Ascending}, + makeSV(), + nullptr /*limit*/, + 1024 * 1024, + false, + nullptr /* yieldPolicy */, + PlanNodeId{3}); + + auto onMetricReached = [&](TrialRunTracker::TrialRunMetric metric) { + // This callback should get called for the kNumResults metric. + ASSERT_EQ(metric, TrialRunTracker::kNumResults); + // Detach the TrialRunTracker and allow the plan to continue running. + sortStage->detachFromTrialRunTracker(); + return false; + }; + + auto tracker = + std::make_unique(std::move(onMetricReached), size_t{0}, boost::none); + auto attachResult = sortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1}); + ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::TrackResults); + + // The 'prepareTree()' function opens the SortStage, causing it to read documents from its + // child. The scan stage (which is marked as the planning root) should return 4 documents. + // The tracker should only observe the first document, because the callback we supplied should + // detach the tracker after the first document is observed (because that is precisely when + // 'tracker->getMetric()' becomes greater than the maximum of 0.) + prepareTree(ctx.get(), sortStage.get(), scanSlot); + ASSERT_EQ(tracker->getMetric(), 1); +} } // namespace mongo::sbe diff --git a/src/mongo/db/exec/trial_run_tracker.h b/src/mongo/db/exec/trial_run_tracker.h index 8d9bd39ecad..0cc490e4012 100644 --- a/src/mongo/db/exec/trial_run_tracker.h +++ b/src/mongo/db/exec/trial_run_tracker.h @@ -53,20 +53,21 @@ public: // Number of physical reads performed during a trial run. Once a storage cursor advances, // it counts as a single physical read. kNumReads, - // Must always be the last element to hold the number of element in the enum. + // Must always be the last element to hold the number of elements in the enum. kLastElem }; + static constexpr size_t numTrialRunMetrics = static_cast(TrialRunMetric::kLastElem); + /** * Constructs a 'TrialRunTracker' which indicates that the trial period is over when any * 'TrialRunMetric' exceeds the maximum provided at construction. * - * Callers can also pass a value of zero to indicate that the given metric should not be - * tracked. + * Callers can also pass a value of boost::none to indicate that the given metric should not + * be tracked. */ - template = 0> - TrialRunTracker(MaxMetrics... maxMetrics) : _maxMetrics{maxMetrics...} {} + TrialRunTracker(boost::optional maxNumResults, boost::optional maxNumReads) + : _maxMetrics{maxNumResults, maxNumReads} {} /** * Constructs a 'TrialRunTracker' that also has an '_onMetricReached' function, which gets @@ -75,11 +76,10 @@ public: * '_onMetricReached' can prevent tracking from halting plan execution, thereby upgrading a * trial run to a normal run. */ - template - TrialRunTracker(std::function onMetricReached, MaxMetrics... maxMetrics) - : TrialRunTracker{maxMetrics...} { - _onMetricReached = std::move(onMetricReached); - } + TrialRunTracker(std::function onMetricReached, + boost::optional maxNumResults, + boost::optional maxNumReads) + : _maxMetrics{maxNumResults, maxNumReads}, _onMetricReached(std::move(onMetricReached)) {} /** * Increments the trial run metric specified as a template parameter 'metric' by the @@ -97,7 +97,7 @@ public: bool trackProgress(size_t metricIncrement) { static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t)); - if (_maxMetrics[metric] == 0) { + if (!_maxMetrics[metric].has_value()) { // This metric is not being tracked. return false; } @@ -126,24 +126,24 @@ public: template bool metricReached() const { static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t)); - return metricTracked() && _metrics[metric] > _maxMetrics[metric]; + return _maxMetrics[metric].has_value() && _metrics[metric] > *_maxMetrics[metric]; } template bool metricTracked() const { static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t)); - return _maxMetrics[metric] != 0; + return _maxMetrics[metric].has_value(); } template - void updateMaxMetric(size_t newMaxMetric) { + void updateMaxMetric(boost::optional newMaxMetric) { static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t)); _maxMetrics[metric] = newMaxMetric; } private: - size_t _maxMetrics[TrialRunMetric::kLastElem]; - size_t _metrics[TrialRunMetric::kLastElem]{0}; + boost::optional _maxMetrics[numTrialRunMetrics]; + size_t _metrics[numTrialRunMetrics]{0}; bool _done{false}; std::function _onMetricReached{}; };