SERVER-109309 Fix multi-planning issue when cached plan has decisionReads=0 (#40191)

GitOrigin-RevId: d7f2b5c545c81a6d71a9b37a9926dd604fd349b9
2025-09-18 11:40:11 -04:00 · 2025-09-18 11:40:11 -04:00 · 07bfb33e54
commit 07bfb33e54
parent d3f99007aa
6 changed files with 174 additions and 30 deletions
--- a/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js
+++ b/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js
@ -0,0 +1,96 @@
+/**
+ * For the scenario where the multi-planner cached a winning plan that hit EOF immediately (with
+ * totalKeysExamined=0 and totalDocsExamined=0) during its trial run, this test verifies that
+ * replanning will be triggered if the cached plans starts performing signficantly worse.
+ *
+ * This test was adapted from a repro for a bug fixed by SERVER-109309.
+ *
+ * @tags: [
+ *   requires_profiling,
+ * ]
+ */
+import {getLatestProfilerEntry} from "jstests/libs/profiler.js";
+
+const conn = MongoRunner.runMongod();
+const db = conn.getDB("test");
+const coll = db[jsTestName()];
+coll.drop();
+
+// Define two batches of docs that will be inserted into the collection during this test.
+let docs1 = [];
+let docs2 = [];
+for (let i = 0; i < 200; ++i) {
+    docs1.push({x: i % 2, y: Math.floor(i / 4)});
+}
+for (let i = 0; i < 400; ++i) {
+    docs2.push({x: (i % 2) + 2, y: Math.floor(i / 8)});
+}
+
+// Create an index on "x" and an index on "y".
+assert.commandWorked(coll.createIndex({x: 1}));
+assert.commandWorked(coll.createIndex({y: 1}));
+
+// Enable profiling.
+assert.commandWorked(db.setProfilingLevel(2));
+
+// Insert the first batch of documents into the collection.
+assert.commandWorked(coll.insert(docs1));
+
+// Define the pipeline that will be used for this test.
+//
+// It's important that the "x" values in the $match's filter are greater than the "x" values of all
+// of the documents that are currently in the collection, so that WiredTigerIndexCursorBase::seek()
+// will return boost::none when MongoDB attempts to seek x=2 and x=3 in the {x: 1} index. This
+// ensures that "totalKeysExamined" will be 0 (which is important to ensure this test covers the
+// code paths relevant to SERVER-109309).
+const pipeline =
+    [{$match: {x: {$in: [2, 3]}, y: 47}}, {$group: {_id: null, num: {$sum: NumberInt(1)}}}];
+
+// Run the aggregate() command twice so that it's entry in the plan cache gets marked as "active".
+for (let i = 0; i < 2; ++i) {
+    assert.eq(0, coll.aggregate(pipeline).itcount());
+}
+
+// Run explain and get the winning plan from "allPlansExecution".
+let explain = coll.explain("allPlansExecution").aggregate(pipeline);
+let executionStats = explain.hasOwnProperty("executionStats")
+    ? explain.executionStats
+    : explain.stages[0].$cursor.executionStats;
+
+let winningPlan = executionStats.allPlansExecution[0];
+
+// The plan using index {x: 1} should be the winning plan here. Because there are currently no
+// documents in the collection with x >= 2, this plan will hit EOF immediately (and both calls
+// to WiredTigerIndexCursorBase::seek() on index {x: 1} will return boost::none).
+//
+// Verify that "totalKeysExamined" and "totalDocsExamined" for the winning plan are both zero.
+// It's important that we use explain("allPlansExecution") here because we specifically want the
+// stats from the trial run (because the trial run's stats are used to compute "decisionReads"),
+// and the stats from the profiler ("keysExamined" and "docsExamined") could be slightly different
+// (due to differences in SBE vs classic).
+assert.eq(0, winningPlan.totalKeysExamined, () => tojson(explain));
+assert.eq(0, winningPlan.totalDocsExamined, () => tojson(explain));
+
+// Insert the second batch of documents into the collection.
+assert.commandWorked(coll.insert(docs2));
+
+// Run the aggregate() command one more time.
+assert.eq(1, coll.aggregate(pipeline).itcount());
+
+// Get the profile entry for the most recent command.
+let entry = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
+
+// Re-planning should have occurred (because the old plan should perform much worse now that we've
+// inserted hundreds of documents with x=2 and x=3) and the plan using index {y: 1} should be the
+// winning plan now.
+assert.eq(entry.replanned, true, () => tojson(entry));
+assert.includes(
+    entry.replanReason, "cached plan was less efficient than expected", () => tojson(entry));
+
+// Verify that the query examined fewer than 100 keys and 100 docs. In the event that re-planning
+// hasn't happened yet, we don't want to force it to happen by running explain("allPlansExecution"),
+// so for these checks we use the stats reported by the profiler instead.
+assert.lt(entry.keysExamined, 100, () => tojson(entry));
+assert.lt(entry.docsExamined, 100, () => tojson(entry));
+
+MongoRunner.stopMongod(conn);
--- a/src/mongo/db/exec/plan_cache_util.cpp
+++ b/src/mongo/db/exec/plan_cache_util.cpp
@ -404,7 +404,7 @@ void ClassicPlanCacheWriter::operator()(const CanonicalQuery& cq,

    if (_executeInSbe) {
        auto stats = mps.getStats();
-        auto nReads = computeNumReadsFromWorks(*stats, *ranking);
+        auto nReads = computeNumReadsFromStats(*stats, *ranking);

        updateClassicPlanCacheFromClassicCandidatesForSbeExecution(
            _opCtx, _collection.getCollectionPtr(), cq, nReads, std::move(ranking), candidates);
@ -476,11 +476,19 @@ bool ConditionalClassicPlanCacheWriter::shouldCacheBasedOnCachingMode(
    MONGO_UNREACHABLE;
 }

-NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
+NumReads computeNumReadsFromStats(const PlanStageStats& stats,
                                  const plan_ranker::PlanRankingDecision& ranking) {
    auto winnerIdx = ranking.candidateOrder[0];
    auto summary = collectExecutionStatsSummary(&stats, winnerIdx);
-    return NumReads{summary.totalKeysExamined + summary.totalDocsExamined};
+
+    // The original "all classic" multiplanner uses the "works" stat as its unit of measure for
+    // tracking how much work a plan has done, while this multiplanner (the "CRP SBE" multiplanner)
+    // uses the "reads" metric (totalKeysExamined + totalDocsExamined) as its unit of measure.
+    //
+    // The "works" stat is always greater than zero. To play it safe and make it easier for the "all
+    // classic" and "CRP SBE" multiplanners to coexist, this function makes sure to always return
+    // a positive "reads" value.
+    return NumReads{std::max<size_t>(summary.totalKeysExamined + summary.totalDocsExamined, 1)};
 }

 }  // namespace plan_cache_util
--- a/src/mongo/db/exec/plan_cache_util.h
+++ b/src/mongo/db/exec/plan_cache_util.h
@ -212,6 +212,8 @@ protected:
    const Mode _planCachingMode;
 };

-NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
+// This function computes the value of the "reads" metric for the winning plan using the specified
+// 'stats'. This function will always return a positive value.
+NumReads computeNumReadsFromStats(const PlanStageStats& stats,
                                  const plan_ranker::PlanRankingDecision& ranking);
 }  // namespace mongo::plan_cache_util
--- a/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp
+++ b/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp
@ -136,7 +136,7 @@ void MultiPlanner::_buildSbePlanAndMaybeCache(
    boost::optional<NumReads> numReads;
    if (_shouldWriteToPlanCache) {
        auto stats = _multiPlanStage->getStats();
-        numReads = plan_cache_util::computeNumReadsFromWorks(*stats, *ranking);
+        numReads = plan_cache_util::computeNumReadsFromStats(*stats, *ranking);
    }

    // If classic plan cache is enabled, write to it. We need to do this before we extend the QSN
--- a/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp
@ -90,7 +90,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToStreamingStage) {
                                           kEmptyPlanNodeId /* nodeId */,
                                           ScanCallbacks());

-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
    ON_BLOCK_EXIT([&]() { scanStage->detachFromTrialRunTracker(); });

    auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
@ -112,7 +112,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBlockingStage) {
                         nullptr /* yieldPolicy */,
                         kEmptyPlanNodeId);

-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
    ON_BLOCK_EXIT([&]() { sortStage->detachFromTrialRunTracker(); });

    auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
@ -150,7 +150,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBothBlockingAndStreamingStages) {
                                          nullptr /* yieldPolicy */,
                                          kEmptyPlanNodeId);

-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
    ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });

    auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get());
@ -181,7 +181,7 @@ TEST_F(TrialRunTrackerTest, TrialRunTrackingCanBeDisabled) {
                              ScanCallbacks());

    scanStage->disableTrialRunTracking();
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
    auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
    ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
 }
@ -220,8 +220,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForChildDoesNotInhibitTrackingForPa
                                          nullptr /* yieldPolicy */,
                                          kEmptyPlanNodeId);

-
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
    ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });

    auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
@ -284,7 +283,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForAChildStagePreventsEarlyExit) {
                         kEmptyPlanNodeId);

    // We expect the TrialRunTracker to attach to _only_ the left child.
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, boost::none);
    auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());

    // Note: A scan is a streaming stage, but the "virtual scan" used here does not attach to the
@ -318,7 +317,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
                         nullptr /* yieldPolicy */,
                         PlanNodeId{3});

-    auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, boost::none);
    auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
    ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);

@ -328,4 +327,43 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
    prepareTree(ctx.get(), sortStage.get(), unwindSlot);
    ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 0);
 }
+
+TEST_F(TrialRunTrackerTest, TrackerCanTrackMetricWithMaxMetricSetToZero) {
+    auto ctx = makeCompileCtx();
+
+    auto [inputTag, inputVal] = stage_builder::makeValue(
+        BSON_ARRAY(BSON("a" << 1) << BSON("a" << 2) << BSON("a" << 3) << BSON("a" << 4)));
+    auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal, PlanNodeId{1});
+    auto sortStage =
+        makeS<SortStage>(std::move(scanStage),
+                         makeSV(scanSlot),
+                         std::vector<value::SortDirection>{value::SortDirection::Ascending},
+                         makeSV(),
+                         nullptr /*limit*/,
+                         1024 * 1024,
+                         false,
+                         nullptr /* yieldPolicy */,
+                         PlanNodeId{3});
+
+    auto onMetricReached = [&](TrialRunTracker::TrialRunMetric metric) {
+        // This callback should get called for the kNumResults metric.
+        ASSERT_EQ(metric, TrialRunTracker::kNumResults);
+        // Detach the TrialRunTracker and allow the plan to continue running.
+        sortStage->detachFromTrialRunTracker();
+        return false;
+    };
+
+    auto tracker =
+        std::make_unique<TrialRunTracker>(std::move(onMetricReached), size_t{0}, boost::none);
+    auto attachResult = sortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
+    ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::TrackResults);
+
+    // The 'prepareTree()' function opens the SortStage, causing it to read documents from its
+    // child. The scan stage (which is marked as the planning root) should return 4 documents.
+    // The tracker should only observe the first document, because the callback we supplied should
+    // detach the tracker after the first document is observed (because that is precisely when
+    // 'tracker->getMetric<TrialRunTracker::kNumResults>()' becomes greater than the maximum of 0.)
+    prepareTree(ctx.get(), sortStage.get(), scanSlot);
+    ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 1);
+}
 }  // namespace mongo::sbe
--- a/src/mongo/db/exec/trial_run_tracker.h
+++ b/src/mongo/db/exec/trial_run_tracker.h
@ -53,20 +53,21 @@ public:
        // Number of physical reads performed during a trial run. Once a storage cursor advances,
        // it counts as a single physical read.
        kNumReads,
-        // Must always be the last element to hold the number of element in the enum.
+        // Must always be the last element to hold the number of elements in the enum.
        kLastElem
    };

+    static constexpr size_t numTrialRunMetrics = static_cast<size_t>(TrialRunMetric::kLastElem);
+
    /**
     * Constructs a 'TrialRunTracker' which indicates that the trial period is over when any
     * 'TrialRunMetric' exceeds the maximum provided at construction.
     *
-     * Callers can also pass a value of zero to indicate that the given metric should not be
-     * tracked.
+     * Callers can also pass a value of boost::none to indicate that the given metric should not
+     * be tracked.
     */
-    template <typename... MaxMetrics,
-              std::enable_if_t<sizeof...(MaxMetrics) == TrialRunMetric::kLastElem, int> = 0>
-    TrialRunTracker(MaxMetrics... maxMetrics) : _maxMetrics{maxMetrics...} {}
+    TrialRunTracker(boost::optional<size_t> maxNumResults, boost::optional<size_t> maxNumReads)
+        : _maxMetrics{maxNumResults, maxNumReads} {}

    /**
     * Constructs a 'TrialRunTracker' that also has an '_onMetricReached' function, which gets
@ -75,11 +76,10 @@ public:
     * '_onMetricReached' can prevent tracking from halting plan execution, thereby upgrading a
     * trial run to a normal run.
     */
-    template <typename... MaxMetrics>
-    TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached, MaxMetrics... maxMetrics)
-        : TrialRunTracker{maxMetrics...} {
-        _onMetricReached = std::move(onMetricReached);
-    }
+    TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached,
+                    boost::optional<size_t> maxNumResults,
+                    boost::optional<size_t> maxNumReads)
+        : _maxMetrics{maxNumResults, maxNumReads}, _onMetricReached(std::move(onMetricReached)) {}

    /**
     * Increments the trial run metric specified as a template parameter 'metric' by the
@ -97,7 +97,7 @@ public:
    bool trackProgress(size_t metricIncrement) {
        static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));

-        if (_maxMetrics[metric] == 0) {
+        if (!_maxMetrics[metric].has_value()) {
            // This metric is not being tracked.
            return false;
        }
@ -126,24 +126,24 @@ public:
    template <TrialRunMetric metric>
    bool metricReached() const {
        static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
-        return metricTracked<metric>() && _metrics[metric] > _maxMetrics[metric];
+        return _maxMetrics[metric].has_value() && _metrics[metric] > *_maxMetrics[metric];
    }

    template <TrialRunMetric metric>
    bool metricTracked() const {
        static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
-        return _maxMetrics[metric] != 0;
+        return _maxMetrics[metric].has_value();
    }

    template <TrialRunMetric metric>
-    void updateMaxMetric(size_t newMaxMetric) {
+    void updateMaxMetric(boost::optional<size_t> newMaxMetric) {
        static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
        _maxMetrics[metric] = newMaxMetric;
    }

 private:
-    size_t _maxMetrics[TrialRunMetric::kLastElem];
-    size_t _metrics[TrialRunMetric::kLastElem]{0};
+    boost::optional<size_t> _maxMetrics[numTrialRunMetrics];
+    size_t _metrics[numTrialRunMetrics]{0};
    bool _done{false};
    std::function<bool(TrialRunMetric)> _onMetricReached{};
 };