From 07bfb33e5462fe62b9c19b77af788a4da2f7b413 Mon Sep 17 00:00:00 2001
From: Drew Paroski <paroski@users.noreply.github.com>
Date: Thu, 18 Sep 2025 11:40:11 -0400
Subject: [PATCH] SERVER-109309 Fix multi-planning issue when cached plan has
 decisionReads=0 (#40191)

GitOrigin-RevId: d7f2b5c545c81a6d71a9b37a9926dd604fd349b9
---
 ...plan_cache_replan_decisionReads_is_zero.js | 96 +++++++++++++++++++
 src/mongo/db/exec/plan_cache_util.cpp         | 14 ++-
 src/mongo/db/exec/plan_cache_util.h           |  4 +-
 .../multi_planner.cpp                         |  2 +-
 .../exec/sbe/sbe_trial_run_tracker_test.cpp   | 54 +++++++++--
 src/mongo/db/exec/trial_run_tracker.h         | 34 +++----
 6 files changed, 174 insertions(+), 30 deletions(-)
 create mode 100644 jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js

diff --git a/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js b/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js
new file mode 100644
index 00000000000..75d872ec53c
--- /dev/null
+++ b/jstests/noPassthrough/query/plan_cache/plan_cache_replan_decisionReads_is_zero.js
@@ -0,0 +1,96 @@
+/**
+ * For the scenario where the multi-planner cached a winning plan that hit EOF immediately (with
+ * totalKeysExamined=0 and totalDocsExamined=0) during its trial run, this test verifies that
+ * replanning will be triggered if the cached plans starts performing signficantly worse.
+ *
+ * This test was adapted from a repro for a bug fixed by SERVER-109309.
+ *
+ * @tags: [
+ *   requires_profiling,
+ * ]
+ */
+import {getLatestProfilerEntry} from "jstests/libs/profiler.js";
+
+const conn = MongoRunner.runMongod();
+const db = conn.getDB("test");
+const coll = db[jsTestName()];
+coll.drop();
+
+// Define two batches of docs that will be inserted into the collection during this test.
+let docs1 = [];
+let docs2 = [];
+for (let i = 0; i < 200; ++i) {
+    docs1.push({x: i % 2, y: Math.floor(i / 4)});
+}
+for (let i = 0; i < 400; ++i) {
+    docs2.push({x: (i % 2) + 2, y: Math.floor(i / 8)});
+}
+
+// Create an index on "x" and an index on "y".
+assert.commandWorked(coll.createIndex({x: 1}));
+assert.commandWorked(coll.createIndex({y: 1}));
+
+// Enable profiling.
+assert.commandWorked(db.setProfilingLevel(2));
+
+// Insert the first batch of documents into the collection.
+assert.commandWorked(coll.insert(docs1));
+
+// Define the pipeline that will be used for this test.
+//
+// It's important that the "x" values in the $match's filter are greater than the "x" values of all
+// of the documents that are currently in the collection, so that WiredTigerIndexCursorBase::seek()
+// will return boost::none when MongoDB attempts to seek x=2 and x=3 in the {x: 1} index. This
+// ensures that "totalKeysExamined" will be 0 (which is important to ensure this test covers the
+// code paths relevant to SERVER-109309).
+const pipeline =
+    [{$match: {x: {$in: [2, 3]}, y: 47}}, {$group: {_id: null, num: {$sum: NumberInt(1)}}}];
+
+// Run the aggregate() command twice so that it's entry in the plan cache gets marked as "active".
+for (let i = 0; i < 2; ++i) {
+    assert.eq(0, coll.aggregate(pipeline).itcount());
+}
+
+// Run explain and get the winning plan from "allPlansExecution".
+let explain = coll.explain("allPlansExecution").aggregate(pipeline);
+let executionStats = explain.hasOwnProperty("executionStats")
+    ? explain.executionStats
+    : explain.stages[0].$cursor.executionStats;
+
+let winningPlan = executionStats.allPlansExecution[0];
+
+// The plan using index {x: 1} should be the winning plan here. Because there are currently no
+// documents in the collection with x >= 2, this plan will hit EOF immediately (and both calls
+// to WiredTigerIndexCursorBase::seek() on index {x: 1} will return boost::none).
+//
+// Verify that "totalKeysExamined" and "totalDocsExamined" for the winning plan are both zero.
+// It's important that we use explain("allPlansExecution") here because we specifically want the
+// stats from the trial run (because the trial run's stats are used to compute "decisionReads"),
+// and the stats from the profiler ("keysExamined" and "docsExamined") could be slightly different
+// (due to differences in SBE vs classic).
+assert.eq(0, winningPlan.totalKeysExamined, () => tojson(explain));
+assert.eq(0, winningPlan.totalDocsExamined, () => tojson(explain));
+
+// Insert the second batch of documents into the collection.
+assert.commandWorked(coll.insert(docs2));
+
+// Run the aggregate() command one more time.
+assert.eq(1, coll.aggregate(pipeline).itcount());
+
+// Get the profile entry for the most recent command.
+let entry = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
+
+// Re-planning should have occurred (because the old plan should perform much worse now that we've
+// inserted hundreds of documents with x=2 and x=3) and the plan using index {y: 1} should be the
+// winning plan now.
+assert.eq(entry.replanned, true, () => tojson(entry));
+assert.includes(
+    entry.replanReason, "cached plan was less efficient than expected", () => tojson(entry));
+
+// Verify that the query examined fewer than 100 keys and 100 docs. In the event that re-planning
+// hasn't happened yet, we don't want to force it to happen by running explain("allPlansExecution"),
+// so for these checks we use the stats reported by the profiler instead.
+assert.lt(entry.keysExamined, 100, () => tojson(entry));
+assert.lt(entry.docsExamined, 100, () => tojson(entry));
+
+MongoRunner.stopMongod(conn);
diff --git a/src/mongo/db/exec/plan_cache_util.cpp b/src/mongo/db/exec/plan_cache_util.cpp
index 2398d57e630..1f7f96034de 100644
--- a/src/mongo/db/exec/plan_cache_util.cpp
+++ b/src/mongo/db/exec/plan_cache_util.cpp
@@ -404,7 +404,7 @@ void ClassicPlanCacheWriter::operator()(const CanonicalQuery& cq,
 
     if (_executeInSbe) {
         auto stats = mps.getStats();
-        auto nReads = computeNumReadsFromWorks(*stats, *ranking);
+        auto nReads = computeNumReadsFromStats(*stats, *ranking);
 
         updateClassicPlanCacheFromClassicCandidatesForSbeExecution(
             _opCtx, _collection.getCollectionPtr(), cq, nReads, std::move(ranking), candidates);
@@ -476,11 +476,19 @@ bool ConditionalClassicPlanCacheWriter::shouldCacheBasedOnCachingMode(
     MONGO_UNREACHABLE;
 }
 
-NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
+NumReads computeNumReadsFromStats(const PlanStageStats& stats,
                                   const plan_ranker::PlanRankingDecision& ranking) {
     auto winnerIdx = ranking.candidateOrder[0];
     auto summary = collectExecutionStatsSummary(&stats, winnerIdx);
-    return NumReads{summary.totalKeysExamined + summary.totalDocsExamined};
+
+    // The original "all classic" multiplanner uses the "works" stat as its unit of measure for
+    // tracking how much work a plan has done, while this multiplanner (the "CRP SBE" multiplanner)
+    // uses the "reads" metric (totalKeysExamined + totalDocsExamined) as its unit of measure.
+    //
+    // The "works" stat is always greater than zero. To play it safe and make it easier for the "all
+    // classic" and "CRP SBE" multiplanners to coexist, this function makes sure to always return
+    // a positive "reads" value.
+    return NumReads{std::max<size_t>(summary.totalKeysExamined + summary.totalDocsExamined, 1)};
 }
 
 }  // namespace plan_cache_util
diff --git a/src/mongo/db/exec/plan_cache_util.h b/src/mongo/db/exec/plan_cache_util.h
index b7683b64d32..00c9e940e63 100644
--- a/src/mongo/db/exec/plan_cache_util.h
+++ b/src/mongo/db/exec/plan_cache_util.h
@@ -212,6 +212,8 @@ protected:
     const Mode _planCachingMode;
 };
 
-NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
+// This function computes the value of the "reads" metric for the winning plan using the specified
+// 'stats'. This function will always return a positive value.
+NumReads computeNumReadsFromStats(const PlanStageStats& stats,
                                   const plan_ranker::PlanRankingDecision& ranking);
 }  // namespace mongo::plan_cache_util
diff --git a/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp b/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp
index b54fe4c489c..86ffe6bf4c4 100644
--- a/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp
+++ b/src/mongo/db/exec/runtime_planners/classic_runtime_planner_for_sbe/multi_planner.cpp
@@ -136,7 +136,7 @@ void MultiPlanner::_buildSbePlanAndMaybeCache(
     boost::optional<NumReads> numReads;
     if (_shouldWriteToPlanCache) {
         auto stats = _multiPlanStage->getStats();
-        numReads = plan_cache_util::computeNumReadsFromWorks(*stats, *ranking);
+        numReads = plan_cache_util::computeNumReadsFromStats(*stats, *ranking);
     }
 
     // If classic plan cache is enabled, write to it. We need to do this before we extend the QSN
diff --git a/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp b/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp
index cec146c9e08..ba5b1fef358 100644
--- a/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_trial_run_tracker_test.cpp
@@ -90,7 +90,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToStreamingStage) {
                                            kEmptyPlanNodeId /* nodeId */,
                                            ScanCallbacks());
 
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
     ON_BLOCK_EXIT([&]() { scanStage->detachFromTrialRunTracker(); });
 
     auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
@@ -112,7 +112,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBlockingStage) {
                          nullptr /* yieldPolicy */,
                          kEmptyPlanNodeId);
 
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
     ON_BLOCK_EXIT([&]() { sortStage->detachFromTrialRunTracker(); });
 
     auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
@@ -150,7 +150,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBothBlockingAndStreamingStages) {
                                           nullptr /* yieldPolicy */,
                                           kEmptyPlanNodeId);
 
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
     ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });
 
     auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get());
@@ -181,7 +181,7 @@ TEST_F(TrialRunTrackerTest, TrialRunTrackingCanBeDisabled) {
                               ScanCallbacks());
 
     scanStage->disableTrialRunTracking();
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
     auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
     ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
 }
@@ -220,8 +220,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForChildDoesNotInhibitTrackingForPa
                                           nullptr /* yieldPolicy */,
                                           kEmptyPlanNodeId);
 
-
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
     ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });
 
     auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
@@ -284,7 +283,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForAChildStagePreventsEarlyExit) {
                          kEmptyPlanNodeId);
 
     // We expect the TrialRunTracker to attach to _only_ the left child.
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, boost::none);
     auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
 
     // Note: A scan is a streaming stage, but the "virtual scan" used here does not attach to the
@@ -318,7 +317,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
                          nullptr /* yieldPolicy */,
                          PlanNodeId{3});
 
-    auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, size_t{0});
+    auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, boost::none);
     auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
     ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
 
@@ -328,4 +327,43 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
     prepareTree(ctx.get(), sortStage.get(), unwindSlot);
     ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 0);
 }
+
+TEST_F(TrialRunTrackerTest, TrackerCanTrackMetricWithMaxMetricSetToZero) {
+    auto ctx = makeCompileCtx();
+
+    auto [inputTag, inputVal] = stage_builder::makeValue(
+        BSON_ARRAY(BSON("a" << 1) << BSON("a" << 2) << BSON("a" << 3) << BSON("a" << 4)));
+    auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal, PlanNodeId{1});
+    auto sortStage =
+        makeS<SortStage>(std::move(scanStage),
+                         makeSV(scanSlot),
+                         std::vector<value::SortDirection>{value::SortDirection::Ascending},
+                         makeSV(),
+                         nullptr /*limit*/,
+                         1024 * 1024,
+                         false,
+                         nullptr /* yieldPolicy */,
+                         PlanNodeId{3});
+
+    auto onMetricReached = [&](TrialRunTracker::TrialRunMetric metric) {
+        // This callback should get called for the kNumResults metric.
+        ASSERT_EQ(metric, TrialRunTracker::kNumResults);
+        // Detach the TrialRunTracker and allow the plan to continue running.
+        sortStage->detachFromTrialRunTracker();
+        return false;
+    };
+
+    auto tracker =
+        std::make_unique<TrialRunTracker>(std::move(onMetricReached), size_t{0}, boost::none);
+    auto attachResult = sortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
+    ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::TrackResults);
+
+    // The 'prepareTree()' function opens the SortStage, causing it to read documents from its
+    // child. The scan stage (which is marked as the planning root) should return 4 documents.
+    // The tracker should only observe the first document, because the callback we supplied should
+    // detach the tracker after the first document is observed (because that is precisely when
+    // 'tracker->getMetric<TrialRunTracker::kNumResults>()' becomes greater than the maximum of 0.)
+    prepareTree(ctx.get(), sortStage.get(), scanSlot);
+    ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 1);
+}
 }  // namespace mongo::sbe
diff --git a/src/mongo/db/exec/trial_run_tracker.h b/src/mongo/db/exec/trial_run_tracker.h
index 8d9bd39ecad..0cc490e4012 100644
--- a/src/mongo/db/exec/trial_run_tracker.h
+++ b/src/mongo/db/exec/trial_run_tracker.h
@@ -53,20 +53,21 @@ public:
         // Number of physical reads performed during a trial run. Once a storage cursor advances,
         // it counts as a single physical read.
         kNumReads,
-        // Must always be the last element to hold the number of element in the enum.
+        // Must always be the last element to hold the number of elements in the enum.
         kLastElem
     };
 
+    static constexpr size_t numTrialRunMetrics = static_cast<size_t>(TrialRunMetric::kLastElem);
+
     /**
      * Constructs a 'TrialRunTracker' which indicates that the trial period is over when any
      * 'TrialRunMetric' exceeds the maximum provided at construction.
      *
-     * Callers can also pass a value of zero to indicate that the given metric should not be
-     * tracked.
+     * Callers can also pass a value of boost::none to indicate that the given metric should not
+     * be tracked.
      */
-    template <typename... MaxMetrics,
-              std::enable_if_t<sizeof...(MaxMetrics) == TrialRunMetric::kLastElem, int> = 0>
-    TrialRunTracker(MaxMetrics... maxMetrics) : _maxMetrics{maxMetrics...} {}
+    TrialRunTracker(boost::optional<size_t> maxNumResults, boost::optional<size_t> maxNumReads)
+        : _maxMetrics{maxNumResults, maxNumReads} {}
 
     /**
      * Constructs a 'TrialRunTracker' that also has an '_onMetricReached' function, which gets
@@ -75,11 +76,10 @@ public:
      * '_onMetricReached' can prevent tracking from halting plan execution, thereby upgrading a
      * trial run to a normal run.
      */
-    template <typename... MaxMetrics>
-    TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached, MaxMetrics... maxMetrics)
-        : TrialRunTracker{maxMetrics...} {
-        _onMetricReached = std::move(onMetricReached);
-    }
+    TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached,
+                    boost::optional<size_t> maxNumResults,
+                    boost::optional<size_t> maxNumReads)
+        : _maxMetrics{maxNumResults, maxNumReads}, _onMetricReached(std::move(onMetricReached)) {}
 
     /**
      * Increments the trial run metric specified as a template parameter 'metric' by the
@@ -97,7 +97,7 @@ public:
     bool trackProgress(size_t metricIncrement) {
         static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
 
-        if (_maxMetrics[metric] == 0) {
+        if (!_maxMetrics[metric].has_value()) {
             // This metric is not being tracked.
             return false;
         }
@@ -126,24 +126,24 @@ public:
     template <TrialRunMetric metric>
     bool metricReached() const {
         static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
-        return metricTracked<metric>() && _metrics[metric] > _maxMetrics[metric];
+        return _maxMetrics[metric].has_value() && _metrics[metric] > *_maxMetrics[metric];
     }
 
     template <TrialRunMetric metric>
     bool metricTracked() const {
         static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
-        return _maxMetrics[metric] != 0;
+        return _maxMetrics[metric].has_value();
     }
 
     template <TrialRunMetric metric>
-    void updateMaxMetric(size_t newMaxMetric) {
+    void updateMaxMetric(boost::optional<size_t> newMaxMetric) {
         static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
         _maxMetrics[metric] = newMaxMetric;
     }
 
 private:
-    size_t _maxMetrics[TrialRunMetric::kLastElem];
-    size_t _metrics[TrialRunMetric::kLastElem]{0};
+    boost::optional<size_t> _maxMetrics[numTrialRunMetrics];
+    size_t _metrics[numTrialRunMetrics]{0};
     bool _done{false};
     std::function<bool(TrialRunMetric)> _onMetricReached{};
 };