SERVER-109309 Fix multi-planning issue when cached plan has decisionReads=0 (#40191)
GitOrigin-RevId: d7f2b5c545c81a6d71a9b37a9926dd604fd349b9
This commit is contained in:
parent
d3f99007aa
commit
07bfb33e54
@ -0,0 +1,96 @@
|
||||
/**
|
||||
* For the scenario where the multi-planner cached a winning plan that hit EOF immediately (with
|
||||
* totalKeysExamined=0 and totalDocsExamined=0) during its trial run, this test verifies that
|
||||
* replanning will be triggered if the cached plans starts performing signficantly worse.
|
||||
*
|
||||
* This test was adapted from a repro for a bug fixed by SERVER-109309.
|
||||
*
|
||||
* @tags: [
|
||||
* requires_profiling,
|
||||
* ]
|
||||
*/
|
||||
import {getLatestProfilerEntry} from "jstests/libs/profiler.js";
|
||||
|
||||
const conn = MongoRunner.runMongod();
|
||||
const db = conn.getDB("test");
|
||||
const coll = db[jsTestName()];
|
||||
coll.drop();
|
||||
|
||||
// Define two batches of docs that will be inserted into the collection during this test.
|
||||
let docs1 = [];
|
||||
let docs2 = [];
|
||||
for (let i = 0; i < 200; ++i) {
|
||||
docs1.push({x: i % 2, y: Math.floor(i / 4)});
|
||||
}
|
||||
for (let i = 0; i < 400; ++i) {
|
||||
docs2.push({x: (i % 2) + 2, y: Math.floor(i / 8)});
|
||||
}
|
||||
|
||||
// Create an index on "x" and an index on "y".
|
||||
assert.commandWorked(coll.createIndex({x: 1}));
|
||||
assert.commandWorked(coll.createIndex({y: 1}));
|
||||
|
||||
// Enable profiling.
|
||||
assert.commandWorked(db.setProfilingLevel(2));
|
||||
|
||||
// Insert the first batch of documents into the collection.
|
||||
assert.commandWorked(coll.insert(docs1));
|
||||
|
||||
// Define the pipeline that will be used for this test.
|
||||
//
|
||||
// It's important that the "x" values in the $match's filter are greater than the "x" values of all
|
||||
// of the documents that are currently in the collection, so that WiredTigerIndexCursorBase::seek()
|
||||
// will return boost::none when MongoDB attempts to seek x=2 and x=3 in the {x: 1} index. This
|
||||
// ensures that "totalKeysExamined" will be 0 (which is important to ensure this test covers the
|
||||
// code paths relevant to SERVER-109309).
|
||||
const pipeline =
|
||||
[{$match: {x: {$in: [2, 3]}, y: 47}}, {$group: {_id: null, num: {$sum: NumberInt(1)}}}];
|
||||
|
||||
// Run the aggregate() command twice so that it's entry in the plan cache gets marked as "active".
|
||||
for (let i = 0; i < 2; ++i) {
|
||||
assert.eq(0, coll.aggregate(pipeline).itcount());
|
||||
}
|
||||
|
||||
// Run explain and get the winning plan from "allPlansExecution".
|
||||
let explain = coll.explain("allPlansExecution").aggregate(pipeline);
|
||||
let executionStats = explain.hasOwnProperty("executionStats")
|
||||
? explain.executionStats
|
||||
: explain.stages[0].$cursor.executionStats;
|
||||
|
||||
let winningPlan = executionStats.allPlansExecution[0];
|
||||
|
||||
// The plan using index {x: 1} should be the winning plan here. Because there are currently no
|
||||
// documents in the collection with x >= 2, this plan will hit EOF immediately (and both calls
|
||||
// to WiredTigerIndexCursorBase::seek() on index {x: 1} will return boost::none).
|
||||
//
|
||||
// Verify that "totalKeysExamined" and "totalDocsExamined" for the winning plan are both zero.
|
||||
// It's important that we use explain("allPlansExecution") here because we specifically want the
|
||||
// stats from the trial run (because the trial run's stats are used to compute "decisionReads"),
|
||||
// and the stats from the profiler ("keysExamined" and "docsExamined") could be slightly different
|
||||
// (due to differences in SBE vs classic).
|
||||
assert.eq(0, winningPlan.totalKeysExamined, () => tojson(explain));
|
||||
assert.eq(0, winningPlan.totalDocsExamined, () => tojson(explain));
|
||||
|
||||
// Insert the second batch of documents into the collection.
|
||||
assert.commandWorked(coll.insert(docs2));
|
||||
|
||||
// Run the aggregate() command one more time.
|
||||
assert.eq(1, coll.aggregate(pipeline).itcount());
|
||||
|
||||
// Get the profile entry for the most recent command.
|
||||
let entry = getLatestProfilerEntry(db, {op: "command", ns: coll.getFullName()});
|
||||
|
||||
// Re-planning should have occurred (because the old plan should perform much worse now that we've
|
||||
// inserted hundreds of documents with x=2 and x=3) and the plan using index {y: 1} should be the
|
||||
// winning plan now.
|
||||
assert.eq(entry.replanned, true, () => tojson(entry));
|
||||
assert.includes(
|
||||
entry.replanReason, "cached plan was less efficient than expected", () => tojson(entry));
|
||||
|
||||
// Verify that the query examined fewer than 100 keys and 100 docs. In the event that re-planning
|
||||
// hasn't happened yet, we don't want to force it to happen by running explain("allPlansExecution"),
|
||||
// so for these checks we use the stats reported by the profiler instead.
|
||||
assert.lt(entry.keysExamined, 100, () => tojson(entry));
|
||||
assert.lt(entry.docsExamined, 100, () => tojson(entry));
|
||||
|
||||
MongoRunner.stopMongod(conn);
|
||||
@ -404,7 +404,7 @@ void ClassicPlanCacheWriter::operator()(const CanonicalQuery& cq,
|
||||
|
||||
if (_executeInSbe) {
|
||||
auto stats = mps.getStats();
|
||||
auto nReads = computeNumReadsFromWorks(*stats, *ranking);
|
||||
auto nReads = computeNumReadsFromStats(*stats, *ranking);
|
||||
|
||||
updateClassicPlanCacheFromClassicCandidatesForSbeExecution(
|
||||
_opCtx, _collection.getCollectionPtr(), cq, nReads, std::move(ranking), candidates);
|
||||
@ -476,11 +476,19 @@ bool ConditionalClassicPlanCacheWriter::shouldCacheBasedOnCachingMode(
|
||||
MONGO_UNREACHABLE;
|
||||
}
|
||||
|
||||
NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
|
||||
NumReads computeNumReadsFromStats(const PlanStageStats& stats,
|
||||
const plan_ranker::PlanRankingDecision& ranking) {
|
||||
auto winnerIdx = ranking.candidateOrder[0];
|
||||
auto summary = collectExecutionStatsSummary(&stats, winnerIdx);
|
||||
return NumReads{summary.totalKeysExamined + summary.totalDocsExamined};
|
||||
|
||||
// The original "all classic" multiplanner uses the "works" stat as its unit of measure for
|
||||
// tracking how much work a plan has done, while this multiplanner (the "CRP SBE" multiplanner)
|
||||
// uses the "reads" metric (totalKeysExamined + totalDocsExamined) as its unit of measure.
|
||||
//
|
||||
// The "works" stat is always greater than zero. To play it safe and make it easier for the "all
|
||||
// classic" and "CRP SBE" multiplanners to coexist, this function makes sure to always return
|
||||
// a positive "reads" value.
|
||||
return NumReads{std::max<size_t>(summary.totalKeysExamined + summary.totalDocsExamined, 1)};
|
||||
}
|
||||
|
||||
} // namespace plan_cache_util
|
||||
|
||||
@ -212,6 +212,8 @@ protected:
|
||||
const Mode _planCachingMode;
|
||||
};
|
||||
|
||||
NumReads computeNumReadsFromWorks(const PlanStageStats& stats,
|
||||
// This function computes the value of the "reads" metric for the winning plan using the specified
|
||||
// 'stats'. This function will always return a positive value.
|
||||
NumReads computeNumReadsFromStats(const PlanStageStats& stats,
|
||||
const plan_ranker::PlanRankingDecision& ranking);
|
||||
} // namespace mongo::plan_cache_util
|
||||
|
||||
@ -136,7 +136,7 @@ void MultiPlanner::_buildSbePlanAndMaybeCache(
|
||||
boost::optional<NumReads> numReads;
|
||||
if (_shouldWriteToPlanCache) {
|
||||
auto stats = _multiPlanStage->getStats();
|
||||
numReads = plan_cache_util::computeNumReadsFromWorks(*stats, *ranking);
|
||||
numReads = plan_cache_util::computeNumReadsFromStats(*stats, *ranking);
|
||||
}
|
||||
|
||||
// If classic plan cache is enabled, write to it. We need to do this before we extend the QSN
|
||||
|
||||
@ -90,7 +90,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToStreamingStage) {
|
||||
kEmptyPlanNodeId /* nodeId */,
|
||||
ScanCallbacks());
|
||||
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
|
||||
ON_BLOCK_EXIT([&]() { scanStage->detachFromTrialRunTracker(); });
|
||||
|
||||
auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
|
||||
@ -112,7 +112,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBlockingStage) {
|
||||
nullptr /* yieldPolicy */,
|
||||
kEmptyPlanNodeId);
|
||||
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
|
||||
ON_BLOCK_EXIT([&]() { sortStage->detachFromTrialRunTracker(); });
|
||||
|
||||
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
|
||||
@ -150,7 +150,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToBothBlockingAndStreamingStages) {
|
||||
nullptr /* yieldPolicy */,
|
||||
kEmptyPlanNodeId);
|
||||
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
|
||||
ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });
|
||||
|
||||
auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get());
|
||||
@ -181,7 +181,7 @@ TEST_F(TrialRunTrackerTest, TrialRunTrackingCanBeDisabled) {
|
||||
ScanCallbacks());
|
||||
|
||||
scanStage->disableTrialRunTracking();
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
|
||||
auto attachResult = scanStage->attachToTrialRunTracker(tracker.get());
|
||||
ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
|
||||
}
|
||||
@ -220,8 +220,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForChildDoesNotInhibitTrackingForPa
|
||||
nullptr /* yieldPolicy */,
|
||||
kEmptyPlanNodeId);
|
||||
|
||||
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{0}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(boost::none, boost::none);
|
||||
ON_BLOCK_EXIT([&]() { rootSortStage->detachFromTrialRunTracker(); });
|
||||
|
||||
auto attachResult = rootSortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
|
||||
@ -284,7 +283,7 @@ TEST_F(TrialRunTrackerTest, DisablingTrackingForAChildStagePreventsEarlyExit) {
|
||||
kEmptyPlanNodeId);
|
||||
|
||||
// We expect the TrialRunTracker to attach to _only_ the left child.
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{9}, boost::none);
|
||||
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
|
||||
|
||||
// Note: A scan is a streaming stage, but the "virtual scan" used here does not attach to the
|
||||
@ -318,7 +317,7 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
|
||||
nullptr /* yieldPolicy */,
|
||||
PlanNodeId{3});
|
||||
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, size_t{0});
|
||||
auto tracker = std::make_unique<TrialRunTracker>(size_t{3}, boost::none);
|
||||
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get());
|
||||
ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::NoTracking);
|
||||
|
||||
@ -328,4 +327,43 @@ TEST_F(TrialRunTrackerTest, TrackerAttachesToPlanningRootStageAndTracksTheDocume
|
||||
prepareTree(ctx.get(), sortStage.get(), unwindSlot);
|
||||
ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 0);
|
||||
}
|
||||
|
||||
TEST_F(TrialRunTrackerTest, TrackerCanTrackMetricWithMaxMetricSetToZero) {
|
||||
auto ctx = makeCompileCtx();
|
||||
|
||||
auto [inputTag, inputVal] = stage_builder::makeValue(
|
||||
BSON_ARRAY(BSON("a" << 1) << BSON("a" << 2) << BSON("a" << 3) << BSON("a" << 4)));
|
||||
auto [scanSlot, scanStage] = generateVirtualScan(inputTag, inputVal, PlanNodeId{1});
|
||||
auto sortStage =
|
||||
makeS<SortStage>(std::move(scanStage),
|
||||
makeSV(scanSlot),
|
||||
std::vector<value::SortDirection>{value::SortDirection::Ascending},
|
||||
makeSV(),
|
||||
nullptr /*limit*/,
|
||||
1024 * 1024,
|
||||
false,
|
||||
nullptr /* yieldPolicy */,
|
||||
PlanNodeId{3});
|
||||
|
||||
auto onMetricReached = [&](TrialRunTracker::TrialRunMetric metric) {
|
||||
// This callback should get called for the kNumResults metric.
|
||||
ASSERT_EQ(metric, TrialRunTracker::kNumResults);
|
||||
// Detach the TrialRunTracker and allow the plan to continue running.
|
||||
sortStage->detachFromTrialRunTracker();
|
||||
return false;
|
||||
};
|
||||
|
||||
auto tracker =
|
||||
std::make_unique<TrialRunTracker>(std::move(onMetricReached), size_t{0}, boost::none);
|
||||
auto attachResult = sortStage->attachToTrialRunTracker(tracker.get(), PlanNodeId{1});
|
||||
ASSERT_EQ(attachResult, PlanStage::TrialRunTrackingType::TrackResults);
|
||||
|
||||
// The 'prepareTree()' function opens the SortStage, causing it to read documents from its
|
||||
// child. The scan stage (which is marked as the planning root) should return 4 documents.
|
||||
// The tracker should only observe the first document, because the callback we supplied should
|
||||
// detach the tracker after the first document is observed (because that is precisely when
|
||||
// 'tracker->getMetric<TrialRunTracker::kNumResults>()' becomes greater than the maximum of 0.)
|
||||
prepareTree(ctx.get(), sortStage.get(), scanSlot);
|
||||
ASSERT_EQ(tracker->getMetric<TrialRunTracker::kNumResults>(), 1);
|
||||
}
|
||||
} // namespace mongo::sbe
|
||||
|
||||
@ -53,20 +53,21 @@ public:
|
||||
// Number of physical reads performed during a trial run. Once a storage cursor advances,
|
||||
// it counts as a single physical read.
|
||||
kNumReads,
|
||||
// Must always be the last element to hold the number of element in the enum.
|
||||
// Must always be the last element to hold the number of elements in the enum.
|
||||
kLastElem
|
||||
};
|
||||
|
||||
static constexpr size_t numTrialRunMetrics = static_cast<size_t>(TrialRunMetric::kLastElem);
|
||||
|
||||
/**
|
||||
* Constructs a 'TrialRunTracker' which indicates that the trial period is over when any
|
||||
* 'TrialRunMetric' exceeds the maximum provided at construction.
|
||||
*
|
||||
* Callers can also pass a value of zero to indicate that the given metric should not be
|
||||
* tracked.
|
||||
* Callers can also pass a value of boost::none to indicate that the given metric should not
|
||||
* be tracked.
|
||||
*/
|
||||
template <typename... MaxMetrics,
|
||||
std::enable_if_t<sizeof...(MaxMetrics) == TrialRunMetric::kLastElem, int> = 0>
|
||||
TrialRunTracker(MaxMetrics... maxMetrics) : _maxMetrics{maxMetrics...} {}
|
||||
TrialRunTracker(boost::optional<size_t> maxNumResults, boost::optional<size_t> maxNumReads)
|
||||
: _maxMetrics{maxNumResults, maxNumReads} {}
|
||||
|
||||
/**
|
||||
* Constructs a 'TrialRunTracker' that also has an '_onMetricReached' function, which gets
|
||||
@ -75,11 +76,10 @@ public:
|
||||
* '_onMetricReached' can prevent tracking from halting plan execution, thereby upgrading a
|
||||
* trial run to a normal run.
|
||||
*/
|
||||
template <typename... MaxMetrics>
|
||||
TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached, MaxMetrics... maxMetrics)
|
||||
: TrialRunTracker{maxMetrics...} {
|
||||
_onMetricReached = std::move(onMetricReached);
|
||||
}
|
||||
TrialRunTracker(std::function<bool(TrialRunMetric)> onMetricReached,
|
||||
boost::optional<size_t> maxNumResults,
|
||||
boost::optional<size_t> maxNumReads)
|
||||
: _maxMetrics{maxNumResults, maxNumReads}, _onMetricReached(std::move(onMetricReached)) {}
|
||||
|
||||
/**
|
||||
* Increments the trial run metric specified as a template parameter 'metric' by the
|
||||
@ -97,7 +97,7 @@ public:
|
||||
bool trackProgress(size_t metricIncrement) {
|
||||
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
|
||||
|
||||
if (_maxMetrics[metric] == 0) {
|
||||
if (!_maxMetrics[metric].has_value()) {
|
||||
// This metric is not being tracked.
|
||||
return false;
|
||||
}
|
||||
@ -126,24 +126,24 @@ public:
|
||||
template <TrialRunMetric metric>
|
||||
bool metricReached() const {
|
||||
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
|
||||
return metricTracked<metric>() && _metrics[metric] > _maxMetrics[metric];
|
||||
return _maxMetrics[metric].has_value() && _metrics[metric] > *_maxMetrics[metric];
|
||||
}
|
||||
|
||||
template <TrialRunMetric metric>
|
||||
bool metricTracked() const {
|
||||
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
|
||||
return _maxMetrics[metric] != 0;
|
||||
return _maxMetrics[metric].has_value();
|
||||
}
|
||||
|
||||
template <TrialRunMetric metric>
|
||||
void updateMaxMetric(size_t newMaxMetric) {
|
||||
void updateMaxMetric(boost::optional<size_t> newMaxMetric) {
|
||||
static_assert(metric >= 0 && metric < sizeof(_metrics) / sizeof(size_t));
|
||||
_maxMetrics[metric] = newMaxMetric;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t _maxMetrics[TrialRunMetric::kLastElem];
|
||||
size_t _metrics[TrialRunMetric::kLastElem]{0};
|
||||
boost::optional<size_t> _maxMetrics[numTrialRunMetrics];
|
||||
size_t _metrics[numTrialRunMetrics]{0};
|
||||
bool _done{false};
|
||||
std::function<bool(TrialRunMetric)> _onMetricReached{};
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user