SERVER-122255 Refactor EstimateMap to store pointer to QSNEstimate (#50321)

GitOrigin-RevId: 6213aa20f64eb85a5b7c316e71f28ea7c3423233
This commit is contained in:
Ben Shteinfeld 2026-03-30 13:27:39 -04:00 committed by MongoDB Bot
parent 41eb133463
commit 3447192f97
16 changed files with 105 additions and 93 deletions

View File

@ -53,9 +53,8 @@ CEResult ExactCardinalityImpl::populateCardinalities(
}
const auto commonStats = execStage->getCommonStats();
cost_based_ranker::QSNEstimate card{
.outCE = CardinalityEstimate{CardinalityType{(double)commonStats->advanced},
EstimationSource::Code}};
cost_based_ranker::QSNEstimate card{CardinalityEstimate{
CardinalityType{(double)commonStats->advanced}, EstimationSource::Code}};
// If we are at a leaf node, we must record inCE as well. We get this from the SpecificStats.
if (execStage->getChildren().empty()) {
// TODO SERVER-99075: Add a case for distinct scan here
@ -83,7 +82,7 @@ CEResult ExactCardinalityImpl::populateCardinalities(
}
}
CardinalityEstimate res{card.outCE};
cardinalities.emplace(node, std::move(card));
cardinalities.emplace(node, std::make_unique<cost_based_ranker::QSNEstimate>(std::move(card)));
tassert(10659801,
"A QSN should have the same number of children as its corresponding execution stage",

View File

@ -133,7 +133,7 @@ CEResult CardinalityEstimator::estimate(const QuerySolutionNode* node) {
break;
}
case STAGE_EOF: {
_qsnEstimates[node] = QSNEstimate{.inCE = zeroCE, .outCE = zeroCE};
_qsnEstimates[node] = std::make_unique<QSNEstimate>(zeroCE, zeroCE);
return zeroCE;
}
case STAGE_LIMIT:
@ -365,7 +365,7 @@ CEResult CardinalityEstimator::scanCard(const QuerySolutionNode* node,
if (_inputCard == zeroCE) {
est.outCE = _inputCard;
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return _inputCard;
}
@ -379,7 +379,7 @@ CEResult CardinalityEstimator::scanCard(const QuerySolutionNode* node,
est.outCE = card;
}
CardinalityEstimate outCE{est.outCE};
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return outCE;
}
@ -466,7 +466,7 @@ CEResult CardinalityEstimator::estimate(const IndexScanNode* node) {
node->bounds.isUnbounded())) {
est.inCE = _inputCard;
est.outCE = _inputCard;
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return _inputCard;
}
@ -529,7 +529,7 @@ CEResult CardinalityEstimator::estimate(const IndexScanNode* node) {
}
CardinalityEstimate outCE{est.outCE};
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return outCE;
}
@ -546,11 +546,11 @@ CEResult CardinalityEstimator::estimate(const IndexScanNode* node) {
}
// Estimate the cardinality of the combined index scan and filter conditions.
// TODO: conjCard doesn't account for double-counting because some of the filter conditions
// may re-evaluate the interval bounds.
// TODO SERVER-122570: conjCard doesn't account for double-counting because some of the filter
// conditions may re-evaluate the interval bounds.
est.outCE = conjCard(selOffset, _inputCard);
CardinalityEstimate outCE{est.outCE};
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return outCE;
}
@ -568,7 +568,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
if (ceRes1 == zeroCE) {
est.outCE = ceRes1.getValue();
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return ceRes1.getValue();
}
@ -578,7 +578,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
// from its input cardinality.
if (node->filter == nullptr) {
est.outCE = ceRes1.getValue();
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return ceRes1.getValue();
}
@ -604,7 +604,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
popSelectivities();
_conjSels.emplace_back(ce / _inputCard);
est.outCE = ce;
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return ce;
}
}
@ -622,7 +622,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
// Combine the selectivity of this node's filter (if any) with its child selectivities.
est.outCE = conjCard(0, _inputCard);
CardinalityEstimate outCE{est.outCE};
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return outCE;
}
@ -635,7 +635,7 @@ CEResult CardinalityEstimator::passThroughNodeCard(const QuerySolutionNode* node
if (!ceRes.isOK()) {
return ceRes;
}
_qsnEstimates.emplace(node, QSNEstimate{.outCE = ceRes.getValue()});
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(ceRes.getValue()));
return ceRes.getValue();
}
@ -645,13 +645,13 @@ CEResult CardinalityEstimator::limitNodeCard(const QuerySolutionNode* node, size
return ceRes;
}
if (ceRes == zeroCE) {
_qsnEstimates.emplace(node, QSNEstimate{.outCE = ceRes.getValue()});
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(ceRes.getValue()));
return ceRes.getValue();
}
auto limitCE = CardinalityEstimate{CardinalityType{static_cast<double>(limit)},
EstimationSource::Metadata};
auto est = std::min(limitCE, ceRes.getValue());
_qsnEstimates.emplace(node, QSNEstimate{.outCE = est});
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(est));
return est;
}
@ -682,7 +682,7 @@ CEResult CardinalityEstimator::indexIntersectionCard(const T* node) {
}
CardinalityEstimate outCE{est.outCE};
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return outCE;
}
@ -719,7 +719,7 @@ CEResult CardinalityEstimator::indexUnionCard(const T* node) {
if (_inputCard != zeroCE) {
_conjSels.emplace_back(outCE / _inputCard);
}
_qsnEstimates.emplace(node, std::move(est));
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
return outCE;
}
@ -765,7 +765,7 @@ CEResult CardinalityEstimator::estimateConjWithHistogram(
return zeroMetadataCE;
}
}
// TODO: SERVER-98094 use tightness depending the context in which a predicate is estimated
// TODO SERVER-98094: use tightness depending on the context in which a predicate is estimated
return estimate(&oil, true);
}
@ -844,7 +844,7 @@ CEResult CardinalityEstimator::estimate(const SortNode* node) {
CEResult CardinalityEstimator::estimate(const LimitNode* node) {
auto ceRes = limitNodeCard(node, node->limit);
if (ceRes != zeroCE) {
if (ceRes.isOK() && ceRes != zeroCE) {
propagateLimit(node->children[0].get(), node->limit);
}
return ceRes;
@ -856,7 +856,7 @@ CEResult CardinalityEstimator::estimate(const SkipNode* node) {
return ceRes;
}
if (ceRes == zeroCE) {
_qsnEstimates.emplace(node, QSNEstimate{.outCE = ceRes.getValue()});
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(ceRes.getValue()));
return ceRes.getValue();
}
auto childEst = ceRes.getValue();
@ -869,7 +869,7 @@ CEResult CardinalityEstimator::estimate(const SkipNode* node) {
if (skip <= childEst) {
card = childEst - skip;
}
_qsnEstimates.emplace(node, QSNEstimate{.outCE = card});
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(card));
_conjSels.push_back(card / _inputCard);
return card;
}
@ -927,15 +927,18 @@ CEResult CardinalityEstimator::estimate(const AndMatchExpression* node) {
}
// Try to use histograms to estimate all children of this AndMatchExpression.
// TODO: Suppose we have an AND with some predicates on 'a' that can answered with a
// histogram and some predicates on 'b' that can't. Should we still try to use histogram for
// TODO SERVER-122571: Suppose we have an AND with some predicates on 'a' that can be answered
// with a histogram and some predicates on 'b' that can't. Should we still try to use histogram
// for 'a'? The code as written will not.
if (_rankerMode == QueryPlanRankerModeEnum::kHistogramCE ||
_rankerMode == QueryPlanRankerModeEnum::kAutomaticCE) {
size_t selOffset = _conjSels.size();
auto ceRes = tryHistogramAnd(node);
if (ceRes.isOK()) {
return ceRes.getValue();
}
// Clean up any selectivities leaked by partial histogram estimation before falling back.
popSelectivities(selOffset);
// Fallback to generic AndMatchExpression estimation.
}
@ -1173,8 +1176,8 @@ CEResult CardinalityEstimator::estimate(const IndexBounds* node) {
}
if (_rankerMode == QueryPlanRankerModeEnum::kSamplingCE) {
// TODO: avoid copies to construct the equality prefix. We could do this by teaching
// SamplingEstimator or IndexBounds about the equality prefix concept.
// TODO SERVER-122572: avoid copies to construct the equality prefix. We could do this by
// teaching SamplingEstimator or IndexBounds about the equality prefix concept.
auto eqPrefix = equalityPrefix(node);
if (eqPrefix.isIndexSkipScan) {
return Status(ErrorCodes::UnsupportedCbrNode, "encountered index skip scan case");
@ -1289,7 +1292,7 @@ CEResult CardinalityEstimator::estimate(const OrderedIntervalList* node, bool fo
}
void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t limit) {
auto& outCE = _qsnEstimates[node].outCE;
auto& outCE = _qsnEstimates[node]->outCE;
const auto limitCE =
CardinalityEstimate{CardinalityType{double(limit)}, EstimationSource::Metadata};
@ -1325,7 +1328,7 @@ void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t
case STAGE_COLLSCAN:
case STAGE_VIRTUAL_SCAN:
case STAGE_IXSCAN: {
auto& inCE = *_qsnEstimates[node].inCE;
auto& inCE = *_qsnEstimates[node]->inCE;
if (inCE == zeroCE) {
break;
}
@ -1334,7 +1337,7 @@ void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t
break;
}
case STAGE_FETCH: {
const auto& inCE = _qsnEstimates[node->children[0].get()].outCE;
const auto& inCE = _qsnEstimates[node->children[0].get()]->outCE;
if (inCE == zeroCE) {
break;
}
@ -1379,7 +1382,7 @@ void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t
// ...
// * _inputCard = 2000
for (auto& child : node->children) {
const auto& childCE = _qsnEstimates[child.get()].outCE;
const auto& childCE = _qsnEstimates[child.get()]->outCE;
if (childCE == zeroCE) {
continue;
}

View File

@ -40,12 +40,12 @@ CostEstimate CostEstimator::costTree(const QuerySolutionNode* qsn) {
childCosts.push_back(childCost);
auto foundChildEst = _estimateMap.find(child.get());
tassert(9695101, "All QSNs must have a CE.", foundChildEst != _estimateMap.end());
childCEs.push_back(foundChildEst->second.outCE);
childCEs.push_back(foundChildEst->second->outCE);
}
auto foundQSNEst = _estimateMap.find(qsn);
tassert(9695100, "All QSNs must have a CE.", foundQSNEst != _estimateMap.end());
QSNEstimate& qsnEstimate = foundQSNEst->second;
QSNEstimate& qsnEstimate = *foundQSNEst->second;
computeAndSetNodeCost(qsn, childCosts, childCEs, qsnEstimate);
return qsnEstimate.cost;
}

View File

@ -42,27 +42,27 @@ TEST(CostEstimator, FullCollScanVsFilteredCollScan) {
EstimateMap estimates;
auto fullCollScan = makeCollScanPlan(nullptr);
estimates[fullCollScan->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)};
estimates[fullCollScan->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(100));
BSONObj query = fromjson("{a: {$gt: 5}}");
auto collScanFilter = makeCollScanPlan(parse(query));
// The predicate filters out 50 documents.
estimates[collScanFilter->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(50)};
estimates[collScanFilter->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(50));
CostEstimator costEstimator{estimates};
costEstimator.estimatePlan(*fullCollScan);
costEstimator.estimatePlan(*collScanFilter);
ASSERT_LT(estimates[fullCollScan->root()].cost, estimates[collScanFilter->root()].cost);
ASSERT_LT(estimates[fullCollScan->root()]->cost, estimates[collScanFilter->root()]->cost);
}
CostEstimate getCollScanWithFilterCost(const BSONObj& filterObj) {
EstimateMap estimates;
auto plan = makeCollScanPlan(parse(filterObj));
estimates[plan->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(50)};
estimates[plan->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(50));
CostEstimator costEstimator{estimates};
costEstimator.estimatePlan(*plan);
return estimates[plan->root()].cost;
return estimates[plan->root()]->cost;
}
TEST(CostEstimator, FilterCostForSingleLeaf) {
@ -119,18 +119,18 @@ TEST(CostEstimator, VirtualScan) {
EstimateMap estimates;
auto fullCollScan = makeVirtualCollScanPlan(100, nullptr);
estimates[fullCollScan->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)};
estimates[fullCollScan->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(100));
BSONObj query = fromjson("{a: {$gt: 5}}");
auto collScanFilter = makeVirtualCollScanPlan(100, parse(query));
// The predicate filters out 50 documents.
estimates[collScanFilter->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(50)};
estimates[collScanFilter->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(50));
CostEstimator costEstimator{estimates};
costEstimator.estimatePlan(*fullCollScan);
costEstimator.estimatePlan(*collScanFilter);
ASSERT_LT(estimates[fullCollScan->root()].cost, estimates[collScanFilter->root()].cost);
ASSERT_LT(estimates[fullCollScan->root()]->cost, estimates[collScanFilter->root()]->cost);
}
TEST(CostEstimator, PointIndexScanLessCostThanRange) {
@ -139,10 +139,10 @@ TEST(CostEstimator, PointIndexScanLessCostThanRange) {
auto testNss = NamespaceString::createNamespaceString_forTest("testdb.coll");
auto pointIndexScan = makeIndexScanFetchPlan(testNss, makePointIntervalBounds(1, "a"), {"a"});
// Fetch
estimates[pointIndexScan->root()] = QSNEstimate{.outCE = makeCard(1)};
estimates[pointIndexScan->root()] = std::make_unique<QSNEstimate>(makeCard(1));
// IndexScan
estimates[pointIndexScan->root()->children[0].get()] =
QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(1)};
std::make_unique<QSNEstimate>(makeCard(100), makeCard(1));
auto rangeIndexScan = makeIndexScanFetchPlan(
testNss,
@ -150,27 +150,27 @@ TEST(CostEstimator, PointIndexScanLessCostThanRange) {
BSON("" << 5 << "" << 6), BoundInclusion::kIncludeBothStartAndEndKeys, "a"),
{"a"});
// Fetch
estimates[rangeIndexScan->root()] = QSNEstimate{.outCE = makeCard(10)};
estimates[rangeIndexScan->root()] = std::make_unique<QSNEstimate>(makeCard(10));
// IndexScan
estimates[rangeIndexScan->root()->children[0].get()] =
QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(10)};
std::make_unique<QSNEstimate>(makeCard(100), makeCard(10));
CostEstimator costEstimator{estimates};
costEstimator.estimatePlan(*pointIndexScan);
costEstimator.estimatePlan(*rangeIndexScan);
// Cost of point scan plan should be less than that of the range scan
ASSERT_LT(estimates[pointIndexScan->root()].cost, estimates[rangeIndexScan->root()].cost);
ASSERT_LT(estimates[pointIndexScan->root()]->cost, estimates[rangeIndexScan->root()]->cost);
// Cost of fetch node should be greater than cost of the index scan as costs are cumulative
ASSERT_GT(estimates[pointIndexScan->root()].cost,
estimates[pointIndexScan->root()->children[0].get()].cost);
ASSERT_GT(estimates[pointIndexScan->root()]->cost,
estimates[pointIndexScan->root()->children[0].get()]->cost);
}
std::unique_ptr<IndexScanNode> indexScanNode(const NamespaceString& nss,
EstimateMap& estimates,
QSNEstimate est) {
auto node = makeIndexScan(nss, makePointIntervalBounds(1, "a"), {"a"});
estimates[node.get()] = est;
estimates[node.get()] = std::make_unique<QSNEstimate>(std::move(est));
return node;
}
@ -181,33 +181,33 @@ void testIndexCombinationDependsOnChildren() {
auto indexIntersectNode = std::make_unique<IndexCombinationNode>();
indexIntersectNode->addChildren([&]() {
std::vector<std::unique_ptr<QuerySolutionNode>> children;
children.push_back(indexScanNode(
testNss, estimates, QSNEstimate{.inCE = makeCard(10), .outCE = makeCard(10)}));
children.push_back(indexScanNode(
testNss, estimates, QSNEstimate{.inCE = makeCard(10), .outCE = makeCard(10)}));
children.push_back(
indexScanNode(testNss, estimates, QSNEstimate{makeCard(10), makeCard(10)}));
children.push_back(
indexScanNode(testNss, estimates, QSNEstimate{makeCard(10), makeCard(10)}));
return children;
}());
estimates[indexIntersectNode.get()] = QSNEstimate{.outCE = makeCard(5)};
estimates[indexIntersectNode.get()] = std::make_unique<QSNEstimate>(makeCard(5));
auto cheapPlan = std::make_unique<QuerySolution>();
cheapPlan->setRoot(std::move(indexIntersectNode));
auto expensiveIndexIntersectNode = std::make_unique<IndexCombinationNode>();
expensiveIndexIntersectNode->addChildren([&]() {
std::vector<std::unique_ptr<QuerySolutionNode>> children;
children.push_back(indexScanNode(
testNss, estimates, QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)}));
children.push_back(indexScanNode(
testNss, estimates, QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)}));
children.push_back(
indexScanNode(testNss, estimates, QSNEstimate{makeCard(100), makeCard(100)}));
children.push_back(
indexScanNode(testNss, estimates, QSNEstimate{makeCard(100), makeCard(100)}));
return children;
}());
estimates[expensiveIndexIntersectNode.get()] = QSNEstimate{.outCE = makeCard(5)};
estimates[expensiveIndexIntersectNode.get()] = std::make_unique<QSNEstimate>(makeCard(5));
auto expensivePlan = std::make_unique<QuerySolution>();
expensivePlan->setRoot(std::move(expensiveIndexIntersectNode));
CostEstimator costEstimator{estimates};
costEstimator.estimatePlan(*cheapPlan);
costEstimator.estimatePlan(*expensivePlan);
ASSERT_LT(estimates[cheapPlan->root()].cost, estimates[expensivePlan->root()].cost);
ASSERT_LT(estimates[cheapPlan->root()]->cost, estimates[expensivePlan->root()]->cost);
}
// Increasing child cost increases the cost of index intersection and union plans
@ -220,33 +220,31 @@ TEST(CostEstimator, IndexCombinationDependsOnChildren) {
std::unique_ptr<CollectionScanNode> collScanNode(EstimateMap& estimates, QSNEstimate est) {
auto node = std::make_unique<CollectionScanNode>();
estimates[node.get()] = est;
estimates[node.get()] = std::make_unique<QSNEstimate>(std::move(est));
return node;
}
template <typename SortNode>
void testSortCostDependsOnChildren() {
EstimateMap estimates;
auto cheapCollScan =
collScanNode(estimates, QSNEstimate{.inCE = makeCard(10), .outCE = makeCard(10)});
auto cheapCollScan = collScanNode(estimates, QSNEstimate{makeCard(10), makeCard(10)});
auto cheapSort = std::make_unique<SortNode>(
std::move(cheapCollScan), BSON("a" << 1), 0, LimitSkipParameterization::Disabled);
estimates[cheapSort.get()] = QSNEstimate{.outCE = makeCard(10)};
estimates[cheapSort.get()] = std::make_unique<QSNEstimate>(makeCard(10));
auto cheapPlan = std::make_unique<QuerySolution>();
cheapPlan->setRoot(std::move(cheapSort));
auto expsensiveCollScan =
collScanNode(estimates, QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)});
auto expsensiveCollScan = collScanNode(estimates, QSNEstimate{makeCard(100), makeCard(100)});
auto expensiveSort = std::make_unique<SortNode>(
std::move(expsensiveCollScan), BSON("a" << 1), 0, LimitSkipParameterization::Disabled);
estimates[expensiveSort.get()] = QSNEstimate{.outCE = makeCard(100)};
estimates[expensiveSort.get()] = std::make_unique<QSNEstimate>(makeCard(100));
auto expensivePlan = std::make_unique<QuerySolution>();
expensivePlan->setRoot(std::move(expensiveSort));
CostEstimator costEstimator{estimates};
costEstimator.estimatePlan(*cheapPlan);
costEstimator.estimatePlan(*expensivePlan);
ASSERT_LT(estimates[cheapPlan->root()].cost, estimates[expensivePlan->root()].cost);
ASSERT_LT(estimates[cheapPlan->root()]->cost, estimates[expensivePlan->root()]->cost);
}
TEST(CostEstimator, SortDefaultOrSimple) {

View File

@ -592,6 +592,23 @@ struct QSNEstimate {
boost::optional<CardinalityEstimate> inCE;
CardinalityEstimate outCE{CardinalityType{0}, EstimationSource::Code};
CostEstimate cost{CostType::maxValue(), EstimationSource::Code};
QSNEstimate() = default;
QSNEstimate(CardinalityEstimate outCE,
CostEstimate cost = CostEstimate{CostType::maxValue(), EstimationSource::Code})
: outCE(std::move(outCE)), cost(std::move(cost)) {}
QSNEstimate(boost::optional<CardinalityEstimate> inCE, CardinalityEstimate outCE)
: inCE(std::move(inCE)), outCE(std::move(outCE)) {}
virtual ~QSNEstimate() = default;
virtual void serialize(BSONObjBuilder& bob) const {
bob.append("costEstimate", cost.toDouble());
bob.append("cardinalityEstimate", outCE.toDouble());
BSONObjBuilder metadataBob(bob.subobjStart("estimatesMetadata"));
metadataBob.append("ceSource", toStringData(outCE.source()));
metadataBob.done();
}
};
// Predefined constants

View File

@ -33,11 +33,13 @@
#include "mongo/db/query/compiler/physical_model/query_solution/query_solution.h"
#include "mongo/util/modules.h"
#include <memory>
namespace mongo::cost_based_ranker {
/**
* EstimateMap is a type representing a mapping from QuerySolutionNode pointers to their
* QSNEstimates (cardinality and cost estimates).
*/
using EstimateMap = absl::flat_hash_map<const QuerySolutionNode*, QSNEstimate>;
using EstimateMap = absl::flat_hash_map<const QuerySolutionNode*, std::unique_ptr<QSNEstimate>>;
} // namespace mongo::cost_based_ranker

View File

@ -92,7 +92,7 @@ NodeCardinalities JoinCardinalityEstimator::extractNodeCardinalities(
tassert(11514600, "Missing QSN for CanonicalQuery", qsn != ctx.cbrCqQsns.end());
auto cbrRes = estimates.find(qsn->second->root());
tassert(11514601, "Missing estimate for QSN root", cbrRes != estimates.end());
nodeCardinalities.push_back(cbrRes->second.outCE);
nodeCardinalities.push_back(cbrRes->second->outCE);
}
return nodeCardinalities;
}

View File

@ -184,12 +184,12 @@ TEST_F(JoinPredicateEstimatorFixture, ExtractNodeCardinalities) {
cost_based_ranker::EstimateMap estimates;
{
auto aPlan = makeCollScanPlan(aNss);
estimates[aPlan->root()] = {inCE, aCE};
estimates[aPlan->root()] = std::make_unique<cost_based_ranker::QSNEstimate>(inCE, aCE);
cbrCqQsns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan);
}
{
auto bPlan = makeCollScanPlan(bNss);
estimates[bPlan->root()] = {inCE, bCE};
estimates[bPlan->root()] = std::make_unique<cost_based_ranker::QSNEstimate>(inCE, bCE);
cbrCqQsns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan);
}

View File

@ -166,7 +166,7 @@ void addEstimatesIfExplain(const JoinReorderingContext& ctx,
auto ce = peCtx.getJoinCardinalityEstimator()->getOrEstimateSubsetCardinality(set);
estimates.insert_or_assign(
node, cost_based_ranker::QSNEstimate{.outCE = ce, .cost = cost.getTotalCost()});
node, std::make_unique<cost_based_ranker::QSNEstimate>(ce, cost.getTotalCost()));
}
// Forward-declare because of mutual recursion.

View File

@ -45,7 +45,7 @@ using SingleTableAccessTestFixture = JoinOrderingTestFixture;
void assertQuerySolutionHasEstimate(const QuerySolutionNode* qsn, const EstimateMap& estimates) {
auto it = estimates.find(qsn);
ASSERT(it != estimates.end());
ASSERT_EQ(EstimationSource::Sampling, it->second.outCE.source());
ASSERT_EQ(EstimationSource::Sampling, it->second->outCE.source());
for (auto&& child : qsn->children) {
assertQuerySolutionHasEstimate(child.get(), estimates);
}

View File

@ -264,7 +264,7 @@ void captureCardinalityEstimationMethodForQueryStats(
if (it != maybeExplainData->estimates.end()) {
auto& ceMethods =
CurOp::get(opCtx)->debug().getAdditiveMetrics().cardinalityEstimationMethods;
switch (it->second.outCE.source()) {
switch (it->second->outCE.source()) {
case cost_based_ranker::EstimationSource::Histogram:
ceMethods.setHistogram(ceMethods.getHistogram().value_or(0) + 1);
break;

View File

@ -67,7 +67,9 @@ inline PlanExplainerData& operator<<(PlanExplainerData& lhs, PlanExplainerData&&
std::make_move_iterator(rhs.rejectedPlansWithStages.begin()),
std::make_move_iterator(rhs.rejectedPlansWithStages.end()));
lhs.planStageQsnMap.insert(rhs.planStageQsnMap.begin(), rhs.planStageQsnMap.end());
lhs.estimates.insert(rhs.estimates.begin(), rhs.estimates.end());
for (auto& [k, v] : rhs.estimates) {
lhs.estimates.insert_or_assign(k, std::move(v));
}
return lhs;
}

View File

@ -270,9 +270,8 @@ void statsToBSON(const stage_builder::PlanStageToQsnMap& planStageQsnMap,
// Cost and cardinality of the stage.
if (querySolutionNode && estimates.contains(querySolutionNode)) {
const auto& est = estimates.at(querySolutionNode);
bob->append("costEstimate", est.cost.toDouble());
bob->append("cardinalityEstimate", est.outCE.toDouble());
const auto& est = *estimates.at(querySolutionNode);
est.serialize(*bob);
// Display 'inCE' as 'numKeys' for index scan and 'numDocs' for collection scan.
if (est.inCE.has_value()) {
double ce = est.inCE->toDouble();
@ -282,9 +281,6 @@ void statsToBSON(const stage_builder::PlanStageToQsnMap& planStageQsnMap,
bob->append("numDocsEstimate", ce);
}
}
BSONObjBuilder metadataBob(bob->subobjStart("estimatesMetadata"));
metadataBob.append("ceSource", toStringData(est.outCE.source()));
metadataBob.done();
}
// Display the BSON representation of the filter, if there is one.

View File

@ -294,12 +294,7 @@ void statsToBSON(const QuerySolutionNode* node,
// Cost and cardinality of the stage.
if (estimates.contains(node)) {
const auto& est = estimates.at(node);
bob->append("costEstimate", est.cost.toDouble());
bob->append("cardinalityEstimate", est.outCE.toDouble());
BSONObjBuilder metadataBob(bob->subobjStart("estimatesMetadata"));
metadataBob.append("ceSource", toStringData(est.outCE.source()));
metadataBob.done();
estimates.at(node)->serialize(*bob);
}
// Display the BSON representation of the filter, if there is one.

View File

@ -609,14 +609,14 @@ TEST_F(PlanExplainerTest, PlanExplainerDataMergeFull) {
// Use distinct pointer values to avoid key collision
data1.planStageQsnMap.emplace(reinterpret_cast<const PlanStage*>(0x1), nullptr);
data1.estimates.emplace(reinterpret_cast<const QuerySolutionNode*>(0x1),
cost_based_ranker::QSNEstimate{});
std::make_unique<cost_based_ranker::QSNEstimate>());
PlanExplainerData data2;
auto qsn2 = std::make_unique<QuerySolution>();
data2.rejectedPlansWithStages.push_back({std::move(qsn2), nullptr});
data2.planStageQsnMap.emplace(reinterpret_cast<const PlanStage*>(0x2), nullptr);
data2.estimates.emplace(reinterpret_cast<const QuerySolutionNode*>(0x2),
cost_based_ranker::QSNEstimate{});
std::make_unique<cost_based_ranker::QSNEstimate>());
data1 << std::move(data2);

View File

@ -2186,7 +2186,7 @@ std::pair<SbStage, PlanStageSlots> SlotBasedStageBuilder::buildHashJoinEmbedding
if (_estimates) {
if (auto it = _estimates->find(hashJoinEmbeddingNode->children[0].get());
it != _estimates->end()) {
estimatedBuildCardinality = static_cast<size_t>(it->second.outCE.toDouble());
estimatedBuildCardinality = static_cast<size_t>(it->second->outCE.toDouble());
}
}