SERVER-122255 Refactor EstimateMap to store pointer to QSNEstimate (#50321)
GitOrigin-RevId: 6213aa20f64eb85a5b7c316e71f28ea7c3423233
This commit is contained in:
parent
41eb133463
commit
3447192f97
@ -53,9 +53,8 @@ CEResult ExactCardinalityImpl::populateCardinalities(
|
||||
}
|
||||
|
||||
const auto commonStats = execStage->getCommonStats();
|
||||
cost_based_ranker::QSNEstimate card{
|
||||
.outCE = CardinalityEstimate{CardinalityType{(double)commonStats->advanced},
|
||||
EstimationSource::Code}};
|
||||
cost_based_ranker::QSNEstimate card{CardinalityEstimate{
|
||||
CardinalityType{(double)commonStats->advanced}, EstimationSource::Code}};
|
||||
// If we are at a leaf node, we must record inCE as well. We get this from the SpecificStats.
|
||||
if (execStage->getChildren().empty()) {
|
||||
// TODO SERVER-99075: Add a case for distinct scan here
|
||||
@ -83,7 +82,7 @@ CEResult ExactCardinalityImpl::populateCardinalities(
|
||||
}
|
||||
}
|
||||
CardinalityEstimate res{card.outCE};
|
||||
cardinalities.emplace(node, std::move(card));
|
||||
cardinalities.emplace(node, std::make_unique<cost_based_ranker::QSNEstimate>(std::move(card)));
|
||||
|
||||
tassert(10659801,
|
||||
"A QSN should have the same number of children as its corresponding execution stage",
|
||||
|
||||
@ -133,7 +133,7 @@ CEResult CardinalityEstimator::estimate(const QuerySolutionNode* node) {
|
||||
break;
|
||||
}
|
||||
case STAGE_EOF: {
|
||||
_qsnEstimates[node] = QSNEstimate{.inCE = zeroCE, .outCE = zeroCE};
|
||||
_qsnEstimates[node] = std::make_unique<QSNEstimate>(zeroCE, zeroCE);
|
||||
return zeroCE;
|
||||
}
|
||||
case STAGE_LIMIT:
|
||||
@ -365,7 +365,7 @@ CEResult CardinalityEstimator::scanCard(const QuerySolutionNode* node,
|
||||
|
||||
if (_inputCard == zeroCE) {
|
||||
est.outCE = _inputCard;
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return _inputCard;
|
||||
}
|
||||
|
||||
@ -379,7 +379,7 @@ CEResult CardinalityEstimator::scanCard(const QuerySolutionNode* node,
|
||||
est.outCE = card;
|
||||
}
|
||||
CardinalityEstimate outCE{est.outCE};
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
|
||||
return outCE;
|
||||
}
|
||||
@ -466,7 +466,7 @@ CEResult CardinalityEstimator::estimate(const IndexScanNode* node) {
|
||||
node->bounds.isUnbounded())) {
|
||||
est.inCE = _inputCard;
|
||||
est.outCE = _inputCard;
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return _inputCard;
|
||||
}
|
||||
|
||||
@ -529,7 +529,7 @@ CEResult CardinalityEstimator::estimate(const IndexScanNode* node) {
|
||||
}
|
||||
|
||||
CardinalityEstimate outCE{est.outCE};
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return outCE;
|
||||
}
|
||||
|
||||
@ -546,11 +546,11 @@ CEResult CardinalityEstimator::estimate(const IndexScanNode* node) {
|
||||
}
|
||||
|
||||
// Estimate the cardinality of the combined index scan and filter conditions.
|
||||
// TODO: conjCard doesn't account for double-counting because some of the filter conditions
|
||||
// may re-evaluate the interval bounds.
|
||||
// TODO SERVER-122570: conjCard doesn't account for double-counting because some of the filter
|
||||
// conditions may re-evaluate the interval bounds.
|
||||
est.outCE = conjCard(selOffset, _inputCard);
|
||||
CardinalityEstimate outCE{est.outCE};
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return outCE;
|
||||
}
|
||||
|
||||
@ -568,7 +568,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
|
||||
|
||||
if (ceRes1 == zeroCE) {
|
||||
est.outCE = ceRes1.getValue();
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return ceRes1.getValue();
|
||||
}
|
||||
|
||||
@ -578,7 +578,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
|
||||
// from its input cardinality.
|
||||
if (node->filter == nullptr) {
|
||||
est.outCE = ceRes1.getValue();
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return ceRes1.getValue();
|
||||
}
|
||||
|
||||
@ -604,7 +604,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
|
||||
popSelectivities();
|
||||
_conjSels.emplace_back(ce / _inputCard);
|
||||
est.outCE = ce;
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
return ce;
|
||||
}
|
||||
}
|
||||
@ -622,7 +622,7 @@ CEResult CardinalityEstimator::estimate(const FetchNode* node) {
|
||||
// Combine the selectivity of this node's filter (if any) with its child selectivities.
|
||||
est.outCE = conjCard(0, _inputCard);
|
||||
CardinalityEstimate outCE{est.outCE};
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
|
||||
return outCE;
|
||||
}
|
||||
@ -635,7 +635,7 @@ CEResult CardinalityEstimator::passThroughNodeCard(const QuerySolutionNode* node
|
||||
if (!ceRes.isOK()) {
|
||||
return ceRes;
|
||||
}
|
||||
_qsnEstimates.emplace(node, QSNEstimate{.outCE = ceRes.getValue()});
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(ceRes.getValue()));
|
||||
return ceRes.getValue();
|
||||
}
|
||||
|
||||
@ -645,13 +645,13 @@ CEResult CardinalityEstimator::limitNodeCard(const QuerySolutionNode* node, size
|
||||
return ceRes;
|
||||
}
|
||||
if (ceRes == zeroCE) {
|
||||
_qsnEstimates.emplace(node, QSNEstimate{.outCE = ceRes.getValue()});
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(ceRes.getValue()));
|
||||
return ceRes.getValue();
|
||||
}
|
||||
auto limitCE = CardinalityEstimate{CardinalityType{static_cast<double>(limit)},
|
||||
EstimationSource::Metadata};
|
||||
auto est = std::min(limitCE, ceRes.getValue());
|
||||
_qsnEstimates.emplace(node, QSNEstimate{.outCE = est});
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(est));
|
||||
return est;
|
||||
}
|
||||
|
||||
@ -682,7 +682,7 @@ CEResult CardinalityEstimator::indexIntersectionCard(const T* node) {
|
||||
}
|
||||
|
||||
CardinalityEstimate outCE{est.outCE};
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
|
||||
return outCE;
|
||||
}
|
||||
@ -719,7 +719,7 @@ CEResult CardinalityEstimator::indexUnionCard(const T* node) {
|
||||
if (_inputCard != zeroCE) {
|
||||
_conjSels.emplace_back(outCE / _inputCard);
|
||||
}
|
||||
_qsnEstimates.emplace(node, std::move(est));
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(std::move(est)));
|
||||
|
||||
return outCE;
|
||||
}
|
||||
@ -765,7 +765,7 @@ CEResult CardinalityEstimator::estimateConjWithHistogram(
|
||||
return zeroMetadataCE;
|
||||
}
|
||||
}
|
||||
// TODO: SERVER-98094 use tightness depending the context in which a predicate is estimated
|
||||
// TODO SERVER-98094: use tightness depending on the context in which a predicate is estimated
|
||||
|
||||
return estimate(&oil, true);
|
||||
}
|
||||
@ -844,7 +844,7 @@ CEResult CardinalityEstimator::estimate(const SortNode* node) {
|
||||
|
||||
CEResult CardinalityEstimator::estimate(const LimitNode* node) {
|
||||
auto ceRes = limitNodeCard(node, node->limit);
|
||||
if (ceRes != zeroCE) {
|
||||
if (ceRes.isOK() && ceRes != zeroCE) {
|
||||
propagateLimit(node->children[0].get(), node->limit);
|
||||
}
|
||||
return ceRes;
|
||||
@ -856,7 +856,7 @@ CEResult CardinalityEstimator::estimate(const SkipNode* node) {
|
||||
return ceRes;
|
||||
}
|
||||
if (ceRes == zeroCE) {
|
||||
_qsnEstimates.emplace(node, QSNEstimate{.outCE = ceRes.getValue()});
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(ceRes.getValue()));
|
||||
return ceRes.getValue();
|
||||
}
|
||||
auto childEst = ceRes.getValue();
|
||||
@ -869,7 +869,7 @@ CEResult CardinalityEstimator::estimate(const SkipNode* node) {
|
||||
if (skip <= childEst) {
|
||||
card = childEst - skip;
|
||||
}
|
||||
_qsnEstimates.emplace(node, QSNEstimate{.outCE = card});
|
||||
_qsnEstimates.emplace(node, std::make_unique<QSNEstimate>(card));
|
||||
_conjSels.push_back(card / _inputCard);
|
||||
return card;
|
||||
}
|
||||
@ -927,15 +927,18 @@ CEResult CardinalityEstimator::estimate(const AndMatchExpression* node) {
|
||||
}
|
||||
|
||||
// Try to use histograms to estimate all children of this AndMatchExpression.
|
||||
// TODO: Suppose we have an AND with some predicates on 'a' that can answered with a
|
||||
// histogram and some predicates on 'b' that can't. Should we still try to use histogram for
|
||||
// TODO SERVER-122571: Suppose we have an AND with some predicates on 'a' that can be answered with
|
||||
// a histogram and some predicates on 'b' that can't. Should we still try to use histogram for
|
||||
// 'a'? The code as written will not.
|
||||
if (_rankerMode == QueryPlanRankerModeEnum::kHistogramCE ||
|
||||
_rankerMode == QueryPlanRankerModeEnum::kAutomaticCE) {
|
||||
size_t selOffset = _conjSels.size();
|
||||
auto ceRes = tryHistogramAnd(node);
|
||||
if (ceRes.isOK()) {
|
||||
return ceRes.getValue();
|
||||
}
|
||||
// Clean up any selectivities leaked by partial histogram estimation before falling back.
|
||||
popSelectivities(selOffset);
|
||||
// Fall back to generic AndMatchExpression estimation.
|
||||
}
|
||||
|
||||
@ -1173,8 +1176,8 @@ CEResult CardinalityEstimator::estimate(const IndexBounds* node) {
|
||||
}
|
||||
|
||||
if (_rankerMode == QueryPlanRankerModeEnum::kSamplingCE) {
|
||||
// TODO: avoid copies to construct the equality prefix. We could do this by teaching
|
||||
// SamplingEstimator or IndexBounds about the equality prefix concept.
|
||||
// TODO SERVER-122572: avoid copies to construct the equality prefix. We could do this by
|
||||
// teaching SamplingEstimator or IndexBounds about the equality prefix concept.
|
||||
auto eqPrefix = equalityPrefix(node);
|
||||
if (eqPrefix.isIndexSkipScan) {
|
||||
return Status(ErrorCodes::UnsupportedCbrNode, "encountered index skip scan case");
|
||||
@ -1289,7 +1292,7 @@ CEResult CardinalityEstimator::estimate(const OrderedIntervalList* node, bool fo
|
||||
}
|
||||
|
||||
void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t limit) {
|
||||
auto& outCE = _qsnEstimates[node].outCE;
|
||||
auto& outCE = _qsnEstimates[node]->outCE;
|
||||
|
||||
const auto limitCE =
|
||||
CardinalityEstimate{CardinalityType{double(limit)}, EstimationSource::Metadata};
|
||||
@ -1325,7 +1328,7 @@ void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t
|
||||
case STAGE_COLLSCAN:
|
||||
case STAGE_VIRTUAL_SCAN:
|
||||
case STAGE_IXSCAN: {
|
||||
auto& inCE = *_qsnEstimates[node].inCE;
|
||||
auto& inCE = *_qsnEstimates[node]->inCE;
|
||||
if (inCE == zeroCE) {
|
||||
break;
|
||||
}
|
||||
@ -1334,7 +1337,7 @@ void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t
|
||||
break;
|
||||
}
|
||||
case STAGE_FETCH: {
|
||||
const auto& inCE = _qsnEstimates[node->children[0].get()].outCE;
|
||||
const auto& inCE = _qsnEstimates[node->children[0].get()]->outCE;
|
||||
if (inCE == zeroCE) {
|
||||
break;
|
||||
}
|
||||
@ -1379,7 +1382,7 @@ void CardinalityEstimator::propagateLimit(const QuerySolutionNode* node, size_t
|
||||
// ...
|
||||
// * _inputCard = 2000
|
||||
for (auto& child : node->children) {
|
||||
const auto& childCE = _qsnEstimates[child.get()].outCE;
|
||||
const auto& childCE = _qsnEstimates[child.get()]->outCE;
|
||||
if (childCE == zeroCE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -40,12 +40,12 @@ CostEstimate CostEstimator::costTree(const QuerySolutionNode* qsn) {
|
||||
childCosts.push_back(childCost);
|
||||
auto foundChildEst = _estimateMap.find(child.get());
|
||||
tassert(9695101, "All QSNs must have a CE.", foundChildEst != _estimateMap.end());
|
||||
childCEs.push_back(foundChildEst->second.outCE);
|
||||
childCEs.push_back(foundChildEst->second->outCE);
|
||||
}
|
||||
|
||||
auto foundQSNEst = _estimateMap.find(qsn);
|
||||
tassert(9695100, "All QSNs must have a CE.", foundQSNEst != _estimateMap.end());
|
||||
QSNEstimate& qsnEstimate = foundQSNEst->second;
|
||||
QSNEstimate& qsnEstimate = *foundQSNEst->second;
|
||||
computeAndSetNodeCost(qsn, childCosts, childCEs, qsnEstimate);
|
||||
return qsnEstimate.cost;
|
||||
}
|
||||
|
||||
@ -42,27 +42,27 @@ TEST(CostEstimator, FullCollScanVsFilteredCollScan) {
|
||||
EstimateMap estimates;
|
||||
|
||||
auto fullCollScan = makeCollScanPlan(nullptr);
|
||||
estimates[fullCollScan->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)};
|
||||
estimates[fullCollScan->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(100));
|
||||
|
||||
BSONObj query = fromjson("{a: {$gt: 5}}");
|
||||
auto collScanFilter = makeCollScanPlan(parse(query));
|
||||
// The predicate filters out 50 documents.
|
||||
estimates[collScanFilter->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(50)};
|
||||
estimates[collScanFilter->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(50));
|
||||
|
||||
CostEstimator costEstimator{estimates};
|
||||
costEstimator.estimatePlan(*fullCollScan);
|
||||
costEstimator.estimatePlan(*collScanFilter);
|
||||
|
||||
ASSERT_LT(estimates[fullCollScan->root()].cost, estimates[collScanFilter->root()].cost);
|
||||
ASSERT_LT(estimates[fullCollScan->root()]->cost, estimates[collScanFilter->root()]->cost);
|
||||
}
|
||||
|
||||
CostEstimate getCollScanWithFilterCost(const BSONObj& filterObj) {
|
||||
EstimateMap estimates;
|
||||
auto plan = makeCollScanPlan(parse(filterObj));
|
||||
estimates[plan->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(50)};
|
||||
estimates[plan->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(50));
|
||||
CostEstimator costEstimator{estimates};
|
||||
costEstimator.estimatePlan(*plan);
|
||||
return estimates[plan->root()].cost;
|
||||
return estimates[plan->root()]->cost;
|
||||
}
|
||||
|
||||
TEST(CostEstimator, FilterCostForSingleLeaf) {
|
||||
@ -119,18 +119,18 @@ TEST(CostEstimator, VirtualScan) {
|
||||
EstimateMap estimates;
|
||||
|
||||
auto fullCollScan = makeVirtualCollScanPlan(100, nullptr);
|
||||
estimates[fullCollScan->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)};
|
||||
estimates[fullCollScan->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(100));
|
||||
|
||||
BSONObj query = fromjson("{a: {$gt: 5}}");
|
||||
auto collScanFilter = makeVirtualCollScanPlan(100, parse(query));
|
||||
// The predicate filters out 50 documents.
|
||||
estimates[collScanFilter->root()] = QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(50)};
|
||||
estimates[collScanFilter->root()] = std::make_unique<QSNEstimate>(makeCard(100), makeCard(50));
|
||||
|
||||
CostEstimator costEstimator{estimates};
|
||||
costEstimator.estimatePlan(*fullCollScan);
|
||||
costEstimator.estimatePlan(*collScanFilter);
|
||||
|
||||
ASSERT_LT(estimates[fullCollScan->root()].cost, estimates[collScanFilter->root()].cost);
|
||||
ASSERT_LT(estimates[fullCollScan->root()]->cost, estimates[collScanFilter->root()]->cost);
|
||||
}
|
||||
|
||||
TEST(CostEstimator, PointIndexScanLessCostThanRange) {
|
||||
@ -139,10 +139,10 @@ TEST(CostEstimator, PointIndexScanLessCostThanRange) {
|
||||
auto testNss = NamespaceString::createNamespaceString_forTest("testdb.coll");
|
||||
auto pointIndexScan = makeIndexScanFetchPlan(testNss, makePointIntervalBounds(1, "a"), {"a"});
|
||||
// Fetch
|
||||
estimates[pointIndexScan->root()] = QSNEstimate{.outCE = makeCard(1)};
|
||||
estimates[pointIndexScan->root()] = std::make_unique<QSNEstimate>(makeCard(1));
|
||||
// IndexScan
|
||||
estimates[pointIndexScan->root()->children[0].get()] =
|
||||
QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(1)};
|
||||
std::make_unique<QSNEstimate>(makeCard(100), makeCard(1));
|
||||
|
||||
auto rangeIndexScan = makeIndexScanFetchPlan(
|
||||
testNss,
|
||||
@ -150,27 +150,27 @@ TEST(CostEstimator, PointIndexScanLessCostThanRange) {
|
||||
BSON("" << 5 << "" << 6), BoundInclusion::kIncludeBothStartAndEndKeys, "a"),
|
||||
{"a"});
|
||||
// Fetch
|
||||
estimates[rangeIndexScan->root()] = QSNEstimate{.outCE = makeCard(10)};
|
||||
estimates[rangeIndexScan->root()] = std::make_unique<QSNEstimate>(makeCard(10));
|
||||
// IndexScan
|
||||
estimates[rangeIndexScan->root()->children[0].get()] =
|
||||
QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(10)};
|
||||
std::make_unique<QSNEstimate>(makeCard(100), makeCard(10));
|
||||
|
||||
CostEstimator costEstimator{estimates};
|
||||
costEstimator.estimatePlan(*pointIndexScan);
|
||||
costEstimator.estimatePlan(*rangeIndexScan);
|
||||
|
||||
// Cost of point scan plan should be less than that of the range scan
|
||||
ASSERT_LT(estimates[pointIndexScan->root()].cost, estimates[rangeIndexScan->root()].cost);
|
||||
ASSERT_LT(estimates[pointIndexScan->root()]->cost, estimates[rangeIndexScan->root()]->cost);
|
||||
// Cost of fetch node should be greater than cost of the index scan as costs are cumulative
|
||||
ASSERT_GT(estimates[pointIndexScan->root()].cost,
|
||||
estimates[pointIndexScan->root()->children[0].get()].cost);
|
||||
ASSERT_GT(estimates[pointIndexScan->root()]->cost,
|
||||
estimates[pointIndexScan->root()->children[0].get()]->cost);
|
||||
}
|
||||
|
||||
std::unique_ptr<IndexScanNode> indexScanNode(const NamespaceString& nss,
|
||||
EstimateMap& estimates,
|
||||
QSNEstimate est) {
|
||||
auto node = makeIndexScan(nss, makePointIntervalBounds(1, "a"), {"a"});
|
||||
estimates[node.get()] = est;
|
||||
estimates[node.get()] = std::make_unique<QSNEstimate>(std::move(est));
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -181,33 +181,33 @@ void testIndexCombinationDependsOnChildren() {
|
||||
auto indexIntersectNode = std::make_unique<IndexCombinationNode>();
|
||||
indexIntersectNode->addChildren([&]() {
|
||||
std::vector<std::unique_ptr<QuerySolutionNode>> children;
|
||||
children.push_back(indexScanNode(
|
||||
testNss, estimates, QSNEstimate{.inCE = makeCard(10), .outCE = makeCard(10)}));
|
||||
children.push_back(indexScanNode(
|
||||
testNss, estimates, QSNEstimate{.inCE = makeCard(10), .outCE = makeCard(10)}));
|
||||
children.push_back(
|
||||
indexScanNode(testNss, estimates, QSNEstimate{makeCard(10), makeCard(10)}));
|
||||
children.push_back(
|
||||
indexScanNode(testNss, estimates, QSNEstimate{makeCard(10), makeCard(10)}));
|
||||
return children;
|
||||
}());
|
||||
estimates[indexIntersectNode.get()] = QSNEstimate{.outCE = makeCard(5)};
|
||||
estimates[indexIntersectNode.get()] = std::make_unique<QSNEstimate>(makeCard(5));
|
||||
auto cheapPlan = std::make_unique<QuerySolution>();
|
||||
cheapPlan->setRoot(std::move(indexIntersectNode));
|
||||
|
||||
auto expensiveIndexIntersectNode = std::make_unique<IndexCombinationNode>();
|
||||
expensiveIndexIntersectNode->addChildren([&]() {
|
||||
std::vector<std::unique_ptr<QuerySolutionNode>> children;
|
||||
children.push_back(indexScanNode(
|
||||
testNss, estimates, QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)}));
|
||||
children.push_back(indexScanNode(
|
||||
testNss, estimates, QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)}));
|
||||
children.push_back(
|
||||
indexScanNode(testNss, estimates, QSNEstimate{makeCard(100), makeCard(100)}));
|
||||
children.push_back(
|
||||
indexScanNode(testNss, estimates, QSNEstimate{makeCard(100), makeCard(100)}));
|
||||
return children;
|
||||
}());
|
||||
estimates[expensiveIndexIntersectNode.get()] = QSNEstimate{.outCE = makeCard(5)};
|
||||
estimates[expensiveIndexIntersectNode.get()] = std::make_unique<QSNEstimate>(makeCard(5));
|
||||
auto expensivePlan = std::make_unique<QuerySolution>();
|
||||
expensivePlan->setRoot(std::move(expensiveIndexIntersectNode));
|
||||
|
||||
CostEstimator costEstimator{estimates};
|
||||
costEstimator.estimatePlan(*cheapPlan);
|
||||
costEstimator.estimatePlan(*expensivePlan);
|
||||
ASSERT_LT(estimates[cheapPlan->root()].cost, estimates[expensivePlan->root()].cost);
|
||||
ASSERT_LT(estimates[cheapPlan->root()]->cost, estimates[expensivePlan->root()]->cost);
|
||||
}
|
||||
|
||||
// Increasing child cost increases the cost of index intersection and union plans
|
||||
@ -220,33 +220,31 @@ TEST(CostEstimator, IndexCombinationDependsOnChildren) {
|
||||
|
||||
std::unique_ptr<CollectionScanNode> collScanNode(EstimateMap& estimates, QSNEstimate est) {
|
||||
auto node = std::make_unique<CollectionScanNode>();
|
||||
estimates[node.get()] = est;
|
||||
estimates[node.get()] = std::make_unique<QSNEstimate>(std::move(est));
|
||||
return node;
|
||||
}
|
||||
|
||||
template <typename SortNode>
|
||||
void testSortCostDependsOnChildren() {
|
||||
EstimateMap estimates;
|
||||
auto cheapCollScan =
|
||||
collScanNode(estimates, QSNEstimate{.inCE = makeCard(10), .outCE = makeCard(10)});
|
||||
auto cheapCollScan = collScanNode(estimates, QSNEstimate{makeCard(10), makeCard(10)});
|
||||
auto cheapSort = std::make_unique<SortNode>(
|
||||
std::move(cheapCollScan), BSON("a" << 1), 0, LimitSkipParameterization::Disabled);
|
||||
estimates[cheapSort.get()] = QSNEstimate{.outCE = makeCard(10)};
|
||||
estimates[cheapSort.get()] = std::make_unique<QSNEstimate>(makeCard(10));
|
||||
auto cheapPlan = std::make_unique<QuerySolution>();
|
||||
cheapPlan->setRoot(std::move(cheapSort));
|
||||
|
||||
auto expsensiveCollScan =
|
||||
collScanNode(estimates, QSNEstimate{.inCE = makeCard(100), .outCE = makeCard(100)});
|
||||
auto expsensiveCollScan = collScanNode(estimates, QSNEstimate{makeCard(100), makeCard(100)});
|
||||
auto expensiveSort = std::make_unique<SortNode>(
|
||||
std::move(expsensiveCollScan), BSON("a" << 1), 0, LimitSkipParameterization::Disabled);
|
||||
estimates[expensiveSort.get()] = QSNEstimate{.outCE = makeCard(100)};
|
||||
estimates[expensiveSort.get()] = std::make_unique<QSNEstimate>(makeCard(100));
|
||||
auto expensivePlan = std::make_unique<QuerySolution>();
|
||||
expensivePlan->setRoot(std::move(expensiveSort));
|
||||
|
||||
CostEstimator costEstimator{estimates};
|
||||
costEstimator.estimatePlan(*cheapPlan);
|
||||
costEstimator.estimatePlan(*expensivePlan);
|
||||
ASSERT_LT(estimates[cheapPlan->root()].cost, estimates[expensivePlan->root()].cost);
|
||||
ASSERT_LT(estimates[cheapPlan->root()]->cost, estimates[expensivePlan->root()]->cost);
|
||||
}
|
||||
|
||||
TEST(CostEstimator, SortDefaultOrSimple) {
|
||||
|
||||
@ -592,6 +592,23 @@ struct QSNEstimate {
|
||||
boost::optional<CardinalityEstimate> inCE;
|
||||
CardinalityEstimate outCE{CardinalityType{0}, EstimationSource::Code};
|
||||
CostEstimate cost{CostType::maxValue(), EstimationSource::Code};
|
||||
|
||||
QSNEstimate() = default;
|
||||
QSNEstimate(CardinalityEstimate outCE,
|
||||
CostEstimate cost = CostEstimate{CostType::maxValue(), EstimationSource::Code})
|
||||
: outCE(std::move(outCE)), cost(std::move(cost)) {}
|
||||
QSNEstimate(boost::optional<CardinalityEstimate> inCE, CardinalityEstimate outCE)
|
||||
: inCE(std::move(inCE)), outCE(std::move(outCE)) {}
|
||||
|
||||
virtual ~QSNEstimate() = default;
|
||||
|
||||
virtual void serialize(BSONObjBuilder& bob) const {
|
||||
bob.append("costEstimate", cost.toDouble());
|
||||
bob.append("cardinalityEstimate", outCE.toDouble());
|
||||
BSONObjBuilder metadataBob(bob.subobjStart("estimatesMetadata"));
|
||||
metadataBob.append("ceSource", toStringData(outCE.source()));
|
||||
metadataBob.done();
|
||||
}
|
||||
};
|
||||
|
||||
// Predefined constants
|
||||
|
||||
@ -33,11 +33,13 @@
|
||||
#include "mongo/db/query/compiler/physical_model/query_solution/query_solution.h"
|
||||
#include "mongo/util/modules.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace mongo::cost_based_ranker {
|
||||
|
||||
/**
|
||||
* EstimateMap is a type representing a mapping from QuerySolutionNodes to their QSNEstimates (cardinality and cost estimates).
|
||||
*/
|
||||
using EstimateMap = absl::flat_hash_map<const QuerySolutionNode*, QSNEstimate>;
|
||||
using EstimateMap = absl::flat_hash_map<const QuerySolutionNode*, std::unique_ptr<QSNEstimate>>;
|
||||
|
||||
} // namespace mongo::cost_based_ranker
|
||||
|
||||
@ -92,7 +92,7 @@ NodeCardinalities JoinCardinalityEstimator::extractNodeCardinalities(
|
||||
tassert(11514600, "Missing QSN for CanonicalQuery", qsn != ctx.cbrCqQsns.end());
|
||||
auto cbrRes = estimates.find(qsn->second->root());
|
||||
tassert(11514601, "Missing estimate for QSN root", cbrRes != estimates.end());
|
||||
nodeCardinalities.push_back(cbrRes->second.outCE);
|
||||
nodeCardinalities.push_back(cbrRes->second->outCE);
|
||||
}
|
||||
return nodeCardinalities;
|
||||
}
|
||||
|
||||
@ -184,12 +184,12 @@ TEST_F(JoinPredicateEstimatorFixture, ExtractNodeCardinalities) {
|
||||
cost_based_ranker::EstimateMap estimates;
|
||||
{
|
||||
auto aPlan = makeCollScanPlan(aNss);
|
||||
estimates[aPlan->root()] = {inCE, aCE};
|
||||
estimates[aPlan->root()] = std::make_unique<cost_based_ranker::QSNEstimate>(inCE, aCE);
|
||||
cbrCqQsns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan);
|
||||
}
|
||||
{
|
||||
auto bPlan = makeCollScanPlan(bNss);
|
||||
estimates[bPlan->root()] = {inCE, bCE};
|
||||
estimates[bPlan->root()] = std::make_unique<cost_based_ranker::QSNEstimate>(inCE, bCE);
|
||||
cbrCqQsns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan);
|
||||
}
|
||||
|
||||
|
||||
@ -166,7 +166,7 @@ void addEstimatesIfExplain(const JoinReorderingContext& ctx,
|
||||
|
||||
auto ce = peCtx.getJoinCardinalityEstimator()->getOrEstimateSubsetCardinality(set);
|
||||
estimates.insert_or_assign(
|
||||
node, cost_based_ranker::QSNEstimate{.outCE = ce, .cost = cost.getTotalCost()});
|
||||
node, std::make_unique<cost_based_ranker::QSNEstimate>(ce, cost.getTotalCost()));
|
||||
}
|
||||
|
||||
// Forward-declare because of mutual recursion.
|
||||
|
||||
@ -45,7 +45,7 @@ using SingleTableAccessTestFixture = JoinOrderingTestFixture;
|
||||
void assertQuerySolutionHasEstimate(const QuerySolutionNode* qsn, const EstimateMap& estimates) {
|
||||
auto it = estimates.find(qsn);
|
||||
ASSERT(it != estimates.end());
|
||||
ASSERT_EQ(EstimationSource::Sampling, it->second.outCE.source());
|
||||
ASSERT_EQ(EstimationSource::Sampling, it->second->outCE.source());
|
||||
for (auto&& child : qsn->children) {
|
||||
assertQuerySolutionHasEstimate(child.get(), estimates);
|
||||
}
|
||||
|
||||
@ -264,7 +264,7 @@ void captureCardinalityEstimationMethodForQueryStats(
|
||||
if (it != maybeExplainData->estimates.end()) {
|
||||
auto& ceMethods =
|
||||
CurOp::get(opCtx)->debug().getAdditiveMetrics().cardinalityEstimationMethods;
|
||||
switch (it->second.outCE.source()) {
|
||||
switch (it->second->outCE.source()) {
|
||||
case cost_based_ranker::EstimationSource::Histogram:
|
||||
ceMethods.setHistogram(ceMethods.getHistogram().value_or(0) + 1);
|
||||
break;
|
||||
|
||||
@ -67,7 +67,9 @@ inline PlanExplainerData& operator<<(PlanExplainerData& lhs, PlanExplainerData&&
|
||||
std::make_move_iterator(rhs.rejectedPlansWithStages.begin()),
|
||||
std::make_move_iterator(rhs.rejectedPlansWithStages.end()));
|
||||
lhs.planStageQsnMap.insert(rhs.planStageQsnMap.begin(), rhs.planStageQsnMap.end());
|
||||
lhs.estimates.insert(rhs.estimates.begin(), rhs.estimates.end());
|
||||
for (auto& [k, v] : rhs.estimates) {
|
||||
lhs.estimates.insert_or_assign(k, std::move(v));
|
||||
}
|
||||
return lhs;
|
||||
}
|
||||
|
||||
|
||||
@ -270,9 +270,8 @@ void statsToBSON(const stage_builder::PlanStageToQsnMap& planStageQsnMap,
|
||||
|
||||
// Cost and cardinality of the stage.
|
||||
if (querySolutionNode && estimates.contains(querySolutionNode)) {
|
||||
const auto& est = estimates.at(querySolutionNode);
|
||||
bob->append("costEstimate", est.cost.toDouble());
|
||||
bob->append("cardinalityEstimate", est.outCE.toDouble());
|
||||
const auto& est = *estimates.at(querySolutionNode);
|
||||
est.serialize(*bob);
|
||||
// Display 'inCE' as 'numKeys' for index scan and 'numDocs' for collection scan.
|
||||
if (est.inCE.has_value()) {
|
||||
double ce = est.inCE->toDouble();
|
||||
@ -282,9 +281,6 @@ void statsToBSON(const stage_builder::PlanStageToQsnMap& planStageQsnMap,
|
||||
bob->append("numDocsEstimate", ce);
|
||||
}
|
||||
}
|
||||
BSONObjBuilder metadataBob(bob->subobjStart("estimatesMetadata"));
|
||||
metadataBob.append("ceSource", toStringData(est.outCE.source()));
|
||||
metadataBob.done();
|
||||
}
|
||||
|
||||
// Display the BSON representation of the filter, if there is one.
|
||||
|
||||
@ -294,12 +294,7 @@ void statsToBSON(const QuerySolutionNode* node,
|
||||
|
||||
// Cost and cardinality of the stage.
|
||||
if (estimates.contains(node)) {
|
||||
const auto& est = estimates.at(node);
|
||||
bob->append("costEstimate", est.cost.toDouble());
|
||||
bob->append("cardinalityEstimate", est.outCE.toDouble());
|
||||
BSONObjBuilder metadataBob(bob->subobjStart("estimatesMetadata"));
|
||||
metadataBob.append("ceSource", toStringData(est.outCE.source()));
|
||||
metadataBob.done();
|
||||
estimates.at(node)->serialize(*bob);
|
||||
}
|
||||
|
||||
// Display the BSON representation of the filter, if there is one.
|
||||
|
||||
@ -609,14 +609,14 @@ TEST_F(PlanExplainerTest, PlanExplainerDataMergeFull) {
|
||||
// Use distinct pointer values to avoid key collision
|
||||
data1.planStageQsnMap.emplace(reinterpret_cast<const PlanStage*>(0x1), nullptr);
|
||||
data1.estimates.emplace(reinterpret_cast<const QuerySolutionNode*>(0x1),
|
||||
cost_based_ranker::QSNEstimate{});
|
||||
std::make_unique<cost_based_ranker::QSNEstimate>());
|
||||
|
||||
PlanExplainerData data2;
|
||||
auto qsn2 = std::make_unique<QuerySolution>();
|
||||
data2.rejectedPlansWithStages.push_back({std::move(qsn2), nullptr});
|
||||
data2.planStageQsnMap.emplace(reinterpret_cast<const PlanStage*>(0x2), nullptr);
|
||||
data2.estimates.emplace(reinterpret_cast<const QuerySolutionNode*>(0x2),
|
||||
cost_based_ranker::QSNEstimate{});
|
||||
std::make_unique<cost_based_ranker::QSNEstimate>());
|
||||
|
||||
data1 << std::move(data2);
|
||||
|
||||
|
||||
@ -2186,7 +2186,7 @@ std::pair<SbStage, PlanStageSlots> SlotBasedStageBuilder::buildHashJoinEmbedding
|
||||
if (_estimates) {
|
||||
if (auto it = _estimates->find(hashJoinEmbeddingNode->children[0].get());
|
||||
it != _estimates->end()) {
|
||||
estimatedBuildCardinality = static_cast<size_t>(it->second.outCE.toDouble());
|
||||
estimatedBuildCardinality = static_cast<size_t>(it->second->outCE.toDouble());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user