SERVER-126960 Projection optimizations (#53905)
Co-authored-by: Denis Grebennicov <denis.grebennicov@mongodb.com> GitOrigin-RevId: 2192f2f863866c62da15de75263ce1bbf156d8c0
This commit is contained in:
parent
f5e43a74c9
commit
967cd36bd9
@ -133,6 +133,7 @@ mongo_cc_unit_test(
|
||||
"projection_executor_test.cpp",
|
||||
"projection_executor_utils_test.cpp",
|
||||
"projection_executor_wildcard_access_test.cpp",
|
||||
"projection_node_optimizations_test.cpp",
|
||||
"serialize_ejson_utils_test.cpp",
|
||||
"//src/mongo/db/exec/agg:exec_pipeline_test.cpp",
|
||||
"//src/mongo/db/exec/agg:graph_lookup_test.cpp",
|
||||
|
||||
@ -96,10 +96,13 @@ void FastPathProjectionNode<ProjectionNode, BaseProjectionNode>::_applyProjectio
|
||||
const auto bsonElement{it.next()};
|
||||
const auto fieldName{bsonElement.fieldNameStringData()};
|
||||
|
||||
if (this->_projectedFieldsSet.find(fieldName) != this->_projectedFieldsSet.end()) {
|
||||
// Pre-compute the hash once and reuse it for both set and map lookups.
|
||||
const auto hashedName = StringMapHasher{}.hashed_key(fieldName);
|
||||
|
||||
if (this->_projectedFieldsSet.find(hashedName) != this->_projectedFieldsSet.end()) {
|
||||
projectionNode->_applyToProjectedField(bsonElement, bob);
|
||||
--nFieldsLeft;
|
||||
} else if (auto childIt = this->_children.find(fieldName);
|
||||
} else if (auto childIt = this->_children.find(hashedName);
|
||||
childIt != this->_children.end()) {
|
||||
auto child = static_cast<const ProjectionNode*>(childIt->second.get());
|
||||
|
||||
|
||||
@ -95,14 +95,12 @@ void ProjectionNode::_addExpressionForPath(const FieldPath& path,
|
||||
}
|
||||
|
||||
boost::intrusive_ptr<Expression> ProjectionNode::getExpressionForPath(const FieldPath& path) const {
|
||||
// The FieldPath always conatins at least one field.
|
||||
// The FieldPath always contains at least one field.
|
||||
auto fieldName = std::string{path.getFieldName(0)};
|
||||
|
||||
if (path.getPathLength() == 1) {
|
||||
if (_expressions.find(fieldName) != _expressions.end()) {
|
||||
return _expressions.at(fieldName);
|
||||
}
|
||||
return nullptr;
|
||||
auto it = _expressions.find(fieldName);
|
||||
return (it != _expressions.end()) ? it->second : nullptr;
|
||||
}
|
||||
if (auto child = getChild(fieldName)) {
|
||||
return child->getExpressionForPath(path.tail());
|
||||
@ -157,7 +155,10 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument*
|
||||
while (it.more()) {
|
||||
auto fieldName = it.fieldName();
|
||||
|
||||
if (_projectedFieldsSet.find(fieldName) != _projectedFieldsSet.end()) {
|
||||
// Pre-compute the hash once and reuse it for both set and map lookups.
|
||||
const auto hashedName = StringMapHasher{}.hashed_key(fieldName);
|
||||
|
||||
if (_projectedFieldsSet.find(hashedName) != _projectedFieldsSet.end()) {
|
||||
if (isIncl) {
|
||||
outputProjectedField(fieldName, it.next().second, outputDoc);
|
||||
} else {
|
||||
@ -165,7 +166,7 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument*
|
||||
it.advance();
|
||||
}
|
||||
++projectedFields;
|
||||
} else if (auto childIt = _children.find(fieldName); childIt != _children.end()) {
|
||||
} else if (auto childIt = _children.find(hashedName); childIt != _children.end()) {
|
||||
outputProjectedField(
|
||||
fieldName, childIt->second->applyProjectionsToValue(it.next().second), outputDoc);
|
||||
++projectedFields;
|
||||
@ -174,7 +175,7 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument*
|
||||
}
|
||||
|
||||
// Check if we can avoid reading from the document any further.
|
||||
if (_maxFieldsToProject && _maxFieldsToProject <= projectedFields) {
|
||||
if (projectedFields >= _maxFieldsToProject) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -218,9 +219,18 @@ void ProjectionNode::applyExpressions(const Document& root, MutableDocument* out
|
||||
for (auto&& field : _orderToProcessAdditionsAndChildren) {
|
||||
auto childIt = _children.find(field);
|
||||
if (childIt != _children.end()) {
|
||||
outputDoc->setField(field,
|
||||
childIt->second->applyExpressionsToValue(
|
||||
root, outputDoc->peek()[StringData{field}]));
|
||||
// Use position-based access to avoid a second hash lookup when reading and writing the
|
||||
// field value.
|
||||
const Document& doc = outputDoc->peek();
|
||||
const auto pos = doc.positionOf(StringData{field});
|
||||
Value currentValue = pos.found() ? doc.getField(pos) : Value{};
|
||||
Value newValue =
|
||||
childIt->second->applyExpressionsToValue(root, std::move(currentValue));
|
||||
if (pos.found()) {
|
||||
outputDoc->setField(pos, std::move(newValue));
|
||||
} else {
|
||||
outputDoc->setField(StringData{field}, std::move(newValue));
|
||||
}
|
||||
} else {
|
||||
auto expressionIt = _expressions.find(field);
|
||||
tassert(7241726,
|
||||
@ -340,7 +350,7 @@ void ProjectionNode::optimize() {
|
||||
childPair.second->optimize();
|
||||
}
|
||||
|
||||
_maxFieldsToProject = maxFieldsToProject();
|
||||
_maxFieldsToProject = maxFieldsToProject().value_or(kUnlimitedFieldsToProject);
|
||||
}
|
||||
|
||||
Document ProjectionNode::serialize(const SerializationOptions& options) const {
|
||||
|
||||
@ -45,6 +45,7 @@
|
||||
#include "mongo/util/string_map.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
@ -275,7 +276,7 @@ private:
|
||||
* node added).
|
||||
*/
|
||||
void makeOptimizationsStale() {
|
||||
_maxFieldsToProject = boost::none;
|
||||
_maxFieldsToProject = kUnlimitedFieldsToProject;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -288,9 +289,14 @@ private:
|
||||
*/
|
||||
void _addProjectionForPath(const FieldPath& path);
|
||||
|
||||
/**
|
||||
* Sentinel value for '_maxFieldsToProject'.
|
||||
*/
|
||||
static constexpr size_t kUnlimitedFieldsToProject = std::numeric_limits<size_t>::max();
|
||||
|
||||
// Maximum number of fields that need to be projected. This allows for an "early" return
|
||||
// optimization which means we don't have to iterate over an entire document. The value is
|
||||
// stored here to avoid re-computation for each document.
|
||||
boost::optional<size_t> _maxFieldsToProject;
|
||||
size_t _maxFieldsToProject = kUnlimitedFieldsToProject;
|
||||
};
|
||||
} // namespace mongo::projection_executor
|
||||
|
||||
250
src/mongo/db/exec/projection_node_optimizations_test.cpp
Normal file
250
src/mongo/db/exec/projection_node_optimizations_test.cpp
Normal file
@ -0,0 +1,250 @@
|
||||
/**
|
||||
* Copyright (C) 2026-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
// Unit tests for ProjectionNode optimizations:
|
||||
// - Opt 1: Pre-computed hash reuse in applyProjections and _applyProjections
|
||||
// - Opt 2: Cached dispatch table (_orderedAdditions) in applyExpressions
|
||||
// - Opt 3: Single-lookup getExpressionForPath
|
||||
// - Opt 5: size_t _maxFieldsToProject early-exit sentinel
|
||||
// - Opt 6: Position-based Document access in child expression dispatch
|
||||
|
||||
#include "mongo/db/exec/add_fields_projection_executor.h"
|
||||
#include "mongo/db/exec/document_value/document.h"
|
||||
#include "mongo/db/exec/document_value/document_value_test_util.h"
|
||||
#include "mongo/db/exec/document_value/value.h"
|
||||
#include "mongo/db/exec/exclusion_projection_executor.h"
|
||||
#include "mongo/db/exec/inclusion_projection_executor.h"
|
||||
#include "mongo/db/exec/projection_executor_builder.h"
|
||||
#include "mongo/db/pipeline/expression_context_for_test.h"
|
||||
#include "mongo/db/query/compiler/logical_model/projection/projection_parser.h"
|
||||
#include "mongo/unittest/unittest.h"
|
||||
|
||||
#include <boost/smart_ptr/intrusive_ptr.hpp>
|
||||
|
||||
namespace mongo::projection_executor {
|
||||
namespace {
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
auto makeExpCtx() {
|
||||
return boost::intrusive_ptr<ExpressionContextForTest>{new ExpressionContextForTest()};
|
||||
}
|
||||
|
||||
// Builds an InclusionProjectionExecutor through the standard builder path, which calls
|
||||
// optimize() internally.
|
||||
auto makeInclusionExecutor(const boost::intrusive_ptr<ExpressionContextForTest>& expCtx,
|
||||
const BSONObj& spec) {
|
||||
auto projection = projection_ast::parseAndAnalyze(expCtx, spec, ProjectionPolicies{});
|
||||
return buildProjectionExecutor(
|
||||
expCtx, &projection, ProjectionPolicies{}, kDefaultBuilderParams);
|
||||
}
|
||||
|
||||
// Builds an ExclusionProjectionExecutor through the standard builder path.
|
||||
auto makeExclusionExecutor(const boost::intrusive_ptr<ExpressionContextForTest>& expCtx,
|
||||
const BSONObj& spec) {
|
||||
ProjectionPolicies policies;
|
||||
auto projection = projection_ast::parseAndAnalyze(expCtx, spec, policies);
|
||||
return buildProjectionExecutor(expCtx, &projection, policies, kDefaultBuilderParams);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pre-computed hash reuse — correctness of inclusion/exclusion projections.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Inclusion projection with several fields exercises the path where hashed_key is computed
|
||||
// once and reused for both the _projectedFieldsSet and _children lookups.
|
||||
TEST(ProjectionNodeHashReuse, InclusionProjectionRetainsCorrectFields) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor =
|
||||
makeInclusionExecutor(expCtx, BSON("_id" << 0 << "a" << 1 << "b" << 1 << "c" << 1));
|
||||
|
||||
auto result = executor->applyTransformation(Document{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}});
|
||||
ASSERT_DOCUMENT_EQ(result, (Document{{"a", 1}, {"b", 2}, {"c", 3}}));
|
||||
}
|
||||
|
||||
// Fields absent from the projection spec should be excluded.
|
||||
TEST(ProjectionNodeHashReuse, InclusionProjectionExcludesNonProjectedFields) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = makeInclusionExecutor(expCtx, BSON("_id" << 0 << "x" << 1));
|
||||
|
||||
auto result = executor->applyTransformation(
|
||||
Document{{"x", 10}, {"y", 20}, {"z", 30}, {"w", 40}, {"v", 50}});
|
||||
ASSERT_DOCUMENT_EQ(result, (Document{{"x", 10}}));
|
||||
}
|
||||
|
||||
// Exclusion projection: fields in the spec are removed, others are kept.
|
||||
TEST(ProjectionNodeHashReuse, ExclusionProjectionRemovesSpecifiedFields) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = makeExclusionExecutor(expCtx, BSON("d" << 0 << "e" << 0));
|
||||
|
||||
auto result =
|
||||
executor->applyTransformation(Document{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}});
|
||||
ASSERT_DOCUMENT_EQ(result, (Document{{"a", 1}, {"b", 2}, {"c", 3}}));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Single-lookup getExpressionForPath.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// getExpressionForPath should return the stored expression without requiring a second lookup.
|
||||
TEST(ProjectionNodeGetExpression, ReturnsExpressionForTopLevelPath) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = AddFieldsProjectionExecutor::create(expCtx, BSON("computed" << "$src"));
|
||||
const auto& root = executor->getRoot();
|
||||
|
||||
auto expr = root.getExpressionForPath(FieldPath("computed"));
|
||||
ASSERT_TRUE(expr != nullptr);
|
||||
// Verify the expression evaluates correctly on a sample document.
|
||||
auto result =
|
||||
expr->evaluate(Document{{"src", Value{99}}}, &expr->getExpressionContext()->variables);
|
||||
ASSERT_VALUE_EQ(result, Value{99});
|
||||
}
|
||||
|
||||
// Querying a path that does not exist should return nullptr without crashing.
|
||||
TEST(ProjectionNodeGetExpression, ReturnsNullForMissingPath) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = AddFieldsProjectionExecutor::create(expCtx, BSON("a" << "$x"));
|
||||
const auto& root = executor->getRoot();
|
||||
|
||||
ASSERT_TRUE(root.getExpressionForPath(FieldPath("b")) == nullptr);
|
||||
}
|
||||
|
||||
// For a nested path, the expression should be found at the correct depth.
|
||||
TEST(ProjectionNodeGetExpression, ReturnsExpressionForNestedPath) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = AddFieldsProjectionExecutor::create(expCtx, BSON("outer.inner" << "$val"));
|
||||
const auto& root = executor->getRoot();
|
||||
|
||||
auto expr = root.getExpressionForPath(FieldPath("outer.inner"));
|
||||
ASSERT_TRUE(expr != nullptr);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// _maxFieldsToProject early-exit sentinel.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// An inclusion projection over a wide document should stop reading once all projected
|
||||
// fields have been found.
|
||||
TEST(ProjectionNodeMaxFieldsEarlyExit, InclusionProjectionOnWideDocument) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = makeInclusionExecutor(expCtx, BSON("_id" << 0 << "a" << 1 << "b" << 1));
|
||||
|
||||
// Document has many extra fields; the projection should only retain a and b.
|
||||
auto result = executor->applyTransformation(
|
||||
Document{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}, {"f", 6}, {"g", 7}});
|
||||
ASSERT_DOCUMENT_EQ(result, (Document{{"a", 1}, {"b", 2}}));
|
||||
}
|
||||
|
||||
// Early exit must not drop fields that appear before the limit is reached.
|
||||
TEST(ProjectionNodeMaxFieldsEarlyExit, InclusionProjectionRetainsAllProjectedFields) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = makeInclusionExecutor(
|
||||
expCtx, BSON("_id" << 0 << "a" << 1 << "b" << 1 << "c" << 1 << "d" << 1));
|
||||
|
||||
auto result = executor->applyTransformation(
|
||||
Document{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"extra1", 5}, {"extra2", 6}});
|
||||
ASSERT_DOCUMENT_EQ(result, (Document{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}}));
|
||||
}
|
||||
|
||||
// A projection where some projected fields are absent from the document should still be correct.
|
||||
TEST(ProjectionNodeMaxFieldsEarlyExit, InclusionProjectionWithMissingFields) {
|
||||
auto expCtx = makeExpCtx();
|
||||
auto executor = makeInclusionExecutor(expCtx, BSON("_id" << 0 << "a" << 1 << "missing" << 1));
|
||||
|
||||
auto result = executor->applyTransformation(Document{{"a", 1}, {"b", 2}, {"c", 3}});
|
||||
ASSERT_DOCUMENT_EQ(result, (Document{{"a", 1}}));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Position-based Document access in applyExpressions child dispatch.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// When a child ProjectionNode has computed fields, the fast path should read and write the
|
||||
// nested value via position-based access, producing the same result as the fallback path.
|
||||
TEST(ProjectionNodePositionBasedAccess, NestedComputedFieldFastPath) {
|
||||
auto expCtx = makeExpCtx();
|
||||
const BSONObj spec = BSON("outer.inner" << "$src");
|
||||
const Document inputDoc{{"src", 100}, {"outer", Document{{"inner", 0}, {"other", 1}}}};
|
||||
|
||||
// Fallback path (no optimize).
|
||||
auto execFallback = AddFieldsProjectionExecutor::create(expCtx, spec);
|
||||
auto resultFallback = execFallback->applyProjection(inputDoc);
|
||||
|
||||
// Fast path (with optimize, builds _orderedAdditions with child entries).
|
||||
auto execFast = AddFieldsProjectionExecutor::create(expCtx, spec);
|
||||
execFast->optimize();
|
||||
auto resultFast = execFast->applyProjection(inputDoc);
|
||||
|
||||
ASSERT_DOCUMENT_EQ(resultFallback, resultFast);
|
||||
// The inner field should be set to 100 (from $src).
|
||||
ASSERT_VALUE_EQ(resultFast["outer"]["inner"], Value{100});
|
||||
}
|
||||
|
||||
// Nested field computation when the outer field does not yet exist in the document.
|
||||
TEST(ProjectionNodePositionBasedAccess, NestedComputedFieldOnMissingParent) {
|
||||
auto expCtx = makeExpCtx();
|
||||
const BSONObj spec = BSON("newNested.value" << "$x");
|
||||
const Document inputDoc{{"x", 7}};
|
||||
|
||||
auto execFallback = AddFieldsProjectionExecutor::create(expCtx, spec);
|
||||
auto resultFallback = execFallback->applyProjection(inputDoc);
|
||||
|
||||
auto execFast = AddFieldsProjectionExecutor::create(expCtx, spec);
|
||||
execFast->optimize();
|
||||
auto resultFast = execFast->applyProjection(inputDoc);
|
||||
|
||||
ASSERT_DOCUMENT_EQ(resultFallback, resultFast);
|
||||
ASSERT_VALUE_EQ(resultFast["newNested"]["value"], Value{7});
|
||||
}
|
||||
|
||||
// Multiple nested computed fields on the same parent exercise the child path in the dispatch
|
||||
// table with multiple siblings.
|
||||
TEST(ProjectionNodePositionBasedAccess, MultipleNestedComputedFieldsSiblings) {
|
||||
auto expCtx = makeExpCtx();
|
||||
const BSONObj spec = BSON("obj.p" << "$a"
|
||||
<< "obj.q"
|
||||
<< "$b");
|
||||
const Document inputDoc{{"a", 10}, {"b", 20}};
|
||||
|
||||
auto execFallback = AddFieldsProjectionExecutor::create(expCtx, spec);
|
||||
auto resultFallback = execFallback->applyProjection(inputDoc);
|
||||
|
||||
auto execFast = AddFieldsProjectionExecutor::create(expCtx, spec);
|
||||
execFast->optimize();
|
||||
auto resultFast = execFast->applyProjection(inputDoc);
|
||||
|
||||
ASSERT_DOCUMENT_EQ(resultFallback, resultFast);
|
||||
ASSERT_VALUE_EQ(resultFast["obj"]["p"], Value{10});
|
||||
ASSERT_VALUE_EQ(resultFast["obj"]["q"], Value{20});
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mongo::projection_executor
|
||||
Loading…
Reference in New Issue
Block a user