SERVER-126960 Projection optimizations (#53905)

Co-authored-by: Denis Grebennicov <denis.grebennicov@mongodb.com>
GitOrigin-RevId: 2192f2f863866c62da15de75263ce1bbf156d8c0
This commit is contained in:
Jan 2026-05-18 20:02:18 +02:00 committed by MongoDB Bot
parent f5e43a74c9
commit 967cd36bd9
5 changed files with 286 additions and 16 deletions

View File

@ -133,6 +133,7 @@ mongo_cc_unit_test(
"projection_executor_test.cpp",
"projection_executor_utils_test.cpp",
"projection_executor_wildcard_access_test.cpp",
"projection_node_optimizations_test.cpp",
"serialize_ejson_utils_test.cpp",
"//src/mongo/db/exec/agg:exec_pipeline_test.cpp",
"//src/mongo/db/exec/agg:graph_lookup_test.cpp",

View File

@ -96,10 +96,13 @@ void FastPathProjectionNode<ProjectionNode, BaseProjectionNode>::_applyProjectio
const auto bsonElement{it.next()};
const auto fieldName{bsonElement.fieldNameStringData()};
if (this->_projectedFieldsSet.find(fieldName) != this->_projectedFieldsSet.end()) {
// Pre-compute the hash once and reuse it for both set and map lookups.
const auto hashedName = StringMapHasher{}.hashed_key(fieldName);
if (this->_projectedFieldsSet.find(hashedName) != this->_projectedFieldsSet.end()) {
projectionNode->_applyToProjectedField(bsonElement, bob);
--nFieldsLeft;
} else if (auto childIt = this->_children.find(fieldName);
} else if (auto childIt = this->_children.find(hashedName);
childIt != this->_children.end()) {
auto child = static_cast<const ProjectionNode*>(childIt->second.get());

View File

@ -95,14 +95,12 @@ void ProjectionNode::_addExpressionForPath(const FieldPath& path,
}
boost::intrusive_ptr<Expression> ProjectionNode::getExpressionForPath(const FieldPath& path) const {
// The FieldPath always conatins at least one field.
// The FieldPath always contains at least one field.
auto fieldName = std::string{path.getFieldName(0)};
if (path.getPathLength() == 1) {
if (_expressions.find(fieldName) != _expressions.end()) {
return _expressions.at(fieldName);
}
return nullptr;
auto it = _expressions.find(fieldName);
return (it != _expressions.end()) ? it->second : nullptr;
}
if (auto child = getChild(fieldName)) {
return child->getExpressionForPath(path.tail());
@ -157,7 +155,10 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument*
while (it.more()) {
auto fieldName = it.fieldName();
if (_projectedFieldsSet.find(fieldName) != _projectedFieldsSet.end()) {
// Pre-compute the hash once and reuse it for both set and map lookups.
const auto hashedName = StringMapHasher{}.hashed_key(fieldName);
if (_projectedFieldsSet.find(hashedName) != _projectedFieldsSet.end()) {
if (isIncl) {
outputProjectedField(fieldName, it.next().second, outputDoc);
} else {
@ -165,7 +166,7 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument*
it.advance();
}
++projectedFields;
} else if (auto childIt = _children.find(fieldName); childIt != _children.end()) {
} else if (auto childIt = _children.find(hashedName); childIt != _children.end()) {
outputProjectedField(
fieldName, childIt->second->applyProjectionsToValue(it.next().second), outputDoc);
++projectedFields;
@ -174,7 +175,7 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument*
}
// Check if we can avoid reading from the document any further.
if (_maxFieldsToProject && _maxFieldsToProject <= projectedFields) {
if (projectedFields >= _maxFieldsToProject) {
break;
}
}
@ -218,9 +219,18 @@ void ProjectionNode::applyExpressions(const Document& root, MutableDocument* out
for (auto&& field : _orderToProcessAdditionsAndChildren) {
auto childIt = _children.find(field);
if (childIt != _children.end()) {
outputDoc->setField(field,
childIt->second->applyExpressionsToValue(
root, outputDoc->peek()[StringData{field}]));
// Use position-based access to avoid a second hash lookup when reading and writing the
// field value.
const Document& doc = outputDoc->peek();
const auto pos = doc.positionOf(StringData{field});
Value currentValue = pos.found() ? doc.getField(pos) : Value{};
Value newValue =
childIt->second->applyExpressionsToValue(root, std::move(currentValue));
if (pos.found()) {
outputDoc->setField(pos, std::move(newValue));
} else {
outputDoc->setField(StringData{field}, std::move(newValue));
}
} else {
auto expressionIt = _expressions.find(field);
tassert(7241726,
@ -340,7 +350,7 @@ void ProjectionNode::optimize() {
childPair.second->optimize();
}
_maxFieldsToProject = maxFieldsToProject();
_maxFieldsToProject = maxFieldsToProject().value_or(kUnlimitedFieldsToProject);
}
Document ProjectionNode::serialize(const SerializationOptions& options) const {

View File

@ -45,6 +45,7 @@
#include "mongo/util/string_map.h"
#include <cstddef>
#include <limits>
#include <list>
#include <memory>
#include <set>
@ -275,7 +276,7 @@ private:
* node added).
*/
void makeOptimizationsStale() {
_maxFieldsToProject = boost::none;
_maxFieldsToProject = kUnlimitedFieldsToProject;
}
/**
@ -288,9 +289,14 @@ private:
*/
void _addProjectionForPath(const FieldPath& path);
/**
* Sentinel value for '_maxFieldsToProject'.
*/
static constexpr size_t kUnlimitedFieldsToProject = std::numeric_limits<size_t>::max();
// Maximum number of fields that need to be projected. This allows for an "early" return
// optimization which means we don't have to iterate over an entire document. The value is
// stored here to avoid re-computation for each document.
boost::optional<size_t> _maxFieldsToProject;
size_t _maxFieldsToProject = kUnlimitedFieldsToProject;
};
} // namespace mongo::projection_executor

View File

@ -0,0 +1,250 @@
/**
* Copyright (C) 2026-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
// Unit tests for the ProjectionNode optimizations exercised in this file:
// - Pre-computed hash reuse for the '_projectedFieldsSet' and '_children' lookups in
//   applyProjections() and _applyProjections()
// - Single-lookup getExpressionForPath()
// - size_t '_maxFieldsToProject' early-exit sentinel
// - Position-based Document access in the child dispatch of applyExpressions()
#include "mongo/db/exec/add_fields_projection_executor.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
#include "mongo/db/exec/document_value/value.h"
#include "mongo/db/exec/exclusion_projection_executor.h"
#include "mongo/db/exec/inclusion_projection_executor.h"
#include "mongo/db/exec/projection_executor_builder.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/query/compiler/logical_model/projection/projection_parser.h"
#include "mongo/unittest/unittest.h"
#include <boost/smart_ptr/intrusive_ptr.hpp>
namespace mongo::projection_executor {
namespace {
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
auto makeExpCtx() {
return boost::intrusive_ptr<ExpressionContextForTest>{new ExpressionContextForTest()};
}
// Parses and analyzes 'spec' with default-constructed ProjectionPolicies and returns the executor
// that buildProjectionExecutor() produces for it with kDefaultBuilderParams. Used by the inclusion
// projection tests below.
// NOTE(review): the builder path is expected to call optimize() internally; that is not visible
// from this file -- confirm against buildProjectionExecutor().
auto makeInclusionExecutor(const boost::intrusive_ptr<ExpressionContextForTest>& expCtx,
                           const BSONObj& spec) {
    auto parsedProjection = projection_ast::parseAndAnalyze(expCtx, spec, ProjectionPolicies{});
    auto executor = buildProjectionExecutor(
        expCtx, &parsedProjection, ProjectionPolicies{}, kDefaultBuilderParams);
    return executor;
}
// Parses and analyzes 'spec' with default-constructed ProjectionPolicies and returns the executor
// that buildProjectionExecutor() produces for it with kDefaultBuilderParams. Used by the exclusion
// projection tests below; the same policies object is passed to both the parser and the builder.
auto makeExclusionExecutor(const boost::intrusive_ptr<ExpressionContextForTest>& expCtx,
                           const BSONObj& spec) {
    ProjectionPolicies defaultPolicies;
    auto parsedProjection = projection_ast::parseAndAnalyze(expCtx, spec, defaultPolicies);
    auto executor =
        buildProjectionExecutor(expCtx, &parsedProjection, defaultPolicies, kDefaultBuilderParams);
    return executor;
}
// ---------------------------------------------------------------------------
// Pre-computed hash reuse — correctness of inclusion/exclusion projections.
// ---------------------------------------------------------------------------
// Includes three top-level fields from a four-field document. Intended to cover the per-field
// lookups that share one pre-computed hash: the output must contain exactly the fields named in
// the spec, in document order.
TEST(ProjectionNodeHashReuse, InclusionProjectionRetainsCorrectFields) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("_id" << 0 << "a" << 1 << "b" << 1 << "c" << 1);
    auto inclusion = makeInclusionExecutor(expCtx, spec);

    const Document input{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}};
    const Document expected{{"a", 1}, {"b", 2}, {"c", 3}};

    auto projected = inclusion->applyTransformation(input);
    ASSERT_DOCUMENT_EQ(projected, expected);
}
// A single-field inclusion spec applied to a five-field document: every field that the spec does
// not name must be dropped from the output.
TEST(ProjectionNodeHashReuse, InclusionProjectionExcludesNonProjectedFields) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("_id" << 0 << "x" << 1);
    auto inclusion = makeInclusionExecutor(expCtx, spec);

    const Document input{{"x", 10}, {"y", 20}, {"z", 30}, {"w", 40}, {"v", 50}};
    const Document expected{{"x", 10}};

    auto projected = inclusion->applyTransformation(input);
    ASSERT_DOCUMENT_EQ(projected, expected);
}
// Exclusion counterpart of the tests above: the two fields named in the spec are removed and the
// remaining fields are passed through unchanged.
TEST(ProjectionNodeHashReuse, ExclusionProjectionRemovesSpecifiedFields) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("d" << 0 << "e" << 0);
    auto exclusion = makeExclusionExecutor(expCtx, spec);

    const Document input{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}};
    const Document expected{{"a", 1}, {"b", 2}, {"c", 3}};

    auto projected = exclusion->applyTransformation(input);
    ASSERT_DOCUMENT_EQ(projected, expected);
}
// ---------------------------------------------------------------------------
// Single-lookup getExpressionForPath.
// ---------------------------------------------------------------------------
// A top-level computed field must be retrievable through getExpressionForPath(), and the returned
// expression must be the one that was registered (checked by evaluating it).
TEST(ProjectionNodeGetExpression, ReturnsExpressionForTopLevelPath) {
    auto expCtx = makeExpCtx();
    auto addFields = AddFieldsProjectionExecutor::create(expCtx, BSON("computed" << "$src"));

    auto computedExpr = addFields->getRoot().getExpressionForPath(FieldPath("computed"));
    ASSERT_FALSE(computedExpr == nullptr);

    // "$src" evaluated against {src: 99} must produce 99.
    const Document sample{{"src", Value{99}}};
    auto* variables = &computedExpr->getExpressionContext()->variables;
    auto evaluated = computedExpr->evaluate(sample, variables);
    ASSERT_VALUE_EQ(evaluated, Value{99});
}
// Looking up a top-level path that was never added must yield a null pointer rather than fail.
TEST(ProjectionNodeGetExpression, ReturnsNullForMissingPath) {
    auto expCtx = makeExpCtx();
    auto addFields = AddFieldsProjectionExecutor::create(expCtx, BSON("a" << "$x"));

    auto missingExpr = addFields->getRoot().getExpressionForPath(FieldPath("b"));
    ASSERT_TRUE(missingExpr == nullptr);
}
// For a dotted path, getExpressionForPath() must descend into the child node and return the
// expression stored under the last path component. A bare non-null check would also pass if the
// wrong expression came back, so the expression is evaluated as well, and a sibling path that was
// never added under the same parent is checked to resolve to nullptr.
TEST(ProjectionNodeGetExpression, ReturnsExpressionForNestedPath) {
    auto expCtx = makeExpCtx();
    auto executor = AddFieldsProjectionExecutor::create(expCtx, BSON("outer.inner" << "$val"));
    const auto& root = executor->getRoot();

    auto expr = root.getExpressionForPath(FieldPath("outer.inner"));
    ASSERT_TRUE(expr != nullptr);

    // "$val" evaluated against {val: 42} must produce 42.
    auto result =
        expr->evaluate(Document{{"val", Value{42}}}, &expr->getExpressionContext()->variables);
    ASSERT_VALUE_EQ(result, Value{42});

    // The parent node exists, but it holds no expression for this last path component.
    ASSERT_TRUE(root.getExpressionForPath(FieldPath("outer.missing")) == nullptr);
}
// ---------------------------------------------------------------------------
// _maxFieldsToProject early-exit sentinel.
// ---------------------------------------------------------------------------
// The two projected fields come first in a seven-field document. Whether or not the executor
// stops scanning after them, the output must contain only those two fields.
TEST(ProjectionNodeMaxFieldsEarlyExit, InclusionProjectionOnWideDocument) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("_id" << 0 << "a" << 1 << "b" << 1);
    auto inclusion = makeInclusionExecutor(expCtx, spec);

    // Five trailing fields that are not part of the projection.
    const Document wideInput{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}, {"f", 6}, {"g", 7}};
    const Document expected{{"a", 1}, {"b", 2}};

    auto projected = inclusion->applyTransformation(wideInput);
    ASSERT_DOCUMENT_EQ(projected, expected);
}
// All four projected fields precede the non-projected ones; none of them may be lost when the
// field limit is reached.
TEST(ProjectionNodeMaxFieldsEarlyExit, InclusionProjectionRetainsAllProjectedFields) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("_id" << 0 << "a" << 1 << "b" << 1 << "c" << 1 << "d" << 1);
    auto inclusion = makeInclusionExecutor(expCtx, spec);

    const Document input{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"extra1", 5}, {"extra2", 6}};
    const Document expected{{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}};

    auto projected = inclusion->applyTransformation(input);
    ASSERT_DOCUMENT_EQ(projected, expected);
}
// One of the projected fields ("missing") never appears in the input, so the number of projected
// fields found stays below the number requested; the output must still be correct.
TEST(ProjectionNodeMaxFieldsEarlyExit, InclusionProjectionWithMissingFields) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("_id" << 0 << "a" << 1 << "missing" << 1);
    auto inclusion = makeInclusionExecutor(expCtx, spec);

    const Document input{{"a", 1}, {"b", 2}, {"c", 3}};
    const Document expected{{"a", 1}};

    auto projected = inclusion->applyTransformation(input);
    ASSERT_DOCUMENT_EQ(projected, expected);
}
// ---------------------------------------------------------------------------
// Position-based Document access in applyExpressions child dispatch.
// ---------------------------------------------------------------------------
// A computed field nested under a parent that already exists in the input. The result of an
// executor on which optimize() was called must match the result of one used exactly as created,
// and the nested field must carry the computed value.
TEST(ProjectionNodePositionBasedAccess, NestedComputedFieldFastPath) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("outer.inner" << "$src");
    const Document inputDoc{{"src", 100}, {"outer", Document{{"inner", 0}, {"other", 1}}}};

    // Baseline: executor used as created, without an explicit optimize() call.
    auto unoptimized = AddFieldsProjectionExecutor::create(expCtx, spec);
    const auto baselineResult = unoptimized->applyProjection(inputDoc);

    // Same spec, with optimize() invoked before applying the projection.
    auto optimized = AddFieldsProjectionExecutor::create(expCtx, spec);
    optimized->optimize();
    const auto optimizedResult = optimized->applyProjection(inputDoc);

    ASSERT_DOCUMENT_EQ(baselineResult, optimizedResult);
    // "outer.inner" is overwritten with the value of "$src".
    ASSERT_VALUE_EQ(optimizedResult["outer"]["inner"], Value{100});
}
// The parent of the computed field ("newNested") is absent from the input, so the child value has
// to be written under a field name that is not yet present in the output document.
TEST(ProjectionNodePositionBasedAccess, NestedComputedFieldOnMissingParent) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("newNested.value" << "$x");
    const Document inputDoc{{"x", 7}};

    // Baseline: executor used as created, without an explicit optimize() call.
    auto unoptimized = AddFieldsProjectionExecutor::create(expCtx, spec);
    const auto baselineResult = unoptimized->applyProjection(inputDoc);

    // Same spec, with optimize() invoked before applying the projection.
    auto optimized = AddFieldsProjectionExecutor::create(expCtx, spec);
    optimized->optimize();
    const auto optimizedResult = optimized->applyProjection(inputDoc);

    ASSERT_DOCUMENT_EQ(baselineResult, optimizedResult);
    ASSERT_VALUE_EQ(optimizedResult["newNested"]["value"], Value{7});
}
// Two computed fields share the same parent ("obj"), so a single child node carries both
// expressions. Both siblings must be computed, with and without a prior optimize() call.
TEST(ProjectionNodePositionBasedAccess, MultipleNestedComputedFieldsSiblings) {
    auto expCtx = makeExpCtx();
    const BSONObj spec = BSON("obj.p" << "$a"
                                      << "obj.q"
                                      << "$b");
    const Document inputDoc{{"a", 10}, {"b", 20}};

    // Baseline: executor used as created, without an explicit optimize() call.
    auto unoptimized = AddFieldsProjectionExecutor::create(expCtx, spec);
    const auto baselineResult = unoptimized->applyProjection(inputDoc);

    // Same spec, with optimize() invoked before applying the projection.
    auto optimized = AddFieldsProjectionExecutor::create(expCtx, spec);
    optimized->optimize();
    const auto optimizedResult = optimized->applyProjection(inputDoc);

    ASSERT_DOCUMENT_EQ(baselineResult, optimizedResult);
    ASSERT_VALUE_EQ(optimizedResult["obj"]["p"], Value{10});
    ASSERT_VALUE_EQ(optimizedResult["obj"]["q"], Value{20});
}
} // namespace
} // namespace mongo::projection_executor