SERVER-102364 Periodically check $reduce accumulated depth less than… (#38333)

Co-authored-by: Mihai Andrei <mihai.andrei@mongodb.com>
GitOrigin-RevId: b61d749d15f49fbb051f9e965ba016101c63f41b
This commit is contained in:
Evan Bergeron 2025-08-06 14:46:37 -04:00 committed by MongoDB Bot
parent b92a12c682
commit 02d07dadb5
11 changed files with 321 additions and 0 deletions

0
.bazelrc.sync Normal file
View File

View File

@ -476,6 +476,8 @@ last-continuous:
ticket: SERVER-95570 ticket: SERVER-95570
- test_file: jstests/core/timeseries/query/timeseries_hybrid_search_disallowed.js - test_file: jstests/core/timeseries/query/timeseries_hybrid_search_disallowed.js
ticket: SERVER-82020 ticket: SERVER-82020
- test_file: jstests/aggregation/expressions/reduce_overflow.js
ticket: SERVER-102364
suites: null suites: null
last-lts: last-lts:
all: all:
@ -1007,4 +1009,6 @@ last-lts:
ticket: SERVER-108341 ticket: SERVER-108341
- test_file: jstests/concurrency/fsm_workloads/query/remove/update_and_batched_delete.js - test_file: jstests/concurrency/fsm_workloads/query/remove/update_and_batched_delete.js
ticket: SERVER-95570 ticket: SERVER-95570
- test_file: jstests/aggregation/expressions/reduce_overflow.js
ticket: SERVER-102364
suites: null suites: null

View File

@ -0,0 +1,51 @@
/**
* Verify the server does not crash when $reduce creates a deeply nested intermediate document.
*/
const coll = db[jsTestName()];
let seenSuccess = false;
let seenOverflow = false;
let recursiveObject = {"a": "$$value.array"};
let depth = 0;
for (let recursiveObjectDepth = 10; recursiveObjectDepth < 150; recursiveObjectDepth *= 2) {
while (depth < recursiveObjectDepth) {
recursiveObject = {"a": recursiveObject};
depth = depth + 1;
}
const pipeline = [
{"$group": {"_id": null, "entries": {"$push": "$value"}}},
{
"$project": {
"filtered": {
"$reduce": {
"input": "$entries",
"initialValue": {"array": []},
"in": {"array": [recursiveObject]}
}
}
}
}
];
for (let numDocs = 10; numDocs < 500; numDocs *= 2) {
coll.drop();
const bulk = coll.initializeUnorderedBulkOp();
for (let i = 0; i < numDocs; i++) {
bulk.insert({"value": 0});
}
assert.commandWorked(bulk.execute());
try {
coll.aggregate(pipeline);
jsTestLog("Pipeline succeeded");
seenSuccess = true;
assert(!seenOverflow);
} catch (error) {
assert(seenSuccess);
assert(error.code === ErrorCodes.Overflow, error);
jsTestLog("Pipeline exceeded max BSON depth", numDocs, recursiveObjectDepth);
seenOverflow = true;
}
}
}
assert(seenOverflow, "expected test to trigger overflow case");

View File

@ -195,6 +195,77 @@ TEST(DocumentSerialization, CannotSerializeDocumentThatExceedsDepthLimit) {
throwaway.abandon(); throwaway.abandon();
} }
TEST(DocumentDepthCalculations, Sanity) {
{
// A scalar has depth 0.
ASSERT_EQ(0, Value(1).depth(BSONDepth::getMaxAllowableDepth()));
}
{
// Nesting documents increments depth.
int32_t initialDepth = 1;
MutableDocument md;
md.addField("a", Value(1));
Document doc(md.freeze());
Value val(doc);
int32_t iters = 16;
ASSERT_EQ(initialDepth, val.depth(BSONDepth::getMaxAllowableDepth()));
for (int32_t idx = 0; idx < iters; ++idx) {
MutableDocument md;
md.addField("a", Value(doc));
doc = md.freeze();
Value val(doc);
ASSERT_EQ(idx + initialDepth + 1, val.depth(BSONDepth::getMaxAllowableDepth()));
}
}
{
// Simple document with no nested paths has depth 1.
Value val(BSON("a" << 1));
ASSERT_EQ(1, val.depth(BSONDepth::getMaxAllowableDepth()));
}
{
// Depth is max of children.
BSONObj bson = BSON("a" << 1 << "b" << BSON("c" << 1));
Document document = fromBson(bson);
Value val(document);
ASSERT_EQ(2, val.depth(BSONDepth::getMaxAllowableDepth()));
}
{
// Arrays increment depth.
BSONObj bson = BSON("a" << BSON_ARRAY(1 << 1));
Value val(fromBson(bson));
ASSERT_EQ(2, val.depth(BSONDepth::getMaxAllowableDepth()));
}
{
// Array length does not affect depth.
BSONObj bson = BSON("a" << BSON_ARRAY(1 << 1));
BSONObj bson2 = BSON("a" << BSON_ARRAY(1 << 1 << 1));
Value val(fromBson(bson));
Value val2(fromBson(bson2));
ASSERT_EQ(val.depth(BSONDepth::getMaxAllowableDepth()),
val2.depth(BSONDepth::getMaxAllowableDepth()));
}
{
// Nested arrays increment depth.
BSONObj bson = BSON("a" << BSON_ARRAY(1 << BSON_ARRAY(1 << 1)));
Value val(fromBson(bson));
ASSERT_EQ(3, val.depth(BSONDepth::getMaxAllowableDepth()));
}
{
// If maxDepth at least document depth, this function returns -1.
BSONObj bson = BSON("a" << 1 << "b" << BSON("c" << 1));
Document document = fromBson(bson);
Value val(document);
int32_t depth = 2;
for (int32_t maxDepth = 0; maxDepth < 2 * depth; maxDepth++) {
if (maxDepth <= depth) {
ASSERT_EQ(-1, val.depth(maxDepth));
} else {
ASSERT_EQ(depth, val.depth(maxDepth));
}
}
}
}
TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) { TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) {
BSONObj bson = BSON("scalar" << 1 << "scalar2" << true); BSONObj bson = BSON("scalar" << 1 << "scalar2" << true);
Document document = fromBson(bson); Document document = fromBson(bson);

View File

@ -1192,6 +1192,42 @@ size_t Value::getApproximateSize() const {
MONGO_verify(false); MONGO_verify(false);
} }
int32_t Value::depth(int32_t maxDepth, int32_t curDepth /*=0*/) const {
if (curDepth >= maxDepth) {
return -1;
}
int32_t maxChildDepth = -1;
switch (getType()) {
case BSONType::object: {
FieldIterator f(getDocument());
while (f.more()) {
auto fp = f.next();
int32_t childDepth = fp.second.depth(maxDepth, curDepth + 1);
if (childDepth == -1) {
return -1;
}
maxChildDepth = std::max(maxChildDepth, childDepth);
}
break;
}
case BSONType::array: {
for (const auto& val : getArray()) {
int32_t childDepth = val.depth(maxDepth, curDepth + 1);
if (childDepth == -1) {
return -1;
}
maxChildDepth = std::max(maxChildDepth, childDepth);
}
break;
}
default:
// No op
break;
}
// Increment depth to account for this level.
return maxChildDepth + 1;
}
string Value::toString() const { string Value::toString() const {
// TODO use StringBuilder when operator << is ready // TODO use StringBuilder when operator << is ready
stringstream out; stringstream out;

View File

@ -395,6 +395,12 @@ public:
/// Get the approximate memory size of the value, in bytes. Includes sizeof(Value) /// Get the approximate memory size of the value, in bytes. Includes sizeof(Value)
size_t getApproximateSize() const; size_t getApproximateSize() const;
/**
* Returns object/array depth of this value. Returns -1 if the depth is at least 'maxDepth'.
* Returns 0 on a scalar value.
*/
int32_t depth(int32_t maxDepth, int32_t curDepth = 0) const;
/** /**
* Calculate a hash value. * Calculate a hash value.
* *

View File

@ -27,6 +27,7 @@
* it in the license file. * it in the license file.
*/ */
#include "mongo/bson/bson_depth.h"
#include "mongo/bson/bsontypes.h" #include "mongo/bson/bsontypes.h"
#include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/curop_failpoint_helpers.h"
#include "mongo/db/exec/expression/evaluate.h" #include "mongo/db/exec/expression/evaluate.h"
@ -132,6 +133,9 @@ Value evaluate(const ExpressionReduce& expr, const Document& root, Variables* va
size_t memLimit = internalQueryMaxMapFilterReduceBytes.load(); size_t memLimit = internalQueryMaxMapFilterReduceBytes.load();
Value accumulatedValue = expr.getInitial()->evaluate(root, variables); Value accumulatedValue = expr.getInitial()->evaluate(root, variables);
size_t itr = 0;
int32_t prevDepth = -1;
size_t interval = expr.getAccumulatedValueDepthCheckInterval();
for (auto&& elem : inputVal.getArray()) { for (auto&& elem : inputVal.getArray()) {
checkForInterrupt(); checkForInterrupt();
@ -139,10 +143,28 @@ Value evaluate(const ExpressionReduce& expr, const Document& root, Variables* va
variables->setValue(expr.getValueVar(), accumulatedValue); variables->setValue(expr.getValueVar(), accumulatedValue);
accumulatedValue = expr.getIn()->evaluate(root, variables); accumulatedValue = expr.getIn()->evaluate(root, variables);
if ((interval > 0) && (itr % interval) == 0 &&
(accumulatedValue.isObject() || accumulatedValue.isArray())) {
int32_t depth =
accumulatedValue.depth(2 * BSONDepth::getMaxAllowableDepth() /*maxDepth*/);
if (MONGO_unlikely(depth == -1)) {
uasserted(ErrorCodes::Overflow,
"$reduce accumulated value exceeded max allowable BSON depth");
}
// Exponential backoff if depth has not increased.
if (depth == prevDepth) {
tassert(10236400,
"unexpected control flow in $reduce object/array depth verification",
prevDepth != -1);
interval *= 2;
}
prevDepth = depth;
}
if (MONGO_unlikely(accumulatedValue.getApproximateSize() > memLimit)) { if (MONGO_unlikely(accumulatedValue.getApproximateSize() > memLimit)) {
uasserted(ErrorCodes::ExceededMemoryLimit, uasserted(ErrorCodes::ExceededMemoryLimit,
"$reduce would use too much memory and cannot spill"); "$reduce would use too much memory and cannot spill");
} }
itr++;
} }
return accumulatedValue; return accumulatedValue;

View File

@ -1908,4 +1908,83 @@ void ExpressionBenchmarkFixture::benchmarkAddWithDottedFieldPath(benchmark::Stat
{"rhs"_sd, BSON("a" << BSON("b" << BSON("c" << BSON("d" << 20))))}})); {"rhs"_sd, BSON("a" << BSON("b" << BSON("c" << BSON("d" << 20))))}}));
} }
/**
* Tests performance of $reduce that sums an array.
*/
void ExpressionBenchmarkFixture::benchmarkReduceSum(benchmark::State& state) {
const size_t numEntries = 16 * 1000;
BSONArray entries = rangeBSONArray(0, numEntries);
BSONObj constantDepthExpr = BSON("$sum" << BSON_ARRAY("$$value" << 1));
BSONObj reduceExpression =
BSON("$reduce" << BSON("input" << "$entries"
<< "initialValue" << 0 << "in" << constantDepthExpr));
benchmarkExpression(
reduceExpression, state, std::vector<Document>(1, {{"entries"_sd, entries}}));
}
/**
* Tests performance of $reduce that's equivalent to the identity function (uses $concatArrays
* to build the same array again).
*/
void ExpressionBenchmarkFixture::benchmarkReduceConcatArrays(benchmark::State& state) {
const size_t numEntries = 1000;
BSONArray entries = rangeBSONArray(0, numEntries);
BSONArray emptyArray = rangeBSONArray(0, 0);
BSONObj reduceExpression =
BSON("$reduce" << BSON("input" << "$entries"
<< "initialValue" << emptyArray << "in"
<< BSON("$concatArrays"
<< BSON_ARRAY("$$value" << BSON_ARRAY("$$this")))));
benchmarkExpression(
reduceExpression, state, std::vector<Document>(1, {{"entries"_sd, entries}}));
}
/**
* Tests performance of $reduce that transforms an array into a deeply nested document.
*
* "$reduce": {
* "input": "$entries",
* "initialValue": [],
* "in": {"a": {"a" : {"a": ... {"a": {}}}}}
* }
*
* The nestedness of the "in" expression is configured by 'perIterationNestingDepth'.
*/
void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject(
benchmark::State& state, size_t perIterationNestingDepth) {
const size_t numEntries = 16 * 8 / perIterationNestingDepth;
BSONArray entries = rangeBSONArray(0, numEntries);
BSONObj recursiveObject = BSON("a" << "$$value");
for (size_t depth = 1; depth < perIterationNestingDepth; depth++) {
recursiveObject = BSON("a" << recursiveObject);
}
BSONArray emptyArray = rangeBSONArray(0, 0);
BSONObj reduceExpression =
BSON("$reduce" << BSON("input" << "$entries"
<< "initialValue" << emptyArray << "in" << recursiveObject));
benchmarkExpression(
reduceExpression, state, std::vector<Document>(1, {{"entries"_sd, entries}}));
}
void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject1(benchmark::State& state) {
benchmarkReduceCreatingNestedObject(state, 1);
}
void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject2(benchmark::State& state) {
benchmarkReduceCreatingNestedObject(state, 2);
}
void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject4(benchmark::State& state) {
benchmarkReduceCreatingNestedObject(state, 4);
}
void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject8(benchmark::State& state) {
benchmarkReduceCreatingNestedObject(state, 8);
}
} // namespace mongo } // namespace mongo

View File

@ -281,6 +281,14 @@ public:
void benchmarkRegexMatch(benchmark::State& state); void benchmarkRegexMatch(benchmark::State& state);
void benchmarkAddWithDottedFieldPath(benchmark::State& state); void benchmarkAddWithDottedFieldPath(benchmark::State& state);
void benchmarkReduceSum(benchmark::State& state);
void benchmarkReduceConcatArrays(benchmark::State& state);
void benchmarkReduceCreatingNestedObject(benchmark::State& state,
size_t perIterationNestingDepth);
void benchmarkReduceCreatingNestedObject1(benchmark::State& state);
void benchmarkReduceCreatingNestedObject2(benchmark::State& state);
void benchmarkReduceCreatingNestedObject4(benchmark::State& state);
void benchmarkReduceCreatingNestedObject8(benchmark::State& state);
private: private:
void testDateDiffExpression(long long startDate, void testDateDiffExpression(long long startDate,
@ -344,6 +352,30 @@ private:
benchmarkArrayFilter10(state); \ benchmarkArrayFilter10(state); \
} \ } \
\ \
BENCHMARK_F(Fixture, ReduceSum)(benchmark::State & state) { \
benchmarkReduceSum(state); \
} \
\
BENCHMARK_F(Fixture, ReduceConcatArrays)(benchmark::State & state) { \
benchmarkReduceConcatArrays(state); \
} \
\
BENCHMARK_F(Fixture, ReduceCreatingNestedObject1)(benchmark::State & state) { \
benchmarkReduceCreatingNestedObject1(state); \
} \
\
BENCHMARK_F(Fixture, ReduceCreatingNestedObject2)(benchmark::State & state) { \
benchmarkReduceCreatingNestedObject2(state); \
} \
\
BENCHMARK_F(Fixture, ReduceCreatingNestedObject4)(benchmark::State & state) { \
benchmarkReduceCreatingNestedObject4(state); \
} \
\
BENCHMARK_F(Fixture, ReduceCreatingNestedObject8)(benchmark::State & state) { \
benchmarkReduceCreatingNestedObject8(state); \
} \
\
BENCHMARK_F(Fixture, SortArrayInt)(benchmark::State & state) { \ BENCHMARK_F(Fixture, SortArrayInt)(benchmark::State & state) { \
benchmarkSortArrayInt(state); \ benchmarkSortArrayInt(state); \
} \ } \

View File

@ -2772,6 +2772,10 @@ public:
return _valueVar; return _valueVar;
} }
size_t getAccumulatedValueDepthCheckInterval() const {
return _accumulatedValueDepthCheckInterval;
}
private: private:
static constexpr size_t _kInput = 0; static constexpr size_t _kInput = 0;
static constexpr size_t _kInitial = 1; static constexpr size_t _kInitial = 1;
@ -2780,6 +2784,9 @@ private:
Variables::Id _thisVar; Variables::Id _thisVar;
Variables::Id _valueVar; Variables::Id _valueVar;
const size_t _accumulatedValueDepthCheckInterval =
gInternalReduceAccumulatedValueDepthCheckInterval.load();
template <typename H> template <typename H>
friend class ExpressionHashVisitor; friend class ExpressionHashVisitor;
}; };

View File

@ -1593,6 +1593,19 @@ server_parameters:
default: false default: false
redact: false redact: false
on_update: plan_cache_util::clearSbeCacheOnParameterChange on_update: plan_cache_util::clearSbeCacheOnParameterChange
internalReduceAccumulatedValueDepthCheckInterval:
description: >-
Configures how frequently $reduce checks if its accumulated value has exceeded the maximum
allowable nestedness. Arrays and subdocuments both count. If set to 0, no check is performed.
set_at: [startup, runtime]
cpp_vartype: "AtomicWord<long long>"
cpp_varname: gInternalReduceAccumulatedValueDepthCheckInterval
default: 16
validator:
gte: 0
lte: 1048576 # 1024 ** 2
redact: false
# Note for adding additional query knobs: # Note for adding additional query knobs:
# #
# When adding a new query knob, you should consider whether or not you need to add an 'on_update' # When adding a new query knob, you should consider whether or not you need to add an 'on_update'