diff --git a/.bazelrc.sync b/.bazelrc.sync new file mode 100644 index 00000000000..e69de29bb2d diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 30350c40c4c..b6e6987a9e7 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -476,6 +476,8 @@ last-continuous: ticket: SERVER-95570 - test_file: jstests/core/timeseries/query/timeseries_hybrid_search_disallowed.js ticket: SERVER-82020 + - test_file: jstests/aggregation/expressions/reduce_overflow.js + ticket: SERVER-102364 suites: null last-lts: all: @@ -1007,4 +1009,6 @@ last-lts: ticket: SERVER-108341 - test_file: jstests/concurrency/fsm_workloads/query/remove/update_and_batched_delete.js ticket: SERVER-95570 + - test_file: jstests/aggregation/expressions/reduce_overflow.js + ticket: SERVER-102364 suites: null diff --git a/jstests/aggregation/expressions/reduce_overflow.js b/jstests/aggregation/expressions/reduce_overflow.js new file mode 100644 index 00000000000..a6d4387c8b6 --- /dev/null +++ b/jstests/aggregation/expressions/reduce_overflow.js @@ -0,0 +1,51 @@ +/** + * Verify the server does not crash when $reduce creates a deeply nested intermediate document. + */ +const coll = db[jsTestName()]; + +let seenSuccess = false; +let seenOverflow = false; +let recursiveObject = {"a": "$$value.array"}; +let depth = 0; +for (let recursiveObjectDepth = 10; recursiveObjectDepth < 150; recursiveObjectDepth *= 2) { + while (depth < recursiveObjectDepth) { + recursiveObject = {"a": recursiveObject}; + depth = depth + 1; + } + + const pipeline = [ + {"$group": {"_id": null, "entries": {"$push": "$value"}}}, + { + "$project": { + "filtered": { + "$reduce": { + "input": "$entries", + "initialValue": {"array": []}, + "in": {"array": [recursiveObject]} + } + } + } + } + ]; + + for (let numDocs = 10; numDocs < 500; numDocs *= 2) { + coll.drop(); + const bulk = coll.initializeUnorderedBulkOp(); + for (let i = 0; i < numDocs; i++) { + bulk.insert({"value": 0}); + } + assert.commandWorked(bulk.execute()); + try { + coll.aggregate(pipeline); + jsTestLog("Pipeline succeeded"); + seenSuccess = true; + assert(!seenOverflow); + } catch (error) { + assert(seenSuccess); + assert(error.code === ErrorCodes.Overflow, error); + jsTestLog("Pipeline exceeded max BSON depth", numDocs, recursiveObjectDepth); + seenOverflow = true; + } + } +} +assert(seenOverflow, "expected test to trigger overflow case"); diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp index e79f5690627..e08595a4831 100644 --- a/src/mongo/db/exec/document_value/document_value_test.cpp +++ b/src/mongo/db/exec/document_value/document_value_test.cpp @@ -195,6 +195,77 @@ TEST(DocumentSerialization, CannotSerializeDocumentThatExceedsDepthLimit) { throwaway.abandon(); } +TEST(DocumentDepthCalculations, Sanity) { + { + // A scalar has depth 0. + ASSERT_EQ(0, Value(1).depth(BSONDepth::getMaxAllowableDepth())); + } + { + // Nesting documents increments depth. + int32_t initialDepth = 1; + MutableDocument md; + md.addField("a", Value(1)); + Document doc(md.freeze()); + Value val(doc); + int32_t iters = 16; + ASSERT_EQ(initialDepth, val.depth(BSONDepth::getMaxAllowableDepth())); + for (int32_t idx = 0; idx < iters; ++idx) { + MutableDocument md; + md.addField("a", Value(doc)); + doc = md.freeze(); + Value val(doc); + ASSERT_EQ(idx + initialDepth + 1, val.depth(BSONDepth::getMaxAllowableDepth())); + } + } + { + // Simple document with no nested paths has depth 1. + Value val(BSON("a" << 1)); + ASSERT_EQ(1, val.depth(BSONDepth::getMaxAllowableDepth())); + } + { + // Depth is max of children. + BSONObj bson = BSON("a" << 1 << "b" << BSON("c" << 1)); + Document document = fromBson(bson); + Value val(document); + ASSERT_EQ(2, val.depth(BSONDepth::getMaxAllowableDepth())); + } + { + // Arrays increment depth. + BSONObj bson = BSON("a" << BSON_ARRAY(1 << 1)); + Value val(fromBson(bson)); + ASSERT_EQ(2, val.depth(BSONDepth::getMaxAllowableDepth())); + } + { + // Array length does not affect depth. + BSONObj bson = BSON("a" << BSON_ARRAY(1 << 1)); + BSONObj bson2 = BSON("a" << BSON_ARRAY(1 << 1 << 1)); + Value val(fromBson(bson)); + Value val2(fromBson(bson2)); + ASSERT_EQ(val.depth(BSONDepth::getMaxAllowableDepth()), + val2.depth(BSONDepth::getMaxAllowableDepth())); + } + { + // Nested arrays increment depth. + BSONObj bson = BSON("a" << BSON_ARRAY(1 << BSON_ARRAY(1 << 1))); + Value val(fromBson(bson)); + ASSERT_EQ(3, val.depth(BSONDepth::getMaxAllowableDepth())); + } + { + // If maxDepth at least document depth, this function returns -1. + BSONObj bson = BSON("a" << 1 << "b" << BSON("c" << 1)); + Document document = fromBson(bson); + Value val(document); + int32_t depth = 2; + for (int32_t maxDepth = 0; maxDepth < 2 * depth; maxDepth++) { + if (maxDepth <= depth) { + ASSERT_EQ(-1, val.depth(maxDepth)); + } else { + ASSERT_EQ(depth, val.depth(maxDepth)); + } + } + } +} + TEST(DocumentGetFieldNonCaching, UncachedTopLevelFields) { BSONObj bson = BSON("scalar" << 1 << "scalar2" << true); Document document = fromBson(bson); diff --git a/src/mongo/db/exec/document_value/value.cpp b/src/mongo/db/exec/document_value/value.cpp index 1d4c05a3e93..0d46e2a4641 100644 --- a/src/mongo/db/exec/document_value/value.cpp +++ b/src/mongo/db/exec/document_value/value.cpp @@ -1192,6 +1192,42 @@ size_t Value::getApproximateSize() const { MONGO_verify(false); } +int32_t Value::depth(int32_t maxDepth, int32_t curDepth /*=0*/) const { + if (curDepth >= maxDepth) { + return -1; + } + int32_t maxChildDepth = -1; + switch (getType()) { + case BSONType::object: { + FieldIterator f(getDocument()); + while (f.more()) { + auto fp = f.next(); + int32_t childDepth = fp.second.depth(maxDepth, curDepth + 1); + if (childDepth == -1) { + return -1; + } + maxChildDepth = std::max(maxChildDepth, childDepth); + } + break; + } + case BSONType::array: { + for (const auto& val : getArray()) { + int32_t childDepth = val.depth(maxDepth, curDepth + 1); + if (childDepth == -1) { + return -1; + } + maxChildDepth = std::max(maxChildDepth, childDepth); + } + break; + } + default: + // No op + break; + } + // Increment depth to account for this level. + return maxChildDepth + 1; +} + string Value::toString() const { // TODO use StringBuilder when operator << is ready stringstream out; diff --git a/src/mongo/db/exec/document_value/value.h b/src/mongo/db/exec/document_value/value.h index ba017c60266..bd12e4d893d 100644 --- a/src/mongo/db/exec/document_value/value.h +++ b/src/mongo/db/exec/document_value/value.h @@ -395,6 +395,12 @@ public: /// Get the approximate memory size of the value, in bytes. Includes sizeof(Value) size_t getApproximateSize() const; + /** + * Returns object/array depth of this value. Returns -1 if the depth is at least 'maxDepth'. + * Returns 0 on a scalar value. + */ + int32_t depth(int32_t maxDepth, int32_t curDepth = 0) const; + /** * Calculate a hash value. * diff --git a/src/mongo/db/exec/expression/evaluate_map_reduce.cpp b/src/mongo/db/exec/expression/evaluate_map_reduce.cpp index 503051f823f..756be6b3660 100644 --- a/src/mongo/db/exec/expression/evaluate_map_reduce.cpp +++ b/src/mongo/db/exec/expression/evaluate_map_reduce.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/bson/bson_depth.h" #include "mongo/bson/bsontypes.h" #include "mongo/db/curop_failpoint_helpers.h" #include "mongo/db/exec/expression/evaluate.h" @@ -132,6 +133,9 @@ Value evaluate(const ExpressionReduce& expr, const Document& root, Variables* va size_t memLimit = internalQueryMaxMapFilterReduceBytes.load(); Value accumulatedValue = expr.getInitial()->evaluate(root, variables); + size_t itr = 0; + int32_t prevDepth = -1; + size_t interval = expr.getAccumulatedValueDepthCheckInterval(); for (auto&& elem : inputVal.getArray()) { checkForInterrupt(); @@ -139,10 +143,28 @@ Value evaluate(const ExpressionReduce& expr, const Document& root, Variables* va variables->setValue(expr.getValueVar(), accumulatedValue); accumulatedValue = expr.getIn()->evaluate(root, variables); + if ((interval > 0) && (itr % interval) == 0 && + (accumulatedValue.isObject() || accumulatedValue.isArray())) { + int32_t depth = + accumulatedValue.depth(2 * BSONDepth::getMaxAllowableDepth() /*maxDepth*/); + if (MONGO_unlikely(depth == -1)) { + uasserted(ErrorCodes::Overflow, + "$reduce accumulated value exceeded max allowable BSON depth"); + } + // Exponential backoff if depth has not increased. + if (depth == prevDepth) { + tassert(10236400, + "unexpected control flow in $reduce object/array depth verification", + prevDepth != -1); + interval *= 2; + } + prevDepth = depth; + } if (MONGO_unlikely(accumulatedValue.getApproximateSize() > memLimit)) { uasserted(ErrorCodes::ExceededMemoryLimit, "$reduce would use too much memory and cannot spill"); } + itr++; } return accumulatedValue; diff --git a/src/mongo/db/exec/expression_bm_fixture.cpp b/src/mongo/db/exec/expression_bm_fixture.cpp index cb3768ea684..bf83ea91395 100644 --- a/src/mongo/db/exec/expression_bm_fixture.cpp +++ b/src/mongo/db/exec/expression_bm_fixture.cpp @@ -1908,4 +1908,83 @@ void ExpressionBenchmarkFixture::benchmarkAddWithDottedFieldPath(benchmark::Stat {"rhs"_sd, BSON("a" << BSON("b" << BSON("c" << BSON("d" << 20))))}})); } +/** + * Tests performance of $reduce that sums an array. + */ +void ExpressionBenchmarkFixture::benchmarkReduceSum(benchmark::State& state) { + const size_t numEntries = 16 * 1000; + BSONArray entries = rangeBSONArray(0, numEntries); + + BSONObj constantDepthExpr = BSON("$sum" << BSON_ARRAY("$$value" << 1)); + + BSONObj reduceExpression = + BSON("$reduce" << BSON("input" << "$entries" + << "initialValue" << 0 << "in" << constantDepthExpr)); + + benchmarkExpression( + reduceExpression, state, std::vector(1, {{"entries"_sd, entries}})); +} + +/** + * Tests performance of $reduce that's equivalent to the identity function (uses $concatArrays + * to build the same array again). + */ +void ExpressionBenchmarkFixture::benchmarkReduceConcatArrays(benchmark::State& state) { + const size_t numEntries = 1000; + BSONArray entries = rangeBSONArray(0, numEntries); + + BSONArray emptyArray = rangeBSONArray(0, 0); + + BSONObj reduceExpression = + BSON("$reduce" << BSON("input" << "$entries" + << "initialValue" << emptyArray << "in" + << BSON("$concatArrays" + << BSON_ARRAY("$$value" << BSON_ARRAY("$$this"))))); + + benchmarkExpression( + reduceExpression, state, std::vector(1, {{"entries"_sd, entries}})); +} + + +/** + * Tests performance of $reduce that transforms an array into a deeply nested document. + * + * "$reduce": { + * "input": "$entries", + * "initialValue": [], + * "in": {"a": {"a" : {"a": ... {"a": {}}}}} + * } + * + * The nestedness of the "in" expression is configured by 'perIterationNestingDepth'. + */ +void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject( + benchmark::State& state, size_t perIterationNestingDepth) { + const size_t numEntries = 16 * 8 / perIterationNestingDepth; + BSONArray entries = rangeBSONArray(0, numEntries); + BSONObj recursiveObject = BSON("a" << "$$value"); + for (size_t depth = 1; depth < perIterationNestingDepth; depth++) { + recursiveObject = BSON("a" << recursiveObject); + } + BSONArray emptyArray = rangeBSONArray(0, 0); + BSONObj reduceExpression = + BSON("$reduce" << BSON("input" << "$entries" + << "initialValue" << emptyArray << "in" << recursiveObject)); + benchmarkExpression( + reduceExpression, state, std::vector(1, {{"entries"_sd, entries}})); +} + + +void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject1(benchmark::State& state) { + benchmarkReduceCreatingNestedObject(state, 1); +} +void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject2(benchmark::State& state) { + benchmarkReduceCreatingNestedObject(state, 2); +} +void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject4(benchmark::State& state) { + benchmarkReduceCreatingNestedObject(state, 4); +} +void ExpressionBenchmarkFixture::benchmarkReduceCreatingNestedObject8(benchmark::State& state) { + benchmarkReduceCreatingNestedObject(state, 8); +} + } // namespace mongo diff --git a/src/mongo/db/exec/expression_bm_fixture.h b/src/mongo/db/exec/expression_bm_fixture.h index 413813652ad..70037d56b85 100644 --- a/src/mongo/db/exec/expression_bm_fixture.h +++ b/src/mongo/db/exec/expression_bm_fixture.h @@ -281,6 +281,14 @@ public: void benchmarkRegexMatch(benchmark::State& state); void benchmarkAddWithDottedFieldPath(benchmark::State& state); + void benchmarkReduceSum(benchmark::State& state); + void benchmarkReduceConcatArrays(benchmark::State& state); + void benchmarkReduceCreatingNestedObject(benchmark::State& state, + size_t perIterationNestingDepth); + void benchmarkReduceCreatingNestedObject1(benchmark::State& state); + void benchmarkReduceCreatingNestedObject2(benchmark::State& state); + void benchmarkReduceCreatingNestedObject4(benchmark::State& state); + void benchmarkReduceCreatingNestedObject8(benchmark::State& state); private: void testDateDiffExpression(long long startDate, @@ -344,6 +352,30 @@ private: benchmarkArrayFilter10(state); \ } \ \ + BENCHMARK_F(Fixture, ReduceSum)(benchmark::State & state) { \ + benchmarkReduceSum(state); \ + } \ + \ + BENCHMARK_F(Fixture, ReduceConcatArrays)(benchmark::State & state) { \ + benchmarkReduceConcatArrays(state); \ + } \ + \ + BENCHMARK_F(Fixture, ReduceCreatingNestedObject1)(benchmark::State & state) { \ + benchmarkReduceCreatingNestedObject1(state); \ + } \ + \ + BENCHMARK_F(Fixture, ReduceCreatingNestedObject2)(benchmark::State & state) { \ + benchmarkReduceCreatingNestedObject2(state); \ + } \ + \ + BENCHMARK_F(Fixture, ReduceCreatingNestedObject4)(benchmark::State & state) { \ + benchmarkReduceCreatingNestedObject4(state); \ + } \ + \ + BENCHMARK_F(Fixture, ReduceCreatingNestedObject8)(benchmark::State & state) { \ + benchmarkReduceCreatingNestedObject8(state); \ + } \ + \ BENCHMARK_F(Fixture, SortArrayInt)(benchmark::State & state) { \ benchmarkSortArrayInt(state); \ } \ diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index f0ef9969966..9d50a48ef7f 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -2772,6 +2772,10 @@ public: return _valueVar; } + size_t getAccumulatedValueDepthCheckInterval() const { + return _accumulatedValueDepthCheckInterval; + } + private: static constexpr size_t _kInput = 0; static constexpr size_t _kInitial = 1; @@ -2780,6 +2784,9 @@ private: Variables::Id _thisVar; Variables::Id _valueVar; + const size_t _accumulatedValueDepthCheckInterval = + gInternalReduceAccumulatedValueDepthCheckInterval.load(); + template friend class ExpressionHashVisitor; }; diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl index 6631ec542fc..e65234e3000 100644 --- a/src/mongo/db/query/query_knobs.idl +++ b/src/mongo/db/query/query_knobs.idl @@ -1593,6 +1593,19 @@ server_parameters: default: false redact: false on_update: plan_cache_util::clearSbeCacheOnParameterChange + + internalReduceAccumulatedValueDepthCheckInterval: + description: >- + Configures how frequently $reduce checks if its accumulated value has exceeded the maximum + allowable nestedness. Arrays and subdocuments both count. If set to 0, no check is performed. + set_at: [startup, runtime] + cpp_vartype: "AtomicWord" + cpp_varname: gInternalReduceAccumulatedValueDepthCheckInterval + default: 16 + validator: + gte: 0 + lte: 1048576 # 1024 ** 2 + redact: false # Note for adding additional query knobs: # # When adding a new query knob, you should consider whether or not you need to add an 'on_update'