From ec47150c40063a5b5ddb7cc31a1f6e3db34ced26 Mon Sep 17 00:00:00 2001 From: henrikedin Date: Tue, 26 May 2026 09:01:29 -0400 Subject: [PATCH] SERVER-126021 Prohibit measurements with duplicate field names in timeseries collections (#53223) GitOrigin-RevId: b044bfbe6a297aa52182fc347310e14851971192 --- .../timeseries/bucket_catalog/flat_bson.cpp | 24 +++++++++----- .../bucket_catalog/measurement_map.cpp | 12 +++++-- .../bucket_catalog/measurement_map_test.cpp | 21 ++++++++++++ .../timeseries/bucket_catalog/minmax_test.cpp | 33 +++++++++++++++++++ 4 files changed, 80 insertions(+), 10 deletions(-) diff --git a/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp b/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp index d75c483a0ca..c711ec764c2 100644 --- a/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp @@ -273,9 +273,14 @@ typename FlatBSONStore::Iterator FlatBSONStore:: auto it = begin(); auto itEnd = end(); for (; it != itEnd; ++it) { - _pos->_fieldNameToIndex->try_emplace( - tracking::make_string(_trackingContext, it->fieldName().data(), it->fieldName().size()), - it._pos->_offsetParent); + uassert(12602100, + "Duplicate field names cannot be present in the same FlatBSON object", + _pos->_fieldNameToIndex + ->try_emplace(tracking::make_string(_trackingContext, + it->fieldName().data(), + it->fieldName().size()), + it._pos->_offsetParent) + .second); } // Retry the search now when the map is created. @@ -302,11 +307,14 @@ FlatBSONStore::Obj::insert(FlatBSONStore::Iterat // Also store our offset in the fast lookup map if it is available. if (_pos->_fieldNameToIndex) { - _pos->_fieldNameToIndex->try_emplace( - tracking::make_string(_trackingContext, - inserted->_element.fieldName().data(), - inserted->_element.fieldName().size()), - inserted->_offsetParent); + uassert(12602101, + "Duplicate field names cannot be present in the same FlatBSON object", + _pos->_fieldNameToIndex + ->try_emplace(tracking::make_string(_trackingContext, + inserted->_element.fieldName().data(), + inserted->_element.fieldName().size()), + inserted->_offsetParent) + .second); } // We need to traverse the hiearchy up to the root and modify stored offsets to account for diff --git a/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp b/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp index b38ab2adf2d..b578cfff462 100644 --- a/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp @@ -117,6 +117,7 @@ void MeasurementMap::_insertNewKey(StringData key, const BSONElement& elem, size } void MeasurementMap::insertOne(const BSONObj& measurement, boost::optional metaField) { + // First pass to append all elements present in this measurement. for (const auto& elem : measurement) { StringData key = elem.fieldNameStringData(); // Skip the meta field values because they aren't stored in a BSONColumn. @@ -129,11 +130,18 @@ void MeasurementMap::insertOne(const BSONObj& measurement, boost::optionalsecond.builder.append(elem); - ++builderIt->second.count; + + uassert(12602102, + "Measurements with duplicate field names cannot be stored in timeseries " + "collections", + builderIt->second.count++ <= _measurementCount); } } - // Increment our total measurement count + + // Increment our total measurement count. Needs to be after the first pass above so that + // builders for new keys are initialized with the correct count. ++_measurementCount; + // Perform a second pass over our builders and perform a skip for the ones that did not get an // element appended to them in the first pass above. for (auto&& entry : _builders) { diff --git a/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp b/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp index 83f4af81e74..b6a2547d805 100644 --- a/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp @@ -263,6 +263,27 @@ TEST_F(MeasurementMapTest, InitBuilders) { invariant(measurementMap.numFields() == 3); } +TEST_F(MeasurementMapTest, DuplicateFieldNameThrows) { + BSONObjBuilder builder; + builder.append("a", 1); + builder.append("a", 2); + + ASSERT_THROWS_CODE( + measurementMap.insertOne(builder.obj(), /*metaField=*/boost::none), DBException, 12602102); +} + +TEST_F(MeasurementMapTest, DuplicateFieldNameInSubsequentThrows) { + const BSONObj m = BSON("a" << 1); + measurementMap.insertOne(m, /*metaField=*/boost::none); + + BSONObjBuilder builder; + builder.append("a", 2); + builder.append("a", 3); + + ASSERT_THROWS_CODE( + measurementMap.insertOne(builder.obj(), /*metaField=*/boost::none), DBException, 12602102); +} + using MeasurementMapTestDeathTest = MeasurementMapTest; DEATH_TEST_REGEX_F(MeasurementMapTestDeathTest, GetTimeForNonexistentField, "Invariant failure.*") { measurementMap.timeOfLastMeasurement(_timeField); diff --git a/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp b/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp index 8a142dfa396..e0616905bb2 100644 --- a/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp @@ -305,5 +305,38 @@ TEST(MinMax, SearchLookupMap) { EXPECT_EQ(obj.search(obj.begin(), "50")->fieldName(), "50"); } +TEST(MinMax, DuplicateFieldNamesWithLookupMap) { + tracking::Context trackingContext; + MinMaxStore minmax{trackingContext}; + auto obj = minmax.root(); + + // Insert 12 (kMaxLinearSearchLength) distinct fields ("0".."11") followed by two duplicate "a" + // entries. This will trigger the lookup map internally in flat_bson. + for (int i = 0; i < 12; ++i) { + obj.insert(obj.end(), std::to_string(i)); + } + obj.insert(obj.end(), "a"); + obj.insert(obj.end(), "a"); + + // Try to search for "a", this will trigger the lookup map internally in flat_bson as we fail to + // find it within 'kMaxLinearSearchLength' attempts. The map cannot contain duplicates so this + // search is well defined and throws. + ASSERT_THROWS(obj.search(obj.begin(), "a"), AssertionException); + + // Try to insert another duplicate which will throw earlier as the lookup map exists and needs + // to be maintained. + obj.insert(obj.begin(), "x"); + ASSERT_THROWS(obj.insert(obj.begin(), "x"), AssertionException); + + // Searching for "a" or "x" is possible as we inserted one of them into the map. + auto found = obj.search(obj.begin(), "a"); + ASSERT(found != obj.end()); + ASSERT_EQ(found->fieldName(), "a"); + + found = obj.search(obj.begin(), "x"); + ASSERT(found != obj.end()); + ASSERT_EQ(found->fieldName(), "x"); +} + } // namespace } // namespace mongo::timeseries::bucket_catalog