diff --git a/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp b/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp index 70e77592d8b..383f4b552e8 100644 --- a/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/flat_bson.cpp @@ -271,9 +271,14 @@ typename FlatBSONStore::Iterator FlatBSONStore:: auto it = begin(); auto itEnd = end(); for (; it != itEnd; ++it) { - _pos->_fieldNameToIndex->try_emplace( - make_tracked_string(_trackingContext, it->fieldName().data(), it->fieldName().size()), - it._pos->_offsetParent); + uassert(12602100, + "Duplicate field names cannot be present in the same FlatBSON object", + _pos->_fieldNameToIndex + ->try_emplace(make_tracked_string(_trackingContext, + it->fieldName().data(), + it->fieldName().size()), + it._pos->_offsetParent) + .second); } // Retry the search now when the map is created. @@ -303,11 +308,14 @@ FlatBSONStore::Obj::insert(FlatBSONStore::Iterat // Also store our offset in the fast lookup map if it is available. 
if (_pos->_fieldNameToIndex) { - _pos->_fieldNameToIndex->try_emplace( - make_tracked_string(_trackingContext, - inserted->_element.fieldName().data(), - inserted->_element.fieldName().size()), - inserted->_offsetParent); + uassert(12602101, + "Duplicate field names cannot be present in the same FlatBSON object", + _pos->_fieldNameToIndex + ->try_emplace(make_tracked_string(_trackingContext, + inserted->_element.fieldName().data(), + inserted->_element.fieldName().size()), + inserted->_offsetParent) + .second); } // We need to traverse the hiearchy up to the root and modify stored offsets to account for diff --git a/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp b/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp index 4fb72412157..363052a50e4 100644 --- a/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/measurement_map.cpp @@ -129,7 +129,11 @@ void MeasurementMap::insertOne(const std::vector& oneMeasurementDat for (const auto& elem : oneMeasurementDataFields) { StringData key = elem.fieldNameStringData(); - fieldsSeen.insert(key); + + uassert(12602102, + "Measurements with duplicate field names cannot be stored in timeseries " + "collections", + fieldsSeen.insert(key).second); auto builderIt = _builders.find(key); if (builderIt == _builders.end()) { diff --git a/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp b/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp index 85b84b1cc4e..f59edd46c46 100644 --- a/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/measurement_map_test.cpp @@ -231,6 +231,27 @@ TEST_F(MeasurementMapTest, InitBuilders) { invariant(measurementMap.numFields() == 3); } +TEST_F(MeasurementMapTest, DuplicateFieldNameThrows) { + BSONObjBuilder builder; + builder.append("a", 1); + builder.append("a", 2); + + ASSERT_THROWS(measurementMap.insertOne(genMeasurementFieldsFromObj(builder.obj())), 
+ AssertionException); +} + +TEST_F(MeasurementMapTest, DuplicateFieldNameInSubsequentThrows) { + const BSONObj m = BSON("a" << 1); + measurementMap.insertOne(genMeasurementFieldsFromObj(m)); + + BSONObjBuilder builder; + builder.append("a", 2); + builder.append("a", 3); + + ASSERT_THROWS(measurementMap.insertOne(genMeasurementFieldsFromObj(builder.obj())), + AssertionException); +} + DEATH_TEST_REGEX_F(MeasurementMapTest, GetTimeForNonexistentField, "Invariant failure.*") { measurementMap.timeOfLastMeasurement("time"); } diff --git a/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp b/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp index 56f7f18a075..cd97fab3215 100644 --- a/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp +++ b/src/mongo/db/timeseries/bucket_catalog/minmax_test.cpp @@ -308,5 +308,38 @@ TEST(MinMax, SearchLookupMap) { ASSERT_EQ(obj.search(obj.begin(), "50")->fieldName(), "50"); } +TEST(MinMax, DuplicateFieldNamesWithLookupMap) { + TrackingContext trackingContext; + MinMaxStore minmax{trackingContext}; + auto obj = minmax.root(); + + // Insert 12 (kMaxLinearSearchLength) distinct fields ("0".."11") followed by two duplicate "a" + // entries, so a later search for "a" has to fall back to the lookup map in flat_bson. + for (int i = 0; i < 12; ++i) { + obj.insert(obj.end(), std::to_string(i)); + } + obj.insert(obj.end(), "a"); + obj.insert(obj.end(), "a"); + + // Search for "a". This builds the lookup map internally in flat_bson as we fail to find it + // within 'kMaxLinearSearchLength' attempts. The map cannot contain duplicates, so building it + // hits the second "a" and the search throws. + ASSERT_THROWS(obj.search(obj.begin(), "a"), AssertionException); + + // Insert "x" twice. The second insert throws right away because the lookup map now exists + // and has to be kept in sync on every insert. 
+ obj.insert(obj.begin(), "x"); + ASSERT_THROWS(obj.insert(obj.begin(), "x"), AssertionException); + + // Searching for "a" or "x" still works as the first of each duplicate pair is in the map. + auto found = obj.search(obj.begin(), "a"); + ASSERT(found != obj.end()); + ASSERT_EQ(found->fieldName(), "a"); + + found = obj.search(obj.begin(), "x"); + ASSERT(found != obj.end()); + ASSERT_EQ(found->fieldName(), "x"); +} + } // namespace } // namespace mongo::timeseries::bucket_catalog