SERVER-126021 Prohibit measurements with duplicate field names in timeseries collections (#53223)

GitOrigin-RevId: b044bfbe6a297aa52182fc347310e14851971192
This commit is contained in:
henrikedin 2026-05-26 09:01:29 -04:00 committed by MongoDB Bot
parent e1986470a0
commit ec47150c40
4 changed files with 80 additions and 10 deletions

View File

@ -273,9 +273,14 @@ typename FlatBSONStore<Element, Value>::Iterator FlatBSONStore<Element, Value>::
auto it = begin();
auto itEnd = end();
for (; it != itEnd; ++it) {
_pos->_fieldNameToIndex->try_emplace(
tracking::make_string(_trackingContext, it->fieldName().data(), it->fieldName().size()),
it._pos->_offsetParent);
uassert(12602100,
"Duplicate field names cannot be present in the same FlatBSON object",
_pos->_fieldNameToIndex
->try_emplace(tracking::make_string(_trackingContext,
it->fieldName().data(),
it->fieldName().size()),
it._pos->_offsetParent)
.second);
}
// Retry the search now when the map is created.
@ -302,11 +307,14 @@ FlatBSONStore<Element, Value>::Obj::insert(FlatBSONStore<Element, Value>::Iterat
// Also store our offset in the fast lookup map if it is available.
if (_pos->_fieldNameToIndex) {
_pos->_fieldNameToIndex->try_emplace(
tracking::make_string(_trackingContext,
inserted->_element.fieldName().data(),
inserted->_element.fieldName().size()),
inserted->_offsetParent);
uassert(12602101,
"Duplicate field names cannot be present in the same FlatBSON object",
_pos->_fieldNameToIndex
->try_emplace(tracking::make_string(_trackingContext,
inserted->_element.fieldName().data(),
inserted->_element.fieldName().size()),
inserted->_offsetParent)
.second);
}
// We need to traverse the hiearchy up to the root and modify stored offsets to account for

View File

@ -117,6 +117,7 @@ void MeasurementMap::_insertNewKey(StringData key, const BSONElement& elem, size
}
void MeasurementMap::insertOne(const BSONObj& measurement, boost::optional<StringData> metaField) {
// First pass to append all elements present in this measurement.
for (const auto& elem : measurement) {
StringData key = elem.fieldNameStringData();
// Skip the meta field values because they aren't stored in a BSONColumn.
@ -129,11 +130,18 @@ void MeasurementMap::insertOne(const BSONObj& measurement, boost::optional<Strin
_insertNewKey(key, elem, _measurementCount);
} else {
builderIt->second.builder.append(elem);
++builderIt->second.count;
uassert(12602102,
"Measurements with duplicate field names cannot be stored in timeseries "
"collections",
builderIt->second.count++ <= _measurementCount);
}
}
// Increment our total measurement count
// Increment our total measurement count. Needs to be after the first pass above so that
// builders for new keys are initialized with the correct count.
++_measurementCount;
// Perform a second pass over our builders and perform a skip for the ones that did not get an
// element appended to them in the first pass above.
for (auto&& entry : _builders) {

View File

@ -263,6 +263,27 @@ TEST_F(MeasurementMapTest, InitBuilders) {
invariant(measurementMap.numFields() == 3);
}
TEST_F(MeasurementMapTest, DuplicateFieldNameThrows) {
BSONObjBuilder builder;
builder.append("a", 1);
builder.append("a", 2);
ASSERT_THROWS_CODE(
measurementMap.insertOne(builder.obj(), /*metaField=*/boost::none), DBException, 12602102);
}
TEST_F(MeasurementMapTest, DuplicateFieldNameInSubsequentThrows) {
const BSONObj m = BSON("a" << 1);
measurementMap.insertOne(m, /*metaField=*/boost::none);
BSONObjBuilder builder;
builder.append("a", 2);
builder.append("a", 3);
ASSERT_THROWS_CODE(
measurementMap.insertOne(builder.obj(), /*metaField=*/boost::none), DBException, 12602102);
}
using MeasurementMapTestDeathTest = MeasurementMapTest;
DEATH_TEST_REGEX_F(MeasurementMapTestDeathTest, GetTimeForNonexistentField, "Invariant failure.*") {
measurementMap.timeOfLastMeasurement(_timeField);

View File

@ -305,5 +305,38 @@ TEST(MinMax, SearchLookupMap) {
EXPECT_EQ(obj.search(obj.begin(), "50")->fieldName(), "50");
}
TEST(MinMax, DuplicateFieldNamesWithLookupMap) {
tracking::Context trackingContext;
MinMaxStore minmax{trackingContext};
auto obj = minmax.root();
// Insert 12 (kMaxLinearSearchLength) distinct fields ("0".."11") followed by two duplicate "a"
// entries. This will trigger the lookup map internally in flat_bson.
for (int i = 0; i < 12; ++i) {
obj.insert(obj.end(), std::to_string(i));
}
obj.insert(obj.end(), "a");
obj.insert(obj.end(), "a");
// Try to search for "a", this will trigger the lookup map internally in flat_bson as we fail to
// find it within 'kMaxLinearSearchLength' attempts. The map cannot contain duplicates so this
// search is well defined and throws.
ASSERT_THROWS(obj.search(obj.begin(), "a"), AssertionException);
// Try to insert another duplicate which will throw earlier as the lookup map exists and needs
// to be maintained.
obj.insert(obj.begin(), "x");
ASSERT_THROWS(obj.insert(obj.begin(), "x"), AssertionException);
// Searching for "a" or "x" is possible as we inserted one of them into the map.
auto found = obj.search(obj.begin(), "a");
ASSERT(found != obj.end());
ASSERT_EQ(found->fieldName(), "a");
found = obj.search(obj.begin(), "x");
ASSERT(found != obj.end());
ASSERT_EQ(found->fieldName(), "x");
}
} // namespace
} // namespace mongo::timeseries::bucket_catalog