SERVER-126021 Prohibit measurements with duplicate field names in timeseries collections (#53223)
GitOrigin-RevId: b044bfbe6a297aa52182fc347310e14851971192
This commit is contained in:
parent
e1986470a0
commit
ec47150c40
@ -273,9 +273,14 @@ typename FlatBSONStore<Element, Value>::Iterator FlatBSONStore<Element, Value>::
|
||||
auto it = begin();
|
||||
auto itEnd = end();
|
||||
for (; it != itEnd; ++it) {
|
||||
_pos->_fieldNameToIndex->try_emplace(
|
||||
tracking::make_string(_trackingContext, it->fieldName().data(), it->fieldName().size()),
|
||||
it._pos->_offsetParent);
|
||||
uassert(12602100,
|
||||
"Duplicate field names cannot be present in the same FlatBSON object",
|
||||
_pos->_fieldNameToIndex
|
||||
->try_emplace(tracking::make_string(_trackingContext,
|
||||
it->fieldName().data(),
|
||||
it->fieldName().size()),
|
||||
it._pos->_offsetParent)
|
||||
.second);
|
||||
}
|
||||
|
||||
// Retry the search now when the map is created.
|
||||
@ -302,11 +307,14 @@ FlatBSONStore<Element, Value>::Obj::insert(FlatBSONStore<Element, Value>::Iterat
|
||||
|
||||
// Also store our offset in the fast lookup map if it is available.
|
||||
if (_pos->_fieldNameToIndex) {
|
||||
_pos->_fieldNameToIndex->try_emplace(
|
||||
tracking::make_string(_trackingContext,
|
||||
inserted->_element.fieldName().data(),
|
||||
inserted->_element.fieldName().size()),
|
||||
inserted->_offsetParent);
|
||||
uassert(12602101,
|
||||
"Duplicate field names cannot be present in the same FlatBSON object",
|
||||
_pos->_fieldNameToIndex
|
||||
->try_emplace(tracking::make_string(_trackingContext,
|
||||
inserted->_element.fieldName().data(),
|
||||
inserted->_element.fieldName().size()),
|
||||
inserted->_offsetParent)
|
||||
.second);
|
||||
}
|
||||
|
||||
// We need to traverse the hiearchy up to the root and modify stored offsets to account for
|
||||
|
||||
@ -117,6 +117,7 @@ void MeasurementMap::_insertNewKey(StringData key, const BSONElement& elem, size
|
||||
}
|
||||
|
||||
void MeasurementMap::insertOne(const BSONObj& measurement, boost::optional<StringData> metaField) {
|
||||
// First pass to append all elements present in this measurement.
|
||||
for (const auto& elem : measurement) {
|
||||
StringData key = elem.fieldNameStringData();
|
||||
// Skip the meta field values because they aren't stored in a BSONColumn.
|
||||
@ -129,11 +130,18 @@ void MeasurementMap::insertOne(const BSONObj& measurement, boost::optional<Strin
|
||||
_insertNewKey(key, elem, _measurementCount);
|
||||
} else {
|
||||
builderIt->second.builder.append(elem);
|
||||
++builderIt->second.count;
|
||||
|
||||
uassert(12602102,
|
||||
"Measurements with duplicate field names cannot be stored in timeseries "
|
||||
"collections",
|
||||
builderIt->second.count++ <= _measurementCount);
|
||||
}
|
||||
}
|
||||
// Increment our total measurement count
|
||||
|
||||
// Increment our total measurement count. Needs to be after the first pass above so that
|
||||
// builders for new keys are initialized with the correct count.
|
||||
++_measurementCount;
|
||||
|
||||
// Perform a second pass over our builders and perform a skip for the ones that did not get an
|
||||
// element appended to them in the first pass above.
|
||||
for (auto&& entry : _builders) {
|
||||
|
||||
@ -263,6 +263,27 @@ TEST_F(MeasurementMapTest, InitBuilders) {
|
||||
invariant(measurementMap.numFields() == 3);
|
||||
}
|
||||
|
||||
TEST_F(MeasurementMapTest, DuplicateFieldNameThrows) {
|
||||
BSONObjBuilder builder;
|
||||
builder.append("a", 1);
|
||||
builder.append("a", 2);
|
||||
|
||||
ASSERT_THROWS_CODE(
|
||||
measurementMap.insertOne(builder.obj(), /*metaField=*/boost::none), DBException, 12602102);
|
||||
}
|
||||
|
||||
TEST_F(MeasurementMapTest, DuplicateFieldNameInSubsequentThrows) {
|
||||
const BSONObj m = BSON("a" << 1);
|
||||
measurementMap.insertOne(m, /*metaField=*/boost::none);
|
||||
|
||||
BSONObjBuilder builder;
|
||||
builder.append("a", 2);
|
||||
builder.append("a", 3);
|
||||
|
||||
ASSERT_THROWS_CODE(
|
||||
measurementMap.insertOne(builder.obj(), /*metaField=*/boost::none), DBException, 12602102);
|
||||
}
|
||||
|
||||
using MeasurementMapTestDeathTest = MeasurementMapTest;
|
||||
DEATH_TEST_REGEX_F(MeasurementMapTestDeathTest, GetTimeForNonexistentField, "Invariant failure.*") {
|
||||
measurementMap.timeOfLastMeasurement(_timeField);
|
||||
|
||||
@ -305,5 +305,38 @@ TEST(MinMax, SearchLookupMap) {
|
||||
EXPECT_EQ(obj.search(obj.begin(), "50")->fieldName(), "50");
|
||||
}
|
||||
|
||||
TEST(MinMax, DuplicateFieldNamesWithLookupMap) {
|
||||
tracking::Context trackingContext;
|
||||
MinMaxStore minmax{trackingContext};
|
||||
auto obj = minmax.root();
|
||||
|
||||
// Insert 12 (kMaxLinearSearchLength) distinct fields ("0".."11") followed by two duplicate "a"
|
||||
// entries. This will trigger the lookup map internally in flat_bson.
|
||||
for (int i = 0; i < 12; ++i) {
|
||||
obj.insert(obj.end(), std::to_string(i));
|
||||
}
|
||||
obj.insert(obj.end(), "a");
|
||||
obj.insert(obj.end(), "a");
|
||||
|
||||
// Try to search for "a", this will trigger the lookup map internally in flat_bson as we fail to
|
||||
// find it within 'kMaxLinearSearchLength' attempts. The map cannot contain duplicates so this
|
||||
// search is well defined and throws.
|
||||
ASSERT_THROWS(obj.search(obj.begin(), "a"), AssertionException);
|
||||
|
||||
// Try to insert another duplicate which will throw earlier as the lookup map exists and needs
|
||||
// to be maintained.
|
||||
obj.insert(obj.begin(), "x");
|
||||
ASSERT_THROWS(obj.insert(obj.begin(), "x"), AssertionException);
|
||||
|
||||
// Searching for "a" or "x" is possible as we inserted one of them into the map.
|
||||
auto found = obj.search(obj.begin(), "a");
|
||||
ASSERT(found != obj.end());
|
||||
ASSERT_EQ(found->fieldName(), "a");
|
||||
|
||||
found = obj.search(obj.begin(), "x");
|
||||
ASSERT(found != obj.end());
|
||||
ASSERT_EQ(found->fieldName(), "x");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace mongo::timeseries::bucket_catalog
|
||||
|
||||
Loading…
Reference in New Issue
Block a user