SERVER-112511 Recategorize BSON validate checks by severity (#43270)

GitOrigin-RevId: 78ed3a303139eccd9d673912f527f67af99da0bc
This commit is contained in:
Benjamin Pearce 2025-11-10 13:23:08 -05:00 committed by MongoDB Bot
parent 639b649c9b
commit 5d920814a8
6 changed files with 394 additions and 390 deletions

View File

@ -78,8 +78,8 @@ resetDbpath(dbpath);
res = coll.validate({checkBSONConformance: false});
assert(res.valid, tojson(res));
assert.eq(res.nNonCompliantDocuments, 2);
assert.eq(res.warnings.length, 1);
assert.eq(res.nNonCompliantDocuments, 0);
assert.eq(res.warnings.length, 0);
MongoRunner.stopMongod(mongod, null, {skipValidation: true});
})();
@ -100,8 +100,8 @@ resetDbpath(dbpath);
let res = coll.validate({checkBSONConformance: false});
assert(res.valid, tojson(res));
assert.eq(res.nNonCompliantDocuments, 5);
assert.eq(res.warnings.length, 1);
assert.eq(res.nNonCompliantDocuments, 0);
assert.eq(res.warnings.length, 0);
res = coll.validate({checkBSONConformance: true});
assert(res.valid, tojson(res));
@ -136,8 +136,8 @@ resetDbpath(dbpath);
assert.commandWorked(testColl.insert({"md5Improper": improperMD5}));
res = assert.commandWorked(testColl.validate());
assert(res.valid, tojson(res));
assert.eq(res.nNonCompliantDocuments, 1);
assert.eq(res.warnings.length, 1);
assert.eq(res.nNonCompliantDocuments, 0);
assert.eq(res.warnings.length, 0);
// Tests that calling validate, with checkBSONConformance true, on a collection with an
// improperly sized md5 returns a warning.
@ -145,6 +145,7 @@ resetDbpath(dbpath);
res = assert.commandWorked(testColl.validate({checkBSONConformance: true}));
assert(res.valid, tojson(res));
assert.eq(res.nNonCompliantDocuments, 2);
assert.eq(res.warnings.length, 1);
MongoRunner.stopMongod(mongod, null, {skipValidation: true});
})();
@ -186,8 +187,8 @@ resetDbpath(dbpath);
let res = assert.commandWorked(testColl.validate());
assert(res.valid, tojson(res));
assert.eq(res.nNonCompliantDocuments, 7);
assert.eq(res.warnings.length, 1);
assert.eq(res.nNonCompliantDocuments, 0);
assert.eq(res.warnings.length, 0);
res = assert.commandWorked(testColl.validate({checkBSONConformance: true}));
assert(res.valid, tojson(res));
@ -245,8 +246,8 @@ resetDbpath(dbpath);
let res = assert.commandWorked(testColl.validate());
assert(res.valid, tojson(res));
assert.eq(res.nNonCompliantDocuments, 10);
assert.eq(res.warnings.length, 1);
assert.eq(res.nNonCompliantDocuments, 0);
assert.eq(res.warnings.length, 0);
res = assert.commandWorked(testColl.validate({checkBSONConformance: true}));
assert(res.valid, tojson(res));

View File

@ -233,7 +233,7 @@ function testInvalidUuid() {
// Insert 2 documents with invalid UUID (length is 4 or 20 instead of 16).
assert.commandWorked(primaryDb[collName].insert({u: HexData(4, "deadbeef")}));
assert.commandWorked(primaryDb[collName].insert({u: HexData(20, "deadbeefdeadbeefdeadbeefdeadbeef")}));
assert.commandWorked(primaryDb[collName].insert({u: HexData(4, "deadbeef".repeat(5))}));
replSet.awaitReplication();
runDbCheck(
@ -243,7 +243,7 @@ function testInvalidUuid() {
{
maxDocsPerBatch: maxDocsPerBatch,
validateMode: "dataConsistencyAndMissingIndexKeysCheck",
bsonValidateMode: "kExtended",
bsonValidateMode: "kFull",
},
true /* awaitCompletion */,
);

View File

@ -93,15 +93,24 @@ BSONObj buildSampleObj(uint64_t i) {
// clang-format on
}
BSONObj buildWideObj(uint64_t i, int numFields) {
std::vector<std::variant<int, std::string>> possibleValues;
possibleValues.reserve(6);
BSONObj buildDeepObj(uint64_t i, int numFields, int maxNesting, size_t possibleValueIndex = 0) {
std::vector<std::variant<int, std::string, BSONObj>> possibleValues;
possibleValues.reserve(8);
possibleValues.emplace_back(pseudoRandomAge(i));
possibleValues.emplace_back(static_cast<int>(i));
possibleValues.emplace_back(pseudoRandomZipCode(i));
possibleValues.emplace_back(pseudoRandom7Digits(i));
possibleValues.emplace_back(pseudoRandomPhoneNo(i));
possibleValues.emplace_back(pseudoRandomLongStr(i));
// Only while nesting budget remains, add two composite candidates: an object built one level
// shallower, and a five-element array of objects built with maxNesting == 0.
if (maxNesting > 0) {
    possibleValues.emplace_back(buildDeepObj(i, numFields, maxNesting - 1, possibleValueIndex));
    BSONArrayBuilder arrBuilder;
    // The element index is named distinctly from the seed parameter 'i' so that it does not
    // shadow it. The array elements are seeded with 0..4.
    for (int elemIdx = 0; elemIdx < 5; ++elemIdx) {
        arrBuilder.append(buildDeepObj(elemIdx, numFields, 0));
    }
    // NOTE(review): possibleValues has no array alternative, so this value is presumably held
    // as a BSONObj and later appended with object type rather than array type -- confirm that
    // the benchmark really exercises array validation.
    possibleValues.emplace_back(arrBuilder.arr());
}
BSONObjBuilder builder;
for (int j = 0; j < numFields; ++j) {
@ -111,8 +120,8 @@ BSONObj buildWideObj(uint64_t i, int numFields) {
std::string fieldName = fmt::format("{:08x}", hash);
// Round robin through the list of possible values computed previously, dealing with the
// fact that they may be either int or string.
auto& value = possibleValues[j % possibleValues.size()];
// fact that they may be either int or string or nested object or array.
auto& value = possibleValues[possibleValueIndex++ % possibleValues.size()];
std::visit(
OverloadedVisitor{
[&builder, &fieldName](const auto& v) { builder.append(fieldName, v); },
@ -308,14 +317,15 @@ void BM_validate_contents(benchmark::State& state) {
* The template parameter 'M' specifies the validation mode (default, extended, or full).
*/
template <BSONValidateModeEnum M>
void BM_validateWideObj(benchmark::State& state) {
void BM_validateObj(benchmark::State& state) {
auto arrayLen = state.range(0);
auto numFields = state.range(1);
auto nestingLimit = state.range(2);
BSONArrayBuilder builder;
size_t totalSize = 0;
for (auto i = 0; i < arrayLen; i++) {
builder.append(buildWideObj(i, numFields));
for (auto i = 0; i < arrayLen; ++i) {
builder.append(buildDeepObj(i, numFields, nestingLimit));
}
BSONObj array = builder.done();
@ -327,6 +337,7 @@ void BM_validateWideObj(benchmark::State& state) {
for (auto _ : state) {
benchmark::ClobberMemory();
benchmark::DoNotOptimize(validateBSON(array.objdata(), array.objsize(), M));
totalSize += array.objsize();
}
@ -344,12 +355,24 @@ BENCHMARK(BM_bsonIteratorSortedConstruction)->Ranges({{{1}, {100'000}}});
// BSON validation benchmarks.
BENCHMARK(BM_validate)->Ranges({{{1}, {1'000}}});
BENCHMARK(BM_validate_contents)->Ranges({{{1}, {1'000}}});
BENCHMARK_TEMPLATE(BM_validateWideObj, BSONValidateModeEnum::kDefault)
->Ranges({{64, 512}, {50, 1'000}});
BENCHMARK_TEMPLATE(BM_validateWideObj, BSONValidateModeEnum::kExtended)
->Ranges({{64, 512}, {50, 1'000}});
BENCHMARK_TEMPLATE(BM_validateWideObj, BSONValidateModeEnum::kFull)
->Ranges({{64, 512}, {50, 1'000}});
BENCHMARK_TEMPLATE(BM_validateObj, BSONValidateModeEnum::kDefault)
->Ranges({{64, 512}, {50, 1'000}, {0, 0}})
->Unit(benchmark::kMicrosecond);
BENCHMARK_TEMPLATE(BM_validateObj, BSONValidateModeEnum::kExtended)
->Ranges({{64, 512}, {50, 1'000}, {0, 0}})
->Unit(benchmark::kMicrosecond);
BENCHMARK_TEMPLATE(BM_validateObj, BSONValidateModeEnum::kFull)
->Ranges({{64, 512}, {50, 1'000}, {0, 0}})
->Unit(benchmark::kMicrosecond);
BENCHMARK_TEMPLATE(BM_validateObj, BSONValidateModeEnum::kDefault)
->Ranges({{64, 512}, {10, 20}, {2, 5}})
->Unit(benchmark::kMicrosecond);
BENCHMARK_TEMPLATE(BM_validateObj, BSONValidateModeEnum::kExtended)
->Ranges({{64, 512}, {10, 20}, {2, 5}})
->Unit(benchmark::kMicrosecond);
BENCHMARK_TEMPLATE(BM_validateObj, BSONValidateModeEnum::kFull)
->Ranges({{64, 512}, {10, 20}, {2, 5}})
->Unit(benchmark::kMicrosecond);
void BM_objBuilderAppendInt(benchmark::State& state) {
int n = state.range(0);

View File

@ -107,6 +107,7 @@ MONGO_STATIC_ASSERT(sizeof(kTypeInfoTable) == 32);
constexpr ErrorCodes::Error InvalidBSON = ErrorCodes::InvalidBSON;
constexpr ErrorCodes::Error NonConformantBSON = ErrorCodes::NonConformantBSON;
constexpr ErrorCodes::Error InvalidBSONColumn = ErrorCodes::InvalidBSONColumn;
class DefaultValidator {
public:
@ -116,7 +117,7 @@ public:
void popLevel() {}
BSONValidateModeEnum validateMode() {
BSONValidateModeEnum validateMode() const {
return BSONValidateModeEnum::kDefault;
}
};
@ -124,141 +125,37 @@ public:
class ExtendedValidator {
public:
void checkNonConformantElem(const char* ptr, uint32_t offsetToValue, uint8_t type) {
// Checks the field name before the element, if inside array.
checkArrIndex(ptr);
// Increments the pointer to the actual element value.
BSONElementValue bsonElemVal(ptr + offsetToValue);
switch (type) {
case stdx::to_underlying(BSONType::undefined):
case stdx::to_underlying(BSONType::dbRef):
case stdx::to_underlying(BSONType::symbol):
case stdx::to_underlying(BSONType::codeWScope):
uasserted(NonConformantBSON, fmt::format("Use of deprecated BSON type {}", type));
break;
case stdx::to_underlying(BSONType::array):
addIndexLevel(true /* isArr */);
break;
case stdx::to_underlying(BSONType::object):
addIndexLevel(false /* isArr */);
break;
case stdx::to_underlying(BSONType::regEx): {
_checkRegexOptions(bsonElemVal);
break;
}
case stdx::to_underlying(BSONType::binData): {
auto binData = bsonElemVal.BinData();
auto subtype = binData.type;
switch (subtype) {
case BinDataType::BinDataGeneral:
case BinDataType::Function:
case BinDataType::Sensitive:
case BinDataType::bdtCustom:
case BinDataType::Vector:
break;
case BinDataType::Column:
break;
case BinDataType::ByteArrayDeprecated:
case BinDataType::bdtUUID:
uasserted(
NonConformantBSON,
fmt::format("Use of deprecated BSON binary data subtype {}", subtype));
break;
case BinDataType::newUUID: {
constexpr int32_t UUIDLength = 16;
auto l = binData.length;
uassert(ErrorCodes::NonConformantBSON,
fmt::format(
"BSON UUID length should be 16 bytes. Found {} instead.", l),
l == UUIDLength);
const auto binData = bsonElemVal.BinData();
switch (binData.type) {
case BinDataType::Column: {
// Check for exceptions when decompressing.
// Calling size() decompresses the entire column.
BSONColumn(BSONElement(ptr)).size();
break;
}
case BinDataType::MD5Type: {
constexpr int32_t md5Length = 16;
auto l = binData.length;
uassert(NonConformantBSON,
fmt::format("MD5 must be 16 bytes, got {} instead.", l),
l == md5Length);
break;
}
case BinDataType::Encrypt: {
case BinDataType::Encrypt:
_checkEncryptedBSONValue(binData);
break;
}
default:
uasserted(ErrorCodes::NonConformantBSON,
fmt::format("Unknown BSON Binary Data Type {}", subtype));
// No additional checks on other BinTypes
break;
}
break;
}
}
}
void checkDuplicateFieldName() {}
void popLevel() {}
void popLevel() {
if (!indexCount.empty()) {
indexCount.pop_back();
}
}
BSONValidateModeEnum validateMode() {
BSONValidateModeEnum validateMode() const {
return BSONValidateModeEnum::kExtended;
}
private:
struct Level {
DecimalCounter<uint32_t> counter; // Counter used to check whether indexes are sequential.
bool isArr; // Indicates whether level is an array or other (object).
};
void addIndexLevel(bool isArr) {
if (isArr) {
indexCount.push_back(Level{DecimalCounter<uint32_t>(0), true /* isArr */});
} else {
indexCount.push_back(Level{DecimalCounter<uint32_t>(0), false /* isArr */});
}
}
bool inArr() {
return !indexCount.empty() && indexCount.back().isArr;
}
void checkArrIndex(const char* ptr) {
if (!inArr()) {
return;
}
// Checks the actual index, skipping the type byte.
auto actualIndex = StringData(ptr + sizeof(char));
uassert(NonConformantBSON,
fmt::format("Indices of BSON Array are invalid. Expected {}, but got {}.",
(StringData)indexCount.back().counter,
actualIndex),
indexCount.back().counter == actualIndex);
++indexCount.back().counter;
}
void _checkRegexOptions(const BSONElementValue& regex) {
// Checks that the options are in ascending alphabetical order and that they're all valid.
const static std::string validRegexOptions("ilmsux");
std::string options = regex.RegexFlags();
for (size_t i = 0; i < options.size(); i++) {
char option = options.at(i);
uassert(
NonConformantBSON,
fmt::format("Valid regex options are [ i, l, m, s, u, x], but found '{}' instead.",
option),
validRegexOptions.find(option) != std::string::npos);
if (i > 0) {
char previousOption = options.at(i - 1);
uassert(NonConformantBSON,
fmt::format("Regex options should be in ascending alphabetical order. "
"Found {} instead.",
options),
option > previousOption);
}
}
}
void _checkEncryptedBSONValue(const BSONBinData& binData) {
constexpr uint32_t UUIDLength = 16;
constexpr int32_t minLength = sizeof(uint8_t) + UUIDLength + sizeof(uint8_t);
@ -310,102 +207,157 @@ private:
}
}
}
protected:
// Behaves like a stack, used to validate array index count.
std::vector<Level> indexCount;
};
class FullValidator : private ExtendedValidator {
public:
FullValidator() noexcept {
_objFrames.push_back({.type = BSONType::object, .indexCounter = 0});
}
void checkNonConformantElem(const char* ptr, uint32_t offsetToValue, uint8_t type) {
registerFieldName(ptr + 1 /* fieldName */, offsetToValue - 1 /* length */);
// Validate that array indices are monotonically increasing base-10 strings, and field
// names are UTF8 strings.
_checkFieldName(ptr);
ExtendedValidator::checkNonConformantElem(ptr, offsetToValue, type);
// Increments the pointer to the actual element value.
BSONElementValue bsonElemVal(ptr + offsetToValue);
const BSONElementValue bsonElemVal(ptr + offsetToValue);
switch (type) {
case stdx::to_underlying(BSONType::array): {
objFrames.push_back({std::vector<StringData>(), false});
case stdx::to_underlying(BSONType::array):
case stdx::to_underlying(BSONType::object):
_objFrames.push_back({.type = BSONType(type), .indexCounter = 0});
break;
}
case stdx::to_underlying(BSONType::object): {
objFrames.push_back({std::vector<StringData>(), true});
break;
};
case stdx::to_underlying(BSONType::binData): {
auto subtype = bsonElemVal.BinData().type;
switch (subtype) {
case BinDataType::Column: {
// Check for exceptions when decompressing.
// Calling size() decompresses the entire column.
try {
BSONColumn(BSONElement(ptr)).size();
} catch (DBException& e) {
uasserted(
NonConformantBSON,
str::stream()
<< "Exception occurred while decompressing a BSON column: "
<< e.toString());
}
const auto binData = bsonElemVal.BinData();
switch (binData.type) {
case BinDataType::newUUID: {
constexpr int32_t UUIDLength = 16;
auto l = binData.length;
uassert(ErrorCodes::NonConformantBSON,
fmt::format(
"BSON UUID length should be 16 bytes. Found {} instead.", l),
l == UUIDLength);
break;
}
case BinDataType::MD5Type: {
constexpr int32_t md5Length = 16;
auto l = binData.length;
uassert(NonConformantBSON,
fmt::format("MD5 must be 16 bytes, got {} instead.", l),
l == md5Length);
break;
}
case BinDataType::ByteArrayDeprecated:
case BinDataType::bdtUUID:
uasserted(NonConformantBSON,
fmt::format("Use of deprecated BSON binary data subtype {} ({})",
typeName(BinDataType(binData.type)),
binData.type));
break;
default:
break;
}
break;
}
case stdx::to_underlying(BSONType::string): {
case stdx::to_underlying(BSONType::string):
// Increment pointer to actual value and then four more to skip size.
checkUTF8Char(bsonElemVal.String());
}
_checkUTF8Char(bsonElemVal.String());
break;
case stdx::to_underlying(BSONType::regEx):
_checkRegexOptions(bsonElemVal);
break;
case stdx::to_underlying(BSONType::undefined):
case stdx::to_underlying(BSONType::dbRef):
case stdx::to_underlying(BSONType::symbol):
case stdx::to_underlying(BSONType::codeWScope):
uasserted(NonConformantBSON,
fmt::format("Use of deprecated BSON type {} ({})",
typeName(BSONType(type)),
type));
break;
}
}
void checkDuplicateFieldName() {
invariant(!objFrames.empty());
auto& curr = objFrames.back().first;
// If curr is not an object frame, it will always be empty, so no need to check.
if (curr.empty()) {
objFrames.pop_back();
return;
}
invariant(objFrames.back().second);
std::sort(curr.begin(), curr.end());
auto duplicate = std::adjacent_find(curr.begin(), curr.end());
uassert(NonConformantBSON,
fmt::format("A BSON document contains a duplicate field name : {}", *duplicate),
duplicate == curr.end());
objFrames.pop_back();
}
void popLevel() {
ExtendedValidator::popLevel();
checkDuplicateFieldName();
if (_objFrames.size() > 0) {
if (_inObj()) {
// Sorting the names and scanning for adjacent duplicates was benchmarked against
// using set, unordered_set, and flat_hash_set, and was found to be significantly
// more performant. In the typical case, BSON documents are expected to be conformant
// and have unique keys, so the earlier detection permitted by a set data structure
// is not expected to be beneficial.
auto& cur = _objFrames.back();
std::sort(cur.fieldNames.begin(), cur.fieldNames.end());
const auto dup = std::adjacent_find(cur.fieldNames.begin(), cur.fieldNames.end());
uassert(
NonConformantBSON,
fmt::format("Duplicate key found \"{}\", element names must be unique.", *dup),
dup == cur.fieldNames.end());
}
_objFrames.pop_back();
}
}
BSONValidateModeEnum validateMode() {
BSONValidateModeEnum validateMode() const {
return BSONValidateModeEnum::kFull;
}
private:
// A given frame is an object if and only if frame.second == true.
std::vector<std::pair<std::vector<StringData>, bool>> objFrames = {
{std::vector<StringData>(), true}};
struct ObjectFrame {
BSONType type;
DecimalCounter<uint32_t> indexCounter;
std::vector<StringData> fieldNames;
};
void registerFieldName(const char* ptr, uint32_t length) {
// Check the field name is UTF-8 encoded.
StringData fieldName(ptr, length);
checkUTF8Char(fieldName);
if (objFrames.back().second) {
objFrames.back().first.emplace_back(fieldName);
};
}
void checkUTF8Char(StringData str) {
// Raises NonConformantBSON unless 'str' passes str::validUTF8().
void _checkUTF8Char(StringData str) {
    const bool isValidUtf8 = str::validUTF8(str);
    uassert(NonConformantBSON, "Found string that doesn't follow UTF-8 encoding.", isValidUtf8);
}
// True when the innermost open frame is an array; false when no frame is open.
bool _inArr() const {
    if (_objFrames.empty()) {
        return false;
    }
    return _objFrames.back().type == BSONType::array;
}
// True when the innermost open frame is an object; false when no frame is open.
bool _inObj() const {
    if (_objFrames.empty()) {
        return false;
    }
    return _objFrames.back().type == BSONType::object;
}
// Validates the name of the element starting at 'ptr' against the innermost frame.
// Inside an object, the name must be valid UTF-8 and is recorded in the frame's fieldNames.
// Inside an array, the name must equal the frame's current index counter, which is then
// advanced. Any other state is treated as unreachable.
void _checkFieldName(const char* ptr) {
    // The name begins right after the one-byte type tag.
    const char* const nameStart = ptr + sizeof(char);

    if (_inObj()) {
        const StringData fieldName(nameStart);
        _checkUTF8Char(fieldName);
        _objFrames.back().fieldNames.push_back(fieldName);
        return;
    }

    if (!_inArr()) {
        // Neither an object frame nor an array frame is open.
        MONGO_UNREACHABLE;
    }

    auto& frame = _objFrames.back();
    const StringData actualIndex(nameStart);
    uassert(NonConformantBSON,
            fmt::format("Indices of BSON Array are invalid. Expected {}, but got {}.",
                        StringData(frame.indexCounter),
                        actualIndex),
            frame.indexCounter == actualIndex);
    ++frame.indexCounter;
}
// Checks that the regex option string contains only the flags in "ilmsux" and that the flags
// are stored in strictly ascending alphabetical order. Raises NonConformantBSON otherwise.
void _checkRegexOptions(const BSONElementValue& regex) {
    static constexpr StringData validRegexOptions("ilmsux");
    const StringData opt = regex.RegexFlags();
    uassert(NonConformantBSON,
            fmt::format("Bad regex options {:?}: Only {:?} allowed", opt, validRegexOptions),
            opt.find_first_not_of(validRegexOptions) == std::string::npos);
    // Each flag must be strictly greater than its predecessor. std::is_sorted alone would also
    // accept a repeated flag such as "ii", so look for the first adjacent pair that is not
    // strictly increasing instead.
    const auto outOfOrder = std::adjacent_find(
        opt.begin(), opt.end(), [](char prev, char next) { return prev >= next; });
    uassert(NonConformantBSON,
            fmt::format("Bad regex options {:?}: Must be sorted", opt),
            outOfOrder == opt.end());
}
// Behaves like a stack with one frame per open object or array. Used to validate array
// indices and to collect object field names for duplicate detection.
std::vector<ObjectFrame> _objFrames;
};
template <bool precise>
@ -514,8 +466,8 @@ private:
typename std::conditional<precise, std::vector<Frame>, std::array<Frame, 32>>::type;
struct Cursor {
/* Also requires remaining buf after the skip (both BSONColumn and BSONObj guarantee this
by having at minimum a trailing EOO) */
/* Also requires remaining buf after the skip (both BSONColumn and BSONObj guarantee
this by having at minimum a trailing EOO) */
void skip(size_t len) {
uassert(InvalidBSON, "BSON size is larger than buffer size", (ptr += len) < end);
}
@ -632,7 +584,8 @@ private:
void _maybePopCodeWithScope(Cursor cursor) {
if constexpr (precise) {
// When ending the scope of a CodeWScope, pop the extra dummy frame and check its size.
// When ending the scope of a CodeWScope, pop the extra dummy frame and check its
// size.
if (_currFrame != _frames.begin() &&
(_currFrame - 1)->elem.type() == BSONType::codeWScope) {
invariant(_popFrame());
@ -671,8 +624,9 @@ private:
MONGO_COMPILER_NOINLINE void _validateIterative(Cursor cursor) {
do {
// Use the fact that the EOO byte is 0, just like the end of string, so checking for EOO
// is same as finding len == 0. The cursor cannot point past EOO, so the strlen is safe.
// Use the fact that the EOO byte is 0, just like the end of string, so checking for
// EOO is same as finding len == 0. The cursor cannot point past EOO, so the strlen
// is safe.
uassert(InvalidBSON, "BSON size is larger than buffer size", cursor.ptr < cursor.end);
while (size_t len = cursor.strlen()) {
uint8_t type = *cursor.ptr;
@ -687,7 +641,8 @@ private:
_validator.checkNonConformantElem(elemStart, len + 1, type);
if constexpr (precise) {
// See if the _id field was just validated. If so, set the global scope element.
// See if the _id field was just validated. If so, set the global scope
// element.
if (_currFrame == _frames.begin() && StringData(_currElem + 1) == "_id"_sd)
_currFrame->elem = BSONElement(_currElem); // This is fully validated now.
}
@ -699,8 +654,7 @@ private:
_maybePopCodeWithScope(cursor);
} while (_popFrame()); // Finished when there are no frames left.
// Check the top level field names.
_validator.checkDuplicateFieldName();
_validator.popLevel();
}
/**
@ -740,8 +694,8 @@ Status _doValidate(const char* originalBuffer,
BSONValidator validator,
ValidationVersion validationVersion) {
// First try validating using the fast but less precise version. That version will return
// a not-OK status for objects with CodeWScope or nesting exceeding 32 levels. These cases and
// actual failures will rerun the precise version that gives a detailed error context.
// a not-OK status for objects with CodeWScope or nesting exceeding 32 levels. These cases
// and actual failures will rerun the precise version that gives a detailed error context.
if (MONGO_likely((ValidateBuffer<false, BSONValidator>(
originalBuffer, maxLength, validator, validationVersion)
.validate()
@ -811,13 +765,15 @@ public:
ptr += size;
} else if (bsoncolumn::isInterleavedStartControlByte(control)) {
// interleaved objects begin with a reference object, and then a series
// of diff blocks for followup objects, ending with an EOO. Nesting interleaved
// mode is not allowed.
uassert(NonConformantBSON, "Nested interleaved mode", !interleavedMode);
ptr++;
uassert(NonConformantBSON,
"Invalid reference object for interleaved mode",
validateBSON(ptr, end - ptr, mode).isOK());
// of diff blocks for followup objects, ending with an EOO. Nesting
// interleaved mode is not allowed.
uassert(InvalidBSONColumn, "Nested interleaved mode", !interleavedMode);
++ptr;
const auto validateResult = validateBSON(ptr, end - ptr, mode);
uassert(InvalidBSONColumn,
fmt::format("Invalid reference object for interleaved mode, {}",
validateResult.reason()),
validateResult.isOK());
// we now know due to validateBSON that it is safe to interpret *ptr
BSONObj reference(ptr);
ptr += reference.objsize();
@ -826,7 +782,7 @@ public:
// Simple8b block sequence, just check for memory overflow of block count
uint8_t numBlocks = bsoncolumn::numSimple8bBlocksForControlByte(control);
int size = sizeof(uint64_t) * numBlocks;
uassert(NonConformantBSON,
uassert(InvalidBSONColumn,
"BSONColumn blocks exceed buffer size",
ptr + size + 1 <= end);
ptr += 1 + size;
@ -847,9 +803,10 @@ Status _doValidateColumn(const char* originalBuffer,
BSONValidateModeEnum mode,
ValidationVersion validationVersion) {
if constexpr (precise) {
// First try validating using the fast but less precise version. That version will return
// a not-OK status for objects with CodeWScope or nesting exceeding 32 levels. These cases
// and actual failures will rerun the precise version that gives a detailed error context.
// First try validating using the fast but less precise version. That version will
// return a not-OK status for objects with CodeWScope or nesting exceeding 32 levels.
// These cases and actual failures will rerun the precise version that gives a detailed
// error context.
if (MONGO_likely(ColumnValidator<false>::doValidateBSONColumn(
originalBuffer, maxLength, mode, validationVersion)
.isOK()))

View File

@ -245,121 +245,131 @@ TEST(BSONValidateExtended, MD5Size) {
auto improperSizeMD5 = "aaaaaaaaaaaaaaa";
BSONObj x2 = BSON("md5" << BSONBinData(improperSizeMD5, 15, MD5Type));
Status status = validateBSON(x2, mongo::BSONValidateModeEnum::kExtended);
ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);
ASSERT_OK(status);
status = validateBSON(x2, mongo::BSONValidateModeEnum::kFull);
ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);
}
TEST(BSONValidateExtended, BSONArrayIndexes) {
BSONObj arr = BSON("0" << "a"
<< "1"
<< "b");
BSONObj x1 = BSON("arr" << BSONArray(arr));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull));
// Valid arrays
for (const auto mode : {BSONValidateModeEnum::kDefault,
BSONValidateModeEnum::kExtended,
BSONValidateModeEnum::kFull}) {
{
const auto arr = BSON("0" << "a"
<< "1"
<< "b");
const auto x1 = BSON("arr" << BSONArray(arr));
ASSERT_OK(validateBSON(x1, mode));
}
{
const auto x1 = BSON("longArray" << BSONArray(BSON("0" << "a"
<< "1"
<< "b"
<< "2"
<< "c"
<< "3"
<< "d"
<< "4"
<< "e"
<< "5"
<< "f"
<< "6"
<< "g"
<< "7"
<< "h"
<< "8"
<< "i"
<< "9"
<< "j"
<< "10"
<< "k")));
ASSERT_OK(validateBSON(x1, mode));
}
{
const auto x1 =
BSON("validNestedArraysAndObjects"
<< BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "3"
<< BSONArray(BSON("0" << "a"
<< "1"
<< "b"))
<< "4"
<< "b")))));
ASSERT_OK(validateBSON(x1, mode));
}
}
arr = BSON("a" << 1 << "b" << 2);
x1 = BSON("nonNumericalArray" << BSONArray(arr));
Status status =
validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
arr = BSON("1" << "a"
<< "2"
<< "b");
x1 = BSON("nonSequentialArray" << BSONArray(arr));
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
x1 = BSON("nestedArraysAndObjects" << BSONArray(BSON("0" << "a"
<< "1"
<< BSONArray(BSON("0" << "a"
<< "2"
<< "b"))
<< "2"
<< "b")));
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
x1 = BSON("longArray" << BSONArray(BSON("0" << "a"
<< "1"
<< "b"
<< "2"
<< "c"
<< "3"
<< "d"
<< "4"
<< "e"
<< "5"
<< "f"
<< "6"
<< "g"
<< "7"
<< "h"
<< "8"
<< "i"
<< "9"
<< "j"
<< "10"
<< "k")));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull));
x1 = BSON("longNonSequentialArray" << BSONArray(BSON("0" << "a"
<< "1"
<< "b"
<< "2"
<< "c"
<< "3"
<< "d"
<< "4"
<< "e"
<< "5"
<< "f"
<< "6"
<< "g"
<< "7"
<< "h"
<< "8"
<< "i"
<< "9"
<< "j"
<< "11"
<< "k")));
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
x1 = BSON("validNestedArraysAndObjects"
<< BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "3"
<< BSONArray(BSON("0" << "a"
<< "1"
<< "b"))
<< "4"
<< "b")))));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended));
ASSERT_OK(validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull));
x1 = BSON("invalidNestedArraysAndObjects"
<< BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "1" << 0 << "1"
<< BSONArray(BSON("0" << "a"
<< "2"
<< "b"))
<< "1"
<< "b")))));
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kExtended);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
status = validateBSON(x1.objdata(), x1.objsize(), mongo::BSONValidateModeEnum::kFull);
ASSERT_EQ(status, ErrorCodes::NonConformantBSON);
}
// Invalid Arrays, only Full Validation generates an error
for (const auto& [mode, result] :
{std::pair{BSONValidateModeEnum::kDefault, ErrorCodes::OK},
std::pair{BSONValidateModeEnum::kExtended, ErrorCodes::OK},
std::pair{BSONValidateModeEnum::kFull, ErrorCodes::NonConformantBSON}}) {
{
const auto arr = BSON("a" << 1 << "b" << 2);
const auto x1 = BSON("nonNumericalArray" << BSONArray(arr));
const auto status = validateBSON(x1, mode);
ASSERT_EQ(status, result);
}
{
const auto arr = BSON("1" << "a"
<< "2"
<< "b");
const auto x1 = BSON("nonSequentialArray" << BSONArray(arr));
const auto status = validateBSON(x1, mode);
ASSERT_EQ(status, result);
}
{
const auto x1 =
BSON("nestedArraysAndObjects" << BSONArray(BSON("0" << "a"
<< "1"
<< BSONArray(BSON("0" << "a"
<< "2"
<< "b"))
<< "2"
<< "b")));
const auto status = validateBSON(x1, mode);
ASSERT_EQ(status, result);
}
{
const auto x1 = BSON("longNonSequentialArray" << BSONArray(BSON("0" << "a"
<< "1"
<< "b"
<< "2"
<< "c"
<< "3"
<< "d"
<< "4"
<< "e"
<< "5"
<< "f"
<< "6"
<< "g"
<< "7"
<< "h"
<< "8"
<< "i"
<< "9"
<< "j"
<< "11"
<< "k")));
const auto status = validateBSON(x1, mode);
ASSERT_EQ(status, result);
}
{
const auto x1 =
BSON("invalidNestedArraysAndObjects"
<< BSON("arr" << BSONArray(BSON("0" << BSON("2" << 1 << "3" << 0 << "4"
<< BSONArray(BSON("0" << "a"
<< "2"
<< "b"))
<< "5"
<< "b")))));
const auto status = validateBSON(x1, mode);
ASSERT_EQ(status, result);
}
}
} // namespace
TEST(BSONValidateExtended, BSONUTF8) {
auto x1 = BSON("ValidString" << "\x00"
@ -485,10 +495,11 @@ TEST(BSONValidateFast, AllTypesSimple) {
<< "13code" << BSONCode("(function(){})();") // JavaScript code
<< "14symbol" << BSONSymbol("symbol") // Symbol. Deprecated
<< "15code_w_s"
<< BSONCodeWScope("(function(){})();", BSON("a" << 1)) // JavaScript code w/ scope
<< "16int" << 42 // 32-bit integer
<< "17timestamp" << Timestamp(1, 2) // Timestamp
<< "18long" << 0x0123456789abcdefll // 64-bit integer
<< BSONCodeWScope("(function(){})();",
BSON("a" << 1)) // JavaScript code w/ scope
<< "16int" << 42 // 32-bit integer
<< "17timestamp" << Timestamp(1, 2) // Timestamp
<< "18long" << 0x0123456789abcdefll // 64-bit integer
<< "19decimal" << Decimal128("0.30") // 128-bit decimal floating point
);
ASSERT_OK(validateBSON(x));
@ -539,9 +550,9 @@ TEST(BSONValidateFast, ErrorIsInId) {
const BSONObj x = ob.done();
const Status status = validateBSON(x);
ASSERT_NOT_OK(status);
ASSERT_EQUALS(
status.reason(),
"Not null terminated string in element with field name '_id' in object with unknown _id");
ASSERT_EQUALS(status.reason(),
"Not null terminated string in element with field name '_id' in object with "
"unknown _id");
}
TEST(BSONValidateFast, NonTopLevelId) {
@ -750,8 +761,8 @@ TEST(BSONValidateFast, ErrorTooShort) {
}
TEST(BSONValidateExtended, RegexOptions) {
// Checks that RegEx with invalid options strings (either an unknown flag or not in alphabetical
// order) throws a warning.
// Checks that RegEx with invalid options strings (either an unknown flag or not in
// alphabetical order) throws a warning.
std::pair<Status, Status> stats{Status::OK(), Status::OK()};
auto fullyValidate = [&](BSONObj obj) {
return std::pair{
@ -765,17 +776,17 @@ TEST(BSONValidateExtended, RegexOptions) {
obj = BSON("a" << BSONRegEx("a*.conn", "ilmxus"));
stats = fullyValidate(obj);
ASSERT_EQ(stats.first, ErrorCodes::NonConformantBSON);
ASSERT_OK(stats.first);
ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON);
obj = BSON("a" << BSONRegEx("a*.conn", "ikl"));
stats = fullyValidate(obj);
ASSERT_EQ(stats.first, ErrorCodes::NonConformantBSON);
ASSERT_OK(stats.first);
ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON);
obj = BSON("a" << BSONRegEx("a*.conn", "ilmz"));
stats = fullyValidate(obj);
ASSERT_EQ(stats.first, ErrorCodes::NonConformantBSON);
ASSERT_OK(stats.first);
ASSERT_EQ(stats.second, ErrorCodes::NonConformantBSON);
}
@ -788,11 +799,12 @@ TEST(BSONValidateExtended, UUIDLength) {
};
BSONObj x = BSON("u" << BSONBinData("de", 2, BinDataType::newUUID));
stats = fullyValidate(x);
ASSERT_EQ(stats.first.code(), ErrorCodes::NonConformantBSON);
ASSERT_OK(stats.first);
ASSERT_EQ(stats.second.code(), ErrorCodes::NonConformantBSON);
x = BSON("u" << BSONBinData("aaaaaaaaaaaaaaaaaaaaaa", 22, BinDataType::newUUID));
stats = fullyValidate(x);
ASSERT_EQ(stats.first.code(), ErrorCodes::NonConformantBSON);
ASSERT_OK(stats.first);
ASSERT_EQ(stats.second.code(), ErrorCodes::NonConformantBSON);
// Checks that a valid UUID does not throw any warnings.
@ -805,37 +817,37 @@ TEST(BSONValidateExtended, UUIDLength) {
// Checks how validateBSON() classifies the values this test groups under "deprecated types":
// every case below is expected to pass in kExtended mode and to be reported as
// NonConformantBSON in kFull mode.
TEST(BSONValidateExtended, DeprecatedTypes) {
    // Undefined value.
    BSONObj obj = BSON("a" << BSONUndefined);
    Status status = validateBSON(obj, BSONValidateModeEnum::kExtended);
    ASSERT_OK(status);
    status = validateBSON(obj, BSONValidateModeEnum::kFull);
    ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);

    // DBRef value.
    obj = BSON("b" << BSONDBRef("db", OID("dbdbdbdbdbdbdbdbdbdbdbdb")));
    status = validateBSON(obj, BSONValidateModeEnum::kExtended);
    ASSERT_OK(status);
    status = validateBSON(obj, BSONValidateModeEnum::kFull);
    ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);

    // Symbol value.
    obj = BSON("c" << BSONSymbol("symbol"));
    status = validateBSON(obj, BSONValidateModeEnum::kExtended);
    ASSERT_OK(status);
    status = validateBSON(obj, BSONValidateModeEnum::kFull);
    ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);

    // JavaScript code with scope.
    obj = BSON("d" << BSONCodeWScope("(function(){})();", BSON("a" << 1)));
    status = validateBSON(obj, BSONValidateModeEnum::kExtended);
    ASSERT_OK(status);
    status = validateBSON(obj, BSONValidateModeEnum::kFull);
    ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);

    // Empty BinData with the ByteArrayDeprecated subtype.
    obj = BSON("e" << BSONBinData("", 0, ByteArrayDeprecated));
    status = validateBSON(obj, BSONValidateModeEnum::kExtended);
    ASSERT_OK(status);
    status = validateBSON(obj, BSONValidateModeEnum::kFull);
    ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);

    // Empty BinData with the bdtUUID subtype.
    obj = BSON("f" << BSONBinData("", 0, bdtUUID));
    status = validateBSON(obj, BSONValidateModeEnum::kExtended);
    ASSERT_OK(status);
    status = validateBSON(obj, BSONValidateModeEnum::kFull);
    ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);
}
@ -1011,10 +1023,8 @@ TEST(BSONValidateExtended, BSONEncryptedValue) {
// Checks that a BinData element whose subtype is the raw value 42 (the subtype this test treats
// as "unknown") is accepted by validateBSON() in both kExtended and kFull modes.
TEST(BSONValidateExtended, UnknownBinDataType) {
    BSONObj obj = BSON("unknownBinData" << BSONBinData("", 0, static_cast<BinDataType>(42)));
    ASSERT_OK(validateBSON(obj, BSONValidateModeEnum::kExtended));
    ASSERT_OK(validateBSON(obj, BSONValidateModeEnum::kFull));
}
TEST(BSONValidateColumn, BSONColumnInBSON) {
@ -1032,9 +1042,13 @@ TEST(BSONValidateColumn, BSONColumnInBSON) {
status = validateBSON(obj, BSONValidateModeEnum::kFull);
ASSERT_OK(status);
// Change one important byte.
((char*)columnData.data)[0] = '0';
// Create a copy of the column buffer and change one important byte.
std::vector<char> nonConformantBuf(columnData.length);
memcpy(nonConformantBuf.data(), columnData.data, columnData.length);
nonConformantBuf[0] = '0';
columnData.data = nonConformantBuf.data();
obj = BSON("a" << columnData);
status = validateBSON(obj, BSONValidateModeEnum::kDefault);
ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);
status = validateBSON(obj, BSONValidateModeEnum::kExtended);
@ -1052,8 +1066,11 @@ TEST(BSONValidateColumn, BSONColumnInBSONRespectsVersion) {
BSONBinData columnData = cb.finalize();
BSONObj obj = BSON("a" << columnData);
// Change one important byte.
((char*)columnData.data)[0] = '0';
// Create a copy of the column buffer and change one important byte.
std::vector<char> nonConformantBuf(columnData.length);
memcpy(nonConformantBuf.data(), columnData.data, columnData.length);
nonConformantBuf[0] = '0';
columnData.data = nonConformantBuf.data();
obj = BSON("a" << columnData);
// Default refuses bad column
@ -1076,9 +1093,9 @@ TEST(BSONValidateColumn, BSONColumnInBSONRespectsVersion) {
status = validateBSON(obj, BSONValidateModeEnum::kDefault, mongo::V1_Original);
ASSERT_OK(status);
status = validateBSON(obj, BSONValidateModeEnum::kExtended, mongo::V1_Original);
ASSERT_OK(status);
ASSERT_EQ(status.code(), ErrorCodes::InvalidBSONColumn);
status = validateBSON(obj, BSONValidateModeEnum::kFull, mongo::V1_Original);
ASSERT_EQ(status.code(), ErrorCodes::NonConformantBSON);
ASSERT_EQ(status.code(), ErrorCodes::InvalidBSONColumn);
}
TEST(BSONValidateColumn, BSONColumnMissingEOO) {
@ -1249,7 +1266,7 @@ TEST(BSONValidateColumn, BSONColumnInterleavedNestedInterleaved) {
buffer.appendChar(0);
buffer.appendChar(0);
ASSERT_EQ(validateBSONColumn(buffer.buf(), buffer.len()), ErrorCodes::NonConformantBSON);
ASSERT_EQ(validateBSONColumn(buffer.buf(), buffer.len()), ErrorCodes::InvalidBSONColumn);
}
TEST(BSONValidateColumn, BSONColumnNoOverflowBlocksShort) {
@ -1309,14 +1326,16 @@ TEST(BSONValidateColumn, BSONColumnWithCodeWScope) {
BSONObj obj = BSON("a" << BSONCodeWScope("code", BSON("c" << 1)));
BSONColumnBuilder cb;
cb.append(obj.getField("a"));
BSONBinData columnData = cb.finalize();
ASSERT_OK(validateBSONColumn((char*)columnData.data, columnData.length));
ASSERT_FALSE(validateBSONColumn(
(char*)columnData.data, columnData.length, BSONValidateModeEnum::kExtended)
.isOK());
ASSERT_FALSE(
validateBSONColumn((char*)columnData.data, columnData.length, BSONValidateModeEnum::kFull)
.isOK());
const BSONBinData columnData = cb.finalize();
ASSERT_OK(validateBSONColumn(static_cast<const char*>(columnData.data), columnData.length));
ASSERT_OK(validateBSONColumn(static_cast<const char*>(columnData.data),
columnData.length,
BSONValidateModeEnum::kExtended));
ASSERT_EQ(validateBSONColumn(static_cast<const char*>(columnData.data),
columnData.length,
BSONValidateModeEnum::kFull)
.code(),
ErrorCodes::NonConformantBSON);
}
TEST(BSONValidateColumn, BSONColumnWithArrayNestedCodeWScope) {
@ -1326,28 +1345,32 @@ TEST(BSONValidateColumn, BSONColumnWithArrayNestedCodeWScope) {
array.done();
BSONColumnBuilder cb;
cb.append(array.arr());
BSONBinData columnData = cb.finalize();
ASSERT_OK(validateBSONColumn((char*)columnData.data, columnData.length));
ASSERT_FALSE(validateBSONColumn(
(char*)columnData.data, columnData.length, BSONValidateModeEnum::kExtended)
.isOK());
ASSERT_FALSE(
validateBSONColumn((char*)columnData.data, columnData.length, BSONValidateModeEnum::kFull)
.isOK());
const BSONBinData columnData = cb.finalize();
ASSERT_OK(validateBSONColumn(static_cast<const char*>(columnData.data), columnData.length));
ASSERT_OK(validateBSONColumn(static_cast<const char*>(columnData.data),
columnData.length,
BSONValidateModeEnum::kExtended));
ASSERT_EQ(validateBSONColumn(static_cast<const char*>(columnData.data),
columnData.length,
BSONValidateModeEnum::kFull)
.code(),
ErrorCodes::InvalidBSONColumn);
}
// Builds a BSONColumn from a single object that nests a CodeWScope value and checks the result
// of validateBSONColumn() per mode: OK with the default mode argument, OK in kExtended, and
// InvalidBSONColumn in kFull.
TEST(BSONValidateColumn, BSONColumnWithObjectNestedCodeWScope) {
    BSONObj obj = BSON("a" << BSONCodeWScope("code", BSON("c" << 1)));
    BSONColumnBuilder cb;
    cb.append(BSON("a" << obj));

    // Finalize once and reuse the resulting buffer for every validation call below.
    const BSONBinData columnData = cb.finalize();
    ASSERT_OK(validateBSONColumn(static_cast<const char*>(columnData.data), columnData.length));
    ASSERT_OK(validateBSONColumn(static_cast<const char*>(columnData.data),
                                 columnData.length,
                                 BSONValidateModeEnum::kExtended));
    ASSERT_EQ(validateBSONColumn(static_cast<const char*>(columnData.data),
                                 columnData.length,
                                 BSONValidateModeEnum::kFull)
                  .code(),
              ErrorCodes::InvalidBSONColumn);
}
} // namespace

View File

@ -163,7 +163,7 @@ TEST_F(DbCheckTest, DbCheckDocumentWithInvalidUuid) {
DbCheckValidationModeEnum::dataConsistencyAndMissingIndexKeysCheck,
"" /* secondaryIndex */,
false /* skipLookupForExtraKeys */,
BSONValidateModeEnum::kExtended);
BSONValidateModeEnum::kFull);
ASSERT_OK(runHashForCollectionCheck(opCtx, docMinKey, docMaxKey, params));
// Shut down the health log writer so that the writes get flushed to the health log collection.