From 4edd43754038019de8c0d291443994c5c1653998 Mon Sep 17 00:00:00 2001 From: henrikedin Date: Wed, 6 May 2026 00:23:00 -0400 Subject: [PATCH] SERVER-126021 Fix handling of duplicate fields in flat_bson (#53259) Co-authored-by: Erin McNulty Co-authored-by: Jan GitOrigin-RevId: 20315d70b93ab896168da81e4ed502d859ad0511 --- .github/CODEOWNERS | 11 -- buildscripts/errorcodes.py | 2 +- copy.bara.sky | 33 ----- copy.bara.staging.sky | 31 ---- docs/exception_architecture.md | 181 +++++++++++++----------- src/mongo/db/auth/README.md | 18 +-- src/mongo/db/timeseries/flat_bson.cpp | 13 +- src/mongo/db/timeseries/minmax_test.cpp | 31 ++++ 8 files changed, 151 insertions(+), 169 deletions(-) delete mode 100644 .github/CODEOWNERS delete mode 100644 copy.bara.sky delete mode 100644 copy.bara.staging.sky diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index eb44d2a81be..00000000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,11 +0,0 @@ -# All backports must be approved by the server release team -* @10gen/server-release - -# Exclude some test files and READMEs from the backport approvals -/etc/backports_required_for_multiversion_tests.yml -/etc/*.suppressions -/README.md -**/README.md - -# Exclude all the tests under "jstests" directories from backport approvals -**/jstests/**/* diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py index e75c36f36e1..b8be15eefe5 100755 --- a/buildscripts/errorcodes.py +++ b/buildscripts/errorcodes.py @@ -24,7 +24,7 @@ except ImportError: import re # type: ignore ASSERT_NAMES = ["uassert", "massert", "fassert", "fassertFailed"] -MAXIMUM_CODE = 9999999 # JIRA Ticket + XX +MAXIMUM_CODE = 99999999 # JIRA Ticket + XX # pylint: disable=invalid-name codes = [] # type: ignore diff --git a/copy.bara.sky b/copy.bara.sky deleted file mode 100644 index 66c97dcf486..00000000000 --- a/copy.bara.sky +++ /dev/null @@ -1,33 +0,0 @@ -# This configuration is for migrating code from one Git repository to another using Copybara. -# It selectively copies content, excluding specific paths and preserving authorship. - -# To test locally -# sourceUrl = "/path/to/source" -# destinationUrl = "/path/to/dest" - -sourceUrl = "https://github.com/10gen/mongo.git" -destinationUrl = "https://github.com/mongodb/mongo.git" - -core.workflow( - name = "default", - origin = git.origin( - url = sourceUrl, - ref = "v5.0", - ), - destination = git.destination( - url = destinationUrl, - fetch = "v5.0", - push = "v5.0", - ), - # Change path to the folder you want to publish publicly - origin_files = glob(["**"], exclude = ["src/mongo/db/modules/**"]), - authoring = authoring.pass_thru("MongoDB "), - mode = "ITERATIVE", - transformations = [ - # (^.*?) - matches the first line (without the newline char) - # \n - matches the first newline (or nothing at all if there is no newline). If there is no match then nothing happens - # ((\n|.)*) - matches everything after - # Overall, this copies only the first line of the commit rather than the body - metadata.scrubber("(^.*?)\n((\n|.)*)", replacement = "$1"), - ], -) diff --git a/copy.bara.staging.sky b/copy.bara.staging.sky deleted file mode 100644 index 1ea02ac2096..00000000000 --- a/copy.bara.staging.sky +++ /dev/null @@ -1,31 +0,0 @@ -# This configuration is for migrating code from one Git repository to another using Copybara. -# It selectively copies content, excluding specific paths and preserving authorship. -sourceUrl = "https://github.com/10gen/mongo.git" -destinationUrl = "https://github.com/10gen/mongo-copybara.git" - -core.workflow( - name = "default", - origin = git.origin( - url = sourceUrl, - ref = "v5.0", - ), - destination = git.destination( - url = destinationUrl, - fetch = "v5.0", - push = "v5.0", - ), - # Change path to the folder you want to publish publicly - origin_files = glob(["**"], exclude=["src/mongo/db/modules/**"]), - - authoring = authoring.pass_thru("MongoDB "), - - mode = "ITERATIVE", - # Change the path here to the folder you want to publish publicly - transformations = [ - # (^.*?) - matches the first line (without the newline char) - # \n - matches the first newline (or nothing at all if there is no newline). If there is no match then nothing happens - # ((\n|.)*) - matches everything after - # Overall, this copies only the first line of the commit rather than the body - metadata.scrubber("(^.*?)\n((\n|.)*)", replacement = "$1"), - ], -) diff --git a/docs/exception_architecture.md b/docs/exception_architecture.md index d5324a6cef3..5fc9e68ae54 100644 --- a/docs/exception_architecture.md +++ b/docs/exception_architecture.md @@ -2,129 +2,146 @@ MongoDB code uses the following types of assertions that are available for use: - `uassert` and `iassert` - - Checks for per-operation user errors. Operation-fatal. + - Checks for per-operation user errors. Operation-fatal. - `tassert` - - Like uassert, but inhibits clean shutdown. + - Like uassert, but inhibits clean shutdown. - `massert` - - Checks per-operation invariants. Operation-fatal. + - Checks per-operation invariants. Operation-fatal. - `fassert` - - Checks fatal process invariants. Process-fatal. Use to detect unexpected situations (such - as a system function returning an unexpected error status). + - Checks fatal process invariants. Process-fatal. Use to detect unexpected + situations (such as a system function returning an unexpected error + status). - `invariant` - - Checks process invariant. Process-fatal. Use to detect code logic errors ("pointer should - never be null", "we should always be locked"). + - Checks process invariant. Process-fatal. Use to detect code logic errors + ("pointer should never be null", "we should always be locked"). -__Note__: Calling C function `assert` is not allowed. Use one of the above instead. +__Note__: Calling C function `assert` is not allowed. Use one of the above +instead. The following types of assertions are deprecated: - `verify` - - Checks per-operation invariants. A synonym for massert but doesn't require an error code. - Process fatal in debug mode. Do not use for new code; use invariant or fassert instead. + - Checks per-operation invariants. A synonym for massert but doesn't require + an error code. Process fatal in debug mode. Do not use for new code; use + invariant or fassert instead. - `dassert` - - Calls `invariant` but only in debug mode. Do not use! + - Calls `invariant` but only in debug mode. Do not use! -MongoDB uses a series of `ErrorCodes` (defined in [mongo/base/error_codes.yml][error_codes_yml]) to -identify and categorize error conditions. `ErrorCodes` are defined in a YAML file and converted to -C++ files using [MongoDB's IDL parser][idlc_py] at compile time. We also use error codes to create -`Status` objects, which convey the success or failure of function invocations across the code base. -`Status` objects are also used internally by `DBException`, MongoDB's primary exception class, and -its children (e.g., `AssertionException`) as a means of maintaining metadata for exceptions. The +MongoDB uses a series of `ErrorCodes` (defined in +[mongo/base/error_codes.yml][error_codes_yml]) to identify and categorize error +conditions. `ErrorCodes` are defined in a YAML file and converted to C++ files +using [MongoDB's IDL parser][idlc_py] at compile time. We also use error codes +to create `Status` objects, which convey the success or failure of function +invocations across the code base. `Status` objects are also used internally by +`DBException`, MongoDB's primary exception class, and its children (e.g., +`AssertionException`) as a means of maintaining metadata for exceptions. The proper usage of these constructs is described below. ## Considerations -When per-operation invariant checks fail, the current operation fails, but the process and -connection persist. This means that `massert`, `uassert`, `iassert` and `verify` only -terminate the current operation, not the whole process. Be careful not to corrupt process state by -mistakenly using these assertions midway through mutating process state. Examples of this include -`uassert`, `iassert` and `massert` inside of constructors and destructors. +When per-operation invariant checks fail, the current operation fails, but the +process and connection persist. This means that `massert`, `uassert`, `iassert` +and `verify` only terminate the current operation, not the whole process. Be +careful not to corrupt process state by mistakenly using these assertions midway +through mutating process state. Examples of this include `uassert`, `iassert` +and `massert` inside of constructors and destructors. -`fassert` failures will terminate the entire process; this is used for low-level checks where -continuing might lead to corrupt data or loss of data on disk. +`fassert` failures will terminate the entire process; this is used for low-level +checks where continuing might lead to corrupt data or loss of data on disk. -`tassert` is a hybrid - it will fail the operation like `uassert`, but also triggers a -"deferred-fatality tripwire flag". If this flag is set during clean shutdown, the process will -invoke the tripwire fatal assertion. This is useful for ensuring that operation failures will cause -a test suite to fail, without resorting to different behavior during testing, and without allowing -user operations to potentially disrupt production deployments by terminating the server. +`tassert` is a hybrid - it will fail the operation like `uassert`, but also +triggers a "deferred-fatality tripwire flag". If this flag is set during clean +shutdown, the process will invoke the tripwire fatal assertion. This is useful +for ensuring that operation failures will cause a test suite to fail, without +resorting to different behavior during testing, and without allowing user +operations to potentially disrupt production deployments by terminating the +server. -Both `massert` and `uassert` take error codes, so that all assertions have codes associated with -them. Currently, programmers are free to provide the error code by either using a unique location -number or choose from existing `ErrorCodes`. Unique location numbers are assigned incrementally and -have no meaning other than a way to associate a log message with a line of code. - -`iassert` provides similar functionality to `uassert`, but it logs at a higher level and -does not increment user assertion counters. We should always choose `iassert` over `uassert` -when we expect a failure, a failure might be recoverable, or failure accounting is not interesting. +Both `massert` and `uassert` take error codes, so that all assertions have codes +associated with them. Currently, programmers are free to provide the error code +by either using a unique location number or choose from existing `ErrorCodes`. +Unique location numbers are assigned incrementally and have no meaning other +than a way to associate a log message with a line of code. +`iassert` provides similar functionality to `uassert`, but it logs at a higher +level and does not increment user assertion counters. We should always choose +`iassert` over `uassert` when we expect a failure, a failure might be +recoverable, or failure accounting is not interesting. ## Exception -A failed operation-fatal assertion throws an `AssertionException` or a child of that. -The inheritance hierarchy resembles: +A failed operation-fatal assertion throws an `AssertionException` or a child of +that. The inheritance hierarchy resembles: - `std::exception` - - `mongo::DBException` - - `mongo::AssertionException` - - `mongo::UserException` - - `mongo::MsgAssertionException` + - `mongo::DBException` + - `mongo::AssertionException` + - `mongo::UserException` + - `mongo::MsgAssertionException` See util/assert_util.h. -Generally, code in the server should be able to tolerate (e.g., catch) a `DBException`. Server -functions must be structured with exception safety in mind, such that `DBException` can propagate -upwards harmlessly. The code should also expect, and properly handle, `UserException`. We use +Generally, code in the server should be able to tolerate (e.g., catch) a +`DBException`. Server functions must be structured with exception safety in +mind, such that `DBException` can propagate upwards harmlessly. The code should +also expect, and properly handle, `UserException`. We use [Resource Acquisition Is Initialization][raii] heavily. ## ErrorCodes and Status -MongoDB uses `ErrorCodes` both internally and externally: a subset of error codes (e.g., -`BadValue`) are used externally to pass errors over the wire and to clients. These error codes are -the means for MongoDB processes (e.g., *mongod* and *mongo*) to communicate errors, and are visible -to client applications. Other error codes are used internally to indicate the underlying reason for -a failed operation. For instance, `PeriodicJobIsStopped` is an internal error code that is passed -to callback functions running inside a [`PeriodicRunner`][periodic_runner_h] once the runner is -stopped. The internal error codes are for internal use only and must never be returned to clients +MongoDB uses `ErrorCodes` both internally and externally: a subset of error +codes (e.g., `BadValue`) are used externally to pass errors over the wire and to +clients. These error codes are the means for MongoDB processes (e.g., *mongod* +and *mongo*) to communicate errors, and are visible to client applications. +Other error codes are used internally to indicate the underlying reason for a +failed operation. For instance, `PeriodicJobIsStopped` is an internal error code +that is passed to callback functions running inside a +[`PeriodicRunner`][periodic_runner_h] once the runner is stopped. The internal +error codes are for internal use only and must never be returned to clients (i.e., in a network response). -Zero or more error categories can be assigned to `ErrorCodes`, which allows a single handler to -serve a group of `ErrorCodes`. `RetriableError`, for instance, is an `ErrorCategory` that includes -all retriable `ErrorCodes` (e.g., `HostUnreachable` and `HostNotFound`). This implies that an -operation that fails with any error code in this category can be safely retried. We can use -`ErrorCodes::isA<${category}>(${error})` to check if `error` belongs to `category`. Alternatively, -we can use `ErrorCodes::is${category}(${error})` to check error categories. Both methods provide -similar functionality. +Zero or more error categories can be assigned to `ErrorCodes`, which allows a +single handler to serve a group of `ErrorCodes`. `RetriableError`, for instance, +is an `ErrorCategory` that includes all retriable `ErrorCodes` (e.g., +`HostUnreachable` and `HostNotFound`). This implies that an operation that fails +with any error code in this category can be safely retried. We can use +`ErrorCodes::isA<${category}>(${error})` to check if `error` belongs to +`category`. Alternatively, we can use `ErrorCodes::is${category}(${error})` to +check error categories. Both methods provide similar functionality. -To represent the status of an executed operation (e.g., a command or a function invocation), we -use `Status` objects, which represent an error state or the absence thereof. A `Status` uses the -standardized `ErrorCodes` to determine the underlying cause of an error. It also allows assigning -a textual description, as well as code-specific extra info, to the error code for further -clarification. The extra info is a subclass of `ErrorExtraInfo` and specific to `ErrorCodes`. Look -for `extra` in [here][error_codes_yml] for reference. +To represent the status of an executed operation (e.g., a command or a function +invocation), we use `Status` objects, which represent an error state or the +absence thereof. A `Status` uses the standardized `ErrorCodes` to determine the +underlying cause of an error. It also allows assigning a textual description, as +well as code-specific extra info, to the error code for further clarification. +The extra info is a subclass of `ErrorExtraInfo` and specific to `ErrorCodes`. +Look for `extra` in [here][error_codes_yml] for reference. -MongoDB provides `StatusWith` to enable functions to return an error code or a value without -requiring them to have multiple outputs. This makes exception-free code cleaner by avoiding -functions with multiple out parameters. We can either pass an error code or an actual value to a -`StatusWith` object, indicating failure or success of the operation. For examples of the proper -usage of `StatusWith`, see [mongo/base/status_with.h][status_with_h] and -[mongo/base/status_with_test.cpp][status_with_test_cpp]. It is highly recommended to use `uassert` -or `iassert` over `StatusWith`, and catch exceptions instead of checking `Status` objects -returned from functions. Using `StatusWith` to indicate exceptions, instead of throwing via -`uassert` and `iassert`, makes it very difficult to identify that an error has occurred, and +MongoDB provides `StatusWith` to enable functions to return an error code or a +value without requiring them to have multiple outputs. This makes exception-free +code cleaner by avoiding functions with multiple out parameters. We can either +pass an error code or an actual value to a `StatusWith` object, indicating +failure or success of the operation. For examples of the proper usage of +`StatusWith`, see [mongo/base/status_with.h][status_with_h] and +[mongo/base/status_with_test.cpp][status_with_test_cpp]. It is highly +recommended to use `uassert` or `iassert` over `StatusWith`, and catch +exceptions instead of checking `Status` objects returned from functions. Using +`StatusWith` to indicate exceptions, instead of throwing via `uassert` and +`iassert`, makes it very difficult to identify that an error has occurred, and could lead to the wrong error being propagated. ## Gotchas Gotchas to watch out for: -- Generally, do not throw an `AssertionException` directly. Functions like `uasserted()` do work - beyond just that. In particular, it makes sure that the `getLastError` structures are set up - properly. -- Think about the location of your asserts in constructors, as the destructor would not be - called. But at a minimum, use `wassert` a lot therein, we want to know if something is wrong. -- Do __not__ throw in destructors or allow exceptions to leak out (if you call a function that - may throw). +- Generally, do not throw an `AssertionException` directly. Functions like + `uasserted()` do work beyond just that. In particular, it makes sure that + the `getLastError` structures are set up properly. +- Think about the location of your asserts in constructors, as the destructor + would not be called. But at a minimum, use `wassert` a lot therein, we want + to know if something is wrong. +- Do __not__ throw in destructors or allow exceptions to leak out (if you call + a function that may throw). [raii]: https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization diff --git a/src/mongo/db/auth/README.md b/src/mongo/db/auth/README.md index 87354dded20..1f2a39b54ef 100644 --- a/src/mongo/db/auth/README.md +++ b/src/mongo/db/auth/README.md @@ -180,7 +180,7 @@ the storage mechanism is listed as a sub-bullet below. authentication. GSSAPI is the communication method used to communicate with Kerberos servers and with clients. When initializing this auth mechanism, the server tries to acquire its credential information from the KDC by calling - [`tryAcquireServerCredential`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/sasl/mongo_gssapi.h#L36). + [`tryAcquireServerCredential`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/sasl/mongo_gssapi.h#L36). If this is not approved, the server fasserts and the mechanism is not registered. On Windows, SChannel provides a `GSSAPI` library for the server to use. On other platforms, the Cyrus SASL library is used to make calls to the KDC (Kerberos key distribution center). @@ -607,30 +607,30 @@ The user must supply roles when running the `createUser` command. Roles are stor LDAP authorization is an external method of getting roles. When a user authenticates using LDAP, there are roles stored in the User document specified by the LDAP system. The LDAP system relies on the -[`AuthzManagerExternalStateLDAP`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/authz_manager_external_state_ldap.h) +[`AuthzManagerExternalStateLDAP`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/authz_manager_external_state_ldap.h) to make external requests to the LDAP server. The `AuthzManagerExternalStateLDAP` wraps the `AuthzManagerExternalStateLocal` for the current process, initially attempting to route all Authorization requests to LDAP and falling back on Local Authorization. LDAP queries are generated from -[`UserRequest`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/authz_manager_external_state_ldap.cpp#L75-L113) +[`UserRequest`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/authz_manager_external_state_ldap.cpp#L75-L113) objects, passing just the username into the query. If a user has specified the `userToDNMapping` server parameter, the `AuthorizationManager` calls the LDAPManager to transform the usernames into names that the LDAP server can understand. The LDAP subsystem relies on a complicated string escaping sequence, which is handled by the LDAPQuery class. After LDAP has returned the `User` document, it resolves role names into privileges by dispatching a call to -[`Local::getUserObject`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.7.0/src/ldap/authz_manager_external_state_ldap.cpp#L110-L123) +[`Local::getUserObject`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.7.0/src/ldap/authz_manager_external_state_ldap.cpp#L110-L123) with a `UserRequest` struct containing a set of roles to be resolved. Connections to LDAP servers are made by the `LDAPManager` through the -[`LDAPRunner`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/ldap_runner.h) +[`LDAPRunner`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/ldap_runner.h) by calling `bindAsUser()`. `BindAsUser()` attempts to set up a connection to the LDAP server using connection parameters specified through the command line when starting the process.The -[`LDAPConnectionFactory`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/connections/ldap_connection_factory.h) +[`LDAPConnectionFactory`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/connections/ldap_connection_factory.h) is the class that is actually tasked with establishing a connection and sending raw bytes over the wire to the LDAP server, all other classes decompose the information to send and use the factory to actually send the information. The `LDAPConnectionFactory` has its own thread pool and executor to drive throughput for authorization. LDAP has an -[`LDAPUserCacheInvalidator`](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/ldap_user_cache_invalidator_job.h) +[`LDAPUserCacheInvalidator`](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/ldap_user_cache_invalidator_job.h) that periodically sweeps the `AuthorizationManager` and deletes user entries that have `$external` as their authentication database. @@ -638,7 +638,7 @@ There are a few thread safety concerns when making connections to the LDAP serve LibLDAP to make connections to the LDAP server. LibLDAP comes without any thread safety guarantees, so all the calls to libLDAP are wrapped with mutexes to ensure thread safety when connecting to LDAP servers on certain distros. The logic to see whether libLDAP is thread-safe lives -[here](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/connections/openldap_connection.cpp#L348-L378). +[here](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/connections/openldap_connection.cpp#L348-L378). #### X.509 Authorization @@ -717,7 +717,7 @@ Refer to the following links for definitions of the Classes referenced in this d | `AuthorizationManager` | [mongo/db/auth/authorization\_manager.h](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/authorization_manager.h) | Interface to external state providers | | `AuthorizationSession` | [mongo/db/auth/authorization\_session.h](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/authorization_session.h) | Representation of currently authenticated and authorized users on the `Client` connection | | `AuthzManagerExternalStateLocal` | [.../authz\_manager\_external\_state\_local.h](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/authz_manager_external_state_local.h) | `Local` implementation of user/role provider | -| `AuthzManagerExternalStateLDAP` | [.../authz\_manager\_external\_state\_ldap.h](https://github.com/10gen/mongo-enterprise-modules/blob/r4.4.0/src/ldap/authz_manager_external_state_ldap.h) | `LDAP` implementation of users/role provider | +| `AuthzManagerExternalStateLDAP` | [.../authz\_manager\_external\_state\_ldap.h](https://github.com/mongodb/mongo-enterprise-modules/blob/r4.4.0/src/ldap/authz_manager_external_state_ldap.h) | `LDAP` implementation of users/role provider | | `Client` | [mongo/db/client.h](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/client.h) | An active client session, typically representing a remote driver or shell | | `Privilege` | [mongo/db/auth/privilege.h](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/privilege.h) | A set of `ActionType`s permitted on a particular `resource' | | `ResourcePattern` | [mongo/db/auth/resource\_pattern.h](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/resource_pattern.h) | A reference to a namespace, db, collection, or cluster to apply a set of `ActionType` privileges to | diff --git a/src/mongo/db/timeseries/flat_bson.cpp b/src/mongo/db/timeseries/flat_bson.cpp index 6a36b679af8..fde07ee2776 100644 --- a/src/mongo/db/timeseries/flat_bson.cpp +++ b/src/mongo/db/timeseries/flat_bson.cpp @@ -250,7 +250,11 @@ typename FlatBSONStore::Iterator FlatBSONStore:: auto it = begin(); auto itEnd = end(); for (; it != itEnd; ++it) { - (*_pos->_fieldNameToIndex)[it->fieldName().toString()] = it._pos->_offsetParent; + uassert( + 12602100, + "Duplicate field names cannot be present in the same FlatBSON object", + _pos->_fieldNameToIndex->try_emplace(it->fieldName().toString(), it._pos->_offsetParent) + .second); } // Retry the search now when the map is created. @@ -280,7 +284,12 @@ FlatBSONStore::Obj::insert(FlatBSONStore::Iterat // Also store our offset in the fast lookup map if it is available. if (_pos->_fieldNameToIndex) { - _pos->_fieldNameToIndex->emplace(inserted->_element.fieldName(), inserted->_offsetParent); + uassert( + 12602101, + "Duplicate field names cannot be present in the same FlatBSON object", + _pos->_fieldNameToIndex + ->try_emplace(inserted->_element.fieldName().toString(), inserted->_offsetParent) + .second); } // We need to traverse the hiearchy up to the root and modify stored offsets to account for diff --git a/src/mongo/db/timeseries/minmax_test.cpp b/src/mongo/db/timeseries/minmax_test.cpp index 673bed1014f..c95fb37801e 100644 --- a/src/mongo/db/timeseries/minmax_test.cpp +++ b/src/mongo/db/timeseries/minmax_test.cpp @@ -261,6 +261,37 @@ TEST(MinMax, SearchLookupMap) { ASSERT_EQ(obj.search(obj.begin(), "50")->fieldName(), "50"); } +TEST(MinMax, DuplicateFieldNamesWithLookupMap) { + MinMaxStore minmax; + auto obj = minmax.root(); + + // Insert 12 (kMaxLinearSearchLength) distinct fields ("0".."11") followed by two duplicate "a" + // entries. This will trigger the lookup map internally in flat_bson. + for (int i = 0; i < 12; ++i) { + obj.insert(obj.end(), std::to_string(i)); + } + obj.insert(obj.end(), "a"); + obj.insert(obj.end(), "a"); + + // Try to search for "a", this will trigger the lookup map internally in flat_bson as we fail to + // find it within 'kMaxLinearSearchLength' attempts. The map cannot contain duplicates so this + // search is well defined and throws. + ASSERT_THROWS(obj.search(obj.begin(), "a"), AssertionException); + + // Try to insert another duplicate which will throw earlier as the lookup map exists and needs + // to be maintained. + obj.insert(obj.begin(), "x"); + ASSERT_THROWS(obj.insert(obj.begin(), "x"), AssertionException); + + // Searching for "a" or "x" is possible as we inserted one of them into the map. + auto found = obj.search(obj.begin(), "a"); + ASSERT(found != obj.end()); + ASSERT_EQ(found->fieldName(), "a"); + + found = obj.search(obj.begin(), "x"); + ASSERT(found != obj.end()); + ASSERT_EQ(found->fieldName(), "x"); +} } // namespace } // namespace mongo::timeseries