From d19fd91974abee8b225ffc0fe5d202baaf15d6c6 Mon Sep 17 00:00:00 2001 From: Allison Easton Date: Wed, 18 Mar 2026 11:19:21 +0100 Subject: [PATCH] SERVER-111575 Improve debuggability of the shard registry (#47400) GitOrigin-RevId: a8dc6a7ad58196a5762e9a1bf51ba7b5ff37f7a5 --- .../get_shard_version_latest_cached.js | 77 ++++++++ .../routing_cache/catalog_cache.cpp | 8 + .../router_role/routing_cache/catalog_cache.h | 12 ++ .../sharding_environment/cluster_commands.idl | 8 + src/mongo/db/topology/shard_registry.cpp | 1 + src/mongo/db/topology/shard_registry_test.cpp | 45 +++++ .../catalog_cache_diagnostics_helpers.cpp | 184 ++++++++++++++++++ .../catalog_cache_diagnostics_helpers.h | 57 ++++++ .../cluster_get_database_version_cmd.cpp | 18 +- .../cluster_get_shard_version_cmd.cpp | 63 +----- .../get_database_version.idl | 4 + .../get_database_version_command.cpp | 62 +++--- .../get_shard_version_command.cpp | 108 +++++----- src/mongo/s/commands/BUILD.bazel | 2 + 14 files changed, 522 insertions(+), 127 deletions(-) create mode 100644 jstests/noPassthrough/versioning_protocol/get_shard_version_latest_cached.js create mode 100644 src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.cpp create mode 100644 src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h diff --git a/jstests/noPassthrough/versioning_protocol/get_shard_version_latest_cached.js b/jstests/noPassthrough/versioning_protocol/get_shard_version_latest_cached.js new file mode 100644 index 00000000000..86f7e257f2f --- /dev/null +++ b/jstests/noPassthrough/versioning_protocol/get_shard_version_latest_cached.js @@ -0,0 +1,77 @@ +/** + * Test that getShardVersion and getDatabaseVersion correctly support the "latestCached" option, + * returning cached information without triggering a refresh and including timeInStore. 
+ */ +import {ShardingTest} from "jstests/libs/shardingtest.js"; +import {after, before, describe, it} from "jstests/libs/mochalite.js"; + +describe("getShardVersion and getDatabaseVersion latestCached", function () { + let st, dbName, collName, ns, shardConn; + + before(function () { + st = new ShardingTest({shards: 1}); + dbName = jsTestName(); + collName = "foo"; + ns = dbName + "." + collName; + shardConn = st.rs0.getPrimary(); + + assert.commandWorked(st.s.adminCommand({enablesharding: dbName})); + assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}})); + assert.commandWorked(st.s.getDB(dbName).getCollection(collName).insert({x: 10})); + }); + + after(function () { + st.stop(); + }); + + describe("mongos", function () { + it("with latestCached: true returns timeInStore and same version", function () { + const latestCachedRes = assert.commandWorked(st.s.adminCommand({getShardVersion: ns, latestCached: true})); + assert.neq(undefined, latestCachedRes.timeInStore); + assert.eq(undefined, latestCachedRes.chunks); + }); + + it("with latestCached and fullMetadata returns chunks and timeInStore", function () { + const latestCachedRes = assert.commandWorked( + st.s.adminCommand({getShardVersion: ns, latestCached: true, fullMetadata: true}), + ); + assert.neq(undefined, latestCachedRes.timeInStore); + assert.eq(1, latestCachedRes.chunks.length); + assert.eq(latestCachedRes.chunks[0][0].x, MinKey); + assert.eq(latestCachedRes.chunks[0][1].x, MaxKey); + }); + + it("with latestCached for database returns primaryShard, dbVersion, timeInStore", function () { + const dbOnlyRes = assert.commandWorked(st.s.adminCommand({getDatabaseVersion: dbName, latestCached: true})); + assert.neq(undefined, dbOnlyRes.primaryShard); + assert.neq(undefined, dbOnlyRes.dbVersion); + assert.neq(undefined, dbOnlyRes.timeInStore); + }); + }); + + describe("mongod", function () { + it("with latestCached: true returns routing info with timeInStore", function () { + const 
shardRes = assert.commandWorked(shardConn.adminCommand({getShardVersion: ns, latestCached: true})); + assert.neq(undefined, shardRes.version); + assert.neq(undefined, shardRes.versionEpoch); + assert.neq(undefined, shardRes.timeInStore); + }); + + it("with latestCached for database returns primaryShard, dbVersion, timeInStore", function () { + // Now that shards are authoritative for the database, the catalog cache rarely has the + // database metadata so the result is likely to be UNKNOWN. + const shardRes = assert.commandWorked( + shardConn.adminCommand({getDatabaseVersion: dbName, latestCached: true}), + ); + if (shardRes.global === "UNKNOWN") { + assert.eq(undefined, shardRes.primaryShard); + assert.eq(undefined, shardRes.dbVersion); + assert.eq(undefined, shardRes.timeInStore); + } else { + assert.neq(undefined, shardRes.primaryShard); + assert.neq(undefined, shardRes.dbVersion); + assert.neq(undefined, shardRes.timeInStore); + } + }); + }); +}); diff --git a/src/mongo/db/router_role/routing_cache/catalog_cache.cpp b/src/mongo/db/router_role/routing_cache/catalog_cache.cpp index 5c10ba3723e..e906a23d772 100644 --- a/src/mongo/db/router_role/routing_cache/catalog_cache.cpp +++ b/src/mongo/db/router_role/routing_cache/catalog_cache.cpp @@ -685,6 +685,14 @@ boost::optional CatalogCache::peekCollectionCacheVersion(const Nam } } +RoutingTableHistoryValueHandle CatalogCache::peekCollectionCacheEntry(const NamespaceString& nss) { + return _collectionCache.peekLatestCached(nss); +} + +DatabaseTypeValueHandle CatalogCache::peekDatabaseCacheEntry(const DatabaseName& dbName) { + return _databaseCache.peekLatestCached(dbName); +} + void CatalogCache::Stats::report(BSONObjBuilder* builder) const { builder->append("countStaleConfigErrors", countStaleConfigErrors.load()); diff --git a/src/mongo/db/router_role/routing_cache/catalog_cache.h b/src/mongo/db/router_role/routing_cache/catalog_cache.h index 2cefc842140..82d4b4a5c78 100644 --- 
a/src/mongo/db/router_role/routing_cache/catalog_cache.h +++ b/src/mongo/db/router_role/routing_cache/catalog_cache.h @@ -469,6 +469,18 @@ public: */ boost::optional peekCollectionCacheVersion(const NamespaceString& nss); + /** + * Returns the current value in the collection cache for the specified namespace. Never blocks + * waiting for a refresh. + */ + RoutingTableHistoryValueHandle peekCollectionCacheEntry(const NamespaceString& nss); + + /** + * Returns the current value in the database cache for the specified database name. Never blocks + * waiting for a refresh. + */ + DatabaseTypeValueHandle peekDatabaseCacheEntry(const DatabaseName& dbName); + private: class DatabaseCache : public DatabaseTypeCache { public: diff --git a/src/mongo/db/sharding_environment/cluster_commands.idl b/src/mongo/db/sharding_environment/cluster_commands.idl index 699452793bc..5b0445c016b 100644 --- a/src/mongo/db/sharding_environment/cluster_commands.idl +++ b/src/mongo/db/sharding_environment/cluster_commands.idl @@ -61,6 +61,10 @@ structs: dbVersion: description: Version known by the node for the given database. type: database_version + timeInStore: + description: Time in the catalog cache for the given information. 
+ type: string + optional: true commands: createDatabase: @@ -155,3 +159,7 @@ commands: type: database_name api_version: strict: false + fields: + latestCached: + type: bool + default: false diff --git a/src/mongo/db/topology/shard_registry.cpp b/src/mongo/db/topology/shard_registry.cpp index f6f62f8fc7d..02598879319 100644 --- a/src/mongo/db/topology/shard_registry.cpp +++ b/src/mongo/db/topology/shard_registry.cpp @@ -575,6 +575,7 @@ void ShardRegistry::toBSON(BSONObjBuilder* result) const { BSONObjBuilder connStrings; if (auto data = _getCachedData()) { data->toBSON(&map, &hosts, &connStrings); + result->append("timeInStore", data.getTime().toString()); } { stdx::lock_guard lk(_mutex); diff --git a/src/mongo/db/topology/shard_registry_test.cpp b/src/mongo/db/topology/shard_registry_test.cpp index cf0aff9fdbe..ab9ab26a29b 100644 --- a/src/mongo/db/topology/shard_registry_test.cpp +++ b/src/mongo/db/topology/shard_registry_test.cpp @@ -506,5 +506,50 @@ TEST_F(ShardRegistryTest, FlushShardRegistryReloadForRecovery) { << "host3 should now belong to shard2 after reassignment"; } +TEST_F(ShardRegistryTest, toBSONEmptyRegistry) { + reloadAndWait(); + + BSONObjBuilder builder; + shardRegistry()->toBSON(&builder); + auto result = builder.obj(); + + ASSERT_TRUE(result.hasField("map")); + ASSERT_TRUE(result.hasField("hosts")); + ASSERT_TRUE(result.hasField("connStrings")); + auto map = result["map"].Obj(); + // With no shards, the map should only contain the config shard + ASSERT_EQ(map.nFields(), 1); + + ASSERT_TRUE(result.hasField("timeInStore")); + std::string timeInStoreStr = result["timeInStore"].String(); + ASSERT_TRUE(timeInStoreStr.find("forceReloadIncrement") != std::string::npos); + ASSERT_TRUE(timeInStoreStr.find("topologyTime") != std::string::npos); +} + +TEST_F(ShardRegistryTest, toBSONWithShards) { + addShard({"shard0"}, kAdvanceTopologyTime); + addShard({"shard1"}, kAdvanceTopologyTime); + + auto future = launchAsync([this] { 
assertShardIdsFromRegistry(getData()->getAllShardIds()); }); + expectCSRSLookup(); + future.default_timed_get(); + + BSONObjBuilder builder; + shardRegistry()->toBSON(&builder); + auto result = builder.obj(); + + ASSERT_TRUE(result.hasField("map")); + ASSERT_TRUE(result.hasField("hosts")); + ASSERT_TRUE(result.hasField("connStrings")); + auto map = result["map"].Obj(); + ASSERT_GTE(map.nFields(), 2); + ASSERT_TRUE(map.hasField("shard0")); + ASSERT_TRUE(map.hasField("shard1")); + + ASSERT_TRUE(result.hasField("timeInStore")); + std::string timeInStoreStr = result["timeInStore"].String(); + ASSERT_TRUE(timeInStoreStr.find("topologyTime") != std::string::npos); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.cpp b/src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.cpp new file mode 100644 index 00000000000..3ea37e2cc53 --- /dev/null +++ b/src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.cpp @@ -0,0 +1,184 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h"
+
+#include "mongo/base/error_codes.h"
+#include "mongo/base/status.h"
+#include "mongo/base/string_data.h"
+#include "mongo/bson/bsonelement.h"
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsontypes.h"
+#include "mongo/bson/util/builder.h"
+#include "mongo/db/auth/action_type.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/resource_pattern.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/database_name.h"
+#include "mongo/db/global_catalog/chunk_manager.h"
+#include "mongo/db/global_catalog/type_database_gen.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/service_context.h"
+#include "mongo/db/sharding_environment/grid.h"
+#include "mongo/db/sharding_environment/shard_id.h"
+#include "mongo/db/versioning_protocol/chunk_version.h"
+#include "mongo/db/versioning_protocol/database_version.h"
+#include "mongo/util/assert_util.h"
+#include "mongo/util/namespace_string_util.h"
+#include "mongo/util/read_through_cache.h"
+#include "mongo/util/str.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
+namespace mongo {
+namespace catalog_cache_diagnostics_helpers {
+namespace {
+
+/**
+ * Appends the three fields describing a placement version to 'builder': "version" (as a BSON
+ * timestamp), "versionEpoch" and "versionTimestamp".
+ */
+void appendVersionFields(BSONObjBuilder* builder, const ChunkVersion& version) {
+    builder->appendTimestamp("version", version.toLong());
+    builder->append("versionEpoch", version.epoch());
+    builder->append("versionTimestamp", version.getTimestamp());
+}
+
+/**
+ * Appends the routing information held by 'cm' for 'nss' to 'builder'.
+ *
+ * Always appends the version fields. When 'fullMetadata' is set, also logs the routing info and
+ * appends a "chunks" array of [min, max] pairs, or 'exceededSize: true' instead of "chunks" if
+ * adding the array would push the reply past BSONObjMaxUserSize.
+ *
+ * Throws NamespaceNotFound if 'cm' has no routing table.
+ */
+void appendChunkManagerInfo(OperationContext* opCtx,
+                            BSONObjBuilder* builder,
+                            const NamespaceString& nss,
+                            const ChunkManager& cm,
+                            bool fullMetadata) {
+    uassert(ErrorCodes::NamespaceNotFound,
+            str::stream() << "Collection " << nss.toStringForErrorMsg()
+                          << " does not have a routing table.",
+            cm.hasRoutingTable());
+
+    appendVersionFields(builder, cm.getVersion());
+
+    if (!fullMetadata) {
+        return;
+    }
+
+    LOGV2(22753,
+          "Routing info requested by getShardVersion",
+          "routingInfo"_attr = redact(cm.toString()));
+
+    // Added to the builder bson if the max bson size is exceeded
+    BSONObjBuilder exceededSizeElt(BSON("exceededSize" << true));
+
+    BSONArrayBuilder chunksArrBuilder;
+    bool exceedsSizeLimit = false;
+
+    cm.forEachChunk([&](const auto& chunk) {
+        BSONArrayBuilder chunkBB(chunksArrBuilder.subarrayStart());
+        chunkBB.append(chunk.getMin());
+        chunkBB.append(chunk.getMax());
+        chunkBB.done();
+
+        exceedsSizeLimit = chunksArrBuilder.len() + builder->len() + exceededSizeElt.len() >
+            BSONObjMaxUserSize;
+
+        // Once the limit is exceeded the array is discarded, so there is no point in visiting
+        // the remaining chunks: returning false ends the iteration.
+        return !exceedsSizeLimit;
+    });
+
+    if (exceedsSizeLimit) {
+        builder->appendElements(exceededSizeElt.done());
+    } else {
+        builder->append("chunks", chunksArrBuilder.arr());
+    }
+}
+
+}  // namespace
+
+/**
+ * Appends the reply used when nothing is known for the requested namespace: 'global: "UNKNOWN"'
+ * and, if 'fullMetadata' is set, an empty "metadata" object.
+ */
+void appendWhenUnknown(BSONObjBuilder* builder, bool fullMetadata) {
+    builder->append("global", "UNKNOWN");
+    if (fullMetadata) {
+        builder->append("metadata", BSONObj());
+    }
+}
+
+/**
+ * Appends the catalog cache information for 'nss' obtained through CatalogCache::getDatabase() or
+ * CatalogCache::getCollectionRoutingInfo(); failures of either lookup are thrown.
+ *
+ * For a database-only namespace appends "primaryShard" and "version"; otherwise appends the
+ * collection routing information (see appendChunkManagerInfo()).
+ */
+void appendCatalogCacheInfo(OperationContext* opCtx,
+                            BSONObjBuilder* builder,
+                            const NamespaceString& nss,
+                            bool fullMetadata) {
+    const auto catalogCache = Grid::get(opCtx)->catalogCache();
+    // The ability to append this dbInfo is left to preserve backward compatibility, but in the
+    // future we should prefer using getShardVersion only for collections and getDatabaseVersion for
+    // databases.
+    if (nss.isDbOnly()) {
+        auto cachedDbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, nss.dbName()));
+        builder->append("primaryShard", cachedDbInfo->getPrimary().toString());
+        builder->append("version", cachedDbInfo->getVersion().toBSON());
+    } else {
+        const auto cri = uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss));
+        appendChunkManagerInfo(opCtx, builder, nss, cri.getChunkManager(), fullMetadata);
+    }
+}
+
+/**
+ * Appends whatever the collection cache currently holds for 'nss', as returned by
+ * CatalogCache::peekCollectionCacheEntry().
+ *
+ * - No cache entry: appends the "unknown" reply (see appendWhenUnknown()).
+ * - Entry without a routing table: appends "timeInStore" and the UNTRACKED version fields.
+ * - Entry with a routing table: appends "timeInStore" and the routing information.
+ *
+ * Throws InvalidOptions if 'nss' is a database-only namespace.
+ */
+void appendLatestCachedCollInfo(OperationContext* opCtx,
+                                BSONObjBuilder* builder,
+                                const NamespaceString& nss,
+                                bool fullMetadata) {
+    uassert(ErrorCodes::InvalidOptions,
+            "Cannot call getShardVersion with only a dbName. Please use getDatabaseVersion for "
+            "database information",
+            !nss.isDbOnly());
+
+    const auto catalogCache = Grid::get(opCtx)->catalogCache();
+    const auto cachedCollInfo = catalogCache->peekCollectionCacheEntry(nss);
+    if (!cachedCollInfo) {
+        appendWhenUnknown(builder, fullMetadata);
+        return;
+    }
+    builder->append("timeInStore", cachedCollInfo.getTime().toString());
+
+    const CurrentChunkManager cm(cachedCollInfo);
+    if (!cm.hasRoutingTable()) {
+        // For unsharded collections, we still append the timeInStore and unsharded version.
+        // Checked explicitly so that an expected state is not reported through an exception.
+        appendVersionFields(builder, ChunkVersion::UNTRACKED());
+        return;
+    }
+    appendChunkManagerInfo(opCtx, builder, nss, cm, fullMetadata);
+}
+
+/**
+ * Appends whatever the database cache currently holds for 'dbName', as returned by
+ * CatalogCache::peekDatabaseCacheEntry(): "dbVersion", "primaryShard" and "timeInStore", or the
+ * "unknown" reply (see appendWhenUnknown()) when there is no cache entry.
+ */
+void appendLatestCachedDbInfo(OperationContext* opCtx,
+                              BSONObjBuilder* builder,
+                              const DatabaseName& dbName) {
+    const auto catalogCache = Grid::get(opCtx)->catalogCache();
+    const auto cachedDbInfo = catalogCache->peekDatabaseCacheEntry(dbName);
+    if (!cachedDbInfo) {
+        appendWhenUnknown(builder, false /* fullMetadata */);
+        return;
+    }
+    builder->append("dbVersion", cachedDbInfo->getVersion().toBSON());
+    builder->append("primaryShard", cachedDbInfo->getPrimary());
+    builder->append("timeInStore", cachedDbInfo.getTime().toString());
+}
+
+}  // namespace catalog_cache_diagnostics_helpers
+
+}  // namespace mongo
diff --git a/src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h b/src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h
new file mode 100644
index 00000000000..6d2545cc385
--- /dev/null
+++ b/src/mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h
@@ -0,0 +1,57 @@
+/**
+ * Copyright (C) 2026-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/database_name.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+
+namespace mongo {
+
+/**
+ * Helpers shared by the getShardVersion and getDatabaseVersion commands to report the contents of
+ * the catalog cache.
+ */
+namespace catalog_cache_diagnostics_helpers {
+
+/**
+ * Appends the reply used when nothing is known for the requested namespace: 'global: "UNKNOWN"'
+ * and, if 'fullMetadata' is set, an empty "metadata" object.
+ */
+void appendWhenUnknown(BSONObjBuilder* builder, bool fullMetadata);
+
+/**
+ * Appends the catalog cache information for 'nss', looked up through the regular catalog cache
+ * getters. For a database-only namespace appends "primaryShard" and "version"; otherwise appends
+ * the collection version fields and, if 'fullMetadata' is set, the chunk ranges.
+ *
+ * Throws if the lookup fails or if the collection has no routing table.
+ */
+void appendCatalogCacheInfo(OperationContext* opCtx,
+                            BSONObjBuilder* builder,
+                            const NamespaceString& nss,
+                            bool fullMetadata);
+
+/**
+ * Appends whatever the collection cache currently holds for 'nss', together with the
+ * "timeInStore" of the cached entry. Appends the "unknown" reply if there is no cached entry, and
+ * the UNTRACKED version if the cached entry has no routing table.
+ *
+ * Throws InvalidOptions if 'nss' is a database-only namespace.
+ */
+void appendLatestCachedCollInfo(OperationContext* opCtx,
+                                BSONObjBuilder* builder,
+                                const NamespaceString& nss,
+                                bool fullMetadata);
+
+/**
+ * Appends whatever the database cache currently holds for 'dbName': "dbVersion", "primaryShard"
+ * and "timeInStore", or the "unknown" reply if there is no cached entry.
+ */
+void appendLatestCachedDbInfo(OperationContext* opCtx,
+                              BSONObjBuilder* builder,
+                              const DatabaseName& dbName);
+
+}  // namespace catalog_cache_diagnostics_helpers
+
+}  // namespace mongo
diff --git a/src/mongo/db/versioning_protocol/cluster_get_database_version_cmd.cpp b/src/mongo/db/versioning_protocol/cluster_get_database_version_cmd.cpp
index 55875afd7cc..d183ec367c9 100644
--- a/src/mongo/db/versioning_protocol/cluster_get_database_version_cmd.cpp
+++ b/src/mongo/db/versioning_protocol/cluster_get_database_version_cmd.cpp
@@ -31,6 +31,7 @@
 #include "mongo/db/commands.h"
 #include "mongo/db/sharding_environment/cluster_commands_gen.h"
 #include
"mongo/db/sharding_environment/grid.h" +#include "mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h" #include "mongo/logv2/log.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand @@ -49,9 +50,20 @@ public: using InvocationBase::InvocationBase; Response typedRun(OperationContext* opCtx) { - auto catalogCache = Grid::get(opCtx)->catalogCache(); - const auto dbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, ns().dbName())); - return {dbInfo->getPrimary(), dbInfo->getVersion()}; + if (request().getLatestCached()) { + BSONObjBuilder responseBuilder; + catalog_cache_diagnostics_helpers::appendLatestCachedDbInfo( + opCtx, &responseBuilder, ns().dbName()); + uassert(ErrorCodes::NamespaceNotFound, + "Database not found in the catalog cache", + !responseBuilder.hasField("global")); + return Response::parse(responseBuilder.obj()); + } else { + auto catalogCache = Grid::get(opCtx)->catalogCache(); + const auto dbInfo = + uassertStatusOK(catalogCache->getDatabase(opCtx, ns().dbName())); + return {dbInfo->getPrimary(), dbInfo->getVersion()}; + } } private: diff --git a/src/mongo/db/versioning_protocol/cluster_get_shard_version_cmd.cpp b/src/mongo/db/versioning_protocol/cluster_get_shard_version_cmd.cpp index a438d5dbab6..5e2091408cc 100644 --- a/src/mongo/db/versioning_protocol/cluster_get_shard_version_cmd.cpp +++ b/src/mongo/db/versioning_protocol/cluster_get_shard_version_cmd.cpp @@ -27,7 +27,6 @@ * it in the license file. 
*/ - #include "mongo/base/error_codes.h" #include "mongo/base/status.h" #include "mongo/base/string_data.h" @@ -50,6 +49,7 @@ #include "mongo/db/service_context.h" #include "mongo/db/sharding_environment/grid.h" #include "mongo/db/sharding_environment/shard_id.h" +#include "mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h" #include "mongo/db/versioning_protocol/chunk_version.h" #include "mongo/db/versioning_protocol/database_version.h" #include "mongo/logv2/log.h" @@ -115,59 +115,16 @@ public: const BSONObj& cmdObj, BSONObjBuilder& result) override { const NamespaceString nss(parseNs(dbName, cmdObj)); - const auto catalogCache = Grid::get(opCtx)->catalogCache(); - - if (nss.coll().empty()) { - // Return the database's information. - auto cachedDbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, nss.dbName())); - result.append("primaryShard", cachedDbInfo->getPrimary().toString()); - result.append("version", cachedDbInfo->getVersion().toBSON()); + bool fullMetadata = cmdObj["fullMetadata"].trueValue(); + // On a router, we expose two options. By default, we get the cached information in a way + // which may trigger a refresh. Alternatively, providing the "latestCached" option will + // return whatever information is currently in the catalog cache. + if (cmdObj["latestCached"].trueValue()) { + catalog_cache_diagnostics_helpers::appendLatestCachedCollInfo( + opCtx, &result, nss, fullMetadata); } else { - // Return the collection's information. 
- const auto cri = uassertStatusOK(catalogCache->getCollectionRoutingInfo(opCtx, nss)); - const auto& cm = cri.getChunkManager(); - uassert(ErrorCodes::NamespaceNotFound, - str::stream() << "Collection " << nss.toStringForErrorMsg() - << " does not have a routing table.", - cm.hasRoutingTable()); - - result.appendTimestamp("version", cm.getVersion().toLong()); - result.append("versionEpoch", cm.getVersion().epoch()); - result.append("versionTimestamp", cm.getVersion().getTimestamp()); - // Added to the result bson if the max bson size is exceeded - BSONObjBuilder exceededSizeElt(BSON("exceededSize" << true)); - - if (cmdObj["fullMetadata"].trueValue()) { - BSONArrayBuilder chunksArrBuilder; - bool exceedsSizeLimit = false; - - LOGV2(22753, - "Routing info requested by getShardVersion", - "routingInfo"_attr = redact(cm.toString())); - - cm.forEachChunk([&](const auto& chunk) { - if (!exceedsSizeLimit) { - BSONArrayBuilder chunkBB(chunksArrBuilder.subarrayStart()); - chunkBB.append(chunk.getMin()); - chunkBB.append(chunk.getMax()); - chunkBB.done(); - if (chunksArrBuilder.len() + result.len() + exceededSizeElt.len() > - BSONObjMaxUserSize) { - exceedsSizeLimit = true; - } - } - - return true; - }); - - if (!exceedsSizeLimit) { - result.append("chunks", chunksArrBuilder.arr()); - } - - if (exceedsSizeLimit) { - result.appendElements(exceededSizeElt.done()); - } - } + catalog_cache_diagnostics_helpers::appendCatalogCacheInfo( + opCtx, &result, nss, fullMetadata); } return true; diff --git a/src/mongo/db/versioning_protocol/get_database_version.idl b/src/mongo/db/versioning_protocol/get_database_version.idl index 9f8aac86c4d..76f6063d46f 100644 --- a/src/mongo/db/versioning_protocol/get_database_version.idl +++ b/src/mongo/db/versioning_protocol/get_database_version.idl @@ -42,3 +42,7 @@ commands: namespace: type api_version: "" type: string + fields: + latestCached: + type: bool + default: false diff --git 
a/src/mongo/db/versioning_protocol/get_database_version_command.cpp b/src/mongo/db/versioning_protocol/get_database_version_command.cpp index f707405d883..fd88441235b 100644 --- a/src/mongo/db/versioning_protocol/get_database_version_command.cpp +++ b/src/mongo/db/versioning_protocol/get_database_version_command.cpp @@ -45,6 +45,7 @@ #include "mongo/db/shard_role/shard_catalog/database_sharding_runtime.h" #include "mongo/db/topology/cluster_role.h" #include "mongo/db/topology/sharding_state.h" +#include "mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h" #include "mongo/db/versioning_protocol/database_version.h" #include "mongo/db/versioning_protocol/get_database_version_gen.h" #include "mongo/rpc/op_msg.h" @@ -64,6 +65,37 @@ namespace mongo { +namespace { + +void appendFilteringMetadataCacheInfo(OperationContext* opCtx, + rpc::ReplyBuilderInterface* result, + const DatabaseName& dbName) { + auto [dbPrimaryShard, dbVersion] = [&] { + const auto scopedDsr = DatabaseShardingRuntime::acquireShared(opCtx, dbName); + + // GetDatabaseVersion command can bypass the critical section to read database + // metadata as it is a command used for troubleshooting and inspect the insights of + // the DatabaseShardingRuntime. 
+ BypassDatabaseMetadataAccess bypassDbMetadataAccess( + opCtx, BypassDatabaseMetadataAccess::Type::kReadOnly); // NOLINT + + return std::make_pair(scopedDsr->getDbPrimaryShard(opCtx), scopedDsr->getDbVersion(opCtx)); + }(); + + if (!dbVersion) { + result->getBodyBuilder().append("dbVersion", BSONObj()); + return; + } + + result->getBodyBuilder().append("dbVersion", dbVersion->toBSON()); + + if (dbPrimaryShard && ShardingState::get(opCtx)->shardId() == *dbPrimaryShard) { + result->getBodyBuilder().append("isPrimaryShardForDb", true); + } +} + +} // namespace + class GetDatabaseVersionCmd final : public TypedCommand { public: using Request = GetDatabaseVersion; @@ -97,29 +129,13 @@ public: uassert(ErrorCodes::IllegalOperation, str::stream() << definition()->getName() << " can only be run on shard servers", serverGlobalParams.clusterRole.has(ClusterRole::ShardServer)); - - auto [dbPrimaryShard, dbVersion] = [&] { - const auto scopedDsr = DatabaseShardingRuntime::acquireShared(opCtx, _targetDb()); - - // GetDatabaseVersion command can bypass the critical section to read database - // metadata as it is a command used for troubleshooting and inspect the insights of - // the DatabaseShardingRuntime. 
- BypassDatabaseMetadataAccess bypassDbMetadataAccess( - opCtx, BypassDatabaseMetadataAccess::Type::kReadOnly); // NOLINT - - return std::make_pair(scopedDsr->getDbPrimaryShard(opCtx), - scopedDsr->getDbVersion(opCtx)); - }(); - - if (!dbVersion) { - result->getBodyBuilder().append("dbVersion", BSONObj()); - return; - } - - result->getBodyBuilder().append("dbVersion", dbVersion->toBSON()); - - if (dbPrimaryShard && ShardingState::get(opCtx)->shardId() == *dbPrimaryShard) { - result->getBodyBuilder().append("isPrimaryShardForDb", true); + if (request().getLatestCached()) { + auto builder = result->getBodyBuilder(); + catalog_cache_diagnostics_helpers::appendLatestCachedDbInfo( + opCtx, &builder, _targetDb()); + builder.done(); + } else { + appendFilteringMetadataCacheInfo(opCtx, result, _targetDb()); } } diff --git a/src/mongo/db/versioning_protocol/get_shard_version_command.cpp b/src/mongo/db/versioning_protocol/get_shard_version_command.cpp index b9ae78df279..ffc52cbb257 100644 --- a/src/mongo/db/versioning_protocol/get_shard_version_command.cpp +++ b/src/mongo/db/versioning_protocol/get_shard_version_command.cpp @@ -53,6 +53,7 @@ #include "mongo/db/sharding_environment/grid.h" #include "mongo/db/topology/shard_registry.h" #include "mongo/db/topology/sharding_state.h" +#include "mongo/db/versioning_protocol/catalog_cache_diagnostics_helpers.h" #include "mongo/db/versioning_protocol/chunk_version.h" #include "mongo/util/assert_util.h" #include "mongo/util/namespace_string_util.h" @@ -69,6 +70,58 @@ namespace mongo { namespace { +void appendFilteringMetadataCacheInfo(OperationContext* opCtx, + BSONObjBuilder* builder, + const NamespaceString& nss, + bool fullMetadata) { + builder->append( + "configServer", + Grid::get(opCtx)->shardRegistry()->getConfigServerConnectionString().toString()); + + AutoGetCollection autoColl( + opCtx, + nss, + MODE_IS, + auto_get_collection::Options{}.viewMode(auto_get_collection::ViewMode::kViewsPermitted)); + const auto scopedCsr = + 
CollectionShardingRuntime::assertCollectionLockedAndAcquireShared(opCtx, nss); + + auto optMetadata = scopedCsr->getCurrentMetadataIfKnown(); + if (!optMetadata) { + catalog_cache_diagnostics_helpers::appendWhenUnknown(builder, fullMetadata); + } else { + const auto& metadata = *optMetadata; + builder->appendTimestamp("global", metadata.getShardPlacementVersion().toLong()); + + if (fullMetadata) { + BSONObjBuilder metadataBuilder(builder->subobjStart("metadata")); + if (metadata.isSharded()) { + metadataBuilder.appendTimestamp("collVersion", + metadata.getCollPlacementVersion().toLong()); + metadataBuilder.append("collVersionEpoch", + metadata.getCollPlacementVersion().epoch()); + metadataBuilder.append("collVersionTimestamp", + metadata.getCollPlacementVersion().getTimestamp()); + + metadataBuilder.appendTimestamp( + "shardVersion", metadata.getShardPlacementVersionForLogging().toLong()); + metadataBuilder.append("shardVersionEpoch", + metadata.getShardPlacementVersionForLogging().epoch()); + metadataBuilder.append( + "shardVersionTimestamp", + metadata.getShardPlacementVersionForLogging().getTimestamp()); + + metadataBuilder.append("keyPattern", metadata.getShardKeyPattern().toBSON()); + + BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks")); + metadata.toBSONChunks(&chunksArr); + chunksArr.doneFast(); + } + metadataBuilder.doneFast(); + } + } +} + class GetShardVersion : public BasicCommand { public: GetShardVersion() : BasicCommand("getShardVersion") {} @@ -114,56 +167,15 @@ public: const NamespaceString nss(parseNs(dbName, cmdObj)); ShardingState::get(opCtx)->assertCanAcceptShardedCommands(); + bool fullMetadata = cmdObj["fullMetadata"].trueValue(); - result.append( - "configServer", - Grid::get(opCtx)->shardRegistry()->getConfigServerConnectionString().toString()); - - AutoGetCollection autoColl(opCtx, - nss, - MODE_IS, - auto_get_collection::Options{}.viewMode( - auto_get_collection::ViewMode::kViewsPermitted)); - const auto scopedCsr = - 
CollectionShardingRuntime::assertCollectionLockedAndAcquireShared(opCtx, nss); - - auto optMetadata = scopedCsr->getCurrentMetadataIfKnown(); - if (!optMetadata) { - result.append("global", "UNKNOWN"); - - if (cmdObj["fullMetadata"].trueValue()) { - result.append("metadata", BSONObj()); - } + // On shard servers we can dump either the routing info or the filtering info, controlled + // via the "latestCached" argument. + if (cmdObj["latestCached"].trueValue()) { + catalog_cache_diagnostics_helpers::appendLatestCachedCollInfo( + opCtx, &result, nss, fullMetadata); } else { - const auto& metadata = *optMetadata; - result.appendTimestamp("global", metadata.getShardPlacementVersion().toLong()); - - if (cmdObj["fullMetadata"].trueValue()) { - BSONObjBuilder metadataBuilder(result.subobjStart("metadata")); - if (metadata.isSharded()) { - metadataBuilder.appendTimestamp("collVersion", - metadata.getCollPlacementVersion().toLong()); - metadataBuilder.append("collVersionEpoch", - metadata.getCollPlacementVersion().epoch()); - metadataBuilder.append("collVersionTimestamp", - metadata.getCollPlacementVersion().getTimestamp()); - - metadataBuilder.appendTimestamp( - "shardVersion", metadata.getShardPlacementVersionForLogging().toLong()); - metadataBuilder.append("shardVersionEpoch", - metadata.getShardPlacementVersionForLogging().epoch()); - metadataBuilder.append( - "shardVersionTimestamp", - metadata.getShardPlacementVersionForLogging().getTimestamp()); - - metadataBuilder.append("keyPattern", metadata.getShardKeyPattern().toBSON()); - - BSONArrayBuilder chunksArr(metadataBuilder.subarrayStart("chunks")); - metadata.toBSONChunks(&chunksArr); - chunksArr.doneFast(); - } - metadataBuilder.doneFast(); - } + appendFilteringMetadataCacheInfo(opCtx, &result, nss, fullMetadata); } return true; diff --git a/src/mongo/s/commands/BUILD.bazel b/src/mongo/s/commands/BUILD.bazel index 8479392910f..da629a179f7 100644 --- a/src/mongo/s/commands/BUILD.bazel +++ 
b/src/mongo/s/commands/BUILD.bazel @@ -45,6 +45,7 @@ mongo_cc_library( srcs = [ "document_shard_key_update_util.cpp", "strategy.cpp", + "//src/mongo/db/versioning_protocol:catalog_cache_diagnostics_helpers.cpp", "//src/mongo/s/commands/query_cmd:cluster_explain.cpp", "//src/mongo/s/commands/query_cmd:cluster_write_cmd.cpp", ], @@ -66,6 +67,7 @@ mongo_cc_library( "//src/mongo/otel/traces:tracing", "//src/mongo/rpc:metadata", "//src/mongo/rpc:rewrite_state_change_errors", + "//src/mongo/s:grid", "//src/mongo/s:load_balancer_support", "//src/mongo/s:mongos_topology_coordinator", "//src/mongo/s:sharding_api",