SERVER-123720 SERVER-124026 SERVER-125698 Extend metadata consistency checks to chunkless collections (#53578)
Co-authored-by: Pol Pinol Castuera <pol.pinol@mongodb.com> GitOrigin-RevId: 584457aaa84fa537c30dfd54906dfe7fd8b39aa7
This commit is contained in:
parent
676a130dbc
commit
9db5b85185
@ -65,6 +65,7 @@
|
||||
#include "mongo/db/shard_role/shard_catalog/catalog_raii.h"
|
||||
#include "mongo/db/shard_role/shard_catalog/collection_metadata.h"
|
||||
#include "mongo/db/shard_role/shard_catalog/collection_sharding_runtime.h"
|
||||
#include "mongo/db/shard_role/shard_catalog/commit_collection_metadata_locally.h"
|
||||
#include "mongo/db/shard_role/shard_catalog/database_sharding_runtime.h"
|
||||
#include "mongo/db/sharding_environment/grid.h"
|
||||
#include "mongo/db/sharding_environment/sharding_feature_flags_gen.h"
|
||||
@ -232,6 +233,7 @@ boost::optional<CollectionType> getCollectionFromDurableShardCatalog(
|
||||
OperationContext* opCtx,
|
||||
const NamespaceString& nss,
|
||||
const UUID& uuid,
|
||||
bool isInconsistencyIfNone,
|
||||
std::vector<MetadataInconsistencyItem>& inconsistencies) {
|
||||
boost::optional<CollectionType> collectionInShardCatalog;
|
||||
|
||||
@ -247,13 +249,15 @@ boost::optional<CollectionType> getCollectionFromDurableShardCatalog(
|
||||
cursor);
|
||||
|
||||
if (!cursor->more()) {
|
||||
inconsistencies.emplace_back(makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
nss,
|
||||
uuid,
|
||||
BSON("reason" << "Collection entry not found in the durable shard catalog "
|
||||
"(config.shard.catalog.collections)")}));
|
||||
if (isInconsistencyIfNone) {
|
||||
inconsistencies.emplace_back(makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
nss,
|
||||
uuid,
|
||||
BSON("reason" << "Collection entry not found in the durable shard catalog "
|
||||
"(config.shard.catalog.collections)")}));
|
||||
}
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
@ -270,10 +274,37 @@ boost::optional<CollectionType> getCollectionFromDurableShardCatalog(
|
||||
return collectionInShardCatalog;
|
||||
}
|
||||
|
||||
bool hasChunksFromDurableShardCatalog(OperationContext* opCtx,
|
||||
const NamespaceString& nss,
|
||||
const UUID& uuid,
|
||||
const ShardId& shardId,
|
||||
std::vector<MetadataInconsistencyItem>& inconsistencies) {
|
||||
ScopedReadConcern scopedReadConcern(
|
||||
opCtx, repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern));
|
||||
|
||||
std::vector<ChunkType> chunks;
|
||||
|
||||
try {
|
||||
DBDirectClient client(opCtx);
|
||||
FindCommandRequest chunkFindOp{NamespaceString::kConfigShardCatalogChunksNamespace};
|
||||
chunkFindOp.setFilter(BSON(ChunkType::collectionUUID() << uuid));
|
||||
chunkFindOp.setSort(BSON(ChunkType::min() << 1));
|
||||
auto chunkCursor = client.find(std::move(chunkFindOp));
|
||||
return chunkCursor->more();
|
||||
} catch (const DBException& ex) {
|
||||
inconsistencies.emplace_back(makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
nss, uuid, BSON("reason" << ex.reason())}));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ChunkType> getChunksFromDurableShardCatalog(
|
||||
OperationContext* opCtx,
|
||||
const CollectionType& coll,
|
||||
const ShardId& shardId,
|
||||
bool isInconsistencyIfEmpty,
|
||||
std::vector<MetadataInconsistencyItem>& inconsistencies) {
|
||||
ScopedReadConcern scopedReadConcern(
|
||||
opCtx, repl::ReadConcernArgs(repl::ReadConcernLevel::kSnapshotReadConcern));
|
||||
@ -299,7 +330,7 @@ std::vector<ChunkType> getChunksFromDurableShardCatalog(
|
||||
return {};
|
||||
}
|
||||
|
||||
if (chunks.empty()) {
|
||||
if (chunks.empty() && isInconsistencyIfEmpty) {
|
||||
inconsistencies.emplace_back(makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
@ -534,33 +565,123 @@ CollectionType toCollectionType(const CollectionMetadata& cm) {
|
||||
return result;
|
||||
};
|
||||
|
||||
void checkPrimaryOnlyAuthoritativeShardCatalogInvariants(
|
||||
OperationContext* opCtx,
|
||||
const NamespaceString& nss,
|
||||
const ShardId& shardId,
|
||||
const CollectionPtr& localCollectionPtr,
|
||||
const boost::optional<CollectionType>& collectionInGlobalCatalog,
|
||||
boost::optional<CollectionMetadata> inMemoryShardCatalogMetadata,
|
||||
std::vector<MetadataInconsistencyItem>& inconsistencies) {
|
||||
const auto scopedCsr = CollectionShardingRuntime::acquireShared(opCtx, nss);
|
||||
const auto localUuid = localCollectionPtr->uuid();
|
||||
const auto durableColl = getCollectionFromDurableShardCatalog(
|
||||
opCtx, nss, localUuid, false /* not an inconsistency if none */, inconsistencies);
|
||||
const auto hasDurableChunks =
|
||||
hasChunksFromDurableShardCatalog(opCtx, nss, localUuid, shardId, inconsistencies);
|
||||
|
||||
if (!collectionInGlobalCatalog) {
|
||||
if (durableColl) {
|
||||
inconsistencies.emplace_back(
|
||||
makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
nss,
|
||||
localUuid,
|
||||
BSON("field"
|
||||
<< "shardCatalogCollectionEntry"
|
||||
<< "source" << kDurableShardCatalogSourceScope << "reason"
|
||||
<< "Collection is untracked in the global catalog but a durable "
|
||||
"collection entry exists in config.shard.catalog.collections")}));
|
||||
}
|
||||
|
||||
if (hasDurableChunks) {
|
||||
inconsistencies.emplace_back(makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
nss,
|
||||
localUuid,
|
||||
BSON("field" << "shardCatalogChunkEntry"
|
||||
<< "source" << kDurableShardCatalogSourceScope << "reason"
|
||||
<< "Collection is untracked in the global catalog but a durable "
|
||||
"chunk entry exists in config.shard.catalog.chunks")}));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const bool primaryOwnsNoChunks = !inMemoryShardCatalogMetadata ||
|
||||
!inMemoryShardCatalogMetadata->hasRoutingTable() ||
|
||||
!inMemoryShardCatalogMetadata->currentShardHasAnyChunks();
|
||||
if (primaryOwnsNoChunks && !durableColl) {
|
||||
inconsistencies.emplace_back(makeInconsistency(
|
||||
MetadataInconsistencyTypeEnum::kInconsistentShardCatalogCollectionMetadata,
|
||||
InconsistentShardCatalogCollectionMetadataDetails{
|
||||
nss,
|
||||
collectionInGlobalCatalog->getUuid(),
|
||||
BSON("field"
|
||||
<< "shardCatalogCollectionEntry"
|
||||
<< "source" << kDurableShardCatalogSourceScope << "reason"
|
||||
<< "Tracked collection exists in the global catalog but a no collection entry "
|
||||
"exists in config.shard.catalog.chunks")}));
|
||||
}
|
||||
}
|
||||
|
||||
void checkCollectionMetadataInShardCatalog(
|
||||
OperationContext* opCtx,
|
||||
const NamespaceString& nss,
|
||||
const ShardId& shardId,
|
||||
bool isPrimary,
|
||||
const CollectionPtr& localCollectionPtr,
|
||||
const boost::optional<CollectionType> collectionInGlobalCatalog,
|
||||
std::vector<MetadataInconsistencyItem>& inconsistencies) {
|
||||
boost::optional<CollectionMetadata> inMemoryShardCatalogMetadata;
|
||||
ChunkVersion collectionPlacementVersion;
|
||||
bool isShardCatalogAuthoritative = false;
|
||||
|
||||
|
||||
// Optimistic approach to avoid holding the CSR lock during the remote call to fetch information
|
||||
// about the global catalog: snapshot the shard catalog metadata and its placement version under
|
||||
// the CSR lock, release it, perform the remote reads, then re-acquire the lock and verify the
|
||||
// placement version hasn't changed.
|
||||
{
|
||||
auto shouldReturnEarly = [&]() -> bool {
|
||||
const auto scopedCsr = CollectionShardingRuntime::acquireShared(opCtx, nss);
|
||||
|
||||
isShardCatalogAuthoritative =
|
||||
feature_flags::gAuthoritativeShardsCRUD.isEnabled(
|
||||
VersionContext::getDecoration(opCtx),
|
||||
serverGlobalParams.featureCompatibility.acquireFCVSnapshot()) &&
|
||||
scopedCsr->getAuthoritativeState() ==
|
||||
CollectionShardingRuntime::AuthoritativeState::kAuthoritative;
|
||||
|
||||
inMemoryShardCatalogMetadata = scopedCsr->getCurrentMetadataIfKnown();
|
||||
|
||||
if (!inMemoryShardCatalogMetadata) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (scopedCsr->getCriticalSectionSignal(ShardingMigrationCriticalSection::kWrite)) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
collectionPlacementVersion = inMemoryShardCatalogMetadata->getCollPlacementVersion();
|
||||
return false;
|
||||
};
|
||||
|
||||
auto isInMemoryEmpty = shouldReturnEarly();
|
||||
|
||||
if (isPrimary && isShardCatalogAuthoritative) {
|
||||
checkPrimaryOnlyAuthoritativeShardCatalogInvariants(opCtx,
|
||||
nss,
|
||||
shardId,
|
||||
localCollectionPtr,
|
||||
collectionInGlobalCatalog,
|
||||
inMemoryShardCatalogMetadata,
|
||||
inconsistencies);
|
||||
}
|
||||
|
||||
if (isInMemoryEmpty) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool expectTracked = collectionInGlobalCatalog.has_value();
|
||||
@ -579,6 +700,8 @@ void checkCollectionMetadataInShardCatalog(
|
||||
return;
|
||||
}
|
||||
|
||||
const auto scopedCsr = CollectionShardingRuntime::acquireShared(opCtx, nss);
|
||||
|
||||
if (!expectTracked) {
|
||||
return;
|
||||
}
|
||||
@ -589,8 +712,6 @@ void checkCollectionMetadataInShardCatalog(
|
||||
auto chunksInGlobalCatalog =
|
||||
getChunksFromGlobalCatalog(opCtx, *collectionInGlobalCatalog, shardId);
|
||||
|
||||
const auto scopedCsr = CollectionShardingRuntime::acquireShared(opCtx, nss);
|
||||
|
||||
// If metadata became unknown, the critical section was acquired, or the placement version
|
||||
// changed, a migration may have occurred during the remote call. Skip the following checks to
|
||||
// avoid false positives.
|
||||
@ -636,25 +757,26 @@ void checkCollectionMetadataInShardCatalog(
|
||||
kInMemoryShardCatalogSourceScope,
|
||||
inconsistencies);
|
||||
|
||||
if (!feature_flags::gAuthoritativeShardsCRUD.isEnabled(
|
||||
VersionContext::getDecoration(opCtx),
|
||||
serverGlobalParams.featureCompatibility.acquireFCVSnapshot())) {
|
||||
if (!isShardCatalogAuthoritative) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (scopedCsr->getAuthoritativeState() ==
|
||||
CollectionShardingRuntime::AuthoritativeState::kNonAuthoritative) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto collectionInDurableShardCatalog = getCollectionFromDurableShardCatalog(
|
||||
opCtx, nss, collectionInGlobalCatalog->getUuid(), inconsistencies);
|
||||
auto collectionInDurableShardCatalog =
|
||||
getCollectionFromDurableShardCatalog(opCtx,
|
||||
nss,
|
||||
collectionInGlobalCatalog->getUuid(),
|
||||
true /* inconsistency if none */,
|
||||
inconsistencies);
|
||||
if (!collectionInDurableShardCatalog) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto chunksInDurableShardCatalog = getChunksFromDurableShardCatalog(
|
||||
opCtx, *collectionInDurableShardCatalog, shardId, inconsistencies);
|
||||
auto chunksInDurableShardCatalog =
|
||||
getChunksFromDurableShardCatalog(opCtx,
|
||||
*collectionInDurableShardCatalog,
|
||||
shardId,
|
||||
true /* inconsistency if empty */,
|
||||
inconsistencies);
|
||||
if (chunksInDurableShardCatalog.empty()) {
|
||||
return;
|
||||
}
|
||||
@ -769,6 +891,7 @@ std::vector<MetadataInconsistencyItem> _checkInconsistenciesBetweenBothCatalogs(
|
||||
OperationContext* opCtx,
|
||||
const NamespaceString& nss,
|
||||
const ShardId& shardId,
|
||||
const ShardId& primaryShardId,
|
||||
const CollectionType& catalogColl,
|
||||
const CollectionPtr& localColl,
|
||||
const bool checkRangeDeletionIndexes) {
|
||||
@ -854,8 +977,13 @@ std::vector<MetadataInconsistencyItem> _checkInconsistenciesBetweenBothCatalogs(
|
||||
// Check that the metadata type locally is compatible with the type of collection on the config
|
||||
// server.
|
||||
if (catalogUUID == localUUID) {
|
||||
checkCollectionMetadataInShardCatalog(
|
||||
opCtx, nss, shardId, localColl, catalogColl, inconsistencies);
|
||||
checkCollectionMetadataInShardCatalog(opCtx,
|
||||
nss,
|
||||
shardId,
|
||||
shardId == primaryShardId,
|
||||
localColl,
|
||||
catalogColl,
|
||||
inconsistencies);
|
||||
}
|
||||
|
||||
// Check shardKey index inconsistencies.
|
||||
@ -890,8 +1018,13 @@ std::vector<MetadataInconsistencyItem> _checkLocalInconsistencies(
|
||||
MisplacedCollectionDetails{
|
||||
nss, currentShard, localColl->uuid(), getNumDocs(opCtx, localColl.get())}));
|
||||
} else {
|
||||
checkCollectionMetadataInShardCatalog(
|
||||
opCtx, nss, currentShard, localColl, boost::none, inconsistencies);
|
||||
checkCollectionMetadataInShardCatalog(opCtx,
|
||||
nss,
|
||||
currentShard,
|
||||
primaryShard == currentShard,
|
||||
localColl,
|
||||
boost::none,
|
||||
inconsistencies);
|
||||
}
|
||||
|
||||
_checkBucketCollectionInconsistencies(
|
||||
@ -1338,8 +1471,14 @@ std::vector<MetadataInconsistencyItem> checkCollectionMetadataConsistency(
|
||||
} else if (isCollectionOnBothCatalogs) {
|
||||
// Case where we have found same collection in the catalog client than in the local
|
||||
// catalog.
|
||||
auto inconsistenciesBetweenBothCatalogs = _checkInconsistenciesBetweenBothCatalogs(
|
||||
opCtx, localNss, shardId, catalogColl, localColl, checkRangeDeletionIndexes);
|
||||
auto inconsistenciesBetweenBothCatalogs =
|
||||
_checkInconsistenciesBetweenBothCatalogs(opCtx,
|
||||
localNss,
|
||||
shardId,
|
||||
primaryShardId,
|
||||
catalogColl,
|
||||
localColl,
|
||||
checkRangeDeletionIndexes);
|
||||
inconsistencies.insert(
|
||||
inconsistencies.end(),
|
||||
std::make_move_iterator(inconsistenciesBetweenBothCatalogs.begin()),
|
||||
|
||||
@ -1963,6 +1963,104 @@ TEST_F(MetadataConsistencyShardCatalogTest, DurablePath_MissingChunksInDurableCa
|
||||
ASSERT_EQ(1, countInconsistenciesWithReasonField(inconsistencies));
|
||||
}
|
||||
|
||||
TEST_F(MetadataConsistencyShardCatalogTest, UntrackedCollectionWithCollectionEntry) {
|
||||
RAIIServerParameterControllerForTest featureFlagController("featureFlagAuthoritativeShardsCRUD",
|
||||
true);
|
||||
|
||||
OperationContext* opCtx = operationContext();
|
||||
createTestCollection(opCtx, _nss);
|
||||
const auto [localCatalogSnapshot, localCatalogCollections] = getLocalCatalog(opCtx, _nss);
|
||||
ASSERT_EQ(1, localCatalogCollections.size());
|
||||
|
||||
const auto localUuid = localCatalogCollections[0]->uuid();
|
||||
setCSRAuthoritative();
|
||||
insertDurableShardCatalogCollection(generateCollectionType(_nss, localUuid, _keyPattern));
|
||||
const auto inconsistencies = metadata_consistency_util::checkCollectionMetadataConsistency(
|
||||
opCtx,
|
||||
_shardId,
|
||||
_shardId,
|
||||
{} /* shardingCatalogCollections */,
|
||||
localCatalogSnapshot,
|
||||
localCatalogCollections,
|
||||
false /*checkRangeDeletionIndexes*/,
|
||||
false /*optionalCheckIndexes*/);
|
||||
|
||||
ASSERT_EQ(1, countInconsistenciesWithReasonField(inconsistencies));
|
||||
}
|
||||
|
||||
TEST_F(MetadataConsistencyShardCatalogTest, UntrackedCollectionWithChunkEntry) {
|
||||
RAIIServerParameterControllerForTest featureFlagController("featureFlagAuthoritativeShardsCRUD",
|
||||
true);
|
||||
|
||||
OperationContext* opCtx = operationContext();
|
||||
createTestCollection(opCtx, _nss);
|
||||
const auto [localCatalogSnapshot, localCatalogCollections] = getLocalCatalog(opCtx, _nss);
|
||||
ASSERT_EQ(1, localCatalogCollections.size());
|
||||
|
||||
const auto localUuid = localCatalogCollections[0]->uuid();
|
||||
|
||||
auto chunk = generateChunk(
|
||||
localUuid, _shardId, _keyPattern.globalMin(), _keyPattern.globalMax(), kShard0History);
|
||||
setCSRAuthoritative();
|
||||
insertDurableShardCatalogChunks({chunk});
|
||||
|
||||
const auto inconsistencies = metadata_consistency_util::checkCollectionMetadataConsistency(
|
||||
opCtx,
|
||||
_shardId,
|
||||
_shardId,
|
||||
{} /* shardingCatalogCollections */,
|
||||
localCatalogSnapshot,
|
||||
localCatalogCollections,
|
||||
false /*checkRangeDeletionIndexes*/,
|
||||
false /*optionalCheckIndexes*/);
|
||||
|
||||
ASSERT_EQ(1, countInconsistenciesWithReasonField(inconsistencies));
|
||||
}
|
||||
|
||||
TEST_F(MetadataConsistencyShardCatalogTest,
|
||||
PrimaryHasNoCollectionEntryForChunklessCollectionGlobalCatalogHas) {
|
||||
RAIIServerParameterControllerForTest featureFlagController("featureFlagAuthoritativeShardsCRUD",
|
||||
true);
|
||||
|
||||
const auto localUuid = setUpLocalCollection();
|
||||
auto globalCatalogColl = generateCollectionType(_nss, localUuid, _keyPattern);
|
||||
|
||||
setCSRAuthoritative();
|
||||
_catalogClient->setChunksToReturn({});
|
||||
|
||||
const auto inconsistencies = checkConsistency(globalCatalogColl);
|
||||
|
||||
ASSERT_EQ(1, countInconsistenciesWithReasonField(inconsistencies));
|
||||
}
|
||||
|
||||
TEST_F(MetadataConsistencyShardCatalogTest,
|
||||
GlobalCatalogHasNoCollectionEntryForChunklessCollectionPrimaryHas) {
|
||||
RAIIServerParameterControllerForTest featureFlagController("featureFlagAuthoritativeShardsCRUD",
|
||||
true);
|
||||
|
||||
OperationContext* opCtx = operationContext();
|
||||
createTestCollection(opCtx, _nss);
|
||||
const auto [localCatalogSnapshot, localCatalogCollections] = getLocalCatalog(opCtx, _nss);
|
||||
ASSERT_EQ(1, localCatalogCollections.size());
|
||||
|
||||
const auto localUuid = localCatalogCollections[0]->uuid();
|
||||
|
||||
setCSRAuthoritative();
|
||||
insertDurableShardCatalogCollection(generateCollectionType(_nss, localUuid, _keyPattern));
|
||||
|
||||
const auto inconsistencies = metadata_consistency_util::checkCollectionMetadataConsistency(
|
||||
opCtx,
|
||||
_shardId,
|
||||
_shardId,
|
||||
{} /* shardingCatalogCollections */,
|
||||
localCatalogSnapshot,
|
||||
localCatalogCollections,
|
||||
false /*checkRangeDeletionIndexes*/,
|
||||
false /*optionalCheckIndexes*/);
|
||||
|
||||
ASSERT_EQ(1, countInconsistenciesWithReasonField(inconsistencies));
|
||||
}
|
||||
|
||||
TEST_F(MetadataConsistencyShardCatalogTest, DurablePath_SkippedWhenNonAuthoritative) {
|
||||
RAIIServerParameterControllerForTest featureFlagController("featureFlagAuthoritativeShardsCRUD",
|
||||
true);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user