SERVER-127284 Extend SorterChecksumCalculator to handle WCE (#54224)

Co-authored-by: Gregory Noma <gregory.noma@gmail.com>
GitOrigin-RevId: ad3a438176cc6df33788c381af08d90b7ff45c1a
This commit is contained in:
Stephanie 2026-05-21 15:37:19 -04:00 committed by MongoDB Bot
parent 7786bd7265
commit 86089a110e
4 changed files with 233 additions and 12 deletions

View File

@ -37,18 +37,18 @@
namespace mongo {
void SorterChecksumCalculator::addData(const char* data, size_t size) {
size_t SorterChecksumCalculator::_advanceChecksum(size_t seed,
const char* data,
size_t size) const {
switch (_version) {
case SorterChecksumVersion::v1:
_checksum = murmur3<sizeof(uint32_t)>(ConstDataRange{data, size}, _checksum);
return;
return murmur3<sizeof(uint32_t)>(ConstDataRange{data, size}, seed);
case SorterChecksumVersion::v2:
#ifdef MONGO_CONFIG_WIREDTIGER_ENABLED
_checksum = wiredtiger_crc32c_with_seed_func()(_checksum, data, size);
return wiredtiger_crc32c_with_seed_func()(seed, data, size);
#else
MONGO_UNIMPLEMENTED_TASSERT(7770500);
#endif
return;
}
tasserted(7784000,
str::stream() << "Unknown sorter checksum version: " << idl::serialize(_version)
@ -56,4 +56,21 @@ void SorterChecksumCalculator::addData(const char* data, size_t size) {
<< "version of MongoDB?");
}
// Folds `data` straight into the committed checksum. The uncommitted checksum is then reset to
// the new committed value, so anything staged earlier via addUncommittedData() is dropped.
void SorterChecksumCalculator::addData(const char* data, size_t size) {
    const size_t advanced = _advanceChecksum(_checksum, data, size);
    _checksum = advanced;
    _uncommittedChecksum = advanced;
}
// Folds `data` into the uncommitted checksum only; the committed checksum is left untouched.
void SorterChecksumCalculator::addUncommittedData(const char* data, size_t size) {
    const size_t staged = _advanceChecksum(_uncommittedChecksum, data, size);
    _uncommittedChecksum = staged;
}
// Promotes the uncommitted checksum so that checksum() reports it from now on.
void SorterChecksumCalculator::commit() {
    const size_t staged = _uncommittedChecksum;
    _checksum = staged;
}
// Rolls the uncommitted checksum back to the last committed value, dropping any staged bytes.
void SorterChecksumCalculator::abort() {
    const size_t committed = _checksum;
    _uncommittedChecksum = committed;
}
} // namespace mongo

View File

@ -44,10 +44,26 @@ static constexpr SorterChecksumVersion kLatestChecksumVersion = SorterChecksumVe
class SorterChecksumCalculator {
public:
SorterChecksumCalculator(SorterChecksumVersion version, size_t seed = 0)
: _version(version), _checksum(seed) {}
: _version(version), _checksum(seed), _uncommittedChecksum(seed) {}
/**
 * Advances the committed checksum with the given bytes. Any bytes previously added through
 * addUncommittedData() and not yet committed are discarded, as if abort() had been called first.
 */
void addData(const char* data, size_t size);
/**
* Advances the uncommitted checksum. checksum() does not reflect these bytes until commit().
*/
void addUncommittedData(const char* data, size_t size);
/**
* Promotes the uncommitted checksum to the committed checksum. No-op if nothing is pending.
*/
void commit();
/**
* Discards the uncommitted checksum, reverting to the committed checksum. No-op if nothing is
* pending.
*/
void abort();
/**
 * Returns the committed checksum. Bytes added through addUncommittedData() are not included
 * until commit() is called.
 */
size_t checksum() const {
return _checksum;
}
@ -57,8 +73,11 @@ public:
}
private:
size_t _advanceChecksum(size_t seed, const char* data, size_t size) const;
const SorterChecksumVersion _version;
size_t _checksum = 0;
size_t _uncommittedChecksum = 0;
};
} // namespace mongo

View File

@ -96,5 +96,180 @@ TEST_P(SorterChecksumCalculatorTest, Seed) {
EXPECT_EQ(calculator.checksum(), seed);
}
TEST_P(SorterChecksumCalculatorTest, AddUncommittedDataDoesNotAffectCommittedChecksum) {
    SorterChecksumCalculator calc{GetParam()};
    const size_t before = calc.checksum();

    // Staged bytes must stay invisible to checksum() until they are committed.
    calc.addUncommittedData(kData.data(), kData.size());
    const size_t after = calc.checksum();

    EXPECT_EQ(after, before);
}
TEST_P(SorterChecksumCalculatorTest, CommitMatchesAddData) {
    // Reference: the bytes go straight into the committed checksum.
    SorterChecksumCalculator direct{GetParam()};
    direct.addData(kData.data(), kData.size());
    const size_t directChecksum = direct.checksum();

    // Same bytes, staged first and then promoted.
    SorterChecksumCalculator staged{GetParam()};
    staged.addUncommittedData(kData.data(), kData.size());
    staged.commit();

    EXPECT_EQ(staged.checksum(), directChecksum);
}
TEST_P(SorterChecksumCalculatorTest, AbortDiscardsPendingBytes) {
    SorterChecksumCalculator calculator{GetParam()};
    const size_t initial = calculator.checksum();

    calculator.addUncommittedData(kData.data(), kData.size());
    calculator.abort();
    EXPECT_EQ(calculator.checksum(), initial);

    // checksum() never reflects staged bytes, so the check above would also pass if abort() did
    // nothing. Committing afterwards proves the staged bytes were really dropped.
    calculator.commit();
    EXPECT_EQ(calculator.checksum(), initial);
}
TEST_P(SorterChecksumCalculatorTest, AbortThenReaddThenCommitMatchesSingleAdd) {
    // First attempt is staged and rolled back; the second attempt is staged and promoted.
    SorterChecksumCalculator withRetry{GetParam()};
    withRetry.addUncommittedData(kData.data(), kData.size());
    withRetry.abort();
    withRetry.addUncommittedData(kData.data(), kData.size());
    withRetry.commit();

    // Reference: the same bytes added exactly once.
    SorterChecksumCalculator singleAdd{GetParam()};
    singleAdd.addData(kData.data(), kData.size());

    EXPECT_EQ(withRetry.checksum(), singleAdd.checksum());
}
TEST_P(SorterChecksumCalculatorTest, MultipleUncommittedAddsAccumulate) {
    constexpr StringData kFirstChunk = "abacaba"_sd;
    constexpr StringData kSecondChunk = "dabacaba"_sd;

    // Both chunks are staged back to back and promoted with a single commit().
    SorterChecksumCalculator staged{GetParam()};
    staged.addUncommittedData(kFirstChunk.data(), kFirstChunk.size());
    staged.addUncommittedData(kSecondChunk.data(), kSecondChunk.size());
    staged.commit();

    // Reference: the same chunks, in the same order, added directly.
    SorterChecksumCalculator direct{GetParam()};
    direct.addData(kFirstChunk.data(), kFirstChunk.size());
    direct.addData(kSecondChunk.data(), kSecondChunk.size());

    EXPECT_EQ(staged.checksum(), direct.checksum());
}
TEST_P(SorterChecksumCalculatorTest, CommitAfterPriorCommitContinuesFromCommittedState) {
    constexpr int kRounds = 2;

    // Reference: the payload added directly, once per round.
    SorterChecksumCalculator direct{GetParam()};
    for (int round = 0; round < kRounds; ++round) {
        direct.addData(kData.data(), kData.size());
    }

    // Each round stages the payload and commits it before the next round starts.
    SorterChecksumCalculator staged{GetParam()};
    for (int round = 0; round < kRounds; ++round) {
        staged.addUncommittedData(kData.data(), kData.size());
        staged.commit();
    }

    EXPECT_EQ(staged.checksum(), direct.checksum());
}
TEST_P(SorterChecksumCalculatorTest, CommitWithNothingPendingIsNoop) {
    SorterChecksumCalculator calc{GetParam()};
    calc.addData(kData.data(), kData.size());
    const size_t beforeCommit = calc.checksum();

    // Nothing has been staged, so committing must leave the value alone.
    calc.commit();
    const size_t afterCommit = calc.checksum();

    EXPECT_EQ(afterCommit, beforeCommit);
}
TEST_P(SorterChecksumCalculatorTest, AbortWithNothingPendingIsNoop) {
    SorterChecksumCalculator calc{GetParam()};
    calc.addData(kData.data(), kData.size());
    const size_t beforeAbort = calc.checksum();

    // Nothing has been staged, so aborting must leave the value alone.
    calc.abort();
    const size_t afterAbort = calc.checksum();

    EXPECT_EQ(afterAbort, beforeAbort);
}
TEST_P(SorterChecksumCalculatorTest, SeedFlowsIntoUncommittedState) {
    constexpr size_t kInitialSeed = 0xdeadbeef;

    // Staged path: the first staged bytes must start from the constructor seed.
    SorterChecksumCalculator staged{GetParam(), kInitialSeed};
    staged.addUncommittedData(kData.data(), kData.size());
    staged.commit();

    // Reference: the same seed with the bytes added directly.
    SorterChecksumCalculator direct{GetParam(), kInitialSeed};
    direct.addData(kData.data(), kData.size());

    EXPECT_EQ(staged.checksum(), direct.checksum());
}
TEST_P(SorterChecksumCalculatorTest, CommitAndAbortBeforeAnyDataAreNoops) {
    constexpr size_t kInitialSeed = 0xdeadbeef;
    SorterChecksumCalculator calc{GetParam(), kInitialSeed};

    // With no data added at all, the checksum must remain the seed after commit()...
    calc.commit();
    const size_t afterCommit = calc.checksum();
    EXPECT_EQ(afterCommit, kInitialSeed);

    // ...and after a subsequent abort().
    calc.abort();
    const size_t afterAbort = calc.checksum();
    EXPECT_EQ(afterAbort, kInitialSeed);
}
TEST_P(SorterChecksumCalculatorTest, RepeatedCommitWithPendingIsNoop) {
    // Reference: stage the payload and commit exactly once.
    SorterChecksumCalculator singleCommit{GetParam()};
    singleCommit.addUncommittedData(kData.data(), kData.size());
    singleCommit.commit();

    // Same staging, but commit() is invoked a second time with nothing new staged.
    SorterChecksumCalculator doubleCommit{GetParam()};
    doubleCommit.addUncommittedData(kData.data(), kData.size());
    doubleCommit.commit();
    doubleCommit.commit();

    EXPECT_EQ(doubleCommit.checksum(), singleCommit.checksum());
}
TEST_P(SorterChecksumCalculatorTest, RepeatedAbortWithPendingIsNoop) {
    SorterChecksumCalculator calculator{GetParam()};
    const size_t initial = calculator.checksum();

    calculator.addUncommittedData(kData.data(), kData.size());
    calculator.abort();
    calculator.abort();
    EXPECT_EQ(calculator.checksum(), initial);

    // checksum() only reports committed state, so the pending state left behind by the second
    // abort() is not observable above. Committing exposes it: nothing may have been re-staged.
    calculator.commit();
    EXPECT_EQ(calculator.checksum(), initial);
}
TEST_P(SorterChecksumCalculatorTest, AddDataAfterCommitContinuesFromCommittedState) {
    // First payload is staged and committed, the second one is added directly on top of it.
    SorterChecksumCalculator mixed{GetParam()};
    mixed.addUncommittedData(kData.data(), kData.size());
    mixed.commit();
    mixed.addData(kData.data(), kData.size());

    // Reference: both payloads added directly.
    SorterChecksumCalculator direct{GetParam()};
    direct.addData(kData.data(), kData.size());
    direct.addData(kData.data(), kData.size());

    EXPECT_EQ(mixed.checksum(), direct.checksum());
}
TEST_P(SorterChecksumCalculatorTest, AddDataAfterAbortBehavesAsSingleAdd) {
    // The staged payload is rolled back, so only the direct add should count.
    SorterChecksumCalculator afterRollback{GetParam()};
    afterRollback.addUncommittedData(kData.data(), kData.size());
    afterRollback.abort();
    afterRollback.addData(kData.data(), kData.size());

    // Reference: a single direct add on a fresh calculator.
    SorterChecksumCalculator singleAdd{GetParam()};
    singleAdd.addData(kData.data(), kData.size());

    EXPECT_EQ(afterRollback.checksum(), singleAdd.checksum());
}
TEST_P(SorterChecksumCalculatorTest, ZeroLengthAddUncommittedDataThenAbort) {
    SorterChecksumCalculator calc{GetParam()};
    const size_t before = calc.checksum();

    // Stage an empty range (valid pointer, length 0) and roll it back.
    constexpr size_t kNoBytes = 0;
    calc.addUncommittedData(kData.data(), kNoBytes);
    calc.abort();
    const size_t after = calc.checksum();

    EXPECT_EQ(after, before);
}
TEST_P(SorterChecksumCalculatorTest, AddDataWithPendingUncommittedDataDiscardsPending) {
    // Calling addData() while bytes are staged must give the same result as calling abort()
    // first and addData() afterwards.

    // Explicit path: roll back the staged bytes, then add directly.
    SorterChecksumCalculator explicitAbort{GetParam()};
    explicitAbort.addUncommittedData(kData.data(), kData.size());
    explicitAbort.abort();
    explicitAbort.addData(kData.data(), kData.size());
    const size_t explicitChecksum = explicitAbort.checksum();

    // Implicit path: add directly while the staged bytes are still pending.
    SorterChecksumCalculator implicitAbort{GetParam()};
    implicitAbort.addUncommittedData(kData.data(), kData.size());
    implicitAbort.addData(kData.data(), kData.size());

    EXPECT_EQ(implicitAbort.checksum(), explicitChecksum);
}
} // namespace
} // namespace mongo

View File

@ -122,8 +122,18 @@ constexpr std::size_t kLargeNumberOfKeys = 100 * 1000;
constexpr std::size_t kAggressiveSpillMemLimit = 16 * 1024;
constexpr std::size_t kManualSpillEveryN = 10;
constexpr std::size_t dataMemLimitFromTotal(std::size_t totalMemLimit) {
return totalMemLimit - totalMemLimit / 10;
// Computes the data-memory budget that production is expected to pick for `totalMemLimit`: the
// total minus the iterator budget. The iterator budget starts from the
// `maxIteratorsMemoryUsagePercentage` share of the total (described as the 10% reservation --
// confirm against the server parameter), is capped at 1 MiB, is raised to at least one
// `iteratorSize` when the share is below that cap, and is finally rounded down to a multiple of
// `iteratorSize`. Returns 0 when the iterator budget would consume the whole limit.
inline std::size_t dataMemLimitFromTotal(std::size_t totalMemLimit, std::size_t iteratorSize) {
    constexpr std::size_t kIteratorsMaxBytesSizeDefault = 1 * 1024 * 1024;

    const auto percentageShare =
        static_cast<std::size_t>(totalMemLimit * maxIteratorsMemoryUsagePercentage.load());
    const std::size_t unroundedBudget = percentageShare < kIteratorsMaxBytesSizeDefault
        ? std::max(iteratorSize, percentageShare)
        : kIteratorsMaxBytesSizeDefault;

    // Drop the remainder so the budget covers a whole number of iterators.
    const std::size_t iteratorBudget = unroundedBudget - unroundedBudget % iteratorSize;

    if (iteratorBudget >= totalMemLimit) {
        return 0;
    }
    return totalMemLimit - iteratorBudget;
}
std::string makeSpillDirName() {
@ -179,12 +189,12 @@ struct RangeCoverageExpectation {
};
RangeCoverageExpectation expectedRangeCoverageForAggressiveSpilling() {
const auto dataMemLimit = dataMemLimitFromTotal(kAggressiveSpillMemLimit);
constexpr auto iteratorSize = sizeof(FileIterator<IntWrapper, IntWrapper>);
const auto dataMemLimit = dataMemLimitFromTotal(kAggressiveSpillMemLimit, iteratorSize);
const auto expectedNumRanges =
std::max<std::size_t>(dataMemLimit / sorter::kSortedFileBufferSize, 2);
const auto maximumNumberOfIterators = std::max<std::size_t>(
(kAggressiveSpillMemLimit - dataMemLimit) / sizeof(FileIterator<IntWrapper, IntWrapper>),
1);
const auto maximumNumberOfIterators =
std::max<std::size_t>((kAggressiveSpillMemLimit - dataMemLimit) / iteratorSize, 1);
const auto recordsPerRange = dataMemLimit / sizeof(IWPair) + 1;
std::size_t documentsToAdd = kLargeNumberOfKeys;