diff --git a/src/mongo/db/exec/sbe/sbe_hash_lookup_test.cpp b/src/mongo/db/exec/sbe/sbe_hash_lookup_test.cpp index 5007bb22db2..78d3d6f6af4 100644 --- a/src/mongo/db/exec/sbe/sbe_hash_lookup_test.cpp +++ b/src/mongo/db/exec/sbe/sbe_hash_lookup_test.cpp @@ -384,4 +384,101 @@ TEST_F(HashLookupStageTest, ForceSpillTest) { lookupStage->close(); } + +TEST_F(HashLookupStageTest, SpillLargeStringWithCollationTest) { + constexpr size_t kStringLength = 64; + constexpr size_t kStringCount = 26; + + RAIIServerParameterControllerForTest maxMemoryLimit( + "internalQuerySlotBasedExecutionHashLookupApproxMemoryUseInBytesBeforeSpill", + static_cast(kStringCount * kStringLength / 2)); + + std::vector innerKeys; + std::vector outerKeys; + innerKeys.reserve(kStringCount); + outerKeys.reserve(kStringCount); + for (size_t i = 0; i < kStringCount; ++i) { + const std::array chars{static_cast('a' + i), static_cast('A' + i)}; + std::string inner, outer; + inner.reserve(kStringLength); + outer.reserve(kStringLength); + for (size_t j = 0; j < kStringLength; ++j) { + inner.push_back(chars[j % 2]); + outer.push_back(chars[1 - j % 2]); + } + innerKeys.push_back(std::move(inner)); + outerKeys.push_back(std::move(outer)); + } + + BSONArrayBuilder innerJoin; + BSONArrayBuilder outerJoin; + for (size_t i = 0; i < kStringCount; ++i) { + BSONObj obj = BSON("_id" << static_cast(i)); + innerJoin.append(BSON_ARRAY(obj << innerKeys[i])); + outerJoin.append(BSON_ARRAY(obj << outerKeys[i])); + } + + auto [innerScanSlots, innerScanStage] = generateVirtualScanMulti(2, innerJoin.arr()); + auto [outerScanSlots, outerScanStage] = generateVirtualScanMulti(2, outerJoin.arr()); + + auto ctx = makeCompileCtx(); + + auto collatorSlot = generateSlotId(); + auto collator = + std::make_unique(CollatorInterfaceMock::MockType::kToLowerString); + value::OwnedValueAccessor collatorAccessor; + ctx->pushCorrelated(collatorSlot, &collatorAccessor); + collatorAccessor.reset(value::TypeTags::collator, + value::bitcastFrom(collator.release())); + + value::SlotId lookupStageOutputSlot = generateSlotId(); + SlotExprPair agg = std::make_pair( + lookupStageOutputSlot, makeFunction("addToArray", makeE(innerScanSlots[0]))); + auto lookupStage = makeS(std::move(outerScanStage), + std::move(innerScanStage), + outerScanSlots[1], + innerScanSlots[1], + innerScanSlots[0], + std::move(agg), + collatorSlot, + kEmptyPlanNodeId); + + value::SlotVector lookupSlots; + lookupSlots.reserve(2); + lookupSlots.push_back(outerScanSlots[0]); + lookupSlots.push_back(lookupStageOutputSlot); + auto resultAccessors = prepareTree(ctx.get(), lookupStage.get(), lookupSlots); + + std::vector>> actualResultView; + std::vector> ownedValues; + while (lookupStage->getNext() == PlanState::ADVANCED) { + std::vector> results{}; + results.reserve(resultAccessors.size()); + for (size_t i = 0; i < resultAccessors.size(); ++i) { + ownedValues.emplace_back(resultAccessors[i]->getCopyOfValue()); + results.emplace_back(ownedValues.back()); + } + actualResultView.emplace_back(std::move(results)); + } + + ValueVectorGuard resultsGuard{ownedValues}; + lookupStage->close(); + + ASSERT_EQ(actualResultView.size(), kStringCount); + for (size_t i = 0; i < kStringCount; ++i) { + const auto& result = actualResultView[i]; + ASSERT_EQ(result.size(), 2); + const BSONObj input = BSON("_id" << static_cast(i)); + assertValuesEqual(result[0].first, + result[0].second, + value::TypeTags::bsonObject, + value::bitcastFrom(input.objdata())); + const BSONArray output = BSON_ARRAY(input); + assertValuesEqual(result[1].first, + result[1].second, + value::TypeTags::bsonArray, + value::bitcastFrom(output.objdata())); + } +} + } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/stages/lookup_hash_table.cpp b/src/mongo/db/exec/sbe/stages/lookup_hash_table.cpp index 9dbd72c9334..48b9c29cb0e 100644 --- a/src/mongo/db/exec/sbe/stages/lookup_hash_table.cpp +++ b/src/mongo/db/exec/sbe/stages/lookup_hash_table.cpp @@ -56,8 +56,10 @@ void LookupHashTableIter::initSearchArray() { hashTableMatchIter->second.end()); } else if (_hashTable._recordStoreHt) { // The key wasn't in memory. Check the '_hashTable._recordStoreHt' disk spill. - auto [_, tagElemCollView, valElemCollView] = + auto [owned, tagElemCollView, valElemCollView] = _hashTable.normalizeStringIfCollator(tagElemView, valElemView); + value::ValueGuard elemGuard{owned, tagElemCollView, valElemCollView}; + boost::optional> indicesFromRS = _hashTable.readIndicesFromRecordStore( _hashTable._recordStoreHt.get(), tagElemCollView, valElemCollView); @@ -82,8 +84,10 @@ void LookupHashTableIter::initSearchScalar() { _hashTableMatchVectorIdx = 0; } else if (_hashTable._recordStoreHt) { // The key wasn't in memory. Check the '_hashTable._recordStoreHt' disk spill. - auto [_, tagKeyCollView, valKeyCollView] = + auto [owned, tagKeyCollView, valKeyCollView] = _hashTable.normalizeStringIfCollator(_outerKeyTag, _outerKeyVal); + value::ValueGuard keyGuard{owned, tagKeyCollView, valKeyCollView}; + boost::optional> indicesFromRS = _hashTable.readIndicesFromRecordStore( _hashTable._recordStoreHt.get(), tagKeyCollView, valKeyCollView); if (indicesFromRS) { @@ -330,7 +334,7 @@ void LookupHashTable::spillIndicesToRecordStore(SpillingStore* rs, } auto [owned, tagKeyColl, valKeyColl] = normalizeStringIfCollator(tagKey, valKey); - _htProbeKey.reset(0, owned, tagKeyColl, valKeyColl); + value::ValueGuard keyGuard{owned, tagKeyColl, valKeyColl}; auto valFromRs = readIndicesFromRecordStore(rs, tagKeyColl, valKeyColl);