diff --git a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
index 9526ceea4e0..bca9a17251a 100644
--- a/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
+++ b/src/mongo/db/exec/sbe/expressions/sbe_regex_test.cpp
@@ -218,4 +218,48 @@ TEST_F(SBERegexTest, ComputesRegexFindAll) {
     runAndAssertFindAllExpression(compiledExpr.get(), arrayView);
 }
 
+// Pattern "a*" against the empty input: the expected result holds a single empty match at
+// index 0.
+TEST_F(SBERegexTest, RegexFindAllEmptyMatchOnEmptyInput) {
+    value::OwnedValueAccessor slotAccessor1;
+    value::OwnedValueAccessor slotAccessor2;
+    auto regexSlot = bindAccessor(&slotAccessor1);
+    auto inputSlot = bindAccessor(&slotAccessor2);
+    auto regexExpr = sbe::makeE<EFunction>(
+        EFn::kRegexFindAll, sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto compiledExpr = compileExpression(*regexExpr);
+
+    auto expectedArr = value::TagValueOwned::fromRaw(value::makeNewArray());
+    auto arrayView = value::getArrayView(expectedArr.value());
+    addMatchResult(arrayView, /*matchStr*/ "", /*idx*/ 0);
+
+    auto [regexTag, regexVal] = makeNewPcreRegex("a*", "");
+    auto [inputTag, inputVal] = value::makeNewString("");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertFindAllExpression(compiledExpr.get(), arrayView);
+}
+
+// Pattern "$" against "hello": the expected result holds a single empty match at index 5,
+// i.e. at the very end of the input.
+TEST_F(SBERegexTest, RegexFindAllEndAnchorOnNonEmptyInput) {
+    value::OwnedValueAccessor slotAccessor1;
+    value::OwnedValueAccessor slotAccessor2;
+    auto regexSlot = bindAccessor(&slotAccessor1);
+    auto inputSlot = bindAccessor(&slotAccessor2);
+    auto regexExpr = sbe::makeE<EFunction>(
+        EFn::kRegexFindAll, sbe::makeEs(makeE<EVariable>(regexSlot), makeE<EVariable>(inputSlot)));
+    auto compiledExpr = compileExpression(*regexExpr);
+
+    auto expectedArr = value::TagValueOwned::fromRaw(value::makeNewArray());
+    auto arrayView = value::getArrayView(expectedArr.value());
+    addMatchResult(arrayView, /*matchStr*/ "", /*idx*/ 5);
+
+    auto [regexTag, regexVal] = makeNewPcreRegex("$", "");
+    auto [inputTag, inputVal] = value::makeNewString("hello");
+    slotAccessor1.reset(regexTag, regexVal);
+    slotAccessor2.reset(inputTag, inputVal);
+    runAndAssertFindAllExpression(compiledExpr.get(), arrayView);
+}
+
 }  // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/vm/vm_builtin_regex.cpp b/src/mongo/db/exec/sbe/vm/vm_builtin_regex.cpp
index 71c65684738..8b6667800b5 100644
--- a/src/mongo/db/exec/sbe/vm/vm_builtin_regex.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm_builtin_regex.cpp
@@ -229,6 +229,13 @@ value::TagValueMaybeOwned ByteCode::builtinRegexFindAll(ArityType arity) {
         auto [mstrTag, mstrVal] = value::getObjectView(matchVal)->getField("match");
         auto matchString = value::getStringView(mstrTag, mstrVal);
         if (matchString.empty()) {
+            // The regex matched an empty string. If the empty match landed at the end of the
+            // input (e.g. pattern "$" or "a*" against ""), 'startBytePos' is already at
+            // 'inputString.size()' and there is no byte to advance over. Break out so we do not
+            // read past the end of the input.
+            if (startBytePos >= inputString.size()) {
+                break;
+            }
             startBytePos += str::getCodePointLength(inputString[startBytePos]);
             ++codePointPos;
         } else {