SERVER-105543: BACKPORT-25609: [v8.2] Strengthen rankFusion scoreDetails testing (#39319)

GitOrigin-RevId: 3a9d05edd8548be468cee1fadf033c9ab8c30d10
This commit is contained in:
Adithi Raghavan 2025-07-29 17:48:42 -04:00 committed by MongoDB Bot
parent 8a3d441ee9
commit 812035b596
2 changed files with 1126 additions and 213 deletions

View File

@ -13,6 +13,7 @@ import {
getMovieSearchIndexSpec,
getMovieVectorSearchIndexSpec
} from "jstests/with_mongot/e2e_lib/data/movies.js";
import {getRentalData, getRentalSearchIndexSpec} from "jstests/with_mongot/e2e_lib/data/rentals.js";
const collName = "search_rank_fusion";
const coll = db.getCollection(collName);
@ -51,28 +52,106 @@ const searchStage = {
$search: searchStageSpec
};
// $search stage spec with search-level scoreDetails explicitly disabled: a text query for "ape"
// over the "fullplot" and "title" paths, run against the movie search index.
const searchStageSpecNoDetails = {
index: getMovieSearchIndexSpec().name,
text: {query: "ape", path: ["fullplot", "title"]},
scoreDetails: false
};
// The spec above wrapped as a $search stage so it can be placed in an input pipeline.
const searchStageNoDetails = {
$search: searchStageSpecNoDetails
};
/**
 * Computes one input pipeline's contribution to a document's reciprocal rank fusion score:
 * weight * (1 / (60 + rank)).
 *
 * @param {number} weight - Weight assigned to the input pipeline.
 * @param {number} rank - Rank of the document within that input pipeline.
 * @returns {number} The weighted reciprocal rank.
 */
const calculateReciprocalRankFusionScore = (weight, rank) => {
    const rankConstant = 60;
    // Keep the operation order (reciprocal first, then multiply) so the floating point result
    // is bit-for-bit the same as the formula written above.
    const reciprocalRank = 1 / (rankConstant + rank);
    return weight * reciprocalRank;
};
// Expected text of the "description" field in the outer scoreDetails object of each result; the
// tests compare the projected description against this string.
const scoreDetailsDescription =
"value output by reciprocal rank fusion algorithm, computed as sum of (weight * (1 / (60 " +
"+ rank))) across input pipelines from which this document is output, from:";
// Test search/vectorSearch where only search has scoreDetails.
let testQuery = [
{
$rankFusion: {
input: {pipelines: {vector: [vectorStage], search: [searchStage, {$limit: limit}]}},
combination: {weights: {search: 2}},
scoreDetails: true,
},
},
{$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
];
let results = coll.aggregate(testQuery).toArray();
function fieldPresent(field, containingObj) {
return containingObj.hasOwnProperty(field);
/**
 * Verifies the fields every input pipeline's scoreDetails entry carries when $rankFusion's
 * scoreDetails is enabled: inputPipelineName, rank and weight. The name and weight are the same
 * for every result, so their values are compared; rank differs per document, so only its
 * presence is checked.
 *
 * @param {function} assertFieldPresent - Callback (field, obj) asserting obj has the field.
 * @param {Object} subDetails - The scoreDetails entry for one input pipeline.
 * @param {string} pipelineName - Expected inputPipelineName.
 * @param {number} weight - Expected weight.
 */
function checkDefaultPipelineScoreDetails(assertFieldPresent, subDetails, pipelineName, weight) {
    const expectedValues = {inputPipelineName: pipelineName, weight: weight};
    for (const field of ["inputPipelineName", "rank", "weight"]) {
        assertFieldPresent(field, subDetails);
        if (expectedValues.hasOwnProperty(field)) {
            assert.eq(subDetails[field], expectedValues[field]);
        }
    }
}
for (const foundDoc of results) {
/**
 * Verifies the scoreDetails entry of a $search input pipeline and returns that pipeline's RRF
 * contribution for the document.
 *
 * inputPipelineName and rank are always checked. If the entry has a "value" field, the document
 * was output by the search pipeline: weight, value and details must be present, and
 *   - when the search stage has scoreDetails enabled, the description must be "sum of:" and the
 *     details must not be empty;
 *   - otherwise the details must be an empty array.
 * If there is no "value" field, the rank must be "NA" and the contribution is 0.
 *
 * @param {function} assertFieldPresent - Callback (field, obj) asserting obj has the field.
 * @param {Object} subDetails - The scoreDetails entry for the search input pipeline.
 * @param {string} pipelineName - Expected inputPipelineName.
 * @param {number} weight - Expected weight.
 * @param {boolean} isScoreDetails - Whether the $search stage itself has scoreDetails enabled.
 * @returns {number} The RRF score contributed by this input pipeline.
 */
function checkSearchScoreDetails(
    assertFieldPresent, subDetails, pipelineName, weight, isScoreDetails) {
    assertFieldPresent("inputPipelineName", subDetails);
    assert.eq(subDetails.inputPipelineName, pipelineName);
    assertFieldPresent("rank", subDetails);

    // Without a value, this document was not returned by the search pipeline at all.
    if (!subDetails.hasOwnProperty("value")) {
        assert.eq(subDetails.rank, "NA");
        return 0;
    }

    assertFieldPresent("weight", subDetails);
    assert.eq(subDetails.weight, weight);
    assertFieldPresent("value", subDetails);  // Output of rank calculation.
    assertFieldPresent("details", subDetails);
    if (!isScoreDetails) {
        assert.eq(subDetails.details, []);
    } else {
        assertFieldPresent("description", subDetails);
        assert.eq(subDetails.description, "sum of:");
        // The shape of the search scoreDetails is not checked beyond being non-empty.
        assert.neq(subDetails.details, []);
    }
    return calculateReciprocalRankFusionScore(subDetails.weight, subDetails.rank);
}
/**
 * Verifies the scoreDetails entry of a $vectorSearch input pipeline: the default fields
 * (inputPipelineName, rank, weight) plus value and details. $vectorSearch input pipelines have no
 * scoreDetails of their own, so details is expected to be an empty array.
 *
 * @param {function} assertFieldPresent - Callback (field, obj) asserting obj has the field.
 * @param {Object} subDetails - The scoreDetails entry for the vectorSearch input pipeline.
 * @param {string} pipelineName - Expected inputPipelineName.
 * @param {number} weight - Expected weight.
 * @returns {number} The RRF score contributed by this input pipeline.
 */
function checkVectorScoreDetails(assertFieldPresent, subDetails, pipelineName, weight) {
    checkDefaultPipelineScoreDetails(assertFieldPresent, subDetails, pipelineName, weight);
    // "value" is the original 'score' AKA vectorSearchScore.
    for (const field of ["value", "details"]) {
        assertFieldPresent(field, subDetails);
    }
    assert.eq(subDetails.details, []);
    return calculateReciprocalRankFusionScore(subDetails.weight, subDetails.rank);
}
/**
 * Verifies the scoreDetails entry of a $geoNear input pipeline: the default fields
 * (inputPipelineName, rank, weight) plus details. $geoNear input pipelines have no scoreDetails
 * of their own, so details is expected to be an empty array.
 *
 * @param {function} assertFieldPresent - Callback (field, obj) asserting obj has the field.
 * @param {Object} subDetails - The scoreDetails entry for the geoNear input pipeline.
 * @param {string} pipelineName - Expected inputPipelineName.
 * @param {number} weight - Expected weight.
 * @returns {number} The RRF score contributed by this input pipeline.
 */
function checkGeoNearScoreDetails(assertFieldPresent, subDetails, pipelineName, weight) {
    checkDefaultPipelineScoreDetails(assertFieldPresent, subDetails, pipelineName, weight);
    assertFieldPresent("details", subDetails);
    const pipelineDetails = subDetails.details;
    assert.eq(pipelineDetails, []);
    return calculateReciprocalRankFusionScore(subDetails.weight, subDetails.rank);
}
/**
* For each document or result, check the following fields in the outer scoreDetails: score,
* details, value, description, and that the subDetails array contains numInputPipelines entries,
* one for each input pipeline.
*/
function checkOuterScoreDetails(foundDoc, numInputPipelines) {
// Assert that the score metadata has been set.
assert(fieldPresent("score", foundDoc), foundDoc);
const score = foundDoc["score"];
@ -93,229 +172,391 @@ for (const foundDoc of results) {
// Description of rank fusion. Wrapper on both search / vector.
assertFieldPresent("details", details);
const subDetails = details["details"];
assert.eq(subDetails.length, 2);
assert.eq(subDetails.length, numInputPipelines);
const searchDetails = subDetails[0];
assertFieldPresent("inputPipelineName", searchDetails);
assert.eq(searchDetails["inputPipelineName"], "search");
assertFieldPresent("rank", searchDetails);
// If there isn't a value, we didn't get this back from search at all.
if (searchDetails.hasOwnProperty("value")) {
assertFieldPresent("weight", searchDetails);
assert.eq(searchDetails["weight"], 2);
assertFieldPresent("value", searchDetails); // Output of rank calculation.
assertFieldPresent("details",
searchDetails); // Not checking description contents, just that its
// present and not our placeholder value.
assert.neq(searchDetails["details"], []);
// Note we won't check the shape of the search scoreDetails beyond here.
} else {
assert.eq(searchDetails["rank"], "NA");
assert.eq(searchDetails.hasOwnProperty("weight"), false);
assert.eq(searchDetails.hasOwnProperty("details"), false);
}
const vectorDetails = subDetails[1];
assertFieldPresent("inputPipelineName", vectorDetails);
assert.eq(vectorDetails["inputPipelineName"], "vector");
assertFieldPresent("details", vectorDetails);
assert.eq(vectorDetails["details"], []);
assertFieldPresent("rank", vectorDetails);
assertFieldPresent("weight", vectorDetails);
assert.eq(vectorDetails["weight"], 1);
return [assertFieldPresent, subDetails, score];
}
// Test vectorSearch/vectorSearch where neither has score details.
testQuery = [
{
$rankFusion: {
input: {pipelines: {vector: [vectorStage], secondVector: [vectorStage]}},
combination: {weights: {vector: 0.5, secondVector: 2.8}},
scoreDetails: true,
},
},
{$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
];
results = coll.aggregate(testQuery).toArray();
for (const foundDoc of results) {
// Assert that the score metadata has been set.
assert(fieldPresent("score", foundDoc), foundDoc);
const score = foundDoc["score"];
assert(fieldPresent("details", foundDoc), foundDoc);
const details = foundDoc["details"];
assert(fieldPresent("value", details), details);
// The output of the rank calculation.
// We don't care about the actual score, just assert that its been calculated.
assert.gt(details["value"], 0);
// Assert that the score metadata is the same value as what scoreDetails set.
assert.eq(details["value"], score);
assert(fieldPresent("description", details), details);
assert.eq(details["description"], scoreDetailsDescription);
function assertFieldPresent(field, obj) {
assert(fieldPresent(field, obj),
`Looked for ${field} in ${tojson(obj)}. Full details: ${tojson(details)}`);
}
// Description of rank fusion. Wrapper on both secondVector / vector.
assertFieldPresent("details", details);
const subDetails = details["details"];
assert.eq(subDetails.length, 2);
const secondVectorDetails = subDetails[0];
assertFieldPresent("inputPipelineName", secondVectorDetails);
assert.eq(secondVectorDetails["inputPipelineName"], "secondVector");
assertFieldPresent("rank", secondVectorDetails);
assertFieldPresent("weight", secondVectorDetails);
assert.eq(secondVectorDetails["weight"], 2.8);
assertFieldPresent("value", secondVectorDetails); // Original 'score' AKA vectorSearchScore.
assertFieldPresent("details",
secondVectorDetails); // Not checking description contents, just that its
// present and not our placeholder value.
assert.eq(secondVectorDetails["details"], []);
const vectorDetails = subDetails[1];
assertFieldPresent("inputPipelineName", vectorDetails);
assert.eq(vectorDetails["inputPipelineName"], "vector");
assertFieldPresent("details", vectorDetails);
assert.eq(vectorDetails["details"], []);
assertFieldPresent("value", vectorDetails); // Original 'score' AKA vectorSearchScore.
assertFieldPresent("rank", vectorDetails);
assertFieldPresent("weight", vectorDetails);
assert.eq(vectorDetails["weight"], 0.5);
/**
 * Reports whether 'containingObj' has 'field' as an own property.
 *
 * @param {string} field - Name of the property to look for.
 * @param {Object} containingObj - Object to inspect.
 * @returns {boolean} True if the object itself defines the property (inherited ones don't count).
 */
function fieldPresent(field, containingObj) {
    const isOwnField = containingObj.hasOwnProperty(field);
    return isOwnField;
}
// Test search/vectorSearch where search scoreDetails is off but $rankFusion's scoreDetails is on.
const searchStageSpecNoDetails = {
index: getMovieSearchIndexSpec().name,
text: {query: "ape", path: ["fullplot", "title"]},
scoreDetails: false
};
const searchStageNoDetails = {
$search: searchStageSpecNoDetails
};
testQuery = [
{
$rankFusion: {
input: {
pipelines: {vector: [vectorStage], search: [searchStageNoDetails, {$limit: limit}]}
/**
* Test search/vectorSearch where only search has scoreDetails.
* "score" : 0.04918032786885246,
* "details" : {
"value" : 0.04918032786885246,
"description" : "value output by reciprocal rank fusion algorithm...",
"details" : [
{
"inputPipelineName" : "search",
"rank" : 1,
"weight" : 2,
"value" : 1.5521023273468018,
"description" : "sum of:",
"details" : [
// $search scoreDetails go here
]
},
scoreDetails: true,
},
},
{$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
];
results = coll.aggregate(testQuery).toArray();
for (const foundDoc of results) {
// Assert that the score metadata has been set.
assert(fieldPresent("score", foundDoc), foundDoc);
const score = foundDoc["score"];
assert(fieldPresent("details", foundDoc), foundDoc);
const details = foundDoc["details"];
assert(fieldPresent("value", details), details);
// We don't care about the actual score, just assert that its been calculated.
assert.gt(details["value"], 0);
// Assert that the score metadata is the same value as what scoreDetails set.
assert.eq(details["value"], score);
assert(fieldPresent("description", details), details);
assert.eq(details["description"], scoreDetailsDescription);
function assertFieldPresent(field, obj) {
assert(fieldPresent(field, obj),
`Looked for ${field} in ${tojson(obj)}. Full details: ${tojson(details)}`);
{
"inputPipelineName" : "vector",
"rank" : 1,
"weight" : 1,
"value" : 1,
"details" : [ ]
}
]
}
// Description of rank fusion. Wrapper on both search / vector.
assertFieldPresent("details", details);
const subDetails = details["details"];
assert.eq(subDetails.length, 2);
const searchDetails = subDetails[0];
assertFieldPresent("inputPipelineName", searchDetails);
assert.eq(searchDetails["inputPipelineName"], "search");
assertFieldPresent("rank", searchDetails);
// If there isn't a value, we didn't get this back from search at all.
if (searchDetails.hasOwnProperty("value")) {
assertFieldPresent("weight", searchDetails);
assert.eq(searchDetails["weight"], 1);
assertFieldPresent("value", searchDetails); // Output of rank calculation.
assertFieldPresent("details", searchDetails);
assert.eq(searchDetails["details"], []);
// Note we won't check the shape of the search scoreDetails beyond here.
} else {
assert.eq(searchDetails["rank"], "NA");
assert.eq(searchDetails.hasOwnProperty("weight"), false);
assert.eq(searchDetails.hasOwnProperty("details"), false);
}
const vectorDetails = subDetails[1];
assertFieldPresent("inputPipelineName", vectorDetails);
assert.eq(vectorDetails["inputPipelineName"], "vector");
assertFieldPresent("details", vectorDetails);
assert.eq(vectorDetails["details"], []);
assertFieldPresent("rank", vectorDetails);
assertFieldPresent("weight", vectorDetails);
assert.eq(vectorDetails["weight"], 1);
}
// Test search/vectorSearch where search scoreDetails is off and $rankFusion's scoreDetails is off.
testQuery = [
{
$rankFusion: {
input: {
pipelines: {vector: [vectorStage], search: [searchStageNoDetails, {$limit: limit}]}
*/
(function testSearchScoreDetailsWithRankFusionScoreDetailsTwoInputPipelines() {
const testQuery = [
{
$rankFusion: {
input: {pipelines: {vector: [vectorStage], search: [searchStage, {$limit: limit}]}},
combination: {weights: {search: 2}},
scoreDetails: true,
},
scoreDetails: false,
},
},
{$project: {score: {$meta: "score"}}}
];
{$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
];
results = coll.aggregate(testQuery).toArray();
for (const foundDoc of results) {
// Assert that the score metadata has been set.
assert(fieldPresent("score", foundDoc), foundDoc);
const score = foundDoc["score"];
assert.gte(score, 0);
}
const results = coll.aggregate(testQuery).toArray();
for (const foundDoc of results) {
const [assertFieldPresent, subDetails, score] = checkOuterScoreDetails(foundDoc, 2);
const searchDetails = subDetails[0];
const searchScore =
checkSearchScoreDetails(assertFieldPresent, searchDetails, "search", 2, true);
const vectorDetails = subDetails[1];
const vectorSearchScore =
checkVectorScoreDetails(assertFieldPresent, vectorDetails, "vector", 1);
assert.eq(score, searchScore + vectorSearchScore);
}
})();
/**
 * Test vectorSearch/vectorSearch where neither input pipeline has score details of its own.
 * For every result, the outer scoreDetails and both per-pipeline entries are validated, and the
 * projected score must equal the sum of the two pipelines' RRF contributions.
 * Example result:
 * "score" : 0.054098360655737705,
 * "details" : {
        "value" : 0.054098360655737705,
        "description" : "value output by reciprocal rank fusion algorithm...",
        "details" : [
            {
                "inputPipelineName" : "secondVector",
                "rank" : 1,
                "weight" : 2.8,
                "value" : 1,
                "details" : [ ]
            },
            {
                "inputPipelineName" : "vector",
                "rank" : 1,
                "weight" : 0.5,
                "value" : 1,
                "details" : [ ]
            }
        ]
    }
 */
(function testVectorSearchWithRankFusionScoreDetailsTwoInputPipelines() {
    const testQuery = [
        {
            $rankFusion: {
                input: {pipelines: {vector: [vectorStage], secondVector: [vectorStage]}},
                combination: {weights: {vector: 0.5, secondVector: 2.8}},
                scoreDetails: true,
            },
        },
        {$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
    ];

    const results = coll.aggregate(testQuery).toArray();
    // Without this guard an empty result set would make the loop below pass without checking
    // anything.
    assert.gt(results.length, 0, "expected $rankFusion to output at least one document");

    for (const foundDoc of results) {
        const [assertFieldPresent, subDetails, score] = checkOuterScoreDetails(foundDoc, 2);

        // The test expects the "secondVector" entry first and the "vector" entry second.
        const secondVectorDetails = subDetails[0];
        const secondVectorSearchScore =
            checkVectorScoreDetails(assertFieldPresent, secondVectorDetails, "secondVector", 2.8);

        const vectorDetails = subDetails[1];
        const vectorSearchScore =
            checkVectorScoreDetails(assertFieldPresent, vectorDetails, "vector", 0.5);

        assert.eq(score, secondVectorSearchScore + vectorSearchScore);
    }
})();
/**
 * Test search/vectorSearch where search scoreDetails is off but $rankFusion's scoreDetails is on.
 * No weights are specified; the test expects a weight of 1 for both input pipelines. For every
 * result, the projected score must equal the sum of the two pipelines' RRF contributions.
 * Example result:
 * "score" : 0.03278688524590164,
 * "details" : {
        "value" : 0.03278688524590164,
        "description" : "value output by reciprocal rank fusion algorithm...",
        "details" : [
            {
                "inputPipelineName" : "search",
                "rank" : 1,
                "weight" : 1,
                "value" : 1.5521023273468018,
                "details" : [ ]
            },
            {
                "inputPipelineName" : "vector",
                "rank" : 1,
                "weight" : 1,
                "value" : 1,
                "details" : [ ]
            }
        ]
    }
 */
(function testVectorSearchAndSearchNoScoreDetailsWithRankFusionScoreDetailsTwoInputPipelines() {
    const testQuery = [
        {
            $rankFusion: {
                input: {
                    pipelines:
                        {vector: [vectorStage], search: [searchStageNoDetails, {$limit: limit}]}
                },
                scoreDetails: true,
            },
        },
        {$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
    ];

    const results = coll.aggregate(testQuery).toArray();
    // Without this guard an empty result set would make the loop below pass without checking
    // anything.
    assert.gt(results.length, 0, "expected $rankFusion to output at least one document");

    for (const foundDoc of results) {
        const [assertFieldPresent, subDetails, score] = checkOuterScoreDetails(foundDoc, 2);

        // The test expects the "search" entry first and the "vector" entry second.
        const searchDetails = subDetails[0];
        const searchScore =
            checkSearchScoreDetails(assertFieldPresent, searchDetails, "search", 1, false);

        const vectorDetails = subDetails[1];
        const vectorSearchScore =
            checkVectorScoreDetails(assertFieldPresent, vectorDetails, "vector", 1);

        assert.eq(score, searchScore + vectorSearchScore);
    }
})();
/**
 * Test $rankFusion with scoreDetails with 1 search input pipeline that has scoreDetails.
 * With a single input pipeline, the projected score must equal that pipeline's RRF contribution.
 * Example result:
 * "score" : 0.01639344262295082,
 * "details" : {
        "value" : 0.01639344262295082,
        "description" : "value output by reciprocal rank fusion algorithm...",
        "details" : [
            {
                "inputPipelineName" : "search",
                "rank" : 1,
                "weight" : 1,
                "value" : 1.5521023273468018,
                "description" : "sum of:",
                "details" : [
                    {...} // search's scoreDetails
                ]
            }
        ]
    }
 */
(function testSearchWithScoreDetailsWithRankFusionScoreDetailsOneInputPipeline() {
    const testQuery = [
        {
            $rankFusion: {
                input: {pipelines: {search: [searchStage, {$limit: limit}]}},
                scoreDetails: true,
            },
        },
        {$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
    ];

    const results = coll.aggregate(testQuery).toArray();
    // Without this guard an empty result set would make the loop below pass without checking
    // anything.
    assert.gt(results.length, 0, "expected $rankFusion to output at least one document");

    for (const foundDoc of results) {
        const [assertFieldPresent, subDetails, score] = checkOuterScoreDetails(foundDoc, 1);
        const searchDetails = subDetails[0];
        const searchScore =
            checkSearchScoreDetails(assertFieldPresent, searchDetails, "search", 1, true);
        assert.eq(score, searchScore);
    }
})();
/**
 * Test search/vectorSearch where search scoreDetails is off and $rankFusion's scoreDetails is off.
 * Only the score metadata is projected; each result must carry a non-negative score.
 * Example result:
 * { "_id" : 6, "score" : 0.03278688524590164 }
 */
(function testVectorSearchAndSearchNoScoreDetailsWithRankFusionNoScoreDetailsTwoInputPipelines() {
    const testQuery = [
        {
            $rankFusion: {
                input: {
                    pipelines:
                        {vector: [vectorStage], search: [searchStageNoDetails, {$limit: limit}]}
                },
                scoreDetails: false,
            },
        },
        {$project: {score: {$meta: "score"}}}
    ];

    const results = coll.aggregate(testQuery).toArray();
    // Without this guard an empty result set would make the loop below pass without checking
    // anything.
    assert.gt(results.length, 0, "expected $rankFusion to output at least one document");

    for (const foundDoc of results) {
        // Assert that the score metadata has been set.
        assert(fieldPresent("score", foundDoc), foundDoc);
        const score = foundDoc["score"];
        assert.gte(score, 0);
    }
})();
/**
* Verify that when $rankFusion.scoreDetails is false and an input pipeline ($search) has
* scoreDetails set to true, the aggregation fails when scoreDetails metadata is projected out.
*/
testQuery = [
{
$rankFusion: {
input: {pipelines: {vector: [vectorStage], search: [searchStage, {$limit: limit}]}},
combination: {weights: {search: 2}},
scoreDetails: false,
(function testScoreDetailsMetadataProjectionFailsWhenRankFusionHasNoScoreDetails() {
const testQuery = [
{
$rankFusion: {
input: {pipelines: {vector: [vectorStage], search: [searchStage, {$limit: limit}]}},
combination: {weights: {search: 2}},
scoreDetails: false,
},
},
},
{$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
];
{$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
];
assertErrCodeAndErrMsgContains(coll, testQuery, 40218, "query requires scoreDetails metadata");
assertErrCodeAndErrMsgContains(coll, testQuery, 40218, "query requires scoreDetails metadata");
})();
/**
* Verify that when $rankFusion.scoreDetails is false and an input pipeline ($search) has
* scoreDetails set to true, the aggregation succeeds when scoreDetails metadata is NOT projected
* out.
*/
testQuery = [
{
$rankFusion: {
input: {pipelines: {search: [searchStage, {$limit: limit}]}},
combination: {weights: {search: 2}},
scoreDetails: false,
(function testQueryWithoutScoreDetailsMetadataProjectionWorksWhenRankFusionHasNoScoreDetails() {
const testQuery = [
{
$rankFusion: {
input: {pipelines: {search: [searchStage, {$limit: limit}]}},
combination: {weights: {search: 2}},
scoreDetails: false,
},
},
},
{$project: {plot_embedding: 0}}
];
{$project: {plot_embedding: 0}}
];
assert.commandWorked(db.runCommand({aggregate: collName, pipeline: testQuery, cursor: {}}));
assert.commandWorked(db.runCommand({aggregate: collName, pipeline: testQuery, cursor: {}}));
})();
/**
 * Runs $rankFusion with scoreDetails enabled over a single input pipeline ($match + $sort) that
 * sets no score/scoreDetails metadata. Asserts that the aggregate command succeeds and that the
 * query returns no documents.
 *
 * NOTE(review): the pipeline ends with {$project: {plot_embedding: 0}} and never projects the
 * scoreDetails metadata, so what is verified is an empty result set rather than the contents of
 * scoreDetails - confirm this is the intended coverage.
 */
(function testQueryWithScoreDetailsForNoScoreOrScoreDetailsGeneratingPipeline() {
    const matchAndSortPipeline = [{$match: {title: "ape"}}, {$sort: {title: 1}}];
    const rankFusionStage = {
        $rankFusion: {
            input: {pipelines: {matchAndSort: matchAndSortPipeline}},
            combination: {weights: {matchAndSort: 2}},
            scoreDetails: true,
        },
    };
    const testQuery = [rankFusionStage, {$project: {plot_embedding: 0}}];

    // First make sure the command itself is accepted by the server.
    const aggregateCmd = {aggregate: collName, pipeline: testQuery, cursor: {}};
    assert.commandWorked(db.runCommand(aggregateCmd));

    // Then check what the query actually returns.
    const results = coll.aggregate(testQuery).toArray();
    assert.eq(results, []);
})();
// TODO SERVER-93218 Test scoreDetails with nested rankFusion.
dropSearchIndex(coll, {name: getMovieSearchIndexSpec().name});
dropSearchIndex(coll, {name: getMovieVectorSearchIndexSpec().name});
/**
 * Verify scoreDetails correctly projected when $rankFusion takes a $geoNear input pipeline.
 * The collection is re-populated with the rental data set and gets its own search index and 2d
 * index. For every result, the projected score must equal the sum of the geoNear and search
 * pipelines' RRF contributions.
 * Example result:
 * {
        "_id" : 41,
        "score" : 0.04891591750396616,
        "details" : {
            "value" : 0.04891591750396616,
            "description" : "value output by reciprocal rank fusion algorithm...",
            "details" : [
                {
                    "inputPipelineName" : "geoNear",
                    "rank" : 1,
                    "weight" : 2,
                    "details" : [ ]
                },
                {
                    "inputPipelineName" : "search",
                    "rank" : 2,
                    "weight" : 1,
                    "value" : 2.7601585388183594,
                    "details" : [ ]
                }
            ]
        }
    }
 */
(function testQueryWithScoreDetailsForGeoNearInputPipeline() {
    coll.drop();
    assert.commandWorked(coll.insertMany(getRentalData()));

    // Index is blocking by default so that the query is only run after index has been made.
    createSearchIndex(coll, getRentalSearchIndexSpec());
    assert.commandWorked(coll.createIndex({"address.location.coordinates": "2d"}));

    const testQuery = [
        {
            $rankFusion: {
                input: {
                    pipelines: {
                        search: [
                            {
                                $search: {
                                    index: getRentalSearchIndexSpec().name,
                                    text: {
                                        query: "brooklyn",
                                        path: [
                                            "name",
                                            "summary",
                                            "description",
                                            "neighborhood_overview",
                                        ],
                                    },
                                }
                            },
                            {$limit: limit}
                        ],
                        geoNear: [{
                            $geoNear: {
                                near: [-73.97713, 40.68675],
                            }
                        }],
                    }
                },
                combination: {weights: {geoNear: 2}},
                scoreDetails: true,
            },
        },
        {$project: {score: {$meta: "score"}, details: {$meta: "scoreDetails"}}}
    ];
    assert.commandWorked(db.runCommand({aggregate: collName, pipeline: testQuery, cursor: {}}));

    const results = coll.aggregate(testQuery).toArray();
    // Without this guard an empty result set would make the loop below pass without checking
    // anything.
    assert.gt(results.length, 0, "expected $rankFusion to output at least one document");

    for (const foundDoc of results) {
        const [assertFieldPresent, subDetails, score] = checkOuterScoreDetails(foundDoc, 2);

        // Check geoNear input pipeline details.
        const geoNearScore =
            checkGeoNearScoreDetails(assertFieldPresent, subDetails[0], "geoNear", 2);

        // Check search input pipeline details.
        const searchScore =
            checkSearchScoreDetails(assertFieldPresent, subDetails[1], "search", 1, false);

        assert.eq(score, geoNearScore + searchScore);
    }

    // Drop the search index this test created, as is done for the movie indexes earlier in the
    // file.
    dropSearchIndex(coll, {name: getRentalSearchIndexSpec().name});
})();

View File

@ -4423,5 +4423,677 @@ TEST_F(DocumentSourceRankFusionTest, CheckTwoPipelineRankFusionFullDesugaring) {
})",
asOneObj);
}
// Checks the full desugared form of a $rankFusion stage that has scoreDetails enabled and four
// weighted input pipelines: a $match with $text, a $search with scoreDetails, a $vectorSearch,
// and a plain $match. The desugared stages are serialized and compared with the expected JSON
// assembled below.
TEST_F(DocumentSourceRankFusionTest, CheckFourPipelinesScoreDetailsDesugaring) {
// Set featureFlagRankFusionFull to true through the test parameter controller.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRankFusionFull", true);
auto expCtx = getExpCtx();
// Map the test namespace to itself paired with an empty std::vector<BSONObj>.
// NOTE(review): presumably needed so the $unionWith stages produced by desugaring can resolve
// their target collection - confirm.
expCtx->setResolvedNamespaces(ResolvedNamespaceMap{
{expCtx->getNamespaceString(), {expCtx->getNamespaceString(), std::vector<BSONObj>()}}});
// The stage under test: four input pipelines, an explicit weight for each, scoreDetails on.
auto spec = fromjson(R"({
$rankFusion: {
input: {
pipelines: {
matchWithTextScore: [
{ $match: { $text: { $search: "Agatha Christie" } } },
{ $sort: {author: 1} }
],
searchPipe: [
{
$search: {
index: "search_index",
text: {
query: "mystery",
path: "genres"
},
scoreDetails: true
}
}
],
vectorSearchPipe: [
{
$vectorSearch: {
queryVector: [1.0, 2.0, 3.0],
path: "plot_embedding",
numCandidates: 300,
index: "vector_index",
limit: 10
}
}
],
matchWithoutTextScore: [
{ $match : { author : "Agatha Christie" } },
{ $sort: {author: 1} }
]
}
},
combination: {
weights: {
matchWithTextScore: 3,
searchPipe: 2,
vectorSearchPipe: 4,
matchWithoutTextScore: 5
}
},
scoreDetails: true
}
})");
// Desugar the stage, build a pipeline from the result and serialize it under "expectedStages".
const auto desugaredList =
DocumentSourceRankFusion::createFromBson(spec.firstElement(), getExpCtx());
const auto pipeline = Pipeline::create(desugaredList, getExpCtx());
BSONObj asOneObj = BSON("expectedStages" << pipeline->serializeToBson());
// The expected JSON is assembled from several concatenated raw strings. In it the input
// pipelines appear ordered by name (matchWithTextScore, matchWithoutTextScore, searchPipe,
// vectorSearchPipe) rather than in the order they were written in the spec above.
const std::string expectedStages = std::string(R"({
"expectedStages": [)") +
// Part 1: the matchWithTextScore pipeline inline (its scoreDetails hold {value: $meta score,
// details: []}), followed by the $unionWith for matchWithoutTextScore (scoreDetails hold only
// {details: []}).
std::string(R"({
"$match": {
"$text": {
"$search": "Agatha Christie"
}
}
},
{
"$sort": {
"author": 1,
"$_internalOutputSortKeyMetadata": true
}
},
{
"$replaceRoot": {
"newRoot": {
"docs": "$$ROOT"
}
}
},
{
"$_internalSetWindowFields": {
"sortBy": {
"order": 1
},
"output": {
"matchWithTextScore_rank": {
"$rank": {}
}
}
}
},
{
"$addFields": {
"matchWithTextScore_score": {
"$multiply": [
{
"$divide": [
{
"$const": 1
},
{
"$add": [
"$matchWithTextScore_rank",
{
"$const": 60
}
]
}
]
},
{
"$const": 3
}
]
}
}
},
{
"$addFields": {
"matchWithTextScore_scoreDetails": {
"value": {
"$meta": "score"
},
"details": []
}
}
},
{
"$unionWith": {
"coll": "pipeline_test",
"pipeline": [
{
"$match": {
"author": "Agatha Christie"
}
},
{
"$sort": {
"author": 1,
"$_internalOutputSortKeyMetadata": true
}
},
{
"$replaceRoot": {
"newRoot": {
"docs": "$$ROOT"
}
}
},
{
"$_internalSetWindowFields": {
"sortBy": {
"order": 1
},
"output": {
"matchWithoutTextScore_rank": {
"$rank": {}
}
}
}
},
{
"$addFields": {
"matchWithoutTextScore_score": {
"$multiply": [
{
"$divide": [
{
"$const": 1
},
{
"$add": [
"$matchWithoutTextScore_rank",
{
"$const": 60
}
]
}
]
},
{
"$const": 5
}
]
}
}
},
{
"$addFields": {
"matchWithoutTextScore_scoreDetails": {
"details": []
}
}
}
]
}
},)") +
// Part 2: the $unionWith stages for searchPipe (scoreDetails taken from $meta scoreDetails) and
// vectorSearchPipe (scoreDetails hold {value: $meta score, details: []}).
std::string(R"(
{
"$unionWith": {
"coll": "pipeline_test",
"pipeline": [
{
"$search": {
"mongotQuery": {
"index": "search_index",
"text": {
"query": "mystery",
"path": "genres"
},
"scoreDetails": true
},
"requiresSearchSequenceToken": false,
"requiresSearchMetaCursor": true
}
},
{
"$replaceRoot": {
"newRoot": {
"docs": "$$ROOT"
}
}
},
{
"$_internalSetWindowFields": {
"sortBy": {
"order": 1
},
"output": {
"searchPipe_rank": {
"$rank": {}
}
}
}
},
{
"$addFields": {
"searchPipe_score": {
"$multiply": [
{
"$divide": [
{
"$const": 1
},
{
"$add": [
"$searchPipe_rank",
{
"$const": 60
}
]
}
]
},
{
"$const": 2
}
]
}
}
},
{
"$addFields": {
"searchPipe_scoreDetails": {
"$meta": "scoreDetails"
}
}
}
]
}
},
{
"$unionWith": {
"coll": "pipeline_test",
"pipeline": [
{
"$vectorSearch": {
"queryVector": [
1,
2,
3
],
"path": "plot_embedding",
"numCandidates": 300,
"index": "vector_index",
"limit": 10
}
},
{
"$replaceRoot": {
"newRoot": {
"docs": "$$ROOT"
}
}
},
{
"$_internalSetWindowFields": {
"sortBy": {
"order": 1
},
"output": {
"vectorSearchPipe_rank": {
"$rank": {}
}
}
}
},
{
"$addFields": {
"vectorSearchPipe_score": {
"$multiply": [
{
"$divide": [
{
"$const": 1
},
{
"$add": [
"$vectorSearchPipe_rank",
{
"$const": 60
}
]
}
]
},
{
"$const": 4
}
]
}
}
},
{
"$addFields": {
"vectorSearchPipe_scoreDetails": {
"value": {
"$meta": "score"
},
"details": []
}
}
}
]
}
},)") +
// Part 3: the $group on "$docs._id" that keeps, per pipeline, the max score and rank (each
// defaulting to 0 when missing) and the merged scoreDetails, followed by the $addFields that
// sums the four per-pipeline scores into "score".
std::string(R"(
{
"$group": {
"_id": "$docs._id",
"docs": {
"$first": "$docs"
},
"matchWithTextScore_score": {
"$max": {
"$ifNull": [
"$matchWithTextScore_score",
{
"$const": 0
}
]
}
},
"matchWithTextScore_rank": {
"$max": {
"$ifNull": [
"$matchWithTextScore_rank",
{
"$const": 0
}
]
}
},
"matchWithTextScore_scoreDetails": {
"$mergeObjects": "$matchWithTextScore_scoreDetails"
},
"matchWithoutTextScore_score": {
"$max": {
"$ifNull": [
"$matchWithoutTextScore_score",
{
"$const": 0
}
]
}
},
"matchWithoutTextScore_rank": {
"$max": {
"$ifNull": [
"$matchWithoutTextScore_rank",
{
"$const": 0
}
]
}
},
"matchWithoutTextScore_scoreDetails": {
"$mergeObjects": "$matchWithoutTextScore_scoreDetails"
},
"searchPipe_score": {
"$max": {
"$ifNull": [
"$searchPipe_score",
{
"$const": 0
}
]
}
},
"searchPipe_rank": {
"$max": {
"$ifNull": [
"$searchPipe_rank",
{
"$const": 0
}
]
}
},
"searchPipe_scoreDetails": {
"$mergeObjects": "$searchPipe_scoreDetails"
},
"vectorSearchPipe_score": {
"$max": {
"$ifNull": [
"$vectorSearchPipe_score",
{
"$const": 0
}
]
}
},
"vectorSearchPipe_rank": {
"$max": {
"$ifNull": [
"$vectorSearchPipe_rank",
{
"$const": 0
}
]
}
},
"vectorSearchPipe_scoreDetails": {
"$mergeObjects": "$vectorSearchPipe_scoreDetails"
},
"$willBeMerged": false
}
},
{
"$addFields": {
"score": {
"$add": [
"$matchWithTextScore_score",
"$matchWithoutTextScore_score",
"$searchPipe_score",
"$vectorSearchPipe_score"
]
}
}
},)") +
// Part 4: the $addFields that turns a rank of 0 into "NA" for each pipeline, then the
// $addFields that builds "calculatedScoreDetails": one merged entry per pipeline holding
// inputPipelineName, rank, weight ($$REMOVE when the rank is "NA") and the pipeline's own
// scoreDetails.
std::string(R"(
{
"$addFields": {
"matchWithTextScore_rank": {
"$cond": [
{
"$eq": [
"$matchWithTextScore_rank",
{
"$const": 0
}
]
},
{
"$const": "NA"
},
"$matchWithTextScore_rank"
]
},
"matchWithoutTextScore_rank": {
"$cond": [
{
"$eq": [
"$matchWithoutTextScore_rank",
{
"$const": 0
}
]
},
{
"$const": "NA"
},
"$matchWithoutTextScore_rank"
]
},
"searchPipe_rank": {
"$cond": [
{
"$eq": [
"$searchPipe_rank",
{
"$const": 0
}
]
},
{
"$const": "NA"
},
"$searchPipe_rank"
]
},
"vectorSearchPipe_rank": {
"$cond": [
{
"$eq": [
"$vectorSearchPipe_rank",
{
"$const": 0
}
]
},
{
"$const": "NA"
},
"$vectorSearchPipe_rank"
]
}
}
},
{
"$addFields": {
"calculatedScoreDetails": [
{
"$mergeObjects": [
{
"inputPipelineName": {
"$const": "matchWithTextScore"
},
"rank": "$matchWithTextScore_rank",
"weight": {
"$cond": [
{
"$eq": [
"$matchWithTextScore_rank",
{
"$const": "NA"
}
]
},
"$$REMOVE",
{
"$const": 3
}
]
}
},
"$matchWithTextScore_scoreDetails"
]
},
{
"$mergeObjects": [
{
"inputPipelineName": {
"$const": "matchWithoutTextScore"
},
"rank": "$matchWithoutTextScore_rank",
"weight": {
"$cond": [
{
"$eq": [
"$matchWithoutTextScore_rank",
{
"$const": "NA"
}
]
},
"$$REMOVE",
{
"$const": 5
}
]
}
},
"$matchWithoutTextScore_scoreDetails"
]
},
{
"$mergeObjects": [
{
"inputPipelineName": {
"$const": "searchPipe"
},
"rank": "$searchPipe_rank",
"weight": {
"$cond": [
{
"$eq": [
"$searchPipe_rank",
{
"$const": "NA"
}
]
},
"$$REMOVE",
{
"$const": 2
}
]
}
},
"$searchPipe_scoreDetails"
]
},
{
"$mergeObjects": [
{
"inputPipelineName": {
"$const": "vectorSearchPipe"
},
"rank": "$vectorSearchPipe_rank",
"weight": {
"$cond": [
{
"$eq": [
"$vectorSearchPipe_rank",
{
"$const": "NA"
}
]
},
"$$REMOVE",
{
"$const": 4
}
]
}
},
"$vectorSearchPipe_scoreDetails"
]
}
]
}
},)") +
// Part 5: $setMetadata storing the final scoreDetails (value, description, details), the $sort
// by score descending then _id ascending, and the $replaceRoot that restores the documents.
std::string(R"(
{
"$setMetadata": {
"scoreDetails": {
"value": "$score",
"description": {
"$const": "value output by reciprocal rank fusion algorithm, computed as sum of (weight * (1 / (60 + rank))) across input pipelines from which this document is output, from:"
},
"details": "$calculatedScoreDetails"
}
}
},
{
"$sort": {
"score": -1,
"_id": 1
}
},
{
"$replaceRoot": {
"newRoot": "$docs"
}
}
]
})");
ASSERT_BSONOBJ_EQ_AUTO(expectedStages, asOneObj);
}
} // namespace
} // namespace mongo