diff --git a/src/mongo/db/pipeline/BUILD.bazel b/src/mongo/db/pipeline/BUILD.bazel index 4ed53f0aae5..7b505ee9dd9 100644 --- a/src/mongo/db/pipeline/BUILD.bazel +++ b/src/mongo/db/pipeline/BUILD.bazel @@ -438,10 +438,14 @@ mongo_cc_library( "group_processor_base.cpp", "hybrid_search_pipeline_builder.cpp", "lite_parsed_graph_lookup.cpp", + "lite_parsed_hybrid_search_desugarer.cpp", + "lite_parsed_hybrid_search_desugarer_utils.cpp", "lite_parsed_lookup.cpp", "lite_parsed_pipeline_metadata_validation.cpp", "lite_parsed_rank_fusion.cpp", + "lite_parsed_rank_fusion_desugarer_utils.cpp", "lite_parsed_score_fusion.cpp", + "lite_parsed_score_fusion_desugarer_utils.cpp", "lite_parsed_union_with.cpp", "match_processor.cpp", "merge_processor.cpp", @@ -1441,7 +1445,9 @@ mongo_cc_unit_test( "document_source_union_with_test.cpp", "document_source_unwind_test.cpp", "lite_parsed_lookup_test.cpp", + "lite_parsed_rank_fusion_desugarer_test.cpp", "lite_parsed_rank_fusion_test.cpp", + "lite_parsed_score_fusion_desugarer_test.cpp", "lite_parsed_score_fusion_test.cpp", "serverless_aggregation_context_fixture.cpp", "//src/mongo/db/pipeline:document_source_coll_stats_test.cpp", diff --git a/src/mongo/db/pipeline/lite_parsed_document_source.h b/src/mongo/db/pipeline/lite_parsed_document_source.h index 858e0dc5b48..ae77de1fd32 100644 --- a/src/mongo/db/pipeline/lite_parsed_document_source.h +++ b/src/mongo/db/pipeline/lite_parsed_document_source.h @@ -538,6 +538,15 @@ public: return false; } + /** + * Returns true if this stage produces scoreDetails metadata. + * TODO SERVER-121091 This can be removed once hybrid search desugars into the internal hybrid + * search stage. + */ + virtual bool isScoreDetailsStage() const { + return false; + } + /** * Returns true if this stage is a selection stage. A selection stage does not modify or * transform documents. diff --git a/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.cpp b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.cpp new file mode 100644 index 00000000000..22ac51b9e84 --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.cpp @@ -0,0 +1,243 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.h" + +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/document_source_hybrid_scoring_util.h" +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h" +#include "mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.h" +#include "mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.h" +#include "mongo/db/query/util/rank_fusion_util.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/string_map.h" + +namespace mongo::lite_parsed_hybrid_search_desugarer { + +StageSpecs desugarRankFusion(const LiteParsedRankFusion& stage, + const NamespaceString& nss, + StringData userCollName) { + const auto& spec = stage.getSpec(); + const auto& subPipelines = *stage.getSubPipelines(); + const bool includeScoreDetails = spec.getScoreDetails(); + + // Pipeline names in spec order (so we can index by position). + std::vector pipelineNames; + pipelineNames.reserve(subPipelines.size()); + for (const auto& elem : spec.getInput().getPipelines()) { + pipelineNames.emplace_back(elem.fieldName()); + } + + tassert(12559411, + "$rankFusion: subPipelines and pipeline-name list size mismatch", + pipelineNames.size() == subPipelines.size()); + + // Resolve and validate weights against the input pipeline names; otherwise empty (all default + // to 1). + StringMap weights; + if (const auto& combinationSpec = spec.getCombination()) { + weights = common_utils::validateWeights( + combinationSpec->getWeights(), pipelineNames, "$rankFusion"_sd); + } + + StageSpecs out; + + for (size_t i = 0; i < subPipelines.size(); ++i) { + const auto& pipelineName = pipelineNames[i]; + double weight = hybrid_scoring_util::getPipelineWeight(weights, pipelineName); + + StageSpecs perPipeline = rank_fusion_utils::buildRankFusionInputPipelinePreamble( + nss, *subPipelines[i], pipelineName, weight, includeScoreDetails); + + if (i == 0) { + for (auto& s : perPipeline) { + out.push_back(std::move(s)); + } + } else { + out.push_back( + common_utils::buildUnionWithLPDS(nss, userCollName, std::move(perPipeline))); + } + } + + // ---- Tail: group + replaceRoot + rank-NA mutation + score/sort/project (+ scoreDetails) ---- + out.push_back(common_utils::parseOwnedStage( + nss, + common_utils::buildGroupBson(pipelineNames, + includeScoreDetails, + rank_fusion_utils::kInternalFieldsName, + rank_fusion_utils::kDocsName, + rank_fusion_utils::kDetailsScalarSuffix))); + out.push_back(common_utils::parseOwnedStage( + nss, + common_utils::buildReplaceRootMergeBson(pipelineNames, + includeScoreDetails, + rank_fusion_utils::kInternalFieldsName, + rank_fusion_utils::kDocsName, + rank_fusion_utils::kDetailsScalarSuffix))); + out.push_back(common_utils::parseOwnedStage( + nss, rank_fusion_utils::buildRankAddFieldsBson(pipelineNames))); + + // TODO SERVER-85426: Remove this branch once all feature flags have been removed. + if (isRankFusionFullEnabled()) { + out.push_back(common_utils::parseOwnedStage( + nss, rank_fusion_utils::buildSetMetadataScoreBson(pipelineNames))); + if (includeScoreDetails) { + out.push_back(common_utils::parseOwnedStage( + nss, + rank_fusion_utils::buildCalculatedFinalScoreDetailsBson(pipelineNames, weights))); + out.push_back(common_utils::parseOwnedStage( + nss, rank_fusion_utils::buildSetMetadataScoreDetailsBson())); + } + out.push_back(common_utils::parseOwnedStage(nss, common_utils::buildSortByScoreMetaBson())); + } else { + out.push_back(common_utils::parseOwnedStage( + nss, rank_fusion_utils::buildAddFieldsScoreBson(pipelineNames))); + out.push_back( + common_utils::parseOwnedStage(nss, rank_fusion_utils::buildSortByScoreScalarBson())); + } + out.push_back(common_utils::parseOwnedStage(nss, + common_utils::buildProjectRemoveInternalFieldsBson( + rank_fusion_utils::kInternalFieldsName))); + + return out; +} + +size_t rankFusionStageExpander(LiteParsedPipeline* pipeline, + size_t index, + LiteParsedDocumentSource& stage) { + auto* rankFusionStage = dynamic_cast(&stage); + tassert(12559412, + "rankFusionStageExpander invoked with non-$rankFusion stage", + rankFusionStage != nullptr); + const NamespaceString& nss = pipeline->getOriginalParseNss(); + StringData userCollName = nss.coll(); + auto desugared = desugarRankFusion(*rankFusionStage, nss, userCollName); + return pipeline->replaceStageWith(index, std::move(desugared)); +} + +StageSpecs desugarScoreFusion(const LiteParsedScoreFusion& stage, + const NamespaceString& nss, + StringData userCollName) { + const auto& spec = stage.getSpec(); + const auto& subPipelines = *stage.getSubPipelines(); + const bool includeScoreDetails = spec.getScoreDetails(); + + // Pipeline names in spec order. + std::vector pipelineNames; + pipelineNames.reserve(subPipelines.size()); + for (const auto& elem : spec.getInput().getPipelines()) { + pipelineNames.emplace_back(elem.fieldName()); + } + + tassert(12559413, + "$scoreFusion: subPipelines and pipeline-name list size mismatch", + pipelineNames.size() == subPipelines.size()); + + // Validate normalization / combination shape. + score_fusion_utils::ScoreFusionScoringOptions scoringOptions(spec); + + StringMap weights; + if (const auto& combinationSpec = spec.getCombination()) { + if (const auto& weightsObj = combinationSpec->getWeights()) { + weights = common_utils::validateWeights(*weightsObj, pipelineNames, "$scoreFusion"_sd); + } + } + + StageSpecs out; + + for (size_t i = 0; i < subPipelines.size(); ++i) { + const auto& pipelineName = pipelineNames[i]; + double weight = hybrid_scoring_util::getPipelineWeight(weights, pipelineName); + + StageSpecs perPipeline = score_fusion_utils::buildScoreFusionInputPipelinePreamble( + nss, + *subPipelines[i], + pipelineName, + weight, + scoringOptions.getNormalizationMethod(), + includeScoreDetails); + + if (i == 0) { + for (auto& s : perPipeline) { + out.push_back(std::move(s)); + } + } else { + out.push_back( + common_utils::buildUnionWithLPDS(nss, userCollName, std::move(perPipeline))); + } + } + + // Tail: $group + $replaceRoot + $setMetadata score (+ optional scoreDetails) + $sort + + // $project. + out.push_back(common_utils::parseOwnedStage( + nss, + common_utils::buildGroupBson(pipelineNames, + includeScoreDetails, + score_fusion_utils::kInternalFieldsName, + score_fusion_utils::kDocsName, + score_fusion_utils::kDetailsScalarSuffix))); + out.push_back(common_utils::parseOwnedStage( + nss, + common_utils::buildReplaceRootMergeBson(pipelineNames, + includeScoreDetails, + score_fusion_utils::kInternalFieldsName, + score_fusion_utils::kDocsName, + score_fusion_utils::kDetailsScalarSuffix))); + out.push_back(common_utils::parseOwnedStage( + nss, score_fusion_utils::buildSetFinalCombinedScoreBson(pipelineNames, scoringOptions))); + + if (includeScoreDetails) { + out.push_back(common_utils::parseOwnedStage( + nss, score_fusion_utils::buildCalculatedFinalScoreDetailsBson(pipelineNames, weights))); + out.push_back(common_utils::parseOwnedStage( + nss, score_fusion_utils::buildSetMetadataScoreDetailsBson(scoringOptions))); + } + + out.push_back(common_utils::parseOwnedStage(nss, common_utils::buildSortByScoreMetaBson())); + out.push_back(common_utils::parseOwnedStage(nss, + common_utils::buildProjectRemoveInternalFieldsBson( + score_fusion_utils::kInternalFieldsName))); + + return out; +} + +size_t scoreFusionStageExpander(LiteParsedPipeline* pipeline, + size_t index, + LiteParsedDocumentSource& stage) { + auto* scoreFusionStage = dynamic_cast(&stage); + tassert(12559414, + "scoreFusionStageExpander invoked with non-$scoreFusion stage", + scoreFusionStage != nullptr); + const NamespaceString& nss = pipeline->getOriginalParseNss(); + StringData userCollName = nss.coll(); + auto desugared = desugarScoreFusion(*scoreFusionStage, nss, userCollName); + return pipeline->replaceStageWith(index, std::move(desugared)); +} + +} // namespace mongo::lite_parsed_hybrid_search_desugarer diff --git a/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.h b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.h new file mode 100644 index 00000000000..d5473bd9f8c --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.h @@ -0,0 +1,94 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/string_data.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/lite_parsed_document_source.h" +#include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/db/pipeline/lite_parsed_rank_fusion.h" +#include "mongo/db/pipeline/lite_parsed_score_fusion.h" +#include "mongo/util/modules.h" + +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer { + +/** + * Desugars a $rankFusion lite-parsed stage into the equivalent list of LiteParsedDocumentSource + * stages. + * + * `nss` is the namespace used to parse the synthesized stages. In production callers, this is + * `pipeline->getOriginalParseNss()` from the outer LiteParsedPipeline. + * + * `userCollName` is the collection name to use for synthesized $unionWith stages. In production + * callers, this is `pipeline->getOriginalParseNss().coll()` (the namespace the outer aggregation + * runs against). + */ +StageSpecs desugarRankFusion(const LiteParsedRankFusion& stage, + const NamespaceString& nss, + StringData userCollName); + +/** + * StageExpander implementation as required by LiteParsedDesugarer::StageExpander. Downcasts + * `stage` to LiteParsedRankFusion, derives `userCollName` from `pipeline->getOriginalParseNss()`, + * runs `desugarRankFusion`, and replaces the stage at `index` with the result. Returns the index + * after the inserted block. + */ +size_t rankFusionStageExpander(LiteParsedPipeline* pipeline, + size_t index, + LiteParsedDocumentSource& stage); + +/** + * Desugars a $scoreFusion lite-parsed stage into the equivalent list of LiteParsedDocumentSource + * stages. + * + * `nss` is the namespace used to parse the synthesized stages. In production callers, this is + * `pipeline->getOriginalParseNss()` from the outer LiteParsedPipeline. + * + * `userCollName` is the collection name to use for synthesized $unionWith stages. In production + * callers, this is `pipeline->getOriginalParseNss().coll()` (the namespace the outer aggregation + * runs against). + */ +StageSpecs desugarScoreFusion(const LiteParsedScoreFusion& stage, + const NamespaceString& nss, + StringData userCollName); + +/** + * StageExpander implementation as required by LiteParsedDesugarer::StageExpander. Downcasts + * `stage` to LiteParsedScoreFusion, derives `userCollName` from `pipeline->getOriginalParseNss()`, + * runs `desugarScoreFusion`, and replaces the stage at `index` with the result. Returns the index + * after the inserted block. + */ +size_t scoreFusionStageExpander(LiteParsedPipeline* pipeline, + size_t index, + LiteParsedDocumentSource& stage); + +} // namespace mongo::lite_parsed_hybrid_search_desugarer diff --git a/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.cpp b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.cpp new file mode 100644 index 00000000000..0348767e374 --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.cpp @@ -0,0 +1,315 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/namespace_string_util.h" +#include "mongo/db/pipeline/document_source_hybrid_scoring_util.h" +#include "mongo/db/pipeline/lite_parsed_union_with.h" +#include "mongo/db/pipeline/owned_lite_parsed_pipeline.h" +#include "mongo/db/query/util/string_util.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/str.h" + +#include + +#include +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer::common_utils { + +namespace { + +// Builds and throws the "invalid weight name" error with typo suggestions. +[[noreturn]] void failWeightsValidationWithPipelineSuggestions( + const std::vector& pipelineNames, + const StringSet& matchedPipelines, + const std::vector& invalidWeights, + StringData stageName) { + std::vector unmatchedPipelines; + for (const auto& name : pipelineNames) { + if (!matchedPipelines.contains(name)) { + unmatchedPipelines.push_back(name); + } + } + + std::vector>> suggestions = + query_string_util::computeTypoSuggestions(unmatchedPipelines, invalidWeights); + + auto convertSingleSuggestionToString = [&](std::size_t i) -> std::string { + std::string s = fmt::format("(provided: '{}' -> ", suggestions[i].first); + if (suggestions[i].second.size() == 1) { + s += fmt::format("suggested: '{}')", suggestions[i].second.front()); + } else { + s += fmt::format("suggestions: [{}])", fmt::join(suggestions[i].second, ", ")); + } + if (i < suggestions.size() - 1) { + s += ", "; + } + return s; + }; + + std::string errorMsg = fmt::format( + "${} stage contained ({}) weight(s) in " + "'combination.weights' that did not reference valid pipeline names. " + "Suggestions for valid pipeline names: ", + stageName, + std::to_string(invalidWeights.size())); + for (std::size_t i = 0; i < suggestions.size(); ++i) { + errorMsg += convertSingleSuggestionToString(i); + } + + uasserted(12559400, errorMsg); +} + +} // namespace + +StringMap validateWeights(const BSONObj& inputWeights, + const std::vector& pipelineNames, + StringData stageName) { + StringSet pipelineNameSet(pipelineNames.begin(), pipelineNames.end()); + + StringMap weights; + std::vector invalidWeights; + StringSet matchedPipelines; + + for (const auto& weightEntry : inputWeights) { + const auto* fieldName = weightEntry.fieldName(); + + if (!pipelineNameSet.contains(fieldName)) { + invalidWeights.emplace_back(fieldName); + continue; + } + + uassert(12559402, + str::stream() << "A pipeline named '" << fieldName + << "' is specified more than once in the " << stageName + << "'combinations.weight' object.", + !weights.contains(fieldName)); + + uassert(12559404, + str::stream() << stageName + << "'s pipeline weight must be numeric, but given non-numeric value " + "for pipeline '" + << fieldName << "'.", + weightEntry.isNumber()); + + const double weight = weightEntry.Number(); + uassert(12559401, + str::stream() << stageName << "'s pipeline weight must be non-negative, but given " + << weight << " for pipeline '" << fieldName << "'.", + weight >= 0); + + weights[fieldName] = weight; + matchedPipelines.insert(fieldName); + } + + if (static_cast(pipelineNames.size()) < inputWeights.nFields()) { + uasserted( + 12559403, + fmt::format( + "{} input has more weights ({}) than pipelines ({}). " + "If 'combination.weights' is specified, there must be a less or equal number of " + "weights as pipelines, each of which is unique and existing. " + "Possible extraneous specified weights = [{}]", + stageName, + inputWeights.nFields(), + static_cast(pipelineNames.size()), + fmt::join(invalidWeights, ", "))); + } else if (!invalidWeights.empty()) { + failWeightsValidationWithPipelineSuggestions( + pipelineNames, matchedPipelines, invalidWeights, stageName); + } + + return weights; +} + +BSONObj buildReplaceRootBson(StringData docsName) { + return BSON("$replaceWith" << BSON(docsName << "$$ROOT")); +} + +BSONObj buildSortByScoreMetaBson() { + return BSON("$sort" << BSON("score" << BSON("$meta" << "score") << "_id" << 1)); +} + +BSONObj buildProjectRemoveInternalFieldsBson(StringData internalFieldsName) { + BSONObjBuilder bob; + { + BSONObjBuilder projectBob(bob.subobjStart("$project"_sd)); + projectBob.append(internalFieldsName, 0); + } + return bob.obj(); +} + +BSONObj buildGroupBson(const std::vector& pipelineNames, + bool includeScoreDetails, + StringData internalFieldsName, + StringData docsName, + StringData detailsScalarSuffix) { + BSONObjBuilder bob; + { + BSONObjBuilder gBob(bob.subobjStart("$group"_sd)); + gBob.append("_id", fmt::format("${}._id", docsName)); + gBob.append(docsName, BSON("$first" << fmt::format("${}", docsName))); + + auto accumulateScalar = [&](StringData field, const std::string& internalPath) { + gBob.append(fmt::format("{}{}", kHsFlatFieldPrefix, field), + BSON("$max" << BSON("$ifNull" + << BSON_ARRAY(fmt::format("${}", internalPath) << 0)))); + }; + + for (const auto& pipelineName : pipelineNames) { + const std::string scoreField = + hybrid_scoring_util::getScoreFieldFromPipelineName(pipelineName); + accumulateScalar(scoreField, + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + internalFieldsName, scoreField)); + if (includeScoreDetails) { + const std::string detailsScalarField = + fmt::format("{}{}", pipelineName, detailsScalarSuffix); + accumulateScalar(detailsScalarField, + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + internalFieldsName, detailsScalarField)); + + const std::string scoreDetailsField = fmt::format("{}_scoreDetails", pipelineName); + const std::string internalScoreDetailsPath = + hybrid_scoring_util::applyInternalFieldPrefixToFieldName(internalFieldsName, + scoreDetailsField); + gBob.append(fmt::format("{}{}", kHsFlatFieldPrefix, scoreDetailsField), + BSON("$mergeObjects" << fmt::format("${}", internalScoreDetailsPath))); + } + } + } + return bob.obj(); +} + +BSONObj buildReplaceRootMergeBson(const std::vector& pipelineNames, + bool includeScoreDetails, + StringData internalFieldsName, + StringData docsName, + StringData detailsScalarSuffix) { + BSONObjBuilder bob; + { + BSONObjBuilder rrBob(bob.subobjStart("$replaceRoot"_sd)); + BSONObjBuilder newRootBob(rrBob.subobjStart("newRoot"_sd)); + BSONArrayBuilder mergeArr(newRootBob.subarrayStart("$mergeObjects"_sd)); + mergeArr.append(fmt::format("${}", docsName)); + BSONObjBuilder wrapperBob; + { + BSONObjBuilder internalFieldsBob(wrapperBob.subobjStart(internalFieldsName)); + auto appendFlat = [&](StringData field) { + internalFieldsBob.append(field, fmt::format("${}{}", kHsFlatFieldPrefix, field)); + }; + for (const auto& pipelineName : pipelineNames) { + appendFlat(hybrid_scoring_util::getScoreFieldFromPipelineName(pipelineName)); + if (includeScoreDetails) { + appendFlat(fmt::format("{}{}", pipelineName, detailsScalarSuffix)); + appendFlat(fmt::format("{}_scoreDetails", pipelineName)); + } + } + } + mergeArr.append(wrapperBob.obj()); + } + return bob.obj(); +} + +std::unique_ptr parseOwnedStage(const NamespaceString& nss, + BSONObj stageBson) { + auto lpds = LiteParsedDocumentSource::parse(nss, stageBson); + lpds->makeOwned(); + return lpds; +} + +void mutateRightmostSortToOutputSortKey(const NamespaceString& nss, StageSpecs& stages) { + // No-op if no $sort is present; e.g. $search and $vectorSearch provide sorted output + // via sort-key metadata without an explicit $sort stage. + for (auto it = stages.rbegin(); it != stages.rend(); ++it) { + if ((*it)->getParseTimeName() == "$sort") { + BSONElement origSpec = (*it)->getOriginalBson(); + tassert(12559415, + "Expected $sort spec value to be an object", + origSpec.type() == BSONType::object); + + BSONObjBuilder mergedSortSpec; + mergedSortSpec.appendElements(origSpec.embeddedObject()); + if (!origSpec.embeddedObject().hasField("$_internalOutputSortKeyMetadata"_sd)) { + mergedSortSpec.append("$_internalOutputSortKeyMetadata", true); + } + + BSONObj newStageBson = BSON("$sort" << mergedSortSpec.obj()); + *it = parseOwnedStage(nss, std::move(newStageBson)); + return; + } + } +} + +std::unique_ptr buildUnionWithLPDS(const NamespaceString& nss, + StringData userCollName, + StageSpecs perPipelineStages) { + // Serialize the already-parsed (and sort-mutated) stages back to BSON. Each .wrap() call + // produces a self-owning copy, so rawPipeline does not alias any stage's internal buffer. + std::vector rawPipeline; + rawPipeline.reserve(perPipelineStages.size()); + for (const auto& s : perPipelineStages) { + rawPipeline.push_back(s->getOriginalBson().wrap()); + } + + NamespaceString foreignNss = NamespaceStringUtil::deserialize(nss.dbName(), userCollName); + + // OwnedLiteParsedPipeline owns its backing BSON, so no manual setOwnedBson is needed for the + // inner pipeline. + OwnedLiteParsedPipeline innerLpp(foreignNss, rawPipeline); + + BSONObjBuilder bob; + { + BSONObjBuilder uwBob(bob.subobjStart("$unionWith"_sd)); + uwBob.append("coll", userCollName); + BSONArrayBuilder pipeArr(uwBob.subarrayStart("pipeline"_sd)); + for (const auto& obj : rawPipeline) { + pipeArr.append(obj); + } + } + BSONObj unionWithObj = bob.obj(); + + auto lpuw = std::make_unique( + unionWithObj.firstElement(), + std::move(foreignNss), + boost::optional(std::move(innerLpp)), + std::move(rawPipeline), + /*hasForeignDB=*/false, // views are flattened before this point + /*isHybridSearch=*/true); + + // makeOwned() wraps _originalBson (a BSONElement view into unionWithObj) into a shared buffer, + // keeping the underlying BSON alive after unionWithObj goes out of scope. + lpuw->makeOwned(); + return lpuw; +} + +} // namespace mongo::lite_parsed_hybrid_search_desugarer::common_utils diff --git a/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h new file mode 100644 index 00000000000..50fd76e41ff --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h @@ -0,0 +1,105 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/string_data.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/lite_parsed_document_source.h" +#include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/util/string_map.h" + +#include +#include +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer::common_utils { + +// Prefix for the flat scalar group keys (e.g. "__hs_

_score") used in the desugared $group's +// per-pipeline accumulators and in the subsequent $replaceRoot's wrapper object. +inline constexpr StringData kHsFlatFieldPrefix = "__hs_"_sd; + +// Parses a synthesized BSONObj stage into an owned LPDS at namespace `nss`. Not for use with +// stages that hold nested LiteParsedPipelines -- use buildUnionWithLPDS for $unionWith. +std::unique_ptr parseOwnedStage(const NamespaceString& nss, + BSONObj stageBson); + +// Walks `stages` right-to-left and rewrites the rightmost $sort to emit sort-key metadata. +// No-op if no $sort is present (e.g. $search and $vectorSearch emit scored output without one). +void mutateRightmostSortToOutputSortKey(const NamespaceString& nss, StageSpecs& stages); + +// Builds a $unionWith LPDS whose subpipeline is `perPipelineStages`, targeting `userCollName` +// on the same DB as `nss`. +std::unique_ptr buildUnionWithLPDS(const NamespaceString& nss, + StringData userCollName, + StageSpecs perPipelineStages); + +// Validates the user-provided combination.weights BSON against the input pipeline names and +// returns the resulting weights map. +StringMap validateWeights(const BSONObj& inputWeights, + const std::vector& pipelineNames, + StringData stageName); + +// {$replaceWith: {: "$$ROOT"}} +BSONObj buildReplaceRootBson(StringData docsName); + +// {$sort: {score: {$meta: "score"}, _id: 1}} +BSONObj buildSortByScoreMetaBson(); + +// {$project: {: 0}} +BSONObj buildProjectRemoveInternalFieldsBson(StringData internalFieldsName); + +// {$group: {_id: "$._id", +// : {$first: "$"}, +// __hs_

_score: {$max: {$ifNull: ["$.

_score", 0]}}, +// [if scoreDetails:] __hs_

: {$max: ...}, +// __hs_

_scoreDetails: {$mergeObjects: ...}, +// ...}} +// +// `detailsScalarSuffix` is the per-pipeline scoreDetails scalar field suffix: +// "_rank" for $rankFusion, "_rawScore" for $scoreFusion. +BSONObj buildGroupBson(const std::vector& pipelineNames, + bool includeScoreDetails, + StringData internalFieldsName, + StringData docsName, + StringData detailsScalarSuffix); + +// {$replaceRoot: {newRoot: {$mergeObjects: ["$", +// {: {

_score: "$__hs_

_score", +// ...}}]}}} +// +// `detailsScalarSuffix`: "_rank" for $rankFusion, "_rawScore" for $scoreFusion. +BSONObj buildReplaceRootMergeBson(const std::vector& pipelineNames, + bool includeScoreDetails, + StringData internalFieldsName, + StringData docsName, + StringData detailsScalarSuffix); + +} // namespace mongo::lite_parsed_hybrid_search_desugarer::common_utils diff --git a/src/mongo/db/pipeline/lite_parsed_pipeline.h b/src/mongo/db/pipeline/lite_parsed_pipeline.h index 085ff8f7f59..a3f2b4e587c 100644 --- a/src/mongo/db/pipeline/lite_parsed_pipeline.h +++ b/src/mongo/db/pipeline/lite_parsed_pipeline.h @@ -97,6 +97,20 @@ public: } } + /** + * Constructs a LiteParsedPipeline from already-parsed StageSpecs without re-parsing. Useful + * when a desugarer assembles a subpipeline from existing LiteParsedDocumentSources. + */ + LiteParsedPipeline(NamespaceString nss, + StageSpecs stages, + bool isRunningAgainstView_ForHybridSearch = false) + : _stageSpecs(std::move(stages)), + _originalParseNss(std::move(nss)), + _isRunningAgainstView_ForHybridSearch(isRunningAgainstView_ForHybridSearch) { + // _hasChangeStream and _involvedNamespaces are intentionally left at their in-class + // defaults; they are Deferred and will be computed on demand from _stageSpecs. + } + /** * Copy constructor. Calls clone on each LiteParsedDocumentSource in the pipeline and copies * member variables. @@ -281,6 +295,16 @@ public: }); } + /** + * Returns true if any stage in the pipeline is a scoreDetails stage (produces scoreDetails + * metadata). + */ + bool isScoreDetailsPipeline() const { + return std::any_of(_stageSpecs.begin(), _stageSpecs.end(), [](auto&& spec) { + return spec->isScoreDetailsStage(); + }); + } + /** * Returns true if all stages in the pipeline are selection stages (they do not modify or * transform documents, only retrieve, limit, or order them). diff --git a/src/mongo/db/pipeline/lite_parsed_rank_fusion.h b/src/mongo/db/pipeline/lite_parsed_rank_fusion.h index 251ab1b401d..f56080c3d66 100644 --- a/src/mongo/db/pipeline/lite_parsed_rank_fusion.h +++ b/src/mongo/db/pipeline/lite_parsed_rank_fusion.h @@ -138,6 +138,11 @@ public: return true; } + // $rankFusion produces scoreDetails metadata when the user requests it via the spec. + bool isScoreDetailsStage() const final { + return _parsedSpec.getScoreDetails(); + } + // $rankFusion does not modify documents, only combines and reorders them. bool isSelectionStage() const final { return true; @@ -166,6 +171,10 @@ public: _parsedSpec, _pipelines, getOriginalBson().wrap().getOwned()); } + const RankFusionSpec& getSpec() const { + return _parsedSpec; + } + private: RankFusionSpec _parsedSpec; }; diff --git a/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_test.cpp b/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_test.cpp new file mode 100644 index 00000000000..da70bbf82e1 --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_test.cpp @@ -0,0 +1,1708 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/bson/bsonelement.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/json.h" +#include "mongo/db/feature_flag.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/pipeline/document_source_rank_fusion.h" +#include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/pipeline/lite_parsed_desugarer.h" +#include "mongo/db/pipeline/lite_parsed_document_source.h" +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.h" +#include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/db/pipeline/lite_parsed_rank_fusion.h" +#include "mongo/db/pipeline/pipeline.h" +#include "mongo/db/pipeline/resolved_namespace.h" +#include "mongo/idl/server_parameter_test_controller.h" +#include "mongo/unittest/unittest.h" + +#include +#include +#include + +namespace mongo { +namespace { + +// Convert a desugared StageSpecs vector into a single BSONObj of the form +// {expectedStages: [, , ...]} so it can be compared via ASSERT_BSONOBJ_EQ_AUTO. +BSONObj toExpectedStagesBson(const std::vector>& stages) { + BSONObjBuilder out; + BSONArrayBuilder arr(out.subarrayStart("expectedStages")); + for (const auto& stage : stages) { + arr.append(stage->getOriginalBson().wrap()); + } + arr.done(); + return out.obj(); +} + +// Parse the top-level pipeline (a vector of one stage) into a LiteParsedRankFusion to +// drive the desugarer. +std::unique_ptr parseRankFusion(const NamespaceString& nss, + const BSONObj& specObj) { + BSONObj ownedSpec = specObj.getOwned(); + auto lpds = LiteParsedDocumentSource::parse(nss, ownedSpec); + lpds->makeOwned(); + auto* rfPtr = dynamic_cast(lpds.get()); + invariant(rfPtr); + // Transfer ownership to a unique_ptr. + lpds.release(); + return std::unique_ptr(rfPtr); +} + +class LiteParsedRankFusionDesugarerTest : public service_context_test::WithSetupTransportLayer, + public AggregationContextFixture { +protected: + BSONObj desugar(const BSONObj& rankFusionSpecObj) { + const NamespaceString& nss = getExpCtx()->getNamespaceString(); + auto lprf = parseRankFusion(nss, rankFusionSpecObj); + auto desugared = + lite_parsed_hybrid_search_desugarer::desugarRankFusion(*lprf, nss, "pipeline_test"_sd); + return toExpectedStagesBson(desugared); + } + +private: + RAIIServerParameterControllerForTest featureFlagController1{"featureFlagRankFusionBasic", true}; + RAIIServerParameterControllerForTest featureFlagController2{"featureFlagRankFusionFull", true}; +}; + +TEST_F(LiteParsedRankFusionDesugarerTest, SinglePipelineBasic) { + RAIIServerParameterControllerForTest disableFull("featureFlagRankFusionFull", false); + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + agatha: [ + { $match: { author: "Agatha Christie" } }, + { $sort: { author: 1 } } + ] + } + } + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$match": { + "author": "Agatha Christie" + } + }, + { + "$sort": { + "author": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_agatha_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.agatha_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "agatha_score": "$__hs_agatha_score" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.agatha_rank" + ] + } + } + }, + { + "$addFields": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_score" + ] + } + } + }, + { + "$sort": { + "score": -1, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, SinglePipelineFull) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + agatha: [ + { $match: { author: "Agatha Christie" } }, + { $sort: { author: 1 } } + ] + } + } + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$match": { + "author": "Agatha Christie" + } + }, + { + "$sort": { + "author": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_agatha_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.agatha_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "agatha_score": "$__hs_agatha_score" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.agatha_rank" + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, MultiplePipelinesMixedBasic) { + RAIIServerParameterControllerForTest disableFull("featureFlagRankFusionFull", false); + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + matchAuthor: [ + { $match: { author: "Agatha Christie" } }, + { $sort: { author: 1 } } + ], + matchGenres: [ + { $search: { index: "search_index", + text: { query: "mystery", path: "genres" } } } + ], + matchPlot: [ + { $vectorSearch: { queryVector: [1.0, 2.0, 3.0], + path: "plot_embedding", + numCandidates: 300, + index: "vector_index", + limit: 10 } } + ] + } + } + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$match": { + "author": "Agatha Christie" + } + }, + { + "$sort": { + "author": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.matchAuthor_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.matchAuthor_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.matchAuthor_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$search": { + "index": "search_index", + "text": { + "query": "mystery", + "path": "genres" + } + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.matchGenres_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.matchGenres_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.matchGenres_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + } + ] + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$vectorSearch": { + "queryVector": [ + 1, + 2, + 3 + ], + "path": "plot_embedding", + "numCandidates": 300, + "index": "vector_index", + "limit": 10 + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.matchPlot_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.matchPlot_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.matchPlot_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_matchAuthor_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.matchAuthor_score", + 0 + ] + } + }, + "__hs_matchGenres_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.matchGenres_score", + 0 + ] + } + }, + "__hs_matchPlot_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.matchPlot_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "matchAuthor_score": "$__hs_matchAuthor_score", + "matchGenres_score": "$__hs_matchGenres_score", + "matchPlot_score": "$__hs_matchPlot_score" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.matchAuthor_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.matchAuthor_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.matchAuthor_rank" + ] + }, + "_internal_rankFusion_internal_fields.matchGenres_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.matchGenres_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.matchGenres_rank" + ] + }, + "_internal_rankFusion_internal_fields.matchPlot_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.matchPlot_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.matchPlot_rank" + ] + } + } + }, + { + "$addFields": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.matchAuthor_score", + "$_internal_rankFusion_internal_fields.matchGenres_score", + "$_internal_rankFusion_internal_fields.matchPlot_score" + ] + } + } + }, + { + "$sort": { + "score": -1, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, CustomWeightsBasic) { + RAIIServerParameterControllerForTest disableFull("featureFlagRankFusionFull", false); + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + agatha: [ + { $match: { author: "Agatha Christie" } }, + { $sort: { author: 1 } } + ], + other: [ + { $match: { x: 1 } }, + { $sort: { x: 1 } } + ] + } + }, + combination: { + weights: { agatha: 5, other: 2 } + } + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$match": { + "author": "Agatha Christie" + } + }, + { + "$sort": { + "author": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 60 + ] + } + ] + }, + 5 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$match": { + "x": 1 + } + }, + { + "$sort": { + "x": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.other_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.other_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.other_rank", + 60 + ] + } + ] + }, + 2 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_agatha_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.agatha_score", + 0 + ] + } + }, + "__hs_other_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.other_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "agatha_score": "$__hs_agatha_score", + "other_score": "$__hs_other_score" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.agatha_rank" + ] + }, + "_internal_rankFusion_internal_fields.other_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.other_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.other_rank" + ] + } + } + }, + { + "$addFields": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_score", + "$_internal_rankFusion_internal_fields.other_score" + ] + } + } + }, + { + "$sort": { + "score": -1, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, ScoreDetailsSortOnlyInput) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + agatha: [ + { $match: { author: "Agatha Christie" } }, + { $sort: { author: 1 } } + ] + } + }, + combination: { weights: { agatha: 5 } }, + scoreDetails: true + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$match": { + "author": "Agatha Christie" + } + }, + { + "$sort": { + "author": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 60 + ] + } + ] + }, + 5 + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_scoreDetails": { + "details": [] + } + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_agatha_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.agatha_score", + 0 + ] + } + }, + "__hs_agatha_rank": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 0 + ] + } + }, + "__hs_agatha_scoreDetails": { + "$mergeObjects": "$_internal_rankFusion_internal_fields.agatha_scoreDetails" + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "agatha_score": "$__hs_agatha_score", + "agatha_rank": "$__hs_agatha_rank", + "agatha_scoreDetails": "$__hs_agatha_scoreDetails" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.agatha_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.agatha_rank" + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.agatha_score" + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields": { + "calculatedScoreDetails": [ + { + "$mergeObjects": [ + { + "inputPipelineName": "agatha", + "rank": "$_internal_rankFusion_internal_fields.agatha_rank", + "weight": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.agatha_rank", + "NA" + ] + }, + "$$REMOVE", + 5 + ] + } + }, + "$_internal_rankFusion_internal_fields.agatha_scoreDetails" + ] + } + ] + } + } + }, + { + "$setMetadata": { + "scoreDetails": { + "value": { + "$meta": "score" + }, + "description": "value output by reciprocal rank fusion algorithm, computed as sum of (weight * (1 / (60 + rank))) across input pipelines from which this document is output, from:", + "details": "$_internal_rankFusion_internal_fields.calculatedScoreDetails" + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, ScoreDetailsSearchInputGeneratesScoreDetails) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + searchPipe: [ + { $search: { index: "idx", + text: { query: "x", path: "p" }, + scoreDetails: true } } + ] + } + }, + scoreDetails: true + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$search": { + "index": "idx", + "text": { + "query": "x", + "path": "p" + }, + "scoreDetails": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.searchPipe_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.searchPipe_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.searchPipe_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.searchPipe_scoreDetails": { + "$meta": "scoreDetails" + } + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_searchPipe_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.searchPipe_score", + 0 + ] + } + }, + "__hs_searchPipe_rank": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.searchPipe_rank", + 0 + ] + } + }, + "__hs_searchPipe_scoreDetails": { + "$mergeObjects": "$_internal_rankFusion_internal_fields.searchPipe_scoreDetails" + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "searchPipe_score": "$__hs_searchPipe_score", + "searchPipe_rank": "$__hs_searchPipe_rank", + "searchPipe_scoreDetails": "$__hs_searchPipe_scoreDetails" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.searchPipe_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.searchPipe_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.searchPipe_rank" + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.searchPipe_score" + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields": { + "calculatedScoreDetails": [ + { + "$mergeObjects": [ + { + "inputPipelineName": "searchPipe", + "rank": "$_internal_rankFusion_internal_fields.searchPipe_rank", + "weight": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.searchPipe_rank", + "NA" + ] + }, + "$$REMOVE", + 1 + ] + } + }, + "$_internal_rankFusion_internal_fields.searchPipe_scoreDetails" + ] + } + ] + } + } + }, + { + "$setMetadata": { + "scoreDetails": { + "value": { + "$meta": "score" + }, + "description": "value output by reciprocal rank fusion algorithm, computed as sum of (weight * (1 / (60 + rank))) across input pipelines from which this document is output, from:", + "details": "$_internal_rankFusion_internal_fields.calculatedScoreDetails" + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, MultipleSortStagesRightmostMutated) { + // The leading $sort here should NOT receive $_internalOutputSortKeyMetadata; only the + // rightmost $sort is mutated. + RAIIServerParameterControllerForTest disableFull("featureFlagRankFusionFull", false); + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + p: [ + { $sort: { a: 1 } }, + { $match: { b: 2 } }, + { $sort: { c: 1 } } + ] + } + } + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$sort": { + "a": 1 + } + }, + { + "$match": { + "b": 2 + } + }, + { + "$sort": { + "c": 1, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.p_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.p_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.p_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_p_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.p_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "p_score": "$__hs_p_score" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.p_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.p_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.p_rank" + ] + } + } + }, + { + "$addFields": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.p_score" + ] + } + } + }, + { + "$sort": { + "score": -1, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +// Covers the "input generates score but NOT scoreDetails" branch of +// `addInputPipelineScoreDetails`: the `$match` with `$text` publishes textScore metadata, the +// explicit `$sort` on `{$meta: "textScore"}` makes the pipeline ranked, and there is no +// scoreDetails-producing stage. Pair with top-level scoreDetails:true so the per-pipeline +// scoreDetails $addFields synthesizes the {value, details: []} form (the second branch). +TEST_F(LiteParsedRankFusionDesugarerTest, ScoreDetails_InputGeneratesScoreOnly) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + textPipe: [ + { $match: { $text: { $search: "mystery" } } }, + { $sort: { score: { $meta: "textScore" } } } + ] + } + }, + scoreDetails: true + } + })"); + BSONObj actual = desugar(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$match": { + "$text": { + "$search": "mystery" + } + } + }, + { + "$sort": { + "score": { + "$meta": "textScore" + }, + "$_internalOutputSortKeyMetadata": true + } + }, + { + "$replaceWith": { + "_internal_rankFusion_docs": "$$ROOT" + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "order": 1 + }, + "output": { + "_internal_rankFusion_internal_fields.textPipe_rank": { + "$rank": {} + } + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.textPipe_score": { + "$multiply": [ + { + "$divide": [ + 1, + { + "$add": [ + "$_internal_rankFusion_internal_fields.textPipe_rank", + 60 + ] + } + ] + }, + 1 + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.textPipe_scoreDetails": { + "value": { + "$meta": "score" + }, + "details": [] + } + } + }, + { + "$group": { + "_id": "$_internal_rankFusion_docs._id", + "_internal_rankFusion_docs": { + "$first": "$_internal_rankFusion_docs" + }, + "__hs_textPipe_score": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.textPipe_score", + 0 + ] + } + }, + "__hs_textPipe_rank": { + "$max": { + "$ifNull": [ + "$_internal_rankFusion_internal_fields.textPipe_rank", + 0 + ] + } + }, + "__hs_textPipe_scoreDetails": { + "$mergeObjects": "$_internal_rankFusion_internal_fields.textPipe_scoreDetails" + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_rankFusion_docs", + { + "_internal_rankFusion_internal_fields": { + "textPipe_score": "$__hs_textPipe_score", + "textPipe_rank": "$__hs_textPipe_rank", + "textPipe_scoreDetails": "$__hs_textPipe_scoreDetails" + } + } + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields.textPipe_rank": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.textPipe_rank", + 0 + ] + }, + "NA", + "$_internal_rankFusion_internal_fields.textPipe_rank" + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$add": [ + "$_internal_rankFusion_internal_fields.textPipe_score" + ] + } + } + }, + { + "$addFields": { + "_internal_rankFusion_internal_fields": { + "calculatedScoreDetails": [ + { + "$mergeObjects": [ + { + "inputPipelineName": "textPipe", + "rank": "$_internal_rankFusion_internal_fields.textPipe_rank", + "weight": { + "$cond": [ + { + "$eq": [ + "$_internal_rankFusion_internal_fields.textPipe_rank", + "NA" + ] + }, + "$$REMOVE", + 1 + ] + } + }, + "$_internal_rankFusion_internal_fields.textPipe_scoreDetails" + ] + } + ] + } + } + }, + { + "$setMetadata": { + "scoreDetails": { + "value": { + "$meta": "score" + }, + "description": "value output by reciprocal rank fusion algorithm, computed as sum of (weight * (1 / (60 + rank))) across input pipelines from which this document is output, from:", + "details": "$_internal_rankFusion_internal_fields.calculatedScoreDetails" + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_rankFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, RejectsNegativeWeight) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + name1: [{ $sort: { x: 1 } }] + } + }, + combination: { weights: { name1: -1.0 } } + } + })"); + ASSERT_THROWS_CODE(desugar(spec), AssertionException, 12559401); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, RejectsNonNumericWeight) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + name1: [{ $sort: { x: 1 } }] + } + }, + combination: { weights: { name1: "not a number" } } + } + })"); + ASSERT_THROWS_CODE(desugar(spec), AssertionException, 12559404); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, RejectsUnknownPipelineNameInWeights) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + name1: [{ $sort: { x: 1 } }] + } + }, + combination: { weights: { typo: 1.0 } } + } + })"); + ASSERT_THROWS_CODE(desugar(spec), AssertionException, 12559400); +} + +TEST_F(LiteParsedRankFusionDesugarerTest, RejectsMoreWeightsThanPipelines) { + BSONObj spec = fromjson(R"({ + $rankFusion: { + input: { + pipelines: { + name1: [{ $sort: { x: 1 } }] + } + }, + combination: { weights: { name1: 1.0, extraName: 1.0 } } + } + })"); + ASSERT_THROWS_CODE(desugar(spec), AssertionException, 12559403); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.cpp b/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.cpp new file mode 100644 index 00000000000..de74db8d872 --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.cpp @@ -0,0 +1,227 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/pipeline/document_source_hybrid_scoring_util.h" +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h" + +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer::rank_fusion_utils { + +BSONObj buildSetWindowFieldsBson(const std::string& rankFieldName) { + BSONObjBuilder bob; + { + BSONObjBuilder swfBob(bob.subobjStart("$_internalSetWindowFields"_sd)); + swfBob.append("sortBy", BSON("order" << 1)); + BSONObjBuilder outputBob(swfBob.subobjStart("output"_sd)); + outputBob.append(rankFieldName, BSON("$rank" << BSONObj())); + } + return bob.obj(); +} + +BSONObj buildScoreAddFieldsBson(StringData inputPipelineName, int rankConstant, double weight) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + const std::string scoreField = hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_score", inputPipelineName)); + BSONObjBuilder scoreBob(addFieldsBob.subobjStart(scoreField)); + BSONArrayBuilder multArr(scoreBob.subarrayStart("$multiply"_sd)); + const std::string rankPath = + fmt::format("${}", + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_rank", inputPipelineName))); + multArr.append(BSON( + "$divide" << BSON_ARRAY(1 << BSON("$add" << BSON_ARRAY(rankPath << rankConstant))))); + multArr.append(weight); + } + return bob.obj(); +} + +BSONObj buildAddInputPipelineScoreDetailsBson(StringData inputPipelineName, + bool inputGeneratesScore, + bool inputGeneratesScoreDetails) { + const std::string scoreDetailsField = hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_scoreDetails", inputPipelineName)); + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + if (inputGeneratesScoreDetails) { + addFieldsBob.append(scoreDetailsField, BSON("$meta" << "scoreDetails")); + } else if (inputGeneratesScore) { + addFieldsBob.append( + scoreDetailsField, + BSON("value" << BSON("$meta" << "score") << "details" << BSONArrayBuilder().arr())); + } else { + addFieldsBob.append(scoreDetailsField, BSON("details" << BSONArrayBuilder().arr())); + } + } + return bob.obj(); +} + +BSONObj buildRankAddFieldsBson(const std::vector& pipelineNames) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + for (const auto& pipelineName : pipelineNames) { + const std::string rankField = hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_rank", pipelineName)); + const std::string rankPath = fmt::format("${}", rankField); + addFieldsBob.append(rankField, + BSON("$cond" << BSON_ARRAY(BSON("$eq" << BSON_ARRAY(rankPath << 0)) + << "NA" << rankPath))); + } + } + return bob.obj(); +} + +BSONObj buildSetMetadataScoreBson(const std::vector& pipelineNames) { + BSONObjBuilder bob; + { + BSONObjBuilder smBob(bob.subobjStart("$setMetadata"_sd)); + BSONObjBuilder scoreBob(smBob.subobjStart("score"_sd)); + BSONArrayBuilder addArr(scoreBob.subarrayStart("$add"_sd)); + for (const auto& pipelineName : pipelineNames) { + addArr.append( + fmt::format("${}", + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_score", pipelineName)))); + } + } + return bob.obj(); +} + +BSONObj buildAddFieldsScoreBson(const std::vector& pipelineNames) { + BSONObjBuilder bob; + { + BSONObjBuilder afBob(bob.subobjStart("$addFields"_sd)); + BSONObjBuilder scoreBob(afBob.subobjStart("score"_sd)); + BSONArrayBuilder addArr(scoreBob.subarrayStart("$add"_sd)); + for (const auto& pipelineName : pipelineNames) { + addArr.append( + fmt::format("${}", + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_score", pipelineName)))); + } + } + return bob.obj(); +} + +BSONObj buildCalculatedFinalScoreDetailsBson(const std::vector& pipelineNames, + const StringMap& weights) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + BSONObjBuilder internalFieldsBob(addFieldsBob.subobjStart(kInternalFieldsName)); + BSONArrayBuilder calcArr(internalFieldsBob.subarrayStart("calculatedScoreDetails"_sd)); + for (const auto& pipelineName : pipelineNames) { + // Path of the pipeline's scoreDetails subobject: .

+ const std::string internalFieldsPipelineName = + hybrid_scoring_util::applyInternalFieldPrefixToFieldName(kInternalFieldsName, + pipelineName); + const std::string rankPath = fmt::format("${}_rank", internalFieldsPipelineName); + double weight = hybrid_scoring_util::getPipelineWeight(weights, pipelineName); + + BSONObjBuilder mergeSub; + mergeSub.append("inputPipelineName"_sd, pipelineName); + mergeSub.append("rank"_sd, rankPath); + mergeSub.append("weight", + BSON("$cond" << BSON_ARRAY(BSON("$eq" << BSON_ARRAY(rankPath << "NA")) + << "$$REMOVE" << weight))); + + BSONArrayBuilder mergeArr; + mergeArr.append(mergeSub.obj()); + mergeArr.append(fmt::format("${}.{}_scoreDetails", kInternalFieldsName, pipelineName)); + calcArr.append(BSON("$mergeObjects" << mergeArr.arr())); + } + } + return bob.obj(); +} + +BSONObj buildSetMetadataScoreDetailsBson() { + BSONObjBuilder bob; + { + BSONObjBuilder smBob(bob.subobjStart("$setMetadata"_sd)); + BSONObjBuilder sdBob(smBob.subobjStart("scoreDetails"_sd)); + sdBob.append("value", BSON("$meta" << "score")); + sdBob.append("description", kScoreDetailsDescription); + sdBob.append("details", + fmt::format("${}", + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, "calculatedScoreDetails"))); + } + return bob.obj(); +} + +BSONObj buildSortByScoreScalarBson() { + return BSON("$sort" << BSON("score" << -1 << "_id" << 1)); +} + +StageSpecs buildRankFusionInputPipelinePreamble(const NamespaceString& nss, + const LiteParsedPipeline& subPipeline, + const std::string& pipelineName, + double weight, + bool includeScoreDetails) { + StageSpecs out; + out.reserve(subPipeline.getStages().size() + 4); + for (const auto& stage : subPipeline.getStages()) { + out.push_back(stage->clone()); + } + + // Rewrite rightmost $sort in-place to ensure sort-key metadata is output; no-op if no $sort + // is present (e.g. $search and $vectorSearch emit scored output without an explicit $sort). + common_utils::mutateRightmostSortToOutputSortKey(nss, out); + + const bool inputGeneratesScore = subPipeline.isScoredPipeline(); + const bool inputGeneratesScoreDetails = subPipeline.isScoreDetailsPipeline(); + + out.push_back( + common_utils::parseOwnedStage(nss, common_utils::buildReplaceRootBson(kDocsName))); + + const std::string rankFieldName = hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_rank", pipelineName)); + out.push_back(common_utils::parseOwnedStage(nss, buildSetWindowFieldsBson(rankFieldName))); + + out.push_back(common_utils::parseOwnedStage( + nss, buildScoreAddFieldsBson(pipelineName, kRankConstant, weight))); + + if (includeScoreDetails) { + out.push_back(common_utils::parseOwnedStage( + nss, + buildAddInputPipelineScoreDetailsBson( + pipelineName, inputGeneratesScore, inputGeneratesScoreDetails))); + } + + return out; +} + +} // namespace mongo::lite_parsed_hybrid_search_desugarer::rank_fusion_utils diff --git a/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.h b/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.h new file mode 100644 index 00000000000..458088d5fcc --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_rank_fusion_desugarer_utils.h @@ -0,0 +1,104 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/string_data.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/db/pipeline/rank_fusion_pipeline_builder.h" +#include "mongo/util/string_map.h" + +#include +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer::rank_fusion_utils { + +inline constexpr int kRankConstant = RankFusionPipelineBuilder::kRankConstant; +inline constexpr StringData kInternalFieldsName = + RankFusionPipelineBuilder::kRankFusionInternalFieldsName; +inline constexpr StringData kDocsName = RankFusionPipelineBuilder::kRankFusionDocsFieldName; +inline constexpr StringData kScoreDetailsDescription = + RankFusionPipelineBuilder::kRankFusionScoreDetailsDescription; + +// Per-pipeline scoreDetails scalar field suffix used in the desugared $group output (and the +// matching $replaceRoot wrapper). For $rankFusion the per-pipeline scalar is "

_rank". +inline constexpr StringData kDetailsScalarSuffix = "_rank"_sd; + +// {$_internalSetWindowFields: {sortBy: {order: 1}, +// output: {.

_rank: {$rank: {}}}}} +BSONObj buildSetWindowFieldsBson(const std::string& rankFieldName); + +// {$addFields: {.

_score: {$multiply: [{$divide: [1, {$add: [, K]}]}, +// ]}}} +BSONObj buildScoreAddFieldsBson(StringData inputPipelineName, int rankConstant, double weight); + +// {$addFields: {.

_scoreDetails: }} +BSONObj buildAddInputPipelineScoreDetailsBson(StringData inputPipelineName, + bool inputGeneratesScore, + bool inputGeneratesScoreDetails); + +// {$addFields: {.

_rank: {$cond: [{$eq:[,0]},"NA",]}, ...}} +BSONObj buildRankAddFieldsBson(const std::vector& pipelineNames); + +// {$setMetadata: {score: {$add: ["$.

_score", ...]}}} (Full branch) +BSONObj buildSetMetadataScoreBson(const std::vector& pipelineNames); + +// {$addFields: {score: {$add: ["$.

_score", ...]}}} (Basic branch) +BSONObj buildAddFieldsScoreBson(const std::vector& pipelineNames); + +// {$addFields: {: {calculatedScoreDetails: [ +// {$mergeObjects: [{inputPipelineName:

, rank: "$

_rank", +// weight: {$cond: [{$eq:["$

_rank","NA"]},"$$REMOVE",]}}, +// "$.

_scoreDetails"]}, +// ...]}}} +BSONObj buildCalculatedFinalScoreDetailsBson(const std::vector& pipelineNames, + const StringMap& weights); + +// {$setMetadata: {scoreDetails: {value: {$meta: "score"}, description: "...", +// details: "$.calculatedScoreDetails"}}} +BSONObj buildSetMetadataScoreDetailsBson(); + +// {$sort: {score: -1, _id: 1}} (Basic branch) +BSONObj buildSortByScoreScalarBson(); + +// Builds the per-input-pipeline preamble for $rankFusion: a clone of the input subpipeline's +// LPDSs (with the rightmost $sort mutated to output sort key metadata) followed by: +// $replaceWith ({: "$$ROOT"}) +// $_internalSetWindowFields ({sortBy: {order: 1}, output: {: {$rank: {}}}}) +// $addFields (per-pipeline weighted score) +// [optional] $addFields (per-pipeline scoreDetails) +StageSpecs buildRankFusionInputPipelinePreamble(const NamespaceString& nss, + const LiteParsedPipeline& subPipeline, + const std::string& pipelineName, + double weight, + bool includeScoreDetails); + +} // namespace mongo::lite_parsed_hybrid_search_desugarer::rank_fusion_utils diff --git a/src/mongo/db/pipeline/lite_parsed_score_fusion.h b/src/mongo/db/pipeline/lite_parsed_score_fusion.h index 2426f55f5fe..de56f473470 100644 --- a/src/mongo/db/pipeline/lite_parsed_score_fusion.h +++ b/src/mongo/db/pipeline/lite_parsed_score_fusion.h @@ -138,6 +138,11 @@ public: return true; } + // $scoreFusion produces scoreDetails metadata when the user requests it via the spec. + bool isScoreDetailsStage() const final { + return _parsedSpec.getScoreDetails(); + } + // $scoreFusion does not modify documents, only combines and reorders them. bool isSelectionStage() const final { return true; @@ -166,6 +171,10 @@ public: _parsedSpec, _pipelines, getOriginalBson().wrap().getOwned()); } + const ScoreFusionSpec& getSpec() const { + return _parsedSpec; + } + private: ScoreFusionSpec _parsedSpec; }; diff --git a/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_test.cpp b/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_test.cpp new file mode 100644 index 00000000000..1a25d988eeb --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_test.cpp @@ -0,0 +1,2076 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/bson/bsonelement.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/json.h" +#include "mongo/db/feature_flag.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/pipeline/document_source_score_fusion.h" +#include "mongo/db/pipeline/expression_context.h" +#include "mongo/db/pipeline/lite_parsed_desugarer.h" +#include "mongo/db/pipeline/lite_parsed_document_source.h" +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer.h" +#include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/db/pipeline/lite_parsed_score_fusion.h" +#include "mongo/db/pipeline/pipeline.h" +#include "mongo/db/pipeline/resolved_namespace.h" +#include "mongo/idl/server_parameter_test_controller.h" +#include "mongo/unittest/unittest.h" + +#include +#include +#include + +namespace mongo { +namespace { + +// Convert a desugared StageSpecs vector into a single BSONObj of the form +// {expectedStages: [, , ...]} so it can be compared via ASSERT_BSONOBJ_EQ_AUTO. +BSONObj toExpectedStagesBson(const std::vector>& stages) { + BSONObjBuilder out; + BSONArrayBuilder arr(out.subarrayStart("expectedStages")); + for (const auto& stage : stages) { + arr.append(stage->getOriginalBson().wrap()); + } + arr.done(); + return out.obj(); +} + +// Parse the top-level pipeline (a vector of one stage) into a LiteParsedScoreFusion to +// drive the desugarer. +std::unique_ptr parseScoreFusion(const NamespaceString& nss, + const BSONObj& specObj) { + BSONObj ownedSpec = specObj.getOwned(); + auto lpds = LiteParsedDocumentSource::parse(nss, ownedSpec); + lpds->makeOwned(); + auto* sfPtr = dynamic_cast(lpds.get()); + invariant(sfPtr); + lpds.release(); + return std::unique_ptr(sfPtr); +} + +class LiteParsedScoreFusionDesugarerTest : public service_context_test::WithSetupTransportLayer, + public AggregationContextFixture { +protected: + BSONObj desugarScore(const BSONObj& scoreFusionSpecObj) { + const NamespaceString& nss = getExpCtx()->getNamespaceString(); + auto lpsf = parseScoreFusion(nss, scoreFusionSpecObj); + auto desugared = + lite_parsed_hybrid_search_desugarer::desugarScoreFusion(*lpsf, nss, "pipeline_test"_sd); + return toExpectedStagesBson(desugared); + } + +private: + RAIIServerParameterControllerForTest featureFlagController1{ + "featureFlagSearchHybridScoringFull", true}; +}; + +TEST_F(LiteParsedScoreFusionDesugarerTest, SinglePipelineDefault) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ] + }, + normalization: "none" + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, SinglePipelineSigmoid) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ] + }, + normalization: "sigmoid" + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$sigmoid": { + "$meta": "score" + } + }, + 1 + ] + } + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, SinglePipelineMinMaxScaler) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ] + }, + normalization: "minMaxScaler" + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "_internal_scoreFusion_internal_fields.name1_score": -1 + }, + "output": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$minMaxScaler": { + "input": "$_internal_scoreFusion_internal_fields.name1_score" + } + } + } + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, MultiplePipelinesMixedNone) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + matchGenres: [ + { $search: { index: "search_index", + text: { query: "mystery", path: "genres" } } } + ], + matchPlot: [ + { $vectorSearch: { queryVector: [1.0, 2.0, 3.0], + path: "plot_embedding", + numCandidates: 300, + index: "vector_index", + limit: 10 } } + ], + scoredMatch: [ + { $match: { author: "Agatha Christie" } }, + { $score: { score: "$age", normalization: "none" } } + ] + }, + normalization: "none" + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$search": { + "index": "search_index", + "text": { + "query": "mystery", + "path": "genres" + } + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.matchGenres_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$vectorSearch": { + "queryVector": [ + 1, + 2, + 3 + ], + "path": "plot_embedding", + "numCandidates": 300, + "index": "vector_index", + "limit": 10 + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.matchPlot_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + } + ] + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$match": { + "author": "Agatha Christie" + } + }, + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.scoredMatch_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_matchGenres_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.matchGenres_score", + 0 + ] + } + }, + "__hs_matchPlot_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.matchPlot_score", + 0 + ] + } + }, + "__hs_scoredMatch_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.scoredMatch_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "matchGenres_score": "$__hs_matchGenres_score", + "matchPlot_score": "$__hs_matchPlot_score", + "scoredMatch_score": "$__hs_scoredMatch_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.matchGenres_score", + "$_internal_scoreFusion_internal_fields.matchPlot_score", + "$_internal_scoreFusion_internal_fields.scoredMatch_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, MultiplePipelinesSigmoid) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ], + name2: [ + { $score: { score: "$rating", normalization: "none" } } + ] + }, + normalization: "sigmoid" + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$sigmoid": { + "$meta": "score" + } + }, + 1 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$score": { + "score": "$rating", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$multiply": [ + { + "$sigmoid": { + "$meta": "score" + } + }, + 1 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name2_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name2_score": "$__hs_name2_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score", + "$_internal_scoreFusion_internal_fields.name2_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, MultiplePipelinesMinMaxScaler) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ], + name2: [ + { $score: { score: "$rating", normalization: "none" } } + ] + }, + normalization: "minMaxScaler" + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "_internal_scoreFusion_internal_fields.name1_score": -1 + }, + "output": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$minMaxScaler": { + "input": "$_internal_scoreFusion_internal_fields.name1_score" + } + } + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$score": { + "score": "$rating", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "_internal_scoreFusion_internal_fields.name2_score": -1 + }, + "output": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$minMaxScaler": { + "input": "$_internal_scoreFusion_internal_fields.name2_score" + } + } + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name2_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name2_score": "$__hs_name2_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score", + "$_internal_scoreFusion_internal_fields.name2_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, CombinationMethodAvgExplicit) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ], + name2: [ + { $score: { score: "$rating", normalization: "none" } } + ] + }, + normalization: "none" + }, + combination: { method: "avg" } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$score": { + "score": "$rating", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name2_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name2_score": "$__hs_name2_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score", + "$_internal_scoreFusion_internal_fields.name2_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, CombinationMethodExpression) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ], + name2: [ + { $score: { score: "$rating", normalization: "none" } } + ] + }, + normalization: "none" + }, + combination: { + method: "expression", + expression: { $sum: ["$$name1", "$$name2", 5.0] } + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$score": { + "score": "$rating", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name2_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name2_score": "$__hs_name2_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$let": { + "vars": { + "name1": "$_internal_scoreFusion_internal_fields.name1_score", + "name2": "$_internal_scoreFusion_internal_fields.name2_score" + }, + "in": { + "$sum": [ + "$$name1", + "$$name2", + 5 + ] + } + } + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, CustomWeights) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ], + name2: [ + { $score: { score: "$rating", normalization: "none" } } + ] + }, + normalization: "none" + }, + combination: { + weights: { name1: 5, name2: 2 } + } + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 5 + ] + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$score": { + "score": "$rating", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$multiply": [ + { + "$meta": "score" + }, + 2 + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name2_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_score", + 0 + ] + } + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name2_score": "$__hs_name2_score" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score", + "$_internal_scoreFusion_internal_fields.name2_score" + ] + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, ScoreDetailsScoreInputGeneratesScoreOnly) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ] + }, + normalization: "none" + }, + combination: { weights: { name1: 5 } }, + scoreDetails: true + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 5 + ] + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_rawScore": { + "$meta": "score" + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_scoreDetails": { + "details": [] + } + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name1_rawScore": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_rawScore", + 0 + ] + } + }, + "__hs_name1_scoreDetails": { + "$mergeObjects": "$_internal_scoreFusion_internal_fields.name1_scoreDetails" + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name1_rawScore": "$__hs_name1_rawScore", + "name1_scoreDetails": "$__hs_name1_scoreDetails" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.name1_score" + ] + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields": { + "calculatedScoreDetails": [ + { + "$mergeObjects": [ + { + "inputPipelineName": "name1", + "inputPipelineRawScore": "$_internal_scoreFusion_internal_fields.name1_rawScore", + "weight": 5, + "value": "$_internal_scoreFusion_internal_fields.name1_score" + }, + "$_internal_scoreFusion_internal_fields.name1_scoreDetails" + ] + } + ] + } + } + }, + { + "$setMetadata": { + "scoreDetails": { + "value": { + "$meta": "score" + }, + "description": "the value calculated by combining the scores (either normalized or raw) across input pipelines from which this document is output from:", + "normalization": "none", + "combination": { + "method": "average" + }, + "details": "$_internal_scoreFusion_internal_fields.calculatedScoreDetails" + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, ScoreDetailsSearchGeneratesScoreDetails) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + searchPipe: [ + { $search: { index: "idx", + text: { query: "x", path: "p" }, + scoreDetails: true } } + ] + }, + normalization: "none" + }, + scoreDetails: true + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$search": { + "index": "idx", + "text": { + "query": "x", + "path": "p" + }, + "scoreDetails": true + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.searchPipe_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.searchPipe_rawScore": { + "$meta": "score" + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.searchPipe_scoreDetails": { + "details": { + "$meta": "scoreDetails" + } + } + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_searchPipe_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.searchPipe_score", + 0 + ] + } + }, + "__hs_searchPipe_rawScore": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.searchPipe_rawScore", + 0 + ] + } + }, + "__hs_searchPipe_scoreDetails": { + "$mergeObjects": "$_internal_scoreFusion_internal_fields.searchPipe_scoreDetails" + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "searchPipe_score": "$__hs_searchPipe_score", + "searchPipe_rawScore": "$__hs_searchPipe_rawScore", + "searchPipe_scoreDetails": "$__hs_searchPipe_scoreDetails" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$avg": [ + "$_internal_scoreFusion_internal_fields.searchPipe_score" + ] + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields": { + "calculatedScoreDetails": [ + { + "$mergeObjects": [ + { + "inputPipelineName": "searchPipe", + "inputPipelineRawScore": "$_internal_scoreFusion_internal_fields.searchPipe_rawScore", + "weight": 1, + "value": "$_internal_scoreFusion_internal_fields.searchPipe_score" + }, + "$_internal_scoreFusion_internal_fields.searchPipe_scoreDetails" + ] + } + ] + } + } + }, + { + "$setMetadata": { + "scoreDetails": { + "value": { + "$meta": "score" + }, + "description": "the value calculated by combining the scores (either normalized or raw) across input pipelines from which this document is output from:", + "normalization": "none", + "combination": { + "method": "average" + }, + "details": "$_internal_scoreFusion_internal_fields.calculatedScoreDetails" + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, ScoreDetailsExpressionCombination) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [ + { $score: { score: "$age", normalization: "none" } } + ], + name2: [ + { $score: { score: "$rating", normalization: "none" } } + ] + }, + normalization: "minMaxScaler" + }, + combination: { + method: "expression", + expression: { $sum: ["$$name1", "$$name2", 5.0] } + }, + scoreDetails: true + } + })"); + BSONObj actual = desugarScore(spec); + ASSERT_BSONOBJ_EQ_AUTO( // NOLINT + R"({ + "expectedStages": [ + { + "$score": { + "score": "$age", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_rawScore": { + "$meta": "score" + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name1_scoreDetails": { + "details": [] + } + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "_internal_scoreFusion_internal_fields.name1_score": -1 + }, + "output": { + "_internal_scoreFusion_internal_fields.name1_score": { + "$minMaxScaler": { + "input": "$_internal_scoreFusion_internal_fields.name1_score" + } + } + } + } + }, + { + "$unionWith": { + "coll": "pipeline_test", + "pipeline": [ + { + "$score": { + "score": "$rating", + "normalization": "none" + } + }, + { + "$replaceWith": { + "_internal_scoreFusion_docs": "$$ROOT" + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$multiply": [ + { + "$meta": "score" + }, + 1 + ] + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_rawScore": { + "$meta": "score" + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields.name2_scoreDetails": { + "details": [] + } + } + }, + { + "$_internalSetWindowFields": { + "sortBy": { + "_internal_scoreFusion_internal_fields.name2_score": -1 + }, + "output": { + "_internal_scoreFusion_internal_fields.name2_score": { + "$minMaxScaler": { + "input": "$_internal_scoreFusion_internal_fields.name2_score" + } + } + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_internal_scoreFusion_docs._id", + "_internal_scoreFusion_docs": { + "$first": "$_internal_scoreFusion_docs" + }, + "__hs_name1_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_score", + 0 + ] + } + }, + "__hs_name1_rawScore": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name1_rawScore", + 0 + ] + } + }, + "__hs_name1_scoreDetails": { + "$mergeObjects": "$_internal_scoreFusion_internal_fields.name1_scoreDetails" + }, + "__hs_name2_score": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_score", + 0 + ] + } + }, + "__hs_name2_rawScore": { + "$max": { + "$ifNull": [ + "$_internal_scoreFusion_internal_fields.name2_rawScore", + 0 + ] + } + }, + "__hs_name2_scoreDetails": { + "$mergeObjects": "$_internal_scoreFusion_internal_fields.name2_scoreDetails" + } + } + }, + { + "$replaceRoot": { + "newRoot": { + "$mergeObjects": [ + "$_internal_scoreFusion_docs", + { + "_internal_scoreFusion_internal_fields": { + "name1_score": "$__hs_name1_score", + "name1_rawScore": "$__hs_name1_rawScore", + "name1_scoreDetails": "$__hs_name1_scoreDetails", + "name2_score": "$__hs_name2_score", + "name2_rawScore": "$__hs_name2_rawScore", + "name2_scoreDetails": "$__hs_name2_scoreDetails" + } + } + ] + } + } + }, + { + "$setMetadata": { + "score": { + "$let": { + "vars": { + "name1": "$_internal_scoreFusion_internal_fields.name1_score", + "name2": "$_internal_scoreFusion_internal_fields.name2_score" + }, + "in": { + "$sum": [ + "$$name1", + "$$name2", + 5 + ] + } + } + } + } + }, + { + "$addFields": { + "_internal_scoreFusion_internal_fields": { + "calculatedScoreDetails": [ + { + "$mergeObjects": [ + { + "inputPipelineName": "name1", + "inputPipelineRawScore": "$_internal_scoreFusion_internal_fields.name1_rawScore", + "weight": 1, + "value": "$_internal_scoreFusion_internal_fields.name1_score" + }, + "$_internal_scoreFusion_internal_fields.name1_scoreDetails" + ] + }, + { + "$mergeObjects": [ + { + "inputPipelineName": "name2", + "inputPipelineRawScore": "$_internal_scoreFusion_internal_fields.name2_rawScore", + "weight": 1, + "value": "$_internal_scoreFusion_internal_fields.name2_score" + }, + "$_internal_scoreFusion_internal_fields.name2_scoreDetails" + ] + } + ] + } + } + }, + { + "$setMetadata": { + "scoreDetails": { + "value": { + "$meta": "score" + }, + "description": "the value calculated by combining the scores (either normalized or raw) across input pipelines from which this document is output from:", + "normalization": "minMaxScaler", + "combination": { + "method": "custom expression", + "expression": "{ string: { $sum: [ '$$name1', '$$name2', 5.0 ] } }" + }, + "details": "$_internal_scoreFusion_internal_fields.calculatedScoreDetails" + } + } + }, + { + "$sort": { + "score": { + "$meta": "score" + }, + "_id": 1 + } + }, + { + "$project": { + "_internal_scoreFusion_internal_fields": 0 + } + } + ] + })", + actual); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsNegativeWeight) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { weights: { name1: -1.0 } } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559401); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsNonNumericWeight) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { weights: { name1: "not a number" } } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559404); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsUnknownPipelineNameInWeights) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { weights: { typo: 1.0 } } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559400); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsMoreWeightsThanPipelines) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { weights: { name1: 1.0, extraName: 1.0 } } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559403); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsExpressionWithoutExpressionMethod) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { method: "avg", expression: { $sum: ["$$name1", 1] } } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559406); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsExpressionMethodWithoutExpression) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { method: "expression" } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559406); +} + +TEST_F(LiteParsedScoreFusionDesugarerTest, RejectsBothExpressionAndWeights) { + BSONObj spec = fromjson(R"({ + $scoreFusion: { + input: { + pipelines: { + name1: [{ $score: { score: "$age", normalization: "none" } }] + }, + normalization: "none" + }, + combination: { + method: "expression", + expression: { $sum: ["$$name1", 1] }, + weights: { name1: 1.0 } + } + } + })"); + ASSERT_THROWS_CODE(desugarScore(spec), AssertionException, 12559407); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.cpp b/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.cpp new file mode 100644 index 00000000000..ce7fe5d0479 --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.cpp @@ -0,0 +1,284 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.h" + +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/pipeline/document_source_hybrid_scoring_util.h" +#include "mongo/db/pipeline/lite_parsed_hybrid_search_desugarer_utils.h" +#include "mongo/util/assert_util.h" + +#include + +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer::score_fusion_utils { + +ScoreFusionScoringOptions::ScoreFusionScoringOptions(const ScoreFusionSpec& spec) { + _normalizationMethod = spec.getInput().getNormalization(); + const auto& combination = spec.getCombination(); + ScoreFusionCombinationMethodEnum combinationMethod = ScoreFusionCombinationMethodEnum::kAvg; + boost::optional combinationExpression = boost::none; + if (combination.has_value() && combination->getMethod().has_value()) { + combinationMethod = combination->getMethod().get(); + uassert(12559406, + "combination.expression should only be specified when combination.method " + "has the value \"expression\"", + (combinationMethod != ScoreFusionCombinationMethodEnum::kExpression && + !combination->getExpression().has_value()) || + (combinationMethod == ScoreFusionCombinationMethodEnum::kExpression && + combination->getExpression().has_value())); + combinationExpression = combination->getExpression(); + uassert(12559407, + "combination.expression and combination.weights cannot both be specified", + !(combination->getWeights().has_value() && combinationExpression.has_value())); + } + _combinationMethod = std::move(combinationMethod); + _combinationExpression = std::move(combinationExpression); +} + +StringData ScoreFusionScoringOptions::getNormalizationString() const { + switch (_normalizationMethod) { + case ScoreFusionNormalizationEnum::kSigmoid: + return "sigmoid"_sd; + case ScoreFusionNormalizationEnum::kMinMaxScaler: + return "minMaxScaler"_sd; + case ScoreFusionNormalizationEnum::kNone: + return "none"_sd; + } + MONGO_UNREACHABLE_TASSERT(12559408); +} + +StringData ScoreFusionScoringOptions::getCombinationMethodString() const { + switch (_combinationMethod) { + case ScoreFusionCombinationMethodEnum::kExpression: + return "custom expression"_sd; + case ScoreFusionCombinationMethodEnum::kAvg: + return "average"_sd; + } + MONGO_UNREACHABLE_TASSERT(12559409); +} + +BSONObj buildScoreAddFieldsBson(StringData inputPipelineName, + ScoreFusionNormalizationEnum normalization, + double weight) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + const std::string scoreField = hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_score", inputPipelineName)); + BSONObjBuilder scoreBob(addFieldsBob.subobjStart(scoreField)); + BSONArrayBuilder multArr(scoreBob.subarrayStart("$multiply"_sd)); + BSONObj scorePath = BSON("$meta" << "score"); + switch (normalization) { + case ScoreFusionNormalizationEnum::kSigmoid: + multArr.append(BSON("$sigmoid" << scorePath)); + break; + case ScoreFusionNormalizationEnum::kMinMaxScaler: + case ScoreFusionNormalizationEnum::kNone: + multArr.append(scorePath); + break; + } + multArr.append(weight); + } + return bob.obj(); +} + +BSONObj buildRawScoreAddFieldsBson(StringData inputPipelineName) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + const std::string rawScoreField = hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_rawScore", inputPipelineName)); + addFieldsBob.append(rawScoreField, BSON("$meta" << "score")); + } + return bob.obj(); +} + +BSONObj buildAddInputPipelineScoreDetailsBson(StringData inputPipelineName, + bool inputGeneratesScoreDetails) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + const std::string scoreDetailsField = + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, fmt::format("{}_scoreDetails", inputPipelineName)); + if (inputGeneratesScoreDetails) { + addFieldsBob.append(scoreDetailsField, + BSON("details" << BSON("$meta" << "scoreDetails"))); + } else { + addFieldsBob.append(scoreDetailsField, BSON("details" << BSONArrayBuilder().arr())); + } + } + return bob.obj(); +} + +BSONObj buildMinMaxScalerSetWindowFieldsBson(StringData inputPipelineName) { + const std::string internalFieldsScore = + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, + hybrid_scoring_util::getScoreFieldFromPipelineName(inputPipelineName)); + BSONObjBuilder bob; + { + BSONObjBuilder swfBob(bob.subobjStart("$_internalSetWindowFields"_sd)); + swfBob.append("sortBy", BSON(internalFieldsScore << -1)); + BSONObjBuilder outputBob(swfBob.subobjStart("output"_sd)); + outputBob.append(internalFieldsScore, + BSON("$minMaxScaler" << BSON("input" << ("$" + internalFieldsScore)))); + } + return bob.obj(); +} + +BSONObj buildSetFinalCombinedScoreBson(const std::vector& pipelineNames, + const ScoreFusionScoringOptions& scoringOptions) { + BSONObjBuilder bob; + { + BSONObjBuilder smBob(bob.subobjStart("$setMetadata"_sd)); + BSONObjBuilder scoreBob(smBob.subobjStart("score"_sd)); + switch (scoringOptions.getCombinationMethod()) { + case ScoreFusionCombinationMethodEnum::kExpression: { + BSONObjBuilder letBob(scoreBob.subobjStart("$let"_sd)); + BSONObjBuilder varsBob(letBob.subobjStart("vars"_sd)); + for (const auto& pipelineName : pipelineNames) { + const std::string scoreField = + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, + hybrid_scoring_util::getScoreFieldFromPipelineName(pipelineName)); + varsBob.append(pipelineName, fmt::format("${}", scoreField)); + } + varsBob.done(); + scoringOptions.getCombinationExpression()->serializeToBSON("in", &letBob); + letBob.done(); + break; + } + case ScoreFusionCombinationMethodEnum::kAvg: { + BSONArrayBuilder avgArr(scoreBob.subarrayStart("$avg"_sd)); + for (const auto& pipelineName : pipelineNames) { + avgArr.append(fmt::format( + "${}", + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, + hybrid_scoring_util::getScoreFieldFromPipelineName(pipelineName)))); + } + break; + } + } + } + return bob.obj(); +} + +BSONObj buildCalculatedFinalScoreDetailsBson(const std::vector& pipelineNames, + const StringMap& weights) { + BSONObjBuilder bob; + { + BSONObjBuilder addFieldsBob(bob.subobjStart("$addFields"_sd)); + BSONObjBuilder internalFieldsBob(addFieldsBob.subobjStart(kInternalFieldsName)); + BSONArrayBuilder calcArr(internalFieldsBob.subarrayStart("calculatedScoreDetails"_sd)); + for (const auto& pipelineName : pipelineNames) { + const std::string internalFieldsPipelineName = + hybrid_scoring_util::applyInternalFieldPrefixToFieldName(kInternalFieldsName, + pipelineName); + double weight = hybrid_scoring_util::getPipelineWeight(weights, pipelineName); + + BSONObjBuilder mergeSub; + mergeSub.append("inputPipelineName"_sd, pipelineName); + mergeSub.append("inputPipelineRawScore"_sd, + fmt::format("${}_rawScore", internalFieldsPipelineName)); + mergeSub.append("weight"_sd, weight); + mergeSub.append("value"_sd, fmt::format("${}_score", internalFieldsPipelineName)); + + BSONArrayBuilder mergeArr; + mergeArr.append(mergeSub.obj()); + mergeArr.append(fmt::format("${}.{}_scoreDetails", kInternalFieldsName, pipelineName)); + calcArr.append(BSON("$mergeObjects" << mergeArr.arr())); + } + } + return bob.obj(); +} + +BSONObj buildSetMetadataScoreDetailsBson(const ScoreFusionScoringOptions& scoringOptions) { + BSONObjBuilder bob; + { + BSONObjBuilder smBob(bob.subobjStart("$setMetadata"_sd)); + BSONObjBuilder sdBob(smBob.subobjStart("scoreDetails"_sd)); + sdBob.append("value", BSON("$meta" << "score")); + sdBob.append("description", kScoreDetailsDescription); + sdBob.append("normalization", scoringOptions.getNormalizationString()); + BSONObjBuilder combinationBob(sdBob.subobjStart("combination"_sd)); + combinationBob.append("method", scoringOptions.getCombinationMethodString()); + if (scoringOptions.getCombinationMethod() == + ScoreFusionCombinationMethodEnum::kExpression) { + combinationBob.append("expression", + hybrid_scoring_util::score_details::stringifyExpression( + scoringOptions.getCombinationExpression())); + } + combinationBob.done(); + sdBob.append("details", + fmt::format("${}", + hybrid_scoring_util::applyInternalFieldPrefixToFieldName( + kInternalFieldsName, "calculatedScoreDetails"))); + } + return bob.obj(); +} + +StageSpecs buildScoreFusionInputPipelinePreamble(const NamespaceString& nss, + const LiteParsedPipeline& subPipeline, + const std::string& pipelineName, + double weight, + ScoreFusionNormalizationEnum normalization, + bool includeScoreDetails) { + StageSpecs out; + out.reserve(subPipeline.getStages().size() + 5); + for (const auto& stage : subPipeline.getStages()) { + out.push_back(stage->clone()); + } + + const bool inputGeneratesScoreDetails = subPipeline.isScoreDetailsPipeline(); + + out.push_back( + common_utils::parseOwnedStage(nss, common_utils::buildReplaceRootBson(kDocsName))); + + out.push_back(common_utils::parseOwnedStage( + nss, buildScoreAddFieldsBson(pipelineName, normalization, weight))); + + if (includeScoreDetails) { + out.push_back(common_utils::parseOwnedStage(nss, buildRawScoreAddFieldsBson(pipelineName))); + out.push_back(common_utils::parseOwnedStage( + nss, buildAddInputPipelineScoreDetailsBson(pipelineName, inputGeneratesScoreDetails))); + } + + if (normalization == ScoreFusionNormalizationEnum::kMinMaxScaler) { + out.push_back( + common_utils::parseOwnedStage(nss, buildMinMaxScalerSetWindowFieldsBson(pipelineName))); + } + + return out; +} + +} // namespace mongo::lite_parsed_hybrid_search_desugarer::score_fusion_utils diff --git a/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.h b/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.h new file mode 100644 index 00000000000..faacc1481bb --- /dev/null +++ b/src/mongo/db/pipeline/lite_parsed_score_fusion_desugarer_utils.h @@ -0,0 +1,145 @@ +/** + * Copyright (C) 2026-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/string_data.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/pipeline/document_source_score_fusion_gen.h" +#include "mongo/db/pipeline/lite_parsed_pipeline.h" +#include "mongo/db/pipeline/score_fusion_pipeline_builder.h" +#include "mongo/util/string_map.h" + +#include +#include + +#include + +namespace mongo::lite_parsed_hybrid_search_desugarer::score_fusion_utils { + +inline constexpr StringData kInternalFieldsName = + ScoreFusionPipelineBuilder::kScoreFusionInternalFieldsName; +inline constexpr StringData kDocsName = ScoreFusionPipelineBuilder::kScoreFusionDocsFieldName; +inline constexpr StringData kScoreDetailsDescription = + ScoreFusionPipelineBuilder::kScoreFusionScoreDetailsDescription; + +// Per-pipeline scoreDetails scalar field suffix used in the desugared $group output (and the +// matching $replaceRoot wrapper). For $scoreFusion the per-pipeline scalar is "

_rawScore". +inline constexpr StringData kDetailsScalarSuffix = "_rawScore"_sd; + +// Validation/translation of normalization + combination spec. +class ScoreFusionScoringOptions { +public: + explicit ScoreFusionScoringOptions(const ScoreFusionSpec& spec); + + ScoreFusionNormalizationEnum getNormalizationMethod() const { + return _normalizationMethod; + } + + StringData getNormalizationString() const; + + ScoreFusionCombinationMethodEnum getCombinationMethod() const { + return _combinationMethod; + } + + StringData getCombinationMethodString() const; + + const boost::optional& getCombinationExpression() const { + return _combinationExpression; + } + +private: + ScoreFusionNormalizationEnum _normalizationMethod; + ScoreFusionCombinationMethodEnum _combinationMethod; + boost::optional _combinationExpression; +}; + +// {$addFields: {.

_score: {$multiply: [, ]}}} +// where is determined by the input.normalization: +// - none / minMaxScaler: {$meta: "score"} +// - sigmoid: {$sigmoid: {$meta: "score"}} +BSONObj buildScoreAddFieldsBson(StringData inputPipelineName, + ScoreFusionNormalizationEnum normalization, + double weight); + +// {$addFields: {.

_rawScore: {$meta: "score"}}} +BSONObj buildRawScoreAddFieldsBson(StringData inputPipelineName); + +// {$addFields: {.

_scoreDetails: ...}} -- two branches: +// - inputGeneratesScoreDetails: { details: {$meta: "scoreDetails"} } +// - else : { details: [] } +// Mirrors `addInputPipelineScoreDetails` in score_fusion_pipeline_builder.cpp. +BSONObj buildAddInputPipelineScoreDetailsBson(StringData inputPipelineName, + bool inputGeneratesScoreDetails); + +// {$_internalSetWindowFields: {sortBy: {.

_score: -1}, +// output: {.

_score: +// {$minMaxScaler: {input: +// "$.

_score"}}}}} +BSONObj buildMinMaxScalerSetWindowFieldsBson(StringData inputPipelineName); + +// {$setMetadata: {score: {$avg: ["$.

_score", ...]}}} (avg branch) +// or +// {$setMetadata: {score: {$let: {vars: {

: "$.

_score", ...}, +// in: }}}} (expression branch). +// +// In the expression branch, the user's combination.expression is forwarded as raw BSON. Full +// parse will validate the expression via ExpressionLet::parse. +BSONObj buildSetFinalCombinedScoreBson(const std::vector& pipelineNames, + const ScoreFusionScoringOptions& scoringOptions); + +// {$addFields: {: {calculatedScoreDetails: [ +// {$mergeObjects: [{inputPipelineName: "

", +// inputPipelineRawScore: "$.

_rawScore", +// weight: , +// value: "$.

_score"}, +// "$.

_scoreDetails"]}, +// ...]}}} +BSONObj buildCalculatedFinalScoreDetailsBson(const std::vector& pipelineNames, + const StringMap& weights); + +// {$setMetadata: {scoreDetails: {value: {$meta: "score"}, description: ..., +// normalization: ..., combination: {...}, details: ...}}} +BSONObj buildSetMetadataScoreDetailsBson(const ScoreFusionScoringOptions& scoringOptions); + +// Builds the per-input-pipeline preamble for $scoreFusion: a clone of the input subpipeline's +// LPDSs, followed by: +// $replaceWith ({: "$$ROOT"}) +// $addFields (per-pipeline weighted, optionally normalized, score) +// [optional] $addFields rawScore + $addFields scoreDetails +// [optional] $_internalSetWindowFields (minMaxScaler normalization) +StageSpecs buildScoreFusionInputPipelinePreamble(const NamespaceString& nss, + const LiteParsedPipeline& subPipeline, + const std::string& pipelineName, + double weight, + ScoreFusionNormalizationEnum normalization, + bool includeScoreDetails); + +} // namespace mongo::lite_parsed_hybrid_search_desugarer::score_fusion_utils diff --git a/src/mongo/db/pipeline/search/document_source_search.h b/src/mongo/db/pipeline/search/document_source_search.h index 26eab157b77..a39905bdc93 100644 --- a/src/mongo/db/pipeline/search/document_source_search.h +++ b/src/mongo/db/pipeline/search/document_source_search.h @@ -68,6 +68,11 @@ public: return true; } + // $search produces scoreDetails metadata when the user requests it via the mongotQuery. + bool isScoreDetailsStage() const final { + return hasScoreDetails(); + } + // $search is not a selection stage when returnStoredSource is true since it might have an // implicit projection applied. bool isSelectionStage() const final { diff --git a/src/mongo/db/pipeline/search/lite_parsed_search.h b/src/mongo/db/pipeline/search/lite_parsed_search.h index 876c49b1917..d02c6cc81f5 100644 --- a/src/mongo/db/pipeline/search/lite_parsed_search.h +++ b/src/mongo/db/pipeline/search/lite_parsed_search.h @@ -35,6 +35,7 @@ namespace mongo { static constexpr StringData kReturnStoredSourceFieldName = "returnStoredSource"_sd; +static constexpr StringData kScoreDetailsFieldName = "scoreDetails"_sd; /** * A 'LiteParsed' representation of a search stage. This is the parent class for the @@ -97,6 +98,20 @@ protected: return false; } + // Returns true if the stage spec has scoreDetails: true inside the mongotQuery. + bool hasScoreDetails() const { + if (this->_originalBson.type() == BSONType::object) { + auto specObj = this->_originalBson.Obj(); + // SearchLiteParsed's BSON shape mirrors DocumentSourceSearch::hasScoreDetails(): + // top-level $search spec is the mongotQuery, so 'scoreDetails' lives directly on it. + if (specObj.hasField(kScoreDetailsFieldName)) { + auto sd = specObj[kScoreDetailsFieldName]; + return sd.isBoolean() && sd.boolean(); + } + } + return false; + } + private: const NamespaceString _nss; };