SERVER-107879: BACKPORT-25612: [v8.2] Best-effort ban hybrid search on timeseries collections inside of $lookup and $unionWiths (#39303)
GitOrigin-RevId: d1a6b73fdd6d8a880b2cfe4f2ccba5e1f189f156
This commit is contained in:
parent
812035b596
commit
91e5dcf79c
@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Tests that hybrid search ($scoreFusion and $rankFusion) is rejected when inside of $unionWith
|
||||
* or $lookup subpipelines on timeseries collections.
|
||||
*
|
||||
* This test can only run on unsharded collections because we cannot deterministically ban hybrid
|
||||
* search on timeseries collections in the sharded collections case.
|
||||
*
|
||||
* TODO SERVER-108218 Ban hybrid search on sharded collections and remove the
|
||||
* assumes_unsharded_collection tag.
|
||||
*
|
||||
* @tags: [ requires_timeseries, assumes_unsharded_collection, featureFlagSearchHybridScoringFull,
|
||||
* requires_fcv_82 ]
|
||||
*/
|
||||
|
||||
// Timeseries collection: hybrid search is expected to be rejected whenever it targets this
// namespace, whether directly or through a subpipeline.
const timeseriesCollName = `${jsTestName()}_timeseries`;
assert.commandWorked(db.createCollection(timeseriesCollName, {
    timeseries: {timeField: "t", metaField: "m"},
}));
const timeseriesColl = db[timeseriesCollName];
assert.commandWorked(timeseriesColl.insert({t: new Date(), m: 1, a: 42, b: 17}));

// Regular (non-timeseries) collection: hybrid search against this namespace is expected to work.
const nonTimeseriesCollName = `${jsTestName()}_nontimeseries`;
assert.commandWorked(db.createCollection(nonTimeseriesCollName));
const nonTimeseriesColl = db[nonTimeseriesCollName];
assert.commandWorked(nonTimeseriesColl.insert({a: 50, b: 20}));

// Minimal hybrid search pipelines shared by the test cases in this file.
const rankFusionPipeline = [{
    $rankFusion: {input: {pipelines: {sortPipeline: [{$sort: {a: 1}}]}}},
}];
const scoreFusionPipeline = [{
    $scoreFusion: {
        input: {pipelines: {scorePipeline: [{$score: {score: "$a"}}]}, normalization: "none"},
    },
}];
|
||||
|
||||
/**
 * Runs 'pipeline' as an aggregate command against 'collName' and returns the raw command
 * response, so callers can assert on success or on specific error codes.
 */
function runPipeline(pipeline, collName) {
    const aggregateCmd = {aggregate: collName, pipeline: pipeline, cursor: {}};
    return db.runCommand(aggregateCmd);
}
|
||||
|
||||
(function testHybridSearchRejected() {
    // Top-level hybrid search directly against the timeseries collection must fail.
    for (const hybridPipeline of [rankFusionPipeline, scoreFusionPipeline]) {
        assert.commandFailedWithCode(runPipeline(hybridPipeline, timeseriesCollName),
                                     [10557301, ErrorCodes.OptionNotSupportedOnView]);
    }
})();
|
||||
|
||||
// TODO SERVER-108117 Enable these tests. Until then the function below is intentionally defined
// but never invoked (note the missing trailing "()").
(function testUnionWithRejectsIsHybridSearchFlagFromUser() {
    // Apart from the internal flag, each stage is a well-formed $unionWith (only 'coll' and
    // 'pipeline'), so the user-supplied $_internalIsHybridSearch field is the only thing the
    // server can object to. The flag must be rejected regardless of its value.
    for (const flagValue of [true, false]) {
        const badUnionWithStage = {
            $unionWith: {
                coll: timeseriesCollName,
                pipeline: [{$sort: {_id: 1}}],
                $_internalIsHybridSearch: flagValue
            }
        };
        assert.commandFailedWithCode(runPipeline([badUnionWithStage], timeseriesCollName),
                                     5491300);
    }
});
|
||||
|
||||
// TODO SERVER-108117 Enable these tests. Until then the function below is intentionally defined
// but never invoked (note the missing trailing "()").
(function testLookupRejectsIsHybridSearchFlagFromUser() {
    // Apart from the internal flag, each stage is a well-formed pipeline-style $lookup ('from',
    // 'pipeline' and 'as'), so the user-supplied $_internalIsHybridSearch field is the only thing
    // the server can object to. The flag must be rejected regardless of its value.
    for (const flagValue of [true, false]) {
        const badLookupStage = {
            $lookup: {
                from: timeseriesCollName,
                pipeline: [{$sort: {_id: 1}}],
                as: "out",
                $_internalIsHybridSearch: flagValue
            }
        };
        assert.commandFailedWithCode(runPipeline([badLookupStage], timeseriesCollName), 5491300);
    }
});
|
||||
|
||||
// Note that hybrid search cannot run against a collectionless $unionWith because a collectionless
|
||||
// $unionWith must start with the $documents stage, but hybrid search stages must be the first
|
||||
// stages in the pipeline.
|
||||
|
||||
(function testHybridSearchRejectedOnUnionWithPipeline() {
    // Both the outer aggregate and the $unionWith subpipeline target the timeseries collection.
    [rankFusionPipeline, scoreFusionPipeline].forEach((hybridPipeline) => {
        const unionWithStage = {$unionWith: {coll: timeseriesCollName, pipeline: hybridPipeline}};
        assert.commandFailedWithCode(runPipeline([unionWithStage], timeseriesCollName),
                                     [10787900, 10787901]);
    });
})();
|
||||
|
||||
(function testHybridSearchOnUnionWithOnNonTimeseriesCollectionInsideTimeseriesQuery() {
    // Hybrid search is valid on a non-timeseries collection, so these queries are expected to
    // succeed even though the outer query runs on a timeseries collection.
    [rankFusionPipeline, scoreFusionPipeline].forEach((hybridPipeline) => {
        const unionWithStage = {
            $unionWith: {coll: nonTimeseriesCollName, pipeline: hybridPipeline}
        };
        assert.commandWorked(runPipeline([unionWithStage], timeseriesCollName));
    });
})();
|
||||
|
||||
(function testHybridSearchOnUnionWithOnTimeseriesCollectionInsideNonTimeseriesQuery() {
    // Hybrid search is not valid on timeseries collections, so these queries are expected to
    // fail even though the outer query runs on a non-timeseries collection.
    for (const hybridPipeline of [rankFusionPipeline, scoreFusionPipeline]) {
        const unionWithStage = {$unionWith: {coll: timeseriesCollName, pipeline: hybridPipeline}};
        assert.commandFailedWithCode(runPipeline([unionWithStage], nonTimeseriesCollName),
                                     [10787900, 10787901]);
    }
})();
|
||||
|
||||
(function testHybridSearchOnUnionWithOnTimeseriesCollectionInsideNonTimeseriesQueryNested() {
    // The offending hybrid search sits two $unionWith levels deep: the outer query and the first
    // $unionWith target the non-timeseries collection, the innermost one targets timeseries.
    for (const hybridPipeline of [rankFusionPipeline, scoreFusionPipeline]) {
        const innerUnionWithStage = {
            $unionWith: {coll: timeseriesCollName, pipeline: hybridPipeline}
        };
        const outerUnionWithStage = {
            $unionWith: {coll: nonTimeseriesCollName, pipeline: [innerUnionWithStage]}
        };
        assert.commandFailedWithCode(runPipeline([outerUnionWithStage], nonTimeseriesCollName),
                                     [10787900, 10787901]);
    }
})();
|
||||
|
||||
(function testHybridSearchRejectedOnLookupPipeline() {
    // Both the outer aggregate and the $lookup subpipeline target the timeseries collection.
    [rankFusionPipeline, scoreFusionPipeline].forEach((hybridPipeline) => {
        const lookupStage = {
            $lookup: {from: timeseriesCollName, pipeline: hybridPipeline, as: "out"}
        };
        assert.commandFailedWithCode(runPipeline([lookupStage], timeseriesCollName),
                                     [10787900, 10787901]);
    });
})();
|
||||
|
||||
(function testHybridSearchOnLookupOnNonTimeseriesCollectionInsideTimeseriesQuery() {
    // The $lookup subpipeline runs against a non-timeseries collection, so these queries are
    // expected to succeed.
    [rankFusionPipeline, scoreFusionPipeline].forEach((hybridPipeline) => {
        const lookupStage = {
            $lookup: {from: nonTimeseriesCollName, pipeline: hybridPipeline, as: "out"}
        };
        assert.commandWorked(runPipeline([lookupStage], timeseriesCollName));
    });
})();
|
||||
|
||||
(function testHybridSearchOnLookupOnTimeseriesCollectionInsideNonTimeseriesQuery() {
    // The outer query runs on a non-timeseries collection, but the $lookup subpipeline targets
    // the timeseries collection, so the command must fail.
    for (const hybridPipeline of [rankFusionPipeline, scoreFusionPipeline]) {
        const lookupStage = {
            $lookup: {from: timeseriesCollName, pipeline: hybridPipeline, as: "out"}
        };
        assert.commandFailedWithCode(runPipeline([lookupStage], nonTimeseriesCollName),
                                     [10787900, 10787901]);
    }
})();
|
||||
|
||||
(function testHybridSearchOnLookupOnTimeseriesCollectionInsideNonTimeseriesQueryNested() {
    // The offending hybrid search sits two $lookup levels deep: the outer query and the first
    // $lookup target the non-timeseries collection, the innermost one targets timeseries.
    for (const hybridPipeline of [rankFusionPipeline, scoreFusionPipeline]) {
        const innerLookupStage = {
            $lookup: {from: timeseriesCollName, pipeline: hybridPipeline, as: "out"}
        };
        const outerLookupStage = {
            $lookup: {from: nonTimeseriesCollName, pipeline: [innerLookupStage], as: "out"}
        };
        assert.commandFailedWithCode(runPipeline([outerLookupStage], nonTimeseriesCollName),
                                     [10787900, 10787901]);
    }
})();
|
||||
@ -189,11 +189,36 @@ assert.commandFailedWithCode(
|
||||
normalization: "sigmoid"
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
]
|
||||
},
|
||||
normalization: "none"
|
||||
}
|
||||
}
|
||||
}]),
|
||||
// TODO SERVER-104725 Change this to the error code from LiteParsedPipeline::validate().
|
||||
10170100);
|
||||
|
||||
assert.commandFailedWithCode(
|
||||
runPipeline([{
|
||||
$scoreFusion: {
|
||||
input: {
|
||||
pipelines: {
|
||||
nested: [
|
||||
{
|
||||
$scoreFusion: {
|
||||
input: {
|
||||
pipelines: {simple: [{$score: {score: "$score_50"}}]},
|
||||
normalization: "sigmoid"
|
||||
},
|
||||
}
|
||||
},
|
||||
{$score: 10},
|
||||
]
|
||||
},
|
||||
normalization: "none"
|
||||
}
|
||||
}
|
||||
}]),
|
||||
// TODO SERVER-104725 Change this to the error code from LiteParsedPipeline::validate().
|
||||
10473003);
|
||||
|
||||
@ -51,6 +51,8 @@
|
||||
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
|
||||
|
||||
namespace mongo::hybrid_scoring_util {
|
||||
|
||||
bool isScoreStage(const boost::intrusive_ptr<DocumentSource>& stage) {
|
||||
@ -380,8 +382,6 @@ Status isScoredPipeline(const std::vector<BSONObj>& bsonPipeline,
|
||||
}
|
||||
|
||||
bool isHybridSearchPipeline(const std::vector<BSONObj>& bsonPipeline) {
|
||||
tassert(10473000, "Input pipeline must not be empty.", !bsonPipeline.empty());
|
||||
|
||||
// Please keep the following in alphabetical order.
|
||||
static const std::set<StringData> hybridScoringStages{
|
||||
DocumentSourceRankFusion::kStageName,
|
||||
@ -398,6 +398,41 @@ bool isHybridSearchPipeline(const std::vector<BSONObj>& bsonPipeline) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void validateIsHybridSearchNotSetByUser(boost::intrusive_ptr<ExpressionContext> expCtx,
                                        const BSONObj& spec) {
    // Nothing to validate unless the internal flag is actually present in the stage spec.
    if (!spec.hasField(kIsHybridSearchFlagFieldName)) {
        return;
    }

    // The flag is present: defer to the shared internal-field check, requesting that only
    // internal clients be allowed to supply it.
    auto* const opCtx = expCtx->getOperationContext();
    assertAllowedInternalIfRequired(
        opCtx, kIsHybridSearchFlagFieldName, AllowedWithClientType::kInternal);
}
|
||||
|
||||
void assertForeignCollectionIsNotTimeseries(const NamespaceString& nss,
|
||||
const boost::intrusive_ptr<ExpressionContext>& expCtx) {
|
||||
const auto opCtx = expCtx->getOperationContext();
|
||||
const auto collectionCatalog = CollectionCatalog::get(opCtx);
|
||||
|
||||
if (auto collectionPtr = collectionCatalog->lookupCollectionByNamespace(opCtx, nss)) {
|
||||
uassert(10787900,
|
||||
"$rankFusion and $scoreFusion are unsupported on timeseries collections",
|
||||
!collectionPtr->isTimeseriesCollection());
|
||||
} else if (auto viewPtr = collectionCatalog->lookupView(opCtx, nss)) {
|
||||
uassert(10787901,
|
||||
"$rankFusion and $scoreFusion are unsupported on timeseries collections",
|
||||
!viewPtr->timeseries());
|
||||
} else {
|
||||
// Note that we try our best to ban timeseries collections on hybrid search.
|
||||
// However, in a sharded collections environment, a mongod shard might not know the
|
||||
// information about the timeseries collection (if it is owned by another shard). In
|
||||
// that case, it is non-trivial to ban the timeseries query.
|
||||
// TODO SERVER-108218 Ban hybrid search inside of subpipelines on time series collections.
|
||||
LOGV2(10787902,
|
||||
"$rankFusion and $scoreFusion are unsupported on timeseries collections, but not "
|
||||
"enough information is available to determine if a subpipeline is running on a "
|
||||
"timeseries collection.");
|
||||
}
|
||||
}
|
||||
|
||||
namespace score_details {
|
||||
|
||||
std::pair<std::string, BSONObj> constructScoreDetailsForGrouping(const std::string pipelineName) {
|
||||
|
||||
@ -36,6 +36,8 @@
|
||||
|
||||
namespace mongo::hybrid_scoring_util {
|
||||
|
||||
/**
 * Name of the internal-only field that $lookup / $unionWith serialization attaches to a stage spec
 * when its subpipeline is a hybrid search. Declared 'inline' so that every translation unit
 * including this header shares a single definition.
 */
inline constexpr StringData kIsHybridSearchFlagFieldName = "$_internalIsHybridSearch"_sd;
|
||||
|
||||
/**
|
||||
* Checks if this stage is a $score stage, where it has been desugared to $setMetadata with the meta
|
||||
* type MetaType::kScore.
|
||||
@ -114,6 +116,27 @@ bool pipelineContainsScoreStage(const std::vector<BSONObj>& bsonPipeline);
|
||||
*/
|
||||
bool isHybridSearchPipeline(const std::vector<BSONObj>& bsonPipeline);
|
||||
|
||||
/**
|
||||
* Validates that the provided spec does not have the internal-use-only $_internalIsHybridSearch
|
||||
* flag set, unless the request comes from an internal client.
|
||||
*
|
||||
* TODO SERVER-108117 This is currently not called because the validation is broken when running an
|
||||
* explain on a view in a sharded collection. In that scenario, the router desugars the subpipeline,
|
||||
* adds $_internalIsHybridSearch to the serialized BSON, and sends it to the shards. The shards
|
||||
* respond with an error that the view must be executed on the router, and then the router tries
|
||||
* executing the fully-desugared pipeline. However, on this retry, the internal client flag is not
|
||||
* set, and the router fails the explain due to this assertion.
|
||||
*/
|
||||
void validateIsHybridSearchNotSetByUser(boost::intrusive_ptr<ExpressionContext> expCtx,
|
||||
const BSONObj& spec);
|
||||
|
||||
/**
|
||||
* Validates that a given collection/view namespace is not a timeseries collection for hybrid
|
||||
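* search. Fails with error 10787900 (collection) or 10787901 (view) if it is. This is best-effort:
* if the namespace resolves to neither a collection nor a view in the local catalog, it only logs
* and returns.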
|
||||
*/
|
||||
void assertForeignCollectionIsNotTimeseries(const NamespaceString& nss,
|
||||
const boost::intrusive_ptr<ExpressionContext>& expCtx);
|
||||
|
||||
namespace score_details {
|
||||
/**
|
||||
* Construct the scoreDetails field name and obj (ex: name_scoreDetails: {$mergeObjects:
|
||||
|
||||
@ -56,6 +56,7 @@
|
||||
#include "mongo/db/pipeline/document_path_support.h"
|
||||
#include "mongo/db/pipeline/document_source.h"
|
||||
#include "mongo/db/pipeline/document_source_documents.h"
|
||||
#include "mongo/db/pipeline/document_source_hybrid_scoring_util.h"
|
||||
#include "mongo/db/pipeline/document_source_merge_gen.h"
|
||||
#include "mongo/db/pipeline/document_source_queue.h"
|
||||
#include "mongo/db/pipeline/document_source_sequential_document_cache.h"
|
||||
@ -1312,6 +1313,13 @@ void DocumentSourceLookUp::serializeToArray(std::vector<Value>& array,
|
||||
output[getSourceName()]["let"] = Value(exprList.freeze());
|
||||
|
||||
output[getSourceName()]["pipeline"] = Value(serializedPipeline);
|
||||
|
||||
if (!opts.isSerializingForExplain() &&
|
||||
hybrid_scoring_util::isHybridSearchPipeline(
|
||||
_userPipeline.value_or(std::vector<BSONObj>()))) {
|
||||
output[getSourceName()][hybrid_scoring_util::kIsHybridSearchFlagFieldName] =
|
||||
Value(true);
|
||||
}
|
||||
}
|
||||
|
||||
if (opts.isSerializingForExplain()) {
|
||||
@ -1552,8 +1560,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceLookUp::createFromBson(
|
||||
bool hasPipeline = false;
|
||||
bool hasLet = false;
|
||||
|
||||
auto lookupSpec = DocumentSourceLookupSpec::parse(IDLParserContext(kStageName), elem.Obj());
|
||||
// TODO SERVER-108117 Validate that the isHybridSearch flag is only set internally. See helper
|
||||
// hybrid_scoring_util::validateIsHybridSearchNotSetByUser to handle this.
|
||||
|
||||
auto lookupSpec = DocumentSourceLookupSpec::parse(IDLParserContext(kStageName), elem.Obj());
|
||||
|
||||
if (lookupSpec.getFrom().has_value()) {
|
||||
fromNs = parseLookupFromAndResolveNamespace(lookupSpec.getFrom().value().getElement(),
|
||||
@ -1582,6 +1592,17 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceLookUp::createFromBson(
|
||||
fromNs =
|
||||
NamespaceString::makeCollectionlessAggregateNSS(pExpCtx->getNamespaceString().dbName());
|
||||
}
|
||||
|
||||
if (lookupSpec.getIsHybridSearch() || hybrid_scoring_util::isHybridSearchPipeline(pipeline)) {
|
||||
// If there is a hybrid search stage in our pipeline, then we should validate that we
|
||||
// are not running on a timeseries collection.
|
||||
//
|
||||
// If the hybrid search flag is set to true, this request may have
|
||||
// come from a mongos that does not know if the collection is a valid collection for
|
||||
// hybrid search. Therefore, we must validate it here.
|
||||
hybrid_scoring_util::assertForeignCollectionIsNotTimeseries(fromNs, pExpCtx);
|
||||
}
|
||||
|
||||
boost::intrusive_ptr<DocumentSourceLookUp> lookupStage = nullptr;
|
||||
if (hasPipeline) {
|
||||
if (localField.empty() && foreignField.empty()) {
|
||||
|
||||
@ -67,3 +67,11 @@ structs:
|
||||
description: The foreign field used to perform equality match with localField
|
||||
type: string
|
||||
optional: true
|
||||
# When a hybrid search stage is de-sugared, then serialized into sub-pipeline BSON to be sent
|
||||
# across the wire (i.e. from mongos to mongod), it's unclear from inspecting the BSON that the
|
||||
# original query was a hybrid search, so this internal field preserves that information.
|
||||
$_internalIsHybridSearch:
|
||||
description: An optional internal field specifying if the subpipeline is a hybrid search.
|
||||
type: optionalBool
|
||||
stability: internal
|
||||
cpp_name: isHybridSearch
|
||||
|
||||
@ -37,11 +37,13 @@
|
||||
#include "mongo/db/exec/agg/pipeline_builder.h"
|
||||
#include "mongo/db/exec/document_value/document.h"
|
||||
#include "mongo/db/pipeline/document_source_documents.h"
|
||||
#include "mongo/db/pipeline/document_source_hybrid_scoring_util.h"
|
||||
#include "mongo/db/pipeline/document_source_match.h"
|
||||
#include "mongo/db/pipeline/document_source_queue.h"
|
||||
#include "mongo/db/pipeline/document_source_single_document_transformation.h"
|
||||
#include "mongo/db/pipeline/document_source_union_with_gen.h"
|
||||
#include "mongo/db/pipeline/process_interface/mongo_process_interface.h"
|
||||
#include "mongo/db/pipeline/search/search_helper.h"
|
||||
#include "mongo/db/query/allowed_contexts.h"
|
||||
#include "mongo/db/query/plan_summary_stats.h"
|
||||
#include "mongo/db/views/resolved_view.h"
|
||||
@ -260,6 +262,8 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceUnionWith::createFromBson(
|
||||
unionNss = NamespaceStringUtil::deserialize(expCtx->getNamespaceString().dbName(),
|
||||
elem.valueStringData());
|
||||
} else {
|
||||
// TODO SERVER-108117 Validate that the isHybridSearch flag is only set internally. See
|
||||
// helper hybrid_scoring_util::validateIsHybridSearchNotSetByUser to handle this.
|
||||
auto unionWithSpec =
|
||||
UnionWithSpec::parse(IDLParserContext(kStageName), elem.embeddedObject());
|
||||
if (unionWithSpec.getColl()) {
|
||||
@ -272,6 +276,16 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceUnionWith::createFromBson(
|
||||
expCtx->getNamespaceString().dbName());
|
||||
}
|
||||
pipeline = unionWithSpec.getPipeline().value_or(std::vector<BSONObj>{});
|
||||
if (unionWithSpec.getIsHybridSearch() ||
|
||||
hybrid_scoring_util::isHybridSearchPipeline(pipeline)) {
|
||||
// If there is a hybrid search stage in our pipeline, then we should validate that we
|
||||
// are not running on a timeseries collection.
|
||||
//
|
||||
// If the hybrid search flag is set to true, this request may have
|
||||
// come from a mongos that does not know if the collection is a valid collection for
|
||||
// hybrid search. Therefore, we must validate it here.
|
||||
hybrid_scoring_util::assertForeignCollectionIsNotTimeseries(unionNss, expCtx);
|
||||
}
|
||||
}
|
||||
return make_intrusive<DocumentSourceUnionWith>(
|
||||
expCtx, std::move(unionNss), std::move(pipeline));
|
||||
@ -541,10 +555,16 @@ Value DocumentSourceUnionWith::serialize(const SerializationOptions& opts) const
|
||||
return _pipeline->serializeToBson(opts);
|
||||
}();
|
||||
|
||||
auto spec = collectionless ? DOC("pipeline" << serializedPipeline)
|
||||
: DOC("coll" << opts.serializeIdentifier(_userNss.coll())
|
||||
<< "pipeline" << serializedPipeline);
|
||||
return Value(DOC(getSourceName() << spec));
|
||||
bool isHybridSearch = hybrid_scoring_util::isHybridSearchPipeline(_userPipeline);
|
||||
MutableDocument spec;
|
||||
if (!collectionless) {
|
||||
spec["coll"] = Value(opts.serializeIdentifier(_userNss.coll()));
|
||||
}
|
||||
spec["pipeline"] = Value(serializedPipeline);
|
||||
if (isHybridSearch) {
|
||||
spec[hybrid_scoring_util::kIsHybridSearchFlagFieldName] = Value(isHybridSearch);
|
||||
}
|
||||
return Value(DOC(getSourceName() << spec.freezeToValue()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -46,3 +46,11 @@ structs:
|
||||
description: An optional pipeline to apply to the collection being unioned.
|
||||
optional: true
|
||||
type: pipeline
|
||||
# When a hybrid search stage is de-sugared, then serialized into sub-pipeline BSON to be sent
|
||||
# across the wire (i.e. from mongos to mongod), it's unclear from inspecting the BSON that the
|
||||
# original query was a hybrid search, so this internal field preserves that information.
|
||||
$_internalIsHybridSearch:
|
||||
description: An optional internal field specifying if the subpipeline is a hybrid search.
|
||||
type: optionalBool
|
||||
stability: internal
|
||||
cpp_name: isHybridSearch
|
||||
|
||||
Loading…
Reference in New Issue
Block a user