SERVER-118499 Fix how sortKey gets propagated to extension stages following sort (#47367)
GitOrigin-RevId: 7b6a5386d0a0a25a222d90338de65d8b84652666
This commit is contained in:
parent
16de44dfea
commit
df3ac9be6d
@ -68,15 +68,8 @@ selector:
|
||||
- jstests/with_mongot/e2e/hybridSearch/vector_search_remove_embeddings.js
|
||||
- jstests/with_mongot/e2e/skip_limit.js
|
||||
- jstests/with_mongot/e2e/views/search_with_view_injected.js
|
||||
- jstests/with_mongot/e2e/metadata/meta_dependency_validation.js
|
||||
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_replace_root_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/score_fusion_verbose_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/union_with_vector_search.js
|
||||
- jstests/with_mongot/e2e/metadata/searchScore_as_score_metadata.js
|
||||
- jstests/with_mongot/e2e/metadata/sort_by_vector_search_score.js
|
||||
- jstests/with_mongot/e2e/views/mongot_stage_in_view_definition.js
|
||||
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
|
||||
exclude_with_any_tags:
|
||||
- assumes_against_mongod_not_mongos
|
||||
- assumes_unsharded_collection
|
||||
|
||||
@ -65,15 +65,8 @@ selector:
|
||||
- jstests/with_mongot/e2e/hybridSearch/vector_search_remove_embeddings.js
|
||||
- jstests/with_mongot/e2e/skip_limit.js
|
||||
- jstests/with_mongot/e2e/views/search_with_view_injected.js
|
||||
- jstests/with_mongot/e2e/metadata/meta_dependency_validation.js
|
||||
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_replace_root_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/score_fusion_verbose_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/union_with_vector_search.js
|
||||
- jstests/with_mongot/e2e/metadata/searchScore_as_score_metadata.js
|
||||
- jstests/with_mongot/e2e/metadata/sort_by_vector_search_score.js
|
||||
- jstests/with_mongot/e2e/views/mongot_stage_in_view_definition.js
|
||||
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
|
||||
exclude_with_any_tags:
|
||||
- assumes_against_mongod_not_mongos
|
||||
- requires_sharding
|
||||
|
||||
@ -32,18 +32,9 @@
|
||||
- jstests/with_mongot/e2e/skip_limit.js
|
||||
- jstests/with_mongot/e2e/views/search_with_view_injected.js
|
||||
|
||||
# TODO SERVER-118499: Remove this exclusion when sortKey is propagated correctly.
|
||||
- jstests/with_mongot/e2e/metadata/meta_dependency_validation.js
|
||||
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_replace_root_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/score_fusion_verbose_test.js
|
||||
- jstests/with_mongot/e2e/hybridSearch/union_with_vector_search.js
|
||||
- jstests/with_mongot/e2e/metadata/searchScore_as_score_metadata.js
|
||||
- jstests/with_mongot/e2e/metadata/sort_by_vector_search_score.js
|
||||
|
||||
# TODO SERVER-117794: Remove this exclusion.
|
||||
- jstests/with_mongot/e2e/views/mongot_stage_in_view_definition.js
|
||||
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
|
||||
|
||||
- name: load_extension
|
||||
value:
|
||||
|
||||
109
jstests/extensions/test_extension_metadata_propagation.js
Normal file
109
jstests/extensions/test_extension_metadata_propagation.js
Normal file
@ -0,0 +1,109 @@
|
||||
/**
|
||||
* Tests that metadata (sortKey) properly propagates through extension stages to downstream
|
||||
* stages that require it. This verifies the fix for metadata propagation when stages are pushed
|
||||
* down to the query executor.
|
||||
*
|
||||
* @tags: [
|
||||
* featureFlagExtensionsAPI,
|
||||
* ]
|
||||
*/
|
||||
|
||||
import {after, before, describe, it} from "jstests/libs/mochalite.js";
|
||||
|
||||
const collName = jsTestName();
|
||||
const coll = db.getCollection(collName);
|
||||
|
||||
describe("extension stage metadata propagation", function () {
|
||||
before(function () {
|
||||
coll.drop();
|
||||
const docs = [];
|
||||
for (let i = 0; i < 10; i++) {
|
||||
docs.push({
|
||||
_id: i,
|
||||
value: i,
|
||||
text_field: i % 2 === 0 ? "test document" : "other document",
|
||||
});
|
||||
}
|
||||
assert.commandWorked(coll.insertMany(docs));
|
||||
});
|
||||
|
||||
after(function () {
|
||||
coll.drop();
|
||||
});
|
||||
|
||||
it("should preserve sortKey metadata through $vectorSearch for $setWindowFields", function () {
|
||||
// Verify $sort pushdown with $extensionLimit preserves sortKey metadata for
|
||||
// $setWindowFields. This tests that sortKey metadata flows through extension stages to
|
||||
// downstream stages that need it.
|
||||
const result = coll
|
||||
.aggregate([
|
||||
{$sort: {value: 1}},
|
||||
{$extensionLimit: 5},
|
||||
{
|
||||
$_internalSetWindowFields: {
|
||||
sortBy: {order: 1}, // This uses sortKey from the $sort
|
||||
output: {rank: {$rank: {}}},
|
||||
},
|
||||
},
|
||||
{$project: {_id: 1, value: 1, rank: 1}},
|
||||
])
|
||||
.toArray();
|
||||
|
||||
assert.eq(result.length, 5, "Expected 5 documents");
|
||||
// Verify ranking worked (requires sortKey metadata)
|
||||
for (let i = 0; i < result.length; i++) {
|
||||
assert.eq(result[i].rank, i + 1, `Document ${i} should have rank ${i + 1}`);
|
||||
}
|
||||
});
|
||||
|
||||
it("should preserve textScore metadata through $extensionLimit to downstream stages", function () {
|
||||
// Test that score metadata (not pushed down) also flows correctly through extension stages.
|
||||
// Create a text index for $text search
|
||||
assert.commandWorked(coll.createIndex({text_field: "text"}));
|
||||
|
||||
const result = coll
|
||||
.aggregate([
|
||||
{$match: {$text: {$search: "test"}}}, // Generates textScore metadata
|
||||
{$extensionLimit: 3},
|
||||
{
|
||||
$addFields: {
|
||||
scoreValue: {$meta: "textScore"}, // Accesses score metadata
|
||||
},
|
||||
},
|
||||
{$project: {_id: 1, text_field: 1, scoreValue: 1}},
|
||||
])
|
||||
.toArray();
|
||||
|
||||
// Verify that score metadata was accessible (should not throw error)
|
||||
result.forEach((doc) => {
|
||||
assert(doc.hasOwnProperty("scoreValue"), "Document should have scoreValue from textScore metadata");
|
||||
assert.gte(doc.scoreValue, 0, "Score should be non-negative");
|
||||
});
|
||||
|
||||
assert.commandWorked(coll.dropIndexes());
|
||||
});
|
||||
|
||||
it("should handle multiple extension stages in sequence with metadata", function () {
|
||||
// Test multiple extension stages with sortKey metadata flowing through all of them
|
||||
const result = coll
|
||||
.aggregate([
|
||||
{$sort: {value: 1}},
|
||||
{$extensionLimit: 8},
|
||||
{$extensionLimit: 5}, // Two extension stages in a row
|
||||
{
|
||||
$_internalSetWindowFields: {
|
||||
sortBy: {value: 1},
|
||||
output: {rank: {$rank: {}}},
|
||||
},
|
||||
},
|
||||
{$project: {_id: 1, value: 1, rank: 1}},
|
||||
])
|
||||
.toArray();
|
||||
|
||||
assert.eq(result.length, 5, "Expected 5 documents after two limits");
|
||||
// Verify ranking still works with multiple extension stages
|
||||
for (let i = 0; i < result.length; i++) {
|
||||
assert.eq(result[i].rank, i + 1, `Document ${i} should have rank ${i + 1}`);
|
||||
}
|
||||
});
|
||||
});
|
||||
@ -336,8 +336,11 @@ DepsTracker::State DocumentSourceExtensionOptimizable::getDependencies(DepsTrack
|
||||
processFields(_properties.getProvidedMetadataFields(),
|
||||
[&](auto metaType) { deps->setMetadataAvailable(metaType); });
|
||||
|
||||
// Retain entire metadata and do not optimize, as it may be needed by the extension.
|
||||
return DepsTracker::State::NOT_SUPPORTED;
|
||||
// Return SEE_NEXT to ensure metadata dependencies are propagated to the pipeline.
|
||||
// Returning NOT_SUPPORTED would prevent our metadata requests from being honored.
|
||||
// We still need the whole document, since extensions may access any field.
|
||||
deps->needWholeDocument = true;
|
||||
return DepsTracker::State::SEE_NEXT;
|
||||
}
|
||||
|
||||
boost::optional<DocumentSource::DistributedPlanLogic>
|
||||
|
||||
@ -976,6 +976,14 @@ StatusWith<std::unique_ptr<CanonicalQuery>> createCanonicalQuery(
|
||||
sortPattern->serialize(SortPattern::SortKeySerialization::kForPipelineSerialization)
|
||||
.toBson();
|
||||
}
|
||||
|
||||
// If the pushed-down sort stage will output sortKey metadata, mark it as available for the
|
||||
// remaining pipeline stages. This ensures that stages like extension stages or $setWindowFields
|
||||
// can declare dependencies on sortKey metadata even after the sort has been pushed down.
|
||||
if (sortStage && sortStage->shouldSetSortKeyMetadata()) {
|
||||
availableMetadata.set(DocumentMetadataFields::kSortKey);
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// The end of last-minute pipeline optimizations.
|
||||
// =============================================================================================
|
||||
|
||||
Loading…
Reference in New Issue
Block a user