SERVER-118499 Fix how sortKey gets propagated to extension stages following sort (#47367)

GitOrigin-RevId: 7b6a5386d0a0a25a222d90338de65d8b84652666
This commit is contained in:
Adithi Raghavan 2026-02-03 16:19:08 -05:00 committed by MongoDB Bot
parent 16de44dfea
commit df3ac9be6d
6 changed files with 125 additions and 28 deletions

View File

@ -68,15 +68,8 @@ selector:
- jstests/with_mongot/e2e/hybridSearch/vector_search_remove_embeddings.js
- jstests/with_mongot/e2e/skip_limit.js
- jstests/with_mongot/e2e/views/search_with_view_injected.js
- jstests/with_mongot/e2e/metadata/meta_dependency_validation.js
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_replace_root_test.js
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_test.js
- jstests/with_mongot/e2e/hybridSearch/score_fusion_verbose_test.js
- jstests/with_mongot/e2e/hybridSearch/union_with_vector_search.js
- jstests/with_mongot/e2e/metadata/searchScore_as_score_metadata.js
- jstests/with_mongot/e2e/metadata/sort_by_vector_search_score.js
- jstests/with_mongot/e2e/views/mongot_stage_in_view_definition.js
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
exclude_with_any_tags:
- assumes_against_mongod_not_mongos
- assumes_unsharded_collection

View File

@ -65,15 +65,8 @@ selector:
- jstests/with_mongot/e2e/hybridSearch/vector_search_remove_embeddings.js
- jstests/with_mongot/e2e/skip_limit.js
- jstests/with_mongot/e2e/views/search_with_view_injected.js
- jstests/with_mongot/e2e/metadata/meta_dependency_validation.js
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_replace_root_test.js
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_test.js
- jstests/with_mongot/e2e/hybridSearch/score_fusion_verbose_test.js
- jstests/with_mongot/e2e/hybridSearch/union_with_vector_search.js
- jstests/with_mongot/e2e/metadata/searchScore_as_score_metadata.js
- jstests/with_mongot/e2e/metadata/sort_by_vector_search_score.js
- jstests/with_mongot/e2e/views/mongot_stage_in_view_definition.js
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
exclude_with_any_tags:
- assumes_against_mongod_not_mongos
- requires_sharding

View File

@ -32,18 +32,9 @@
- jstests/with_mongot/e2e/skip_limit.js
- jstests/with_mongot/e2e/views/search_with_view_injected.js
# TODO SERVER-118499: Remove this exclusion when sortKey is propagated correctly.
- jstests/with_mongot/e2e/metadata/meta_dependency_validation.js
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_replace_root_test.js
- jstests/with_mongot/e2e/hybridSearch/ranked_fusion_verbose_test.js
- jstests/with_mongot/e2e/hybridSearch/score_fusion_verbose_test.js
- jstests/with_mongot/e2e/hybridSearch/union_with_vector_search.js
- jstests/with_mongot/e2e/metadata/searchScore_as_score_metadata.js
- jstests/with_mongot/e2e/metadata/sort_by_vector_search_score.js
# TODO SERVER-117794: Remove this exclusion.
- jstests/with_mongot/e2e/views/mongot_stage_in_view_definition.js
- jstests/with_mongot/e2e/views/vector_search/unionWith.js
- name: load_extension
value:

View File

@ -0,0 +1,109 @@
/**
* Tests that metadata (sortKey) properly propagates through extension stages to downstream
* stages that require it. This verifies the fix for metadata propagation when stages are pushed
* down to the query executor.
*
* @tags: [
* featureFlagExtensionsAPI,
* ]
*/
import {after, before, describe, it} from "jstests/libs/mochalite.js";
// The collection name is taken from jsTestName(), i.e. it is derived from this test's own name.
const collName = jsTestName();
// Collection handle shared by the before/after hooks and by every test case in this file.
const coll = db.getCollection(collName);
describe("extension stage metadata propagation", function () {
before(function () {
    // Start from an empty collection, then seed ten documents with _id/value 0..9.
    // Even-numbered documents carry the phrase "test document", odd ones "other document",
    // so the $text test case has both matching and non-matching documents.
    coll.drop();
    const seedDocs = Array.from({length: 10}, (_, idx) => {
        const isEven = idx % 2 === 0;
        return {
            _id: idx,
            value: idx,
            text_field: isEven ? "test document" : "other document",
        };
    });
    assert.commandWorked(coll.insertMany(seedDocs));
});
// Remove the test collection once all test cases have run.
after(function () {
    coll.drop();
});
it("should preserve sortKey metadata through $extensionLimit for $_internalSetWindowFields", function () {
    // Pipeline shape: $sort -> extension stage ($extensionLimit) -> window-function stage.
    // The window-function stage sits downstream of the extension stage, so this checks that
    // sortKey metadata produced by the $sort is still available after the extension stage.
    const pipeline = [
        {$sort: {value: 1}},
        {$extensionLimit: 5},
        {
            $_internalSetWindowFields: {
                // This uses sortKey from the $sort.
                // NOTE(review): `order` is not a field of the documents inserted in before();
                // presumably $rank relies on the sortKey metadata rather than on this path --
                // confirm before changing it.
                sortBy: {order: 1},
                output: {rank: {$rank: {}}},
            },
        },
        {$project: {_id: 1, value: 1, rank: 1}},
    ];
    const result = coll.aggregate(pipeline).toArray();

    assert.eq(result.length, 5, "Expected 5 documents");

    // Documents arrive in ascending `value` order, so the i-th document must have rank i + 1.
    // Correct ranking requires the sortKey metadata to have been propagated.
    result.forEach((doc, i) => {
        assert.eq(doc.rank, i + 1, `Document ${i} should have rank ${i + 1}`);
    });
});
it("should preserve textScore metadata through $extensionLimit to downstream stages", function () {
    // Checks that score metadata (not pushed down) also flows through an extension stage.
    // $text needs a text index, so create one for the duration of this test case.
    assert.commandWorked(coll.createIndex({text_field: "text"}));

    const result = coll
        .aggregate([
            {$match: {$text: {$search: "test"}}}, // Generates textScore metadata
            {$extensionLimit: 3},
            {
                $addFields: {
                    scoreValue: {$meta: "textScore"}, // Accesses score metadata
                },
            },
            {$project: {_id: 1, text_field: 1, scoreValue: 1}},
        ])
        .toArray();

    // Five seeded documents (the even _ids) contain "test"; the limit keeps three of them.
    // Without this check an empty result would make the per-document assertions below pass
    // vacuously.
    assert.eq(result.length, 3, "Expected 3 documents matching the text search after the limit");

    // Every surviving document must expose the textScore that was requested via $meta.
    result.forEach((doc) => {
        assert(doc.hasOwnProperty("scoreValue"), "Document should have scoreValue from textScore metadata");
        assert.gte(doc.scoreValue, 0, "Score should be non-negative");
    });

    // Drop the text index again so later test cases run without it.
    assert.commandWorked(coll.dropIndexes());
});
it("should handle multiple extension stages in sequence with metadata", function () {
    // Two back-to-back extension stages sit between the $sort and the window-function stage;
    // sortKey metadata has to pass through both of them for the ranking to be correct.
    const expectedCount = 5;
    const rankStage = {
        $_internalSetWindowFields: {
            sortBy: {value: 1},
            output: {rank: {$rank: {}}},
        },
    };
    const pipeline = [
        {$sort: {value: 1}},
        {$extensionLimit: 8},
        {$extensionLimit: expectedCount},
        rankStage,
        {$project: {_id: 1, value: 1, rank: 1}},
    ];

    const result = coll.aggregate(pipeline).toArray();
    assert.eq(result.length, expectedCount, "Expected 5 documents after two limits");

    // Ranks must be 1..5 in output order even with two extension stages in the pipeline.
    result.forEach((doc, position) => {
        assert.eq(doc.rank, position + 1, `Document ${position} should have rank ${position + 1}`);
    });
});
});

View File

@ -336,8 +336,11 @@ DepsTracker::State DocumentSourceExtensionOptimizable::getDependencies(DepsTrack
processFields(_properties.getProvidedMetadataFields(),
[&](auto metaType) { deps->setMetadataAvailable(metaType); });
// Retain entire metadata and do not optimize, as it may be needed by the extension.
return DepsTracker::State::NOT_SUPPORTED;
// Return SEE_NEXT to ensure metadata dependencies are propagated to the pipeline.
// Returning NOT_SUPPORTED would prevent our metadata requests from being honored.
// We still need whole document since extensions may access any fields.
deps->needWholeDocument = true;
return DepsTracker::State::SEE_NEXT;
}
boost::optional<DocumentSource::DistributedPlanLogic>

View File

@ -976,6 +976,14 @@ StatusWith<std::unique_ptr<CanonicalQuery>> createCanonicalQuery(
sortPattern->serialize(SortPattern::SortKeySerialization::kForPipelineSerialization)
.toBson();
}
// If the pushed-down sort stage will output sortKey metadata, mark it as available for the
// remaining pipeline stages. This ensures that stages like extension stages or $setWindowFields
// can declare dependencies on sortKey metadata even after the sort has been pushed down.
if (sortStage && sortStage->shouldSetSortKeyMetadata()) {
availableMetadata.set(DocumentMetadataFields::kSortKey);
}
// =============================================================================================
// The end of last-minute pipeline optimizations.
// =============================================================================================