SERVER-111637 Extract field paths from toplevel field slots (#43046)

GitOrigin-RevId: e4b4517d63ac85b2ae8314212a5bb09579a9743b
This commit is contained in:
Evan Bergeron 2025-10-28 17:22:12 -04:00 committed by MongoDB Bot
parent 8817dd4b1c
commit 9f717e3a48
11 changed files with 423 additions and 201 deletions

View File

@ -30,5 +30,3 @@
- featureFlagReshardingCloneNoRefresh
# TODO SERVER-100132: Remove this once we know the impact of enabling tracing.
- featureFlagTracing
# TODO SERVER-111747 Re-enable 'featureFlagExtractFieldPathsSbeStage'
- featureFlagExtractFieldPathsSbeStage

View File

@ -18,8 +18,9 @@
*/
import {getEngine} from "jstests/libs/query/analyze_plan.js";
import {getSbePlanStages} from "jstests/libs/query/sbe_explain_helpers.js";
import {resultsEq} from "jstests/aggregation/extras/utils.js";
function runTestWithParameter(documents, pipeline, useExtract) {
function runTestWithParameter(documents, pipeline, useExtract, numExpectedExtractStages) {
db.c.deleteMany({});
db.c.insertMany(documents);
@ -36,8 +37,10 @@ function runTestWithParameter(documents, pipeline, useExtract) {
// Verify extract_field_paths stage exists
const extractStages = getSbePlanStages(explain, "extract_field_paths");
if (useExtract) {
assert.eq(extractStages.length, 1, "Should have one extract_field_paths stage");
assert.eq(extractStages[0]["stage"], "extract_field_paths", "Stage name should match");
assert.eq(extractStages.length, numExpectedExtractStages, "Should have extract_field_paths stage(s)");
for (let extractStage of extractStages) {
assert.eq(extractStage["stage"], "extract_field_paths", "Stage name should match");
}
} else {
assert.eq(extractStages.length, 0, "Should not have extract_field_paths stage");
}
@ -46,6 +49,13 @@ function runTestWithParameter(documents, pipeline, useExtract) {
return results;
}
/**
 * Logs `pipeline`, then executes it through `runTestWithParameter` twice - first with
 * `useExtract` set to true, then with it set to false - and asserts via `resultsEq` that both
 * executions produced equal results.
 *
 * @param {Object[]} documents - Documents handed to `runTestWithParameter` for each execution.
 * @param {Object[]} pipeline - Aggregation pipeline under test.
 * @param {number} numExpectedExtractStages - Forwarded unchanged to both executions.
 */
function run(documents, pipeline, numExpectedExtractStages) {
    jsTest.log({"Pipeline": pipeline});

    // The order matters: the extract-enabled execution happens before the extract-disabled one.
    const [withExtract, withoutExtract] = [true, false].map((useExtract) =>
        runTestWithParameter(documents, pipeline, useExtract, numExpectedExtractStages),
    );

    assert(resultsEq(withExtract, withoutExtract));
}
const originalFrameworkControl = db.adminCommand({getParameter: 1, internalQueryFrameworkControl: 1});
const originalFeatureFlagExtract = db.adminCommand({getParameter: 1, featureFlagExtractFieldPathsSbeStage: 1});
@ -140,29 +150,8 @@ try {
{_id: 75, d: 6, a: {c: 4, b: 2}},
];
const projects = [
{x: "$a", y: "$a.b"},
{x: "$a", y: "$a.c"},
{x: "$a", y: "$b.c"},
{x: "$a.a", y: "$a.b", z: "$a.c"},
{x: "$a.a", y: "$a.b"},
{x: "$a.a"},
{x: "$a.a.a", y: "$a.a.b"},
{x: "$a.a.a", y: "$a.b.a"},
{x: "$a.a.a"},
{x: "$a.a.a.a"},
{x: "$a.a.b", y: "$a.a.c"},
{x: "$a.a.b"},
{x: "$a.b", y: "$a.c", z: "$d"},
{x: "$a.b", y: "$a.c"},
{x: "$a.b"},
{x: "$a.b.c", y: "$a.b.d"},
{x: "$a.b.c", y: "$a.d.e"},
{x: "$a.b.c"},
{x: "$a.c"},
];
const fieldPaths = [
"$a",
"$a.a",
"$a.a.a",
"$a.a.a.a",
@ -174,46 +163,76 @@ try {
"$a.b.d",
"$a.c",
"$a.d.e",
"$b",
"$b.c",
"$c",
"$d",
];
jsTest.log("Running $projects");
for (let projIndex = 0; projIndex < projects.length; projIndex++) {
const project = projects[projIndex];
const pipeline = [{$project: project}, {$sort: {_id: 1}}];
for (let fp0 of fieldPaths) {
for (let fp1 of fieldPaths) {
const indexField = fp0.replace("$", "");
const coveredPlanExpectExtractStage = fp0.includes(".");
const resultsWithExtract = runTestWithParameter(documents, pipeline, true);
const resultsWithoutExtract = runTestWithParameter(documents, pipeline, false);
// Test $match then $project with covered plan.
assert.commandWorked(db.c.createIndex({[indexField]: 1}));
const coveredIndexPipeline = [{$match: {[indexField]: {$gt: 0}}}, {$project: {x: fp0, _id: 0}}];
jsTest.log({"coveredIndexPipeline": coveredIndexPipeline});
run(documents, coveredIndexPipeline, coveredPlanExpectExtractStage ? 1 : 0);
for (let i = 0; i < resultsWithExtract.length; i++) {
assert.docEq(resultsWithExtract[i], resultsWithoutExtract[i]);
}
}
// Test $match then $project with fetch plan.
const fetchPlanExpectExtractStage = fp1.includes(".");
const fetchIndexPipeline = [
{$match: {[indexField]: {$gt: 0}}},
{$project: {x: fp1 /*use the other field*/, _id: 0}},
];
jsTest.log({"fetchIndexPipeline": fetchIndexPipeline});
run(documents, fetchIndexPipeline, fetchPlanExpectExtractStage ? 1 : 0);
jsTest.log("Running $groups");
let seenExtract = false;
for (let keyPath of fieldPaths) {
for (let accPath of fieldPaths) {
const pipeline = {$group: {_id: {path: keyPath}, pathSum: {$sum: accPath}}};
// TODO SERVER-111637 revisit this try/catch. Some of these plans do not feed a result obj
// slot into what would be the extract_field_paths stage, so the "uses extract_field_paths
// stage assertion" can fail. We expect SERVER-111637 will resolve all these cases.
try {
const resultsWithExtract = runTestWithParameter(documents, pipeline, true);
const resultsWithoutExtract = runTestWithParameter(documents, pipeline, false);
assert(resultsWithExtract.length > 0);
assert(resultsWithoutExtract.length > 0);
for (let i = 0; i < resultsWithExtract.length; i++) {
assert.docEq(resultsWithExtract[i], resultsWithoutExtract[i]);
}
seenExtract = true;
jsTest.log({"Pipeline used extract": pipeline});
} catch {
jsTest.log({"Pipeline did not use extract": pipeline});
assert(db.c.getIndexes().length > 1, "Index should still exist");
assert.commandWorked(db.c.dropIndex({[indexField]: 1}));
assert(db.c.getIndexes().length === 1, "Only _id index should still exist");
// Test $group and $project.
const hasDottedPaths = fp0.includes(".") || fp1.includes(".");
const oneExtractStagePipelines = [
[{$project: {x: fp0, y: fp1}}],
[{$group: {_id: {path: fp0}, pathSum: {$sum: fp1}}}],
];
for (let pipeline of oneExtractStagePipelines) {
run(documents, pipeline, hasDottedPaths ? 1 : 0 /*numExpectedExtractStages*/);
}
// Test $group then $project and $project then $group.
const twoExtractStagePipelines = [
{
pipeline: [{$project: {x: fp0, y: fp1}}, {$group: {_id: {path: "$x"}, pathSum: {$sum: "$y"}}}],
numExpectedExtractStages: 1,
numExpectedExtractStagesNoDottedPaths: 0,
},
{
pipeline: [
{$group: {_id: {path: fp0}, pathSum: {$sum: fp1}}},
{$project: {x: "$_id.path", total: "$pathSum"}},
],
numExpectedExtractStages: 2,
numExpectedExtractStagesNoDottedPaths: 1,
},
];
jsTest.log({"twoExtractStagePipelines": twoExtractStagePipelines});
for (let i = 0; i < twoExtractStagePipelines.length; i++) {
const pipeline = twoExtractStagePipelines[i].pipeline;
const numExpectedExtractStages = twoExtractStagePipelines[i].numExpectedExtractStages;
const numExpectedExtractStagesNoDottedPaths =
twoExtractStagePipelines[i].numExpectedExtractStagesNoDottedPaths;
run(
documents,
pipeline,
hasDottedPaths ? numExpectedExtractStages : numExpectedExtractStagesNoDottedPaths,
);
}
}
}
assert.eq(seenExtract, true, "expected at least one $group pipeline to use extract stage");
jsTest.log("All ExtractFieldPathsStage tests completed successfully!");
} finally {

View File

@ -2220,6 +2220,7 @@ mongo_cc_library(
"//src/mongo/db/exec/sbe/values:cell_interface.cpp",
"//src/mongo/db/exec/sbe/values:key_string_entry.cpp",
"//src/mongo/db/exec/sbe/values:object_walk_node.cpp",
"//src/mongo/db/exec/sbe/values:path_request.cpp",
"//src/mongo/db/exec/sbe/values:row.cpp",
"//src/mongo/db/exec/sbe/values:slot_printer.cpp",
"//src/mongo/db/exec/sbe/values:ts_block.cpp",

View File

@ -117,8 +117,19 @@ public:
auto pathReqs = makePathReqs(paths);
value::SlotVector outputSlots = generateMultipleSlotIds(pathReqs.size());
// A single input slot that holds the entire object.
std::vector<PathSlot> inputs = {{{Id{}}, scanSlots[0]}};
std::vector<PathSlot> outputs;
outputs.reserve(pathReqs.size());
tassert(11163704,
"expected an output slot for each path req",
pathReqs.size() == outputSlots.size());
for (size_t i = 0; i < pathReqs.size(); ++i) {
outputs.emplace_back(std::make_pair(pathReqs[i], outputSlots[i]));
}
auto extractFieldPathsStage = makeS<ExtractFieldPathsStage>(
std::move(scanStage), scanSlots[0], pathReqs, outputSlots, kEmptyPlanNodeId);
std::move(scanStage), inputs, outputs, kEmptyPlanNodeId);
return std::make_pair(outputSlots, std::move(extractFieldPathsStage));
};
@ -138,6 +149,80 @@ TEST_F(ExtractFieldPathsStageTest, SinglePathNonNestedNonArrayTest) {
runExtractFieldPathsTest(paths, inputs, outputs);
}
// Builds an ExtractFieldPathsStage whose input slot is registered at the path request for "a"
// and whose single output requests that same path. The expected output rows ([1], [2]) are
// identical to the input rows.
TEST_F(ExtractFieldPathsStageTest, SinglePathNonNestedNonArrayToplevelFieldSlotTest) {
// `inputBab` is an array of subarrays. Each subarray has an element for each input slot.
BSONArrayBuilder inputBab;
inputBab << BSON_ARRAY(1) << BSON_ARRAY(2);
auto [inputTag, inputVal] = stage_builder::makeValue(inputBab.arr());
value::ValueGuard inputGuard{inputTag, inputVal};
// `outputBab` is an array of subarrays. Each subarray has an element for each output slot.
BSONArrayBuilder outputBab;
outputBab << BSON_ARRAY(1) << BSON_ARRAY(2);
auto [expectedTag, expectedVal] = stage_builder::makeValue(outputBab.arr());
value::ValueGuard expectedGuard{expectedTag, expectedVal};
// Factory passed to runTestMulti(): wraps the given scan stage in the stage under test and
// returns the output slots together with the new stage.
auto makeStageFn = [&, this](value::SlotVector scanSlots,
std::unique_ptr<PlanStage> scanStage) {
std::vector<FieldPath> paths{"a"};
auto pathReqs = makePathReqs(paths);
value::SlotVector outputSlots = generateMultipleSlotIds(pathReqs.size());
std::vector<PathSlot> inputs;
// NOTE(review): indexes pathReqs by scan-slot index, so this assumes there are at least as
// many path requests as scan slots (one of each in this test).
for (size_t i = 0; i < scanSlots.size(); ++i) {
// Associate each input slot with its corresponding toplevel field path.
tassert(11163708, "input path not toplevel", pathReqs[i].size() == 2);
inputs.emplace_back(pathReqs[i], scanSlots[i]);
}
std::vector<PathSlot> outputs;
outputs.reserve(pathReqs.size());
for (size_t i = 0; i < pathReqs.size(); ++i) {
// Associate each output slot with its corresponding path.
outputs.emplace_back(std::make_pair(pathReqs[i], outputSlots[i]));
}
auto extractFieldPathsStage =
makeS<ExtractFieldPathsStage>(std::move(scanStage), inputs, outputs, kEmptyPlanNodeId);
return std::make_pair(outputSlots, std::move(extractFieldPathsStage));
};
// NOTE(review): both guards are released before the call, presumably because runTestMulti()
// takes ownership of the input and expected values - confirm against its definition.
inputGuard.reset();
expectedGuard.reset();
// NOTE(review): the leading 1 is presumably the number of input slots - confirm.
runTestMulti(1, inputTag, inputVal, expectedTag, expectedVal, makeStageFn);
}
// Builds an ExtractFieldPathsStage whose input slot is registered at a two-component path made
// of the first component of the "a.b" path request followed by Id, and whose single output
// requests the full "a.b" path. Input rows hold {b: 1} and {b: 2}; expected outputs are 1 and 2.
TEST_F(ExtractFieldPathsStageTest, SingleToplevelFieldSlotNestedPathTest) {
// `inputBab` is an array of subarrays. Each subarray has an element for each input slot.
BSONArrayBuilder inputBab;
inputBab << BSON_ARRAY(BSON("b" << 1)) << BSON_ARRAY(BSON("b" << 2));
auto [inputTag, inputVal] = stage_builder::makeValue(inputBab.arr());
value::ValueGuard inputGuard{inputTag, inputVal};
// `outputBab` is an array of subarrays. Each subarray has an element for each output slot.
BSONArrayBuilder outputBab;
outputBab << BSON_ARRAY(1) << BSON_ARRAY(2);
auto [expectedTag, expectedVal] = stage_builder::makeValue(outputBab.arr());
value::ValueGuard expectedGuard{expectedTag, expectedVal};
// Factory passed to runTestMulti(): wraps the given scan stage in the stage under test and
// returns the output slots together with the new stage.
auto makeStageFn = [&, this](value::SlotVector scanSlots,
std::unique_ptr<PlanStage> scanStage) {
std::vector<FieldPath> paths{"a.b"};
auto pathReqs = makePathReqs(paths);
value::SlotVector outputSlots = generateMultipleSlotIds(pathReqs.size());
std::vector<PathSlot> inputs;
for (size_t i = 0; i < scanSlots.size(); ++i) {
// Associate each input slot with its corresponding toplevel field path.
// Only the leading component of the requested path is kept, terminated by Id.
Path p = {pathReqs[i][0], Id{}};
inputs.emplace_back(p, scanSlots[i]);
}
std::vector<PathSlot> outputs;
outputs.reserve(pathReqs.size());
for (size_t i = 0; i < pathReqs.size(); ++i) {
// Associate each output slot with its corresponding path.
outputs.emplace_back(pathReqs[i], outputSlots[i]);
}
auto extractFieldPathsStage =
makeS<ExtractFieldPathsStage>(std::move(scanStage), inputs, outputs, kEmptyPlanNodeId);
return std::make_pair(outputSlots, std::move(extractFieldPathsStage));
};
// NOTE(review): both guards are released before the call, presumably because runTestMulti()
// takes ownership of the input and expected values - confirm against its definition.
inputGuard.reset();
expectedGuard.reset();
// NOTE(review): the leading 1 is presumably the number of input slots - confirm.
runTestMulti(1, inputTag, inputVal, expectedTag, expectedVal, makeStageFn);
}
TEST_F(ExtractFieldPathsStageTest, MultiPathNonNestedNonArrayTest) {
std::vector<FieldPath> paths{"a", "b"};
std::vector<std::string> inputs{"{a: 1, b: 3}", "{b: 4, a: 2}"};

View File

@ -34,33 +34,27 @@
namespace mongo::sbe {
ExtractFieldPathsStage::ExtractFieldPathsStage(std::unique_ptr<PlanStage> input,
value::SlotId inputSlotId,
std::vector<value::Path> pathReqs,
value::SlotVector outputSlotIds,
std::vector<PathSlot> inputs,
std::vector<PathSlot> outputs,
PlanNodeId planNodeId,
bool participateInTrialRunTracking)
: PlanStage("extract_field_paths"_sd,
nullptr /* yieldPolicy */,
planNodeId,
participateInTrialRunTracking),
_inputSlotId(inputSlotId),
_pathReqs(std::move(pathReqs)),
_outputSlotIds(std::move(outputSlotIds)) {
tassert(10984201,
"expect pathReqs and outputSlotIds to be equal length",
_pathReqs.size() == _outputSlotIds.size());
_inputs(std::move(inputs)),
_outputs(std::move(outputs)) {
for (size_t i = 0; i < _outputSlotIds.size(); i++) {
_outputAccessorsIdxForSlotId[_outputSlotIds[i]] = i;
for (size_t i = 0; i < _outputs.size(); i++) {
_outputAccessorsIdxForSlotId[_outputs[i].second] = i;
}
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> ExtractFieldPathsStage::clone() const {
return std::make_unique<ExtractFieldPathsStage>(_children[0]->clone(),
_inputSlotId,
_pathReqs,
_outputSlotIds,
_inputs,
_outputs,
_commonStats.nodeId,
participateInTrialRunTracking());
}
@ -68,11 +62,12 @@ std::unique_ptr<PlanStage> ExtractFieldPathsStage::clone() const {
void ExtractFieldPathsStage::constructRoot() {
_root = std::make_unique<value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>>();
_recorders.reserve(_pathReqs.size());
for (size_t i = 0; i < _pathReqs.size(); ++i) {
_recorders.reserve(_outputs.size());
for (size_t i = 0; i < _outputs.size(); ++i) {
_recorders.emplace_back();
_root->add(
_pathReqs[i], nullptr /* filterRecorder */, &_recorders.back() /* outProjRecorder */);
_root->add(_outputs[i].first,
nullptr /* filterRecorder */,
&_recorders.back() /* outProjRecorder */);
}
}
@ -81,8 +76,11 @@ void ExtractFieldPathsStage::prepare(CompileCtx& ctx) {
constructRoot();
_outputAccessors.resize(_pathReqs.size());
_inputAccessor = _children[0]->getAccessor(ctx, _inputSlotId);
_outputAccessors.resize(_outputs.size());
for (const auto& [path, slotId] : _inputs) {
auto inputAccessor = _children[0]->getAccessor(ctx, slotId);
_root->addAccessorAtPath(inputAccessor, path);
}
}
value::SlotAccessor* ExtractFieldPathsStage::getAccessor(CompileCtx& ctx, value::SlotId slot) {
@ -122,20 +120,36 @@ PlanState ExtractFieldPathsStage::getNext() {
return trackPlanState(state);
}
auto [inputTag, inputVal] = _inputAccessor->getViewOfValue();
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
_root.get(),
inputTag,
inputVal,
value::bitcastTo<const char*>(inputVal),
[](value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>* node,
value::TypeTags eltTag,
value::Value eltVal,
const char* bsonPtr) {
if (auto rec = node->projRecorder) {
rec->recordValue(eltTag, eltVal);
auto walk = [](value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>* node,
value::TypeTags eltTag,
value::Value eltVal,
const char* bsonPtr) {
if (auto rec = node->projRecorder) {
rec->recordValue(eltTag, eltVal);
}
};
if (_root->inputAccessor) {
// Should only be used for unit tests.
auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue();
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputTag, inputVal, value::bitcastTo<const char*>(inputVal), walk);
} else {
// Important: this only handles toplevel fields. For nested fields, we would need knowledge of
// arrayness. We would also need to check for input accessors during the tree traversal.
for (const auto& child : _root->getChildren) {
const auto& childWalkNode = child.second;
if (childWalkNode->inputAccessor) {
auto [childTag, childVal] = childWalkNode->inputAccessor->getViewOfValue();
value::walkField<value::ScalarProjectionPositionInfoRecorder>(
childWalkNode.get(),
childTag,
childVal,
value::bitcastTo<const char*>(childVal),
walk);
}
});
}
}
// Consume all outputs
for (size_t i = 0; i < _recorders.size(); ++i) {
@ -169,16 +183,27 @@ const SpecificStats* ExtractFieldPathsStage::getSpecificStats() const {
std::vector<DebugPrinter::Block> ExtractFieldPathsStage::debugPrint() const {
auto ret = PlanStage::debugPrint();
DebugPrinter::addIdentifier(ret, _inputSlotId);
ret.emplace_back(DebugPrinter::Block("pathReqs[`"));
for (size_t idx = 0; idx < _pathReqs.size(); ++idx) {
ret.emplace_back(DebugPrinter::Block("inputs[`"));
for (size_t idx = 0; idx < _inputs.size(); ++idx) {
if (idx) {
ret.emplace_back(DebugPrinter::Block("`,"));
}
DebugPrinter::addIdentifier(ret, _outputSlotIds[idx]);
const auto& [path, slotId] = _inputs[idx];
DebugPrinter::addIdentifier(ret, slotId);
ret.emplace_back("=");
ret.emplace_back(value::pathToString(path));
}
ret.emplace_back("`]");
ret.emplace_back(value::pathToString(_pathReqs[idx]));
ret.emplace_back(DebugPrinter::Block("outputs[`"));
for (size_t idx = 0; idx < _outputs.size(); ++idx) {
if (idx) {
ret.emplace_back(DebugPrinter::Block("`,"));
}
const auto& [path, slotId] = _outputs[idx];
DebugPrinter::addIdentifier(ret, slotId);
ret.emplace_back("=");
ret.emplace_back(value::pathToString(path));
}
ret.emplace_back("`]");

View File

@ -44,22 +44,29 @@
namespace mongo::sbe {
using PathSlot = std::pair<value::Path, value::SlotId>;
/**
* Given an input stage `input` with a single slot `inputSlotId` containing an object and a set of
* requested paths `pathReqs`, populate output slots `outputSlotIds` with the value at each path.
* 'ExtractFieldPathsStage' takes as input:
*
* This stage is used to evaluate ExpressionFieldPath's in a single walk over the input object.
* - A list of input (path, slotId) pairs. These represent the input slot accessors, and their
* location in the path tree. There is either a single (path, slotId) pair that holds the
* entire result, or otherwise there is a list of slots for toplevel fields.
*
* Debug string representation:
* extract_field_paths `inputSlotId` pathReqs[outputSlotIds[i] = pathReqs[i], ... ,
* outputSlotIds[N] = pathReqs[N]]
* - A list of output (path, slotId) pairs. These represent the output slot accessors, and their
* location in the path tree. There can be arbitrarily many outputs.
*
* The output slot(s) are populated in a single pass over the input slot(s).
*
* Example debug string representation:
*
* extract_field_paths inputs[s4 = Get(a)/Id, s5 = Get(b)/Id] outputs[s6 =
* Get(a)/Traverse/Get(c)/Id, s7 = Get(b)/Traverse/Get(d)/Id, s8 = Get(b)/Traverse/Get(e)/Id]
*/
class ExtractFieldPathsStage final : public PlanStage {
public:
ExtractFieldPathsStage(std::unique_ptr<PlanStage> input,
value::SlotId inputSlotId,
std::vector<value::Path> pathReqs,
value::SlotVector outputSlotIds,
std::vector<PathSlot> inputs,
std::vector<PathSlot> outputs,
PlanNodeId planNodeId,
bool participateInTrialRunTracking = true);
@ -89,11 +96,9 @@ protected:
private:
void constructRoot();
const value::SlotId _inputSlotId;
const std::vector<value::Path> _pathReqs;
const value::SlotVector _outputSlotIds;
const std::vector<PathSlot> _inputs;
const std::vector<PathSlot> _outputs;
value::SlotAccessor* _inputAccessor = nullptr;
std::unique_ptr<value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>> _root =
nullptr;
std::vector<value::OwnedValueAccessor> _outputAccessors;

View File

@ -46,53 +46,4 @@ std::unique_ptr<CellBlock> MaterializedCellBlock::clone() const {
ret->_filterPosInfo = _filterPosInfo;
return ret;
}
std::string pathToString(const Path& p) {
std::string out;
size_t idx = 0;
for (auto& component : p) {
if (holds_alternative<Id>(component)) {
out += "Id";
} else if (holds_alternative<Get>(component)) {
out += "Get(";
out += get<Get>(component).field;
out += ')';
} else if (holds_alternative<Traverse>(component)) {
out += "Traverse";
}
++idx;
if (idx != p.size()) {
out.push_back('/');
}
}
return out;
}
std::ostream& operator<<(std::ostream& os, const Path& path) {
os << pathToString(path);
return os;
};
std::string PathRequest::toString() const {
return str::stream() << (type == kFilter ? "FilterPath" : "ProjectPath") << "("
<< pathToString(path) << ")";
}
StringData PathRequest::getTopLevelField() const {
return get<Get>(path[0]).field;
}
std::string PathRequest::getFullPath() const {
StringBuilder sb;
for (const auto& component : path) {
if (holds_alternative<Get>(component)) {
if (sb.len() != 0) {
sb.append(".");
}
sb.append(get<Get>(component).field);
}
}
return sb.str();
}
} // namespace mongo::sbe::value

View File

@ -32,6 +32,7 @@
#include "mongo/db/exec/sbe/values/block_interface.h"
#include "mongo/db/exec/sbe/values/bson.h"
#include "mongo/db/exec/sbe/values/cell_interface.h"
#include "mongo/db/exec/sbe/values/slot.h"
#include "mongo/db/exec/sbe/values/util.h"
#include "mongo/db/exec/sbe/values/value.h"
@ -171,6 +172,13 @@ struct ObjectWalkNode {
FilterPositionInfoRecorder* filterRecorder,
ProjectionRecorder* outProjBlockRecorder,
size_t pathIdx = 0);
void addAccessorAtPath(value::SlotAccessor* inputAccessor,
const Path& path,
size_t pathIdx = 0);
// Non-null if and only if this node has a source slot.
value::SlotAccessor* inputAccessor = nullptr;
};
template <class ProjectionRecorder>
@ -213,6 +221,32 @@ void ObjectWalkNode<ProjectionRecorder>::add(const Path& path,
}
}
/**
 * Follows 'path' through this tree, starting at component 'pathIdx', and stores
 * 'outInputAccessor' in the 'inputAccessor' member of the node reached by the terminating Id
 * component.
 *
 * On the initial call (pathIdx == 0) the path must be non-empty and must end with Id. A Traverse
 * component may not be the first component, and Id may only appear as the last component.
 *
 * If a Get component names a field with no entry in 'getChildren', or a Traverse component is
 * reached while 'traverseChild' is unset, the call returns without attaching the accessor.
 */
template <class ProjectionRecorder>
void ObjectWalkNode<ProjectionRecorder>::addAccessorAtPath(value::SlotAccessor* outInputAccessor,
                                                           const Path& path,
                                                           size_t pathIdx /*= 0*/) {
    if (pathIdx == 0) {
        // Whole-path invariants only need checking once, at the entry point of the recursion.
        tassert(11163706, "Cannot be given empty path", !path.empty());
        tassert(11163707, "Path must end with Id", holds_alternative<Id>(path.back()));
    }

    const auto& step = path[pathIdx];

    if (holds_alternative<Id>(step)) {
        // End of the path: this node is the one the accessor belongs to.
        tassert(11163702, "Id must be at end of path", pathIdx == path.size() - 1);
        inputAccessor = outInputAccessor;
        return;
    }

    if (holds_alternative<Traverse>(step)) {
        tassert(11163703, "expected nonzero pathIdx", pathIdx != 0);
        if (traverseChild) {
            traverseChild->addAccessorAtPath(outInputAccessor, path, pathIdx + 1);
        }
        return;
    }

    if (holds_alternative<Get>(step)) {
        const auto& fieldStep = std::get<Get>(step);
        const auto childIt = getChildren.find(fieldStep.field);
        if (childIt != getChildren.end()) {
            childIt->second->addAccessorAtPath(outInputAccessor, path, pathIdx + 1);
        }
    }
}
template <class ProjectionRecorder, class Cb>
requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*>
void walkField(ObjectWalkNode<ProjectionRecorder>* node,

View File

@ -0,0 +1,83 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/exec/sbe/values/path_request.h"
#include "mongo/db/exec/sbe/values/value.h"
namespace mongo::sbe::value {
std::string pathToString(const Path& p) {
std::string out;
size_t idx = 0;
for (auto& component : p) {
if (holds_alternative<Id>(component)) {
out += "Id";
} else if (holds_alternative<Get>(component)) {
out += "Get(";
out += get<Get>(component).field;
out += ')';
} else if (holds_alternative<Traverse>(component)) {
out += "Traverse";
}
++idx;
if (idx != p.size()) {
out.push_back('/');
}
}
return out;
}
/**
 * Streams the pathToString() rendering of 'path' into 'os' and returns 'os' for chaining.
 */
std::ostream& operator<<(std::ostream& os, const Path& path) {
    return os << pathToString(path);
}
/**
 * Returns "FilterPath(<path>)" when this request's type is kFilter and "ProjectPath(<path>)"
 * otherwise, where <path> is the pathToString() rendering of the request's path.
 */
std::string PathRequest::toString() const {
    const char* label = (type == kFilter) ? "FilterPath" : "ProjectPath";
    return str::stream() << label << "(" << pathToString(path) << ")";
}
/**
 * Returns the field name held by the first component of the path.
 *
 * The first component is read as a Get without any check, so the path must be non-empty and must
 * begin with a Get component.
 */
StringData PathRequest::getTopLevelField() const {
    const auto& leadingStep = path.front();
    return get<Get>(leadingStep).field;
}
std::string PathRequest::getFullPath() const {
StringBuilder sb;
for (const auto& component : path) {
if (holds_alternative<Get>(component)) {
if (sb.len() != 0) {
sb.append(".");
}
sb.append(get<Get>(component).field);
}
}
return sb.str();
}
} // namespace mongo::sbe::value

View File

@ -27,6 +27,11 @@
* it in the license file.
*/
#include "mongo/base/string_data.h"
#include <string>
#include <variant>
#pragma once
namespace mongo::sbe::value {

View File

@ -36,29 +36,10 @@
namespace mongo::stage_builder {
bool eligibleForExtractFieldPathsStage(const PlanStageSlots& childStageOutputs) {
if (childStageOutputs.hasBlockOutput()) {
LOGV2_DEBUG(11087206,
3,
"Child stage outputs rejected for ExtractFieldPathsStage",
"reason"_attr = "has block output");
return false;
}
if (!childStageOutputs.hasResultObj()) {
LOGV2_DEBUG(11087207,
3,
"Child stage outputs rejected for ExtractFieldPathsStage",
"reason"_attr = "does not include result object");
return false;
}
return true;
}
boost::optional<PlanStageReqs> makeExtractFieldPathsPlanStageReqs(
StageBuilderState& state,
const std::vector<const Expression*>& expressions,
const PlanStageSlots& childStageOutputs) {
PlanStageReqs extractFieldPathsReqs;
if (!state.ifrContext.getSavedFlagValue(feature_flags::gFeatureFlagExtractFieldPathsSbeStage)) {
LOGV2_DEBUG(11087205,
3,
@ -66,10 +47,15 @@ boost::optional<PlanStageReqs> makeExtractFieldPathsPlanStageReqs(
"reason"_attr = "feature flag is disabled");
return boost::none;
}
if (!eligibleForExtractFieldPathsStage(childStageOutputs)) {
if (childStageOutputs.hasBlockOutput()) {
LOGV2_DEBUG(11087206,
3,
"Child stage outputs rejected for ExtractFieldPathsStage",
"reason"_attr = "has block output");
return boost::none;
}
bool ok = true;
PlanStageReqs extractFieldPathsReqs;
for (const Expression* expression : expressions) {
if (!ok) {
break;
@ -123,49 +109,79 @@ boost::optional<PlanStageReqs> makeExtractFieldPathsPlanStageReqs(
if (!ok) {
return boost::none;
}
if (extractFieldPathsReqs.size() == 0) {
return boost::none;
}
auto childStageOutputsData = childStageOutputs.getSlotNameToIdMap();
for (const std::string& pathExpr : extractFieldPathsReqs.getPathExprs()) {
FieldPath fieldPath{pathExpr};
tassert(11163705,
"expected child stage of extract_field_paths stage to have all required "
"toplevel fields",
childStageOutputs.has({PlanStageSlots::kField, fieldPath.getFieldName(0)}));
}
return boost::make_optional(extractFieldPathsReqs);
}
/**
 * Converts a dotted field path into an sbe::value::Path.
 *
 * Every field name becomes a Get component. Each Get except the last is followed by a Traverse
 * component, and the path is always terminated by Id. For example "a.b" becomes
 * Get(a)/Traverse/Get(b)/Id and "a" becomes Get(a)/Id.
 *
 * 'fullPath' may be any type from which a FieldPath can be constructed.
 */
template <typename T>
sbe::value::Path toPath(const T& fullPath) {
    FieldPath fieldPath{fullPath};
    const size_t numFields = fieldPath.getPathLength();

    sbe::value::Path ret;
    for (size_t i = 0; i < numFields; ++i) {
        ret.emplace_back(sbe::value::Get{.field = std::string(fieldPath.getFieldName(i))});
        // Omit the Traverse for the last path component. Comparing 'i + 1' against the length
        // (rather than 'i' against 'length - 1') avoids unsigned wrap-around when the length is 0.
        if (i + 1 < numFields) {
            ret.emplace_back(sbe::value::Traverse{});
        }
    }
    ret.emplace_back(sbe::value::Id{});
    return ret;
}
std::pair<SbStage, PlanStageSlots> buildExtractFieldPaths(SbStage stage,
StageBuilderState& state,
const PlanStageSlots& childStageOutputs,
PlanStageReqs& extractFieldPathsReqs,
const PlanNodeId nodeId) {
sbe::value::SlotVector outSlots;
std::vector<sbe::value::Path> pathReqs;
std::vector<std::pair<sbe::value::Path, sbe::value::SlotId>> outputs;
PlanStageSlots extractionOutputs;
for (const std::string& fullPath : extractFieldPathsReqs.getPathExprs()) {
FieldPath fieldPath{fullPath};
tassert(11087200,
"extract_field_paths does not extract toplevel fields that already have slots",
!childStageOutputs.has({PlanStageSlots::kField, fullPath}));
// Create path.
sbe::value::Path path;
for (size_t i = 0; i < fieldPath.getPathLength() - 1; ++i) {
path.emplace_back(sbe::value::Get{.field = std::string(fieldPath.getFieldName(i))});
path.emplace_back(sbe::value::Traverse{});
}
// Omit the Traverse for the last path component.
path.emplace_back(sbe::value::Get{
.field = std::string(fieldPath.getFieldName(fieldPath.getPathLength() - 1))});
path.emplace_back(sbe::value::Id{});
pathReqs.push_back(std::move(path));
// Create slot id for path.
sbe::value::SlotId slot = state.slotId();
outSlots.emplace_back(slot);
outputs.push_back({toPath(fullPath), slot});
extractionOutputs.set(std::pair(PlanStageSlots::kPathExpr, fullPath), SbSlot{slot});
}
tassert(10757507, "expected nonempty outSlots", !outSlots.empty());
auto childResultSlot = childStageOutputs.getResultObj();
return {sbe::makeS<sbe::ExtractFieldPathsStage>(std::move(stage),
childResultSlot.getId(),
pathReqs, // TODO this is by value
std::move(outSlots),
nodeId),
tassert(10757507, "expected nonempty outputs", outputs.size() > 0);
std::vector<std::pair<sbe::value::Path, sbe::value::SlotId>> inputs;
// Extract fields from a set of toplevel field slots.
for (auto& p : childStageOutputs.getSlotNameToIdMap()) {
const PlanStageSlots::UnownedSlotName& slotName = p.first;
if (slotName.first != PlanStageSlots::kField) {
continue;
}
auto path = toPath(slotName.second);
tassert(11163701,
"Expected only toplevel paths as input to extract_field_paths stage",
path.size() == 2);
std::pair<sbe::value::Path, sbe::value::SlotId> input = {path, p.second.getId()};
inputs.push_back(input);
}
tassert(11163700, "Expected nonempty inputs", !inputs.empty());
return {sbe::makeS<sbe::ExtractFieldPathsStage>(std::move(stage), inputs, outputs, nodeId),
extractionOutputs};
}
} // namespace mongo::stage_builder