SERVER-111637 Extract field paths from toplevel field slots (#43046)
GitOrigin-RevId: e4b4517d63ac85b2ae8314212a5bb09579a9743b
This commit is contained in:
parent
8817dd4b1c
commit
9f717e3a48
@ -30,5 +30,3 @@
|
||||
- featureFlagReshardingCloneNoRefresh
|
||||
# TODO SERVER-100132: Remove this once we know the impact of enabling tracing.
|
||||
- featureFlagTracing
|
||||
# TODO SERVER-111747 Re-enable 'featureFlagExtractFieldPathsSbeStage'
|
||||
- featureFlagExtractFieldPathsSbeStage
|
||||
|
||||
@ -18,8 +18,9 @@
|
||||
*/
|
||||
import {getEngine} from "jstests/libs/query/analyze_plan.js";
|
||||
import {getSbePlanStages} from "jstests/libs/query/sbe_explain_helpers.js";
|
||||
import {resultsEq} from "jstests/aggregation/extras/utils.js";
|
||||
|
||||
function runTestWithParameter(documents, pipeline, useExtract) {
|
||||
function runTestWithParameter(documents, pipeline, useExtract, numExpectedExtractStages) {
|
||||
db.c.deleteMany({});
|
||||
db.c.insertMany(documents);
|
||||
|
||||
@ -36,8 +37,10 @@ function runTestWithParameter(documents, pipeline, useExtract) {
|
||||
// Verify extract_field_paths stage exists
|
||||
const extractStages = getSbePlanStages(explain, "extract_field_paths");
|
||||
if (useExtract) {
|
||||
assert.eq(extractStages.length, 1, "Should have one extract_field_paths stage");
|
||||
assert.eq(extractStages[0]["stage"], "extract_field_paths", "Stage name should match");
|
||||
assert.eq(extractStages.length, numExpectedExtractStages, "Should have extract_field_paths stage(s)");
|
||||
for (let extractStage of extractStages) {
|
||||
assert.eq(extractStage["stage"], "extract_field_paths", "Stage name should match");
|
||||
}
|
||||
} else {
|
||||
assert.eq(extractStages.length, 0, "Should not have extract_field_paths stage");
|
||||
}
|
||||
@ -46,6 +49,13 @@ function runTestWithParameter(documents, pipeline, useExtract) {
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Runs 'pipeline' over 'documents' twice -- first with the extract_field_paths stage enabled,
 * then with it disabled -- and asserts that both runs produce equal result sets.
 * 'numExpectedExtractStages' is forwarded unchanged to runTestWithParameter() for both runs.
 */
function run(documents, pipeline, numExpectedExtractStages) {
    jsTest.log({"Pipeline": pipeline});
    // Order matters: the "with extract" run happens before the "without extract" run.
    const [resultsWithExtract, resultsWithoutExtract] = [true, false].map((useExtract) =>
        runTestWithParameter(documents, pipeline, useExtract, numExpectedExtractStages),
    );
    assert(resultsEq(resultsWithExtract, resultsWithoutExtract));
}
|
||||
|
||||
const originalFrameworkControl = db.adminCommand({getParameter: 1, internalQueryFrameworkControl: 1});
|
||||
const originalFeatureFlagExtract = db.adminCommand({getParameter: 1, featureFlagExtractFieldPathsSbeStage: 1});
|
||||
|
||||
@ -140,29 +150,8 @@ try {
|
||||
{_id: 75, d: 6, a: {c: 4, b: 2}},
|
||||
];
|
||||
|
||||
const projects = [
|
||||
{x: "$a", y: "$a.b"},
|
||||
{x: "$a", y: "$a.c"},
|
||||
{x: "$a", y: "$b.c"},
|
||||
{x: "$a.a", y: "$a.b", z: "$a.c"},
|
||||
{x: "$a.a", y: "$a.b"},
|
||||
{x: "$a.a"},
|
||||
{x: "$a.a.a", y: "$a.a.b"},
|
||||
{x: "$a.a.a", y: "$a.b.a"},
|
||||
{x: "$a.a.a"},
|
||||
{x: "$a.a.a.a"},
|
||||
{x: "$a.a.b", y: "$a.a.c"},
|
||||
{x: "$a.a.b"},
|
||||
{x: "$a.b", y: "$a.c", z: "$d"},
|
||||
{x: "$a.b", y: "$a.c"},
|
||||
{x: "$a.b"},
|
||||
{x: "$a.b.c", y: "$a.b.d"},
|
||||
{x: "$a.b.c", y: "$a.d.e"},
|
||||
{x: "$a.b.c"},
|
||||
{x: "$a.c"},
|
||||
];
|
||||
|
||||
const fieldPaths = [
|
||||
"$a",
|
||||
"$a.a",
|
||||
"$a.a.a",
|
||||
"$a.a.a.a",
|
||||
@ -174,46 +163,76 @@ try {
|
||||
"$a.b.d",
|
||||
"$a.c",
|
||||
"$a.d.e",
|
||||
"$b",
|
||||
"$b.c",
|
||||
"$c",
|
||||
"$d",
|
||||
];
|
||||
|
||||
jsTest.log("Running $projects");
|
||||
for (let projIndex = 0; projIndex < projects.length; projIndex++) {
|
||||
const project = projects[projIndex];
|
||||
const pipeline = [{$project: project}, {$sort: {_id: 1}}];
|
||||
for (let fp0 of fieldPaths) {
|
||||
for (let fp1 of fieldPaths) {
|
||||
const indexField = fp0.replace("$", "");
|
||||
const coveredPlanExpectExtractStage = fp0.includes(".");
|
||||
|
||||
const resultsWithExtract = runTestWithParameter(documents, pipeline, true);
|
||||
const resultsWithoutExtract = runTestWithParameter(documents, pipeline, false);
|
||||
// Test $match then $project with covered plan.
|
||||
assert.commandWorked(db.c.createIndex({[indexField]: 1}));
|
||||
const coveredIndexPipeline = [{$match: {[indexField]: {$gt: 0}}}, {$project: {x: fp0, _id: 0}}];
|
||||
jsTest.log({"coveredIndexPipeline": coveredIndexPipeline});
|
||||
run(documents, coveredIndexPipeline, coveredPlanExpectExtractStage ? 1 : 0);
|
||||
|
||||
for (let i = 0; i < resultsWithExtract.length; i++) {
|
||||
assert.docEq(resultsWithExtract[i], resultsWithoutExtract[i]);
|
||||
}
|
||||
}
|
||||
// Test $match then $project with fetch plan.
|
||||
const fetchPlanExpectExtractStage = fp1.includes(".");
|
||||
const fetchIndexPipeline = [
|
||||
{$match: {[indexField]: {$gt: 0}}},
|
||||
{$project: {x: fp1 /*use the other field*/, _id: 0}},
|
||||
];
|
||||
jsTest.log({"fetchIndexPipeline": fetchIndexPipeline});
|
||||
run(documents, fetchIndexPipeline, fetchPlanExpectExtractStage ? 1 : 0);
|
||||
|
||||
jsTest.log("Running $groups");
|
||||
let seenExtract = false;
|
||||
for (let keyPath of fieldPaths) {
|
||||
for (let accPath of fieldPaths) {
|
||||
const pipeline = {$group: {_id: {path: keyPath}, pathSum: {$sum: accPath}}};
|
||||
// TODO SERVER-111637 revisit this try/catch. Some of these plans do not feed a result obj
|
||||
// slot into what would be the extract_field_paths stage, so the "uses extract_field_paths
|
||||
// stage assertion" can fail. We expect SERVER-111637 will resolve all these cases.
|
||||
try {
|
||||
const resultsWithExtract = runTestWithParameter(documents, pipeline, true);
|
||||
const resultsWithoutExtract = runTestWithParameter(documents, pipeline, false);
|
||||
assert(resultsWithExtract.length > 0);
|
||||
assert(resultsWithoutExtract.length > 0);
|
||||
for (let i = 0; i < resultsWithExtract.length; i++) {
|
||||
assert.docEq(resultsWithExtract[i], resultsWithoutExtract[i]);
|
||||
}
|
||||
seenExtract = true;
|
||||
jsTest.log({"Pipeline used extract": pipeline});
|
||||
} catch {
|
||||
jsTest.log({"Pipeline did not use extract": pipeline});
|
||||
assert(db.c.getIndexes().length > 1, "Index should still exist");
|
||||
assert.commandWorked(db.c.dropIndex({[indexField]: 1}));
|
||||
assert(db.c.getIndexes().length === 1, "Only _id index should still exist");
|
||||
|
||||
// Test $group and $project.
|
||||
const hasDottedPaths = fp0.includes(".") || fp1.includes(".");
|
||||
const oneExtractStagePipelines = [
|
||||
[{$project: {x: fp0, y: fp1}}],
|
||||
[{$group: {_id: {path: fp0}, pathSum: {$sum: fp1}}}],
|
||||
];
|
||||
for (let pipeline of oneExtractStagePipelines) {
|
||||
run(documents, pipeline, hasDottedPaths ? 1 : 0 /*numExpectedExtractStages*/);
|
||||
}
|
||||
|
||||
// Test $group then $project and $project then $group.
|
||||
const twoExtractStagePipelines = [
|
||||
{
|
||||
pipeline: [{$project: {x: fp0, y: fp1}}, {$group: {_id: {path: "$x"}, pathSum: {$sum: "$y"}}}],
|
||||
numExpectedExtractStages: 1,
|
||||
numExpectedExtractStagesNoDottedPaths: 0,
|
||||
},
|
||||
{
|
||||
pipeline: [
|
||||
{$group: {_id: {path: fp0}, pathSum: {$sum: fp1}}},
|
||||
{$project: {x: "$_id.path", total: "$pathSum"}},
|
||||
],
|
||||
numExpectedExtractStages: 2,
|
||||
numExpectedExtractStagesNoDottedPaths: 1,
|
||||
},
|
||||
];
|
||||
jsTest.log({"twoExtractStagePipelines": twoExtractStagePipelines});
|
||||
for (let i = 0; i < twoExtractStagePipelines.length; i++) {
|
||||
const pipeline = twoExtractStagePipelines[i].pipeline;
|
||||
const numExpectedExtractStages = twoExtractStagePipelines[i].numExpectedExtractStages;
|
||||
const numExpectedExtractStagesNoDottedPaths =
|
||||
twoExtractStagePipelines[i].numExpectedExtractStagesNoDottedPaths;
|
||||
run(
|
||||
documents,
|
||||
pipeline,
|
||||
hasDottedPaths ? numExpectedExtractStages : numExpectedExtractStagesNoDottedPaths,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
assert.eq(seenExtract, true, "expected at least one $group pipeline to use extract stage");
|
||||
|
||||
jsTest.log("All ExtractFieldPathsStage tests completed successfully!");
|
||||
} finally {
|
||||
|
||||
@ -2220,6 +2220,7 @@ mongo_cc_library(
|
||||
"//src/mongo/db/exec/sbe/values:cell_interface.cpp",
|
||||
"//src/mongo/db/exec/sbe/values:key_string_entry.cpp",
|
||||
"//src/mongo/db/exec/sbe/values:object_walk_node.cpp",
|
||||
"//src/mongo/db/exec/sbe/values:path_request.cpp",
|
||||
"//src/mongo/db/exec/sbe/values:row.cpp",
|
||||
"//src/mongo/db/exec/sbe/values:slot_printer.cpp",
|
||||
"//src/mongo/db/exec/sbe/values:ts_block.cpp",
|
||||
|
||||
@ -117,8 +117,19 @@ public:
|
||||
auto pathReqs = makePathReqs(paths);
|
||||
value::SlotVector outputSlots = generateMultipleSlotIds(pathReqs.size());
|
||||
|
||||
// A single input slot that holds the entire object.
|
||||
std::vector<PathSlot> inputs = {{{Id{}}, scanSlots[0]}};
|
||||
std::vector<PathSlot> outputs;
|
||||
outputs.reserve(pathReqs.size());
|
||||
tassert(11163704,
|
||||
"expected an output slot for each path req",
|
||||
pathReqs.size() == outputSlots.size());
|
||||
for (size_t i = 0; i < pathReqs.size(); ++i) {
|
||||
outputs.emplace_back(std::make_pair(pathReqs[i], outputSlots[i]));
|
||||
}
|
||||
|
||||
auto extractFieldPathsStage = makeS<ExtractFieldPathsStage>(
|
||||
std::move(scanStage), scanSlots[0], pathReqs, outputSlots, kEmptyPlanNodeId);
|
||||
std::move(scanStage), inputs, outputs, kEmptyPlanNodeId);
|
||||
|
||||
return std::make_pair(outputSlots, std::move(extractFieldPathsStage));
|
||||
};
|
||||
@ -138,6 +149,80 @@ TEST_F(ExtractFieldPathsStageTest, SinglePathNonNestedNonArrayTest) {
|
||||
runExtractFieldPathsTest(paths, inputs, outputs);
|
||||
}
|
||||
|
||||
// Feeds the stage a single input slot bound to the toplevel path for field "a" (instead of a slot
// holding a whole object) and expects the same scalar values to come out of the output slot.
TEST_F(ExtractFieldPathsStageTest, SinglePathNonNestedNonArrayToplevelFieldSlotTest) {
    // One sub-array per row; each sub-array holds one element per input slot.
    BSONArrayBuilder inputRows;
    inputRows << BSON_ARRAY(1) << BSON_ARRAY(2);
    auto [inputTag, inputVal] = stage_builder::makeValue(inputRows.arr());
    value::ValueGuard inputGuard{inputTag, inputVal};

    // One sub-array per row; each sub-array holds one element per output slot.
    BSONArrayBuilder expectedRows;
    expectedRows << BSON_ARRAY(1) << BSON_ARRAY(2);
    auto [expectedTag, expectedVal] = stage_builder::makeValue(expectedRows.arr());
    value::ValueGuard expectedGuard{expectedTag, expectedVal};

    auto makeStageFn = [&, this](value::SlotVector scanSlots,
                                 std::unique_ptr<PlanStage> scanStage) {
        std::vector<FieldPath> paths{"a"};
        auto pathReqs = makePathReqs(paths);
        value::SlotVector outputSlots = generateMultipleSlotIds(pathReqs.size());

        // Bind every scan slot to the toplevel field path with the same index.
        std::vector<PathSlot> inputs;
        for (size_t slotIdx = 0; slotIdx < scanSlots.size(); ++slotIdx) {
            tassert(11163708, "input path not toplevel", pathReqs[slotIdx].size() == 2);
            inputs.emplace_back(pathReqs[slotIdx], scanSlots[slotIdx]);
        }

        // Bind every requested path to its output slot.
        std::vector<PathSlot> outputs;
        outputs.reserve(pathReqs.size());
        for (size_t reqIdx = 0; reqIdx < pathReqs.size(); ++reqIdx) {
            outputs.emplace_back(pathReqs[reqIdx], outputSlots[reqIdx]);
        }

        auto extractFieldPathsStage =
            makeS<ExtractFieldPathsStage>(std::move(scanStage), inputs, outputs, kEmptyPlanNodeId);
        return std::make_pair(outputSlots, std::move(extractFieldPathsStage));
    };

    // Release both guards before handing the raw values over.
    // NOTE(review): presumably runTestMulti() takes ownership of these values -- confirm.
    inputGuard.reset();
    expectedGuard.reset();
    runTestMulti(1, inputTag, inputVal, expectedTag, expectedVal, makeStageFn);
}
|
||||
|
||||
// Feeds the stage a single input slot bound to toplevel field "a" whose value is an object
// {b: N}, requests the nested path "a.b", and expects N in the output slot.
TEST_F(ExtractFieldPathsStageTest, SingleToplevelFieldSlotNestedPathTest) {
    // One sub-array per row; each sub-array holds one element per input slot.
    BSONArrayBuilder inputRows;
    inputRows << BSON_ARRAY(BSON("b" << 1)) << BSON_ARRAY(BSON("b" << 2));
    auto [inputTag, inputVal] = stage_builder::makeValue(inputRows.arr());
    value::ValueGuard inputGuard{inputTag, inputVal};

    // One sub-array per row; each sub-array holds one element per output slot.
    BSONArrayBuilder expectedRows;
    expectedRows << BSON_ARRAY(1) << BSON_ARRAY(2);
    auto [expectedTag, expectedVal] = stage_builder::makeValue(expectedRows.arr());
    value::ValueGuard expectedGuard{expectedTag, expectedVal};

    auto makeStageFn = [&, this](value::SlotVector scanSlots,
                                 std::unique_ptr<PlanStage> scanStage) {
        std::vector<FieldPath> paths{"a.b"};
        auto pathReqs = makePathReqs(paths);
        value::SlotVector outputSlots = generateMultipleSlotIds(pathReqs.size());

        // Each scan slot is bound to the toplevel prefix of the matching request: the request's
        // first component followed directly by Id.
        std::vector<PathSlot> inputs;
        for (size_t slotIdx = 0; slotIdx < scanSlots.size(); ++slotIdx) {
            Path toplevelPath = {pathReqs[slotIdx][0], Id{}};
            inputs.emplace_back(toplevelPath, scanSlots[slotIdx]);
        }

        // Bind every requested (full) path to its output slot.
        std::vector<PathSlot> outputs;
        outputs.reserve(pathReqs.size());
        for (size_t reqIdx = 0; reqIdx < pathReqs.size(); ++reqIdx) {
            outputs.emplace_back(pathReqs[reqIdx], outputSlots[reqIdx]);
        }

        auto extractFieldPathsStage =
            makeS<ExtractFieldPathsStage>(std::move(scanStage), inputs, outputs, kEmptyPlanNodeId);
        return std::make_pair(outputSlots, std::move(extractFieldPathsStage));
    };

    // Release both guards before handing the raw values over.
    // NOTE(review): presumably runTestMulti() takes ownership of these values -- confirm.
    inputGuard.reset();
    expectedGuard.reset();
    runTestMulti(1, inputTag, inputVal, expectedTag, expectedVal, makeStageFn);
}
|
||||
|
||||
TEST_F(ExtractFieldPathsStageTest, MultiPathNonNestedNonArrayTest) {
|
||||
std::vector<FieldPath> paths{"a", "b"};
|
||||
std::vector<std::string> inputs{"{a: 1, b: 3}", "{b: 4, a: 2}"};
|
||||
|
||||
@ -34,33 +34,27 @@
|
||||
|
||||
namespace mongo::sbe {
|
||||
ExtractFieldPathsStage::ExtractFieldPathsStage(std::unique_ptr<PlanStage> input,
|
||||
value::SlotId inputSlotId,
|
||||
std::vector<value::Path> pathReqs,
|
||||
value::SlotVector outputSlotIds,
|
||||
std::vector<PathSlot> inputs,
|
||||
std::vector<PathSlot> outputs,
|
||||
PlanNodeId planNodeId,
|
||||
bool participateInTrialRunTracking)
|
||||
: PlanStage("extract_field_paths"_sd,
|
||||
nullptr /* yieldPolicy */,
|
||||
planNodeId,
|
||||
participateInTrialRunTracking),
|
||||
_inputSlotId(inputSlotId),
|
||||
_pathReqs(std::move(pathReqs)),
|
||||
_outputSlotIds(std::move(outputSlotIds)) {
|
||||
tassert(10984201,
|
||||
"expect pathReqs and outputSlotIds to be equal length",
|
||||
_pathReqs.size() == _outputSlotIds.size());
|
||||
_inputs(std::move(inputs)),
|
||||
_outputs(std::move(outputs)) {
|
||||
|
||||
for (size_t i = 0; i < _outputSlotIds.size(); i++) {
|
||||
_outputAccessorsIdxForSlotId[_outputSlotIds[i]] = i;
|
||||
for (size_t i = 0; i < _outputs.size(); i++) {
|
||||
_outputAccessorsIdxForSlotId[_outputs[i].second] = i;
|
||||
}
|
||||
_children.emplace_back(std::move(input));
|
||||
}
|
||||
|
||||
std::unique_ptr<PlanStage> ExtractFieldPathsStage::clone() const {
|
||||
return std::make_unique<ExtractFieldPathsStage>(_children[0]->clone(),
|
||||
_inputSlotId,
|
||||
_pathReqs,
|
||||
_outputSlotIds,
|
||||
_inputs,
|
||||
_outputs,
|
||||
_commonStats.nodeId,
|
||||
participateInTrialRunTracking());
|
||||
}
|
||||
@ -68,11 +62,12 @@ std::unique_ptr<PlanStage> ExtractFieldPathsStage::clone() const {
|
||||
void ExtractFieldPathsStage::constructRoot() {
|
||||
_root = std::make_unique<value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>>();
|
||||
|
||||
_recorders.reserve(_pathReqs.size());
|
||||
for (size_t i = 0; i < _pathReqs.size(); ++i) {
|
||||
_recorders.reserve(_outputs.size());
|
||||
for (size_t i = 0; i < _outputs.size(); ++i) {
|
||||
_recorders.emplace_back();
|
||||
_root->add(
|
||||
_pathReqs[i], nullptr /* filterRecorder */, &_recorders.back() /* outProjRecorder */);
|
||||
_root->add(_outputs[i].first,
|
||||
nullptr /* filterRecorder */,
|
||||
&_recorders.back() /* outProjRecorder */);
|
||||
}
|
||||
}
|
||||
|
||||
@ -81,8 +76,11 @@ void ExtractFieldPathsStage::prepare(CompileCtx& ctx) {
|
||||
|
||||
constructRoot();
|
||||
|
||||
_outputAccessors.resize(_pathReqs.size());
|
||||
_inputAccessor = _children[0]->getAccessor(ctx, _inputSlotId);
|
||||
_outputAccessors.resize(_outputs.size());
|
||||
for (const auto& [path, slotId] : _inputs) {
|
||||
auto inputAccessor = _children[0]->getAccessor(ctx, slotId);
|
||||
_root->addAccessorAtPath(inputAccessor, path);
|
||||
}
|
||||
}
|
||||
|
||||
value::SlotAccessor* ExtractFieldPathsStage::getAccessor(CompileCtx& ctx, value::SlotId slot) {
|
||||
@ -122,20 +120,36 @@ PlanState ExtractFieldPathsStage::getNext() {
|
||||
return trackPlanState(state);
|
||||
}
|
||||
|
||||
auto [inputTag, inputVal] = _inputAccessor->getViewOfValue();
|
||||
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
|
||||
_root.get(),
|
||||
inputTag,
|
||||
inputVal,
|
||||
value::bitcastTo<const char*>(inputVal),
|
||||
[](value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>* node,
|
||||
value::TypeTags eltTag,
|
||||
value::Value eltVal,
|
||||
const char* bsonPtr) {
|
||||
if (auto rec = node->projRecorder) {
|
||||
rec->recordValue(eltTag, eltVal);
|
||||
auto walk = [](value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>* node,
|
||||
value::TypeTags eltTag,
|
||||
value::Value eltVal,
|
||||
const char* bsonPtr) {
|
||||
if (auto rec = node->projRecorder) {
|
||||
rec->recordValue(eltTag, eltVal);
|
||||
}
|
||||
};
|
||||
|
||||
if (_root->inputAccessor) {
|
||||
// Should only be used for unit tests.
|
||||
auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue();
|
||||
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
|
||||
_root.get(), inputTag, inputVal, value::bitcastTo<const char*>(inputVal), walk);
|
||||
} else {
|
||||
// Important this is only for toplevel fields. For nested fields, we would need knowledge of
|
||||
// arrayness. We would also need to check for input accessors during the tree traversal.
|
||||
for (const auto& child : _root->getChildren) {
|
||||
const auto& childWalkNode = child.second;
|
||||
if (childWalkNode->inputAccessor) {
|
||||
auto [childTag, childVal] = childWalkNode->inputAccessor->getViewOfValue();
|
||||
value::walkField<value::ScalarProjectionPositionInfoRecorder>(
|
||||
childWalkNode.get(),
|
||||
childTag,
|
||||
childVal,
|
||||
value::bitcastTo<const char*>(childVal),
|
||||
walk);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Consume all outputs
|
||||
for (size_t i = 0; i < _recorders.size(); ++i) {
|
||||
@ -169,16 +183,27 @@ const SpecificStats* ExtractFieldPathsStage::getSpecificStats() const {
|
||||
std::vector<DebugPrinter::Block> ExtractFieldPathsStage::debugPrint() const {
|
||||
auto ret = PlanStage::debugPrint();
|
||||
|
||||
DebugPrinter::addIdentifier(ret, _inputSlotId);
|
||||
ret.emplace_back(DebugPrinter::Block("pathReqs[`"));
|
||||
for (size_t idx = 0; idx < _pathReqs.size(); ++idx) {
|
||||
ret.emplace_back(DebugPrinter::Block("inputs[`"));
|
||||
for (size_t idx = 0; idx < _inputs.size(); ++idx) {
|
||||
if (idx) {
|
||||
ret.emplace_back(DebugPrinter::Block("`,"));
|
||||
}
|
||||
DebugPrinter::addIdentifier(ret, _outputSlotIds[idx]);
|
||||
const auto& [path, slotId] = _inputs[idx];
|
||||
DebugPrinter::addIdentifier(ret, slotId);
|
||||
ret.emplace_back("=");
|
||||
ret.emplace_back(value::pathToString(path));
|
||||
}
|
||||
ret.emplace_back("`]");
|
||||
|
||||
ret.emplace_back(value::pathToString(_pathReqs[idx]));
|
||||
ret.emplace_back(DebugPrinter::Block("outputs[`"));
|
||||
for (size_t idx = 0; idx < _outputs.size(); ++idx) {
|
||||
if (idx) {
|
||||
ret.emplace_back(DebugPrinter::Block("`,"));
|
||||
}
|
||||
const auto& [path, slotId] = _outputs[idx];
|
||||
DebugPrinter::addIdentifier(ret, slotId);
|
||||
ret.emplace_back("=");
|
||||
ret.emplace_back(value::pathToString(path));
|
||||
}
|
||||
ret.emplace_back("`]");
|
||||
|
||||
|
||||
@ -44,22 +44,29 @@
|
||||
|
||||
|
||||
namespace mongo::sbe {
|
||||
using PathSlot = std::pair<value::Path, value::SlotId>;
|
||||
/**
|
||||
* Given an input stage `input` with a single slot `inputSlotId` containing an object and a set of
|
||||
* requested paths `pathReqs`, populate output slots `outputSlotIds` with the value at each path.
|
||||
* 'ExtractFieldPathsStage' takes as input:
|
||||
*
|
||||
* This stage is used to evaluate ExpressionFieldPath's in a single walk over the input object.
|
||||
* - A list of input (path, slotId) pairs. These represent the input slot accessors, and their
|
||||
* location in the path tree. There is either a single (path, slotId) pair that holds the
|
||||
* entire result, or otherwise there is a list of slots for toplevel fields.
|
||||
*
|
||||
* Debug string representation:
|
||||
* extract_field_paths `inputSlotId` pathReqs[outputSlotIds[i] = pathReqs[i], ... ,
|
||||
* outputSlotIds[N] = pathReqs[N]]
|
||||
* - A list of output (path, slotId) pairs. These represent the output slot accessors, and their
|
||||
* location in the path tree. There can be arbitrarily many outputs.
|
||||
*
|
||||
* The output slot(s) are populated in a single pass over the input slot(s).
|
||||
*
|
||||
* Example debug string representation:
|
||||
*
|
||||
* extract_field_paths inputs[s4 = Get(a)/Id, s5 = Get(b)/Id] outputs[s6 =
|
||||
* Get(a)/Traverse/Get(c)/Id, s7 = Get(b)/Traverse/Get(d)/Id, s8 = Get(b)/Traverse/Get(e)/Id]
|
||||
*/
|
||||
class ExtractFieldPathsStage final : public PlanStage {
|
||||
public:
|
||||
ExtractFieldPathsStage(std::unique_ptr<PlanStage> input,
|
||||
value::SlotId inputSlotId,
|
||||
std::vector<value::Path> pathReqs,
|
||||
value::SlotVector outputSlotIds,
|
||||
std::vector<PathSlot> inputs,
|
||||
std::vector<PathSlot> outputs,
|
||||
PlanNodeId planNodeId,
|
||||
bool participateInTrialRunTracking = true);
|
||||
|
||||
@ -89,11 +96,9 @@ protected:
|
||||
private:
|
||||
void constructRoot();
|
||||
|
||||
const value::SlotId _inputSlotId;
|
||||
const std::vector<value::Path> _pathReqs;
|
||||
const value::SlotVector _outputSlotIds;
|
||||
const std::vector<PathSlot> _inputs;
|
||||
const std::vector<PathSlot> _outputs;
|
||||
|
||||
value::SlotAccessor* _inputAccessor = nullptr;
|
||||
std::unique_ptr<value::ObjectWalkNode<value::ScalarProjectionPositionInfoRecorder>> _root =
|
||||
nullptr;
|
||||
std::vector<value::OwnedValueAccessor> _outputAccessors;
|
||||
|
||||
@ -46,53 +46,4 @@ std::unique_ptr<CellBlock> MaterializedCellBlock::clone() const {
|
||||
ret->_filterPosInfo = _filterPosInfo;
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string pathToString(const Path& p) {
|
||||
std::string out;
|
||||
size_t idx = 0;
|
||||
for (auto& component : p) {
|
||||
if (holds_alternative<Id>(component)) {
|
||||
out += "Id";
|
||||
} else if (holds_alternative<Get>(component)) {
|
||||
out += "Get(";
|
||||
out += get<Get>(component).field;
|
||||
out += ')';
|
||||
} else if (holds_alternative<Traverse>(component)) {
|
||||
out += "Traverse";
|
||||
}
|
||||
++idx;
|
||||
|
||||
if (idx != p.size()) {
|
||||
out.push_back('/');
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Streams the pathToString() rendering of 'path' into 'os' and returns 'os'.
 */
std::ostream& operator<<(std::ostream& os, const Path& path) {
    return os << pathToString(path);
}
|
||||
|
||||
std::string PathRequest::toString() const {
|
||||
return str::stream() << (type == kFilter ? "FilterPath" : "ProjectPath") << "("
|
||||
<< pathToString(path) << ")";
|
||||
}
|
||||
|
||||
/**
 * Returns the field name stored in the first component of the path.
 * The first component is read as a Get without checking; callers must only use this on paths
 * that are non-empty and start with a Get.
 */
StringData PathRequest::getTopLevelField() const {
    const auto& firstStep = path[0];
    return get<Get>(firstStep).field;
}
|
||||
|
||||
std::string PathRequest::getFullPath() const {
|
||||
StringBuilder sb;
|
||||
for (const auto& component : path) {
|
||||
if (holds_alternative<Get>(component)) {
|
||||
if (sb.len() != 0) {
|
||||
sb.append(".");
|
||||
}
|
||||
sb.append(get<Get>(component).field);
|
||||
}
|
||||
}
|
||||
return sb.str();
|
||||
}
|
||||
} // namespace mongo::sbe::value
|
||||
|
||||
@ -32,6 +32,7 @@
|
||||
#include "mongo/db/exec/sbe/values/block_interface.h"
|
||||
#include "mongo/db/exec/sbe/values/bson.h"
|
||||
#include "mongo/db/exec/sbe/values/cell_interface.h"
|
||||
#include "mongo/db/exec/sbe/values/slot.h"
|
||||
#include "mongo/db/exec/sbe/values/util.h"
|
||||
#include "mongo/db/exec/sbe/values/value.h"
|
||||
|
||||
@ -171,6 +172,13 @@ struct ObjectWalkNode {
|
||||
FilterPositionInfoRecorder* filterRecorder,
|
||||
ProjectionRecorder* outProjBlockRecorder,
|
||||
size_t pathIdx = 0);
|
||||
|
||||
void addAccessorAtPath(value::SlotAccessor* inputAccessor,
|
||||
const Path& path,
|
||||
size_t pathIdx = 0);
|
||||
|
||||
// Non-null if and only if this node has a source slot.
|
||||
value::SlotAccessor* inputAccessor = nullptr;
|
||||
};
|
||||
|
||||
template <class ProjectionRecorder>
|
||||
@ -213,6 +221,32 @@ void ObjectWalkNode<ProjectionRecorder>::add(const Path& path,
|
||||
}
|
||||
}
|
||||
|
||||
template <class ProjectionRecorder>
|
||||
void ObjectWalkNode<ProjectionRecorder>::addAccessorAtPath(value::SlotAccessor* outInputAccessor,
|
||||
const Path& path,
|
||||
size_t pathIdx /*= 0*/) {
|
||||
if (pathIdx == 0) {
|
||||
// Check some invariants about the path.
|
||||
tassert(11163706, "Cannot be given empty path", !path.empty());
|
||||
tassert(11163707, "Path must end with Id", holds_alternative<Id>(path.back()));
|
||||
}
|
||||
|
||||
if (holds_alternative<Get>(path[pathIdx])) {
|
||||
auto& get = std::get<Get>(path[pathIdx]);
|
||||
if (auto it = getChildren.find(get.field); it != getChildren.end()) {
|
||||
it->second->addAccessorAtPath(outInputAccessor, path, pathIdx + 1);
|
||||
}
|
||||
} else if (holds_alternative<Traverse>(path[pathIdx])) {
|
||||
tassert(11163703, "expected nonzero pathIdx", pathIdx != 0);
|
||||
if (traverseChild) {
|
||||
traverseChild->addAccessorAtPath(outInputAccessor, path, pathIdx + 1);
|
||||
}
|
||||
} else if (holds_alternative<Id>(path[pathIdx])) {
|
||||
tassert(11163702, "Id must be at end of path", pathIdx == path.size() - 1);
|
||||
inputAccessor = outInputAccessor;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ProjectionRecorder, class Cb>
|
||||
requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*>
|
||||
void walkField(ObjectWalkNode<ProjectionRecorder>* node,
|
||||
|
||||
83
src/mongo/db/exec/sbe/values/path_request.cpp
Normal file
83
src/mongo/db/exec/sbe/values/path_request.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
/**
|
||||
* Copyright (C) 2025-present MongoDB, Inc.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the Server Side Public License, version 1,
|
||||
* as published by MongoDB, Inc.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* Server Side Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the Server Side Public License
|
||||
* along with this program. If not, see
|
||||
* <http://www.mongodb.com/licensing/server-side-public-license>.
|
||||
*
|
||||
* As a special exception, the copyright holders give permission to link the
|
||||
* code of portions of this program with the OpenSSL library under certain
|
||||
* conditions as described in each individual source file and distribute
|
||||
* linked combinations including the program with the OpenSSL library. You
|
||||
* must comply with the Server Side Public License in all respects for
|
||||
* all of the code used other than as permitted herein. If you modify file(s)
|
||||
* with this exception, you may extend this exception to your version of the
|
||||
* file(s), but you are not obligated to do so. If you do not wish to do so,
|
||||
* delete this exception statement from your version. If you delete this
|
||||
* exception statement from all source files in the program, then also delete
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include "mongo/db/exec/sbe/values/path_request.h"
|
||||
|
||||
#include "mongo/db/exec/sbe/values/value.h"
|
||||
|
||||
namespace mongo::sbe::value {
|
||||
std::string pathToString(const Path& p) {
|
||||
std::string out;
|
||||
size_t idx = 0;
|
||||
for (auto& component : p) {
|
||||
if (holds_alternative<Id>(component)) {
|
||||
out += "Id";
|
||||
} else if (holds_alternative<Get>(component)) {
|
||||
out += "Get(";
|
||||
out += get<Get>(component).field;
|
||||
out += ')';
|
||||
} else if (holds_alternative<Traverse>(component)) {
|
||||
out += "Traverse";
|
||||
}
|
||||
++idx;
|
||||
|
||||
if (idx != p.size()) {
|
||||
out.push_back('/');
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Streams the pathToString() rendering of 'path' into 'os' and returns 'os'.
 */
std::ostream& operator<<(std::ostream& os, const Path& path) {
    return os << pathToString(path);
}
|
||||
|
||||
std::string PathRequest::toString() const {
|
||||
return str::stream() << (type == kFilter ? "FilterPath" : "ProjectPath") << "("
|
||||
<< pathToString(path) << ")";
|
||||
}
|
||||
|
||||
/**
 * Returns the field name stored in the first component of the path.
 * The first component is read as a Get without checking; callers must only use this on paths
 * that are non-empty and start with a Get.
 */
StringData PathRequest::getTopLevelField() const {
    const auto& firstStep = path[0];
    return get<Get>(firstStep).field;
}
|
||||
|
||||
std::string PathRequest::getFullPath() const {
|
||||
StringBuilder sb;
|
||||
for (const auto& component : path) {
|
||||
if (holds_alternative<Get>(component)) {
|
||||
if (sb.len() != 0) {
|
||||
sb.append(".");
|
||||
}
|
||||
sb.append(get<Get>(component).field);
|
||||
}
|
||||
}
|
||||
return sb.str();
|
||||
}
|
||||
} // namespace mongo::sbe::value
|
||||
@ -27,6 +27,11 @@
|
||||
* it in the license file.
|
||||
*/
|
||||
|
||||
#include "mongo/base/string_data.h"
|
||||
|
||||
#include <string>
|
||||
#include <variant>
|
||||
|
||||
#pragma once
|
||||
namespace mongo::sbe::value {
|
||||
|
||||
|
||||
@ -36,29 +36,10 @@
|
||||
|
||||
namespace mongo::stage_builder {
|
||||
|
||||
bool eligibleForExtractFieldPathsStage(const PlanStageSlots& childStageOutputs) {
|
||||
if (childStageOutputs.hasBlockOutput()) {
|
||||
LOGV2_DEBUG(11087206,
|
||||
3,
|
||||
"Child stage outputs rejected for ExtractFieldPathsStage",
|
||||
"reason"_attr = "has block output");
|
||||
return false;
|
||||
}
|
||||
if (!childStageOutputs.hasResultObj()) {
|
||||
LOGV2_DEBUG(11087207,
|
||||
3,
|
||||
"Child stage outputs rejected for ExtractFieldPathsStage",
|
||||
"reason"_attr = "does not include result object");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
boost::optional<PlanStageReqs> makeExtractFieldPathsPlanStageReqs(
|
||||
StageBuilderState& state,
|
||||
const std::vector<const Expression*>& expressions,
|
||||
const PlanStageSlots& childStageOutputs) {
|
||||
PlanStageReqs extractFieldPathsReqs;
|
||||
if (!state.ifrContext.getSavedFlagValue(feature_flags::gFeatureFlagExtractFieldPathsSbeStage)) {
|
||||
LOGV2_DEBUG(11087205,
|
||||
3,
|
||||
@ -66,10 +47,15 @@ boost::optional<PlanStageReqs> makeExtractFieldPathsPlanStageReqs(
|
||||
"reason"_attr = "feature flag is disabled");
|
||||
return boost::none;
|
||||
}
|
||||
if (!eligibleForExtractFieldPathsStage(childStageOutputs)) {
|
||||
if (childStageOutputs.hasBlockOutput()) {
|
||||
LOGV2_DEBUG(11087206,
|
||||
3,
|
||||
"Child stage outputs rejected for ExtractFieldPathsStage",
|
||||
"reason"_attr = "has block output");
|
||||
return boost::none;
|
||||
}
|
||||
bool ok = true;
|
||||
PlanStageReqs extractFieldPathsReqs;
|
||||
for (const Expression* expression : expressions) {
|
||||
if (!ok) {
|
||||
break;
|
||||
@ -123,49 +109,79 @@ boost::optional<PlanStageReqs> makeExtractFieldPathsPlanStageReqs(
|
||||
if (!ok) {
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
if (extractFieldPathsReqs.size() == 0) {
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
auto childStageOutputsData = childStageOutputs.getSlotNameToIdMap();
|
||||
for (const std::string& pathExpr : extractFieldPathsReqs.getPathExprs()) {
|
||||
FieldPath fieldPath{pathExpr};
|
||||
tassert(11163705,
|
||||
"expected child stage of extract_field_paths stage to have all required "
|
||||
"toplevel fields",
|
||||
childStageOutputs.has({PlanStageSlots::kField, fieldPath.getFieldName(0)}));
|
||||
}
|
||||
|
||||
return boost::make_optional(extractFieldPathsReqs);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
sbe::value::Path toPath(const T& fullPath) {
|
||||
sbe::value::Path ret;
|
||||
|
||||
FieldPath fieldPath{fullPath};
|
||||
for (size_t i = 0; i < fieldPath.getPathLength() - 1; ++i) {
|
||||
ret.emplace_back(sbe::value::Get{.field = std::string(fieldPath.getFieldName(i))});
|
||||
ret.emplace_back(sbe::value::Traverse{});
|
||||
}
|
||||
// Omit the Traverse for the last path component.
|
||||
if (fieldPath.getPathLength() != 0) {
|
||||
ret.emplace_back(sbe::value::Get{
|
||||
.field = std::string(fieldPath.getFieldName(fieldPath.getPathLength() - 1))});
|
||||
}
|
||||
ret.emplace_back(sbe::value::Id{});
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::pair<SbStage, PlanStageSlots> buildExtractFieldPaths(SbStage stage,
|
||||
StageBuilderState& state,
|
||||
const PlanStageSlots& childStageOutputs,
|
||||
PlanStageReqs& extractFieldPathsReqs,
|
||||
const PlanNodeId nodeId) {
|
||||
sbe::value::SlotVector outSlots;
|
||||
std::vector<sbe::value::Path> pathReqs;
|
||||
std::vector<std::pair<sbe::value::Path, sbe::value::SlotId>> outputs;
|
||||
|
||||
PlanStageSlots extractionOutputs;
|
||||
for (const std::string& fullPath : extractFieldPathsReqs.getPathExprs()) {
|
||||
FieldPath fieldPath{fullPath};
|
||||
tassert(11087200,
|
||||
"extract_field_paths does not extract toplevel fields that already have slots",
|
||||
!childStageOutputs.has({PlanStageSlots::kField, fullPath}));
|
||||
// Create path.
|
||||
sbe::value::Path path;
|
||||
for (size_t i = 0; i < fieldPath.getPathLength() - 1; ++i) {
|
||||
path.emplace_back(sbe::value::Get{.field = std::string(fieldPath.getFieldName(i))});
|
||||
path.emplace_back(sbe::value::Traverse{});
|
||||
}
|
||||
// Omit the Traverse for the last path component.
|
||||
path.emplace_back(sbe::value::Get{
|
||||
.field = std::string(fieldPath.getFieldName(fieldPath.getPathLength() - 1))});
|
||||
path.emplace_back(sbe::value::Id{});
|
||||
pathReqs.push_back(std::move(path));
|
||||
|
||||
// Create slot id for path.
|
||||
sbe::value::SlotId slot = state.slotId();
|
||||
outSlots.emplace_back(slot);
|
||||
outputs.push_back({toPath(fullPath), slot});
|
||||
extractionOutputs.set(std::pair(PlanStageSlots::kPathExpr, fullPath), SbSlot{slot});
|
||||
}
|
||||
tassert(10757507, "expected nonempty outSlots", !outSlots.empty());
|
||||
auto childResultSlot = childStageOutputs.getResultObj();
|
||||
return {sbe::makeS<sbe::ExtractFieldPathsStage>(std::move(stage),
|
||||
childResultSlot.getId(),
|
||||
pathReqs, // TODO this is by value
|
||||
std::move(outSlots),
|
||||
nodeId),
|
||||
tassert(10757507, "expected nonempty outputs", outputs.size() > 0);
|
||||
|
||||
std::vector<std::pair<sbe::value::Path, sbe::value::SlotId>> inputs;
|
||||
// Extract fields from a set of toplevel field slots.
|
||||
for (auto& p : childStageOutputs.getSlotNameToIdMap()) {
|
||||
const PlanStageSlots::UnownedSlotName& slotName = p.first;
|
||||
if (slotName.first != PlanStageSlots::kField) {
|
||||
continue;
|
||||
}
|
||||
auto path = toPath(slotName.second);
|
||||
tassert(11163701,
|
||||
"Expected only toplevel paths as input to extract_field_paths stage",
|
||||
path.size() == 2);
|
||||
std::pair<sbe::value::Path, sbe::value::SlotId> input = {path, p.second.getId()};
|
||||
inputs.push_back(input);
|
||||
}
|
||||
tassert(11163700, "Expected nonempty inputs", !inputs.empty());
|
||||
|
||||
return {sbe::makeS<sbe::ExtractFieldPathsStage>(std::move(stage), inputs, outputs, nodeId),
|
||||
extractionOutputs};
|
||||
}
|
||||
} // namespace mongo::stage_builder
|
||||
|
||||
Loading…
Reference in New Issue
Block a user