SERVER-119737 Join Optimization: A plan stability framework for testing joins (#49647)
GitOrigin-RevId: a3406ff4de8310e3de857c959ae4d03b62760fdb
This commit is contained in:
parent
7f8bd2da70
commit
c73601f049
@ -0,0 +1,27 @@
|
||||
# Resmoke suite that runs the join-optimization plan stability golden tests
# against a single mongod with join optimization enabled.
test_kind: js_test

selector:
  roots:
    - jstests/query_golden/join_opt/plan_stability*.js
executor:
  config:
    shell_options:
      crashOnInvalidBSONError: ""
      objcheck: ""
      eval: |
        // Keep in sync with query_golden_*.yml.
        await import("jstests/libs/override_methods/detect_spawning_own_mongod.js");
        await import("jstests/libs/override_methods/golden_overrides.js");
        import {beginGoldenTest} from "jstests/libs/begin_golden_test.js";
        beginGoldenTest("jstests/query_golden/expected_output");
        // Enforce plain log format until json format is the default.
        TestData.logFormat = "plain";
  fixture:
    class: MongoDFixture
    mongod_options:
      set_parameters:
        enableTestCommands: 1
        internalEnableJoinOptimization: true
        # Deterministic sampling is required in order to get
        # deterministic query plans in plan stability tests
        internalQuerySamplingBySequentialScan: true
|
||||
@ -2497,6 +2497,22 @@ tasks:
|
||||
vars:
|
||||
suite: query_golden_sharding
|
||||
|
||||
# Task that fetches the TPC-H dataset and the database tools before running
# the plan stability suite.
- <<: *task_template
  name: query_golden_join_optimization_plan_stability
  tags: [
      "assigned_to_jira_team_server_query_optimization",
      "experimental",
      "blocked_in_query_alias", # We explicitly specify this task in query alias
    ]
  commands:
    - func: "do setup"
    - func: "fetch tpch dataset"
      vars:
        scale: 0.1
        checksum: NaH4xBjZ0Sw4oN7Dk3+I5QeVhmveHLP/rcGYAe9z5pU=
    - func: "fetch mongodb database tools"
    - func: "run tests"
|
||||
|
||||
- <<: *task_template
|
||||
name: query_tester_self_test
|
||||
tags: ["assigned_to_jira_team_server_query_optimization", "default"]
|
||||
|
||||
@ -601,6 +601,8 @@ buildvariants:
|
||||
cron: "0 1 * * *" # From the ${project_nightly_cron} parameter.
|
||||
- name: query_golden_join_optimization
|
||||
cron: "0 1 * * *" # From the ${project_nightly_cron} parameter.
|
||||
- name: query_golden_join_optimization_plan_stability
|
||||
cron: "0 1 * * *" # From the ${project_nightly_cron} parameter.
|
||||
- name: query_join_optimization_passthrough_gen
|
||||
cron: "0 1 * * *" # From the ${project_nightly_cron} parameter.
|
||||
- name: query_join_optimization_no_passthrough_with_mongod_gen
|
||||
|
||||
33
jstests/libs/query/tpch_dataset.js
Normal file
33
jstests/libs/query/tpch_dataset.js
Normal file
@ -0,0 +1,33 @@
|
||||
import {Mongorestore} from "jstests/libs/mongodb_database_tools.js";
|
||||
import {checkPauseAfterPopulate} from "jstests/libs/pause_after_populate.js";
|
||||
|
||||
/**
 * Restore the TPC-H dataset from a mongorestore archive into a database named after the
 * running test (`jsTestName()`), remapping the archive's `tpch.*` namespaces onto it.
 *
 * Prerequisites:
 *  - the `mongorestore` tool, accessible on the $PATH. It is part of the
 *    "MongoDB Database Tools" package, available at
 *    https://www.mongodb.com/try/download/database-tools
 *  - the TPC-H dataset, located in a directory named `tpc-h` that is on the same level as
 *    the mongodb repository. The dataset is available from the `query-benchmark-data`
 *    S3 bucket.
 *
 * In evergreen, tasks such as `query_golden_join_optimization_plan_stability`
 * make sure the prerequisites are already in place.
 *
 * @param scale - interpolated into the archive file name
 *     `../tpc-h/tpch-${scale}-normalized.archive.gz`.
 * @returns the handle of the populated database, obtained through the global `db` connection.
 */
export function populateTPCHDataset(scale) {
    const restorer = new Mongorestore();
    const targetDbName = jsTestName();

    const restoreOptions = {
        archive: `../tpc-h/tpch-${scale}-normalized.archive.gz`,
        nsFrom: "tpch.*",
        nsTo: `${targetDbName}.*`,
        drop: true,
        maintainInsertionOrder: true,
        gzip: true,
    };
    restorer.execute(restoreOptions);

    // Gives the developer a chance to inspect the freshly loaded data, see pause_after_populate.js.
    checkPauseAfterPopulate();

    const connection = db.getMongo();
    return connection.getDB(targetDbName);
}
|
||||
@ -0,0 +1,91 @@
|
||||
# Introduction
|
||||
|
||||
The plan stability tests for join optimization are golden tests that execute a number of joins against the TPC-H dataset.
|
||||
|
||||
For each pipeline we persist the following in the golden test output:
|
||||
|
||||
- the MQL command, including the base table and the pipeline
|
||||
- a concise representation of the winning plan for the query
|
||||
- execution counters that quantify the effort it took to execute the query in terms of docs and keys examined
|
||||
- data about the resultset, such as the number of rows returned
|
||||
|
||||
## Prerequisites
|
||||
|
||||
This test requires the following:
|
||||
|
||||
- The `mongorestore` tool, accessible on the $PATH. This tool is part of the [MongoDB Database Tools](https://www.mongodb.com/try/download/database-tools) package.
|
||||
|
||||
- The TPC-H dataset, located in a directory named `tpc-h` that is on the same level as the mongodb repository. The dataset is available from the `query-benchmark-data` S3 bucket.
|
||||
|
||||
In evergreen, tasks such as `query_golden_join_optimization_plan_stability` make sure the prerequisites are already in place.
|
||||
|
||||
## Running
|
||||
|
||||
```
|
||||
buildscripts/resmoke.py run --suites=query_golden_join_optimization_plan_stability jstests/query_golden/join_opt/plan_stability_*
|
||||
```
|
||||
|
||||
followed by
|
||||
|
||||
```
|
||||
buildscripts/golden_test.py diff
|
||||
```
|
||||
|
||||
to view any differences as compared to the expected output.
|
||||
|
||||
## Debugging
|
||||
|
||||
To obtain a running, populated MongoDB instance, start `resmoke.py run` with the `--pauseAfterPopulate` option. This will start mongod, load the data and then pause resmoke at
|
||||
the following line:
|
||||
|
||||
```
|
||||
[js_test:plan_stability_join_opt_tpch] [jsTest] TestData.pauseAfterPopulate is set. Pausing indefinitely ...
|
||||
```
|
||||
|
||||
You can then access the MongoDB instance at the default port for testing:
|
||||
|
||||
```
|
||||
mongosh mongodb://127.0.0.1:20000
|
||||
```
|
||||
|
||||
Note that the data will usually be loaded in a database whose name matches the name of the test.
|
||||
|
||||
## A note on the queries
|
||||
|
||||
The queries you see in files, diffs, and bug reports may be in various formats, depending on whether they were dumped using JavaScript, Python, or some other method.
|
||||
|
||||
Therefore, it is important to obtain the query plan of the query and make sure that what you are seeing locally matches the plan from the bug report.
|
||||
|
||||
The following caveats are currently known:
|
||||
|
||||
### Typing information for timestamps
|
||||
|
||||
Typing information for timestamps is frequently lost, so a query may contain ISO timestamps as strings:
|
||||
|
||||
```
|
||||
{'l_commitdate': {'$lt': '1993-03-17T00:00:00'}}
|
||||
```
|
||||
|
||||
Such a predicate is unlikely to match any rows, so your query will not behave as it originally did.
|
||||
|
||||
You will need to manually convert this into a date:
|
||||
|
||||
```
|
||||
{'l_commitdate': {'$lt': new ISODate('1993-03-17T00:00:00')}}
|
||||
```
|
||||
|
||||
Since the typing information has been lost somewhere along the pipeline, no existing library is available to restore it for you.
|
||||
|
||||
### EJSON output
|
||||
|
||||
Sometimes the query will be provided in EJSON, so you will see:
|
||||
|
||||
```
|
||||
{$regex: {$regex: ... }}
|
||||
```
|
||||
|
||||
in the output.
|
||||
|
||||
mongosh's `aggregate()` does not support EJSON directly, so passing EJSON to it is not going to produce the expected results.
|
||||
|
||||
Either pass this output through `EJSON.parse()` (if your input is a string) or `EJSON.deserialize()` (if your input is parsed already), or manually convert it to standard MQL.
|
||||
365
jstests/query_golden/libs/plan_stability_utils.js
Normal file
365
jstests/query_golden/libs/plan_stability_utils.js
Normal file
@ -0,0 +1,365 @@
|
||||
import {getPlanRankerMode} from "jstests/libs/query/cbr_utils.js";
|
||||
import {
|
||||
joinPlanToString,
|
||||
jsonifyMultilineString,
|
||||
newlineBeforeEachStage,
|
||||
trimPlanToStagesAndIndexes,
|
||||
} from "jstests/query_golden/libs/pretty_printers.js";
|
||||
|
||||
/**
 * Render `num` through its own toString() and left-pad the text with spaces until it is
 * `width` characters long. Text that is already longer than `width` is returned unchanged.
 */
export function padNumber(num, width = 6) {
    const text = num.toString();
    return text.padStart(width, " ");
}
|
||||
|
||||
/**
 * Computes an abstract sort effort for the query, defined as
 *     (LOG(nReturned) + 1) * inputStage.nReturned
 * for each SORT stage, accumulated over the stage itself and everything reachable through
 * its `inputStage` / `inputStages` children.
 *
 * @param stage - a stage node carrying `stage`, `nReturned` and optionally
 *     `inputStage` / `inputStages`.
 * @returns the accumulated effort, rounded to the nearest integer; 0 if there is no SORT.
 */
export function extractSortEffort(stage) {
    let effort = 0;

    if (stage.stage === "SORT" && stage.inputStage.nReturned > 0) {
        // Math.log(0) is -Infinity, which would turn the whole effort (and the golden output)
        // into -Infinity. A SORT that returned nothing is charged only for consuming its input.
        const logTerm = stage.nReturned > 0 ? Math.log(stage.nReturned) : 0;

        // We +1 here because log(1) = 0 but the effort is still non-zero.
        effort += (logTerm + 1) * stage.inputStage.nReturned;
    }

    if (stage.inputStage) {
        effort += extractSortEffort(stage.inputStage);
    }

    if (stage.inputStages) {
        for (const inputStage of stage.inputStages) {
            effort += extractSortEffort(inputStage);
        }
    }

    return Math.round(effort);
}
|
||||
|
||||
/**
 * Sum the values of `counter` found anywhere in the explain output `tree` at one of two places:
 *  - directly inside an object stored under an `executionStats` key;
 *  - on an object that also has a `$lookup` key (the counter sits at the same nesting level
 *    as the `$lookup`).
 * Counters that appear elsewhere, e.g. on individual nested plan stages, are not taken
 * into account.
 *
 * Values may be plain numbers or wrappers exposing `toNumber()` (presumably the shell's
 * NumberLong); a counter that is absent contributes 0 instead of poisoning the sum with NaN.
 *
 * @param tree - the explain output, or any fragment of it.
 * @param {string} counter - the counter name, e.g. "totalKeysExamined".
 * @returns {number} the sum of all matching counters.
 */
function sumCounters(tree, counter) {
    let sum = 0;

    // Normalize a counter value to a plain number.
    const asNumber = (value) => {
        if (value === undefined || value === null) {
            return 0;
        }
        return typeof value.toNumber === "function" ? value.toNumber() : value;
    };

    function walk(node) {
        if (Array.isArray(node)) {
            for (const item of node) {
                walk(item);
            }
            return;
        }

        if (node === null || typeof node !== "object") {
            return;
        }

        for (const [key, value] of Object.entries(node)) {
            if (key === "executionStats") {
                sum += asNumber(value?.[counter]);
            } else if (key === "$lookup" && node[counter] !== undefined) {
                // The counter is at the same nesting level as the $lookup
                sum += asNumber(node[counter]);
            }

            // Recurse into child values
            walk(value);
        }
    }

    walk(tree);
    return sum;
}
|
||||
|
||||
/**
 * Return the total number of documents returned by checking the nReturned value of the last
 * plan stage.
 *
 * The last stage is the final element of `explain.stages` when that array is present and
 * non-empty, otherwise `explain` itself. On that stage, nReturned is looked up in
 * `executionStats`, then in `$cursor.executionStats`, then directly on the stage.
 * Wrappers exposing `toNumber()` (presumably the shell's NumberLong) are converted to
 * plain numbers.
 *
 * @param explain - the output of an explain command run with executionStats verbosity.
 * @returns the nReturned of the last stage.
 * Fails the `assert.neq` assertion if no nReturned value could be located.
 */
function getNReturned(explain) {
    const stages = explain.stages;
    // An empty `stages` array has no last element; fall back to the top level so that the
    // assertion below can report the problem instead of a TypeError.
    const lastStage = Array.isArray(stages) && stages.length > 0 ? stages[stages.length - 1] : explain;

    let nReturned;
    if (lastStage.executionStats !== undefined) {
        nReturned = lastStage.executionStats.nReturned;
    } else if (lastStage["$cursor"] !== undefined) {
        nReturned = lastStage["$cursor"].executionStats.nReturned;
    } else if (lastStage.nReturned !== undefined) {
        nReturned = lastStage.nReturned;
    }

    if (nReturned !== undefined && nReturned !== null && typeof nReturned.toNumber === "function") {
        nReturned = nReturned.toNumber();
    }

    // Only serialize the (potentially very large) explain when we are actually going to fail.
    if (nReturned === undefined) {
        assert.neq(nReturned, undefined, `Unable to calculate nReturned from ${tojson(explain)}`);
    }
    return nReturned;
}
|
||||
|
||||
/**
 * Produce the output for plan stability golden tests that target CBR.
 *
 * Each pipeline is explained with "executionStats" verbosity as an aggregation over
 * `collName`. The output is a JSON document, printed piecewise, that contains the plan and
 * stats for each pipeline as well as a summary section with totals:
 * {
 *     ">>>pipelines": [
 *         {
 *             ">>>pipeline": <pipeline>,
 *             "winningPlan": <winningPlan>,
 *             "keys"       : <totalKeysExamined>,
 *             "docs"       : <totalDocsExamined>,
 *             "sorts"      : <sortEffort>,
 *             "plans"      : <numberOfPlans>,
 *             "rows"       : <nReturned>
 *         },
 *         // or, if the explain failed: {">>>pipeline": <pipeline>, "error": <message>}
 *         ...
 *     ],
 *     ">>>totals": {
 *         "pipelines": <numberOfPipelines>, "plans": <numberOfPlans>,
 *         "keys": <totalKeysExamined>, "docs": <totalDocsExamined>,
 *         "sorts": <totalSortEffort>, "rows": <nReturned>, "errors": <numberOfErrors>
 *     },
 *     ">>>parameters": {<server parameter>: <value>, ...}
 * }
 *
 * The sort effort is an abstract measure of the complexity of any SORT stages, and is
 * defined as (LOG(nReturned) + 1) * inputStage.nReturned.
 *
 * @param db - database handle used for the explain and the get/setParameter commands.
 * @param {string} collName - the collection to aggregate over.
 * @param {Array} pipelines - the aggregation pipelines to explain.
 */
export function runPlanStabilityPipelines(db, collName, pipelines) {
    let totalPlans = 0;
    let totalKeys = 0;
    let totalDocs = 0;
    let totalSorts = 0;
    let totalRows = 0;
    let totalErrors = 0;

    let paramsToRestore;

    // All-feature-flags variants enable CBR even for the
    // query_golden_classic suite.
    // Plan stability test running with CBR need the following
    // knobs. Set them before starting the tests & restore them
    // after, as the query_golden_classic suite runs other
    // golden tests, which do not expect these knobs.
    if (getPlanRankerMode(db) !== "multiPlanning") {
        // CBR enabled
        paramsToRestore = assert.commandWorked(
            db.adminCommand({
                getParameter: 1,
                internalQueryPlannerEnableSortIndexIntersection: 1,
                internalQuerySamplingBySequentialScan: 1,
            }),
        );
        assert.commandWorked(
            db.adminCommand({
                setParameter: 1,
                internalQueryPlannerEnableSortIndexIntersection: true,
                internalQuerySamplingBySequentialScan: true,
            }),
        );
    }

    try {
        print('{">>>pipelines":[');
        pipelines.forEach((pipeline, index) => {
            // JSON does not allow trailing commas.
            const separator = index === pipelines.length - 1 ? "" : ",";

            // We print the pipeline here so that, even if the test fails,
            // we have already emitted the failing pipeline.
            print(`{">>>pipeline": ${JSON.stringify(pipeline)},`);

            // We do not use explain() as it loses the errmsg in case of an error.
            const explain = db.runCommand({
                explain: {aggregate: collName, pipeline: pipeline, cursor: {}},
                verbosity: "executionStats",
            });

            const executionStats = explain.executionStats;

            if (explain.ok !== 1) {
                let error = "unknown error";
                if (explain.hasOwnProperty("errmsg")) {
                    error = explain.errmsg;
                } else if (
                    explain.hasOwnProperty("executionStats") &&
                    explain.executionStats.hasOwnProperty("errorMessage")
                ) {
                    error = explain.executionStats.errorMessage;
                }
                print(`    "error": ${JSON.stringify(error)}}${separator}`);
                totalErrors++;
                return;
            }

            const winningPlan = trimPlanToStagesAndIndexes(explain.queryPlanner.winningPlan);

            const plans = explain.queryPlanner.rejectedPlans.length + 1;
            totalPlans += plans;

            const keys = executionStats.totalKeysExamined;
            totalKeys += keys;

            const docs = executionStats.totalDocsExamined;
            totalDocs += docs;

            const nReturned = executionStats.nReturned;
            totalRows += nReturned;

            const sorts = extractSortEffort(executionStats.executionStages);
            totalSorts += sorts;

            print(`    "winningPlan": ${JSON.stringify(winningPlan)},`);
            print(`    "keys" : ${padNumber(keys)},`);
            print(`    "docs" : ${padNumber(docs)},`);
            print(`    "sorts": ${padNumber(sorts)},`);
            print(`    "plans": ${padNumber(plans)},`);
            print(`    "rows" : ${padNumber(nReturned)}}${separator}`);
            print();
        });
        print("],");

        print(
            '">>>totals": {' +
                `"pipelines": ${pipelines.length}, ` +
                `"plans": ${totalPlans}, ` +
                `"keys": ${padNumber(totalKeys)}, ` +
                `"docs": ${padNumber(totalDocs)}, ` +
                `"sorts": ${padNumber(totalSorts)}, ` +
                `"rows": ${padNumber(totalRows)}, ` +
                `"errors": ${padNumber(totalErrors)}},`,
        );

        // Server parameters recorded in the golden output. The getParameter results are
        // deliberately not asserted on: a parameter the server does not report is simply
        // dropped by JSON.stringify().
        const parameters = {
            featureFlagCostBasedRanker: null,
            internalQueryCBRCEMode: null,
            samplingMarginOfError: null,
            samplingConfidenceInterval: null,
            internalQuerySamplingCEMethod: null,
            internalQuerySamplingBySequentialScan: null,
        };

        for (const param in parameters) {
            const result = db.adminCommand({getParameter: 1, [param]: 1});
            parameters[param] = result[param];
        }

        if (!(parameters["featureFlagCostBasedRanker"] ?? {})["value"]) {
            // internalQueryCBRCEMode does not matter unless
            // CBR is enabled, and is likely to confuse the
            // reader.
            delete parameters["internalQueryCBRCEMode"];
        } else if (parameters["internalQueryCBRCEMode"] === "automaticCE") {
            const param = "automaticCEPlanRankingStrategy";
            const result = db.adminCommand({getParameter: 1, [param]: 1});
            parameters[param] = result[param];
        }

        print(`">>>parameters": ${JSON.stringify(parameters)}}`);

        jsTest.log.info("See README.plan_stability.md for more information.");
    } finally {
        if (paramsToRestore) {
            // Restore the parameters we changed. The getParameter reply also carries response
            // metadata, which must not be sent back as parameters.
            const responseMetadata = ["ok", "operationTime", "$clusterTime"];
            const restoreCommand = {setParameter: 1};

            for (const [param, value] of Object.entries(paramsToRestore)) {
                if (responseMetadata.includes(param)) {
                    continue;
                }
                // Only replies wrapped as {value: ...} need unwrapping; plain booleans, numbers
                // and strings are restored as-is (unwrapping them would yield undefined).
                const isWrapped = value !== null && typeof value === "object" && value.hasOwnProperty("value");
                restoreCommand[param] = isWrapped ? value["value"] : value;
            }

            assert.commandWorked(db.adminCommand(restoreCommand));
        }
    }
}
|
||||
|
||||
/**
 * Produce the output for plan stability golden tests that target Join Optimization.
 *
 * Each command is explained with "executionStats" verbosity (with `cursor` set to {} and the
 * `idx` field removed) and the result is printed piecewise as one JSON document holding the
 * plan and stats of every command plus a summary section with totals:
 * {
 *     ">>>commands": [
 *         {
 *             ">>>command" : <command>,
 *             "winningPlan": <winningPlan>,
 *             "keys"       : <totalKeysExamined>,
 *             "docs"       : <totalDocsExamined>,
 *             "rows"       : <nReturned>
 *         },
 *         ...
 *     ],
 *     ">>>totals": {
 *         "commands": <numberOfCommands>,
 *         "keys": <totalKeysExamined>, "docs": <totalDocsExamined>, "rows": <nReturned>
 *     }
 * }
 *
 * @param db - database handle used to run the explain commands.
 * @param {Array} commands - the commands to explain.
 */
export function runPlanStabilityCommands(db, commands) {
    const commandPrettyPrinters = [newlineBeforeEachStage];
    const planPrettyPrinters = [jsonifyMultilineString];

    const totals = {keys: 0, docs: 0, rows: 0};

    print('{">>>commands":[');
    commands.forEach((command, index) => {
        const printableCommand = commandPrettyPrinters.reduce(
            (text, prettyPrint) => prettyPrint(text),
            JSON.stringify(command),
        );

        // Emit the command before running it, so that a failing command is already
        // visible in the output when the test aborts.
        print(`{">>>command": ${printableCommand},`);

        const commandToRun = {...command, cursor: {}};
        delete commandToRun["idx"];

        // The raw explain command is used rather than explain(), as the latter loses the
        // errmsg in case of an error.
        const explain = assert.commandWorked(
            db.runCommand({
                explain: commandToRun,
                verbosity: "executionStats",
            }),
        );

        const keys = sumCounters(explain, "totalKeysExamined");
        totals.keys += keys;

        const docs = sumCounters(explain, "totalDocsExamined");
        totals.docs += docs;

        const nReturned = getNReturned(explain);
        totals.rows += nReturned;

        // Locate the query plan to dump: either at the top level or under the first stage's $cursor.
        let queryPlanner = explain.queryPlanner;
        if (queryPlanner === undefined) {
            queryPlanner = explain.stages[0]["$cursor"].queryPlanner;
        }
        const winningPlan = queryPlanner.winningPlan;
        const queryPlan = winningPlan.queryPlan === undefined ? winningPlan : winningPlan.queryPlan;

        const winningPlanString = planPrettyPrinters.reduce(
            (text, prettyPrint) => prettyPrint(text),
            joinPlanToString(queryPlan).trimEnd(),
        );

        // JSON forbids trailing commas, hence no separator after the last command.
        const isLast = index === commands.length - 1;
        const separator = isLast ? "" : ",";

        print(`    "winningPlan": [\n${winningPlanString}],`);
        print(`    "keys" : ${padNumber(keys, 9)},`);
        print(`    "docs" : ${padNumber(docs, 9)},`);
        print(`    "rows" : ${padNumber(nReturned, 9)}}${separator}`);
        print();
    });
    print("],");

    const totalsLine =
        '">>>totals": {' +
        `"commands": ${commands.length}, ` +
        `"keys": ${padNumber(totals.keys, 9)}, ` +
        `"docs": ${padNumber(totals.docs, 9)}, ` +
        `"rows": ${padNumber(totals.rows, 9)}}`;
    print(totalsLine);

    print("}");
}
|
||||
140
jstests/query_golden/libs/pretty_printers.js
Normal file
140
jstests/query_golden/libs/pretty_printers.js
Normal file
@ -0,0 +1,140 @@
|
||||
const INDENT_SIZE = 2;
|
||||
|
||||
/**
 * Build the leading whitespace for the given indentation level, i.e. a run of
 * `indent * INDENT_SIZE` spaces.
 */
function indentString(indent) {
    const width = indent * INDENT_SIZE;
    return " ".repeat(width);
}
|
||||
|
||||
/**
 * Serialize a single join stage to string.
 *
 * The first line holds the abbreviated join type (HJ, NLJ or INLJ) followed by the
 * comma-separated join predicates. Each of the two inputs then follows on its own "-> " line,
 * prefixed with the respective embedding field in square brackets.
 *
 * Throws an Error if `stage.stage` is not one of the known join stages.
 */
function joinStageToString(stage, indent) {
    const abbreviations = new Map([
        ["HASH_JOIN_EMBEDDING", "HJ "],
        ["NESTED_LOOP_JOIN_EMBEDDING", "NLJ "],
        ["INDEXED_NESTED_LOOP_JOIN_EMBEDDING", "INLJ "],
    ]);
    if (!abbreviations.has(stage.stage)) {
        throw new Error(`Unknown join stage: ${stage.stage}`);
    }

    const header = abbreviations.get(stage.stage) + stage.joinPredicates.join(", ") + "\n";

    const childPrefix = indentString(indent + 1) + "-> ";
    const leftSide = `${childPrefix}[${stage.leftEmbeddingField}] ` + joinPlanToString(stage.inputStages[0], indent + 2);
    const rightSide =
        `${childPrefix}[${stage.rightEmbeddingField}] ` + joinPlanToString(stage.inputStages[1], indent + 2);

    return header + leftSide + rightSide;
}
|
||||
|
||||
/**
 * Serialize a complete join plan to string.
 *
 * Every stage is rendered on its own line, terminated by a newline. Child stages follow on
 * subsequent lines, each introduced by "-> " and indented according to its nesting depth.
 * A non-empty `filter` is included as JSON.
 *
 * @param stage - the plan node to serialize.
 * @param {number} indent - the indentation level of `stage` itself.
 * @returns {string} the multi-line representation of the plan.
 * Throws an Error for any stage type that is not known to this function.
 */
export function joinPlanToString(stage, indent = 0) {
    const filter = stage.filter && Object.keys(stage.filter).length > 0 ? JSON.stringify(stage.filter) + " " : "";

    // Render one child on its own "-> " line, two levels deeper than the current stage.
    const renderChild = (child) => indentString(indent + 1) + "-> " + joinPlanToString(child, indent + 2);

    switch (stage.stage) {
        case "HASH_JOIN_EMBEDDING":
        case "NESTED_LOOP_JOIN_EMBEDDING":
        case "INDEXED_NESTED_LOOP_JOIN_EMBEDDING":
            return joinStageToString(stage, indent);
        case "COLLSCAN":
            return `COLLSCAN: ${stage.nss} ${filter}\n`;
        case "FETCH":
            return `FETCH: ${stage.nss} ${filter}\n` + renderChild(stage.inputStage);
        case "IXSCAN":
        case "EXPRESS_IXSCAN":
            return `${stage.stage}: ${stage.nss} ${filter}${stage.indexName} ${JSON.stringify(stage.indexBounds)}\n`;
        case "INDEX_PROBE_NODE":
            return `INDEX_PROBE_NODE: ${stage.nss} ${filter}${stage.indexName}\n`;
        case "PROJECTION_SIMPLE":
        case "SUBPLAN":
            return `${stage.stage}\n` + renderChild(stage.inputStage);
        case "OR":
            // Render every branch: limiting the output to the first two inputs would silently
            // drop the remaining branches from the golden output.
            return `${stage.stage}\n` + stage.inputStages.map((child) => renderChild(child)).join("");
        default:
            throw new Error(`Unknown stage: ${stage.stage}`);
    }
}
|
||||
|
||||
/**
 * Return a multiline string in a JSON-compatible format.
 *
 * JSON has no multiline strings other than through \n escapes, so the string is turned into
 * the elements of an array instead: every input line becomes one quoted JSON string on a
 * line of its own, with a comma after every element except the last. The surrounding
 * brackets are not part of the result.
 *
 * This keeps the output human-readable while remaining valid JSON.
 */
export function jsonifyMultilineString(str) {
    // Double quotes are swapped for single quotes so that they do not need escaping,
    // since escaping reduces readability.
    const withSingleQuotes = str.replace(/"/g, "'");

    const quotedLines = withSingleQuotes.split(/\r?\n/).map((line) => JSON.stringify(line));

    // Joining with ",\n" puts a comma after every element but the last one.
    return quotedLines.join(",\n");
}
|
||||
|
||||
/**
 * Insert a newline in front of every `{"$match"`, `{"$lookup"` and `{"$unwind"` found in a
 * stringified command, so that each of these stages starts on a new line.
 *
 * This increases the readability of joins while keeping nested $and, $or, etc. on a single line.
 */
export function newlineBeforeEachStage(str) {
    const stageOpening = /\{"\$(?:match|lookup|unwind)"/g;
    return str.replace(stageOpening, (opening) => "\n" + opening);
}
|
||||
|
||||
/**
 * Reduces a query plan in-place to a more compact representation by retaining only the fields
 * that pertain to stage names, filtering and index usage. This representation is suitable for
 * CBR golden tests such as plan_stability.js where we want to record the general shape of the
 * query plan on a single line.
 *
 * A `filter` is replaced with `true`, `indexBounds` are kept verbatim, and all other retained
 * objects and arrays are trimmed recursively.
 *
 * @returns the very same (now mutated) `obj`; non-object values are returned untouched.
 */
export function trimPlanToStagesAndIndexes(obj) {
    if (obj === null || typeof obj !== "object") {
        return obj;
    }

    const retainedFields = ["stage", "inputStage", "inputStages", "indexName", "indexBounds", "filter"];
    const isList = Array.isArray(obj);

    for (const key in obj) {
        if (!isList && !retainedFields.includes(key)) {
            delete obj[key];
            continue;
        }

        if (key === "filter") {
            // Record that a filter is present, but drop the actual expression.
            obj[key] = true;
            continue;
        }

        const child = obj[key];
        if (key !== "indexBounds" && child !== null && typeof child === "object") {
            trimPlanToStagesAndIndexes(child);
        }
    }

    return obj;
}
|
||||
@ -1,5 +1,4 @@
|
||||
import {show} from "jstests/libs/golden_test.js";
|
||||
import {getPlanRankerMode} from "jstests/libs/query/cbr_utils.js";
|
||||
import {sequentialIds} from "jstests/query_golden/libs/example_data.js";
|
||||
|
||||
/**
|
||||
@ -24,236 +23,3 @@ export function resetCollection(coll, docs, indexes = []) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reduces a query plan in-place to a more compact representation by retaining only the fields
 * that pertain to stage names, filtering and index usage. This representation is suitable for
 * golden tests such as plan_stability.js where we want to record the general shape of the
 * query plan on a single line.
 *
 * A `filter` is replaced with `true`, `indexBounds` are kept verbatim, and all other retained
 * objects and arrays are trimmed recursively.
 *
 * @returns the very same (now mutated) `obj`; non-object values are returned untouched.
 */
export function trimPlanToStagesAndIndexes(obj) {
    if (obj === null || typeof obj !== "object") {
        return obj;
    }

    const retainedFields = ["stage", "inputStage", "inputStages", "indexName", "indexBounds", "filter"];
    const isList = Array.isArray(obj);

    for (const key in obj) {
        if (!isList && !retainedFields.includes(key)) {
            delete obj[key];
            continue;
        }

        if (key === "filter") {
            // Record that a filter is present, but drop the actual expression.
            obj[key] = true;
            continue;
        }

        const child = obj[key];
        if (key !== "indexBounds" && child !== null && typeof child === "object") {
            trimPlanToStagesAndIndexes(child);
        }
    }

    return obj;
}
|
||||
|
||||
/**
 * Render `num` through its own toString() and left-pad the text with spaces until it is
 * 6 characters long. Longer text is returned unchanged.
 */
export function padNumber(num) {
    const text = num.toString();
    return text.padStart(6, " ");
}
|
||||
/**
 * Computes an abstract sort effort for the query, defined as
 *     (LOG(nReturned) + 1) * inputStage.nReturned
 * for each SORT stage, accumulated over the stage itself and everything reachable through
 * its `inputStage` / `inputStages` children.
 *
 * @param stage - a stage node carrying `stage`, `nReturned` and optionally
 *     `inputStage` / `inputStages`.
 * @returns the accumulated effort, rounded to the nearest integer; 0 if there is no SORT.
 */
export function extractSortEffort(stage) {
    let effort = 0;

    if (stage.stage === "SORT" && stage.inputStage.nReturned > 0) {
        // Math.log(0) is -Infinity, which would turn the whole effort (and the golden output)
        // into -Infinity. A SORT that returned nothing is charged only for consuming its input.
        const logTerm = stage.nReturned > 0 ? Math.log(stage.nReturned) : 0;

        // We +1 here because log(1) = 0 but the effort is still non-zero.
        effort += (logTerm + 1) * stage.inputStage.nReturned;
    }

    if (stage.inputStage) {
        effort += extractSortEffort(stage.inputStage);
    }

    if (stage.inputStages) {
        for (const inputStage of stage.inputStages) {
            effort += extractSortEffort(inputStage);
        }
    }

    return Math.round(effort);
}
|
||||
|
||||
/**
 * Produce the output for plan stability golden tests that target CBR.
 *
 * Each pipeline is explained with "executionStats" verbosity as an aggregation over
 * `collName`. The output is a JSON document, printed piecewise, that contains the plan and
 * stats for each pipeline as well as a summary section with totals:
 * {
 *     ">>>pipelines": [
 *         {
 *             ">>>pipeline": <pipeline>,
 *             "winningPlan": <winningPlan>,
 *             "keys"       : <totalKeysExamined>,
 *             "docs"       : <totalDocsExamined>,
 *             "sorts"      : <sortEffort>,
 *             "plans"      : <numberOfPlans>,
 *             "rows"       : <nReturned>
 *         },
 *         // or, if the explain failed: {">>>pipeline": <pipeline>, "error": <message>}
 *         ...
 *     ],
 *     ">>>totals": {
 *         "pipelines": <numberOfPipelines>, "plans": <numberOfPlans>,
 *         "keys": <totalKeysExamined>, "docs": <totalDocsExamined>,
 *         "sorts": <totalSortEffort>, "rows": <nReturned>, "errors": <numberOfErrors>
 *     },
 *     ">>>parameters": {<server parameter>: <value>, ...}
 * }
 *
 * The sort effort is an abstract measure of the complexity of any SORT stages, and is
 * defined as (LOG(nReturned) + 1) * inputStage.nReturned.
 *
 * @param db - database handle used for the explain and the get/setParameter commands.
 * @param {string} collName - the collection to aggregate over.
 * @param {Array} pipelines - the aggregation pipelines to explain.
 */
export function runPlanStabilityPipelines(db, collName, pipelines) {
    let totalPlans = 0;
    let totalKeys = 0;
    let totalDocs = 0;
    let totalSorts = 0;
    let totalRows = 0;
    let totalErrors = 0;

    let paramsToRestore;

    // All-feature-flags variants enable CBR even for the
    // query_golden_classic suite.
    // Plan stability test running with CBR need the following
    // knobs. Set them before starting the tests & restore them
    // after, as the query_golden_classic suite runs other
    // golden tests, which do not expect these knobs.
    if (getPlanRankerMode(db) !== "multiPlanning") {
        // CBR enabled
        paramsToRestore = assert.commandWorked(
            db.adminCommand({
                getParameter: 1,
                internalQueryPlannerEnableSortIndexIntersection: 1,
                internalQuerySamplingBySequentialScan: 1,
            }),
        );
        assert.commandWorked(
            db.adminCommand({
                setParameter: 1,
                internalQueryPlannerEnableSortIndexIntersection: true,
                internalQuerySamplingBySequentialScan: true,
            }),
        );
    }

    try {
        print('{">>>pipelines":[');
        pipelines.forEach((pipeline, index) => {
            // JSON does not allow trailing commas.
            const separator = index === pipelines.length - 1 ? "" : ",";

            // We print the pipeline here so that, even if the test fails,
            // we have already emitted the failing pipeline.
            print(`{">>>pipeline": ${JSON.stringify(pipeline)},`);

            // We do not use explain() as it loses the errmsg in case of an error.
            const explain = db.runCommand({
                explain: {aggregate: collName, pipeline: pipeline, cursor: {}},
                verbosity: "executionStats",
            });

            const executionStats = explain.executionStats;

            if (explain.ok !== 1) {
                let error = "unknown error";
                if (explain.hasOwnProperty("errmsg")) {
                    error = explain.errmsg;
                } else if (
                    explain.hasOwnProperty("executionStats") &&
                    explain.executionStats.hasOwnProperty("errorMessage")
                ) {
                    error = explain.executionStats.errorMessage;
                }
                print(`    "error": ${JSON.stringify(error)}}${separator}`);
                totalErrors++;
                return;
            }

            const winningPlan = trimPlanToStagesAndIndexes(explain.queryPlanner.winningPlan);

            const plans = explain.queryPlanner.rejectedPlans.length + 1;
            totalPlans += plans;

            const keys = executionStats.totalKeysExamined;
            totalKeys += keys;

            const docs = executionStats.totalDocsExamined;
            totalDocs += docs;

            const nReturned = executionStats.nReturned;
            totalRows += nReturned;

            const sorts = extractSortEffort(executionStats.executionStages);
            totalSorts += sorts;

            print(`    "winningPlan": ${JSON.stringify(winningPlan)},`);
            print(`    "keys" : ${padNumber(keys)},`);
            print(`    "docs" : ${padNumber(docs)},`);
            print(`    "sorts": ${padNumber(sorts)},`);
            print(`    "plans": ${padNumber(plans)},`);
            print(`    "rows" : ${padNumber(nReturned)}}${separator}`);
            print();
        });
        print("],");

        print(
            '">>>totals": {' +
                `"pipelines": ${pipelines.length}, ` +
                `"plans": ${totalPlans}, ` +
                `"keys": ${padNumber(totalKeys)}, ` +
                `"docs": ${padNumber(totalDocs)}, ` +
                `"sorts": ${padNumber(totalSorts)}, ` +
                `"rows": ${padNumber(totalRows)}, ` +
                `"errors": ${padNumber(totalErrors)}},`,
        );

        // Server parameters recorded in the golden output. The getParameter results are
        // deliberately not asserted on: a parameter the server does not report is simply
        // dropped by JSON.stringify().
        const parameters = {
            featureFlagCostBasedRanker: null,
            internalQueryCBRCEMode: null,
            samplingMarginOfError: null,
            samplingConfidenceInterval: null,
            internalQuerySamplingCEMethod: null,
            internalQuerySamplingBySequentialScan: null,
        };

        for (const param in parameters) {
            const result = db.adminCommand({getParameter: 1, [param]: 1});
            parameters[param] = result[param];
        }

        if (!(parameters["featureFlagCostBasedRanker"] ?? {})["value"]) {
            // internalQueryCBRCEMode does not matter unless
            // CBR is enabled, and is likely to confuse the
            // reader.
            delete parameters["internalQueryCBRCEMode"];
        } else if (parameters["internalQueryCBRCEMode"] === "automaticCE") {
            const param = "automaticCEPlanRankingStrategy";
            const result = db.adminCommand({getParameter: 1, [param]: 1});
            parameters[param] = result[param];
        }

        print(`">>>parameters": ${JSON.stringify(parameters)}}`);

        jsTest.log.info("See README.plan_stability.md for more information.");
    } finally {
        if (paramsToRestore) {
            // Restore the parameters we changed. The getParameter reply also carries response
            // metadata (including $clusterTime), which must not be sent back as parameters.
            const responseMetadata = ["ok", "operationTime", "$clusterTime"];
            const restoreCommand = {setParameter: 1};

            for (const [param, value] of Object.entries(paramsToRestore)) {
                if (responseMetadata.includes(param)) {
                    continue;
                }
                // Only replies wrapped as {value: ...} need unwrapping; plain booleans, numbers
                // and strings are restored as-is (unwrapping them would yield undefined).
                const isWrapped = value !== null && typeof value === "object" && value.hasOwnProperty("value");
                restoreCommand[param] = isWrapped ? value["value"] : value;
            }

            assert.commandWorked(db.adminCommand(restoreCommand));
        }
    }
}
|
||||
|
||||
@ -20,7 +20,7 @@ import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
|
||||
import {pipelines} from "jstests/query_golden/test_inputs/plan_stability_pipelines.js";
|
||||
import {populateSimplePlanStabilityDataset} from "jstests/query_golden/test_inputs/simple_plan_stability_dataset.js";
|
||||
import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js";
|
||||
import {runPlanStabilityPipelines} from "jstests/query_golden/libs/utils.js";
|
||||
import {runPlanStabilityPipelines} from "jstests/query_golden/libs/plan_stability_utils.js";
|
||||
|
||||
if (checkSbeFullyEnabled(db)) {
|
||||
jsTest.log.info("Skipping the test because CBR only applies to the classic engine.");
|
||||
|
||||
@ -27,7 +27,7 @@
|
||||
|
||||
import {isSlowBuild} from "jstests/libs/query/aggregation_pipeline_utils.js";
|
||||
import {checkSbeFullyEnabled} from "jstests/libs/query/sbe_util.js";
|
||||
import {runPlanStabilityPipelines} from "jstests/query_golden/libs/utils.js";
|
||||
import {runPlanStabilityPipelines} from "jstests/query_golden/libs/plan_stability_utils.js";
|
||||
import {pipelines} from "jstests/query_golden/test_inputs/plan_stability_pipelines2.js";
|
||||
import {DataGenerator} from "jstests/libs/query/data_generator.js";
|
||||
import {checkPauseAfterPopulate} from "jstests/libs/pause_after_populate.js";
|
||||
|
||||
Loading…
Reference in New Issue
Block a user