SERVER-122105 IDL compiler generic annotations block (#50849)

GitOrigin-RevId: ae090517ae6147affecff7a21ae9bd7a25b37870
This commit is contained in:
Catalin Sumanaru 2026-04-23 19:23:23 +01:00 committed by MongoDB Bot
parent 90c831085c
commit 3166fae24b
220 changed files with 2243 additions and 4894 deletions

View File

@ -1,10 +1,10 @@
# This file gets automatically updated by profile_data_pr.py. Do not change the path to this file or variables in this file
# without updating that script.
DEFAULT_CLANG_PGO_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/profiling_data/pgo/mongod_644d85ded088e94dc374d04458fbb265a4bafea1_aarch64_clang_thinlto_pgo_9.0.0-alpha0-1318-g644d85d-patch-69e9ebddaa38170007e794c5.profdata"
DEFAULT_CLANG_PGO_DATA_CHECKSUM = "4f0b0f9794f215141f71cdf29c45551d847622c8ec41740d5d8551cf00ce373c"
DEFAULT_CLANG_PGO_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/profiling_data/pgo/mongod_85f18b6a8f35710ce7f153052873c628c9a08522_aarch64_clang_thinlto_pgo_9.0.0-alpha0-1254-g85f18b6-patch-69e894c3dd16d7000716b9d9.profdata"
DEFAULT_CLANG_PGO_DATA_CHECKSUM = "bedba152f5d8ed4df9998f8ad10c249e14139c7a21c6704854b9bcbd1c080131"
DEFAULT_GCC_PGO_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/profiling_data/pgo/mongod_efcbfdbb937f52078925254ed32fbca7901b4ae6_aarch64_gcc_lto_pgo_8.3.0-alpha0-1055-gefcbfdb-patch-68bfb348576a720007510f50.tgz"
DEFAULT_GCC_PGO_DATA_CHECKSUM = "29b9d919abdccb4a2eeb38670e0489312792700559eb7282e0b02fe2f5ec7744"
DEFAULT_BOLT_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/profiling_data/bolt/mongod_644d85ded088e94dc374d04458fbb265a4bafea1_aarch64_clang_thinlto_pgo_bolt_9.0.0-alpha0-1318-g644d85d-patch-69e9ebddaa38170007e794c5.fdata"
DEFAULT_BOLT_DATA_CHECKSUM = "a042d34adf574aff486e93a99292ffd60943a75d06d23feac5ea078470e385ac"
DEFAULT_BOLT_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/profiling_data/bolt/mongod_85f18b6a8f35710ce7f153052873c628c9a08522_aarch64_clang_thinlto_pgo_bolt_9.0.0-alpha0-1254-g85f18b6-patch-69e894c3dd16d7000716b9d9.fdata"
DEFAULT_BOLT_DATA_CHECKSUM = "f8efa064f446564686c32452b87b08440395bfa710b5960ba745150044f87b27"

View File

@ -492,6 +492,8 @@ class ServerParameter(common.SourceLocation):
self.is_deprecated = False # type: bool
self.annotations = None # type: Dict[str, Any]
super(ServerParameter, self).__init__(file_name, line, column)

View File

@ -1567,6 +1567,7 @@ def _bind_server_parameter(ctxt, param):
ast_param.omit_in_ftdc = param.omit_in_ftdc
ast_param.is_deprecated = param.is_deprecated
ast_param.annotations = param.annotations
ast_param.set_at = _bind_server_parameter_set_at(ctxt, param)
if ast_param.set_at is None:

View File

@ -30,6 +30,7 @@
import hashlib
import io
import itertools
import math
import os
import re
import sys
@ -3171,6 +3172,86 @@ class _CppSourceFileWriter(_CppFileWriterBase):
f'{{"{entry.name}", {"true" if entry.generic_field_info.get_should_forward() else "false"} }},'
)
def _gen_annotations_elem(self, builder, key, value, depth=0):
# type: (str, Optional[str], Any, int) -> None
"""Dispatch an annotation value to the appropriate generator."""
if value is None:
self._gen_annotations_null(builder, key)
elif isinstance(value, dict):
self._gen_annotations_obj(builder, key, value, depth)
elif isinstance(value, list):
self._gen_annotations_arr(builder, key, value, depth)
else:
self._gen_annotations_scalar(builder, key, value)
def _gen_annotations_obj(self, builder, key, obj, depth):
# type: (str, Optional[str], Dict[str, Any], int) -> None
"""Emit a BSONObjBuilder sub-object for a dict."""
sub = "sub%d" % depth
start = "%s.subobjStart(%s)" % (builder, key) if key else "%s.subobjStart()" % builder
with self._block("{", "}"):
self._writer.write_line("BSONObjBuilder %s(%s);" % (sub, start))
for k, v in obj.items():
self._gen_annotations_elem(sub, _encaps(k), v, depth + 1)
def _gen_annotations_arr(self, builder, key, items, depth):
# type: (str, Optional[str], List[Any], int) -> None
"""Emit a BSONArrayBuilder sub-array for a list."""
arr = "arr%d" % depth
start = "%s.subarrayStart(%s)" % (builder, key) if key else "%s.subarrayStart()" % builder
with self._block("{", "}"):
self._writer.write_line("BSONArrayBuilder %s(%s);" % (arr, start))
for item in items:
self._gen_annotations_elem(arr, None, item, depth + 1)
def _gen_annotations_null(self, builder, key):
# type: (str, Optional[str]) -> None
"""Emit an appendNull call."""
if key:
self._writer.write_line("%s.appendNull(%s);" % (builder, key))
else:
self._writer.write_line("%s.appendNull();" % builder)
def _gen_annotations_scalar(self, builder, key, value):
# type: (str, Optional[str], Any) -> None
"""Emit an append call for a scalar value."""
if isinstance(value, bool):
literal = "true" if value else "false"
elif isinstance(value, int):
literal = str(value)
elif isinstance(value, float):
if not math.isfinite(value):
raise ValueError(
f"Unsupported annotation value {value!r} for key {key!r}: "
f"NaN and infinity are not valid C++ literals."
)
literal = str(value)
elif isinstance(value, str):
literal = _encaps(value)
else:
raise ValueError(
f"Unsupported annotation type {type(value).__name__} for {key}: "
f"expected bool, int, float, or str."
)
if key:
self._writer.write_line("%s.append(%s, %s);" % (builder, key, literal))
else:
self._writer.write_line("%s.append(%s);" % (builder, literal))
def _gen_server_parameter_annotations(self, var_name, param):
# type: (str, ast.ServerParameter) -> None
"""Generate setAnnotations call if the parameter has annotations."""
if param.annotations:
with self.get_initializer_lambda(
"static const auto kAnnotations", return_type="BSONObj"
):
self._writer.write_line("BSONObjBuilder builder;")
for k, v in param.annotations.items():
self._gen_annotations_elem("builder", _encaps(k), v)
self._writer.write_line("return builder.obj();")
self._writer.write_line("%s->setAnnotations(kAnnotations);" % var_name)
def _gen_server_parameter_specialized(self, param):
# type: (ast.ServerParameter) -> None
"""Generate a specialized ServerParameter."""
@ -3187,6 +3268,8 @@ class _CppSourceFileWriter(_CppFileWriterBase):
if param.is_deprecated:
self._writer.write_line("sp->setIsDeprecated(true);")
self._gen_server_parameter_annotations("sp", param)
self._writer.write_line("return std::move(sp);")
def _gen_server_parameter_class_definitions(self, param):
@ -3261,6 +3344,8 @@ class _CppSourceFileWriter(_CppFileWriterBase):
if param.is_deprecated:
self._writer.write_line("ret->setIsDeprecated(true);")
self._gen_server_parameter_annotations("ret", param)
if param.default and not (param.cpp_vartype and param.cpp_varname):
# Only need to call setDefault() if we haven't in-place initialized the declared var.
self._writer.write_line(

View File

@ -917,6 +917,13 @@ def _parse_server_parameter_class(ctxt, node):
return spc
def _parse_annotations(ctxt, node):
    # type: (errors.ParserContext, yaml.nodes.MappingNode) -> Dict[str, Any]
    """Turn the YAML node of an annotations block into plain Python data.

    The block is treated as opaque: no per-key rules are applied here, the node is
    simply constructed with PyYAML's SafeConstructor. `ctxt` is accepted but is not
    used by this function.
    """
    # deep=True asks the constructor to build nested nodes as part of this call.
    return yaml.constructor.SafeConstructor().construct_object(node, deep=True)
def _parse_server_parameter(ctxt, spec, name, node):
# type: (errors.ParserContext, syntax.IDLSpec, str, Union[yaml.nodes.MappingNode, yaml.nodes.ScalarNode, yaml.nodes.SequenceNode]) -> None
"""Parse a server_parameters section in the IDL file."""
@ -950,6 +957,7 @@ def _parse_server_parameter(ctxt, spec, name, node):
"omit_in_ftdc": _RuleDesc("bool_scalar"),
"cpp_class": _RuleDesc("scalar_or_mapping", mapping_parser_func=map_class),
"is_deprecated": _RuleDesc("bool_scalar"),
"annotations": _RuleDesc("mapping", mapping_parser_func=_parse_annotations),
},
)

View File

@ -858,6 +858,8 @@ class ServerParameter(common.SourceLocation):
self.is_deprecated = False # type: bool
self.annotations = None # type: Dict[str, Any]
super(ServerParameter, self).__init__(file_name, line, column)

View File

@ -690,6 +690,11 @@ properties:
description: >
Marks the server parameter as deprecated. Warns users if the server parameter
is ever used. Defaults to false.
annotations:
type: object
description: >
Opaque metadata block attached to the server parameter. Parsed as-is and
exposed at runtime via ServerParameter::annotations() as a BSONObj.
on_update:
type: string
description: >

View File

@ -39,3 +39,4 @@ import idl.errors # noqa: F401
import idl.generator # noqa: F401
import idl.parser # noqa: F401
import idl.syntax # noqa: F401
import idl.writer # noqa: F401

View File

@ -2364,6 +2364,76 @@ class TestBinder(testcase.IDLTestcase):
""")
)
def test_server_parameter_annotations_positive(self):
# type: () -> None
"""Positive server parameter annotations test cases."""
# server parameter with cpp_varname and simple annotations.
self.assert_bind(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations:
query_knob:
wire_name: fooWire
applicability: queryShape
""")
)
# server parameter with cpp_class and annotations.
self.assert_bind(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_class: baz
annotations:
query_knob:
wire_name: fooWire
""")
)
# annotations with nested mapping (fcv sub-block).
self.assert_bind(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations:
query_knob:
wire_name: fooWire
applicability: queryShape
fcv:
min: "9.0"
""")
)
# multiple annotation keys.
self.assert_bind(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations:
query_knob:
wire_name: fooWire
security:
audit: true
""")
)
def test_server_parameter_negative(self):
# type: () -> None
"""Negative server parameter test cases."""

View File

@ -36,6 +36,7 @@ idl base directory:
$ coverage run run_tests.py && coverage html
"""
import datetime
import io
import os
import unittest
@ -475,6 +476,157 @@ class TestGenerator(testcase.IDLTestcase):
)
self.assertIn(expected, header)
def test_server_parameter_annotations_generates_set_call(self) -> None:
"""Test generation of setAnnotations call for annotated server parameter."""
_, source = self.assert_generate(
self.view_test_common_types
+ dedent("""
server_parameters:
testAnnotatedParam:
description: "Test annotated parameter"
set_at: ["startup", "runtime"]
redact: false
cpp_varname: testParameter
annotations:
query_knob:
wire_name: testWire
applicability: queryShape
""")
)
# Verify the BSON builder lambda is generated.
self.assertIn("kAnnotations", source)
self.assertIn("BSONObjBuilder builder", source)
self.assertIn('"query_knob"', source)
self.assertIn('"wire_name"', source)
self.assertIn('"testWire"', source)
self.assertIn('"applicability"', source)
self.assertIn('"queryShape"', source)
# Verify setAnnotations call.
self.assertIn("setAnnotations(kAnnotations)", source)
def test_server_parameter_no_annotations_no_set_call(self) -> None:
"""Test that server parameter without annotations does not generate setAnnotations."""
_, source = self.assert_generate(
self.view_test_common_types
+ dedent("""
server_parameters:
testUnannotatedParam:
description: "Test unannotated parameter"
set_at: ["startup", "runtime"]
redact: false
cpp_varname: testParameter
""")
)
self.assertNotIn("setAnnotations", source)
self.assertNotIn("kAnnotations", source)
def test_server_parameter_nested_annotations_bson(self) -> None:
"""Test nested annotation values produce correct nested BSON output."""
_, source = self.assert_generate(
self.view_test_common_types
+ dedent("""
server_parameters:
testNestedParam:
description: "Test nested annotations"
set_at: ["startup", "runtime"]
redact: false
cpp_varname: testParameter
annotations:
query_knob:
wire_name: testNestedWire
applicability: queryShape
fcv:
min: "9.0"
""")
)
# Verify nested BSON structure.
self.assertIn("kAnnotations", source)
self.assertIn("subobjStart", source)
self.assertIn('"fcv"', source)
self.assertIn('"min"', source)
self.assertIn('"9.0"', source)
# Verify setAnnotations call.
self.assertIn("setAnnotations(kAnnotations)", source)
def test_server_parameter_annotations_mixed_types_bson(self) -> None:
"""Test annotation values of different YAML types produce correct BSON output."""
_, source = self.assert_generate(
self.view_test_common_types
+ dedent("""
server_parameters:
testMixedParam:
description: "Test mixed-type annotations"
set_at: ["startup", "runtime"]
redact: false
cpp_varname: testParameter
annotations:
query_knob:
wire_name: testMixedWire
pqs_settable: true
max_retries: 3
applicability:
- queryShape
- opCtx
""")
)
# Verify string value.
self.assertIn('"testMixedWire"', source)
# Verify boolean value appended.
self.assertIn('append("pqs_settable", true)', source)
# Verify integer value appended.
self.assertIn('append("max_retries", 3)', source)
# Verify array value.
self.assertIn("subarrayStart", source)
self.assertIn('"queryShape"', source)
self.assertIn('"opCtx"', source)
# Verify setAnnotations call.
self.assertIn("setAnnotations(kAnnotations)", source)
def test_server_parameter_annotations_rejects_unquoted_date(self) -> None:
"""YAML auto-coerces unquoted dates to datetime.date; the generator must reject them."""
with self.assertRaises(ValueError):
self.assert_generate(
self.view_test_common_types
+ dedent("""
server_parameters:
testDateParam:
description: "Test param with unquoted date annotation"
set_at: ["startup", "runtime"]
redact: false
cpp_varname: testParameter
annotations:
wire_name: 2026-04-14
""")
)
def test_server_parameter_annotations_accepts_quoted_date(self) -> None:
"""A quoted date stays a string and flows through as a C++ string literal."""
_, source = self.assert_generate(
self.view_test_common_types
+ dedent("""
server_parameters:
testQuotedDateParam:
description: "Test param with quoted date annotation"
set_at: ["startup", "runtime"]
redact: false
cpp_varname: testParameter
annotations:
wire_name: "2026-04-14"
""")
)
self.assertIn('"2026-04-14"', source)
self.assertIn("setAnnotations(kAnnotations)", source)
def test_command_view_type_generates_anchor(self) -> None:
"""Test anchor generation on command with view parameter."""
header, _ = self.assert_generate(
@ -1700,5 +1852,136 @@ class TestGenerator(testcase.IDLTestcase):
)
class TestAnnotationsCodegen(testcase.IDLTestcase):
"""Direct unit tests for the _gen_annotations_* helpers on _CppSourceFileWriter."""
def _emit(self, key, value, depth=0):
# type: (object, object, int) -> str
"""Run _gen_annotations_elem on a fresh writer and return the generated text."""
stream = io.StringIO()
source_writer = idl.generator._CppSourceFileWriter(
idl.writer.IndentedTextWriter(stream), target_arch=""
)
source_writer._gen_annotations_elem("builder", key, value, depth=depth)
return stream.getvalue()
def test_scalar_string_keyed(self):
self.assertEqual(self._emit('"k"', "hello"), 'builder.append("k", "hello");\n')
def test_scalar_string_unkeyed(self):
self.assertEqual(self._emit(None, "hello"), 'builder.append("hello");\n')
def test_scalar_bool_true(self):
self.assertEqual(self._emit('"k"', True), 'builder.append("k", true);\n')
def test_scalar_bool_false(self):
self.assertEqual(self._emit('"k"', False), 'builder.append("k", false);\n')
def test_scalar_int(self):
self.assertEqual(self._emit('"k"', 3), 'builder.append("k", 3);\n')
def test_scalar_float(self):
self.assertEqual(self._emit('"k"', 3.25), 'builder.append("k", 3.25);\n')
def test_null_keyed(self):
self.assertEqual(self._emit('"k"', None), 'builder.appendNull("k");\n')
def test_null_unkeyed(self):
self.assertEqual(self._emit(None, None), "builder.appendNull();\n")
def test_empty_dict(self):
expected = dedent("""\
{
BSONObjBuilder sub0(builder.subobjStart("k"));
}
""")
self.assertEqual(self._emit('"k"', {}), expected)
def test_flat_dict(self):
expected = dedent("""\
{
BSONObjBuilder sub0(builder.subobjStart("k"));
sub0.append("a", 1);
}
""")
self.assertEqual(self._emit('"k"', {"a": 1}), expected)
def test_nested_dict_depth_counter(self):
# Depth increments so nested sub-builder names don't collide.
expected = dedent("""\
{
BSONObjBuilder sub0(builder.subobjStart("root"));
{
BSONObjBuilder sub1(sub0.subobjStart("outer"));
sub1.append("inner", 1);
}
}
""")
self.assertEqual(self._emit('"root"', {"outer": {"inner": 1}}), expected)
def test_empty_list(self):
expected = dedent("""\
{
BSONArrayBuilder arr0(builder.subarrayStart("k"));
}
""")
self.assertEqual(self._emit('"k"', []), expected)
def test_flat_list(self):
expected = dedent("""\
{
BSONArrayBuilder arr0(builder.subarrayStart("k"));
arr0.append(1);
arr0.append(2);
}
""")
self.assertEqual(self._emit('"k"', [1, 2]), expected)
def test_list_of_dicts(self):
# Items inside a list use the unkeyed subobjStart form and advance depth.
expected = dedent("""\
{
BSONArrayBuilder arr0(builder.subarrayStart("k"));
{
BSONObjBuilder sub1(arr0.subobjStart());
sub1.append("a", 1);
}
{
BSONObjBuilder sub1(arr0.subobjStart());
sub1.append("b", 2);
}
}
""")
self.assertEqual(self._emit('"k"', [{"a": 1}, {"b": 2}]), expected)
def test_scalar_rejects_date(self):
with self.assertRaises(ValueError):
self._emit('"k"', datetime.date(2026, 4, 14))
def test_scalar_rejects_datetime(self):
with self.assertRaises(ValueError):
self._emit('"k"', datetime.datetime(2026, 4, 14, 10, 30))
def test_scalar_rejects_nan(self):
with self.assertRaises(ValueError):
self._emit('"k"', float("nan"))
def test_scalar_rejects_positive_inf(self):
with self.assertRaises(ValueError):
self._emit('"k"', float("inf"))
def test_scalar_rejects_negative_inf(self):
with self.assertRaises(ValueError):
self._emit('"k"', float("-inf"))
def test_scalar_rejects_bytes(self):
with self.assertRaises(ValueError):
self._emit('"k"', b"hello")
def test_scalar_rejects_set(self):
with self.assertRaises(ValueError):
self._emit('"k"', {"a", "b"})
if __name__ == "__main__":
unittest.main()

View File

@ -1566,6 +1566,74 @@ class TestParser(testcase.IDLTestcase):
idl.errors.ERROR_ID_IS_NODE_TYPE_SCALAR_OR_MAPPING,
)
def test_server_parameter_annotations_positive(self):
# type: () -> None
"""Positive server parameter annotations tests."""
# server parameter with annotations mapping.
self.assert_parse(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations:
query_knob:
wire_name: fooWire
applicability: queryShape
""")
)
def test_server_parameter_annotations_negative(self):
# type: () -> None
"""Negative server parameter annotations tests."""
# annotations as a scalar should fail. Must be a mapping.
self.assert_parse_fail(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations: some_scalar_value
"""),
idl.errors.ERROR_ID_IS_NODE_TYPE,
)
# annotations as a sequence should fail. Must be a mapping.
self.assert_parse_fail(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations:
- item1
- item2
"""),
idl.errors.ERROR_ID_IS_NODE_TYPE,
)
# annotations as null should fail. Must be a mapping.
self.assert_parse_fail(
textwrap.dedent("""
server_parameters:
foo:
set_at: startup
description: bar
redact: false
cpp_varname: baz
annotations: null
"""),
idl.errors.ERROR_ID_IS_NODE_TYPE,
)
def test_feature_flag(self):
# type: () -> None
"""Test feature flag."""

View File

@ -141,6 +141,7 @@ selector:
- featureFlagSearchHybridScoringFull
- featureFlagRankFusion
- featureFlagMongotIndexedViews
- featureFlagCreateViewlessTimeseriesCollections
- uses_transactions
- requires_fcv_81
- requires_fcv_82

View File

@ -101,6 +101,7 @@ selector:
- featureFlagSearchHybridScoringFull
- featureFlagRankFusion
- featureFlagMongotIndexedViews
- featureFlagCreateViewlessTimeseriesCollections
- uses_transactions
- requires_fcv_81
- requires_fcv_82

View File

@ -233,6 +233,7 @@ selector:
- featureFlagRankFusionBasic
- featureFlagRankFusionFull
- featureFlagSearchHybridScoringFull
- featureFlagCreateViewlessTimeseriesCollections
- uses_transactions
- requires_fcv_81
- requires_fcv_82

View File

@ -119,6 +119,7 @@ selector:
- featureFlagRankFusionBasic
- featureFlagRankFusionFull
- featureFlagSearchHybridScoringFull
- featureFlagCreateViewlessTimeseriesCollections
- uses_transactions
- requires_fcv_81
- requires_fcv_82

View File

@ -50,7 +50,7 @@ selector:
- assumes_standalone_mongod
- assumes_against_mongod_not_mongos
- requires_profiling
- requires_fcv_90
- featureFlagCreateViewlessTimeseriesCollections
roots:
- jstests/core/timeseries/**/*.js
test_kind: js_test

View File

@ -22,7 +22,7 @@ selector:
exclude_files:
- jstests/sharding/mongos_writes_wait_for_write_concern_timeseries.js
exclude_with_any_tags:
- requires_fcv_90
- featureFlagCreateViewlessTimeseriesCollections
roots:
- jstests/sharding/analyze_shard_key/analyze_shard_key_cmd_validation.js
- jstests/sharding/balancer_should_return_random_migrations_failpoint.js

View File

@ -151,6 +151,7 @@ selector:
- featureFlagSearchHybridScoringFull
- featureFlagRankFusion
- featureFlagMongotIndexedViews
- featureFlagCreateViewlessTimeseriesCollections
- uses_transactions
- requires_fcv_81
- requires_fcv_82

View File

@ -126,6 +126,8 @@
- featureFlagRankFusion
# TODO: SERVER-86544 remove feature flag for SPM-3672
- featureFlagMongotIndexedViews
# TODO: SERVER-101595 remove feature flag
- featureFlagCreateViewlessTimeseriesCollections
# Transactions are aborted upon fcv upgrade or downgrade.
- uses_transactions
# Exclude tests that require a specific fcv.

View File

@ -18,7 +18,7 @@
value:
selector:
exclude_with_any_tags:
- requires_fcv_90
- featureFlagCreateViewlessTimeseriesCollections
exclude_files:
# This test calls checkWriteConcernBehaviorForAllCommands() with limitToTimeseriesViews=true,
# which internally runs setFeatureCompatibilityVersion to latestFCV. This is incompatible
@ -31,7 +31,7 @@
- assumes_standalone_mongod
- assumes_against_mongod_not_mongos
- requires_profiling
- requires_fcv_90
- featureFlagCreateViewlessTimeseriesCollections
- name: all_jscore_timeseries_tests_selector
value:
selector:

View File

@ -22,6 +22,8 @@ selector:
- featureFlagRankFusion
# TODO: SERVER-86544 remove feature flag for SPM-3672
- featureFlagMongotIndexedViews
# TODO: SERVER-101595 remove feature flag
- featureFlagCreateViewlessTimeseriesCollections
# Transactions are aborted upon fcv upgrade or downgrade.
- uses_transactions
# Exclude tests that require a specific fcv.

View File

@ -220,6 +220,8 @@ selector:
- featureFlagRankFusionBasic
- featureFlagRankFusionFull
- featureFlagSearchHybridScoringFull
# TODO: SERVER-101595 remove feature flag
- featureFlagCreateViewlessTimeseriesCollections
# Transactions are aborted upon fcv upgrade or downgrade.
- uses_transactions
# Exclude tests that require a specific fcv.

View File

@ -100,6 +100,8 @@ selector:
- featureFlagRankFusionBasic
- featureFlagRankFusionFull
- featureFlagSearchHybridScoringFull
# TODO: SERVER-101595 remove feature flag
- featureFlagCreateViewlessTimeseriesCollections
# Transactions are aborted upon fcv upgrade or downgrade.
- uses_transactions
# Exclude tests that require a specific fcv.

View File

@ -43,6 +43,10 @@ selector:
- jstests/core/timeseries/**/*.js
# TODO SERVER-114739 Join optimization: collation disregarded when joining
- jstests/sharding/query/collation/collation_lookup.js
# TODO SERVER-113718 Do not use join optimizer path if join predicates/fields operate over arrays
- jstests/core/query/release_memory/hash_lookup_unwind.js
- jstests/aggregation/sources/match/sbe_non_leading_match_pbt.js
- jstests/aggregation/sources/lookup/lookup_unwind_equijoin.js
# TODO SERVER-118416 Fix join reordering tripwire assertions for lookup+unwind pipelines and re-enable lookup_unwind_pbt PBT
- jstests/aggregation/sources/lookup/lookup_unwind_pbt.js
# Shard level user writes block is not supported on standalone.

View File

@ -17,6 +17,9 @@ class TimestampFormatter(logging.Formatter):
produced by mongo{d,s} instances in case of 'json' log format but also to
preserve the format used before this flag was introduced.
TODO SERVER-99797: keep only +00:00 timestamp format after moving to json
as the default --logFormat parameter.
Note that +00:00 (UTC) time is used by default.
"""

View File

@ -54,7 +54,6 @@ const kMetadataFieldNames = [
"$sortKey",
"$indexKey",
"$searchScoreDetails",
"$searchRootDocumentId",
"$searchSortValues",
"$searchHighlights",
"$searchSequenceToken",

View File

@ -8645,12 +8645,8 @@ export const authCommandsLib = {
testcases: testcases_transformationOnly,
skipTest: (conn) => {
// Can't run on mongos. Also, $_internalJoinHint requires join optimization which
// is unavailable when the classic engine is forced, or the path arrayness feature is disabled.
return (
!isStandalone(conn) ||
isForceClassicEngine(conn) ||
!isFeatureEnabled(conn, "featureFlagPathArrayness")
);
// is unavailable when the classic engine is forced.
return !isStandalone(conn) || isForceClassicEngine(conn);
},
setup: function (db) {
// Only works with join optimization enabled.
@ -8660,8 +8656,6 @@ export const authCommandsLib = {
internalEnableJoinOptimization: true,
}),
);
// Need an index for multikeyness info.
assert.commandWorked(db.foo.createIndex({dummy: -1, i: 1}));
// Add a document to collection "foo".
assert.commandWorked(db.foo.insertOne({_id: 0, i: 0}));
},
@ -8672,8 +8666,7 @@ export const authCommandsLib = {
internalEnableJoinOptimization: false,
}),
);
// Clean up doc & index.
assert.commandWorked(db.foo.dropIndex({dummy: -1, i: 1}));
// Clean up doc.
assert.commandWorked(db.foo.deleteOne({_id: 0, i: 0}));
},
},

View File

@ -6,7 +6,6 @@
* supported anymore.
*
* @tags: [
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -26,7 +26,7 @@ export function handleRandomSetFCVErrors(e, targetFCV) {
);
return true;
}
if (e.code === ErrorCodes.BackgroundOperationInProgressForNamespace) {
if (e.code === 12587) {
// Cannot downgrade FCV that requires a collMod command when index builds are
// concurrently taking place.
jsTestLog(

View File

@ -83,7 +83,7 @@ export const $config = extendWorkload($baseConfig, function ($config, $super) {
10778001,
// Cannot downgrade FCV that requires a collMod command when index builds are concurrently
// taking place.
ErrorCodes.BackgroundOperationInProgressForNamespace,
12587,
];
// You might end up hitting a shard where the db might've moved.

View File

@ -9,13 +9,12 @@
* What we want to prevent is that a collMod over the {m: 1} index:
* - Starts by considering the collection as timeseries, so it translates {m: 1} to {meta: 1}.
* - Meanwhile the collection is re-created as a regular collection with a {meta: 1} index.
* - Then, it succeeds targeting the {meta: 1} index on the regular collection.
* - Then, it succeeds targeting the {meta: 1} index on the regular collection.
*
* @tags: [
* requires_timeseries,
* # Only viewless timeseries collections can be renamed over.
* featureFlagCreateViewlessTimeseriesCollections,
* requires_fcv_90,
* ]
*/

View File

@ -1,59 +0,0 @@
/**
* Repeatedly create indexes while dropping and recreating timeseries collections, with
* FCV upgrade/downgrade happening in the background.
* This is designed to exercise viewless timeseries upgrade/downgrade.
* TODO(SERVER-114573): Consider removing this test once 9.0 becomes lastLTS.
*
* @tags: [
* requires_timeseries,
* # setFCV requires all nodes on the latest binary.
* multiversion_incompatible,
* # TODO (SERVER-104171) Remove the 'assumes_balancer_off' tag
* assumes_balancer_off,
* # Runs setFCV, which can interfere with other tests.
* incompatible_with_concurrency_simultaneous,
* runs_set_fcv,
* # TODO SERVER-105509 enable test in config shard suites
* config_shard_incompatible,
* creates_background_indexes,
* ]
*/
import {extendWorkload} from "jstests/concurrency/fsm_libs/extend_workload.js";
import {uniformDistTransitions} from "jstests/concurrency/fsm_workload_helpers/state_transition_utils.js";
import {handleRandomSetFCVErrors} from "jstests/concurrency/fsm_workload_helpers/fcv/handle_setFCV_errors.js";
import {$config as $baseConfig} from "jstests/concurrency/fsm_workloads/timeseries/timeseries_create_indexes.js";
export const $config = extendWorkload($baseConfig, function ($config, $super) {
    /**
     * Extra state: picks lastLTS or latest at random and tries to set the FCV to it.
     * An error that handleRandomSetFCVErrors() reports as handled ends the state quietly;
     * any other error is rethrown.
     */
    $config.states.setFCV = function (db, collName) {
        const candidateFCVs = [lastLTSFCV, latestFCV];
        const targetFCV = candidateFCVs[Random.randInt(2)];
        jsTest.log.info("Executing FCV state, setting to:" + targetFCV);
        try {
            const res = db.adminCommand({setFeatureCompatibilityVersion: targetFCV, confirm: true});
            assert.commandWorked(res);
        } catch (e) {
            if (!handleRandomSetFCVErrors(e, targetFCV)) {
                throw e;
            }
            return;
        }
        jsTest.log.info("setFCV state finished");
    };

    // Rebuild the transition table so it is derived from the full set of states,
    // including the setFCV state added above.
    $config.transitions = uniformDistTransitions($config.states);

    $config.teardown = function teardown(db, collName, cluster) {
        // TODO(SERVER-114573): Remove once v9.0 is last LTS and viewless timeseries
        // upgrade/downgrade doesn't happen.
        // A downgrade may have been interrupted due to an index build (SERVER-119738), so it
        // must be completed before upgrading to latest.
        const downgradeRes = db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV, confirm: true});
        // "10778001: Cannot downgrade featureCompatibilityVersion if a previous FCV upgrade
        // stopped in the middle ..." indicates that setFCV was interrupted during an upgrade
        // rather than a downgrade. The setFCV command below completes that upgrade and leaves
        // the FCV at 'latest' for tests that run afterwards.
        assert.commandWorkedOrFailedWithCode(downgradeRes, 10778001);

        const upgradeRes = db.adminCommand({setFeatureCompatibilityVersion: latestFCV, confirm: true});
        assert.commandWorked(upgradeRes);

        $super.teardown.apply(this, arguments);
    };

    return $config;
});

View File

@ -6,7 +6,7 @@
*
* @tags: [
* requires_timeseries,
* # setFCV requires all nodes on the latest binary.
* # Requires all nodes to be running the latest binary.
* multiversion_incompatible,
* # TODO (SERVER-104171) Remove the 'assumes_balancer_off' tag
* assumes_balancer_off,
@ -75,16 +75,6 @@ export const $config = (function () {
assert.commandWorked(db[collName].insert({t: new Date(), temp: 42}));
};
const teardown = function (db, collName, cluster) {
// TODO(SERVER-114573): Remove once v9.0 is last LTS and viewless timeseries upgrade/downgrade doesn't happen.
// A downgrade may have been interrupted due to an index build (SERVER-119738), we must complete it before upgrading to latest.
assert.commandWorkedOrFailedWithCode(
db.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV, confirm: true}),
// "10778001: Cannot downgrade featureCompatibilityVersion if a previous FCV upgrade stopped in the middle ..."
// This error indicates that setFCV was interrupted during an upgrade rather than downgrade.
// The next setFCV command will complete that upgrade and set the FCV to 'latest' for tests that run afterwards.
10778001,
);
assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: latestFCV, confirm: true}));
};

View File

@ -12,7 +12,6 @@
* # Renaming a timeseries collection is only possible with viewless timeseries collections,
* # with legacy viewful timeseries collection it was not supported.
* featureFlagCreateViewlessTimeseriesCollections,
* requires_fcv_90,
* ]
*/

View File

@ -7,7 +7,6 @@
* does_not_support_stepdowns,
* incompatible_with_preimages_by_default,
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -2,13 +2,14 @@
* This tests which collMod options are allowed on a time-series collection.
*
* @tags: [
* # Behavior clarified in binVersion 6.1
* requires_fcv_61,
* # collMod is not retryable
* requires_non_retryable_commands,
* # We need a timeseries collection.
* requires_timeseries,
* # There is a separate test for legacy timeseries: /timeseries_collmod_legacy.js
* featureFlagCreateViewlessTimeseriesCollections,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections
* ]
*/

View File

@ -4,7 +4,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -4,7 +4,6 @@
* @tags: [
* requires_timeseries,
* assumes_no_implicit_collection_creation_after_drop,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -3,12 +3,13 @@
* isTimeseries set to true.
*
* @tags: [
* requires_fcv_90,
* requires_fcv_83,
* assumes_against_mongod_not_mongos,
* requires_capped,
* requires_getmore,
* requires_replication,
* requires_timeseries,
* featureFlagMarkTimeseriesEventsInOplog,
* incompatible_with_snapshot_reads,
* no_selinux,
* ]

View File

@ -4,9 +4,9 @@
// uses_change_streams,
// requires_replication,
// requires_timeseries,
// # The test requires the feature flag to be enabled to create viewless timeseries collections.
// featureFlagCreateViewlessTimeseriesCollections,
// featureFlagChangeStreamPreciseShardTargeting,
// requires_fcv_90,
// ]
import "jstests/multiVersion/libs/verify_versions.js";

View File

@ -7,7 +7,6 @@
* @tags: [
* requires_timeseries,
* requires_sharding,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -5,7 +5,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -8,7 +8,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -6,7 +6,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* uses_parallel_shell,
* ]

View File

@ -5,7 +5,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -7,7 +7,6 @@
* @tags: [
* requires_timeseries,
* requires_sharding,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -5,7 +5,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -5,7 +5,6 @@
*
* @tags: [
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -1,68 +0,0 @@
/**
* TODO (SERVER-116499): Remove this file once 9.0 becomes last LTS.
*
* Tests that setFCV is blocked by in-progress index builds on timeseries collections during
* viewless timeseries upgrade/downgrade, for both replica sets and sharded clusters.
*
* @tags: [
* requires_timeseries,
* ]
*/
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {ShardingTest} from "jstests/libs/shardingtest.js";
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
import {assertCommandWorkedInParallelShell} from "jstests/libs/parallel_shell_helpers.js";
// Bail out early unless the last LTS FCV is still 8.0. The scenarios below transition between
// 'lastLTSFCV' and 'latestFCV', and the file header marks this test for removal once 9.0 becomes
// the last LTS.
if (lastLTSFCV != "8.0") {
jsTest.log.info("Skipping test because last LTS FCV is no longer 8.0");
// Terminates the shell script here; nothing below this guard runs.
quit();
}
/**
 * Verifies that setFCV from 'startFCV' to 'targetFCV' is rejected while an index build on a
 * timeseries collection is in progress, and succeeds once that build has completed.
 *
 * @param db Database handle used to issue setFCV, create the collection and start the index build.
 * @param hangIndexBuildConn Connection on which index builds are paused and resumed.
 * @param startFCV FCV to set before the collection is (re)created.
 * @param targetFCV FCV whose transition is expected to be blocked by the in-progress build.
 */
function runTest(db, hangIndexBuildConn, startFCV, targetFCV) {
    const kIndexName = "m_1";
    const tsColl = db.ts;
    const tsCollName = tsColl.getName();
    const setFCV = (version) => db.adminCommand({setFeatureCompatibilityVersion: version, confirm: true});

    // Start from a clean timeseries collection at the initial FCV.
    assert.commandWorked(setFCV(startFCV));
    assert(tsColl.drop());
    assert.commandWorked(db.createCollection(tsCollName, {timeseries: {timeField: "t", metaField: "m"}}));

    // Force a two-phase index build to hang: the collection must be non-empty, and index builds
    // are paused before createIndexes is issued from a parallel shell.
    assert.commandWorked(tsColl.insertOne({t: ISODate(), m: 1}));
    IndexBuildTest.pauseIndexBuilds(hangIndexBuildConn);
    const joinIndexBuild = assertCommandWorkedInParallelShell(db.getMongo(), db, {
        createIndexes: tsCollName,
        indexes: [{key: {m: 1}, name: kIndexName}],
    });
    IndexBuildTest.waitForIndexBuildToStart(hangIndexBuildConn.getDB(db.getName()), tsCollName, kIndexName);

    // While the build is still running, the FCV transition must be refused.
    assert.commandFailedWithCode(setFCV(targetFCV), ErrorCodes.BackgroundOperationInProgressForNamespace);

    // Let the build finish; the very same transition is then expected to go through.
    IndexBuildTest.resumeIndexBuilds(hangIndexBuildConn);
    joinIndexBuild();
    assert.commandWorked(setFCV(targetFCV));
}
// Replica set: setFCV is sent to the primary, which is also where the index build is paused.
{
    const replSet = new ReplSetTest({nodes: 1});
    replSet.startSet();
    replSet.initiate();
    const testDb = replSet.getPrimary().getDB("test");
    // Downgrade first, then upgrade back.
    for (const [fromFCV, toFCV] of [
        [latestFCV, lastLTSFCV],
        [lastLTSFCV, latestFCV],
    ]) {
        runTest(testDb, replSet.getPrimary(), fromFCV, toFCV);
    }
    replSet.stopSet();
}
// Sharded cluster: setFCV goes through the router, while the index build is paused on the
// shard's primary.
{
    const cluster = new ShardingTest({shards: 1, rs: {nodes: 1}});
    const routerDb = cluster.s.getDB("test");
    // Downgrade first, then upgrade back.
    for (const [fromFCV, toFCV] of [
        [latestFCV, lastLTSFCV],
        [lastLTSFCV, latestFCV],
    ]) {
        runTest(routerDb, cluster.rs0.getPrimary(), fromFCV, toFCV);
    }
    cluster.stop();
}

View File

@ -6,7 +6,6 @@
* @tags: [
* requires_timeseries,
* requires_sharding,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -7,7 +7,6 @@
* @tags: [
* requires_timeseries,
* requires_sharding,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -6,7 +6,6 @@
* @tags: [
* requires_timeseries,
* requires_sharding,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -2,6 +2,7 @@
* Tests that it's not possible to shard or move a malformed timeseries collection where both
* 'coll' and 'system.buckets.coll' exist as collections (see SERVER-90862).
*
* @tags: [featureFlagMarkTimeseriesEventsInOplog_incompatible]
*/
import {skipTestIfViewlessTimeseriesEnabled} from "jstests/core/timeseries/libs/viewless_timeseries_util.js";

View File

@ -1,6 +1,9 @@
/**
* Tests that sharding or moving a buckets collection missing the `timeseries` options fails.
* Those inconsistent collections may be generated by implicit creation bugs (e.g. SERVER-87678).
* @tags: [
* featureFlagMarkTimeseriesEventsInOplog_incompatible,
* ]
*/
import {skipTestIfViewlessTimeseriesEnabled} from "jstests/core/timeseries/libs/viewless_timeseries_util.js";

View File

@ -0,0 +1,112 @@
/**
* Tests that when multiple concurrent stale routing requests target a former primary shard after
* movePrimary, the router coalesces all stale requests into a single CSRS routing refresh, rather than
* triggering a "convoy" of independent refreshes.
*
* @tags: [
* featureFlagShardAuthoritativeDbMetadataCRUD,
* featureFlagShardAuthoritativeDbMetadataDDL,
* ]
*/
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {ShardingTest} from "jstests/libs/shardingtest.js";
import {Thread} from "jstests/libs/parallelTester.js";
const dbName = "testDB";
const collName = "testColl";
const kNumConcurrentOps = 5;
// We need two mongos instances: one for running the DDLs and one that will remain stale.
const st = new ShardingTest({
shards: 2,
mongos: 2,
});
const mongos0 = st.s0;
const mongos1 = st.s1;
// Make shard0 the primary shard of the test database.
assert.commandWorked(mongos0.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
// Seed an unsharded collection with three documents.
const seedDocs = [{_id: 0}, {_id: 1}, {_id: 2}];
assert.commandWorked(mongos0.getDB(dbName)[collName].insertMany(seedDocs));
// Read through BOTH routers so that each of them caches shard0 as the primary (V1).
[mongos0, mongos1].forEach((router, idx) => {
    assert.eq(3, router.getDB(dbName)[collName].find().itcount(), `Failed to prime mongos${idx} routing cache`);
});
// Move the primary through mongos0 only; mongos1 keeps its now-stale view of the database.
assert.commandWorked(mongos0.adminCommand({movePrimary: dbName, to: st.shard1.shardName}));
// Turn on profiling on EVERY config server node so that database routing metadata fetches
// issued by mongos1 can be counted afterwards. Each fetch is a "find" on config.databases.
// All nodes are covered because the catalog client may use Nearest read preference.
const profilingStart = new ISODate();
st.configRS.nodes.forEach((node) => {
    assert.commandWorked(node.getDB("config").setProfilingLevel(2, {slowms: -1}));
});
// mongos1 now issues N concurrent requests, each of which results in a stale db version error.
// All of them are expected to coalesce into a single refresh.
// The failpoint makes every request pause just before the exception is processed, which lets
// the test simulate the convoy. Its 'conns' list is re-sent below to release requests
// (presumably it names the connections that may proceed; confirm against the failpoint).
const kFailPointName = "waitForDBVersionCacheInvalidation";
const fpData = {conns: []};
const invalidationFp = configureFailPoint(mongos1, kFailPointName, fpData);

// One thread per concurrent operation, each running a plain find over its own connection.
const staleFinds = Array.from(
    {length: kNumConcurrentOps},
    () =>
        new Thread(
            function (host, targetDb, targetColl) {
                const conn = new Mongo(host);
                return conn.getDB(targetDb).runCommand({find: targetColl, filter: {}});
            },
            mongos1.host,
            dbName,
            collName,
        ),
);
staleFinds.forEach((worker) => worker.start());
// Do not go on until every operation has reached the failpoint.
invalidationFp.wait({timesEntered: kNumConcurrentOps});

// Release the operations one-by-one, so that each exception is handled only after the previous
// one has completed.
const pausedOps = mongos1
    .getDB("admin")
    .aggregate([{$currentOp: {localOps: true}}, {$project: {connectionId: 1}}])
    .toArray();
pausedOps
    .filter((op) => op.connectionId)
    .forEach((op) => {
        fpData.conns.push(op.connectionId);
        // Re-configure the failpoint with the extended list of connections.
        configureFailPoint(mongos1, kFailPointName, fpData);
        sleep(100);
    });
invalidationFp.off();

// Every find must have succeeded once routing information was refreshed.
staleFinds.forEach((worker) => {
    worker.join();
    assert.commandWorked(worker.returnData(), "find should succeed after routing refresh");
});
// Count how many times the CSRS was queried for the database routing metadata.
// Each query appears as a `find` on config.databases with filter {_id: dbName}.
// We sum across all config server nodes to handle Nearest read preference.
const profileFilter = {
    // Only consider operations profiled after the movePrimary.
    ts: {$gte: profilingStart},
    "command.filter._id": dbName,
};
let totalCsrsDbQueries = 0;
// Matching profiler entries from all nodes, kept so that a failure can show what was observed.
const queryDetails = [];
for (const node of st.configRS.nodes) {
    const entries = node.getDB("config").system.profile.find(profileFilter).toArray();
    totalCsrsDbQueries += entries.length;
    queryDetails.push(...entries);
}
// Exactly one refresh is expected. On failure, report the captured profiler entries so the
// extra (or missing) CSRS queries can be identified without re-running the test.
assert.eq(
    totalCsrsDbQueries,
    1,
    "Expected the concurrent stale requests to coalesce into a single CSRS refresh, but found these queries: " +
        tojson(queryDetails),
);
st.stop();

View File

@ -19,7 +19,6 @@ const kMetadataFieldNames = [
"$sortKey",
"$indexKey",
"$searchScoreDetails",
"$searchRootDocumentId",
"$searchSortValues",
"$searchHighlights",
"$searchSequenceToken",

View File

@ -111,7 +111,6 @@ if (!checkSbeCompletelyDisabled(null)) {
const sbeConn = MongoRunner.runMongod({
setParameter: {
featureFlagGetExecutorDeferredEngineChoice: true,
featureFlagCostBasedRanker: true,
internalQuerySamplingBySequentialScan: true,
// Yield after every document so we reliably hit a yield window in the sampling query.
internalQueryExecYieldIterations: 1,

View File

@ -1,978 +0,0 @@
/**
* End to end test for join optimization being enabled iff no join predicate fields may contain arrays.
*
* @tags: [
* requires_fcv_90,
* requires_sbe
* ]
*/
import {runTestWithUnorderedComparison, joinTestWrapper} from "jstests/libs/query/join_utils.js";
// Must enable path arrayness tracking for this test.
const conn = MongoRunner.runMongod({setParameter: "featureFlagPathArrayness=true"});
const db = conn.getDB(`${jsTestName()}_db`);
joinTestWrapper(db, function runArraynessTest() {
assert.commandWorked(
db.adminCommand({setParameter: 1, internalEnableJoinOptimization: true, internalEnablePathArrayness: true}),
);
const c1 = db.c1;
const c2 = db.c2;
const c3 = db.c3;
c1.drop();
c2.drop();
c3.drop();
assert.commandWorked(
c1.insertMany([
{
_id: 0,
alwaysArray: [],
sometimesArray: 3,
neverArray: 1,
obj: {array: [1, 2, 3], scalar: 1},
},
{
_id: 1,
alwaysArray: [1, 2, 3],
sometimesArray: 2,
neverArray: 1,
obj: {},
},
{
_id: 2,
alwaysArray: [2, 3],
sometimesArray: [3, 4],
neverArray: 1,
obj: {array: [], scalar: 2},
},
]),
);
assert.commandWorked(
c2.insertMany([
{_id: 0, a: 1},
{_id: 1, a: 2},
{_id: 2, a: 3},
]),
);
assert.commandWorked(
c3.insertMany([
{_id: 0, a: 1, obj: {array: [1, 2, 3], scalar: 1}},
{_id: 1, a: 2, obj: {array: [], scalar: 2}},
{_id: 2, a: 3, obj: {}},
]),
);
runTestWithUnorderedComparison({
db,
description: "No arrayness (no indexes) => no joinopt (2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"x": {"_id": 0, "a": 1},
obj: {array: [1, 2, 3], scalar: 1},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"x": {"_id": 0, "a": 1},
obj: {},
},
{
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"x": {"_id": 0, "a": 1},
obj: {array: [], scalar: 2},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness => no joinopt ($expr, 2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {neverArray: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$a", "$$neverArray"]}}}],
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
"x": {
"_id": 0,
"a": 1,
},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
"x": {
"_id": 0,
"a": 1,
},
},
{
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"obj": {
"array": [],
"scalar": 2,
},
"x": {
"_id": 0,
"a": 1,
},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness => no joinopt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness => no joinopt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {neverArray: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$neverArray", "$a"]}}}],
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
assert.commandWorked(c1.createIndex({neverArray: 1}));
runTestWithUnorderedComparison({
db,
description: "No arrayness on foreign field => no joinopt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness on foreign field => no joinopt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {neverArray: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$neverArray", "$a"]}}}],
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
assert.commandWorked(c2.createIndex({a: 1}));
runTestWithUnorderedComparison({
db,
description: "No arrayness on local field => no joinopt (2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "sometimesArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
"x": {
"_id": 2,
"a": 3,
},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
"x": {
"_id": 1,
"a": 2,
},
},
{
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"obj": {
"array": [],
"scalar": 2,
},
"x": {
"_id": 2,
"a": 3,
},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "No arrayness on local field => no joinopt ($expr, 2 node, no suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {sometimesArray: "$sometimesArray"},
pipeline: [{$match: {$expr: {$eq: ["$$sometimesArray", "$a"]}}}],
},
},
{$unwind: "$x"},
],
expectedResults: [
{
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
"x": {
"_id": 2,
"a": 3,
},
},
{
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
"x": {
"_id": 1,
"a": 2,
},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields => join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields => join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "x",
let: {na: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$na", "$a"]}}}],
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields => join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "sometimesArray", // Arrayness of "as" field doesn't matter.
},
},
{$unwind: "$sometimesArray"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
{"alwaysArray": [1, 2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
{"alwaysArray": [2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields => join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "sometimesArray", // Arrayness of "as" field doesn't matter.
let: {na: "$neverArray"},
pipeline: [{$match: {$expr: {$eq: ["$$na", "$a"]}}}],
},
},
{$unwind: "$sometimesArray"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
{"alwaysArray": [1, 2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
{"alwaysArray": [2, 3], "sometimesArray": {"_id": 0, "a": 1}, "neverArray": 1},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1,
});
assert.commandWorked(c1.createIndexes([{sometimesArray: -1}, {alwaysArray: 1}]));
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey localField => no join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "sometimesArray",
foreignField: "a",
as: "y",
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "y": {"_id": 2, "a": 3}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "y": {"_id": 1, "a": 2}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "y": {"_id": 2, "a": 3}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey localField => no join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
as: "y",
let: {sa: "$sometimesArray"},
pipeline: [{$match: {$expr: {$eq: ["$a", "$$sa"]}}}],
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "y": {"_id": 2, "a": 3}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "y": {"_id": 1, "a": 2}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField => no join opt (2 node, suffix)",
coll: c2,
pipeline: [
{
$lookup: {
from: c1.getName(),
localField: "a",
foreignField: "alwaysArray",
as: "y",
},
},
{$unwind: "$y"},
{$project: {_id: 0, "y.obj": 0}},
],
expectedResults: [
{"a": 1, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}},
{"a": 2, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}},
{"a": 2, "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}},
{"a": 3, "y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1}},
{"a": 3, "y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1}},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField => no join opt ($expr, 2 node, suffix)",
coll: c2,
pipeline: [
{
$lookup: {
from: c1.getName(),
as: "y",
let: {aaa: "$a"},
pipeline: [{$match: {$expr: {$eq: ["$alwaysArray", "$$aaa"]}}}],
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField/localField => no join opt (2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c1.getName(),
localField: "sometimesArray",
foreignField: "alwaysArray",
as: "y",
},
},
{$unwind: "$y"},
{$project: {_id: 0, obj: 0, "y.obj": 0}},
],
expectedResults: [
{
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1},
},
{
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1},
},
{
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1},
},
{
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1},
},
{
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"y": {"_id": 1, "alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1},
},
{
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"y": {"_id": 2, "alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1},
},
],
expectedUsedJoinOptimization: false,
});
runTestWithUnorderedComparison({
db,
description: "Arrayness on all fields, multikey foreignField/localField => no join opt ($expr, 2 node, suffix)",
coll: c1,
pipeline: [
{
$lookup: {
from: c1.getName(),
as: "y",
let: {sa: "$sometimesArray"},
pipeline: [{$match: {$expr: {$eq: ["$alwaysArray", "$$sa"]}}}],
},
},
{$unwind: "$y"},
],
expectedResults: [],
expectedUsedJoinOptimization: false,
});
// Ensure we have arrayness info for c3 & obj field in c1.
assert.commandWorked(c3.createIndex({a: -1, obj: 1}));
assert.commandWorked(c1.createIndex({obj: 1}));
runTestWithUnorderedComparison({
db,
description: "As field has an array subfield, used in subsequent join => no join opt in suffix",
coll: c2,
pipeline: [
// This is ok, should use join opt.
{
$lookup: {
from: c1.getName(),
localField: "a",
foreignField: "neverArray",
as: "y",
},
},
{$unwind: "$y"},
// Prefix should end here: next predicate involves an array.
{
$lookup: {
from: c3.getName(),
localField: "y.sometimesArray",
foreignField: "a",
as: "z",
},
},
{$unwind: "$z"},
{$project: {"y.obj": 0, "z.obj": 0}},
],
expectedResults: [
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
},
"z": {
"_id": 2,
"a": 3,
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
},
"z": {
"_id": 1,
"a": 2,
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
},
"z": {
"_id": 2,
"a": 3,
},
},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 1, // We should not see a second!
});
// Counterpart of the preceding case: here the second $lookup joins on "y.neverArray", a scalar
// subfield of the first $lookup's "as" field, so both $lookups are expected to be handled by
// join optimization. The description states that outcome, matching the expectations below.
runTestWithUnorderedComparison({
    db,
    description: "As field has scalar subfield, used in subsequent join => join opt for whole pipeline",
    coll: c2,
    pipeline: [
        // This is ok, should use join opt.
        {
            $lookup: {
                from: c1.getName(),
                as: "y",
                let: {aaa: "$a"},
                pipeline: [{$match: {$expr: {$eq: ["$$aaa", "$neverArray"]}}}],
            },
        },
        {$unwind: "$y"},
        // The following is as well! We should have the whole pipeline in our eligible prefix.
        {
            $lookup: {
                from: c3.getName(),
                localField: "y.neverArray",
                foreignField: "a",
                as: "z",
            },
        },
        {$unwind: "$z"},
        {$project: {"y.obj": 0, "z.obj": 0}},
    ],
    expectedResults: [
        {
            "_id": 0,
            "a": 1,
            "y": {
                "_id": 0,
                "alwaysArray": [],
                "sometimesArray": 3,
                "neverArray": 1,
            },
            "z": {
                "_id": 0,
                "a": 1,
            },
        },
        {
            "_id": 0,
            "a": 1,
            "y": {
                "_id": 1,
                "alwaysArray": [1, 2, 3],
                "sometimesArray": 2,
                "neverArray": 1,
            },
            "z": {
                "_id": 0,
                "a": 1,
            },
        },
        {
            "_id": 0,
            "a": 1,
            "y": {
                "_id": 2,
                "alwaysArray": [2, 3],
                "sometimesArray": [3, 4],
                "neverArray": 1,
            },
            "z": {
                "_id": 0,
                "a": 1,
            },
        },
    ],
    expectedUsedJoinOptimization: true,
    expectedNumJoinStages: 2, // Both $lookups should be pushed down!
});
runTestWithUnorderedComparison({
db,
description: "Test arrayness check works for subfields + compound join predicates.",
coll: c2,
pipeline: [
// This is ok, should use join opt.
{
$lookup: {
from: c1.getName(),
as: "y",
let: {aaa: "$a"},
pipeline: [
{
$match: {
$expr: {
$and: [
{$eq: ["$$aaa", "$neverArray"]},
{$gt: ["$sometimesArray", 0]}, // Residual predicate, should still be ok.
],
},
},
},
],
},
},
{$unwind: "$y"},
// The following should be ok as well.
{
$lookup: {
from: c3.getName(),
as: "z",
let: {ooo: "$y.obj"},
pipeline: [{$match: {$expr: {$eq: ["$obj", "$$ooo"]}}}],
},
},
{$unwind: "$z"},
// But not this (since we don't have arrayness for obj.scalar).
{
$lookup: {
from: c1.getName(),
as: "w",
let: {ooo: "$z.obj.scalar"},
pipeline: [{$match: {$expr: {$eq: ["$neverArray", "$$ooo"]}}}],
},
},
{$unwind: "$w"},
],
expectedResults: [
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"z": {
"_id": 0,
"a": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"w": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"z": {
"_id": 0,
"a": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"w": {
"_id": 1,
"alwaysArray": [1, 2, 3],
"sometimesArray": 2,
"neverArray": 1,
"obj": {},
},
},
{
"_id": 0,
"a": 1,
"y": {
"_id": 0,
"alwaysArray": [],
"sometimesArray": 3,
"neverArray": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"z": {
"_id": 0,
"a": 1,
"obj": {
"array": [1, 2, 3],
"scalar": 1,
},
},
"w": {
"_id": 2,
"alwaysArray": [2, 3],
"sometimesArray": [3, 4],
"neverArray": 1,
"obj": {
"array": [],
"scalar": 2,
},
},
},
],
expectedUsedJoinOptimization: true,
expectedNumJoinStages: 2,
});
// Disabling internalEnablePathArrayness should prevent join optimization from using arrayness
// info, so a query that previously qualified for joinopt must no longer qualify.
assert.commandWorked(db.adminCommand({setParameter: 1, internalEnablePathArrayness: false}));
runTestWithUnorderedComparison({
db,
description: "internalEnablePathArrayness=false => no joinopt even when arrayness is known",
coll: c1,
pipeline: [
{
$lookup: {
from: c2.getName(),
localField: "neverArray",
foreignField: "a",
as: "x",
},
},
{$unwind: "$x"},
{$project: {_id: 0, obj: 0}},
],
expectedResults: [
{"alwaysArray": [], "sometimesArray": 3, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [1, 2, 3], "sometimesArray": 2, "neverArray": 1, "x": {"_id": 0, "a": 1}},
{"alwaysArray": [2, 3], "sometimesArray": [3, 4], "neverArray": 1, "x": {"_id": 0, "a": 1}},
],
expectedUsedJoinOptimization: false,
});
});
MongoRunner.stopMongod(conn);

View File

@ -2,9 +2,8 @@
// Validate that join optimization does not run on sharded collections.
//
// @tags: [
// requires_fcv_90,
// requires_sbe,
// featureFlagPathArrayness
// requires_fcv_83,
// requires_sbe
// ]
//
import {joinOptUsed} from "jstests/libs/query/join_utils.js";
@ -31,10 +30,10 @@ const db = sharded.getDB("test");
sharded.shard0.getDB("test").setLogLevel(5, "query");
sharded.shard1.getDB("test").setLogLevel(5, "query");
const docs = [{f1: "aaa", f2: 123}, {f1: "bbb", f2: 0}, {f2: -1}, {f1: "zzz"}];
for (const coll of ["coll1", "coll2", "coll3", "coll4"]) {
assert.commandWorked(db[coll].insertMany(docs));
assert.commandWorked(db[coll].createIndex({"dummy": 1, "f1": 1, "f2": -1}));
}
assert.commandWorked(db["coll1"].insertMany(docs));
assert.commandWorked(db["coll2"].insertMany(docs));
assert.commandWorked(db["coll3"].insertMany(docs));
assert.commandWorked(db["coll4"].insertMany(docs));
// Ensure join optimization is disabled.
assert(sharded.shard0.getDB("test").adminCommand({setParameter: 1, internalEnableJoinOptimization: false}));

View File

@ -7,6 +7,7 @@
* requires_timeseries,
* requires_persistence,
* requires_fcv_80,
* featureFlagMarkTimeseriesEventsInOplog_incompatible,
* ]
*/
import {skipTestIfViewlessTimeseriesEnabled} from "jstests/core/timeseries/libs/viewless_timeseries_util.js";

View File

@ -271,7 +271,8 @@ testSpillingMetrics({
if (FeatureFlagUtil.isPresentAndEnabled(db, "ExtendedAutoSpilling")) {
testSpillingMetrics({
stageName: "geoNear",
expectedSpillingMetrics: {spills: 4, spilledBytes: 648},
expectedSpillingMetrics: {spills: 6, spilledBytes: 648},
expectedSbeSpillingMetrics: {spills: 20, spilledBytes: 1130},
collName: geoCollName,
});
}

View File

@ -7,6 +7,7 @@
* does_not_support_stepdowns,
* does_not_support_transactions,
* requires_replication,
* featureFlagMarkTimeseriesEventsInOplog_incompatible,
* ]
*/
import {skipTestIfViewlessTimeseriesEnabled} from "jstests/core/timeseries/libs/viewless_timeseries_util.js";

View File

@ -5,7 +5,6 @@
* supported anymore.
*
* @tags: [
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* ]
*/

View File

@ -805,16 +805,16 @@
"winningPlan": [
"INLJ supplier.s_suppkey = ps_suppkey",
" -> [none] INLJ nation_s.n_nationkey = s_nationkey",
" -> [none] INLJ n_regionkey = r_regionkey",
" -> [none] NLJ r_regionkey = n_regionkey",
" -> [region_s] FETCH: plan_stability_tpch_fuzzed.region ",
" -> IXSCAN: plan_stability_tpch_fuzzed.region r_regionkey_1 {'r_regionkey':['[4.0, 4.0]']}",
" -> [nation_s] COLLSCAN: plan_stability_tpch_fuzzed.nation {'$nor':[{'n_name':{'$eq':'ALGERIA'}},{'n_regionkey':{'$eq':1}},{'n_name':{'$in':['EGYPT','INDONESIA']}},{'n_name':{'$not':{'$in':['CHINA','ROMANIA','SAUDI ARABIA']}}}]} ",
" -> [region_s] FETCH: plan_stability_tpch_fuzzed.region {'r_regionkey':{'$eq':4}} ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.region r_regionkey_1",
" -> [supplier] FETCH: plan_stability_tpch_fuzzed.supplier {'$nor':[{'s_nationkey':{'$eq':1}},{'s_nationkey':{'$eq':14}},{'s_acctbal':{'$gte':2060.13}}]} ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.supplier s_nationkey_1",
" -> [none] FETCH: plan_stability_tpch_fuzzed.partsupp {'ps_comment':{'$regex':'^he'}} ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.partsupp ps_suppkey_1"],
"keys" : 930,
"docs" : 955,
"keys" : 928,
"docs" : 953,
"rows" : 11,
"csum" : "dd2566c247c4e610"},
@ -1923,16 +1923,17 @@
{"$unwind":"$region_s"},
{"$match":{"$nor":[{"nation_s.n_regionkey":1}]}}],"cursor":{},"idx":78},
"winningPlan": [
"HJ partsupp.ps_partkey = p_partkey",
"INLJ partsupp.ps_partkey = p_partkey",
" -> [none] INLJ s_suppkey = ps_suppkey",
" -> [supplier] COLLSCAN: plan_stability_tpch_fuzzed.supplier {'$and':[{'$or':[{'s_acctbal':{'$lte':-685.94}},{'s_nationkey':{'$in':[2,17]}}]},{'$nor':[{'s_nationkey':{'$eq':5}},{'s_acctbal':{'$gt':9166.95}},{'s_acctbal':{'$gt':9537.73}}]}]} ",
" -> [partsupp] FETCH: plan_stability_tpch_fuzzed.partsupp {'$and':[{'ps_supplycost':{'$lte':161.52}},{'ps_availqty':{'$lt':854}}]} ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.partsupp ps_suppkey_1",
" -> [none] COLLSCAN: plan_stability_tpch_fuzzed.part ",
" -> [none] FETCH: plan_stability_tpch_fuzzed.part ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.part p_partkey_1",
"$lookup nation n_nationkey_1",
"$lookup region r_regionkey_1"],
"keys" : 7892,
"docs" : 28892,
"keys" : 8003,
"docs" : 9003,
"rows" : 6,
"csum" : "d5151374bbc944dd"},
@ -4136,18 +4137,19 @@
{"$unwind":"$region_s"},
{"$match":{"$or":[{"nation_s.n_name":"ALGERIA"},{"partsupp.ps_comment":{"$regex":{"$regex":"^ a","$options":""}}},{"p_mfgr":"Manufacturer#3"},{"supplier.s_acctbal":{"$eq":5322.35}}]}}],"cursor":{},"idx":173},
"winningPlan": [
"HJ partsupp.ps_partkey = p_partkey",
"INLJ partsupp.ps_partkey = p_partkey",
" -> [none] INLJ supplier.s_suppkey = ps_suppkey",
" -> [none] HJ s_nationkey = n_nationkey",
" -> [supplier] COLLSCAN: plan_stability_tpch_fuzzed.supplier {'$and':[{'$or':[{'s_name':{'$eq':'Supplier#000000537'}},{'s_acctbal':{'$in':[1871.86,8924.02]}}]},{'s_acctbal':{'$lt':7627.85}},{'s_nationkey':{'$not':{'$eq':16}}}]} ",
" -> [nation_s] COLLSCAN: plan_stability_tpch_fuzzed.nation {'$or':[{'n_regionkey':{'$eq':4}},{'n_name':{'$not':{'$in':['ARGENTINA','INDONESIA']}}}]} ",
" -> [partsupp] FETCH: plan_stability_tpch_fuzzed.partsupp {'ps_availqty':{'$gt':3991}} ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.partsupp ps_suppkey_1",
" -> [none] COLLSCAN: plan_stability_tpch_fuzzed.part ",
" -> [none] FETCH: plan_stability_tpch_fuzzed.part ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.part p_partkey_1",
"$match",
"$lookup region r_regionkey_1"],
"keys" : 89,
"docs" : 21114,
"keys" : 141,
"docs" : 1166,
"rows" : 9,
"csum" : "4e49e200a7760e55"},
@ -5105,14 +5107,14 @@
{"$unwind":"$lineitem"},
{"$match":{"$and":[{"p_container":{"$in":["JUMBO PKG","LG JAR","WRAP DRUM"]}}]}}],"cursor":{},"idx":218},
"winningPlan": [
"INLJ partsupp.ps_partkey = l_partkey, p_partkey = l_partkey",
"HJ partsupp.ps_partkey = l_partkey, p_partkey = l_partkey",
" -> [none] HJ ps_partkey = p_partkey",
" -> [partsupp] COLLSCAN: plan_stability_tpch_fuzzed.partsupp {'ps_comment':{'$regex':'^pe'}} ",
" -> [none] COLLSCAN: plan_stability_tpch_fuzzed.part {'p_container':{'$in':['JUMBO PKG','LG JAR','WRAP DRUM']}} ",
" -> [lineitem] FETCH: plan_stability_tpch_fuzzed.lineitem {'$and':[{'l_shipdate':{'$lt':'1993-05-14T00:00:00.000Z'}},{'l_extendedprice':{'$gte':75749.12}},{'l_shipmode':{'$in':['FOB','MAIL','RAIL','REG AIR']}},{'$nor':[{'l_orderkey':{'$eq':232065}},{'l_quantity':{'$eq':35}},{'l_shipinstruct':{'$eq':'TAKE BACK RETURN'}},{'l_shipmode':{'$eq':'TRUCK'}},{'l_suppkey':{'$eq':543}},{'l_commitdate':{'$lt':'1992-11-21T00:00:00.000Z'}}]}]} ",
" -> INDEX_PROBE_NODE: plan_stability_tpch_fuzzed.lineitem l_partkey_1"],
"keys" : 1182,
"docs" : 101182,
" -> [lineitem] FETCH: plan_stability_tpch_fuzzed.lineitem {'$and':[{'$nor':[{'l_commitdate':{'$lt':'1992-11-21T00:00:00.000Z'}},{'l_orderkey':{'$eq':232065}},{'l_quantity':{'$eq':35}},{'l_shipinstruct':{'$eq':'TAKE BACK RETURN'}},{'l_shipmode':{'$eq':'TRUCK'}},{'l_suppkey':{'$eq':543}}]},{'l_extendedprice':{'$gte':75749.12}},{'l_shipmode':{'$in':['FOB','MAIL','RAIL','REG AIR']}}]} ",
" -> IXSCAN: plan_stability_tpch_fuzzed.lineitem l_shipdate_1 {'l_shipdate':['[new Date(-9223372036854775808), new Date(737337600000))']}"],
"keys" : 108907,
"docs" : 208907,
"rows" : 1,
"csum" : "da15757b8d3a41c6"},
@ -5322,7 +5324,7 @@
"csum" : "ca7f0bfcb0e88d85"}
],
">>>totals": {"commands": 222, "keys": 1418505, "docs": 14600273, "rows": 41006}
">>>totals": {"commands": 222, "keys": 1526391, "docs": 14668159, "rows": 41006}
}

View File

@ -1,9 +1,7 @@
//
// Test that the cardinality estimates for two-table joins approximate reality
// @tags: [
// requires_sbe,
// featureFlagPathArrayness,
// requires_fcv_90
// requires_sbe
// ]
//
@ -35,7 +33,6 @@ function populate() {
});
}
db.many_rows.drop();
db.many_rows.insertMany(documents);
db.many_rows.createIndex({i_idx: 1});
db.many_rows.createIndex({i_idx_offset: 1});
@ -43,23 +40,16 @@ function populate() {
db.many_rows.createIndex({c_idx: 1});
db.many_rows.createIndex({d_idx: 1});
db.many_rows.createIndex({n_idx: 1});
// Not used in planning, but needed for multikeyness info.
db.many_rows.createIndex({dummy: 1, i_noidx: -1, missing_field: 1});
// An empty collection
db.no_rows.drop();
db.no_rows.createIndex({i_idx: 1});
// Not used in planning, but needed for multikeyness info.
db.no_rows.createIndex({dummy: 1, i_idx_offset: -1});
// Collection with a single row
db.one_row.drop();
db.one_row.insert({i_idx: 1});
db.one_row.createIndex({i_idx: 1});
// Collection with 1 non-null document
const nullDocuments = [];
db.mostly_nulls.drop();
db.mostly_nulls.insert({i_idx: 1});
for (let i = 0; i < collSize; i++) {
nullDocuments.push({

View File

@ -37,7 +37,7 @@ const thirdColl = db[jsTestName() + "_third"];
thirdColl.drop();
assert.commandWorked(thirdColl.insertMany(testDocs));
// Add index for multikeyness info for path arrayness.
assert.commandWorked(thirdColl.createIndex({dummy: 1, "foo": 1, "key.foo": 1}));
assert.commandWorked(thirdColl.createIndex({dummy: 1, "key.foo": 1}));
const testCases = [
{

View File

@ -316,9 +316,6 @@ const animalsDocs = [
assert.commandWorked(locations.insertMany(locationsDocs));
assert.commandWorked(animals.insertMany(animalsDocs));
// Dummy indexes used for multikeyness info by join opt.
assert.commandWorked(animals.createIndex({"dummy": -1, "locationName": -1}));
assert.commandWorked(locations.createIndex({"dummy": 1, "name": -1}));
outputPipelineAndSlowQueryLog(
animals,
[

View File

@ -5,7 +5,6 @@
* @tags: [
* requires_mongobridge,
* requires_timeseries,
* requires_fcv_90,
* featureFlagCreateViewlessTimeseriesCollections,
* multiversion_incompatible,
* ]

View File

@ -974,7 +974,6 @@ mongo_cc_library(
"//src/mongo/db/query/compiler/ce/sampling:ce_multikey_dotted_path_support",
"//src/mongo/db/query/compiler/ce/sampling:sampling_estimator_interface",
"//src/mongo/db/query/compiler/ce/sampling:sampling_math",
"//src/mongo/db/query/compiler/dependency_analysis:pipeline_dependency_graph",
"//src/mongo/db/query/compiler/optimizer/cost_based_ranker:estimates",
"//src/mongo/db/query/compiler/optimizer/join:plan_enumerator",
"//src/mongo/db/query/compiler/optimizer/join:reorder_joins",

View File

@ -116,7 +116,7 @@ bool RandomSamplingStrategy::performSampling(OperationContext* opCtx,
// The first method to try when populating the 'markersMap'. Note: sampling can fall back to
// scanning if the cached collection sizes aren't accurate.
const auto initialCreationMethod = CollectionTruncateMarkers::computeInitialCreationMethod(
numRecords, dataSize, _minBytesPerMarker, false /* forceScanning */);
numRecords, dataSize, _minBytesPerMarker);
LOGV2_INFO(7658604,
"Decided on initial creation method for pre-images truncate markers initialization",
"initialCreationMethod"_attr =

View File

@ -163,8 +163,7 @@ void cloneCollectionAsCapped(OperationContext* opCtx,
static_cast<long long>(
toCollection.getCollectionPtr()->getRecordStore()->storageSize(ru) * 2));
long long excessSize =
fromCollection.getCollectionPtr()->latestSizeCount(opCtx).size - allocatedSpaceGuess;
long long excessSize = fromCollection.getCollectionPtr()->dataSize(opCtx) - allocatedSpaceGuess;
auto exec =
InternalPlanner::collectionScan(opCtx,

View File

@ -482,8 +482,6 @@ public:
BOOL_FIELD_BUILDER_FN(supportsCursorReuseForExpressPathQueries) \
BOOL_FIELD_BUILDER_FN(supportsFindAndModifyImageCollection) \
BOOL_FIELD_BUILDER_FN(supportsLocalCollections) \
BOOL_FIELD_BUILDER_FN(supportsPersistentOplogCapMaintainerThread) \
BOOL_FIELD_BUILDER_FN(supportsAsyncOplogMarkerGeneration) \
BOOL_FIELD_BUILDER_FN(supportsOplogSampling) \
BOOL_FIELD_BUILDER_FN(supportsPreservingPreparedTxnInPreciseCheckpoints) \
BOOL_FIELD_BUILDER_FN(supportsTableLogging) \

View File

@ -303,6 +303,7 @@ bool NearStage::isEOF() const {
std::unique_ptr<PlanStageStats> NearStage::getStats() {
auto ret = std::make_unique<PlanStageStats>(_commonStats, _stageType);
updateSpillingStats();
ret->specific = std::make_unique<NearStats>(_specificStats);
for (size_t i = 0; i < _childrenIntervals.size(); ++i) {
ret->children.emplace_back(_childrenIntervals[i]->covering->getStats());

View File

@ -42,7 +42,6 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/shard_role/lock_manager/exception_util.h"
#include "mongo/db/shard_role/lock_manager/lock_manager_defs.h"
#include "mongo/db/shard_role/shard_catalog/allow_read_from_latest_on_secondary.h"
#include "mongo/db/shard_role/shard_catalog/catalog_raii.h"
#include "mongo/db/shard_role/shard_catalog/collection.h"
#include "mongo/db/shard_role/shard_catalog/collection_catalog.h"
@ -178,13 +177,6 @@ StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingInd
const char* curopMessage = "Index Build: scanning collection";
ProgressMeterHolder progressMeter;
{
// We use the number of records to track progress, so it should be fine to read from latest
// and get a potentially slightly incorrect value here. Without this block, we trip an
// assertion because we are performing a nested acquisition where the outer acquisition is a
// write acquisition and uses kNoTimestamp whereas the inner acquisition is a read and would
// require kLastApplied.
AllowReadFromLatestOnSecondaryBlock_UNSAFE AllowReadFromLatestOnSecondaryBlock_UNSAFE(
opCtx);
std::unique_lock<Client> lk(*opCtx->getClient());
progressMeter.set(lk,
CurOp::get(opCtx)->setProgress(
@ -218,14 +210,6 @@ StatusWith<std::pair<long long, long long>> IndexBuildsManager::startBuildingInd
"error"_attr = redact(validStatus));
rs->deleteRecord(opCtx, *shard_role_details::getRecoveryUnit(opCtx), id);
{
// We use the number of records to track progress, so it should be fine to
// read from latest and get a potentially slightly incorrect value here.
// Without this block, we trip an assertion because we are performing a
// nested acquisition where the outer acquisition is a write acquisition and
// uses kNoTimestamp whereas the inner acquisition is a read and would
// require kLastApplied.
AllowReadFromLatestOnSecondaryBlock_UNSAFE
AllowReadFromLatestOnSecondaryBlock_UNSAFE(opCtx);
std::unique_lock<Client> lk(*opCtx->getClient());
// Must reduce the progress meter's expected total after deleting an invalid
// document from the collection.

View File

@ -56,7 +56,6 @@
#include "mongo/db/shard_role/lock_manager/d_concurrency.h"
#include "mongo/db/shard_role/lock_manager/exception_util.h"
#include "mongo/db/shard_role/lock_manager/lock_manager_defs.h"
#include "mongo/db/shard_role/shard_catalog/allow_read_from_latest_on_secondary.h"
#include "mongo/db/shard_role/shard_catalog/collection_catalog.h"
#include "mongo/db/shard_role/shard_catalog/collection_yield_restore.h"
#include "mongo/db/shard_role/shard_catalog/index_descriptor.h"
@ -619,19 +618,11 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(
MultikeyPathTracker::get(opCtx).startTrackingMultikeyPathInfo();
const char* curopMessage = "Index Build: scanning collection";
const auto numRecords = collection->getCollectionPtr()->numRecords(opCtx);
ProgressMeterHolder progress;
{
// We use the number of records to track progress, so it should be fine to read from latest
// and get a potentially slightly incorrect value here. Without this block, we trip an
// assertion because we are performing a nested acquisition where the outer acquisition is a
// write acquisition and uses kNoTimestamp whereas the inner acquisition is a read and would
// require kLastApplied.
AllowReadFromLatestOnSecondaryBlock_UNSAFE allowReadFromLatest(opCtx);
std::unique_lock<Client> lk(*opCtx->getClient());
progress.set(lk,
CurOp::get(opCtx)->setProgress(
lk, curopMessage, collection->getCollectionPtr()->numRecords(opCtx)),
opCtx);
progress.set(lk, CurOp::get(opCtx)->setProgress(lk, curopMessage, numRecords), opCtx);
}
hangAfterSettingUpIndexBuild.executeIf(
@ -761,12 +752,6 @@ Status MultiIndexBlock::insertAllDocumentsInCollection(
tassert(7683103, "Expected CollectionAcquisition to be initialized", collection);
restartCollectionScan = false;
{
// We use the number of records to track progress, so it should be fine to read from
// latest and get a potentially slightly incorrect value here. Without this block, we
// trip an assertion because we are performing a nested acquisition where the outer
// acquisition is a write acquisition and uses kNoTimestamp whereas the inner
// acquisition is a read and would require kLastApplied.
AllowReadFromLatestOnSecondaryBlock_UNSAFE allowReadFromLatest(opCtx);
std::unique_lock<Client> lk(*opCtx->getClient());
progress.get(lk)->reset(collection->getCollectionPtr()->numRecords(opCtx));
}
@ -928,12 +913,6 @@ void MultiIndexBlock::_doCollectionScan(OperationContext* opCtx,
bulkDocsScannedCounter.add(1);
{
// We use the number of records to track progress, so it should be fine to read from
// latest and get a potentially slightly incorrect value here. Without this block, we
// trip an assertion because we are performing a nested acquisition where the outer
// acquisition is a write acquisition and uses kNoTimestamp whereas the inner
// acquisition is a read and would require kLastApplied.
AllowReadFromLatestOnSecondaryBlock_UNSAFE allowReadFromLatest(opCtx);
std::unique_lock<Client> lk(*opCtx->getClient());
progress->get(lk)->setTotalWhileRunning(
collection.getCollectionPtr()->numRecords(opCtx));

View File

@ -46,7 +46,6 @@
#include "mongo/db/shard_role/shard_catalog/collection.h"
#include "mongo/db/shard_role/shard_catalog/collection_options.h"
#include "mongo/db/transaction/transaction_operations.h"
#include "mongo/platform/atomic.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/modules.h"
#include "mongo/util/time_support.h"
@ -89,10 +88,8 @@ public:
}
// Add 'observer' to the list of observers to call. Observers are called in registration order.
// Registration must be done while no calls to observers are made. Callers may optionally call
// seal() once registration is complete; any addObserver() after seal() will invariant.
// Registration must be done while no calls to observers are made.
void addObserver(std::unique_ptr<OpObserver> observer) {
invariant(!_sealed.load(), "OpObserverRegistry::addObserver called after seal()");
const auto& nsFilters = observer->getNamespaceFilters();
_observers.push_back(std::move(observer));
@ -138,14 +135,6 @@ public:
}
}
// Seal the registry against further observer registration. After this call, any subsequent
// addObserver() will invariant. Iteration methods are unaffected. Idempotent; safe to call
// multiple times. This enforces the documented invariant that all observers must be
// registered before any observer callback can fire.
void seal() {
_sealed.store(true);
}
void onCreateIndex(OperationContext* const opCtx,
const NamespaceString& nss,
const UUID& uuid,
@ -771,9 +760,5 @@ private:
std::vector<OpObserver*> _onDeleteSystemObservers; // *.system.*
std::vector<OpObserver*> _onDeleteUserObservers; // not config nor system
// Will impact writes to all user collections.
// Set by seal() once registration is complete. Enforces the documented invariant that all
// observers must be registered before any callback fires.
Atomic<bool> _sealed{false};
};
} // namespace MONGO_MOD_PUB mongo

View File

@ -278,30 +278,5 @@ DEATH_TEST_F(OpObserverRegistryTestDeathTest,
checkInconsistentOpTime(op);
}
TEST_F(OpObserverRegistryTest, SealIsIdempotent) {
registry.addObserver(std::move(unique1));
registry.seal();
registry.seal();
}
TEST_F(OpObserverRegistryTest, SealDoesNotAffectCallbacks) {
registry.addObserver(std::move(unique1));
registry.addObserver(std::move(unique2));
registry.seal();
registry.onDropDatabase(opCtx,
DatabaseName::createDatabaseName_forTest(boost::none, "test"),
false /*markFromMigrate*/);
ASSERT_EQUALS(observer1->drops, 1);
ASSERT_EQUALS(observer2->drops, 1);
}
DEATH_TEST_F(OpObserverRegistryTestDeathTest,
AddObserverAfterSealInvariants,
"OpObserverRegistry::addObserver called after seal()") {
registry.addObserver(std::move(unique1));
registry.seal();
registry.addObserver(std::move(unique2));
}
} // namespace
} // namespace mongo

View File

@ -77,17 +77,17 @@ struct NodeContainer {
using Id = TypedId<T>;
T& operator[](Id id) {
tassert(12503201,
fmt::format("Invalid access at {}, container size {}", id.value, size()),
static_cast<size_t>(id.value) < size());
return _nodes[id.value];
if constexpr (kDebugBuild) {
invariant(id, str::stream() << "Invalid access, container size " << size());
}
return _nodes.at(id.value);
}
const T& operator[](Id id) const {
tassert(12503202,
fmt::format("Invalid access at {}, container size {}", id.value, size()),
static_cast<size_t>(id.value) < size());
return _nodes[id.value];
if constexpr (kDebugBuild) {
invariant(id, str::stream() << "Invalid access, container size " << size());
}
return _nodes.at(id.value);
}
bool empty() const {
@ -1356,15 +1356,6 @@ private:
*/
void verifyExactRange(DocumentSourceContainer::const_iterator startIt,
DocumentSourceContainer::const_iterator endIt) const {
auto rangeSize = std::distance(startIt, endIt);
tassert(12503203,
fmt::format("Graph size mismatch: range has {} stages but graph has {} "
"(complete pipeline has {})",
rangeSize,
_stages.getNextId().value,
_container.size()),
rangeSize == _stages.getNextId().value);
int stageId = 0;
for (auto it = startIt; it != endIt; ++it, ++stageId) {
const auto* actualPtr = _stages[StageId(stageId)].documentSource.get();
@ -1378,6 +1369,11 @@ private:
fmt::ptr(expectedPtr)),
actualPtr == expectedPtr);
}
tassert(12299002,
fmt::format("Expected to have reached the end of the graph ({} != {})",
stageId,
_stages.getNextId().value),
stageId == _stages.getNextId().value);
}
/**

View File

@ -34,10 +34,10 @@
#include "mongo/db/pipeline/document_source_geo_near.h"
#include "mongo/db/pipeline/document_source_internal_join_hint.h"
#include "mongo/db/pipeline/document_source_lookup.h"
#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/pipeline/expression_context_builder.h"
#include "mongo/db/pipeline/pipeline_d.h"
#include "mongo/db/pipeline/pipeline_factory.h"
#include "mongo/db/query/compiler/dependency_analysis/pipeline_dependency_graph.h"
#include "mongo/db/query/compiler/optimizer/join/path_resolver.h"
#include "mongo/db/query/compiler/optimizer/join/predicate_extractor.h"
#include "mongo/db/query/util/disjoint_set.h"
@ -222,67 +222,6 @@ Status addExprJoinPredicates(MutableJoinGraph& graph,
return Status::OK();
}
/**
* Helper function to determine the arrayness of a field that may have been modified by the pipeline
* while tracking "as" path arrayness. Note: 'expCtx' must be non-const since the arrayness check
* updates state that provides a non-multikey guarantee for any field we check the arrayness of.
*
* TODO SERVER-123929: replace this function once dependency analysis supports tracking arrayness of
* lookup "as" fields.
*/
bool canPipelinePathBeArray(const pipeline::dependency_graph::DependencyGraph& pipelineBaseCollDeps,
ExpressionContext* expCtx,
DocumentSource* ds,
const FieldPath& fp) {
auto path = fp.fullPath();
auto* declStage = pipelineBaseCollDeps.getDeclaringStage(ds, path).get();
tassert(11371801, "Expected stage to differ", declStage != ds);
if (auto* originLookup = dynamic_cast<DocumentSourceLookUp*>(declStage); originLookup) {
// The "as" field produced by a previous $lookup cannot be an array, since any previous
// $lookup must have an $unwind + be eligible for join-optimization (i.e. be part of the
// prefix).
auto asField = originLookup->getAsField();
if (fp == asField) {
return false;
}
if (asField.isPrefixOf(fp)) {
// This is a sub-field of the $lookup's "as" field- we need to look at the secondary
// collection to learn about its arrayness.
// TODO SERVER-123953: We will need to actually look at a dependency graph here the
// second we support any subpipeline more complex than a single $match stage.
return expCtx->canPathBeArrayForNss(fp.subtractPrefix(asField.getPathLength()),
originLookup->getFromNs());
}
tassert(11371800,
"It should not be possible for a $lookup to modify a field unrelated to its "
"'as' field",
fp.isPrefixOf(asField));
// We're in a scenario where our "as" field is something like "a.b", vs the join predicate
// field we're looking at is in fact field "a". We should verify the arrayness of field "a"
// at the point when it was last modified.
return canPipelinePathBeArray(pipelineBaseCollDeps, expCtx, declStage, fp);
}
// If this path doesn't originate from a $lookup, we can just check the base coll deps.
return pipelineBaseCollDeps.canPathBeArray(ds, path);
};
/**
* Validates that neither field in the join predicate can include arrays.
* TODO SERVER-123953: Use a dependency graph instead of directly accessing foreign path arrayness.
*/
bool canJoinPredicateIncludeArrays(const pipeline::dependency_graph::DependencyGraph& baseCollDeps,
ExpressionContext* expCtx,
DocumentSource* ds,
const FieldPath& localField,
const NamespaceString& foreignNs,
const FieldPath& foreignField) {
return canPipelinePathBeArray(baseCollDeps, expCtx, ds, localField) ||
expCtx->canPathBeArrayForNss(foreignField, foreignNs);
}
} // namespace
bool AggJoinModel::pipelineEligibleForJoinReordering(const Pipeline& pipeline) {
@ -318,15 +257,6 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
hint = suffix->popFront();
}
// Initialize deps after popping the $hint stage, but BEFORE we try to push a pipeline prefix
// into our base collection CQ. This is important so we don't miss (for instance) $projects at
// the start of the pipeline that might rename fields.
auto canMainCollPathBeArray = [clonedExpCtx, &nss](StringData path) {
return clonedExpCtx->canPathBeArrayForNss(FieldRef(path), nss);
};
pipeline::dependency_graph::DependencyGraph mainCollDeps(suffix->getSources(),
canMainCollPathBeArray);
ExpressionContext::PlanCacheOptions oldPlanCache = expCtx->getPlanCache();
expCtx->setPlanCache(ExpressionContext::PlanCacheOptions::kDisablePlanCache);
auto swCQ = createCanonicalQuery(expCtx, nss, *suffix);
@ -377,18 +307,6 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
break;
}
// Ensure that neither local nor foreign field can include arrays (if present).
if (lookup->hasLocalFieldForeignFieldJoin() &&
canJoinPredicateIncludeArrays(mainCollDeps,
clonedExpCtx.get(),
lookup,
*lookup->getLocalField(),
lookup->getFromNs(),
*lookup->getForeignField())) {
// End prefix here, this join predicate might include arrays.
break;
}
// Attempt to extract join predicates and single table predicates from the $lookup
// expressed as $expr in $match stage. If there is no subpipeline, this returns no join
// predicates and a CanonicalQuery with empty predicate. If this returns a bad status,
@ -397,30 +315,10 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
if (!swPreds.isOK()) {
break;
}
auto preds = std::move(swPreds.getValue());
// Similar check as above, but now for predicates extracted from the sub-pipeline.
if (std::any_of(
preds.joinPredicates.begin(), preds.joinPredicates.end(), [&](auto&& jp) {
return canJoinPredicateIncludeArrays(mainCollDeps,
clonedExpCtx.get(),
lookup,
jp.localField(),
lookup->getFromNs(),
jp.foreignField());
})) {
// Some field in a join predicate introduced by a $expr $match in a sub-pipeline
// might have array values. End prefix here.
break;
}
// If we get here, it means we're ready to modify the join graph to include this
// $lookup. Once the join graph has been modified, any failure case should cause us to
// bail out of join optimization completely, rather than just ending the prefix here
// (since we've already partially incorporated the current join).
auto foreignNodeId = graph.addNode(
lookup->getFromNs(), std::move(preds.canonicalQuery), lookup->getAsField());
auto foreignNodeId = graph.addNode(lookup->getFromNs(),
std::move(swPreds.getValue().canonicalQuery),
lookup->getAsField());
if (!foreignNodeId) {
return Status(ErrorCodes::BadValue, "Graph is too big: too many nodes");
@ -453,7 +351,7 @@ StatusWith<AggJoinModel> AggJoinModel::constructJoinModel(const Pipeline& pipeli
// Add join predicates expressed as $expr in subpipelines to join graph.
auto status = addExprJoinPredicates(
graph, std::move(preds.joinPredicates), pathResolver, *foreignNodeId);
graph, swPreds.getValue().joinPredicates, pathResolver, *foreignNodeId);
if (!status.isOK()) {
return status;
}

View File

@ -30,9 +30,7 @@
#pragma once
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/query/compiler/metadata/path_arrayness.h"
#include "mongo/db/query/compiler/optimizer/join/agg_join_model.h"
#include "mongo/idl/server_parameter_test_controller.h"
#include "mongo/util/modules.h"
namespace mongo::join_ordering {
@ -56,41 +54,7 @@ public:
std::unique_ptr<Pipeline> makePipelineOfSize(size_t numJoins);
/**
* Marks the given fields as non-array (scalar) in the pipeline's ExpressionContext.
* 'mainCollFields' are fields on the main collection; 'secondaryCollFieldMap' maps secondary
* collection names to their fields.
*/
static void markFieldsAsScalar(
Pipeline& pipeline,
const std::vector<StringData>& mainCollFields,
const StringMap<std::vector<StringData>>& secondaryCollFieldMap) {
auto expCtx = pipeline.getContext();
auto mainPathArrayness = std::make_shared<PathArrayness>();
for (const auto& field : mainCollFields) {
mainPathArrayness->addPath(
FieldPath(field), MultikeyComponents{}, /*isFullRebuild=*/true);
}
expCtx->setPathArraynessForNss(expCtx->getNamespaceString(), std::move(mainPathArrayness));
for (const auto& [collName, fields] : secondaryCollFieldMap) {
auto pathArrayness = std::make_shared<PathArrayness>();
for (const auto& field : fields) {
pathArrayness->addPath(
FieldPath(field), MultikeyComponents{}, /*isFullRebuild=*/true);
}
expCtx->setPathArraynessForNss(
NamespaceString::createNamespaceString_forTest("test", collName),
std::move(pathArrayness));
}
}
const AggModelBuildParams defaultBuildParams{.maxNumberNodesConsideredForImplicitEdges =
kMaxNumberNodesConsideredForImplicitEdges};
private:
// Ensure path arrayness is enabled for all tests.
RAIIServerParameterControllerForTest queryKnobController{"featureFlagPathArrayness", true};
};
} // namespace mongo::join_ordering

View File

@ -84,14 +84,12 @@ TEST_F(AggJoinModelGoldenTest, longPrefix) {
{$unwind: "$fromB"}
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "longPrefix");
ASSERT_OK(joinModel);
}
TEST_F(AggJoinModelGoldenTest, veryLargePipeline) {
auto pipeline = makePipelineOfSize(/*numJoins*/ kHardMaxNodesInJoin + 3);
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "veryLargePipeline");
ASSERT_OK(joinModel);
}
@ -108,7 +106,6 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_OneImplictEdge) {
{$unwind: "$fromB"}
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_OneImplictEdge");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 3);
@ -129,7 +126,6 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_MultipleImplictEdges) {
{$unwind: "$fromC"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"a"_sd}}, {"B", {"b"_sd}}, {"C", {"c"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_MultipleImplictEdges");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 4);
@ -158,13 +154,6 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_TwoConnectedComponents) {
{$unwind: "$fromE"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D", "E"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd}},
{"B", {"b"_sd}},
{"C", {"c"_sd, "d"_sd}},
{"D", {"d"_sd}},
{"E", {"e"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_TwoConnectedComponents");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 6);
@ -190,13 +179,6 @@ TEST_F(AggJoinModelGoldenTest, addImplicitEdges_NoImplicitEdges) {
{$unwind: "$fromE"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D", "E"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}},
{"B", {"b"_sd, "c"_sd}},
{"C", {"c"_sd, "d"_sd}},
{"D", {"d"_sd, "e"_sd}},
{"E", {"e"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addImplicitEdges_NoImplicitEdges");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 6);
@ -227,9 +209,6 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_predicatesAtEnd) {
}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd},
{{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_predicatesAtEnd");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -257,9 +236,6 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_predicatesInBetween) {
{$match: {$expr: {$eq: ["$fromD.d", "$fromA.d"]}}}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd},
{{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_predicatesInBetween");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -289,9 +265,6 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_earlyEnd) {
{$unwind: "$fromD"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"s1"_sd, "s2"_sd, "s3"_sd, "s4"_sd},
{{"A", {"s1"_sd}}, {"B", {"s2"_sd}}, {"C", {"s3"_sd}}, {"D", {"s4"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_earlyEnd");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 3);
@ -321,12 +294,6 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_addImplicitEdge) {
{$match: {$expr: {$eq: ["$fromB.b", "$fromC.c"]}}}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}},
{"B", {"b"_sd, "s"_sd}},
{"C", {"s"_sd, "c"_sd}},
{"D", {"d"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "addEdgesFromExpr_addImplicitEdge");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -369,12 +336,6 @@ TEST_F(AggJoinModelGoldenTest, subPipelineEdge_addImplicitEdge) {
{$unwind: "$fromD"}
])";
auto pipeline = makePipeline(query, {"A", "B", "C", "D"});
markFieldsAsScalar(*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}},
{"B", {"b"_sd, "s"_sd}},
{"C", {"s"_sd, "c"_sd}},
{"D", {"d"_sd, "a"_sd}}});
auto joinModel = runVariation(std::move(pipeline), "subPipelineEdge_addImplicitEdge");
ASSERT_OK(joinModel);
ASSERT_EQ(joinModel.getValue().graph.numNodes(), 5);
@ -408,10 +369,6 @@ TEST_F(AggJoinModelGoldenTest, addEdgesFromExpr_subPipelineEdge_addImplicitEdge)
{$match: {$expr: {$eq: ["$fromA.a", "$fromB.a"]}}}
])";
auto pipeline = makePipeline(query, {"A", "B", "C"});
markFieldsAsScalar(
*pipeline,
{"a"_sd},
{{"A", {"a"_sd, "b"_sd}}, {"B", {"b"_sd, "c"_sd, "a"_sd}}, {"C", {"c"_sd, "a"_sd}}});
auto joinModel =
runVariation(std::move(pipeline), "addEdgesFromExpr_subPipelineEdge_addImplicitEdge");
ASSERT_OK(joinModel);

View File

@ -65,7 +65,6 @@ TEST_F(PipelineAnalyzerTest, PipelinePrefixEligibleForJoinReorderingNoLocalForei
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// This pipeline's prefix is eligible for reordering.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -83,7 +82,6 @@ TEST_F(PipelineAnalyzerTest, PipelineEligibleForJoinReorderingSingleLookupUnwind
])";
auto pipeline = makePipeline(query, {"A"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// This pipeline is eligible for reordering.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -125,7 +123,6 @@ TEST_F(PipelineAnalyzerTest, TwoLookupUnwinds) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -145,7 +142,6 @@ TEST_F(PipelineAnalyzerTest, MatchOnMainCollection) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -167,7 +163,6 @@ TEST_F(PipelineAnalyzerTest, MatchInSubPipeline) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -192,7 +187,6 @@ TEST_F(PipelineAnalyzerTest, GroupOnMainCollection) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
// We don't detect ineligibility here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -213,7 +207,6 @@ TEST_F(PipelineAnalyzerTest, ConflictingLocalFields) {
])";
auto pipeline = makePipeline(query, {"B", "C"});
markFieldsAsScalar(*pipeline, {"x"_sd, "a"_sd}, {{"B", {"y"_sd}}, {"C", {"z"_sd}}});
// We don't detect ineligibility of local path fields here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams);
@ -248,8 +241,6 @@ TEST_F(PipelineAnalyzerTest, ConflictingLocalFieldExprSyntax) {
])";
auto pipeline = makePipeline(query, {"B", "A"});
markFieldsAsScalar(
*pipeline, {"x"_sd, "foo"_sd, "bar"_sd}, {{"B", {"y"_sd}}, {"A", {"foo"_sd, "bar"_sd}}});
// We don't detect ineligibility of local path fields here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams);
@ -265,7 +256,6 @@ TEST_F(PipelineAnalyzerTest, CompatibleAsFields) {
{$unwind: "$x.z"}
])";
auto pipeline = makePipeline(query, {"B", "C"});
markFieldsAsScalar(*pipeline, {"x.c"_sd}, {{"B", {"c"_sd, "d"_sd}}, {"C", {"d"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto swJoinModel = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams);
@ -283,7 +273,6 @@ TEST_F(PipelineAnalyzerTest, GroupInMiddleIneligible) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// We don't detect ineligibility here.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -306,7 +295,6 @@ TEST_F(PipelineAnalyzerTest, GroupInSubPipeline) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -379,7 +367,6 @@ TEST_F(PipelineAnalyzerTest, IneligibleSubPipelineStage) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -452,7 +439,6 @@ TEST_F(PipelineAnalyzerTest, LongPrefix) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -474,7 +460,6 @@ TEST_F(PipelineAnalyzerTest, PipelineInEligibleForSortStage) {
])";
auto pipeline = makePipeline(sortPrefixQuery, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
// This is not where we examine the pipeline for a $sort stage.
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
auto status = AggJoinModel::constructJoinModel(*pipeline, defaultBuildParams).getStatus();
@ -507,7 +492,6 @@ TEST_F(PipelineAnalyzerTest, LocalFieldOverride) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"a"_sd, "b"_sd}, {{"A", {"b"_sd}}, {"B", {"b"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -520,7 +504,6 @@ TEST_F(PipelineAnalyzerTest, LocalFieldOverride) {
TEST_F(PipelineAnalyzerTest, tooManyNodes) {
static constexpr size_t numJoins = 5;
auto pipeline = makePipelineOfSize(numJoins);
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// Configure the buildParams that one $lookup/$unwind pair is forced to the suffix because the
// maximum number of nodes is hit.
AggModelBuildParams buildParams{
@ -536,7 +519,6 @@ TEST_F(PipelineAnalyzerTest, tooManyNodes) {
TEST_F(PipelineAnalyzerTest, tooManyEdges) {
static constexpr size_t numJoins = 5;
auto pipeline = makePipelineOfSize(numJoins);
markFieldsAsScalar(*pipeline, {"a"_sd}, {{"A", {"b"_sd}}});
// Configure the buildParams that one $lookup/$unwind pair is forced to the suffix because the
// maximum number of edges is hit.
AggModelBuildParams buildParams{
@ -575,7 +557,6 @@ TEST_F(PipelineAnalyzerTest, SingleJoinCompoundPredicate) {
])";
auto pipeline = makePipeline(query, {"A"});
markFieldsAsScalar(*pipeline, {"foo"_sd, "bar"_sd}, {{"A", {"foo"_sd, "bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -636,9 +617,6 @@ TEST_F(PipelineAnalyzerTest, CompoundJoinKeyWithLocalForeignSyntax) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline,
{"foo"_sd, "bar"_sd},
{{"A", {"foo"_sd, "bar"_sd}}, {"B", {"foo"_sd, "bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -698,8 +676,6 @@ TEST_F(PipelineAnalyzerTest, DuplicateExprEqAndEqEdges) {
])";
auto pipeline = makePipeline(query, {"A", "B", "C"});
markFieldsAsScalar(
*pipeline, {"bar"_sd}, {{"A", {"bar"_sd}}, {"B", {"bar"_sd}}, {"C", {"bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -745,7 +721,6 @@ TEST_F(PipelineAnalyzerTest, ExprOnlyImplicitEdges) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"bar"_sd}, {{"A", {"bar"_sd}}, {"B", {"bar"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -777,7 +752,6 @@ TEST_F(PipelineAnalyzerTest, PipelineIneligibleWithCorrelatedNonJoinPredicate) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"foo"_sd}, {{"A", {"foo"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));
@ -807,7 +781,6 @@ TEST_F(PipelineAnalyzerTest, PipelineIneligibleWithNonFieldPathVariable) {
])";
auto pipeline = makePipeline(query, {"A", "B"});
markFieldsAsScalar(*pipeline, {"foo"_sd}, {{"A", {"foo"_sd}}});
ASSERT_TRUE(AggJoinModel::pipelineEligibleForJoinReordering(*pipeline));

View File

@ -40,12 +40,6 @@ namespace mongo::join_ordering {
* present multiple times in the graph and associated with different predicates/cardinalities.
*/
using NodeCardinalities = std::vector<cost_based_ranker::CardinalityEstimate>;
/**
* Tracks for each node ID the CBR cost of the winning single-table plan.
*/
using NodeCBRCosts = std::vector<cost_based_ranker::CostEstimate>;
/**
* Tracks for each edge ID the selectivity estimate.
*/

View File

@ -42,9 +42,13 @@ namespace mongo::join_ordering {
using namespace cost_based_ranker;
JoinCardinalityEstimator::JoinCardinalityEstimator(const JoinReorderingContext& ctx,
EdgeSelectivities edgeSelectivities)
EdgeSelectivities edgeSelectivities,
NodeCardinalities nodeCardinalities,
NodeCardinalities collCardinalities)
: _ctx(ctx),
_edgeSelectivities(std::move(edgeSelectivities)),
_nodeCardinalities(std::move(nodeCardinalities)),
_collCardinalities(std::move(collCardinalities)),
_cycleBreaker(
GraphCycleBreaker(_ctx.joinGraph, _edgeSelectivities, _ctx.resolvedPaths.size())) {
tassert(11514700,
@ -52,13 +56,18 @@ JoinCardinalityEstimator::JoinCardinalityEstimator(const JoinReorderingContext&
_edgeSelectivities.size() == _ctx.joinGraph.numEdges());
tassert(11514701,
"Missing node cardinalities",
_ctx.singleTableAccess.nodeCardinalities.size() == _ctx.joinGraph.numNodes());
_nodeCardinalities.size() == _ctx.joinGraph.numNodes());
}
JoinCardinalityEstimator JoinCardinalityEstimator::make(
const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators) {
const JoinReorderingContext& ctx,
const cost_based_ranker::EstimateMap& estimates,
const SamplingEstimatorMap& samplingEstimators) {
return JoinCardinalityEstimator(
ctx, JoinCardinalityEstimator::estimateEdgeSelectivities(ctx, samplingEstimators));
ctx,
JoinCardinalityEstimator::estimateEdgeSelectivities(ctx, samplingEstimators),
JoinCardinalityEstimator::extractNodeCardinalities(ctx, estimates),
JoinCardinalityEstimator::extractCollCardinalities(ctx, samplingEstimators));
}
EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities(
@ -73,6 +82,34 @@ EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities(
return edgeSelectivities;
}
// Produces one cardinality per join-graph node, indexed by node id. For each node, the chain
// followed is: node id -> access-path CanonicalQuery -> entry in 'ctx.cbrCqQsns' -> root of that
// solution -> entry in 'estimates'. The 'outCE' of that estimate is what gets recorded.
//
// Trips a tassert if a node's CanonicalQuery has no entry in 'ctx.cbrCqQsns', or if the root of
// the solution has no entry in 'estimates'.
NodeCardinalities JoinCardinalityEstimator::extractNodeCardinalities(
    const JoinReorderingContext& ctx, const cost_based_ranker::EstimateMap& estimates) {
    const auto numNodes = ctx.joinGraph.numNodes();

    NodeCardinalities result;
    result.reserve(numNodes);

    for (size_t id = 0; id < numNodes; ++id) {
        // Resolve the single-table solution chosen for this node's access path.
        const auto solnIt = ctx.cbrCqQsns.find(ctx.joinGraph.accessPathAt(id));
        tassert(11514600, "Missing QSN for CanonicalQuery", solnIt != ctx.cbrCqQsns.end());

        // The estimate map is keyed by QSN; only the root of the solution is of interest here.
        const auto estimateIt = estimates.find(solnIt->second->root());
        tassert(11514601, "Missing estimate for QSN root", estimateIt != estimates.end());

        result.push_back(estimateIt->second->outCE);
    }
    return result;
}
// Builds the per-node vector of collection cardinalities, indexed by node id. For each node in
// the join graph, the sampling estimator registered under the node's collection namespace is
// looked up and its getCollCard() value is recorded, tagged with EstimationSource::Metadata.
// Single-table predicates are not consulted here.
//
// NOTE(review): the lookup uses at(), so presumably every node's namespace must already have an
// entry in 'samplingEstimators' - confirm against the SamplingEstimatorMap type.
NodeCardinalities JoinCardinalityEstimator::extractCollCardinalities(
    const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators) {
    NodeCardinalities collCardinalities;
    collCardinalities.reserve(ctx.joinGraph.numNodes());
    for (size_t nodeId = 0; nodeId < ctx.joinGraph.numNodes(); nodeId++) {
        // Bind by reference: the namespace is only used as a lookup key, so copying it once per
        // node is unnecessary.
        const auto& nss = ctx.joinGraph.getNode(nodeId).collectionName;
        const auto& samplingEstimator = samplingEstimators.at(nss);
        collCardinalities.push_back(CardinalityEstimate{
            CardinalityType{samplingEstimator->getCollCard()}, EstimationSource::Metadata});
    }
    return collCardinalities;
}
// This function makes a number of assumptions:
// * Join predicate are independent from single table predicates. This allows us to estimate them
// separately, which can be seen by our use of NDV(join key) over the entire collection, as opposed
@ -218,7 +255,7 @@ cost_based_ranker::CardinalityEstimate JoinCardinalityEstimator::getOrEstimateSu
// Finally, note that we have the pre-computed combination of (2) and (3) in '_nodeCardinalities'.
cost_based_ranker::CardinalityEstimate ce = cost_based_ranker::oneCE;
for (auto nodeIdx : iterable(nodes, _ctx.joinGraph.numNodes())) {
ce = ce * _ctx.singleTableAccess.nodeCardinalities[nodeIdx].toDouble();
ce = ce * _nodeCardinalities[nodeIdx].toDouble();
}
auto edges = _cycleBreaker.breakCycles(_ctx.joinGraph.getEdgesForSubgraph(nodes));
@ -236,6 +273,10 @@ cost_based_ranker::CardinalityEstimate JoinCardinalityEstimator::getOrEstimateSu
return ce;
}
// Returns the entry of '_collCardinalities' stored at index 'node'. The lookup is a plain
// subscript with no range check in this function, so the caller must pass an id that is a valid
// index into that vector.
CardinalityEstimate JoinCardinalityEstimator::getCollCardinality(NodeId node) const {
return _collCardinalities[node];
}
SelectivityEstimate JoinCardinalityEstimator::getEdgeSelectivity(EdgeId edge) const {
return _edgeSelectivities[edge];
}

View File

@ -33,7 +33,7 @@
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimation_types.h"
#include "mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h"
#include "mongo/db/query/compiler/optimizer/join/join_graph.h"
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
#include "mongo/db/query/compiler/optimizer/join/single_table_access.h"
#include "mongo/util/modules.h"
namespace mongo::join_ordering {
@ -46,10 +46,14 @@ using cost_based_ranker::SelectivityEstimate;
*/
class JoinCardinalityEstimator {
public:
JoinCardinalityEstimator(const JoinReorderingContext& ctx, EdgeSelectivities edgeSelectivities);
JoinCardinalityEstimator(const JoinReorderingContext& ctx,
EdgeSelectivities edgeSelectivities,
NodeCardinalities nodeCardinalities,
NodeCardinalities collCardinalities);
virtual ~JoinCardinalityEstimator() {};
static JoinCardinalityEstimator make(const JoinReorderingContext& ctx,
const cost_based_ranker::EstimateMap& estimates,
const SamplingEstimatorMap& samplingEstimators);
/**
@ -63,6 +67,12 @@ public:
static EdgeSelectivities estimateEdgeSelectivities(
const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators);
static NodeCardinalities extractNodeCardinalities(
const JoinReorderingContext& ctx, const cost_based_ranker::EstimateMap& estimates);
static NodeCardinalities extractCollCardinalities(
const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators);
/**
* Estimates the cardinality of a join plan over the given subset of nodes. This method
* constructs a spanning tree from the edges in the graph induced by 'nodes', and combines the
@ -71,6 +81,12 @@ public:
*/
virtual CardinalityEstimate getOrEstimateSubsetCardinality(const NodeSet& nodes);
/**
* Returns the cardinality of the collection referenced by the given node. This ignores any
* single table predicates.
*/
CardinalityEstimate getCollCardinality(NodeId node) const;
/**
* Returns the selectivity of the given edge.
*/
@ -79,6 +95,10 @@ public:
protected:
const JoinReorderingContext& _ctx;
const EdgeSelectivities _edgeSelectivities;
// Stores cardinality estimates for nodes after single-table predicates are applied.
const NodeCardinalities _nodeCardinalities;
// Stores cardinalities for the underlying collections as reported by the catalog.
const NodeCardinalities _collCardinalities;
GraphCycleBreaker _cycleBreaker;

View File

@ -169,6 +169,37 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) {
ASSERT_EQ(expectedSel, edgeSels[0]);
}
// Checks that extractNodeCardinalities() returns one entry per graph node and that each entry is
// the node-specific estimate (aCE / bCE) registered for the root of that node's plan.
TEST_F(JoinPredicateEstimatorFixture, ExtractNodeCardinalities) {
// Two nodes over distinct namespaces; 'b' is additionally given the field path "b".
auto aNss = NamespaceString::createNamespaceString_forTest("a");
auto bNss = NamespaceString::createNamespaceString_forTest("b");
auto aCQ = makeCanonicalQuery(aNss);
auto bCQ = makeCanonicalQuery(bNss);
auto aNodeId = *graph.addNode(aNss, std::move(aCQ), boost::none);
auto bNodeId = *graph.addNode(bNss, std::move(bCQ), FieldPath{"b"});
// 'inCE' is shared by both estimates and differs from 'aCE' and 'bCE', so the assertions below
// can only pass if the per-node value is the one returned.
const auto inCE = CardinalityEstimate{CardinalityType{100}, EstimationSource::Sampling};
const auto aCE = CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling};
const auto bCE = CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling};
cost_based_ranker::EstimateMap estimates;
{
// The estimate is keyed by the plan's root, so it must be registered before the plan is moved
// into 'cbrCqQsns'.
auto aPlan = makeCollScanPlan(aNss);
estimates[aPlan->root()] = std::make_unique<cost_based_ranker::QSNEstimate>(inCE, aCE);
cbrCqQsns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan);
}
{
// Same registration order as above, for node 'b'.
auto bPlan = makeCollScanPlan(bNss);
estimates[bPlan->root()] = std::make_unique<cost_based_ranker::QSNEstimate>(inCE, bCE);
cbrCqQsns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan);
}
// NOTE(review): makeContext() presumably picks up the fixture's 'graph' and 'cbrCqQsns' as
// populated above - confirm against the fixture definition.
auto ctx = makeContext();
auto nodeCardinalities = JoinCardinalityEstimator::extractNodeCardinalities(ctx, estimates);
ASSERT_EQ(2U, nodeCardinalities.size());
ASSERT_EQ(aCE, nodeCardinalities[aNodeId]);
ASSERT_EQ(bCE, nodeCardinalities[bNodeId]);
}
namespace {
void pushNNodes(MutableJoinGraph& graph, size_t n) {
for (size_t i = 0; i < n; i++) {
@ -208,9 +239,8 @@ TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinality) {
EstimationSource::Sampling));
}
nodeCards = nodeCEs;
auto jCtx = makeContext();
JoinCardinalityEstimator jce(jCtx, edgeSels);
JoinCardinalityEstimator jce(jCtx, edgeSels, nodeCEs, {});
{
// Cardinality for subset of size 1 is pulled directly from the CE map.
ASSERT_EQ(oneCE * 10, jce.getOrEstimateSubsetCardinality(makeNodeSet(1)));
@ -276,9 +306,8 @@ TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalityAlmostCycle) {
EstimationSource::Sampling));
}
nodeCards = nodeCEs;
auto jCtx = makeContext();
JoinCardinalityEstimator jce(jCtx, edgeSels);
JoinCardinalityEstimator jce(jCtx, edgeSels, nodeCEs, {});
ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2 * 0.3,
jce.getOrEstimateSubsetCardinality(makeNodeSet(1, 2, 3)));
}
@ -322,9 +351,8 @@ TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalitySameCollectionPre
oneCE * 30,
};
nodeCards = nodeCEs;
auto jCtx = makeContext();
JoinCardinalityEstimator jce(jCtx, edgeSels);
JoinCardinalityEstimator jce(jCtx, edgeSels, nodeCEs, {});
// Show that even though the namespace is the same for two of the nodes, we are able to
// correctly associate CE with the particular filters associated with those nodes.
@ -340,6 +368,14 @@ TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalitySameCollectionPre
jce.getOrEstimateSubsetCardinality(makeNodeSet(0, 1, 2)));
}
// Checks that getCollCardinality() returns the collection cardinalities passed to the
// constructor, by index.
TEST_F(JoinPredicateEstimatorFixture, GetCollectionCardinality) {
auto jCtx = makeContext();
// Edge selectivities and node cardinalities are passed empty; only the last argument (the
// collection cardinalities) is exercised by this test.
JoinCardinalityEstimator jce(jCtx, {}, {}, {oneCE * 10, oneCE * 20, oneCE * 30});
ASSERT_EQ(oneCE * 10, jce.getCollCardinality(0));
ASSERT_EQ(oneCE * 20, jce.getCollCardinality(1));
ASSERT_EQ(oneCE * 30, jce.getCollCardinality(2));
}
TEST_F(JoinPredicateEstimatorFixture, JoinPredicateSelUsesUniqueFields) {
// Create a join graph with two nodes joined by an edge: a -- b.
auto aNss = NamespaceString::createNamespaceString_forTest("a");

View File

@ -188,10 +188,10 @@ bool indexIsValidForINLJ(const std::shared_ptr<const IndexCatalogEntry>& ice) {
* Pre-process indexes to filter out those ineligible for conversion to INLJ, and output a map of
* collection namespaces to indexes available.
*/
AvailableIndexes extractINLJEligibleIndexes(const QuerySolutionMap& cbrCqQsns,
AvailableIndexes extractINLJEligibleIndexes(const QuerySolutionMap& solns,
const MultipleCollectionAccessor& mca) {
AvailableIndexes perCollIdxs;
for (const auto& [cq, _] : cbrCqQsns) {
for (const auto& [cq, _] : solns) {
const auto& ns = cq->nss();
if (perCollIdxs.contains(ns)) {
// We've already pre-processed this collection's indexes.
@ -318,7 +318,6 @@ StatusWith<JoinReorderedExecutorResult> getJoinReorderedExecutor(
if (!swAccessPlans.isOK()) {
return swAccessPlans.getStatus();
}
auto singleTableAccess = std::move(swAccessPlans.getValue());
// Retrieve a copy of the hint if present.
boost::optional<EnumerationStrategy> hintedStrat;
@ -327,10 +326,11 @@ StatusWith<JoinReorderedExecutorResult> getJoinReorderedExecutor(
hintedStrat = hintStage->getStrategy();
}
auto& solns = swAccessPlans.getValue().solns;
const auto qkc = expCtx->getQueryKnobConfiguration();
// Pre-process indexes per collection to facilitate INLJ enumeration.
auto indexesPerColl = extractINLJEligibleIndexes(singleTableAccess.cbrCqQsns, mca);
auto indexesPerColl = extractINLJEligibleIndexes(solns, mca);
PerCollUniqueFieldInfo uniqueFieldInfo;
if (qkc.getEnableJoinOptimizationUseIndexUniqueness()) {
uniqueFieldInfo = buildUniqueFieldInfo(indexesPerColl);
@ -338,14 +338,15 @@ StatusWith<JoinReorderedExecutorResult> getJoinReorderedExecutor(
JoinReorderingContext ctx{.joinGraph = model.graph,
.resolvedPaths = model.resolvedPaths,
.singleTableAccess = std::move(singleTableAccess),
.cbrCqQsns = std::move(solns),
.perCollIdxs = std::move(indexesPerColl),
.catStats = createCatalogStats(opCtx, mca),
.uniqueFieldInfo = std::move(uniqueFieldInfo),
.samplingEstimators = &samplingEstimators,
.explain = expCtx->getExplain().has_value()};
JoinCardinalityEstimator cardEstimator(JoinCardinalityEstimator::make(ctx, samplingEstimators));
JoinCardinalityEstimator cardEstimator(
JoinCardinalityEstimator::make(ctx, swAccessPlans.getValue().estimate, samplingEstimators));
JoinCostEstimatorImpl costEstimator(ctx, cardEstimator);
StatusWith<ReorderedJoinSolution> swReordered = [&]() {
@ -440,7 +441,7 @@ StatusWith<JoinReorderedExecutorResult> getJoinReorderedExecutor(
// TODO SERVER-111913: Once we are no-longer cloning QSN for single-table plans, the estimate
// map from join-reordering 'reordered.estimates' can be combined with the estimate map from
// CBR 'ctx.singleTableAccess.estimate' before creating the executor below.
// CBR 'swAccessPlans.getValue().estimate' before creating the executor below.
// We actually have several canonical queries, so we don't try to pass one in.
auto exec = plan_executor_factory::make(opCtx,
nullptr /* cq */,

View File

@ -51,24 +51,18 @@ public:
* Estimate the cost of a single table collection scan plan fragment. This function assumes that
* the given 'NodeId' corresponds to a node in the JoinGraph whose single table access path is a
* collection scan; it is the caller's responsibility to ensure this is the case.
*
* The given cost is the CPU cost of the single-table plan from CBR; IO cost is modeled by the
* fragment method itself.
*/
virtual JoinCostEstimate costCollScanFragment(NodeId id, CostEstimate singleTableCpuCost) = 0;
virtual JoinCostEstimate costCollScanFragment(NodeId) = 0;
/**
* Estimate the cost of a single table index scan plan fragment. This function assumes that the
* given 'NodeId' corresponds to a node in the JoinGraph whose single table access path is an
* index scan followed by a fetch; it is caller's responsibility to ensure this is the case.
*
* The given cost is the CPU cost of the single-table plan from CBR; IO cost is modeled by the
* fragment method itself.
*
* TODO SERVER-117506: Once we support projections and start producing covered plans, we will
* need to modify this function.
*/
virtual JoinCostEstimate costIndexScanFragment(NodeId id, CostEstimate singleTableCpuCost) = 0;
virtual JoinCostEstimate costIndexScanFragment(NodeId) = 0;
/**
* Estimate the cost of a hash join plan fragment.

View File

@ -35,32 +35,32 @@ JoinCostEstimatorImpl::JoinCostEstimatorImpl(const JoinReorderingContext& jCtx,
JoinCardinalityEstimator& cardinalityEstimator)
: _jCtx(jCtx), _cardinalityEstimator(cardinalityEstimator) {}
JoinCostEstimate JoinCostEstimatorImpl::costCollScanFragment(NodeId nodeId,
CostEstimate singleTableCpuCost) {
// CollScan outputs documents after applying single table predicates
JoinCostEstimate JoinCostEstimatorImpl::costCollScanFragment(NodeId nodeId) {
// CollScan processes all documents in the collection
CardinalityEstimate numDocsProcessed = _cardinalityEstimator.getCollCardinality(nodeId);
// CollScan outputs documents after applying single table predicates
CardinalityEstimate numDocsOutput =
_cardinalityEstimator.getOrEstimateSubsetCardinality(makeNodeSet(nodeId));
auto& collStats = _jCtx.catStats.collStats.at(_jCtx.joinGraph.getNode(nodeId).collectionName);
// CollScan performs roughly sequential reads from disk as it is stored in a WT b-tree. We
// estimate the number of disk reads by estimating the number of pages the collscan will read.
// estimate the number of disk reads by estimating the number of pages the collscan will read.
CardinalityEstimate numSeqIOs =
CardinalityEstimate{CardinalityType{collStats.numPages()}, EstimationSource::Metadata};
// CollScan does no random read from disk.
CardinalityEstimate numRandIOs = zeroCE;
return JoinCostEstimate(
numDocsProcessedFromCpuCost(singleTableCpuCost), numDocsOutput, numSeqIOs, numRandIOs);
return JoinCostEstimate(numDocsProcessed, numDocsOutput, numSeqIOs, numRandIOs);
}
JoinCostEstimate JoinCostEstimatorImpl::costIndexScanFragment(NodeId nodeId,
CostEstimate singleTableCpuCost) {
JoinCostEstimate JoinCostEstimatorImpl::costIndexScanFragment(NodeId nodeId) {
// For simplicity we assume there are no non-sargable filters applied after the index scan. This
// means that we assume the number of output documents is equal to the cardinality estimate of
// that node.
CardinalityEstimate numDocsOutput =
// means that we assume the number of documents processed and output are both equal to the
// cardinality estimate of that node.
CardinalityEstimate numDocsProcessed =
_cardinalityEstimator.getOrEstimateSubsetCardinality(makeNodeSet(nodeId));
// Assume that the sequential IO performed by scanning the index itself is negligible.
CardinalityEstimate numDocsOutput = numDocsProcessed;
// Assume that the sequential IO performed by scanning the index itself is negligible.
CardinalityEstimate numSeqIOs = zeroCE;
const auto& nss = _jCtx.joinGraph.getNode(nodeId).collectionName;
@ -84,7 +84,7 @@ JoinCostEstimate JoinCostEstimatorImpl::costIndexScanFragment(NodeId nodeId,
// TODO SERVER-123532: extend this to multikey indexes once NDV estimation supports them.
if (_jCtx.samplingEstimators) {
const auto* cq = _jCtx.joinGraph.accessPathAt(nodeId);
const auto& qsn = _jCtx.singleTableAccess.cbrCqQsns.at(cq);
const auto& qsn = _jCtx.cbrCqQsns.at(cq);
auto [ixScanNodePtr, _] = qsn->getFirstNodeByType(STAGE_IXSCAN);
tassert(12291601, "expected plan fragment to contain IndexScan QSN", ixScanNodePtr);
@ -96,7 +96,7 @@ JoinCostEstimate JoinCostEstimatorImpl::costIndexScanFragment(NodeId nodeId,
}
const auto& samplingEstimator = _jCtx.samplingEstimators->at(nss);
auto ndv = samplingEstimator->estimateNDV(fields);
double collCard = _jCtx.singleTableAccess.collCardinalities[nodeId].toDouble();
double collCard = _cardinalityEstimator.getCollCardinality(nodeId).toDouble();
// Scale NDV by selectivity of the scan.
// Guard against division by 0 and 0 NDV, in both cases fallback to estimating a random
// IO per output document.
@ -114,9 +114,7 @@ JoinCostEstimate JoinCostEstimatorImpl::costIndexScanFragment(NodeId nodeId,
numLogicalPageRequests)
.randIOPages},
EstimationSource::Sampling};
return JoinCostEstimate(
numDocsProcessedFromCpuCost(singleTableCpuCost), numDocsOutput, numSeqIOs, numRandIOs);
return JoinCostEstimate(numDocsProcessed, numDocsOutput, numSeqIOs, numRandIOs);
}
// Use catalog information to return an estimate of the size of a document from the "relation"
@ -127,7 +125,7 @@ double JoinCostEstimatorImpl::estimateDocSize(NodeSet subset) const {
for (auto nodeId : iterable(subset)) {
auto& collStats =
_jCtx.catStats.collStats.at(_jCtx.joinGraph.getNode(nodeId).collectionName);
auto collSize = _jCtx.singleTableAccess.collCardinalities[nodeId].toDouble();
auto collSize = _cardinalityEstimator.getCollCardinality(nodeId).toDouble();
if (collSize == 0) {
continue;
}
@ -221,7 +219,7 @@ JoinCostEstimate JoinCostEstimatorImpl::costINLJFragment(const JoinPlanNode& lef
// The cardinality of the outer side is the number of probes we will perform.
double numProbes = leftDocs.toDouble();
double rightBaseCard = _jCtx.singleTableAccess.collCardinalities[right].toDouble();
double rightBaseCard = _cardinalityEstimator.getCollCardinality(right).toDouble();
double joinPredSel = _cardinalityEstimator.getEdgeSelectivity(edgeId).toDouble();
// The number of documents that the INLJ probes for:
// numProbes * (rightBaseCard * joinPredSel)
@ -279,25 +277,15 @@ JoinCostEstimate JoinCostEstimatorImpl::costNLJFragment(const JoinPlanNode& left
JoinCostEstimate JoinCostEstimatorImpl::costBaseCollectionAccess(NodeId baseNode) {
const auto* cq = _jCtx.joinGraph.accessPathAt(baseNode);
tassert(11729100, "Expected an access path to exist", cq);
auto it = _jCtx.singleTableAccess.cbrCqQsns.find(cq);
tassert(11729101,
"Expected a QSN to exist for this access path",
it != _jCtx.singleTableAccess.cbrCqQsns.end());
if (it->second->root()->getType() == STAGE_EOF) {
return JoinCostEstimate(zeroCost);
}
// The full CPU cost of the single-table plan comes from CBR and is passed to the fragment
// methods, which fold it into the join cost formula alongside the output and IO costs they
// model themselves.
CostEstimate singleTableCost = _jCtx.singleTableAccess.nodeCBRCosts[baseNode];
auto it = _jCtx.cbrCqQsns.find(cq);
tassert(11729101, "Expected a QSN to exist for this access path", it != _jCtx.cbrCqQsns.end());
// TODO SERVER-117618: Stricter tree-shape validation.
if (it->second->hasNode(STAGE_COLLSCAN)) {
return costCollScanFragment(baseNode, singleTableCost);
return costCollScanFragment(baseNode);
} else if (it->second->hasNode(STAGE_IXSCAN)) {
return costIndexScanFragment(baseNode, singleTableCost);
return costIndexScanFragment(baseNode);
} else if (it->second->root()->getType() == STAGE_EOF) {
return JoinCostEstimate(zeroCost);
}
MONGO_UNIMPLEMENTED_TASSERT(11729102);
}

View File

@ -48,8 +48,8 @@ public:
JoinCostEstimatorImpl(JoinCostEstimatorImpl&&) = delete;
JoinCostEstimatorImpl& operator=(JoinCostEstimatorImpl&&) = delete;
JoinCostEstimate costCollScanFragment(NodeId nodeId, CostEstimate singleTableCpuCost) override;
JoinCostEstimate costIndexScanFragment(NodeId nodeId, CostEstimate singleTableCpuCost) override;
JoinCostEstimate costCollScanFragment(NodeId nodeId) override;
JoinCostEstimate costIndexScanFragment(NodeId nodeId) override;
JoinCostEstimate costBaseCollectionAccess(NodeId nodeId) override;
JoinCostEstimate costHashJoinFragment(const JoinPlanNode& left,
const JoinPlanNode& right) override;

View File

@ -57,28 +57,6 @@ public:
graph.addEdge(largeNodeId, unselectiveNodeId, {});
graph.addEdge(smallNodeId, largeNodeId, {});
collCards = {
makeCard(1000), // smallNode
makeCard(20'000), // largeNode
makeCard(20'000), // unselectiveNode
makeCard(1), // extremelySmallNode
makeCard(100'000'000), // selectiveNode
};
constexpr double docSizeBytes = 500;
catStats = {
.collStats = {
{smallNss,
CollectionStats{collCards[smallNodeId].toDouble() * docSizeBytes,
collCards[smallNodeId].toDouble() * docSizeBytes}},
{largeNss,
CollectionStats{collCards[largeNodeId].toDouble() * docSizeBytes,
collCards[largeNodeId].toDouble() * docSizeBytes}},
{extremelySmallNss,
CollectionStats{collCards[extremelySmallNodeId].toDouble() * docSizeBytes,
collCards[extremelySmallNodeId].toDouble() * docSizeBytes}},
}};
jCtx.emplace(makeContext());
SubsetCardinalities subsetCards{
@ -91,12 +69,34 @@ public:
{makeNodeSet(smallNodeId, unselectiveNodeId), makeCard(1000)},
{makeNodeSet(largeNodeId, unselectiveNodeId), makeCard(1000)},
};
NodeCardinalities collCards{
makeCard(1000), // smallNode
makeCard(20'000), // largeNode
makeCard(20'000), // unselectiveNode
makeCard(1), // extremelySmallNode
makeCard(100'000'000), // selectiveNode
};
EdgeSelectivities edgeSel{
makeSel(1000.0 / (1'000 * 20'000)), // smallNode <--> unselectiveNode
makeSel(1000.0 / (20'000 * 20'000)), // largeNode <--> unselectiveNode
makeSel(1.0 / (1000 * 20'000)), // smallNode <--> largeId
};
cardEstimator = std::make_unique<FakeJoinCardinalityEstimator>(*jCtx, subsetCards, edgeSel);
cardEstimator =
std::make_unique<FakeJoinCardinalityEstimator>(*jCtx, subsetCards, edgeSel, collCards);
constexpr double docSizeBytes = 500;
jCtx->catStats = {
.collStats = {
{smallNss,
CollectionStats{collCards[smallNodeId].toDouble() * docSizeBytes,
collCards[smallNodeId].toDouble() * docSizeBytes}},
{largeNss,
CollectionStats{collCards[largeNodeId].toDouble() * docSizeBytes,
collCards[largeNodeId].toDouble() * docSizeBytes}},
{extremelySmallNss,
CollectionStats{collCards[extremelySmallNodeId].toDouble() * docSizeBytes,
collCards[extremelySmallNodeId].toDouble() * docSizeBytes}},
}};
costEstimator = std::make_unique<JoinCostEstimatorImpl>(*jCtx, *cardEstimator);
planEnumCtx = std::make_unique<PlanEnumeratorContext>(
@ -135,53 +135,32 @@ public:
};
// Checks that costCollScanFragment() reports a strictly higher cost for 'largeNodeId' than for
// 'smallNodeId'.
// NOTE(review): both the two-argument (nodeId, zeroCost) and the one-argument (nodeId) call forms
// appear below, and each declares 'smallCost'/'largeCost'. This looks like the before/after sides
// of a diff rendered together -- confirm which form is current.
TEST_F(JoinCostEstimatorTest, LargerCollectionHasHigherCost) {
auto smallCost =
planEnumCtx->getJoinCostEstimator()->costCollScanFragment(smallNodeId, zeroCost);
auto largeCost =
planEnumCtx->getJoinCostEstimator()->costCollScanFragment(largeNodeId, zeroCost);
auto smallCost = planEnumCtx->getJoinCostEstimator()->costCollScanFragment(smallNodeId);
auto largeCost = planEnumCtx->getJoinCostEstimator()->costCollScanFragment(largeNodeId);
ASSERT_GT(largeCost, smallCost);
}
// Checks that costIndexScanFragment() reports a strictly higher cost for 'largeNodeId' than for
// 'smallNodeId'.
// NOTE(review): both the two-argument (nodeId, zeroCost) and the one-argument (nodeId) call forms
// appear below, and each declares 'smallCost'/'largeCost'. This looks like the before/after sides
// of a diff rendered together -- confirm which form is current.
TEST_F(JoinCostEstimatorTest, LargerIndexScanHasHigherCost) {
auto smallCost =
planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(smallNodeId, zeroCost);
auto largeCost =
planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(largeNodeId, zeroCost);
auto smallCost = planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(smallNodeId);
auto largeCost = planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(largeNodeId);
ASSERT_GT(largeCost, smallCost);
}
// For 'selectiveNodeId', checks that the collection-scan fragment cost is strictly greater than
// the index-scan fragment cost.
// NOTE(review): both the two-argument (nodeId, zeroCost) and the one-argument (nodeId) call forms
// appear below; the initializer of 'indexScanCost' is followed by two alternative continuation
// lines. This looks like the before/after sides of a diff rendered together -- confirm which form
// is current.
TEST_F(JoinCostEstimatorTest, SelectiveIndexScanHasSmallerCostThanCollScan) {
auto collScanCost =
planEnumCtx->getJoinCostEstimator()->costCollScanFragment(selectiveNodeId, zeroCost);
auto collScanCost = planEnumCtx->getJoinCostEstimator()->costCollScanFragment(selectiveNodeId);
auto indexScanCost =
planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(selectiveNodeId, zeroCost);
planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(selectiveNodeId);
ASSERT_GT(collScanCost, indexScanCost);
}
// For 'unselectiveNodeId', checks that the index-scan fragment cost is strictly greater than the
// collection-scan fragment cost.
// NOTE(review): each initializer below is followed by two alternative continuation lines (the
// two-argument form with 'zeroCost' and the one-argument form). This looks like the before/after
// sides of a diff rendered together -- confirm which form is current.
TEST_F(JoinCostEstimatorTest, UnselectiveIndexScanHasLargerCostThanCollScan) {
auto collScanCost =
planEnumCtx->getJoinCostEstimator()->costCollScanFragment(unselectiveNodeId, zeroCost);
planEnumCtx->getJoinCostEstimator()->costCollScanFragment(unselectiveNodeId);
auto indexScanCost =
planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(unselectiveNodeId, zeroCost);
planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(unselectiveNodeId);
ASSERT_GT(indexScanCost, collScanCost);
}
// Verifies that the single-table cost passed to costCollScanFragment() appears as an exact
// additive term in the fragment's total cost: costing the same node with a non-zero cost must
// equal the zero-cost result plus that cost.
TEST_F(JoinCostEstimatorTest, CBRCostAddedDirectlyToCollScanCost) {
    auto extraCost = CostEstimate{CostType{5.0}, EstimationSource::Code};

    // Baseline: same node, no extra single-table cost.
    auto baseline =
        planEnumCtx->getJoinCostEstimator()->costCollScanFragment(smallNodeId, zeroCost);
    auto withExtra =
        planEnumCtx->getJoinCostEstimator()->costCollScanFragment(smallNodeId, extraCost);

    ASSERT_EQ(withExtra.getTotalCost(), baseline.getTotalCost() + extraCost);
}
// Verifies that the single-table cost passed to costIndexScanFragment() appears as an exact
// additive term in the fragment's total cost: costing the same node with a non-zero cost must
// equal the zero-cost result plus that cost.
TEST_F(JoinCostEstimatorTest, CBRCostAddedDirectlyToIndexScanCost) {
    auto extraCost = CostEstimate{CostType{10.0}, EstimationSource::Code};

    // Baseline: same node, no extra single-table cost.
    auto baseline =
        planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(smallNodeId, zeroCost);
    auto withExtra =
        planEnumCtx->getJoinCostEstimator()->costIndexScanFragment(smallNodeId, extraCost);

    ASSERT_EQ(withExtra.getTotalCost(), baseline.getTotalCost() + extraCost);
}
const JoinSubset& getJoinSubsetForNodeId(const std::vector<JoinSubset>& subsets, NodeId nodeId) {
return *std::find_if(subsets.cbegin(), subsets.cend(), [&](const JoinSubset& subset) {
return subset.getNodeId() == nodeId;
@ -291,15 +270,16 @@ public:
auto* cqPtr = graph.getNode(nodeId).accessPath.get();
cbrCqQsns.emplace(cqPtr, makeIndexScanFetchPlan(nss, IndexBounds{}, {"a"}));
collCards = {makeCard(collCardValue)};
catStats = {.collStats = {{nss,
CollectionStats{collCardValue * docSizeBytes,
collCardValue * docSizeBytes}}}};
jCtx.emplace(makeContext());
SubsetCardinalities subsetCards{{makeNodeSet(nodeId), makeCard(numDocsOutputValue)}};
cardEstimator = std::make_unique<FakeJoinCardinalityEstimator>(*jCtx, subsetCards);
NodeCardinalities collCards{makeCard(collCardValue)};
cardEstimator =
std::make_unique<FakeJoinCardinalityEstimator>(*jCtx, subsetCards, collCards);
jCtx->catStats = {.collStats = {{nss,
CollectionStats{collCardValue * docSizeBytes,
collCardValue * docSizeBytes}}}};
costEstimator = std::make_unique<JoinCostEstimatorImpl>(*jCtx, *cardEstimator);
}
@ -315,7 +295,7 @@ public:
// fewer distinct sort-sparse IO groups, so numLogicalPageRequests (= NDV * selectivity) is smaller.
TEST_F(IndexScanNDVCostTest, LowNDVHasLowerCostThanHighNDV) {
// Without sampling estimators the fallback is numLogicalPageRequests = numDocsOutput = 200.
auto costWithoutNDV = costEstimator->costIndexScanFragment(nodeId, zeroCost);
auto costWithoutNDV = costEstimator->costIndexScanFragment(nodeId);
// With NDV = 50 (< numDocsOutput = 200):
// numLogicalPageRequests = 50 * 200 / 1000 = 10 → lower cost.
@ -325,22 +305,11 @@ TEST_F(IndexScanNDVCostTest, LowNDVHasLowerCostThanHighNDV) {
samplingEstimators.emplace(nss, std::move(fakeNdvEstimator));
jCtx->samplingEstimators = &samplingEstimators;
auto costWithLowNDV = costEstimator->costIndexScanFragment(nodeId, zeroCost);
auto costWithLowNDV = costEstimator->costIndexScanFragment(nodeId);
ASSERT_GT(costWithoutNDV, costWithLowNDV);
}
// Exercises numDocsProcessedFromCpuCost(): a zero cost maps to zero documents, a positive cost
// maps to a positive number of documents, and doubling the cost exactly doubles the result.
TEST(JoinEstimatesTest, NumDocsProcessedFromCpuCost) {
    // Zero CPU cost corresponds to zero processed documents.
    ASSERT_EQ(0.0, numDocsProcessedFromCpuCost(zeroCost).toDouble());

    // Linearity check: evaluate the mapping at a unit cost and at twice that cost.
    const CostEstimate unitCpu{CostType{1.0}, EstimationSource::Code};
    const double docsAtUnit = numDocsProcessedFromCpuCost(unitCpu).toDouble();
    const double docsAtDouble = numDocsProcessedFromCpuCost(unitCpu * 2.0).toDouble();

    ASSERT_GT(docsAtUnit, 0.0);
    ASSERT_EQ(2.0 * docsAtUnit, docsAtDouble);
}
TEST(MackertLohmanTest, CollectionFitsInCache) {
auto result1 = estimateMackertLohmanRandIO(100, 1000, 10);
ASSERT_EQ(10, result1.randIOPages);

View File

@ -66,12 +66,6 @@ const CostCoefficient ioSeqIncremental{
const CostCoefficient ioRandIncremental{
CostCoefficientType{docProcessCpuIncremental.toDouble() * 1411.4}};
// Converts a CPU cost into a document count by dividing it by the per-document processing
// coefficient 'docProcessCpuIncremental'. The result carries the same estimation source as the
// input cost.
CardinalityEstimate numDocsProcessedFromCpuCost(CostEstimate cpuCost) {
    const auto numDocs = cpuCost.toDouble() / docProcessCpuIncremental.toDouble();
    return CardinalityEstimate{CardinalityType{numDocs}, cpuCost.source()};
}
JoinCostEstimate::JoinCostEstimate(CardinalityEstimate numDocsProcessed,
CardinalityEstimate numDocsOutput,
CardinalityEstimate numSeqIOs,

View File

@ -55,13 +55,6 @@ enum class MackertLohmanCase {
StringData toStringData(MackertLohmanCase c);
/**
* Convert a CPU cost into an equivalent 'numDocsProcessed' cardinality based on the per-document
* processing coefficient used by the join cost model. Used for base table accesses where the CPU
* cost comes from CBR, allowing the result to be plugged into the 'numDocsProcessed' component.
*/
CardinalityEstimate numDocsProcessedFromCpuCost(CostEstimate cpuCost);
/**
* Represents the cost estimate for a single join operation. It stores all of its inputs for
* debugging purposes, as it may be useful to see how individual components contribute to the cost

View File

@ -29,8 +29,6 @@
#pragma once
#include "mongo/db/query/compiler/ce/sampling/sampling_estimator.h"
#include "mongo/db/query/compiler/optimizer/cost_based_ranker/estimates_storage.h"
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimation_types.h"
#include "mongo/db/query/compiler/optimizer/join/catalog_stats.h"
#include "mongo/db/query/compiler/optimizer/join/join_graph.h"
#include "mongo/db/query/compiler/physical_model/query_solution/query_solution.h"
@ -55,30 +53,13 @@ using PerCollUniqueFieldInfo = absl::flat_hash_map<NamespaceString, UniqueFieldI
using SamplingEstimatorMap =
stdx::unordered_map<NamespaceString, std::unique_ptr<ce::SamplingEstimator>>;
/**
* Struct containing results from 'singleTableAccessPlans()' function.
*
* Members are listed in the order used by the designated initializers that build this struct, so
* keep the order stable when adding fields.
*/
struct SingleTableAccessPlansResult {
// Best single-table plan chosen by CBR for each node, keyed by the node's access path.
QuerySolutionMap cbrCqQsns;
// Cardinality and cost estimates reported by CBR for the QSNs of the planned queries.
cost_based_ranker::EstimateMap estimate;
// Stores cardinality estimates for nodes after single-table predicates are applied.
NodeCardinalities nodeCardinalities;
// Stores cardinalities for the underlying collections as reported by the catalog.
NodeCardinalities collCardinalities;
// Per-node CBR costs for the winning single-table plans.
NodeCBRCosts nodeCBRCosts;
};
/**
* A struct tracking all information needed to reorder joins and generate a join plan.
*/
struct JoinReorderingContext {
const JoinGraph& joinGraph;
const std::vector<ResolvedPath>& resolvedPaths;
SingleTableAccessPlansResult singleTableAccess;
QuerySolutionMap cbrCqQsns;
AvailableIndexes perCollIdxs;
CatalogStats catStats;

View File

@ -287,7 +287,7 @@ void PlanEnumeratorContext::enumerateJoinSubsets() {
// Initialize base level of joinSubsets, representing single collections (no joins).
for (size_t i = 0; i < numNodes; ++i) {
const auto* cq = _ctx.joinGraph.getNode((NodeId)i).accessPath.get();
const auto* qsn = _ctx.singleTableAccess.cbrCqQsns.at(cq).get();
const auto* qsn = _ctx.cbrCqQsns.at(cq).get();
_joinSubsets[kBaseLevel].push_back(JoinSubset(NodeSet{}.set(i)));
_joinSubsets[kBaseLevel].back().plans = {_registry.registerBaseNode(
(NodeId)i,

View File

@ -94,7 +94,8 @@ public:
subsetCards,
// Just assume all edges are 10% selective.
EdgeSelectivities(jCtx.joinGraph.numEdges(),
{SelectivityType{0.1}, EstimationSource::Code}));
{SelectivityType{0.1}, EstimationSource::Code}),
collCards);
}
auto makeCoster(const JoinReorderingContext& jCtx, JoinCardinalityEstimator& ce) {

View File

@ -81,7 +81,7 @@ protected:
// Ensure each solution has a different base node.
std::set<NodeId> baseNodes;
for (auto seed : seeds) {
auto clonedMap = cloneSolnMap(jCtx.singleTableAccess.cbrCqQsns);
auto clonedMap = cloneSolnMap(jCtx.cbrCqQsns);
auto r = constructSolutionWithRandomOrder(
jCtx, nullptr, nullptr, seed, planShape, method, false /* No pruning. */, retries);
// For tests expecting an error, we uassert.

View File

@ -84,27 +84,14 @@ StatusWith<SingleTableAccessPlansResult> singleTableAccessPlans(
const JoinGraph& graph,
const SamplingEstimatorMap& samplingEstimators,
bool isExplain) {
const auto numNodes = graph.numNodes();
QuerySolutionMap solns;
cost_based_ranker::EstimateMap estimates;
NodeCardinalities nodeCardinalities;
NodeCardinalities collCardinalities;
NodeCBRCosts nodeCBRCosts;
nodeCardinalities.reserve(numNodes);
collCardinalities.reserve(numNodes);
nodeCBRCosts.reserve(numNodes);
const auto numNodes = graph.numNodes();
for (size_t i = 0; i < numNodes; i++) {
const auto& node = graph.getNode(i);
auto& nss = node.accessPath->nss();
const auto& samplingEstimator = samplingEstimators.at(nss);
collCardinalities.push_back(cost_based_ranker::CardinalityEstimate{
cost_based_ranker::CardinalityType{samplingEstimator->getCollCard()},
cost_based_ranker::EstimationSource::Metadata});
// Re-construct MultipleCollectionAccessor so that this collection is treated as the "main"
// collection during query planning (and CE).
auto singleAcq = [&nss, &collections]() -> CollectionOrViewAcquisition {
@ -145,7 +132,7 @@ StatusWith<SingleTableAccessPlansResult> singleTableAccessPlans(
return swSolns.getStatus();
}
auto swCbrResult = QueryPlanner::planWithCostBasedRanking(params,
samplingEstimator.get(),
samplingEstimators.at(nss).get(),
nullptr /*exactCardinality*/,
std::move(swSolns.getValue()),
isExplain);
@ -159,22 +146,11 @@ StatusWith<SingleTableAccessPlansResult> singleTableAccessPlans(
ErrorCodes::NoQueryExecutionPlans,
fmt::format("CBR failed to find best plan for nss: {}", nss.toStringForErrorMsg()));
}
// Save solution and corresponding estimates for the best plan
solns[node.accessPath.get()] = std::move(cbrResult.solutions.front());
tassert(11540201,
"Expected to have estimation data for single table access plan",
cbrResult.maybeExplainData.has_value());
// Save solution and corresponding estimates for the best plan
auto& winningSolution = cbrResult.solutions.front();
const auto* rootQsn = winningSolution->root();
solns[node.accessPath.get()] = std::move(winningSolution);
auto rootEstIt = cbrResult.maybeExplainData->estimates.find(rootQsn);
tassert(11514601,
"Missing estimate for winning single-table plan's root QSN",
rootEstIt != cbrResult.maybeExplainData->estimates.end());
nodeCardinalities.push_back(rootEstIt->second->outCE);
nodeCBRCosts.push_back(rootEstIt->second->cost);
for (auto& [k, v] : cbrResult.maybeExplainData->estimates) {
// Take care to use 'insert_or_assign' which will override existing entries in
// estimates. It is possible that a QSN for a rejected plan of a previous table which
@ -186,11 +162,8 @@ StatusWith<SingleTableAccessPlansResult> singleTableAccessPlans(
}
return SingleTableAccessPlansResult{
.cbrCqQsns = std::move(solns),
.solns = std::move(solns),
.estimate = std::move(estimates),
.nodeCardinalities = std::move(nodeCardinalities),
.collCardinalities = std::move(collCardinalities),
.nodeCBRCosts = std::move(nodeCBRCosts),
};
}

View File

@ -27,6 +27,7 @@
* it in the license file.
*/
#include "mongo/db/query/compiler/optimizer/cost_based_ranker/estimates_storage.h"
#include "mongo/db/query/compiler/optimizer/join/join_graph.h"
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
#include "mongo/db/query/multiple_collection_accessor.h"
@ -36,6 +37,14 @@
namespace mongo::join_ordering {
/**
* Struct containing results from 'singleTableAccessPlans()' function.
*/
struct SingleTableAccessPlansResult {
// Best single-table plan chosen by CBR for each query, keyed by the node's access path.
QuerySolutionMap solns;
// Cardinality and cost estimates reported by CBR for the QSNs of the planned queries.
cost_based_ranker::EstimateMap estimate;
};
/**
* Constructor for sampling estimators per collection access.
*/
@ -45,12 +54,10 @@ SamplingEstimatorMap makeSamplingEstimators(const MultipleCollectionAccessor& co
/**
* Given a JoinGraph 'model' where each node links to a CanonicalQuery and a map of
* 'SamplingEstimators' keyed by namespace, for each query this function invokes the plan
* enumerator and uses cost-based ranking (CBR) with sampling-based cardinality estimation. It
* returns a 'SingleTableAccessPlansResult' containing the winning QuerySolution for each query,
* an 'EstimateMap' with cardinality and cost estimates for every QSN in the winning plans, and
* per-NodeId summaries of each winning plan (root output cardinality and CBR CPU cost) plus the
* catalog-reported cardinality of each base collection.
* 'SamplingEstimators' keyed by namespace, this function invokes the plan enumerator for each
* query and uses cost-based ranking (CBR) with sampling-based cardinality estimation. It returns
* a QuerySolution representing the best plan for each query, along with an 'EstimateMap' that
* contains cardinality and cost estimates for every QSN.
*/
StatusWith<SingleTableAccessPlansResult> singleTableAccessPlans(
OperationContext* opCtx,

View File

@ -51,6 +51,7 @@ void assertQuerySolutionHasEstimate(const QuerySolutionNode* qsn, const Estimate
}
}
// Tests that the estimate map is populated for each collection.
TEST_F(SingleTableAccessTestFixture, EstimatesPopulated) {
auto opCtx = operationContext();
auto nss1 = NamespaceString::createNamespaceString_forTest("test", "coll1");
@ -80,7 +81,7 @@ TEST_F(SingleTableAccessTestFixture, EstimatesPopulated) {
auto mca = multipleCollectionAccessor(opCtx, {nss1, nss2});
SamplingEstimatorMap estimators;
estimators[nss1] = samplingEstimator(mca, nss1, 1.0);
estimators[nss1] = samplingEstimator(mca, nss1);
estimators[nss2] = samplingEstimator(mca, nss2);
auto filter1 = fromjson("{a: 1, b: 1}");
@ -97,31 +98,16 @@ TEST_F(SingleTableAccessTestFixture, EstimatesPopulated) {
ASSERT_OK(swRes);
auto& res = swRes.getValue();
ASSERT_EQ(2, res.cbrCqQsns.size());
ASSERT_EQ(2, res.solns.size());
// There are no indexes on nss2, so the chosen access path must use a collection scan.
auto soln2 = res.cbrCqQsns.at(graph.accessPathAt(*node2)).get();
auto soln2 = res.solns.at(graph.accessPathAt(*node2)).get();
ASSERT(soln2);
ASSERT_EQ(soln2->getFirstNodeByType(STAGE_COLLSCAN).second, 1);
for (auto&& [_, soln] : res.cbrCqQsns) {
for (auto&& [_, soln] : res.solns) {
assertQuerySolutionHasEstimate(soln->root(), res.estimate);
}
ASSERT_EQ(graph.numNodes(), res.nodeCardinalities.size());
ASSERT_EQ(graph.numNodes(), res.nodeCBRCosts.size());
ASSERT_EQ(graph.numNodes(), res.collCardinalities.size());
// Illustrates the difference between the cardinalities before & after predicates
// are applied. The predicate only matches a single document.
ASSERT_EQ(10.0, res.collCardinalities[0].toDouble());
ASSERT_EQ(1.0, res.nodeCardinalities[0].toDouble());
ASSERT_GT(res.nodeCBRCosts[0].toDouble(), 0.0);
// Predicate matches every document so cardinalities are the same.
ASSERT_EQ(100.0, res.collCardinalities[1].toDouble());
ASSERT_EQ(100.0, res.nodeCardinalities[1].toDouble());
ASSERT_GT(res.nodeCBRCosts[1].toDouble(), 0.0);
}
} // namespace mongo::join_ordering

Some files were not shown because too many files have changed in this diff Show More