SERVER-117496 Conditionally (re)execute tests from bazel result tasks (#52497)

GitOrigin-RevId: a612c87835047f455a68ed17c0ca190271513c22
2026-04-27 09:56:59 -04:00 · 2026-04-27 09:56:59 -04:00 · 850ecaf796
commit 850ecaf796
parent f8a548384d
5 changed files with 199 additions and 165 deletions
--- a/buildscripts/bazel_burn_in.py
+++ b/buildscripts/bazel_burn_in.py
@ -231,26 +231,7 @@ def make_task(targets_to_run, variant_name):
                "execute resmoke tests via bazel",
                {
                    "targets": " ".join(targets_to_run),
-                    "bazel_args": (
-                        "--test_tag_filters=${resmoke_tests_tag_filter},-incompatible_with_bazel_remote_test "
-                        "--test_arg=--testTimeout=960 "
-                        "--test_timeout=1500 "
-                        "--config=evg "
-                    ),
-                    "task_compile_flags": (
-                        "--keep_going "
-                        "--verbose_failures "
-                        "--simple_build_id=True "
-                        "--define=MONGO_VERSION=${version} "
-                        "--linkstatic=True "
-                        "--features=strip_debug "
-                        "--separate_debug=False "
-                        "--remote_download_outputs=minimal "
-                        "--zip_undeclared_test_outputs"
-                    ),
                    "generate_burn_in_targets": True,
-                    "compiling_for_test": True,
-                    "build_timeout_seconds": 1800,
                },
            ),
        ],
--- a/buildscripts/evergreen_activate_result_tasks.py
+++ b/buildscripts/evergreen_activate_result_tasks.py
@ -50,6 +50,46 @@ def get_executed_test_labels(build_events_file: str) -> set[str]:
    return labels


+def get_result_tasks(evg_api, build_id):
+    """Return the build's tasks named like bazel targets ("//..."), skipping "_burn_in_" ones."""
+    return [
+        entry
+        for entry in evg_api.tasks_by_build(build_id)
+        if entry.display_name.startswith("//") and "_burn_in_" not in entry.display_name
+    ]
+
+
+def activate_or_restart_tasks(evg_api, tasks, version_id, build_variant):
+    """Restart each already-activated task, then activate the remaining ones in one request."""
+    pending = []
+    for result_task in tasks:
+        if not result_task.activated:
+            pending.append(result_task.display_name)
+            continue
+        evg_api.restart_task(result_task.task_id)
+
+    if pending:
+        evg_api.activate_version_tasks(version_id, [{"name": build_variant, "tasks": pending}])
+
+
+def assert_all_tests_have_tasks(tasks, build_events_file):
+    """Raise RuntimeError if an executed test label has no task with that display name."""
+    executed = get_executed_test_labels(build_events_file)
+    missing_sorted = sorted(executed - {task.display_name for task in tasks})
+    if not missing_sorted:
+        return
+    LOGGER.error(
+        "Executed tests have no corresponding Evergreen task — "
+        "this indicates a bug in task generation",
+        missing_count=len(missing_sorted),
+        missing_tasks=missing_sorted,
+    )
+    raise RuntimeError(
+        f"{len(missing_sorted)} executed test(s) have no corresponding Evergreen task: "
+        + ", ".join(missing_sorted)
+    )
+
+
 def activate_result_task_group(
    build_variant: str,
    task_name: str,
@ -67,7 +107,6 @@ def activate_result_task_group(
    :param build_events_file: Optional path to build_events.json. When provided, asserts that
        every executed test has a corresponding Evergreen task; raises RuntimeError otherwise.
    """
-    result_task_group_name = f"{task_name}_results_{build_variant}"

    try:
        version = evg_api.version_by_id(version_id)
@ -81,72 +120,15 @@ def activate_result_task_group(
            )
            return

-        task_list = evg_api.tasks_by_build(build_id)
-
-        # Collect all task names that need activation
-        tasks_to_activate = []
-        already_activated_count = 0
-        evg_task_names = set()
-        for task in task_list:
-            # Result tasks are bazel targets that start with "//"
-            if task.display_name.startswith("//") and "_burn_in_" not in task.display_name:
-                evg_task_names.add(task.display_name)
-                if task.activated:
-                    already_activated_count += 1
-                    LOGGER.debug(
-                        "Task already activated, skipping",
-                        task_id=task.task_id,
-                        task_name=task.display_name,
-                    )
-                else:
-                    tasks_to_activate.append(task.display_name)
-                    LOGGER.debug(
-                        "Found result task to activate",
-                        task_id=task.task_id,
-                        task_name=task.display_name,
-                    )
+        result_tasks = get_result_tasks(evg_api, build_id)

        if build_events_file:
-            executed_labels = get_executed_test_labels(build_events_file)
-            missing = executed_labels - evg_task_names
-            if missing:
-                missing_sorted = sorted(missing)
-                LOGGER.error(
-                    "Executed tests have no corresponding Evergreen task — "
-                    "this indicates a bug in task generation",
-                    missing_count=len(missing_sorted),
-                    missing_tasks=missing_sorted,
-                )
-                raise RuntimeError(
-                    f"{len(missing_sorted)} executed test(s) have no corresponding Evergreen task: "
-                    + ", ".join(missing_sorted)
-                )
+            assert_all_tests_have_tasks(result_tasks, build_events_file)

-        if not tasks_to_activate and not already_activated_count:
-            LOGGER.warning(
-                "No result tasks found to activate",
-                task_group=result_task_group_name,
-                build_variant=build_variant,
-            )
-            return
-
-        LOGGER.info(
-            "Activating result tasks",
-            count=len(tasks_to_activate),
-            already_activated=already_activated_count,
-            task_group=result_task_group_name,
-        )
-
-        variants = [{"name": build_variant, "tasks": tasks_to_activate}]
-        evg_api.activate_version_tasks(version_id, variants)
-
-        LOGGER.info(
-            "Successfully activated result tasks",
-            count=len(tasks_to_activate),
-            task_group=result_task_group_name,
-        )
+        activate_or_restart_tasks(evg_api, result_tasks, version_id, build_variant)

    except Exception:
+        result_task_group_name = f"{task_name}_results_{build_variant}"
        LOGGER.error(
            "Failed to activate result task group",
            task_group=result_task_group_name,
--- a/buildscripts/generate_result_tasks.py
+++ b/buildscripts/generate_result_tasks.py
@ -53,6 +53,7 @@ def _bazel_binary() -> str:

 def make_results_task(target: str) -> Task:
    commands = [
+        FunctionCall("execute resmoke tests via bazel", {"targets": target, "result_task": True}),
        FunctionCall("fetch remote test results", {"test_label": target}),
    ]

@ -78,17 +79,22 @@ def make_task_group(
        setup_group_can_fail_task=True,
        setup_group=[
            FunctionCall("git get project and add git tag"),
-            FunctionCall("get engflow cert"),
-            FunctionCall("get engflow key"),
+            FunctionCall("set task expansion macros"),
+            FunctionCall("f_expansions_write"),
+            FunctionCall("set up venv"),
+            FunctionCall("configure evergreen api credentials"),
+            FunctionCall("set up credentials"),
+            FunctionCall("get engflow creds"),
            BuiltInCommand(
                "s3.get",
                {
                    "aws_key": "${aws_key_new}",
                    "aws_secret": "${aws_secret}",
-                    "local_file": "build_events.json",
+                    "local_file": "src/build_events.json",
                    "remote_file": "${project}/${version_id}/${build_variant}/"
                    + f"{resmoke_task}/build_events.json",
                    "bucket": "mciuploads",
+                    "optional": True,
                },
            ),
            BuiltInCommand(
@ -100,6 +106,7 @@ def make_task_group(
                    "remote_file": "${project}/${build_variant}/${revision}/"
                    + f"bazel-invocation-{resmoke_task}-0.txt",
                    "bucket": "mciuploads",
+                    "optional": True,
                },
            ),
        ],
--- a/evergreen/fetch_remote_test_results.sh
+++ b/evergreen/fetch_remote_test_results.sh
@ -4,7 +4,7 @@
 #   bash fetch_remote_test_results.sh
 #
 # Assumes the following files exist:
-#  ./"build_events.json"                       Build events JSON containing the records of remote test executions
+#  ./src/"build_events.json"                       Build events JSON containing the records of remote test executions
 #  engflow.cert and engflow.key located in either ${workdir}/src or ${HOME}/.engflow/creds
 #
 # Required environment variables:
@ -285,7 +285,7 @@ function resolve_file() {
    return 1
 }

-BEP_FILE='build_events.json'
+BEP_FILE='src/build_events.json'

 if ! [ -f "$ENGFLOW_CERT" ]; then
    cert_candidates=(
--- a/evergreen/resmoke_tests_execute_bazel.sh
+++ b/evergreen/resmoke_tests_execute_bazel.sh
@ -10,82 +10,117 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
 . "$DIR/prelude.sh"
 . "$DIR/bazel_evergreen_shutils.sh"

-set -o errexit
-set -o verbose
+# Result tasks re-invoke this script. When src/build_events.json exists and the task was activated by
+# mongodb-mongo-ci-user (i.e. by the resmoke_tests task that already ran all tests), skip re-execution.
+exit_early_if_result_task() {
+    [[ -f "src/build_events.json" ]] || return 0
+    [[ "$activated_by" == "mongodb-mongo-ci-user" ]] || return 0
+    echo "Tests were executed by the resmoke_tests task, test results will be fetched from their remote execution."
+    exit 0
+}

-bazel_evergreen_shutils::activate_and_cd_src
+# Translates the final `bazel test` return code into the runner task's exit status. Code 3 (build OK,
+# some tests failed or timed out) is reported by the individual results tasks, so it exits 0 here.
+exit_for_runner_task() {
+    local ret=$1
+    if [[ "$ret" -eq 0 ]]; then
+        exit 0
+    fi
+    if [[ "$ret" -eq 3 ]]; then
+        echo 'Some tests failed. See the generated task(s) for the failed targets for more details on the failure(s).'
+        exit 0
+    fi
+    if [[ "$ret" -eq 4 ]]; then
+        # Before suites are converted, this is expected and should not fail the task. Remove with SERVER-118686.
+        echo 'No tests were run.'
+        exit 0
+    fi
+    echo 'Some tests failed to build. Look for "FAILED TO BUILD" or other build errors above. Tests with regular test failures will have their results in separate generated tasks.'
+    exit "$ret"
+}

-BAZEL_BINARY=$(bazel_evergreen_shutils::bazel_get_binary_path)
-export BAZEL_BINARY
+exit_for_result_task() {
+    local ret=$1
+    # Any code other than 3 is propagated unchanged; 3 exits 0 so the task can go on to fetch results.
+    if ! [[ "$ret" -eq 3 ]]; then
+        exit "$ret"
+    fi
+    echo 'Some tests failed, the task will be failed after fetching test results.'
+    exit 0
+}

-ci_flags="--//bazel/resmoke:in_evergreen"
+build_ci_flags() {
+    ci_flags="--//bazel/resmoke:in_evergreen"

-# For simple build ID generation:
-export compile_variant="${compile_variant}"
-export version_id="${version_id}"
+    # For simple build ID generation:
+    export compile_variant="${compile_variant}"
+    export version_id="${version_id}"

-if [[ "${evergreen_remote_exec}" == "on" ]]; then
-    ci_flags="--config=remote_test ${ci_flags}"
-fi
+    if [[ "${evergreen_remote_exec}" == "on" ]]; then
+        ci_flags="--config=remote_test ${ci_flags}"
+    fi

-if [ ${should_shuffle} = true ]; then
-    ci_flags+=" --test_arg=--shuffle"
-elif [ ${should_shuffle} = false ]; then
-    ci_flags+=" --test_arg=--shuffleMode=off"
-fi
+    if [ "${should_shuffle}" = true ]; then
+        ci_flags+=" --test_arg=--shuffle"
+    elif [ "${should_shuffle}" = false ]; then
+        ci_flags+=" --test_arg=--shuffleMode=off"
+    fi

-if [ "${is_patch}" = "true" ]; then
-    ci_flags+=" --test_arg=--patchBuild"
-fi
+    if [ "${is_patch}" = "true" ]; then
+        ci_flags+=" --test_arg=--patchBuild"
+    fi

-if [ "${skip_symbolization}" = "true" ]; then
-    ci_flags+=" --test_arg=--skipSymbolization"
-fi
+    if [ "${skip_symbolization}" = "true" ]; then
+        ci_flags+=" --test_arg=--skipSymbolization"
+    fi

-# Add test selection flag based on patch parameter
-if [ "${enable_evergreen_api_test_selection}" = "true" ]; then
-    ci_flags+=" --test_arg=--enableEvergreenApiTestSelection"
-fi
+    if [ "${enable_evergreen_api_test_selection}" = "true" ]; then
+        ci_flags+=" --test_arg=--enableEvergreenApiTestSelection"
+    fi

-# Split comma separated list of strategies
-IFS=',' read -a strategies <<<"$test_selection_strategies_array"
-for strategy in "${strategies[@]}"; do
-    ci_flags+=" --test_arg=--evergreenTestSelectionStrategy=${strategy}"
-done
-
-# Add each test flag from test_flags expansion as --test_arg
-if [ -n "${test_flags:-}" ]; then
-    eval "flags_array=(${test_flags})"
-    for flag in "${flags_array[@]}"; do
-        bazel_args+=" --test_arg=\"${flag}\""
+    # Split comma separated list of strategies
+    IFS=',' read -a strategies <<<"$test_selection_strategies_array"
+    for strategy in "${strategies[@]}"; do
+        ci_flags+=" --test_arg=--evergreenTestSelectionStrategy=${strategy}"
    done
-fi

-ALL_FLAGS="${ci_flags} ${LOCAL_ARG} ${bazel_args:-} ${bazel_compile_flags:-} ${task_compile_flags:-} ${patch_compile_flags:-}"
-CONFIG_FLAGS="$(bazel_evergreen_shutils::extract_config_flags "${ALL_FLAGS}")"
-echo "${ALL_FLAGS}" >.bazel_build_flags
+    # Add each test flag from test_flags expansion as --test_arg
+    if [ -n "${test_flags:-}" ]; then
+        eval "flags_array=(${test_flags})"
+        for flag in "${flags_array[@]}"; do
+            bazel_args+=" --test_arg=\"${flag}\""
+        done
+    fi
+}

-# Save the invocation, intentionally excluding CI specific flags.
-echo "python buildscripts/install_bazel.py" >bazel-invocation.txt
-echo "bazel test ${bazel_args} ${targets}" >>bazel-invocation.txt
+save_invocation() {
+    # Record the commands needed to reproduce the run; CI specific flags are intentionally left out.
+    local test_cmd="bazel test ${bazel_args} ${targets}"
+    printf '%s\n' "python buildscripts/install_bazel.py" "$test_cmd" >bazel-invocation.txt
+}

-if [ "${generate_burn_in_targets}" = "true" ]; then
+maybe_generate_burn_in_targets() {
+    if [ "${generate_burn_in_targets}" != "true" ]; then
+        return
+    fi
    echo "Generating burn-in test targets..."
    base_revision="$(git merge-base ${revision} HEAD)"
    ${BAZEL_BINARY} build ${CONFIG_FLAGS} //... --build_tag_filters=resmoke_config
    bazel_evergreen_shutils::query_resmoke_configs "${BAZEL_BINARY}" "${CONFIG_FLAGS}" "resmoke_suite_configs.yml"
    ${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:bazel_burn_in -- generate-targets "$base_revision" || echo "Failed to generate burn-in targets"
-fi
+}

-set +o errexit
+# Fetches then tests with retries. Leaves the result in the global RET.
+run_fetch_and_test() {
+    export RETRY_ON_FAIL=1
+    bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
+        fetch ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} ${targets}
+    RET=$?

-# Fetch then test with retries.
-export RETRY_ON_FAIL=1
-bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
-    fetch ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} ${targets}
-RET=$?
+    if [[ "$RET" != "0" ]]; then
+        return
+    fi

-if [[ "$RET" == "0" ]]; then
    export RETRY_ON_FAIL=0
    bazel_evergreen_shutils::retry_bazel_cmd 2 "$BAZEL_BINARY" \
        test ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} --build_event_json_file=build_events.json ${targets}
@ -96,13 +131,12 @@ if [[ "$RET" == "0" ]]; then
    elif [[ "$RET" != "0" ]]; then
        echo "Errors were found during bazel test, failing the execution"
    fi
-fi
+}

-bazel_evergreen_shutils::write_last_engflow_link
-
-set -o errexit
-
-if [[ "$RET" != "0" ]]; then
+gather_failed_tests() {
+    if [[ "$RET" == "0" ]]; then
+        return
+    fi
    # This is a hacky way to save build time for the initial build during the `bazel test` above. They
    # are stripped binaries there. We should rebuild them with debug symbols and separate debug.
    # The relinked binaries should still be hash identical when stripped with strip.
@ -111,27 +145,57 @@ if [[ "$RET" != "0" ]]; then
    # The --config flag needs to stay consistent for the `bazel run` to avoid evicting the previous results.
    # Strip out anything that isn't a --config flag that could interfere with the run command.
    eval ${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:gather_failed_tests || true
-fi
+}

-if [ "${generate_burn_in_targets}" != "true" ]; then
+activate_result_tasks() {
+    if [ "${generate_burn_in_targets}" = "true" ]; then
+        return
+    fi
    echo "Activating result task group..."
    python buildscripts/evergreen_activate_result_tasks.py --expansion-file ../expansions.yml --build-events-file build_events.json
-fi
+}

-eval ${BAZEL_BINARY} shutdown # Explicitly shutdown the bazel server in case the Evergreen agent is tracking it for completion of this process.
+main() {
+    set -o errexit
+    set -o verbose

-# Return code 3 from `bazel test` indicates that the build was OK, but some tests failed or timed out.
-# The test failures are reported in individual results tasks, so don't fail the task here.
-if [[ "$RET" -eq 3 ]]; then
-    echo 'Some tests failed. See the generated task(s) for the failed targets for more details on the failure(s).'
-    exit 0
-elif [[ "$RET" -eq 4 ]]; then
-    # Before suites are converted, this is expected and should not fail the task. Remove with SERVER-118686.
-    echo 'No tests were run.'
-    exit 0
-elif [[ "$RET" -eq 0 ]]; then
-    exit 0
-else
-    echo 'Some tests failed to build. Look for "FAILED TO BUILD" or other build errors above. Tests with regular test failures will have their results in separate generated tasks.'
-    exit $RET
-fi
+    echo Expansions: activated_by:"$activated_by" execution:"$execution" is_stepback:"$is_stepback" standalone:"$standalone"
+
+    exit_early_if_result_task
+
+    bazel_evergreen_shutils::activate_and_cd_src
+
+    BAZEL_BINARY=$(bazel_evergreen_shutils::bazel_get_binary_path)
+    export BAZEL_BINARY
+
+    build_ci_flags
+
+    ALL_FLAGS="${ci_flags} ${LOCAL_ARG} ${bazel_args:-} ${bazel_compile_flags:-} ${task_compile_flags:-} ${patch_compile_flags:-}"
+    CONFIG_FLAGS="$(bazel_evergreen_shutils::extract_config_flags "${ALL_FLAGS}")"
+    echo "${ALL_FLAGS}" >.bazel_build_flags
+
+    save_invocation
+
+    maybe_generate_burn_in_targets
+
+    set +o errexit
+    run_fetch_and_test
+    bazel_evergreen_shutils::write_last_engflow_link
+    set -o errexit
+
+    if [[ -n "$result_task" ]]; then
+        # Explicitly shutdown the bazel server in case the Evergreen agent is tracking it for completion of this process.
+        eval ${BAZEL_BINARY} shutdown
+        exit_for_result_task "$RET"
+    fi
+
+    gather_failed_tests
+
+    activate_result_tasks
+
+    # Explicitly shutdown the bazel server in case the Evergreen agent is tracking it for completion of this process.
+    eval ${BAZEL_BINARY} shutdown
+    exit_for_runner_task "$RET"
+}
+
+main "$@"