SERVER-117496 Conditionally (re)execute tests from bazel result tasks (#52497)

GitOrigin-RevId: a612c87835047f455a68ed17c0ca190271513c22
This commit is contained in:
Sean Lyons 2026-04-27 09:56:59 -04:00 committed by MongoDB Bot
parent f8a548384d
commit 850ecaf796
5 changed files with 199 additions and 165 deletions

View File

@ -231,26 +231,7 @@ def make_task(targets_to_run, variant_name):
"execute resmoke tests via bazel",
{
"targets": " ".join(targets_to_run),
"bazel_args": (
"--test_tag_filters=${resmoke_tests_tag_filter},-incompatible_with_bazel_remote_test "
"--test_arg=--testTimeout=960 "
"--test_timeout=1500 "
"--config=evg "
),
"task_compile_flags": (
"--keep_going "
"--verbose_failures "
"--simple_build_id=True "
"--define=MONGO_VERSION=${version} "
"--linkstatic=True "
"--features=strip_debug "
"--separate_debug=False "
"--remote_download_outputs=minimal "
"--zip_undeclared_test_outputs"
),
"generate_burn_in_targets": True,
"compiling_for_test": True,
"build_timeout_seconds": 1800,
},
),
],

View File

@ -50,6 +50,46 @@ def get_executed_test_labels(build_events_file: str) -> set[str]:
return labels
def get_result_tasks(evg_api, build_id):
    """Return the result tasks that belong to an Evergreen build.

    A task counts as a result task when its display name starts with "//"
    (i.e. it is named after a bazel target) and does not contain "_burn_in_".

    :param evg_api: Evergreen API client exposing ``tasks_by_build(build_id)``.
    :param build_id: ID of the Evergreen build whose tasks are inspected.
    :return: List of matching task objects, in the order the API yielded them.
    """
    return [
        task
        for task in evg_api.tasks_by_build(build_id)
        # Result tasks are bazel targets that start with "//"
        if task.display_name.startswith("//") and "_burn_in_" not in task.display_name
    ]
def activate_or_restart_tasks(evg_api, tasks, version_id, build_variant):
    """Restart tasks that are already activated and activate the rest.

    Each activated task is restarted individually through
    ``evg_api.restart_task``. The display names of the remaining tasks are
    collected and activated with a single ``evg_api.activate_version_tasks``
    call; that call is skipped when there is nothing to activate.

    :param evg_api: Evergreen API client.
    :param tasks: Iterable of task objects exposing ``activated``, ``task_id``
        and ``display_name``.
    :param version_id: Evergreen version the tasks belong to.
    :param build_variant: Name of the build variant the tasks run on.
    """
    pending_names = []
    for result_task in tasks:
        if not result_task.activated:
            pending_names.append(result_task.display_name)
            continue
        evg_api.restart_task(result_task.task_id)

    if not pending_names:
        return

    evg_api.activate_version_tasks(
        version_id, [{"name": build_variant, "tasks": pending_names}]
    )
def assert_all_tests_have_tasks(tasks, build_events_file):
    """Verify that every executed test label has a matching Evergreen task.

    The executed labels are read from ``build_events_file`` via
    ``get_executed_test_labels`` and compared against the display names of
    ``tasks``.

    :param tasks: Iterable of task objects exposing ``display_name``.
    :param build_events_file: Path to the build events file to read labels from.
    :raises RuntimeError: If at least one executed label has no task with the
        same display name; the message lists the missing labels in sorted order.
    """
    executed_labels = get_executed_test_labels(build_events_file)
    # Build the set directly instead of materializing a throwaway list first.
    task_names = {task.display_name for task in tasks}

    missing = executed_labels - task_names
    if missing:
        # Sort so that both the log entry and the exception are deterministic.
        missing_sorted = sorted(missing)
        LOGGER.error(
            "Executed tests have no corresponding Evergreen task — "
            "this indicates a bug in task generation",
            missing_count=len(missing_sorted),
            missing_tasks=missing_sorted,
        )
        raise RuntimeError(
            f"{len(missing_sorted)} executed test(s) have no corresponding Evergreen task: "
            + ", ".join(missing_sorted)
        )
def activate_result_task_group(
build_variant: str,
task_name: str,
@ -67,7 +107,6 @@ def activate_result_task_group(
:param build_events_file: Optional path to build_events.json. When provided, asserts that
every executed test has a corresponding Evergreen task; raises RuntimeError otherwise.
"""
result_task_group_name = f"{task_name}_results_{build_variant}"
try:
version = evg_api.version_by_id(version_id)
@ -81,72 +120,15 @@ def activate_result_task_group(
)
return
task_list = evg_api.tasks_by_build(build_id)
# Collect all task names that need activation
tasks_to_activate = []
already_activated_count = 0
evg_task_names = set()
for task in task_list:
# Result tasks are bazel targets that start with "//"
if task.display_name.startswith("//") and "_burn_in_" not in task.display_name:
evg_task_names.add(task.display_name)
if task.activated:
already_activated_count += 1
LOGGER.debug(
"Task already activated, skipping",
task_id=task.task_id,
task_name=task.display_name,
)
else:
tasks_to_activate.append(task.display_name)
LOGGER.debug(
"Found result task to activate",
task_id=task.task_id,
task_name=task.display_name,
)
result_tasks = get_result_tasks(evg_api, build_id)
if build_events_file:
executed_labels = get_executed_test_labels(build_events_file)
missing = executed_labels - evg_task_names
if missing:
missing_sorted = sorted(missing)
LOGGER.error(
"Executed tests have no corresponding Evergreen task — "
"this indicates a bug in task generation",
missing_count=len(missing_sorted),
missing_tasks=missing_sorted,
)
raise RuntimeError(
f"{len(missing_sorted)} executed test(s) have no corresponding Evergreen task: "
+ ", ".join(missing_sorted)
)
assert_all_tests_have_tasks(result_tasks, build_events_file)
if not tasks_to_activate and not already_activated_count:
LOGGER.warning(
"No result tasks found to activate",
task_group=result_task_group_name,
build_variant=build_variant,
)
return
LOGGER.info(
"Activating result tasks",
count=len(tasks_to_activate),
already_activated=already_activated_count,
task_group=result_task_group_name,
)
variants = [{"name": build_variant, "tasks": tasks_to_activate}]
evg_api.activate_version_tasks(version_id, variants)
LOGGER.info(
"Successfully activated result tasks",
count=len(tasks_to_activate),
task_group=result_task_group_name,
)
activate_or_restart_tasks(evg_api, result_tasks, version_id, build_variant)
except Exception:
result_task_group_name = f"{task_name}_results_{build_variant}"
LOGGER.error(
"Failed to activate result task group",
task_group=result_task_group_name,

View File

@ -53,6 +53,7 @@ def _bazel_binary() -> str:
def make_results_task(target: str) -> Task:
commands = [
FunctionCall("execute resmoke tests via bazel", {"targets": target, "result_task": True}),
FunctionCall("fetch remote test results", {"test_label": target}),
]
@ -78,17 +79,22 @@ def make_task_group(
setup_group_can_fail_task=True,
setup_group=[
FunctionCall("git get project and add git tag"),
FunctionCall("get engflow cert"),
FunctionCall("get engflow key"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
FunctionCall("set up venv"),
FunctionCall("configure evergreen api credentials"),
FunctionCall("set up credentials"),
FunctionCall("get engflow creds"),
BuiltInCommand(
"s3.get",
{
"aws_key": "${aws_key_new}",
"aws_secret": "${aws_secret}",
"local_file": "build_events.json",
"local_file": "src/build_events.json",
"remote_file": "${project}/${version_id}/${build_variant}/"
+ f"{resmoke_task}/build_events.json",
"bucket": "mciuploads",
"optional": True,
},
),
BuiltInCommand(
@ -100,6 +106,7 @@ def make_task_group(
"remote_file": "${project}/${build_variant}/${revision}/"
+ f"bazel-invocation-{resmoke_task}-0.txt",
"bucket": "mciuploads",
"optional": True,
},
),
],

View File

@ -4,7 +4,7 @@
# bash fetch_remote_test_results.sh
#
# Assumes the following files exist:
# ./"build_events.json" Build events JSON containing the records of remote test executions
# ./src/"build_events.json" Build events JSON containing the records of remote test executions
# engflow.cert and engflow.key located in either ${workdir}/src or ${HOME}/.engflow/creds
#
# Required environment variables:
@ -285,7 +285,7 @@ function resolve_file() {
return 1
}
BEP_FILE='build_events.json'
BEP_FILE='src/build_events.json'
if ! [ -f "$ENGFLOW_CERT" ]; then
cert_candidates=(

View File

@ -10,82 +10,117 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/prelude.sh"
. "$DIR/bazel_evergreen_shutils.sh"
set -o errexit
set -o verbose
# Result tasks call back into this script so the test can be re-executed on demand.
# Skip the re-execution (exit 0) only when build events are already present AND the
# task was activated by the CI user, i.e. by the resmoke_tests task that already ran
# all tests; in every other case return and let the caller run the test.
exit_early_if_result_task() {
    [[ -f "src/build_events.json" ]] || return 0
    [[ "$activated_by" == "mongodb-mongo-ci-user" ]] || return 0

    echo "Tests were executed by the resmoke_tests task, test results will be fetched from their remote execution."
    exit 0
}
bazel_evergreen_shutils::activate_and_cd_src
# Terminates the runner task with an exit status derived from the final bazel return code ($1).
#   0 -> exit 0.
#   3 -> `bazel test` built everything but some tests failed or timed out. Those failures are
#        reported by the individual results tasks, so the runner task itself still exits 0.
#   4 -> no tests were run; tolerated for now (see SERVER-118686), exit 0.
#   anything else -> treated as a build failure and propagated as the exit status.
exit_for_runner_task() {
    local bazel_rc="$1"

    if [[ "$bazel_rc" -eq 0 ]]; then
        exit 0
    fi

    if [[ "$bazel_rc" -eq 3 ]]; then
        echo 'Some tests failed. See the generated task(s) for the failed targets for more details on the failure(s).'
        exit 0
    fi

    if [[ "$bazel_rc" -eq 4 ]]; then
        # Before suites are converted, this is expected and should not fail the task. Remove with SERVER-118686.
        echo 'No tests were run.'
        exit 0
    fi

    echo 'Some tests failed to build. Look for "FAILED TO BUILD" or other build errors above. Tests with regular test failures will have their results in separate generated tasks.'
    exit "$bazel_rc"
}
BAZEL_BINARY=$(bazel_evergreen_shutils::bazel_get_binary_path)
export BAZEL_BINARY
# Terminates a result task with an exit status derived from the bazel return code ($1).
# Return code 3 exits 0 here and prints a notice that the task will be failed after the
# test results are fetched; every other code is passed through unchanged.
exit_for_result_task() {
    local bazel_rc="$1"

    if [[ "$bazel_rc" -ne 3 ]]; then
        exit "$bazel_rc"
    fi

    echo 'Some tests failed, the task will be failed after fetching test results.'
    exit 0
}
ci_flags="--//bazel/resmoke:in_evergreen"
build_ci_flags() {
ci_flags="--//bazel/resmoke:in_evergreen"
# For simple build ID generation:
export compile_variant="${compile_variant}"
export version_id="${version_id}"
# For simple build ID generation:
export compile_variant="${compile_variant}"
export version_id="${version_id}"
if [[ "${evergreen_remote_exec}" == "on" ]]; then
ci_flags="--config=remote_test ${ci_flags}"
fi
if [[ "${evergreen_remote_exec}" == "on" ]]; then
ci_flags="--config=remote_test ${ci_flags}"
fi
if [ ${should_shuffle} = true ]; then
ci_flags+=" --test_arg=--shuffle"
elif [ ${should_shuffle} = false ]; then
ci_flags+=" --test_arg=--shuffleMode=off"
fi
if [ "${should_shuffle}" = true ]; then
ci_flags+=" --test_arg=--shuffle"
elif [ "${should_shuffle}" = false ]; then
ci_flags+=" --test_arg=--shuffleMode=off"
fi
if [ "${is_patch}" = "true" ]; then
ci_flags+=" --test_arg=--patchBuild"
fi
if [ "${is_patch}" = "true" ]; then
ci_flags+=" --test_arg=--patchBuild"
fi
if [ "${skip_symbolization}" = "true" ]; then
ci_flags+=" --test_arg=--skipSymbolization"
fi
if [ "${skip_symbolization}" = "true" ]; then
ci_flags+=" --test_arg=--skipSymbolization"
fi
# Add test selection flag based on patch parameter
if [ "${enable_evergreen_api_test_selection}" = "true" ]; then
ci_flags+=" --test_arg=--enableEvergreenApiTestSelection"
fi
if [ "${enable_evergreen_api_test_selection}" = "true" ]; then
ci_flags+=" --test_arg=--enableEvergreenApiTestSelection"
fi
# Split comma separated list of strategies
IFS=',' read -a strategies <<<"$test_selection_strategies_array"
for strategy in "${strategies[@]}"; do
ci_flags+=" --test_arg=--evergreenTestSelectionStrategy=${strategy}"
done
# Add each test flag from test_flags expansion as --test_arg
if [ -n "${test_flags:-}" ]; then
eval "flags_array=(${test_flags})"
for flag in "${flags_array[@]}"; do
bazel_args+=" --test_arg=\"${flag}\""
# Split comma separated list of strategies
IFS=',' read -a strategies <<<"$test_selection_strategies_array"
for strategy in "${strategies[@]}"; do
ci_flags+=" --test_arg=--evergreenTestSelectionStrategy=${strategy}"
done
fi
ALL_FLAGS="${ci_flags} ${LOCAL_ARG} ${bazel_args:-} ${bazel_compile_flags:-} ${task_compile_flags:-} ${patch_compile_flags:-}"
CONFIG_FLAGS="$(bazel_evergreen_shutils::extract_config_flags "${ALL_FLAGS}")"
echo "${ALL_FLAGS}" >.bazel_build_flags
# Add each test flag from test_flags expansion as --test_arg
if [ -n "${test_flags:-}" ]; then
eval "flags_array=(${test_flags})"
for flag in "${flags_array[@]}"; do
bazel_args+=" --test_arg=\"${flag}\""
done
fi
}
# Save the invocation, intentionally excluding CI specific flags.
echo "python buildscripts/install_bazel.py" >bazel-invocation.txt
echo "bazel test ${bazel_args} ${targets}" >>bazel-invocation.txt
# Records the commands needed to reproduce this run in bazel-invocation.txt (overwriting
# any previous content). Only ${bazel_args} and ${targets} are written; CI specific flags
# are intentionally left out.
save_invocation() {
    {
        echo "python buildscripts/install_bazel.py"
        echo "bazel test ${bazel_args} ${targets}"
    } >bazel-invocation.txt
}
if [ "${generate_burn_in_targets}" = "true" ]; then
maybe_generate_burn_in_targets() {
if [ "${generate_burn_in_targets}" != "true" ]; then
return
fi
echo "Generating burn-in test targets..."
base_revision="$(git merge-base ${revision} HEAD)"
${BAZEL_BINARY} build ${CONFIG_FLAGS} //... --build_tag_filters=resmoke_config
bazel_evergreen_shutils::query_resmoke_configs "${BAZEL_BINARY}" "${CONFIG_FLAGS}" "resmoke_suite_configs.yml"
${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:bazel_burn_in -- generate-targets "$base_revision" || echo "Failed to generate burn-in targets"
fi
}
set +o errexit
# Fetches then tests with retries. Leaves the result in the global RET.
run_fetch_and_test() {
export RETRY_ON_FAIL=1
bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
fetch ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} ${targets}
RET=$?
# Fetch then test with retries.
export RETRY_ON_FAIL=1
bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
fetch ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} ${targets}
RET=$?
if [[ "$RET" != "0" ]]; then
return
fi
if [[ "$RET" == "0" ]]; then
export RETRY_ON_FAIL=0
bazel_evergreen_shutils::retry_bazel_cmd 2 "$BAZEL_BINARY" \
test ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} --build_event_json_file=build_events.json ${targets}
@ -96,13 +131,12 @@ if [[ "$RET" == "0" ]]; then
elif [[ "$RET" != "0" ]]; then
echo "Errors were found during bazel test, failing the execution"
fi
fi
}
bazel_evergreen_shutils::write_last_engflow_link
set -o errexit
if [[ "$RET" != "0" ]]; then
gather_failed_tests() {
if [[ "$RET" == "0" ]]; then
return
fi
# This is a hacky way to save build time for the initial build during the `bazel test` above. They
# are stripped binaries there. We should rebuild them with debug symbols and separate debug.
# The relinked binaries should still be hash identical when stripped with strip.
@ -111,27 +145,57 @@ if [[ "$RET" != "0" ]]; then
# The --config flag needs to stay consistent for the `bazel run` to avoid evicting the previous results.
# Strip out anything that isn't a --config flag that could interfere with the run command.
eval ${BAZEL_BINARY} run ${CONFIG_FLAGS} //buildscripts:gather_failed_tests || true
fi
}
if [ "${generate_burn_in_targets}" != "true" ]; then
activate_result_tasks() {
if [ "${generate_burn_in_targets}" = "true" ]; then
return
fi
echo "Activating result task group..."
python buildscripts/evergreen_activate_result_tasks.py --expansion-file ../expansions.yml --build-events-file build_events.json
fi
}
eval ${BAZEL_BINARY} shutdown # Explicitly shutdown the bazel server in case the Evergreen agent is tracking it for completion of this process.
main() {
set -o errexit
set -o verbose
# Return code 3 from `bazel test` indicates that the build was OK, but some tests failed or timed out.
# The test failures are reported in individual results tasks, so don't fail the task here.
if [[ "$RET" -eq 3 ]]; then
echo 'Some tests failed. See the generated task(s) for the failed targets for more details on the failure(s).'
exit 0
elif [[ "$RET" -eq 4 ]]; then
# Before suites are converted, this is expected and should not fail the task. Remove with SERVER-118686.
echo 'No tests were run.'
exit 0
elif [[ "$RET" -eq 0 ]]; then
exit 0
else
echo 'Some tests failed to build. Look for "FAILED TO BUILD" or other build errors above. Tests with regular test failures will have their results in separate generated tasks.'
exit $RET
fi
echo Expansions: activated_by:"$activated_by" execution:"$execution" is_stepback:"$is_stepback" standalone:"$standalone"
exit_early_if_result_task
bazel_evergreen_shutils::activate_and_cd_src
BAZEL_BINARY=$(bazel_evergreen_shutils::bazel_get_binary_path)
export BAZEL_BINARY
build_ci_flags
ALL_FLAGS="${ci_flags} ${LOCAL_ARG} ${bazel_args:-} ${bazel_compile_flags:-} ${task_compile_flags:-} ${patch_compile_flags:-}"
CONFIG_FLAGS="$(bazel_evergreen_shutils::extract_config_flags "${ALL_FLAGS}")"
echo "${ALL_FLAGS}" >.bazel_build_flags
save_invocation
maybe_generate_burn_in_targets
set +o errexit
run_fetch_and_test
bazel_evergreen_shutils::write_last_engflow_link
set -o errexit
if [[ -n "$result_task" ]]; then
# Explicitly shutdown the bazel server in case the Evergreen agent is tracking it for completion of this process.
eval ${BAZEL_BINARY} shutdown
exit_for_result_task "$RET"
fi
gather_failed_tests
activate_result_tasks
# Explicitly shutdown the bazel server in case the Evergreen agent is tracking it for completion of this process.
eval ${BAZEL_BINARY} shutdown
exit_for_runner_task "$RET"
}
main "$@"