SERVER-123971 Add Evergreen parameter for disabling RBE for resmoke_tests (#53027)

GitOrigin-RevId: cb70bf9c76902e66c16f2008d9de1b4276d996e9
This commit is contained in:
Sean Lyons 2026-05-08 10:13:19 -04:00 committed by MongoDB Bot
parent 608e794daf
commit 55f60db306
8 changed files with 536 additions and 263 deletions

View File

@ -212,11 +212,8 @@ def resmoke_suite_test(
}) + select({
"//bazel/resmoke:installed_dist_test_enabled": [
"--installDir=dist-test/bin",
"--mongoVersionFile=$(location //:.resmoke_mongo_version.yml)",
],
"//conditions:default": [
"--mongoVersionFile=$(location //bazel/resmoke:resmoke_mongo_version)",
],
"//conditions:default": [],
})
deps_path = ":".join(["$(location %s)" % dep for dep in deps])
@ -224,6 +221,7 @@ def resmoke_suite_test(
default_data = [
generated_config,
python_imports_target,
"//bazel/resmoke:resmoke_mongo_version",
"//bazel/resmoke:on_feature_flags",
"//bazel/resmoke:off_feature_flags",
"//bazel/resmoke:unreleased_ifr_flags",
@ -264,8 +262,8 @@ def resmoke_suite_test(
name = name,
srcs = [resmoke_shim],
data = merged_data + select({
"//bazel/resmoke:installed_dist_test_enabled": ["//:installed-dist-test", "//:.resmoke_mongo_version.yml"],
"//conditions:default": ["//bazel/resmoke:resmoke_mongo_version"],
"//bazel/resmoke:installed_dist_test_enabled": ["//:installed-dist-test"],
"//conditions:default": [],
}),
deps = [
resmoke,
@ -284,6 +282,7 @@ def resmoke_suite_test(
"--archiveLimitMb=500",
"--testTimeout=$(RESMOKE_TEST_TIMEOUT)",
"--historicTestRuntimes=$(location :%s)" % historic_runtimes,
"--mongoVersionFile=$(location //bazel/resmoke:resmoke_mongo_version)",
] + [
"--multiversionDir=$(location %s)" % native.package_relative_label(dep)
for dep in multiversion_deps

View File

@ -122,20 +122,23 @@ class ResmokeShimContext:
self.resource_monitor = None
def create_short_symlinks(self):
"""Create short symlinks in the original tmpdir to avoid long path issues."""
original_tmpdir = tempfile.gettempdir()
"""Create short symlinks in /tmp to avoid long path issues."""
if os.path.isdir("/tmp") and os.access("/tmp", os.W_OK):
short_root = "/tmp"
else:
short_root = tempfile.gettempdir()
# Create a short symlink to TEST_TMPDIR
test_tempdir = os.environ.get("TEST_TMPDIR")
if test_tempdir:
self.tmpdir_symlink = os.path.join(original_tmpdir, f"resmoke_tmp_{uuid.uuid1()}")
self.tmpdir_symlink = os.path.join(short_root, f"resmoke_tmp_{uuid.uuid1()}")
os.symlink(test_tempdir, self.tmpdir_symlink)
self.links.append(self.tmpdir_symlink)
# Create a short symlink to TEST_UNDECLARED_OUTPUTS_DIR
undeclared_outputs_dir = os.environ.get("TEST_UNDECLARED_OUTPUTS_DIR")
if undeclared_outputs_dir:
self.outputs_symlink = os.path.join(original_tmpdir, f"resmoke_out_{uuid.uuid1()}")
self.outputs_symlink = os.path.join(short_root, f"resmoke_out_{uuid.uuid1()}")
os.symlink(undeclared_outputs_dir, self.outputs_symlink)
self.links.append(self.outputs_symlink)
@ -246,6 +249,9 @@ if __name__ == "__main__":
outputs_dir = ctx.outputs_symlink if ctx.outputs_symlink else undeclared_output_dir
otel_dir = os.path.join(outputs_dir, "build", "metrics")
os.makedirs(otel_dir, exist_ok=True)
resmoke_args.append(f"--otelCollectorDir={otel_dir}")
resmoke_args.append(f"--taskWorkDir={outputs_dir}")
resmoke_args.append(f"--reportFile={os.path.join(outputs_dir, 'report.json')}")
os.chdir(outputs_dir)

View File

@ -48,6 +48,7 @@ from buildscripts.burn_in_tests import (
from buildscripts.ciconfig.evergreen import parse_evergreen_file
from buildscripts.generate_result_tasks import make_results_task, make_task_group
from buildscripts.util import buildozer_utils as buildozer
from buildscripts.util.read_config import read_config_file
BAZEL_BURN_IN_TESTS = r"resmoke_tests_burn_in_*"
@ -298,6 +299,9 @@ def generate_tasks(
):
os.chdir(os.environ.get("BUILD_WORKSPACE_DIRECTORY", "."))
expansions = read_config_file("../expansions.yml")
resmoke_disable_rbe = expansions.get("resmoke_disable_rbe", "") == "true"
targets = query_targets_to_burn_in(origin_rev, test_changed_files)
evg_conf = parse_evergreen_file("etc/evergreen.yml")
@ -340,6 +344,7 @@ def generate_tasks(
variant.name,
targets,
f"resmoke_tests_burn_in_{variant.name}",
resmoke_disable_rbe=resmoke_disable_rbe,
)
result_tasks[results_task_group.name] = burn_in_targets_to_run
build_variant.add_task_group(results_task_group)
@ -354,9 +359,12 @@ def generate_tasks(
shrub_project.add_build_variant(build_variant)
project = shrub_project.as_dict()
tasks = [make_results_task(target) for target in targets_all] + [
task.as_dict() for task in resmoke_tests_tasks
]
tasks = [
make_results_task(
target, resmoke_disable_rbe=resmoke_disable_rbe, generate_burn_in_targets=True
)
for target in targets_all
] + [task.as_dict() for task in resmoke_tests_tasks]
project["tasks"] = tasks
for variant in project.get("buildvariants", []):
@ -367,9 +375,15 @@ def generate_tasks(
# these are not a dependency for the `resmoke_tests` task or the results tasks added here.
# Set an explicit depends_on in the task group's reference to override it. Remove with SERVER-119809.
if task["name"] in result_tasks:
task["depends_on"] = {
"name": f"resmoke_tests_burn_in_{variant['name']}",
}
depends_on = [{"name": f"resmoke_tests_burn_in_{variant['name']}"}]
if resmoke_disable_rbe:
# archive_dist_test may live on a separate compile variant; resolve it
# per-variant here because Evergreen does not expand ${compile_variant}
# in depends_on.variant.
evg_variant = evg_conf.get_variant(variant["name"])
compile_variant = evg_variant.expansion("compile_variant") or variant["name"]
depends_on.append({"name": "archive_dist_test", "variant": compile_variant})
task["depends_on"] = depends_on
else:
task["depends_on"] = {
"name": "version_burn_in_gen",

View File

@ -52,10 +52,21 @@ def _bazel_binary() -> str:
return os.environ.get("BAZEL_BINARY", "bazel")
def make_results_task(target: str) -> Task:
def make_results_task(
target: str,
resmoke_disable_rbe: bool = False,
generate_burn_in_targets: bool = False,
) -> Task:
if resmoke_disable_rbe:
results_func = "gather local test results"
else:
results_func = "fetch remote test results"
execute_params: dict = {"targets": target, "result_task": True}
if generate_burn_in_targets:
execute_params["generate_burn_in_targets"] = True
commands = [
FunctionCall("execute resmoke tests via bazel", {"targets": target, "result_task": True}),
FunctionCall("fetch remote test results", {"test_label": target}),
FunctionCall("execute resmoke tests via bazel", execute_params),
FunctionCall(results_func, {"test_label": target}),
]
task = Task(target, commands).as_dict()
@ -67,25 +78,39 @@ def make_results_task(target: str) -> Task:
return task
def make_task_group(
name: str,
variant: str,
targets,
resmoke_task: Optional[str] = "resmoke_tests",
) -> TaskGroup:
task_group = TaskGroup(
name=f"{name}_results_{variant}",
tasks=[],
max_hosts=len(targets),
setup_group_can_fail_task=True,
setup_group=[
FunctionCall("git get project and add git tag"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
FunctionCall("set up venv"),
FunctionCall("configure evergreen api credentials"),
FunctionCall("set up credentials"),
FunctionCall("get engflow creds"),
def _make_setup_group(resmoke_task: str, resmoke_disable_rbe: bool) -> list:
common = [
FunctionCall("git get project and add git tag"),
FunctionCall("set task expansion macros"),
FunctionCall("f_expansions_write"),
FunctionCall("set up venv"),
FunctionCall("configure evergreen api credentials"),
FunctionCall("set up credentials"),
FunctionCall("get engflow creds"),
]
if resmoke_disable_rbe:
# Download and extract the pre-built dist-test binaries into src/ so that
# //bazel/resmoke:installed_dist_test_enabled can glob dist-test/** from the workspace root.
return common + [
BuiltInCommand(
"s3.get",
{
"aws_key": "${aws_key_new}",
"aws_secret": "${aws_secret}",
"remote_file": "${mongo_binaries}",
"bucket": "mciuploads",
"local_file": "mongo-binaries.tgz",
},
),
BuiltInCommand(
"shell.exec",
{
"script": "tar -xf mongo-binaries.tgz -C src",
},
),
]
else:
return common + [
BuiltInCommand(
"s3.get",
{
@ -110,7 +135,22 @@ def make_task_group(
"optional": True,
},
),
],
]
def make_task_group(
name: str,
variant: str,
targets,
resmoke_task: Optional[str] = "resmoke_tests",
resmoke_disable_rbe: bool = False,
) -> TaskGroup:
task_group = TaskGroup(
name=f"{name}_results_{variant}",
tasks=[],
max_hosts=len(targets),
setup_group_can_fail_task=True,
setup_group=_make_setup_group(resmoke_task, resmoke_disable_rbe),
# Between tasks, remove the test logs and outputs. The tasks share hosts and leaving them
# can cause the task to include test logs from other bazel targets.
setup_task=[BuiltInCommand("shell.exec", {"script": "rm -rf build/ results/ report.json"})],
@ -533,6 +573,7 @@ def main(outfile: Annotated[str, typer.Option()]):
expansions = read_config_file("../expansions.yml")
project_name = expansions.get("project", MASTER_PROJECT_NAME)
evg_config_path = get_evergreen_config_path(project_name)
resmoke_disable_rbe = expansions.get("resmoke_disable_rbe", "") == "true"
print(f"Parsing Evergreen configuration from {evg_config_path}...", file=sys.stderr)
# Pre-warm the @cache-decorated resolvers so their bazel-run + YAML costs
@ -563,7 +604,9 @@ def main(outfile: Annotated[str, typer.Option()]):
continue
targets_all.update(targets)
task_group = make_task_group("resmoke_tests", variant.name, targets).as_dict()
task_group = make_task_group(
"resmoke_tests", variant.name, targets, resmoke_disable_rbe=resmoke_disable_rbe
).as_dict()
task_group["tasks"] = targets
project["task_groups"].append(task_group)
@ -573,18 +616,28 @@ def main(outfile: Annotated[str, typer.Option()]):
# Set an explicit depends_on in the task group's reference to override it.
# The task that generated the task is used as a no-op dependency, as a workaround for not
# being able to set an empty depends_on. Remove with SERVER-119809.
build_variant["tasks"] = {
"name": task_group["name"],
"activate": False,
"depends_on": {
depends_on = [
{
"name": "bazel_result_tasks_gen",
"variant": "generate-tasks-for-version",
"omit_generated_tasks": True,
},
]
if resmoke_disable_rbe:
# archive_dist_test may live on a separate compile variant; resolve it per-variant here
# because Evergreen does not expand ${compile_variant} in depends_on.variant.
compile_variant = variant.expansion("compile_variant") or variant.name
depends_on.append({"name": "archive_dist_test", "variant": compile_variant})
build_variant["tasks"] = {
"name": task_group["name"],
"activate": False,
"depends_on": depends_on,
}
project["buildvariants"].append(build_variant)
project["tasks"] = [make_results_task(target) for target in targets_all]
project["tasks"] = [
make_results_task(target, resmoke_disable_rbe=resmoke_disable_rbe) for target in targets_all
]
with open(outfile, "w") as f:
f.write(json.dumps(project, indent=4))

View File

@ -0,0 +1,240 @@
# Shared bash helpers for processing bazel resmoke test outputs in Evergreen result tasks.
#
# Required environment variables (set by callers):
# * ${workdir} - The Evergreen workdir.
# * ${test_label} - The bazel test target label (e.g. //buildscripts/resmokeconfig:core).
# Converts a bazel test label into the path-prefix convention used in ${workdir}/results.
# Example: //buildscripts/resmokeconfig:core -> buildscripts/resmokeconfig/core
function bazel_test_results::label_to_prefix() {
    # $1 - bazel label, e.g. //buildscripts/resmokeconfig:core
    # Prints the label as a relative path: the leading "//" is removed and every ":" becomes "/".
    local -r without_root="${1#//}"
    echo "${without_root//:/\/}"
}
# Symlinks test logs from a per-shard test.outputs/build/TestLogs directory into Evergreen's
# log ingestion folder. Must be invoked with the per-shard results directory as cwd.
function bazel_test_results::symlink_test_logs() {
    # Mirrors every file under ./test.outputs/build/TestLogs into ${workdir}/build/TestLogs as a
    # symlink to the file's absolute path, preserving the relative directory layout.
    # Reads: ${workdir}. Relies on the current directory being the per-shard results directory.
    local -r build_dir='test.outputs/build/TestLogs'

    if [[ ! -d "${build_dir}" ]]; then
        echo "No test logs directory found at ${build_dir}, skipping symlink."
        return
    fi

    local file rel_path target_path abs_file
    # NUL-delimited with IFS cleared so file names containing whitespace are passed through
    # untouched (a plain `read -r` trims leading/trailing whitespace from each name).
    find "${build_dir}" -type f -print0 | while IFS= read -r -d '' file; do
        rel_path="${file#"${build_dir}"/}"
        target_path="${workdir}/build/TestLogs/${rel_path}"
        mkdir -p "$(dirname "${target_path}")"
        abs_file=$(realpath "${file}")
        # -f replaces a link left behind by an earlier invocation for the same relative path.
        ln -sf "${abs_file}" "${target_path}"
    done
}
# Combines all resmoke OTel telemetry under ${workdir}/results into batched files under
# ${workdir}/build/OTelTraces. Evergreen processes slowly when there are many small files.
# However, files are kept under 4MB since that is the maximum size that
# Evergreen will send to the trace collector without re-batching them.
function bazel_test_results::combine_metrics() {
    # Concatenates every file matching '*metrics/metrics*.json' under ${workdir}/results into
    # ${workdir}/build/OTelTraces/metrics.json, rolling over to metrics_1.json, metrics_2.json, ...
    # whenever appending the next file would push the current batch past max_size.
    # A single newline is written after each input file. metrics.json is always created, even
    # when no input files are found.
    # Reads: ${workdir}.
    local -r output_dir="${workdir}/build/OTelTraces"
    mkdir -p "${output_dir}"

    local -r max_size=$((4 * 1024 * 1024))
    local -r newline_size=1
    local file_counter=0
    local current_size=0
    local current_output="${output_dir}/metrics.json"
    local file file_size

    : >"${current_output}"

    find "${workdir}/results" -wholename '*metrics/metrics*.json' -type f -print0 | while IFS= read -r -d '' file; do
        # Declared separately from the assignment so a failing stat is not masked by `local`.
        file_size=$(stat -c%s "${file}")

        # `current_size > 0` lets a single oversized file still land in a batch of its own
        # instead of producing an empty batch file before it.
        if ((current_size > 0 && current_size + file_size + newline_size > max_size)); then
            # Plain assignment rather than ((file_counter++)): the post-increment form evaluates
            # to 0 on the first rollover, which is a non-zero exit status and aborts the loop
            # when errexit is enabled.
            file_counter=$((file_counter + 1))
            current_output="${output_dir}/metrics_${file_counter}.json"
            current_size=0
            : >"${current_output}"
        fi

        cat "${file}" >>"${current_output}"
        echo "" >>"${current_output}"
        current_size=$((current_size + file_size + newline_size))
    done
}
# Combines all resmoke report JSONs into a single ${workdir}/report.json for attach.results.
function bazel_test_results::combine_reports() {
    # Merges the .results arrays of every 'report*.json' found under ${workdir} into
    # ${workdir}/report.json, with .failures set to the number of results whose status is
    # "fail" or "timeout". Prints a one-line summary to stdout.
    # Reads: ${workdir}.
    # Returns: 0 on success or when there is nothing to combine; 1 if jq cannot merge the reports
    #          (in which case ${workdir}/report.json is left untouched).
    local -a report_files=()
    local report_file
    # Collect paths NUL-delimited into an array so paths containing whitespace stay intact and
    # jq is invoked exactly once (xargs could split a long list across several jq runs, each
    # emitting its own JSON document).
    while IFS= read -r -d '' report_file; do
        report_files+=("${report_file}")
    done < <(find "${workdir}" -name 'report*.json' -type f -print0 2>/dev/null)

    if [[ ${#report_files[@]} -eq 0 ]]; then
        echo 'No report.json files found'
        return
    fi

    local combined_report
    if ! combined_report=$(jq -s '
        {
            results: map(.results // []) | add,
            failures: (map(.results // []) | add | map(select(.status == "fail" or .status == "timeout")) | length)
        }
    ' "${report_files[@]}"); then
        # Do not write a truncated/empty report.json that would later be attached as results.
        echo "Error: jq failed to combine ${#report_files[@]} report file(s)" >&2
        return 1
    fi

    local -r combined_report_file="${workdir}/report.json"
    echo "${combined_report}" >"${combined_report_file}"

    local total_tests failures
    total_tests=$(echo "${combined_report}" | jq '.results | length')
    failures=$(echo "${combined_report}" | jq '.failures')

    echo ""
    echo "Combined Report: ${total_tests} tests, ${failures} failures"
    echo "Report written to: ${combined_report_file}"
}
# Writes a YAML file indicating that test failures exist (consumed by expansions.update).
function bazel_test_results::write_test_failures_expansion() {
    # Creates ${workdir}/results if needed and (over)writes test_failures_exist.yml in it with a
    # single "test_failures_exist: true" entry.
    local -r results_dir="${workdir}/results"
    mkdir -p "${results_dir}"
    echo "test_failures_exist: true" >"${results_dir}/test_failures_exist.yml"
}
# Prints all *test.log files with per-shard headers, ordered by shard number.
function bazel_test_results::print_executor_logs() {
    # Prints every '*test.log' under ${workdir}/results to stdout, each wrapped in a banner naming
    # the log's directory relative to ${workdir}/results. Logs are ordered numerically by the
    # N of the "shard_N/" path component. Prints nothing when no logs exist.
    # Reads: ${workdir}.
    local log_files
    # `|| true`: a missing results directory simply means there is nothing to print.
    log_files=$(find "${workdir}/results" -name '*test.log' -type f 2>/dev/null) || true
    if [[ -z "${log_files}" ]]; then
        return
    fi

    # Decorate each path with its shard number, sort numerically, then strip the decoration.
    local sorted_log_files
    sorted_log_files=$(
        while IFS= read -r log_file; do
            shard_num=$(echo "${log_file}" | grep -oP 'shard_\K\d+(?=/)')
            echo "${shard_num} ${log_file}"
        done <<<"${log_files}" | sort -n | cut -d' ' -f2-
    )

    local log_file shard_path
    while IFS= read -r log_file; do
        # Parameter expansion instead of sed: ${workdir} is matched literally, so regex
        # metacharacters in the path cannot break or silently skip the prefix removal.
        shard_path="${log_file#"${workdir}/results/"}"
        shard_path="${shard_path%/*}"
        echo "================================================================================"
        echo "Shard ${shard_path} log:"
        echo "================================================================================"
        cat "${log_file}"
        echo ""
        echo "================================================================================"
        echo ""
    done <<<"${sorted_log_files}"
}
# Displays a formatted summary of test results. Caller passes parallel arrays by name.
# Usage: bazel_test_results::display_test_summary shard_names_var shard_statuses_var shard_test_counts_var
function bazel_test_results::display_test_summary() {
    # $1 - name of the array holding shard names (expected to end in "shard_<N>")
    # $2 - name of the array holding shard statuses (PASSED, FAILED, TIMEOUT or NO_REPORT)
    # $3 - name of the array holding "<passed>/<total>" strings
    # The three arrays are parallel: index i of each describes the same shard.
    # Reads: ${test_label}. Prints one line per shard, ordered numerically by shard number.
    local -n _names="${1}"
    local -n _statuses="${2}"
    local -n _counts="${3}"

    echo "================================================================================"
    echo "Test Results Summary"
    echo "================================================================================"
    echo "Target: ${test_label}"
    echo "Total Shards: ${#_names[@]}"
    echo "--------------------------------------------------------------------------------"

    # Build the index order with mapfile rather than `IFS=$'\n' arr=($(...))`: that form is a
    # plain assignment, so it would leave IFS changed for the caller after this function returns.
    local -a sorted_indices=()
    local i shard_num
    mapfile -t sorted_indices < <(
        for i in "${!_names[@]}"; do
            shard_num=$(echo "${_names[$i]}" | grep -oP 'shard_\K\d+$')
            echo "${shard_num} ${i}"
        done | sort -n | cut -d' ' -f2
    )

    local shard status test_counts
    for i in "${sorted_indices[@]}"; do
        shard="${_names[$i]}"
        status="${_statuses[$i]}"
        test_counts="${_counts[$i]}"

        case "${status}" in
        "PASSED")
            echo "${shard}: PASSED (${test_counts} tests passed)"
            ;;
        "FAILED")
            if [[ "${test_counts}" == "0/0" ]]; then
                echo "${shard}: FAILED (no report generated)"
            else
                echo "${shard}: FAILED (${test_counts} tests passed)"
            fi
            ;;
        "TIMEOUT")
            echo "${shard}: TIMEOUT"
            ;;
        "NO_REPORT")
            echo "${shard}: NO REPORT (no tests may have been run)"
            ;;
        esac
    done

    echo "================================================================================"
    echo ""
}
# Reads a single test result's report.json and appends the corresponding entries to the
# parallel summary arrays passed by name. Must be invoked with the shard's results dir as cwd.
# Usage: bazel_test_results::record_shard_status <shard_path> <is_failure_flag> <is_timeout_flag>
# names_var statuses_var counts_var
# Returns 0 if the shard had a report.json, 1 otherwise.
function bazel_test_results::record_shard_status() {
    # $1 - shard name to record
    # $2 - 1 if the caller already knows the shard failed, 0 otherwise
    # $3 - 1 if the caller already knows the shard timed out, 0 otherwise
    # $4/$5/$6 - names of the parallel arrays receiving the shard name, status and
    #            "<passed>/<total>" count; exactly one element is appended to each.
    # Looks for the first test.outputs/report*.json relative to the current directory.
    # Returns 0 when a report file was found, 1 otherwise.
    local -r shard_path="$1"
    local -r is_failure_flag="$2"
    local -r is_timeout_flag="$3"
    local -n _names="${4}"
    local -n _statuses="${5}"
    local -n _counts="${6}"

    _names+=("${shard_path}")

    local report_file
    report_file=$(compgen -G "test.outputs/report*.json" | head -n 1)

    # No report on disk: only a caller-reported timeout distinguishes the two outcomes.
    if [[ -z "${report_file}" ]]; then
        _counts+=("0/0")
        if [[ "${is_timeout_flag}" -eq 1 ]]; then
            _statuses+=("TIMEOUT")
        else
            _statuses+=("NO_REPORT")
        fi
        return 1
    fi

    # One jq pass yields total, failed (fail or timeout) and passed counts as a TSV row;
    # an unreadable report falls back to all zeros.
    local total_tests failed_tests passed_tests
    IFS=$'\t' read -r total_tests failed_tests passed_tests < <(
        jq -r '[
(.results | length),
(.results | map(select(.status == "fail" or .status == "timeout")) | length),
(.results | map(select(.status == "pass")) | length)
] | @tsv' "${report_file}" 2>/dev/null || printf "0\t0\t0\n"
    )
    total_tests=${total_tests:-0}
    failed_tests=${failed_tests:-0}
    passed_tests=${passed_tests:-0}
    _counts+=("${passed_tests}/${total_tests}")

    # A timeout wins over everything; otherwise a failure (flagged by the caller or seen in the
    # report) is FAILED unless the report lists no tests at all.
    local shard_status="PASSED"
    if [[ "${is_timeout_flag}" -eq 1 ]]; then
        shard_status="TIMEOUT"
    elif [[ "${is_failure_flag}" -eq 1 || "${failed_tests}" -gt 0 ]]; then
        if [[ "${total_tests}" -eq 0 ]]; then
            shard_status="NO_REPORT"
        else
            shard_status="FAILED"
        fi
    fi
    _statuses+=("${shard_status}")
    return 0
}

View File

@ -11,6 +11,9 @@
# * ${test_label} - The resmoke bazel target to get results for, like //buildscripts/resmokeconfig:core
# * ${workdir} - The Evergreen workdir to use for test log and OTel trace ingestion.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/bazel_test_results_shutils.sh"
# Enumerates test results for each execution of ${test_label}. Shards/retries are individual executions with their own results.
function enumerate_test_results() {
jq --raw-output --compact-output --arg test_label "${test_label}" 'select(.testResult.testActionOutput != null) |
@ -91,144 +94,6 @@ function unzip_outputs() {
fi
}
# Symlinks test logs from a test result into Evergreen's log ingestion folder.
function symlink_test_logs() {
local build_dir='test.outputs/build/TestLogs'
if [[ ! -d "$build_dir" ]]; then
return
fi
find "$build_dir" -type f | while read -r file; do
# Get the relative path from the build directory
rel_path="${file#$build_dir/}"
target_path="${workdir}/build/TestLogs/${rel_path}"
target_dir=$(dirname "$target_path")
mkdir -p "$target_dir"
abs_file=$(realpath "$file")
ln -sf "$abs_file" "$target_path"
done
}
# Displays a formatted summary of test results.
function display_test_summary() {
echo "================================================================================"
echo "Test Results Summary"
echo "================================================================================"
echo "Target: ${test_label}"
echo "Total Shards: ${#shard_names[@]}"
echo "--------------------------------------------------------------------------------"
# Create a sorted list of indices based on shard names
local sorted_indices=()
for i in "${!shard_names[@]}"; do
sorted_indices+=("$i")
done
# Sort indices by extracting and comparing shard numbers
IFS=$'\n' sorted_indices=($(
for i in "${sorted_indices[@]}"; do
local shard_num=$(echo "${shard_names[$i]}" | grep -oP 'shard_\K\d+$')
echo "$shard_num $i"
done | sort -n | cut -d' ' -f2
))
for i in "${sorted_indices[@]}"; do
local shard="${shard_names[$i]}"
local status="${shard_statuses[$i]}"
local test_counts="${shard_test_counts[$i]}"
# Format status with color indicators
case "$status" in
"PASSED")
echo "$shard: PASSED ($test_counts tests passed)"
;;
"FAILED")
if [[ "$test_counts" == "0/0" ]]; then
echo "$shard: FAILED (no report generated)"
else
echo "$shard: FAILED ($test_counts tests passed)"
fi
;;
"TIMEOUT")
echo "$shard: TIMEOUT"
;;
"NO_REPORT")
echo "$shard: NO REPORT (no tests may have been run)"
;;
esac
done
echo "================================================================================"
echo ""
}
# Combine all resmoke telemetry and place it where Evergreen expects it: ${workdir}/build/OTelTraces.
# Metrics are batched into line-separated JSON files no greater than 4MB each. Evergreen processes
# fewer files faster, but hits message size limitations if they are too large.
function combine_metrics() {
local output_dir="${workdir}/build/OTelTraces"
mkdir -p "$output_dir"
local max_size=$((4 * 1024 * 1024)) # 4MB in bytes
local file_counter=0
local current_size=0
local current_output="${output_dir}/metrics.json"
# Create initial empty file
>"$current_output"
find "${workdir}/results" -wholename '*metrics/metrics*.json' -type f -print0 | while IFS= read -r -d '' file; do
local file_size=$(stat -c%s "$file")
local newline_size=1
# Check if adding this file would exceed the limit
if ((current_size + file_size + newline_size > max_size && current_size > 0)); then
# Start a new file
((file_counter++))
current_output="${output_dir}/metrics_${file_counter}.json"
current_size=0
>"$current_output"
fi
# Append the file content
cat "$file" >>"$current_output"
echo "" >>"$current_output" # Adds a single newline after each file's content
# Update current size
current_size=$((current_size + file_size + newline_size))
done
}
# Combines all Resmoke test report JSONs into a single JSON.
function combine_reports() {
local report_files=$(find "${workdir}" -name 'report*.json' -type f 2>/dev/null)
if [[ -z "$report_files" ]]; then
echo 'No report.json files found'
return
fi
local combined_report=$(echo "$report_files" | xargs jq -s '
{
results: map(.results // []) | add,
failures: (map(.results // []) | add | map(select(.status == "fail" or .status == "timeout")) | length)
}
')
local combined_report_file="${workdir}/report.json"
echo "$combined_report" >"$combined_report_file"
local total_tests=$(echo "$combined_report" | jq '.results | length')
local failures=$(echo "$combined_report" | jq '.failures')
echo ""
echo "Combined Report: ${total_tests} tests, ${failures} failures"
echo "Report written to: $combined_report_file"
}
# Writes a user-friendly bazel invocation for re-running this test target.
function write_bazel_invocation() {
# Escape special characters in the label for the second sed expression.
@ -237,42 +102,6 @@ function write_bazel_invocation() {
sed "s/\S*\$/${test_label_escaped}/" ${workdir}/resmoke-tests-bazel-invocation.txt | tail -n 1 >"${workdir}/bazel-invocation.txt"
}
# Writes a YAML file indicating that test failures exist.
function write_test_failures_expansion() {
local output_file="${workdir}/results/test_failures_exist.yml"
mkdir -p "$(dirname "$output_file")"
echo "test_failures_exist: true" >"$output_file"
}
# Print the contents of all *test.log files with headers per shard.
function print_executor_logs() {
local log_files=$(find "${workdir}/results" -name '*test.log' -type f 2>/dev/null)
if [[ -z "$log_files" ]]; then
return
fi
# Sort log files by shard number
local sorted_log_files=$(echo "$log_files" | while IFS= read -r log_file; do
# Extract shard number from path (e.g., /workdir/results/foo/bar/shard_1/test.log -> 1)
local shard_num=$(echo "$log_file" | grep -oP 'shard_\K\d+(?=/)')
echo "$shard_num $log_file"
done | sort -n | cut -d' ' -f2-)
while IFS= read -r log_file; do
# Extract shard name from path (e.g., /workdir/results/foo/bar/shard_1/test.log -> foo/bar/shard_1)
local shard_path=$(echo "$log_file" | sed "s|${workdir}/results/||" | sed 's|/[^/]*$||')
echo "================================================================================"
echo "Shard $shard_path log:"
echo "================================================================================"
cat "$log_file"
echo ""
echo "================================================================================"
echo ""
done <<<"$sorted_log_files"
}
# Resolves a file path from a list of candidate locations. Returns the first existing file path found.
function resolve_file() {
local -n paths=$1
@ -340,48 +169,21 @@ while IFS= read -r test_result; do
is_timeout_flag=1
is_failure_flag=1
fail_task=1
write_test_failures_expansion
bazel_test_results::write_test_failures_expansion
elif is_failure "$test_result"; then
is_failure_flag=1
fail_task=1
write_test_failures_expansion
bazel_test_results::write_test_failures_expansion
fi
download_outputs "$test_result" "$is_failure_flag"
unzip_outputs "$is_failure_flag"
symlink_test_logs
bazel_test_results::symlink_test_logs
# Record shard information
shard_names+=("$target_prefix")
# Check if any report*.json files exist
if compgen -G "test.outputs/report*.json" >/dev/null; then
# Extract test counts from the report
report_file=$(compgen -G "test.outputs/report*.json" | head -n 1)
total_tests=$(jq '.results | length' "$report_file" 2>/dev/null || echo "0")
failed_tests=$(jq '.results | map(select(.status == "fail" or .status == "timeout")) | length' "$report_file" 2>/dev/null || echo "0")
passed_tests=$(jq '.results | map(select(.status == "pass")) | length' "$report_file" 2>/dev/null || echo "0")
shard_test_counts+=("$passed_tests/$total_tests")
if [[ "$is_timeout_flag" -eq 1 ]]; then
shard_statuses+=("TIMEOUT")
elif [[ "$is_failure_flag" -eq 1 ]]; then
if [[ "$total_tests" -eq 0 ]]; then
shard_statuses+=("NO_REPORT")
else
shard_statuses+=("FAILED")
fi
else
shard_statuses+=("PASSED")
fi
else
# No report file found - check if we have bazel-level status information
if [[ "$is_timeout_flag" -eq 1 ]]; then
shard_statuses+=("TIMEOUT")
shard_test_counts+=("0/0")
else
shard_statuses+=("NO_REPORT")
shard_test_counts+=("0/0")
if ! bazel_test_results::record_shard_status \
"$target_prefix" "$is_failure_flag" "$is_timeout_flag" \
shard_names shard_statuses shard_test_counts; then
if [[ "$is_timeout_flag" -ne 1 ]]; then
missing_report=1
fi
fi
@ -396,13 +198,13 @@ if [[ "$result_count" -eq 0 ]]; then
exit 1
fi
print_executor_logs
bazel_test_results::print_executor_logs
display_test_summary
bazel_test_results::display_test_summary shard_names shard_statuses shard_test_counts
combine_metrics
bazel_test_results::combine_metrics
failures=$(combine_reports)
failures=$(bazel_test_results::combine_reports)
write_bazel_invocation
@ -410,7 +212,7 @@ write_bazel_invocation
for status in "${shard_statuses[@]}"; do
if [[ "$status" == "TIMEOUT" || "$status" == "NO_REPORT" ]]; then
echo "Error: One or more shards had TIMEOUT or NO_REPORT status. Not all tests ran or were reported." >&2
write_test_failures_expansion
bazel_test_results::write_test_failures_expansion
exit 1
fi
done
@ -426,7 +228,7 @@ for status in "${shard_statuses[@]}"; do
done
if [[ "$has_test_failures" -eq 1 ]]; then
write_test_failures_expansion
bazel_test_results::write_test_failures_expansion
fi
exit 0

View File

@ -0,0 +1,130 @@
# Gathers locally-executed bazel test results from bazel-testlogs/ and prepares them
# for Evergreen ingestion in the same layout as fetch_remote_test_results.sh.
#
# Usage:
# bash gather_local_test_results.sh
#
# Required environment variables:
# * ${test_label} - The bazel test target, e.g. //buildscripts/resmokeconfig:core
# * ${workdir} - The Evergreen workdir.
# Resolve the directory containing this script and load the shared result-processing helpers
# (bazel_test_results::*) that live next to it.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/bazel_test_results_shutils.sh"
# target_prefix is the label rewritten as a relative path; it is used both to locate the target's
# directory under bazel-testlogs and as the sub-path under ${workdir}/results.
readonly target_prefix=$(bazel_test_results::label_to_prefix "${test_label}")
readonly bazel_testlogs="${workdir}/src/bazel-testlogs"
readonly target_outputs="${bazel_testlogs}/${target_prefix}"
# Without a testlogs directory for the target there is nothing to gather; fail the task.
if [ ! -d "${target_outputs}" ]; then
echo "Error: No bazel test outputs found at ${target_outputs}" >&2
echo "The test may have failed to build. Check the logs from the runner task." >&2
exit 1
fi
# Collect shard directories. Sharded tests use shard_<N>_of_<M>/; single-shard tests put files
# directly under the target's directory and are treated as shard 0.
# shard_paths and shard_nums are parallel arrays: index i of each describes the same shard.
declare -a shard_paths=()
declare -a shard_nums=()
if compgen -G "${target_outputs}/shard_*_of_*" >/dev/null; then
while IFS= read -r shard_dir; do
# Extract <N> from the directory name shard_<N>_of_<M>.
shard_num=$(basename "${shard_dir}" | sed 's/shard_\([0-9]\+\)_of_.*/\1/')
shard_paths+=("${shard_dir}")
shard_nums+=("${shard_num}")
done < <(find "${target_outputs}" -maxdepth 1 -type d -name 'shard_*_of_*' | sort)
else
shard_paths+=("${target_outputs}")
shard_nums+=("0")
fi
# Parallel summary arrays filled by bazel_test_results::record_shard_status, one entry per shard.
shard_names=()
shard_statuses=()
shard_test_counts=()
# Set to 1 as soon as any shard's report contains a failed or timed-out test.
fail_task=0
for i in "${!shard_paths[@]}"; do
shard_dir="${shard_paths[$i]}"
shard_num="${shard_nums[$i]}"
shard_path="${target_prefix}/shard_${shard_num}"
target_dir="${workdir}/results/${shard_path}"
mkdir -p "${target_dir}"
is_failure_flag=0
# NOTE(review): is_timeout_flag is never set to 1 anywhere in this script, so
# record_shard_status cannot report TIMEOUT for a locally gathered shard.
is_timeout_flag=0
# Copy test.log so it ends up in results/<prefix>/shard_<N>/shard_<N>_test.log,
# matching the naming convention used by fetch_remote_test_results.sh and picked up
# by the teardown S3 put filter "**/*test.log".
if [ -f "${shard_dir}/test.log" ]; then
cp "${shard_dir}/test.log" "${target_dir}/shard_${shard_num}_test.log"
fi
# Locate the undeclared outputs zip produced by --zip_undeclared_test_outputs.
# The first candidate that exists wins; output_zip stays empty when neither is present.
output_zip=""
for candidate in \
"${shard_dir}/test.outputs/outputs.zip" \
"${shard_dir}/outputs.zip"; do
if [ -f "${candidate}" ]; then
output_zip="${candidate}"
break
fi
done
if [ -n "${output_zip}" ]; then
mkdir -p "${target_dir}/test.outputs"
unzip -o -q "${output_zip}" -d "${target_dir}/test.outputs"
fi
# The helpers below resolve test.outputs/... relative to the current directory, so run them
# from this shard's results directory.
pushd "${target_dir}" >/dev/null
bazel_test_results::symlink_test_logs
# Determine pass/fail from the extracted report.json. record_shard_status appends to the
# parallel summary arrays and returns non-zero if no report was found.
if compgen -G "test.outputs/report*.json" >/dev/null; then
report_file=$(compgen -G "test.outputs/report*.json" | head -n 1)
failed_tests=$(jq '.results | map(select(.status == "fail" or .status == "timeout")) | length' "${report_file}" 2>/dev/null || echo 0)
if [[ "${failed_tests}" -gt 0 ]]; then
is_failure_flag=1
fail_task=1
fi
fi
# `|| true`: a missing report is not handled here; it is recorded as NO_REPORT and caught by
# the status check after the loop.
bazel_test_results::record_shard_status \
"$shard_path" "$is_failure_flag" "$is_timeout_flag" \
shard_names shard_statuses shard_test_counts || true
# If this shard failed and produced an outputs zip, keep a copy alongside results/ so
# the teardown S3 put filter "**/*outputs.zip" attaches it to the task.
if [[ "${is_failure_flag}" -eq 1 && -n "${output_zip}" ]]; then
cp "${output_zip}" "shard_${shard_num}_test.outputs.zip"
fi
popd >/dev/null
done
# Surface bazel's saved invocation (written by save_invocation in resmoke_tests_execute_bazel.sh)
# at ${workdir}/bazel-invocation.txt for the teardown S3 put.
if [ -f "${workdir}/src/bazel-invocation.txt" ]; then
cp "${workdir}/src/bazel-invocation.txt" "${workdir}/bazel-invocation.txt"
fi
# Emit the per-shard logs and summary, then build the combined OTel and report files.
bazel_test_results::print_executor_logs
bazel_test_results::display_test_summary shard_names shard_statuses shard_test_counts
bazel_test_results::combine_metrics
bazel_test_results::combine_reports
# Check for system-level failures (TIMEOUT or NO_REPORT)
for status in "${shard_statuses[@]}"; do
if [[ "${status}" == "TIMEOUT" || "${status}" == "NO_REPORT" ]]; then
echo "Error: One or more shards had TIMEOUT or NO_REPORT status. Not all tests ran or were reported." >&2
bazel_test_results::write_test_failures_expansion
exit 1
fi
done
# Ordinary test failures do not change this script's exit code; they are signalled only through
# the test_failures_exist.yml expansion file.
if [[ "${fail_task}" -eq 1 ]]; then
bazel_test_results::write_test_failures_expansion
fi
exit 0

View File

@ -13,6 +13,9 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# Result tasks re-invoke this script to conditionally re-execute the test. The test should
# execute unless the task was activated by the resmoke_tests task that already ran all tests.
exit_early_if_result_task() {
if [[ "${resmoke_disable_rbe}" == "true" ]]; then
return # Local exec: result tasks must always run bazel test themselves.
fi
if [[ -f "src/build_events.json" && "$activated_by" == "mongodb-mongo-ci-user" ]]; then
echo "Tests were executed by the resmoke_tests task, test results will be fetched from their remote execution."
exit 0
@ -56,10 +59,14 @@ build_ci_flags() {
export compile_variant="${compile_variant}"
export version_id="${version_id}"
if [[ "${evergreen_remote_exec}" == "on" ]]; then
if [[ "${evergreen_remote_exec}" == "on" && "${resmoke_disable_rbe}" != "true" ]]; then
ci_flags="--config=remote_test ${ci_flags}"
fi
if [[ "${resmoke_disable_rbe}" == "true" ]]; then
ci_flags+=" --//bazel/resmoke:installed_dist_test"
fi
if [ "${should_shuffle}" = true ]; then
ci_flags+=" --test_arg=--shuffle"
elif [ "${should_shuffle}" = false ]; then
@ -112,8 +119,20 @@ maybe_generate_burn_in_targets() {
# Fetches then tests with retries. Leaves the result in the global RET.
run_fetch_and_test() {
local fetch_attempts=3
local test_attempts=2
if [[ "${resmoke_disable_rbe}" == "true" ]]; then
# Local exec runs a full suite serially on a single host; retrying would just
# repeat hours of work. Cap to a single attempt and extend the bazel-level
# timeout well beyond the remote-exec default so the run can finish.
fetch_attempts=1
test_attempts=1
build_timeout_seconds=14400
export build_timeout_seconds
fi
export RETRY_ON_FAIL=1
bazel_evergreen_shutils::retry_bazel_cmd 3 "$BAZEL_BINARY" \
bazel_evergreen_shutils::retry_bazel_cmd $fetch_attempts "$BAZEL_BINARY" \
fetch ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} ${targets}
RET=$?
@ -122,7 +141,7 @@ run_fetch_and_test() {
fi
export RETRY_ON_FAIL=0
bazel_evergreen_shutils::retry_bazel_cmd 2 "$BAZEL_BINARY" \
bazel_evergreen_shutils::retry_bazel_cmd $test_attempts "$BAZEL_BINARY" \
test ${ci_flags} ${bazel_args} ${bazel_compile_flags} ${task_compile_flags} ${patch_compile_flags} --build_event_json_file=build_events.json ${targets}
RET=$?
@ -152,7 +171,11 @@ activate_result_tasks() {
return
fi
echo "Activating result task group..."
python buildscripts/evergreen_activate_result_tasks.py --expansion-file ../expansions.yml --build-events-file build_events.json
local extra_args=""
if [[ "${resmoke_disable_rbe}" != "true" ]]; then
extra_args="--build-events-file build_events.json"
fi
python buildscripts/evergreen_activate_result_tasks.py --expansion-file ../expansions.yml ${extra_args}
}
main() {
@ -178,6 +201,12 @@ main() {
maybe_generate_burn_in_targets
if [[ "${resmoke_disable_rbe}" == "true" && -z "$result_task" ]]; then
# Local exec runner: skip bazel entirely; each result task will run its own bazel test.
activate_result_tasks
exit 0
fi
set +o errexit
run_fetch_and_test
bazel_evergreen_shutils::write_last_engflow_link