SERVER-117045 Trigger core analysis tasks from bazel resmoke tasks (#47599)
GitOrigin-RevId: 3d697ff268a027bf991f42a9a066bc731e330a25
This commit is contained in:
parent
11d409d41b
commit
8ea21ae1d3
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
@ -98,6 +98,8 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
|
||||
|
||||
# The following patterns are parsed from ./buildscripts/bazel_testbuilds/OWNERS.yml
|
||||
/buildscripts/bazel_testbuilds/ @10gen/devprod-build @svc-auto-approve-bot
|
||||
/buildscripts/bazel_testbuilds/jstest_timeout* @10gen/devprod-correctness @svc-auto-approve-bot
|
||||
/buildscripts/bazel_testbuilds/verify_resmoke_coredump_test.sh @10gen/devprod-correctness @svc-auto-approve-bot
|
||||
|
||||
# The following patterns are parsed from ./buildscripts/cost_model/OWNERS.yml
|
||||
/buildscripts/cost_model/**/* @10gen/query-optimization @svc-auto-approve-bot
|
||||
|
||||
@ -34,7 +34,7 @@ fi
|
||||
main_pid=$!
|
||||
echo "Process-under-test started with PID: ${main_pid}"
|
||||
|
||||
# This is mocked out in buildscripts/bazel_testbuilds/verify_coredump_test.sh, make sure
|
||||
# This is mocked out in buildscripts/bazel_testbuilds/verify_unittest_coredump_test.sh, make sure
|
||||
# to update the test if this is changed.
|
||||
timeout_seconds=600
|
||||
|
||||
|
||||
@ -481,6 +481,7 @@ py_binary(
|
||||
srcs = [
|
||||
"create_rbe_sysroot.py",
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"local_rbe_container_url",
|
||||
],
|
||||
|
||||
@ -143,6 +143,7 @@ def main(outfile: Annotated[str, typer.Option()], build_events: str = "build_eve
|
||||
"content_type": "text/plain",
|
||||
},
|
||||
),
|
||||
FunctionCall("generate result task hang analyzer"),
|
||||
],
|
||||
teardown_group=[
|
||||
FunctionCall("kill processes"),
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
# Bazel build definitions used to test bazel features during build system development.
|
||||
|
||||
load("//bazel:mongo_src_rules.bzl", "mongo_cc_binary", "mongo_cc_unit_test")
|
||||
load("//bazel/resmoke:resmoke.bzl", "resmoke_suite_test")
|
||||
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
@ -42,3 +43,25 @@ mongo_cc_unit_test(
|
||||
"manual",
|
||||
],
|
||||
)
|
||||
|
||||
resmoke_suite_test(
|
||||
name = "jstest_timeout",
|
||||
srcs = [
|
||||
"jstest_timeout.js",
|
||||
],
|
||||
config = "jstest_timeout.yml",
|
||||
data = [
|
||||
"//buildscripts/resmokeconfig:common_jstest_data",
|
||||
],
|
||||
resmoke_args = [
|
||||
"--testTimeout=10", # The test sleeps for 30 seconds, so it should get killed.
|
||||
],
|
||||
tags = [
|
||||
# Manual tag to prevent this from running in normal test suites
|
||||
# since it's designed to fail.
|
||||
"manual",
|
||||
],
|
||||
deps = [
|
||||
"//src/mongo/shell:mongo",
|
||||
],
|
||||
)
|
||||
|
||||
@ -3,3 +3,9 @@ filters:
|
||||
- "*":
|
||||
approvers:
|
||||
- 10gen/devprod-build
|
||||
- "jstest_timeout*":
|
||||
approvers:
|
||||
- 10gen/devprod-correctness
|
||||
- "verify_resmoke_coredump_test.sh":
|
||||
approvers:
|
||||
- 10gen/devprod-correctness
|
||||
|
||||
1
buildscripts/bazel_testbuilds/jstest_timeout.js
Normal file
1
buildscripts/bazel_testbuilds/jstest_timeout.js
Normal file
@ -0,0 +1 @@
|
||||
sleep(30000);
|
||||
6
buildscripts/bazel_testbuilds/jstest_timeout.yml
Normal file
6
buildscripts/bazel_testbuilds/jstest_timeout.yml
Normal file
@ -0,0 +1,6 @@
|
||||
test_kind: js_test
|
||||
|
||||
executor:
|
||||
config:
|
||||
shell_options:
|
||||
nodb: ""
|
||||
122
buildscripts/bazel_testbuilds/verify_resmoke_coredump_test.sh
Executable file
122
buildscripts/bazel_testbuilds/verify_resmoke_coredump_test.sh
Executable file
@ -0,0 +1,122 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Test script to verify that the test timeouts in resmoke_suite_test generate coredumps and are picked up for core analysis tasks.
|
||||
#
|
||||
# This script:
|
||||
# 1. Runs a `bazel test` on a resmoke suite that is expected to fail and generate a core.
|
||||
# 2. Runs evergreen/fetch_remote_test_results.sh, which downloads test outputs for the remotely executed test.
|
||||
# 3. Runs the gen_hang_analyzer_tasks script that generates an Evergreen task config for core analysis.
|
||||
#
|
||||
# Usage:
|
||||
# ./buildscripts/bazel_testbuilds/verify_resmoke_coredump_test.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 - Success (coredump and generate task config were created)
|
||||
# 1 - Failure
|
||||
|
||||
set -o pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
|
||||
|
||||
# Change to repo root for bazel commands
|
||||
cd "$REPO_ROOT" || exit 1
|
||||
|
||||
TEST_TARGET="//buildscripts/bazel_testbuilds:jstest_timeout"
|
||||
|
||||
# Cleanup function to remove temp directories
|
||||
TEMP_DIR=""
|
||||
cleanup() {
|
||||
# Clean up temp directory if it was created
|
||||
if [[ -n "${TEMP_DIR}" && -d "${TEMP_DIR}" ]]; then
|
||||
rm -rf "${TEMP_DIR}" 2>/dev/null || true
|
||||
fi
|
||||
bazel shutdown
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
echo "=== Coredump Generation Verification Test ==="
|
||||
echo "Test target: ${TEST_TARGET}"
|
||||
echo "Repository root: ${REPO_ROOT}"
|
||||
echo ""
|
||||
|
||||
# Run the bazel test (expected to fail). --config=remote_test is used so the test runs in the same environment regardless of the host.
|
||||
echo "Running bazel test (this test is expected to fail)..."
|
||||
echo "Command: bazel test --config=remote_test --zip_undeclared_test_outputs --build_event_json_file=build_events.json ${TEST_TARGET}"
|
||||
echo ""
|
||||
|
||||
# Use --curses=no and --color=no to prevent interactive output that might cause hangs in CI.
|
||||
bazel test --config=remote_test --zip_undeclared_test_outputs --build_event_json_file=build_events.json --curses=no --color=no "${TEST_TARGET}" 2>&1 && BAZEL_EXIT_CODE=0 || BAZEL_EXIT_CODE=$?
|
||||
|
||||
echo ""
|
||||
echo "Bazel test exit code: ${BAZEL_EXIT_CODE}"
|
||||
echo ""
|
||||
|
||||
# The test should fail (exit code != 0)
|
||||
if [[ "${BAZEL_EXIT_CODE}" -eq 0 ]]; then
|
||||
echo "ERROR: Test unexpectedly passed. The timeout mechanism may not have triggered."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Test failed as expected. Now fetching remote test results..."
|
||||
echo ""
|
||||
|
||||
# Fetch the remote test results. In reality this would be run on a different host than the one that ran `bazel test`.
|
||||
TEMP_DIR=$(mktemp -d)
|
||||
export ENGFLOW_KEY="${workdir}/src/engflow.key"
|
||||
export ENGFLOW_CERT="${workdir}/src/engflow.cert"
|
||||
export workdir="$TEMP_DIR" # Change workdir so the script downloads outputs to the temporary dir, rather than task workdir.
|
||||
export test_label="$TEST_TARGET"
|
||||
bash ./evergreen/fetch_remote_test_results.sh
|
||||
echo ""
|
||||
unset workdir # Unset workdir, it's a default Evergreen expansion that might confuse a later script.
|
||||
|
||||
OUTPUTS_DIR="${TEMP_DIR}"/results/buildscripts/bazel_testbuilds/jstest_timeout/shard_1/test.outputs
|
||||
# List all files in the test output directory for debugging.
|
||||
if [[ -d "${OUTPUTS_DIR}" ]]; then
|
||||
echo "Contents of ${OUTPUTS_DIR}:"
|
||||
find "${OUTPUTS_DIR}" -type f 2>/dev/null | head -50
|
||||
echo ""
|
||||
else
|
||||
echo "FAILED: Test output directory not found: ${OUTPUTS_DIR}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Look for the expected core file.
|
||||
echo "Searching for coredump files in ${OUTPUTS_DIR}..."
|
||||
CORE_FILES=$(find "${OUTPUTS_DIR}" -type f \( -name "*.core" -o -name "*.core.gz" -o -name "dump_*.core*" \) 2>/dev/null)
|
||||
COREDUMP_FOUND=0
|
||||
COREDUMP_FILE=""
|
||||
if [[ -n "${CORE_FILES}" ]]; then
|
||||
COREDUMP_FOUND=1
|
||||
COREDUMP_FILE=$(echo "${CORE_FILES}" | head -1)
|
||||
echo "SUCCESS: Coredump file(s) found:"
|
||||
echo "${CORE_FILES}"
|
||||
else
|
||||
echo "FAILED: No coredump files found."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create an expansions file that is like what will exist in the tests tasks.
|
||||
EXPANSIONS_FILE="${TEMP_DIR}/expansions.yml"
|
||||
cat <<EOF >"${EXPANSIONS_FILE}"
|
||||
core_analyzer_distro_name: amazon2023-arm64-atlas-latest-m8g-2xlarge
|
||||
task_name: "${TEST_TARGET}"
|
||||
task_id: task_id_123
|
||||
execution: 0
|
||||
build_variant: build_variant_123
|
||||
core_analyzer_results_url: https://core_analyzer_results_url
|
||||
workdir: "${TEMP_DIR}"
|
||||
EOF
|
||||
|
||||
GENERATED_TASK_FILE="${TEMP_DIR}/generated_tasks.json"
|
||||
bazel run //buildscripts/resmokelib/hang_analyzer:gen_hang_analyzer_tasks --config=remote_test -- --expansions-file="${EXPANSIONS_FILE}" --output-file="${GENERATED_TASK_FILE}" --tests-use-bazel --use-mock-tasks
|
||||
|
||||
if [[ -f "${GENERATED_TASK_FILE}" ]]; then
|
||||
echo "SUCCESS: Created the Evergreen task config ${GENERATED_TASK_FILE}"
|
||||
cat "${GENERATED_TASK_FILE}"
|
||||
echo ""
|
||||
else
|
||||
echo "FAILED: Did not generate an Evergreen task config at ${GENERATED_TASK_FILE}"
|
||||
exit 1
|
||||
fi
|
||||
@ -7,7 +7,7 @@
|
||||
# 2. Verifies that a coredump file is created in the test outputs
|
||||
#
|
||||
# Usage:
|
||||
# ./buildscripts/bazel_testbuilds/verify_coredump_test.sh
|
||||
# ./buildscripts/bazel_testbuilds/verify_unittest_coredump_test.sh
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 - Success (coredump was created)
|
||||
@ -12,32 +12,38 @@ if __name__ == "__main__" and __package__ is None:
|
||||
from buildscripts.local_rbe_container_url import calculate_local_rbe_container_url
|
||||
|
||||
|
||||
def main():
|
||||
os.chdir(os.environ.get("BUILD_WORKSPACE_DIRECTORY", "."))
|
||||
def create_rbe_sysroot(dir) -> bool:
|
||||
container_url = calculate_local_rbe_container_url()
|
||||
if container_url == "UNKNOWN":
|
||||
print("Could not determine local RBE container URL, cannot create rbe sysroot")
|
||||
return 1
|
||||
return False
|
||||
|
||||
print(f"Using local RBE container URL: {container_url}")
|
||||
|
||||
container_cli = shutil.which("docker") or shutil.which("podman")
|
||||
if not container_cli:
|
||||
print("Error: Neither docker nor podman is installed.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
return False
|
||||
|
||||
cid = subprocess.check_output([container_cli, "create", container_url]).decode().strip()
|
||||
|
||||
os.makedirs("./rbe_sysroot", exist_ok=True)
|
||||
os.makedirs(dir, exist_ok=True)
|
||||
|
||||
subprocess.run(["sudo", container_cli, "cp", f"{cid}:/", "./rbe_sysroot/"], check=True)
|
||||
subprocess.run(["sudo", container_cli, "cp", f"{cid}:/", dir], check=True)
|
||||
|
||||
user = getpass.getuser()
|
||||
subprocess.run(["sudo", "chown", "-R", f"{user}:{user}", "./rbe_sysroot"], check=True)
|
||||
subprocess.run(["sudo", "chown", "-R", f"{user}:{user}", dir], check=True)
|
||||
subprocess.run([container_cli, "rm", cid], check=True)
|
||||
|
||||
return 0
|
||||
return True
|
||||
|
||||
|
||||
def main():
|
||||
os.chdir(os.environ.get("BUILD_WORKSPACE_DIRECTORY", "."))
|
||||
|
||||
success = create_rbe_sysroot("./rbe_sysroot")
|
||||
return not success
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
sys.exit(main())
|
||||
|
||||
@ -30,8 +30,7 @@ mongo_js_library(
|
||||
resmoke_suite_test(
|
||||
name = "core",
|
||||
srcs = [
|
||||
"//jstests/core:all_subpackage_javascript_files",
|
||||
"//jstests/core_standalone:all_subpackage_javascript_files",
|
||||
"//jstests/core/timeseries/pbt:timeseries_cache_usage_pbt.js",
|
||||
],
|
||||
config = ":suites/core.yml",
|
||||
data = [
|
||||
@ -58,7 +57,7 @@ resmoke_suite_test(
|
||||
resmoke_args = [
|
||||
"--storageEngineCacheSizeGB=1",
|
||||
],
|
||||
shard_count = 24,
|
||||
shard_count = 1,
|
||||
tags = [
|
||||
"ci-development-critical-single-variant",
|
||||
],
|
||||
|
||||
@ -17,6 +17,7 @@ py_library(
|
||||
],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//buildscripts:create_rbe_sysroot",
|
||||
"//buildscripts:simple_report",
|
||||
"//buildscripts/resmokelib/run",
|
||||
"//buildscripts/resmokelib/symbolizer",
|
||||
@ -42,3 +43,14 @@ py_library(
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
py_binary(
|
||||
name = "gen_hang_analyzer_tasks",
|
||||
srcs = ["gen_hang_analyzer_tasks.py"],
|
||||
main = "gen_hang_analyzer_tasks.py",
|
||||
deps = [
|
||||
"//buildscripts/resmokelib",
|
||||
"//buildscripts/resmokelib/core",
|
||||
"//buildscripts/resmokelib/hang_analyzer",
|
||||
],
|
||||
)
|
||||
|
||||
@ -12,7 +12,6 @@ sys.path.append(mongo_path)
|
||||
from buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks import (
|
||||
GENERATED_TASK_PREFIX,
|
||||
RANDOM_STRING_LENGTH,
|
||||
should_activate_core_analysis_task,
|
||||
)
|
||||
from buildscripts.resmokelib.utils import evergreen_conn
|
||||
from buildscripts.util.read_config import read_config_file
|
||||
@ -49,7 +48,9 @@ def maybe_attach_core_analyzer_task(
|
||||
search_tasks = build.tasks
|
||||
|
||||
# The task id uses underscores instead of hyphens
|
||||
task_id_search_term = f"{GENERATED_TASK_PREFIX}_{current_task_name.replace('-', '_')}"
|
||||
task_id_search_term = (
|
||||
f"{GENERATED_TASK_PREFIX}_{current_task_name.replace('-', '_').replace('/', '_')}"
|
||||
)
|
||||
|
||||
matching_task = None
|
||||
matching_execution = None
|
||||
@ -99,16 +100,11 @@ def maybe_attach_core_analyzer_task(
|
||||
if not gen_from_cur_execution:
|
||||
return
|
||||
|
||||
if should_activate_core_analysis_task(current_task):
|
||||
first_line = "Core analysis is in progress."
|
||||
else:
|
||||
first_line = "Core analysis was not scheduled because only archived fails were detected."
|
||||
|
||||
file_lines = [
|
||||
first_line,
|
||||
"This file will be overwritten with the results when core analysis is finished.",
|
||||
"You can view the core analysis task at this here:",
|
||||
core_analysis_task_url,
|
||||
"If it was not scheduled, it can be manually activated.",
|
||||
]
|
||||
|
||||
with open(results_output_file, "w") as file:
|
||||
|
||||
@ -10,7 +10,10 @@ from opentelemetry.trace.status import StatusCode
|
||||
|
||||
from buildscripts.resmokelib import configure_resmoke
|
||||
from buildscripts.resmokelib.hang_analyzer import dumper
|
||||
from buildscripts.resmokelib.hang_analyzer.extractor import download_task_artifacts
|
||||
from buildscripts.resmokelib.hang_analyzer.extractor import (
|
||||
download_bazel_task_artifacts,
|
||||
download_task_artifacts,
|
||||
)
|
||||
from buildscripts.resmokelib.plugin import PluginInterface, Subcommand
|
||||
from buildscripts.resmokelib.utils.otel_utils import get_default_current_span
|
||||
|
||||
@ -29,6 +32,7 @@ class CoreAnalyzer(Subcommand):
|
||||
self.boring_core_dump_pids = set(pid for pid in boring_pids_str.split(",") if pid)
|
||||
else:
|
||||
self.boring_core_dump_pids = set()
|
||||
self.is_bazel_task = options.get("is_bazel_task", False)
|
||||
self.root_logger = self.setup_logging(logger)
|
||||
self.extra_otel_options = {}
|
||||
for option in options["otel_extra_data"]:
|
||||
@ -38,6 +42,7 @@ class CoreAnalyzer(Subcommand):
|
||||
@TRACER.start_as_current_span("core_analyzer.execute")
|
||||
def execute(self):
|
||||
base_dir = self.options["working_dir"]
|
||||
sysroot_dir = None
|
||||
current_span = get_default_current_span(
|
||||
{"failed_task_id": self.task_id} | self.extra_otel_options
|
||||
)
|
||||
@ -55,24 +60,51 @@ class CoreAnalyzer(Subcommand):
|
||||
)
|
||||
|
||||
multiversion_dir = os.path.join(base_dir, "multiversion")
|
||||
if not skip_download and not download_task_artifacts(
|
||||
self.root_logger,
|
||||
self.task_id,
|
||||
base_dir,
|
||||
dumpers.dbg,
|
||||
multiversion_dir,
|
||||
self.execution,
|
||||
):
|
||||
self.root_logger.error("Artifacts were not found.")
|
||||
current_span.set_attributes(
|
||||
{
|
||||
"core_analyzer_execute_error": "Artifacts were not found.",
|
||||
}
|
||||
)
|
||||
current_span.set_status(StatusCode.ERROR, description="Artifacts were not found.")
|
||||
raise RuntimeError(
|
||||
"Artifacts were not found for specified task. Could not analyze cores."
|
||||
)
|
||||
|
||||
if self.is_bazel_task:
|
||||
if not skip_download:
|
||||
all_downloaded, sysroot_dir = download_bazel_task_artifacts(
|
||||
self.root_logger,
|
||||
self.task_id,
|
||||
base_dir,
|
||||
multiversion_dir,
|
||||
self.execution,
|
||||
)
|
||||
if not all_downloaded:
|
||||
self.root_logger.error("Artifacts were not found.")
|
||||
current_span.set_attributes(
|
||||
{
|
||||
"core_analyzer_execute_error": "Artifacts were not found.",
|
||||
}
|
||||
)
|
||||
current_span.set_status(
|
||||
StatusCode.ERROR, description="Bazel artifacts were not found."
|
||||
)
|
||||
raise RuntimeError(
|
||||
"Artifacts were not found for specified tasks. Could not analyze cores."
|
||||
)
|
||||
|
||||
else:
|
||||
if not skip_download and not download_task_artifacts(
|
||||
self.root_logger,
|
||||
self.task_id,
|
||||
base_dir,
|
||||
dumpers.dbg,
|
||||
multiversion_dir,
|
||||
self.execution,
|
||||
):
|
||||
self.root_logger.error("Artifacts were not found.")
|
||||
current_span.set_attributes(
|
||||
{
|
||||
"core_analyzer_execute_error": "Artifacts were not found.",
|
||||
}
|
||||
)
|
||||
current_span.set_status(
|
||||
StatusCode.ERROR, description="Artifacts were not found."
|
||||
)
|
||||
raise RuntimeError(
|
||||
"Artifacts were not found for specified task. Could not analyze cores."
|
||||
)
|
||||
|
||||
with open(task_id_file, "w") as file:
|
||||
file.write(self.task_id)
|
||||
@ -92,6 +124,7 @@ class CoreAnalyzer(Subcommand):
|
||||
install_dir,
|
||||
analysis_dir,
|
||||
multiversion_dir,
|
||||
sysroot_dir,
|
||||
self.gdb_index_cache,
|
||||
self.boring_core_dump_pids,
|
||||
)
|
||||
@ -146,6 +179,13 @@ class CoreAnalyzerPlugin(PluginInterface):
|
||||
help="Fetch corresponding core dumps and binaries for a given task id.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--is-bazel-task",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Indicates that this is a bazel task and should use bazel-specific artifact download.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--execution",
|
||||
"-e",
|
||||
@ -162,7 +202,7 @@ class CoreAnalyzerPlugin(PluginInterface):
|
||||
action="store",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Directory that contains binaires and debugsymbols.",
|
||||
help="Directory that contains binaries and debugsymbols.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
@ -15,7 +15,7 @@ from abc import ABCMeta, abstractmethod
|
||||
from collections import namedtuple
|
||||
from datetime import datetime, timedelta
|
||||
from io import StringIO
|
||||
from typing import List, Tuple
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import psutil
|
||||
from opentelemetry import trace
|
||||
@ -739,6 +739,7 @@ class GDBDumper(Dumper):
|
||||
install_dir: str,
|
||||
analysis_dir: str,
|
||||
multiversion_dir: str,
|
||||
sysroot_dir: Optional[str],
|
||||
gdb_index_cache: str,
|
||||
boring_core_dump_pids: set = None,
|
||||
max_core_dumps: int = 10,
|
||||
@ -783,6 +784,7 @@ class GDBDumper(Dumper):
|
||||
exit_code, status = self.analyze_core(
|
||||
core_file_path=core_file_path,
|
||||
install_dir=install_dir,
|
||||
sysroot_dir=sysroot_dir,
|
||||
analysis_dir=analysis_dir,
|
||||
tmp_dir=tmp_dir,
|
||||
logger=logger,
|
||||
@ -841,6 +843,7 @@ class GDBDumper(Dumper):
|
||||
self,
|
||||
core_file_path: str,
|
||||
install_dir: str,
|
||||
sysroot_dir: Optional[str],
|
||||
analysis_dir: str,
|
||||
tmp_dir: str,
|
||||
multiversion_dir: str,
|
||||
@ -881,6 +884,9 @@ class GDBDumper(Dumper):
|
||||
logging_dir = os.path.join(analysis_dir, basename)
|
||||
os.makedirs(logging_dir, exist_ok=True)
|
||||
|
||||
if sysroot_dir:
|
||||
cmds.append(f"set sysroot {sysroot_dir}")
|
||||
|
||||
cmds += [
|
||||
f"set solib-search-path {lib_dir}",
|
||||
f"set index-cache directory {tmp_dir}",
|
||||
|
||||
@ -12,6 +12,7 @@ import sys
|
||||
import tarfile
|
||||
import time
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from logging import Logger
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
@ -20,8 +21,12 @@ from opentelemetry import trace
|
||||
from opentelemetry.trace.status import StatusCode
|
||||
from retry import retry
|
||||
|
||||
from buildscripts.create_rbe_sysroot import create_rbe_sysroot
|
||||
from buildscripts.resmokelib.hang_analyzer.dumper import Dumper
|
||||
from buildscripts.resmokelib.setup_multiversion.download import DownloadError
|
||||
from buildscripts.resmokelib.setup_multiversion.download import (
|
||||
DownloadError,
|
||||
download_from_s3_with_requests,
|
||||
)
|
||||
from buildscripts.resmokelib.setup_multiversion.setup_multiversion import (
|
||||
SetupMultiversion,
|
||||
_DownloadOptions,
|
||||
@ -441,10 +446,12 @@ def post_install_gdb_optimization(download_dir: str, root_looger: Logger):
|
||||
|
||||
root_looger.debug("Finished recalculating the debuglink for %s", file_path)
|
||||
|
||||
dist_dir = os.path.join(download_dir, "install", "dist-test")
|
||||
bin_dir = os.path.join(dist_dir, "bin")
|
||||
install_dir = os.path.join(download_dir, "install")
|
||||
if os.path.exists(os.path.join(install_dir, "dist-test")):
|
||||
install_dir = os.path.join(install_dir, "dist-test")
|
||||
bin_dir = os.path.join(install_dir, "bin")
|
||||
bin_files = [os.path.join(bin_dir, file_path) for file_path in os.listdir(bin_dir)]
|
||||
lib_dir = os.path.join(dist_dir, "lib")
|
||||
lib_dir = os.path.join(install_dir, "lib")
|
||||
lib_files = []
|
||||
if os.path.exists(lib_dir):
|
||||
lib_files = [os.path.join(lib_dir, file_path) for file_path in os.listdir(lib_dir)]
|
||||
@ -737,3 +744,261 @@ def _get_symbol_files():
|
||||
for needle in glob.glob(haystack):
|
||||
out.append((needle, os.path.join(os.getcwd(), os.path.basename(needle))))
|
||||
return out
|
||||
|
||||
|
||||
def check_manifest_for_cores(
    root_logger: Logger, manifest_url: str, timeout_secs: float = 60.0
) -> bool:
    """Check if a test.outputs manifest contains core dumps.

    The manifest body is fetched and searched for the substrings ".core" and
    ".mdmp". The check is deliberately permissive: when the manifest cannot be
    read for any reason the function answers True so that the caller falls back
    to downloading the whole archive.

    :param root_logger: Logger used to report the outcome.
    :param manifest_url: URL the manifest is read from.
    :param timeout_secs: Timeout passed to urlopen so a stalled connection
        cannot block the caller forever.
    :return: False only when the manifest was read and mentions no core dump;
        True otherwise.
    """
    try:
        with urllib.request.urlopen(manifest_url, timeout=timeout_secs) as response:
            manifest_content = response.read().decode("utf-8")
    except Exception as ex:
        # Best effort: an unreadable manifest must not hide cores that may exist.
        root_logger.warning(
            "Could not read manifest: %s. Will download entire test outputs and check for cores.",
            ex,
        )
        return True  # If we can't read the manifest, assume cores might be present

    has_cores = ".core" in manifest_content or ".mdmp" in manifest_content
    if has_cores:
        root_logger.info("Manifest indicates core dumps are present")
    else:
        root_logger.info("Manifest indicates no core dumps")
    return has_cores
|
||||
|
||||
|
||||
@TRACER.start_as_current_span("core_analyzer.download_bazel_result_task_cores")
def download_bazel_result_task_cores(root_logger: Logger, task_id: str, download_dir: str) -> bool:
    """Download the core dumps attached to a bazel result task.

    Every "test.outputs" zip artifact of the task is considered. When a matching
    manifest artifact exists it is consulted first so archives without cores are
    not downloaded. Members ending in ".core" or ".mdmp" are copied into
    "<download_dir>/core-dumps" under their base name.

    :param root_logger: Logger used for progress and error reporting.
    :param task_id: Evergreen task id whose artifacts are inspected.
    :param download_dir: Directory that receives the temporary zips and the
        "core-dumps" sub-directory.
    :return: True when at least one core dump was extracted, False otherwise.
    """
    root_logger.info(f"Downloading cores from task {task_id}")
    current_span = get_default_current_span({"download_task_id": task_id})

    evg_api = evergreen_conn.get_evergreen_api()
    task_info = evg_api.task_by_id(task_id)

    core_dumps_dir = os.path.join(download_dir, "core-dumps")
    os.makedirs(core_dumps_dir, exist_ok=True)

    outputs_artifacts = []
    manifest_map = {}  # Map of outputs zip archive to its manifest
    for artifact in task_info.artifacts:
        if "test.outputs" in artifact.name and artifact.name.endswith(".zip"):
            outputs_artifacts.append(artifact)
        elif "_manifest__MANIFEST" in artifact.name:
            # Key the manifest by the name of the zip artifact it describes.
            zip_name = artifact.name.replace("_manifest__MANIFEST", "__outputs.zip")
            manifest_map[zip_name] = artifact.url

    if not outputs_artifacts:
        root_logger.warning("No test.outputs artifacts found in result task")
        return False

    core_dumps_found = 0

    for artifact in outputs_artifacts:
        root_logger.info(f"Processing artifact: {artifact.name}")

        # Check manifest first to see if cores are present
        manifest_url = manifest_map.get(artifact.name)
        if manifest_url:
            if not check_manifest_for_cores(root_logger, manifest_url):
                root_logger.info(f"Skipping {artifact.name} - no cores in manifest")
                continue
        else:
            root_logger.warning(f"No manifest found for {artifact.name}, will download anyway")

        file_name = artifact.name
        zip_path = os.path.join(download_dir, file_name)

        try:

            @retry(tries=3, delay=5)
            def download_outputs_zip():
                root_logger.info(f"Downloading {file_name}")
                # Drop a partial file left behind by a previous attempt.
                if os.path.exists(zip_path):
                    os.remove(zip_path)
                download_from_s3_with_requests(artifact.url, zip_path)

            download_outputs_zip()

            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                for member in zip_ref.namelist():
                    if not member.endswith((".core", ".mdmp")):
                        continue

                    core_name = os.path.basename(member)
                    extract_path = os.path.join(core_dumps_dir, core_name)
                    root_logger.info(f"Extracting core dump: {core_name}")

                    with zip_ref.open(member) as source, open(extract_path, "wb") as target:
                        shutil.copyfileobj(source, target)

                    core_dumps_found += 1

        except Exception as ex:
            # A broken artifact must not prevent the remaining ones from being processed.
            root_logger.error(f"Error processing artifact {artifact.name}: {ex}")
            current_span.set_status(
                StatusCode.ERROR, f"Failed to download artifact {artifact.name}"
            )
            current_span.set_attribute("download_error", str(ex))

        finally:
            # Remove the archive on failure too, so it does not linger in download_dir.
            try:
                if os.path.exists(zip_path):
                    os.remove(zip_path)
            except OSError as ex:
                root_logger.warning(f"Could not remove {zip_path}: {ex}")

    root_logger.info(f"Downloaded {core_dumps_found} core dump(s)")

    if core_dumps_found == 0:
        root_logger.error("No core dumps found in test.outputs")
        current_span.set_status(StatusCode.ERROR, "No core dumps found")
        return False

    current_span.set_attribute("core_dumps_found", core_dumps_found)
    return True
|
||||
|
||||
|
||||
def find_test_task_with_binaries(evg_api, results_task_id: str):
    """
    Find the resmoke test task that lives in the same build as a results task.

    The task is selected by display name: "resmoke_tests" normally, or a name
    starting with "resmoke_tests_burn_in" when the results task's display name
    contains "_burn_in_".

    :param evg_api: Evergreen API client
    :param results_task_id: ID of the results task whose build is searched
    :return: The matching task object, or None if the lookup failed or did not
        yield exactly one task
    """

    try:
        task = evg_api.task_by_id(results_task_id)
        tasks = evg_api.tasks_by_build(task.build_id)

        if "_burn_in_" in task.display_name:
            candidates = [
                t for t in tasks if t.display_name.startswith("resmoke_tests_burn_in")
            ]
        else:
            candidates = [t for t in tasks if t.display_name == "resmoke_tests"]

        # Raise explicitly instead of asserting so the check survives `python -O`.
        if len(candidates) != 1:
            raise RuntimeError(
                f"Could not find a unique resmoke test task in this variant {task.build_variant_display_name}"
            )

        return candidates[0]

    except Exception as ex:
        print(f"ERROR: Failed to query Evergreen for test task: {ex}")
        return None
|
||||
|
||||
|
||||
@TRACER.start_as_current_span("core_analyzer.download_bazel_test_task_binaries")
def download_bazel_test_task_binaries(root_logger: Logger, task_id: str, download_dir: str) -> bool:
    """Download and unpack the test binaries that belong to a bazel result task.

    The resmoke test task of the same build is looked up, every artifact whose
    name contains "Test binaries and libraries" is downloaded, and its contents
    are extracted into "<download_dir>/install". A leading "dist-tests/" path
    component is renamed to "dist-test/" during extraction.

    :param root_logger: Logger used for progress and error reporting.
    :param task_id: Evergreen id of the result task.
    :param download_dir: Directory that receives the temporary archive and the
        "install" sub-directory.
    :return: True when all archives were extracted, False on any failure.
    """
    evg_api = evergreen_conn.get_evergreen_api()
    resmoke_task = find_test_task_with_binaries(evg_api, task_id)

    # The lookup helper returns None on failure; bail out instead of raising AttributeError.
    if resmoke_task is None:
        root_logger.error(f"Could not find the test task holding binaries for task {task_id}")
        return False

    root_logger.info(f"Downloading binaries from task {resmoke_task.task_id}")

    dist_tests_artifacts = [
        a for a in resmoke_task.artifacts if "Test binaries and libraries" in a.name
    ]
    if not dist_tests_artifacts:
        root_logger.error("No binary archive found in the resmoke_test task")
        return False

    install_dir = os.path.join(download_dir, "install")
    os.makedirs(install_dir, exist_ok=True)

    for artifact in dist_tests_artifacts:
        file_name = "resmoke_tests.tgz"
        download_path = os.path.join(download_dir, file_name)

        try:

            @retry(tries=3, delay=5)
            def download_binary_artifact():
                root_logger.info(f"Downloading {file_name}")
                # Drop a partial file left behind by a previous attempt.
                if os.path.exists(download_path):
                    os.remove(download_path)
                download_from_s3_with_requests(artifact.url, download_path)

            download_binary_artifact()

            root_logger.info(f"Extracting {file_name}")

            # NOTE(review): members are extracted without a tarfile extraction filter;
            # this presumes the archive is a trusted CI artifact - confirm.
            with tarfile.open(download_path, "r:gz") as tar:
                # Extract members, mapping dist-tests -> dist-test
                for member in tar.getmembers():
                    if member.name.startswith("dist-tests/"):
                        member.name = member.name.replace("dist-tests/", "dist-test/", 1)
                    tar.extract(member, install_dir)

        except Exception as ex:
            root_logger.error(f"Error downloading/extracting {file_name}: {ex}")
            return False

        finally:
            # Remove the archive on failure too, so it does not linger in download_dir.
            try:
                if os.path.exists(download_path):
                    os.remove(download_path)
            except OSError as ex:
                root_logger.warning(f"Could not remove {download_path}: {ex}")

    root_logger.info("Successfully downloaded and extracted binaries")
    return True
|
||||
|
||||
|
||||
@TRACER.start_as_current_span("core_analyzer.download_bazel_task_artifacts")
def download_bazel_task_artifacts(
    root_logger: Logger,
    task_id: str,
    download_dir: str,
    retry_secs: int = 10,
    download_timeout_secs: int = 30 * 60,
) -> tuple[bool, Optional[str]]:
    """Fetch everything needed to analyze the cores of a bazel result task.

    `download_dir` is recreated from scratch, then core dumps and test binaries
    are downloaded concurrently. On success an RBE sysroot is created on Linux
    hosts and the gdb post-install optimization is run.

    NOTE(review): the CoreAnalyzer.execute call site in this change passes
    `multiversion_dir` and `self.execution` positionally after `download_dir`,
    which binds them to `retry_secs` and `download_timeout_secs`. Confirm the
    intent and pass the retry settings by keyword.

    :param root_logger: Logger used for progress and error reporting.
    :param task_id: Evergreen id of the result task.
    :param download_dir: Working directory; any existing content is deleted.
    :param retry_secs: Forwarded to run_with_retries as `retry_secs`.
    :param download_timeout_secs: Forwarded to run_with_retries as `timeout_secs`.
    :return: `(all_downloaded, sysroot_dir)`. `sysroot_dir` is None when no
        sysroot was created (download failure, non-Linux host, or sysroot
        creation reported failure).
    :raises RuntimeError: if `download_dir` looks like a git repository.
    """
    if os.path.exists(download_dir):
        # quick sanity check to ensure we don't delete a repo
        if os.path.exists(os.path.join(download_dir, ".git")):
            raise RuntimeError(f"Input dir cannot be a git repo: {download_dir}")

        shutil.rmtree(download_dir)
        root_logger.info(f"Deleted existing dir at {download_dir}")

    os.mkdir(download_dir)

    current_span = get_default_current_span({"task_id": task_id})

    # Cores come from the result task, binaries from the test task.
    download_funcs = (download_bazel_result_task_cores, download_bazel_test_task_binaries)

    # Download cores and binaries in parallel
    with OtelThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                run_with_retries,
                root_logger=root_logger,
                func=download_func,
                timeout_secs=download_timeout_secs,
                retry_secs=retry_secs,
                task_id=task_id,
                download_dir=download_dir,
            )
            for download_func in download_funcs
        ]

        all_downloaded = True
        for future in concurrent.futures.as_completed(futures):
            if not future.result():
                current_span.set_status(
                    StatusCode.ERROR, "Errors occurred while fetching artifacts"
                )
                current_span.set_attribute(
                    "download_bazel_task_artifacts_error",
                    "Errors occurred while fetching artifacts",
                )
                root_logger.error("Errors occurred while fetching bazel artifacts")
                all_downloaded = False
                break

    # Without a complete download there is nothing to post-process.
    if not all_downloaded:
        return False, None

    # Stays None unless a sysroot is actually created, so the return below is always bound.
    sysroot_dir = None
    if sys.platform.startswith("linux"):
        candidate = os.path.join(download_dir, "rbe_sysroot")
        if create_rbe_sysroot(candidate):
            sysroot_dir = candidate
        else:
            root_logger.warning(
                f"Could not create an RBE sysroot at {candidate}; continuing without one"
            )

    post_install_gdb_optimization(download_dir, root_logger)

    return True, sysroot_dir
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
"""Generate a task to run core analysis on uploaded core dumps in evergreen."""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
@ -9,14 +10,15 @@ import random
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
from typing import List
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, NamedTuple, Optional
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from shrub.v2 import BuildVariant, FunctionCall, ShrubProject, Task, TaskDependency
|
||||
from shrub.v2.command import BuiltInCommand
|
||||
|
||||
from buildscripts.resmokelib.core.process import BORING_CORE_DUMP_PIDS_FILE
|
||||
from buildscripts.resmokelib.hang_analyzer import dumper
|
||||
from evergreen.api import RetryingEvergreenApi
|
||||
from evergreen.task import Task as EvgTask
|
||||
|
||||
mongo_path = pathlib.Path(__file__).parents[3]
|
||||
@ -32,6 +34,288 @@ LOCAL_BIN_DIR = os.path.join("dist-test", "bin")
|
||||
MULTIVERSION_BIN_DIR = os.path.normpath("/data/multiversion")
|
||||
|
||||
|
||||
class CoreInfo(NamedTuple):
    """Immutable record describing one core dump found for the current task."""

    # Path (or bare file name) of the core dump.
    path: str
    # Name of the binary that produced the dump; some generators leave this empty.
    binary_name: str
    # Process id parsed out of the dump's file name, kept as a string.
    pid: str
    # True when the pid was listed in the boring-core-dump-pids file.
    marked_boring: bool
|
||||
|
||||
|
||||
class CoreAnalysisTaskGenerator(ABC):
    """Base class that builds the Evergreen config for a generated core analysis task.

    Subclasses decide how core dumps are located (``find_cores``) and which commands
    the generated task runs (``get_core_analyzer_commands``). They may also override
    ``get_core_analysis_task_dependencies`` to make the generated task wait on others.
    """

    def __init__(
        self,
        expansions_file: str = "../expansions.yml",
        use_mock_tasks: bool = False,
    ):
        """Read the task expansions and set up the Evergreen API client.

        :param expansions_file: Path of the expansions file to read.
        :param use_mock_tasks: Use a ``MagicMock`` in place of the real Evergreen API.
        """
        self.expansions = read_config_file(expansions_file)
        # Stays None when no API client could be created so that generate() can
        # skip cleanly instead of failing on a missing attribute.
        self.evg_api = None

        if use_mock_tasks:
            task = MagicMock()
            task.display_name = "resmoke_tests"
            task.id = "resmoke_tests_task_id_123"
            build = MagicMock()
            build.get_tasks.return_value = [task]
            self.evg_api = MagicMock()
            self.evg_api.build_by_id.return_value = build
            return

        try:
            self.evg_api = evergreen_conn.get_evergreen_api()
        except RuntimeError:
            print(
                "WARNING: Cannot generate core analysis because the evergreen api file could not be found.",
                file=sys.stderr,
            )
            print(
                "This is probably not an error, if you want core analysis to run on this task make sure",
                file=sys.stderr,
            )
            print(
                "the evergreen function 'configure evergreen api credentials' is called before this task",
                file=sys.stderr,
            )

    @abstractmethod
    def get_core_analyzer_commands(
        self,
        task_id: str,
        execution: str,
        core_analyzer_results_url: str,
        gdb_index_cache: str,
        has_interesting_core_dumps: bool,
        boring_core_dump_pids: set,
    ) -> List[FunctionCall]:
        """Return the commands the generated core analysis task should run."""

    @abstractmethod
    def find_cores(self) -> list[CoreInfo]:
        """Return a ``CoreInfo`` for every core dump belonging to the current task."""

    def get_core_analysis_task_dependencies(self, compile_variant: str) -> set[TaskDependency]:
        """Return the tasks the generated task depends on; none by default."""
        return set()

    def generate(self) -> Optional[dict]:
        """Build the generated-task configuration for the current task.

        :return: The shrub project as a dict, or None when generation is skipped
            (unsupported platform, no Evergreen API client, skipped build variant,
            the current task is itself a generated task, or no core dumps found).
        """
        if not sys.platform.startswith("linux"):
            print("This platform is not supported, skipping core analysis task generation.")
            return None

        if self.evg_api is None:
            # __init__ has already explained on stderr why no client is available.
            return None

        # gather information from the current task being run
        distro = None
        for distro_expansion in ["core_analyzer_distro_name", "large_distro_name", "distro_id"]:
            if distro := self.expansions.get(distro_expansion, None):
                break
        assert distro is not None
        current_task_name = self.expansions.get("task_name")
        task_id = self.expansions.get("task_id")
        execution = self.expansions.get("execution")
        gdb_index_cache = (
            "off" if self.expansions.get("core_analyzer_gdb_index_cache") == "off" else "on"
        )
        build_variant_name = self.expansions.get("build_variant")
        core_analyzer_results_url = self.expansions.get("core_analyzer_results_url")
        compile_variant = self.expansions.get("compile_variant")

        task_info = self.evg_api.task_by_id(task_id)

        skip_variants = ["commit-queue"]
        if task_info.build_variant in skip_variants:
            print(f"Skipping core analysis task generation for variant: {task_info.build_variant}")
            return None

        # make sure we are not creating an infinite loop by generating a task from another generated task
        if current_task_name.startswith(GENERATED_TASK_PREFIX):
            print(
                f"Skipping task generation because {current_task_name} starts with {GENERATED_TASK_PREFIX}"
            )
            return None

        cores = self.find_cores()
        if not cores:
            print("No core dumps found.")
            return None

        boring_cores = [core for core in cores if core.marked_boring]
        interesting_cores = [core for core in cores if not core.marked_boring]
        boring_core_dump_pids = {core.pid for core in boring_cores}

        print(f"Detected core dumps: {[core.path for core in cores]}")
        print(f"Core dumps marked as boring by resmoke: {[core.path for core in boring_cores]}")

        if not interesting_cores:
            print(
                "No interesting core dumps were found. Not activating the core analysis task. It is still generated, but must be manually activated."
            )
        # The task is always generated; it is only activated automatically when there
        # is something interesting to analyze and the task is not on the skip list.
        should_activate = bool(interesting_cores) and not self._should_skip_task(task_info)

        build_variant = BuildVariant(name=build_variant_name)
        commands = self.get_core_analyzer_commands(
            task_id,
            execution,
            core_analyzer_results_url,
            gdb_index_cache,
            should_activate,
            boring_core_dump_pids,
        )

        deps = self.get_core_analysis_task_dependencies(compile_variant)

        sub_tasks = {Task(get_generated_task_name(current_task_name, execution), commands, deps)}

        build_variant.add_tasks(sub_tasks, distros=[distro], activate=should_activate)

        shrub_project = ShrubProject.empty()
        shrub_project.add_build_variant(build_variant)

        # shrub.py currently does not support adding task deps that override the variant deps
        output_dict = shrub_project.as_dict()
        deps_list = [dep.as_dict() for dep in deps]
        for variant in output_dict["buildvariants"]:
            for task in variant["tasks"]:
                task["depends_on"] = deps_list

        return output_dict

    def _should_skip_task(self, task: EvgTask) -> bool:
        """Return True when the core analysis task must not be auto-activated for ``task``.

        The display name of the parent task is used when ``task`` has a parent.
        """
        # We hardcode some task names where the core analysis is extending the long pole
        # of required patch builds by 100 mins and the BFs are taking too long to fix.
        # This list is a quick fix to improve development velocity.
        # TODO(SERVER-118661): Remove disagg suites from skip list.
        skip_tasks = [
            "disagg_repl_jscore_passthrough",
            "disagg_repl_jscore_passthrough_secondary_reads",
            "disagg_sharded_colls_jscore_passthrough_secondary_reads_with_balancer",
            "disagg_two_nodes_repl_jscore_passthrough",
            "no_passthrough_disagg_override",
        ]

        current_task_name = task.display_name
        if task.parent_task_id:
            parent_task = self.evg_api.task_by_id(task.parent_task_id)
            current_task_name = parent_task.display_name
        if current_task_name in skip_tasks:
            print(f"Not activating core analysis task for task: {current_task_name}")
            return True

        return False
|
||||
|
||||
|
||||
class ResmokeCoreAnalysisTaskGenerator(CoreAnalysisTaskGenerator):
    """Generator that discovers core dumps through the task's uploaded artifacts."""

    def get_core_analyzer_commands(
        self,
        task_id: str,
        execution: str,
        core_analyzer_results_url: str,
        gdb_index_cache: str,
        has_interesting_core_dumps: bool,
        boring_core_dump_pids: set,
    ) -> List[FunctionCall]:
        """Return the shared analyzer commands, without the bazel flag."""
        return _get_core_analyzer_commands(
            task_id,
            execution,
            core_analyzer_results_url,
            gdb_index_cache,
            has_interesting_core_dumps,
            boring_core_dump_pids,
        )

    def get_core_analysis_task_dependencies(self, compile_variant: str) -> set[TaskDependency]:
        """Depend on the ``archive_dist_test_debug`` task of the compile variant."""
        # TODO SERVER-92571 add archive_jstestshell_debug dep for variants that have it.
        return {TaskDependency("archive_dist_test_debug", compile_variant)}

    def find_cores(self) -> list[CoreInfo]:
        """Return the uploaded core dumps whose originating binary is available locally."""
        found: list[CoreInfo] = []

        # LOCAL_BIN_DIR does not exist on non-resmoke tasks, so return early as there is no work to be done.
        if not os.path.exists(LOCAL_BIN_DIR):
            print(f"Skipping task generation because binary directory not found: {LOCAL_BIN_DIR}")
            return found

        # PIDs that resmoke flagged as boring; empty when the file was never written.
        if os.path.exists(BORING_CORE_DUMP_PIDS_FILE):
            with open(BORING_CORE_DUMP_PIDS_FILE, "r") as pid_file:
                boring_pids = set(pid_file.read().split())
        else:
            boring_pids = set()

        task_info = self.evg_api.task_by_id(self.expansions.get("task_id"))
        dumpers = dumper.get_dumpers(None, None)

        for artifact in task_info.artifacts:
            match = re.search(r"Core Dump [0-9]+ \((.*)\.gz\)", artifact.name)
            if match is None:
                continue

            core_file = match.group(1)
            binary_name, bin_version = dumpers.dbg.get_binary_from_core_dump(core_file)
            # Dumps that carry a binary version are looked up in the multiversion directory.
            dir_to_check = MULTIVERSION_BIN_DIR if bin_version else LOCAL_BIN_DIR
            if binary_name not in os.listdir(dir_to_check):
                print(f"{core_file} was generated by {binary_name} but the binary was not found.")
                continue

            pid = get_core_pid(core_file)
            found.append(
                CoreInfo(
                    path=core_file,
                    binary_name=binary_name,
                    pid=pid,
                    marked_boring=pid in boring_pids,
                )
            )
        return found
|
||||
|
||||
|
||||
class BazelCoreAnalysisTaskGenerator(CoreAnalysisTaskGenerator):
    """Generator that discovers core dumps in the ``test.outputs`` directories of a bazel task."""

    def get_core_analyzer_commands(
        self,
        task_id: str,
        execution: str,
        core_analyzer_results_url: str,
        gdb_index_cache: str,
        has_interesting_core_dumps: bool,
        boring_core_dump_pids: set,
    ) -> List[FunctionCall]:
        """Return the shared analyzer commands with the bazel flag enabled."""
        return _get_core_analyzer_commands(
            task_id,
            execution,
            core_analyzer_results_url,
            gdb_index_cache,
            has_interesting_core_dumps,
            boring_core_dump_pids,
            is_bazel_task=True,
        )

    def find_cores(self) -> list[CoreInfo]:
        """Return every ``*.core`` / ``*.mdmp`` file under ``<workdir>/results/**/test.outputs``.

        Each ``test.outputs`` directory is matched against its own boring-PIDs file,
        if one exists there. ``binary_name`` is left empty for every returned entry.
        """
        cores: list[CoreInfo] = []
        results_dir = os.path.join(self.expansions.get("workdir"), "results")
        if not os.path.exists(results_dir):
            return cores

        # Search for core files in results/**/test.outputs/ directories
        outputs_dirs = glob.glob(os.path.join(results_dir, "**", "test.outputs"), recursive=True)
        for outputs_dir in outputs_dirs:
            # Read the boring-PIDs file that lives in this directory, not one
            # resolved against the current working directory.
            boring_dump_file = os.path.join(outputs_dir, BORING_CORE_DUMP_PIDS_FILE)
            boring_core_dump_pids = set()
            if os.path.exists(boring_dump_file):
                with open(boring_dump_file, "r") as file:
                    boring_core_dump_pids = set(file.read().split())

            for extension in ("*.core", "*.mdmp"):
                for core in glob.glob(os.path.join(outputs_dir, extension)):
                    # Check if resmoke reported this core dump as a "boring one", in the BORING_CORE_DUMP_PIDS_FILE.
                    pid = get_core_pid(os.path.basename(core))
                    boring = pid in boring_core_dump_pids

                    cores.append(CoreInfo(path=core, binary_name="", marked_boring=boring, pid=pid))
        return cores
|
||||
|
||||
|
||||
def get_generated_task_name(current_task_name: str, execution: str) -> str:
|
||||
# random string so we do not define the same task name for multiple variants which causes issues
|
||||
random_string = "".join(
|
||||
@ -42,73 +326,23 @@ def get_generated_task_name(current_task_name: str, execution: str) -> str:
|
||||
return f"{GENERATED_TASK_PREFIX}_{current_task_name}{execution}_{random_string}"
|
||||
|
||||
|
||||
def should_activate_core_analysis_task(task: EvgTask, evg_api: RetryingEvergreenApi) -> bool:
|
||||
# We hardcode some task names where the core analysis is extending the long pole
|
||||
# of required patch builds by 100 mins and the BFs are taking too long to fix.
|
||||
# This list is a quick fix to improve development velocity.
|
||||
# TODO(SERVER-118661): Remove disagg suites from skip list.
|
||||
skip_tasks = [
|
||||
"disagg_repl_jscore_passthrough",
|
||||
"disagg_repl_jscore_passthrough_secondary_reads",
|
||||
"disagg_sharded_colls_jscore_passthrough_secondary_reads_with_balancer",
|
||||
"disagg_two_nodes_repl_jscore_passthrough",
|
||||
"no_passthrough_disagg_override",
|
||||
]
|
||||
current_task_name = task.display_name
|
||||
if task.parent_task_id:
|
||||
parent_task = evg_api.task_by_id(task.parent_task_id)
|
||||
current_task_name = parent_task.display_name
|
||||
|
||||
if current_task_name in skip_tasks:
|
||||
print(f"Skipping core analysis task generation for task: {current_task_name}")
|
||||
return False
|
||||
|
||||
core_dump_pids = set()
|
||||
for artifact in task.artifacts:
|
||||
# Matches "Core Dump 2 (dump_mongo.670872.core.gz)", capturing "dump_mongo.670872.core"
|
||||
regex = re.search(r"Core Dump [0-9]+ \((.*)\.gz\)", artifact.name)
|
||||
if not regex:
|
||||
continue
|
||||
|
||||
core_file = regex.group(1)
|
||||
core_file_parts = core_file.split(".")
|
||||
|
||||
# Expected format is like dump_mongod.429814.core or dump_mongod-8.2.429814.core, where 429814 is the PID.
|
||||
assert len(core_file_parts) >= 3, "Unknown core dump file name format"
|
||||
assert str.isdigit(
|
||||
core_file_parts[-2]
|
||||
), "PID not in expected location of core dump file name"
|
||||
pid = core_file_parts[-2]
|
||||
core_dump_pids.add(pid)
|
||||
|
||||
boring_core_dump_pids = set()
|
||||
if os.path.exists(BORING_CORE_DUMP_PIDS_FILE):
|
||||
with open(BORING_CORE_DUMP_PIDS_FILE, "r") as file:
|
||||
boring_core_dump_pids = set(file.read().split())
|
||||
|
||||
print(f"detected core dump pids: {core_dump_pids}")
|
||||
print(f"boring core dump pids: {boring_core_dump_pids}")
|
||||
|
||||
interesting_core_dumps = core_dump_pids - boring_core_dump_pids
|
||||
|
||||
if interesting_core_dumps:
|
||||
print(f"The following interesting core dump pids were found: {interesting_core_dumps}")
|
||||
print("Activating core analysis task.")
|
||||
should_activate = True
|
||||
else:
|
||||
print("No interesting core dumps were found. Not activating core analysis task.")
|
||||
should_activate = False
|
||||
|
||||
return should_activate
|
||||
def get_core_pid(core_file_name: str) -> str:
    """Return the PID embedded in a core dump file name, as a string of digits.

    The expected format is like ``dump_mongod.429814.core`` or
    ``dump_mongod-8.2.429814.core``, where 429814 is the PID: the PID is the
    second-to-last dot-separated component.

    :param core_file_name: File name (a leading directory path is tolerated).
    :return: The PID component, unchanged, as a string.
    :raises AssertionError: If the name has fewer than three dot-separated parts
        or the PID component is not all digits.
    """
    parts = core_file_name.split(".")
    # Raised explicitly (rather than via `assert`) so the checks survive `python -O`.
    if len(parts) < 3:
        raise AssertionError("Unknown core dump file name format")
    pid = parts[-2]
    if not pid.isdigit():
        raise AssertionError("PID not in expected location of core dump file name")
    return pid
|
||||
|
||||
|
||||
def get_core_analyzer_commands(
|
||||
def _get_core_analyzer_commands(
|
||||
task_id: str,
|
||||
execution: str,
|
||||
core_analyzer_results_url: str,
|
||||
gdb_index_cache: str,
|
||||
has_interesting_core_dumps: bool,
|
||||
boring_core_dump_pids: set,
|
||||
is_bazel_task: bool = False,
|
||||
) -> List[FunctionCall]:
|
||||
"""Return setup commands."""
|
||||
return [
|
||||
@ -134,9 +368,9 @@ def get_core_analyzer_commands(
|
||||
f"--gdb-index-cache={gdb_index_cache}",
|
||||
f"--boring-core-dump-pids={','.join(boring_core_dump_pids)}",
|
||||
"--generate-report",
|
||||
"--otel-extra-data",
|
||||
f"has_interesting_core_dumps={str(has_interesting_core_dumps).lower()}",
|
||||
],
|
||||
f"--otel-extra-data=has_interesting_core_dumps={str(has_interesting_core_dumps).lower()}",
|
||||
]
|
||||
+ ["--is-bazel-task" if is_bazel_task else None],
|
||||
"env": {
|
||||
"OTEL_TRACE_ID": "${otel_trace_id}",
|
||||
"OTEL_PARENT_ID": "${otel_parent_id}",
|
||||
@ -178,123 +412,6 @@ def get_core_analyzer_commands(
|
||||
]
|
||||
|
||||
|
||||
def generate(
|
||||
expansions_file: str = "../expansions.yml", output_file: str = "hang_analyzer_task.json"
|
||||
) -> None:
|
||||
if not sys.platform.startswith("linux"):
|
||||
print("This platform is not supported, skipping core analysis task generation.")
|
||||
return
|
||||
|
||||
# gather information from the current task being run
|
||||
expansions = read_config_file(expansions_file)
|
||||
distro = None
|
||||
for distro_expansion in ["core_analyzer_distro_name", "large_distro_name", "distro_id"]:
|
||||
if distro := expansions.get(distro_expansion, None):
|
||||
break
|
||||
assert distro is not None
|
||||
current_task_name = expansions.get("task_name")
|
||||
task_id = expansions.get("task_id")
|
||||
execution = expansions.get("execution")
|
||||
gdb_index_cache = "off" if expansions.get("core_analyzer_gdb_index_cache") == "off" else "on"
|
||||
build_variant_name = expansions.get("build_variant")
|
||||
core_analyzer_results_url = expansions.get("core_analyzer_results_url")
|
||||
compile_variant = expansions.get("compile_variant")
|
||||
|
||||
try:
|
||||
evg_api = evergreen_conn.get_evergreen_api()
|
||||
except RuntimeError:
|
||||
print(
|
||||
"WARNING: Cannot generate core analysis because the evergreen api file could not be found.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"This is probably not an error, if you want core analysis to run on this task make sure",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"the evergreen function 'configure evergreen api credentials' is called before this task",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return
|
||||
|
||||
task_info = evg_api.task_by_id(task_id)
|
||||
|
||||
# make sure we are not creating an infinite loop by generating a task from another generated task
|
||||
if current_task_name.startswith(GENERATED_TASK_PREFIX):
|
||||
print(
|
||||
f"Skipping task generation because {current_task_name} starts with {GENERATED_TASK_PREFIX}"
|
||||
)
|
||||
return
|
||||
|
||||
# LOCAL_BIN_DIR does not exists on non-resmoke tasks, so return early as there is no work to be done.
|
||||
if not os.path.exists(LOCAL_BIN_DIR):
|
||||
print(f"Skipping task generation because binary directory not found: {LOCAL_BIN_DIR}")
|
||||
return
|
||||
|
||||
# See if any core dumps were uploaded for this task
|
||||
has_known_core_dumps = False
|
||||
dumpers = dumper.get_dumpers(None, None)
|
||||
|
||||
for artifact in task_info.artifacts:
|
||||
regex = re.search(r"Core Dump [0-9]+ \((.*)\.gz\)", artifact.name)
|
||||
if not regex:
|
||||
continue
|
||||
|
||||
core_file = regex.group(1)
|
||||
binary_name, bin_version = dumpers.dbg.get_binary_from_core_dump(core_file)
|
||||
dir_to_check = MULTIVERSION_BIN_DIR if bin_version else LOCAL_BIN_DIR
|
||||
binary_files = os.listdir(dir_to_check)
|
||||
if binary_name in binary_files:
|
||||
has_known_core_dumps = True
|
||||
break
|
||||
print(f"{core_file} was generated by {binary_name} but the binary was not found.")
|
||||
|
||||
if not has_known_core_dumps:
|
||||
print(
|
||||
"No core dumps with known binaries found for this task, skipping core analysis task generation."
|
||||
)
|
||||
return
|
||||
|
||||
should_activate = should_activate_core_analysis_task(task_info, evg_api)
|
||||
|
||||
# Get boring core dump PIDs to pass to the analyzer
|
||||
boring_core_dump_pids = set()
|
||||
if os.path.exists(BORING_CORE_DUMP_PIDS_FILE):
|
||||
with open(BORING_CORE_DUMP_PIDS_FILE, "r") as file:
|
||||
boring_core_dump_pids = set(file.read().split())
|
||||
|
||||
# Make the evergreen variant that will be generated
|
||||
build_variant = BuildVariant(name=build_variant_name)
|
||||
commands = get_core_analyzer_commands(
|
||||
task_id,
|
||||
execution,
|
||||
core_analyzer_results_url,
|
||||
gdb_index_cache,
|
||||
should_activate,
|
||||
boring_core_dump_pids,
|
||||
)
|
||||
|
||||
deps = {TaskDependency("archive_dist_test_debug", compile_variant)}
|
||||
# TODO SERVER-92571 add archive_jstestshell_debug dep for variants that have it.
|
||||
sub_tasks = set([Task(get_generated_task_name(current_task_name, execution), commands, deps)])
|
||||
|
||||
build_variant.add_tasks(sub_tasks, distros=[distro], activate=should_activate)
|
||||
|
||||
shrub_project = ShrubProject.empty()
|
||||
shrub_project.add_build_variant(build_variant)
|
||||
|
||||
# shrub.py currently does not support adding task deps that override the variant deps
|
||||
output_dict = shrub_project.as_dict()
|
||||
deps_list = []
|
||||
for dep in deps:
|
||||
deps_list.append(dep.as_dict())
|
||||
for variant in output_dict["buildvariants"]:
|
||||
for task in variant["tasks"]:
|
||||
task["depends_on"] = deps_list
|
||||
|
||||
write_file(output_file, json.dumps(output_dict))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
@ -307,7 +424,23 @@ if __name__ == "__main__":
|
||||
help="Name of output file to write the generated task config to.",
|
||||
default="hang_analyzer_task.json",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tests-use-bazel",
|
||||
action="store_true",
|
||||
help="Generate for bazel result task (look for cores in results/*/test.outputs/)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-mock-tasks",
|
||||
action="store_true",
|
||||
help=argparse.SUPPRESS, # Use mock Evergreen tasks and skip Evergreen API calls, for unit testing this script.
|
||||
)
|
||||
args = parser.parse_args()
|
||||
expansions_file = args.expansions_file
|
||||
output_file = args.output_file
|
||||
generate(expansions_file, output_file)
|
||||
|
||||
if args.tests_use_bazel:
|
||||
generator = BazelCoreAnalysisTaskGenerator(args.expansions_file, args.use_mock_tasks)
|
||||
else:
|
||||
generator = ResmokeCoreAnalysisTaskGenerator(args.expansions_file, args.use_mock_tasks)
|
||||
|
||||
task_config = generator.generate()
|
||||
if task_config:
|
||||
write_file(args.output_file, json.dumps(task_config, indent=4))
|
||||
|
||||
@ -52,3 +52,14 @@ py_test(
|
||||
"//buildscripts/resmokelib/hang_analyzer",
|
||||
],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "test_gen_hang_analyzer_tasks",
|
||||
srcs = [
|
||||
"test_gen_hang_analyzer_tasks.py",
|
||||
],
|
||||
deps = [
|
||||
"//buildscripts/resmokelib",
|
||||
"//buildscripts/resmokelib/hang_analyzer",
|
||||
],
|
||||
)
|
||||
|
||||
@ -53,6 +53,7 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
install_dir,
|
||||
analysis_dir,
|
||||
multiversion_dir,
|
||||
None,
|
||||
"on",
|
||||
boring_pids,
|
||||
)
|
||||
@ -88,7 +89,13 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
|
||||
# Pass empty set of boring PIDs
|
||||
report = self.dumper.analyze_cores(
|
||||
tmpdir, "/mock/install", tmpdir, "/mock/multiversion", "on", set()
|
||||
tmpdir,
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
set(),
|
||||
)
|
||||
|
||||
# Should analyze all cores
|
||||
@ -112,7 +119,13 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
|
||||
# Pass None for boring PIDs
|
||||
report = self.dumper.analyze_cores(
|
||||
tmpdir, "/mock/install", tmpdir, "/mock/multiversion", "on", None
|
||||
tmpdir,
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
None,
|
||||
)
|
||||
|
||||
# Should analyze all cores
|
||||
@ -137,7 +150,13 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
|
||||
# Should cap at 10 by default
|
||||
report = self.dumper.analyze_cores(
|
||||
tmpdir, "/mock/install", tmpdir, "/mock/multiversion", "on", None
|
||||
tmpdir,
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
None,
|
||||
)
|
||||
|
||||
# Should only analyze 10 cores (default max)
|
||||
@ -166,6 +185,7 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
None,
|
||||
max_core_dumps=10,
|
||||
@ -202,6 +222,7 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
boring_pids,
|
||||
max_core_dumps=20,
|
||||
@ -234,7 +255,13 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
boring_pids = {"12345"}
|
||||
|
||||
self.dumper.analyze_cores(
|
||||
tmpdir, "/mock/install", tmpdir, "/mock/multiversion", "on", boring_pids
|
||||
tmpdir,
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
boring_pids,
|
||||
)
|
||||
|
||||
# Should analyze 2 cores (the unparseable ones are treated as interesting)
|
||||
@ -271,7 +298,13 @@ class TestCoreDumpFiltering(unittest.TestCase):
|
||||
boring_pids = {"12345"}
|
||||
|
||||
self.dumper.analyze_cores(
|
||||
tmpdir, "/mock/install", tmpdir, "/mock/multiversion", "on", boring_pids
|
||||
tmpdir,
|
||||
"/mock/install",
|
||||
tmpdir,
|
||||
"/mock/multiversion",
|
||||
None,
|
||||
"on",
|
||||
boring_pids,
|
||||
)
|
||||
|
||||
# Should analyze 2 interesting cores
|
||||
|
||||
@ -0,0 +1,585 @@
|
||||
"""Unit tests for the buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks module."""
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks import (
|
||||
GENERATED_TASK_PREFIX,
|
||||
BazelCoreAnalysisTaskGenerator,
|
||||
CoreInfo,
|
||||
ResmokeCoreAnalysisTaskGenerator,
|
||||
_get_core_analyzer_commands,
|
||||
get_core_pid,
|
||||
)
|
||||
|
||||
|
||||
class TestCorePidExtraction(unittest.TestCase):
    """Checks that get_core_pid pulls the PID out of core dump file names."""

    def test_standard_core_dump_format(self):
        self.assertEqual(get_core_pid("dump_mongod.429814.core"), "429814")

    def test_multiversion_core_dump_format(self):
        # The version suffix adds an extra dot-separated component before the PID.
        self.assertEqual(get_core_pid("dump_mongod-8.2.429814.core"), "429814")

    def test_with_path(self):
        self.assertEqual(get_core_pid("/path/to/dump_mongod.789012.core"), "789012")

    def test_invalid_format_non_digit_pid(self):
        """A PID component that is not all digits must raise an assertion."""
        self.assertRaises(AssertionError, get_core_pid, "dump_mongod.notanumber.core")
|
||||
|
||||
|
||||
@unittest.skipIf(
|
||||
not sys.platform.startswith("linux"),
|
||||
reason="Core analysis is only support on linux",
|
||||
)
|
||||
class TestGetCoreAnalyzerCommands(unittest.TestCase):
|
||||
"""Unit tests for get_core_analyzer_commands function."""
|
||||
|
||||
def test_returns_list_of_function_calls(self):
|
||||
"""Test that function returns a list."""
|
||||
commands = _get_core_analyzer_commands("task123", "0", "s3://results", "on", True, set())
|
||||
self.assertIsInstance(commands, list)
|
||||
self.assertGreater(len(commands), 0)
|
||||
|
||||
def test_includes_task_id_in_subprocess_command(self):
|
||||
"""Test that task ID is included in subprocess command."""
|
||||
task_id = "task_abc_123"
|
||||
commands = _get_core_analyzer_commands(task_id, "0", "s3://results", "on", True, set())
|
||||
|
||||
# Find the subprocess.exec command
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
self.assertIsNotNone(subprocess_cmd)
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
self.assertIn(f"--task-id={task_id}", args)
|
||||
|
||||
def test_includes_execution_in_subprocess_command(self):
|
||||
"""Test that execution is included in subprocess command."""
|
||||
execution = "3"
|
||||
commands = _get_core_analyzer_commands(
|
||||
"task123", execution, "s3://results", "on", True, set()
|
||||
)
|
||||
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
self.assertIn(f"--execution={execution}", args)
|
||||
|
||||
def test_includes_gdb_index_cache_setting(self):
|
||||
"""Test that gdb index cache setting is included."""
|
||||
for cache_setting in ["on", "off"]:
|
||||
commands = _get_core_analyzer_commands(
|
||||
"task123", "0", "s3://results", cache_setting, True, set()
|
||||
)
|
||||
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
self.assertIn(f"--gdb-index-cache={cache_setting}", args)
|
||||
|
||||
def test_includes_boring_core_dump_pids(self):
|
||||
"""Test that boring core dump PIDs are included."""
|
||||
boring_pids = {"12345", "67890", "11111"}
|
||||
commands = _get_core_analyzer_commands(
|
||||
"task123", "0", "s3://results", "on", True, boring_pids
|
||||
)
|
||||
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
|
||||
# Find the boring PIDs argument
|
||||
boring_arg = None
|
||||
for arg in args:
|
||||
if arg and arg.startswith("--boring-core-dump-pids="):
|
||||
boring_arg = arg
|
||||
break
|
||||
|
||||
self.assertIsNotNone(boring_arg)
|
||||
# Check that all PIDs are in the argument
|
||||
for pid in boring_pids:
|
||||
self.assertIn(pid, boring_arg)
|
||||
|
||||
def test_empty_boring_pids(self):
|
||||
"""Test handling of empty boring PIDs set."""
|
||||
commands = _get_core_analyzer_commands("task123", "0", "s3://results", "on", True, set())
|
||||
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
|
||||
boring_arg = None
|
||||
for arg in args:
|
||||
if arg and arg.startswith("--boring-core-dump-pids="):
|
||||
boring_arg = arg
|
||||
break
|
||||
|
||||
self.assertIsNotNone(boring_arg)
|
||||
self.assertEqual(boring_arg, "--boring-core-dump-pids=")
|
||||
|
||||
def test_bazel_task_flag(self):
|
||||
"""Test that is_bazel_task flag is passed correctly."""
|
||||
commands = _get_core_analyzer_commands(
|
||||
"task123", "0", "s3://results", "on", True, set(), is_bazel_task=True
|
||||
)
|
||||
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
self.assertIn("--is-bazel-task", args)
|
||||
|
||||
def test_non_bazel_task_no_flag(self):
|
||||
"""Test that non-bazel tasks don't include the bazel flag."""
|
||||
commands = _get_core_analyzer_commands(
|
||||
"task123", "0", "s3://results", "on", True, set(), is_bazel_task=False
|
||||
)
|
||||
|
||||
subprocess_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict()):
|
||||
subprocess_cmd = cmd
|
||||
break
|
||||
|
||||
cmd_dict = subprocess_cmd.as_dict()
|
||||
args = cmd_dict["params"]["args"]
|
||||
# Filter out None values
|
||||
args = [arg for arg in args if arg is not None]
|
||||
self.assertNotIn("--is-bazel-task", args)
|
||||
|
||||
def test_includes_s3_put_with_results_url(self):
|
||||
"""Test that S3 put command includes correct results URL."""
|
||||
results_url = "s3://bucket/path/to/results.tgz"
|
||||
commands = _get_core_analyzer_commands("task123", "0", results_url, "on", True, set())
|
||||
|
||||
s3_cmd = None
|
||||
for cmd in commands:
|
||||
if hasattr(cmd, "as_dict") and "s3.put" in str(cmd.as_dict()):
|
||||
s3_cmd = cmd
|
||||
break
|
||||
|
||||
self.assertIsNotNone(s3_cmd)
|
||||
cmd_dict = s3_cmd.as_dict()
|
||||
self.assertEqual(cmd_dict["params"]["remote_file"], results_url)
|
||||
|
||||
def test_includes_otel_extra_data(self):
    """Test that OTEL extra data includes has_interesting_core_dumps flag.

    Runs once for each boolean value; each iteration is its own subTest so a
    failure reports which value of ``has_interesting`` produced it.
    """
    for has_interesting in [True, False]:
        with self.subTest(has_interesting=has_interesting):
            commands = _get_core_analyzer_commands(
                "task123", "0", "s3://results", "on", has_interesting, set()
            )

            # First command whose serialized form mentions subprocess.exec, or None.
            subprocess_cmd = next(
                (
                    cmd
                    for cmd in commands
                    if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict())
                ),
                None,
            )

            # Fail with a readable assertion instead of an AttributeError on None.
            self.assertIsNotNone(subprocess_cmd)

            args = subprocess_cmd.as_dict()["params"]["args"]

            # The flag value is the lower-cased Python boolean ("true"/"false").
            expected_str = (
                f"--otel-extra-data=has_interesting_core_dumps={str(has_interesting).lower()}"
            )
            self.assertIn(expected_str, args)
|
||||
|
||||
|
||||
@unittest.skipIf(
    not sys.platform.startswith("linux"),
    reason="Core analysis is only supported on linux",
)
class TestCoreAnalysisTaskGenerator(unittest.TestCase):
    """Unit tests for CoreAnalysisTaskGenerator base class.

    The tests instantiate ResmokeCoreAnalysisTaskGenerator with
    ``use_mock_tasks=True`` and patch ``read_config_file`` (and, where needed,
    ``find_cores``) so no real expansions file or core dump is required.
    """

    def setUp(self):
        """Set up test fixtures."""
        self.expansions_file = "test_expansions.yml"
        self.mock_expansions = {
            "task_name": "resmoke_test",
            "task_id": "test_task_123",
            "execution": "0",
            "build_variant": "ubuntu2204",
            "distro_id": "ubuntu2204-large",
            "core_analyzer_results_url": "s3://bucket/results.tgz",
            "compile_variant": "ubuntu2204-compile",
            "workdir": "/data/mci",
        }

    @staticmethod
    def _make_cores(marked_boring):
        """Return one mongod and one mongos CoreInfo sharing ``marked_boring``."""
        return [
            CoreInfo(
                path="/tmp/dump_mongod.123.core",
                binary_name="mongod",
                pid="123",
                marked_boring=marked_boring,
            ),
            CoreInfo(
                path="/tmp/dump_mongos.456.core",
                binary_name="mongos",
                pid="456",
                marked_boring=marked_boring,
            ),
        ]

    def _generate_with_cores(self, cores):
        """Run ``generate()`` with ``find_cores`` patched to return ``cores``."""
        with patch.object(ResmokeCoreAnalysisTaskGenerator, "find_cores", return_value=cores):
            generator = ResmokeCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)
            return generator.generate()

    @staticmethod
    def _make_task(display_name):
        """Return a MagicMock task with the attributes set by these tests."""
        mock_task = MagicMock()
        mock_task.display_name = display_name
        mock_task.parent_task_id = None
        mock_task.build_variant = "ubuntu2204"
        return mock_task

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    def test_generate_creates_task_config_with_interesting_cores(self, mock_read_config):
        """Test that generate creates proper task config when interesting cores are found."""
        mock_read_config.return_value = self.mock_expansions

        result = self._generate_with_cores(self._make_cores(marked_boring=False))

        self.assertIsNotNone(result)
        self.assertIn("buildvariants", result)
        self.assertEqual(len(result["buildvariants"]), 1)

        variant = result["buildvariants"][0]
        self.assertEqual(variant["name"], "ubuntu2204")
        self.assertEqual(len(variant["tasks"]), 1)

        task = variant["tasks"][0]
        self.assertTrue(task["activate"])
        self.assertTrue(task["name"].startswith(GENERATED_TASK_PREFIX))

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    def test_generate_does_not_activate_with_only_boring_cores(self, mock_read_config):
        """Test that task is not activated when only boring cores are found."""
        mock_read_config.return_value = self.mock_expansions

        result = self._generate_with_cores(self._make_cores(marked_boring=True))

        self.assertIsNotNone(result)
        variant = result["buildvariants"][0]
        task = variant["tasks"][0]
        self.assertFalse(task["activate"])

    def test_should_skip_task_for_hardcoded_task_names(self):
        """Test that hardcoded task names are skipped."""
        with patch(
            "buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file"
        ) as mock_read:
            mock_read.return_value = self.mock_expansions
            generator = ResmokeCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)

            # Test skipped task names
            for task_name in ["no_passthrough_disagg_override", "disagg_repl_jscore_passthrough"]:
                with self.subTest(task_name=task_name):
                    self.assertTrue(generator._should_skip_task(self._make_task(task_name)))

    def test_should_not_skip_normal_task(self):
        """Test that normal tasks are not skipped."""
        with patch(
            "buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file"
        ) as mock_read:
            mock_read.return_value = self.mock_expansions
            generator = ResmokeCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)

            self.assertFalse(generator._should_skip_task(self._make_task("normal_task")))
|
||||
|
||||
|
||||
@unittest.skipIf(
    not sys.platform.startswith("linux"),
    reason="Core analysis is only supported on linux",
)
class TestResmokeCoreAnalysisTaskGenerator(unittest.TestCase):
    """Unit tests for ResmokeCoreAnalysisTaskGenerator.

    ``find_cores`` is exercised against mocked task artifacts, a mocked
    debugger (``dumper.get_dumpers``) and a mocked boring-PID file opened
    through ``builtins.open``.
    """

    def setUp(self):
        """Set up test fixtures."""
        self.expansions_file = "test_expansions.yml"
        self.mock_expansions = {
            "task_name": "resmoke_test",
            "task_id": "test_task_123",
            "execution": "0",
            "build_variant": "ubuntu2204",
            "distro_id": "ubuntu2204-large",
            "core_analyzer_results_url": "s3://bucket/results.tgz",
            "compile_variant": "ubuntu2204-compile",
            "workdir": "/data/mci",
        }

    @staticmethod
    def _make_task_with_artifacts(artifact_names):
        """Return a mock task whose ``artifacts`` carry the given names."""
        artifacts = []
        for artifact_name in artifact_names:
            artifact = MagicMock()
            # ``name`` must be assigned after construction; passing it to the
            # MagicMock constructor would name the mock itself instead.
            artifact.name = artifact_name
            artifacts.append(artifact)

        mock_task = MagicMock()
        mock_task.artifacts = artifacts
        return mock_task

    @staticmethod
    def _make_dumpers(mock_dbg):
        """Wrap ``mock_dbg`` in a mock dumpers object exposing it as ``dbg``."""
        mock_dumpers = MagicMock()
        mock_dumpers.dbg = mock_dbg
        return mock_dumpers

    def _find_cores(self, mock_task, boring_file_contents):
        """Run ``find_cores`` with ``open`` mocked to yield ``boring_file_contents``."""
        with patch("builtins.open", unittest.mock.mock_open(read_data=boring_file_contents)):
            generator = ResmokeCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)
            generator.evg_api.task_by_id.return_value = mock_task
            return generator.find_cores()

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.os.path.exists")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.os.listdir")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.dumper.get_dumpers")
    def test_find_cores_discovers_cores_from_artifacts(
        self, mock_get_dumpers, mock_listdir, mock_exists, mock_read_config
    ):
        """Test that find_cores discovers cores from task artifacts."""
        mock_read_config.return_value = self.mock_expansions

        # Mock binary directory exists
        def exists_side_effect(path):
            return "dist-test/bin" in path or "boring_core_dumps.txt" in path

        mock_exists.side_effect = exists_side_effect
        mock_listdir.return_value = ["mongod", "mongos"]

        # Mock task artifacts
        mock_task = self._make_task_with_artifacts(
            [
                "Core Dump 1 (dump_mongod.12345.core.gz)",
                "Core Dump 2 (dump_mongos.67890.core.gz)",
            ]
        )

        # Mock dumper: one (binary, None) answer per core, in artifact order.
        mock_dbg = MagicMock()
        mock_dbg.get_binary_from_core_dump.side_effect = [
            ("mongod", None),
            ("mongos", None),
        ]
        mock_get_dumpers.return_value = self._make_dumpers(mock_dbg)

        # Empty boring-PID file: no core should be affected by it.
        cores = self._find_cores(mock_task, "")

        self.assertEqual(len(cores), 2)
        self.assertEqual(cores[0].path, "dump_mongod.12345.core")
        self.assertEqual(cores[0].binary_name, "mongod")
        self.assertEqual(cores[0].pid, "12345")
        self.assertEqual(cores[1].path, "dump_mongos.67890.core")
        self.assertEqual(cores[1].binary_name, "mongos")
        self.assertEqual(cores[1].pid, "67890")

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.os.path.exists")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.os.listdir")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.dumper.get_dumpers")
    def test_find_cores_marks_boring_cores(
        self, mock_get_dumpers, mock_listdir, mock_exists, mock_read_config
    ):
        """Test that find_cores correctly marks boring cores."""
        mock_read_config.return_value = self.mock_expansions
        mock_exists.return_value = True
        mock_listdir.return_value = ["mongod"]

        # Mock artifact with boring core
        mock_task = self._make_task_with_artifacts(["Core Dump 1 (dump_mongod.12345.core.gz)"])

        # Mock dumper
        mock_dbg = MagicMock()
        mock_dbg.get_binary_from_core_dump.return_value = ("mongod", None)
        mock_get_dumpers.return_value = self._make_dumpers(mock_dbg)

        # Mock boring PIDs file with PID 12345
        cores = self._find_cores(mock_task, "12345\n67890\n")

        self.assertEqual(len(cores), 1)
        self.assertTrue(cores[0].marked_boring)
|
||||
|
||||
|
||||
@unittest.skipIf(
    not sys.platform.startswith("linux"),
    reason="Core analysis is only supported on linux",
)
class TestBazelCoreAnalysisTaskGenerator(unittest.TestCase):
    """Unit tests for BazelCoreAnalysisTaskGenerator.

    Filesystem access is simulated by patching ``os.path.exists`` and
    ``glob.glob`` in the generator module.
    """

    def setUp(self):
        """Set up test fixtures."""
        self.expansions_file = "test_expansions.yml"
        self.mock_expansions = {
            "task_name": "bazel_test",
            "task_id": "test_task_123",
            "execution": "0",
            "build_variant": "ubuntu2204",
            "distro_id": "ubuntu2204-large",
            "core_analyzer_results_url": "s3://bucket/results.tgz",
            "compile_variant": "ubuntu2204-compile",
            "workdir": "/data/mci",
        }

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.os.path.exists")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.glob.glob")
    def test_find_cores_discovers_cores_in_test_outputs(
        self, mock_glob, mock_exists, mock_read_config
    ):
        """Test that find_cores discovers cores in test.outputs directories."""
        mock_read_config.return_value = self.mock_expansions

        # Any path under "results" exists, except the boring-PID file.
        def exists_side_effect(path):
            return "results" in path and "boring_core_dumps.txt" not in path

        mock_exists.side_effect = exists_side_effect

        # Mock glob to return test.outputs directories
        def glob_side_effect(pattern, **kwargs):
            if ".core" in pattern:
                if "test1" in pattern:
                    return ["/data/mci/results/test1/test.outputs/dump_mongod.12345.core"]
                elif "test2" in pattern:
                    return ["/data/mci/results/test2/test.outputs/dump_mongos.67890.core"]
            elif ".mdmp" in pattern:
                return []
            elif "test.outputs" in pattern and "recursive" in kwargs:
                return [
                    "/data/mci/results/test1/test.outputs",
                    "/data/mci/results/test2/test.outputs",
                ]
            return []

        mock_glob.side_effect = glob_side_effect

        generator = BazelCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)
        cores = generator.find_cores()

        self.assertEqual(len(cores), 2)
        self.assertIn("dump_mongod.12345.core", cores[0].path)
        self.assertEqual(cores[0].pid, "12345")
        self.assertIn("dump_mongos.67890.core", cores[1].path)
        self.assertEqual(cores[1].pid, "67890")

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.os.path.exists")
    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.glob.glob")
    def test_find_cores_marks_boring_cores_from_test_outputs(
        self, mock_glob, mock_exists, mock_read_config
    ):
        """Test that find_cores marks boring cores based on boring_core_dumps.txt."""
        mock_read_config.return_value = self.mock_expansions

        # The results tree exists, and so does any boring_core_dumps.txt path.
        def exists_side_effect(path):
            if "results" in path and "test.outputs" not in path:
                return True
            return "boring_core_dumps.txt" in path

        mock_exists.side_effect = exists_side_effect

        def glob_side_effect(pattern, **kwargs):
            if ".core" in pattern:
                return ["/data/mci/results/test1/test.outputs/dump_mongod.12345.core"]
            elif ".mdmp" in pattern:
                return []
            elif "test.outputs" in pattern and "recursive" in kwargs:
                return ["/data/mci/results/test1/test.outputs"]
            return []

        mock_glob.side_effect = glob_side_effect

        # Mock boring PIDs file
        with patch("builtins.open", unittest.mock.mock_open(read_data="12345\n")):
            generator = BazelCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)
            cores = generator.find_cores()

        self.assertEqual(len(cores), 1)
        self.assertTrue(cores[0].marked_boring)

    @patch("buildscripts.resmokelib.hang_analyzer.gen_hang_analyzer_tasks.read_config_file")
    def test_get_core_analyzer_commands_includes_bazel_flag(self, mock_read_config):
        """Test that get_core_analyzer_commands includes bazel flag."""
        mock_read_config.return_value = self.mock_expansions

        generator = BazelCoreAnalysisTaskGenerator(self.expansions_file, use_mock_tasks=True)

        commands = generator.get_core_analyzer_commands(
            "task123", "0", "s3://results", "on", True, set()
        )

        # Find subprocess command and verify it has --is-bazel-task flag
        subprocess_cmd = next(
            (
                cmd
                for cmd in commands
                if hasattr(cmd, "as_dict") and "subprocess.exec" in str(cmd.as_dict())
            ),
            None,
        )

        self.assertIsNotNone(subprocess_cmd)
        args = subprocess_cmd.as_dict()["params"]["args"]
        self.assertIn("--is-bazel-task", args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test case in this module when executed directly as a script.
    unittest.main()
|
||||
@ -1302,12 +1302,18 @@ functions:
|
||||
- *execute_resmoke_tests_via_bazel_sh
|
||||
|
||||
"fetch remote test results":
|
||||
- *f_expansions_write
|
||||
- command: subprocess.exec
|
||||
params:
|
||||
binary: bash
|
||||
add_expansions_to_env: true
|
||||
args:
|
||||
- "./src/evergreen/fetch_remote_test_results.sh"
|
||||
- command: expansions.update
|
||||
params:
|
||||
file: "${workdir}/results/test_failures_exist.yml"
|
||||
ignore_missing_file: true
|
||||
- *f_expansions_write
|
||||
|
||||
"assume ECR role": &assume_ecr_role
|
||||
command: ec2.assume_role
|
||||
@ -2967,6 +2973,65 @@ functions:
|
||||
display_name: Core Analyzer Results
|
||||
optional: true
|
||||
|
||||
"generate result task hang analyzer":
|
||||
- command: expansions.update
|
||||
params:
|
||||
updates:
|
||||
- key: core_analyzer_results_url
|
||||
value: ${project}/${build_variant}/${task_id}/${execution}/core-analyzer-results.tgz
|
||||
- key: parent_task_id
|
||||
value: ${parent_task_id}
|
||||
- *f_expansions_write
|
||||
- command: subprocess.exec
|
||||
display_name: "Generate core analysis task"
|
||||
params:
|
||||
binary: bash
|
||||
include_expansions_in_env:
|
||||
- workdir
|
||||
- test_failures_exist
|
||||
args:
|
||||
- "src/evergreen/generate_core_analysis_task.sh"
|
||||
- "--tests-use-bazel"
|
||||
- command: s3.put
|
||||
params:
|
||||
aws_key: ${aws_key}
|
||||
aws_secret: ${aws_secret}
|
||||
local_file: src/hang_analyzer_task.json
|
||||
remote_file: ${project}/${build_variant}/${revision}/hang_analyzer_tasks/${task_id}.json
|
||||
bucket: mciuploads
|
||||
permissions: public-read
|
||||
content_type: application/json
|
||||
display_name: Generated Hang Analyzer Task Config - Execution ${execution}
|
||||
optional: true
|
||||
- command: generate.tasks
|
||||
params:
|
||||
optional: true
|
||||
files:
|
||||
- src/hang_analyzer_task.json
|
||||
- command: subprocess.exec
|
||||
params:
|
||||
binary: bash
|
||||
args:
|
||||
- "src/evergreen/run_python_script.sh"
|
||||
- "buildscripts/resmokelib/hang_analyzer/attach_core_analyzer_task.py"
|
||||
- command: attach.artifacts
|
||||
params:
|
||||
optional: true
|
||||
exact_file_names: true
|
||||
files:
|
||||
- src/core_analyzer_artifact.json
|
||||
- command: s3.put
|
||||
params:
|
||||
aws_key: ${aws_key}
|
||||
aws_secret: ${aws_secret}
|
||||
local_file: src/core_analyzer_results.txt
|
||||
remote_file: ${core_analyzer_results_url}
|
||||
bucket: mciuploads
|
||||
permissions: public-read
|
||||
content_type: text/plain
|
||||
display_name: Core Analyzer Results
|
||||
optional: true
|
||||
|
||||
"save unsymbolized stacktraces and local invocation":
|
||||
- command: s3.put
|
||||
params:
|
||||
|
||||
@ -2359,4 +2359,26 @@ tasks:
|
||||
params:
|
||||
binary: bash
|
||||
args:
|
||||
- "./src/buildscripts/bazel_testbuilds/verify_coredump_test.sh"
|
||||
- "./src/buildscripts/bazel_testbuilds/verify_unittest_coredump_test.sh"
|
||||
|
||||
- name: verify_resmoke_coredump
|
||||
tags: ["assigned_to_jira_team_devprod_correctness", "auxiliary"]
|
||||
exec_timeout_secs: 1800 # 30 minute timeout
|
||||
commands:
|
||||
- command: manifest.load
|
||||
- func: "git get project and add git tag"
|
||||
- func: "f_expansions_write"
|
||||
- func: "kill processes"
|
||||
- func: "cleanup environment"
|
||||
- func: "set up venv"
|
||||
- func: "get engflow creds"
|
||||
- command: subprocess.exec
|
||||
display_name: "Verify resmoke coredump generation"
|
||||
type: test
|
||||
timeout_secs: 1800 # 30 minutes
|
||||
params:
|
||||
binary: bash
|
||||
include_expansions_in_env:
|
||||
- workdir
|
||||
args:
|
||||
- "./src/buildscripts/bazel_testbuilds/verify_resmoke_coredump_test.sh"
|
||||
|
||||
@ -93,6 +93,7 @@ buildvariants:
|
||||
build_mongot: true
|
||||
download_mongot_release: true
|
||||
compile_variant: *amazon-linux2023-arm64-static-compile
|
||||
core_analyzer_distro_name: amazon2023-arm64-latest-xlarge
|
||||
evergreen_remote_exec: on
|
||||
skip_debug_link: true
|
||||
remote_link: true
|
||||
|
||||
@ -36,6 +36,9 @@ buildvariants:
|
||||
- name: verify_ci_wrapper_coredump
|
||||
distros:
|
||||
- amazon2023-arm64-latest-m8g-2xlarge
|
||||
- name: verify_resmoke_coredump
|
||||
distros:
|
||||
- amazon2023-arm64-latest-m8g-2xlarge
|
||||
|
||||
# Experimental variant running bazel targets for integration tests. To be removed with SERVER-103537.
|
||||
- name: bazel-integration-tests
|
||||
|
||||
@ -4,9 +4,8 @@
|
||||
# bash fetch_remote_test_results.sh
|
||||
#
|
||||
# Assumes the following files exist:
|
||||
# ./"build_events.json" Build events JSON containing the records of remote test executions
|
||||
# "${workdir}/src/engflow.cert" EngFlow cert
|
||||
# "${workdir}/src/engflow.key" EngFlow key
|
||||
# ./"build_events.json" Build events JSON containing the records of remote test executions
|
||||
# engflow.cert and engflow.key located in either ${workdir}/src or ${HOME}/.engflow/creds
|
||||
#
|
||||
# Required environment variables:
|
||||
# * ${test_label} - The resmoke bazel target to get results for, like //buildscripts/resmokeconfig:core
|
||||
@ -180,15 +179,48 @@ function write_bazel_invocation() {
|
||||
sed "s/\S*\$/${test_label_escaped}/" ${workdir}/resmoke-tests-bazel-invocation.txt | tail -n 1 >"${workdir}/bazel-invocation.txt"
|
||||
}
|
||||
|
||||
# Records that test failures exist by writing a one-line YAML file
# (test_failures_exist: true) under ${workdir}/results, creating the
# directory first if it is missing.
function write_test_failures_expansion() {
    local expansion_path="${workdir}/results/test_failures_exist.yml"
    local expansion_dir
    expansion_dir="$(dirname "$expansion_path")"

    mkdir -p "$expansion_dir"
    printf '%s\n' "test_failures_exist: true" >"$expansion_path"
}
|
||||
|
||||
# Writes a heading to stdout followed by the concatenated contents of every
# regular file matching *test.log anywhere under ${workdir}/results.
function print_executor_logs() {
    local results_root="${workdir}/results"

    echo "Executor logs for all failed shards:"
    find "$results_root" -type f -name '*test.log' -exec cat {} +
}
|
||||
|
||||
# Resolves a file path from a list of candidate locations.
#
# Arguments:
#   $1 - NAME of an array variable holding candidate paths, in priority order.
# Outputs:
#   Prints the first candidate that is an existing regular file.
# Returns:
#   0 if a candidate was found, 1 (printing nothing) otherwise.
function resolve_file() {
    # The nameref gets a function-specific name so that a caller array called
    # "paths" does not turn it into a circular name reference.
    local -n _resolve_file_candidates=$1
    # Keep the loop variable local so a caller's "path" variable is not clobbered.
    local candidate

    for candidate in "${_resolve_file_candidates[@]}"; do
        if [ -f "$candidate" ]; then
            echo "$candidate"
            return 0
        fi
    done
    return 1
}
|
||||
|
||||
BEP_FILE='build_events.json'
|
||||
ENGFLOW_CERT="${workdir}/src/engflow.cert"
|
||||
ENGFLOW_KEY="${workdir}/src/engflow.key"
|
||||
|
||||
if ! [ -f "$ENGFLOW_CERT" ]; then
|
||||
cert_candidates=(
|
||||
"${workdir}/src/engflow.cert"
|
||||
"${HOME}/.engflow/creds/engflow.crt"
|
||||
)
|
||||
ENGFLOW_CERT=$(resolve_file cert_candidates)
|
||||
fi
|
||||
|
||||
if ! [ -f "$ENGFLOW_KEY" ]; then
|
||||
key_candidates=(
|
||||
"${workdir}/src/engflow.key"
|
||||
"${HOME}/.engflow/creds/engflow.key"
|
||||
)
|
||||
ENGFLOW_KEY=$(resolve_file key_candidates)
|
||||
fi
|
||||
|
||||
if [ ! -f "$BEP_FILE" ]; then
|
||||
echo "Error: File '$BEP_FILE' not found" >&2
|
||||
@ -217,6 +249,7 @@ while IFS= read -r test_result; do
|
||||
if is_failure "$test_result"; then
|
||||
is_failure_flag=1
|
||||
fail_task=1
|
||||
write_test_failures_expansion
|
||||
fi
|
||||
|
||||
download_outputs "$test_result" "$is_failure_flag"
|
||||
@ -239,6 +272,7 @@ if [[ "$failures" == 'No report.json files found' ]]; then
|
||||
if [[ "$fail_task" -eq 1 ]]; then
|
||||
echo 'No report/test logs were found, but the bazel test failed. Check the test executor logs below.'
|
||||
fi
|
||||
write_test_failures_expansion
|
||||
print_executor_logs
|
||||
exit $fail_task
|
||||
else
|
||||
|
||||
53
evergreen/generate_core_analysis_task.sh
Normal file
53
evergreen/generate_core_analysis_task.sh
Normal file
@ -0,0 +1,53 @@
|
||||
# Runs buildscripts/resmokelib/hang_analyzer/gen_hang_analyzer_tasks.py, but only
# when test failures were recorded and at least one core dump is present.
# All arguments given to this script are forwarded to the Python script.
set -o errexit
set -o verbose

# The test_failures_exist expansion is written by fetch_remote_test_results.sh.
# Core analysis is only triggered when test failures were recorded.
if [ "${test_failures_exist}" != "true" ]; then
    echo "No test failures detected (test_failures_exist: ${test_failures_exist}). Skipping core analysis task generation."
    exit 0
fi

# Finding a core dump below only means the Python script will run; whether a
# core analysis task is actually generated is decided inside that script. The
# cheap check lives here so that results tasks without core dumps do not pay
# for setting up the Python virtual environment.
results_dir="${workdir}/results"
if [ ! -d "$results_dir" ]; then
    echo "No results directory found at $results_dir. Skipping core analysis task generation."
    exit 0
fi

# Succeeds when some "test.outputs" directory under $1 directly contains an
# entry matching *.core or *.mdmp; fails otherwise.
function has_core_dumps() {
    local outputs_dir
    while IFS= read -r -d '' outputs_dir; do
        if compgen -G "${outputs_dir}/*.core" >/dev/null; then
            return 0
        fi
        if compgen -G "${outputs_dir}/*.mdmp" >/dev/null; then
            return 0
        fi
    done < <(find "$1" -type d -name "test.outputs" -print0)
    return 1
}

if ! has_core_dumps "$results_dir"; then
    echo "No core dumps found in $results_dir. Skipping core analysis task generation."
    exit 0
fi

echo "Core dumps found. Proceeding with core analysis task generation."

# The virtual environment is set up only at this point, so results tasks stay
# fast in the common case where there are no core dumps.
bash "${workdir}/src/evergreen/functions/venv_setup.sh"

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
. "$DIR/prelude.sh"

bash "${workdir}/src/evergreen/functions/evergreen_api_credentials_configure.sh"

cd src

activate_venv
echo $python
$python buildscripts/resmokelib/hang_analyzer/gen_hang_analyzer_tasks.py "$@"
|
||||
@ -5,6 +5,7 @@ from urllib.request import urlretrieve
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), "."))
|
||||
from download_archive_dist_test_debug import get_task_id
|
||||
|
||||
from buildscripts.resmokelib.hang_analyzer.extractor import find_test_task_with_binaries
|
||||
from buildscripts.resmokelib.utils import evergreen_conn
|
||||
|
||||
|
||||
@ -13,19 +14,7 @@ def main():
|
||||
evg_api = evergreen_conn.get_evergreen_api(evergreen_config=evg_config)
|
||||
task_id = get_task_id(evg_api)
|
||||
|
||||
task = evg_api.task_by_id(task_id)
|
||||
tasks_in_variant = evg_api.tasks_by_build(task.build_id)
|
||||
|
||||
if "_burn_in_" in task.display_name:
|
||||
resmoke_tests_task = list(
|
||||
filter(lambda t: t.display_name.startswith("resmoke_tests_burn_in"), tasks_in_variant)
|
||||
)
|
||||
else:
|
||||
resmoke_tests_task = list(
|
||||
filter(lambda t: t.display_name == "resmoke_tests", tasks_in_variant)
|
||||
)
|
||||
assert len(resmoke_tests_task) == 1, "Could not find a unique resmoke test task"
|
||||
resmoke_tests_task = resmoke_tests_task[0]
|
||||
resmoke_tests_task = find_test_task_with_binaries(evg_api, task_id)
|
||||
|
||||
output_dir = "/data/mci/artifacts-resmoke_tests"
|
||||
os.mkdir(output_dir)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user