SERVER-127547: Fix resmoke e2e flake (#54391)

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
GitOrigin-RevId: 85fb3a165f9f6fc0d3ae8517c260fd238281d43b
This commit is contained in:
Steve McClure 2026-05-26 14:50:56 -04:00 committed by MongoDB Bot
parent 0966c1e4b1
commit d584641e47
2 changed files with 16 additions and 12 deletions

View File

@ -30,9 +30,13 @@ def register(logger, suites, start_time):
log suite summaries.
"""
# Snapshot subprocesses before setting HANG_ANALYZER_CALLED. Other threads observe
# that flag and may tear down fixtures (killing mongod/mongo processes). Capturing
# here ensures we see the full process set before any teardown races can occur.
pids_to_analyze = _get_pids() if "is_inner_level" not in config.INTERNAL_PARAMS else []
HANG_ANALYZER_CALLED.set()
header_msg = "Dumping stacks due to SIGUSR1 signal"
_dump_and_log(header_msg)
_dump_and_log(header_msg, pids_to_analyze)
def _handle_set_event(event_handle):
"""Event object handler for Windows.
@ -51,25 +55,24 @@ def register(logger, suites, start_time):
except win32event.error as err:
logger.error("Exception from win32event.WaitForSingleObject with error: %s" % err)
else:
# Snapshot subprocesses before setting HANG_ANALYZER_CALLED for the same
# reason as in _handle_sigusr1.
pids_to_analyze = (
_get_pids() if "is_inner_level" not in config.INTERNAL_PARAMS else []
)
HANG_ANALYZER_CALLED.set()
header_msg = "Dumping stacks due to signal from win32event.SetEvent"
_dump_and_log(header_msg)
_dump_and_log(header_msg, pids_to_analyze)
def _dump_and_log(header_msg):
def _dump_and_log(header_msg, pids_to_analyze):
"""Dump the stacks of all threads, write report file, and log suite summaries."""
_dump_stacks(logger, header_msg)
reportfile.write(suites)
testing.suite.Suite.log_summaries(logger, suites, time.time() - start_time)
if "is_inner_level" not in config.INTERNAL_PARAMS:
# Gather and analyze pids of all subprocesses.
# Do nothing for child resmoke process started by another resmoke process
# (e.g. backup_restore.js) The child processes of the child resmoke will be
# analyzed by the signal handler of the top-level resmoke process.
# i.e. the next few lines of code.
pids_to_analyze = _get_pids()
if pids_to_analyze:
_analyze_pids(logger, pids_to_analyze)
# On Windows spawn a thread to wait on an event object for signal to dump stacks. For Cygwin

View File

@ -11,8 +11,9 @@ def test_analysis(logger: logging.Logger, pids: list[int]) -> None:
Write the pids out to a file and kill them instead of running analysis.
This option will only be specified in resmoke selftests.
"""
with open(os.path.join(config.DBPATH_PREFIX, "test_analysis.txt"), "w") as analysis_file:
analysis_file.write("\n".join([str(pid) for pid in pids]))
with open(os.path.join(config.DBPATH_PREFIX, "test_analysis.txt"), "a") as analysis_file:
if pids:
analysis_file.write("\n".join(str(pid) for pid in pids) + "\n")
for pid in pids:
try:
proc = psutil.Process(pid)