mongo/buildscripts/resmokelib/hang_analyzer/dumper.py
Sean Lyons 5ba0504741 SERVER-115245 core-analyzer: match program name from core dumps across new lines (#45514)
GitOrigin-RevId: a2c361260454dcffeb02957b148bb1ce420b47b0
2025-12-23 18:56:54 +00:00

928 lines
34 KiB
Python

"""Tools to dump debug info for each OS."""
import glob
import itertools
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from datetime import datetime
from distutils import spawn
from io import StringIO
from typing import List, Tuple
from opentelemetry import trace
from opentelemetry.trace.status import StatusCode
from buildscripts.resmokelib import config as resmoke_config
from buildscripts.resmokelib.hang_analyzer.process import call, callo, find_program
from buildscripts.resmokelib.hang_analyzer.process_list import Pinfo
from buildscripts.resmokelib.utils.otel_utils import get_default_current_span
from buildscripts.simple_report import Report, Result
Dumpers = namedtuple("Dumpers", ["dbg", "jstack"])
TRACER = trace.get_tracer("resmoke")
def get_dumpers(root_logger: logging.Logger, dbg_output: str):
"""
Return OS-appropriate dumpers.
:param root_logger: Top-level logger
:param dbg_output: 'stdout' or 'file'
"""
dbg = None
jstack = None
if sys.platform.startswith("linux"):
dbg = GDBDumper(root_logger, dbg_output)
jstack = JstackDumper()
elif sys.platform in ["win32", "cygwin"]:
dbg = WindowsDumper(root_logger, dbg_output)
jstack = JstackWindowsDumper()
elif sys.platform == "darwin":
dbg = LLDBDumper(root_logger, dbg_output)
jstack = JstackDumper()
return Dumpers(dbg=dbg, jstack=jstack)
def find_files(file_name: str, path: str) -> List[str]:
return glob.glob(f"{path}/**/{file_name}", recursive=True)
class Dumper(metaclass=ABCMeta):
"""
Abstract base class for OS-specific dumpers.
:param dbg_output: 'stdout' or 'file'
:param root_logger: Top-level logger
"""
def __init__(self, root_logger: logging.Logger, dbg_output: str):
"""Initialize dumper."""
self._root_logger = root_logger
self._dbg_output = dbg_output
@abstractmethod
def dump_info(
self,
pinfo: Pinfo,
take_dump: bool,
):
"""
Perform dump for a process.
:param pinfo: A Pinfo describing the process
:param take_dump: Whether to take a core dump
"""
raise NotImplementedError("dump_info must be implemented in OS-specific subclasses")
@abstractmethod
def get_dump_ext(self):
"""Return the dump file extension."""
raise NotImplementedError("get_dump_ext must be implemented in OS-specific subclasses")
@abstractmethod
def _find_debugger(self):
"""Find the installed debugger."""
raise NotImplementedError("_find_debugger must be implemented in OS-specific subclasses")
@abstractmethod
def _prefix(self):
"""Return the commands to set up a debugger process."""
raise NotImplementedError("_prefix must be implemented in OS-specific subclasses")
@abstractmethod
def _process_specific(self, pinfo: Pinfo, take_dump: bool, logger: logging.Logger = None):
"""
Return the commands that attach to each process, dump info and detach.
:param pinfo: A Pinfo describing the process
:param take_dump: Whether to take a core dump
:param logger: Logger to output dump info to
"""
raise NotImplementedError("_process_specific must be implemented in OS-specific subclasses")
@abstractmethod
def analyze_cores(self, core_file_dir: str, install_dir: str, analysis_dir: str):
"""
Analyzes the inputted core dumps.
:param core_file_dir: Directory to be scanned for core dumps
:param install_dir: Directory to be scanned for binaries and debugsymbols
"""
raise NotImplementedError("analyze_cores must be implemented in OS-specific subclasses")
@abstractmethod
def _postfix(self):
"""Return the commands to exit the debugger."""
raise NotImplementedError("_postfix must be implemented in OS-specific subclasses")
@abstractmethod
def get_binary_from_core_dump(self, core_file_path):
"""Return the name of the binary that created the input core dump."""
raise NotImplementedError(
"get_binary_from_core_dump must be implemented in OS-specific subclasses"
)
class WindowsDumper(Dumper):
"""WindowsDumper class."""
def _find_debugger(self):
"""Find the installed debugger."""
# We are looking for c:\Program Files (x86)\Windows Kits\8.1\Debuggers\x64
debugger = "cdb.exe"
cdb = spawn.find_executable(debugger)
if cdb is not None:
return cdb
from win32com.shell import shell, shellcon
# Cygwin via sshd does not expose the normal environment variables
# Use the shell api to get the variable instead
root_dir = shell.SHGetFolderPath(0, shellcon.CSIDL_PROGRAM_FILESX86, None, 0)
# Construct the debugger search paths in most-recent order
debugger_paths = [os.path.join(root_dir, "Windows Kits", "10", "Debuggers", "x64")]
for idx in reversed(range(0, 2)):
debugger_paths.append(
os.path.join(root_dir, "Windows Kits", "8." + str(idx), "Debuggers", "x64")
)
for dbg_path in debugger_paths:
self._root_logger.info("Checking for debugger in %s", dbg_path)
cdb = os.path.join(dbg_path, debugger)
if os.path.exists(cdb):
return cdb
return None
def _prefix(self):
"""Return the commands to set up a debugger process."""
cmds = [
".symfix", # Fixup symbol path
"!sym noisy", # Enable noisy symbol loading
".symopt +0x10", # Enable line loading (off by default in CDB, on by default in WinDBG)
".reload", # Reload symbols
]
return cmds
def _process_specific(self, pinfo, take_dump, logger=None):
"""Return the commands that attach to each process, dump info and detach."""
assert isinstance(pinfo.pidv, int)
if take_dump:
# Dump to file, dump_<process name>.<pid>.mdmp
dump_file = "dump_%s.%d.%s" % (
os.path.splitext(pinfo.name)[0],
pinfo.pidv,
self.get_dump_ext(),
)
dump_command = ".dump /ma %s" % dump_file
self._root_logger.info("Dumping core to %s", dump_file)
cmds = [
dump_command,
".detach", # Detach
]
else:
cmds = [
"!peb", # Dump current exe, & environment variables
"lm", # Dump loaded modules
"!uniqstack -pn", # Dump All unique Threads with function arguments
"!cs -l", # Dump all locked critical sections
".detach", # Detach
]
return cmds
def _postfix(self):
"""Return the commands to exit the debugger."""
cmds = [
"q" # Quit
]
return cmds
def dump_info(self, pinfo, take_dump):
"""Dump useful information to the console."""
dbg = self._find_debugger()
if dbg is None:
self._root_logger.warning("Debugger not found, skipping dumping of %s", str(pinfo.pidv))
return
self._root_logger.info(
"Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, str(pinfo.pidv)
)
for pid in pinfo.pidv:
logger = _get_process_logger(self._dbg_output, pinfo.name, pid=pid)
process = Pinfo(name=pinfo.name, pidv=pid)
cmds = self._prefix() + self._process_specific(process, take_dump) + self._postfix()
call([dbg, "-c", ";".join(cmds), "-p", str(pid)], logger)
self._root_logger.info("Done analyzing %s process with PID %d", pinfo.name, pid)
def analyze_cores(self, core_file_dir: str, install_dir: str, analysis_dir: str):
install_dir = os.path.abspath(install_dir)
core_files = find_files(f"*.{self.get_dump_ext()}", core_file_dir)
if not core_files:
raise RuntimeError(f"No core dumps found in {core_file_dir}")
for filename in core_files:
file_path = os.path.abspath(filename)
try:
self.analyze_core(file_path, install_dir)
except Exception as e:
self._root_logger.error("Failed to process core dump: %s", filename)
self._root_logger.error(e)
def analyze_core(self, core_file_path: str, install_dir: str):
filename = os.path.basename(core_file_path)
regex = re.search(rf"dump_(.+)\.([0-9]+)\.{self.get_dump_ext()}", filename)
if not regex:
self._root_logger.warning(
"Core dump file name does not match expected pattern, skipping %s", filename
)
return
binary_name = f"{regex.group(1)}.exe"
pid = int(regex.group(2))
binary_files = find_files(binary_name, install_dir)
logger = _get_process_logger(self._dbg_output, binary_name, pid)
logger.info("analyzing %s", filename)
if not binary_files:
logger.warn("Binary %s not found, cannot process %s", binary_name, filename)
return
if len(binary_files) > 1:
logger.error("More than one file found in %s matching %s", install_dir, binary_name)
raise RuntimeError(f"More than one file found in {install_dir} matching {binary_name}")
binary_path = binary_files[0]
symbol_path = binary_path.replace(".exe", ".pdb")
dbg = self._find_debugger()
if dbg is None:
self._root_logger.warning("Debugger not found, skipping dumping of %s", filename)
return
cmds = (
self._prefix()
+ [
"!peb", # Dump current exe, & environment variables
"lm", # Dump loaded modules
"!uniqstack -pn", # Dump All unique Threads with function arguments
"!cs -l", # Dump all locked critical sections
]
+ self._postfix()
)
call(
[dbg, "-i", binary_path, "-z", core_file_path, "-y", symbol_path, "-v", ";".join(cmds)],
logger,
)
def get_dump_ext(self):
"""Return the dump file extension."""
return "mdmp"
def get_binary_from_core_dump(self, core_file_path):
raise NotImplementedError("get_binary_from_core_dump is not implemented on windows")
# LLDB dumper is for MacOS X
class LLDBDumper(Dumper):
"""LLDBDumper class."""
@staticmethod
def _find_debugger():
"""Find the installed debugger."""
debugger = "lldb"
return find_program(debugger, ["/usr/bin"])
def _prefix(self):
pass
def _process_specific(self, pinfo, take_dump, logger=None):
"""Return the commands that attach to each process, dump info and detach."""
cmds = []
if take_dump:
dump_files = self._dump_files(pinfo)
for pid in pinfo.pidv:
# Dump to file, dump_<process name>.<pid>.core
dump_file = dump_files[pid]
dump_command = "process save-core %s" % dump_file
self._root_logger.info("Dumping core to %s", dump_file)
cmds += [
"platform shell kill -CONT %d" % pid,
"attach -p %d" % pid,
dump_command,
"process detach",
"platform shell kill -STOP %d" % pid,
]
else:
for pid in pinfo.pidv:
cmds += [
"platform shell kill -CONT %d" % pid,
"attach -p %d" % pid,
"target modules list",
"thread backtrace all",
"process detach",
"platform shell kill -STOP %d" % pid,
]
return cmds
def _postfix(self):
"""Return the commands to exit the debugger."""
cmds = [
"settings set interpreter.prompt-on-quit false",
"quit",
]
return cmds
def dump_info(self, pinfo, take_dump):
"""Dump info."""
dbg = self._find_debugger()
logger = _get_process_logger(self._dbg_output, pinfo.name)
if dbg is None:
self._root_logger.warning("Debugger not found, skipping dumping of %s", str(pinfo.pidv))
return
self._root_logger.info(
"Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, str(pinfo.pidv)
)
lldb_version = callo([dbg, "--version"], logger)
logger.info(lldb_version)
# Do we have the XCode or LLVM version of lldb?
# Old versions of lldb do not work well when taking commands via a file
# XCode (7.2): lldb-340.4.119
# LLVM - lldb version 3.7.0 ( revision )
if "version" not in lldb_version:
# We have XCode's lldb
lldb_version = lldb_version[lldb_version.index("lldb-") :]
lldb_version = lldb_version.replace("lldb-", "")
lldb_major_version = int(lldb_version[: lldb_version.index(".")])
if lldb_major_version < 340:
logger.warning("Debugger lldb is too old, please upgrade to XCode 7.2")
return
cmds = self._process_specific(pinfo, take_dump) + self._postfix()
tf = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8")
for cmd in cmds:
tf.write(cmd + "\n")
tf.flush()
# Works on in MacOS 10.9 & later
# call([dbg] + list( itertools.chain.from_iterable([['-o', b] for b in cmds])), logger)
call(["cat", tf.name], logger)
call([dbg, "--source", tf.name], logger)
self._root_logger.info(
"Done analyzing %s processes with PIDs %s", pinfo.name, str(pinfo.pidv)
)
if take_dump:
need_sigabrt = {}
files = self._dump_files(pinfo)
for pid in files:
if not os.path.exists(files[pid]):
need_sigabrt[pid] = files[pid]
if need_sigabrt:
raise DumpError(need_sigabrt)
def analyze_cores(self, core_file_dir: str, install_dir: str, analysis_dir: str):
raise NotImplementedError("analyze_cores is not implemented on macos")
def get_dump_ext(self):
"""Return the dump file extension."""
return "core"
def _dump_files(self, pinfo):
"""Return a dict mapping pids to core dump filenames that this dumper can create."""
files = {}
for pid in pinfo.pidv:
files[pid] = "dump_%s.%d.%s" % (pinfo.name, pid, self.get_dump_ext())
return files
def get_binary_from_core_dump(self, core_file_path):
raise NotImplementedError("get_binary_from_core_dump is not implemented on macos")
# GDB dumper is for Linux
class GDBDumper(Dumper):
"""GDBDumper class."""
def __init__(
self, root_logger: logging.Logger, dbg_output: str, timeout_seconds_for_gdb_process=720
):
"""Initialize GDBDumper."""
if resmoke_config.EVERGREEN_TASK_ID is None:
# Set 24 hours time out for hang analyzer being run in locally
timeout_seconds_for_gdb_process = 86400
# Timeout for hang analyzer, default timeout is 12mins(out of total 15mins) in Evergreen
self._timeout_seconds_for_gdb_process = timeout_seconds_for_gdb_process
super().__init__(root_logger, dbg_output)
def get_timeout_secs(self):
"""Returns the remaining time left before we timeout."""
return self._timeout_seconds_for_gdb_process
def _reduce_timeout_for_gdb_process(self, timeout_period: int):
"""Reduce timeout for remaining gdb processes."""
self._timeout_seconds_for_gdb_process -= timeout_period
def _find_debugger(self):
"""Find the installed debugger."""
debugger = "gdb"
return find_program(
debugger, ["/opt/mongodbtoolchain/v5/bin", "/opt/mongodbtoolchain/v4/bin", "/usr/bin"]
)
def _prefix(self):
"""Return the commands to set up a debugger process."""
add_venv_sys_path = (
f"py sys.path.extend({sys.path})" # Makes venv packages available in GDB
)
cmds = [
"set interactive-mode off",
"set print thread-events off", # Suppress GDB messages of threads starting/finishing.
add_venv_sys_path,
"source .gdbinit",
]
return cmds
def _process_specific(self, pinfo, take_dump, logger=None):
"""Return the commands that attach to each process, dump info and detach."""
cmds = []
if take_dump:
for pid in pinfo.pidv:
# Dump to file, dump_<process name>.<pid>.core
dump_file = "dump_%s.%d.%s" % (pinfo.name, pid, self.get_dump_ext())
dump_command = "gcore %s" % dump_file
self._root_logger.info("Dumping core to %s", dump_file)
cmds += [
"attach %d" % pid,
"handle SIGSTOP ignore noprint",
# Lock the scheduler, before running commands, which execute code in the attached process.
"set scheduler-locking on",
dump_command,
"detach",
]
else:
mongodb_dump_locks = "mongodb-dump-locks"
mongodb_show_locks = "mongodb-show-locks"
mongodb_uniqstack = "mongodb-uniqstack mongodb-bt-if-active"
mongodb_javascript_stack = "mongodb-javascript-stack"
mongod_dump_sessions = "mongod-dump-sessions"
mongodb_dump_recovery_units = "mongodb-dump-recovery-units"
mongodb_dump_storage_engine_info = "mongodb-dump-storage-engine-info"
for pid in pinfo.pidv:
filename = "debugger_%s_%d.log" % (os.path.splitext(pinfo.name)[0], pid)
if logger:
base, ext = os.path.splitext(filename)
set_logging_on_commands = [
"set logging file %s_%d%s" % (base, pid, ext),
"set logging on",
]
set_logging_off_commands = ["set logging off"]
raw_stacks_filename = "%s_%d_raw_stacks%s" % (base, pid, ext)
raw_stacks_commands = [
"echo \\nWriting raw stacks to %s.\\n" % raw_stacks_filename,
# This sends output to log file rather than stdout until we turn logging off.
"set logging redirect on",
"set logging file " + raw_stacks_filename,
"set logging on",
"thread apply all bt",
"set logging off",
"set logging redirect off",
]
else:
set_logging_on_commands = []
set_logging_off_commands = []
raw_stacks_commands = []
mongodb_waitsfor_graph = "mongodb-waitsfor-graph debugger_waitsfor_%s_%d.gv" % (
pinfo.name,
pid,
)
cmds += (
set_logging_on_commands
+ [
"attach %d" % pid,
"handle SIGSTOP ignore noprint",
"info sharedlibrary",
"info threads", # Dump a simple list of commands to get the thread name
]
+ set_logging_off_commands
+ raw_stacks_commands
+ set_logging_on_commands
+ [
mongodb_uniqstack,
# Lock the scheduler, before running commands, which execute code in the attached process.
"set scheduler-locking on",
mongodb_dump_locks,
mongodb_show_locks,
mongodb_waitsfor_graph,
mongodb_javascript_stack,
mongod_dump_sessions,
mongodb_dump_recovery_units,
mongodb_dump_storage_engine_info,
"detach",
]
+ set_logging_off_commands
)
return cmds
def _postfix(self):
"""Return the commands to exit the debugger."""
cmds = ["set confirm off", "quit"]
return cmds
def dump_info(self, pinfo, take_dump):
"""Dump info."""
dbg = self._find_debugger()
logger = self._root_logger
_start_time = datetime.now()
if dbg is None:
self._root_logger.warning("Debugger not found, skipping dumping of %s", str(pinfo.pidv))
return
if self._timeout_seconds_for_gdb_process <= 0:
self._root_logger.warning(
"Skipping dumping of %s processes with PIDs %s because the time limit expired",
pinfo.name,
str(pinfo.pidv),
)
return
self._root_logger.info(
"Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name, str(pinfo.pidv)
)
call([dbg, "--version"], logger)
cmds = self._prefix() + self._process_specific(pinfo, take_dump, logger) + self._postfix()
# gcore is both a command within GDB and a script packaged alongside gdb. The gcore script
# invokes the gdb binary with --readnever to avoid spending time loading the debug symbols
# prior to taking the core dump. The debug symbols are unneeded to generate the core dump.
#
# For reference:
# https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/gcore.in;h=34860de630cf0ee766e102eb82f7a3fddba6b368#l101
skip_reading_symbols_on_take_dump = ["--readnever"] if take_dump else []
# Live process debugging is not done in the core analyzer subcommand and is no longer
# supported in the hang analyzer. The only gdb commands we run here are to take core
# dumps of the running processes.
if take_dump:
call(
[dbg, "--quiet", "--nx"]
+ skip_reading_symbols_on_take_dump
+ list(itertools.chain.from_iterable([["-ex", b] for b in cmds])),
logger,
self._timeout_seconds_for_gdb_process,
pinfo,
)
time_period = (datetime.now() - _start_time).total_seconds()
self._reduce_timeout_for_gdb_process(time_period)
self._root_logger.info(
"Done analyzing %s processes with PIDs %s", pinfo.name, str(pinfo.pidv)
)
@TRACER.start_as_current_span("core_analyzer.analyze_cores")
def analyze_cores(
self,
core_file_dir: str,
install_dir: str,
analysis_dir: str,
multiversion_dir: str,
gdb_index_cache: str,
) -> Report:
core_files = find_files(f"*.{self.get_dump_ext()}", core_file_dir)
analyze_cores_span = get_default_current_span()
if not core_files:
analyze_cores_span.set_status(StatusCode.ERROR, "No core dumps found")
analyze_cores_span.set_attribute(
"analyze_cores_error", f"No core dumps found in {core_file_dir}"
)
raise RuntimeError(f"No core dumps found in {core_file_dir}")
tmp_dir = os.path.join(analysis_dir, "tmp")
os.makedirs(tmp_dir, exist_ok=True)
report = Report({"failures": 0, "results": []})
for filename in core_files:
core_file_path = os.path.abspath(filename)
basename = os.path.basename(filename)
self._root_logger.info("Starting analysis of %s", basename)
log_stream = StringIO()
logger = logging.Logger(basename, level=logging.DEBUG)
handler = logging.StreamHandler(log_stream)
handler.setFormatter(logging.Formatter(fmt="%(message)s"))
logger.addHandler(handler)
with TRACER.start_as_current_span("core_analyzer.analyze_core") as analyze_core_span:
start_time = time.time()
analyze_core_span.set_status(StatusCode.OK)
try:
exit_code, status = self.analyze_core(
core_file_path=core_file_path,
install_dir=install_dir,
analysis_dir=analysis_dir,
tmp_dir=tmp_dir,
logger=logger,
multiversion_dir=multiversion_dir,
gdb_index_cache=gdb_index_cache,
)
except Exception:
logger.exception("Exception occured while analyzing core")
exit_code = 1
status = "fail"
analyze_core_span.set_attributes(
{
"analyze_core_status": status,
"core_file": core_file_path,
}
)
if status == "fail":
analyze_core_span.set_status(
StatusCode.ERROR, description="Failed to analyze core dump."
)
end_time = time.time()
output = log_stream.getvalue()
result = Result(
Result(
{
"status": status,
"exit_code": exit_code,
"test_file": basename,
"log_raw": output,
"start": start_time,
"end": end_time,
"elapsed": end_time - start_time,
}
)
)
if exit_code == 1:
report["failures"] += 1
report["results"].append(result)
self._root_logger.info("Analysis of %s ended with status %s", basename, status)
analyze_cores_span.set_attributes(
{"failures": report["failures"], "core_dump_count": len(core_files)}
)
shutil.rmtree(tmp_dir)
return report
def analyze_core(
self,
core_file_path: str,
install_dir: str,
analysis_dir: str,
tmp_dir: str,
multiversion_dir: str,
logger: logging.Logger,
gdb_index_cache: str,
) -> Tuple[int, str]: # returns (exit_code, test_status)
cmds = []
dbg = self._find_debugger()
basename = os.path.basename(core_file_path)
if dbg is None:
self._root_logger.error("Debugger not found, skipping dumping of %s", basename)
return 1, "fail"
# ensure debugger version is loggged
call([dbg, "--version"], logger)
lib_dir = None
binary_name, bin_version = self.get_binary_from_core_dump(core_file_path)
search_dir = multiversion_dir if bin_version else install_dir
binary_files = find_files(binary_name, search_dir)
if not binary_files:
# This can sometimes happen because coredumps can appear from non-mongo processes
logger.warn("Binary %s not found, cannot process %s", binary_name, basename)
return 0, "skip"
if len(binary_files) > 1:
logger.error("More than one file found in %s matching %s", install_dir, binary_name)
return 1, "fail"
binary_path = os.path.realpath(os.path.abspath(binary_files[0]))
lib_dir = os.path.abspath(os.path.join(os.path.dirname(binary_files[0]), "..", "lib"))
basename = os.path.basename(core_file_path)
logging_dir = os.path.join(analysis_dir, basename)
os.makedirs(logging_dir, exist_ok=True)
cmds += [
f"set solib-search-path {lib_dir}",
f"set index-cache directory {tmp_dir}",
f"set index-cache enabled {gdb_index_cache}",
f"file {binary_path}",
]
# We run gdb once to populate the gdb index cache because gdb can sometimes
# behave weirdly and take a long time when pulling from the index cache
# in the same invocation it creates the index cache.
gdb_index_cmds = self._prefix() + cmds + self._postfix()
args = [dbg, "--nx"] + list(
itertools.chain.from_iterable([["-ex", b] for b in gdb_index_cmds])
)
exit_code = call(args, logger, check=False)
if exit_code != 0:
raise Exception("Bad exit code %d from %s" % (exit_code, " ".join(args)))
cmds += [
f"core-file {core_file_path}",
"python import gdbmongo",
"python gdbmongo.register_printers()",
"set width 0",
]
def add_commands(command: str, name: str):
file_path = os.path.join(logging_dir, f"{basename}.{name}.txt")
cmds.extend(
[
f"echo \\nWriting {name} to {file_path}.\\n",
f"set logging file {file_path}",
"set logging enabled on",
command,
"set logging enabled off",
]
)
add_commands("info threads", "info_threads")
add_commands("thread apply all bt", "backtraces")
add_commands("mongodb-uniqstack mongodb-bt-if-active", "uniqstack")
add_commands("mongodb-show-locks", "show_locks")
add_commands("mongod-dump-sessions", "dump_sessions")
add_commands("mongodb-dump-recovery-units", "dump_recovery_units")
# depends on gdbmongo python dependency
add_commands("python print(gdbmongo.LockManagerPrinter.from_global().val)", "dump_locks")
cmds = self._prefix() + cmds + self._postfix()
args = [dbg, "--nx"] + list(itertools.chain.from_iterable([["-ex", b] for b in cmds]))
exit_code = call(args, logger, check=False)
current_span = trace.get_current_span()
current_span.set_attribute("gdb_exit_code", exit_code)
if exit_code != 0:
raise Exception("Bad exit code %d from %s" % (exit_code, " ".join(args)))
return 0, "pass"
def get_dump_ext(self):
"""Return the dump file extension."""
return "core"
def get_binary_from_core_dump(self, core_file_path):
dbg = self._find_debugger()
if dbg is None:
raise RuntimeError("Debugger not found, can't run get_binary_from_core_dump")
process = subprocess.run(
[dbg, "-batch", "--quiet", "-ex", f"core {core_file_path}"],
check=True,
capture_output=True,
text=True,
)
regex = re.search("Core was generated by `(.*)'.", process.stdout, re.DOTALL)
if not regex:
raise RuntimeError("gdb output did not match pattern, could not find binary name")
binary_path = regex.group(1)
binary_name = binary_path.split(" ")[0]
binary_name = binary_name.split("/")[-1]
regex = re.match(r"[a-zA-Z]+-([0-9]{1,2}\.[0-9])", binary_name)
bin_version = None
if regex:
bin_version = regex.group(1)
return binary_name, bin_version
@staticmethod
def _find_gcore():
"""Find the installed gcore."""
dbg = "/usr/bin/gcore"
if os.path.exists(dbg):
return dbg
return None
# jstack is a JDK utility
class JstackDumper(object):
"""JstackDumper class."""
@staticmethod
def _find_debugger():
"""Find the installed jstack debugger."""
debugger = "jstack"
return find_program(debugger, ["/usr/bin"])
def dump_info(self, root_logger, dbg_output, pid, process_name):
"""Dump java thread stack traces to the console."""
jstack = self._find_debugger()
logger = _get_process_logger(dbg_output, process_name, pid=pid)
if jstack is None:
logger.warning("Debugger not found, skipping dumping of %d", pid)
return
root_logger.info("Debugger %s, analyzing %s process with PID %d", jstack, process_name, pid)
call([jstack, "-l", str(pid)], logger)
root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
# jstack is a JDK utility
class JstackWindowsDumper(object):
"""JstackWindowsDumper class."""
@staticmethod
def dump_info(root_logger, pid):
"""Dump java thread stack traces to the logger."""
root_logger.warning("Debugger jstack not supported, skipping dumping of %d", pid)
def _get_process_logger(dbg_output, pname: str, pid: int = None):
"""Return the process logger from options specified."""
process_logger = logging.Logger("process", level=logging.DEBUG)
process_logger.mongo_process_filename = None
if "stdout" in dbg_output:
s_handler = logging.StreamHandler(sys.stdout)
s_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
process_logger.addHandler(s_handler)
if "file" in dbg_output:
if pid:
filename = "debugger_%s_%d.log" % (os.path.splitext(pname)[0], pid)
else:
filename = "debugger_%s.log" % (os.path.splitext(pname)[0])
process_logger.mongo_process_filename = filename
f_handler = logging.FileHandler(filename=filename, mode="w")
f_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
process_logger.addHandler(f_handler)
return process_logger
class DumpError(Exception):
"""
Exception raised for errors while dumping processes.
Tracks what cores still need to be generated.
"""
def __init__(
self,
dump_pids,
message=(
"Failed to create core dumps for some processes,"
" SIGABRT will be sent as a fallback if -k is set."
),
):
"""Initialize error."""
self.dump_pids = dump_pids
self.message = message
super().__init__(self.message)