mongo/buildscripts/gdb/mongo_utils.py
Ryan Berryhill 49eead22a2 SERVER-125403 Move shared gdb pretty printer utilities into their own module (#53137)
GitOrigin-RevId: b56da6d78c10eec9844f03d9eb9f75473b616d3b
2026-05-07 14:30:09 +00:00

245 lines
9.5 KiB
Python

"""Shared utilities used by the MongoDB GDB scripts.
This module is imported (not sourced) by other scripts in this directory. It
must remain free of module-level side effects so it can be safely loaded as a
module while those scripts are sourced by .gdbinit.
"""
import re
import sys
import gdb
# Lazily-populated cache of the global block of the symtab that defines `main`.
# It starts out as None and is filled in by lookup_type() the first time the
# default type lookup fails.
MAIN_GLOBAL_BLOCK = None
def lookup_type(gdb_type_str: str) -> gdb.Type:
    """
    Try to find the type object from string.

    GDB says it searches the global blocks, however this appears not to be the
    case, or at least it doesn't search all global blocks; sometimes it is
    required to search the global block of the symbol table that defines `main`.

    The default lookup is attempted first. If it fails, the lookup is retried
    against the global block of the symtab containing `main`; that block is
    cached in MAIN_GLOBAL_BLOCK once it has been resolved successfully.

    Raises gdb.error, listing every failure encountered, if no attempt succeeds.
    """
    global MAIN_GLOBAL_BLOCK

    exceptions = []
    try:
        return gdb.lookup_type(gdb_type_str)
    except Exception as exc:
        exceptions.append(exc)

    try:
        # Resolving the block for `main` can itself fail (no symbol named `main`, no
        # symtab for it, or no frame selected). Keep it inside the guarded section so
        # such a failure is reported together with the first one instead of escaping
        # as an unrelated AttributeError.
        if MAIN_GLOBAL_BLOCK is None:
            MAIN_GLOBAL_BLOCK = gdb.lookup_symbol("main")[0].symtab.global_block()
        return gdb.lookup_type(gdb_type_str, MAIN_GLOBAL_BLOCK)
    except Exception as exc:
        exceptions.append(exc)

    raise gdb.error("Failed to get type, tried:\n%s" % "\n".join(str(exc) for exc in exceptions))
def get_thread_id():
    """
    Return the thread_id of the current GDB thread.

    On Linux and SunOS the id is parsed out of the output of GDB's `thread`
    command. When the platform is different or the output does not match, the
    second element of the selected thread's ptid is used instead.

    Raises ValueError if no non-zero id could be determined.
    """
    # GDB thread example:
    #  RHEL
    #   [Current thread is 1 (Thread 0x7f072426cca0 (LWP 12867))]
    thread_info = gdb.execute("thread", from_tty=False, to_string=True)

    # Pick the platform-specific pattern and the numeric base of the id it captures.
    platform = sys.platform
    if platform.startswith("linux"):
        id_pattern, base = r"Thread (?P<pthread_id>0x[0-9a-f]+)", 16
    elif platform.startswith("sunos"):
        id_pattern, base = r"Thread (?P<pthread_id>[0-9]+)", 10
    else:
        id_pattern, base = None, 0

    if id_pattern is not None:
        found = re.search(id_pattern, thread_info)
        if found is not None:
            return int(found.group("pthread_id"), base)

    # Fall back to the id GDB itself tracks for the selected thread.
    lwpid = gdb.selected_thread().ptid[1]
    if lwpid == 0:
        raise ValueError("Failed to find thread id in {}".format(thread_info))
    return lwpid
def get_current_thread_name():
    """
    Return the name of the current GDB thread, wrapped in double quotes.

    The name reported by `mongo::getThreadName()` is preferred. The name GDB
    has for the selected thread (or an empty string) is used when that call
    fails with gdb.error or yields an empty name.
    """
    gdb_thread_name = gdb.selected_thread().name or ""
    fallback_name = '"%s"' % (gdb_thread_name)

    try:
        # This goes through the pretty printer for StringData which adds "" around the name.
        name = str(gdb.parse_and_eval("mongo::getThreadName()"))
    except gdb.error:
        return fallback_name

    return fallback_name if name == '""' else name
class RegisterMongoCommand:
    """Registry of the mongo GDB commands and their descriptions."""

    # Maps a command name to the docstring of the object registered under it.
    _MONGO_COMMANDS = {}  # type: ignore

    @classmethod
    def register(cls, obj, name, command_class):
        """Initialize `obj` as a gdb.Command with no completer and record it as a mongo command."""
        gdb.Command.__init__(obj, name, command_class)
        cls._MONGO_COMMANDS[name] = obj.__doc__

    @classmethod
    def print_commands(cls):
        """Print each registered mongo command next to its description."""
        print("Command - Description")
        for name, description in cls._MONGO_COMMANDS.items():
            print("%s - %s" % (name, description))
def get_bytes(obj):
    """
    Return `obj` cast to `unsigned char*`.

    The result is a gdb.Value; callers are responsible for casting it to whatever type they
    actually need. This is mainly useful for mongo::Decorable<> types, which keep their
    decorations in a slab of memory addressed through an unsigned char*.
    """
    byte_pointer_type = gdb.lookup_type("unsigned char").pointer()
    return obj.cast(byte_pointer_type)
def get_unique_ptr_bytes(obj):
    """
    Read the pointer held by a libstdc++ std::unique_ptr as an `unsigned char*`.

    The unique_ptr is reinterpreted as the `std::_Head_base` instantiation that stores an
    `unsigned char*`, and its `_M_head_impl` member is returned.
    """
    head_base_type = gdb.lookup_type("std::_Head_base<0, unsigned char*, false>")
    head_base = obj.cast(head_base_type)
    return head_base["_M_head_impl"]
def get_unique_ptr(obj):
    """
    Read the value of a libstdc++ std::unique_ptr.

    The raw pointer is fetched via get_unique_ptr_bytes() and then cast to a pointer to the
    unique_ptr's first template argument.
    """
    pointee_pointer_type = obj.type.template_argument(0).pointer()
    raw_pointer = get_unique_ptr_bytes(obj)
    return raw_pointer.cast(pointee_pointer_type)
def _cast_decoration_value(type_name: str, decoration_address: int, /) -> gdb.Value:
    """Return the value located at `decoration_address`, interpreted as a `type_name`."""
    # We cannot use gdb.lookup_type() when the decoration type is a pointer type, e.g.
    # ServiceContext::declareDecoration<VectorClock*>(). gdb.parse_and_eval() is one of the few
    # ways to convert a type expression into a gdb.Type value. Some care is taken to quote the
    # non-pointer portion of the type so resolution for a type defined within an anonymous
    # namespace works correctly.
    #
    # The substitution quotes everything up to the last word character or '>', keeps any
    # trailing whitespace/'*' characters unquoted, and appends one more '*' so the address
    # can be cast to a pointer to the decoration type.
    pointer_type_expr = re.sub(r"^(.*[\w>])([\s\*]*)$", r"'\1'\2*", type_name)
    typed_pointer = gdb.parse_and_eval(f"({pointer_type_expr}) {decoration_address}")
    return typed_pointer.dereference()
def get_object_decoration(decorable, start, index):
    """
    Return the decoration of `decorable` described by registry entry `start[index]`.

    The result is a tuple of:
      - the decoration's type name, extracted from the entry's `typeInfo` text,
      - the decoration itself, cast to that type,
      - the decoration's address as text, cut at the last space of GDB's rendering.
    """
    raw_bytes = get_bytes(decorable["_decorations"]["_data"])
    registry_entry = start[index]

    # typeInfo renders as "... <typeinfo for T>"; keep only T.
    type_info_text = str(registry_entry["typeInfo"])
    deco_type_name = re.sub(r".* <typeinfo for (.*)>", r"\1", type_info_text)

    # The entry's offset locates the decoration inside the byte slab.
    slot = raw_bytes[int(registry_entry["offset"])]
    slot_address = slot.address
    obj_addr = re.sub(r"^(.*) .*", r"\1", str(slot_address))

    typed_value = _cast_decoration_value(deco_type_name, int(slot_address))
    return (deco_type_name, typed_value, obj_addr)
def get_decorable_info(decorable):
    """
    Locate the decoration registry entries for the given decorable.

    Returns a tuple `(start, count)`: `start` is the `_M_start` pointer of the registry's
    `_entries` vector and `count` is the number of entries between `_M_start` and `_M_finish`.
    """
    decorable_t = decorable.type.template_argument(0).name
    registry_name = "mongo::decorable_detail::gdbRegistry<{}>".format(decorable_t)
    reg_sym, _ = gdb.lookup_symbol(registry_name)

    vector_impl = reg_sym.value()["_entries"]["_M_impl"]
    start = vector_impl["_M_start"]
    finish = vector_impl["_M_finish"]

    # The entry count is the byte distance between the two pointers over the entry size.
    entry_size = lookup_type("mongo::decorable_detail::Registry::Entry").sizeof
    byte_span = int(finish) - int(start)
    count = int(byte_span / entry_size)
    return start, count
def get_boost_optional(optional):
    """
    Return the value held by a boost::optional, or None when it is empty.

    TODO: Import the boost pretty printers instead of using this custom function.
    """
    if not optional["m_initialized"]:
        return None

    value_ref_type = optional.type.template_argument(0).pointer()
    m_storage = optional["m_storage"]

    # boost::optional<T> is either stored using boost::optional_detail::aligned_storage<T> or
    # using direct storage of `T`. Scalar types are able to take advantage of direct storage.
    #
    # https://www.boost.org/doc/libs/1_79_0/libs/optional/doc/html/boost_optional/tutorial/performance_considerations.html
    stored_directly = m_storage.type.strip_typedefs().pointer() == value_ref_type
    if stored_directly:
        return m_storage

    # Aligned storage: reinterpret the raw buffer as a T* and read through it.
    raw_buffer = m_storage["dummy_"]["data"]
    return raw_buffer.cast(value_ref_type).dereference()
# Cache for types found via find_type_from_info_types as they can be expensive to look up.
# Keys are the regex strings passed to find_type_from_info_types; values are the gdb.Type
# objects those regexes resolved to.
_type_cache: dict[str, gdb.Type] = {}
def find_type_from_info_types(regex):
    """
    Find the gdb.Type of the symbol matched by the given regex.

    This is useful when compilers disagree about the spelling of a symbol/template
    instantiation. The `info types <regex>` gdb command is run, its output is parsed for the
    first listed type, and that type name is then looked up. Results are memoized in
    _type_cache, keyed by the regex.

    Raises RuntimeError if the command output lists no matching type.
    """
    cached = _type_cache.get(regex)
    if cached is not None:
        return cached

    output = gdb.execute(f"info types {regex}", to_string=True)
    # Example output:
    # All types matching regular expression "absl::lts_.*::container_internal::internal_compressed_tuple::Storage<absl::lts_.*::container_internal::CommonFields, 0.*, false>":
    # File src/third_party/abseil-cpp/dist/absl/container/internal/compressed_tuple.h:
    # 85:     absl::lts_20250512::container_internal::internal_compressed_tuple::Storage<absl::lts_20250512::container_internal::CommonFields, 0, false>;

    # Regex looking for: number + colon + whitespace + capture group + semicolon
    first_hit = re.compile(r"^\s*\d+:\s+(.*?);$", re.MULTILINE).search(output)
    if first_hit is None:
        raise RuntimeError(f"No types found for regex: {regex}")

    resolved = gdb.lookup_type(first_hit.group(1))
    _type_cache[regex] = resolved
    return resolved
def absl_get_settings(val):
    """Gets the settings_ field for abseil (flat/node)_hash_(map/set)."""
    # Find the type of the CompressedTuple Storage template.
    # Abseil uses an inline namespace for versioning, so it may contain '::lts_20250512' in the middle of the symbol name.
    # Clang and GCC may mangle the templates differently for the 0 size_t parameter, so we use '0.*' to match both '0' and '0ul'.
    storage_regex = "absl.*::container_internal::internal_compressed_tuple::Storage<absl.*::container_internal::CommonFields, 0.*, false>"
    common_fields_storage_type = find_type_from_info_types(storage_regex)

    # The Hash, Eq, or Alloc functors may not be zero-sized objects.
    # mongo::LogicalSessionIdHash is one such example. An explicit cast is needed to
    # disambiguate which `value` member variable of the CompressedTuple is to be accessed.
    settings_as_storage = val["settings_"].cast(common_fields_storage_type)
    return settings_as_storage["value"]
def absl_container_size(settings):
    """
    Return the element count encoded in the container's `settings`.

    The count is `settings["size_"]["data_"]` shifted right by 17 bits.
    NOTE(review): presumably the low 17 bits of `data_` hold non-size metadata in the
    vendored abseil version -- confirm against raw_hash_set.h when upgrading abseil.
    """
    packed_size = settings["size_"]["data_"]
    return packed_size >> 17
def absl_get_nodes(val):
    """
    Yield every in-use slot of an absl::container_internal::raw_hash_set or derived class.

    Nothing is yielded when the container reports a size of zero.
    """
    settings = absl_get_settings(val)
    if absl_container_size(settings) == 0:
        return

    capacity = int(settings["capacity_"])
    heap = settings["heap_or_soo_"]["heap"]
    ctrl = heap["control"]

    # Derive the underlying type stored in the container.
    slot_type_name = str(val.type.strip_typedefs().name) + "::slot_type"
    slot_type = lookup_type(slot_type_name).strip_typedefs()

    # Using the array of ctrl bytes, search for in-use slots and return them
    # https://github.com/abseil/abseil-cpp/blob/8a3caf7dea955b513a6c1b572a2423c6b4213402/absl/container/internal/raw_hash_set.h#L2108-L2113
    for index in range(capacity):
        # A negative control byte is skipped; only non-negative ones produce a slot.
        if int(ctrl[index]) < 0:
            continue
        slots = heap["slot_array"]["p"].cast(slot_type.pointer())
        yield slots[index]