SERVER-125403 Move shared gdb pretty printer utilities into their own module (#53137)

GitOrigin-RevId: b56da6d78c10eec9844f03d9eb9f75473b616d3b
This commit is contained in:
Ryan Berryhill 2026-05-07 09:58:08 -04:00 committed by MongoDB Bot
parent f869a2b8de
commit 49eead22a2
7 changed files with 277 additions and 273 deletions

View File

@ -5,7 +5,6 @@ set python print-stack full
source buildscripts/gdb/mongo.py
# Load the mongodb pretty printers
source buildscripts/gdb/optimizer_printers.py
source buildscripts/gdb/mongo_printers.py
# Load the mongodb lock analysis

View File

@ -11,16 +11,18 @@ from pathlib import Path
import gdb
# When we run under gdb, both this file and mongo_printers.py are sourced and run in one global
# namespace, so we don't want to import mongo_printers here. A real import would trigger Python
# to load mongo_printers.py a second time as a module with its own namespace, which in turn
# triggers a second load of this file and clobbers the gdb Command registrations made when this
# file was first sourced into __main__. The guarded import is kept only to satisfy linters that
# would otherwise flag the names used from mongo_printers as undefined.
# TODO SERVER-125403 factor out shared functionality to avoid the guarded import.
if not gdb:
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo_printers import absl_get_nodes, get_bytes, get_unique_ptr
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo_utils import (
RegisterMongoCommand,
absl_get_nodes,
get_boost_optional,
get_current_thread_name,
get_decorable_info,
get_object_decoration,
get_thread_id,
get_unique_ptr,
lookup_type,
)
def detect_toolchain(progspace):
@ -155,70 +157,6 @@ def get_process_name():
return os.path.splitext(os.path.basename(main_binary_name))[0]
def get_thread_id():
"""Return the thread id of the currently selected GDB thread as an integer.

The output of GDB's `thread` command is searched for a pthread id: a hexadecimal
value on Linux, a decimal value on SunOS. A non-zero LWP id taken from
`gdb.selected_thread().ptid[1]` is also accepted.

Raises ValueError when no id can be determined.
"""
# GDB thread example:
# RHEL
# [Current thread is 1 (Thread 0x7f072426cca0 (LWP 12867))]
thread_info = gdb.execute("thread", from_tty=False, to_string=True)
if sys.platform.startswith("linux"):
# On Linux the pthread id is matched as a hexadecimal literal.
match = re.search(r"Thread (?P<pthread_id>0x[0-9a-f]+)", thread_info)
if match:
return int(match.group("pthread_id"), 16)
elif sys.platform.startswith("sunos"):
# On SunOS the pthread id is matched as a decimal number.
match = re.search(r"Thread (?P<pthread_id>[0-9]+)", thread_info)
if match:
return int(match.group("pthread_id"), 10)
# NOTE(review): indentation is not visible in this view, so it is unclear whether this LWP
# fallback belongs to the sunos branch only or runs for every platform - confirm in the file.
lwpid = gdb.selected_thread().ptid[1]
if lwpid != 0:
return lwpid
# Nothing usable was found; include the raw `thread` output to aid debugging.
raise ValueError("Failed to find thread id in {}".format(thread_info))
# Global block of the symtab that defines `main`; resolved lazily by lookup_type().
MAIN_GLOBAL_BLOCK = None


def lookup_type(gdb_type_str: str) -> gdb.Type:
    """Resolve a type name to a gdb.Type, retrying against main's global block.

    GDB says it searches the global blocks, however this appears not to be the
    case, or at least it doesn't search all global blocks. When the default
    lookup fails, the lookup is retried against the global block of the symtab
    that defines ``main``; that block is resolved once and cached in
    ``MAIN_GLOBAL_BLOCK``.

    Args:
        gdb_type_str: the type name to resolve.

    Returns:
        The matching gdb.Type.

    Raises:
        gdb.error: if every attempt fails; the message lists each failure.
    """
    global MAIN_GLOBAL_BLOCK

    failures = []
    try:
        return gdb.lookup_type(gdb_type_str)
    except Exception as exc:
        failures.append(exc)

    try:
        if MAIN_GLOBAL_BLOCK is None:
            main_symbol = gdb.lookup_symbol("main")[0]
            if main_symbol is None:
                # gdb.lookup_symbol() reports a missing symbol as None. Without this
                # check the failure would surface as an AttributeError and hide the
                # error collected from the first lookup.
                raise gdb.error("symbol 'main' not found")
            MAIN_GLOBAL_BLOCK = main_symbol.symtab.global_block()
        return gdb.lookup_type(gdb_type_str, MAIN_GLOBAL_BLOCK)
    except Exception as exc:
        failures.append(exc)

    raise gdb.error("Failed to get type, tried:\n%s" % "\n".join([str(exc) for exc in failures]))
def get_current_thread_name():
    """Return the quoted name of the currently selected GDB thread.

    ``mongo::getThreadName()`` is evaluated first. If that raises ``gdb.error`` or
    yields an empty name, the name GDB reports for the selected thread is returned
    instead, wrapped in double quotes.
    """
    gdb_reported_name = gdb.selected_thread().name
    fallback_name = '"%s"' % (gdb_reported_name or "")
    try:
        # This goes through the pretty printer for StringData, which adds "" around
        # the name, so an empty name stringifies as '""'.
        mongo_name = str(gdb.parse_and_eval("mongo::getThreadName()"))
    except gdb.error:
        return fallback_name
    return fallback_name if mongo_name == '""' else mongo_name
def get_global_service_context():
    """Return the global ServiceContext object (the dereferenced global pointer)."""
    # The symbol lives in an anonymous namespace, so the full name is single-quoted
    # inside the expression handed to GDB.
    service_context_ptr = gdb.parse_and_eval(
        "'mongo::(anonymous namespace)::globalServiceContext'"
    )
    return service_context_ptr.dereference()
@ -295,40 +233,6 @@ def get_decorations(obj):
print("Failed to look up decoration type: " + deco_type_name + ": " + str(err))
def get_object_decoration(decorable, start, index):
    """Return ``(type_name, value, address)`` for one decoration of ``decorable``.

    Args:
        decorable: value whose ``_decorations._data`` holds the decoration bytes.
        start: indexable registry entries, each exposing ``typeInfo`` and ``offset``.
        index: position of the wanted entry within ``start``.
    """
    raw_bytes = get_bytes(decorable["_decorations"]["_data"])
    registry_entry = start[index]

    # typeInfo prints as "... <typeinfo for T>"; keep only T.
    type_info_text = str(registry_entry["typeInfo"])
    deco_type_name = re.sub(r".* <typeinfo for (.*)>", r"\1", type_info_text)

    slot = raw_bytes[int(registry_entry["offset"])]
    # Keep only the part of the printed address that precedes its last space.
    obj_addr = re.sub(r"^(.*) .*", r"\1", str(slot.address))
    typed_value = _cast_decoration_value(deco_type_name, int(slot.address))
    return (deco_type_name, typed_value, obj_addr)
def get_decorable_info(decorable):
    """Return ``(start, count)`` describing the decoration registry of ``decorable``.

    ``start`` is the ``_M_start`` pointer of the registry's ``_entries`` vector and
    ``count`` is the number of ``Registry::Entry`` elements up to ``_M_finish``.
    """
    decorated_type_name = decorable.type.template_argument(0).name
    registry_name = "mongo::decorable_detail::gdbRegistry<{}>".format(decorated_type_name)
    registry_symbol, _ = gdb.lookup_symbol(registry_name)

    vector_impl = registry_symbol.value()["_entries"]["_M_impl"]
    start = vector_impl["_M_start"]
    finish = vector_impl["_M_finish"]

    # Element count = byte distance between the two pointers / size of one entry.
    entry_type = lookup_type("mongo::decorable_detail::Registry::Entry")
    byte_span = int(finish) - int(start)
    count = int(byte_span / entry_type.sizeof)
    return start, count
def _cast_decoration_value(type_name: str, decoration_address: int, /) -> gdb.Value:
    """Return the value at ``decoration_address`` interpreted as ``type_name``.

    gdb.lookup_type() cannot be used when the decoration type is a pointer type,
    e.g. ServiceContext::declareDecoration<VectorClock*>(), so a cast expression is
    built and evaluated with gdb.parse_and_eval() instead.
    """
    # Split the name into its non-pointer portion and any trailing whitespace/'*'
    # characters. Only the non-pointer portion is single-quoted so that a type
    # defined within an anonymous namespace resolves correctly; the appended '*'
    # makes the expression a pointer to the decoration type.
    base_and_suffix = re.compile(r"^(.*[\w>])([\s\*]*)$")
    pointer_type_expr = base_and_suffix.sub(r"'\1'\2*", type_name)
    cast_expr = f"({pointer_type_expr}) {decoration_address}"
    return gdb.parse_and_eval(cast_expr).dereference()
def get_decoration(obj, type_name):
"""Find a decoration on 'obj' where the string 'type_name' is in the decoration's type name.
@ -346,29 +250,6 @@ def get_decoration(obj, type_name):
return None
def get_boost_optional(optional):
    """Return the value held by a boost::optional, or None when it is empty.

    TODO: Import the boost pretty printers instead of using this custom function.
    """
    if not optional["m_initialized"]:
        return None

    pointer_to_value_type = optional.type.template_argument(0).pointer()
    storage = optional["m_storage"]

    # boost::optional<T> is either stored using boost::optional_detail::aligned_storage<T> or
    # using direct storage of `T`. Scalar types are able to take advantage of direct storage.
    #
    # https://www.boost.org/doc/libs/1_79_0/libs/optional/doc/html/boost_optional/tutorial/performance_considerations.html
    uses_direct_storage = storage.type.strip_typedefs().pointer() == pointer_to_value_type
    if uses_direct_storage:
        return storage

    aligned_bytes = storage["dummy_"]["data"]
    return aligned_bytes.cast(pointer_to_value_type).dereference()
def get_field_names(value):
    """Return the names of all fields of ``value``'s type, in the order ``fields()`` yields them."""
    names = []
    for field in value.type.fields():
        names.append(field.name)
    return names
@ -381,25 +262,6 @@ def get_field_names(value):
###################################################################################################
class RegisterMongoCommand(object):
    """Registry of the mongo GDB commands and their descriptions."""

    # Maps command name -> docstring of the object registered under that name.
    _MONGO_COMMANDS = {}  # type: ignore

    @classmethod
    def register(cls, obj, name, command_class):
        """Initialize ``obj`` as a GDB command with no completer and record its docstring."""
        gdb.Command.__init__(obj, name, command_class)
        cls._MONGO_COMMANDS[name] = obj.__doc__

    @classmethod
    def print_commands(cls):
        """Print a header line followed by one 'name - description' line per command."""
        print("Command - Description")
        for name, description in cls._MONGO_COMMANDS.items():
            print("%s - %s" % (name, description))
class DumpGlobalServiceContext(gdb.Command):
"""Dump the Global Service Context."""

View File

@ -8,14 +8,13 @@ from pathlib import Path
import gdb
import gdb.printing
if not gdb:
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo import (
RegisterMongoCommand,
get_current_thread_name,
get_thread_id,
lookup_type,
)
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo_utils import (
RegisterMongoCommand,
get_current_thread_name,
get_thread_id,
lookup_type,
)
if sys.version_info[0] < 3:
raise gdb.GdbError(

View File

@ -17,20 +17,16 @@ from src.third_party.immer.dist.tools.gdb_pretty_printers.printers import (
ListIter as ImmerListIter,
)
# When we run under gdb, both this file and mongo.py are sourced and run in one global namespace,
# so we don't want to import mongo here. A real import would trigger Python to load mongo.py a
# second time as a module with its own namespace, clobbering the gdb Command registrations made
# when mongo.py was first sourced into __main__. The guarded import is kept only to satisfy
# linters that would otherwise flag the names used from mongo as undefined.
# TODO SERVER-125403 factor out shared functionality to avoid the guarded import.
if not gdb:
from buildscripts.gdb.mongo import (
get_boost_optional,
get_decorable_info,
get_object_decoration,
lookup_type,
)
from buildscripts.gdb.optimizer_printers import register_optimizer_printers
from buildscripts.gdb.mongo_utils import (
absl_container_size,
absl_get_nodes,
absl_get_settings,
get_boost_optional,
get_decorable_info,
get_object_decoration,
lookup_type,
)
from buildscripts.gdb.optimizer_printers import register_optimizer_printers
try:
import collections
@ -49,25 +45,6 @@ if sys.version_info[0] < 3:
)
def get_bytes(obj):
    """Return ``obj`` cast to ``unsigned char*``.

    The caller must take care to cast the returned value to the type it needs. This
    is particularly useful for mongo::Decorable<> types, which store their
    decorations as a slab of memory addressed through an unsigned char*.
    """
    byte_pointer_type = gdb.lookup_type("unsigned char").pointer()
    return obj.cast(byte_pointer_type)
def get_unique_ptr_bytes(obj):
    """Read the pointer stored in a libstdc++ std::unique_ptr as ``unsigned char*``."""
    # View the unique_ptr as the _Head_base that holds the pointer, then read its member.
    head_base_type = gdb.lookup_type("std::_Head_base<0, unsigned char*, false>")
    return obj.cast(head_base_type)["_M_head_impl"]
def get_unique_ptr(obj):
    """Read the pointer stored in a libstdc++ std::unique_ptr, typed as ``T*``."""
    raw_pointer = get_unique_ptr_bytes(obj)
    # T is the first template argument of the unique_ptr type.
    pointee_type = obj.type.template_argument(0)
    return raw_pointer.cast(pointee_type.pointer())
###################################################################################################
#
# Pretty-Printers
@ -918,81 +895,6 @@ def absl_insert_version_after_absl(cpp_name):
)
# Results of find_type_from_info_types keyed by regex, as the lookup can be expensive.
_type_cache: dict[str, gdb.Type] = {}


def find_type_from_info_types(regex):
    """Return the gdb.Type of the first type reported by ``info types <regex>``.

    This is useful when compilers disagree about the spelling of a symbol or
    template instantiation. Successful lookups are cached per regex.

    Raises:
        RuntimeError: if the command output contains no type entry.
    """
    cached = _type_cache.get(regex)
    if cached is not None:
        return cached

    listing = gdb.execute(f"info types {regex}", to_string=True)
    # Example output:
    # All types matching regular expression "absl::lts_.*::container_internal::internal_compressed_tuple::Storage<absl::lts_.*::container_internal::CommonFields, 0.*, false>":
    # File src/third_party/abseil-cpp/dist/absl/container/internal/compressed_tuple.h:
    # 85: absl::lts_20250512::container_internal::internal_compressed_tuple::Storage<absl::lts_20250512::container_internal::CommonFields, 0, false>;

    # An entry is: number + colon + whitespace + the type (captured) + semicolon.
    entry = re.search(r"^\s*\d+:\s+(.*?);$", listing, re.MULTILINE)
    if entry is None:
        raise RuntimeError(f"No types found for regex: {regex}")

    found_type = gdb.lookup_type(entry.group(1))
    _type_cache[regex] = found_type
    return found_type
def absl_get_settings(val):
    """Return the CommonFields value held in ``settings_`` of an abseil (flat/node)_hash_(map/set)."""
    # Find the type of the CompressedTuple Storage template.
    # Abseil uses an inline namespace for versioning, so it may contain '::lts_20250512' in the
    # middle of the symbol name. Clang and GCC may mangle the templates differently for the 0
    # size_t parameter, so '0.*' is used to match both '0' and '0ul'.
    storage_regex = "absl.*::container_internal::internal_compressed_tuple::Storage<absl.*::container_internal::CommonFields, 0.*, false>"
    common_fields_storage_type = find_type_from_info_types(storage_regex)

    # The Hash, Eq, or Alloc functors may not be zero-sized objects.
    # mongo::LogicalSessionIdHash is one such example. An explicit cast is needed to
    # disambiguate which `value` member variable of the CompressedTuple is to be accessed.
    settings_tuple = val["settings_"]
    return settings_tuple.cast(common_fields_storage_type)["value"]
def absl_container_size(settings):
    """Return ``settings["size_"]["data_"]`` with its low 17 bits shifted away."""
    packed_size = settings["size_"]["data_"]
    return packed_size >> 17
def absl_get_nodes(val):
    """Yield every in-use slot of an absl::container_internal::raw_hash_set or derived class."""
    settings = absl_get_settings(val)
    if absl_container_size(settings) == 0:
        return

    capacity = int(settings["capacity_"])
    heap = settings["heap_or_soo_"]["heap"]
    control_bytes = heap["control"]

    # Derive the underlying type stored in the container.
    slot_type_name = str(val.type.strip_typedefs().name) + "::slot_type"
    slot_type = lookup_type(slot_type_name).strip_typedefs()

    # Walk the array of ctrl bytes; a non-negative ctrl byte marks an in-use slot.
    # https://github.com/abseil/abseil-cpp/blob/8a3caf7dea955b513a6c1b572a2423c6b4213402/absl/container/internal/raw_hash_set.h#L2108-L2113
    for index in range(capacity):
        if int(control_bytes[index]) < 0:
            continue
        yield heap["slot_array"]["p"].cast(slot_type.pointer())[index]
class AbslHashSetPrinterBase(object):
"""Pretty-printer base class for absl::[node/flat]_hash_set<>."""

View File

@ -0,0 +1,244 @@
"""Shared utilities used by the MongoDB GDB scripts.
This module is imported (not sourced) by other scripts in this directory. It
must remain free of module-level side effects so it can be safely loaded as a
module while those scripts are sourced by .gdbinit.
"""
import re
import sys
import gdb
# Global block of the symtab that defines `main`; resolved lazily by lookup_type().
MAIN_GLOBAL_BLOCK = None


def lookup_type(gdb_type_str: str) -> gdb.Type:
    """Resolve a type name to a gdb.Type, retrying against main's global block.

    GDB says it searches the global blocks, however this appears not to be the
    case, or at least it doesn't search all global blocks. When the default
    lookup fails, the lookup is retried against the global block of the symtab
    that defines ``main``; that block is resolved once and cached in
    ``MAIN_GLOBAL_BLOCK``.

    Args:
        gdb_type_str: the type name to resolve.

    Returns:
        The matching gdb.Type.

    Raises:
        gdb.error: if every attempt fails; the message lists each failure.
    """
    global MAIN_GLOBAL_BLOCK

    failures = []
    try:
        return gdb.lookup_type(gdb_type_str)
    except Exception as exc:
        failures.append(exc)

    try:
        if MAIN_GLOBAL_BLOCK is None:
            main_symbol = gdb.lookup_symbol("main")[0]
            if main_symbol is None:
                # gdb.lookup_symbol() reports a missing symbol as None. Without this
                # check the failure would surface as an AttributeError and hide the
                # error collected from the first lookup.
                raise gdb.error("symbol 'main' not found")
            MAIN_GLOBAL_BLOCK = main_symbol.symtab.global_block()
        return gdb.lookup_type(gdb_type_str, MAIN_GLOBAL_BLOCK)
    except Exception as exc:
        failures.append(exc)

    raise gdb.error("Failed to get type, tried:\n%s" % "\n".join([str(exc) for exc in failures]))
def get_thread_id():
"""Return the thread id of the currently selected GDB thread as an integer.

The output of GDB's `thread` command is searched for a pthread id: a hexadecimal
value on Linux, a decimal value on SunOS. A non-zero LWP id taken from
`gdb.selected_thread().ptid[1]` is also accepted.

Raises ValueError when no id can be determined.
"""
# GDB thread example:
# RHEL
# [Current thread is 1 (Thread 0x7f072426cca0 (LWP 12867))]
thread_info = gdb.execute("thread", from_tty=False, to_string=True)
if sys.platform.startswith("linux"):
# On Linux the pthread id is matched as a hexadecimal literal.
match = re.search(r"Thread (?P<pthread_id>0x[0-9a-f]+)", thread_info)
if match:
return int(match.group("pthread_id"), 16)
elif sys.platform.startswith("sunos"):
# On SunOS the pthread id is matched as a decimal number.
match = re.search(r"Thread (?P<pthread_id>[0-9]+)", thread_info)
if match:
return int(match.group("pthread_id"), 10)
# NOTE(review): indentation is not visible in this view, so it is unclear whether this LWP
# fallback belongs to the sunos branch only or runs for every platform - confirm in the file.
lwpid = gdb.selected_thread().ptid[1]
if lwpid != 0:
return lwpid
# Nothing usable was found; include the raw `thread` output to aid debugging.
raise ValueError("Failed to find thread id in {}".format(thread_info))
def get_current_thread_name():
    """Return the quoted name of the currently selected GDB thread.

    ``mongo::getThreadName()`` is evaluated first. If that raises ``gdb.error`` or
    yields an empty name, the name GDB reports for the selected thread is returned
    instead, wrapped in double quotes.
    """
    gdb_reported_name = gdb.selected_thread().name
    fallback_name = '"%s"' % (gdb_reported_name or "")
    try:
        # This goes through the pretty printer for StringData, which adds "" around
        # the name, so an empty name stringifies as '""'.
        mongo_name = str(gdb.parse_and_eval("mongo::getThreadName()"))
    except gdb.error:
        return fallback_name
    return fallback_name if mongo_name == '""' else mongo_name
class RegisterMongoCommand(object):
    """Registry of the mongo GDB commands and their descriptions."""

    # Maps command name -> docstring of the object registered under that name.
    _MONGO_COMMANDS = {}  # type: ignore

    @classmethod
    def register(cls, obj, name, command_class):
        """Initialize ``obj`` as a GDB command with no completer and record its docstring."""
        gdb.Command.__init__(obj, name, command_class)
        cls._MONGO_COMMANDS[name] = obj.__doc__

    @classmethod
    def print_commands(cls):
        """Print a header line followed by one 'name - description' line per command."""
        print("Command - Description")
        for name, description in cls._MONGO_COMMANDS.items():
            print("%s - %s" % (name, description))
def get_bytes(obj):
    """Return ``obj`` cast to ``unsigned char*``.

    The caller must take care to cast the returned value to the type it needs. This
    is particularly useful for mongo::Decorable<> types, which store their
    decorations as a slab of memory addressed through an unsigned char*.
    """
    byte_pointer_type = gdb.lookup_type("unsigned char").pointer()
    return obj.cast(byte_pointer_type)
def get_unique_ptr_bytes(obj):
    """Read the pointer stored in a libstdc++ std::unique_ptr as ``unsigned char*``."""
    # View the unique_ptr as the _Head_base that holds the pointer, then read its member.
    head_base_type = gdb.lookup_type("std::_Head_base<0, unsigned char*, false>")
    return obj.cast(head_base_type)["_M_head_impl"]
def get_unique_ptr(obj):
    """Read the pointer stored in a libstdc++ std::unique_ptr, typed as ``T*``."""
    raw_pointer = get_unique_ptr_bytes(obj)
    # T is the first template argument of the unique_ptr type.
    pointee_type = obj.type.template_argument(0)
    return raw_pointer.cast(pointee_type.pointer())
def _cast_decoration_value(type_name: str, decoration_address: int, /) -> gdb.Value:
    """Return the value at ``decoration_address`` interpreted as ``type_name``.

    gdb.lookup_type() cannot be used when the decoration type is a pointer type,
    e.g. ServiceContext::declareDecoration<VectorClock*>(), so a cast expression is
    built and evaluated with gdb.parse_and_eval() instead.
    """
    # Split the name into its non-pointer portion and any trailing whitespace/'*'
    # characters. Only the non-pointer portion is single-quoted so that a type
    # defined within an anonymous namespace resolves correctly; the appended '*'
    # makes the expression a pointer to the decoration type.
    base_and_suffix = re.compile(r"^(.*[\w>])([\s\*]*)$")
    pointer_type_expr = base_and_suffix.sub(r"'\1'\2*", type_name)
    cast_expr = f"({pointer_type_expr}) {decoration_address}"
    return gdb.parse_and_eval(cast_expr).dereference()
def get_object_decoration(decorable, start, index):
    """Return ``(type_name, value, address)`` for one decoration of ``decorable``.

    Args:
        decorable: value whose ``_decorations._data`` holds the decoration bytes.
        start: indexable registry entries, each exposing ``typeInfo`` and ``offset``.
        index: position of the wanted entry within ``start``.
    """
    raw_bytes = get_bytes(decorable["_decorations"]["_data"])
    registry_entry = start[index]

    # typeInfo prints as "... <typeinfo for T>"; keep only T.
    type_info_text = str(registry_entry["typeInfo"])
    deco_type_name = re.sub(r".* <typeinfo for (.*)>", r"\1", type_info_text)

    slot = raw_bytes[int(registry_entry["offset"])]
    # Keep only the part of the printed address that precedes its last space.
    obj_addr = re.sub(r"^(.*) .*", r"\1", str(slot.address))
    typed_value = _cast_decoration_value(deco_type_name, int(slot.address))
    return (deco_type_name, typed_value, obj_addr)
def get_decorable_info(decorable):
    """Return ``(start, count)`` describing the decoration registry of ``decorable``.

    ``start`` is the ``_M_start`` pointer of the registry's ``_entries`` vector and
    ``count`` is the number of ``Registry::Entry`` elements up to ``_M_finish``.
    """
    decorated_type_name = decorable.type.template_argument(0).name
    registry_name = "mongo::decorable_detail::gdbRegistry<{}>".format(decorated_type_name)
    registry_symbol, _ = gdb.lookup_symbol(registry_name)

    vector_impl = registry_symbol.value()["_entries"]["_M_impl"]
    start = vector_impl["_M_start"]
    finish = vector_impl["_M_finish"]

    # Element count = byte distance between the two pointers / size of one entry.
    entry_type = lookup_type("mongo::decorable_detail::Registry::Entry")
    byte_span = int(finish) - int(start)
    count = int(byte_span / entry_type.sizeof)
    return start, count
def get_boost_optional(optional):
    """Return the value held by a boost::optional, or None when it is empty.

    TODO: Import the boost pretty printers instead of using this custom function.
    """
    if not optional["m_initialized"]:
        return None

    pointer_to_value_type = optional.type.template_argument(0).pointer()
    storage = optional["m_storage"]

    # boost::optional<T> is either stored using boost::optional_detail::aligned_storage<T> or
    # using direct storage of `T`. Scalar types are able to take advantage of direct storage.
    #
    # https://www.boost.org/doc/libs/1_79_0/libs/optional/doc/html/boost_optional/tutorial/performance_considerations.html
    uses_direct_storage = storage.type.strip_typedefs().pointer() == pointer_to_value_type
    if uses_direct_storage:
        return storage

    aligned_bytes = storage["dummy_"]["data"]
    return aligned_bytes.cast(pointer_to_value_type).dereference()
# Results of find_type_from_info_types keyed by regex, as the lookup can be expensive.
_type_cache: dict[str, gdb.Type] = {}


def find_type_from_info_types(regex):
    """Return the gdb.Type of the first type reported by ``info types <regex>``.

    This is useful when compilers disagree about the spelling of a symbol or
    template instantiation. Successful lookups are cached per regex.

    Raises:
        RuntimeError: if the command output contains no type entry.
    """
    cached = _type_cache.get(regex)
    if cached is not None:
        return cached

    listing = gdb.execute(f"info types {regex}", to_string=True)
    # Example output:
    # All types matching regular expression "absl::lts_.*::container_internal::internal_compressed_tuple::Storage<absl::lts_.*::container_internal::CommonFields, 0.*, false>":
    # File src/third_party/abseil-cpp/dist/absl/container/internal/compressed_tuple.h:
    # 85: absl::lts_20250512::container_internal::internal_compressed_tuple::Storage<absl::lts_20250512::container_internal::CommonFields, 0, false>;

    # An entry is: number + colon + whitespace + the type (captured) + semicolon.
    entry = re.search(r"^\s*\d+:\s+(.*?);$", listing, re.MULTILINE)
    if entry is None:
        raise RuntimeError(f"No types found for regex: {regex}")

    found_type = gdb.lookup_type(entry.group(1))
    _type_cache[regex] = found_type
    return found_type
def absl_get_settings(val):
    """Return the CommonFields value held in ``settings_`` of an abseil (flat/node)_hash_(map/set)."""
    # Find the type of the CompressedTuple Storage template.
    # Abseil uses an inline namespace for versioning, so it may contain '::lts_20250512' in the
    # middle of the symbol name. Clang and GCC may mangle the templates differently for the 0
    # size_t parameter, so '0.*' is used to match both '0' and '0ul'.
    storage_regex = "absl.*::container_internal::internal_compressed_tuple::Storage<absl.*::container_internal::CommonFields, 0.*, false>"
    common_fields_storage_type = find_type_from_info_types(storage_regex)

    # The Hash, Eq, or Alloc functors may not be zero-sized objects.
    # mongo::LogicalSessionIdHash is one such example. An explicit cast is needed to
    # disambiguate which `value` member variable of the CompressedTuple is to be accessed.
    settings_tuple = val["settings_"]
    return settings_tuple.cast(common_fields_storage_type)["value"]
def absl_container_size(settings):
    """Return ``settings["size_"]["data_"]`` with its low 17 bits shifted away."""
    packed_size = settings["size_"]["data_"]
    return packed_size >> 17
def absl_get_nodes(val):
    """Yield every in-use slot of an absl::container_internal::raw_hash_set or derived class."""
    settings = absl_get_settings(val)
    if absl_container_size(settings) == 0:
        return

    capacity = int(settings["capacity_"])
    heap = settings["heap_or_soo_"]["heap"]
    control_bytes = heap["control"]

    # Derive the underlying type stored in the container.
    slot_type_name = str(val.type.strip_typedefs().name) + "::slot_type"
    slot_type = lookup_type(slot_type_name).strip_typedefs()

    # Walk the array of ctrl bytes; a non-negative ctrl byte marks an in-use slot.
    # https://github.com/abseil/abseil-cpp/blob/8a3caf7dea955b513a6c1b572a2423c6b4213402/absl/container/internal/raw_hash_set.h#L2108-L2113
    for index in range(capacity):
        if int(control_bytes[index]) < 0:
            continue
        yield heap["slot_array"]["p"].cast(slot_type.pointer())[index]

View File

@ -7,9 +7,8 @@ from pathlib import Path
import gdb
import gdb.printing
if not gdb:
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo import lookup_type
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo_utils import lookup_type
ABT_NS = "mongo::abt"

View File

@ -6,9 +6,8 @@ from pprint import pprint
import bson
import gdb
if not gdb:
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo import lookup_type
sys.path.insert(0, str(Path(os.path.abspath(__file__)).parent.parent.parent))
from buildscripts.gdb.mongo_utils import lookup_type
DEBUGGING = False
"""