diff --git a/.bazelrc b/.bazelrc index 13a076bab72..9b4aacbee93 100644 --- a/.bazelrc +++ b/.bazelrc @@ -544,6 +544,15 @@ common:mod-scanner --output_groups=report common:mod-scanner --aspects //modules_poc:mod_scanner.bzl%mod_scanner_aspect common:mod-scanner --remote_download_regex=.*\.mod_scanner_decls.json$ +--config=compiledb +common:compiledb --aspects //bazel/compiledb:compiledb_aspect.bzl%compiledb_aspect +common:compiledb --output_groups=compiledb_report +common:compiledb --keep_going +common:compiledb --remote_download_regex=.*\.compile_command\.json$ + +--config=compiledb-aspect +common:compiledb-aspect --config=compiledb + --config=symbol-checker common:symbol-checker --aspects //bazel/symbol_checker:symbol_checker.bzl%symbol_checker_aspect common:symbol-checker --output_groups=symbol_checker diff --git a/BUILD.bazel b/BUILD.bazel index 0f31e59117e..dc5214aa796 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -93,6 +93,11 @@ alias( actual = "//src/mongo/tools/mongo_tidy_checks/tests:MongoTidyCheck_unittest", ) +alias( + name = "setup_clang_tidy", + actual = "//buildscripts:setup_clang_tidy", +) + setup_mongo_toolchain_aliases() setup_gdb_toolchain_aliases() @@ -125,7 +130,7 @@ render_template( ) genrule( - name = "compiledb", + name = "compiledb_only", srcs = ["compile_commands.json"], outs = ["compile_commands_done"], cmd = "echo noop > $(location :compile_commands_done)", @@ -135,6 +140,24 @@ genrule( ], ) +genrule( + name = "compiledb", + srcs = ["compile_commands.json"] + select({ + "@platforms//os:windows": [], + "//conditions:default": [ + ":setup_clang_tidy", + ":clang_tidy_config", + "//src/mongo/tools/mongo_tidy_checks", + ], + }), + outs = ["compiledb_setup_done"], + cmd = "echo noop > $(location :compiledb_setup_done)", + tags = [ + "no-cache", + "no-remote-exec", + ], +) + # This sets up targets for install-wiredtiger and archive-wiredtiger mongo_install( name = "wiredtiger", diff --git a/bazel/auto_header/auto_header.bzl b/bazel/auto_header/auto_header.bzl index ba1cca51aa9..c899047918a 100644 --- a/bazel/auto_header/auto_header.bzl +++ b/bazel/auto_header/auto_header.bzl @@ -26,6 +26,17 @@ def dedupe_stable(xs): out.append(x) return out +_AUTO_HEADER_EXTENSIONS = { + ".c": True, + ".cc": True, + ".cpp": True, + ".cxx": True, + ".h": True, + ".hh": True, + ".hpp": True, + ".hxx": True, +} + def _fg_name_for_filename(name): # NEW: collapse to leaf to match Python generator leaf = name.rsplit("/", 1)[-1] @@ -75,6 +86,10 @@ def _is_third_party_pkg(pkg): "third_party/" in pkg # safety ) +def _has_auto_header_extension(name): + dot = name.rfind(".") + return dot != -1 and name[dot:] in _AUTO_HEADER_EXTENSIONS + def maybe_compute_auto_headers(srcs): # Only handle plain list-of-strings; if configurable/mixed, return None if type(srcs) != "list": @@ -90,12 +105,12 @@ def maybe_compute_auto_headers(srcs): out.append(s) continue - pkg, name = _split_label_or_file(s) - if _is_third_party_pkg(pkg): + # Skip external repos entirely. + if s.startswith("@"): continue - # Skip external repos and any third_party package entirely - if s.startswith("@"): + pkg, name = _split_label_or_file(s) + if _is_third_party_pkg(pkg): continue # If *_gen listed in srcs, add its auto-header (transitive headers), @@ -105,8 +120,7 @@ def maybe_compute_auto_headers(srcs): continue # Regular mapping for files we care about - if (name.endswith(".c") or name.endswith(".cc") or name.endswith(".cpp") or name.endswith(".cxx") or - name.endswith(".h") or name.endswith(".hh") or name.endswith(".hpp") or name.endswith(".hxx")): + if _has_auto_header_extension(name): out.append(_auto_header_label(pkg, name)) continue @@ -261,6 +275,7 @@ def build_selects_and_flat_files(srcs_select, *, lib_name, debug = False): return [], [] select_objs = [] flat_files = [] + seen_flat_files = {} for i, condmap in enumerate(srcs_select): if type(condmap) != type({}): fail("mongo_cc macro({}): srcs_select[{}] must be a dict of {cond: [srcs]}." @@ -276,6 +291,8 @@ def build_selects_and_flat_files(srcs_select, *, lib_name, debug = False): if type(s) != "string": fail("mongo_cc macro({}): srcs_select[{}][{}] item must be string, got {}" .format(lib_name, i, cond, type(s))) - flat_files.extend(src_list) + if s not in seen_flat_files: + seen_flat_files[s] = True + flat_files.append(s) select_objs.append(select(condmap)) - return select_objs, dedupe_preserve_order(flat_files) + return select_objs, flat_files diff --git a/bazel/compiledb/BUILD.bazel b/bazel/compiledb/BUILD.bazel new file mode 100644 index 00000000000..e69de29bb2d diff --git a/bazel/compiledb/compiledb_aspect.bzl b/bazel/compiledb/compiledb_aspect.bzl new file mode 100644 index 00000000000..bd8dea775a2 --- /dev/null +++ b/bazel/compiledb/compiledb_aspect.bzl @@ -0,0 +1,422 @@ +"""Aspect-based compile_commands fragment generation.""" + +load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") +load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") + +_SOURCE_EXTENSIONS = { + "c": True, + "cc": True, + "cpp": True, + "cxx": True, + "c++": True, + "C": True, +} + +CompileCommandInfo = provider( + "Transitive compile_commands fragment files.", + fields = { + "files": "depset of compile command fragments", + "required_inputs": "depset of generated compile inputs that must be materialized", + }, +) + +def _is_cpp_source(src): + return src.extension in _SOURCE_EXTENSIONS + +def _rule_sources(ctx): + srcs = [] + if hasattr(ctx.rule.files, "srcs"): + srcs.extend(ctx.rule.files.srcs) + if hasattr(ctx.rule.file, "src") and ctx.rule.file.src: + srcs.append(ctx.rule.file.src) + elif hasattr(ctx.rule.attr, "srcs"): + for src in ctx.rule.attr.srcs: + srcs.extend(src.files.to_list()) + + seen = {} + filtered = [] + for src in srcs: + if not _is_cpp_source(src): + continue + if src.path in seen: + continue + seen[src.path] = True + filtered.append(src) + return filtered + +def _expand_flags(ctx, flags): + needs_location_expansion = False + needs_make_expansion = False + for flag in flags: + if "$(" in flag: + needs_make_expansion = True + if "$(location" in flag: + needs_location_expansion = True + + if not needs_make_expansion: + return flags + + location_targets = [] + if needs_location_expansion: + seen_labels = {} + for attr_name in [ + "srcs", + "hdrs", + "textual_hdrs", + "deps", + "implementation_deps", + "additional_compiler_inputs", + "data", + "binary_with_debug", + ]: + if not hasattr(ctx.rule.attr, attr_name): + continue + attr_value = getattr(ctx.rule.attr, attr_name) + if type(attr_value) == "list": + values = attr_value + elif attr_value != None: + values = [attr_value] + else: + values = [] + + for value in values: + if not hasattr(value, "label"): + continue + label = str(value.label) + if label in seen_labels: + continue + seen_labels[label] = True + location_targets.append(value) + + expanded = [] + for flag in flags: + if needs_location_expansion and "$(location" in flag: + flag = ctx.expand_location(flag, location_targets) + if "$(" in flag: + flag = ctx.expand_make_variables("compiledb_expand_flags", flag, ctx.var) + expanded.append(flag) + return expanded + +def _compile_variables(cc_toolchain, feature_configuration, compilation_context, user_compile_flags): + return cc_common.create_compile_variables( + feature_configuration = feature_configuration, + cc_toolchain = cc_toolchain, + user_compile_flags = user_compile_flags, + include_directories = compilation_context.includes, + quote_include_directories = compilation_context.quote_includes, + system_include_directories = depset( + transitive = [ + compilation_context.system_includes, + compilation_context.external_includes, + ], + ), + framework_include_directories = compilation_context.framework_includes, + preprocessor_defines = depset( + transitive = [ + compilation_context.defines, + compilation_context.local_defines, + ], + ), + ) + +def _rule_compile_flags(ctx): + common_flags = [] + cxx_flags = [] + + if hasattr(ctx.rule.attr, "copts"): + common_flags.extend(ctx.rule.attr.copts) + if hasattr(ctx.rule.attr, "cxxopts"): + cxx_flags.extend(ctx.rule.attr.cxxopts) + + return common_flags, cxx_flags + +def _requested_and_unsupported_features(ctx): + requested_features = list(ctx.features) + unsupported_features = list(ctx.disabled_features) + + if hasattr(ctx.rule.attr, "features"): + for feature in ctx.rule.attr.features: + if feature.startswith("-"): + unsupported_features.append(feature[1:]) + else: + requested_features.append(feature) + + return requested_features, unsupported_features + +def _toolchain_flags(feature_configuration, action_name, compile_variables): + return cc_common.get_memory_inefficient_command_line( + feature_configuration = feature_configuration, + action_name = action_name, + variables = compile_variables, + ) + +def _compiler_path(cc_toolchain): + compiler = cc_toolchain.compiler_executable + if hasattr(compiler, "path"): + return compiler.path + return compiler + +def _should_materialize_artifact(path): + return path.startswith("bazel-out/") + +def _collect_dep_compile_command_info(ctx): + output_files = [] + required_inputs = [] + for attr_name in ["deps", "implementation_deps", "binary_with_debug"]: + if not hasattr(ctx.rule.attr, attr_name): + continue + + attr_value = getattr(ctx.rule.attr, attr_name) + if type(attr_value) == "list": + deps = attr_value + else: + deps = [attr_value] + + for dep in deps: + if dep != None and CompileCommandInfo in dep: + output_files.append(dep[CompileCommandInfo].files) + required_inputs.append(dep[CompileCommandInfo].required_inputs) + + return depset(transitive = output_files), depset(transitive = required_inputs) + +def _required_compile_inputs(ctx, compilation_context, srcs): + direct = [] + seen = {} + + if hasattr(ctx.rule.files, "srcs"): + for artifact in ctx.rule.files.srcs: + if _should_materialize_artifact(artifact.path) and artifact.path not in seen: + seen[artifact.path] = True + direct.append(artifact) + + if hasattr(ctx.rule.file, "src") and ctx.rule.file.src: + artifact = ctx.rule.file.src + if _should_materialize_artifact(artifact.path) and artifact.path not in seen: + seen[artifact.path] = True + direct.append(artifact) + + for src in srcs: + if _should_materialize_artifact(src.path) and src.path not in seen: + seen[src.path] = True + direct.append(src) + + for header in compilation_context.headers.to_list(): + if _should_materialize_artifact(header.path) and header.path not in seen: + seen[header.path] = True + direct.append(header) + + for attr_name in ["hdrs", "textual_hdrs", "additional_compiler_inputs"]: + if not hasattr(ctx.rule.files, attr_name): + continue + for artifact in getattr(ctx.rule.files, attr_name): + if _should_materialize_artifact(artifact.path) and artifact.path not in seen: + seen[artifact.path] = True + direct.append(artifact) + + return depset(direct = direct) + +def _package_root(label): + parts = [] + if label.workspace_root: + parts.append(label.workspace_root) + if label.package: + parts.append(label.package) + return "/".join(parts) + +def _hex32(val): + v = val & 0xFFFFFFFF + s = "%x" % v + return ("0" * (8 - len(s))) + s + +def _fragment_file_id(target, src): + return src.basename + "." + _hex32(hash(src.path)) + "." + _hex32(hash(str(target.label))) + +def _output_path(ctx, target, src): + package_root = _package_root(target.label) + parts = [ctx.bin_dir.path] + if package_root: + parts.append(package_root) + src_parts = src.short_path.split("/") + parts.extend([ + "_compiledb_objs", + target.label.name, + ]) + parts.extend(src_parts[:-1]) + parts.append(src_parts[-1] + ".o") + return "/".join(parts) + +def _rewrite_msvc_external_include_flags(args, compilation_context): + external_include_paths = { + path: True + for path in compilation_context.external_includes.to_list() + } + if not external_include_paths: + return args + + rewritten = [] + skip_next = False + for index in range(len(args)): + if skip_next: + skip_next = False + else: + arg = args[index] + if arg == "/I" and index + 1 < len(args): + include_path = args[index + 1] + if include_path in external_include_paths: + rewritten.extend([ + "/external:I" + include_path, + "/external:W0", + ]) + else: + rewritten.extend([arg, include_path]) + skip_next = True + elif arg.startswith("/I") and arg[2:] in external_include_paths: + rewritten.extend([ + "/external:I" + arg[2:], + "/external:W0", + ]) + else: + rewritten.append(arg) + + return rewritten + +def _command_line_args(target, src, compiler, is_msvc, toolchain_flags, compilation_context, ctx): + output_path = _output_path(ctx, target, src) + + args = [compiler] + args.extend(toolchain_flags) + if is_msvc: + args = _rewrite_msvc_external_include_flags(args, compilation_context) + + if is_msvc: + args.extend(["/c", src.path, "/Fo" + output_path]) + else: + args.extend(["-c", src.path, "-o", output_path]) + + return args, output_path + +def _emit_compile_command(ctx, target, src, command_line, output_path): + file_id = _fragment_file_id(target, src) + output = ctx.actions.declare_file("compiledb/" + file_id + ".compile_command.json") + ctx.actions.write( + output = output, + content = json.encode({ + "file": src.path, + "arguments": command_line, + "output": output_path, + "target": str(target.label), + }), + ) + return output + +def _compiledb_aspect_impl(target, ctx): + dep_outputs, dep_required_inputs = _collect_dep_compile_command_info(ctx) + if CcInfo not in target: + return [ + CompileCommandInfo(files = dep_outputs, required_inputs = dep_required_inputs), + OutputGroupInfo(compiledb_report = depset(transitive = [dep_outputs, dep_required_inputs])), + ] + + compilation_context = target[CcInfo].compilation_context + srcs = _rule_sources(ctx) + if not srcs: + return [ + CompileCommandInfo(files = dep_outputs, required_inputs = dep_required_inputs), + OutputGroupInfo(compiledb_report = depset(transitive = [dep_outputs, dep_required_inputs])), + ] + + cc_toolchain = find_cpp_toolchain(ctx) + requested_features, unsupported_features = _requested_and_unsupported_features(ctx) + feature_configuration = cc_common.configure_features( + ctx = ctx, + cc_toolchain = cc_toolchain, + requested_features = requested_features, + unsupported_features = unsupported_features, + ) + rule_compile_flags, rule_cxx_flags = _rule_compile_flags(ctx) + c_user_compile_flags = _expand_flags( + ctx, + ctx.fragments.cpp.conlyopts + ctx.fragments.cpp.copts + rule_compile_flags, + ) + cpp_user_compile_flags = _expand_flags( + ctx, + ctx.fragments.cpp.cxxopts + ctx.fragments.cpp.copts + rule_compile_flags + rule_cxx_flags, + ) + c_compile_variables = _compile_variables( + cc_toolchain, + feature_configuration, + compilation_context, + c_user_compile_flags, + ) + cpp_compile_variables = _compile_variables( + cc_toolchain, + feature_configuration, + compilation_context, + cpp_user_compile_flags, + ) + compiler = _compiler_path(cc_toolchain) + is_msvc = compiler.endswith("cl.exe") or compiler.endswith("/cl") or compiler.endswith("\\cl.exe") + c_toolchain_flags = None + cpp_toolchain_flags = None + outputs = [] + for src in srcs: + if src.extension == "c": + if c_toolchain_flags == None: + c_toolchain_flags = _toolchain_flags( + feature_configuration, + ACTION_NAMES.c_compile, + c_compile_variables, + ) + toolchain_flags = c_toolchain_flags + else: + if cpp_toolchain_flags == None: + cpp_toolchain_flags = _toolchain_flags( + feature_configuration, + ACTION_NAMES.cpp_compile, + cpp_compile_variables, + ) + toolchain_flags = cpp_toolchain_flags + + command_line, output_path = _command_line_args( + target, + src, + compiler, + is_msvc, + toolchain_flags, + compilation_context, + ctx, + ) + outputs.append( + _emit_compile_command( + ctx, + target, + src, + command_line, + output_path, + ), + ) + all_outputs = depset(direct = outputs, transitive = [dep_outputs]) + required_inputs = depset( + transitive = [ + dep_required_inputs, + _required_compile_inputs(ctx, compilation_context, srcs), + ], + ) + + return [ + CompileCommandInfo(files = all_outputs, required_inputs = required_inputs), + OutputGroupInfo(compiledb_report = depset(transitive = [all_outputs, required_inputs])), + ] + +compiledb_aspect = aspect( + implementation = _compiledb_aspect_impl, + fragments = ["cpp"], + attrs = { + "_cc_toolchain": attr.label(default = Label("@bazel_tools//tools/cpp:current_cc_toolchain")), + }, + toolchains = [ + "@bazel_tools//tools/cpp:toolchain_type", + ], + attr_aspects = ["deps", "implementation_deps", "binary_with_debug"], + required_providers = [CcInfo], +) diff --git a/bazel/install_rules/install_rules.bzl b/bazel/install_rules/install_rules.bzl index cd5a5df227d..5e647bd026e 100644 --- a/bazel/install_rules/install_rules.bzl +++ b/bazel/install_rules/install_rules.bzl @@ -13,6 +13,26 @@ load("//bazel/install_rules:providers.bzl", "TestBinaryInfo") load("//bazel/toolchains/cc:mongo_errors.bzl", "DWP_ERROR_MESSAGE") load("//bazel:transitions.bzl", "extensions_transition") +_WINDOWS_BINARY_EXTENSIONS = { + ".dll": True, + ".exe": True, + ".pdb": True, + ".ps1": True, +} + +_WINDOWS_DEBUG_EXTENSIONS = { + ".pdb": True, +} + +_LINUX_DEBUG_EXTENSIONS = { + ".debug": True, + ".dwp": True, +} + +_MACOS_DEBUG_EXTENSIONS = { + ".dSYM": True, +} + # Used to skip rules on certain OS architectures def _empty_rule_impl(ctx): pass @@ -108,7 +128,30 @@ def get_constraints(ctx): windows_constraint = ctx.attr._windows_constraint[platform_common.ConstraintValueInfo] return linux_constraint, macos_constraint, windows_constraint -def is_binary_file(ctx, basename): +def _platform_kind(ctx): + linux_constraint, macos_constraint, windows_constraint = get_constraints(ctx) + if ctx.target_platform_has_constraint(linux_constraint): + return "linux" + if ctx.target_platform_has_constraint(macos_constraint): + return "macos" + if ctx.target_platform_has_constraint(windows_constraint): + return "windows" + ctx.fail("Unknown OS") + return "" + +def _basename(path): + slash = path.rfind("/") + if slash == -1: + return path + return path[slash + 1:] + +def _extension(basename): + dot = basename.rfind(".") + if dot == -1: + return "" + return basename[dot:] + +def is_binary_file(platform_kind, basename): """Check if file looks like a binary Args: @@ -118,18 +161,16 @@ def is_binary_file(ctx, basename): Returns: True if it looks like a binary, False otherwise """ - linux_constraint, macos_constraint, windows_constraint = get_constraints(ctx) - if ctx.target_platform_has_constraint(linux_constraint): + if platform_kind == "linux": return not (basename.startswith("lib") or basename.startswith("mongo_crypt_v") or basename.startswith("stitch_support.so")) - elif ctx.target_platform_has_constraint(macos_constraint): + elif platform_kind == "macos": return not (basename.startswith("lib") or basename.startswith("mongo_crypt_v") or basename.startswith("stitch_support.dylib")) - elif ctx.target_platform_has_constraint(windows_constraint): - return basename.endswith(".exe") or basename.endswith(".pdb") or basename.endswith(".dll") or basename.endswith(".ps1") + elif platform_kind == "windows": + return _extension(basename) in _WINDOWS_BINARY_EXTENSIONS else: - ctx.fail("Unknown OS") return False -def is_debug_file(ctx, basename): +def is_debug_file(platform_kind, basename): """Check if file looks a debug file Args: @@ -139,15 +180,14 @@ def is_debug_file(ctx, basename): Returns: True if it looks like a debug file, False otherwise """ - linux_constraint, macos_constraint, windows_constraint = get_constraints(ctx) - if ctx.target_platform_has_constraint(linux_constraint): - return basename.endswith(".debug") or basename.endswith(".dwp") - elif ctx.target_platform_has_constraint(macos_constraint): - return basename.endswith(".dSYM") - elif ctx.target_platform_has_constraint(windows_constraint): - return basename.endswith(".pdb") + ext = _extension(basename) + if platform_kind == "linux": + return ext in _LINUX_DEBUG_EXTENSIONS + elif platform_kind == "macos": + return ext in _MACOS_DEBUG_EXTENSIONS + elif platform_kind == "windows": + return ext in _WINDOWS_DEBUG_EXTENSIONS else: - ctx.fail("Unknown OS") return False def declare_output(ctx, output, is_directory): @@ -166,7 +206,7 @@ def declare_output(ctx, output, is_directory): else: return ctx.actions.declare_file(output) -def sort_file(ctx, file, install_dir, file_map, is_directory): +def sort_file(ctx, file, basename, install_dir, file_map, is_directory, platform_kind): """Determine location a file should be installed to Args: @@ -177,24 +217,28 @@ def sort_file(ctx, file, install_dir, file_map, is_directory): is_directory: determines if the file is a directory """ - _, macos_constraint, _ = get_constraints(ctx) - basename = paths.basename(file) - bin_install = install_dir + "/bin/" + basename - if bin_install.endswith(".dwp"): + install_basename = basename + ext = _extension(basename) + if ext == ".dwp": # Due to us creating our binaries using the _with_debug name # the dwp files also contain it. Strip the _with_debug from the name - bin_install = bin_install.replace("_with_debug.dwp", ".dwp") + install_basename = install_basename.replace("_with_debug.dwp", ".dwp") - lib_install = install_dir + "/lib/" + basename + bin_install = install_dir + "/bin/" + install_basename - if is_binary_file(ctx, basename) or basename.endswith(".py"): - if not is_debug_file(ctx, basename): + lib_install = install_dir + "/lib/" + install_basename + is_binary = is_binary_file(platform_kind, basename) + is_debug = is_debug_file(platform_kind, basename) + is_python = ext == ".py" + + if is_binary or is_python: + if not is_debug: if ctx.attr.debug != "debug": file_map["binaries"][file] = declare_output(ctx, bin_install, is_directory) elif ctx.attr.debug != "stripped" or ctx.attr.publish_debug_in_stripped: file_map["binaries_debug"][file] = declare_output(ctx, bin_install, is_directory) - elif not is_debug_file(ctx, basename): + elif not is_debug: if ctx.attr.debug != "debug": file_map["dynamic_libs"][file] = declare_output(ctx, lib_install, is_directory) @@ -225,19 +269,24 @@ def mongo_install_rule_impl(ctx): outputs = [] dwps = [] install_dir = ctx.label.name + platform_kind = _platform_kind(ctx) + install_script = ctx.attr._install_script.files.to_list()[0] # sort direct sources for input_bin in ctx.attr.srcs: if DebugPackageInfo in input_bin and ctx.attr.create_dwp and ctx.attr.debug != "stripped": bin = input_bin[DebugPackageInfo].dwp_file dwps.append(bin) - sort_file(ctx, bin.path, install_dir, file_map, bin.is_directory) - test_files.extend(input_bin[TestBinaryInfo].test_binaries.to_list()) - for bin in input_bin.files.to_list(): - sort_file(ctx, bin.path, install_dir, file_map, bin.is_directory) + sort_file(ctx, bin.path, bin.basename, install_dir, file_map, bin.is_directory, platform_kind) + input_test_binaries = input_bin[TestBinaryInfo].test_binaries.to_list() + input_files = input_bin.files.to_list() + test_files.extend(input_test_binaries) + for bin in input_files: + sort_file(ctx, bin.path, bin.basename, install_dir, file_map, bin.is_directory, platform_kind) for input_label, output_folder in ctx.attr.root_files.items(): - for file in input_label.files.to_list(): + label_files = input_label.files.to_list() + for file in label_files: file_map["root_files"][file.path] = declare_output(ctx, install_dir + "/" + output_folder + "/" + file.basename, file.is_directory) for input_label, output_path in ctx.attr.include_files.items(): @@ -246,24 +295,25 @@ def mongo_install_rule_impl(ctx): # sort dependency install files for dep in ctx.attr.deps: - test_files.extend(dep[TestBinaryInfo].test_binaries.to_list()) + dep_test_binaries = dep[TestBinaryInfo].test_binaries.to_list() + dep_default_files = dep[DefaultInfo].files.to_list() + dep_src_map_file = dep[MongoInstallInfo].src_map.to_list()[0] + test_files.extend(dep_test_binaries) # Create a map of filename to if its a directory, ie. { coolfolder: True, coolfile: False } as the json loses that info - file_directory_map = {file_dep.basename: file_dep.is_directory for file_dep in dep[DefaultInfo].files.to_list()} - src_map = json.decode(dep[MongoInstallInfo].src_map.to_list()[0]) - files = [] + file_directory_map = {file_dep.basename: file_dep.is_directory for file_dep in dep_default_files} + src_map = json.decode(dep_src_map_file) for key in src_map: if key != "roots": - files.extend(src_map[key]) - for file in files: - filename = file.split("/")[-1] + for file in src_map[key]: + filename = _basename(file) - # Due to us creating our binaries using the _with_debug name - # the dwp files also contain it. Strip the _with_debug from the name - filename = filename.replace("_with_debug.dwp", ".dwp") - sort_file(ctx, file, install_dir, file_map, file_directory_map[filename]) + # Due to us creating our binaries using the _with_debug name + # the dwp files also contain it. Strip the _with_debug from the name + filename = filename.replace("_with_debug.dwp", ".dwp") + sort_file(ctx, file, filename, install_dir, file_map, file_directory_map[filename], platform_kind) for file, folder in src_map["roots"].items(): - filename = file.split("/")[-1] + filename = _basename(file) file_map["root_files"][file] = declare_output(ctx, install_dir + "/" + folder + "/" + filename, file_directory_map[filename]) # aggregate based on type of installs @@ -287,8 +337,9 @@ def mongo_install_rule_impl(ctx): input_deps = [] installed_tests = [] for file in test_files: - if not is_debug_file(ctx, file.basename) and ctx.attr.debug != "debug": - if is_binary_file(ctx, file.basename) or file.basename.endswith(".py"): + file_basename = file.basename + if not is_debug_file(platform_kind, file_basename) and ctx.attr.debug != "debug": + if is_binary_file(platform_kind, file_basename) or _extension(file_basename) == ".py": test_path = file_map["binaries"][file.path].path # point at the binaries in bazel-bin/install/ rather than bazel-out//bin// @@ -378,7 +429,7 @@ def mongo_install_rule_impl(ctx): outputs = outputs, inputs = inputs, arguments = [ - ctx.attr._install_script.files.to_list()[0].path, + install_script.path, "--depfile=" + deps_file.path, "--install-dir=" + full_install_dir, ] + ["--depfile=" + str(dep[MongoInstallInfo].deps_files.to_list()[0].path) for dep in ctx.attr.deps], diff --git a/bazel/mongo_src_rules.bzl b/bazel/mongo_src_rules.bzl index 889f2b6e75d..edc270e935a 100644 --- a/bazel/mongo_src_rules.bzl +++ b/bazel/mongo_src_rules.bzl @@ -34,6 +34,8 @@ load("//bazel/config:generate_config_header.bzl", "generate_config_header") load("//bazel/auto_header:auto_header.bzl", "binary_srcs_with_all_headers", "build_selects_and_flat_files", "concat_selects", "dedupe_preserve_order", "maybe_all_headers", "maybe_compute_auto_headers", "strings_only") load("//bazel:test_exec_properties.bzl", "test_exec_properties") +COMPILEDB_TAG = "mongo_compiledb" + # These will throw an error if the following condition is not met: # (libunwind == on && os == linux) || libunwind == off || libunwind == auto LIBUNWIND_DEPS = select({ @@ -456,7 +458,7 @@ def mongo_cc_library( copts = copts, cxxopts = cxxopts, data = data, - tags = tags + ["mongo_library", "check_symbol_target"], + tags = tags + ["mongo_library", "check_symbol_target", COMPILEDB_TAG], linkopts = linkopts, linkstatic = select({ "@platforms//os:windows": True, @@ -708,7 +710,7 @@ def _mongo_cc_binary_and_test( # we dont want the intermediate build targets to be picked up by tags # so we empty it out original_tags = list(args["tags"]) - args["tags"] = ["intermediate_debug"] + [ + args["tags"] = ["intermediate_debug", COMPILEDB_TAG] + [ tag + "_debug" if # Transformations via `test_exec_properties` have already been applied at this point. # Need to leave cpu tags unchanged, since more parsing validation is done deeper in bazel. diff --git a/bazel/separate_debug.bzl b/bazel/separate_debug.bzl index 79f83996d18..1fa0eebd0ba 100644 --- a/bazel/separate_debug.bzl +++ b/bazel/separate_debug.bzl @@ -31,17 +31,20 @@ def get_inputs_and_outputs(ctx, shared_ext, static_ext, debug_ext): if len(input_files) == 0: return None, None, None, None if ctx.attr.type == "library": - for file in ctx.attr.binary_with_debug.files.to_list(): + shared_input = None + for file in input_files: if file.path.endswith(WITH_DEBUG_SUFFIX + static_ext): static_lib = file + elif file.path.endswith(WITH_DEBUG_SUFFIX + shared_ext): + shared_input = file if ctx.attr.cc_shared_library != None: for file in ctx.attr.cc_shared_library.files.to_list(): if file.path.endswith(WITH_DEBUG_SUFFIX + shared_ext): shared_lib = file - if file.path.endswith(WITH_DEBUG_SUFFIX + shared_ext) or shared_lib: - basename = file.basename[:-len(WITH_DEBUG_SUFFIX + shared_ext)] + if shared_input or shared_lib: + basename = shared_input.basename[:-len(WITH_DEBUG_SUFFIX + shared_ext)] if shared_input else "" if shared_lib: basename = shared_lib.basename[:-len(WITH_DEBUG_SUFFIX + shared_ext + CC_SHARED_LIBRARY_SUFFIX)] @@ -53,7 +56,7 @@ def get_inputs_and_outputs(ctx, shared_ext, static_ext, debug_ext): else: debug_info = None output_bin = ctx.actions.declare_file(basename + shared_ext) - input_bin = file + input_bin = shared_input if shared_lib: input_bin = shared_lib else: @@ -61,7 +64,7 @@ def get_inputs_and_outputs(ctx, shared_ext, static_ext, debug_ext): output_bin = None input_bin = None elif ctx.attr.type == "program": - program_bin = ctx.attr.binary_with_debug.files.to_list()[0] + program_bin = input_files[0] basename = program_bin.basename[:-len(WITH_DEBUG_SUFFIX)] if ctx.attr.enabled: @@ -157,6 +160,7 @@ def create_new_ccinfo_library(ctx, cc_toolchain, shared_lib, static_lib, cc_shar linker_input_deps.append(dep[CcInfo].linking_context.linker_inputs) if shared_lib or static_lib: + binary_linker_inputs = ctx.attr.binary_with_debug[CcInfo].linking_context.linker_inputs.to_list() if shared_lib: so_path = shared_lib.path.replace(ctx.bin_dir.path + "/", "") else: @@ -180,8 +184,8 @@ def create_new_ccinfo_library(ctx, cc_toolchain, shared_lib, static_lib, cc_shar # # This solution may break in the case where a base dependency contains only one positional argument, # but this should never happen since we will always inject at least one non positional argument globally. - cur_flags = ctx.attr.binary_with_debug[CcInfo].linking_context.linker_inputs.to_list()[0].user_link_flags - for dep in ctx.attr.binary_with_debug[CcInfo].linking_context.linker_inputs.to_list()[1:]: + cur_flags = list(binary_linker_inputs[0].user_link_flags) + for dep in binary_linker_inputs[1:]: for i in range(len(cur_flags)): dep_flags = dep.user_link_flags if dep_flags and cur_flags: @@ -237,7 +241,8 @@ def create_new_cc_shared_library_info(ctx, cc_toolchain, output_shared_lib, orig # cc_library's linkopts field for both static and dynamic transitive link opts # cc_shared_library's user_link_flags field for dynamic non-transitive link opts all_user_link_flags = dict() - for input in ctx.attr.binary_with_debug[CcInfo].linking_context.linker_inputs.to_list(): + binary_linker_inputs = ctx.attr.binary_with_debug[CcInfo].linking_context.linker_inputs.to_list() + for input in binary_linker_inputs: for flag in input.user_link_flags: all_user_link_flags[flag] = True @@ -350,14 +355,15 @@ def linux_extraction(ctx, cc_toolchain, inputs): # build-without-the-bytes enabled, these aren't downloaded. Manually collect them and add them to the # output set. dynamic_deps_runfiles = ctx.runfiles(files = []) + transitive_debug_files = get_transitive_debug_files(ctx.attr.deps) if ctx.attr.type == "program": dynamic_deps = get_transitive_dyn_libs(ctx.attr.deps) - dynamic_deps_runfiles = ctx.attr.binary_with_debug[DefaultInfo].data_runfiles.merge(ctx.runfiles(files = get_transitive_dyn_libs(ctx.attr.deps))) + dynamic_deps_runfiles = ctx.attr.binary_with_debug[DefaultInfo].data_runfiles.merge(ctx.runfiles(files = dynamic_deps)) outputs.extend(dynamic_deps) provided_info = [ DefaultInfo( - files = depset(outputs, transitive = [depset(get_transitive_debug_files(ctx.attr.deps))]), + files = depset(outputs, transitive = [depset(transitive_debug_files)]), runfiles = dynamic_deps_runfiles, executable = output_bin if ctx.attr.type == "program" else None, ), @@ -437,7 +443,7 @@ def macos_extraction(ctx, cc_toolchain, inputs): dynamic_deps_runfiles = ctx.runfiles(files = []) if ctx.attr.type == "program": dynamic_deps = get_transitive_dyn_libs(ctx.attr.deps) - dynamic_deps_runfiles = ctx.attr.binary_with_debug[DefaultInfo].data_runfiles.merge(ctx.runfiles(files = get_transitive_dyn_libs(ctx.attr.deps))) + dynamic_deps_runfiles = ctx.attr.binary_with_debug[DefaultInfo].data_runfiles.merge(ctx.runfiles(files = dynamic_deps)) outputs.extend(dynamic_deps) provided_info = [ @@ -483,10 +489,10 @@ def windows_extraction(ctx, cc_toolchain, inputs): output_dynamic_library = None if len(input_file): - basename = ctx.attr.binary_with_debug.files.to_list()[0].basename[:-len(WITH_DEBUG_SUFFIX + ext)] + basename = input_file[0].basename[:-len(WITH_DEBUG_SUFFIX + ext)] output = ctx.actions.declare_file(basename + ext) - for input in ctx.attr.binary_with_debug.files.to_list(): + for input in input_file: ext = "." + input.extension basename = input.basename[:-len(WITH_DEBUG_SUFFIX + ext)] @@ -516,6 +522,7 @@ def windows_extraction(ctx, cc_toolchain, inputs): ) if pdb: + pdb_files = pdb.to_list() if ctx.attr.cc_shared_library != None: basename = input.basename[:-len(WITH_DEBUG_SUFFIX + ".pdb")] pdb_output = ctx.actions.declare_file(basename + ".dll.pdb") @@ -526,7 +533,7 @@ def windows_extraction(ctx, cc_toolchain, inputs): ctx.actions.symlink( output = pdb_output, - target_file = pdb.to_list()[0], + target_file = pdb_files[0], ) if ctx.attr.shared_archive: diff --git a/bazel/wrapper_hook/autogenerated_targets.py b/bazel/wrapper_hook/autogenerated_targets.py index cc05d217e6f..41343db6eb4 100644 --- a/bazel/wrapper_hook/autogenerated_targets.py +++ b/bazel/wrapper_hook/autogenerated_targets.py @@ -60,18 +60,29 @@ def get_bazel_labels_from_tags(args, bazel, tag): extra_args = [arg for arg in args if arg.startswith("--")] # The .cquery file is used to get info on which targets are compatible # with our current config. Without it dependent targets would just be skipped. + query_args = [ + f"kind(extract_debuginfo_test, attr(tags, '\\b{tag}\\b', //src/...))", + "--output=starlark", + "--starlark:file=bazel/wrapper_hook/target_compatable.cquery", + ] proc = subprocess.run( - [bazel, "cquery", "--config=local"] - + extra_args - + [ - f"kind(extract_debuginfo_test, attr(tags, '\\b{tag}\\b', //src/...))", - "--output=starlark", - "--starlark:file=bazel/wrapper_hook/target_compatable.cquery", - ], + [bazel, "cquery", "--remote_executor="] + extra_args + query_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) + if proc.returncode != 0: + print( + "WARNING: Autogenerated query failed with remote cache/downloader enabled; " + "retrying with `--config=local`.", + file=sys.stderr, + ) + proc = subprocess.run( + [bazel, "cquery", "--config=local"] + extra_args + query_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) if proc.returncode != 0: print("ERROR: Autogenerated query failed:") print(proc.stderr) diff --git a/bazel/wrapper_hook/compiledb.py b/bazel/wrapper_hook/compiledb.py index 974239a1009..63b32150b10 100644 --- a/bazel/wrapper_hook/compiledb.py +++ b/bazel/wrapper_hook/compiledb.py @@ -1,5 +1,4 @@ import errno -import fileinput import json import os import pathlib @@ -8,11 +7,39 @@ import re import shutil import subprocess import sys +import tempfile +import time REPO_ROOT = pathlib.Path(__file__).parent.parent.parent sys.path.append(str(REPO_ROOT)) +from bazel.wrapper_hook.compiledb_postprocess import ( + compile_command_sort_key, + load_compile_command_fragments, + load_compile_command_fragments_from_paths, + write_compile_commands, +) from bazel.wrapper_hook.write_wrapper_hook_bazelrc import write_wrapper_hook_bazelrc +from buildscripts.setup_clang_tidy import PLUGIN_CANDIDATES, materialize_clang_tidy_ide_files + +COMPILEDB_START_TIME = time.monotonic() +COMPILEDB_POSTHOOK_STATE = REPO_ROOT / ".compiledb" / "posthook_state.json" +COMPILEDB_BUILD_TAG_FILTERS = "--build_tag_filters=mongo_compiledb" +COMPILEDB_REQUIRED_OUTPUT_REGEX = ( + r".*(_virtual_includes|_virtual_imports)/.*" + r"|.*\.(compile_command\.json|h|hh|hpp|hxx|inc|ipp|c|cc|cpp|cxx)$" +) +WITH_DEBUG_SUFFIX = "_with_debug" +SETUP_CLANG_TIDY_BUILD_TARGETS = [ + "//:setup_clang_tidy", + "//:clang_tidy_config", + "//src/mongo/tools/mongo_tidy_checks:mongo_tidy_checks", +] +_WINDOWS_SYMLINKS_AVAILABLE = None + + +def _should_passthrough_target_name(target_name): + return target_name.startswith(("install-", "archive-")) def run_pty_command(cmd): @@ -35,27 +62,553 @@ def run_pty_command(cmd): else: if not data: # EOF break - stdout += data.decode() + stdout += data.decode(errors="replace") + returncode = proc.wait() except ModuleNotFoundError: proc = subprocess.run( cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + stdout = proc.stdout + returncode = proc.returncode + if returncode != 0: + raise RuntimeError( + f"Command failed (rc={returncode}): {' '.join(cmd)}\n" + f"--- stdout ---\n{proc.stdout}\n" + f"--- stderr ---\n{proc.stderr}" + ) + if returncode != 0: + raise RuntimeError( + f"Command failed (rc={returncode}): {' '.join(cmd)}\n" f"--- output ---\n{stdout}" ) - stdout = proc.stdout.decode() return stdout -def generate_compiledb(bazel_bin, persistent_compdb, enterprise, atlas): - # compiledb ignores command line args so just make a version rc file in anycase +def _format_elapsed(reference_time): + elapsed = time.monotonic() - reference_time + if elapsed < 60: + return f"{elapsed:.1f}s" + + minutes, seconds = divmod(elapsed, 60) + if minutes < 60: + return f"{int(minutes)}m {seconds:.1f}s" + + hours, minutes = divmod(minutes, 60) + return f"{int(hours)}h {int(minutes)}m {seconds:.1f}s" + + +def _log_progress(message): + line = f"[compiledb +{_format_elapsed(COMPILEDB_START_TIME)}] {message}" + for stream in [sys.stdout, sys.stderr, sys.__stderr__]: + if not stream: + continue + try: + print(line, file=stream, flush=True) + return + except (ValueError, OSError): + continue + + +def clear_compiledb_posthook_state(): + try: + os.remove(COMPILEDB_POSTHOOK_STATE) + except OSError: + pass + + +def _compiledb_build_settings(enterprise, atlas, log_default=False): + compiledb_bazelrc = [] + compiledb_config = [COMPILEDB_BUILD_TAG_FILTERS] + if (REPO_ROOT / ".bazelrc.compiledb").exists(): + compiledb_bazelrc = ["--bazelrc=.bazelrc", "--bazelrc=.bazelrc.compiledb"] + else: + if log_default: + _log_progress( + "No '.bazelrc.compiledb' found; using the Bazel invocation config for compiledb." + ) + + if not enterprise: + compiledb_config.append("--build_enterprise=False") + + if not atlas: + compiledb_config.append("--build_atlas=False") + + return compiledb_bazelrc, compiledb_config + + +def _resolve_compiledb_output_base(bazel_bin, persistent_compdb, startup_args=None): + info_proc = subprocess.run( + [bazel_bin] + list(startup_args or []) + ["info", "output_base"], + capture_output=True, + text=True, + ) + if info_proc.returncode != 0: + raise RuntimeError( + f"Failed to query bazel output_base: rc={info_proc.returncode}\n" + f"--- stdout ---\n{info_proc.stdout}\n" + f"--- stderr ---\n{info_proc.stderr}" + ) + + symlink_prefix = None + if persistent_compdb: + output_base = pathlib.Path(info_proc.stdout.strip() + "_bazel_compiledb") + os.makedirs(REPO_ROOT / ".compiledb", exist_ok=True) + symlink_prefix = REPO_ROOT / ".compiledb" / "compiledb-" + else: + output_base = pathlib.Path(info_proc.stdout.strip()) + return output_base, symlink_prefix + + +def _compiledb_build_target(target): + if target.endswith(WITH_DEBUG_SUFFIX): + return target + if "..." in target or "*" in target: + return target + if target.startswith("-"): + return target + if target.startswith("//") and ":" in target: + package, name = target.rsplit(":", 1) + if _should_passthrough_target_name(name): + return target + return f"{package}:{name}{WITH_DEBUG_SUFFIX}" + if target.startswith(":"): + if _should_passthrough_target_name(target[1:]): + return target + return target + WITH_DEBUG_SUFFIX + if "/" not in target and ":" not in target: + if _should_passthrough_target_name(target): + return target + return target + WITH_DEBUG_SUFFIX + return target + + +def _resolve_compiledb_targets(target_scope_override=None, requested_targets=None): + default_target_scope = "//src/..." + if requested_targets: + build_targets = [_compiledb_build_target(target) for target in requested_targets] + else: + scope = target_scope_override or os.environ.get( + "MONGO_COMPILEDB_TARGET_SCOPE", default_target_scope + ) + build_targets = [_compiledb_build_target(scope)] + + if build_targets != [default_target_scope]: + _log_progress(f"Using compiledb target scope: {' '.join(build_targets)}") + + if len(build_targets) == 1: + target_scope_expr = build_targets[0] + else: + target_scope_expr = "set(" + " ".join(build_targets) + ")" + + return default_target_scope, build_targets, target_scope_expr + + +def _resolve_compiledb_flags(compiledb_config, requested_build_flags=None): + build_flags = list(requested_build_flags or []) + + for default_flag in compiledb_config: + if default_flag.startswith("--config="): + if default_flag not in build_flags: + build_flags.append(default_flag) + elif default_flag.startswith("--build_enterprise="): + if not any(arg.startswith("--build_enterprise=") for arg in build_flags): + build_flags.append(default_flag) + elif default_flag.startswith("--build_atlas="): + if not any(arg.startswith("--build_atlas=") for arg in build_flags): + build_flags.append(default_flag) + elif default_flag not in build_flags: + build_flags.append(default_flag) + + if not any(arg in ("--config=compiledb", "--config=compiledb-aspect") for arg in build_flags): + build_flags.append("--config=compiledb") + + build_flags = [arg for arg in build_flags if not arg.startswith("--remote_download_regex=")] + build_flags.append(f"--remote_download_regex={COMPILEDB_REQUIRED_OUTPUT_REGEX}") + + return build_flags + + +_EMBEDDED_ARG_OPTIONS = ( + "-include", + "-imacros", + "-include-pch", + "-iquote", + "-isystem", + "-idirafter", + "-iprefix", + "-iwithprefix", + "-iwithprefixbefore", + "-isysroot", + "-iframework", + "-iframeworkwithsysroot", + "--sysroot", + "-Xclang", + "-mllvm", + "-target", + "--target", + "--gcc-toolchain", + "-MF", + "-MT", + "-MQ", + "-o", + "-x", +) + + +def _split_embedded_arg_options(args): + normalized = [] + for arg in args: + split = False + for option in _EMBEDDED_ARG_OPTIONS: + prefix = option + " " + if arg.startswith(prefix): + normalized.extend([option, arg[len(prefix) :]]) + split = True + break + if not split: + normalized.append(arg) + return normalized + + +def _build_final_compile_command_entry( + entry, arguments, repo_root_resolved, rewrite_exec_path, out_root_str, external_root_str +): + compiledb_entry = { + "file": rewrite_exec_path(entry["file"], out_root_str, external_root_str), + "arguments": arguments, + "directory": repo_root_resolved, + } + output_file = entry.get("output") + if output_file: + compiledb_entry["output"] = rewrite_exec_path(output_file, out_root_str, external_root_str) + return compiledb_entry + + +def prepare_compiledb_posthook_args( + bazel_bin, + startup_args, + command, + build_flags, + build_targets, + persistent_compdb, + enterprise, + atlas, + compiledb_targets=None, + extra_build_targets=None, + setup_clang_tidy=False, +): + startup_args = list(startup_args) + compiledb_targets = list(compiledb_targets or build_targets) + extra_build_targets = list(extra_build_targets or []) + owns_buildevents_path = False + existing_output_base = next( + (arg.split("=", 1)[1] for arg in startup_args if arg.startswith("--output_base=")), + None, + ) + existing_symlink_prefix = next( + (arg.split("=", 1)[1] for arg in build_flags if arg.startswith("--symlink_prefix=")), + None, + ) + + if existing_output_base: + output_base = pathlib.Path(existing_output_base) + symlink_prefix = pathlib.Path(existing_symlink_prefix) if existing_symlink_prefix else None + else: + output_base, symlink_prefix = _resolve_compiledb_output_base( + bazel_bin, + persistent_compdb, + startup_args=startup_args, + ) + if existing_symlink_prefix: + symlink_prefix = pathlib.Path(existing_symlink_prefix) + + _, compiledb_config = _compiledb_build_settings(enterprise, atlas, log_default=True) + build_flags = _resolve_compiledb_flags(compiledb_config, requested_build_flags=build_flags) + + if persistent_compdb and not any(arg.startswith("--output_base=") for arg in startup_args): + startup_args.append(f"--output_base={output_base}") + + if ( + REPO_ROOT / ".bazelrc.compiledb" + ).exists() and "--bazelrc=.bazelrc.compiledb" not in startup_args: + startup_args.append("--bazelrc=.bazelrc.compiledb") + + if persistent_compdb and not any(arg.startswith("--symlink_prefix=") for arg in build_flags): + build_flags.append(f"--symlink_prefix={symlink_prefix}") + + buildevents_path = None + for arg in build_flags: + if arg.startswith("--build_event_json_file="): + buildevents_path = arg.split("=", 1)[1] + break + if not buildevents_path: + with tempfile.NamedTemporaryFile(delete=False) as buildevents: + buildevents_path = buildevents.name + owns_buildevents_path = True + build_flags.append(f"--build_event_json_file={buildevents_path}") + + os.makedirs(COMPILEDB_POSTHOOK_STATE.parent, exist_ok=True) + with open(COMPILEDB_POSTHOOK_STATE, "w", encoding="utf-8") as state_file: + json.dump( + { + "start_time": time.monotonic(), + "persistent_compdb": persistent_compdb, + "output_base": str(output_base), + "symlink_prefix": str(symlink_prefix) if symlink_prefix else None, + "build_flags": build_flags, + "build_targets": compiledb_targets, + "requested_targets": compiledb_targets, + "setup_clang_tidy": setup_clang_tidy, + "buildevents_path": buildevents_path, + "delete_buildevents": owns_buildevents_path, + }, + state_file, + ) + + return startup_args + [command] + build_flags + build_targets + extra_build_targets + + +def _artifact_exec_path(artifact, path_fragment_map): + exec_path = artifact.get("execPath") + if exec_path: + return exec_path + + path_fragment_id = artifact.get("pathFragmentId") + if not path_fragment_id: + return None + + labels = [] + while path_fragment_id: + fragment = path_fragment_map.get(path_fragment_id) + if not fragment: + return None + labels.append(fragment["label"]) + path_fragment_id = fragment.get("parentId") + + labels.reverse() + return "/".join(labels) + + +def _artifact_exec_path_by_id( + artifact_id, artifact_map, path_fragment_map, artifact_exec_path_cache +): + if artifact_id in artifact_exec_path_cache: + return artifact_exec_path_cache[artifact_id] + + artifact = artifact_map.get(artifact_id) + if not artifact: + artifact_exec_path_cache[artifact_id] = None + return None + + exec_path = _artifact_exec_path(artifact, path_fragment_map) + artifact_exec_path_cache[artifact_id] = exec_path + return exec_path + + +def _remove_existing_path(path): + """Remove files, symlinks, and Windows junction-style directory entries safely.""" + try: + path.unlink() + return + except FileNotFoundError: + return + except IsADirectoryError: + pass + except PermissionError: + # Windows directory symlinks/junctions can land here instead of IsADirectoryError. + pass + + if not os.path.lexists(path): + return + + try: + os.rmdir(path) + return + except OSError: + pass + + shutil.rmtree(path) + + +def _windows_symlinks_available(): + global _WINDOWS_SYMLINKS_AVAILABLE + + if os.name != "nt": + return True + if _WINDOWS_SYMLINKS_AVAILABLE is not None: + return _WINDOWS_SYMLINKS_AVAILABLE + + probe_root = pathlib.Path(tempfile.mkdtemp(prefix="compiledb-symlink-probe-")) + probe_target = probe_root / "target" + probe_link = probe_root / "link" + probe_target.mkdir() + + try: + os.symlink(probe_target.name, probe_link, target_is_directory=True) + _WINDOWS_SYMLINKS_AVAILABLE = True + except (NotImplementedError, OSError): + _WINDOWS_SYMLINKS_AVAILABLE = False + finally: + _remove_existing_path(probe_link) + shutil.rmtree(probe_root, ignore_errors=True) + + return _WINDOWS_SYMLINKS_AVAILABLE + + +def _copy_path(src, dst): + if src.is_dir(): + shutil.copytree(src, dst, dirs_exist_ok=True) + else: + dst.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dst) + + +def materialize_execroot_external_symlinks(output_base): + external_root = output_base / "external" + execroot_external = output_base / "execroot" / "_main" / "external" + + if not external_root.exists(): + return + + step_start = time.monotonic() + execroot_external.mkdir(parents=True, exist_ok=True) + created = 0 + updated = 0 + use_symlinks = _windows_symlinks_available() + if not use_symlinks: + _log_progress( + "Symlink creation is unavailable; copying external repos into the compiledb execroot." + ) + + for repo in external_root.iterdir(): + link = execroot_external / repo.name + link_target = os.path.relpath(repo, execroot_external) + + if os.path.lexists(link): + if use_symlinks and link.is_symlink() and os.readlink(link) == link_target: + continue + if not use_symlinks and not link.is_symlink(): + _copy_path(repo, link) + continue + _remove_existing_path(link) + updated += 1 + else: + created += 1 + + if use_symlinks: + os.symlink(link_target, link, target_is_directory=repo.is_dir()) + else: + _copy_path(repo, link) + + _log_progress( + "Materialized execroot external repo symlinks " + f"in {_format_elapsed(step_start)}: created={created} updated={updated}" + ) + + +def _exec_path_to_abs(output_base, path): + path_obj = pathlib.Path(path) + if path.startswith("external/"): + return (output_base / "external" / path_obj.relative_to("external")).resolve(strict=False) + return (output_base / "execroot" / "_main" / path_obj).resolve(strict=False) + + +def _collect_aspect_fragment_paths( + bazel_bin, + persistent_compdb, + output_base, + symlink_prefix, + compiledb_bazelrc, + compiledb_config, + target_scope_expr, +): + query_cmd = ( + [bazel_bin] + + ([f"--output_base={output_base}"] if persistent_compdb else []) + + compiledb_bazelrc + + ["aquery"] + + ([f"--symlink_prefix={symlink_prefix}"] if persistent_compdb else []) + + compiledb_config + + [ + "--bes_backend=", + "--bes_results_url=", + "--include_artifacts", + f"deps({target_scope_expr})", + "--output=jsonproto", + ] + ) + data = json.loads(run_pty_command(query_cmd)) + path_fragment_map = {fragment["id"]: fragment for fragment in data.get("pathFragments", [])} + artifact_map = {artifact["id"]: artifact for artifact in data.get("artifacts", [])} + artifact_exec_path_cache = {} + + fragment_paths = set() + for action in data.get("actions", []): + for artifact_id in action.get("outputIds", []): + output_path = _artifact_exec_path_by_id( + artifact_id, + artifact_map, + path_fragment_map, + artifact_exec_path_cache, + ) + if output_path and output_path.endswith(".compile_command.json"): + fragment_paths.add(str(_exec_path_to_abs(output_base, output_path))) + return sorted(fragment_paths) + + +def _generate_compiledb_via_aspect( + bazel_bin, + persistent_compdb, + enterprise, + atlas, + target_scope_override=None, + requested_build_flags=None, + requested_targets=None, + extra_build_targets=None, + setup_clang_tidy=False, + startup_args=None, + prepared_output_base=None, + prepared_symlink_prefix=None, + prepared_buildevents_path=None, + delete_buildevents=True, + skip_build=False, +): write_wrapper_hook_bazelrc([]) + def rewrite_exec_path(path, out_root_str, external_root_str): + if not path: + return path + if path.startswith("bazel-out/"): + return out_root_str + "/" + path[len("bazel-out/") :] + if path.startswith("external/"): + return external_root_str + "/" + path[len("external/") :] + return path + def rewrite_args(args, out_root_str, external_root_str): + def rewrite_arg_path(arg): + if out_root_str and arg.startswith("bazel-out/"): + return out_root_str + "/" + arg[len("bazel-out/") :] + if external_root_str and arg.startswith("external/"): + return external_root_str + "/" + arg[len("external/") :] + + # Some toolchain flags embed execroot-relative paths as option=value, e.g. + # "-fprofile-use=external/.../clang_pgo.profdata". + if "=" in arg: + prefix, value = arg.split("=", 1) + rewritten_value = rewrite_arg_path(value) + if rewritten_value != value: + return f"{prefix}={rewritten_value}" + + return arg + + args = _split_embedded_arg_options(args) rewritten = [] for arg in args: - if out_root_str and arg.startswith("bazel-out/"): - arg = out_root_str + "/" + arg[len("bazel-out/") :] - elif external_root_str and arg.startswith("external/"): - arg = external_root_str + "/" + arg[len("external/") :] + rewritten_arg = rewrite_arg_path(arg) + if rewritten_arg != arg: + arg = rewritten_arg else: # Preserve compiler prefixes while rewriting paths. m = re.match(r"^(/external:I)(bazel-out|external)/(.*)$", arg) @@ -85,208 +638,269 @@ def generate_compiledb(bazel_bin, persistent_compdb, enterprise, atlas): rewritten.append(arg) return rewritten - info_proc = subprocess.run([bazel_bin, "info", "output_base"], capture_output=True, text=True) - if persistent_compdb: - output_base = pathlib.Path(info_proc.stdout.strip() + "_bazel_compiledb") - os.makedirs(REPO_ROOT / ".compiledb", exist_ok=True) - symlink_prefix = REPO_ROOT / ".compiledb" / "compiledb-" - else: - output_base = pathlib.Path(info_proc.stdout.strip()) + def with_librdkafka_config_header(args, input_file, out_root_str): + if "src/third_party/private/librdkafka/dist/src/" not in input_file and ( + "src/third_party/private/librdkafka/dist/src-cpp/" not in input_file + ): + return args + + out_bin_root = None + for arg in args: + if "/bazel-out/" in arg and arg.endswith("/bin"): + out_bin_root = pathlib.Path(arg) + break + + if out_bin_root: + config_header = ( + out_bin_root + / "src" + / "third_party" + / "private" + / "librdkafka" + / "dist" + / "FAKE" + / "config.h" + ) + else: + config_header = ( + pathlib.Path(out_root_str) + / "src" + / "third_party" + / "private" + / "librdkafka" + / "dist" + / "FAKE" + / "config.h" + ) + config_header_str = config_header.as_posix() + if any(arg == config_header_str for arg in args): + return args + + rewritten = list(args) + try: + c_index = next(i for i, arg in enumerate(rewritten) if arg in ("-c", "/c")) + rewritten[c_index:c_index] = ["-include", config_header_str] + except StopIteration: + rewritten.extend(["-include", config_header_str]) + return rewritten + + def setup_clang_tidy_from_built_outputs(): + candidate_bin_dirs = [] + if persistent_compdb and symlink_prefix: + candidate_bin_dirs.append(pathlib.Path(f"{symlink_prefix}bin")) + candidate_bin_dirs.append(REPO_ROOT / "bazel-bin") + + config_src = None + plugin_src = None + for bin_dir in candidate_bin_dirs: + config_candidate = bin_dir / ".clang-tidy" + plugin_dir = bin_dir / "src" / "mongo" / "tools" / "mongo_tidy_checks" + plugin_candidate = next( + ( + plugin_dir / candidate + for candidate in PLUGIN_CANDIDATES + if (plugin_dir / candidate).exists() + ), + None, + ) + if config_candidate.exists() and plugin_candidate: + config_src = config_candidate + plugin_src = plugin_candidate + break + + if not config_src: + _log_progress( + "Skipping clang-tidy IDE setup because the .clang-tidy output is unavailable." + ) + return + if not plugin_src: + _log_progress( + "Skipping clang-tidy IDE setup because the mongo_tidy_checks plugin output is unavailable." + ) + return + + materialize_clang_tidy_ide_files(REPO_ROOT, config_src, plugin_src) + _log_progress("Set up clang-tidy IDE integration files.") + + output_base = pathlib.Path(prepared_output_base) if prepared_output_base else None + symlink_prefix = pathlib.Path(prepared_symlink_prefix) if prepared_symlink_prefix else None + if output_base is None: + output_base, symlink_prefix = _resolve_compiledb_output_base( + bazel_bin, + persistent_compdb, + startup_args=startup_args, + ) - # Prefer real paths in compile_commands.json (avoid symlink forest paths like - # ".compiledb/compiledb-out/..."). Resolve via output_base to avoid symlink prefix. real_out_root = pathlib.Path(os.path.realpath(output_base / "execroot" / "_main" / "bazel-out")) real_external_root = pathlib.Path(os.path.realpath(output_base / "external")) - # Use forward slashes consistently in compile_commands.json across platforms. real_out_root_str = real_out_root.as_posix() real_external_root_str = real_external_root.as_posix() - compiledb_bazelrc = [] - compiledb_config = [] - if (REPO_ROOT / ".bazelrc.compiledb").exists(): - compiledb_bazelrc = ["--bazelrc=.bazelrc", "--bazelrc=.bazelrc.compiledb"] + compiledb_bazelrc, compiledb_config = _compiledb_build_settings( + enterprise, + atlas, + log_default=not skip_build, + ) + default_target_scope, build_targets, target_scope_expr = _resolve_compiledb_targets( + target_scope_override=target_scope_override, + requested_targets=requested_targets, + ) + extra_build_targets = list(extra_build_targets or []) + build_flags = _resolve_compiledb_flags( + compiledb_config, + requested_build_flags=requested_build_flags, + ) + if prepared_buildevents_path: + buildevents_path = prepared_buildevents_path else: - print( - "Using default compiledb config, create a '.bazelrc.compiledb' file to customize the compiledb config..." + with tempfile.NamedTemporaryFile(delete=False) as buildevents: + buildevents_path = buildevents.name + + try: + if not skip_build: + build_start = time.monotonic() + _log_progress("Generating compiledb command fragments via aspect...") + build_cmd = ( + [bazel_bin] + + ([f"--output_base={output_base}"] if persistent_compdb else []) + + compiledb_bazelrc + + ["build"] + + ([f"--symlink_prefix={symlink_prefix}"] if persistent_compdb else []) + + build_flags + + [ + f"--build_event_json_file={buildevents_path}", + ] + + build_targets + + extra_build_targets + ) + run_pty_command(build_cmd) + _log_progress( + "Generated compiledb command fragments via aspect " + f"in {_format_elapsed(build_start)}" + ) + + materialize_execroot_external_symlinks(output_base) + + load_start = time.monotonic() + raw_entries = load_compile_command_fragments( + buildevents_path, + output_base=output_base, ) - compiledb_config = ["--config=dbg"] - - if not enterprise: - compiledb_config.append("--build_enterprise=False") - - if not atlas: - compiledb_config.append("--build_atlas=False") - - query_cmd = ( - [bazel_bin] - + ([f"--output_base={output_base}"] if persistent_compdb else []) - + compiledb_bazelrc - + ["aquery"] - + ([f"--symlink_prefix={symlink_prefix}"] if persistent_compdb else []) - + compiledb_config - + [ - "--bes_backend=", - "--bes_results_url=", - "--noinclude_artifacts", - 'mnemonic("CppCompile|LinkCompile", //src/...)', - "--output=jsonproto", - ] - ) - - first_time = "" - if persistent_compdb and not output_base.exists(): - first_time = " (the first time takes longer)" - - print(f"Generating compiledb command lines via aquery{first_time}...") - stdout = run_pty_command(query_cmd) - data = json.loads(stdout) - - output_json = [] - repo_root_resolved = str(REPO_ROOT.resolve()) - - for action in data["actions"]: - input_file = None - output_file = None - prev_arg = None - for arg in reversed(action["arguments"]): - if not input_file: - if arg == "-c" or arg == "/c": - input_file = prev_arg - elif arg.startswith("/c"): - input_file = arg[2:] - if not output_file: - if arg == "-o" or arg == "/Fo": - output_file = prev_arg - elif arg.startswith("/Fo"): - output_file = arg[3:] - if input_file and output_file: - break - prev_arg = arg - - if not input_file: - raise Exception( - f"failed to parse '-c' or '/c' from command line:{os.linesep}{' '.join(action['arguments'])}" + if not raw_entries: + fragment_paths = _collect_aspect_fragment_paths( + bazel_bin=bazel_bin, + persistent_compdb=persistent_compdb, + output_base=output_base, + symlink_prefix=symlink_prefix if persistent_compdb else None, + compiledb_bazelrc=compiledb_bazelrc, + compiledb_config=build_flags, + target_scope_expr=target_scope_expr, ) - - if not output_file: - raise Exception( - f"failed to parse '-o' or '/Fo' from command line:{os.linesep}{' '.join(action['arguments'])}" + raw_entries = load_compile_command_fragments_from_paths(fragment_paths) + if not raw_entries: + raise RuntimeError( + "No compile command fragments were produced by the compiledb aspect." ) - - if persistent_compdb: - # We need to adjust the args so actions can be runnable locally - args = rewrite_args(action["arguments"], real_out_root_str, real_external_root_str) - - output_json.append( - { - "file": input_file.replace("bazel-out", real_out_root_str), - "arguments": args, - "directory": repo_root_resolved, - "output": output_file.replace("bazel-out", real_out_root_str), - } - ) - else: - output_json.append( - { - "file": input_file, - "arguments": rewrite_args( - action["arguments"], real_out_root_str, real_external_root_str - ), - "directory": repo_root_resolved, - "output": output_file, - } - ) - - json_str = json.dumps(output_json, indent=4) - compile_commands_json = REPO_ROOT / "compile_commands.json" - need_rewrite = True - if compile_commands_json.exists(): - with open(compile_commands_json, "r") as f: - need_rewrite = json_str != f.read() - - if need_rewrite: - with open(compile_commands_json, "w") as f: - f.write(json_str) - - if not persistent_compdb: - external_link = REPO_ROOT / "external" - if external_link.exists(): - os.unlink(external_link) - os.symlink( - pathlib.Path(os.readlink(REPO_ROOT / "bazel-out")).parent.parent.parent / "external", - external_link, - target_is_directory=True, + _log_progress( + "Loaded compiledb command fragments " + f"in {_format_elapsed(load_start)}: {len(raw_entries)} fragment(s)" ) - print("Generating sources for compiledb...") - gen_source_cmd = ( - [bazel_bin] - + ([f"--output_base={output_base}"] if persistent_compdb else []) - + compiledb_bazelrc - + ["build"] - + ([f"--symlink_prefix={symlink_prefix}"] if persistent_compdb else []) - + compiledb_config - + [ - f"--build_tag_filters=gen_source{',mongo-tidy-checks' if platform.system() != 'Windows' else ''}", - "//src/...", - ] - + (["//:clang_tidy_config"] if platform.system() != "Windows" else []) - + (["//:clang_tidy_config_strict"] if platform.system() != "Windows" else []) - ) - run_pty_command(gen_source_cmd) - - if platform.system() != "Windows": - clang_tidy_file = pathlib.Path(REPO_ROOT) / ".clang-tidy" - - if persistent_compdb: - configs = [ - pathlib.Path(f"{symlink_prefix}bin") / config - for config in [".clang-tidy.strict", ".clang-tidy"] - ] - for config in configs: - os.chmod(config, 0o744) - with fileinput.FileInput(config, inplace=True) as file: - for line in file: - print(line.replace("bazel-out/", f"{symlink_prefix}out/"), end="") - shutil.copyfile(configs[1], clang_tidy_file) - with open(".mongo_checks_module_path", "w") as f: - f.write( - os.path.join( - f"{symlink_prefix}bin", - "src", - "mongo", - "tools", - "mongo_tidy_checks", - "libmongo_tidy_checks.so", - ) + repo_root_resolved = str(REPO_ROOT.resolve()) + output_json = [] + for entry in raw_entries: + input_file = entry["file"] + args = rewrite_args(entry["arguments"], real_out_root_str, real_external_root_str) + args = with_librdkafka_config_header(args, input_file, real_out_root_str) + output_json.append( + _build_final_compile_command_entry( + entry, + args, + repo_root_resolved, + rewrite_exec_path, + real_out_root_str, + real_external_root_str, ) - else: - shutil.copyfile(pathlib.Path("bazel-bin") / ".clang-tidy", clang_tidy_file) + ) - if platform.system() == "Linux": - # TODO: SERVER-110144 optimize this to only generate the extensions source code - # instead of build the extension target entirely. - gen_source_cmd = ( - [bazel_bin] - + ([f"--output_base={output_base}"] if persistent_compdb else []) - + compiledb_bazelrc - + ["build"] - + ([f"--symlink_prefix={symlink_prefix}"] if persistent_compdb else []) - + compiledb_config - + [ - "//src/mongo/db/extension/test_examples:dist_test_extensions", - ] - ) - run_pty_command(gen_source_cmd) + output_json.sort(key=compile_command_sort_key) + write_compile_commands(output_json, REPO_ROOT / "compile_commands.json") + if setup_clang_tidy: + setup_clang_tidy_from_built_outputs() + finally: + if delete_buildevents: + try: + os.remove(buildevents_path) + except OSError: + pass if persistent_compdb: shutdown_proc = subprocess.run( [bazel_bin, f"--output_base={output_base}", "shutdown"], capture_output=True, text=True ) if shutdown_proc.returncode != 0: - print(f"Failed to shutdown compiledb output_base: {shutdown_proc.returncode}") - print("--- stdout ---:") + _log_progress(f"Failed to shutdown compiledb output_base: {shutdown_proc.returncode}") + _log_progress("--- stdout ---") print(shutdown_proc.stdout) - print("--- stderr ---:") + _log_progress("--- stderr ---") print(shutdown_proc.stderr) - print("compiledb target done, finishing any other targets...") + + _log_progress("compiledb target done, finishing any other targets...") + + +def finalize_compiledb_posthook(bazel_bin, enterprise, atlas): + global COMPILEDB_START_TIME + + if platform.system() == "Windows": + return + if not COMPILEDB_POSTHOOK_STATE.exists(): + return + + with open(COMPILEDB_POSTHOOK_STATE, "r", encoding="utf-8") as state_file: + state = json.load(state_file) + + COMPILEDB_START_TIME = state.get("start_time", COMPILEDB_START_TIME) + + try: + _generate_compiledb_via_aspect( + bazel_bin=bazel_bin, + persistent_compdb=state["persistent_compdb"], + enterprise=enterprise, + atlas=atlas, + requested_build_flags=state["build_flags"], + requested_targets=state.get("requested_targets", state["build_targets"]), + setup_clang_tidy=state.get("setup_clang_tidy", False), + prepared_output_base=state["output_base"], + prepared_symlink_prefix=state.get("symlink_prefix"), + prepared_buildevents_path=state["buildevents_path"], + delete_buildevents=state.get("delete_buildevents", False), + skip_build=True, + ) + finally: + clear_compiledb_posthook_state() + + +def generate_compiledb( + bazel_bin, + persistent_compdb, + enterprise, + atlas, + target_scope_override=None, + requested_build_flags=None, + requested_targets=None, + extra_build_targets=None, + setup_clang_tidy=False, + startup_args=None, +): + return _generate_compiledb_via_aspect( + bazel_bin=bazel_bin, + persistent_compdb=persistent_compdb, + enterprise=enterprise, + atlas=atlas, + target_scope_override=target_scope_override, + requested_build_flags=requested_build_flags, + requested_targets=requested_targets, + extra_build_targets=extra_build_targets, + setup_clang_tidy=setup_clang_tidy, + startup_args=startup_args, + ) diff --git a/bazel/wrapper_hook/compiledb_postprocess.py b/bazel/wrapper_hook/compiledb_postprocess.py new file mode 100644 index 00000000000..611ef765974 --- /dev/null +++ b/bazel/wrapper_hook/compiledb_postprocess.py @@ -0,0 +1,182 @@ +import json +import os +import pathlib +import tempfile +from urllib.parse import urlparse, unquote + + +COMPILE_COMMAND_FRAGMENT_EXT = ".compile_command.json" + + +def _path_for_open(path): + normalized = os.path.normpath(path) + if os.name != "nt": + return normalized + + # Bazel fragment paths can exceed MAX_PATH on Windows even when the build succeeds. + # Use the extended-length path prefix so Python can open them reliably. + if normalized.startswith("\\\\?\\"): + return normalized + if not os.path.isabs(normalized): + return normalized + if normalized.startswith("\\\\"): + return "\\\\?\\UNC\\" + normalized[2:] + return "\\\\?\\" + normalized + + +def _bep_file_path(file_entry): + if "pathPrefix" in file_entry and "name" in file_entry: + return os.path.normpath( + os.path.join(*(file_entry.get("pathPrefix", []) + [file_entry["name"]])) + ) + + uri = file_entry.get("uri") + if not uri: + return None + + parsed = urlparse(uri) + if parsed.scheme != "file": + return None + path = unquote(parsed.path) + if os.name == "nt" and len(path) >= 3 and path[0] == "/" and path[2] == ":": + path = path[1:] + return os.path.normpath(path) + + +def collect_compile_command_fragments(build_event_json): + fragment_paths = set() + with open(_path_for_open(build_event_json), "r", encoding="utf-8") as events: + for line in events: + if not line.strip(): + continue + event = json.loads(line) + named_set = event.get("namedSetOfFiles") + if not named_set: + continue + for file_entry in named_set.get("files", []): + path = _bep_file_path(file_entry) + if path and path.endswith(COMPILE_COMMAND_FRAGMENT_EXT): + fragment_paths.add(path) + + return sorted(fragment_paths) + + +def _resolve_fragment_path(fragment_path, output_base=None): + path = pathlib.Path(fragment_path) + if path.is_absolute() or path.exists(): + return str(path) + if output_base is None: + return str(path) + + output_base_path = pathlib.Path(output_base) + if fragment_path.startswith("external/"): + return str(output_base_path / fragment_path) + return str(output_base_path / "execroot" / "_main" / fragment_path) + + +def collect_compile_command_fragments_from_roots(search_roots): + fragment_paths = set() + for root in search_roots: + root_path = pathlib.Path(root) + if not root_path.exists(): + continue + for fragment in root_path.rglob(f"*{COMPILE_COMMAND_FRAGMENT_EXT}"): + fragment_paths.add(str(fragment)) + return sorted(fragment_paths) + + +def load_compile_command_fragments(build_event_json, search_roots=None, output_base=None): + fragment_paths = collect_compile_command_fragments(build_event_json) + if not fragment_paths and search_roots: + fragment_paths = collect_compile_command_fragments_from_roots(search_roots) + + entries = [] + for fragment in fragment_paths: + resolved_fragment = _resolve_fragment_path(fragment, output_base=output_base) + with open(_path_for_open(resolved_fragment), "r", encoding="utf-8") as infile: + fragment_data = json.load(infile) + if isinstance(fragment_data, list): + entries.extend(fragment_data) + else: + entries.append(fragment_data) + return entries + + +def load_compile_command_fragments_from_paths(fragment_paths): + entries = [] + for fragment in sorted(fragment_paths): + with open(_path_for_open(fragment), "r", encoding="utf-8") as infile: + fragment_data = json.load(infile) + if isinstance(fragment_data, list): + entries.extend(fragment_data) + else: + entries.append(fragment_data) + return entries + + +def _entry_key(entry): + return ( + entry.get("file", ""), + entry.get("output", ""), + entry.get("target", ""), + ) + + +def compile_command_sort_key(entry): + return ( + entry.get("file", ""), + entry.get("output", ""), + entry.get("target", ""), + entry.get("arguments", []), + ) + + +def merge_compile_commands(existing_entries, new_entries): + updated_targets = { + entry.get("target") + for entry in new_entries + if isinstance(entry.get("target"), str) and entry.get("target") + } + new_keys = {_entry_key(entry) for entry in new_entries} + new_file_output_keys = { + ( + entry.get("file", ""), + entry.get("output", ""), + ) + for entry in new_entries + } + + merged = [] + for entry in existing_entries: + if _entry_key(entry) in new_keys: + continue + if ( + entry.get("file", ""), + entry.get("output", ""), + ) in new_file_output_keys: + continue + if updated_targets and entry.get("target") in updated_targets: + continue + merged.append(entry) + + merged.extend(new_entries) + merged.sort(key=compile_command_sort_key) + return merged + + +def write_compile_commands(entries, output_path): + output = pathlib.Path(output_path) + output.parent.mkdir(parents=True, exist_ok=True) + json_str = json.dumps(entries, separators=(",", ":"), ensure_ascii=False) + + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + dir=output.parent, + delete=False, + ) as tmp: + tmp.write(json_str) + tmp_path = pathlib.Path(tmp.name) + + os.replace(tmp_path, output) + return True diff --git a/bazel/wrapper_hook/plus_interface.py b/bazel/wrapper_hook/plus_interface.py index d5fc1b8fe0b..9063c745dc7 100644 --- a/bazel/wrapper_hook/plus_interface.py +++ b/bazel/wrapper_hook/plus_interface.py @@ -12,7 +12,10 @@ WRAPPER_CONFIG_MODE_FILE = f"{REPO_ROOT}/.tmp/mongo_wrapper_config_mode" sys.path.append(str(REPO_ROOT)) -from bazel.wrapper_hook.compiledb import generate_compiledb +from bazel.wrapper_hook.compiledb import ( + clear_compiledb_posthook_state, + generate_compiledb, +) from bazel.wrapper_hook.lint import run_rules_lint from bazel.wrapper_hook.wrapper_debug import wrapper_debug @@ -63,13 +66,62 @@ def check_bazel_command_type(args): return arg -def swap_default_config(args, command, config_mode, compiledb_target, clang_tidy): +def _read_target_pattern_file(path): + with open(path, "r", encoding="utf-8") as target_file: + return [ + line.strip() + for line in target_file + if line.strip() and not line.lstrip().startswith("#") + ] + + +def _parse_targets_and_flags(args, replacements, compiledb_targets, compiledb_only_targets): + build_flags = [] + build_targets = [] + target_pattern_file = None + parsing_targets = True + expect_target_pattern_file_arg = False + + for arg in args: + if expect_target_pattern_file_arg: + target_pattern_file = arg + build_flags.append(arg) + expect_target_pattern_file_arg = False + continue + if arg == "--": + parsing_targets = False + continue + if arg in replacements: + continue + if parsing_targets and arg.startswith("-"): + if arg == "--target_pattern_file": + build_flags.append(arg) + expect_target_pattern_file_arg = True + continue + if arg.startswith("--target_pattern_file="): + target_pattern_file = arg.split("=", 1)[1] + if arg == "--config=compiledb-aspect": + arg = "--config=compiledb" + build_flags.append(arg) + elif parsing_targets: + if arg in compiledb_targets or arg in compiledb_only_targets: + continue + build_targets.append(arg) + + return build_flags, build_targets, target_pattern_file + + +def swap_default_config( + args, command, config_mode, compiledb_target, clang_tidy, user_specified_config +): # Remember the user's last specified config mode to prevent invalidating cache on run or lint commands. if os.path.exists(f"{REPO_ROOT}/.bazelrc.local"): return config_mode try: if config_mode is None: + if user_specified_config: + return config_mode if os.path.exists(WRAPPER_CONFIG_MODE_FILE): # Reset to fastbuild if it's been more than 2 days since the file was written, # since we don't want users to stay locked on dbg/opt if they forgot to change it back @@ -111,10 +163,13 @@ def test_runner_interface( plus_starts = ("+", ":+", "//:+") skip_plus_interface = True compiledb_target = False + setup_clang_tidy = False clang_tidy = False lint_target = False persistent_compdb = True compiledb_targets = ["//:compiledb", ":compiledb", "compiledb"] + compiledb_only_targets = ["//:compiledb_only", ":compiledb_only", "compiledb_only"] + compiledb_target_scope = None lint_targets = ["//:lint", ":lint", "lint"] sources_to_bin = {} select_sources = {} @@ -128,6 +183,11 @@ def test_runner_interface( source_targets = {} current_bazel_command = check_bazel_command_type(args) + command_index = next( + (i for i, arg in enumerate(args) if arg == current_bazel_command), + 1, + ) + startup_args = args[1:command_index] if autocomplete_query: str_args = " ".join(args) @@ -138,17 +198,41 @@ def test_runner_interface( persistent_compdb = False config_mode = None - for arg in args: + user_specified_config = False + for index, arg in enumerate(args): + if index > 0 and args[index - 1] == "--config": + continue if arg in compiledb_targets: compiledb_target = True + setup_clang_tidy = True + replacements[arg] = [] + skip_plus_interface = False + if arg in compiledb_only_targets: + compiledb_target = True + replacements[arg] = [] + skip_plus_interface = False if arg in lint_targets: lint_target = True + if arg.startswith("--compiledb-target-scope="): + compiledb_target_scope = arg.split("=", 1)[1] + replacements[arg] = [] + skip_plus_interface = False + if arg.startswith("--compiledb_target_scope="): + compiledb_target_scope = arg.split("=", 1)[1] + replacements[arg] = [] + skip_plus_interface = False if arg == "--intree_compdb": replacements[arg] = [] persistent_compdb = False skip_plus_interface = False - if "--config=" in arg: - val = arg.split("=")[1] + config_value = None + if arg.startswith("--config="): + config_value = arg.split("=", 1)[1] + elif arg == "--config" and index + 1 < len(args): + config_value = args[index + 1] + if config_value is not None: + user_specified_config = True + val = config_value if val in {"opt", "dbg", "fastbuild", "dbg_aubsan", "dbg_tsan"}: config_mode = val if val == "clang-tidy": @@ -159,8 +243,17 @@ def test_runner_interface( catch_all_target = True config_mode = swap_default_config( - args, current_bazel_command, config_mode, compiledb_target, clang_tidy + args, + current_bazel_command, + config_mode, + compiledb_target, + clang_tidy, + user_specified_config, ) + clear_compiledb_posthook_state() + + if platform.system() == "Windows": + setup_clang_tidy = False for arg in args: if arg.startswith("--runs_per_test=") and catch_all_target: @@ -175,8 +268,32 @@ def test_runner_interface( except ValueError: pass # Non-integer value, let bazel handle the error + parsed_build_flags = None + parsed_build_targets = None + parsed_target_pattern_file = None + if current_bazel_command == "build" and compiledb_target: + parsed_build_flags, parsed_build_targets, parsed_target_pattern_file = ( + _parse_targets_and_flags( + args[command_index + 1 :], replacements, compiledb_targets, compiledb_only_targets + ) + ) + if compiledb_target: - generate_compiledb(args[0], persistent_compdb, enterprise, atlas) + generate_compiledb( + args[0], + persistent_compdb, + enterprise, + atlas, + target_scope_override=compiledb_target_scope, + setup_clang_tidy=setup_clang_tidy, + startup_args=startup_args, + ) + if ( + current_bazel_command == "build" + and not parsed_build_targets + and not parsed_target_pattern_file + ): + return [] if lint_target: for lint_arg in lint_targets: diff --git a/bazel/wrapper_hook/post_bazel_hook.py b/bazel/wrapper_hook/post_bazel_hook.py index 2d314d66ecd..5157b504eaf 100644 --- a/bazel/wrapper_hook/post_bazel_hook.py +++ b/bazel/wrapper_hook/post_bazel_hook.py @@ -15,6 +15,7 @@ BAZEL_CI_NAMESPACE = "ci-prod" def main(): install_modules(sys.argv[1], sys.argv[1:]) + from bazel.wrapper_hook.compiledb import finalize_compiledb_posthook from bazel.wrapper_hook.flag_sync import sync_flags if os.environ.get("NO_FLAG_SYNC") is None: @@ -23,6 +24,10 @@ def main(): else: sync_flags(BAZEL_CI_NAMESPACE) + enterprise = (REPO_ROOT / "src" / "mongo" / "db" / "modules" / "enterprise").exists() + atlas = (REPO_ROOT / "src" / "mongo" / "db" / "modules" / "atlas").exists() + finalize_compiledb_posthook(sys.argv[1], enterprise=enterprise, atlas=atlas) + if __name__ == "__main__": main() diff --git a/bazel/wrapper_hook/wrapper_hook.py b/bazel/wrapper_hook/wrapper_hook.py index f2c76bbea97..8c6ea933f9f 100644 --- a/bazel/wrapper_hook/wrapper_hook.py +++ b/bazel/wrapper_hook/wrapper_hook.py @@ -160,8 +160,9 @@ def main(): os.chmod(os.environ.get("MONGO_BAZEL_WRAPPER_ARGS"), 0o644) with open(os.environ.get("MONGO_BAZEL_WRAPPER_ARGS"), "w") as f: - f.write("\n".join(args)) - f.write("\n") + if args: + f.write("\n".join(args)) + f.write("\n") if __name__ == "__main__": diff --git a/buildscripts/BUILD.bazel b/buildscripts/BUILD.bazel index a01cc675144..8156d8ba00a 100644 --- a/buildscripts/BUILD.bazel +++ b/buildscripts/BUILD.bazel @@ -80,6 +80,25 @@ py_binary( ], ) +py_binary( + name = "setup_clang_tidy", + srcs = ["setup_clang_tidy.py"], + args = [ + "--config-rlocation=$(rlocationpath //:clang_tidy_config)", + ], + data = [ + "//:clang_tidy_config", + "//src/mongo/tools/mongo_tidy_checks", + ], + visibility = ["//visibility:public"], + deps = [ + dependency( + "bazel-runfiles", + group = "testing", + ), + ], +) + py_library( name = "install_bazel", srcs = [ diff --git a/buildscripts/clang_tidy.py b/buildscripts/clang_tidy.py index f515d65405e..299badc2519 100755 --- a/buildscripts/clang_tidy.py +++ b/buildscripts/clang_tidy.py @@ -23,6 +23,7 @@ import yaml sys.path.append(os.path.dirname(os.path.abspath(__file__))) from mongo_toolchain import get_mongo_toolchain +from setup_clang_tidy import clang_tidy_setup_recovery_message from simple_report import make_report, put_report, try_combine_reports checks_so = None @@ -32,7 +33,11 @@ if os.path.exists(".mongo_checks_module_path"): config_file = "" -for config in ["/tmp/compiledb-bin/.clang-tidy.strict", "bazel-bin/.clang-tidy.strict"]: +for config in [ + ".clang-tidy", + "/tmp/compiledb-bin/.clang-tidy.strict", + "bazel-bin/.clang-tidy.strict", +]: if os.path.exists(config): config_file = config break @@ -181,7 +186,7 @@ def _run_tidy(args, parser_defaults): if args.compile_commands == parser_defaults.compile_commands: print( f"Could not find compile commands: '{args.compile_commands}', to generate it, use the build command:\n\n" - + "bazel build compiledb\n" + + "bazel build --config=compiledb //src/...\n" ) else: print(f"Could not find compile commands: {args.compile_commands}") @@ -194,7 +199,8 @@ def _run_tidy(args, parser_defaults): if args.clang_tidy_cfg == parser_defaults.clang_tidy_cfg: print( f"Could not find config file: '{args.clang_tidy_cfg}', to generate it, use the build command:\n\n" - + "bazel build compiledb\n" + + clang_tidy_setup_recovery_message() + + "\n" ) else: print(f"Could not find config file: {args.clang_tidy_cfg}") diff --git a/buildscripts/clang_tidy_vscode.py b/buildscripts/clang_tidy_vscode.py index 3267385442d..77f45baaf26 100755 --- a/buildscripts/clang_tidy_vscode.py +++ b/buildscripts/clang_tidy_vscode.py @@ -33,6 +33,7 @@ import sys import time from mongo_toolchain import get_mongo_toolchain +from setup_clang_tidy import clang_tidy_setup_recovery_message CLTCONFIG = """ # This file is intended to document the configuration options available @@ -99,7 +100,7 @@ def main(): if checks_so and os.path.isfile(checks_so): clang_tidy_cmd += [f"-load={checks_so}"] else: - print("ERROR: failed to find mongo tidy checks, run `bazel build compiledb'") + print(f"ERROR: failed to find mongo tidy checks. {clang_tidy_setup_recovery_message()}") sys.exit(1) files_to_check = [] @@ -125,7 +126,10 @@ def main(): ) if not os.path.exists("compile_commands.json"): - print("ERROR: failed to find compile_commands.json, run 'bazel build compiledb'") + print( + "ERROR: failed to find compile_commands.json, run " + "`bazel build --config=compiledb //src/...`" + ) sys.exit(1) with open("compile_commands.json") as f: diff --git a/buildscripts/setup_clang_tidy.py b/buildscripts/setup_clang_tidy.py new file mode 100644 index 00000000000..9538ffd1e73 --- /dev/null +++ b/buildscripts/setup_clang_tidy.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +"""Materialize clang-tidy IDE integration files from Bazel outputs.""" + +import argparse +import os +import pathlib +import platform +import sys + +PLUGIN_CANDIDATES = [ + "libmongo_tidy_checks.so", + "libmongo_tidy_checks.dylib", + "mongo_tidy_checks.dll", + "libmongo_tidy_checks.dll", +] + + +def _linux_distribution_id_version() -> tuple[str | None, str | None]: + if platform.system() != "Linux": + return None, None + + os_release = pathlib.Path("/etc/os-release") + if not os_release.exists(): + return None, None + + metadata: dict[str, str] = {} + for line in os_release.read_text(encoding="utf-8").splitlines(): + if "=" not in line: + continue + key, value = line.split("=", 1) + metadata[key] = value.strip().strip('"').strip("'") + + return metadata.get("ID"), metadata.get("VERSION_ID") + + +def mongo_tidy_checks_supported_platform() -> bool: + if platform.system() != "Linux": + return False + + distro_id, version_id = _linux_distribution_id_version() + return not (distro_id == "ubuntu" and version_id == "18.04") + + +def clang_tidy_setup_recovery_message() -> str: + if mongo_tidy_checks_supported_platform(): + return "Run `bazel run //:setup_clang_tidy` to materialize the clang-tidy config and mongo_tidy_checks plugin." + + return ( + "clang-tidy setup via Bazel is not supported on this platform. " + "The mongo_tidy_checks plugin is only supported on Linux excluding Ubuntu 18.04." + ) + + +def _copy_if_changed(src: pathlib.Path, dst: pathlib.Path) -> bool: + src_bytes = src.read_bytes() + if dst.exists() and dst.read_bytes() == src_bytes: + return False + dst.write_bytes(src_bytes) + return True + + +def _write_if_changed(path: pathlib.Path, contents: str) -> bool: + if path.exists() and path.read_text(encoding="utf-8") == contents: + return False + path.write_text(contents, encoding="utf-8") + return True + + +def materialize_clang_tidy_ide_files( + repo_root: pathlib.Path, + config_src: pathlib.Path, + plugin_src: pathlib.Path, +) -> tuple[bool, bool]: + config_changed = _copy_if_changed(config_src, repo_root / ".clang-tidy") + marker_changed = _write_if_changed(repo_root / ".mongo_checks_module_path", str(plugin_src)) + return config_changed, marker_changed + + +def _resolve_runfile(r, rlocation_path: str) -> pathlib.Path: + resolved = r.Rlocation(rlocation_path) + if not resolved: + raise FileNotFoundError(f"Failed to resolve Bazel runfile: {rlocation_path}") + path = pathlib.Path(resolved) + if not path.is_file(): + raise FileNotFoundError(f"Resolved runfile is not a file: {path}") + return path.resolve() + + +def _resolve_plugin( + r, workspace_prefix: str, package_path: str = "src/mongo/tools/mongo_tidy_checks" +) -> pathlib.Path: + for candidate in PLUGIN_CANDIDATES: + resolved = r.Rlocation(f"{workspace_prefix}/{package_path}/{candidate}") + if resolved and pathlib.Path(resolved).is_file(): + return pathlib.Path(resolved).resolve() + + candidate_list = ", ".join(PLUGIN_CANDIDATES) + raise FileNotFoundError( + "Failed to resolve mongo_tidy_checks plugin from Bazel runfiles. " + f"Tried: {candidate_list}" + ) + + +def main() -> int: + try: + import runfiles + except ModuleNotFoundError: + print( + "The `bazel-runfiles` dependency is required to run `bazel run //:setup_clang_tidy`.", + file=sys.stderr, + ) + return 1 + + parser = argparse.ArgumentParser() + parser.add_argument("--config-rlocation", required=True) + args = parser.parse_args() + + workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY") + if not workspace_dir: + print("This tool must be run with `bazel run //:setup_clang_tidy`.", file=sys.stderr) + return 1 + + repo_root = pathlib.Path(workspace_dir).resolve() + config_rlocation = args.config_rlocation + if "/" not in config_rlocation: + print( + f"Unexpected config runfile path: {config_rlocation}", + file=sys.stderr, + ) + return 1 + + workspace_prefix = config_rlocation.split("/", 1)[0] + r = runfiles.Create() + if r is None: + print("Failed to initialize Bazel runfiles support.", file=sys.stderr) + return 1 + + config_src = _resolve_runfile(r, config_rlocation) + plugin_src = _resolve_plugin(r, workspace_prefix) + + config_changed, marker_changed = materialize_clang_tidy_ide_files( + repo_root, + config_src, + plugin_src, + ) + + print(f"Configured clang-tidy for IDE use at {repo_root / '.clang-tidy'}") + print(f"Configured mongo tidy checks plugin at {plugin_src}") + if not config_changed and not marker_changed: + print("clang-tidy IDE files were already up to date.") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/buildscripts/tests/test_bazel_plus_test_interface.py b/buildscripts/tests/test_bazel_plus_test_interface.py index 1edc1944440..48027d27318 100644 --- a/buildscripts/tests/test_bazel_plus_test_interface.py +++ b/buildscripts/tests/test_bazel_plus_test_interface.py @@ -1,10 +1,13 @@ +import os import sys +import tempfile import unittest from contextlib import redirect_stderr from io import StringIO sys.path.append(".") +import bazel.wrapper_hook.plus_interface as plus_interface from bazel.wrapper_hook.plus_interface import ( BinAndSourceIncompatible, DuplicateSourceNames, @@ -299,6 +302,235 @@ class Tests(unittest.TestCase): stderr_output = stderr_capture.getvalue() validate_first_suggestion(stderr_output, "+bson_obj_test") + def test_compiledb_target_runs_separately_and_leaves_other_targets(self): + def buildozer_output(autocomplete_query): + return "" + + args = ["wrapper_hook", "build", "compiledb", "//src/mongo/base:error_codes"] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_swap_default_config = plus_interface.swap_default_config + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.swap_default_config = ( + lambda args, + command, + config_mode, + compiledb_target, + clang_tidy, + user_specified_config: config_mode + ) + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.swap_default_config = original_swap_default_config + + assert result == ["build", "//src/mongo/base:error_codes"] + assert len(generate_calls) == 1 + assert "requested_build_flags" not in generate_calls[0][1] + + def test_compiledb_only_target_skips_final_bazel_invocation(self): + def buildozer_output(autocomplete_query): + return "" + + args = ["wrapper_hook", "build", "compiledb_only"] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_swap_default_config = plus_interface.swap_default_config + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.swap_default_config = ( + lambda args, + command, + config_mode, + compiledb_target, + clang_tidy, + user_specified_config: config_mode + ) + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.swap_default_config = original_swap_default_config + + assert result == [] + assert len(generate_calls) == 1 + + def test_compiledb_target_preserves_define_flag_value(self): + def buildozer_output(autocomplete_query): + return "" + + args = [ + "wrapper_hook", + "build", + "compiledb", + "--define", + "MONGO_VERSION=1", + "--keep_going", + "//src/mongo/base:error_codes", + ] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_swap_default_config = plus_interface.swap_default_config + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.swap_default_config = ( + lambda args, + command, + config_mode, + compiledb_target, + clang_tidy, + user_specified_config: config_mode + ) + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.swap_default_config = original_swap_default_config + + assert result == [ + "build", + "--define", + "MONGO_VERSION=1", + "--keep_going", + "//src/mongo/base:error_codes", + ] + assert len(generate_calls) == 1 + assert "requested_build_flags" not in generate_calls[0][1] + + def test_config_equals_compiledb_runs_normally(self): + def buildozer_output(autocomplete_query): + return "" + + args = ["wrapper_hook", "build", "--config=compiledb", "//src/mongo/base:error_codes"] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_wrapper_config_mode_file = plus_interface.WRAPPER_CONFIG_MODE_FILE + with tempfile.TemporaryDirectory() as tempdir: + wrapper_config_mode_file = os.path.join(tempdir, "mongo_wrapper_config_mode") + with open(wrapper_config_mode_file, "w", encoding="utf-8") as file_handle: + file_handle.write("dbg") + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = wrapper_config_mode_file + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = original_wrapper_config_mode_file + + assert result == ["build", "--config=compiledb", "//src/mongo/base:error_codes"] + assert len(generate_calls) == 0 + + def test_config_separate_compiledb_runs_normally(self): + def buildozer_output(autocomplete_query): + return "" + + args = ["wrapper_hook", "build", "--config", "compiledb", "//src/mongo/base:error_codes"] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_wrapper_config_mode_file = plus_interface.WRAPPER_CONFIG_MODE_FILE + with tempfile.TemporaryDirectory() as tempdir: + wrapper_config_mode_file = os.path.join(tempdir, "mongo_wrapper_config_mode") + with open(wrapper_config_mode_file, "w", encoding="utf-8") as file_handle: + file_handle.write("dbg") + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = wrapper_config_mode_file + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = original_wrapper_config_mode_file + + assert result == [ + "build", + "--config", + "compiledb", + "//src/mongo/base:error_codes", + ] + assert len(generate_calls) == 0 + + def test_config_separate_compiledb_runs_normally_with_plain_target(self): + def buildozer_output(autocomplete_query): + return "" + + args = ["wrapper_hook", "build", "--config", "compiledb", "install-dist-test"] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_wrapper_config_mode_file = plus_interface.WRAPPER_CONFIG_MODE_FILE + with tempfile.TemporaryDirectory() as tempdir: + wrapper_config_mode_file = os.path.join(tempdir, "mongo_wrapper_config_mode") + with open(wrapper_config_mode_file, "w", encoding="utf-8") as file_handle: + file_handle.write("dbg") + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = wrapper_config_mode_file + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = original_wrapper_config_mode_file + + assert result == [ + "build", + "--config", + "compiledb", + "install-dist-test", + ] + assert len(generate_calls) == 0 + + def test_config_separate_compiledb_runs_normally_with_target_before_config(self): + def buildozer_output(autocomplete_query): + return "" + + args = ["wrapper_hook", "build", "install-dist-test", "--config", "compiledb"] + generate_calls = [] + + def fake_generate_compiledb(*call_args, **call_kwargs): + generate_calls.append((call_args, call_kwargs)) + + original_generate_compiledb = plus_interface.generate_compiledb + original_wrapper_config_mode_file = plus_interface.WRAPPER_CONFIG_MODE_FILE + with tempfile.TemporaryDirectory() as tempdir: + wrapper_config_mode_file = os.path.join(tempdir, "mongo_wrapper_config_mode") + with open(wrapper_config_mode_file, "w", encoding="utf-8") as file_handle: + file_handle.write("dbg") + plus_interface.generate_compiledb = fake_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = wrapper_config_mode_file + try: + result = test_runner_interface(args, False, buildozer_output) + finally: + plus_interface.generate_compiledb = original_generate_compiledb + plus_interface.WRAPPER_CONFIG_MODE_FILE = original_wrapper_config_mode_file + + assert result == [ + "build", + "install-dist-test", + "--config", + "compiledb", + ] + assert len(generate_calls) == 0 + if __name__ == "__main__": unittest.main() diff --git a/buildscripts/tests/test_compiledb_output_format.py b/buildscripts/tests/test_compiledb_output_format.py new file mode 100644 index 00000000000..d4d61fb2c82 --- /dev/null +++ b/buildscripts/tests/test_compiledb_output_format.py @@ -0,0 +1,42 @@ +import sys +import unittest + +sys.path.append(".") + +from bazel.wrapper_hook.compiledb import _build_final_compile_command_entry + + +class CompiledbOutputFormatTest(unittest.TestCase): + def test_final_entry_omits_non_standard_target_key(self): + def rewrite_exec_path(path, out_root_str, external_root_str): + if path.startswith("bazel-out/"): + return out_root_str + "/" + path[len("bazel-out/") :] + return path + + entry = { + "file": "bazel-out/k8/bin/src/mongo/base/error_codes.cpp", + "arguments": ["clang++", "-c", "src/mongo/base/error_codes.cpp"], + "output": "bazel-out/k8/bin/src/mongo/base/error_codes.cpp.o", + "target": "//src/mongo/base:error_codes", + } + + formatted_entry = _build_final_compile_command_entry( + entry=entry, + arguments=entry["arguments"], + repo_root_resolved="/repo", + rewrite_exec_path=rewrite_exec_path, + out_root_str="/real/bazel-out", + external_root_str="/real/external", + ) + + assert formatted_entry == { + "file": "/real/bazel-out/k8/bin/src/mongo/base/error_codes.cpp", + "arguments": ["clang++", "-c", "src/mongo/base/error_codes.cpp"], + "directory": "/repo", + "output": "/real/bazel-out/k8/bin/src/mongo/base/error_codes.cpp.o", + } + assert "target" not in formatted_entry + + +if __name__ == "__main__": + unittest.main() diff --git a/etc/evergreen_yml_components/tasks/compile_tasks.yml b/etc/evergreen_yml_components/tasks/compile_tasks.yml index 1067c22fda6..42aacc9ecf5 100644 --- a/etc/evergreen_yml_components/tasks/compile_tasks.yml +++ b/etc/evergreen_yml_components/tasks/compile_tasks.yml @@ -160,6 +160,13 @@ tasks: --output_groups=compilation_outputs --keep_going --build_tag_filters=${bazel_filters_for_cache_hydration} + - func: "bazel compile" + vars: + targets: compiledb + bazel_args: >- + --config=fastbuild + --define GIT_COMMIT_HASH=nogitversion + --keep_going - name: hydrate_bazel_profile_all_headers tags: @@ -185,6 +192,14 @@ tasks: --output_groups=compilation_outputs --keep_going --build_tag_filters=${bazel_filters_for_cache_hydration} + - func: "bazel compile" + vars: + targets: compiledb + bazel_args: >- + --config=fastbuild + --all_headers=True + --define GIT_COMMIT_HASH=nogitversion + --keep_going - name: build_source_graph_index tags: ["assigned_to_jira_team_devprod_build", "auxiliary"] @@ -230,6 +245,13 @@ tasks: --output_groups=compilation_outputs --keep_going --build_tag_filters=${bazel_filters_for_cache_hydration} + - func: "bazel compile" + vars: + targets: compiledb + bazel_args: >- + --config=opt + --define GIT_COMMIT_HASH=nogitversion + --keep_going - name: hydrate_bazel_profile_dbg tags: @@ -253,6 +275,13 @@ tasks: --output_groups=compilation_outputs --keep_going --build_tag_filters=${bazel_filters_for_cache_hydration} + - func: "bazel compile" + vars: + targets: compiledb + bazel_args: >- + --config=dbg + --define GIT_COMMIT_HASH=nogitversion + --keep_going - name: hydrate_bazel_profile_dbg_aubsan tags: @@ -276,6 +305,13 @@ tasks: --output_groups=compilation_outputs --keep_going --build_tag_filters=${bazel_filters_for_cache_hydration} + - func: "bazel compile" + vars: + targets: compiledb + bazel_args: >- + --config=dbg_aubsan + --define GIT_COMMIT_HASH=nogitversion + --keep_going - name: hydrate_bazel_profile_dbg_tsan tags: @@ -299,6 +335,13 @@ tasks: --output_groups=compilation_outputs --keep_going --build_tag_filters=${bazel_filters_for_cache_hydration} + - func: "bazel compile" + vars: + targets: compiledb + bazel_args: >- + --config=dbg_tsan + --define GIT_COMMIT_HASH=nogitversion + --keep_going - name: hydrate_bazel_unit_tests tags: @@ -395,6 +438,22 @@ tasks: vars: target: //evergreen:validate_compile_commands + - name: full_bazel_compiledb + tags: ["assigned_to_jira_team_devprod_build", "auxiliary"] + exec_timeout_secs: 43200 # 12 hour timeout: validates every compile_commands entry. + depends_on: + - name: version_expansions_gen + variant: generate-tasks-for-version + commands: + - func: "do bazel setup" + - func: "bazel compile" + vars: + targets: compiledb + - func: bazel run + vars: + target: //evergreen:validate_compile_commands + args: -- --run-all + - name: compile_upload_benchmarks tags: ["assigned_to_jira_team_devprod_build", "auxiliary"] depends_on: diff --git a/etc/evergreen_yml_components/variants/amazon/test_release.yml b/etc/evergreen_yml_components/variants/amazon/test_release.yml index 25150fcabcf..5be0db3d19f 100644 --- a/etc/evergreen_yml_components/variants/amazon/test_release.yml +++ b/etc/evergreen_yml_components/variants/amazon/test_release.yml @@ -452,6 +452,10 @@ buildvariants: - name: test_packages distros: - ubuntu2204-arm64-m8g-4xlarge + - name: full_bazel_compiledb + cron: "0 4 * * 0" # From the ${project_weekly_cron} parameter. + distros: + - amazon2023-arm64-latest-m8gd-4xlarge - name: .development_critical !.requires_large_host - name: .development_critical .requires_large_host distros: diff --git a/etc/evergreen_yml_components/variants/windows/test_release.yml b/etc/evergreen_yml_components/variants/windows/test_release.yml index fcc457039d0..9f0af013383 100644 --- a/etc/evergreen_yml_components/variants/windows/test_release.yml +++ b/etc/evergreen_yml_components/variants/windows/test_release.yml @@ -102,6 +102,10 @@ buildvariants: - name: run_unit_tests_no_sandbox_TG distros: - windows-2022-xxlarge + - name: full_bazel_compiledb + cron: "0 4 * * 0" # From the ${project_weekly_cron} parameter. + distros: + - windows-2022-xxxlarge-compile - name: .development_critical !.requires_large_host !.incompatible_windows - name: .development_critical .requires_large_host !.incompatible_windows distros: diff --git a/evergreen/BUILD.bazel b/evergreen/BUILD.bazel index c893e5b0460..7024fd0635d 100644 --- a/evergreen/BUILD.bazel +++ b/evergreen/BUILD.bazel @@ -1,5 +1,5 @@ load("@poetry//:dependencies.bzl", "dependency") -load("@rules_python//python:defs.bzl", "py_binary", "py_library") +load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test") package(default_visibility = ["//visibility:public"]) @@ -9,6 +9,14 @@ py_binary( main = "validate_compile_commands.py", ) +py_test( + name = "validate_compile_commands_test", + srcs = [ + "validate_compile_commands.py", + "validate_compile_commands_test.py", + ], +) + sh_binary( name = "wiki_page_minimized_agg_query_fuzzer", srcs = ["wiki_page_minimized_agg_query_fuzzer.sh"], diff --git a/evergreen/coverity_build.sh b/evergreen/coverity_build.sh index ae0fbeae6a2..04315c46ef9 100644 --- a/evergreen/coverity_build.sh +++ b/evergreen/coverity_build.sh @@ -17,29 +17,108 @@ activate_venv export MONGO_WRAPPER_OUTPUT_ALL=1 # number of parallel jobs to use for build. # Even with scale=0 (the default), bc command adds decimal digits in case of multiplication. Division by 1 gives us a whole number with scale=0 -bazel_jobs=$(bc <<<"$(grep -c '^processor' /proc/cpuinfo) * .85 / 1") -cov_jobs=$(bc <<<"$(grep -c '^processor' /proc/cpuinfo) * .40 / 1") +coverity_config_dir="$workdir/coverity/config" +coverity_config_file="$coverity_config_dir/coverity_config.xml" -build_config="--config=local --jobs=$bazel_jobs --build_atlas=True --compiler_type=gcc --opt=off --dbg=False --allocator=system --define=MONGO_VERSION=${version}" -bazel_query='mnemonic("CppCompile|LinkCompile", filter(//src/mongo, deps(//:install-core)))' +build_config="--config=local --build_atlas=True --compiler_type=gcc --opt=off --dbg=False --allocator=system --define=MONGO_VERSION=${version}" bazel_cache="--output_user_root=$workdir/bazel_cache" +compiledb_target_pattern_file="$(mktemp "$workdir/install-core-compiledb-targets.XXXXXX")" +query_stderr_file="$(mktemp "$workdir/install-core-compiledb-query-stderr.XXXXXX")" +trap 'rm -f "$compiledb_target_pattern_file" "$query_stderr_file"' EXIT -python bazel/coverity/generate_coverity_targets.py --bazel_executable="bazel" --bazel_cache=$bazel_cache --bazel_query="$bazel_query" $build_config --noinclude_artifacts -bazel $bazel_cache build $build_config --build_tag_filters=gen_source //src/... +echo "Generating compile_commands.json for Coverity capture" +echo "Resolving mongo_compiledb targets under //:install-core" +query_command=( + bazel + $bazel_cache + cquery + $build_config + 'attr("tags", ".*mongo_compiledb.*", deps(//:install-core))' +) +printf ' %q' "${query_command[@]}" +echo +if ! "${query_command[@]}" \ + 2>"$query_stderr_file" | grep "//src/mongo" | awk '{print $1}' | sort -u >"$compiledb_target_pattern_file"; then + echo "Failed to resolve mongo_compiledb targets under //:install-core" + cat "$query_stderr_file" + exit 1 +fi -buildCommand="bazel \ - $bazel_cache \ - build \ - $build_config \ - --target_pattern_file=coverity_targets.list" -echo Building $buildCommand -cat coverity_targets.list -if ! $workdir/coverity/bin/cov-build --dir "$covIdir" --verbose 0 -j $cov_jobs --return-emit-failures --parse-error-threshold=99 --bazel \ - $buildCommand; then +echo "Contents of $compiledb_target_pattern_file" +cat "$compiledb_target_pattern_file" + +if [ ! -s "$compiledb_target_pattern_file" ]; then + echo "No mongo_compiledb targets found under //:install-core" + exit 1 +fi + +build_compiledb_command=( + bazel + $bazel_cache + build + $build_config + --config=compiledb + --target_pattern_file="$compiledb_target_pattern_file" +) +printf ' %q' "${build_compiledb_command[@]}" +echo +"${build_compiledb_command[@]}" + +compiledb_output_base="$(bazel $bazel_cache info output_base)" +repo_python="" +python_candidates=( + "$compiledb_output_base/external/_main~setup_mongo_python_toolchains~py_host/dist/bin/python3" + "$compiledb_output_base/external/py_host/dist/bin/python3" + "$compiledb_output_base/external/_main~setup_mongo_python_toolchains~py_host/dist/python.exe" + "$compiledb_output_base/external/py_host/dist/python.exe" +) +for candidate in "${python_candidates[@]}"; do + if [ -x "$candidate" ]; then + repo_python="$candidate" + break + fi +done +if [ -z "$repo_python" ]; then + for candidate in "$compiledb_output_base"/external/*py_host*/dist/bin/python3 \ + "$compiledb_output_base"/external/*py_host*/dist/python.exe; do + if [ -x "$candidate" ]; then + repo_python="$candidate" + break + fi + done +fi +echo "Resolved repo-rule python: $repo_python" +if [ -z "$repo_python" ] || [ ! -x "$repo_python" ]; then + echo "Failed to resolve repo-rule python in Bazel output tree" + exit 1 +fi + +mkdir -p "$coverity_config_dir" +if [ ! -f "$coverity_config_file" ]; then + echo "Configuring Coverity compiler capture with default gcc template" + "$workdir/coverity/bin/cov-configure" --gcc --config "$coverity_config_file" +fi + +capture_command=( + env + "BUILD_WORKSPACE_DIRECTORY=$PWD" + "VALIDATE_COMPILE_COMMANDS_RUN_ALL=1" + "VALIDATE_COMPILE_COMMANDS_OUT_DIR=$workdir/validate_compile_commands_out" + "$repo_python" + evergreen/validate_compile_commands.py +) + +echo "Running Coverity capture via compile_commands.json replay" +printf ' %q' "${capture_command[@]}" +echo +if $workdir/coverity/bin/cov-build --dir "$covIdir" --config "$coverity_config_file" --verbose 0 --return-emit-failures --parse-error-threshold=99 \ + "${capture_command[@]}"; then + echo "cov-build was successful" +else ret=$? echo "cov-build failed with exit code $ret" - cat $covIdir/replay-log.txt + if [ -f "$covIdir/replay-log.txt" ]; then + cat "$covIdir/replay-log.txt" + fi exit $ret -else - echo "cov-build was successful" fi diff --git a/evergreen/validate_compile_commands.py b/evergreen/validate_compile_commands.py index 42eb5bde6e7..20c607804cd 100644 --- a/evergreen/validate_compile_commands.py +++ b/evergreen/validate_compile_commands.py @@ -1,9 +1,11 @@ +import argparse import concurrent.futures import hashlib import heapq import json import os import platform +import random import re import shlex import subprocess @@ -11,19 +13,29 @@ import sys import tempfile from typing import Any, Iterator -default_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY") -if not default_dir: - print( - "This script must be run though bazel. Please run 'bazel run //evergreen:validate_compile_commands' instead." +STANDARD_COMPILE_COMMAND_KEYS = frozenset({"arguments", "command", "directory", "file", "output"}) +COMPILEDB_GENERATION_TARGETS = ["compiledb", "install-wiredtiger"] + + +def _get_workspace_dir() -> str: + workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY") + if workspace_dir: + return workspace_dir + raise RuntimeError( + "This script must be run through bazel. " + "Please run 'bazel run //evergreen:validate_compile_commands' instead." ) - sys.exit(1) -os.chdir(default_dir) -if not os.path.exists("compile_commands.json"): - sys.stderr.write("The 'compile_commands.json' file was not found.\n") - sys.stderr.write("Attempting to run 'bazel build compiledb' to generate it.\n") - subprocess.run(["bazel", "build", "compiledb"], check=True) +def _ensure_compiledb_exists(compdb_path: str) -> None: + if os.path.exists(compdb_path): + return + sys.stderr.write(f"The '{compdb_path}' file was not found.\n") + sys.stderr.write( + "Attempting to run " + f"'bazel build {' '.join(COMPILEDB_GENERATION_TARGETS)}' to generate it.\n" + ) + subprocess.run(["bazel", "build", *COMPILEDB_GENERATION_TARGETS], check=True) def _parse_repo_env_from_bazelrc(bazelrc_path: str, var_name: str) -> str | None: @@ -197,12 +209,181 @@ def _iter_compiledb_entries(path: str) -> Iterator[dict[str, Any]]: pos = 0 +def _validate_compiledb_entry(entry: dict[str, Any], *, index: int) -> None: + extra_keys = sorted(set(entry) - STANDARD_COMPILE_COMMAND_KEYS) + if extra_keys: + raise ValueError( + f"compile_commands.json entry {index} has non-standard keys {extra_keys}. " + f"Only {sorted(STANDARD_COMPILE_COMMAND_KEYS)} are allowed by the Clang JSON " + "Compilation Database format." + ) + + directory = entry.get("directory") + if not isinstance(directory, str) or not directory: + raise ValueError( + f"compile_commands.json entry {index} must contain a non-empty string 'directory'." + ) + + file_name = entry.get("file") + if not isinstance(file_name, str) or not file_name: + raise ValueError( + f"compile_commands.json entry {index} must contain a non-empty string 'file'." + ) + + has_arguments = "arguments" in entry + has_command = "command" in entry + if has_arguments == has_command: + raise ValueError( + f"compile_commands.json entry {index} must contain exactly one of " + "'arguments' or 'command'." + ) + + if has_arguments: + arguments = entry["arguments"] + if ( + not isinstance(arguments, list) + or not arguments + or not all(isinstance(arg, str) for arg in arguments) + ): + raise ValueError( + f"compile_commands.json entry {index} 'arguments' must be a non-empty " + "list of strings." + ) + + if has_command: + command = entry["command"] + if not isinstance(command, str) or not command: + raise ValueError( + f"compile_commands.json entry {index} 'command' must be a non-empty string." + ) + + if "output" in entry: + output = entry["output"] + if not isinstance(output, str) or not output: + raise ValueError( + f"compile_commands.json entry {index} 'output' must be a non-empty string " + "when present." + ) + + def _hash_file_name(file_name: str) -> int: # Deterministic across runs; 'file' in compile_commands is typically relative and stable. digest = hashlib.sha256(file_name.encode("utf-8")).digest() return int.from_bytes(digest[:8], byteorder="big", signed=False) +def _selection_key_for_entry(entry: dict[str, Any]) -> str | None: + """Build a canonical key for cross-platform deterministic selection. + + compile_commands entries may use absolute paths rooted in machine-specific Bazel + output locations (e.g. /tmp/.../external/... or Z:/.../bazel-out//...). + We normalize those prefixes so the same logical source tends to hash the same + across Linux/Windows. + """ + file_name = entry.get("file") + if not isinstance(file_name, str): + return None + + directory = entry.get("directory") + p = file_name.replace("\\", "/") + # Windows paths are case-insensitive; lowercasing also improves cross-OS stability. + p = p.lower() + + # If this is an absolute path under the entry directory, strip that prefix. + if isinstance(directory, str): + d = directory.replace("\\", "/").lower().rstrip("/") + if d and p.startswith(d + "/"): + p = p[len(d) + 1 :] + + # Strip machine-specific prefixes while preserving meaningful roots. + if "/execroot/_main/" in p: + p = p.split("/execroot/_main/", 1)[1] + elif "/external/" in p: + p = "external/" + p.split("/external/", 1)[1] + elif "/bazel-out/" in p: + p = "bazel-out/" + p.split("/bazel-out/", 1)[1] + elif "/src/" in p: + p = "src/" + p.split("/src/", 1)[1] + + # Normalize bazel configuration segment (platform/config differs by OS). + p = re.sub(r"(^|/)bazel-out/[^/]+/", r"\1bazel-out//", p) + p = re.sub(r"/+", "/", p).lstrip("./") + return p or file_name.lower() + + +def _is_truthy_env(value: str | None) -> bool: + if value is None: + return False + return value.strip().lower() not in ("", "0", "false", "no", "off") + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser() + selection_group = parser.add_mutually_exclusive_group() + selection_group.add_argument( + "--run-all", + action="store_true", + help="Validate every compile_commands entry instead of sampling.", + ) + selection_group.add_argument( + "--sample-size", + type=int, + help="Validate a fixed number of compile_commands entries.", + ) + return parser.parse_args() + + +def _determine_selection_count( + default_count: int = 10, + *, + cli_run_all: bool = False, + cli_sample_size: int | None = None, +) -> int: + """Resolve how many compile_commands entries to test. + + CLI flags take precedence over environment variables. + + - --run-all: run all entries. + - --sample-size N: run N entries (N > 0). + - VALIDATE_COMPILE_COMMANDS_RUN_ALL=1: run all entries. + - VALIDATE_COMPILE_COMMANDS_SAMPLE_SIZE=: run N entries (N > 0). + """ + if cli_run_all: + return 0 + + if cli_sample_size is not None: + if cli_sample_size <= 0: + raise ValueError(f"--sample-size must be > 0, got: {cli_sample_size}") + return cli_sample_size + + if _is_truthy_env(os.environ.get("VALIDATE_COMPILE_COMMANDS_RUN_ALL")): + return 0 + + sample_size_env = os.environ.get("VALIDATE_COMPILE_COMMANDS_SAMPLE_SIZE") + if sample_size_env is None: + return default_count + try: + sample_size = int(sample_size_env) + except ValueError as exc: + raise ValueError( + f"VALIDATE_COMPILE_COMMANDS_SAMPLE_SIZE must be an integer, got: {sample_size_env!r}" + ) from exc + if sample_size <= 0: + raise ValueError(f"VALIDATE_COMPILE_COMMANDS_SAMPLE_SIZE must be > 0, got: {sample_size}") + return sample_size + + +def _should_validate_entry(entry: dict[str, Any]) -> bool: + selection_key = _selection_key_for_entry(entry) + if not selection_key: + return False + + # Keep the sample focused on MongoDB workspace sources. External repositories and + # vendored third-party code have their own generated include layouts that are not + # meaningful for validating the repo's compile_commands coverage. + return selection_key.startswith("src/mongo/") + + def _make_test_compile_args(args: list[str]) -> list[str]: """Convert a compile command into a 'test compile' command. @@ -252,6 +433,22 @@ def _map_writable_output_path(out_root: str, original_path: str) -> str: return "_" return comp + if platform.system() == "Windows": + drive, _ = os.path.splitdrive(original_path) + drive_tag = drive.rstrip(":") + drive_tag = drive_tag.lstrip("\\/").replace("\\", "_").replace("/", "_") + drive_tag = _sanitize_component(drive_tag) if drive_tag else "PATH" + + normalized = os.path.normcase(os.path.normpath(original_path)) + digest = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] + basename = _sanitize_component(os.path.basename(original_path)) or "out" + stem, ext = os.path.splitext(basename) + if len(stem) > 48: + stem = stem[:48] + short_name = f"{stem}-{digest}{ext}" + + return os.path.normpath(os.path.join(out_root, "win", drive_tag, short_name)) + drive, tail = os.path.splitdrive(original_path) parts: list[str] = [] @@ -461,38 +658,83 @@ def _ensure_parent_dirs_exist_for_outputs(args: list[str], cwd: str, repo_root: def _select_entries_for_test_compile(path: str, n: int) -> tuple[int, list[dict[str, Any]]]: - """Pick N entries by sorting deterministic hashes of entry['file'] and taking the first N.""" + """Pick N entries by sorting deterministic hashes of a canonicalized file key.""" + if n <= 0: + total = 0 + selected: list[dict[str, Any]] = [] + for index, entry in enumerate(_iter_compiledb_entries(path), start=1): + _validate_compiledb_entry(entry, index=index) + total += 1 + file_name = entry.get("file") + if not isinstance(file_name, str): + continue + selected.append(entry) + return total, selected + # Keep a max-heap of the N smallest hashes. # IMPORTANT: include stable, comparable tie-breakers so heapq never compares dicts. - # Tuple: (-hash, file_name, seq, entry) - heap: list[tuple[int, str, int, dict[str, Any]]] = [] + # Tuple: (-hash, selection_key, file_name, seq, entry) + heap: list[tuple[int, str, str, int, dict[str, Any]]] = [] total = 0 seq = 0 - for entry in _iter_compiledb_entries(path): + for index, entry in enumerate(_iter_compiledb_entries(path), start=1): + _validate_compiledb_entry(entry, index=index) total += 1 file_name = entry.get("file") if not isinstance(file_name, str): continue - h = _hash_file_name(file_name) - item = (-h, file_name, seq, entry) + selection_key = _selection_key_for_entry(entry) + if not selection_key or not _should_validate_entry(entry): + continue + h = _hash_file_name(selection_key) + item = (-h, selection_key, file_name, seq, entry) seq += 1 if len(heap) < n: heapq.heappush(heap, item) else: # If this hash is smaller than the current largest in the heap, replace it. - if item[:3] > heap[0][:3]: + if item[:4] > heap[0][:4]: heapq.heapreplace(heap, item) # Sort ascending by hash. selected = [ - e for (_neg_h, _file_name, _seq, e) in sorted(heap, key=lambda t: (-t[0], t[1], t[2])) + e + for (_neg_h, _selection_key, _file_name, _seq, e) in sorted( + heap, key=lambda t: (-t[0], t[1], t[2], t[3]) + ) ] return total, selected def main() -> int: + try: + workspace_dir = _get_workspace_dir() + except RuntimeError as e: + print(e) + return 1 + + os.chdir(workspace_dir) + + cli_args = _parse_args() compdb_path = "compile_commands.json" - total, selected = _select_entries_for_test_compile(compdb_path, n=10) + _ensure_compiledb_exists(compdb_path) + try: + selection_count = _determine_selection_count( + default_count=10, + cli_run_all=cli_args.run_all, + cli_sample_size=cli_args.sample_size, + ) + except ValueError as e: + sys.stderr.write(f"ERROR: {e}\n") + return 1 + + try: + total, selected = _select_entries_for_test_compile(compdb_path, n=selection_count) + except ValueError as e: + sys.stderr.write(f"ERROR: {e}\n") + return 1 + if selection_count <= 0: + random.shuffle(selected) if total < 1000: sys.stderr.write( @@ -504,9 +746,16 @@ def main() -> int: sys.stderr.write("ERROR: Failed to select any entries for test compilation.\n") return 1 + if selection_count <= 0: + print( + f"Selected all compile_commands entries for validation ({len(selected)}).", flush=True + ) + else: + print(f"Selected {len(selected)} compile_commands entries for validation.", flush=True) + out_root = os.environ.get( "VALIDATE_COMPILE_COMMANDS_OUT_DIR", - os.path.join(default_dir, ".validate_compile_commands_out"), + os.path.join(workspace_dir, ".validate_compile_commands_out"), ) os.makedirs(out_root, exist_ok=True) @@ -550,11 +799,11 @@ def main() -> int: max_workers = max(1, min(max_workers, len(work))) print(f"Running {len(work)} test compiles...", flush=True) - compile_env = _maybe_add_windows_toolchain_env(os.environ.copy(), repo_root=default_dir) + compile_env = _maybe_add_windows_toolchain_env(os.environ.copy(), repo_root=workspace_dir) def _run_one(item: tuple[str, str, list[str]]) -> tuple[str, int, list[str], str, str]: file_name, directory, test_args = item - _ensure_parent_dirs_exist_for_outputs(test_args, cwd=directory, repo_root=default_dir) + _ensure_parent_dirs_exist_for_outputs(test_args, cwd=directory, repo_root=workspace_dir) proc = subprocess.run( test_args, cwd=directory, env=compile_env, capture_output=True, text=True ) diff --git a/evergreen/validate_compile_commands_test.py b/evergreen/validate_compile_commands_test.py new file mode 100644 index 00000000000..357a5929740 --- /dev/null +++ b/evergreen/validate_compile_commands_test.py @@ -0,0 +1,91 @@ +import json +import os +import sys +import tempfile +import unittest +from unittest import mock + +sys.path.append(os.path.dirname(__file__)) + +import validate_compile_commands as validator + + +class ValidateCompileCommandsTest(unittest.TestCase): + def test_accepts_standard_arguments_entry(self): + validator._validate_compiledb_entry( + { + "directory": "/repo", + "file": "src/mongo/db/example.cpp", + "arguments": ["clang++", "-c", "src/mongo/db/example.cpp"], + "output": "bazel-out/example.o", + }, + index=1, + ) + + def test_accepts_standard_command_entry(self): + validator._validate_compiledb_entry( + { + "directory": "/repo", + "file": "src/mongo/db/example.cpp", + "command": "clang++ -c src/mongo/db/example.cpp -o bazel-out/example.o", + }, + index=1, + ) + + def test_rejects_non_standard_keys(self): + with self.assertRaisesRegex(ValueError, r"non-standard keys \['target'\]"): + validator._validate_compiledb_entry( + { + "directory": "/repo", + "file": "src/mongo/db/example.cpp", + "arguments": ["clang++", "-c", "src/mongo/db/example.cpp"], + "target": "//src/mongo/db:example", + }, + index=1, + ) + + def test_rejects_entries_with_both_command_and_arguments(self): + with self.assertRaisesRegex(ValueError, r"exactly one of 'arguments' or 'command'"): + validator._validate_compiledb_entry( + { + "directory": "/repo", + "file": "src/mongo/db/example.cpp", + "arguments": ["clang++", "-c", "src/mongo/db/example.cpp"], + "command": "clang++ -c src/mongo/db/example.cpp", + }, + index=1, + ) + + def test_selection_rejects_non_standard_compile_commands_json(self): + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as compiledb: + json.dump( + [ + { + "directory": "/repo", + "file": "src/mongo/db/example.cpp", + "arguments": ["clang++", "-c", "src/mongo/db/example.cpp"], + "target": "//src/mongo/db:example", + } + ], + compiledb, + ) + compiledb_path = compiledb.name + + try: + with self.assertRaisesRegex(ValueError, r"non-standard keys \['target'\]"): + validator._select_entries_for_test_compile(compiledb_path, n=0) + finally: + os.remove(compiledb_path) + + def test_ensure_compiledb_exists_builds_install_wiredtiger_too(self): + with mock.patch.object(validator.os.path, "exists", return_value=False): + with mock.patch.object(validator.subprocess, "run") as mock_run: + validator._ensure_compiledb_exists("compile_commands.json") + + mock_run.assert_called_once_with( + ["bazel", "build", "compiledb", "install-wiredtiger"], check=True + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/src/mongo/db/extension/host/BUILD.bazel b/src/mongo/db/extension/host/BUILD.bazel index 688b6e0a48a..0a5a0cd9588 100644 --- a/src/mongo/db/extension/host/BUILD.bazel +++ b/src/mongo/db/extension/host/BUILD.bazel @@ -103,6 +103,7 @@ generate_embedded_public_key_header( name = "embed_mongot_key", embedded_key_header_path = "mongot_extension_signing_key.h", public_key_path = "mongot-extension.asc", + tags = ["gen_source"], target_compatible_with = select({ "@platforms//os:linux": [], "//conditions:default": ["@platforms//:incompatible"], diff --git a/tools/bazel b/tools/bazel index 44bafbfb056..eed696f33cb 100755 --- a/tools/bazel +++ b/tools/bazel @@ -310,9 +310,12 @@ else if [[ "$wrapper_redirect_output" == "1" ]]; then exec 1>&3 2>&4 fi - - $bazel_real "${new_args[@]}" - bazel_exit_code=$? + + bazel_exit_code=0 + if [[ ${#new_args[@]} -ne 0 ]]; then + $bazel_real "${new_args[@]}" + bazel_exit_code=$? + fi ( >&2 $python $REPO_ROOT/bazel/wrapper_hook/post_bazel_hook.py $bazel_real ) exit $bazel_exit_code fi