From 07623b79e4c2b2260cb7875fc5d4c82b00fcf697 Mon Sep 17 00:00:00 2001 From: Andrew Bradshaw Date: Wed, 16 Jul 2025 12:25:15 -0700 Subject: [PATCH] SERVER-107518 Add support for PGO and Bolt for mongod (#38562) GitOrigin-RevId: 409b736954123600b5d906e4827cbbb392ad170e --- .bazelrc | 7 +- BUILD.bazel | 41 +++++- MODULE.bazel | 40 ++++++ bazel/config/BUILD.bazel | 125 ++++++++++++++-- bazel/config/configs.bzl | 78 +++++++++- bazel/install_rules/bolt.bzl | 75 ++++++++++ bazel/mongo_src_rules.bzl | 2 - bazel/repository_rules/bolt_data.bzl | 107 ++++++++++++++ bazel/repository_rules/pgo_data.bzl | 98 +++++++++++++ .../cc/mongo_linux/mongo_compiler_flags.bzl | 10 -- .../mongo_linux_cc_toolchain_config.bzl | 134 ++++++++++++++++++ .../cc/mongo_linux/mongo_toolchain.BUILD.tmpl | 37 ++++- src/mongo/BUILD.bazel | 3 +- src/mongo/db/BUILD.bazel | 32 +++++ 14 files changed, 753 insertions(+), 36 deletions(-) create mode 100644 bazel/install_rules/bolt.bzl create mode 100644 bazel/repository_rules/bolt_data.bzl create mode 100644 bazel/repository_rules/pgo_data.bzl diff --git a/.bazelrc b/.bazelrc index 2299e3d73ca..16ef4265337 100644 --- a/.bazelrc +++ b/.bazelrc @@ -143,7 +143,12 @@ common --flag_alias=build_enterprise=//bazel/config:build_enterprise common --flag_alias=visibility_support=//bazel/config:visibility_support common --flag_alias=disable_warnings_as_errors=//bazel/config:disable_warnings_as_errors common --flag_alias=gcov=//bazel/config:gcov -common --flag_alias=pgo_profile=//bazel/config:pgo_profile +common --flag_alias=pgo_profile_generate=//bazel/config:pgo_profile_generate +common --flag_alias=pgo_profile_use=//bazel/config:pgo_profile_use +common --flag_alias=bolt_profile_generate=//bazel/config:bolt_profile_generate +common --flag_alias=bolt_profile_use=//bazel/config:bolt_profile_use +common --flag_alias=propeller_profile_generate=//bazel/config:propeller_profile_generate +common --flag_alias=propeller_profile_use=//bazel/config:propeller_profile_use common 
--flag_alias=server_js=//bazel/config:server_js common --flag_alias=ssl=//bazel/config:ssl common --flag_alias=js_engine=//bazel/config:js_engine diff --git a/BUILD.bazel b/BUILD.bazel index 1faf83bc08f..5350268b5bd 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,6 +1,7 @@ load("@npm//:defs.bzl", "npm_link_all_packages") load("@aspect_rules_js//npm:defs.bzl", "npm_link_package") load("//bazel/install_rules:install_rules.bzl", "TEST_TAGS", "mongo_install") +load("//bazel/install_rules:bolt.bzl", "bolt_instrument", "bolt_optimize") load("//bazel:mongo_src_rules.bzl", "mongo_cc_binary") load("//bazel/toolchains/cc/mongo_linux:mongo_toolchain.bzl", "setup_mongo_toolchain_aliases") load("//bazel/config:render_template.bzl", "render_template") @@ -209,12 +210,14 @@ mongo_install( mongo_install( name = "dist", srcs = [ - "//src/mongo/db:mongod", "//src/mongo/installer/compass:compass_files", "//src/mongo/s:mongos", ] + select({ "@platforms//os:windows": ["@local_windows_msvc//:vc_redist_x64"], "//conditions:default": [], + }) + select({ + "//bazel/config:bolt_profile_use_enabled": ["//:bolt_optimized_mongod"], + "//conditions:default": ["//src/mongo/db:mongod"], }), package_extract_name = select({ "//bazel/config:build_enterprise_linux_enabled": "mongodb-linux-{TARGET_CPU}-enterprise-{MONGO_DISTMOD}-{MONGO_VERSION}", @@ -270,7 +273,6 @@ copy_to_directory( mongo_install( name = "dist-test", srcs = [ - "//src/mongo/db:mongod", "//src/mongo/db:mongotrafficreader", "//src/mongo/db/query/query_tester:mongotest", "//src/mongo/db/query/search/mongotmock", @@ -287,6 +289,9 @@ mongo_install( }) + select({ "//bazel/config:include_mongot_enabled": ["//:mongot_folder"], "//conditions:default": [], + }) + select({ + "//bazel/config:bolt_profile_use_enabled": ["//:bolt_optimized_mongod"], + "//conditions:default": ["//src/mongo/db:mongod"], }), pretty_printer_tests = { "//src/mongo/util:pretty_printer_test.py": "//src/mongo/util:pretty_printer_test_program", @@ -479,3 +484,35 @@ 
sh_binary( "BINARY_PATH": "$(location @scip-src//file)", }, ) + +# Unused for now due to instrumented mongod crashing on machines slightly different than the one +# it gets built on +bolt_instrument( + name = "bolt_instrumented_mongod", + binary_to_instrument = "//src/mongo/db:mongod", + exec_properties = { + "no-cache": "1", + "no-remote": "1", + "local": "1", + }, + instrumentation_output_file = "/tmp/mongod_bolt/mongod.fdata", + target_compatible_with = select({ + "//bazel/config:bolt_profile_generate_enabled": [], + "//conditions:default": ["@platforms//:incompatible"], + }), +) + +bolt_optimize( + name = "bolt_optimized_mongod", + binary_to_optimize = "//src/mongo/db:mongod", + exec_properties = { + "no-cache": "1", + "no-remote": "1", + "local": "1", + }, + perf_data = "@bolt_data//:bolt_fdata", + target_compatible_with = select({ + "//bazel/config:bolt_profile_use_enabled": [], + "//conditions:default": ["@platforms//:incompatible"], + }), +) diff --git a/MODULE.bazel b/MODULE.bazel index 082db8d525a..e629d15e7cc 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -24,6 +24,38 @@ http_archive( ] * 5, ) +# TODO(SERVER-107519): Get these binaries from an actual pipeline build and not locally produced binaries +http_archive( + name = "bolt_binaries", + build_file_content = """ +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "merge-fdata", + srcs = ["bolt/bin/merge-fdata"], +) + +filegroup( + name = "bolt", + srcs = ["bolt/bin/llvm-bolt"], +) + +filegroup( + name = "perf2bolt", + srcs = ["bolt/bin/perf2bolt"], +) + +filegroup( + name = "libbolt_rt_instr", + srcs = ["bolt/lib/libbolt_rt_instr.a"], +) + """, + sha256 = "3075be7271266550a02a0fce25622a7130a9fdf947d708e0abd45213ee17e0cf", + urls = [ + "https://mdb-build-public.s3.us-east-1.amazonaws.com/andrew_pgo_scratch/bolt_good.tar.gz", + ] * 5, +) + # SourceGraph indexer http_file( name = "scip-clang", @@ -208,6 +240,14 @@ setup_evergreen_variables = 
use_repo_rule("//bazel/repository_rules:evergreen_va setup_evergreen_variables(name = "evergreen_variables") +setup_pgo_data = use_repo_rule("//bazel/repository_rules:pgo_data.bzl", "setup_pgo_data") + +setup_pgo_data(name = "pgo_data") + +setup_bolt_data = use_repo_rule("//bazel/repository_rules:bolt_data.bzl", "setup_bolt_data") + +setup_bolt_data(name = "bolt_data") + setup_mongo_windows_toolchains_extension = use_extension("//bazel/toolchains/cc/mongo_windows:mongo_toolchain.bzl", "setup_mongo_windows_toolchain_extension") use_repo(setup_mongo_windows_toolchains_extension, "mongo_windows_toolchain") diff --git a/bazel/config/BUILD.bazel b/bazel/config/BUILD.bazel index b95041e8a6f..2a2383c3f3f 100644 --- a/bazel/config/BUILD.bazel +++ b/bazel/config/BUILD.bazel @@ -8,6 +8,8 @@ load( "//bazel/config:configs.bzl", "allocator", "asan", + "bolt_profile_generate", + "bolt_profile_use", "build_enterprise", "build_otel", "compiler_type", @@ -33,7 +35,10 @@ load( "mongo_toolchain_version", "msan", "opt", - "pgo_profile", + "pgo_profile_generate", + "pgo_profile_use", + "propeller_profile_generate", + "propeller_profile_use", "release", "sdkroot", "separate_debug", @@ -2223,30 +2228,130 @@ config_setting( ) # -------------------------------------- -# pgo_profile options +# pgo_profile_generate options # -------------------------------------- -pgo_profile( - name = "pgo_profile", +pgo_profile_generate( + name = "pgo_profile_generate", build_setting_default = False, ) config_setting( - name = "pgo_profile_enabled", + name = "pgo_profile_generate_enabled", flag_values = { - "//bazel/config:pgo_profile": "True", + "//bazel/config:pgo_profile_generate": "True", }, ) -bool_flag( - name = "bolt", +selects.config_setting_group( + name = "pgo_profile_generate_clang_enabled", + match_all = [ + ":linux_clang", + ":pgo_profile_generate_enabled", + ], +) + +selects.config_setting_group( + name = "pgo_profile_generate_gcc_enabled", + match_all = [ + ":linux_gcc", + 
":pgo_profile_generate_enabled", + ], +) + +# -------------------------------------- +# pgo_profile_use options +# -------------------------------------- + +pgo_profile_use( + name = "pgo_profile_use", build_setting_default = False, ) config_setting( - name = "bolt_enabled", + name = "pgo_profile_use_enabled", flag_values = { - "//bazel/config:bolt": "True", + "//bazel/config:pgo_profile_use": "True", + }, +) + +selects.config_setting_group( + name = "pgo_profile_use_clang_enabled", + match_all = [ + ":linux_clang", + ":pgo_profile_use_enabled", + ], +) + +selects.config_setting_group( + name = "pgo_profile_use_gcc_enabled", + match_all = [ + ":linux_gcc", + ":pgo_profile_use_enabled", + ], +) + +# -------------------------------------- +# bolt_profile_generate options +# -------------------------------------- + +bool_flag( + name = "bolt_profile_generate", + build_setting_default = False, +) + +config_setting( + name = "bolt_profile_generate_enabled", + flag_values = { + "//bazel/config:bolt_profile_generate": "True", + }, +) + +# -------------------------------------- +# bolt_profile_use options +# -------------------------------------- + +bool_flag( + name = "bolt_profile_use", + build_setting_default = False, +) + +config_setting( + name = "bolt_profile_use_enabled", + flag_values = { + "//bazel/config:bolt_profile_use": "True", + }, +) + +# -------------------------------------- +# propeller_profile_generate options +# -------------------------------------- + +bool_flag( + name = "propeller_profile_generate", + build_setting_default = False, +) + +config_setting( + name = "propeller_profile_generate_enabled", + flag_values = { + "//bazel/config:propeller_profile_generate": "True", + }, +) + +# -------------------------------------- +# propeller_profile_use options +# -------------------------------------- + +bool_flag( + name = "propeller_profile_use", + build_setting_default = False, +) + +config_setting( + name = "propeller_profile_use_enabled", + 
flag_values = { + "//bazel/config:propeller_profile_use": "True", }, ) diff --git a/bazel/config/configs.bzl b/bazel/config/configs.bzl index 5688167e542..22067fb9bbb 100644 --- a/bazel/config/configs.bzl +++ b/bazel/config/configs.bzl @@ -625,16 +625,86 @@ gcov = rule( ) # ========= -# pgo_profile +# pgo_profile_generate # ========= -pgo_profile_provider = provider( +pgo_profile_generate_provider = provider( doc = "Choose if pgo profiling should be generated", fields = ["enabled"], ) -pgo_profile = rule( - implementation = lambda ctx: pgo_profile_provider(enabled = ctx.build_setting_value), +pgo_profile_generate = rule( + implementation = lambda ctx: pgo_profile_generate_provider(enabled = ctx.build_setting_value), + build_setting = config.bool(flag = True), +) + +# ========= +# pgo_profile_use +# ========= + +pgo_profile_use_provider = provider( + doc = "Choose if pgo profiling should be used", + fields = ["enabled"], +) + +pgo_profile_use = rule( + implementation = lambda ctx: pgo_profile_use_provider(enabled = ctx.build_setting_value), + build_setting = config.bool(flag = True), +) + +# ========= +# bolt_profile_generate +# ========= + +bolt_profile_generate_provider = provider( + doc = "Choose if bolt profiling should be generated", + fields = ["enabled"], +) + +bolt_profile_generate = rule( + implementation = lambda ctx: bolt_profile_generate_provider(enabled = ctx.build_setting_value), + build_setting = config.bool(flag = True), +) + +# ========= +# bolt_profile_use +# ========= + +bolt_profile_use_provider = provider( + doc = "Choose if bolt profiling should be used", + fields = ["enabled"], +) + +bolt_profile_use = rule( + implementation = lambda ctx: bolt_profile_use_provider(enabled = ctx.build_setting_value), + build_setting = config.bool(flag = True), +) + +# ========= +# propeller_profile_generate +# ========= + +propeller_profile_generate_provider = provider( + doc = "Choose if binary should be prepared to be run under perf for propeller", + fields = 
["enabled"],
+)
+
+propeller_profile_generate = rule(
+    implementation = lambda ctx: propeller_profile_generate_provider(enabled = ctx.build_setting_value),
+    build_setting = config.bool(flag = True),
+)
+
+# =========
+# propeller_profile_use
+# =========
+
+propeller_profile_use_provider = provider(
+    doc = "Choose if propeller profiling should be used",
+    fields = ["enabled"],
+)
+
+propeller_profile_use = rule(
+    implementation = lambda ctx: propeller_profile_use_provider(enabled = ctx.build_setting_value),
     build_setting = config.bool(flag = True),
 )
 
diff --git a/bazel/install_rules/bolt.bzl b/bazel/install_rules/bolt.bzl
new file mode 100644
index 00000000000..095a9d68be8
--- /dev/null
+++ b/bazel/install_rules/bolt.bzl
@@ -0,0 +1,100 @@
+"""Rules that post-process a linked binary with llvm-bolt: bolt_instrument and bolt_optimize."""
+
+# Function names (a trailing * is a pattern) passed to llvm-bolt's skip-funcs option by both
+# rules: malloc/calloc/realloc, operator new/delete manglings and tcmalloc internals.
+SKIP_FUNCTIONS = [
+    "_ZN8tcmalloc17tcmalloc_internal6subtle6percpu12TcmallocSlab4GrowEimmN4absl12lts_2023080211FunctionRefIFmhEEE",
+    "_ZN8tcmalloc17tcmalloc_internal18cpu_cache_internal8CpuCacheINS1_15StaticForwarderEE21DeallocateSlowNoHooksEPvm",
+    "_ZN8tcmalloc17tcmalloc_internal18cpu_cache_internal8CpuCacheINS1_15StaticForwarderEE19AllocateSlowNoHooksEm",
+    "calloc*",
+    "malloc*",
+    "_Znwm*",
+    "_ZnwmSt11align_val_t*",
+    "_ZdaPv*",
+    "realloc*",
+    "_ZdlPvmSt11align_val_t*",
+    "_ZdlPvm*",
+    "_ZN8tcmalloc17tcmalloc_internal6subtle6percpu12TcmallocSlab16CacheCpuSlabSlowEv",
+    "TcmallocSlab_Internal_PushBatch",
+    "TcmallocSlab_Internal_PopBatch",
+]
+
+def _bolt_instrument_impl(ctx):
+    """Produces an instrumented copy of the first file of binary_to_instrument.
+
+    The copy keeps the input's basename. llvm-bolt is told to use instrumentation_output_file
+    as the profile location, with the pid appended (--instrumentation-file-append-pid).
+    """
+    input_binary = ctx.files.binary_to_instrument[0]
+    output_binary = ctx.actions.declare_file(input_binary.basename)
+    functions_to_skip = ",".join(SKIP_FUNCTIONS)
+    ctx.actions.run(
+        inputs = [input_binary, ctx.file._bolt_needed_lib],
+        outputs = [output_binary],
+        executable = ctx.executable._bolt_binary,
+        arguments = [
+            input_binary.path,
+            "-instrument",
+            "-o",
+            output_binary.path,
+            "--instrumentation-file=" + ctx.attr.instrumentation_output_file,
+            "--instrumentation-file-append-pid",
+            "--skip-funcs=" + functions_to_skip,
+        ],
+        mnemonic = "BoltInstrument",
+    )
+    return DefaultInfo(files = depset([output_binary]))
+
+bolt_instrument = rule(
+    implementation = _bolt_instrument_impl,
+    attrs = {
+        "binary_to_instrument": attr.label(allow_files = True, mandatory = True),
+        "instrumentation_output_file": attr.string(),
+        # "exec": the tool runs as part of the build; "host" is the deprecated spelling.
+        "_bolt_binary": attr.label(allow_single_file = True, default = "@bolt_binaries//:bolt", executable = True, cfg = "exec"),
+        "_bolt_needed_lib": attr.label(allow_single_file = True, default = "@bolt_binaries//:libbolt_rt_instr"),
+    },
+)
+
+def _bolt_optimize_impl(ctx):
+    """Rewrites the first file of binary_to_optimize with llvm-bolt, guided by perf_data.
+
+    The output keeps the input's basename.
+    """
+    input_binary = ctx.files.binary_to_optimize[0]
+    output_binary = ctx.actions.declare_file(input_binary.basename)
+    profile = ctx.file.perf_data
+    functions_to_skip = ",".join(SKIP_FUNCTIONS)
+    ctx.actions.run(
+        # llvm-bolt reads the profile (-data), so it is declared as an input: that stages the
+        # file for the action and re-runs the action whenever the profile changes.
+        inputs = [input_binary, profile],
+        outputs = [output_binary],
+        executable = ctx.executable._bolt_binary,
+        arguments = [
+            input_binary.path,
+            "-o",
+            output_binary.path,
+            "-data",
+            profile.path,
+            "-reorder-blocks=ext-tsp",
+            "-reorder-functions=cdsort",
+            "-split-functions",
+            "-split-all-cold",
+            "-split-eh",
+            "-dyno-stats",
+            "--lite",
+            "-skip-funcs=" + functions_to_skip,
+        ],
+        mnemonic = "BoltOptimize",
+    )
+    return DefaultInfo(files = depset([output_binary]))
+
+bolt_optimize = rule(
+    implementation = _bolt_optimize_impl,
+    attrs = {
+        "binary_to_optimize": attr.label(allow_files = True, mandatory = True),
+        "perf_data": attr.label(allow_single_file = True, mandatory = True),
+        "_bolt_binary": attr.label(allow_single_file = True, default = "@bolt_binaries//:bolt", executable = True, cfg = "exec"),
+    },
+)
diff --git a/bazel/mongo_src_rules.bzl b/bazel/mongo_src_rules.bzl
index a6f6cd25b81..c3acca251ab 100644
--- a/bazel/mongo_src_rules.bzl
+++ b/bazel/mongo_src_rules.bzl
@@ -757,7 +757,6 @@ def _mongo_cc_binary_and_test(
         "tags": tags,
         "linkopts": linkopts + rpath_flags + select({
             "//bazel/config:thin_lto_enabled": ["-Wl,--threads=" + str(NUM_CPUS)],
-            "//bazel/config:bolt_enabled": ["-Wl,--threads=" + str(NUM_CPUS)],
             "//conditions:default": [],
}) + select({
             "//bazel/config:simple_build_id_enabled": ["-Wl,--build-id=0x" +
@@ -789,7 +788,6 @@ def _mongo_cc_binary_and_test(
             "//conditions:default": {"cpp_link.coefficient": "3.0"},
         }) | select({
             "//bazel/config:thin_lto_enabled": {"cpp_link.cpus": str(NUM_CPUS)},
-            "//bazel/config:bolt_enabled": {"cpp_link.cpus": str(NUM_CPUS)},
             "//conditions:default": {},
         }),
         "env": env | SANITIZER_ENV,
diff --git a/bazel/repository_rules/bolt_data.bzl b/bazel/repository_rules/bolt_data.bzl
new file mode 100644
index 00000000000..9c6d3baeda1
--- /dev/null
+++ b/bazel/repository_rules/bolt_data.bzl
@@ -0,0 +1,129 @@
+"""Repository rule that fetches BOLT profile data and exposes the .fdata files as bolt_fdata."""
+
+load("//bazel/repository_rules:pgo_data.bzl", "get_all_files")
+
+# TODO(SERVER-107522): Get perf data from the actual training pipeline
+DEFAULT_BOLT_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/andrew_pgo_scratch/ltopgobolt.fdata"
+DEFAULT_BOLT_DATA_CHECKSUM = "e2e0ea260d0436d77bd8a3a6513fed60de39ffbc04425663c03f9608d6a20f39"
+
+def _setup_bolt_data(repository_ctx):
+    """Fetches or assembles the BOLT profile and writes the BUILD.bazel that exposes it.
+
+    Environment variables read:
+      bolt_profile_url: profile urls separated by "|" (a single .fdata, or archives).
+      bolt_binary_url: archive with the binary the profiles were recorded from.
+      bolt_binary_name: file name suffix of that binary, "mongod" when unset.
+
+    When neither url is set, DEFAULT_BOLT_DATA_URL is downloaded.
+    """
+    bolt_fdata_filename = "bolt.fdata"
+
+    # This potentially contains multiple urls, separated by a | eg. url1|url2|url3
+    bolt_profile_urls_env = repository_ctx.os.environ.get("bolt_profile_url", None)
+
+    # This is the binary the bolt data came from
+    bolt_binary_env = repository_ctx.os.environ.get("bolt_binary_url", None)
+
+    # In case you want to bolt a binary instead of the main binary mongod
+    bolt_binary_name = repository_ctx.os.environ.get("bolt_binary_name", None)
+
+    if bolt_binary_name == None:
+        bolt_binary_name = "mongod"
+
+    # We should be using the default bolt data because we are not being passed bolt data
+    if bolt_profile_urls_env == None and bolt_binary_env == None:
+        repository_ctx.download(DEFAULT_BOLT_DATA_URL, bolt_fdata_filename, sha256 = DEFAULT_BOLT_DATA_CHECKSUM)
+
+    # A binary without any profile to go with it cannot be processed
+    elif bolt_profile_urls_env == None:
+        fail("bolt_binary_url is set but bolt_profile_url is not; set bolt_profile_url as well.")
+
+    # This is mainly used for the bolt training pipeline
+    else:
+        # 2 main scenarios
+        # 1. They are passing us a single bolt .fdata file, just download the file
+        # 2. They are passing us one or more unprocessed .tgz files with .data files inside
+        # the .data files need to be turned into fdata files using perf2bolt, which then can be merged
+        # into a single fdata file using merge-fdata
+
+        bolt_urls = bolt_profile_urls_env.split("|")
+
+        # They are passing us a single bolt fdata file, just download the file
+        if len(bolt_urls) == 1 and bolt_urls[0].endswith(".fdata"):
+            print("Downloading single fdata file for bolt: " + bolt_urls[0])
+            repository_ctx.download(bolt_profile_urls_env, bolt_fdata_filename)
+        else:
+            # The archives are processed against the binary, so its url is required here
+            if bolt_binary_env == None:
+                fail("bolt_binary_url must be set when bolt_profile_url is not a single .fdata file.")
+
+            url_num = 0
+            print("Downloading and extracting multiple bolt files.")
+            for url in bolt_urls:
+                print("Downloading and extracting: " + url)
+                repository_ctx.download_and_extract(url, str(url_num))
+                url_num += 1
+
+            # Download the mongod binary for perf2bolt
+            print("Download the mongo binaries from: " + bolt_binary_env)
+            repository_ctx.download_and_extract(bolt_binary_env, "binaries")
+
+            files = get_all_files(repository_ctx.path("."), 20)
+            data_files = [file for file in files if file.basename.endswith(".data")]
+            fdata_files = [file for file in files if file.basename.endswith(".fdata")]
+            binaries = [file for file in files if file.basename.endswith(bolt_binary_name)]
+            if len(binaries) == 0:
+                fail("No file ending in '" + bolt_binary_name + "' was found in the downloaded archives.")
+            binary = binaries[0]
+
+            processed_fdata_files = 0
+
+            # This is scenario 2, we need to turn these data files into fdata files using perf2bolt
+            if len(data_files) > 0:
+                print("Found data files, turning them into fdata files with perf2bolt.")
+                for file in data_files:
+                    fdata_file_name = "bolt" + str(processed_fdata_files) + ".fdata"
+                    arguments = [repository_ctx.attr._perf2bolt_binary, "-nl", "-p", file, "-o", fdata_file_name, binary]
+                    result = repository_ctx.execute(arguments)
+                    print(result.stdout)
+                    if result.return_code != 0:
+                        print(result.stderr)
+                        fail("Failed to run perf2bolt.")
+                    processed_fdata_files += 1
+                    fdata_files.append(fdata_file_name)
+
+            # If we have multiple fdata files we need to merge them together using merge-fdata
+            if len(fdata_files) > 1:
+                print("Merging fdata files with merge-fdata.")
+                arguments = [repository_ctx.attr._merge_fdata_binary, "-o", bolt_fdata_filename] + fdata_files
+                result = repository_ctx.execute(arguments)
+                print(result.stdout)
+                if result.return_code != 0:
+                    print(result.stderr)
+                    fail("Failed to run merge-fdata.")
+
+                # clean up the pre-merged fdata files
+                for file in fdata_files:
+                    repository_ctx.delete(file)
+
+    repository_ctx.file(
+        "BUILD.bazel",
+        """
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "bolt_fdata",
+    srcs = glob(["**/*.fdata"]),
+)
+""",
+    )
+
+setup_bolt_data = repository_rule(
+    implementation = _setup_bolt_data,
+    environ = ["bolt_profile_url", "bolt_binary_url", "bolt_binary_name"],
+    attrs = {
+        # There is a bug where the repo rule does not properly evaluate these labels so we have to list the full path to the binaries
+        "_merge_fdata_binary": attr.label(allow_single_file = True, default = "@bolt_binaries//:bolt/bin/merge-fdata", executable = True, cfg = "host"),
+        "_perf2bolt_binary": attr.label(allow_single_file = True, default = "@bolt_binaries//:bolt/bin/perf2bolt", executable = True, cfg = "host"),
+    },
+)
diff --git a/bazel/repository_rules/pgo_data.bzl b/bazel/repository_rules/pgo_data.bzl
new file mode 100644
index 00000000000..379723fdb5f
--- /dev/null
+++ b/bazel/repository_rules/pgo_data.bzl
@@ -0,0 +1,114 @@
+"""Repository rule that fetches PGO data and exposes clang_pgo_files and gcc_pgo_files."""
+
+# TODO(SERVER-107522): Get perf data from the actual training pipeline
+DEFAULT_CLANG_PGO_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/andrew_pgo_scratch/pgolto.profdata"
+DEFAULT_CLANG_PGO_DATA_CHECKSUM = "9ce0bbfce69b4b4d3032f2b2078a6de3aeae3aa8cf16b00bd1016ee5fcb839da"
+
+DEFAULT_GCC_PGO_DATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/andrew_pgo_scratch/pgo_gcda.tgz"
+DEFAULT_GCC_PGO_DATA_CHECKSUM = "4a0f3191776d2b8cc36fe2d69b07034b325c9989252e412bacfef35826362c35"
+
+# TODO(SERVER-107582): Get llvm-profdata from actual pipeline build
+LLVM_PROFDATA_URL = "https://mdb-build-public.s3.us-east-1.amazonaws.com/andrew_pgo_scratch/llvm-profdata"
+LLVM_PROFDATA_CHECKSUM = "22d4694a0d80691c16c41212c192938992444d6d43a95d12f298506012640753"
+
+# recursion and while loops are not allowed, we can only count files to a certain depth
+def get_all_files(root_path, depth):
+    """Collects the non-directory entries found under root_path.
+
+    Args:
+      root_path: path object to start from; it and its subdirectories are read with readdir().
+      depth: number of directory levels to read, counting root_path as the first.
+
+    Returns:
+      A list of path objects. Anything nested deeper than depth levels is not included.
+    """
+    files = []
+    pending_dirs = [root_path]
+    for _ in range(depth):
+        if len(pending_dirs) == 0:
+            break
+        next_dirs = []
+        for directory in pending_dirs:
+            for path in directory.readdir():
+                if path.is_dir:
+                    next_dirs.append(path)
+                else:
+                    files.append(path)
+        pending_dirs = next_dirs
+    return files
+
+def _setup_pgo_data(repository_ctx):
+    """Fetches or assembles the PGO profile data and writes the BUILD.bazel that exposes it.
+
+    The pgo_profile_url environment variable may hold urls separated by "|"; when it is
+    unset, the default clang and gcc profiles are downloaded.
+    """
+    # This potentially contains multiple urls, separated by a | eg. url1|url2|url3
+    pgo_urls_env = repository_ctx.os.environ.get("pgo_profile_url", None)
+    clang_profdata_filename = "clang_pgo.profdata"
+
+    # We should be using the default pgo data because we are not being passed pgo data
+    if pgo_urls_env == None:
+        repository_ctx.download(DEFAULT_CLANG_PGO_DATA_URL, clang_profdata_filename, sha256 = DEFAULT_CLANG_PGO_DATA_CHECKSUM)
+        repository_ctx.download_and_extract(DEFAULT_GCC_PGO_DATA_URL, "gcc_pgo", sha256 = DEFAULT_GCC_PGO_DATA_CHECKSUM)
+
+    # This is mainly used for the pgo training pipeline
+    else:
+        # 4 main scenarios
+        # 1. They are passing us a single clang profdata file, just download the file
+        # 2. They are passing us a single gcc .tgz with gcda files inside, just download and extract
+        # 3. They are passing us one or more clang unprocessed .tgz files with .profraw files inside
+        # the .profraw files need to be merged using llvm-profdata into a single .profdata file
+        # 4. They are passing us multiple gcc .tgz with gcda files inside, they need to be merged
+        # but this is currently unsupported by us
+
+        pgo_urls = pgo_urls_env.split("|")
+
+        # They are passing us a single clang profdata file, just download the file
+        if len(pgo_urls) == 1 and pgo_urls[0].endswith(".profdata"):
+            print("Downloading single clang profdata file for pgo: " + pgo_urls[0])
+            repository_ctx.download(pgo_urls_env, clang_profdata_filename)
+        else:
+            url_num = 0
+            print("Downloading and extracting multiple pgo files.")
+            for url in pgo_urls:
+                print("Downloading and extracting: " + url)
+                repository_ctx.download_and_extract(url, str(url_num))
+                url_num += 1
+
+            files = get_all_files(repository_ctx.path("."), 20)
+            profraw_files = [file for file in files if file.basename.endswith(".profraw")]
+
+            # This is scenario 3, we need to merge these profraw files
+            if len(profraw_files) > 0:
+                print("Found profraw files, merging them with llvm-profdata.")
+                print(profraw_files)
+                repository_ctx.download(LLVM_PROFDATA_URL, "llvm-profdata", executable = True, sha256 = LLVM_PROFDATA_CHECKSUM)
+                arguments = ["./llvm-profdata", "merge", "-output=" + clang_profdata_filename] + profraw_files
+                result = repository_ctx.execute(arguments)
+                print(result.stdout)
+                if result.return_code != 0:
+                    print(result.stderr)
+                    fail("Failed to run llvm-profdata.")
+
+    repository_ctx.file(
+        "BUILD.bazel",
+        """
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "clang_pgo_files",
+    srcs = glob(["**/*.profdata"]),
+)
+
+filegroup(
+    name = "gcc_pgo_files",
+    srcs = glob(["**/*.gcda"]),
+)
+""",
+    )
+
+setup_pgo_data = repository_rule(
+    implementation = _setup_pgo_data,
+    environ = ["pgo_profile_url"],
+)
diff --git a/bazel/toolchains/cc/mongo_linux/mongo_compiler_flags.bzl b/bazel/toolchains/cc/mongo_linux/mongo_compiler_flags.bzl
index 241b5615a38..d0451855cff 100644
--- a/bazel/toolchains/cc/mongo_linux/mongo_compiler_flags.bzl
+++
b/bazel/toolchains/cc/mongo_linux/mongo_compiler_flags.bzl @@ -329,14 +329,6 @@ COVERAGE_FLAGS = select({ "//conditions:default": [], }) -# Passed to both the compiler and linker -PGO_PROFILE_FLAGS = select({ - "//bazel/config:pgo_profile_enabled": [ - "-fprofile-instr-generate", - ], - "//conditions:default": [], -}) - # Hack to throw an error if the user isn't running bazel through bazelisk, # since we want to make sure the hook inside of tools/bazel gets run. RUNNING_THROUGH_BAZELISK_CHECK = select({ @@ -369,7 +361,6 @@ MONGO_LINUX_CC_COPTS = ( THIN_LTO_FLAGS + SYMBOL_ORDER_COPTS + COVERAGE_FLAGS + - PGO_PROFILE_FLAGS + SHARED_ARCHIVE_COPTS + RUNNING_THROUGH_BAZELISK_CHECK ) @@ -389,7 +380,6 @@ MONGO_LINUX_CC_LINKFLAGS = ( THIN_LTO_FLAGS + SYMBOL_ORDER_LINKFLAGS + COVERAGE_FLAGS + - PGO_PROFILE_FLAGS + SANITIZE_WITHOUT_TSAN_LINKFLAGS + SHARED_ARCHIVE_LINKFLAGS_GNU_UNIQUE + SHARED_ARCHIVE_LINKFLAGS_B_SYMBOLIC + diff --git a/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl b/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl index 6d14f6a0913..48118e365d8 100644 --- a/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl +++ b/bazel/toolchains/cc/mongo_linux/mongo_linux_cc_toolchain_config.bzl @@ -1118,6 +1118,130 @@ def _impl(ctx): ], ) + pgo_profile_generate_feature = feature( + name = "pgo_profile_generate", + enabled = ctx.attr.pgo_profile_generate, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ] + all_link_actions + lto_index_actions, + flag_groups = [ + flag_group( + flags = [ + "-fprofile-generate", + "-fno-data-sections", + ] if ctx.attr.compiler == COMPILERS.CLANG else [ + "-fprofile-generate", + "-fno-data-sections", + "-fprofile-dir=mongod_perf", + "-Wl,-S", + ], + ), + ], + ), + flag_set( + actions = [ + ACTION_NAMES.cpp_compile, + ], + flag_groups = [ + flag_group( + flags = [""] if ctx.attr.compiler == COMPILERS.CLANG else 
["-Wno-mismatched-new-delete"], + ), + ], + ), + ], + ) + + pgo_profile_use_feature = feature( + name = "pgo_profile_use", + enabled = ctx.attr.pgo_profile_use != None, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ], + flag_groups = [ + flag_group( + flags = [ + "-fprofile-use=" + ctx.attr.pgo_profile_use[DefaultInfo].files.to_list()[0].path if ctx.attr.pgo_profile_use != None else "", + "-Wno-profile-instr-unprofiled", + "-Wno-profile-instr-out-of-date", + "-Wno-backend-plugin", + ] if ctx.attr.compiler == COMPILERS.CLANG else [ + "-fprofile-use", + "-Wno-missing-profile", + "-fprofile-correction", + "-Wno-coverage-mismatch", + "-fprofile-dir=" + ctx.attr.pgo_profile_use[DefaultInfo].files.to_list()[0].dirname if ctx.attr.pgo_profile_use != None else "", + ], + ), + ], + ), + ], + ) + + propeller_profile_generate_feature = feature( + name = "propeller_profile_generate", + enabled = ctx.attr.propeller_profile_generate, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ] + all_link_actions + lto_index_actions, + flag_groups = [ + flag_group( + flags = [ + "-funique-internal-linkage-names", + "-fbasic-block-address-map", + ], + ), + ], + ), + ], + ) + + propeller_profile_use_cc_feature = feature( + name = "propeller_profile_use_cc", + enabled = ctx.attr.propeller_profile_use != None, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ], + flag_groups = [ + flag_group( + flags = [ + "-funique-internal-linkage-names", + "-fbasic-block-sections=list=CCprofile.txt", + ], + ), + ], + ), + ], + ) + + propeller_profile_use_link_feature = feature( + name = "propeller_profile_use_link", + enabled = ctx.attr.propeller_profile_use != None, + flag_sets = [ + flag_set( + actions = all_link_actions + lto_index_actions, + flag_groups = [ + flag_group( + flags = [ + "-Wl,--symbol-ordering-file=LINKERprofile.txt", + ], + ), + ], + ), + ], 
+ ) + features = [ enable_all_warnings_feature, general_clang_or_gcc_warnings_feature, @@ -1181,6 +1305,11 @@ def _impl(ctx): build_id_feature, gcc_no_ignored_attributes_features, mold_shared_libraries_feature, + pgo_profile_generate_feature, + pgo_profile_use_feature, + propeller_profile_generate_feature, + propeller_profile_use_cc_feature, + propeller_profile_use_link_feature, ] + get_common_features(ctx) return [ @@ -1232,6 +1361,11 @@ mongo_linux_cc_toolchain_config = rule( "debug_level": attr.int(mandatory = False), "disable_debug_symbols": attr.bool(mandatory = False), "optimization_level": attr.string(mandatory = False), + "pgo_profile_generate": attr.bool(default = False, mandatory = False), + "pgo_profile_use": attr.label(default = None, mandatory = False), + "bolt_enabled": attr.bool(default = False, mandatory = False), + "propeller_profile_generate": attr.bool(default = False, mandatory = False), + "propeller_profile_use": attr.label(default = None, allow_single_file = True, mandatory = False), }, provides = [CcToolchainConfigInfo], ) diff --git a/bazel/toolchains/cc/mongo_linux/mongo_toolchain.BUILD.tmpl b/bazel/toolchains/cc/mongo_linux/mongo_toolchain.BUILD.tmpl index 12075198405..1978d5e795f 100644 --- a/bazel/toolchains/cc/mongo_linux/mongo_toolchain.BUILD.tmpl +++ b/bazel/toolchains/cc/mongo_linux/mongo_toolchain.BUILD.tmpl @@ -16,12 +16,6 @@ load( package(default_visibility = ["//visibility:public"]) -# Helper target for the toolchain (see below): -filegroup( - name = "all_files", - srcs = glob(["**/*"]), -) - # Export headers used for clang-tidy checks. 
cc_library( name = "llvm_headers", @@ -98,8 +92,34 @@ DISABLE_DEBUG_SYMBOLS = select({ "@//conditions:default": False, }) +PGO_PROFILE_GENERATE_ENABLED = select({ + "@//bazel/config:pgo_profile_generate_enabled": True, + "@//conditions:default": False, +}) + +PGO_PROFILE_USE_ENABLED = select({ + "@//bazel/config:pgo_profile_use_clang_enabled": "@pgo_data//:clang_pgo_files", + "@//bazel/config:pgo_profile_use_gcc_enabled": "@pgo_data//:gcc_pgo_files", + "@//conditions:default": None, +}) + +PROPELLER_PROFILE_GENERATE_ENABLED = select({ + "@//bazel/config:propeller_profile_generate_enabled": True, + "@//conditions:default": False, +}) + LINK_FLAGS = ["-L" + flag for flag in COMMON_LINK_FLAGS] + LINKER_LINKFLAGS +# Helper target for the toolchain (see below): +filegroup( + name = "all_files", + srcs = glob(["**/*"]) + select({ + "@//bazel/config:pgo_profile_use_clang_enabled": ["@pgo_data//:clang_pgo_files"], + "@//bazel/config:pgo_profile_use_gcc_enabled": ["@pgo_data//:gcc_pgo_files"], + "@//conditions:default": [], + }) +) + feature_attrs = get_common_features_attrs() mongo_linux_cc_toolchain_config( @@ -139,6 +159,8 @@ mongo_linux_cc_toolchain_config( debug_level = DEBUG_LEVEL, disable_debug_symbols = DISABLE_DEBUG_SYMBOLS, optimization_level = feature_attrs[FEATURES_ATTR_NAMES.OPT_LEVEL], + pgo_profile_generate = PGO_PROFILE_GENERATE_ENABLED, + pgo_profile_use = PGO_PROFILE_USE_ENABLED, ) mongo_linux_cc_toolchain_config( @@ -183,6 +205,9 @@ mongo_linux_cc_toolchain_config( debug_level = DEBUG_LEVEL, disable_debug_symbols = DISABLE_DEBUG_SYMBOLS, optimization_level = feature_attrs[FEATURES_ATTR_NAMES.OPT_LEVEL], + pgo_profile_generate = PGO_PROFILE_GENERATE_ENABLED, + pgo_profile_use = PGO_PROFILE_USE_ENABLED, + propeller_profile_generate = PROPELLER_PROFILE_GENERATE_ENABLED, ) cc_toolchain( diff --git a/src/mongo/BUILD.bazel b/src/mongo/BUILD.bazel index 9bd4f43ee5a..9150d56d999 100644 --- a/src/mongo/BUILD.bazel +++ b/src/mongo/BUILD.bazel @@ -382,7 +382,8 @@ 
mongo_cc_library( "//bazel/config:gcov_enabled": ["MONGO_GCOV"], "//conditions:default": [], }) + select({ - "//bazel/config:pgo_profile_enabled": ["MONGO_PGO_PROFILE"], + "//bazel/config:pgo_profile_generate_clang_enabled": ["MONGO_PGO_PROFILE"], + "//bazel/config:pgo_profile_generate_gcc_enabled": ["MONGO_GCOV"], "//conditions:default": [], }), deps = [ diff --git a/src/mongo/db/BUILD.bazel b/src/mongo/db/BUILD.bazel index 52e9ad6e1d8..b1590b4eabb 100644 --- a/src/mongo/db/BUILD.bazel +++ b/src/mongo/db/BUILD.bazel @@ -3683,6 +3683,23 @@ mongo_cc_binary( srcs = [ "mongod.cpp", ], + cxxopts = select({ + "//bazel/config:pgo_profile_generate_clang_enabled": [ + "-fprofile-generate=mongod_perf", + "-fno-data-sections", + ], + "//bazel/config:pgo_profile_generate_gcc_enabled": [ + "-fprofile-generate=mongod_perf", + "-fno-data-sections", + "-fprofile-dir=mongod_perf", + ], + "//conditions:default": [], + }), + features = select({ + # We want to pass our custom -fprofile-generate so we can specify the output directory of files + "//bazel/config:pgo_profile_generate_enabled": ["-pgo_profile_generate"], + "//conditions:default": [], + }), linkopts = select({ "@platforms//os:linux": [ "-ldl", @@ -3693,6 +3710,21 @@ mongo_cc_binary( "-latomic", ], "//conditions:default": [], + }) + select({ + "//bazel/config:pgo_profile_generate_clang_enabled": [ + "-fprofile-generate=mongod_perf", + "-fno-data-sections", + ], + "//bazel/config:pgo_profile_generate_gcc_enabled": [ + "-fprofile-generate=mongod_perf", + "-fno-data-sections", + "-fprofile-dir=mongod_perf", + ], + "//conditions:default": [], + }) + select({ + "//bazel/config:bolt_profile_generate_enabled": ["-Wl,--emit-relocs"], + "//bazel/config:bolt_profile_use_enabled": ["-Wl,--emit-relocs"], + "//conditions:default": [], }), tags = [ "devcore",