mongo/bazel/repository_rules/bolt_data.bzl
Andrew Bradshaw bf64d875fe SERVER-113559 Fix bolt instrumentation (#52511)
GitOrigin-RevId: 59ae821cd73286bf21cb35fcc43a7eefb78b6d7a
2026-04-28 18:29:56 +00:00

137 lines
6.1 KiB
Python

load("//bazel/repository_rules:pgo_data.bzl", "get_all_files")
load("//bazel/repository_rules:profiling_data.bzl", "DEFAULT_BOLT_DATA_CHECKSUM", "DEFAULT_BOLT_DATA_URL")
# BUILD-file fragments that define the `created_bolt_fdata` target. Exactly one
# of them is appended to the generated BUILD.bazel by _setup_bolt_data, so
# consumers can tell whether this build created new bolt files or is using the
# ones from the stored (default) url.

# Appended when profile urls were supplied through the environment: the target
# exposes every .fdata file present in the repository.
CREATED_FILEGROUP = """
filegroup(
name = "created_bolt_fdata",
srcs = glob(["**/*.fdata"]),
)
"""

# Appended when the default profile is used: the target has no sources and is
# constrained on `@platforms//:incompatible`, so it is never a compatible
# target to build.
EMPTY_CREATED_FILEGROUP = """
filegroup(
name = "created_bolt_fdata",
srcs = [],
target_compatible_with = ["@platforms//:incompatible"],
)
"""
def _setup_bolt_data(repository_ctx):
    """Fetches (and, if needed, generates) the BOLT profile and writes BUILD.bazel.

    The behavior is driven by these environment variables:
      bolt_profile_url: one or more urls separated by `|` (url1|url2|url3).
        Each url is either a ready-made .fdata file or an archive containing
        raw perf .data files.
      bolt_binary_url: archive containing the binary the profiles were
        recorded from. Required whenever .data files must be converted with
        perf2bolt.
      bolt_binary_name: name suffix of the binary to look for inside that
        archive. Defaults to "mongod".

    When neither bolt_profile_url nor bolt_binary_url is set, the default
    profile (DEFAULT_BOLT_DATA_URL, verified against DEFAULT_BOLT_DATA_CHECKSUM)
    is downloaded instead.

    Fails with an explicit message when the environment is inconsistent
    (binary url without profile url, empty profile url list, perf data without
    a binary url, binary not found in the archive) or when perf2bolt or
    merge-fdata exits with a non-zero code.

    Args:
        repository_ctx: the repository context supplied by Bazel.
    """
    bolt_fdata_filename = "bolt.fdata"

    # This potentially contains multiple urls, separated by a | eg. url1|url2|url3
    bolt_profile_urls_env = repository_ctx.os.environ.get("bolt_profile_url", None)

    # This is the binary the bolt data came from
    bolt_binary_env = repository_ctx.os.environ.get("bolt_binary_url", None)

    # In case you want to bolt a binary other than the main binary mongod
    bolt_binary_name = repository_ctx.os.environ.get("bolt_binary_name", None)
    if bolt_binary_name == None:
        bolt_binary_name = "mongod"

    created_files = EMPTY_CREATED_FILEGROUP

    # Perf2bolt will use the path to call the perf tool, so the directory of
    # the bundled perf binary is put in front of the inherited PATH. PATH may
    # be unset; in that case only the perf directory is used.
    perf_dir = str(repository_ctx.path(repository_ctx.attr._perf_binary).dirname)
    path_env = repository_ctx.os.environ.get("PATH", None)
    perf_path_env = (perf_dir + ":" + path_env) if path_env else perf_dir

    if bolt_profile_urls_env == None and bolt_binary_env == None:
        # We should be using the default bolt data because we are not being passed bolt data
        repository_ctx.download(DEFAULT_BOLT_DATA_URL, bolt_fdata_filename, sha256 = DEFAULT_BOLT_DATA_CHECKSUM)
    elif bolt_profile_urls_env == None:
        # A binary without any profile to process cannot produce bolt data.
        fail("bolt_binary_url is set but bolt_profile_url is not; bolt_profile_url is required to build bolt data.")
    else:
        # This is mainly used for the bolt training pipeline.
        # 2 main scenarios
        # 1. They are passing us a single bolt .fdata file, just download the file
        # 2. They are passing us one or more unprocessed .tgz files with .data files inside
        #    the .data files need to be turned into fdata files using perf2bolt, which then can be merged
        #    into a single fdata file using merge-fdata

        # Drop empty segments so a stray or trailing `|` is not treated as a url.
        bolt_urls = [url.strip() for url in bolt_profile_urls_env.split("|") if url.strip()]
        if len(bolt_urls) == 0:
            fail("bolt_profile_url is set but does not contain any url.")
        created_files = CREATED_FILEGROUP

        if len(bolt_urls) == 1 and bolt_urls[0].endswith(".fdata"):
            # They are passing us a single bolt fdata file, just download the file
            print("Downloading single fdata file for bolt: " + bolt_urls[0])
            repository_ctx.download(bolt_urls[0], bolt_fdata_filename)
        else:
            print("Downloading multiple bolt files.")
            need_perf2bolt = False

            # Every url gets its own numbered directory so files cannot collide.
            for url_num, url in enumerate(bolt_urls):
                if url.endswith(".fdata"):
                    print("Downloading fdata: " + url)
                    repository_ctx.download(url, str(url_num) + "/bolt.fdata")
                else:
                    print("Downloading and extracting: " + url)
                    repository_ctx.download_and_extract(url, str(url_num))
                    need_perf2bolt = True

            # Snapshot the profile files before the binary archive is
            # extracted, so nothing from that archive is picked up as a profile.
            files = get_all_files(repository_ctx.path("."), 20)
            data_files = [file for file in files if file.basename.endswith(".data")]
            fdata_files = [file for file in files if file.basename.endswith(".fdata")]

            # This is scenario 2, we need to turn these data files into fdata files using perf2bolt
            if need_perf2bolt and len(data_files) > 0:
                if bolt_binary_env == None:
                    fail("Found perf .data files but bolt_binary_url is not set; perf2bolt needs the binary the profiles were recorded from.")

                # Download the mongod binary for perf2bolt
                print("Download the mongo binaries from: " + bolt_binary_env)
                repository_ctx.download_and_extract(bolt_binary_env, "binaries")
                files = get_all_files(repository_ctx.path("."), 20)
                binaries = [file for file in files if file.basename.endswith(bolt_binary_name)]
                if len(binaries) == 0:
                    fail("Could not find a binary named '" + bolt_binary_name + "' after extracting " + bolt_binary_env)
                binary = binaries[0]

                print("Found data files, turning them into fdata files with perf2bolt.")
                for index, data_file in enumerate(data_files):
                    fdata_file_name = "bolt" + str(index) + ".fdata"
                    arguments = [repository_ctx.attr._perf2bolt_binary, "-nl", "-p", data_file, "-o", fdata_file_name, binary]
                    result = repository_ctx.execute(arguments, environment = {"PATH": perf_path_env})
                    print(result.stdout)
                    if result.return_code != 0:
                        print(result.stderr)
                        fail("Failed to run perf2bolt.")
                    fdata_files.append(fdata_file_name)

            # If we have multiple fdata files we need to merge them together using merge-fdata
            if len(fdata_files) > 1:
                print("Merging fdata files with merge-fdata.")
                arguments = [repository_ctx.attr._merge_fdata_binary, "-o", bolt_fdata_filename] + fdata_files
                result = repository_ctx.execute(arguments)
                print(result.stdout)
                if result.return_code != 0:
                    print(result.stderr)
                    fail("Failed to run merge-fdata.")

                # clean up the pre-merged fdata files so only the merged
                # profile is matched by the BUILD file globs
                for file in fdata_files:
                    repository_ctx.delete(file)

    repository_ctx.file(
        "BUILD.bazel",
        """
package(default_visibility = ["//visibility:public"])
filegroup(
name = "bolt_fdata",
srcs = glob(["**/*.fdata"]),
)
""" + created_files,
    )
# Repository rule that materializes the bolt profile repository. The listed
# environment variables are the ones read by the implementation function.
setup_bolt_data = repository_rule(
    implementation = _setup_bolt_data,
    environ = [
        "bolt_profile_url",
        "bolt_binary_url",
        "bolt_binary_name",
        "PATH",
    ],
    attrs = {
        # The full path to each binary is spelled out because of a bug where
        # the repo rule does not properly evaluate these labels.
        "_merge_fdata_binary": attr.label(
            default = "@bolt_binaries//:bolt/bin/merge-fdata",
            allow_single_file = True,
            executable = True,
            cfg = "host",
        ),
        "_perf2bolt_binary": attr.label(
            default = "@bolt_binaries//:bolt/bin/perf2bolt",
            allow_single_file = True,
            executable = True,
            cfg = "host",
        ),
        "_perf_binary": attr.label(
            default = "@bolt_binaries//:bolt/bin/perf",
            allow_single_file = True,
            executable = True,
            cfg = "host",
        ),
    },
)