mongo/buildscripts/profile_data_pr.py
Andrew Bradshaw b91befeaad SERVER-125638 Add CSPGO flag (#52870)
GitOrigin-RevId: 54a700fdd29962e111437c8917083d3982d01899
2026-05-04 18:23:46 +00:00

320 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Script that opens a PR using a bot to update profile data links for PGO, CSPGO, and BOLT.
This updates profiling_data.bzl and relies on that file's formatting not changing.
Two invocation modes:
1. PGO + BOLT (original): 3 positional URLs (bolt, clang_pgo, gcc_pgo). Expects exactly
one of clang_pgo / gcc_pgo to be populated so only one is updated at a time.
2. CSPGO (--cspgo_url): updates only the CSPGO URL + checksum. Orthogonal to the PGO/BOLT
flow; the positional bolt/clang_pgo/gcc_pgo URLs are not used in this mode.
"""
import argparse
import hashlib
import os
import re
import sys
import tempfile
import requests
from github.GithubException import GithubException
from github.GithubIntegration import GithubIntegration
from jira import JIRAError
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from buildscripts.client.jiraclient import JiraAuth, JiraClient
# GitHub owner and repository used to look up the app installation and the repo object.
OWNER_NAME = "10gen"
REPO_NAME = "mongo"
# Path, inside the repository, of the file whose URL/checksum values are rewritten.
PROFILE_DATA_FILE_PATH = "bazel/repository_rules/profiling_data.bzl"
# Jira server passed to JiraClient when creating backport tickets.
JIRA_SERVER = "https://jira.mongodb.org"
# Value written to customfield_12751 on the tickets created by create_backport_ticket.
PROFILE_DATA_OWNING_TEAM = "Product Performance"
def get_mongo_repository(app_id, private_key):
    """
    Return the OWNER_NAME/REPO_NAME repository object, authenticated as a GitHub App.

    app_id is converted with int() before use; private_key is handed to
    GithubIntegration unchanged.
    """
    integration = GithubIntegration(int(app_id), private_key)
    client = integration.get_repo_installation(
        OWNER_NAME, REPO_NAME
    ).get_github_for_installation()
    return client.get_repo(f"{OWNER_NAME}/{REPO_NAME}")
def compute_sha256(file_path: str) -> str:
    """
    Return the hex-encoded SHA-256 digest of the file at file_path.

    The file is opened in binary mode and consumed in 4096-byte chunks.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
def download_file(url: str, output_location: str, timeout: float = 60.0) -> bool:
    """
    Download url to output_location and report whether the download succeeded.

    Args:
        url: Location of the remote file.
        output_location: Local path the response body is written to.
        timeout: Seconds passed to requests as the timeout, so that an unresponsive
            server cannot block the script forever.

    Returns:
        True if the file was fetched and written. False if requests raised a
        RequestException, which includes the HTTP error raised by raise_for_status().
        When False is returned, output_location may hold a partial file and must not
        be used.
    """
    try:
        # Stream the body so large profile files are not held in memory all at once.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(output_location, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        return True
    except requests.exceptions.RequestException:
        return False
def replace_quoted_text_in_tagged_line(text: str, tag: str, new_text: str) -> str:
    """
    Replace the first double-quoted value that follows tag on the same line.

    eg. FOO = "replace_this" -> FOO = "new_text"

    Every occurrence of tag that is followed by a quoted value on its line is
    rewritten. Only the characters between the quotes change; the tag and the text
    leading up to the opening quote are preserved verbatim, even when the old value
    is empty or also appears elsewhere on the line.

    Args:
        text: Full file content to search.
        tag: Literal text identifying the line (not interpreted as a regex).
        new_text: Replacement for the quoted value, inserted literally.

    Returns:
        The updated content.

    Exits the process with status 1 if tag does not occur in text.
    """
    if tag not in text:
        print(f"Tag: {tag} did not exist in the file.", file=sys.stderr)
        sys.exit(1)

    # Group 1: tag through the opening quote, group 2: old value, group 3: closing quote.
    pattern = rf'({re.escape(tag)}.*?")(.*?)(")'
    # A callable replacement keeps backslashes in new_text from being treated as escapes.
    return re.sub(pattern, lambda match: match.group(1) + new_text + match.group(3), text)
def update_bolt_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the BOLT data URL and checksum values replaced.
    """
    replacements = (
        ("DEFAULT_BOLT_DATA_URL", new_url),
        ("DEFAULT_BOLT_DATA_CHECKSUM", new_checksum),
    )
    # URL first, then checksum, each applied to the result of the previous step.
    for tag, value in replacements:
        file_content = replace_quoted_text_in_tagged_line(file_content, tag, value)
    return file_content
def update_clang_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the clang PGO data URL and checksum values replaced.
    """
    replacements = (
        ("DEFAULT_CLANG_PGO_DATA_URL", new_url),
        ("DEFAULT_CLANG_PGO_DATA_CHECKSUM", new_checksum),
    )
    # URL first, then checksum, each applied to the result of the previous step.
    for tag, value in replacements:
        file_content = replace_quoted_text_in_tagged_line(file_content, tag, value)
    return file_content
def update_gcc_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the gcc PGO data URL and checksum values replaced.
    """
    replacements = (
        ("DEFAULT_GCC_PGO_DATA_URL", new_url),
        ("DEFAULT_GCC_PGO_DATA_CHECKSUM", new_checksum),
    )
    # URL first, then checksum, each applied to the result of the previous step.
    for tag, value in replacements:
        file_content = replace_quoted_text_in_tagged_line(file_content, tag, value)
    return file_content
def update_clang_cspgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the clang CSPGO data URL and checksum values replaced.
    """
    replacements = (
        ("DEFAULT_CLANG_CSPGO_DATA_URL", new_url),
        ("DEFAULT_CLANG_CSPGO_DATA_CHECKSUM", new_checksum),
    )
    # URL first, then checksum, each applied to the result of the previous step.
    for tag, value in replacements:
        file_content = replace_quoted_text_in_tagged_line(file_content, tag, value)
    return file_content
def create_backport_ticket(version: str):
    """
    Create a SERVER ticket and a linked BACKPORT ticket for a profile data update.

    Jira errors are retried up to three times. Issues created by an earlier attempt
    are reused on retry, so a failure part-way through does not produce duplicates.

    Args:
        version: Branch value written to the BACKPORT ticket, e.g. "v8.0".

    Returns:
        The created SERVER issue, or None if Jira still reported an error on the
        final attempt.
    """
    # The calls below go through the wrapped client object exposed as `_jira`.
    jira = JiraClient(JIRA_SERVER, JiraAuth(), dry_run=False)._jira

    server_issue_dict = {
        "project": {"key": "SERVER"},
        "issuetype": {"name": "Task"},
        "summary": "Update PGO profiles",
        "description": "Updated PGO profile numbers for performance.",
        "customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}],
    }
    backport_issue_dict = {
        "project": {"key": "BACKPORT"},
        "issuetype": {"name": "Backport"},
        "summary": f"[{version}] Update PGO profiles",
        # Branch
        "customfield_14166": {"value": version},
        # Backport Justification
        "customfield_25156": "Updated PGO profile numbers for performance.",
    }

    max_attempts = 3
    server_issue = None
    backport_issue = None
    for attempt in range(1, max_attempts + 1):
        try:
            # Only create what a previous attempt did not already create.
            if server_issue is None:
                server_issue = jira.create_issue(fields=server_issue_dict)
            if backport_issue is None:
                backport_issue = jira.create_issue(fields=backport_issue_dict)
            # For some reason you cant assign a team on creation for backport tickets
            backport_issue.update({"customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}]})
            jira.create_issue_link(
                type="backported by", inwardIssue=server_issue.key, outwardIssue=backport_issue.key
            )
            return server_issue
        except JIRAError as err:
            print(f"Jira ticket creation attempt {attempt} of {max_attempts} failed:")
            print(err)

    return None
def create_pr(target_branch: str, new_branch: str, original_file, new_content: str):
    """
    Commit new_content for the profiling data file on new_branch and open a PR
    from new_branch into target_branch.

    The repository handle is not a parameter: it is read from the module-level name
    `repo`, which the __main__ section assigns before this function is called.
    """
    base_branch = repo.get_branch(target_branch)

    # Make sure the head branch exists; if it is missing, create it at the tip of the target.
    try:
        repo.get_branch(branch=new_branch)
    except GithubException as err:
        if err.status != 404:
            raise
        print(f"Branch doesn't exist, creating branch {new_branch}.")
        repo.create_git_ref(ref=f"refs/heads/{new_branch}", sha=base_branch.commit.sha)

    # A target branch that starts with "v" is a versioned backport branch and gets its own ticket.
    is_versioned_branch = target_branch != "master" and target_branch[0] == "v"
    if is_versioned_branch:
        # "v8.0" and "v8.0-staging" both map to "v8.0".
        release = target_branch.split("-")[0]
        ticket = create_backport_ticket(release)
        jira_ticket = ticket.key if ticket else "[Jira Ticket Creation Broken]"
    else:
        jira_ticket = "SERVER-110427"

    repo.update_file(
        path=PROFILE_DATA_FILE_PATH,
        content=new_content,
        branch=new_branch,
        message="Updating profile files.",
        sha=original_file.sha,
    )

    pr_title = f"{jira_ticket} Update profiling data"
    repo.create_pull(
        base=target_branch,
        head=new_branch,
        title=pr_title,
        body="Automated PR updating the profiling data.",
    )
def create_profile_data_pr(repo, args, target_branch, new_branch):
    """
    Get the new text needed and create a pr for updating the profiling_data.bzl

    Downloads the BOLT, clang PGO and gcc PGO files named in args, checksums the ones
    that will be written into the file, and opens a PR with the BOLT entry plus
    exactly one of the clang / gcc PGO entries updated.

    Args:
        repo: Repository object used to read the current profiling data file.
        args: Parsed arguments providing bolt_url, clang_pgo_url and gcc_pgo_url.
        target_branch: Branch the PR is opened against and the file is read from.
        new_branch: Branch the change is committed to.

    Exits the process with status 0 when there is nothing to update (no BOLT file, or
    neither PGO file) and with status 1 when both PGO files exist.
    """
    # The downloads are only needed for their checksums, so remove them when done
    # (including when one of the sys.exit calls below fires).
    with tempfile.TemporaryDirectory() as temp_dir:
        bolt_file = os.path.join(temp_dir, "bolt.fdata")
        clang_pgo_file = os.path.join(temp_dir, "clang_pgo.profdata")
        gcc_pgo_file = os.path.join(temp_dir, "gcc_pgo.tgz")

        bolt_file_exists = download_file(args.bolt_url, bolt_file)
        clang_pgo_file_exists = download_file(args.clang_pgo_url, clang_pgo_file)
        gcc_pgo_file_exists = download_file(args.gcc_pgo_url, gcc_pgo_file)

        # These are not errors because the script can run when no files were meant to be updated.
        if not bolt_file_exists:
            print(f"Bolt file did not exist at {args.bolt_url}. Not creating PR.")
            sys.exit(0)

        if clang_pgo_file_exists and gcc_pgo_file_exists:
            print(
                f"Both clang and gcc had pgo files that existed. Clang: {args.clang_pgo_url} GCC: {args.gcc_pgo_url}. Only one should be updated at a time. Not creating PR."
            )
            sys.exit(1)

        if not clang_pgo_file_exists and not gcc_pgo_file_exists:
            print(
                f"Neither clang nor gcc had pgo files that existed at either {args.clang_pgo_url} or {args.gcc_pgo_url}. Not creating PR."
            )
            sys.exit(0)

        bolt_checksum = compute_sha256(bolt_file)
        # Exactly one of the two PGO files exists at this point.
        if clang_pgo_file_exists:
            pgo_checksum = compute_sha256(clang_pgo_file)
        else:
            pgo_checksum = compute_sha256(gcc_pgo_file)

    profiling_data_file = repo.get_contents(
        PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}"
    )
    profiling_data_file_content = profiling_data_file.decoded_content.decode()

    profiling_file_updated_text = update_bolt_info(
        profiling_data_file_content, args.bolt_url, bolt_checksum
    )
    if clang_pgo_file_exists:
        profiling_file_updated_text = update_clang_pgo_info(
            profiling_file_updated_text, args.clang_pgo_url, pgo_checksum
        )
    else:
        profiling_file_updated_text = update_gcc_pgo_info(
            profiling_file_updated_text, args.gcc_pgo_url, pgo_checksum
        )

    create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text)
def create_cspgo_pr(repo, cspgo_url: str, target_branch: str, new_branch: str):
    """
    Download the cspgo profdata, compute its checksum, and open a PR updating only
    the CSPGO url/checksum in profiling_data.bzl.

    Args:
        repo: Repository object used to read the current profiling data file.
        cspgo_url: URL of the CSPGO profile; also the value written into the file.
        target_branch: Branch the PR is opened against and the file is read from.
        new_branch: Branch the change is committed to.

    Exits the process with status 0 if the file cannot be downloaded from cspgo_url.
    """
    # The download is only needed for its checksum, so remove it when done
    # (including when the sys.exit below fires).
    with tempfile.TemporaryDirectory() as temp_dir:
        cspgo_file = os.path.join(temp_dir, "clang_cspgo.profdata")

        if not download_file(cspgo_url, cspgo_file):
            print(f"CSPGO file did not exist at {cspgo_url}. Not creating PR.")
            sys.exit(0)

        cspgo_checksum = compute_sha256(cspgo_file)

    profiling_data_file = repo.get_contents(
        PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}"
    )
    profiling_data_file_content = profiling_data_file.decoded_content.decode()

    profiling_file_updated_text = update_clang_cspgo_info(
        profiling_data_file_content, cspgo_url, cspgo_checksum
    )

    create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text)
if __name__ == "__main__":
    # Command-line entry point: parse arguments, authenticate, then run either the
    # CSPGO-only flow or the PGO + BOLT flow.
    parser = argparse.ArgumentParser(
        description="This script uses bolt file url, clang pgo file url and gcc pgo file url to create a PR updating the links to these files. Pass --cspgo_url instead to update only the CSPGO entries."
    )
    # The three URL positionals are optional so that a CSPGO-only run can pass just
    # target_branch and new_branch.
    parser.add_argument(
        "bolt_url", nargs="?", help="URL that BOLT data was uploaded to.", default=None
    )
    parser.add_argument(
        "clang_pgo_url", nargs="?", help="URL that clang pgo data was uploaded to.", default=None
    )
    parser.add_argument(
        "gcc_pgo_url", nargs="?", help="URL that gcc pgo data was uploaded to.", default=None
    )
    parser.add_argument("target_branch", help="The branch you want to create a PR into.")
    parser.add_argument("new_branch", help="The new branch to create a PR from.")
    parser.add_argument(
        "--cspgo_url",
        help="URL that clang cspgo data was uploaded to. When set, only the CSPGO entries are updated.",
        default=None,
    )
    parser.add_argument(
        "--app_id", help="App ID used for authentication.", default=os.getenv("MONGO_PR_BOT_APP_ID")
    )
    parser.add_argument(
        "--private_key",
        help="Key to use for authentication.",
        default=os.getenv("MONGO_PR_BOT_PRIVATE_KEY"),
    )

    args = parser.parse_args()

    if not args.app_id or not args.private_key:
        # The flag names here must match the options defined above (underscores, not dashes).
        parser.error(
            "Must define --app_id or env MONGO_PR_BOT_APP_ID and --private_key or env MONGO_PR_BOT_PRIVATE_KEY."
        )

    if not args.cspgo_url and not (args.bolt_url and args.clang_pgo_url and args.gcc_pgo_url):
        parser.error(
            "Must provide either --cspgo_url for a CSPGO-only PR, or bolt_url/clang_pgo_url/gcc_pgo_url positional args for a PGO+BOLT PR."
        )

    # Replace spaces with newline, if applicable. The first 31 and last 29 characters
    # are left untouched; presumably these are the PEM "-----BEGIN RSA PRIVATE KEY-----"
    # and "-----END RSA PRIVATE KEY-----" lines, whose spaces must be kept (confirm
    # against the key format actually supplied).
    private_key = (
        args.private_key[:31] + args.private_key[31:-29].replace(" ", "\n") + args.private_key[-29:]
    )

    # `repo` must stay a module-level name: create_pr reads it as a global.
    repo = get_mongo_repository(args.app_id, private_key)

    if args.cspgo_url:
        create_cspgo_pr(repo, args.cspgo_url, args.target_branch, args.new_branch)
    else:
        create_profile_data_pr(repo, args, args.target_branch, args.new_branch)