320 lines
12 KiB
Python
Executable File
320 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Script that opens a PR using a bot to update profile data links for PGO, CSPGO, and BOLT.
|
|
This updates profiling_data.bzl and is reliant on the formatting of it to not change.
|
|
|
|
Two invocation modes:
|
|
1. PGO + BOLT (original): 3 positional URLs (bolt, clang_pgo, gcc_pgo). Expects exactly
|
|
one of clang_pgo / gcc_pgo to be populated so only one is updated at a time.
|
|
2. CSPGO (--cspgo_url): updates only the CSPGO URL + checksum. Orthogonal to the PGO/BOLT
|
|
flow; when --cspgo_url is set the positional URLs are ignored.
|
|
"""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import os
|
|
import re
|
|
import sys
|
|
import tempfile
|
|
|
|
import requests
|
|
from github.GithubException import GithubException
|
|
from github.GithubIntegration import GithubIntegration
|
|
from jira import JIRAError
|
|
|
|
if __name__ == "__main__" and __package__ is None:
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
from buildscripts.client.jiraclient import JiraAuth, JiraClient
|
|
|
|
# GitHub owner and repository name; get_mongo_repository uses them to find the app
# installation and to fetch the repository as "<OWNER_NAME>/<REPO_NAME>".
OWNER_NAME = "10gen"
|
|
REPO_NAME = "mongo"
|
|
# Path, inside the repository, of the file whose url/checksum entries this script rewrites.
PROFILE_DATA_FILE_PATH = "bazel/repository_rules/profiling_data.bzl"
|
|
# Jira server passed to JiraClient when backport tickets are created.
JIRA_SERVER = "https://jira.mongodb.org"
|
|
# Team value written to customfield_12751 on the Jira tickets made by create_backport_ticket.
PROFILE_DATA_OWNING_TEAM = "Product Performance"
|
|
|
|
|
|
def get_mongo_repository(app_id, private_key):
    """
    Return the OWNER_NAME/REPO_NAME github repository, authenticated as the GitHub App.

    app_id is converted to int before it is handed to GithubIntegration; private_key is
    passed through unchanged.
    """
    integration = GithubIntegration(int(app_id), private_key)
    repo_installation = integration.get_repo_installation(OWNER_NAME, REPO_NAME)
    github_client = repo_installation.get_github_for_installation()
    full_repo_name = f"{OWNER_NAME}/{REPO_NAME}"
    return github_client.get_repo(full_repo_name)
|
|
|
|
|
|
def compute_sha256(file_path: str) -> str:
    """
    Return the hex-encoded sha256 digest of the file at file_path

    The file is read in 4096-byte pieces so it is never held in memory as a whole.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
def download_file(url: str, output_location: str, timeout: float = 60.0) -> bool:
    """
    Download a file to a specific output_location and return if the file existed remotely

    Args:
        url: Address to fetch.
        output_location: Local path the response body is written to.
        timeout: Seconds to wait for the server to connect / send data before giving up.
            Without a timeout requests waits indefinitely on a stalled server.

    Returns:
        True when the body was fetched and written. False when requests raised any
        RequestException (connection failure, timeout, error status, malformed url). In
        the False case output_location may be missing or hold a partial download and
        should not be used.
    """
    try:
        # stream=True keeps the payload out of memory; it is written to disk chunk by chunk.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(output_location, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        return True
    except requests.exceptions.RequestException:
        return False
|
|
|
|
|
|
def replace_quoted_text_in_tagged_line(text: str, tag: str, new_text: str) -> str:
    """
    Replace the text between the first pair of double quotes that follows a tag on a line
    eg. FOO = "replace_this" -> FOO = "new_text"

    Every occurrence of the tag that is followed by a quoted string on the same line is
    rewritten. Only the characters between the quotes change; the tag and anything between
    the tag and the opening quote are kept as they were, even when the old value is empty
    or also happens to appear inside the tag name.

    Args:
        text: Full text to search.
        tag: Literal text that identifies the line (it is not treated as a regex).
        new_text: Value to place between the quotes, inserted verbatim.

    Returns:
        The text with the quoted values replaced.

    Exits the process with status 1 when the tag does not occur in text at all.
    """
    if tag not in text:
        print(f"Tag: {tag} did not exist in the file.", file=sys.stderr)
        sys.exit(1)
    # Three groups: everything up to and including the opening quote, the old value, and
    # the closing quote. Rebuilding from groups 1 and 3 touches nothing but the old value.
    pattern = rf'({re.escape(tag)}.*?")(.*?)(")'
    # A function replacement is used so backslashes in new_text are not read as escapes.
    return re.sub(pattern, lambda match: f"{match.group(1)}{new_text}{match.group(3)}", text)
|
|
|
|
|
|
def update_bolt_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the bolt url and checksum values replaced

    The url entry is rewritten first, then the checksum entry.
    """
    replacements = (
        ("DEFAULT_BOLT_DATA_URL", new_url),
        ("DEFAULT_BOLT_DATA_CHECKSUM", new_checksum),
    )
    result = file_content
    for tag, value in replacements:
        result = replace_quoted_text_in_tagged_line(result, tag, value)
    return result
|
|
|
|
|
|
def update_clang_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the clang pgo url and checksum values replaced

    The url entry is rewritten first, then the checksum entry.
    """
    replacements = (
        ("DEFAULT_CLANG_PGO_DATA_URL", new_url),
        ("DEFAULT_CLANG_PGO_DATA_CHECKSUM", new_checksum),
    )
    result = file_content
    for tag, value in replacements:
        result = replace_quoted_text_in_tagged_line(result, tag, value)
    return result
|
|
|
|
|
|
def update_gcc_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the gcc pgo url and checksum values replaced

    The url entry is rewritten first, then the checksum entry.
    """
    replacements = (
        ("DEFAULT_GCC_PGO_DATA_URL", new_url),
        ("DEFAULT_GCC_PGO_DATA_CHECKSUM", new_checksum),
    )
    result = file_content
    for tag, value in replacements:
        result = replace_quoted_text_in_tagged_line(result, tag, value)
    return result
|
|
|
|
|
|
def update_clang_cspgo_info(file_content: str, new_url: str, new_checksum: str) -> str:
    """
    Return file_content with the clang cspgo url and checksum values replaced

    The url entry is rewritten first, then the checksum entry.
    """
    replacements = (
        ("DEFAULT_CLANG_CSPGO_DATA_URL", new_url),
        ("DEFAULT_CLANG_CSPGO_DATA_CHECKSUM", new_checksum),
    )
    result = file_content
    for tag, value in replacements:
        result = replace_quoted_text_in_tagged_line(result, tag, value)
    return result
|
|
|
|
|
|
def create_backport_ticket(version: str):
    """
    Create a SERVER task and a BACKPORT ticket for a profile update and link the two

    The sequence (create SERVER issue, create BACKPORT issue, assign the team on the
    BACKPORT issue, link the issues) is attempted up to three times. A step that already
    succeeded is not repeated on a later attempt, so a retry cannot create duplicates.

    Args:
        version: Branch value stored on the BACKPORT ticket, e.g. "v8.0".

    Returns:
        The created SERVER issue once every step has succeeded, or None when a JIRAError
        was still being raised on the last attempt.
    """
    max_attempts = 3
    client = JiraClient(JIRA_SERVER, JiraAuth(), dry_run=False)
    # NOTE(review): _jira is presumably the underlying jira library client held by
    # JiraClient - confirm against buildscripts.client.jiraclient.
    jira = client._jira
    server_issue_dict = {
        "project": {"key": "SERVER"},
        "issuetype": {"name": "Task"},
        "summary": "Update PGO profiles",
        "description": "Updated PGO profile numbers for performance.",
        "customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}],
    }
    backport_issue_dict = {
        "project": {"key": "BACKPORT"},
        "issuetype": {"name": "Backport"},
        "summary": f"[{version}] Update PGO profiles",
        # Branch
        "customfield_14166": {"value": version},
        # Backport Justification
        "customfield_25156": "Updated PGO profile numbers for performance.",
    }

    server_issue = None
    backport_issue = None
    team_assigned = False
    for attempt in range(1, max_attempts + 1):
        try:
            if server_issue is None:
                server_issue = jira.create_issue(fields=server_issue_dict)
            if backport_issue is None:
                backport_issue = jira.create_issue(fields=backport_issue_dict)
            if not team_assigned:
                # For some reason you cant assign a team on creation for backport tickets
                backport_issue.update({"customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}]})
                team_assigned = True
            jira.create_issue_link(
                type="backported by", inwardIssue=server_issue.key, outwardIssue=backport_issue.key
            )
            return server_issue
        except JIRAError as err:
            print(f"Jira ticket creation attempt {attempt}/{max_attempts} failed: {err}")
    return None
|
|
|
|
|
|
def create_pr(target_branch: str, new_branch: str, original_file, new_content: str):
    """
    Commit new_content for the profiling data file on new_branch and open a pr into target_branch

    new_branch is created from the head of target_branch when github reports it as missing
    (404); any other github error is re-raised. For a target branch other than master whose
    name begins with "v", a backport ticket is created and its key is used in the pr title.

    This function reads the module-level name `repo` rather than taking the repository as
    a parameter, so `repo` has to be defined before it is called.
    """
    base_branch = repo.get_branch(target_branch)
    try:
        repo.get_branch(branch=new_branch)
    except GithubException as err:
        if err.status != 404:
            raise
        print(f"Branch doesn't exist, creating branch {new_branch}.")
        repo.create_git_ref(ref=f"refs/heads/{new_branch}", sha=base_branch.commit.sha)

    # A versioned backport branch starts with "v"
    is_backport_branch = target_branch != "master" and target_branch[0] == "v"
    if not is_backport_branch:
        ticket_label = "SERVER-110427"
    else:
        # "v8.0" and "v8.0-staging" both yield "v8.0"
        release_version = target_branch.split("-")[0]
        created_ticket = create_backport_ticket(release_version)
        ticket_label = created_ticket.key if created_ticket else "[Jira Ticket Creation Broken]"

    repo.update_file(
        path=PROFILE_DATA_FILE_PATH,
        content=new_content,
        branch=new_branch,
        message="Updating profile files.",
        sha=original_file.sha,
    )
    repo.create_pull(
        base=target_branch,
        head=new_branch,
        title=f"{ticket_label} Update profiling data",
        body="Automated PR updating the profiling data.",
    )
|
|
|
|
|
|
def create_profile_data_pr(repo, args, target_branch, new_branch):
    """
    Get the new text needed and create a pr for updating the profiling_data.bzl

    Downloads the bolt, clang pgo and gcc pgo files named by args.bolt_url,
    args.clang_pgo_url and args.gcc_pgo_url, then rewrites the bolt entries plus the
    entries of whichever single pgo file was found.

    Exits the process without creating a pr when:
      - the bolt file could not be downloaded (status 0)
      - both pgo files could be downloaded (status 1)
      - neither pgo file could be downloaded (status 0)

    Args:
        repo: Repository object used to read the current profiling data file.
        args: Parsed arguments providing bolt_url, clang_pgo_url and gcc_pgo_url.
        target_branch: Branch the file is read from and the pr is opened into.
        new_branch: Branch the pr is opened from.
    """
    # The downloads are only needed to compute checksums. TemporaryDirectory removes them
    # on every way out of the block, including the sys.exit() calls below.
    with tempfile.TemporaryDirectory() as temp_dir:
        bolt_file = os.path.join(temp_dir, "bolt.fdata")
        clang_pgo_file = os.path.join(temp_dir, "clang_pgo.profdata")
        gcc_pgo_file = os.path.join(temp_dir, "gcc_pgo.tgz")

        bolt_file_exists = download_file(args.bolt_url, bolt_file)
        clang_pgo_file_exists = download_file(args.clang_pgo_url, clang_pgo_file)
        gcc_pgo_file_exists = download_file(args.gcc_pgo_url, gcc_pgo_file)

        # These are not errors because the script can run when no files were meant to be updated.
        if not bolt_file_exists:
            print(f"Bolt file did not exist at {args.bolt_url}. Not creating PR.")
            sys.exit(0)

        if clang_pgo_file_exists and gcc_pgo_file_exists:
            print(
                f"Both clang and gcc had pgo files that existed. Clang: {args.clang_pgo_url} GCC: {args.gcc_pgo_url}. Only one should be updated at a time. Not creating PR."
            )
            sys.exit(1)

        if not clang_pgo_file_exists and not gcc_pgo_file_exists:
            print(
                f"Neither clang nor gcc had pgo files that existed at either {args.clang_pgo_url} or {args.gcc_pgo_url}. Not creating PR."
            )
            sys.exit(0)

        profiling_data_file = repo.get_contents(
            PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}"
        )
        profiling_data_file_content = profiling_data_file.decoded_content.decode()

        profiling_file_updated_text = update_bolt_info(
            profiling_data_file_content, args.bolt_url, compute_sha256(bolt_file)
        )

        # Exactly one of the two pgo files exists at this point.
        if clang_pgo_file_exists:
            profiling_file_updated_text = update_clang_pgo_info(
                profiling_file_updated_text, args.clang_pgo_url, compute_sha256(clang_pgo_file)
            )
        else:
            profiling_file_updated_text = update_gcc_pgo_info(
                profiling_file_updated_text, args.gcc_pgo_url, compute_sha256(gcc_pgo_file)
            )

    create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text)
|
|
|
|
|
|
def create_cspgo_pr(repo, cspgo_url: str, target_branch: str, new_branch: str):
    """
    Download the cspgo profdata, compute its checksum, and open a PR updating only
    the CSPGO url/checksum in profiling_data.bzl.

    Exits the process with status 0, without creating a pr, when the file cannot be
    downloaded from cspgo_url.

    Args:
        repo: Repository object used to read the current profiling data file.
        cspgo_url: URL the clang cspgo data was uploaded to.
        target_branch: Branch the file is read from and the pr is opened into.
        new_branch: Branch the pr is opened from.
    """
    # The download is only needed to compute its checksum. TemporaryDirectory removes it
    # on every way out of the block, including the sys.exit() call below.
    with tempfile.TemporaryDirectory() as temp_dir:
        cspgo_file = os.path.join(temp_dir, "clang_cspgo.profdata")

        if not download_file(cspgo_url, cspgo_file):
            print(f"CSPGO file did not exist at {cspgo_url}. Not creating PR.")
            sys.exit(0)

        profiling_data_file = repo.get_contents(
            PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}"
        )
        profiling_data_file_content = profiling_data_file.decoded_content.decode()

        profiling_file_updated_text = update_clang_cspgo_info(
            profiling_data_file_content, cspgo_url, compute_sha256(cspgo_file)
        )

    create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command line entry point: parse arguments, authenticate, then run either the
    # CSPGO-only flow or the PGO + BOLT flow.
    parser = argparse.ArgumentParser(
        description="This script uses bolt file url, clang pgo file url and gcc pgo file url to create a PR updating the links to these files. Pass --cspgo_url instead to update only the CSPGO entries."
    )
    # The three URL positionals are optional (nargs="?") so that a CSPGO-only invocation
    # can pass just target_branch and new_branch.
    parser.add_argument(
        "bolt_url", nargs="?", help="URL that BOLT data was uploaded to.", default=None
    )
    parser.add_argument(
        "clang_pgo_url", nargs="?", help="URL that clang pgo data was uploaded to.", default=None
    )
    parser.add_argument(
        "gcc_pgo_url", nargs="?", help="URL that gcc pgo data was uploaded to.", default=None
    )
    parser.add_argument("target_branch", help="The branch you want to create a PR into.")
    parser.add_argument("new_branch", help="The new branch to create a PR from.")
    parser.add_argument(
        "--cspgo_url",
        help="URL that clang cspgo data was uploaded to. When set, only the CSPGO entries are updated.",
        default=None,
    )
    parser.add_argument(
        "--app_id", help="App ID used for authentication.", default=os.getenv("MONGO_PR_BOT_APP_ID")
    )
    parser.add_argument(
        "--private_key",
        help="Key to use for authentication.",
        default=os.getenv("MONGO_PR_BOT_PRIVATE_KEY"),
    )
    args = parser.parse_args()
    if not args.app_id or not args.private_key:
        # The message names the flags exactly as they are registered above.
        parser.error(
            "Must define --app_id or env MONGO_PR_BOT_APP_ID and --private_key or env MONGO_PR_BOT_PRIVATE_KEY."
        )
    if not args.cspgo_url and not (args.bolt_url and args.clang_pgo_url and args.gcc_pgo_url):
        parser.error(
            "Must provide either --cspgo_url for a CSPGO-only PR, or bolt_url/clang_pgo_url/gcc_pgo_url positional args for a PGO+BOLT PR."
        )
    # Replace spaces with newline, if applicable
    # The first 31 and the last 29 characters are left untouched. Those are the lengths
    # of "-----BEGIN RSA PRIVATE KEY-----" and "-----END RSA PRIVATE KEY-----", presumably
    # so that the spaces inside those marker lines are kept - confirm the key format.
    private_key = (
        args.private_key[:31] + args.private_key[31:-29].replace(" ", "\n") + args.private_key[-29:]
    )
    # `repo` has to stay a module-level name: create_pr reads it as a global.
    repo = get_mongo_repository(args.app_id, private_key)
    if args.cspgo_url:
        create_cspgo_pr(repo, args.cspgo_url, args.target_branch, args.new_branch)
    else:
        create_profile_data_pr(repo, args, args.target_branch, args.new_branch)
|