#!/usr/bin/env python3 """ Script that opens a PR using a bot to update profile data links for PGO, CSPGO, and BOLT. This updates profiling_data.bzl and is reliant on the formatting of it to not change. Two invocation modes: 1. PGO + BOLT (original): 3 positional URLs (bolt, clang_pgo, gcc_pgo). Expects exactly one of clang_pgo / gcc_pgo to be populated so only one is updated at a time. 2. CSPGO (--cspgo_url): updates only the CSPGO URL + checksum. Orthogonal to the PGO/BOLT flow; """ import argparse import hashlib import os import re import sys import tempfile import requests from github.GithubException import GithubException from github.GithubIntegration import GithubIntegration from jira import JIRAError if __name__ == "__main__" and __package__ is None: sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from buildscripts.client.jiraclient import JiraAuth, JiraClient OWNER_NAME = "10gen" REPO_NAME = "mongo" PROFILE_DATA_FILE_PATH = "bazel/repository_rules/profiling_data.bzl" JIRA_SERVER = "https://jira.mongodb.org" PROFILE_DATA_OWNING_TEAM = "Product Performance" def get_mongo_repository(app_id, private_key): """ Gets the mongo github repository """ app = GithubIntegration(int(app_id), private_key) installation = app.get_repo_installation(OWNER_NAME, REPO_NAME) g = installation.get_github_for_installation() return g.get_repo(f"{OWNER_NAME}/{REPO_NAME}") def compute_sha256(file_path: str) -> str: """ Compute the sha256 hash of a file """ sha256 = hashlib.sha256() with open(file_path, "rb") as f: for block in iter(lambda: f.read(4096), b""): sha256.update(block) return sha256.hexdigest() def download_file(url: str, output_location: str) -> bool: """ Download a file to a specific output_location and return if the file existed remotely """ try: response = requests.get(url) response.raise_for_status() with open(output_location, "wb") as file: file.write(response.content) return True except requests.exceptions.RequestException: return False def replace_quoted_text_in_tagged_line(text: str, tag: str, new_text: str) -> str: """ Replace the text between quotes in a line that starts with a specific tag eg. FOO = "replace_this" -> FOO = "new_text" """ if tag not in text: print(f"Tag: {tag} did not exist in the file.", file=sys.stderr) sys.exit(1) pattern = rf'({tag}.*?"(.*?)")' return re.sub(pattern, lambda match: match.group(0).replace(match.group(2), new_text), text) def update_bolt_info(file_content: str, new_url: str, new_checksum: str) -> str: """ Updates the bolt url and checksum lines in a file """ bolt_url_tag = "DEFAULT_BOLT_DATA_URL" bolt_checksum_tag = "DEFAULT_BOLT_DATA_CHECKSUM" updated_text = replace_quoted_text_in_tagged_line(file_content, bolt_url_tag, new_url) return replace_quoted_text_in_tagged_line(updated_text, bolt_checksum_tag, new_checksum) def update_clang_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str: """ Updates the clang pgo url and checksum lines in a file """ clang_pgo_url_tag = "DEFAULT_CLANG_PGO_DATA_URL" clang_pgo_checksum_tag = "DEFAULT_CLANG_PGO_DATA_CHECKSUM" updated_text = replace_quoted_text_in_tagged_line(file_content, clang_pgo_url_tag, new_url) return replace_quoted_text_in_tagged_line(updated_text, clang_pgo_checksum_tag, new_checksum) def update_gcc_pgo_info(file_content: str, new_url: str, new_checksum: str) -> str: """ Updates the gcc pgo url and checksum lines in a file """ gcc_pgo_url_tag = "DEFAULT_GCC_PGO_DATA_URL" gcc_pgo_checksum_tag = "DEFAULT_GCC_PGO_DATA_CHECKSUM" updated_text = replace_quoted_text_in_tagged_line(file_content, gcc_pgo_url_tag, new_url) return replace_quoted_text_in_tagged_line(updated_text, gcc_pgo_checksum_tag, new_checksum) def update_clang_cspgo_info(file_content: str, new_url: str, new_checksum: str) -> str: """ Updates the clang cspgo url and checksum lines in a file """ clang_cspgo_url_tag = "DEFAULT_CLANG_CSPGO_DATA_URL" clang_cspgo_checksum_tag = "DEFAULT_CLANG_CSPGO_DATA_CHECKSUM" updated_text = replace_quoted_text_in_tagged_line(file_content, clang_cspgo_url_tag, new_url) return replace_quoted_text_in_tagged_line(updated_text, clang_cspgo_checksum_tag, new_checksum) def create_backport_ticket(version: str): jira = JiraClient(JIRA_SERVER, JiraAuth(), dry_run=False) jira = jira._jira server_issue_dict = { "project": {"key": "SERVER"}, "issuetype": {"name": "Task"}, "summary": "Update PGO profiles", "description": "Updated PGO profile numbers for performance.", "customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}], } backport_issue_dict = { "project": {"key": "BACKPORT"}, "issuetype": {"name": "Backport"}, "summary": f"[{version}] Update PGO profiles", # Branch "customfield_14166": {"value": version}, # Backport Justification "customfield_25156": "Updated PGO profile numbers for performance.", } for attempt in range(3): try: server_issue = jira.create_issue(fields=server_issue_dict) backport_issue = jira.create_issue(fields=backport_issue_dict) # For some reason you cant assign a team on creation for backport tickets backport_issue.update({"customfield_12751": [{"value": PROFILE_DATA_OWNING_TEAM}]}) jira.create_issue_link( type="backported by", inwardIssue=server_issue.key, outwardIssue=backport_issue.key ) break except JIRAError as err: print(err) return None return server_issue def create_pr(target_branch: str, new_branch: str, original_file, new_content: str): """ Opens up a pr for a single file with new contents """ target_repo_branch = repo.get_branch(target_branch) ref = f"refs/heads/{new_branch}" try: repo.get_branch(branch=new_branch) except GithubException as e: if e.status == 404: print(f"Branch doesn't exist, creating branch {new_branch}.") repo.create_git_ref(ref=ref, sha=target_repo_branch.commit.sha) else: raise jira_ticket = "SERVER-110427" # This is a versioned backport branch if it stats with v if target_branch != "master" and target_branch[0] == "v": # get v8.0 from either v8.0 or v8.0-staging version = target_branch.split("-")[0] new_ticket = create_backport_ticket(version) if new_ticket: jira_ticket = new_ticket.key else: jira_ticket = "[Jira Ticket Creation Broken]" repo.update_file( path=PROFILE_DATA_FILE_PATH, content=new_content, branch=new_branch, message="Updating profile files.", sha=original_file.sha, ) repo.create_pull( base=target_branch, head=new_branch, title=f"{jira_ticket} Update profiling data", body="Automated PR updating the profiling data.", ) def create_profile_data_pr(repo, args, target_branch, new_branch): """ Get the new text needed and create a pr for updating the profiling_data.bzl """ temp_dir = tempfile.mkdtemp() bolt_file = os.path.join(temp_dir, "bolt.fdata") clang_pgo_file = os.path.join(temp_dir, "clang_pgo.profdata") gcc_pgo_file = os.path.join(temp_dir, "gcc_pgo.tgz") bolt_file_exists = download_file(args.bolt_url, bolt_file) clang_pgo_file_exists = download_file(args.clang_pgo_url, clang_pgo_file) gcc_pgo_file_exists = download_file(args.gcc_pgo_url, gcc_pgo_file) # These are not errors because the script can run when no files were meant to be updated. if not bolt_file_exists: print(f"Bolt file did not exist at {args.bolt_url}. Not creating PR.") sys.exit(0) if clang_pgo_file_exists and gcc_pgo_file_exists: print( f"Both clang and gcc had pgo files that existed. Clang: {args.clang_pgo_url} GCC: {args.gcc_pgo_url}. Only one should be updated at a time. Not creating PR." ) sys.exit(1) if not clang_pgo_file_exists and not gcc_pgo_file_exists: print( f"Neither clang nor gcc had pgo files that existed at either {args.clang_pgo_url} or {args.gcc_pgo_url}. Not creating PR." ) sys.exit(0) profiling_data_file = repo.get_contents( PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}" ) profiling_data_file_content = profiling_data_file.decoded_content.decode() profiling_file_updated_text = update_bolt_info( profiling_data_file_content, args.bolt_url, compute_sha256(bolt_file) ) if clang_pgo_file_exists: profiling_file_updated_text = update_clang_pgo_info( profiling_file_updated_text, args.clang_pgo_url, compute_sha256(clang_pgo_file) ) else: profiling_file_updated_text = update_gcc_pgo_info( profiling_file_updated_text, args.gcc_pgo_url, compute_sha256(gcc_pgo_file) ) create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text) def create_cspgo_pr(repo, cspgo_url: str, target_branch: str, new_branch: str): """ Download the cspgo profdata, compute its checksum, and open a PR updating only the CSPGO url/checksum in profiling_data.bzl. """ temp_dir = tempfile.mkdtemp() cspgo_file = os.path.join(temp_dir, "clang_cspgo.profdata") if not download_file(cspgo_url, cspgo_file): print(f"CSPGO file did not exist at {cspgo_url}. Not creating PR.") sys.exit(0) profiling_data_file = repo.get_contents( PROFILE_DATA_FILE_PATH, ref=f"refs/heads/{target_branch}" ) profiling_data_file_content = profiling_data_file.decoded_content.decode() profiling_file_updated_text = update_clang_cspgo_info( profiling_data_file_content, cspgo_url, compute_sha256(cspgo_file) ) create_pr(target_branch, new_branch, profiling_data_file, profiling_file_updated_text) if __name__ == "__main__": parser = argparse.ArgumentParser( description="This script uses bolt file url, clang pgo file url and gcc pgo file url to create a PR updating the links to these files. Pass --cspgo_url instead to update only the CSPGO entries." ) parser.add_argument( "bolt_url", nargs="?", help="URL that BOLT data was uploaded to.", default=None ) parser.add_argument( "clang_pgo_url", nargs="?", help="URL that clang pgo data was uploaded to.", default=None ) parser.add_argument( "gcc_pgo_url", nargs="?", help="URL that gcc pgo data was uploaded to.", default=None ) parser.add_argument("target_branch", help="The branch you want to create a PR into.") parser.add_argument("new_branch", help="The new branch to create a PR from.") parser.add_argument( "--cspgo_url", help="URL that clang cspgo data was uploaded to. When set, only the CSPGO entries are updated.", default=None, ) parser.add_argument( "--app_id", help="App ID used for authentication.", default=os.getenv("MONGO_PR_BOT_APP_ID") ) parser.add_argument( "--private_key", help="Key to use for authentication.", default=os.getenv("MONGO_PR_BOT_PRIVATE_KEY"), ) args = parser.parse_args() if not args.app_id or not args.private_key: parser.error( "Must define --app-id or env MONGO_PR_BOT_APP_ID and --private-key or env MONGO_PR_BOT_PRIVATE_KEY." ) if not args.cspgo_url and not (args.bolt_url and args.clang_pgo_url and args.gcc_pgo_url): parser.error( "Must provide either --cspgo_url for a CSPGO-only PR, or bolt_url/clang_pgo_url/gcc_pgo_url positional args for a PGO+BOLT PR." ) # Replace spaces with newline, if applicable private_key = ( args.private_key[:31] + args.private_key[31:-29].replace(" ", "\n") + args.private_key[-29:] ) repo = get_mongo_repository(args.app_id, private_key) if args.cspgo_url: create_cspgo_pr(repo, args.cspgo_url, args.target_branch, args.new_branch) else: create_profile_data_pr(repo, args, args.target_branch, args.new_branch)