#!/usr/bin/env python3
"""Validate the size of the generated tasks archive and the total Evergreen task count.

Usage: pass the path of the generated tasks archive as the first argument.
The script warns when the archive exceeds the size threshold and fails when
the combined generated + non-generated task count (plus a safety buffer)
exceeds the limit reported by Evergreen.
"""

import json
import os
import sys

# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from buildscripts.ciconfig.evergreen import parse_evergreen_file
from buildscripts.resmokelib.utils import evergreen_conn

FILE_SIZE_THRESHOLD_IN_BYTES = 16 * 1024 * 1024  # 16MB
# Used if we can't reach the Evergreen API for any reason.
FALLBACK_TASK_COUNT_THRESHOLD = 50_000
# Buffer to avoid hitting the limit exactly and leave room for
# tasks generated by tasks other than version_gen.
TASK_BUFFER = 1500


def validate_file_size(file_path: str) -> None:
    """Validate that the generated tasks config file size is below the threshold.

    This only fails if the file is missing. A warning is issued if the file
    size exceeds the threshold.

    :param file_path: Path of the file to check. It is joined onto the parent
        of the current working directory (an absolute path is used as-is,
        per ``os.path.join`` semantics).
    :raises FileNotFoundError: If the resolved path does not exist.
    """
    file_path = os.path.join(os.path.dirname(os.getcwd()), file_path)
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"{file_path} does not exist")

    file_size_in_bytes = os.path.getsize(file_path)
    if file_size_in_bytes > FILE_SIZE_THRESHOLD_IN_BYTES:
        # Deliberately a warning rather than an error: an oversized file is
        # reported but does not fail the validation.
        print(
            f"WARNING! {file_path} is {file_size_in_bytes} bytes, exceeding threshold "
            f"{FILE_SIZE_THRESHOLD_IN_BYTES} bytes. File upload may fail due to network issues, or Evergreen "
            f"may reject very large yaml sizes"
        )
    else:
        print(
            f"File size validation passed: {file_path} is {file_size_in_bytes} bytes (threshold: {FILE_SIZE_THRESHOLD_IN_BYTES})"
        )


def get_task_limit_from_evergreen() -> int:
    """Fetch the task limit from Evergreen's REST API.

    Best effort: any exception raised while calling the API, or a response
    without ``max_tasks_per_version``, results in a warning and the fallback
    value instead of a failure.

    :return: The ``max_tasks_per_version`` value from the API response, or
        ``FALLBACK_TASK_COUNT_THRESHOLD`` when it cannot be obtained.
    """
    try:
        # Call the admin/task_limits endpoint.
        evg_api = evergreen_conn.get_evergreen_api()
        url = evg_api._create_url("/admin/task_limits")
        # Named `response` so the imported `json` module is not shadowed.
        response = evg_api._call_api(url, method="GET").json()

        task_limit = response.get("max_tasks_per_version")
        if task_limit is None:
            print(
                f"WARNING: 'max_tasks_per_version' not found in API response, using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
            )
            return FALLBACK_TASK_COUNT_THRESHOLD

        print(f"Fetched task limit from Evergreen API: {task_limit}")
        return task_limit
    except Exception as e:
        # Intentionally broad: failing to reach Evergreen must not fail the validation.
        print(
            f"WARNING: Failed to fetch task limit from Evergreen API ({e}), using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
        )
        return FALLBACK_TASK_COUNT_THRESHOLD


def calculate_total_task_count(config: dict) -> int:
    """Count the tasks referenced by the build variants of an Evergreen config.

    Each entry in a build variant's ``tasks`` list counts as one task, unless
    its name matches an entry in ``task_groups``, in which case it counts as
    the number of tasks in that group. Every entry in a build variant's
    ``display_tasks`` list counts as one additional task.

    :param config: Evergreen project configuration as a dictionary.
    :return: Total number of tasks across all build variants.
    """
    task_group_sizes = {tg["name"]: len(tg["tasks"]) for tg in config.get("task_groups", [])}

    task_count = 0
    for bv in config.get("buildvariants", []):
        for task in bv.get("tasks", []):
            # A task-group reference expands to its member tasks; anything else is a single task.
            task_count += task_group_sizes.get(task["name"], 1)
        task_count += len(bv.get("display_tasks", []))
    return task_count


def calculate_generated_task_count() -> int:
    """Calculate the total number of generated tasks from the Evergreen config file.

    :return: Task count of ``generated_resmoke_config/evergreen_config.json``.
    :raises Exception: If the file cannot be read, parsed, or counted; the
        original error is chained as the cause.
    """
    # Script runs from src/ directory, so generated config is at ./generated_resmoke_config/evergreen_config.json
    config_path = os.path.join(os.getcwd(), "generated_resmoke_config", "evergreen_config.json")

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        return calculate_total_task_count(config)
    except Exception as e:
        raise Exception(f"Failed to calculate generated task count: {e}") from e


def calculate_nongenerated_task_count() -> int:
    """Calculate the total number of non-generated tasks from the Evergreen config file.

    :return: Task count of the configuration parsed from ``etc/evergreen.yml``.
    """
    evg_project_conf = parse_evergreen_file("etc/evergreen.yml")
    # The counting helper works on the raw configuration dictionary.
    config = evg_project_conf._conf
    return calculate_total_task_count(config)


def validate_task_count() -> None:
    """Validate that the total number of generated tasks is below the threshold.

    The total is the generated count plus the non-generated count plus
    ``TASK_BUFFER``; it is compared with the limit obtained from
    ``get_task_limit_from_evergreen``.

    :raises Exception: If the buffered total exceeds the task limit.
    """
    generated_task_num = calculate_generated_task_count()
    print(f"Generated task count: {generated_task_num}")

    nongenerated_task_num = calculate_nongenerated_task_count()
    print(f"Non-generated task count: {nongenerated_task_num}")

    total_tasks = generated_task_num + nongenerated_task_num
    print(f"Total task count: {total_tasks}")

    total_tasks = total_tasks + TASK_BUFFER
    print(f"Total task count with buffer ({TASK_BUFFER}): {total_tasks}")

    task_limit = get_task_limit_from_evergreen()

    if total_tasks > task_limit:
        raise Exception(
            f"Generated configuration contains {total_tasks} tasks, exceeding threshold "
            f"of {task_limit} tasks. This causes Evergreen performance issues or failures. Reduce the number of tasks."
        )

    print(f"Task count validation passed: {total_tasks} tasks (threshold: {task_limit})")


def main() -> None:
    """Run the file size validation, then the task count validation.

    The first command-line argument is the path of the tgz file to check.
    Exits with a usage message when no argument is given.
    """
    args = sys.argv[1:]
    if not args:
        # Exit with a clear message instead of an IndexError traceback.
        sys.exit(f"usage: {os.path.basename(sys.argv[0])} <tgz-file>")

    tgz_file = args[0]
    validate_file_size(tgz_file)
    validate_task_count()


if __name__ == "__main__":
    main()