diff --git a/buildscripts/validate_file_size.py b/buildscripts/validate_file_size.py
deleted file mode 100644
index b64d65465ca..00000000000
--- a/buildscripts/validate_file_size.py
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-
-FILE_SIZE_THRESHOLD_IN_BYTES = 16 * 1024 * 1024  # 16MB
-
-
-def main():
-    args = sys.argv[1:]
-    file_name = args[0]
-    file_path = os.path.join(os.path.dirname(os.getcwd()), file_name)
-    if os.path.exists(file_path):
-        file_size_in_bytes = os.path.getsize(file_path)
-        if file_size_in_bytes > FILE_SIZE_THRESHOLD_IN_BYTES:
-            print(
-                f"WARNING! {file_name} is {file_size_in_bytes} bytes, exceeding threshold"
-                f" {FILE_SIZE_THRESHOLD_IN_BYTES} bytes, file upload may fail due to network issues, or Evergreen"
-                f" may reject very large yaml sizes"
-            )
-        else:
-            print(
-                f"{file_name} is {file_size_in_bytes} bytes, below threshold {FILE_SIZE_THRESHOLD_IN_BYTES} bytes"
-            )
-    else:
-        print(f"{file_path} does not exist")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/buildscripts/validate_task_gen.py b/buildscripts/validate_task_gen.py
new file mode 100644
index 00000000000..df0a7b1f50b
--- /dev/null
+++ b/buildscripts/validate_task_gen.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""Validate the generated Evergreen task configuration.
+
+Checks that the packed config archive exists (warning when it is larger than
+FILE_SIZE_THRESHOLD_IN_BYTES) and that the generated plus non-generated task
+count, padded by TASK_BUFFER, stays within Evergreen's per-version task limit.
+"""
+
+import json
+import os
+import sys
+
+# Get relative imports to work when the package is not installed on the PYTHONPATH.
+if __name__ == "__main__" and __package__ is None:
+    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from buildscripts.ciconfig.evergreen import parse_evergreen_file
+from buildscripts.resmokelib.utils import evergreen_conn
+
+FILE_SIZE_THRESHOLD_IN_BYTES = 16 * 1024 * 1024  # 16MB
+FALLBACK_TASK_COUNT_THRESHOLD = 50_000  # if we can't reach Evergreen API for any reason
+
+# buffer to avoid hitting the limit exactly and leave room for
+# tasks generated by tasks other than version_gen
+TASK_BUFFER = 1500
+
+
+def validate_file_size(file_path: str) -> None:
+    """Check that the generated tasks config archive exists and report its size.
+
+    Exceeding FILE_SIZE_THRESHOLD_IN_BYTES only prints a warning.
+
+    Args:
+        file_path: Archive path, joined onto the parent of the current working directory.
+
+    Raises:
+        FileNotFoundError: If the resolved path does not exist.
+    """
+    file_path = os.path.join(os.path.dirname(os.getcwd()), file_path)
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"{file_path} does not exist")
+
+    file_size_in_bytes = os.path.getsize(file_path)
+    if file_size_in_bytes > FILE_SIZE_THRESHOLD_IN_BYTES:
+        print(
+            f"WARNING! {file_path} is {file_size_in_bytes} bytes, exceeding threshold "
+            f"{FILE_SIZE_THRESHOLD_IN_BYTES} bytes. File upload may fail due to network issues, or Evergreen "
+            f"may reject very large yaml sizes"
+        )
+    else:
+        print(
+            f"File size validation passed: {file_path} is {file_size_in_bytes} bytes (threshold: {FILE_SIZE_THRESHOLD_IN_BYTES})"
+        )
+
+
+def get_task_limit_from_evergreen() -> int:
+    """Fetch the per-version task limit from Evergreen's REST API.
+
+    Returns:
+        The "max_tasks_per_version" value of the admin/task_limits response, or
+        FALLBACK_TASK_COUNT_THRESHOLD when the request fails or the key is missing.
+    """
+    try:
+        # Call the admin/task_limits endpoint
+        evg_api = evergreen_conn.get_evergreen_api()
+        url = evg_api._create_url("/admin/task_limits")
+        # Not named "json": that would shadow the json module imported above.
+        task_limits = evg_api._call_api(url, method="GET").json()
+        task_limit = task_limits.get("max_tasks_per_version")
+    except Exception as e:
+        # Best effort: the validation must still run when Evergreen is unreachable.
+        print(
+            f"WARNING: Failed to fetch task limit from Evergreen API ({e}), using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
+        )
+        return FALLBACK_TASK_COUNT_THRESHOLD
+
+    if task_limit is None:
+        print(
+            f"WARNING: 'max_tasks_per_version' not found in API response, using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
+        )
+        return FALLBACK_TASK_COUNT_THRESHOLD
+
+    print(f"Fetched task limit from Evergreen API: {task_limit}")
+    return task_limit
+
+
+def calculate_total_task_count(config: dict) -> int:
+    """Count the tasks listed under every build variant of a project config.
+
+    A reference to a task group counts as the number of tasks in that group, any
+    other task counts as one, and each display task adds one more.
+
+    Args:
+        config: Parsed project config; "task_groups" and "buildvariants" may be
+            missing or empty.
+
+    Returns:
+        The total task count.
+    """
+    task_group_sizes = {tg["name"]: len(tg["tasks"]) for tg in config.get("task_groups") or []}
+
+    task_count = 0
+    for bv in config.get("buildvariants") or []:
+        for t in bv.get("tasks") or []:
+            # A task group reference expands to every task in the group.
+            task_count += task_group_sizes.get(t["name"], 1)
+        task_count += len(bv.get("display_tasks") or [])
+
+    return task_count
+
+
+def calculate_generated_task_count() -> int:
+    """Calculate the total number of generated tasks from the Evergreen config file.
+
+    Raises:
+        Exception: If the generated config cannot be read, parsed or counted. The
+            underlying error is chained as the cause.
+    """
+    # Script runs from src/ directory, so generated config is at ./generated_resmoke_config/evergreen_config.json
+    config_path = os.path.join(os.getcwd(), "generated_resmoke_config", "evergreen_config.json")
+
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config = json.load(f)
+        return calculate_total_task_count(config)
+    except Exception as e:
+        raise Exception(f"Failed to calculate generated task count: {e}") from e
+
+
+def calculate_nongenerated_task_count() -> int:
+    """Calculate the total number of non-generated tasks from etc/evergreen.yml."""
+    evg_project_conf = parse_evergreen_file("etc/evergreen.yml")
+    config = evg_project_conf._conf
+
+    return calculate_total_task_count(config)
+
+
+def validate_task_count() -> None:
+    """Validate that the total task count plus TASK_BUFFER is within the Evergreen limit.
+
+    Raises:
+        Exception: If the buffered total exceeds the task limit.
+    """
+    generated_task_num = calculate_generated_task_count()
+    print(f"Generated task count: {generated_task_num}")
+
+    nongenerated_task_num = calculate_nongenerated_task_count()
+    print(f"Non-generated task count: {nongenerated_task_num}")
+
+    total_tasks = generated_task_num + nongenerated_task_num
+    print(f"Total task count: {total_tasks}")
+
+    # Keep the raw and buffered totals apart so messages report the real count.
+    buffered_total = total_tasks + TASK_BUFFER
+    print(f"Total task count with buffer ({TASK_BUFFER}): {buffered_total}")
+
+    task_limit = get_task_limit_from_evergreen()
+
+    if buffered_total > task_limit:
+        raise Exception(
+            f"Generated configuration contains {total_tasks} tasks ({buffered_total} with buffer), exceeding threshold "
+            f"of {task_limit} tasks. This causes Evergreen performance issues or failures. Reduce the number of tasks."
+        )
+
+    print(
+        f"Task count validation passed: {total_tasks} tasks, {buffered_total} with buffer (threshold: {task_limit})"
+    )
+
+
+def main():
+    """Run both validations on the archive path given as the first argument."""
+    args = sys.argv[1:]
+    if not args:
+        sys.exit(f"usage: {os.path.basename(sys.argv[0])} TGZ_FILE")
+    tgz_file = args[0]  # tgz file
+
+    validate_file_size(tgz_file)
+
+    validate_task_count()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/etc/evergreen_yml_components/definitions.yml b/etc/evergreen_yml_components/definitions.yml
index 6af5b78245f..6c4b8f651ca 100644
--- a/etc/evergreen_yml_components/definitions.yml
+++ b/etc/evergreen_yml_components/definitions.yml
@@ -1418,8 +1418,7 @@ functions:
       include:
         - "*"
 
-  "validate generate tasks config filesize":
-    &validate_generate_tasks_config_filesize
+  "validate generate tasks config": &validate_generate_tasks_config
     command: subprocess.exec
-    display_name: "validate generate tasks config filesize"
+    display_name: "validate generate tasks config"
     type: test
@@ -1427,8 +1426,11 @@ functions:
       binary: bash
       args:
         - "src/evergreen/run_python_script.sh"
-        - "buildscripts/validate_file_size.py"
+        - "buildscripts/validate_task_gen.py"
         - "generate_tasks_config.tgz"
+      env:
+        EVERGREEN_API_USER: ${evergreen_api_user}
+        EVERGREEN_API_KEY: ${evergreen_api_key}
 
   "upload generate tasks config": &upload_generate_tasks_config
     command: s3.put
@@ -1471,7 +1473,7 @@ functions:
     - *configure_evergreen_api_credentials
     - *generate_version_sh
     - *pack_generate_tasks_config
-    - *validate_generate_tasks_config_filesize
+    - *validate_generate_tasks_config
     - *upload_generate_tasks_config
     - *generate_resmoke_tasks_config
 
@@ -1480,7 +1482,7 @@ functions:
     - *configure_evergreen_api_credentials
     - *generate_version_sh
     - *pack_generate_tasks_config
-    - *validate_generate_tasks_config_filesize
+    - *validate_generate_tasks_config
 
   "generate version burn in":
     - *f_expansions_write
@@ -1493,7 +1495,7 @@ functions:
         args:
           - "./src/evergreen/generate_version_burn_in.sh"
     - *pack_generate_tasks_config
-    - *validate_generate_tasks_config_filesize
+    - *validate_generate_tasks_config
     - *upload_burn_in_generate_tasks_config
     - *generate_resmoke_tasks_config
 