mongo/buildscripts/validate_task_gen.py
Steve McClure 5f0fd1605c SERVER-115541: Add guards in commit queue for task-gen thresholds (#45415)
Co-authored-by: trevor <trevor.guidry@mongodb.com>
GitOrigin-RevId: f677d21d21edc1359e27882483a2af479367aa3e
2025-12-26 22:05:55 +00:00

144 lines
5.1 KiB
Python

#!/usr/bin/env python3
import json
import os
import sys
# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from buildscripts.ciconfig.evergreen import parse_evergreen_file
from buildscripts.resmokelib.utils import evergreen_conn
# Size above which validate_file_size prints a warning; exceeding it is not a failure.
FILE_SIZE_THRESHOLD_IN_BYTES = 16 * 1024 * 1024 # 16 MiB (16 * 1024 * 1024 bytes)
# Limit returned by get_task_limit_from_evergreen when the API call fails
# or the response has no 'max_tasks_per_version' entry.
FALLBACK_TASK_COUNT_THRESHOLD = 50_000 # if we can't reach Evergreen API for any reason
# buffer to avoid hitting the limit exactly and leave room for
# tasks generated by tasks other than version_gen
# (validate_task_count adds it to the counted total before comparing against the limit)
TASK_BUFFER = 1500
def validate_file_size(file_path: str) -> None:
    """Report whether the file at *file_path* is within the size threshold.

    The path is resolved relative to the parent of the current working
    directory. An oversized file only produces a printed warning; the sole
    failure mode is a missing file.

    Raises:
        FileNotFoundError: if the resolved path does not exist.
    """
    parent_of_cwd = os.path.dirname(os.getcwd())
    resolved_path = os.path.join(parent_of_cwd, file_path)
    if not os.path.exists(resolved_path):
        raise FileNotFoundError(f"{resolved_path} does not exist")

    size_in_bytes = os.path.getsize(resolved_path)
    if size_in_bytes <= FILE_SIZE_THRESHOLD_IN_BYTES:
        print(
            f"File size validation passed: {resolved_path} is {size_in_bytes} bytes (threshold: {FILE_SIZE_THRESHOLD_IN_BYTES})"
        )
        return

    # Over the threshold: warn, but deliberately do not fail.
    print(
        f"WARNING! {resolved_path} is {size_in_bytes} bytes, exceeding threshold "
        f"{FILE_SIZE_THRESHOLD_IN_BYTES} bytes. File upload may fail due to network issues, or Evergreen "
        f"may reject very large yaml sizes"
    )
def get_task_limit_from_evergreen() -> int:
    """Fetch the task limit from Evergreen's REST API.

    This is best effort: if anything goes wrong while calling the API, or the
    response has no ``max_tasks_per_version`` entry, a warning is printed and
    FALLBACK_TASK_COUNT_THRESHOLD is returned instead of raising.

    Returns:
        The ``max_tasks_per_version`` value from the ``/admin/task_limits``
        response, or FALLBACK_TASK_COUNT_THRESHOLD when it is unavailable.
    """
    try:
        # Call the admin/task_limits endpoint
        evg_api = evergreen_conn.get_evergreen_api()
        url = evg_api._create_url("/admin/task_limits")
        # Not named `json`, so the module-level `json` import is not shadowed here.
        response_body = evg_api._call_api(url, method="GET").json()
        task_limit = response_body.get("max_tasks_per_version")
    except Exception as e:
        # Deliberately broad: an unreachable API must never fail the validation itself.
        print(
            f"WARNING: Failed to fetch task limit from Evergreen API ({e}), using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
        )
        return FALLBACK_TASK_COUNT_THRESHOLD

    if task_limit is None:
        print(
            f"WARNING: 'max_tasks_per_version' not found in API response, using fallback: {FALLBACK_TASK_COUNT_THRESHOLD}"
        )
        return FALLBACK_TASK_COUNT_THRESHOLD

    print(f"Fetched task limit from Evergreen API: {task_limit}")
    return task_limit
def calculate_total_task_count(config: dict) -> int:
    """Count the tasks referenced by the build variants of *config*.

    Each entry in a build variant's ``tasks`` list counts as one task, unless
    its name matches an entry in ``task_groups``, in which case it counts as
    the number of tasks in that group. Every ``display_tasks`` entry of every
    build variant adds one more.
    """
    # Task group name -> number of tasks inside that group.
    group_sizes = {}
    for group in config.get("task_groups", []):
        group_sizes[group["name"]] = len(group["tasks"])

    total = 0
    for variant in config.get("buildvariants", []):
        # A task-group reference expands to its members; anything else is a single task.
        total += sum(group_sizes.get(entry["name"], 1) for entry in variant.get("tasks", []))

    for variant in config.get("buildvariants", []):
        total += len(variant.get("display_tasks", []))

    return total
def calculate_generated_task_count() -> int:
    """Calculate the total number of generated tasks from the Evergreen config file.

    Reads ``generated_resmoke_config/evergreen_config.json`` under the current
    working directory and passes the parsed JSON to calculate_total_task_count.

    Raises:
        Exception: if the file cannot be opened, parsed or counted. The
            underlying error is attached as ``__cause__``.
    """
    # Script runs from src/ directory, so generated config is at ./generated_resmoke_config/evergreen_config.json
    config_path = os.path.join(os.getcwd(), "generated_resmoke_config", "evergreen_config.json")
    try:
        # Read as UTF-8 explicitly so parsing does not depend on the host locale.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        return calculate_total_task_count(config)
    except Exception as e:
        # Chain the original error so the real traceback is not lost.
        raise Exception(f"Failed to calculate generated task count: {e}") from e
def calculate_nongenerated_task_count() -> int:
    """Count the tasks declared in the static ``etc/evergreen.yml`` project file.

    The file is parsed with parse_evergreen_file and the configuration held
    in its ``_conf`` attribute is passed to calculate_total_task_count.
    """
    project_config = parse_evergreen_file("etc/evergreen.yml")
    return calculate_total_task_count(project_config._conf)
def validate_task_count() -> None:
    """Check the overall task count, padded by TASK_BUFFER, against the task limit.

    The generated and non-generated task counts are added together, TASK_BUFFER
    is added on top, and the result is compared with the limit returned by
    get_task_limit_from_evergreen. Each intermediate figure is printed.

    Raises:
        Exception: if the padded total is greater than the limit.
    """
    generated = calculate_generated_task_count()
    print(f"Generated task count: {generated}")

    nongenerated = calculate_nongenerated_task_count()
    print(f"Non-generated task count: {nongenerated}")

    combined = generated + nongenerated
    print(f"Total task count: {combined}")

    padded_total = combined + TASK_BUFFER
    print(f"Total task count with buffer ({TASK_BUFFER}): {padded_total}")

    limit = get_task_limit_from_evergreen()
    if padded_total > limit:
        message = (
            f"Generated configuration contains {padded_total} tasks, exceeding threshold "
            f"of {limit} tasks. This causes Evergreen performance issues or failures. Reduce the number of tasks."
        )
        raise Exception(message)

    print(f"Task count validation passed: {padded_total} tasks (threshold: {limit})")
def main() -> None:
    """Run the file-size check and then the task-count check.

    The first command-line argument is the path of the tgz file handed to
    validate_file_size; any further arguments are ignored. When no argument
    is given the script exits with a usage message and a non-zero status.
    """
    args = sys.argv[1:]
    if not args:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: validate_task_gen.py <tgz_file>")
    tgz_file = args[0]  # tgz file
    validate_file_size(tgz_file)
    validate_task_count()


if __name__ == "__main__":
    main()