# mongo/buildscripts/monitor_build_status/cli.py

from __future__ import annotations

import os
import sys
from datetime import datetime, timedelta, timezone
from enum import Enum

import structlog
import typer
from tabulate import tabulate
from typing_extensions import Annotated

from evergreen import EvergreenApi

# Make the absolute `buildscripts.*` imports below work when this file is run
# directly as a script and the package is not installed on the PYTHONPATH:
# the directory three levels above this file is appended to sys.path.
# This has to run before those imports are executed.
if __name__ == "__main__" and __package__ is None:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

from buildscripts.client.jiraclient import JiraAuth, JiraClient
from buildscripts.monitor_build_status.code_lockdown_config import (
    CodeLockdownConfig,
    IssueThresholds,
    NotificationsConfig,
    ScopesConfig,
)
from buildscripts.monitor_build_status.issue_report import IssueCategory, IssueReport
from buildscripts.monitor_build_status.jira_service import JiraService
from buildscripts.resmokelib.utils.evergreen_conn import get_evergreen_api
from buildscripts.util.cmdutils import enable_logging

# Module-level structured logger used for all progress and report output.
LOGGER = structlog.get_logger(__name__)

# YAML file that main() loads into a CodeLockdownConfig. The path is relative;
# presumably it is resolved against the current working directory — confirm.
CODE_LOCKDOWN_CONFIG = "etc/code_lockdown.yml"

# Server URL handed to JiraClient in main().
JIRA_SERVER = "https://jira.mongodb.org"
# Repository name shown in the header line of the status message.
DEFAULT_REPO = "10gen/mongo"

# Name of the organization-wide row of the issue table. This row is exempt from
# the short-table filter that hides rows whose status is GREEN.
ORG_OVERALL = "[Org] Overall"


class CodeMergeStatus(Enum):
    """Merge status derived from how much of each threshold is used up."""

    RED = "RED"
    GREEN = "GREEN"

    @classmethod
    def from_threshold_percentages(cls, threshold_percentages: list[float]) -> CodeMergeStatus:
        """Return RED if any percentage is strictly above 100, otherwise GREEN.

        An empty list yields GREEN, and exactly 100 still counts as GREEN.
        """
        for used_percentage in threshold_percentages:
            if used_percentage > 100:
                return cls.RED
        return cls.GREEN


class SummaryMsg(Enum):
    """Sentences that follow the `SUMMARY [...]` tag in the per-scope summary."""

    # Used when no sub-scope is over any of its thresholds.
    BELOW_THRESHOLDS = (
        "All metrics are within 100% of their thresholds. "
        "All merges are allowed."
    )
    # Used when at least one sub-scope is over a threshold; the offending
    # sub-scopes are listed right after this sentence.
    THRESHOLD_EXCEEDED = (
        "At least one metric exceeds 100% of its threshold. "
        "Approve only changes that fix BFs, Bugs, and Performance Regressions "
        "in the following scopes:"
    )


class MonitorBuildStatusOrchestrator:
    """Compose and publish the issue-count status report.

    For each notification entry of the code lockdown config, the hot and cold
    issues of every scope are fetched from Jira, tabulated against the
    configured thresholds and summarized as within or over threshold. The
    resulting message is logged line by line and, on request, sent to the
    entry's Slack channel through the Evergreen API.
    """

    def __init__(
        self,
        jira_service: JiraService,
        evg_api: EvergreenApi,
        code_lockdown_config: CodeLockdownConfig,
    ) -> None:
        """Store the collaborators the report is built from.

        Args:
            jira_service: Used to fetch the issues matching the hot and cold Jira queries.
            evg_api: Evergreen client used to send the Slack message.
            code_lockdown_config: Notifications, scopes, thresholds and the
                group/team metadata that drive the report.
        """
        self.jira_service = jira_service
        self.evg_api = evg_api
        self.code_lockdown_config = code_lockdown_config

    def evaluate_build_redness(self, notify: bool) -> None:
        """Build, log and optionally send one status report per notification config.

        Args:
            notify: When true, the report is also sent to the Slack channel of
                its notification config; otherwise it is only logged.
        """
        for notification_config in self.code_lockdown_config.notifications:
            issue_tables: list[str] = []
            summaries: list[str] = []

            for scopes_config in notification_config.scopes:
                issue_report = self._make_report(scopes_config)
                issue_table, scope_percentages = self._get_issue_counts_status(
                    scopes_config.name, issue_report, notification_config
                )
                issue_tables.append(f"{issue_table}\n")
                summaries.append(f"{self._summarize(scopes_config.name, scope_percentages)}\n")

            # Message layout: header, one table per scope, one summary per
            # scope, then the configured footer.
            status_message = "".join(
                [
                    f"\n`[STATUS]` Issue count for '{DEFAULT_REPO}' repo\n",
                    *issue_tables,
                    *summaries,
                    f"\n{notification_config.slack.message_footer}",
                ]
            )

            # Log every line separately so each one becomes its own log record.
            for line in status_message.split("\n"):
                LOGGER.info(line)

            if notify:
                slack_channel = notification_config.slack.channel
                LOGGER.info("Notifying slack channel with results", slack_channel=slack_channel)
                self.evg_api.send_slack_message(target=slack_channel, msg=status_message.strip())

    def _make_report(self, scopes_config: ScopesConfig) -> IssueReport:
        """Fetch the hot and cold issues of a scope and collect them in a report.

        Args:
            scopes_config: Scope whose `jira_queries.hot` and `jira_queries.cold`
                queries are run.

        Returns:
            A fresh IssueReport populated with the fetched issues.
        """
        LOGGER.info("Processing scope", name=scopes_config.name)

        hot_query = scopes_config.jira_queries.hot
        LOGGER.info("Getting hot issues from Jira", query=hot_query)
        hot_issues = self.jira_service.fetch_issues(hot_query)

        cold_query = scopes_config.jira_queries.cold
        LOGGER.info("Getting cold issues from Jira", query=cold_query)
        cold_issues = self.jira_service.fetch_issues(cold_query)

        LOGGER.info("Got active Issues", count_hot=len(hot_issues), count_cold=len(cold_issues))

        report = IssueReport.empty()
        report.add_issues(hot=hot_issues, cold=cold_issues)
        return report

    @staticmethod
    def _percentage_of_threshold(issue_count: int, threshold_count: int) -> tuple[float, str]:
        """Express an issue count as a percentage of its threshold.

        Args:
            issue_count: Number of issues counted.
            threshold_count: Configured maximum the count is compared against.

        Returns:
            The percentage as a number and as display text without decimals.
            A zero threshold cannot be divided by: any issue then counts as
            far over the limit (9999, shown as "∞"), zero issues as 0%.
        """
        try:
            percentage = issue_count / threshold_count * 100
        except ZeroDivisionError:
            if issue_count > 0:
                return 9999, "∞"
            return 0, "0"
        return percentage, f"{percentage:.0f}"

    def _get_issue_counts_status(
        self, scope_name: str, issue_report: IssueReport, notification_config: NotificationsConfig
    ) -> tuple[str, dict[str, list[float]]]:
        """Render the issue-count table of one scope and collect its percentages.

        Rows are produced for the organization overall, then for every group
        and then for every team present in the report, each in sorted order.

        Args:
            scope_name: Scope name, used as the header of the first column.
            issue_report: Issues of the scope to count.
            notification_config: Supplies the thresholds and Slack settings.

        Returns:
            The table wrapped in a triple-backtick fence, and a mapping from
            "<sub-scope name> <slack tags>" to its [hot, cold] percentages.
            Sub-scopes with neither hot nor cold issues appear in neither.
        """
        now = datetime.now(timezone.utc)
        percentages: dict[str, list[float]] = {}
        table_data: list[list[str]] = []

        def _process_thresholds(
            sub_scope_name: str,
            thresholds: IssueThresholds,
            slack_tags: list[str],
            *team_filter: list[str],
        ) -> None:
            """Count the sub-scope's issues and record its percentages and table row.

            `team_filter` is forwarded untouched as the optional trailing
            argument of `get_issue_count`, so the overall row keeps calling it
            with two arguments only.
            """
            # The grace period shifts the cutoff into the past; presumably only
            # issues older than the cutoff are counted — confirm in IssueReport.
            hot_issue_count = issue_report.get_issue_count(
                IssueCategory.HOT,
                now - timedelta(days=thresholds.hot.grace_period_days),
                *team_filter,
            )
            cold_issue_count = issue_report.get_issue_count(
                IssueCategory.COLD,
                now - timedelta(days=thresholds.cold.grace_period_days),
                *team_filter,
            )
            if hot_issue_count == 0 and cold_issue_count == 0:
                return

            hot_percentage, hot_percentage_str = self._percentage_of_threshold(
                hot_issue_count, thresholds.hot.count
            )
            cold_percentage, cold_percentage_str = self._percentage_of_threshold(
                cold_issue_count, thresholds.cold.count
            )

            label = f"{sub_scope_name} {' '.join(slack_tags)}"
            percentages[label] = [hot_percentage, cold_percentage]

            # In short-table mode GREEN rows are left out of the table (their
            # percentages are still recorded above); the overall row always stays.
            if (
                sub_scope_name != ORG_OVERALL
                and notification_config.slack.short_issue_data_table
                and CodeMergeStatus.from_threshold_percentages([hot_percentage, cold_percentage])
                == CodeMergeStatus.GREEN
            ):
                return

            table_data.append(
                [
                    sub_scope_name,
                    f"{hot_percentage_str}% ({hot_issue_count} / {thresholds.hot.count})",
                    f"{cold_percentage_str}% ({cold_issue_count} / {thresholds.cold.count})",
                ]
            )

        _process_thresholds(
            ORG_OVERALL,
            notification_config.thresholds.overall,
            notification_config.slack.overall_scope_tags,
        )

        for group_name in sorted(self.code_lockdown_config.get_all_group_names()):
            group_teams = self.code_lockdown_config.get_group_teams(group_name)
            _process_thresholds(
                f"[Group] {group_name}",
                notification_config.thresholds.group,
                self.code_lockdown_config.get_group_slack_tags(group_name),
                group_teams,
            )

        for assigned_team in sorted(issue_report.team_reports.keys()):
            _process_thresholds(
                f"[Team] {assigned_team}",
                self.code_lockdown_config.get_team_thresholds(
                    assigned_team, notification_config.thresholds.team
                ),
                self.code_lockdown_config.get_team_slack_tags(assigned_team),
                [assigned_team],
            )

        # NOTE(review): table_data is empty when every count is zero; confirm the
        # installed tabulate version accepts `colalign` for a table without rows.
        table_str = tabulate(
            table_data,
            [scope_name, "Hot Issues", "Cold Issues"],
            tablefmt="outline",
            colalign=("left", "right", "right"),
        )
        return f"```\n{table_str}\n```", percentages

    @staticmethod
    def _summarize(scope_name: str, scope_percentages: dict[str, list[float]]) -> str:
        """Return the summary line(s) of a scope.

        Args:
            scope_name: Scope shown in the `SUMMARY [...]` tag.
            scope_percentages: Sub-scope label mapped to its threshold percentages.

        Returns:
            The tag followed by the all-clear sentence, or by the
            threshold-exceeded sentence and one bullet line per RED sub-scope.
        """
        red_sub_scopes = [
            sub_scope
            for sub_scope, sub_scope_percentages in scope_percentages.items()
            if CodeMergeStatus.from_threshold_percentages(sub_scope_percentages)
            == CodeMergeStatus.RED
        ]

        tag = f"`SUMMARY [{scope_name}]`"
        if not red_sub_scopes:
            return f"{tag} {SummaryMsg.BELOW_THRESHOLDS.value}"

        bullets = "".join(f"\n\t- {sub_scope}" for sub_scope in red_sub_scopes)
        return f"{tag} {SummaryMsg.THRESHOLD_EXCEEDED.value}{bullets}"


def main(
    notify: Annotated[
        bool, typer.Option(help="Whether to send slack notification with the results")
    ] = False,  # off by default, so a plain run only logs the report
) -> None:
    """
    Analyze Jira BFs count for redness reports.

    For Jira API authentication, use `JIRA_AUTH_PAT` env variable.
    More about Jira Personal Access Tokens (PATs) here:

    - https://wiki.corp.mongodb.com/pages/viewpage.action?pageId=218995581

    Example:

    JIRA_AUTH_PAT=<auth-token> python buildscripts/monitor_build_status/cli.py --help
    """
    # The docstring above is user-facing: typer shows it as the command's help text.
    enable_logging(verbose=False)

    # Wire up the external clients first, then the services built on top of them.
    jira = JiraClient(JIRA_SERVER, JiraAuth())
    evergreen = get_evergreen_api()
    issue_service = JiraService(jira_client=jira)
    lockdown_config = CodeLockdownConfig.from_yaml_config(CODE_LOCKDOWN_CONFIG)

    MonitorBuildStatusOrchestrator(
        jira_service=issue_service,
        evg_api=evergreen,
        code_lockdown_config=lockdown_config,
    ).evaluate_build_redness(notify)


if __name__ == "__main__":
    # typer builds the command-line interface from main's signature and runs it.
    typer.run(main)