358 lines
13 KiB
Python
358 lines
13 KiB
Python
"""Parser for BUILD.bazel files to extract resmoke_suite_test configuration.
|
|
|
|
This module parses BUILD.bazel files without invoking bazel, supporting a simplified
|
|
subset of Bazel syntax:
|
|
- Simple lists of targets (no select() expressions)
|
|
- Direct file targets (e.g., "//jstests/foo:bar.js")
|
|
- all_javascript_files targets (globs *.js in directory)
|
|
- all_subpackage_javascript_files targets (recursively includes all JS from subpackages)
|
|
"""
|
|
|
|
import functools
|
|
import os
|
|
import re
|
|
|
|
|
|
class BazelParseError(Exception):
    """Signals a failure while parsing a BUILD.bazel file."""
|
|
|
|
|
|
def _find_rule_end(content: str, paren_start: int) -> int | None:
    """Return the index just past the ")" that closes ``content[paren_start]``.

    Parentheses that appear inside string literals or ``#`` comments are not
    counted, so text such as ``# 1) first step`` cannot end the rule early.

    Args:
        content: Full text of the BUILD.bazel file
        paren_start: Index of the opening "(" of the rule call

    Returns:
        Index one past the matching ")", or None if the parentheses never balance.
    """
    depth = 0
    quote = None
    i = paren_start
    length = len(content)
    while i < length:
        ch = content[i]
        if quote is not None:
            if ch == "\\":
                # Skip the escaped character so an escaped quote does not end the string.
                i += 1
            elif ch == quote:
                quote = None
        elif ch in "\"'":
            quote = ch
        elif ch == "#":
            # Comment: jump to the end of the line.
            newline = content.find("\n", i)
            if newline == -1:
                break
            i = newline
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth == 0:
                return i + 1
        i += 1
    return None


@functools.cache
def parse_resmoke_suite_test(target_label: str) -> dict[str, list[str] | int | str | None]:
    """Parse a resmoke_suite_test target from BUILD.bazel.

    The BUILD.bazel file is located relative to the current working directory
    using the package part of the label. Results are memoized per label by
    functools.cache, so repeated calls return the same dictionary object.

    Args:
        target_label: Bazel target label like "//buildscripts/resmokeconfig:core"

    Returns:
        Dictionary with extracted attributes:
        - srcs: List of test file labels
        - exclude_files: List of test file labels to exclude
        - exclude_with_any_tags: List of tag strings
        - include_with_any_tags: List of tag strings
        - group_size: Integer or None for number of tests per group (for test_kind: parallel_fsm_workload_test)
        - group_count_multiplier: String for group count multiplier (for test_kind: parallel_fsm_workload_test)

    Raises:
        BazelParseError: If BUILD.bazel file not found or target not found
    """
    package, target_name = _parse_label(target_label)
    build_file = os.path.join(package, "BUILD.bazel")

    if not os.path.exists(build_file):
        raise BazelParseError(
            f"BUILD.bazel file not found at '{build_file}' for target '{target_label}'"
        )

    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default locale encoding.
    with open(build_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Parse load statements to build identifier -> .bzl file mapping
    identifier_to_bzl_file = _parse_load_statements(content, package)

    # Find the resmoke_suite_test block
    # Pattern matches: resmoke_suite_test(name = "target_name", ...)
    # Note that `name` must be the first attribute of the rule for this to match.
    pattern = r'resmoke_suite_test\s*\(\s*name\s*=\s*["\']' + re.escape(target_name) + r'["\']'
    match = re.search(pattern, content)
    if not match:
        raise BazelParseError(
            f"Target '{target_name}' not found in '{build_file}'. "
            f'Expected a resmoke_suite_test rule with name = "{target_name}"'
        )

    # Extract the rule block by finding balanced parentheses
    rule_start = match.start()
    paren_start = content.index("(", rule_start)
    rule_end = _find_rule_end(content, paren_start)
    if rule_end is None:
        raise BazelParseError(
            f"Unbalanced parentheses in resmoke_suite_test definition for '{target_label}'"
        )

    rule_block = content[rule_start:rule_end]

    return {
        "srcs": _extract_attribute(rule_block, "srcs", identifier_to_bzl_file, build_file),
        "exclude_files": _extract_attribute(
            rule_block, "exclude_files", identifier_to_bzl_file, build_file
        ),
        "exclude_with_any_tags": _extract_attribute(
            rule_block, "exclude_with_any_tags", identifier_to_bzl_file, build_file
        ),
        "include_with_any_tags": _extract_attribute(
            rule_block, "include_with_any_tags", identifier_to_bzl_file, build_file
        ),
        "group_size": _extract_int_attribute(rule_block, "group_size"),
        "group_count_multiplier": _extract_scalar_attribute(rule_block, "group_count_multiplier"),
    }
|
|
|
|
|
|
def _parse_label(target_label: str) -> tuple[str, str]:
    """Split a Bazel target label into its package path and target name.

    Args:
        target_label: A Bazel target label like "//package/path:target_name"

    Returns:
        Tuple of (package_path, target_name)

    Raises:
        BazelParseError: If the label does not start with "//" or has no ":" separator
    """
    if target_label[:2] != "//":
        raise BazelParseError(
            f"Unsupported Bazel target label '{target_label}': must start with '//'"
        )

    # Everything after the leading "//" is "<package>:<target>"; only the first
    # ":" separates the two halves.
    package, separator, target_name = target_label[2:].partition(":")
    if not separator:
        raise BazelParseError(
            f"Unsupported Bazel target label '{target_label}': must contain ':' separator"
        )

    return package, target_name
|
|
|
|
|
|
def _parse_load_statements(content: str, package: str) -> dict[str, str]:
    """Parse load statements from BUILD.bazel content.

    Extracts identifier to .bzl file mappings from load statements, including
    statements that span several lines.
    Example: load("//path/to:file.bzl", "identifier1", "identifier2")

    Args:
        content: The BUILD.bazel file content
        package: The package path of the BUILD.bazel file

    Returns:
        Dictionary mapping identifier names to .bzl file paths. Labels that
        start with "//" map to the path after the "//"; any other label is
        joined onto `package`.
    """
    identifier_to_bzl_file: dict[str, str] = {}

    # Drop comments first so that commented-out loads are ignored and quoted
    # text or ")" inside a trailing comment cannot be mistaken for part of a
    # load statement.
    uncommented = re.sub(r"#[^\n]*", "", content)

    # A load statement starts at the beginning of a line; DOTALL lets the
    # argument list run across lines up to the closing ")".
    load_pattern = r'^[ \t]*load\s*\(\s*["\']([^"\']+)["\'](.*?)\)'
    identifier_pattern = r'["\']([^"\']+)["\']'

    for match in re.finditer(load_pattern, uncommented, re.MULTILINE | re.DOTALL):
        bzl_label, identifiers_str = match.groups()

        # Convert the .bzl label to a file path
        # Example: "//jstests/suites:selectors.bzl"
        #       -> "jstests/suites/selectors.bzl"
        if bzl_label.startswith("//"):
            bzl_path = bzl_label[2:].replace(":", "/")
        else:
            # Relative path - resolve relative to current package
            bzl_path = os.path.join(package, bzl_label.replace(":", ""))

        # Re-join with the platform's path separator.
        bzl_path = os.path.join(*bzl_path.split("/"))

        # Map each loaded identifier to the .bzl file
        for identifier in re.findall(identifier_pattern, identifiers_str):
            identifier_to_bzl_file[identifier] = bzl_path

    return identifier_to_bzl_file
|
|
|
|
|
|
def _extract_attribute(
|
|
block: str,
|
|
attribute_name: str,
|
|
identifier_to_bzl_file: dict[str, str] = None,
|
|
build_file: str = None,
|
|
) -> list[str]:
|
|
"""Extract an attribute from a BUILD.bazel rule block.
|
|
|
|
Supports simple lists and list concatenation with identifiers:
|
|
- Simple list: srcs = ["file1.js", "file2.js"]
|
|
- List concatenation: srcs = ["file1.js"] + some_identifier
|
|
|
|
Args:
|
|
block: The text content of a BUILD.bazel rule block
|
|
attribute_name: The name of the attribute to extract (e.g., "srcs")
|
|
identifier_to_bzl_file: Dictionary mapping identifier names to .bzl file paths
|
|
build_file: Path to the BUILD.bazel file for resolving local identifiers
|
|
Returns:
|
|
List of string values from the attribute. Returns empty list if attribute not found.
|
|
"""
|
|
if identifier_to_bzl_file is None:
|
|
identifier_to_bzl_file = {}
|
|
|
|
# Pattern to match: attribute_name = <expression>
|
|
# Captures everything until comma + newline + next attribute/paren, or end of string
|
|
pattern = rf"{attribute_name}\s*=\s*(.+?)(?=,\s*\n\s*(?:\w+\s*=|\))|\Z)"
|
|
match = re.search(pattern, block, re.DOTALL | re.MULTILINE)
|
|
if not match:
|
|
return []
|
|
|
|
expression = match.group(1).strip()
|
|
|
|
# Split expression by '+' operator to handle concatenation
|
|
items = []
|
|
parts = re.split(r"\+", expression)
|
|
|
|
for part in parts:
|
|
part = part.strip()
|
|
|
|
# Check if this part is a list literal
|
|
if part.startswith("[") and part.endswith("]"):
|
|
# Extract the content between brackets
|
|
list_content = part[1:-1]
|
|
|
|
# Extract quoted strings, handling both single and double quotes
|
|
for line in list_content.split("\n"):
|
|
# Remove inline comments
|
|
line = re.sub(r"#.*$", "", line)
|
|
|
|
# Find all quoted strings in the line
|
|
string_pattern = r'["\']([^"\']+)["\']'
|
|
items.extend(re.findall(string_pattern, line))
|
|
|
|
# Check if this part is an identifier (not a list literal)
|
|
elif re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", part):
|
|
# Resolve the identifier to a list of labels
|
|
resolved_items = _resolve_identifier_to_labels(part, identifier_to_bzl_file, build_file)
|
|
items.extend(resolved_items)
|
|
|
|
return items
|
|
|
|
|
|
def _resolve_identifier_to_labels(
|
|
identifier: str, identifier_to_bzl_file: dict[str, str], build_file: str = None
|
|
) -> list[str]:
|
|
"""Convert a Bazel identifier to a list of labels.
|
|
|
|
This function resolves identifiers used in list concatenation expressions.
|
|
For example, in: srcs = ["file.js"] + sharding_jscore_passthrough_srcs
|
|
The identifier 'sharding_jscore_passthrough_srcs' would be resolved to its
|
|
corresponding list of labels by reading its definition from the .bzl file.
|
|
|
|
If the identifier is not found in load statements, this function will attempt
|
|
to find it defined in the BUILD.bazel file itself.
|
|
|
|
Args:
|
|
identifier: The identifier name to resolve (e.g., "sharding_jscore_passthrough_srcs")
|
|
identifier_to_bzl_file: Dictionary mapping identifier names to .bzl file paths
|
|
build_file: Path to the BUILD.bazel file for resolving local identifiers
|
|
|
|
Returns:
|
|
List of resolved label strings
|
|
"""
|
|
identifier_pattern = rf"^{re.escape(identifier)}\s*=\s*\[(.+?)\]"
|
|
|
|
if identifier in identifier_to_bzl_file:
|
|
bzl_file_path = identifier_to_bzl_file[identifier]
|
|
with open(bzl_file_path, "r") as f:
|
|
bzl_content = f.read()
|
|
|
|
# Find the identifier definition in the .bzl file
|
|
match = re.search(identifier_pattern, bzl_content, re.MULTILINE | re.DOTALL)
|
|
if not match:
|
|
raise BazelParseError(
|
|
f"Could not find definition of identifier '{identifier}' in '{bzl_file_path}'"
|
|
)
|
|
else:
|
|
# Try to find the identifier in the BUILD.bazel file itself
|
|
with open(build_file, "r") as f:
|
|
build_content = f.read()
|
|
|
|
# Look for identifier definition in BUILD.bazel
|
|
match = re.search(identifier_pattern, build_content, re.MULTILINE | re.DOTALL)
|
|
if not match:
|
|
raise BazelParseError(
|
|
f"Identifier '{identifier}' referenced but not found in load statements "
|
|
f"or in BUILD.bazel file."
|
|
)
|
|
|
|
# Extract all quoted strings from the list
|
|
items = []
|
|
for line in match.group(1).split("\n"):
|
|
# Remove inline comments
|
|
line = re.sub(r"#.*$", "", line)
|
|
|
|
# Find all quoted strings in the line
|
|
string_pattern = r'["\']([^"\']+)["\']'
|
|
items.extend(re.findall(string_pattern, line))
|
|
|
|
return items
|
|
|
|
|
|
def _extract_int_attribute(block: str, attribute_name: str) -> int | None:
|
|
"""Extract an integer attribute from a BUILD.bazel rule block.
|
|
Args:
|
|
block: The text content of a BUILD.bazel rule block
|
|
attribute_name: The name of the attribute to extract (e.g., "group_size")
|
|
Returns:
|
|
Integer value of the attribute. Returns None if attribute not found.
|
|
"""
|
|
# Pattern to match: attribute_name = <integer>
|
|
pattern = rf"{attribute_name}\s*=\s*(\d+)"
|
|
match = re.search(pattern, block)
|
|
if not match:
|
|
return None
|
|
return int(match.group(1))
|
|
|
|
|
|
def _extract_scalar_attribute(block: str, attribute_name: str) -> str:
    """Extract a string attribute from a BUILD.bazel rule block.

    Args:
        block: The text content of a BUILD.bazel rule block
        attribute_name: The name of the attribute to extract (e.g., "group_count_multiplier")

    Returns:
        String value of the attribute. Returns empty string if attribute not found.
    """
    # Pattern to match: attribute_name = "<value>"
    # The lookbehind stops the name from matching as the tail of a longer
    # attribute (e.g. "multiplier" inside "group_count_multiplier").
    quoted_pattern = rf'(?<!\w){re.escape(attribute_name)}\s*=\s*["\']([^"\']+)["\']'
    match = re.search(quoted_pattern, block)
    if match:
        return match.group(1)

    return ""
|
|
|
|
|
|
def resolve_target_to_files(target_label: str) -> str:
    """Map a Bazel target label to a file path or glob pattern.

    Supported target types:
    - Direct file: "//jstests/foo:bar.js" → "jstests/foo/bar.js"
    - all_javascript_files: returns glob pattern "package/*.js"
    - all_subpackage_javascript_files: returns glob pattern "package/**/*.js"

    Args:
        target_label: Bazel target label to resolve

    Returns:
        File path or glob pattern (relative to repo root)

    Raises:
        BazelParseError: If target type is unsupported
    """
    package, target_name = _parse_label(target_label)

    if target_name.endswith(".js"):
        # Direct file reference: the target name is the file name itself.
        relative = target_name
    else:
        # Well-known aggregate targets and the glob each one stands for.
        glob_by_target = {
            "all_javascript_files": "*.js",
            "all_subpackage_javascript_files": "**/*.js",
        }
        relative = glob_by_target.get(target_name)
        if relative is None:
            raise BazelParseError(
                f"Unsupported target type '{target_label}'. "
                f"Supported types: direct .js files, all_javascript_files, all_subpackage_javascript_files"
            )

    return os.path.join(package, relative)
|