SERVER-122303 Move copybara syncs to master (#50337)

GitOrigin-RevId: d2045870869e65f9c864106f9351cb3e3a0db823
This commit is contained in:
Daniel Moody 2026-04-29 13:35:19 -05:00 committed by MongoDB Bot
parent 2575db9dd1
commit fc6c183e2d
13 changed files with 3498 additions and 1113 deletions

1
.gitignore vendored
View File

@ -17,6 +17,7 @@
/.tmp /.tmp
!/.tmp/_placeholder_ !/.tmp/_placeholder_
venv venv
/buildscripts/copybara/copybara_path_rules.bara.sky
*~ *~
*.test_iwyu.h *.test_iwyu.h

View File

@ -8,7 +8,7 @@ filters:
- 10gen/server-root-ownership - 10gen/server-root-ownership
metadata: metadata:
emeritus_approvers: emeritus_approvers:
- visemet # TODO: add back to approvers once project work is finished (Ignore linting) - visemet # TODO SERVER-122669: add back to approvers once project work is finished
- "/BUILD.bazel": - "/BUILD.bazel":
approvers: approvers:
- 10gen/devprod-build - 10gen/devprod-build
@ -61,10 +61,6 @@ filters:
- "BUILD.bazel": - "BUILD.bazel":
approvers: approvers:
- 10gen/devprod-build - 10gen/devprod-build
- "copy.bara.sky":
approvers:
- IamXander
- smcclure15
- "eslint.config.mjs": - "eslint.config.mjs":
approvers: approvers:
- 10gen/devprod-test-infrastructure - 10gen/devprod-test-infrastructure

View File

@ -314,12 +314,25 @@ class LintRunner:
else: else:
print(f"All {type_name} files have BUILD.bazel targets!") print(f"All {type_name} files have BUILD.bazel targets!")
def run_bazel(self, target: str, args: list = []): def run_bazel(self, target: str, args: list | None = None) -> bool:
args = args or []
p = subprocess.run([self.bazel_bin, "run", target] + (["--"] + args if args else [])) p = subprocess.run([self.bazel_bin, "run", target] + (["--"] + args if args else []))
if p.returncode != 0: if p.returncode != 0:
self.fail = True self.fail = True
if not self.keep_going: if not self.keep_going:
raise LinterFail("Linter failed") raise LinterFail("Linter failed")
return False
return True
def check_copybara_generated_evergreen(self, *, fix: bool, dry_run: bool) -> None:
    """Regenerate or verify the generated Copybara Evergreen yaml.

    When ``fix`` is set and ``dry_run`` is not, the generator target is run
    without arguments; otherwise it is run with ``--check``. A success message
    is printed only when ``run_bazel`` reports success.
    """
    generator_target = "//buildscripts/copybara:generate_evergreen"
    print("Checking generated Copybara Evergreen yaml...")

    regenerate = fix and not dry_run
    if regenerate:
        succeeded = self.run_bazel(generator_target)
        success_message = "Generated Copybara Evergreen yaml has been updated"
    else:
        succeeded = self.run_bazel(generator_target, ["--check"])
        success_message = "Generated Copybara Evergreen yaml is up to date"

    if succeeded:
        print(success_message)
def refresh_module_lockfile( def refresh_module_lockfile(
self, self,
@ -533,6 +546,125 @@ def _get_files_changed_since_fork_point(origin_branch: str = "origin/master") ->
return list(file_set) return list(file_set)
def _get_existing_python_files(files_to_lint: list[str]) -> list[str]:
    """Return the ``.py`` entries of ``files_to_lint`` that are present on disk."""
    existing_python_files = []
    for candidate in files_to_lint:
        if not candidate.endswith(".py"):
            continue
        # Paths that no longer exist (for example deleted files) are dropped.
        if os.path.exists(candidate):
            existing_python_files.append(str(candidate))
    return existing_python_files
def _source_label_to_workspace_path(label: str) -> str | None:
"""Return the workspace-relative path represented by a Bazel source-file label."""
local_repository_prefixes = {
"@bazel_rules_mongo": "buildscripts/bazel_rules_mongo",
"@@bazel_rules_mongo": "buildscripts/bazel_rules_mongo",
}
if label.startswith("//"):
label_body = label[2:]
local_root = ""
elif label.startswith("@"):
repository, _, label_body = label.partition("//")
local_root = local_repository_prefixes.get(repository)
if local_root is None:
return None
else:
return None
package, separator, target = label_body.partition(":")
if not separator:
return None
return os.path.normpath(os.path.join(local_root, package, target))
def _get_rules_lint_source_labels_for_changed_files(
    files_to_lint: list[str],
    files_with_targets: list[str],
) -> list[str]:
    """Map changed ``.py``/``.js``/``.mjs`` files to their Bazel source-file labels.

    Labels are returned in the order their files first appear in
    ``files_to_lint``, without duplicates. Files with no matching label in
    ``files_with_targets`` are ignored.
    """
    resolved = (
        (_source_label_to_workspace_path(label), label) for label in files_with_targets
    )
    label_by_path = {path: label for path, label in resolved if path is not None}

    # A dict is used as an insertion-ordered set of labels.
    ordered_labels: dict[str, None] = {}
    for changed_file in files_to_lint:
        if changed_file.endswith((".py", ".js", ".mjs")):
            lookup_key = os.path.normpath(changed_file).removeprefix(f".{os.sep}")
            matched_label = label_by_path.get(lookup_key)
            if matched_label is not None:
                ordered_labels.setdefault(matched_label, None)
    return list(ordered_labels)
def _get_rules_lint_targets_for_source_labels(
    bazel_bin: str,
    source_labels: list[str],
) -> list[str]:
    """Query Bazel for the rule targets that directly own each source-file label.

    One ``bazel query`` is issued per label. Targets are returned in first-seen
    order without duplicates.

    Raises:
        LinterFail: if any query exits with a non-zero status.
    """
    # A dict is used as an insertion-ordered set of owner targets.
    owners: dict[str, None] = {}
    for source_label in source_labels:
        completed = subprocess.run(
            [
                bazel_bin,
                "query",
                f'kind(".* rule", same_pkg_direct_rdeps({source_label}))',
                "--output=label",
            ],
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode != 0:
            print(f"Failed to query rules_lint owner target for {source_label}:")
            error_output = completed.stderr.strip()
            if error_output:
                print(error_output)
            raise LinterFail(f"Failed to query rules_lint owner target for {source_label}")

        for line in completed.stdout.splitlines():
            owner = line.strip()
            if owner:
                owners.setdefault(owner, None)
    return list(owners)
def _get_rules_lint_targets_for_changed_files(
    bazel_bin: str,
    files_to_lint: list[str],
    files_with_targets: list[str],
) -> list[str]:
    """Resolve changed rules_lint-supported files to the Bazel rule targets that own them."""
    changed_source_labels = _get_rules_lint_source_labels_for_changed_files(
        files_to_lint,
        files_with_targets,
    )
    return _get_rules_lint_targets_for_source_labels(bazel_bin, changed_source_labels)
def _should_check_copybara_generated_evergreen(lint_all: bool, files_to_lint: list[str]) -> bool:
    """Decide whether the generated Copybara Evergreen YAML needs to be checked.

    The check runs when linting everything, or when any changed file is
    ``etc/evergreen.yml`` or lives under one of the Copybara directories.
    """
    if lint_all:
        return True

    watched_prefixes = (
        "buildscripts/copybara/",
        "etc/evergreen_yml_components/copybara/",
    )
    watched_files = {
        "etc/evergreen.yml",
    }
    for changed_file in files_to_lint:
        if changed_file in watched_files or changed_file.startswith(watched_prefixes):
            return True
    return False
def get_parsed_args(args): def get_parsed_args(args):
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
@ -666,17 +798,21 @@ def run_rules_lint(bazel_bin: str, args: list[str]):
): ):
lr.run_bazel("//buildscripts:errorcodes", ["--quiet"]) lr.run_bazel("//buildscripts:errorcodes", ["--quiet"])
existing_python_files = _get_existing_python_files(files_to_lint)
if lint_all: if lint_all:
lr.run_bazel("//buildscripts:pyrightlint", ["lint-all"]) lr.run_bazel("//buildscripts:pyrightlint", ["lint-all"])
elif any(file.endswith(".py") for file in files_to_lint): elif existing_python_files:
lr.run_bazel( lr.run_bazel("//buildscripts:pyrightlint", ["lints"] + existing_python_files)
"//buildscripts:pyrightlint",
["lints"] + [str(file) for file in files_to_lint if file.endswith(".py")],
)
if lint_all or "poetry.lock" in files_to_lint or "pyproject.toml" in files_to_lint: if lint_all or "poetry.lock" in files_to_lint or "pyproject.toml" in files_to_lint:
lr.run_bazel("//buildscripts:poetry_lock_check") lr.run_bazel("//buildscripts:poetry_lock_check")
if _should_check_copybara_generated_evergreen(lint_all, files_to_lint):
lr.check_copybara_generated_evergreen(
fix=parsed_args.fix,
dry_run=parsed_args.dry_run,
)
if lint_all or any(file.endswith(".yml") for file in files_to_lint): if lint_all or any(file.endswith(".yml") for file in files_to_lint):
print("Linting evergreen yaml...") print("Linting evergreen yaml...")
lr.run_bazel( lr.run_bazel(
@ -714,9 +850,22 @@ def run_rules_lint(bazel_bin: str, args: list[str]):
if lr.fail: if lr.fail:
raise LinterFail("Linter(s) failed") raise LinterFail("Linter(s) failed")
# Default to linting everything in rules_lint if no path was passed in. # Default to linting changed files in rules_lint if no path was passed in.
if len([arg for arg in args if not arg.startswith("--")]) == 0: if len([arg for arg in args if not arg.startswith("--")]) == 0:
args = ["//..."] + args rules_lint_targets = _get_rules_lint_targets_for_changed_files(
bazel_bin,
files_to_lint,
files_with_targets,
)
if not rules_lint_targets:
print("No changed files with rules_lint owner targets; skipping rules_lint.")
return
print(
f"No explicit rules_lint target provided; running rules_lint on "
f"{len(rules_lint_targets)} owner target(s)."
)
args = rules_lint_targets + args
fix = "" fix = ""
buildevents_fd, buildevents_path = tempfile.mkstemp() buildevents_fd, buildevents_path = tempfile.mkstemp()
@ -804,17 +953,25 @@ def run_rules_lint(bazel_bin: str, args: list[str]):
[bazel_bin, "build"] + fix_args, check=True, stdout=sys.stdout, stderr=sys.stderr [bazel_bin, "build"] + fix_args, check=True, stdout=sys.stdout, stderr=sys.stderr
) )
applied_patch_contents: set[str] = set()
for patch in _jq_files(".patch", fix_buildevents_path): for patch in _jq_files(".patch", fix_buildevents_path):
if "coverage.dat" in patch or not os.path.exists(patch) or not os.path.getsize(patch): if "coverage.dat" in patch or not os.path.exists(patch) or not os.path.getsize(patch):
continue continue
patch_contents = pathlib.Path(patch).read_text(encoding="utf-8")
if patch_contents in applied_patch_contents:
continue
applied_patch_contents.add(patch_contents)
if fix == "print": if fix == "print":
print(f"From {patch}:") print(f"From {patch}:")
with open(patch, "r", encoding="utf-8") as f: print(patch_contents)
print(f.read())
print() print()
elif fix == "patch": elif fix == "patch":
subprocess.run( subprocess.run(
["patch", "-p1"], check=True, stdin=open(patch, "r", encoding="utf-8") ["patch", "-p1"],
check=True,
input=patch_contents,
text=True,
) )
else: else:
print(f"ERROR: unknown fix type {fix}", file=sys.stderr) print(f"ERROR: unknown fix type {fix}", file=sys.stderr)

View File

@ -161,12 +161,11 @@ class RunRulesLintTest(unittest.TestCase):
self._patches = [ self._patches = [
mock.patch.object(lint.platform, "system", return_value="Linux"), mock.patch.object(lint.platform, "system", return_value="Linux"),
mock.patch.object(lint, "create_build_files_in_new_js_dirs"), mock.patch.object(lint, "create_build_files_in_new_js_dirs"),
mock.patch.object(lint, "list_files_with_targets", return_value=[]), mock.patch.object(lint, "list_files_with_targets", return_value=["//:foo.py"]),
mock.patch.object(lint.LintRunner, "refresh_module_lockfile"), mock.patch.object(lint.LintRunner, "refresh_module_lockfile"),
mock.patch.object(lint.LintRunner, "list_files_without_targets"), mock.patch.object(lint.LintRunner, "list_files_without_targets"),
mock.patch.object(lint.LintRunner, "run_bazel"), mock.patch.object(lint.LintRunner, "run_bazel"),
mock.patch.object(lint, "_git_distance", return_value=0), mock.patch.object(lint, "_git_distance", return_value=0),
mock.patch.object(lint, "_get_files_changed_since_fork_point", return_value=[]),
] ]
for p in self._patches: for p in self._patches:
p.start() p.start()
@ -180,8 +179,9 @@ class RunRulesLintTest(unittest.TestCase):
extra_args: list[str], extra_args: list[str],
*, *,
check_report: str | None = None, check_report: str | None = None,
fix_patch: str | None = None, fix_patch: str | list[str] | None = None,
) -> tuple[list[list[str]], bool, lint.LinterFail | None]: changed_files: list[str] | None = None,
) -> tuple[list[list[str]], int, lint.LinterFail | None]:
""" """
Invoke run_rules_lint with the preamble mocked out. Invoke run_rules_lint with the preamble mocked out.
@ -190,25 +190,45 @@ class RunRulesLintTest(unittest.TestCase):
fix_patch: content written into the .patch file that the fix pass "finds". fix_patch: content written into the .patch file that the fix pass "finds".
None means nothing to fix. None means nothing to fix.
Returns (bazel_build_calls, patch_was_applied, raised_exception). Returns (bazel_build_calls, patch_apply_count, raised_exception).
""" """
with tempfile.TemporaryDirectory() as tmpdir: with tempfile.TemporaryDirectory() as tmpdir:
tmpdir_path = pathlib.Path(tmpdir) tmpdir_path = pathlib.Path(tmpdir)
check_report_path = str(tmpdir_path / "check.out") check_report_path = str(tmpdir_path / "check.out")
fix_patch_path = str(tmpdir_path / "fix.patch")
check_events_path = str(tmpdir_path / "check_events") check_events_path = str(tmpdir_path / "check_events")
fix_events_path = str(tmpdir_path / "fix_events") fix_events_path = str(tmpdir_path / "fix_events")
fix_patch_paths: list[str] = []
if changed_files is None:
changed_files = ["foo.py"]
if check_report is not None: if check_report is not None:
pathlib.Path(check_report_path).write_text(check_report, encoding="utf-8") pathlib.Path(check_report_path).write_text(check_report, encoding="utf-8")
if fix_patch is not None: if fix_patch is not None:
pathlib.Path(fix_patch_path).write_text(fix_patch, encoding="utf-8") fix_patches = [fix_patch] if isinstance(fix_patch, str) else fix_patch
for index, fix_patch_contents in enumerate(fix_patches):
fix_patch_path = str(tmpdir_path / f"fix_{index}.patch")
pathlib.Path(fix_patch_path).write_text(
fix_patch_contents,
encoding="utf-8",
)
fix_patch_paths.append(fix_patch_path)
bazel_build_calls: list[list[str]] = [] bazel_build_calls: list[list[str]] = []
patch_applied = [False] patch_apply_count = [0]
def fake_run(args, **kwargs): def fake_run(args, **kwargs):
args = list(args) args = list(args)
if args[:2] == ["bazel", "query"]:
self.assertEqual(
args,
[
"bazel",
"query",
'kind(".* rule", same_pkg_direct_rdeps(//:foo.py))',
"--output=label",
],
)
return subprocess.CompletedProcess(args, 0, stdout="//:foo_lib\n")
if args[:2] == ["bazel", "build"]: if args[:2] == ["bazel", "build"]:
bazel_build_calls.append(args) bazel_build_calls.append(args)
return subprocess.CompletedProcess(args, 0) return subprocess.CompletedProcess(args, 0)
@ -218,7 +238,7 @@ class RunRulesLintTest(unittest.TestCase):
ext = args[3] ext = args[3]
events_path = args[-1] events_path = args[-1]
if ext == ".patch" and events_path == fix_events_path and fix_patch is not None: if ext == ".patch" and events_path == fix_events_path and fix_patch is not None:
stdout = fix_patch_path stdout = "\n".join(fix_patch_paths)
elif ( elif (
ext == ".out" ext == ".out"
and events_path == check_events_path and events_path == check_events_path
@ -229,7 +249,7 @@ class RunRulesLintTest(unittest.TestCase):
stdout = "" stdout = ""
return subprocess.CompletedProcess(args, 0, stdout=stdout) return subprocess.CompletedProcess(args, 0, stdout=stdout)
if args[0] == "patch": if args[0] == "patch":
patch_applied[0] = True patch_apply_count[0] += 1
if "stdin" in kwargs: if "stdin" in kwargs:
kwargs["stdin"].close() kwargs["stdin"].close()
return subprocess.CompletedProcess(args, 0) return subprocess.CompletedProcess(args, 0)
@ -247,6 +267,11 @@ class RunRulesLintTest(unittest.TestCase):
raised: lint.LinterFail | None = None raised: lint.LinterFail | None = None
with ( with (
mock.patch.object(
lint,
"_get_files_changed_since_fork_point",
return_value=changed_files,
),
mock.patch.object(lint.subprocess, "run", side_effect=fake_run), mock.patch.object(lint.subprocess, "run", side_effect=fake_run),
mock.patch.object(lint.tempfile, "mkstemp", side_effect=fake_mkstemp), mock.patch.object(lint.tempfile, "mkstemp", side_effect=fake_mkstemp),
mock.patch.object(lint.os, "close"), mock.patch.object(lint.os, "close"),
@ -257,7 +282,7 @@ class RunRulesLintTest(unittest.TestCase):
except lint.LinterFail as e: except lint.LinterFail as e:
raised = e raised = e
return bazel_build_calls, patch_applied[0], raised return bazel_build_calls, patch_apply_count[0], raised
def test_check_only_no_violations_runs_single_build_and_passes(self): def test_check_only_no_violations_runs_single_build_and_passes(self):
builds, patched, exc = self._run([]) builds, patched, exc = self._run([])
@ -276,7 +301,7 @@ class RunRulesLintTest(unittest.TestCase):
def test_fix_with_only_fixable_violations_applies_patch_and_passes(self): def test_fix_with_only_fixable_violations_applies_patch_and_passes(self):
patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n" patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n"
builds, patched, exc = self._run(["--fix"], fix_patch=patch_content) builds, patched, exc = self._run(["--fix", "foo.py"], fix_patch=patch_content)
self.assertIsNone(exc) self.assertIsNone(exc)
self.assertEqual(len(builds), 2) self.assertEqual(len(builds), 2)
# First build is the fix pass — must carry the fix flags. # First build is the fix pass — must carry the fix flags.
@ -285,11 +310,12 @@ class RunRulesLintTest(unittest.TestCase):
# Second build is the check pass — must not carry fix flags. # Second build is the check pass — must not carry fix flags.
self.assertNotIn("--@aspect_rules_lint//lint:fix", builds[1]) self.assertNotIn("--@aspect_rules_lint//lint:fix", builds[1])
self.assertTrue(patched) self.assertTrue(patched)
self.assertNotIn("//...", builds[0])
def test_fix_with_unfixable_violations_remaining_applies_patch_and_fails(self): def test_fix_with_unfixable_violations_remaining_applies_patch_and_fails(self):
patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n" patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n"
builds, patched, exc = self._run( builds, patched, exc = self._run(
["--fix"], ["--fix", "foo.py"],
fix_patch=patch_content, fix_patch=patch_content,
check_report="F841 local variable `result` is assigned to but never used", check_report="F841 local variable `result` is assigned to but never used",
) )
@ -301,7 +327,8 @@ class RunRulesLintTest(unittest.TestCase):
def test_fix_with_only_unfixable_violations_runs_two_builds_and_fails(self): def test_fix_with_only_unfixable_violations_runs_two_builds_and_fails(self):
builds, patched, exc = self._run( builds, patched, exc = self._run(
["--fix"], check_report="F841 local variable `result` is assigned to but never used" ["--fix", "foo.py"],
check_report="F841 local variable `result` is assigned to but never used",
) )
self.assertIsInstance(exc, lint.LinterFail) self.assertIsInstance(exc, lint.LinterFail)
self.assertEqual(len(builds), 2) self.assertEqual(len(builds), 2)
@ -309,11 +336,210 @@ class RunRulesLintTest(unittest.TestCase):
def test_dry_run_prints_patches_without_applying_them(self): def test_dry_run_prints_patches_without_applying_them(self):
patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n" patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n"
builds, patched, exc = self._run(["--fix", "--dry-run"], fix_patch=patch_content) builds, patched, exc = self._run(
["--fix", "--dry-run", "foo.py"],
fix_patch=patch_content,
)
self.assertIsNone(exc) self.assertIsNone(exc)
self.assertEqual(len(builds), 2) self.assertEqual(len(builds), 2)
self.assertFalse(patched) # patch -p1 must NOT be called in dry-run mode self.assertFalse(patched) # patch -p1 must NOT be called in dry-run mode
def test_fix_skips_duplicate_patch_contents(self):
    """Two patch files with identical contents must produce exactly one `patch` invocation."""
    patch_content = "--- a/foo.py\n+++ b/foo.py\n@@ -1 +1 @@\n-import os,sys\n+import os\n"
    builds, patched, exc = self._run(
        ["--fix", "foo.py"],
        fix_patch=[patch_content, patch_content],
    )
    self.assertIsNone(exc)
    self.assertEqual(len(builds), 2)
    # `patched` is the number of times `patch` was run, not a boolean.
    self.assertEqual(patched, 1)
def test_no_target_fix_defaults_to_changed_rules_lint_files(self):
    """Without an explicit target, both builds use the queried owner rule, not `//...`."""
    builds, patched, exc = self._run(["--fix"])
    self.assertIsNone(exc)
    self.assertEqual(len(builds), 2)
    # The owner rule returned by the faked `bazel query` is what gets built.
    self.assertIn("//:foo_lib", builds[0])
    self.assertIn("//:foo_lib", builds[1])
    # The raw source-file label must not be passed to the build.
    self.assertNotIn("//:foo.py", builds[0])
    self.assertNotIn("//:foo.py", builds[1])
    self.assertNotIn("//...", builds[0])
    self.assertNotIn("//...", builds[1])
    self.assertEqual(patched, 0)
class ExistingPythonFilesTest(unittest.TestCase):
    """Tests for the changed-file helpers: existing-file filtering, label mapping, owner lookup."""

    def test_filters_deleted_python_paths(self):
        """Only `.py` paths reported as existing are returned."""
        files_to_lint = [
            "buildscripts/sync_repo_with_copybara.py",
            "buildscripts/copybara/sync_repo_with_copybara.py",
            "docs/branching/README.md",
        ]
        # Pretend only the moved script exists on disk.
        with mock.patch.object(
            lint.os.path,
            "exists",
            side_effect=lambda path: path == "buildscripts/copybara/sync_repo_with_copybara.py",
        ):
            self.assertEqual(
                lint._get_existing_python_files(files_to_lint),
                ["buildscripts/copybara/sync_repo_with_copybara.py"],
            )

    def test_maps_main_repo_source_label_to_workspace_path(self):
        """A `//pkg:file` label maps to `pkg/file`."""
        self.assertEqual(
            lint._source_label_to_workspace_path("//buildscripts/copybara:generate_evergreen.py"),
            "buildscripts/copybara/generate_evergreen.py",
        )

    def test_maps_local_repository_source_label_to_workspace_path(self):
        """An `@bazel_rules_mongo` label maps under `buildscripts/bazel_rules_mongo`."""
        self.assertEqual(
            lint._source_label_to_workspace_path("@bazel_rules_mongo//codeowners:parsers/foo.py"),
            "buildscripts/bazel_rules_mongo/codeowners/parsers/foo.py",
        )

    def test_get_rules_lint_source_labels_for_changed_files(self):
        """Changed `.py` files resolve to their labels; the `.yml` file is not included."""
        self.assertEqual(
            lint._get_rules_lint_source_labels_for_changed_files(
                [
                    "buildscripts/copybara/generate_evergreen.py",
                    "buildscripts/bazel_rules_mongo/codeowners/parsers/owners_v1.py",
                    "etc/evergreen.yml",
                ],
                [
                    "//buildscripts/copybara:generate_evergreen.py",
                    "@bazel_rules_mongo//codeowners:parsers/owners_v1.py",
                    "//etc:evergreen.yml",
                ],
            ),
            [
                "//buildscripts/copybara:generate_evergreen.py",
                "@bazel_rules_mongo//codeowners:parsers/owners_v1.py",
            ],
        )

    def test_maps_canonical_local_repository_source_label_to_workspace_path(self):
        """The canonical `@@bazel_rules_mongo` form maps the same way as the `@` form."""
        self.assertEqual(
            lint._source_label_to_workspace_path("@@bazel_rules_mongo//codeowners:parsers/foo.py"),
            "buildscripts/bazel_rules_mongo/codeowners/parsers/foo.py",
        )

    def test_get_rules_lint_targets_for_source_labels_queries_owner_rules(self):
        """The helper issues the expected `bazel query` and returns each output line as a target."""

        def fake_run(args, **kwargs):
            # Pin the exact query command line and the subprocess keyword arguments.
            self.assertEqual(
                args,
                [
                    "bazel",
                    "query",
                    'kind(".* rule", same_pkg_direct_rdeps(//buildscripts/copybara:generate_evergreen.py))',
                    "--output=label",
                ],
            )
            self.assertTrue(kwargs["capture_output"])
            self.assertTrue(kwargs["text"])
            self.assertFalse(kwargs["check"])
            return subprocess.CompletedProcess(
                args,
                0,
                stdout=(
                    "//buildscripts/copybara:generate_evergreen\n"
                    "//buildscripts/copybara:generate_evergreen_test\n"
                ),
            )

        with mock.patch.object(lint.subprocess, "run", side_effect=fake_run):
            self.assertEqual(
                lint._get_rules_lint_targets_for_source_labels(
                    "bazel",
                    ["//buildscripts/copybara:generate_evergreen.py"],
                ),
                [
                    "//buildscripts/copybara:generate_evergreen",
                    "//buildscripts/copybara:generate_evergreen_test",
                ],
            )

    def test_get_rules_lint_targets_for_changed_files_returns_owner_rules(self):
        """The wrapper forwards the resolved source labels to the owner-target lookup."""
        with mock.patch.object(
            lint,
            "_get_rules_lint_targets_for_source_labels",
            return_value=["//buildscripts/copybara:generate_evergreen"],
        ) as mock_get_targets:
            self.assertEqual(
                lint._get_rules_lint_targets_for_changed_files(
                    "bazel",
                    ["buildscripts/copybara/generate_evergreen.py"],
                    ["//buildscripts/copybara:generate_evergreen.py"],
                ),
                ["//buildscripts/copybara:generate_evergreen"],
            )
        mock_get_targets.assert_called_once_with(
            "bazel",
            ["//buildscripts/copybara:generate_evergreen.py"],
        )
class CopybaraGeneratedEvergreenCheckTest(unittest.TestCase):
    """Tests for when and how the generated Copybara Evergreen yaml check is run."""

    def test_runs_for_lint_all(self):
        """`lint_all` enables the check even with no changed files."""
        self.assertTrue(lint._should_check_copybara_generated_evergreen(True, []))

    def test_runs_for_copybara_config_change(self):
        """A change under `buildscripts/copybara/` enables the check."""
        self.assertTrue(
            lint._should_check_copybara_generated_evergreen(
                False,
                ["buildscripts/copybara/v8_2.sky"],
            )
        )

    def test_runs_for_generated_copybara_yaml_change(self):
        """A change under `etc/evergreen_yml_components/copybara/` enables the check."""
        self.assertTrue(
            lint._should_check_copybara_generated_evergreen(
                False,
                ["etc/evergreen_yml_components/copybara/copybara_gen.yml"],
            )
        )

    def test_skips_unrelated_files(self):
        """Changes outside the Copybara paths do not enable the check."""
        self.assertFalse(
            lint._should_check_copybara_generated_evergreen(
                False,
                ["src/mongo/db/query/query.cpp"],
            )
        )

    def test_check_mode_runs_generated_yaml_check(self):
        """Without `fix`, the generator target is run with `--check`."""
        runner = lint.LintRunner(keep_going=False, bazel_bin="bazel")
        with mock.patch.object(runner, "run_bazel", return_value=True) as mock_run_bazel:
            # Swallow the progress messages printed by the method under test.
            with contextlib.redirect_stdout(io.StringIO()):
                runner.check_copybara_generated_evergreen(fix=False, dry_run=False)
        mock_run_bazel.assert_called_once_with(
            "//buildscripts/copybara:generate_evergreen",
            ["--check"],
        )

    def test_fix_mode_runs_generated_yaml_writer(self):
        """With `fix` and no `dry_run`, the generator target is run without arguments."""
        runner = lint.LintRunner(keep_going=False, bazel_bin="bazel")
        with mock.patch.object(runner, "run_bazel", return_value=True) as mock_run_bazel:
            with contextlib.redirect_stdout(io.StringIO()):
                runner.check_copybara_generated_evergreen(fix=True, dry_run=False)
        mock_run_bazel.assert_called_once_with("//buildscripts/copybara:generate_evergreen")

    def test_fix_dry_run_keeps_generated_yaml_check_only(self):
        """`fix` combined with `dry_run` still only runs the `--check` form."""
        runner = lint.LintRunner(keep_going=False, bazel_bin="bazel")
        with mock.patch.object(runner, "run_bazel", return_value=True) as mock_run_bazel:
            with contextlib.redirect_stdout(io.StringIO()):
                runner.check_copybara_generated_evergreen(fix=True, dry_run=True)
        mock_run_bazel.assert_called_once_with(
            "//buildscripts/copybara:generate_evergreen",
            ["--check"],
        )
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -2,18 +2,21 @@ import glob
import os import os
import pathlib import pathlib
from functools import cache from functools import cache
from typing import Any
import yaml import yaml
# Parser for OWNERS.yml files version 1.0.0 # Parser for OWNERS.yml files version 1.0.0
class OwnersParserV1: class OwnersParserV1:
def parse(self, directory: str, owners_file_path: str, contents: dict[str, any]) -> list[str]: def parse(self, directory: str, owners_file_path: str, contents: dict[str, Any]) -> list[str]:
lines = [] lines = []
no_parent_owners = False no_parent_owners = False
no_auto_approver = False
if "options" in contents: if "options" in contents:
options = contents["options"] options = contents["options"]
no_parent_owners = "no_parent_owners" in options and options["no_parent_owners"] no_parent_owners = "no_parent_owners" in options and options["no_parent_owners"]
no_auto_approver = "no_auto_approver" in options and options["no_auto_approver"]
if no_parent_owners: if no_parent_owners:
# Specifying no owners will ensure that no file in this directory has an owner unless it # Specifying no owners will ensure that no file in this directory has an owner unless it
@ -63,7 +66,7 @@ class OwnersParserV1:
else: else:
process_owner(approver) process_owner(approver)
# Add the auto approve bot # Add the auto approve bot
if self.should_add_auto_approver(): if self.should_add_auto_approver() and not no_auto_approver:
process_owner("svc-auto-approve-bot") process_owner("svc-auto-approve-bot")
lines.append(self.get_owner_line(directory, pattern, owners)) lines.append(self.get_owner_line(directory, pattern, owners))

View File

@ -1,907 +0,0 @@
"""Module for syncing a repo with Copybara and setting up configurations."""
from __future__ import annotations
import argparse
import fileinput
import os
import re
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import NamedTuple, Optional
from github import GithubIntegration
from buildscripts.util.read_config import read_config_file
from evergreen.api import RetryingEvergreenApi
# Secret values that redact_secrets() replaces with "<REDACTED>" in printed text.
# This will be populated by the GitHub JWT tokens (1 hour lifetimes).
REDACTED_STRINGS = []

# This is the list of file globs to check for
# after the dryrun has created the destination output tree.
# NOTE(review): the code that consumes this list is outside this chunk; the
# entries look like private paths that must not reach the destination - confirm.
EXCLUDED_PATTERNS = [
    "src/mongo/db/modules/",
    "buildscripts/modules/",
    ".github/workflows/",
    "src/third_party/private/",
    "sbom.private.json",
    ".agents/",
    ".cursor/",
    ".claude/",
    "AGENTS.md",
    "CLAUDE.md",
    ".github/CODEOWNERS",
    "monguard/",
    "etc/evergreen_yml_components/",
]

# Copybara output fragments that indicate a benign outcome.
# NOTE(review): presumably matched against failed-command output so the sync is
# not reported as a failure; the matching code is outside this chunk - confirm.
ACCEPTABLE_ERROR_MESSAGES = [
    # Indicates the two repositories are identical.
    "No new changes to import for resolved ref",
    # Indicates differences exist but no changes affect the destination (for example: exclusion rules).
    "Iterative workflow produced no changes in the destination for resolved ref",
    # Indicates commits have already been synced over with another copybara task.
    "Updates were rejected because the remote contains work that you do",
]

# NOTE(review): looks like the name of a Copybara config variable holding the
# pinned source commit for the prod workflow; usage is outside this chunk - confirm.
PROD_PINNED_REF_VARIABLE = "prodRefForPinnedSourceCommit"

# Commit hash of Copybara to use (v20251110)
COPYBARA_COMMIT_HASH = "3f050c9e08b84aeda98875bf1b02a3288d351333"
class CopybaraRepoConfig(NamedTuple):
    """Copybara source and destination repo sync configuration."""

    # Git remote URL of the repository.
    git_url: Optional[str] = None
    # "owner/repo" name extracted from the git URL.
    repo_name: Optional[str] = None
    # Branch of the repository taking part in the sync.
    branch: Optional[str] = None
class CopybaraConfig(NamedTuple):
    """Copybara sync configuration: one source repo and one destination repo."""

    source: Optional[CopybaraRepoConfig] = None
    destination: Optional[CopybaraRepoConfig] = None

    @classmethod
    def empty(cls) -> CopybaraConfig:
        """Return a configuration with neither source nor destination set."""
        return cls(source=None, destination=None)

    @classmethod
    def from_copybara_sky_file(cls, workflow: str, branch: str, file_path: str) -> CopybaraConfig:
        """Build a configuration from the URL assignments in a Copybara ``.sky`` file.

        The destination URL is read from ``prodUrl`` when ``workflow`` is
        ``"prod"`` and from ``testUrl`` otherwise. An empty configuration is
        returned when a URL assignment is missing or an "owner/repo" name
        cannot be extracted from a URL.
        """
        with open(file_path, "r") as sky_file:
            raw_text = sky_file.read()

        # Drop inline comments so key/value regexes do not match commented-out config.
        text = re.sub(r"#.*", "", raw_text)

        # Each pattern captures the URL string inside the double quotes.
        destination_pattern = r'prodUrl = "(.+?)"' if workflow == "prod" else r'testUrl = "(.+?)"'
        source_url_match = re.search(r'sourceUrl = "(.+?)"', text)
        destination_url_match = re.search(destination_pattern, text)
        if source_url_match is None or destination_url_match is None:
            return cls.empty()

        # Extract "owner/repo" from a git remote URL, e.g. ".../10gen/mongo.git".
        repo_name_regex = re.compile(r"([^:/]+/[^:/]+)\.git")
        repo_configs = []
        for url_match in (source_url_match, destination_url_match):
            git_url = url_match.group(1)
            repo_name_match = repo_name_regex.search(git_url)
            if repo_name_match is None:
                return cls.empty()
            repo_configs.append(
                CopybaraRepoConfig(
                    git_url=git_url,
                    repo_name=repo_name_match.group(1),
                    branch=branch,
                )
            )

        source_repo, destination_repo = repo_configs
        return cls(source=source_repo, destination=destination_repo)

    def is_complete(self) -> bool:
        """Return True when both the source and the destination are set."""
        return not (self.source is None or self.destination is None)
def run_command(command):
    """Run a shell command, echoing its redacted output as it is produced.

    The command line and every output line are passed through
    ``redact_secrets`` before being printed or stored. stderr is merged into
    stdout.

    Args:
        command: Shell command line; it is executed with ``shell=True``.

    Returns:
        The full redacted output of the command as one string.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status. The exception carries the redacted command line and the
            redacted output.
    """
    redacted_command = redact_secrets(command)
    print(redacted_command)

    output_lines = []
    # The context manager closes the stdout pipe and waits for the process,
    # including when redaction or printing raises part-way through.
    with subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # Merge stderr into stdout
        text=True,
        bufsize=1,
    ) as process:
        for line in process.stdout:
            safe_line = redact_secrets(line)
            print(safe_line, end="")
            output_lines.append(safe_line)

    full_output = "".join(output_lines)

    if process.returncode != 0:
        # Attach the output so the caller's except block can read it.
        raise subprocess.CalledProcessError(
            process.returncode, redacted_command, output=full_output
        )

    return full_output
def redact_secrets(text: str) -> str:
    """Return ``text`` with known secrets and GitHub URL tokens replaced by ``<REDACTED>``."""
    # Exact known values first (runtime-generated app tokens); empty entries are skipped.
    known_secrets = [secret for secret in REDACTED_STRINGS if secret]
    for secret in known_secrets:
        text = text.replace(secret, "<REDACTED>")

    # Then any credential embedded in a tokenized GitHub URL, which catches
    # secrets that are not listed in REDACTED_STRINGS.
    token_url_pattern = r"(https://x-access-token:)[^@\s]+(@github\.com)"
    return re.sub(token_url_pattern, r"\1<REDACTED>\2", text)
def create_mongodb_bot_gitconfig():
    """Write ``~/mongodb-bot.gitconfig`` containing the MongoDB Bot user name and email."""
    gitconfig_text = """
[user]
name = MongoDB Bot
email = mongo-bot@mongodb.com
"""

    with open(os.path.expanduser("~/mongodb-bot.gitconfig"), "w") as gitconfig_file:
        gitconfig_file.write(gitconfig_text)

    print("mongodb-bot.gitconfig file created.")
def get_installation_access_token(
    app_id: int, private_key: str, installation_id: int
) -> Optional[str]:  # noqa: D407,D413
    """
    Request an installation access token for a GitHub App.

    Args:
    - app_id (int): The application ID for GitHub App.
    - private_key (str): The private key associated with the GitHub App.
    - installation_id (int): The installation ID of the GitHub App for a particular account.

    Returns
    - Optional[str]: The token string, or `None` (after printing an error) when no
      authorization object is returned.
    """
    auth = GithubIntegration(app_id, private_key).get_access_token(installation_id)
    if not auth:
        print("Error obtaining installation token")
        return None
    return auth.token
def send_failure_message_to_slack(expansions, error_message):
    """
    Post a Copybara failure notice to the #devprod-build-automation Slack channel.

    :param expansions: Dictionary containing various expansion data; only "task_id" is read
        (a missing key is rendered as "None" in the task link).
    :param error_message: Error text to report; only its first 200 characters are sent,
        followed by a truncation marker when the text was longer.
    """
    max_chars = 200
    shown_error = error_message[:max_chars]
    suffix = "... (truncated)" if len(error_message) > max_chars else ""
    task_id = expansions.get("task_id")

    lines = [
        "Evergreen task '* Copybara Sync Between Repos' failed",
        "See troubleshooting doc <http://go/copybara-troubleshoot|here>.",
        f"See task: <https://spruce.mongodb.com/task/{task_id}|here>.",
        f"Error message: {shown_error}{suffix}",
    ]

    RetryingEvergreenApi.get_api(config_file=".evergreen.yml").send_slack_message(
        target="#devprod-build-automation",
        msg="\n".join(lines),
    )
def check_destination_branch_exists(copybara_config: CopybaraConfig) -> bool:
    """
    Report whether the configured destination branch is listed by the destination remote.

    Args:
    - copybara_config (CopybaraConfig): Copybara configuration.

    Returns
    - bool: `True` if the branch name appears anywhere in the `git ls-remote` output for
      the destination repository, `False` otherwise.
    """
    destination_url = copybara_config.destination.git_url
    destination_branch = copybara_config.destination.branch
    listing = run_command(f"git ls-remote {destination_url} {destination_branch}")
    # Plain substring test against the raw ls-remote output.
    return copybara_config.destination.branch in listing
def find_matching_commit(dir_source_repo: str, dir_destination_repo: str) -> Optional[str]:
    """
    Walk the source history backwards until a commit is found that the destination references.

    A destination commit "matches" when its message contains
    ``GitOrigin-RevId: <source commit hash>``. The process's working directory is changed to
    the source repository, and that checkout is moved back one commit (``HEAD~1``) per
    iteration, so the source clone is left in a detached state.

    Args:
    - dir_source_repo: The directory of the source repository.
    - dir_destination_repo: The directory of the destination repository.

    Returns
    The hash of the matching destination commit if found; otherwise prints a message and
    returns None once the first commit of the source repository has been reached.
    """
    head_hash_command = 'git log --pretty=format:"%H" -1'

    def lookup_in_destination(source_revision: str) -> str:
        # Newest destination commit whose message mentions the given source revision.
        return run_command(
            f'git --git-dir={dir_destination_repo}/.git log -1 --pretty=format:"%H" --grep "GitOrigin-RevId: {source_revision}"'
        )

    os.chdir(dir_source_repo)

    match = lookup_in_destination(run_command(head_hash_command))
    root_commit = run_command("git rev-list --max-parents=0 HEAD")

    # Exactly one output line means the destination reported a single matching hash.
    while len(match.splitlines()) != 1:
        checked_out = run_command(head_hash_command)
        if checked_out.strip() == root_commit.strip():
            print(
                "No matching commit found, and have reverted to the first commit of the repository."
            )
            return None

        # Step the source checkout back by one commit and look again.
        run_command("git checkout HEAD~1")
        match = lookup_in_destination(run_command(head_hash_command))

    return match
def has_only_destination_repo_remote(repo_name: str):
    """
    Check that ``.git/config`` in the current directory lists exactly one remote URL and
    that this URL ends with ``{repo_name}.git``.

    Returns
        bool: True if the single configured URL belongs to the destination repository,
        False (after printing a message) otherwise.
    """
    with open(os.path.join(".git", "config"), "r") as config_file:
        # Collect every "url = ....git" value found in the git config.
        remote_urls = re.findall(r"url\s*=\s*(.*?\.git\s*)", config_file.read())

    expected_suffix = f"{repo_name}.git"
    if len(remote_urls) != 1 or not remote_urls[0].strip().endswith(expected_suffix):
        print(
            f"The current directory's Git repository contains not only the '{repo_name}.git' remote URL."
        )
        return False
    return True
def push_branch_to_destination_repo(
    destination_repo_dir: str, copybara_config: CopybaraConfig, branching_off_commit: str
):
    """
    Push the newly created branch to the destination remote after a series of safety checks.

    The working directory is changed to ``destination_repo_dir`` and is not restored here.

    Args:
        destination_repo_dir (str): Path to the cloned destination repository.
        copybara_config (CopybaraConfig): Copybara configuration.
        branching_off_commit (str): The commit hash of the matching commit in the destination repository.

    Raises
        Exception: If the clone has remotes other than the destination repository, or if the
            checked-out commit differs from ``branching_off_commit``.
    """
    os.chdir(destination_repo_dir)

    # Guard 1: the clone must know about the destination remote and nothing else.
    if not has_only_destination_repo_remote(copybara_config.destination.repo_name):
        raise Exception(f"{destination_repo_dir} git repo has not only the destination repo remote")

    # Guard 2: HEAD must be exactly the commit we decided to branch from.
    head_commit = run_command('git log --pretty=format:"%H" -1')
    if head_commit != branching_off_commit:
        raise Exception(
            "The new branch top commit does not match the branching_off_commit. Aborting push."
        )

    # Guard 3: ask git which remote-tracking branches contain the commit, so nothing that is
    # not already in the destination repository gets pushed. This relies on run_command
    # raising when the git invocation fails.
    run_command(f"git branch -r --contains {head_commit}")

    run_command(
        f"git push {copybara_config.destination.git_url} {copybara_config.destination.branch}"
    )
def handle_failure(expansions, error_message, output_logs):
    """
    Notify Slack about a Copybara failure unless the logs contain an acceptable message.

    :param expansions: Expansion data forwarded to the Slack notifier.
    :param error_message: Error text forwarded to the Slack notifier.
    :param output_logs: Captured Copybara output that is checked for acceptable messages.
    """
    if has_acceptable_copybara_message(output_logs):
        # Known benign outcome; stay quiet.
        return
    send_failure_message_to_slack(expansions, error_message)
def create_branch_from_matching_commit(copybara_config: CopybaraConfig) -> None:
    """
    Create the destination branch from the newest commit shared by source and destination.

    Both repositories are cloned into a fresh, timestamped ``make_branch_attempt_*`` directory
    under the current working directory. When a matching commit is found, the branch is
    created from it and pushed; otherwise the process exits with status 1. The original
    working directory is always restored.

    Args:
        copybara_config (CopybaraConfig): Copybara configuration.
    """
    original_dir = os.getcwd()

    try:
        # Timestamped scratch directory so repeated attempts do not collide.
        timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        working_dir = os.path.join(original_dir, "make_branch_attempt_" + timestamp)
        os.makedirs(working_dir, exist_ok=True)
        os.chdir(working_dir)

        cloned_source_repo_dir = os.path.join(working_dir, "source-repo")
        cloned_destination_repo_dir = os.path.join(working_dir, "destination-repo")

        # Source: the configured branch. Destination: whatever the remote's default branch is.
        run_command(
            f"git clone -b {copybara_config.source.branch}"
            f" {copybara_config.source.git_url} {cloned_source_repo_dir}"
        )
        run_command(
            f"git clone {copybara_config.destination.git_url} {cloned_destination_repo_dir}"
        )

        commit = find_matching_commit(cloned_source_repo_dir, cloned_destination_repo_dir)
        if commit is None:
            print(
                f"Could not find matching commits between {copybara_config.destination.repo_name}/master"
                f" and {copybara_config.source.repo_name}/{copybara_config.source.branch} to branching off"
            )
            sys.exit(1)
        else:
            # The source clone is no longer needed; make sure it is really gone before pushing.
            shutil.rmtree(cloned_source_repo_dir)
            if os.path.exists(cloned_source_repo_dir):
                raise Exception(cloned_source_repo_dir + ": did not get removed")

            # Branch off the matching commit inside the destination clone, then push.
            os.chdir(cloned_destination_repo_dir)
            run_command(f"git checkout -b {copybara_config.destination.branch} {commit}")
            push_branch_to_destination_repo(cloned_destination_repo_dir, copybara_config, commit)
    except Exception as err:
        print(f"An error occurred when creating destination branch: {err}")
        raise
    finally:
        # Restore the caller's working directory on every path.
        os.chdir(original_dir)
def is_current_repo_origin(expected_repo: str) -> bool:
    """Return True when the current repo's origin URL ends with '<expected_repo>.git'.

    Returns False when the origin URL cannot be read or does not end in 'owner/repo.git'.
    """
    try:
        origin_url = run_command("git config --get remote.origin.url").strip()
    except subprocess.CalledProcessError:
        return False

    # Works for SSH and HTTPS remotes alike: grab the trailing "owner/repo" before ".git".
    found = re.search(r"([^/:]+/[^/:]+)\.git$", origin_url)
    if found is None:
        return False
    return found.group(1) == expected_repo
def sky_file_has_version_id(config_file: str, version_id: str) -> bool:
    """Return True when the text of ``config_file`` contains ``str(version_id)`` anywhere."""
    sky_text = Path(config_file).read_text()
    return sky_text.find(str(version_id)) != -1
def branch_exists_remote(remote_url: str, branch_name: str) -> bool:
    """Return True if `git ls-remote --heads` lists anything for the branch on the remote.

    A failing git invocation (CalledProcessError) is reported as False.
    """
    try:
        heads = run_command(f"git ls-remote --heads {remote_url} {branch_name}")
    except subprocess.CalledProcessError:
        return False
    # Blank output means the remote has no head with that name.
    return heads.strip() != ""
def canonicalize_excluded_pattern(pattern: str) -> str:
    """
    Normalize an exclusion pattern so equivalent spellings compare equal.

    Supported forms:
    - Root-relative exact file path, e.g. "AGENTS.md" (returned unchanged apart from
      surrounding whitespace).
    - Root-relative directory subtree, e.g. "monguard/" or "monguard/**" (both returned
      as "monguard/").

    Empty patterns, patterns that reduce to nothing, and patterns that still contain glob
    metacharacters after normalization print an error and exit with status 1.
    """
    stripped = pattern.strip()
    if not stripped:
        print("ERROR: Found empty exclusion pattern.")
        sys.exit(1)

    # Drop one trailing "/**" and then any trailing slashes. If either removal changed the
    # text, the pattern names a directory subtree rather than a single file.
    path_only = stripped.removesuffix("/**").rstrip("/")
    names_directory = path_only != stripped

    if not path_only:
        print(f"ERROR: Invalid exclusion pattern '{pattern}'.")
        sys.exit(1)

    # Anything that still looks like a glob cannot be evaluated by the local matching
    # logic, so it is rejected explicitly.
    if not set("*?[]{}").isdisjoint(path_only):
        print(
            "ERROR: Unsupported exclusion pattern "
            f"'{pattern}'. Only exact paths and directory subtrees are supported."
        )
        sys.exit(1)

    if names_directory:
        return f"{path_only}/"
    return path_only
def get_checkout_relative_path(file_path: Path, preview_dir: Path) -> str:
    """Return ``file_path`` as a POSIX-style path relative to its first "checkout" component.

    When no component is named "checkout", the path is made relative to ``preview_dir``.
    """
    components = file_path.parts
    if "checkout" not in components:
        return file_path.relative_to(preview_dir).as_posix()

    # Keep only what lies below the first "checkout" directory.
    below_checkout = components[components.index("checkout") + 1 :]
    return Path(*below_checkout).as_posix()
def matches_excluded_pattern(path_in_checkout: str, pattern: str) -> bool:
    """
    Decide whether a checkout-relative path is covered by an exclusion pattern.

    The pattern is canonicalized first. A canonical pattern ending in "/" covers that
    directory itself and everything beneath it, anchored at the checkout root; any other
    pattern must equal the relative path exactly.
    """
    canonical = canonicalize_excluded_pattern(pattern)

    if not canonical.endswith("/"):
        # File pattern: exact match only.
        return path_in_checkout == canonical

    directory = canonical.rstrip("/")
    if path_in_checkout == directory:
        return True
    return path_in_checkout.startswith(directory + "/")
def extract_sky_excluded_patterns(config_file: str) -> set[str]:
    """
    Return the quoted entries of the ``exclude=[...]`` list of ``origin_files = glob(...)``.

    Everything from a ``#`` to the end of its line is discarded before parsing, so
    commented-out entries are ignored. Prints an error and exits with status 1 when the
    ``origin_files`` glob with an exclude list cannot be located in ``config_file``.
    """
    uncommented = re.sub(r"#.*", "", Path(config_file).read_text())

    # Capture only the body of the exclude list. DOTALL lets the glob call span lines, and
    # the list's closing "]" must be followed directly (whitespace aside) by the ")" of glob(.
    located = re.search(
        r"origin_files\s*=\s*glob\((?:.|\n)*?exclude\s*=\s*\[(.*?)\]\s*\)",
        uncommented,
        flags=re.DOTALL,
    )
    if located is None:
        print(f"ERROR: Could not locate origin_files exclude list in {config_file}")
        sys.exit(1)

    # Each entry is a double-quoted string; keep the text between the quotes.
    return set(re.findall(r'"([^"]+)"', located.group(1)))
def has_acceptable_copybara_message(output_logs: Optional[str]) -> bool:
    """Return True when ``output_logs`` contains any entry of ACCEPTABLE_ERROR_MESSAGES.

    Missing or empty logs never count as acceptable.
    """
    if not output_logs:
        return False
    for acceptable_message in ACCEPTABLE_ERROR_MESSAGES:
        if acceptable_message in output_logs:
            return True
    return False
def check_script_exclusions_match_sky(config_file: str):
    """
    Verify that the exclusions in ``config_file`` equal this script's EXCLUDED_PATTERNS.

    Both sides are canonicalized before comparison. Any difference is reported (patterns
    missing from the config and patterns only present in the config are listed separately)
    and the process exits with status 1.
    """
    from_sky = set(map(canonicalize_excluded_pattern, extract_sky_excluded_patterns(config_file)))
    from_script = set(map(canonicalize_excluded_pattern, EXCLUDED_PATTERNS))

    missing = sorted(from_script - from_sky)
    extra = sorted(from_sky - from_script)
    if not (missing or extra):
        return

    if missing:
        print(f"ERROR: Missing required exclusions in {config_file}: " + ", ".join(missing))
    if extra:
        print(f"ERROR: Unexpected exclusions in {config_file}: " + ", ".join(extra))
    sys.exit(1)
def pin_prod_workflow_ref_to_commit(config_file: str, source_commit_sha: str):
    """
    Rewrite the PROD_PINNED_REF_VARIABLE assignment in ``config_file`` to ``source_commit_sha``.

    The file is updated in place. Prints an error and exits with status 1 when no
    ``<variable> = "<value>"`` assignment line is found.
    """
    sky_path = Path(config_file)
    original_text = sky_path.read_text()

    # Matches a whole assignment line for the variable. Group 1 is the leading whitespace,
    # which is carried over so the indentation is unchanged. Every matching line is rewritten.
    assignment = re.compile(
        rf"^(\s*){re.escape(PROD_PINNED_REF_VARIABLE)}\s*=\s*\"[^\"]*\"\s*$",
        flags=re.MULTILINE,
    )
    if assignment.search(original_text) is None:
        print(
            f"ERROR: Could not pin prod workflow ref in {config_file}. "
            f'Expected to find variable assignment for "{PROD_PINNED_REF_VARIABLE}".'
        )
        sys.exit(1)

    def pinned_line(found):
        # A callable replacement keeps the SHA from being parsed as a regex template.
        return f'{found.group(1)}{PROD_PINNED_REF_VARIABLE} = "{source_commit_sha}"'

    sky_path.write_text(assignment.sub(pinned_line, original_text))
def get_prod_pinned_source_ref(config_file: str) -> str:
    """
    Read the value assigned to PROD_PINNED_REF_VARIABLE in ``config_file``.

    The assignment line may carry a trailing ``#`` comment. Prints an error and exits with
    status 1 when the assignment is missing or its value is blank after stripping.

    Returns the stripped value of the first matching assignment.
    """
    sky_text = Path(config_file).read_text()
    assignment = re.compile(
        rf'^\s*{re.escape(PROD_PINNED_REF_VARIABLE)}\s*=\s*"([^"]+)"\s*(?:#.*)?$',
        flags=re.MULTILINE,
    )

    found = assignment.search(sky_text)
    if found is None:
        print(
            f"ERROR: Could not read pinned prod source ref from {config_file}. "
            f'Expected to find variable assignment for "{PROD_PINNED_REF_VARIABLE}".'
        )
        sys.exit(1)

    pinned_ref = found.group(1).strip()
    if pinned_ref == "":
        # The regex requires at least one character, so this only triggers for whitespace.
        print(
            f"ERROR: {PROD_PINNED_REF_VARIABLE} in {config_file} is empty. "
            "Expected a branch name such as master or v8.0."
        )
        sys.exit(1)
    return pinned_ref
def get_prod_copybara_config_from_master(current_dir: str) -> str:
    """
    Materialize the prod Copybara config from the ref pinned in the local copy.bara.sky.

    The pinned ref is read from ``<current_dir>/copy.bara.sky``, fetched from ``origin`` and
    resolved to a commit. That commit's ``copy.bara.sky`` is written to
    ``<current_dir>/tmp_copybara_config_from_master.sky`` with its pinned-ref variable
    replaced by the resolved commit SHA.

    Returns the path of the generated config file.
    """
    pinned_ref = get_prod_pinned_source_ref(os.path.join(current_dir, "copy.bara.sky"))

    run_command(f"git fetch origin {pinned_ref}")
    pinned_sha = run_command(f"git rev-parse origin/{pinned_ref}").strip()

    generated_config = os.path.join(current_dir, "tmp_copybara_config_from_master.sky")
    Path(generated_config).write_text(
        run_command(f"git --no-pager show {pinned_sha}:copy.bara.sky")
    )
    pin_prod_workflow_ref_to_commit(generated_config, pinned_sha)
    return generated_config
def delete_remote_branch(remote_url: str, branch_name: str):
    """Delete ``branch_name`` from ``remote_url``; do nothing when the branch is absent."""
    if not branch_exists_remote(remote_url, branch_name):
        return
    print(f"Deleting remote branch {branch_name} from {remote_url}")
    run_command(f"git push {remote_url} --delete {branch_name}")
def push_test_branches(copybara_config, expansions):
    """
    Publish the copybara test branch to both the destination and the source repository.

    The destination receives the branch as currently committed; the working-tree changes are
    then committed (message includes ``expansions["version_id"]``) and that state is pushed
    to the source. Exits with status 1 unless source and destination branch names are equal,
    both start with "copybara_test_branch", and the current repo's origin is 10gen/mongo.
    """
    test_prefix = "copybara_test_branch"
    source = copybara_config.source
    destination = copybara_config.destination

    # --- Safety checks ---
    if source.branch != destination.branch:
        print(
            f"ERROR: test branches must match: source={copybara_config.source.branch} dest={copybara_config.destination.branch}"
        )
        sys.exit(1)

    if not (source.branch.startswith(test_prefix) and destination.branch.startswith(test_prefix)):
        print(f"ERROR: can not push non copybara test branch: {copybara_config.source.branch}")
        sys.exit(1)

    if not is_current_repo_origin("10gen/mongo"):
        print("Refusing to push copybara_test_branch to non 10gen/mongo repo")
        sys.exit(1)

    # Remove leftovers from earlier runs before pushing fresh branches.
    delete_remote_branch(copybara_config.source.git_url, copybara_config.source.branch)
    delete_remote_branch(copybara_config.destination.git_url, copybara_config.destination.branch)

    # --- Destination: the branch as committed right now, before the patch commit below ---
    run_command(f"git remote add dest_repo {copybara_config.destination.git_url}")
    run_command(f"git checkout -B {copybara_config.destination.branch}")
    run_command(f"git push dest_repo {copybara_config.destination.branch}")

    # --- Source: same branch plus a commit holding the working-tree changes ---
    run_command(f'git commit -am "Evergreen patch for version_id {expansions["version_id"]}"')
    run_command(f"git remote add source_repo {copybara_config.source.git_url}")
    run_command(f"git push source_repo {copybara_config.source.branch}")
def main():
    """
    Clone the Copybara repo, build its Docker image, and set up and run migrations.

    Steps, in order:
    1. Parse ``--expansions-file`` and ``--workflow`` ("test", the default, or "prod").
    2. Clone Copybara if needed, check out COPYBARA_COMMIT_HASH and build the Docker image.
    3. Obtain GitHub App installation tokens and register them for log redaction.
    4. Choose the config file (local ``copy.bara.sky`` for "test", a generated copy pinned to
       a commit for "prod") and inject the tokens into its GitHub URLs in place.
    5. For "test", push the test branches; for "prod", create the destination branch when
       it does not exist yet.
    6. Run a Copybara ``--dry-run`` and fail if the preview output contains excluded paths
       or the exclusion lists of the script and the config differ.
    7. Refresh the tokens in the config file and run the real migration.

    A Copybara failure whose output contains an acceptable message ends the run
    successfully. Other failures are re-raised; for "prod" they are first passed to
    ``handle_failure``.
    """
    # `+=` on REDACTED_STRINGS below rebinds the module-level name, hence the declaration.
    global REDACTED_STRINGS

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--expansions-file",
        "-e",
        default="../expansions.yml",
        help="Location of expansions file generated by evergreen.",
    )
    parser.add_argument(
        "--workflow",
        default="test",
        choices=["prod", "test"],
        help="The copybara workflow to use (test is a dryrun)",
    )
    args = parser.parse_args()

    # Check if the copybara directory already exists
    if os.path.exists("copybara"):
        print("Copybara directory already exists.")
    else:
        run_command("git clone https://github.com/10gen/copybara.git")

    # Checkout the specific commit of Copybara we want to use
    run_command(f"cd copybara && git checkout {COPYBARA_COMMIT_HASH}")

    # Navigate to the Copybara directory and build the Copybara Docker image
    run_command("cd copybara && docker build --rm -t copybara_container .")

    # Read configurations
    expansions = read_config_file(args.expansions_file)

    token_mongodb_mongo = get_installation_access_token(
        expansions["app_id_copybara_syncer_after_fix"],
        expansions["private_key_copybara_syncer"],
        expansions["installation_id_copybara_syncer"],
    )
    token_10gen_mongo = get_installation_access_token(
        expansions["app_id_copybara_syncer_10gen"],
        expansions["private_key_copybara_syncer_10gen"],
        expansions["installation_id_copybara_syncer_10gen"],
    )
    # Register the tokens so redact_secrets masks them in any output it processes.
    REDACTED_STRINGS += [token_mongodb_mongo, token_10gen_mongo]

    tokens_map = {
        "https://github.com/mongodb/mongo.git": token_mongodb_mongo,
        "https://github.com/10gen/mongo.git": token_10gen_mongo,
        "https://github.com/10gen/mongo-copybara.git": token_10gen_mongo,
    }

    # Create the mongodb-bot.gitconfig file as necessary.
    create_mongodb_bot_gitconfig()

    current_dir = os.getcwd()

    if args.workflow == "test":
        test_args = ["--init-history", f"--last-rev={expansions['revision']}"]
        branch = f"copybara_test_branch_{expansions['version_id']}"
        test_branch_str = 'testBranch = "copybara_test_branch"'
        config_file = f"{current_dir}/copy.bara.sky"
    elif args.workflow == "prod":
        if expansions["is_patch"] == "true":
            print("ERROR: prod workflow should not be run in patch builds!")
            sys.exit(1)
        test_args = []
        branch = "master"
        config_file = get_prod_copybara_config_from_master(current_dir)
    else:
        raise Exception(f"invalid workflow {args.workflow}")

    # Overwrite repo urls in copybara config in-place.
    # With inplace=True, everything printed inside the loop becomes the new file content.
    with fileinput.FileInput(config_file, inplace=True) as file:
        for line in file:
            token = None

            # Replace GitHub URL with token-authenticated URL
            for repo, value in tokens_map.items():
                if repo in line:
                    token = value
                    break  # no need to check other repos

            if token:
                print(
                    line.replace(
                        "https://github.com",
                        f"https://x-access-token:{token}@github.com",
                    ),
                    end="",
                )
            # Update testBranch in .sky file if running test workflow
            elif args.workflow == "test" and test_branch_str in line:
                print(
                    line.replace(
                        test_branch_str,
                        test_branch_str[:-1] + f"_{expansions['version_id']}\"\n",
                    ),
                    end="",
                )
            else:
                print(line, end="")

    if args.workflow == "test":
        # Sanity check that the testBranch rewrite above actually took effect.
        if not sky_file_has_version_id(config_file, expansions["version_id"]):
            print(
                f"Copybara test branch in {config_file} does not contain version_id {expansions['version_id']}"
            )
            sys.exit(1)

    copybara_config = CopybaraConfig.from_copybara_sky_file(args.workflow, branch, config_file)

    if args.workflow == "test":
        push_test_branches(copybara_config, expansions)

    # Create destination branch if it does not exist
    if not copybara_config.is_complete():
        print("ERROR!!!")
        print(
            f"ERROR!!! Source or destination configuration could not be parsed from the {config_file}."
        )
        print("ERROR!!!")
        sys.exit(1)
    else:
        if args.workflow == "prod":
            if not check_destination_branch_exists(copybara_config):
                create_branch_from_matching_commit(copybara_config)
                print(
                    f"New branch named '{copybara_config.destination.branch}' has been created"
                    f" for the '{copybara_config.destination.repo_name}' repo"
                )
            else:
                print(
                    f"The branch named '{copybara_config.destination.branch}' already exists"
                    f" in the '{copybara_config.destination.repo_name}' repo."
                )

    # Host directory mounted into the container as Copybara's --output-root.
    os.makedirs("tmp_copybara")

    docker_cmd = [
        "docker",
        "run",
        "--rm",
        "-v",
        f"{os.path.expanduser('~/.ssh')}:/root/.ssh",
        "-v",
        f"{os.path.expanduser('~/mongodb-bot.gitconfig')}:/root/.gitconfig",
        "-v",
        f"{config_file}:/usr/src/app/copy.bara.sky",
        "-v",
        f"{os.getcwd()}/tmp_copybara:/tmp/copybara-preview",
        "copybara_container",
        "migrate",
        "/usr/src/app/copy.bara.sky",
        args.workflow,
        "-v",
        "--output-root=/tmp/copybara-preview",
    ]

    try:
        run_command(" ".join(docker_cmd + ["--dry-run"] + test_args))

        found_forbidden = False
        preview_dir = Path("tmp_copybara")

        check_script_exclusions_match_sky(config_file)

        # Scan everything the dry run produced for paths that must never be synced.
        for file_path in preview_dir.rglob("*"):
            if file_path.is_file():
                path_in_checkout = get_checkout_relative_path(file_path, preview_dir)
                for pattern in EXCLUDED_PATTERNS:
                    if matches_excluded_pattern(path_in_checkout, pattern):
                        print(f"ERROR: Found excluded path: {file_path}")
                        found_forbidden = True

        if found_forbidden:
            sys.exit(1)
    except subprocess.CalledProcessError as err:
        if has_acceptable_copybara_message(err.output):
            print("Copybara dry-run reported an acceptable no-op result. Skipping sync.")
            return
        if args.workflow == "prod":
            error_message = f"Copybara failed with error: {err.returncode}"
            handle_failure(expansions, error_message, err.output)
        raise

    # Write newly generated tokens to the config file to make sure
    # the token isn't expired by the time the dry-run finishes
    token_mongodb_mongo = get_installation_access_token(
        expansions["app_id_copybara_syncer_after_fix"],
        expansions["private_key_copybara_syncer"],
        expansions["installation_id_copybara_syncer"],
    )
    token_10gen_mongo = get_installation_access_token(
        expansions["app_id_copybara_syncer_10gen"],
        expansions["private_key_copybara_syncer_10gen"],
        expansions["installation_id_copybara_syncer_10gen"],
    )
    REDACTED_STRINGS += [token_mongodb_mongo, token_10gen_mongo]

    # The URLs now embed credentials, so lines are identified by "owner/repo.git" only.
    tokens_map = {
        "mongodb/mongo.git": token_mongodb_mongo,
        "10gen/mongo.git": token_10gen_mongo,
        "10gen/mongo-copybara.git": token_10gen_mongo,
    }

    with fileinput.FileInput(config_file, inplace=True) as file:
        for line in file:
            token = None

            for repo, value in tokens_map.items():
                if repo in line:
                    token = value
                    break

            if token:
                print(
                    # Replace any existing GitHub token in the URL while preserving the rest of
                    # the line. The credential pattern mirrors the one in redact_secrets; the
                    # callable replacement inserts the token literally.
                    re.sub(
                        r"https://x-access-token:[^@\s]+@github\.com",
                        lambda _match, token=token: f"https://x-access-token:{token}@github.com",
                        line,
                    ),
                    end="",
                )
            else:
                print(line, end="")

    # dry run successful, time to push
    try:
        run_command(" ".join(docker_cmd + test_args))
    except subprocess.CalledProcessError as err:
        if has_acceptable_copybara_message(err.output):
            print("Copybara migrate reported an acceptable no-op result.")
            return
        if args.workflow == "prod":
            error_message = f"Copybara failed with error: {err.returncode}"
            handle_failure(expansions, error_message, err.output)
        raise
# Run the sync only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

View File

@ -1,6 +1,6 @@
load("@poetry//:dependencies.bzl", "dependency") load("@poetry//:dependencies.bzl", "dependency")
load("//bazel:mongo_script_rules.bzl", "mongo_toolchain_py_cxx_test") load("//bazel:mongo_script_rules.bzl", "mongo_toolchain_py_cxx_test")
load("@rules_python//python:defs.bzl", "py_library") load("@rules_python//python:defs.bzl", "py_library", "py_test")
mongo_toolchain_py_cxx_test( mongo_toolchain_py_cxx_test(
name = "test_clang_tidy", name = "test_clang_tidy",
@ -19,6 +19,21 @@ mongo_toolchain_py_cxx_test(
], ],
) )
# Runs test_sync_repo_with_copybara.py against the //buildscripts/copybara library. The
# copybara config files and the generated copybara Evergreen YAML are made available to the
# test as runtime data.
py_test(
    name = "test_sync_repo_with_copybara",
    srcs = [
        "test_sync_repo_with_copybara.py",
    ],
    data = [
        "//buildscripts/copybara:copybara_config_files",
        "//etc:evergreen_yml_components/copybara/copybara_gen.yml",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "//buildscripts/copybara",
    ],
)
# TODO(SERVER-105817): The following library is autogenerated, please split these out into individual python targets # TODO(SERVER-105817): The following library is autogenerated, please split these out into individual python targets
py_library( py_library(
name = "all_python_files", name = "all_python_files",

View File

@ -0,0 +1,93 @@
"""Tests for OWNERS auto-approver generation behavior."""
import importlib.util
import os
import sys
import types
import unittest
from copy import deepcopy
from pathlib import Path
from unittest.mock import patch
CODEOWNERS_ROOT = Path(__file__).resolve().parents[1] / "bazel_rules_mongo" / "codeowners"
PARSERS_ROOT = CODEOWNERS_ROOT / "parsers"
def _load_module(module_name: str, module_path: Path):
    """Load and execute the Python file at ``module_path`` as a module named ``module_name``.

    The module is returned but not registered in ``sys.modules``.

    Raises RuntimeError when no import spec or loader can be built for the path.
    """
    spec = importlib.util.spec_from_file_location(module_name, module_path)
    loader = getattr(spec, "loader", None)
    if loader is None:
        # Covers both a missing spec and a spec without a loader.
        raise RuntimeError(f"Failed to load module spec for {module_path}")

    loaded = importlib.util.module_from_spec(spec)
    loader.exec_module(loaded)
    return loaded
# Register empty stand-in packages under the dotted names the parser modules are loaded as.
# setdefault keeps any "codeowners" / "codeowners.parsers" entry that is already registered.
# NOTE(review): presumably needed so `codeowners.*` imports inside the parser files
# resolve without the real package being importable — confirm.
codeowners_package = types.ModuleType("codeowners")
parsers_package = types.ModuleType("codeowners.parsers")
sys.modules.setdefault("codeowners", codeowners_package)
sys.modules.setdefault("codeowners.parsers", parsers_package)
# Load each parser straight from its file and publish it in sys.modules. owners_v1 is
# registered before owners_v2 is executed.
owners_v1_module = _load_module("codeowners.parsers.owners_v1", PARSERS_ROOT / "owners_v1.py")
sys.modules["codeowners.parsers.owners_v1"] = owners_v1_module
OwnersParserV1 = owners_v1_module.OwnersParserV1
owners_v2_module = _load_module("codeowners.parsers.owners_v2", PARSERS_ROOT / "owners_v2.py")
sys.modules["codeowners.parsers.owners_v2"] = owners_v2_module
OwnersParserV2 = owners_v2_module.OwnersParserV2
class _TestOwnersParserV1(OwnersParserV1):
    """OwnersParserV1 variant whose ``test_pattern`` accepts every pattern."""

    def test_pattern(self, pattern: str) -> bool:
        # Always report the pattern as valid.
        # NOTE(review): presumably the real check depends on files in the checkout — confirm.
        return True
class _TestOwnersParserV2(OwnersParserV2):
    """OwnersParserV2 variant whose ``test_pattern`` accepts every pattern."""

    def test_pattern(self, pattern: str) -> bool:
        # Always report the pattern as valid.
        # NOTE(review): presumably the real check depends on files in the checkout — confirm.
        return True
class TestCodeownersAutoApprover(unittest.TestCase):
    """Checks when ``@svc-auto-approve-bot`` is added to generated entries by both parsers."""

    def setUp(self) -> None:
        # Minimal OWNERS contents: one catch-all filter with a single approver.
        catch_all_filter = {"*": None, "approvers": ["example-user"]}
        self.contents = {"filters": [catch_all_filter]}

    def _generated_entries(self, parser_cls, owners_contents):
        """Parse a private copy of ``owners_contents`` as the buildscripts/copybara OWNERS file."""
        return parser_cls().parse(
            "buildscripts/copybara",
            "buildscripts/copybara/OWNERS.yml",
            deepcopy(owners_contents),
        )

    def test_adds_auto_approver_without_opt_out(self) -> None:
        with patch.dict(os.environ, {"ADD_AUTO_APPROVE_USER": "true"}, clear=False):
            for parser_cls in (_TestOwnersParserV1, _TestOwnersParserV2):
                with self.subTest(parser=parser_cls.__name__):
                    entries = self._generated_entries(parser_cls, self.contents)

                    self.assertEqual(len(entries), 1)
                    self.assertIn("@example-user", entries[0])
                    self.assertIn("@svc-auto-approve-bot", entries[0])

    def test_skips_auto_approver_when_owners_file_opts_out(self) -> None:
        # Same contents as setUp, plus the opt-out option.
        opted_out = deepcopy(self.contents)
        opted_out["options"] = {"no_auto_approver": True}

        with patch.dict(os.environ, {"ADD_AUTO_APPROVE_USER": "true"}, clear=False):
            for parser_cls in (_TestOwnersParserV1, _TestOwnersParserV2):
                with self.subTest(parser=parser_cls.__name__):
                    entries = self._generated_entries(parser_cls, opted_out)

                    self.assertEqual(len(entries), 1)
                    self.assertIn("@example-user", entries[0])
                    self.assertNotIn("@svc-auto-approve-bot", entries[0])
# Allow running this test file directly with the unittest runner.
if __name__ == "__main__":
    unittest.main()

File diff suppressed because it is too large Load Diff

View File

@ -51,16 +51,15 @@ VERSION=8.3
### Copybara configuration ### Copybara configuration
Run the following automation and verify results: Run the following automation in the private repo and verify results:
```sh ```sh
sed -i "s/master/v$VERSION/g" copy.bara.sky sed -i "s/master/v$VERSION/g" buildscripts/copybara/copy.bara.sky buildscripts/copybara/sync_repo_with_copybara.py
sed -i 's/branch = "master"/branch = "v'"$VERSION"'"/' buildscripts/sync_repo_with_copybara.py
``` ```
For each file [`copy.bara.sky`](../../copy.bara.sky) and In the private repo, `buildscripts/copybara/copy.bara.sky` and
[`sync_repo_with_copybara.py`](../../buildscripts/sync_repo_with_copybara.py), the "master" branch `buildscripts/copybara/sync_repo_with_copybara.py` should have their `"master"` branch references
references should be replaced with the new branch name. replaced with the new branch name.
### Evergreen YAML configurations ### Evergreen YAML configurations

View File

@ -33,8 +33,11 @@ programmatically to, for example, generate a report of all the files owned by a
even though that team has nominated specific engineers as approvers. even though that team has nominated specific engineers as approvers.
`options` are not required and are various options about how to use this OWNERS.yml file. Currently `options` are not required and are various options about how to use this OWNERS.yml file. Currently
there is only a single option `no_parent_owners` which is defaulted to false. If this option is set there are two options:
to true it will stop upwards OWNERS resolution.
- `no_parent_owners`, which defaults to false. If set to true it stops upwards OWNERS resolution.
- `no_auto_approver`, which defaults to false. If set to true it prevents the generated `CODEOWNERS`
entry for this `OWNERS.yml` file from automatically including `@svc-auto-approve-bot`.
### Example file ### Example file
@ -70,6 +73,7 @@ filters: # List of all filters
- bazel-approvers - bazel-approvers
options: # All options for this file options: # All options for this file
no_parent_owners: false # See above for no_parent_owners. Defaulted to false so this line is not needed. no_parent_owners: false # See above for no_parent_owners. Defaulted to false so this line is not needed.
no_auto_approver: false # Prevents auto-adding @svc-auto-approve-bot for this OWNERS file.
``` ```
### Filter examples ### Filter examples

View File

@ -7,6 +7,7 @@ exports_files([
"tsan.suppressions", "tsan.suppressions",
"burn_in_tests.yml", "burn_in_tests.yml",
"extensions.yml", "extensions.yml",
"evergreen_yml_components/copybara/copybara_gen.yml",
"backports_required_for_multiversion_tests.yml", "backports_required_for_multiversion_tests.yml",
]) ])

View File

@ -97,6 +97,8 @@ include:
- filename: etc/evergreen_yml_components/variants/amazon/streams/streams_dev.yml - filename: etc/evergreen_yml_components/variants/amazon/streams/streams_dev.yml
- filename: src/mongo/db/modules/atlas/atlas_dev.yml - filename: src/mongo/db/modules/atlas/atlas_dev.yml
- filename: etc/evergreen_yml_components/copybara/copybara.yml
- filename: etc/evergreen_yml_components/copybara/copybara_gen.yml
- filename: monguard/.evergreen/config.yml - filename: monguard/.evergreen/config.yml