mongo/bazel/wrapper_hook/install_modules.py
Andrew Bradshaw ae4b3b02be SERVER-124992: Improve windows build times (#52301)
GitOrigin-RevId: 3e0c3ff1e2029f3ce105bedfeec9ba8fc87e1ed3
2026-04-21 21:42:10 +00:00

195 lines
6.4 KiB
Python

import hashlib
import os
import pathlib
import platform
import shutil
import subprocess
import sys
import tempfile
# Directory three levels above this file; used below as the base for the
# bazel-out / bazel-bin output trees and for locating poetry.lock.
REPO_ROOT = pathlib.Path(__file__).parent.parent.parent
# Make REPO_ROOT importable so the `bazel.wrapper_hook` import below resolves.
sys.path.append(str(REPO_ROOT))
from bazel.wrapper_hook.wrapper_debug import wrapper_debug
# Environment variable set to "1" by _reexec_current_python() and checked by
# bootstrap_modules() to skip a second install pass after the re-exec.
MODULES_READY_ENV = "MONGO_BAZEL_WRAPPER_MODULES_READY"
def get_deps_dirs(deps):
    """Yield ``(candidate_dir, dep)`` pairs where *dep* may be installed.

    For every dependency name in *deps* the generator yields, in order:

    * one candidate per child of ``REPO_ROOT / "bazel-out"``,
    * one candidate per child of ``<tmp>/compiledb-out``,
    * the ``REPO_ROOT / "bazel-bin"`` candidate.

    Candidates are plain strings and are not checked for existence; callers
    are expected to do that themselves.

    Args:
        deps: Iterable of poetry dependency names.

    Yields:
        Tuples of ``(path_string, dep)``.
    """
    tmp_dir = pathlib.Path(os.environ["Temp"] if platform.system() == "Windows" else "/tmp")
    bazel_bin = REPO_ROOT / "bazel-bin"
    out_dirs = [
        REPO_ROOT / "bazel-out",
        tmp_dir / "compiledb-out",
    ]
    for dep in deps:
        for out_dir in out_dirs:
            # Probe each output root on its own: a missing or unreadable
            # bazel-out must not prevent compiledb-out from being searched.
            # The listing is refreshed per dependency because the caller may
            # delete directories between iterations.
            try:
                children = os.listdir(out_dir)
            except OSError:
                continue
            for child in children:
                yield f"{out_dir}/{child}/bin/external/poetry/{dep}", dep
        yield f"{bazel_bin}/external/poetry/{dep}", dep
def add_module_to_path(poetry_dir, modules_added):
    """Append installed module directories under *poetry_dir* to ``sys.path``.

    A child of *poetry_dir* counts as an installed module when it directly
    contains at least one entry whose name ends in ``.dist-info``. Each module
    name is added at most once across calls that share *modules_added*.

    Args:
        poetry_dir: ``pathlib.Path`` of an ``external/poetry`` output directory.
        modules_added: Set of module directory names already on ``sys.path``;
            updated in place.
    """
    for module_dir in poetry_dir.iterdir():
        try:
            entries = list(module_dir.iterdir())
        except (FileNotFoundError, NotADirectoryError):
            # Entry may be a dangling symlink in the bazel output tree, or a
            # regular file sitting next to the module directories.
            continue

        if not any(entry.name.endswith(".dist-info") for entry in entries):
            continue

        module_name = module_dir.name
        if module_name not in modules_added:
            modules_added.add(module_name)
            sys.path.append(str(module_dir))
def setup_python_path():
    """Put every installed poetry module found in the output trees on ``sys.path``.

    Looks under each child of ``REPO_ROOT / "bazel-out"`` and of
    ``<tmp>/compiledb-out`` for a ``bin/external/poetry`` directory, then at
    ``REPO_ROOT / "bazel-bin" / "external" / "poetry"``, delegating to
    ``add_module_to_path`` for each one that exists.
    """
    if platform.system() == "Windows":
        tmp_dir = pathlib.Path(os.environ["Temp"])
    else:
        tmp_dir = pathlib.Path("/tmp")

    # Shared across all roots so a module name is only added once.
    seen_modules = set()

    for out_root in (REPO_ROOT / "bazel-out", tmp_dir / "compiledb-out"):
        if not out_root.exists():
            continue
        for child_dir in out_root.iterdir():
            candidate = child_dir / "bin" / "external" / "poetry"
            if candidate.exists():
                add_module_to_path(candidate, seen_modules)

    bazel_bin_poetry = REPO_ROOT / "bazel-bin" / "external" / "poetry"
    if bazel_bin_poetry.exists():
        add_module_to_path(bazel_bin_poetry, seen_modules)
def search_for_modules(deps, deps_installed, lockfile_changed=False):
    """Check which of *deps* are already present in the bazel output trees.

    Every candidate directory from ``get_deps_dirs`` that exists is handled
    according to *lockfile_changed*:

    * ``False``: if the directory contains a ``*.dist-info`` entry the
      dependency is recorded in *deps_installed* and dropped from the result.
    * ``True``: the directory tree is made fully accessible (mode ``0o777``)
      and then deleted, so nothing is reported as installed.

    Args:
        deps: List of dependency names to look for (not modified).
        deps_installed: List of names already known to be installed; found
            dependencies are appended in place.
        lockfile_changed: When true, purge existing install directories
            instead of accepting them.

    Returns:
        A new list with the dependencies from *deps* that were not found.
    """
    remaining = deps.copy()
    wrapper_debug(f"deps_installed: {deps_installed}")

    for candidate, dep in get_deps_dirs(deps):
        wrapper_debug(f"checking for {dep} in target_dir: {candidate}")
        if dep in deps_installed or not pathlib.Path(candidate).exists():
            continue

        if lockfile_changed:
            # Open up permissions top-down before deleting the tree.
            os.chmod(candidate, 0o777)
            for parent, subdirs, filenames in os.walk(candidate):
                parent_path = pathlib.Path(parent)
                for name in [*subdirs, *filenames]:
                    os.chmod(parent_path / name, 0o777)
            shutil.rmtree(candidate)
            continue

        if any(name.endswith(".dist-info") for name in os.listdir(candidate)):
            wrapper_debug(f"found: {candidate}")
            deps_installed.append(dep)
            remaining.remove(dep)

    wrapper_debug(f"deps_not_found: {remaining}")
    return remaining
def skip_cplusplus_toolchain(args):
    """Return ``True`` when any argument contains ``no_c++_toolchain``.

    Args:
        args: Iterable of command-line argument strings.
    """
    marker = "no_c++_toolchain"
    return any(marker in argument for argument in args)
def _reexec_current_python(env_var: str = MODULES_READY_ENV) -> None:
    """Restart this script under the same interpreter with *env_var* set to "1".

    The new process receives a copy of the current environment plus the
    marker variable, and the same ``sys.argv``. This function does not return:
    on Windows it exits with the child's return code, elsewhere the current
    process image is replaced via ``os.execve``.

    Args:
        env_var: Name of the environment variable to set to ``"1"`` for the
            restarted process.
    """
    wrapper_debug("python deps changed; restarting wrapper interpreter")

    child_env = os.environ.copy()
    child_env[env_var] = "1"
    command = [sys.executable, *sys.argv]

    if os.name != "nt":
        os.execve(sys.executable, command, child_env)

    # os.execve on Windows spawns a new process and immediately exits the
    # current one; tools/bazel.bat then reads MONGO_BAZEL_WRAPPER_ARGS
    # before the new process has written it. subprocess.run keeps the
    # current process alive until the child finishes, so bazel.bat reads
    # the file only after the child has written the correct args.
    import subprocess

    completed = subprocess.run(command, env=child_env)
    sys.exit(completed.returncode)
def bootstrap_modules(bazel, args):
    """Install the wrapper's python deps, re-executing the interpreter if needed.

    Nested Bazel installs can refresh the repo-rule python tree under the
    running interpreter. Re-exec so later stdlib imports come from the
    refreshed tree instead of the potentially stale one this process started
    with.

    When ``MODULES_READY_ENV`` is already ``"1"`` in the environment, only
    ``sys.path`` is set up and no install is attempted.

    Args:
        bazel: Bazel executable, forwarded to ``install_modules``.
        args: Wrapper command-line arguments, forwarded to ``install_modules``.
    """
    modules_ready = os.environ.get(MODULES_READY_ENV) == "1"
    if modules_ready:
        setup_python_path()
    elif install_modules(bazel, args):
        _reexec_current_python()
def install_modules(bazel, args):
    """Build any missing poetry dependencies with bazel and expose them on ``sys.path``.

    An MD5 of ``poetry.lock`` is compared against the value stored in a
    per-checkout file in the system temp directory. When the lockfile changed,
    the stored hash is refreshed, existing install directories are purged by
    ``search_for_modules`` and every dependency is rebuilt; otherwise only the
    dependencies that were not found are built.

    The build is first attempted with remote download flags; if that exits
    non-zero it is retried with ``--config=local``.

    Args:
        bazel: Bazel executable used as the first element of the command.
        args: Wrapper command-line arguments; only inspected by
            ``skip_cplusplus_toolchain``.

    Returns:
        ``True`` if a bazel build was invoked, ``False`` otherwise.

    Raises:
        Exception: If dependencies are still missing after the build.
    """
    repo_digest = hashlib.md5(str(REPO_ROOT).encode()).hexdigest()
    hash_path = pathlib.Path(tempfile.gettempdir()) / f"{repo_digest}_lockfile_hash"
    current_hash = hashlib.md5((REPO_ROOT / "poetry.lock").read_bytes()).hexdigest()

    old_hash = hash_path.read_text() if hash_path.exists() else None
    lockfile_changed = old_hash != current_hash
    if lockfile_changed:
        hash_path.write_text(current_hash)

    deps = ["retry", "gitpython", "requests", "timeout-decorator", "boto3", "pyyaml", "pymongo"]
    deps_installed = []
    deps_needed = search_for_modules(deps, deps_installed, lockfile_changed=lockfile_changed)

    need_to_install = lockfile_changed or bool(deps_needed)
    if lockfile_changed:
        # A changed lockfile invalidates everything, not just what is missing.
        deps_needed = deps

    if need_to_install:
        targets = ["@poetry//:library_" + dep.replace("-", "_") for dep in deps_needed]
        cmd = [bazel, "build", *targets]
        if skip_cplusplus_toolchain(args):
            cmd.append("--repo_env=no_c++_toolchain=1")

        remote_flags = [
            "--remote_download_all",
            "--bes_backend=",
            "--bes_results_url=",
            "--workspace_status_command=",
        ]
        proc = subprocess.run(cmd + remote_flags)
        if proc.returncode != 0:
            print("Failed to install modules using remote exec/cache, falling back to local...")
            proc = subprocess.run(cmd + ["--config=local"])

        # The return code of the fallback is not inspected; success is judged
        # by whether the dependencies can now be found on disk.
        deps_missing = search_for_modules(deps_needed, deps_installed)
        if deps_missing:
            raise Exception(f"Failed to install python deps {deps_missing}")

    setup_python_path()
    return need_to_install