SERVER-79790 Add option to generate bazel conversion targets to libdeps

This commit is contained in:
Zack Winter 2023-09-13 20:06:53 +00:00 committed by Evergreen Agent
parent b9235dbace
commit 7cccfc0caf
8 changed files with 96 additions and 2 deletions

5
.bazelignore Normal file
View File

@ -0,0 +1,5 @@
# TODO(SERVER-81039): Remove once these can be compiled from the root directory.
src/third_party/grpc
src/third_party/abseil-cpp
src/third_party/protobuf
src/third_party/re2

View File

@ -14,6 +14,7 @@ RUN git clone -b 2.3.1 https://github.com/bazelbuild/bazel-buildfarm.git
# Switch into the cloned Buildfarm repository
WORKDIR /bazel-buildfarm
# TODO(SERVER-81038): remove once bazel/bazelisk is added to the toolchain.
# Obtain Bazelisk and make it executable
RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-arm64 -O bazelisk && chmod +x bazelisk

View File

@ -6,7 +6,7 @@ The Libdeps Graph analysis tools perform analysis and queries on graph represent
The scons build can create the graph files for analysis. To build the graphml file run the build with this minimal set of args required:
python3 buildscripts/scons.py --link-model=dynamic --build-tools=next generate-libdeps-graph
python3 buildscripts/scons.py --link-model=dynamic --build-tools=next generate-libdeps-graph --linker=gold
The target `generate-libdeps-graph` has special meaning and will turn on extra build items to generate the graph. This target will build everything so that the graph is fully representative of the build. The graph file by default will be found at `build/opt/libdeps/libdeps.graphml` (where `build/opt` is the `$BUILD_DIR`).
@ -18,7 +18,7 @@ The Command Line tool will process a single graph file based off a list of input
By default it will perform some basic operations and print the output in human readable format:
python3.8 buildscripts/libdeps/gacli.py --graph-file build/opt/libdeps/libdeps.graphml
python3 buildscripts/libdeps/gacli.py --graph-file build/opt/libdeps/libdeps.graphml
Which will give an output similar to this:

View File

@ -177,6 +177,12 @@ def setup_args_parser():
"Find candidate nodes for merging by searching the graph for nodes with only one node which depends on them."
)
parser.add_argument(
'--bazel-conv-candidates', action='store_true', default=False, help=
"Find candidate nodes ready for bazel conversion. This effectively means the node is currently not being built "
"with bazel and the node does not have any dependency nodes that are not being built in bazel."
)
args = parser.parse_args()
for arg_list in args.graph_paths:
@ -281,6 +287,9 @@ def main():
if args.indegree_one:
analysis.append(libdeps_analyzer.InDegreeOne(libdeps_graph))
if args.bazel_conv_candidates:
analysis.append(libdeps_analyzer.BazelConversionCandidates(libdeps_graph))
analysis += libdeps_analyzer.linter_factory(libdeps_graph, args.lint)
if args.build_data:

View File

@ -36,6 +36,7 @@ import json
import inspect
import functools
from pathlib import Path
import subprocess
import networkx
import cxxfilt
@ -498,6 +499,75 @@ class InDegreeOne(Analyzer):
report[DependsReportTypes.IN_DEGREE_ONE.name] = self.run()
class BazelConversionCandidates(Analyzer):
    """
    Finds nodes ready for bazel conversion.

    This effectively means that they are not currently being built with bazel and
    do not have any dependencies that are not being built with bazel.
    Such nodes are ready to be built as bazel targets.
    """

    def get_bazel_converted_scons_targets(self):
        """Return the simplified labels of all targets reported by ``bazelisk query //...``.

        Each label is passed through ``simplify_bazel_target`` so it can be compared
        with the output of ``scons_target_to_bazel``.

        Raises:
            subprocess.CalledProcessError: if the query exits with a non-zero status
                (``check=True``).
            OSError: if the bazelisk binary cannot be executed.
        """
        # Extract a list of all bazel targets from the root of the tree.
        # Note: //... is the bazel catch-all for referencing all targets in that directory. For
        # example, //src/... will expand to include all targets under //src/.
        # TODO(SERVER-81038): remove /tmp/ prefix once bazel/bazelisk is added to the toolchain.
        proc = subprocess.run(["/tmp/bazelisk", "query", "//..."], capture_output=True, text=True,
                              check=True)

        # "bazel query" outputs how many packages were loaded in addition to the targets.
        # Ignore lines not starting with // to skip over that line.
        return [
            self.simplify_bazel_target(line) for line in proc.stdout.splitlines()
            if line.startswith("//")
        ]

    def simplify_bazel_target(self, bazel_target: str):
        """Strip the leading slashes and a leading ``src/`` from a bazel label.

        For example ``//src/mongo/db:foo`` becomes ``mongo/db:foo``.
        """
        # Remove leading // and "src" to make comparison with scons targets simpler.
        bazel_target = bazel_target.lstrip("/")
        if bazel_target.startswith("src/"):
            bazel_target = bazel_target[4:]
        return bazel_target

    def scons_target_to_bazel(self, scons_target: str):
        """Rewrite a scons target path into the ``package:name`` form used for bazel labels.

        Library targets (``.so`` / ``.a``) lose their extension and a ``lib`` prefix on the
        file name; every target has its final ``/`` replaced with ``:``. For example
        ``mongo/db/libfoo.so`` becomes ``mongo/db:foo`` and ``mongo/db/mongod`` becomes
        ``mongo/db:mongod``. A target with no ``/`` is returned without a ``:`` (a library
        only loses its extension).
        """
        is_library = scons_target.endswith((".so", ".a"))
        if is_library:
            scons_target = scons_target.rsplit(".", 1)[0]

        package, slash, name = scons_target.rpartition("/")
        if not slash:
            # No directory component, so there is no package to separate the name from.
            return scons_target

        # Only the file name may carry the "lib" prefix. Searching the whole path for "/lib"
        # would split on a directory such as "libs/" and would leave out the ":" separator
        # for libraries whose file name has no "lib" prefix.
        if is_library and name.startswith("lib"):
            name = name[len("lib"):]
        return f"{package}:{name}"

    @schema_check(schema_version=1)
    def run(self):
        """Finds bazel conversion candidate nodes.

        Returns a sorted list of the nodes that have no bazel target themselves and whose
        entries in ``self._dependency_graph[node]`` all already have one.
        """
        # Exclude counting dependencies that already have bazel targets.
        bazelfied_scons_targets = set(self.get_bazel_converted_scons_targets())

        def is_bazelfied(scons_target):
            return self.scons_target_to_bazel(scons_target) in bazelfied_scons_targets

        candidate_nodes = [
            node for node, _ in self._dependency_graph.nodes(data=True)
            if not is_bazelfied(node)
            and all(is_bazelfied(dep) for dep in self._dependency_graph[node])
        ]
        return sorted(candidate_nodes)

    def report(self, report):
        """Adds scons target list to the report."""
        report[DependsReportTypes.BAZEL_CONV_CANDIDATES.name] = self.run()
class GraphPaths(Analyzer):
"""Finds all paths between two nodes in the graph."""
@ -967,6 +1037,11 @@ class GaPrettyPrinter(GaPrinter):
print('[ ' + str(round(data[to_node]['efficiency'] * 100, 1)) + '% ] ' +
from_node + ' -> ' + to_node)
if DependsReportTypes.BAZEL_CONV_CANDIDATES.name in results:
print("\nNon-bazelfied nodes with no non-bazelfied dependencies:")
for node in results[DependsReportTypes.BAZEL_CONV_CANDIDATES.name]:
print(f"\t{node}")
if LinterTypes.EFFICIENCY_LINT.name in results:
data = results[LinterTypes.EFFICIENCY_LINT.name]
print("\nLibdepsLinter: Efficiency of Direct Public Edges")

View File

@ -69,6 +69,7 @@ class DependsReportTypes(Enum):
IN_DEGREE_ONE = auto()
SYMBOL_DEPENDS = auto()
EFFICIENCY = auto()
BAZEL_CONV_CANDIDATES = auto()
class LinterTypes(Enum):

View File

@ -3,4 +3,5 @@ cd src
set -o errexit
set -o verbose
# TODO(SERVER-81038): remove once bazel/bazelisk is added to the toolchain.
curl -L https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-arm64 --output ./bazelisk && chmod +x ./bazelisk

View File

@ -4,6 +4,7 @@ import SCons
import stat
import urllib.request
# TODO(SERVER-81038): remove /tmp/ once bazel/bazelisk is added to the toolchain.
# Note: The /tmp location is, ironically, temporary. We expect to implement Bazelisk installation
# as a standard part of the Bazel solution soon.
BAZELISK_PATH = "/tmp/bazelisk"
@ -69,6 +70,7 @@ def generate(env):
raise Exception(
f'Bazel not supported on this architecture ({current_architecture}); supported architectures are: [{supported_architectures}]'
)
# TODO(SERVER-81038): remove once bazel/bazelisk is added to the toolchain.
if not os.path.exists("bazelisk"):
urllib.request.urlretrieve(
"https://github.com/bazelbuild/bazelisk/releases/download/v1.17.0/bazelisk-linux-arm64",