SERVER-124285: Improve SBOM generation automation and change sbom_linter.py ownership (#51987)

Co-authored-by: mongo-pr-bot[bot] <230616009+mongo-pr-bot[bot]@users.noreply.github.com>
GitOrigin-RevId: 22814ad6a5230346b4f8b394163e3975d019bb5a
This commit is contained in:
Jason Hills 2026-04-22 17:50:37 -04:00 committed by MongoDB Bot
parent 9d9dda0499
commit 16ee6258a5
6 changed files with 321 additions and 46 deletions

View File

@ -62,3 +62,6 @@ filters:
- "bazel_burn_in.py":
approvers:
- 10gen/devprod-correctness
- "sbom_linter.py":
approvers:
- 10gen/code-review-team-ssdlc

View File

@ -819,7 +819,7 @@ def main() -> None:
)
)
logger.warning(
"VERSION MISMATCH: %s: Endor version %s; Import script version %s. 'priority_version_source' from metadata: %s",
"VERSION MISMATCH: %s: Endor %s; Import script %s. 'priority_version_source': %s",
component_key,
versions["endor"],
versions["import_script"],
@ -878,13 +878,12 @@ def main() -> None:
try:
jira_teams = owners.get_jira_team_from_codeowner(codeowner)
except KeyError:
logger.warning(
logger.debug(
"CODEOWNER: %s could not determine JIRA teams for codeowner %s. Mapping may be missing from buildscripts/util/co_jira_map.yml",
component_key,
codeowner,
)
jira_teams = [codeowner]
continue
for jira_team in jira_teams:
add_component_property(
component, "internal:team_responsible", jira_team
@ -916,7 +915,7 @@ def main() -> None:
location,
)
else:
logger.warning(
logger.debug(
"THIRD_PARTY FOLDER: %s lists a location as '%s'. Ideally, all third-party components are located under 'src/third_party/'.",
component_key,
location,
@ -961,12 +960,31 @@ def main() -> None:
# region Parse unmatched Endor Labs components
print_banner("New Endor Labs components")
# Build a set of stripped bom-refs from .metadata.component.components[] so that
# components Endor lists as first-party sub-packages are not also added as unmatched
# as-is third-party components, which would create duplicates in the final SBOM.
endor_metadata_sub_component_refs = {
c["bom-ref"].split("@")[0]
for c in endor_bom["metadata"]["component"].get("components", [])
if "bom-ref" in c
}
if endor_components:
logger.info(
"ENDOR SBOM: There are %d unmatched components in the Endor Labs SBOM. Adding as-is. The applicable metadata should be added to the metadata SBOM for the next run.",
len(endor_components),
)
for component in endor_components:
# Skip components that Endor also lists as first-party sub-packages in
# .metadata.component.components[]; adding them here would create duplicates.
if component in endor_metadata_sub_component_refs:
logger.info(
"ENDOR SBOM: Skipping unmatched component '%s' — already listed in .metadata.component.components[]",
component,
)
continue
# set scope to excluded by default until the component is evaluated
endor_components[component]["scope"] = "excluded"
@ -980,18 +998,54 @@ def main() -> None:
add_component_property(endor_components[component], "internal:as-is_component", "true")
meta_bom["components"].append(endor_components[component])
meta_bom["dependencies"].extend(
[
d
for d in endor_bom["dependencies"]
if d.get("ref") == endor_components[component]["bom-ref"]
]
)
if component.startswith(("pkg:github/", "pkg:generic/")):
logger.warning("SBOM AS-IS COMPONENT: Added %s", component)
# endregion Parse unmatched Endor Labs components
# region Merge Endor Labs dependency data
print_banner("Merging Endor Labs dependency data")
# Build a lookup of current meta_bom dependency entries by ref.
# These may originate from metadata.cdx.json or from runtime add_component_dependsOn calls.
meta_deps_by_ref = {d["ref"]: d for d in meta_bom["dependencies"]}
# Only add/update dependency entries whose ref is actually present in the final SBOM
final_component_refs = {meta_bom["metadata"]["component"]["bom-ref"]} | {
c["bom-ref"] for c in meta_bom["components"]
}
merged_new = 0
merged_collisions = 0
for endor_dep in endor_bom["dependencies"]:
ref = endor_dep.get("ref")
if not ref or ref not in final_component_refs:
continue
if ref in meta_deps_by_ref:
existing = meta_deps_by_ref[ref]
if set(existing.get("dependsOn", [])) != set(endor_dep.get("dependsOn", [])):
logger.warning(
"DEPENDENCIES: Collision on ref '%s': metadata dependsOn %s; Endor Labs dependsOn %s. Using Endor Labs data.",
ref,
existing.get("dependsOn", []),
endor_dep.get("dependsOn", []),
)
existing["dependsOn"] = endor_dep["dependsOn"]
merged_collisions += 1
else:
meta_bom["dependencies"].append(endor_dep)
meta_deps_by_ref[ref] = endor_dep
merged_new += 1
logger.info(
"DEPENDENCIES: Added %d new dependency entries from Endor Labs; %d collision(s) resolved in favor of Endor Labs data.",
merged_new,
merged_collisions,
)
# endregion Merge Endor Labs dependency data
# region Finalize SBOM
# Has the SBOM app version changed?
@ -1078,7 +1132,49 @@ def main() -> None:
write_sbom_json_file(meta_bom, sbom_out_internal_path)
# Load the previous public SBOM to track its serialNumber/version independently
if os.path.exists(sbom_out_public_path):
prev_public_bom = read_sbom_json_file(sbom_out_public_path)
else:
prev_public_bom = {
"serialNumber": None,
"version": 0,
"metadata": {"timestamp": meta_bom["metadata"]["timestamp"]},
"components": [],
"dependencies": [],
}
convert_sbom_to_public(meta_bom)
# Determine if the public SBOM's components changed vs. the previous public SBOM
prev_public_components = sbom_components_to_dict(prev_public_bom, with_version=True)
new_public_components = sbom_components_to_dict(meta_bom, with_version=True)
public_components_changed = prev_public_components.keys() != new_public_components.keys()
logger.info(
"SBOM_DIFF: Public SBOM components changed: %s. Previous public SBOM has %d components; New public SBOM has %d components",
public_components_changed,
len(prev_public_components),
len(new_public_components),
)
# serialNumber and version for the public SBOM are tracked independently from the private SBOM
if sbom_app_version_changed or not prev_public_bom["serialNumber"]:
meta_bom["serialNumber"] = uuid.uuid4().urn
meta_bom["version"] = 1
else:
meta_bom["serialNumber"] = prev_public_bom["serialNumber"]
meta_bom["version"] = prev_public_bom["version"]
if public_components_changed:
meta_bom["version"] += 1
# Timestamp for the public SBOM is also tracked independently
if sbom_app_version_changed or public_components_changed:
meta_bom["metadata"]["timestamp"] = (
datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
)
else:
meta_bom["metadata"]["timestamp"] = prev_public_bom["metadata"]["timestamp"]
write_sbom_json_file(meta_bom, sbom_out_public_path)
# Access the collected warnings

View File

@ -61,6 +61,135 @@
}
},
"components": [
{
"type": "library",
"bom-ref": "pkg:cargo/ftdc-io@{{VERSION}}",
"supplier": {
"name": "MongoDB, Inc.",
"url": [
"https://mongodb.com"
]
},
"group": "10gen",
"name": "ftdc-io (Full-Time Diagnostics Capture)",
"version": "{{VERSION}}",
"description": "FTDC (Full-Time Diagnostics Capture) IO",
"scope": "excluded",
"licenses": [
{
"license": {
"name": "Internal Use Only"
}
}
],
"copyright": "MongoDB, Inc.",
"purl": "pkg:cargo/ftdc-io@{{VERSION}}",
"externalReferences": [
{
"url": "https://github.com/10gen/mongo/tree/master/monguard/src/ftdc-io",
"type": "vcs"
}
],
"evidence": {
"occurrences": [
{
"location": "monguard/src/ftdc-io"
}
]
},
"properties": [
{
"name": "internal:private",
"value": "true"
}
]
},
{
"type": "library",
"bom-ref": "pkg:cargo/monguard@{{VERSION}}",
"supplier": {
"name": "MongoDB, Inc.",
"url": [
"https://mongodb.com"
]
},
"group": "10gen",
"name": "monguard",
"version": "{{VERSION}}",
"description": "A pingora-based proxy for MongoDB, focusing on hardening against malicious pre-auth external clients.",
"scope": "excluded",
"licenses": [
{
"license": {
"name": "Internal Use Only"
}
}
],
"copyright": "MongoDB, Inc.",
"purl": "pkg:cargo/monguard@{{VERSION}}",
"externalReferences": [
{
"url": "https://github.com/10gen/mongo/tree/master/monguard",
"type": "vcs"
}
],
"evidence": {
"occurrences": [
{
"location": "monguard/"
}
]
},
"properties": [
{
"name": "internal:private",
"value": "true"
}
]
},
{
"type": "library",
"bom-ref": "pkg:cargo/tracing-logv2@{{VERSION}}",
"supplier": {
"name": "MongoDB, Inc.",
"url": [
"https://mongodb.com"
]
},
"group": "10gen",
"name": "tracing-logv2",
"version": "{{VERSION}}",
"description": "logV2 structured logging for monguard",
"scope": "excluded",
"licenses": [
{
"license": {
"name": "Internal Use Only"
}
}
],
"copyright": "MongoDB, Inc.",
"purl": "pkg:cargo/tracing-logv2@{{VERSION}}",
"externalReferences": [
{
"url": "https://github.com/10gen/mongo/tree/master/monguard/src/tracing-logv2",
"type": "vcs"
}
],
"evidence": {
"occurrences": [
{
"location": "monguard/src/tracing-logv2"
}
]
},
"properties": [
{
"name": "internal:private",
"value": "true"
}
]
},
{
"type": "library",
"bom-ref": "pkg:deb/debian/firefox-esr@{{VERSION}}-1?arch=source",
@ -3274,6 +3403,49 @@
}
]
},
{
"type": "library",
"bom-ref": "pkg:maven/com.mongodb.streams/aspio@{{VERSION}}",
"supplier": {
"name": "MongoDB, Inc.",
"url": [
"https://mongodb.com"
]
},
"group": "com.mongodb.streams",
"name": "aspio (Mongostream External IO)",
"version": "{{VERSION}}",
"description": "The External IO module allows Mongostream to interact with external data sources and sinks through a standardized gRPC interface. It provides bidirectional streaming capabilities to transfer data between Mongostream and external systems.",
"scope": "excluded",
"licenses": [
{
"license": {
"name": "Internal Use Only"
}
}
],
"copyright": "MongoDB, Inc.",
"purl": "pkg:maven/com.mongodb.streams/aspio@{{VERSION}}",
"externalReferences": [
{
"url": "https://github.com/10gen/mongo/tree/master/src/mongo/db/modules/enterprise/src/streams/aspio",
"type": "vcs"
}
],
"evidence": {
"occurrences": [
{
"location": "src/mongo/db/modules/enterprise/src/streams/aspio"
}
]
},
"properties": [
{
"name": "internal:private",
"value": "true"
}
]
},
{
"type": "library",
"bom-ref": "pkg:pypi/ocspbuilder@0.10.2",

View File

@ -51,6 +51,18 @@ REGEX_PURL = {
r"[a-z0-9_-]+" # Name, letters must be lowercase, dashes, underscore
+ REGEX_STR_PURL_OPTIONAL
),
# Cargo PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/cargo-definition.md
"cargo": re.compile(
r"^pkg:cargo/" # Scheme and type
r"[a-zA-Z0-9_-]+" + REGEX_STR_PURL_OPTIONAL # Name (no namespace)
),
# Maven PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/maven-definition.md
"maven": re.compile(
r"^pkg:maven/" # Scheme and type
r"[a-zA-Z0-9._-]+" # Namespace (group ID, required)
r"/"
r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL # Name (artifact ID)
),
}
# Metadata SBOM requirements
@ -114,7 +126,8 @@ def convert_sbom_to_public(sbom_dict: dict):
for occurence in c.get("evidence", {}).get("occurrences", [])
)
or any(
property.get("name", "") == "internal:as-is_component"
property.get("name", "") in ["internal:as-is_component", "internal:private"]
and property.get("value") == "true"
for property in c.get("properties", [])
)
]
@ -134,6 +147,20 @@ def convert_sbom_to_public(sbom_dict: dict):
"PUBLIC SBOM: Removed %d internal components",
original_components_len - len(sbom_dict["components"]),
)
# Remove orphaned dependency entries — refs that are not present in the final component set.
# This covers entries added from Endor Labs that reference components which were filtered out
# at earlier stages (e.g. sub-packages, removed components) and are not internal-flagged.
valid_refs = {sbom_dict["metadata"]["component"]["bom-ref"]} | {
c["bom-ref"] for c in sbom_dict["components"]
}
original_deps_len = len(sbom_dict["dependencies"])
sbom_dict["dependencies"] = [d for d in sbom_dict["dependencies"] if d["ref"] in valid_refs]
for dependency in sbom_dict["dependencies"]:
dependency["dependsOn"] = [d for d in dependency["dependsOn"] if d in valid_refs]
removed_deps = original_deps_len - len(sbom_dict["dependencies"])
if removed_deps:
logger.info("PUBLIC SBOM: Removed %d orphaned dependency entries", removed_deps)
# Remove internal properties from public components
original_properties_len = sum(len(c.get("properties", [])) for c in sbom_dict["components"])
for component in sbom_dict["components"]:
@ -281,7 +308,7 @@ def write_sbom_json_file(sbom_dict: dict, file_path: str) -> None:
try:
file_path = os.path.abspath(file_path)
with open(file_path, "w", encoding="utf-8") as output_json:
formatted_sbom = json.dumps(sbom_dict, indent=2) + "\n"
formatted_sbom = json.dumps(sbom_dict, indent=2)
output_json.write(formatted_sbom)
except OSError as e:
logger.error("Error writing SBOM file to %s", file_path)

View File

@ -34,7 +34,6 @@ MISSING_VERSION_IN_SBOM_COMPONENT_ERROR = "Component must include a version."
MISSING_VERSION_IN_IMPORT_FILE_ERROR = "Missing version in the import file: "
MISSING_LICENSE_IN_SBOM_COMPONENT_ERROR = "Component must include a license."
COULD_NOT_FIND_OR_READ_SCRIPT_FILE_ERROR = "Could not find or read the import script file"
VERSION_MISMATCH_ERROR = "Version mismatch (may simply be an artifact of SBOM automation): "
# A class for managing error messages for components
@ -110,12 +109,6 @@ def get_script_version(
return result
# A version string sometimes contains an extra prefix like "v1.2" instead of "1.2"
# This function strips that extra prefix.
def strip_extra_prefixes(string_with_prefix: str) -> str:
return string_with_prefix.removeprefix("mongo/").removeprefix("v")
def validate_license(component: dict, error_manager: ErrorManager) -> None:
if "licenses" not in component:
error_manager.append_full_error_message(MISSING_LICENSE_IN_SBOM_COMPONENT_ERROR)
@ -179,16 +172,6 @@ def validate_properties(component: dict, error_manager: ErrorManager) -> None:
if comp_version == "Unknown" or script_path == "":
return
# Include the .pedigree.descendants[0] version for version matching
if (
"pedigree" in component
and "descendants" in component["pedigree"]
and "version" in component["pedigree"]["descendants"][0]
):
comp_pedigree_version = component["pedigree"]["descendants"][0]["version"]
else:
comp_pedigree_version = ""
# At this point a version is attempted to be read from the import script file
script_version_key = "VERSION"
if "properties" in component:
@ -199,12 +182,6 @@ def validate_properties(component: dict, error_manager: ErrorManager) -> None:
script_version = get_script_version(script_path, script_version_key, error_manager)
if script_version == "":
error_manager.append_full_error_message(MISSING_VERSION_IN_IMPORT_FILE_ERROR + script_path)
elif strip_extra_prefixes(script_version) != strip_extra_prefixes(
comp_version
) and strip_extra_prefixes(script_version) != strip_extra_prefixes(comp_pedigree_version):
print(
f"WARNING: {VERSION_MISMATCH_ERROR}\n script version:{script_version}\n sbom component version:{comp_version}\n sbom component pedigree version:{comp_pedigree_version}"
)
def validate_component(component: dict, third_party_libs: set, error_manager: ErrorManager) -> None:
@ -276,7 +253,7 @@ def lint_sbom(
error_manager.append(f" {lib}")
formatted_sbom = json.dumps(sbom, indent=2) + "\n"
if formatted_sbom != sbom_text:
if sbom_text not in (formatted_sbom, formatted_sbom.rstrip("\n")):
error_manager.append(f"{input_file} {FORMATTING_ERROR}")
if should_format:

View File

@ -3,9 +3,9 @@
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"serialNumber": "urn:uuid:2d7fbf85-c8b6-4f90-9966-70da88224a36",
"version": 5,
"version": 3,
"metadata": {
"timestamp": "2026-04-20T06:11:35Z",
"timestamp": "2026-04-13T06:06:17Z",
"lifecycles": [
{
"phase": "pre-build"
@ -63,7 +63,7 @@
"services": [
{
"name": "Endor Labs Inc",
"version": "v1.7.932"
"version": "v1.7.924"
}
]
}
@ -1760,7 +1760,7 @@
},
{
"type": "library",
"bom-ref": "pkg:github/mongodb/libmongocrypt@1.16.0",
"bom-ref": "pkg:github/mongodb/libmongocrypt@1.15.0",
"supplier": {
"name": "MongoDB, Inc.",
"url": [
@ -1770,7 +1770,7 @@
"author": "MongoDB, Inc.",
"group": "mongodb",
"name": "libmongocrypt",
"version": "1.16.0",
"version": "1.15.0",
"description": "Required C library for Client Side and Queryable Encryption in MongoDB",
"scope": "required",
"licenses": [
@ -1781,8 +1781,8 @@
}
],
"copyright": "Copyright 2019-present MongoDB, Inc.",
"cpe": "cpe:2.3:a:mongodb:libmongocrypt:1.16.0:*:*:*:*:*:*:*",
"purl": "pkg:github/mongodb/libmongocrypt@1.16.0",
"cpe": "cpe:2.3:a:mongodb:libmongocrypt:1.15.0:*:*:*:*:*:*:*",
"purl": "pkg:github/mongodb/libmongocrypt@1.15.0",
"externalReferences": [
{
"url": "https://github.com/mongodb/libmongocrypt.git",
@ -2707,7 +2707,7 @@
"dependsOn": []
},
{
"ref": "pkg:github/mongodb/libmongocrypt@1.16.0",
"ref": "pkg:github/mongodb/libmongocrypt@1.15.0",
"dependsOn": []
},
{
@ -2752,7 +2752,7 @@
"pkg:github/libtom/libtomcrypt@v1.18.2",
"pkg:github/libunwind/libunwind@v1.8.1",
"pkg:github/madler/zlib@1.3.2",
"pkg:github/mongodb/libmongocrypt@1.16.0",
"pkg:github/mongodb/libmongocrypt@1.15.0",
"pkg:github/nlohmann/json@v3.11.3",
"pkg:github/nodejs/node@22.1.0?download_url=https%3A%2F%2Fgithub.com%2Fnodejs%2Fnode%2Fblob%2F8b45c5d26a829bcd3280401dbc1874bcd1302289%2Fsrc%2Fnode_i18n.cc%23L825%23src%2Fnode_i18n.cc%3AGetStringWidth#src/node_i18n.cc",
"pkg:github/open-telemetry/opentelemetry-cpp@v1.24.0",