Co-authored-by: mongo-pr-bot[bot] <230616009+mongo-pr-bot[bot]@users.noreply.github.com> Co-authored-by: Jason Hills <jason.hills@mongodb.com> GitOrigin-RevId: b8ba75da724800391249ed4266928c96bd537875
374 lines
14 KiB
Python
374 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Utility functions for processing CycloneDX SBOMs
|
|
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import urllib.parse
|
|
|
|
# Module-wide logger, registered under the fixed name "generate_sbom".
logger = logging.getLogger("generate_sbom")
# NOTSET on a non-root logger delegates level filtering to its ancestors.
logger.setLevel(logging.NOTSET)
|
|
|
|
# ################ PURL Validation ################
# Shared tail appended to every per-type pattern below: it matches the optional
# version / qualifiers / subpath parts and anchors the end of the PURL.
REGEX_STR_PURL_OPTIONAL = (
    # Optional Version (any chars except ? @ #)
    r"(?:@[^?@#]*)?"
    # Optional Qualifiers (any chars except @ #)
    r"(?:\?[^@#]*)?"
    # Optional Subpath (any chars)
    r"(?:#.*)?$"
)

# Compiled validation pattern for each supported PURL type, keyed by type name.
REGEX_PURL = {
    # deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
    "deb": re.compile(
        r"^pkg:deb/"  # Scheme and type
        # Namespace (vendor), letters must be lowercase. Exactly one vendor name
        # is accepted: a quantifier here would let "debianubuntu" through.
        r"(debian|ubuntu)"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name
    ),
    # Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
    "generic": re.compile(
        r"^pkg:generic/"  # Scheme and type
        r"([a-zA-Z0-9._-]+/)?"  # Optional namespace segment
        r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (required)
    ),
    # GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
    "github": re.compile(
        r"^pkg:github/"  # Scheme and type
        # Namespace (organization/user), letters must be lowercase
        r"[a-z0-9-]+"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (repository)
    ),
    # PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
    "pypi": re.compile(
        r"^pkg:pypi/"  # Scheme and type
        r"[a-z0-9_-]+"  # Name, letters must be lowercase, dashes, underscore
        + REGEX_STR_PURL_OPTIONAL
    ),
    # Cargo PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/cargo-definition.md
    "cargo": re.compile(
        r"^pkg:cargo/"  # Scheme and type
        r"[a-zA-Z0-9_-]+" + REGEX_STR_PURL_OPTIONAL  # Name (no namespace)
    ),
    # Maven PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/maven-definition.md
    "maven": re.compile(
        r"^pkg:maven/"  # Scheme and type
        r"[a-zA-Z0-9._-]+"  # Namespace (group ID, required)
        r"/"
        r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (artifact ID)
    ),
}
|
|
|
|
# Metadata SBOM requirements
# Field names looked up on every metadata SBOM component; check_metadata_sbom()
# logs a warning for each one that is absent.
METADATA_FIELDS_REQUIRED = [
    "type",
    "bom-ref",
    "group",
    "name",
    "version",
    "description",
    "licenses",
    "copyright",
    "externalReferences",
    "scope",
]
# Groups of alternative field names: check_metadata_sbom() logs a warning when a
# component contains none of the fields of an inner list.
METADATA_FIELDS_ONE_OF = [
    ["author", "supplier"],
    ["purl", "cpe"],
]
|
|
|
|
|
|
def add_component_property(component: dict, name: str, value: str) -> None:
    """Append a name/value pair to the 'properties' list of an SBOM component.

    The 'properties' list is created on the component when it is not there yet.
    """
    entry = {"name": name, "value": value}
    component.setdefault("properties", []).append(entry)
|
|
|
|
|
|
def check_metadata_sbom(meta_bom: dict) -> None:
    """Log a warning for every expected field missing from a metadata SBOM component.

    Each entry of meta_bom["components"] is checked for all names in
    METADATA_FIELDS_REQUIRED and for at least one name out of every group in
    METADATA_FIELDS_ONE_OF. Findings are only logged; nothing is raised for them.
    """
    for component in meta_bom["components"]:
        absent = [name for name in METADATA_FIELDS_REQUIRED if name not in component]
        for name in absent:
            logger.warning(
                "METADATA: %s is missing required field '%s'.",
                component.get("bom-ref") or component.get("name"),
                name,
            )

        for alternatives in METADATA_FIELDS_ONE_OF:
            if any(name in component for name in alternatives):
                continue
            logger.warning(
                "METADATA: %s is missing one of fields '%s'.",
                component.get("bom-ref") or component.get("name"),
                alternatives,
            )
|
|
|
|
|
|
def check_components_and_dependencies(sbom: dict, label: str = "") -> None:
    """Warn when component bom-refs and dependency refs do not line up.

    The set of .components[].bom-ref values (plus the bom-ref of
    .metadata.component, when it has one) is compared with the set of
    .dependencies[].ref values. One warning is logged for each direction in
    which refs are unmatched. A non-empty *label* is put in front of each
    warning as "<label>: ".
    """
    known_refs = {component["bom-ref"] for component in sbom.get("components", [])}
    primary_ref = sbom.get("metadata", {}).get("component", {}).get("bom-ref")
    if primary_ref:
        known_refs.add(primary_ref)
    declared_refs = {entry["ref"] for entry in sbom.get("dependencies", [])}

    prefix = f"{label}: " if label else ""
    findings = (
        (
            "%sCOMPONENTS/DEPENDENCIES MISMATCH: components with no matching dependency ref: %s",
            known_refs - declared_refs,
        ),
        (
            "%sCOMPONENTS/DEPENDENCIES MISMATCH: dependency refs with no matching component: %s",
            declared_refs - known_refs,
        ),
    )
    for message, unmatched in findings:
        if unmatched:
            logger.warning(message, prefix, sorted(unmatched))
|
|
|
|
|
|
def reconcile_dependency_refs(sbom: dict) -> None:
    """Make .dependencies[] agree with the component bom-refs found in the SBOM.

    Known refs are every .components[].bom-ref, the bom-ref of
    .metadata.component and the bom-refs of its nested .components[]. For each
    known ref without a dependency entry a stub with an empty 'dependsOn' list is
    appended (in sorted order). Dependency entries whose 'ref' is not a known ref
    are dropped after logging a warning. The SBOM is modified in place.
    """
    known_refs = {component["bom-ref"] for component in sbom.get("components", [])}
    root = sbom.get("metadata", {}).get("component", {})
    candidates = [root.get("bom-ref")]
    candidates.extend(child.get("bom-ref") for child in root.get("components", []))
    # Entries without a (truthy) bom-ref contribute nothing.
    known_refs.update(ref for ref in candidates if ref)
    declared_refs = {entry["ref"] for entry in sbom.get("dependencies", [])}

    stubs_needed = sorted(known_refs - declared_refs)
    if stubs_needed:
        entries = sbom.setdefault("dependencies", [])
        for ref in stubs_needed:
            entries.append({"ref": ref, "dependsOn": []})
            logger.debug("reconcile_dependency_refs: added missing dependency ref '%s'", ref)

    orphaned = declared_refs - known_refs
    if not orphaned:
        return
    logger.warning(
        "COMPONENTS/DEPENDENCIES MISMATCH: removing orphaned dependency refs with no matching component: %s",
        sorted(orphaned),
    )
    sbom["dependencies"] = [
        entry for entry in sbom["dependencies"] if entry["ref"] not in orphaned
    ]
|
|
|
|
|
|
def _is_internal_component(component: dict) -> bool:
    """Return True when a component is located in, or flagged as, internal-only."""
    return any(
        occurrence.get("location", "").startswith("src/third_party/private")
        for occurrence in component.get("evidence", {}).get("occurrences", [])
    ) or any(
        prop.get("name", "") in ("internal:as-is_component", "internal:private")
        and prop.get("value") == "true"
        for prop in component.get("properties", [])
    )


def _prune_dependencies(dependencies: list, keep_ref) -> list:
    """Return the entries whose 'ref' passes keep_ref, with 'dependsOn' filtered alike."""
    kept = [entry for entry in dependencies if keep_ref(entry["ref"])]
    for entry in kept:
        # 'dependsOn' is optional on a CycloneDX dependency entry; an entry
        # without it has nothing to prune and must not raise KeyError.
        if "dependsOn" in entry:
            entry["dependsOn"] = [ref for ref in entry["dependsOn"] if keep_ref(ref)]
    return kept


def convert_sbom_to_public(sbom_dict: dict) -> None:
    """Remove internal-only properties and components from SBOM.

    Works in place on *sbom_dict*, in three steps:

    1. Components are dropped when an evidence occurrence is located under
       "src/third_party/private", or when they carry an
       "internal:as-is_component" / "internal:private" property with value
       "true". Dependency entries and 'dependsOn' refs naming them go too.
    2. Dependency entries and 'dependsOn' refs that match neither a remaining
       component nor the metadata component are removed as orphans.
    3. Every property whose name starts with "internal:" is stripped from the
       remaining components.

    Args:
        sbom_dict: SBOM with "components", "dependencies" and
            metadata.component.bom-ref present.

    Raises:
        KeyError: if one of those required keys is missing.
    """
    components_before = len(sbom_dict["components"])
    # A set keeps the membership tests below O(1) per ref.
    internal_refs = {
        c["bom-ref"] for c in sbom_dict["components"] if _is_internal_component(c)
    }

    # Remove internal components and any dependencies on them from the SBOM
    sbom_dict["components"] = [
        c for c in sbom_dict["components"] if c["bom-ref"] not in internal_refs
    ]
    sbom_dict["dependencies"] = _prune_dependencies(
        sbom_dict["dependencies"], lambda ref: ref not in internal_refs
    )
    logger.info(
        "PUBLIC SBOM: Removed %d internal components",
        components_before - len(sbom_dict["components"]),
    )

    # Remove orphaned dependency entries — refs that are not present in the final component set.
    # This covers entries added from Endor Labs that reference components which were filtered out
    # at earlier stages (e.g. sub-packages, removed components) and are not internal-flagged.
    valid_refs = {sbom_dict["metadata"]["component"]["bom-ref"]} | {
        c["bom-ref"] for c in sbom_dict["components"]
    }
    dependencies_before = len(sbom_dict["dependencies"])
    sbom_dict["dependencies"] = _prune_dependencies(
        sbom_dict["dependencies"], lambda ref: ref in valid_refs
    )
    removed_deps = dependencies_before - len(sbom_dict["dependencies"])
    if removed_deps:
        logger.info("PUBLIC SBOM: Removed %d orphaned dependency entries", removed_deps)

    # Remove internal properties from public components
    properties_before = sum(len(c.get("properties", [])) for c in sbom_dict["components"])
    for component in sbom_dict["components"]:
        if "properties" in component:
            component["properties"] = [
                p
                for p in component["properties"]
                if not p.get("name", "").startswith("internal:")
            ]
    logger.info(
        "PUBLIC SBOM: Removed %d internal properties from public components",
        properties_before
        - sum(len(c.get("properties", [])) for c in sbom_dict["components"]),
    )
|
|
|
|
|
|
def is_valid_purl(purl: str) -> bool:
    """Return True when the PURL matches one of the per-type patterns in REGEX_PURL.

    Patterns are tried in the dict's order; the first type that matches is
    reported at debug level.
    """
    matched_type = next(
        (purl_type for purl_type, pattern in REGEX_PURL.items() if pattern.match(purl)),
        None,
    )
    if matched_type is None:
        return False
    logger.debug(
        "PURL: %s matched PURL type '%s' regex '%s'",
        purl,
        matched_type,
        REGEX_PURL[matched_type].pattern,
    )
    return True
|
|
|
|
|
|
def read_sbom_json_file(file_path: str) -> "dict | None":
    """Load a JSON SBOM file (schema is not validated).

    Args:
        file_path: Path of the JSON file to read; it is decoded as UTF-8.

    Returns:
        The parsed JSON document, or None when the file cannot be opened/read
        or does not contain valid JSON. Those failures are logged at error
        level rather than raised.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as input_json:
            result = json.load(input_json)
    except OSError as e:
        logger.error("Error loading SBOM file from %s", file_path)
        logger.error(e)
        return None
    except json.JSONDecodeError as e:
        logger.error("Error decoding JSON SBOM file from %s", file_path)
        logger.error(e)
        return None

    # The component count is only used for this log line, so a document without
    # a "components" key must not abort an otherwise successful load.
    logger.info(
        "SBOM loaded from %s with %d components",
        file_path,
        len(result.get("components", [])),
    )
    return result
|
|
|
|
|
|
def remove_sbom_component(sbom_dict: dict, component_key: str) -> None:
    """Remove components from the SBOM by bom-ref prefix.

    Matching uses str.startswith, so every component whose bom-ref begins with
    *component_key* is removed, together with dependency entries whose 'ref'
    begins with it and 'dependsOn' refs that begin with it. The SBOM is
    modified in place.

    Args:
        sbom_dict: SBOM containing "components" and "dependencies" lists.
        component_key: bom-ref prefix selecting what to remove.
    """

    def is_kept(ref: str) -> bool:
        return not ref.startswith(component_key)

    sbom_dict["components"] = [c for c in sbom_dict["components"] if is_kept(c["bom-ref"])]
    sbom_dict["dependencies"] = [d for d in sbom_dict["dependencies"] if is_kept(d["ref"])]
    for dependency in sbom_dict["dependencies"]:
        # 'dependsOn' is optional on a CycloneDX dependency entry; skip entries
        # without it instead of raising KeyError.
        if "dependsOn" in dependency:
            dependency["dependsOn"] = [ref for ref in dependency["dependsOn"] if is_kept(ref)]
    logger.debug("Removed component '%s' from SBOM", component_key)
|
|
|
|
|
|
def set_component_version(
    component: dict, version: str, purl_version: str = None, cpe_version: str = None
) -> None:
    """Substitute the "{{VERSION}}" placeholder in a metadata SBOM component.

    'bom-ref' receives *purl_version*, 'version' receives *version*, 'purl'
    (when set) receives the URL-quoted *purl_version* and 'cpe' (when set)
    receives *cpe_version*. An empty or omitted *purl_version* / *cpe_version*
    falls back to *version*. A resulting PURL that fails is_valid_purl() is
    reported with a warning; the component is still updated.
    """
    placeholder = "{{VERSION}}"
    purl_version = purl_version or version
    cpe_version = cpe_version or version

    component["bom-ref"] = component["bom-ref"].replace(placeholder, purl_version)
    component["version"] = component["version"].replace(placeholder, version)

    purl = component.get("purl")
    if purl:
        component["purl"] = purl.replace(placeholder, urllib.parse.quote(purl_version))
        if not is_valid_purl(component["purl"]):
            logger.warning("PURL: Invalid PURL (%s)", component["purl"])

    cpe = component.get("cpe")
    if cpe:
        component["cpe"] = cpe.replace(placeholder, cpe_version)
|
|
|
|
|
|
def set_dependency_version(dependencies: list, meta_bom_ref: str, purl_version: str) -> None:
    """Substitute "{{VERSION}}" in dependency refs equal to *meta_bom_ref*.

    A dependency entry's 'ref' is rewritten when it equals *meta_bom_ref* and
    contains the placeholder. Every 'dependsOn' item equal to *meta_bom_ref* is
    rewritten in place in its existing list. The number of touched refs and
    'dependsOn' items is logged at debug level.

    Args:
        dependencies: The SBOM's .dependencies[] list; modified in place.
        meta_bom_ref: The bom-ref, still containing the placeholder, to look for.
        purl_version: Text that replaces the placeholder.
    """
    placeholder = "{{VERSION}}"
    resolved_ref = meta_bom_ref.replace(placeholder, purl_version)
    refs_updated = 0
    depends_on_updated = 0
    for dependency in dependencies:
        if placeholder in dependency["ref"] and dependency["ref"] == meta_bom_ref:
            dependency["ref"] = resolved_ref
            refs_updated += 1
        # 'dependsOn' is optional on a CycloneDX dependency entry; an entry
        # without it simply has nothing to rewrite.
        depends_on = dependency.get("dependsOn", [])
        for index, ref in enumerate(depends_on):
            if ref == meta_bom_ref:
                depends_on[index] = resolved_ref
                depends_on_updated += 1

    logger.debug(
        "set_dependency_version: '%s' updated %d refs and %d dependsOn",
        meta_bom_ref,
        refs_updated,
        depends_on_updated,
    )
|
|
|
|
|
|
def add_component_dependsOn(dependencies: list, component_ref: str, depends_on_ref: str) -> None:
    """Add a dependsOn reference to a component in the SBOM dependencies.

    The first entry whose 'ref' equals *component_ref* gets *depends_on_ref*
    appended to its 'dependsOn' list unless it is already listed. When no such
    entry exists, a new one is appended to *dependencies*.

    Args:
        dependencies: The SBOM's .dependencies[] list; modified in place.
        component_ref: bom-ref of the component that has the dependency.
        depends_on_ref: bom-ref of the component being depended on.
    """
    entry = next((d for d in dependencies if d["ref"] == component_ref), None)
    if entry is None:
        # ref missing from .dependencies[]
        dependencies.append({"ref": component_ref, "dependsOn": [depends_on_ref]})
        logger.debug(
            "Added new dependency ref for component '%s' with dependsOn reference '%s'",
            component_ref,
            depends_on_ref,
        )
        return

    # 'dependsOn' is optional on a CycloneDX dependency entry; create the list
    # on demand rather than raising KeyError.
    depends_on = entry.setdefault("dependsOn", [])
    if depends_on_ref in depends_on:
        logger.debug(
            "Component '%s' already has dependsOn reference '%s'",
            component_ref,
            depends_on_ref,
        )
        return

    depends_on.append(depends_on_ref)
    logger.debug(
        "Added dependsOn reference '%s' to component '%s'",
        depends_on_ref,
        component_ref,
    )
|
|
|
|
|
|
def sbom_components_to_dict(sbom: dict, with_version: bool = False) -> dict:
    """Index the SBOM's components by their URL-unquoted bom-ref.

    Args:
        sbom: SBOM containing a "components" list whose items have a 'bom-ref'.
        with_version: When False (default) the key is the bom-ref cut at its
            first "@", i.e. without the version and anything after it. When
            True the complete bom-ref is used.

    Returns:
        Dict mapping each key to its component. Components that produce the
        same key overwrite one another; the last one wins.
    """

    def key_for(bom_ref: str) -> str:
        if not with_version:
            # Cut before unquoting: a percent-encoded "%40" in the namespace or
            # name would otherwise decode to "@" and be taken for the version
            # separator, truncating the key.
            bom_ref = bom_ref.split("@")[0]
        return urllib.parse.unquote(bom_ref)

    return {key_for(component["bom-ref"]): component for component in sbom["components"]}
|
|
|
|
|
|
def write_sbom_json_file(sbom_dict: dict, file_path: str) -> None:
    """Save a JSON SBOM file (schema is not validated).

    The SBOM is serialized with a 2-space indent and written as UTF-8 to the
    absolute form of *file_path*. I/O and serialization failures are logged at
    error level rather than raised.

    Args:
        sbom_dict: The SBOM to serialize.
        file_path: Destination path; an existing file is overwritten.
    """
    try:
        file_path = os.path.abspath(file_path)
        # Serialize before opening: mode "w" truncates the target immediately,
        # so a serialization error after open() would wipe an existing file.
        formatted_sbom = json.dumps(sbom_dict, indent=2)
        with open(file_path, "w", encoding="utf-8") as output_json:
            output_json.write(formatted_sbom)
    except OSError as e:
        logger.error("Error writing SBOM file to %s", file_path)
        logger.error(e)
    except TypeError as e:
        logger.error("Error serializing SBOM to JSON for file %s", file_path)
        logger.error(e)
    else:
        logger.info("SBOM file saved to %s", file_path)
|