From bd1b01fdc0176a9e247df5d82928451f3cce7877 Mon Sep 17 00:00:00 2001 From: tegwick Date: Thu, 19 Mar 2026 01:04:34 +0100 Subject: [PATCH] feat(sbom): add go.sum parser to ingest_sbom.py Parses go.sum lockfiles for Go projects. Reads go.mod alongside to mark direct vs indirect dependencies. Deduplicates by (module, version), skipping go.mod hash lines. Used to ingest key-cape (netkingdom domain): 23 Go modules. Co-Authored-By: Claude Sonnet 4.6 --- scripts/ingest_sbom.py | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/scripts/ingest_sbom.py b/scripts/ingest_sbom.py index 6696234..738b5e1 100644 --- a/scripts/ingest_sbom.py +++ b/scripts/ingest_sbom.py @@ -10,6 +10,7 @@ Auto-detects all of the following in one scan: package-lock.json → node yarn.lock → node Cargo.lock → rust + go.sum → go (reads go.mod alongside for direct/indirect) .terraform.lock.hcl → terraform (anywhere in tree) ansible/requirements.yml → ansible (anywhere under ansible/ dirs) ansible/requirements.yaml → ansible @@ -275,6 +276,62 @@ def _parse_ansible_requirements(path: Path) -> list[dict]: return entries +def _parse_go_sum(path: Path) -> list[dict]: + """Parse go.sum — deduplicated Go module list with direct/indirect from go.mod.""" + # Determine direct deps by reading go.mod in the same directory + direct: set[str] = set() + go_mod = path.parent / "go.mod" + if go_mod.exists(): + in_require = False + for line in go_mod.read_text().splitlines(): + stripped = line.strip() + if stripped.startswith("require ("): + in_require = True + continue + if in_require and stripped == ")": + in_require = False + continue + if in_require and stripped and not stripped.startswith("//"): + if "// indirect" not in stripped: + parts = stripped.split() + if parts: + direct.add(parts[0]) + # single-line require without parens + elif stripped.startswith("require ") and "(" not in stripped: + rest = stripped[len("require "):].strip() + if "// indirect" not in rest: + parts = rest.split() + if parts: + direct.add(parts[0]) + + seen: set[tuple[str, str | None]] = set() + entries = [] + for line in path.read_text().splitlines(): + line = line.strip() + if not line or line.startswith("//"): + continue + parts = line.split() + if len(parts) < 3: + continue + module, version = parts[0], parts[1] + # Skip go.mod hash lines — only ingest the module itself + if "/go.mod" in version: + continue + key = (module, version) + if key in seen: + continue + seen.add(key) + entries.append({ + "package_name": module, + "package_version": version, + "ecosystem": "go", + "license_spdx": None, + "is_direct": module in direct, + "is_dev": False, + }) + return entries + + def _parse_sbom_tools_yaml(path: Path) -> list[dict]: """Parse sbom-tools.yaml — agent-generated tool manifest at repo root.""" if not _YAML_AVAILABLE: @@ -333,6 +390,7 @@ _LOCKFILE_PARSERS: dict[str, object] = { "yarn.lock": _parse_yarn_lock, "Cargo.lock": _parse_cargo_lock, ".terraform.lock.hcl": _parse_terraform_lock_hcl, + "go.sum": _parse_go_sum, } # Directories that never contain project-level lockfiles