feat(sbom): scan mode, domain grouping dashboard, SBOM convention doc

- ingest_sbom.py: add --scan flag (recursive lockfile discovery) +
  --lockfile repeatable for explicit multi-file ingestion; skip
  .venv/node_modules/.git/dist/etc; Makefile gains SCAN= and REPO_PATH= vars
- sbom.md: add /domains/ fetch; domain-level summary table; per-repo
  accordion with details/summary; domain filter on package table; dual-
  licence false-positive note; +1 KPI card (Domains Covered)
- canon/standards/sbom-convention_v0.1.md: authoritative lockfile table,
  ingest workflow (single/scan/explicit), snapshot semantics, direct-vs-
  transitive caveats, licence governance + copyleft escalation, update
  cadence, multi-repo domain pattern, planned enhancements

First ingest: the-custodian — 420 pkgs (88 python + 332 node), 13 licence
groups, 1 copyleft flag (jszip dual-licensed MIT OR GPL-3.0-or-later)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-01 16:15:40 +01:00
parent 7d3487d4fe
commit 4c157d43a8
3 changed files with 197 additions and 29 deletions

View File

@@ -188,9 +188,19 @@ _LOCKFILE_PARSERS = {
"Cargo.lock": _parse_cargo_lock,
}
# Directory names pruned during recursive lockfile discovery: anything found
# beneath them is not treated as a project-level lockfile.
_SKIP_DIRS: set[str] = {
    # version-control metadata
    ".git",
    ".hg",
    ".svn",
    # virtual-environment style directories
    ".venv",
    "venv",
    ".env",
    # installed JavaScript dependencies
    "node_modules",
    # interpreter / tool caches
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    # build output
    "dist",
    "build",
    ".build",
    "target",
    # test-runner environments
    ".tox",
    ".nox",
}
def detect_lockfile(repo_path: Path) -> tuple[Path, str] | None:
"""Return (lockfile_path, ecosystem) for the first recognised lockfile found."""
"""Return (lockfile_path, filename) for the first recognised lockfile at repo root."""
for name in _LOCKFILE_PARSERS:
candidate = repo_path / name
if candidate.exists():
@@ -198,6 +208,17 @@ def detect_lockfile(repo_path: Path) -> tuple[Path, str] | None:
return None
def detect_lockfiles_recursive(repo_path: Path) -> list[Path]:
    """Walk *repo_path* and return every recognised lockfile beneath it.

    A file is recognised when its name is a key of ``_LOCKFILE_PARSERS``.
    Directories whose name appears in ``_SKIP_DIRS`` are pruned, so nothing
    below them is visited.

    Args:
        repo_path: Directory to start walking from.

    Returns:
        Lockfile paths in a deterministic order: directories are visited
        top-down with sub-directories sorted by name, and within a directory
        lockfiles follow the key order of ``_LOCKFILE_PARSERS``.  The list is
        empty when nothing matches (``os.walk`` also yields nothing for a
        path that does not exist).
    """
    found: list[Path] = []
    for dirpath, dirnames, filenames in os.walk(repo_path):
        # Assigning through the slice mutates the list os.walk holds, which is
        # what makes it skip the pruned directories; sorting fixes the order.
        dirnames[:] = sorted(d for d in dirnames if d not in _SKIP_DIRS)
        present = set(filenames)  # one hash lookup per known lockfile name
        found.extend(
            Path(dirpath) / name for name in _LOCKFILE_PARSERS if name in present
        )
    return found
def parse_lockfile(lockfile_path: Path) -> list[dict]:
filename = lockfile_path.name
parser = _LOCKFILE_PARSERS.get(filename)
@@ -236,38 +257,60 @@ def post_ingest(api_base: str, repo_slug: str, entries: list[dict]) -> dict:
# ---------------------------------------------------------------------------
def _display_path(path: Path, root: Path) -> Path:
    """Return *path* relative to *root* when it lies beneath it, else *path* unchanged."""
    return path.relative_to(root) if path.is_relative_to(root) else path


def main() -> None:
    """Command-line entry point: collect lockfiles, parse them, submit the entries.

    The lockfiles to ingest are chosen by the first rule that applies:

    1. every ``--lockfile PATH`` given on the command line (repeatable);
    2. with ``--scan``, every lockfile found by ``detect_lockfiles_recursive``
       under ``--repo-path``;
    3. otherwise the single lockfile ``detect_lockfile`` finds at the repo root.

    Entries from all selected lockfiles are concatenated and sent in a single
    ``post_ingest`` call for ``--repo``.  With ``--dry-run`` nothing is
    submitted; the first five parsed entries are printed as JSON instead.

    Exits with status 1 (message on stderr) when scan or auto-detection finds
    no lockfile.
    """
    parser = argparse.ArgumentParser(description="Ingest a repo's lockfiles into the State Hub SBOM store.")
    parser.add_argument("--repo", required=True, help="Managed-repo slug (e.g. 'the-custodian')")
    parser.add_argument("--lockfile", action="append", dest="lockfiles",
                        metavar="PATH", help="Path to a specific lockfile (repeatable)")
    parser.add_argument("--repo-path", default=".", help="Repo root for auto-detection/scan (default: cwd)")
    parser.add_argument("--scan", action="store_true",
                        help="Recursively find ALL lockfiles under --repo-path (handles multi-ecosystem repos)")
    parser.add_argument("--api-base", default=API_BASE, help="State Hub API base URL")
    parser.add_argument("--dry-run", action="store_true", help="Parse only — do not submit")
    args = parser.parse_args()

    repo_root = Path(args.repo_path).resolve()
    lockfile_paths: list[Path] = []

    if args.lockfiles:
        # Explicit paths take precedence over --scan; say so instead of
        # dropping the flag silently.
        if args.scan:
            print("Note: --scan ignored because --lockfile was given.", file=sys.stderr)
        lockfile_paths = [Path(lf).resolve() for lf in args.lockfiles]
    elif args.scan:
        lockfile_paths = detect_lockfiles_recursive(repo_root)
        if not lockfile_paths:
            print(f"No lockfiles found under '{repo_root}'.", file=sys.stderr)
            sys.exit(1)
        print(f"Scan found {len(lockfile_paths)} lockfile(s):")
        for lf in lockfile_paths:
            print(f"  {_display_path(lf, repo_root)}")
    else:
        found = detect_lockfile(repo_root)
        if not found:
            print(
                f"No recognised lockfile found in '{repo_root}'. "
                f"Supported: {', '.join(_LOCKFILE_PARSERS)}. "
                "Use --scan to search subdirectories.",
                file=sys.stderr,
            )
            sys.exit(1)
        lockfile_path, _ = found
        print(f"Auto-detected: {lockfile_path}")
        lockfile_paths = [lockfile_path]

    # Parse each lockfile and pool the entries; they are submitted as one batch.
    all_entries: list[dict] = []
    for lf in lockfile_paths:
        parsed = parse_lockfile(lf)
        print(f"  {_display_path(lf, repo_root)}: {len(parsed)} packages")
        all_entries.extend(parsed)
    print(f"Total: {len(all_entries)} packages across {len(lockfile_paths)} lockfile(s)")

    if args.dry_run:
        # Show a short preview only; nothing is sent to the API.
        print(json.dumps(all_entries[:5], indent=2))
        if len(all_entries) > 5:
            print(f"  … and {len(all_entries) - 5} more")
        return

    result = post_ingest(args.api_base, args.repo, all_entries)
    print(f"Ingested {result.get('ingested', '?')} entries for repo '{args.repo}'")
    print(f"Snapshot at: {result.get('snapshot_at', '?')}")