generated from coulomb/repo-seed
feat(sbom): scan mode, domain grouping dashboard, SBOM convention doc
- ingest_sbom.py: add --scan flag (recursive lockfile discovery) + repeatable --lockfile for explicit multi-file ingestion; skip .venv/node_modules/.git/dist/etc; Makefile gains SCAN= and REPO_PATH= vars
- sbom.md: add /domains/ fetch; domain-level summary table; per-repo accordion with details/summary; domain filter on package table; dual-licence false-positive note; +1 KPI card (Domains Covered)
- canon/standards/sbom-convention_v0.1.md: authoritative lockfile table, ingest workflow (single/scan/explicit), snapshot semantics, direct-vs-transitive caveats, licence governance + copyleft escalation, update cadence, multi-repo domain pattern, planned enhancements

First ingest: the-custodian — 420 pkgs (88 python + 332 node), 13 licence groups, 1 copyleft flag (jszip dual-licensed MIT OR GPL-3.0-or-later)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -188,9 +188,19 @@ _LOCKFILE_PARSERS = {
|
||||
"Cargo.lock": _parse_cargo_lock,
|
||||
}
|
||||
|
||||
# Directory names pruned during the recursive lockfile scan: version-control
# metadata, virtual environments, vendored dependencies, tool caches and
# build output. None of these hold project-level lockfiles.
_SKIP_DIRS = {
    # version control
    ".git",
    ".hg",
    ".svn",
    # virtual environments
    ".venv",
    "venv",
    ".env",
    # vendored dependencies
    "node_modules",
    # tool caches
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    # build output
    "dist",
    "build",
    ".build",
    "target",
    # test-runner environments
    ".tox",
    ".nox",
}
|
||||
|
||||
|
||||
def detect_lockfile(repo_path: Path) -> tuple[Path, str] | None:
|
||||
"""Return (lockfile_path, ecosystem) for the first recognised lockfile found."""
|
||||
"""Return (lockfile_path, filename) for the first recognised lockfile at repo root."""
|
||||
for name in _LOCKFILE_PARSERS:
|
||||
candidate = repo_path / name
|
||||
if candidate.exists():
|
||||
@@ -198,6 +208,17 @@ def detect_lockfile(repo_path: Path) -> tuple[Path, str] | None:
|
||||
return None
|
||||
|
||||
|
||||
def detect_lockfiles_recursive(repo_path: Path) -> list[Path]:
    """Collect every recognised lockfile beneath *repo_path*.

    The tree is walked top-down. Directories named in ``_SKIP_DIRS`` are
    removed from the walk before descending, and the remaining
    sub-directories are visited in sorted order. Within a single directory,
    matches are reported in the iteration order of ``_LOCKFILE_PARSERS``.

    Returns:
        Paths to the lockfiles found; an empty list when there are none.
    """
    lockfiles: list[Path] = []
    for root, subdirs, files in os.walk(repo_path):
        # Assigning through the slice mutates the list os.walk holds, which
        # is what stops it descending into the skipped directories.
        kept = [entry for entry in subdirs if entry not in _SKIP_DIRS]
        kept.sort()
        subdirs[:] = kept

        base = Path(root)
        lockfiles.extend(
            base / lock_name
            for lock_name in _LOCKFILE_PARSERS
            if lock_name in files
        )
    return lockfiles
|
||||
|
||||
|
||||
def parse_lockfile(lockfile_path: Path) -> list[dict]:
|
||||
filename = lockfile_path.name
|
||||
parser = _LOCKFILE_PARSERS.get(filename)
|
||||
@@ -236,38 +257,60 @@ def post_ingest(api_base: str, repo_slug: str, entries: list[dict]) -> dict:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _display_path(path: Path, root: Path) -> Path:
    """Return *path* relative to *root* when it lies beneath it, else *path* unchanged."""
    return path.relative_to(root) if path.is_relative_to(root) else path


def main() -> None:
    """Command-line entry point: parse lockfiles and submit them to the SBOM store.

    Lockfiles are selected by the first of these that applies:

    1. ``--lockfile PATH`` (repeatable): exactly the files given.
    2. ``--scan``: every recognised lockfile found recursively under
       ``--repo-path``.
    3. Otherwise: the first recognised lockfile directly in ``--repo-path``.

    The entries parsed from all selected lockfiles are concatenated and sent
    in a single ``post_ingest`` call. With ``--dry-run`` nothing is submitted;
    the first five entries are printed as JSON instead.

    Exits with status 1 (message on stderr) when scan or auto-detection finds
    no lockfile.
    """
    parser = argparse.ArgumentParser(description="Ingest a repo's lockfiles into the State Hub SBOM store.")
    parser.add_argument("--repo", required=True, help="Managed-repo slug (e.g. 'the-custodian')")
    parser.add_argument("--lockfile", action="append", dest="lockfiles",
                        metavar="PATH", help="Path to a specific lockfile (repeatable)")
    parser.add_argument("--repo-path", default=".", help="Repo root for auto-detection/scan (default: cwd)")
    parser.add_argument("--scan", action="store_true",
                        help="Recursively find ALL lockfiles under --repo-path (handles multi-ecosystem repos)")
    parser.add_argument("--api-base", default=API_BASE, help="State Hub API base URL")
    parser.add_argument("--dry-run", action="store_true", help="Parse only — do not submit")
    args = parser.parse_args()

    repo_root = Path(args.repo_path).resolve()
    lockfile_paths: list[Path] = []

    if args.lockfiles:
        # Explicit paths take precedence over --scan and auto-detection.
        lockfile_paths = [Path(lf).resolve() for lf in args.lockfiles]
    elif args.scan:
        lockfile_paths = detect_lockfiles_recursive(repo_root)
        if not lockfile_paths:
            print(f"No lockfiles found under '{repo_root}'.", file=sys.stderr)
            sys.exit(1)
        print(f"Scan found {len(lockfile_paths)} lockfile(s):")
        for lf in lockfile_paths:
            print(f" {_display_path(lf, repo_root)}")
    else:
        found = detect_lockfile(repo_root)
        if not found:
            print(
                f"No recognised lockfile found in '{repo_root}'. "
                f"Supported: {', '.join(_LOCKFILE_PARSERS)}. "
                "Use --scan to search subdirectories.",
                file=sys.stderr,
            )
            sys.exit(1)
        lockfile_path, _ = found
        print(f"Auto-detected: {lockfile_path}")
        lockfile_paths = [lockfile_path]

    # Parse every selected lockfile and pool the entries into one submission.
    all_entries: list[dict] = []
    for lf in lockfile_paths:
        parsed = parse_lockfile(lf)
        print(f" {_display_path(lf, repo_root)}: {len(parsed)} packages")
        all_entries.extend(parsed)

    print(f"Total: {len(all_entries)} packages across {len(lockfile_paths)} lockfile(s)")

    if args.dry_run:
        # Preview only: show a small sample rather than the whole payload.
        print(json.dumps(all_entries[:5], indent=2))
        if len(all_entries) > 5:
            print(f" … and {len(all_entries) - 5} more")
        return

    result = post_ingest(args.api_base, args.repo, all_entries)
    print(f"Ingested {result.get('ingested', '?')} entries for repo '{args.repo}'")
    print(f"Snapshot at: {result.get('snapshot_at', '?')}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user