From 1c3c6ef27d4819c0dc7ba57b6ebdf8b2eecc2d93 Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 1 Mar 2026 16:15:40 +0100 Subject: [PATCH] feat(sbom): scan mode, domain grouping dashboard, SBOM convention doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ingest_sbom.py: add --scan flag (recursive lockfile discovery) + --lockfile repeatable for explicit multi-file ingestion; skip .venv/node_modules/.git/dist/etc; Makefile gains SCAN= and REPO_PATH= vars - sbom.md: add /domains/ fetch; domain-level summary table; per-repo accordion with details/summary; domain filter on package table; dual- licence false-positive note; +1 KPI card (Domains Covered) - canon/standards/sbom-convention_v0.1.md: authoritative lockfile table, ingest workflow (single/scan/explicit), snapshot semantics, direct-vs- transitive caveats, licence governance + copyleft escalation, update cadence, multi-repo domain pattern, planned enhancements First ingest: the-custodian — 420 pkgs (88 python + 332 node), 13 licence groups, 1 copyleft flag (jszip dual-licensed MIT OR GPL-3.0-or-later) Co-Authored-By: Claude Sonnet 4.6 --- canon/standards/sbom-convention_v0.1.md | 253 ++++++++++++++++++++++++ state-hub/Makefile | 12 +- state-hub/dashboard/src/sbom.md | 141 +++++++++++-- state-hub/scripts/ingest_sbom.py | 73 +++++-- 4 files changed, 450 insertions(+), 29 deletions(-) create mode 100644 canon/standards/sbom-convention_v0.1.md diff --git a/canon/standards/sbom-convention_v0.1.md b/canon/standards/sbom-convention_v0.1.md new file mode 100644 index 0000000..5b82cc1 --- /dev/null +++ b/canon/standards/sbom-convention_v0.1.md @@ -0,0 +1,253 @@ +--- +id: SBOM-CONV-001 +type: standard +title: "SBOM Convention v0.1 — Dependency Tracking & Licence Governance" +domain: custodian +status: active +version: "0.1" +created: "2026-03-01" +updated: "2026-03-01" +--- + +# SBOM Convention v0.1 — Dependency Tracking & Licence Governance + +## Purpose + +This convention 
defines how every Custodian-registered project captures, +stores, and reports its software supply-chain inventory to the State Hub SBOM +store. It establishes: + +- Which lockfiles are authoritative per ecosystem +- How to run SBOM ingestion (single-ecosystem and multi-ecosystem repos) +- How to keep the data current +- Licence governance rules and escalation thresholds + +The State Hub SBOM store aggregates across all registered repos. The +dashboard (`/sbom`) provides domain-level and repo-level drill-down. + +--- + +## 1. Authoritative Lockfiles per Ecosystem + +| Ecosystem | Authoritative file | Notes | +|-----------|-------------------|-------| +| Python | `uv.lock` | Preferred. `requirements.txt` accepted as fallback | +| Node / npm | `package-lock.json` | Preferred. `yarn.lock` accepted | +| Rust | `Cargo.lock` | Auto-detected | +| Go | `go.sum` | *Not yet parsed — planned* | +| Java / JVM | `gradle.lockfile` / `pom.xml` | *Not yet parsed — planned* | + +**Principle:** commit lockfiles to the repo. Lockfiles are the SBOM source +of truth; do not generate them at ingest time. + +--- + +## 2. Repo Registration Prerequisite + +Before SBOM data can be reported, the repo must be registered in the State Hub: + +```bash +cd ~/the-custodian/state-hub +make add-repo DOMAIN=<domain> SLUG=<slug> NAME="<name>" PATH=/absolute/path/to/repo +``` + +Check registered repos: +```bash +make list-repos +# or +curl -s http://127.0.0.1:8000/repos/ | python3 -m json.tool +``` + +--- + +## 3. SBOM Ingestion + +### 3.1 Standard ingest (single lockfile at repo root) + +```bash +cd ~/the-custodian/state-hub +make ingest-sbom REPO=<slug> REPO_PATH=/path/to/repo +``` + +The script auto-detects the first recognised lockfile at `REPO_PATH`. + +### 3.2 Multi-ecosystem repos (recommended for complex repos) + +Use `SCAN=1` to walk the repo tree and combine **all** lockfiles into a single +snapshot. Non-dep directories (`.venv`, `node_modules`, `.git`, `dist`, etc.) +are automatically skipped. 
+ +```bash +make ingest-sbom REPO=the-custodian SCAN=1 REPO_PATH=/home/worsch/the-custodian +``` + +This is the correct approach for repos that contain both a backend and a +frontend (e.g., a Python API + Node/Observable dashboard). + +### 3.3 Explicit lockfile path + +```bash +make ingest-sbom REPO=<slug> LOCKFILE=/path/to/specific/uv.lock +``` + +Multiple lockfiles can be passed by calling the script directly with repeated +`--lockfile` flags: + +```bash +cd ~/the-custodian/state-hub +.venv/bin/python scripts/ingest_sbom.py \ + --repo <slug> \ + --lockfile /path/to/uv.lock \ + --lockfile /path/to/package-lock.json +``` + +### 3.4 Dry run (inspect without submitting) + +```bash +# `make ingest-sbom` does not forward --dry-run, so run the script directly +# (same inputs as: make ingest-sbom REPO=<slug> SCAN=1 REPO_PATH=/path/to/repo, but nothing is submitted): +.venv/bin/python scripts/ingest_sbom.py --repo <slug> --scan --repo-path /path/to/repo --dry-run +``` + +--- + +## 4. Snapshot Semantics + +Each `POST /sbom/ingest/` call **replaces** the entire previous snapshot for +that repo. This means: + +- There is always exactly one snapshot per repo (the most recent ingest) +- Re-running ingest after a dependency update is idempotent — it simply + refreshes the data +- Historical snapshots are **not** retained (v0.1 scope; versioned history is + a planned extension) + +The `last_sbom_at` timestamp on the managed_repo record indicates when the +last ingest ran. + +--- + +## 5. 
Direct vs Transitive Dependencies + +| Source | `is_direct` | Notes | +|--------|-------------|-------| +| `package-lock.json` | Accurate — npm `indirect` flag used | Dev packages also detected via `dev` flag | +| `yarn.lock` | `false` for all (yarn.lock doesn't distinguish) | Treat output as transitive | +| `uv.lock` | `false` for all (uv.lock doesn't distinguish direct from transitive) | | +| `requirements.txt` | `true` for all (every line is a direct dep) | | +| `Cargo.lock` | `false` for all (workspace member packages not yet distinguished) | | + +**Governance implication:** `is_direct=true` entries receive stricter licence +scrutiny. Copyleft risk is reported specifically for `is_direct=true AND is_dev=false`. + +--- + +## 6. Licence Governance + +### 6.1 Copyleft detection + +The following SPDX identifier substrings trigger a copyleft flag: +`GPL`, `AGPL`, `LGPL`, `EUPL`, `CDDL`, `MPL` + +A copyleft flag on a **direct prod dependency** (`is_direct=true`, `is_dev=false`) +increments the `licence_risk_count` in the State Hub summary and triggers a +warning on the SBOM dashboard. + +### 6.2 Dual-licensed packages + +Packages with SPDX expressions like `(MIT OR GPL-3.0-or-later)` are flagged +**conservatively** — the presence of a copyleft identifier in the SPDX string +is sufficient to trigger the flag, regardless of the OR clause. + +**Action required:** review flagged packages. If the non-copyleft licence is +used in practice, document this decision in a `contrib/` BR or FR artifact and +note it in the repo's CLAUDE.md. + +### 6.3 Unknown licences + +Packages with `license_spdx = null` are those whose lockfile did not contain +licence metadata (`uv.lock`, `yarn.lock`, `Cargo.lock` do not embed licence +info). These are listed in the dashboard but do not trigger risk flags. + +To resolve unknowns, consult the package's registry page (PyPI, npm, crates.io) +and either accept the unknown status or enhance the ingest script. 
+ +### 6.4 Escalation + +Per the Custodian Constitution, a copyleft direct prod dep **must be reviewed** +before the next production deployment. Record the decision via: + +``` +register_contribution(type="br", title="Licence review: <package>", ...) +``` + +or directly in `contrib/bug-reports/` using the BR template. + +--- + +## 7. Keeping Data Current + +### 7.1 When to re-run ingest + +Re-run `make ingest-sbom` after any of the following: +- `uv add` / `uv remove` (Python) +- `npm install` / `npm update` (Node) +- `cargo add` / `cargo update` (Rust) +- Any lockfile regeneration + +### 7.2 Recommended workflow integration + +Add to your repo's CLAUDE.md (or developer runbook): + +> After updating dependencies, run: +> ```bash +> cd ~/the-custodian/state-hub +> make ingest-sbom REPO=<slug> SCAN=1 REPO_PATH=<path> +> ``` + +### 7.3 Verification + +After ingest: +```bash +curl -s http://127.0.0.1:8000/sbom/<slug>/ | python3 -m json.tool | head -30 +curl -s http://127.0.0.1:8000/sbom/report/licences/ | python3 -m json.tool +``` + +Or visit the State Hub dashboard → SBOM → By Repo to see the updated snapshot. + +--- + +## 8. Multi-Repo Domains + +When a domain has multiple repos (e.g., `api` + `frontend` + `infra`), each +repo should be registered separately and ingested separately: + +```bash +make ingest-sbom REPO=myapp-api SCAN=1 REPO_PATH=/home/worsch/myapp +make ingest-sbom REPO=myapp-frontend SCAN=1 REPO_PATH=/home/worsch/myapp-frontend +``` + +The SBOM dashboard aggregates across all repos within a domain in the +**By Domain** table. + +--- + +## 9. Current Registered Repos & Status + +| Repo | Domain | Ecosystems | Last Ingest | +|------|--------|------------|-------------| +| `the-custodian` | custodian | python, node | 2026-03-01 | + +*(This table is informational. The live view is at the SBOM dashboard.)* + +--- + +## 10. 
Planned Enhancements + +- **Go / Java parsers** — add to `ingest_sbom.py` +- **Versioned snapshots** — retain history per repo for trend analysis +- **Licence override file** — allow repos to document known-acceptable + copyleft exceptions (`.sbom-overrides.yaml`) +- **CI integration** — GitHub Actions step to run ingest on lockfile change +- **Direct-dep detection for uv.lock** — parse `pyproject.toml` `[project.dependencies]` + to mark direct deps accurately diff --git a/state-hub/Makefile b/state-hub/Makefile index 1bf0772..a378dc4 100644 --- a/state-hub/Makefile +++ b/state-hub/Makefile @@ -73,10 +73,16 @@ list-repos: @test -n "$(DOMAIN)" || (echo "ERROR: DOMAIN is required."; exit 1) curl -sf "http://127.0.0.1:8000/repos/?domain=$(DOMAIN)" | python3 -m json.tool -## Ingest a repo's lockfile into the SBOM store: make ingest-sbom REPO=the-custodian [LOCKFILE=uv.lock] +## Ingest SBOM data for a repo. +## Single lockfile (explicit): make ingest-sbom REPO=the-custodian LOCKFILE=/path/to/uv.lock +## Scan all lockfiles in tree: make ingest-sbom REPO=the-custodian SCAN=1 REPO_PATH=/home/worsch/the-custodian +## Auto-detect at repo root: make ingest-sbom REPO=the-custodian REPO_PATH=/home/worsch/the-custodian ingest-sbom: - @test -n "$(REPO)" || (echo "ERROR: REPO is required. 
Usage: make ingest-sbom REPO= [LOCKFILE=]"; exit 1) - uv run python scripts/ingest_sbom.py --repo "$(REPO)" $(if $(LOCKFILE),--lockfile "$(LOCKFILE)",) + @test -n "$(REPO)" || (echo "ERROR: REPO is required."; exit 1) + uv run python scripts/ingest_sbom.py --repo "$(REPO)" \ + $(if $(LOCKFILE),--lockfile "$(LOCKFILE)") \ + $(if $(SCAN),--scan) \ + $(if $(REPO_PATH),--repo-path "$(REPO_PATH)") ## Check a repo for ADR-001 compliance: make validate-adr REPO=/path/to/repo [DOMAIN=custodian] validate-adr: diff --git a/state-hub/dashboard/src/sbom.md b/state-hub/dashboard/src/sbom.md index 76abff6..fc6b024 100644 --- a/state-hub/dashboard/src/sbom.md +++ b/state-hub/dashboard/src/sbom.md @@ -8,12 +8,13 @@ const API = "http://127.0.0.1:8000"; ```js // Fetch SBOM data on load -let _entries = [], _report = {groups: [], copyleft_direct_count: 0}, _repos = []; +let _entries = [], _report = {groups: [], copyleft_direct_count: 0}, _repos = [], _domains = []; try { - [_entries, _report, _repos] = await Promise.all([ + [_entries, _report, _repos, _domains] = await Promise.all([ fetch(`${API}/sbom/`).then(r => r.ok ? r.json() : []), fetch(`${API}/sbom/report/licences/`).then(r => r.ok ? r.json() : {groups:[], copyleft_direct_count: 0}), fetch(`${API}/repos/`).then(r => r.ok ? r.json() : []), + fetch(`${API}/domains/`).then(r => r.ok ? r.json() : []), ]); } catch {} ``` @@ -22,13 +23,24 @@ try { const entries = _entries ?? []; const report = _report ?? {groups: [], copyleft_direct_count: 0}; const repos = _repos ?? []; +const domains = _domains ?? []; const groups = report.groups ?? []; const riskCount = report.copyleft_direct_count ?? 0; + +// Domain + repo lookups +const domainById = Object.fromEntries(domains.map(d => [d.id, d])); +const repoById = Object.fromEntries(repos.map(r => [r.id, r])); +const repoDomain = Object.fromEntries(repos.map(r => [r.id, domainById[r.domain_id]?.slug ?? 
"—"])); +const domainSlugs = [...new Set(repos.map(r => repoDomain[r.id]).filter(s => s !== "—"))].sort(); + +// Copyleft detector (mirrors server-side logic) +const COPYLEFT_KW = ["GPL", "AGPL", "LGPL", "EUPL", "CDDL", "MPL"]; +const isCopyleft = spdx => spdx && COPYLEFT_KW.some(k => spdx.toUpperCase().includes(k)); ``` # SBOM -## Licence Risk +## Overview ```js const riskBadge = riskCount === 0 @@ -43,6 +55,10 @@ display(html`

Repos Scanned

${new Set(entries.map(e => e.repo_id)).size}

+
+

Domains Covered

+

${domainSlugs.length || new Set(Object.values(repoDomain).filter(s => s !== "—")).size}

+

Licence Risk

${riskCount}

@@ -55,13 +71,50 @@ display(html`
`); ``` +## By Domain + +```js +if (entries.length === 0) { + display(html`

No SBOM data ingested yet. Run make ingest-sbom REPO=<slug> SCAN=1 REPO_PATH=<path>.

`); +} else { + // Group entries by domain + const byDomain = {}; + for (const e of entries) { + const slug = repoDomain[e.repo_id] ?? "—"; + (byDomain[slug] = byDomain[slug] ?? []).push(e); + } + + const domainTableRows = Object.entries(byDomain).map(([slug, es]) => { + const dom = domains.find(d => d.slug === slug); + const repoCount = new Set(es.map(e => e.repo_id)).size; + const directProd = es.filter(e => e.is_direct && !e.is_dev); + const copyleftRisk = directProd.filter(e => isCopyleft(e.license_spdx)).length; + const ecosystems = [...new Set(es.map(e => e.ecosystem))].sort().join(", "); + return { + domain: dom?.name ?? slug, + repos: repoCount, + packages: es.length, + direct: directProd.length, + copyleft: copyleftRisk, + ecosystems, + }; + }).sort((a, b) => a.domain.localeCompare(b.domain)); + + display(Inputs.table(domainTableRows, { + columns: ["domain", "repos", "packages", "direct", "copyleft", "ecosystems"], + header: {domain: "Domain", repos: "Repos", packages: "All Pkgs", direct: "Direct Prod", copyleft: "Copyleft ⚠", ecosystems: "Ecosystems"}, + maxWidth: 900, + })); +} +``` + ## Licence Distribution ```js import * as Plot from "npm:@observablehq/plot"; if (groups.length === 0) { - display(html`

No SBOM data ingested yet. Run make ingest-sbom REPO=<slug>.

`); + display(html`

No SBOM data ingested yet.

`); } else { const plotData = groups.slice(0, 15).map(g => ({ licence: g.license_spdx ?? "(unknown)", @@ -98,6 +151,57 @@ if (copyleftGroups.length === 0) { ${g.repos.join(", ")}
`)} + +

Note: dual-licensed packages (e.g. "MIT OR GPL-3.0") are flagged conservatively. Review if the non-copyleft variant is used.

`); +} +``` + +## By Repo + +```js +// Group entries by repo, sorted by domain then repo name +const byRepo = {}; +for (const e of entries) { + (byRepo[e.repo_id] = byRepo[e.repo_id] ?? []).push(e); +} + +const repoSections = Object.entries(byRepo) + .map(([repoId, es]) => { + const repo = repoById[repoId]; + const domSlug = repoDomain[repoId] ?? "—"; + const dom = domains.find(d => d.slug === domSlug); + const directProd = es.filter(e => e.is_direct && !e.is_dev); + const copyleftRisk = directProd.filter(e => isCopyleft(e.license_spdx)).length; + const ecosystems = [...new Set(es.map(e => e.ecosystem))].sort(); + return { repoId, repo, dom, domSlug, es, directProd, copyleftRisk, ecosystems }; + }) + .sort((a, b) => (a.domSlug + a.repo?.slug).localeCompare(b.domSlug + b.repo?.slug)); + +if (repoSections.length === 0) { + display(html`

No repo data.

`); +} else { + display(html`
+ ${repoSections.map(({repoId, repo, dom, domSlug, es, directProd, copyleftRisk, ecosystems}) => html` +
+ + ${dom?.name ?? domSlug} + ${repo?.slug ?? repoId.slice(0,8)} + ${es.length} pkgs · ${ecosystems.join(" + ")} · ${directProd.length} direct + ${copyleftRisk > 0 ? html`⚠ ${copyleftRisk} copyleft` : ""} + +
+ ${Inputs.table(es.slice(0, 200).map(e => ({ + Package: e.package_name, + Version: e.package_version ?? "—", + Ecosystem: e.ecosystem, + Licence: e.license_spdx ?? "—", + Direct: e.is_direct ? "✓" : "", + Dev: e.is_dev ? "✓" : "", + })), {maxWidth: 860})} + ${es.length > 200 ? html`

Showing first 200 of ${es.length}

` : ""} +
+
+ `)}
`); } ``` @@ -106,19 +210,19 @@ if (copyleftGroups.length === 0) { ```js // Filters +const domainOpts = ["all", ...domainSlugs]; +const domainFilter = Inputs.select(domainOpts, {label: "Domain", value: "all"}); const ecoFilter = Inputs.select(["all", "python", "node", "rust", "go", "java", "other"], {label: "Ecosystem", value: "all"}); const directOnly = Inputs.toggle({label: "Direct deps only", value: false}); const prodOnly = Inputs.toggle({label: "Prod deps only (no dev)", value: false}); display(html`
- ${ecoFilter}${directOnly}${prodOnly} + ${domainFilter}${ecoFilter}${directOnly}${prodOnly}
`); ``` ```js -// Build repo_id → slug lookup -const repoById = Object.fromEntries(_repos.map(r => [r.id, r.slug])); - const filteredEntries = entries.filter(e => + (domainFilter.value === "all" || repoDomain[e.repo_id] === domainFilter.value) && (ecoFilter.value === "all" || e.ecosystem === ecoFilter.value) && (!directOnly.value || e.is_direct) && (!prodOnly.value || !e.is_dev) @@ -129,22 +233,37 @@ display(Inputs.table(filteredEntries.map(e => ({ Version: e.package_version ?? "—", Ecosystem: e.ecosystem, Licence: e.license_spdx ?? "—", - Repo: repoById[e.repo_id] ?? e.repo_id?.slice(0, 8) ?? "—", + Domain: repoDomain[e.repo_id] ?? "—", + Repo: repoById[e.repo_id]?.slug ?? e.repo_id?.slice(0, 8) ?? "—", Direct: e.is_direct ? "✓" : "", Dev: e.is_dev ? "✓" : "", -})), {maxWidth: 900})); +})), {maxWidth: 960})); ``` diff --git a/state-hub/scripts/ingest_sbom.py b/state-hub/scripts/ingest_sbom.py index c21b7d8..59ce949 100644 --- a/state-hub/scripts/ingest_sbom.py +++ b/state-hub/scripts/ingest_sbom.py @@ -188,9 +188,19 @@ _LOCKFILE_PARSERS = { "Cargo.lock": _parse_cargo_lock, } +# Directories that never contain project-level lockfiles +_SKIP_DIRS = { + ".git", ".hg", ".svn", + ".venv", "venv", ".env", + "node_modules", + "__pycache__", ".mypy_cache", ".pytest_cache", ".ruff_cache", + "dist", "build", ".build", "target", + ".tox", ".nox", +} + def detect_lockfile(repo_path: Path) -> tuple[Path, str] | None: - """Return (lockfile_path, ecosystem) for the first recognised lockfile found.""" + """Return (lockfile_path, filename) for the first recognised lockfile at repo root.""" for name in _LOCKFILE_PARSERS: candidate = repo_path / name if candidate.exists(): @@ -198,6 +208,17 @@ def detect_lockfile(repo_path: Path) -> tuple[Path, str] | None: return None +def detect_lockfiles_recursive(repo_path: Path) -> list[Path]: + """Walk repo_path and return all recognised lockfiles, skipping non-dep dirs.""" + found: list[Path] = [] + for dirpath, dirnames, filenames in 
os.walk(repo_path): + dirnames[:] = sorted(d for d in dirnames if d not in _SKIP_DIRS) + for name in _LOCKFILE_PARSERS: + if name in filenames: + found.append(Path(dirpath) / name) + return found + + def parse_lockfile(lockfile_path: Path) -> list[dict]: filename = lockfile_path.name parser = _LOCKFILE_PARSERS.get(filename) @@ -236,38 +257,60 @@ def post_ingest(api_base: str, repo_slug: str, entries: list[dict]) -> dict: # --------------------------------------------------------------------------- def main() -> None: - parser = argparse.ArgumentParser(description="Ingest a lockfile into the State Hub SBOM store.") + parser = argparse.ArgumentParser(description="Ingest a repo's lockfiles into the State Hub SBOM store.") parser.add_argument("--repo", required=True, help="Managed-repo slug (e.g. 'the-custodian')") - parser.add_argument("--lockfile", help="Path to lockfile (auto-detected if omitted)") - parser.add_argument("--repo-path", default=".", help="Repo root for auto-detection (default: cwd)") + parser.add_argument("--lockfile", action="append", dest="lockfiles", + metavar="PATH", help="Path to a specific lockfile (repeatable)") + parser.add_argument("--repo-path", default=".", help="Repo root for auto-detection/scan (default: cwd)") + parser.add_argument("--scan", action="store_true", + help="Recursively find ALL lockfiles under --repo-path (handles multi-ecosystem repos)") parser.add_argument("--api-base", default=API_BASE, help="State Hub API base URL") parser.add_argument("--dry-run", action="store_true", help="Parse only — do not submit") args = parser.parse_args() - if args.lockfile: - lockfile_path = Path(args.lockfile).resolve() + repo_root = Path(args.repo_path).resolve() + lockfile_paths: list[Path] = [] + + if args.lockfiles: + lockfile_paths = [Path(lf).resolve() for lf in args.lockfiles] + elif args.scan: + lockfile_paths = detect_lockfiles_recursive(repo_root) + if not lockfile_paths: + print(f"No lockfiles found under '{repo_root}'.", 
file=sys.stderr) + sys.exit(1) + print(f"Scan found {len(lockfile_paths)} lockfile(s):") + for lf in lockfile_paths: + print(f" {lf.relative_to(repo_root) if lf.is_relative_to(repo_root) else lf}") else: - found = detect_lockfile(Path(args.repo_path).resolve()) + found = detect_lockfile(repo_root) if not found: print( - f"No recognised lockfile found in '{args.repo_path}'. " - "Supported: " + ", ".join(_LOCKFILE_PARSERS), + f"No recognised lockfile found in '{repo_root}'. " + f"Supported: {', '.join(_LOCKFILE_PARSERS)}. " + "Use --scan to search subdirectories.", file=sys.stderr, ) sys.exit(1) lockfile_path, _ = found print(f"Auto-detected: {lockfile_path}") + lockfile_paths = [lockfile_path] - entries = parse_lockfile(lockfile_path) - print(f"Parsed {len(entries)} packages from {lockfile_path.name}") + all_entries: list[dict] = [] + for lf in lockfile_paths: + parsed = parse_lockfile(lf) + rel = lf.relative_to(repo_root) if lf.is_relative_to(repo_root) else lf + print(f" {rel}: {len(parsed)} packages") + all_entries.extend(parsed) + + print(f"Total: {len(all_entries)} packages across {len(lockfile_paths)} lockfile(s)") if args.dry_run: - print(json.dumps(entries[:5], indent=2)) - if len(entries) > 5: - print(f" … and {len(entries) - 5} more") + print(json.dumps(all_entries[:5], indent=2)) + if len(all_entries) > 5: + print(f" … and {len(all_entries) - 5} more") return - result = post_ingest(args.api_base, args.repo, entries) + result = post_ingest(args.api_base, args.repo, all_entries) print(f"Ingested {result.get('ingested', '?')} entries for repo '{args.repo}'") print(f"Snapshot at: {result.get('snapshot_at', '?')}")