Files
state-hub/api/routers/sbom.py
tegwick fc87e26b4b feat(gems): three-pass schema migration aligning state-hub with GEMS
Implements CUST-WP-0007. Resolves inconsistencies I-1, I-2, I-5, I-6
identified in the GEMS audit (GenericEntityModellingSystem.md).

Pass 1 (e1f2a3b4c5d6): domain_id FK on extension_points and
technical_debt (replaces raw string column); repo_id FK on contributions.
Fixes domain-filtering bugs in EP/TD dashboard pages.

Pass 2 (f2a3b4c5d6e7): repo_id nullable FK on workstreams, aligning
the GEMS primary attachment with ADR-001 (repo > topic). Dashboard
pages updated to prefer repo->domain over topic->domain.

Pass 3 (a3b4c5d6e7f8): SBOMSnapshot container entity (GEMS Complex
between Repository and SBOMEntry). Ingest is now additive — each call
creates a new snapshot; history is retained. List/report endpoints
filter to latest snapshot per repo via _latest_snapshot_ids_subquery().
New endpoints: GET /sbom/snapshots/, GET /sbom/snapshots/{id}/.
Dashboard gains a Snapshot History section.

Also adds GEMS analysis artefacts: wiki/GEMS-StateHub-TypeRegistry.md,
wiki/GEMS-StateHub-SWOT.md, workplans/CUST-WP-0006 (analysis),
workplans/CUST-WP-0007 (migration, now completed).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-02 23:39:17 +01:00

246 lines
8.1 KiB
Python

import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.sbom_entry import Ecosystem, SBOMEntry
from api.models.sbom_snapshot import SBOMSnapshot
from api.schemas.sbom import (
LicenceGroup,
LicenceReport,
SBOMEntryRead,
SBOMIngest,
SBOMRepoView,
SBOMSnapshotDetail,
SBOMSnapshotRead,
)
router = APIRouter(prefix="/sbom", tags=["sbom"])
_COPYLEFT_PATTERNS = {"GPL", "AGPL", "LGPL", "EUPL", "CDDL", "MPL"}
def _is_copyleft(spdx: str | None) -> bool:
if not spdx:
return False
upper = spdx.upper()
return any(pat in upper for pat in _COPYLEFT_PATTERNS)
def _latest_snapshot_ids_subquery():
"""Subquery returning the latest SBOMSnapshot.id per repo."""
max_at_sq = (
select(SBOMSnapshot.repo_id, func.max(SBOMSnapshot.snapshot_at).label("max_at"))
.group_by(SBOMSnapshot.repo_id)
.subquery("max_snap_at")
)
return (
select(SBOMSnapshot.id)
.join(
max_at_sq,
and_(
SBOMSnapshot.repo_id == max_at_sq.c.repo_id,
SBOMSnapshot.snapshot_at == max_at_sq.c.max_at,
),
)
.subquery("latest_snap_ids")
)
@router.post("/ingest/")
async def ingest_sbom(
body: SBOMIngest,
session: AsyncSession = Depends(get_session),
) -> dict:
"""Create a new SBOM snapshot for a repo. Previous snapshots are retained."""
repo = await _get_repo_by_slug(body.repo_slug, session)
now = datetime.now(tz=timezone.utc)
snap = SBOMSnapshot(
repo_id=repo.id,
snapshot_at=now,
source="manual",
entry_count=len(body.entries),
created_at=now,
)
session.add(snap)
await session.flush() # materialise snap.id before creating entries
for entry in body.entries:
sbom = SBOMEntry(
repo_id=repo.id,
snapshot_id=snap.id,
package_name=entry.package_name,
package_version=entry.package_version,
ecosystem=entry.ecosystem,
license_spdx=entry.license_spdx,
is_direct=entry.is_direct,
is_dev=entry.is_dev,
snapshot_at=now,
created_at=now,
)
session.add(sbom)
repo.last_sbom_at = now
if not repo.sbom_source:
repo.sbom_source = "manual"
await session.commit()
return {
"repo_slug": body.repo_slug,
"snapshot_id": str(snap.id),
"ingested": len(body.entries),
"snapshot_at": now.isoformat(),
}
@router.get("/snapshots/", response_model=list[SBOMSnapshotRead])
async def list_snapshots(
repo_slug: str | None = Query(None),
session: AsyncSession = Depends(get_session),
) -> list[SBOMSnapshotRead]:
"""List SBOM snapshots, newest first. Optionally filter by repo."""
q = select(SBOMSnapshot).order_by(SBOMSnapshot.snapshot_at.desc())
if repo_slug:
repo = await _get_repo_by_slug(repo_slug, session)
q = q.where(SBOMSnapshot.repo_id == repo.id)
result = await session.execute(q)
return [SBOMSnapshotRead.model_validate(s) for s in result.scalars().all()]
@router.get("/snapshots/{snapshot_id}/", response_model=SBOMSnapshotDetail)
async def get_snapshot(
snapshot_id: uuid.UUID,
session: AsyncSession = Depends(get_session),
) -> SBOMSnapshotDetail:
"""Get a snapshot with its full entry list."""
snap = await session.get(SBOMSnapshot, snapshot_id)
if snap is None:
raise HTTPException(status_code=404, detail=f"Snapshot '{snapshot_id}' not found")
result = await session.execute(
select(SBOMEntry)
.where(SBOMEntry.snapshot_id == snapshot_id)
.order_by(SBOMEntry.package_name)
)
entries = list(result.scalars().all())
return SBOMSnapshotDetail(
id=snap.id,
repo_id=snap.repo_id,
snapshot_at=snap.snapshot_at,
source=snap.source,
entry_count=snap.entry_count,
created_at=snap.created_at,
entries=[SBOMEntryRead.model_validate(e) for e in entries],
)
@router.get("/")
async def list_sbom_entries(
repo_slug: str | None = Query(None),
ecosystem: Ecosystem | None = Query(None),
license_spdx: str | None = Query(None),
is_direct: bool | None = Query(None),
is_dev: bool | None = Query(None),
session: AsyncSession = Depends(get_session),
) -> list[SBOMEntryRead]:
"""Return entries from the latest snapshot per repo (default) or filter by repo."""
if repo_slug:
repo = await _get_repo_by_slug(repo_slug, session)
latest_snap_id_sq = (
select(SBOMSnapshot.id)
.where(SBOMSnapshot.repo_id == repo.id)
.order_by(SBOMSnapshot.snapshot_at.desc())
.limit(1)
.scalar_subquery()
)
q = select(SBOMEntry).where(SBOMEntry.snapshot_id == latest_snap_id_sq)
else:
latest_ids_sq = _latest_snapshot_ids_subquery()
q = select(SBOMEntry).where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id)))
if ecosystem is not None:
q = q.where(SBOMEntry.ecosystem == ecosystem)
if license_spdx:
q = q.where(SBOMEntry.license_spdx == license_spdx)
if is_direct is not None:
q = q.where(SBOMEntry.is_direct == is_direct)
if is_dev is not None:
q = q.where(SBOMEntry.is_dev == is_dev)
q = q.order_by(SBOMEntry.package_name)
result = await session.execute(q)
return [SBOMEntryRead.model_validate(e) for e in result.scalars().all()]
@router.get("/report/licences/", response_model=LicenceReport)
async def licence_report(
session: AsyncSession = Depends(get_session),
) -> LicenceReport:
"""Group latest-snapshot SBOM entries by SPDX licence identifier, flag copyleft."""
latest_ids_sq = _latest_snapshot_ids_subquery()
rows = await session.execute(
select(SBOMEntry, ManagedRepo.slug)
.join(ManagedRepo, ManagedRepo.id == SBOMEntry.repo_id)
.where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id)))
)
groups: dict[str | None, dict] = {}
copyleft_direct_count = 0
for entry, repo_slug in rows.all():
key = entry.license_spdx
if key not in groups:
groups[key] = {"count": 0, "repos": set()}
groups[key]["count"] += 1
groups[key]["repos"].add(repo_slug)
if _is_copyleft(key) and entry.is_direct and not entry.is_dev:
copyleft_direct_count += 1
licence_groups = [
LicenceGroup(
license_spdx=lic,
count=info["count"],
repos=sorted(info["repos"]),
is_copyleft=_is_copyleft(lic),
)
for lic, info in sorted(groups.items(), key=lambda x: -x[1]["count"])
]
return LicenceReport(groups=licence_groups, copyleft_direct_count=copyleft_direct_count)
@router.get("/{repo_slug}/", response_model=SBOMRepoView)
async def get_repo_sbom(
repo_slug: str,
session: AsyncSession = Depends(get_session),
) -> SBOMRepoView:
"""Return the latest snapshot entries for a specific repo."""
repo = await _get_repo_by_slug(repo_slug, session)
latest_snap_id_sq = (
select(SBOMSnapshot.id)
.where(SBOMSnapshot.repo_id == repo.id)
.order_by(SBOMSnapshot.snapshot_at.desc())
.limit(1)
.scalar_subquery()
)
rows = await session.execute(
select(SBOMEntry)
.where(SBOMEntry.snapshot_id == latest_snap_id_sq)
.order_by(SBOMEntry.package_name)
)
entries = list(rows.scalars().all())
return SBOMRepoView(
repo_slug=repo_slug,
last_sbom_at=repo.last_sbom_at,
entry_count=len(entries),
entries=[SBOMEntryRead.model_validate(e) for e in entries],
)
async def _get_repo_by_slug(slug: str, session: AsyncSession) -> ManagedRepo:
result = await session.execute(select(ManagedRepo).where(ManagedRepo.slug == slug))
repo = result.scalar_one_or_none()
if repo is None:
raise HTTPException(status_code=404, detail=f"Repo '{slug}' not found")
return repo