"""SBOM API routes: ingest snapshots, list snapshots and entries, report licences."""

import uuid
from datetime import datetime, timezone

from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.sbom_entry import Ecosystem, SBOMEntry
from api.models.sbom_snapshot import SBOMSnapshot
from api.schemas.sbom import (
    LicenceGroup,
    LicenceReport,
    SBOMEntryRead,
    SBOMIngest,
    SBOMRepoView,
    SBOMSnapshotDetail,
    SBOMSnapshotRead,
)

router = APIRouter(prefix="/sbom", tags=["sbom"])

# Substrings that mark an SPDX identifier as copyleft (matched case-insensitively).
_COPYLEFT_PATTERNS = {"GPL", "AGPL", "LGPL", "EUPL", "CDDL", "MPL"}


def _is_copyleft(spdx: str | None) -> bool:
    """Return True when *spdx* contains any of the copyleft marker substrings.

    ``None`` and the empty string are treated as "not copyleft".
    """
    if not spdx:
        return False
    upper = spdx.upper()
    return any(pat in upper for pat in _COPYLEFT_PATTERNS)


def _latest_snapshot_ids_subquery():
    """Subquery returning the latest SBOMSnapshot.id per repo.

    "Latest" means the row whose ``snapshot_at`` equals the per-repo maximum.
    NOTE: if several snapshots of one repo share that maximum timestamp, all of
    their ids are returned.
    """
    max_at_sq = (
        select(
            SBOMSnapshot.repo_id,
            func.max(SBOMSnapshot.snapshot_at).label("max_at"),
        )
        .group_by(SBOMSnapshot.repo_id)
        .subquery("max_snap_at")
    )
    return (
        select(SBOMSnapshot.id)
        .join(
            max_at_sq,
            and_(
                SBOMSnapshot.repo_id == max_at_sq.c.repo_id,
                SBOMSnapshot.snapshot_at == max_at_sq.c.max_at,
            ),
        )
        .subquery("latest_snap_ids")
    )


def _latest_snapshot_id_for_repo(repo_id):
    """Scalar subquery yielding the id of the newest snapshot of one repo.

    Shared by the endpoints that show a single repo's current SBOM so both use
    the same definition of "latest" (highest ``snapshot_at``, one row).
    """
    return (
        select(SBOMSnapshot.id)
        .where(SBOMSnapshot.repo_id == repo_id)
        .order_by(SBOMSnapshot.snapshot_at.desc())
        .limit(1)
        .scalar_subquery()
    )


@router.post("/ingest/")
async def ingest_sbom(
    body: SBOMIngest,
    session: AsyncSession = Depends(get_session),
) -> dict:
    """Create a new SBOM snapshot for a repo. Previous snapshots are retained.

    Returns a summary dict with the repo slug, the new snapshot id, the number
    of ingested entries and the snapshot timestamp (ISO 8601).

    Raises:
        HTTPException: 404 when no repo matches ``body.repo_slug``.
    """
    repo = await _get_repo_by_slug(body.repo_slug, session)
    now = datetime.now(tz=timezone.utc)

    snap = SBOMSnapshot(
        repo_id=repo.id,
        snapshot_at=now,
        source="manual",
        entry_count=len(body.entries),
        created_at=now,
    )
    session.add(snap)
    await session.flush()  # materialise snap.id before creating entries

    session.add_all(
        [
            SBOMEntry(
                repo_id=repo.id,
                snapshot_id=snap.id,
                package_name=entry.package_name,
                package_version=entry.package_version,
                ecosystem=entry.ecosystem,
                license_spdx=entry.license_spdx,
                is_direct=entry.is_direct,
                is_dev=entry.is_dev,
                snapshot_at=now,
                created_at=now,
            )
            for entry in body.entries
        ]
    )

    repo.last_sbom_at = now
    # Only set the source when the repo has none yet; an existing value is kept.
    if not repo.sbom_source:
        repo.sbom_source = "manual"

    await session.commit()

    return {
        "repo_slug": body.repo_slug,
        "snapshot_id": str(snap.id),
        "ingested": len(body.entries),
        "snapshot_at": now.isoformat(),
    }


@router.get("/snapshots/", response_model=list[SBOMSnapshotRead])
async def list_snapshots(
    repo_slug: str | None = Query(None),
    session: AsyncSession = Depends(get_session),
) -> list[SBOMSnapshotRead]:
    """List SBOM snapshots, newest first. Optionally filter by repo.

    Raises:
        HTTPException: 404 when ``repo_slug`` is given but matches no repo.
    """
    q = select(SBOMSnapshot).order_by(SBOMSnapshot.snapshot_at.desc())
    if repo_slug:
        repo = await _get_repo_by_slug(repo_slug, session)
        q = q.where(SBOMSnapshot.repo_id == repo.id)
    result = await session.execute(q)
    return [SBOMSnapshotRead.model_validate(s) for s in result.scalars().all()]


@router.get("/snapshots/{snapshot_id}", response_model=SBOMSnapshotDetail)
async def get_snapshot(
    snapshot_id: uuid.UUID,
    session: AsyncSession = Depends(get_session),
) -> SBOMSnapshotDetail:
    """Get a snapshot with its full entry list, entries ordered by package name.

    Raises:
        HTTPException: 404 when the snapshot does not exist.
    """
    snap = await session.get(SBOMSnapshot, snapshot_id)
    if snap is None:
        raise HTTPException(status_code=404, detail=f"Snapshot '{snapshot_id}' not found")

    result = await session.execute(
        select(SBOMEntry)
        .where(SBOMEntry.snapshot_id == snapshot_id)
        .order_by(SBOMEntry.package_name)
    )
    entries = list(result.scalars().all())

    return SBOMSnapshotDetail(
        id=snap.id,
        repo_id=snap.repo_id,
        snapshot_at=snap.snapshot_at,
        source=snap.source,
        entry_count=snap.entry_count,
        created_at=snap.created_at,
        entries=[SBOMEntryRead.model_validate(e) for e in entries],
    )


@router.get("/")
async def list_sbom_entries(
    repo_slug: str | None = Query(None),
    ecosystem: Ecosystem | None = Query(None),
    license_spdx: str | None = Query(None),
    is_direct: bool | None = Query(None),
    is_dev: bool | None = Query(None),
    session: AsyncSession = Depends(get_session),
) -> list[SBOMEntryRead]:
    """Return entries from the latest snapshot per repo (default) or filter by repo.

    The remaining query parameters narrow the result further; a parameter that
    is omitted (or, for the string parameters, empty) applies no filter.
    Results are ordered by package name.

    Raises:
        HTTPException: 404 when ``repo_slug`` is given but matches no repo.
    """
    if repo_slug:
        repo = await _get_repo_by_slug(repo_slug, session)
        q = select(SBOMEntry).where(
            SBOMEntry.snapshot_id == _latest_snapshot_id_for_repo(repo.id)
        )
    else:
        latest_ids_sq = _latest_snapshot_ids_subquery()
        q = select(SBOMEntry).where(
            SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id))
        )

    if ecosystem is not None:
        q = q.where(SBOMEntry.ecosystem == ecosystem)
    if license_spdx:
        q = q.where(SBOMEntry.license_spdx == license_spdx)
    if is_direct is not None:
        q = q.where(SBOMEntry.is_direct == is_direct)
    if is_dev is not None:
        q = q.where(SBOMEntry.is_dev == is_dev)

    q = q.order_by(SBOMEntry.package_name)
    result = await session.execute(q)
    return [SBOMEntryRead.model_validate(e) for e in result.scalars().all()]


@router.get("/report/licences/", response_model=LicenceReport)
async def licence_report(
    session: AsyncSession = Depends(get_session),
) -> LicenceReport:
    """Group latest-snapshot SBOM entries by SPDX licence identifier, flag copyleft.

    Groups are ordered by descending entry count; groups with equal counts are
    ordered by licence identifier so the response is stable between calls.
    ``copyleft_direct_count`` counts entries that are copyleft, direct and not
    dev dependencies.
    """
    latest_ids_sq = _latest_snapshot_ids_subquery()
    rows = await session.execute(
        select(SBOMEntry, ManagedRepo.slug)
        .join(ManagedRepo, ManagedRepo.id == SBOMEntry.repo_id)
        .where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id)))
    )

    groups: dict[str | None, dict] = {}
    copyleft_direct_count = 0
    for entry, repo_slug in rows.all():
        key = entry.license_spdx
        info = groups.setdefault(key, {"count": 0, "repos": set()})
        info["count"] += 1
        info["repos"].add(repo_slug)
        if _is_copyleft(key) and entry.is_direct and not entry.is_dev:
            copyleft_direct_count += 1

    # The query has no ORDER BY, so break count ties on the licence id
    # (a missing licence sorts as "") instead of relying on row order.
    ordered = sorted(
        groups.items(),
        key=lambda item: (-item[1]["count"], item[0] or ""),
    )
    licence_groups = [
        LicenceGroup(
            license_spdx=lic,
            count=info["count"],
            repos=sorted(info["repos"]),
            is_copyleft=_is_copyleft(lic),
        )
        for lic, info in ordered
    ]
    return LicenceReport(groups=licence_groups, copyleft_direct_count=copyleft_direct_count)


@router.get("/{repo_slug}", response_model=SBOMRepoView)
async def get_repo_sbom(
    repo_slug: str,
    session: AsyncSession = Depends(get_session),
) -> SBOMRepoView:
    """Return the latest snapshot entries for a specific repo, ordered by package name.

    Raises:
        HTTPException: 404 when no repo matches ``repo_slug``.
    """
    repo = await _get_repo_by_slug(repo_slug, session)
    rows = await session.execute(
        select(SBOMEntry)
        .where(SBOMEntry.snapshot_id == _latest_snapshot_id_for_repo(repo.id))
        .order_by(SBOMEntry.package_name)
    )
    entries = list(rows.scalars().all())
    return SBOMRepoView(
        repo_slug=repo_slug,
        last_sbom_at=repo.last_sbom_at,
        entry_count=len(entries),
        entries=[SBOMEntryRead.model_validate(e) for e in entries],
    )


async def _get_repo_by_slug(slug: str, session: AsyncSession) -> ManagedRepo:
    """Load the ManagedRepo with the given slug.

    Raises:
        HTTPException: 404 when no repo has that slug.
    """
    result = await session.execute(select(ManagedRepo).where(ManagedRepo.slug == slug))
    repo = result.scalar_one_or_none()
    if repo is None:
        raise HTTPException(status_code=404, detail=f"Repo '{slug}' not found")
    return repo