feat(gems): three-pass schema migration aligning state-hub with GEMS

Implements CUST-WP-0007. Resolves inconsistencies I-1, I-2, I-5, I-6
identified in the GEMS audit (GenericEntityModellingSystem.md).

Pass 1 (e1f2a3b4c5d6): domain_id FK on extension_points and
technical_debt (replaces raw string column); repo_id FK on contributions.
Fixes domain-filtering bugs in EP/TD dashboard pages.

Pass 2 (f2a3b4c5d6e7): repo_id nullable FK on workstreams, aligning
the GEMS primary attachment with ADR-001 (repo > topic). Dashboard
pages updated to prefer repo->domain over topic->domain.

Pass 3 (a3b4c5d6e7f8): SBOMSnapshot container entity (GEMS Complex
between Repository and SBOMEntry). Ingest is now additive — each call
creates a new snapshot; history is retained. List/report endpoints
filter to latest snapshot per repo via _latest_snapshot_ids_subquery().
New endpoints: GET /sbom/snapshots/, GET /sbom/snapshots/{id}/.
Dashboard gains a Snapshot History section.

Also adds GEMS analysis artefacts: wiki/GEMS-StateHub-TypeRegistry.md,
wiki/GEMS-StateHub-SWOT.md, workplans/CUST-WP-0006 (analysis),
workplans/CUST-WP-0007 (migration, now completed).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 23:39:17 +01:00
parent 62fbe884e3
commit fc87e26b4b
30 changed files with 675 additions and 84 deletions

View File

@@ -12,10 +12,21 @@ from api.schemas.extension_point import EPCreate, EPRead, EPUpdate
router = APIRouter(prefix="/extension-points", tags=["extension-points"])
async def _get_valid_domain_slugs(session: AsyncSession) -> set[str]:
"""Return the set of active domain slugs from the DB."""
rows = await session.execute(select(Domain.slug).where(Domain.status == "active"))
return {r[0] for r in rows.all()}
async def _resolve_domain_id(slug: str, session: AsyncSession) -> uuid.UUID:
"""Resolve a domain slug to its UUID, raising 422 if unknown."""
row = await session.execute(
select(Domain.id).where(Domain.slug == slug, Domain.status == "active")
)
domain_id = row.scalar_one_or_none()
if domain_id is None:
valid = [r[0] for r in (await session.execute(
select(Domain.slug).where(Domain.status == "active")
)).all()]
raise HTTPException(
status_code=422,
detail=f"Unknown domain '{slug}'. Valid domains: {sorted(valid)}",
)
return domain_id
@router.get("/", response_model=list[EPRead])
@@ -27,7 +38,8 @@ async def list_eps(
) -> list[ExtensionPoint]:
q = select(ExtensionPoint)
if domain:
q = q.where(ExtensionPoint.domain == domain)
domain_id = await _resolve_domain_id(domain, session)
q = q.where(ExtensionPoint.domain_id == domain_id)
if status:
q = q.where(ExtensionPoint.status == status)
if ep_type:
@@ -42,13 +54,10 @@ async def create_ep(
body: EPCreate,
session: AsyncSession = Depends(get_session),
) -> ExtensionPoint:
valid_domains = await _get_valid_domain_slugs(session)
if body.domain not in valid_domains:
raise HTTPException(
status_code=422,
detail=f"Unknown domain '{body.domain}'. Valid domains: {sorted(valid_domains)}",
)
ep = ExtensionPoint(**body.model_dump())
domain_id = await _resolve_domain_id(body.domain, session)
data = body.model_dump(exclude={"domain"})
data["domain_id"] = domain_id
ep = ExtensionPoint(**data)
session.add(ep)
await session.commit()
await session.refresh(ep)

View File

@@ -1,18 +1,22 @@
import uuid
from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import delete, func, select
from sqlalchemy import and_, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.sbom_entry import Ecosystem, SBOMEntry
from api.models.sbom_snapshot import SBOMSnapshot
from api.schemas.sbom import (
LicenceGroup,
LicenceReport,
SBOMEntryRead,
SBOMIngest,
SBOMRepoView,
SBOMSnapshotDetail,
SBOMSnapshotRead,
)
router = APIRouter(prefix="/sbom", tags=["sbom"])
@@ -27,22 +31,49 @@ def _is_copyleft(spdx: str | None) -> bool:
return any(pat in upper for pat in _COPYLEFT_PATTERNS)
def _latest_snapshot_ids_subquery():
"""Subquery returning the latest SBOMSnapshot.id per repo."""
max_at_sq = (
select(SBOMSnapshot.repo_id, func.max(SBOMSnapshot.snapshot_at).label("max_at"))
.group_by(SBOMSnapshot.repo_id)
.subquery("max_snap_at")
)
return (
select(SBOMSnapshot.id)
.join(
max_at_sq,
and_(
SBOMSnapshot.repo_id == max_at_sq.c.repo_id,
SBOMSnapshot.snapshot_at == max_at_sq.c.max_at,
),
)
.subquery("latest_snap_ids")
)
@router.post("/ingest/")
async def ingest_sbom(
body: SBOMIngest,
session: AsyncSession = Depends(get_session),
) -> dict:
"""Replace the SBOM snapshot for a repo. Old entries are deleted first."""
"""Create a new SBOM snapshot for a repo. Previous snapshots are retained."""
repo = await _get_repo_by_slug(body.repo_slug, session)
now = datetime.now(tz=timezone.utc)
# Delete existing snapshot for this repo
await session.execute(delete(SBOMEntry).where(SBOMEntry.repo_id == repo.id))
snap = SBOMSnapshot(
repo_id=repo.id,
snapshot_at=now,
source="manual",
entry_count=len(body.entries),
created_at=now,
)
session.add(snap)
await session.flush() # materialise snap.id before creating entries
# Insert new entries
for entry in body.entries:
sbom = SBOMEntry(
repo_id=repo.id,
snapshot_id=snap.id,
package_name=entry.package_name,
package_version=entry.package_version,
ecosystem=entry.ecosystem,
@@ -59,7 +90,52 @@ async def ingest_sbom(
repo.sbom_source = "manual"
await session.commit()
return {"repo_slug": body.repo_slug, "ingested": len(body.entries), "snapshot_at": now.isoformat()}
return {
"repo_slug": body.repo_slug,
"snapshot_id": str(snap.id),
"ingested": len(body.entries),
"snapshot_at": now.isoformat(),
}
@router.get("/snapshots/", response_model=list[SBOMSnapshotRead])
async def list_snapshots(
repo_slug: str | None = Query(None),
session: AsyncSession = Depends(get_session),
) -> list[SBOMSnapshotRead]:
"""List SBOM snapshots, newest first. Optionally filter by repo."""
q = select(SBOMSnapshot).order_by(SBOMSnapshot.snapshot_at.desc())
if repo_slug:
repo = await _get_repo_by_slug(repo_slug, session)
q = q.where(SBOMSnapshot.repo_id == repo.id)
result = await session.execute(q)
return [SBOMSnapshotRead.model_validate(s) for s in result.scalars().all()]
@router.get("/snapshots/{snapshot_id}/", response_model=SBOMSnapshotDetail)
async def get_snapshot(
snapshot_id: uuid.UUID,
session: AsyncSession = Depends(get_session),
) -> SBOMSnapshotDetail:
"""Get a snapshot with its full entry list."""
snap = await session.get(SBOMSnapshot, snapshot_id)
if snap is None:
raise HTTPException(status_code=404, detail=f"Snapshot '{snapshot_id}' not found")
result = await session.execute(
select(SBOMEntry)
.where(SBOMEntry.snapshot_id == snapshot_id)
.order_by(SBOMEntry.package_name)
)
entries = list(result.scalars().all())
return SBOMSnapshotDetail(
id=snap.id,
repo_id=snap.repo_id,
snapshot_at=snap.snapshot_at,
source=snap.source,
entry_count=snap.entry_count,
created_at=snap.created_at,
entries=[SBOMEntryRead.model_validate(e) for e in entries],
)
@router.get("/")
@@ -71,10 +147,21 @@ async def list_sbom_entries(
is_dev: bool | None = Query(None),
session: AsyncSession = Depends(get_session),
) -> list[SBOMEntryRead]:
q = select(SBOMEntry).order_by(SBOMEntry.package_name)
"""Return entries from the latest snapshot per repo (default) or filter by repo."""
if repo_slug:
repo = await _get_repo_by_slug(repo_slug, session)
q = q.where(SBOMEntry.repo_id == repo.id)
latest_snap_id_sq = (
select(SBOMSnapshot.id)
.where(SBOMSnapshot.repo_id == repo.id)
.order_by(SBOMSnapshot.snapshot_at.desc())
.limit(1)
.scalar_subquery()
)
q = select(SBOMEntry).where(SBOMEntry.snapshot_id == latest_snap_id_sq)
else:
latest_ids_sq = _latest_snapshot_ids_subquery()
q = select(SBOMEntry).where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id)))
if ecosystem is not None:
q = q.where(SBOMEntry.ecosystem == ecosystem)
if license_spdx:
@@ -83,6 +170,7 @@ async def list_sbom_entries(
q = q.where(SBOMEntry.is_direct == is_direct)
if is_dev is not None:
q = q.where(SBOMEntry.is_dev == is_dev)
q = q.order_by(SBOMEntry.package_name)
result = await session.execute(q)
return [SBOMEntryRead.model_validate(e) for e in result.scalars().all()]
@@ -91,12 +179,13 @@ async def list_sbom_entries(
async def licence_report(
session: AsyncSession = Depends(get_session),
) -> LicenceReport:
"""Group SBOM entries by SPDX licence identifier, flag copyleft."""
"""Group latest-snapshot SBOM entries by SPDX licence identifier, flag copyleft."""
latest_ids_sq = _latest_snapshot_ids_subquery()
rows = await session.execute(
select(SBOMEntry, ManagedRepo.slug)
.join(ManagedRepo, ManagedRepo.id == SBOMEntry.repo_id)
.where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id)))
)
# Build: license_spdx → {count, repos set}
groups: dict[str | None, dict] = {}
copyleft_direct_count = 0
for entry, repo_slug in rows.all():
@@ -125,9 +214,19 @@ async def get_repo_sbom(
repo_slug: str,
session: AsyncSession = Depends(get_session),
) -> SBOMRepoView:
"""Return the latest snapshot entries for a specific repo."""
repo = await _get_repo_by_slug(repo_slug, session)
latest_snap_id_sq = (
select(SBOMSnapshot.id)
.where(SBOMSnapshot.repo_id == repo.id)
.order_by(SBOMSnapshot.snapshot_at.desc())
.limit(1)
.scalar_subquery()
)
rows = await session.execute(
select(SBOMEntry).where(SBOMEntry.repo_id == repo.id).order_by(SBOMEntry.package_name)
select(SBOMEntry)
.where(SBOMEntry.snapshot_id == latest_snap_id_sq)
.order_by(SBOMEntry.package_name)
)
entries = list(rows.scalars().all())
return SBOMRepoView(

View File

@@ -255,14 +255,14 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
):
ws_per_domain[domain_id] = cnt
# EP counts per domain slug
# EP counts per domain id (via FK)
ep_counts = {r[0]: r[1] for r in await session.execute(
select(ExtensionPoint.domain, func.count()).group_by(ExtensionPoint.domain)
select(ExtensionPoint.domain_id, func.count()).group_by(ExtensionPoint.domain_id)
)}
# TD counts per domain slug
# TD counts per domain id (via FK)
td_counts = {r[0]: r[1] for r in await session.execute(
select(TechnicalDebt.domain, func.count()).group_by(TechnicalDebt.domain)
select(TechnicalDebt.domain_id, func.count()).group_by(TechnicalDebt.domain_id)
)}
return [
@@ -271,8 +271,8 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
name=d.name,
repo_count=repo_counts.get(d.id, 0),
active_workstream_count=ws_per_domain.get(d.id, 0),
ep_count=ep_counts.get(d.slug, 0),
td_count=td_counts.get(d.slug, 0),
ep_count=ep_counts.get(d.id, 0),
td_count=td_counts.get(d.id, 0),
)
for d in domains
]

View File

@@ -12,10 +12,21 @@ from api.schemas.technical_debt import TDCreate, TDRead, TDUpdate
router = APIRouter(prefix="/technical-debt", tags=["technical-debt"])
async def _get_valid_domain_slugs(session: AsyncSession) -> set[str]:
"""Return the set of active domain slugs from the DB."""
rows = await session.execute(select(Domain.slug).where(Domain.status == "active"))
return {r[0] for r in rows.all()}
async def _resolve_domain_id(slug: str, session: AsyncSession) -> uuid.UUID:
"""Resolve a domain slug to its UUID, raising 422 if unknown."""
row = await session.execute(
select(Domain.id).where(Domain.slug == slug, Domain.status == "active")
)
domain_id = row.scalar_one_or_none()
if domain_id is None:
valid = [r[0] for r in (await session.execute(
select(Domain.slug).where(Domain.status == "active")
)).all()]
raise HTTPException(
status_code=422,
detail=f"Unknown domain '{slug}'. Valid domains: {sorted(valid)}",
)
return domain_id
@router.get("/", response_model=list[TDRead])
@@ -28,7 +39,8 @@ async def list_td(
) -> list[TechnicalDebt]:
q = select(TechnicalDebt)
if domain:
q = q.where(TechnicalDebt.domain == domain)
domain_id = await _resolve_domain_id(domain, session)
q = q.where(TechnicalDebt.domain_id == domain_id)
if status:
q = q.where(TechnicalDebt.status == status)
if debt_type:
@@ -45,13 +57,10 @@ async def create_td(
body: TDCreate,
session: AsyncSession = Depends(get_session),
) -> TechnicalDebt:
valid_domains = await _get_valid_domain_slugs(session)
if body.domain not in valid_domains:
raise HTTPException(
status_code=422,
detail=f"Unknown domain '{body.domain}'. Valid domains: {sorted(valid_domains)}",
)
td = TechnicalDebt(**body.model_dump())
domain_id = await _resolve_domain_id(body.domain, session)
data = body.model_dump(exclude={"domain"})
data["domain_id"] = domain_id
td = TechnicalDebt(**data)
session.add(td)
await session.commit()
await session.refresh(td)

View File

@@ -14,12 +14,15 @@ router = APIRouter(prefix="/workstreams", tags=["workstreams"])
@router.get("/", response_model=list[WorkstreamRead])
async def list_workstreams(
topic_id: uuid.UUID | None = None,
repo_id: uuid.UUID | None = None,
status: WorkstreamStatus | None = None,
session: AsyncSession = Depends(get_session),
) -> list[Workstream]:
q = select(Workstream)
if topic_id:
q = q.where(Workstream.topic_id == topic_id)
if repo_id:
q = q.where(Workstream.repo_id == repo_id)
if status:
q = q.where(Workstream.status == status)
q = q.order_by(Workstream.created_at)