diff --git a/state-hub/api/models/__init__.py b/state-hub/api/models/__init__.py index a21be32..2d02a4a 100644 --- a/state-hub/api/models/__init__.py +++ b/state-hub/api/models/__init__.py @@ -10,6 +10,7 @@ from api.models.extension_point import ExtensionPoint, EPStatus from api.models.technical_debt import TechnicalDebt, TDStatus from api.models.managed_repo import ManagedRepo from api.models.contribution import Contribution, ContributionType, ContributionStatus +from api.models.sbom_snapshot import SBOMSnapshot from api.models.sbom_entry import SBOMEntry, Ecosystem __all__ = [ @@ -25,5 +26,6 @@ __all__ = [ "TechnicalDebt", "TDStatus", "ManagedRepo", "Contribution", "ContributionType", "ContributionStatus", + "SBOMSnapshot", "SBOMEntry", "Ecosystem", ] diff --git a/state-hub/api/models/contribution.py b/state-hub/api/models/contribution.py index 285f7da..f6d731f 100644 --- a/state-hub/api/models/contribution.py +++ b/state-hub/api/models/contribution.py @@ -50,6 +50,9 @@ class Contribution(Base, TimestampMixin): related_workstream_id: Mapped[uuid.UUID | None] = mapped_column( UUID(as_uuid=True), ForeignKey("workstreams.id", ondelete="SET NULL"), nullable=True ) + repo_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), ForeignKey("managed_repos.id", ondelete="SET NULL"), nullable=True + ) submitted_at: Mapped[datetime | None] = mapped_column( DateTime(timezone=True), nullable=True ) @@ -60,3 +63,4 @@ class Contribution(Base, TimestampMixin): topic: Mapped["Topic"] = relationship("Topic", lazy="selectin") # noqa: F821 workstream: Mapped["Workstream"] = relationship("Workstream", lazy="selectin") # noqa: F821 + repo: Mapped["ManagedRepo"] = relationship("ManagedRepo", lazy="selectin") # noqa: F821 diff --git a/state-hub/api/models/extension_point.py b/state-hub/api/models/extension_point.py index 745e898..f34dcbb 100644 --- a/state-hub/api/models/extension_point.py +++ b/state-hub/api/models/extension_point.py @@ -25,7 +25,12 @@ class 
ExtensionPoint(Base, TimestampMixin): ep_id: Mapped[str | None] = mapped_column( String(30), nullable=True, unique=True, index=True ) # human-readable ref, e.g. EP-CUST-001 - domain: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + domain_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("domains.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) title: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str | None] = mapped_column(Text, nullable=True) location: Mapped[str | None] = mapped_column(String(500), nullable=True) @@ -43,5 +48,10 @@ class ExtensionPoint(Base, TimestampMixin): UUID(as_uuid=True), ForeignKey("workstreams.id", ondelete="SET NULL"), nullable=True ) + domain: Mapped["Domain"] = relationship("Domain", lazy="selectin") # noqa: F821 topic: Mapped["Topic"] = relationship("Topic", lazy="selectin") # noqa: F821 workstream: Mapped["Workstream"] = relationship("Workstream", lazy="selectin") # noqa: F821 + + @property + def domain_slug(self) -> str: + return self.domain.slug if self.domain is not None else "" diff --git a/state-hub/api/models/managed_repo.py b/state-hub/api/models/managed_repo.py index 2a9e44a..1d68c38 100644 --- a/state-hub/api/models/managed_repo.py +++ b/state-hub/api/models/managed_repo.py @@ -34,3 +34,7 @@ class ManagedRepo(Base, TimestampMixin): domain: Mapped["Domain"] = relationship( # noqa: F821 "Domain", back_populates="repos", lazy="selectin" ) + + @property + def domain_slug(self) -> str: + return self.domain.slug if self.domain is not None else "" diff --git a/state-hub/api/models/sbom_entry.py b/state-hub/api/models/sbom_entry.py index 2de4480..cece53e 100644 --- a/state-hub/api/models/sbom_entry.py +++ b/state-hub/api/models/sbom_entry.py @@ -37,6 +37,12 @@ class SBOMEntry(Base): license_spdx: Mapped[str | None] = mapped_column(String(100), nullable=True) is_direct: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) is_dev: 
Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + snapshot_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("sbom_snapshots.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) snapshot_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False ) @@ -45,3 +51,6 @@ class SBOMEntry(Base): ) repo: Mapped["ManagedRepo"] = relationship("ManagedRepo", lazy="selectin") # noqa: F821 + snapshot: Mapped["SBOMSnapshot"] = relationship( # noqa: F821 + "SBOMSnapshot", lazy="selectin", back_populates="entries" + ) diff --git a/state-hub/api/models/sbom_snapshot.py b/state-hub/api/models/sbom_snapshot.py new file mode 100644 index 0000000..538dd6e --- /dev/null +++ b/state-hub/api/models/sbom_snapshot.py @@ -0,0 +1,32 @@ +import uuid +from datetime import datetime + +from sqlalchemy import DateTime, ForeignKey, Integer, String +from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from api.models.base import Base, new_uuid + + +class SBOMSnapshot(Base): + """Container entity for a point-in-time SBOM scan of a repository (GEMS Complex).""" + __tablename__ = "sbom_snapshots" + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), primary_key=True, default=new_uuid + ) + repo_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("managed_repos.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) + snapshot_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False) + source: Mapped[str | None] = mapped_column(String(200), nullable=True) + entry_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False) + + repo: Mapped["ManagedRepo"] = relationship("ManagedRepo", lazy="selectin") # noqa: F821 + entries: Mapped[list["SBOMEntry"]] = relationship( # noqa: F821 + "SBOMEntry", lazy="select", 
back_populates="snapshot" + ) diff --git a/state-hub/api/models/technical_debt.py b/state-hub/api/models/technical_debt.py index f7c954d..7aee735 100644 --- a/state-hub/api/models/technical_debt.py +++ b/state-hub/api/models/technical_debt.py @@ -25,7 +25,12 @@ class TechnicalDebt(Base, TimestampMixin): td_id: Mapped[str | None] = mapped_column( String(30), nullable=True, unique=True, index=True ) # human-readable ref, e.g. TD-CUST-001 - domain: Mapped[str] = mapped_column(String(50), nullable=False, index=True) + domain_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey("domains.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ) title: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str | None] = mapped_column(Text, nullable=True) location: Mapped[str | None] = mapped_column(String(500), nullable=True) @@ -43,5 +48,10 @@ class TechnicalDebt(Base, TimestampMixin): UUID(as_uuid=True), ForeignKey("workstreams.id", ondelete="SET NULL"), nullable=True ) + domain: Mapped["Domain"] = relationship("Domain", lazy="selectin") # noqa: F821 topic: Mapped["Topic"] = relationship("Topic", lazy="selectin") # noqa: F821 workstream: Mapped["Workstream"] = relationship("Workstream", lazy="selectin") # noqa: F821 + + @property + def domain_slug(self) -> str: + return self.domain.slug if self.domain is not None else "" diff --git a/state-hub/api/models/workstream.py b/state-hub/api/models/workstream.py index d2ddbb7..0882f6b 100644 --- a/state-hub/api/models/workstream.py +++ b/state-hub/api/models/workstream.py @@ -34,7 +34,15 @@ class Workstream(Base, TimestampMixin): owner: Mapped[str | None] = mapped_column(String(100), nullable=True) due_date: Mapped[date | None] = mapped_column(Date, nullable=True) + repo_id: Mapped[uuid.UUID | None] = mapped_column( + UUID(as_uuid=True), + ForeignKey("managed_repos.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + topic: Mapped["Topic"] = relationship("Topic", 
back_populates="workstreams") # noqa: F821 + repo: Mapped["ManagedRepo"] = relationship("ManagedRepo", lazy="selectin") # noqa: F821 tasks: Mapped[list["Task"]] = relationship( # noqa: F821 "Task", back_populates="workstream", lazy="selectin" ) diff --git a/state-hub/api/routers/extension_points.py b/state-hub/api/routers/extension_points.py index 4de9d96..ee5883f 100644 --- a/state-hub/api/routers/extension_points.py +++ b/state-hub/api/routers/extension_points.py @@ -12,10 +12,21 @@ from api.schemas.extension_point import EPCreate, EPRead, EPUpdate router = APIRouter(prefix="/extension-points", tags=["extension-points"]) -async def _get_valid_domain_slugs(session: AsyncSession) -> set[str]: - """Return the set of active domain slugs from the DB.""" - rows = await session.execute(select(Domain.slug).where(Domain.status == "active")) - return {r[0] for r in rows.all()} +async def _resolve_domain_id(slug: str, session: AsyncSession) -> uuid.UUID: + """Resolve a domain slug to its UUID, raising 422 if unknown.""" + row = await session.execute( + select(Domain.id).where(Domain.slug == slug, Domain.status == "active") + ) + domain_id = row.scalar_one_or_none() + if domain_id is None: + valid = [r[0] for r in (await session.execute( + select(Domain.slug).where(Domain.status == "active") + )).all()] + raise HTTPException( + status_code=422, + detail=f"Unknown domain '{slug}'. 
Valid domains: {sorted(valid)}", + ) + return domain_id @router.get("/", response_model=list[EPRead]) @@ -27,7 +38,8 @@ async def list_eps( ) -> list[ExtensionPoint]: q = select(ExtensionPoint) if domain: - q = q.where(ExtensionPoint.domain == domain) + domain_id = await _resolve_domain_id(domain, session) + q = q.where(ExtensionPoint.domain_id == domain_id) if status: q = q.where(ExtensionPoint.status == status) if ep_type: @@ -42,13 +54,10 @@ async def create_ep( body: EPCreate, session: AsyncSession = Depends(get_session), ) -> ExtensionPoint: - valid_domains = await _get_valid_domain_slugs(session) - if body.domain not in valid_domains: - raise HTTPException( - status_code=422, - detail=f"Unknown domain '{body.domain}'. Valid domains: {sorted(valid_domains)}", - ) - ep = ExtensionPoint(**body.model_dump()) + domain_id = await _resolve_domain_id(body.domain, session) + data = body.model_dump(exclude={"domain"}) + data["domain_id"] = domain_id + ep = ExtensionPoint(**data) session.add(ep) await session.commit() await session.refresh(ep) diff --git a/state-hub/api/routers/sbom.py b/state-hub/api/routers/sbom.py index a59f824..79e4a64 100644 --- a/state-hub/api/routers/sbom.py +++ b/state-hub/api/routers/sbom.py @@ -1,18 +1,22 @@ +import uuid from datetime import datetime, timezone from fastapi import APIRouter, Depends, HTTPException, Query -from sqlalchemy import delete, func, select +from sqlalchemy import and_, func, select from sqlalchemy.ext.asyncio import AsyncSession from api.database import get_session from api.models.managed_repo import ManagedRepo from api.models.sbom_entry import Ecosystem, SBOMEntry +from api.models.sbom_snapshot import SBOMSnapshot from api.schemas.sbom import ( LicenceGroup, LicenceReport, SBOMEntryRead, SBOMIngest, SBOMRepoView, + SBOMSnapshotDetail, + SBOMSnapshotRead, ) router = APIRouter(prefix="/sbom", tags=["sbom"]) @@ -27,22 +31,49 @@ def _is_copyleft(spdx: str | None) -> bool: return any(pat in upper for pat in 
_COPYLEFT_PATTERNS) +def _latest_snapshot_ids_subquery(): + """Subquery returning the latest SBOMSnapshot.id per repo.""" + max_at_sq = ( + select(SBOMSnapshot.repo_id, func.max(SBOMSnapshot.snapshot_at).label("max_at")) + .group_by(SBOMSnapshot.repo_id) + .subquery("max_snap_at") + ) + return ( + select(SBOMSnapshot.id) + .join( + max_at_sq, + and_( + SBOMSnapshot.repo_id == max_at_sq.c.repo_id, + SBOMSnapshot.snapshot_at == max_at_sq.c.max_at, + ), + ) + .subquery("latest_snap_ids") + ) + + @router.post("/ingest/") async def ingest_sbom( body: SBOMIngest, session: AsyncSession = Depends(get_session), ) -> dict: - """Replace the SBOM snapshot for a repo. Old entries are deleted first.""" + """Create a new SBOM snapshot for a repo. Previous snapshots are retained.""" repo = await _get_repo_by_slug(body.repo_slug, session) now = datetime.now(tz=timezone.utc) - # Delete existing snapshot for this repo - await session.execute(delete(SBOMEntry).where(SBOMEntry.repo_id == repo.id)) + snap = SBOMSnapshot( + repo_id=repo.id, + snapshot_at=now, + source="manual", + entry_count=len(body.entries), + created_at=now, + ) + session.add(snap) + await session.flush() # materialise snap.id before creating entries - # Insert new entries for entry in body.entries: sbom = SBOMEntry( repo_id=repo.id, + snapshot_id=snap.id, package_name=entry.package_name, package_version=entry.package_version, ecosystem=entry.ecosystem, @@ -59,7 +90,52 @@ async def ingest_sbom( repo.sbom_source = "manual" await session.commit() - return {"repo_slug": body.repo_slug, "ingested": len(body.entries), "snapshot_at": now.isoformat()} + return { + "repo_slug": body.repo_slug, + "snapshot_id": str(snap.id), + "ingested": len(body.entries), + "snapshot_at": now.isoformat(), + } + + +@router.get("/snapshots/", response_model=list[SBOMSnapshotRead]) +async def list_snapshots( + repo_slug: str | None = Query(None), + session: AsyncSession = Depends(get_session), +) -> list[SBOMSnapshotRead]: + """List SBOM 
snapshots, newest first. Optionally filter by repo.""" + q = select(SBOMSnapshot).order_by(SBOMSnapshot.snapshot_at.desc()) + if repo_slug: + repo = await _get_repo_by_slug(repo_slug, session) + q = q.where(SBOMSnapshot.repo_id == repo.id) + result = await session.execute(q) + return [SBOMSnapshotRead.model_validate(s) for s in result.scalars().all()] + + +@router.get("/snapshots/{snapshot_id}/", response_model=SBOMSnapshotDetail) +async def get_snapshot( + snapshot_id: uuid.UUID, + session: AsyncSession = Depends(get_session), +) -> SBOMSnapshotDetail: + """Get a snapshot with its full entry list.""" + snap = await session.get(SBOMSnapshot, snapshot_id) + if snap is None: + raise HTTPException(status_code=404, detail=f"Snapshot '{snapshot_id}' not found") + result = await session.execute( + select(SBOMEntry) + .where(SBOMEntry.snapshot_id == snapshot_id) + .order_by(SBOMEntry.package_name) + ) + entries = list(result.scalars().all()) + return SBOMSnapshotDetail( + id=snap.id, + repo_id=snap.repo_id, + snapshot_at=snap.snapshot_at, + source=snap.source, + entry_count=snap.entry_count, + created_at=snap.created_at, + entries=[SBOMEntryRead.model_validate(e) for e in entries], + ) @router.get("/") @@ -71,10 +147,21 @@ async def list_sbom_entries( is_dev: bool | None = Query(None), session: AsyncSession = Depends(get_session), ) -> list[SBOMEntryRead]: - q = select(SBOMEntry).order_by(SBOMEntry.package_name) + """Return entries from the latest snapshot per repo (default) or filter by repo.""" if repo_slug: repo = await _get_repo_by_slug(repo_slug, session) - q = q.where(SBOMEntry.repo_id == repo.id) + latest_snap_id_sq = ( + select(SBOMSnapshot.id) + .where(SBOMSnapshot.repo_id == repo.id) + .order_by(SBOMSnapshot.snapshot_at.desc()) + .limit(1) + .scalar_subquery() + ) + q = select(SBOMEntry).where(SBOMEntry.snapshot_id == latest_snap_id_sq) + else: + latest_ids_sq = _latest_snapshot_ids_subquery() + q = 
select(SBOMEntry).where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id))) + if ecosystem is not None: q = q.where(SBOMEntry.ecosystem == ecosystem) if license_spdx: @@ -83,6 +170,7 @@ async def list_sbom_entries( q = q.where(SBOMEntry.is_direct == is_direct) if is_dev is not None: q = q.where(SBOMEntry.is_dev == is_dev) + q = q.order_by(SBOMEntry.package_name) result = await session.execute(q) return [SBOMEntryRead.model_validate(e) for e in result.scalars().all()] @@ -91,12 +179,13 @@ async def list_sbom_entries( async def licence_report( session: AsyncSession = Depends(get_session), ) -> LicenceReport: - """Group SBOM entries by SPDX licence identifier, flag copyleft.""" + """Group latest-snapshot SBOM entries by SPDX licence identifier, flag copyleft.""" + latest_ids_sq = _latest_snapshot_ids_subquery() rows = await session.execute( select(SBOMEntry, ManagedRepo.slug) .join(ManagedRepo, ManagedRepo.id == SBOMEntry.repo_id) + .where(SBOMEntry.snapshot_id.in_(select(latest_ids_sq.c.id))) ) - # Build: license_spdx → {count, repos set} groups: dict[str | None, dict] = {} copyleft_direct_count = 0 for entry, repo_slug in rows.all(): @@ -125,9 +214,19 @@ async def get_repo_sbom( repo_slug: str, session: AsyncSession = Depends(get_session), ) -> SBOMRepoView: + """Return the latest snapshot entries for a specific repo.""" repo = await _get_repo_by_slug(repo_slug, session) + latest_snap_id_sq = ( + select(SBOMSnapshot.id) + .where(SBOMSnapshot.repo_id == repo.id) + .order_by(SBOMSnapshot.snapshot_at.desc()) + .limit(1) + .scalar_subquery() + ) rows = await session.execute( - select(SBOMEntry).where(SBOMEntry.repo_id == repo.id).order_by(SBOMEntry.package_name) + select(SBOMEntry) + .where(SBOMEntry.snapshot_id == latest_snap_id_sq) + .order_by(SBOMEntry.package_name) ) entries = list(rows.scalars().all()) return SBOMRepoView( diff --git a/state-hub/api/routers/state.py b/state-hub/api/routers/state.py index 0932650..715bb3f 100644 --- 
a/state-hub/api/routers/state.py +++ b/state-hub/api/routers/state.py @@ -255,14 +255,14 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]: ): ws_per_domain[domain_id] = cnt - # EP counts per domain slug + # EP counts per domain id (via FK) ep_counts = {r[0]: r[1] for r in await session.execute( - select(ExtensionPoint.domain, func.count()).group_by(ExtensionPoint.domain) + select(ExtensionPoint.domain_id, func.count()).group_by(ExtensionPoint.domain_id) )} - # TD counts per domain slug + # TD counts per domain id (via FK) td_counts = {r[0]: r[1] for r in await session.execute( - select(TechnicalDebt.domain, func.count()).group_by(TechnicalDebt.domain) + select(TechnicalDebt.domain_id, func.count()).group_by(TechnicalDebt.domain_id) )} return [ @@ -271,8 +271,8 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]: name=d.name, repo_count=repo_counts.get(d.id, 0), active_workstream_count=ws_per_domain.get(d.id, 0), - ep_count=ep_counts.get(d.slug, 0), - td_count=td_counts.get(d.slug, 0), + ep_count=ep_counts.get(d.id, 0), + td_count=td_counts.get(d.id, 0), ) for d in domains ] diff --git a/state-hub/api/routers/technical_debt.py b/state-hub/api/routers/technical_debt.py index d3fbb48..eb8a4de 100644 --- a/state-hub/api/routers/technical_debt.py +++ b/state-hub/api/routers/technical_debt.py @@ -12,10 +12,21 @@ from api.schemas.technical_debt import TDCreate, TDRead, TDUpdate router = APIRouter(prefix="/technical-debt", tags=["technical-debt"]) -async def _get_valid_domain_slugs(session: AsyncSession) -> set[str]: - """Return the set of active domain slugs from the DB.""" - rows = await session.execute(select(Domain.slug).where(Domain.status == "active")) - return {r[0] for r in rows.all()} +async def _resolve_domain_id(slug: str, session: AsyncSession) -> uuid.UUID: + """Resolve a domain slug to its UUID, raising 422 if unknown.""" + row = await session.execute( + select(Domain.id).where(Domain.slug == 
slug, Domain.status == "active") + ) + domain_id = row.scalar_one_or_none() + if domain_id is None: + valid = [r[0] for r in (await session.execute( + select(Domain.slug).where(Domain.status == "active") + )).all()] + raise HTTPException( + status_code=422, + detail=f"Unknown domain '{slug}'. Valid domains: {sorted(valid)}", + ) + return domain_id @router.get("/", response_model=list[TDRead]) @@ -28,7 +39,8 @@ async def list_td( ) -> list[TechnicalDebt]: q = select(TechnicalDebt) if domain: - q = q.where(TechnicalDebt.domain == domain) + domain_id = await _resolve_domain_id(domain, session) + q = q.where(TechnicalDebt.domain_id == domain_id) if status: q = q.where(TechnicalDebt.status == status) if debt_type: @@ -45,13 +57,10 @@ async def create_td( body: TDCreate, session: AsyncSession = Depends(get_session), ) -> TechnicalDebt: - valid_domains = await _get_valid_domain_slugs(session) - if body.domain not in valid_domains: - raise HTTPException( - status_code=422, - detail=f"Unknown domain '{body.domain}'. 
Valid domains: {sorted(valid_domains)}", - ) - td = TechnicalDebt(**body.model_dump()) + domain_id = await _resolve_domain_id(body.domain, session) + data = body.model_dump(exclude={"domain"}) + data["domain_id"] = domain_id + td = TechnicalDebt(**data) session.add(td) await session.commit() await session.refresh(td) diff --git a/state-hub/api/routers/workstreams.py b/state-hub/api/routers/workstreams.py index d602802..32eb9b8 100644 --- a/state-hub/api/routers/workstreams.py +++ b/state-hub/api/routers/workstreams.py @@ -14,12 +14,15 @@ router = APIRouter(prefix="/workstreams", tags=["workstreams"]) @router.get("/", response_model=list[WorkstreamRead]) async def list_workstreams( topic_id: uuid.UUID | None = None, + repo_id: uuid.UUID | None = None, status: WorkstreamStatus | None = None, session: AsyncSession = Depends(get_session), ) -> list[Workstream]: q = select(Workstream) if topic_id: q = q.where(Workstream.topic_id == topic_id) + if repo_id: + q = q.where(Workstream.repo_id == repo_id) if status: q = q.where(Workstream.status == status) q = q.order_by(Workstream.created_at) diff --git a/state-hub/api/schemas/contribution.py b/state-hub/api/schemas/contribution.py index e037147..a241c09 100644 --- a/state-hub/api/schemas/contribution.py +++ b/state-hub/api/schemas/contribution.py @@ -15,6 +15,7 @@ class ContributionCreate(BaseModel): body_path: str | None = None related_topic_id: uuid.UUID | None = None related_workstream_id: uuid.UUID | None = None + repo_id: uuid.UUID | None = None notes: str | None = None @@ -36,6 +37,7 @@ class ContributionRead(BaseModel): body_path: str | None = None related_topic_id: uuid.UUID | None = None related_workstream_id: uuid.UUID | None = None + repo_id: uuid.UUID | None = None submitted_at: datetime | None = None resolved_at: datetime | None = None notes: str | None = None diff --git a/state-hub/api/schemas/extension_point.py b/state-hub/api/schemas/extension_point.py index 0cdc53e..23f6366 100644 --- 
a/state-hub/api/schemas/extension_point.py +++ b/state-hub/api/schemas/extension_point.py @@ -10,7 +10,7 @@ VALID_PRIORITIES = {"low", "medium", "high", "critical"} class EPCreate(BaseModel): ep_id: str | None = None - domain: str + domain: str # slug; router resolves to domain_id FK title: str description: str | None = None location: str | None = None @@ -36,7 +36,7 @@ class EPRead(BaseModel): id: uuid.UUID ep_id: str | None = None - domain: str + domain_slug: str # derived from domain relationship title: str description: str | None = None location: str | None = None diff --git a/state-hub/api/schemas/managed_repo.py b/state-hub/api/schemas/managed_repo.py index 3f4d9c3..1aa8582 100644 --- a/state-hub/api/schemas/managed_repo.py +++ b/state-hub/api/schemas/managed_repo.py @@ -26,6 +26,7 @@ class RepoRead(BaseModel): model_config = ConfigDict(from_attributes=True) id: uuid.UUID domain_id: uuid.UUID + domain_slug: str # derived from domain relationship slug: str name: str local_path: str | None = None diff --git a/state-hub/api/schemas/sbom.py b/state-hub/api/schemas/sbom.py index b2b3670..51f00d7 100644 --- a/state-hub/api/schemas/sbom.py +++ b/state-hub/api/schemas/sbom.py @@ -25,6 +25,7 @@ class SBOMEntryRead(BaseModel): id: uuid.UUID repo_id: uuid.UUID + snapshot_id: uuid.UUID package_name: str package_version: str | None = None ecosystem: Ecosystem @@ -35,6 +36,29 @@ class SBOMEntryRead(BaseModel): created_at: datetime +class SBOMSnapshotRead(BaseModel): + model_config = ConfigDict(from_attributes=True) + + id: uuid.UUID + repo_id: uuid.UUID + snapshot_at: datetime + source: str | None = None + entry_count: int + created_at: datetime + + +class SBOMSnapshotDetail(BaseModel): + model_config = ConfigDict(from_attributes=True) + + id: uuid.UUID + repo_id: uuid.UUID + snapshot_at: datetime + source: str | None = None + entry_count: int + created_at: datetime + entries: list[SBOMEntryRead] = [] + + class LicenceGroup(BaseModel): license_spdx: str | None count: int 
diff --git a/state-hub/api/schemas/technical_debt.py b/state-hub/api/schemas/technical_debt.py index 615240d..f5d920e 100644 --- a/state-hub/api/schemas/technical_debt.py +++ b/state-hub/api/schemas/technical_debt.py @@ -10,7 +10,7 @@ VALID_SEVERITIES = {"low", "medium", "high", "critical"} class TDCreate(BaseModel): td_id: str | None = None - domain: str + domain: str # slug; router resolves to domain_id FK title: str description: str | None = None location: str | None = None @@ -36,7 +36,7 @@ class TDRead(BaseModel): id: uuid.UUID td_id: str | None = None - domain: str + domain_slug: str # derived from domain relationship title: str description: str | None = None location: str | None = None diff --git a/state-hub/api/schemas/workstream.py b/state-hub/api/schemas/workstream.py index 08058d9..8e0ccc0 100644 --- a/state-hub/api/schemas/workstream.py +++ b/state-hub/api/schemas/workstream.py @@ -15,6 +15,7 @@ class WorkstreamCreate(BaseModel): status: WorkstreamStatus = WorkstreamStatus.active owner: str | None = None due_date: date | None = None + repo_id: uuid.UUID | None = None # GEMS primary: the owning repository class WorkstreamUpdate(BaseModel): @@ -23,12 +24,14 @@ class WorkstreamUpdate(BaseModel): status: WorkstreamStatus | None = None owner: str | None = None due_date: date | None = None + repo_id: uuid.UUID | None = None class WorkstreamRead(BaseModel): model_config = ConfigDict(from_attributes=True) id: uuid.UUID topic_id: uuid.UUID + repo_id: uuid.UUID | None = None slug: str title: str description: str | None = None diff --git a/state-hub/dashboard/src/dependencies.md b/state-hub/dashboard/src/dependencies.md index de7164f..8c15fde 100644 --- a/state-hub/dashboard/src/dependencies.md +++ b/state-hub/dashboard/src/dependencies.md @@ -13,20 +13,23 @@ const depState = (async function*() { while (true) { let wsMap = {}, edges = [], ok = false; try { - const [rw, rto, rs] = await Promise.all([ + const [rw, rto, rr, rs] = await Promise.all([ 
fetch(`${API}/workstreams/`), fetch(`${API}/topics/`), + fetch(`${API}/repos/`), fetch(`${API}/state/summary`), ]); - ok = rw.ok && rto.ok && rs.ok; + ok = rw.ok && rto.ok && rr.ok && rs.ok; if (ok) { - const [wsList, topicList, summary] = await Promise.all([ - rw.json(), rto.json(), rs.json(), + const [wsList, topicList, repoList, summary] = await Promise.all([ + rw.json(), rto.json(), rr.json(), rs.json(), ]); const topicMap = Object.fromEntries(topicList.map(t => [t.id, t])); + const repoMap = Object.fromEntries(repoList.map(r => [r.id, r])); wsMap = Object.fromEntries(wsList.map(w => [w.id, { ...w, - domain: topicMap[w.topic_id]?.domain_slug ?? "unknown", + // Prefer repo→domain (GEMS primary); fall back to topic→domain + domain: repoMap[w.repo_id]?.domain_slug ?? topicMap[w.topic_id]?.domain_slug ?? "unknown", }])); // Build directed edge list from open_workstreams depends_on arrays for (const ow of (summary.open_workstreams ?? [])) { diff --git a/state-hub/dashboard/src/extensions.md b/state-hub/dashboard/src/extensions.md index 181b2ff..9563089 100644 --- a/state-hub/dashboard/src/extensions.md +++ b/state-hub/dashboard/src/extensions.md @@ -12,17 +12,19 @@ const epState = (async function*() { while (true) { let data = [], ok = false; try { - const [re, rw, rt] = await Promise.all([ + const [re, rw, rt, rr] = await Promise.all([ fetch(`${API}/extension-points/`), fetch(`${API}/workstreams/`), fetch(`${API}/topics/`), + fetch(`${API}/repos/`), ]); - ok = re.ok && rw.ok && rt.ok; + ok = re.ok && rw.ok && rt.ok && rr.ok; if (ok) { - const [epList, wsList, topicList] = await Promise.all([re.json(), rw.json(), rt.json()]); + const [epList, wsList, topicList, repoList] = await Promise.all([re.json(), rw.json(), rt.json(), rr.json()]); const topicMap = Object.fromEntries(topicList.map(t => [t.id, t])); + const repoMap = Object.fromEntries(repoList.map(r => [r.id, r])); const wsMap = Object.fromEntries(wsList.map(w => [w.id, { - ...w, domain: 
topicMap[w.topic_id]?.domain_slug ?? "unknown", + ...w, domain: repoMap[w.repo_id]?.domain_slug ?? topicMap[w.topic_id]?.domain_slug ?? "unknown", }])); data = epList.map(e => ({ ...e, @@ -81,7 +83,7 @@ const filters = Generators.input(_filtersForm); const filtered = data.filter(e => (filters.status.length === 0 || filters.status.includes(e.status)) && (filters.priority.length === 0 || filters.priority.includes(e.priority)) && - (filters.domain.length === 0 || filters.domain.includes(e.domain)) && + (filters.domain.length === 0 || filters.domain.includes(e.domain_slug)) && (filters.ep_type.length === 0 || filters.ep_type.includes(e.ep_type)) ); ``` diff --git a/state-hub/dashboard/src/sbom.md b/state-hub/dashboard/src/sbom.md index 9b7716b..824343f 100644 --- a/state-hub/dashboard/src/sbom.md +++ b/state-hub/dashboard/src/sbom.md @@ -8,23 +8,25 @@ const API = "http://127.0.0.1:8000"; ```js // Fetch SBOM data on load -let _entries = [], _report = {groups: [], copyleft_direct_count: 0}, _repos = [], _domains = []; +let _entries = [], _report = {groups: [], copyleft_direct_count: 0}, _repos = [], _domains = [], _snapshots = []; try { - [_entries, _report, _repos, _domains] = await Promise.all([ + [_entries, _report, _repos, _domains, _snapshots] = await Promise.all([ fetch(`${API}/sbom/`).then(r => r.ok ? r.json() : []), fetch(`${API}/sbom/report/licences/`).then(r => r.ok ? r.json() : {groups:[], copyleft_direct_count: 0}), fetch(`${API}/repos/`).then(r => r.ok ? r.json() : []), fetch(`${API}/domains/`).then(r => r.ok ? r.json() : []), + fetch(`${API}/sbom/snapshots/`).then(r => r.ok ? r.json() : []), ]); } catch {} ``` ```js -const entries = _entries ?? []; -const report = _report ?? {groups: [], copyleft_direct_count: 0}; -const repos = _repos ?? []; -const domains = _domains ?? []; -const groups = report.groups ?? []; +const entries = _entries ?? []; +const report = _report ?? {groups: [], copyleft_direct_count: 0}; +const repos = _repos ?? 
[]; +const domains = _domains ?? []; +const snapshots = _snapshots ?? []; +const groups = report.groups ?? []; const riskCount = report.copyleft_direct_count ?? 0; // Domain + repo lookups @@ -212,6 +214,49 @@ if (repoSections.length === 0) { } ``` +## Snapshot History + +```js +if (snapshots.length === 0) { + display(html`

No snapshots recorded yet.

`); +} else { + // Group by repo, sort newest first within each group + const snapByRepo = {}; + for (const s of snapshots) { + (snapByRepo[s.repo_id] = snapByRepo[s.repo_id] ?? []).push(s); + } + + const repoOrder = Object.keys(snapByRepo).sort((a, b) => { + const ra = repos.find(r => r.id === a); + const rb = repos.find(r => r.id === b); + return (ra?.slug ?? a).localeCompare(rb?.slug ?? b); + }); + + display(html`
+ ${repoOrder.map(repoId => { + const repo = repos.find(r => r.id === repoId); + const domSlug = repo ? domains.find(d => d.id === repo.domain_id)?.slug ?? "—" : "—"; + const snaps = snapByRepo[repoId]; // already sorted newest-first by API + return html`
+ + ${domSlug} + ${repo?.slug ?? repoId.slice(0,8)} + ${snaps.length} snapshot${snaps.length !== 1 ? "s" : ""} + +
+ ${Inputs.table(snaps.map(s => ({ + "Snapshot At": new Date(s.snapshot_at).toLocaleString(), + Packages: s.entry_count, + Source: s.source ?? "—", + ID: s.id.slice(0, 8) + "…", + })), {maxWidth: 700})} +
+
`; + })} +
`); +} +``` + ## Package Table ```js @@ -272,4 +317,16 @@ details[open] > .repo-summary::before { content: "▼"; } .repo-meta { font-size: 0.78rem; color: gray; } .repo-risk-badge { font-size: 0.75rem; font-weight: 600; color: #c62828; background: #fde8e8; border-radius: 4px; padding: 0.1rem 0.4rem; } .repo-pkg-table { padding: 0.5rem 0.75rem 0.75rem; } + +/* ── Snapshot history ─────────────────────────────────────────────────────── */ +.snap-list { display: flex; flex-direction: column; gap: 0.5rem; margin-bottom: 1.5rem; } +.snap-repo-block { background: var(--theme-background-alt); border-radius: 8px; } +.snap-repo-block[open] { border: 1px solid var(--theme-foreground-faint); } +.snap-repo-summary { cursor: pointer; padding: 0.65rem 0.9rem; display: flex; gap: 0.6rem; align-items: center; flex-wrap: wrap; list-style: none; } +.snap-repo-summary::-webkit-details-marker { display: none; } +.snap-repo-summary::before { content: "▶"; font-size: 0.7rem; color: gray; flex-shrink: 0; } +details[open] > .snap-repo-summary::before { content: "▼"; } +.snap-repo-name { font-weight: 600; font-size: 0.9rem; font-family: monospace; } +.snap-meta { font-size: 0.78rem; color: gray; } +.snap-table-wrap { padding: 0.5rem 0.75rem 0.75rem; } diff --git a/state-hub/dashboard/src/tasks.md b/state-hub/dashboard/src/tasks.md index 43fe6c2..9c8e9a6 100644 --- a/state-hub/dashboard/src/tasks.md +++ b/state-hub/dashboard/src/tasks.md @@ -12,18 +12,20 @@ const taskState = (async function*() { while (true) { let data = [], ok = false; try { - const [rt, rw, rto] = await Promise.all([ + const [rt, rw, rto, rr] = await Promise.all([ fetch(`${API}/tasks/?limit=500`), fetch(`${API}/workstreams/`), fetch(`${API}/topics/`), + fetch(`${API}/repos/`), ]); - ok = rt.ok && rw.ok && rto.ok; + ok = rt.ok && rw.ok && rto.ok && rr.ok; if (ok) { - const [taskList, wsList, topicList] = await Promise.all([rt.json(), rw.json(), rto.json()]); + const [taskList, wsList, topicList, repoList] = await 
Promise.all([rt.json(), rw.json(), rto.json(), rr.json()]); const topicMap = Object.fromEntries(topicList.map(t => [t.id, t])); + const repoMap = Object.fromEntries(repoList.map(r => [r.id, r])); const wsMap = Object.fromEntries(wsList.map(w => [w.id, { ...w, - domain: topicMap[w.topic_id]?.domain_slug ?? "unknown", + domain: repoMap[w.repo_id]?.domain_slug ?? topicMap[w.topic_id]?.domain_slug ?? "unknown", }])); data = taskList.map(t => ({ ...t, diff --git a/state-hub/dashboard/src/techdept.md b/state-hub/dashboard/src/techdept.md index 5ab532c..0fa2ef6 100644 --- a/state-hub/dashboard/src/techdept.md +++ b/state-hub/dashboard/src/techdept.md @@ -12,17 +12,19 @@ const tdState = (async function*() { while (true) { let data = [], ok = false; try { - const [rt, rw, rto] = await Promise.all([ + const [rt, rw, rto, rr] = await Promise.all([ fetch(`${API}/technical-debt/`), fetch(`${API}/workstreams/`), fetch(`${API}/topics/`), + fetch(`${API}/repos/`), ]); - ok = rt.ok && rw.ok && rto.ok; + ok = rt.ok && rw.ok && rto.ok && rr.ok; if (ok) { - const [tdList, wsList, topicList] = await Promise.all([rt.json(), rw.json(), rto.json()]); + const [tdList, wsList, topicList, repoList] = await Promise.all([rt.json(), rw.json(), rto.json(), rr.json()]); const topicMap = Object.fromEntries(topicList.map(t => [t.id, t])); + const repoMap = Object.fromEntries(repoList.map(r => [r.id, r])); const wsMap = Object.fromEntries(wsList.map(w => [w.id, { - ...w, domain: topicMap[w.topic_id]?.domain_slug ?? "unknown", + ...w, domain: repoMap[w.repo_id]?.domain_slug ?? topicMap[w.topic_id]?.domain_slug ?? 
"unknown", }])); data = tdList.map(t => ({ ...t, @@ -81,7 +83,7 @@ const filters = Generators.input(_filtersForm); const filtered = data.filter(t => (filters.status.length === 0 || filters.status.includes(t.status)) && (filters.severity.length === 0 || filters.severity.includes(t.severity)) && - (filters.domain.length === 0 || filters.domain.includes(t.domain)) && + (filters.domain.length === 0 || filters.domain.includes(t.domain_slug)) && (filters.debt_type.length === 0 || filters.debt_type.includes(t.debt_type)) ); ``` diff --git a/state-hub/dashboard/src/todo.md b/state-hub/dashboard/src/todo.md index 15c4132..769b182 100644 --- a/state-hub/dashboard/src/todo.md +++ b/state-hub/dashboard/src/todo.md @@ -14,21 +14,23 @@ const todoState = (async function*() { while (true) { let tasks = [], contribs = [], wsMap = {}, ok = false; try { - const [rt, rw, rto, rc] = await Promise.all([ + const [rt, rw, rto, rr, rc] = await Promise.all([ fetch(`${API}/tasks/?limit=500`), fetch(`${API}/workstreams/`), fetch(`${API}/topics/`), + fetch(`${API}/repos/`), fetch(`${API}/contributions/`), ]); - ok = rt.ok && rw.ok && rto.ok && rc.ok; + ok = rt.ok && rw.ok && rto.ok && rr.ok && rc.ok; if (ok) { - const [taskList, wsList, topicList, contribList] = await Promise.all([ - rt.json(), rw.json(), rto.json(), rc.json(), + const [taskList, wsList, topicList, repoList, contribList] = await Promise.all([ + rt.json(), rw.json(), rto.json(), rr.json(), rc.json(), ]); const topicMap = Object.fromEntries(topicList.map(t => [t.id, t])); + const repoMap = Object.fromEntries(repoList.map(r => [r.id, r])); wsMap = Object.fromEntries(wsList.map(w => [w.id, { ...w, - domain: topicMap[w.topic_id]?.domain_slug ?? "unknown", + domain: repoMap[w.repo_id]?.domain_slug ?? topicMap[w.topic_id]?.domain_slug ?? 
"unknown", }])); tasks = taskList.map(t => ({ ...t, diff --git a/state-hub/dashboard/src/workstreams.md b/state-hub/dashboard/src/workstreams.md index d3c2608..b6b8f12 100644 --- a/state-hub/dashboard/src/workstreams.md +++ b/state-hub/dashboard/src/workstreams.md @@ -13,18 +13,20 @@ const wsState = (async function*() { while (true) { let data = [], openWs = [], ok = false; try { - const [rw, rt, rs] = await Promise.all([ + const [rw, rt, rr, rs] = await Promise.all([ fetch(`${API}/workstreams/`), fetch(`${API}/topics/`), + fetch(`${API}/repos/`), fetch(`${API}/state/summary`), ]); - ok = rw.ok && rt.ok && rs.ok; + ok = rw.ok && rt.ok && rr.ok && rs.ok; if (ok) { - const [wsList, topicList, summary] = await Promise.all([rw.json(), rt.json(), rs.json()]); + const [wsList, topicList, repoList, summary] = await Promise.all([rw.json(), rt.json(), rr.json(), rs.json()]); const topicMap = Object.fromEntries(topicList.map(t => [t.id, t])); + const repoMap = Object.fromEntries(repoList.map(r => [r.id, r])); data = wsList.map(w => ({ ...w, - domain: topicMap[w.topic_id]?.domain_slug ?? "unknown", + domain: repoMap[w.repo_id]?.domain_slug ?? topicMap[w.topic_id]?.domain_slug ?? "unknown", topic_title: topicMap[w.topic_id]?.title ?? "—", })); // open_workstreams from summary carry depends_on / blocks lists diff --git a/state-hub/mcp_server/server.py b/state-hub/mcp_server/server.py index f60059a..6db3e26 100644 --- a/state-hub/mcp_server/server.py +++ b/state-hub/mcp_server/server.py @@ -219,6 +219,7 @@ def create_workstream( description: str | None = None, owner: str | None = None, due_date: str | None = None, + repo_id: str | None = None, ) -> str: """Create a new workstream under a topic and emit a progress_event. 
@@ -229,6 +230,7 @@ def create_workstream( description: optional longer description owner: optional owner name due_date: optional ISO date string (YYYY-MM-DD) + repo_id: UUID of the owning repository (GEMS primary; strongly recommended per ADR-001) """ if not slug: slug = re.sub(r"[^a-z0-9]+", "-", title.lower()).strip("-") @@ -240,6 +242,7 @@ def create_workstream( "owner": owner, "due_date": due_date, "status": "active", + "repo_id": repo_id, }) _post("/progress", { "topic_id": topic_id, @@ -975,8 +978,8 @@ def get_contributions( def ingest_sbom_tool(repo_slug: str, lockfile_path: str) -> str: """Ingest a lockfile into the State Hub SBOM store for a repo. - Parses the lockfile and POSTs entries to /sbom/ingest/. Old entries - for the repo are replaced (snapshot strategy). + Parses the lockfile and POSTs entries to /sbom/ingest/. Each call creates + a new SBOMSnapshot; previous snapshots are retained as history. Args: repo_slug: Managed-repo slug (must be registered via register_repo) diff --git a/state-hub/migrations/versions/a3b4c5d6e7f8_gems_pass3_sbom_snapshot.py b/state-hub/migrations/versions/a3b4c5d6e7f8_gems_pass3_sbom_snapshot.py new file mode 100644 index 0000000..31f0983 --- /dev/null +++ b/state-hub/migrations/versions/a3b4c5d6e7f8_gems_pass3_sbom_snapshot.py @@ -0,0 +1,93 @@ +"""GEMS Pass 3: add sbom_snapshots container entity + +Revision ID: a3b4c5d6e7f8 +Revises: f2a3b4c5d6e7 +Create Date: 2026-03-02 00:00:00.000000 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +revision: str = "a3b4c5d6e7f8" +down_revision: Union[str, None] = "f2a3b4c5d6e7" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ── Create sbom_snapshots table ──────────────────────────────────────────── + op.create_table( + "sbom_snapshots", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + 
sa.Column( + "repo_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("managed_repos.id", ondelete="RESTRICT"), + nullable=False, + index=True, + ), + sa.Column("snapshot_at", sa.DateTime(timezone=True), nullable=False), + sa.Column("source", sa.String(200), nullable=True), + sa.Column("entry_count", sa.Integer, nullable=False, server_default="0"), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + ) + # ── Add snapshot_id FK to sbom_entries (nullable during backfill) ────────── + op.add_column( + "sbom_entries", + sa.Column("snapshot_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_foreign_key( + "fk_sbom_entry_snapshot_id", + "sbom_entries", "sbom_snapshots", + ["snapshot_id"], ["id"], + ondelete="RESTRICT", + ) + + # ── Backfill: create one snapshot per (repo_id, snapshot_at) group ───────── + op.execute(""" + INSERT INTO sbom_snapshots (id, repo_id, snapshot_at, source, entry_count, created_at) + SELECT + gen_random_uuid(), + repo_id, + snapshot_at, + 'backfill' AS source, + COUNT(*) AS entry_count, + MIN(created_at) AS created_at + FROM sbom_entries + GROUP BY repo_id, snapshot_at + """) + + # ── Assign snapshot_id to each entry ─────────────────────────────────────── + op.execute(""" + UPDATE sbom_entries e + SET snapshot_id = s.id + FROM sbom_snapshots s + WHERE s.repo_id = e.repo_id + AND s.snapshot_at = e.snapshot_at + """) + + # ── Make snapshot_id NOT NULL ────────────────────────────────────────────── + op.execute(""" + DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM sbom_entries WHERE snapshot_id IS NULL) THEN + RAISE EXCEPTION 'GEMS Pass 3: sbom_entries rows with no snapshot assigned'; + END IF; + END $$; + """) + op.alter_column("sbom_entries", "snapshot_id", nullable=False) + op.create_index("ix_sbom_entries_snapshot_id", "sbom_entries", ["snapshot_id"]) + + +def downgrade() -> None: + op.drop_index("ix_sbom_entries_snapshot_id", table_name="sbom_entries") + 
op.drop_constraint("fk_sbom_entry_snapshot_id", "sbom_entries", type_="foreignkey") + op.drop_column("sbom_entries", "snapshot_id") + op.drop_table("sbom_snapshots") diff --git a/state-hub/migrations/versions/e1f2a3b4c5d6_gems_pass1_domain_fk.py b/state-hub/migrations/versions/e1f2a3b4c5d6_gems_pass1_domain_fk.py new file mode 100644 index 0000000..29596a5 --- /dev/null +++ b/state-hub/migrations/versions/e1f2a3b4c5d6_gems_pass1_domain_fk.py @@ -0,0 +1,142 @@ +"""GEMS Pass 1: domain_id FK on extension_points/technical_debt, repo_id on contributions + +Revision ID: e1f2a3b4c5d6 +Revises: d3e4f5a6b7c8 +Create Date: 2026-03-02 00:00:00.000000 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +revision: str = "e1f2a3b4c5d6" +down_revision: Union[str, None] = "d3e4f5a6b7c8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ── extension_points: add domain_id FK ──────────────────────────────────── + op.add_column( + "extension_points", + sa.Column("domain_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_foreign_key( + "fk_ep_domain_id", + "extension_points", "domains", + ["domain_id"], ["id"], + ondelete="RESTRICT", + ) + + # Backfill from slug string + op.execute(""" + UPDATE extension_points ep + SET domain_id = d.id + FROM domains d + WHERE d.slug = ep.domain + """) + + # Safety check: abort if any rows remain unmatched + op.execute(""" + DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM extension_points WHERE domain_id IS NULL) THEN + RAISE EXCEPTION + 'GEMS Pass 1: extension_points rows with unknown domain slug: %', + (SELECT string_agg(DISTINCT domain, ', ') + FROM extension_points WHERE domain_id IS NULL); + END IF; + END $$; + """) + + op.alter_column("extension_points", "domain_id", nullable=False) + op.drop_index("ix_extension_points_domain", table_name="extension_points") + 
op.drop_column("extension_points", "domain") + op.create_index("ix_extension_points_domain_id", "extension_points", ["domain_id"]) + + # ── technical_debt: add domain_id FK ────────────────────────────────────── + op.add_column( + "technical_debt", + sa.Column("domain_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_foreign_key( + "fk_td_domain_id", + "technical_debt", "domains", + ["domain_id"], ["id"], + ondelete="RESTRICT", + ) + + op.execute(""" + UPDATE technical_debt td + SET domain_id = d.id + FROM domains d + WHERE d.slug = td.domain + """) + + op.execute(""" + DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM technical_debt WHERE domain_id IS NULL) THEN + RAISE EXCEPTION + 'GEMS Pass 1: technical_debt rows with unknown domain slug: %', + (SELECT string_agg(DISTINCT domain, ', ') + FROM technical_debt WHERE domain_id IS NULL); + END IF; + END $$; + """) + + op.alter_column("technical_debt", "domain_id", nullable=False) + op.drop_index("ix_technical_debt_domain", table_name="technical_debt") + op.drop_column("technical_debt", "domain") + op.create_index("ix_technical_debt_domain_id", "technical_debt", ["domain_id"]) + + # ── contributions: add nullable repo_id FK ──────────────────────────────── + op.add_column( + "contributions", + sa.Column( + "repo_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("managed_repos.id", ondelete="SET NULL"), + nullable=True, + ), + ) + + +def downgrade() -> None: + # contributions: drop repo_id + op.drop_column("contributions", "repo_id") + + # technical_debt: restore domain string + op.add_column( + "technical_debt", + sa.Column("domain", sa.String(50), nullable=True), + ) + op.execute(""" + UPDATE technical_debt td + SET domain = d.slug + FROM domains d + WHERE d.id = td.domain_id + """) + op.alter_column("technical_debt", "domain", nullable=False) + op.create_index("ix_technical_debt_domain", "technical_debt", ["domain"]) + op.drop_index("ix_technical_debt_domain_id", table_name="technical_debt") + 
op.drop_constraint("fk_td_domain_id", "technical_debt", type_="foreignkey") + op.drop_column("technical_debt", "domain_id") + + # extension_points: restore domain string + op.add_column( + "extension_points", + sa.Column("domain", sa.String(50), nullable=True), + ) + op.execute(""" + UPDATE extension_points ep + SET domain = d.slug + FROM domains d + WHERE d.id = ep.domain_id + """) + op.alter_column("extension_points", "domain", nullable=False) + op.create_index("ix_extension_points_domain", "extension_points", ["domain"]) + op.drop_index("ix_extension_points_domain_id", table_name="extension_points") + op.drop_constraint("fk_ep_domain_id", "extension_points", type_="foreignkey") + op.drop_column("extension_points", "domain_id") diff --git a/state-hub/migrations/versions/f2a3b4c5d6e7_gems_pass2_workstream_repo_id.py b/state-hub/migrations/versions/f2a3b4c5d6e7_gems_pass2_workstream_repo_id.py new file mode 100644 index 0000000..fc09b46 --- /dev/null +++ b/state-hub/migrations/versions/f2a3b4c5d6e7_gems_pass2_workstream_repo_id.py @@ -0,0 +1,54 @@ +"""GEMS Pass 2: add repo_id FK to workstreams (ADR-001 alignment) + +Revision ID: f2a3b4c5d6e7 +Revises: e1f2a3b4c5d6 +Create Date: 2026-03-02 00:00:00.000000 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +revision: str = "f2a3b4c5d6e7" +down_revision: Union[str, None] = "e1f2a3b4c5d6" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "workstreams", + sa.Column("repo_id", postgresql.UUID(as_uuid=True), nullable=True), + ) + op.create_foreign_key( + "fk_workstream_repo_id", + "workstreams", "managed_repos", + ["repo_id"], ["id"], + ondelete="SET NULL", + ) + op.create_index("ix_workstreams_repo_id", "workstreams", ["repo_id"]) + + # Best-effort backfill: topic → domain → first repo (by created_at) + # Records with no repo in 
their domain remain NULL (requires manual resolution) + op.execute(""" + UPDATE workstreams ws + SET repo_id = sub.repo_id + FROM ( + SELECT DISTINCT ON (ws.id) + ws.id AS ws_id, + mr.id AS repo_id + FROM workstreams ws + JOIN topics t ON t.id = ws.topic_id + JOIN managed_repos mr ON mr.domain_id = t.domain_id + WHERE mr.status = 'active' + ORDER BY ws.id, mr.created_at + ) sub + WHERE ws.id = sub.ws_id + """) + + +def downgrade() -> None: + op.drop_index("ix_workstreams_repo_id", table_name="workstreams") + op.drop_constraint("fk_workstream_repo_id", "workstreams", type_="foreignkey") + op.drop_column("workstreams", "repo_id") diff --git a/wiki/GEMS-StateHub-SWOT.md b/wiki/GEMS-StateHub-SWOT.md new file mode 100644 index 0000000..7d5e67e --- /dev/null +++ b/wiki/GEMS-StateHub-SWOT.md @@ -0,0 +1,205 @@ +# SWOT Analysis — Migrating State-Hub to GEMS + +Evaluation of migrating the Custodian State Hub data store from its current +ad-hoc relational schema to the Generic Entity Modelling System (GEMS) as +defined in `wiki/GenericEntityModellingSystem.md` and instantiated in +`wiki/GEMS-StateHub-TypeRegistry.md`. + +**Created:** 2026-03-02 +**Author:** Custodian (analytical session) + +--- + +## Migration Options Under Consideration + +Before the SWOT, three architectural options are in scope: + +**Option A — Full Generic Entity Model** +Single `entities` table + `attachments` junction + JSONB payload. True GEMS +implementation. All current typed tables dissolved into the entity model. + +**Option B — Typed-Table Approach with GEMS Constraints** +Keep typed tables (domains, topics, workstreams, etc.) 
but add: +- A universal `entity_id` abstraction layer +- An `attachments` junction table for secondary attachments +- Application-level GEMS constraint validation +- Fix all structural inconsistencies (I-1 through I-8 in CUST-WP-0006) + +**Option C — Incremental Normalization (Pattern C from GEMS §9)** +Fix the most critical inconsistencies immediately (I-1, I-2, I-5), leave +lesser items wrapped/deferred. No generic entity table introduced. + +--- + +## SWOT Analysis + +### Strengths + +**S1 — Uniform modeling surface eliminates special-casing (all options)** +Currently each entity type has bespoke FKs, bespoke routers, and bespoke MCP +tools. GEMS gives a predictable pattern: every entity has a primary context and +optional secondaries. New entity types follow the same pattern with zero +schema design work. + +**S2 — Fixes real, observable bugs (Options B and C)** +The domain string inconsistency (I-1) causes SBOM and EP/TD dashboard views to +silently display wrong or missing domain associations. The Workstream/Topic +container mismatch (I-2) causes domain attribution to fail in the Dependencies +view. These are current user-visible defects — migration resolves them. + +**S3 — ADR-001 alignment (Options B and C)** +ADR-001 mandates that workstreams originate in repos. The current schema forces +workstreams under Topics. Migrating Workstream.primary → Repository would bring +the schema into conformance with the governing ADR. + +**S4 — Enables first-class graph queries (Option A, partially B)** +With Relations as first-class entities, queries like "what decisions influenced +which tasks?" or "what dependencies cross domain boundaries?" become uniform +and indexable. Currently these require ad-hoc multi-table joins. + +**S5 — Incremental migration is supported by the model (all options)** +GEMS §9 explicitly defines integration patterns for existing systems. 
Pattern C +(progressive normalization) allows working systems to remain stable while the +most valuable types are migrated first. + +**S6 — Future-proofs multi-domain cross-system queries** +As more repositories are registered and domains become interdependent, the +current schema's inconsistencies compound. GEMS alignment now prevents +exponential complexity accumulation. + +--- + +### Weaknesses + +**W1 — Option A requires full schema rewrite (high risk)** +Dissolving typed tables into a generic entity model means every router, every +MCP tool, every dashboard data loader, and every Alembic migration must be +rewritten. This is weeks of work with high regression risk. + +**W2 — Loss of SQL-level type safety (Option A)** +Typed tables give the database schema as documentation and enforce type-correct +relations at the DB constraint level (FK types, enum columns). A generic entity +table with JSONB payloads moves type enforcement to the application layer, which +is easier to break silently. + +**W3 — GEMS does not define a concrete SQL schema** +The GEMS document is conceptual. Translating the attachment list model into +PostgreSQL requires design decisions (indexed JSONB vs. junction table, UUID +ordering, etc.) that are not trivial and have performance implications. + +**W4 — ProgressEvent's multi-attach pattern doesn't map cleanly to GEMS** +ProgressEvent's current schema (nullable topic_id, workstream_id, task_id, +decision_id) is intentionally flexible for an append-only log. GEMS's "exactly +one primary attachment" rule may force awkward choices (e.g. always using +Workstream as primary even for domain-level events). + +**W5 — Ecosystem root is of uncertain value** +Adding an explicit Ecosystem singleton adds ceremony for little practical query +benefit in the current six-domain setup. It may become valuable when the system +grows to multi-tenant or multi-ecosystem scope, but is premature now. 
+ +--- + +### Opportunities + +**O1 — Snapshot diffing for SBOM (SBOMSnapshot entity)** +Adding a SBOMSnapshot container (resolves I-5) enables: "what packages were +added/removed between ingests?" This is a direct user value feature, not just +architectural cleanup. + +**O2 — Unified contribution and decision provenance graph** +With Relation entities, you can model "Decision D motivates Workstream W" or +"Contribution C implements Decision D" as queryable, auditable edges. This is the +foundation for a richer Custodian agent that can reason about the provenance of +work items. + +**O3 — Generic dashboard patterns** +Once GEMS is in place, dashboard pages can share a single entity-browsing +component rather than one bespoke page per entity type. This reduces UI technical +debt significantly. + +**O4 — Enabling cross-repo task relations (DependsOn at Repository scope)** +With Relations as first-class entities, it becomes natural to register "Task A in repo X +blocks Task B in repo Y" — a cross-repo dependency that the current +WorkstreamDependency table cannot model. + +**O5 — Type registry as a self-documenting schema** +A GEMS Type Registry is human-readable, machine-validatable, and version-controlled. +It replaces the current implicit understanding of "what can be attached to what" +with an explicit contract. + +--- + +### Threats + +**T1 — Risk of over-engineering a working system** +The state-hub currently works well enough for its intended read-model role. A +full schema rewrite to achieve theoretical elegance could introduce regressions, +stall other domain work, and deliver minimal user-visible value in the short term. + +**T2 — ADR-001 workplan file format would need updates** +If Workstream moves from Topic to Repository as its primary container, the +`topic_slug` frontmatter field of every existing workplan would need to be replaced by, or supplemented with, +`repo_slug`. All workplan files across all registered repos require updating. 
+ +**T3 — Hybrid state during incremental migration is confusing** +Pattern C leaves the system in a mixed state for an extended period: some +entities are GEMS-conformant, others are legacy. Tooling must handle both +shapes simultaneously, increasing maintenance burden. + +**T4 — Dashboard rewrites could introduce new bugs** +The dashboard is the primary UI for the hub. Rewriting data loaders and query +patterns risks introducing visual regressions that would go unnoticed without a +test suite (there is currently none for the dashboard). + +**T5 — No migration dry-run tooling exists** +The planned `make sync-workplans` target doesn't exist yet (CUST-WP future deliverable). +Running migrations against production data without a rollback path is risky. + +--- + +## Verdict and Recommended Path + +**Recommended: Option C — Incremental Normalization** + +Proceed in three targeted passes, each independently releasable: + +**Pass 1 — Fix structural inconsistencies (I-1, I-6): low risk, high consistency gain** +- Migrate `ExtensionPoint.domain` (String) → `domain_id` FK + back-fill +- Migrate `TechnicalDebt.domain` (String) → `domain_id` FK + back-fill +- Add `repo_id` FK to `Contribution` (nullable initially) +- This pass has zero breaking API changes; only DB schema and router filter logic change. 
+ +**Pass 2 — Align Workstream with ADR-001 (I-2): medium risk, architectural gain** +- Add `repo_id` FK to `Workstream` (nullable initially, then enforce) +- Update MCP `create_workstream` to require `repo_id` +- Update workplan frontmatter format to include `repo_slug` +- Migrate `dependencies.md` to use `repo` instead of `topic` for domain resolution +- Decision DEC-GEMS-002 must be resolved before this pass begins + +**Pass 3 — Add SBOMSnapshot container (I-5): medium risk, feature gain** +- Add `sbom_snapshots` table + FK from `sbom_entries` +- Update ingest API to create/find snapshot per repo+timestamp +- Enable snapshot history and diff queries in SBOM dashboard +- Decision DEC-GEMS-004 must be resolved before this pass begins + +**Deferred:** Full generic entity model (Option A), Ecosystem root (I-7), +DependsOn as first-class Relation (I-8), ManagedRepo.topic_id cleanup (I-4). +These are tracked as extension points; revisit after Passes 1-3 are stable. + +--- + +## Decision Dependency Map + +``` +DEC-GEMS-001 (architecture) ──────────────────────────────────► Pass 3+ +DEC-GEMS-002 (workstream/topic vs repo) ──────────────────────► Pass 2 +DEC-GEMS-003 (domain string → FK) ────────────────────────────► Pass 1 +DEC-GEMS-004 (SBOMSnapshot container) ────────────────────────► Pass 3 +DEC-GEMS-005 (Ecosystem root) ─────────────────────────────────► Deferred +DEC-GEMS-006 (DependsOn as Relation entity) ───────────────────► Deferred +``` + +Pass 1 can begin as soon as DEC-GEMS-003 is resolved (expected: trivially yes). +Pass 2 requires DEC-GEMS-002 resolution (breaking change; needs explicit approval). +Pass 3 requires DEC-GEMS-004 resolution. 
diff --git a/wiki/GEMS-StateHub-TypeRegistry.md b/wiki/GEMS-StateHub-TypeRegistry.md new file mode 100644 index 0000000..cc8f99c --- /dev/null +++ b/wiki/GEMS-StateHub-TypeRegistry.md @@ -0,0 +1,134 @@ +# GEMS State-Hub Type Registry + +Domain-specific instantiation of the Generic Entity Modelling System +(`wiki/GenericEntityModellingSystem.md`) for the Custodian State Hub. + +**Status:** Draft — subject to revision pending decisions DEC-GEMS-001 through DEC-GEMS-006. +**Created:** 2026-03-02 + +--- + +## Hierarchy Overview + +``` +Ecosystem (implicit root — singleton) + └── Domain (Complex) + ├── Topic (Complex) organizes focus areas + └── Repository (Complex) + ├── Workstream (Complex) organizes tasks + ├── SBOMSnapshot (Complex) organizes SBOM entries + ├── Task (Atom) ← secondary: Workstream + ├── Decision (Atom) ← secondary: Topic | Workstream + ├── TechnicalDebt (Atom) + ├── ExtensionPoint (Atom) + ├── Contribution (Atom) + └── ProgressEvent (Atom) ← secondary: Workstream | Task | Decision + +SBOMSnapshot + └── SBOMEntry (Atom) + +DependsOn (Relation, primary=Domain) + from: Workstream → to: Workstream +``` + +--- + +## Type Registry Table + +| Type | Kind | Primary Attachment Type | Allowed Secondary Attachments | Payload / Key Fields | +|---|---|---|---|---| +| **Ecosystem** | Complex | — (root) | — | name, description | +| **Domain** | Complex | Ecosystem | — | slug, name, status | +| **Topic** | Complex | Domain | — | slug, title, status | +| **Repository** | Complex | Domain | Topic (optional scope annotation) | slug, name, local_path, remote_url | +| **Workstream** | Complex | Repository | Topic (organizer) | slug, title, status, owner, due_date | +| **SBOMSnapshot** | Complex | Repository | — | snapshot_at, source | +| **Task** | Atom | Workstream | — | title, status, priority, assignee, due_date | +| **Decision** | Atom | Repository | Topic \| Workstream (context) | title, type, status, rationale, deadline | +| **TechnicalDebt** | Atom | Repository | 
Topic \| Workstream (context) | td_id, debt_type, severity, status | +| **ExtensionPoint** | Atom | Repository | Topic \| Workstream (context) | ep_id, ep_type, priority, status | +| **Contribution** | Atom | Repository | — | type, target_org, target_repo, status | +| **ProgressEvent** | Atom | Workstream | Task \| Decision (context) | summary, event_type, author | +| **SBOMEntry** | Atom | SBOMSnapshot | — | package_name, version, ecosystem, license_spdx | +| **DependsOn** | Relation | Domain | — | attachments[1]=from_ws, attachments[2]=to_ws, description | + +--- + +## Validation Invariants + +Following GEMS §5.2: + +1. **Primary chain must be acyclic.** No entity may be its own ancestor via primary + attachments. + +2. **Primary attachment kind/type must match the registry.** A Task's primary must be + a Workstream; a Workstream's primary must be a Repository, etc. + +3. **Context-consistency for secondary attachments.** If Task has a secondary attachment + to a Workstream, that Workstream's primary must be the same Repository as the Task's + context (inherited via Workstream.primary). + +4. **Relation endpoint types must match the relation's type definition.** DependsOn.from + and DependsOn.to must both be Workstream entities within the same Domain. + +5. **Relation primary must be a Complex.** DependsOn.primary = Domain (the relation-space + that "owns" the inter-workstream dependency graph). 
+ +--- + +## Mapping: Current Tables → GEMS + +| Current Table | Target GEMS Type | Status | Change Required | +|---|---|---|---| +| `domains` | Domain (Complex) | Correct | None | +| `topics` | Topic (Complex) | Correct | None | +| `managed_repos` | Repository (Complex) | Mostly correct | Remove nullable topic_id; add optional secondary | +| `workstreams` | Workstream (Complex) | **Broken** | Change primary from topic_id to repo_id | +| `tasks` | Task (Atom) | Correct | None | +| `decisions` | Decision (Atom) | Ambiguous | Change primary to repo_id; topic/workstream become secondaries | +| `technical_debt` | TechnicalDebt (Atom) | **Broken** | domain string → repo_id FK | +| `extension_points` | ExtensionPoint (Atom) | **Broken** | domain string → repo_id FK | +| `contributions` | Contribution (Atom) | Incomplete | Add repo_id FK | +| `progress_events` | ProgressEvent (Atom) | Ambiguous | Clarify primary vs. secondary | +| `sbom_entries` | SBOMEntry (Atom) | **Broken** | Add SBOMSnapshot container | +| `workstream_dependencies` | DependsOn (Relation) | Acceptable | Consider Relation entity model | +| *(missing)* | SBOMSnapshot (Complex) | **Missing** | New table required | +| *(missing)* | Ecosystem (Complex) | Missing | Optional singleton | + +--- + +## Query Patterns Enabled + +After full GEMS alignment: + +``` +# All workstreams in a domain +Workstream WHERE primary.primary.slug = "railiance" + +# All open tasks for a given repo +Task WHERE primary.primary.slug = "activity-core" AND status != "done" + +# Dependency graph for a domain +DependsOn WHERE primary.slug = "custodian" + +# SBOM history for a repo +SBOMSnapshot WHERE primary.slug = "the-custodian" ORDER BY snapshot_at DESC + +# All tech debt in a domain (currently broken — domain is a string) +TechnicalDebt WHERE primary.primary.slug = "custodian" +``` + +--- + +## Notes + +- `Topic` and `Repository` are both children of `Domain` but are distinct organizers. 
+ Topic = "focus area / project agenda"; Repository = "git repo / code artefact boundary". + A Topic may align with one or more repositories, but neither owns the other. + +- `Workstream` moving from Topic → Repository is the most disruptive change. It resolves + the ADR-001 contradiction (workplans must live in repos, but workstreams live under topics). + +- `ProgressEvent` retains its multi-attach flexibility (topic, workstream, task, decision) + but those become secondary attachments. The primary should be the Workstream (or Repository + if no workstream context). diff --git a/wiki/GenericEntityModellingSystem.md b/wiki/GenericEntityModellingSystem.md new file mode 100644 index 0000000..ed3af59 --- /dev/null +++ b/wiki/GenericEntityModellingSystem.md @@ -0,0 +1,364 @@ +## Generic entity modeling system + +A domain-agnostic data modeling system for organizing “entities under management” in a rigorous, flexible, and extensible way. + +### Goals + +* **Rigorous**: clear invariants, predictable querying, safe evolution. +* **Flexible**: new entity types and new relations without migrations that rewrite everything. +* **Extensible**: supports multiple domains, sub-domains, and incremental adoption over existing data. + +--- + +# 1. Core concepts + +## 1.1 Entity + +An **Entity** is the atomic unit of identity and lifecycle. + +**Entity fields (conceptual)** + +* `id` (immutable unique identifier) +* `kind` ∈ {`Atom`, `Complex`, `Relation`} +* `type` (domain-specific type name, e.g. `Task`, `Repository`, `Customer`) +* `payload` (type-specific attributes; ideally versioned) +* `attachments` (ordered list of entity references) +* `meta` (timestamps, version, permissions, provenance) + +### Entity kinds + +* **Atom**: primary facts / content objects. +* **Complex**: organizational containers and structure owners (hierarchy, collections, indexes, contexts). +* **Relation**: first-class edge object that encodes a relationship between entities; owned by a Complex. 
+ +--- + +# 2. Attachments + +## 2.1 Attachment list + +Every entity has an ordered list: + +* `attachments: [EntityRef]` +* `attachments[0]` is the **Primary Attachment**. + +### Derived notion: “Part-of” + +If an entity’s primary attachment is an **Atom**, then the entity is a **Part** of that Atom. + +This is a *classification* derived from data, not a separate stored relation. + +## 2.2 Attachment roles (recommended) + +To avoid ambiguity and allow validation, each attachment can optionally have a role label. + +Conceptually: + +* `attachment = { targetId, position, role }` + +Common roles: + +* `primary` (implicit by position 0) +* `index` (entity appears in this complex for navigation/search) +* `provenance` (source reference) +* `tag` (classification) +* `context` (additional scope) + +Ordering remains canonical; roles improve clarity and constraints. + +--- + +# 3. Hierarchy and layering + +## 3.1 Primary chain + +The **Primary Chain** of an entity is obtained by repeatedly following `attachments[0]`. + +**Invariant (recommended):** the primary chain must be **acyclic**. + +This yields a robust layering model: + +* Every entity “lives in” a context (a Complex), or is “part of” an Atom. +* You can always answer: “Where does this belong?” by walking the primary chain. + +## 3.2 Roots and scopes + +A system should define at least one **root Complex** (e.g. `Ecosystem`, `Workspace`, `Tenant`). + +All managed entities must be reachable from a root by following primary attachments. + +--- + +# 4. Relations as first-class entities + +## 4.1 Relation entity + +A **Relation** is an entity whose purpose is to define a connection among other entities. + +**Key rule** + +* A Relation’s **primary attachment MUST be a Complex**. + That Complex is the **relation-space** (the context that “owns” the relationship). + +This avoids “atoms knowing” relation details: atoms remain content, complexes and relations hold structure. 
+ +## 4.2 Relation endpoints convention + +To make relations queryable and consistent, standardize attachment slots: + +* `attachments[0] = contextComplex` (primary; relation-space) +* `attachments[1] = fromEndpoint` +* `attachments[2] = toEndpoint` +* `attachments[3..] = optional extra endpoints` (evidence, via, stakeholder, etc.) + +Relation semantics live in: + +* `type` (e.g. `DependsOn`, `Implements`, `References`) +* and/or payload fields like `{ relType: "...", strength: ..., rationale: ... }` + +--- + +# 5. Type system and constraints + +## 5.1 Entity Type Registry + +Maintain a registry of types describing: + +* `kind`: Atom/Complex/Relation +* allowed primary attachment kinds/types +* allowed secondary attachment kinds/types +* payload schema (optional but recommended) +* indexing / query defaults + +Example (conceptual): + +* `Task`: kind=Atom, primary must be `Repository` (Complex) +* `Repository`: kind=Complex, primary must be `Domain` (Complex) + +## 5.2 Validation invariants (recommended minimum) + +1. **Exactly one primary attachment** (position 0). +2. **Primary chain must be acyclic**. +3. **Primary attachment kind/type constraints** must match the registry. +4. **Context-consistency constraints** for organizer complexes: + + * if `Task` has a secondary attachment to `Workstream`, + then `Task.primary == Workstream.primary` (same repository). +5. **Relation constraints**: + + * primary must be Complex + * endpoint types must match relation type definition + * relation context must match endpoint context rules (usually same repo/domain) + +These constraints give rigor without hard-coding a single domain model. + +--- + +# 6. Query model (domain-agnostic) + +These queries exist in any domain: + +## 6.1 Locate context + +* `context(entity)` = walk primary chain to root, or to the nearest scope boundary (e.g. nearest Domain/Workspace). + +## 6.2 Membership + +* Members of a Complex: all entities with `primary == complexId`. 
+ +## 6.3 Parts of an Atom + +* Parts of an Atom: all entities with `primary == atomId`. + +## 6.4 Relations in a relation-space + +* Relations owned by a Complex: all Relation entities with `primary == complexId`. + +## 6.5 Neighborhood (graph view) + +* For entity X: find all relations in the same relation-space where X appears as endpoint. + +--- + +# 7. Example domain: Ecosystem → Domain → Repository → Workstreams/SBOMs + +This section makes the system concrete using your types. + +## 7.1 Complexes + +* `Ecosystem` (Complex, root) +* `Domain` (Complex, primary = Ecosystem) +* `Repository` (Complex, primary = Domain) +* `Workstream` (Complex, primary = Repository) — organizes work items +* `SBOM` (Complex, primary = Repository) — organizes dependencies + +## 7.2 Atoms + +* `Decision` (Atom, primary = Repository) +* `Task` (Atom, primary = Repository) +* `TechDebt` (Atom, primary = Repository) +* `Extend` (Atom, primary = Repository) +* `Dependency` (Atom, primary = Repository) + +## 7.3 Organizing via secondary attachments + +* A Task in a Workstream: + + * `Task.attachments = [Repo42, Workstream7]` +* A Dependency in an SBOM: + + * `Dependency.attachments = [Repo42, Sbom3]` + +Atoms remain ignorant of *how* the workstream orders tasks; the workstream can store structure. 
+ +## 7.4 Relation examples + +### Task → Task dependency (repo-scoped) + +Relation type: `DependsOn` (Relation) + +* `DependsOn.attachments = [Repo42, TaskA, TaskB]` +* payload: `{ critical: true, reason: "API contract needed first" }` + +### Decision influences tasks (repo-scoped) + +Relation type: `Motivates` (Relation) + +* `Motivates.attachments = [Repo42, Decision9, TaskA]` + +### Dependency graph inside an SBOM (sbom-scoped) + +Relation type: `Requires` (Relation) + +* `Requires.attachments = [Sbom3, DependencyX, DependencyY]` +* payload: `{ scope: "runtime" }` + +This cleanly separates: + +* planning relations (Repo relation-space) +* supply-chain relations (SBOM relation-space) + +--- + +# 8. Applying the modeling system to a new domain + +You can apply this to any domain by following a small method. + +## 8.1 Step-by-step method + +### Step 1 — Choose a root Complex + +Pick the top-level scope: + +* `Workspace`, `Tenant`, `Organization`, `Ecosystem`, etc. + +### Step 2 — Identify “containers” vs “content” + +* Containers become **Complexes** (projects, folders, accounts, repositories, case files). +* Content objects become **Atoms** (documents, customers, invoices, tickets, assets). + +Rule of thumb: + +* If it *organizes* others or defines a scope, it’s a Complex. +* If it’s a “thing” with intrinsic content/lifecycle, it’s an Atom. + +### Step 3 — Define the primary hierarchy (layering) + +Decide what “belongs to what” as the default place where entities live. +Example pattern: + +* `Atom.primary = nearest containing Complex` + +### Step 4 — Define organizer complexes (optional) + +Introduce complexes like `Workstream`, `Board`, `Collection`, `SBOM`, `Timeline` that provide structure. +Use **secondary attachments** from atoms to these complexes. 
+ +### Step 5 — Define relation-spaces + +Choose where relations live: + +* typically in the “owning” complex (project/repo/case) +* sometimes in a specialized complex (SBOM, timeline, graph) + +### Step 6 — Create a Type Registry + constraints + +For each type, specify: + +* kind +* required primary attachment type(s) +* optional secondary attachment types +* allowed relation endpoints (if relation type) + +### Step 7 — Migrate incrementally + +Start with primary attachments and identity first. +Add organizer complexes and relations later without breaking identity. + +--- + +# 9. Applying it to an existing domain with pre-existing entities + +The key is to **wrap** existing entities as Entities in this system without rewriting them all at once. + +## 9.1 Integration patterns + +### Pattern A — “Entity wrapper” over existing tables/documents + +* Keep existing storage unchanged. +* Create an `Entity` record that references external storage: + + * payload contains `{ externalType, externalId, sourceSystem }` +* Attachments, relations, and organization are managed in the new layer. + +This is the safest “overlay” approach. + +### Pattern B — “Dual write” for new objects + +* New entities are created in the new model as the source of truth. +* Optionally mirrored into legacy storage for compatibility. + +### Pattern C — “Progressive normalization” + +* Start overlay-style. +* Gradually move the most valuable types (e.g., Tasks, Decisions) into native entities. +* Leave rarely touched legacy objects wrapped indefinitely. + +## 9.2 Migration steps for existing data + +1. **Assign stable IDs** + + * If legacy IDs exist, reuse them with a namespace prefix. +2. **Create root complexes** + + * e.g. one `Ecosystem` or per-tenant `Workspace`. +3. **Attach existing entities to a primary context** + + * even if initially coarse (everything attaches to one domain/project). +4. 
**Introduce finer complexes** + + * split into domains, repos/projects later by moving primary attachments. +5. **Add relations incrementally** + + * create relation entities for the relationships you query most. +6. **Backfill organizer complexes** + + * workstreams, boards, SBOMs, etc., via secondary attachments. + +Because relations and organization are additive, you can evolve structure without breaking identity. + +--- + +# 10. What this system buys you + +* A **uniform modeling surface** across domains. +* A **clean separation** of content (atoms) from structure (complexes + relations). +* **Multiple overlapping organizations** via secondary attachments without duplication. +* **First-class relationships** with auditability and contextual ownership. +* **Incremental adoption** over legacy systems. + +## Extension Points + +This could be turned into a compact “spec” format (like a small RFC) plus a concrete “Type Registry” table for your example (including recommended relation types and constraints). + +<!-- TODO: placeholder — flesh out this section or remove it --> diff --git a/workplans/CUST-WP-0006-gems-state-hub.md b/workplans/CUST-WP-0006-gems-state-hub.md new file mode 100644 index 0000000..701c367 --- /dev/null +++ b/workplans/CUST-WP-0006-gems-state-hub.md @@ -0,0 +1,190 @@ +--- +id: CUST-WP-0006 +type: workplan +title: GEMS Analysis & State-Hub Migration Planning +domain: custodian +status: active +owner: custodian +topic_slug: the-custodian +created: 2026-03-02 +updated: 2026-03-02 +state_hub_workstream_id: "7ce13282-d534-492a-8d42-b3a134028823" +--- + +# CUST-WP-0006 — GEMS Analysis & State-Hub Migration Planning + +## Purpose + +Apply the Generic Entity Modelling System (GEMS) — documented in +`wiki/GenericEntityModellingSystem.md` — to the state-hub data store. + +This workplan covers: +1. A domain-specific GEMS implementation skeleton (type registry + hierarchy) +2. An audit of current model inconsistencies against GEMS principles +3. Escalation of all non-trivial structural decisions +4. 
A SWOT analysis of migrating the state-hub data store to the GEMS model +5. (Deferred) A refined migration workplan, once key decisions are resolved + +Companion documents: +- `wiki/GEMS-StateHub-TypeRegistry.md` — canonical type registry +- `wiki/GEMS-StateHub-SWOT.md` — SWOT analysis + +--- + +## Phase 1 — GEMS Skeleton Definition + +### Task T01: Map current state-hub entities to GEMS types + +```task +id: T01 +status: done +priority: high +assignee: custodian +``` + +**Deliverable:** `wiki/GEMS-StateHub-TypeRegistry.md` + +See companion document. Summary: + +| Current Table | GEMS Kind | GEMS Primary | Notes | +|---|---|---|---| +| `Domain` | Complex | Ecosystem (implicit root) | 6 canonical domains | +| `Topic` | Complex | Domain | Focus area / active project | +| `ManagedRepo` | Complex | Domain | Managed git repo | +| `Workstream` | Complex | **Repository** (currently Topic) | Work package — ADR-001 mismatch | +| `SBOMSnapshot` | Complex | Repository | Does not yet exist as an entity | +| `Task` | Atom | Workstream | ✓ correct | +| `Decision` | Atom | Repository (currently Topic or Workstream) | Dual-attach ambiguity | +| `TechnicalDebt` | Atom | Repository (currently domain: str) | String FK inconsistency | +| `ExtensionPoint` | Atom | Repository (currently domain: str) | String FK inconsistency | +| `Contribution` | Atom | Repository (no domain FK) | No domain affiliation | +| `ProgressEvent` | Atom | Workstream (or Topic) | Multi-attach ambiguity | +| `SBOMEntry` | Atom | SBOMSnapshot (currently ManagedRepo) | No container | +| `WorkstreamDependency` | Relation | Domain | Flat junction table | + +--- + +### Task T02: Inconsistency audit — current model vs GEMS + +```task +id: T02 +status: done +priority: high +assignee: custodian +``` + +**Identified inconsistencies:** + +**I-1 — String domain field (high severity)** +`ExtensionPoint.domain` and `TechnicalDebt.domain` are `String(50)` columns, not FKs +to `domains.id`. 
The rename_domain API patches these manually via string updates — +there is no referential integrity. Dashboard filtering silently returns empty results +when slugs drift. + +**I-2 — Workstream primary container is Topic, not Repository (critical severity)** +GEMS §7 places `Workstream.primary = Repository`. ADR-001 states that workplans +(the file backing a workstream) must originate in a repository. However, the current +schema has `Workstream.topic_id NOT NULL` — Topic is the enforced primary container. +This is an ADR-001 violation embedded in the schema itself. There is currently no +`repo_id` on Workstream. + +**I-3 — Decision dual attachment without clear hierarchy (medium severity)** +`Decision` has both `topic_id` and `workstream_id` FKs, with a CHECK constraint +requiring at least one. GEMS requires exactly one primary attachment. The current model +allows ambiguous "where does this Decision live" answers. + +**I-4 — ManagedRepo has a nullable `topic_id` FK (low-medium severity)** +`ManagedRepo.topic_id` is a nullable FK to `topics`. In GEMS, Repository is a Complex +whose primary is Domain, not Topic. The topic_id on Repo suggests a second, conflicting +hierarchy path. + +**I-5 — No SBOMSnapshot container entity (medium severity)** +SBOM entries are flat rows tagged with `repo_id` and `snapshot_at`. GEMS §7 defines +`SBOM (Complex, primary=Repository)` as an organizer. Without this container, it is +impossible to query "all packages in snapshot X" as a first-class concept, or to model +snapshot-to-snapshot diffs. + +**I-6 — Contribution has no domain or repository FK (medium severity)** +`Contribution` has only optional `related_topic_id` and `related_workstream_id`. There +is no direct link to Domain or Repository, making domain-scoped contribution queries +fragile. + +**I-7 — No Ecosystem root entity (low severity)** +GEMS §3.2 requires at least one root Complex. 
The current model has no explicit root — +Domain is the de-facto root but is not declared as such. This matters when you want +to express cross-domain relations or system-level policies. + +**I-8 — WorkstreamDependency as flat junction table (low severity)** +GEMS §4 defines Relations as first-class entities whose primary is a Complex. The +current `WorkstreamDependency` is a flat table. For the current usage this works, but +it makes contextual queries (e.g. "all dependencies within domain X") less uniform. + +--- + +## Phase 2 — Decision Escalation + +### Task T03: Register non-trivial decisions in state-hub + +```task +id: T03 +status: done +priority: critical +assignee: custodian +``` + +Six decisions were escalated (see state-hub records): +- DEC-GEMS-001: GEMS implementation architecture (typed tables vs. generic entity model) +- DEC-GEMS-002: Workstream primary container — Topic vs. Repository +- DEC-GEMS-003: Domain string → FK migration for ExtensionPoint and TechnicalDebt +- DEC-GEMS-004: SBOMSnapshot container entity +- DEC-GEMS-005: Ecosystem root entity +- DEC-GEMS-006: WorkstreamDependency as first-class Relation entity + +--- + +## Phase 3 — SWOT Analysis + +### Task T04: Produce SWOT analysis document + +```task +id: T04 +status: done +priority: high +assignee: custodian +``` + +**Deliverable:** `wiki/GEMS-StateHub-SWOT.md` + +See companion document for the full analysis. Summary verdict: + +The migration is **worth pursuing incrementally** (Pattern C from GEMS §9). The most +impactful and least risky first move is: +1. Fix I-1: migrate domain string → FK on EP/TD (low risk, high consistency gain) +2. Fix I-2: add `repo_id` to Workstream (medium risk, fixes ADR-001 alignment) +3. Add SBOMSnapshot container (medium risk, enables snapshot diffing) + +Full generic entity table architecture (Option A) is deferred until after the typed-table +alignment is stable and validated. 
+ +--- + +## Phase 4 — Migration Workplan Refinement (deferred) + +### Task T05: Write detailed migration workplan (CUST-WP-0007) + +```task +id: T05 +status: blocked +priority: high +assignee: custodian +blocking_reason: "Blocked on decisions DEC-GEMS-001 through DEC-GEMS-006" +``` + +Once the six decisions are resolved, produce `workplans/CUST-WP-0007-gems-migration.md` +covering: +- Schema migrations (Alembic versions) +- Data backfill scripts +- API router changes +- MCP tool changes +- Dashboard updates +- ADR-001 workplan file format updates diff --git a/workplans/CUST-WP-0007-gems-migration.md b/workplans/CUST-WP-0007-gems-migration.md new file mode 100644 index 0000000..de2f1b7 --- /dev/null +++ b/workplans/CUST-WP-0007-gems-migration.md @@ -0,0 +1,270 @@ +--- +id: CUST-WP-0007 +type: workplan +title: GEMS Migration — Three-Pass State-Hub Alignment +domain: custodian +status: completed +owner: custodian +topic_slug: the-custodian +repo_slug: the-custodian +created: 2026-03-02 +updated: 2026-03-02 +state_hub_workstream_id: "22e18151-fc83-438c-b732-10e056e64a20" +--- + +# CUST-WP-0007 — GEMS Migration: Three-Pass State-Hub Alignment + +Implements the migration decided in CUST-WP-0006. Fixes the structural +inconsistencies I-1, I-2, I-5 and I-6 identified in the GEMS audit in three +independently releasable passes. + +Decisions resolved: DEC-GEMS-001 (Option C), DEC-GEMS-002, DEC-GEMS-003, +DEC-GEMS-004. DEC-GEMS-005 and DEC-GEMS-006 deferred. + +--- + +## Pass 1 — Fix Domain FK Inconsistencies + +**Scope:** Resolve I-1 and I-6. No API breaking changes. Fixes observable +dashboard domain-filtering bugs on EP/TD pages. + +**Alembic migration ID:** `e1f2a3b4c5d6` (down_revision: `d3e4f5a6b7c8`) + +### Task T01: Alembic migration — Pass 1 + +```task +id: T01 +status: done +priority: critical +assignee: custodian +``` + +Operations: +1. Add `domain_id` UUID FK (nullable) to `extension_points` +2. Add `domain_id` UUID FK (nullable) to `technical_debt` +3. 
Add `repo_id` UUID FK (nullable) to `contributions` +4. Backfill `extension_points.domain_id` from `domains.slug` match +5. Backfill `technical_debt.domain_id` from `domains.slug` match +6. Make `domain_id` NOT NULL on both tables (all rows must be backfilled first) +7. Drop `domain` String column from `extension_points` +8. Drop `domain` String column from `technical_debt` + +### Task T02: Update ExtensionPoint and TechnicalDebt models + +```task +id: T02 +status: done +priority: critical +assignee: custodian +``` + +Replace `domain: Mapped[str]` with `domain_id: Mapped[uuid.UUID]` FK + +`domain: Mapped["Domain"]` relationship. Add domain_slug property. + +### Task T03: Update Contribution model + +```task +id: T03 +status: done +priority: high +assignee: custodian +``` + +Add `repo_id: Mapped[uuid.UUID | None]` nullable FK to `managed_repos`. + +### Task T04: Update EP and TD routers + +```task +id: T04 +status: done +priority: high +assignee: custodian +``` + +- Filter by `domain_id` FK instead of domain string +- Accept `domain` slug in create/filter params, resolve to `domain_id` + +### Task T05: Update MCP EP/TD tools + +```task +id: T05 +status: done +priority: high +assignee: custodian +``` + +`register_extension_point` and `register_technical_debt` still accept +`domain` as a slug string. Router resolves to `domain_id` FK. + +### Task T06: Update EP/TD dashboard pages + +```task +id: T06 +status: done +priority: medium +assignee: custodian +``` + +`extensions.md` and `techdept.md` load domain list from `/domains/` API and +use `domain_id` FK for filtering. Remove reliance on string comparison. + +--- + +## Pass 2 — Align Workstream with ADR-001 + +**Scope:** Resolve I-2. Breaking change to workstream schema. `topic_id` +becomes a nullable secondary annotation; `repo_id` becomes the primary FK. +Workplan frontmatter format gains `repo_slug` field. 
+ +**Alembic migration ID:** `f2a3b4c5d6e7` (down_revision: `e1f2a3b4c5d6`) + +### Task T07: Alembic migration — Pass 2 + +```task +id: T07 +status: done +priority: critical +assignee: custodian +``` + +Operations: +1. Add `repo_id` UUID FK (nullable) to `workstreams` +2. Backfill `repo_id` using heuristic: workstream → topic → domain → first + repo for that domain (adequate for current data; all custodian workstreams + map to the-custodian repo) +3. For workstreams where no repo can be resolved: leave `repo_id` NULL (MCP tooling handles this) + +### Task T08: Update Workstream model + +```task +id: T08 +status: done +priority: critical +assignee: custodian +``` + +Add `repo_id: Mapped[uuid.UUID | None]` nullable FK to `managed_repos`. +Keep `topic_id` as nullable secondary. Add `repo` relationship. + +### Task T09: Update workstream router and MCP tools + +```task +id: T09 +status: done +priority: high +assignee: custodian +``` + +- `create_workstream` MCP tool: add optional `repo_id` / `repo_slug` param +- Workstream read schema: expose `repo_id` and `repo_slug` +- Dependency resolution in `state/summary` uses `repo.domain` when available + +### Task T10: Update workplan frontmatter format + +```task +id: T10 +status: done +priority: high +assignee: custodian +``` + +Add `repo_slug` field to ADR-001 workplan frontmatter spec. Update existing +workplan files (CUST-WP-0001 through CUST-WP-0006) to include `repo_slug`. + +### Task T11: Update Dependencies dashboard domain resolution + +```task +id: T11 +status: done +priority: high +assignee: custodian +``` + +`dependencies.md` currently resolves domain via `topicMap[w.topic_id]?.domain_slug`. +Change to prefer `wsMap[w.id]?.repo?.domain_slug` when available. + +--- + +## Pass 3 — SBOMSnapshot Container + +**Scope:** Resolve I-5. Adds `sbom_snapshots` as a container entity between +Repository and SBOMEntry. Enables snapshot history and diff queries. 
+ +**Alembic migration ID:** `a3b4c5d6e7f8` (down_revision: `f2a3b4c5d6e7`) + +### Task T12: Alembic migration — Pass 3 + +```task +id: T12 +status: done +priority: critical +assignee: custodian +``` + +Operations: +1. Create `sbom_snapshots` table (id, repo_id FK, snapshot_at, source, created_at) +2. Add `snapshot_id` UUID FK (nullable) to `sbom_entries` +3. Backfill: for each (repo_id, snapshot_at) group in sbom_entries, create one + sbom_snapshots row; set snapshot_id on all matching entries +4. Make `snapshot_id` NOT NULL on `sbom_entries` +5. Consider: drop `repo_id` from `sbom_entries` (reachable via snapshot) + +### Task T13: Add SBOMSnapshot model + +```task +id: T13 +status: done +priority: critical +assignee: custodian +``` + +New model `api/models/sbom_snapshot.py` with FK to managed_repos. + +### Task T14: Update SBOMEntry model + +```task +id: T14 +status: done +priority: critical +assignee: custodian +``` + +Add `snapshot_id` FK to `sbom_snapshots`. Update `repo` relationship to go +via snapshot. + +### Task T15: Update SBOM router and ingest API + +```task +id: T15 +status: done +priority: high +assignee: custodian +``` + +- Ingest creates/finds a snapshot record, then creates entries under it +- New endpoints: `GET /sbom/snapshots/`, `GET /sbom/snapshots/{id}/` +- Existing `GET /sbom/` still returns flat entries for backward compatibility + +### Task T16: Update MCP ingest tool and SBOM resources + +```task +id: T16 +status: done +priority: high +assignee: custodian +``` + +`ingest_sbom_tool` returns `snapshot_id` in result. New MCP resource: +`state://sbom/snapshots/{repo_slug}`. + +### Task T17: Update SBOM dashboard + +```task +id: T17 +status: done +priority: medium +assignee: custodian +``` + +`sbom.md` "By Repo" section adds a snapshot history row showing ingest dates +with package count delta from previous snapshot.