feat(gems): three-pass schema migration aligning state-hub with GEMS

Implements CUST-WP-0007. Resolves inconsistencies I-1, I-2, I-5, I-6
identified in the GEMS audit (GenericEntityModellingSystem.md).

Pass 1 (e1f2a3b4c5d6): domain_id FK on extension_points and
technical_debt (replaces raw string column); repo_id FK on contributions.
Fixes domain-filtering bugs in EP/TD dashboard pages.

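Pass 2 (f2a3b4c5d6e7): nullable repo_id FK on workstreams, aligning
the GEMS primary attachment with ADR-001 (repo > topic). Existing rows
are backfilled best-effort (topic -> domain -> earliest active repo);
workstreams whose domain has no active repo keep repo_id NULL and need
manual resolution. Dashboard pages updated to prefer repo->domain over
topic->domain.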

Pass 3 (a3b4c5d6e7f8): SBOMSnapshot container entity (GEMS Complex
between Repository and SBOMEntry). Ingest is now additive — each call
creates a new snapshot; history is retained. List/report endpoints
filter to latest snapshot per repo via _latest_snapshot_ids_subquery().
New endpoints: GET /sbom/snapshots/, GET /sbom/snapshots/{id}/.
Dashboard gains a Snapshot History section.

Also adds GEMS analysis artefacts: wiki/GEMS-StateHub-TypeRegistry.md,
wiki/GEMS-StateHub-SWOT.md, workplans/CUST-WP-0006 (analysis),
workplans/CUST-WP-0007 (migration, now completed).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 23:39:17 +01:00
parent 62fbe884e3
commit fc87e26b4b
30 changed files with 675 additions and 84 deletions

View File

@@ -0,0 +1,93 @@
"""GEMS Pass 3: add sbom_snapshots container entity
Revision ID: a3b4c5d6e7f8
Revises: f2a3b4c5d6e7
Create Date: 2026-03-02 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Alembic revision identifiers.
# `revision` is this migration's own id; `down_revision` is the migration it
# applies on top of (GEMS Pass 2, per the "Revises" line in the docstring).
revision: str = "a3b4c5d6e7f8"
down_revision: Union[str, None] = "f2a3b4c5d6e7"
# No branch label and no extra cross-revision dependency are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create ``sbom_snapshots`` and attach every ``sbom_entries`` row to one.

    Steps, in order:

    1. Create the ``sbom_snapshots`` table with an FK to ``managed_repos``.
    2. Add a nullable ``sbom_entries.snapshot_id`` column plus its FK to
       ``sbom_snapshots``.
    3. Backfill: insert one snapshot per distinct ``(repo_id, snapshot_at)``
       pair found in ``sbom_entries``, then point each entry at the snapshot
       with the same pair.
    4. Abort if any entry still has a NULL ``snapshot_id``; otherwise make the
       column NOT NULL and index it.

    Raises:
        The ``DO`` block in step 4 raises a PostgreSQL exception when
        unassigned rows remain; it reaches the caller through ``op.execute``
        (exact Python exception type depends on the driver).
    """
    # ── Create sbom_snapshots table ────────────────────────────────────────────
    op.create_table(
        "sbom_snapshots",
        # No server-side default for id: the backfill below supplies ids
        # explicitly via gen_random_uuid().
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column(
            "repo_id",
            postgresql.UUID(as_uuid=True),
            # RESTRICT: deleting a managed_repos row that still has snapshots
            # is rejected by the database.
            sa.ForeignKey("managed_repos.id", ondelete="RESTRICT"),
            nullable=False,
            index=True,
        ),
        sa.Column("snapshot_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("source", sa.String(200), nullable=True),
        sa.Column("entry_count", sa.Integer, nullable=False, server_default="0"),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
    )
    # ── Add snapshot_id FK to sbom_entries (nullable during backfill) ──────────
    op.add_column(
        "sbom_entries",
        sa.Column("snapshot_id", postgresql.UUID(as_uuid=True), nullable=True),
    )
    op.create_foreign_key(
        "fk_sbom_entry_snapshot_id",
        "sbom_entries", "sbom_snapshots",
        ["snapshot_id"], ["id"],
        ondelete="RESTRICT",
    )
    # ── Backfill: create one snapshot per (repo_id, snapshot_at) group ─────────
    # Each backfilled snapshot is tagged source='backfill', counts the entries
    # in its group, and takes the earliest entry created_at as its own.
    # NOTE(review): gen_random_uuid() availability depends on the PostgreSQL
    # version / installed extensions — confirm for the target database.
    # NOTE(review): a NULL snapshot_at or an all-NULL created_at group in
    # sbom_entries would violate the NOT NULL columns above — presumably both
    # source columns are NOT NULL; confirm against the sbom_entries schema.
    op.execute("""
INSERT INTO sbom_snapshots (id, repo_id, snapshot_at, source, entry_count, created_at)
SELECT
gen_random_uuid(),
repo_id,
snapshot_at,
'backfill' AS source,
COUNT(*) AS entry_count,
MIN(created_at) AS created_at
FROM sbom_entries
GROUP BY repo_id, snapshot_at
""")
    # ── Assign snapshot_id to each entry ───────────────────────────────────────
    # sbom_snapshots was created empty in this migration, so each
    # (repo_id, snapshot_at) pair matches exactly one backfilled snapshot.
    op.execute("""
UPDATE sbom_entries e
SET snapshot_id = s.id
FROM sbom_snapshots s
WHERE s.repo_id = e.repo_id
AND s.snapshot_at = e.snapshot_at
""")
    # ── Safety check, then make snapshot_id NOT NULL ───────────────────────────
    # Fail with an explicit message instead of letting the NOT NULL change
    # below fail on leftover rows.
    op.execute("""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM sbom_entries WHERE snapshot_id IS NULL) THEN
RAISE EXCEPTION 'GEMS Pass 3: sbom_entries rows with no snapshot assigned';
END IF;
END $$;
""")
    op.alter_column("sbom_entries", "snapshot_id", nullable=False)
    # The index is added only after the bulk UPDATE has run.
    op.create_index("ix_sbom_entries_snapshot_id", "sbom_entries", ["snapshot_id"])
def downgrade() -> None:
    """Revert GEMS Pass 3.

    Detaches ``sbom_entries`` from snapshots (index, FK, then the column) and
    finally drops the ``sbom_snapshots`` table. All snapshot rows are lost.
    """
    entries_table = "sbom_entries"

    # Dependents of snapshot_id are removed before the column itself.
    op.drop_index("ix_sbom_entries_snapshot_id", table_name=entries_table)
    op.drop_constraint("fk_sbom_entry_snapshot_id", entries_table, type_="foreignkey")
    op.drop_column(entries_table, "snapshot_id")

    # Nothing references the container table any more, so it can go.
    op.drop_table("sbom_snapshots")

View File

@@ -0,0 +1,142 @@
"""GEMS Pass 1: domain_id FK on extension_points/technical_debt, repo_id on contributions
Revision ID: e1f2a3b4c5d6
Revises: d3e4f5a6b7c8
Create Date: 2026-03-02 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Alembic revision identifiers.
# `revision` is this migration's own id; `down_revision` is the migration it
# applies on top of (see the "Revises" line in the module docstring).
revision: str = "e1f2a3b4c5d6"
down_revision: Union[str, None] = "d3e4f5a6b7c8"
# No branch label and no extra cross-revision dependency are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Swap the ``domain`` slug string for a ``domain_id`` FK; add ``contributions.repo_id``.

    ``extension_points`` and ``technical_debt`` each go through the same
    sequence:

    1. Add a nullable ``domain_id`` column with an FK to ``domains.id``.
    2. Backfill it by matching ``domains.slug`` against the old ``domain``
       string.
    3. Abort, listing the unmatched slugs, if any row is still NULL.
    4. Make ``domain_id`` NOT NULL, drop the old ``domain`` index and column,
       and index ``domain_id``.

    ``contributions`` only gains a nullable ``repo_id`` FK to
    ``managed_repos.id`` (``ON DELETE SET NULL``); it is not backfilled.

    Raises:
        The ``DO`` blocks raise a PostgreSQL exception when a row's ``domain``
        string matches no ``domains.slug``; it reaches the caller through
        ``op.execute`` (exact Python exception type depends on the driver).
    """
    # ── extension_points: add domain_id FK ────────────────────────────────────
    # Nullable for now so existing rows survive until the backfill runs.
    op.add_column(
        "extension_points",
        sa.Column("domain_id", postgresql.UUID(as_uuid=True), nullable=True),
    )
    op.create_foreign_key(
        "fk_ep_domain_id",
        "extension_points", "domains",
        ["domain_id"], ["id"],
        ondelete="RESTRICT",
    )
    # Backfill from slug string
    # NOTE(review): presumably domains.slug is unique; otherwise the matched
    # domain is arbitrary — confirm against the domains schema.
    op.execute("""
UPDATE extension_points ep
SET domain_id = d.id
FROM domains d
WHERE d.slug = ep.domain
""")
    # Safety check: abort if any rows remain unmatched
    # The message lists the distinct offending slug strings.
    op.execute("""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM extension_points WHERE domain_id IS NULL) THEN
RAISE EXCEPTION
'GEMS Pass 1: extension_points rows with unknown domain slug: %',
(SELECT string_agg(DISTINCT domain, ', ')
FROM extension_points WHERE domain_id IS NULL);
END IF;
END $$;
""")
    op.alter_column("extension_points", "domain_id", nullable=False)
    # The old string column and its index are removed only after every row
    # has a domain_id.
    op.drop_index("ix_extension_points_domain", table_name="extension_points")
    op.drop_column("extension_points", "domain")
    op.create_index("ix_extension_points_domain_id", "extension_points", ["domain_id"])
    # ── technical_debt: add domain_id FK ──────────────────────────────────────
    # Same sequence as extension_points above.
    op.add_column(
        "technical_debt",
        sa.Column("domain_id", postgresql.UUID(as_uuid=True), nullable=True),
    )
    op.create_foreign_key(
        "fk_td_domain_id",
        "technical_debt", "domains",
        ["domain_id"], ["id"],
        ondelete="RESTRICT",
    )
    # Backfill from slug string
    op.execute("""
UPDATE technical_debt td
SET domain_id = d.id
FROM domains d
WHERE d.slug = td.domain
""")
    # Safety check: abort if any rows remain unmatched
    op.execute("""
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM technical_debt WHERE domain_id IS NULL) THEN
RAISE EXCEPTION
'GEMS Pass 1: technical_debt rows with unknown domain slug: %',
(SELECT string_agg(DISTINCT domain, ', ')
FROM technical_debt WHERE domain_id IS NULL);
END IF;
END $$;
""")
    op.alter_column("technical_debt", "domain_id", nullable=False)
    op.drop_index("ix_technical_debt_domain", table_name="technical_debt")
    op.drop_column("technical_debt", "domain")
    op.create_index("ix_technical_debt_domain_id", "technical_debt", ["domain_id"])
    # ── contributions: add nullable repo_id FK ────────────────────────────────
    # The FK is declared inline (unnamed) and the column stays NULL for all
    # existing rows; no index is created on it here.
    op.add_column(
        "contributions",
        sa.Column(
            "repo_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("managed_repos.id", ondelete="SET NULL"),
            nullable=True,
        ),
    )
def downgrade() -> None:
    """Revert GEMS Pass 1.

    Drops ``contributions.repo_id``, then for ``technical_debt`` and
    ``extension_points`` restores the ``domain`` string column from
    ``domains.slug`` before removing ``domain_id`` with its index and FK.

    NOTE(review): the restored column is declared ``String(50)`` — assumed to
    match the pre-migration definition; confirm against the earlier schema.
    """
    # contributions: drop repo_id
    # No explicit drop_constraint: PostgreSQL removes a column's FK constraint
    # together with the column.
    op.drop_column("contributions", "repo_id")
    # technical_debt: restore domain string
    # Added nullable first so existing rows are accepted before the backfill.
    op.add_column(
        "technical_debt",
        sa.Column("domain", sa.String(50), nullable=True),
    )
    # Backfill the slug while domain_id is still present.
    op.execute("""
UPDATE technical_debt td
SET domain = d.slug
FROM domains d
WHERE d.id = td.domain_id
""")
    op.alter_column("technical_debt", "domain", nullable=False)
    op.create_index("ix_technical_debt_domain", "technical_debt", ["domain"])
    # domain_id is removed only after the string column is fully restored.
    op.drop_index("ix_technical_debt_domain_id", table_name="technical_debt")
    op.drop_constraint("fk_td_domain_id", "technical_debt", type_="foreignkey")
    op.drop_column("technical_debt", "domain_id")
    # extension_points: restore domain string
    # Same sequence as technical_debt above.
    op.add_column(
        "extension_points",
        sa.Column("domain", sa.String(50), nullable=True),
    )
    op.execute("""
UPDATE extension_points ep
SET domain = d.slug
FROM domains d
WHERE d.id = ep.domain_id
""")
    op.alter_column("extension_points", "domain", nullable=False)
    op.create_index("ix_extension_points_domain", "extension_points", ["domain"])
    op.drop_index("ix_extension_points_domain_id", table_name="extension_points")
    op.drop_constraint("fk_ep_domain_id", "extension_points", type_="foreignkey")
    op.drop_column("extension_points", "domain_id")

View File

@@ -0,0 +1,54 @@
"""GEMS Pass 2: add repo_id FK to workstreams (ADR-001 alignment)
Revision ID: f2a3b4c5d6e7
Revises: e1f2a3b4c5d6
Create Date: 2026-03-02 00:00:00.000000
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Alembic revision identifiers.
# `revision` is this migration's own id; `down_revision` is the migration it
# applies on top of (GEMS Pass 1, per the "Revises" line in the docstring).
revision: str = "f2a3b4c5d6e7"
down_revision: Union[str, None] = "e1f2a3b4c5d6"
# No branch label and no extra cross-revision dependency are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add a nullable ``workstreams.repo_id`` FK and backfill it best-effort.

    Steps, in order:

    1. Add the nullable ``repo_id`` column, its FK to ``managed_repos.id``
       (``ON DELETE SET NULL``) and an index on it.
    2. Backfill: for each workstream, follow topic -> domain and pick the
       earliest-created *active* repo in that domain. Ties on ``created_at``
       are broken by repo id so repeated runs on the same data always pick
       the same repo.

    Workstreams whose domain has no active repo keep ``repo_id`` NULL and
    require manual resolution.
    """
    op.add_column(
        "workstreams",
        sa.Column("repo_id", postgresql.UUID(as_uuid=True), nullable=True),
    )
    op.create_foreign_key(
        "fk_workstream_repo_id",
        "workstreams", "managed_repos",
        ["repo_id"], ["id"],
        ondelete="SET NULL",
    )
    op.create_index("ix_workstreams_repo_id", "workstreams", ["repo_id"])
    # Best-effort backfill: topic → domain → first active repo (by created_at).
    # Records with no active repo in their domain remain NULL (requires manual
    # resolution).
    #
    # DISTINCT ON keeps the first row per workstream according to ORDER BY;
    # without a unique trailing sort key the pick is unpredictable when two
    # repos share created_at, hence the mr.id tie-breaker. The inner
    # workstreams alias is `w` so it does not shadow the UPDATE target `ws`.
    op.execute("""
        UPDATE workstreams ws
        SET repo_id = sub.repo_id
        FROM (
            SELECT DISTINCT ON (w.id)
                w.id AS ws_id,
                mr.id AS repo_id
            FROM workstreams w
            JOIN topics t ON t.id = w.topic_id
            JOIN managed_repos mr ON mr.domain_id = t.domain_id
            WHERE mr.status = 'active'
            ORDER BY w.id, mr.created_at, mr.id
        ) sub
        WHERE ws.id = sub.ws_id
    """)
def downgrade() -> None:
    """Revert GEMS Pass 2 by removing ``workstreams.repo_id``.

    The index and FK constraint that depend on the column are dropped first,
    then the column itself. Any backfilled or manually assigned repo links
    are lost.
    """
    workstreams_table = "workstreams"

    op.drop_index("ix_workstreams_repo_id", table_name=workstreams_table)
    op.drop_constraint("fk_workstream_repo_id", workstreams_table, type_="foreignkey")
    op.drop_column(workstreams_table, "repo_id")