Files
state-hub/api/routers/tpsc.py
tegwick 5eeeeeb6c4 feat(doi): Repository DoI automated gate and dashboard integration (CUST-WP-0024)
Implements the 14-criterion DoI checklist as a runnable gate with API,
MCP tools, CLI script, and dashboard integration.

Core components:
- api/doi_engine.py — async engine evaluating all 14 criteria (asyncio.to_thread
  for non-blocking HTTP self-calls), shared by API and CLI
- api/schemas/doi.py — DoICriterion, DoIReport, DoISummaryEntry schemas
- api/routers/repos.py — GET /repos/{slug}/doi + GET /repos/doi/summary
- scripts/check_doi.py — CLI: make check-doi REPO=<slug> / check-doi-all
- mcp_server/server.py — check_repo_doi(), get_doi_summary() tools

Dashboard (repos.md):
- DoI tier badge per repo (None/Core/Standard/Full) colour-coded red→green
- Domain block shows lowest DoI tier across its repos
- DoI KPI card in summary row
- DoI filter in All Repos Table
- Link to Repository DoI policy page

Also fixes: TPSC snapshots 500 error (missing nested selectinload for
catalog_entry relationship in list_snapshots endpoint).

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 01:08:18 +01:00

241 lines
9.0 KiB
Python

from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry
from api.schemas.tpsc import (
TPSCCatalogCreate, TPSCCatalogRead,
TPSCEntryRead, TPSCIngestRequest, TPSCSnapshotRead,
TPSCGDPRReport, TPSCGDPRWarning, GDPR_WARNING_LEVELS,
)
# Router for this module: every route registered on it is served under the
# "/tpsc" prefix and carries the "tpsc" tag.
router = APIRouter(prefix="/tpsc", tags=["tpsc"])
# ---------------------------------------------------------------------------
# Catalog
# ---------------------------------------------------------------------------
@router.get("/catalog/", response_model=list[TPSCCatalogRead])
async def list_catalog(
    gdpr_maturity: str | None = None,
    category: str | None = None,
    pricing_model: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List non-deprecated catalog services, ordered by name.

    Each optional query parameter, when non-empty, restricts the result to
    services whose field of the same name equals the given value.
    """
    optional_filters = (
        (TPSCCatalog.gdpr_maturity, gdpr_maturity),
        (TPSCCatalog.category, category),
        (TPSCCatalog.pricing_model, pricing_model),
    )
    stmt = select(TPSCCatalog).where(TPSCCatalog.status != "deprecated")
    for column, wanted in optional_filters:
        # A missing or empty value means "do not filter on this column".
        if wanted:
            stmt = stmt.where(column == wanted)
    result = await session.execute(stmt.order_by(TPSCCatalog.name))
    return result.scalars().all()
@router.get("/catalog/{slug}", response_model=TPSCCatalogRead)
async def get_catalog_entry(slug: str, session: AsyncSession = Depends(get_session)):
    """Return the catalog service identified by ``slug``; respond 404 if there is none."""
    lookup = select(TPSCCatalog).where(TPSCCatalog.slug == slug)
    result = await session.execute(lookup)
    service = result.scalar_one_or_none()
    if service:
        return service
    raise HTTPException(404, f"Service '{slug}' not found in catalog")
@router.post("/catalog/", response_model=TPSCCatalogRead, status_code=201)
async def register_service(body: TPSCCatalogCreate, session: AsyncSession = Depends(get_session)):
    """Register a new service, or update the existing one that has the same slug.

    When the slug already exists, only the fields explicitly present in the
    request are written to the stored service.
    """
    by_slug = select(TPSCCatalog).where(TPSCCatalog.slug == body.slug)
    service = (await session.execute(by_slug)).scalar_one_or_none()
    if not service:
        service = TPSCCatalog(**body.model_dump())
        session.add(service)
    else:
        # exclude_unset leaves stored values alone for fields the caller omitted.
        for field_name, new_value in body.model_dump(exclude_unset=True).items():
            setattr(service, field_name, new_value)
        service.updated_at = datetime.now(tz=timezone.utc)
    # Both paths persist and then reload the row before returning it.
    await session.commit()
    await session.refresh(service)
    return service
# ---------------------------------------------------------------------------
# Ingest
# ---------------------------------------------------------------------------
@router.post("/ingest/", response_model=TPSCSnapshotRead, status_code=201)
async def ingest_tpsc(body: TPSCIngestRequest, session: AsyncSession = Depends(get_session)):
    """Accept a tpsc.yaml snapshot for a repo.

    Stores one snapshot plus one entry per submitted service and returns the
    stored snapshot with its entries. A repo slug that is not registered is
    still accepted; the snapshot is then stored without a repo reference.
    Entries whose service is not in the catalog are stored without a catalog
    link and are returned with ``gdpr_warning`` set.
    """
    # Resolve repo_id; an unknown repo slug is not an error here.
    repo = (
        await session.execute(select(ManagedRepo).where(ManagedRepo.slug == body.repo_slug))
    ).scalar_one_or_none()
    repo_id = repo.id if repo else None

    # Build catalog lookup by slug with a single query for the whole request;
    # with no entries there is nothing to look up, so the query is skipped.
    slugs = {e.service_slug for e in body.entries}
    catalog_map = {}
    if slugs:
        catalog_rows = (
            await session.execute(select(TPSCCatalog).where(TPSCCatalog.slug.in_(slugs)))
        ).scalars().all()
        catalog_map = {r.slug: r for r in catalog_rows}

    snapshot = TPSCSnapshot(
        repo_id=repo_id,
        source_file=body.source_file,
        entry_count=len(body.entries),
    )
    session.add(snapshot)
    # Flush first: the entries below reference snapshot.id.
    await session.flush()

    entries_with_cats = []
    for e in body.entries:
        cat = catalog_map.get(e.service_slug)
        entry = TPSCEntry(
            snapshot_id=snapshot.id,
            catalog_id=cat.id if cat else None,
            service_slug=e.service_slug,
            purpose=e.purpose,
            auth_type=e.auth_type,
            endpoint_override=e.endpoint_override,
            notes=e.notes,
        )
        session.add(entry)
        entries_with_cats.append((entry, cat))
    await session.flush()  # assign UUIDs to all entries

    # Build the entry payloads BEFORE committing. With SQLAlchemy's default
    # expire_on_commit=True, commit() expires the attributes of every entry
    # and catalog row, and only the snapshot is refreshed afterwards; reading
    # them later would require implicit I/O, which AsyncSession does not
    # permit.
    # NOTE(review): if get_session already sets expire_on_commit=False this
    # ordering is merely equivalent to the previous one - confirm.
    entry_reads = [
        TPSCEntryRead(
            id=entry.id,
            snapshot_id=snapshot.id,
            catalog_id=cat.id if cat else None,
            service_slug=entry.service_slug,
            purpose=entry.purpose,
            auth_type=entry.auth_type,
            endpoint_override=entry.endpoint_override,
            notes=entry.notes,
            gdpr_maturity=cat.gdpr_maturity if cat else None,
            # A service missing from the catalog always counts as a warning.
            gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True,
            pricing_model=cat.pricing_model if cat else None,
        )
        for entry, cat in entries_with_cats
    ]

    await session.commit()
    # Reload the snapshot row so its attributes can be read after the commit.
    await session.refresh(snapshot)
    return TPSCSnapshotRead(
        id=snapshot.id,
        repo_id=snapshot.repo_id,
        snapshot_at=snapshot.snapshot_at,
        source_file=snapshot.source_file,
        entry_count=snapshot.entry_count,
        entries=entry_reads,
    )
# ---------------------------------------------------------------------------
# Snapshots
# ---------------------------------------------------------------------------
@router.get("/snapshots/", response_model=list[TPSCSnapshotRead])
async def list_snapshots(
    repo_slug: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List stored snapshots, newest first, each with its entries.

    When ``repo_slug`` is given, only that repo's snapshots are returned and
    an unknown slug yields a 404.
    """
    # Entries and their catalog rows are loaded eagerly with the snapshots,
    # so the attribute reads below work on already-loaded relationships.
    stmt = select(TPSCSnapshot).options(
        selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry)
    )
    if repo_slug:
        repo_lookup = select(ManagedRepo).where(ManagedRepo.slug == repo_slug)
        owner = (await session.execute(repo_lookup)).scalar_one_or_none()
        if not owner:
            raise HTTPException(404, f"Repo '{repo_slug}' not found")
        stmt = stmt.where(TPSCSnapshot.repo_id == owner.id)
    stmt = stmt.order_by(TPSCSnapshot.snapshot_at.desc())
    snapshots = (await session.execute(stmt)).scalars().all()

    def to_entry_read(item):
        """Convert one stored entry, enriched from its catalog row if it has one."""
        service = item.catalog_entry
        if service:
            maturity = service.gdpr_maturity
            warn = service.gdpr_maturity in GDPR_WARNING_LEVELS
            pricing = service.pricing_model
        else:
            # No catalog row: nothing to report, and the entry is flagged.
            maturity, warn, pricing = None, True, None
        return TPSCEntryRead(
            id=item.id,
            snapshot_id=item.snapshot_id,
            catalog_id=item.catalog_id,
            service_slug=item.service_slug,
            purpose=item.purpose,
            auth_type=item.auth_type,
            endpoint_override=item.endpoint_override,
            notes=item.notes,
            gdpr_maturity=maturity,
            gdpr_warning=warn,
            pricing_model=pricing,
        )

    return [
        TPSCSnapshotRead(
            id=stored.id,
            repo_id=stored.repo_id,
            snapshot_at=stored.snapshot_at,
            source_file=stored.source_file,
            entry_count=stored.entry_count,
            entries=[to_entry_read(item) for item in stored.entries],
        )
        for stored in snapshots
    ]
# ---------------------------------------------------------------------------
# GDPR report
# ---------------------------------------------------------------------------
@router.get("/report/gdpr", response_model=TPSCGDPRReport)
async def gdpr_report(session: AsyncSession = Depends(get_session)):
    """Aggregated GDPR warnings across all latest repo snapshots.

    For every repo only its most recent snapshot is inspected. An entry is
    reported when the GDPR maturity of its catalog service (or "unknown" when
    the service is not in the catalog) is one of the warning levels. Each
    (repo, service) pair is reported at most once. The report also counts all
    catalog services per maturity level.
    """
    # Latest snapshot per repo
    latest_sub = (
        select(TPSCSnapshot.repo_id, func.max(TPSCSnapshot.snapshot_at).label("max_at"))
        .group_by(TPSCSnapshot.repo_id)
        .subquery()
    )
    # repo_id can be NULL (ingest stores snapshots for unknown repos that
    # way). A plain "=" never matches NULL against NULL, which silently
    # dropped those snapshots from the report; the null-safe comparison keeps
    # the most recent repo-less snapshot as well.
    # NOTE(review): such rows are reported with repo_slug=None, which the loop
    # below already produced for unmapped repos - confirm the schema allows it.
    same_repo = TPSCSnapshot.repo_id.is_not_distinct_from(latest_sub.c.repo_id)
    latest_snaps = (await session.execute(
        select(TPSCSnapshot)
        .join(latest_sub, same_repo & (TPSCSnapshot.snapshot_at == latest_sub.c.max_at))
        .options(selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry))
    )).scalars().all()

    # Repo slug lookup; only the two columns that are used get fetched.
    repo_rows = (await session.execute(select(ManagedRepo.id, ManagedRepo.slug))).all()
    repo_map = {repo_id: slug for repo_id, slug in repo_rows}

    # Catalog-wide maturity distribution; only the maturity column is needed.
    maturities = (await session.execute(select(TPSCCatalog.gdpr_maturity))).scalars().all()
    by_maturity: dict[str, int] = {}
    for level in maturities:
        by_maturity[level] = by_maturity.get(level, 0) + 1

    warnings = []
    seen = set()
    for snap in latest_snaps:
        repo_slug = repo_map.get(snap.repo_id) if snap.repo_id else None
        for entry in snap.entries:
            cat = entry.catalog_entry
            maturity = cat.gdpr_maturity if cat else "unknown"
            if maturity not in GDPR_WARNING_LEVELS:
                continue
            # Two snapshots of one repo can tie on snapshot_at; report each
            # (repo, service) pair only once.
            key = (repo_slug, entry.service_slug)
            if key in seen:
                continue
            seen.add(key)
            warnings.append(TPSCGDPRWarning(
                repo_slug=repo_slug,
                service_slug=entry.service_slug,
                gdpr_maturity=maturity,
                purpose=entry.purpose,
                pricing_model=cat.pricing_model if cat else None,
            ))
    return TPSCGDPRReport(
        generated_at=datetime.now(tz=timezone.utc),
        total_services=len(maturities),
        warning_count=len(warnings),
        warnings=warnings,
        by_maturity=by_maturity,
    )