Files
state-hub/api/routers/tpsc.py
tegwick 60beb1ff35 feat(tpsc): Third-Party Services Catalog (CUST-WP-0023)
Introduces TPSC for tracking external service dependencies with GDPR
compliance maturity (CNIL/IAPP CMMI scale), pricing model, ToS, and
data retention information across all repos.

Primary data:
- canon/tpsc/{openai,anthropic,gemini,openrouter}-api.yaml — service definitions
- tpsc.yaml in each repo (llm-connect seeded with 4 services)

State-hub additions:
- Migration j7e8f9a0b1c2: tpsc_catalog + tpsc_snapshots + tpsc_entries
- api/models/tpsc.py, api/schemas/tpsc.py, api/routers/tpsc.py
- /tpsc/catalog/, /tpsc/ingest/, /tpsc/snapshots/, /tpsc/report/gdpr endpoints
- 4 MCP tools: register_service, list_services, ingest_tpsc_tool, get_gdpr_report
- scripts/ingest_tpsc.py + make ingest-tpsc[/-all] targets
- Dashboard: tpsc.md page + docs/tpsc.md

GDPR maturity scale: unknown | non_compliant | initial | developing | defined | managed | certified
Warnings triggered at: unknown, non_compliant, initial

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 00:15:26 +01:00

239 lines
9.0 KiB
Python

from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry
from api.schemas.tpsc import (
TPSCCatalogCreate, TPSCCatalogRead,
TPSCEntryRead, TPSCIngestRequest, TPSCSnapshotRead,
TPSCGDPRReport, TPSCGDPRWarning, GDPR_WARNING_LEVELS,
)
# Router that groups every endpoint in this module under the "/tpsc" URL
# prefix and the "tpsc" tag.
router = APIRouter(prefix="/tpsc", tags=["tpsc"])
# ---------------------------------------------------------------------------
# Catalog
# ---------------------------------------------------------------------------
@router.get("/catalog/", response_model=list[TPSCCatalogRead])
async def list_catalog(
    gdpr_maturity: str | None = None,
    category: str | None = None,
    pricing_model: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List catalog services whose status is not "deprecated", ordered by name.

    Each optional query parameter, when given and non-empty, narrows the
    result to rows whose column of the same name equals the supplied value.
    """
    # (column, requested value) pairs; a falsy value means "do not filter".
    requested = (
        (TPSCCatalog.gdpr_maturity, gdpr_maturity),
        (TPSCCatalog.category, category),
        (TPSCCatalog.pricing_model, pricing_model),
    )
    stmt = select(TPSCCatalog).where(TPSCCatalog.status != "deprecated")
    for column, wanted in requested:
        if wanted:
            stmt = stmt.where(column == wanted)
    result = await session.execute(stmt.order_by(TPSCCatalog.name))
    return result.scalars().all()
@router.get("/catalog/{slug}", response_model=TPSCCatalogRead)
async def get_catalog_entry(slug: str, session: AsyncSession = Depends(get_session)):
    """Return the catalog service identified by ``slug``.

    Raises:
        HTTPException: 404 when no catalog row has that slug.
    """
    lookup = select(TPSCCatalog).where(TPSCCatalog.slug == slug)
    result = await session.execute(lookup)
    service = result.scalar_one_or_none()
    if service:
        return service
    raise HTTPException(404, f"Service '{slug}' not found in catalog")
@router.post("/catalog/", response_model=TPSCCatalogRead, status_code=201)
async def register_service(body: TPSCCatalogCreate, session: AsyncSession = Depends(get_session)):
    """Create a catalog service, or update the one that already has this slug.

    On update only the fields the client explicitly sent are overwritten and
    ``updated_at`` is stamped with the current UTC time. On create every
    field of the request body (including defaults) is used.
    """
    by_slug = select(TPSCCatalog).where(TPSCCatalog.slug == body.slug)
    record = (await session.execute(by_slug)).scalar_one_or_none()

    if record:
        # Upsert path: leave fields the client did not send untouched.
        changes = body.model_dump(exclude_unset=True)
        for field_name in changes:
            setattr(record, field_name, changes[field_name])
        record.updated_at = datetime.now(tz=timezone.utc)
    else:
        record = TPSCCatalog(**body.model_dump())
        session.add(record)

    await session.commit()
    # Reload the row so the response reflects what was persisted.
    await session.refresh(record)
    return record
# ---------------------------------------------------------------------------
# Ingest
# ---------------------------------------------------------------------------
@router.post("/ingest/", response_model=TPSCSnapshotRead, status_code=201)
async def ingest_tpsc(body: TPSCIngestRequest, session: AsyncSession = Depends(get_session)):
    """Accept a tpsc.yaml snapshot for a repo.

    Stores one snapshot row plus one entry row per item in ``body.entries``
    and returns the stored snapshot with its entries.

    * If ``body.repo_slug`` matches no managed repo, the snapshot is still
      stored, with ``repo_id`` set to ``None``.
    * An entry whose ``service_slug`` is not in the catalog is stored with
      ``catalog_id=None`` and is reported with ``gdpr_warning=True``.
    """
    # Resolve repo_id (None when the repo is not registered).
    repo = (
        await session.execute(select(ManagedRepo).where(ManagedRepo.slug == body.repo_slug))
    ).scalar_one_or_none()
    repo_id = repo.id if repo else None

    # Build catalog lookup by slug for every service referenced in the payload.
    slugs = {e.service_slug for e in body.entries}
    catalog_rows = (
        await session.execute(select(TPSCCatalog).where(TPSCCatalog.slug.in_(slugs)))
    ).scalars().all()
    catalog_map = {r.slug: r for r in catalog_rows}

    snapshot = TPSCSnapshot(
        repo_id=repo_id,
        source_file=body.source_file,
        entry_count=len(body.entries),
    )
    session.add(snapshot)
    await session.flush()  # snapshot.id is needed as the entries' foreign key

    entries_with_cats = []
    for e in body.entries:
        cat = catalog_map.get(e.service_slug)
        entry = TPSCEntry(
            snapshot_id=snapshot.id,
            catalog_id=cat.id if cat else None,
            service_slug=e.service_slug,
            purpose=e.purpose,
            auth_type=e.auth_type,
            endpoint_override=e.endpoint_override,
            notes=e.notes,
        )
        session.add(entry)
        entries_with_cats.append((entry, cat))
    await session.flush()  # assign ids to all entries

    # Build the response entries BEFORE committing. A commit may expire every
    # ORM object in the session (expire_on_commit), and reading an expired
    # attribute on an AsyncSession would require implicit IO, which is not
    # permitted. The entries and catalog rows are never refreshed afterwards,
    # so they must be read while still loaded.
    entry_reads = [
        TPSCEntryRead(
            id=entry.id,
            snapshot_id=snapshot.id,
            catalog_id=cat.id if cat else None,
            service_slug=entry.service_slug,
            purpose=entry.purpose,
            auth_type=entry.auth_type,
            endpoint_override=entry.endpoint_override,
            notes=entry.notes,
            gdpr_maturity=cat.gdpr_maturity if cat else None,
            gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True,
            pricing_model=cat.pricing_model if cat else None,
        )
        for entry, cat in entries_with_cats
    ]

    await session.commit()
    # Explicitly reload the snapshot so its attributes are safe to read below.
    await session.refresh(snapshot)

    return TPSCSnapshotRead(
        id=snapshot.id,
        repo_id=snapshot.repo_id,
        snapshot_at=snapshot.snapshot_at,
        source_file=snapshot.source_file,
        entry_count=snapshot.entry_count,
        entries=entry_reads,
    )
# ---------------------------------------------------------------------------
# Snapshots
# ---------------------------------------------------------------------------
@router.get("/snapshots/", response_model=list[TPSCSnapshotRead])
async def list_snapshots(
    repo_slug: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List stored snapshots, newest first, each with its entries.

    Args:
        repo_slug: When given, only snapshots of that managed repo are
            returned.

    Raises:
        HTTPException: 404 when ``repo_slug`` is given but matches no repo.
    """
    # Eager-load entries AND each entry's catalog row: the loop below reads
    # ``e.catalog_entry``, and a lazy load there would need implicit IO,
    # which an AsyncSession does not allow.
    q = select(TPSCSnapshot).options(
        selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry)
    )
    if repo_slug:
        repo = (
            await session.execute(select(ManagedRepo).where(ManagedRepo.slug == repo_slug))
        ).scalar_one_or_none()
        if not repo:
            raise HTTPException(404, f"Repo '{repo_slug}' not found")
        q = q.where(TPSCSnapshot.repo_id == repo.id)
    q = q.order_by(TPSCSnapshot.snapshot_at.desc())
    rows = (await session.execute(q)).scalars().all()

    result = []
    for snap in rows:
        entry_reads = []
        for e in snap.entries:
            cat = e.catalog_entry
            entry_reads.append(TPSCEntryRead(
                id=e.id,
                snapshot_id=e.snapshot_id,
                catalog_id=e.catalog_id,
                service_slug=e.service_slug,
                purpose=e.purpose,
                auth_type=e.auth_type,
                endpoint_override=e.endpoint_override,
                notes=e.notes,
                gdpr_maturity=cat.gdpr_maturity if cat else None,
                # An entry with no catalog row is always flagged.
                gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True,
                pricing_model=cat.pricing_model if cat else None,
            ))
        result.append(TPSCSnapshotRead(
            id=snap.id,
            repo_id=snap.repo_id,
            snapshot_at=snap.snapshot_at,
            source_file=snap.source_file,
            entry_count=snap.entry_count,
            entries=entry_reads,
        ))
    return result
# ---------------------------------------------------------------------------
# GDPR report
# ---------------------------------------------------------------------------
@router.get("/report/gdpr", response_model=TPSCGDPRReport)
async def gdpr_report(session: AsyncSession = Depends(get_session)):
    """Aggregated GDPR warnings across all latest repo snapshots.

    For each ``repo_id`` (including ``None``, i.e. snapshots ingested for a
    repo that is not registered) the most recent snapshot is inspected. Every
    entry whose catalog maturity is in ``GDPR_WARNING_LEVELS`` produces one
    warning per (repo slug, service slug) pair. Entries with no catalog row
    are treated as maturity "unknown". ``by_maturity`` and ``total_services``
    count every row of the catalog.
    """
    # Latest snapshot timestamp per repo.
    latest_sub = (
        select(TPSCSnapshot.repo_id, func.max(TPSCSnapshot.snapshot_at).label("max_at"))
        .group_by(TPSCSnapshot.repo_id)
        .subquery()
    )
    latest_snaps = (await session.execute(
        select(TPSCSnapshot)
        .join(
            latest_sub,
            # Null-safe comparison: a plain ``==`` is never true for NULL, so
            # snapshots stored with repo_id=None would be silently dropped
            # and the ``repo_id is None`` handling below would be unreachable.
            TPSCSnapshot.repo_id.is_not_distinct_from(latest_sub.c.repo_id)
            & (TPSCSnapshot.snapshot_at == latest_sub.c.max_at),
        )
        .options(selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry))
    )).scalars().all()

    # Repo slug lookup
    all_repos = (await session.execute(select(ManagedRepo))).scalars().all()
    repo_map = {r.id: r.slug for r in all_repos}

    # Count of catalog services per maturity level.
    all_services = (await session.execute(select(TPSCCatalog))).scalars().all()
    by_maturity: dict[str, int] = {}
    for s in all_services:
        by_maturity[s.gdpr_maturity] = by_maturity.get(s.gdpr_maturity, 0) + 1

    warnings = []
    # (repo_slug, service_slug) pairs already reported; two snapshots of one
    # repo can share the same max timestamp, so the pair may repeat.
    seen = set()
    for snap in latest_snaps:
        repo_slug = repo_map.get(snap.repo_id) if snap.repo_id else None
        for entry in snap.entries:
            cat = entry.catalog_entry
            maturity = cat.gdpr_maturity if cat else "unknown"
            if maturity not in GDPR_WARNING_LEVELS:
                continue
            key = (repo_slug, entry.service_slug)
            if key in seen:
                continue
            seen.add(key)
            warnings.append(TPSCGDPRWarning(
                repo_slug=repo_slug,
                service_slug=entry.service_slug,
                gdpr_maturity=maturity,
                purpose=entry.purpose,
                pricing_model=cat.pricing_model if cat else None,
            ))

    return TPSCGDPRReport(
        generated_at=datetime.now(tz=timezone.utc),
        total_services=len(all_services),
        warning_count=len(warnings),
        warnings=warnings,
        by_maturity=by_maturity,
    )