Files
hub-core/hub_core/routers/tpsc.py
tegwick 986ac4d40b Add hub-core package, docs, and State Hub integration scaffold
Extract the first reusable slice (models, schemas, routers, MCP, migrations)
from state-hub with INTENT/SCOPE, agent instructions, workplan, and aligned
inter_hub capability registry index.
2026-06-16 02:39:36 +02:00

241 lines
9.2 KiB
Python

from collections.abc import Callable
from datetime import datetime, timezone
from typing import Any
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from hub_core.models.managed_repo import ManagedRepo
from hub_core.models.tpsc import TPSCCatalog, TPSCEntry, TPSCSnapshot
from hub_core.schemas.tpsc import (
GDPR_WARNING_LEVELS,
TPSCCatalogCreate,
TPSCCatalogRead,
TPSCEntryRead,
TPSCGDPRReport,
TPSCGDPRWarning,
TPSCIngestRequest,
TPSCSnapshotRead,
)
def create_tpsc_router(
    get_session: Callable[..., AsyncSession],
    *,
    repo_model: type[ManagedRepo] = ManagedRepo,
    catalog_model: type[TPSCCatalog] = TPSCCatalog,
    snapshot_model: type[TPSCSnapshot] = TPSCSnapshot,
    entry_model: type[TPSCEntry] = TPSCEntry,
) -> APIRouter:
    """Build the ``/tpsc`` router bound to a caller-supplied session dependency.

    Args:
        get_session: FastAPI dependency providing the ``AsyncSession`` that
            every endpoint uses.
        repo_model: ORM class queried for repos (matched by ``slug``).
        catalog_model: ORM class for catalog services.
        snapshot_model: ORM class for ingest snapshots.
        entry_model: ORM class for the entries belonging to a snapshot.

    Returns:
        An ``APIRouter`` with prefix ``/tpsc`` exposing the catalog, ingest,
        snapshot-listing and GDPR-report endpoints defined below.
    """
    router = APIRouter(prefix="/tpsc", tags=["tpsc"])

    @router.get("/catalog/", response_model=list[TPSCCatalogRead])
    async def list_catalog(
        gdpr_maturity: str | None = None,
        category: str | None = None,
        pricing_model: str | None = None,
        session: AsyncSession = Depends(get_session),
    ) -> list[Any]:
        """List non-deprecated catalog services ordered by name.

        Each optional query parameter, when non-empty, adds an equality
        filter on the catalog column of the same name.
        """
        q = select(catalog_model).where(catalog_model.status != "deprecated")
        if gdpr_maturity:
            q = q.where(catalog_model.gdpr_maturity == gdpr_maturity)
        if category:
            q = q.where(catalog_model.category == category)
        if pricing_model:
            q = q.where(catalog_model.pricing_model == pricing_model)
        q = q.order_by(catalog_model.name)
        result = await session.execute(q)
        return list(result.scalars().all())

    @router.get("/catalog/{slug}", response_model=TPSCCatalogRead)
    async def get_catalog_entry(
        slug: str,
        session: AsyncSession = Depends(get_session),
    ) -> Any:
        """Return the catalog service with the given slug.

        No status filter is applied here, so deprecated services can still
        be fetched by slug.

        Raises:
            HTTPException: 404 when no catalog row has this slug.
        """
        row = (
            await session.execute(select(catalog_model).where(catalog_model.slug == slug))
        ).scalar_one_or_none()
        if row is None:
            raise HTTPException(status_code=404, detail=f"Service '{slug}' not found in catalog")
        return row

    @router.post("/catalog/", response_model=TPSCCatalogRead, status_code=status.HTTP_201_CREATED)
    async def register_service(
        body: TPSCCatalogCreate,
        session: AsyncSession = Depends(get_session),
    ) -> Any:
        """Create a catalog service, or update the one that already has this slug.

        On update only the fields explicitly set in the request are written,
        and ``updated_at`` is stamped with the current UTC time. The endpoint
        responds with 201 in both the create and the update case.
        """
        existing = (
            await session.execute(select(catalog_model).where(catalog_model.slug == body.slug))
        ).scalar_one_or_none()
        if existing is not None:
            # exclude_unset keeps fields the client omitted at their stored values.
            for field, value in body.model_dump(exclude_unset=True).items():
                setattr(existing, field, value)
            existing.updated_at = datetime.now(tz=timezone.utc)
            await session.commit()
            await session.refresh(existing)
            return existing

        entry = catalog_model(**body.model_dump())
        session.add(entry)
        await session.commit()
        await session.refresh(entry)
        return entry

    @router.post("/ingest/", response_model=TPSCSnapshotRead, status_code=status.HTTP_201_CREATED)
    async def ingest_tpsc(
        body: TPSCIngestRequest,
        session: AsyncSession = Depends(get_session),
    ) -> TPSCSnapshotRead:
        """Store a new snapshot with its entries and return it.

        The repo is resolved by ``body.repo_slug``; when no repo matches, the
        snapshot is still stored with ``repo_id`` set to ``None``. Each entry
        is linked to the catalog row with the same ``service_slug`` when one
        exists, otherwise its ``catalog_id`` is ``None``.
        """
        repo = (
            await session.execute(select(repo_model).where(repo_model.slug == body.repo_slug))
        ).scalar_one_or_none()
        repo_id = repo.id if repo is not None else None

        # Resolve all referenced catalog rows in one query instead of one per entry.
        slugs = {entry.service_slug for entry in body.entries}
        catalog_rows = []
        if slugs:
            catalog_rows = (
                await session.execute(select(catalog_model).where(catalog_model.slug.in_(slugs)))
            ).scalars().all()
        catalog_map = {row.slug: row for row in catalog_rows}

        snapshot = snapshot_model(
            repo_id=repo_id,
            source_file=body.source_file,
            entry_count=len(body.entries),
        )
        session.add(snapshot)
        # Flush so snapshot.id is available for the entries created below.
        await session.flush()

        entries_with_catalogs = []
        for body_entry in body.entries:
            catalog_entry = catalog_map.get(body_entry.service_slug)
            entry = entry_model(
                snapshot_id=snapshot.id,
                catalog_id=catalog_entry.id if catalog_entry is not None else None,
                **body_entry.model_dump(),
            )
            session.add(entry)
            entries_with_catalogs.append((entry, catalog_entry))

        # Flush the entries and reload the snapshot while the transaction is
        # still open, then build the response BEFORE committing. The session is
        # injected by the caller; if it uses the default expire_on_commit=True,
        # every ORM attribute is expired by commit() and reading it afterwards
        # would need implicit I/O, which an AsyncSession cannot perform.
        await session.flush()
        await session.refresh(snapshot)
        response = TPSCSnapshotRead(
            id=snapshot.id,
            repo_id=snapshot.repo_id,
            snapshot_at=snapshot.snapshot_at,
            source_file=snapshot.source_file,
            entry_count=snapshot.entry_count,
            entries=[
                _entry_read(entry, catalog_entry)
                for entry, catalog_entry in entries_with_catalogs
            ],
        )
        await session.commit()
        return response

    @router.get("/snapshots/", response_model=list[TPSCSnapshotRead])
    async def list_snapshots(
        repo_slug: str | None = None,
        session: AsyncSession = Depends(get_session),
    ) -> list[TPSCSnapshotRead]:
        """List snapshots, newest first, optionally restricted to one repo.

        Raises:
            HTTPException: 404 when ``repo_slug`` is given but matches no repo.
        """
        # Eager-load entries and their catalog rows; _snapshot_read reads both.
        q = select(snapshot_model).options(
            selectinload(snapshot_model.entries).selectinload(entry_model.catalog_entry)
        )
        if repo_slug:
            repo = (
                await session.execute(select(repo_model).where(repo_model.slug == repo_slug))
            ).scalar_one_or_none()
            if repo is None:
                raise HTTPException(status_code=404, detail=f"Repo '{repo_slug}' not found")
            q = q.where(snapshot_model.repo_id == repo.id)
        q = q.order_by(snapshot_model.snapshot_at.desc())
        rows = (await session.execute(q)).scalars().all()
        return [_snapshot_read(row) for row in rows]

    @router.get("/report/gdpr", response_model=TPSCGDPRReport)
    async def gdpr_report(
        session: AsyncSession = Depends(get_session),
    ) -> TPSCGDPRReport:
        """Report services in each repo's latest snapshot that carry a GDPR warning.

        A service is reported when its catalog ``gdpr_maturity`` is in
        ``GDPR_WARNING_LEVELS``; entries without a catalog row are evaluated
        with the maturity ``"unknown"``. Each (repo, service) pair is reported
        at most once. ``by_maturity`` and ``total_services`` are computed over
        the whole catalog, not only over the services in use.
        """
        # Latest snapshot timestamp per repo.
        latest_sub = (
            select(snapshot_model.repo_id, func.max(snapshot_model.snapshot_at).label("max_at"))
            .group_by(snapshot_model.repo_id)
            .subquery()
        )
        # NOTE(review): the equality join below never matches rows whose
        # repo_id is NULL, so snapshots ingested for an unknown repo are left
        # out of the report - confirm this is intended.
        latest_snaps = (
            await session.execute(
                select(snapshot_model)
                .join(
                    latest_sub,
                    (snapshot_model.repo_id == latest_sub.c.repo_id)
                    & (snapshot_model.snapshot_at == latest_sub.c.max_at),
                )
                .options(selectinload(snapshot_model.entries).selectinload(entry_model.catalog_entry))
            )
        ).scalars().all()

        all_repos = (await session.execute(select(repo_model))).scalars().all()
        repo_map = {repo.id: repo.slug for repo in all_repos}

        all_services = (await session.execute(select(catalog_model))).scalars().all()
        by_maturity: dict[str, int] = {}
        for service in all_services:
            by_maturity[service.gdpr_maturity] = by_maturity.get(service.gdpr_maturity, 0) + 1

        warnings = []
        # Guards against duplicates, e.g. a service listed twice in a snapshot
        # or two snapshots of one repo sharing the same latest timestamp.
        seen = set()
        for snap in latest_snaps:
            repo_slug = repo_map.get(snap.repo_id) if snap.repo_id else None
            for entry in snap.entries:
                catalog_entry = entry.catalog_entry
                maturity = catalog_entry.gdpr_maturity if catalog_entry is not None else "unknown"
                if maturity not in GDPR_WARNING_LEVELS:
                    continue
                key = (repo_slug, entry.service_slug)
                if key in seen:
                    continue
                seen.add(key)
                warnings.append(
                    TPSCGDPRWarning(
                        repo_slug=repo_slug,
                        service_slug=entry.service_slug,
                        gdpr_maturity=maturity,
                        purpose=entry.purpose,
                        pricing_model=catalog_entry.pricing_model if catalog_entry is not None else None,
                    )
                )

        return TPSCGDPRReport(
            generated_at=datetime.now(tz=timezone.utc),
            total_services=len(all_services),
            warning_count=len(warnings),
            warnings=warnings,
            by_maturity=by_maturity,
        )

    return router
def _entry_read(entry: TPSCEntry, catalog_entry: TPSCCatalog | None) -> TPSCEntryRead:
    """Convert a stored entry into its read schema.

    The GDPR maturity and pricing model are taken from ``catalog_entry``.
    Without a catalog entry both are ``None`` and ``gdpr_warning`` is ``True``;
    with one, ``gdpr_warning`` tells whether its maturity is in
    ``GDPR_WARNING_LEVELS``.
    """
    if catalog_entry:
        maturity = catalog_entry.gdpr_maturity
        warning = catalog_entry.gdpr_maturity in GDPR_WARNING_LEVELS
        pricing = catalog_entry.pricing_model
    else:
        # A service missing from the catalog is always flagged.
        maturity = None
        warning = True
        pricing = None

    return TPSCEntryRead(
        id=entry.id,
        snapshot_id=entry.snapshot_id,
        catalog_id=entry.catalog_id,
        service_slug=entry.service_slug,
        purpose=entry.purpose,
        auth_type=entry.auth_type,
        endpoint_override=entry.endpoint_override,
        notes=entry.notes,
        gdpr_maturity=maturity,
        gdpr_warning=warning,
        pricing_model=pricing,
    )
def _snapshot_read(snapshot: TPSCSnapshot) -> TPSCSnapshotRead:
    """Convert a stored snapshot and its entries into the read schema.

    Reads ``snapshot.entries`` and each entry's ``catalog_entry``, so both
    relationships are accessed on the ORM objects passed in.
    """
    entry_reads = []
    for item in snapshot.entries:
        entry_reads.append(_entry_read(item, item.catalog_entry))

    return TPSCSnapshotRead(
        id=snapshot.id,
        repo_id=snapshot.repo_id,
        snapshot_at=snapshot.snapshot_at,
        source_file=snapshot.source_file,
        entry_count=snapshot.entry_count,
        entries=entry_reads,
    )