generated from coulomb/repo-seed
Implements the 14-criterion DoI checklist as a runnable gate with API,
MCP tools, CLI script, and dashboard integration.
Core components:
- api/doi_engine.py — async engine evaluating all 14 criteria (asyncio.to_thread
for non-blocking HTTP self-calls), shared by API and CLI
- api/schemas/doi.py — DoICriterion, DoIReport, DoISummaryEntry schemas
- api/routers/repos.py — GET /repos/{slug}/doi + GET /repos/doi/summary
- scripts/check_doi.py — CLI: make check-doi REPO=<slug> / check-doi-all
- mcp_server/server.py — check_repo_doi(), get_doi_summary() tools
Dashboard (repos.md):
- DoI tier badge per repo (None/Core/Standard/Full) colour-coded red→green
- Domain block shows lowest DoI tier across its repos
- DoI KPI card in summary row
- DoI filter in All Repos Table
- Link to Repository DoI policy page
Also fixes: TPSC snapshots 500 error (missing nested selectinload for
catalog_entry relationship in list_snapshots endpoint).
Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
241 lines
9.0 KiB
Python
241 lines
9.0 KiB
Python
from datetime import datetime, timezone
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from sqlalchemy import select, func
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.orm import selectinload
|
|
|
|
from api.database import get_session
|
|
from api.models.managed_repo import ManagedRepo
|
|
from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry
|
|
from api.schemas.tpsc import (
|
|
TPSCCatalogCreate, TPSCCatalogRead,
|
|
TPSCEntryRead, TPSCIngestRequest, TPSCSnapshotRead,
|
|
TPSCGDPRReport, TPSCGDPRWarning, GDPR_WARNING_LEVELS,
|
|
)
|
|
|
|
# Every endpoint in this module is registered on this router, which applies
# the "/tpsc" URL prefix and the "tpsc" tag to all of them.
router = APIRouter(prefix="/tpsc", tags=["tpsc"])
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Catalog
# ---------------------------------------------------------------------------
|
|
|
|
@router.get("/catalog/", response_model=list[TPSCCatalogRead])
async def list_catalog(
    gdpr_maturity: str | None = None,
    category: str | None = None,
    pricing_model: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List catalog services whose status is not "deprecated", ordered by name.

    Each optional parameter, when given a non-empty value, restricts the
    result to rows whose column of the same name equals that value.
    """
    requested = (
        (TPSCCatalog.gdpr_maturity, gdpr_maturity),
        (TPSCCatalog.category, category),
        (TPSCCatalog.pricing_model, pricing_model),
    )
    # Only filters that were actually supplied become WHERE criteria.
    criteria = [column == wanted for column, wanted in requested if wanted]

    stmt = (
        select(TPSCCatalog)
        .where(TPSCCatalog.status != "deprecated", *criteria)
        .order_by(TPSCCatalog.name)
    )
    result = await session.execute(stmt)
    return result.scalars().all()
|
|
|
|
|
|
@router.get("/catalog/{slug}", response_model=TPSCCatalogRead)
async def get_catalog_entry(slug: str, session: AsyncSession = Depends(get_session)):
    """Return the catalog service identified by ``slug``.

    Responds with HTTP 404 when no catalog row has that slug.
    """
    stmt = select(TPSCCatalog).where(TPSCCatalog.slug == slug)
    result = await session.execute(stmt)
    service = result.scalar_one_or_none()
    if service:
        return service
    raise HTTPException(404, f"Service '{slug}' not found in catalog")
|
|
|
|
|
|
@router.post("/catalog/", response_model=TPSCCatalogRead, status_code=201)
async def register_service(body: TPSCCatalogCreate, session: AsyncSession = Depends(get_session)):
    """Create a catalog service, or update the one that already has this slug.

    When a row with ``body.slug`` exists, only the fields explicitly set in the
    request are copied onto it and ``updated_at`` is stamped with the current
    UTC time. Otherwise a new row is created from the full request payload.
    The persisted row is returned in both cases.
    """
    lookup = await session.execute(
        select(TPSCCatalog).where(TPSCCatalog.slug == body.slug)
    )
    record = lookup.scalar_one_or_none()

    if record:
        # Update path: leave fields the client did not send untouched.
        changes = body.model_dump(exclude_unset=True)
        for field_name, value in changes.items():
            setattr(record, field_name, value)
        record.updated_at = datetime.now(tz=timezone.utc)
    else:
        # Create path: every field, including schema defaults, is used.
        record = TPSCCatalog(**body.model_dump())
        session.add(record)

    await session.commit()
    await session.refresh(record)
    return record
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Ingest
# ---------------------------------------------------------------------------
|
|
|
|
@router.post("/ingest/", response_model=TPSCSnapshotRead, status_code=201)
async def ingest_tpsc(body: TPSCIngestRequest, session: AsyncSession = Depends(get_session)):
    """Accept a tpsc.yaml snapshot for a repo and return it with its entries.

    The repo is looked up by ``body.repo_slug``; when no managed repo matches,
    the snapshot is still stored, with ``repo_id`` set to ``None``. Each
    submitted entry is linked to the catalog row with the same slug when one
    exists. Entries without a catalog row are returned with
    ``gdpr_warning=True`` and no maturity or pricing information.
    """
    # Resolve repo_id; an unknown slug is tolerated rather than rejected.
    repo = (
        await session.execute(
            select(ManagedRepo).where(ManagedRepo.slug == body.repo_slug)
        )
    ).scalar_one_or_none()
    repo_id = repo.id if repo else None

    # Build catalog lookup by slug with one query for all submitted entries.
    slugs = {e.service_slug for e in body.entries}
    catalog_rows = (
        await session.execute(select(TPSCCatalog).where(TPSCCatalog.slug.in_(slugs)))
    ).scalars().all()
    catalog_map = {r.slug: r for r in catalog_rows}

    snapshot = TPSCSnapshot(
        repo_id=repo_id,
        source_file=body.source_file,
        entry_count=len(body.entries),
    )
    session.add(snapshot)
    await session.flush()  # the entries below reference snapshot.id
    snapshot_id = snapshot.id

    entries_with_cats = []
    for e in body.entries:
        cat = catalog_map.get(e.service_slug)
        entry = TPSCEntry(
            snapshot_id=snapshot_id,
            catalog_id=cat.id if cat else None,
            service_slug=e.service_slug,
            purpose=e.purpose,
            auth_type=e.auth_type,
            endpoint_override=e.endpoint_override,
            notes=e.notes,
        )
        session.add(entry)
        entries_with_cats.append((entry, cat))

    await session.flush()  # assign ids to all entries

    # Build the response entries BEFORE committing. With SQLAlchemy's default
    # expire_on_commit=True, commit() expires every loaded attribute on the
    # entry and catalog objects; reading them afterwards would trigger an
    # implicit lazy load, which fails under AsyncSession. Reading them here is
    # correct whichever way the session factory is configured.
    entry_reads = [
        TPSCEntryRead(
            id=entry.id,
            snapshot_id=snapshot_id,
            catalog_id=cat.id if cat else None,
            service_slug=entry.service_slug,
            purpose=entry.purpose,
            auth_type=entry.auth_type,
            endpoint_override=entry.endpoint_override,
            notes=entry.notes,
            gdpr_maturity=cat.gdpr_maturity if cat else None,
            # A service missing from the catalog is always flagged.
            gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True,
            pricing_model=cat.pricing_model if cat else None,
        )
        for entry, cat in entries_with_cats
    ]

    await session.commit()
    # Explicit awaited reload so the snapshot fields read below are populated.
    await session.refresh(snapshot)

    return TPSCSnapshotRead(
        id=snapshot.id,
        repo_id=snapshot.repo_id,
        snapshot_at=snapshot.snapshot_at,
        source_file=snapshot.source_file,
        entry_count=snapshot.entry_count,
        entries=entry_reads,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Snapshots
# ---------------------------------------------------------------------------
|
|
|
|
@router.get("/snapshots/", response_model=list[TPSCSnapshotRead])
async def list_snapshots(
    repo_slug: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List snapshots, newest first, each with its entries.

    When ``repo_slug`` is given, only that repo's snapshots are returned and
    an unknown slug yields HTTP 404. Entries and their catalog rows are
    eagerly loaded with the snapshot query, so the response is built without
    lazy loads.
    """
    stmt = select(TPSCSnapshot).options(
        selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry)
    )
    if repo_slug:
        repo_lookup = await session.execute(
            select(ManagedRepo).where(ManagedRepo.slug == repo_slug)
        )
        repo = repo_lookup.scalar_one_or_none()
        if not repo:
            raise HTTPException(404, f"Repo '{repo_slug}' not found")
        stmt = stmt.where(TPSCSnapshot.repo_id == repo.id)
    stmt = stmt.order_by(TPSCSnapshot.snapshot_at.desc())
    snapshots = (await session.execute(stmt)).scalars().all()

    def _to_entry_read(item):
        # Entries with no catalog row carry no maturity/pricing data and are
        # always flagged with a GDPR warning.
        service = item.catalog_entry
        if service:
            maturity = service.gdpr_maturity
            flagged = service.gdpr_maturity in GDPR_WARNING_LEVELS
            pricing = service.pricing_model
        else:
            maturity, flagged, pricing = None, True, None
        return TPSCEntryRead(
            id=item.id,
            snapshot_id=item.snapshot_id,
            catalog_id=item.catalog_id,
            service_slug=item.service_slug,
            purpose=item.purpose,
            auth_type=item.auth_type,
            endpoint_override=item.endpoint_override,
            notes=item.notes,
            gdpr_maturity=maturity,
            gdpr_warning=flagged,
            pricing_model=pricing,
        )

    return [
        TPSCSnapshotRead(
            id=snap.id,
            repo_id=snap.repo_id,
            snapshot_at=snap.snapshot_at,
            source_file=snap.source_file,
            entry_count=snap.entry_count,
            entries=[_to_entry_read(item) for item in snap.entries],
        )
        for snap in snapshots
    ]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# GDPR report
# ---------------------------------------------------------------------------
|
|
|
|
@router.get("/report/gdpr", response_model=TPSCGDPRReport)
async def gdpr_report(session: AsyncSession = Depends(get_session)):
    """Build the GDPR warning report from each repo's most recent snapshot.

    An entry produces a warning when its catalog maturity (or "unknown" when
    the entry has no catalog row) is in ``GDPR_WARNING_LEVELS``. Each
    (repo slug, service slug) pair is reported at most once. The report also
    counts all catalog services per maturity level.
    """
    # Latest snapshot timestamp per repo.
    # NOTE(review): the join below compares repo_id with ``==``, so snapshots
    # whose repo_id is NULL presumably never match. Confirm this is intended.
    newest = (
        select(
            TPSCSnapshot.repo_id,
            func.max(TPSCSnapshot.snapshot_at).label("max_at"),
        )
        .group_by(TPSCSnapshot.repo_id)
        .subquery()
    )
    is_newest = (TPSCSnapshot.repo_id == newest.c.repo_id) & (
        TPSCSnapshot.snapshot_at == newest.c.max_at
    )
    snapshot_stmt = (
        select(TPSCSnapshot)
        .join(newest, is_newest)
        .options(selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry))
    )
    latest_snaps = (await session.execute(snapshot_stmt)).scalars().all()

    # Repo id -> slug lookup.
    repo_rows = (await session.execute(select(ManagedRepo))).scalars().all()
    repo_map = {repo.id: repo.slug for repo in repo_rows}

    # Tally every catalog service by its maturity level.
    all_services = (await session.execute(select(TPSCCatalog))).scalars().all()
    by_maturity: dict[str, int] = {}
    for service in all_services:
        level = service.gdpr_maturity
        by_maturity[level] = by_maturity.get(level, 0) + 1

    warnings = []
    reported = set()
    for snap in latest_snaps:
        repo_slug = repo_map.get(snap.repo_id) if snap.repo_id else None
        for entry in snap.entries:
            service = entry.catalog_entry
            maturity = service.gdpr_maturity if service else "unknown"
            if maturity not in GDPR_WARNING_LEVELS:
                continue
            pair = (repo_slug, entry.service_slug)
            if pair in reported:
                # Several snapshots can tie for "latest"; report a pair once.
                continue
            reported.add(pair)
            warnings.append(
                TPSCGDPRWarning(
                    repo_slug=repo_slug,
                    service_slug=entry.service_slug,
                    gdpr_maturity=maturity,
                    purpose=entry.purpose,
                    pricing_model=service.pricing_model if service else None,
                )
            )

    return TPSCGDPRReport(
        generated_at=datetime.now(tz=timezone.utc),
        total_services=len(all_services),
        warning_count=len(warnings),
        warnings=warnings,
        by_maturity=by_maturity,
    )
|