feat(tpsc): Third-Party Services Catalog (CUST-WP-0023)

Introduces TPSC for tracking external service dependencies with GDPR
compliance maturity (CNIL/IAPP CMMI scale), pricing model, ToS, and
data retention information across all repos.

Primary data:
- canon/tpsc/{openai,anthropic,gemini,openrouter}-api.yaml — service definitions
- tpsc.yaml in each repo (llm-connect seeded with 4 services)

State-hub additions:
- Migration j7e8f9a0b1c2: tpsc_catalog + tpsc_snapshots + tpsc_entries
- api/models/tpsc.py, api/schemas/tpsc.py, api/routers/tpsc.py
- /tpsc/catalog/, /tpsc/ingest/, /tpsc/snapshots/, /tpsc/report/gdpr endpoints
- 4 MCP tools: register_service, list_services, ingest_tpsc_tool, get_gdpr_report
- scripts/ingest_tpsc.py + make ingest-tpsc[/-all] targets
- Dashboard: tpsc.md page + docs/tpsc.md

GDPR maturity scale: unknown | non_compliant | initial | developing | defined | managed | certified
Warnings triggered at: unknown, non_compliant, initial

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-20 00:15:26 +01:00
parent 4e28cab297
commit 60beb1ff35
14 changed files with 1126 additions and 1 deletions

View File

@@ -6,7 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware
from api.database import engine
from api.routers import decisions, extension_points, progress, state, tasks, technical_debt, topics, workstreams, workstream_dependencies
from api.routers import domains, repos, contributions, sbom, policy, domain_goals, repo_goals, messages, capability_requests
from api.routers import domains, repos, contributions, sbom, policy, domain_goals, repo_goals, messages, capability_requests, tpsc
@asynccontextmanager
@@ -48,6 +48,7 @@ app.include_router(contributions.router)
app.include_router(sbom.router)
app.include_router(messages.router)
app.include_router(capability_requests.router)
app.include_router(tpsc.router)
app.include_router(state.router)
app.include_router(policy.router)

View File

@@ -17,6 +17,7 @@ from api.models.sbom_entry import SBOMEntry, Ecosystem
from api.models.agent_message import AgentMessage
from api.models.capability_catalog import CapabilityCatalog
from api.models.capability_request import CapabilityRequest
from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry
__all__ = [
"Base",
@@ -38,4 +39,5 @@ __all__ = [
"AgentMessage",
"CapabilityCatalog",
"CapabilityRequest",
"TPSCCatalog", "TPSCSnapshot", "TPSCEntry",
]

64
api/models/tpsc.py Normal file
View File

@@ -0,0 +1,64 @@
import uuid
from datetime import datetime
from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, func
from sqlalchemy.dialects.postgresql import JSON, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from api.models.base import Base
class TPSCCatalog(Base):
    """Catalog row describing one third-party service, keyed by a unique slug.

    Stores the service's pricing model, GDPR maturity, legal document links
    and data-handling notes. ``TPSCEntry`` rows reference a catalog row
    through their ``catalog_id`` foreign key.
    """

    __tablename__ = "tpsc_catalog"

    # --- Identity -----------------------------------------------------------
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    slug: Mapped[str] = mapped_column(
        String(100),
        nullable=False,
        unique=True,
        index=True,
    )
    name: Mapped[str] = mapped_column(String(200), nullable=False)
    provider: Mapped[str | None] = mapped_column(String(200), nullable=True)
    category: Mapped[str | None] = mapped_column(String(100), nullable=True)
    website_url: Mapped[str | None] = mapped_column(Text, nullable=True)

    # --- Commercial terms ---------------------------------------------------
    # One of: free | paid | freemium | usage_based | unknown
    pricing_model: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        server_default="unknown",
    )

    # --- GDPR / data protection ---------------------------------------------
    # Maturity level (CNIL/IAPP CMMI-aligned), one of:
    # unknown | non_compliant | initial | developing | defined | managed | certified
    gdpr_maturity: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        server_default="unknown",
        index=True,
    )
    gdpr_notes: Mapped[str | None] = mapped_column(Text, nullable=True)
    dpa_available: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        server_default="false",
    )
    tos_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    privacy_policy_url: Mapped[str | None] = mapped_column(Text, nullable=True)
    data_processing_regions: Mapped[list | None] = mapped_column(JSON, nullable=True)
    data_retention_notes: Mapped[str | None] = mapped_column(Text, nullable=True)

    # --- Lifecycle ----------------------------------------------------------
    # One of: active | deprecated
    status: Mapped[str] = mapped_column(
        String(20),
        nullable=False,
        server_default="active",
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Usage rows that resolved to this catalog entry.
    entries: Mapped[list["TPSCEntry"]] = relationship(
        "TPSCEntry",
        back_populates="catalog_entry",
    )
class TPSCSnapshot(Base):
    """A timestamped set of TPSC entries, optionally linked to a managed repo.

    ``repo_id`` is nullable and is set to NULL when the referenced
    ``managed_repos`` row is deleted. Deleting a snapshot removes its
    entries (ORM cascade plus ``ON DELETE CASCADE`` on the entry side).
    """

    __tablename__ = "tpsc_snapshots"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    repo_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("managed_repos.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Filled in by the database at insert time.
    snapshot_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    source_file: Mapped[str | None] = mapped_column(String(200), nullable=True)
    entry_count: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        server_default="0",
    )

    entries: Mapped[list["TPSCEntry"]] = relationship(
        "TPSCEntry",
        back_populates="snapshot",
        cascade="all, delete-orphan",
    )
class TPSCEntry(Base):
    """One service usage line inside a ``TPSCSnapshot``.

    ``service_slug`` is always stored; ``catalog_id`` is an optional link to
    the matching ``TPSCCatalog`` row and becomes NULL if that row is deleted.
    """

    __tablename__ = "tpsc_entries"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    snapshot_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tpsc_snapshots.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )
    catalog_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tpsc_catalog.id", ondelete="SET NULL"),
        nullable=True,
    )
    service_slug: Mapped[str] = mapped_column(String(100), nullable=False, index=True)
    purpose: Mapped[str | None] = mapped_column(Text, nullable=True)
    # One of: api_key | oauth | cli | none | unknown
    auth_type: Mapped[str | None] = mapped_column(String(50), nullable=True)
    endpoint_override: Mapped[str | None] = mapped_column(Text, nullable=True)
    notes: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Both sides are mirrored by ``entries`` on the related models.
    snapshot: Mapped["TPSCSnapshot"] = relationship(
        "TPSCSnapshot",
        back_populates="entries",
    )
    catalog_entry: Mapped["TPSCCatalog | None"] = relationship(
        "TPSCCatalog",
        back_populates="entries",
    )

238
api/routers/tpsc.py Normal file
View File

@@ -0,0 +1,238 @@
from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from api.database import get_session
from api.models.managed_repo import ManagedRepo
from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry
from api.schemas.tpsc import (
TPSCCatalogCreate, TPSCCatalogRead,
TPSCEntryRead, TPSCIngestRequest, TPSCSnapshotRead,
TPSCGDPRReport, TPSCGDPRWarning, GDPR_WARNING_LEVELS,
)
# Router for the TPSC endpoints; every route registered on it is served
# under the "/tpsc" prefix and tagged "tpsc".
router = APIRouter(prefix="/tpsc", tags=["tpsc"])
# ---------------------------------------------------------------------------
# Catalog
# ---------------------------------------------------------------------------
@router.get("/catalog/", response_model=list[TPSCCatalogRead])
async def list_catalog(
    gdpr_maturity: str | None = None,
    category: str | None = None,
    pricing_model: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List non-deprecated catalog services, ordered by name.

    Each of ``gdpr_maturity``, ``category`` and ``pricing_model`` adds an
    exact-match filter when given a non-empty value; empty or missing values
    are ignored.
    """
    optional_filters = (
        (TPSCCatalog.gdpr_maturity, gdpr_maturity),
        (TPSCCatalog.category, category),
        (TPSCCatalog.pricing_model, pricing_model),
    )
    # Deprecated services are always hidden from the listing.
    conditions = [TPSCCatalog.status != "deprecated"]
    conditions.extend(column == wanted for column, wanted in optional_filters if wanted)

    stmt = select(TPSCCatalog).where(*conditions).order_by(TPSCCatalog.name)
    result = await session.execute(stmt)
    return result.scalars().all()
@router.get("/catalog/{slug}", response_model=TPSCCatalogRead)
async def get_catalog_entry(slug: str, session: AsyncSession = Depends(get_session)):
    """Return the catalog service with the given slug.

    The lookup does not filter on ``status``, so deprecated services are
    returned too.

    Raises:
        HTTPException: 404 when no catalog row has this slug.
    """
    stmt = select(TPSCCatalog).where(TPSCCatalog.slug == slug)
    service = (await session.execute(stmt)).scalar_one_or_none()
    if service is None:
        raise HTTPException(404, f"Service '{slug}' not found in catalog")
    return service
@router.post("/catalog/", response_model=TPSCCatalogRead, status_code=201)
async def register_service(body: TPSCCatalogCreate, session: AsyncSession = Depends(get_session)):
    """Create a catalog service, or update the existing one with the same slug.

    On update, only the fields explicitly present in the request body are
    written and ``updated_at`` is set to the current UTC time. The route
    responds with status 201 in both cases.
    """
    lookup = select(TPSCCatalog).where(TPSCCatalog.slug == body.slug)
    service = (await session.execute(lookup)).scalar_one_or_none()

    if service is None:
        # New slug: persist every field, schema defaults included.
        service = TPSCCatalog(**body.model_dump())
        session.add(service)
    else:
        # Known slug: leave fields the caller did not send untouched.
        for field_name, value in body.model_dump(exclude_unset=True).items():
            setattr(service, field_name, value)
        service.updated_at = datetime.now(tz=timezone.utc)

    await session.commit()
    await session.refresh(service)
    return service
# ---------------------------------------------------------------------------
# Ingest
# ---------------------------------------------------------------------------
@router.post("/ingest/", response_model=TPSCSnapshotRead, status_code=201)
async def ingest_tpsc(body: TPSCIngestRequest, session: AsyncSession = Depends(get_session)):
    """Accept a tpsc.yaml snapshot for a repo.

    Creates one ``TPSCSnapshot`` plus one ``TPSCEntry`` per submitted entry.
    Entries whose ``service_slug`` matches a catalog row are linked to it;
    unmatched entries are stored with ``catalog_id=None`` and reported with
    ``gdpr_warning=True``. An unknown ``repo_slug`` is not an error: the
    snapshot is stored with ``repo_id=None``.

    Returns:
        The stored snapshot with its entries, each enriched with the GDPR
        maturity and pricing model of its catalog row (when one matched).
    """
    # Resolve repo_id
    repo = (
        await session.execute(select(ManagedRepo).where(ManagedRepo.slug == body.repo_slug))
    ).scalar_one_or_none()
    repo_id = repo.id if repo else None

    # Build catalog lookup by slug
    slugs = {e.service_slug for e in body.entries}
    catalog_rows = (
        await session.execute(select(TPSCCatalog).where(TPSCCatalog.slug.in_(slugs)))
    ).scalars().all()
    catalog_map = {r.slug: r for r in catalog_rows}

    snapshot = TPSCSnapshot(
        repo_id=repo_id,
        source_file=body.source_file,
        entry_count=len(body.entries),
    )
    session.add(snapshot)
    await session.flush()  # assign snapshot.id so entries can reference it

    entries_with_cats = []
    for e in body.entries:
        cat = catalog_map.get(e.service_slug)
        entry = TPSCEntry(
            snapshot_id=snapshot.id,
            catalog_id=cat.id if cat else None,
            service_slug=e.service_slug,
            purpose=e.purpose,
            auth_type=e.auth_type,
            endpoint_override=e.endpoint_override,
            notes=e.notes,
        )
        session.add(entry)
        entries_with_cats.append((entry, cat))
    await session.flush()  # assign UUIDs to all entries

    # Build the response rows BEFORE committing: a commit may expire every
    # ORM instance in the session, and reading an expired attribute would
    # need an implicit (non-awaited) reload, which AsyncSession cannot do.
    entry_reads = [
        TPSCEntryRead(
            id=entry.id,
            snapshot_id=snapshot.id,
            catalog_id=cat.id if cat else None,
            service_slug=entry.service_slug,
            purpose=entry.purpose,
            auth_type=entry.auth_type,
            endpoint_override=entry.endpoint_override,
            notes=entry.notes,
            gdpr_maturity=cat.gdpr_maturity if cat else None,
            # A service missing from the catalog is treated as a warning.
            gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True,
            pricing_model=cat.pricing_model if cat else None,
        )
        for entry, cat in entries_with_cats
    ]

    await session.commit()
    # Explicit awaited reload picks up the server-generated snapshot_at.
    await session.refresh(snapshot)

    return TPSCSnapshotRead(
        id=snapshot.id,
        repo_id=snapshot.repo_id,
        snapshot_at=snapshot.snapshot_at,
        source_file=snapshot.source_file,
        entry_count=snapshot.entry_count,
        entries=entry_reads,
    )
# ---------------------------------------------------------------------------
# Snapshots
# ---------------------------------------------------------------------------
@router.get("/snapshots/", response_model=list[TPSCSnapshotRead])
async def list_snapshots(
    repo_slug: str | None = None,
    session: AsyncSession = Depends(get_session),
):
    """List snapshots, newest first, optionally restricted to one repo.

    Each entry is enriched with the GDPR maturity and pricing model of its
    catalog row; entries without a catalog row get ``gdpr_warning=True``.

    Raises:
        HTTPException: 404 when ``repo_slug`` is given but matches no repo.
    """
    # Eager-load entries AND their catalog rows: the loop below reads
    # e.catalog_entry, and a lazy load there would require implicit IO,
    # which is not possible under AsyncSession.
    q = select(TPSCSnapshot).options(
        selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry)
    )
    if repo_slug:
        repo = (
            await session.execute(select(ManagedRepo).where(ManagedRepo.slug == repo_slug))
        ).scalar_one_or_none()
        if not repo:
            raise HTTPException(404, f"Repo '{repo_slug}' not found")
        q = q.where(TPSCSnapshot.repo_id == repo.id)
    q = q.order_by(TPSCSnapshot.snapshot_at.desc())
    rows = (await session.execute(q)).scalars().all()

    result = []
    for snap in rows:
        entry_reads = []
        for e in snap.entries:
            cat = e.catalog_entry
            entry_reads.append(TPSCEntryRead(
                id=e.id,
                snapshot_id=e.snapshot_id,
                catalog_id=e.catalog_id,
                service_slug=e.service_slug,
                purpose=e.purpose,
                auth_type=e.auth_type,
                endpoint_override=e.endpoint_override,
                notes=e.notes,
                gdpr_maturity=cat.gdpr_maturity if cat else None,
                # A service missing from the catalog is treated as a warning.
                gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True,
                pricing_model=cat.pricing_model if cat else None,
            ))
        result.append(TPSCSnapshotRead(
            id=snap.id,
            repo_id=snap.repo_id,
            snapshot_at=snap.snapshot_at,
            source_file=snap.source_file,
            entry_count=snap.entry_count,
            entries=entry_reads,
        ))
    return result
# ---------------------------------------------------------------------------
# GDPR report
# ---------------------------------------------------------------------------
@router.get("/report/gdpr", response_model=TPSCGDPRReport)
async def gdpr_report(session: AsyncSession = Depends(get_session)):
    """Build the GDPR warning report from each repo's most recent snapshot.

    A warning is emitted once per (repo slug, service slug) pair whose
    maturity is in ``GDPR_WARNING_LEVELS``; an entry without a catalog row
    counts as ``"unknown"``. ``total_services`` and ``by_maturity`` are
    computed over every catalog row, independent of the snapshots.
    """
    # Newest snapshot timestamp per repo.
    newest_per_repo = (
        select(TPSCSnapshot.repo_id, func.max(TPSCSnapshot.snapshot_at).label("max_at"))
        .group_by(TPSCSnapshot.repo_id)
        .subquery()
    )
    # NOTE(review): snapshots with a NULL repo_id presumably never satisfy
    # this equality join (NULL = NULL is not true in SQL) -- confirm whether
    # unlinked snapshots are meant to be left out of the report.
    is_newest = (TPSCSnapshot.repo_id == newest_per_repo.c.repo_id) & (
        TPSCSnapshot.snapshot_at == newest_per_repo.c.max_at
    )
    snapshot_stmt = (
        select(TPSCSnapshot)
        .join(newest_per_repo, is_newest)
        .options(selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry))
    )
    latest_snaps = (await session.execute(snapshot_stmt)).scalars().all()

    # Repo id -> slug, used to label warnings.
    repos = (await session.execute(select(ManagedRepo))).scalars().all()
    slug_by_repo_id = {repo.id: repo.slug for repo in repos}

    # Catalog-wide tally of services per maturity level.
    all_services = (await session.execute(select(TPSCCatalog))).scalars().all()
    by_maturity: dict[str, int] = {}
    for service in all_services:
        level = service.gdpr_maturity
        by_maturity[level] = by_maturity.get(level, 0) + 1

    warnings = []
    already_reported = set()
    for snap in latest_snaps:
        repo_slug = slug_by_repo_id.get(snap.repo_id) if snap.repo_id else None
        for entry in snap.entries:
            cat = entry.catalog_entry
            maturity = cat.gdpr_maturity if cat else "unknown"
            if maturity not in GDPR_WARNING_LEVELS:
                continue
            pair = (repo_slug, entry.service_slug)
            if pair in already_reported:
                continue
            already_reported.add(pair)
            warnings.append(
                TPSCGDPRWarning(
                    repo_slug=repo_slug,
                    service_slug=entry.service_slug,
                    gdpr_maturity=maturity,
                    purpose=entry.purpose,
                    pricing_model=cat.pricing_model if cat else None,
                )
            )

    return TPSCGDPRReport(
        generated_at=datetime.now(tz=timezone.utc),
        total_services=len(all_services),
        warning_count=len(warnings),
        warnings=warnings,
        by_maturity=by_maturity,
    )

115
api/schemas/tpsc.py Normal file
View File

@@ -0,0 +1,115 @@
import uuid
from datetime import datetime
from typing import Literal
from pydantic import BaseModel, computed_field
# GDPR maturity scale (CNIL/IAPP CMMI-aligned, adapted for third-party assessment)
# Used to validate `gdpr_maturity` on catalog create requests.
GDPRMaturity = Literal["unknown", "non_compliant", "initial", "developing", "defined", "managed", "certified"]
# Services at these levels trigger a GDPR warning
# (membership is tested with `in` against a maturity string).
GDPR_WARNING_LEVELS = {"unknown", "non_compliant", "initial"}
# Accepted `pricing_model` values on catalog create requests.
PricingModel = Literal["free", "paid", "freemium", "usage_based", "unknown"]
# Known `auth_type` values.
# NOTE(review): no schema field visible in this file uses this alias; the
# entry schemas type `auth_type` as plain `str | None` -- confirm whether
# validation against this Literal was intended.
AuthType = Literal["api_key", "oauth", "cli", "none", "unknown"]
class TPSCCatalogCreate(BaseModel):
    """Request body for registering (or upserting) a catalog service.

    Only ``slug`` and ``name`` are required; every other field has a default.
    """

    # Identity
    slug: str
    name: str
    provider: str | None = None
    category: str | None = None
    website_url: str | None = None

    # Commercial terms
    pricing_model: PricingModel = "unknown"

    # GDPR / data protection
    gdpr_maturity: GDPRMaturity = "unknown"
    gdpr_notes: str | None = None
    dpa_available: bool = False
    tos_url: str | None = None
    privacy_policy_url: str | None = None
    data_processing_regions: list[str] | None = None
    data_retention_notes: str | None = None

    # Lifecycle (free-form string here; not restricted to a Literal)
    status: str = "active"
class TPSCCatalogRead(BaseModel):
    """Response shape for a catalog service, buildable from an ORM row.

    Adds the derived ``gdpr_warning`` flag to the stored columns.
    """

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = {"from_attributes": True}

    # Identity
    id: uuid.UUID
    slug: str
    name: str
    provider: str | None
    category: str | None
    website_url: str | None

    # Commercial terms
    pricing_model: str

    # GDPR / data protection
    gdpr_maturity: str
    gdpr_notes: str | None
    dpa_available: bool
    tos_url: str | None
    privacy_policy_url: str | None
    data_processing_regions: list[str] | None
    data_retention_notes: str | None

    # Lifecycle
    status: str
    created_at: datetime
    updated_at: datetime

    @computed_field
    @property
    def gdpr_warning(self) -> bool:
        """Whether this service's maturity is one of ``GDPR_WARNING_LEVELS``."""
        return self.gdpr_maturity in GDPR_WARNING_LEVELS
class TPSCEntryCreate(BaseModel):
    """One service usage line inside an ingest request.

    Only ``service_slug`` is required.
    """

    service_slug: str

    # Optional descriptive fields; all default to None.
    purpose: str | None = None
    auth_type: str | None = None
    endpoint_override: str | None = None
    notes: str | None = None
class TPSCEntryRead(BaseModel):
    """Response shape for a snapshot entry, with catalog data copied in."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = {"from_attributes": True}

    # Stored entry columns
    id: uuid.UUID
    snapshot_id: uuid.UUID
    catalog_id: uuid.UUID | None
    service_slug: str
    purpose: str | None
    auth_type: str | None
    endpoint_override: str | None
    notes: str | None

    # Denormalised from catalog for convenience; these defaults apply only
    # when the caller does not supply the values.
    gdpr_maturity: str | None = None
    gdpr_warning: bool = False
    pricing_model: str | None = None
class TPSCIngestRequest(BaseModel):
    """Request body for ingesting one repo's TPSC file."""

    # Slug of the repo the entries belong to.
    repo_slug: str
    # Name of the file the entries were read from.
    source_file: str = "tpsc.yaml"
    # Service usage lines to store in the new snapshot.
    entries: list[TPSCEntryCreate]
class TPSCSnapshotRead(BaseModel):
    """Response shape for a snapshot together with its entries."""

    # Allow validation straight from attribute-bearing objects (ORM rows).
    model_config = {"from_attributes": True}

    id: uuid.UUID
    # None when the snapshot is not linked to a repo.
    repo_id: uuid.UUID | None
    snapshot_at: datetime
    source_file: str | None
    entry_count: int
    entries: list[TPSCEntryRead] = []
class TPSCGDPRWarning(BaseModel):
    """One GDPR warning line: a service used by a repo at a warning-level maturity."""

    # Where the service is used (None when no repo slug is known).
    repo_slug: str | None
    service_slug: str

    # Why it is flagged, plus context copied from the entry and catalog.
    gdpr_maturity: str
    purpose: str | None
    pricing_model: str | None
class TPSCGDPRReport(BaseModel):
    """Aggregated GDPR report: warning lines plus per-maturity service counts."""

    generated_at: datetime

    # Totals
    total_services: int
    warning_count: int

    # Details
    warnings: list[TPSCGDPRWarning]
    # Maturity level -> number of services at that level.
    by_maturity: dict[str, int]