From c7a893f0684f956d880c3f5f5dce36c7913cd078 Mon Sep 17 00:00:00 2001 From: tegwick Date: Fri, 20 Mar 2026 00:15:26 +0100 Subject: [PATCH] feat(tpsc): Third-Party Services Catalog (CUST-WP-0023) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces TPSC for tracking external service dependencies with GDPR compliance maturity (CNIL/IAPP CMMI scale), pricing model, ToS, and data retention information across all repos. Primary data: - canon/tpsc/{openai,anthropic,gemini,openrouter}-api.yaml — service definitions - tpsc.yaml in each repo (llm-connect seeded with 4 services) State-hub additions: - Migration j7e8f9a0b1c2: tpsc_catalog + tpsc_snapshots + tpsc_entries - api/models/tpsc.py, api/schemas/tpsc.py, api/routers/tpsc.py - /tpsc/catalog/, /tpsc/ingest/, /tpsc/snapshots/, /tpsc/report/gdpr endpoints - 4 MCP tools: register_service, list_services, ingest_tpsc_tool, get_gdpr_report - scripts/ingest_tpsc.py + make ingest-tpsc[/-all] targets - Dashboard: tpsc.md page + docs/tpsc.md GDPR maturity scale: unknown | non_compliant | initial | developing | defined | managed | certified Warnings triggered at: unknown, non_compliant, initial Co-Authored-By: Claude Sonnet 4.6 (1M context) --- agents/agent-scope-analyst.md | 17 ++ canon/tpsc/anthropic-api.yaml | 22 ++ canon/tpsc/gemini-api.yaml | 26 ++ canon/tpsc/openai-api.yaml | 24 ++ canon/tpsc/openrouter-api.yaml | 27 ++ state-hub/Makefile | 13 + state-hub/api/main.py | 3 +- state-hub/api/models/__init__.py | 2 + state-hub/api/models/tpsc.py | 64 +++++ state-hub/api/routers/tpsc.py | 238 ++++++++++++++++++ state-hub/api/schemas/tpsc.py | 115 +++++++++ state-hub/dashboard/observablehq.config.js | 2 + state-hub/dashboard/src/docs/tpsc.md | 136 ++++++++++ state-hub/dashboard/src/tpsc.md | 193 ++++++++++++++ state-hub/mcp_server/server.py | 134 ++++++++++ .../migrations/versions/j7e8f9a0b1c2_tpsc.py | 70 ++++++ state-hub/pyproject.toml | 1 + state-hub/scripts/ingest_tpsc.py | 145 
+++++++++++ state-hub/uv.lock | 11 + workplans/CUST-WP-0023-tpsc.md | 210 ++++++++++++++++ 20 files changed, 1452 insertions(+), 1 deletion(-) create mode 100644 canon/tpsc/anthropic-api.yaml create mode 100644 canon/tpsc/gemini-api.yaml create mode 100644 canon/tpsc/openai-api.yaml create mode 100644 canon/tpsc/openrouter-api.yaml create mode 100644 state-hub/api/models/tpsc.py create mode 100644 state-hub/api/routers/tpsc.py create mode 100644 state-hub/api/schemas/tpsc.py create mode 100644 state-hub/dashboard/src/docs/tpsc.md create mode 100644 state-hub/dashboard/src/tpsc.md create mode 100644 state-hub/migrations/versions/j7e8f9a0b1c2_tpsc.py create mode 100644 state-hub/scripts/ingest_tpsc.py create mode 100644 workplans/CUST-WP-0023-tpsc.md diff --git a/agents/agent-scope-analyst.md b/agents/agent-scope-analyst.md index b8d491c..6e6c31b 100644 --- a/agents/agent-scope-analyst.md +++ b/agents/agent-scope-analyst.md @@ -309,6 +309,23 @@ Use this structure when creating or rewriting SCOPE.md: --- +## Provided Capabilities + + + + + + + +--- + ## Notes diff --git a/canon/tpsc/anthropic-api.yaml b/canon/tpsc/anthropic-api.yaml new file mode 100644 index 0000000..82fbead --- /dev/null +++ b/canon/tpsc/anthropic-api.yaml @@ -0,0 +1,22 @@ +slug: anthropic-api +name: Anthropic API (Claude) +provider: Anthropic, PBC +category: llm_inference +website_url: https://anthropic.com +pricing_model: usage_based +gdpr_maturity: developing +gdpr_notes: > + DPA available. SCCs provided for EU→US transfers. Data processed in the US. + Prompts and completions are not used for training by default (API usage). + Data may be retained for up to 30 days for trust & safety review. + Privacy Shield successor mechanisms in place for international transfers. 
+ Reference: https://www.anthropic.com/legal/data-processing-agreement +dpa_available: true +tos_url: https://www.anthropic.com/legal/aup +privacy_policy_url: https://www.anthropic.com/legal/privacy +data_processing_regions: + - us +data_retention_notes: > + API inputs/outputs not used for model training. Retained up to 30 days + for safety review. Enterprise customers can negotiate reduced retention. +status: active diff --git a/canon/tpsc/gemini-api.yaml b/canon/tpsc/gemini-api.yaml new file mode 100644 index 0000000..3a4a705 --- /dev/null +++ b/canon/tpsc/gemini-api.yaml @@ -0,0 +1,26 @@ +slug: gemini-api +name: Google Gemini API +provider: Google LLC +category: llm_inference +website_url: https://ai.google.dev +pricing_model: usage_based +gdpr_maturity: defined +gdpr_notes: > + Google Cloud (including Vertex AI / Gemini API via Google Cloud) has a + comprehensive GDPR compliance programme. DPA is part of Google Cloud ToS. + Data Processing Addendum and SCCs available. Google Cloud has EU data + residency options (data can stay in EU). Google AI Studio (free tier) + has weaker protections — data may be used to improve Google products. + Use Vertex AI / Google Cloud API endpoint for GDPR-adequate usage. + Reference: https://cloud.google.com/terms/data-processing-addendum +dpa_available: true +tos_url: https://ai.google.dev/gemini-api/terms +privacy_policy_url: https://policies.google.com/privacy +data_processing_regions: + - us + - eu # when using Vertex AI with EU region selection +data_retention_notes: > + Google AI Studio: prompts may be reviewed by human raters and used to + improve models. Google Cloud / Vertex AI: data not used for training, + retained per Cloud data retention policy (configurable). +status: active diff --git a/canon/tpsc/openai-api.yaml b/canon/tpsc/openai-api.yaml new file mode 100644 index 0000000..a92ad5d --- /dev/null +++ b/canon/tpsc/openai-api.yaml @@ -0,0 +1,24 @@ +slug: openai-api +name: OpenAI API +provider: OpenAI, Inc. 
+category: llm_inference +website_url: https://openai.com +pricing_model: usage_based +gdpr_maturity: developing +gdpr_notes: > + DPA available (Data Processing Addendum). Standard Contractual Clauses (SCCs) + provided for EU→US data transfers. Data is processed in the US. + Input/output retained up to 30 days for safety monitoring unless opted out + via the API zero-data-retention setting. Zero-data-retention is available + on eligible endpoints. Not suitable for sensitive personal data without a + signed DPA and explicit zero-retention configuration. + Reference: https://openai.com/policies/data-processing-addendum +dpa_available: true +tos_url: https://openai.com/policies/terms-of-use +privacy_policy_url: https://openai.com/policies/privacy-policy +data_processing_regions: + - us +data_retention_notes: > + Default: 30 days for abuse monitoring. Zero-data-retention available + on eligible API endpoints via opt-in. Training opt-out available. +status: active diff --git a/canon/tpsc/openrouter-api.yaml b/canon/tpsc/openrouter-api.yaml new file mode 100644 index 0000000..96d8a0f --- /dev/null +++ b/canon/tpsc/openrouter-api.yaml @@ -0,0 +1,27 @@ +slug: openrouter-api +name: OpenRouter API +provider: OpenRouter, Inc. +category: llm_routing +website_url: https://openrouter.ai +pricing_model: usage_based +gdpr_maturity: initial +gdpr_notes: > + OpenRouter is a US-based routing proxy for multiple LLM providers. + Privacy policy exists but as of early 2026 no formal DPA or SCCs are + publicly available. Requests are forwarded to underlying providers + (OpenAI, Anthropic, Google, Mistral, etc.) each with their own data + handling. GDPR compliance is therefore dependent on both OpenRouter + and the selected downstream model provider. Not recommended for + processing personal data in corporate/regulated environments without + a signed DPA. Suitable for development, prototyping, model comparison. 
+ Reference: https://openrouter.ai/privacy +dpa_available: false +tos_url: https://openrouter.ai/terms +privacy_policy_url: https://openrouter.ai/privacy +data_processing_regions: + - us +data_retention_notes: > + Requests forwarded to upstream providers. OpenRouter may log requests + for billing and abuse prevention. Retention policy not formally published. + Downstream provider retention policies apply per selected model. +status: active diff --git a/state-hub/Makefile b/state-hub/Makefile index 7712e70..947ea1a 100644 --- a/state-hub/Makefile +++ b/state-hub/Makefile @@ -183,6 +183,19 @@ ingest-capabilities-all: uv run python scripts/ingest_capabilities.py --all \ $(if $(DRY_RUN),--dry-run) +## Ingest tpsc.yaml service declarations from a repo into the TPSC catalog. +## Usage: make ingest-tpsc REPO=llm-connect +## Or: make ingest-tpsc-all +## Add DRY_RUN=1 to preview without writing. +ingest-tpsc: + @test -n "$(REPO)" || (echo "ERROR: REPO is required."; exit 1) + uv run python scripts/ingest_tpsc.py --repo "$(REPO)" \ + $(if $(DRY_RUN),--dry-run) + +ingest-tpsc-all: + uv run python scripts/ingest_tpsc.py --all \ + $(if $(DRY_RUN),--dry-run) + ## Run SBOM capture agent for a repo — generates/updates sbom-tools.yaml. ## Usage: make capture-tools REPO=railiance-infra [REPO_PATH=/home/worsch/railiance-infra] ## Add DRY_RUN=1 to preview without writing. 
diff --git a/state-hub/api/main.py b/state-hub/api/main.py index 605bce8..7551442 100644 --- a/state-hub/api/main.py +++ b/state-hub/api/main.py @@ -6,7 +6,7 @@ from fastapi.middleware.cors import CORSMiddleware from api.database import engine from api.routers import decisions, extension_points, progress, state, tasks, technical_debt, topics, workstreams, workstream_dependencies -from api.routers import domains, repos, contributions, sbom, policy, domain_goals, repo_goals, messages, capability_requests +from api.routers import domains, repos, contributions, sbom, policy, domain_goals, repo_goals, messages, capability_requests, tpsc @asynccontextmanager @@ -48,6 +48,7 @@ app.include_router(contributions.router) app.include_router(sbom.router) app.include_router(messages.router) app.include_router(capability_requests.router) +app.include_router(tpsc.router) app.include_router(state.router) app.include_router(policy.router) diff --git a/state-hub/api/models/__init__.py b/state-hub/api/models/__init__.py index 4b7e6f9..16b25f4 100644 --- a/state-hub/api/models/__init__.py +++ b/state-hub/api/models/__init__.py @@ -17,6 +17,7 @@ from api.models.sbom_entry import SBOMEntry, Ecosystem from api.models.agent_message import AgentMessage from api.models.capability_catalog import CapabilityCatalog from api.models.capability_request import CapabilityRequest +from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry __all__ = [ "Base", @@ -38,4 +39,5 @@ __all__ = [ "AgentMessage", "CapabilityCatalog", "CapabilityRequest", + "TPSCCatalog", "TPSCSnapshot", "TPSCEntry", ] diff --git a/state-hub/api/models/tpsc.py b/state-hub/api/models/tpsc.py new file mode 100644 index 0000000..1ed9128 --- /dev/null +++ b/state-hub/api/models/tpsc.py @@ -0,0 +1,64 @@ +import uuid +from datetime import datetime +from sqlalchemy import Boolean, DateTime, ForeignKey, Integer, String, Text, func +from sqlalchemy.dialects.postgresql import JSON, UUID +from sqlalchemy.orm import Mapped, 
mapped_column, relationship + +from api.models.base import Base + + +class TPSCCatalog(Base): + __tablename__ = "tpsc_catalog" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + slug: Mapped[str] = mapped_column(String(100), nullable=False, unique=True, index=True) + name: Mapped[str] = mapped_column(String(200), nullable=False) + provider: Mapped[str | None] = mapped_column(String(200), nullable=True) + category: Mapped[str | None] = mapped_column(String(100), nullable=True) + website_url: Mapped[str | None] = mapped_column(Text, nullable=True) + # Pricing: free | paid | freemium | usage_based | unknown + pricing_model: Mapped[str] = mapped_column(String(20), nullable=False, server_default="unknown") + # GDPR maturity (CNIL/IAPP CMMI-aligned): + # unknown | non_compliant | initial | developing | defined | managed | certified + gdpr_maturity: Mapped[str] = mapped_column(String(20), nullable=False, server_default="unknown", index=True) + gdpr_notes: Mapped[str | None] = mapped_column(Text, nullable=True) + dpa_available: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="false") + tos_url: Mapped[str | None] = mapped_column(Text, nullable=True) + privacy_policy_url: Mapped[str | None] = mapped_column(Text, nullable=True) + data_processing_regions: Mapped[list | None] = mapped_column(JSON, nullable=True) + data_retention_notes: Mapped[str | None] = mapped_column(Text, nullable=True) + # status: active | deprecated + status: Mapped[str] = mapped_column(String(20), nullable=False, server_default="active") + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + + entries: Mapped[list["TPSCEntry"]] = relationship("TPSCEntry", back_populates="catalog_entry") + + +class TPSCSnapshot(Base): + __tablename__ = "tpsc_snapshots" + + id: 
Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + repo_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), ForeignKey("managed_repos.id", ondelete="SET NULL"), nullable=True, index=True) + snapshot_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + source_file: Mapped[str | None] = mapped_column(String(200), nullable=True) + entry_count: Mapped[int] = mapped_column(Integer, nullable=False, server_default="0") + + entries: Mapped[list["TPSCEntry"]] = relationship("TPSCEntry", back_populates="snapshot", cascade="all, delete-orphan") + + +class TPSCEntry(Base): + __tablename__ = "tpsc_entries" + + id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + snapshot_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), ForeignKey("tpsc_snapshots.id", ondelete="CASCADE"), nullable=False, index=True) + catalog_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), ForeignKey("tpsc_catalog.id", ondelete="SET NULL"), nullable=True) + service_slug: Mapped[str] = mapped_column(String(100), nullable=False, index=True) + purpose: Mapped[str | None] = mapped_column(Text, nullable=True) + # auth_type: api_key | oauth | cli | none | unknown + auth_type: Mapped[str | None] = mapped_column(String(50), nullable=True) + endpoint_override: Mapped[str | None] = mapped_column(Text, nullable=True) + notes: Mapped[str | None] = mapped_column(Text, nullable=True) + + snapshot: Mapped["TPSCSnapshot"] = relationship("TPSCSnapshot", back_populates="entries") + catalog_entry: Mapped["TPSCCatalog | None"] = relationship("TPSCCatalog", back_populates="entries") diff --git a/state-hub/api/routers/tpsc.py b/state-hub/api/routers/tpsc.py new file mode 100644 index 0000000..c16801c --- /dev/null +++ b/state-hub/api/routers/tpsc.py @@ -0,0 +1,238 @@ +from datetime import datetime, timezone +from fastapi import APIRouter, Depends, HTTPException +from 
sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import selectinload + +from api.database import get_session +from api.models.managed_repo import ManagedRepo +from api.models.tpsc import TPSCCatalog, TPSCSnapshot, TPSCEntry +from api.schemas.tpsc import ( + TPSCCatalogCreate, TPSCCatalogRead, + TPSCEntryRead, TPSCIngestRequest, TPSCSnapshotRead, + TPSCGDPRReport, TPSCGDPRWarning, GDPR_WARNING_LEVELS, +) + +router = APIRouter(prefix="/tpsc", tags=["tpsc"]) + + +# --------------------------------------------------------------------------- +# Catalog +# --------------------------------------------------------------------------- + +@router.get("/catalog/", response_model=list[TPSCCatalogRead]) +async def list_catalog( + gdpr_maturity: str | None = None, + category: str | None = None, + pricing_model: str | None = None, + session: AsyncSession = Depends(get_session), +): + q = select(TPSCCatalog).where(TPSCCatalog.status != "deprecated") + if gdpr_maturity: + q = q.where(TPSCCatalog.gdpr_maturity == gdpr_maturity) + if category: + q = q.where(TPSCCatalog.category == category) + if pricing_model: + q = q.where(TPSCCatalog.pricing_model == pricing_model) + q = q.order_by(TPSCCatalog.name) + rows = (await session.execute(q)).scalars().all() + return rows + + +@router.get("/catalog/{slug}", response_model=TPSCCatalogRead) +async def get_catalog_entry(slug: str, session: AsyncSession = Depends(get_session)): + row = (await session.execute(select(TPSCCatalog).where(TPSCCatalog.slug == slug))).scalar_one_or_none() + if not row: + raise HTTPException(404, f"Service '{slug}' not found in catalog") + return row + + +@router.post("/catalog/", response_model=TPSCCatalogRead, status_code=201) +async def register_service(body: TPSCCatalogCreate, session: AsyncSession = Depends(get_session)): + """Register a new service or upsert an existing one by slug.""" + existing = (await 
session.execute(select(TPSCCatalog).where(TPSCCatalog.slug == body.slug))).scalar_one_or_none() + if existing: + for k, v in body.model_dump(exclude_unset=True).items(): + setattr(existing, k, v) + existing.updated_at = datetime.now(tz=timezone.utc) + await session.commit() + await session.refresh(existing) + return existing + entry = TPSCCatalog(**body.model_dump()) + session.add(entry) + await session.commit() + await session.refresh(entry) + return entry + + +# --------------------------------------------------------------------------- +# Ingest +# --------------------------------------------------------------------------- + +@router.post("/ingest/", response_model=TPSCSnapshotRead, status_code=201) +async def ingest_tpsc(body: TPSCIngestRequest, session: AsyncSession = Depends(get_session)): + """Accept a tpsc.yaml snapshot for a repo.""" + # Resolve repo_id + repo = (await session.execute(select(ManagedRepo).where(ManagedRepo.slug == body.repo_slug))).scalar_one_or_none() + repo_id = repo.id if repo else None + + # Build catalog lookup by slug + slugs = {e.service_slug for e in body.entries} + catalog_rows = (await session.execute(select(TPSCCatalog).where(TPSCCatalog.slug.in_(slugs)))).scalars().all() + catalog_map = {r.slug: r for r in catalog_rows} + + snapshot = TPSCSnapshot( + repo_id=repo_id, + source_file=body.source_file, + entry_count=len(body.entries), + ) + session.add(snapshot) + await session.flush() + + entries_with_cats = [] + for e in body.entries: + cat = catalog_map.get(e.service_slug) + entry = TPSCEntry( + snapshot_id=snapshot.id, + catalog_id=cat.id if cat else None, + service_slug=e.service_slug, + purpose=e.purpose, + auth_type=e.auth_type, + endpoint_override=e.endpoint_override, + notes=e.notes, + ) + session.add(entry) + entries_with_cats.append((entry, cat)) + + await session.flush() # assign UUIDs to all entries + await session.commit() + await session.refresh(snapshot) + + entry_reads = [ + TPSCEntryRead( + id=entry.id, + 
snapshot_id=snapshot.id, + catalog_id=cat.id if cat else None, + service_slug=entry.service_slug, + purpose=entry.purpose, + auth_type=entry.auth_type, + endpoint_override=entry.endpoint_override, + notes=entry.notes, + gdpr_maturity=cat.gdpr_maturity if cat else None, + gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True, + pricing_model=cat.pricing_model if cat else None, + ) + for entry, cat in entries_with_cats + ] + + return TPSCSnapshotRead( + id=snapshot.id, + repo_id=snapshot.repo_id, + snapshot_at=snapshot.snapshot_at, + source_file=snapshot.source_file, + entry_count=snapshot.entry_count, + entries=entry_reads, + ) + + +# --------------------------------------------------------------------------- +# Snapshots +# --------------------------------------------------------------------------- + +@router.get("/snapshots/", response_model=list[TPSCSnapshotRead]) +async def list_snapshots( + repo_slug: str | None = None, + session: AsyncSession = Depends(get_session), +): + q = select(TPSCSnapshot).options(selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry)) # eager-load catalog rows too: a lazy load of e.catalog_entry below is implicit IO, which AsyncSession rejects (MissingGreenlet) + if repo_slug: + repo = (await session.execute(select(ManagedRepo).where(ManagedRepo.slug == repo_slug))).scalar_one_or_none() + if not repo: + raise HTTPException(404, f"Repo '{repo_slug}' not found") + q = q.where(TPSCSnapshot.repo_id == repo.id) + q = q.order_by(TPSCSnapshot.snapshot_at.desc()) + rows = (await session.execute(q)).scalars().all() + + result = [] + for snap in rows: + entry_reads = [] + for e in snap.entries: + cat = e.catalog_entry + entry_reads.append(TPSCEntryRead( + id=e.id, + snapshot_id=e.snapshot_id, + catalog_id=e.catalog_id, + service_slug=e.service_slug, + purpose=e.purpose, + auth_type=e.auth_type, + endpoint_override=e.endpoint_override, + notes=e.notes, + gdpr_maturity=cat.gdpr_maturity if cat else None, + gdpr_warning=(cat.gdpr_maturity in GDPR_WARNING_LEVELS) if cat else True, + pricing_model=cat.pricing_model if cat else None, + )) + result.append(TPSCSnapshotRead( +
id=snap.id, + repo_id=snap.repo_id, + snapshot_at=snap.snapshot_at, + source_file=snap.source_file, + entry_count=snap.entry_count, + entries=entry_reads, + )) + return result + + +# --------------------------------------------------------------------------- +# GDPR report +# --------------------------------------------------------------------------- + +@router.get("/report/gdpr", response_model=TPSCGDPRReport) +async def gdpr_report(session: AsyncSession = Depends(get_session)): + """Aggregated GDPR warnings across all latest repo snapshots.""" + # Latest snapshot per repo (repo_id is NULL for snapshots ingested under an unregistered repo slug) + latest_sub = ( + select(TPSCSnapshot.repo_id, func.max(TPSCSnapshot.snapshot_at).label("max_at")) + .group_by(TPSCSnapshot.repo_id) + .subquery() + ) + latest_snaps = (await session.execute( + select(TPSCSnapshot) + .join(latest_sub, TPSCSnapshot.repo_id.is_not_distinct_from(latest_sub.c.repo_id) & (TPSCSnapshot.snapshot_at == latest_sub.c.max_at)) # NULL-safe match: plain == never matches the NULL-repo group, silently dropping it from the report + .options(selectinload(TPSCSnapshot.entries).selectinload(TPSCEntry.catalog_entry)) + )).scalars().all() + + # Repo slug lookup + all_repos = (await session.execute(select(ManagedRepo))).scalars().all() + repo_map = {r.id: r.slug for r in all_repos} + + all_services = (await session.execute(select(TPSCCatalog))).scalars().all() + by_maturity: dict[str, int] = {} + for s in all_services: + by_maturity[s.gdpr_maturity] = by_maturity.get(s.gdpr_maturity, 0) + 1 + + warnings = [] + seen = set() + for snap in latest_snaps: + repo_slug = repo_map.get(snap.repo_id) if snap.repo_id else None + for entry in snap.entries: + cat = entry.catalog_entry + maturity = cat.gdpr_maturity if cat else "unknown" + if maturity in GDPR_WARNING_LEVELS: + key = (repo_slug, entry.service_slug) + if key not in seen: + seen.add(key) + warnings.append(TPSCGDPRWarning( + repo_slug=repo_slug, + service_slug=entry.service_slug, + gdpr_maturity=maturity, + purpose=entry.purpose, + pricing_model=cat.pricing_model if cat else None, + )) + + return TPSCGDPRReport( +
generated_at=datetime.now(tz=timezone.utc), + total_services=len(all_services), + warning_count=len(warnings), + warnings=warnings, + by_maturity=by_maturity, + ) diff --git a/state-hub/api/schemas/tpsc.py b/state-hub/api/schemas/tpsc.py new file mode 100644 index 0000000..dabed4e --- /dev/null +++ b/state-hub/api/schemas/tpsc.py @@ -0,0 +1,115 @@ +import uuid +from datetime import datetime +from typing import Literal +from pydantic import BaseModel, computed_field + +# GDPR maturity scale (CNIL/IAPP CMMI-aligned, adapted for third-party assessment) +GDPRMaturity = Literal["unknown", "non_compliant", "initial", "developing", "defined", "managed", "certified"] + +# Services at these levels trigger a GDPR warning +GDPR_WARNING_LEVELS = {"unknown", "non_compliant", "initial"} + +PricingModel = Literal["free", "paid", "freemium", "usage_based", "unknown"] +AuthType = Literal["api_key", "oauth", "cli", "none", "unknown"] + + +class TPSCCatalogCreate(BaseModel): + slug: str + name: str + provider: str | None = None + category: str | None = None + website_url: str | None = None + pricing_model: PricingModel = "unknown" + gdpr_maturity: GDPRMaturity = "unknown" + gdpr_notes: str | None = None + dpa_available: bool = False + tos_url: str | None = None + privacy_policy_url: str | None = None + data_processing_regions: list[str] | None = None + data_retention_notes: str | None = None + status: str = "active" + + +class TPSCCatalogRead(BaseModel): + model_config = {"from_attributes": True} + + id: uuid.UUID + slug: str + name: str + provider: str | None + category: str | None + website_url: str | None + pricing_model: str + gdpr_maturity: str + gdpr_notes: str | None + dpa_available: bool + tos_url: str | None + privacy_policy_url: str | None + data_processing_regions: list[str] | None + data_retention_notes: str | None + status: str + created_at: datetime + updated_at: datetime + + @computed_field + @property + def gdpr_warning(self) -> bool: + return self.gdpr_maturity in 
GDPR_WARNING_LEVELS + + +class TPSCEntryCreate(BaseModel): + service_slug: str + purpose: str | None = None + auth_type: str | None = None + endpoint_override: str | None = None + notes: str | None = None + + +class TPSCEntryRead(BaseModel): + model_config = {"from_attributes": True} + + id: uuid.UUID + snapshot_id: uuid.UUID + catalog_id: uuid.UUID | None + service_slug: str + purpose: str | None + auth_type: str | None + endpoint_override: str | None + notes: str | None + # Denormalised from catalog for convenience + gdpr_maturity: str | None = None + gdpr_warning: bool = False + pricing_model: str | None = None + + +class TPSCIngestRequest(BaseModel): + repo_slug: str + source_file: str = "tpsc.yaml" + entries: list[TPSCEntryCreate] + + +class TPSCSnapshotRead(BaseModel): + model_config = {"from_attributes": True} + + id: uuid.UUID + repo_id: uuid.UUID | None + snapshot_at: datetime + source_file: str | None + entry_count: int + entries: list[TPSCEntryRead] = [] + + +class TPSCGDPRWarning(BaseModel): + repo_slug: str | None + service_slug: str + gdpr_maturity: str + purpose: str | None + pricing_model: str | None + + +class TPSCGDPRReport(BaseModel): + generated_at: datetime + total_services: int + warning_count: int + warnings: list[TPSCGDPRWarning] + by_maturity: dict[str, int] diff --git a/state-hub/dashboard/observablehq.config.js b/state-hub/dashboard/observablehq.config.js index f975898..c24dba8 100644 --- a/state-hub/dashboard/observablehq.config.js +++ b/state-hub/dashboard/observablehq.config.js @@ -25,6 +25,7 @@ export default { { name: "Goals", path: "/goals" }, { name: "Inbox", path: "/inbox" }, { name: "Progress", path: "/progress" }, + { name: "Services (TPSC)", path: "/tpsc" }, { name: "Todo", path: "/todo" }, // ── Sections (alphabetical) ─────────────────────────────────────────────── { @@ -88,6 +89,7 @@ export default { { name: "SBOM", path: "/docs/sbom" }, { name: "SCOPE.md", path: "/docs/scope" }, { name: "Tasks", path: "/docs/tasks" }, + { 
name: "TPSC", path: "/docs/tpsc" }, { name: "Technical Debt", path: "/docs/debt" }, { name: "Todo", path: "/docs/todo" }, { name: "Workstream Health", path: "/docs/workstream-health-index" }, diff --git a/state-hub/dashboard/src/docs/tpsc.md b/state-hub/dashboard/src/docs/tpsc.md new file mode 100644 index 0000000..08d4f7d --- /dev/null +++ b/state-hub/dashboard/src/docs/tpsc.md @@ -0,0 +1,136 @@ +--- +title: Third-Party Services Catalog (TPSC) +--- + +# Third-Party Services Catalog (TPSC) + +The TPSC tracks external service dependencies (APIs, SaaS, CLIs) across all +registered repos — complementing the SBOM for package dependencies. + +--- + +## Why TPSC? + +Package lockfiles capture Python/JS/Rust dependencies but miss the external +HTTP services your code calls. These carry compliance, cost, and privacy +implications that are invisible to standard SBOM tooling. + +TPSC provides: +- A registry of which repos use which external services +- GDPR compliance maturity ratings per service +- Pricing model tracking (paid/usage-based costs) +- Data processing region and retention information +- GDPR warnings for services not suitable in regulated environments + +--- + +## Primary Data Locations + +Following ADR-001 (workplans as repo artefacts), TPSC data lives in two places: + +| Location | Purpose | +|---|---| +| `<repo>/tpsc.yaml` | Declares which services the repo uses | +| `the-custodian/canon/tpsc/<slug>.yaml` | Canonical service metadata (ToS, GDPR, pricing) | + +The state-hub is a collector — it can be rebuilt from scratch by re-ingesting +all `tpsc.yaml` files and re-seeding the catalog from canon files.
+ +--- + +## tpsc.yaml Format + +```yaml +# tpsc.yaml — Third-Party Services Catalog declarations +# Ingest: cd state-hub && make ingest-tpsc REPO=<repo-slug> + +services: + - slug: openai-api # Must match a slug in canon/tpsc/ + purpose: LLM inference via OpenAI-compatible API + auth: api_key # api_key | oauth | cli | none | unknown + + - slug: stripe + purpose: Payment processing + auth: api_key + endpoint: https://api.stripe.com # Optional override if non-standard + notes: Only used in production tier +``` + +--- + +## Canon Service File Format + +```yaml +# canon/tpsc/openai-api.yaml +slug: openai-api +name: OpenAI API +provider: OpenAI, Inc. +category: llm_inference # llm_inference | storage | payments | search | etc. +website_url: https://openai.com +pricing_model: usage_based # free | paid | freemium | usage_based | unknown +gdpr_maturity: developing # See scale below +gdpr_notes: > + DPA available. SCCs for EU→US transfer. 30-day retention for safety. +dpa_available: true +tos_url: https://openai.com/policies/terms-of-use +privacy_policy_url: https://openai.com/policies/privacy-policy +data_processing_regions: + - us +data_retention_notes: > + 30 days default; zero-retention available on eligible endpoints.
+status: active +``` + +--- + +## GDPR Maturity Scale + +Based on the **CNIL / IAPP CMMI Privacy Maturity Model**, adapted for +third-party service assessment: + +| Level | Name | Description | Dashboard | +|---|---|---|---| +| 0 | `unknown` | No information about GDPR stance | 🔴 Warning | +| 1 | `non_compliant` | Known GDPR issues, no remediation | 🔴 Warning | +| 2 | `initial` | Basic privacy policy only, ad hoc approach | 🟠 Warning | +| 3 | `developing` | DPA available, some controls, SCCs provided | 🟡 | +| 4 | `defined` | Formal DPA, SCCs documented, clear retention policy | 🟢 | +| 5 | `managed` | Independently audited, metrics tracked | 🟢 | +| 6 | `certified` | ISO 27701 / SOC2 privacy certified | 🟢 | + +Services at levels 0–2 (**Warning**) may limit use in GDPR-regulated or +corporate environments. At minimum, `developing` is needed for routine +processing of personal data with an API provider. + +Reference: [CNIL GDPR maturity model](https://iapp.org/news/b/cnil-publishes-data-protection-management-maturity-model), [IAPP Privacy Maturity Model](https://iapp.org/news/a/achieving-privacy-excellence-understanding-the-privacy-maturity-model) + +--- + +## Adding a New Service + +1. Create `the-custodian/canon/tpsc/<slug>.yaml` following the format above +2. Seed it into the state-hub: `cd state-hub && make api` then POST to `/tpsc/catalog/` + (or use the MCP tool: `register_service(slug=..., ...)`) +3. Add it to your repo's `tpsc.yaml` +4.
Ingest: `make ingest-tpsc REPO=` + +--- + +## MCP Tools + +| Tool | Purpose | +|---|---| +| `register_service(slug, ...)` | Add/update a service in the catalog | +| `list_services(gdpr_maturity?, category?, pricing_model?)` | Browse catalog | +| `ingest_tpsc_tool(repo_slug)` | Parse tpsc.yaml and ingest snapshot | +| `get_gdpr_report()` | GDPR warning summary across all repos | + +--- + +## Makefile Targets + +```bash +make ingest-tpsc REPO=llm-connect # Ingest single repo +make ingest-tpsc-all # Ingest all repos +make ingest-tpsc REPO=llm-connect DRY_RUN=1 # Preview only +``` diff --git a/state-hub/dashboard/src/tpsc.md b/state-hub/dashboard/src/tpsc.md new file mode 100644 index 0000000..5a7465d --- /dev/null +++ b/state-hub/dashboard/src/tpsc.md @@ -0,0 +1,193 @@ +--- +title: Third-Party Services (TPSC) +--- + +# Third-Party Services Catalog + +```js +const API = "http://127.0.0.1:8000"; +let apiOk = true; + +const catalog = await fetch(`${API}/tpsc/catalog/`) + .then(r => r.json()) + .catch(() => { apiOk = false; return []; }); + +const gdprReport = await fetch(`${API}/tpsc/report/gdpr`) + .then(r => r.json()) + .catch(() => ({ warnings: [], by_maturity: {}, total_services: 0, warning_count: 0 })); + +const snapshots = await fetch(`${API}/tpsc/snapshots/`) + .then(r => r.json()) + .catch(() => []); +``` + +```js +// GDPR maturity colour coding (CNIL/IAPP scale) +const maturityColor = { + unknown: "#ef4444", // red + non_compliant: "#dc2626", // deep red + initial: "#f97316", // orange + developing: "#eab308", // amber + defined: "#84cc16", // lime + managed: "#22c55e", // green + certified: "#16a34a", // deep green +}; + +const maturityLabel = { + unknown: "Unknown", + non_compliant: "Non-Compliant", + initial: "Initial", + developing: "Developing", + defined: "Defined", + managed: "Managed", + certified: "Certified", +}; + +const WARNING_LEVELS = new Set(["unknown", "non_compliant", "initial"]); +``` + +```js +// KPI summary +const warningServices = 
catalog.filter(s => WARNING_LEVELS.has(s.gdpr_maturity)); +const paidServices = catalog.filter(s => ["paid", "usage_based"].includes(s.pricing_model)); +``` + +
+
+
${gdprReport.warning_count}
+
GDPR warnings
+
+
+
${gdprReport.total_services}
+
Services in catalog
+
+
+
${paidServices.length}
+
Paid / usage-based
+
+
+ +--- + +## Service Catalog + +```js +import {html} from "npm:htl"; + +function maturityBadge(m) { + const color = maturityColor[m] || "#9ca3af"; + const label = maturityLabel[m] || m; + return html`${label}`; +} + +function pricingBadge(p) { + const colors = { paid: "#7c3aed", usage_based: "#7c3aed", freemium: "#0369a1", free: "#166534", unknown: "#6b7280" }; + const c = colors[p] || "#6b7280"; + return html`${p.replace("_", " ")}`; +} + +const catalogTable = html` + + + + + + + + + + + + ${catalog.map(s => html` + + + + + + + `)} + +
ServiceProviderCategoryPricingGDPR MaturityDPA
+ ${s.website_url + ? html`${s.name}` + : s.name} + ${s.provider || "—"}${s.category || "—"}${pricingBadge(s.pricing_model)}${maturityBadge(s.gdpr_maturity)}${s.dpa_available ? "✅" : "❌"}
`; +display(catalogTable); +``` + +--- + +## GDPR Warnings + +_Services at **Unknown**, **Non-Compliant**, or **Initial** maturity may limit use in GDPR-regulated or corporate environments._ + +```js +if (gdprReport.warnings.length === 0) { + display(html`

✅ No GDPR warnings across active repos.

`); +} else { + const warningCards = html`
+ ${gdprReport.warnings.map(w => { + const color = maturityColor[w.gdpr_maturity] || "#ef4444"; + return html`
+
${w.service_slug} + in ${w.repo_slug || "unknown repo"} +
+
+ ${maturityBadge(w.gdpr_maturity)} + ${w.pricing_model ? html` ${pricingBadge(w.pricing_model)}` : ""} + ${w.purpose ? html`— ${w.purpose}` : ""} +
+
`; + })} +
`; + display(warningCards); +} +``` + +--- + +## Per-Repo Breakdown + +```js +// Build: latest snapshot per repo → service list +const repoBreakdown = new Map(); +for (const snap of snapshots) { + const repoSlug = snap.repo_id || "unknown"; + if (!repoBreakdown.has(repoSlug) || snap.snapshot_at > repoBreakdown.get(repoSlug).snapshot_at) { + repoBreakdown.set(repoSlug, snap); + } +} + +// Enrich with catalog data +const catalogBySlug = Object.fromEntries(catalog.map(s => [s.slug, s])); + +const repoTable = html` + + + + + + + + + ${[...repoBreakdown.entries()].map(([repoSlug, snap]) => html` + + + + `)} + +
RepoServicesIngested
${repoSlug} + ${snap.entries.map(e => { + const cat = catalogBySlug[e.service_slug]; + const m = cat?.gdpr_maturity || "unknown"; + const color = maturityColor[m] || "#9ca3af"; + return html` + + ${e.service_slug} + `; + })} + ${new Date(snap.snapshot_at).toLocaleDateString()}
`; +display(repoTable); +``` diff --git a/state-hub/mcp_server/server.py b/state-hub/mcp_server/server.py index 4cb59c6..9d4177e 100644 --- a/state-hub/mcp_server/server.py +++ b/state-hub/mcp_server/server.py @@ -1911,6 +1911,140 @@ def get_capability_request(request_id: str) -> str: return json.dumps(_get(f"/capability-requests/{request_id}"), indent=2) +# --------------------------------------------------------------------------- +# Third-Party Services Catalog (TPSC) +# --------------------------------------------------------------------------- + +@mcp.tool() +def register_service( + slug: str, + name: str, + provider: str | None = None, + category: str | None = None, + pricing_model: str = "unknown", + gdpr_maturity: str = "unknown", + gdpr_notes: str | None = None, + dpa_available: bool = False, + tos_url: str | None = None, + privacy_policy_url: str | None = None, + data_processing_regions: list[str] | None = None, + data_retention_notes: str | None = None, + website_url: str | None = None, +) -> str: + """Register or update a service in the Third-Party Services Catalog (TPSC). + + GDPR maturity scale (CNIL/IAPP CMMI-aligned): + unknown | non_compliant | initial | developing | defined | managed | certified + + Pricing model: free | paid | freemium | usage_based | unknown + + Args: + slug: Unique identifier (e.g. 'openai-api', 'stripe') + name: Human-readable service name + provider: Company/organisation name + category: Category (e.g. 'llm_inference', 'storage', 'payments', 'search') + pricing_model: free | paid | freemium | usage_based | unknown + gdpr_maturity: GDPR compliance maturity level (see scale above) + gdpr_notes: Free-text GDPR notes (DPA details, transfer mechanisms, etc.) + dpa_available: Whether a Data Processing Agreement is available + tos_url: Terms of Service URL + privacy_policy_url: Privacy Policy URL + data_processing_regions: List of regions where data is processed (e.g. 
['us', 'eu']) + data_retention_notes: Data retention policy summary + website_url: Service website URL + """ + return json.dumps(_post("/tpsc/catalog", { + "slug": slug, + "name": name, + "provider": provider, + "category": category, + "website_url": website_url, + "pricing_model": pricing_model, + "gdpr_maturity": gdpr_maturity, + "gdpr_notes": gdpr_notes, + "dpa_available": dpa_available, + "tos_url": tos_url, + "privacy_policy_url": privacy_policy_url, + "data_processing_regions": data_processing_regions or [], + "data_retention_notes": data_retention_notes, + }), indent=2) + + +@mcp.tool() +def list_services( + gdpr_maturity: str | None = None, + category: str | None = None, + pricing_model: str | None = None, +) -> str: + """Browse the Third-Party Services Catalog (TPSC). + + Returns services with their GDPR maturity level and gdpr_warning flag + (True when maturity is unknown, non_compliant, or initial — may limit + use in corporate/GDPR-regulated environments). + + Args: + gdpr_maturity: Filter by maturity level (unknown/non_compliant/initial/developing/defined/managed/certified) + category: Filter by category (e.g. 'llm_inference', 'storage') + pricing_model: Filter by pricing model (free/paid/freemium/usage_based/unknown) + """ + return json.dumps(_get("/tpsc/catalog", { + "gdpr_maturity": gdpr_maturity, + "category": category, + "pricing_model": pricing_model, + }), indent=2) + + +@mcp.tool() +def ingest_tpsc_tool(repo_slug: str) -> str: + """Ingest tpsc.yaml service dependency declarations for a repo. + + Reads /tpsc.yaml, resolves service slugs against the catalog, + and creates a new TPSC snapshot. The repo path is resolved the same way + as the SBOM ingest tool (host_paths → local_path with existence check). + + Args: + repo_slug: Registered repo slug (e.g. 
'llm-connect', 'markitect-project') + """ + import socket as _socket + import subprocess + + repo = _get(f"/repos/{repo_slug}") + if isinstance(repo, dict) and repo.get("error"): + return f"Repo '{repo_slug}' not found: {repo['error']}" + + repo_root = _resolve_repo_path(repo) + if not repo_root: + hostname = _socket.gethostname() + return ( + f"⚠ No accessible path found for repo '{repo_slug}' on host '{hostname}'.\n" + f"Register with: update_repo_path('{repo_slug}', '/path/to/repo')" + ) + + script = Path(__file__).parent.parent / "scripts" / "ingest_tpsc.py" + result = subprocess.run( + ["uv", "run", "python", str(script), "--repo", repo_slug], + capture_output=True, text=True, + cwd=str(Path(__file__).parent.parent), + ) + output = result.stdout + result.stderr + if result.returncode != 0: + return f"ingest_tpsc failed (exit {result.returncode}):\n{output}" + return output.strip() + + +@mcp.tool() +def get_gdpr_report() -> str: + """Get an aggregated GDPR compliance report across all repos' latest TPSC snapshots. + + Returns a warning summary for services with gdpr_maturity in: + unknown | non_compliant | initial + + These may limit usability in GDPR-regulated / corporate environments. + Services at 'developing' or above have at least a DPA available. 
+ """ + return json.dumps(_get("/tpsc/report/gdpr"), indent=2) + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/state-hub/migrations/versions/j7e8f9a0b1c2_tpsc.py b/state-hub/migrations/versions/j7e8f9a0b1c2_tpsc.py new file mode 100644 index 0000000..65fbc28 --- /dev/null +++ b/state-hub/migrations/versions/j7e8f9a0b1c2_tpsc.py @@ -0,0 +1,70 @@ +"""tpsc: third-party services catalog + +Revision ID: j7e8f9a0b1c2 +Revises: i6d7e8f9a0b1 +Create Date: 2026-03-19 +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID, JSON +import uuid + +revision = "j7e8f9a0b1c2" +down_revision = "i6d7e8f9a0b1" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "tpsc_catalog", + sa.Column("id", UUID(as_uuid=True), primary_key=True, default=uuid.uuid4), + sa.Column("slug", sa.String(100), nullable=False, unique=True), + sa.Column("name", sa.String(200), nullable=False), + sa.Column("provider", sa.String(200), nullable=True), + sa.Column("category", sa.String(100), nullable=True), + sa.Column("website_url", sa.Text, nullable=True), + sa.Column("pricing_model", sa.String(20), nullable=False, server_default="unknown"), + sa.Column("gdpr_maturity", sa.String(20), nullable=False, server_default="unknown"), + sa.Column("gdpr_notes", sa.Text, nullable=True), + sa.Column("dpa_available", sa.Boolean, nullable=False, server_default="false"), + sa.Column("tos_url", sa.Text, nullable=True), + sa.Column("privacy_policy_url", sa.Text, nullable=True), + sa.Column("data_processing_regions", JSON, nullable=True), + sa.Column("data_retention_notes", sa.Text, nullable=True), + sa.Column("status", sa.String(20), nullable=False, server_default="active"), + sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + sa.Column("updated_at", 
sa.DateTime(timezone=True), server_default=sa.func.now(), onupdate=sa.func.now()), + ) + op.create_index("ix_tpsc_catalog_slug", "tpsc_catalog", ["slug"]) + op.create_index("ix_tpsc_catalog_gdpr_maturity", "tpsc_catalog", ["gdpr_maturity"]) + + op.create_table( + "tpsc_snapshots", + sa.Column("id", UUID(as_uuid=True), primary_key=True, default=uuid.uuid4), + sa.Column("repo_id", UUID(as_uuid=True), sa.ForeignKey("managed_repos.id", ondelete="SET NULL"), nullable=True), + sa.Column("snapshot_at", sa.DateTime(timezone=True), server_default=sa.func.now()), + sa.Column("source_file", sa.String(200), nullable=True), + sa.Column("entry_count", sa.Integer, nullable=False, server_default="0"), + ) + op.create_index("ix_tpsc_snapshots_repo_id", "tpsc_snapshots", ["repo_id"]) + + op.create_table( + "tpsc_entries", + sa.Column("id", UUID(as_uuid=True), primary_key=True, default=uuid.uuid4), + sa.Column("snapshot_id", UUID(as_uuid=True), sa.ForeignKey("tpsc_snapshots.id", ondelete="CASCADE"), nullable=False), + sa.Column("catalog_id", UUID(as_uuid=True), sa.ForeignKey("tpsc_catalog.id", ondelete="SET NULL"), nullable=True), + sa.Column("service_slug", sa.String(100), nullable=False), + sa.Column("purpose", sa.Text, nullable=True), + sa.Column("auth_type", sa.String(50), nullable=True), + sa.Column("endpoint_override", sa.Text, nullable=True), + sa.Column("notes", sa.Text, nullable=True), + ) + op.create_index("ix_tpsc_entries_snapshot_id", "tpsc_entries", ["snapshot_id"]) + op.create_index("ix_tpsc_entries_service_slug", "tpsc_entries", ["service_slug"]) + + +def downgrade() -> None: + op.drop_table("tpsc_entries") + op.drop_table("tpsc_snapshots") + op.drop_table("tpsc_catalog") diff --git a/state-hub/pyproject.toml b/state-hub/pyproject.toml index bfc7b20..3f351c0 100644 --- a/state-hub/pyproject.toml +++ b/state-hub/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "python-dotenv>=1.0.0", "psycopg2-binary>=2.9.0", "llm-connect", + "pyyaml>=6.0.3", ] [project.scripts] diff 
--git a/state-hub/scripts/ingest_tpsc.py b/state-hub/scripts/ingest_tpsc.py new file mode 100644 index 0000000..36a6802 --- /dev/null +++ b/state-hub/scripts/ingest_tpsc.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +"""Ingest tpsc.yaml service dependency declarations into the State Hub. + +Usage: + uv run python scripts/ingest_tpsc.py --repo [--dry-run] + uv run python scripts/ingest_tpsc.py --all [--dry-run] +""" +import argparse +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path + +try: + import yaml +except ImportError: + import tomllib as _t # noqa — fallback not really viable; yaml is required + yaml = None + +API_BASE = "http://127.0.0.1:8000" +TPSC_FILENAME = "tpsc.yaml" + + +def _get(path: str) -> dict | list: + req = urllib.request.Request(f"{API_BASE}{path}", headers={"Accept": "application/json"}) + with urllib.request.urlopen(req) as r: + return json.loads(r.read()) + + +def _post(path: str, payload: dict) -> dict: + data = json.dumps(payload).encode() + req = urllib.request.Request( + f"{API_BASE}{path}/", + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req) as r: + return json.loads(r.read()) + except urllib.error.HTTPError as e: + body = e.read().decode() + print(f" ERROR {e.code}: {body}", file=sys.stderr) + raise + + +def _load_yaml(path: Path) -> dict: + if yaml is None: + raise RuntimeError("PyYAML is required: uv add pyyaml") + with open(path) as f: + return yaml.safe_load(f) or {} + + +def _resolve_repo_path(repo: dict) -> str: + import socket + hostname = socket.gethostname() + host_paths = repo.get("host_paths") or {} + candidates = [] + if host_paths.get(hostname): + candidates.append(host_paths[hostname]) + if repo.get("local_path"): + candidates.append(repo["local_path"]) + for raw in candidates: + p = Path(raw).expanduser() + if p.is_dir(): + return str(p) + return "" + + +def ingest_repo(slug: str, dry_run: bool = False) -> 
bool: + try: + repo = _get(f"/repos/{slug}") + except Exception as e: + print(f" ✗ Repo '{slug}' not found: {e}", file=sys.stderr) + return False + + if isinstance(repo, dict) and repo.get("error"): + print(f" ✗ {repo['error']}", file=sys.stderr) + return False + + repo_path = _resolve_repo_path(repo) + if not repo_path: + print(f" ✗ No accessible local path for '{slug}' on this host.", file=sys.stderr) + return False + + tpsc_file = Path(repo_path) / TPSC_FILENAME + if not tpsc_file.exists(): + print(f" — '{slug}': no {TPSC_FILENAME} found, skipping.") + return True + + data = _load_yaml(tpsc_file) + services = data.get("services", []) + if not services: + print(f" — '{slug}': {TPSC_FILENAME} has no services entries, skipping.") + return True + + entries = [ + { + "service_slug": svc.get("slug", ""), + "purpose": svc.get("purpose"), + "auth_type": svc.get("auth"), + "endpoint_override": svc.get("endpoint"), + "notes": svc.get("notes"), + } + for svc in services + if svc.get("slug") + ] + + print(f" {'[dry-run] ' if dry_run else ''}'{slug}': {len(entries)} service(s) from {TPSC_FILENAME}") + for e in entries: + print(f" • {e['service_slug']} ({e.get('auth_type', '?')}) — {e.get('purpose', '')}") + + if dry_run: + return True + + result = _post("/tpsc/ingest", { + "repo_slug": slug, + "source_file": TPSC_FILENAME, + "entries": entries, + }) + print(f" ✓ Snapshot {result['id'][:8]}… ingested {result['entry_count']} entries") + return True + + +def main() -> None: + parser = argparse.ArgumentParser(description="Ingest tpsc.yaml into State Hub") + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--repo", metavar="SLUG", help="Single repo slug") + group.add_argument("--all", action="store_true", help="All registered repos") + parser.add_argument("--dry-run", action="store_true", help="Parse only, do not POST") + args = parser.parse_args() + + if args.all: + repos = _get("/repos/") + slugs = [r["slug"] for r in repos] + else: + slugs = 
[args.repo] + + ok = all(ingest_repo(slug, dry_run=args.dry_run) for slug in slugs) + sys.exit(0 if ok else 1) + + +if __name__ == "__main__": + main() diff --git a/state-hub/uv.lock b/state-hub/uv.lock index ad8b2e6..228b3d9 100644 --- a/state-hub/uv.lock +++ b/state-hub/uv.lock @@ -145,10 +145,16 @@ sdist = { url = "https://files.pythonhosted.org/packages/92/88/b8527e1b00c1811db wheels = [ { url = "https://files.pythonhosted.org/packages/d3/25/79c98ebe12df31548ba4eaf44db11b7cad6b3e7b4203718335620939083c/caio-0.9.25-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fb7ff95af4c31ad3f03179149aab61097a71fd85e05f89b4786de0359dffd044", size = 36983 }, { url = "https://files.pythonhosted.org/packages/a3/2b/21288691f16d479945968a0a4f2856818c1c5be56881d51d4dac9b255d26/caio-0.9.25-cp312-cp312-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:97084e4e30dfa598449d874c4d8e0c8d5ea17d2f752ef5e48e150ff9d240cd64", size = 82012 }, + { url = "https://files.pythonhosted.org/packages/03/c4/8a1b580875303500a9c12b9e0af58cb82e47f5bcf888c2457742a138273c/caio-0.9.25-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:4fa69eba47e0f041b9d4f336e2ad40740681c43e686b18b191b6c5f4c5544bfb", size = 81502 }, + { url = "https://files.pythonhosted.org/packages/d1/1c/0fe770b8ffc8362c48134d1592d653a81a3d8748d764bec33864db36319d/caio-0.9.25-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:6bebf6f079f1341d19f7386db9b8b1f07e8cc15ae13bfdaff573371ba0575d69", size = 80200 }, { url = "https://files.pythonhosted.org/packages/31/57/5e6ff127e6f62c9f15d989560435c642144aa4210882f9494204bc892305/caio-0.9.25-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d6c2a3411af97762a2b03840c3cec2f7f728921ff8adda53d7ea2315a8563451", size = 36979 }, { url = 
"https://files.pythonhosted.org/packages/a3/9f/f21af50e72117eb528c422d4276cbac11fb941b1b812b182e0a9c70d19c5/caio-0.9.25-cp313-cp313-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0998210a4d5cd5cb565b32ccfe4e53d67303f868a76f212e002a8554692870e6", size = 81900 }, + { url = "https://files.pythonhosted.org/packages/9c/12/c39ae2a4037cb10ad5eb3578eb4d5f8c1a2575c62bba675f3406b7ef0824/caio-0.9.25-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:1a177d4777141b96f175fe2c37a3d96dec7911ed9ad5f02bac38aaa1c936611f", size = 81523 }, + { url = "https://files.pythonhosted.org/packages/22/59/f8f2e950eb4f1a5a3883e198dca514b9d475415cb6cd7b78b9213a0dd45a/caio-0.9.25-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:9ed3cfb28c0e99fec5e208c934e5c157d0866aa9c32aa4dc5e9b6034af6286b7", size = 80243 }, { url = "https://files.pythonhosted.org/packages/69/ca/a08fdc7efdcc24e6a6131a93c85be1f204d41c58f474c42b0670af8c016b/caio-0.9.25-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fab6078b9348e883c80a5e14b382e6ad6aabbc4429ca034e76e730cf464269db", size = 36978 }, { url = "https://files.pythonhosted.org/packages/5e/6c/d4d24f65e690213c097174d26eda6831f45f4734d9d036d81790a27e7b78/caio-0.9.25-cp314-cp314-manylinux2010_x86_64.manylinux2014_x86_64.manylinux_2_12_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:44a6b58e52d488c75cfaa5ecaa404b2b41cc965e6c417e03251e868ecd5b6d77", size = 81832 }, + { url = "https://files.pythonhosted.org/packages/87/a4/e534cf7d2d0e8d880e25dd61e8d921ffcfe15bd696734589826f5a2df727/caio-0.9.25-cp314-cp314-manylinux_2_34_aarch64.whl", hash = "sha256:628a630eb7fb22381dd8e3c8ab7f59e854b9c806639811fc3f4310c6bd711d79", size = 81565 }, + { url = "https://files.pythonhosted.org/packages/3f/ed/bf81aeac1d290017e5e5ac3e880fd56ee15e50a6d0353986799d1bc5cfd5/caio-0.9.25-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:0ba16aa605ccb174665357fc729cf500679c2d94d5f1458a6f0d5ca48f2060a7", size = 80071 }, { url 
= "https://files.pythonhosted.org/packages/86/93/1f76c8d1bafe3b0614e06b2195784a3765bbf7b0a067661af9e2dd47fc33/caio-0.9.25-py3-none-any.whl", hash = "sha256:06c0bb02d6b929119b1cfbe1ca403c768b2013a369e2db46bfa2a5761cf82e40", size = 19087 }, ] @@ -677,6 +683,9 @@ requires-dist = [ { name = "toml" }, ] +[package.metadata.requires-dev] +dev = [{ name = "pytest", specifier = ">=9.0.2" }] + [[package]] name = "mako" version = "1.3.10" @@ -1424,6 +1433,7 @@ dependencies = [ { name = "pydantic" }, { name = "pydantic-settings" }, { name = "python-dotenv" }, + { name = "pyyaml" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "uvicorn", extra = ["standard"] }, ] @@ -1447,6 +1457,7 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.10.0" }, { name = "pydantic-settings", specifier = ">=2.7.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, + { name = "pyyaml", specifier = ">=6.0.3" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" }, ] diff --git a/workplans/CUST-WP-0023-tpsc.md b/workplans/CUST-WP-0023-tpsc.md new file mode 100644 index 0000000..999291c --- /dev/null +++ b/workplans/CUST-WP-0023-tpsc.md @@ -0,0 +1,210 @@ +--- +id: CUST-WP-0023 +type: feature +title: Third-Party Services Catalog (TPSC) +domain: custodian +status: done +owner: custodian-agent +topic_slug: custodian +created: 2026-03-19 +updated: 2026-03-20 +state_hub_workstream_id: "23208a99-0ef6-4154-9454-a2f2065b6b19" +--- + +# TPSC — Third-Party Services Catalog + +Track external service dependencies (APIs, SaaS, CLIs) across all repos. +Primary data lives in repos (`tpsc.yaml`) and a canon catalog +(`canon/tpsc/.yaml`). State-hub collects and reports. 
+ +GDPR maturity scale (CNIL/IAPP CMMI-aligned): +`unknown | non_compliant | initial | developing | defined | managed | certified` + +Pricing model: `free | paid | freemium | usage_based | unknown` + +## Task: DB migration — tpsc_catalog, tpsc_snapshots, tpsc_entries + +```task +id: CUST-WP-0023-T01 +status: done +priority: high +state_hub_task_id: "038a0284-bb76-4ce7-8861-1686667acbb5" +``` + +Create Alembic migration `j7e8f9a0b1c2_tpsc.py`. + +Tables: +- `tpsc_catalog`: id, slug (unique), name, provider, category, website_url, + pricing_model, gdpr_maturity, gdpr_notes, dpa_available, tos_url, + privacy_policy_url, data_processing_regions (JSON), data_retention_notes, + status (active/deprecated), created_at, updated_at +- `tpsc_snapshots`: id, repo_id (FK managed_repos nullable), snapshot_at, + source_file, entry_count +- `tpsc_entries`: id, snapshot_id (FK), catalog_id (FK tpsc_catalog nullable), + service_slug, purpose, auth_type, endpoint_override, notes + +--- + +## Task: SQLAlchemy models + +```task +id: CUST-WP-0023-T02 +status: done +priority: high +state_hub_task_id: "990d4e58-35b0-45f0-8de6-8955049aa7d5" +``` + +Create `api/models/tpsc.py` with TPSCCatalog, TPSCSnapshot, TPSCEntry models. +Register in `api/models/__init__.py`. + +--- + +## Task: Pydantic schemas + +```task +id: CUST-WP-0023-T03 +status: done +priority: high +state_hub_task_id: "5feeb161-b654-4e4a-8a6a-bb685c239ce5" +``` + +Create `api/schemas/tpsc.py` with Read/Create schemas for all three models. +Include `GDPRMaturity` and `PricingModel` string enums. +`TPSCCatalog` schema: include `gdpr_warning: bool` computed field +(True when gdpr_maturity in [unknown, non_compliant, initial]). 
+ +--- + +## Task: FastAPI router /tpsc/ + +```task +id: CUST-WP-0023-T04 +status: done +priority: high +state_hub_task_id: "593471b4-cd3a-4251-8c5c-ee42b6a9e089" +``` + +Create `api/routers/tpsc.py`: +- `GET /tpsc/catalog/` — list services (filter: gdpr_maturity, category, pricing_model) +- `GET /tpsc/catalog/{slug}` — single service +- `POST /tpsc/catalog/` — register/upsert service +- `POST /tpsc/ingest/` — accept snapshot + entries for a repo +- `GET /tpsc/snapshots/` — list snapshots (filter: repo_slug) +- `GET /tpsc/report/gdpr` — aggregated GDPR warnings across all repos + +Register in `api/main.py`. + +--- + +## Task: MCP tools + +```task +id: CUST-WP-0023-T05 +status: done +priority: high +state_hub_task_id: "7370d020-06cf-4ebe-9f78-619e41c4b85c" +``` + +Add to `mcp_server/server.py`: +- `register_service(slug, name, provider, pricing_model, gdpr_maturity, ...)` +- `list_services(gdpr_maturity?, category?, pricing_model?)` +- `ingest_tpsc_tool(repo_slug)` — runs ingest_tpsc.py for the repo +- `get_gdpr_report()` — returns warning summary across all repos + +--- + +## Task: Ingest script + +```task +id: CUST-WP-0023-T06 +status: done +priority: high +state_hub_task_id: "5ec305d3-fdfb-4f81-b3ab-1ea57a4ec0c5" +``` + +Create `scripts/ingest_tpsc.py`: +- Reads `tpsc.yaml` from repo root (auto-detected via registered local_path) +- Resolves catalog_id by slug for each entry +- POSTs snapshot + entries to `/tpsc/ingest/` +- `--dry-run` flag +- `--repo SLUG` or `--all` flags + +Add Makefile targets: +``` +make ingest-tpsc REPO=<repo-slug> +make ingest-tpsc-all +``` + +--- + +## Task: Canon service catalog seed files + +```task +id: CUST-WP-0023-T07 +status: done +priority: medium +state_hub_task_id: "21c13b4d-585a-4a60-a283-29baa2dd6d7d" +``` + +Create `canon/tpsc/` with YAML files for: +- `openai-api.yaml` +- `anthropic-api.yaml` +- `gemini-api.yaml` +- `openrouter-api.yaml` + +Each file: slug, name, provider, category, pricing_model, gdpr_maturity, +gdpr_notes, 
dpa_available, tos_url, privacy_policy_url, +data_processing_regions, data_retention_notes. + +--- + +## Task: tpsc.yaml for llm-connect + +```task +id: CUST-WP-0023-T08 +status: done +priority: medium +state_hub_task_id: "d658f81b-7408-456d-b4bd-440b44a67e43" +``` + +Create `/home/worsch/llm-connect/tpsc.yaml` declaring: +openai-api, anthropic-api (via claude_code CLI), gemini-api, openrouter-api. + +--- + +## Task: Dashboard page + +```task +id: CUST-WP-0023-T09 +status: done +priority: medium +state_hub_task_id: "a5367fc4-ef12-4f52-b642-d33b91b7cc2c" +``` + +Create `dashboard/src/tpsc.md`: +- Service catalog table with GDPR maturity color coding + (red: non_compliant/unknown, amber: initial/developing, green: defined+) +- Warning cards for repos using non-green services +- Per-repo breakdown table +- Pricing model summary + +Add to `observablehq.config.js` nav. + +--- + +## Task: Documentation page + +```task +id: CUST-WP-0023-T10 +status: done +priority: low +state_hub_task_id: "6cf1aaec-5c24-4cb5-a1aa-415caeaaca10" +``` + +Create `dashboard/src/docs/tpsc.md` explaining: +- TPSC concept and rationale +- tpsc.yaml format with full example +- GDPR maturity scale definitions (linked to CNIL/IAPP) +- How to add a new service to the canon catalog + +Add to docs nav in `observablehq.config.js`.