feat(WARDEN-WP-0014): T1 — structured handoff fields in routing catalog

Adds optional assist-layer fields (auth_method, path_template, fetch_command, exec_capable, policy_ref) to RouteEntry, parsed and secret-screened in catalog.py. Handoff fields are templates/pointers only — _assert_no_secret_material rejects known token prefixes and high-entropy runs, and exec_capable requires a fetch_command. The openbao-api-key entry is populated as the reference example (covers the coulomb_social npm shape).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 16:00:56 +02:00
parent 18b2a42463
commit 1f7970ad9b
5 changed files with 165 additions and 11 deletions
--- a/src/warden/routing/catalog.py
+++ b/src/warden/routing/catalog.py
@@ -14,6 +14,7 @@ never restates another subsystem's procedure.
 from __future__ import annotations

 import os
+import re
 from dataclasses import dataclass
 from datetime import date
 from pathlib import Path
@@ -23,6 +24,24 @@ import yaml

 from warden.routing.models import RouteEntry

+# Structured handoff string fields (WP-0014) — templates and pointers only.
+# Every one is scanned for accidental secret material; see _assert_no_secret_material.
+_HANDOFF_STR_FIELDS = ("auth_method", "path_template", "fetch_command", "policy_ref")
+
+# Known secret-bearing token prefixes — a literal here means a value leaked into
+# the catalog (which is git-tracked and agent-visible). Templates use `<...>`.
+_SECRET_PREFIXES = (
+    "ghp_", "gho_", "ghs_", "github_pat_",  # GitHub
+    "sk-", "sk_live_", "sk_test_",           # OpenAI / Stripe
+    "xoxb-", "xoxp-",                          # Slack
+    "AKIA", "ASIA",                            # AWS access key ids
+    "hvs.", "hvb.", "s.",                      # Vault/OpenBao: service, batch, legacy service
+    "AIza",                                    # Google
+    "eyJ",                                     # JWT
+)
+# Any unbroken run of 32+ letters/digits/`_`/`-`; the caller decides which are benign.
+_HIGH_ENTROPY_RUN = re.compile(r"[A-Za-z0-9_\-]{32,}")
+
 _REQUIRED_FIELDS = (
    "id",
    "title",
@@ -125,6 +144,35 @@ class Catalog:
        ]


+def _assert_no_secret_material(entry_id: str, field_name: str, value: str) -> None:
+    """Reject a handoff field that appears to embed a literal secret value.
+
+    Handoff fields are command/path *templates*: concrete values must be
+    placeholders (`<...>`) or field names, never a real credential. A known token
+    prefix counts only in its exact case and when no letter/digit precedes it, so
+    ordinary text (`task-id`, `secrets.kv`) passes; a 32+ character run is rejected
+    unless it is `<...>`-wrapped or purely letters/`_`/`-`. Raises CatalogError.
+    """
+    for prefix in _SECRET_PREFIXES:
+        if re.search(rf"(?<![A-Za-z0-9]){re.escape(prefix)}[A-Za-z0-9_-]", value):
+            raise CatalogError(
+                f"entry {entry_id!r} field {field_name!r} appears to contain a literal "
+                f"secret (matched {prefix!r}). Handoff fields are templates — use "
+                "placeholders like <FIELD>/<PATH>, never a real value."
+            )
+    for found in _HIGH_ENTROPY_RUN.finditer(value):
+        run, start, end = found.group(), found.start(), found.end()
+        # The pattern cannot match `<`/`>`, so check the neighbouring characters.
+        if value[max(start - 1, 0):start] == "<" and value[end:end + 1] == ">":
+            continue
+        if run.replace("_", "").replace("-", "").isalpha():
+            continue  # all-letters run (e.g. a long word) — not a credential
+        raise CatalogError(
+            f"entry {entry_id!r} field {field_name!r} contains a high-entropy token "
+            f"({run[:8]}…) that is not a placeholder — suspected leaked secret value."
+        )
+
+
 def _parse_entry(raw: dict, index: int) -> RouteEntry:
    if not isinstance(raw, dict):
        raise CatalogError(f"entry #{index} is not a mapping")
@@ -159,8 +207,28 @@ def _parse_entry(raw: dict, index: int) -> RouteEntry:
    if not isinstance(raw["need_keywords"], list):
        raise CatalogError(f"entry {raw['id']!r} need_keywords must be a list")

+    # Structured handoff fields (WP-0014) — optional, screened for secret material.
+    entry_id = str(raw["id"])
+    handoff: dict[str, Optional[str]] = {}
+    for fname in _HANDOFF_STR_FIELDS:
+        val = raw.get(fname)
+        handoff[fname] = None if val is None or val == "" else str(val)
+        if handoff[fname] is not None:
+            _assert_no_secret_material(entry_id, fname, handoff[fname])
+
+    # bool("false") is True, so a quoted YAML value would silently enable proxying.
+    exec_capable = raw.get("exec_capable", False) or False
+    if not isinstance(exec_capable, bool):
+        raise CatalogError(f"entry {entry_id!r} exec_capable must be true or false")
+    # A lane cannot be proxy-executable without a fetch_command to run.
+    if exec_capable and not handoff["fetch_command"]:
+        raise CatalogError(
+            f"entry {entry_id!r} sets exec_capable: true but has no fetch_command — "
+            "a proxyable lane must declare the command warden runs as the caller."
+        )
+
    return RouteEntry(
-        id=str(raw["id"]),
+        id=entry_id,
        title=str(raw["title"]),
        need_keywords=[str(k) for k in raw["need_keywords"]],
        owner_repo=str(raw["owner_repo"]),
@@ -172,6 +240,11 @@ def _parse_entry(raw: dict, index: int) -> RouteEntry:
        status=status,
        steps=[str(s) for s in steps],
        cert_command=str(cert_command) if cert_command else None,
+        auth_method=handoff["auth_method"],
+        path_template=handoff["path_template"],
+        fetch_command=handoff["fetch_command"],
+        exec_capable=exec_capable,
+        policy_ref=handoff["policy_ref"],
    )


--- a/src/warden/routing/models.py
+++ b/src/warden/routing/models.py
@@ -26,11 +26,26 @@ class RouteEntry:
    # SSH lane only — None/empty for routed (non-executed) needs.
    steps: List[str] = field(default_factory=list)
    cert_command: Optional[str] = None
+    # Structured handoff (WP-0014) — optional, allowed on any lane. These are
+    # *templates and pointers* the `warden access` assist layer renders (and, for
+    # exec_capable lanes, proxies). They are NOT authored procedure prose and they
+    # never carry a secret value — only placeholders (`<...>`) and field names.
+    # Validation in catalog.py enforces the no-secret-material rule on every one.
+    auth_method: Optional[str] = None       # how the caller authenticates to the owner
+    path_template: Optional[str] = None      # owner-side path with `<...>` placeholders
+    fetch_command: Optional[str] = None      # command skeleton run *as the caller*
+    exec_capable: bool = False               # may `warden access --fetch/--exec` proxy it
+    policy_ref: Optional[str] = None         # flex-auth check the fetch path runs first

    @property
    def is_active(self) -> bool:
        return self.status == "active"

+    @property
+    def has_handoff(self) -> bool:
+        """Whether auth_method, path_template, or fetch_command is set (truthy)."""
+        return bool(self.auth_method or self.path_template or self.fetch_command)
+
    def match_score(self, tokens: List[str]) -> int:
        """Keyword-overlap score against need_keywords, title, and id.