feat(WARDEN-WP-0014): T3 — OpenBao proxy lane (--fetch / --exec)

Adds a transparent, policy-gated, audited proxy of a non-SSH credential through `warden access`, for exec_capable lanes. Three guardrails are enforced in code:
- G1 caller identity: runs the owner's tool with the caller's own env; warden injects no token of its own (caller_auth_present check).
- G2 transit-only: --fetch inherits stdout (never PIPE) so the value never enters warden's memory or any log; --exec injects into the child env only. Audit (access-audit.log) is metadata-only.
- G3 policy gate: check_fetch_policy runs before any fetch; with policy.enabled=false the proxy refuses unless --no-policy is given.
resolve_fetch_command refuses unresolved <…> placeholders rather than guess owner-side names.
New warden/proxy.py + policy.check_fetch_policy; tests/test_proxy.py asserts all three guardrails. 168 passed, lint clean.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 16:26:03 +02:00
parent 830a775bcf
commit 6dfa69e310
5 changed files with 588 additions and 11 deletions
--- a/src/warden/proxy.py
+++ b/src/warden/proxy.py
@@ -0,0 +1,184 @@
+"""Operator access proxy — transparent, audited fetch of a non-SSH credential.
+
+WP-0014 T3. ops-warden does not own these secrets; the proxy lane lets an operator
+obtain one *through* the `warden access` front door while keeping the security model
+intact. Three guardrails are enforced here in code:
+
+* **G1 — caller identity, never warden's.** The proxy runs the owner's tool with the
+  caller's own environment. ops-warden injects no token of its own; if the caller has
+  no credential, the underlying tool fails and we surface the auth pointer. We never
+  add a `*_TOKEN` warden owns to the child environment.
+* **G2 — transit only, no persistence/logging of values.** ``proxy_fetch`` runs the
+  tool with **inherited** stdout/stderr (never a pipe), so the value streams to the
+  caller and never enters warden's memory. ``proxy_exec`` reads the value solely to
+  place it in a child process's environment (the accepted proxy tradeoff) and never
+  writes it to disk or log. The audit record is metadata only.
+* **G3 — policy gate before fetch.** The CLI runs ``check_fetch_policy`` before
+  calling anything here; this module refuses to run an unresolved command template.
+
+This module shells out but never *interprets* secret bytes in the ``--fetch`` path.
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+import shlex
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import List, Optional
+
+from warden.routing.models import RouteEntry
+
+# Matches any ``<…>`` template token (e.g. ``<domain>``); used to detect placeholders
+# that are still unresolved after substitution.
+_PLACEHOLDER = re.compile(r"<[^>]+>")
+
+
+class ProxyError(Exception):
+    """Raised when a proxy fetch cannot be performed safely.
+
+    Examples in this module: the entry is not exec_capable, the fetch command
+    template still contains an unresolved ``<…>`` placeholder, ``--exec`` was given
+    no env var name, or the fetch tool exited non-zero.
+    """
+
+
+def resolve_fetch_command(
+    entry: RouteEntry,
+    *,
+    domain: Optional[str] = None,
+    field: Optional[str] = None,
+    path: Optional[str] = None,
+) -> List[str]:
+    """Build the concrete argv for an entry's fetch, or raise if under-specified.
+
+    Starts from the catalog ``fetch_command`` template (with ``<path_template>``
+    inlined), substitutes ``<domain>``/``<FIELD>`` and an explicit ``--path`` override,
+    then **refuses** if any ``<…>`` placeholder remains. We never run a half-templated
+    command — an unresolved placeholder means the operator has not named the owner-side
+    resource, and guessing it is exactly the failure mode we avoid.
+    """
+    if not entry.exec_capable or not entry.fetch_command:
+        raise ProxyError(
+            f"{entry.id!r} is not exec_capable — it has no proxyable fetch command. "
+            "Use `warden access` (advisory) and obtain it from the owner directly."
+        )
+
+    cmd = entry.fetch_command
+    if entry.path_template and "<path_template>" in cmd:
+        cmd = cmd.replace("<path_template>", path or entry.path_template)
+    elif path:
+        # No <path_template> token but caller supplied a path — append/override is
+        # ambiguous, so require the template to carry the token.
+        raise ProxyError(
+            f"{entry.id!r} fetch_command has no <path_template> token to override with --path."
+        )
+
+    if domain:
+        cmd = cmd.replace("<domain>", domain)
+    if field:
+        cmd = cmd.replace("<FIELD>", field)
+
+    leftover = _PLACEHOLDER.findall(cmd)
+    if leftover:
+        raise ProxyError(
+            f"unresolved placeholder(s) {', '.join(sorted(set(leftover)))} in fetch command. "
+            "Supply --domain/--field (and --path for owner-side names) — warden will not "
+            "guess owner-confirmed resource names."
+        )
+    return shlex.split(cmd)
+
+
+def caller_auth_present(token_envs: tuple[str, ...] = ("VAULT_TOKEN", "BAO_TOKEN")) -> bool:
+    """True if the *caller* appears to hold an auth token (G1 sanity check).
+
+    Best-effort: also accepts a ``~/.vault-token`` file. We do not validate it — the
+    owner's tool does that — we only avoid proxying when the caller clearly has no
+    credential, so the failure is a clear auth pointer rather than a confusing tool error.
+    """
+    if any(os.environ.get(e, "").strip() for e in token_envs):
+        return True
+    return (Path.home() / ".vault-token").exists()
+
+
+def write_audit(
+    state_dir: Path,
+    *,
+    need_id: str,
+    owner_repo: str,
+    domain: Optional[str],
+    action: str,
+    decision_id: Optional[str],
+    exit_code: Optional[int] = None,
+) -> Path:
+    """Append a metadata-only audit record. Never contains a secret value (G2)."""
+    state_dir.mkdir(parents=True, exist_ok=True)
+    log_path = state_dir / "access-audit.log"
+    record = {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "action": action,  # "fetch" | "exec"
+        "need_id": need_id,
+        "owner_repo": owner_repo,
+        "domain": domain,
+        "subject": os.environ.get("WARDEN_POLICY_SUBJECT", "").strip() or "operator",
+        "policy_decision_id": decision_id,
+        "exit_code": exit_code,
+    }
+    with log_path.open("a") as f:
+        f.write(json.dumps(record) + "\n")
+    return log_path
+
+
+def _caller_env() -> dict:
+    """The child environment = the caller's own env. warden adds no credential (G1)."""
+    return dict(os.environ)
+
+
+def proxy_fetch(argv: List[str]) -> int:
+    """Run the owner's tool, streaming its output straight to the caller.
+
+    stdout/stderr are **inherited** (``None``), never piped — the secret value flows
+    subsystem → caller and is never read into warden's memory, buffer, or log (G2).
+    Returns the tool's exit code.
+    """
+    completed = subprocess.run(  # noqa: S603 — argv is shlex-split from a validated template
+        argv,
+        stdout=None,
+        stderr=None,
+        stdin=None,
+        env=_caller_env(),
+        check=False,
+    )
+    return completed.returncode
+
+
+def proxy_exec(argv: List[str], *, env_var: str, child_argv: List[str]) -> int:
+    """Fetch the value and inject it into a child command's environment only.
+
+    The value transits warden's memory here (the accepted proxy tradeoff for `--exec`)
+    but is never written to disk or log and never enters the caller's own shell env.
+    Captures the fetch tool's stdout to obtain the value, strips a single trailing
+    newline, and runs ``child_argv`` with ``env_var`` set in its environment.
+    """
+    if not env_var:
+        raise ProxyError("--exec requires --field (the env var name to inject), e.g. NPM_AUTH_TOKEN")
+
+    fetched = subprocess.run(  # noqa: S603
+        argv, stdout=subprocess.PIPE, stderr=None, stdin=None,
+        env=_caller_env(), check=False, text=True,
+    )
+    if fetched.returncode != 0:
+        raise ProxyError(
+            f"fetch failed (exit {fetched.returncode}) — check caller auth and the path."
+        )
+
+    value = fetched.stdout
+    if value.endswith("\n"):
+        value = value[:-1]
+
+    child_env = _caller_env()
+    child_env[env_var] = value
+    try:
+        child = subprocess.run(  # noqa: S603
+            child_argv, stdout=None, stderr=None, stdin=None, env=child_env, check=False
+        )
+        return child.returncode
+    finally:
+        # Best-effort scrub of the local reference; do not log it.
+        value = ""  # noqa: F841
+        del child_env[env_var]