feat(WARDEN-WP-0014): T3 — OpenBao proxy lane (--fetch / --exec)

Adds a transparent, policy-gated, audited proxy of a non-SSH credential through `warden access`, for exec_capable lanes. Three guardrails are enforced in code:

- G1 caller identity: runs the owner's tool with the caller's own env; warden injects no token of its own (caller_auth_present check).
- G2 transit-only: --fetch inherits stdout (never PIPE) so the value never enters warden's memory or any log; --exec injects into the child env only. Audit (access-audit.log) is metadata-only.
- G3 policy gate: check_fetch_policy runs before any fetch; with policy.enabled=false the proxy refuses unless --no-policy is given.

resolve_fetch_command refuses unresolved <…> placeholders rather than guess owner-side names. New warden/proxy.py + policy.check_fetch_policy; tests/test_proxy.py asserts all three guardrails. 168 passed, lint clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 16:26:03 +02:00
parent 830a775bcf
commit 6dfa69e310
5 changed files with 588 additions and 11 deletions
--- a/src/warden/cli.py
+++ b/src/warden/cli.py
@@ -737,8 +737,121 @@ def _access_json(entry, expanded, gate: str, domain: Optional[str]) -> dict:
    return payload


-@app.command("access")
+def _access_proxy(
+    entry,
+    *,
+    domain: Optional[str],
+    field: Optional[str],
+    path: Optional[str],
+    do_exec: bool,
+    child_argv: list,
+    no_policy: bool,
+) -> None:
+    """Proxy a non-SSH credential fetch as the caller (WP-0014 T3).
+
+    Enforces the three guardrails: caller identity (no warden token), policy gate
+    before fetch, and transit-only (no value persisted or logged). All warden chatter
+    goes to stderr so --fetch stdout carries only the secret.
+
+    Always leaves via ``typer.Exit``:
+
+    * 2 — entry not exec_capable, ``--exec`` without a trailing command, warden.yaml
+      not loadable, or the fetch command could not be resolved;
+    * 3 — no caller credential found;
+    * 4 — policy gate denied, or gate not enforced and ``--no-policy`` not given;
+    * 5 — the proxy itself failed (``ProxyError``);
+    * otherwise the exit code returned by ``proxy_fetch`` / ``proxy_exec``.
+
+    One metadata-only audit record is written per attempted fetch/exec, including
+    the resulting exit code (``None`` when the attempt raised before finishing).
+    """
+    from warden.proxy import (
+        ProxyError,
+        caller_auth_present,
+        proxy_exec,
+        proxy_fetch,
+        resolve_fetch_command,
+        write_audit,
+    )
+    from warden.policy import check_fetch_policy
+
+    if not entry.exec_capable:
+        err.print(
+            f"[red]{entry.id!r} is not exec_capable.[/red] "
+            "Use `warden access` (advisory) and obtain it from the owner directly."
+        )
+        raise typer.Exit(2)
+
+    # Pure usage error: reject it before loading config or calling the policy
+    # service, and before anything is audited as an attempted proxy.
+    if do_exec and not child_argv:
+        err.print("[red]--exec needs a command after `--`[/red], e.g. `-- npm publish`.")
+        raise typer.Exit(2)
+
+    # Proxy is privileged — require a real config for policy posture + audit sink.
+    try:
+        cfg = load_config()
+    except ConfigError as e:
+        err.print(
+            f"[red]Proxy requires warden.yaml[/red] (policy gate + audit sink): {e}\n"
+            "Advisory mode works without it: drop --fetch/--exec."
+        )
+        raise typer.Exit(2)
+
+    # G1 — caller identity. ops-warden adds no token of its own.
+    if not caller_auth_present():
+        err.print(
+            "[red]No caller credential found[/red] (VAULT_TOKEN/BAO_TOKEN or ~/.vault-token). "
+            f"Authenticate first: {entry.auth_method or 'see the owner auth path'}."
+        )
+        raise typer.Exit(3)
+
+    # G3 — policy gate before fetch.
+    decision_id = None
+    if cfg.policy.enabled:
+        try:
+            decision_id = check_fetch_policy(
+                cfg.policy, need_id=entry.id, owner_repo=entry.owner_repo, domain=domain
+            )
+        except CAError as e:
+            err.print(f"[red]Policy gate denied the fetch:[/red] {e}")
+            raise typer.Exit(4)
+        if decision_id is None:
+            # The gate is enabled, so None here means the check failed open
+            # (fail_closed=false) — do not report that as an "allow".
+            err.print(
+                "[yellow]flex-auth check failed open[/yellow] "
+                "(fail_closed=false); proceeding without a decision id."
+            )
+        else:
+            err.print(f"[green]flex-auth allow[/green] (decision {decision_id}).")
+    elif not no_policy:
+        err.print(
+            "[yellow]flex-auth gate is not enforced[/yellow] (policy.enabled=false). "
+            "Re-run with [bold]--no-policy[/bold] to proxy ungated, or enable the gate."
+        )
+        raise typer.Exit(4)
+    else:
+        err.print("[yellow]Proxying ungated[/yellow] (--no-policy; gate not enforced).")
+
+    try:
+        argv = resolve_fetch_command(entry, domain=domain, field=field, path=path)
+    except ProxyError as e:
+        err.print(f"[red]{e}[/red]")
+        raise typer.Exit(2)
+
+    action = "exec" if do_exec else "fetch"
+    err.print(
+        f"[dim]proxy {action}: {entry.id} → {entry.owner_repo} "
+        f"(caller identity; value not persisted)[/dim]"
+    )
+    # Stays None when the attempt raises, so the audit record shows "no exit code"
+    # rather than a misleading success.
+    rc: Optional[int] = None
+    try:
+        if do_exec:
+            rc = proxy_exec(argv, env_var=field or "", child_argv=child_argv)
+        else:
+            rc = proxy_fetch(argv)
+    except ProxyError as e:
+        err.print(f"[red]{e}[/red]")
+        raise typer.Exit(5)
+    finally:
+        # Audit every attempt, successful or not; an audit failure is reported
+        # but must not mask the proxy outcome.
+        try:
+            write_audit(
+                cfg.state_dir,
+                need_id=entry.id,
+                owner_repo=entry.owner_repo,
+                domain=domain,
+                action=action,
+                decision_id=decision_id,
+                exit_code=rc,
+            )
+        except OSError as e:
+            err.print(f"[yellow]audit write failed:[/yellow] {e}")
+
+    raise typer.Exit(rc)
+
+
+@app.command(
+    "access",
+    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
+)
 def access(
+    ctx: typer.Context,
    need: Annotated[str, typer.Argument(help="Free-text need, e.g. 'npm token', 'db password'")],
    domain: Annotated[
        Optional[str],
@@ -746,13 +859,34 @@ def access(
    ] = None,
    output_json: Annotated[bool, typer.Option("--json", help="Output JSON (stable, secret-free)")] = False,
    all_entries: Annotated[bool, typer.Option("--all", help="Include draft entries")] = False,
+    do_fetch: Annotated[
+        bool, typer.Option("--fetch", help="Proxy the fetch as the caller; value streams to stdout")
+    ] = False,
+    do_exec: Annotated[
+        bool,
+        typer.Option("--exec", help="Run the trailing command (after --) with the secret in its env"),
+    ] = False,
+    field: Annotated[
+        Optional[str], typer.Option("--field", help="Secret field / env-var name, e.g. NPM_AUTH_TOKEN")
+    ] = None,
+    path: Annotated[
+        Optional[str], typer.Option("--path", help="Override the owner-side path template")
+    ] = None,
+    no_policy: Annotated[
+        bool,
+        typer.Option("--no-policy", help="Acknowledge proxying when the flex-auth gate is not enforced"),
+    ] = False,
 ) -> None:
    """Operator front door: how to obtain any credential, gated and audited.

    Advisory by default — renders the owner, auth method, path template, command
    skeleton, and policy gate status for the best-matching need. ops-warden issues
    the SSH lane directly and **routes every other need to its owner** — it never
-    holds or vends the secret value. (Proxy fetch arrives in WP-0014 T3.)
+    holds or vends the secret value.
+
+    With --fetch / --exec it proxies the fetch *as the caller* for exec_capable lanes:
+    the flex-auth gate runs first, ops-warden adds no credential of its own, the value
+    is never persisted or logged, and only metadata is audited.
    """
    from warden.access import expand_handoff, policy_gate_status

@@ -766,6 +900,19 @@ def access(
        raise typer.Exit(1)

    entry = matches[0]
+
+    if do_fetch or do_exec:
+        _access_proxy(
+            entry,
+            domain=domain,
+            field=field,
+            path=path,
+            do_exec=do_exec,
+            child_argv=list(ctx.args),
+            no_policy=no_policy,
+        )
+        return
+
    expanded = expand_handoff(entry, domain)
    gate = policy_gate_status()

--- a/src/warden/policy.py
+++ b/src/warden/policy.py
@@ -88,6 +88,64 @@ def check_sign_policy(cfg: PolicyConfig, spec: CertSpec) -> str | None:
        reason = decision.get("reason") or "no reason provided"
        raise CAError(f"flex-auth denied SSH sign for {spec.actor_name!r}: {reason}")

+    if not decision_id:
+        raise CAError("flex-auth allow decision missing id")
+    return str(decision_id)
+
+
+def check_fetch_policy(
+    cfg: PolicyConfig, *, need_id: str, owner_repo: str, domain: str | None
+) -> str | None:
+    """Call flex-auth /v1/check before proxying a non-SSH credential fetch (WP-0014).
+
+    The action is ``read`` on a ``secret`` resource owned by another subsystem —
+    ops-warden is the conduit, not the owner. No secret value is ever part of this
+    request.
+
+    Returns the decision id on allow. Returns None when policy is disabled, and
+    also when the check fails open: flex-auth is unreachable or answers with an
+    HTTP error status while ``cfg.fail_closed`` is false.
+
+    Raises CAError on deny, on a non-JSON or malformed decision, on an allow
+    without an id, and on an unreachable/erroring flex-auth when fail_closed.
+    """
+    if not cfg.enabled:
+        return None
+
+    # Subject comes from the configured env var; falls back to a generic label.
+    subject_id = os.environ.get(cfg.subject_env, "").strip() or "operator"
+    request = {
+        "subject": {"id": subject_id, "type": "operator", "tenant": cfg.tenant},
+        "action": "read",
+        "resource": {
+            "id": f"secret:{need_id}" + (f"/{domain}" if domain else ""),
+            "type": "secret",
+            "system": owner_repo,
+            "tenant": cfg.tenant,
+        },
+        "context": {"need_id": need_id, "owner_repo": owner_repo, "domain": domain},
+    }
+
+    url = cfg.flex_auth_url.rstrip("/") + "/v1/check"
+    try:
+        response = httpx.post(url, json=request, timeout=10.0)
+        response.raise_for_status()
+    except httpx.HTTPStatusError as e:
+        # NOTE(review): with fail_closed=false an HTTP error status (including a
+        # 4xx rejection) fails open — confirm that is the intended posture.
+        if cfg.fail_closed:
+            raise CAError(
+                f"flex-auth denied or rejected fetch policy check (HTTP {e.response.status_code})"
+            ) from e
+        return None
+    except httpx.RequestError as e:
+        if cfg.fail_closed:
+            raise CAError(
+                f"flex-auth unreachable at {cfg.flex_auth_url!r} (fail_closed=true): {e}"
+            ) from e
+        return None
+
+    try:
+        decision = response.json()
+    except ValueError as e:
+        raise CAError("flex-auth returned non-JSON decision") from e
+    # Valid JSON need not be an object; a list/string/null has no .get() and would
+    # otherwise surface as an AttributeError instead of a policy error.
+    if not isinstance(decision, dict):
+        raise CAError("flex-auth returned a malformed decision (expected a JSON object)")
+
+    effect = str(decision.get("effect", "")).lower()
+    decision_id = decision.get("id") or decision.get("request_id")
+    if effect != "allow":
+        reason = decision.get("reason") or "no reason provided"
+        raise CAError(f"flex-auth denied secret read for {need_id!r}: {reason}")
    if not decision_id:
        raise CAError("flex-auth allow decision missing id")
    return str(decision_id)
--- a/src/warden/proxy.py
+++ b/src/warden/proxy.py
@@ -0,0 +1,184 @@
+"""Operator access proxy — transparent, audited fetch of a non-SSH credential.
+
+WP-0014 T3. ops-warden does not own these secrets; the proxy lane lets an operator
+obtain one *through* the `warden access` front door while keeping the security model
+intact. Three guardrails are enforced here in code:
+
+* **G1 — caller identity, never warden's.** The proxy runs the owner's tool with the
+  caller's own environment. ops-warden injects no token of its own; if the caller has
+  no credential, the underlying tool fails and we surface the auth pointer. We never
+  add a `*_TOKEN` warden owns to the child environment.
+* **G2 — transit only, no persistence/logging of values.** ``proxy_fetch`` runs the
+  tool with **inherited** stdout/stderr (never a pipe), so the value streams to the
+  caller and never enters warden's memory. ``proxy_exec`` reads the value solely to
+  place it in a child process's environment (the accepted proxy tradeoff) and never
+  writes it to disk or log. The audit record is metadata only.
+* **G3 — policy gate before fetch.** The CLI runs ``check_fetch_policy`` before
+  calling anything here; this module refuses to run an unresolved command template.
+
+This module shells out but never *interprets* secret bytes in the ``--fetch`` path.
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+import shlex
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import List, Optional
+
+from warden.routing.models import RouteEntry
+
+# Matches an angle-bracket placeholder such as ``<domain>``: "<", one or more
+# characters other than ">", then ">". Used to detect unresolved template tokens.
+_PLACEHOLDER = re.compile(r"<[^>]+>")
+
+
+class ProxyError(Exception):
+    """Raised when a proxy fetch cannot be performed safely."""
+
+
+def resolve_fetch_command(
+    entry: RouteEntry,
+    *,
+    domain: Optional[str] = None,
+    field: Optional[str] = None,
+    path: Optional[str] = None,
+) -> List[str]:
+    """Build the concrete argv for an entry's fetch, or raise if under-specified.
+
+    The catalog ``fetch_command`` template is split into arguments *first*; the
+    placeholders ``<path_template>`` (``--path`` override, else the catalog
+    ``path_template``), ``<domain>`` and ``<FIELD>`` are then substituted inside each
+    argument, in that order. Substituting after the split means a caller-supplied
+    value containing whitespace or quotes stays inside the argument that held the
+    placeholder — it cannot introduce extra argv elements or break the parse.
+
+    **Refuses** if any ``<…>`` placeholder remains. We never run a half-templated
+    command — an unresolved placeholder means the operator has not named the owner-side
+    resource, and guessing it is exactly the failure mode we avoid.
+
+    Raises:
+        ProxyError: the entry is not exec_capable or has no fetch command; the
+            template is blank or not a parseable command line; ``--path`` was given
+            but the template has no ``<path_template>`` token; or a placeholder is
+            left unresolved.
+    """
+    if not entry.exec_capable or not entry.fetch_command:
+        raise ProxyError(
+            f"{entry.id!r} is not exec_capable — it has no proxyable fetch command. "
+            "Use `warden access` (advisory) and obtain it from the owner directly."
+        )
+
+    template = entry.fetch_command
+    if path and "<path_template>" not in template:
+        # No <path_template> token but caller supplied a path — append/override is
+        # ambiguous, so require the template to carry the token.
+        raise ProxyError(
+            f"{entry.id!r} fetch_command has no <path_template> token to override with --path."
+        )
+
+    try:
+        tokens = shlex.split(template)
+    except ValueError as e:
+        # Unbalanced quoting in the catalog template itself.
+        raise ProxyError(
+            f"{entry.id!r} fetch_command is not a valid command line: {e}"
+        ) from e
+    if not tokens:
+        raise ProxyError(f"{entry.id!r} fetch_command is empty.")
+
+    # An explicit --path wins over the catalog path_template; either may itself
+    # contain <domain>/<FIELD>, which the later substitutions resolve.
+    substitutions = (
+        ("<path_template>", path or entry.path_template),
+        ("<domain>", domain),
+        ("<FIELD>", field),
+    )
+    argv: List[str] = []
+    for token in tokens:
+        for placeholder, value in substitutions:
+            if value:
+                token = token.replace(placeholder, value)
+        argv.append(token)
+
+    # Check the re-joined command so a placeholder whose name contains spaces
+    # (and was therefore split across arguments) is still caught.
+    leftover = _PLACEHOLDER.findall(" ".join(argv))
+    if leftover:
+        raise ProxyError(
+            f"unresolved placeholder(s) {', '.join(sorted(set(leftover)))} in fetch command. "
+            "Supply --domain/--field (and --path for owner-side names) — warden will not "
+            "guess owner-confirmed resource names."
+        )
+    return argv
+
+
+def caller_auth_present(token_envs: tuple[str, ...] = ("VAULT_TOKEN", "BAO_TOKEN")) -> bool:
+    """True if the *caller* appears to hold an auth token (G1 sanity check).
+
+    Checks, in order: any of ``token_envs`` set to a non-blank value in the
+    environment, then a regular file at ``~/.vault-token``. Best-effort: the token is
+    not validated — the owner's tool does that — we only avoid proxying when the
+    caller clearly has no credential, so the failure is a clear auth pointer rather
+    than a confusing tool error.
+    """
+    if any(os.environ.get(name, "").strip() for name in token_envs):
+        return True
+    try:
+        token_file = Path.home() / ".vault-token"
+    except RuntimeError:
+        # Path.home() raises when the home directory cannot be resolved; with no
+        # home there is no token file to find.
+        return False
+    # is_file(), not exists(): a directory of that name is not a credential.
+    return token_file.is_file()
+
+
+def write_audit(
+    state_dir: Path,
+    *,
+    need_id: str,
+    owner_repo: str,
+    domain: Optional[str],
+    action: str,
+    decision_id: Optional[str],
+    exit_code: Optional[int] = None,
+) -> Path:
+    """Append one metadata-only JSON line to ``<state_dir>/access-audit.log`` (G2).
+
+    Creates ``state_dir`` (and parents) if needed. The record holds only the
+    arguments passed in, a UTC timestamp, and the subject read from
+    ``WARDEN_POLICY_SUBJECT`` (``"operator"`` when unset or blank) — this function
+    is never handed a secret value. Returns the path of the log file.
+    """
+    state_dir.mkdir(parents=True, exist_ok=True)
+    audit_file = state_dir / "access-audit.log"
+
+    subject = os.environ.get("WARDEN_POLICY_SUBJECT", "").strip()
+    entry = dict(
+        timestamp=datetime.now(timezone.utc).isoformat(),
+        action=action,  # "fetch" | "exec"
+        need_id=need_id,
+        owner_repo=owner_repo,
+        domain=domain,
+        subject=subject or "operator",
+        policy_decision_id=decision_id,
+        exit_code=exit_code,
+    )
+    with audit_file.open("a") as handle:
+        handle.write(json.dumps(entry) + "\n")
+    return audit_file
+
+
+def _caller_env() -> dict:
+    """Return a fresh copy of the caller's environment for a child process.
+
+    Nothing is added: warden contributes no credential of its own (G1).
+    """
+    return os.environ.copy()
+
+
+def proxy_fetch(argv: List[str]) -> int:
+    """Run the owner's tool, streaming its output straight to the caller.
+
+    stdout/stderr are **inherited** (``None``), never piped — the secret value flows
+    subsystem → caller and is never read into warden's memory, buffer, or log (G2).
+    Returns the tool's exit code.
+
+    Raises:
+        ProxyError: ``argv`` is empty, or the tool could not be started (for
+            example it is not installed or not executable).
+    """
+    if not argv:
+        raise ProxyError("no fetch command to run.")
+    try:
+        completed = subprocess.run(  # noqa: S603 — argv is shlex-split from a validated template
+            argv,
+            stdout=None,
+            stderr=None,
+            stdin=None,
+            env=_caller_env(),
+            check=False,
+        )
+    except OSError as e:
+        # Report a launch failure as a proxy error so the caller gets a clear
+        # message instead of an unhandled OSError.
+        raise ProxyError(f"could not run {argv[0]!r}: {e.strerror or e}") from e
+    return completed.returncode
+
+
+def proxy_exec(argv: List[str], *, env_var: str, child_argv: List[str]) -> int:
+    """Fetch the value and inject it into a child command's environment only.
+
+    The value transits warden's memory here (the accepted proxy tradeoff for `--exec`)
+    but is never written to disk or log and never enters the caller's own shell env.
+    Captures the fetch tool's stdout to obtain the value, strips a single trailing
+    newline, and runs ``child_argv`` with ``env_var`` set in its environment.
+    Returns the child command's exit code.
+
+    Raises:
+        ProxyError: ``env_var`` is missing or not a usable variable name; ``argv``
+            or ``child_argv`` is empty; either program could not be started; the
+            fetch exited non-zero; or the fetched value is empty, undecodable, or
+            contains a NUL byte. No error message includes the value itself.
+    """
+    if not env_var:
+        raise ProxyError("--exec requires --field (the env var name to inject), e.g. NPM_AUTH_TOKEN")
+    if "=" in env_var or "\0" in env_var:
+        # Such a name cannot be used as an environment variable; reject it here
+        # with a clear message.
+        raise ProxyError(f"{env_var!r} is not a valid environment variable name.")
+    if not argv:
+        raise ProxyError("no fetch command to run.")
+    if not child_argv:
+        raise ProxyError("--exec needs a command to run after `--`.")
+
+    try:
+        fetched = subprocess.run(  # noqa: S603
+            argv, stdout=subprocess.PIPE, stderr=None, stdin=None,
+            env=_caller_env(), check=False, text=True,
+        )
+    except OSError as e:
+        raise ProxyError(f"could not run {argv[0]!r}: {e.strerror or e}") from e
+    except UnicodeDecodeError:
+        # `from None` keeps the decode error, which reports an offending byte of
+        # the value, out of any displayed traceback.
+        raise ProxyError(
+            "fetched value is not valid text and cannot be placed in an environment variable."
+        ) from None
+    if fetched.returncode != 0:
+        raise ProxyError(
+            f"fetch failed (exit {fetched.returncode}) — check caller auth and the path."
+        )
+
+    value = fetched.stdout
+    if value.endswith("\n"):
+        value = value[:-1]
+    if not value:
+        # Running the child with an empty secret would only fail later and less clearly.
+        raise ProxyError("fetch succeeded but returned an empty value — not running the command.")
+    if "\0" in value:
+        raise ProxyError(
+            "fetched value contains a NUL byte and cannot be placed in an environment variable."
+        )
+
+    child_env = _caller_env()
+    child_env[env_var] = value
+    try:
+        child = subprocess.run(  # noqa: S603
+            child_argv, stdout=None, stderr=None, stdin=None, env=child_env, check=False
+        )
+    except OSError as e:
+        raise ProxyError(f"could not run {child_argv[0]!r}: {e.strerror or e}") from e
+    finally:
+        # Best-effort scrub of the local references; do not log the value.
+        child_env.pop(env_var, None)
+        value = ""  # noqa: F841
+    return child.returncode