state-hub/scripts/capture_sbom_tools.py

#!/usr/bin/env python3
"""Invoke the SBOM capture agent to generate/update sbom-tools.yaml for a repo.

Usage:
    python capture_sbom_tools.py --repo <slug> [--repo-path <path>] [--dry-run] [--print-prompt]

The script:
1. Resolves repo path from the state-hub API (if --repo-path is not given)
2. Loads the agent prompt from prompts/sbom-capture-agent.md
3. Substitutes {repo_slug}, {repo_path}, {date} placeholders
4. Invokes `claude -p "<prompt>"` non-interactively
5. Extracts the YAML block from the response
6. Writes (or shows diff of) sbom-tools.yaml in the repo root

Requirements:
  - `claude` CLI must be on PATH (Claude Code)
  - PyYAML must be available in the active venv
"""
from __future__ import annotations

import argparse
import datetime
import difflib
import json
import os
import re
import subprocess
import sys
import urllib.error
import urllib.request
from pathlib import Path

# Base URL of the state-hub API. Override it with the API_BASE environment
# variable; any trailing slash is dropped so paths can be appended with "/".
API_BASE = os.getenv("API_BASE", "http://127.0.0.1:8000").rstrip("/")
# Directory that contains this script.
SCRIPT_DIR = Path(__file__).parent
# Agent prompt template, kept in the "prompts" directory next to SCRIPT_DIR.
PROMPT_FILE = SCRIPT_DIR.parent.joinpath("prompts", "sbom-capture-agent.md")


def resolve_repo_path(repo_slug: str) -> Path | None:
    """Look up the registered path for a repo slug via the state-hub API."""
    url = f"{API_BASE}/repos/{repo_slug}/"
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            data = json.loads(resp.read())
            path_str = data.get("local_path")
            if path_str:
                return Path(path_str)
    except (urllib.error.URLError, KeyError):
        pass
    return None


def load_prompt(repo_slug: str, repo_path: Path) -> str:
    if not PROMPT_FILE.exists():
        print(f"Error: prompt file not found at {PROMPT_FILE}", file=sys.stderr)
        sys.exit(1)
    template = PROMPT_FILE.read_text()
    today = datetime.date.today().isoformat()
    return (
        template
        .replace("{repo_slug}", repo_slug)
        .replace("{repo_path}", str(repo_path))
        .replace("{date}", today)
    )


def invoke_agent(prompt: str, timeout: float = 120) -> str:
    """Run `claude -p <prompt>` and return stdout.

    Args:
        prompt: Prompt text, passed as a single argument (no shell is involved).
        timeout: Seconds to wait for the command before giving up. Defaults
            to 120.

    Returns:
        The command's captured standard output as text.

    Exits with status 1 (after printing to stderr) when the `claude`
    executable is not found, the call times out, or the command returns a
    non-zero exit code.
    """
    try:
        result = subprocess.run(
            ["claude", "-p", prompt],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError:
        print("Error: `claude` CLI not found on PATH. Install Claude Code.", file=sys.stderr)
        sys.exit(1)
    except subprocess.TimeoutExpired:
        # ":g" renders whole numbers without a trailing ".0" (120 -> "120").
        print(f"Error: claude invocation timed out after {timeout:g}s.", file=sys.stderr)
        sys.exit(1)

    if result.returncode != 0:
        print(f"Error: claude exited with code {result.returncode}", file=sys.stderr)
        if result.stderr:
            print(result.stderr, file=sys.stderr)
        sys.exit(1)

    return result.stdout


def extract_yaml(response: str) -> str:
    """Extract YAML content from the agent response.

    Accepts, in order of preference:
    - YAML wrapped in ```yaml ... ``` (or ```yml) fences, tag case-insensitive
    - The first fenced block that has no language tag
    - Raw YAML (starts with # or 'tools:')

    Fences are paired in order from the start of the response, so a block in
    another language (```bash, ```json, ...) that appears before the YAML block
    is skipped as a whole rather than having its closing fence mistaken for an
    opening one. Fenced blocks that are empty after stripping are ignored.

    Returns:
        The extracted YAML text with surrounding whitespace stripped.

    Exits with status 1 (after printing the first 500 characters of the
    response to stderr) when no YAML can be identified.
    """
    untagged = None
    for fence in re.finditer(r"```([^\n`]*)\n(.*?)```", response, re.DOTALL):
        tag = fence.group(1).strip().lower()
        body = fence.group(2).strip()
        if not body:
            continue
        if tag in ("yaml", "yml"):
            # An explicitly tagged YAML block always wins.
            return body
        if not tag and untagged is None:
            # Remember the first untagged block as a fallback.
            untagged = body
    if untagged is not None:
        return untagged

    # Otherwise treat entire response as YAML
    stripped = response.strip()
    if stripped.startswith(("#", "tools:")):
        return stripped

    print("Warning: could not extract YAML from agent response.", file=sys.stderr)
    print("Raw response:", file=sys.stderr)
    print(response[:500], file=sys.stderr)
    sys.exit(1)


def show_diff(old: str | None, new: str, target: Path) -> None:
    if old is None:
        print(f"[new file] {target}")
        for line in new.splitlines():
            print(f"  + {line}")
    else:
        diff = list(difflib.unified_diff(
            old.splitlines(keepends=True),
            new.splitlines(keepends=True),
            fromfile=f"a/{target.name}",
            tofile=f"b/{target.name}",
        ))
        if diff:
            print("".join(diff))
        else:
            print(f"[no changes] {target}")


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Generate/update sbom-tools.yaml for a repo using the SBOM capture agent."
    )
    parser.add_argument("--repo", required=True, help="Repo slug (e.g. 'railiance-infra')")
    parser.add_argument("--repo-path", help="Path to repo root (auto-resolved from state-hub if omitted)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show prompt and diff without writing sbom-tools.yaml")
    parser.add_argument("--print-prompt", action="store_true",
                        help="Print the rendered prompt and exit (useful for inspection)")
    args = parser.parse_args()

    # Resolve repo path
    if args.repo_path:
        repo_path = Path(args.repo_path).resolve()
    else:
        repo_path = resolve_repo_path(args.repo)
        if repo_path is None:
            # Fall back to ~/repo_slug convention
            repo_path = Path.home() / args.repo
            print(f"Could not resolve path from API; trying {repo_path}", file=sys.stderr)

    if not repo_path.exists():
        print(f"Error: repo path does not exist: {repo_path}", file=sys.stderr)
        sys.exit(1)

    target = repo_path / "sbom-tools.yaml"
    existing_content = target.read_text() if target.exists() else None

    prompt = load_prompt(args.repo, repo_path)

    if args.print_prompt:
        print(prompt)
        return

    print(f"Running SBOM capture agent for {args.repo} ({repo_path})…")
    response = invoke_agent(prompt)
    yaml_content = extract_yaml(response)

    # Ensure trailing newline
    if not yaml_content.endswith("\n"):
        yaml_content += "\n"

    show_diff(existing_content, yaml_content, target)

    if args.dry_run:
        print("\n[dry-run] sbom-tools.yaml not written.")
        return

    target.write_text(yaml_content)
    print(f"\nWritten: {target}")
    print("Review the file, correct any 'confidence: low' entries, then commit.")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()