feat(custodian): add ADR-001 compliance validator

Scripts, Makefile target, and MCP tool for checking a repository against ADR-001 (workplans as repo artefacts, state-hub as cache).

Checks performed:
- File-side: workplans/ dir exists; valid YAML frontmatter (required fields, type, status, id format); filename matches id; embedded task blocks have id/status/priority.
- State-hub cross-reference: state_hub_workstream_id references resolve to real DB records; orphan detection flags active DB workstreams with no backing workplan file.

Usage:
  make validate-adr REPO=<path> [DOMAIN=<slug>]
  validate_repo_adr(repo_path, domain_slug?)  # MCP tool

Running against the-custodian itself correctly surfaces the 4 pre-ADR-001 workstreams that still need workplan files written.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-28 12:00:09 +01:00
parent 0546a1bb2a
commit c3efb099f1
4 changed files with 548 additions and 1 deletions
--- a/7
+++ b/7
@@ -1,4 +1,4 @@
-.PHONY: install install-cli db db-tools migrate seed api dashboard check start clean register-project
+.PHONY: install install-cli db db-tools migrate seed api dashboard check start clean register-project validate-adr
 COMPOSE = docker compose -f infra/docker-compose.yml --env-file .env
@@ -45,5 +45,10 @@ register-project:
 	@test -n "$(PROJECT_PATH)" || (echo "ERROR: PROJECT_PATH is required."; exit 1)
 	scripts/register_project.sh "$(DOMAIN)" "$(PROJECT_PATH)"
 ## Check a repo for ADR-001 compliance: make validate-adr REPO=/path/to/repo [DOMAIN=custodian]
 # REPO is mandatory: the first recipe line aborts with a usage message when it is empty.
 # DOMAIN is optional: when set it is forwarded to the script as --domain "<value>",
 # otherwise the $(if ...) expands to nothing and no --domain flag is passed.
 validate-adr:
 	@test -n "$(REPO)" || (echo "ERROR: REPO is required. Usage: make validate-adr REPO=<path> [DOMAIN=<slug>]"; exit 1)
 	uv run python scripts/validate_repo_adr.py "$(REPO)" $(if $(DOMAIN),--domain "$(DOMAIN)",)
 clean:
 	$(COMPOSE) down -v
--- a/mcp_server/TOOLS.md
+++ b/mcp_server/TOOLS.md
@@ -57,6 +57,14 @@ Do not use them as a substitute for formal work definition inside the domain rep
 ---
 ## Governance Tools
 | Tool | Key Args | When to use |
 |------|----------|-------------|
 | `validate_repo_adr(repo_path, domain_slug?)` | `repo_path`: absolute path; `domain_slug?`: for orphan detection | Check a repo against ADR-001. Detects missing workplans/ dir, invalid frontmatter, stale workstream ID references, and DB-only orphan workstreams. Run before and after any workplan changes. |
 ---
 ## Resources (URI-addressable, read-only)
 | URI | Returns |
--- a/mcp_server/server.py
+++ b/mcp_server/server.py
@@ -10,6 +10,7 @@ import os
 import re
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Any
 from uuid import UUID
@@ -629,6 +630,71 @@ def update_td_status(td_uuid: str, status: str) -> str:
    return json.dumps(td, indent=2)
 # ---------------------------------------------------------------------------
 # ADR-001 compliance validation
 # ---------------------------------------------------------------------------
@mcp.tool()
def validate_repo_adr(repo_path: str, domain_slug: str | None = None) -> str:
    """Check whether a repository is consistent with ADR-001.
    Validates that workplan files exist in workplans/ with correct frontmatter,
    that state_hub_workstream_id references resolve to real DB records, and that
    no active state-hub workstreams for the domain lack a backing file (orphan
    detection — DB-only records are an ADR-001 violation).
    Args:
        repo_path: Absolute path to the repository root.
        domain_slug: Domain slug for orphan detection (e.g. 'custodian').
                     If omitted, inferred from workplan frontmatter.
    """
    # The validator lives in scripts/ next to this package and is run as a
    # child process with --json so its findings can be re-rendered here.
    import subprocess

    validator = Path(__file__).parent.parent / "scripts" / "validate_repo_adr.py"
    argv = [sys.executable, str(validator), repo_path, "--json", "--api-base", API_BASE]
    if domain_slug:
        argv.extend(["--domain", domain_slug])
    proc = subprocess.run(argv, capture_output=True, text=True)

    # Anything that is not JSON on stdout (e.g. an argparse usage error or a
    # traceback) is passed straight back to the caller as the error text.
    try:
        payload = json.loads(proc.stdout)
    except json.JSONDecodeError:
        return f"Validator script error:\n{proc.stderr or proc.stdout or '(no output)'}"

    findings = payload.get("findings", [])
    summary = payload.get("summary", {})
    overall = payload.get("result", "unknown")

    def render_section(tag: str, heading: str) -> list[str]:
        """Render every finding whose level equals *tag* under *heading*."""
        chosen = [item for item in findings if item["level"] == tag]
        if not chosen:
            return []
        rendered = [f"{heading} ({len(chosen)}):"]
        for item in chosen:
            where = f"  [{item['file']}]" if item.get("file") else ""
            rendered.append(f"  {tag}  {item['check']}{where}")
            rendered.append(f"        {item['detail']}")
        rendered.append("")
        return rendered

    out = [f"ADR-001 Compliance: {repo_path}", ""]
    out.extend(render_section("FAIL", "FAILURES"))
    out.extend(render_section("WARN", "WARNINGS"))
    out.append(
        f"Summary: {summary.get('pass', 0)} pass | "
        f"{summary.get('warn', 0)} warn | "
        f"{summary.get('fail', 0)} fail"
    )

    # Any result other than "fail" or "warn" (including "unknown") reads as PASS.
    if overall == "fail":
        verdict = "FAIL"
    elif overall == "warn":
        verdict = "PASS (with warnings)"
    else:
        verdict = "PASS"
    out.append(f"Result: {verdict}")
    return "\n".join(out)
 # ---------------------------------------------------------------------------
 # Entry point
 # ---------------------------------------------------------------------------
--- a/scripts/validate_repo_adr.py
+++ b/scripts/validate_repo_adr.py
@@ -0,0 +1,468 @@
 #!/usr/bin/env python3
 """validate_repo_adr.py — ADR-001 compliance checker.
 Checks whether a repository is consistent with ADR-001: workplans and
 work items must originate as Markdown files in the native repository;
 the state-hub is a read/cache layer, never the origin.
 Checks performed:
  File-side (no API required):
    1. workplans/ directory exists
    2. Each .md file has valid YAML frontmatter with required fields
    3. type == "workplan", status in valid set, id matches pattern
    4. Filename starts with the id value
    5. Embedded ```task blocks have id and status fields
  State-hub cross-reference (requires API):
    6. state_hub_workstream_id references resolve to real DB records
    7. Orphan detection: DB workstreams for the domain with no backing file
 Usage:
    python scripts/validate_repo_adr.py <repo_path> [OPTIONS]
    Options:
      --domain SLUG    Domain slug for orphan detection
      --api-base URL   State Hub API (default: http://127.0.0.1:8000)
      --no-api         Skip state-hub consistency checks
      --json           Output JSON instead of text
 Exit codes:
    0 — all checks pass (including warnings)
    1 — one or more FAIL findings
 """
 from __future__ import annotations
 import argparse
 import json
 import re
 import sys
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
 try:
    import yaml as _yaml
    _HAS_YAML = True
 except ImportError:
    _HAS_YAML = False
 try:
    import httpx as _httpx
    _HAS_HTTPX = True
 except ImportError:
    _HAS_HTTPX = False
 # ---------------------------------------------------------------------------
 # Constants
 # ---------------------------------------------------------------------------
 # Frontmatter keys that every workplan file must define.
 REQUIRED_FRONTMATTER = {"id", "type", "title", "domain", "status", "owner", "created"}
 # Accepted values for the workplan-level `status` field.
 VALID_WP_STATUSES = {"active", "completed", "archived"}
 # Accepted values for `status` and `priority` inside embedded task blocks.
 VALID_TASK_STATUSES = {"todo", "in_progress", "blocked", "done", "cancelled"}
 VALID_TASK_PRIORITIES = {"low", "medium", "high", "critical"}
 # Workplan ids: uppercase prefix, "-WP-", digits (e.g. CUST-WP-0001).
 _WP_ID_RE = re.compile(r"^[A-Z]+-WP-\d+$")
 # Task ids: a workplan id followed by "-T<digits>" (e.g. CUST-WP-0001-T1).
 _TASK_ID_RE = re.compile(r"^[A-Z]+-WP-\d+-T\d+$")
 # Captures the text between a ```task fence and the next ``` fence;
 # DOTALL lets the captured body span several lines.
 _TASK_BLOCK_RE = re.compile(r"```task\s*\n(.*?)\n```", re.DOTALL)
 # ---------------------------------------------------------------------------
 # Data types
 # ---------------------------------------------------------------------------
 class Level:
    """Severity labels attached to findings (plain strings, not an Enum)."""
    PASS, WARN, FAIL = "PASS", "WARN", "FAIL"
@dataclass
class Finding:
    """Outcome of a single check; the fields match the keys of the JSON report."""

    # Severity label: one of the Level constants.
    level: str
    # Short name of the check that produced this finding.
    check: str
    # Human-readable explanation of the outcome.
    detail: str
    # File (or other locator) the finding refers to; empty for repo-wide checks.
    file: str = field(default="")
@dataclass
class Report:
    """Collects the findings produced while validating one repository."""

    # Path of the repository the report describes, as a string.
    repo_path: str
    # Findings in the order they were recorded.
    findings: list[Finding] = field(default_factory=list)

    def add(self, level: str, check: str, detail: str, file: str = "") -> None:
        """Record one finding built from the given parts."""
        entry = Finding(level, check, detail, file)
        self.findings.append(entry)

    def _with_level(self, wanted: str) -> list[Finding]:
        """Return the recorded findings whose level equals *wanted*."""
        return [item for item in self.findings if item.level == wanted]

    @property
    def failures(self) -> list[Finding]:
        """Findings recorded with Level.FAIL."""
        return self._with_level(Level.FAIL)

    @property
    def warnings(self) -> list[Finding]:
        """Findings recorded with Level.WARN."""
        return self._with_level(Level.WARN)

    @property
    def passes(self) -> list[Finding]:
        """Findings recorded with Level.PASS."""
        return self._with_level(Level.PASS)
 # ---------------------------------------------------------------------------
 # Parsing helpers
 # ---------------------------------------------------------------------------
 def _parse_yaml_block(raw: str) -> dict:
    """Parse a YAML mapping from *raw*, falling back to flat ``key: value`` lines.

    Always returns a dict so callers can use ``.get`` safely:

    * the parsed mapping when the text is a YAML mapping;
    * ``{}`` when the document is empty (or otherwise falsy);
    * ``{"_parse_error": True}`` when PyYAML rejects the text, or when the
      document is valid YAML but not a mapping (a bare scalar or a list).

    Without PyYAML only unindented ``key: value`` lines are recognised and
    surrounding quotes are stripped from the value.
    """
    if _HAS_YAML:
        try:
            loaded = _yaml.safe_load(raw)
        except _yaml.YAMLError:
            return {"_parse_error": True}
        if not loaded:
            return {}
        # safe_load returns whatever the document represents; a string or list
        # here would make every caller's .get() raise AttributeError.
        if not isinstance(loaded, dict):
            return {"_parse_error": True}
        return loaded
    # Minimal fallback: flat key: value only
    result: dict = {}
    for line in raw.splitlines():
        if ":" in line and not line.startswith(" "):
            k, _, v = line.partition(":")
            result[k.strip()] = v.strip().strip('"').strip("'")
    return result
 def parse_frontmatter(text: str) -> tuple[dict, str]:
    """Split YAML frontmatter from body. Returns ({}, text) if no frontmatter.

    The text must start with ``---``.  The frontmatter ends at the next line
    that consists solely of ``---`` (trailing blanks allowed), so a ``---``
    that merely appears inside a value no longer cuts the YAML short.  When no
    such line exists, the first ``---`` substring is used as the closing
    fence, as before.  The returned body is everything after the closing
    fence, unmodified.
    """
    if not text.startswith("---"):
        return {}, text
    rest = text[3:]
    # The lookahead keeps trailing blanks/newline out of the match, so the
    # body starts exactly where it did with the plain substring split.
    closing = re.search(r"^---(?=[ \t\r]*$)", rest, re.MULTILINE)
    if closing is not None:
        raw, body = rest[:closing.start()], rest[closing.end():]
    else:
        raw, fence, body = rest.partition("---")
        if not fence:
            return {}, text
    return _parse_yaml_block(raw.strip()), body
 def parse_task_blocks(body: str) -> list[dict]:
    """Return the parsed YAML of every fenced ```task block, in document order."""
    blocks: list[dict] = []
    for fenced in _TASK_BLOCK_RE.finditer(body):
        yaml_text = fenced.group(1).strip()
        blocks.append(_parse_yaml_block(yaml_text))
    return blocks
 # ---------------------------------------------------------------------------
 # File-side checks
 # ---------------------------------------------------------------------------
 def _field_text(mapping: dict, key: str) -> str:
    """Return ``mapping[key]`` as text; a missing or null value becomes ''."""
    value = mapping.get(key)
    # YAML `key:` with no value parses to None; str(None) would yield the
    # non-empty string "None" and slip past the emptiness checks.
    return "" if value is None else str(value)

 def _check_task_blocks(tasks: list, fname: str, report: Report) -> None:
    """Record findings for the id, status and priority of each task block."""
    for i, task in enumerate(tasks, 1):
        tref = f"{fname}#task[{i}]"
        # A block that parsed to a scalar or list is as unusable as a YAML error.
        if not isinstance(task, dict) or task.get("_parse_error"):
            report.add(Level.FAIL, "task-parseable", f"Task block {i} failed to parse", tref)
            continue
        t_id = _field_text(task, "id")
        if not t_id:
            report.add(Level.FAIL, "task-id", "Missing 'id' field", tref)
        elif not _TASK_ID_RE.match(t_id):
            report.add(Level.WARN, "task-id-format",
                       f"id {t_id!r} doesn't match [A-Z]+-WP-\\d+-T\\d+", tref)
        t_status = _field_text(task, "status")
        if not t_status:
            report.add(Level.FAIL, "task-status", "Missing 'status' field", tref)
        elif t_status not in VALID_TASK_STATUSES:
            report.add(Level.FAIL, "task-status-value",
                       f"status {t_status!r} not in {sorted(VALID_TASK_STATUSES)}", tref)
        t_prio = _field_text(task, "priority")
        if not t_prio:
            report.add(Level.WARN, "task-priority", "Missing 'priority' field", tref)
        elif t_prio not in VALID_TASK_PRIORITIES:
            report.add(Level.WARN, "task-priority-value",
                       f"priority {t_prio!r} not in {sorted(VALID_TASK_PRIORITIES)}", tref)

 def _check_workplan_file(wp_file: Path, report: Report) -> dict | None:
    """Validate one workplan file and record the findings on *report*.

    Checks, in order: the file is readable as UTF-8, starts with ``---``, has
    parseable frontmatter, defines all required fields, and has a valid type,
    status, id format, filename prefix and domain; then every embedded task
    block is checked for id, status and priority.

    Returns the parsed frontmatter mapping, or None when the file cannot be
    read or its frontmatter is missing or unparseable (the remaining checks
    are skipped in that case).
    """
    fname = wp_file.name
    try:
        text = wp_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError: without it a single
        # non-UTF-8 file would abort the whole validation run.
        report.add(Level.FAIL, "file-readable", str(e), fname)
        return None
    if not text.startswith("---"):
        report.add(Level.FAIL, "frontmatter-present",
                   "File does not start with '---'; YAML frontmatter required", fname)
        return None
    meta, body = parse_frontmatter(text)
    # Frontmatter must be a non-empty mapping; a bare scalar or list is rejected too.
    if not isinstance(meta, dict) or not meta or meta.get("_parse_error"):
        report.add(Level.FAIL, "frontmatter-parseable",
                   "YAML frontmatter could not be parsed", fname)
        return None
    # Required fields (presence of the key only; values are checked below)
    missing = REQUIRED_FRONTMATTER - set(meta.keys())
    if missing:
        report.add(Level.FAIL, "frontmatter-required-fields",
                   f"Missing fields: {', '.join(sorted(missing))}", fname)
    else:
        report.add(Level.PASS, "frontmatter-required-fields",
                   "All required fields present", fname)
    # type
    if meta.get("type") != "workplan":
        report.add(Level.FAIL, "frontmatter-type",
                   f"type must be 'workplan', got {meta.get('type')!r}", fname)
    else:
        report.add(Level.PASS, "frontmatter-type", "type=workplan", fname)
    # status
    status = _field_text(meta, "status")
    if status not in VALID_WP_STATUSES:
        report.add(Level.FAIL, "frontmatter-status",
                   f"status must be one of {sorted(VALID_WP_STATUSES)}, got {status!r}", fname)
    else:
        report.add(Level.PASS, "frontmatter-status", f"status={status}", fname)
    # id format
    wp_id = _field_text(meta, "id")
    if not _WP_ID_RE.match(wp_id):
        report.add(Level.FAIL, "frontmatter-id-format",
                   f"id must match [A-Z]+-WP-\\d+ (e.g. CUST-WP-0001), got {wp_id!r}", fname)
    else:
        report.add(Level.PASS, "frontmatter-id-format", f"id={wp_id}", fname)
    # filename prefix (only a warning; skipped entirely when there is no id)
    if wp_id and not fname.startswith(wp_id):
        report.add(Level.WARN, "filename-id-prefix",
                   f"Filename should start with id '{wp_id}', got {fname!r}", fname)
    elif wp_id:
        report.add(Level.PASS, "filename-id-prefix", "Filename matches id prefix", fname)
    # domain non-empty
    domain = _field_text(meta, "domain").strip()
    if not domain:
        report.add(Level.FAIL, "frontmatter-domain", "domain must be a non-empty string", fname)
    else:
        report.add(Level.PASS, "frontmatter-domain", f"domain={domain}", fname)
    # task blocks
    tasks = parse_task_blocks(body)
    if not tasks:
        report.add(Level.WARN, "tasks-present",
                   "No ```task blocks found — intentional for a workplan with no tasks?", fname)
    else:
        report.add(Level.PASS, "tasks-present", f"{len(tasks)} task block(s) found", fname)
    _check_task_blocks(tasks, fname, report)
    return meta
 def check_files(workplans_dir: Path, report: Report) -> list[dict]:
    """Validate every ``*.md`` file directly inside *workplans_dir*.

    Files are processed in sorted order.  Returns the frontmatter of each file
    that yielded one; an empty directory produces a single warning and [].
    """
    candidates = sorted(workplans_dir.glob("*.md"))
    if len(candidates) == 0:
        report.add(Level.WARN, "workplans-not-empty",
                   "workplans/ directory exists but contains no .md files")
        return []
    # The generator is consumed in order, so findings are still recorded
    # file by file.
    parsed = (_check_workplan_file(path, report) for path in candidates)
    return [frontmatter for frontmatter in parsed if frontmatter]
 # ---------------------------------------------------------------------------
 # State-hub API checks
 # ---------------------------------------------------------------------------
 def _api_get(api_base: str, path: str, params: dict | None = None) -> Any:
    """GET *path* from the state-hub API and return the decoded JSON body.

    Best-effort by design: returns None when httpx is not installed and on
    any error (connection failure, non-2xx status, invalid JSON, ...).  A
    trailing slash is appended to *path* when missing, and query parameters
    whose value is None are dropped.
    """
    if not _HAS_HTTPX:
        return None
    url_path = path if path.endswith("/") else path + "/"
    try:
        query = {}
        for key, value in (params or {}).items():
            if value is not None:
                query[key] = value
        with _httpx.Client(base_url=api_base, timeout=10.0, follow_redirects=True) as client:
            response = client.get(url_path, params=query)
            response.raise_for_status()
            return response.json()
    except Exception:
        return None
 def _meta_text(meta: dict, key: str) -> str:
    """Return ``meta[key]`` as stripped text; missing or null values become ''."""
    value = meta.get(key)
    # YAML `key:` with no value parses to None; str(None) would be "None".
    return "" if value is None else str(value).strip()

 def _check_workstream_refs(api_base: str, metas: list[dict], report: Report) -> set[str]:
    """Verify each workplan's state_hub_workstream_id and return the ids seen."""
    file_ws_ids: set[str] = set()
    for meta in metas:
        wp_ref = str(meta.get("id", ""))
        ws_id = _meta_text(meta, "state_hub_workstream_id")
        if not ws_id:
            report.add(Level.WARN, "workstream-id-present",
                       f"Workplan {meta.get('id')} has no state_hub_workstream_id "
                       f"— not indexed in state-hub",
                       wp_ref)
            continue
        file_ws_ids.add(ws_id)
        ws = _api_get(api_base, f"/workstreams/{ws_id}")
        # NOTE: _api_get returns None for any failure, so a transient API
        # error is indistinguishable from a missing record here.
        if ws is None:
            report.add(Level.FAIL, "workstream-ref-exists",
                       f"state_hub_workstream_id {ws_id} not found in DB (stale reference)",
                       wp_ref)
        else:
            report.add(Level.PASS, "workstream-ref-exists",
                       f"Workstream {ws_id[:8]}… ({ws.get('slug')}) confirmed in DB",
                       wp_ref)
    return file_ws_ids

 def _check_orphans(api_base: str, metas: list[dict], domain_slug: str | None,
                    file_ws_ids: set[str], report: Report) -> None:
    """Flag non-finished DB workstreams that no workplan file references."""
    domains_to_check: set[str] = set()
    if domain_slug:
        domains_to_check.add(domain_slug)
    for meta in metas:
        d = _meta_text(meta, "domain")
        if d:
            domains_to_check.add(d)
    if not domains_to_check:
        report.add(Level.WARN, "orphan-detection",
                   "No domain slugs available for orphan detection — pass --domain to enable")
        return
    topics = _api_get(api_base, "/topics")
    if not isinstance(topics, list):
        report.add(Level.WARN, "orphan-detection", "Could not fetch topics for orphan detection")
        return
    for topic in topics:
        t_domain = topic.get("domain", "")
        if t_domain not in domains_to_check:
            continue
        # Ids are normalised to str so slicing and the membership test below
        # work even if the API returns a non-string id type.
        t_id = str(topic["id"])
        workstreams = _api_get(api_base, "/workstreams", {"topic_id": t_id})
        if not isinstance(workstreams, list):
            report.add(Level.WARN, "orphan-detection",
                       f"Could not fetch workstreams for topic {t_id[:8]}… (domain={t_domain})")
            continue
        for ws in workstreams:
            ws_status = ws.get("status", "")
            # Finished workstreams are not required to have a backing file.
            if ws_status in ("completed", "archived"):
                continue
            ws_id = str(ws["id"])
            ws_slug = ws.get("slug", "")
            if ws_id not in file_ws_ids:
                report.add(
                    Level.FAIL, "orphan-workstream",
                    f"Active workstream '{ws_slug}' (id={ws_id[:8]}…, domain={t_domain}) "
                    f"exists in DB but has no backing workplan file — ADR-001 violation",
                )
            else:
                report.add(Level.PASS, "orphan-workstream",
                           f"Workstream '{ws_slug}' is backed by a workplan file")

 def check_api(api_base: str, metas: list[dict], domain_slug: str | None,
               report: Report) -> None:
    """Cross-reference workplan files against the live state-hub database.

    Args:
        api_base: Base URL of the State Hub API.
        metas: Parsed frontmatter of the workplan files that passed parsing.
        domain_slug: Extra domain to include in orphan detection, in addition
            to the domains named in *metas*; may be None.
        report: Receives all findings.

    When the health endpoint cannot be fetched a single warning is recorded
    and all cross-reference checks are skipped.  Otherwise every
    state_hub_workstream_id is looked up, then workstreams of the relevant
    domains that are neither completed nor archived are checked for a backing
    workplan file.  A null/blank id or domain in the frontmatter is treated as
    absent rather than as the literal text "None".
    """
    health = _api_get(api_base, "/state/health")
    if health is None:
        report.add(Level.WARN, "api-reachable",
                   f"State Hub API not reachable at {api_base} — skipping cross-reference checks")
        return
    report.add(Level.PASS, "api-reachable", f"State Hub API reachable at {api_base}")
    # Verify each state_hub_workstream_id reference
    file_ws_ids = _check_workstream_refs(api_base, metas, report)
    # Orphan detection: DB workstreams with no backing file
    _check_orphans(api_base, metas, domain_slug, file_ws_ids, report)
 # ---------------------------------------------------------------------------
 # Top-level runner
 # ---------------------------------------------------------------------------
 def validate(repo_path: Path, api_base: str = "http://127.0.0.1:8000",
              domain_slug: str | None = None, skip_api: bool = False) -> Report:
    """Run every ADR-001 check against *repo_path* and return the Report.

    A missing workplans/ directory is recorded as a failure and ends the run;
    otherwise the file checks run, followed by the state-hub checks unless
    *skip_api* is true.
    """
    outcome = Report(repo_path=str(repo_path))
    plans_dir = repo_path / "workplans"
    if plans_dir.is_dir():
        outcome.add(Level.PASS, "workplans-dir", "workplans/ directory exists")
        frontmatters = check_files(plans_dir, outcome)
        if not skip_api:
            check_api(api_base, frontmatters, domain_slug, outcome)
    else:
        outcome.add(Level.FAIL, "workplans-dir",
                    "No workplans/ directory found. "
                    "ADR-001 requires workplan files at <repo>/workplans/<ID>-<slug>.md")
    return outcome
 def render_text(report: Report) -> str:
    """Render a Report as human-readable text.

    Findings are grouped by severity (failures, then warnings, then passes);
    empty groups are omitted.  The footer shows the counts and the overall
    result: FAIL if any failure, PASS (with warnings) if only warnings,
    PASS otherwise.
    """
    SEP = "=" * 62
    lines = ["ADR-001 Compliance Report", f"Repo: {report.repo_path}", SEP]
    # Explicit headings: appending "S" to the level name produced "PASSS".
    sections = (
        (Level.FAIL, "FAILURES"),
        (Level.WARN, "WARNINGS"),
        (Level.PASS, "PASSES"),
    )
    for level, heading in sections:
        section = [f for f in report.findings if f.level == level]
        if not section:
            continue
        lines.append(f"\n  {heading} ({len(section)}):")
        for f in section:
            loc = f"  [{f.file}]" if f.file else ""
            lines.append(f"    {f.check}{loc}")
            lines.append(f"      {f.detail}")
    lines.append(f"\n{SEP}")
    lines.append(
        f"  {len(report.passes)} pass  |  "
        f"{len(report.warnings)} warn  |  "
        f"{len(report.failures)} fail"
    )
    if report.failures:
        lines.append("  RESULT: ✗ FAIL")
    elif report.warnings:
        lines.append("  RESULT: ✓ PASS (with warnings)")
    else:
        lines.append("  RESULT: ✓ PASS")
    lines.append(SEP)
    return "\n".join(lines)
 # ---------------------------------------------------------------------------
 # CLI entry point
 # ---------------------------------------------------------------------------
 def main() -> None:
    """Command-line entry point: parse arguments, validate, print, exit.

    Prints either the text report or a JSON document (with --json) and exits
    with status 1 when the report contains at least one failure, else 0.
    """
    cli = argparse.ArgumentParser(
        description="ADR-001 compliance checker for custodian-ecosystem repos",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    cli.add_argument("repo_path", help="Path to the repository root")
    cli.add_argument("--domain", dest="domain_slug", default=None,
                     help="Domain slug for orphan detection (e.g. custodian)")
    cli.add_argument("--api-base", default="http://127.0.0.1:8000",
                     help="State Hub API base URL")
    cli.add_argument("--no-api", action="store_true",
                     help="Skip state-hub API consistency checks")
    cli.add_argument("--json", action="store_true", dest="as_json",
                     help="Output JSON instead of text")
    opts = cli.parse_args()

    report = validate(
        repo_path=Path(opts.repo_path).resolve(),
        api_base=opts.api_base,
        domain_slug=opts.domain_slug,
        skip_api=opts.no_api,
    )

    if not opts.as_json:
        print(render_text(report))
    else:
        # Overall result mirrors the text report: any failure wins, then warnings.
        if report.failures:
            overall = "fail"
        elif report.warnings:
            overall = "warn"
        else:
            overall = "pass"
        payload = {
            "repo_path": report.repo_path,
            "findings": [
                {"level": item.level, "check": item.check,
                 "detail": item.detail, "file": item.file}
                for item in report.findings
            ],
            "summary": {
                "pass": len(report.passes),
                "warn": len(report.warnings),
                "fail": len(report.failures),
            },
            "result": overall,
        }
        print(json.dumps(payload, indent=2))

    exit_code = 1 if report.failures else 0
    sys.exit(exit_code)
 # Run the CLI only when executed as a script, so importing this module
 # (e.g. to call validate() directly) has no side effects.
 if __name__ == "__main__":
    main()