Implement operational discovery rescan loops

2026-05-20 22:52:26 +02:00
parent 50810ffd54
commit 4fdf552f73
8 changed files with 1079 additions and 26 deletions
--- a/railiance_fabric/cli.py
+++ b/railiance_fabric/cli.py
@@ -1,7 +1,9 @@
 from __future__ import annotations

 import argparse
+import hashlib
 import json
+import os
 import re
 import subprocess
 import sys
@@ -10,6 +12,7 @@ import urllib.request
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
+from urllib.parse import quote

 from .connectors import ConnectorConfig
 from .loader import declaration_files, load_yaml
@@ -17,10 +20,15 @@ from .graph import FabricGraph, build_graph
 from .graph_explorer import fabric_graph_explorer_payload
 from .llm_extraction import LLMExtractionConfig
 from .reconciliation import reconcile_discovery_snapshots
-from .scanner import ScanOptions, scan_repo
+from .scanner import EXTRACTOR_VERSION, ScanOptions, scan_repo
 from .validation import validate_roots


+DEFAULT_DISCOVERY_DIR = ".fabric-discovery"
+DEFAULT_SNAPSHOT_DIR = "snapshots"
+DEFAULT_REPORT_DIR = "reports"
+
+
 def build_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        prog="railiance-fabric",
@@ -125,10 +133,64 @@ def build_parser() -> argparse.ArgumentParser:
    scan_manifest.add_argument("--profile", default="deterministic", help="Discovery scan profile name.")
    scan_manifest.add_argument("--output-dir", type=Path, default=None, help="Write one discovery snapshot JSON per scanned repo.")
    scan_manifest.add_argument("--previous-dir", type=Path, default=None, help="Read previous per-repo snapshots for reconciliation.")
+    scan_manifest.add_argument(
+        "--cache-dir",
+        type=Path,
+        default=None,
+        help="Operational cache directory. Defaults to <manifest-dir>/.fabric-discovery.",
+    )
+    scan_manifest.add_argument(
+        "--no-cache",
+        action="store_true",
+        help="Disable default cache snapshots and report output unless explicit paths are provided.",
+    )
+    scan_manifest.add_argument(
+        "--previous-source",
+        choices=["dir", "registry", "none"],
+        default="dir",
+        help="Where to read previous discovery snapshots from before reconciling.",
+    )
+    scan_manifest.add_argument(
+        "--previous-from-registry",
+        action="store_true",
+        help="Shortcut for --previous-source registry.",
+    )
+    scan_manifest.add_argument("--report-output", type=Path, default=None, help="Write an operational rescan report JSON.")
    scan_manifest.add_argument("--dry-run", action="store_true", help="Do not write to the registry.")
    scan_manifest.add_argument("--ingest", action="store_true", help="Store each discovery snapshot in the registry.")
+    scan_manifest.add_argument(
+        "--ingest-unchanged",
+        action="store_true",
+        help="Also ingest unchanged snapshots when --ingest is enabled.",
+    )
    scan_manifest.add_argument("--accept", action="store_true", help="Accept ingested snapshots after scanning.")
+    scan_manifest.add_argument(
+        "--accepted-key",
+        action="append",
+        default=[],
+        help="Candidate stable key to include during acceptance. May be repeated.",
+    )
+    scan_manifest.add_argument(
+        "--accept-review-state",
+        action="append",
+        default=None,
+        help="Review state accepted during projection. Defaults to accepted.",
+    )
+    scan_manifest.add_argument(
+        "--accept-policy",
+        choices=["safe", "explicit"],
+        default="safe",
+        help="Safe blocks conflicts/tombstones and accepts only accepted review state; explicit allows provided overrides.",
+    )
    scan_manifest.add_argument("--strict", action="store_true", help="Exit non-zero when any repo cannot be scanned or stored.")
+    scan_manifest.add_argument(
+        "--exit-code-mode",
+        choices=["default", "operational"],
+        default="default",
+        help="Use operational exit codes: 2 changes/baseline, 3 review required, 4 partial failure.",
+    )
+    scan_manifest.add_argument("--lock-file", type=Path, default=None, help="Lock file path for automation-safe runs.")
+    scan_manifest.add_argument("--no-lock", action="store_true", help="Disable the default cache-directory rescan lock.")
    scan_manifest.add_argument("--json", action="store_true", help="Print the raw manifest scan summary.")
    scan_manifest.add_argument("--llm", action="store_true", help="Enable llm-connect assisted extraction.")
    scan_manifest.add_argument("--deterministic-only", action="store_true", help="Force deterministic-only scans even when --llm is set.")
@@ -152,6 +214,12 @@ def build_parser() -> argparse.ArgumentParser:
        help="Manifest path for the local-fabric-registry connector. Defaults to the scanned manifest.",
    )

+    rescan_status = registry_sub.add_parser("rescan-status", help="Show discovery rescan freshness and review status.")
+    rescan_status.add_argument("--registry-url", default="http://127.0.0.1:8765")
+    rescan_status.add_argument("--review-only", action="store_true", help="Only show repos whose latest discovery needs review.")
+    rescan_status.add_argument("--stale-after-hours", type=float, default=None, help="Only show discovery snapshots older than this age.")
+    rescan_status.add_argument("--json", action="store_true", help="Print the raw rescan status summary.")
+
    cyclonedx = registry_sub.add_parser("ingest-cyclonedx", help="Ingest a CycloneDX SBOM as library inventory.")
    cyclonedx.add_argument("sbom", type=Path)
    cyclonedx.add_argument("--registry-url", default="http://127.0.0.1:8765")
@@ -235,6 +303,8 @@ def main(argv: list[str] | None = None) -> int:
            return _registry_sync_manifest(args)
        if args.registry_command == "scan-manifest":
            return _registry_scan_manifest(args)
+        if args.registry_command == "rescan-status":
+            return _registry_rescan_status(args)
        if args.registry_command == "ingest-cyclonedx":
            return _registry_ingest_cyclonedx(args)
        if args.registry_command == "ingest-discovery":
@@ -340,6 +410,15 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
    if args.accept and not args.ingest and not args.dry_run:
        print("ERROR --accept requires --ingest unless --dry-run is set", file=sys.stderr)
        return 1
+    if args.accept_policy == "safe":
+        if args.accept_review_state and set(args.accept_review_state) != {"accepted"}:
+            print("ERROR --accept-review-state requires --accept-policy explicit unless it is only 'accepted'", file=sys.stderr)
+            return 1
+        if args.accepted_key:
+            print("ERROR --accepted-key requires --accept-policy explicit", file=sys.stderr)
+            return 1
+    if args.previous_from_registry:
+        args.previous_source = "registry"

    manifest_path = args.manifest.resolve()
    manifest = load_yaml(manifest_path)
@@ -351,6 +430,15 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
        print(f"ERROR {manifest_path}: manifest requires a repositories list", file=sys.stderr)
        return 1

+    lock_path = _scan_manifest_lock_path(args, manifest_path)
+    lock_fd: int | None = None
+    if lock_path is not None:
+        try:
+            lock_fd = _scan_manifest_acquire_lock(lock_path)
+        except FileExistsError:
+            print(f"ERROR rescan lock already exists: {lock_path}", file=sys.stderr)
+            return 5 if args.exit_code_mode == "operational" else 1
+
    registry_url = args.registry_url or str(manifest.get("registry_url") or "http://127.0.0.1:8765")
    allowlist = {slug for slug in args.repo_slug if slug}
    selected = [
@@ -382,6 +470,8 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
        results.append(result)

    summary = {
+        "apiVersion": "railiance.fabric/v1alpha1",
+        "kind": "FabricDiscoveryRescanReport",
        "manifest": str(manifest_path),
        "registry_url": registry_url,
        "generated_at": _utc_now(),
@@ -390,6 +480,10 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
        "ingest": args.ingest and not args.dry_run,
        "accept": args.accept and args.ingest and not args.dry_run,
        "allowlist": sorted(allowlist),
+        "cache": _scan_manifest_cache_config(args, manifest_path),
+        "scanner": {"extractor_version": EXTRACTOR_VERSION},
+        "previous_source": args.previous_source,
+        "accept_policy": args.accept_policy,
        "llm": {
            "requested": bool(args.llm),
            "deterministic_only": bool(args.deterministic_only or not args.llm),
@@ -399,13 +493,83 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
        "repositories": results,
        "counts": _scan_manifest_counts(results),
    }
+    report_path = _scan_manifest_report_path(args, manifest_path, summary["generated_at"])
+    if report_path is not None:
+        summary["report_path"] = str(report_path)
+        try:
+            report_path.parent.mkdir(parents=True, exist_ok=True)
+            report_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+        except Exception as exc:
+            _scan_manifest_release_lock(lock_fd, lock_path)
+            print(f"ERROR cannot write rescan report {report_path}: {exc}", file=sys.stderr)
+            return 1

    if args.json:
        print(json.dumps(summary, indent=2, sort_keys=True))
    else:
        _print_scan_manifest_summary(summary)
-    if args.strict and summary["counts"]["errors"]:
-        return 1
+        if summary.get("report_path"):
+            print(f"report: {summary['report_path']}")
+    exit_code = _scan_manifest_exit_code(summary, args)
+    _scan_manifest_release_lock(lock_fd, lock_path)
+    return exit_code
+
+
+def _registry_rescan_status(args: argparse.Namespace) -> int:
+    status = _registry_get_checked(args.registry_url, "/status")
+    snapshots = status.get("latest_discovery_snapshots", [])
+    if not isinstance(snapshots, list):
+        snapshots = []
+    now = datetime.now(timezone.utc)
+    filtered: list[dict[str, Any]] = []
+    for item in snapshots:
+        if not isinstance(item, dict):
+            continue
+        if args.review_only and not item.get("review_required"):
+            continue
+        age_hours = _age_hours(item.get("generated_at"), now)
+        item = {**item, "age_hours": age_hours}
+        if args.stale_after_hours is not None and (age_hours is None or age_hours <= args.stale_after_hours):
+            continue
+        filtered.append(item)
+    summary = {
+        "apiVersion": "railiance.fabric/v1alpha1",
+        "kind": "FabricDiscoveryRescanStatus",
+        "generated_at": _utc_now(),
+        "registry_url": args.registry_url,
+        "review_only": args.review_only,
+        "stale_after_hours": args.stale_after_hours,
+        "counts": {
+            "total": len(snapshots),
+            "shown": len(filtered),
+            "review_required": sum(
+                1 for item in snapshots
+                if isinstance(item, dict) and item.get("review_required")
+            ),
+        },
+        "repositories": filtered,
+    }
+    if args.json:
+        print(json.dumps(summary, indent=2, sort_keys=True))
+    else:
+        for item in filtered:
+            age = item.get("age_hours")
+            age_text = f", age {age:.1f}h" if isinstance(age, float) else ""
+            diff = item.get("diff_counts", {}) if isinstance(item.get("diff_counts"), dict) else {}
+            print(
+                f"{item.get('health', 'unknown')} {item.get('repo_slug')} "
+                f"({item.get('profile')}, {item.get('commit')}): "
+                f"diff +{diff.get('added', 0)}/~{diff.get('changed', 0)}"
+                f"/-{diff.get('retired', 0)}/!{diff.get('conflicted', 0)}"
+                f", review artifacts {item.get('review_artifact_count', 0)}"
+                f", tombstones {item.get('tombstone_count', 0)}"
+                f"{age_text}"
+            )
+        counts = summary["counts"]
+        print(
+            f"summary: {counts['shown']} shown, {counts['total']} latest discovery snapshot(s), "
+            f"{counts['review_required']} review required"
+        )
    return 0


@@ -445,6 +609,7 @@ def _scan_manifest_repo(
        "scanned": False,
        "ingested": False,
        "accepted": False,
+        "change_state": "unknown",
        "llm": {
            "enabled": llm_enabled,
            "attempted": False,
@@ -494,15 +659,15 @@ def _scan_manifest_repo(
                connectors=connectors,
            )
        )
-        previous_path = _manifest_discovery_snapshot_path(args.previous_dir, slug, args.profile) if args.previous_dir else None
-        if previous_path and previous_path.is_file():
-            previous = json.loads(previous_path.read_text(encoding="utf-8"))
-            if not isinstance(previous, dict):
-                raise ValueError(f"previous snapshot must be a JSON object: {previous_path}")
+        previous, previous_metadata = _scan_manifest_previous_snapshot(
+            args,
+            registry_url,
+            manifest_path,
+            slug,
+        )
+        result["previous"] = previous_metadata
+        if previous is not None:
            snapshot = reconcile_discovery_snapshots(previous, snapshot)
-            result["previous_snapshot_path"] = str(previous_path)
-        elif previous_path:
-            result["previous_snapshot_path"] = None
    except Exception as exc:
        result["status"] = "error"
        result["error"] = f"scan failed: {exc}"
@@ -520,12 +685,14 @@ def _scan_manifest_repo(
            "diff_counts": _discovery_diff_counts(snapshot),
            "review_artifact_counts": _review_artifact_counts(snapshot),
            "connector_runs": _connector_run_summaries(snapshot),
+            "tombstone_count": len(snapshot.get("tombstones", [])),
+            "change_state": _scan_manifest_change_state(snapshot, result.get("previous")),
        }
    )
    _set_llm_result(result, snapshot, llm_enabled, llm_skip_reason)

-    if args.output_dir:
-        output_path = _manifest_discovery_snapshot_path(args.output_dir, slug, args.profile)
+    output_path = _scan_manifest_output_path(args, manifest_path, slug)
+    if output_path is not None:
        try:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(json.dumps(snapshot, indent=2, sort_keys=True) + "\n", encoding="utf-8")
@@ -538,6 +705,9 @@ def _scan_manifest_repo(
        result["registry_action"] = "skipped_dry_run"
        return result
    if args.ingest:
+        if result["change_state"] == "unchanged" and not args.ingest_unchanged:
+            result.update({"status": "skipped_unchanged", "registry_action": "skipped_unchanged"})
+            return result
        try:
            repository = _registry_post_checked(
                registry_url,
@@ -564,10 +734,27 @@ def _scan_manifest_repo(
                }
            )
            if args.accept:
+                blockers = _scan_manifest_acceptance_blockers(snapshot, args)
+                if blockers:
+                    result.update(
+                        {
+                            "status": "review_required",
+                            "registry_action": "accept_blocked",
+                            "acceptance_blockers": blockers,
+                        }
+                    )
+                    return result
+                accept_payload: dict[str, object] = {
+                    "commit": f"discovery:{snapshot['source']['commit']}",
+                }
+                if args.accepted_key:
+                    accept_payload["accepted_keys"] = args.accepted_key
+                if args.accept_review_state is not None:
+                    accept_payload["accept_review_states"] = args.accept_review_state
                accepted = _registry_post_checked(
                    registry_url,
                    f"/repositories/{slug}/discovery-snapshots/{stored['id']}/accept",
-                    {"commit": f"discovery:{snapshot['source']['commit']}"},
+                    accept_payload,
                )
                graph_snapshot = accepted["graph_snapshot"]
                result.update(
@@ -587,9 +774,12 @@ def _scan_manifest_counts(results: list[dict[str, Any]]) -> dict[str, int]:
    return {
        "total": len(results),
        "scanned": sum(1 for item in results if item.get("scanned")),
+        "baseline": sum(1 for item in results if item.get("change_state") == "baseline"),
+        "unchanged": sum(1 for item in results if item.get("change_state") == "unchanged"),
        "changed": sum(1 for item in results if _scan_manifest_has_diff(item)),
        "retired": sum(_int_from_nested(item, "diff_counts", "retired") for item in results),
        "conflicted": sum(_int_from_nested(item, "diff_counts", "conflicted") for item in results),
+        "review_required": sum(1 for item in results if _scan_manifest_needs_review(item)),
        "llm_skipped": sum(1 for item in results if item.get("llm", {}).get("status") == "skipped"),
        "llm_failed": sum(1 for item in results if item.get("llm", {}).get("status") == "failed"),
        "ingested": sum(1 for item in results if item.get("ingested")),
@@ -610,18 +800,27 @@ def _print_scan_manifest_summary(summary: dict[str, Any]) -> None:
        action = "accepted" if item.get("accepted") else "ingested" if item.get("ingested") else "scanned"
        if item.get("registry_action") == "skipped_dry_run":
            action = "dry-run scanned"
+        elif item.get("registry_action") == "skipped_unchanged":
+            action = "unchanged"
+        elif item.get("registry_action") == "accept_blocked":
+            action = "review required"
+        previous = item.get("previous") if isinstance(item.get("previous"), dict) else {}
+        previous_summary = f", previous {previous.get('source')}" if previous.get("source") else ""
        print(
            f"{action} {slug} ({item.get('commit', 'working-tree')}): "
            f"{counts.get('nodes', 0)} node(s), {counts.get('edges', 0)} edge(s), "
            f"{counts.get('attributes', 0)} attribute(s), "
            f"diff +{diff.get('added', 0)}/~{diff.get('changed', 0)}"
            f"/-{diff.get('retired', 0)}/!{diff.get('conflicted', 0)}"
+            f"{previous_summary}"
        )
    counts = summary["counts"]
    print(
        f"summary: {counts['total']} repo(s), {counts['scanned']} scanned, "
+        f"{counts['baseline']} baseline, {counts['unchanged']} unchanged, "
        f"{counts['changed']} changed, {counts['retired']} retired, "
-        f"{counts['conflicted']} conflicted, {counts['llm_skipped']} LLM skipped, "
+        f"{counts['conflicted']} conflicted, {counts['review_required']} review required, "
+        f"{counts['llm_skipped']} LLM skipped, "
        f"{counts['llm_failed']} LLM failed, {counts['accepted']} accepted, "
        f"{counts['errors']} error(s)"
    )
@@ -644,6 +843,173 @@ def _scan_manifest_connector_manifest(
    return path.resolve() if path.is_absolute() else (manifest_dir / path).resolve()


+def _scan_manifest_cache_config(args: argparse.Namespace, manifest_path: Path) -> dict[str, object]:
+    cache_dir = _scan_manifest_cache_dir(args, manifest_path)
+    if cache_dir is None:
+        return {"enabled": False}
+    return {
+        "enabled": True,
+        "cache_dir": str(cache_dir),
+        "snapshot_dir": str(cache_dir / DEFAULT_SNAPSHOT_DIR),
+        "report_dir": str(cache_dir / DEFAULT_REPORT_DIR),
+    }
+
+
+def _scan_manifest_cache_dir(args: argparse.Namespace, manifest_path: Path) -> Path | None:
+    if args.no_cache:
+        return None
+    cache_dir = args.cache_dir or Path(DEFAULT_DISCOVERY_DIR)
+    if cache_dir.is_absolute():
+        return cache_dir
+    return (manifest_path.parent / cache_dir).resolve()
+
+
+def _scan_manifest_lock_path(args: argparse.Namespace, manifest_path: Path) -> Path | None:
+    if args.no_lock:
+        return None
+    if args.lock_file:
+        path = args.lock_file.expanduser()
+        return path.resolve() if path.is_absolute() else (manifest_path.parent / path).resolve()
+    cache_dir = _scan_manifest_cache_dir(args, manifest_path)
+    if cache_dir is None:
+        return None
+    return cache_dir / "rescan.lock"
+
+
+def _scan_manifest_acquire_lock(lock_path: Path) -> int:
+    lock_path.parent.mkdir(parents=True, exist_ok=True)
+    fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
+    os.write(fd, f"pid={os.getpid()}\ncreated_at={_utc_now()}\n".encode("utf-8"))
+    return fd
+
+
+def _scan_manifest_release_lock(lock_fd: int | None, lock_path: Path | None) -> None:
+    if lock_fd is not None:
+        os.close(lock_fd)
+    if lock_path is not None:
+        try:
+            lock_path.unlink()
+        except FileNotFoundError:
+            pass
+
+
+def _scan_manifest_output_path(args: argparse.Namespace, manifest_path: Path, slug: str) -> Path | None:
+    if args.output_dir:
+        return _manifest_discovery_snapshot_path(args.output_dir, slug, args.profile)
+    cache_dir = _scan_manifest_cache_dir(args, manifest_path)
+    if cache_dir is None:
+        return None
+    return _manifest_discovery_snapshot_path(cache_dir / DEFAULT_SNAPSHOT_DIR, slug, args.profile)
+
+
+def _scan_manifest_report_path(args: argparse.Namespace, manifest_path: Path, generated_at: str) -> Path | None:
+    if args.report_output:
+        path = args.report_output.expanduser()
+        return path.resolve() if path.is_absolute() else (manifest_path.parent / path).resolve()
+    cache_dir = _scan_manifest_cache_dir(args, manifest_path)
+    if cache_dir is None:
+        return None
+    timestamp = _slugify(generated_at.replace(":", "").replace("+", ""))
+    return cache_dir / DEFAULT_REPORT_DIR / f"{timestamp}-{_slugify(args.profile)}.rescan-report.json"
+
+
+def _scan_manifest_previous_snapshot(
+    args: argparse.Namespace,
+    registry_url: str,
+    manifest_path: Path,
+    slug: str,
+) -> tuple[dict[str, Any] | None, dict[str, Any]]:
+    if args.previous_source == "none":
+        return None, {"source": "none", "found": False}
+    if args.previous_source == "registry":
+        try:
+            previous = _registry_get_checked(
+                registry_url,
+                f"/repositories/{quote(slug)}/discovery-snapshots/latest?profile={quote(args.profile)}",
+            )
+        except RegistryRequestError as exc:
+            if exc.status_code == 404:
+                return None, {"source": "registry", "found": False}
+            raise
+        snapshot = previous.get("snapshot") if isinstance(previous.get("snapshot"), dict) else None
+        if snapshot is None:
+            raise ValueError(f"registry latest discovery snapshot for {slug} has no snapshot object")
+        return snapshot, {
+            "source": "registry",
+            "found": True,
+            "discovery_snapshot_id": previous.get("id"),
+            "commit": previous.get("commit"),
+            "profile": previous.get("profile"),
+        }
+
+    previous_dir = args.previous_dir
+    if previous_dir is None and not args.no_cache:
+        cache_dir = _scan_manifest_cache_dir(args, manifest_path)
+        previous_dir = cache_dir / DEFAULT_SNAPSHOT_DIR if cache_dir is not None else None
+    if previous_dir is None:
+        return None, {"source": "dir", "found": False}
+    previous_path = _manifest_discovery_snapshot_path(previous_dir, slug, args.profile)
+    if not previous_path.is_file():
+        return None, {"source": "dir", "found": False, "path": str(previous_path)}
+    previous = json.loads(previous_path.read_text(encoding="utf-8"))
+    if not isinstance(previous, dict):
+        raise ValueError(f"previous snapshot must be a JSON object: {previous_path}")
+    return previous, {"source": "dir", "found": True, "path": str(previous_path)}
+
+
+def _scan_manifest_change_state(snapshot: dict[str, Any], previous_metadata: object) -> str:
+    previous = previous_metadata if isinstance(previous_metadata, dict) else {}
+    if not previous.get("found"):
+        return "baseline"
+    return "changed" if any(_discovery_diff_counts(snapshot).values()) else "unchanged"
+
+
+def _scan_manifest_acceptance_blockers(snapshot: dict[str, Any], args: argparse.Namespace) -> list[str]:
+    if args.accept_policy == "explicit":
+        return []
+    blockers: list[str] = []
+    diff = _discovery_diff_counts(snapshot)
+    if diff["conflicted"]:
+        blockers.append(f"{diff['conflicted']} conflicted candidate(s)")
+    tombstone_count = len(snapshot.get("tombstones", []))
+    if tombstone_count:
+        blockers.append(f"{tombstone_count} tombstone(s)")
+    review_artifacts = _review_artifact_counts(snapshot)
+    low_confidence = review_artifacts.get("llm_low_confidence", 0)
+    if low_confidence:
+        blockers.append(f"{low_confidence} low-confidence LLM artifact(s)")
+    if args.accept_review_state is not None and set(args.accept_review_state) != {"accepted"}:
+        blockers.append("non-accepted review states requested")
+    return blockers
+
+
+def _scan_manifest_needs_review(item: dict[str, Any]) -> bool:
+    if item.get("status") == "review_required":
+        return True
+    if _int_from_nested(item, "diff_counts", "conflicted"):
+        return True
+    if int(item.get("tombstone_count") or 0):
+        return True
+    review_artifacts = item.get("review_artifact_counts")
+    return isinstance(review_artifacts, dict) and any(int(value or 0) for value in review_artifacts.values())
+
+
+def _scan_manifest_exit_code(summary: dict[str, Any], args: argparse.Namespace) -> int:
+    counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
+    errors = int(counts.get("errors", 0) or 0)
+    if args.strict and errors:
+        return 1
+    if args.exit_code_mode != "operational":
+        return 0
+    if errors:
+        return 4
+    if int(counts.get("review_required", 0) or 0):
+        return 3
+    if int(counts.get("changed", 0) or 0) or int(counts.get("baseline", 0) or 0):
+        return 2
+    return 0
+
+
 def _manifest_discovery_snapshot_path(base_dir: Path, slug: str, profile: str) -> Path:
    return base_dir.resolve() / f"{_slugify(slug)}-{_slugify(profile)}.discovery.json"

@@ -998,7 +1364,9 @@ def _scan_repo(args: argparse.Namespace) -> int:


 class RegistryRequestError(Exception):
-    pass
+    def __init__(self, message: str, status_code: int | None = None) -> None:
+        super().__init__(message)
+        self.status_code = status_code


 def _registry_post(registry_url: str, path: str, payload: dict[str, object]) -> dict[str, object]:
@@ -1022,7 +1390,22 @@ def _registry_post_checked(registry_url: str, path: str, payload: dict[str, obje
            body = json.loads(response.read())
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
-        raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}") from exc
+        raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}", status_code=exc.code) from exc
+    except urllib.error.URLError as exc:
+        raise RegistryRequestError(f"cannot reach registry at {registry_url}: {exc}") from exc
+    if not isinstance(body, dict):
+        raise RegistryRequestError("registry returned a non-object response")
+    return body
+
+
+def _registry_get_checked(registry_url: str, path: str) -> dict[str, object]:
+    request = urllib.request.Request(registry_url.rstrip("/") + path, method="GET")
+    try:
+        with urllib.request.urlopen(request, timeout=15) as response:
+            body = json.loads(response.read())
+    except urllib.error.HTTPError as exc:
+        detail = exc.read().decode("utf-8", errors="replace")
+        raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}", status_code=exc.code) from exc
    except urllib.error.URLError as exc:
        raise RegistryRequestError(f"cannot reach registry at {registry_url}: {exc}") from exc
    if not isinstance(body, dict):
@@ -1088,6 +1471,19 @@ def _git_value(repo_path: Path | None, *args: str) -> str | None:
 def _utc_now() -> str:
    return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")

+
+def _age_hours(value: object, now: datetime) -> float | None:
+    if not isinstance(value, str) or not value:
+        return None
+    try:
+        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+    if parsed.tzinfo is None:
+        parsed = parsed.replace(tzinfo=timezone.utc)
+    return max(0.0, (now - parsed).total_seconds() / 3600)
+
+
 def _load_graph_or_exit(paths: list[Path]) -> FabricGraph:
    graph = build_graph(paths)
    if graph.load_errors:
--- a/railiance_fabric/registry.py
+++ b/railiance_fabric/registry.py
@@ -555,6 +555,11 @@ class RegistryStore:
                if latest_discovery_snapshot
                else None
            ),
+            "discovery_health": (
+                _discovery_snapshot_health(latest_discovery_snapshot)
+                if latest_discovery_snapshot
+                else {"health": "unknown", "review_required": False}
+            ),
            "counts": {
                "snapshots": len(self.list_snapshots(repo_slug)),
                "discovery_snapshots": len(discovery_snapshots),
@@ -736,13 +741,42 @@ class RegistryStore:
            }
            for snapshot in self.latest_snapshots()
        ]
+        latest_discovery = [
+            _discovery_snapshot_public_summary(snapshot)
+            for snapshot in self.latest_discovery_snapshots()
+        ]
        return {
            "status": "ok",
            "database": str(self.path),
            "counts": counts,
            "latest_snapshots": latest,
+            "latest_discovery_snapshots": latest_discovery,
        }

+    def latest_discovery_snapshots(self, profile: str | None = None) -> list[dict[str, Any]]:
+        params: list[Any] = []
+        where = ""
+        if profile:
+            where = "where profile = ?"
+            params.append(profile)
+        with self._connect() as db:
+            rows = db.execute(
+                f"""
+                select ds.id, ds.repo_slug, ds.commit_sha, ds.profile, ds.generated_at,
+                       ds.snapshot_json, ds.accepted_graph_snapshot_id, ds.created_at
+                from discovery_snapshots ds
+                join (
+                  select repo_slug, profile, max(id) as latest_id
+                  from discovery_snapshots
+                  {where}
+                  group by repo_slug, profile
+                ) latest on latest.latest_id = ds.id
+                order by ds.repo_slug, ds.profile
+                """,
+                params,
+            ).fetchall()
+        return [_discovery_snapshot_dict(row) for row in rows]
+
    def _latest_graph_or_empty(self, repo_slug: str) -> dict[str, Any]:
        try:
            return self.latest_snapshot(repo_slug)["graph"]
@@ -1170,7 +1204,7 @@ def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, An
    candidates = snapshot["snapshot"].get("candidates", {})
    reconciliation = snapshot["snapshot"].get("reconciliation", {})
    diff = reconciliation.get("diff", {}) if isinstance(reconciliation, dict) else {}
-    return {
+    result = {
        "id": snapshot["id"],
        "repo_slug": snapshot["repo_slug"],
        "commit": snapshot["commit"],
@@ -1190,6 +1224,50 @@ def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, An
            "conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
        },
    }
+    result.update(_discovery_snapshot_health(snapshot))
+    return result
+
+
+def _discovery_snapshot_health(snapshot: dict[str, Any]) -> dict[str, Any]:
+    payload = snapshot.get("snapshot") if isinstance(snapshot.get("snapshot"), dict) else {}
+    reconciliation = payload.get("reconciliation") if isinstance(payload.get("reconciliation"), dict) else {}
+    diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {}
+    diff_counts = {
+        "added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
+        "changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
+        "retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
+        "conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
+    }
+    review_artifact_count = (
+        len(payload.get("review_artifacts", []))
+        if isinstance(payload.get("review_artifacts"), list)
+        else 0
+    )
+    connector_run_count = (
+        len(payload.get("connector_runs", []))
+        if isinstance(payload.get("connector_runs"), list)
+        else 0
+    )
+    tombstone_count = (
+        len(payload.get("tombstones", []))
+        if isinstance(payload.get("tombstones"), list)
+        else 0
+    )
+    review_required = bool(diff_counts["conflicted"] or review_artifact_count or tombstone_count)
+    changed = any(diff_counts.values())
+    if review_required:
+        health = "needs_review"
+    elif changed:
+        health = "changed"
+    else:
+        health = "fresh"
+    return {
+        "health": health,
+        "review_required": review_required,
+        "review_artifact_count": review_artifact_count,
+        "connector_run_count": connector_run_count,
+        "tombstone_count": tombstone_count,
+    }


 def _row_dict(row: sqlite3.Row) -> dict[str, Any]: