Implement operational discovery rescan loops

This commit is contained in:
2026-05-20 22:52:26 +02:00
parent 50810ffd54
commit 4fdf552f73
8 changed files with 1079 additions and 26 deletions

View File

@@ -1,7 +1,9 @@
from __future__ import annotations
import argparse
import hashlib
import json
import os
import re
import subprocess
import sys
@@ -10,6 +12,7 @@ import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from urllib.parse import quote
from .connectors import ConnectorConfig
from .loader import declaration_files, load_yaml
@@ -17,10 +20,15 @@ from .graph import FabricGraph, build_graph
from .graph_explorer import fabric_graph_explorer_payload
from .llm_extraction import LLMExtractionConfig
from .reconciliation import reconcile_discovery_snapshots
from .scanner import ScanOptions, scan_repo
from .scanner import EXTRACTOR_VERSION, ScanOptions, scan_repo
from .validation import validate_roots
DEFAULT_DISCOVERY_DIR = ".fabric-discovery"
DEFAULT_SNAPSHOT_DIR = "snapshots"
DEFAULT_REPORT_DIR = "reports"
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
prog="railiance-fabric",
@@ -125,10 +133,64 @@ def build_parser() -> argparse.ArgumentParser:
scan_manifest.add_argument("--profile", default="deterministic", help="Discovery scan profile name.")
scan_manifest.add_argument("--output-dir", type=Path, default=None, help="Write one discovery snapshot JSON per scanned repo.")
scan_manifest.add_argument("--previous-dir", type=Path, default=None, help="Read previous per-repo snapshots for reconciliation.")
scan_manifest.add_argument(
"--cache-dir",
type=Path,
default=None,
help="Operational cache directory. Defaults to <manifest-dir>/.fabric-discovery.",
)
scan_manifest.add_argument(
"--no-cache",
action="store_true",
help="Disable default cache snapshots and report output unless explicit paths are provided.",
)
scan_manifest.add_argument(
"--previous-source",
choices=["dir", "registry", "none"],
default="dir",
help="Where to read previous discovery snapshots from before reconciling.",
)
scan_manifest.add_argument(
"--previous-from-registry",
action="store_true",
help="Shortcut for --previous-source registry.",
)
scan_manifest.add_argument("--report-output", type=Path, default=None, help="Write an operational rescan report JSON.")
scan_manifest.add_argument("--dry-run", action="store_true", help="Do not write to the registry.")
scan_manifest.add_argument("--ingest", action="store_true", help="Store each discovery snapshot in the registry.")
scan_manifest.add_argument(
"--ingest-unchanged",
action="store_true",
help="Also ingest unchanged snapshots when --ingest is enabled.",
)
scan_manifest.add_argument("--accept", action="store_true", help="Accept ingested snapshots after scanning.")
scan_manifest.add_argument(
"--accepted-key",
action="append",
default=[],
help="Candidate stable key to include during acceptance. May be repeated.",
)
scan_manifest.add_argument(
"--accept-review-state",
action="append",
default=None,
help="Review state accepted during projection. Defaults to accepted.",
)
scan_manifest.add_argument(
"--accept-policy",
choices=["safe", "explicit"],
default="safe",
help="Safe blocks conflicts/tombstones and accepts only accepted review state; explicit allows provided overrides.",
)
scan_manifest.add_argument("--strict", action="store_true", help="Exit non-zero when any repo cannot be scanned or stored.")
scan_manifest.add_argument(
"--exit-code-mode",
choices=["default", "operational"],
default="default",
help="Use operational exit codes: 2 changes/baseline, 3 review required, 4 partial failure.",
)
scan_manifest.add_argument("--lock-file", type=Path, default=None, help="Lock file path for automation-safe runs.")
scan_manifest.add_argument("--no-lock", action="store_true", help="Disable the default cache-directory rescan lock.")
scan_manifest.add_argument("--json", action="store_true", help="Print the raw manifest scan summary.")
scan_manifest.add_argument("--llm", action="store_true", help="Enable llm-connect assisted extraction.")
scan_manifest.add_argument("--deterministic-only", action="store_true", help="Force deterministic-only scans even when --llm is set.")
@@ -152,6 +214,12 @@ def build_parser() -> argparse.ArgumentParser:
help="Manifest path for the local-fabric-registry connector. Defaults to the scanned manifest.",
)
rescan_status = registry_sub.add_parser("rescan-status", help="Show discovery rescan freshness and review status.")
rescan_status.add_argument("--registry-url", default="http://127.0.0.1:8765")
rescan_status.add_argument("--review-only", action="store_true", help="Only show repos whose latest discovery needs review.")
rescan_status.add_argument("--stale-after-hours", type=float, default=None, help="Only show discovery snapshots older than this age.")
rescan_status.add_argument("--json", action="store_true", help="Print the raw rescan status summary.")
cyclonedx = registry_sub.add_parser("ingest-cyclonedx", help="Ingest a CycloneDX SBOM as library inventory.")
cyclonedx.add_argument("sbom", type=Path)
cyclonedx.add_argument("--registry-url", default="http://127.0.0.1:8765")
@@ -235,6 +303,8 @@ def main(argv: list[str] | None = None) -> int:
return _registry_sync_manifest(args)
if args.registry_command == "scan-manifest":
return _registry_scan_manifest(args)
if args.registry_command == "rescan-status":
return _registry_rescan_status(args)
if args.registry_command == "ingest-cyclonedx":
return _registry_ingest_cyclonedx(args)
if args.registry_command == "ingest-discovery":
@@ -340,6 +410,15 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
if args.accept and not args.ingest and not args.dry_run:
print("ERROR --accept requires --ingest unless --dry-run is set", file=sys.stderr)
return 1
if args.accept_policy == "safe":
if args.accept_review_state and set(args.accept_review_state) != {"accepted"}:
print("ERROR --accept-review-state requires --accept-policy explicit unless it is only 'accepted'", file=sys.stderr)
return 1
if args.accepted_key:
print("ERROR --accepted-key requires --accept-policy explicit", file=sys.stderr)
return 1
if args.previous_from_registry:
args.previous_source = "registry"
manifest_path = args.manifest.resolve()
manifest = load_yaml(manifest_path)
@@ -351,6 +430,15 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
print(f"ERROR {manifest_path}: manifest requires a repositories list", file=sys.stderr)
return 1
lock_path = _scan_manifest_lock_path(args, manifest_path)
lock_fd: int | None = None
if lock_path is not None:
try:
lock_fd = _scan_manifest_acquire_lock(lock_path)
except FileExistsError:
print(f"ERROR rescan lock already exists: {lock_path}", file=sys.stderr)
return 5 if args.exit_code_mode == "operational" else 1
registry_url = args.registry_url or str(manifest.get("registry_url") or "http://127.0.0.1:8765")
allowlist = {slug for slug in args.repo_slug if slug}
selected = [
@@ -382,6 +470,8 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
results.append(result)
summary = {
"apiVersion": "railiance.fabric/v1alpha1",
"kind": "FabricDiscoveryRescanReport",
"manifest": str(manifest_path),
"registry_url": registry_url,
"generated_at": _utc_now(),
@@ -390,6 +480,10 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
"ingest": args.ingest and not args.dry_run,
"accept": args.accept and args.ingest and not args.dry_run,
"allowlist": sorted(allowlist),
"cache": _scan_manifest_cache_config(args, manifest_path),
"scanner": {"extractor_version": EXTRACTOR_VERSION},
"previous_source": args.previous_source,
"accept_policy": args.accept_policy,
"llm": {
"requested": bool(args.llm),
"deterministic_only": bool(args.deterministic_only or not args.llm),
@@ -399,13 +493,83 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
"repositories": results,
"counts": _scan_manifest_counts(results),
}
report_path = _scan_manifest_report_path(args, manifest_path, summary["generated_at"])
if report_path is not None:
summary["report_path"] = str(report_path)
try:
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")
except Exception as exc:
_scan_manifest_release_lock(lock_fd, lock_path)
print(f"ERROR cannot write rescan report {report_path}: {exc}", file=sys.stderr)
return 1
if args.json:
print(json.dumps(summary, indent=2, sort_keys=True))
else:
_print_scan_manifest_summary(summary)
if args.strict and summary["counts"]["errors"]:
return 1
if summary.get("report_path"):
print(f"report: {summary['report_path']}")
exit_code = _scan_manifest_exit_code(summary, args)
_scan_manifest_release_lock(lock_fd, lock_path)
return exit_code
def _registry_rescan_status(args: argparse.Namespace) -> int:
status = _registry_get_checked(args.registry_url, "/status")
snapshots = status.get("latest_discovery_snapshots", [])
if not isinstance(snapshots, list):
snapshots = []
now = datetime.now(timezone.utc)
filtered: list[dict[str, Any]] = []
for item in snapshots:
if not isinstance(item, dict):
continue
if args.review_only and not item.get("review_required"):
continue
age_hours = _age_hours(item.get("generated_at"), now)
item = {**item, "age_hours": age_hours}
if args.stale_after_hours is not None and (age_hours is None or age_hours <= args.stale_after_hours):
continue
filtered.append(item)
summary = {
"apiVersion": "railiance.fabric/v1alpha1",
"kind": "FabricDiscoveryRescanStatus",
"generated_at": _utc_now(),
"registry_url": args.registry_url,
"review_only": args.review_only,
"stale_after_hours": args.stale_after_hours,
"counts": {
"total": len(snapshots),
"shown": len(filtered),
"review_required": sum(
1 for item in snapshots
if isinstance(item, dict) and item.get("review_required")
),
},
"repositories": filtered,
}
if args.json:
print(json.dumps(summary, indent=2, sort_keys=True))
else:
for item in filtered:
age = item.get("age_hours")
age_text = f", age {age:.1f}h" if isinstance(age, float) else ""
diff = item.get("diff_counts", {}) if isinstance(item.get("diff_counts"), dict) else {}
print(
f"{item.get('health', 'unknown')} {item.get('repo_slug')} "
f"({item.get('profile')}, {item.get('commit')}): "
f"diff +{diff.get('added', 0)}/~{diff.get('changed', 0)}"
f"/-{diff.get('retired', 0)}/!{diff.get('conflicted', 0)}"
f", review artifacts {item.get('review_artifact_count', 0)}"
f", tombstones {item.get('tombstone_count', 0)}"
f"{age_text}"
)
counts = summary["counts"]
print(
f"summary: {counts['shown']} shown, {counts['total']} latest discovery snapshot(s), "
f"{counts['review_required']} review required"
)
return 0
@@ -445,6 +609,7 @@ def _scan_manifest_repo(
"scanned": False,
"ingested": False,
"accepted": False,
"change_state": "unknown",
"llm": {
"enabled": llm_enabled,
"attempted": False,
@@ -494,15 +659,15 @@ def _scan_manifest_repo(
connectors=connectors,
)
)
previous_path = _manifest_discovery_snapshot_path(args.previous_dir, slug, args.profile) if args.previous_dir else None
if previous_path and previous_path.is_file():
previous = json.loads(previous_path.read_text(encoding="utf-8"))
if not isinstance(previous, dict):
raise ValueError(f"previous snapshot must be a JSON object: {previous_path}")
previous, previous_metadata = _scan_manifest_previous_snapshot(
args,
registry_url,
manifest_path,
slug,
)
result["previous"] = previous_metadata
if previous is not None:
snapshot = reconcile_discovery_snapshots(previous, snapshot)
result["previous_snapshot_path"] = str(previous_path)
elif previous_path:
result["previous_snapshot_path"] = None
except Exception as exc:
result["status"] = "error"
result["error"] = f"scan failed: {exc}"
@@ -520,12 +685,14 @@ def _scan_manifest_repo(
"diff_counts": _discovery_diff_counts(snapshot),
"review_artifact_counts": _review_artifact_counts(snapshot),
"connector_runs": _connector_run_summaries(snapshot),
"tombstone_count": len(snapshot.get("tombstones", [])),
"change_state": _scan_manifest_change_state(snapshot, result.get("previous")),
}
)
_set_llm_result(result, snapshot, llm_enabled, llm_skip_reason)
if args.output_dir:
output_path = _manifest_discovery_snapshot_path(args.output_dir, slug, args.profile)
output_path = _scan_manifest_output_path(args, manifest_path, slug)
if output_path is not None:
try:
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(json.dumps(snapshot, indent=2, sort_keys=True) + "\n", encoding="utf-8")
@@ -538,6 +705,9 @@ def _scan_manifest_repo(
result["registry_action"] = "skipped_dry_run"
return result
if args.ingest:
if result["change_state"] == "unchanged" and not args.ingest_unchanged:
result.update({"status": "skipped_unchanged", "registry_action": "skipped_unchanged"})
return result
try:
repository = _registry_post_checked(
registry_url,
@@ -564,10 +734,27 @@ def _scan_manifest_repo(
}
)
if args.accept:
blockers = _scan_manifest_acceptance_blockers(snapshot, args)
if blockers:
result.update(
{
"status": "review_required",
"registry_action": "accept_blocked",
"acceptance_blockers": blockers,
}
)
return result
accept_payload: dict[str, object] = {
"commit": f"discovery:{snapshot['source']['commit']}",
}
if args.accepted_key:
accept_payload["accepted_keys"] = args.accepted_key
if args.accept_review_state is not None:
accept_payload["accept_review_states"] = args.accept_review_state
accepted = _registry_post_checked(
registry_url,
f"/repositories/{slug}/discovery-snapshots/{stored['id']}/accept",
{"commit": f"discovery:{snapshot['source']['commit']}"},
accept_payload,
)
graph_snapshot = accepted["graph_snapshot"]
result.update(
@@ -587,9 +774,12 @@ def _scan_manifest_counts(results: list[dict[str, Any]]) -> dict[str, int]:
return {
"total": len(results),
"scanned": sum(1 for item in results if item.get("scanned")),
"baseline": sum(1 for item in results if item.get("change_state") == "baseline"),
"unchanged": sum(1 for item in results if item.get("change_state") == "unchanged"),
"changed": sum(1 for item in results if _scan_manifest_has_diff(item)),
"retired": sum(_int_from_nested(item, "diff_counts", "retired") for item in results),
"conflicted": sum(_int_from_nested(item, "diff_counts", "conflicted") for item in results),
"review_required": sum(1 for item in results if _scan_manifest_needs_review(item)),
"llm_skipped": sum(1 for item in results if item.get("llm", {}).get("status") == "skipped"),
"llm_failed": sum(1 for item in results if item.get("llm", {}).get("status") == "failed"),
"ingested": sum(1 for item in results if item.get("ingested")),
@@ -610,18 +800,27 @@ def _print_scan_manifest_summary(summary: dict[str, Any]) -> None:
action = "accepted" if item.get("accepted") else "ingested" if item.get("ingested") else "scanned"
if item.get("registry_action") == "skipped_dry_run":
action = "dry-run scanned"
elif item.get("registry_action") == "skipped_unchanged":
action = "unchanged"
elif item.get("registry_action") == "accept_blocked":
action = "review required"
previous = item.get("previous") if isinstance(item.get("previous"), dict) else {}
previous_summary = f", previous {previous.get('source')}" if previous.get("source") else ""
print(
f"{action} {slug} ({item.get('commit', 'working-tree')}): "
f"{counts.get('nodes', 0)} node(s), {counts.get('edges', 0)} edge(s), "
f"{counts.get('attributes', 0)} attribute(s), "
f"diff +{diff.get('added', 0)}/~{diff.get('changed', 0)}"
f"/-{diff.get('retired', 0)}/!{diff.get('conflicted', 0)}"
f"{previous_summary}"
)
counts = summary["counts"]
print(
f"summary: {counts['total']} repo(s), {counts['scanned']} scanned, "
f"{counts['baseline']} baseline, {counts['unchanged']} unchanged, "
f"{counts['changed']} changed, {counts['retired']} retired, "
f"{counts['conflicted']} conflicted, {counts['llm_skipped']} LLM skipped, "
f"{counts['conflicted']} conflicted, {counts['review_required']} review required, "
f"{counts['llm_skipped']} LLM skipped, "
f"{counts['llm_failed']} LLM failed, {counts['accepted']} accepted, "
f"{counts['errors']} error(s)"
)
@@ -644,6 +843,173 @@ def _scan_manifest_connector_manifest(
return path.resolve() if path.is_absolute() else (manifest_dir / path).resolve()
def _scan_manifest_cache_config(args: argparse.Namespace, manifest_path: Path) -> dict[str, object]:
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
if cache_dir is None:
return {"enabled": False}
return {
"enabled": True,
"cache_dir": str(cache_dir),
"snapshot_dir": str(cache_dir / DEFAULT_SNAPSHOT_DIR),
"report_dir": str(cache_dir / DEFAULT_REPORT_DIR),
}
def _scan_manifest_cache_dir(args: argparse.Namespace, manifest_path: Path) -> Path | None:
if args.no_cache:
return None
cache_dir = args.cache_dir or Path(DEFAULT_DISCOVERY_DIR)
if cache_dir.is_absolute():
return cache_dir
return (manifest_path.parent / cache_dir).resolve()
def _scan_manifest_lock_path(args: argparse.Namespace, manifest_path: Path) -> Path | None:
if args.no_lock:
return None
if args.lock_file:
path = args.lock_file.expanduser()
return path.resolve() if path.is_absolute() else (manifest_path.parent / path).resolve()
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
if cache_dir is None:
return None
return cache_dir / "rescan.lock"
def _scan_manifest_acquire_lock(lock_path: Path) -> int:
lock_path.parent.mkdir(parents=True, exist_ok=True)
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
os.write(fd, f"pid={os.getpid()}\ncreated_at={_utc_now()}\n".encode("utf-8"))
return fd
def _scan_manifest_release_lock(lock_fd: int | None, lock_path: Path | None) -> None:
if lock_fd is not None:
os.close(lock_fd)
if lock_path is not None:
try:
lock_path.unlink()
except FileNotFoundError:
pass
def _scan_manifest_output_path(args: argparse.Namespace, manifest_path: Path, slug: str) -> Path | None:
if args.output_dir:
return _manifest_discovery_snapshot_path(args.output_dir, slug, args.profile)
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
if cache_dir is None:
return None
return _manifest_discovery_snapshot_path(cache_dir / DEFAULT_SNAPSHOT_DIR, slug, args.profile)
def _scan_manifest_report_path(args: argparse.Namespace, manifest_path: Path, generated_at: str) -> Path | None:
if args.report_output:
path = args.report_output.expanduser()
return path.resolve() if path.is_absolute() else (manifest_path.parent / path).resolve()
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
if cache_dir is None:
return None
timestamp = _slugify(generated_at.replace(":", "").replace("+", ""))
return cache_dir / DEFAULT_REPORT_DIR / f"{timestamp}-{_slugify(args.profile)}.rescan-report.json"
def _scan_manifest_previous_snapshot(
args: argparse.Namespace,
registry_url: str,
manifest_path: Path,
slug: str,
) -> tuple[dict[str, Any] | None, dict[str, Any]]:
if args.previous_source == "none":
return None, {"source": "none", "found": False}
if args.previous_source == "registry":
try:
previous = _registry_get_checked(
registry_url,
f"/repositories/{quote(slug)}/discovery-snapshots/latest?profile={quote(args.profile)}",
)
except RegistryRequestError as exc:
if exc.status_code == 404:
return None, {"source": "registry", "found": False}
raise
snapshot = previous.get("snapshot") if isinstance(previous.get("snapshot"), dict) else None
if snapshot is None:
raise ValueError(f"registry latest discovery snapshot for {slug} has no snapshot object")
return snapshot, {
"source": "registry",
"found": True,
"discovery_snapshot_id": previous.get("id"),
"commit": previous.get("commit"),
"profile": previous.get("profile"),
}
previous_dir = args.previous_dir
if previous_dir is None and not args.no_cache:
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
previous_dir = cache_dir / DEFAULT_SNAPSHOT_DIR if cache_dir is not None else None
if previous_dir is None:
return None, {"source": "dir", "found": False}
previous_path = _manifest_discovery_snapshot_path(previous_dir, slug, args.profile)
if not previous_path.is_file():
return None, {"source": "dir", "found": False, "path": str(previous_path)}
previous = json.loads(previous_path.read_text(encoding="utf-8"))
if not isinstance(previous, dict):
raise ValueError(f"previous snapshot must be a JSON object: {previous_path}")
return previous, {"source": "dir", "found": True, "path": str(previous_path)}
def _scan_manifest_change_state(snapshot: dict[str, Any], previous_metadata: object) -> str:
previous = previous_metadata if isinstance(previous_metadata, dict) else {}
if not previous.get("found"):
return "baseline"
return "changed" if any(_discovery_diff_counts(snapshot).values()) else "unchanged"
def _scan_manifest_acceptance_blockers(snapshot: dict[str, Any], args: argparse.Namespace) -> list[str]:
if args.accept_policy == "explicit":
return []
blockers: list[str] = []
diff = _discovery_diff_counts(snapshot)
if diff["conflicted"]:
blockers.append(f"{diff['conflicted']} conflicted candidate(s)")
tombstone_count = len(snapshot.get("tombstones", []))
if tombstone_count:
blockers.append(f"{tombstone_count} tombstone(s)")
review_artifacts = _review_artifact_counts(snapshot)
low_confidence = review_artifacts.get("llm_low_confidence", 0)
if low_confidence:
blockers.append(f"{low_confidence} low-confidence LLM artifact(s)")
if args.accept_review_state is not None and set(args.accept_review_state) != {"accepted"}:
blockers.append("non-accepted review states requested")
return blockers
def _scan_manifest_needs_review(item: dict[str, Any]) -> bool:
if item.get("status") == "review_required":
return True
if _int_from_nested(item, "diff_counts", "conflicted"):
return True
if int(item.get("tombstone_count") or 0):
return True
review_artifacts = item.get("review_artifact_counts")
return isinstance(review_artifacts, dict) and any(int(value or 0) for value in review_artifacts.values())
def _scan_manifest_exit_code(summary: dict[str, Any], args: argparse.Namespace) -> int:
counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
errors = int(counts.get("errors", 0) or 0)
if args.strict and errors:
return 1
if args.exit_code_mode != "operational":
return 0
if errors:
return 4
if int(counts.get("review_required", 0) or 0):
return 3
if int(counts.get("changed", 0) or 0) or int(counts.get("baseline", 0) or 0):
return 2
return 0
def _manifest_discovery_snapshot_path(base_dir: Path, slug: str, profile: str) -> Path:
return base_dir.resolve() / f"{_slugify(slug)}-{_slugify(profile)}.discovery.json"
@@ -998,7 +1364,9 @@ def _scan_repo(args: argparse.Namespace) -> int:
class RegistryRequestError(Exception):
pass
def __init__(self, message: str, status_code: int | None = None) -> None:
super().__init__(message)
self.status_code = status_code
def _registry_post(registry_url: str, path: str, payload: dict[str, object]) -> dict[str, object]:
@@ -1022,7 +1390,22 @@ def _registry_post_checked(registry_url: str, path: str, payload: dict[str, obje
body = json.loads(response.read())
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace")
raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}") from exc
raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}", status_code=exc.code) from exc
except urllib.error.URLError as exc:
raise RegistryRequestError(f"cannot reach registry at {registry_url}: {exc}") from exc
if not isinstance(body, dict):
raise RegistryRequestError("registry returned a non-object response")
return body
def _registry_get_checked(registry_url: str, path: str) -> dict[str, object]:
request = urllib.request.Request(registry_url.rstrip("/") + path, method="GET")
try:
with urllib.request.urlopen(request, timeout=15) as response:
body = json.loads(response.read())
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace")
raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}", status_code=exc.code) from exc
except urllib.error.URLError as exc:
raise RegistryRequestError(f"cannot reach registry at {registry_url}: {exc}") from exc
if not isinstance(body, dict):
@@ -1088,6 +1471,19 @@ def _git_value(repo_path: Path | None, *args: str) -> str | None:
def _utc_now() -> str:
return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def _age_hours(value: object, now: datetime) -> float | None:
if not isinstance(value, str) or not value:
return None
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return None
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return max(0.0, (now - parsed).total_seconds() / 3600)
def _load_graph_or_exit(paths: list[Path]) -> FabricGraph:
graph = build_graph(paths)
if graph.load_errors:

View File

@@ -555,6 +555,11 @@ class RegistryStore:
if latest_discovery_snapshot
else None
),
"discovery_health": (
_discovery_snapshot_health(latest_discovery_snapshot)
if latest_discovery_snapshot
else {"health": "unknown", "review_required": False}
),
"counts": {
"snapshots": len(self.list_snapshots(repo_slug)),
"discovery_snapshots": len(discovery_snapshots),
@@ -736,13 +741,42 @@ class RegistryStore:
}
for snapshot in self.latest_snapshots()
]
latest_discovery = [
_discovery_snapshot_public_summary(snapshot)
for snapshot in self.latest_discovery_snapshots()
]
return {
"status": "ok",
"database": str(self.path),
"counts": counts,
"latest_snapshots": latest,
"latest_discovery_snapshots": latest_discovery,
}
def latest_discovery_snapshots(self, profile: str | None = None) -> list[dict[str, Any]]:
params: list[Any] = []
where = ""
if profile:
where = "where profile = ?"
params.append(profile)
with self._connect() as db:
rows = db.execute(
f"""
select ds.id, ds.repo_slug, ds.commit_sha, ds.profile, ds.generated_at,
ds.snapshot_json, ds.accepted_graph_snapshot_id, ds.created_at
from discovery_snapshots ds
join (
select repo_slug, profile, max(id) as latest_id
from discovery_snapshots
{where}
group by repo_slug, profile
) latest on latest.latest_id = ds.id
order by ds.repo_slug, ds.profile
""",
params,
).fetchall()
return [_discovery_snapshot_dict(row) for row in rows]
def _latest_graph_or_empty(self, repo_slug: str) -> dict[str, Any]:
try:
return self.latest_snapshot(repo_slug)["graph"]
@@ -1170,7 +1204,7 @@ def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, An
candidates = snapshot["snapshot"].get("candidates", {})
reconciliation = snapshot["snapshot"].get("reconciliation", {})
diff = reconciliation.get("diff", {}) if isinstance(reconciliation, dict) else {}
return {
result = {
"id": snapshot["id"],
"repo_slug": snapshot["repo_slug"],
"commit": snapshot["commit"],
@@ -1190,6 +1224,50 @@ def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, An
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
},
}
result.update(_discovery_snapshot_health(snapshot))
return result
def _discovery_snapshot_health(snapshot: dict[str, Any]) -> dict[str, Any]:
payload = snapshot.get("snapshot") if isinstance(snapshot.get("snapshot"), dict) else {}
reconciliation = payload.get("reconciliation") if isinstance(payload.get("reconciliation"), dict) else {}
diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {}
diff_counts = {
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
}
review_artifact_count = (
len(payload.get("review_artifacts", []))
if isinstance(payload.get("review_artifacts"), list)
else 0
)
connector_run_count = (
len(payload.get("connector_runs", []))
if isinstance(payload.get("connector_runs"), list)
else 0
)
tombstone_count = (
len(payload.get("tombstones", []))
if isinstance(payload.get("tombstones"), list)
else 0
)
review_required = bool(diff_counts["conflicted"] or review_artifact_count or tombstone_count)
changed = any(diff_counts.values())
if review_required:
health = "needs_review"
elif changed:
health = "changed"
else:
health = "fresh"
return {
"health": health,
"review_required": review_required,
"review_artifact_count": review_artifact_count,
"connector_run_count": connector_run_count,
"tombstone_count": tombstone_count,
}
def _row_dict(row: sqlite3.Row) -> dict[str, Any]: