generated from coulomb/repo-seed
Implement operational discovery rescan loops
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -10,6 +12,7 @@ import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import quote
|
||||
|
||||
from .connectors import ConnectorConfig
|
||||
from .loader import declaration_files, load_yaml
|
||||
@@ -17,10 +20,15 @@ from .graph import FabricGraph, build_graph
|
||||
from .graph_explorer import fabric_graph_explorer_payload
|
||||
from .llm_extraction import LLMExtractionConfig
|
||||
from .reconciliation import reconcile_discovery_snapshots
|
||||
from .scanner import ScanOptions, scan_repo
|
||||
from .scanner import EXTRACTOR_VERSION, ScanOptions, scan_repo
|
||||
from .validation import validate_roots
|
||||
|
||||
|
||||
DEFAULT_DISCOVERY_DIR = ".fabric-discovery"
|
||||
DEFAULT_SNAPSHOT_DIR = "snapshots"
|
||||
DEFAULT_REPORT_DIR = "reports"
|
||||
|
||||
|
||||
def build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="railiance-fabric",
|
||||
@@ -125,10 +133,64 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
scan_manifest.add_argument("--profile", default="deterministic", help="Discovery scan profile name.")
|
||||
scan_manifest.add_argument("--output-dir", type=Path, default=None, help="Write one discovery snapshot JSON per scanned repo.")
|
||||
scan_manifest.add_argument("--previous-dir", type=Path, default=None, help="Read previous per-repo snapshots for reconciliation.")
|
||||
scan_manifest.add_argument(
|
||||
"--cache-dir",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Operational cache directory. Defaults to <manifest-dir>/.fabric-discovery.",
|
||||
)
|
||||
scan_manifest.add_argument(
|
||||
"--no-cache",
|
||||
action="store_true",
|
||||
help="Disable default cache snapshots and report output unless explicit paths are provided.",
|
||||
)
|
||||
scan_manifest.add_argument(
|
||||
"--previous-source",
|
||||
choices=["dir", "registry", "none"],
|
||||
default="dir",
|
||||
help="Where to read previous discovery snapshots from before reconciling.",
|
||||
)
|
||||
scan_manifest.add_argument(
|
||||
"--previous-from-registry",
|
||||
action="store_true",
|
||||
help="Shortcut for --previous-source registry.",
|
||||
)
|
||||
scan_manifest.add_argument("--report-output", type=Path, default=None, help="Write an operational rescan report JSON.")
|
||||
scan_manifest.add_argument("--dry-run", action="store_true", help="Do not write to the registry.")
|
||||
scan_manifest.add_argument("--ingest", action="store_true", help="Store each discovery snapshot in the registry.")
|
||||
scan_manifest.add_argument(
|
||||
"--ingest-unchanged",
|
||||
action="store_true",
|
||||
help="Also ingest unchanged snapshots when --ingest is enabled.",
|
||||
)
|
||||
scan_manifest.add_argument("--accept", action="store_true", help="Accept ingested snapshots after scanning.")
|
||||
scan_manifest.add_argument(
|
||||
"--accepted-key",
|
||||
action="append",
|
||||
default=[],
|
||||
help="Candidate stable key to include during acceptance. May be repeated.",
|
||||
)
|
||||
scan_manifest.add_argument(
|
||||
"--accept-review-state",
|
||||
action="append",
|
||||
default=None,
|
||||
help="Review state accepted during projection. Defaults to accepted.",
|
||||
)
|
||||
scan_manifest.add_argument(
|
||||
"--accept-policy",
|
||||
choices=["safe", "explicit"],
|
||||
default="safe",
|
||||
help="Safe blocks conflicts/tombstones and accepts only accepted review state; explicit allows provided overrides.",
|
||||
)
|
||||
scan_manifest.add_argument("--strict", action="store_true", help="Exit non-zero when any repo cannot be scanned or stored.")
|
||||
scan_manifest.add_argument(
|
||||
"--exit-code-mode",
|
||||
choices=["default", "operational"],
|
||||
default="default",
|
||||
help="Use operational exit codes: 2 changes/baseline, 3 review required, 4 partial failure.",
|
||||
)
|
||||
scan_manifest.add_argument("--lock-file", type=Path, default=None, help="Lock file path for automation-safe runs.")
|
||||
scan_manifest.add_argument("--no-lock", action="store_true", help="Disable the default cache-directory rescan lock.")
|
||||
scan_manifest.add_argument("--json", action="store_true", help="Print the raw manifest scan summary.")
|
||||
scan_manifest.add_argument("--llm", action="store_true", help="Enable llm-connect assisted extraction.")
|
||||
scan_manifest.add_argument("--deterministic-only", action="store_true", help="Force deterministic-only scans even when --llm is set.")
|
||||
@@ -152,6 +214,12 @@ def build_parser() -> argparse.ArgumentParser:
|
||||
help="Manifest path for the local-fabric-registry connector. Defaults to the scanned manifest.",
|
||||
)
|
||||
|
||||
rescan_status = registry_sub.add_parser("rescan-status", help="Show discovery rescan freshness and review status.")
|
||||
rescan_status.add_argument("--registry-url", default="http://127.0.0.1:8765")
|
||||
rescan_status.add_argument("--review-only", action="store_true", help="Only show repos whose latest discovery needs review.")
|
||||
rescan_status.add_argument("--stale-after-hours", type=float, default=None, help="Only show discovery snapshots older than this age.")
|
||||
rescan_status.add_argument("--json", action="store_true", help="Print the raw rescan status summary.")
|
||||
|
||||
cyclonedx = registry_sub.add_parser("ingest-cyclonedx", help="Ingest a CycloneDX SBOM as library inventory.")
|
||||
cyclonedx.add_argument("sbom", type=Path)
|
||||
cyclonedx.add_argument("--registry-url", default="http://127.0.0.1:8765")
|
||||
@@ -235,6 +303,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||
return _registry_sync_manifest(args)
|
||||
if args.registry_command == "scan-manifest":
|
||||
return _registry_scan_manifest(args)
|
||||
if args.registry_command == "rescan-status":
|
||||
return _registry_rescan_status(args)
|
||||
if args.registry_command == "ingest-cyclonedx":
|
||||
return _registry_ingest_cyclonedx(args)
|
||||
if args.registry_command == "ingest-discovery":
|
||||
@@ -340,6 +410,15 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
|
||||
if args.accept and not args.ingest and not args.dry_run:
|
||||
print("ERROR --accept requires --ingest unless --dry-run is set", file=sys.stderr)
|
||||
return 1
|
||||
if args.accept_policy == "safe":
|
||||
if args.accept_review_state and set(args.accept_review_state) != {"accepted"}:
|
||||
print("ERROR --accept-review-state requires --accept-policy explicit unless it is only 'accepted'", file=sys.stderr)
|
||||
return 1
|
||||
if args.accepted_key:
|
||||
print("ERROR --accepted-key requires --accept-policy explicit", file=sys.stderr)
|
||||
return 1
|
||||
if args.previous_from_registry:
|
||||
args.previous_source = "registry"
|
||||
|
||||
manifest_path = args.manifest.resolve()
|
||||
manifest = load_yaml(manifest_path)
|
||||
@@ -351,6 +430,15 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
|
||||
print(f"ERROR {manifest_path}: manifest requires a repositories list", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
lock_path = _scan_manifest_lock_path(args, manifest_path)
|
||||
lock_fd: int | None = None
|
||||
if lock_path is not None:
|
||||
try:
|
||||
lock_fd = _scan_manifest_acquire_lock(lock_path)
|
||||
except FileExistsError:
|
||||
print(f"ERROR rescan lock already exists: {lock_path}", file=sys.stderr)
|
||||
return 5 if args.exit_code_mode == "operational" else 1
|
||||
|
||||
registry_url = args.registry_url or str(manifest.get("registry_url") or "http://127.0.0.1:8765")
|
||||
allowlist = {slug for slug in args.repo_slug if slug}
|
||||
selected = [
|
||||
@@ -382,6 +470,8 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
|
||||
results.append(result)
|
||||
|
||||
summary = {
|
||||
"apiVersion": "railiance.fabric/v1alpha1",
|
||||
"kind": "FabricDiscoveryRescanReport",
|
||||
"manifest": str(manifest_path),
|
||||
"registry_url": registry_url,
|
||||
"generated_at": _utc_now(),
|
||||
@@ -390,6 +480,10 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
|
||||
"ingest": args.ingest and not args.dry_run,
|
||||
"accept": args.accept and args.ingest and not args.dry_run,
|
||||
"allowlist": sorted(allowlist),
|
||||
"cache": _scan_manifest_cache_config(args, manifest_path),
|
||||
"scanner": {"extractor_version": EXTRACTOR_VERSION},
|
||||
"previous_source": args.previous_source,
|
||||
"accept_policy": args.accept_policy,
|
||||
"llm": {
|
||||
"requested": bool(args.llm),
|
||||
"deterministic_only": bool(args.deterministic_only or not args.llm),
|
||||
@@ -399,13 +493,83 @@ def _registry_scan_manifest(args: argparse.Namespace) -> int:
|
||||
"repositories": results,
|
||||
"counts": _scan_manifest_counts(results),
|
||||
}
|
||||
report_path = _scan_manifest_report_path(args, manifest_path, summary["generated_at"])
|
||||
if report_path is not None:
|
||||
summary["report_path"] = str(report_path)
|
||||
try:
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
report_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
except Exception as exc:
|
||||
_scan_manifest_release_lock(lock_fd, lock_path)
|
||||
print(f"ERROR cannot write rescan report {report_path}: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if args.json:
|
||||
print(json.dumps(summary, indent=2, sort_keys=True))
|
||||
else:
|
||||
_print_scan_manifest_summary(summary)
|
||||
if args.strict and summary["counts"]["errors"]:
|
||||
return 1
|
||||
if summary.get("report_path"):
|
||||
print(f"report: {summary['report_path']}")
|
||||
exit_code = _scan_manifest_exit_code(summary, args)
|
||||
_scan_manifest_release_lock(lock_fd, lock_path)
|
||||
return exit_code
|
||||
|
||||
|
||||
def _registry_rescan_status(args: argparse.Namespace) -> int:
|
||||
status = _registry_get_checked(args.registry_url, "/status")
|
||||
snapshots = status.get("latest_discovery_snapshots", [])
|
||||
if not isinstance(snapshots, list):
|
||||
snapshots = []
|
||||
now = datetime.now(timezone.utc)
|
||||
filtered: list[dict[str, Any]] = []
|
||||
for item in snapshots:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if args.review_only and not item.get("review_required"):
|
||||
continue
|
||||
age_hours = _age_hours(item.get("generated_at"), now)
|
||||
item = {**item, "age_hours": age_hours}
|
||||
if args.stale_after_hours is not None and (age_hours is None or age_hours <= args.stale_after_hours):
|
||||
continue
|
||||
filtered.append(item)
|
||||
summary = {
|
||||
"apiVersion": "railiance.fabric/v1alpha1",
|
||||
"kind": "FabricDiscoveryRescanStatus",
|
||||
"generated_at": _utc_now(),
|
||||
"registry_url": args.registry_url,
|
||||
"review_only": args.review_only,
|
||||
"stale_after_hours": args.stale_after_hours,
|
||||
"counts": {
|
||||
"total": len(snapshots),
|
||||
"shown": len(filtered),
|
||||
"review_required": sum(
|
||||
1 for item in snapshots
|
||||
if isinstance(item, dict) and item.get("review_required")
|
||||
),
|
||||
},
|
||||
"repositories": filtered,
|
||||
}
|
||||
if args.json:
|
||||
print(json.dumps(summary, indent=2, sort_keys=True))
|
||||
else:
|
||||
for item in filtered:
|
||||
age = item.get("age_hours")
|
||||
age_text = f", age {age:.1f}h" if isinstance(age, float) else ""
|
||||
diff = item.get("diff_counts", {}) if isinstance(item.get("diff_counts"), dict) else {}
|
||||
print(
|
||||
f"{item.get('health', 'unknown')} {item.get('repo_slug')} "
|
||||
f"({item.get('profile')}, {item.get('commit')}): "
|
||||
f"diff +{diff.get('added', 0)}/~{diff.get('changed', 0)}"
|
||||
f"/-{diff.get('retired', 0)}/!{diff.get('conflicted', 0)}"
|
||||
f", review artifacts {item.get('review_artifact_count', 0)}"
|
||||
f", tombstones {item.get('tombstone_count', 0)}"
|
||||
f"{age_text}"
|
||||
)
|
||||
counts = summary["counts"]
|
||||
print(
|
||||
f"summary: {counts['shown']} shown, {counts['total']} latest discovery snapshot(s), "
|
||||
f"{counts['review_required']} review required"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
@@ -445,6 +609,7 @@ def _scan_manifest_repo(
|
||||
"scanned": False,
|
||||
"ingested": False,
|
||||
"accepted": False,
|
||||
"change_state": "unknown",
|
||||
"llm": {
|
||||
"enabled": llm_enabled,
|
||||
"attempted": False,
|
||||
@@ -494,15 +659,15 @@ def _scan_manifest_repo(
|
||||
connectors=connectors,
|
||||
)
|
||||
)
|
||||
previous_path = _manifest_discovery_snapshot_path(args.previous_dir, slug, args.profile) if args.previous_dir else None
|
||||
if previous_path and previous_path.is_file():
|
||||
previous = json.loads(previous_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(previous, dict):
|
||||
raise ValueError(f"previous snapshot must be a JSON object: {previous_path}")
|
||||
previous, previous_metadata = _scan_manifest_previous_snapshot(
|
||||
args,
|
||||
registry_url,
|
||||
manifest_path,
|
||||
slug,
|
||||
)
|
||||
result["previous"] = previous_metadata
|
||||
if previous is not None:
|
||||
snapshot = reconcile_discovery_snapshots(previous, snapshot)
|
||||
result["previous_snapshot_path"] = str(previous_path)
|
||||
elif previous_path:
|
||||
result["previous_snapshot_path"] = None
|
||||
except Exception as exc:
|
||||
result["status"] = "error"
|
||||
result["error"] = f"scan failed: {exc}"
|
||||
@@ -520,12 +685,14 @@ def _scan_manifest_repo(
|
||||
"diff_counts": _discovery_diff_counts(snapshot),
|
||||
"review_artifact_counts": _review_artifact_counts(snapshot),
|
||||
"connector_runs": _connector_run_summaries(snapshot),
|
||||
"tombstone_count": len(snapshot.get("tombstones", [])),
|
||||
"change_state": _scan_manifest_change_state(snapshot, result.get("previous")),
|
||||
}
|
||||
)
|
||||
_set_llm_result(result, snapshot, llm_enabled, llm_skip_reason)
|
||||
|
||||
if args.output_dir:
|
||||
output_path = _manifest_discovery_snapshot_path(args.output_dir, slug, args.profile)
|
||||
output_path = _scan_manifest_output_path(args, manifest_path, slug)
|
||||
if output_path is not None:
|
||||
try:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_path.write_text(json.dumps(snapshot, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||
@@ -538,6 +705,9 @@ def _scan_manifest_repo(
|
||||
result["registry_action"] = "skipped_dry_run"
|
||||
return result
|
||||
if args.ingest:
|
||||
if result["change_state"] == "unchanged" and not args.ingest_unchanged:
|
||||
result.update({"status": "skipped_unchanged", "registry_action": "skipped_unchanged"})
|
||||
return result
|
||||
try:
|
||||
repository = _registry_post_checked(
|
||||
registry_url,
|
||||
@@ -564,10 +734,27 @@ def _scan_manifest_repo(
|
||||
}
|
||||
)
|
||||
if args.accept:
|
||||
blockers = _scan_manifest_acceptance_blockers(snapshot, args)
|
||||
if blockers:
|
||||
result.update(
|
||||
{
|
||||
"status": "review_required",
|
||||
"registry_action": "accept_blocked",
|
||||
"acceptance_blockers": blockers,
|
||||
}
|
||||
)
|
||||
return result
|
||||
accept_payload: dict[str, object] = {
|
||||
"commit": f"discovery:{snapshot['source']['commit']}",
|
||||
}
|
||||
if args.accepted_key:
|
||||
accept_payload["accepted_keys"] = args.accepted_key
|
||||
if args.accept_review_state is not None:
|
||||
accept_payload["accept_review_states"] = args.accept_review_state
|
||||
accepted = _registry_post_checked(
|
||||
registry_url,
|
||||
f"/repositories/{slug}/discovery-snapshots/{stored['id']}/accept",
|
||||
{"commit": f"discovery:{snapshot['source']['commit']}"},
|
||||
accept_payload,
|
||||
)
|
||||
graph_snapshot = accepted["graph_snapshot"]
|
||||
result.update(
|
||||
@@ -587,9 +774,12 @@ def _scan_manifest_counts(results: list[dict[str, Any]]) -> dict[str, int]:
|
||||
return {
|
||||
"total": len(results),
|
||||
"scanned": sum(1 for item in results if item.get("scanned")),
|
||||
"baseline": sum(1 for item in results if item.get("change_state") == "baseline"),
|
||||
"unchanged": sum(1 for item in results if item.get("change_state") == "unchanged"),
|
||||
"changed": sum(1 for item in results if _scan_manifest_has_diff(item)),
|
||||
"retired": sum(_int_from_nested(item, "diff_counts", "retired") for item in results),
|
||||
"conflicted": sum(_int_from_nested(item, "diff_counts", "conflicted") for item in results),
|
||||
"review_required": sum(1 for item in results if _scan_manifest_needs_review(item)),
|
||||
"llm_skipped": sum(1 for item in results if item.get("llm", {}).get("status") == "skipped"),
|
||||
"llm_failed": sum(1 for item in results if item.get("llm", {}).get("status") == "failed"),
|
||||
"ingested": sum(1 for item in results if item.get("ingested")),
|
||||
@@ -610,18 +800,27 @@ def _print_scan_manifest_summary(summary: dict[str, Any]) -> None:
|
||||
action = "accepted" if item.get("accepted") else "ingested" if item.get("ingested") else "scanned"
|
||||
if item.get("registry_action") == "skipped_dry_run":
|
||||
action = "dry-run scanned"
|
||||
elif item.get("registry_action") == "skipped_unchanged":
|
||||
action = "unchanged"
|
||||
elif item.get("registry_action") == "accept_blocked":
|
||||
action = "review required"
|
||||
previous = item.get("previous") if isinstance(item.get("previous"), dict) else {}
|
||||
previous_summary = f", previous {previous.get('source')}" if previous.get("source") else ""
|
||||
print(
|
||||
f"{action} {slug} ({item.get('commit', 'working-tree')}): "
|
||||
f"{counts.get('nodes', 0)} node(s), {counts.get('edges', 0)} edge(s), "
|
||||
f"{counts.get('attributes', 0)} attribute(s), "
|
||||
f"diff +{diff.get('added', 0)}/~{diff.get('changed', 0)}"
|
||||
f"/-{diff.get('retired', 0)}/!{diff.get('conflicted', 0)}"
|
||||
f"{previous_summary}"
|
||||
)
|
||||
counts = summary["counts"]
|
||||
print(
|
||||
f"summary: {counts['total']} repo(s), {counts['scanned']} scanned, "
|
||||
f"{counts['baseline']} baseline, {counts['unchanged']} unchanged, "
|
||||
f"{counts['changed']} changed, {counts['retired']} retired, "
|
||||
f"{counts['conflicted']} conflicted, {counts['llm_skipped']} LLM skipped, "
|
||||
f"{counts['conflicted']} conflicted, {counts['review_required']} review required, "
|
||||
f"{counts['llm_skipped']} LLM skipped, "
|
||||
f"{counts['llm_failed']} LLM failed, {counts['accepted']} accepted, "
|
||||
f"{counts['errors']} error(s)"
|
||||
)
|
||||
@@ -644,6 +843,173 @@ def _scan_manifest_connector_manifest(
|
||||
return path.resolve() if path.is_absolute() else (manifest_dir / path).resolve()
|
||||
|
||||
|
||||
def _scan_manifest_cache_config(args: argparse.Namespace, manifest_path: Path) -> dict[str, object]:
|
||||
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
|
||||
if cache_dir is None:
|
||||
return {"enabled": False}
|
||||
return {
|
||||
"enabled": True,
|
||||
"cache_dir": str(cache_dir),
|
||||
"snapshot_dir": str(cache_dir / DEFAULT_SNAPSHOT_DIR),
|
||||
"report_dir": str(cache_dir / DEFAULT_REPORT_DIR),
|
||||
}
|
||||
|
||||
|
||||
def _scan_manifest_cache_dir(args: argparse.Namespace, manifest_path: Path) -> Path | None:
|
||||
if args.no_cache:
|
||||
return None
|
||||
cache_dir = args.cache_dir or Path(DEFAULT_DISCOVERY_DIR)
|
||||
if cache_dir.is_absolute():
|
||||
return cache_dir
|
||||
return (manifest_path.parent / cache_dir).resolve()
|
||||
|
||||
|
||||
def _scan_manifest_lock_path(args: argparse.Namespace, manifest_path: Path) -> Path | None:
|
||||
if args.no_lock:
|
||||
return None
|
||||
if args.lock_file:
|
||||
path = args.lock_file.expanduser()
|
||||
return path.resolve() if path.is_absolute() else (manifest_path.parent / path).resolve()
|
||||
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
|
||||
if cache_dir is None:
|
||||
return None
|
||||
return cache_dir / "rescan.lock"
|
||||
|
||||
|
||||
def _scan_manifest_acquire_lock(lock_path: Path) -> int:
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
|
||||
os.write(fd, f"pid={os.getpid()}\ncreated_at={_utc_now()}\n".encode("utf-8"))
|
||||
return fd
|
||||
|
||||
|
||||
def _scan_manifest_release_lock(lock_fd: int | None, lock_path: Path | None) -> None:
|
||||
if lock_fd is not None:
|
||||
os.close(lock_fd)
|
||||
if lock_path is not None:
|
||||
try:
|
||||
lock_path.unlink()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
|
||||
def _scan_manifest_output_path(args: argparse.Namespace, manifest_path: Path, slug: str) -> Path | None:
|
||||
if args.output_dir:
|
||||
return _manifest_discovery_snapshot_path(args.output_dir, slug, args.profile)
|
||||
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
|
||||
if cache_dir is None:
|
||||
return None
|
||||
return _manifest_discovery_snapshot_path(cache_dir / DEFAULT_SNAPSHOT_DIR, slug, args.profile)
|
||||
|
||||
|
||||
def _scan_manifest_report_path(args: argparse.Namespace, manifest_path: Path, generated_at: str) -> Path | None:
|
||||
if args.report_output:
|
||||
path = args.report_output.expanduser()
|
||||
return path.resolve() if path.is_absolute() else (manifest_path.parent / path).resolve()
|
||||
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
|
||||
if cache_dir is None:
|
||||
return None
|
||||
timestamp = _slugify(generated_at.replace(":", "").replace("+", ""))
|
||||
return cache_dir / DEFAULT_REPORT_DIR / f"{timestamp}-{_slugify(args.profile)}.rescan-report.json"
|
||||
|
||||
|
||||
def _scan_manifest_previous_snapshot(
|
||||
args: argparse.Namespace,
|
||||
registry_url: str,
|
||||
manifest_path: Path,
|
||||
slug: str,
|
||||
) -> tuple[dict[str, Any] | None, dict[str, Any]]:
|
||||
if args.previous_source == "none":
|
||||
return None, {"source": "none", "found": False}
|
||||
if args.previous_source == "registry":
|
||||
try:
|
||||
previous = _registry_get_checked(
|
||||
registry_url,
|
||||
f"/repositories/{quote(slug)}/discovery-snapshots/latest?profile={quote(args.profile)}",
|
||||
)
|
||||
except RegistryRequestError as exc:
|
||||
if exc.status_code == 404:
|
||||
return None, {"source": "registry", "found": False}
|
||||
raise
|
||||
snapshot = previous.get("snapshot") if isinstance(previous.get("snapshot"), dict) else None
|
||||
if snapshot is None:
|
||||
raise ValueError(f"registry latest discovery snapshot for {slug} has no snapshot object")
|
||||
return snapshot, {
|
||||
"source": "registry",
|
||||
"found": True,
|
||||
"discovery_snapshot_id": previous.get("id"),
|
||||
"commit": previous.get("commit"),
|
||||
"profile": previous.get("profile"),
|
||||
}
|
||||
|
||||
previous_dir = args.previous_dir
|
||||
if previous_dir is None and not args.no_cache:
|
||||
cache_dir = _scan_manifest_cache_dir(args, manifest_path)
|
||||
previous_dir = cache_dir / DEFAULT_SNAPSHOT_DIR if cache_dir is not None else None
|
||||
if previous_dir is None:
|
||||
return None, {"source": "dir", "found": False}
|
||||
previous_path = _manifest_discovery_snapshot_path(previous_dir, slug, args.profile)
|
||||
if not previous_path.is_file():
|
||||
return None, {"source": "dir", "found": False, "path": str(previous_path)}
|
||||
previous = json.loads(previous_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(previous, dict):
|
||||
raise ValueError(f"previous snapshot must be a JSON object: {previous_path}")
|
||||
return previous, {"source": "dir", "found": True, "path": str(previous_path)}
|
||||
|
||||
|
||||
def _scan_manifest_change_state(snapshot: dict[str, Any], previous_metadata: object) -> str:
|
||||
previous = previous_metadata if isinstance(previous_metadata, dict) else {}
|
||||
if not previous.get("found"):
|
||||
return "baseline"
|
||||
return "changed" if any(_discovery_diff_counts(snapshot).values()) else "unchanged"
|
||||
|
||||
|
||||
def _scan_manifest_acceptance_blockers(snapshot: dict[str, Any], args: argparse.Namespace) -> list[str]:
|
||||
if args.accept_policy == "explicit":
|
||||
return []
|
||||
blockers: list[str] = []
|
||||
diff = _discovery_diff_counts(snapshot)
|
||||
if diff["conflicted"]:
|
||||
blockers.append(f"{diff['conflicted']} conflicted candidate(s)")
|
||||
tombstone_count = len(snapshot.get("tombstones", []))
|
||||
if tombstone_count:
|
||||
blockers.append(f"{tombstone_count} tombstone(s)")
|
||||
review_artifacts = _review_artifact_counts(snapshot)
|
||||
low_confidence = review_artifacts.get("llm_low_confidence", 0)
|
||||
if low_confidence:
|
||||
blockers.append(f"{low_confidence} low-confidence LLM artifact(s)")
|
||||
if args.accept_review_state is not None and set(args.accept_review_state) != {"accepted"}:
|
||||
blockers.append("non-accepted review states requested")
|
||||
return blockers
|
||||
|
||||
|
||||
def _scan_manifest_needs_review(item: dict[str, Any]) -> bool:
|
||||
if item.get("status") == "review_required":
|
||||
return True
|
||||
if _int_from_nested(item, "diff_counts", "conflicted"):
|
||||
return True
|
||||
if int(item.get("tombstone_count") or 0):
|
||||
return True
|
||||
review_artifacts = item.get("review_artifact_counts")
|
||||
return isinstance(review_artifacts, dict) and any(int(value or 0) for value in review_artifacts.values())
|
||||
|
||||
|
||||
def _scan_manifest_exit_code(summary: dict[str, Any], args: argparse.Namespace) -> int:
|
||||
counts = summary.get("counts") if isinstance(summary.get("counts"), dict) else {}
|
||||
errors = int(counts.get("errors", 0) or 0)
|
||||
if args.strict and errors:
|
||||
return 1
|
||||
if args.exit_code_mode != "operational":
|
||||
return 0
|
||||
if errors:
|
||||
return 4
|
||||
if int(counts.get("review_required", 0) or 0):
|
||||
return 3
|
||||
if int(counts.get("changed", 0) or 0) or int(counts.get("baseline", 0) or 0):
|
||||
return 2
|
||||
return 0
|
||||
|
||||
|
||||
def _manifest_discovery_snapshot_path(base_dir: Path, slug: str, profile: str) -> Path:
|
||||
return base_dir.resolve() / f"{_slugify(slug)}-{_slugify(profile)}.discovery.json"
|
||||
|
||||
@@ -998,7 +1364,9 @@ def _scan_repo(args: argparse.Namespace) -> int:
|
||||
|
||||
|
||||
class RegistryRequestError(Exception):
|
||||
pass
|
||||
def __init__(self, message: str, status_code: int | None = None) -> None:
|
||||
super().__init__(message)
|
||||
self.status_code = status_code
|
||||
|
||||
|
||||
def _registry_post(registry_url: str, path: str, payload: dict[str, object]) -> dict[str, object]:
|
||||
@@ -1022,7 +1390,22 @@ def _registry_post_checked(registry_url: str, path: str, payload: dict[str, obje
|
||||
body = json.loads(response.read())
|
||||
except urllib.error.HTTPError as exc:
|
||||
detail = exc.read().decode("utf-8", errors="replace")
|
||||
raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}") from exc
|
||||
raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}", status_code=exc.code) from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise RegistryRequestError(f"cannot reach registry at {registry_url}: {exc}") from exc
|
||||
if not isinstance(body, dict):
|
||||
raise RegistryRequestError("registry returned a non-object response")
|
||||
return body
|
||||
|
||||
|
||||
def _registry_get_checked(registry_url: str, path: str) -> dict[str, object]:
|
||||
request = urllib.request.Request(registry_url.rstrip("/") + path, method="GET")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=15) as response:
|
||||
body = json.loads(response.read())
|
||||
except urllib.error.HTTPError as exc:
|
||||
detail = exc.read().decode("utf-8", errors="replace")
|
||||
raise RegistryRequestError(f"registry request failed ({exc.code}): {detail}", status_code=exc.code) from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise RegistryRequestError(f"cannot reach registry at {registry_url}: {exc}") from exc
|
||||
if not isinstance(body, dict):
|
||||
@@ -1088,6 +1471,19 @@ def _git_value(repo_path: Path | None, *args: str) -> str | None:
|
||||
def _utc_now() -> str:
|
||||
return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
||||
|
||||
|
||||
def _age_hours(value: object, now: datetime) -> float | None:
|
||||
if not isinstance(value, str) or not value:
|
||||
return None
|
||||
try:
|
||||
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||
return max(0.0, (now - parsed).total_seconds() / 3600)
|
||||
|
||||
|
||||
def _load_graph_or_exit(paths: list[Path]) -> FabricGraph:
|
||||
graph = build_graph(paths)
|
||||
if graph.load_errors:
|
||||
|
||||
@@ -555,6 +555,11 @@ class RegistryStore:
|
||||
if latest_discovery_snapshot
|
||||
else None
|
||||
),
|
||||
"discovery_health": (
|
||||
_discovery_snapshot_health(latest_discovery_snapshot)
|
||||
if latest_discovery_snapshot
|
||||
else {"health": "unknown", "review_required": False}
|
||||
),
|
||||
"counts": {
|
||||
"snapshots": len(self.list_snapshots(repo_slug)),
|
||||
"discovery_snapshots": len(discovery_snapshots),
|
||||
@@ -736,13 +741,42 @@ class RegistryStore:
|
||||
}
|
||||
for snapshot in self.latest_snapshots()
|
||||
]
|
||||
latest_discovery = [
|
||||
_discovery_snapshot_public_summary(snapshot)
|
||||
for snapshot in self.latest_discovery_snapshots()
|
||||
]
|
||||
return {
|
||||
"status": "ok",
|
||||
"database": str(self.path),
|
||||
"counts": counts,
|
||||
"latest_snapshots": latest,
|
||||
"latest_discovery_snapshots": latest_discovery,
|
||||
}
|
||||
|
||||
def latest_discovery_snapshots(self, profile: str | None = None) -> list[dict[str, Any]]:
|
||||
params: list[Any] = []
|
||||
where = ""
|
||||
if profile:
|
||||
where = "where profile = ?"
|
||||
params.append(profile)
|
||||
with self._connect() as db:
|
||||
rows = db.execute(
|
||||
f"""
|
||||
select ds.id, ds.repo_slug, ds.commit_sha, ds.profile, ds.generated_at,
|
||||
ds.snapshot_json, ds.accepted_graph_snapshot_id, ds.created_at
|
||||
from discovery_snapshots ds
|
||||
join (
|
||||
select repo_slug, profile, max(id) as latest_id
|
||||
from discovery_snapshots
|
||||
{where}
|
||||
group by repo_slug, profile
|
||||
) latest on latest.latest_id = ds.id
|
||||
order by ds.repo_slug, ds.profile
|
||||
""",
|
||||
params,
|
||||
).fetchall()
|
||||
return [_discovery_snapshot_dict(row) for row in rows]
|
||||
|
||||
def _latest_graph_or_empty(self, repo_slug: str) -> dict[str, Any]:
|
||||
try:
|
||||
return self.latest_snapshot(repo_slug)["graph"]
|
||||
@@ -1170,7 +1204,7 @@ def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, An
|
||||
candidates = snapshot["snapshot"].get("candidates", {})
|
||||
reconciliation = snapshot["snapshot"].get("reconciliation", {})
|
||||
diff = reconciliation.get("diff", {}) if isinstance(reconciliation, dict) else {}
|
||||
return {
|
||||
result = {
|
||||
"id": snapshot["id"],
|
||||
"repo_slug": snapshot["repo_slug"],
|
||||
"commit": snapshot["commit"],
|
||||
@@ -1190,6 +1224,50 @@ def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, An
|
||||
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
|
||||
},
|
||||
}
|
||||
result.update(_discovery_snapshot_health(snapshot))
|
||||
return result
|
||||
|
||||
|
||||
def _discovery_snapshot_health(snapshot: dict[str, Any]) -> dict[str, Any]:
|
||||
payload = snapshot.get("snapshot") if isinstance(snapshot.get("snapshot"), dict) else {}
|
||||
reconciliation = payload.get("reconciliation") if isinstance(payload.get("reconciliation"), dict) else {}
|
||||
diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {}
|
||||
diff_counts = {
|
||||
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
|
||||
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
|
||||
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
|
||||
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
|
||||
}
|
||||
review_artifact_count = (
|
||||
len(payload.get("review_artifacts", []))
|
||||
if isinstance(payload.get("review_artifacts"), list)
|
||||
else 0
|
||||
)
|
||||
connector_run_count = (
|
||||
len(payload.get("connector_runs", []))
|
||||
if isinstance(payload.get("connector_runs"), list)
|
||||
else 0
|
||||
)
|
||||
tombstone_count = (
|
||||
len(payload.get("tombstones", []))
|
||||
if isinstance(payload.get("tombstones"), list)
|
||||
else 0
|
||||
)
|
||||
review_required = bool(diff_counts["conflicted"] or review_artifact_count or tombstone_count)
|
||||
changed = any(diff_counts.values())
|
||||
if review_required:
|
||||
health = "needs_review"
|
||||
elif changed:
|
||||
health = "changed"
|
||||
else:
|
||||
health = "fresh"
|
||||
return {
|
||||
"health": health,
|
||||
"review_required": review_required,
|
||||
"review_artifact_count": review_artifact_count,
|
||||
"connector_run_count": connector_run_count,
|
||||
"tombstone_count": tombstone_count,
|
||||
}
|
||||
|
||||
|
||||
def _row_dict(row: sqlite3.Row) -> dict[str, Any]:
|
||||
|
||||
Reference in New Issue
Block a user