#!/usr/bin/env python3
"""Shared base for read-only discovery connectors (ATLAS-WP-0003).

A connector scans a source and emits *candidate* surface entries for
human/agent PR review. Connectors are stateless and read-only: they NEVER
write a source system, NEVER auto-merge, and NEVER read or store
configuration values or secret values (PRD FR-8;
docs/discovery-connectors.md).

Candidates are written to registry/surfaces/candidates/<id>.md with
`status: candidate` and provenance in `evidence`. A candidate is never
written if a promoted entry with the same id already exists (the registry is
the source of truth; connectors propose, they do not overwrite).
"""
from __future__ import annotations

import datetime as _dt
import json
from pathlib import Path

try:
    import yaml
    from jsonschema import Draft202012Validator
except ImportError as exc:  # pragma: no cover
    raise SystemExit(f"setup error: missing dependency ({exc}). pip install pyyaml jsonschema")

ROOT = Path(__file__).resolve().parent.parent
SCHEMA_PATH = ROOT / "schemas" / "surface-entry.schema.json"
SURFACES_DIR = ROOT / "registry" / "surfaces"
CANDIDATES_DIR = SURFACES_DIR / "candidates"

# The schema is loaded once at import time. JSON text is read as UTF-8
# explicitly so the result does not depend on the platform's locale encoding.
_VALIDATOR = Draft202012Validator(json.loads(SCHEMA_PATH.read_text(encoding="utf-8")))

# ISO date captured at import time; used as the default `evidence.last_seen`.
TODAY = _dt.date.today().isoformat()


def promoted_ids() -> set[str]:
    """Ids of already-promoted (non-candidate) surface entries.

    An id is the file stem of each ``*.md`` file directly inside
    ``SURFACES_DIR``. The glob is not recursive, so files under
    ``candidates/`` are not included.
    """
    return {p.stem for p in SURFACES_DIR.glob("*.md")}


def validate_entry(entry: dict) -> list[str]:
    """Validate *entry* against the surface-entry schema.

    Returns one ``"<path>: <message>"`` string per validation error, where
    ``<path>`` is the '/'-joined location of the offending value, or
    ``(root)`` when the error is at the top level. An empty list means the
    entry is valid.
    """
    return [
        f"{'/'.join(str(p) for p in e.path) or '(root)'}: {e.message}"
        for e in _VALIDATOR.iter_errors(entry)
    ]


def emit_candidate(entry: dict, *, connector: str, body: str = "") -> tuple[str, Path | None]:
    """Validate and write one candidate. Returns (status_message, path|None).

    The caller's *entry* is not mutated; a shallow copy is stamped with
    ``status: candidate`` and default ``evidence.discovery_method`` /
    ``evidence.last_seen`` values (existing values are kept).

    status_message starts with one of 'written', 'skipped (promoted)' or
    'invalid', followed by ``: <id>`` (and the first validation error for
    'invalid'). ``path`` is the written file for 'written', otherwise None.

    *body* is the Markdown placed under the heading; when empty, a default
    review note naming *connector* is used.
    """
    entry = dict(entry)
    entry["status"] = "candidate"

    # Copy the evidence mapping before adding provenance defaults. A value
    # that cannot be turned into a dict (e.g. a string or number) is left
    # as-is so schema validation can report it, rather than raising here
    # and aborting the whole batch.
    try:
        ev = dict(entry.get("evidence", {}) or {})
    except (TypeError, ValueError):
        ev = None
    if ev is not None:
        ev.setdefault("discovery_method", f"connector:{connector}")
        ev.setdefault("last_seen", TODAY)
        entry["evidence"] = ev

    sid = entry.get("id", "")
    # Promoted entries win: never shadow one with a candidate.
    if sid in promoted_ids():
        return (f"skipped (promoted): {sid}", None)

    errs = validate_entry(entry)
    if errs:
        # Only the first error is reported in the status message.
        return (f"invalid: {sid}: {errs[0]}", None)

    CANDIDATES_DIR.mkdir(parents=True, exist_ok=True)
    fm = yaml.safe_dump(entry, sort_keys=False).strip()
    text = f"---\n{fm}\n---\n\n# {entry.get('name', sid)} (candidate)\n\n"
    text += body or (
        f"Discovered by `{connector}`. Review, refine, and promote to "
        f"`registry/surfaces/{sid}.md` + `surfaces.yaml`, or reject.\n"
    )
    path = CANDIDATES_DIR / f"{sid}.md"
    # Explicit UTF-8: the heading and body may contain non-ASCII text, and
    # the locale default encoding is not guaranteed to be able to encode it.
    path.write_text(text, encoding="utf-8")
    return (f"written: {sid}", path)


def run_connector(name: str, candidates: list[tuple[dict, str]]) -> int:
    """Emit a batch; print a summary. candidates = list of (entry, body).

    Prints one status line per candidate and a final tally. Returns 1 if any
    candidate was invalid, otherwise 0 (including when *candidates* is
    empty).
    """
    if not candidates:
        print(f"{name}: no candidates discovered (source empty or unavailable)")
        return 0

    written = skipped = invalid = 0
    for entry, body in candidates:
        msg, _ = emit_candidate(entry, connector=name, body=body)
        print(f"  {msg}")
        # Status messages are classified by their prefix; bools add as 0/1.
        written += msg.startswith("written")
        skipped += msg.startswith("skipped")
        invalid += msg.startswith("invalid")

    print(f"{name}: {written} written, {skipped} skipped, {invalid} invalid "
          f"-> registry/surfaces/candidates/")
    return 1 if invalid else 0