diff --git a/docs/registry-api.md b/docs/registry-api.md index 8d58bf6..247e16b 100644 --- a/docs/registry-api.md +++ b/docs/registry-api.md @@ -29,6 +29,38 @@ GET /repositories/{repo_slug}/snapshots/diff Snapshot ingestion accepts a `FabricGraphExport` under `graph` plus `commit` and optional `generated_at`. +## Discovery Snapshots + +```text +POST /repositories/{repo_slug}/discovery-snapshots +GET /repositories/{repo_slug}/discovery-snapshots +GET /repositories/{repo_slug}/discovery-snapshots/latest +GET /repositories/{repo_slug}/discovery-snapshots/diff +GET /repositories/{repo_slug}/discovery-snapshots/{snapshot_id} +POST /repositories/{repo_slug}/discovery-snapshots/{snapshot_id}/accept +``` + +Discovery ingestion accepts a `FabricDiscoverySnapshot`. Snapshots are stored by +repo, commit, and scan profile for dry-run review. Discovery diffs include +candidate additions, removals, changes, confidence changes, reconciliation +conflicts, and scoped retirements. + +Acceptance projects only explicitly accepted discovery candidates into a normal +`FabricGraphExport` snapshot. Existing graph nodes are preserved, so repo-owned +declarations are not overwritten by discovery output. Projected nodes carry +discovery provenance, review state, confidence, and source anchors in their +attributes; graph explorer exports surface that metadata. + +CLI helpers: + +```bash +railiance-fabric registry ingest-discovery discovery.json \ + --repo-slug railiance-fabric + +railiance-fabric registry accept-discovery railiance-fabric 12 \ + --accepted-key discovery:railiance-fabric:service-declaration:example +``` + ## Graph Queries ```text diff --git a/docs/repo-reality-scanner.md b/docs/repo-reality-scanner.md index bdb5856..9ac0a00 100644 --- a/docs/repo-reality-scanner.md +++ b/docs/repo-reality-scanner.md @@ -131,6 +131,39 @@ is present in the current scan and has `mode: replacement`. Missing candidates from additive scopes, such as broad LLM evidence bundles, are left alone. Existing tombstones are preserved so repeated scans can explain graph drift. +## Registry Review And Acceptance + +Discovery snapshots can be stored in the Fabric registry for review: + +```bash +railiance-fabric scan . \ + --repo-slug railiance-fabric \ + --previous-snapshot previous-discovery.json \ + --output discovery.json + +railiance-fabric registry ingest-discovery discovery.json \ + --repo-slug railiance-fabric +``` + +The registry keeps discovery snapshots separately from accepted graph snapshots +by repo, commit, and scan profile. It exposes latest/list/diff API routes so a +dry run can be reviewed without changing the accepted graph. + +Accepted discovery can be projected into a normal graph snapshot: + +```bash +railiance-fabric registry accept-discovery railiance-fabric 12 \ + --accepted-key discovery:railiance-fabric:service-declaration:example +``` + +By default, the accept path only projects candidates already marked +`review_state: accepted`. Passing `--accepted-key` explicitly includes selected +candidate stable keys. Existing accepted graph nodes win over discovery nodes +with the same graph id, so repo-owned declarations are preserved. Projected +nodes carry discovery stable key, origin, review state, confidence, provenance, +and source anchors in graph attributes; the graph explorer payload exposes +those fields for review. + ## Connector Follow-Up Connector follow-up is explicit and separated from repo-local extraction: diff --git a/railiance_fabric/cli.py b/railiance_fabric/cli.py index ef1e0f7..d605949 100644 --- a/railiance_fabric/cli.py +++ b/railiance_fabric/cli.py @@ -122,6 +122,21 @@ def build_parser() -> argparse.ArgumentParser: cyclonedx.add_argument("--registry-url", default="http://127.0.0.1:8765") cyclonedx.add_argument("--repo-slug", required=True) cyclonedx.add_argument("--json", action="store_true", help="Print the raw ingest response.") + + discovery = registry_sub.add_parser("ingest-discovery", help="Store a discovery snapshot for review.") + discovery.add_argument("snapshot", type=Path) + discovery.add_argument("--registry-url", default="http://127.0.0.1:8765") + discovery.add_argument("--repo-slug", default=None) + discovery.add_argument("--json", action="store_true", help="Print the raw ingest response.") + + accept_discovery = registry_sub.add_parser("accept-discovery", help="Project accepted discovery candidates into a graph snapshot.") + accept_discovery.add_argument("repo_slug") + accept_discovery.add_argument("discovery_snapshot_id", type=int) + accept_discovery.add_argument("--registry-url", default="http://127.0.0.1:8765") + accept_discovery.add_argument("--accepted-key", action="append", default=[]) + accept_discovery.add_argument("--accept-review-state", action="append", default=None) + accept_discovery.add_argument("--commit", default=None) + accept_discovery.add_argument("--json", action="store_true", help="Print the raw accept response.") return parser @@ -185,6 +200,10 @@ def main(argv: list[str] | None = None) -> int: return _registry_sync_manifest(args) if args.registry_command == "ingest-cyclonedx": return _registry_ingest_cyclonedx(args) + if args.registry_command == "ingest-discovery": + return _registry_ingest_discovery(args) + if args.registry_command == "accept-discovery": + return _registry_accept_discovery(args) parser.error(f"unknown command {args.command!r}") return 2 @@ -406,6 +425,62 @@ def _registry_ingest_cyclonedx(args: argparse.Namespace) -> int: return 0 +def _registry_ingest_discovery(args: argparse.Namespace) -> int: + payload = json.loads(args.snapshot.read_text(encoding="utf-8")) + if not isinstance(payload, dict): + print(f"ERROR {args.snapshot}: discovery snapshot must be a JSON object", file=sys.stderr) + return 1 + source = payload.get("source") if isinstance(payload.get("source"), dict) else {} + repo_slug = args.repo_slug or str(source.get("repo_slug") or "").strip() + if not repo_slug: + print("ERROR discovery snapshot source.repo_slug is required unless --repo-slug is provided", file=sys.stderr) + return 1 + result = _registry_post( + args.registry_url, + f"/repositories/{repo_slug}/discovery-snapshots", + payload, + ) + if args.json: + print(json.dumps(result, indent=2, sort_keys=True)) + else: + candidates = result.get("snapshot", {}).get("candidates", {}) + counts = { + "nodes": len(candidates.get("nodes", [])), + "edges": len(candidates.get("edges", [])), + "attributes": len(candidates.get("attributes", [])), + } + print( + f"ingested discovery snapshot {result['id']} for {result['repo_slug']} " + f"({result['profile']}, {result['commit']}): " + f"{counts['nodes']} node candidate(s), {counts['edges']} edge candidate(s), " + f"{counts['attributes']} attribute candidate(s)" + ) + return 0 + + +def _registry_accept_discovery(args: argparse.Namespace) -> int: + payload = { + "accepted_keys": args.accepted_key, + "commit": args.commit, + } + if args.accept_review_state is not None: + payload["accept_review_states"] = args.accept_review_state + result = _registry_post( + args.registry_url, + f"/repositories/{args.repo_slug}/discovery-snapshots/{args.discovery_snapshot_id}/accept", + payload, + ) + if args.json: + print(json.dumps(result, indent=2, sort_keys=True)) + else: + graph_snapshot = result["graph_snapshot"] + print( + f"accepted discovery snapshot {args.discovery_snapshot_id} for {args.repo_slug}; " + f"graph snapshot {graph_snapshot['id']} stored for {graph_snapshot['commit']}" + ) + return 0 + + def _scan_repo(args: argparse.Namespace) -> int: snapshot = scan_repo( ScanOptions( diff --git a/railiance_fabric/graph_explorer.py b/railiance_fabric/graph_explorer.py index 4a9bdf1..f18c985 100644 --- a/railiance_fabric/graph_explorer.py +++ b/railiance_fabric/graph_explorer.py @@ -284,6 +284,8 @@ def fabric_graph_explorer_payload( layer = _layer_for_kind(kind) is_unresolved = node_id in unresolved attributes = node.get("attributes") if isinstance(node.get("attributes"), dict) else {} + review_state = str(attributes.get("discovery_review_state") or "accepted") + confidence = attributes.get("discovery_confidence") elements.append( { "data": { @@ -297,13 +299,24 @@ def fabric_graph_explorer_payload( "repo": str(node.get("repo", "")), "domain": str(node.get("domain", "")), "lifecycle": str(node.get("lifecycle", "")), - "reviewState": "accepted", + "reviewState": review_state, "freshnessState": "current", "unresolved": is_unresolved, - "confidence": 0.45 if is_unresolved else 1.0, + "confidence": ( + float(confidence) + if isinstance(confidence, (int, float)) + else 0.45 if is_unresolved else 1.0 + ), "visualSize": 34 if layer == "binding" else 46 if is_unresolved else 50, - "ownership": str(attributes.get("owner") or "repo"), + "ownership": str(attributes.get("owner") or attributes.get("discovery_origin") or "repo"), "attributes": attributes, + "discovery": { + "stableKey": attributes.get("discovery_stable_key", ""), + "origin": attributes.get("discovery_origin", ""), + "reviewState": attributes.get("discovery_review_state", ""), + "confidence": attributes.get("discovery_confidence", ""), + "provenance": attributes.get("discovery_provenance", []), + }, "displayState": "show", "visibilitySource": "default", "visibilityReason": "default", @@ -312,7 +325,7 @@ def fabric_graph_explorer_payload( }, "classes": " ".join( part - for part in (layer, kind, "unresolved" if is_unresolved else "accepted") + for part in (layer, kind, "unresolved" if is_unresolved else review_state) if part ), } @@ -733,6 +746,15 @@ def _source_references(node: dict[str, Any]) -> list[dict[str, str]]: for source in attributes.get("source_links", []): if isinstance(source, dict): references.append({key: str(value) for key, value in source.items()}) + for anchor in attributes.get("discovery_source_anchors", []): + if isinstance(anchor, dict): + reference = { + "label": f"Discovery {anchor.get('source_kind', 'source')}", + } + for key in ("path", "url", "ref", "json_pointer"): + if anchor.get(key): + reference[key] = str(anchor[key]) + references.append(reference) return references diff --git a/railiance_fabric/registry.py b/railiance_fabric/registry.py index fc284a1..029353f 100644 --- a/railiance_fabric/registry.py +++ b/railiance_fabric/registry.py @@ -1,6 +1,8 @@ from __future__ import annotations import json +import hashlib +import re import sqlite3 from dataclasses import dataclass from datetime import datetime, timezone @@ -50,6 +52,20 @@ class RegistryStore: create index if not exists idx_snapshots_repo_latest on snapshots(repo_slug, id desc); + create table if not exists discovery_snapshots ( + id integer primary key autoincrement, + repo_slug text not null references repositories(slug), + commit_sha text not null, + profile text not null, + generated_at text not null, + snapshot_json text not null, + accepted_graph_snapshot_id integer references snapshots(id), + created_at text not null + ); + + create index if not exists idx_discovery_snapshots_repo_latest + on discovery_snapshots(repo_slug, profile, id desc); + create table if not exists artifacts ( id integer primary key autoincrement, repo_slug text not null references repositories(slug), @@ -230,6 +246,166 @@ class RegistryStore: ).fetchall() return [_snapshot_dict(row) for row in rows] + def add_discovery_snapshot(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]: + self.get_repository(repo_slug) + if not isinstance(payload, dict): + raise RegistryError("discovery snapshot payload must be an object") + validate_discovery_snapshot(payload) + source = payload.get("source") if isinstance(payload.get("source"), dict) else {} + if source.get("repo_slug") and source.get("repo_slug") != repo_slug: + raise RegistryError("discovery snapshot repo_slug does not match request path") + commit = str(source.get("commit") or "working-tree") + scan = payload.get("scan") if isinstance(payload.get("scan"), dict) else {} + profile = str(scan.get("profile") or "deterministic") + generated_at = str(payload.get("generated_at") or _utc_now()) + now = _utc_now() + with self._connect() as db: + cursor = db.execute( + """ + insert into discovery_snapshots ( + repo_slug, commit_sha, profile, generated_at, snapshot_json, + accepted_graph_snapshot_id, created_at + ) + values (?, ?, ?, ?, ?, null, ?) + """, + (repo_slug, commit, profile, generated_at, json.dumps(payload, sort_keys=True), now), + ) + discovery_id = int(cursor.lastrowid) + return self.get_discovery_snapshot(discovery_id) + + def get_discovery_snapshot(self, discovery_snapshot_id: int) -> dict[str, Any]: + with self._connect() as db: + row = db.execute( + """ + select id, repo_slug, commit_sha, profile, generated_at, snapshot_json, + accepted_graph_snapshot_id, created_at + from discovery_snapshots + where id = ? + """, + (discovery_snapshot_id,), + ).fetchone() + if row is None: + raise RegistryError(f"discovery snapshot not found: {discovery_snapshot_id}", 404) + return _discovery_snapshot_dict(row) + + def list_discovery_snapshots(self, repo_slug: str, profile: str | None = None) -> list[dict[str, Any]]: + self.get_repository(repo_slug) + params: list[Any] = [repo_slug] + where = "where repo_slug = ?" + if profile: + where += " and profile = ?" + params.append(profile) + with self._connect() as db: + rows = db.execute( + f""" + select id, repo_slug, commit_sha, profile, generated_at, snapshot_json, + accepted_graph_snapshot_id, created_at + from discovery_snapshots + {where} + order by id desc + """, + params, + ).fetchall() + return [_discovery_snapshot_summary(row) for row in rows] + + def latest_discovery_snapshot(self, repo_slug: str, profile: str | None = None) -> dict[str, Any]: + self.get_repository(repo_slug) + params: list[Any] = [repo_slug] + where = "where repo_slug = ?" + if profile: + where += " and profile = ?" + params.append(profile) + with self._connect() as db: + row = db.execute( + f""" + select id, repo_slug, commit_sha, profile, generated_at, snapshot_json, + accepted_graph_snapshot_id, created_at + from discovery_snapshots + {where} + order by id desc + limit 1 + """, + params, + ).fetchone() + if row is None: + raise RegistryError(f"no discovery snapshots for repository: {repo_slug}", 404) + return _discovery_snapshot_dict(row) + + def discovery_snapshot_diff( + self, + repo_slug: str, + from_id: int | None = None, + to_id: int | None = None, + profile: str | None = None, + ) -> dict[str, Any]: + self.get_repository(repo_slug) + if from_id is None or to_id is None: + snapshots = self.list_discovery_snapshots(repo_slug, profile=profile) + if len(snapshots) < 2: + raise RegistryError(f"at least two discovery snapshots are required for diff: {repo_slug}", 404) + to_id = snapshots[0]["id"] if to_id is None else to_id + from_id = snapshots[1]["id"] if from_id is None else from_id + + before = self.get_discovery_snapshot(from_id) + after = self.get_discovery_snapshot(to_id) + if before["repo_slug"] != repo_slug or after["repo_slug"] != repo_slug: + raise RegistryError("discovery snapshot ids must belong to the requested repository") + + return { + "repo_slug": repo_slug, + "from": _discovery_snapshot_public_summary(before), + "to": _discovery_snapshot_public_summary(after), + "discovery": _discovery_diff(before["snapshot"], after["snapshot"]), + "reconciliation": after["snapshot"].get("reconciliation", {}), + } + + def accept_discovery_snapshot( + self, + repo_slug: str, + discovery_snapshot_id: int, + payload: dict[str, Any] | None = None, + ) -> dict[str, Any]: + payload = payload or {} + discovery = self.get_discovery_snapshot(discovery_snapshot_id) + if discovery["repo_slug"] != repo_slug: + raise RegistryError("discovery snapshot id must belong to the requested repository") + base_graph = self._latest_graph_or_empty(repo_slug) + accepted_keys = payload.get("accepted_keys") + if accepted_keys is not None and not isinstance(accepted_keys, list): + raise RegistryError("field 'accepted_keys' must be an array when provided") + accepted_key_set = {str(key) for key in accepted_keys or []} + accept_review_states = payload.get("accept_review_states", ["accepted"]) + if not isinstance(accept_review_states, list): + raise RegistryError("field 'accept_review_states' must be an array") + graph = _project_discovery_snapshot( + base_graph, + discovery["snapshot"], + accepted_keys=accepted_key_set, + accept_review_states={str(state) for state in accept_review_states}, + ) + snapshot = self.add_snapshot( + repo_slug, + { + "commit": str(payload.get("commit") or f"discovery:{discovery['commit']}"), + "generated_at": _utc_now(), + "graph": graph, + }, + ) + with self._connect() as db: + db.execute( + "update discovery_snapshots set accepted_graph_snapshot_id = ? where id = ?", + (snapshot["id"], discovery_snapshot_id), + ) + return { + "repo_slug": repo_slug, + "discovery_snapshot": self.get_discovery_snapshot(discovery_snapshot_id), + "graph_snapshot": snapshot, + "projected": { + "node_count": len(graph.get("nodes", [])), + "edge_count": len(graph.get("edges", [])), + }, + } + def combined_graph(self) -> dict[str, Any]: nodes: dict[str, dict[str, Any]] = {} edges: list[dict[str, str]] = [] @@ -358,17 +534,30 @@ class RegistryStore: if exc.status_code != 404: raise latest_snapshot = None + try: + latest_discovery_snapshot = self.latest_discovery_snapshot(repo_slug) + except RegistryError as exc: + if exc.status_code != 404: + raise + latest_discovery_snapshot = None graph = latest_snapshot["graph"] if latest_snapshot else _empty_graph() nodes = [node for node in graph.get("nodes", []) if isinstance(node, dict)] edges = [edge for edge in graph.get("edges", []) if isinstance(edge, dict)] artifacts = self.list_artifacts(repo_slug=repo_slug) libraries = self.list_libraries(repo_slug=repo_slug) + discovery_snapshots = self.list_discovery_snapshots(repo_slug) return { "repository": repository, "latest_snapshot": _snapshot_public_summary(latest_snapshot) if latest_snapshot else None, + "latest_discovery_snapshot": ( + _discovery_snapshot_public_summary(latest_discovery_snapshot) + if latest_discovery_snapshot + else None + ), "counts": { "snapshots": len(self.list_snapshots(repo_slug)), + "discovery_snapshots": len(discovery_snapshots), "nodes": len(nodes), "edges": len(edges), "artifacts": len(artifacts), @@ -534,6 +723,7 @@ class RegistryStore: counts = { "repositories": db.execute("select count(*) from repositories").fetchone()[0], "snapshots": db.execute("select count(*) from snapshots").fetchone()[0], + "discovery_snapshots": db.execute("select count(*) from discovery_snapshots").fetchone()[0], "artifacts": db.execute("select count(*) from artifacts").fetchone()[0], "libraries": db.execute("select count(*) from libraries").fetchone()[0], } @@ -553,6 +743,14 @@ class RegistryStore: "latest_snapshots": latest, } + def _latest_graph_or_empty(self, repo_slug: str) -> dict[str, Any]: + try: + return self.latest_snapshot(repo_slug)["graph"] + except RegistryError as exc: + if exc.status_code != 404: + raise + return _empty_graph() + def _connect(self) -> sqlite3.Connection: db = sqlite3.connect(self.path) db.row_factory = sqlite3.Row @@ -569,6 +767,16 @@ def validate_graph_export(graph: dict[str, Any]) -> None: raise RegistryError(f"invalid FabricGraphExport at {location}: {error.message}") +def validate_discovery_snapshot(snapshot: dict[str, Any]) -> None: + schema_path = repo_root() / "schemas" / "discovery-snapshot.schema.yaml" + validator = draft202012_validator(schema_path) + errors = sorted(validator.iter_errors(snapshot), key=lambda error: list(error.path)) + if errors: + error = errors[0] + location = ".".join(str(part) for part in error.path) or "" + raise RegistryError(f"invalid FabricDiscoverySnapshot at {location}: {error.message}") + + def providers(graph: dict[str, Any], capability: str) -> list[dict[str, Any]]: result = [] for node in _nodes(graph): @@ -911,6 +1119,79 @@ def _snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]: } +def _discovery_snapshot_dict(row: sqlite3.Row) -> dict[str, Any]: + snapshot = json.loads(row["snapshot_json"]) + return { + "id": row["id"], + "repo_slug": row["repo_slug"], + "commit": row["commit_sha"], + "profile": row["profile"], + "generated_at": row["generated_at"], + "snapshot": snapshot, + "accepted_graph_snapshot_id": row["accepted_graph_snapshot_id"], + "created_at": row["created_at"], + } + + +def _discovery_snapshot_summary(row: sqlite3.Row) -> dict[str, Any]: + snapshot = json.loads(row["snapshot_json"]) + candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {} + reconciliation = snapshot.get("reconciliation") if isinstance(snapshot.get("reconciliation"), dict) else {} + diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {} + return { + "id": row["id"], + "repo_slug": row["repo_slug"], + "commit": row["commit_sha"], + "profile": row["profile"], + "generated_at": row["generated_at"], + "accepted_graph_snapshot_id": row["accepted_graph_snapshot_id"], + "created_at": row["created_at"], + "candidate_counts": { + "nodes": len(candidates.get("nodes", [])) if isinstance(candidates.get("nodes"), list) else 0, + "edges": len(candidates.get("edges", [])) if isinstance(candidates.get("edges"), list) else 0, + "attributes": len(candidates.get("attributes", [])) if isinstance(candidates.get("attributes"), list) else 0, + }, + "diff_counts": { + "added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0, + "changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0, + "retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0, + "conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0, + }, + "review_artifact_count": len(snapshot.get("review_artifacts", [])) + if isinstance(snapshot.get("review_artifacts"), list) + else 0, + "connector_run_count": len(snapshot.get("connector_runs", [])) + if isinstance(snapshot.get("connector_runs"), list) + else 0, + } + + +def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]: + candidates = snapshot["snapshot"].get("candidates", {}) + reconciliation = snapshot["snapshot"].get("reconciliation", {}) + diff = reconciliation.get("diff", {}) if isinstance(reconciliation, dict) else {} + return { + "id": snapshot["id"], + "repo_slug": snapshot["repo_slug"], + "commit": snapshot["commit"], + "profile": snapshot["profile"], + "generated_at": snapshot["generated_at"], + "accepted_graph_snapshot_id": snapshot["accepted_graph_snapshot_id"], + "created_at": snapshot["created_at"], + "candidate_counts": { + "nodes": len(candidates.get("nodes", [])) if isinstance(candidates.get("nodes"), list) else 0, + "edges": len(candidates.get("edges", [])) if isinstance(candidates.get("edges"), list) else 0, + "attributes": len(candidates.get("attributes", [])) if isinstance(candidates.get("attributes"), list) else 0, + }, + "diff_counts": { + "added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0, + "changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0, + "retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0, + "conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0, + }, + } + + def _row_dict(row: sqlite3.Row) -> dict[str, Any]: return {key: row[key] for key in row.keys()} @@ -1027,6 +1308,186 @@ def _empty_graph() -> dict[str, Any]: } +def _discovery_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]: + result: dict[str, Any] = {} + for collection in ("nodes", "edges", "attributes"): + before_items = _discovery_candidates_by_key(before, collection) + after_items = _discovery_candidates_by_key(after, collection) + added = sorted(set(after_items) - set(before_items)) + removed = sorted(set(before_items) - set(after_items)) + common = sorted(set(before_items) & set(after_items)) + changed = [ + key for key in common + if _stable_json(_without_review_noise(before_items[key])) != _stable_json(_without_review_noise(after_items[key])) + ] + confidence_changed = [ + { + "stable_key": key, + "before": before_items[key].get("confidence"), + "after": after_items[key].get("confidence"), + } + for key in common + if before_items[key].get("confidence") != after_items[key].get("confidence") + ] + result[collection] = { + "added": [after_items[key] for key in added], + "removed": [before_items[key] for key in removed], + "changed": [ + { + "stable_key": key, + "before": before_items[key], + "after": after_items[key], + } + for key in changed + ], + "confidence_changed": confidence_changed, + } + after_reconciliation = after.get("reconciliation") if isinstance(after.get("reconciliation"), dict) else {} + result["review"] = { + "retired": after_reconciliation.get("diff", {}).get("retired", []) + if isinstance(after_reconciliation.get("diff"), dict) + else [], + "conflicted": after_reconciliation.get("diff", {}).get("conflicted", []) + if isinstance(after_reconciliation.get("diff"), dict) + else [], + "conflicts": after_reconciliation.get("conflicts", []), + } + return result + + +def _project_discovery_snapshot( + base_graph: dict[str, Any], + discovery_snapshot: dict[str, Any], + *, + accepted_keys: set[str], + accept_review_states: set[str], +) -> dict[str, Any]: + graph = json.loads(json.dumps(base_graph)) + graph.setdefault("apiVersion", "railiance.fabric/v1alpha1") + graph.setdefault("kind", "FabricGraphExport") + graph.setdefault("nodes", []) + graph.setdefault("edges", []) + candidates = discovery_snapshot.get("candidates") if isinstance(discovery_snapshot.get("candidates"), dict) else {} + candidate_nodes = [ + node for node in candidates.get("nodes", []) + if isinstance(node, dict) and _candidate_is_accepted(node, accepted_keys, accept_review_states) + ] + candidate_edges = [ + edge for edge in candidates.get("edges", []) + if isinstance(edge, dict) and _candidate_is_accepted(edge, accepted_keys, accept_review_states) + ] + existing_nodes = { + str(node.get("id")): node + for node in graph.get("nodes", []) + if isinstance(node, dict) and node.get("id") + } + key_to_graph_id = { + str(node.get("id")): str(node.get("id")) + for node in graph.get("nodes", []) + if isinstance(node, dict) and node.get("id") + } + for candidate in candidate_nodes: + graph_id = _candidate_graph_id(candidate) + key_to_graph_id[str(candidate.get("stable_key"))] = graph_id + if candidate.get("graph_id"): + key_to_graph_id[str(candidate.get("graph_id"))] = graph_id + if graph_id in existing_nodes: + continue + projected = _project_candidate_node(candidate, graph_id) + existing_nodes[graph_id] = projected + graph["nodes"].append(projected) + + existing_edges = { + _edge_key(edge) + for edge in graph.get("edges", []) + if isinstance(edge, dict) + } + for candidate in candidate_edges: + source = key_to_graph_id.get(str(candidate.get("source_key") or "")) + target = key_to_graph_id.get(str(candidate.get("target_key") or "")) + if not source or not target: + continue + edge = {"from": source, "to": target, "type": str(candidate.get("edge_type") or "")} + edge_key = _edge_key(edge) + if edge["type"] and edge_key not in existing_edges: + existing_edges.add(edge_key) + graph["edges"].append(edge) + validate_graph_export(graph) + return graph + + +def _project_candidate_node(candidate: dict[str, Any], graph_id: str) -> dict[str, Any]: + attributes = candidate.get("attributes") if isinstance(candidate.get("attributes"), dict) else {} + return { + "id": graph_id, + "kind": str(candidate.get("kind") or "DiscoveredEntity"), + "name": str(candidate.get("label") or graph_id), + "repo": str(candidate.get("repo") or ""), + "domain": str(candidate.get("domain") or ""), + "lifecycle": str(candidate.get("lifecycle") or "active"), + "attributes": { + **attributes, + "discovery_stable_key": candidate.get("stable_key"), + "discovery_origin": candidate.get("origin"), + "discovery_review_state": candidate.get("review_state"), + "discovery_confidence": candidate.get("confidence"), + "discovery_source_anchors": candidate.get("source_anchors", []), + "discovery_provenance": candidate.get("provenance", []), + }, + } + + +def _candidate_graph_id(candidate: dict[str, Any]) -> str: + graph_id = str(candidate.get("graph_id") or "").strip() + if graph_id: + return graph_id + repo = _graph_id_part(str(candidate.get("repo") or "repo")) + kind = _graph_id_part(str(candidate.get("kind") or "entity")) + label = _graph_id_part(str(candidate.get("label") or candidate.get("stable_key") or "candidate")) + candidate_id = f"{repo}.{kind}.{label}".strip(".") + if len(candidate_id) > 150: + digest = hashlib.sha256(str(candidate.get("stable_key") or candidate_id).encode("utf-8")).hexdigest()[:10] + candidate_id = f"{candidate_id[:139].rstrip('.')}.{digest}" + if len(candidate_id) < 3: + candidate_id = f"{candidate_id}.id" + return candidate_id + + +def _graph_id_part(value: str) -> str: + text = re.sub(r"[^a-z0-9.-]+", "-", value.lower()).strip(".-") + text = re.sub(r"-+", "-", text) + text = re.sub(r"\.+", ".", text) + return text or "unknown" + + +def _candidate_is_accepted( + candidate: dict[str, Any], + accepted_keys: set[str], + accept_review_states: set[str], +) -> bool: + stable_key = str(candidate.get("stable_key") or "") + if stable_key in accepted_keys: + return True + return str(candidate.get("review_state") or "") in accept_review_states + + +def _discovery_candidates_by_key(snapshot: dict[str, Any], collection: str) -> dict[str, dict[str, Any]]: + candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {} + return { + str(item["stable_key"]): item + for item in candidates.get(collection, []) + if isinstance(item, dict) and item.get("stable_key") + } + + +def _without_review_noise(candidate: dict[str, Any]) -> dict[str, Any]: + return { + key: value + for key, value in candidate.items() + if key not in {"provenance"} + } + + def _graph_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]: before_nodes = _nodes_by_id(before) after_nodes = _nodes_by_id(after) diff --git a/railiance_fabric/server.py b/railiance_fabric/server.py index c4c4917..a70a3df 100644 --- a/railiance_fabric/server.py +++ b/railiance_fabric/server.py @@ -67,6 +67,28 @@ class RegistryHandler(BaseHTTPRequestHandler): from_id=_query_optional_int(query, "from_id"), to_id=_query_optional_int(query, "to_id"), ) + if len(parts) == 3 and parts[0] == "repositories" and parts[2] == "discovery-snapshots": + return HTTPStatus.OK, self.store.list_discovery_snapshots( + parts[1], + profile=_query_optional(query, "profile"), + ) + if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "discovery-snapshots" and parts[3] == "latest": + return HTTPStatus.OK, self.store.latest_discovery_snapshot( + parts[1], + profile=_query_optional(query, "profile"), + ) + if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "discovery-snapshots" and parts[3] == "diff": + return HTTPStatus.OK, self.store.discovery_snapshot_diff( + parts[1], + from_id=_query_optional_int(query, "from_id"), + to_id=_query_optional_int(query, "to_id"), + profile=_query_optional(query, "profile"), + ) + if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "discovery-snapshots": + snapshot = self.store.get_discovery_snapshot(_int_id(parts[3], "discovery_snapshot_id")) + if snapshot["repo_slug"] != parts[1]: + raise RegistryError("discovery snapshot id must belong to the requested repository", 404) + return HTTPStatus.OK, snapshot if parts == ["search"]: return HTTPStatus.OK, self.store.search(_query_one(query, "q")) if parts == ["graph", "nodes"]: @@ -127,6 +149,14 @@ class RegistryHandler(BaseHTTPRequestHandler): return HTTPStatus.CREATED, self.store.upsert_repository(body) if len(parts) == 3 and parts[0] == "repositories" and parts[2] == "snapshots": return HTTPStatus.CREATED, self.store.add_snapshot(parts[1], body) + if len(parts) == 3 and parts[0] == "repositories" and parts[2] == "discovery-snapshots": + return HTTPStatus.CREATED, self.store.add_discovery_snapshot(parts[1], body) + if len(parts) == 5 and parts[0] == "repositories" and parts[2] == "discovery-snapshots" and parts[4] == "accept": + return HTTPStatus.CREATED, self.store.accept_discovery_snapshot( + parts[1], + _int_id(parts[3], "discovery_snapshot_id"), + body, + ) if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "libraries" and parts[3] == "cyclonedx": return HTTPStatus.CREATED, self.store.ingest_cyclonedx(parts[1], body) if parts == ["artifacts"]: diff --git a/tests/test_discovery_registry.py b/tests/test_discovery_registry.py new file mode 100644 index 0000000..651f857 --- /dev/null +++ b/tests/test_discovery_registry.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import json +import threading +import urllib.request +from http.server import ThreadingHTTPServer +from pathlib import Path + +from railiance_fabric.cli import main as cli_main +from railiance_fabric.discovery import ( + attribute_stable_key, + discovery_stable_key, + relationship_stable_key, + replacement_scope_id, + source_fingerprint, +) +from railiance_fabric.registry import RegistryStore +from railiance_fabric.server import RegistryHandler + + +def test_registry_stores_diffs_and_accepts_discovery_snapshots(tmp_path: Path) -> None: + store = RegistryStore(tmp_path / "registry.sqlite3") + store.init_schema() + store.upsert_repository({"slug": "fixture-repo", "name": "Fixture Repo"}) + base_snapshot = store.add_snapshot( + "fixture-repo", + { + "commit": "base", + "graph": _base_graph(), + }, + ) + + first = _discovery_snapshot("disc-1", accepted_label="Discovered API", confidence=0.8) + second = _discovery_snapshot("disc-2", accepted_label="Discovered API", confidence=0.95, extra=True) + stored_first = store.add_discovery_snapshot("fixture-repo", first) + stored_second = store.add_discovery_snapshot("fixture-repo", second) + + assert stored_first["profile"] == "deterministic" + assert store.latest_discovery_snapshot("fixture-repo")["id"] == stored_second["id"] + assert store.repository_inventory("fixture-repo")["counts"]["discovery_snapshots"] == 2 + assert store.status()["counts"]["discovery_snapshots"] == 2 + + diff = store.discovery_snapshot_diff("fixture-repo") + assert diff["from"]["id"] == stored_first["id"] + assert diff["to"]["id"] == stored_second["id"] + assert diff["discovery"]["nodes"]["confidence_changed"][0]["before"] == 0.8 + assert diff["discovery"]["nodes"]["confidence_changed"][0]["after"] == 0.95 + assert diff["discovery"]["nodes"]["added"][0]["label"] == "Extra Accepted Capability" + + accepted = store.accept_discovery_snapshot("fixture-repo", stored_second["id"]) + graph_snapshot = accepted["graph_snapshot"] + nodes_by_id = {node["id"]: node for node in graph_snapshot["graph"]["nodes"]} + assert graph_snapshot["id"] > base_snapshot["id"] + assert nodes_by_id["fixture.repo"]["name"] == "Fixture Repo" + assert nodes_by_id["fixture.discovered-api"]["attributes"]["discovery_review_state"] == "accepted" + assert nodes_by_id["fixture.extra-capability"]["attributes"]["discovery_confidence"] == 0.7 + assert store.get_discovery_snapshot(stored_second["id"])["accepted_graph_snapshot_id"] == graph_snapshot["id"] + + +def test_discovery_snapshot_http_and_cli_paths(tmp_path: Path, capsys) -> None: + store = RegistryStore(tmp_path / "registry.sqlite3") + store.init_schema() + store.upsert_repository({"slug": "fixture-repo", "name": "Fixture Repo"}) + store.add_snapshot("fixture-repo", {"commit": "base", "graph": _base_graph()}) + + class Handler(RegistryHandler): + pass + + Handler.store = store + server = ThreadingHTTPServer(("127.0.0.1", 0), Handler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + base_url = f"http://127.0.0.1:{server.server_port}" + snapshot_path = tmp_path / "discovery.json" + snapshot_path.write_text(json.dumps(_discovery_snapshot("disc-http")), encoding="utf-8") + assert cli_main( + [ + "registry", + "ingest-discovery", + str(snapshot_path), + "--registry-url", + base_url, + ] + ) == 0 + ingest_summary = capsys.readouterr().out + assert "ingested discovery snapshot 1 for fixture-repo" in ingest_summary + + with urllib.request.urlopen( + f"{base_url}/repositories/fixture-repo/discovery-snapshots/latest", + timeout=5, + ) as response: + latest = json.loads(response.read()) + with urllib.request.urlopen( + f"{base_url}/repositories/fixture-repo/inventory", + timeout=5, + ) as response: + inventory = json.loads(response.read()) + + assert latest["snapshot"]["kind"] == "FabricDiscoverySnapshot" + assert inventory["latest_discovery_snapshot"]["id"] == latest["id"] + assert cli_main( + [ + "registry", + "accept-discovery", + "fixture-repo", + str(latest["id"]), + "--registry-url", + base_url, + ] + ) == 0 + accept_summary = capsys.readouterr().out + assert "accepted discovery snapshot" in accept_summary + + with urllib.request.urlopen(f"{base_url}/exports/graph-explorer", timeout=5) as response: + explorer = json.loads(response.read()) + discovered = next( + element for element in explorer["elements"] + if element["data"].get("id") == "fixture.discovered-api" + ) + assert discovered["data"]["reviewState"] == "accepted" + assert discovered["data"]["discovery"]["origin"] == "deterministic" + assert discovered["data"]["sourceReferences"][0]["label"].startswith("Discovery") + finally: + server.shutdown() + server.server_close() + thread.join(timeout=5) + + +def _base_graph() -> dict[str, object]: + return { + "apiVersion": "railiance.fabric/v1alpha1", + "kind": "FabricGraphExport", + "nodes": [ + { + "id": "fixture.repo", + "kind": "Repository", + "name": "Fixture Repo", + "repo": "fixture-repo", + "domain": "testing", + "lifecycle": "active", + "attributes": {}, + } + ], + "edges": [], + } + + +def _discovery_snapshot( + commit: str, + *, + accepted_label: str = "Discovered API", + confidence: float = 0.8, + extra: bool = False, +) -> dict[str, object]: + repo_key = discovery_stable_key("fixture-repo", "Repository", "fixture-repo") + service_key = discovery_stable_key("fixture-repo", "ServiceDeclaration", accepted_label) + scope_id = replacement_scope_id("fixture-repo", "fixture", "file", source_path="README.md") + anchor = { + "source_kind": "file", + "path": "README.md", + "fingerprint": source_fingerprint({"source_kind": "file", "path": "README.md"}), + } + provenance = { + "extractor_id": "fixture", + "extractor_version": "0.1.0", + "method": "deterministic", + "origin": "deterministic", + } + nodes = [ + { + "stable_key": repo_key, + "graph_id": "fixture.repo", + "kind": "Repository", + "label": "Do Not Overwrite", + "repo": "fixture-repo", + "domain": "testing", + "aliases": ["fixture-repo"], + "origin": "deterministic", + "review_state": "accepted", + "status": "active", + "confidence": 1.0, + "replacement_scope": scope_id, + "provenance": [provenance], + "source_anchors": [anchor], + }, + { + "stable_key": service_key, + "graph_id": "fixture.discovered-api", + "kind": "ServiceDeclaration", + "label": accepted_label, + "repo": "fixture-repo", + "domain": "testing", + "lifecycle": "active", + "aliases": [accepted_label], + "attributes": {"description": "Accepted discovery candidate."}, + "origin": "deterministic", + "review_state": "accepted", + "status": "active", + "confidence": confidence, + "replacement_scope": scope_id, + "provenance": [provenance], + "source_anchors": [anchor], + }, + ] + edges = [ + { + "stable_key": relationship_stable_key(repo_key, "declares", service_key), + "edge_type": "declares", + "source_key": repo_key, + "target_key": service_key, + "origin": "deterministic", + "review_state": "accepted", + "status": "active", + "confidence": 0.8, + "replacement_scope": scope_id, + "provenance": [provenance], + "source_anchors": [anchor], + } + ] + attributes = [ + { + "stable_key": attribute_stable_key(service_key, "description"), + "entity_key": service_key, + "name": "description", + "value": "Accepted discovery candidate.", + "origin": "deterministic", + "review_state": "accepted", + "confidence": confidence, + "replacement_scope": scope_id, + "provenance": [provenance], + "source_anchors": [anchor], + } + ] + if extra: + extra_key = discovery_stable_key("fixture-repo", "CapabilityDeclaration", "Extra Accepted Capability") + nodes.append( + { + "stable_key": extra_key, + "graph_id": "fixture.extra-capability", + "kind": "CapabilityDeclaration", + "label": "Extra Accepted Capability", + "repo": "fixture-repo", + "domain": "testing", + "lifecycle": "active", + "origin": "deterministic", + "review_state": "accepted", + "status": "active", + "confidence": 0.7, + "replacement_scope": scope_id, + "provenance": [provenance], + "source_anchors": [anchor], + } + ) + return { + "apiVersion": "railiance.fabric/v1alpha1", + "kind": "FabricDiscoverySnapshot", + "generated_at": "2026-05-19T00:00:00Z", + "source": {"repo_slug": "fixture-repo", "repo_name": "Fixture Repo", "commit": commit}, + "scan": { + "run_id": f"scan:fixture-repo:{commit}", + "profile": "deterministic", + "deterministic_only": True, + "llm_enabled": False, + }, + "replacement_scopes": [ + { + "id": scope_id, + "extractor_id": "fixture", + "source_kind": "file", + "source_path": "README.md", + "mode": "replacement", + } + ], + "candidates": {"nodes": nodes, "edges": edges, "attributes": attributes}, + "tombstones": [], + "reconciliation": { + "precedence": ["repo_declaration", "deterministic", "catalog", "registry", "llm", "manual"], + "duplicate_policy": "stable-key matches merge automatically", + "retirement_policy": "missing candidates retire only inside their replacement scope", + }, + } diff --git a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md index 0ed85ee..9efe480 100644 --- a/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md +++ b/workplans/RAIL-FAB-WP-0010-repo-reality-scanner.md @@ -249,7 +249,7 @@ Acceptance notes: ```task id: RAIL-FAB-WP-0010-T06 -status: todo +status: done priority: high state_hub_task_id: "9a8420f1-0072-4f40-8d0f-775f59cbe772" ```