Add discovery registry review flow

This commit is contained in:
2026-05-19 15:40:57 +02:00
parent 0b093741e2
commit 68cf01aa39
8 changed files with 940 additions and 5 deletions

View File

@@ -29,6 +29,38 @@ GET /repositories/{repo_slug}/snapshots/diff
Snapshot ingestion accepts a `FabricGraphExport` under `graph` plus `commit`
and optional `generated_at`.
## Discovery Snapshots
```text
POST /repositories/{repo_slug}/discovery-snapshots
GET /repositories/{repo_slug}/discovery-snapshots
GET /repositories/{repo_slug}/discovery-snapshots/latest
GET /repositories/{repo_slug}/discovery-snapshots/diff
GET /repositories/{repo_slug}/discovery-snapshots/{snapshot_id}
POST /repositories/{repo_slug}/discovery-snapshots/{snapshot_id}/accept
```
Discovery ingestion accepts a `FabricDiscoverySnapshot`. Snapshots are stored by
repo, commit, and scan profile for dry-run review. Discovery diffs include
candidate additions, removals, changes, confidence changes, reconciliation
conflicts, and scoped retirements.
Acceptance projects only explicitly accepted discovery candidates into a normal
`FabricGraphExport` snapshot. Existing graph nodes are preserved, so repo-owned
declarations are not overwritten by discovery output. Projected nodes carry
discovery provenance, review state, confidence, and source anchors in their
attributes; graph explorer exports surface that metadata.
CLI helpers:
```bash
railiance-fabric registry ingest-discovery discovery.json \
--repo-slug railiance-fabric
railiance-fabric registry accept-discovery railiance-fabric 12 \
--accepted-key discovery:railiance-fabric:service-declaration:example
```
## Graph Queries
```text

View File

@@ -131,6 +131,39 @@ is present in the current scan and has `mode: replacement`. Missing candidates
from additive scopes, such as broad LLM evidence bundles, are left alone.
Existing tombstones are preserved so repeated scans can explain graph drift.
## Registry Review And Acceptance
Discovery snapshots can be stored in the Fabric registry for review:
```bash
railiance-fabric scan . \
--repo-slug railiance-fabric \
--previous-snapshot previous-discovery.json \
--output discovery.json
railiance-fabric registry ingest-discovery discovery.json \
--repo-slug railiance-fabric
```
The registry keeps discovery snapshots separately from accepted graph snapshots
by repo, commit, and scan profile. It exposes latest/list/diff API routes so a
dry run can be reviewed without changing the accepted graph.
Accepted discovery can be projected into a normal graph snapshot:
```bash
railiance-fabric registry accept-discovery railiance-fabric 12 \
--accepted-key discovery:railiance-fabric:service-declaration:example
```
By default, the accept path only projects candidates already marked
`review_state: accepted`. Passing `--accepted-key` explicitly includes selected
candidate stable keys. Existing accepted graph nodes win over discovery nodes
with the same graph id, so repo-owned declarations are preserved. Projected
nodes carry discovery stable key, origin, review state, confidence, provenance,
and source anchors in graph attributes; the graph explorer payload exposes
those fields for review.
## Connector Follow-Up
Connector follow-up is explicit and separated from repo-local extraction:

View File

@@ -122,6 +122,21 @@ def build_parser() -> argparse.ArgumentParser:
cyclonedx.add_argument("--registry-url", default="http://127.0.0.1:8765")
cyclonedx.add_argument("--repo-slug", required=True)
cyclonedx.add_argument("--json", action="store_true", help="Print the raw ingest response.")
discovery = registry_sub.add_parser("ingest-discovery", help="Store a discovery snapshot for review.")
discovery.add_argument("snapshot", type=Path)
discovery.add_argument("--registry-url", default="http://127.0.0.1:8765")
discovery.add_argument("--repo-slug", default=None)
discovery.add_argument("--json", action="store_true", help="Print the raw ingest response.")
accept_discovery = registry_sub.add_parser("accept-discovery", help="Project accepted discovery candidates into a graph snapshot.")
accept_discovery.add_argument("repo_slug")
accept_discovery.add_argument("discovery_snapshot_id", type=int)
accept_discovery.add_argument("--registry-url", default="http://127.0.0.1:8765")
accept_discovery.add_argument("--accepted-key", action="append", default=[])
accept_discovery.add_argument("--accept-review-state", action="append", default=None)
accept_discovery.add_argument("--commit", default=None)
accept_discovery.add_argument("--json", action="store_true", help="Print the raw accept response.")
return parser
@@ -185,6 +200,10 @@ def main(argv: list[str] | None = None) -> int:
return _registry_sync_manifest(args)
if args.registry_command == "ingest-cyclonedx":
return _registry_ingest_cyclonedx(args)
if args.registry_command == "ingest-discovery":
return _registry_ingest_discovery(args)
if args.registry_command == "accept-discovery":
return _registry_accept_discovery(args)
parser.error(f"unknown command {args.command!r}")
return 2
@@ -406,6 +425,62 @@ def _registry_ingest_cyclonedx(args: argparse.Namespace) -> int:
return 0
def _registry_ingest_discovery(args: argparse.Namespace) -> int:
payload = json.loads(args.snapshot.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
print(f"ERROR {args.snapshot}: discovery snapshot must be a JSON object", file=sys.stderr)
return 1
source = payload.get("source") if isinstance(payload.get("source"), dict) else {}
repo_slug = args.repo_slug or str(source.get("repo_slug") or "").strip()
if not repo_slug:
print("ERROR discovery snapshot source.repo_slug is required unless --repo-slug is provided", file=sys.stderr)
return 1
result = _registry_post(
args.registry_url,
f"/repositories/{repo_slug}/discovery-snapshots",
payload,
)
if args.json:
print(json.dumps(result, indent=2, sort_keys=True))
else:
candidates = result.get("snapshot", {}).get("candidates", {})
counts = {
"nodes": len(candidates.get("nodes", [])),
"edges": len(candidates.get("edges", [])),
"attributes": len(candidates.get("attributes", [])),
}
print(
f"ingested discovery snapshot {result['id']} for {result['repo_slug']} "
f"({result['profile']}, {result['commit']}): "
f"{counts['nodes']} node candidate(s), {counts['edges']} edge candidate(s), "
f"{counts['attributes']} attribute candidate(s)"
)
return 0
def _registry_accept_discovery(args: argparse.Namespace) -> int:
payload = {
"accepted_keys": args.accepted_key,
"commit": args.commit,
}
if args.accept_review_state is not None:
payload["accept_review_states"] = args.accept_review_state
result = _registry_post(
args.registry_url,
f"/repositories/{args.repo_slug}/discovery-snapshots/{args.discovery_snapshot_id}/accept",
payload,
)
if args.json:
print(json.dumps(result, indent=2, sort_keys=True))
else:
graph_snapshot = result["graph_snapshot"]
print(
f"accepted discovery snapshot {args.discovery_snapshot_id} for {args.repo_slug}; "
f"graph snapshot {graph_snapshot['id']} stored for {graph_snapshot['commit']}"
)
return 0
def _scan_repo(args: argparse.Namespace) -> int:
snapshot = scan_repo(
ScanOptions(

View File

@@ -284,6 +284,8 @@ def fabric_graph_explorer_payload(
layer = _layer_for_kind(kind)
is_unresolved = node_id in unresolved
attributes = node.get("attributes") if isinstance(node.get("attributes"), dict) else {}
review_state = str(attributes.get("discovery_review_state") or "accepted")
confidence = attributes.get("discovery_confidence")
elements.append(
{
"data": {
@@ -297,13 +299,24 @@ def fabric_graph_explorer_payload(
"repo": str(node.get("repo", "")),
"domain": str(node.get("domain", "")),
"lifecycle": str(node.get("lifecycle", "")),
"reviewState": "accepted",
"reviewState": review_state,
"freshnessState": "current",
"unresolved": is_unresolved,
"confidence": 0.45 if is_unresolved else 1.0,
"confidence": (
float(confidence)
if isinstance(confidence, (int, float))
else 0.45 if is_unresolved else 1.0
),
"visualSize": 34 if layer == "binding" else 46 if is_unresolved else 50,
"ownership": str(attributes.get("owner") or "repo"),
"ownership": str(attributes.get("owner") or attributes.get("discovery_origin") or "repo"),
"attributes": attributes,
"discovery": {
"stableKey": attributes.get("discovery_stable_key", ""),
"origin": attributes.get("discovery_origin", ""),
"reviewState": attributes.get("discovery_review_state", ""),
"confidence": attributes.get("discovery_confidence", ""),
"provenance": attributes.get("discovery_provenance", []),
},
"displayState": "show",
"visibilitySource": "default",
"visibilityReason": "default",
@@ -312,7 +325,7 @@ def fabric_graph_explorer_payload(
},
"classes": " ".join(
part
for part in (layer, kind, "unresolved" if is_unresolved else "accepted")
for part in (layer, kind, "unresolved" if is_unresolved else review_state)
if part
),
}
@@ -733,6 +746,15 @@ def _source_references(node: dict[str, Any]) -> list[dict[str, str]]:
for source in attributes.get("source_links", []):
if isinstance(source, dict):
references.append({key: str(value) for key, value in source.items()})
for anchor in attributes.get("discovery_source_anchors", []):
if isinstance(anchor, dict):
reference = {
"label": f"Discovery {anchor.get('source_kind', 'source')}",
}
for key in ("path", "url", "ref", "json_pointer"):
if anchor.get(key):
reference[key] = str(anchor[key])
references.append(reference)
return references

View File

@@ -1,6 +1,8 @@
from __future__ import annotations
import json
import hashlib
import re
import sqlite3
from dataclasses import dataclass
from datetime import datetime, timezone
@@ -50,6 +52,20 @@ class RegistryStore:
create index if not exists idx_snapshots_repo_latest
on snapshots(repo_slug, id desc);
create table if not exists discovery_snapshots (
id integer primary key autoincrement,
repo_slug text not null references repositories(slug),
commit_sha text not null,
profile text not null,
generated_at text not null,
snapshot_json text not null,
accepted_graph_snapshot_id integer references snapshots(id),
created_at text not null
);
create index if not exists idx_discovery_snapshots_repo_latest
on discovery_snapshots(repo_slug, profile, id desc);
create table if not exists artifacts (
id integer primary key autoincrement,
repo_slug text not null references repositories(slug),
@@ -230,6 +246,166 @@ class RegistryStore:
).fetchall()
return [_snapshot_dict(row) for row in rows]
def add_discovery_snapshot(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]:
self.get_repository(repo_slug)
if not isinstance(payload, dict):
raise RegistryError("discovery snapshot payload must be an object")
validate_discovery_snapshot(payload)
source = payload.get("source") if isinstance(payload.get("source"), dict) else {}
if source.get("repo_slug") and source.get("repo_slug") != repo_slug:
raise RegistryError("discovery snapshot repo_slug does not match request path")
commit = str(source.get("commit") or "working-tree")
scan = payload.get("scan") if isinstance(payload.get("scan"), dict) else {}
profile = str(scan.get("profile") or "deterministic")
generated_at = str(payload.get("generated_at") or _utc_now())
now = _utc_now()
with self._connect() as db:
cursor = db.execute(
"""
insert into discovery_snapshots (
repo_slug, commit_sha, profile, generated_at, snapshot_json,
accepted_graph_snapshot_id, created_at
)
values (?, ?, ?, ?, ?, null, ?)
""",
(repo_slug, commit, profile, generated_at, json.dumps(payload, sort_keys=True), now),
)
discovery_id = int(cursor.lastrowid)
return self.get_discovery_snapshot(discovery_id)
def get_discovery_snapshot(self, discovery_snapshot_id: int) -> dict[str, Any]:
with self._connect() as db:
row = db.execute(
"""
select id, repo_slug, commit_sha, profile, generated_at, snapshot_json,
accepted_graph_snapshot_id, created_at
from discovery_snapshots
where id = ?
""",
(discovery_snapshot_id,),
).fetchone()
if row is None:
raise RegistryError(f"discovery snapshot not found: {discovery_snapshot_id}", 404)
return _discovery_snapshot_dict(row)
def list_discovery_snapshots(self, repo_slug: str, profile: str | None = None) -> list[dict[str, Any]]:
self.get_repository(repo_slug)
params: list[Any] = [repo_slug]
where = "where repo_slug = ?"
if profile:
where += " and profile = ?"
params.append(profile)
with self._connect() as db:
rows = db.execute(
f"""
select id, repo_slug, commit_sha, profile, generated_at, snapshot_json,
accepted_graph_snapshot_id, created_at
from discovery_snapshots
{where}
order by id desc
""",
params,
).fetchall()
return [_discovery_snapshot_summary(row) for row in rows]
def latest_discovery_snapshot(self, repo_slug: str, profile: str | None = None) -> dict[str, Any]:
self.get_repository(repo_slug)
params: list[Any] = [repo_slug]
where = "where repo_slug = ?"
if profile:
where += " and profile = ?"
params.append(profile)
with self._connect() as db:
row = db.execute(
f"""
select id, repo_slug, commit_sha, profile, generated_at, snapshot_json,
accepted_graph_snapshot_id, created_at
from discovery_snapshots
{where}
order by id desc
limit 1
""",
params,
).fetchone()
if row is None:
raise RegistryError(f"no discovery snapshots for repository: {repo_slug}", 404)
return _discovery_snapshot_dict(row)
def discovery_snapshot_diff(
self,
repo_slug: str,
from_id: int | None = None,
to_id: int | None = None,
profile: str | None = None,
) -> dict[str, Any]:
self.get_repository(repo_slug)
if from_id is None or to_id is None:
snapshots = self.list_discovery_snapshots(repo_slug, profile=profile)
if len(snapshots) < 2:
raise RegistryError(f"at least two discovery snapshots are required for diff: {repo_slug}", 404)
to_id = snapshots[0]["id"] if to_id is None else to_id
from_id = snapshots[1]["id"] if from_id is None else from_id
before = self.get_discovery_snapshot(from_id)
after = self.get_discovery_snapshot(to_id)
if before["repo_slug"] != repo_slug or after["repo_slug"] != repo_slug:
raise RegistryError("discovery snapshot ids must belong to the requested repository")
return {
"repo_slug": repo_slug,
"from": _discovery_snapshot_public_summary(before),
"to": _discovery_snapshot_public_summary(after),
"discovery": _discovery_diff(before["snapshot"], after["snapshot"]),
"reconciliation": after["snapshot"].get("reconciliation", {}),
}
def accept_discovery_snapshot(
self,
repo_slug: str,
discovery_snapshot_id: int,
payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
payload = payload or {}
discovery = self.get_discovery_snapshot(discovery_snapshot_id)
if discovery["repo_slug"] != repo_slug:
raise RegistryError("discovery snapshot id must belong to the requested repository")
base_graph = self._latest_graph_or_empty(repo_slug)
accepted_keys = payload.get("accepted_keys")
if accepted_keys is not None and not isinstance(accepted_keys, list):
raise RegistryError("field 'accepted_keys' must be an array when provided")
accepted_key_set = {str(key) for key in accepted_keys or []}
accept_review_states = payload.get("accept_review_states", ["accepted"])
if not isinstance(accept_review_states, list):
raise RegistryError("field 'accept_review_states' must be an array")
graph = _project_discovery_snapshot(
base_graph,
discovery["snapshot"],
accepted_keys=accepted_key_set,
accept_review_states={str(state) for state in accept_review_states},
)
snapshot = self.add_snapshot(
repo_slug,
{
"commit": str(payload.get("commit") or f"discovery:{discovery['commit']}"),
"generated_at": _utc_now(),
"graph": graph,
},
)
with self._connect() as db:
db.execute(
"update discovery_snapshots set accepted_graph_snapshot_id = ? where id = ?",
(snapshot["id"], discovery_snapshot_id),
)
return {
"repo_slug": repo_slug,
"discovery_snapshot": self.get_discovery_snapshot(discovery_snapshot_id),
"graph_snapshot": snapshot,
"projected": {
"node_count": len(graph.get("nodes", [])),
"edge_count": len(graph.get("edges", [])),
},
}
def combined_graph(self) -> dict[str, Any]:
nodes: dict[str, dict[str, Any]] = {}
edges: list[dict[str, str]] = []
@@ -358,17 +534,30 @@ class RegistryStore:
if exc.status_code != 404:
raise
latest_snapshot = None
try:
latest_discovery_snapshot = self.latest_discovery_snapshot(repo_slug)
except RegistryError as exc:
if exc.status_code != 404:
raise
latest_discovery_snapshot = None
graph = latest_snapshot["graph"] if latest_snapshot else _empty_graph()
nodes = [node for node in graph.get("nodes", []) if isinstance(node, dict)]
edges = [edge for edge in graph.get("edges", []) if isinstance(edge, dict)]
artifacts = self.list_artifacts(repo_slug=repo_slug)
libraries = self.list_libraries(repo_slug=repo_slug)
discovery_snapshots = self.list_discovery_snapshots(repo_slug)
return {
"repository": repository,
"latest_snapshot": _snapshot_public_summary(latest_snapshot) if latest_snapshot else None,
"latest_discovery_snapshot": (
_discovery_snapshot_public_summary(latest_discovery_snapshot)
if latest_discovery_snapshot
else None
),
"counts": {
"snapshots": len(self.list_snapshots(repo_slug)),
"discovery_snapshots": len(discovery_snapshots),
"nodes": len(nodes),
"edges": len(edges),
"artifacts": len(artifacts),
@@ -534,6 +723,7 @@ class RegistryStore:
counts = {
"repositories": db.execute("select count(*) from repositories").fetchone()[0],
"snapshots": db.execute("select count(*) from snapshots").fetchone()[0],
"discovery_snapshots": db.execute("select count(*) from discovery_snapshots").fetchone()[0],
"artifacts": db.execute("select count(*) from artifacts").fetchone()[0],
"libraries": db.execute("select count(*) from libraries").fetchone()[0],
}
@@ -553,6 +743,14 @@ class RegistryStore:
"latest_snapshots": latest,
}
def _latest_graph_or_empty(self, repo_slug: str) -> dict[str, Any]:
try:
return self.latest_snapshot(repo_slug)["graph"]
except RegistryError as exc:
if exc.status_code != 404:
raise
return _empty_graph()
def _connect(self) -> sqlite3.Connection:
db = sqlite3.connect(self.path)
db.row_factory = sqlite3.Row
@@ -569,6 +767,16 @@ def validate_graph_export(graph: dict[str, Any]) -> None:
raise RegistryError(f"invalid FabricGraphExport at {location}: {error.message}")
def validate_discovery_snapshot(snapshot: dict[str, Any]) -> None:
schema_path = repo_root() / "schemas" / "discovery-snapshot.schema.yaml"
validator = draft202012_validator(schema_path)
errors = sorted(validator.iter_errors(snapshot), key=lambda error: list(error.path))
if errors:
error = errors[0]
location = ".".join(str(part) for part in error.path) or "<root>"
raise RegistryError(f"invalid FabricDiscoverySnapshot at {location}: {error.message}")
def providers(graph: dict[str, Any], capability: str) -> list[dict[str, Any]]:
result = []
for node in _nodes(graph):
@@ -911,6 +1119,79 @@ def _snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
}
def _discovery_snapshot_dict(row: sqlite3.Row) -> dict[str, Any]:
snapshot = json.loads(row["snapshot_json"])
return {
"id": row["id"],
"repo_slug": row["repo_slug"],
"commit": row["commit_sha"],
"profile": row["profile"],
"generated_at": row["generated_at"],
"snapshot": snapshot,
"accepted_graph_snapshot_id": row["accepted_graph_snapshot_id"],
"created_at": row["created_at"],
}
def _discovery_snapshot_summary(row: sqlite3.Row) -> dict[str, Any]:
snapshot = json.loads(row["snapshot_json"])
candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {}
reconciliation = snapshot.get("reconciliation") if isinstance(snapshot.get("reconciliation"), dict) else {}
diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {}
return {
"id": row["id"],
"repo_slug": row["repo_slug"],
"commit": row["commit_sha"],
"profile": row["profile"],
"generated_at": row["generated_at"],
"accepted_graph_snapshot_id": row["accepted_graph_snapshot_id"],
"created_at": row["created_at"],
"candidate_counts": {
"nodes": len(candidates.get("nodes", [])) if isinstance(candidates.get("nodes"), list) else 0,
"edges": len(candidates.get("edges", [])) if isinstance(candidates.get("edges"), list) else 0,
"attributes": len(candidates.get("attributes", [])) if isinstance(candidates.get("attributes"), list) else 0,
},
"diff_counts": {
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
},
"review_artifact_count": len(snapshot.get("review_artifacts", []))
if isinstance(snapshot.get("review_artifacts"), list)
else 0,
"connector_run_count": len(snapshot.get("connector_runs", []))
if isinstance(snapshot.get("connector_runs"), list)
else 0,
}
def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
candidates = snapshot["snapshot"].get("candidates", {})
reconciliation = snapshot["snapshot"].get("reconciliation", {})
diff = reconciliation.get("diff", {}) if isinstance(reconciliation, dict) else {}
return {
"id": snapshot["id"],
"repo_slug": snapshot["repo_slug"],
"commit": snapshot["commit"],
"profile": snapshot["profile"],
"generated_at": snapshot["generated_at"],
"accepted_graph_snapshot_id": snapshot["accepted_graph_snapshot_id"],
"created_at": snapshot["created_at"],
"candidate_counts": {
"nodes": len(candidates.get("nodes", [])) if isinstance(candidates.get("nodes"), list) else 0,
"edges": len(candidates.get("edges", [])) if isinstance(candidates.get("edges"), list) else 0,
"attributes": len(candidates.get("attributes", [])) if isinstance(candidates.get("attributes"), list) else 0,
},
"diff_counts": {
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
},
}
def _row_dict(row: sqlite3.Row) -> dict[str, Any]:
return {key: row[key] for key in row.keys()}
@@ -1027,6 +1308,186 @@ def _empty_graph() -> dict[str, Any]:
}
def _discovery_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]:
result: dict[str, Any] = {}
for collection in ("nodes", "edges", "attributes"):
before_items = _discovery_candidates_by_key(before, collection)
after_items = _discovery_candidates_by_key(after, collection)
added = sorted(set(after_items) - set(before_items))
removed = sorted(set(before_items) - set(after_items))
common = sorted(set(before_items) & set(after_items))
changed = [
key for key in common
if _stable_json(_without_review_noise(before_items[key])) != _stable_json(_without_review_noise(after_items[key]))
]
confidence_changed = [
{
"stable_key": key,
"before": before_items[key].get("confidence"),
"after": after_items[key].get("confidence"),
}
for key in common
if before_items[key].get("confidence") != after_items[key].get("confidence")
]
result[collection] = {
"added": [after_items[key] for key in added],
"removed": [before_items[key] for key in removed],
"changed": [
{
"stable_key": key,
"before": before_items[key],
"after": after_items[key],
}
for key in changed
],
"confidence_changed": confidence_changed,
}
after_reconciliation = after.get("reconciliation") if isinstance(after.get("reconciliation"), dict) else {}
result["review"] = {
"retired": after_reconciliation.get("diff", {}).get("retired", [])
if isinstance(after_reconciliation.get("diff"), dict)
else [],
"conflicted": after_reconciliation.get("diff", {}).get("conflicted", [])
if isinstance(after_reconciliation.get("diff"), dict)
else [],
"conflicts": after_reconciliation.get("conflicts", []),
}
return result
def _project_discovery_snapshot(
base_graph: dict[str, Any],
discovery_snapshot: dict[str, Any],
*,
accepted_keys: set[str],
accept_review_states: set[str],
) -> dict[str, Any]:
graph = json.loads(json.dumps(base_graph))
graph.setdefault("apiVersion", "railiance.fabric/v1alpha1")
graph.setdefault("kind", "FabricGraphExport")
graph.setdefault("nodes", [])
graph.setdefault("edges", [])
candidates = discovery_snapshot.get("candidates") if isinstance(discovery_snapshot.get("candidates"), dict) else {}
candidate_nodes = [
node for node in candidates.get("nodes", [])
if isinstance(node, dict) and _candidate_is_accepted(node, accepted_keys, accept_review_states)
]
candidate_edges = [
edge for edge in candidates.get("edges", [])
if isinstance(edge, dict) and _candidate_is_accepted(edge, accepted_keys, accept_review_states)
]
existing_nodes = {
str(node.get("id")): node
for node in graph.get("nodes", [])
if isinstance(node, dict) and node.get("id")
}
key_to_graph_id = {
str(node.get("id")): str(node.get("id"))
for node in graph.get("nodes", [])
if isinstance(node, dict) and node.get("id")
}
for candidate in candidate_nodes:
graph_id = _candidate_graph_id(candidate)
key_to_graph_id[str(candidate.get("stable_key"))] = graph_id
if candidate.get("graph_id"):
key_to_graph_id[str(candidate.get("graph_id"))] = graph_id
if graph_id in existing_nodes:
continue
projected = _project_candidate_node(candidate, graph_id)
existing_nodes[graph_id] = projected
graph["nodes"].append(projected)
existing_edges = {
_edge_key(edge)
for edge in graph.get("edges", [])
if isinstance(edge, dict)
}
for candidate in candidate_edges:
source = key_to_graph_id.get(str(candidate.get("source_key") or ""))
target = key_to_graph_id.get(str(candidate.get("target_key") or ""))
if not source or not target:
continue
edge = {"from": source, "to": target, "type": str(candidate.get("edge_type") or "")}
edge_key = _edge_key(edge)
if edge["type"] and edge_key not in existing_edges:
existing_edges.add(edge_key)
graph["edges"].append(edge)
validate_graph_export(graph)
return graph
def _project_candidate_node(candidate: dict[str, Any], graph_id: str) -> dict[str, Any]:
attributes = candidate.get("attributes") if isinstance(candidate.get("attributes"), dict) else {}
return {
"id": graph_id,
"kind": str(candidate.get("kind") or "DiscoveredEntity"),
"name": str(candidate.get("label") or graph_id),
"repo": str(candidate.get("repo") or ""),
"domain": str(candidate.get("domain") or ""),
"lifecycle": str(candidate.get("lifecycle") or "active"),
"attributes": {
**attributes,
"discovery_stable_key": candidate.get("stable_key"),
"discovery_origin": candidate.get("origin"),
"discovery_review_state": candidate.get("review_state"),
"discovery_confidence": candidate.get("confidence"),
"discovery_source_anchors": candidate.get("source_anchors", []),
"discovery_provenance": candidate.get("provenance", []),
},
}
def _candidate_graph_id(candidate: dict[str, Any]) -> str:
graph_id = str(candidate.get("graph_id") or "").strip()
if graph_id:
return graph_id
repo = _graph_id_part(str(candidate.get("repo") or "repo"))
kind = _graph_id_part(str(candidate.get("kind") or "entity"))
label = _graph_id_part(str(candidate.get("label") or candidate.get("stable_key") or "candidate"))
candidate_id = f"{repo}.{kind}.{label}".strip(".")
if len(candidate_id) > 150:
digest = hashlib.sha256(str(candidate.get("stable_key") or candidate_id).encode("utf-8")).hexdigest()[:10]
candidate_id = f"{candidate_id[:139].rstrip('.')}.{digest}"
if len(candidate_id) < 3:
candidate_id = f"{candidate_id}.id"
return candidate_id
def _graph_id_part(value: str) -> str:
text = re.sub(r"[^a-z0-9.-]+", "-", value.lower()).strip(".-")
text = re.sub(r"-+", "-", text)
text = re.sub(r"\.+", ".", text)
return text or "unknown"
def _candidate_is_accepted(
candidate: dict[str, Any],
accepted_keys: set[str],
accept_review_states: set[str],
) -> bool:
stable_key = str(candidate.get("stable_key") or "")
if stable_key in accepted_keys:
return True
return str(candidate.get("review_state") or "") in accept_review_states
def _discovery_candidates_by_key(snapshot: dict[str, Any], collection: str) -> dict[str, dict[str, Any]]:
candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {}
return {
str(item["stable_key"]): item
for item in candidates.get(collection, [])
if isinstance(item, dict) and item.get("stable_key")
}
def _without_review_noise(candidate: dict[str, Any]) -> dict[str, Any]:
return {
key: value
for key, value in candidate.items()
if key not in {"provenance"}
}
def _graph_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]:
before_nodes = _nodes_by_id(before)
after_nodes = _nodes_by_id(after)

View File

@@ -67,6 +67,28 @@ class RegistryHandler(BaseHTTPRequestHandler):
from_id=_query_optional_int(query, "from_id"),
to_id=_query_optional_int(query, "to_id"),
)
if len(parts) == 3 and parts[0] == "repositories" and parts[2] == "discovery-snapshots":
return HTTPStatus.OK, self.store.list_discovery_snapshots(
parts[1],
profile=_query_optional(query, "profile"),
)
if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "discovery-snapshots" and parts[3] == "latest":
return HTTPStatus.OK, self.store.latest_discovery_snapshot(
parts[1],
profile=_query_optional(query, "profile"),
)
if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "discovery-snapshots" and parts[3] == "diff":
return HTTPStatus.OK, self.store.discovery_snapshot_diff(
parts[1],
from_id=_query_optional_int(query, "from_id"),
to_id=_query_optional_int(query, "to_id"),
profile=_query_optional(query, "profile"),
)
if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "discovery-snapshots":
snapshot = self.store.get_discovery_snapshot(_int_id(parts[3], "discovery_snapshot_id"))
if snapshot["repo_slug"] != parts[1]:
raise RegistryError("discovery snapshot id must belong to the requested repository", 404)
return HTTPStatus.OK, snapshot
if parts == ["search"]:
return HTTPStatus.OK, self.store.search(_query_one(query, "q"))
if parts == ["graph", "nodes"]:
@@ -127,6 +149,14 @@ class RegistryHandler(BaseHTTPRequestHandler):
return HTTPStatus.CREATED, self.store.upsert_repository(body)
if len(parts) == 3 and parts[0] == "repositories" and parts[2] == "snapshots":
return HTTPStatus.CREATED, self.store.add_snapshot(parts[1], body)
if len(parts) == 3 and parts[0] == "repositories" and parts[2] == "discovery-snapshots":
return HTTPStatus.CREATED, self.store.add_discovery_snapshot(parts[1], body)
if len(parts) == 5 and parts[0] == "repositories" and parts[2] == "discovery-snapshots" and parts[4] == "accept":
return HTTPStatus.CREATED, self.store.accept_discovery_snapshot(
parts[1],
_int_id(parts[3], "discovery_snapshot_id"),
body,
)
if len(parts) == 4 and parts[0] == "repositories" and parts[2] == "libraries" and parts[3] == "cyclonedx":
return HTTPStatus.CREATED, self.store.ingest_cyclonedx(parts[1], body)
if parts == ["artifacts"]:

View File

@@ -0,0 +1,282 @@
from __future__ import annotations
import json
import threading
import urllib.request
from http.server import ThreadingHTTPServer
from pathlib import Path
from railiance_fabric.cli import main as cli_main
from railiance_fabric.discovery import (
attribute_stable_key,
discovery_stable_key,
relationship_stable_key,
replacement_scope_id,
source_fingerprint,
)
from railiance_fabric.registry import RegistryStore
from railiance_fabric.server import RegistryHandler
def test_registry_stores_diffs_and_accepts_discovery_snapshots(tmp_path: Path) -> None:
store = RegistryStore(tmp_path / "registry.sqlite3")
store.init_schema()
store.upsert_repository({"slug": "fixture-repo", "name": "Fixture Repo"})
base_snapshot = store.add_snapshot(
"fixture-repo",
{
"commit": "base",
"graph": _base_graph(),
},
)
first = _discovery_snapshot("disc-1", accepted_label="Discovered API", confidence=0.8)
second = _discovery_snapshot("disc-2", accepted_label="Discovered API", confidence=0.95, extra=True)
stored_first = store.add_discovery_snapshot("fixture-repo", first)
stored_second = store.add_discovery_snapshot("fixture-repo", second)
assert stored_first["profile"] == "deterministic"
assert store.latest_discovery_snapshot("fixture-repo")["id"] == stored_second["id"]
assert store.repository_inventory("fixture-repo")["counts"]["discovery_snapshots"] == 2
assert store.status()["counts"]["discovery_snapshots"] == 2
diff = store.discovery_snapshot_diff("fixture-repo")
assert diff["from"]["id"] == stored_first["id"]
assert diff["to"]["id"] == stored_second["id"]
assert diff["discovery"]["nodes"]["confidence_changed"][0]["before"] == 0.8
assert diff["discovery"]["nodes"]["confidence_changed"][0]["after"] == 0.95
assert diff["discovery"]["nodes"]["added"][0]["label"] == "Extra Accepted Capability"
accepted = store.accept_discovery_snapshot("fixture-repo", stored_second["id"])
graph_snapshot = accepted["graph_snapshot"]
nodes_by_id = {node["id"]: node for node in graph_snapshot["graph"]["nodes"]}
assert graph_snapshot["id"] > base_snapshot["id"]
assert nodes_by_id["fixture.repo"]["name"] == "Fixture Repo"
assert nodes_by_id["fixture.discovered-api"]["attributes"]["discovery_review_state"] == "accepted"
assert nodes_by_id["fixture.extra-capability"]["attributes"]["discovery_confidence"] == 0.7
assert store.get_discovery_snapshot(stored_second["id"])["accepted_graph_snapshot_id"] == graph_snapshot["id"]
def test_discovery_snapshot_http_and_cli_paths(tmp_path: Path, capsys) -> None:
store = RegistryStore(tmp_path / "registry.sqlite3")
store.init_schema()
store.upsert_repository({"slug": "fixture-repo", "name": "Fixture Repo"})
store.add_snapshot("fixture-repo", {"commit": "base", "graph": _base_graph()})
class Handler(RegistryHandler):
pass
Handler.store = store
server = ThreadingHTTPServer(("127.0.0.1", 0), Handler)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
try:
base_url = f"http://127.0.0.1:{server.server_port}"
snapshot_path = tmp_path / "discovery.json"
snapshot_path.write_text(json.dumps(_discovery_snapshot("disc-http")), encoding="utf-8")
assert cli_main(
[
"registry",
"ingest-discovery",
str(snapshot_path),
"--registry-url",
base_url,
]
) == 0
ingest_summary = capsys.readouterr().out
assert "ingested discovery snapshot 1 for fixture-repo" in ingest_summary
with urllib.request.urlopen(
f"{base_url}/repositories/fixture-repo/discovery-snapshots/latest",
timeout=5,
) as response:
latest = json.loads(response.read())
with urllib.request.urlopen(
f"{base_url}/repositories/fixture-repo/inventory",
timeout=5,
) as response:
inventory = json.loads(response.read())
assert latest["snapshot"]["kind"] == "FabricDiscoverySnapshot"
assert inventory["latest_discovery_snapshot"]["id"] == latest["id"]
assert cli_main(
[
"registry",
"accept-discovery",
"fixture-repo",
str(latest["id"]),
"--registry-url",
base_url,
]
) == 0
accept_summary = capsys.readouterr().out
assert "accepted discovery snapshot" in accept_summary
with urllib.request.urlopen(f"{base_url}/exports/graph-explorer", timeout=5) as response:
explorer = json.loads(response.read())
discovered = next(
element for element in explorer["elements"]
if element["data"].get("id") == "fixture.discovered-api"
)
assert discovered["data"]["reviewState"] == "accepted"
assert discovered["data"]["discovery"]["origin"] == "deterministic"
assert discovered["data"]["sourceReferences"][0]["label"].startswith("Discovery")
finally:
server.shutdown()
server.server_close()
thread.join(timeout=5)
def _base_graph() -> dict[str, object]:
return {
"apiVersion": "railiance.fabric/v1alpha1",
"kind": "FabricGraphExport",
"nodes": [
{
"id": "fixture.repo",
"kind": "Repository",
"name": "Fixture Repo",
"repo": "fixture-repo",
"domain": "testing",
"lifecycle": "active",
"attributes": {},
}
],
"edges": [],
}
def _discovery_snapshot(
commit: str,
*,
accepted_label: str = "Discovered API",
confidence: float = 0.8,
extra: bool = False,
) -> dict[str, object]:
repo_key = discovery_stable_key("fixture-repo", "Repository", "fixture-repo")
service_key = discovery_stable_key("fixture-repo", "ServiceDeclaration", accepted_label)
scope_id = replacement_scope_id("fixture-repo", "fixture", "file", source_path="README.md")
anchor = {
"source_kind": "file",
"path": "README.md",
"fingerprint": source_fingerprint({"source_kind": "file", "path": "README.md"}),
}
provenance = {
"extractor_id": "fixture",
"extractor_version": "0.1.0",
"method": "deterministic",
"origin": "deterministic",
}
nodes = [
{
"stable_key": repo_key,
"graph_id": "fixture.repo",
"kind": "Repository",
"label": "Do Not Overwrite",
"repo": "fixture-repo",
"domain": "testing",
"aliases": ["fixture-repo"],
"origin": "deterministic",
"review_state": "accepted",
"status": "active",
"confidence": 1.0,
"replacement_scope": scope_id,
"provenance": [provenance],
"source_anchors": [anchor],
},
{
"stable_key": service_key,
"graph_id": "fixture.discovered-api",
"kind": "ServiceDeclaration",
"label": accepted_label,
"repo": "fixture-repo",
"domain": "testing",
"lifecycle": "active",
"aliases": [accepted_label],
"attributes": {"description": "Accepted discovery candidate."},
"origin": "deterministic",
"review_state": "accepted",
"status": "active",
"confidence": confidence,
"replacement_scope": scope_id,
"provenance": [provenance],
"source_anchors": [anchor],
},
]
edges = [
{
"stable_key": relationship_stable_key(repo_key, "declares", service_key),
"edge_type": "declares",
"source_key": repo_key,
"target_key": service_key,
"origin": "deterministic",
"review_state": "accepted",
"status": "active",
"confidence": 0.8,
"replacement_scope": scope_id,
"provenance": [provenance],
"source_anchors": [anchor],
}
]
attributes = [
{
"stable_key": attribute_stable_key(service_key, "description"),
"entity_key": service_key,
"name": "description",
"value": "Accepted discovery candidate.",
"origin": "deterministic",
"review_state": "accepted",
"confidence": confidence,
"replacement_scope": scope_id,
"provenance": [provenance],
"source_anchors": [anchor],
}
]
if extra:
extra_key = discovery_stable_key("fixture-repo", "CapabilityDeclaration", "Extra Accepted Capability")
nodes.append(
{
"stable_key": extra_key,
"graph_id": "fixture.extra-capability",
"kind": "CapabilityDeclaration",
"label": "Extra Accepted Capability",
"repo": "fixture-repo",
"domain": "testing",
"lifecycle": "active",
"origin": "deterministic",
"review_state": "accepted",
"status": "active",
"confidence": 0.7,
"replacement_scope": scope_id,
"provenance": [provenance],
"source_anchors": [anchor],
}
)
return {
"apiVersion": "railiance.fabric/v1alpha1",
"kind": "FabricDiscoverySnapshot",
"generated_at": "2026-05-19T00:00:00Z",
"source": {"repo_slug": "fixture-repo", "repo_name": "Fixture Repo", "commit": commit},
"scan": {
"run_id": f"scan:fixture-repo:{commit}",
"profile": "deterministic",
"deterministic_only": True,
"llm_enabled": False,
},
"replacement_scopes": [
{
"id": scope_id,
"extractor_id": "fixture",
"source_kind": "file",
"source_path": "README.md",
"mode": "replacement",
}
],
"candidates": {"nodes": nodes, "edges": edges, "attributes": attributes},
"tombstones": [],
"reconciliation": {
"precedence": ["repo_declaration", "deterministic", "catalog", "registry", "llm", "manual"],
"duplicate_policy": "stable-key matches merge automatically",
"retirement_policy": "missing candidates retire only inside their replacement scope",
},
}

View File

@@ -249,7 +249,7 @@ Acceptance notes:
```task
id: RAIL-FAB-WP-0010-T06
status: todo
status: done
priority: high
state_hub_task_id: "9a8420f1-0072-4f40-8d0f-775f59cbe772"
```