from __future__ import annotations import json import sqlite3 from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path from typing import Any from .loader import repo_root from .schema_validation import draft202012_validator class RegistryError(Exception): def __init__(self, message: str, status_code: int = 400) -> None: super().__init__(message) self.message = message self.status_code = status_code @dataclass(frozen=True) class RegistryStore: path: Path def init_schema(self) -> None: if str(self.path) != ":memory:": self.path.parent.mkdir(parents=True, exist_ok=True) with self._connect() as db: db.executescript( """ create table if not exists repositories ( slug text primary key, name text not null, remote_url text, default_branch text, state_hub_repo_id text, created_at text not null, updated_at text not null ); create table if not exists snapshots ( id integer primary key autoincrement, repo_slug text not null references repositories(slug), commit_sha text not null, generated_at text not null, graph_json text not null, created_at text not null ); create index if not exists idx_snapshots_repo_latest on snapshots(repo_slug, id desc); create table if not exists artifacts ( id integer primary key autoincrement, repo_slug text not null references repositories(slug), target_id text, target_kind text, artifact_type text not null, name text not null, uri text not null, media_type text, digest text, version text, metadata_json text not null, created_at text not null ); create index if not exists idx_artifacts_repo on artifacts(repo_slug); create index if not exists idx_artifacts_target on artifacts(target_id); create table if not exists libraries ( id integer primary key autoincrement, repo_slug text not null references repositories(slug), bom_ref text, component_type text not null, name text not null, version text, purl text, scope text, licenses_json text not null, hashes_json text not null, metadata_json text not null, created_at text not null ); create index if not exists idx_libraries_repo on libraries(repo_slug); create index if not exists idx_libraries_purl on libraries(purl); """ ) def upsert_repository(self, payload: dict[str, Any]) -> dict[str, Any]: slug = _required_text(payload, "slug") now = _utc_now() name = str(payload.get("name") or slug) remote_url = _optional_text(payload, "remote_url") default_branch = str(payload.get("default_branch") or "main") state_hub_repo_id = _optional_text(payload, "state_hub_repo_id") with self._connect() as db: db.execute( """ insert into repositories ( slug, name, remote_url, default_branch, state_hub_repo_id, created_at, updated_at ) values (?, ?, ?, ?, ?, ?, ?) on conflict(slug) do update set name = excluded.name, remote_url = excluded.remote_url, default_branch = excluded.default_branch, state_hub_repo_id = excluded.state_hub_repo_id, updated_at = excluded.updated_at """, (slug, name, remote_url, default_branch, state_hub_repo_id, now, now), ) return self.get_repository(slug) def list_repositories(self) -> list[dict[str, Any]]: with self._connect() as db: rows = db.execute( """ select slug, name, remote_url, default_branch, state_hub_repo_id, created_at, updated_at from repositories order by slug """ ).fetchall() return [_row_dict(row) for row in rows] def get_repository(self, slug: str) -> dict[str, Any]: with self._connect() as db: row = db.execute( """ select slug, name, remote_url, default_branch, state_hub_repo_id, created_at, updated_at from repositories where slug = ? """, (slug,), ).fetchone() if row is None: raise RegistryError(f"repository not found: {slug}", 404) return _row_dict(row) def add_snapshot(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]: self.get_repository(repo_slug) commit = _required_text(payload, "commit") generated_at = str(payload.get("generated_at") or _utc_now()) graph = payload.get("graph") if not isinstance(graph, dict): raise RegistryError("snapshot payload requires object field 'graph'") graph = _with_source(graph, repo_slug, commit, generated_at) validate_graph_export(graph) now = _utc_now() with self._connect() as db: cursor = db.execute( """ insert into snapshots (repo_slug, commit_sha, generated_at, graph_json, created_at) values (?, ?, ?, ?, ?) """, (repo_slug, commit, generated_at, json.dumps(graph, sort_keys=True), now), ) snapshot_id = int(cursor.lastrowid) return self.get_snapshot(snapshot_id) def get_snapshot(self, snapshot_id: int) -> dict[str, Any]: with self._connect() as db: row = db.execute( """ select id, repo_slug, commit_sha, generated_at, graph_json, created_at from snapshots where id = ? """, (snapshot_id,), ).fetchone() if row is None: raise RegistryError(f"snapshot not found: {snapshot_id}", 404) return _snapshot_dict(row) def list_snapshots(self, repo_slug: str) -> list[dict[str, Any]]: self.get_repository(repo_slug) with self._connect() as db: rows = db.execute( """ select id, repo_slug, commit_sha, generated_at, graph_json, created_at from snapshots where repo_slug = ? order by id desc """, (repo_slug,), ).fetchall() return [_snapshot_summary(row) for row in rows] def latest_snapshot(self, repo_slug: str) -> dict[str, Any]: self.get_repository(repo_slug) with self._connect() as db: row = db.execute( """ select id, repo_slug, commit_sha, generated_at, graph_json, created_at from snapshots where repo_slug = ? order by id desc limit 1 """, (repo_slug,), ).fetchone() if row is None: raise RegistryError(f"no snapshots for repository: {repo_slug}", 404) return _snapshot_dict(row) def latest_snapshots(self) -> list[dict[str, Any]]: with self._connect() as db: rows = db.execute( """ select s.id, s.repo_slug, s.commit_sha, s.generated_at, s.graph_json, s.created_at from snapshots s join ( select repo_slug, max(id) as latest_id from snapshots group by repo_slug ) latest on latest.latest_id = s.id order by s.repo_slug """ ).fetchall() return [_snapshot_dict(row) for row in rows] def combined_graph(self) -> dict[str, Any]: nodes: dict[str, dict[str, Any]] = {} edges: list[dict[str, str]] = [] for snapshot in self.latest_snapshots(): graph = snapshot["graph"] for node in graph.get("nodes", []): if isinstance(node, dict): nodes[str(node.get("id", ""))] = node for edge in graph.get("edges", []): if isinstance(edge, dict): edges.append( { "from": str(edge.get("from", "")), "to": str(edge.get("to", "")), "type": str(edge.get("type", "")), } ) return { "apiVersion": "railiance.fabric/v1alpha1", "kind": "FabricGraphExport", "generated_at": _utc_now(), "source": {"repo": "registry", "commit": "", "path": ""}, "nodes": [nodes[key] for key in sorted(nodes)], "edges": sorted(edges, key=lambda edge: (edge["from"], edge["to"], edge["type"])), } def add_artifact(self, payload: dict[str, Any]) -> dict[str, Any]: repo_slug = _required_text(payload, "repo_slug") self.get_repository(repo_slug) artifact_type = _required_text(payload, "artifact_type", fallback_key="type") uri = _required_text(payload, "uri") name = str(payload.get("name") or uri) target_id = _optional_text(payload, "target_id") target_kind = _optional_text(payload, "target_kind") media_type = _optional_text(payload, "media_type") digest = _optional_text(payload, "digest") version = _optional_text(payload, "version") metadata = payload.get("metadata", {}) if not isinstance(metadata, dict): raise RegistryError("field 'metadata' must be an object") now = _utc_now() with self._connect() as db: cursor = db.execute( """ insert into artifacts ( repo_slug, target_id, target_kind, artifact_type, name, uri, media_type, digest, version, metadata_json, created_at ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repo_slug, target_id, target_kind, artifact_type, name, uri, media_type, digest, version, json.dumps(metadata, sort_keys=True), now, ), ) artifact_id = int(cursor.lastrowid) return self.get_artifact(artifact_id) def list_artifacts( self, repo_slug: str | None = None, target_id: str | None = None, artifact_type: str | None = None, ) -> list[dict[str, Any]]: conditions: list[str] = [] params: list[str] = [] if repo_slug: conditions.append("repo_slug = ?") params.append(repo_slug) if target_id: conditions.append("target_id = ?") params.append(target_id) if artifact_type: conditions.append("artifact_type = ?") params.append(artifact_type) where = f" where {' and '.join(conditions)}" if conditions else "" with self._connect() as db: rows = db.execute( """ select id, repo_slug, target_id, target_kind, artifact_type, name, uri, media_type, digest, version, metadata_json, created_at from artifacts """ + where + " order by repo_slug, target_id, artifact_type, id", params, ).fetchall() return [_artifact_dict(row) for row in rows] def get_artifact(self, artifact_id: int) -> dict[str, Any]: with self._connect() as db: row = db.execute( """ select id, repo_slug, target_id, target_kind, artifact_type, name, uri, media_type, digest, version, metadata_json, created_at from artifacts where id = ? """, (artifact_id,), ).fetchone() if row is None: raise RegistryError(f"artifact not found: {artifact_id}", 404) return _artifact_dict(row) def graph_node_detail(self, graph_id: str) -> dict[str, Any]: node = graph_node(self.combined_graph(), graph_id) enriched = json.loads(json.dumps(node)) enriched["artifacts"] = self.list_artifacts(target_id=graph_id) return enriched def repository_inventory(self, repo_slug: str) -> dict[str, Any]: repository = self.get_repository(repo_slug) try: latest_snapshot = self.latest_snapshot(repo_slug) except RegistryError as exc: if exc.status_code != 404: raise latest_snapshot = None graph = latest_snapshot["graph"] if latest_snapshot else _empty_graph() nodes = [node for node in graph.get("nodes", []) if isinstance(node, dict)] edges = [edge for edge in graph.get("edges", []) if isinstance(edge, dict)] artifacts = self.list_artifacts(repo_slug=repo_slug) libraries = self.list_libraries(repo_slug=repo_slug) return { "repository": repository, "latest_snapshot": _snapshot_public_summary(latest_snapshot) if latest_snapshot else None, "counts": { "snapshots": len(self.list_snapshots(repo_slug)), "nodes": len(nodes), "edges": len(edges), "artifacts": len(artifacts), "libraries": len(libraries), }, "graph": { "nodes": nodes, "edges": edges, "owner_repos": sorted({str(node.get("repo", "")) for node in nodes if node.get("repo")}), }, "artifacts": artifacts, "libraries": libraries, } def ingest_cyclonedx(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]: self.get_repository(repo_slug) bom = payload.get("bom") if "bom" in payload else payload if not isinstance(bom, dict): raise RegistryError("CycloneDX payload must be an object") if bom.get("bomFormat") and bom.get("bomFormat") != "CycloneDX": raise RegistryError("CycloneDX payload must have bomFormat 'CycloneDX'") entries = _cyclonedx_entries(bom) now = _utc_now() with self._connect() as db: db.execute("delete from libraries where repo_slug = ?", (repo_slug,)) for entry in entries: db.execute( """ insert into libraries ( repo_slug, bom_ref, component_type, name, version, purl, scope, licenses_json, hashes_json, metadata_json, created_at ) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( repo_slug, entry["bom_ref"], entry["component_type"], entry["name"], entry["version"], entry["purl"], entry["scope"], json.dumps(entry["licenses"], sort_keys=True), json.dumps(entry["hashes"], sort_keys=True), json.dumps(entry["metadata"], sort_keys=True), now, ), ) return { "repo_slug": repo_slug, "component_count": len(entries), "libraries": self.list_libraries(repo_slug=repo_slug), } def list_libraries( self, repo_slug: str | None = None, name: str | None = None, purl: str | None = None, component_type: str | None = None, ) -> list[dict[str, Any]]: conditions: list[str] = [] params: list[str] = [] if repo_slug: conditions.append("repo_slug = ?") params.append(repo_slug) if name: conditions.append("name = ?") params.append(name) if purl: conditions.append("purl = ?") params.append(purl) if component_type: conditions.append("component_type = ?") params.append(component_type) where = f" where {' and '.join(conditions)}" if conditions else "" with self._connect() as db: rows = db.execute( """ select id, repo_slug, bom_ref, component_type, name, version, purl, scope, licenses_json, hashes_json, metadata_json, created_at from libraries """ + where + " order by repo_slug, name, version, id", params, ).fetchall() return [_library_dict(row) for row in rows] def get_library(self, library_id: int) -> dict[str, Any]: with self._connect() as db: row = db.execute( """ select id, repo_slug, bom_ref, component_type, name, version, purl, scope, licenses_json, hashes_json, metadata_json, created_at from libraries where id = ? """, (library_id,), ).fetchone() if row is None: raise RegistryError(f"library not found: {library_id}", 404) return _library_dict(row) def snapshot_diff( self, repo_slug: str, from_id: int | None = None, to_id: int | None = None, ) -> dict[str, Any]: self.get_repository(repo_slug) if from_id is None or to_id is None: snapshots = self.list_snapshots(repo_slug) if len(snapshots) < 2: raise RegistryError(f"at least two snapshots are required for diff: {repo_slug}", 404) to_id = snapshots[0]["id"] if to_id is None else to_id from_id = snapshots[1]["id"] if from_id is None else from_id before = self.get_snapshot(from_id) after = self.get_snapshot(to_id) if before["repo_slug"] != repo_slug or after["repo_slug"] != repo_slug: raise RegistryError("snapshot ids must belong to the requested repository") return { "repo_slug": repo_slug, "from": _snapshot_public_summary(before), "to": _snapshot_public_summary(after), "graph": _graph_diff(before["graph"], after["graph"]), } def search(self, query: str) -> dict[str, Any]: needle = query.strip().lower() if not needle: raise RegistryError("query parameter 'q' is required") graph = self.combined_graph() return { "query": query, "nodes": [ node for node in graph.get("nodes", []) if isinstance(node, dict) and _matches(needle, node) ], "artifacts": [ artifact for artifact in self.list_artifacts() if _matches(needle, artifact) ], "libraries": [ library for library in self.list_libraries() if _matches(needle, library) ], "repositories": [ repository for repository in self.list_repositories() if _matches(needle, repository) ], } def status(self) -> dict[str, Any]: with self._connect() as db: counts = { "repositories": db.execute("select count(*) from repositories").fetchone()[0], "snapshots": db.execute("select count(*) from snapshots").fetchone()[0], "artifacts": db.execute("select count(*) from artifacts").fetchone()[0], "libraries": db.execute("select count(*) from libraries").fetchone()[0], } latest = [ { "repo_slug": snapshot["repo_slug"], "snapshot_id": snapshot["id"], "commit": snapshot["commit"], "generated_at": snapshot["generated_at"], } for snapshot in self.latest_snapshots() ] return { "status": "ok", "database": str(self.path), "counts": counts, "latest_snapshots": latest, } def _connect(self) -> sqlite3.Connection: db = sqlite3.connect(self.path) db.row_factory = sqlite3.Row return db def validate_graph_export(graph: dict[str, Any]) -> None: schema_path = repo_root() / "schemas" / "state-hub-export.schema.yaml" validator = draft202012_validator(schema_path) errors = sorted(validator.iter_errors(graph), key=lambda error: list(error.path)) if errors: error = errors[0] location = ".".join(str(part) for part in error.path) or "" raise RegistryError(f"invalid FabricGraphExport at {location}: {error.message}") def providers(graph: dict[str, Any], capability: str) -> list[dict[str, Any]]: result = [] for node in _nodes(graph): attrs = _attrs(node) if node.get("kind") != "CapabilityDeclaration": continue if node.get("id") == capability or attrs.get("capability_type") == capability: result.append( { "provider_id": node.get("id", ""), "name": node.get("name", ""), "service_id": attrs.get("service_id", ""), "capability_type": attrs.get("capability_type", ""), "lifecycle": node.get("lifecycle", ""), "interfaces": attrs.get("interface_ids", []), "repo": node.get("repo", ""), "domain": node.get("domain", ""), } ) return sorted(result, key=lambda item: item["provider_id"]) def consumers(graph: dict[str, Any], target: str) -> list[dict[str, Any]]: nodes = _nodes_by_id(graph) target_interface_type = "" target_node = nodes.get(target) if target_node and target_node.get("kind") == "InterfaceDeclaration": target_interface_type = str(_attrs(target_node).get("interface_type", "")) result: list[dict[str, Any]] = [] bindings_by_dependency = _bindings_by_dependency(graph) for dependency in _dependency_nodes(graph): attrs = _attrs(dependency) dependency_id = str(dependency.get("id", "")) dependency_matches = target in { dependency_id, str(attrs.get("requires_capability_id", "")), str(attrs.get("requires_capability_type", "")), str(attrs.get("interface_type", "")), } or bool(target_interface_type and target_interface_type == attrs.get("interface_type")) bindings = bindings_by_dependency.get(dependency_id, []) matching_bindings = [ binding for binding in bindings if target in {binding["provider_capability_id"], binding["provider_interface_id"]} ] if dependency_matches and not bindings: result.append(_consumer_match(attrs, dependency_id, {})) for binding in bindings: if dependency_matches or binding in matching_bindings: result.append(_consumer_match(attrs, dependency_id, binding)) return sorted(result, key=lambda item: (item["consumer_service_id"], item["dependency_id"])) def unresolved_dependencies(graph: dict[str, Any]) -> list[dict[str, Any]]: result = [] bindings_by_dependency = _bindings_by_dependency(graph) for dependency in _dependency_nodes(graph): attrs = _attrs(dependency) dependency_id = str(dependency.get("id", "")) required_id = str(attrs.get("requires_capability_id", "")) required_type = str(attrs.get("requires_capability_type", "")) provider_matches = providers(graph, required_id or required_type) bindings = bindings_by_dependency.get(dependency_id, []) has_missing_binding = any(binding.get("status") in {"missing", "disputed"} for binding in bindings) if not provider_matches or has_missing_binding: result.append( { "dependency_id": dependency_id, "consumer_service_id": attrs.get("consumer_service_id", ""), "requires_capability_id": required_id, "requires_capability_type": required_type, "interface_type": attrs.get("interface_type", ""), "reason": "missing_provider" if not provider_matches else "binding_not_exact", } ) return sorted(result, key=lambda item: item["dependency_id"]) def blast_radius(graph: dict[str, Any], interface: str) -> list[dict[str, Any]]: target_node = _nodes_by_id(graph).get(interface) matches = consumers(graph, interface) if target_node and target_node.get("kind") == "InterfaceDeclaration": return [match for match in matches if match.get("provider_interface_id") == interface] return [ match for match in matches if _dependency_attrs(graph, match["dependency_id"]).get("interface_type") == interface ] def dependency_path_lines(graph: dict[str, Any], service_id: str) -> list[str]: nodes = _nodes_by_id(graph) if service_id not in nodes or nodes[service_id].get("kind") != "ServiceDeclaration": return [f"unknown service: {service_id}"] deps_by_consumer: dict[str, list[dict[str, Any]]] = {} for dependency in _dependency_nodes(graph): attrs = _attrs(dependency) deps_by_consumer.setdefault(str(attrs.get("consumer_service_id", "")), []).append(dependency) capability_service = { str(node.get("id", "")): str(_attrs(node).get("service_id", "")) for node in _nodes(graph) if node.get("kind") == "CapabilityDeclaration" } bindings_by_dependency = _bindings_by_dependency(graph) lines: list[str] = [] def walk(current: str, indent: int, stack: list[str]) -> None: prefix = " " * indent if current in stack: lines.append(f"{prefix}{current} (cycle)") return lines.append(f"{prefix}{current}") dependencies = sorted(deps_by_consumer.get(current, []), key=lambda item: str(item.get("id", ""))) if not dependencies: lines.append(f"{prefix} no declared dependencies") return for dependency in dependencies: dependency_id = str(dependency.get("id", "")) attrs = _attrs(dependency) required = attrs.get("requires_capability_type", "") lines.append(f"{prefix} requires {required}: {dependency_id}") bindings = bindings_by_dependency.get(dependency_id, []) if not bindings: candidate_providers = providers(graph, str(required)) if candidate_providers: for provider in candidate_providers: lines.append(f"{prefix} candidate {provider['provider_id']}") else: lines.append(f"{prefix} unresolved") continue for binding in bindings: provider_id = binding.get("provider_capability_id", "") provider_service = capability_service.get(provider_id, "") status = binding.get("status", "") lines.append(f"{prefix} {status} -> {provider_id}") if provider_service and provider_service != current: walk(provider_service, indent + 3, stack + [current]) walk(service_id, 0, []) return lines def graph_node(graph: dict[str, Any], graph_id: str) -> dict[str, Any]: node = _nodes_by_id(graph).get(graph_id) if node is None: raise RegistryError(f"graph node not found: {graph_id}", 404) return node def backstage_projection(graph: dict[str, Any]) -> dict[str, Any]: items: list[dict[str, Any]] = [] domains = sorted({str(node.get("domain", "")) for node in _nodes(graph) if node.get("domain")}) for domain in domains: items.append( { "apiVersion": "backstage.io/v1alpha1", "kind": "Domain", "metadata": {"name": _backstage_name(domain), "title": domain}, "spec": {"owner": _backstage_name(domain)}, } ) items.append( { "apiVersion": "backstage.io/v1alpha1", "kind": "System", "metadata": {"name": _backstage_name(domain), "title": domain}, "spec": {"owner": _backstage_name(domain), "domain": _backstage_name(domain)}, } ) for node in _nodes(graph): kind = str(node.get("kind", "")) if kind == "ServiceDeclaration": attrs = _attrs(node) items.append( { "apiVersion": "backstage.io/v1alpha1", "kind": "Component", "metadata": _backstage_metadata(node), "spec": { "type": "service", "lifecycle": _backstage_lifecycle(str(node.get("lifecycle", ""))), "owner": _backstage_owner(node), "system": _backstage_name(str(node.get("domain", ""))), "providesApis": [_backstage_name(value) for value in attrs.get("exposes_interfaces", [])], }, } ) elif kind == "InterfaceDeclaration": attrs = _attrs(node) items.append( { "apiVersion": "backstage.io/v1alpha1", "kind": "API", "metadata": _backstage_metadata(node), "spec": { "type": _backstage_api_type(str(attrs.get("interface_type", ""))), "lifecycle": _backstage_lifecycle(str(node.get("lifecycle", ""))), "owner": _backstage_owner(node), "system": _backstage_name(str(node.get("domain", ""))), "definition": "", }, } ) elif kind == "CapabilityDeclaration": attrs = _attrs(node) items.append( { "apiVersion": "backstage.io/v1alpha1", "kind": "Resource", "metadata": _backstage_metadata(node), "spec": { "type": attrs.get("capability_type", "capability"), "owner": _backstage_owner(node), "system": _backstage_name(str(node.get("domain", ""))), }, } ) return { "apiVersion": "railiance.fabric/v1alpha1", "kind": "BackstageCatalogProjection", "items": items, } def xregistry_projection(graph: dict[str, Any]) -> dict[str, Any]: groups = { "services": _xregistry_group("service", "services"), "capabilities": _xregistry_group("capability", "capabilities"), "interfaces": _xregistry_group("interface", "interfaces"), "dependencies": _xregistry_group("dependency", "dependencies"), "bindings": _xregistry_group("binding", "bindings"), } group_by_kind = { "ServiceDeclaration": "services", "CapabilityDeclaration": "capabilities", "InterfaceDeclaration": "interfaces", "DependencyDeclaration": "dependencies", "BindingAssertion": "bindings", } for node in _nodes(graph): group_name = group_by_kind.get(str(node.get("kind", ""))) if group_name is None: continue node_id = str(node.get("id", "")) groups[group_name]["resources"][_xregistry_key(node_id)] = { "id": node_id, "name": node.get("name", node_id), "versionid": "latest", "attributes": { "kind": node.get("kind", ""), "repo": node.get("repo", ""), "domain": node.get("domain", ""), "lifecycle": node.get("lifecycle", ""), **_attrs(node), }, } return { "apiVersion": "railiance.fabric/v1alpha1", "kind": "XRegistryProjection", "groups": groups, } def library_xregistry_projection(libraries: list[dict[str, Any]]) -> dict[str, Any]: group = _xregistry_group("library", "libraries") for library in libraries: key = _xregistry_key(str(library.get("purl") or library.get("bom_ref") or library.get("name", ""))) group["resources"][key] = { "id": library.get("purl") or library.get("bom_ref") or library.get("name", ""), "name": library.get("name", ""), "versionid": library.get("version") or "unknown", "attributes": { "repo": library.get("repo_slug", ""), "bom_ref": library.get("bom_ref", ""), "component_type": library.get("component_type", ""), "scope": library.get("scope", ""), "licenses": library.get("licenses", []), "hashes": library.get("hashes", []), }, } return { "apiVersion": "railiance.fabric/v1alpha1", "kind": "LibraryXRegistryProjection", "groups": {"libraries": group}, } def _with_source(graph: dict[str, Any], repo_slug: str, commit: str, generated_at: str) -> dict[str, Any]: copy = json.loads(json.dumps(graph)) copy.setdefault("generated_at", generated_at) copy.setdefault("source", {}) copy["source"].setdefault("repo", repo_slug) copy["source"].setdefault("commit", commit) copy["source"].setdefault("path", "") return copy def _snapshot_dict(row: sqlite3.Row) -> dict[str, Any]: return { "id": row["id"], "repo_slug": row["repo_slug"], "commit": row["commit_sha"], "generated_at": row["generated_at"], "graph": json.loads(row["graph_json"]), "created_at": row["created_at"], } def _snapshot_summary(row: sqlite3.Row) -> dict[str, Any]: graph = json.loads(row["graph_json"]) return { "id": row["id"], "repo_slug": row["repo_slug"], "commit": row["commit_sha"], "generated_at": row["generated_at"], "created_at": row["created_at"], "node_count": len(graph.get("nodes", [])), "edge_count": len(graph.get("edges", [])), } def _snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]: graph = snapshot["graph"] return { "id": snapshot["id"], "repo_slug": snapshot["repo_slug"], "commit": snapshot["commit"], "generated_at": snapshot["generated_at"], "created_at": snapshot["created_at"], "node_count": len(graph.get("nodes", [])), "edge_count": len(graph.get("edges", [])), } def _row_dict(row: sqlite3.Row) -> dict[str, Any]: return {key: row[key] for key in row.keys()} def _artifact_dict(row: sqlite3.Row) -> dict[str, Any]: return { "id": row["id"], "repo_slug": row["repo_slug"], "target_id": row["target_id"], "target_kind": row["target_kind"], "artifact_type": row["artifact_type"], "name": row["name"], "uri": row["uri"], "media_type": row["media_type"], "digest": row["digest"], "version": row["version"], "metadata": json.loads(row["metadata_json"]), "created_at": row["created_at"], } def _library_dict(row: sqlite3.Row) -> dict[str, Any]: return { "id": row["id"], "repo_slug": row["repo_slug"], "bom_ref": row["bom_ref"], "component_type": row["component_type"], "name": row["name"], "version": row["version"], "purl": row["purl"], "scope": row["scope"], "licenses": json.loads(row["licenses_json"]), "hashes": json.loads(row["hashes_json"]), "metadata": json.loads(row["metadata_json"]), "created_at": row["created_at"], } def _cyclonedx_entries(bom: dict[str, Any]) -> list[dict[str, Any]]: entries: list[dict[str, Any]] = [] for component in bom.get("components", []): if not isinstance(component, dict): continue name = str(component.get("name") or "").strip() if not name: continue entries.append( { "bom_ref": _optional_component_text(component, "bom-ref"), "component_type": str(component.get("type") or "library"), "name": name, "version": _optional_component_text(component, "version"), "purl": _optional_component_text(component, "purl"), "scope": _optional_component_text(component, "scope"), "licenses": _normalize_licenses(component.get("licenses", [])), "hashes": component.get("hashes", []) if isinstance(component.get("hashes", []), list) else [], "metadata": { "group": component.get("group"), "publisher": component.get("publisher"), "description": component.get("description"), "externalReferences": component.get("externalReferences", []), }, } ) for service in bom.get("services", []): if not isinstance(service, dict): continue name = str(service.get("name") or "").strip() if not name: continue entries.append( { "bom_ref": _optional_component_text(service, "bom-ref"), "component_type": "service", "name": name, "version": _optional_component_text(service, "version"), "purl": None, "scope": None, "licenses": [], "hashes": [], "metadata": { "provider": service.get("provider"), "endpoints": service.get("endpoints", []), "externalReferences": service.get("externalReferences", []), }, } ) return entries def _optional_component_text(component: dict[str, Any], key: str) -> str | None: value = component.get(key) if value is None: return None return str(value) def _normalize_licenses(raw: Any) -> list[dict[str, Any]]: if not isinstance(raw, list): return [] normalized = [] for item in raw: if isinstance(item, dict): normalized.append(item) return normalized def _empty_graph() -> dict[str, Any]: return { "apiVersion": "railiance.fabric/v1alpha1", "kind": "FabricGraphExport", "nodes": [], "edges": [], } def _graph_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]: before_nodes = _nodes_by_id(before) after_nodes = _nodes_by_id(after) before_edges = {_edge_key(edge): edge for edge in before.get("edges", []) if isinstance(edge, dict)} after_edges = {_edge_key(edge): edge for edge in after.get("edges", []) if isinstance(edge, dict)} added_node_ids = sorted(set(after_nodes) - set(before_nodes)) removed_node_ids = sorted(set(before_nodes) - set(after_nodes)) common_node_ids = sorted(set(before_nodes) & set(after_nodes)) changed_node_ids = [ node_id for node_id in common_node_ids if _stable_json(before_nodes[node_id]) != _stable_json(after_nodes[node_id]) ] added_edge_keys = sorted(set(after_edges) - set(before_edges)) removed_edge_keys = sorted(set(before_edges) - set(after_edges)) return { "added_nodes": [after_nodes[node_id] for node_id in added_node_ids], "removed_nodes": [before_nodes[node_id] for node_id in removed_node_ids], "changed_nodes": [ { "id": node_id, "before": before_nodes[node_id], "after": after_nodes[node_id], } for node_id in changed_node_ids ], "added_edges": [after_edges[key] for key in added_edge_keys], "removed_edges": [before_edges[key] for key in removed_edge_keys], } def _edge_key(edge: dict[str, Any]) -> tuple[str, str, str]: return (str(edge.get("from", "")), str(edge.get("to", "")), str(edge.get("type", ""))) def _stable_json(value: Any) -> str: return json.dumps(value, sort_keys=True, separators=(",", ":")) def _matches(needle: str, value: Any) -> bool: return needle in _stable_json(value).lower() def _required_text(payload: dict[str, Any], key: str, fallback_key: str | None = None) -> str: value = payload.get(key) if value is None and fallback_key is not None: value = payload.get(fallback_key) if not isinstance(value, str) or not value.strip(): raise RegistryError(f"field '{key}' is required") return value.strip() def _optional_text(payload: dict[str, Any], key: str) -> str | None: value = payload.get(key) if value is None: return None if not isinstance(value, str): raise RegistryError(f"field '{key}' must be a string") return value def _utc_now() -> str: return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") def _nodes(graph: dict[str, Any]) -> list[dict[str, Any]]: return [node for node in graph.get("nodes", []) if isinstance(node, dict)] def _nodes_by_id(graph: dict[str, Any]) -> dict[str, dict[str, Any]]: return {str(node.get("id", "")): node for node in _nodes(graph)} def _attrs(node: dict[str, Any]) -> dict[str, Any]: attrs = node.get("attributes", {}) return attrs if isinstance(attrs, dict) else {} def _dependency_nodes(graph: dict[str, Any]) -> list[dict[str, Any]]: return [node for node in _nodes(graph) if node.get("kind") == "DependencyDeclaration"] def _dependency_attrs(graph: dict[str, Any], dependency_id: str) -> dict[str, Any]: node = _nodes_by_id(graph).get(dependency_id, {}) return _attrs(node) def _bindings_by_dependency(graph: dict[str, Any]) -> dict[str, list[dict[str, str]]]: result: dict[str, list[dict[str, str]]] = {} for node in _nodes(graph): if node.get("kind") != "BindingAssertion": continue attrs = _attrs(node) dependency_id = str(attrs.get("dependency_id", "")) if not dependency_id: continue result.setdefault(dependency_id, []).append( { "binding_id": str(node.get("id", "")), "provider_capability_id": str(attrs.get("provider_capability_id", "")), "provider_interface_id": str(attrs.get("provider_interface_id", "")), "status": str(attrs.get("status", "")), } ) for bindings in result.values(): bindings.sort(key=lambda item: item["binding_id"]) return result def _consumer_match(attrs: dict[str, Any], dependency_id: str, binding: dict[str, str]) -> dict[str, Any]: return { "consumer_service_id": attrs.get("consumer_service_id", ""), "dependency_id": dependency_id, "required_capability_type": attrs.get("requires_capability_type", ""), "provider_capability_id": binding.get("provider_capability_id", ""), "provider_interface_id": binding.get("provider_interface_id", ""), "status": binding.get("status", ""), } def _backstage_metadata(node: dict[str, Any]) -> dict[str, Any]: node_id = str(node.get("id", "")) metadata = { "name": _backstage_name(node_id), "title": node.get("name", node_id), "annotations": { "railiance.fabric/id": node_id, "railiance.fabric/repo": str(node.get("repo", "")), "railiance.fabric/kind": str(node.get("kind", "")), }, } return metadata def _backstage_owner(node: dict[str, Any]) -> str: repo = str(node.get("repo", "")) domain = str(node.get("domain", "")) return _backstage_name(repo or domain or "unknown") def _backstage_name(value: str) -> str: cleaned = "".join(char.lower() if char.isalnum() else "-" for char in value) cleaned = "-".join(part for part in cleaned.split("-") if part) return cleaned or "unknown" def _backstage_lifecycle(value: str) -> str: if value in {"active", "deprecated"}: return value if value == "retired": return "deprecated" return "experimental" def _backstage_api_type(interface_type: str) -> str: if interface_type == "http-api": return "openapi" if interface_type == "event-stream": return "asyncapi" return "other" def _xregistry_group(singular: str, plural: str) -> dict[str, Any]: return { "singular": singular, "plural": plural, "resources": {}, } def _xregistry_key(value: str) -> str: return value.replace("/", ".").strip(".") or "unknown"