generated from coulomb/repo-seed
1744 lines
69 KiB
Python
1744 lines
69 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import hashlib
|
|
import re
|
|
import sqlite3
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from .loader import repo_root
|
|
from .schema_validation import draft202012_validator
|
|
|
|
|
|
class RegistryError(Exception):
|
|
def __init__(self, message: str, status_code: int = 400) -> None:
|
|
super().__init__(message)
|
|
self.message = message
|
|
self.status_code = status_code
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class RegistryStore:
|
|
path: Path
|
|
|
|
def init_schema(self) -> None:
|
|
if str(self.path) != ":memory:":
|
|
self.path.parent.mkdir(parents=True, exist_ok=True)
|
|
with self._connect() as db:
|
|
db.executescript(
|
|
"""
|
|
create table if not exists repositories (
|
|
slug text primary key,
|
|
name text not null,
|
|
remote_url text,
|
|
default_branch text,
|
|
state_hub_repo_id text,
|
|
created_at text not null,
|
|
updated_at text not null
|
|
);
|
|
|
|
create table if not exists snapshots (
|
|
id integer primary key autoincrement,
|
|
repo_slug text not null references repositories(slug),
|
|
commit_sha text not null,
|
|
generated_at text not null,
|
|
graph_json text not null,
|
|
created_at text not null
|
|
);
|
|
|
|
create index if not exists idx_snapshots_repo_latest
|
|
on snapshots(repo_slug, id desc);
|
|
|
|
create table if not exists discovery_snapshots (
|
|
id integer primary key autoincrement,
|
|
repo_slug text not null references repositories(slug),
|
|
commit_sha text not null,
|
|
profile text not null,
|
|
generated_at text not null,
|
|
snapshot_json text not null,
|
|
accepted_graph_snapshot_id integer references snapshots(id),
|
|
created_at text not null
|
|
);
|
|
|
|
create index if not exists idx_discovery_snapshots_repo_latest
|
|
on discovery_snapshots(repo_slug, profile, id desc);
|
|
|
|
create table if not exists artifacts (
|
|
id integer primary key autoincrement,
|
|
repo_slug text not null references repositories(slug),
|
|
target_id text,
|
|
target_kind text,
|
|
artifact_type text not null,
|
|
name text not null,
|
|
uri text not null,
|
|
media_type text,
|
|
digest text,
|
|
version text,
|
|
metadata_json text not null,
|
|
created_at text not null
|
|
);
|
|
|
|
create index if not exists idx_artifacts_repo
|
|
on artifacts(repo_slug);
|
|
|
|
create index if not exists idx_artifacts_target
|
|
on artifacts(target_id);
|
|
|
|
create table if not exists libraries (
|
|
id integer primary key autoincrement,
|
|
repo_slug text not null references repositories(slug),
|
|
bom_ref text,
|
|
component_type text not null,
|
|
name text not null,
|
|
version text,
|
|
purl text,
|
|
scope text,
|
|
licenses_json text not null,
|
|
hashes_json text not null,
|
|
metadata_json text not null,
|
|
created_at text not null
|
|
);
|
|
|
|
create index if not exists idx_libraries_repo
|
|
on libraries(repo_slug);
|
|
|
|
create index if not exists idx_libraries_purl
|
|
on libraries(purl);
|
|
"""
|
|
)
|
|
|
|
def upsert_repository(self, payload: dict[str, Any]) -> dict[str, Any]:
|
|
slug = _required_text(payload, "slug")
|
|
now = _utc_now()
|
|
name = str(payload.get("name") or slug)
|
|
remote_url = _optional_text(payload, "remote_url")
|
|
default_branch = str(payload.get("default_branch") or "main")
|
|
state_hub_repo_id = _optional_text(payload, "state_hub_repo_id")
|
|
with self._connect() as db:
|
|
db.execute(
|
|
"""
|
|
insert into repositories (
|
|
slug, name, remote_url, default_branch, state_hub_repo_id,
|
|
created_at, updated_at
|
|
)
|
|
values (?, ?, ?, ?, ?, ?, ?)
|
|
on conflict(slug) do update set
|
|
name = excluded.name,
|
|
remote_url = excluded.remote_url,
|
|
default_branch = excluded.default_branch,
|
|
state_hub_repo_id = excluded.state_hub_repo_id,
|
|
updated_at = excluded.updated_at
|
|
""",
|
|
(slug, name, remote_url, default_branch, state_hub_repo_id, now, now),
|
|
)
|
|
return self.get_repository(slug)
|
|
|
|
def list_repositories(self) -> list[dict[str, Any]]:
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
"""
|
|
select slug, name, remote_url, default_branch, state_hub_repo_id,
|
|
created_at, updated_at
|
|
from repositories
|
|
order by slug
|
|
"""
|
|
).fetchall()
|
|
return [_row_dict(row) for row in rows]
|
|
|
|
def get_repository(self, slug: str) -> dict[str, Any]:
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
"""
|
|
select slug, name, remote_url, default_branch, state_hub_repo_id,
|
|
created_at, updated_at
|
|
from repositories
|
|
where slug = ?
|
|
""",
|
|
(slug,),
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"repository not found: {slug}", 404)
|
|
return _row_dict(row)
|
|
|
|
def add_snapshot(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
commit = _required_text(payload, "commit")
|
|
generated_at = str(payload.get("generated_at") or _utc_now())
|
|
graph = payload.get("graph")
|
|
if not isinstance(graph, dict):
|
|
raise RegistryError("snapshot payload requires object field 'graph'")
|
|
graph = _with_source(graph, repo_slug, commit, generated_at)
|
|
validate_graph_export(graph)
|
|
|
|
now = _utc_now()
|
|
with self._connect() as db:
|
|
cursor = db.execute(
|
|
"""
|
|
insert into snapshots (repo_slug, commit_sha, generated_at, graph_json, created_at)
|
|
values (?, ?, ?, ?, ?)
|
|
""",
|
|
(repo_slug, commit, generated_at, json.dumps(graph, sort_keys=True), now),
|
|
)
|
|
snapshot_id = int(cursor.lastrowid)
|
|
return self.get_snapshot(snapshot_id)
|
|
|
|
def get_snapshot(self, snapshot_id: int) -> dict[str, Any]:
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
"""
|
|
select id, repo_slug, commit_sha, generated_at, graph_json, created_at
|
|
from snapshots
|
|
where id = ?
|
|
""",
|
|
(snapshot_id,),
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"snapshot not found: {snapshot_id}", 404)
|
|
return _snapshot_dict(row)
|
|
|
|
def list_snapshots(self, repo_slug: str) -> list[dict[str, Any]]:
|
|
self.get_repository(repo_slug)
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
"""
|
|
select id, repo_slug, commit_sha, generated_at, graph_json, created_at
|
|
from snapshots
|
|
where repo_slug = ?
|
|
order by id desc
|
|
""",
|
|
(repo_slug,),
|
|
).fetchall()
|
|
return [_snapshot_summary(row) for row in rows]
|
|
|
|
def latest_snapshot(self, repo_slug: str) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
"""
|
|
select id, repo_slug, commit_sha, generated_at, graph_json, created_at
|
|
from snapshots
|
|
where repo_slug = ?
|
|
order by id desc
|
|
limit 1
|
|
""",
|
|
(repo_slug,),
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"no snapshots for repository: {repo_slug}", 404)
|
|
return _snapshot_dict(row)
|
|
|
|
def latest_snapshots(self) -> list[dict[str, Any]]:
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
"""
|
|
select s.id, s.repo_slug, s.commit_sha, s.generated_at, s.graph_json, s.created_at
|
|
from snapshots s
|
|
join (
|
|
select repo_slug, max(id) as latest_id
|
|
from snapshots
|
|
group by repo_slug
|
|
) latest on latest.latest_id = s.id
|
|
order by s.repo_slug
|
|
"""
|
|
).fetchall()
|
|
return [_snapshot_dict(row) for row in rows]
|
|
|
|
def add_discovery_snapshot(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
if not isinstance(payload, dict):
|
|
raise RegistryError("discovery snapshot payload must be an object")
|
|
validate_discovery_snapshot(payload)
|
|
source = payload.get("source") if isinstance(payload.get("source"), dict) else {}
|
|
if source.get("repo_slug") and source.get("repo_slug") != repo_slug:
|
|
raise RegistryError("discovery snapshot repo_slug does not match request path")
|
|
commit = str(source.get("commit") or "working-tree")
|
|
scan = payload.get("scan") if isinstance(payload.get("scan"), dict) else {}
|
|
profile = str(scan.get("profile") or "deterministic")
|
|
generated_at = str(payload.get("generated_at") or _utc_now())
|
|
now = _utc_now()
|
|
with self._connect() as db:
|
|
cursor = db.execute(
|
|
"""
|
|
insert into discovery_snapshots (
|
|
repo_slug, commit_sha, profile, generated_at, snapshot_json,
|
|
accepted_graph_snapshot_id, created_at
|
|
)
|
|
values (?, ?, ?, ?, ?, null, ?)
|
|
""",
|
|
(repo_slug, commit, profile, generated_at, json.dumps(payload, sort_keys=True), now),
|
|
)
|
|
discovery_id = int(cursor.lastrowid)
|
|
return self.get_discovery_snapshot(discovery_id)
|
|
|
|
def get_discovery_snapshot(self, discovery_snapshot_id: int) -> dict[str, Any]:
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
"""
|
|
select id, repo_slug, commit_sha, profile, generated_at, snapshot_json,
|
|
accepted_graph_snapshot_id, created_at
|
|
from discovery_snapshots
|
|
where id = ?
|
|
""",
|
|
(discovery_snapshot_id,),
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"discovery snapshot not found: {discovery_snapshot_id}", 404)
|
|
return _discovery_snapshot_dict(row)
|
|
|
|
def list_discovery_snapshots(self, repo_slug: str, profile: str | None = None) -> list[dict[str, Any]]:
|
|
self.get_repository(repo_slug)
|
|
params: list[Any] = [repo_slug]
|
|
where = "where repo_slug = ?"
|
|
if profile:
|
|
where += " and profile = ?"
|
|
params.append(profile)
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
f"""
|
|
select id, repo_slug, commit_sha, profile, generated_at, snapshot_json,
|
|
accepted_graph_snapshot_id, created_at
|
|
from discovery_snapshots
|
|
{where}
|
|
order by id desc
|
|
""",
|
|
params,
|
|
).fetchall()
|
|
return [_discovery_snapshot_summary(row) for row in rows]
|
|
|
|
def latest_discovery_snapshot(self, repo_slug: str, profile: str | None = None) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
params: list[Any] = [repo_slug]
|
|
where = "where repo_slug = ?"
|
|
if profile:
|
|
where += " and profile = ?"
|
|
params.append(profile)
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
f"""
|
|
select id, repo_slug, commit_sha, profile, generated_at, snapshot_json,
|
|
accepted_graph_snapshot_id, created_at
|
|
from discovery_snapshots
|
|
{where}
|
|
order by id desc
|
|
limit 1
|
|
""",
|
|
params,
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"no discovery snapshots for repository: {repo_slug}", 404)
|
|
return _discovery_snapshot_dict(row)
|
|
|
|
def discovery_snapshot_diff(
|
|
self,
|
|
repo_slug: str,
|
|
from_id: int | None = None,
|
|
to_id: int | None = None,
|
|
profile: str | None = None,
|
|
) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
if from_id is None or to_id is None:
|
|
snapshots = self.list_discovery_snapshots(repo_slug, profile=profile)
|
|
if len(snapshots) < 2:
|
|
raise RegistryError(f"at least two discovery snapshots are required for diff: {repo_slug}", 404)
|
|
to_id = snapshots[0]["id"] if to_id is None else to_id
|
|
from_id = snapshots[1]["id"] if from_id is None else from_id
|
|
|
|
before = self.get_discovery_snapshot(from_id)
|
|
after = self.get_discovery_snapshot(to_id)
|
|
if before["repo_slug"] != repo_slug or after["repo_slug"] != repo_slug:
|
|
raise RegistryError("discovery snapshot ids must belong to the requested repository")
|
|
|
|
return {
|
|
"repo_slug": repo_slug,
|
|
"from": _discovery_snapshot_public_summary(before),
|
|
"to": _discovery_snapshot_public_summary(after),
|
|
"discovery": _discovery_diff(before["snapshot"], after["snapshot"]),
|
|
"reconciliation": after["snapshot"].get("reconciliation", {}),
|
|
}
|
|
|
|
def accept_discovery_snapshot(
|
|
self,
|
|
repo_slug: str,
|
|
discovery_snapshot_id: int,
|
|
payload: dict[str, Any] | None = None,
|
|
) -> dict[str, Any]:
|
|
payload = payload or {}
|
|
discovery = self.get_discovery_snapshot(discovery_snapshot_id)
|
|
if discovery["repo_slug"] != repo_slug:
|
|
raise RegistryError("discovery snapshot id must belong to the requested repository")
|
|
base_graph = self._latest_graph_or_empty(repo_slug)
|
|
accepted_keys = payload.get("accepted_keys")
|
|
if accepted_keys is not None and not isinstance(accepted_keys, list):
|
|
raise RegistryError("field 'accepted_keys' must be an array when provided")
|
|
accepted_key_set = {str(key) for key in accepted_keys or []}
|
|
accept_review_states = payload.get("accept_review_states", ["accepted"])
|
|
if not isinstance(accept_review_states, list):
|
|
raise RegistryError("field 'accept_review_states' must be an array")
|
|
graph = _project_discovery_snapshot(
|
|
base_graph,
|
|
discovery["snapshot"],
|
|
accepted_keys=accepted_key_set,
|
|
accept_review_states={str(state) for state in accept_review_states},
|
|
)
|
|
snapshot = self.add_snapshot(
|
|
repo_slug,
|
|
{
|
|
"commit": str(payload.get("commit") or f"discovery:{discovery['commit']}"),
|
|
"generated_at": _utc_now(),
|
|
"graph": graph,
|
|
},
|
|
)
|
|
with self._connect() as db:
|
|
db.execute(
|
|
"update discovery_snapshots set accepted_graph_snapshot_id = ? where id = ?",
|
|
(snapshot["id"], discovery_snapshot_id),
|
|
)
|
|
return {
|
|
"repo_slug": repo_slug,
|
|
"discovery_snapshot": self.get_discovery_snapshot(discovery_snapshot_id),
|
|
"graph_snapshot": snapshot,
|
|
"projected": {
|
|
"node_count": len(graph.get("nodes", [])),
|
|
"edge_count": len(graph.get("edges", [])),
|
|
},
|
|
}
|
|
|
|
def combined_graph(self) -> dict[str, Any]:
|
|
nodes: dict[str, dict[str, Any]] = {}
|
|
edges: list[dict[str, str]] = []
|
|
for snapshot in self.latest_snapshots():
|
|
graph = snapshot["graph"]
|
|
for node in graph.get("nodes", []):
|
|
if isinstance(node, dict):
|
|
nodes[str(node.get("id", ""))] = node
|
|
for edge in graph.get("edges", []):
|
|
if isinstance(edge, dict):
|
|
edges.append(
|
|
{
|
|
"from": str(edge.get("from", "")),
|
|
"to": str(edge.get("to", "")),
|
|
"type": str(edge.get("type", "")),
|
|
}
|
|
)
|
|
return {
|
|
"apiVersion": "railiance.fabric/v1alpha1",
|
|
"kind": "FabricGraphExport",
|
|
"generated_at": _utc_now(),
|
|
"source": {"repo": "registry", "commit": "", "path": ""},
|
|
"nodes": [nodes[key] for key in sorted(nodes)],
|
|
"edges": sorted(edges, key=lambda edge: (edge["from"], edge["to"], edge["type"])),
|
|
}
|
|
|
|
def add_artifact(self, payload: dict[str, Any]) -> dict[str, Any]:
|
|
repo_slug = _required_text(payload, "repo_slug")
|
|
self.get_repository(repo_slug)
|
|
artifact_type = _required_text(payload, "artifact_type", fallback_key="type")
|
|
uri = _required_text(payload, "uri")
|
|
name = str(payload.get("name") or uri)
|
|
target_id = _optional_text(payload, "target_id")
|
|
target_kind = _optional_text(payload, "target_kind")
|
|
media_type = _optional_text(payload, "media_type")
|
|
digest = _optional_text(payload, "digest")
|
|
version = _optional_text(payload, "version")
|
|
metadata = payload.get("metadata", {})
|
|
if not isinstance(metadata, dict):
|
|
raise RegistryError("field 'metadata' must be an object")
|
|
|
|
now = _utc_now()
|
|
with self._connect() as db:
|
|
cursor = db.execute(
|
|
"""
|
|
insert into artifacts (
|
|
repo_slug, target_id, target_kind, artifact_type, name, uri,
|
|
media_type, digest, version, metadata_json, created_at
|
|
)
|
|
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
(
|
|
repo_slug,
|
|
target_id,
|
|
target_kind,
|
|
artifact_type,
|
|
name,
|
|
uri,
|
|
media_type,
|
|
digest,
|
|
version,
|
|
json.dumps(metadata, sort_keys=True),
|
|
now,
|
|
),
|
|
)
|
|
artifact_id = int(cursor.lastrowid)
|
|
return self.get_artifact(artifact_id)
|
|
|
|
def list_artifacts(
|
|
self,
|
|
repo_slug: str | None = None,
|
|
target_id: str | None = None,
|
|
artifact_type: str | None = None,
|
|
) -> list[dict[str, Any]]:
|
|
conditions: list[str] = []
|
|
params: list[str] = []
|
|
if repo_slug:
|
|
conditions.append("repo_slug = ?")
|
|
params.append(repo_slug)
|
|
if target_id:
|
|
conditions.append("target_id = ?")
|
|
params.append(target_id)
|
|
if artifact_type:
|
|
conditions.append("artifact_type = ?")
|
|
params.append(artifact_type)
|
|
where = f" where {' and '.join(conditions)}" if conditions else ""
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
"""
|
|
select id, repo_slug, target_id, target_kind, artifact_type, name, uri,
|
|
media_type, digest, version, metadata_json, created_at
|
|
from artifacts
|
|
"""
|
|
+ where
|
|
+ " order by repo_slug, target_id, artifact_type, id",
|
|
params,
|
|
).fetchall()
|
|
return [_artifact_dict(row) for row in rows]
|
|
|
|
def get_artifact(self, artifact_id: int) -> dict[str, Any]:
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
"""
|
|
select id, repo_slug, target_id, target_kind, artifact_type, name, uri,
|
|
media_type, digest, version, metadata_json, created_at
|
|
from artifacts
|
|
where id = ?
|
|
""",
|
|
(artifact_id,),
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"artifact not found: {artifact_id}", 404)
|
|
return _artifact_dict(row)
|
|
|
|
def graph_node_detail(self, graph_id: str) -> dict[str, Any]:
|
|
node = graph_node(self.combined_graph(), graph_id)
|
|
enriched = json.loads(json.dumps(node))
|
|
enriched["artifacts"] = self.list_artifacts(target_id=graph_id)
|
|
return enriched
|
|
|
|
def repository_inventory(self, repo_slug: str) -> dict[str, Any]:
|
|
repository = self.get_repository(repo_slug)
|
|
try:
|
|
latest_snapshot = self.latest_snapshot(repo_slug)
|
|
except RegistryError as exc:
|
|
if exc.status_code != 404:
|
|
raise
|
|
latest_snapshot = None
|
|
try:
|
|
latest_discovery_snapshot = self.latest_discovery_snapshot(repo_slug)
|
|
except RegistryError as exc:
|
|
if exc.status_code != 404:
|
|
raise
|
|
latest_discovery_snapshot = None
|
|
|
|
graph = latest_snapshot["graph"] if latest_snapshot else _empty_graph()
|
|
nodes = [node for node in graph.get("nodes", []) if isinstance(node, dict)]
|
|
edges = [edge for edge in graph.get("edges", []) if isinstance(edge, dict)]
|
|
artifacts = self.list_artifacts(repo_slug=repo_slug)
|
|
libraries = self.list_libraries(repo_slug=repo_slug)
|
|
discovery_snapshots = self.list_discovery_snapshots(repo_slug)
|
|
return {
|
|
"repository": repository,
|
|
"latest_snapshot": _snapshot_public_summary(latest_snapshot) if latest_snapshot else None,
|
|
"latest_discovery_snapshot": (
|
|
_discovery_snapshot_public_summary(latest_discovery_snapshot)
|
|
if latest_discovery_snapshot
|
|
else None
|
|
),
|
|
"discovery_health": (
|
|
_discovery_snapshot_health(latest_discovery_snapshot)
|
|
if latest_discovery_snapshot
|
|
else {"health": "unknown", "review_required": False}
|
|
),
|
|
"counts": {
|
|
"snapshots": len(self.list_snapshots(repo_slug)),
|
|
"discovery_snapshots": len(discovery_snapshots),
|
|
"nodes": len(nodes),
|
|
"edges": len(edges),
|
|
"artifacts": len(artifacts),
|
|
"libraries": len(libraries),
|
|
},
|
|
"graph": {
|
|
"nodes": nodes,
|
|
"edges": edges,
|
|
"owner_repos": sorted({str(node.get("repo", "")) for node in nodes if node.get("repo")}),
|
|
},
|
|
"artifacts": artifacts,
|
|
"libraries": libraries,
|
|
}
|
|
|
|
def ingest_cyclonedx(self, repo_slug: str, payload: dict[str, Any]) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
bom = payload.get("bom") if "bom" in payload else payload
|
|
if not isinstance(bom, dict):
|
|
raise RegistryError("CycloneDX payload must be an object")
|
|
if bom.get("bomFormat") and bom.get("bomFormat") != "CycloneDX":
|
|
raise RegistryError("CycloneDX payload must have bomFormat 'CycloneDX'")
|
|
|
|
entries = _cyclonedx_entries(bom)
|
|
now = _utc_now()
|
|
with self._connect() as db:
|
|
db.execute("delete from libraries where repo_slug = ?", (repo_slug,))
|
|
for entry in entries:
|
|
db.execute(
|
|
"""
|
|
insert into libraries (
|
|
repo_slug, bom_ref, component_type, name, version, purl,
|
|
scope, licenses_json, hashes_json, metadata_json, created_at
|
|
)
|
|
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
""",
|
|
(
|
|
repo_slug,
|
|
entry["bom_ref"],
|
|
entry["component_type"],
|
|
entry["name"],
|
|
entry["version"],
|
|
entry["purl"],
|
|
entry["scope"],
|
|
json.dumps(entry["licenses"], sort_keys=True),
|
|
json.dumps(entry["hashes"], sort_keys=True),
|
|
json.dumps(entry["metadata"], sort_keys=True),
|
|
now,
|
|
),
|
|
)
|
|
return {
|
|
"repo_slug": repo_slug,
|
|
"component_count": len(entries),
|
|
"libraries": self.list_libraries(repo_slug=repo_slug),
|
|
}
|
|
|
|
def list_libraries(
|
|
self,
|
|
repo_slug: str | None = None,
|
|
name: str | None = None,
|
|
purl: str | None = None,
|
|
component_type: str | None = None,
|
|
) -> list[dict[str, Any]]:
|
|
conditions: list[str] = []
|
|
params: list[str] = []
|
|
if repo_slug:
|
|
conditions.append("repo_slug = ?")
|
|
params.append(repo_slug)
|
|
if name:
|
|
conditions.append("name = ?")
|
|
params.append(name)
|
|
if purl:
|
|
conditions.append("purl = ?")
|
|
params.append(purl)
|
|
if component_type:
|
|
conditions.append("component_type = ?")
|
|
params.append(component_type)
|
|
where = f" where {' and '.join(conditions)}" if conditions else ""
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
"""
|
|
select id, repo_slug, bom_ref, component_type, name, version, purl,
|
|
scope, licenses_json, hashes_json, metadata_json, created_at
|
|
from libraries
|
|
"""
|
|
+ where
|
|
+ " order by repo_slug, name, version, id",
|
|
params,
|
|
).fetchall()
|
|
return [_library_dict(row) for row in rows]
|
|
|
|
def get_library(self, library_id: int) -> dict[str, Any]:
|
|
with self._connect() as db:
|
|
row = db.execute(
|
|
"""
|
|
select id, repo_slug, bom_ref, component_type, name, version, purl,
|
|
scope, licenses_json, hashes_json, metadata_json, created_at
|
|
from libraries
|
|
where id = ?
|
|
""",
|
|
(library_id,),
|
|
).fetchone()
|
|
if row is None:
|
|
raise RegistryError(f"library not found: {library_id}", 404)
|
|
return _library_dict(row)
|
|
|
|
def snapshot_diff(
|
|
self,
|
|
repo_slug: str,
|
|
from_id: int | None = None,
|
|
to_id: int | None = None,
|
|
) -> dict[str, Any]:
|
|
self.get_repository(repo_slug)
|
|
if from_id is None or to_id is None:
|
|
snapshots = self.list_snapshots(repo_slug)
|
|
if len(snapshots) < 2:
|
|
raise RegistryError(f"at least two snapshots are required for diff: {repo_slug}", 404)
|
|
to_id = snapshots[0]["id"] if to_id is None else to_id
|
|
from_id = snapshots[1]["id"] if from_id is None else from_id
|
|
|
|
before = self.get_snapshot(from_id)
|
|
after = self.get_snapshot(to_id)
|
|
if before["repo_slug"] != repo_slug or after["repo_slug"] != repo_slug:
|
|
raise RegistryError("snapshot ids must belong to the requested repository")
|
|
|
|
return {
|
|
"repo_slug": repo_slug,
|
|
"from": _snapshot_public_summary(before),
|
|
"to": _snapshot_public_summary(after),
|
|
"graph": _graph_diff(before["graph"], after["graph"]),
|
|
}
|
|
|
|
def search(self, query: str) -> dict[str, Any]:
|
|
needle = query.strip().lower()
|
|
if not needle:
|
|
raise RegistryError("query parameter 'q' is required")
|
|
graph = self.combined_graph()
|
|
return {
|
|
"query": query,
|
|
"nodes": [
|
|
node
|
|
for node in graph.get("nodes", [])
|
|
if isinstance(node, dict) and _matches(needle, node)
|
|
],
|
|
"artifacts": [
|
|
artifact
|
|
for artifact in self.list_artifacts()
|
|
if _matches(needle, artifact)
|
|
],
|
|
"libraries": [
|
|
library
|
|
for library in self.list_libraries()
|
|
if _matches(needle, library)
|
|
],
|
|
"repositories": [
|
|
repository
|
|
for repository in self.list_repositories()
|
|
if _matches(needle, repository)
|
|
],
|
|
}
|
|
|
|
def status(self) -> dict[str, Any]:
|
|
with self._connect() as db:
|
|
counts = {
|
|
"repositories": db.execute("select count(*) from repositories").fetchone()[0],
|
|
"snapshots": db.execute("select count(*) from snapshots").fetchone()[0],
|
|
"discovery_snapshots": db.execute("select count(*) from discovery_snapshots").fetchone()[0],
|
|
"artifacts": db.execute("select count(*) from artifacts").fetchone()[0],
|
|
"libraries": db.execute("select count(*) from libraries").fetchone()[0],
|
|
}
|
|
latest = [
|
|
{
|
|
"repo_slug": snapshot["repo_slug"],
|
|
"snapshot_id": snapshot["id"],
|
|
"commit": snapshot["commit"],
|
|
"generated_at": snapshot["generated_at"],
|
|
}
|
|
for snapshot in self.latest_snapshots()
|
|
]
|
|
latest_discovery = [
|
|
_discovery_snapshot_public_summary(snapshot)
|
|
for snapshot in self.latest_discovery_snapshots()
|
|
]
|
|
return {
|
|
"status": "ok",
|
|
"database": str(self.path),
|
|
"counts": counts,
|
|
"latest_snapshots": latest,
|
|
"latest_discovery_snapshots": latest_discovery,
|
|
}
|
|
|
|
def latest_discovery_snapshots(self, profile: str | None = None) -> list[dict[str, Any]]:
|
|
params: list[Any] = []
|
|
where = ""
|
|
if profile:
|
|
where = "where profile = ?"
|
|
params.append(profile)
|
|
with self._connect() as db:
|
|
rows = db.execute(
|
|
f"""
|
|
select ds.id, ds.repo_slug, ds.commit_sha, ds.profile, ds.generated_at,
|
|
ds.snapshot_json, ds.accepted_graph_snapshot_id, ds.created_at
|
|
from discovery_snapshots ds
|
|
join (
|
|
select repo_slug, profile, max(id) as latest_id
|
|
from discovery_snapshots
|
|
{where}
|
|
group by repo_slug, profile
|
|
) latest on latest.latest_id = ds.id
|
|
order by ds.repo_slug, ds.profile
|
|
""",
|
|
params,
|
|
).fetchall()
|
|
return [_discovery_snapshot_dict(row) for row in rows]
|
|
|
|
def _latest_graph_or_empty(self, repo_slug: str) -> dict[str, Any]:
|
|
try:
|
|
return self.latest_snapshot(repo_slug)["graph"]
|
|
except RegistryError as exc:
|
|
if exc.status_code != 404:
|
|
raise
|
|
return _empty_graph()
|
|
|
|
def _connect(self) -> sqlite3.Connection:
|
|
db = sqlite3.connect(self.path)
|
|
db.row_factory = sqlite3.Row
|
|
return db
|
|
|
|
|
|
def validate_graph_export(graph: dict[str, Any]) -> None:
|
|
schema_path = repo_root() / "schemas" / "state-hub-export.schema.yaml"
|
|
validator = draft202012_validator(schema_path)
|
|
errors = sorted(validator.iter_errors(graph), key=lambda error: list(error.path))
|
|
if errors:
|
|
error = errors[0]
|
|
location = ".".join(str(part) for part in error.path) or "<root>"
|
|
raise RegistryError(f"invalid FabricGraphExport at {location}: {error.message}")
|
|
|
|
|
|
def validate_discovery_snapshot(snapshot: dict[str, Any]) -> None:
|
|
schema_path = repo_root() / "schemas" / "discovery-snapshot.schema.yaml"
|
|
validator = draft202012_validator(schema_path)
|
|
errors = sorted(validator.iter_errors(snapshot), key=lambda error: list(error.path))
|
|
if errors:
|
|
error = errors[0]
|
|
location = ".".join(str(part) for part in error.path) or "<root>"
|
|
raise RegistryError(f"invalid FabricDiscoverySnapshot at {location}: {error.message}")
|
|
|
|
|
|
def providers(graph: dict[str, Any], capability: str) -> list[dict[str, Any]]:
|
|
result = []
|
|
for node in _nodes(graph):
|
|
attrs = _attrs(node)
|
|
if node.get("kind") != "CapabilityDeclaration":
|
|
continue
|
|
if node.get("id") == capability or attrs.get("capability_type") == capability:
|
|
result.append(
|
|
{
|
|
"provider_id": node.get("id", ""),
|
|
"name": node.get("name", ""),
|
|
"service_id": attrs.get("service_id", ""),
|
|
"capability_type": attrs.get("capability_type", ""),
|
|
"lifecycle": node.get("lifecycle", ""),
|
|
"interfaces": attrs.get("interface_ids", []),
|
|
"repo": node.get("repo", ""),
|
|
"domain": node.get("domain", ""),
|
|
}
|
|
)
|
|
return sorted(result, key=lambda item: item["provider_id"])
|
|
|
|
|
|
def consumers(graph: dict[str, Any], target: str) -> list[dict[str, Any]]:
|
|
nodes = _nodes_by_id(graph)
|
|
target_interface_type = ""
|
|
target_node = nodes.get(target)
|
|
if target_node and target_node.get("kind") == "InterfaceDeclaration":
|
|
target_interface_type = str(_attrs(target_node).get("interface_type", ""))
|
|
|
|
result: list[dict[str, Any]] = []
|
|
bindings_by_dependency = _bindings_by_dependency(graph)
|
|
for dependency in _dependency_nodes(graph):
|
|
attrs = _attrs(dependency)
|
|
dependency_id = str(dependency.get("id", ""))
|
|
dependency_matches = target in {
|
|
dependency_id,
|
|
str(attrs.get("requires_capability_id", "")),
|
|
str(attrs.get("requires_capability_type", "")),
|
|
str(attrs.get("interface_type", "")),
|
|
} or bool(target_interface_type and target_interface_type == attrs.get("interface_type"))
|
|
|
|
bindings = bindings_by_dependency.get(dependency_id, [])
|
|
matching_bindings = [
|
|
binding
|
|
for binding in bindings
|
|
if target in {binding["provider_capability_id"], binding["provider_interface_id"]}
|
|
]
|
|
if dependency_matches and not bindings:
|
|
result.append(_consumer_match(attrs, dependency_id, {}))
|
|
for binding in bindings:
|
|
if dependency_matches or binding in matching_bindings:
|
|
result.append(_consumer_match(attrs, dependency_id, binding))
|
|
return sorted(result, key=lambda item: (item["consumer_service_id"], item["dependency_id"]))
|
|
|
|
|
|
def unresolved_dependencies(graph: dict[str, Any]) -> list[dict[str, Any]]:
|
|
result = []
|
|
bindings_by_dependency = _bindings_by_dependency(graph)
|
|
for dependency in _dependency_nodes(graph):
|
|
attrs = _attrs(dependency)
|
|
dependency_id = str(dependency.get("id", ""))
|
|
required_id = str(attrs.get("requires_capability_id", ""))
|
|
required_type = str(attrs.get("requires_capability_type", ""))
|
|
provider_matches = providers(graph, required_id or required_type)
|
|
bindings = bindings_by_dependency.get(dependency_id, [])
|
|
has_missing_binding = any(binding.get("status") in {"missing", "disputed"} for binding in bindings)
|
|
if not provider_matches or has_missing_binding:
|
|
result.append(
|
|
{
|
|
"dependency_id": dependency_id,
|
|
"consumer_service_id": attrs.get("consumer_service_id", ""),
|
|
"requires_capability_id": required_id,
|
|
"requires_capability_type": required_type,
|
|
"interface_type": attrs.get("interface_type", ""),
|
|
"reason": "missing_provider" if not provider_matches else "binding_not_exact",
|
|
}
|
|
)
|
|
return sorted(result, key=lambda item: item["dependency_id"])
|
|
|
|
|
|
def blast_radius(graph: dict[str, Any], interface: str) -> list[dict[str, Any]]:
|
|
target_node = _nodes_by_id(graph).get(interface)
|
|
matches = consumers(graph, interface)
|
|
if target_node and target_node.get("kind") == "InterfaceDeclaration":
|
|
return [match for match in matches if match.get("provider_interface_id") == interface]
|
|
return [
|
|
match
|
|
for match in matches
|
|
if _dependency_attrs(graph, match["dependency_id"]).get("interface_type") == interface
|
|
]
|
|
|
|
|
|
def dependency_path_lines(graph: dict[str, Any], service_id: str) -> list[str]:
|
|
nodes = _nodes_by_id(graph)
|
|
if service_id not in nodes or nodes[service_id].get("kind") != "ServiceDeclaration":
|
|
return [f"unknown service: {service_id}"]
|
|
|
|
deps_by_consumer: dict[str, list[dict[str, Any]]] = {}
|
|
for dependency in _dependency_nodes(graph):
|
|
attrs = _attrs(dependency)
|
|
deps_by_consumer.setdefault(str(attrs.get("consumer_service_id", "")), []).append(dependency)
|
|
|
|
capability_service = {
|
|
str(node.get("id", "")): str(_attrs(node).get("service_id", ""))
|
|
for node in _nodes(graph)
|
|
if node.get("kind") == "CapabilityDeclaration"
|
|
}
|
|
bindings_by_dependency = _bindings_by_dependency(graph)
|
|
|
|
lines: list[str] = []
|
|
|
|
def walk(current: str, indent: int, stack: list[str]) -> None:
|
|
prefix = " " * indent
|
|
if current in stack:
|
|
lines.append(f"{prefix}{current} (cycle)")
|
|
return
|
|
lines.append(f"{prefix}{current}")
|
|
dependencies = sorted(deps_by_consumer.get(current, []), key=lambda item: str(item.get("id", "")))
|
|
if not dependencies:
|
|
lines.append(f"{prefix} no declared dependencies")
|
|
return
|
|
for dependency in dependencies:
|
|
dependency_id = str(dependency.get("id", ""))
|
|
attrs = _attrs(dependency)
|
|
required = attrs.get("requires_capability_type", "")
|
|
lines.append(f"{prefix} requires {required}: {dependency_id}")
|
|
bindings = bindings_by_dependency.get(dependency_id, [])
|
|
if not bindings:
|
|
candidate_providers = providers(graph, str(required))
|
|
if candidate_providers:
|
|
for provider in candidate_providers:
|
|
lines.append(f"{prefix} candidate {provider['provider_id']}")
|
|
else:
|
|
lines.append(f"{prefix} unresolved")
|
|
continue
|
|
for binding in bindings:
|
|
provider_id = binding.get("provider_capability_id", "")
|
|
provider_service = capability_service.get(provider_id, "")
|
|
status = binding.get("status", "")
|
|
lines.append(f"{prefix} {status} -> {provider_id}")
|
|
if provider_service and provider_service != current:
|
|
walk(provider_service, indent + 3, stack + [current])
|
|
|
|
walk(service_id, 0, [])
|
|
return lines
|
|
|
|
|
|
def graph_node(graph: dict[str, Any], graph_id: str) -> dict[str, Any]:
|
|
node = _nodes_by_id(graph).get(graph_id)
|
|
if node is None:
|
|
raise RegistryError(f"graph node not found: {graph_id}", 404)
|
|
return node
|
|
|
|
|
|
def backstage_projection(graph: dict[str, Any]) -> dict[str, Any]:
|
|
items: list[dict[str, Any]] = []
|
|
domains = sorted({str(node.get("domain", "")) for node in _nodes(graph) if node.get("domain")})
|
|
for domain in domains:
|
|
items.append(
|
|
{
|
|
"apiVersion": "backstage.io/v1alpha1",
|
|
"kind": "Domain",
|
|
"metadata": {"name": _backstage_name(domain), "title": domain},
|
|
"spec": {"owner": _backstage_name(domain)},
|
|
}
|
|
)
|
|
items.append(
|
|
{
|
|
"apiVersion": "backstage.io/v1alpha1",
|
|
"kind": "System",
|
|
"metadata": {"name": _backstage_name(domain), "title": domain},
|
|
"spec": {"owner": _backstage_name(domain), "domain": _backstage_name(domain)},
|
|
}
|
|
)
|
|
|
|
for node in _nodes(graph):
|
|
kind = str(node.get("kind", ""))
|
|
if kind == "ServiceDeclaration":
|
|
attrs = _attrs(node)
|
|
items.append(
|
|
{
|
|
"apiVersion": "backstage.io/v1alpha1",
|
|
"kind": "Component",
|
|
"metadata": _backstage_metadata(node),
|
|
"spec": {
|
|
"type": "service",
|
|
"lifecycle": _backstage_lifecycle(str(node.get("lifecycle", ""))),
|
|
"owner": _backstage_owner(node),
|
|
"system": _backstage_name(str(node.get("domain", ""))),
|
|
"providesApis": [_backstage_name(value) for value in attrs.get("exposes_interfaces", [])],
|
|
},
|
|
}
|
|
)
|
|
elif kind == "InterfaceDeclaration":
|
|
attrs = _attrs(node)
|
|
items.append(
|
|
{
|
|
"apiVersion": "backstage.io/v1alpha1",
|
|
"kind": "API",
|
|
"metadata": _backstage_metadata(node),
|
|
"spec": {
|
|
"type": _backstage_api_type(str(attrs.get("interface_type", ""))),
|
|
"lifecycle": _backstage_lifecycle(str(node.get("lifecycle", ""))),
|
|
"owner": _backstage_owner(node),
|
|
"system": _backstage_name(str(node.get("domain", ""))),
|
|
"definition": "",
|
|
},
|
|
}
|
|
)
|
|
elif kind == "CapabilityDeclaration":
|
|
attrs = _attrs(node)
|
|
items.append(
|
|
{
|
|
"apiVersion": "backstage.io/v1alpha1",
|
|
"kind": "Resource",
|
|
"metadata": _backstage_metadata(node),
|
|
"spec": {
|
|
"type": attrs.get("capability_type", "capability"),
|
|
"owner": _backstage_owner(node),
|
|
"system": _backstage_name(str(node.get("domain", ""))),
|
|
},
|
|
}
|
|
)
|
|
|
|
return {
|
|
"apiVersion": "railiance.fabric/v1alpha1",
|
|
"kind": "BackstageCatalogProjection",
|
|
"items": items,
|
|
}
|
|
|
|
|
|
def xregistry_projection(graph: dict[str, Any]) -> dict[str, Any]:
|
|
groups = {
|
|
"services": _xregistry_group("service", "services"),
|
|
"capabilities": _xregistry_group("capability", "capabilities"),
|
|
"interfaces": _xregistry_group("interface", "interfaces"),
|
|
"dependencies": _xregistry_group("dependency", "dependencies"),
|
|
"bindings": _xregistry_group("binding", "bindings"),
|
|
}
|
|
group_by_kind = {
|
|
"ServiceDeclaration": "services",
|
|
"CapabilityDeclaration": "capabilities",
|
|
"InterfaceDeclaration": "interfaces",
|
|
"DependencyDeclaration": "dependencies",
|
|
"BindingAssertion": "bindings",
|
|
}
|
|
for node in _nodes(graph):
|
|
group_name = group_by_kind.get(str(node.get("kind", "")))
|
|
if group_name is None:
|
|
continue
|
|
node_id = str(node.get("id", ""))
|
|
groups[group_name]["resources"][_xregistry_key(node_id)] = {
|
|
"id": node_id,
|
|
"name": node.get("name", node_id),
|
|
"versionid": "latest",
|
|
"attributes": {
|
|
"kind": node.get("kind", ""),
|
|
"repo": node.get("repo", ""),
|
|
"domain": node.get("domain", ""),
|
|
"lifecycle": node.get("lifecycle", ""),
|
|
**_attrs(node),
|
|
},
|
|
}
|
|
|
|
return {
|
|
"apiVersion": "railiance.fabric/v1alpha1",
|
|
"kind": "XRegistryProjection",
|
|
"groups": groups,
|
|
}
|
|
|
|
|
|
def library_xregistry_projection(libraries: list[dict[str, Any]]) -> dict[str, Any]:
|
|
group = _xregistry_group("library", "libraries")
|
|
for library in libraries:
|
|
key = _xregistry_key(str(library.get("purl") or library.get("bom_ref") or library.get("name", "")))
|
|
group["resources"][key] = {
|
|
"id": library.get("purl") or library.get("bom_ref") or library.get("name", ""),
|
|
"name": library.get("name", ""),
|
|
"versionid": library.get("version") or "unknown",
|
|
"attributes": {
|
|
"repo": library.get("repo_slug", ""),
|
|
"bom_ref": library.get("bom_ref", ""),
|
|
"component_type": library.get("component_type", ""),
|
|
"scope": library.get("scope", ""),
|
|
"licenses": library.get("licenses", []),
|
|
"hashes": library.get("hashes", []),
|
|
},
|
|
}
|
|
return {
|
|
"apiVersion": "railiance.fabric/v1alpha1",
|
|
"kind": "LibraryXRegistryProjection",
|
|
"groups": {"libraries": group},
|
|
}
|
|
|
|
|
|
def _with_source(graph: dict[str, Any], repo_slug: str, commit: str, generated_at: str) -> dict[str, Any]:
|
|
copy = json.loads(json.dumps(graph))
|
|
copy.setdefault("generated_at", generated_at)
|
|
copy.setdefault("source", {})
|
|
copy["source"].setdefault("repo", repo_slug)
|
|
copy["source"].setdefault("commit", commit)
|
|
copy["source"].setdefault("path", "")
|
|
return copy
|
|
|
|
|
|
def _snapshot_dict(row: sqlite3.Row) -> dict[str, Any]:
|
|
return {
|
|
"id": row["id"],
|
|
"repo_slug": row["repo_slug"],
|
|
"commit": row["commit_sha"],
|
|
"generated_at": row["generated_at"],
|
|
"graph": json.loads(row["graph_json"]),
|
|
"created_at": row["created_at"],
|
|
}
|
|
|
|
|
|
def _snapshot_summary(row: sqlite3.Row) -> dict[str, Any]:
|
|
graph = json.loads(row["graph_json"])
|
|
return {
|
|
"id": row["id"],
|
|
"repo_slug": row["repo_slug"],
|
|
"commit": row["commit_sha"],
|
|
"generated_at": row["generated_at"],
|
|
"created_at": row["created_at"],
|
|
"node_count": len(graph.get("nodes", [])),
|
|
"edge_count": len(graph.get("edges", [])),
|
|
}
|
|
|
|
|
|
def _snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
|
|
graph = snapshot["graph"]
|
|
return {
|
|
"id": snapshot["id"],
|
|
"repo_slug": snapshot["repo_slug"],
|
|
"commit": snapshot["commit"],
|
|
"generated_at": snapshot["generated_at"],
|
|
"created_at": snapshot["created_at"],
|
|
"node_count": len(graph.get("nodes", [])),
|
|
"edge_count": len(graph.get("edges", [])),
|
|
}
|
|
|
|
|
|
def _discovery_snapshot_dict(row: sqlite3.Row) -> dict[str, Any]:
|
|
snapshot = json.loads(row["snapshot_json"])
|
|
return {
|
|
"id": row["id"],
|
|
"repo_slug": row["repo_slug"],
|
|
"commit": row["commit_sha"],
|
|
"profile": row["profile"],
|
|
"generated_at": row["generated_at"],
|
|
"snapshot": snapshot,
|
|
"accepted_graph_snapshot_id": row["accepted_graph_snapshot_id"],
|
|
"created_at": row["created_at"],
|
|
}
|
|
|
|
|
|
def _discovery_snapshot_summary(row: sqlite3.Row) -> dict[str, Any]:
|
|
snapshot = json.loads(row["snapshot_json"])
|
|
candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {}
|
|
reconciliation = snapshot.get("reconciliation") if isinstance(snapshot.get("reconciliation"), dict) else {}
|
|
diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {}
|
|
return {
|
|
"id": row["id"],
|
|
"repo_slug": row["repo_slug"],
|
|
"commit": row["commit_sha"],
|
|
"profile": row["profile"],
|
|
"generated_at": row["generated_at"],
|
|
"accepted_graph_snapshot_id": row["accepted_graph_snapshot_id"],
|
|
"created_at": row["created_at"],
|
|
"candidate_counts": {
|
|
"nodes": len(candidates.get("nodes", [])) if isinstance(candidates.get("nodes"), list) else 0,
|
|
"edges": len(candidates.get("edges", [])) if isinstance(candidates.get("edges"), list) else 0,
|
|
"attributes": len(candidates.get("attributes", [])) if isinstance(candidates.get("attributes"), list) else 0,
|
|
},
|
|
"diff_counts": {
|
|
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
|
|
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
|
|
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
|
|
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
|
|
},
|
|
"review_artifact_count": len(snapshot.get("review_artifacts", []))
|
|
if isinstance(snapshot.get("review_artifacts"), list)
|
|
else 0,
|
|
"connector_run_count": len(snapshot.get("connector_runs", []))
|
|
if isinstance(snapshot.get("connector_runs"), list)
|
|
else 0,
|
|
}
|
|
|
|
|
|
def _discovery_snapshot_public_summary(snapshot: dict[str, Any]) -> dict[str, Any]:
|
|
candidates = snapshot["snapshot"].get("candidates", {})
|
|
reconciliation = snapshot["snapshot"].get("reconciliation", {})
|
|
diff = reconciliation.get("diff", {}) if isinstance(reconciliation, dict) else {}
|
|
result = {
|
|
"id": snapshot["id"],
|
|
"repo_slug": snapshot["repo_slug"],
|
|
"commit": snapshot["commit"],
|
|
"profile": snapshot["profile"],
|
|
"generated_at": snapshot["generated_at"],
|
|
"accepted_graph_snapshot_id": snapshot["accepted_graph_snapshot_id"],
|
|
"created_at": snapshot["created_at"],
|
|
"candidate_counts": {
|
|
"nodes": len(candidates.get("nodes", [])) if isinstance(candidates.get("nodes"), list) else 0,
|
|
"edges": len(candidates.get("edges", [])) if isinstance(candidates.get("edges"), list) else 0,
|
|
"attributes": len(candidates.get("attributes", [])) if isinstance(candidates.get("attributes"), list) else 0,
|
|
},
|
|
"diff_counts": {
|
|
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
|
|
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
|
|
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
|
|
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
|
|
},
|
|
}
|
|
result.update(_discovery_snapshot_health(snapshot))
|
|
return result
|
|
|
|
|
|
def _discovery_snapshot_health(snapshot: dict[str, Any]) -> dict[str, Any]:
|
|
payload = snapshot.get("snapshot") if isinstance(snapshot.get("snapshot"), dict) else {}
|
|
reconciliation = payload.get("reconciliation") if isinstance(payload.get("reconciliation"), dict) else {}
|
|
diff = reconciliation.get("diff") if isinstance(reconciliation.get("diff"), dict) else {}
|
|
diff_counts = {
|
|
"added": len(diff.get("added", [])) if isinstance(diff.get("added"), list) else 0,
|
|
"changed": len(diff.get("changed", [])) if isinstance(diff.get("changed"), list) else 0,
|
|
"retired": len(diff.get("retired", [])) if isinstance(diff.get("retired"), list) else 0,
|
|
"conflicted": len(diff.get("conflicted", [])) if isinstance(diff.get("conflicted"), list) else 0,
|
|
}
|
|
review_artifact_count = (
|
|
len(payload.get("review_artifacts", []))
|
|
if isinstance(payload.get("review_artifacts"), list)
|
|
else 0
|
|
)
|
|
connector_run_count = (
|
|
len(payload.get("connector_runs", []))
|
|
if isinstance(payload.get("connector_runs"), list)
|
|
else 0
|
|
)
|
|
tombstone_count = (
|
|
len(payload.get("tombstones", []))
|
|
if isinstance(payload.get("tombstones"), list)
|
|
else 0
|
|
)
|
|
review_required = bool(diff_counts["conflicted"] or review_artifact_count or tombstone_count)
|
|
changed = any(diff_counts.values())
|
|
if review_required:
|
|
health = "needs_review"
|
|
elif changed:
|
|
health = "changed"
|
|
else:
|
|
health = "fresh"
|
|
return {
|
|
"health": health,
|
|
"review_required": review_required,
|
|
"review_artifact_count": review_artifact_count,
|
|
"connector_run_count": connector_run_count,
|
|
"tombstone_count": tombstone_count,
|
|
}
|
|
|
|
|
|
def _row_dict(row: sqlite3.Row) -> dict[str, Any]:
|
|
return {key: row[key] for key in row.keys()}
|
|
|
|
|
|
def _artifact_dict(row: sqlite3.Row) -> dict[str, Any]:
|
|
return {
|
|
"id": row["id"],
|
|
"repo_slug": row["repo_slug"],
|
|
"target_id": row["target_id"],
|
|
"target_kind": row["target_kind"],
|
|
"artifact_type": row["artifact_type"],
|
|
"name": row["name"],
|
|
"uri": row["uri"],
|
|
"media_type": row["media_type"],
|
|
"digest": row["digest"],
|
|
"version": row["version"],
|
|
"metadata": json.loads(row["metadata_json"]),
|
|
"created_at": row["created_at"],
|
|
}
|
|
|
|
|
|
def _library_dict(row: sqlite3.Row) -> dict[str, Any]:
|
|
return {
|
|
"id": row["id"],
|
|
"repo_slug": row["repo_slug"],
|
|
"bom_ref": row["bom_ref"],
|
|
"component_type": row["component_type"],
|
|
"name": row["name"],
|
|
"version": row["version"],
|
|
"purl": row["purl"],
|
|
"scope": row["scope"],
|
|
"licenses": json.loads(row["licenses_json"]),
|
|
"hashes": json.loads(row["hashes_json"]),
|
|
"metadata": json.loads(row["metadata_json"]),
|
|
"created_at": row["created_at"],
|
|
}
|
|
|
|
|
|
def _cyclonedx_entries(bom: dict[str, Any]) -> list[dict[str, Any]]:
|
|
entries: list[dict[str, Any]] = []
|
|
for component in bom.get("components", []):
|
|
if not isinstance(component, dict):
|
|
continue
|
|
name = str(component.get("name") or "").strip()
|
|
if not name:
|
|
continue
|
|
entries.append(
|
|
{
|
|
"bom_ref": _optional_component_text(component, "bom-ref"),
|
|
"component_type": str(component.get("type") or "library"),
|
|
"name": name,
|
|
"version": _optional_component_text(component, "version"),
|
|
"purl": _optional_component_text(component, "purl"),
|
|
"scope": _optional_component_text(component, "scope"),
|
|
"licenses": _normalize_licenses(component.get("licenses", [])),
|
|
"hashes": component.get("hashes", []) if isinstance(component.get("hashes", []), list) else [],
|
|
"metadata": {
|
|
"group": component.get("group"),
|
|
"publisher": component.get("publisher"),
|
|
"description": component.get("description"),
|
|
"externalReferences": component.get("externalReferences", []),
|
|
},
|
|
}
|
|
)
|
|
for service in bom.get("services", []):
|
|
if not isinstance(service, dict):
|
|
continue
|
|
name = str(service.get("name") or "").strip()
|
|
if not name:
|
|
continue
|
|
entries.append(
|
|
{
|
|
"bom_ref": _optional_component_text(service, "bom-ref"),
|
|
"component_type": "service",
|
|
"name": name,
|
|
"version": _optional_component_text(service, "version"),
|
|
"purl": None,
|
|
"scope": None,
|
|
"licenses": [],
|
|
"hashes": [],
|
|
"metadata": {
|
|
"provider": service.get("provider"),
|
|
"endpoints": service.get("endpoints", []),
|
|
"externalReferences": service.get("externalReferences", []),
|
|
},
|
|
}
|
|
)
|
|
return entries
|
|
|
|
|
|
def _optional_component_text(component: dict[str, Any], key: str) -> str | None:
|
|
value = component.get(key)
|
|
if value is None:
|
|
return None
|
|
return str(value)
|
|
|
|
|
|
def _normalize_licenses(raw: Any) -> list[dict[str, Any]]:
|
|
if not isinstance(raw, list):
|
|
return []
|
|
normalized = []
|
|
for item in raw:
|
|
if isinstance(item, dict):
|
|
normalized.append(item)
|
|
return normalized
|
|
|
|
|
|
def _empty_graph() -> dict[str, Any]:
|
|
return {
|
|
"apiVersion": "railiance.fabric/v1alpha1",
|
|
"kind": "FabricGraphExport",
|
|
"nodes": [],
|
|
"edges": [],
|
|
}
|
|
|
|
|
|
def _discovery_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]:
|
|
result: dict[str, Any] = {}
|
|
for collection in ("nodes", "edges", "attributes"):
|
|
before_items = _discovery_candidates_by_key(before, collection)
|
|
after_items = _discovery_candidates_by_key(after, collection)
|
|
added = sorted(set(after_items) - set(before_items))
|
|
removed = sorted(set(before_items) - set(after_items))
|
|
common = sorted(set(before_items) & set(after_items))
|
|
changed = [
|
|
key for key in common
|
|
if _stable_json(_without_review_noise(before_items[key])) != _stable_json(_without_review_noise(after_items[key]))
|
|
]
|
|
confidence_changed = [
|
|
{
|
|
"stable_key": key,
|
|
"before": before_items[key].get("confidence"),
|
|
"after": after_items[key].get("confidence"),
|
|
}
|
|
for key in common
|
|
if before_items[key].get("confidence") != after_items[key].get("confidence")
|
|
]
|
|
result[collection] = {
|
|
"added": [after_items[key] for key in added],
|
|
"removed": [before_items[key] for key in removed],
|
|
"changed": [
|
|
{
|
|
"stable_key": key,
|
|
"before": before_items[key],
|
|
"after": after_items[key],
|
|
}
|
|
for key in changed
|
|
],
|
|
"confidence_changed": confidence_changed,
|
|
}
|
|
after_reconciliation = after.get("reconciliation") if isinstance(after.get("reconciliation"), dict) else {}
|
|
result["review"] = {
|
|
"retired": after_reconciliation.get("diff", {}).get("retired", [])
|
|
if isinstance(after_reconciliation.get("diff"), dict)
|
|
else [],
|
|
"conflicted": after_reconciliation.get("diff", {}).get("conflicted", [])
|
|
if isinstance(after_reconciliation.get("diff"), dict)
|
|
else [],
|
|
"conflicts": after_reconciliation.get("conflicts", []),
|
|
}
|
|
return result
|
|
|
|
|
|
def _project_discovery_snapshot(
|
|
base_graph: dict[str, Any],
|
|
discovery_snapshot: dict[str, Any],
|
|
*,
|
|
accepted_keys: set[str],
|
|
accept_review_states: set[str],
|
|
) -> dict[str, Any]:
|
|
graph = json.loads(json.dumps(base_graph))
|
|
graph.setdefault("apiVersion", "railiance.fabric/v1alpha1")
|
|
graph.setdefault("kind", "FabricGraphExport")
|
|
graph.setdefault("nodes", [])
|
|
graph.setdefault("edges", [])
|
|
candidates = discovery_snapshot.get("candidates") if isinstance(discovery_snapshot.get("candidates"), dict) else {}
|
|
candidate_nodes = [
|
|
node for node in candidates.get("nodes", [])
|
|
if isinstance(node, dict) and _candidate_is_accepted(node, accepted_keys, accept_review_states)
|
|
]
|
|
candidate_edges = [
|
|
edge for edge in candidates.get("edges", [])
|
|
if isinstance(edge, dict) and _candidate_is_accepted(edge, accepted_keys, accept_review_states)
|
|
]
|
|
existing_nodes = {
|
|
str(node.get("id")): node
|
|
for node in graph.get("nodes", [])
|
|
if isinstance(node, dict) and node.get("id")
|
|
}
|
|
key_to_graph_id = {
|
|
str(node.get("id")): str(node.get("id"))
|
|
for node in graph.get("nodes", [])
|
|
if isinstance(node, dict) and node.get("id")
|
|
}
|
|
for candidate in candidate_nodes:
|
|
graph_id = _candidate_graph_id(candidate)
|
|
key_to_graph_id[str(candidate.get("stable_key"))] = graph_id
|
|
if candidate.get("graph_id"):
|
|
key_to_graph_id[str(candidate.get("graph_id"))] = graph_id
|
|
if graph_id in existing_nodes:
|
|
continue
|
|
projected = _project_candidate_node(candidate, graph_id)
|
|
existing_nodes[graph_id] = projected
|
|
graph["nodes"].append(projected)
|
|
|
|
existing_edges = {
|
|
_edge_key(edge)
|
|
for edge in graph.get("edges", [])
|
|
if isinstance(edge, dict)
|
|
}
|
|
for candidate in candidate_edges:
|
|
source = key_to_graph_id.get(str(candidate.get("source_key") or ""))
|
|
target = key_to_graph_id.get(str(candidate.get("target_key") or ""))
|
|
if not source or not target:
|
|
continue
|
|
edge = {"from": source, "to": target, "type": str(candidate.get("edge_type") or "")}
|
|
edge_key = _edge_key(edge)
|
|
if edge["type"] and edge_key not in existing_edges:
|
|
existing_edges.add(edge_key)
|
|
graph["edges"].append(edge)
|
|
validate_graph_export(graph)
|
|
return graph
|
|
|
|
|
|
def _project_candidate_node(candidate: dict[str, Any], graph_id: str) -> dict[str, Any]:
|
|
attributes = candidate.get("attributes") if isinstance(candidate.get("attributes"), dict) else {}
|
|
return {
|
|
"id": graph_id,
|
|
"kind": str(candidate.get("kind") or "DiscoveredEntity"),
|
|
"name": str(candidate.get("label") or graph_id),
|
|
"repo": str(candidate.get("repo") or ""),
|
|
"domain": str(candidate.get("domain") or ""),
|
|
"lifecycle": str(candidate.get("lifecycle") or "active"),
|
|
"attributes": {
|
|
**attributes,
|
|
"discovery_stable_key": candidate.get("stable_key"),
|
|
"discovery_origin": candidate.get("origin"),
|
|
"discovery_review_state": candidate.get("review_state"),
|
|
"discovery_confidence": candidate.get("confidence"),
|
|
"discovery_source_anchors": candidate.get("source_anchors", []),
|
|
"discovery_provenance": candidate.get("provenance", []),
|
|
},
|
|
}
|
|
|
|
|
|
def _candidate_graph_id(candidate: dict[str, Any]) -> str:
|
|
graph_id = str(candidate.get("graph_id") or "").strip()
|
|
if graph_id:
|
|
return graph_id
|
|
repo = _graph_id_part(str(candidate.get("repo") or "repo"))
|
|
kind = _graph_id_part(str(candidate.get("kind") or "entity"))
|
|
label = _graph_id_part(str(candidate.get("label") or candidate.get("stable_key") or "candidate"))
|
|
candidate_id = f"{repo}.{kind}.{label}".strip(".")
|
|
if len(candidate_id) > 150:
|
|
digest = hashlib.sha256(str(candidate.get("stable_key") or candidate_id).encode("utf-8")).hexdigest()[:10]
|
|
candidate_id = f"{candidate_id[:139].rstrip('.')}.{digest}"
|
|
if len(candidate_id) < 3:
|
|
candidate_id = f"{candidate_id}.id"
|
|
return candidate_id
|
|
|
|
|
|
def _graph_id_part(value: str) -> str:
|
|
text = re.sub(r"[^a-z0-9.-]+", "-", value.lower()).strip(".-")
|
|
text = re.sub(r"-+", "-", text)
|
|
text = re.sub(r"\.+", ".", text)
|
|
return text or "unknown"
|
|
|
|
|
|
def _candidate_is_accepted(
|
|
candidate: dict[str, Any],
|
|
accepted_keys: set[str],
|
|
accept_review_states: set[str],
|
|
) -> bool:
|
|
stable_key = str(candidate.get("stable_key") or "")
|
|
if stable_key in accepted_keys:
|
|
return True
|
|
return str(candidate.get("review_state") or "") in accept_review_states
|
|
|
|
|
|
def _discovery_candidates_by_key(snapshot: dict[str, Any], collection: str) -> dict[str, dict[str, Any]]:
|
|
candidates = snapshot.get("candidates") if isinstance(snapshot.get("candidates"), dict) else {}
|
|
return {
|
|
str(item["stable_key"]): item
|
|
for item in candidates.get(collection, [])
|
|
if isinstance(item, dict) and item.get("stable_key")
|
|
}
|
|
|
|
|
|
def _without_review_noise(candidate: dict[str, Any]) -> dict[str, Any]:
|
|
return {
|
|
key: value
|
|
for key, value in candidate.items()
|
|
if key not in {"provenance"}
|
|
}
|
|
|
|
|
|
def _graph_diff(before: dict[str, Any], after: dict[str, Any]) -> dict[str, Any]:
|
|
before_nodes = _nodes_by_id(before)
|
|
after_nodes = _nodes_by_id(after)
|
|
before_edges = {_edge_key(edge): edge for edge in before.get("edges", []) if isinstance(edge, dict)}
|
|
after_edges = {_edge_key(edge): edge for edge in after.get("edges", []) if isinstance(edge, dict)}
|
|
|
|
added_node_ids = sorted(set(after_nodes) - set(before_nodes))
|
|
removed_node_ids = sorted(set(before_nodes) - set(after_nodes))
|
|
common_node_ids = sorted(set(before_nodes) & set(after_nodes))
|
|
changed_node_ids = [
|
|
node_id
|
|
for node_id in common_node_ids
|
|
if _stable_json(before_nodes[node_id]) != _stable_json(after_nodes[node_id])
|
|
]
|
|
|
|
added_edge_keys = sorted(set(after_edges) - set(before_edges))
|
|
removed_edge_keys = sorted(set(before_edges) - set(after_edges))
|
|
|
|
return {
|
|
"added_nodes": [after_nodes[node_id] for node_id in added_node_ids],
|
|
"removed_nodes": [before_nodes[node_id] for node_id in removed_node_ids],
|
|
"changed_nodes": [
|
|
{
|
|
"id": node_id,
|
|
"before": before_nodes[node_id],
|
|
"after": after_nodes[node_id],
|
|
}
|
|
for node_id in changed_node_ids
|
|
],
|
|
"added_edges": [after_edges[key] for key in added_edge_keys],
|
|
"removed_edges": [before_edges[key] for key in removed_edge_keys],
|
|
}
|
|
|
|
|
|
def _edge_key(edge: dict[str, Any]) -> tuple[str, str, str]:
|
|
return (str(edge.get("from", "")), str(edge.get("to", "")), str(edge.get("type", "")))
|
|
|
|
|
|
def _stable_json(value: Any) -> str:
|
|
return json.dumps(value, sort_keys=True, separators=(",", ":"))
|
|
|
|
|
|
def _matches(needle: str, value: Any) -> bool:
|
|
return needle in _stable_json(value).lower()
|
|
|
|
|
|
def _required_text(payload: dict[str, Any], key: str, fallback_key: str | None = None) -> str:
|
|
value = payload.get(key)
|
|
if value is None and fallback_key is not None:
|
|
value = payload.get(fallback_key)
|
|
if not isinstance(value, str) or not value.strip():
|
|
raise RegistryError(f"field '{key}' is required")
|
|
return value.strip()
|
|
|
|
|
|
def _optional_text(payload: dict[str, Any], key: str) -> str | None:
|
|
value = payload.get(key)
|
|
if value is None:
|
|
return None
|
|
if not isinstance(value, str):
|
|
raise RegistryError(f"field '{key}' must be a string")
|
|
return value
|
|
|
|
|
|
def _utc_now() -> str:
|
|
return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
|
|
|
|
|
def _nodes(graph: dict[str, Any]) -> list[dict[str, Any]]:
|
|
return [node for node in graph.get("nodes", []) if isinstance(node, dict)]
|
|
|
|
|
|
def _nodes_by_id(graph: dict[str, Any]) -> dict[str, dict[str, Any]]:
|
|
return {str(node.get("id", "")): node for node in _nodes(graph)}
|
|
|
|
|
|
def _attrs(node: dict[str, Any]) -> dict[str, Any]:
|
|
attrs = node.get("attributes", {})
|
|
return attrs if isinstance(attrs, dict) else {}
|
|
|
|
|
|
def _dependency_nodes(graph: dict[str, Any]) -> list[dict[str, Any]]:
|
|
return [node for node in _nodes(graph) if node.get("kind") == "DependencyDeclaration"]
|
|
|
|
|
|
def _dependency_attrs(graph: dict[str, Any], dependency_id: str) -> dict[str, Any]:
|
|
node = _nodes_by_id(graph).get(dependency_id, {})
|
|
return _attrs(node)
|
|
|
|
|
|
def _bindings_by_dependency(graph: dict[str, Any]) -> dict[str, list[dict[str, str]]]:
|
|
result: dict[str, list[dict[str, str]]] = {}
|
|
for node in _nodes(graph):
|
|
if node.get("kind") != "BindingAssertion":
|
|
continue
|
|
attrs = _attrs(node)
|
|
dependency_id = str(attrs.get("dependency_id", ""))
|
|
if not dependency_id:
|
|
continue
|
|
result.setdefault(dependency_id, []).append(
|
|
{
|
|
"binding_id": str(node.get("id", "")),
|
|
"provider_capability_id": str(attrs.get("provider_capability_id", "")),
|
|
"provider_interface_id": str(attrs.get("provider_interface_id", "")),
|
|
"status": str(attrs.get("status", "")),
|
|
}
|
|
)
|
|
for bindings in result.values():
|
|
bindings.sort(key=lambda item: item["binding_id"])
|
|
return result
|
|
|
|
|
|
def _consumer_match(attrs: dict[str, Any], dependency_id: str, binding: dict[str, str]) -> dict[str, Any]:
|
|
return {
|
|
"consumer_service_id": attrs.get("consumer_service_id", ""),
|
|
"dependency_id": dependency_id,
|
|
"required_capability_type": attrs.get("requires_capability_type", ""),
|
|
"provider_capability_id": binding.get("provider_capability_id", ""),
|
|
"provider_interface_id": binding.get("provider_interface_id", ""),
|
|
"status": binding.get("status", ""),
|
|
}
|
|
|
|
|
|
def _backstage_metadata(node: dict[str, Any]) -> dict[str, Any]:
|
|
node_id = str(node.get("id", ""))
|
|
metadata = {
|
|
"name": _backstage_name(node_id),
|
|
"title": node.get("name", node_id),
|
|
"annotations": {
|
|
"railiance.fabric/id": node_id,
|
|
"railiance.fabric/repo": str(node.get("repo", "")),
|
|
"railiance.fabric/kind": str(node.get("kind", "")),
|
|
},
|
|
}
|
|
return metadata
|
|
|
|
|
|
def _backstage_owner(node: dict[str, Any]) -> str:
|
|
repo = str(node.get("repo", ""))
|
|
domain = str(node.get("domain", ""))
|
|
return _backstage_name(repo or domain or "unknown")
|
|
|
|
|
|
def _backstage_name(value: str) -> str:
|
|
cleaned = "".join(char.lower() if char.isalnum() else "-" for char in value)
|
|
cleaned = "-".join(part for part in cleaned.split("-") if part)
|
|
return cleaned or "unknown"
|
|
|
|
|
|
def _backstage_lifecycle(value: str) -> str:
|
|
if value in {"active", "deprecated"}:
|
|
return value
|
|
if value == "retired":
|
|
return "deprecated"
|
|
return "experimental"
|
|
|
|
|
|
def _backstage_api_type(interface_type: str) -> str:
|
|
if interface_type == "http-api":
|
|
return "openapi"
|
|
if interface_type == "event-stream":
|
|
return "asyncapi"
|
|
return "other"
|
|
|
|
|
|
def _xregistry_group(singular: str, plural: str) -> dict[str, Any]:
|
|
return {
|
|
"singular": singular,
|
|
"plural": plural,
|
|
"resources": {},
|
|
}
|
|
|
|
|
|
def _xregistry_key(value: str) -> str:
|
|
return value.replace("/", ".").strip(".") or "unknown"
|