Files
state-hub/api/services/fabric_graph.py

631 lines
25 KiB
Python

from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
from typing import Any
from pydantic import ValidationError
from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession
from api.models.fabric_graph import FabricGraphEdge, FabricGraphImport, FabricGraphNode
from api.models.progress_event import ProgressEvent
from api.schemas.fabric_graph import FabricGraphExportPayload
# Edge types that exist purely for presentation. The canon metadata check
# rejects an edge of one of these types unless it sets display_only=true.
DISPLAY_ONLY_EDGE_TYPES = {
    "collapsed_into",
    "declares",
    "grouped_with",
    "highlight_path",
    "near",
    "owns_deployment",
    "same_color_group",
}
class FabricGraphValidationError(Exception):
    """Raised when a fabric graph export is rejected.

    The structured ``detail`` mapping is kept on the instance, and its string
    form is used as the exception message.
    """

    def __init__(self, detail: dict[str, Any]) -> None:
        self.detail = detail
        message = str(detail)
        super().__init__(message)
def split_graph_ingest_body(body: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
    """Separate the graph payload from any wrapper metadata.

    When ``body["graph"]`` is a dict, the body is treated as a wrapper: the
    graph is returned together with every other top-level key as metadata.
    Otherwise the body itself is the graph and the metadata is empty.
    """
    graph = body.get("graph")
    if not isinstance(graph, dict):
        return body, {}
    wrapper_metadata = {name: item for name, item in body.items() if name != "graph"}
    return graph, wrapper_metadata
async def ingest_fabric_graph_export(
    session: AsyncSession,
    payload: dict[str, Any],
    *,
    source_repo_slug: str,
    source_url: str | None,
    requested_by: str,
) -> tuple[FabricGraphImport, bool, bool]:
    """Validate a FabricGraphExport payload and persist it as an import run.

    Invalid payloads are recorded as an ``invalid`` import run and rejected.
    A payload whose content hash matches an already ``valid`` import for the
    same repo only refreshes that import's latest marker and ``last_seen_at``.
    Otherwise the import row plus one row per node and edge are written, and
    every other import for the repo loses its latest marker.

    The content hash ignores ``generated_at``, so the same content can be
    rejected once and accepted later. In that case the existing non-valid row
    is promoted in place instead of inserting a second row with the same
    repo/hash pair, which would break the single-row lookup done by
    ``_find_import`` on later calls.

    Args:
        session: Async session used for all reads and writes; committed here.
        payload: Raw export payload as received.
        source_repo_slug: Repo the export belongs to.
        source_url: Where the export was fetched from, if known.
        requested_by: Identity recorded in the progress event.

    Returns:
        ``(import_run, created, idempotent)``: ``(row, True, False)`` when the
        graph was ingested by this call, ``(row, False, True)`` when a valid
        import with the same content already existed.

    Raises:
        FabricGraphValidationError: If the payload fails validation; the
            detail carries the message, the recorded import id and status.
    """
    now = datetime.now(timezone.utc)
    content_hash = graph_content_hash(payload)
    try:
        export = validate_fabric_graph_export(payload)
    except ValueError as exc:
        import_run = await _record_invalid_import(
            session,
            payload,
            source_repo_slug=source_repo_slug,
            source_url=source_url,
            content_hash=content_hash,
            error=str(exc),
            now=now,
            requested_by=requested_by,
        )
        raise FabricGraphValidationError(
            {
                "message": str(exc),
                "import_id": str(import_run.id),
                "validation_status": import_run.validation_status,
            }
        ) from exc

    existing = await _find_import(session, source_repo_slug, content_hash)
    if existing and existing.validation_status == "valid":
        # Same content already ingested: just move the latest marker.
        await _mark_latest(session, existing)
        existing.last_seen_at = now
        await _record_progress(
            session,
            "Fabric graph export already ingested; refreshed latest marker.",
            {
                "source_repo_slug": source_repo_slug,
                "source_url": source_url,
                "content_hash": content_hash,
                "node_count": existing.node_count,
                "edge_count": existing.edge_count,
                "requested_by": requested_by,
                "idempotent": True,
            },
        )
        await session.commit()
        await session.refresh(existing)
        return existing, False, True

    source = export.source
    import_fields: dict[str, Any] = {
        "source_repo_slug": source_repo_slug,
        "source_url": source_url,
        "source_commit": source.commit if source else None,
        "source_path": source.path if source else None,
        "api_version": export.api_version,
        "schema_version": export.schema_version,
        "export_kind": export.kind,
        "exported_at": export.generated_at,
        "netkingdom_id": export.netkingdom.id if export.netkingdom else None,
        "king_actor_id": export.netkingdom.king_actor_id if export.netkingdom else None,
        "actor_count": len(export.actors),
        "fabric_count": len(export.fabrics),
        "unresolved_count": len(export.unresolved),
        "compatibility": export.compatibility,
        "content_hash": content_hash,
        "node_count": len(export.nodes),
        "edge_count": len(export.edges),
        "validation_status": "valid",
        "error_details": None,
        "graph_json": export.model_dump(mode="json", by_alias=True),
        "is_latest": True,
        "last_seen_at": now,
    }
    if existing is not None:
        # A non-valid run with this repo/hash pair already exists; reuse the
        # row rather than creating a duplicate. The invalid-import path in
        # this module writes no node or edge rows, so there is nothing stale
        # to clean up. NOTE(review): confirm no other writer attaches
        # nodes/edges to invalid imports.
        await _mark_latest(session, existing)
        for field_name, field_value in import_fields.items():
            setattr(existing, field_name, field_value)
        import_run = existing
    else:
        import_run = FabricGraphImport(**import_fields)
        await _mark_previous_not_latest(session, source_repo_slug)
        session.add(import_run)
    # Flush so import_run.id is available for the child rows.
    await session.flush()

    for node in export.nodes:
        session.add(_fabric_graph_node_row(import_run.id, source_repo_slug, node))
    for edge in export.edges:
        session.add(_fabric_graph_edge_row(import_run.id, source_repo_slug, edge))

    await _record_progress(
        session,
        "Fabric graph export ingested as State Hub read model.",
        {
            "source_repo_slug": source_repo_slug,
            "source_url": source_url,
            "content_hash": content_hash,
            "node_count": len(export.nodes),
            "edge_count": len(export.edges),
            "requested_by": requested_by,
        },
    )
    await session.commit()
    await session.refresh(import_run)
    return import_run, True, False
def _dict_section(raw: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``raw[key]`` when it is a dict, otherwise an empty dict."""
    section = raw.get(key)
    return section if isinstance(section, dict) else {}
def _fabric_graph_node_row(import_id: Any, source_repo_slug: str, node: Any) -> FabricGraphNode:
    """Build the flattened read-model row for one exported node."""
    raw = node.model_dump(mode="json")
    containment = _dict_section(raw, "containment")
    ownership = _dict_section(raw, "ownership")
    accounting = _dict_section(raw, "accounting")
    evidence = _dict_section(raw, "evidence")
    return FabricGraphNode(
        import_id=import_id,
        source_repo_slug=source_repo_slug,
        graph_id=node.id,
        kind=node.kind,
        name=node.name,
        repo_slug=node.repo or "",
        domain_slug=node.domain or "",
        lifecycle=node.lifecycle or "",
        canonical_type=raw.get("canonical_type"),
        canon_category=node.canon_category,
        canon_anchor=node.canon_anchor,
        mapping_fit=node.mapping_fit,
        # The top-level evidence_state wins; fall back to the nested block.
        evidence_state=node.evidence_state or evidence.get("state"),
        evidence_review_state=evidence.get("review_state"),
        evidence_confidence=_float_or_none(evidence.get("confidence")),
        netkingdom_id=containment.get("netkingdom_id"),
        fabric_id=containment.get("fabric_id"),
        subfabric_id=containment.get("subfabric_id"),
        environment=containment.get("environment"),
        deployment_scenario_id=containment.get("deployment_scenario_id"),
        owner_actor_id=ownership.get("owner_actor_id"),
        owner_role=ownership.get("owner_role"),
        ownership_resolution=ownership.get("resolution"),
        cost_center_id=accounting.get("cost_center_id"),
        profit_center_id=accounting.get("profit_center_id"),
        display_only=bool(raw.get("display_only", False)),
        attributes=node.attributes,
        raw_json=raw,
    )
def _fabric_graph_edge_row(import_id: Any, source_repo_slug: str, edge: Any) -> FabricGraphEdge:
    """Build the flattened read-model row for one exported edge."""
    raw = edge.model_dump(mode="json", by_alias=True)
    provider = _dict_section(raw, "provider")
    consumer = _dict_section(raw, "consumer")
    boundary = _dict_section(raw, "boundary")
    utility = _dict_section(raw, "utility")
    accounting = _dict_section(raw, "accounting")
    evidence = _dict_section(raw, "evidence")
    return FabricGraphEdge(
        import_id=import_id,
        source_repo_slug=source_repo_slug,
        # The key is a digest of the full serialized edge.
        edge_key=edge_key(raw),
        from_graph_id=edge.from_graph_id,
        to_graph_id=edge.to_graph_id,
        edge_type=edge.edge_type,
        canonical_type=edge.canonical_type,
        canon_anchor=edge.canon_anchor,
        mapping_fit=edge.mapping_fit,
        evidence_state=edge.evidence_state or evidence.get("state"),
        evidence_review_state=evidence.get("review_state"),
        evidence_confidence=_float_or_none(evidence.get("confidence")),
        relationship_category=edge.relationship_category,
        provider_owner_actor_id=provider.get("owner_actor_id"),
        provider_fabric_id=provider.get("fabric_id"),
        provider_subfabric_id=provider.get("subfabric_id"),
        consumer_owner_actor_id=consumer.get("owner_actor_id"),
        consumer_fabric_id=consumer.get("fabric_id"),
        consumer_subfabric_id=consumer.get("subfabric_id"),
        crosses_fabric_boundary=boundary.get("crosses_fabric_boundary"),
        crosses_subfabric_boundary=boundary.get("crosses_subfabric_boundary"),
        utility_type=utility.get("utility_type"),
        utility_contract_id=utility.get("contract_id"),
        utility_payment_schema_id=utility.get("payment_schema_id"),
        utility_metering_basis=utility.get("metering_basis"),
        utility_business_model=utility.get("business_model"),
        cost_center_id=accounting.get("cost_center_id"),
        profit_center_id=accounting.get("profit_center_id"),
        provider_profit_center_id=accounting.get("provider_profit_center_id"),
        consumer_cost_center_id=accounting.get("consumer_cost_center_id"),
        display_only=bool(edge.display_only),
        attributes=edge.attributes,
        raw_json=raw,
    )
def validate_fabric_graph_export(payload: dict[str, Any]) -> FabricGraphExportPayload:
    """Parse and validate a raw export payload.

    Runs schema validation first, then the contract checks, then the canon
    metadata checks. Only the first problem found at each stage is reported.

    Raises:
        ValueError: Describing the first schema, contract or canon metadata
            violation.
    """
    try:
        export = FabricGraphExportPayload.model_validate(payload)
    except ValidationError as exc:
        first_error = exc.errors()[0]
        loc_parts = [str(part) for part in first_error.get("loc", [])]
        location = ".".join(loc_parts) or "<root>"
        message = first_error.get("msg", "invalid payload")
        raise ValueError(f"invalid FabricGraphExport at {location}: {message}") from exc

    contract_problems = _contract_errors(export, payload)
    if contract_problems:
        raise ValueError(f"invalid FabricGraphExport contract: {contract_problems[0]}")

    canon_problems = _canon_metadata_errors(export)
    if canon_problems:
        raise ValueError(f"invalid FabricGraphExport canon metadata: {canon_problems[0]}")

    return export
def graph_content_hash(payload: dict[str, Any]) -> str:
    """Return the SHA-256 hex digest of the payload's canonical JSON form."""
    serialized = json.dumps(
        _canonical_payload(payload),
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=True,
    )
    digest = hashlib.sha256(serialized.encode("utf-8"))
    return digest.hexdigest()
def edge_key(edge: dict[str, Any]) -> str:
    """Return a SHA-256 hex digest of the edge's compact, key-sorted JSON."""
    serialized = json.dumps(
        edge,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=True,
    )
    digest = hashlib.sha256(serialized.encode("utf-8"))
    return digest.hexdigest()
def _float_or_none(value: Any) -> float | None:
    """Convert a real number to ``float``; return ``None`` for anything else.

    ``bool`` is a subclass of ``int``, so it is rejected explicitly: a
    ``True``/``False`` value must not be stored as a 1.0/0.0 number.
    """
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return float(value)
    return None
async def record_fabric_graph_error(
    session: AsyncSession,
    summary: str,
    *,
    source_repo_slug: str,
    source_url: str | None,
    error: str,
    requested_by: str,
) -> None:
    """Record a progress event describing a fabric graph error, then commit."""
    detail = {
        "source_repo_slug": source_repo_slug,
        "source_url": source_url,
        "error": error,
        "requested_by": requested_by,
    }
    await _record_progress(session, summary, detail)
    await session.commit()
async def _find_import(
    session: AsyncSession, source_repo_slug: str, content_hash: str
) -> FabricGraphImport | None:
    """Look up the import run for a repo slug and content hash, if any.

    Uses ``scalar_one_or_none``, so more than one matching row is an error.
    """
    query = select(FabricGraphImport).where(
        FabricGraphImport.source_repo_slug == source_repo_slug,
        FabricGraphImport.content_hash == content_hash,
    )
    rows = await session.execute(query)
    return rows.scalar_one_or_none()
async def _mark_latest(session: AsyncSession, import_run: FabricGraphImport) -> None:
    """Make ``import_run`` the only latest import for its repo slug."""
    await _mark_previous_not_latest(
        session,
        import_run.source_repo_slug,
        exclude_id=import_run.id,
    )
    import_run.is_latest = True
async def _mark_previous_not_latest(
    session: AsyncSession,
    source_repo_slug: str,
    *,
    exclude_id: Any | None = None,
) -> None:
    """Clear ``is_latest`` on the repo's imports, optionally sparing one id."""
    conditions = [FabricGraphImport.source_repo_slug == source_repo_slug]
    if exclude_id is not None:
        conditions.append(FabricGraphImport.id != exclude_id)
    statement = update(FabricGraphImport).where(*conditions).values(is_latest=False)
    await session.execute(statement)
async def _record_invalid_import(
    session: AsyncSession,
    payload: dict[str, Any],
    *,
    source_repo_slug: str,
    source_url: str | None,
    content_hash: str,
    error: str,
    now: datetime,
    requested_by: str,
) -> FabricGraphImport:
    """Persist a rejected payload as an ``invalid`` import run and commit.

    If an invalid run with the same repo slug and content hash already
    exists, only its ``last_seen_at`` is refreshed; otherwise a new row is
    created from whatever identifying fields the raw payload carries. A
    progress event is recorded in both cases.

    Returns:
        The refreshed import run that represents the rejection.
    """
    previous = await _find_import(session, source_repo_slug, content_hash)
    if not (previous and previous.validation_status == "invalid"):
        api_version_raw = payload.get("apiVersion")
        kind_raw = payload.get("kind")
        import_run = FabricGraphImport(
            source_repo_slug=source_repo_slug,
            source_url=source_url,
            source_commit=_source_value(payload, "commit"),
            source_path=_source_value(payload, "path"),
            api_version=str(api_version_raw) if api_version_raw else None,
            export_kind=str(kind_raw) if kind_raw else None,
            exported_at=_parse_datetime(payload.get("generated_at")),
            content_hash=content_hash,
            node_count=0,
            edge_count=0,
            validation_status="invalid",
            error_details={"error": error},
            graph_json=payload,
            is_latest=False,
            last_seen_at=now,
        )
        session.add(import_run)
        # Flush so the new row is written before the progress event.
        await session.flush()
    else:
        previous.last_seen_at = now
        import_run = previous

    progress_detail = {
        "source_repo_slug": source_repo_slug,
        "source_url": source_url,
        "content_hash": content_hash,
        "error": error,
        "requested_by": requested_by,
    }
    await _record_progress(
        session,
        "Fabric graph export rejected during validation.",
        progress_detail,
    )
    await session.commit()
    await session.refresh(import_run)
    return import_run
async def _record_progress(session: AsyncSession, summary: str, detail: dict[str, Any]) -> None:
    """Queue a ``fabric_graph_import`` progress event on the session.

    The event is only added to the session; committing is left to the caller.
    """
    event = ProgressEvent(
        event_type="fabric_graph_import",
        summary=summary,
        detail=detail,
        author="state-hub",
    )
    session.add(event)
def _canonical_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """Return a JSON-normalized copy of the payload without ``generated_at``.

    The payload is round-tripped through JSON (non-JSON values are converted
    with ``str``), so the input mapping is not modified.
    """
    serialized = json.dumps(payload, sort_keys=True, default=str)
    normalized = json.loads(serialized)
    # Drop the top-level timestamp so it does not affect the result.
    normalized.pop("generated_at", None)
    return normalized
def _contract_errors(export: FabricGraphExportPayload, payload: dict[str, Any]) -> list[str]:
    """Run the contract checks that match the export's API version.

    ``railiance.fabric/v1alpha2`` exports get the financial contract checks;
    every other version gets the legacy checks.
    """
    if export.api_version != "railiance.fabric/v1alpha2":
        return _legacy_contract_errors(export)
    return _financial_contract_errors(export, payload)
def _legacy_contract_errors(export: FabricGraphExportPayload) -> list[str]:
    """Collect contract violations for legacy exports.

    A legacy export must not set ``schema_version``, and every node must
    carry non-empty ``repo``, ``domain`` and ``lifecycle`` values.
    """
    problems: list[str] = []
    if export.schema_version:
        problems.append("v1alpha1 exports must not set schema_version")
    required = ("repo", "domain", "lifecycle")
    for position, node in enumerate(export.nodes):
        values = {name: getattr(node, name) for name in required}
        _require_fields(problems, f"nodes[{position}]", values, required)
    return problems
def _financial_contract_errors(
    export: FabricGraphExportPayload, payload: dict[str, Any]
) -> list[str]:
    """Collect contract violations for a v1alpha2 (financial) export.

    Checks, in order: required top-level fields in the raw payload, the
    schema version, the netkingdom, actor ids and the king reference, fabric
    and subfabric references, node containment/ownership/evidence, and edge
    endpoints plus the extra requirements for utility edges.

    Args:
        export: The schema-validated export.
        payload: The raw payload, used to tell absent fields from defaults.

    Returns:
        Human-readable error messages in discovery order; empty when the
        contract is satisfied.
    """
    errors: list[str] = []
    for field in ("schema_version", "netkingdom", "actors", "fabrics"):
        if field not in payload:
            errors.append(f"missing required financial export field {field!r}")
    if export.schema_version != "financial-fabric-v1":
        errors.append("schema_version must be 'financial-fabric-v1' for v1alpha2 exports")
    if export.netkingdom is None:
        errors.append("netkingdom must be an object for v1alpha2 exports")
        netkingdom_id = ""
        king_actor_id = ""
    else:
        netkingdom_id = export.netkingdom.id
        king_actor_id = export.netkingdom.king_actor_id

    actor_roles: dict[str, str] = {}
    for index, actor in enumerate(export.actors):
        if actor.id in actor_roles:
            errors.append(f"actors[{index}].id {actor.id!r} is duplicated")
        actor_roles[actor.id] = actor.role
    if king_actor_id and actor_roles.get(king_actor_id) != "king":
        errors.append("netkingdom.king_actor_id must reference an actor with role 'king'")

    # Every declared fabric id, collected up front so a Subfabric may be
    # listed before the fabric it names as its parent.
    declared_fabric_ids = {fabric.id for fabric in export.fabrics}
    fabric_kinds: dict[str, str] = {}
    for index, fabric in enumerate(export.fabrics):
        if fabric.id in fabric_kinds:
            errors.append(f"fabrics[{index}].id {fabric.id!r} is duplicated")
        fabric_kinds[fabric.id] = fabric.kind
        if fabric.netkingdom_id != netkingdom_id:
            errors.append(f"fabrics[{index}].netkingdom_id must match netkingdom.id")
        if fabric.kind == "Fabric":
            if not fabric.lord_actor_id:
                errors.append(f"fabrics[{index}].lord_actor_id is required for Fabric")
            elif actor_roles.get(fabric.lord_actor_id) not in {"lord", "king"}:
                errors.append(f"fabrics[{index}].lord_actor_id must reference a lord or king actor")
        if fabric.kind == "Subfabric":
            if not fabric.parent_fabric_id:
                errors.append(f"fabrics[{index}].parent_fabric_id is required for Subfabric")
            elif fabric.parent_fabric_id not in declared_fabric_ids:
                errors.append(
                    f"fabrics[{index}].parent_fabric_id references unknown fabric {fabric.parent_fabric_id!r}"
                )
            if not fabric.tenant_actor_id:
                errors.append(f"fabrics[{index}].tenant_actor_id is required for Subfabric")
            elif actor_roles.get(fabric.tenant_actor_id) != "tenant":
                errors.append(f"fabrics[{index}].tenant_actor_id must reference a tenant actor")

    node_ids: set[str] = set()
    for index, node in enumerate(export.nodes):
        path = f"nodes[{index}]"
        if node.id in node_ids:
            errors.append(f"{path}.id {node.id!r} is duplicated")
        node_ids.add(node.id)
        if node.containment is None:
            errors.append(f"{path}.containment must be an object for v1alpha2 exports")
        else:
            if node.containment.netkingdom_id != netkingdom_id:
                errors.append(f"{path}.containment.netkingdom_id must match netkingdom.id")
            _validate_fabric_ref(
                errors,
                f"{path}.containment.fabric_id",
                node.containment.fabric_id,
                fabric_kinds,
                "Fabric",
            )
            if node.containment.subfabric_id:
                _validate_fabric_ref(
                    errors,
                    f"{path}.containment.subfabric_id",
                    node.containment.subfabric_id,
                    fabric_kinds,
                    "Subfabric",
                )
        if node.ownership is None:
            errors.append(f"{path}.ownership must be an object for v1alpha2 exports")
        else:
            _validate_actor_ref(
                errors,
                f"{path}.ownership.owner_actor_id",
                node.ownership.owner_actor_id,
                actor_roles,
            )
            # The referenced actor's role must equal the declared owner_role
            # (an actor whose role is the empty string is also let through).
            if actor_roles.get(node.ownership.owner_actor_id) not in {node.ownership.owner_role, ""}:
                errors.append(f"{path}.ownership.owner_role does not match referenced actor role")
        if node.evidence is None:
            errors.append(f"{path}.evidence must be an object for v1alpha2 exports")
        elif (
            node.evidence.review_state == "accepted"
            and node.ownership
            and node.ownership.resolution not in {"explicit", "inherited"}
        ):
            errors.append(f"{path}.ownership.resolution must be explicit or inherited for accepted nodes")

    for index, edge in enumerate(export.edges):
        path = f"edges[{index}]"
        if edge.from_graph_id not in node_ids:
            errors.append(f"{path}.from references unknown node {edge.from_graph_id!r}")
        if edge.to_graph_id not in node_ids:
            errors.append(f"{path}.to references unknown node {edge.to_graph_id!r}")
        if edge.relationship_category is None:
            errors.append(f"{path}.relationship_category is required for v1alpha2 exports")
        if edge.evidence is None:
            errors.append(f"{path}.evidence must be an object for v1alpha2 exports")
        if edge.edge_type == "provides_utility_to" and edge.relationship_category != "utility":
            errors.append(f"{path}.relationship_category must be 'utility' for provides_utility_to edges")
        if edge.relationship_category == "utility":
            # Utility edges must describe both sides, the boundary and the utility.
            if edge.provider is None:
                errors.append(f"{path}.provider is required for utility edges")
            else:
                _validate_utility_side(errors, f"{path}.provider", edge.provider, actor_roles, fabric_kinds)
            if edge.consumer is None:
                errors.append(f"{path}.consumer is required for utility edges")
            else:
                _validate_utility_side(errors, f"{path}.consumer", edge.consumer, actor_roles, fabric_kinds)
            if edge.boundary is None:
                errors.append(f"{path}.boundary is required for utility edges")
            if edge.utility is None:
                errors.append(f"{path}.utility is required for utility edges")
    return errors
def _validate_actor_ref(
    errors: list[str],
    path: str,
    actor_id: str,
    actor_roles: dict[str, str],
) -> None:
    """Append an error to ``errors`` when ``actor_id`` is not a known actor."""
    if actor_id in actor_roles:
        return
    errors.append(f"{path} references unknown actor {actor_id!r}")
def _validate_fabric_ref(
    errors: list[str],
    path: str,
    fabric_id: str,
    fabric_kinds: dict[str, str],
    expected_kind: str,
) -> None:
    """Append an error when ``fabric_id`` is unknown or has the wrong kind."""
    found_kind = fabric_kinds.get(fabric_id)
    if found_kind is None:
        errors.append(f"{path} references unknown fabric {fabric_id!r}")
        return
    if found_kind != expected_kind:
        errors.append(f"{path} must reference a {expected_kind}")
def _validate_utility_side(
    errors: list[str],
    path: str,
    side: Any,
    actor_roles: dict[str, str],
    fabric_kinds: dict[str, str],
) -> None:
    """Check the actor and fabric references on one side of a utility edge.

    The owner actor and the fabric must be known; a subfabric, when given,
    must reference a fabric of kind ``Subfabric``.
    """
    _validate_actor_ref(errors, f"{path}.owner_actor_id", side.owner_actor_id, actor_roles)
    fabric_id = side.fabric_id
    if fabric_id not in fabric_kinds:
        errors.append(f"{path}.fabric_id references unknown fabric {fabric_id!r}")
    subfabric_id = side.subfabric_id
    if subfabric_id:
        _validate_fabric_ref(
            errors,
            f"{path}.subfabric_id",
            subfabric_id,
            fabric_kinds,
            "Subfabric",
        )
def _canon_metadata_errors(export: FabricGraphExportPayload) -> list[str]:
    """Collect canon-metadata violations for the export's nodes and edges.

    Canon metadata is all-or-nothing. A node that sets any canon field must
    set ``canon_category``, ``mapping_fit`` and ``evidence_state``; an edge
    that sets any canon field (including ``display_only``) must set
    ``mapping_fit``, ``display_only`` and ``evidence_state``. Edges whose type
    is display-only must also set ``display_only`` to true.

    Returns:
        Human-readable error messages in discovery order; empty when valid.
    """
    errors: list[str] = []
    for index, node in enumerate(export.nodes):
        node_canon_values = (
            node.canon_category,
            node.canon_anchor,
            node.mapping_fit,
            node.evidence_state,
        )
        if any(value is not None for value in node_canon_values):
            _require_fields(
                errors,
                f"nodes[{index}]",
                {
                    "canon_category": node.canon_category,
                    "mapping_fit": node.mapping_fit,
                    "evidence_state": node.evidence_state,
                },
                ("canon_category", "mapping_fit", "evidence_state"),
            )
    for index, edge in enumerate(export.edges):
        edge_canon_values = (
            edge.canonical_type,
            edge.canon_anchor,
            edge.mapping_fit,
            edge.display_only,
            edge.evidence_state,
        )
        # display_only counts as a canon field, so a display-only edge always
        # reaches this required-field check; no separate "display-only
        # without canon metadata" check is needed.
        if any(value is not None for value in edge_canon_values):
            _require_fields(
                errors,
                f"edges[{index}]",
                {
                    "mapping_fit": edge.mapping_fit,
                    "display_only": edge.display_only,
                    "evidence_state": edge.evidence_state,
                },
                ("mapping_fit", "display_only", "evidence_state"),
            )
        if edge.edge_type in DISPLAY_ONLY_EDGE_TYPES and edge.display_only is not True:
            errors.append(
                f"edges[{index}] uses display-only edge type {edge.edge_type!r} without display_only=true"
            )
    return errors
def _require_fields(
    errors: list[str],
    path: str,
    item: dict[str, Any],
    fields: tuple[str, ...],
) -> None:
    """Append one error per field in ``fields`` that is absent, ``None`` or ``""``."""
    errors.extend(
        f"{path} missing required canon metadata field {field!r}"
        for field in fields
        if item.get(field) in (None, "")
    )
def _source_value(payload: dict[str, Any], field: str) -> str | None:
    """Return ``payload["source"][field]`` as a string.

    Yields ``None`` when ``source`` is not a dict or the value is falsy.
    """
    source = payload.get("source")
    if isinstance(source, dict):
        found = source.get(field)
        if found:
            return str(found)
    return None
def _parse_datetime(value: Any) -> datetime | None:
    """Parse an ISO-8601 string into a ``datetime``.

    ``Z`` is rewritten to ``+00:00`` before parsing. Returns ``None`` for
    non-string, empty or unparseable input.
    """
    if not (isinstance(value, str) and value):
        return None
    iso_text = value.replace("Z", "+00:00")
    try:
        return datetime.fromisoformat(iso_text)
    except ValueError:
        return None