Files
kontextual-engine/src/kontextual_engine/core/memory.py

557 lines
22 KiB
Python

"""Runtime memory graph records mapped from Markitect contracts."""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
from .metadata import LifecycleState
from .primitives import compact_dict, mapping_digest, utc_now
# Schema identifier a graph contract must declare (under `schema_version` or
# `schema`) for `MemoryGraphImportResult.from_markitect_contract` to accept it.
MARKITECT_MEMORY_GRAPH_SCHEMA_VERSION = "markitect.memory.graph.v1"
# Schema identifier a profile contract must declare (under `schema_version` or
# `schema`) for `MemoryProfileRecord.from_markitect_contract` to accept it.
MARKITECT_MEMORY_PROFILE_SCHEMA_VERSION = "markitect.memory.profile.v1"
@dataclass(frozen=True)
class MemorySourceSpan:
    """Immutable pointer from a memory node back into a source document.

    Only `path` is required; every other locator is optional and defaults
    to ``None``.
    """

    path: str
    snapshot_id: str | None = None
    unit_kind: str | None = None
    unit_index: int | None = None
    line_start: int | None = None
    line_end: int | None = None
    selector: str | None = None
    engine: str | None = None

    @classmethod
    def from_contract(cls, data: dict[str, Any]) -> "MemorySourceSpan":
        """Build a span from a contract mapping.

        Raises:
            KeyError: if `data` has no ``"path"`` entry.
            ValueError: if an integer locator cannot be parsed by ``int``.
        """
        # The mandatory key is read first so a missing path fails before any
        # optional field is converted.
        path = str(data["path"])
        # (field name, converter) pairs, in the order the fields are converted.
        converters = (
            ("snapshot_id", _optional_str),
            ("unit_kind", _optional_str),
            ("unit_index", _optional_int),
            ("line_start", _optional_int),
            ("line_end", _optional_int),
            ("selector", _optional_str),
            ("engine", _optional_str),
        )
        optional = {name: convert(data.get(name)) for name, convert in converters}
        return cls(path=path, **optional)

    def to_dict(self) -> dict[str, Any]:
        """Return the span as a plain mapping passed through `compact_dict`."""
        field_names = (
            "path",
            "snapshot_id",
            "unit_kind",
            "unit_index",
            "line_start",
            "line_end",
            "selector",
            "engine",
        )
        return compact_dict({name: getattr(self, name) for name in field_names})
@dataclass(frozen=True)
class MemoryProfileRecord:
    """Runtime record of a Markitect memory profile contract.

    `profile_id` is derived from `schema_version` and `contract_profile_id`
    via `runtime_id` when it is not supplied, and `imported_at` defaults to
    the ISO form of `utc_now()` at construction time.
    """

    contract_profile_id: str
    schema_version: str
    title: str = ""
    intent: str = ""
    memory_kinds: tuple[str, ...] = ()
    stores: dict[str, Any] = field(default_factory=dict)
    limits: dict[str, Any] = field(default_factory=dict)
    latency: dict[str, Any] = field(default_factory=dict)
    retention: dict[str, Any] = field(default_factory=dict)
    refresh: dict[str, Any] = field(default_factory=dict)
    compaction: dict[str, Any] = field(default_factory=dict)
    activation: dict[str, Any] = field(default_factory=dict)
    policy: dict[str, Any] = field(default_factory=dict)
    observability: dict[str, Any] = field(default_factory=dict)
    failure: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    profile_id: str | None = None
    imported_at: str = field(default_factory=lambda: utc_now().isoformat())

    def __post_init__(self) -> None:
        """Fill in the derived `profile_id` and normalise `memory_kinds`."""
        # The dataclass is frozen, so fields are written via object.__setattr__.
        if self.profile_id is None:
            object.__setattr__(
                self,
                "profile_id",
                runtime_id("memprofile", self.schema_version, self.contract_profile_id),
            )
        object.__setattr__(self, "memory_kinds", tuple(self.memory_kinds))

    @classmethod
    def from_markitect_contract(cls, data: dict[str, Any]) -> "MemoryProfileRecord":
        """Build a profile record from a Markitect profile contract mapping.

        Several fields accept an alternate key (`schema`, `name`,
        `description`, `latency_targets`, `retention_policy`,
        `context_budget`) when the primary key is missing or falsy.

        Raises:
            ValueError: if the declared schema is not
                `MARKITECT_MEMORY_PROFILE_SCHEMA_VERSION`, or if the contract
                has no non-empty ``id``.
        """
        schema_version = str(data.get("schema_version") or data.get("schema") or "")
        if schema_version != MARKITECT_MEMORY_PROFILE_SCHEMA_VERSION:
            raise ValueError(f"Unsupported memory profile schema `{schema_version}`.")
        contract_id = str(data.get("id") or "")
        if not contract_id:
            raise ValueError("Memory profile contract must declare an id.")

        # An explicit null is treated like a missing key (as the mapping
        # sections below already do), and a bare string counts as one kind
        # rather than being iterated character by character.
        raw_kinds = data.get("memory_kinds")
        if raw_kinds is None:
            raw_kinds = ()
        elif isinstance(raw_kinds, str):
            raw_kinds = (raw_kinds,)

        return cls(
            contract_profile_id=contract_id,
            schema_version=schema_version,
            title=str(data.get("title") or data.get("name") or ""),
            intent=str(data.get("intent") or data.get("description") or ""),
            memory_kinds=tuple(str(item) for item in raw_kinds),
            stores=dict(data.get("stores") or {}),
            limits=dict(data.get("limits") or {}),
            latency=dict(data.get("latency") or data.get("latency_targets") or {}),
            retention=dict(data.get("retention") or data.get("retention_policy") or {}),
            refresh=dict(data.get("refresh") or {}),
            compaction=dict(data.get("compaction") or {}),
            activation=dict(data.get("activation") or data.get("context_budget") or {}),
            policy=dict(data.get("policy") or {}),
            observability=dict(data.get("observability") or {}),
            failure=dict(data.get("failure") or {}),
            metadata=dict(data.get("metadata") or {}),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain mapping passed through `compact_dict`.

        Mapping sections are shallow-copied and `memory_kinds` becomes a list.
        """
        return compact_dict(
            {
                "profile_id": self.profile_id,
                "contract_profile_id": self.contract_profile_id,
                "schema_version": self.schema_version,
                "title": self.title,
                "intent": self.intent,
                "memory_kinds": list(self.memory_kinds),
                "stores": dict(self.stores),
                "limits": dict(self.limits),
                "latency": dict(self.latency),
                "retention": dict(self.retention),
                "refresh": dict(self.refresh),
                "compaction": dict(self.compaction),
                "activation": dict(self.activation),
                "policy": dict(self.policy),
                "observability": dict(self.observability),
                "failure": dict(self.failure),
                "metadata": dict(self.metadata),
                "imported_at": self.imported_at,
            }
        )
@dataclass(frozen=True)
class MemoryNodeRecord:
    """Runtime record for one node of an imported memory graph.

    `node_id` is derived from `contract_graph_id` and `contract_node_id`
    via `runtime_id` when it is not supplied.
    """

    graph_id: str
    contract_graph_id: str
    contract_node_id: str
    kind: str
    text: str = ""
    namespace: dict[str, Any] = field(default_factory=dict)
    source_spans: tuple[MemorySourceSpan, ...] = ()
    provenance: tuple[dict[str, Any], ...] = ()
    freshness: dict[str, Any] = field(default_factory=dict)
    confidence: float | None = None
    policy: dict[str, Any] = field(default_factory=dict)
    lifecycle: LifecycleState = LifecycleState.ACTIVE
    metadata: dict[str, Any] = field(default_factory=dict)
    node_id: str | None = None
    created_at: str = field(default_factory=lambda: utc_now().isoformat())
    updated_at: str = field(default_factory=lambda: utc_now().isoformat())

    def __post_init__(self) -> None:
        """Derive `node_id` if absent and normalise sequence/enum fields."""
        # Frozen dataclass: assignments must bypass the generated __setattr__.
        assign = object.__setattr__
        if self.node_id is None:
            derived = runtime_id("memnode", self.contract_graph_id, self.contract_node_id)
            assign(self, "node_id", derived)
        assign(self, "source_spans", tuple(self.source_spans))
        # Each provenance entry is copied so the record holds its own dicts.
        assign(self, "provenance", tuple(map(dict, self.provenance)))
        assign(self, "lifecycle", LifecycleState(self.lifecycle))

    @classmethod
    def from_markitect_contract(
        cls,
        *,
        graph_id: str,
        contract_graph_id: str,
        data: dict[str, Any],
        graph_namespace: dict[str, Any] | None = None,
    ) -> "MemoryNodeRecord":
        """Build a node record from one node entry of a graph contract.

        Alternate keys are honoured when the primary one is missing or falsy:
        `type` for `kind`, `content`/`summary` for `text`, `spans` for
        `source_spans`. The node's own `namespace` wins over `graph_namespace`.

        Raises:
            ValueError: if the entry has no non-empty ``id``.
        """
        contract_node_id = str(data.get("id") or "")
        if contract_node_id == "":
            raise ValueError("Memory graph node must declare an id.")

        raw_spans = data.get("source_spans") or data.get("spans") or []

        # Values are computed in the same order the constructor receives them.
        kind = str(data.get("kind") or data.get("type") or "")
        text = str(data.get("text") or data.get("content") or data.get("summary") or "")
        namespace = dict(data.get("namespace") or graph_namespace or {})
        spans = tuple(
            [MemorySourceSpan.from_contract(entry) for entry in _mapping_list(raw_spans)]
        )
        provenance = tuple(_mapping_list(data.get("provenance")))
        freshness = dict(data.get("freshness") or {})
        confidence = _optional_float(data.get("confidence"))
        policy = dict(data.get("policy") or {})
        metadata = dict(data.get("metadata") or {})

        return cls(
            graph_id=graph_id,
            contract_graph_id=contract_graph_id,
            contract_node_id=contract_node_id,
            kind=kind,
            text=text,
            namespace=namespace,
            source_spans=spans,
            provenance=provenance,
            freshness=freshness,
            confidence=confidence,
            policy=policy,
            metadata=metadata,
        )

    @property
    def resource_id(self) -> str:
        """Return the node id prefixed with ``memory-node:``."""
        return f"memory-node:{self.node_id}"

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain mapping passed through `compact_dict`."""
        payload: dict[str, Any] = {
            "node_id": self.node_id,
            "graph_id": self.graph_id,
            "contract_graph_id": self.contract_graph_id,
            "contract_node_id": self.contract_node_id,
            "kind": self.kind,
            "text": self.text,
            "namespace": dict(self.namespace),
            "source_spans": [span.to_dict() for span in self.source_spans],
            "provenance": list(map(dict, self.provenance)),
            "freshness": dict(self.freshness),
            "confidence": self.confidence,
            "policy": dict(self.policy),
            "lifecycle": self.lifecycle.value,
            "metadata": dict(self.metadata),
            "created_at": self.created_at,
            "updated_at": self.updated_at,
        }
        return compact_dict(payload)
@dataclass(frozen=True)
class MemoryEdgeRecord:
    """Runtime record for one directed edge of an imported memory graph.

    Both the runtime node ids and the contract node ids of the endpoints are
    kept. `edge_id` is derived from `contract_graph_id` and
    `contract_edge_id` via `runtime_id` when it is not supplied.
    """

    graph_id: str
    contract_graph_id: str
    contract_edge_id: str
    kind: str
    source_node_id: str
    target_node_id: str
    source_contract_node_id: str
    target_contract_node_id: str
    provenance: tuple[dict[str, Any], ...] = ()
    freshness: dict[str, Any] = field(default_factory=dict)
    confidence: float | None = None
    policy: dict[str, Any] = field(default_factory=dict)
    lifecycle: LifecycleState = LifecycleState.ACTIVE
    metadata: dict[str, Any] = field(default_factory=dict)
    edge_id: str | None = None
    created_at: str = field(default_factory=lambda: utc_now().isoformat())

    def __post_init__(self) -> None:
        """Derive `edge_id` if absent and normalise provenance/lifecycle."""
        # Frozen dataclass: assignments must bypass the generated __setattr__.
        assign = object.__setattr__
        if self.edge_id is None:
            derived = runtime_id("memedge", self.contract_graph_id, self.contract_edge_id)
            assign(self, "edge_id", derived)
        assign(self, "provenance", tuple(map(dict, self.provenance)))
        assign(self, "lifecycle", LifecycleState(self.lifecycle))

    @classmethod
    def from_markitect_contract(
        cls,
        *,
        graph_id: str,
        contract_graph_id: str,
        data: dict[str, Any],
        node_id_by_contract_id: dict[str, str],
    ) -> "MemoryEdgeRecord":
        """Build an edge record from one edge entry of a graph contract.

        Endpoints are read from `source`/`target`, falling back to
        `from`/`to`. When the entry has no ``id``, one is synthesised from
        the kind and both endpoint ids using `stable_suffix`.

        Raises:
            ValueError: if either endpoint is not a key of
                `node_id_by_contract_id` (chained from the `KeyError`).
        """
        kind = str(data.get("kind") or data.get("type") or "")
        source_contract_id = str(data.get("source") or data.get("from") or "")
        target_contract_id = str(data.get("target") or data.get("to") or "")

        declared_id = data.get("id")
        if declared_id:
            contract_edge_id = str(declared_id)
        else:
            contract_edge_id = f"edge:{stable_suffix(kind, source_contract_id, target_contract_id)}"

        try:
            source_node_id, target_node_id = [
                node_id_by_contract_id[endpoint]
                for endpoint in (source_contract_id, target_contract_id)
            ]
        except KeyError as exc:
            # Report the source if it is the unknown one, otherwise the target.
            if source_contract_id not in node_id_by_contract_id:
                missing = source_contract_id
            else:
                missing = target_contract_id
            raise ValueError(f"Memory edge `{contract_edge_id}` references unknown node `{missing}`.") from exc

        # Values are computed in the same order the constructor receives them.
        provenance = tuple(_mapping_list(data.get("provenance")))
        freshness = dict(data.get("freshness") or {})
        confidence = _optional_float(data.get("confidence"))
        policy = dict(data.get("policy") or {})
        metadata = dict(data.get("metadata") or {})

        return cls(
            graph_id=graph_id,
            contract_graph_id=contract_graph_id,
            contract_edge_id=contract_edge_id,
            kind=kind,
            source_node_id=source_node_id,
            target_node_id=target_node_id,
            source_contract_node_id=source_contract_id,
            target_contract_node_id=target_contract_id,
            provenance=provenance,
            freshness=freshness,
            confidence=confidence,
            policy=policy,
            metadata=metadata,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain mapping passed through `compact_dict`."""
        payload: dict[str, Any] = {
            "edge_id": self.edge_id,
            "graph_id": self.graph_id,
            "contract_graph_id": self.contract_graph_id,
            "contract_edge_id": self.contract_edge_id,
            "kind": self.kind,
            "source_node_id": self.source_node_id,
            "target_node_id": self.target_node_id,
            "source_contract_node_id": self.source_contract_node_id,
            "target_contract_node_id": self.target_contract_node_id,
            "provenance": list(map(dict, self.provenance)),
            "freshness": dict(self.freshness),
            "confidence": self.confidence,
            "policy": dict(self.policy),
            "lifecycle": self.lifecycle.value,
            "metadata": dict(self.metadata),
            "created_at": self.created_at,
        }
        return compact_dict(payload)
@dataclass(frozen=True)
class MemoryEventRecord:
    """Runtime record for one event of an imported memory graph.

    `event_id` is derived from `contract_graph_id` and `contract_event_id`
    via `runtime_id` when it is not supplied.
    """

    graph_id: str
    contract_graph_id: str
    contract_event_id: str
    kind: str
    timestamp: str
    namespace: dict[str, Any] = field(default_factory=dict)
    actor_id: str | None = None
    thread: str | None = None
    task: str | None = None
    node_updates: tuple[dict[str, Any], ...] = ()
    edge_updates: tuple[dict[str, Any], ...] = ()
    package_refs: tuple[str, ...] = ()
    activation_refs: tuple[str, ...] = ()
    branch: dict[str, Any] = field(default_factory=dict)
    policy: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    event_id: str | None = None
    appended_at: str = field(default_factory=lambda: utc_now().isoformat())

    def __post_init__(self) -> None:
        """Derive `event_id` if absent and normalise the sequence fields."""
        # Frozen dataclass: assignments must bypass the generated __setattr__.
        assign = object.__setattr__
        if self.event_id is None:
            derived = runtime_id("memevent", self.contract_graph_id, self.contract_event_id)
            assign(self, "event_id", derived)
        assign(self, "node_updates", tuple(map(dict, self.node_updates)))
        assign(self, "edge_updates", tuple(map(dict, self.edge_updates)))
        assign(self, "package_refs", tuple(self.package_refs))
        assign(self, "activation_refs", tuple(self.activation_refs))

    @classmethod
    def from_markitect_contract(
        cls,
        *,
        graph_id: str,
        contract_graph_id: str,
        data: dict[str, Any],
        graph_namespace: dict[str, Any] | None = None,
    ) -> "MemoryEventRecord":
        """Build an event record from one event entry of a graph contract.

        Defaults: `kind` falls back to `type`, then ``"recorded"``;
        `timestamp` falls back to `at`, then the current `utc_now()` value.
        When the entry has no ``id``, one is synthesised from the kind, the
        timestamp and the whole entry using `stable_suffix`.
        """
        kind = str(data.get("kind") or data.get("type") or "recorded")

        declared_time = data.get("timestamp") or data.get("at")
        timestamp = str(declared_time) if declared_time else str(utc_now().isoformat())

        declared_id = data.get("id")
        if declared_id:
            contract_event_id = str(declared_id)
        else:
            # NOTE(review): when neither `timestamp` nor `at` is declared the
            # timestamp above comes from utc_now(), so this synthesised id
            # presumably differs between imports -- confirm this is intended.
            contract_event_id = f"event:{stable_suffix(kind, timestamp, data)}"

        # Values are computed in the same order the constructor receives them.
        namespace = dict(data.get("namespace") or graph_namespace or {})
        actor_id = _optional_str(data.get("actor") or data.get("actor_id"))
        thread = _optional_str(data.get("thread"))
        task = _optional_str(data.get("task"))
        node_updates = tuple(_mapping_list(data.get("node_updates") or data.get("nodes")))
        edge_updates = tuple(_mapping_list(data.get("edge_updates") or data.get("edges")))
        package_refs = tuple(_string_list(data.get("package_refs") or data.get("packages")))
        activation_refs = tuple(
            _string_list(data.get("activation_refs") or data.get("activations"))
        )
        branch = dict(data.get("branch") or {})
        policy = dict(data.get("policy") or {})
        metadata = dict(data.get("metadata") or {})

        return cls(
            graph_id=graph_id,
            contract_graph_id=contract_graph_id,
            contract_event_id=contract_event_id,
            kind=kind,
            timestamp=timestamp,
            namespace=namespace,
            actor_id=actor_id,
            thread=thread,
            task=task,
            node_updates=node_updates,
            edge_updates=edge_updates,
            package_refs=package_refs,
            activation_refs=activation_refs,
            branch=branch,
            policy=policy,
            metadata=metadata,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain mapping passed through `compact_dict`."""
        payload: dict[str, Any] = {
            "event_id": self.event_id,
            "graph_id": self.graph_id,
            "contract_graph_id": self.contract_graph_id,
            "contract_event_id": self.contract_event_id,
            "kind": self.kind,
            "timestamp": self.timestamp,
            "namespace": dict(self.namespace),
            "actor_id": self.actor_id,
            "thread": self.thread,
            "task": self.task,
            "node_updates": list(map(dict, self.node_updates)),
            "edge_updates": list(map(dict, self.edge_updates)),
            "package_refs": list(self.package_refs),
            "activation_refs": list(self.activation_refs),
            "branch": dict(self.branch),
            "policy": dict(self.policy),
            "metadata": dict(self.metadata),
            "appended_at": self.appended_at,
        }
        return compact_dict(payload)
@dataclass(frozen=True)
class MemoryGraphImportResult:
    """Everything produced by importing one Markitect memory graph contract.

    Holds the graph-level fields plus the node, edge and event records and,
    optionally, the profile record imported alongside the graph.
    """

    graph_id: str
    contract_graph_id: str
    schema_version: str
    title: str = ""
    intent: str = ""
    namespace: dict[str, Any] = field(default_factory=dict)
    profile: MemoryProfileRecord | None = None
    nodes: tuple[MemoryNodeRecord, ...] = ()
    edges: tuple[MemoryEdgeRecord, ...] = ()
    events: tuple[MemoryEventRecord, ...] = ()
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Coerce the record collections to tuples."""
        # Frozen dataclass: assignments must bypass the generated __setattr__.
        for name in ("nodes", "edges", "events"):
            object.__setattr__(self, name, tuple(getattr(self, name)))

    @classmethod
    def from_markitect_contract(
        cls,
        graph_contract: dict[str, Any],
        *,
        profile_contract: dict[str, Any] | None = None,
    ) -> "MemoryGraphImportResult":
        """Import a graph contract (and optionally a profile contract).

        Nodes are built first so that edges can resolve their endpoints to
        runtime node ids; events and the profile follow. The graph namespace
        is handed to nodes and events as their fallback namespace.

        Raises:
            ValueError: if the declared schema is not
                `MARKITECT_MEMORY_GRAPH_SCHEMA_VERSION`, if the contract has
                no non-empty ``id``, or if a nested record builder rejects
                its entry.
        """
        schema_version = str(graph_contract.get("schema_version") or graph_contract.get("schema") or "")
        if schema_version != MARKITECT_MEMORY_GRAPH_SCHEMA_VERSION:
            raise ValueError(f"Unsupported memory graph schema `{schema_version}`.")
        contract_graph_id = str(graph_contract.get("id") or "")
        if contract_graph_id == "":
            raise ValueError("Memory graph contract must declare an id.")

        graph_id = runtime_id("memgraph", schema_version, contract_graph_id)
        namespace = dict(graph_contract.get("namespace") or {})

        node_records: list[MemoryNodeRecord] = []
        for node_contract in _mapping_values(graph_contract.get("nodes")):
            node_records.append(
                MemoryNodeRecord.from_markitect_contract(
                    graph_id=graph_id,
                    contract_graph_id=contract_graph_id,
                    data=node_contract,
                    graph_namespace=namespace,
                )
            )
        nodes = tuple(node_records)

        # NOTE(review): a repeated contract node id is not rejected here; the
        # later entry simply replaces the earlier one in this lookup table.
        node_id_by_contract_id: dict[str, str] = {}
        for node in nodes:
            node_id_by_contract_id[node.contract_node_id] = str(node.node_id)

        edge_records: list[MemoryEdgeRecord] = []
        for edge_contract in _mapping_values(graph_contract.get("edges")):
            edge_records.append(
                MemoryEdgeRecord.from_markitect_contract(
                    graph_id=graph_id,
                    contract_graph_id=contract_graph_id,
                    data=edge_contract,
                    node_id_by_contract_id=node_id_by_contract_id,
                )
            )
        edges = tuple(edge_records)

        event_records: list[MemoryEventRecord] = []
        for event_contract in _mapping_values(graph_contract.get("events")):
            event_records.append(
                MemoryEventRecord.from_markitect_contract(
                    graph_id=graph_id,
                    contract_graph_id=contract_graph_id,
                    data=event_contract,
                    graph_namespace=namespace,
                )
            )
        events = tuple(event_records)

        # A missing or empty profile contract means "no profile".
        if profile_contract:
            profile = MemoryProfileRecord.from_markitect_contract(profile_contract)
        else:
            profile = None

        return cls(
            graph_id=graph_id,
            contract_graph_id=contract_graph_id,
            schema_version=schema_version,
            title=str(graph_contract.get("title") or graph_contract.get("name") or ""),
            intent=str(graph_contract.get("intent") or graph_contract.get("description") or ""),
            namespace=namespace,
            profile=profile,
            nodes=nodes,
            edges=edges,
            events=events,
            metadata=dict(graph_contract.get("metadata") or {}),
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the import result as a mapping passed through `compact_dict`."""
        profile_payload = self.profile.to_dict() if self.profile else None
        payload: dict[str, Any] = {
            "graph_id": self.graph_id,
            "contract_graph_id": self.contract_graph_id,
            "schema_version": self.schema_version,
            "title": self.title,
            "intent": self.intent,
            "namespace": dict(self.namespace),
            "profile": profile_payload,
            "nodes": [node.to_dict() for node in self.nodes],
            "edges": [edge.to_dict() for edge in self.edges],
            "events": [event.to_dict() for event in self.events],
            "metadata": dict(self.metadata),
        }
        return compact_dict(payload)
def runtime_id(prefix: str, *parts: Any) -> str:
    """Return `prefix`, an underscore, and the `stable_suffix` of `parts`."""
    suffix = stable_suffix(*parts)
    return f"{prefix}_{suffix}"
def stable_suffix(*parts: Any) -> str:
    """Return the first 24 characters of the digest of `parts`.

    The digest comes from `mapping_digest`, called with the `parts` tuple; a
    leading ``sha256:`` marker is removed before truncating.
    """
    unprefixed = mapping_digest(parts).removeprefix("sha256:")
    return unprefixed[:24]
def _mapping_values(value: Any) -> list[dict[str, Any]]:
if value is None:
return []
if isinstance(value, dict):
items: list[dict[str, Any]] = []
for key, raw in value.items():
if not isinstance(raw, dict):
continue
item = dict(raw)
item.setdefault("id", str(key))
items.append(item)
return items
return _mapping_list(value)
def _mapping_list(value: Any) -> list[dict[str, Any]]:
if value is None:
return []
if isinstance(value, dict):
return [dict(value)]
if isinstance(value, list):
return [dict(item) for item in value if isinstance(item, dict)]
if isinstance(value, tuple):
return [dict(item) for item in value if isinstance(item, dict)]
return []
def _string_list(value: Any) -> list[str]:
if value is None:
return []
if isinstance(value, (list, tuple)):
return [str(item) for item in value if item is not None]
return [str(value)]
def _optional_str(value: Any) -> str | None:
if value is None:
return None
text = str(value)
return text if text else None
def _optional_int(value: Any) -> int | None:
if value is None or value == "":
return None
return int(value)
def _optional_float(value: Any) -> float | None:
if value is None or value == "":
return None
return float(value)