Files
phase-memory/src/phase_memory/adapters.py

312 lines
13 KiB
Python

"""Deterministic local adapters used by tests and first integrations."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from .models import Diagnostic, MemoryEdge, MemoryEvent, MemoryGraph, MemoryNode, MemoryPath, PolicyDecision, ProfileIntent
# Schema identifier of the local file-backed store: written to the store's
# `phase-memory.json` metadata file and reported in exported graph metadata.
LOCAL_STORE_SCHEMA = "phase_memory.local_store.v1"
class InMemoryMemoryGraphStore:
    """Graph store that keeps profiles, nodes and edges in plain dictionaries.

    Each record is keyed by its own identifier, so saving a record whose id is
    already present replaces the earlier one. Listings are returned sorted by
    identifier.
    """

    def __init__(self) -> None:
        """Start with no profiles, nodes or edges."""
        self._profiles: dict[str, ProfileIntent] = {}
        self._nodes: dict[str, MemoryNode] = {}
        self._edges: dict[str, MemoryEdge] = {}

    def save_profile(self, profile: ProfileIntent) -> ProfileIntent:
        """Store ``profile`` under its ``profile_id`` and hand it back."""
        key = profile.profile_id
        self._profiles[key] = profile
        return profile

    def get_profile(self, profile_id: str) -> ProfileIntent:
        """Return the profile saved under ``profile_id``.

        Raises:
            KeyError: if no profile with that id was saved.
        """
        return self._profiles[profile_id]

    def save_node(self, node: MemoryNode) -> MemoryNode:
        """Store ``node`` under its ``node_id`` and hand it back."""
        key = node.node_id
        self._nodes[key] = node
        return node

    def get_node(self, node_id: str) -> MemoryNode:
        """Return the node saved under ``node_id``.

        Raises:
            KeyError: if no node with that id was saved.
        """
        return self._nodes[node_id]

    def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]:
        """Return stored nodes ordered by ``node_id``.

        A falsy ``kind`` (``None`` or ``""``) disables filtering; otherwise
        only nodes whose ``kind`` equals it are returned.
        """
        matching = (
            candidate
            for candidate in self._nodes.values()
            if not kind or candidate.kind == kind
        )
        return sorted(matching, key=lambda candidate: candidate.node_id)

    def save_edge(self, edge: MemoryEdge) -> MemoryEdge:
        """Store ``edge`` under its ``edge_id`` and hand it back."""
        key = edge.edge_id
        self._edges[key] = edge
        return edge

    def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]:
        """Return stored edges ordered by ``edge_id``.

        Falsy ``source`` / ``target`` values disable the respective filter;
        when both are given an edge must match both.
        """

        def is_wanted(candidate: MemoryEdge) -> bool:
            # The source filter is evaluated before the target filter.
            if source and candidate.source != source:
                return False
            if target and candidate.target != target:
                return False
            return True

        selected = [candidate for candidate in self._edges.values() if is_wanted(candidate)]
        selected.sort(key=lambda candidate: candidate.edge_id)
        return selected
class InMemoryMemoryEventLog:
    """Append-only event log backed by a Python list, in insertion order."""

    def __init__(self) -> None:
        """Start with an empty log."""
        self._events: list[MemoryEvent] = []

    def append(self, event: MemoryEvent) -> MemoryEvent:
        """Add ``event`` to the end of the log and return it.

        Raises:
            ValueError: if an event with the same ``event_id`` is already logged.
        """
        for recorded in self._events:
            if recorded.event_id == event.event_id:
                raise ValueError(f"Duplicate memory event id: {event.event_id}")
        self._events.append(event)
        return event

    def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]:
        """Return a new list of logged events in insertion order.

        A falsy ``kind`` disables filtering; otherwise only events whose
        ``kind`` equals it are returned.
        """
        if not kind:
            return list(self._events)
        return [recorded for recorded in self._events if recorded.kind == kind]
class FileBackedMemoryGraphStore:
    """Graph store that persists each record as its own JSON file under one root.

    Layout (relative to ``root``):
    - `phase-memory.json` (metadata holding the store schema version)
    - `profiles/<profile-id>.json`
    - `nodes/<node-id>.json`
    - `edges/<edge-id>.json`
    - `paths/<path-id>.json`
    - `activations/` (directory is created; nothing in this class writes to it)
    - `audit.jsonl` (path is recorded on the instance; not created here)

    Identifiers are passed through ``_safe_name`` before being used as file
    names.
    """

    def __init__(self, root: str | Path) -> None:
        """Record the layout paths below ``root`` and create what is missing."""
        base = Path(root)
        self.root = base
        self.profiles_dir = base / "profiles"
        self.nodes_dir = base / "nodes"
        self.edges_dir = base / "edges"
        self.paths_dir = base / "paths"
        self.activations_dir = base / "activations"
        self.audit_path = base / "audit.jsonl"
        self._ensure_layout()

    def save_profile(self, profile: ProfileIntent) -> ProfileIntent:
        """Write ``profile.to_dict()`` to its file under ``profiles/`` and return the profile."""
        destination = self.profiles_dir / f"{_safe_name(profile.profile_id)}.json"
        _write_json(destination, profile.to_dict())
        return profile

    def get_profile(self, profile_id: str) -> ProfileIntent:
        """Load the profile file for ``profile_id`` and rebuild it with ``ProfileIntent.from_mapping``."""
        location = self.profiles_dir / f"{_safe_name(profile_id)}.json"
        return ProfileIntent.from_mapping(_read_json(location))

    def save_node(self, node: MemoryNode) -> MemoryNode:
        """Write ``node.to_dict()`` to its file under ``nodes/`` and return the node."""
        destination = self.nodes_dir / f"{_safe_name(node.node_id)}.json"
        _write_json(destination, node.to_dict())
        return node

    def get_node(self, node_id: str) -> MemoryNode:
        """Load the node file for ``node_id`` and rebuild it with ``MemoryNode.from_mapping``."""
        location = self.nodes_dir / f"{_safe_name(node_id)}.json"
        return MemoryNode.from_mapping(_read_json(location))

    def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]:
        """Load every ``nodes/*.json`` file and return the nodes ordered by ``node_id``.

        A falsy ``kind`` disables filtering; otherwise only nodes whose
        ``kind`` equals it are returned.
        """
        loaded: list[MemoryNode] = []
        for file_path in sorted(self.nodes_dir.glob("*.json")):
            loaded.append(MemoryNode.from_mapping(_read_json(file_path)))
        if kind:
            loaded = [candidate for candidate in loaded if candidate.kind == kind]
        loaded.sort(key=lambda candidate: candidate.node_id)
        return loaded

    def save_edge(self, edge: MemoryEdge) -> MemoryEdge:
        """Write ``edge.to_dict()`` to its file under ``edges/`` and return the edge."""
        destination = self.edges_dir / f"{_safe_name(edge.edge_id)}.json"
        _write_json(destination, edge.to_dict())
        return edge

    def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]:
        """Load every ``edges/*.json`` file and return the edges ordered by ``edge_id``.

        Falsy ``source`` / ``target`` values disable the respective filter;
        when both are given an edge must match both.
        """
        loaded: list[MemoryEdge] = []
        for file_path in sorted(self.edges_dir.glob("*.json")):
            loaded.append(MemoryEdge.from_mapping(_read_json(file_path)))
        if source:
            loaded = [candidate for candidate in loaded if candidate.source == source]
        if target:
            loaded = [candidate for candidate in loaded if candidate.target == target]
        loaded.sort(key=lambda candidate: candidate.edge_id)
        return loaded

    def save_path(self, path: MemoryPath) -> MemoryPath:
        """Write ``path.to_dict()`` to its file under ``paths/`` and return the path."""
        destination = self.paths_dir / f"{_safe_name(path.path_id)}.json"
        _write_json(destination, path.to_dict())
        return path

    def get_path(self, path_id: str) -> MemoryPath:
        """Load the path file for ``path_id`` and rebuild it with ``MemoryPath.from_mapping``."""
        location = self.paths_dir / f"{_safe_name(path_id)}.json"
        return MemoryPath.from_mapping(_read_json(location))

    def list_paths(self) -> list[MemoryPath]:
        """Load every ``paths/*.json`` file, in sorted file-path order."""
        loaded: list[MemoryPath] = []
        for file_path in sorted(self.paths_dir.glob("*.json")):
            loaded.append(MemoryPath.from_mapping(_read_json(file_path)))
        return loaded

    def export_graph(self, *, graph_id: str = "local", events: list[MemoryEvent] | None = None) -> MemoryGraph:
        """Bundle all stored nodes and edges, plus the supplied events, into a ``MemoryGraph``.

        The graph metadata records the store schema version and the root path.
        """
        exported_nodes = tuple(self.list_nodes())
        exported_edges = tuple(self.list_edges())
        exported_events = tuple(events) if events else ()
        details = {"store_schema_version": LOCAL_STORE_SCHEMA, "store_path": str(self.root)}
        return MemoryGraph(
            graph_id=graph_id,
            nodes=exported_nodes,
            edges=exported_edges,
            events=exported_events,
            metadata=details,
        )

    def repair_diagnostics(self, *, events: list[MemoryEvent] | None = None) -> tuple[Diagnostic, ...]:
        """Report dangling references found in the store.

        Emits an ``error`` for each edge endpoint that is not a stored node id
        and a ``warn`` for each path event id that is absent from ``events``
        (with ``events`` omitted, every path event id is reported).
        """
        known_node_ids = {node.node_id for node in self.list_nodes()}
        known_event_ids = {event.event_id for event in events or ()}
        findings: list[Diagnostic] = []

        for edge in self.list_edges():
            if edge.source not in known_node_ids:
                findings.append(
                    Diagnostic(
                        "error",
                        "missing_edge_source",
                        "Edge source does not reference a node.",
                        edge.edge_id,
                        {"source": edge.source},
                    )
                )
            if edge.target not in known_node_ids:
                findings.append(
                    Diagnostic(
                        "error",
                        "missing_edge_target",
                        "Edge target does not reference a node.",
                        edge.edge_id,
                        {"target": edge.target},
                    )
                )

        for path in self.list_paths():
            for event_id in path.event_ids:
                if event_id in known_event_ids:
                    continue
                findings.append(
                    Diagnostic(
                        "warn",
                        "orphaned_path_event",
                        "Path references an event not present in the event log.",
                        path.path_id,
                        {"event_id": event_id},
                    )
                )

        return tuple(findings)

    def _ensure_layout(self) -> None:
        """Create the store directories and, if absent, the ``phase-memory.json`` metadata file."""
        required_dirs = (
            self.root,
            self.profiles_dir,
            self.nodes_dir,
            self.edges_dir,
            self.paths_dir,
            self.activations_dir,
        )
        for folder in required_dirs:
            folder.mkdir(parents=True, exist_ok=True)
        marker = self.root / "phase-memory.json"
        if marker.exists():
            # An existing metadata file is left untouched.
            return
        _write_json(marker, {"schema_version": LOCAL_STORE_SCHEMA})
class JsonlMemoryEventLog:
    """Append-only event log stored as one compact JSON object per line.

    Reading is best-effort: blank lines, lines that are not valid JSON, and
    lines whose JSON value is not an object are skipped by ``list_events``;
    ``diagnostics`` is the channel that reports them.
    """

    def __init__(self, path: str | Path) -> None:
        """Remember ``path`` and make sure the file and its parent directory exist."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.touch(exist_ok=True)

    def append(self, event: MemoryEvent) -> MemoryEvent:
        """Append ``event`` as a single JSON line and return it.

        Raises:
            ValueError: if an event with the same ``event_id`` is already in the log.
        """
        if any(existing.event_id == event.event_id for existing in self.list_events()):
            raise ValueError(f"Duplicate memory event id: {event.event_id}")
        with self.path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(event.to_dict(), sort_keys=True, separators=(",", ":")) + "\n")
        return event

    def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]:
        """Return the readable events ordered by ``(timestamp, event_id)``.

        With ``kind=None`` every event is returned; otherwise only events
        whose ``kind`` equals it.
        """
        events: list[MemoryEvent] = []
        for data in self._iter_valid_event_dicts():
            event = MemoryEvent.from_mapping(data)
            if kind is None or event.kind == kind:
                events.append(event)
        return sorted(events, key=lambda event: (event.timestamp, event.event_id))

    def replay_graph(self, store: FileBackedMemoryGraphStore, *, graph_id: str = "local") -> MemoryGraph:
        """Export ``store`` as a graph that carries every event of this log."""
        return store.export_graph(graph_id=graph_id, events=self.list_events())

    def diagnostics(self) -> tuple[Diagnostic, ...]:
        """Scan the raw log and report malformed lines, unknown schemas and duplicate ids.

        Each diagnostic is located by a ``line:<n>`` reference using the
        1-based line number in the file.
        """
        diagnostics: list[Diagnostic] = []
        seen: set[str] = set()
        for line_number, raw in self._iter_lines():
            try:
                data = json.loads(raw)
            except json.JSONDecodeError as exc:
                diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not valid JSON.", f"line:{line_number}", {"error": str(exc)}))
                continue
            if not isinstance(data, dict):
                # Valid JSON such as `[1, 2]`, `42` or `null` has no `.get`;
                # report it instead of crashing the whole scan.
                diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not a JSON object.", f"line:{line_number}", {"error": f"expected a JSON object, got {type(data).__name__}"}))
                continue
            schema = data.get("schema_version")
            if schema and schema != "markitect.memory.event.v1":
                diagnostics.append(Diagnostic("warn", "unknown_event_schema", "Event declares an unknown schema version.", f"line:{line_number}", {"schema_version": schema}))
            event_id = str(data.get("id") or data.get("event_id") or "")
            if event_id in seen:
                diagnostics.append(Diagnostic("error", "duplicate_event_id", "Event log contains a duplicate event id.", f"line:{line_number}", {"event_id": event_id}))
            if event_id:
                # Lines without an id are never tracked, so they cannot be
                # flagged as duplicates of each other.
                seen.add(event_id)
        return tuple(diagnostics)

    def _iter_valid_event_dicts(self):
        """Yield the parsed JSON object of every line that holds one.

        Lines that fail to parse, or that parse to something other than a
        JSON object, are skipped silently.
        """
        for _, raw in self._iter_lines():
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict):
                yield data

    def _iter_lines(self):
        """Yield ``(line_number, text)`` for each non-blank line, numbering from 1."""
        for line_number, raw in enumerate(self.path.read_text(encoding="utf-8").splitlines(), start=1):
            if raw.strip():
                yield line_number, raw
class NoopContextPackageCompiler:
    """Compiler stand-in that wraps a selection in a package dict without transforming it."""

    def compile_selection(self, selection: dict[str, Any]) -> dict[str, Any]:
        """Return a package description for ``selection``.

        The result holds a ``package_id`` derived from the selection's ``id``
        (``anonymous`` when absent), a shallow copy of the selection, and the
        combined length of its ``nodes`` and ``events`` entries.
        """
        package_id = f"package:{selection.get('id', 'anonymous')}"
        snapshot = dict(selection)
        node_total = len(selection.get("nodes", ()))
        event_total = len(selection.get("events", ()))
        return {
            "package_id": package_id,
            "selection": snapshot,
            "item_count": node_total + event_total,
        }
class AllowAllPolicyGateway:
    """Policy gateway that returns the same permissive decision for every request."""

    def authorize(self, *, action: str, resource: str, context: dict[str, Any] | None = None) -> PolicyDecision:
        """Build a ``PolicyDecision`` whose first positional argument is ``True``.

        The request's ``action``, ``resource`` and ``context`` (an empty dict
        when falsy) are echoed back in the decision metadata.
        """
        request_details = {
            "action": action,
            "resource": resource,
            "context": context if context else {},
        }
        return PolicyDecision(True, reason="local allow-all policy", metadata=request_details)
class RecordingAuditSink:
    """Audit sink that collects recorded events in the public ``events`` list."""

    def __init__(self) -> None:
        """Start with no recorded events."""
        self.events: list[dict[str, Any]] = []

    def record(self, event: dict[str, Any]) -> dict[str, Any]:
        """Store a shallow copy of ``event`` and return a receipt.

        The receipt carries the zero-based position of the copy in ``events``
        and the stored copy itself.
        """
        snapshot = dict(event)
        # The new entry lands at the current end of the list.
        position = len(self.events)
        self.events.append(snapshot)
        return {"recorded": True, "index": position, "event": snapshot}
class JsonlAuditSink:
    """Audit sink that appends each recorded event as a compact JSON line to a file."""

    def __init__(self, path: str | Path) -> None:
        """Remember ``path`` and make sure the file and its parent directory exist."""
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.touch(exist_ok=True)
        self.path = target

    def record(self, event: dict[str, Any]) -> dict[str, Any]:
        """Append a shallow copy of ``event`` to the file and return a receipt.

        The receipt's ``index`` is the file's line count after the write,
        minus one (never below zero).
        """
        stored = dict(event)
        with self.path.open("a", encoding="utf-8") as handle:
            serialized = json.dumps(stored, sort_keys=True, separators=(",", ":"))
            handle.write(serialized + "\n")
        # Re-read the file so the index reflects everything currently in it.
        line_total = 0
        with self.path.open(encoding="utf-8") as handle:
            for _ in handle:
                line_total += 1
        index = line_total - 1 if line_total > 0 else 0
        return {"recorded": True, "index": index, "event": stored}
class InMemorySemanticIndex:
    """Substring-matching index over nodes, grouped by graph id.

    A node's graph is read from ``node.metadata["graph_id"]`` and falls back
    to ``"local"`` when that entry is missing or falsy.
    """

    def __init__(self) -> None:
        """Start with no indexed nodes."""
        self._nodes_by_graph: dict[str, list[MemoryNode]] = {}

    def upsert_nodes(self, nodes: list[MemoryNode]) -> dict[str, Any]:
        """Insert ``nodes``, replacing any indexed node with the same ``node_id`` in its graph.

        Returns a dict with the number of nodes passed in under ``upserted``.
        """
        for node in nodes:
            graph_id = str(node.metadata.get("graph_id") or "local")
            bucket = self._nodes_by_graph.get(graph_id, [])
            kept = [item for item in bucket if item.node_id != node.node_id]
            kept.append(node)
            # Each graph's list is kept ordered by node id.
            kept.sort(key=lambda item: item.node_id)
            self._nodes_by_graph[graph_id] = kept
        return {"upserted": len(nodes)}

    def query(self, *, graph_id: str, query: str, limit: int = 10) -> list[dict[str, Any]]:
        """Score the nodes of ``graph_id`` against the whitespace-separated query terms.

        A node's score is the number of distinct lower-cased terms that occur
        as substrings of its lower-cased ``"<kind> <text>"``. Nodes scoring
        zero are dropped; the rest are ordered by descending score, then id,
        and sliced with ``[:limit]``.
        """
        needles: set[str] = set()
        for word in query.split():
            if word.strip():
                needles.add(word.lower())

        matches: list[dict[str, Any]] = []
        for node in self._nodes_by_graph.get(graph_id, []):
            haystack = f"{node.kind} {node.text}".lower()
            hits = [needle for needle in needles if needle in haystack]
            score = len(hits)
            if not score:
                continue
            matches.append({"id": node.node_id, "score": score, "kind": node.kind})

        matches.sort(key=lambda item: (-item["score"], item["id"]))
        return matches[:limit]
class InMemoryRuntimeRegistry:
    """Registry that keeps published runtime envelopes in a dictionary keyed by reference."""

    def __init__(self) -> None:
        """Start with no published envelopes."""
        self._envelopes: dict[str, dict[str, Any]] = {}

    def publish_runtime_envelope(self, envelope: dict[str, Any]) -> dict[str, Any]:
        """Store a shallow copy of ``envelope`` and return a receipt.

        The reference is the string form of the first truthy value among the
        envelope's ``operation_id``, its ``id``, and a generated
        ``envelope:<count>`` where ``count`` is the number of envelopes
        already stored. Publishing under an existing reference overwrites it.
        """
        generated = f"envelope:{len(self._envelopes)}"
        chosen = envelope.get("operation_id") or envelope.get("id") or generated
        reference = str(chosen)
        stored = dict(envelope)
        self._envelopes[reference] = stored
        return {"published": True, "reference": reference, "envelope": stored}

    def fetch_runtime_envelope(self, reference: str) -> dict[str, Any]:
        """Return a shallow copy of the envelope stored under ``reference``.

        Raises:
            KeyError: if nothing was published under that reference.
        """
        found = self._envelopes[reference]
        return dict(found)
def _safe_name(identifier: str) -> str:
    """Turn ``identifier`` into a string usable as a file name.

    Alphanumeric characters and ``-``, ``_``, ``.`` are kept; every other
    character becomes ``_``. An empty result is replaced by ``"anonymous"``.
    """
    allowed_punctuation = ("-", "_", ".")
    pieces: list[str] = []
    for char in identifier:
        keep = char.isalnum() or char in allowed_punctuation
        pieces.append(char if keep else "_")
    cleaned = "".join(pieces)
    if not cleaned:
        return "anonymous"
    return cleaned
def _read_json(path: Path) -> dict[str, Any]:
    """Read the UTF-8 file at ``path`` and return its parsed JSON content."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def _write_json(path: Path, data: dict[str, Any]) -> None:
    """Serialize ``data`` as indented, key-sorted JSON and store it at ``path``.

    Missing parent directories are created. The content is first written to a
    sibling ``<name>.tmp`` file and then renamed over ``path``, so an
    interrupted write cannot leave a truncated JSON file at ``path``.

    Raises:
        TypeError: if ``data`` is not JSON-serializable (``path`` is left untouched).
        OSError: if the file cannot be written or renamed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2, sort_keys=True) + "\n"
    # The staging name does not end in ".json", so a leftover staging file is
    # never matched by a "*.json" directory scan.
    staging = path.with_name(f"{path.name}.tmp")
    try:
        staging.write_text(payload, encoding="utf-8")
        staging.replace(path)
    except OSError:
        # Do not leave a half-written staging file behind.
        if staging.exists():
            staging.unlink()
        raise