Files
phase-memory/src/phase_memory/adapters.py

636 lines
27 KiB
Python

"""Deterministic local adapters used by tests and first integrations."""
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from .models import Diagnostic, MemoryEdge, MemoryEvent, MemoryGraph, MemoryNode, MemoryPath, PolicyDecision, ProfileIntent
from .utils import parse_iso_datetime, stable_digest, utc_now_iso
# Schema identifier stored under "schema_version" in the local store metadata
# file; migration checks compare the stored value against this constant.
LOCAL_STORE_SCHEMA = "phase_memory.local_store.v1"
# Name of the metadata file kept directly under the store root directory.
LOCAL_STORE_METADATA_FILE = "phase-memory.json"
# "schema_version" tag of the dict returned by
# FileBackedMemoryGraphStore.migration_plan().
LOCAL_STORE_MIGRATION_PLAN_SCHEMA = "phase_memory.local_store.migration_plan.v1"
# "schema_version" tag of the dict returned by
# FileBackedMemoryGraphStore.apply_migration_plan().
LOCAL_STORE_MIGRATION_RESULT_SCHEMA = "phase_memory.local_store.migration_result.v1"
# "schema_version" tag of the dict returned by audit_export_batch().
AUDIT_EXPORT_BATCH_SCHEMA = "phase_memory.audit.export_batch.v1"
# "schema_version" tag of the dict returned by audit_retention_plan().
AUDIT_RETENTION_PLAN_SCHEMA = "phase_memory.audit.retention_plan.v1"
class InMemoryMemoryGraphStore:
    """Graph store that keeps profiles, nodes and edges in plain dictionaries.

    Each record is keyed by its own identifier attribute, so saving a record
    with an existing identifier replaces the previous one. Listing methods
    return records ordered by identifier.
    """

    def __init__(self) -> None:
        """Start with no profiles, nodes or edges."""
        self._profiles: dict[str, ProfileIntent] = {}
        self._nodes: dict[str, MemoryNode] = {}
        self._edges: dict[str, MemoryEdge] = {}

    def save_profile(self, profile: ProfileIntent) -> ProfileIntent:
        """Store ``profile`` under ``profile.profile_id`` and return it."""
        self._profiles[profile.profile_id] = profile
        return profile

    def get_profile(self, profile_id: str) -> ProfileIntent:
        """Return the stored profile; raises ``KeyError`` when it is unknown."""
        return self._profiles[profile_id]

    def save_node(self, node: MemoryNode) -> MemoryNode:
        """Store ``node`` under ``node.node_id`` and return it."""
        self._nodes[node.node_id] = node
        return node

    def get_node(self, node_id: str) -> MemoryNode:
        """Return the stored node; raises ``KeyError`` when it is unknown."""
        return self._nodes[node_id]

    def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]:
        """Return nodes sorted by ``node_id``.

        A falsy ``kind`` (``None`` or ``""``) disables the kind filter.
        """
        matching = [
            candidate
            for candidate in self._nodes.values()
            if not kind or candidate.kind == kind
        ]
        matching.sort(key=lambda candidate: candidate.node_id)
        return matching

    def save_edge(self, edge: MemoryEdge) -> MemoryEdge:
        """Store ``edge`` under ``edge.edge_id`` and return it."""
        self._edges[edge.edge_id] = edge
        return edge

    def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]:
        """Return edges sorted by ``edge_id``.

        Falsy ``source`` / ``target`` values disable the respective filter;
        when both are given an edge must match both.
        """
        matching = [
            candidate
            for candidate in self._edges.values()
            if (not source or candidate.source == source)
            and (not target or candidate.target == target)
        ]
        matching.sort(key=lambda candidate: candidate.edge_id)
        return matching
class InMemoryMemoryEventLog:
    """Event log kept in a Python list, preserving append order."""

    def __init__(self) -> None:
        """Start with an empty log."""
        self._events: list[MemoryEvent] = []

    def append(self, event: MemoryEvent) -> MemoryEvent:
        """Add ``event`` to the end of the log and return it.

        Raises:
            ValueError: If an event with the same ``event_id`` was already appended.
        """
        for recorded in self._events:
            if recorded.event_id == event.event_id:
                raise ValueError(f"Duplicate memory event id: {event.event_id}")
        self._events.append(event)
        return event

    def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]:
        """Return a new list of events in append order.

        A falsy ``kind`` (``None`` or ``""``) returns every event.
        """
        if not kind:
            return list(self._events)
        return [recorded for recorded in self._events if recorded.kind == kind]
class FileBackedMemoryGraphStore:
    """Versioned JSON-backed local graph store.

    Layout:
    - `phase-memory.json`
    - `profiles/<profile-id>.json`
    - `nodes/<node-id>.json`
    - `edges/<edge-id>.json`
    - `paths/<path-id>.json`
    - `activations/`
    - `audit.jsonl`

    Record identifiers are passed through ``_safe_name`` to build file names.
    """

    def __init__(self, root: str | Path) -> None:
        """Bind the store to ``root`` and create any missing directories/metadata."""
        self.root = Path(root)
        self.profiles_dir = self.root / "profiles"
        self.nodes_dir = self.root / "nodes"
        self.edges_dir = self.root / "edges"
        self.paths_dir = self.root / "paths"
        self.activations_dir = self.root / "activations"
        # Only the path is recorded here; this class never creates the file.
        self.audit_path = self.root / "audit.jsonl"
        self._ensure_layout()

    def save_profile(self, profile: ProfileIntent) -> ProfileIntent:
        """Write ``profile.to_dict()`` under ``profiles/`` and return ``profile``."""
        _write_json(self.profiles_dir / f"{_safe_name(profile.profile_id)}.json", profile.to_dict())
        return profile

    def get_profile(self, profile_id: str) -> ProfileIntent:
        """Load the profile stored for ``profile_id`` via ``ProfileIntent.from_mapping``."""
        return ProfileIntent.from_mapping(_read_json(self.profiles_dir / f"{_safe_name(profile_id)}.json"))

    def save_node(self, node: MemoryNode) -> MemoryNode:
        """Write ``node.to_dict()`` under ``nodes/`` and return ``node``."""
        _write_json(self.nodes_dir / f"{_safe_name(node.node_id)}.json", node.to_dict())
        return node

    def get_node(self, node_id: str) -> MemoryNode:
        """Load the node stored for ``node_id`` via ``MemoryNode.from_mapping``."""
        return MemoryNode.from_mapping(_read_json(self.nodes_dir / f"{_safe_name(node_id)}.json"))

    def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]:
        """Return every stored node sorted by ``node_id``.

        A falsy ``kind`` disables the kind filter. Unlike
        ``repair_diagnostics`` this does not tolerate unreadable records.
        """
        nodes = [MemoryNode.from_mapping(_read_json(path)) for path in sorted(self.nodes_dir.glob("*.json"))]
        if kind:
            nodes = [node for node in nodes if node.kind == kind]
        return sorted(nodes, key=lambda node: node.node_id)

    def save_edge(self, edge: MemoryEdge) -> MemoryEdge:
        """Write ``edge.to_dict()`` under ``edges/`` and return ``edge``."""
        _write_json(self.edges_dir / f"{_safe_name(edge.edge_id)}.json", edge.to_dict())
        return edge

    def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]:
        """Return stored edges sorted by ``edge_id``.

        Falsy ``source`` / ``target`` values disable the respective filter.
        """
        edges = [MemoryEdge.from_mapping(_read_json(path)) for path in sorted(self.edges_dir.glob("*.json"))]
        if source:
            edges = [edge for edge in edges if edge.source == source]
        if target:
            edges = [edge for edge in edges if edge.target == target]
        return sorted(edges, key=lambda edge: edge.edge_id)

    def save_path(self, path: MemoryPath) -> MemoryPath:
        """Write ``path.to_dict()`` under ``paths/`` and return ``path``."""
        _write_json(self.paths_dir / f"{_safe_name(path.path_id)}.json", path.to_dict())
        return path

    def get_path(self, path_id: str) -> MemoryPath:
        """Load the path stored for ``path_id`` via ``MemoryPath.from_mapping``."""
        return MemoryPath.from_mapping(_read_json(self.paths_dir / f"{_safe_name(path_id)}.json"))

    def list_paths(self) -> list[MemoryPath]:
        """Return every stored path, ordered by record file name."""
        return [MemoryPath.from_mapping(_read_json(path)) for path in sorted(self.paths_dir.glob("*.json"))]

    def export_graph(self, *, graph_id: str = "local", events: list[MemoryEvent] | None = None) -> MemoryGraph:
        """Build a ``MemoryGraph`` from all stored nodes and edges plus ``events``."""
        return MemoryGraph(
            graph_id=graph_id,
            nodes=tuple(self.list_nodes()),
            edges=tuple(self.list_edges()),
            events=tuple(events or ()),
            metadata={"store_schema_version": LOCAL_STORE_SCHEMA, "store_path": str(self.root)},
        )

    def metadata(self) -> dict[str, Any]:
        """Return the decoded contents of the store metadata file."""
        return _read_json(self.root / LOCAL_STORE_METADATA_FILE)

    def migration_plan(self) -> dict[str, Any]:
        """Describe, without changing anything, how to bring the metadata up to date.

        Returns:
            A plan dict tagged with ``LOCAL_STORE_MIGRATION_PLAN_SCHEMA``. It
            carries the metadata diagnostics, a ``valid`` flag that is false
            when any diagnostic has severity ``"error"``, and the ``actions``
            list. No actions are planned when the metadata is corrupt.
        """
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        diagnostics = list(self.metadata_diagnostics())
        metadata: dict[str, Any] = {}
        schema_version = ""
        if metadata_path.exists():
            try:
                loaded = _read_json(metadata_path)
            except json.JSONDecodeError:
                loaded = None
            # Valid JSON that is not an object (e.g. a list) has no fields to
            # read; metadata_diagnostics reports it as corrupt.
            if isinstance(loaded, dict):
                metadata = loaded
                schema_version = str(metadata.get("schema_version") or "")
        actions: list[dict[str, Any]] = []
        if not any(diagnostic.code == "corrupt_store_metadata" for diagnostic in diagnostics):
            if schema_version != LOCAL_STORE_SCHEMA:
                actions.append(
                    {
                        "id": "set_schema_version",
                        "action": "set_schema_version",
                        "from_schema_version": schema_version,
                        "to_schema_version": LOCAL_STORE_SCHEMA,
                    }
                )
            planned = metadata.get("planned_migrations") or metadata.get("migrations") or ()
            for item in planned:
                migration_id = str(item)
                actions.append(
                    {
                        "id": f"complete_planned:{migration_id}",
                        "action": "complete_planned_migration",
                        "migration": migration_id,
                    }
                )
        plan_id = f"store-migration:{stable_digest([str(self.root), schema_version, actions])}"
        return {
            "schema_version": LOCAL_STORE_MIGRATION_PLAN_SCHEMA,
            "id": plan_id,
            "store_path": str(self.root),
            "metadata_path": str(metadata_path),
            "current_schema_version": schema_version,
            "target_schema_version": LOCAL_STORE_SCHEMA,
            "valid": not any(diagnostic.severity == "error" for diagnostic in diagnostics),
            "dry_run": True,
            "actions": actions,
            "diagnostics": [diagnostic.to_dict() for diagnostic in diagnostics],
        }

    def apply_migration_plan(self, plan: dict[str, Any] | None = None, *, actor: str = "local") -> dict[str, Any]:
        """Apply ``plan`` (or a freshly computed one) to the metadata file.

        Args:
            plan: A plan as returned by ``migration_plan``; computed when falsy.
            actor: Recorded in the ``last_migration`` entry.

        Returns:
            A result dict tagged with ``LOCAL_STORE_MIGRATION_RESULT_SCHEMA``.
            When the plan carries error diagnostics nothing is applied and
            ``applied`` is false. Otherwise ``changed`` tells whether the plan
            contained any actions.
        """
        plan = dict(plan or self.migration_plan())
        diagnostics = [dict(item) for item in plan.get("diagnostics", ())]
        errors = [item for item in diagnostics if item.get("severity") == "error"]
        if errors:
            return {
                "schema_version": LOCAL_STORE_MIGRATION_RESULT_SCHEMA,
                "plan_id": plan.get("id", ""),
                "store_path": str(self.root),
                "applied": False,
                "changed": False,
                "actions": [],
                "diagnostics": diagnostics,
            }
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        try:
            metadata = _read_json(metadata_path) if metadata_path.exists() else {}
        except json.JSONDecodeError:
            metadata = {}
        if not isinstance(metadata, dict):
            # Same fallback as undecodable JSON: start from empty metadata.
            metadata = {}
        actions = [dict(item) for item in plan.get("actions", ())]
        completed = list(metadata.get("completed_migrations") or ())
        for action in actions:
            if action.get("action") == "complete_planned_migration":
                completed.append(str(action.get("migration") or ""))
        # NOTE(review): the nesting below was reconstructed from a copy of the
        # file that had lost its indentation. The metadata file is rewritten
        # only when the plan has actions, matching the "changed" flag - confirm.
        if actions:
            # Any non-empty plan leaves the store on the current schema, which
            # also covers an explicit "set_schema_version" action.
            metadata["schema_version"] = LOCAL_STORE_SCHEMA
            metadata["migrations"] = []
            metadata.pop("planned_migrations", None)
            if completed:
                metadata["completed_migrations"] = sorted({item for item in completed if item})
            metadata["last_migration"] = {
                "plan_id": str(plan.get("id") or ""),
                "actor": actor,
                "applied_at": utc_now_iso(),
                "actions": [str(action.get("id") or "") for action in actions],
            }
            _write_json(metadata_path, metadata)
        return {
            "schema_version": LOCAL_STORE_MIGRATION_RESULT_SCHEMA,
            "plan_id": plan.get("id", ""),
            "store_path": str(self.root),
            "applied": True,
            "changed": bool(actions),
            "actions": actions,
            "metadata": metadata,
            "diagnostics": diagnostics,
        }

    def repair_diagnostics(self, *, events: list[MemoryEvent] | None = None) -> tuple[Diagnostic, ...]:
        """Check the store for unreadable records and dangling references.

        Reports metadata problems, records that fail to decode, edges whose
        source or target is not a stored node, and path ``event_ids`` that do
        not appear in ``events``.
        """
        diagnostics: list[Diagnostic] = []
        nodes, node_diagnostics = _read_records(self.nodes_dir, MemoryNode.from_mapping, record_type="node")
        edges, edge_diagnostics = _read_records(self.edges_dir, MemoryEdge.from_mapping, record_type="edge")
        paths, path_diagnostics = _read_records(self.paths_dir, MemoryPath.from_mapping, record_type="path")
        diagnostics.extend(self.metadata_diagnostics())
        diagnostics.extend(node_diagnostics)
        diagnostics.extend(edge_diagnostics)
        diagnostics.extend(path_diagnostics)
        node_ids = {node.node_id for node in nodes}
        event_ids = {event.event_id for event in events or ()}
        for edge in edges:
            if edge.source not in node_ids:
                diagnostics.append(Diagnostic("error", "missing_edge_source", "Edge source does not reference a node.", edge.edge_id, {"source": edge.source}))
            if edge.target not in node_ids:
                diagnostics.append(Diagnostic("error", "missing_edge_target", "Edge target does not reference a node.", edge.edge_id, {"target": edge.target}))
        for path in paths:
            for event_id in path.event_ids:
                if event_id not in event_ids:
                    diagnostics.append(Diagnostic("warn", "orphaned_path_event", "Path references an event not present in the event log.", path.path_id, {"event_id": event_id}))
        return tuple(diagnostics)

    def metadata_diagnostics(self) -> tuple[Diagnostic, ...]:
        """Inspect the metadata file and report what is wrong with it.

        A missing file, undecodable JSON, or JSON that is not an object each
        produce a single ``"error"`` diagnostic. Otherwise ``"warn"``
        diagnostics flag a missing or outdated schema version and any
        declared planned migrations.
        """
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        if not metadata_path.exists():
            return (
                Diagnostic(
                    "error",
                    "missing_store_metadata",
                    "Local store metadata file is missing.",
                    str(metadata_path),
                    {"expected_schema_version": LOCAL_STORE_SCHEMA},
                ),
            )
        try:
            metadata = _read_json(metadata_path)
        except json.JSONDecodeError as exc:
            return (
                Diagnostic(
                    "error",
                    "corrupt_store_metadata",
                    "Local store metadata file is not valid JSON.",
                    str(metadata_path),
                    {"error": str(exc)},
                ),
            )
        if not isinstance(metadata, dict):
            # Valid JSON such as `[]` or `42` would otherwise fail on `.get`.
            return (
                Diagnostic(
                    "error",
                    "corrupt_store_metadata",
                    "Local store metadata file is not a JSON object.",
                    str(metadata_path),
                    {"error": f"expected a JSON object, got {type(metadata).__name__}"},
                ),
            )
        diagnostics: list[Diagnostic] = []
        schema_version = str(metadata.get("schema_version") or "")
        if not schema_version:
            diagnostics.append(
                Diagnostic(
                    "warn",
                    "store_migration_required",
                    "Local store metadata does not declare a schema version.",
                    str(metadata_path),
                    {"from_schema_version": "", "to_schema_version": LOCAL_STORE_SCHEMA},
                )
            )
        elif schema_version != LOCAL_STORE_SCHEMA:
            diagnostics.append(
                Diagnostic(
                    "warn",
                    "store_migration_required",
                    "Local store metadata declares a schema version that needs migration.",
                    str(metadata_path),
                    {"from_schema_version": schema_version, "to_schema_version": LOCAL_STORE_SCHEMA},
                )
            )
        planned = metadata.get("planned_migrations") or metadata.get("migrations") or ()
        if planned:
            diagnostics.append(
                Diagnostic(
                    "warn",
                    "planned_store_migrations",
                    "Local store metadata declares planned migrations.",
                    str(metadata_path),
                    {"migrations": list(planned)},
                )
            )
        return tuple(diagnostics)

    def _ensure_layout(self) -> None:
        """Create the store directories and, if absent, an initial metadata file."""
        for directory in (self.root, self.profiles_dir, self.nodes_dir, self.edges_dir, self.paths_dir, self.activations_dir):
            directory.mkdir(parents=True, exist_ok=True)
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        if not metadata_path.exists():
            _write_json(metadata_path, {"schema_version": LOCAL_STORE_SCHEMA, "migrations": []})
class JsonlMemoryEventLog:
    """Memory event log stored as one JSON object per line.

    Blank lines are ignored everywhere. Lines that are not valid JSON, or
    that decode to something other than a JSON object, are skipped when
    listing events and reported by ``diagnostics``.
    """

    def __init__(self, path: str | Path) -> None:
        """Bind the log to ``path``, creating its parent directory and the file if absent."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.touch(exist_ok=True)

    def append(self, event: MemoryEvent) -> MemoryEvent:
        """Append ``event.to_dict()`` as a compact JSON line and return ``event``.

        Raises:
            ValueError: If the log already contains an event with the same ``event_id``.
        """
        # The duplicate check re-reads the log file on every append.
        if any(existing.event_id == event.event_id for existing in self.list_events()):
            raise ValueError(f"Duplicate memory event id: {event.event_id}")
        line = json.dumps(event.to_dict(), sort_keys=True, separators=(",", ":"))
        with self.path.open("a", encoding="utf-8") as handle:
            handle.write(line + "\n")
        return event

    def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]:
        """Return logged events sorted by ``(timestamp, event_id)``.

        Args:
            kind: When not ``None``, keep only events whose ``kind`` equals it.
        """
        events: list[MemoryEvent] = []
        for data in self._iter_valid_event_dicts():
            event = MemoryEvent.from_mapping(data)
            if kind is None or event.kind == kind:
                events.append(event)
        return sorted(events, key=lambda event: (event.timestamp, event.event_id))

    def replay_graph(self, store: FileBackedMemoryGraphStore, *, graph_id: str = "local") -> MemoryGraph:
        """Export ``store`` as a graph that carries every event in this log."""
        return store.export_graph(graph_id=graph_id, events=self.list_events())

    def diagnostics(self) -> tuple[Diagnostic, ...]:
        """Report malformed lines, unknown schema versions and duplicate event ids.

        Each diagnostic's subject is ``line:<n>``, using 1-based file line numbers.
        """
        diagnostics: list[Diagnostic] = []
        seen: set[str] = set()
        for line_number, raw in self._iter_lines():
            try:
                data = json.loads(raw)
            except json.JSONDecodeError as exc:
                diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not valid JSON.", f"line:{line_number}", {"error": str(exc)}))
                continue
            if not isinstance(data, dict):
                # Valid JSON such as `42` or `[1]` has no fields to inspect.
                diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not a JSON object.", f"line:{line_number}", {"error": f"expected a JSON object, got {type(data).__name__}"}))
                continue
            schema = data.get("schema_version")
            if schema and schema != "markitect.memory.event.v1":
                diagnostics.append(Diagnostic("warn", "unknown_event_schema", "Event declares an unknown schema version.", f"line:{line_number}", {"schema_version": schema}))
            event_id = str(data.get("id") or data.get("event_id") or "")
            if event_id in seen:
                diagnostics.append(Diagnostic("error", "duplicate_event_id", "Event log contains a duplicate event id.", f"line:{line_number}", {"event_id": event_id}))
            if event_id:
                seen.add(event_id)
        return tuple(diagnostics)

    def _iter_valid_event_dicts(self):
        """Yield the decoded JSON object of every line that holds one."""
        for _, raw in self._iter_lines():
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict):
                yield data

    def _iter_lines(self):
        """Yield ``(line_number, text)`` for each non-blank line, numbering from 1."""
        for line_number, raw in enumerate(self.path.read_text(encoding="utf-8").splitlines(), start=1):
            if raw.strip():
                yield line_number, raw
class NoopContextPackageCompiler:
    """Compiler stand-in that wraps a selection without transforming it."""

    def compile_selection(self, selection: dict[str, Any]) -> dict[str, Any]:
        """Return a package dict describing ``selection``.

        The package id is derived from the selection's ``id`` (``anonymous``
        when absent), the selection is shallow-copied, and ``item_count`` is
        the combined length of its ``nodes`` and ``events`` entries.
        """
        selection_id = selection.get("id", "anonymous")
        node_total = len(selection.get("nodes", ()))
        event_total = len(selection.get("events", ()))
        package: dict[str, Any] = {}
        package["package_id"] = f"package:{selection_id}"
        package["selection"] = dict(selection)
        package["item_count"] = node_total + event_total
        return package
class AllowAllPolicyGateway:
    """Policy gateway that approves every request."""

    def authorize(self, *, action: str, resource: str, context: dict[str, Any] | None = None) -> PolicyDecision:
        """Return an allowing ``PolicyDecision`` that echoes the request in its metadata.

        A falsy ``context`` is recorded as an empty dict.
        """
        request_details = {
            "action": action,
            "resource": resource,
            "context": context or {},
        }
        return PolicyDecision(True, reason="local allow-all policy", metadata=request_details)
class RecordingAuditSink:
    """Audit sink that keeps recorded events in the public ``events`` list."""

    def __init__(self) -> None:
        """Start with no recorded events."""
        self.events: list[dict[str, Any]] = []

    def record(self, event: dict[str, Any]) -> dict[str, Any]:
        """Store a shallow copy of ``event`` and return a receipt.

        The receipt holds the stored copy and its position in ``events``.
        """
        snapshot = dict(event)
        position = len(self.events)
        self.events.append(snapshot)
        return {"recorded": True, "index": position, "event": snapshot}

    def query(self, **filters: Any) -> list[dict[str, Any]]:
        """Return the recorded events selected by ``filter_audit_events``."""
        return filter_audit_events(self.events, **filters)

    def retention_metadata(self) -> dict[str, Any]:
        """Describe this sink's retention settings (in-memory, no retention window)."""
        return {"mode": "in_memory", "retention_days": None}

    def retention_plan(self, *, retention_days: int | None = None, now: datetime | None = None) -> dict[str, Any]:
        """Build a retention plan over all recorded events via ``audit_retention_plan``."""
        return audit_retention_plan(
            self.events,
            retention_days=retention_days,
            now=now,
            retention=self.retention_metadata(),
        )

    def export_batch(self, **filters: Any) -> dict[str, Any]:
        """Export the events matching ``filters`` via ``audit_export_batch``."""
        selected = self.query(**filters)
        return audit_export_batch(selected, filters=filters, retention=self.retention_metadata())
class JsonlAuditSink:
    """Audit sink that appends events to a JSON-lines file.

    When reading, blank lines, lines that are not valid JSON, and lines that
    decode to something other than a JSON object are skipped.
    """

    def __init__(self, path: str | Path) -> None:
        """Bind the sink to ``path``, creating its parent directory and the file if absent."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.touch(exist_ok=True)

    def record(self, event: dict[str, Any]) -> dict[str, Any]:
        """Append a shallow copy of ``event`` as a compact JSON line.

        Returns:
            A receipt with the stored copy and ``index``, the zero-based
            position of the last line of the file after the write. Every
            line counts towards it, including ones ``query`` would skip.
        """
        stored = dict(event)
        line = json.dumps(stored, sort_keys=True, separators=(",", ":"))
        with self.path.open("a", encoding="utf-8") as handle:
            handle.write(line + "\n")
        # Re-open after the append handle is closed so the new line is counted.
        with self.path.open(encoding="utf-8") as handle:
            index = max(sum(1 for _ in handle) - 1, 0)
        return {"recorded": True, "index": index, "event": stored}

    def query(self, **filters: Any) -> list[dict[str, Any]]:
        """Return the stored events selected by ``filter_audit_events``."""
        return filter_audit_events(self._load_events(), **filters)

    def retention_metadata(self) -> dict[str, Any]:
        """Describe this sink's retention settings (JSONL file, no retention window)."""
        return {"mode": "jsonl", "path": str(self.path), "retention_days": None}

    def retention_plan(self, *, retention_days: int | None = None, now: datetime | None = None) -> dict[str, Any]:
        """Build a retention plan over all stored events via ``audit_retention_plan``."""
        return audit_retention_plan(self.query(), retention_days=retention_days, now=now, retention=self.retention_metadata())

    def export_batch(self, **filters: Any) -> dict[str, Any]:
        """Export the events matching ``filters`` via ``audit_export_batch``."""
        events = self.query(**filters)
        return audit_export_batch(events, filters=filters, retention=self.retention_metadata())

    def _load_events(self) -> list[dict[str, Any]]:
        """Read every line of the file that decodes to a JSON object."""
        events: list[dict[str, Any]] = []
        for raw in self.path.read_text(encoding="utf-8").splitlines():
            if not raw.strip():
                continue
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                continue
            # Valid JSON such as `42` or `[1]` is not an event; filtering
            # would fail on it because it has no `.get`.
            if isinstance(data, dict):
                events.append(data)
        return events
class InMemorySemanticIndex:
    """Substring-matching node index, partitioned by graph id."""

    def __init__(self) -> None:
        """Start with no indexed nodes."""
        self._nodes_by_graph: dict[str, list[MemoryNode]] = {}

    def upsert_nodes(self, nodes: list[MemoryNode]) -> dict[str, Any]:
        """Insert or replace ``nodes`` and report how many were passed in.

        A node goes into the graph named by ``node.metadata["graph_id"]``
        (``"local"`` when missing or falsy). It replaces any node with the
        same ``node_id`` there, and each graph's list stays sorted by ``node_id``.
        """
        for incoming in nodes:
            graph_key = str(incoming.metadata.get("graph_id") or "local")
            current = self._nodes_by_graph.get(graph_key, [])
            kept = [member for member in current if member.node_id != incoming.node_id]
            kept.append(incoming)
            kept.sort(key=lambda member: member.node_id)
            self._nodes_by_graph[graph_key] = kept
        return {"upserted": len(nodes)}

    def query(self, *, graph_id: str, query: str, limit: int = 10) -> list[dict[str, Any]]:
        """Return up to ``limit`` hits for ``query`` within ``graph_id``.

        The query is split on whitespace into distinct lower-cased terms. A
        node's score is the number of terms found as substrings of its
        lower-cased ``"<kind> <text>"``. Nodes scoring zero are omitted and
        hits are ordered by descending score, then id.
        """
        wanted = {word.lower() for word in query.split()}
        hits: list[dict[str, Any]] = []
        for candidate in self._nodes_by_graph.get(graph_id, []):
            haystack = f"{candidate.kind} {candidate.text}".lower()
            matched = len([word for word in wanted if word in haystack])
            if matched == 0:
                continue
            hits.append({"id": candidate.node_id, "score": matched, "kind": candidate.kind})
        hits.sort(key=lambda hit: (-hit["score"], hit["id"]))
        return hits[:limit]
class InMemoryRuntimeRegistry:
    """Registry that stores runtime envelopes in a dictionary by reference."""

    def __init__(self) -> None:
        """Start with no published envelopes."""
        self._envelopes: dict[str, dict[str, Any]] = {}

    def publish_runtime_envelope(self, envelope: dict[str, Any]) -> dict[str, Any]:
        """Store a shallow copy of ``envelope`` and return a publication receipt.

        The reference is the envelope's ``operation_id``, else its ``id``,
        else ``envelope:<number of envelopes already stored>``, as a string.
        """
        fallback = f"envelope:{len(self._envelopes)}"
        chosen = envelope.get("operation_id") or envelope.get("id") or fallback
        reference = str(chosen)
        snapshot = dict(envelope)
        self._envelopes[reference] = snapshot
        return {"published": True, "reference": reference, "envelope": snapshot}

    def fetch_runtime_envelope(self, reference: str) -> dict[str, Any]:
        """Return a shallow copy of the stored envelope; ``KeyError`` if unknown."""
        stored = self._envelopes[reference]
        return dict(stored)
def _safe_name(identifier: str) -> str:
    """Turn ``identifier`` into a string usable as a file-name stem.

    Alphanumeric characters and ``-``, ``_``, ``.`` are kept; every other
    character becomes ``_``. An empty result is replaced by ``"anonymous"``.
    """
    kept_punctuation = {"-", "_", "."}
    pieces = []
    for char in identifier:
        if char.isalnum() or char in kept_punctuation:
            pieces.append(char)
        else:
            pieces.append("_")
    cleaned = "".join(pieces)
    if cleaned:
        return cleaned
    return "anonymous"
def _read_json(path: Path) -> dict[str, Any]:
    """Decode the UTF-8 JSON document stored at ``path``."""
    with path.open(encoding="utf-8") as handle:
        document = json.load(handle)
    return document
def _write_json(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as indented, key-sorted JSON with a trailing newline.

    The parent directory is created if needed. The text goes to a hidden
    sibling ``.<name>.tmp`` first, which is then renamed onto ``path``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    staging_path = path.with_name(f".{path.name}.tmp")
    serialized = json.dumps(data, indent=2, sort_keys=True) + "\n"
    staging_path.write_text(serialized, encoding="utf-8")
    staging_path.replace(path)
def _read_records(directory: Path, factory, *, record_type: str) -> tuple[list[Any], list[Diagnostic]]:
    """Load every ``*.json`` file in ``directory`` through ``factory``.

    Files are visited in sorted path order. A file that fails JSON decoding,
    or whose ``factory`` call raises ``ValueError``, ``TypeError`` or
    ``KeyError``, is skipped and reported as a ``corrupt_store_record``
    error diagnostic.

    Returns:
        ``(records, diagnostics)``.
    """
    loaded: list[Any] = []
    problems: list[Diagnostic] = []
    for record_path in sorted(directory.glob("*.json")):
        try:
            record = factory(_read_json(record_path))
        except (json.JSONDecodeError, ValueError, TypeError, KeyError) as exc:
            details = {"record_type": record_type, "error": str(exc)}
            problems.append(
                Diagnostic(
                    "error",
                    "corrupt_store_record",
                    "Local store record could not be decoded.",
                    str(record_path),
                    details,
                )
            )
        else:
            loaded.append(record)
    return loaded, problems
def filter_audit_events(events: list[dict[str, Any]], **filters: Any) -> list[dict[str, Any]]:
    """Return shallow copies of the events accepted by ``_audit_event_matches``.

    Input order is preserved and the input dicts are not modified.
    """
    selected: list[dict[str, Any]] = []
    for candidate in events:
        if _audit_event_matches(candidate, filters):
            selected.append(dict(candidate))
    return selected
def audit_export_batch(
    events: list[dict[str, Any]],
    *,
    filters: dict[str, Any] | None = None,
    retention: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Wrap ``events`` in an export batch tagged with ``AUDIT_EXPORT_BATCH_SCHEMA``.

    The batch id is a digest of the filters and events. Events, filters and
    retention are shallow-copied into the batch; falsy ``filters`` or
    ``retention`` become empty dicts.
    """
    applied_filters = filters or {}
    digest = stable_digest([applied_filters, events])
    copied_events = [dict(event) for event in events]
    return {
        "schema_version": AUDIT_EXPORT_BATCH_SCHEMA,
        "id": f"audit-export:{digest}",
        "filters": dict(applied_filters),
        "count": len(events),
        "events": copied_events,
        "retention": dict(retention or {}),
    }
def audit_retention_plan(
    events: list[dict[str, Any]],
    *,
    retention_days: int | None = None,
    now: datetime | None = None,
    retention: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Split ``events`` into those eligible for removal and those to retain.

    Args:
        events: Audit event dicts.
        retention_days: Age threshold in days; when ``None`` the value of
            ``retention["retention_days"]`` is used instead.
        now: Reference time; defaults to the current UTC time.
        retention: Retention metadata echoed (as a copy) in the plan.

    Returns:
        A plan tagged with ``AUDIT_RETENTION_PLAN_SCHEMA``. An event is
        eligible only when a threshold is set, its age is known, and that
        age is at least the threshold; every other event is retained.
    """
    retention = dict(retention or {})
    if retention_days is None:
        retention_days = retention.get("retention_days")
    now = now or datetime.now(timezone.utc)
    eligible: list[str] = []
    retained: list[str] = []
    for event in events:
        # Identify the event by operation id, then id, then a content digest.
        label = str(event.get("operation_id") or event.get("id") or stable_digest(event))
        age_days = _event_age_days(event, now=now)
        expired = (
            retention_days is not None
            and age_days is not None
            and age_days >= int(retention_days)
        )
        bucket = eligible if expired else retained
        bucket.append(label)
    plan_digest = stable_digest([retention_days, eligible, retained])
    return {
        "schema_version": AUDIT_RETENTION_PLAN_SCHEMA,
        "id": f"audit-retention:{plan_digest}",
        "retention_days": retention_days,
        "eligible_count": len(eligible),
        "retained_count": len(retained),
        "eligible_operation_ids": eligible,
        "retained_operation_ids": retained,
        "retention": retention,
    }
def _audit_section(event: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``event[key]`` as a dict, or ``{}`` when it is missing or not dict-convertible."""
    try:
        return dict(event.get(key) or {})
    except (TypeError, ValueError):
        # e.g. "subject": "node-1" or "source": 7 - there is nothing to look up.
        return {}


def _audit_event_matches(event: dict[str, Any], filters: dict[str, Any]) -> bool:
    """Tell whether ``event`` satisfies every filter that is set.

    Filters whose value is ``None`` (or that are absent) are ignored, as are
    unrecognised filter names. Supported filters:

    - ``operation``, ``operation_id``, ``actor``: equality with the event
      field of the same name.
    - ``subject_kind``, ``subject_id``, ``source_ref``: equality with
      ``event["subject"]["kind"]``, ``event["subject"]["id"]`` and
      ``event["source"]["ref"]``. An event whose ``subject`` / ``source`` is
      missing or not a mapping does not match.
    - ``dry_run``, ``allowed``: compared by truthiness, so a missing event
      field counts as ``False``.
    """
    for field in ("operation", "operation_id", "actor"):
        expected = filters.get(field)
        if expected is not None and event.get(field) != expected:
            return False
    nested_filters = (
        ("subject_kind", "subject", "kind"),
        ("subject_id", "subject", "id"),
        ("source_ref", "source", "ref"),
    )
    for filter_name, section, field in nested_filters:
        expected = filters.get(filter_name)
        if expected is not None and _audit_section(event, section).get(field) != expected:
            return False
    for field in ("dry_run", "allowed"):
        expected = filters.get(field)
        if expected is not None and bool(event.get(field)) is not bool(expected):
            return False
    return True
def _event_age_days(event: dict[str, Any], *, now: datetime) -> int | None:
    """Return the whole-day age of ``event`` relative to ``now``.

    Args:
        event: Audit event dict; its ``timestamp`` entry is parsed with
            ``parse_iso_datetime``.
        now: Reference time.

    Returns:
        ``None`` when the timestamp cannot be parsed, otherwise the number
        of whole days between the timestamp and ``now``, never below zero
        (timestamps in the future count as age 0).
    """
    # assumes parse_iso_datetime returns a datetime or None - TODO confirm
    timestamp = parse_iso_datetime(str(event.get("timestamp") or ""))
    if timestamp is None:
        return None
    # Subtracting a naive datetime from an aware one raises TypeError. When
    # exactly one side is naive, read it as UTC, the zone used for the
    # module's default "now".
    timestamp_is_naive = timestamp.utcoffset() is None
    now_is_naive = now.utcoffset() is None
    if timestamp_is_naive and not now_is_naive:
        timestamp = timestamp.replace(tzinfo=timezone.utc)
    elif now_is_naive and not timestamp_is_naive:
        now = now.replace(tzinfo=timezone.utc)
    return max((now - timestamp).days, 0)