"""Deterministic local adapters used by tests and first integrations.""" from __future__ import annotations import json from datetime import datetime, timezone from pathlib import Path from typing import Any from .models import Diagnostic, MemoryEdge, MemoryEvent, MemoryGraph, MemoryNode, MemoryPath, PolicyDecision, ProfileIntent from .utils import parse_iso_datetime, stable_digest, utc_now_iso LOCAL_STORE_SCHEMA = "phase_memory.local_store.v1" LOCAL_STORE_METADATA_FILE = "phase-memory.json" LOCAL_STORE_MIGRATION_PLAN_SCHEMA = "phase_memory.local_store.migration_plan.v1" LOCAL_STORE_MIGRATION_RESULT_SCHEMA = "phase_memory.local_store.migration_result.v1" AUDIT_EXPORT_BATCH_SCHEMA = "phase_memory.audit.export_batch.v1" AUDIT_RETENTION_PLAN_SCHEMA = "phase_memory.audit.retention_plan.v1" class InMemoryMemoryGraphStore: def __init__(self) -> None: self._profiles: dict[str, ProfileIntent] = {} self._nodes: dict[str, MemoryNode] = {} self._edges: dict[str, MemoryEdge] = {} def save_profile(self, profile: ProfileIntent) -> ProfileIntent: self._profiles[profile.profile_id] = profile return profile def get_profile(self, profile_id: str) -> ProfileIntent: return self._profiles[profile_id] def save_node(self, node: MemoryNode) -> MemoryNode: self._nodes[node.node_id] = node return node def get_node(self, node_id: str) -> MemoryNode: return self._nodes[node_id] def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]: nodes = list(self._nodes.values()) if kind: nodes = [node for node in nodes if node.kind == kind] return sorted(nodes, key=lambda node: node.node_id) def save_edge(self, edge: MemoryEdge) -> MemoryEdge: self._edges[edge.edge_id] = edge return edge def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]: edges = list(self._edges.values()) if source: edges = [edge for edge in edges if edge.source == source] if target: edges = [edge for edge in edges if edge.target == target] return sorted(edges, key=lambda edge: edge.edge_id) class InMemoryMemoryEventLog: def __init__(self) -> None: self._events: list[MemoryEvent] = [] def append(self, event: MemoryEvent) -> MemoryEvent: if any(existing.event_id == event.event_id for existing in self._events): raise ValueError(f"Duplicate memory event id: {event.event_id}") self._events.append(event) return event def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]: events = list(self._events) if kind: events = [event for event in events if event.kind == kind] return events class FileBackedMemoryGraphStore: """Versioned JSON-backed local graph store. Layout: - `phase-memory.json` - `profiles/.json` - `nodes/.json` - `edges/.json` - `paths/.json` - `activations/` - `audit.jsonl` """ def __init__(self, root: str | Path) -> None: self.root = Path(root) self.profiles_dir = self.root / "profiles" self.nodes_dir = self.root / "nodes" self.edges_dir = self.root / "edges" self.paths_dir = self.root / "paths" self.activations_dir = self.root / "activations" self.audit_path = self.root / "audit.jsonl" self._ensure_layout() def save_profile(self, profile: ProfileIntent) -> ProfileIntent: _write_json(self.profiles_dir / f"{_safe_name(profile.profile_id)}.json", profile.to_dict()) return profile def get_profile(self, profile_id: str) -> ProfileIntent: return ProfileIntent.from_mapping(_read_json(self.profiles_dir / f"{_safe_name(profile_id)}.json")) def save_node(self, node: MemoryNode) -> MemoryNode: _write_json(self.nodes_dir / f"{_safe_name(node.node_id)}.json", node.to_dict()) return node def get_node(self, node_id: str) -> MemoryNode: return MemoryNode.from_mapping(_read_json(self.nodes_dir / f"{_safe_name(node_id)}.json")) def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]: nodes = [MemoryNode.from_mapping(_read_json(path)) for path in sorted(self.nodes_dir.glob("*.json"))] if kind: nodes = [node for node in nodes if node.kind == kind] return sorted(nodes, key=lambda node: node.node_id) def save_edge(self, edge: MemoryEdge) -> MemoryEdge: _write_json(self.edges_dir / f"{_safe_name(edge.edge_id)}.json", edge.to_dict()) return edge def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]: edges = [MemoryEdge.from_mapping(_read_json(path)) for path in sorted(self.edges_dir.glob("*.json"))] if source: edges = [edge for edge in edges if edge.source == source] if target: edges = [edge for edge in edges if edge.target == target] return sorted(edges, key=lambda edge: edge.edge_id) def save_path(self, path: MemoryPath) -> MemoryPath: _write_json(self.paths_dir / f"{_safe_name(path.path_id)}.json", path.to_dict()) return path def get_path(self, path_id: str) -> MemoryPath: return MemoryPath.from_mapping(_read_json(self.paths_dir / f"{_safe_name(path_id)}.json")) def list_paths(self) -> list[MemoryPath]: return [MemoryPath.from_mapping(_read_json(path)) for path in sorted(self.paths_dir.glob("*.json"))] def export_graph(self, *, graph_id: str = "local", events: list[MemoryEvent] | None = None) -> MemoryGraph: return MemoryGraph( graph_id=graph_id, nodes=tuple(self.list_nodes()), edges=tuple(self.list_edges()), events=tuple(events or ()), metadata={"store_schema_version": LOCAL_STORE_SCHEMA, "store_path": str(self.root)}, ) def metadata(self) -> dict[str, Any]: return _read_json(self.root / LOCAL_STORE_METADATA_FILE) def migration_plan(self) -> dict[str, Any]: metadata_path = self.root / LOCAL_STORE_METADATA_FILE diagnostics = list(self.metadata_diagnostics()) metadata: dict[str, Any] = {} schema_version = "" if metadata_path.exists(): try: metadata = _read_json(metadata_path) schema_version = str(metadata.get("schema_version") or "") except json.JSONDecodeError: pass actions: list[dict[str, Any]] = [] if not any(diagnostic.code == "corrupt_store_metadata" for diagnostic in diagnostics): if schema_version != LOCAL_STORE_SCHEMA: actions.append( { "id": "set_schema_version", "action": "set_schema_version", "from_schema_version": schema_version, "to_schema_version": LOCAL_STORE_SCHEMA, } ) planned = metadata.get("planned_migrations") or metadata.get("migrations") or () for item in planned: migration_id = str(item) actions.append( { "id": f"complete_planned:{migration_id}", "action": "complete_planned_migration", "migration": migration_id, } ) plan_id = f"store-migration:{stable_digest([str(self.root), schema_version, actions])}" return { "schema_version": LOCAL_STORE_MIGRATION_PLAN_SCHEMA, "id": plan_id, "store_path": str(self.root), "metadata_path": str(metadata_path), "current_schema_version": schema_version, "target_schema_version": LOCAL_STORE_SCHEMA, "valid": not any(diagnostic.severity == "error" for diagnostic in diagnostics), "dry_run": True, "actions": actions, "diagnostics": [diagnostic.to_dict() for diagnostic in diagnostics], } def apply_migration_plan(self, plan: dict[str, Any] | None = None, *, actor: str = "local") -> dict[str, Any]: plan = dict(plan or self.migration_plan()) diagnostics = [dict(item) for item in plan.get("diagnostics", ())] errors = [item for item in diagnostics if item.get("severity") == "error"] if errors: return { "schema_version": LOCAL_STORE_MIGRATION_RESULT_SCHEMA, "plan_id": plan.get("id", ""), "store_path": str(self.root), "applied": False, "changed": False, "actions": [], "diagnostics": diagnostics, } metadata_path = self.root / LOCAL_STORE_METADATA_FILE try: metadata = _read_json(metadata_path) if metadata_path.exists() else {} except json.JSONDecodeError: metadata = {} actions = [dict(item) for item in plan.get("actions", ())] completed = list(metadata.get("completed_migrations") or ()) for action in actions: if action.get("action") == "set_schema_version": metadata["schema_version"] = LOCAL_STORE_SCHEMA if action.get("action") == "complete_planned_migration": completed.append(str(action.get("migration") or "")) if actions: metadata["schema_version"] = LOCAL_STORE_SCHEMA metadata["migrations"] = [] metadata.pop("planned_migrations", None) if completed: metadata["completed_migrations"] = sorted({item for item in completed if item}) metadata["last_migration"] = { "plan_id": str(plan.get("id") or ""), "actor": actor, "applied_at": utc_now_iso(), "actions": [str(action.get("id") or "") for action in actions], } _write_json(metadata_path, metadata) return { "schema_version": LOCAL_STORE_MIGRATION_RESULT_SCHEMA, "plan_id": plan.get("id", ""), "store_path": str(self.root), "applied": True, "changed": bool(actions), "actions": actions, "metadata": metadata, "diagnostics": diagnostics, } def repair_diagnostics(self, *, events: list[MemoryEvent] | None = None) -> tuple[Diagnostic, ...]: diagnostics: list[Diagnostic] = [] nodes, node_diagnostics = _read_records(self.nodes_dir, MemoryNode.from_mapping, record_type="node") edges, edge_diagnostics = _read_records(self.edges_dir, MemoryEdge.from_mapping, record_type="edge") paths, path_diagnostics = _read_records(self.paths_dir, MemoryPath.from_mapping, record_type="path") diagnostics.extend(self.metadata_diagnostics()) diagnostics.extend(node_diagnostics) diagnostics.extend(edge_diagnostics) diagnostics.extend(path_diagnostics) node_ids = {node.node_id for node in nodes} event_ids = {event.event_id for event in events or ()} for edge in edges: if edge.source not in node_ids: diagnostics.append(Diagnostic("error", "missing_edge_source", "Edge source does not reference a node.", edge.edge_id, {"source": edge.source})) if edge.target not in node_ids: diagnostics.append(Diagnostic("error", "missing_edge_target", "Edge target does not reference a node.", edge.edge_id, {"target": edge.target})) for path in paths: for event_id in path.event_ids: if event_id not in event_ids: diagnostics.append(Diagnostic("warn", "orphaned_path_event", "Path references an event not present in the event log.", path.path_id, {"event_id": event_id})) return tuple(diagnostics) def metadata_diagnostics(self) -> tuple[Diagnostic, ...]: metadata_path = self.root / LOCAL_STORE_METADATA_FILE if not metadata_path.exists(): return ( Diagnostic( "error", "missing_store_metadata", "Local store metadata file is missing.", str(metadata_path), {"expected_schema_version": LOCAL_STORE_SCHEMA}, ), ) try: metadata = _read_json(metadata_path) except json.JSONDecodeError as exc: return ( Diagnostic( "error", "corrupt_store_metadata", "Local store metadata file is not valid JSON.", str(metadata_path), {"error": str(exc)}, ), ) diagnostics: list[Diagnostic] = [] schema_version = str(metadata.get("schema_version") or "") if not schema_version: diagnostics.append( Diagnostic( "warn", "store_migration_required", "Local store metadata does not declare a schema version.", str(metadata_path), {"from_schema_version": "", "to_schema_version": LOCAL_STORE_SCHEMA}, ) ) elif schema_version != LOCAL_STORE_SCHEMA: diagnostics.append( Diagnostic( "warn", "store_migration_required", "Local store metadata declares a schema version that needs migration.", str(metadata_path), {"from_schema_version": schema_version, "to_schema_version": LOCAL_STORE_SCHEMA}, ) ) planned = metadata.get("planned_migrations") or metadata.get("migrations") or () if planned: diagnostics.append( Diagnostic( "warn", "planned_store_migrations", "Local store metadata declares planned migrations.", str(metadata_path), {"migrations": list(planned)}, ) ) return tuple(diagnostics) def _ensure_layout(self) -> None: for directory in (self.root, self.profiles_dir, self.nodes_dir, self.edges_dir, self.paths_dir, self.activations_dir): directory.mkdir(parents=True, exist_ok=True) metadata_path = self.root / LOCAL_STORE_METADATA_FILE if not metadata_path.exists(): _write_json(metadata_path, {"schema_version": LOCAL_STORE_SCHEMA, "migrations": []}) class JsonlMemoryEventLog: def __init__(self, path: str | Path) -> None: self.path = Path(path) self.path.parent.mkdir(parents=True, exist_ok=True) self.path.touch(exist_ok=True) def append(self, event: MemoryEvent) -> MemoryEvent: if any(existing.event_id == event.event_id for existing in self.list_events()): raise ValueError(f"Duplicate memory event id: {event.event_id}") with self.path.open("a", encoding="utf-8") as handle: handle.write(json.dumps(event.to_dict(), sort_keys=True, separators=(",", ":")) + "\n") return event def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]: events: list[MemoryEvent] = [] for data in self._iter_valid_event_dicts(): event = MemoryEvent.from_mapping(data) if kind is None or event.kind == kind: events.append(event) return sorted(events, key=lambda event: (event.timestamp, event.event_id)) def replay_graph(self, store: FileBackedMemoryGraphStore, *, graph_id: str = "local") -> MemoryGraph: return store.export_graph(graph_id=graph_id, events=self.list_events()) def diagnostics(self) -> tuple[Diagnostic, ...]: diagnostics: list[Diagnostic] = [] seen: set[str] = set() for line_number, raw in self._iter_lines(): try: data = json.loads(raw) except json.JSONDecodeError as exc: diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not valid JSON.", f"line:{line_number}", {"error": str(exc)})) continue schema = data.get("schema_version") if schema and schema != "markitect.memory.event.v1": diagnostics.append(Diagnostic("warn", "unknown_event_schema", "Event declares an unknown schema version.", f"line:{line_number}", {"schema_version": schema})) event_id = str(data.get("id") or data.get("event_id") or "") if event_id in seen: diagnostics.append(Diagnostic("error", "duplicate_event_id", "Event log contains a duplicate event id.", f"line:{line_number}", {"event_id": event_id})) if event_id: seen.add(event_id) return tuple(diagnostics) def _iter_valid_event_dicts(self): for _, raw in self._iter_lines(): try: data = json.loads(raw) except json.JSONDecodeError: continue yield data def _iter_lines(self): for line_number, raw in enumerate(self.path.read_text(encoding="utf-8").splitlines(), start=1): if raw.strip(): yield line_number, raw class NoopContextPackageCompiler: def compile_selection(self, selection: dict[str, Any]) -> dict[str, Any]: return { "package_id": f"package:{selection.get('id', 'anonymous')}", "selection": dict(selection), "item_count": len(selection.get("nodes", ())) + len(selection.get("events", ())), } class AllowAllPolicyGateway: def authorize(self, *, action: str, resource: str, context: dict[str, Any] | None = None) -> PolicyDecision: return PolicyDecision(True, reason="local allow-all policy", metadata={"action": action, "resource": resource, "context": context or {}}) class RecordingAuditSink: def __init__(self) -> None: self.events: list[dict[str, Any]] = [] def record(self, event: dict[str, Any]) -> dict[str, Any]: stored = dict(event) self.events.append(stored) return {"recorded": True, "index": len(self.events) - 1, "event": stored} def query(self, **filters: Any) -> list[dict[str, Any]]: return filter_audit_events(self.events, **filters) def retention_metadata(self) -> dict[str, Any]: return {"mode": "in_memory", "retention_days": None} def retention_plan(self, *, retention_days: int | None = None, now: datetime | None = None) -> dict[str, Any]: return audit_retention_plan(self.events, retention_days=retention_days, now=now, retention=self.retention_metadata()) def export_batch(self, **filters: Any) -> dict[str, Any]: events = self.query(**filters) return audit_export_batch(events, filters=filters, retention=self.retention_metadata()) class JsonlAuditSink: def __init__(self, path: str | Path) -> None: self.path = Path(path) self.path.parent.mkdir(parents=True, exist_ok=True) self.path.touch(exist_ok=True) def record(self, event: dict[str, Any]) -> dict[str, Any]: stored = dict(event) with self.path.open("a", encoding="utf-8") as handle: handle.write(json.dumps(stored, sort_keys=True, separators=(",", ":")) + "\n") with self.path.open(encoding="utf-8") as handle: index = max(sum(1 for _ in handle) - 1, 0) return {"recorded": True, "index": index, "event": stored} def query(self, **filters: Any) -> list[dict[str, Any]]: events: list[dict[str, Any]] = [] for raw in self.path.read_text(encoding="utf-8").splitlines(): if not raw.strip(): continue try: events.append(json.loads(raw)) except json.JSONDecodeError: continue return filter_audit_events(events, **filters) def retention_metadata(self) -> dict[str, Any]: return {"mode": "jsonl", "path": str(self.path), "retention_days": None} def retention_plan(self, *, retention_days: int | None = None, now: datetime | None = None) -> dict[str, Any]: return audit_retention_plan(self.query(), retention_days=retention_days, now=now, retention=self.retention_metadata()) def export_batch(self, **filters: Any) -> dict[str, Any]: events = self.query(**filters) return audit_export_batch(events, filters=filters, retention=self.retention_metadata()) class InMemorySemanticIndex: def __init__(self) -> None: self._nodes_by_graph: dict[str, list[MemoryNode]] = {} def upsert_nodes(self, nodes: list[MemoryNode]) -> dict[str, Any]: for node in nodes: graph_id = str(node.metadata.get("graph_id") or "local") existing = [item for item in self._nodes_by_graph.get(graph_id, []) if item.node_id != node.node_id] existing.append(node) self._nodes_by_graph[graph_id] = sorted(existing, key=lambda item: item.node_id) return {"upserted": len(nodes)} def query(self, *, graph_id: str, query: str, limit: int = 10) -> list[dict[str, Any]]: terms = {term.lower() for term in query.split() if term.strip()} results: list[dict[str, Any]] = [] for node in self._nodes_by_graph.get(graph_id, []): text = f"{node.kind} {node.text}".lower() score = sum(1 for term in terms if term in text) if score: results.append({"id": node.node_id, "score": score, "kind": node.kind}) return sorted(results, key=lambda item: (-item["score"], item["id"]))[:limit] class InMemoryRuntimeRegistry: def __init__(self) -> None: self._envelopes: dict[str, dict[str, Any]] = {} def publish_runtime_envelope(self, envelope: dict[str, Any]) -> dict[str, Any]: reference = str(envelope.get("operation_id") or envelope.get("id") or f"envelope:{len(self._envelopes)}") stored = dict(envelope) self._envelopes[reference] = stored return {"published": True, "reference": reference, "envelope": stored} def fetch_runtime_envelope(self, reference: str) -> dict[str, Any]: return dict(self._envelopes[reference]) def _safe_name(identifier: str) -> str: safe = "".join(char if char.isalnum() or char in ("-", "_", ".") else "_" for char in identifier) return safe or "anonymous" def _read_json(path: Path) -> dict[str, Any]: return json.loads(path.read_text(encoding="utf-8")) def _write_json(path: Path, data: dict[str, Any]) -> None: path.parent.mkdir(parents=True, exist_ok=True) tmp_path = path.with_name(f".{path.name}.tmp") tmp_path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8") tmp_path.replace(path) def _read_records(directory: Path, factory, *, record_type: str) -> tuple[list[Any], list[Diagnostic]]: records: list[Any] = [] diagnostics: list[Diagnostic] = [] for path in sorted(directory.glob("*.json")): try: records.append(factory(_read_json(path))) except (json.JSONDecodeError, ValueError, TypeError, KeyError) as exc: diagnostics.append( Diagnostic( "error", "corrupt_store_record", "Local store record could not be decoded.", str(path), {"record_type": record_type, "error": str(exc)}, ) ) return records, diagnostics def filter_audit_events(events: list[dict[str, Any]], **filters: Any) -> list[dict[str, Any]]: return [dict(event) for event in events if _audit_event_matches(event, filters)] def audit_export_batch( events: list[dict[str, Any]], *, filters: dict[str, Any] | None = None, retention: dict[str, Any] | None = None, ) -> dict[str, Any]: return { "schema_version": AUDIT_EXPORT_BATCH_SCHEMA, "id": f"audit-export:{stable_digest([filters or {}, events])}", "filters": dict(filters or {}), "count": len(events), "events": [dict(event) for event in events], "retention": dict(retention or {}), } def audit_retention_plan( events: list[dict[str, Any]], *, retention_days: int | None = None, now: datetime | None = None, retention: dict[str, Any] | None = None, ) -> dict[str, Any]: retention = dict(retention or {}) if retention_days is None: retention_days = retention.get("retention_days") now = now or datetime.now(timezone.utc) eligible: list[str] = [] retained: list[str] = [] for event in events: event_id = str(event.get("operation_id") or event.get("id") or stable_digest(event)) age = _event_age_days(event, now=now) if retention_days is not None and age is not None and age >= int(retention_days): eligible.append(event_id) else: retained.append(event_id) return { "schema_version": AUDIT_RETENTION_PLAN_SCHEMA, "id": f"audit-retention:{stable_digest([retention_days, eligible, retained])}", "retention_days": retention_days, "eligible_count": len(eligible), "retained_count": len(retained), "eligible_operation_ids": eligible, "retained_operation_ids": retained, "retention": retention, } def _audit_event_matches(event: dict[str, Any], filters: dict[str, Any]) -> bool: operation = filters.get("operation") if operation is not None and event.get("operation") != operation: return False operation_id = filters.get("operation_id") if operation_id is not None and event.get("operation_id") != operation_id: return False subject_kind = filters.get("subject_kind") if subject_kind is not None and dict(event.get("subject") or {}).get("kind") != subject_kind: return False subject_id = filters.get("subject_id") if subject_id is not None and dict(event.get("subject") or {}).get("id") != subject_id: return False source_ref = filters.get("source_ref") if source_ref is not None and dict(event.get("source") or {}).get("ref") != source_ref: return False actor = filters.get("actor") if actor is not None and event.get("actor") != actor: return False dry_run = filters.get("dry_run") if dry_run is not None and bool(event.get("dry_run")) is not bool(dry_run): return False allowed = filters.get("allowed") if allowed is not None and bool(event.get("allowed")) is not bool(allowed): return False return True def _event_age_days(event: dict[str, Any], *, now: datetime) -> int | None: timestamp = parse_iso_datetime(str(event.get("timestamp") or "")) if timestamp is None: return None return max((now - timestamp).days, 0)