Implement refinement hardening workplan

This commit is contained in:
2026-05-18 23:56:41 +02:00
parent 836acf7e01
commit 0eea94d05e
17 changed files with 1164 additions and 68 deletions

View File

@@ -9,6 +9,7 @@ from typing import Any
from .models import Diagnostic, MemoryEdge, MemoryEvent, MemoryGraph, MemoryNode, MemoryPath, PolicyDecision, ProfileIntent
LOCAL_STORE_SCHEMA = "phase_memory.local_store.v1"
LOCAL_STORE_METADATA_FILE = "phase-memory.json"
class InMemoryMemoryGraphStore:
@@ -141,27 +142,99 @@ class FileBackedMemoryGraphStore:
metadata={"store_schema_version": LOCAL_STORE_SCHEMA, "store_path": str(self.root)},
)
def metadata(self) -> dict[str, Any]:
return _read_json(self.root / LOCAL_STORE_METADATA_FILE)
def repair_diagnostics(self, *, events: list[MemoryEvent] | None = None) -> tuple[Diagnostic, ...]:
diagnostics: list[Diagnostic] = []
node_ids = {node.node_id for node in self.list_nodes()}
nodes, node_diagnostics = _read_records(self.nodes_dir, MemoryNode.from_mapping, record_type="node")
edges, edge_diagnostics = _read_records(self.edges_dir, MemoryEdge.from_mapping, record_type="edge")
paths, path_diagnostics = _read_records(self.paths_dir, MemoryPath.from_mapping, record_type="path")
diagnostics.extend(self.metadata_diagnostics())
diagnostics.extend(node_diagnostics)
diagnostics.extend(edge_diagnostics)
diagnostics.extend(path_diagnostics)
node_ids = {node.node_id for node in nodes}
event_ids = {event.event_id for event in events or ()}
for edge in self.list_edges():
for edge in edges:
if edge.source not in node_ids:
diagnostics.append(Diagnostic("error", "missing_edge_source", "Edge source does not reference a node.", edge.edge_id, {"source": edge.source}))
if edge.target not in node_ids:
diagnostics.append(Diagnostic("error", "missing_edge_target", "Edge target does not reference a node.", edge.edge_id, {"target": edge.target}))
for path in self.list_paths():
for path in paths:
for event_id in path.event_ids:
if event_id not in event_ids:
diagnostics.append(Diagnostic("warn", "orphaned_path_event", "Path references an event not present in the event log.", path.path_id, {"event_id": event_id}))
return tuple(diagnostics)
def metadata_diagnostics(self) -> tuple[Diagnostic, ...]:
metadata_path = self.root / LOCAL_STORE_METADATA_FILE
if not metadata_path.exists():
return (
Diagnostic(
"error",
"missing_store_metadata",
"Local store metadata file is missing.",
str(metadata_path),
{"expected_schema_version": LOCAL_STORE_SCHEMA},
),
)
try:
metadata = _read_json(metadata_path)
except json.JSONDecodeError as exc:
return (
Diagnostic(
"error",
"corrupt_store_metadata",
"Local store metadata file is not valid JSON.",
str(metadata_path),
{"error": str(exc)},
),
)
diagnostics: list[Diagnostic] = []
schema_version = str(metadata.get("schema_version") or "")
if not schema_version:
diagnostics.append(
Diagnostic(
"warn",
"store_migration_required",
"Local store metadata does not declare a schema version.",
str(metadata_path),
{"from_schema_version": "", "to_schema_version": LOCAL_STORE_SCHEMA},
)
)
elif schema_version != LOCAL_STORE_SCHEMA:
diagnostics.append(
Diagnostic(
"warn",
"store_migration_required",
"Local store metadata declares a schema version that needs migration.",
str(metadata_path),
{"from_schema_version": schema_version, "to_schema_version": LOCAL_STORE_SCHEMA},
)
)
planned = metadata.get("planned_migrations") or metadata.get("migrations") or ()
if planned:
diagnostics.append(
Diagnostic(
"warn",
"planned_store_migrations",
"Local store metadata declares planned migrations.",
str(metadata_path),
{"migrations": list(planned)},
)
)
return tuple(diagnostics)
def _ensure_layout(self) -> None:
for directory in (self.root, self.profiles_dir, self.nodes_dir, self.edges_dir, self.paths_dir, self.activations_dir):
directory.mkdir(parents=True, exist_ok=True)
metadata_path = self.root / "phase-memory.json"
metadata_path = self.root / LOCAL_STORE_METADATA_FILE
if not metadata_path.exists():
_write_json(metadata_path, {"schema_version": LOCAL_STORE_SCHEMA})
_write_json(metadata_path, {"schema_version": LOCAL_STORE_SCHEMA, "migrations": []})
class JsonlMemoryEventLog:
@@ -244,6 +317,12 @@ class RecordingAuditSink:
self.events.append(stored)
return {"recorded": True, "index": len(self.events) - 1, "event": stored}
def query(self, **filters: Any) -> list[dict[str, Any]]:
return filter_audit_events(self.events, **filters)
def retention_metadata(self) -> dict[str, Any]:
return {"mode": "in_memory", "retention_days": None}
class JsonlAuditSink:
def __init__(self, path: str | Path) -> None:
@@ -259,6 +338,20 @@ class JsonlAuditSink:
index = max(sum(1 for _ in handle) - 1, 0)
return {"recorded": True, "index": index, "event": stored}
def query(self, **filters: Any) -> list[dict[str, Any]]:
events: list[dict[str, Any]] = []
for raw in self.path.read_text(encoding="utf-8").splitlines():
if not raw.strip():
continue
try:
events.append(json.loads(raw))
except json.JSONDecodeError:
continue
return filter_audit_events(events, **filters)
def retention_metadata(self) -> dict[str, Any]:
return {"mode": "jsonl", "path": str(self.path), "retention_days": None}
class InMemorySemanticIndex:
def __init__(self) -> None:
@@ -308,4 +401,57 @@ def _read_json(path: Path) -> dict[str, Any]:
def _write_json(path: Path, data: dict[str, Any]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
tmp_path = path.with_name(f".{path.name}.tmp")
tmp_path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
tmp_path.replace(path)
def _read_records(directory: Path, factory, *, record_type: str) -> tuple[list[Any], list[Diagnostic]]:
records: list[Any] = []
diagnostics: list[Diagnostic] = []
for path in sorted(directory.glob("*.json")):
try:
records.append(factory(_read_json(path)))
except (json.JSONDecodeError, ValueError, TypeError, KeyError) as exc:
diagnostics.append(
Diagnostic(
"error",
"corrupt_store_record",
"Local store record could not be decoded.",
str(path),
{"record_type": record_type, "error": str(exc)},
)
)
return records, diagnostics
def filter_audit_events(events: list[dict[str, Any]], **filters: Any) -> list[dict[str, Any]]:
return [dict(event) for event in events if _audit_event_matches(event, filters)]
def _audit_event_matches(event: dict[str, Any], filters: dict[str, Any]) -> bool:
operation = filters.get("operation")
if operation is not None and event.get("operation") != operation:
return False
operation_id = filters.get("operation_id")
if operation_id is not None and event.get("operation_id") != operation_id:
return False
subject_kind = filters.get("subject_kind")
if subject_kind is not None and dict(event.get("subject") or {}).get("kind") != subject_kind:
return False
subject_id = filters.get("subject_id")
if subject_id is not None and dict(event.get("subject") or {}).get("id") != subject_id:
return False
source_ref = filters.get("source_ref")
if source_ref is not None and dict(event.get("source") or {}).get("ref") != source_ref:
return False
actor = filters.get("actor")
if actor is not None and event.get("actor") != actor:
return False
dry_run = filters.get("dry_run")
if dry_run is not None and bool(event.get("dry_run")) is not bool(dry_run):
return False
allowed = filters.get("allowed")
if allowed is not None and bool(event.get("allowed")) is not bool(allowed):
return False
return True