generated from coulomb/repo-seed
636 lines
27 KiB
Python
636 lines
27 KiB
Python
"""Deterministic local adapters used by tests and first integrations."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from .models import Diagnostic, MemoryEdge, MemoryEvent, MemoryGraph, MemoryNode, MemoryPath, PolicyDecision, ProfileIntent
|
|
from .utils import parse_iso_datetime, stable_digest, utc_now_iso
|
|
|
|
# Schema identifier written to, and expected in, the local store metadata file.
LOCAL_STORE_SCHEMA = "phase_memory.local_store.v1"
# File name of the metadata document kept at the store root.
LOCAL_STORE_METADATA_FILE = "phase-memory.json"

# Schema identifiers stamped on migration plan / migration result payloads.
LOCAL_STORE_MIGRATION_PLAN_SCHEMA = "phase_memory.local_store.migration_plan.v1"
LOCAL_STORE_MIGRATION_RESULT_SCHEMA = "phase_memory.local_store.migration_result.v1"

# Schema identifiers stamped on audit export batch / retention plan payloads.
AUDIT_EXPORT_BATCH_SCHEMA = "phase_memory.audit.export_batch.v1"
AUDIT_RETENTION_PLAN_SCHEMA = "phase_memory.audit.retention_plan.v1"
|
|
|
|
|
|
class InMemoryMemoryGraphStore:
    """Dictionary-backed graph store that keeps profiles, nodes and edges in memory."""

    def __init__(self) -> None:
        # Each mapping is keyed by the record's own identifier.
        self._profiles: dict[str, ProfileIntent] = {}
        self._nodes: dict[str, MemoryNode] = {}
        self._edges: dict[str, MemoryEdge] = {}

    def save_profile(self, profile: ProfileIntent) -> ProfileIntent:
        """Store ``profile`` under its ``profile_id``, replacing any previous entry."""
        self._profiles[profile.profile_id] = profile
        return profile

    def get_profile(self, profile_id: str) -> ProfileIntent:
        """Return the stored profile; raises ``KeyError`` for an unknown id."""
        return self._profiles[profile_id]

    def save_node(self, node: MemoryNode) -> MemoryNode:
        """Store ``node`` under its ``node_id``, replacing any previous entry."""
        self._nodes[node.node_id] = node
        return node

    def get_node(self, node_id: str) -> MemoryNode:
        """Return the stored node; raises ``KeyError`` for an unknown id."""
        return self._nodes[node_id]

    def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]:
        """Return nodes ordered by ``node_id``; a truthy ``kind`` keeps only that kind."""
        selected = [
            candidate
            for candidate in self._nodes.values()
            if not kind or candidate.kind == kind
        ]
        selected.sort(key=lambda candidate: candidate.node_id)
        return selected

    def save_edge(self, edge: MemoryEdge) -> MemoryEdge:
        """Store ``edge`` under its ``edge_id``, replacing any previous entry."""
        self._edges[edge.edge_id] = edge
        return edge

    def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]:
        """Return edges ordered by ``edge_id``; truthy ``source``/``target`` act as filters."""
        selected = [
            candidate
            for candidate in self._edges.values()
            if (not source or candidate.source == source)
            and (not target or candidate.target == target)
        ]
        selected.sort(key=lambda candidate: candidate.edge_id)
        return selected
|
|
|
|
|
|
class InMemoryMemoryEventLog:
    """Append-only, in-memory event log that rejects duplicate event ids."""

    def __init__(self) -> None:
        self._events: list[MemoryEvent] = []
        # Ids of every appended event, so duplicate detection does not have to
        # rescan the whole list on each append.
        self._event_ids: set[str] = set()

    def append(self, event: MemoryEvent) -> MemoryEvent:
        """Append ``event`` and return it.

        Raises:
            ValueError: if an event with the same ``event_id`` was already appended.
        """
        if event.event_id in self._event_ids:
            raise ValueError(f"Duplicate memory event id: {event.event_id}")
        self._events.append(event)
        self._event_ids.add(event.event_id)
        return event

    def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]:
        """Return a new list of events in append order; a truthy ``kind`` filters by kind."""
        events = list(self._events)
        if kind:
            events = [event for event in events if event.kind == kind]
        return events
|
|
|
|
|
|
class FileBackedMemoryGraphStore:
    """Versioned JSON-backed local graph store.

    Layout:
    - `phase-memory.json`
    - `profiles/<profile-id>.json`
    - `nodes/<node-id>.json`
    - `edges/<edge-id>.json`
    - `paths/<path-id>.json`
    - `activations/`
    - `audit.jsonl`

    Record file names are derived from record ids via ``_safe_name``.
    """

    def __init__(self, root: str | Path) -> None:
        """Bind the store to ``root`` and create any missing directories and metadata."""
        self.root = Path(root)
        self.profiles_dir = self.root / "profiles"
        self.nodes_dir = self.root / "nodes"
        self.edges_dir = self.root / "edges"
        self.paths_dir = self.root / "paths"
        self.activations_dir = self.root / "activations"
        self.audit_path = self.root / "audit.jsonl"
        self._ensure_layout()

    def save_profile(self, profile: ProfileIntent) -> ProfileIntent:
        """Write ``profile`` to ``profiles/<profile-id>.json`` and return it."""
        _write_json(self.profiles_dir / f"{_safe_name(profile.profile_id)}.json", profile.to_dict())
        return profile

    def get_profile(self, profile_id: str) -> ProfileIntent:
        """Load the profile stored for ``profile_id``."""
        return ProfileIntent.from_mapping(_read_json(self.profiles_dir / f"{_safe_name(profile_id)}.json"))

    def save_node(self, node: MemoryNode) -> MemoryNode:
        """Write ``node`` to ``nodes/<node-id>.json`` and return it."""
        _write_json(self.nodes_dir / f"{_safe_name(node.node_id)}.json", node.to_dict())
        return node

    def get_node(self, node_id: str) -> MemoryNode:
        """Load the node stored for ``node_id``."""
        return MemoryNode.from_mapping(_read_json(self.nodes_dir / f"{_safe_name(node_id)}.json"))

    def list_nodes(self, *, kind: str | None = None) -> list[MemoryNode]:
        """Load every node file, ordered by ``node_id``; a truthy ``kind`` filters by kind."""
        nodes = [MemoryNode.from_mapping(_read_json(path)) for path in sorted(self.nodes_dir.glob("*.json"))]
        if kind:
            nodes = [node for node in nodes if node.kind == kind]
        return sorted(nodes, key=lambda node: node.node_id)

    def save_edge(self, edge: MemoryEdge) -> MemoryEdge:
        """Write ``edge`` to ``edges/<edge-id>.json`` and return it."""
        _write_json(self.edges_dir / f"{_safe_name(edge.edge_id)}.json", edge.to_dict())
        return edge

    def list_edges(self, *, source: str | None = None, target: str | None = None) -> list[MemoryEdge]:
        """Load every edge file, ordered by ``edge_id``; truthy ``source``/``target`` filter."""
        edges = [MemoryEdge.from_mapping(_read_json(path)) for path in sorted(self.edges_dir.glob("*.json"))]
        if source:
            edges = [edge for edge in edges if edge.source == source]
        if target:
            edges = [edge for edge in edges if edge.target == target]
        return sorted(edges, key=lambda edge: edge.edge_id)

    def save_path(self, path: MemoryPath) -> MemoryPath:
        """Write ``path`` to ``paths/<path-id>.json`` and return it."""
        _write_json(self.paths_dir / f"{_safe_name(path.path_id)}.json", path.to_dict())
        return path

    def get_path(self, path_id: str) -> MemoryPath:
        """Load the path stored for ``path_id``."""
        return MemoryPath.from_mapping(_read_json(self.paths_dir / f"{_safe_name(path_id)}.json"))

    def list_paths(self) -> list[MemoryPath]:
        """Load every path file, in sorted file-name order."""
        return [MemoryPath.from_mapping(_read_json(path)) for path in sorted(self.paths_dir.glob("*.json"))]

    def export_graph(self, *, graph_id: str = "local", events: list[MemoryEvent] | None = None) -> MemoryGraph:
        """Bundle all stored nodes and edges (plus the supplied ``events``) into a graph."""
        return MemoryGraph(
            graph_id=graph_id,
            nodes=tuple(self.list_nodes()),
            edges=tuple(self.list_edges()),
            events=tuple(events or ()),
            metadata={"store_schema_version": LOCAL_STORE_SCHEMA, "store_path": str(self.root)},
        )

    def metadata(self) -> dict[str, Any]:
        """Return the parsed store metadata document (raises if it is missing or invalid)."""
        return _read_json(self.root / LOCAL_STORE_METADATA_FILE)

    def migration_plan(self) -> dict[str, Any]:
        """Describe, without changing anything, the actions needed to migrate this store.

        The plan lists a ``set_schema_version`` action when the declared schema
        differs from ``LOCAL_STORE_SCHEMA`` and one ``complete_planned_migration``
        action per entry in the metadata's ``planned_migrations`` / ``migrations``.
        ``valid`` is false when any metadata diagnostic has severity ``error``.
        """
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        diagnostics = list(self.metadata_diagnostics())
        metadata: dict[str, Any] = {}
        schema_version = ""

        if metadata_path.exists():
            # Unreadable metadata degrades to an empty mapping here; the problem
            # itself is already reported through ``diagnostics``.
            metadata, _ = self._load_metadata()
            schema_version = str(metadata.get("schema_version") or "")

        actions: list[dict[str, Any]] = []
        if not any(diagnostic.code == "corrupt_store_metadata" for diagnostic in diagnostics):
            if schema_version != LOCAL_STORE_SCHEMA:
                actions.append(
                    {
                        "id": "set_schema_version",
                        "action": "set_schema_version",
                        "from_schema_version": schema_version,
                        "to_schema_version": LOCAL_STORE_SCHEMA,
                    }
                )
            planned = metadata.get("planned_migrations") or metadata.get("migrations") or ()
            for item in planned:
                migration_id = str(item)
                actions.append(
                    {
                        "id": f"complete_planned:{migration_id}",
                        "action": "complete_planned_migration",
                        "migration": migration_id,
                    }
                )

        plan_id = f"store-migration:{stable_digest([str(self.root), schema_version, actions])}"
        return {
            "schema_version": LOCAL_STORE_MIGRATION_PLAN_SCHEMA,
            "id": plan_id,
            "store_path": str(self.root),
            "metadata_path": str(metadata_path),
            "current_schema_version": schema_version,
            "target_schema_version": LOCAL_STORE_SCHEMA,
            "valid": not any(diagnostic.severity == "error" for diagnostic in diagnostics),
            "dry_run": True,
            "actions": actions,
            "diagnostics": [diagnostic.to_dict() for diagnostic in diagnostics],
        }

    def apply_migration_plan(self, plan: dict[str, Any] | None = None, *, actor: str = "local") -> dict[str, Any]:
        """Apply ``plan`` (or a freshly computed one) to the store metadata.

        A plan carrying any ``error`` diagnostic is rejected and nothing is
        written. Otherwise, when the plan has actions, the metadata is stamped
        with the current schema version, planned migrations are cleared and
        recorded as completed, ``last_migration`` is set and the file rewritten.

        Returns:
            A migration-result mapping; ``changed`` is true only when the plan
            contained at least one action.
        """
        plan = dict(plan or self.migration_plan())
        diagnostics = [dict(item) for item in plan.get("diagnostics", ())]
        errors = [item for item in diagnostics if item.get("severity") == "error"]
        if errors:
            return {
                "schema_version": LOCAL_STORE_MIGRATION_RESULT_SCHEMA,
                "plan_id": plan.get("id", ""),
                "store_path": str(self.root),
                "applied": False,
                "changed": False,
                "actions": [],
                "diagnostics": diagnostics,
            }

        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        # Missing or unreadable metadata is rebuilt from an empty mapping.
        metadata = self._load_metadata()[0] if metadata_path.exists() else {}

        actions = [dict(item) for item in plan.get("actions", ())]
        completed = list(metadata.get("completed_migrations") or ())
        for action in actions:
            if action.get("action") == "complete_planned_migration":
                completed.append(str(action.get("migration") or ""))

        if actions:
            # Any applied action (including ``set_schema_version``) leaves the
            # store at the current schema with no outstanding migrations.
            metadata["schema_version"] = LOCAL_STORE_SCHEMA
            metadata["migrations"] = []
            metadata.pop("planned_migrations", None)
            if completed:
                metadata["completed_migrations"] = sorted({item for item in completed if item})
            metadata["last_migration"] = {
                "plan_id": str(plan.get("id") or ""),
                "actor": actor,
                "applied_at": utc_now_iso(),
                "actions": [str(action.get("id") or "") for action in actions],
            }
            _write_json(metadata_path, metadata)

        return {
            "schema_version": LOCAL_STORE_MIGRATION_RESULT_SCHEMA,
            "plan_id": plan.get("id", ""),
            "store_path": str(self.root),
            "applied": True,
            "changed": bool(actions),
            "actions": actions,
            "metadata": metadata,
            "diagnostics": diagnostics,
        }

    def repair_diagnostics(self, *, events: list[MemoryEvent] | None = None) -> tuple[Diagnostic, ...]:
        """Check the store for corrupt records and dangling references.

        Reports metadata problems, undecodable node/edge/path files, edges whose
        source or target node is missing, and path event ids absent from
        ``events``.

        NOTE(review): when ``events`` is omitted every path event id is reported
        as orphaned -- confirm callers always pass the event log contents.
        """
        diagnostics: list[Diagnostic] = []
        nodes, node_diagnostics = _read_records(self.nodes_dir, MemoryNode.from_mapping, record_type="node")
        edges, edge_diagnostics = _read_records(self.edges_dir, MemoryEdge.from_mapping, record_type="edge")
        paths, path_diagnostics = _read_records(self.paths_dir, MemoryPath.from_mapping, record_type="path")
        diagnostics.extend(self.metadata_diagnostics())
        diagnostics.extend(node_diagnostics)
        diagnostics.extend(edge_diagnostics)
        diagnostics.extend(path_diagnostics)

        node_ids = {node.node_id for node in nodes}
        event_ids = {event.event_id for event in events or ()}
        for edge in edges:
            if edge.source not in node_ids:
                diagnostics.append(Diagnostic("error", "missing_edge_source", "Edge source does not reference a node.", edge.edge_id, {"source": edge.source}))
            if edge.target not in node_ids:
                diagnostics.append(Diagnostic("error", "missing_edge_target", "Edge target does not reference a node.", edge.edge_id, {"target": edge.target}))
        for path in paths:
            for event_id in path.event_ids:
                if event_id not in event_ids:
                    diagnostics.append(Diagnostic("warn", "orphaned_path_event", "Path references an event not present in the event log.", path.path_id, {"event_id": event_id}))
        return tuple(diagnostics)

    def metadata_diagnostics(self) -> tuple[Diagnostic, ...]:
        """Report problems with the store metadata file.

        Emits ``missing_store_metadata`` or ``corrupt_store_metadata`` errors
        when the file is absent or cannot be read as a JSON object, and
        ``store_migration_required`` / ``planned_store_migrations`` warnings for
        readable metadata that needs migrating.
        """
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        if not metadata_path.exists():
            return (
                Diagnostic(
                    "error",
                    "missing_store_metadata",
                    "Local store metadata file is missing.",
                    str(metadata_path),
                    {"expected_schema_version": LOCAL_STORE_SCHEMA},
                ),
            )
        metadata, load_error = self._load_metadata()
        if load_error is not None:
            return (
                Diagnostic(
                    "error",
                    "corrupt_store_metadata",
                    "Local store metadata file is not valid JSON.",
                    str(metadata_path),
                    {"error": load_error},
                ),
            )

        diagnostics: list[Diagnostic] = []
        schema_version = str(metadata.get("schema_version") or "")
        if not schema_version:
            diagnostics.append(
                Diagnostic(
                    "warn",
                    "store_migration_required",
                    "Local store metadata does not declare a schema version.",
                    str(metadata_path),
                    {"from_schema_version": "", "to_schema_version": LOCAL_STORE_SCHEMA},
                )
            )
        elif schema_version != LOCAL_STORE_SCHEMA:
            diagnostics.append(
                Diagnostic(
                    "warn",
                    "store_migration_required",
                    "Local store metadata declares a schema version that needs migration.",
                    str(metadata_path),
                    {"from_schema_version": schema_version, "to_schema_version": LOCAL_STORE_SCHEMA},
                )
            )

        planned = metadata.get("planned_migrations") or metadata.get("migrations") or ()
        if planned:
            diagnostics.append(
                Diagnostic(
                    "warn",
                    "planned_store_migrations",
                    "Local store metadata declares planned migrations.",
                    str(metadata_path),
                    {"migrations": list(planned)},
                )
            )
        return tuple(diagnostics)

    def _load_metadata(self) -> tuple[dict[str, Any], str | None]:
        """Read the metadata document, returning ``(metadata, error)``.

        ``error`` is ``None`` on success. When the file holds invalid JSON,
        undecodable bytes, or JSON that is not an object, ``metadata`` is an
        empty dict and ``error`` describes the problem. Callers check that the
        file exists first.
        """
        try:
            payload = _read_json(self.root / LOCAL_STORE_METADATA_FILE)
        except ValueError as exc:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            return {}, str(exc)
        if not isinstance(payload, dict):
            return {}, f"expected a JSON object, found {type(payload).__name__}"
        return payload, None

    def _ensure_layout(self) -> None:
        """Create the store directories and, if absent, an initial metadata file."""
        for directory in (self.root, self.profiles_dir, self.nodes_dir, self.edges_dir, self.paths_dir, self.activations_dir):
            directory.mkdir(parents=True, exist_ok=True)
        metadata_path = self.root / LOCAL_STORE_METADATA_FILE
        if not metadata_path.exists():
            _write_json(metadata_path, {"schema_version": LOCAL_STORE_SCHEMA, "migrations": []})
|
|
|
|
|
|
class JsonlMemoryEventLog:
    """Append-only event log stored as one JSON object per line."""

    def __init__(self, path: str | Path) -> None:
        """Bind the log to ``path``, creating the parent directory and file if needed."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.touch(exist_ok=True)

    def append(self, event: MemoryEvent) -> MemoryEvent:
        """Append ``event`` as a compact JSON line and return it.

        Raises:
            ValueError: if the log already contains an event with the same id.
        """
        if any(existing.event_id == event.event_id for existing in self.list_events()):
            raise ValueError(f"Duplicate memory event id: {event.event_id}")
        with self.path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(event.to_dict(), sort_keys=True, separators=(",", ":")) + "\n")
        return event

    def list_events(self, *, kind: str | None = None) -> list[MemoryEvent]:
        """Return decodable events ordered by ``(timestamp, event_id)``.

        Lines that are not JSON objects are skipped; ``kind`` (when not ``None``)
        keeps only events of that kind.
        """
        events: list[MemoryEvent] = []
        for data in self._iter_valid_event_dicts():
            event = MemoryEvent.from_mapping(data)
            if kind is None or event.kind == kind:
                events.append(event)
        return sorted(events, key=lambda event: (event.timestamp, event.event_id))

    def replay_graph(self, store: FileBackedMemoryGraphStore, *, graph_id: str = "local") -> MemoryGraph:
        """Export ``store`` as a graph that carries this log's events."""
        return store.export_graph(graph_id=graph_id, events=self.list_events())

    def diagnostics(self) -> tuple[Diagnostic, ...]:
        """Report malformed lines, unknown event schemas and duplicate event ids."""
        diagnostics: list[Diagnostic] = []
        seen: set[str] = set()
        for line_number, raw in self._iter_lines():
            location = f"line:{line_number}"
            try:
                data = json.loads(raw)
            except json.JSONDecodeError as exc:
                diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not valid JSON.", location, {"error": str(exc)}))
                continue
            if not isinstance(data, dict):
                # Valid JSON such as ``5`` or ``[1]`` has no ``.get``; report it
                # instead of crashing on the lookups below.
                diagnostics.append(Diagnostic("error", "malformed_event_log_line", "Event log line is not a JSON object.", location, {"error": f"expected a JSON object, found {type(data).__name__}"}))
                continue
            schema = data.get("schema_version")
            if schema and schema != "markitect.memory.event.v1":
                diagnostics.append(Diagnostic("warn", "unknown_event_schema", "Event declares an unknown schema version.", location, {"schema_version": schema}))
            event_id = str(data.get("id") or data.get("event_id") or "")
            if event_id in seen:
                diagnostics.append(Diagnostic("error", "duplicate_event_id", "Event log contains a duplicate event id.", location, {"event_id": event_id}))
            if event_id:
                seen.add(event_id)
        return tuple(diagnostics)

    def _iter_valid_event_dicts(self):
        """Yield the decoded payload of every line that holds a JSON object."""
        for _, raw in self._iter_lines():
            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict):
                yield data

    def _iter_lines(self):
        """Yield ``(line_number, text)`` for each non-blank line, numbered from 1."""
        for line_number, raw in enumerate(self.path.read_text(encoding="utf-8").splitlines(), start=1):
            if raw.strip():
                yield line_number, raw
|
|
|
|
|
|
class NoopContextPackageCompiler:
    """Compiler stand-in that wraps a selection without transforming it."""

    def compile_selection(self, selection: dict[str, Any]) -> dict[str, Any]:
        """Return a package mapping for ``selection``.

        The result carries a ``package_id`` derived from the selection's ``id``
        (``anonymous`` when absent), a shallow copy of the selection, and
        ``item_count`` -- the number of ``nodes`` plus ``events``. Missing or
        ``None`` collections count as empty.
        """
        nodes = selection.get("nodes") or ()
        events = selection.get("events") or ()
        return {
            "package_id": f"package:{selection.get('id', 'anonymous')}",
            "selection": dict(selection),
            "item_count": len(nodes) + len(events),
        }
|
|
|
|
|
|
class AllowAllPolicyGateway:
    """Policy gateway that approves every request."""

    def authorize(self, *, action: str, resource: str, context: dict[str, Any] | None = None) -> PolicyDecision:
        """Return an allowing decision that echoes the request in its metadata."""
        request_details = {
            "action": action,
            "resource": resource,
            "context": context or {},
        }
        return PolicyDecision(True, reason="local allow-all policy", metadata=request_details)
|
|
|
|
|
|
class RecordingAuditSink:
    """Audit sink that keeps recorded events in a list on the instance."""

    def __init__(self) -> None:
        self.events: list[dict[str, Any]] = []

    def record(self, event: dict[str, Any]) -> dict[str, Any]:
        """Store a shallow copy of ``event`` and report the index it was stored at."""
        snapshot = dict(event)
        position = len(self.events)
        self.events.append(snapshot)
        return {"recorded": True, "index": position, "event": snapshot}

    def query(self, **filters: Any) -> list[dict[str, Any]]:
        """Return copies of the recorded events that satisfy ``filters``."""
        return filter_audit_events(self.events, **filters)

    def retention_metadata(self) -> dict[str, Any]:
        """Describe this sink's retention settings (in-memory, no retention period)."""
        return {"mode": "in_memory", "retention_days": None}

    def retention_plan(self, *, retention_days: int | None = None, now: datetime | None = None) -> dict[str, Any]:
        """Build a retention plan over all recorded events."""
        return audit_retention_plan(
            self.events,
            retention_days=retention_days,
            now=now,
            retention=self.retention_metadata(),
        )

    def export_batch(self, **filters: Any) -> dict[str, Any]:
        """Export the events matching ``filters`` as an audit export batch."""
        matching = self.query(**filters)
        retention = self.retention_metadata()
        return audit_export_batch(matching, filters=filters, retention=retention)
|
|
|
|
|
|
class JsonlAuditSink:
    """Audit sink that appends events to a JSON-lines file."""

    def __init__(self, path: str | Path) -> None:
        """Bind the sink to ``path``, creating the parent directory and file if needed."""
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.touch(exist_ok=True)

    def record(self, event: dict[str, Any]) -> dict[str, Any]:
        """Append ``event`` as a compact JSON line.

        Returns:
            A mapping with the stored copy and ``index``: the zero-based number
            of the line just written, counting every physical line in the file.
        """
        stored = dict(event)
        with self.path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(stored, sort_keys=True, separators=(",", ":")) + "\n")
        with self.path.open(encoding="utf-8") as handle:
            index = max(sum(1 for _ in handle) - 1, 0)
        return {"recorded": True, "index": index, "event": stored}

    def query(self, **filters: Any) -> list[dict[str, Any]]:
        """Return the stored events that satisfy ``filters``."""
        return filter_audit_events(self._load_events(), **filters)

    def retention_metadata(self) -> dict[str, Any]:
        """Describe this sink's retention settings (JSONL file, no retention period)."""
        return {"mode": "jsonl", "path": str(self.path), "retention_days": None}

    def retention_plan(self, *, retention_days: int | None = None, now: datetime | None = None) -> dict[str, Any]:
        """Build a retention plan over all stored events."""
        return audit_retention_plan(self.query(), retention_days=retention_days, now=now, retention=self.retention_metadata())

    def export_batch(self, **filters: Any) -> dict[str, Any]:
        """Export the events matching ``filters`` as an audit export batch."""
        events = self.query(**filters)
        return audit_export_batch(events, filters=filters, retention=self.retention_metadata())

    def _load_events(self) -> list[dict[str, Any]]:
        """Read every line that decodes to a JSON object, in file order.

        Blank lines, invalid JSON and non-object values (numbers, lists,
        strings) are skipped; the event filters require mappings.
        """
        events: list[dict[str, Any]] = []
        for raw in self.path.read_text(encoding="utf-8").splitlines():
            if not raw.strip():
                continue
            try:
                payload = json.loads(raw)
            except json.JSONDecodeError:
                continue
            if isinstance(payload, dict):
                events.append(payload)
        return events
|
|
|
|
|
|
class InMemorySemanticIndex:
    """Term-matching index over nodes, grouped by the graph they belong to."""

    def __init__(self) -> None:
        # graph id -> nodes of that graph, kept ordered by node_id
        self._nodes_by_graph: dict[str, list[MemoryNode]] = {}

    def upsert_nodes(self, nodes: list[MemoryNode]) -> dict[str, Any]:
        """Insert or replace ``nodes`` and report how many were submitted.

        A node is filed under ``metadata["graph_id"]`` (``"local"`` when
        unset) and replaces any node of that graph with the same ``node_id``.
        """
        for incoming in nodes:
            graph_key = str(incoming.metadata.get("graph_id") or "local")
            current = self._nodes_by_graph.get(graph_key, [])
            bucket = [member for member in current if member.node_id != incoming.node_id]
            bucket.append(incoming)
            bucket.sort(key=lambda member: member.node_id)
            self._nodes_by_graph[graph_key] = bucket
        return {"upserted": len(nodes)}

    def query(self, *, graph_id: str, query: str, limit: int = 10) -> list[dict[str, Any]]:
        """Return up to ``limit`` hits for ``query`` within ``graph_id``.

        A node scores one point per distinct lower-cased query term occurring
        as a substring of its ``"<kind> <text>"``; hits are ordered by
        descending score, then by id.
        """
        needles = {word.lower() for word in query.split() if word.strip()}
        hits: list[dict[str, Any]] = []
        for candidate in self._nodes_by_graph.get(graph_id, []):
            haystack = f"{candidate.kind} {candidate.text}".lower()
            matched = len([needle for needle in needles if needle in haystack])
            if matched:
                hits.append({"id": candidate.node_id, "score": matched, "kind": candidate.kind})
        hits.sort(key=lambda hit: (-hit["score"], hit["id"]))
        return hits[:limit]
|
|
|
|
|
|
class InMemoryRuntimeRegistry:
    """Registry that keeps published runtime envelopes in a dictionary."""

    def __init__(self) -> None:
        self._envelopes: dict[str, dict[str, Any]] = {}

    def publish_runtime_envelope(self, envelope: dict[str, Any]) -> dict[str, Any]:
        """Store a shallow copy of ``envelope`` and return its reference.

        The reference is the envelope's ``operation_id`` or ``id``. Envelopes
        without either get a generated ``envelope:<n>`` reference that is
        guaranteed not to overwrite an already published envelope.
        Publishing again under an explicit reference replaces the earlier one.
        """
        explicit = envelope.get("operation_id") or envelope.get("id")
        reference = str(explicit) if explicit else self._next_anonymous_reference()
        stored = dict(envelope)
        self._envelopes[reference] = stored
        return {"published": True, "reference": reference, "envelope": stored}

    def fetch_runtime_envelope(self, reference: str) -> dict[str, Any]:
        """Return a shallow copy of the envelope; raises ``KeyError`` if unknown."""
        return dict(self._envelopes[reference])

    def _next_anonymous_reference(self) -> str:
        """Return the first unused ``envelope:<n>`` reference, starting at the current size."""
        counter = len(self._envelopes)
        # An explicit id may already occupy ``envelope:<len>``; skip past it
        # rather than silently replacing that envelope.
        while f"envelope:{counter}" in self._envelopes:
            counter += 1
        return f"envelope:{counter}"
|
|
|
|
|
|
def _safe_name(identifier: str) -> str:
    """Turn ``identifier`` into a file-name-safe string.

    Alphanumeric characters and ``-``, ``_``, ``.`` are kept; every other
    character becomes ``_``. An empty result is replaced by ``"anonymous"``.
    """
    kept_punctuation = ("-", "_", ".")
    pieces = []
    for char in identifier:
        if char.isalnum() or char in kept_punctuation:
            pieces.append(char)
        else:
            pieces.append("_")
    cleaned = "".join(pieces)
    if cleaned:
        return cleaned
    return "anonymous"
|
|
|
|
|
|
def _read_json(path: Path) -> dict[str, Any]:
    """Read ``path`` as UTF-8 text and return the parsed JSON value."""
    document = path.read_text(encoding="utf-8")
    return json.loads(document)
|
|
|
|
|
|
def _write_json(path: Path, data: dict[str, Any]) -> None:
    """Write ``data`` to ``path`` as indented, key-sorted JSON.

    The document is first written to a hidden ``.<name>.tmp`` sibling and then
    renamed over ``path``; missing parent directories are created.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    staging_path = path.with_name(f".{path.name}.tmp")
    serialized = json.dumps(data, indent=2, sort_keys=True)
    staging_path.write_text(serialized + "\n", encoding="utf-8")
    staging_path.replace(path)
|
|
|
|
|
|
def _read_records(directory: Path, factory, *, record_type: str) -> tuple[list[Any], list[Diagnostic]]:
    """Decode every ``*.json`` file in ``directory`` with ``factory``.

    Args:
        directory: Folder whose ``*.json`` files are read in sorted order.
        factory: Callable that builds a record from the decoded JSON object.
        record_type: Label stored in the diagnostics of undecodable files.

    Returns:
        ``(records, diagnostics)``: the successfully built records, plus one
        ``corrupt_store_record`` error per file that could not be decoded.
    """
    records: list[Any] = []
    diagnostics: list[Diagnostic] = []
    for path in sorted(directory.glob("*.json")):
        try:
            payload = _read_json(path)
            if not isinstance(payload, dict):
                # Valid JSON that is not an object (e.g. ``[]``) is corrupt too;
                # reject it here instead of letting the factory fail on it.
                raise TypeError(f"expected a JSON object, found {type(payload).__name__}")
            records.append(factory(payload))
        except (ValueError, TypeError, KeyError) as exc:
            # ValueError also covers json.JSONDecodeError and UnicodeDecodeError.
            diagnostics.append(
                Diagnostic(
                    "error",
                    "corrupt_store_record",
                    "Local store record could not be decoded.",
                    str(path),
                    {"record_type": record_type, "error": str(exc)},
                )
            )
    return records, diagnostics
|
|
|
|
|
|
def filter_audit_events(events: list[dict[str, Any]], **filters: Any) -> list[dict[str, Any]]:
    """Return shallow copies of the events accepted by ``_audit_event_matches``."""
    matching: list[dict[str, Any]] = []
    for event in events:
        if _audit_event_matches(event, filters):
            matching.append(dict(event))
    return matching
|
|
|
|
|
|
def audit_export_batch(
    events: list[dict[str, Any]],
    *,
    filters: dict[str, Any] | None = None,
    retention: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Wrap ``events`` in an export-batch mapping.

    The batch id is a digest of the filters and events; events, filters and
    retention metadata are included as shallow copies.
    """
    applied_filters = filters or {}
    batch_id = f"audit-export:{stable_digest([applied_filters, events])}"
    exported = [dict(event) for event in events]
    return {
        "schema_version": AUDIT_EXPORT_BATCH_SCHEMA,
        "id": batch_id,
        "filters": dict(applied_filters),
        "count": len(events),
        "events": exported,
        "retention": dict(retention or {}),
    }
|
|
|
|
|
|
def audit_retention_plan(
    events: list[dict[str, Any]],
    *,
    retention_days: int | None = None,
    now: datetime | None = None,
    retention: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Split ``events`` into those eligible for removal and those retained.

    An event is eligible when a retention period is known, its age in days can
    be computed, and that age is at least the retention period. Without an
    explicit ``retention_days`` the value from ``retention`` is used; ``now``
    defaults to the current UTC time.
    """
    retention = dict(retention or {})
    if retention_days is None:
        retention_days = retention.get("retention_days")
    reference_time = now if now else datetime.now(timezone.utc)

    eligible: list[str] = []
    retained: list[str] = []
    for event in events:
        # Events are identified by operation id, then id, then a content digest.
        event_id = str(event.get("operation_id") or event.get("id") or stable_digest(event))
        age = _event_age_days(event, now=reference_time)
        expired = retention_days is not None and age is not None and age >= int(retention_days)
        bucket = eligible if expired else retained
        bucket.append(event_id)

    plan_id = f"audit-retention:{stable_digest([retention_days, eligible, retained])}"
    return {
        "schema_version": AUDIT_RETENTION_PLAN_SCHEMA,
        "id": plan_id,
        "retention_days": retention_days,
        "eligible_count": len(eligible),
        "retained_count": len(retained),
        "eligible_operation_ids": eligible,
        "retained_operation_ids": retained,
        "retention": retention,
    }
|
|
|
|
|
|
def _nested_audit_value(event: dict[str, Any], section: str, key: str) -> Any:
    """Return ``event[section][key]``, or ``None`` when the section is not mapping-like."""
    try:
        return dict(event.get(section) or {}).get(key)
    except (TypeError, ValueError):
        # The section holds a scalar or string (e.g. ``"subject": "node:1"``),
        # so it cannot carry the nested field.
        return None


def _audit_event_matches(event: dict[str, Any], filters: dict[str, Any]) -> bool:
    """Tell whether ``event`` satisfies every filter that is set.

    Supported filters: ``operation``, ``operation_id`` and ``actor`` (equality
    on the top-level field); ``subject_kind``, ``subject_id`` and
    ``source_ref`` (equality on ``subject.kind``, ``subject.id`` and
    ``source.ref``); ``dry_run`` and ``allowed`` (compared by truthiness).
    Filters that are ``None`` or not listed here are ignored.
    """
    for field in ("operation", "operation_id", "actor"):
        wanted = filters.get(field)
        if wanted is not None and event.get(field) != wanted:
            return False

    nested_filters = (
        ("subject_kind", "subject", "kind"),
        ("subject_id", "subject", "id"),
        ("source_ref", "source", "ref"),
    )
    for filter_name, section, key in nested_filters:
        wanted = filters.get(filter_name)
        if wanted is not None and _nested_audit_value(event, section, key) != wanted:
            return False

    for flag in ("dry_run", "allowed"):
        wanted = filters.get(flag)
        if wanted is not None and bool(event.get(flag)) is not bool(wanted):
            return False
    return True
|
|
|
|
|
|
def _event_age_days(event: dict[str, Any], *, now: datetime) -> int | None:
    """Return the event's age in whole days relative to ``now``, never below zero.

    Returns ``None`` when ``parse_iso_datetime`` yields no value for the
    event's ``timestamp``.
    """
    raw_timestamp = str(event.get("timestamp") or "")
    occurred_at = parse_iso_datetime(raw_timestamp)
    if occurred_at is None:
        return None
    elapsed_days = (now - occurred_at).days
    # Timestamps later than ``now`` count as age zero.
    return elapsed_days if elapsed_days > 0 else 0
|