Files
agentic-resources/session_memory/core/schema.py
tegwick 97379e9658 session-memory: error-body mining into digest (WP-0006 T01)
build_digest now extracts normalized error fingerprints + samples from failed
events (error kind + failing tool_result bodies) into a durable error_snippets
list — paths/numbers/uuids/addrs stripped so the same error collapses to one
fingerprint with a count; Python traceback header skipped in favour of the real
exception line. Durable in Tier 2 (survives Tier 1 eviction). SCHEMA_VERSION ->
2 (re-ingest needed to populate). 7 new tests; suite 95/95 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 12:45:01 +02:00

157 lines
4.9 KiB
Python

"""Normalized session schema (Tier 1) — design doc §4.
Two record kinds, ``Session`` and ``SessionEvent``, plus the small enums every
adapter targets. Field names here are the stable contract; per-flavor quirks are
absorbed inside each adapter (see design §4.3 native -> kind mapping).
"""
from __future__ import annotations
import json
from dataclasses import asdict, dataclass, field, fields
from typing import Any, Optional
# Schema revision. v2: digest carries error_snippets (WP-0006 T01).
SCHEMA_VERSION: int = 2

# Agent flavors that are supported. A ``session_uid`` always has the form
# "<flavor>:<native id>".
FLAVORS: tuple[str, ...] = ("claude", "codex", "grok")

# Every value ``SessionEvent.kind`` may take (design §4.2 / §4.3).
KINDS: tuple[str, ...] = (
    "user_msg", "assistant_msg", "thinking",
    "tool_call", "tool_result", "error",
    "test_run", "edit", "retry",
    "human_intervention", "decision",
    "lifecycle", "completion",
)

# Every value ``Session.outcome`` may take.
OUTCOMES: tuple[str, ...] = ("success", "fail", "abandoned", "unknown")
@dataclass
class Cost:
    """Per-session accounting: token counters plus effort counters.

    Every counter defaults to zero, so ``Cost()`` is a valid empty tally.
    """

    # Token counters.
    input_tokens: int = 0
    output_tokens: int = 0
    cache_tokens: int = 0

    # Effort counters.
    wall_clock_s: float = 0.0
    turns: int = 0
    retries: int = 0
@dataclass
class Session:
    """One bounded run of a coding agent against a repo (design §4.1).

    Construction validates ``flavor``, ``outcome`` and the ``session_uid``
    prefix; see ``__post_init__``.
    """

    session_uid: str  # "<flavor>:<native id>" — globally unique
    flavor: str
    native_session_id: str
    repo: Optional[str] = None
    domain: Optional[str] = None
    cwd: Optional[str] = None
    git_branch: Optional[str] = None
    model: Optional[str] = None
    started_at: Optional[str] = None  # ISO-8601 UTC
    ended_at: Optional[str] = None
    outcome: str = "unknown"
    cost: Cost = field(default_factory=Cost)
    task_ref: Optional[str] = None
    source_path: Optional[str] = None
    source_bytes: int = 0
    schema_version: int = SCHEMA_VERSION
    # watermarks (design §3.1): discovered -> ingested -> analyzed -> evicted
    discovered_at: Optional[str] = None
    ingested_at: Optional[str] = None
    analyzed_at: Optional[str] = None
    evicted_at: Optional[str] = None

    def __post_init__(self) -> None:
        """Validate the enum-like fields and the uid prefix.

        Raises:
            ValueError: if ``flavor`` is not in ``FLAVORS``, ``outcome`` is
                not in ``OUTCOMES``, or ``session_uid`` does not start with
                ``"<flavor>:"``.
        """
        if self.flavor not in FLAVORS:
            raise ValueError(f"unknown flavor {self.flavor!r}; expected one of {FLAVORS}")
        if self.outcome not in OUTCOMES:
            raise ValueError(f"unknown outcome {self.outcome!r}; expected one of {OUTCOMES}")
        expected_prefix = f"{self.flavor}:"
        if not self.session_uid.startswith(expected_prefix):
            raise ValueError(
                f"session_uid {self.session_uid!r} must start with {expected_prefix!r}"
            )

    @property
    def is_evictable(self) -> bool:
        """A session may be evicted from Tier 1 only once analyzed (design §3.1).

        True when ``analyzed_at`` is set and ``evicted_at`` is still unset.
        """
        return self.analyzed_at is not None and self.evicted_at is None

    @staticmethod
    def make_uid(flavor: str, native_session_id: str) -> str:
        """Return the ``"<flavor>:<native id>"`` uid for the given parts."""
        return f"{flavor}:{native_session_id}"

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy of the session (nested ``cost`` included)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize ``to_dict()`` as JSON with sorted keys."""
        return json.dumps(self.to_dict(), sort_keys=True)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "Session":
        """Build a ``Session`` from a mapping, ignoring unknown keys.

        ``d`` is not mutated. The ``cost`` entry may be absent or ``None``
        (a default ``Cost`` is used), a mapping of ``Cost`` fields (unknown
        keys ignored), or an existing ``Cost`` instance (copied, not aliased).

        Raises:
            ValueError: propagated from ``__post_init__`` validation.
            TypeError: if a required field is missing from ``d``.
        """
        data = dict(d)
        raw_cost = data.pop("cost", None)
        session = cls(**{k: v for k, v in data.items() if k in _SESSION_FIELDS})
        if isinstance(raw_cost, Cost):
            # Normalise an already-built Cost to a plain mapping so it takes
            # the same filtering path and the caller's instance is not shared.
            raw_cost = asdict(raw_cost)
        if raw_cost is not None:
            session.cost = Cost(**{k: v for k, v in raw_cost.items() if k in _COST_FIELDS})
        return session

    @classmethod
    def from_json(cls, s: str) -> "Session":
        """Parse ``s`` as JSON and delegate to ``from_dict``."""
        return cls.from_dict(json.loads(s))
@dataclass
class SessionEvent:
    """A single atomic record belonging to one session (design §4.2)."""

    session_uid: str
    # Monotonic within the session.
    seq: int
    ts: Optional[str] = None
    kind: str = "lifecycle"
    # Turn DAG (Claude); None for flat flavors.
    parent_seq: Optional[int] = None
    # user|assistant|system|tool
    role: Optional[str] = None
    # Set when kind is in {tool_call, tool_result}.
    tool: Optional[str] = None
    # Short, human-readable.
    summary: Optional[str] = None
    # Pointer to the full body in the Tier 1 blob store.
    payload_ref: Optional[str] = None
    tokens: int = 0
    is_sidechain: bool = False

    def __post_init__(self) -> None:
        """Reject any ``kind`` not listed in ``KINDS`` with a ``ValueError``."""
        if self.kind in KINDS:
            return
        raise ValueError(f"unknown kind {self.kind!r}; expected one of {KINDS}")

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict keyed by field name."""
        record = asdict(self)
        return record

    def to_json(self) -> str:
        """Serialize ``to_dict()`` as JSON with sorted keys."""
        record = self.to_dict()
        return json.dumps(record, sort_keys=True)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "SessionEvent":
        """Build an event from ``d``, dropping keys that are not event fields."""
        known: dict[str, Any] = {}
        for key, value in d.items():
            if key in _EVENT_FIELDS:
                known[key] = value
        return cls(**known)

    @classmethod
    def from_json(cls, s: str) -> "SessionEvent":
        """Parse ``s`` as JSON and delegate to ``from_dict``."""
        decoded = json.loads(s)
        return cls.from_dict(decoded)
# Field-name sets used by the ``from_dict`` constructors to discard keys that
# do not correspond to a dataclass field.
_SESSION_FIELDS, _COST_FIELDS, _EVENT_FIELDS = (
    {spec.name for spec in fields(model)}
    for model in (Session, Cost, SessionEvent)
)