generated from coulomb/repo-seed
- session_memory/core/schema.py: Session/SessionEvent/Cost dataclasses, flavor-prefixed uids, watermarks, kind/outcome validation (T01) - session_memory/adapters/claude.py: JSONL -> Normalized bundle, turn DAG via uuid/parentUuid, kind mapping, cost from message.usage (T02) - tests: schema round-trip + adapter (synthetic + real local session) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
157 lines
4.8 KiB
Python
157 lines
4.8 KiB
Python
"""Normalized session schema (Tier 1) — design doc §4.

Two record kinds, ``Session`` and ``SessionEvent``, plus the small enums every
adapter targets. Field names here are the stable contract; per-flavor quirks are
absorbed inside each adapter (see design §4.3 native -> kind mapping).
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import asdict, dataclass, field, fields
|
|
from typing import Any, Optional
|
|
|
|
# Version stamp; used as the default for ``Session.schema_version``.
SCHEMA_VERSION = 1

# Supported agent flavors. ``session_uid`` is always "<flavor>:<native id>".
FLAVORS = ("claude", "codex", "grok")

# SessionEvent.kind universe (design §4.2 / §4.3).
KINDS = (
    "user_msg",
    "assistant_msg",
    "thinking",
    "tool_call",
    "tool_result",
    "error",
    "test_run",
    "edit",
    "retry",
    "human_intervention",
    "decision",
    "lifecycle",
    "completion",
)

# Session.outcome universe.
OUTCOMES = ("success", "fail", "abandoned", "unknown")
|
|
|
|
|
|
@dataclass
class Cost:
    """Token + effort accounting for a session.

    Every counter defaults to zero, so ``Cost()`` is a valid empty record.
    """

    input_tokens: int = 0
    output_tokens: int = 0
    cache_tokens: int = 0
    wall_clock_s: float = 0.0  # presumably seconds (``_s`` suffix) — confirm
    turns: int = 0
    retries: int = 0
|
|
|
|
|
|
@dataclass
class Session:
    """One bounded run of a coding agent against a repo (design §4.1).

    Construction validates the record via ``__post_init__``.

    Raises:
        ValueError: if ``flavor`` is not in ``FLAVORS``, ``outcome`` is not in
            ``OUTCOMES``, or ``session_uid`` does not start with
            ``"<flavor>:"``.
    """

    session_uid: str  # "<flavor>:<native id>" — globally unique
    flavor: str
    native_session_id: str
    repo: Optional[str] = None
    domain: Optional[str] = None
    cwd: Optional[str] = None
    git_branch: Optional[str] = None
    model: Optional[str] = None
    started_at: Optional[str] = None  # ISO-8601 UTC
    ended_at: Optional[str] = None
    outcome: str = "unknown"
    cost: Cost = field(default_factory=Cost)
    task_ref: Optional[str] = None
    source_path: Optional[str] = None
    source_bytes: int = 0
    schema_version: int = SCHEMA_VERSION
    # watermarks (design §3.1): discovered -> ingested -> analyzed -> evicted
    discovered_at: Optional[str] = None
    ingested_at: Optional[str] = None
    analyzed_at: Optional[str] = None
    evicted_at: Optional[str] = None

    def __post_init__(self) -> None:
        """Reject records whose flavor, outcome, or uid prefix is invalid."""
        if self.flavor not in FLAVORS:
            raise ValueError(f"unknown flavor {self.flavor!r}; expected one of {FLAVORS}")
        if self.outcome not in OUTCOMES:
            raise ValueError(f"unknown outcome {self.outcome!r}; expected one of {OUTCOMES}")
        # Only the "<flavor>:" prefix is enforced; the remainder of the uid is
        # not compared against ``native_session_id``.
        expected_prefix = f"{self.flavor}:"
        if not self.session_uid.startswith(expected_prefix):
            raise ValueError(
                f"session_uid {self.session_uid!r} must start with {expected_prefix!r}"
            )

    @property
    def is_evictable(self) -> bool:
        """A session may be evicted from Tier 1 only once analyzed (design §3.1)."""
        return self.analyzed_at is not None and self.evicted_at is None

    @staticmethod
    def make_uid(flavor: str, native_session_id: str) -> str:
        """Return the ``"<flavor>:<native id>"`` uid; performs no validation."""
        return f"{flavor}:{native_session_id}"

    def to_dict(self) -> dict[str, Any]:
        """Return the session as a plain dict; the nested ``Cost`` becomes a dict."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to a JSON string with keys sorted for stable output."""
        return json.dumps(self.to_dict(), sort_keys=True)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "Session":
        """Build a ``Session`` from a mapping, ignoring unknown keys.

        Args:
            d: Mapping of field names to values. It is not mutated. ``"cost"``
                may be a mapping of ``Cost`` fields, a ``Cost`` instance, or
                absent/``None`` (which yields a default ``Cost``).

        Returns:
            The validated ``Session``.

        Raises:
            TypeError: if a required field is missing from ``d``.
            ValueError: if validation in ``__post_init__`` fails.
        """
        data = dict(d)  # shallow copy so the caller's mapping keeps its "cost"
        raw_cost = data.pop("cost", None)
        kwargs = {k: v for k, v in data.items() if k in _SESSION_FIELDS}
        # Resolve the cost before construction so the object is never built
        # with a placeholder and patched afterwards.
        if isinstance(raw_cost, Cost):
            kwargs["cost"] = raw_cost
        elif raw_cost is not None:
            kwargs["cost"] = Cost(
                **{k: v for k, v in raw_cost.items() if k in _COST_FIELDS}
            )
        return cls(**kwargs)

    @classmethod
    def from_json(cls, s: str) -> "Session":
        """Parse a JSON object string and build a ``Session`` via ``from_dict``."""
        return cls.from_dict(json.loads(s))
|
|
|
|
|
|
@dataclass
class SessionEvent:
    """One atomic record within a session (design §4.2).

    Raises:
        ValueError: from ``__post_init__`` if ``kind`` is not in ``KINDS``.
    """

    session_uid: str
    seq: int  # monotonic within session
    ts: Optional[str] = None
    kind: str = "lifecycle"
    parent_seq: Optional[int] = None  # turn DAG (Claude); None for flat flavors
    role: Optional[str] = None  # user|assistant|system|tool
    tool: Optional[str] = None  # when kind in {tool_call, tool_result}
    summary: Optional[str] = None  # short, human-readable
    payload_ref: Optional[str] = None  # pointer to full body in Tier 1 blob store
    tokens: int = 0
    is_sidechain: bool = False

    def __post_init__(self) -> None:
        """Reject events whose ``kind`` is outside the ``KINDS`` universe."""
        if self.kind not in KINDS:
            raise ValueError(f"unknown kind {self.kind!r}; expected one of {KINDS}")

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict keyed by field name."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to a JSON string with keys sorted for stable output."""
        return json.dumps(self.to_dict(), sort_keys=True)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "SessionEvent":
        """Build a ``SessionEvent`` from a mapping, ignoring unknown keys.

        Raises:
            TypeError: if ``session_uid`` or ``seq`` is missing from ``d``.
            ValueError: if ``kind`` fails validation in ``__post_init__``.
        """
        return cls(**{k: v for k, v in d.items() if k in _EVENT_FIELDS})

    @classmethod
    def from_json(cls, s: str) -> "SessionEvent":
        """Parse a JSON object string and build an event via ``from_dict``."""
        return cls.from_dict(json.loads(s))
|
|
|
|
|
|
# Field-name allowlists the ``from_dict`` constructors use to drop unknown
# keys. They sit below the class bodies because ``fields()`` needs the
# finished dataclasses; frozen because they are only ever read.
_SESSION_FIELDS = frozenset(f.name for f in fields(Session))
_COST_FIELDS = frozenset(f.name for f in fields(Cost))
_EVENT_FIELDS = frozenset(f.name for f in fields(SessionEvent))
|