session-memory Phase 0: normalized schema (T01) + Claude adapter (T02)

- session_memory/core/schema.py: Session/SessionEvent/Cost dataclasses,
  flavor-prefixed uids, watermarks, kind/outcome validation (T01)
- session_memory/adapters/claude.py: JSONL -> Normalized bundle, turn DAG
  via uuid/parentUuid, kind mapping, cost from message.usage (T02)
- tests: schema round-trip + adapter (synthetic + real local session)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-06 19:06:10 +02:00
parent ffe191d44e
commit 1c29a94fa9
9 changed files with 598 additions and 3 deletions

View File

@@ -0,0 +1,99 @@
"""Claude adapter tests (T02): synthetic fixture + a real on-disk session."""
import glob
import json
import os
import sys
# Put the parent of this file's directory at the front of sys.path so the
# `session_memory` import on the next line resolves (presumably so the tests
# can run from a source checkout without installing the package).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.adapters.claude import parse_session # noqa: E402
# Passed as the second argument to every parse_session call in this module.
# The synthetic test expects repo "agentic-resources" to come back with
# domain "helix_forge".
REPO_MAP = {"agentic-resources": "helix_forge"}
def _write_jsonl(path, records):
    """Write *records* to *path* as JSON Lines.

    Each record is serialized with ``json.dumps`` and followed by a single
    newline character. The file is created (or truncated) and encoded as
    UTF-8.

    Args:
        path: Destination file; anything ``open`` accepts.
        records: Iterable of JSON-serializable objects, one per output line.
    """
    # newline="\n" turns off platform newline translation, so the fixture is
    # byte-identical on every OS (plain text mode writes "\r\n" on Windows).
    with open(path, "w", encoding="utf-8", newline="\n") as f:
        # One batched call instead of one small write per record.
        f.writelines(json.dumps(record) + "\n" for record in records)
def test_synthetic_session(tmp_path):
    """Parse a three-record synthetic transcript and check the normalized result.

    The fixture is a user prompt, an assistant turn (thinking, text and two
    tool calls) and a tool result. The test checks session metadata, cost
    figures, the event kinds in order, and the edit event's parent link and
    blob reference.
    """
    user_prompt = {
        "type": "user", "uuid": "u1", "parentUuid": None,
        "timestamp": "2026-06-06T10:00:00Z", "sessionId": "sess-1",
        "cwd": "/home/worsch/agentic-resources", "gitBranch": "main",
        "version": "1.0",
        "message": {"role": "user", "content": "fix the bug"},
    }
    assistant_blocks = [
        {"type": "thinking", "thinking": "let me look"},
        {"type": "text", "text": "I'll edit the file."},
        {"type": "tool_use", "name": "Edit",
         "input": {"file_path": "x.py", "old_string": "a", "new_string": "b"}},
        {"type": "tool_use", "name": "Bash",
         "input": {"command": "pytest -q"}},
    ]
    assistant_turn = {
        "type": "assistant", "uuid": "a1", "parentUuid": "u1",
        "timestamp": "2026-06-06T10:00:05Z", "sessionId": "sess-1",
        "message": {
            "role": "assistant",
            "model": "claude-opus-4-8",
            "usage": {"input_tokens": 100, "output_tokens": 20,
                      "cache_read_input_tokens": 10},
            "content": assistant_blocks,
        },
    }
    tool_output = {
        "type": "user", "uuid": "u2", "parentUuid": "a1",
        "timestamp": "2026-06-06T10:00:10Z", "sessionId": "sess-1",
        "message": {
            "role": "user",
            "content": [{"type": "tool_result", "content": "6 passed"}],
        },
    }

    transcript = tmp_path / "11111111-2222-3333-4444-555555555555.jsonl"
    _write_jsonl(transcript, [user_prompt, assistant_turn, tool_output])

    bundle = parse_session(str(transcript), REPO_MAP)
    assert bundle is not None

    # Session-level metadata.
    session = bundle.session
    assert session.session_uid == "claude:sess-1"
    assert session.repo == "agentic-resources"
    assert session.domain == "helix_forge"
    assert session.model == "claude-opus-4-8"

    # Cost figures match the single assistant record's usage block.
    cost = session.cost
    assert cost.input_tokens == 100
    assert cost.output_tokens == 20
    assert cost.cache_tokens == 10
    assert cost.turns == 1
    # First timestamp 10:00:00Z, last 10:00:10Z.
    assert cost.wall_clock_s == 10.0

    expected_kinds = [
        "user_msg", "thinking", "assistant_msg", "edit", "test_run", "tool_result",
    ]
    observed_kinds = [event.kind for event in bundle.events]
    assert observed_kinds == expected_kinds

    # turn DAG: assistant events link back to the first user msg (seq 0)
    edit_event = next(event for event in bundle.events if event.kind == "edit")
    assert edit_event.parent_seq == 0
    assert edit_event.tool == "Edit"

    # bodies captured as blobs, referenced by payload_ref
    blob_key = edit_event.payload_ref
    assert blob_key in bundle.blobs
    assert "x.py" in bundle.blobs[blob_key]
def test_sidechain_filename_marks_events(tmp_path):
    """Events parsed from a file named ``agent-deadbeef.jsonl`` are sidechain.

    The single record carries no sidechain marker of its own, so the flag
    checked here is expected to come from the file name.
    """
    p = tmp_path / "agent-deadbeef.jsonl"
    _write_jsonl(p, [
        {"type": "assistant", "uuid": "a1", "sessionId": "side-1",
         "timestamp": "2026-06-06T10:00:00Z",
         "message": {"role": "assistant", "content": [{"type": "text", "text": "hi"}]}},
    ])
    norm = parse_session(str(p), REPO_MAP)
    # parse_session can return None. Guard explicitly, as
    # test_synthetic_session does, so a rejected file fails with a readable
    # assertion instead of an AttributeError or IndexError.
    assert norm is not None
    assert norm.events, "adapter produced no events for the sidechain file"
    assert norm.events[0].is_sidechain is True
def test_real_local_session_if_available():
    """Smoke-parse a real Claude transcript on this workstation, if present.

    Skips when the hard-coded project directory has no ``*.jsonl`` files.
    Otherwise every file that parses must have a ``claude:``-prefixed session
    uid and event ``seq`` values that are sorted and unique, and at least one
    file must parse.
    """
    # Imported locally; pytest is only needed for the skip below.
    import pytest

    base = os.path.expanduser("~/.claude/projects/-home-worsch-agentic-resources")
    files = sorted(glob.glob(os.path.join(base, "*.jsonl")))
    if not files:
        # Environment without local sessions; synthetic tests cover the logic.
        # Report it as skipped so the run does not show a pass for a test
        # that checked nothing.
        pytest.skip(f"no local Claude sessions under {base}")

    parsed = 0
    for fp in files:
        norm = parse_session(fp, REPO_MAP)
        if norm is None:
            continue
        parsed += 1
        # The file path is the assertion message so a failure names the
        # transcript that caused it.
        assert norm.session.session_uid.startswith("claude:"), fp
        # seq is monotonic and unique
        seqs = [e.seq for e in norm.events]
        assert seqs == sorted(seqs), fp
        assert len(seqs) == len(set(seqs)), fp
    assert parsed >= 1, "transcripts were found but none could be parsed"

97
tests/test_schema.py Normal file
View File

@@ -0,0 +1,97 @@
"""Round-trip + validation tests for the normalized schema (T01)."""
import os
import sys
import pytest
# Put the parent of this file's directory at the front of sys.path so the
# `session_memory` import that follows resolves (presumably so the tests can
# run from a source checkout without installing the package).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.core.schema import ( # noqa: E402
SCHEMA_VERSION,
Cost,
Session,
SessionEvent,
)
def _sample_session() -> Session:
    """Build the sample ``claude`` session that the tests in this module share."""
    native_id = "abc-123"
    fields = {
        "session_uid": Session.make_uid("claude", native_id),
        "flavor": "claude",
        "native_session_id": native_id,
        "repo": "agentic-resources",
        "domain": "helix_forge",
        "cwd": "/home/worsch/agentic-resources",
        "git_branch": "main",
        "model": "claude-opus-4-8",
        "started_at": "2026-06-06T10:00:00Z",
        "ended_at": "2026-06-06T10:15:00Z",
        "outcome": "success",
        "cost": Cost(input_tokens=100, output_tokens=50, turns=3, retries=1),
        "task_ref": "AGENTIC-WP-0002-T01",
        "source_path": "~/.claude/projects/x/abc-123.jsonl",
        "source_bytes": 2048,
        "ingested_at": "2026-06-06T10:16:00Z",
    }
    return Session(**fields)
def test_session_round_trip():
    """A session written with ``to_json`` and read with ``from_json`` is equal."""
    original = _sample_session()
    serialized = original.to_json()
    clone = Session.from_json(serialized)

    assert clone == original
    assert clone.cost == original.cost
    assert clone.schema_version == SCHEMA_VERSION
def test_session_uid_helper_and_prefix_enforced():
    """``make_uid`` yields ``flavor:id``; a uid with another flavor's prefix raises."""
    uid = Session.make_uid("grok", "z9")
    assert uid == "grok:z9"

    # The uid prefix says "codex" while the flavor says "claude".
    mismatched = {
        "session_uid": "codex:wrong",
        "flavor": "claude",
        "native_session_id": "wrong",
    }
    with pytest.raises(ValueError):
        Session(**mismatched)
def test_unknown_flavor_and_outcome_rejected():
    """Constructing a session with flavor ``x`` or outcome ``bogus`` raises ValueError."""
    rejected_inputs = (
        # Flavor "x" (the uid prefix is consistent with it).
        {"session_uid": "x:1", "flavor": "x", "native_session_id": "1"},
        # Outcome "bogus" on an otherwise consistent claude session.
        {
            "session_uid": "claude:1",
            "flavor": "claude",
            "native_session_id": "1",
            "outcome": "bogus",
        },
    )
    for kwargs in rejected_inputs:
        with pytest.raises(ValueError):
            Session(**kwargs)
def test_is_evictable_requires_analyzed_not_evicted():
    """``is_evictable`` is True only after ``analyzed_at`` is set and before ``evicted_at`` is."""
    session = _sample_session()

    # Fresh sample: not analyzed yet.
    assert session.is_evictable is False

    # Analyzed, not yet evicted.
    session.analyzed_at = "2026-06-06T10:17:00Z"
    assert session.is_evictable is True

    # Already evicted.
    session.evicted_at = "2026-06-06T11:00:00Z"
    assert session.is_evictable is False
def test_event_round_trip_and_kind_validation():
    """An event survives a JSON round trip; an unrecognized ``kind`` raises ValueError."""
    event_fields = {
        "session_uid": "claude:abc-123",
        "seq": 4,
        "parent_seq": 3,
        "ts": "2026-06-06T10:01:00Z",
        "kind": "tool_call",
        "role": "assistant",
        "tool": "Bash",
        "summary": "ran pytest -q",
        "payload_ref": "blob://abc-123/4",
        "tokens": 12,
    }
    event = SessionEvent(**event_fields)
    reloaded = SessionEvent.from_json(event.to_json())
    assert reloaded == event

    with pytest.raises(ValueError):
        SessionEvent(session_uid="claude:1", seq=0, kind="not_a_kind")
def test_from_dict_ignores_unknown_fields():
    """``from_dict`` accepts extra keys at the top level and inside ``cost``.

    Only ``repo`` is checked on the rebuilt session.
    """
    payload = _sample_session().to_dict()
    # Unknown keys at both nesting levels.
    payload["future_field"] = "ignored"
    payload["cost"]["future_cost"] = 999

    rebuilt = Session.from_dict(payload)

    assert rebuilt.repo == "agentic-resources"