generated from coulomb/repo-seed
session-memory Phase 1: Codex adapter (T01) + multi-file merge (T03)
- adapters/common.py: shared Normalized + helpers (resolve_repo, classify_tool,
jsonl iter, etc.); claude.py refactored to use it (Normalized re-exported)
- adapters/codex.py: rollout {timestamp,type,payload} parser; session_meta/
response_item/event_msg mapping; flat call_id join; token_count cost;
registered in ingest dispatch
- core/store.py: ingest() now merges multi-file sessions by content
fingerprint, appends new events with offset seq (design OQ6); idempotent
- tests/test_codex_adapter.py, tests/test_merge.py
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
86
tests/test_codex_adapter.py
Normal file
86
tests/test_codex_adapter.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Codex adapter tests (T01): synthetic rollout fixture."""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from session_memory.adapters.codex import parse_session # noqa: E402
|
||||
|
||||
# Repo-name -> domain mapping handed to parse_session as its second argument.
# The rollout test expects a cwd ending in "agentic-resources" to come back as
# repo "agentic-resources" / domain "helix_forge".
# NOTE(review): exact lookup rules live in the adapter -- confirm there.
REPO_MAP = {"agentic-resources": "helix_forge"}
|
||||
|
||||
|
||||
def _rollout(path, lines):
    """Write *lines* to *path* as a JSON Lines file.

    Args:
        path: Destination file; created or truncated and written as UTF-8.
        lines: Iterable of JSON-serialisable records, emitted in order, one
            ``json.dumps`` result per line.
    """
    with open(path, "w", encoding="utf-8") as out:
        out.writelines(json.dumps(record) + "\n" for record in lines)
|
||||
|
||||
|
||||
def test_codex_rollout_parse(tmp_path):
    """Parse a synthetic Codex rollout and check session, cost and event mapping."""

    def record(second, rec_type, payload):
        # One rollout line; the fixture spans 10:00:00Z .. 10:00:10Z, one
        # record per second.
        return {
            "timestamp": f"2026-06-06T10:00:{second:02d}Z",
            "type": rec_type,
            "payload": payload,
        }

    rollout_path = tmp_path / "rollout-2026-06-06-abc.jsonl"
    fixture = [
        record(0, "session_meta", {
            "id": "cdx-1",
            "cwd": "/home/worsch/agentic-resources",
            "model_provider": "openai",
            "cli_version": "0.44.0",
            "model": "gpt-5-codex",
        }),
        record(1, "turn_context", {
            "model": "gpt-5-codex",
            "approval_policy": "on-request",
        }),
        record(2, "event_msg", {"type": "task_started"}),
        record(3, "response_item", {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "fix the bug"}],
        }),
        record(4, "response_item", {
            "type": "reasoning",
            "summary": "think about it",
        }),
        record(5, "response_item", {
            "type": "function_call",
            "name": "apply_patch",
            "arguments": '{"path":"x.py"}',
            "call_id": "call_1",
        }),
        record(6, "response_item", {
            "type": "function_call",
            "name": "shell",
            "arguments": '{"command":"pytest -q"}',
            "call_id": "call_2",
        }),
        record(7, "response_item", {
            "type": "function_call_output",
            "call_id": "call_2",
            "output": "2 passed",
        }),
        record(8, "response_item", {
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "done"}],
        }),
        record(9, "event_msg", {
            "type": "token_count",
            "info": {
                "total_token_usage": {
                    "input_tokens": 200,
                    "output_tokens": 30,
                    "cached_input_tokens": 15,
                },
            },
        }),
        record(10, "event_msg", {"type": "task_complete"}),
    ]
    _rollout(rollout_path, fixture)

    parsed = parse_session(str(rollout_path), REPO_MAP)
    assert parsed is not None

    # Session-level fields taken from session_meta and the repo map.
    session = parsed.session
    assert session.session_uid == "codex:cdx-1"
    assert session.flavor == "codex"
    assert session.repo == "agentic-resources"
    assert session.domain == "helix_forge"
    assert session.model == "gpt-5-codex"

    # Cost figures: token_count totals, one turn, first-to-last timestamp span.
    assert session.cost.input_tokens == 200
    assert session.cost.output_tokens == 30
    assert session.cost.cache_tokens == 15
    assert session.cost.turns == 1
    assert session.cost.wall_clock_s == 10.0

    observed_kinds = [event.kind for event in parsed.events]
    expected_kinds = [
        "lifecycle", "user_msg", "thinking", "edit", "test_run",
        "tool_result", "assistant_msg", "completion",
    ]
    assert observed_kinds == expected_kinds

    def first_of(kind):
        # First event of the given kind; StopIteration if there is none.
        return next(event for event in parsed.events if event.kind == kind)

    # Flat linkage: the function_call_output points back at the function_call
    # that shares its call_id.
    result_event = first_of("tool_result")
    pytest_call = first_of("test_run")
    assert result_event.parent_seq == pytest_call.seq

    # The apply_patch call is the one classified as an edit.
    edit_event = first_of("edit")
    assert edit_event.tool == "apply_patch"
|
||||
|
||||
|
||||
def test_codex_empty_or_no_meta_returns_none(tmp_path):
    """Expect None for an empty rollout and for one without a session_meta record."""
    empty_file = tmp_path / "rollout-empty.jsonl"
    empty_file.write_text("")
    assert parse_session(str(empty_file), REPO_MAP) is None

    # A lone event_msg record: no session_meta, so there is no session id.
    headless_file = tmp_path / "rollout-nometa.jsonl"
    task_started = {
        "timestamp": "t",
        "type": "event_msg",
        "payload": {"type": "task_started"},
    }
    _rollout(headless_file, [task_started])
    assert parse_session(str(headless_file), REPO_MAP) is None
|
||||
66
tests/test_merge.py
Normal file
66
tests/test_merge.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Multi-file session merge tests (T03)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from session_memory.adapters.common import Normalized # noqa: E402
|
||||
from session_memory.core.schema import Session, SessionEvent # noqa: E402
|
||||
from session_memory.core.store import Store # noqa: E402
|
||||
|
||||
|
||||
def _part(native, kinds, base_blob="b"):
    """Build one synthetic per-file bundle of a Claude session for merge tests.

    Args:
        native: Native session id; used for the session uid and the blob refs.
        kinds: Event kinds, one event per entry. An event's ``seq`` is its
            position in this list.
        base_blob: Prefix of every generated blob body.

    Returns:
        A ``Normalized`` bundle. Events form a linear chain (``parent_seq`` is
        the previous ``seq``, ``None`` for the first), are stamped one minute
        apart starting at 2026-06-06T10:00:00Z, and each references a blob
        whose body is ``"{base_blob}-{kind}-{index}"``.
    """
    from datetime import datetime, timedelta, timezone

    uid = Session.make_uid("claude", native)
    session = Session(session_uid=uid, flavor="claude", native_session_id=native)
    start = datetime(2026, 6, 6, 10, 0, tzinfo=timezone.utc)

    events, blobs = [], {}
    for i, kind in enumerate(kinds):
        ref = f"blob://{native}/{i}"
        # Derive the timestamp arithmetically: pasting the index into the
        # minute field yields malformed values (e.g. "10:010:00Z") once a part
        # has ten or more events.
        ts = (start + timedelta(minutes=i)).strftime("%Y-%m-%dT%H:%M:%SZ")
        events.append(SessionEvent(
            session_uid=uid,
            seq=i,
            parent_seq=(i - 1 if i else None),
            kind=kind,
            ts=ts,
            payload_ref=ref,
        ))
        blobs[ref] = f"{base_blob}-{kind}-{i}"
    return Normalized(session=session, events=events, blobs=blobs)
|
||||
|
||||
|
||||
def test_second_file_appends_not_overwrites(tmp_path):
    """A second file for an already-ingested session only adds its new events."""
    store = Store(str(tmp_path / "m.db"), str(tmp_path / "blobs"))
    session_uid = Session.make_uid("claude", "s1")

    # First file: three events, seq 0..2.
    first_file = _part("s1", ["user_msg", "assistant_msg", "tool_call"])
    first_added = store.ingest(first_file)
    assert first_added == 3
    assert store.count_events(session_uid) == 3

    # Second file of the SAME session: replays event 0, then continues with
    # two events the store has not seen.
    second_file = _part("s1", ["user_msg", "edit", "completion"])
    # Pin the replayed event to exactly what the first file carried so that it
    # is recognised as a duplicate.
    replayed = second_file.events[0]
    replayed.kind = "user_msg"
    replayed.ts = "2026-06-06T10:00:00Z"
    second_file.blobs[replayed.payload_ref] = "b-user_msg-0"
    second_added = store.ingest(second_file)

    # Only the two genuinely new events land; the total grows additively and
    # the stored seqs stay contiguous, offset past the first file.
    assert second_added == 2
    assert store.count_events(session_uid) == 5
    stored_seqs = [event.seq for event in store.get_events(session_uid)]
    assert stored_seqs == [0, 1, 2, 3, 4]
|
||||
|
||||
|
||||
def test_reingest_same_bundle_is_idempotent(tmp_path):
    """Ingesting the very same bundle twice must not add events the second time."""
    store = Store(str(tmp_path / "m.db"), str(tmp_path / "blobs"))
    session_uid = Session.make_uid("claude", "s2")
    bundle = _part("s2", ["user_msg", "assistant_msg"])

    first_pass = store.ingest(bundle)
    assert first_pass == 2

    # Re-running over identical input reports nothing new.
    second_pass = store.ingest(bundle)
    assert second_pass == 0

    assert store.count_events(session_uid) == 2
|
||||
|
||||
|
||||
def test_appended_event_parent_remapped_within_part(tmp_path):
    """parent_seq of an appended event is shifted by the same offset as its seq."""
    st = Store(str(tmp_path / "m.db"), str(tmp_path / "blobs"))
    uid = Session.make_uid("claude", "s3")

    st.ingest(_part("s3", ["user_msg", "assistant_msg"]))  # stored as seq 0, 1
    st.ingest(_part("s3", ["thinking", "edit"]))  # appended as seq 2, 3

    events = {e.seq: e for e in st.get_events(uid)}
    # Inside its own part the 'edit' was seq 1 with parent_seq 0; after the
    # merge it is seq 3 and its parent must be that part's first new seq (2).
    assert events[3].parent_seq == 2
|
||||
Reference in New Issue
Block a user