Files
agentic-resources/tests/test_merge.py
tegwick bc11cb9aec session-memory Phase 1: Codex adapter (T01) + multi-file merge (T03)
- adapters/common.py: shared Normalized + helpers (resolve_repo, classify_tool,
  jsonl iter, etc.); claude.py refactored to use it (Normalized re-exported)
- adapters/codex.py: rollout {timestamp,type,payload} parser; session_meta/
  response_item/event_msg mapping; flat call_id join; token_count cost;
  registered in ingest dispatch
- core/store.py: ingest() now merges multi-file sessions by content
  fingerprint, appends new events with offset seq (design OQ6); idempotent
- tests/test_codex_adapter.py, tests/test_merge.py

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 21:55:32 +02:00

67 lines
2.7 KiB
Python

"""Multi-file session merge tests (T03)."""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.adapters.common import Normalized # noqa: E402
from session_memory.core.schema import Session, SessionEvent # noqa: E402
from session_memory.core.store import Store # noqa: E402
def _part(native, kinds, base_blob="b"):
    """Build a synthetic ``Normalized`` bundle representing one file of a session.

    Args:
        native: Native session id; also used to derive the session uid and
            the ``blob://{native}/{i}`` payload references.
        kinds: Event kinds, one event is produced per entry, in order.
        base_blob: Prefix of every generated blob body.

    Returns:
        A ``Normalized`` holding the session, its events and a mapping of
        payload reference -> blob body (``"{base_blob}-{kind}-{index}"``).
    """
    # Local import keeps this helper self-contained; timestamps are derived
    # from a real datetime so they stay well-formed for any number of events
    # (the previous "10:0{i}" template broke once i reached 10).
    from datetime import datetime, timedelta, timezone

    uid = Session.make_uid("claude", native)
    session = Session(session_uid=uid, flavor="claude", native_session_id=native)
    start = datetime(2026, 6, 6, 10, 0, tzinfo=timezone.utc)

    events, blobs = [], {}
    for index, kind in enumerate(kinds):
        ref = f"blob://{native}/{index}"
        # Events are one minute apart, starting at 2026-06-06T10:00:00Z.
        ts = (start + timedelta(minutes=index)).strftime("%Y-%m-%dT%H:%M:%SZ")
        events.append(
            SessionEvent(
                session_uid=uid,
                seq=index,
                # Linear chain: each event points at the previous one; the
                # first event of the part has no parent.
                parent_seq=(index - 1 if index else None),
                kind=kind,
                ts=ts,
                payload_ref=ref,
            )
        )
        blobs[ref] = f"{base_blob}-{kind}-{index}"
    return Normalized(session=session, events=events, blobs=blobs)
def test_second_file_appends_not_overwrites(tmp_path):
    """A second file for the same session appends only its unseen events."""
    store = Store(str(tmp_path / "m.db"), str(tmp_path / "blobs"))
    session_uid = Session.make_uid("claude", "s1")

    # First file contributes three events, numbered 0..2.
    first = _part("s1", ["user_msg", "assistant_msg", "tool_call"])
    first_added = store.ingest(first)
    assert first_added == 3
    assert store.count_events(session_uid) == 3

    # Second file of the SAME session: replays event 0, then continues with
    # two events the store has not seen yet.
    second = _part("s1", ["user_msg", "edit", "completion"])
    # Pin the leading event to exactly what the first file held so that it
    # is recognised as a duplicate.
    head = second.events[0]
    head.kind = "user_msg"
    head.ts = "2026-06-06T10:00:00Z"
    second.blobs[head.payload_ref] = "b-user_msg-0"
    second_added = store.ingest(second)

    # Just the two unseen events are appended; the total grows additively.
    assert second_added == 2
    assert store.count_events(session_uid) == 5
    stored_seqs = [event.seq for event in store.get_events(session_uid)]
    assert stored_seqs == [0, 1, 2, 3, 4]  # contiguous, offset
def test_reingest_same_bundle_is_idempotent(tmp_path):
    """Ingesting the identical bundle a second time adds no events."""
    store = Store(str(tmp_path / "m.db"), str(tmp_path / "blobs"))
    session_uid = Session.make_uid("claude", "s2")
    bundle = _part("s2", ["user_msg", "assistant_msg"])

    first_added = store.ingest(bundle)
    assert first_added == 2

    second_added = store.ingest(bundle)
    assert second_added == 0  # nothing new on re-run

    assert store.count_events(session_uid) == 2
def test_appended_event_parent_remapped_within_part(tmp_path):
    """Parent links of appended events are shifted along with their seq.

    The second part's events carry part-local numbering (seq 0 and 1, with
    the second one pointing at parent_seq 0). After ingest they are expected
    at seq 2 and 3, and the parent link must follow the same offset.
    """
    st = Store(str(tmp_path / "m.db"), str(tmp_path / "blobs"))
    uid = Session.make_uid("claude", "s3")

    st.ingest(_part("s3", ["user_msg", "assistant_msg"]))  # seq 0,1
    # Previously wrapped in a dead `... if False else ...` expression; only
    # this branch ever ran.
    st.ingest(_part("s3", ["thinking", "edit"]))  # new 2,3

    events = {e.seq: e for e in st.get_events(uid)}
    # the 'edit' (seq 3) had parent_seq=0 within its part -> remapped to its
    # part's first new seq (2)
    assert events[3].parent_seq == 2