session-memory Phase 1: Codex adapter (T01) + multi-file merge (T03)

- adapters/common.py: shared Normalized + helpers (resolve_repo, classify_tool,
  jsonl iter, etc.); claude.py refactored to use it (Normalized re-exported)
- adapters/codex.py: rollout {timestamp,type,payload} parser; session_meta/
  response_item/event_msg mapping; flat call_id join; token_count cost;
  registered in ingest dispatch
- core/store.py: ingest() now merges multi-file sessions by content
  fingerprint, appends new events with offset seq (design OQ6); idempotent
- tests/test_codex_adapter.py, tests/test_merge.py

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-06 21:55:32 +02:00
parent 5aea22f24f
commit bc11cb9aec
8 changed files with 521 additions and 90 deletions

View File

@@ -11,54 +11,23 @@ that the store persists out-of-line so Tier 1 rows stay light.
from __future__ import annotations
import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Iterable, Optional
from typing import Any, Optional
from ..core.schema import Cost, Session, SessionEvent
from .common import ( # noqa: F401 (Normalized re-exported for back-compat)
Normalized,
classify_tool,
first_line as _first_line,
iter_jsonl as _iter_records,
now_iso as _now,
resolve_repo as _resolve_repo,
seconds_between as _seconds_between,
stringify as _stringify,
)
FLAVOR = "claude"

# tool_use names that mutate files; calls to these are given kind "edit".
_EDIT_TOOLS = {
    "Edit",
    "Write",
    "NotebookEdit",
    "MultiEdit",
}

# Substrings that mark a Bash command as a test run (kind "test_run").
# Detection is intentionally crude: plain substring matching, no parsing.
_TEST_HINTS = (
    "pytest",
    "unittest",
    "npm test",
    "npm run test",
    "go test",
    "cargo test",
    "jest",
    "vitest",
)
@dataclass
class Normalized:
    """Bundle produced by an adapter: one session, its events, and blobs.

    ``blobs`` maps a string key to string content and defaults to a fresh,
    empty dict for every instance.
    """

    # The parsed session record.
    session: Session
    # Events belonging to ``session``.
    events: list[SessionEvent]
    # String-keyed string payloads; a new dict is created per instance.
    blobs: dict[str, str] = field(default_factory=dict)
def _iter_records(path: str) -> Iterable[dict[str, Any]]:
    """Yield each JSON object found in the JSONL file at *path*.

    Reading is best-effort so that a partially written or damaged log still
    yields every record that can be recovered:

    * blank lines are skipped;
    * lines that are not valid JSON are skipped;
    * lines that are valid JSON but not an object (e.g. ``[1, 2]`` or
      ``null``) are skipped, so callers always receive a ``dict``;
    * undecodable bytes are replaced rather than raising, so a trailing line
      cut in the middle of a multi-byte UTF-8 character does not abort the
      whole read.

    Args:
        path: Filesystem path of the JSONL file.

    Yields:
        One ``dict`` per well-formed JSON-object line, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # errors="replace": a truncated final line may end inside a multi-byte
    # sequence; strict decoding would raise UnicodeDecodeError mid-iteration.
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue  # tolerate partial/corrupt lines
            if isinstance(record, dict):
                yield record
def _resolve_repo(cwd: Optional[str], repo_domain_map: dict[str, str]) -> tuple[Optional[str], Optional[str]]:
    """Map a working directory to a ``(repo, domain)`` pair.

    The repo name is the last path component of *cwd* after trailing ``/``
    characters are removed; the domain is whatever *repo_domain_map* holds
    for that name (``None`` when there is no entry).

    Returns ``(None, None)`` when *cwd* is empty/``None`` or has no final
    component (for example ``"/"``).
    """
    if cwd:
        name = os.path.basename(cwd.rstrip("/"))
        if name:
            return name, repo_domain_map.get(name)
    return None, None
def _is_test_command(text: str) -> bool:
    """Report whether *text* contains any ``_TEST_HINTS`` substring.

    *text* is lower-cased before the substring checks.
    """
    lowered = text.lower()
    for hint in _TEST_HINTS:
        if hint in lowered:
            return True
    return False
def _content_blocks(message: dict[str, Any]) -> list[dict[str, Any]]:
content = message.get("content")
@@ -159,11 +128,8 @@ def parse_session(path: str, repo_domain_map: Optional[dict[str, str]] = None) -
name = b.get("name", "")
inp = b.get("input", {})
body = _stringify(inp)
kind = "tool_call"
if name in _EDIT_TOOLS:
kind = "edit"
elif name == "Bash" and _is_test_command(_stringify(inp.get("command", ""))):
kind = "test_run"
cmd = inp.get("command", "") if isinstance(inp, dict) else ""
kind = classify_tool(name, _stringify(cmd))
add_event(uuid, parent, ts, kind, role="assistant", tool=name,
summary=f"{name}", body=body, sidechain=sidechain)
@@ -194,35 +160,3 @@ def parse_session(path: str, repo_domain_map: Optional[dict[str, str]] = None) -
discovered_at=_now(),
)
return Normalized(session=session, events=events, blobs=blobs)
# ---- helpers ---------------------------------------------------------------
def _stringify(v: Any) -> str:
    """Render *v* as text.

    * ``str`` values are returned unchanged (no truncation).
    * ``None`` becomes the empty string.
    * Anything else is JSON-encoded (non-ASCII kept as-is); if JSON encoding
      fails with ``TypeError``/``ValueError`` the ``str()`` form is used
      instead. Either way the result is cut to 20000 characters.
    """
    if isinstance(v, str):
        return v
    if v is None:
        return ""

    limit = 20000
    try:
        rendered = json.dumps(v, ensure_ascii=False)
    except (TypeError, ValueError):
        rendered = str(v)
    return rendered[:limit]
def _first_line(text: str) -> str:
    """Return the first line of *text* after stripping surrounding whitespace.

    ``None``, empty, or whitespace-only input yields ``""``. Only the outer
    whitespace of the whole text is removed, so the returned line may still
    end with spaces that preceded its line break.
    """
    stripped = (text or "").strip()
    if not stripped:
        return ""
    return stripped.splitlines()[0]
def _seconds_between(start: Optional[str], end: Optional[str]) -> float:
    """Return the non-negative number of seconds from *start* to *end*.

    Both arguments are ISO-8601 timestamps; a ``Z`` suffix is accepted as
    UTC. A timestamp without any offset is treated as UTC so that a mix of
    offset-aware and naive values can still be subtracted (subtracting them
    directly raises ``TypeError``).

    Args:
        start: Earlier timestamp, or ``None``/empty.
        end: Later timestamp, or ``None``/empty.

    Returns:
        ``(end - start)`` in seconds, clamped to ``0.0`` when negative.
        ``0.0`` is also returned when either value is missing or cannot be
        parsed.
    """
    if not start or not end:
        return 0.0

    def _parse(stamp: str) -> datetime:
        # fromisoformat on older Pythons rejects a literal "Z" suffix.
        moment = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        if moment.tzinfo is None:
            # Normalise naive values so aware/naive pairs are comparable.
            moment = moment.replace(tzinfo=timezone.utc)
        return moment

    try:
        return max(0.0, (_parse(end) - _parse(start)).total_seconds())
    except ValueError:
        return 0.0
def _now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(timezone.utc)
    stamp = current.strftime("%Y-%m-%dT%H:%M:%SZ")
    return stamp