generated from coulomb/repo-seed
session-memory Phase 1: Grok adapter (T02)
- adapters/grok.py: reads the per-session dir (summary.json + chat_history.jsonl + events.jsonl + updates.jsonl); conversation from chat_history, lifecycle/turn from events, tool-call names paired in order from the updates ACP stream - registered in ingest dispatch; codex+grok sources enabled in config.toml - tests/test_grok_adapter.py (synthetic + real local sessions) - live multi-flavor dry-run discovers 89 sessions across flavors Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
182
session_memory/adapters/grok.py
Normal file
182
session_memory/adapters/grok.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""Grok CLI collector adapter — Tier 0 -> Tier 1 (design §2.3, §4.3).
|
||||
|
||||
A Grok session is a *directory* ``~/.grok/sessions/<enc-cwd>/<uuid>/`` containing
|
||||
``summary.json`` (metadata), ``chat_history.jsonl`` (the canonical transcript),
|
||||
``events.jsonl`` (explicit lifecycle + ``turn_number``), and ``updates.jsonl``
|
||||
(ACP ``session/update`` stream, which carries tool-call names/args).
|
||||
|
||||
The ingest glob matches ``chat_history.jsonl``; this adapter derives its sibling
|
||||
files from the same directory. Conversation order is taken from
|
||||
``chat_history.jsonl``; tool-call names are paired, in order, from
|
||||
``updates.jsonl`` ``tool_call`` entries to classify edits/test runs.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from ..core.schema import Cost, Session, SessionEvent
|
||||
from .common import (
|
||||
Normalized,
|
||||
classify_tool,
|
||||
first_line,
|
||||
iter_jsonl,
|
||||
now_iso,
|
||||
resolve_repo,
|
||||
seconds_between,
|
||||
stringify,
|
||||
)
|
||||
|
||||
FLAVOR = "grok"
|
||||
|
||||
|
||||
def _text_content(content: Any) -> str:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
return "\n".join(
|
||||
(b.get("text") or "") for b in content if isinstance(b, dict)
|
||||
)
|
||||
return ""
|
||||
|
||||
|
||||
def _tool_calls_in_order(session_dir: str) -> list[dict[str, Any]]:
|
||||
"""Ordered list of {title, rawInput} from updates.jsonl tool_call entries."""
|
||||
calls: list[dict[str, Any]] = []
|
||||
upd = os.path.join(session_dir, "updates.jsonl")
|
||||
if not os.path.exists(upd):
|
||||
return calls
|
||||
for rec in iter_jsonl(upd):
|
||||
u = (rec.get("params") or {}).get("update") or {}
|
||||
if u.get("sessionUpdate") == "tool_call":
|
||||
calls.append({"title": u.get("title") or "", "rawInput": u.get("rawInput") or {},
|
||||
"id": u.get("toolCallId")})
|
||||
return calls
|
||||
|
||||
|
||||
def _session_meta(session_dir: str) -> dict[str, Any]:
|
||||
p = os.path.join(session_dir, "summary.json")
|
||||
if not os.path.exists(p):
|
||||
return {}
|
||||
try:
|
||||
with open(p, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
except (OSError, ValueError):
|
||||
return {}
|
||||
|
||||
|
||||
def _lifecycle(session_dir: str) -> tuple[list[dict[str, Any]], Optional[str]]:
|
||||
"""events.jsonl records + the model id seen there."""
|
||||
evs, model = [], None
|
||||
p = os.path.join(session_dir, "events.jsonl")
|
||||
if os.path.exists(p):
|
||||
for rec in iter_jsonl(p):
|
||||
evs.append(rec)
|
||||
model = model or rec.get("model_id")
|
||||
return evs, model
|
||||
|
||||
|
||||
def parse_session(path: str, repo_domain_map: Optional[dict[str, str]] = None) -> Optional[Normalized]:
    """Normalise one Grok session directory into a ``Normalized`` record.

    Args:
        path: Either the session directory or the ``chat_history.jsonl`` file
            inside it.
        repo_domain_map: Optional mapping handed to ``resolve_repo`` together
            with the session's working directory.

    Returns:
        ``None`` when ``chat_history.jsonl`` does not exist or no events were
        produced; otherwise a ``Normalized`` holding the session, its events
        and the blob bodies keyed by ``blob://<session_id>/<seq>``.

    Event order: the selected lifecycle records from ``events.jsonl`` come
    first, then the chat records in file order. Each ``tool_result`` chat
    record consumes the next unconsumed ``tool_call`` from ``updates.jsonl``
    (pairing is purely positional) and is emitted right after an event for that
    call, linked to it through ``parent_seq``. All events carry ``ts=None``.
    """
    repo_domain_map = repo_domain_map or {}
    # accept either the chat_history.jsonl path or the session dir
    session_dir = path if os.path.isdir(path) else os.path.dirname(path)
    chat = os.path.join(session_dir, "chat_history.jsonl")
    if not os.path.exists(chat):
        return None

    # summary.json is external data: tolerate a non-object top level or "info".
    meta = _session_meta(session_dir)
    if not isinstance(meta, dict):
        meta = {}
    info = meta.get("info")
    if not isinstance(info, dict):
        info = {}
    session_id = info.get("id") or os.path.basename(session_dir.rstrip("/"))
    cwd = info.get("cwd") or meta.get("git_root_dir")
    life_events, life_model = _lifecycle(session_dir)
    model = meta.get("current_model_id") or life_model
    # Consumed one per tool_result record, in order.
    remaining_calls = iter(_tool_calls_in_order(session_dir))

    # The uid is the same for the session and every event: compute it once.
    session_uid = Session.make_uid(FLAVOR, session_id)
    events: list[SessionEvent] = []
    blobs: dict[str, str] = {}

    def add(kind, *, role=None, tool=None, summary=None, body=None, parent_seq=None) -> int:
        """Append one event and return its sequence number."""
        # Every call appends exactly one event, so the next seq is len(events).
        s = len(events)
        ref = None
        if body:
            ref = f"blob://{session_id}/{s}"
            blobs[ref] = body
        events.append(SessionEvent(
            session_uid=session_uid, seq=s, parent_seq=parent_seq,
            ts=None, kind=kind, role=role, tool=tool,
            summary=(summary or "")[:300] or None, payload_ref=ref,
        ))
        return s

    # explicit lifecycle first (turn_started/turn_ended carry no bodies)
    for le in life_events:
        t = le.get("type")
        if t in ("turn_started", "loop_started", "turn_ended", "phase_changed"):
            add("lifecycle", summary=t)

    # chat record type -> (event kind, role, fixed summary or None for first line)
    message_kinds = {
        "user": ("user_msg", "user", None),
        "reasoning": ("thinking", "assistant", "reasoning"),
        "assistant": ("assistant_msg", "assistant", None),
    }

    for rec in iter_jsonl(chat):
        rtype = rec.get("type")
        content = rec.get("content")
        # isinstance guard keeps an unhashable "type" value from raising on lookup.
        spec = message_kinds.get(rtype) if isinstance(rtype, str) else None
        if spec is not None:
            kind, role, fixed_summary = spec
            text = _text_content(content)
            if text.strip():  # whitespace-only messages are dropped
                summary = fixed_summary if fixed_summary is not None else first_line(text)
                add(kind, role=role, summary=summary, body=text)
        elif rtype == "tool_result":
            # pair with the next tool_call (in order) to recover name/args
            tool = None
            parent = None
            call = next(remaining_calls, None)
            if call is not None:
                tool = call["title"]
                cmd = stringify(call["rawInput"])
                parent = add(classify_tool(tool, cmd), role="assistant", tool=tool,
                             summary=tool, body=cmd)
            # _text_content already returns a str input unchanged.
            add("tool_result", role="tool", tool=tool, summary="tool result",
                body=stringify(_text_content(content)), parent_seq=parent)

    if not events:
        return None

    cost = Cost(turns=sum(1 for e in events if e.kind == "user_msg"))
    started = info.get("created_at") or meta.get("created_at")
    ended = meta.get("last_active_at") or info.get("updated_at") or meta.get("updated_at")
    cost.wall_clock_s = seconds_between(started, ended)

    repo, domain = resolve_repo(cwd, repo_domain_map)
    session = Session(
        session_uid=session_uid, flavor=FLAVOR,
        native_session_id=session_id, repo=repo, domain=domain, cwd=cwd,
        git_branch=meta.get("head_branch"), model=model,
        started_at=started, ended_at=ended, outcome="unknown", cost=cost,
        source_path=chat,
        source_bytes=_dir_bytes(session_dir),
        discovered_at=now_iso(),
    )
    return Normalized(session=session, events=events, blobs=blobs)
|
||||
|
||||
|
||||
def _dir_bytes(d: str) -> int:
|
||||
total = 0
|
||||
for root, _, files in os.walk(d):
|
||||
for f in files:
|
||||
try:
|
||||
total += os.path.getsize(os.path.join(root, f))
|
||||
except OSError:
|
||||
pass
|
||||
return total
|
||||
@@ -20,14 +20,14 @@ root = "~/.claude/projects"
|
||||
# glob, relative to root; covers sessions and agent-* sidechains
|
||||
glob = "*/*.jsonl"
|
||||
|
||||
# Codex / Grok adapters land in Phase 1 (schemas confirmed in the design doc).
|
||||
# Codex / Grok adapters added in Phase 1 (AGENTIC-WP-0003).
|
||||
[sources.codex]
|
||||
enabled = false
|
||||
enabled = true
|
||||
root = "~/.codex/sessions"
|
||||
glob = "*/*/*/rollout-*.jsonl"
|
||||
|
||||
[sources.grok]
|
||||
enabled = false
|
||||
enabled = true
|
||||
root = "~/.grok/sessions"
|
||||
glob = "*/*/chat_history.jsonl"
|
||||
|
||||
@@ -37,3 +37,5 @@ agentic-resources = "helix_forge"
|
||||
the-custodian = "custodian"
|
||||
state-hub = "custodian"
|
||||
ops-bridge = "custodian"
|
||||
net-kingdom = "netkingdom"
|
||||
can-you-assist = "coulomb_social"
|
||||
|
||||
@@ -20,6 +20,7 @@ from typing import Any
|
||||
|
||||
from .adapters import claude as claude_adapter
|
||||
from .adapters import codex as codex_adapter
|
||||
from .adapters import grok as grok_adapter
|
||||
from .core import digest as digest_mod
|
||||
from .core.cursor import Cursors
|
||||
from .core.retention import RetentionConfig, sweep as retention_sweep
|
||||
@@ -29,6 +30,7 @@ from .core.store import Store
|
||||
_ADAPTERS = {
|
||||
"claude": claude_adapter.parse_session,
|
||||
"codex": codex_adapter.parse_session,
|
||||
"grok": grok_adapter.parse_session,
|
||||
}
|
||||
|
||||
|
||||
|
||||
92
tests/test_grok_adapter.py
Normal file
92
tests/test_grok_adapter.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Grok adapter tests (T02): synthetic session dir + real local sessions."""
|
||||
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from session_memory.adapters.grok import parse_session # noqa: E402
|
||||
|
||||
REPO_MAP = {"agentic-resources": "helix_forge", "net-kingdom": "netkingdom",
|
||||
"can-you-assist": "coulomb_social"}
|
||||
|
||||
|
||||
def _mk_session(dir_path, sid):
|
||||
os.makedirs(dir_path, exist_ok=True)
|
||||
with open(os.path.join(dir_path, "summary.json"), "w") as f:
|
||||
json.dump({"info": {"id": sid, "cwd": "/home/worsch/agentic-resources"},
|
||||
"created_at": "2026-06-06T10:00:00Z",
|
||||
"last_active_at": "2026-06-06T10:05:00Z",
|
||||
"current_model_id": "grok-build", "head_branch": "main"}, f)
|
||||
with open(os.path.join(dir_path, "events.jsonl"), "w") as f:
|
||||
f.write(json.dumps({"ts": "2026-06-06T10:00:00Z", "type": "turn_started",
|
||||
"turn_number": 0, "model_id": "grok-build"}) + "\n")
|
||||
f.write(json.dumps({"ts": "2026-06-06T10:05:00Z", "type": "turn_ended",
|
||||
"turn_number": 0}) + "\n")
|
||||
with open(os.path.join(dir_path, "chat_history.jsonl"), "w") as f:
|
||||
for rec in [
|
||||
{"type": "system", "content": "sys prompt"},
|
||||
{"type": "user", "content": [{"type": "text", "text": "fix the bug"}]},
|
||||
{"type": "reasoning", "content": [{"type": "text", "text": "thinking..."}]},
|
||||
{"type": "assistant", "content": ""}, # empty -> skipped
|
||||
{"type": "tool_result", "content": "The file x.py has been updated"},
|
||||
{"type": "assistant", "content": "done"},
|
||||
{"type": "tool_result", "content": "6 passed"},
|
||||
]:
|
||||
f.write(json.dumps(rec) + "\n")
|
||||
with open(os.path.join(dir_path, "updates.jsonl"), "w") as f:
|
||||
for u in [
|
||||
{"sessionUpdate": "tool_call", "toolCallId": "c1", "title": "edit_file",
|
||||
"rawInput": {"target_file": "x.py"}},
|
||||
{"sessionUpdate": "tool_call", "toolCallId": "c2", "title": "shell",
|
||||
"rawInput": {"command": "pytest -q"}},
|
||||
]:
|
||||
f.write(json.dumps({"timestamp": "t", "method": "session/update",
|
||||
"params": {"sessionId": sid, "update": u}}) + "\n")
|
||||
|
||||
|
||||
def test_grok_synthetic_dir(tmp_path):
    """Parse a synthetic session dir and check session fields, event kinds and links."""
    d = tmp_path / "%2Fhome%2Fworsch%2Fagentic-resources" / "sid-1"
    _mk_session(str(d), "sid-1")

    norm = parse_session(str(d / "chat_history.jsonl"), REPO_MAP)
    assert norm is not None
    s = norm.session
    assert s.session_uid == "grok:sid-1"
    assert s.flavor == "grok"
    assert s.repo == "agentic-resources" and s.domain == "helix_forge"
    assert s.model == "grok-build"
    assert s.git_branch == "main"
    assert s.cost.turns == 1
    assert s.cost.wall_clock_s == 300.0

    kinds = [e.kind for e in norm.events]
    # events.jsonl holds turn_started + turn_ended -> exactly 2 lifecycle events
    assert kinds.count("lifecycle") == 2
    assert "user_msg" in kinds and "thinking" in kinds and "assistant_msg" in kinds
    # paired tool calls recovered names -> edit + test_run, each followed by tool_result
    assert "edit" in kinds and "test_run" in kinds
    edit = next(e for e in norm.events if e.kind == "edit")
    assert edit.tool == "edit_file"
    test_run = next(e for e in norm.events if e.kind == "test_run")
    tr = [e for e in norm.events if e.kind == "tool_result"]
    assert len(tr) == 2
    # each tool_result links back to the tool call it was paired with, in order
    assert tr[0].parent_seq == edit.seq
    assert tr[1].parent_seq == test_run.seq
|
||||
|
||||
|
||||
def test_real_local_grok_sessions_if_available():
    """Smoke-test the adapter against any Grok sessions found on this machine.

    Returns without asserting anything when ``~/.grok/sessions`` holds no
    ``chat_history.jsonl``. Otherwise at least one session must parse, and
    every parsed session must have a ``grok:`` uid and strictly increasing,
    unique event sequence numbers.
    """
    sessions_root = os.path.expanduser("~/.grok/sessions")
    chat_paths = glob.glob(os.path.join(sessions_root, "*", "*", "chat_history.jsonl"))
    if not chat_paths:
        return

    parsed_count = 0
    for chat_path in chat_paths:
        norm = parse_session(chat_path, REPO_MAP)
        if norm is not None:
            parsed_count += 1
            assert norm.session.session_uid.startswith("grok:")
            seqs = [event.seq for event in norm.events]
            assert seqs == sorted(seqs)
            assert len(seqs) == len(set(seqs))
    assert parsed_count >= 1
|
||||
@@ -52,7 +52,7 @@ order (no native DAG). Version-detect on `session_meta.cli_version`. Reuse the
|
||||
|
||||
```task
|
||||
id: AGENTIC-WP-0003-T02
|
||||
status: progress
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "fe3d7d1c-110e-4f16-8d56-062fa4a651aa"
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user