from __future__ import annotations

import json

from api.services.token_sources import parse_iso
from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
from api.services.token_sources.claude import parse_claude_transcript
from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session


def _write_jsonl(path, rows):
    """Write *rows* to *path* as one JSON document per line.

    Missing parent directories are created first. Every row is serialised
    with ``json.dumps``, except a row equal to the string ``"BAD"``, which is
    emitted as the deliberately unparseable line ``{not json}`` so tests can
    exercise malformed-line handling.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            "{not json}\n" if row == "BAD" else json.dumps(row) + "\n"
            for row in rows
        )


def test_parse_codex_session_sums_token_count_records(tmp_path):
    """Parse a Codex rollout file and check the reported token figures.

    The fixture contains two ``token_count`` events and one malformed line.
    The expected figures equal the usage of the event stamped 2026-05-23; the
    event stamped 2026-05-22 is not reflected in them.
    NOTE(review): presumably the earlier event is dropped because it predates
    the ``parse_iso("2026-05-23")`` argument — confirm against the parser.
    """
    session_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"

    session_meta = {
        "type": "session_meta",
        "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
    }
    turn_context = {
        "type": "turn_context",
        "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"},
    }
    earlier_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-22T23:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}},
        },
    }
    later_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {
                "last_token_usage": {
                    "input_tokens": 100,
                    "output_tokens": 40,
                    "cached_input_tokens": 15,
                    "reasoning_output_tokens": 7,
                    "total_tokens": 155,
                }
            },
        },
    }
    # "BAD" makes _write_jsonl emit one unparseable line between the events.
    _write_jsonl(
        session_file,
        [session_meta, turn_context, earlier_usage, "BAD", later_usage],
    )

    parsed = parse_codex_session(session_file, parse_iso("2026-05-23"))

    assert parsed is not None
    assert parsed.source_id == "codex:s1"
    assert parsed.tokens_in == 100
    assert parsed.tokens_out == 40
    assert parsed.cached_input_tokens == 15
    assert parsed.reasoning_output_tokens == 7
    assert parsed.raw_total_tokens == 155
    assert parsed.raw_metadata["malformed_lines"] == 1


def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
    """Collect sessions when one session id exists both live and archived.

    Both files carry the session id ``same``; the archived copy has one extra
    ``token_count`` event. Exactly one record is expected, and its total of 40
    matches the archived copy's two events (10 + 5 + 20 + 5).
    """
    live_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
    archived_file = tmp_path / "archived_sessions" / "rollout-archived.jsonl"

    shared_rows = [
        {
            "type": "session_meta",
            "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
        },
        {
            "type": "event_msg",
            "timestamp": "2026-05-23T01:00:00Z",
            "payload": {
                "type": "token_count",
                "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}},
            },
        },
    ]
    archived_only_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T02:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}},
        },
    }
    _write_jsonl(live_file, shared_rows)
    _write_jsonl(archived_file, [*shared_rows, archived_only_usage])

    collected = collect_codex_sessions(tmp_path, parse_iso("2026-05-23"))

    assert len(collected) == 1
    survivor = collected[0]
    assert survivor.source_id == "codex:same"
    assert survivor.tokens_total == 40


def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
    """Parse a one-entry Claude transcript and check usage and metadata.

    The expected cached-input figure of 12 equals the fixture's
    ``cache_creation_input_tokens`` (5) plus ``cache_read_input_tokens`` (7).
    The record's ``raw_metadata`` must not contain a ``content`` key.
    """
    transcript_file = tmp_path / "projects" / "repo" / "session.jsonl"

    usage = {
        "input_tokens": 30,
        "cache_creation_input_tokens": 5,
        "cache_read_input_tokens": 7,
        "output_tokens": 11,
    }
    entry = {
        "timestamp": "2026-05-23T01:00:00Z",
        "session_id": "c1",
        "cwd": "/repo",
        "message": {
            "model": "claude-sonnet",
            "content": "do not store me",
            "usage": usage,
        },
    }
    _write_jsonl(transcript_file, [entry])

    parsed = parse_claude_transcript(transcript_file, parse_iso("2026-05-23"))

    assert parsed is not None
    assert parsed.source_id == "claude:c1"
    assert parsed.tokens_in == 30
    assert parsed.cached_input_tokens == 12
    assert parsed.tokens_out == 11
    assert "content" not in parsed.raw_metadata


def test_resolve_repo_uses_normalised_path_prefix():
    """Resolve a WSL-style UNC working directory to a registered repository.

    The cwd is a subdirectory of the ``state-hub`` checkout expressed with a
    ``//wsl.localhost/<distro>`` prefix. The test expects ``normalise_cwd`` to
    reduce that prefix form to the plain path, and ``resolve_repo`` to pick
    repo ``1`` with method ``path_prefix``.
    """
    known_repos = [
        RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub"),
        RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other"),
    ]

    resolved = resolve_repo(
        "//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api",
        known_repos,
    )
    normalised = normalise_cwd("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub")

    assert normalised == "/home/worsch/state-hub"
    assert resolved is not None
    assert resolved.repo_id == "1"
    assert resolved.method == "path_prefix"