Files
state-hub/tests/test_token_sources.py

140 lines
4.9 KiB
Python

from __future__ import annotations
import json
from api.services.token_sources import parse_iso
from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
from api.services.token_sources.claude import parse_claude_transcript
from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session
def _write_jsonl(path, rows):
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", encoding="utf-8") as handle:
for row in rows:
if row == "BAD":
handle.write("{not json}\n")
else:
handle.write(json.dumps(row) + "\n")
def test_parse_codex_session_sums_token_count_records(tmp_path):
    """Parse one Codex rollout file and check the token figures on the record.

    The fixture contains a token_count event stamped 2026-05-22 (earlier than
    the 2026-05-23 date handed to the parser), one malformed line, and a
    token_count event stamped 2026-05-23. The expected figures equal the
    later event's usage, and one malformed line is expected to be reported.
    """
    session_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"

    session_meta = {
        "type": "session_meta",
        "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
    }
    turn_context = {
        "type": "turn_context",
        "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"},
    }
    earlier_event = {
        "type": "event_msg",
        "timestamp": "2026-05-22T23:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}},
        },
    }
    later_usage = {
        "input_tokens": 100,
        "output_tokens": 40,
        "cached_input_tokens": 15,
        "reasoning_output_tokens": 7,
        "total_tokens": 155,
    }
    later_event = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {"type": "token_count", "info": {"last_token_usage": later_usage}},
    }
    # "BAD" makes _write_jsonl emit a line that is not valid JSON.
    _write_jsonl(
        session_file,
        [session_meta, turn_context, earlier_event, "BAD", later_event],
    )

    record = parse_codex_session(session_file, parse_iso("2026-05-23"))

    assert record is not None
    assert record.source_id == "codex:s1"
    assert record.tokens_in == 100
    assert record.tokens_out == 40
    assert record.cached_input_tokens == 15
    assert record.reasoning_output_tokens == 7
    assert record.raw_total_tokens == 155
    assert record.raw_metadata["malformed_lines"] == 1
def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
    """Collect Codex sessions when one session id exists both live and archived.

    Both files carry session id "same"; the archived copy has one extra
    token_count event. A single record is expected, with a total of 40
    tokens, which is the sum of all three events in the archived copy.
    """
    live_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
    archived_file = tmp_path / "archived_sessions" / "rollout-archived.jsonl"

    session_meta = {
        "type": "session_meta",
        "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
    }
    first_event = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}},
        },
    }
    extra_event = {
        "type": "event_msg",
        "timestamp": "2026-05-23T02:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}},
        },
    }
    shared_rows = [session_meta, first_event]

    # Live copy is written first, then the longer archived copy.
    _write_jsonl(live_file, shared_rows)
    _write_jsonl(archived_file, [*shared_rows, extra_event])

    records = collect_codex_sessions(tmp_path, parse_iso("2026-05-23"))

    assert len(records) == 1
    assert records[0].source_id == "codex:same"
    assert records[0].tokens_total == 40
def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
    """Parse a one-line Claude transcript and check usage figures on the record.

    The expected cached_input_tokens value of 12 equals the fixture's
    cache_creation_input_tokens (5) plus cache_read_input_tokens (7). The
    record's raw_metadata must not have a "content" key.
    """
    transcript = tmp_path / "projects" / "repo" / "session.jsonl"

    usage = {
        "input_tokens": 30,
        "cache_creation_input_tokens": 5,
        "cache_read_input_tokens": 7,
        "output_tokens": 11,
    }
    message = {
        "model": "claude-sonnet",
        "content": "do not store me",
        "usage": usage,
    }
    entry = {
        "timestamp": "2026-05-23T01:00:00Z",
        "session_id": "c1",
        "cwd": "/repo",
        "message": message,
    }
    _write_jsonl(transcript, [entry])

    record = parse_claude_transcript(transcript, parse_iso("2026-05-23"))

    assert record is not None
    assert record.source_id == "claude:c1"
    assert record.tokens_in == 30
    assert record.cached_input_tokens == 12
    assert record.tokens_out == 11
    assert "content" not in record.raw_metadata
def test_resolve_repo_uses_normalised_path_prefix():
    """Resolve a ``//wsl.localhost/...`` cwd against registered repo paths.

    The cwd points at the ``api`` subdirectory of the state-hub checkout; the
    match is expected to be repo "1" with method "path_prefix". The test also
    checks that normalise_cwd maps the ``//wsl.localhost/Ubuntu-24.04`` form
    of the checkout path to ``/home/worsch/state-hub``.
    """
    state_hub = RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub")
    other = RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other")
    candidates = [state_hub, other]

    match = resolve_repo(
        "//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api",
        candidates,
    )
    normalised = normalise_cwd("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub")

    assert normalised == "/home/worsch/state-hub"
    assert match is not None
    assert match.repo_id == "1"
    assert match.method == "path_prefix"