generated from coulomb/repo-seed
140 lines
4.9 KiB
Python
140 lines
4.9 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
from api.services.token_sources import parse_iso
|
|
from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
|
|
from api.services.token_sources.claude import parse_claude_transcript
|
|
from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session
|
|
|
|
|
|
def _write_jsonl(path, rows):
    """Write *rows* to *path* in JSON Lines form, one entry per line.

    Missing parent directories of *path* are created first. Every row is
    serialised with ``json.dumps``, except a row equal to the sentinel
    string ``"BAD"``, which is emitted as the literal ``{not json}`` line
    (deliberately invalid JSON) instead.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        for row in rows:
            # The sentinel stands in for a corrupted line in the fixture.
            line = "{not json}\n" if row == "BAD" else json.dumps(row) + "\n"
            handle.write(line)
|
|
|
|
|
|
def test_parse_codex_session_sums_token_count_records(tmp_path):
    """Check the record produced by ``parse_codex_session`` for one rollout.

    The fixture holds session metadata, a turn context, a ``token_count``
    event stamped 2026-05-22T23:00:00Z, one malformed line, and a second
    ``token_count`` event stamped 2026-05-23T01:00:00Z. The parser is called
    with ``parse_iso("2026-05-23")``. The expected counts equal the second
    event alone, presumably because the first one predates that date -
    confirm against the parser. The malformed line must be counted in
    ``raw_metadata["malformed_lines"]``.
    """
    session_meta = {
        "type": "session_meta",
        "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
    }
    turn_context = {
        "type": "turn_context",
        "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"},
    }
    earlier_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-22T23:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}},
        },
    }
    later_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {
                "last_token_usage": {
                    "input_tokens": 100,
                    "output_tokens": 40,
                    "cached_input_tokens": 15,
                    "reasoning_output_tokens": 7,
                    "total_tokens": 155,
                }
            },
        },
    }
    path = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"
    # "BAD" makes the helper write one line of invalid JSON between the events.
    _write_jsonl(path, [session_meta, turn_context, earlier_usage, "BAD", later_usage])

    since = parse_iso("2026-05-23")
    record = parse_codex_session(path, since)

    assert record is not None
    assert record.source_id == "codex:s1"
    assert record.tokens_in == 100
    assert record.tokens_out == 40
    assert record.cached_input_tokens == 15
    assert record.reasoning_output_tokens == 7
    assert record.raw_total_tokens == 155
    assert record.raw_metadata["malformed_lines"] == 1
|
|
|
|
|
|
def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
    """Check that one session present in two locations yields a single record.

    The same session id ("same") is written under ``sessions/`` and under
    ``archived_sessions/``; the archived copy carries one extra
    ``token_count`` event. Exactly one record is expected. Its
    ``tokens_total`` of 40 equals the input plus output tokens of all three
    events in the archived copy (10 + 5 + 10 + 5 would be the naive double
    count of the shared event; 10 + 5 + 20 + 5 is the archived file), which
    suggests the fuller copy is kept - confirm against the collector.
    """
    session_meta = {
        "type": "session_meta",
        "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
    }
    shared_event = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}},
        },
    }
    archived_only_event = {
        "type": "event_msg",
        "timestamp": "2026-05-23T02:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}},
        },
    }
    shared_rows = [session_meta, shared_event]

    live = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
    archived = tmp_path / "archived_sessions" / "rollout-archived.jsonl"
    _write_jsonl(live, shared_rows)
    _write_jsonl(archived, [*shared_rows, archived_only_event])

    since = parse_iso("2026-05-23")
    records = collect_codex_sessions(tmp_path, since)

    assert len(records) == 1
    assert records[0].source_id == "codex:same"
    assert records[0].tokens_total == 40
|
|
|
|
|
|
def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
    """Check the record produced by ``parse_claude_transcript`` for one entry.

    The single transcript entry carries a ``usage`` block and a ``content``
    string. The expected ``cached_input_tokens`` of 12 equals the entry's
    cache-creation (5) plus cache-read (7) counts, and the record's
    ``raw_metadata`` must not contain a ``"content"`` key.
    """
    usage = {
        "input_tokens": 30,
        "cache_creation_input_tokens": 5,
        "cache_read_input_tokens": 7,
        "output_tokens": 11,
    }
    message = {
        "model": "claude-sonnet",
        "content": "do not store me",
        "usage": usage,
    }
    entry = {
        "timestamp": "2026-05-23T01:00:00Z",
        "session_id": "c1",
        "cwd": "/repo",
        "message": message,
    }
    path = tmp_path / "projects" / "repo" / "session.jsonl"
    _write_jsonl(path, [entry])

    since = parse_iso("2026-05-23")
    record = parse_claude_transcript(path, since)

    assert record is not None
    assert record.source_id == "claude:c1"
    assert record.tokens_in == 30
    assert record.cached_input_tokens == 12
    assert record.tokens_out == 11
    assert "content" not in record.raw_metadata
|
|
|
|
|
|
def test_resolve_repo_uses_normalised_path_prefix():
    """Check that a WSL-style cwd resolves to the repo whose path prefixes it.

    ``normalise_cwd`` is expected to reduce the
    ``//wsl.localhost/Ubuntu-24.04/...`` form to the plain ``/home/...``
    path, and ``resolve_repo`` is expected to match a subdirectory of that
    path to the "state-hub" ref with method ``"path_prefix"``.
    """
    state_hub = RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub")
    other = RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other")
    candidates = [state_hub, other]

    # The cwd points at a subdirectory ("api") of the state-hub checkout.
    match = resolve_repo("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api", candidates)

    normalised = normalise_cwd("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub")
    assert normalised == "/home/worsch/state-hub"
    assert match is not None
    assert match.repo_id == "1"
    assert match.method == "path_prefix"
|