generated from coulomb/repo-seed
Fixed and improved token tracking
This commit is contained in:
@@ -55,8 +55,23 @@ class TestTokenEventsCreate:
|
||||
assert ev["tokens_in"] == 200
|
||||
assert ev["tokens_out"] == 100
|
||||
assert ev["tokens_total"] == 300
|
||||
assert ev["measurement_kind"] == "estimated"
|
||||
assert ev["source_provider"] == "manual"
|
||||
assert ev["raw_total_tokens"] == 300
|
||||
assert ev["id"] is not None
|
||||
|
||||
async def test_create_with_created_at_backfill_timestamp(self, client):
    """A caller-supplied ``created_at`` is reflected on the created event."""
    backfill_stamp = "2026-05-19T01:02:03Z"
    payload = {
        "tokens_in": 200,
        "tokens_out": 100,
        "ref_type": "session",
        "ref_id": "codex:test-session",
        "created_at": backfill_stamp,
    }

    event = await _post_event(client, **payload)

    # Only the date/time prefix is checked; the suffix the API appends
    # (offset or fractional seconds) is deliberately not pinned here.
    assert event["created_at"].startswith("2026-05-19T01:02:03")
|
||||
|
||||
async def test_create_with_all_fields(self, client):
|
||||
await _create_domain(client)
|
||||
topic = await _create_topic(client)
|
||||
@@ -74,11 +89,76 @@ class TestTokenEventsCreate:
|
||||
ref_id=task["id"],
|
||||
note="T01 done",
|
||||
session_id="ses-abc",
|
||||
measurement_kind="measured",
|
||||
source_provider="manual",
|
||||
source_id="manual:test-event",
|
||||
confidence=0.95,
|
||||
cached_input_tokens=10,
|
||||
reasoning_output_tokens=20,
|
||||
raw_total_tokens=1530,
|
||||
raw_metadata={"source": "unit-test"},
|
||||
)
|
||||
assert ev["task_id"] == task["id"]
|
||||
assert ev["workstream_id"] == ws["id"] # auto-populated from task
|
||||
assert ev["model"] == "claude-sonnet-4-6"
|
||||
assert ev["tokens_total"] == 1500
|
||||
assert ev["measurement_kind"] == "measured"
|
||||
assert ev["source_provider"] == "manual"
|
||||
assert ev["source_id"] == "manual:test-event"
|
||||
assert ev["cached_input_tokens"] == 10
|
||||
assert ev["reasoning_output_tokens"] == 20
|
||||
assert ev["token_evidence_total"] == 1530
|
||||
assert ev["raw_metadata"] == {"source": "unit-test"}
|
||||
|
||||
async def test_upsert_source_event_updates_existing_session(self, client):
    """Upserting the same source event twice keeps one row and updates its totals."""
    payload = {
        "tokens_in": 100,
        "tokens_out": 50,
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:abc",
        "ref_type": "session",
        "ref_id": "codex:abc",
        "session_id": "abc",
        "cached_input_tokens": 5,
    }

    created = await client.post("/token-events/upsert", json=payload)
    assert created.status_code == 200, created.text

    # Same provenance fields, larger token counts.
    revised_payload = dict(payload, tokens_in=300, tokens_out=80)
    updated = await client.post("/token-events/upsert", json=revised_payload)
    assert updated.status_code == 200, updated.text

    updated_event = updated.json()
    assert created.json()["id"] == updated_event["id"]
    assert updated_event["tokens_total"] == 380

    # The provider filter must still see exactly one event.
    listing = await client.get("/token-events/", params={"source_provider": "codex_session"})
    assert len(listing.json()) == 1
|
||||
|
||||
async def test_patch_backfill_fields(self, client):
    """PATCH can rewrite token counts, session reference, timestamp and provenance."""
    seeded = await _post_event(client, tokens_in=100, tokens_out=50)

    changes = {
        "tokens_in": 500,
        "tokens_out": 250,
        "session_id": "codex-session",
        "ref_type": "session",
        "ref_id": "codex:session",
        "created_at": "2026-05-20T01:02:03Z",
        "note": "backfill:codex-session",
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:session",
        "cached_input_tokens": 10,
    }
    response = await client.patch(f"/token-events/{seeded['id']}", json=changes)
    assert response.status_code == 200

    patched = response.json()
    # 500 in + 250 out from the patch body.
    assert patched["tokens_total"] == 750
    for field in ("session_id", "ref_type", "ref_id"):
        assert patched[field] == changes[field]
    # Prefix comparison only; the rendered timestamp suffix is not pinned.
    assert patched["created_at"].startswith("2026-05-20T01:02:03")
    for field in ("measurement_kind", "source_provider", "source_id", "cached_input_tokens"):
        assert patched[field] == changes[field]
|
||||
|
||||
async def test_workstream_auto_populated_from_task(self, client):
|
||||
await _create_domain(client)
|
||||
@@ -129,6 +209,26 @@ class TestTokenEventsList:
|
||||
assert len(events) == 1
|
||||
assert events[0]["model"] == "claude-sonnet-4-6"
|
||||
|
||||
async def test_filter_by_measurement_kind_and_source_provider(self, client):
    """Listing with both filters returns only the event that matches them."""
    matching_event = {
        "tokens_in": 100,
        "tokens_out": 50,
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:filter",
    }
    await _post_event(client, **matching_event)
    # Second event sets neither filter field explicitly; it must not be listed.
    await _post_event(client, tokens_in=200, tokens_out=100, note="heuristic")

    filters = {"measurement_kind": "measured", "source_provider": "codex_session"}
    response = await client.get("/token-events/", params=filters)
    assert response.status_code == 200

    matches = response.json()
    assert len(matches) == 1
    assert matches[0]["source_id"] == "codex:filter"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestTokenSummary:
|
||||
@@ -184,6 +284,7 @@ class TestTokenSummary:
|
||||
s = r.json()
|
||||
assert s["event_count"] == 1
|
||||
assert s["tokens_total"] == 75
|
||||
assert s["by_measurement_kind"]["estimated"] == 75
|
||||
|
||||
async def test_summary_unknown_scope_returns_422(self, client):
|
||||
r = await client.get("/token-events/summary/", params={"scope": "foobar", "id": "x"})
|
||||
@@ -215,3 +316,32 @@ class TestTokenEventGetById:
|
||||
import uuid
|
||||
r = await client.get(f"/token-events/{uuid.uuid4()}")
|
||||
assert r.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestTokenAggregateAndQuality:
    """Checks on the aggregate and quality endpoints of the token-events API."""

    async def test_aggregate_and_quality_expose_evidence_breakdown(self, client):
        """Aggregate and quality responses break totals down by evidence type."""
        # One explicitly measured event (150 tokens) ...
        await _post_event(
            client,
            tokens_in=100,
            tokens_out=50,
            measurement_kind="measured",
            source_provider="codex_session",
            source_id="codex:agg",
        )
        # ... and one event tagged only with a note (1500 tokens).
        await _post_event(client, tokens_in=1000, tokens_out=500, note="heuristic")

        aggregate_response = await client.get(
            "/token-events/aggregate/",
            params={"include_superseded": "false"},
        )
        totals = aggregate_response.json()
        assert totals["tokens_total"] == 1650
        assert totals["by_measurement_kind"]["measured"] == 150
        assert totals["by_measurement_kind"]["estimated"] == 1500
        assert totals["by_source_provider"]["codex_session"] == 150
        assert totals["by_source_provider"]["task_fallback"] == 1500

        measured_response = await client.get(
            "/token-events/aggregate/",
            params={"measurement_kind": "measured"},
        )
        assert measured_response.json()["tokens_total"] == 150

        quality_response = await client.get("/token-events/quality/")
        quality = quality_response.json()
        assert quality["measured_event_count"] == 1
        assert quality["fallback_event_count"] == 1
        assert quality["missing_provenance_event_count"] == 0
|
||||
|
||||
@@ -66,6 +66,9 @@ class TestTokenPassthrough:
|
||||
assert ev["agent"] == "custodian"
|
||||
assert ev["workstream_id"] == ws["id"]
|
||||
assert ev["note"] == "measured"
|
||||
assert ev["measurement_kind"] == "measured"
|
||||
assert ev["source_provider"] == "manual"
|
||||
assert ev["source_id"] == f"task:{task['id']}:manual"
|
||||
|
||||
async def test_tier1_userbased_note_override(self, client):
|
||||
"""Tier 1 with note='userbased' records that note instead of 'measured'."""
|
||||
@@ -84,6 +87,7 @@ class TestTokenPassthrough:
|
||||
|
||||
events = (await client.get("/token-events/", params={"task_id": task["id"]})).json()
|
||||
assert events[0]["note"] == "userbased"
|
||||
assert events[0]["measurement_kind"] == "measured"
|
||||
|
||||
async def test_tier2_workplan_prorated(self, client):
|
||||
"""Tier 2: workplan totals prorated across 4 tasks → 250/125 each, note='workplan'."""
|
||||
@@ -108,6 +112,8 @@ class TestTokenPassthrough:
|
||||
assert ev["tokens_in"] == 250 # 1000 // 4
|
||||
assert ev["tokens_out"] == 125 # 500 // 4
|
||||
assert ev["note"] == "workplan"
|
||||
assert ev["measurement_kind"] == "allocated"
|
||||
assert ev["raw_metadata"]["allocation_method"] == "workplan_prorated"
|
||||
|
||||
async def test_tier3_heuristic_fallback(self, client):
|
||||
"""Tier 3: status=done with no token args → heuristic 1000/500, note='heuristic'."""
|
||||
@@ -125,6 +131,40 @@ class TestTokenPassthrough:
|
||||
assert ev["tokens_in"] == 1000
|
||||
assert ev["tokens_out"] == 500
|
||||
assert ev["note"] == "heuristic"
|
||||
assert ev["measurement_kind"] == "estimated"
|
||||
assert ev["source_provider"] == "task_fallback"
|
||||
|
||||
async def test_suppress_token_event_skips_done_fallback(self, client):
    """A done-update with ``suppress_token_event`` set records no token event.

    This is the path file/cache sync uses to mark a task done without
    minting a heuristic event.
    """
    await _create_domain(client)
    topic = await _create_topic(client)
    workstream = await _create_workstream(client, topic["id"])
    task = await _create_task(client, workstream["id"])

    update = {"status": "done", "suppress_token_event": True}
    response = await client.patch(f"/tasks/{task['id']}", json=update)
    assert response.status_code == 200
    assert response.json()["status"] == "done"

    listing = await client.get("/token-events/", params={"task_id": task["id"]})
    assert listing.json() == []
|
||||
|
||||
async def test_repeated_done_update_does_not_duplicate_event(self, client):
    """Sending ``status=done`` twice leaves exactly one token event for the task."""
    await _create_domain(client)
    topic = await _create_topic(client)
    workstream = await _create_workstream(client, topic["id"])
    task = await _create_task(client, workstream["id"])

    # First pass is the transition into done; second pass repeats it.
    for _ in range(2):
        response = await client.patch(f"/tasks/{task['id']}", json={"status": "done"})
        assert response.status_code == 200

    listing = await client.get("/token-events/", params={"task_id": task["id"]})
    assert len(listing.json()) == 1
|
||||
|
||||
async def test_non_done_status_creates_no_event(self, client):
|
||||
"""Non-done status updates never create a token event."""
|
||||
|
||||
139
tests/test_token_sources.py
Normal file
139
tests/test_token_sources.py
Normal file
@@ -0,0 +1,139 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from api.services.token_sources import parse_iso
|
||||
from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
|
||||
from api.services.token_sources.claude import parse_claude_transcript
|
||||
from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session
|
||||
|
||||
|
||||
def _write_jsonl(path, rows):
    """Write *rows* to *path* as JSON Lines, creating parent directories.

    Each row is serialised with ``json.dumps`` on its own line. The sentinel
    string ``"BAD"`` is written as a deliberately malformed line so callers
    can exercise malformed-line handling.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as sink:
        # Generator keeps serialisation lazy: rows are encoded one at a time
        # while the file is open, exactly as a write-per-row loop would.
        sink.writelines(
            "{not json}\n" if row == "BAD" else json.dumps(row) + "\n"
            for row in rows
        )
|
||||
|
||||
|
||||
def test_parse_codex_session_sums_token_count_records(tmp_path):
    """parse_codex_session reports token usage and counts malformed lines.

    The asserted totals equal the second ``token_count`` row alone; the
    first row is timestamped before the date passed to the parser
    (presumably a cutoff — confirm against ``parse_codex_session``).
    """
    session_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"

    session_meta = {
        "type": "session_meta",
        "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
    }
    turn_context = {
        "type": "turn_context",
        "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"},
    }
    # Timestamped the day before the date handed to the parser.
    earlier_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-22T23:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}},
        },
    }
    later_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {
                "last_token_usage": {
                    "input_tokens": 100,
                    "output_tokens": 40,
                    "cached_input_tokens": 15,
                    "reasoning_output_tokens": 7,
                    "total_tokens": 155,
                }
            },
        },
    }
    # "BAD" makes the writer emit one line that is not valid JSON.
    _write_jsonl(session_file, [session_meta, turn_context, earlier_usage, "BAD", later_usage])

    record = parse_codex_session(session_file, parse_iso("2026-05-23"))

    assert record is not None
    assert record.source_id == "codex:s1"
    assert record.tokens_in == 100
    assert record.tokens_out == 40
    assert record.cached_input_tokens == 15
    assert record.reasoning_output_tokens == 7
    assert record.raw_total_tokens == 155
    assert record.raw_metadata["malformed_lines"] == 1
|
||||
|
||||
|
||||
def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
    """A session present in both live and archived trees yields one record.

    The expected total (40) equals the archived file's usage
    (10 + 5 + 20 + 5), which holds one more event than the live copy.
    """
    live_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
    archived_file = tmp_path / "archived_sessions" / "rollout-archived.jsonl"

    shared_rows = [
        {
            "type": "session_meta",
            "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"},
        },
        {
            "type": "event_msg",
            "timestamp": "2026-05-23T01:00:00Z",
            "payload": {
                "type": "token_count",
                "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}},
            },
        },
    ]
    # Present only in the archived copy.
    extra_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T02:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}},
        },
    }

    _write_jsonl(live_file, shared_rows)
    _write_jsonl(archived_file, [*shared_rows, extra_usage])

    records = collect_codex_sessions(tmp_path, parse_iso("2026-05-23"))

    assert len(records) == 1
    only_record = records[0]
    assert only_record.source_id == "codex:same"
    assert only_record.tokens_total == 40
|
||||
|
||||
|
||||
def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
    """parse_claude_transcript reports usage and keeps message content out of metadata.

    The expected cached-input figure (12) equals the fixture's
    cache-creation (5) plus cache-read (7) token counts.
    """
    transcript = tmp_path / "projects" / "repo" / "session.jsonl"

    usage = {
        "input_tokens": 30,
        "cache_creation_input_tokens": 5,
        "cache_read_input_tokens": 7,
        "output_tokens": 11,
    }
    message = {
        "model": "claude-sonnet",
        "content": "do not store me",
        "usage": usage,
    }
    entry = {
        "timestamp": "2026-05-23T01:00:00Z",
        "session_id": "c1",
        "cwd": "/repo",
        "message": message,
    }
    _write_jsonl(transcript, [entry])

    record = parse_claude_transcript(transcript, parse_iso("2026-05-23"))

    assert record is not None
    assert record.source_id == "claude:c1"
    assert record.tokens_in == 30
    assert record.cached_input_tokens == 12
    assert record.tokens_out == 11
    assert "content" not in record.raw_metadata
|
||||
|
||||
|
||||
def test_resolve_repo_uses_normalised_path_prefix():
    """A WSL UNC working directory resolves to the repo whose local path prefixes it."""
    wsl_root = "//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub"
    known_repos = [
        RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub"),
        RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other"),
    ]

    # The working directory is a sub-directory of the first repo.
    resolved = resolve_repo("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api", known_repos)

    assert normalise_cwd(wsl_root) == "/home/worsch/state-hub"
    assert resolved is not None
    assert resolved.repo_id == "1"
    assert resolved.method == "path_prefix"
|
||||
Reference in New Issue
Block a user