feat(token-tracking): three-tier token recording on task done

Token events are now always created when update_task_status is called with status="done", using the best available data:
- Tier 1 (best): exact tokens_in + tokens_out passed by the agent.
- Tier 2: workplan_tokens_in + workplan_tokens_out, prorated across the workstream task count (note="workplan").
- Tier 3 (fallback): heuristic 1000 in / 500 out (note="heuristic").
Non-done status changes never create a token event.
MCP tool updated with workplan_tokens_in/out params and tiered docs.
Ralph-workplan skill files updated with the three-tier guidance.
184 tests pass.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 18:28:18 +02:00
parent 58e1bafce9
commit fdfd4365cd
4 changed files with 119 additions and 47 deletions
--- a/api/routers/tasks.py
+++ b/api/routers/tasks.py
@@ -2,7 +2,7 @@ import uuid
 from datetime import date

 from fastapi import APIRouter, Depends, HTTPException, Query, status
-from sqlalchemy import select
+from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession

 from api.database import get_session
@@ -75,27 +75,44 @@ async def update_task(
        raise HTTPException(status_code=404, detail="Task not found")

    # Separate token fields from task fields
-    token_fields = {"tokens_in", "tokens_out", "model", "agent", "session_id"}
+    token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "model", "agent", "session_id"}
    update_data = body.model_dump(exclude_unset=True)
-    token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_fields}
+    token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_field_names}

    for field, value in update_data.items():
        setattr(task, field, value)
    await session.commit()
    await session.refresh(task)

-    # Create token event if token passthrough fields provided
-    if "tokens_in" in token_data and "tokens_out" in token_data:
+    # Token event — three-tier logic, only when marking done
+    if update_data.get("status") == "done":
+        if "tokens_in" in token_data and "tokens_out" in token_data:
+            # Tier 1: exact counts provided
+            tin, tout, tnote = token_data["tokens_in"], token_data["tokens_out"], None
+        elif "workplan_tokens_in" in token_data and "workplan_tokens_out" in token_data:
+            # Tier 2: prorate workplan total across task count
+            count_result = await session.execute(
+                select(func.count(Task.id)).where(Task.workstream_id == task.workstream_id)
+            )
+            task_count = max(count_result.scalar() or 1, 1)
+            tin = token_data["workplan_tokens_in"] // task_count
+            tout = token_data["workplan_tokens_out"] // task_count
+            tnote = "workplan"
+        else:
+            # Tier 3: heuristic fallback
+            tin, tout, tnote = 1000, 500, "heuristic"
+
        event = TokenEvent(
            task_id=task_id,
            workstream_id=task.workstream_id,
-            tokens_in=token_data["tokens_in"],
-            tokens_out=token_data["tokens_out"],
+            tokens_in=tin,
+            tokens_out=tout,
            model=token_data.get("model"),
            agent=token_data.get("agent"),
            session_id=token_data.get("session_id"),
            ref_type="task",
            ref_id=str(task_id),
+            note=tnote,
        )
        session.add(event)
        await session.commit()
--- a/api/schemas/task.py
+++ b/api/schemas/task.py
@@ -38,9 +38,14 @@ class TaskUpdate(BaseModel):
    needs_human: bool | None = None
    intervention_note: str | None = None
    parent_task_id: uuid.UUID | None = None
-    # Optional token passthrough — when provided, a token_event is created
+    # Token passthrough — consulted only when status is set to "done"; three tiers (highest precision wins):
+    # 1. tokens_in + tokens_out  → exact counts, recorded without a note (best practice)
+    # 2. workplan_tokens_in + workplan_tokens_out  → prorated across the workstream's task count (note="workplan")
+    # 3. neither complete pair provided  → heuristic 1000 in / 500 out (note="heuristic")
    tokens_in: int | None = None
    tokens_out: int | None = None
+    workplan_tokens_in: int | None = None
+    workplan_tokens_out: int | None = None
    model: str | None = None
    agent: str | None = None
    session_id: str | None = None