diff --git a/api/routers/tasks.py b/api/routers/tasks.py index c332bfa..27dbd1a 100644 --- a/api/routers/tasks.py +++ b/api/routers/tasks.py @@ -75,7 +75,7 @@ async def update_task( raise HTTPException(status_code=404, detail="Task not found") # Separate token fields from task fields - token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "model", "agent", "session_id"} + token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "token_note", "model", "agent", "session_id"} update_data = body.model_dump(exclude_unset=True) token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_field_names} @@ -87,8 +87,10 @@ async def update_task( # Token event — three-tier logic, only when marking done if update_data.get("status") == "done": if "tokens_in" in token_data and "tokens_out" in token_data: - # Tier 1: exact counts provided - tin, tout, tnote = token_data["tokens_in"], token_data["tokens_out"], None + # Tier 1: exact counts — default note "measured"; caller may override with token_note + tin = token_data["tokens_in"] + tout = token_data["tokens_out"] + tnote = token_data.get("token_note") or "measured" elif "workplan_tokens_in" in token_data and "workplan_tokens_out" in token_data: # Tier 2: prorate workplan total across task count count_result = await session.execute( diff --git a/api/schemas/task.py b/api/schemas/task.py index 65df601..048dba5 100644 --- a/api/schemas/task.py +++ b/api/schemas/task.py @@ -39,13 +39,15 @@ class TaskUpdate(BaseModel): intervention_note: str | None = None parent_task_id: uuid.UUID | None = None # Token passthrough — three tiers (highest precision wins): - # 1. tokens_in + tokens_out → exact counts (best practice) + # 1. tokens_in + tokens_out → exact counts; note defaults to "measured" # 2. workplan_tokens_in + workplan_tokens_out → prorated across task count (note="workplan") # 3. neither provided, status=done → heuristic 1000/500 (note="heuristic") + # token_note overrides the auto-assigned note for Tier 1 only (e.g. "userbased") tokens_in: int | None = None tokens_out: int | None = None workplan_tokens_in: int | None = None workplan_tokens_out: int | None = None + token_note: str | None = None model: str | None = None agent: str | None = None session_id: str | None = None diff --git a/mcp_server/TOOLS.md b/mcp_server/TOOLS.md index 6505fd1..721e8e7 100644 --- a/mcp_server/TOOLS.md +++ b/mcp_server/TOOLS.md @@ -83,6 +83,16 @@ Agents should call `record_token_event` (or pass `tokens_in`/`tokens_out` via |------|----------|-------| | `record_token_event(tokens_in, tokens_out, ...)` | `task_id`?, `workstream_id`?, `repo_id`?, `model`?, `agent`?, `ref_type`?, `ref_id`?, `note`?, `session_id`? | POSTs to `/token-events/`. `workstream_id` auto-filled from task. Returns event id + running total. | | `get_token_summary(scope, id)` | `scope`: task\|workstream\|repo\|commit\|release\|session; `id`: UUID or ref string | Returns formatted table of tokens_in/out/total, event_count, by_model, by_agent. | +| `record_interactive_task(title, repo_slug, ...)` | `tokens_in`?, `tokens_out`?, `note`?, `model`?, `agent`?, `description`?, `session_id`? | Find-or-create `interactive-` workstream, create task, mark done, record token event. | + +**Token note taxonomy:** + +| note | meaning | +|------|---------| +| `"measured"` | Exact counts read from Claude Code status bar — default when `tokens_in`/`tokens_out` provided | +| `"userbased"` | Counts provided by a human (pass `note="userbased"` explicitly) | +| `"workplan"` | Prorated from workplan total across task count | +| `"heuristic"` | Server fallback — 1 000 in / 500 out, no agent input | --- diff --git a/mcp_server/server.py b/mcp_server/server.py index a0edc32..77ee8d7 100644 --- a/mcp_server/server.py +++ b/mcp_server/server.py @@ -430,6 +430,7 @@ def update_task_status( tokens_out: Optional[int] = None, workplan_tokens_in: Optional[int] = None, workplan_tokens_out: Optional[int] = None, + note: Optional[str] = None, model: Optional[str] = None, agent: Optional[str] = None, session_id: Optional[str] = None, @@ -438,6 +439,8 @@ def update_task_status( When status='done', always records a token event using the best available data: Tier 1 (best): pass tokens_in + tokens_out — exact counts from the session + note defaults to "measured"; pass note="userbased" if the + numbers were provided by a human rather than read from the bar Tier 2: pass workplan_tokens_in + workplan_tokens_out — total workplan effort prorated across task count (note="workplan") Tier 3 (fallback): no token args — heuristic 1000 in / 500 out (note="heuristic") @@ -452,6 +455,8 @@ def update_task_status( tokens_out: exact output token count for this task (Tier 1) workplan_tokens_in: total input tokens for the whole workplan (Tier 2) workplan_tokens_out: total output tokens for the whole workplan (Tier 2) + note: override the auto note — use "userbased" when counts came from a human; + omit to get the default ("measured" for Tier 1, "workplan"/"heuristic" otherwise) model: model identifier, e.g. 'claude-sonnet-4-6' agent: agent name, e.g. 'custodian', 'ralph' session_id: agent session identifier @@ -472,6 +477,8 @@ def update_task_status( body["workplan_tokens_in"] = workplan_tokens_in if workplan_tokens_out is not None: body["workplan_tokens_out"] = workplan_tokens_out + if note is not None: + body["token_note"] = note task = _patch(f"/tasks/{task_id}", body) _post("/progress", { @@ -2233,6 +2240,7 @@ def record_interactive_task( repo_slug: str, tokens_in: Optional[int] = None, tokens_out: Optional[int] = None, + note: Optional[str] = None, model: Optional[str] = None, agent: Optional[str] = None, description: Optional[str] = None, @@ -2241,8 +2249,13 @@ def record_interactive_task( """Record ad-hoc interactive work as a task with token consumption. Finds or creates a persistent 'interactive-' workstream for the repo, - creates the task, marks it done immediately, and records a token event using - the three-tier logic (exact > heuristic). + creates the task, marks it done immediately, and records a token event. + + Token note convention: + "measured" — exact counts read from the Claude Code status bar (default when + tokens_in/tokens_out provided and note omitted) + "userbased" — counts provided by a human (pass note="userbased" explicitly) + "heuristic" — server fallback when no counts given (automatic) Use this for work done outside a formal workplan: quick fixes, config changes, code reviews, one-off investigations, or any session work worth tracking. @@ -2250,8 +2263,9 @@ def record_interactive_task( Args: title: Short description of the work done repo_slug: Registered repo slug, e.g. 'the-custodian', 'inter-hub' - tokens_in: Exact input token count for this task (Tier 1 — best practice) - tokens_out: Exact output token count for this task (Tier 1) + tokens_in: Input token count (Tier 1 — read from Claude Code status bar) + tokens_out: Output token count (Tier 1) + note: Override token note — use "userbased" when counts came from a human model: Model identifier, e.g. 'claude-sonnet-4-6' agent: Agent name, e.g. 'custodian', 'ralph' description: Optional longer description of what was done @@ -2316,16 +2330,18 @@ def record_interactive_task( body["tokens_in"] = tokens_in if tokens_out is not None: body["tokens_out"] = tokens_out + if note is not None: + body["token_note"] = note _patch(f"/tasks/{task['id']}", body) - tier = "exact" if tokens_in is not None else "heuristic" + effective_note = note or ("measured" if tokens_in is not None else "heuristic") return json.dumps({ "task_id": task["id"], "workstream_id": ws["id"], "workstream_slug": ws_slug, "title": title, - "token_tier": tier, + "token_note": effective_note, }, indent=2) diff --git a/tests/test_token_passthrough.py b/tests/test_token_passthrough.py index c7bec5f..5220a50 100644 --- a/tests/test_token_passthrough.py +++ b/tests/test_token_passthrough.py @@ -65,7 +65,25 @@ class TestTokenPassthrough: assert ev["model"] == "claude-sonnet-4-6" assert ev["agent"] == "custodian" assert ev["workstream_id"] == ws["id"] - assert ev["note"] is None + assert ev["note"] == "measured" + + async def test_tier1_userbased_note_override(self, client): + """Tier 1 with note='userbased' records that note instead of 'measured'.""" + await _create_domain(client) + topic = await _create_topic(client) + ws = await _create_workstream(client, topic["id"]) + task = await _create_task(client, ws["id"]) + + r = await client.patch(f"/tasks/{task['id']}", json={ + "status": "done", + "tokens_in": 500, + "tokens_out": 200, + "token_note": "userbased", + }) + assert r.status_code == 200 + + events = (await client.get("/token-events/", params={"task_id": task["id"]})).json() + assert events[0]["note"] == "userbased" async def test_tier2_workplan_prorated(self, client): """Tier 2: workplan totals prorated across 4 tasks → 250/125 each, note='workplan'."""