feat(token-tracking): introduce token note taxonomy (measured/userbased/workplan/heuristic)

Tier 1 (exact counts) now defaults to note="measured" instead of null, signalling that the counts were read from the Claude Code status bar. Callers can pass note="userbased" when a human provided the numbers.

Token note taxonomy:
  measured  — agent read exact counts from the Claude Code status bar
  userbased — counts provided by a human
  workplan  — prorated from the workplan total across the task count
  heuristic — server fallback (1000 in / 500 out), no agent input

Added a token_note field to the TaskUpdate schema and exposed a note param on the update_task_status and record_interactive_task MCP tools. TOOLS.md documents the full taxonomy.

185 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 18:47:40 +02:00
parent 8c87a9a799
commit af3fdfde80
5 changed files with 59 additions and 11 deletions
--- a/mcp_server/server.py
+++ b/mcp_server/server.py
@@ -430,6 +430,7 @@ def update_task_status(
    tokens_out: Optional[int] = None,
    workplan_tokens_in: Optional[int] = None,
    workplan_tokens_out: Optional[int] = None,
+    note: Optional[str] = None,
    model: Optional[str] = None,
    agent: Optional[str] = None,
    session_id: Optional[str] = None,
@@ -438,6 +439,8 @@ def update_task_status(

    When status='done', always records a token event using the best available data:
      Tier 1 (best):   pass tokens_in + tokens_out — exact counts from the session
+                       note defaults to "measured"; pass note="userbased" if the
+                       numbers were provided by a human rather than read from the bar
      Tier 2:          pass workplan_tokens_in + workplan_tokens_out — total workplan
                       effort prorated across task count (note="workplan")
      Tier 3 (fallback): no token args — heuristic 1000 in / 500 out (note="heuristic")
@@ -452,6 +455,8 @@ def update_task_status(
        tokens_out: exact output token count for this task (Tier 1)
        workplan_tokens_in: total input tokens for the whole workplan (Tier 2)
        workplan_tokens_out: total output tokens for the whole workplan (Tier 2)
+        note: override the auto note — use "userbased" when counts came from a human;
+              omit to get the default ("measured" for Tier 1, "workplan"/"heuristic" otherwise)
        model: model identifier, e.g. 'claude-sonnet-4-6'
        agent: agent name, e.g. 'custodian', 'ralph'
        session_id: agent session identifier
@@ -472,6 +477,8 @@ def update_task_status(
        body["workplan_tokens_in"] = workplan_tokens_in
    if workplan_tokens_out is not None:
        body["workplan_tokens_out"] = workplan_tokens_out
+    if note is not None:
+        body["token_note"] = note

    task = _patch(f"/tasks/{task_id}", body)
    _post("/progress", {
@@ -2233,6 +2240,7 @@ def record_interactive_task(
    repo_slug: str,
    tokens_in: Optional[int] = None,
    tokens_out: Optional[int] = None,
+    note: Optional[str] = None,
    model: Optional[str] = None,
    agent: Optional[str] = None,
    description: Optional[str] = None,
@@ -2241,8 +2249,13 @@ def record_interactive_task(
    """Record ad-hoc interactive work as a task with token consumption.

    Finds or creates a persistent 'interactive-<repo>' workstream for the repo,
-    creates the task, marks it done immediately, and records a token event using
-    the three-tier logic (exact > heuristic).
+    creates the task, marks it done immediately, and records a token event.
+
+    Token note convention:
+      "measured"  — exact counts read from the Claude Code status bar (default when
+                    tokens_in/tokens_out provided and note omitted)
+      "userbased" — counts provided by a human (pass note="userbased" explicitly)
+      "heuristic" — server fallback when no counts given (automatic)

    Use this for work done outside a formal workplan: quick fixes, config changes,
    code reviews, one-off investigations, or any session work worth tracking.
@@ -2250,8 +2263,9 @@ def record_interactive_task(
    Args:
        title: Short description of the work done
        repo_slug: Registered repo slug, e.g. 'the-custodian', 'inter-hub'
-        tokens_in: Exact input token count for this task (Tier 1 — best practice)
-        tokens_out: Exact output token count for this task (Tier 1)
+        tokens_in: Input token count (Tier 1 — read from Claude Code status bar)
+        tokens_out: Output token count (Tier 1)
+        note: Override token note — use "userbased" when counts came from a human
        model: Model identifier, e.g. 'claude-sonnet-4-6'
        agent: Agent name, e.g. 'custodian', 'ralph'
        description: Optional longer description of what was done
@@ -2316,16 +2330,18 @@ def record_interactive_task(
        body["tokens_in"] = tokens_in
    if tokens_out is not None:
        body["tokens_out"] = tokens_out
+    if note is not None:
+        body["token_note"] = note

    _patch(f"/tasks/{task['id']}", body)

-    tier = "exact" if tokens_in is not None else "heuristic"
+    effective_note = note or ("measured" if tokens_in is not None else "heuristic")
    return json.dumps({
        "task_id": task["id"],
        "workstream_id": ws["id"],
        "workstream_slug": ws_slug,
        "title": title,
-        "token_tier": tier,
+        "token_note": effective_note,
    }, indent=2)