diff --git a/Makefile b/Makefile
index 0071413..a7f1b7e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,7 @@
-.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory
+.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project register-mcp bootstrap-env validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory token-reconcile
COMPOSE = docker compose -f infra/docker-compose.yml --env-file .env
+PYTHON ?= python3
start:
@echo "# run in different terminals"
@@ -111,6 +112,17 @@ register-codex-project:
@test -n "$(PROJECT_PATH)" || (echo "ERROR: PROJECT_PATH is required."; exit 1)
scripts/register_project.sh "$(DOMAIN)" "$(PROJECT_PATH)" --codex
+## Register State Hub MCP for Claude Code. Optional: make register-mcp MCP_URL=http://127.0.0.1:18001/sse
+# Runs scripts/register-mcp.sh; --url and --api-base are forwarded only when MCP_URL / API_BASE are non-empty.
+# DRY_RUN is tested for non-emptiness only, so any value (including DRY_RUN=0) adds --dry-run.
+register-mcp:
+ scripts/register-mcp.sh \
+ $(if $(MCP_URL),--url "$(MCP_URL)",) \
+ $(if $(API_BASE),--api-base "$(API_BASE)",) \
+ $(if $(DRY_RUN),--dry-run,)
+
+## Bootstrap a new operator/collaborator environment. Optional: make bootstrap-env ARGS="--install-missing"
+# ARGS is expanded unquoted, so it may carry several space-separated flags for scripts/bootstrap-env.sh.
+bootstrap-env:
+ scripts/bootstrap-env.sh $(ARGS)
+
## Add a second repo to an existing domain: make add-repo DOMAIN=railiance REPO_PATH=/home/worsch/railiance-infra
add-repo:
@test -n "$(DOMAIN)" || (echo "ERROR: DOMAIN is required."; exit 1)
@@ -229,6 +241,17 @@ fix-consistency:
$(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \
e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e
+## Reconcile measured token sources against State Hub.
+## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1]
+# Optional overrides API_BASE, CODEX_HOME and CLAUDE_HOME are forwarded only when non-empty.
+# APPLY and ZERO_FALLBACKS are boolean switches: empty, 0, false and no all mean "off",
+# so an explicit APPLY=0 never passes --apply to the script.
+token-reconcile:
+	$(PYTHON) scripts/token_reconcile.py \
+		$(if $(SINCE),--since "$(SINCE)",) \
+		$(if $(API_BASE),--api-base "$(API_BASE)",) \
+		$(if $(CODEX_HOME),--codex-home "$(CODEX_HOME)",) \
+		$(if $(CLAUDE_HOME),--claude-home "$(CLAUDE_HOME)",) \
+		$(if $(filter-out 0 false no,$(APPLY)),--apply,) \
+		$(if $(filter-out 0 false no,$(ZERO_FALLBACKS)),--zero-superseded-fallbacks,)
+
## Pull then fix: single repo or all repos if REPO omitted
## make fix-consistency-remote — smart pull+fix all repos that need it
## make fix-consistency-remote REPO=slug — pull+fix one repo
diff --git a/api/models/token_event.py b/api/models/token_event.py
index 01ae8d2..9396d13 100644
--- a/api/models/token_event.py
+++ b/api/models/token_event.py
@@ -1,8 +1,10 @@
import uuid
from datetime import datetime
-from sqlalchemy import DateTime, ForeignKey, Integer, Text, func
-from sqlalchemy.dialects.postgresql import UUID
+from typing import Any
+
+from sqlalchemy import DateTime, Float, ForeignKey, Integer, Text, UniqueConstraint, func
+from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from api.models.base import Base, new_uuid
@@ -10,6 +12,14 @@ from api.models.base import Base, new_uuid
class TokenEvent(Base):
__tablename__ = "token_events"
+ __table_args__ = (
+ UniqueConstraint(
+ "measurement_kind",
+ "source_provider",
+ "source_id",
+ name="uq_token_events_source_identity",
+ ),
+ )
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=new_uuid
@@ -31,6 +41,35 @@ class TokenEvent(Base):
ref_type: Mapped[str | None] = mapped_column(Text, nullable=True)
ref_id: Mapped[str | None] = mapped_column(Text, nullable=True)
note: Mapped[str | None] = mapped_column(Text, nullable=True)
+ measurement_kind: Mapped[str] = mapped_column(
+ Text, nullable=False, default="estimated", server_default="estimated", index=True
+ )
+ source_provider: Mapped[str] = mapped_column(
+ Text, nullable=False, default="manual", server_default="manual", index=True
+ )
+ source_id: Mapped[str | None] = mapped_column(Text, nullable=True, index=True)
+ source_path: Mapped[str | None] = mapped_column(Text, nullable=True)
+ source_created_at: Mapped[datetime | None] = mapped_column(
+ DateTime(timezone=True), nullable=True, index=True
+ )
+ ingested_at: Mapped[datetime] = mapped_column(
+ DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
+ )
+ parser_version: Mapped[str | None] = mapped_column(Text, nullable=True)
+ confidence: Mapped[float] = mapped_column(
+ Float, nullable=False, default=0.35, server_default="0.35"
+ )
+ cached_input_tokens: Mapped[int] = mapped_column(
+ Integer, nullable=False, default=0, server_default="0"
+ )
+ reasoning_output_tokens: Mapped[int] = mapped_column(
+ Integer, nullable=False, default=0, server_default="0"
+ )
+ raw_total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
+ cost_estimated_usd: Mapped[float | None] = mapped_column(Float, nullable=True)
+ raw_metadata: Mapped[dict[str, Any]] = mapped_column(
+ JSONB, nullable=False, default=dict, server_default="{}"
+ )
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
)
diff --git a/api/routers/tasks.py b/api/routers/tasks.py
index adf6cc6..1551164 100644
--- a/api/routers/tasks.py
+++ b/api/routers/tasks.py
@@ -75,23 +75,47 @@ async def update_task(
if task is None:
raise HTTPException(status_code=404, detail="Task not found")
+ previous_status = task.status.value
+
# Separate token fields from task fields
- token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "token_note", "model", "agent", "session_id"}
+ token_field_names = {
+ "tokens_in",
+ "tokens_out",
+ "workplan_tokens_in",
+ "workplan_tokens_out",
+ "token_note",
+ "model",
+ "agent",
+ "session_id",
+ "suppress_token_event",
+ }
update_data = body.model_dump(exclude_unset=True)
token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_field_names}
+ suppress_token_event = bool(token_data.pop("suppress_token_event", False))
for field, value in update_data.items():
setattr(task, field, value)
await session.commit()
await session.refresh(task)
- # Token event — three-tier logic, only when marking done
- if update_data.get("status") == "done":
+ # Token event — three-tier logic, only for an intentional transition to done.
+ status_update = update_data.get("status")
+ new_status = status_update.value if hasattr(status_update, "value") else status_update
+ if (
+ new_status == "done"
+ and previous_status != "done"
+ and not suppress_token_event
+ ):
if "tokens_in" in token_data and "tokens_out" in token_data:
# Tier 1: exact counts — default note "measured"; caller may override with token_note
tin = token_data["tokens_in"]
tout = token_data["tokens_out"]
tnote = token_data.get("token_note") or "measured"
+ measurement_kind = "measured"
+ source_provider = "manual"
+ confidence = 1.0
+ source_id = f"task:{task_id}:manual"
+ raw_metadata = {"input_source": "task_status_patch"}
elif "workplan_tokens_in" in token_data and "workplan_tokens_out" in token_data:
# Tier 2: prorate workplan total across task count
count_result = await session.execute(
@@ -101,9 +125,24 @@ async def update_task(
tin = token_data["workplan_tokens_in"] // task_count
tout = token_data["workplan_tokens_out"] // task_count
tnote = "workplan"
+ measurement_kind = "allocated"
+ source_provider = "manual"
+ confidence = 0.7
+ source_id = f"task:{task_id}:workplan-allocation"
+ raw_metadata = {
+ "allocation_method": "workplan_prorated",
+ "workplan_tokens_in": token_data["workplan_tokens_in"],
+ "workplan_tokens_out": token_data["workplan_tokens_out"],
+ "task_count": task_count,
+ }
else:
# Tier 3: heuristic fallback
tin, tout, tnote = 1000, 500, "heuristic"
+ measurement_kind = "estimated"
+ source_provider = "task_fallback"
+ confidence = 0.35
+ source_id = f"task:{task_id}:heuristic"
+ raw_metadata = {"estimation_method": "fixed_task_done_fallback"}
# Resolve repo_id via workstream
ws = await session.get(Workstream, task.workstream_id)
@@ -121,6 +160,12 @@ async def update_task(
ref_type="task",
ref_id=str(task_id),
note=tnote,
+ measurement_kind=measurement_kind,
+ source_provider=source_provider,
+ source_id=source_id,
+ confidence=confidence,
+ raw_total_tokens=tin + tout,
+ raw_metadata=raw_metadata,
)
session.add(event)
await session.commit()
diff --git a/api/routers/token_events.py b/api/routers/token_events.py
index 6dd6bf7..b282fda 100644
--- a/api/routers/token_events.py
+++ b/api/routers/token_events.py
@@ -1,5 +1,7 @@
import uuid
from collections import defaultdict
+from datetime import datetime
+from typing import Any
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy import select
@@ -10,18 +12,95 @@ from api.models.managed_repo import ManagedRepo
from api.models.task import Task
from api.models.token_event import TokenEvent
from api.models.workstream import Workstream
-from api.schemas.token_event import RepoTokenSummary, TokenEventCreate, TokenEventPatch, TokenEventRead, TokenSummary
+from api.schemas.token_event import (
+ RepoTokenSummary,
+ TokenAggregateRow,
+ TokenAggregateSummary,
+ TokenEventCreate,
+ TokenEventPatch,
+ TokenEventRead,
+ TokenQualitySummary,
+ TokenSummary,
+)
router = APIRouter(prefix="/token-events", tags=["token-events"])
+# Confidence stored on an event when the caller supplies none, keyed by measurement_kind.
+# Kinds missing from this table fall back to 0.35 at the lookup site.
+DEFAULT_CONFIDENCE = {
+ "measured": 1.0,
+ "allocated": 0.70,
+ "estimated": 0.35,
+ "superseded": 0.0,
+}
-@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED)
-async def create_token_event(
- body: TokenEventCreate,
- session: AsyncSession = Depends(get_session),
-) -> TokenEvent:
- data = body.model_dump()
+# parser_version recorded for each source_provider when the caller omits one.
+# Providers not listed here get no default parser_version.
+SOURCE_PARSER_DEFAULTS = {
+ "codex_session": "codex-desktop-v1",
+ "claude_transcript": "claude-transcript-v1",
+ "llm_connect": "llm-connect-v1",
+}
+
+def _event_total(event: TokenEvent) -> int:
+    """Return the combined input and output token count of ``event``."""
+    combined = event.tokens_in + event.tokens_out
+    return combined
+
+
+def _infer_measurement_kind(data: dict[str, Any]) -> str:
+    """Derive the measurement kind for an event payload.
+
+    An explicit, truthy ``measurement_kind`` always wins. Otherwise the kind is
+    read from the ``note`` (exact literals first, then the codex backfill
+    prefix), then from ``source_provider``; anything else is ``"estimated"``.
+    """
+    explicit = data.get("measurement_kind")
+    if explicit:
+        return str(explicit)
+
+    note = data.get("note")
+    # Exact note literals, checked in the same order as the legacy if-chain.
+    note_literals = (
+        ("heuristic_superseded_by_codex_backfill", "superseded"),
+        ("workplan", "allocated"),
+        ("heuristic", "estimated"),
+        ("measured", "measured"),
+    )
+    for literal, kind in note_literals:
+        if note == literal:
+            return kind
+
+    if str(note or "").startswith("backfill:codex-session"):
+        return "measured"
+
+    if data.get("source_provider") in {"codex_session", "claude_transcript", "llm_connect"}:
+        return "measured"
+    return "estimated"
+
+
+def _infer_source_provider(data: dict[str, Any], measurement_kind: str) -> str:
+    """Derive the source provider for an event payload.
+
+    An explicit, truthy ``source_provider`` always wins. Otherwise the provider
+    is read from the note, the ``ref_id`` prefix and the agent name, in that
+    order; ``"manual"`` is the fallback.
+    """
+    declared = data.get("source_provider")
+    if declared:
+        return str(declared)
+
+    note = data.get("note")
+    if note == "heuristic":
+        return "task_fallback"
+
+    reference = str(data.get("ref_id") or "")
+    note_text = str(note or "")
+    if reference.startswith("codex:") or note_text.startswith("backfill:codex-session"):
+        return "codex_session"
+
+    # Only measured events are attributed to a Claude transcript by agent name.
+    agent_name = str(data.get("agent") or "").lower()
+    from_claude = measurement_kind == "measured" and "claude" in agent_name
+    return "claude_transcript" if from_claude else "manual"
+
+
+def _apply_event_defaults(data: dict[str, Any]) -> dict[str, Any]:
+    """Fill in provenance and quality fields on an event payload.
+
+    ``data`` is modified in place and also returned. The measurement kind and
+    source provider are always (re)written with their inferred values; every
+    other field is only filled in when it is missing.
+    """
+    kind = _infer_measurement_kind(data)
+    provider = _infer_source_provider(data, kind)
+    data.update(measurement_kind=kind, source_provider=provider)
+
+    # For the three ingest providers, fall back to ref_id / session_id as the source identity.
+    needs_source_id = provider in {"codex_session", "claude_transcript", "llm_connect"}
+    if needs_source_id and not data.get("source_id"):
+        fallback_id = data.get("ref_id") or data.get("session_id")
+        if fallback_id:
+            data["source_id"] = str(fallback_id)
+
+    # An event with a source identity inherits created_at as its source timestamp.
+    if data.get("created_at") and data.get("source_id") and not data.get("source_created_at"):
+        data["source_created_at"] = data["created_at"]
+
+    fallbacks = {
+        "confidence": DEFAULT_CONFIDENCE.get(kind, 0.35),
+        "cached_input_tokens": 0,
+        "reasoning_output_tokens": 0,
+        "raw_total_tokens": (data.get("tokens_in") or 0) + (data.get("tokens_out") or 0),
+        "raw_metadata": {},
+    }
+    for key, value in fallbacks.items():
+        data.setdefault(key, value)
+
+    if provider in SOURCE_PARSER_DEFAULTS:
+        data.setdefault("parser_version", SOURCE_PARSER_DEFAULTS[provider])
+    return data
+
+
+async def _populate_relationship_defaults(data: dict[str, Any], session: AsyncSession) -> dict[str, Any]:
# Auto-populate workstream_id from task if not provided
if data.get("task_id") and not data.get("workstream_id"):
task = await session.get(Task, data["task_id"])
@@ -33,6 +112,34 @@ async def create_token_event(
ws = await session.get(Workstream, data["workstream_id"])
if ws and ws.repo_id:
data["repo_id"] = ws.repo_id
+ return data
+
+
+async def _find_source_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent | None:
+    """Look up the stored event sharing this payload's source identity.
+
+    The identity is (measurement_kind, source_provider, source_id). Payloads
+    without a ``source_id`` never match and return ``None`` without querying.
+    """
+    source_id = data.get("source_id")
+    if not source_id:
+        return None
+
+    lookup = select(TokenEvent).where(
+        TokenEvent.measurement_kind == data["measurement_kind"],
+        TokenEvent.source_provider == data["source_provider"],
+        TokenEvent.source_id == source_id,
+    )
+    matches = await session.execute(lookup)
+    return matches.scalar_one_or_none()
+
+
+async def _create_or_upsert_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent:
+ data = _apply_event_defaults(data)
+ data = await _populate_relationship_defaults(data, session)
+
+ existing = await _find_source_event(data, session)
+ if existing is not None:
+ for field, value in data.items():
+ setattr(existing, field, value)
+ await session.commit()
+ await session.refresh(existing)
+ return existing
event = TokenEvent(**data)
session.add(event)
@@ -41,6 +148,77 @@ async def create_token_event(
return event
+def _filter_query(
+    q,
+    *,
+    task_id: uuid.UUID | None = None,
+    workstream_id: uuid.UUID | None = None,
+    repo_id: uuid.UUID | None = None,
+    ref_type: str | None = None,
+    ref_id: str | None = None,
+    model: str | None = None,
+    agent: str | None = None,
+    note: str | None = None,
+    measurement_kind: str | None = None,
+    source_provider: str | None = None,
+    since: datetime | None = None,
+    until: datetime | None = None,
+    include_superseded: bool = True,
+    unattributed: bool = False,
+):
+    """Narrow the statement ``q`` with the optional token-event filters.
+
+    Falsy filter values are ignored. ``since`` is inclusive and ``until`` is
+    exclusive, both compared against ``created_at``. ``unattributed`` keeps
+    only events that have no repo, workstream or task.
+    """
+    # Plain equality filters, applied in the same order as the keyword arguments.
+    exact_matches = (
+        (TokenEvent.task_id, task_id),
+        (TokenEvent.workstream_id, workstream_id),
+        (TokenEvent.repo_id, repo_id),
+        (TokenEvent.ref_type, ref_type),
+        (TokenEvent.ref_id, ref_id),
+        (TokenEvent.model, model),
+        (TokenEvent.agent, agent),
+        (TokenEvent.note, note),
+        (TokenEvent.measurement_kind, measurement_kind),
+        (TokenEvent.source_provider, source_provider),
+    )
+    for column, wanted in exact_matches:
+        if wanted:
+            q = q.where(column == wanted)
+
+    if since:
+        q = q.where(TokenEvent.created_at >= since)
+    if until:
+        q = q.where(TokenEvent.created_at < until)
+    if not include_superseded:
+        q = q.where(TokenEvent.measurement_kind != "superseded")
+    if unattributed:
+        q = q.where(
+            TokenEvent.repo_id.is_(None),
+            TokenEvent.workstream_id.is_(None),
+            TokenEvent.task_id.is_(None),
+        )
+    return q
+
+
+@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED)
+async def create_token_event(
+    body: TokenEventCreate,
+    session: AsyncSession = Depends(get_session),
+) -> TokenEvent:
+    # None-valued fields are dropped before the payload is handed to the shared
+    # create-or-upsert helper, which then applies its own defaults.
+    payload = body.model_dump(exclude_none=True)
+    event = await _create_or_upsert_event(payload, session)
+    return event
+
+
+@router.post("/upsert", response_model=TokenEventRead)
+async def upsert_token_event(
+    body: TokenEventCreate,
+    session: AsyncSession = Depends(get_session),
+) -> TokenEvent:
+    # Same code path as POST "/", exposed without the 201 status override.
+    payload = body.model_dump(exclude_none=True)
+    event = await _create_or_upsert_event(payload, session)
+    return event
+
+
@router.get("/summary/", response_model=TokenSummary)
async def get_token_summary(
scope: str = Query(..., description="task|workstream|repo|commit|release|session"),
@@ -80,11 +258,16 @@ async def get_token_summary(
by_model: dict[str, int] = defaultdict(int)
by_agent: dict[str, int] = defaultdict(int)
+ by_measurement_kind: dict[str, int] = defaultdict(int)
+ by_source_provider: dict[str, int] = defaultdict(int)
for e in events:
+ total = _event_total(e)
if e.model:
- by_model[e.model] += e.tokens_in + e.tokens_out
+ by_model[e.model] += total
if e.agent:
- by_agent[e.agent] += e.tokens_in + e.tokens_out
+ by_agent[e.agent] += total
+ by_measurement_kind[e.measurement_kind] += total
+ by_source_provider[e.source_provider] += total
return TokenSummary(
scope=scope,
@@ -95,11 +278,18 @@ async def get_token_summary(
event_count=len(events),
by_model=dict(by_model),
by_agent=dict(by_agent),
+ by_measurement_kind=dict(by_measurement_kind),
+ by_source_provider=dict(by_source_provider),
)
@router.get("/by-repo/", response_model=list[RepoTokenSummary])
async def get_tokens_by_repo(
+ measurement_kind: str | None = None,
+ source_provider: str | None = None,
+ since: datetime | None = None,
+ until: datetime | None = None,
+ include_superseded: bool = Query(True),
session: AsyncSession = Depends(get_session),
) -> list[RepoTokenSummary]:
"""Aggregate token consumption per repo, resolving via the full graph.
@@ -112,7 +302,16 @@ async def get_tokens_by_repo(
Only events that resolve to a repo are included.
"""
# Fetch all events, workstreams, repos in three queries (avoids N+1)
- events_result = await session.execute(select(TokenEvent))
+ events_result = await session.execute(
+ _filter_query(
+ select(TokenEvent),
+ measurement_kind=measurement_kind,
+ source_provider=source_provider,
+ since=since,
+ until=until,
+ include_superseded=include_superseded,
+ )
+ )
events = list(events_result.scalars().all())
ws_result = await session.execute(select(Workstream))
@@ -148,14 +347,19 @@ async def get_tokens_by_repo(
"event_count": 0,
"by_model": defaultdict(int),
"by_note": defaultdict(int),
+ "by_measurement_kind": defaultdict(int),
+ "by_source_provider": defaultdict(int),
}
g = groups[rid]
g["tokens_in"] += e.tokens_in
g["tokens_out"] += e.tokens_out
g["event_count"] += 1
+ total = _event_total(e)
if e.model:
- g["by_model"][e.model] += e.tokens_in + e.tokens_out
- g["by_note"][e.note or "unknown"] += e.tokens_in + e.tokens_out
+ g["by_model"][e.model] += total
+ g["by_note"][e.note or "unknown"] += total
+ g["by_measurement_kind"][e.measurement_kind] += total
+ g["by_source_provider"][e.source_provider] += total
return [
RepoTokenSummary(
@@ -166,6 +370,188 @@ async def get_tokens_by_repo(
]
+@router.get("/aggregate/", response_model=TokenAggregateSummary)
+async def get_token_aggregate(
+ measurement_kind: str | None = None,
+ source_provider: str | None = None,
+ since: datetime | None = None,
+ until: datetime | None = None,
+ include_superseded: bool = Query(False),
+ session: AsyncSession = Depends(get_session),
+) -> TokenAggregateSummary:
+ # Overall totals plus per-repo / per-workstream / per-task / per-model breakdowns
+ # for the events matching the filters. Superseded events are excluded by default.
+ events_result = await session.execute(
+ _filter_query(
+ select(TokenEvent),
+ measurement_kind=measurement_kind,
+ source_provider=source_provider,
+ since=since,
+ until=until,
+ include_superseded=include_superseded,
+ )
+ )
+ events = list(events_result.scalars().all())
+
+ # Load every workstream, task and repo once so attribution below is a dict lookup per event.
+ ws_result = await session.execute(select(Workstream))
+ ws_map: dict[uuid.UUID, Workstream] = {w.id: w for w in ws_result.scalars().all()}
+
+ task_result = await session.execute(select(Task))
+ task_map: dict[uuid.UUID, Task] = {t.id: t for t in task_result.scalars().all()}
+
+ repo_result = await session.execute(select(ManagedRepo))
+ repo_map: dict[uuid.UUID, ManagedRepo] = {r.id: r for r in repo_result.scalars().all()}
+
+ # Repo attribution: the event's own repo_id, else the repo of its workstream,
+ # reaching the workstream through the task when the event has no workstream_id.
+ def resolve_repo_id(e: TokenEvent) -> uuid.UUID | None:
+ if e.repo_id:
+ return e.repo_id
+ ws_id = e.workstream_id
+ if not ws_id and e.task_id and e.task_id in task_map:
+ ws_id = task_map[e.task_id].workstream_id
+ if ws_id and ws_id in ws_map:
+ return ws_map[ws_id].repo_id
+ return None
+
+ # Accumulate one event into groups[key]; an event with no key for this dimension is skipped.
+ # The label is taken from the first event seen for a key.
+ def add(groups: dict[str, dict[str, Any]], key: str | None, label: str | None, e: TokenEvent) -> None:
+ if not key:
+ return
+ if key not in groups:
+ groups[key] = {
+ "scope_id": key,
+ "label": label,
+ "tokens_in": 0,
+ "tokens_out": 0,
+ "event_count": 0,
+ "by_measurement_kind": defaultdict(int),
+ "by_source_provider": defaultdict(int),
+ }
+ row = groups[key]
+ total = _event_total(e)
+ row["tokens_in"] += e.tokens_in
+ row["tokens_out"] += e.tokens_out
+ row["event_count"] += 1
+ row["by_measurement_kind"][e.measurement_kind] += total
+ row["by_source_provider"][e.source_provider] += total
+
+ by_repo: dict[str, dict[str, Any]] = {}
+ by_workstream: dict[str, dict[str, Any]] = {}
+ by_task: dict[str, dict[str, Any]] = {}
+ by_model: dict[str, dict[str, Any]] = {}
+ by_measurement_kind: dict[str, int] = defaultdict(int)
+ by_source_provider: dict[str, int] = defaultdict(int)
+
+ first_event_at = last_event_at = last_ingested_at = None
+ tokens_in = tokens_out = 0
+ for e in events:
+ total = _event_total(e)
+ tokens_in += e.tokens_in
+ tokens_out += e.tokens_out
+ by_measurement_kind[e.measurement_kind] += total
+ by_source_provider[e.source_provider] += total
+
+ # Track the created_at range and the most recent ingestion time.
+ if first_event_at is None or e.created_at < first_event_at:
+ first_event_at = e.created_at
+ if last_event_at is None or e.created_at > last_event_at:
+ last_event_at = e.created_at
+ if last_ingested_at is None or e.ingested_at > last_ingested_at:
+ last_ingested_at = e.ingested_at
+
+ rid = resolve_repo_id(e)
+ repo = repo_map.get(rid) if rid else None
+ add(by_repo, str(rid) if rid else None, repo.slug if repo else None, e)
+
+ ws_id = e.workstream_id or (task_map[e.task_id].workstream_id if e.task_id in task_map else None)
+ ws = ws_map.get(ws_id) if ws_id else None
+ add(by_workstream, str(ws_id) if ws_id else None, ws.title if ws else None, e)
+
+ task = task_map.get(e.task_id) if e.task_id else None
+ add(by_task, str(e.task_id) if e.task_id else None, task.title if task else None, e)
+
+ # Events without a model are grouped under "unknown".
+ add(by_model, e.model or "unknown", e.model or "unknown", e)
+
+ # Convert accumulator dicts into response rows, largest token total first.
+ def rows(groups: dict[str, dict[str, Any]]) -> list[TokenAggregateRow]:
+ result = []
+ for row in groups.values():
+ result.append(
+ TokenAggregateRow(
+ **{k: (dict(v) if isinstance(v, defaultdict) else v) for k, v in row.items()},
+ tokens_total=row["tokens_in"] + row["tokens_out"],
+ )
+ )
+ return sorted(result, key=lambda item: -item.tokens_total)
+
+ return TokenAggregateSummary(
+ tokens_in=tokens_in,
+ tokens_out=tokens_out,
+ tokens_total=tokens_in + tokens_out,
+ event_count=len(events),
+ first_event_at=first_event_at,
+ last_event_at=last_event_at,
+ last_ingested_at=last_ingested_at,
+ by_repo=rows(by_repo),
+ by_workstream=rows(by_workstream),
+ by_task=rows(by_task),
+ by_model=rows(by_model),
+ by_measurement_kind=dict(by_measurement_kind),
+ by_source_provider=dict(by_source_provider),
+ )
+
+
+@router.get("/quality/", response_model=TokenQualitySummary)
+async def get_token_quality(
+ since: datetime | None = None,
+ until: datetime | None = None,
+ session: AsyncSession = Depends(get_session),
+) -> TokenQualitySummary:
+ # Data-quality counters over the events created in [since, until).
+ # No measurement_kind filter is applied, so superseded events are included.
+ result = await session.execute(_filter_query(select(TokenEvent), since=since, until=until))
+ events = list(result.scalars().all())
+
+ # These two maps count events here, not token totals.
+ by_measurement_kind: dict[str, int] = defaultdict(int)
+ by_source_provider: dict[str, int] = defaultdict(int)
+ # Events per (measurement_kind, source_provider, source_id), used to detect repeated identities.
+ source_counts: dict[tuple[str, str, str], int] = defaultdict(int)
+ last_codex_ingested_at = None
+ last_claude_ingested_at = None
+
+ fallback_count = 0
+ unattributed_measured_count = 0
+ missing_provenance_count = 0
+ for e in events:
+ by_measurement_kind[e.measurement_kind] += 1
+ by_source_provider[e.source_provider] += 1
+ if e.source_id:
+ source_counts[(e.measurement_kind, e.source_provider, e.source_id)] += 1
+ # Fallback events are recognised by provider or by the legacy "heuristic" note.
+ if e.source_provider == "task_fallback" or e.note == "heuristic":
+ fallback_count += 1
+ # Measured events linked to no repo, workstream or task.
+ if e.measurement_kind == "measured" and not (e.repo_id or e.workstream_id or e.task_id):
+ unattributed_measured_count += 1
+ # Measured events that carry no source_id.
+ if e.measurement_kind == "measured" and not e.source_id:
+ missing_provenance_count += 1
+ if e.source_provider == "codex_session" and (
+ last_codex_ingested_at is None or e.ingested_at > last_codex_ingested_at
+ ):
+ last_codex_ingested_at = e.ingested_at
+ if e.source_provider == "claude_transcript" and (
+ last_claude_ingested_at is None or e.ingested_at > last_claude_ingested_at
+ ):
+ last_claude_ingested_at = e.ingested_at
+
+ # Number of source identities that appear on more than one event (not the number of surplus events).
+ duplicate_source_count = sum(1 for count in source_counts.values() if count > 1)
+ return TokenQualitySummary(
+ event_count=len(events),
+ measured_event_count=by_measurement_kind.get("measured", 0),
+ estimated_event_count=by_measurement_kind.get("estimated", 0),
+ allocated_event_count=by_measurement_kind.get("allocated", 0),
+ superseded_event_count=by_measurement_kind.get("superseded", 0),
+ fallback_event_count=fallback_count,
+ unattributed_measured_event_count=unattributed_measured_count,
+ missing_provenance_event_count=missing_provenance_count,
+ duplicate_source_count=duplicate_source_count,
+ last_codex_ingested_at=last_codex_ingested_at,
+ last_claude_ingested_at=last_claude_ingested_at,
+ # This endpoint does not compute a reconciliation time; the field is always None.
+ last_reconciliation_at=None,
+ by_measurement_kind=dict(by_measurement_kind),
+ by_source_provider=dict(by_source_provider),
+ )
+
+
@router.patch("/{event_id}", response_model=TokenEventRead)
async def patch_token_event(
event_id: uuid.UUID,
@@ -175,7 +561,26 @@ async def patch_token_event(
event = await session.get(TokenEvent, event_id)
if event is None:
raise HTTPException(status_code=404, detail="Token event not found")
- for field, value in body.model_dump(exclude_none=True).items():
+ data = body.model_dump(exclude_none=True)
+ if "note" in data or "measurement_kind" in data or "source_provider" in data:
+ merged = {
+ "tokens_in": data.get("tokens_in", event.tokens_in),
+ "tokens_out": data.get("tokens_out", event.tokens_out),
+ "note": data.get("note", event.note),
+ "agent": data.get("agent", event.agent),
+ "ref_id": data.get("ref_id", event.ref_id),
+ "session_id": data.get("session_id", event.session_id),
+ "measurement_kind": data.get("measurement_kind", event.measurement_kind),
+ "source_provider": data.get("source_provider", event.source_provider),
+ "source_id": data.get("source_id", event.source_id),
+ }
+ inferred = _apply_event_defaults({k: v for k, v in merged.items() if v is not None})
+ data.setdefault("measurement_kind", inferred["measurement_kind"])
+ data.setdefault("source_provider", inferred["source_provider"])
+ data.setdefault("confidence", inferred["confidence"])
+ if inferred.get("source_id"):
+ data.setdefault("source_id", inferred["source_id"])
+ for field, value in data.items():
setattr(event, field, value)
await session.commit()
await session.refresh(event)
@@ -203,26 +608,33 @@ async def list_token_events(
model: str | None = None,
agent: str | None = None,
note: str | None = None,
+ measurement_kind: str | None = None,
+ source_provider: str | None = None,
+ since: datetime | None = None,
+ until: datetime | None = None,
+ include_superseded: bool = Query(True),
+ unattributed: bool = False,
+ offset: int = Query(0, ge=0),
limit: int = Query(100, le=1000),
session: AsyncSession = Depends(get_session),
) -> list[TokenEvent]:
- q = select(TokenEvent)
- if task_id:
- q = q.where(TokenEvent.task_id == task_id)
- if workstream_id:
- q = q.where(TokenEvent.workstream_id == workstream_id)
- if repo_id:
- q = q.where(TokenEvent.repo_id == repo_id)
- if ref_type:
- q = q.where(TokenEvent.ref_type == ref_type)
- if ref_id:
- q = q.where(TokenEvent.ref_id == ref_id)
- if model:
- q = q.where(TokenEvent.model == model)
- if agent:
- q = q.where(TokenEvent.agent == agent)
- if note:
- q = q.where(TokenEvent.note == note)
- q = q.order_by(TokenEvent.created_at.desc()).limit(limit)
+ q = _filter_query(
+ select(TokenEvent),
+ task_id=task_id,
+ workstream_id=workstream_id,
+ repo_id=repo_id,
+ ref_type=ref_type,
+ ref_id=ref_id,
+ model=model,
+ agent=agent,
+ note=note,
+ measurement_kind=measurement_kind,
+ source_provider=source_provider,
+ since=since,
+ until=until,
+ include_superseded=include_superseded,
+ unattributed=unattributed,
+ )
+ q = q.order_by(TokenEvent.created_at.desc()).offset(offset).limit(limit)
result = await session.execute(q)
return list(result.scalars().all())
diff --git a/api/schemas/task.py b/api/schemas/task.py
index 048dba5..2c9af32 100644
--- a/api/schemas/task.py
+++ b/api/schemas/task.py
@@ -43,6 +43,7 @@ class TaskUpdate(BaseModel):
# 2. workplan_tokens_in + workplan_tokens_out → prorated across task count (note="workplan")
# 3. neither provided, status=done → heuristic 1000/500 (note="heuristic")
# token_note overrides the auto-assigned note for Tier 1 only (e.g. "userbased")
+ # suppress_token_event lets file/cache sync update status without recording usage.
tokens_in: int | None = None
tokens_out: int | None = None
workplan_tokens_in: int | None = None
@@ -51,6 +52,7 @@ class TaskUpdate(BaseModel):
model: str | None = None
agent: str | None = None
session_id: str | None = None
+ suppress_token_event: bool | None = None
@model_validator(mode="after")
def blocking_reason_required_when_blocked(self) -> Self:
diff --git a/api/schemas/token_event.py b/api/schemas/token_event.py
index 60acbda..7c7ca9d 100644
--- a/api/schemas/token_event.py
+++ b/api/schemas/token_event.py
@@ -1,7 +1,8 @@
import uuid
from datetime import datetime
+from typing import Any
-from pydantic import BaseModel, ConfigDict, computed_field
+from pydantic import BaseModel, ConfigDict, Field, computed_field
class TokenEventCreate(BaseModel):
@@ -16,6 +17,19 @@ class TokenEventCreate(BaseModel):
ref_type: str | None = None
ref_id: str | None = None
note: str | None = None
+ created_at: datetime | None = None
+ measurement_kind: str | None = None
+ source_provider: str | None = None
+ source_id: str | None = None
+ source_path: str | None = None
+ source_created_at: datetime | None = None
+ parser_version: str | None = None
+ confidence: float | None = None
+ cached_input_tokens: int | None = None
+ reasoning_output_tokens: int | None = None
+ raw_total_tokens: int | None = None
+ cost_estimated_usd: float | None = None
+ raw_metadata: dict[str, Any] | None = None
class TokenEventRead(BaseModel):
@@ -33,6 +47,19 @@ class TokenEventRead(BaseModel):
ref_type: str | None = None
ref_id: str | None = None
note: str | None = None
+ measurement_kind: str
+ source_provider: str
+ source_id: str | None = None
+ source_path: str | None = None
+ source_created_at: datetime | None = None
+ ingested_at: datetime
+ parser_version: str | None = None
+ confidence: float
+ cached_input_tokens: int
+ reasoning_output_tokens: int
+ raw_total_tokens: int | None = None
+ cost_estimated_usd: float | None = None
+ raw_metadata: dict[str, Any] = Field(default_factory=dict)
created_at: datetime
@computed_field
@@ -40,6 +67,11 @@ class TokenEventRead(BaseModel):
def tokens_total(self) -> int:
return self.tokens_in + self.tokens_out
+    @computed_field
+    @property
+    def token_evidence_total(self) -> int:
+        # Prefer the recorded raw total; fall back to tokens_in + tokens_out only when it is absent.
+        # An explicit raw_total_tokens of 0 is a recorded value, so test for None rather than
+        # truthiness (``or`` would silently replace a stored 0 with the in+out sum).
+        if self.raw_total_tokens is not None:
+            return self.raw_total_tokens
+        return self.tokens_in + self.tokens_out
+
class TokenSummary(BaseModel):
scope: str
@@ -50,14 +82,36 @@ class TokenSummary(BaseModel):
event_count: int
by_model: dict[str, int]
by_agent: dict[str, int]
+ by_measurement_kind: dict[str, int] = Field(default_factory=dict)
+ by_source_provider: dict[str, int] = Field(default_factory=dict)
class TokenEventPatch(BaseModel):
+ # Partial-update payload for PATCH /token-events/{event_id}; every field is optional.
+ # The handler dumps this model with exclude_none=True, so a field left as None is not
+ # applied and a stored value cannot be reset to null through this schema.
tokens_in: int | None = None
tokens_out: int | None = None
+ task_id: uuid.UUID | None = None
+ workstream_id: uuid.UUID | None = None
+ repo_id: uuid.UUID | None = None
+ session_id: str | None = None
note: str | None = None
model: str | None = None
agent: str | None = None
+ ref_type: str | None = None
+ ref_id: str | None = None
+ created_at: datetime | None = None
+ measurement_kind: str | None = None
+ source_provider: str | None = None
+ source_id: str | None = None
+ source_path: str | None = None
+ source_created_at: datetime | None = None
+ ingested_at: datetime | None = None
+ parser_version: str | None = None
+ confidence: float | None = None
+ cached_input_tokens: int | None = None
+ reasoning_output_tokens: int | None = None
+ raw_total_tokens: int | None = None
+ cost_estimated_usd: float | None = None
+ raw_metadata: dict[str, Any] | None = None
class RepoTokenSummary(BaseModel):
@@ -69,3 +123,49 @@ class RepoTokenSummary(BaseModel):
event_count: int
by_model: dict[str, int]
by_note: dict[str, int]
+ by_measurement_kind: dict[str, int] = Field(default_factory=dict)
+ by_source_provider: dict[str, int] = Field(default_factory=dict)
+
+
+class TokenAggregateRow(BaseModel):
+ # One aggregated bucket of token usage. TokenAggregateSummary carries lists
+ # of these rows for its by_repo, by_workstream, by_task and by_model fields.
+ # NOTE(review): scope_id is presumably the repo/workstream/task id (or model
+ # name) of the bucket - confirm against the code that builds the rows.
+ scope_id: str
+ # Optional display text for the bucket; None when no label is available.
+ label: str | None = None
+ tokens_in: int
+ tokens_out: int
+ # NOTE(review): supplied by the producer, not computed here; presumably
+ # tokens_in + tokens_out - confirm.
+ tokens_total: int
+ event_count: int
+ # Both breakdown dicts default to a fresh empty dict per instance.
+ by_measurement_kind: dict[str, int] = Field(default_factory=dict)
+ by_source_provider: dict[str, int] = Field(default_factory=dict)
+
+
+class TokenAggregateSummary(BaseModel):
+ # Overall token totals plus per-dimension breakdowns.
+ # NOTE(review): presumably the response body of the aggregate endpoint the
+ # dashboard polls - confirm against the router.
+ tokens_in: int
+ tokens_out: int
+ tokens_total: int
+ event_count: int
+ # Timestamps are optional and default to None.
+ first_event_at: datetime | None = None
+ last_event_at: datetime | None = None
+ last_ingested_at: datetime | None = None
+ # Row lists default to empty lists; each row is one bucket of that dimension.
+ by_repo: list[TokenAggregateRow] = Field(default_factory=list)
+ by_workstream: list[TokenAggregateRow] = Field(default_factory=list)
+ by_task: list[TokenAggregateRow] = Field(default_factory=list)
+ by_model: list[TokenAggregateRow] = Field(default_factory=list)
+ # Keyed breakdowns default to empty dicts.
+ # NOTE(review): the dashboard renders these values as token totals - confirm
+ # the producer fills them with tokens rather than event counts.
+ by_measurement_kind: dict[str, int] = Field(default_factory=dict)
+ by_source_provider: dict[str, int] = Field(default_factory=dict)
+
+
+class TokenQualitySummary(BaseModel):
+ # Data-quality counters for token events; every *_count field is required.
+ # NOTE(review): the exact rule behind each counter (e.g. what qualifies as
+ # "fallback" or "missing provenance") lives in the producer - confirm there.
+ event_count: int
+ measured_event_count: int
+ estimated_event_count: int
+ allocated_event_count: int
+ superseded_event_count: int
+ fallback_event_count: int
+ unattributed_measured_event_count: int
+ missing_provenance_event_count: int
+ duplicate_source_count: int
+ # Freshness markers; None when no value is available.
+ last_codex_ingested_at: datetime | None = None
+ last_claude_ingested_at: datetime | None = None
+ last_reconciliation_at: datetime | None = None
+ # Keyed breakdowns default to empty dicts.
+ # NOTE(review): unclear from here whether values are event counts or token
+ # totals - confirm against the producer.
+ by_measurement_kind: dict[str, int] = Field(default_factory=dict)
+ by_source_provider: dict[str, int] = Field(default_factory=dict)
diff --git a/api/services/token_sources/__init__.py b/api/services/token_sources/__init__.py
new file mode 100644
index 0000000..f0e95f5
--- /dev/null
+++ b/api/services/token_sources/__init__.py
@@ -0,0 +1,16 @@
+"""Token source adapters for measured agent usage."""
+
+from api.services.token_sources.base import TokenSourceRecord, parse_iso
+from api.services.token_sources.codex import collect_codex_sessions, iter_codex_session_files, parse_codex_session
+from api.services.token_sources.claude import collect_claude_transcripts, iter_claude_transcript_files, parse_claude_transcript
+
+__all__ = [
+ "TokenSourceRecord",
+ "parse_iso",
+ "collect_codex_sessions",
+ "iter_codex_session_files",
+ "parse_codex_session",
+ "collect_claude_transcripts",
+ "iter_claude_transcript_files",
+ "parse_claude_transcript",
+]
diff --git a/api/services/token_sources/attribution.py b/api/services/token_sources/attribution.py
new file mode 100644
index 0000000..2a9eec4
--- /dev/null
+++ b/api/services/token_sources/attribution.py
@@ -0,0 +1,171 @@
+from __future__ import annotations
+
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+@dataclass(frozen=True)
+class RepoRef:
+    """Immutable description of a registered repository used for attribution.
+
+    Only ``repo_id`` and ``slug`` are required; the remaining attributes are
+    optional hints (filesystem paths, origin URL, git fingerprint) that the
+    matching logic in this module compares against a working directory.
+    """
+
+    repo_id: str
+    slug: str
+    local_path: str | None = None
+    host_paths: dict[str, Any] | None = None
+    remote_url: str | None = None
+    git_fingerprint: str | None = None
+
+
+@dataclass(frozen=True)
+class RepoMatch:
+    """Immutable result of attributing a working directory to a repository.
+
+    ``method`` names the strategy that produced the match and ``confidence``
+    is the score assigned to that strategy.
+    """
+
+    repo_id: str
+    slug: str
+    method: str
+    confidence: float
+
+
+def normalise_cwd(raw: str | None, distro: str = "Ubuntu-24.04") -> str | None:
+    """Convert a reported working directory into a POSIX-style path.
+
+    Backslashes are converted to forward slashes first, then:
+
+    * a WSL UNC path for ``distro`` (``//wsl.localhost/<distro>/...`` or
+      ``//wsl$/<distro>/...``) is reduced to the path inside the distro;
+    * a Windows drive path (``C:/...``) becomes ``/mnt/c/...``;
+    * anything else is returned as is.
+
+    Args:
+        raw: Path as reported by the agent; ``None`` or empty yields ``None``.
+        distro: WSL distribution whose UNC prefix is stripped. Defaults to the
+            previously hard-coded ``Ubuntu-24.04``.
+
+    Returns:
+        The normalised path, or ``None`` when ``raw`` is empty.
+    """
+    if not raw:
+        return None
+    value = raw.replace("\\", "/")
+    for host in ("//wsl.localhost/", "//wsl$/"):
+        prefix = f"{host}{distro}"
+        if value == prefix:
+            return "/"
+        # Require a path boundary after the distro name so that a sibling
+        # distro such as "Ubuntu-24.04-old" is not mistaken for this one.
+        if value.startswith(f"{prefix}/"):
+            return value[len(prefix):]
+    # Only an ASCII letter followed by ":/" is a Windows drive specifier.
+    if len(value) >= 3 and value[1:3] == ":/" and value[0].isascii() and value[0].isalpha():
+        return f"/mnt/{value[0].lower()}{value[2:]}"
+    return value
+
+
+def normalise_remote_url(raw: str | None) -> str | None:
+    """Reduce a git remote URL to a canonical form suitable for equality checks.
+
+    The value is trimmed, lower-cased, and stripped of trailing slashes and a
+    trailing ``.git``. The scp-like form ``git@host:path`` and the equivalent
+    ``ssh://git@host/path`` both become ``ssh://host/path`` so that the two
+    spellings of one remote compare equal.
+
+    Args:
+        raw: Remote URL as stored or as printed by git; may be ``None``.
+
+    Returns:
+        The canonical URL, or ``None`` when nothing usable remains.
+    """
+    if not raw:
+        return None
+    # Strip trailing slashes *before* looking for ".git", otherwise
+    # "https://host/repo.git/" keeps its suffix.
+    value = raw.strip().lower().rstrip("/")
+    if value.endswith(".git"):
+        value = value[:-4].rstrip("/")
+    if value.startswith("git@") and ":" in value:
+        host, path = value[4:].split(":", 1)
+        value = f"ssh://{host}/{path.lstrip('/')}"
+    elif value.startswith("ssh://git@"):
+        # Drop the user part so this matches the scp-like spelling above.
+        value = "ssh://" + value[len("ssh://git@"):]
+    return value or None
+
+
+def repo_refs_from_api(repos: list[dict[str, Any]]) -> list[RepoRef]:
+    """Build ``RepoRef`` objects from repo dicts returned by the API.
+
+    Entries without a truthy ``id`` or ``slug`` are dropped. ``host_paths`` is
+    kept only when it is a dict; any other value is replaced by ``{}``.
+    """
+    collected: list[RepoRef] = []
+    for item in repos:
+        identifier, slug = item.get("id"), item.get("slug")
+        if identifier and slug:
+            host_paths = item.get("host_paths")
+            collected.append(
+                RepoRef(
+                    repo_id=str(identifier),
+                    slug=str(slug),
+                    local_path=item.get("local_path"),
+                    host_paths=host_paths if isinstance(host_paths, dict) else {},
+                    remote_url=item.get("remote_url"),
+                    git_fingerprint=item.get("git_fingerprint"),
+                )
+            )
+    return collected
+
+
+def _git(cwd: str, *args: str) -> str | None:
+    """Run ``git <args>`` in ``cwd`` and return the first line of its stdout.
+
+    Returns ``None`` when git cannot be started, exceeds the 5 second
+    timeout, exits non-zero, or prints nothing.
+    """
+    command = ["git", *args]
+    try:
+        proc = subprocess.run(
+            command,
+            cwd=cwd,
+            check=False,
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None
+    if proc.returncode:
+        return None
+    lines = proc.stdout.strip().splitlines()
+    if not lines:
+        return None
+    return lines[0]
+
+
+def git_fingerprint_for_path(cwd: str | None) -> str | None:
+    """Return the first root-commit hash of the repository containing ``cwd``.
+
+    The path is normalised first; ``None`` is returned when it is empty, does
+    not exist, is not inside a git work tree, or git reports no root commit.
+    """
+    location = normalise_cwd(cwd)
+    if location and Path(location).exists():
+        toplevel = _git(location, "rev-parse", "--show-toplevel")
+        if toplevel:
+            return _git(toplevel, "rev-list", "--max-parents=0", "HEAD")
+    return None
+
+
+def git_remote_for_path(cwd: str | None) -> str | None:
+    """Return the ``origin`` remote URL of the repository containing ``cwd``.
+
+    The path is normalised first; ``None`` is returned when it is empty, does
+    not exist, is not inside a git work tree, or has no ``origin`` remote.
+    """
+    location = normalise_cwd(cwd)
+    if location and Path(location).exists():
+        toplevel = _git(location, "rev-parse", "--show-toplevel")
+        if toplevel:
+            return _git(toplevel, "remote", "get-url", "origin")
+    return None
+
+
+def _repo_paths(repo: RepoRef) -> list[str]:
+    """List the normalised filesystem paths registered for ``repo``.
+
+    ``local_path`` comes first, followed by the truthy ``host_paths`` values.
+    Empty values and the ``"(unknown)"`` placeholder are skipped, and trailing
+    slashes are removed from every path that is kept.
+    """
+    raw_candidates = [repo.local_path]
+    if repo.host_paths:
+        raw_candidates += [str(entry) for entry in repo.host_paths.values() if entry]
+    cleaned = []
+    for candidate in raw_candidates:
+        if not candidate or candidate == "(unknown)":
+            continue
+        normalised = normalise_cwd(str(candidate))
+        if normalised:
+            cleaned.append(normalised.rstrip("/"))
+    return cleaned
+
+
+def resolve_repo(cwd: str | None, repos: list[RepoRef]) -> RepoMatch | None:
+    """Attribute a working directory to one of the known repositories.
+
+    Strategies are tried from most to least reliable; the first unambiguous
+    hit wins:
+
+    1. ``git_fingerprint`` (0.98): exactly one repo shares the root-commit
+       hash of the directory's repository.
+    2. ``git_fingerprint_remote`` (0.99): the fingerprint alone is not
+       unique, but exactly one fingerprint candidate also has the same
+       normalised ``origin`` URL.
+    3. ``remote_url`` (0.90): exactly one repo has the same normalised
+       ``origin`` URL.
+    4. ``path_exact_slug`` (0.85) / ``path_exact`` (0.80): the directory
+       equals a registered repo path; the slug variant also requires the
+       directory name to equal the repo slug.
+    5. ``path_prefix`` (0.75): the directory lies below a registered repo
+       path; the longest registered path wins.
+
+    Args:
+        cwd: Working directory as reported by the agent (any supported form).
+        repos: Candidate repositories.
+
+    Returns:
+        The best match, or ``None`` when nothing matches.
+    """
+    if not repos:
+        # Nothing can match; avoid spawning git for no reason.
+        return None
+
+    path = normalise_cwd(cwd)
+    fingerprint = git_fingerprint_for_path(path)
+    remote = normalise_remote_url(git_remote_for_path(path))
+
+    if fingerprint:
+        candidates = [repo for repo in repos if repo.git_fingerprint == fingerprint]
+        if len(candidates) == 1:
+            repo = candidates[0]
+            return RepoMatch(repo.repo_id, repo.slug, "git_fingerprint", 0.98)
+        if remote:
+            # Several repos share the root commit (e.g. forks); use the
+            # origin URL as a tie-breaker.
+            remote_candidates = [
+                repo for repo in candidates
+                if normalise_remote_url(repo.remote_url) == remote
+            ]
+            if len(remote_candidates) == 1:
+                repo = remote_candidates[0]
+                return RepoMatch(repo.repo_id, repo.slug, "git_fingerprint_remote", 0.99)
+
+    if remote:
+        candidates = [repo for repo in repos if normalise_remote_url(repo.remote_url) == remote]
+        if len(candidates) == 1:
+            repo = candidates[0]
+            return RepoMatch(repo.repo_id, repo.slug, "remote_url", 0.90)
+
+    if not path:
+        return None
+
+    # Registered paths have their trailing slashes removed; do the same here
+    # so "/repo/" is an exact match for "/repo" rather than a prefix match.
+    path = path.rstrip("/") or "/"
+
+    path_matches: list[tuple[str, RepoRef]] = []
+    for repo in repos:
+        for repo_path in _repo_paths(repo):
+            if path == repo_path or path.startswith(f"{repo_path}/"):
+                path_matches.append((repo_path, repo))
+    if not path_matches:
+        return None
+    # Longest registered path first, so the most specific prefix wins below.
+    path_matches.sort(key=lambda item: len(item[0]), reverse=True)
+    exact = [item for item in path_matches if path == item[0]]
+    if exact:
+        basename = Path(path).name
+        for _, repo in exact:
+            if repo.slug == basename:
+                return RepoMatch(repo.repo_id, repo.slug, "path_exact_slug", 0.85)
+        repo = exact[0][1]
+        return RepoMatch(repo.repo_id, repo.slug, "path_exact", 0.80)
+    repo = path_matches[0][1]
+    return RepoMatch(repo.repo_id, repo.slug, "path_prefix", 0.75)
diff --git a/api/services/token_sources/base.py b/api/services/token_sources/base.py
new file mode 100644
index 0000000..77a1392
--- /dev/null
+++ b/api/services/token_sources/base.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+
+def parse_iso(value: str) -> datetime:
+    """Parse an ISO-8601 date or datetime string into an aware UTC datetime.
+
+    A trailing ``Z`` is treated as ``+00:00``. A date without a time part is
+    taken as midnight UTC. A datetime without an offset is assumed to be UTC.
+
+    Args:
+        value: The timestamp text; surrounding whitespace is ignored.
+
+    Returns:
+        The timestamp converted to UTC.
+
+    Raises:
+        ValueError: If the text is not a valid ISO-8601 date or datetime.
+    """
+    raw = value.strip()
+    if raw.endswith("Z"):
+        raw = raw[:-1] + "+00:00"
+    # Date-only input has neither a "T" separator nor a ":" from a time part.
+    # Checking only for "T" would also hit "2026-05-19 10:00:00" and turn a
+    # valid space-separated datetime into an unparseable string.
+    if "T" not in raw and ":" not in raw:
+        raw = f"{raw}T00:00:00+00:00"
+    parsed = datetime.fromisoformat(raw)
+    if parsed.tzinfo is None:
+        parsed = parsed.replace(tzinfo=timezone.utc)
+    return parsed.astimezone(timezone.utc)
+
+
+@dataclass
+class TokenSourceRecord:
+    """Measured token usage gathered from one source file.
+
+    Holds the token counters together with the provenance (provider, source
+    id, file path, parser version) needed to build a token-event payload.
+    """
+
+    source_provider: str
+    source_id: str
+    source_path: Path
+    source_created_at: datetime | None
+    session_id: str | None = None
+    cwd: str | None = None
+    model: str | None = None
+    agent: str | None = None
+    tokens_in: int = 0
+    tokens_out: int = 0
+    cached_input_tokens: int = 0
+    reasoning_output_tokens: int = 0
+    raw_total_tokens: int | None = None
+    parser_version: str | None = None
+    confidence: float = 1.0
+    raw_metadata: dict[str, Any] = field(default_factory=dict)
+
+    @property
+    def tokens_total(self) -> int:
+        """Input plus output tokens; cached and reasoning counters are not added."""
+        return sum((self.tokens_in, self.tokens_out))
+
+    def to_token_event_payload(self, repo_id: str | None = None) -> dict[str, Any]:
+        """Build the token-event dict for this record.
+
+        ``raw_total_tokens`` falls back to ``tokens_in + tokens_out`` only
+        when it is ``None``. ``created_at`` and ``source_created_at`` carry
+        the same ISO string, or ``None`` when the record has no timestamp.
+
+        Args:
+            repo_id: Repository to attribute the event to, if known.
+        """
+        total = self.tokens_total if self.raw_total_tokens is None else self.raw_total_tokens
+        stamp = None if self.source_created_at is None else self.source_created_at.isoformat()
+        provider = self.source_provider
+        origin = self.source_id
+        return {
+            "tokens_in": self.tokens_in,
+            "tokens_out": self.tokens_out,
+            "repo_id": repo_id,
+            "session_id": self.session_id,
+            "model": self.model,
+            "agent": self.agent,
+            "ref_type": "session",
+            "ref_id": origin,
+            "note": f"measured:{provider}",
+            "created_at": stamp,
+            "measurement_kind": "measured",
+            "source_provider": provider,
+            "source_id": origin,
+            "source_path": str(self.source_path),
+            "source_created_at": stamp,
+            "parser_version": self.parser_version,
+            "confidence": self.confidence,
+            "cached_input_tokens": self.cached_input_tokens,
+            "reasoning_output_tokens": self.reasoning_output_tokens,
+            "raw_total_tokens": total,
+            "raw_metadata": self.raw_metadata,
+        }
diff --git a/api/services/token_sources/claude.py b/api/services/token_sources/claude.py
new file mode 100644
index 0000000..e97acc2
--- /dev/null
+++ b/api/services/token_sources/claude.py
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from api.services.token_sources.base import TokenSourceRecord, parse_iso
+
+PARSER_VERSION = "claude-transcript-v1"
+
+
+def iter_claude_transcript_files(claude_home: Path) -> list[Path]:
+    """Return every ``*.jsonl`` file below ``<claude_home>/projects``, sorted.
+
+    The search is recursive. An empty list is returned when the ``projects``
+    directory does not exist.
+    """
+    root = claude_home / "projects"
+    found = list(root.glob("**/*.jsonl")) if root.is_dir() else []
+    found.sort()
+    return found
+
+
+def _usage_from_entry(entry: dict[str, Any]) -> dict[str, Any]:
+    """Extract the usage dict from a transcript entry.
+
+    ``entry["message"]["usage"]`` is preferred; ``entry["usage"]`` is the
+    fallback. An empty dict is returned when neither is a dict.
+    """
+    nested = entry.get("message")
+    if isinstance(nested, dict):
+        nested_usage = nested.get("usage")
+        if isinstance(nested_usage, dict):
+            return nested_usage
+    top_level = entry.get("usage")
+    if isinstance(top_level, dict):
+        return top_level
+    return {}
+
+
+def parse_claude_transcript(path: Path, since: datetime) -> TokenSourceRecord | None:
+    """Aggregate the usage recorded in one Claude transcript into a single record.
+
+    Each line of the file is read as one JSON object. Session id, working
+    directory and model are taken from the latest line that provides them
+    (the file stem is the initial session id). Usage entries timestamped
+    before ``since`` are ignored; usage entries without a parseable timestamp
+    are still counted. Cache-creation and cache-read tokens are summed into
+    ``cached_input_tokens`` and are not part of ``tokens_in``.
+
+    Lines that are not valid JSON, are not JSON objects, or carry non-numeric
+    usage values are skipped and counted in ``raw_metadata["malformed_lines"]``
+    instead of aborting the whole file.
+
+    Args:
+        path: Transcript file to read.
+        since: Lower bound for usage timestamps; compared against aware UTC
+            datetimes, so it must be timezone-aware.
+
+    Returns:
+        The aggregated record, or ``None`` when the file cannot be opened or
+        contains no countable usage.
+    """
+
+    def _timestamp(value: Any) -> datetime | None:
+        # A single malformed timestamp must not abort the whole transcript.
+        if not isinstance(value, str):
+            return None
+        try:
+            return parse_iso(value)
+        except (ValueError, OverflowError):
+            return None
+
+    session_id = path.stem
+    cwd: str | None = None
+    model: str | None = None
+    first_at: datetime | None = None
+    last_at: datetime | None = None
+    tokens_in = tokens_out = 0
+    cached_input_tokens = 0
+    raw_total_tokens = 0
+    usage_records = 0
+    malformed_lines = 0
+
+    try:
+        handle = path.open("r", encoding="utf-8", errors="ignore")
+    except OSError:
+        return None
+
+    with handle:
+        for line in handle:
+            try:
+                entry: dict[str, Any] = json.loads(line)
+            except json.JSONDecodeError:
+                malformed_lines += 1
+                continue
+            if not isinstance(entry, dict):
+                # Valid JSON but not an object (e.g. a bare list or number).
+                malformed_lines += 1
+                continue
+
+            parsed_ts = _timestamp(entry.get("timestamp") or entry.get("created_at"))
+            if parsed_ts:
+                first_at = first_at or parsed_ts
+                last_at = parsed_ts
+
+            session_id = str(entry.get("session_id") or entry.get("conversation_id") or session_id)
+            cwd = entry.get("cwd") or entry.get("project_cwd") or cwd
+            model = entry.get("model") or model
+            message = entry.get("message")
+            if isinstance(message, dict):
+                model = message.get("model") or model
+
+            usage = _usage_from_entry(entry)
+            if not usage:
+                continue
+            if parsed_ts is not None and parsed_ts < since:
+                continue
+
+            try:
+                input_tokens = int(usage.get("input_tokens") or 0)
+                cache_creation = int(usage.get("cache_creation_input_tokens") or 0)
+                cache_read = int(usage.get("cache_read_input_tokens") or 0)
+                output_tokens = int(usage.get("output_tokens") or 0)
+            except (TypeError, ValueError):
+                malformed_lines += 1
+                continue
+            if input_tokens == 0 and output_tokens == 0 and cache_creation == 0 and cache_read == 0:
+                continue
+            tokens_in += input_tokens
+            tokens_out += output_tokens
+            cached_input_tokens += cache_creation + cache_read
+            raw_total_tokens += input_tokens + cache_creation + cache_read + output_tokens
+            usage_records += 1
+
+    if usage_records == 0 or tokens_in + tokens_out + cached_input_tokens == 0:
+        return None
+
+    return TokenSourceRecord(
+        source_provider="claude_transcript",
+        source_id=f"claude:{session_id}",
+        source_path=path,
+        # Timestamp of the last timestamped line in the file, whatever its type.
+        source_created_at=last_at,
+        session_id=session_id,
+        cwd=cwd,
+        model=model,
+        agent="claude",
+        tokens_in=tokens_in,
+        tokens_out=tokens_out,
+        cached_input_tokens=cached_input_tokens,
+        raw_total_tokens=raw_total_tokens or None,
+        parser_version=PARSER_VERSION,
+        confidence=1.0,
+        raw_metadata={
+            "started_at": first_at.isoformat() if first_at else None,
+            "usage_records": usage_records,
+            "malformed_lines": malformed_lines,
+            "source_file_name": path.name,
+        },
+    )
+
+
+def collect_claude_transcripts(claude_home: Path, since: datetime) -> list[TokenSourceRecord]:
+    """Parse all Claude transcripts and return one record per source id.
+
+    When several files yield the same ``source_id``, the record with the
+    larger ``tokens_total`` is kept (the first one wins a tie). The result is
+    sorted by ``source_created_at``; records without a timestamp sort first.
+    """
+    best: dict[str, TokenSourceRecord] = {}
+    for transcript in iter_claude_transcript_files(claude_home):
+        record = parse_claude_transcript(transcript, since)
+        if record is None:
+            continue
+        existing = best.get(record.source_id)
+        if existing is not None and record.tokens_total <= existing.tokens_total:
+            continue
+        best[record.source_id] = record
+    # Sort key used for records that carry no timestamp.
+    floor = datetime.min.replace(tzinfo=since.tzinfo)
+    return sorted(best.values(), key=lambda record: record.source_created_at or floor)
diff --git a/api/services/token_sources/codex.py b/api/services/token_sources/codex.py
new file mode 100644
index 0000000..991f7a2
--- /dev/null
+++ b/api/services/token_sources/codex.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from api.services.token_sources.base import TokenSourceRecord, parse_iso
+
+PARSER_VERSION = "codex-desktop-v1"
+
+
+def iter_codex_session_files(codex_home: Path) -> list[Path]:
+    """List Codex session files: live sessions first, then archived ones.
+
+    Live sessions are ``*.jsonl`` files exactly three directories below
+    ``<codex_home>/sessions``; archived sessions are ``*.jsonl`` files
+    directly inside ``<codex_home>/archived_sessions``. Each group is sorted
+    on its own, and a missing directory contributes nothing.
+    """
+    locations = (
+        (codex_home / "sessions", "*/*/*/*.jsonl"),
+        (codex_home / "archived_sessions", "*.jsonl"),
+    )
+    collected: list[Path] = []
+    for folder, pattern in locations:
+        if folder.is_dir():
+            collected.extend(sorted(folder.glob(pattern)))
+    return collected
+
+
+def parse_codex_session(path: Path, since: datetime) -> TokenSourceRecord | None:
+    """Aggregate the token usage recorded in one Codex session file.
+
+    Each line of the file is read as one JSON object and handled by type:
+
+    * ``session_meta`` supplies the session id, working directory and start
+      time (the file stem without ``rollout-`` is the fallback id);
+    * ``turn_context`` supplies the working directory and model;
+    * ``event_msg`` with payload type ``token_count`` adds the values of
+      ``info.last_token_usage`` to the running totals.
+
+    Token-count events without a parseable timestamp, or timestamped before
+    ``since``, are ignored. Lines that are not valid JSON or not JSON
+    objects, and token counts with non-numeric values, are skipped and
+    counted in ``raw_metadata["malformed_lines"]`` instead of aborting the
+    whole file.
+
+    Args:
+        path: Session file to read.
+        since: Lower bound for token-count timestamps; compared against aware
+            UTC datetimes, so it must be timezone-aware.
+
+    Returns:
+        The aggregated record, or ``None`` when the file cannot be opened or
+        contains no countable usage.
+    """
+
+    def _timestamp(value: Any) -> datetime | None:
+        # A single malformed timestamp must not abort the whole session file.
+        if not isinstance(value, str):
+            return None
+        try:
+            return parse_iso(value)
+        except (ValueError, OverflowError):
+            return None
+
+    fallback_id = path.stem.removeprefix("rollout-")
+    session_id = fallback_id
+    started_at: datetime | None = None
+    last_at: datetime | None = None
+    cwd: str | None = None
+    model: str | None = None
+    tokens_in = tokens_out = 0
+    cached_input_tokens = reasoning_output_tokens = 0
+    raw_total_tokens = 0
+    usage_records = 0
+    malformed_lines = 0
+
+    try:
+        handle = path.open("r", encoding="utf-8", errors="ignore")
+    except OSError:
+        return None
+
+    with handle:
+        for line in handle:
+            try:
+                entry: dict[str, Any] = json.loads(line)
+            except json.JSONDecodeError:
+                malformed_lines += 1
+                continue
+            if not isinstance(entry, dict):
+                # Valid JSON but not an object (e.g. a bare list or number).
+                malformed_lines += 1
+                continue
+
+            parsed_ts = _timestamp(entry.get("timestamp"))
+            if parsed_ts:
+                last_at = parsed_ts
+                started_at = started_at or parsed_ts
+
+            payload = entry.get("payload")
+            if not isinstance(payload, dict):
+                payload = {}
+            entry_type = entry.get("type")
+            if entry_type == "session_meta":
+                meta_id = payload.get("id")
+                if meta_id:
+                    session_id = str(meta_id)
+                cwd = payload.get("cwd") or cwd
+                # The session's own start time overrides the first line seen.
+                meta_started = _timestamp(payload.get("timestamp"))
+                if meta_started is not None:
+                    started_at = meta_started
+            elif entry_type == "turn_context":
+                cwd = payload.get("cwd") or cwd
+                model = payload.get("model") or model
+            elif entry_type == "event_msg" and payload.get("type") == "token_count":
+                if parsed_ts is None or parsed_ts < since:
+                    continue
+                info = payload.get("info")
+                if not isinstance(info, dict):
+                    continue
+                last = info.get("last_token_usage") or {}
+                if not isinstance(last, dict):
+                    continue
+                try:
+                    input_tokens = int(last.get("input_tokens") or 0)
+                    output_tokens = int(last.get("output_tokens") or 0)
+                    cached = int(last.get("cached_input_tokens") or 0)
+                    reasoning = int(last.get("reasoning_output_tokens") or 0)
+                    reported_total = int(last.get("total_tokens") or input_tokens + output_tokens)
+                except (TypeError, ValueError):
+                    malformed_lines += 1
+                    continue
+                if input_tokens == 0 and output_tokens == 0:
+                    continue
+                tokens_in += input_tokens
+                tokens_out += output_tokens
+                cached_input_tokens += cached
+                reasoning_output_tokens += reasoning
+                raw_total_tokens += reported_total
+                usage_records += 1
+
+    if usage_records == 0 or tokens_in + tokens_out == 0:
+        return None
+
+    return TokenSourceRecord(
+        source_provider="codex_session",
+        source_id=f"codex:{session_id}",
+        source_path=path,
+        # Timestamp of the last timestamped line in the file, whatever its type.
+        source_created_at=last_at,
+        session_id=session_id,
+        cwd=cwd,
+        model=model,
+        agent="codex",
+        tokens_in=tokens_in,
+        tokens_out=tokens_out,
+        cached_input_tokens=cached_input_tokens,
+        reasoning_output_tokens=reasoning_output_tokens,
+        raw_total_tokens=raw_total_tokens or None,
+        parser_version=PARSER_VERSION,
+        confidence=1.0,
+        raw_metadata={
+            "started_at": started_at.isoformat() if started_at else None,
+            "usage_records": usage_records,
+            "malformed_lines": malformed_lines,
+            "source_file_name": path.name,
+        },
+    )
+
+
+def collect_codex_sessions(codex_home: Path, since: datetime) -> list[TokenSourceRecord]:
+    """Parse all Codex session files and return one record per source id.
+
+    When several files yield the same ``source_id``, the record with the
+    larger ``tokens_total`` is kept (the first one wins a tie). The result is
+    sorted by ``source_created_at``; records without a timestamp sort first.
+    """
+    best: dict[str, TokenSourceRecord] = {}
+    for session_file in iter_codex_session_files(codex_home):
+        record = parse_codex_session(session_file, since)
+        if record is None:
+            continue
+        existing = best.get(record.source_id)
+        if existing is not None and record.tokens_total <= existing.tokens_total:
+            continue
+        best[record.source_id] = record
+    # Sort key used for records that carry no timestamp.
+    floor = datetime.min.replace(tzinfo=since.tzinfo)
+    return sorted(best.values(), key=lambda record: record.source_created_at or floor)
diff --git a/dashboard/src/token-cost.md b/dashboard/src/token-cost.md
index 1649e4d..4d9923e 100644
--- a/dashboard/src/token-cost.md
+++ b/dashboard/src/token-cost.md
@@ -9,79 +9,54 @@ const POLL = 60_000;
```
```js
-// Fetch token events, by-repo summary, workstreams, and tasks in parallel
+const evidenceSel = Inputs.radio(
+ ["Measured only", "Active evidence", "All evidence"],
+ {value: "Measured only", label: "Evidence"}
+);
+const sortSel = Inputs.select(
+ ["Tokens Total", "Event Count"],
+ {label: "Sort by"}
+);
+const maxSel = Inputs.select(
+ [10, 20, 50, 100, 500],
+ {value: 20, label: "Show"}
+);
+display(html`
${evidenceSel}${sortSel}${maxSel}
`);
+const evidenceMode = view(evidenceSel);
+const sortOrder = view(sortSel);
+const maxResults = view(maxSel);
+```
+
+```js
+// Map the selected evidence mode to the aggregate endpoint path and query.
+// Any mode other than the two named below gets the active-evidence query.
+function aggregatePath(mode) {
+  switch (mode) {
+    case "Measured only":
+      return "/token-events/aggregate/?measurement_kind=measured&include_superseded=false";
+    case "All evidence":
+      return "/token-events/aggregate/?include_superseded=true";
+    default:
+      return "/token-events/aggregate/?include_superseded=false";
+  }
+}
+
const tokenState = (async function*() {
let failures = 0;
while (true) {
- let byRepo = [], events = [], wsMap = {}, taskMap = {}, ok = false;
+ let aggregate = null, quality = null, ok = false;
try {
- const [r1, r2, r3, r4] = await Promise.all([
- apiFetch("/token-events/by-repo/"),
- apiFetch("/token-events/?limit=1000"),
- apiFetch("/workstreams/"),
- apiFetch("/tasks/"),
+ const [r1, r2] = await Promise.all([
+ apiFetch(aggregatePath(evidenceMode)),
+ apiFetch("/token-events/quality/"),
]);
ok = r1.ok && r2.ok;
if (ok) {
- byRepo = await r1.json();
- events = await r2.json();
- }
- if (r3.ok) {
- const wsList = await r3.json();
- for (const w of wsList) wsMap[w.id] = w;
- }
- if (r4.ok) {
- const taskList = await r4.json();
- for (const t of taskList) taskMap[t.id] = t;
+ aggregate = await r1.json();
+ quality = await r2.json();
}
} catch {}
failures = ok ? 0 : failures + 1;
- yield {byRepo, events, wsMap, taskMap, ok, ts: new Date()};
+ yield {aggregate, quality, ok, ts: new Date()};
await waitForVisible(pollDelay({ok, base: POLL, failures}));
}
})();
```
```js
-// Resolve an event's repo_id via the 3-level chain: direct → workstream → task→workstream
-function resolveRepoId(e, wsMap, taskMap) {
- if (e.repo_id) return e.repo_id;
- const wsId = e.workstream_id ?? taskMap[e.task_id]?.workstream_id;
- return wsId ? (wsMap[wsId]?.repo_id ?? null) : null;
-}
-
-function buildSummary(events) {
- const byWs = {}, byModel = {}, byTask = {};
- for (const e of events) {
- const tot = (e.tokens_in || 0) + (e.tokens_out || 0);
- if (e.workstream_id) {
- byWs[e.workstream_id] = byWs[e.workstream_id] || {scope_id: e.workstream_id, tokens_in: 0, tokens_out: 0, event_count: 0};
- byWs[e.workstream_id].tokens_in += e.tokens_in || 0;
- byWs[e.workstream_id].tokens_out += e.tokens_out || 0;
- byWs[e.workstream_id].event_count++;
- }
- const model = e.model || "unknown";
- byModel[model] = (byModel[model] || 0) + tot;
- if (e.task_id) {
- byTask[e.task_id] = byTask[e.task_id] || {task_id: e.task_id, tokens_in: 0, tokens_out: 0, event_count: 0};
- byTask[e.task_id].tokens_in += e.tokens_in || 0;
- byTask[e.task_id].tokens_out += e.tokens_out || 0;
- byTask[e.task_id].event_count++;
- }
- }
- const toRows = obj => Object.values(obj)
- .map(v => ({...v, tokens_total: (v.tokens_in || 0) + (v.tokens_out || 0)}))
- .sort((a, b) => b.tokens_total - a.tokens_total);
- return {
- by_workstream: toRows(byWs),
- by_model: Object.entries(byModel)
- .map(([model, tokens_total]) => ({model, tokens_total}))
- .sort((a, b) => b.tokens_total - a.tokens_total),
- top_tasks: toRows(byTask),
- total_events: events.length,
- };
-}
-
function nameCell(name, fullName) {
const s = String(name ?? fullName ?? "—");
const full = String(fullName ?? name ?? "—");
@@ -92,21 +67,40 @@ function nameCell(name, fullName) {
}
function sortRows(rows, sortField) {
- if (sortField === "Tokens Total") return rows; // already sorted by buildSummary / by-repo API
const s = [...rows];
- if (sortField === "Tokens In") s.sort((a, b) => (b.tokens_in || 0) - (a.tokens_in || 0));
- else if (sortField === "Tokens Out") s.sort((a, b) => (b.tokens_out || 0) - (a.tokens_out || 0));
- else if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0));
- else if (sortField === "Most Recent") s.sort((a, b) => (b._lastAt || 0) - (a._lastAt || 0));
+ if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0));
+ else s.sort((a, b) => (b.tokens_total || 0) - (a.tokens_total || 0));
return s;
}
+
+// Turn a {label: total} object into table rows sorted by total, largest first.
+// `labelKey` names the property that receives the label; null or undefined
+// input yields an empty array.
+function dictRows(obj, labelKey) {
+  const rows = [];
+  for (const [label, tokens_total] of Object.entries(obj ?? {})) {
+    rows.push({[labelKey]: label, tokens_total});
+  }
+  rows.sort((a, b) => b.tokens_total - a.tokens_total);
+  return rows;
+}
+
+// Flatten the quality summary into {metric, value} rows for the table,
+// in a fixed display order. A missing summary yields no rows.
+function metricRows(quality) {
+  if (!quality) return [];
+  const fields = [
+    ["Measured", "measured_event_count"],
+    ["Allocated", "allocated_event_count"],
+    ["Estimated", "estimated_event_count"],
+    ["Superseded", "superseded_event_count"],
+    ["Fallback", "fallback_event_count"],
+    ["Unattributed measured", "unattributed_measured_event_count"],
+    ["Missing provenance", "missing_provenance_event_count"],
+    ["Duplicate sources", "duplicate_source_count"],
+  ];
+  return fields.map(([metric, key]) => ({metric, value: quality[key]}));
+}
```
```js
-const byRepo = tokenState.byRepo ?? [];
-const events = tokenState.events ?? [];
-const wsMap = tokenState.wsMap ?? {};
-const taskMap = tokenState.taskMap ?? {};
+const aggregate = tokenState.aggregate ?? {
+ tokens_in: 0, tokens_out: 0, tokens_total: 0, event_count: 0,
+ by_repo: [], by_workstream: [], by_task: [], by_model: [],
+ by_measurement_kind: {}, by_source_provider: {},
+};
+const quality = tokenState.quality ?? null;
const _ok = tokenState.ok ?? false;
const _ts = tokenState.ts;
```
@@ -115,66 +109,37 @@ const _ts = tokenState.ts;
```js
display(html`
- ● ${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${events.length} events` : "API offline"}
+ ● ${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${aggregate.event_count.toLocaleString()} events · ${aggregate.tokens_total.toLocaleString()} tokens` : "API offline"}
`);
```
```js
-const repoSel = Inputs.select(
- ["All repos", ...byRepo.map(r => r.repo_slug)],
- {label: "Filter by repo"}
-);
-const sortSel = Inputs.select(
- ["Tokens Total", "Tokens In", "Tokens Out", "Event Count", "Most Recent"],
- {label: "Sort by"}
-);
-const maxSel = Inputs.select(
- [10, 20, 50, 100, 500],
- {value: 20, label: "Show"}
-);
-display(html`${repoSel}${sortSel}${maxSel}
`);
-const repoFilter = view(repoSel);
-const sortOrder = view(sortSel);
-const maxResults = view(maxSel);
-```
-
-```js
-// Build filtered and last-event-annotated row sets
-const selectedRepoId = repoFilter === "All repos"
- ? null
- : (byRepo.find(r => r.repo_slug === repoFilter)?.repo_id ?? null);
-
-const filteredEvents = selectedRepoId
- ? events.filter(e => resolveRepoId(e, wsMap, taskMap) === selectedRepoId)
- : events;
-
-const lastAtByRepo = {}, lastAtByWs = {}, lastAtByTask = {};
-for (const e of filteredEvents) {
- const t = e.created_at ? new Date(e.created_at).getTime() : 0;
- const rid = resolveRepoId(e, wsMap, taskMap);
- if (rid) lastAtByRepo[rid] = Math.max(lastAtByRepo[rid] || 0, t);
- if (e.workstream_id) lastAtByWs[e.workstream_id] = Math.max(lastAtByWs[e.workstream_id] || 0, t);
- if (e.task_id) lastAtByTask[e.task_id] = Math.max(lastAtByTask[e.task_id] || 0, t);
-}
-
-const filteredByRepo = (selectedRepoId
- ? byRepo.filter(r => r.repo_id === selectedRepoId)
- : byRepo
-).map(r => ({...r, _lastAt: lastAtByRepo[r.repo_id] || 0}));
-
-const summary = buildSummary(filteredEvents);
-const wsRowsFull = summary.by_workstream.map(r => ({...r, _lastAt: lastAtByWs[r.scope_id] || 0}));
-const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask[r.task_id] || 0}));
+display(html`
+
+
Tokens
+
${aggregate.tokens_total.toLocaleString()}
+
+
+
Events
+
${aggregate.event_count.toLocaleString()}
+
+
+
Last Event
+
${aggregate.last_event_at ? new Date(aggregate.last_event_at).toLocaleString() : "—"}
+
+
+
Last Ingested
+
${aggregate.last_ingested_at ? new Date(aggregate.last_ingested_at).toLocaleString() : "—"}
+
+
`);
```
## By Repo
```js
{
- const sorted = sortRows(filteredByRepo, sortOrder);
- const total = sorted.length;
- const rows = sorted.slice(0, maxResults);
-
+ const sorted = sortRows(aggregate.by_repo ?? [], sortOrder);
+ const rows = sorted.slice(0, maxResults);
if (rows.length === 0) {
display(html`No token events with repo association yet.
`);
} else {
@@ -184,40 +149,20 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
width: Math.min(900, width),
x: {label: "Tokens", tickFormat: "~s"},
y: {label: null},
- color: {legend: true, domain: ["tokens_in", "tokens_out"], range: ["#4e79a7","#f28e2b"]},
- marks: [
- Plot.barX(
- rows.flatMap(r => [
- {repo: r.repo_slug, type: "tokens_in", value: r.tokens_in},
- {repo: r.repo_slug, type: "tokens_out", value: r.tokens_out},
- ]),
- {x: "value", y: "repo", fill: "type", tip: true}
- ),
- ],
+ marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#4e79a7", tip: true})],
}));
-
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
- columns: ["_ref", "repo_slug", "tokens_in", "tokens_out", "tokens_total", "event_count"],
- header: {
- _ref: "REF",
- repo_slug: "Repo",
- tokens_in: "Tokens In",
- tokens_out: "Tokens Out",
- tokens_total: "Total",
- event_count: "Events",
- },
+ columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"],
+ header: {_ref: "REF", label: "Repo", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"},
format: {
- _ref: (_, i) => refCell(i + 1, "repos", rows[i].repo_slug),
- repo_slug: d => nameCell(d, d),
- tokens_in: d => d.toLocaleString(),
- tokens_out: d => d.toLocaleString(),
+ _ref: (_, i) => refCell(i + 1, "repos", rows[i].label),
+ label: d => nameCell(d, d),
+ tokens_in: d => d.toLocaleString(),
+ tokens_out: d => d.toLocaleString(),
tokens_total: d => d.toLocaleString(),
},
- width: {_ref: 50, repo_slug: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
+ width: {_ref: 50, label: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
}));
-
- if (total > maxResults)
- display(html`Showing ${maxResults} of ${total} repos
`);
}
}
```
@@ -226,38 +171,48 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
```js
{
- const sorted = sortRows(wsRowsFull, sortOrder);
- const total = sorted.length;
- const rows = sorted.slice(0, maxResults);
-
+ const sorted = sortRows(aggregate.by_workstream ?? [], sortOrder);
+ const rows = sorted.slice(0, maxResults);
if (rows.length === 0) {
display(html`No workstream data yet.
`);
} else {
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
- columns: ["_ref", "scope_id", "tokens_in", "tokens_out", "tokens_total", "event_count"],
- header: {
- _ref: "REF",
- scope_id: "Workstream",
- tokens_in: "Tokens In",
- tokens_out: "Tokens Out",
- tokens_total: "Total",
- event_count: "Events",
- },
+ columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"],
+ header: {_ref: "REF", label: "Workstream", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"},
format: {
- _ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id),
- scope_id: d => {
- const ws = wsMap[d];
- return nameCell(ws?.title ?? ws?.slug, d);
- },
- tokens_in: d => d.toLocaleString(),
- tokens_out: d => d.toLocaleString(),
+ _ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id),
+ label: d => nameCell(d, d),
+ tokens_in: d => d.toLocaleString(),
+ tokens_out: d => d.toLocaleString(),
tokens_total: d => d.toLocaleString(),
},
- width: {_ref: 50, scope_id: 200, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
+ width: {_ref: 50, label: 240, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
}));
+ }
+}
+```
- if (total > maxResults)
- display(html`Showing ${maxResults} of ${total} workstreams
`);
+## By Evidence
+
+```js
+{
+ const kindRows = dictRows(aggregate.by_measurement_kind, "kind");
+ const sourceRows = dictRows(aggregate.by_source_provider, "source");
+ if (kindRows.length === 0 && sourceRows.length === 0) {
+ display(html`No evidence breakdown yet.
`);
+ } else {
+ display(html`
+
${Inputs.table(kindRows, {
+ columns: ["kind", "tokens_total"],
+ header: {kind: "Kind", tokens_total: "Tokens"},
+ format: {tokens_total: d => d.toLocaleString()},
+ })}
+
${Inputs.table(sourceRows, {
+ columns: ["source", "tokens_total"],
+ header: {source: "Source", tokens_total: "Tokens"},
+ format: {tokens_total: d => d.toLocaleString()},
+ })}
+
`);
}
}
```
@@ -265,18 +220,38 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
## By Model
```js
-if (summary.by_model.length === 0) {
- display(html`No model data yet.
`);
+{
+ const rows = (aggregate.by_model ?? []).slice(0, maxResults);
+ if (rows.length === 0) {
+ display(html`No model data yet.
`);
+ } else {
+ display(Plot.plot({
+ title: "Token consumption by model",
+ marginLeft: 200,
+ width: Math.min(700, width),
+ x: {label: "Total tokens", tickFormat: "~s"},
+ marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#59a14f", tip: true})],
+ }));
+ }
+}
+```
+
+## Data Quality
+
+```js
+if (!quality) {
+ display(html`No quality data yet.
`);
} else {
- display(Plot.plot({
- title: "Token consumption by model",
- marginLeft: 200,
- width: Math.min(700, width),
- x: {label: "Total tokens", tickFormat: "~s"},
- marks: [
- Plot.barX(summary.by_model, {x: "tokens_total", y: "model", fill: "#4e79a7", tip: true}),
- ],
+ display(Inputs.table(metricRows(quality), {
+ columns: ["metric", "value"],
+ header: {metric: "Signal", value: "Count"},
+ format: {value: d => d.toLocaleString()},
}));
+ display(html`
+ Codex: ${quality.last_codex_ingested_at ? new Date(quality.last_codex_ingested_at).toLocaleString() : "—"}
+ · Claude: ${quality.last_claude_ingested_at ? new Date(quality.last_claude_ingested_at).toLocaleString() : "—"}
+ · Reconcile: ${quality.last_reconciliation_at ? new Date(quality.last_reconciliation_at).toLocaleString() : "—"}
+
`);
}
```
@@ -284,31 +259,23 @@ if (summary.by_model.length === 0) {
```js
{
- const sorted = sortRows(taskRowsFull, sortOrder);
- const total = sorted.length;
- const rows = sorted.slice(0, maxResults);
-
+ const sorted = sortRows(aggregate.by_task ?? [], sortOrder);
+ const rows = sorted.slice(0, maxResults);
if (rows.length === 0) {
display(html`No task-level data yet.
`);
} else {
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
- columns: ["_ref", "task_id", "tokens_in", "tokens_out", "tokens_total"],
- header: {_ref: "REF", task_id: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"},
+ columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total"],
+ header: {_ref: "REF", label: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"},
format: {
- _ref: (_, i) => refCell(i + 1, "tasks", rows[i].task_id),
- task_id: d => {
- const task = taskMap[d];
- return nameCell(task?.title, d);
- },
- tokens_in: d => d.toLocaleString(),
- tokens_out: d => d.toLocaleString(),
+ _ref: (_, i) => refCell(i + 1, "tasks", rows[i].scope_id),
+ label: d => nameCell(d, d),
+ tokens_in: d => d.toLocaleString(),
+ tokens_out: d => d.toLocaleString(),
tokens_total: d => d.toLocaleString(),
},
- width: {_ref: 50, task_id: 240},
+ width: {_ref: 50, label: 260},
}));
-
- if (total > maxResults)
- display(html`Showing ${maxResults} of ${total} tasks
`);
}
}
```
diff --git a/dashboard/src/token-events/[id].md b/dashboard/src/token-events/[id].md
index 770d3fd..d4c3ae0 100644
--- a/dashboard/src/token-events/[id].md
+++ b/dashboard/src/token-events/[id].md
@@ -23,10 +23,14 @@ if (raw.error) {
display(html`← Token Cost
`);
const FIELD_ORDER = [
- "id","tokens_in","tokens_out","tokens_total",
+ "id","measurement_kind","source_provider","source_id",
+ "tokens_in","tokens_out","tokens_total","token_evidence_total",
+ "cached_input_tokens","reasoning_output_tokens","raw_total_tokens",
"note","model","agent","session_id",
"task_id","workstream_id","repo_id",
- "ref_type","ref_id","created_at",
+ "ref_type","ref_id","source_path","source_created_at",
+ "parser_version","confidence","ingested_at","created_at",
+ "raw_metadata",
];
const rows = FIELD_ORDER.map(k => fieldRow(k, raw[k] ?? null));
diff --git a/docs/multi-user-access-model.md b/docs/multi-user-access-model.md
new file mode 100644
index 0000000..04cb5b5
--- /dev/null
+++ b/docs/multi-user-access-model.md
@@ -0,0 +1,75 @@
+# State Hub Multi-User Access Model
+
+State Hub is local-first coordination infrastructure. It reflects repo-backed
+workplans, progress, and operational state; it is not the authority for source
+control, host access, identity, or runtime secret custody.
+
+## Decision
+
+For the current phase, enforce user access through the systems that already own
+the boundary:
+
+- Gitea controls repository read/write rights.
+- SSH authorized keys control host access.
+- ops-bridge controls whether a remote machine can reach local services.
+- OpenBao controls runtime secret custody after bootstrap.
+
+State Hub API authentication is deferred until there is an active external
+collaborator or an exposed deployment that needs per-user write enforcement.
+Until then, State Hub stays private to local or tunneled operator networks.
+
+## Roles
+
+| Role | State Hub access | Source of authority |
+|------|------------------|---------------------|
+| Primary operator | Full read/write across domains | host access, repo ownership, operator secret custody |
+| Domain collaborator | Read all public coordination state; write through owned domain repo and approved hub actions | Gitea repo permissions plus SSH/tunnel authorization |
+| Observer | Read-only brief/dashboard access where explicitly exposed | tunnel or future API token |
+
+## Current Enforcement Boundary
+
+1. Repo files remain authoritative. A collaborator can change workplans only in
+ repos where Gitea allows them to push.
+2. State Hub indexes files and records progress events, but it should not become
+ the primary identity authority.
+3. Direct dashboard/API access is private by default. Do not publish State Hub
+ unauthenticated on the public internet.
+4. Runtime secrets, service account keys, database credentials, and package
+ tokens should move into OpenBao after the OpenBao bootstrap, unseal, audit,
+ and recovery procedure is complete.
+
+## Future API Auth Trigger
+
+Add API-layer auth when one of these becomes true:
+
+- a second human needs direct State Hub API/dashboard mutation rights
+- State Hub is exposed beyond localhost or a tightly controlled SSH tunnel
+- automation needs per-consumer attribution and revocation independent of repo
+ commits
+- domain-scoped write checks are needed at request time
+
+## Future Token Shape
+
+When the trigger is reached, implement a small token model rather than a full
+identity provider inside State Hub:
+
+- accept NetKingdom IAM Profile OIDC tokens when the identity plane is ready
+- support one emergency local admin token for break-glass operation
+- map claims to `primary_operator`, `domain_collaborator`, or `observer`
+- enforce domain write scopes in mutating endpoints
+- keep repo permissions as the durable source of contribution authority
+
+Candidate scopes:
+
+```text
+statehub:read
+statehub:write
+statehub:domain:<domain-slug>:write
+statehub:admin
+```
+
+## Operator Rule
+
+Do not store collaborator credentials in the State Hub database. Store secrets
+in OpenBao or the approved bootstrap bundle, and store source permissions in
+Gitea.
diff --git a/docs/onboarding.md b/docs/onboarding.md
new file mode 100644
index 0000000..f39e6b5
--- /dev/null
+++ b/docs/onboarding.md
@@ -0,0 +1,212 @@
+# State Hub Onboarding
+
+This guide turns a new machine into a usable State Hub operator or collaborator
+environment. It covers local credentials, SSH reachability, Gitea access, and
+Claude Code MCP registration.
+
+State Hub remains a coordination read/cache layer. Repo permissions, SSH
+access, and controlled tunnels are the first access boundary. OpenBao is the
+runtime secret authority for platform and workload secrets once its bootstrap
+ceremony is complete.
+
+## Quick Start
+
+Clone the repo, then run the bootstrap script:
+
+```bash
+git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
+cd ~/state-hub
+make bootstrap-env
+```
+
+On a clean Ubuntu 24.04 machine, allow package installation explicitly:
+
+```bash
+make bootstrap-env ARGS="--install-missing"
+```
+
+For a remote machine that reaches State Hub through ops-bridge:
+
+```bash
+make bridges
+make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
+```
+
+Restart Claude Code after MCP registration.
+
+## Primary Operator: New Machine
+
+1. Install minimal host prerequisites:
+
+ ```bash
+ sudo apt-get update
+ sudo apt-get install -y git curl openssh-client make python3
+ ```
+
+2. Clone `state-hub` and any domain repo you expect to operate:
+
+ ```bash
+ git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
+ git clone https://gitea.coulomb.social/coulomb/the-custodian.git ~/the-custodian
+ ```
+
+3. Run the bootstrap:
+
+ ```bash
+ cd ~/state-hub
+ make bootstrap-env ARGS="--install-missing"
+ ```
+
+ The script will:
+
+ - check required tools
+ - configure `git credential.helper`
+ - create `~/.ssh/id_ed25519` when missing
+ - print the public key for managed hosts
+ - create `~/.railiance_gitea.conf` when you provide a Gitea token
+ - register the State Hub MCP server for Claude Code
+ - check State Hub API reachability
+
+4. Authorize the SSH key on managed hosts. If password or existing key access
+ is available, rerun:
+
+ ```bash
+ make bootstrap-env ARGS="--authorize-ssh --skip-gitea --skip-mcp"
+ ```
+
+ Default targets:
+
+ - `tegwick@92.205.62.239` for Railiance01
+ - `tegwick@92.205.130.254` for CoulombCore
+
+5. Start or connect to State Hub:
+
+ ```bash
+ make api
+ make mcp-http
+ ```
+
+ If the hub is remote, use ops-bridge:
+
+ ```bash
+ make bridges
+ ```
+
+6. Restart Claude Code and verify that `state-hub` appears in the MCP server
+ list. In the first session, call `get_state_summary()` when MCP tools are
+ available. If not, use:
+
+ ```bash
+ cat .custodian-brief.md
+ curl -s "http://127.0.0.1:8000/workstreams/?status=active" | python3 -m json.tool
+ ```
+
+## Domain Collaborator: New Person
+
+1. Get a Gitea account with write access to the relevant domain repo.
+2. Clone this repo and the domain repo:
+
+ ```bash
+ git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
+ git clone https://gitea.coulomb.social/coulomb/<domain-repo>.git ~/<domain-repo>
+ ```
+
+3. Run the bootstrap:
+
+ ```bash
+ cd ~/state-hub
+ make bootstrap-env
+ ```
+
+4. Send the printed SSH public key to the operator, or authorize it yourself if
+ you already have host access:
+
+ ```bash
+ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.62.239
+ ```
+
+5. Bring up the State Hub tunnel when direct local access is unavailable:
+
+ ```bash
+ make bridges
+ make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
+ ```
+
+6. Restart Claude Code, open the domain repo, and orient from the repo brief:
+
+ ```bash
+ cat .custodian-brief.md
+ ```
+
+7. Contribute work through repo-backed workplans. A new workplan lives under
+ `workplans/` and follows ADR-001. The hub indexes files; the files remain
+ authoritative.
+
+## Credential Helper Choices
+
+`make bootstrap-env` configures Git credentials only when no global helper is
+already set.
+
+Default behavior:
+
+- use `libsecret` when the helper exists
+- otherwise use `credential.helper=cache --timeout=3600`
+
+For headless hosts where a persistent plaintext helper is acceptable:
+
+```bash
+make bootstrap-env ARGS="--git-helper store --allow-plaintext-store"
+```
+
+Prefer SSH remotes or a keyring-backed helper for normal operator machines.
+
+## Gitea Token File
+
+Some Railiance scripts read `~/.railiance_gitea.conf`:
+
+```bash
+GITEA_URL="http://92.205.130.254:32166"
+GITEA_USER="<your-gitea-user>"
+GITEA_TOKEN="<your-gitea-token>"
+```
+
+Required token capabilities depend on the action:
+
+- repo creation needs `read:user` and repository write/admin scope
+- package publishing needs package write scope
+- inventory reads need repository read scope
+
+The bootstrap script writes this file with mode `0600` and does not print the
+token.
+
+## MCP Registration
+
+Local registration:
+
+```bash
+make register-mcp
+```
+
+Tunnel registration:
+
+```bash
+make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
+```
+
+The current State Hub MCP transport is SSE. The old `.mcp.json`/stdio flow is
+legacy; use `make mcp-http` to run the SSE service on `127.0.0.1:8001`.
+
+## Verification Checklist
+
+Run these checks after bootstrap:
+
+```bash
+git config --global --get credential.helper
+test -f ~/.ssh/id_ed25519.pub
+test -f ~/.railiance_gitea.conf
+curl -fsS http://127.0.0.1:8000/state/health || curl -fsS http://127.0.0.1:18000/state/health
+make register-mcp DRY_RUN=1
+```
+
+Then restart Claude Code and confirm that the `state-hub` MCP server is
+available.
diff --git a/docs/token-evidence-model.md b/docs/token-evidence-model.md
new file mode 100644
index 0000000..d631e59
--- /dev/null
+++ b/docs/token-evidence-model.md
@@ -0,0 +1,57 @@
+# Token Evidence Model
+
+State Hub token events distinguish source-backed measurements from inferred
+operational signals. Dashboards and reports should use structured fields for
+quality and provenance; `note` remains human context only.
+
+## Measurement Kinds
+
+| Kind | Meaning | Default confidence |
+| --- | --- | --- |
+| `measured` | Parsed from a source that reports usage metadata, such as Codex session logs or Claude transcript usage blocks. | `1.0` |
+| `allocated` | A share of a larger known total, assigned to a task/workstream by a documented allocation method. | `0.70` |
+| `estimated` | A fallback or operator-entered estimate without direct source evidence. | `0.35` |
+| `superseded` | Historical rows retained for audit but excluded from active totals. | `0.0` |
+
+## Source Providers
+
+| Provider | Source |
+| --- | --- |
+| `codex_session` | Codex Desktop `.codex/sessions/**` and `.codex/archived_sessions/**` JSONL token_count events. |
+| `claude_transcript` | Claude Code `.claude/projects/**/*.jsonl` usage metadata. Transcript text is never stored. |
+| `llm_connect` | Future llm-connect usage metadata. |
+| `manual` | Explicit operator/API input. |
+| `task_fallback` | Fixed task-completion fallback rows created when no source data is available. |
+
+## Provenance Fields
+
+Each source-backed row should include:
+
+- `source_provider`, `source_id`, `source_path`, `source_created_at`
+- `parser_version`, `ingested_at`, `confidence`
+- `cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`
+- `raw_metadata` with parser and attribution metadata, never transcript content
+
+`tokens_in + tokens_out` remains the default active total. Cached input and
+reasoning output are preserved separately so dashboards can show both default
+and provider-style totals without rewriting history.
+
+## Idempotency
+
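+Measured sources must be written with a stable `source_id`. State Hub enforces
+one row for each `(measurement_kind, source_provider, source_id)` tuple, and
+`POST /token-events/upsert` updates a growing live session rather than creating
+duplicates. Rows without a `source_id` are not covered by this guarantee,
+because PostgreSQL treats `NULL` values as distinct in unique constraints.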
+
+## Migration Playbook
+
+1. Run the token-event provenance migration.
+2. Run `python3 scripts/token_reconcile.py --since 2026-05-19` and inspect the
+ dry-run report.
+3. Run `python3 scripts/token_reconcile.py --since 2026-05-19 --apply` to
+ upsert measured Codex/Claude source rows.
+4. Run the same command with `--zero-superseded-fallbacks` only after measured
+ source rows cover the affected window.
+5. Check `/token-events/quality/` or the Token Cost dashboard for fallback,
+ missing-provenance, duplicate-source, and unattributed measured signals.
+6. Keep historical fallback rows as `superseded`; do not delete them.
diff --git a/migrations/versions/v9q0r1s2t3u4_token_event_provenance.py b/migrations/versions/v9q0r1s2t3u4_token_event_provenance.py
new file mode 100644
index 0000000..b604bb4
--- /dev/null
+++ b/migrations/versions/v9q0r1s2t3u4_token_event_provenance.py
@@ -0,0 +1,128 @@
+"""add token event provenance fields
+
+Revision ID: v9q0r1s2t3u4
+Revises: u8p9q0r1s2t3
+Create Date: 2026-05-23
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+revision = "v9q0r1s2t3u4"
+down_revision = "u8p9q0r1s2t3"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ op.add_column(
+ "token_events",
+ sa.Column("measurement_kind", sa.Text(), nullable=False, server_default="estimated"),
+ )
+ op.add_column(
+ "token_events",
+ sa.Column("source_provider", sa.Text(), nullable=False, server_default="manual"),
+ )
+ op.add_column("token_events", sa.Column("source_id", sa.Text(), nullable=True))
+ op.add_column("token_events", sa.Column("source_path", sa.Text(), nullable=True))
+ op.add_column(
+ "token_events",
+ sa.Column("source_created_at", sa.TIMESTAMP(timezone=True), nullable=True),
+ )
+ op.add_column(
+ "token_events",
+ sa.Column("ingested_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+ )
+ op.add_column("token_events", sa.Column("parser_version", sa.Text(), nullable=True))
+ op.add_column(
+ "token_events",
+ sa.Column("confidence", sa.Float(), nullable=False, server_default="0.35"),
+ )
+ op.add_column(
+ "token_events",
+ sa.Column("cached_input_tokens", sa.Integer(), nullable=False, server_default="0"),
+ )
+ op.add_column(
+ "token_events",
+ sa.Column("reasoning_output_tokens", sa.Integer(), nullable=False, server_default="0"),
+ )
+ op.add_column("token_events", sa.Column("raw_total_tokens", sa.Integer(), nullable=True))
+ op.add_column("token_events", sa.Column("cost_estimated_usd", sa.Float(), nullable=True))
+ op.add_column(
+ "token_events",
+ sa.Column(
+ "raw_metadata",
+ postgresql.JSONB(astext_type=sa.Text()),
+ nullable=False,
+ server_default=sa.text("'{}'::jsonb"),
+ ),
+ )
+
+ op.execute(
+ """
+ UPDATE token_events
+ SET
+ measurement_kind = CASE
+ WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 'superseded'
+ WHEN note = 'workplan' THEN 'allocated'
+ WHEN note = 'heuristic' THEN 'estimated'
+ WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 'measured'
+ ELSE measurement_kind
+ END,
+ source_provider = CASE
+ WHEN note = 'heuristic' THEN 'task_fallback'
+ WHEN note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%' THEN 'codex_session'
+ WHEN note = 'measured' AND agent ILIKE '%claude%' THEN 'claude_transcript'
+ ELSE source_provider
+ END,
+ source_id = CASE
+ WHEN source_id IS NULL AND (note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%')
+ THEN ref_id
+ ELSE source_id
+ END,
+ raw_total_tokens = CASE
+ WHEN raw_total_tokens IS NULL THEN tokens_in + tokens_out
+ ELSE raw_total_tokens
+ END,
+ confidence = CASE
+ WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 0.0
+ WHEN note = 'heuristic' THEN 0.35
+ WHEN note = 'workplan' THEN 0.70
+ WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 1.0
+ ELSE confidence
+ END
+ """
+ )
+
+ op.create_index("ix_token_events_measurement_kind", "token_events", ["measurement_kind"])
+ op.create_index("ix_token_events_source_provider", "token_events", ["source_provider"])
+ op.create_index("ix_token_events_source_id", "token_events", ["source_id"])
+ op.create_index("ix_token_events_source_created_at", "token_events", ["source_created_at"])
+ op.create_index("ix_token_events_ingested_at", "token_events", ["ingested_at"])
+ op.create_unique_constraint(
+ "uq_token_events_source_identity",
+ "token_events",
+ ["measurement_kind", "source_provider", "source_id"],
+ )
+
+
+def downgrade() -> None:
+ """Drop the source-identity constraint, the indexes and every provenance column.
+
+ Objects are removed in the reverse order of ``upgrade``. The values derived
+ for legacy rows are lost together with the columns.
+ """
+ op.drop_constraint("uq_token_events_source_identity", "token_events", type_="unique")
+ op.drop_index("ix_token_events_ingested_at", table_name="token_events")
+ op.drop_index("ix_token_events_source_created_at", table_name="token_events")
+ op.drop_index("ix_token_events_source_id", table_name="token_events")
+ op.drop_index("ix_token_events_source_provider", table_name="token_events")
+ op.drop_index("ix_token_events_measurement_kind", table_name="token_events")
+ op.drop_column("token_events", "raw_metadata")
+ op.drop_column("token_events", "cost_estimated_usd")
+ op.drop_column("token_events", "raw_total_tokens")
+ op.drop_column("token_events", "reasoning_output_tokens")
+ op.drop_column("token_events", "cached_input_tokens")
+ op.drop_column("token_events", "confidence")
+ op.drop_column("token_events", "parser_version")
+ op.drop_column("token_events", "ingested_at")
+ op.drop_column("token_events", "source_created_at")
+ op.drop_column("token_events", "source_path")
+ op.drop_column("token_events", "source_id")
+ op.drop_column("token_events", "source_provider")
+ op.drop_column("token_events", "measurement_kind")
diff --git a/migrations/versions/w0r1s2t3u4v5_token_event_legacy_source_ids.py b/migrations/versions/w0r1s2t3u4v5_token_event_legacy_source_ids.py
new file mode 100644
index 0000000..a9d0227
--- /dev/null
+++ b/migrations/versions/w0r1s2t3u4v5_token_event_legacy_source_ids.py
@@ -0,0 +1,33 @@
+"""assign legacy source ids to measured token events
+
+Revision ID: w0r1s2t3u4v5
+Revises: v9q0r1s2t3u4
+Create Date: 2026-05-23
+"""
+from alembic import op
+
+revision = "w0r1s2t3u4v5"
+down_revision = "v9q0r1s2t3u4"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ """Give every measured row without a ``source_id`` a synthetic one.
+
+ The generated id has the form ``<source_provider>:legacy:<row id>``, which is
+ unique per row because it embeds the row's own ``id``.
+ """
+ op.execute(
+ """
+ UPDATE token_events
+ SET source_id = source_provider || ':legacy:' || id::text
+ WHERE measurement_kind = 'measured'
+ AND source_id IS NULL
+ """
+ )
+
+
+def downgrade() -> None:
+ """Clear the synthetic legacy ids again.
+
+ Only rows whose ``source_id`` still equals the generated
+ ``<source_provider>:legacy:<row id>`` value are reset to NULL.
+ """
+ op.execute(
+ """
+ UPDATE token_events
+ SET source_id = NULL
+ WHERE source_id = source_provider || ':legacy:' || id::text
+ """
+ )
diff --git a/scripts/backfill_codex_token_events.py b/scripts/backfill_codex_token_events.py
new file mode 100644
index 0000000..0dc306e
--- /dev/null
+++ b/scripts/backfill_codex_token_events.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""Backfill State Hub token events from local Codex session logs.
+
+The parser lives in ``api.services.token_sources.codex`` so this CLI only
+handles operator flags, repo attribution, idempotent writes, and fallback
+cleanup.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+ROOT = Path(__file__).resolve().parent.parent
+if str(ROOT) not in sys.path:
+ sys.path.insert(0, str(ROOT))
+
+from api.services.token_sources import collect_codex_sessions, parse_iso # noqa: E402
+from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
+
+DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
+BACKFILL_NOTE = "backfill:codex-session"
+SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_codex_backfill"
+
+
+def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
+ url = f"{api_base.rstrip('/')}/{path.lstrip('/')}"
+ data = None
+ headers = {"Content-Type": "application/json"}
+ if body is not None:
+ data = json.dumps(body).encode("utf-8")
+ req = urllib.request.Request(url, data=data, headers=headers, method=method)
+ with urllib.request.urlopen(req, timeout=30) as resp:
+ return json.loads(resp.read() or b"null")
+
+
+def find_codex_home(explicit: str | None) -> Path:
+ candidates: list[Path] = []
+ if explicit:
+ candidates.append(Path(explicit))
+ env_home = os.environ.get("CODEX_HOME")
+ if env_home:
+ candidates.append(Path(env_home))
+ candidates.extend(
+ [
+ Path.home() / ".codex",
+ Path("/mnt/c/Users/bernd.worsch/.codex"),
+ ]
+ )
+ for candidate in candidates:
+ if candidate.is_dir():
+ return candidate
+ raise SystemExit("Could not find Codex home; pass --codex-home")
+
+
+def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
+ events: list[dict[str, Any]] = []
+ offset = 0
+ while True:
+ page_params = {**params, "limit": 1000, "offset": offset}
+ encoded = urllib.parse.urlencode(page_params)
+ page = http_json(api_base, "GET", f"/token-events/?{encoded}")
+ if not isinstance(page, list) or not page:
+ break
+ events.extend(page)
+ if len(page) < 1000:
+ break
+ offset += 1000
+ return events
+
+
+def existing_codex_events(api_base: str) -> dict[str, dict[str, Any]]:
+ events = list_events(
+ api_base,
+ {"source_provider": "codex_session", "include_superseded": "true"},
+ )
+ by_source: dict[str, dict[str, Any]] = {}
+ for event in events:
+ source_id = event.get("source_id") or event.get("ref_id")
+ if isinstance(source_id, str):
+ by_source[source_id] = event
+ return by_source
+
+
+def fetch_heuristics(api_base: str, since: str) -> list[dict[str, Any]]:
+ return list_events(
+ api_base,
+ {
+ "source_provider": "task_fallback",
+ "note": "heuristic",
+ "since": since,
+ "include_superseded": "false",
+ },
+ )
+
+
+def patch_superseded_heuristic(api_base: str, event_id: str) -> None:
+ http_json(
+ api_base,
+ "PATCH",
+ f"/token-events/{event_id}",
+ {
+ "tokens_in": 0,
+ "tokens_out": 0,
+ "note": SUPERSEDED_HEURISTIC_NOTE,
+ "measurement_kind": "superseded",
+ "source_provider": "task_fallback",
+ "confidence": 0.0,
+ "raw_total_tokens": 0,
+ },
+ )
+
+
+def main() -> int:
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument("--since", default="2026-05-19", help="UTC date/time to backfill from")
+ parser.add_argument("--api-base", default=DEFAULT_API)
+ parser.add_argument("--codex-home")
+ parser.add_argument("--apply", action="store_true", help="write backfill events")
+ parser.add_argument(
+ "--zero-heuristics",
+ action="store_true",
+ help="set post-since heuristic task fallback events to zero after backfill",
+ )
+ args = parser.parse_args()
+
+ since = parse_iso(args.since)
+ since_param = since.isoformat()
+ codex_home = find_codex_home(args.codex_home)
+ repo_refs = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
+ existing = existing_codex_events(args.api_base)
+ sessions = collect_codex_sessions(codex_home, since)
+
+ planned: list[tuple[str, Any, str | None, str | None]] = []
+ by_repo: dict[str, list[int]] = {}
+ for session in sessions:
+ event = existing.get(session.source_id)
+ existing_total = (event.get("tokens_in", 0) + event.get("tokens_out", 0)) if event else 0
+ action = "create" if event is None else ("update" if session.tokens_total > existing_total else "skip")
+ match = resolve_repo(session.cwd, repo_refs)
+ repo_id = match.repo_id if match else None
+ repo_slug = match.slug if match else None
+ if action != "skip":
+ planned.append((action, session, repo_id, repo_slug))
+ label = repo_slug or "(unattributed)"
+ totals = by_repo.setdefault(label, [0, 0, 0])
+ totals[0] += 1
+ totals[1] += session.tokens_in
+ totals[2] += session.tokens_out
+
+ heuristics = fetch_heuristics(args.api_base, since_param) if args.zero_heuristics else []
+
+ print(f"codex_home: {codex_home}")
+ print(f"since: {since.isoformat()}")
+ print(f"sessions found: {len(sessions)}")
+ print(f"backfill events to create: {sum(1 for action, *_ in planned if action == 'create')}")
+ print(f"backfill events to update: {sum(1 for action, *_ in planned if action == 'update')}")
+ for repo_slug, (count, tokens_in, tokens_out) in sorted(by_repo.items()):
+ print(f" {repo_slug}: {count} sessions, {tokens_in + tokens_out:,} tokens")
+ if args.zero_heuristics:
+ total = sum((e.get("tokens_in") or 0) + (e.get("tokens_out") or 0) for e in heuristics)
+ print(f"heuristic events to zero: {len(heuristics)} ({total:,} tokens)")
+
+ if not args.apply:
+ print("dry run only; pass --apply to write changes")
+ return 0
+
+ for _action, session, repo_id, repo_slug in planned:
+ payload = session.to_token_event_payload(repo_id=repo_id)
+ payload["note"] = BACKFILL_NOTE
+ payload["raw_metadata"] = {
+ **payload.get("raw_metadata", {}),
+ "repo_slug": repo_slug,
+ "attribution_method": resolve_repo(session.cwd, repo_refs).method if resolve_repo(session.cwd, repo_refs) else None,
+ }
+ http_json(args.api_base, "POST", "/token-events/upsert", payload)
+ for event in heuristics:
+ patch_superseded_heuristic(args.api_base, event["id"])
+
+ print(f"upserted {len(planned)} backfill events")
+ if args.zero_heuristics:
+ print(f"zeroed {len(heuristics)} heuristic events")
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/scripts/bootstrap-env.sh b/scripts/bootstrap-env.sh
new file mode 100755
index 0000000..c9d3b2a
--- /dev/null
+++ b/scripts/bootstrap-env.sh
@@ -0,0 +1,369 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+GITEA_CONF="${GITEA_CONF:-$HOME/.railiance_gitea.conf}"
+GITEA_URL="${GITEA_URL:-http://92.205.130.254:32166}"
+GITEA_USER="${GITEA_USER:-}"
+GITEA_TOKEN="${GITEA_TOKEN:-}"
+GIT_HELPER="${GIT_HELPER:-auto}"
+INSTALL_MISSING=0
+NON_INTERACTIVE=0
+DRY_RUN=0
+AUTHORIZE_SSH=0
+ALLOW_PLAINTEXT_STORE=0
+SKIP_GITEA=0
+SKIP_MCP=0
+SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519}"
+SSH_TARGETS=(
+ "tegwick@92.205.62.239"
+ "tegwick@92.205.130.254"
+)
+
+usage() {
+ # Print the command-line help to stdout. The quoted heredoc delimiter keeps
+ # the text literal (no variable or command expansion).
+ cat <<'USAGE'
+Usage: scripts/bootstrap-env.sh [options]
+
+Idempotently prepares a State Hub operator or collaborator environment.
+
+Options:
+ --install-missing Install missing apt packages when possible.
+ --non-interactive Do not prompt; warn instead of asking for secrets.
+ --dry-run Show intended actions without changing local config.
+ --git-helper MODE auto, libsecret, cache, or store. Default: auto.
+ --allow-plaintext-store Allow git credential.helper=store in auto mode.
+ --authorize-ssh Run ssh-copy-id for configured SSH targets.
+ --ssh-target USER@HOST Add an SSH authorization target. May repeat.
+ --gitea-url URL Gitea base URL for ~/.railiance_gitea.conf.
+ --gitea-user USER Gitea user for ~/.railiance_gitea.conf.
+ --gitea-token TOKEN Gitea token; otherwise prompted when interactive.
+ --skip-gitea Do not create or update ~/.railiance_gitea.conf.
+ --skip-mcp Do not run make register-mcp.
+ -h, --help Show this help.
+USAGE
+}
+
+ok() { printf '[OK] %s\n' "$*"; }
+warn() { printf '[WARN] %s\n' "$*"; }
+err() { printf '[ERR] %s\n' "$*" >&2; }
+step() { printf '\n==> %s\n' "$*"; }
+
+# Execute the given command, or only print it when --dry-run is active.
+run() {
+  if [ "$DRY_RUN" -ne 1 ]; then
+    "$@"
+    return
+  fi
+  printf 'DRY-RUN: %s\n' "$*"
+}
+
+need_arg() {
+ if [ -z "${2:-}" ]; then
+ err "$1 requires a value"
+ exit 2
+ fi
+}
+
+# Parse command-line options. Flags set their variable to 1; options that take
+# a value are validated with need_arg and consume two arguments. Unknown
+# arguments print the usage to stderr and exit with status 2.
+while [ "$#" -gt 0 ]; do
+ case "$1" in
+ --install-missing)
+ INSTALL_MISSING=1
+ shift
+ ;;
+ --non-interactive)
+ NON_INTERACTIVE=1
+ shift
+ ;;
+ --dry-run)
+ DRY_RUN=1
+ shift
+ ;;
+ --git-helper)
+ need_arg "$1" "${2:-}"
+ GIT_HELPER="$2"
+ shift 2
+ ;;
+ --allow-plaintext-store)
+ ALLOW_PLAINTEXT_STORE=1
+ shift
+ ;;
+ --authorize-ssh)
+ AUTHORIZE_SSH=1
+ shift
+ ;;
+ --ssh-target)
+ need_arg "$1" "${2:-}"
+ # Appends to the built-in default targets rather than replacing them.
+ SSH_TARGETS+=("$2")
+ shift 2
+ ;;
+ --gitea-url)
+ need_arg "$1" "${2:-}"
+ GITEA_URL="$2"
+ shift 2
+ ;;
+ --gitea-user)
+ need_arg "$1" "${2:-}"
+ GITEA_USER="$2"
+ shift 2
+ ;;
+ --gitea-token)
+ need_arg "$1" "${2:-}"
+ GITEA_TOKEN="$2"
+ shift 2
+ ;;
+ --skip-gitea)
+ SKIP_GITEA=1
+ shift
+ ;;
+ --skip-mcp)
+ SKIP_MCP=1
+ shift
+ ;;
+ -h|--help)
+ usage
+ exit 0
+ ;;
+ *)
+ err "unknown argument: $1"
+ usage >&2
+ exit 2
+ ;;
+ esac
+done
+
+# Reject unsupported helper modes up front; GIT_HELPER may come from the
+# environment as well as from --git-helper.
+case "$GIT_HELPER" in
+ auto|libsecret|cache|store) ;;
+ *)
+ err "--git-helper must be auto, libsecret, cache, or store"
+ exit 2
+ ;;
+esac
+
+apt_install() {
+ local packages=("$@")
+ if [ "$INSTALL_MISSING" -ne 1 ]; then
+ warn "Missing packages: ${packages[*]}"
+ warn "Rerun with --install-missing or install them manually."
+ return
+ fi
+ if ! command -v sudo >/dev/null 2>&1; then
+ warn "sudo is not available; cannot install: ${packages[*]}"
+ return
+ fi
+ run sudo apt-get update
+ run sudo apt-get install -y "${packages[@]}"
+}
+
+# Report which required and optional tools are on PATH and hand the apt
+# packages that provide the missing required ones to apt_install.
+check_commands() {
+  step "Checking prerequisites"
+  local missing_packages=()
+  local commands=(git curl ssh-keygen ssh-copy-id python3 make)
+  local optional=(sops age helm kubectl uv claude)
+  local cmd pkg
+
+  for cmd in "${commands[@]}"; do
+    if command -v "$cmd" >/dev/null 2>&1; then
+      ok "$cmd found"
+      continue
+    fi
+    warn "$cmd missing"
+    # Command names are not always package names: both SSH tools ship in
+    # openssh-client, and asking apt for "ssh-keygen" would fail.
+    case "$cmd" in
+      ssh-keygen|ssh-copy-id) pkg="openssh-client" ;;
+      *) pkg="$cmd" ;;
+    esac
+    # Queue each package only once.
+    case " ${missing_packages[*]:-} " in
+      *" $pkg "*) ;;
+      *) missing_packages+=("$pkg") ;;
+    esac
+  done
+
+  # Optional tools are reported but never installed.
+  for cmd in "${optional[@]}"; do
+    if command -v "$cmd" >/dev/null 2>&1; then
+      ok "$cmd found"
+    else
+      warn "$cmd missing"
+    fi
+  done
+
+  if [ "${#missing_packages[@]}" -gt 0 ]; then
+    apt_install "${missing_packages[@]}"
+  fi
+}
+
+libsecret_helper_path() {
+ local candidates=(
+ "/usr/share/doc/git/contrib/credential/libsecret/git-credential-libsecret"
+ "/usr/lib/git-core/git-credential-libsecret"
+ "/usr/libexec/git-core/git-credential-libsecret"
+ )
+ local candidate
+ for candidate in "${candidates[@]}"; do
+ if [ -x "$candidate" ]; then
+ printf '%s\n' "$candidate"
+ return 0
+ fi
+ done
+ return 1
+}
+
+# Try to build the libsecret credential helper from git's contrib sources.
+# Build dependencies are requested through apt_install only when the contrib
+# directory is absent; the build runs only if the directory exists afterwards.
+build_libsecret_helper() {
+ local source_dir="/usr/share/doc/git/contrib/credential/libsecret"
+ if [ ! -d "$source_dir" ]; then
+ apt_install libsecret-1-0 libsecret-1-dev make gcc
+ fi
+ if [ -d "$source_dir" ]; then
+ # NOTE(review): sudo is used without checking that it is installed, and this
+ # runs even without --install-missing — confirm that is intended.
+ run sudo make -C "$source_dir"
+ fi
+}
+
+# Set the global git credential.helper unless one is already configured.
+# In auto mode the preference is: libsecret if a helper binary exists, then
+# store if --allow-plaintext-store was given, otherwise the in-memory cache.
+configure_git_helper() {
+ step "Configuring Git credential helper"
+
+ local current
+ # `|| true`: a missing setting makes git exit non-zero, which must not
+ # trip `set -e`.
+ current="$(git config --global --get credential.helper || true)"
+ if [ -n "$current" ]; then
+ ok "credential.helper already set: $current"
+ return
+ fi
+
+ local helper="$GIT_HELPER"
+ if [ "$helper" = "auto" ]; then
+ if libsecret_helper_path >/dev/null 2>&1; then
+ helper="libsecret"
+ elif [ "$ALLOW_PLAINTEXT_STORE" -eq 1 ]; then
+ helper="store"
+ else
+ helper="cache"
+ fi
+ fi
+
+ case "$helper" in
+ libsecret)
+ local path
+ path="$(libsecret_helper_path || true)"
+ if [ -z "$path" ]; then
+ # Explicit --git-helper libsecret without a binary: try to build it once.
+ build_libsecret_helper
+ path="$(libsecret_helper_path || true)"
+ fi
+ if [ -z "$path" ]; then
+ warn "libsecret helper is not available; using cache helper for this machine."
+ run git config --global credential.helper "cache --timeout=3600"
+ else
+ run git config --global credential.helper "$path"
+ fi
+ ;;
+ cache)
+ run git config --global credential.helper "cache --timeout=3600"
+ ;;
+ store)
+ # Plaintext storage needs the explicit opt-in flag.
+ if [ "$ALLOW_PLAINTEXT_STORE" -ne 1 ]; then
+ err "credential.helper=store writes plaintext credentials."
+ err "Rerun with --allow-plaintext-store if that is intended for this host."
+ exit 1
+ fi
+ run git config --global credential.helper store
+ ;;
+ esac
+
+ # NOTE(review): this is also printed in --dry-run mode, where nothing was changed.
+ ok "credential.helper configured"
+}
+
+# Ensure an ed25519 SSH key exists at $SSH_KEY, print its public half and,
+# with --authorize-ssh, copy it to every configured target.
+# Every change to the local machine goes through `run`, so --dry-run only
+# prints what would happen.
+setup_ssh_key() {
+  step "Checking SSH key"
+  run mkdir -p "$HOME/.ssh"
+  run chmod 700 "$HOME/.ssh"
+
+  if [ -f "$SSH_KEY" ]; then
+    ok "SSH key exists: $SSH_KEY"
+  else
+    # USER can be unset (containers, cron); fall back to `id -un` so that
+    # `set -u` does not abort the bootstrap.
+    run ssh-keygen -t ed25519 -f "$SSH_KEY" -N "" -C "${USER:-$(id -un)}@$(hostname)-state-hub"
+    if [ "$DRY_RUN" -eq 1 ]; then
+      warn "SSH key not generated (dry run): $SSH_KEY"
+    else
+      ok "SSH key generated: $SSH_KEY"
+    fi
+  fi
+
+  if [ -f "${SSH_KEY}.pub" ]; then
+    printf '\nPublic key to authorize on managed hosts:\n\n'
+    sed 's/^/ /' "${SSH_KEY}.pub"
+    printf '\n'
+  fi
+
+  if [ "$AUTHORIZE_SSH" -eq 1 ]; then
+    local target
+    for target in "${SSH_TARGETS[@]}"; do
+      run ssh-copy-id -i "${SSH_KEY}.pub" "$target"
+    done
+  else
+    warn "SSH authorization not attempted. Use --authorize-ssh after confirming host access."
+  fi
+}
+
+write_gitea_conf() {
+ step "Checking Gitea config"
+ if [ "$SKIP_GITEA" -eq 1 ]; then
+ warn "Skipping Gitea config by request."
+ return
+ fi
+
+ if [ -f "$GITEA_CONF" ]; then
+ chmod 600 "$GITEA_CONF"
+ ok "$GITEA_CONF already exists"
+ return
+ fi
+
+ if [ -z "$GITEA_USER" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
+ read -r -p "Gitea username: " GITEA_USER
+ fi
+
+ if [ -z "$GITEA_TOKEN" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
+ read -r -s -p "Gitea token (requires read:user and repository write scopes): " GITEA_TOKEN
+ printf '\n'
+ fi
+
+ if [ -z "$GITEA_USER" ] || [ -z "$GITEA_TOKEN" ]; then
+ warn "Gitea config not written. Set GITEA_USER/GITEA_TOKEN or rerun interactively."
+ return
+ fi
+
+ if [ "$DRY_RUN" -eq 1 ]; then
+ printf 'DRY-RUN: would write %s with GITEA_URL and GITEA_USER; token hidden\n' "$GITEA_CONF"
+ return
+ fi
+
+ umask 077
+ {
+ printf 'GITEA_URL="%s"\n' "$GITEA_URL"
+ printf 'GITEA_USER="%s"\n' "$GITEA_USER"
+ printf 'GITEA_TOKEN="%s"\n' "$GITEA_TOKEN"
+ } >"$GITEA_CONF"
+ chmod 600 "$GITEA_CONF"
+ ok "Wrote $GITEA_CONF"
+}
+
+# Register the State Hub MCP server through the Makefile's register-mcp
+# target, unless --skip-mcp was given.
+register_mcp() {
+ step "Registering State Hub MCP"
+ if [ "$SKIP_MCP" -eq 1 ]; then
+ warn "Skipping MCP registration by request."
+ return
+ fi
+ if [ "$DRY_RUN" -eq 1 ]; then
+ # NOTE(review): `run` only prints the command in dry-run mode, so make is
+ # not invoked here and register-mcp's own dry-run output is never shown.
+ # Confirm whether make should really be called with DRY_RUN=1 instead.
+ run make -C "$STATE_HUB_DIR" register-mcp DRY_RUN=1
+ else
+ make -C "$STATE_HUB_DIR" register-mcp
+ fi
+}
+
+# Probe /state/health on the local port 8000, then on the tunnel port 18000.
+# Each probe has a 2-second timeout; an unreachable API only warns.
+health_check() {
+ step "Checking State Hub reachability"
+ if curl -fsS --max-time 2 "http://127.0.0.1:8000/state/health" >/dev/null 2>&1; then
+ ok "State Hub API reachable at http://127.0.0.1:8000"
+ elif curl -fsS --max-time 2 "http://127.0.0.1:18000/state/health" >/dev/null 2>&1; then
+ ok "State Hub API reachable through tunnel at http://127.0.0.1:18000"
+ else
+ warn "State Hub API is not reachable locally or through the default tunnel."
+ warn "Start it with 'make api' or run 'make bridges' if this machine uses ops-bridge."
+ fi
+}
+
+# Run every bootstrap stage in order: prerequisites, git credential helper,
+# SSH key, Gitea config, MCP registration and the final reachability check.
+main() {
+ step "State Hub environment bootstrap"
+ printf 'Repository: %s\n' "$STATE_HUB_DIR"
+ check_commands
+ configure_git_helper
+ setup_ssh_key
+ write_gitea_conf
+ register_mcp
+ health_check
+ ok "Bootstrap checks complete."
+}
+
+main "$@"
diff --git a/scripts/consistency_check.py b/scripts/consistency_check.py
index 2ae6003..ee61482 100644
--- a/scripts/consistency_check.py
+++ b/scripts/consistency_check.py
@@ -1596,7 +1596,7 @@ def fix_repo(
task_id = ctx["task_id"]
status = ctx["status"]
result = _api_patch(api_base, f"/tasks/{task_id}",
- {"status": status})
+ {"status": status, "suppress_token_event": True})
if result is not None and "_error" not in result:
report.fixes_applied.append(
f"C-10 fixed: task {task_id[:8]}… status → {status!r}"
diff --git a/scripts/register-mcp.sh b/scripts/register-mcp.sh
new file mode 100755
index 0000000..ae63bf5
--- /dev/null
+++ b/scripts/register-mcp.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+CLAUDE_JSON="${CLAUDE_JSON:-$HOME/.claude.json}"
+SERVER_NAME="${STATE_HUB_MCP_NAME:-state-hub}"
+API_BASE="${API_BASE:-}"
+MCP_URL="${MCP_URL:-}"
+DRY_RUN=0
+
+# Print the command-line help text to stdout. Callers redirect it to stderr
+# when reporting an argument error.
+usage() {
+  cat <<'USAGE'
+Usage: scripts/register-mcp.sh [--url URL] [--api-base URL] [--dry-run]
+
+Registers the State Hub MCP server for Claude Code.
+
+Options:
+ --url URL MCP SSE URL to register. Defaults to local :8001 or tunnel :18001.
+ --api-base URL State Hub API URL used for reachability checks.
+ --dry-run Print what would happen without changing Claude config.
+ -h, --help Show this help.
+USAGE
+}
+
+# Abort with the usage text (exit 2) when a value-taking option is the last
+# argument. Without this check `shift 2` fails and `set -e` ends the script
+# without any message.
+#   $1 - option name as typed, $2 - number of remaining arguments
+require_value() {
+  if [ "$2" -lt 2 ]; then
+    echo "ERROR: $1 requires a value" >&2
+    usage >&2
+    exit 2
+  fi
+}
+
+# Parse command-line options; they override the MCP_URL/API_BASE environment
+# defaults set above. An explicit empty value is still accepted and means
+# "auto-detect".
+while [ "$#" -gt 0 ]; do
+  case "$1" in
+    --url)
+      require_value "$1" "$#"
+      MCP_URL="$2"
+      shift 2
+      ;;
+    --api-base)
+      require_value "$1" "$#"
+      API_BASE="$2"
+      shift 2
+      ;;
+    --dry-run)
+      DRY_RUN=1
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "ERROR: unknown argument: $1" >&2
+      usage >&2
+      exit 2
+      ;;
+  esac
+done
+
+# Print all arguments, joined by single spaces, as one line on stdout.
+status() {
+  local message="$*"
+  printf '%s\n' "$message"
+}
+
+# Succeed when GET <base>/state/health answers successfully within 2 seconds.
+# One trailing slash on the base URL is tolerated. All curl output is discarded.
+api_healthy() {
+  local health_url="${1%/}/state/health"
+  curl -fsS --max-time 2 "$health_url" >/dev/null 2>&1
+}
+
+# Succeed when a TCP connection to host:port can be opened within 2 seconds.
+# The probe uses bash's /dev/tcp redirection in a child shell so `timeout` can
+# bound it. Host and port are passed as positional arguments instead of being
+# interpolated into the child's command text.
+#   $1 - host, $2 - port
+port_open() {
+  local host="$1"
+  local port="$2"
+  timeout 2 bash -c ': </dev/tcp/"$1"/"$2"' _ "$host" "$port" >/dev/null 2>&1
+}
+
+# Pick the API base when none was supplied: the local port if healthy, else
+# the tunnel port if healthy, else the local port as the default.
+if [ -z "$API_BASE" ]; then
+  API_BASE="http://127.0.0.1:8000"
+  if ! api_healthy "$API_BASE" && api_healthy "http://127.0.0.1:18000"; then
+    API_BASE="http://127.0.0.1:18000"
+  fi
+fi
+
+# Pick the MCP SSE URL when none was supplied: the local port if open, else
+# the tunnel port if it is open or the API was found through the tunnel, else
+# the local port as the default.
+if [ -z "$MCP_URL" ]; then
+  MCP_URL="http://127.0.0.1:8001/sse"
+  if ! port_open 127.0.0.1 8001; then
+    if port_open 127.0.0.1 18001 || [ "$API_BASE" = "http://127.0.0.1:18000" ]; then
+      MCP_URL="http://127.0.0.1:18001/sse"
+    fi
+  fi
+fi
+
+# Build the compact JSON server definition with python3 so the URL is encoded
+# by json.dumps rather than hand-quoted in shell.
+CONFIG="$(python3 - "$MCP_URL" <<'PY'
+import json
+import sys
+
+print(json.dumps({"type": "sse", "url": sys.argv[1]}, separators=(",", ":")))
+PY
+)"
+
+# Summarise what will be checked and registered.
+status "State Hub directory: $STATE_HUB_DIR"
+status "API health check: ${API_BASE%/}/state/health"
+status "MCP registration: $SERVER_NAME -> $MCP_URL"
+
+# The API check is informational: an unreachable API only warns and
+# registration continues.
+if api_healthy "$API_BASE"; then
+  status "OK: State Hub API is reachable."
+else
+  status "WARN: State Hub API is not reachable at ${API_BASE%/}/state/health."
+  status " Start it with 'make api' or bring up the ops-bridge tunnel."
+fi
+
+# The claude CLI is required for a real registration; a dry run only warns.
+# The fatal message goes to stderr, like the argument-parsing errors, so it is
+# not lost when stdout is captured or piped.
+if ! command -v claude >/dev/null 2>&1; then
+  if [ "$DRY_RUN" -eq 1 ]; then
+    status "WARN: claude CLI not found on PATH; dry-run will still show the command."
+  else
+    status "ERROR: claude CLI not found on PATH." >&2
+    status " Install or expose Claude Code CLI, then rerun: make register-mcp" >&2
+    exit 1
+  fi
+fi
+
+# Read the URL currently registered for $SERVER_NAME from the Claude config.
+# Any problem with the file (missing, unreadable, malformed, unexpected shape)
+# yields an empty string, so the script proceeds to register instead of
+# aborting under `set -e` with a Python traceback.
+CURRENT_URL="$(python3 - "$CLAUDE_JSON" "$SERVER_NAME" <<'PY'
+import json
+import sys
+from pathlib import Path
+
+path = Path(sys.argv[1])
+name = sys.argv[2]
+try:
+    data = json.loads(path.read_text(encoding="utf-8"))
+except (OSError, ValueError):
+    # Covers a missing or unreadable file, invalid JSON and undecodable bytes.
+    data = {}
+servers = data.get("mcpServers") if isinstance(data, dict) else None
+entry = servers.get(name) if isinstance(servers, dict) else None
+url = entry.get("url") if isinstance(entry, dict) else None
+# Print only a real string, never the literal "None".
+print(url if isinstance(url, str) else "")
+PY
+)"
+
+# Nothing to do when the stored URL already equals the selected one.
+if [ "$CURRENT_URL" = "$MCP_URL" ]; then
+  status "OK: $SERVER_NAME is already registered with this URL."
+  exit 0
+fi
+
+# Dry run: print the registration command instead of executing it.
+if [ "$DRY_RUN" -eq 1 ]; then
+  status "DRY-RUN: would run:"
+  status " claude mcp add-json -s user $SERVER_NAME '$CONFIG'"
+  exit 0
+fi
+
+# Perform the registration. Under `set -e` a failing claude command ends the
+# script before the success message is printed.
+claude mcp add-json -s user "$SERVER_NAME" "$CONFIG"
+
+status "OK: registered $SERVER_NAME."
+status "Restart Claude Code so the MCP server list is refreshed."
diff --git a/scripts/task_token_hook.py b/scripts/task_token_hook.py
index 00a72f8..d9a12aa 100755
--- a/scripts/task_token_hook.py
+++ b/scripts/task_token_hook.py
@@ -1,27 +1,48 @@
#!/usr/bin/env python3
"""PostToolUse hook: replace heuristic token events with real transcript-derived counts.
-Fires after mcp__state-hub__update_task_status when status=done.
+Fires after supported task completion tools when status=done.
Reads the Claude Code session transcript to compute the token delta since the
previous task completion, then PATCHes the heuristic event with real counts.
-State is persisted per session in /tmp/custodian_tokens_<session_id>.json so
-deltas are correctly scoped even when multiple tasks complete in one session.
+State is persisted per session in a durable cache directory so deltas survive
+restarts and multiple task completions in one session.
"""
import json
import os
import sys
import urllib.error
import urllib.request
+from datetime import datetime, timezone
from pathlib import Path
API = os.environ.get("CUSTODIAN_API", "http://127.0.0.1:8000")
-STATE_DIR = Path(os.environ.get("TMPDIR", "/tmp"))
+STATE_DIR = Path(os.environ.get("CUSTODIAN_TOKEN_STATE_DIR", Path.home() / ".cache" / "state-hub" / "token-hooks"))
+HEALTH_LOG = STATE_DIR / "hook-health.jsonl"
+PARSER_VERSION = "claude-transcript-delta-v1"
+SUPPORTED_TOOL_HINTS = (
+ "update_task_status",
+ "tasks",
+ "task",
+)
-def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
+def utc_now() -> str:
+ return datetime.now(timezone.utc).isoformat()
+
+
+def write_health(event: dict) -> None:
+ try:
+ STATE_DIR.mkdir(parents=True, exist_ok=True)
+ with HEALTH_LOG.open("a", encoding="utf-8") as handle:
+ handle.write(json.dumps({"ts": utc_now(), **event}, sort_keys=True) + "\n")
+ except OSError:
+ pass
+
+
+def read_transcript_totals(transcript_path: str) -> tuple[int, int, int]:
"""Sum all usage entries in the transcript JSONL up to the current point."""
- total_in = total_out = 0
+ total_in = total_out = cached_in = 0
try:
with open(transcript_path) as f:
for line in f:
@@ -29,10 +50,9 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
entry = json.loads(line)
usage = entry.get("message", {}).get("usage", {})
if usage:
- # Count all input token variants (direct + cache creation + cache read)
- total_in += (
- usage.get("input_tokens", 0)
- + usage.get("cache_creation_input_tokens", 0)
+ total_in += usage.get("input_tokens", 0)
+ cached_in += (
+ usage.get("cache_creation_input_tokens", 0)
+ usage.get("cache_read_input_tokens", 0)
)
total_out += usage.get("output_tokens", 0)
@@ -40,21 +60,22 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
continue
except OSError:
pass
- return total_in, total_out
+ return total_in, total_out, cached_in
-def load_state(session_id: str) -> tuple[int, int]:
+def load_state(session_id: str) -> tuple[int, int, int]:
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
try:
data = json.loads(state_file.read_text())
- return data.get("total_in", 0), data.get("total_out", 0)
+ return data.get("total_in", 0), data.get("total_out", 0), data.get("cached_in", 0)
except (OSError, json.JSONDecodeError):
- return 0, 0
+ return 0, 0, 0
-def save_state(session_id: str, total_in: int, total_out: int) -> None:
+def save_state(session_id: str, total_in: int, total_out: int, cached_in: int) -> None:
+ STATE_DIR.mkdir(parents=True, exist_ok=True)
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
- state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out}))
+ state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out, "cached_in": cached_in}))
def api_get(path: str):
@@ -75,51 +96,89 @@ def api_patch(path: str, data: dict):
return json.loads(r.read())
+def extract_done_task(payload: dict) -> tuple[str | None, dict]:
+ tool_name = payload.get("tool_name", "")
+ if not any(hint in tool_name for hint in SUPPORTED_TOOL_HINTS):
+ return None, {}
+
+ tool_input = payload.get("tool_input", {}) or {}
+ status = tool_input.get("status")
+ if status != "done":
+ return None, {}
+
+ task_id = (
+ tool_input.get("task_id")
+ or tool_input.get("id")
+ or tool_input.get("taskId")
+ )
+ return task_id, tool_input
+
+
def main() -> None:
try:
payload = json.loads(sys.stdin.read())
except json.JSONDecodeError:
return
- tool_name = payload.get("tool_name", "")
- if "update_task_status" not in tool_name:
- return
-
- tool_input = payload.get("tool_input", {})
- if tool_input.get("status") != "done":
- return
-
- task_id = tool_input.get("task_id")
+ task_id, tool_input = extract_done_task(payload)
if not task_id:
+ write_health({"status": "skipped", "reason": "not_done_task_completion", "tool_name": payload.get("tool_name")})
return
transcript_path = payload.get("transcript_path", "")
session_id = payload.get("session_id", "unknown")
# Compute token delta for this task
- current_in, current_out = read_transcript_totals(transcript_path)
- last_in, last_out = load_state(session_id)
+ current_in, current_out, current_cached = read_transcript_totals(transcript_path)
+ last_in, last_out, last_cached = load_state(session_id)
delta_in = max(0, current_in - last_in)
delta_out = max(0, current_out - last_out)
- save_state(session_id, current_in, current_out)
+ delta_cached = max(0, current_cached - last_cached)
+ save_state(session_id, current_in, current_out, current_cached)
- if delta_in == 0 and delta_out == 0:
- return # Nothing measurable — leave heuristic in place
+ if delta_in == 0 and delta_out == 0 and delta_cached == 0:
+ write_health({
+ "status": "skipped",
+ "reason": "zero_delta",
+ "session_id": session_id,
+ "task_id": task_id,
+ "source_path": transcript_path,
+ })
+ return
# Find the most recent heuristic event for this task and replace it
try:
events = api_get(f"/token-events/?task_id={task_id}&note=heuristic&limit=5")
except (urllib.error.URLError, OSError):
+ write_health({"status": "skipped", "reason": "api_offline", "session_id": session_id, "task_id": task_id})
return # API offline — leave heuristic as-is
if not events:
+ write_health({"status": "skipped", "reason": "no_fallback_event", "session_id": session_id, "task_id": task_id})
return
event_id = events[0]["id"]
model = tool_input.get("model")
agent = tool_input.get("agent")
- patch_body: dict = {"tokens_in": delta_in, "tokens_out": delta_out, "note": "measured"}
+ patch_body: dict = {
+ "tokens_in": delta_in,
+ "tokens_out": delta_out,
+ "note": "measured",
+ "measurement_kind": "measured",
+ "source_provider": "claude_transcript",
+ "source_id": f"claude:{session_id}:task:{task_id}",
+ "source_path": transcript_path or None,
+ "parser_version": PARSER_VERSION,
+ "confidence": 1.0,
+ "cached_input_tokens": delta_cached,
+ "raw_total_tokens": delta_in + delta_out + delta_cached,
+ "raw_metadata": {
+ "hook": "post_tool_use",
+ "tool_name": payload.get("tool_name"),
+ "state_dir": str(STATE_DIR),
+ },
+ }
if model:
patch_body["model"] = model
if agent:
@@ -128,7 +187,19 @@ def main() -> None:
try:
api_patch(f"/token-events/{event_id}", patch_body)
except (urllib.error.URLError, OSError):
- pass
+ write_health({"status": "skipped", "reason": "patch_failed", "session_id": session_id, "task_id": task_id})
+ return
+
+ write_health({
+ "status": "patched",
+ "session_id": session_id,
+ "task_id": task_id,
+ "event_id": event_id,
+ "tokens_in": delta_in,
+ "tokens_out": delta_out,
+ "cached_input_tokens": delta_cached,
+ "source_path": transcript_path,
+ })
if __name__ == "__main__":
diff --git a/scripts/token_reconcile.py b/scripts/token_reconcile.py
new file mode 100644
index 0000000..9b1bbaf
--- /dev/null
+++ b/scripts/token_reconcile.py
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+"""Reconcile token evidence from local agent sources against State Hub.
+
+Dry-run is the default. Use ``--apply`` to upsert measured source events and
+``--zero-superseded-fallbacks`` to zero task fallback rows that are covered by
+source-backed measurements.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import urllib.parse
+import urllib.request
+from collections import Counter, defaultdict
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+ROOT = Path(__file__).resolve().parent.parent
+if str(ROOT) not in sys.path:
+ sys.path.insert(0, str(ROOT))
+
+from api.services.token_sources import collect_claude_transcripts, collect_codex_sessions, parse_iso # noqa: E402
+from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
+
+DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
+SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_source_measurement"
+
+
+def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
+    """Send one request to the State Hub API and return the decoded JSON reply.
+
+    ``path`` is joined to ``api_base`` with exactly one slash between them.
+    When ``body`` is given it is sent as UTF-8 encoded JSON. An empty response
+    body decodes to ``None``. Errors raised by ``urllib`` (30 s timeout) are
+    not caught here.
+    """
+    payload = None if body is None else json.dumps(body).encode("utf-8")
+    request = urllib.request.Request(
+        f"{api_base.rstrip('/')}/{path.lstrip('/')}",
+        data=payload,
+        headers={"Content-Type": "application/json"},
+        method=method,
+    )
+    with urllib.request.urlopen(request, timeout=30) as response:
+        raw = response.read()
+    return json.loads(raw or b"null")
+
+
+def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return every token event matching ``params``, fetched 1000 rows per page.
+
+    Paging stops at the first page that is empty, is not a list, or holds
+    fewer rows than the page size.
+    """
+    page_size = 1000
+    collected: list[dict[str, Any]] = []
+    offset = 0
+    while True:
+        query = urllib.parse.urlencode({**params, "limit": page_size, "offset": offset})
+        batch = http_json(api_base, "GET", f"/token-events/?{query}")
+        if not isinstance(batch, list) or not batch:
+            return collected
+        collected.extend(batch)
+        if len(batch) < page_size:
+            return collected
+        offset += page_size
+
+
+def find_home(explicit: str | None, env_name: str, default: Path) -> Path | None:
+    """Return the first existing directory among the candidate homes.
+
+    Candidates are tried in this order: the ``explicit`` path, the value of
+    the ``env_name`` environment variable, then ``default``. Empty or unset
+    values are skipped. Returns ``None`` when no candidate is a directory.
+    """
+    env_home = os.environ.get(env_name)
+    ordered = [Path(value) for value in (explicit, env_home) if value]
+    ordered.append(default)
+    return next((path for path in ordered if path.is_dir()), None)
+
+
+def event_total(event: dict[str, Any]) -> int:
+    """Return ``tokens_in + tokens_out`` for an event; missing or falsy counts are 0."""
+    return sum(int(event.get(field) or 0) for field in ("tokens_in", "tokens_out"))
+
+
+def source_index(events: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
+    """Index events by ``source_id``, falling back to ``ref_id``.
+
+    Events whose key is not a string are left out. When several events share
+    a key, the last one in ``events`` wins.
+    """
+    keyed = ((event.get("source_id") or event.get("ref_id"), event) for event in events)
+    return {key: event for key, event in keyed if isinstance(key, str)}
+
+
+def print_report(report: dict[str, Any]) -> None:
+    """Print the report to stdout as indented JSON with sorted keys.
+
+    Values that JSON cannot encode natively are rendered with ``str``.
+    """
+    rendered = json.dumps(report, indent=2, sort_keys=True, default=str)
+    print(rendered)
+
+
+def main() -> int:
+    """Run one reconciliation pass and return the process exit code.
+
+    Collects Codex and Claude source records since ``--since``, compares them
+    with the token events State Hub already holds, and prints a JSON report.
+    With ``--apply`` the planned records are upserted, fallback rows are
+    zeroed if ``--zero-superseded-fallbacks`` is also given, and a progress
+    event carrying the report is posted.
+
+    Returns 1 when the canary fails (progress events exist in the window but
+    the measured token total is zero), otherwise 0.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to reconcile from")
+    parser.add_argument("--api-base", default=DEFAULT_API)
+    parser.add_argument("--codex-home")
+    parser.add_argument("--claude-home")
+    parser.add_argument("--apply", action="store_true", help="upsert measured source events")
+    parser.add_argument(
+        "--zero-superseded-fallbacks",
+        action="store_true",
+        help="with --apply, zero heuristic fallback rows after measured source ingestion",
+    )
+    args = parser.parse_args()
+
+    since = parse_iso(args.since)
+    since_param = since.isoformat()
+    codex_home = find_home(args.codex_home, "CODEX_HOME", Path.home() / ".codex")
+    if codex_home is None:
+        # NOTE(review): machine-specific fallback path; other users should
+        # pass --codex-home or set CODEX_HOME.
+        windows_codex = Path("/mnt/c/Users/bernd.worsch/.codex")
+        codex_home = windows_codex if windows_codex.is_dir() else None
+    claude_home = find_home(args.claude_home, "CLAUDE_HOME", Path.home() / ".claude")
+
+    # Gather source records and note, per provider, where they came from.
+    records = []
+    source_health: dict[str, dict[str, Any]] = {}
+    if codex_home:
+        codex_records = collect_codex_sessions(codex_home, since)
+        records.extend(codex_records)
+        source_health["codex_session"] = {"home": str(codex_home), "sessions_found": len(codex_records)}
+    else:
+        source_health["codex_session"] = {"home": None, "sessions_found": 0, "warning": "Codex home not found"}
+    if claude_home:
+        claude_records = collect_claude_transcripts(claude_home, since)
+        records.extend(claude_records)
+        source_health["claude_transcript"] = {"home": str(claude_home), "sessions_found": len(claude_records)}
+    else:
+        source_health["claude_transcript"] = {"home": None, "sessions_found": 0, "warning": "Claude home not found"}
+
+    # Load what State Hub already knows for the same window.
+    repos = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
+    existing_events = list_events(args.api_base, {"since": since_param, "include_superseded": "true"})
+    existing_by_source = source_index(existing_events)
+    fallback_events = [
+        event for event in existing_events
+        if event.get("source_provider") == "task_fallback" or event.get("note") == "heuristic"
+    ]
+    superseded_events = [
+        event for event in existing_events
+        if event.get("measurement_kind") == "superseded" or str(event.get("note") or "").startswith("heuristic_superseded")
+    ]
+
+    # Plan upserts: skip records whose stored event already carries at least
+    # as many tokens; a smaller stored total marks the session as stale.
+    planned_upserts = []
+    unattributed = 0
+    stale = 0
+    replaced_event_ids: set[Any] = set()
+    source_totals: dict[str, int] = defaultdict(int)
+    for record in records:
+        source_totals[record.source_provider] += record.tokens_total
+        existing = existing_by_source.get(record.source_id)
+        if existing and event_total(existing) >= record.tokens_total:
+            continue
+        if existing:
+            stale += 1
+            # An upsert with the same source_id overwrites this row, so its
+            # current total must not be counted next to the record replacing it.
+            if existing.get("source_id") == record.source_id and existing.get("id") is not None:
+                replaced_event_ids.add(existing["id"])
+        match = resolve_repo(record.cwd, repos)
+        if match is None:
+            unattributed += 1
+        planned_upserts.append((record, match))
+
+    # Data-quality indicators over the measured events already stored.
+    source_ids = [
+        event.get("source_id")
+        for event in existing_events
+        if event.get("source_id") and event.get("measurement_kind") == "measured"
+    ]
+    duplicate_sources = {
+        source_id: count for source_id, count in Counter(source_ids).items() if count > 1
+    }
+    missing_provenance = [
+        event for event in existing_events
+        if event.get("measurement_kind") == "measured" and not event.get("source_id")
+    ]
+    progress_events = http_json(args.api_base, "GET", f"/progress/?since={urllib.parse.quote(since_param)}&limit=1000")
+    # Measured total after the plan: stored measured rows that stay as they
+    # are, plus every planned record. Rows about to be overwritten are
+    # excluded so stale sessions are not counted twice.
+    measured_total = sum(
+        event_total(event)
+        for event in existing_events
+        if event.get("measurement_kind") == "measured" and event.get("id") not in replaced_event_ids
+    ) + sum(record.tokens_total for record, _ in planned_upserts)
+    # Canary: work was logged in the window but no measured tokens exist.
+    canary_failed = bool(progress_events) and measured_total == 0
+
+    report = {
+        "since": since.isoformat(),
+        "apply": args.apply,
+        "sources": source_health,
+        "sessions_found": len(records),
+        "source_tokens_total": dict(source_totals),
+        "events_existing": len(existing_events),
+        "events_to_upsert": len(planned_upserts),
+        "sessions_stale": stale,
+        "fallback_events": len(fallback_events),
+        "superseded_events": len(superseded_events),
+        "unattributed_source_records": unattributed,
+        "missing_provenance_events": len(missing_provenance),
+        "duplicate_source_ids": duplicate_sources,
+        "progress_events": len(progress_events) if isinstance(progress_events, list) else 0,
+        "measured_tokens_total_after_plan": measured_total,
+        "canary_failed": canary_failed,
+    }
+
+    if args.apply:
+        for record, match in planned_upserts:
+            payload = record.to_token_event_payload(repo_id=match.repo_id if match else None)
+            payload["raw_metadata"] = {
+                **payload.get("raw_metadata", {}),
+                "repo_slug": match.slug if match else None,
+                "attribution_method": match.method if match else None,
+            }
+            http_json(args.api_base, "POST", "/token-events/upsert", payload)
+        if args.zero_superseded_fallbacks:
+            # NOTE(review): this zeroes every fallback row in the window,
+            # including rows already marked superseded — confirm intent.
+            for event in fallback_events:
+                http_json(
+                    args.api_base,
+                    "PATCH",
+                    f"/token-events/{event['id']}",
+                    {
+                        "tokens_in": 0,
+                        "tokens_out": 0,
+                        "note": SUPERSEDED_HEURISTIC_NOTE,
+                        "measurement_kind": "superseded",
+                        "source_provider": "task_fallback",
+                        "confidence": 0.0,
+                        "raw_total_tokens": 0,
+                    },
+                )
+        # Record the run itself so later reconciliations can see it.
+        http_json(
+            args.api_base,
+            "POST",
+            "/progress/",
+            {
+                "summary": (
+                    "Token reconciliation: "
+                    f"{len(records)} source records, {len(planned_upserts)} upserts, "
+                    f"{len(fallback_events)} fallback events, canary_failed={canary_failed}"
+                ),
+                "event_type": "token_reconciliation",
+                "author": "codex",
+                "detail": report,
+            },
+        )
+
+    print_report(report)
+    return 1 if canary_failed else 0
+
+
+# Run main() when executed as a script and exit with its return code.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_token_events.py b/tests/test_token_events.py
index 324e95e..d392373 100644
--- a/tests/test_token_events.py
+++ b/tests/test_token_events.py
@@ -55,8 +55,23 @@ class TestTokenEventsCreate:
assert ev["tokens_in"] == 200
assert ev["tokens_out"] == 100
assert ev["tokens_total"] == 300
+ assert ev["measurement_kind"] == "estimated"
+ assert ev["source_provider"] == "manual"
+ assert ev["raw_total_tokens"] == 300
assert ev["id"] is not None
+ async def test_create_with_created_at_backfill_timestamp(self, client):
+ created_at = "2026-05-19T01:02:03Z"
+ ev = await _post_event(
+ client,
+ tokens_in=200,
+ tokens_out=100,
+ ref_type="session",
+ ref_id="codex:test-session",
+ created_at=created_at,
+ )
+ assert ev["created_at"].startswith("2026-05-19T01:02:03")
+
async def test_create_with_all_fields(self, client):
await _create_domain(client)
topic = await _create_topic(client)
@@ -74,11 +89,76 @@ class TestTokenEventsCreate:
ref_id=task["id"],
note="T01 done",
session_id="ses-abc",
+ measurement_kind="measured",
+ source_provider="manual",
+ source_id="manual:test-event",
+ confidence=0.95,
+ cached_input_tokens=10,
+ reasoning_output_tokens=20,
+ raw_total_tokens=1530,
+ raw_metadata={"source": "unit-test"},
)
assert ev["task_id"] == task["id"]
assert ev["workstream_id"] == ws["id"] # auto-populated from task
assert ev["model"] == "claude-sonnet-4-6"
assert ev["tokens_total"] == 1500
+ assert ev["measurement_kind"] == "measured"
+ assert ev["source_provider"] == "manual"
+ assert ev["source_id"] == "manual:test-event"
+ assert ev["cached_input_tokens"] == 10
+ assert ev["reasoning_output_tokens"] == 20
+ assert ev["token_evidence_total"] == 1530
+ assert ev["raw_metadata"] == {"source": "unit-test"}
+
+ async def test_upsert_source_event_updates_existing_session(self, client):
+ body = {
+ "tokens_in": 100,
+ "tokens_out": 50,
+ "measurement_kind": "measured",
+ "source_provider": "codex_session",
+ "source_id": "codex:abc",
+ "ref_type": "session",
+ "ref_id": "codex:abc",
+ "session_id": "abc",
+ "cached_input_tokens": 5,
+ }
+ first = await client.post("/token-events/upsert", json=body)
+ assert first.status_code == 200, first.text
+ second = await client.post("/token-events/upsert", json={**body, "tokens_in": 300, "tokens_out": 80})
+ assert second.status_code == 200, second.text
+ assert first.json()["id"] == second.json()["id"]
+ assert second.json()["tokens_total"] == 380
+
+ listed = (await client.get("/token-events/", params={"source_provider": "codex_session"})).json()
+ assert len(listed) == 1
+
+ async def test_patch_backfill_fields(self, client):
+ ev = await _post_event(client, tokens_in=100, tokens_out=50)
+
+ r = await client.patch(f"/token-events/{ev['id']}", json={
+ "tokens_in": 500,
+ "tokens_out": 250,
+ "session_id": "codex-session",
+ "ref_type": "session",
+ "ref_id": "codex:session",
+ "created_at": "2026-05-20T01:02:03Z",
+ "note": "backfill:codex-session",
+ "measurement_kind": "measured",
+ "source_provider": "codex_session",
+ "source_id": "codex:session",
+ "cached_input_tokens": 10,
+ })
+ assert r.status_code == 200
+ patched = r.json()
+ assert patched["tokens_total"] == 750
+ assert patched["session_id"] == "codex-session"
+ assert patched["ref_type"] == "session"
+ assert patched["ref_id"] == "codex:session"
+ assert patched["created_at"].startswith("2026-05-20T01:02:03")
+ assert patched["measurement_kind"] == "measured"
+ assert patched["source_provider"] == "codex_session"
+ assert patched["source_id"] == "codex:session"
+ assert patched["cached_input_tokens"] == 10
async def test_workstream_auto_populated_from_task(self, client):
await _create_domain(client)
@@ -129,6 +209,26 @@ class TestTokenEventsList:
assert len(events) == 1
assert events[0]["model"] == "claude-sonnet-4-6"
+ async def test_filter_by_measurement_kind_and_source_provider(self, client):
+ await _post_event(
+ client,
+ tokens_in=100,
+ tokens_out=50,
+ measurement_kind="measured",
+ source_provider="codex_session",
+ source_id="codex:filter",
+ )
+ await _post_event(client, tokens_in=200, tokens_out=100, note="heuristic")
+
+ r = await client.get(
+ "/token-events/",
+ params={"measurement_kind": "measured", "source_provider": "codex_session"},
+ )
+ assert r.status_code == 200
+ events = r.json()
+ assert len(events) == 1
+ assert events[0]["source_id"] == "codex:filter"
+
@pytest.mark.asyncio
class TestTokenSummary:
@@ -184,6 +284,7 @@ class TestTokenSummary:
s = r.json()
assert s["event_count"] == 1
assert s["tokens_total"] == 75
+ assert s["by_measurement_kind"]["estimated"] == 75
async def test_summary_unknown_scope_returns_422(self, client):
r = await client.get("/token-events/summary/", params={"scope": "foobar", "id": "x"})
@@ -215,3 +316,32 @@ class TestTokenEventGetById:
import uuid
r = await client.get(f"/token-events/{uuid.uuid4()}")
assert r.status_code == 404
+
+
+@pytest.mark.asyncio
+class TestTokenAggregateAndQuality:
+ async def test_aggregate_and_quality_expose_evidence_breakdown(self, client):
+ await _post_event(
+ client,
+ tokens_in=100,
+ tokens_out=50,
+ measurement_kind="measured",
+ source_provider="codex_session",
+ source_id="codex:agg",
+ )
+ await _post_event(client, tokens_in=1000, tokens_out=500, note="heuristic")
+
+ agg = (await client.get("/token-events/aggregate/", params={"include_superseded": "false"})).json()
+ assert agg["tokens_total"] == 1650
+ assert agg["by_measurement_kind"]["measured"] == 150
+ assert agg["by_measurement_kind"]["estimated"] == 1500
+ assert agg["by_source_provider"]["codex_session"] == 150
+ assert agg["by_source_provider"]["task_fallback"] == 1500
+
+ measured = (await client.get("/token-events/aggregate/", params={"measurement_kind": "measured"})).json()
+ assert measured["tokens_total"] == 150
+
+ quality = (await client.get("/token-events/quality/")).json()
+ assert quality["measured_event_count"] == 1
+ assert quality["fallback_event_count"] == 1
+ assert quality["missing_provenance_event_count"] == 0
diff --git a/tests/test_token_passthrough.py b/tests/test_token_passthrough.py
index 5220a50..c1c5c0a 100644
--- a/tests/test_token_passthrough.py
+++ b/tests/test_token_passthrough.py
@@ -66,6 +66,9 @@ class TestTokenPassthrough:
assert ev["agent"] == "custodian"
assert ev["workstream_id"] == ws["id"]
assert ev["note"] == "measured"
+ assert ev["measurement_kind"] == "measured"
+ assert ev["source_provider"] == "manual"
+ assert ev["source_id"] == f"task:{task['id']}:manual"
async def test_tier1_userbased_note_override(self, client):
"""Tier 1 with note='userbased' records that note instead of 'measured'."""
@@ -84,6 +87,7 @@ class TestTokenPassthrough:
events = (await client.get("/token-events/", params={"task_id": task["id"]})).json()
assert events[0]["note"] == "userbased"
+ assert events[0]["measurement_kind"] == "measured"
async def test_tier2_workplan_prorated(self, client):
"""Tier 2: workplan totals prorated across 4 tasks → 250/125 each, note='workplan'."""
@@ -108,6 +112,8 @@ class TestTokenPassthrough:
assert ev["tokens_in"] == 250 # 1000 // 4
assert ev["tokens_out"] == 125 # 500 // 4
assert ev["note"] == "workplan"
+ assert ev["measurement_kind"] == "allocated"
+ assert ev["raw_metadata"]["allocation_method"] == "workplan_prorated"
async def test_tier3_heuristic_fallback(self, client):
"""Tier 3: status=done with no token args → heuristic 1000/500, note='heuristic'."""
@@ -125,6 +131,40 @@ class TestTokenPassthrough:
assert ev["tokens_in"] == 1000
assert ev["tokens_out"] == 500
assert ev["note"] == "heuristic"
+ assert ev["measurement_kind"] == "estimated"
+ assert ev["source_provider"] == "task_fallback"
+
+ async def test_suppress_token_event_skips_done_fallback(self, client):
+ """File/cache sync can mark a task done without minting a heuristic event."""
+ await _create_domain(client)
+ topic = await _create_topic(client)
+ ws = await _create_workstream(client, topic["id"])
+ task = await _create_task(client, ws["id"])
+
+ r = await client.patch(f"/tasks/{task['id']}", json={
+ "status": "done",
+ "suppress_token_event": True,
+ })
+ assert r.status_code == 200
+ assert r.json()["status"] == "done"
+
+ events = (await client.get("/token-events/", params={"task_id": task["id"]})).json()
+ assert events == []
+
+ async def test_repeated_done_update_does_not_duplicate_event(self, client):
+ """Only the transition into done records token usage."""
+ await _create_domain(client)
+ topic = await _create_topic(client)
+ ws = await _create_workstream(client, topic["id"])
+ task = await _create_task(client, ws["id"])
+
+ r = await client.patch(f"/tasks/{task['id']}", json={"status": "done"})
+ assert r.status_code == 200
+ r = await client.patch(f"/tasks/{task['id']}", json={"status": "done"})
+ assert r.status_code == 200
+
+ events = (await client.get("/token-events/", params={"task_id": task["id"]})).json()
+ assert len(events) == 1
async def test_non_done_status_creates_no_event(self, client):
"""Non-done status updates never create a token event."""
diff --git a/tests/test_token_sources.py b/tests/test_token_sources.py
new file mode 100644
index 0000000..c30edf9
--- /dev/null
+++ b/tests/test_token_sources.py
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+import json
+
+from api.services.token_sources import parse_iso
+from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
+from api.services.token_sources.claude import parse_claude_transcript
+from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session
+
+
+def _write_jsonl(path, rows):
+    """Write ``rows`` to ``path`` as JSONL, creating parent directories.
+
+    The sentinel string ``"BAD"`` is written as a deliberately malformed line.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    lines = ["{not json}" if row == "BAD" else json.dumps(row) for row in rows]
+    with path.open("w", encoding="utf-8") as handle:
+        handle.writelines(line + "\n" for line in lines)
+
+
+def test_parse_codex_session_sums_token_count_records(tmp_path):
+    """Parse one Codex session file containing a pre-cutoff event and a bad line.
+
+    The fixture has a token_count event stamped before the ``since`` date
+    (99/1), one malformed line, and one event after it (100/40). The
+    assertions expect only the later event's counts and one malformed line.
+    """
+    path = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"
+    _write_jsonl(
+        path,
+        [
+            {"type": "session_meta", "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}},
+            {"type": "turn_context", "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"}},
+            {
+                "type": "event_msg",
+                "timestamp": "2026-05-22T23:00:00Z",
+                "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}}},
+            },
+            "BAD",
+            {
+                "type": "event_msg",
+                "timestamp": "2026-05-23T01:00:00Z",
+                "payload": {
+                    "type": "token_count",
+                    "info": {
+                        "last_token_usage": {
+                            "input_tokens": 100,
+                            "output_tokens": 40,
+                            "cached_input_tokens": 15,
+                            "reasoning_output_tokens": 7,
+                            "total_tokens": 155,
+                        }
+                    },
+                },
+            },
+        ],
+    )
+
+    record = parse_codex_session(path, parse_iso("2026-05-23"))
+
+    assert record is not None
+    assert record.source_id == "codex:s1"
+    assert record.tokens_in == 100
+    assert record.tokens_out == 40
+    assert record.cached_input_tokens == 15
+    assert record.reasoning_output_tokens == 7
+    assert record.raw_total_tokens == 155
+    assert record.raw_metadata["malformed_lines"] == 1
+
+
+def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
+    """The same session id in live and archived files yields a single record.
+
+    The archived copy holds one extra token_count event; the expected total of
+    40 equals the archived file's sum (10+5 plus 20+5), not the live file's 15.
+    """
+    live = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
+    archived = tmp_path / "archived_sessions" / "rollout-archived.jsonl"
+    rows = [
+        {"type": "session_meta", "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}},
+        {
+            "type": "event_msg",
+            "timestamp": "2026-05-23T01:00:00Z",
+            "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}}},
+        },
+    ]
+    _write_jsonl(live, rows)
+    _write_jsonl(
+        archived,
+        rows + [
+            {
+                "type": "event_msg",
+                "timestamp": "2026-05-23T02:00:00Z",
+                "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}}},
+            }
+        ],
+    )
+
+    records = collect_codex_sessions(tmp_path, parse_iso("2026-05-23"))
+
+    assert len(records) == 1
+    assert records[0].source_id == "codex:same"
+    assert records[0].tokens_total == 40
+
+
+def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
+    """Parse one Claude transcript entry and check how usage is split.
+
+    Direct input tokens are expected in ``tokens_in``; cache-creation and
+    cache-read tokens are expected together in ``cached_input_tokens``
+    (5 + 7 = 12). ``raw_metadata`` must not contain a ``content`` key.
+    """
+    path = tmp_path / "projects" / "repo" / "session.jsonl"
+    _write_jsonl(
+        path,
+        [
+            {
+                "timestamp": "2026-05-23T01:00:00Z",
+                "session_id": "c1",
+                "cwd": "/repo",
+                "message": {
+                    "model": "claude-sonnet",
+                    "content": "do not store me",
+                    "usage": {
+                        "input_tokens": 30,
+                        "cache_creation_input_tokens": 5,
+                        "cache_read_input_tokens": 7,
+                        "output_tokens": 11,
+                    },
+                },
+            }
+        ],
+    )
+
+    record = parse_claude_transcript(path, parse_iso("2026-05-23"))
+
+    assert record is not None
+    assert record.source_id == "claude:c1"
+    assert record.tokens_in == 30
+    assert record.cached_input_tokens == 12
+    assert record.tokens_out == 11
+    assert "content" not in record.raw_metadata
+
+
+def test_resolve_repo_uses_normalised_path_prefix():
+    """A WSL UNC working directory resolves to the repo whose path prefixes it.
+
+    ``normalise_cwd`` is expected to strip the ``//wsl.localhost/<distro>``
+    prefix, and ``resolve_repo`` to match a subdirectory of the repo with
+    method ``path_prefix``.
+    """
+    refs = [
+        RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub"),
+        RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other"),
+    ]
+
+    match = resolve_repo("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api", refs)
+
+    assert normalise_cwd("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub") == "/home/worsch/state-hub"
+    assert match is not None
+    assert match.repo_id == "1"
+    assert match.method == "path_prefix"
diff --git a/workplans/CUST-WP-0012-multi-user-onboarding.md b/workplans/CUST-WP-0012-multi-user-onboarding.md
index a754d8b..bb7c346 100644
--- a/workplans/CUST-WP-0012-multi-user-onboarding.md
+++ b/workplans/CUST-WP-0012-multi-user-onboarding.md
@@ -4,12 +4,12 @@ type: workplan
title: "Multi-User Onboarding and Environment Bootstrap"
domain: custodian
repo: state-hub
-status: active
+status: finished
owner: custodian
topic_slug: custodian
state_hub_workstream_id: "a28d9e29-4119-4b73-9469-f921920253ef"
created: "2026-03-11"
-updated: "2026-05-17"
+updated: "2026-05-23"
---
# Multi-User Onboarding and Environment Bootstrap
@@ -51,7 +51,7 @@ Two personas:
```task
id: CUST-WP-0012-T01
state_hub_task_id: 71628269-9a75-4dae-a347-e64a86040322
-status: todo
+status: done
priority: medium
```
@@ -79,6 +79,12 @@ git config --global credential.helper 'cache --timeout=3600'
**Done when:** included in bootstrap script; push to Gitea works without
re-entering credentials on second attempt.
+**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` configures a global
+credential helper when one is not already present. It prefers `libsecret`, uses
+`cache --timeout=3600` as the safe automatic fallback, and supports explicit
+headless plaintext storage via `--git-helper store --allow-plaintext-store`.
+`docs/onboarding.md` documents the tradeoffs.
+
---
### T02 — SSH key generation and authorization automation
@@ -86,7 +92,7 @@ re-entering credentials on second attempt.
```task
id: CUST-WP-0012-T02
state_hub_task_id: fea965e9-8a8f-439c-9096-8f7756eb71ed
-status: todo
+status: done
priority: medium
```
@@ -110,6 +116,11 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254
**Done when:** included in bootstrap script; documented in onboarding guide.
+**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` generates
+`~/.ssh/id_ed25519` if missing, prints the public key, and can run
+`ssh-copy-id` for Railiance01 and CoulombCore with `--authorize-ssh`.
+`docs/onboarding.md` documents the operator and collaborator path.
+
---
### T03 — Claude Code MCP registration automation
@@ -117,7 +128,7 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254
```task
id: CUST-WP-0012-T03
state_hub_task_id: 60318e9a-972e-45c8-afde-82ed0625f594
-status: todo
+status: done
priority: medium
```
@@ -132,10 +143,10 @@ make register-mcp # idempotent; safe to re-run
The script should:
1. Detect whether `state-hub` is already in `~/.claude.json`
-2. Extract the server config from `.mcp.json`
+2. Use the current SSE MCP config (`http://127.0.0.1:8001/sse` locally or
+ `http://127.0.0.1:18001/sse` through ops-bridge)
3. Run `claude mcp add-json -s user state-hub `
-4. Run `patch_mcp_cwd.py` to restore the cwd field
-5. Print instructions to restart Claude Code
+4. Print instructions to restart Claude Code
Should also detect whether the state hub is reachable directly
(`http://127.0.0.1:8000`) or needs a tunnel (via ops-bridge), and emit
@@ -144,6 +155,12 @@ a warning if neither is available.
**Done when:** `make register-mcp` works on a clean machine; documented
in onboarding guide.
+**Implemented 2026-05-23:** `scripts/register-mcp.sh` and the
+`make register-mcp` target register the current SSE MCP transport
+idempotently. The script detects local/tunnel reachability, supports
+`MCP_URL`, `API_BASE`, and `DRY_RUN=1`, and documents the old `.mcp.json` cwd
+patch path as legacy.
+
---
### T04 — Environment bootstrap script
@@ -151,7 +168,7 @@ in onboarding guide.
```task
id: CUST-WP-0012-T04
state_hub_task_id: 84a94761-e424-4470-a9a2-64d9cabadb7f
-status: todo
+status: done
priority: high
```
@@ -176,6 +193,11 @@ Design constraints:
**Done when:** running the script on a clean Ubuntu 24.04 machine
produces a working Custodian environment with no additional manual steps.
+**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` and
+`make bootstrap-env` provide the idempotent entrypoint. It supports dry-run,
+non-interactive mode, optional apt package installation, SSH authorization,
+Gitea token prompting, MCP registration, and State Hub health checks.
+
---
### T05 — Onboarding guide and user journey documentation
@@ -183,7 +205,7 @@ produces a working Custodian environment with no additional manual steps.
```task
id: CUST-WP-0012-T05
state_hub_task_id: b0839802-659a-475b-8b84-ab7341ea3d15
-status: todo
+status: done
priority: medium
```
@@ -208,6 +230,10 @@ for both personas:
**Done when:** a new collaborator can follow the guide without
clarification from the primary operator.
+**Implemented 2026-05-23:** `docs/onboarding.md` covers primary operator and
+domain collaborator journeys, including SSH, Gitea token file, credential
+helper choices, MCP registration, tunnel setup, and verification checks.
+
---
### T06 — State Hub multi-user model (deferred)
@@ -215,7 +241,7 @@ clarification from the primary operator.
```task
id: CUST-WP-0012-T06
state_hub_task_id: d5df3302-67b9-4765-a8d8-ea2df53dff6e
-status: todo
+status: done
priority: low
```
@@ -235,6 +261,11 @@ domain) or rely on Gitea repo permissions as the authoritative boundary
Implement T01–T05 first; multi-user access control is only needed when
there is more than one user.
+**Implemented 2026-05-23:** `docs/multi-user-access-model.md` records the
+current decision: repo permissions, SSH access, tunnels, and OpenBao remain the
+authoritative boundaries for this phase; State Hub API auth is deferred until a
+real second-user or exposed-deployment trigger exists.
+
---
## References
diff --git a/workplans/STATE-WP-0045-token-measurement-accuracy.md b/workplans/STATE-WP-0045-token-measurement-accuracy.md
new file mode 100644
index 0000000..77f06f7
--- /dev/null
+++ b/workplans/STATE-WP-0045-token-measurement-accuracy.md
@@ -0,0 +1,310 @@
+---
+id: STATE-WP-0045
+type: workplan
+title: "Token Measurement Accuracy and Resilience"
+domain: custodian
+repo: state-hub
+status: finished
+owner: codex
+topic_slug: custodian
+created: "2026-05-23"
+updated: "2026-05-23"
+state_hub_workstream_id: "0aefe379-c182-4471-84dd-c136d5e1206b"
+---
+
+# Token Measurement Accuracy and Resilience
+
+## Summary
+
+Make State Hub token tracking accurate enough to trust for daily operations and
+robust enough to survive agent/tool changes.
+
+The May 19 flatline showed the current weak spots: token events mixed measured
+usage, task-completion fallbacks, and file-sync side effects in the same table;
+Claude measurement depended on one hook path; Codex usage lived in local session
+logs until a manual backfill; and the dashboard treated every token event as the
+same quality of evidence. The immediate fix restored Codex session totals and
+suppressed sync-generated fallback events, but the system still needs a durable
+measurement model, idempotent source adapters, reconciliation checks, and a
+dashboard that exposes provenance and confidence.
+
+## Current Findings
+
+- `token_events` stores counts, associations, free-text notes, and timestamps,
+ but not structured provenance such as source system, source event id, parser
+ version, raw token categories, confidence, or whether the row is measured,
+ allocated, estimated, or superseded.
+- `PATCH /tasks/{id}` can still create heuristic token events on a transition to
+ `done`. That fallback is useful as a temporary operational signal, but it is
+ not a measurement and should not be blended into measured totals.
+- `fix-consistency` now suppresses token events while syncing file-backed task
+ status, but this is a narrow guard. Other bulk sync, import, and migration
+ paths need the same invariant.
+- Codex Desktop session logs contain structured `token_count` events with
+ `last_token_usage`, `total_token_usage`, cached-input counts, and reasoning
+ output counts. The new backfill script can restore these, but it is not yet a
+ scheduled or monitored ingestion path.
+- Claude Code measurement currently depends on `scripts/task_token_hook.py`
+ firing after one MCP tool name. It uses per-session state in `/tmp`, so missed
+ hooks, restarts, renamed tools, and non-MCP REST paths can silently degrade to
+ fallback events.
+- Repository attribution for Codex backfill is path-based. This is good enough
+ for the emergency restore, but long-term attribution should prefer registered
+ repo fingerprints/remotes and then fall back to paths.
+- The Token Cost dashboard currently aggregates all events returned by
+ `/token-events/?limit=1000`; it does not show measurement quality, source,
+ superseded rows, ingestion freshness, or possible gaps.
+
+## Out of Scope
+
+- Exact billing reconciliation against vendor invoices.
+- Capturing private transcript content in State Hub.
+- Replacing existing task/workstream/repo relationships.
+- Implementing every provider-specific parser in one pass. The first pass should
+ cover Codex Desktop and Claude Code, with a documented adapter contract for
+ others.
+
+## T01 - Define Token Evidence Model
+
+```task
+id: STATE-WP-0045-T01
+status: done
+priority: high
+state_hub_task_id: "29aed6d9-40aa-40fc-9e9a-3eb3e6f985bc"
+```
+
+Define a structured model that separates measured usage from allocated,
+estimated, and superseded rows.
+
+Implementation notes:
+
+- Add a short design note or ADR section covering token event semantics.
+- Define measurement classes such as `measured`, `allocated`, `estimated`, and
+ `superseded`.
+- Define source classes such as `codex_session`, `claude_transcript`,
+ `llm_connect`, `manual`, and `task_fallback`.
+- Define structured provenance fields: source system, source id, source path or
+ URI, source timestamp, parser version, ingestion timestamp, and confidence.
+- Decide how to represent raw token categories: input, cached input, output,
+ reasoning output, and provider total.
+- Decide whether cached input should be included in default totals or shown as a
+ separate metric. Preserve enough fields to support both views.
+- Stop using the free-text note taxonomy as the primary quality signal. Notes can
+ remain for human context, but dashboards and APIs should rely on structured
+ fields.
+
+Done when the repo has a reviewed token evidence contract and the follow-on
+schema/API tasks can implement it without ambiguity.
+
+## T02 - Add Provenance Schema and Idempotent Upsert API
+
+```task
+id: STATE-WP-0045-T02
+status: done
+priority: high
+state_hub_task_id: "ade2bd40-343c-4829-ba4f-44bc8b7cbef9"
+```
+
+Extend token storage so source-derived events can be written repeatedly without
+duplicates and without losing provenance.
+
+Implementation notes:
+
+- Add migration fields for the evidence model from T01. Candidate fields:
+ `measurement_kind`, `source_provider`, `source_id`, `source_path`,
+ `source_created_at`, `ingested_at`, `parser_version`, `confidence`,
+ `cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`,
+ `cost_estimated_usd`, and `raw_metadata`.
+- Add a unique constraint or partial unique index that prevents duplicate
+ measured source rows. For example: source provider plus source id, scoped by
+ measurement kind.
+- Provide an upsert endpoint or make `POST /token-events/` support an explicit
+ idempotency key. The behavior should update a growing live session rather than
+ creating a second row.
+- Keep backward compatibility for existing clients that only post
+ `tokens_in`/`tokens_out`, but classify those rows explicitly.
+- Update schemas, router tests, and migration tests.
+
+Done when source-backed token events can be inserted or updated idempotently and
+legacy callers continue to work.
+
+## T03 - Build Reusable Token Source Adapters
+
+```task
+id: STATE-WP-0045-T03
+status: done
+priority: high
+state_hub_task_id: "3844fb70-4ceb-4f90-9894-d4845970f0a6"
+```
+
+Move source-specific parsing out of one-off scripts and hooks into reusable,
+tested adapter modules.
+
+Implementation notes:
+
+- Add an `api/services/token_sources/` package or equivalent service layer.
+- Implement a Codex Desktop adapter for `.codex/sessions/**` and
+ `.codex/archived_sessions/**`.
+- Implement a Claude Code adapter for `.claude/projects/**/*.jsonl` that reads
+ usage metadata without storing transcript text.
+- Provide a common adapter result type with source id, timestamps, token
+ categories, model, agent, cwd/path context, and raw parser metadata.
+- Make parsing safe by default: no conversation text in logs, progress events,
+ token notes, or API payloads.
+- Add fixtures with synthetic Codex and Claude session records that cover live
+ sessions, archived sessions, duplicate files, malformed JSONL, resets, and
+ missing usage records.
+- Keep `scripts/backfill_codex_token_events.py` as a thin CLI over the reusable
+ service or replace it with a new unified CLI.
+
+Done when Codex and Claude token sources have deterministic parser tests and a
+shared ingestion interface.
+
+## T04 - Improve Repo, Workstream, and Task Attribution
+
+```task
+id: STATE-WP-0045-T04
+status: done
+priority: high
+state_hub_task_id: "d78b36ea-2a1a-40d6-bd83-03d48ff2ad9b"
+```
+
+Make attribution accurate without relying solely on local path string matching.
+
+Implementation notes:
+
+- Resolve repo attribution by git root fingerprint and remote URL when possible,
+ then fall back to registered host paths.
+- Handle duplicate local paths or alias repos explicitly, especially where one
+ checkout is registered under multiple slugs.
+- Attribute session-level usage to repo first, then optionally to workstreams or
+ tasks when there is strong evidence.
+- Define task allocation rules that do not change measured session totals. For
+ example, produce `allocated` child rows from measured session rows using task
+ completion timestamps, tool-call metadata, or explicit operator input.
+- Record the allocation method and confidence for every task-level allocation.
+- Avoid minting task-level heuristic rows automatically for bulk import, status
+ sync, migration, and consistency tooling.
+
+Done when measured session totals are stable and task/workstream attribution is
+explicitly either measured, allocated, or estimated.
+
+## T05 - Add Reconciliation, Gap Detection, and Backfill Operations
+
+```task
+id: STATE-WP-0045-T05
+status: done
+priority: high
+state_hub_task_id: "efaa2629-4f9a-439c-b0a3-85d77b03580f"
+```
+
+Add an operator-safe reconciliation command that detects flatlines, duplicate
+rows, stale ingestion, and fallback leakage.
+
+Implementation notes:
+
+- Add a command such as `make token-reconcile` or
+ `python scripts/token_reconcile.py --since <YYYY-MM-DD>`.
+- Report sessions found, sessions ingested, sessions stale, duplicate source
+ ids, fallback events, superseded rows, unattributed sessions, and rows missing
+ structured provenance.
+- Default to dry-run behavior (`--dry-run`) and require `--apply` for writes.
+- Include an explicit `--zero-superseded-fallbacks` or equivalent flag rather
+ than silently editing historical rows.
+- Store reconciliation summaries as progress events or report files without
+ including transcript content.
+- Add a canary threshold: alert or fail when measured token volume is zero while
+ task/progress activity exists for the same window.
+
+Done when an operator can run one command to verify token tracking health and
+perform safe, idempotent backfills.
+
+## T06 - Harden Hooks and Runtime Integration
+
+```task
+id: STATE-WP-0045-T06
+status: done
+priority: medium
+state_hub_task_id: "5fd99241-e6dd-4ca6-8c58-a0048f08f0ca"
+```
+
+Make token collection survive hook misses, tool renames, restarts, and multiple
+agent runtimes.
+
+Implementation notes:
+
+- Update Claude hook handling so it can match supported task completion paths,
+ not just one exact MCP tool name.
+- Persist hook high-water marks in a durable State Hub or repo-local location
+ instead of only `/tmp`.
+- Add hook health logging that records when a hook ran, what source id it
+ processed, and whether it patched or skipped a token event.
+- Add a Codex ingestion path that can run on demand and from a schedule without
+ requiring manual script execution.
+- Document required environment variables and path discovery for Windows, WSL,
+ and remote Linux hosts.
+- Ensure failures degrade to visible `estimated` events or health warnings, not
+ silent flatlines.
+
+Done when missing or stale token ingestion becomes visible within one reporting
+window and can be recovered without ad hoc inspection.
+
+## T07 - Upgrade Token APIs and Dashboard Quality Signals
+
+```task
+id: STATE-WP-0045-T07
+status: done
+priority: medium
+state_hub_task_id: "ecaf6ff8-59aa-4c56-8163-125dc96b2068"
+```
+
+Expose token quality, source, and freshness in APIs and dashboard views.
+
+Implementation notes:
+
+- Add API filters for measurement kind, source provider, repo, time range,
+ superseded rows, and unattributed rows.
+- Replace the hard dashboard dependence on `/token-events/?limit=1000` with
+ paginated or pre-aggregated endpoints that support time windows.
+- Add dashboard controls for measured-only, include allocated, include
+ estimates, and show superseded rows.
+- Show ingestion freshness: last Codex session ingested, last Claude transcript
+ ingested, and last reconciliation run.
+- Add a data-quality section listing fallback events, unattributed measured
+ sessions, duplicate source ids, and days with progress/task activity but zero
+ measured tokens.
+- Update the Token Cost page and docs so operators know which numbers are
+ measured versus inferred.
+
+Done when the dashboard no longer presents fallback, allocated, and measured
+usage as indistinguishable totals.
+
+## T08 - Verification and Migration Playbook
+
+```task
+id: STATE-WP-0045-T08
+status: done
+priority: medium
+state_hub_task_id: "61baff79-832e-45f8-80f3-106abe262096"
+```
+
+Cover the new measurement system with tests and a safe rollout plan.
+
+Implementation notes:
+
+- Add unit tests for the evidence model, source adapters, source-id
+ deduplication, repo attribution, and task allocation.
+- Add router tests for idempotent upsert, source filters, measurement-kind
+ filters, created-at preservation, and backwards-compatible legacy posts.
+- Add reconciliation tests with synthetic pre-May-19 and post-May-19 flatline
+ scenarios.
+- Add dashboard/data-loader tests or fixture checks for quality filters and
+ aggregate counts.
+- Write a migration playbook covering old heuristic rows, existing
+ `backfill:codex-session` rows, and any rows without structured provenance.
+- Verify the full suite and run a dry-run reconciliation before marking this
+ workplan finished.
+
+Done when the improved token measurement path has automated coverage, an
+operator playbook, and a dry-run reconciliation report showing no hidden
+fallback leakage.