Fixed and improved token tracking

This commit is contained in:
2026-05-23 13:59:05 +02:00
parent dd3279ea1a
commit c12091c2eb
29 changed files with 3549 additions and 278 deletions

View File

@@ -1,6 +1,7 @@
.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory
.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project register-mcp bootstrap-env validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory token-reconcile
COMPOSE = docker compose -f infra/docker-compose.yml --env-file .env
PYTHON ?= python3
start:
@echo "# run in different terminals"
@@ -111,6 +112,17 @@ register-codex-project:
@test -n "$(PROJECT_PATH)" || (echo "ERROR: PROJECT_PATH is required."; exit 1)
scripts/register_project.sh "$(DOMAIN)" "$(PROJECT_PATH)" --codex
## Register State Hub MCP for Claude Code.
## Usage: make register-mcp [MCP_URL=http://127.0.0.1:18001/sse] [API_BASE=...] [DRY_RUN=1]
## Each flag is forwarded to scripts/register-mcp.sh only when its variable is non-empty.
register-mcp:
	scripts/register-mcp.sh \
	  $(if $(MCP_URL),--url "$(MCP_URL)") \
	  $(if $(API_BASE),--api-base "$(API_BASE)") \
	  $(if $(DRY_RUN),--dry-run)
## Bootstrap a new operator/collaborator environment by running scripts/bootstrap-env.sh.
## Optional: make bootstrap-env ARGS="--install-missing"
## ARGS is expanded unquoted, so it may carry several space-separated script flags.
bootstrap-env:
	scripts/bootstrap-env.sh $(ARGS)
## Add a second repo to an existing domain: make add-repo DOMAIN=railiance REPO_PATH=/home/worsch/railiance-infra
add-repo:
@test -n "$(DOMAIN)" || (echo "ERROR: DOMAIN is required."; exit 1)
@@ -229,6 +241,17 @@ fix-consistency:
$(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \
e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e
## Reconcile measured token sources against State Hub.
## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1]
##        [API_BASE=...] [CODEX_HOME=...] [CLAUDE_HOME=...]
## A flag is passed to scripts/token_reconcile.py only when its variable is non-empty.
token-reconcile:
	$(PYTHON) scripts/token_reconcile.py \
	  $(if $(SINCE),--since "$(SINCE)") \
	  $(if $(API_BASE),--api-base "$(API_BASE)") \
	  $(if $(CODEX_HOME),--codex-home "$(CODEX_HOME)") \
	  $(if $(CLAUDE_HOME),--claude-home "$(CLAUDE_HOME)") \
	  $(if $(APPLY),--apply) \
	  $(if $(ZERO_FALLBACKS),--zero-superseded-fallbacks)
## Pull then fix: single repo or all repos if REPO omitted
## make fix-consistency-remote — smart pull+fix all repos that need it
## make fix-consistency-remote REPO=slug — pull+fix one repo

View File

@@ -1,8 +1,10 @@
import uuid
from datetime import datetime
from sqlalchemy import DateTime, ForeignKey, Integer, Text, func
from sqlalchemy.dialects.postgresql import UUID
from typing import Any
from sqlalchemy import DateTime, Float, ForeignKey, Integer, Text, UniqueConstraint, func
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from api.models.base import Base, new_uuid
@@ -10,6 +12,14 @@ from api.models.base import Base, new_uuid
class TokenEvent(Base):
__tablename__ = "token_events"
__table_args__ = (
UniqueConstraint(
"measurement_kind",
"source_provider",
"source_id",
name="uq_token_events_source_identity",
),
)
id: Mapped[uuid.UUID] = mapped_column(
UUID(as_uuid=True), primary_key=True, default=new_uuid
@@ -31,6 +41,35 @@ class TokenEvent(Base):
ref_type: Mapped[str | None] = mapped_column(Text, nullable=True)
ref_id: Mapped[str | None] = mapped_column(Text, nullable=True)
note: Mapped[str | None] = mapped_column(Text, nullable=True)
measurement_kind: Mapped[str] = mapped_column(
Text, nullable=False, default="estimated", server_default="estimated", index=True
)
source_provider: Mapped[str] = mapped_column(
Text, nullable=False, default="manual", server_default="manual", index=True
)
source_id: Mapped[str | None] = mapped_column(Text, nullable=True, index=True)
source_path: Mapped[str | None] = mapped_column(Text, nullable=True)
source_created_at: Mapped[datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True, index=True
)
ingested_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
)
parser_version: Mapped[str | None] = mapped_column(Text, nullable=True)
confidence: Mapped[float] = mapped_column(
Float, nullable=False, default=0.35, server_default="0.35"
)
cached_input_tokens: Mapped[int] = mapped_column(
Integer, nullable=False, default=0, server_default="0"
)
reasoning_output_tokens: Mapped[int] = mapped_column(
Integer, nullable=False, default=0, server_default="0"
)
raw_total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
cost_estimated_usd: Mapped[float | None] = mapped_column(Float, nullable=True)
raw_metadata: Mapped[dict[str, Any]] = mapped_column(
JSONB, nullable=False, default=dict, server_default="{}"
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
)

View File

@@ -75,23 +75,47 @@ async def update_task(
if task is None:
raise HTTPException(status_code=404, detail="Task not found")
previous_status = task.status.value
# Separate token fields from task fields
token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "token_note", "model", "agent", "session_id"}
token_field_names = {
"tokens_in",
"tokens_out",
"workplan_tokens_in",
"workplan_tokens_out",
"token_note",
"model",
"agent",
"session_id",
"suppress_token_event",
}
update_data = body.model_dump(exclude_unset=True)
token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_field_names}
suppress_token_event = bool(token_data.pop("suppress_token_event", False))
for field, value in update_data.items():
setattr(task, field, value)
await session.commit()
await session.refresh(task)
# Token event — three-tier logic, only when marking done
if update_data.get("status") == "done":
# Token event — three-tier logic, only for an intentional transition to done.
status_update = update_data.get("status")
new_status = status_update.value if hasattr(status_update, "value") else status_update
if (
new_status == "done"
and previous_status != "done"
and not suppress_token_event
):
if "tokens_in" in token_data and "tokens_out" in token_data:
# Tier 1: exact counts — default note "measured"; caller may override with token_note
tin = token_data["tokens_in"]
tout = token_data["tokens_out"]
tnote = token_data.get("token_note") or "measured"
measurement_kind = "measured"
source_provider = "manual"
confidence = 1.0
source_id = f"task:{task_id}:manual"
raw_metadata = {"input_source": "task_status_patch"}
elif "workplan_tokens_in" in token_data and "workplan_tokens_out" in token_data:
# Tier 2: prorate workplan total across task count
count_result = await session.execute(
@@ -101,9 +125,24 @@ async def update_task(
tin = token_data["workplan_tokens_in"] // task_count
tout = token_data["workplan_tokens_out"] // task_count
tnote = "workplan"
measurement_kind = "allocated"
source_provider = "manual"
confidence = 0.7
source_id = f"task:{task_id}:workplan-allocation"
raw_metadata = {
"allocation_method": "workplan_prorated",
"workplan_tokens_in": token_data["workplan_tokens_in"],
"workplan_tokens_out": token_data["workplan_tokens_out"],
"task_count": task_count,
}
else:
# Tier 3: heuristic fallback
tin, tout, tnote = 1000, 500, "heuristic"
measurement_kind = "estimated"
source_provider = "task_fallback"
confidence = 0.35
source_id = f"task:{task_id}:heuristic"
raw_metadata = {"estimation_method": "fixed_task_done_fallback"}
# Resolve repo_id via workstream
ws = await session.get(Workstream, task.workstream_id)
@@ -121,6 +160,12 @@ async def update_task(
ref_type="task",
ref_id=str(task_id),
note=tnote,
measurement_kind=measurement_kind,
source_provider=source_provider,
source_id=source_id,
confidence=confidence,
raw_total_tokens=tin + tout,
raw_metadata=raw_metadata,
)
session.add(event)
await session.commit()

View File

@@ -1,5 +1,7 @@
import uuid
from collections import defaultdict
from datetime import datetime
from typing import Any
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy import select
@@ -10,18 +12,95 @@ from api.models.managed_repo import ManagedRepo
from api.models.task import Task
from api.models.token_event import TokenEvent
from api.models.workstream import Workstream
from api.schemas.token_event import RepoTokenSummary, TokenEventCreate, TokenEventPatch, TokenEventRead, TokenSummary
from api.schemas.token_event import (
RepoTokenSummary,
TokenAggregateRow,
TokenAggregateSummary,
TokenEventCreate,
TokenEventPatch,
TokenEventRead,
TokenQualitySummary,
TokenSummary,
)
router = APIRouter(prefix="/token-events", tags=["token-events"])
# Default confidence score for each measurement kind. _apply_event_defaults
# uses this table when a payload carries no explicit "confidence"; kinds that
# are not listed here fall back to 0.35 at that call site.
DEFAULT_CONFIDENCE = {
    "measured": 1.0,
    "allocated": 0.70,
    "estimated": 0.35,
    "superseded": 0.0,
}
@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED)
async def create_token_event(
body: TokenEventCreate,
session: AsyncSession = Depends(get_session),
) -> TokenEvent:
data = body.model_dump()
# Default "parser_version" label per source provider. _apply_event_defaults
# fills it in only when the provider is listed here and the payload did not
# already supply a parser_version.
SOURCE_PARSER_DEFAULTS = {
    "codex_session": "codex-desktop-v1",
    "claude_transcript": "claude-transcript-v1",
    "llm_connect": "llm-connect-v1",
}
def _event_total(event: TokenEvent) -> int:
return event.tokens_in + event.tokens_out
def _infer_measurement_kind(data: dict[str, Any]) -> str:
if data.get("measurement_kind"):
return str(data["measurement_kind"])
note = data.get("note")
if note == "heuristic_superseded_by_codex_backfill":
return "superseded"
if note == "workplan":
return "allocated"
if note == "heuristic":
return "estimated"
if note == "measured" or str(note or "").startswith("backfill:codex-session"):
return "measured"
provider = data.get("source_provider")
if provider in {"codex_session", "claude_transcript", "llm_connect"}:
return "measured"
return "estimated"
def _infer_source_provider(data: dict[str, Any], measurement_kind: str) -> str:
if data.get("source_provider"):
return str(data["source_provider"])
note = data.get("note")
ref_id = str(data.get("ref_id") or "")
agent = str(data.get("agent") or "").lower()
if note == "heuristic":
return "task_fallback"
if ref_id.startswith("codex:") or str(note or "").startswith("backfill:codex-session"):
return "codex_session"
if measurement_kind == "measured" and "claude" in agent:
return "claude_transcript"
return "manual"
def _apply_event_defaults(data: dict[str, Any]) -> dict[str, Any]:
    """Fill in provenance and accounting defaults on an event payload.

    The dict is mutated in place and also returned. ``measurement_kind`` and
    ``source_provider`` are always (re)written with the inferred values; every
    other field is only set when it is missing.
    """
    kind = _infer_measurement_kind(data)
    provider = _infer_source_provider(data, kind)
    data["measurement_kind"] = kind
    data["source_provider"] = provider

    # Session/transcript providers get a source identity from ref_id or session_id.
    identity_providers = {"codex_session", "claude_transcript", "llm_connect"}
    if provider in identity_providers and not data.get("source_id"):
        fallback_identity = data.get("ref_id") or data.get("session_id")
        if fallback_identity:
            data["source_id"] = str(fallback_identity)

    # An identified source without its own timestamp inherits created_at.
    has_source_time = bool(data.get("source_created_at"))
    if not has_source_time and data.get("created_at") and data.get("source_id"):
        data["source_created_at"] = data["created_at"]

    token_sum = (data.get("tokens_in") or 0) + (data.get("tokens_out") or 0)
    # Built per call so the raw_metadata default is a fresh dict every time.
    missing_field_defaults = {
        "confidence": DEFAULT_CONFIDENCE.get(kind, 0.35),
        "cached_input_tokens": 0,
        "reasoning_output_tokens": 0,
        "raw_total_tokens": token_sum,
        "raw_metadata": {},
    }
    for field_name, default_value in missing_field_defaults.items():
        data.setdefault(field_name, default_value)

    if provider in SOURCE_PARSER_DEFAULTS:
        data.setdefault("parser_version", SOURCE_PARSER_DEFAULTS[provider])
    return data
async def _populate_relationship_defaults(data: dict[str, Any], session: AsyncSession) -> dict[str, Any]:
# Auto-populate workstream_id from task if not provided
if data.get("task_id") and not data.get("workstream_id"):
task = await session.get(Task, data["task_id"])
@@ -33,6 +112,34 @@ async def create_token_event(
ws = await session.get(Workstream, data["workstream_id"])
if ws and ws.repo_id:
data["repo_id"] = ws.repo_id
return data
async def _find_source_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent | None:
    """Look up the stored event that shares the payload's source identity.

    The identity is the triple (measurement_kind, source_provider, source_id).
    Returns ``None`` without querying when the payload has no ``source_id``.
    """
    identity = data.get("source_id")
    if not identity:
        return None

    lookup = select(TokenEvent).where(
        TokenEvent.measurement_kind == data["measurement_kind"],
        TokenEvent.source_provider == data["source_provider"],
        TokenEvent.source_id == identity,
    )
    rows = await session.execute(lookup)
    return rows.scalar_one_or_none()
async def _create_or_upsert_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent:
data = _apply_event_defaults(data)
data = await _populate_relationship_defaults(data, session)
existing = await _find_source_event(data, session)
if existing is not None:
for field, value in data.items():
setattr(existing, field, value)
await session.commit()
await session.refresh(existing)
return existing
event = TokenEvent(**data)
session.add(event)
@@ -41,6 +148,77 @@ async def create_token_event(
return event
def _filter_query(
    q,
    *,
    task_id: uuid.UUID | None = None,
    workstream_id: uuid.UUID | None = None,
    repo_id: uuid.UUID | None = None,
    ref_type: str | None = None,
    ref_id: str | None = None,
    model: str | None = None,
    agent: str | None = None,
    note: str | None = None,
    measurement_kind: str | None = None,
    source_provider: str | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    include_superseded: bool = True,
    unattributed: bool = False,
):
    """Narrow a ``TokenEvent`` select with the optional filters given.

    Falsy filter values are ignored. ``since`` is inclusive and ``until`` is
    exclusive, both compared against ``created_at``. ``unattributed`` keeps
    only events with no repo, workstream and task link. Returns the query.
    """
    # Plain equality filters, applied in declaration order when truthy.
    equality_filters = (
        (TokenEvent.task_id, task_id),
        (TokenEvent.workstream_id, workstream_id),
        (TokenEvent.repo_id, repo_id),
        (TokenEvent.ref_type, ref_type),
        (TokenEvent.ref_id, ref_id),
        (TokenEvent.model, model),
        (TokenEvent.agent, agent),
        (TokenEvent.note, note),
        (TokenEvent.measurement_kind, measurement_kind),
        (TokenEvent.source_provider, source_provider),
    )
    for column, wanted in equality_filters:
        if wanted:
            q = q.where(column == wanted)

    if since:
        q = q.where(TokenEvent.created_at >= since)
    if until:
        q = q.where(TokenEvent.created_at < until)

    if not include_superseded:
        q = q.where(TokenEvent.measurement_kind != "superseded")

    if unattributed:
        q = q.where(
            TokenEvent.repo_id.is_(None),
            TokenEvent.workstream_id.is_(None),
            TokenEvent.task_id.is_(None),
        )
    return q
@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED)
async def create_token_event(
    body: TokenEventCreate,
    session: AsyncSession = Depends(get_session),
) -> TokenEvent:
    """Record a token event from the request body.

    Fields left as ``None`` are dropped from the payload before it is handed
    to ``_create_or_upsert_event``, which performs the actual persistence.
    """
    payload = body.model_dump(exclude_none=True)
    stored_event = await _create_or_upsert_event(payload, session)
    return stored_event
@router.post("/upsert", response_model=TokenEventRead)
async def upsert_token_event(
    body: TokenEventCreate,
    session: AsyncSession = Depends(get_session),
) -> TokenEvent:
    """Create or update a token event from the request body.

    Fields left as ``None`` are dropped from the payload before it is handed
    to ``_create_or_upsert_event``, which performs the actual persistence.
    """
    payload = body.model_dump(exclude_none=True)
    stored_event = await _create_or_upsert_event(payload, session)
    return stored_event
@router.get("/summary/", response_model=TokenSummary)
async def get_token_summary(
scope: str = Query(..., description="task|workstream|repo|commit|release|session"),
@@ -80,11 +258,16 @@ async def get_token_summary(
by_model: dict[str, int] = defaultdict(int)
by_agent: dict[str, int] = defaultdict(int)
by_measurement_kind: dict[str, int] = defaultdict(int)
by_source_provider: dict[str, int] = defaultdict(int)
for e in events:
total = _event_total(e)
if e.model:
by_model[e.model] += e.tokens_in + e.tokens_out
by_model[e.model] += total
if e.agent:
by_agent[e.agent] += e.tokens_in + e.tokens_out
by_agent[e.agent] += total
by_measurement_kind[e.measurement_kind] += total
by_source_provider[e.source_provider] += total
return TokenSummary(
scope=scope,
@@ -95,11 +278,18 @@ async def get_token_summary(
event_count=len(events),
by_model=dict(by_model),
by_agent=dict(by_agent),
by_measurement_kind=dict(by_measurement_kind),
by_source_provider=dict(by_source_provider),
)
@router.get("/by-repo/", response_model=list[RepoTokenSummary])
async def get_tokens_by_repo(
measurement_kind: str | None = None,
source_provider: str | None = None,
since: datetime | None = None,
until: datetime | None = None,
include_superseded: bool = Query(True),
session: AsyncSession = Depends(get_session),
) -> list[RepoTokenSummary]:
"""Aggregate token consumption per repo, resolving via the full graph.
@@ -112,7 +302,16 @@ async def get_tokens_by_repo(
Only events that resolve to a repo are included.
"""
# Fetch all events, workstreams, repos in three queries (avoids N+1)
events_result = await session.execute(select(TokenEvent))
events_result = await session.execute(
_filter_query(
select(TokenEvent),
measurement_kind=measurement_kind,
source_provider=source_provider,
since=since,
until=until,
include_superseded=include_superseded,
)
)
events = list(events_result.scalars().all())
ws_result = await session.execute(select(Workstream))
@@ -148,14 +347,19 @@ async def get_tokens_by_repo(
"event_count": 0,
"by_model": defaultdict(int),
"by_note": defaultdict(int),
"by_measurement_kind": defaultdict(int),
"by_source_provider": defaultdict(int),
}
g = groups[rid]
g["tokens_in"] += e.tokens_in
g["tokens_out"] += e.tokens_out
g["event_count"] += 1
total = _event_total(e)
if e.model:
g["by_model"][e.model] += e.tokens_in + e.tokens_out
g["by_note"][e.note or "unknown"] += e.tokens_in + e.tokens_out
g["by_model"][e.model] += total
g["by_note"][e.note or "unknown"] += total
g["by_measurement_kind"][e.measurement_kind] += total
g["by_source_provider"][e.source_provider] += total
return [
RepoTokenSummary(
@@ -166,6 +370,188 @@ async def get_tokens_by_repo(
]
@router.get("/aggregate/", response_model=TokenAggregateSummary)
async def get_token_aggregate(
    measurement_kind: str | None = None,
    source_provider: str | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    include_superseded: bool = Query(False),
    session: AsyncSession = Depends(get_session),
) -> TokenAggregateSummary:
    """Aggregate token events overall and per repo, workstream, task and model.

    Events are selected through ``_filter_query`` with the given filters;
    superseded events are excluded unless ``include_superseded`` is set.
    All workstreams, tasks and repos are loaded up front so that each event
    can be resolved in memory. The per-scope rows are sorted by descending
    total tokens.
    """
    events_result = await session.execute(
        _filter_query(
            select(TokenEvent),
            measurement_kind=measurement_kind,
            source_provider=source_provider,
            since=since,
            until=until,
            include_superseded=include_superseded,
        )
    )
    events = list(events_result.scalars().all())

    # Lookup tables keyed by primary key, used to resolve event relationships.
    ws_result = await session.execute(select(Workstream))
    ws_map: dict[uuid.UUID, Workstream] = {w.id: w for w in ws_result.scalars().all()}
    task_result = await session.execute(select(Task))
    task_map: dict[uuid.UUID, Task] = {t.id: t for t in task_result.scalars().all()}
    repo_result = await session.execute(select(ManagedRepo))
    repo_map: dict[uuid.UUID, ManagedRepo] = {r.id: r for r in repo_result.scalars().all()}

    def resolve_repo_id(e: TokenEvent) -> uuid.UUID | None:
        # Resolution order: the event's own repo_id, then its workstream's
        # repo, where the workstream may itself be found via the event's task.
        if e.repo_id:
            return e.repo_id
        ws_id = e.workstream_id
        if not ws_id and e.task_id and e.task_id in task_map:
            ws_id = task_map[e.task_id].workstream_id
        if ws_id and ws_id in ws_map:
            return ws_map[ws_id].repo_id
        return None

    def add(groups: dict[str, dict[str, Any]], key: str | None, label: str | None, e: TokenEvent) -> None:
        # Accumulate event `e` into the row for `key`, creating the row on
        # first use. Events with no key for this scope are skipped.
        if not key:
            return
        if key not in groups:
            groups[key] = {
                "scope_id": key,
                "label": label,
                "tokens_in": 0,
                "tokens_out": 0,
                "event_count": 0,
                "by_measurement_kind": defaultdict(int),
                "by_source_provider": defaultdict(int),
            }
        row = groups[key]
        total = _event_total(e)
        row["tokens_in"] += e.tokens_in
        row["tokens_out"] += e.tokens_out
        row["event_count"] += 1
        row["by_measurement_kind"][e.measurement_kind] += total
        row["by_source_provider"][e.source_provider] += total

    by_repo: dict[str, dict[str, Any]] = {}
    by_workstream: dict[str, dict[str, Any]] = {}
    by_task: dict[str, dict[str, Any]] = {}
    by_model: dict[str, dict[str, Any]] = {}
    by_measurement_kind: dict[str, int] = defaultdict(int)
    by_source_provider: dict[str, int] = defaultdict(int)
    first_event_at = last_event_at = last_ingested_at = None
    tokens_in = tokens_out = 0

    for e in events:
        total = _event_total(e)
        tokens_in += e.tokens_in
        tokens_out += e.tokens_out
        by_measurement_kind[e.measurement_kind] += total
        by_source_provider[e.source_provider] += total
        # Track the created_at range and the most recent ingestion time.
        if first_event_at is None or e.created_at < first_event_at:
            first_event_at = e.created_at
        if last_event_at is None or e.created_at > last_event_at:
            last_event_at = e.created_at
        if last_ingested_at is None or e.ingested_at > last_ingested_at:
            last_ingested_at = e.ingested_at

        rid = resolve_repo_id(e)
        repo = repo_map.get(rid) if rid else None
        add(by_repo, str(rid) if rid else None, repo.slug if repo else None, e)

        # The workstream may be linked directly or through the event's task.
        ws_id = e.workstream_id or (task_map[e.task_id].workstream_id if e.task_id in task_map else None)
        ws = ws_map.get(ws_id) if ws_id else None
        add(by_workstream, str(ws_id) if ws_id else None, ws.title if ws else None, e)

        task = task_map.get(e.task_id) if e.task_id else None
        add(by_task, str(e.task_id) if e.task_id else None, task.title if task else None, e)

        # Events without a model are grouped under the literal "unknown".
        add(by_model, e.model or "unknown", e.model or "unknown", e)

    def rows(groups: dict[str, dict[str, Any]]) -> list[TokenAggregateRow]:
        # Convert accumulator dicts to response rows (defaultdicts become
        # plain dicts), ordered by descending tokens_total.
        result = []
        for row in groups.values():
            result.append(
                TokenAggregateRow(
                    **{k: (dict(v) if isinstance(v, defaultdict) else v) for k, v in row.items()},
                    tokens_total=row["tokens_in"] + row["tokens_out"],
                )
            )
        return sorted(result, key=lambda item: -item.tokens_total)

    return TokenAggregateSummary(
        tokens_in=tokens_in,
        tokens_out=tokens_out,
        tokens_total=tokens_in + tokens_out,
        event_count=len(events),
        first_event_at=first_event_at,
        last_event_at=last_event_at,
        last_ingested_at=last_ingested_at,
        by_repo=rows(by_repo),
        by_workstream=rows(by_workstream),
        by_task=rows(by_task),
        by_model=rows(by_model),
        by_measurement_kind=dict(by_measurement_kind),
        by_source_provider=dict(by_source_provider),
    )
@router.get("/quality/", response_model=TokenQualitySummary)
async def get_token_quality(
    since: datetime | None = None,
    until: datetime | None = None,
    session: AsyncSession = Depends(get_session),
) -> TokenQualitySummary:
    """Report data-quality counters for token events in a time window.

    All figures are event counts, not token totals. Superseded events are
    included because the query is built with the default filter settings.
    ``last_reconciliation_at`` is always reported as ``None``.
    """
    window_query = _filter_query(select(TokenEvent), since=since, until=until)
    fetched = await session.execute(window_query)
    events = list(fetched.scalars().all())

    kind_counts: dict[str, int] = defaultdict(int)
    provider_counts: dict[str, int] = defaultdict(int)
    identity_counts: dict[tuple[str, str, str], int] = defaultdict(int)
    # Most recent ingested_at seen for each tracked provider.
    latest_ingest: dict[str, datetime | None] = {
        "codex_session": None,
        "claude_transcript": None,
    }
    fallbacks = 0
    unattributed_measured = 0
    missing_provenance = 0

    for event in events:
        kind_counts[event.measurement_kind] += 1
        provider_counts[event.source_provider] += 1

        if event.source_id:
            identity = (event.measurement_kind, event.source_provider, event.source_id)
            identity_counts[identity] += 1

        if event.source_provider == "task_fallback" or event.note == "heuristic":
            fallbacks += 1

        if event.measurement_kind == "measured":
            linked = event.repo_id or event.workstream_id or event.task_id
            if not linked:
                unattributed_measured += 1
            if not event.source_id:
                missing_provenance += 1

        if event.source_provider in latest_ingest:
            seen_so_far = latest_ingest[event.source_provider]
            if seen_so_far is None or event.ingested_at > seen_so_far:
                latest_ingest[event.source_provider] = event.ingested_at

    # Number of source identities that occur on more than one event.
    duplicated_identities = len([n for n in identity_counts.values() if n > 1])

    return TokenQualitySummary(
        event_count=len(events),
        measured_event_count=kind_counts.get("measured", 0),
        estimated_event_count=kind_counts.get("estimated", 0),
        allocated_event_count=kind_counts.get("allocated", 0),
        superseded_event_count=kind_counts.get("superseded", 0),
        fallback_event_count=fallbacks,
        unattributed_measured_event_count=unattributed_measured,
        missing_provenance_event_count=missing_provenance,
        duplicate_source_count=duplicated_identities,
        last_codex_ingested_at=latest_ingest["codex_session"],
        last_claude_ingested_at=latest_ingest["claude_transcript"],
        last_reconciliation_at=None,
        by_measurement_kind=dict(kind_counts),
        by_source_provider=dict(provider_counts),
    )
@router.patch("/{event_id}", response_model=TokenEventRead)
async def patch_token_event(
event_id: uuid.UUID,
@@ -175,7 +561,26 @@ async def patch_token_event(
event = await session.get(TokenEvent, event_id)
if event is None:
raise HTTPException(status_code=404, detail="Token event not found")
for field, value in body.model_dump(exclude_none=True).items():
data = body.model_dump(exclude_none=True)
if "note" in data or "measurement_kind" in data or "source_provider" in data:
merged = {
"tokens_in": data.get("tokens_in", event.tokens_in),
"tokens_out": data.get("tokens_out", event.tokens_out),
"note": data.get("note", event.note),
"agent": data.get("agent", event.agent),
"ref_id": data.get("ref_id", event.ref_id),
"session_id": data.get("session_id", event.session_id),
"measurement_kind": data.get("measurement_kind", event.measurement_kind),
"source_provider": data.get("source_provider", event.source_provider),
"source_id": data.get("source_id", event.source_id),
}
inferred = _apply_event_defaults({k: v for k, v in merged.items() if v is not None})
data.setdefault("measurement_kind", inferred["measurement_kind"])
data.setdefault("source_provider", inferred["source_provider"])
data.setdefault("confidence", inferred["confidence"])
if inferred.get("source_id"):
data.setdefault("source_id", inferred["source_id"])
for field, value in data.items():
setattr(event, field, value)
await session.commit()
await session.refresh(event)
@@ -203,26 +608,33 @@ async def list_token_events(
model: str | None = None,
agent: str | None = None,
note: str | None = None,
measurement_kind: str | None = None,
source_provider: str | None = None,
since: datetime | None = None,
until: datetime | None = None,
include_superseded: bool = Query(True),
unattributed: bool = False,
offset: int = Query(0, ge=0),
limit: int = Query(100, le=1000),
session: AsyncSession = Depends(get_session),
) -> list[TokenEvent]:
q = select(TokenEvent)
if task_id:
q = q.where(TokenEvent.task_id == task_id)
if workstream_id:
q = q.where(TokenEvent.workstream_id == workstream_id)
if repo_id:
q = q.where(TokenEvent.repo_id == repo_id)
if ref_type:
q = q.where(TokenEvent.ref_type == ref_type)
if ref_id:
q = q.where(TokenEvent.ref_id == ref_id)
if model:
q = q.where(TokenEvent.model == model)
if agent:
q = q.where(TokenEvent.agent == agent)
if note:
q = q.where(TokenEvent.note == note)
q = q.order_by(TokenEvent.created_at.desc()).limit(limit)
q = _filter_query(
select(TokenEvent),
task_id=task_id,
workstream_id=workstream_id,
repo_id=repo_id,
ref_type=ref_type,
ref_id=ref_id,
model=model,
agent=agent,
note=note,
measurement_kind=measurement_kind,
source_provider=source_provider,
since=since,
until=until,
include_superseded=include_superseded,
unattributed=unattributed,
)
q = q.order_by(TokenEvent.created_at.desc()).offset(offset).limit(limit)
result = await session.execute(q)
return list(result.scalars().all())

View File

@@ -43,6 +43,7 @@ class TaskUpdate(BaseModel):
# 2. workplan_tokens_in + workplan_tokens_out → prorated across task count (note="workplan")
# 3. neither provided, status=done → heuristic 1000/500 (note="heuristic")
# token_note overrides the auto-assigned note for Tier 1 only (e.g. "userbased")
# suppress_token_event lets file/cache sync update status without recording usage.
tokens_in: int | None = None
tokens_out: int | None = None
workplan_tokens_in: int | None = None
@@ -51,6 +52,7 @@ class TaskUpdate(BaseModel):
model: str | None = None
agent: str | None = None
session_id: str | None = None
suppress_token_event: bool | None = None
@model_validator(mode="after")
def blocking_reason_required_when_blocked(self) -> Self:

View File

@@ -1,7 +1,8 @@
import uuid
from datetime import datetime
from typing import Any
from pydantic import BaseModel, ConfigDict, computed_field
from pydantic import BaseModel, ConfigDict, Field, computed_field
class TokenEventCreate(BaseModel):
@@ -16,6 +17,19 @@ class TokenEventCreate(BaseModel):
ref_type: str | None = None
ref_id: str | None = None
note: str | None = None
created_at: datetime | None = None
measurement_kind: str | None = None
source_provider: str | None = None
source_id: str | None = None
source_path: str | None = None
source_created_at: datetime | None = None
parser_version: str | None = None
confidence: float | None = None
cached_input_tokens: int | None = None
reasoning_output_tokens: int | None = None
raw_total_tokens: int | None = None
cost_estimated_usd: float | None = None
raw_metadata: dict[str, Any] | None = None
class TokenEventRead(BaseModel):
@@ -33,6 +47,19 @@ class TokenEventRead(BaseModel):
ref_type: str | None = None
ref_id: str | None = None
note: str | None = None
measurement_kind: str
source_provider: str
source_id: str | None = None
source_path: str | None = None
source_created_at: datetime | None = None
ingested_at: datetime
parser_version: str | None = None
confidence: float
cached_input_tokens: int
reasoning_output_tokens: int
raw_total_tokens: int | None = None
cost_estimated_usd: float | None = None
raw_metadata: dict[str, Any] = Field(default_factory=dict)
created_at: datetime
@computed_field
@@ -40,6 +67,11 @@ class TokenEventRead(BaseModel):
def tokens_total(self) -> int:
return self.tokens_in + self.tokens_out
@computed_field
@property
def token_evidence_total(self) -> int:
return (self.raw_total_tokens or self.tokens_in + self.tokens_out)
class TokenSummary(BaseModel):
scope: str
@@ -50,14 +82,36 @@ class TokenSummary(BaseModel):
event_count: int
by_model: dict[str, int]
by_agent: dict[str, int]
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
by_source_provider: dict[str, int] = Field(default_factory=dict)
class TokenEventPatch(BaseModel):
    """Request body for partially updating a token event.

    Every field is optional and defaults to ``None``.
    """

    # Token counts and attribution links.
    tokens_in: int | None = None
    tokens_out: int | None = None
    task_id: uuid.UUID | None = None
    workstream_id: uuid.UUID | None = None
    repo_id: uuid.UUID | None = None
    session_id: str | None = None
    # Free-form descriptors and external reference.
    note: str | None = None
    model: str | None = None
    agent: str | None = None
    ref_type: str | None = None
    ref_id: str | None = None
    created_at: datetime | None = None
    # Provenance: how the numbers were obtained and where they came from.
    measurement_kind: str | None = None
    source_provider: str | None = None
    source_id: str | None = None
    source_path: str | None = None
    source_created_at: datetime | None = None
    ingested_at: datetime | None = None
    parser_version: str | None = None
    confidence: float | None = None
    # Additional accounting detail.
    cached_input_tokens: int | None = None
    reasoning_output_tokens: int | None = None
    raw_total_tokens: int | None = None
    cost_estimated_usd: float | None = None
    raw_metadata: dict[str, Any] | None = None
class RepoTokenSummary(BaseModel):
@@ -69,3 +123,49 @@ class RepoTokenSummary(BaseModel):
event_count: int
by_model: dict[str, int]
by_note: dict[str, int]
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
by_source_provider: dict[str, int] = Field(default_factory=dict)
class TokenAggregateRow(BaseModel):
    """One grouped row of aggregated token usage for a single scope."""

    # Identifier of the grouped entity, as a string.
    scope_id: str
    # Optional human-readable name for the entity.
    label: str | None = None
    tokens_in: int
    tokens_out: int
    tokens_total: int
    event_count: int
    # Breakdown maps keyed by measurement kind / source provider.
    by_measurement_kind: dict[str, int] = Field(default_factory=dict)
    by_source_provider: dict[str, int] = Field(default_factory=dict)
class TokenAggregateSummary(BaseModel):
    """Overall token totals plus per-scope breakdown rows."""

    tokens_in: int
    tokens_out: int
    tokens_total: int
    event_count: int
    # Time range of the aggregated events; None when there are none.
    first_event_at: datetime | None = None
    last_event_at: datetime | None = None
    last_ingested_at: datetime | None = None
    # One list of rows per grouping dimension.
    by_repo: list[TokenAggregateRow] = Field(default_factory=list)
    by_workstream: list[TokenAggregateRow] = Field(default_factory=list)
    by_task: list[TokenAggregateRow] = Field(default_factory=list)
    by_model: list[TokenAggregateRow] = Field(default_factory=list)
    # Breakdown maps keyed by measurement kind / source provider.
    by_measurement_kind: dict[str, int] = Field(default_factory=dict)
    by_source_provider: dict[str, int] = Field(default_factory=dict)
class TokenQualitySummary(BaseModel):
    """Data-quality counters describing a set of token events."""

    event_count: int
    # Event counts per measurement kind.
    measured_event_count: int
    estimated_event_count: int
    allocated_event_count: int
    superseded_event_count: int
    # Counters for events that indicate weak or incomplete data.
    fallback_event_count: int
    unattributed_measured_event_count: int
    missing_provenance_event_count: int
    duplicate_source_count: int
    # Freshness timestamps; None when no value is available.
    last_codex_ingested_at: datetime | None = None
    last_claude_ingested_at: datetime | None = None
    last_reconciliation_at: datetime | None = None
    # Breakdown maps keyed by measurement kind / source provider.
    by_measurement_kind: dict[str, int] = Field(default_factory=dict)
    by_source_provider: dict[str, int] = Field(default_factory=dict)

View File

@@ -0,0 +1,16 @@
"""Token source adapters for measured agent usage."""
from api.services.token_sources.base import TokenSourceRecord, parse_iso
from api.services.token_sources.codex import collect_codex_sessions, iter_codex_session_files, parse_codex_session
from api.services.token_sources.claude import collect_claude_transcripts, iter_claude_transcript_files, parse_claude_transcript
__all__ = [
"TokenSourceRecord",
"parse_iso",
"collect_codex_sessions",
"iter_codex_session_files",
"parse_codex_session",
"collect_claude_transcripts",
"iter_claude_transcript_files",
"parse_claude_transcript",
]

View File

@@ -0,0 +1,171 @@
from __future__ import annotations
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any
@dataclass(frozen=True)
class RepoRef:
    """Immutable description of a registered repo, used as a match candidate.

    Instances are built from API dictionaries by ``repo_refs_from_api`` and
    compared against a working directory by ``resolve_repo``.
    """

    repo_id: str
    slug: str
    # Primary checkout path; the literal "(unknown)" is ignored by _repo_paths.
    local_path: str | None = None
    # Extra checkout paths; only the dict values are used when matching.
    host_paths: dict[str, Any] | None = None
    # Compared after normalise_remote_url() against the cwd's "origin" remote.
    remote_url: str | None = None
    # Compared against `git rev-list --max-parents=0 HEAD` of the cwd's repo.
    git_fingerprint: str | None = None
@dataclass(frozen=True)
class RepoMatch:
    """Result of resolving a working directory to a registered repo."""

    repo_id: str
    slug: str
    # Name of the rule that matched; resolve_repo emits "git_fingerprint",
    # "git_fingerprint_remote", "remote_url", "path_exact_slug", "path_exact"
    # or "path_prefix".
    method: str
    # Fixed score attached to the rule (0.75 for "path_prefix" up to 0.99 for
    # "git_fingerprint_remote").
    confidence: float
def normalise_cwd(raw: str | None) -> str | None:
if not raw:
return None
value = raw.replace("\\", "/")
prefixes = (
"//wsl.localhost/Ubuntu-24.04",
"//wsl$/Ubuntu-24.04",
)
for prefix in prefixes:
if value.startswith(prefix):
return value[len(prefix):] or "/"
if len(value) >= 3 and value[1:3] == ":/":
drive = value[0].lower()
return f"/mnt/{drive}{value[2:]}"
return value
def normalise_remote_url(raw: str | None) -> str | None:
if not raw:
return None
value = raw.strip()
if value.endswith(".git"):
value = value[:-4]
if value.startswith("git@") and ":" in value:
host, path = value[4:].split(":", 1)
value = f"ssh://{host}/{path}"
return value.lower().rstrip("/")
def repo_refs_from_api(repos: list[dict[str, Any]]) -> list[RepoRef]:
    """Turn API repo dictionaries into ``RepoRef`` objects.

    Entries lacking a truthy ``id`` or ``slug`` are dropped. ``host_paths`` is
    kept only when it is a dict; anything else becomes an empty dict.
    """

    def _build(item: dict[str, Any]) -> RepoRef:
        extra_paths = item.get("host_paths")
        return RepoRef(
            repo_id=str(item.get("id")),
            slug=str(item.get("slug")),
            local_path=item.get("local_path"),
            host_paths=extra_paths if isinstance(extra_paths, dict) else {},
            remote_url=item.get("remote_url"),
            git_fingerprint=item.get("git_fingerprint"),
        )

    return [_build(item) for item in repos if item.get("id") and item.get("slug")]
def _git(cwd: str, *args: str) -> str | None:
try:
result = subprocess.run(
["git", *args],
cwd=cwd,
check=False,
capture_output=True,
text=True,
timeout=5,
)
except (OSError, subprocess.SubprocessError):
return None
if result.returncode != 0:
return None
value = result.stdout.strip().splitlines()
return value[0] if value else None
def git_fingerprint_for_path(cwd: str | None) -> str | None:
    """Return the first root-commit hash of the git repo that contains ``cwd``.

    None is returned when the normalised path is empty, does not exist, or
    is not inside a git work tree.
    """
    location = normalise_cwd(cwd)
    if location and Path(location).exists():
        toplevel = _git(location, "rev-parse", "--show-toplevel")
        if toplevel:
            return _git(toplevel, "rev-list", "--max-parents=0", "HEAD")
    return None
def git_remote_for_path(cwd: str | None) -> str | None:
    """Return the ``origin`` remote URL of the git repo that contains ``cwd``.

    None is returned when the normalised path is empty, does not exist, is
    not inside a git work tree, or has no ``origin`` remote.
    """
    location = normalise_cwd(cwd)
    if location and Path(location).exists():
        toplevel = _git(location, "rev-parse", "--show-toplevel")
        if toplevel:
            return _git(toplevel, "remote", "get-url", "origin")
    return None
def _repo_paths(repo: RepoRef) -> list[str]:
    """List the normalised checkout paths known for ``repo``.

    Combines ``local_path`` with every truthy value of ``host_paths``, skips
    empty entries and the "(unknown)" placeholder, normalises each path and
    removes trailing slashes.
    """
    extra = (str(value) for value in (repo.host_paths or {}).values() if value)
    candidates = [repo.local_path, *extra]
    normalised = (
        normalise_cwd(str(candidate))
        for candidate in candidates
        if candidate and candidate != "(unknown)"
    )
    return [path.rstrip("/") for path in normalised if path]
def resolve_repo(cwd: str | None, repos: list[RepoRef]) -> RepoMatch | None:
    """Map a working directory to one of ``repos``, or None when nothing fits.

    Rules are tried from strongest to weakest evidence:

    1. unique root-commit fingerprint match ("git_fingerprint", 0.98);
    2. fingerprint match narrowed to one repo by remote URL
       ("git_fingerprint_remote", 0.99);
    3. unique remote URL match ("remote_url", 0.90);
    4. path equality, preferring the repo whose slug equals the directory
       name ("path_exact_slug", 0.85; otherwise "path_exact", 0.80);
    5. longest registered path that is a parent of the cwd ("path_prefix", 0.75).
    """
    location = normalise_cwd(cwd)
    root_commit = git_fingerprint_for_path(location)
    origin = normalise_remote_url(git_remote_for_path(location))

    def _hit(repo: RepoRef, method: str, confidence: float) -> RepoMatch:
        return RepoMatch(repo.repo_id, repo.slug, method, confidence)

    if root_commit:
        same_history = [repo for repo in repos if repo.git_fingerprint == root_commit]
        if len(same_history) == 1:
            return _hit(same_history[0], "git_fingerprint", 0.98)
        if origin:
            # Several repos share the root commit: disambiguate by remote.
            narrowed = [
                repo for repo in same_history
                if normalise_remote_url(repo.remote_url) == origin
            ]
            if len(narrowed) == 1:
                return _hit(narrowed[0], "git_fingerprint_remote", 0.99)

    if origin:
        same_remote = [repo for repo in repos if normalise_remote_url(repo.remote_url) == origin]
        if len(same_remote) == 1:
            return _hit(same_remote[0], "remote_url", 0.90)

    if not location:
        return None

    hits: list[tuple[str, RepoRef]] = [
        (known_path, repo)
        for repo in repos
        for known_path in _repo_paths(repo)
        if location == known_path or location.startswith(f"{known_path}/")
    ]
    if not hits:
        return None

    # Longest registered path first, so nested checkouts beat their parents.
    hits.sort(key=lambda item: len(item[0]), reverse=True)

    exact_repos = [repo for known_path, repo in hits if known_path == location]
    if not exact_repos:
        return _hit(hits[0][1], "path_prefix", 0.75)

    leaf = Path(location).name
    slug_match = next((repo for repo in exact_repos if repo.slug == leaf), None)
    if slug_match is not None:
        return _hit(slug_match, "path_exact_slug", 0.85)
    return _hit(exact_repos[0], "path_exact", 0.80)

View File

@@ -0,0 +1,71 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
def parse_iso(value: str) -> datetime:
    """Parse an ISO-8601 date or datetime string into an aware UTC datetime.

    Accepted forms:
      * a trailing ``Z``/``z`` is read as ``+00:00``;
      * a date without a time component (``2026-05-23``) means midnight UTC;
      * ``T``-separated and space-separated datetimes
        (``2026-05-23 13:59:05``) are both parsed;
      * values without an offset are taken to be UTC.

    Raises:
        ValueError: if the text is not a valid ISO-8601 date/datetime.
    """
    raw = value.strip()
    if raw[-1:] in ("Z", "z"):
        raw = raw[:-1] + "+00:00"
    # Add a midnight time only to date-only input. Testing for "T" alone would
    # also append it to space-separated datetimes and make them unparseable.
    if "T" not in raw and ":" not in raw:
        raw = f"{raw}T00:00:00+00:00"
    parsed = datetime.fromisoformat(raw)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
@dataclass
class TokenSourceRecord:
source_provider: str
source_id: str
source_path: Path
source_created_at: datetime | None
session_id: str | None = None
cwd: str | None = None
model: str | None = None
agent: str | None = None
tokens_in: int = 0
tokens_out: int = 0
cached_input_tokens: int = 0
reasoning_output_tokens: int = 0
raw_total_tokens: int | None = None
parser_version: str | None = None
confidence: float = 1.0
raw_metadata: dict[str, Any] = field(default_factory=dict)
@property
def tokens_total(self) -> int:
return self.tokens_in + self.tokens_out
def to_token_event_payload(self, repo_id: str | None = None) -> dict[str, Any]:
raw_total = self.raw_total_tokens
if raw_total is None:
raw_total = self.tokens_in + self.tokens_out
created_at = self.source_created_at.isoformat() if self.source_created_at else None
return {
"tokens_in": self.tokens_in,
"tokens_out": self.tokens_out,
"repo_id": repo_id,
"session_id": self.session_id,
"model": self.model,
"agent": self.agent,
"ref_type": "session",
"ref_id": self.source_id,
"note": f"measured:{self.source_provider}",
"created_at": created_at,
"measurement_kind": "measured",
"source_provider": self.source_provider,
"source_id": self.source_id,
"source_path": str(self.source_path),
"source_created_at": created_at,
"parser_version": self.parser_version,
"confidence": self.confidence,
"cached_input_tokens": self.cached_input_tokens,
"reasoning_output_tokens": self.reasoning_output_tokens,
"raw_total_tokens": raw_total,
"raw_metadata": self.raw_metadata,
}

View File

@@ -0,0 +1,120 @@
from __future__ import annotations
import json
from datetime import datetime
from pathlib import Path
from typing import Any
from api.services.token_sources.base import TokenSourceRecord, parse_iso
PARSER_VERSION = "claude-transcript-v1"
def iter_claude_transcript_files(claude_home: Path) -> list[Path]:
    """Return every ``*.jsonl`` file under ``<claude_home>/projects``, sorted.

    The search is recursive; a missing ``projects`` directory gives ``[]``.
    """
    root = claude_home / "projects"
    found = list(root.rglob("*.jsonl")) if root.is_dir() else []
    found.sort()
    return found
def _usage_from_entry(entry: dict[str, Any]) -> dict[str, Any]:
message = entry.get("message")
if isinstance(message, dict) and isinstance(message.get("usage"), dict):
return message["usage"]
usage = entry.get("usage")
return usage if isinstance(usage, dict) else {}
def parse_claude_transcript(path: Path, since: datetime) -> TokenSourceRecord | None:
    """Summarise the token usage recorded in one Claude transcript file.

    The file is read as JSON Lines. Session id, working directory and model
    are taken from the latest entry that carries them, with the file stem as
    the initial session id. Usage from entries timestamped before ``since`` is
    ignored; usage from entries without a timestamp is counted.

    Lines that cannot be processed are counted in
    ``raw_metadata["malformed_lines"]`` and skipped instead of aborting the
    file. This covers invalid JSON, JSON that is not an object, and entries
    whose timestamp cannot be parsed.

    Args:
        path: Transcript file to read.
        since: Lower bound for usage entries. It is compared with the aware
            UTC datetimes returned by ``parse_iso``, so it must be
            timezone-aware.

    Returns:
        A ``TokenSourceRecord`` whose ``source_created_at`` is the last
        timestamp seen, or None when the file cannot be opened or holds no
        countable usage.
    """
    session_id = path.stem
    cwd: str | None = None
    model: str | None = None
    first_at: datetime | None = None
    last_at: datetime | None = None
    tokens_in = tokens_out = 0
    cached_input_tokens = 0
    raw_total_tokens = 0
    usage_records = 0
    malformed_lines = 0

    try:
        handle = path.open("r", encoding="utf-8", errors="ignore")
    except OSError:
        return None

    with handle:
        for line in handle:
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                malformed_lines += 1
                continue
            # Valid JSON that is not an object (list, number, string) has no
            # fields to read; treat it like any other malformed line.
            if not isinstance(entry, dict):
                malformed_lines += 1
                continue

            ts = entry.get("timestamp") or entry.get("created_at")
            try:
                parsed_ts = parse_iso(ts) if isinstance(ts, str) else None
            except ValueError:
                # One bad timestamp must not abort the whole file; skip the
                # entry because its age relative to `since` is unknown.
                malformed_lines += 1
                continue
            if parsed_ts:
                first_at = first_at or parsed_ts
                last_at = parsed_ts

            session_id = str(entry.get("session_id") or entry.get("conversation_id") or session_id)
            cwd = entry.get("cwd") or entry.get("project_cwd") or cwd
            model = entry.get("model") or model
            message = entry.get("message")
            if isinstance(message, dict):
                model = message.get("model") or model

            usage = _usage_from_entry(entry)
            if not usage:
                continue
            if parsed_ts is not None and parsed_ts < since:
                continue

            input_tokens = int(usage.get("input_tokens") or 0)
            cache_creation = int(usage.get("cache_creation_input_tokens") or 0)
            cache_read = int(usage.get("cache_read_input_tokens") or 0)
            output_tokens = int(usage.get("output_tokens") or 0)
            if input_tokens == 0 and output_tokens == 0 and cache_creation == 0 and cache_read == 0:
                continue

            tokens_in += input_tokens
            tokens_out += output_tokens
            cached_input_tokens += cache_creation + cache_read
            raw_total_tokens += input_tokens + cache_creation + cache_read + output_tokens
            usage_records += 1

    if usage_records == 0 or tokens_in + tokens_out + cached_input_tokens == 0:
        return None

    return TokenSourceRecord(
        source_provider="claude_transcript",
        source_id=f"claude:{session_id}",
        source_path=path,
        source_created_at=last_at,
        session_id=session_id,
        cwd=cwd,
        model=model,
        agent="claude",
        tokens_in=tokens_in,
        tokens_out=tokens_out,
        cached_input_tokens=cached_input_tokens,
        raw_total_tokens=raw_total_tokens or None,
        parser_version=PARSER_VERSION,
        confidence=1.0,
        raw_metadata={
            "started_at": first_at.isoformat() if first_at else None,
            "usage_records": usage_records,
            "malformed_lines": malformed_lines,
            "source_file_name": path.name,
        },
    )
def collect_claude_transcripts(claude_home: Path, since: datetime) -> list[TokenSourceRecord]:
    """Parse every Claude transcript under ``claude_home``, one record per source id.

    When several files give the same ``source_id``, the record with the
    larger ``tokens_total`` is kept (the first one wins ties). The result is
    ordered by ``source_created_at``, with undated records first.
    """
    best: dict[str, TokenSourceRecord] = {}
    for transcript in iter_claude_transcript_files(claude_home):
        record = parse_claude_transcript(transcript, since)
        if record is None:
            continue
        incumbent = best.get(record.source_id)
        if incumbent is not None and record.tokens_total <= incumbent.tokens_total:
            continue
        best[record.source_id] = record

    def _created(record: TokenSourceRecord) -> datetime:
        # Undated records sort before everything else.
        return record.source_created_at or datetime.min.replace(tzinfo=since.tzinfo)

    return sorted(best.values(), key=_created)

View File

@@ -0,0 +1,124 @@
from __future__ import annotations
import json
from datetime import datetime
from pathlib import Path
from typing import Any
from api.services.token_sources.base import TokenSourceRecord, parse_iso
PARSER_VERSION = "codex-desktop-v1"
def iter_codex_session_files(codex_home: Path) -> list[Path]:
    """List Codex session logs: live sessions first, then archived ones.

    Live logs are ``sessions/*/*/*/*.jsonl`` and archived logs are
    ``archived_sessions/*.jsonl``. Each group is sorted on its own, and a
    missing directory contributes nothing.
    """
    layout = (
        (codex_home / "sessions", "*/*/*/*.jsonl"),
        (codex_home / "archived_sessions", "*.jsonl"),
    )
    return [
        log_file
        for directory, pattern in layout
        if directory.is_dir()
        for log_file in sorted(directory.glob(pattern))
    ]
def parse_codex_session(path: Path, since: datetime) -> TokenSourceRecord | None:
    """Summarise the token usage recorded in one Codex session log.

    The file is read as JSON Lines. ``session_meta`` entries supply the
    session id, working directory and start time; ``turn_context`` entries
    supply working directory and model. ``event_msg`` entries with payload
    type ``token_count`` contribute their ``last_token_usage`` figures when
    they are timestamped at or after ``since``.

    Lines that cannot be processed are counted in
    ``raw_metadata["malformed_lines"]`` and skipped instead of aborting the
    file. This covers invalid JSON, JSON that is not an object, and entries
    whose timestamp cannot be parsed. Non-dict ``payload`` or ``info`` values
    are treated as empty.

    Args:
        path: Session log to read; the stem without a ``rollout-`` prefix is
            the fallback session id.
        since: Lower bound for usage entries. It is compared with the aware
            UTC datetimes returned by ``parse_iso``, so it must be
            timezone-aware.

    Returns:
        A ``TokenSourceRecord`` whose ``source_created_at`` is the last
        timestamp seen, or None when the file cannot be opened or holds no
        countable usage.
    """
    fallback_id = path.stem.removeprefix("rollout-")
    session_id = fallback_id
    started_at: datetime | None = None
    last_at: datetime | None = None
    cwd: str | None = None
    model: str | None = None
    tokens_in = tokens_out = 0
    cached_input_tokens = reasoning_output_tokens = 0
    raw_total_tokens = 0
    usage_records = 0
    malformed_lines = 0

    try:
        handle = path.open("r", encoding="utf-8", errors="ignore")
    except OSError:
        return None

    with handle:
        for line in handle:
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                malformed_lines += 1
                continue
            # Valid JSON that is not an object has no fields to read.
            if not isinstance(entry, dict):
                malformed_lines += 1
                continue

            ts = entry.get("timestamp")
            try:
                parsed_ts = parse_iso(ts) if isinstance(ts, str) else None
            except ValueError:
                # One bad timestamp must not abort the whole file.
                malformed_lines += 1
                continue
            if parsed_ts:
                last_at = parsed_ts
                started_at = started_at or parsed_ts

            payload = entry.get("payload")
            if not isinstance(payload, dict):
                payload = {}

            entry_type = entry.get("type")
            if entry_type == "session_meta":
                meta_id = payload.get("id")
                if meta_id:
                    session_id = str(meta_id)
                cwd = payload.get("cwd") or cwd
                meta_ts = payload.get("timestamp")
                if isinstance(meta_ts, str):
                    try:
                        started_at = parse_iso(meta_ts)
                    except ValueError:
                        # Keep the start time derived from entry timestamps.
                        pass
            elif entry_type == "turn_context":
                cwd = payload.get("cwd") or cwd
                model = payload.get("model") or model
            elif entry_type == "event_msg" and payload.get("type") == "token_count":
                if parsed_ts is None or parsed_ts < since:
                    continue
                info = payload.get("info")
                last = info.get("last_token_usage") if isinstance(info, dict) else None
                if not isinstance(last, dict):
                    continue
                input_tokens = int(last.get("input_tokens") or 0)
                output_tokens = int(last.get("output_tokens") or 0)
                if input_tokens == 0 and output_tokens == 0:
                    continue
                tokens_in += input_tokens
                tokens_out += output_tokens
                cached_input_tokens += int(last.get("cached_input_tokens") or 0)
                reasoning_output_tokens += int(last.get("reasoning_output_tokens") or 0)
                # Use the reported total, else the sum of input and output.
                raw_total_tokens += int(last.get("total_tokens") or input_tokens + output_tokens)
                usage_records += 1
                last_at = parsed_ts

    if usage_records == 0 or tokens_in + tokens_out == 0:
        return None

    return TokenSourceRecord(
        source_provider="codex_session",
        source_id=f"codex:{session_id}",
        source_path=path,
        source_created_at=last_at,
        session_id=session_id,
        cwd=cwd,
        model=model,
        agent="codex",
        tokens_in=tokens_in,
        tokens_out=tokens_out,
        cached_input_tokens=cached_input_tokens,
        reasoning_output_tokens=reasoning_output_tokens,
        raw_total_tokens=raw_total_tokens or None,
        parser_version=PARSER_VERSION,
        confidence=1.0,
        raw_metadata={
            "started_at": started_at.isoformat() if started_at else None,
            "usage_records": usage_records,
            "malformed_lines": malformed_lines,
            "source_file_name": path.name,
        },
    )
def collect_codex_sessions(codex_home: Path, since: datetime) -> list[TokenSourceRecord]:
    """Parse every Codex session log under ``codex_home``, one record per source id.

    When a session appears in several files, the record with the larger
    ``tokens_total`` is kept (the first one wins ties). The result is ordered
    by ``source_created_at``, with undated records first.
    """
    best: dict[str, TokenSourceRecord] = {}
    for session_file in iter_codex_session_files(codex_home):
        record = parse_codex_session(session_file, since)
        if record is None:
            continue
        incumbent = best.get(record.source_id)
        if incumbent is not None and record.tokens_total <= incumbent.tokens_total:
            continue
        best[record.source_id] = record

    def _created(record: TokenSourceRecord) -> datetime:
        # Undated records sort before everything else.
        return record.source_created_at or datetime.min.replace(tzinfo=since.tzinfo)

    return sorted(best.values(), key=_created)

View File

@@ -9,79 +9,54 @@ const POLL = 60_000;
```
```js
// Fetch token events, by-repo summary, workstreams, and tasks in parallel
const evidenceSel = Inputs.radio(
["Measured only", "Active evidence", "All evidence"],
{value: "Measured only", label: "Evidence"}
);
const sortSel = Inputs.select(
["Tokens Total", "Event Count"],
{label: "Sort by"}
);
const maxSel = Inputs.select(
[10, 20, 50, 100, 500],
{value: 20, label: "Show"}
);
display(html`<div style="display:flex;gap:1.5rem;align-items:flex-end;flex-wrap:wrap;margin:0.5rem 0 1.5rem">${evidenceSel}${sortSel}${maxSel}</div>`);
const evidenceMode = view(evidenceSel);
const sortOrder = view(sortSel);
const maxResults = view(maxSel);
```
```js
// Map the selected evidence mode to the aggregate endpoint and its query.
// Any mode other than the two named ones gets the "active evidence" query
// (superseded events excluded, all measurement kinds).
function aggregatePath(mode) {
  switch (mode) {
    case "Measured only":
      return "/token-events/aggregate/?measurement_kind=measured&include_superseded=false";
    case "All evidence":
      return "/token-events/aggregate/?include_superseded=true";
    default:
      return "/token-events/aggregate/?include_superseded=false";
  }
}
const tokenState = (async function*() {
let failures = 0;
while (true) {
let byRepo = [], events = [], wsMap = {}, taskMap = {}, ok = false;
let aggregate = null, quality = null, ok = false;
try {
const [r1, r2, r3, r4] = await Promise.all([
apiFetch("/token-events/by-repo/"),
apiFetch("/token-events/?limit=1000"),
apiFetch("/workstreams/"),
apiFetch("/tasks/"),
const [r1, r2] = await Promise.all([
apiFetch(aggregatePath(evidenceMode)),
apiFetch("/token-events/quality/"),
]);
ok = r1.ok && r2.ok;
if (ok) {
byRepo = await r1.json();
events = await r2.json();
}
if (r3.ok) {
const wsList = await r3.json();
for (const w of wsList) wsMap[w.id] = w;
}
if (r4.ok) {
const taskList = await r4.json();
for (const t of taskList) taskMap[t.id] = t;
aggregate = await r1.json();
quality = await r2.json();
}
} catch {}
failures = ok ? 0 : failures + 1;
yield {byRepo, events, wsMap, taskMap, ok, ts: new Date()};
yield {aggregate, quality, ok, ts: new Date()};
await waitForVisible(pollDelay({ok, base: POLL, failures}));
}
})();
```
```js
// Resolve an event's repo_id via the 3-level chain: direct → workstream → task→workstream
function resolveRepoId(e, wsMap, taskMap) {
if (e.repo_id) return e.repo_id;
const wsId = e.workstream_id ?? taskMap[e.task_id]?.workstream_id;
return wsId ? (wsMap[wsId]?.repo_id ?? null) : null;
}
function buildSummary(events) {
const byWs = {}, byModel = {}, byTask = {};
for (const e of events) {
const tot = (e.tokens_in || 0) + (e.tokens_out || 0);
if (e.workstream_id) {
byWs[e.workstream_id] = byWs[e.workstream_id] || {scope_id: e.workstream_id, tokens_in: 0, tokens_out: 0, event_count: 0};
byWs[e.workstream_id].tokens_in += e.tokens_in || 0;
byWs[e.workstream_id].tokens_out += e.tokens_out || 0;
byWs[e.workstream_id].event_count++;
}
const model = e.model || "unknown";
byModel[model] = (byModel[model] || 0) + tot;
if (e.task_id) {
byTask[e.task_id] = byTask[e.task_id] || {task_id: e.task_id, tokens_in: 0, tokens_out: 0, event_count: 0};
byTask[e.task_id].tokens_in += e.tokens_in || 0;
byTask[e.task_id].tokens_out += e.tokens_out || 0;
byTask[e.task_id].event_count++;
}
}
const toRows = obj => Object.values(obj)
.map(v => ({...v, tokens_total: (v.tokens_in || 0) + (v.tokens_out || 0)}))
.sort((a, b) => b.tokens_total - a.tokens_total);
return {
by_workstream: toRows(byWs),
by_model: Object.entries(byModel)
.map(([model, tokens_total]) => ({model, tokens_total}))
.sort((a, b) => b.tokens_total - a.tokens_total),
top_tasks: toRows(byTask),
total_events: events.length,
};
}
function nameCell(name, fullName) {
const s = String(name ?? fullName ?? "—");
const full = String(fullName ?? name ?? "—");
@@ -92,21 +67,40 @@ function nameCell(name, fullName) {
}
function sortRows(rows, sortField) {
if (sortField === "Tokens Total") return rows; // already sorted by buildSummary / by-repo API
const s = [...rows];
if (sortField === "Tokens In") s.sort((a, b) => (b.tokens_in || 0) - (a.tokens_in || 0));
else if (sortField === "Tokens Out") s.sort((a, b) => (b.tokens_out || 0) - (a.tokens_out || 0));
else if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0));
else if (sortField === "Most Recent") s.sort((a, b) => (b._lastAt || 0) - (a._lastAt || 0));
if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0));
else s.sort((a, b) => (b.tokens_total || 0) - (a.tokens_total || 0));
return s;
}
// Convert a {name: total} dictionary into table rows, largest total first.
// `labelKey` is the property name used for the dictionary key in each row;
// null or undefined input yields no rows.
function dictRows(obj, labelKey) {
  const rows = [];
  for (const [label, tokens_total] of Object.entries(obj ?? {})) {
    rows.push({[labelKey]: label, tokens_total});
  }
  rows.sort((left, right) => right.tokens_total - left.tokens_total);
  return rows;
}
// Build the rows of the "Data Quality" table from a quality summary object.
// Returns an empty list when no summary is available.
function metricRows(quality) {
  if (!quality) return [];
  const signals = [
    ["Measured", "measured_event_count"],
    ["Allocated", "allocated_event_count"],
    ["Estimated", "estimated_event_count"],
    ["Superseded", "superseded_event_count"],
    ["Fallback", "fallback_event_count"],
    ["Unattributed measured", "unattributed_measured_event_count"],
    ["Missing provenance", "missing_provenance_event_count"],
    ["Duplicate sources", "duplicate_source_count"],
  ];
  return signals.map(([metric, field]) => ({metric, value: quality[field]}));
}
```
```js
const byRepo = tokenState.byRepo ?? [];
const events = tokenState.events ?? [];
const wsMap = tokenState.wsMap ?? {};
const taskMap = tokenState.taskMap ?? {};
const aggregate = tokenState.aggregate ?? {
tokens_in: 0, tokens_out: 0, tokens_total: 0, event_count: 0,
by_repo: [], by_workstream: [], by_task: [], by_model: [],
by_measurement_kind: {}, by_source_provider: {},
};
const quality = tokenState.quality ?? null;
const _ok = tokenState.ok ?? false;
const _ts = tokenState.ts;
```
@@ -115,66 +109,37 @@ const _ts = tokenState.ts;
```js
display(html`<div style="font-size:0.8rem;color:${_ok ? 'var(--theme-foreground-focus)' : 'red'}">
${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${events.length} events` : "API offline"}
${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${aggregate.event_count.toLocaleString()} events · ${aggregate.tokens_total.toLocaleString()} tokens` : "API offline"}
</div>`);
```
```js
const repoSel = Inputs.select(
["All repos", ...byRepo.map(r => r.repo_slug)],
{label: "Filter by repo"}
);
const sortSel = Inputs.select(
["Tokens Total", "Tokens In", "Tokens Out", "Event Count", "Most Recent"],
{label: "Sort by"}
);
const maxSel = Inputs.select(
[10, 20, 50, 100, 500],
{value: 20, label: "Show"}
);
display(html`<div style="display:flex;gap:1.5rem;align-items:flex-end;flex-wrap:wrap;margin:0.5rem 0 1.5rem">${repoSel}${sortSel}${maxSel}</div>`);
const repoFilter = view(repoSel);
const sortOrder = view(sortSel);
const maxResults = view(maxSel);
```
```js
// Build filtered and last-event-annotated row sets
const selectedRepoId = repoFilter === "All repos"
? null
: (byRepo.find(r => r.repo_slug === repoFilter)?.repo_id ?? null);
const filteredEvents = selectedRepoId
? events.filter(e => resolveRepoId(e, wsMap, taskMap) === selectedRepoId)
: events;
const lastAtByRepo = {}, lastAtByWs = {}, lastAtByTask = {};
for (const e of filteredEvents) {
const t = e.created_at ? new Date(e.created_at).getTime() : 0;
const rid = resolveRepoId(e, wsMap, taskMap);
if (rid) lastAtByRepo[rid] = Math.max(lastAtByRepo[rid] || 0, t);
if (e.workstream_id) lastAtByWs[e.workstream_id] = Math.max(lastAtByWs[e.workstream_id] || 0, t);
if (e.task_id) lastAtByTask[e.task_id] = Math.max(lastAtByTask[e.task_id] || 0, t);
}
const filteredByRepo = (selectedRepoId
? byRepo.filter(r => r.repo_id === selectedRepoId)
: byRepo
).map(r => ({...r, _lastAt: lastAtByRepo[r.repo_id] || 0}));
const summary = buildSummary(filteredEvents);
const wsRowsFull = summary.by_workstream.map(r => ({...r, _lastAt: lastAtByWs[r.scope_id] || 0}));
const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask[r.task_id] || 0}));
display(html`<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:0.75rem;margin:1rem 0">
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Tokens</div>
<div style="font-size:1.4rem;font-weight:650">${aggregate.tokens_total.toLocaleString()}</div>
</div>
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Events</div>
<div style="font-size:1.4rem;font-weight:650">${aggregate.event_count.toLocaleString()}</div>
</div>
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Last Event</div>
<div style="font-size:1rem;font-weight:650">${aggregate.last_event_at ? new Date(aggregate.last_event_at).toLocaleString() : "—"}</div>
</div>
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Last Ingested</div>
<div style="font-size:1rem;font-weight:650">${aggregate.last_ingested_at ? new Date(aggregate.last_ingested_at).toLocaleString() : "—"}</div>
</div>
</div>`);
```
## By Repo
```js
{
const sorted = sortRows(filteredByRepo, sortOrder);
const total = sorted.length;
const rows = sorted.slice(0, maxResults);
const sorted = sortRows(aggregate.by_repo ?? [], sortOrder);
const rows = sorted.slice(0, maxResults);
if (rows.length === 0) {
display(html`<p style="color:var(--theme-foreground-muted)">No token events with repo association yet.</p>`);
} else {
@@ -184,40 +149,20 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
width: Math.min(900, width),
x: {label: "Tokens", tickFormat: "~s"},
y: {label: null},
color: {legend: true, domain: ["tokens_in", "tokens_out"], range: ["#4e79a7","#f28e2b"]},
marks: [
Plot.barX(
rows.flatMap(r => [
{repo: r.repo_slug, type: "tokens_in", value: r.tokens_in},
{repo: r.repo_slug, type: "tokens_out", value: r.tokens_out},
]),
{x: "value", y: "repo", fill: "type", tip: true}
),
],
marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#4e79a7", tip: true})],
}));
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
columns: ["_ref", "repo_slug", "tokens_in", "tokens_out", "tokens_total", "event_count"],
header: {
_ref: "REF",
repo_slug: "Repo",
tokens_in: "Tokens In",
tokens_out: "Tokens Out",
tokens_total: "Total",
event_count: "Events",
},
columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"],
header: {_ref: "REF", label: "Repo", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"},
format: {
_ref: (_, i) => refCell(i + 1, "repos", rows[i].repo_slug),
repo_slug: d => nameCell(d, d),
tokens_in: d => d.toLocaleString(),
tokens_out: d => d.toLocaleString(),
_ref: (_, i) => refCell(i + 1, "repos", rows[i].label),
label: d => nameCell(d, d),
tokens_in: d => d.toLocaleString(),
tokens_out: d => d.toLocaleString(),
tokens_total: d => d.toLocaleString(),
},
width: {_ref: 50, repo_slug: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
width: {_ref: 50, label: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
}));
if (total > maxResults)
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted);margin-top:0.25rem">Showing ${maxResults} of ${total} repos</p>`);
}
}
```
@@ -226,38 +171,48 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
```js
{
const sorted = sortRows(wsRowsFull, sortOrder);
const total = sorted.length;
const rows = sorted.slice(0, maxResults);
const sorted = sortRows(aggregate.by_workstream ?? [], sortOrder);
const rows = sorted.slice(0, maxResults);
if (rows.length === 0) {
display(html`<p style="color:var(--theme-foreground-muted)">No workstream data yet.</p>`);
} else {
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
columns: ["_ref", "scope_id", "tokens_in", "tokens_out", "tokens_total", "event_count"],
header: {
_ref: "REF",
scope_id: "Workstream",
tokens_in: "Tokens In",
tokens_out: "Tokens Out",
tokens_total: "Total",
event_count: "Events",
},
columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"],
header: {_ref: "REF", label: "Workstream", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"},
format: {
_ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id),
scope_id: d => {
const ws = wsMap[d];
return nameCell(ws?.title ?? ws?.slug, d);
},
tokens_in: d => d.toLocaleString(),
tokens_out: d => d.toLocaleString(),
_ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id),
label: d => nameCell(d, d),
tokens_in: d => d.toLocaleString(),
tokens_out: d => d.toLocaleString(),
tokens_total: d => d.toLocaleString(),
},
width: {_ref: 50, scope_id: 200, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
width: {_ref: 50, label: 240, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
}));
}
}
```
if (total > maxResults)
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted);margin-top:0.25rem">Showing ${maxResults} of ${total} workstreams</p>`);
## By Evidence
```js
{
const kindRows = dictRows(aggregate.by_measurement_kind, "kind");
const sourceRows = dictRows(aggregate.by_source_provider, "source");
if (kindRows.length === 0 && sourceRows.length === 0) {
display(html`<p style="color:var(--theme-foreground-muted)">No evidence breakdown yet.</p>`);
} else {
display(html`<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(260px,1fr));gap:1rem">
<div>${Inputs.table(kindRows, {
columns: ["kind", "tokens_total"],
header: {kind: "Kind", tokens_total: "Tokens"},
format: {tokens_total: d => d.toLocaleString()},
})}</div>
<div>${Inputs.table(sourceRows, {
columns: ["source", "tokens_total"],
header: {source: "Source", tokens_total: "Tokens"},
format: {tokens_total: d => d.toLocaleString()},
})}</div>
</div>`);
}
}
```
@@ -265,18 +220,38 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
## By Model
```js
if (summary.by_model.length === 0) {
display(html`<p style="color:var(--theme-foreground-muted)">No model data yet.</p>`);
{
const rows = (aggregate.by_model ?? []).slice(0, maxResults);
if (rows.length === 0) {
display(html`<p style="color:var(--theme-foreground-muted)">No model data yet.</p>`);
} else {
display(Plot.plot({
title: "Token consumption by model",
marginLeft: 200,
width: Math.min(700, width),
x: {label: "Total tokens", tickFormat: "~s"},
marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#59a14f", tip: true})],
}));
}
}
```
## Data Quality
```js
if (!quality) {
display(html`<p style="color:var(--theme-foreground-muted)">No quality data yet.</p>`);
} else {
display(Plot.plot({
title: "Token consumption by model",
marginLeft: 200,
width: Math.min(700, width),
x: {label: "Total tokens", tickFormat: "~s"},
marks: [
Plot.barX(summary.by_model, {x: "tokens_total", y: "model", fill: "#4e79a7", tip: true}),
],
display(Inputs.table(metricRows(quality), {
columns: ["metric", "value"],
header: {metric: "Signal", value: "Count"},
format: {value: d => d.toLocaleString()},
}));
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted)">
Codex: ${quality.last_codex_ingested_at ? new Date(quality.last_codex_ingested_at).toLocaleString() : "—"}
&nbsp;·&nbsp; Claude: ${quality.last_claude_ingested_at ? new Date(quality.last_claude_ingested_at).toLocaleString() : "—"}
&nbsp;·&nbsp; Reconcile: ${quality.last_reconciliation_at ? new Date(quality.last_reconciliation_at).toLocaleString() : "—"}
</p>`);
}
```
@@ -284,31 +259,23 @@ if (summary.by_model.length === 0) {
```js
{
const sorted = sortRows(taskRowsFull, sortOrder);
const total = sorted.length;
const rows = sorted.slice(0, maxResults);
const sorted = sortRows(aggregate.by_task ?? [], sortOrder);
const rows = sorted.slice(0, maxResults);
if (rows.length === 0) {
display(html`<p style="color:var(--theme-foreground-muted)">No task-level data yet.</p>`);
} else {
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
columns: ["_ref", "task_id", "tokens_in", "tokens_out", "tokens_total"],
header: {_ref: "REF", task_id: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"},
columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total"],
header: {_ref: "REF", label: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"},
format: {
_ref: (_, i) => refCell(i + 1, "tasks", rows[i].task_id),
task_id: d => {
const task = taskMap[d];
return nameCell(task?.title, d);
},
tokens_in: d => d.toLocaleString(),
tokens_out: d => d.toLocaleString(),
_ref: (_, i) => refCell(i + 1, "tasks", rows[i].scope_id),
label: d => nameCell(d, d),
tokens_in: d => d.toLocaleString(),
tokens_out: d => d.toLocaleString(),
tokens_total: d => d.toLocaleString(),
},
width: {_ref: 50, task_id: 240},
width: {_ref: 50, label: 260},
}));
if (total > maxResults)
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted);margin-top:0.25rem">Showing ${maxResults} of ${total} tasks</p>`);
}
}
```

View File

@@ -23,10 +23,14 @@ if (raw.error) {
display(html`<p style="margin-top:0"><a href="/token-cost">← Token Cost</a></p>`);
const FIELD_ORDER = [
"id","tokens_in","tokens_out","tokens_total",
"id","measurement_kind","source_provider","source_id",
"tokens_in","tokens_out","tokens_total","token_evidence_total",
"cached_input_tokens","reasoning_output_tokens","raw_total_tokens",
"note","model","agent","session_id",
"task_id","workstream_id","repo_id",
"ref_type","ref_id","created_at",
"ref_type","ref_id","source_path","source_created_at",
"parser_version","confidence","ingested_at","created_at",
"raw_metadata",
];
const rows = FIELD_ORDER.map(k => fieldRow(k, raw[k] ?? null));

View File

@@ -0,0 +1,75 @@
# State Hub Multi-User Access Model
State Hub is local-first coordination infrastructure. It reflects repo-backed
workplans, progress, and operational state; it is not the authority for source
control, host access, identity, or runtime secret custody.
## Decision
For the current phase, enforce user access through the systems that already own
the boundary:
- Gitea controls repository read/write rights.
- SSH authorized keys control host access.
- ops-bridge controls whether a remote machine can reach local services.
- OpenBao controls runtime secret custody after bootstrap.
State Hub API authentication is deferred until there is an active external
collaborator or an exposed deployment that needs per-user write enforcement.
Until then, State Hub stays private to local or tunneled operator networks.
## Roles
| Role | State Hub access | Source of authority |
|------|------------------|---------------------|
| Primary operator | Full read/write across domains | host access, repo ownership, operator secret custody |
| Domain collaborator | Read all public coordination state; write through owned domain repo and approved hub actions | Gitea repo permissions plus SSH/tunnel authorization |
| Observer | Read-only brief/dashboard access where explicitly exposed | tunnel or future API token |
## Current Enforcement Boundary
1. Repo files remain authoritative. A collaborator can change workplans only in
repos where Gitea allows them to push.
2. State Hub indexes files and records progress events, but it should not become
the primary identity authority.
3. Direct dashboard/API access is private by default. Do not publish State Hub
unauthenticated on the public internet.
4. Runtime secrets, service account keys, database credentials, and package
tokens should move into OpenBao after the OpenBao bootstrap, unseal, audit,
and recovery procedure is complete.
## Future API Auth Trigger
Add API-layer auth when one of these becomes true:
- a second human needs direct State Hub API/dashboard mutation rights
- State Hub is exposed beyond localhost or a tightly controlled SSH tunnel
- automation needs per-consumer attribution and revocation independent of repo
commits
- domain-scoped write checks are needed at request time
## Future Token Shape
When the trigger is reached, implement a small token model rather than a full
identity provider inside State Hub:
- accept NetKingdom IAM Profile OIDC tokens when the identity plane is ready
- support one emergency local admin token for break-glass operation
- map claims to `primary_operator`, `domain_collaborator`, or `observer`
- enforce domain write scopes in mutating endpoints
- keep repo permissions as the durable source of contribution authority
Candidate scopes:
```text
statehub:read
statehub:write
statehub:domain:<slug>:write
statehub:admin
```
## Operator Rule
Do not store collaborator credentials in the State Hub database. Store secrets
in OpenBao or the approved bootstrap bundle, and store source permissions in
Gitea.

212
docs/onboarding.md Normal file
View File

@@ -0,0 +1,212 @@
# State Hub Onboarding
This guide turns a new machine into a usable State Hub operator or collaborator
environment. It covers local credentials, SSH reachability, Gitea access, and
Claude Code MCP registration.
State Hub remains a coordination read/cache layer. Repo permissions, SSH
access, and controlled tunnels are the first access boundary. OpenBao is the
runtime secret authority for platform and workload secrets once its bootstrap
ceremony is complete.
## Quick Start
Clone the repo, then run the bootstrap script:
```bash
git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
cd ~/state-hub
make bootstrap-env
```
On a clean Ubuntu 24.04 machine, allow package installation explicitly:
```bash
make bootstrap-env ARGS="--install-missing"
```
For a remote machine that reaches State Hub through ops-bridge:
```bash
make bridges
make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
```
Restart Claude Code after MCP registration.
## Primary Operator: New Machine
1. Install minimal host prerequisites:
```bash
sudo apt-get update
sudo apt-get install -y git curl openssh-client make python3
```
2. Clone `state-hub` and any domain repo you expect to operate:
```bash
git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
git clone https://gitea.coulomb.social/coulomb/the-custodian.git ~/the-custodian
```
3. Run the bootstrap:
```bash
cd ~/state-hub
make bootstrap-env ARGS="--install-missing"
```
The script will:
- check required tools
- configure `git credential.helper`
- create `~/.ssh/id_ed25519` when missing
- print the public key for managed hosts
- create `~/.railiance_gitea.conf` when you provide a Gitea token
- register the State Hub MCP server for Claude Code
- check State Hub API reachability
4. Authorize the SSH key on managed hosts. If password or existing key access
is available, rerun:
```bash
make bootstrap-env ARGS="--authorize-ssh --skip-gitea --skip-mcp"
```
Default targets:
- `tegwick@92.205.62.239` for Railiance01
- `tegwick@92.205.130.254` for CoulombCore
5. Start or connect to State Hub:
```bash
make api
make mcp-http
```
If the hub is remote, use ops-bridge:
```bash
make bridges
```
6. Restart Claude Code and verify that `state-hub` appears in the MCP server
list. In the first session, call `get_state_summary()` when MCP tools are
available. If not, use:
```bash
cat .custodian-brief.md
curl -s "http://127.0.0.1:8000/workstreams/?status=active" | python3 -m json.tool
```
## Domain Collaborator: New Person
1. Get a Gitea account with write access to the relevant domain repo.
2. Clone this repo and the domain repo:
```bash
git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
git clone https://gitea.coulomb.social/coulomb/<domain-repo>.git ~/<domain-repo>
```
3. Run the bootstrap:
```bash
cd ~/state-hub
make bootstrap-env
```
4. Send the printed SSH public key to the operator, or authorize it yourself if
you already have host access:
```bash
ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.62.239
```
5. Bring up the State Hub tunnel when direct local access is unavailable:
```bash
make bridges
make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
```
6. Restart Claude Code, open the domain repo, and orient from the repo brief:
```bash
cat .custodian-brief.md
```
7. Contribute work through repo-backed workplans. A new workplan lives under
`workplans/` and follows ADR-001. The hub indexes files; the files remain
authoritative.
## Credential Helper Choices
`make bootstrap-env` configures Git credentials only when no global helper is
already set.
Default behavior:
- use `libsecret` when the helper exists
- otherwise use `credential.helper=cache --timeout=3600`
For headless hosts where a persistent plaintext helper is acceptable:
```bash
make bootstrap-env ARGS="--git-helper store --allow-plaintext-store"
```
Prefer SSH remotes or a keyring-backed helper for normal operator machines.
## Gitea Token File
Some Railiance scripts read `~/.railiance_gitea.conf`:
```bash
GITEA_URL="http://92.205.130.254:32166"
GITEA_USER="<user>"
GITEA_TOKEN="<token>"
```
Required token capabilities depend on the action:
- repo creation needs `read:user` and repository write/admin scope
- package publishing needs package write scope
- inventory reads need repository read scope
The bootstrap script writes this file with mode `0600` and does not print the
token.
## MCP Registration
Local registration:
```bash
make register-mcp
```
Tunnel registration:
```bash
make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
```
The current State Hub MCP transport is SSE. The old `.mcp.json`/stdio flow is
legacy; use `make mcp-http` to run the SSE service on `127.0.0.1:8001`.
## Verification Checklist
Run these checks after bootstrap:
```bash
git config --global --get credential.helper
test -f ~/.ssh/id_ed25519.pub
test -f ~/.railiance_gitea.conf
curl -fsS http://127.0.0.1:8000/state/health || curl -fsS http://127.0.0.1:18000/state/health
make register-mcp DRY_RUN=1
```
Then restart Claude Code and confirm that the `state-hub` MCP server is
available.

View File

@@ -0,0 +1,57 @@
# Token Evidence Model
State Hub token events distinguish source-backed measurements from inferred
operational signals. Dashboards and reports should use structured fields for
quality and provenance; `note` remains human context only.
## Measurement Kinds
| Kind | Meaning | Default confidence |
| --- | --- | --- |
| `measured` | Parsed from a source that reports usage metadata, such as Codex session logs or Claude transcript usage blocks. | `1.0` |
| `allocated` | A share of a larger known total, assigned to a task/workstream by a documented allocation method. | `0.70` |
| `estimated` | A fallback or operator-entered estimate without direct source evidence. | `0.35` |
| `superseded` | Historical rows retained for audit but excluded from active totals. | `0.0` |
## Source Providers
| Provider | Source |
| --- | --- |
| `codex_session` | Codex Desktop `.codex/sessions/**` and `.codex/archived_sessions/**` JSONL token_count events. |
| `claude_transcript` | Claude Code `.claude/projects/**/*.jsonl` usage metadata. Transcript text is never stored. |
| `llm_connect` | Future llm-connect usage metadata. |
| `manual` | Explicit operator/API input. |
| `task_fallback` | Fixed task-completion fallback rows created when no source data is available. |
## Provenance Fields
Each source-backed row should include:
- `source_provider`, `source_id`, `source_path`, `source_created_at`
- `parser_version`, `ingested_at`, `confidence`
- `cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`
- `raw_metadata` with parser and attribution metadata, never transcript content
`tokens_in + tokens_out` remains the default active total. Cached input and
reasoning output are preserved separately so dashboards can show both default
and provider-style totals without rewriting history.
## Idempotency
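Measured sources must be written with a stable `source_id`. State Hub enforces
one row for each `(measurement_kind, source_provider, source_id)` tuple and
`POST /token-events/upsert` updates a growing live session rather than creating
duplicates. Rows without a `source_id` are not covered by this uniqueness rule,
because PostgreSQL treats NULL values in a unique constraint as distinct.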
## Migration Playbook
1. Run the token-event provenance migration.
2. Run `python3 scripts/token_reconcile.py --since 2026-05-19` and inspect the
dry-run report.
3. Run `python3 scripts/token_reconcile.py --since 2026-05-19 --apply` to
upsert measured Codex/Claude source rows.
4. Rerun the step 3 command with `--zero-superseded-fallbacks` added, but only
after measured source rows cover the affected window.
5. Check `/token-events/quality/` or the Token Cost dashboard for fallback,
missing-provenance, duplicate-source, and unattributed measured signals.
6. Keep historical fallback rows as `superseded`; do not delete them.

View File

@@ -0,0 +1,128 @@
"""add token event provenance fields
Revision ID: v9q0r1s2t3u4
Revises: u8p9q0r1s2t3
Create Date: 2026-05-23
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
revision = "v9q0r1s2t3u4"
down_revision = "u8p9q0r1s2t3"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add provenance columns to ``token_events`` and classify legacy rows.

    Steps, in order:

    1. Add the measurement/provenance columns. NOT NULL columns carry a server
       default so existing rows are populated immediately.
    2. Backfill ``measurement_kind``, ``source_provider``, ``source_id``,
       ``raw_total_tokens`` and ``confidence`` from the legacy ``note``,
       ``ref_id`` and ``agent`` values.
    3. Create lookup indexes and the source-identity unique constraint.
    """
    op.add_column(
        "token_events",
        sa.Column("measurement_kind", sa.Text(), nullable=False, server_default="estimated"),
    )
    op.add_column(
        "token_events",
        sa.Column("source_provider", sa.Text(), nullable=False, server_default="manual"),
    )
    op.add_column("token_events", sa.Column("source_id", sa.Text(), nullable=True))
    op.add_column("token_events", sa.Column("source_path", sa.Text(), nullable=True))
    op.add_column(
        "token_events",
        sa.Column("source_created_at", sa.TIMESTAMP(timezone=True), nullable=True),
    )
    op.add_column(
        "token_events",
        sa.Column("ingested_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
    )
    op.add_column("token_events", sa.Column("parser_version", sa.Text(), nullable=True))
    op.add_column(
        "token_events",
        sa.Column("confidence", sa.Float(), nullable=False, server_default="0.35"),
    )
    op.add_column(
        "token_events",
        sa.Column("cached_input_tokens", sa.Integer(), nullable=False, server_default="0"),
    )
    op.add_column(
        "token_events",
        sa.Column("reasoning_output_tokens", sa.Integer(), nullable=False, server_default="0"),
    )
    op.add_column("token_events", sa.Column("raw_total_tokens", sa.Integer(), nullable=True))
    op.add_column("token_events", sa.Column("cost_estimated_usd", sa.Float(), nullable=True))
    op.add_column(
        "token_events",
        sa.Column(
            "raw_metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
        ),
    )

    # Every CASE below reads the pre-update column values of the row, so the
    # branches cannot influence one another within this single UPDATE.
    # Superseded heuristic rows are classified as 'task_fallback' as well, so
    # they match the provider the backfill CLI writes when it supersedes a
    # heuristic row.
    op.execute(
        """
        UPDATE token_events
        SET
            measurement_kind = CASE
                WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 'superseded'
                WHEN note = 'workplan' THEN 'allocated'
                WHEN note = 'heuristic' THEN 'estimated'
                WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 'measured'
                ELSE measurement_kind
            END,
            source_provider = CASE
                WHEN note IN ('heuristic', 'heuristic_superseded_by_codex_backfill') THEN 'task_fallback'
                WHEN note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%' THEN 'codex_session'
                WHEN note = 'measured' AND agent ILIKE '%claude%' THEN 'claude_transcript'
                ELSE source_provider
            END,
            source_id = CASE
                WHEN source_id IS NULL AND (note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%')
                    THEN ref_id
                ELSE source_id
            END,
            raw_total_tokens = CASE
                WHEN raw_total_tokens IS NULL THEN tokens_in + tokens_out
                ELSE raw_total_tokens
            END,
            confidence = CASE
                WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 0.0
                WHEN note = 'heuristic' THEN 0.35
                WHEN note = 'workplan' THEN 0.70
                WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 1.0
                ELSE confidence
            END
        """
    )

    op.create_index("ix_token_events_measurement_kind", "token_events", ["measurement_kind"])
    op.create_index("ix_token_events_source_provider", "token_events", ["source_provider"])
    op.create_index("ix_token_events_source_id", "token_events", ["source_id"])
    op.create_index("ix_token_events_source_created_at", "token_events", ["source_created_at"])
    op.create_index("ix_token_events_ingested_at", "token_events", ["ingested_at"])
    # NOTE(review): creating this constraint presumably fails if two legacy
    # rows were backfilled with the same ref_id as source_id -- confirm the
    # existing data has no such duplicates before running.
    op.create_unique_constraint(
        "uq_token_events_source_identity",
        "token_events",
        ["measurement_kind", "source_provider", "source_id"],
    )
def downgrade() -> None:
    """Remove everything ``upgrade`` added, in reverse order of creation.

    The unique constraint goes first, then the indexes, then the columns.
    """
    table = "token_events"
    index_names = (
        "ix_token_events_ingested_at",
        "ix_token_events_source_created_at",
        "ix_token_events_source_id",
        "ix_token_events_source_provider",
        "ix_token_events_measurement_kind",
    )
    column_names = (
        "raw_metadata",
        "cost_estimated_usd",
        "raw_total_tokens",
        "reasoning_output_tokens",
        "cached_input_tokens",
        "confidence",
        "parser_version",
        "ingested_at",
        "source_created_at",
        "source_path",
        "source_id",
        "source_provider",
        "measurement_kind",
    )

    op.drop_constraint("uq_token_events_source_identity", table, type_="unique")
    for index_name in index_names:
        op.drop_index(index_name, table_name=table)
    for column_name in column_names:
        op.drop_column(table, column_name)

View File

@@ -0,0 +1,33 @@
"""assign legacy source ids to measured token events
Revision ID: w0r1s2t3u4v5
Revises: v9q0r1s2t3u4
Create Date: 2026-05-23
"""
from alembic import op
revision = "w0r1s2t3u4v5"
down_revision = "v9q0r1s2t3u4"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Give measured rows that still lack a ``source_id`` a synthetic one.

    The generated id has the form ``<source_provider>:legacy:<row id>``.
    """
    assign_legacy_ids = """
        UPDATE token_events
        SET source_id = source_provider || ':legacy:' || id::text
        WHERE measurement_kind = 'measured'
        AND source_id IS NULL
        """
    op.execute(assign_legacy_ids)
def downgrade() -> None:
    """Clear ``source_id`` on rows whose id matches the synthetic legacy form.

    Only rows whose ``source_id`` equals ``<source_provider>:legacy:<row id>``
    are reset to NULL; other source ids are left untouched.
    """
    clear_legacy_ids = """
        UPDATE token_events
        SET source_id = NULL
        WHERE source_id = source_provider || ':legacy:' || id::text
        """
    op.execute(clear_legacy_ids)

View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
"""Backfill State Hub token events from local Codex session logs.
The parser lives in ``api.services.token_sources.codex`` so this CLI only
handles operator flags, repo attribution, idempotent writes, and fallback
cleanup.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from api.services.token_sources import collect_codex_sessions, parse_iso # noqa: E402
from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
BACKFILL_NOTE = "backfill:codex-session"
SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_codex_backfill"
def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
    """Send one JSON request to the State Hub API and decode the reply.

    Args:
        api_base: Base URL of the API; a trailing slash is tolerated.
        method: HTTP method name passed to ``urllib``.
        path: Request path; a leading slash is tolerated.
        body: Optional JSON-serialisable payload sent as the request body.

    Returns:
        The decoded JSON response, or ``None`` when the response body is empty.
    """
    endpoint = api_base.rstrip("/") + "/" + path.lstrip("/")
    payload = json.dumps(body).encode("utf-8") if body is not None else None
    request = urllib.request.Request(
        endpoint,
        data=payload,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        raw = response.read()
    # An empty body decodes to None instead of raising a JSON error.
    return json.loads(raw or b"null")
def find_codex_home(explicit: str | None) -> Path:
    """Locate the Codex home directory that holds the session logs.

    Args:
        explicit: Path given by the operator (``--codex-home``), or ``None``.

    Returns:
        The first existing directory among: the explicit path, ``$CODEX_HOME``,
        ``~/.codex``, and a hard-coded WSL fallback path.

    Raises:
        SystemExit: When an explicit path is given but is not a directory, or
            when no candidate directory exists.
    """
    if explicit:
        chosen = Path(explicit).expanduser()
        # An operator-supplied path must be honoured or rejected. Silently
        # falling back to another home would backfill from the wrong logs.
        if not chosen.is_dir():
            raise SystemExit(f"Codex home not found: {chosen}; check --codex-home")
        return chosen

    candidates: list[Path] = []
    env_home = os.environ.get("CODEX_HOME")
    if env_home:
        candidates.append(Path(env_home).expanduser())
    candidates.extend(
        [
            Path.home() / ".codex",
            # NOTE(review): machine-specific WSL path; presumably only useful
            # on the original author's workstation.
            Path("/mnt/c/Users/bernd.worsch/.codex"),
        ]
    )
    for candidate in candidates:
        if candidate.is_dir():
            return candidate
    raise SystemExit("Could not find Codex home; pass --codex-home")
def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    """Fetch every token event matching ``params``, following pagination.

    Pages of 1000 rows are requested from ``/token-events/`` until a page is
    empty, is not a list, or is shorter than the page size.
    """
    page_size = 1000
    collected: list[dict[str, Any]] = []
    offset = 0
    while True:
        query = urllib.parse.urlencode({**params, "limit": page_size, "offset": offset})
        batch = http_json(api_base, "GET", f"/token-events/?{query}")
        if not isinstance(batch, list) or not batch:
            return collected
        collected.extend(batch)
        if len(batch) < page_size:
            # A short page means there is nothing further to fetch.
            return collected
        offset += page_size
def existing_codex_events(api_base: str) -> dict[str, dict[str, Any]]:
    """Index already-stored Codex session events by their source identity.

    The key is ``source_id`` when present, otherwise ``ref_id``. Rows without
    a string key are ignored. If several rows share a key, the last one wins.
    """
    query = {"source_provider": "codex_session", "include_superseded": "true"}
    indexed: dict[str, dict[str, Any]] = {}
    for row in list_events(api_base, query):
        key = row.get("source_id") or row.get("ref_id")
        if not isinstance(key, str):
            continue
        indexed[key] = row
    return indexed
def fetch_heuristics(api_base: str, since: str) -> list[dict[str, Any]]:
    """Return active heuristic task-fallback events created since ``since``.

    Superseded rows are excluded by the query.
    """
    filters: dict[str, Any] = {
        "source_provider": "task_fallback",
        "note": "heuristic",
        "since": since,
        "include_superseded": "false",
    }
    return list_events(api_base, filters)
def patch_superseded_heuristic(api_base: str, event_id: str) -> None:
    """Zero out one heuristic event and mark it as superseded.

    The row is kept (not deleted): its token counts and confidence are set to
    zero and its note and measurement kind are switched to the superseded
    markers via ``PATCH /token-events/<event_id>``.
    """
    superseded_fields = {
        "tokens_in": 0,
        "tokens_out": 0,
        "note": SUPERSEDED_HEURISTIC_NOTE,
        "measurement_kind": "superseded",
        "source_provider": "task_fallback",
        "confidence": 0.0,
        "raw_total_tokens": 0,
    }
    http_json(api_base, "PATCH", f"/token-events/{event_id}", superseded_fields)
def main() -> int:
    """Plan, report, and optionally apply a Codex session token backfill.

    Flow:
        1. Parse flags and collect Codex sessions newer than ``--since``.
        2. Compare each session with the events already stored and plan a
           ``create``, an ``update`` (only when the session total has grown),
           or a ``skip``.
        3. Print a per-repo summary. Without ``--apply`` nothing is written.
        4. With ``--apply``, upsert the planned events and, when
           ``--zero-heuristics`` is set, supersede the heuristic fallbacks.

    Returns:
        Process exit code; ``0`` on success.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to backfill from")
    parser.add_argument("--api-base", default=DEFAULT_API)
    parser.add_argument("--codex-home")
    parser.add_argument("--apply", action="store_true", help="write backfill events")
    parser.add_argument(
        "--zero-heuristics",
        action="store_true",
        help="set post-since heuristic task fallback events to zero after backfill",
    )
    args = parser.parse_args()

    since = parse_iso(args.since)
    since_param = since.isoformat()
    codex_home = find_codex_home(args.codex_home)
    repo_refs = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
    existing = existing_codex_events(args.api_base)
    sessions = collect_codex_sessions(codex_home, since)

    # Each plan entry: (action, session, repo_id, repo_slug, attribution_method).
    planned: list[tuple[str, Any, str | None, str | None, str | None]] = []
    by_repo: dict[str, list[int]] = {}
    for session in sessions:
        event = existing.get(session.source_id)
        if event is None:
            action = "create"
        else:
            # Treat null/missing counts as zero, as the heuristic summary does.
            existing_total = (event.get("tokens_in") or 0) + (event.get("tokens_out") or 0)
            action = "update" if session.tokens_total > existing_total else "skip"

        # Resolve attribution once and carry the result into the plan.
        match = resolve_repo(session.cwd, repo_refs)
        repo_id = match.repo_id if match else None
        repo_slug = match.slug if match else None
        attribution_method = match.method if match else None
        if action != "skip":
            planned.append((action, session, repo_id, repo_slug, attribution_method))

        # The summary counts every session found, including skipped ones.
        label = repo_slug or "(unattributed)"
        totals = by_repo.setdefault(label, [0, 0, 0])
        totals[0] += 1
        totals[1] += session.tokens_in
        totals[2] += session.tokens_out

    heuristics = fetch_heuristics(args.api_base, since_param) if args.zero_heuristics else []

    print(f"codex_home: {codex_home}")
    print(f"since: {since.isoformat()}")
    print(f"sessions found: {len(sessions)}")
    print(f"backfill events to create: {sum(1 for action, *_ in planned if action == 'create')}")
    print(f"backfill events to update: {sum(1 for action, *_ in planned if action == 'update')}")
    for repo_slug, (count, tokens_in, tokens_out) in sorted(by_repo.items()):
        print(f"  {repo_slug}: {count} sessions, {tokens_in + tokens_out:,} tokens")
    if args.zero_heuristics:
        total = sum((e.get("tokens_in") or 0) + (e.get("tokens_out") or 0) for e in heuristics)
        print(f"heuristic events to zero: {len(heuristics)} ({total:,} tokens)")

    if not args.apply:
        print("dry run only; pass --apply to write changes")
        return 0

    for _action, session, repo_id, repo_slug, attribution_method in planned:
        payload = session.to_token_event_payload(repo_id=repo_id)
        payload["note"] = BACKFILL_NOTE
        payload["raw_metadata"] = {
            **(payload.get("raw_metadata") or {}),
            "repo_slug": repo_slug,
            "attribution_method": attribution_method,
        }
        http_json(args.api_base, "POST", "/token-events/upsert", payload)

    # Heuristics are superseded only after every upsert above has succeeded.
    for event in heuristics:
        patch_superseded_heuristic(args.api_base, event["id"])

    print(f"upserted {len(planned)} backfill events")
    if args.zero_heuristics:
        print(f"zeroed {len(heuristics)} heuristic events")
    return 0
if __name__ == "__main__":
raise SystemExit(main())

369
scripts/bootstrap-env.sh Executable file
View File

@@ -0,0 +1,369 @@
#!/usr/bin/env bash
# Bootstrap script configuration.
# Fail on errors, unset variables, and failed pipeline stages.
set -euo pipefail
# Repository root, derived from this script's location (one level up).
STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Gitea settings; each may be preset through the environment.
GITEA_CONF="${GITEA_CONF:-$HOME/.railiance_gitea.conf}"
GITEA_URL="${GITEA_URL:-http://92.205.130.254:32166}"
GITEA_USER="${GITEA_USER:-}"
GITEA_TOKEN="${GITEA_TOKEN:-}"
# Credential helper mode: auto, libsecret, cache, or store.
GIT_HELPER="${GIT_HELPER:-auto}"
# Behaviour flags (0 = off, 1 = on); set by the command-line options.
INSTALL_MISSING=0
NON_INTERACTIVE=0
DRY_RUN=0
AUTHORIZE_SSH=0
ALLOW_PLAINTEXT_STORE=0
SKIP_GITEA=0
SKIP_MCP=0
# Private key path; the public key is expected at "${SSH_KEY}.pub".
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519}"
# Default hosts for --authorize-ssh; --ssh-target appends to this list.
SSH_TARGETS=(
  "tegwick@92.205.62.239"
  "tegwick@92.205.130.254"
)
# Print the command-line help text to stdout.
usage() {
  cat <<'USAGE'
Usage: scripts/bootstrap-env.sh [options]

Idempotently prepares a State Hub operator or collaborator environment.

Options:
  --install-missing         Install missing apt packages when possible.
  --non-interactive         Do not prompt; warn instead of asking for secrets.
  --dry-run                 Show intended actions without changing local config.
  --git-helper MODE         auto, libsecret, cache, or store. Default: auto.
  --allow-plaintext-store   Permit git credential.helper=store (plaintext).
                            Required for --git-helper store; in auto mode it
                            selects store when libsecret is unavailable.
  --authorize-ssh           Run ssh-copy-id for configured SSH targets.
  --ssh-target USER@HOST    Add an SSH authorization target. May repeat.
  --gitea-url URL           Gitea base URL for ~/.railiance_gitea.conf.
  --gitea-user USER         Gitea user for ~/.railiance_gitea.conf.
  --gitea-token TOKEN       Gitea token; otherwise prompted when interactive.
  --skip-gitea              Do not create or update ~/.railiance_gitea.conf.
  --skip-mcp                Do not run make register-mcp.
  -h, --help                Show this help.
USAGE
}
# Logging helpers. All arguments are joined into one message line.
# ok, warn and step write to stdout; err writes to stderr.
ok() {
  local message="$*"
  printf '[OK] %s\n' "$message"
}

warn() {
  local message="$*"
  printf '[WARN] %s\n' "$message"
}

err() {
  local message="$*"
  printf '[ERR] %s\n' "$message" >&2
}

# Section heading, preceded by a blank line.
step() {
  local title="$*"
  printf '\n==> %s\n' "$title"
}
# Execute the given command, or only print it when DRY_RUN is 1.
run() {
  if [ "$DRY_RUN" -ne 1 ]; then
    "$@"
    return
  fi
  printf 'DRY-RUN: %s\n' "$*"
}
# Abort with exit code 2 when option $1 was given without a value ($2).
need_arg() {
  local option="$1"
  local value="${2:-}"
  if [ -n "$value" ]; then
    return 0
  fi
  err "$option requires a value"
  exit 2
}
# Parse command-line options. Flags shift by one; options that take a value
# are validated with need_arg and shift by two.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --install-missing)
      INSTALL_MISSING=1
      shift
      ;;
    --non-interactive)
      NON_INTERACTIVE=1
      shift
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --git-helper)
      need_arg "$1" "${2:-}"
      GIT_HELPER="$2"
      shift 2
      ;;
    --allow-plaintext-store)
      ALLOW_PLAINTEXT_STORE=1
      shift
      ;;
    --authorize-ssh)
      AUTHORIZE_SSH=1
      shift
      ;;
    --ssh-target)
      # Appends to the default target list rather than replacing it.
      need_arg "$1" "${2:-}"
      SSH_TARGETS+=("$2")
      shift 2
      ;;
    --gitea-url)
      need_arg "$1" "${2:-}"
      GITEA_URL="$2"
      shift 2
      ;;
    --gitea-user)
      need_arg "$1" "${2:-}"
      GITEA_USER="$2"
      shift 2
      ;;
    --gitea-token)
      need_arg "$1" "${2:-}"
      GITEA_TOKEN="$2"
      shift 2
      ;;
    --skip-gitea)
      SKIP_GITEA=1
      shift
      ;;
    --skip-mcp)
      SKIP_MCP=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      # Unknown options are a usage error (exit code 2).
      err "unknown argument: $1"
      usage >&2
      exit 2
      ;;
  esac
done
# Reject unsupported helper modes, whether set by flag or environment.
case "$GIT_HELPER" in
  auto|libsecret|cache|store) ;;
  *)
    err "--git-helper must be auto, libsecret, cache, or store"
    exit 2
    ;;
esac
# Install the given apt packages when --install-missing was requested.
# Otherwise, or when sudo is unavailable, only print warnings.
apt_install() {
  local wanted=("$@")
  if [ "$INSTALL_MISSING" -eq 1 ]; then
    if command -v sudo >/dev/null 2>&1; then
      run sudo apt-get update
      run sudo apt-get install -y "${wanted[@]}"
    else
      warn "sudo is not available; cannot install: ${wanted[*]}"
    fi
  else
    warn "Missing packages: ${wanted[*]}"
    warn "Rerun with --install-missing or install them manually."
  fi
}
# Report which required and optional tools are on PATH, then hand the apt
# packages that provide the missing required tools to apt_install.
check_commands() {
  step "Checking prerequisites"
  local required=(git curl ssh-keygen ssh-copy-id python3 make)
  local optional=(sops age helm kubectl uv claude)
  local packages=()
  local cmd pkg

  for cmd in "${required[@]}"; do
    if command -v "$cmd" >/dev/null 2>&1; then
      ok "$cmd found"
      continue
    fi
    warn "$cmd missing"
    # Translate the command name into the apt package that ships it;
    # ssh-keygen and ssh-copy-id are not package names.
    case "$cmd" in
      ssh-keygen|ssh-copy-id) pkg="openssh-client" ;;
      *) pkg="$cmd" ;;
    esac
    # Queue each package only once.
    case " ${packages[*]:-} " in
      *" $pkg "*) ;;
      *) packages+=("$pkg") ;;
    esac
  done

  # Optional tools are reported but never installed.
  for cmd in "${optional[@]}"; do
    if command -v "$cmd" >/dev/null 2>&1; then
      ok "$cmd found"
    else
      warn "$cmd missing"
    fi
  done

  if [ "${#packages[@]}" -gt 0 ]; then
    apt_install "${packages[@]}"
  fi
}
# Print the first executable git-credential-libsecret helper found in the
# known install locations. Returns 1 when none is executable.
libsecret_helper_path() {
  local probe
  for probe in \
    "/usr/share/doc/git/contrib/credential/libsecret/git-credential-libsecret" \
    "/usr/lib/git-core/git-credential-libsecret" \
    "/usr/libexec/git-core/git-credential-libsecret"; do
    [ -x "$probe" ] || continue
    printf '%s\n' "$probe"
    return 0
  done
  return 1
}
# Try to build the libsecret credential helper from git's contrib sources.
# If the contrib directory is absent, the build dependencies are requested
# first; the build runs only when the directory exists afterwards.
build_libsecret_helper() {
  local contrib_dir="/usr/share/doc/git/contrib/credential/libsecret"
  [ -d "$contrib_dir" ] || apt_install libsecret-1-0 libsecret-1-dev make gcc
  if [ -d "$contrib_dir" ]; then
    run sudo make -C "$contrib_dir"
  fi
}
# Configure the global git credential helper unless one is already set.
# In auto mode the preference is: libsecret when available, then store when
# plaintext storage was allowed, otherwise the in-memory cache helper.
configure_git_helper() {
  step "Configuring Git credential helper"
  local existing
  existing="$(git config --global --get credential.helper || true)"
  if [ -n "$existing" ]; then
    ok "credential.helper already set: $existing"
    return
  fi

  # Resolve "auto" to a concrete mode.
  local mode="$GIT_HELPER"
  if [ "$mode" = "auto" ]; then
    mode="cache"
    if libsecret_helper_path >/dev/null 2>&1; then
      mode="libsecret"
    elif [ "$ALLOW_PLAINTEXT_STORE" -eq 1 ]; then
      mode="store"
    fi
  fi

  if [ "$mode" = "libsecret" ]; then
    local helper_bin
    helper_bin="$(libsecret_helper_path || true)"
    if [ -z "$helper_bin" ]; then
      # Not installed yet: attempt a build, then look again.
      build_libsecret_helper
      helper_bin="$(libsecret_helper_path || true)"
    fi
    if [ -n "$helper_bin" ]; then
      run git config --global credential.helper "$helper_bin"
    else
      warn "libsecret helper is not available; using cache helper for this machine."
      run git config --global credential.helper "cache --timeout=3600"
    fi
  elif [ "$mode" = "cache" ]; then
    run git config --global credential.helper "cache --timeout=3600"
  elif [ "$mode" = "store" ]; then
    # Plaintext storage always needs explicit opt-in.
    if [ "$ALLOW_PLAINTEXT_STORE" -ne 1 ]; then
      err "credential.helper=store writes plaintext credentials."
      err "Rerun with --allow-plaintext-store if that is intended for this host."
      exit 1
    fi
    run git config --global credential.helper store
  fi
  ok "credential.helper configured"
}
# Ensure an ed25519 key exists at $SSH_KEY, print its public half, and
# optionally authorize it on the configured SSH targets.
setup_ssh_key() {
  step "Checking SSH key"
  # Routed through run so --dry-run does not touch ~/.ssh.
  run mkdir -p "$HOME/.ssh"
  run chmod 700 "$HOME/.ssh"

  if [ -f "$SSH_KEY" ]; then
    ok "SSH key exists: $SSH_KEY"
  else
    # USER can be unset (for example in containers); fall back to id so that
    # 'set -u' does not abort the bootstrap.
    local key_owner="${USER:-$(id -un)}"
    run ssh-keygen -t ed25519 -f "$SSH_KEY" -N "" -C "$key_owner@$(hostname)-state-hub"
    if [ "$DRY_RUN" -eq 0 ]; then
      ok "SSH key generated: $SSH_KEY"
    fi
  fi

  if [ -f "${SSH_KEY}.pub" ]; then
    printf '\nPublic key to authorize on managed hosts:\n\n'
    sed 's/^/ /' "${SSH_KEY}.pub"
    printf '\n'
  fi

  if [ "$AUTHORIZE_SSH" -eq 1 ]; then
    local target
    for target in "${SSH_TARGETS[@]}"; do
      run ssh-copy-id -i "${SSH_KEY}.pub" "$target"
    done
  else
    warn "SSH authorization not attempted. Use --authorize-ssh after confirming host access."
  fi
}
# Create the Gitea config file ($GITEA_CONF) with mode 0600 when it does not
# exist yet. The token is read silently and never printed.
write_gitea_conf() {
  step "Checking Gitea config"
  if [ "$SKIP_GITEA" -eq 1 ]; then
    warn "Skipping Gitea config by request."
    return
  fi

  if [ -f "$GITEA_CONF" ]; then
    # Tighten permissions on an existing file; skipped in dry-run mode.
    run chmod 600 "$GITEA_CONF"
    ok "$GITEA_CONF already exists"
    return
  fi

  if [ -z "$GITEA_USER" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
    read -r -p "Gitea username: " GITEA_USER
  fi
  if [ -z "$GITEA_TOKEN" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
    read -r -s -p "Gitea token (requires read:user and repository write scopes): " GITEA_TOKEN
    printf '\n'
  fi
  if [ -z "$GITEA_USER" ] || [ -z "$GITEA_TOKEN" ]; then
    warn "Gitea config not written. Set GITEA_USER/GITEA_TOKEN or rerun interactively."
    return
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    printf 'DRY-RUN: would write %s with GITEA_URL and GITEA_USER; token hidden\n' "$GITEA_CONF"
    return
  fi

  # The restrictive umask is confined to a subshell so it does not affect
  # files created by later bootstrap steps.
  (
    umask 077
    {
      printf 'GITEA_URL="%s"\n' "$GITEA_URL"
      printf 'GITEA_USER="%s"\n' "$GITEA_USER"
      printf 'GITEA_TOKEN="%s"\n' "$GITEA_TOKEN"
    } >"$GITEA_CONF"
  )
  chmod 600 "$GITEA_CONF"
  ok "Wrote $GITEA_CONF"
}
# Register the State Hub MCP server through 'make register-mcp'.
# Skipped on request (--skip-mcp) and when the claude CLI is not installed.
register_mcp() {
  step "Registering State Hub MCP"
  if [ "$SKIP_MCP" -eq 1 ]; then
    warn "Skipping MCP registration by request."
    return
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    run make -C "$STATE_HUB_DIR" register-mcp DRY_RUN=1
    return
  fi

  # claude is an optional prerequisite. Without it the registration would
  # exit non-zero and abort the remaining bootstrap steps under 'set -e'.
  if ! command -v claude >/dev/null 2>&1; then
    warn "claude CLI not found; skipping MCP registration."
    warn "Install Claude Code, then run 'make register-mcp'."
    return
  fi
  make -C "$STATE_HUB_DIR" register-mcp
}
# Probe the State Hub health endpoint: first the direct local port, then the
# default tunnel port. An unreachable API only produces warnings.
health_check() {
  step "Checking State Hub reachability"
  local direct="http://127.0.0.1:8000"
  local tunnel="http://127.0.0.1:18000"

  if curl -fsS --max-time 2 "${direct}/state/health" >/dev/null 2>&1; then
    ok "State Hub API reachable at http://127.0.0.1:8000"
    return
  fi
  if curl -fsS --max-time 2 "${tunnel}/state/health" >/dev/null 2>&1; then
    ok "State Hub API reachable through tunnel at http://127.0.0.1:18000"
    return
  fi
  warn "State Hub API is not reachable locally or through the default tunnel."
  warn "Start it with 'make api' or run 'make bridges' if this machine uses ops-bridge."
}
# Entry point: run every bootstrap phase in its fixed order.
main() {
  step "State Hub environment bootstrap"
  printf 'Repository: %s\n' "$STATE_HUB_DIR"

  local phase
  for phase in \
    check_commands \
    configure_git_helper \
    setup_ssh_key \
    write_gitea_conf \
    register_mcp \
    health_check; do
    "$phase"
  done
  ok "Bootstrap checks complete."
}
main "$@"

View File

@@ -1596,7 +1596,7 @@ def fix_repo(
task_id = ctx["task_id"]
status = ctx["status"]
result = _api_patch(api_base, f"/tasks/{task_id}",
{"status": status})
{"status": status, "suppress_token_event": True})
if result is not None and "_error" not in result:
report.fixes_applied.append(
f"C-10 fixed: task {task_id[:8]}… status → {status!r}"

151
scripts/register-mcp.sh Executable file
View File

@@ -0,0 +1,151 @@
#!/usr/bin/env bash
# Register the State Hub MCP server for Claude Code.
# Fail on errors, unset variables, and failed pipeline stages.
set -euo pipefail
# Repository root, derived from this script's location (one level up).
STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Claude Code config file read to detect an existing registration.
CLAUDE_JSON="${CLAUDE_JSON:-$HOME/.claude.json}"
# Name under which the MCP server is registered.
SERVER_NAME="${STATE_HUB_MCP_NAME:-state-hub}"
# Empty values are auto-detected further down.
API_BASE="${API_BASE:-}"
MCP_URL="${MCP_URL:-}"
DRY_RUN=0
# Print the command-line help text to stdout.
usage() {
  cat <<'USAGE'
Usage: scripts/register-mcp.sh [--url URL] [--api-base URL] [--dry-run]
Registers the State Hub MCP server for Claude Code.
Options:
--url URL MCP SSE URL to register. Defaults to local :8001 or tunnel :18001.
--api-base URL State Hub API URL used for reachability checks.
--dry-run Print what would happen without changing Claude config.
-h, --help Show this help.
USAGE
}
# Parse command-line options. --url and --api-base take one value each.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --url|--api-base)
      # Without this check a trailing option makes 'shift 2' fail and the
      # script exits under 'set -e' with no explanation.
      if [ "$#" -lt 2 ]; then
        echo "ERROR: $1 requires a value" >&2
        usage >&2
        exit 2
      fi
      if [ "$1" = "--url" ]; then
        MCP_URL="$2"
      else
        API_BASE="$2"
      fi
      shift 2
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "ERROR: unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
# Print one status line built from all arguments.
status() {
  local line="$*"
  printf '%s\n' "$line"
}
# Succeed when <base>/state/health answers successfully within two seconds.
# A trailing slash on the base URL is tolerated; all output is discarded.
api_healthy() {
  local health_url="${1%/}/state/health"
  curl -fsS --max-time 2 "$health_url" >/dev/null 2>&1
}
# Succeed when a TCP connection to host:port opens within two seconds.
# Uses bash's /dev/tcp redirection, so no extra tool is needed.
port_open() {
  local target_host="$1" target_port="$2"
  timeout 2 bash -c ":</dev/tcp/$target_host/$target_port" >/dev/null 2>&1
}
# Auto-detect the API base: prefer the direct local port, use the tunnel port
# only when the direct one is unhealthy and the tunnel answers.
if [ -z "$API_BASE" ]; then
  API_BASE="http://127.0.0.1:8000"
  if ! api_healthy "$API_BASE" && api_healthy "http://127.0.0.1:18000"; then
    API_BASE="http://127.0.0.1:18000"
  fi
fi

# Auto-detect the MCP SSE URL: the local port wins when it is open. Otherwise
# the tunnel port is chosen when it is open or when the API itself is being
# reached through the tunnel. The local port is the final default.
if [ -z "$MCP_URL" ]; then
  mcp_port=8001
  if ! port_open 127.0.0.1 8001; then
    if port_open 127.0.0.1 18001 || [ "$API_BASE" = "http://127.0.0.1:18000" ]; then
      mcp_port=18001
    fi
  fi
  MCP_URL="http://127.0.0.1:${mcp_port}/sse"
fi
# Build the compact JSON payload handed to `claude mcp add-json`; Python does
# the JSON string escaping of the URL.
CONFIG="$(python3 -c '
import json, sys
print(json.dumps({"type": "sse", "url": sys.argv[1]}, separators=(",", ":")))
' "$MCP_URL")"

# Summarise what is about to be registered and whether the API answers.
health_url="${API_BASE%/}/state/health"
status "State Hub directory: $STATE_HUB_DIR"
status "API health check: $health_url"
status "MCP registration: $SERVER_NAME -> $MCP_URL"
if ! api_healthy "$API_BASE"; then
  # An unreachable API is only a warning; registration still proceeds.
  status "WARN: State Hub API is not reachable at $health_url."
  status " Start it with 'make api' or bring up the ops-bridge tunnel."
else
  status "OK: State Hub API is reachable."
fi
# The claude CLI is required for a real registration; a dry run only warns.
if ! command -v claude >/dev/null 2>&1; then
  if [ "$DRY_RUN" -ne 1 ]; then
    status "ERROR: claude CLI not found on PATH."
    status " Install or expose Claude Code CLI, then rerun: make register-mcp"
    exit 1
  fi
  status "WARN: claude CLI not found on PATH; dry-run will still show the command."
fi
# Look up the URL currently stored for $SERVER_NAME under "mcpServers" in the
# Claude config file. The result is empty when the file is missing, unreadable,
# not valid JSON, or holds no string URL for this server. Unexpected shapes
# (top-level list, null "mcpServers", non-object entry) must not raise: a
# Python traceback here would abort the whole script under `set -e`.
CURRENT_URL="$(python3 - "$CLAUDE_JSON" "$SERVER_NAME" <<'PY'
import json
import sys
from pathlib import Path


def registered_url(config_path, server_name):
    try:
        data = json.loads(Path(config_path).read_text())
    except (OSError, ValueError):
        # Missing/unreadable file, undecodable bytes, or malformed JSON.
        return ""
    servers = data.get("mcpServers") if isinstance(data, dict) else None
    entry = servers.get(server_name) if isinstance(servers, dict) else None
    url = entry.get("url") if isinstance(entry, dict) else None
    return url if isinstance(url, str) else ""


print(registered_url(sys.argv[1], sys.argv[2]))
PY
)"
# Nothing to do when the requested URL is already the registered one.
if [ "$CURRENT_URL" = "$MCP_URL" ]; then
  status "OK: $SERVER_NAME is already registered with this URL."
  exit 0
fi

if [ "$DRY_RUN" -eq 1 ]; then
  status "DRY-RUN: would run:"
  status " claude mcp remove -s user $SERVER_NAME (ignored when no entry exists)"
  status " claude mcp add-json -s user $SERVER_NAME '$CONFIG'"
  exit 0
fi

# Drop any existing user-scope entry first so that switching URLs (for example
# local :8001 <-> tunnel :18001) or replacing an entry that has no "url" field
# does not depend on how add-json treats an existing name. A failing remove
# (no such entry) is expected on a clean machine and deliberately ignored.
# NOTE(review): confirm against the Claude Code CLI whether add-json rejects
# an existing name; if it overwrites, this remove is merely redundant.
claude mcp remove -s user "$SERVER_NAME" >/dev/null 2>&1 || true
claude mcp add-json -s user "$SERVER_NAME" "$CONFIG"
status "OK: registered $SERVER_NAME."
status "Restart Claude Code so the MCP server list is refreshed."

View File

@@ -1,27 +1,48 @@
#!/usr/bin/env python3
"""PostToolUse hook: replace heuristic token events with real transcript-derived counts.
Fires after mcp__state-hub__update_task_status when status=done.
Fires after supported task completion tools when status=done.
Reads the Claude Code session transcript to compute the token delta since the
previous task completion, then PATCHes the heuristic event with real counts.
State is persisted per session in /tmp/custodian_tokens_<session_id>.json so
deltas are correctly scoped even when multiple tasks complete in one session.
State is persisted per session in a durable cache directory so deltas survive
restarts and multiple task completions in one session.
"""
import json
import os
import sys
import urllib.error
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
API = os.environ.get("CUSTODIAN_API", "http://127.0.0.1:8000")
STATE_DIR = Path(os.environ.get("TMPDIR", "/tmp"))
STATE_DIR = Path(os.environ.get("CUSTODIAN_TOKEN_STATE_DIR", Path.home() / ".cache" / "state-hub" / "token-hooks"))
HEALTH_LOG = STATE_DIR / "hook-health.jsonl"
PARSER_VERSION = "claude-transcript-delta-v1"
SUPPORTED_TOOL_HINTS = (
"update_task_status",
"tasks",
"task",
)
def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
def utc_now() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()
def write_health(event: dict) -> None:
    """Append one health record to the hook's JSONL health log (best effort).

    The record is ``event`` plus a ``ts`` key holding the current UTC time,
    written as a single JSON line with sorted keys.

    Logging must never break the hook, so every failure is swallowed: not only
    filesystem errors but also serialisation errors. Values that JSON cannot
    encode natively are written via ``str()`` instead of raising.
    """
    try:
        line = json.dumps({"ts": utc_now(), **event}, sort_keys=True, default=str)
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        with HEALTH_LOG.open("a", encoding="utf-8") as handle:
            handle.write(line + "\n")
    except (OSError, TypeError, ValueError):
        # OSError: directory/file not writable. TypeError/ValueError: the
        # event could not be merged or serialised (e.g. unsortable keys).
        pass
def read_transcript_totals(transcript_path: str) -> tuple[int, int, int]:
"""Sum all usage entries in the transcript JSONL up to the current point."""
total_in = total_out = 0
total_in = total_out = cached_in = 0
try:
with open(transcript_path) as f:
for line in f:
@@ -29,10 +50,9 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
entry = json.loads(line)
usage = entry.get("message", {}).get("usage", {})
if usage:
# Count all input token variants (direct + cache creation + cache read)
total_in += (
usage.get("input_tokens", 0)
+ usage.get("cache_creation_input_tokens", 0)
total_in += usage.get("input_tokens", 0)
cached_in += (
usage.get("cache_creation_input_tokens", 0)
+ usage.get("cache_read_input_tokens", 0)
)
total_out += usage.get("output_tokens", 0)
@@ -40,21 +60,22 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
continue
except OSError:
pass
return total_in, total_out
return total_in, total_out, cached_in
def load_state(session_id: str) -> tuple[int, int]:
def load_state(session_id: str) -> tuple[int, int, int]:
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
try:
data = json.loads(state_file.read_text())
return data.get("total_in", 0), data.get("total_out", 0)
return data.get("total_in", 0), data.get("total_out", 0), data.get("cached_in", 0)
except (OSError, json.JSONDecodeError):
return 0, 0
return 0, 0, 0
def save_state(session_id: str, total_in: int, total_out: int) -> None:
def save_state(session_id: str, total_in: int, total_out: int, cached_in: int) -> None:
STATE_DIR.mkdir(parents=True, exist_ok=True)
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out}))
state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out, "cached_in": cached_in}))
def api_get(path: str):
@@ -75,51 +96,89 @@ def api_patch(path: str, data: dict):
return json.loads(r.read())
def extract_done_task(payload: dict) -> tuple[str | None, dict]:
tool_name = payload.get("tool_name", "")
if not any(hint in tool_name for hint in SUPPORTED_TOOL_HINTS):
return None, {}
tool_input = payload.get("tool_input", {}) or {}
status = tool_input.get("status")
if status != "done":
return None, {}
task_id = (
tool_input.get("task_id")
or tool_input.get("id")
or tool_input.get("taskId")
)
return task_id, tool_input
def main() -> None:
try:
payload = json.loads(sys.stdin.read())
except json.JSONDecodeError:
return
tool_name = payload.get("tool_name", "")
if "update_task_status" not in tool_name:
return
tool_input = payload.get("tool_input", {})
if tool_input.get("status") != "done":
return
task_id = tool_input.get("task_id")
task_id, tool_input = extract_done_task(payload)
if not task_id:
write_health({"status": "skipped", "reason": "not_done_task_completion", "tool_name": payload.get("tool_name")})
return
transcript_path = payload.get("transcript_path", "")
session_id = payload.get("session_id", "unknown")
# Compute token delta for this task
current_in, current_out = read_transcript_totals(transcript_path)
last_in, last_out = load_state(session_id)
current_in, current_out, current_cached = read_transcript_totals(transcript_path)
last_in, last_out, last_cached = load_state(session_id)
delta_in = max(0, current_in - last_in)
delta_out = max(0, current_out - last_out)
save_state(session_id, current_in, current_out)
delta_cached = max(0, current_cached - last_cached)
save_state(session_id, current_in, current_out, current_cached)
if delta_in == 0 and delta_out == 0:
return # Nothing measurable — leave heuristic in place
if delta_in == 0 and delta_out == 0 and delta_cached == 0:
write_health({
"status": "skipped",
"reason": "zero_delta",
"session_id": session_id,
"task_id": task_id,
"source_path": transcript_path,
})
return
# Find the most recent heuristic event for this task and replace it
try:
events = api_get(f"/token-events/?task_id={task_id}&note=heuristic&limit=5")
except (urllib.error.URLError, OSError):
write_health({"status": "skipped", "reason": "api_offline", "session_id": session_id, "task_id": task_id})
return # API offline — leave heuristic as-is
if not events:
write_health({"status": "skipped", "reason": "no_fallback_event", "session_id": session_id, "task_id": task_id})
return
event_id = events[0]["id"]
model = tool_input.get("model")
agent = tool_input.get("agent")
patch_body: dict = {"tokens_in": delta_in, "tokens_out": delta_out, "note": "measured"}
patch_body: dict = {
"tokens_in": delta_in,
"tokens_out": delta_out,
"note": "measured",
"measurement_kind": "measured",
"source_provider": "claude_transcript",
"source_id": f"claude:{session_id}:task:{task_id}",
"source_path": transcript_path or None,
"parser_version": PARSER_VERSION,
"confidence": 1.0,
"cached_input_tokens": delta_cached,
"raw_total_tokens": delta_in + delta_out + delta_cached,
"raw_metadata": {
"hook": "post_tool_use",
"tool_name": payload.get("tool_name"),
"state_dir": str(STATE_DIR),
},
}
if model:
patch_body["model"] = model
if agent:
@@ -128,7 +187,19 @@ def main() -> None:
try:
api_patch(f"/token-events/{event_id}", patch_body)
except (urllib.error.URLError, OSError):
pass
write_health({"status": "skipped", "reason": "patch_failed", "session_id": session_id, "task_id": task_id})
return
write_health({
"status": "patched",
"session_id": session_id,
"task_id": task_id,
"event_id": event_id,
"tokens_in": delta_in,
"tokens_out": delta_out,
"cached_input_tokens": delta_cached,
"source_path": transcript_path,
})
if __name__ == "__main__":

239
scripts/token_reconcile.py Normal file
View File

@@ -0,0 +1,239 @@
#!/usr/bin/env python3
"""Reconcile token evidence from local agent sources against State Hub.
Dry-run is the default. Use ``--apply`` to upsert measured source events and
``--zero-superseded-fallbacks`` to zero task fallback rows that are covered by
source-backed measurements.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import urllib.parse
import urllib.request
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from api.services.token_sources import collect_claude_transcripts, collect_codex_sessions, parse_iso # noqa: E402
from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_source_measurement"
def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
    """Send one JSON request to the API and return the decoded response.

    Args:
        api_base: Base URL; a trailing slash is tolerated.
        method: HTTP method name.
        path: Request path (may include a query string); a leading slash is tolerated.
        body: Optional JSON-serialisable request body.

    Returns:
        The decoded JSON response, or ``None`` when the response body is empty.

    Raises:
        Whatever ``urllib.request.urlopen`` raises (30 second timeout).
    """
    endpoint = "/".join((api_base.rstrip("/"), path.lstrip("/")))
    payload = None if body is None else json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        endpoint,
        data=payload,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        raw = response.read()
    return json.loads(raw or b"null")
def list_events(api_base: str, params: dict[str, Any], page_size: int = 1000) -> list[dict[str, Any]]:
    """Fetch every token event matching ``params`` by paging through the API.

    Args:
        api_base: Base URL of the API.
        params: Query parameters for ``/token-events/``; ``limit`` and
            ``offset`` are added (and overridden) by this function.
        page_size: Number of events requested per page. Defaults to 1000, the
            value that was previously hard-coded.

    Returns:
        All events from consecutive pages. Paging stops at the first page that
        is empty, is not a list, or is shorter than ``page_size``.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    events: list[dict[str, Any]] = []
    offset = 0
    while True:
        encoded = urllib.parse.urlencode({**params, "limit": page_size, "offset": offset})
        page = http_json(api_base, "GET", f"/token-events/?{encoded}")
        if not isinstance(page, list) or not page:
            break
        events.extend(page)
        if len(page) < page_size:
            # A short page is the last one.
            break
        offset += page_size
    return events
def find_home(explicit: str | None, env_name: str, default: Path) -> Path | None:
candidates: list[Path] = []
if explicit:
candidates.append(Path(explicit))
env_home = os.environ.get(env_name)
if env_home:
candidates.append(Path(env_home))
candidates.append(default)
for candidate in candidates:
if candidate.is_dir():
return candidate
return None
def event_total(event: dict[str, Any]) -> int:
    """Return ``tokens_in + tokens_out`` for an event, treating missing or falsy values as 0."""
    tokens_in = event.get("tokens_in") or 0
    tokens_out = event.get("tokens_out") or 0
    return int(tokens_in) + int(tokens_out)
def source_index(events: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Index events by ``source_id``, falling back to ``ref_id``.

    Events whose key is not a string are left out; when several events share a
    key, the last one in ``events`` wins.
    """
    keyed = ((event.get("source_id") or event.get("ref_id"), event) for event in events)
    return {key: event for key, event in keyed if isinstance(key, str)}
def print_report(report: dict[str, Any]) -> None:
    """Print ``report`` to stdout as indented JSON with sorted keys.

    Values JSON cannot encode natively are rendered with ``str()``.
    """
    rendered = json.dumps(report, indent=2, sort_keys=True, default=str)
    print(rendered)
def main() -> int:
    """Reconcile local token evidence with State Hub and print a JSON report.

    Steps: parse arguments, collect Codex and Claude source records since
    ``--since``, load repos and existing token events from the API, plan
    upserts for records that are missing or larger than the stored event, and
    build the report. With ``--apply`` the planned upserts are sent, fallback
    rows are optionally zeroed (``--zero-superseded-fallbacks``), and a
    progress event carrying the report is posted. Without ``--apply`` nothing
    is written to the API.

    Returns:
        ``1`` when the canary fails (progress events exist since ``--since``
        but the measured token total after the plan is zero), otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to reconcile from")
    parser.add_argument("--api-base", default=DEFAULT_API)
    parser.add_argument("--codex-home")
    parser.add_argument("--claude-home")
    parser.add_argument("--apply", action="store_true", help="upsert measured source events")
    parser.add_argument(
        "--zero-superseded-fallbacks",
        action="store_true",
        help="with --apply, zero heuristic fallback rows after measured source ingestion",
    )
    args = parser.parse_args()

    since = parse_iso(args.since)
    since_param = since.isoformat()

    codex_home = find_home(args.codex_home, "CODEX_HOME", Path.home() / ".codex")
    if codex_home is None:
        # NOTE(review): operator-specific WSL fallback; consider making it configurable.
        windows_codex = Path("/mnt/c/Users/bernd.worsch/.codex")
        codex_home = windows_codex if windows_codex.is_dir() else None
    claude_home = find_home(args.claude_home, "CLAUDE_HOME", Path.home() / ".claude")

    # --- Collect source records -------------------------------------------
    records = []
    source_health: dict[str, dict[str, Any]] = {}
    if codex_home:
        codex_records = collect_codex_sessions(codex_home, since)
        records.extend(codex_records)
        source_health["codex_session"] = {"home": str(codex_home), "sessions_found": len(codex_records)}
    else:
        source_health["codex_session"] = {"home": None, "sessions_found": 0, "warning": "Codex home not found"}
    if claude_home:
        claude_records = collect_claude_transcripts(claude_home, since)
        records.extend(claude_records)
        source_health["claude_transcript"] = {"home": str(claude_home), "sessions_found": len(claude_records)}
    else:
        source_health["claude_transcript"] = {"home": None, "sessions_found": 0, "warning": "Claude home not found"}

    # --- Load current API state -------------------------------------------
    repos = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
    existing_events = list_events(args.api_base, {"since": since_param, "include_superseded": "true"})
    existing_by_source = source_index(existing_events)
    fallback_events = [
        event for event in existing_events
        if event.get("source_provider") == "task_fallback" or event.get("note") == "heuristic"
    ]
    superseded_events = [
        event for event in existing_events
        if event.get("measurement_kind") == "superseded" or str(event.get("note") or "").startswith("heuristic_superseded")
    ]

    # --- Plan upserts ------------------------------------------------------
    planned_upserts = []
    unattributed = 0
    stale = 0
    # Tokens of measured events that a planned upsert will replace; they must
    # not be counted next to their replacement in the after-plan total.
    replaced_measured_total = 0
    source_totals: dict[str, int] = defaultdict(int)
    for record in records:
        source_totals[record.source_provider] += record.tokens_total
        existing = existing_by_source.get(record.source_id)
        if existing:
            if event_total(existing) >= record.tokens_total:
                continue  # stored event already covers this record
            stale += 1
            if existing.get("measurement_kind") == "measured":
                replaced_measured_total += event_total(existing)
        match = resolve_repo(record.cwd, repos)
        if match is None:
            unattributed += 1
        planned_upserts.append((record, match))

    # --- Quality checks ----------------------------------------------------
    source_ids = [
        event.get("source_id")
        for event in existing_events
        if event.get("source_id") and event.get("measurement_kind") == "measured"
    ]
    duplicate_sources = {
        source_id: count for source_id, count in Counter(source_ids).items() if count > 1
    }
    missing_provenance = [
        event for event in existing_events
        if event.get("measurement_kind") == "measured" and not event.get("source_id")
    ]

    progress_events = http_json(args.api_base, "GET", f"/progress/?since={urllib.parse.quote(since_param)}&limit=1000")
    if not isinstance(progress_events, list):
        # Keep the canary consistent with the reported count: a non-list
        # response (e.g. an error object) is treated as "no progress events".
        progress_events = []

    existing_measured_total = sum(
        event_total(event)
        for event in existing_events
        if event.get("measurement_kind") == "measured"
    )
    planned_total = sum(record.tokens_total for record, _ in planned_upserts)
    measured_total = existing_measured_total - replaced_measured_total + planned_total
    canary_failed = bool(progress_events) and measured_total == 0

    report = {
        "since": since.isoformat(),
        "apply": args.apply,
        "sources": source_health,
        "sessions_found": len(records),
        "source_tokens_total": dict(source_totals),
        "events_existing": len(existing_events),
        "events_to_upsert": len(planned_upserts),
        "sessions_stale": stale,
        "fallback_events": len(fallback_events),
        "superseded_events": len(superseded_events),
        "unattributed_source_records": unattributed,
        "missing_provenance_events": len(missing_provenance),
        "duplicate_source_ids": duplicate_sources,
        "progress_events": len(progress_events),
        "measured_tokens_total_after_plan": measured_total,
        "canary_failed": canary_failed,
    }

    # --- Apply -------------------------------------------------------------
    if args.apply:
        for record, match in planned_upserts:
            payload = record.to_token_event_payload(repo_id=match.repo_id if match else None)
            payload["raw_metadata"] = {
                **(payload.get("raw_metadata") or {}),
                "repo_slug": match.slug if match else None,
                "attribution_method": match.method if match else None,
            }
            http_json(args.api_base, "POST", "/token-events/upsert", payload)

        if args.zero_superseded_fallbacks:
            # NOTE(review): every fallback row in the window is zeroed; nothing
            # here checks that a measured source event covers the same task.
            already_superseded = {event.get("id") for event in superseded_events}
            for event in fallback_events:
                if event.get("id") in already_superseded:
                    continue  # zeroed by an earlier run; no need to PATCH again
                http_json(
                    args.api_base,
                    "PATCH",
                    f"/token-events/{event['id']}",
                    {
                        "tokens_in": 0,
                        "tokens_out": 0,
                        "note": SUPERSEDED_HEURISTIC_NOTE,
                        "measurement_kind": "superseded",
                        "source_provider": "task_fallback",
                        "confidence": 0.0,
                        "raw_total_tokens": 0,
                    },
                )

        http_json(
            args.api_base,
            "POST",
            "/progress/",
            {
                "summary": (
                    "Token reconciliation: "
                    f"{len(records)} source records, {len(planned_upserts)} upserts, "
                    f"{len(fallback_events)} fallback events, canary_failed={canary_failed}"
                ),
                "event_type": "token_reconciliation",
                "author": "codex",
                "detail": report,
            },
        )

    print_report(report)
    return 1 if canary_failed else 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -55,8 +55,23 @@ class TestTokenEventsCreate:
assert ev["tokens_in"] == 200
assert ev["tokens_out"] == 100
assert ev["tokens_total"] == 300
assert ev["measurement_kind"] == "estimated"
assert ev["source_provider"] == "manual"
assert ev["raw_total_tokens"] == 300
assert ev["id"] is not None
async def test_create_with_created_at_backfill_timestamp(self, client):
    """An explicit ``created_at`` passed on create is kept on the returned event."""
    backfill_fields = dict(
        tokens_in=200,
        tokens_out=100,
        ref_type="session",
        ref_id="codex:test-session",
        created_at="2026-05-19T01:02:03Z",
    )
    created = await _post_event(client, **backfill_fields)
    assert created["created_at"].startswith("2026-05-19T01:02:03")
async def test_create_with_all_fields(self, client):
await _create_domain(client)
topic = await _create_topic(client)
@@ -74,11 +89,76 @@ class TestTokenEventsCreate:
ref_id=task["id"],
note="T01 done",
session_id="ses-abc",
measurement_kind="measured",
source_provider="manual",
source_id="manual:test-event",
confidence=0.95,
cached_input_tokens=10,
reasoning_output_tokens=20,
raw_total_tokens=1530,
raw_metadata={"source": "unit-test"},
)
assert ev["task_id"] == task["id"]
assert ev["workstream_id"] == ws["id"] # auto-populated from task
assert ev["model"] == "claude-sonnet-4-6"
assert ev["tokens_total"] == 1500
assert ev["measurement_kind"] == "measured"
assert ev["source_provider"] == "manual"
assert ev["source_id"] == "manual:test-event"
assert ev["cached_input_tokens"] == 10
assert ev["reasoning_output_tokens"] == 20
assert ev["token_evidence_total"] == 1530
assert ev["raw_metadata"] == {"source": "unit-test"}
async def test_upsert_source_event_updates_existing_session(self, client):
    """Upserting the same ``source_id`` twice updates one row instead of adding another."""
    session_event = dict(
        tokens_in=100,
        tokens_out=50,
        measurement_kind="measured",
        source_provider="codex_session",
        source_id="codex:abc",
        ref_type="session",
        ref_id="codex:abc",
        session_id="abc",
        cached_input_tokens=5,
    )
    created = await client.post("/token-events/upsert", json=session_event)
    assert created.status_code == 200, created.text

    grown_event = {**session_event, "tokens_in": 300, "tokens_out": 80}
    updated = await client.post("/token-events/upsert", json=grown_event)
    assert updated.status_code == 200, updated.text

    # Same row, new totals.
    assert created.json()["id"] == updated.json()["id"]
    assert updated.json()["tokens_total"] == 380

    listing = await client.get("/token-events/", params={"source_provider": "codex_session"})
    assert len(listing.json()) == 1
async def test_patch_backfill_fields(self, client):
    """PATCH can rewrite counts, session reference, timestamp and provenance fields."""
    original = await _post_event(client, tokens_in=100, tokens_out=50)
    backfill = {
        "tokens_in": 500,
        "tokens_out": 250,
        "session_id": "codex-session",
        "ref_type": "session",
        "ref_id": "codex:session",
        "created_at": "2026-05-20T01:02:03Z",
        "note": "backfill:codex-session",
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:session",
        "cached_input_tokens": 10,
    }
    response = await client.patch(f"/token-events/{original['id']}", json=backfill)
    assert response.status_code == 200

    patched = response.json()
    assert patched["tokens_total"] == 750
    echoed_fields = (
        "session_id",
        "ref_type",
        "ref_id",
        "measurement_kind",
        "source_provider",
        "source_id",
        "cached_input_tokens",
    )
    for field in echoed_fields:
        assert patched[field] == backfill[field]
    assert patched["created_at"].startswith("2026-05-20T01:02:03")
async def test_workstream_auto_populated_from_task(self, client):
await _create_domain(client)
@@ -129,6 +209,26 @@ class TestTokenEventsList:
assert len(events) == 1
assert events[0]["model"] == "claude-sonnet-4-6"
async def test_filter_by_measurement_kind_and_source_provider(self, client):
    """Filtering on both fields returns only the measured Codex event."""
    measured_fields = dict(
        tokens_in=100,
        tokens_out=50,
        measurement_kind="measured",
        source_provider="codex_session",
        source_id="codex:filter",
    )
    await _post_event(client, **measured_fields)
    await _post_event(client, tokens_in=200, tokens_out=100, note="heuristic")

    filters = {"measurement_kind": "measured", "source_provider": "codex_session"}
    response = await client.get("/token-events/", params=filters)
    assert response.status_code == 200

    matching = response.json()
    assert len(matching) == 1
    assert matching[0]["source_id"] == "codex:filter"
@pytest.mark.asyncio
class TestTokenSummary:
@@ -184,6 +284,7 @@ class TestTokenSummary:
s = r.json()
assert s["event_count"] == 1
assert s["tokens_total"] == 75
assert s["by_measurement_kind"]["estimated"] == 75
async def test_summary_unknown_scope_returns_422(self, client):
r = await client.get("/token-events/summary/", params={"scope": "foobar", "id": "x"})
@@ -215,3 +316,32 @@ class TestTokenEventGetById:
import uuid
r = await client.get(f"/token-events/{uuid.uuid4()}")
assert r.status_code == 404
@pytest.mark.asyncio
class TestTokenAggregateAndQuality:
    """Aggregate and quality endpoints break totals down by evidence kind and source."""

    async def test_aggregate_and_quality_expose_evidence_breakdown(self, client):
        """One measured and one heuristic event show up in both breakdowns."""
        measured_fields = dict(
            tokens_in=100,
            tokens_out=50,
            measurement_kind="measured",
            source_provider="codex_session",
            source_id="codex:agg",
        )
        await _post_event(client, **measured_fields)
        await _post_event(client, tokens_in=1000, tokens_out=500, note="heuristic")

        aggregate_response = await client.get("/token-events/aggregate/", params={"include_superseded": "false"})
        agg = aggregate_response.json()
        assert agg["tokens_total"] == 1650
        by_kind = agg["by_measurement_kind"]
        assert by_kind["measured"] == 150
        assert by_kind["estimated"] == 1500
        by_provider = agg["by_source_provider"]
        assert by_provider["codex_session"] == 150
        assert by_provider["task_fallback"] == 1500

        measured_response = await client.get("/token-events/aggregate/", params={"measurement_kind": "measured"})
        assert measured_response.json()["tokens_total"] == 150

        quality_response = await client.get("/token-events/quality/")
        quality = quality_response.json()
        assert quality["measured_event_count"] == 1
        assert quality["fallback_event_count"] == 1
        assert quality["missing_provenance_event_count"] == 0

View File

@@ -66,6 +66,9 @@ class TestTokenPassthrough:
assert ev["agent"] == "custodian"
assert ev["workstream_id"] == ws["id"]
assert ev["note"] == "measured"
assert ev["measurement_kind"] == "measured"
assert ev["source_provider"] == "manual"
assert ev["source_id"] == f"task:{task['id']}:manual"
async def test_tier1_userbased_note_override(self, client):
"""Tier 1 with note='userbased' records that note instead of 'measured'."""
@@ -84,6 +87,7 @@ class TestTokenPassthrough:
events = (await client.get("/token-events/", params={"task_id": task["id"]})).json()
assert events[0]["note"] == "userbased"
assert events[0]["measurement_kind"] == "measured"
async def test_tier2_workplan_prorated(self, client):
"""Tier 2: workplan totals prorated across 4 tasks → 250/125 each, note='workplan'."""
@@ -108,6 +112,8 @@ class TestTokenPassthrough:
assert ev["tokens_in"] == 250 # 1000 // 4
assert ev["tokens_out"] == 125 # 500 // 4
assert ev["note"] == "workplan"
assert ev["measurement_kind"] == "allocated"
assert ev["raw_metadata"]["allocation_method"] == "workplan_prorated"
async def test_tier3_heuristic_fallback(self, client):
"""Tier 3: status=done with no token args → heuristic 1000/500, note='heuristic'."""
@@ -125,6 +131,40 @@ class TestTokenPassthrough:
assert ev["tokens_in"] == 1000
assert ev["tokens_out"] == 500
assert ev["note"] == "heuristic"
assert ev["measurement_kind"] == "estimated"
assert ev["source_provider"] == "task_fallback"
async def test_suppress_token_event_skips_done_fallback(self, client):
    """Marking a task done with ``suppress_token_event`` records no token event."""
    await _create_domain(client)
    topic = await _create_topic(client)
    workstream = await _create_workstream(client, topic["id"])
    task = await _create_task(client, workstream["id"])

    done_without_event = {"status": "done", "suppress_token_event": True}
    response = await client.patch(f"/tasks/{task['id']}", json=done_without_event)
    assert response.status_code == 200
    assert response.json()["status"] == "done"

    listing = await client.get("/token-events/", params={"task_id": task["id"]})
    assert listing.json() == []
async def test_repeated_done_update_does_not_duplicate_event(self, client):
    """Patching ``status=done`` twice leaves exactly one token event for the task."""
    await _create_domain(client)
    topic = await _create_topic(client)
    workstream = await _create_workstream(client, topic["id"])
    task = await _create_task(client, workstream["id"])

    task_url = f"/tasks/{task['id']}"
    first = await client.patch(task_url, json={"status": "done"})
    assert first.status_code == 200
    second = await client.patch(task_url, json={"status": "done"})
    assert second.status_code == 200

    listing = await client.get("/token-events/", params={"task_id": task["id"]})
    assert len(listing.json()) == 1
async def test_non_done_status_creates_no_event(self, client):
"""Non-done status updates never create a token event."""

139
tests/test_token_sources.py Normal file
View File

@@ -0,0 +1,139 @@
from __future__ import annotations
import json
from api.services.token_sources import parse_iso
from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
from api.services.token_sources.claude import parse_claude_transcript
from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session
def _write_jsonl(path, rows):
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", encoding="utf-8") as handle:
for row in rows:
if row == "BAD":
handle.write("{not json}\n")
else:
handle.write(json.dumps(row) + "\n")
def test_parse_codex_session_sums_token_count_records(tmp_path):
    """Checks the parsed record's counts against the fixture and that the malformed line is counted."""
    session_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"

    session_meta = {"type": "session_meta", "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}}
    turn_context = {"type": "turn_context", "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"}}
    # Timestamped the day before the ``since`` value passed below.
    early_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-22T23:00:00Z",
        "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}}},
    }
    in_window_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {
                "last_token_usage": {
                    "input_tokens": 100,
                    "output_tokens": 40,
                    "cached_input_tokens": 15,
                    "reasoning_output_tokens": 7,
                    "total_tokens": 155,
                }
            },
        },
    }
    _write_jsonl(session_file, [session_meta, turn_context, early_usage, "BAD", in_window_usage])

    record = parse_codex_session(session_file, parse_iso("2026-05-23"))

    assert record is not None
    assert record.source_id == "codex:s1"
    observed = (
        record.tokens_in,
        record.tokens_out,
        record.cached_input_tokens,
        record.reasoning_output_tokens,
        record.raw_total_tokens,
    )
    assert observed == (100, 40, 15, 7, 155)
    assert record.raw_metadata["malformed_lines"] == 1
def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
    """A session present both live and archived yields one record with a total of 40 tokens."""

    def usage_event(timestamp, input_tokens, output_tokens):
        usage = {"input_tokens": input_tokens, "output_tokens": output_tokens}
        return {
            "type": "event_msg",
            "timestamp": timestamp,
            "payload": {"type": "token_count", "info": {"last_token_usage": usage}},
        }

    live_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
    archived_file = tmp_path / "archived_sessions" / "rollout-archived.jsonl"

    shared_rows = [
        {"type": "session_meta", "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}},
        usage_event("2026-05-23T01:00:00Z", 10, 5),
    ]
    _write_jsonl(live_file, shared_rows)
    # The archived copy carries one additional usage event.
    _write_jsonl(archived_file, shared_rows + [usage_event("2026-05-23T02:00:00Z", 20, 5)])

    records = collect_codex_sessions(tmp_path, parse_iso("2026-05-23"))

    assert len(records) == 1
    only_record = records[0]
    assert only_record.source_id == "codex:same"
    assert only_record.tokens_total == 40
def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
    """Direct input, cached input and output are reported separately; no ``content`` key in metadata."""
    transcript = tmp_path / "projects" / "repo" / "session.jsonl"
    usage = {
        "input_tokens": 30,
        "cache_creation_input_tokens": 5,
        "cache_read_input_tokens": 7,
        "output_tokens": 11,
    }
    entry = {
        "timestamp": "2026-05-23T01:00:00Z",
        "session_id": "c1",
        "cwd": "/repo",
        "message": {
            "model": "claude-sonnet",
            "content": "do not store me",
            "usage": usage,
        },
    }
    _write_jsonl(transcript, [entry])

    record = parse_claude_transcript(transcript, parse_iso("2026-05-23"))

    assert record is not None
    assert record.source_id == "claude:c1"
    assert record.tokens_in == 30
    assert record.cached_input_tokens == 12  # 5 cache creation + 7 cache read
    assert record.tokens_out == 11
    assert "content" not in record.raw_metadata
def test_resolve_repo_uses_normalised_path_prefix():
    """A WSL UNC working directory resolves to the repo whose local path prefixes it."""
    state_hub = RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub")
    other = RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other")
    wsl_repo_root = "//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub"

    match = resolve_repo(wsl_repo_root + "/api", [state_hub, other])

    assert normalise_cwd(wsl_repo_root) == "/home/worsch/state-hub"
    assert match is not None
    assert match.repo_id == "1"
    assert match.method == "path_prefix"

View File

@@ -4,12 +4,12 @@ type: workplan
title: "Multi-User Onboarding and Environment Bootstrap"
domain: custodian
repo: state-hub
status: active
status: finished
owner: custodian
topic_slug: custodian
state_hub_workstream_id: "a28d9e29-4119-4b73-9469-f921920253ef"
created: "2026-03-11"
updated: "2026-05-17"
updated: "2026-05-23"
---
# Multi-User Onboarding and Environment Bootstrap
@@ -51,7 +51,7 @@ Two personas:
```task
id: CUST-WP-0012-T01
state_hub_task_id: 71628269-9a75-4dae-a347-e64a86040322
status: todo
status: done
priority: medium
```
@@ -79,6 +79,12 @@ git config --global credential.helper 'cache --timeout=3600'
**Done when:** included in bootstrap script; push to Gitea works without
re-entering credentials on second attempt.
**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` configures a global
credential helper when one is not already present. It prefers `libsecret`, uses
`cache --timeout=3600` as the safe automatic fallback, and supports explicit
headless plaintext storage via `--git-helper store --allow-plaintext-store`.
`docs/onboarding.md` documents the tradeoffs.
---
### T02 — SSH key generation and authorization automation
@@ -86,7 +92,7 @@ re-entering credentials on second attempt.
```task
id: CUST-WP-0012-T02
state_hub_task_id: fea965e9-8a8f-439c-9096-8f7756eb71ed
status: todo
status: done
priority: medium
```
@@ -110,6 +116,11 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254
**Done when:** included in bootstrap script; documented in onboarding guide.
**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` generates
`~/.ssh/id_ed25519` if missing, prints the public key, and can run
`ssh-copy-id` for Railiance01 and CoulombCore with `--authorize-ssh`.
`docs/onboarding.md` documents the operator and collaborator path.
---
### T03 — Claude Code MCP registration automation
@@ -117,7 +128,7 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254
```task
id: CUST-WP-0012-T03
state_hub_task_id: 60318e9a-972e-45c8-afde-82ed0625f594
status: todo
status: done
priority: medium
```
@@ -132,10 +143,10 @@ make register-mcp # idempotent; safe to re-run
The script should:
1. Detect whether `state-hub` is already in `~/.claude.json`
2. Extract the server config from `.mcp.json`
2. Use the current SSE MCP config (`http://127.0.0.1:8001/sse` locally or
`http://127.0.0.1:18001/sse` through ops-bridge)
3. Run `claude mcp add-json -s user state-hub <config>`
4. Run `patch_mcp_cwd.py` to restore the cwd field
5. Print instructions to restart Claude Code
4. Print instructions to restart Claude Code
Should also detect whether the state hub is reachable directly
(`http://127.0.0.1:8000`) or needs a tunnel (via ops-bridge), and emit
@@ -144,6 +155,12 @@ a warning if neither is available.
**Done when:** `make register-mcp` works on a clean machine; documented
in onboarding guide.
**Implemented 2026-05-23:** `scripts/register-mcp.sh` and the
`make register-mcp` target register the current SSE MCP transport
idempotently. The script detects local/tunnel reachability, supports
`MCP_URL`, `API_BASE`, and `DRY_RUN=1`, and documents the old `.mcp.json` cwd
patch path as legacy.
---
### T04 — Environment bootstrap script
@@ -151,7 +168,7 @@ in onboarding guide.
```task
id: CUST-WP-0012-T04
state_hub_task_id: 84a94761-e424-4470-a9a2-64d9cabadb7f
status: todo
status: done
priority: high
```
@@ -176,6 +193,11 @@ Design constraints:
**Done when:** running the script on a clean Ubuntu 24.04 machine
produces a working Custodian environment with no additional manual steps.
**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` and
`make bootstrap-env` provide the idempotent entrypoint. It supports dry-run,
non-interactive mode, optional apt package installation, SSH authorization,
Gitea token prompting, MCP registration, and State Hub health checks.
---
### T05 — Onboarding guide and user journey documentation
@@ -183,7 +205,7 @@ produces a working Custodian environment with no additional manual steps.
```task
id: CUST-WP-0012-T05
state_hub_task_id: b0839802-659a-475b-8b84-ab7341ea3d15
status: todo
status: done
priority: medium
```
@@ -208,6 +230,10 @@ for both personas:
**Done when:** a new collaborator can follow the guide without
clarification from the primary operator.
**Implemented 2026-05-23:** `docs/onboarding.md` covers primary operator and
domain collaborator journeys, including SSH, Gitea token file, credential
helper choices, MCP registration, tunnel setup, and verification checks.
---
### T06 — State Hub multi-user model (deferred)
@@ -215,7 +241,7 @@ clarification from the primary operator.
```task
id: CUST-WP-0012-T06
state_hub_task_id: d5df3302-67b9-4765-a8d8-ea2df53dff6e
status: todo
status: done
priority: low
```
@@ -235,6 +261,11 @@ domain) or rely on Gitea repo permissions as the authoritative boundary
Implement T01–T05 first; multi-user access control is only needed when
there is more than one user.
**Implemented 2026-05-23:** `docs/multi-user-access-model.md` records the
current decision: repo permissions, SSH access, tunnels, and OpenBao remain the
authoritative boundaries for this phase; State Hub API auth is deferred until a
real second-user or exposed-deployment trigger exists.
---
## References

View File

@@ -0,0 +1,310 @@
---
id: STATE-WP-0045
type: workplan
title: "Token Measurement Accuracy and Resilience"
domain: custodian
repo: state-hub
status: finished
owner: codex
topic_slug: custodian
created: "2026-05-23"
updated: "2026-05-23"
state_hub_workstream_id: "0aefe379-c182-4471-84dd-c136d5e1206b"
---
# Token Measurement Accuracy and Resilience
## Summary
Make State Hub token tracking accurate enough to trust for daily operations and
robust enough to survive agent/tool changes.
The May 19 flatline showed the current weak spots: token events mixed measured
usage, task-completion fallbacks, and file-sync side effects in the same table;
Claude measurement depended on one hook path; Codex usage lived in local session
logs until a manual backfill; and the dashboard treated every token event as the
same quality of evidence. The immediate fix restored Codex session totals and
suppressed sync-generated fallback events, but the system still needs a durable
measurement model, idempotent source adapters, reconciliation checks, and a
dashboard that exposes provenance and confidence.
## Current Findings
- `token_events` stores counts, associations, free-text notes, and timestamps,
but not structured provenance such as source system, source event id, parser
version, raw token categories, confidence, or whether the row is measured,
allocated, estimated, or superseded.
- `PATCH /tasks/{id}` can still create heuristic token events on a transition to
`done`. That fallback is useful as a temporary operational signal, but it is
not a measurement and should not be blended into measured totals.
- `fix-consistency` now suppresses token events while syncing file-backed task
status, but this is a narrow guard. Other bulk sync, import, and migration
paths need the same invariant.
- Codex Desktop session logs contain structured `token_count` events with
`last_token_usage`, `total_token_usage`, cached-input counts, and reasoning
output counts. The new backfill script can restore these, but it is not yet a
scheduled or monitored ingestion path.
- Claude Code measurement currently depends on `scripts/task_token_hook.py`
firing after one MCP tool name. It uses per-session state in `/tmp`, so missed
hooks, restarts, renamed tools, and non-MCP REST paths can silently degrade to
fallback events.
- Repository attribution for Codex backfill is path-based. This is good enough
for the emergency restore, but long-term attribution should prefer registered
repo fingerprints/remotes and then fall back to paths.
- The Token Cost dashboard currently aggregates all events returned by
`/token-events/?limit=1000`; it does not show measurement quality, source,
superseded rows, ingestion freshness, or possible gaps.
## Out of Scope
- Exact billing reconciliation against vendor invoices.
- Capturing private transcript content in State Hub.
- Replacing existing task/workstream/repo relationships.
- Implementing every provider-specific parser in one pass. The first pass should
cover Codex Desktop and Claude Code, with a documented adapter contract for
others.
## T01 - Define Token Evidence Model
```task
id: STATE-WP-0045-T01
status: done
priority: high
state_hub_task_id: "29aed6d9-40aa-40fc-9e9a-3eb3e6f985bc"
```
Define a structured model that separates measured usage from allocated,
estimated, and superseded rows.
Implementation notes:
- Add a short design note or ADR section covering token event semantics.
- Define measurement classes such as `measured`, `allocated`, `estimated`, and
`superseded`.
- Define source classes such as `codex_session`, `claude_transcript`,
`llm_connect`, `manual`, and `task_fallback`.
- Define structured provenance fields: source system, source id, source path or
URI, source timestamp, parser version, ingestion timestamp, and confidence.
- Decide how to represent raw token categories: input, cached input, output,
reasoning output, and provider total.
- Decide whether cached input should be included in default totals or shown as a
separate metric. Preserve enough fields to support both views.
- Stop using the free-text note taxonomy as the primary quality signal. Notes
can remain for human context, but dashboards and APIs should rely on structured
fields.
Done when the repo has a reviewed token evidence contract and the follow-on
schema/API tasks can implement it without ambiguity.
## T02 - Add Provenance Schema and Idempotent Upsert API
```task
id: STATE-WP-0045-T02
status: done
priority: high
state_hub_task_id: "ade2bd40-343c-4829-ba4f-44bc8b7cbef9"
```
Extend token storage so source-derived events can be written repeatedly without
duplicates and without losing provenance.
Implementation notes:
- Add migration fields for the evidence model from T01. Candidate fields:
`measurement_kind`, `source_provider`, `source_id`, `source_path`,
`source_created_at`, `ingested_at`, `parser_version`, `confidence`,
`cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`,
`cost_estimated_usd`, and `raw_metadata`.
- Add a unique constraint or partial unique index that prevents duplicate
measured source rows. For example: source provider plus source id, scoped by
measurement kind.
- Provide an upsert endpoint or make `POST /token-events/` support an explicit
idempotency key. The behavior should update a growing live session rather than
creating a second row.
- Keep backward compatibility for existing clients that only post
`tokens_in`/`tokens_out`, but classify those rows explicitly.
- Update schemas, router tests, and migration tests.
Done when source-backed token events can be inserted or updated idempotently and
legacy callers continue to work.
## T03 - Build Reusable Token Source Adapters
```task
id: STATE-WP-0045-T03
status: done
priority: high
state_hub_task_id: "3844fb70-4ceb-4f90-9894-d4845970f0a6"
```
Move source-specific parsing out of one-off scripts and hooks into reusable,
tested adapter modules.
Implementation notes:
- Add an `api/services/token_sources/` package or equivalent service layer.
- Implement a Codex Desktop adapter for `.codex/sessions/**` and
`.codex/archived_sessions/**`.
- Implement a Claude Code adapter for `.claude/projects/**/*.jsonl` that reads
usage metadata without storing transcript text.
- Provide a common adapter result type with source id, timestamps, token
categories, model, agent, cwd/path context, and raw parser metadata.
- Make parsing safe by default: no conversation text in logs, progress events,
token notes, or API payloads.
- Add fixtures with synthetic Codex and Claude session records that cover live
sessions, archived sessions, duplicate files, malformed JSONL, resets, and
missing usage records.
- Keep `scripts/backfill_codex_token_events.py` as a thin CLI over the reusable
service or replace it with a new unified CLI.
Done when Codex and Claude token sources have deterministic parser tests and a
shared ingestion interface.
## T04 - Improve Repo, Workstream, and Task Attribution
```task
id: STATE-WP-0045-T04
status: done
priority: high
state_hub_task_id: "d78b36ea-2a1a-40d6-bd83-03d48ff2ad9b"
```
Make attribution accurate without relying solely on local path string matching.
Implementation notes:
- Resolve repo attribution by git root fingerprint and remote URL when possible,
then fall back to registered host paths.
- Handle duplicate local paths or alias repos explicitly, especially where one
checkout is registered under multiple slugs.
- Attribute session-level usage to repo first, then optionally to workstreams or
tasks when there is strong evidence.
- Define task allocation rules that do not change measured session totals. For
example, produce `allocated` child rows from measured session rows using task
completion timestamps, tool-call metadata, or explicit operator input.
- Record the allocation method and confidence for every task-level allocation.
- Avoid minting task-level heuristic rows automatically for bulk import, status
sync, migration, and consistency tooling.
Done when measured session totals are stable and task/workstream attribution is
explicitly either measured, allocated, or estimated.
## T05 - Add Reconciliation, Gap Detection, and Backfill Operations
```task
id: STATE-WP-0045-T05
status: done
priority: high
state_hub_task_id: "efaa2629-4f9a-439c-b0a3-85d77b03580f"
```
Add an operator-safe reconciliation command that detects flatlines, duplicate
rows, stale ingestion, and fallback leakage.
Implementation notes:
- Add a command such as `make token-reconcile` or
`python scripts/token_reconcile.py --since <date>`.
- Report sessions found, sessions ingested, sessions stale, duplicate source
ids, fallback events, superseded rows, unattributed sessions, and rows missing
structured provenance.
- Run in dry-run mode by default (`--dry-run`) and require `--apply` for writes.
- Include an explicit `--zero-superseded-fallbacks` or equivalent flag rather
than silently editing historical rows.
- Store reconciliation summaries as progress events or report files without
including transcript content.
- Add a canary threshold: alert or fail when measured token volume is zero while
task/progress activity exists for the same window.
Done when an operator can run one command to verify token tracking health and
perform safe, idempotent backfills.
## T06 - Harden Hooks and Runtime Integration
```task
id: STATE-WP-0045-T06
status: done
priority: medium
state_hub_task_id: "5fd99241-e6dd-4ca6-8c58-a0048f08f0ca"
```
Make token collection survive hook misses, tool renames, restarts, and multiple
agent runtimes.
Implementation notes:
- Update Claude hook handling so it can match supported task completion paths,
not just one exact MCP tool name.
- Persist hook high-water marks in a durable State Hub or repo-local location
instead of only `/tmp`.
- Add hook health logging that records when a hook ran, what source id it
processed, and whether it patched or skipped a token event.
- Add a Codex ingestion path that can run on demand and from a schedule without
requiring manual script execution.
- Document required environment variables and path discovery for Windows, WSL,
and remote Linux hosts.
- Ensure failures degrade to visible `estimated` events or health warnings, not
silent flatlines.
Done when missing or stale token ingestion becomes visible within one reporting
window and can be recovered without ad hoc inspection.
## T07 - Upgrade Token APIs and Dashboard Quality Signals
```task
id: STATE-WP-0045-T07
status: done
priority: medium
state_hub_task_id: "ecaf6ff8-59aa-4c56-8163-125dc96b2068"
```
Expose token quality, source, and freshness in APIs and dashboard views.
Implementation notes:
- Add API filters for measurement kind, source provider, repo, time range,
superseded rows, and unattributed rows.
- Replace the dashboard's hard dependency on `/token-events/?limit=1000` with
paginated or pre-aggregated endpoints that support time windows.
- Add dashboard controls for measured-only, include allocated, include
estimates, and show superseded rows.
- Show ingestion freshness: last Codex session ingested, last Claude transcript
ingested, and last reconciliation run.
- Add a data-quality section listing fallback events, unattributed measured
sessions, duplicate source ids, and days with progress/task activity but zero
measured tokens.
- Update the Token Cost page and docs so operators know which numbers are
measured versus inferred.
Done when the dashboard no longer presents fallback, allocated, and measured
usage as indistinguishable totals.
## T08 - Verification and Migration Playbook
```task
id: STATE-WP-0045-T08
status: done
priority: medium
state_hub_task_id: "61baff79-832e-45f8-80f3-106abe262096"
```
Cover the new measurement system with tests and a safe rollout plan.
Implementation notes:
- Add unit tests for the evidence model, source adapters, source-id
deduplication, repo attribution, and task allocation.
- Add router tests for idempotent upsert, source filters, measurement-kind
filters, created-at preservation, and backwards-compatible legacy posts.
- Add reconciliation tests with synthetic pre-May-19 and post-May-19 flatline
scenarios.
- Add dashboard/data-loader tests or fixture checks for quality filters and
aggregate counts.
- Write a migration playbook covering old heuristic rows, existing
`backfill:codex-session` rows, and any rows without structured provenance.
- Verify the full suite and run a dry-run reconciliation before marking this
workplan finished.
Done when the improved token measurement path has automated coverage, an
operator playbook, and a dry-run reconciliation report showing no hidden
fallback leakage.