diff --git a/Makefile b/Makefile index 0071413..a7f1b7e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ -.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory +.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project register-mcp bootstrap-env validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory token-reconcile COMPOSE = docker compose -f infra/docker-compose.yml --env-file .env +PYTHON ?= python3 start: @echo "# run in different terminals" @@ -111,6 +112,17 @@ register-codex-project: @test -n "$(PROJECT_PATH)" || (echo "ERROR: PROJECT_PATH is required."; exit 1) scripts/register_project.sh "$(DOMAIN)" "$(PROJECT_PATH)" --codex +## Register State Hub MCP for Claude Code. Optional: make register-mcp MCP_URL=http://127.0.0.1:18001/sse +register-mcp: + scripts/register-mcp.sh \ + $(if $(MCP_URL),--url "$(MCP_URL)",) \ + $(if $(API_BASE),--api-base "$(API_BASE)",) \ + $(if $(DRY_RUN),--dry-run,) + +## Bootstrap a new operator/collaborator environment. Optional: make bootstrap-env ARGS="--install-missing" +bootstrap-env: + scripts/bootstrap-env.sh $(ARGS) + ## Add a second repo to an existing domain: make add-repo DOMAIN=railiance REPO_PATH=/home/worsch/railiance-infra add-repo: @test -n "$(DOMAIN)" || (echo "ERROR: DOMAIN is required."; exit 1) @@ -229,6 +241,17 @@ fix-consistency: $(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \ e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e +## Reconcile measured token sources against State Hub. 
+## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1] +token-reconcile: + $(PYTHON) scripts/token_reconcile.py \ + $(if $(SINCE),--since "$(SINCE)",) \ + $(if $(API_BASE),--api-base "$(API_BASE)",) \ + $(if $(CODEX_HOME),--codex-home "$(CODEX_HOME)",) \ + $(if $(CLAUDE_HOME),--claude-home "$(CLAUDE_HOME)",) \ + $(if $(APPLY),--apply,) \ + $(if $(ZERO_FALLBACKS),--zero-superseded-fallbacks,) + ## Pull then fix: single repo or all repos if REPO omitted ## make fix-consistency-remote — smart pull+fix all repos that need it ## make fix-consistency-remote REPO=slug — pull+fix one repo diff --git a/api/models/token_event.py b/api/models/token_event.py index 01ae8d2..9396d13 100644 --- a/api/models/token_event.py +++ b/api/models/token_event.py @@ -1,8 +1,10 @@ import uuid from datetime import datetime -from sqlalchemy import DateTime, ForeignKey, Integer, Text, func -from sqlalchemy.dialects.postgresql import UUID +from typing import Any + +from sqlalchemy import DateTime, Float, ForeignKey, Integer, Text, UniqueConstraint, func +from sqlalchemy.dialects.postgresql import JSONB, UUID from sqlalchemy.orm import Mapped, mapped_column, relationship from api.models.base import Base, new_uuid @@ -10,6 +12,14 @@ from api.models.base import Base, new_uuid class TokenEvent(Base): __tablename__ = "token_events" + __table_args__ = ( + UniqueConstraint( + "measurement_kind", + "source_provider", + "source_id", + name="uq_token_events_source_identity", + ), + ) id: Mapped[uuid.UUID] = mapped_column( UUID(as_uuid=True), primary_key=True, default=new_uuid @@ -31,6 +41,35 @@ class TokenEvent(Base): ref_type: Mapped[str | None] = mapped_column(Text, nullable=True) ref_id: Mapped[str | None] = mapped_column(Text, nullable=True) note: Mapped[str | None] = mapped_column(Text, nullable=True) + measurement_kind: Mapped[str] = mapped_column( + Text, nullable=False, default="estimated", server_default="estimated", index=True + ) + source_provider: Mapped[str] = 
mapped_column( + Text, nullable=False, default="manual", server_default="manual", index=True + ) + source_id: Mapped[str | None] = mapped_column(Text, nullable=True, index=True) + source_path: Mapped[str | None] = mapped_column(Text, nullable=True) + source_created_at: Mapped[datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True, index=True + ) + ingested_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False, index=True + ) + parser_version: Mapped[str | None] = mapped_column(Text, nullable=True) + confidence: Mapped[float] = mapped_column( + Float, nullable=False, default=0.35, server_default="0.35" + ) + cached_input_tokens: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + reasoning_output_tokens: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + raw_total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True) + cost_estimated_usd: Mapped[float | None] = mapped_column(Float, nullable=True) + raw_metadata: Mapped[dict[str, Any]] = mapped_column( + JSONB, nullable=False, default=dict, server_default="{}" + ) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), nullable=False, index=True ) diff --git a/api/routers/tasks.py b/api/routers/tasks.py index adf6cc6..1551164 100644 --- a/api/routers/tasks.py +++ b/api/routers/tasks.py @@ -75,23 +75,47 @@ async def update_task( if task is None: raise HTTPException(status_code=404, detail="Task not found") + previous_status = task.status.value + # Separate token fields from task fields - token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "token_note", "model", "agent", "session_id"} + token_field_names = { + "tokens_in", + "tokens_out", + "workplan_tokens_in", + "workplan_tokens_out", + "token_note", + "model", + "agent", + "session_id", + "suppress_token_event", + } 
update_data = body.model_dump(exclude_unset=True) token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_field_names} + suppress_token_event = bool(token_data.pop("suppress_token_event", False)) for field, value in update_data.items(): setattr(task, field, value) await session.commit() await session.refresh(task) - # Token event — three-tier logic, only when marking done - if update_data.get("status") == "done": + # Token event — three-tier logic, only for an intentional transition to done. + status_update = update_data.get("status") + new_status = status_update.value if hasattr(status_update, "value") else status_update + if ( + new_status == "done" + and previous_status != "done" + and not suppress_token_event + ): if "tokens_in" in token_data and "tokens_out" in token_data: # Tier 1: exact counts — default note "measured"; caller may override with token_note tin = token_data["tokens_in"] tout = token_data["tokens_out"] tnote = token_data.get("token_note") or "measured" + measurement_kind = "measured" + source_provider = "manual" + confidence = 1.0 + source_id = f"task:{task_id}:manual" + raw_metadata = {"input_source": "task_status_patch"} elif "workplan_tokens_in" in token_data and "workplan_tokens_out" in token_data: # Tier 2: prorate workplan total across task count count_result = await session.execute( @@ -101,9 +125,24 @@ async def update_task( tin = token_data["workplan_tokens_in"] // task_count tout = token_data["workplan_tokens_out"] // task_count tnote = "workplan" + measurement_kind = "allocated" + source_provider = "manual" + confidence = 0.7 + source_id = f"task:{task_id}:workplan-allocation" + raw_metadata = { + "allocation_method": "workplan_prorated", + "workplan_tokens_in": token_data["workplan_tokens_in"], + "workplan_tokens_out": token_data["workplan_tokens_out"], + "task_count": task_count, + } else: # Tier 3: heuristic fallback tin, tout, tnote = 1000, 500, "heuristic" + measurement_kind = "estimated" + 
source_provider = "task_fallback" + confidence = 0.35 + source_id = f"task:{task_id}:heuristic" + raw_metadata = {"estimation_method": "fixed_task_done_fallback"} # Resolve repo_id via workstream ws = await session.get(Workstream, task.workstream_id) @@ -121,6 +160,12 @@ async def update_task( ref_type="task", ref_id=str(task_id), note=tnote, + measurement_kind=measurement_kind, + source_provider=source_provider, + source_id=source_id, + confidence=confidence, + raw_total_tokens=tin + tout, + raw_metadata=raw_metadata, ) session.add(event) await session.commit() diff --git a/api/routers/token_events.py b/api/routers/token_events.py index 6dd6bf7..b282fda 100644 --- a/api/routers/token_events.py +++ b/api/routers/token_events.py @@ -1,5 +1,7 @@ import uuid from collections import defaultdict +from datetime import datetime +from typing import Any from fastapi import APIRouter, Depends, HTTPException, Query, status from sqlalchemy import select @@ -10,18 +12,95 @@ from api.models.managed_repo import ManagedRepo from api.models.task import Task from api.models.token_event import TokenEvent from api.models.workstream import Workstream -from api.schemas.token_event import RepoTokenSummary, TokenEventCreate, TokenEventPatch, TokenEventRead, TokenSummary +from api.schemas.token_event import ( + RepoTokenSummary, + TokenAggregateRow, + TokenAggregateSummary, + TokenEventCreate, + TokenEventPatch, + TokenEventRead, + TokenQualitySummary, + TokenSummary, +) router = APIRouter(prefix="/token-events", tags=["token-events"]) +DEFAULT_CONFIDENCE = { + "measured": 1.0, + "allocated": 0.70, + "estimated": 0.35, + "superseded": 0.0, +} -@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED) -async def create_token_event( - body: TokenEventCreate, - session: AsyncSession = Depends(get_session), -) -> TokenEvent: - data = body.model_dump() +SOURCE_PARSER_DEFAULTS = { + "codex_session": "codex-desktop-v1", + "claude_transcript": "claude-transcript-v1", + 
"llm_connect": "llm-connect-v1", +} + +def _event_total(event: TokenEvent) -> int: + return event.tokens_in + event.tokens_out + + +def _infer_measurement_kind(data: dict[str, Any]) -> str: + if data.get("measurement_kind"): + return str(data["measurement_kind"]) + note = data.get("note") + if note == "heuristic_superseded_by_codex_backfill": + return "superseded" + if note == "workplan": + return "allocated" + if note == "heuristic": + return "estimated" + if note == "measured" or str(note or "").startswith("backfill:codex-session"): + return "measured" + provider = data.get("source_provider") + if provider in {"codex_session", "claude_transcript", "llm_connect"}: + return "measured" + return "estimated" + + +def _infer_source_provider(data: dict[str, Any], measurement_kind: str) -> str: + if data.get("source_provider"): + return str(data["source_provider"]) + note = data.get("note") + ref_id = str(data.get("ref_id") or "") + agent = str(data.get("agent") or "").lower() + if note == "heuristic": + return "task_fallback" + if ref_id.startswith("codex:") or str(note or "").startswith("backfill:codex-session"): + return "codex_session" + if measurement_kind == "measured" and "claude" in agent: + return "claude_transcript" + return "manual" + + +def _apply_event_defaults(data: dict[str, Any]) -> dict[str, Any]: + measurement_kind = _infer_measurement_kind(data) + source_provider = _infer_source_provider(data, measurement_kind) + data["measurement_kind"] = measurement_kind + data["source_provider"] = source_provider + + if not data.get("source_id") and source_provider in {"codex_session", "claude_transcript", "llm_connect"}: + source_id = data.get("ref_id") or data.get("session_id") + if source_id: + data["source_id"] = str(source_id) + + if not data.get("source_created_at") and data.get("created_at") and data.get("source_id"): + data["source_created_at"] = data["created_at"] + + data.setdefault("confidence", DEFAULT_CONFIDENCE.get(measurement_kind, 0.35)) + 
data.setdefault("cached_input_tokens", 0) + data.setdefault("reasoning_output_tokens", 0) + data.setdefault("raw_total_tokens", (data.get("tokens_in") or 0) + (data.get("tokens_out") or 0)) + data.setdefault("raw_metadata", {}) + if source_provider in SOURCE_PARSER_DEFAULTS: + data.setdefault("parser_version", SOURCE_PARSER_DEFAULTS[source_provider]) + return data + + +async def _populate_relationship_defaults(data: dict[str, Any], session: AsyncSession) -> dict[str, Any]: # Auto-populate workstream_id from task if not provided if data.get("task_id") and not data.get("workstream_id"): task = await session.get(Task, data["task_id"]) @@ -33,6 +112,34 @@ async def create_token_event( ws = await session.get(Workstream, data["workstream_id"]) if ws and ws.repo_id: data["repo_id"] = ws.repo_id + return data + + +async def _find_source_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent | None: + source_id = data.get("source_id") + if not source_id: + return None + result = await session.execute( + select(TokenEvent).where( + TokenEvent.measurement_kind == data["measurement_kind"], + TokenEvent.source_provider == data["source_provider"], + TokenEvent.source_id == source_id, + ) + ) + return result.scalar_one_or_none() + + +async def _create_or_upsert_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent: + data = _apply_event_defaults(data) + data = await _populate_relationship_defaults(data, session) + + existing = await _find_source_event(data, session) + if existing is not None: + for field, value in data.items(): + setattr(existing, field, value) + await session.commit() + await session.refresh(existing) + return existing event = TokenEvent(**data) session.add(event) @@ -41,6 +148,77 @@ async def create_token_event( return event +def _filter_query( + q, + *, + task_id: uuid.UUID | None = None, + workstream_id: uuid.UUID | None = None, + repo_id: uuid.UUID | None = None, + ref_type: str | None = None, + ref_id: str | None = None, + model: str | 
None = None, + agent: str | None = None, + note: str | None = None, + measurement_kind: str | None = None, + source_provider: str | None = None, + since: datetime | None = None, + until: datetime | None = None, + include_superseded: bool = True, + unattributed: bool = False, +): + if task_id: + q = q.where(TokenEvent.task_id == task_id) + if workstream_id: + q = q.where(TokenEvent.workstream_id == workstream_id) + if repo_id: + q = q.where(TokenEvent.repo_id == repo_id) + if ref_type: + q = q.where(TokenEvent.ref_type == ref_type) + if ref_id: + q = q.where(TokenEvent.ref_id == ref_id) + if model: + q = q.where(TokenEvent.model == model) + if agent: + q = q.where(TokenEvent.agent == agent) + if note: + q = q.where(TokenEvent.note == note) + if measurement_kind: + q = q.where(TokenEvent.measurement_kind == measurement_kind) + if source_provider: + q = q.where(TokenEvent.source_provider == source_provider) + if since: + q = q.where(TokenEvent.created_at >= since) + if until: + q = q.where(TokenEvent.created_at < until) + if not include_superseded: + q = q.where(TokenEvent.measurement_kind != "superseded") + if unattributed: + q = q.where( + TokenEvent.repo_id.is_(None), + TokenEvent.workstream_id.is_(None), + TokenEvent.task_id.is_(None), + ) + return q + + +@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED) +async def create_token_event( + body: TokenEventCreate, + session: AsyncSession = Depends(get_session), +) -> TokenEvent: + data = body.model_dump(exclude_none=True) + return await _create_or_upsert_event(data, session) + + +@router.post("/upsert", response_model=TokenEventRead) +async def upsert_token_event( + body: TokenEventCreate, + session: AsyncSession = Depends(get_session), +) -> TokenEvent: + data = body.model_dump(exclude_none=True) + return await _create_or_upsert_event(data, session) + + @router.get("/summary/", response_model=TokenSummary) async def get_token_summary( scope: str = Query(..., 
description="task|workstream|repo|commit|release|session"), @@ -80,11 +258,16 @@ async def get_token_summary( by_model: dict[str, int] = defaultdict(int) by_agent: dict[str, int] = defaultdict(int) + by_measurement_kind: dict[str, int] = defaultdict(int) + by_source_provider: dict[str, int] = defaultdict(int) for e in events: + total = _event_total(e) if e.model: - by_model[e.model] += e.tokens_in + e.tokens_out + by_model[e.model] += total if e.agent: - by_agent[e.agent] += e.tokens_in + e.tokens_out + by_agent[e.agent] += total + by_measurement_kind[e.measurement_kind] += total + by_source_provider[e.source_provider] += total return TokenSummary( scope=scope, @@ -95,11 +278,18 @@ async def get_token_summary( event_count=len(events), by_model=dict(by_model), by_agent=dict(by_agent), + by_measurement_kind=dict(by_measurement_kind), + by_source_provider=dict(by_source_provider), ) @router.get("/by-repo/", response_model=list[RepoTokenSummary]) async def get_tokens_by_repo( + measurement_kind: str | None = None, + source_provider: str | None = None, + since: datetime | None = None, + until: datetime | None = None, + include_superseded: bool = Query(True), session: AsyncSession = Depends(get_session), ) -> list[RepoTokenSummary]: """Aggregate token consumption per repo, resolving via the full graph. @@ -112,7 +302,16 @@ async def get_tokens_by_repo( Only events that resolve to a repo are included. 
""" # Fetch all events, workstreams, repos in three queries (avoids N+1) - events_result = await session.execute(select(TokenEvent)) + events_result = await session.execute( + _filter_query( + select(TokenEvent), + measurement_kind=measurement_kind, + source_provider=source_provider, + since=since, + until=until, + include_superseded=include_superseded, + ) + ) events = list(events_result.scalars().all()) ws_result = await session.execute(select(Workstream)) @@ -148,14 +347,19 @@ async def get_tokens_by_repo( "event_count": 0, "by_model": defaultdict(int), "by_note": defaultdict(int), + "by_measurement_kind": defaultdict(int), + "by_source_provider": defaultdict(int), } g = groups[rid] g["tokens_in"] += e.tokens_in g["tokens_out"] += e.tokens_out g["event_count"] += 1 + total = _event_total(e) if e.model: - g["by_model"][e.model] += e.tokens_in + e.tokens_out - g["by_note"][e.note or "unknown"] += e.tokens_in + e.tokens_out + g["by_model"][e.model] += total + g["by_note"][e.note or "unknown"] += total + g["by_measurement_kind"][e.measurement_kind] += total + g["by_source_provider"][e.source_provider] += total return [ RepoTokenSummary( @@ -166,6 +370,188 @@ async def get_tokens_by_repo( ] +@router.get("/aggregate/", response_model=TokenAggregateSummary) +async def get_token_aggregate( + measurement_kind: str | None = None, + source_provider: str | None = None, + since: datetime | None = None, + until: datetime | None = None, + include_superseded: bool = Query(False), + session: AsyncSession = Depends(get_session), +) -> TokenAggregateSummary: + events_result = await session.execute( + _filter_query( + select(TokenEvent), + measurement_kind=measurement_kind, + source_provider=source_provider, + since=since, + until=until, + include_superseded=include_superseded, + ) + ) + events = list(events_result.scalars().all()) + + ws_result = await session.execute(select(Workstream)) + ws_map: dict[uuid.UUID, Workstream] = {w.id: w for w in ws_result.scalars().all()} + + 
task_result = await session.execute(select(Task)) + task_map: dict[uuid.UUID, Task] = {t.id: t for t in task_result.scalars().all()} + + repo_result = await session.execute(select(ManagedRepo)) + repo_map: dict[uuid.UUID, ManagedRepo] = {r.id: r for r in repo_result.scalars().all()} + + def resolve_repo_id(e: TokenEvent) -> uuid.UUID | None: + if e.repo_id: + return e.repo_id + ws_id = e.workstream_id + if not ws_id and e.task_id and e.task_id in task_map: + ws_id = task_map[e.task_id].workstream_id + if ws_id and ws_id in ws_map: + return ws_map[ws_id].repo_id + return None + + def add(groups: dict[str, dict[str, Any]], key: str | None, label: str | None, e: TokenEvent) -> None: + if not key: + return + if key not in groups: + groups[key] = { + "scope_id": key, + "label": label, + "tokens_in": 0, + "tokens_out": 0, + "event_count": 0, + "by_measurement_kind": defaultdict(int), + "by_source_provider": defaultdict(int), + } + row = groups[key] + total = _event_total(e) + row["tokens_in"] += e.tokens_in + row["tokens_out"] += e.tokens_out + row["event_count"] += 1 + row["by_measurement_kind"][e.measurement_kind] += total + row["by_source_provider"][e.source_provider] += total + + by_repo: dict[str, dict[str, Any]] = {} + by_workstream: dict[str, dict[str, Any]] = {} + by_task: dict[str, dict[str, Any]] = {} + by_model: dict[str, dict[str, Any]] = {} + by_measurement_kind: dict[str, int] = defaultdict(int) + by_source_provider: dict[str, int] = defaultdict(int) + + first_event_at = last_event_at = last_ingested_at = None + tokens_in = tokens_out = 0 + for e in events: + total = _event_total(e) + tokens_in += e.tokens_in + tokens_out += e.tokens_out + by_measurement_kind[e.measurement_kind] += total + by_source_provider[e.source_provider] += total + + if first_event_at is None or e.created_at < first_event_at: + first_event_at = e.created_at + if last_event_at is None or e.created_at > last_event_at: + last_event_at = e.created_at + if last_ingested_at is None or 
e.ingested_at > last_ingested_at: + last_ingested_at = e.ingested_at + + rid = resolve_repo_id(e) + repo = repo_map.get(rid) if rid else None + add(by_repo, str(rid) if rid else None, repo.slug if repo else None, e) + + ws_id = e.workstream_id or (task_map[e.task_id].workstream_id if e.task_id in task_map else None) + ws = ws_map.get(ws_id) if ws_id else None + add(by_workstream, str(ws_id) if ws_id else None, ws.title if ws else None, e) + + task = task_map.get(e.task_id) if e.task_id else None + add(by_task, str(e.task_id) if e.task_id else None, task.title if task else None, e) + + add(by_model, e.model or "unknown", e.model or "unknown", e) + + def rows(groups: dict[str, dict[str, Any]]) -> list[TokenAggregateRow]: + result = [] + for row in groups.values(): + result.append( + TokenAggregateRow( + **{k: (dict(v) if isinstance(v, defaultdict) else v) for k, v in row.items()}, + tokens_total=row["tokens_in"] + row["tokens_out"], + ) + ) + return sorted(result, key=lambda item: -item.tokens_total) + + return TokenAggregateSummary( + tokens_in=tokens_in, + tokens_out=tokens_out, + tokens_total=tokens_in + tokens_out, + event_count=len(events), + first_event_at=first_event_at, + last_event_at=last_event_at, + last_ingested_at=last_ingested_at, + by_repo=rows(by_repo), + by_workstream=rows(by_workstream), + by_task=rows(by_task), + by_model=rows(by_model), + by_measurement_kind=dict(by_measurement_kind), + by_source_provider=dict(by_source_provider), + ) + + +@router.get("/quality/", response_model=TokenQualitySummary) +async def get_token_quality( + since: datetime | None = None, + until: datetime | None = None, + session: AsyncSession = Depends(get_session), +) -> TokenQualitySummary: + result = await session.execute(_filter_query(select(TokenEvent), since=since, until=until)) + events = list(result.scalars().all()) + + by_measurement_kind: dict[str, int] = defaultdict(int) + by_source_provider: dict[str, int] = defaultdict(int) + source_counts: dict[tuple[str, 
str, str], int] = defaultdict(int) + last_codex_ingested_at = None + last_claude_ingested_at = None + + fallback_count = 0 + unattributed_measured_count = 0 + missing_provenance_count = 0 + for e in events: + by_measurement_kind[e.measurement_kind] += 1 + by_source_provider[e.source_provider] += 1 + if e.source_id: + source_counts[(e.measurement_kind, e.source_provider, e.source_id)] += 1 + if e.source_provider == "task_fallback" or e.note == "heuristic": + fallback_count += 1 + if e.measurement_kind == "measured" and not (e.repo_id or e.workstream_id or e.task_id): + unattributed_measured_count += 1 + if e.measurement_kind == "measured" and not e.source_id: + missing_provenance_count += 1 + if e.source_provider == "codex_session" and ( + last_codex_ingested_at is None or e.ingested_at > last_codex_ingested_at + ): + last_codex_ingested_at = e.ingested_at + if e.source_provider == "claude_transcript" and ( + last_claude_ingested_at is None or e.ingested_at > last_claude_ingested_at + ): + last_claude_ingested_at = e.ingested_at + + duplicate_source_count = sum(1 for count in source_counts.values() if count > 1) + return TokenQualitySummary( + event_count=len(events), + measured_event_count=by_measurement_kind.get("measured", 0), + estimated_event_count=by_measurement_kind.get("estimated", 0), + allocated_event_count=by_measurement_kind.get("allocated", 0), + superseded_event_count=by_measurement_kind.get("superseded", 0), + fallback_event_count=fallback_count, + unattributed_measured_event_count=unattributed_measured_count, + missing_provenance_event_count=missing_provenance_count, + duplicate_source_count=duplicate_source_count, + last_codex_ingested_at=last_codex_ingested_at, + last_claude_ingested_at=last_claude_ingested_at, + last_reconciliation_at=None, + by_measurement_kind=dict(by_measurement_kind), + by_source_provider=dict(by_source_provider), + ) + + @router.patch("/{event_id}", response_model=TokenEventRead) async def patch_token_event( event_id: 
uuid.UUID, @@ -175,7 +561,26 @@ async def patch_token_event( event = await session.get(TokenEvent, event_id) if event is None: raise HTTPException(status_code=404, detail="Token event not found") - for field, value in body.model_dump(exclude_none=True).items(): + data = body.model_dump(exclude_none=True) + if "note" in data or "measurement_kind" in data or "source_provider" in data: + merged = { + "tokens_in": data.get("tokens_in", event.tokens_in), + "tokens_out": data.get("tokens_out", event.tokens_out), + "note": data.get("note", event.note), + "agent": data.get("agent", event.agent), + "ref_id": data.get("ref_id", event.ref_id), + "session_id": data.get("session_id", event.session_id), + "measurement_kind": data.get("measurement_kind", event.measurement_kind), + "source_provider": data.get("source_provider", event.source_provider), + "source_id": data.get("source_id", event.source_id), + } + inferred = _apply_event_defaults({k: v for k, v in merged.items() if v is not None}) + data.setdefault("measurement_kind", inferred["measurement_kind"]) + data.setdefault("source_provider", inferred["source_provider"]) + data.setdefault("confidence", inferred["confidence"]) + if inferred.get("source_id"): + data.setdefault("source_id", inferred["source_id"]) + for field, value in data.items(): setattr(event, field, value) await session.commit() await session.refresh(event) @@ -203,26 +608,33 @@ async def list_token_events( model: str | None = None, agent: str | None = None, note: str | None = None, + measurement_kind: str | None = None, + source_provider: str | None = None, + since: datetime | None = None, + until: datetime | None = None, + include_superseded: bool = Query(True), + unattributed: bool = False, + offset: int = Query(0, ge=0), limit: int = Query(100, le=1000), session: AsyncSession = Depends(get_session), ) -> list[TokenEvent]: - q = select(TokenEvent) - if task_id: - q = q.where(TokenEvent.task_id == task_id) - if workstream_id: - q = 
q.where(TokenEvent.workstream_id == workstream_id) - if repo_id: - q = q.where(TokenEvent.repo_id == repo_id) - if ref_type: - q = q.where(TokenEvent.ref_type == ref_type) - if ref_id: - q = q.where(TokenEvent.ref_id == ref_id) - if model: - q = q.where(TokenEvent.model == model) - if agent: - q = q.where(TokenEvent.agent == agent) - if note: - q = q.where(TokenEvent.note == note) - q = q.order_by(TokenEvent.created_at.desc()).limit(limit) + q = _filter_query( + select(TokenEvent), + task_id=task_id, + workstream_id=workstream_id, + repo_id=repo_id, + ref_type=ref_type, + ref_id=ref_id, + model=model, + agent=agent, + note=note, + measurement_kind=measurement_kind, + source_provider=source_provider, + since=since, + until=until, + include_superseded=include_superseded, + unattributed=unattributed, + ) + q = q.order_by(TokenEvent.created_at.desc()).offset(offset).limit(limit) result = await session.execute(q) return list(result.scalars().all()) diff --git a/api/schemas/task.py b/api/schemas/task.py index 048dba5..2c9af32 100644 --- a/api/schemas/task.py +++ b/api/schemas/task.py @@ -43,6 +43,7 @@ class TaskUpdate(BaseModel): # 2. workplan_tokens_in + workplan_tokens_out → prorated across task count (note="workplan") # 3. neither provided, status=done → heuristic 1000/500 (note="heuristic") # token_note overrides the auto-assigned note for Tier 1 only (e.g. "userbased") + # suppress_token_event lets file/cache sync update status without recording usage. 
tokens_in: int | None = None tokens_out: int | None = None workplan_tokens_in: int | None = None @@ -51,6 +52,7 @@ class TaskUpdate(BaseModel): model: str | None = None agent: str | None = None session_id: str | None = None + suppress_token_event: bool | None = None @model_validator(mode="after") def blocking_reason_required_when_blocked(self) -> Self: diff --git a/api/schemas/token_event.py b/api/schemas/token_event.py index 60acbda..7c7ca9d 100644 --- a/api/schemas/token_event.py +++ b/api/schemas/token_event.py @@ -1,7 +1,8 @@ import uuid from datetime import datetime +from typing import Any -from pydantic import BaseModel, ConfigDict, computed_field +from pydantic import BaseModel, ConfigDict, Field, computed_field class TokenEventCreate(BaseModel): @@ -16,6 +17,19 @@ class TokenEventCreate(BaseModel): ref_type: str | None = None ref_id: str | None = None note: str | None = None + created_at: datetime | None = None + measurement_kind: str | None = None + source_provider: str | None = None + source_id: str | None = None + source_path: str | None = None + source_created_at: datetime | None = None + parser_version: str | None = None + confidence: float | None = None + cached_input_tokens: int | None = None + reasoning_output_tokens: int | None = None + raw_total_tokens: int | None = None + cost_estimated_usd: float | None = None + raw_metadata: dict[str, Any] | None = None class TokenEventRead(BaseModel): @@ -33,6 +47,19 @@ class TokenEventRead(BaseModel): ref_type: str | None = None ref_id: str | None = None note: str | None = None + measurement_kind: str + source_provider: str + source_id: str | None = None + source_path: str | None = None + source_created_at: datetime | None = None + ingested_at: datetime + parser_version: str | None = None + confidence: float + cached_input_tokens: int + reasoning_output_tokens: int + raw_total_tokens: int | None = None + cost_estimated_usd: float | None = None + raw_metadata: dict[str, Any] = Field(default_factory=dict) 
created_at: datetime @computed_field @@ -40,6 +67,11 @@ class TokenEventRead(BaseModel): def tokens_total(self) -> int: return self.tokens_in + self.tokens_out + @computed_field + @property + def token_evidence_total(self) -> int: + return (self.raw_total_tokens or self.tokens_in + self.tokens_out) + class TokenSummary(BaseModel): scope: str @@ -50,14 +82,36 @@ class TokenSummary(BaseModel): event_count: int by_model: dict[str, int] by_agent: dict[str, int] + by_measurement_kind: dict[str, int] = Field(default_factory=dict) + by_source_provider: dict[str, int] = Field(default_factory=dict) class TokenEventPatch(BaseModel): tokens_in: int | None = None tokens_out: int | None = None + task_id: uuid.UUID | None = None + workstream_id: uuid.UUID | None = None + repo_id: uuid.UUID | None = None + session_id: str | None = None note: str | None = None model: str | None = None agent: str | None = None + ref_type: str | None = None + ref_id: str | None = None + created_at: datetime | None = None + measurement_kind: str | None = None + source_provider: str | None = None + source_id: str | None = None + source_path: str | None = None + source_created_at: datetime | None = None + ingested_at: datetime | None = None + parser_version: str | None = None + confidence: float | None = None + cached_input_tokens: int | None = None + reasoning_output_tokens: int | None = None + raw_total_tokens: int | None = None + cost_estimated_usd: float | None = None + raw_metadata: dict[str, Any] | None = None class RepoTokenSummary(BaseModel): @@ -69,3 +123,49 @@ class RepoTokenSummary(BaseModel): event_count: int by_model: dict[str, int] by_note: dict[str, int] + by_measurement_kind: dict[str, int] = Field(default_factory=dict) + by_source_provider: dict[str, int] = Field(default_factory=dict) + + +class TokenAggregateRow(BaseModel): + scope_id: str + label: str | None = None + tokens_in: int + tokens_out: int + tokens_total: int + event_count: int + by_measurement_kind: dict[str, int] = 
Field(default_factory=dict) + by_source_provider: dict[str, int] = Field(default_factory=dict) + + +class TokenAggregateSummary(BaseModel): + tokens_in: int + tokens_out: int + tokens_total: int + event_count: int + first_event_at: datetime | None = None + last_event_at: datetime | None = None + last_ingested_at: datetime | None = None + by_repo: list[TokenAggregateRow] = Field(default_factory=list) + by_workstream: list[TokenAggregateRow] = Field(default_factory=list) + by_task: list[TokenAggregateRow] = Field(default_factory=list) + by_model: list[TokenAggregateRow] = Field(default_factory=list) + by_measurement_kind: dict[str, int] = Field(default_factory=dict) + by_source_provider: dict[str, int] = Field(default_factory=dict) + + +class TokenQualitySummary(BaseModel): + event_count: int + measured_event_count: int + estimated_event_count: int + allocated_event_count: int + superseded_event_count: int + fallback_event_count: int + unattributed_measured_event_count: int + missing_provenance_event_count: int + duplicate_source_count: int + last_codex_ingested_at: datetime | None = None + last_claude_ingested_at: datetime | None = None + last_reconciliation_at: datetime | None = None + by_measurement_kind: dict[str, int] = Field(default_factory=dict) + by_source_provider: dict[str, int] = Field(default_factory=dict) diff --git a/api/services/token_sources/__init__.py b/api/services/token_sources/__init__.py new file mode 100644 index 0000000..f0e95f5 --- /dev/null +++ b/api/services/token_sources/__init__.py @@ -0,0 +1,16 @@ +"""Token source adapters for measured agent usage.""" + +from api.services.token_sources.base import TokenSourceRecord, parse_iso +from api.services.token_sources.codex import collect_codex_sessions, iter_codex_session_files, parse_codex_session +from api.services.token_sources.claude import collect_claude_transcripts, iter_claude_transcript_files, parse_claude_transcript + +__all__ = [ + "TokenSourceRecord", + "parse_iso", + 
"collect_codex_sessions", + "iter_codex_session_files", + "parse_codex_session", + "collect_claude_transcripts", + "iter_claude_transcript_files", + "parse_claude_transcript", +] diff --git a/api/services/token_sources/attribution.py b/api/services/token_sources/attribution.py new file mode 100644 index 0000000..2a9eec4 --- /dev/null +++ b/api/services/token_sources/attribution.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +@dataclass(frozen=True) +class RepoRef: + repo_id: str + slug: str + local_path: str | None = None + host_paths: dict[str, Any] | None = None + remote_url: str | None = None + git_fingerprint: str | None = None + + +@dataclass(frozen=True) +class RepoMatch: + repo_id: str + slug: str + method: str + confidence: float + + +def normalise_cwd(raw: str | None) -> str | None: + if not raw: + return None + value = raw.replace("\\", "/") + prefixes = ( + "//wsl.localhost/Ubuntu-24.04", + "//wsl$/Ubuntu-24.04", + ) + for prefix in prefixes: + if value.startswith(prefix): + return value[len(prefix):] or "/" + if len(value) >= 3 and value[1:3] == ":/": + drive = value[0].lower() + return f"/mnt/{drive}{value[2:]}" + return value + + +def normalise_remote_url(raw: str | None) -> str | None: + if not raw: + return None + value = raw.strip() + if value.endswith(".git"): + value = value[:-4] + if value.startswith("git@") and ":" in value: + host, path = value[4:].split(":", 1) + value = f"ssh://{host}/{path}" + return value.lower().rstrip("/") + + +def repo_refs_from_api(repos: list[dict[str, Any]]) -> list[RepoRef]: + refs = [] + for repo in repos: + repo_id = repo.get("id") + slug = repo.get("slug") + if not repo_id or not slug: + continue + refs.append( + RepoRef( + repo_id=str(repo_id), + slug=str(slug), + local_path=repo.get("local_path"), + host_paths=repo.get("host_paths") if isinstance(repo.get("host_paths"), dict) else {}, + 
remote_url=repo.get("remote_url"), + git_fingerprint=repo.get("git_fingerprint"), + ) + ) + return refs + + +def _git(cwd: str, *args: str) -> str | None: + try: + result = subprocess.run( + ["git", *args], + cwd=cwd, + check=False, + capture_output=True, + text=True, + timeout=5, + ) + except (OSError, subprocess.SubprocessError): + return None + if result.returncode != 0: + return None + value = result.stdout.strip().splitlines() + return value[0] if value else None + + +def git_fingerprint_for_path(cwd: str | None) -> str | None: + path = normalise_cwd(cwd) + if not path or not Path(path).exists(): + return None + root = _git(path, "rev-parse", "--show-toplevel") + if not root: + return None + return _git(root, "rev-list", "--max-parents=0", "HEAD") + + +def git_remote_for_path(cwd: str | None) -> str | None: + path = normalise_cwd(cwd) + if not path or not Path(path).exists(): + return None + root = _git(path, "rev-parse", "--show-toplevel") + if not root: + return None + return _git(root, "remote", "get-url", "origin") + + +def _repo_paths(repo: RepoRef) -> list[str]: + paths = [repo.local_path] + if repo.host_paths: + paths.extend(str(v) for v in repo.host_paths.values() if v) + result = [] + for raw in paths: + path = normalise_cwd(str(raw)) if raw and raw != "(unknown)" else None + if path: + result.append(path.rstrip("/")) + return result + + +def resolve_repo(cwd: str | None, repos: list[RepoRef]) -> RepoMatch | None: + path = normalise_cwd(cwd) + fingerprint = git_fingerprint_for_path(path) + remote = normalise_remote_url(git_remote_for_path(path)) + + if fingerprint: + candidates = [repo for repo in repos if repo.git_fingerprint == fingerprint] + if len(candidates) == 1: + repo = candidates[0] + return RepoMatch(repo.repo_id, repo.slug, "git_fingerprint", 0.98) + if remote: + remote_candidates = [ + repo for repo in candidates + if normalise_remote_url(repo.remote_url) == remote + ] + if len(remote_candidates) == 1: + repo = remote_candidates[0] + 
return RepoMatch(repo.repo_id, repo.slug, "git_fingerprint_remote", 0.99) + + if remote: + candidates = [repo for repo in repos if normalise_remote_url(repo.remote_url) == remote] + if len(candidates) == 1: + repo = candidates[0] + return RepoMatch(repo.repo_id, repo.slug, "remote_url", 0.90) + + if not path: + return None + + path_matches: list[tuple[str, RepoRef]] = [] + for repo in repos: + for repo_path in _repo_paths(repo): + if path == repo_path or path.startswith(f"{repo_path}/"): + path_matches.append((repo_path, repo)) + if not path_matches: + return None + path_matches.sort(key=lambda item: len(item[0]), reverse=True) + exact = [item for item in path_matches if path == item[0]] + if exact: + basename = Path(path).name + for _, repo in exact: + if repo.slug == basename: + return RepoMatch(repo.repo_id, repo.slug, "path_exact_slug", 0.85) + repo = exact[0][1] + return RepoMatch(repo.repo_id, repo.slug, "path_exact", 0.80) + repo = path_matches[0][1] + return RepoMatch(repo.repo_id, repo.slug, "path_prefix", 0.75) diff --git a/api/services/token_sources/base.py b/api/services/token_sources/base.py new file mode 100644 index 0000000..77a1392 --- /dev/null +++ b/api/services/token_sources/base.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + + +def parse_iso(value: str) -> datetime: + raw = value.strip() + if raw.endswith("Z"): + raw = raw[:-1] + "+00:00" + if "T" not in raw: + raw = f"{raw}T00:00:00+00:00" + parsed = datetime.fromisoformat(raw) + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=timezone.utc) + return parsed.astimezone(timezone.utc) + + +@dataclass +class TokenSourceRecord: + source_provider: str + source_id: str + source_path: Path + source_created_at: datetime | None + session_id: str | None = None + cwd: str | None = None + model: str | None = None + agent: str | None = None + tokens_in: 
int = 0 + tokens_out: int = 0 + cached_input_tokens: int = 0 + reasoning_output_tokens: int = 0 + raw_total_tokens: int | None = None + parser_version: str | None = None + confidence: float = 1.0 + raw_metadata: dict[str, Any] = field(default_factory=dict) + + @property + def tokens_total(self) -> int: + return self.tokens_in + self.tokens_out + + def to_token_event_payload(self, repo_id: str | None = None) -> dict[str, Any]: + raw_total = self.raw_total_tokens + if raw_total is None: + raw_total = self.tokens_in + self.tokens_out + created_at = self.source_created_at.isoformat() if self.source_created_at else None + return { + "tokens_in": self.tokens_in, + "tokens_out": self.tokens_out, + "repo_id": repo_id, + "session_id": self.session_id, + "model": self.model, + "agent": self.agent, + "ref_type": "session", + "ref_id": self.source_id, + "note": f"measured:{self.source_provider}", + "created_at": created_at, + "measurement_kind": "measured", + "source_provider": self.source_provider, + "source_id": self.source_id, + "source_path": str(self.source_path), + "source_created_at": created_at, + "parser_version": self.parser_version, + "confidence": self.confidence, + "cached_input_tokens": self.cached_input_tokens, + "reasoning_output_tokens": self.reasoning_output_tokens, + "raw_total_tokens": raw_total, + "raw_metadata": self.raw_metadata, + } diff --git a/api/services/token_sources/claude.py b/api/services/token_sources/claude.py new file mode 100644 index 0000000..e97acc2 --- /dev/null +++ b/api/services/token_sources/claude.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +import json +from datetime import datetime +from pathlib import Path +from typing import Any + +from api.services.token_sources.base import TokenSourceRecord, parse_iso + +PARSER_VERSION = "claude-transcript-v1" + + +def iter_claude_transcript_files(claude_home: Path) -> list[Path]: + projects = claude_home / "projects" + if not projects.is_dir(): + return [] + return 
sorted(projects.glob("**/*.jsonl")) + + +def _usage_from_entry(entry: dict[str, Any]) -> dict[str, Any]: + message = entry.get("message") + if isinstance(message, dict) and isinstance(message.get("usage"), dict): + return message["usage"] + usage = entry.get("usage") + return usage if isinstance(usage, dict) else {} + + +def parse_claude_transcript(path: Path, since: datetime) -> TokenSourceRecord | None: + session_id = path.stem + cwd: str | None = None + model: str | None = None + first_at: datetime | None = None + last_at: datetime | None = None + tokens_in = tokens_out = 0 + cached_input_tokens = 0 + raw_total_tokens = 0 + usage_records = 0 + malformed_lines = 0 + + try: + handle = path.open("r", encoding="utf-8", errors="ignore") + except OSError: + return None + + with handle: + for line in handle: + try: + entry: dict[str, Any] = json.loads(line) + except json.JSONDecodeError: + malformed_lines += 1 + continue + + ts = entry.get("timestamp") or entry.get("created_at") + parsed_ts = parse_iso(ts) if isinstance(ts, str) else None + if parsed_ts: + first_at = first_at or parsed_ts + last_at = parsed_ts + + session_id = str(entry.get("session_id") or entry.get("conversation_id") or session_id) + cwd = entry.get("cwd") or entry.get("project_cwd") or cwd + model = entry.get("model") or model + message = entry.get("message") + if isinstance(message, dict): + model = message.get("model") or model + + usage = _usage_from_entry(entry) + if not usage: + continue + if parsed_ts is not None and parsed_ts < since: + continue + + input_tokens = int(usage.get("input_tokens") or 0) + cache_creation = int(usage.get("cache_creation_input_tokens") or 0) + cache_read = int(usage.get("cache_read_input_tokens") or 0) + output_tokens = int(usage.get("output_tokens") or 0) + if input_tokens == 0 and output_tokens == 0 and cache_creation == 0 and cache_read == 0: + continue + tokens_in += input_tokens + tokens_out += output_tokens + cached_input_tokens += cache_creation + cache_read 
+ raw_total_tokens += input_tokens + cache_creation + cache_read + output_tokens + usage_records += 1 + + if usage_records == 0 or tokens_in + tokens_out + cached_input_tokens == 0: + return None + + return TokenSourceRecord( + source_provider="claude_transcript", + source_id=f"claude:{session_id}", + source_path=path, + source_created_at=last_at, + session_id=session_id, + cwd=cwd, + model=model, + agent="claude", + tokens_in=tokens_in, + tokens_out=tokens_out, + cached_input_tokens=cached_input_tokens, + raw_total_tokens=raw_total_tokens or None, + parser_version=PARSER_VERSION, + confidence=1.0, + raw_metadata={ + "started_at": first_at.isoformat() if first_at else None, + "usage_records": usage_records, + "malformed_lines": malformed_lines, + "source_file_name": path.name, + }, + ) + + +def collect_claude_transcripts(claude_home: Path, since: datetime) -> list[TokenSourceRecord]: + by_id: dict[str, TokenSourceRecord] = {} + for path in iter_claude_transcript_files(claude_home): + parsed = parse_claude_transcript(path, since) + if parsed is None: + continue + current = by_id.get(parsed.source_id) + if current is None or parsed.tokens_total > current.tokens_total: + by_id[parsed.source_id] = parsed + return sorted(by_id.values(), key=lambda item: item.source_created_at or datetime.min.replace(tzinfo=since.tzinfo)) diff --git a/api/services/token_sources/codex.py b/api/services/token_sources/codex.py new file mode 100644 index 0000000..991f7a2 --- /dev/null +++ b/api/services/token_sources/codex.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +import json +from datetime import datetime +from pathlib import Path +from typing import Any + +from api.services.token_sources.base import TokenSourceRecord, parse_iso + +PARSER_VERSION = "codex-desktop-v1" + + +def iter_codex_session_files(codex_home: Path) -> list[Path]: + files: list[Path] = [] + sessions = codex_home / "sessions" + archived = codex_home / "archived_sessions" + if sessions.is_dir(): + 
files.extend(sorted(sessions.glob("*/*/*/*.jsonl"))) + if archived.is_dir(): + files.extend(sorted(archived.glob("*.jsonl"))) + return files + + +def parse_codex_session(path: Path, since: datetime) -> TokenSourceRecord | None: + fallback_id = path.stem.removeprefix("rollout-") + session_id = fallback_id + started_at: datetime | None = None + last_at: datetime | None = None + cwd: str | None = None + model: str | None = None + tokens_in = tokens_out = 0 + cached_input_tokens = reasoning_output_tokens = 0 + raw_total_tokens = 0 + usage_records = 0 + malformed_lines = 0 + + try: + handle = path.open("r", encoding="utf-8", errors="ignore") + except OSError: + return None + + with handle: + for line in handle: + try: + entry: dict[str, Any] = json.loads(line) + except json.JSONDecodeError: + malformed_lines += 1 + continue + + ts = entry.get("timestamp") + parsed_ts = parse_iso(ts) if isinstance(ts, str) else None + if parsed_ts: + last_at = parsed_ts + started_at = started_at or parsed_ts + + payload = entry.get("payload") or {} + if entry.get("type") == "session_meta": + meta_id = payload.get("id") + if meta_id: + session_id = str(meta_id) + cwd = payload.get("cwd") or cwd + meta_ts = payload.get("timestamp") + if isinstance(meta_ts, str): + started_at = parse_iso(meta_ts) + elif entry.get("type") == "turn_context": + cwd = payload.get("cwd") or cwd + model = payload.get("model") or model + elif entry.get("type") == "event_msg" and payload.get("type") == "token_count": + if parsed_ts is None or parsed_ts < since: + continue + info = payload.get("info") or {} + last = info.get("last_token_usage") or {} + if not isinstance(last, dict): + continue + input_tokens = int(last.get("input_tokens") or 0) + output_tokens = int(last.get("output_tokens") or 0) + if input_tokens == 0 and output_tokens == 0: + continue + tokens_in += input_tokens + tokens_out += output_tokens + cached_input_tokens += int(last.get("cached_input_tokens") or 0) + reasoning_output_tokens += 
int(last.get("reasoning_output_tokens") or 0) + raw_total_tokens += int(last.get("total_tokens") or input_tokens + output_tokens) + usage_records += 1 + last_at = parsed_ts + + if usage_records == 0 or tokens_in + tokens_out == 0: + return None + + return TokenSourceRecord( + source_provider="codex_session", + source_id=f"codex:{session_id}", + source_path=path, + source_created_at=last_at, + session_id=session_id, + cwd=cwd, + model=model, + agent="codex", + tokens_in=tokens_in, + tokens_out=tokens_out, + cached_input_tokens=cached_input_tokens, + reasoning_output_tokens=reasoning_output_tokens, + raw_total_tokens=raw_total_tokens or None, + parser_version=PARSER_VERSION, + confidence=1.0, + raw_metadata={ + "started_at": started_at.isoformat() if started_at else None, + "usage_records": usage_records, + "malformed_lines": malformed_lines, + "source_file_name": path.name, + }, + ) + + +def collect_codex_sessions(codex_home: Path, since: datetime) -> list[TokenSourceRecord]: + by_id: dict[str, TokenSourceRecord] = {} + for path in iter_codex_session_files(codex_home): + parsed = parse_codex_session(path, since) + if parsed is None: + continue + current = by_id.get(parsed.source_id) + if current is None or parsed.tokens_total > current.tokens_total: + by_id[parsed.source_id] = parsed + return sorted(by_id.values(), key=lambda item: item.source_created_at or datetime.min.replace(tzinfo=since.tzinfo)) diff --git a/dashboard/src/token-cost.md b/dashboard/src/token-cost.md index 1649e4d..4d9923e 100644 --- a/dashboard/src/token-cost.md +++ b/dashboard/src/token-cost.md @@ -9,79 +9,54 @@ const POLL = 60_000; ``` ```js -// Fetch token events, by-repo summary, workstreams, and tasks in parallel +const evidenceSel = Inputs.radio( + ["Measured only", "Active evidence", "All evidence"], + {value: "Measured only", label: "Evidence"} +); +const sortSel = Inputs.select( + ["Tokens Total", "Event Count"], + {label: "Sort by"} +); +const maxSel = Inputs.select( + [10, 20, 50, 100, 
500], + {value: 20, label: "Show"} +); +display(html`
${evidenceSel}${sortSel}${maxSel}
`); +const evidenceMode = view(evidenceSel); +const sortOrder = view(sortSel); +const maxResults = view(maxSel); +``` + +```js +function aggregatePath(mode) { + if (mode === "Measured only") return "/token-events/aggregate/?measurement_kind=measured&include_superseded=false"; + if (mode === "All evidence") return "/token-events/aggregate/?include_superseded=true"; + return "/token-events/aggregate/?include_superseded=false"; +} + const tokenState = (async function*() { let failures = 0; while (true) { - let byRepo = [], events = [], wsMap = {}, taskMap = {}, ok = false; + let aggregate = null, quality = null, ok = false; try { - const [r1, r2, r3, r4] = await Promise.all([ - apiFetch("/token-events/by-repo/"), - apiFetch("/token-events/?limit=1000"), - apiFetch("/workstreams/"), - apiFetch("/tasks/"), + const [r1, r2] = await Promise.all([ + apiFetch(aggregatePath(evidenceMode)), + apiFetch("/token-events/quality/"), ]); ok = r1.ok && r2.ok; if (ok) { - byRepo = await r1.json(); - events = await r2.json(); - } - if (r3.ok) { - const wsList = await r3.json(); - for (const w of wsList) wsMap[w.id] = w; - } - if (r4.ok) { - const taskList = await r4.json(); - for (const t of taskList) taskMap[t.id] = t; + aggregate = await r1.json(); + quality = await r2.json(); } } catch {} failures = ok ? 0 : failures + 1; - yield {byRepo, events, wsMap, taskMap, ok, ts: new Date()}; + yield {aggregate, quality, ok, ts: new Date()}; await waitForVisible(pollDelay({ok, base: POLL, failures})); } })(); ``` ```js -// Resolve an event's repo_id via the 3-level chain: direct → workstream → task→workstream -function resolveRepoId(e, wsMap, taskMap) { - if (e.repo_id) return e.repo_id; - const wsId = e.workstream_id ?? taskMap[e.task_id]?.workstream_id; - return wsId ? (wsMap[wsId]?.repo_id ?? 
null) : null; -} - -function buildSummary(events) { - const byWs = {}, byModel = {}, byTask = {}; - for (const e of events) { - const tot = (e.tokens_in || 0) + (e.tokens_out || 0); - if (e.workstream_id) { - byWs[e.workstream_id] = byWs[e.workstream_id] || {scope_id: e.workstream_id, tokens_in: 0, tokens_out: 0, event_count: 0}; - byWs[e.workstream_id].tokens_in += e.tokens_in || 0; - byWs[e.workstream_id].tokens_out += e.tokens_out || 0; - byWs[e.workstream_id].event_count++; - } - const model = e.model || "unknown"; - byModel[model] = (byModel[model] || 0) + tot; - if (e.task_id) { - byTask[e.task_id] = byTask[e.task_id] || {task_id: e.task_id, tokens_in: 0, tokens_out: 0, event_count: 0}; - byTask[e.task_id].tokens_in += e.tokens_in || 0; - byTask[e.task_id].tokens_out += e.tokens_out || 0; - byTask[e.task_id].event_count++; - } - } - const toRows = obj => Object.values(obj) - .map(v => ({...v, tokens_total: (v.tokens_in || 0) + (v.tokens_out || 0)})) - .sort((a, b) => b.tokens_total - a.tokens_total); - return { - by_workstream: toRows(byWs), - by_model: Object.entries(byModel) - .map(([model, tokens_total]) => ({model, tokens_total})) - .sort((a, b) => b.tokens_total - a.tokens_total), - top_tasks: toRows(byTask), - total_events: events.length, - }; -} - function nameCell(name, fullName) { const s = String(name ?? fullName ?? "—"); const full = String(fullName ?? name ?? 
"—"); @@ -92,21 +67,40 @@ function nameCell(name, fullName) { } function sortRows(rows, sortField) { - if (sortField === "Tokens Total") return rows; // already sorted by buildSummary / by-repo API const s = [...rows]; - if (sortField === "Tokens In") s.sort((a, b) => (b.tokens_in || 0) - (a.tokens_in || 0)); - else if (sortField === "Tokens Out") s.sort((a, b) => (b.tokens_out || 0) - (a.tokens_out || 0)); - else if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0)); - else if (sortField === "Most Recent") s.sort((a, b) => (b._lastAt || 0) - (a._lastAt || 0)); + if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0)); + else s.sort((a, b) => (b.tokens_total || 0) - (a.tokens_total || 0)); return s; } + +function dictRows(obj, labelKey) { + return Object.entries(obj ?? {}) + .map(([label, tokens_total]) => ({[labelKey]: label, tokens_total})) + .sort((a, b) => b.tokens_total - a.tokens_total); +} + +function metricRows(quality) { + if (!quality) return []; + return [ + {metric: "Measured", value: quality.measured_event_count}, + {metric: "Allocated", value: quality.allocated_event_count}, + {metric: "Estimated", value: quality.estimated_event_count}, + {metric: "Superseded", value: quality.superseded_event_count}, + {metric: "Fallback", value: quality.fallback_event_count}, + {metric: "Unattributed measured", value: quality.unattributed_measured_event_count}, + {metric: "Missing provenance", value: quality.missing_provenance_event_count}, + {metric: "Duplicate sources", value: quality.duplicate_source_count}, + ]; +} ``` ```js -const byRepo = tokenState.byRepo ?? []; -const events = tokenState.events ?? []; -const wsMap = tokenState.wsMap ?? {}; -const taskMap = tokenState.taskMap ?? {}; +const aggregate = tokenState.aggregate ?? 
{ + tokens_in: 0, tokens_out: 0, tokens_total: 0, event_count: 0, + by_repo: [], by_workstream: [], by_task: [], by_model: [], + by_measurement_kind: {}, by_source_provider: {}, +}; +const quality = tokenState.quality ?? null; const _ok = tokenState.ok ?? false; const _ts = tokenState.ts; ``` @@ -115,66 +109,37 @@ const _ts = tokenState.ts; ```js display(html`
- ● ${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${events.length} events` : "API offline"} + ● ${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${aggregate.event_count.toLocaleString()} events · ${aggregate.tokens_total.toLocaleString()} tokens` : "API offline"}
`); ``` ```js -const repoSel = Inputs.select( - ["All repos", ...byRepo.map(r => r.repo_slug)], - {label: "Filter by repo"} -); -const sortSel = Inputs.select( - ["Tokens Total", "Tokens In", "Tokens Out", "Event Count", "Most Recent"], - {label: "Sort by"} -); -const maxSel = Inputs.select( - [10, 20, 50, 100, 500], - {value: 20, label: "Show"} -); -display(html`
${repoSel}${sortSel}${maxSel}
`); -const repoFilter = view(repoSel); -const sortOrder = view(sortSel); -const maxResults = view(maxSel); -``` - -```js -// Build filtered and last-event-annotated row sets -const selectedRepoId = repoFilter === "All repos" - ? null - : (byRepo.find(r => r.repo_slug === repoFilter)?.repo_id ?? null); - -const filteredEvents = selectedRepoId - ? events.filter(e => resolveRepoId(e, wsMap, taskMap) === selectedRepoId) - : events; - -const lastAtByRepo = {}, lastAtByWs = {}, lastAtByTask = {}; -for (const e of filteredEvents) { - const t = e.created_at ? new Date(e.created_at).getTime() : 0; - const rid = resolveRepoId(e, wsMap, taskMap); - if (rid) lastAtByRepo[rid] = Math.max(lastAtByRepo[rid] || 0, t); - if (e.workstream_id) lastAtByWs[e.workstream_id] = Math.max(lastAtByWs[e.workstream_id] || 0, t); - if (e.task_id) lastAtByTask[e.task_id] = Math.max(lastAtByTask[e.task_id] || 0, t); -} - -const filteredByRepo = (selectedRepoId - ? byRepo.filter(r => r.repo_id === selectedRepoId) - : byRepo -).map(r => ({...r, _lastAt: lastAtByRepo[r.repo_id] || 0})); - -const summary = buildSummary(filteredEvents); -const wsRowsFull = summary.by_workstream.map(r => ({...r, _lastAt: lastAtByWs[r.scope_id] || 0})); -const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask[r.task_id] || 0})); +display(html`
+
+
Tokens
+
${aggregate.tokens_total.toLocaleString()}
+
+
+
Events
+
${aggregate.event_count.toLocaleString()}
+
+
+
Last Event
+
${aggregate.last_event_at ? new Date(aggregate.last_event_at).toLocaleString() : "—"}
+
+
+
Last Ingested
+
${aggregate.last_ingested_at ? new Date(aggregate.last_ingested_at).toLocaleString() : "—"}
+
+
`); ``` ## By Repo ```js { - const sorted = sortRows(filteredByRepo, sortOrder); - const total = sorted.length; - const rows = sorted.slice(0, maxResults); - + const sorted = sortRows(aggregate.by_repo ?? [], sortOrder); + const rows = sorted.slice(0, maxResults); if (rows.length === 0) { display(html`

No token events with repo association yet.

`); } else { @@ -184,40 +149,20 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask width: Math.min(900, width), x: {label: "Tokens", tickFormat: "~s"}, y: {label: null}, - color: {legend: true, domain: ["tokens_in", "tokens_out"], range: ["#4e79a7","#f28e2b"]}, - marks: [ - Plot.barX( - rows.flatMap(r => [ - {repo: r.repo_slug, type: "tokens_in", value: r.tokens_in}, - {repo: r.repo_slug, type: "tokens_out", value: r.tokens_out}, - ]), - {x: "value", y: "repo", fill: "type", tip: true} - ), - ], + marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#4e79a7", tip: true})], })); - display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), { - columns: ["_ref", "repo_slug", "tokens_in", "tokens_out", "tokens_total", "event_count"], - header: { - _ref: "REF", - repo_slug: "Repo", - tokens_in: "Tokens In", - tokens_out: "Tokens Out", - tokens_total: "Total", - event_count: "Events", - }, + columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"], + header: {_ref: "REF", label: "Repo", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"}, format: { - _ref: (_, i) => refCell(i + 1, "repos", rows[i].repo_slug), - repo_slug: d => nameCell(d, d), - tokens_in: d => d.toLocaleString(), - tokens_out: d => d.toLocaleString(), + _ref: (_, i) => refCell(i + 1, "repos", rows[i].label), + label: d => nameCell(d, d), + tokens_in: d => d.toLocaleString(), + tokens_out: d => d.toLocaleString(), tokens_total: d => d.toLocaleString(), }, - width: {_ref: 50, repo_slug: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80}, + width: {_ref: 50, label: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80}, })); - - if (total > maxResults) - display(html`

Showing ${maxResults} of ${total} repos

`); } } ``` @@ -226,38 +171,48 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask ```js { - const sorted = sortRows(wsRowsFull, sortOrder); - const total = sorted.length; - const rows = sorted.slice(0, maxResults); - + const sorted = sortRows(aggregate.by_workstream ?? [], sortOrder); + const rows = sorted.slice(0, maxResults); if (rows.length === 0) { display(html`

No workstream data yet.

`); } else { display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), { - columns: ["_ref", "scope_id", "tokens_in", "tokens_out", "tokens_total", "event_count"], - header: { - _ref: "REF", - scope_id: "Workstream", - tokens_in: "Tokens In", - tokens_out: "Tokens Out", - tokens_total: "Total", - event_count: "Events", - }, + columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"], + header: {_ref: "REF", label: "Workstream", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"}, format: { - _ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id), - scope_id: d => { - const ws = wsMap[d]; - return nameCell(ws?.title ?? ws?.slug, d); - }, - tokens_in: d => d.toLocaleString(), - tokens_out: d => d.toLocaleString(), + _ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id), + label: d => nameCell(d, d), + tokens_in: d => d.toLocaleString(), + tokens_out: d => d.toLocaleString(), tokens_total: d => d.toLocaleString(), }, - width: {_ref: 50, scope_id: 200, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80}, + width: {_ref: 50, label: 240, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80}, })); + } +} +``` - if (total > maxResults) - display(html`

Showing ${maxResults} of ${total} workstreams

`); +## By Evidence + +```js +{ + const kindRows = dictRows(aggregate.by_measurement_kind, "kind"); + const sourceRows = dictRows(aggregate.by_source_provider, "source"); + if (kindRows.length === 0 && sourceRows.length === 0) { + display(html`

No evidence breakdown yet.

`); + } else { + display(html`
+
${Inputs.table(kindRows, { + columns: ["kind", "tokens_total"], + header: {kind: "Kind", tokens_total: "Tokens"}, + format: {tokens_total: d => d.toLocaleString()}, + })}
+
${Inputs.table(sourceRows, { + columns: ["source", "tokens_total"], + header: {source: "Source", tokens_total: "Tokens"}, + format: {tokens_total: d => d.toLocaleString()}, + })}
+
`); } } ``` @@ -265,18 +220,38 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask ## By Model ```js -if (summary.by_model.length === 0) { - display(html`

No model data yet.

`); +{ + const rows = (aggregate.by_model ?? []).slice(0, maxResults); + if (rows.length === 0) { + display(html`

No model data yet.

`); + } else { + display(Plot.plot({ + title: "Token consumption by model", + marginLeft: 200, + width: Math.min(700, width), + x: {label: "Total tokens", tickFormat: "~s"}, + marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#59a14f", tip: true})], + })); + } +} +``` + +## Data Quality + +```js +if (!quality) { + display(html`

No quality data yet.

`); } else { - display(Plot.plot({ - title: "Token consumption by model", - marginLeft: 200, - width: Math.min(700, width), - x: {label: "Total tokens", tickFormat: "~s"}, - marks: [ - Plot.barX(summary.by_model, {x: "tokens_total", y: "model", fill: "#4e79a7", tip: true}), - ], + display(Inputs.table(metricRows(quality), { + columns: ["metric", "value"], + header: {metric: "Signal", value: "Count"}, + format: {value: d => d.toLocaleString()}, })); + display(html`

+ Codex: ${quality.last_codex_ingested_at ? new Date(quality.last_codex_ingested_at).toLocaleString() : "—"} +  ·  Claude: ${quality.last_claude_ingested_at ? new Date(quality.last_claude_ingested_at).toLocaleString() : "—"} +  ·  Reconcile: ${quality.last_reconciliation_at ? new Date(quality.last_reconciliation_at).toLocaleString() : "—"} +

`); } ``` @@ -284,31 +259,23 @@ if (summary.by_model.length === 0) { ```js { - const sorted = sortRows(taskRowsFull, sortOrder); - const total = sorted.length; - const rows = sorted.slice(0, maxResults); - + const sorted = sortRows(aggregate.by_task ?? [], sortOrder); + const rows = sorted.slice(0, maxResults); if (rows.length === 0) { display(html`

No task-level data yet.

`); } else { display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), { - columns: ["_ref", "task_id", "tokens_in", "tokens_out", "tokens_total"], - header: {_ref: "REF", task_id: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"}, + columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total"], + header: {_ref: "REF", label: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"}, format: { - _ref: (_, i) => refCell(i + 1, "tasks", rows[i].task_id), - task_id: d => { - const task = taskMap[d]; - return nameCell(task?.title, d); - }, - tokens_in: d => d.toLocaleString(), - tokens_out: d => d.toLocaleString(), + _ref: (_, i) => refCell(i + 1, "tasks", rows[i].scope_id), + label: d => nameCell(d, d), + tokens_in: d => d.toLocaleString(), + tokens_out: d => d.toLocaleString(), tokens_total: d => d.toLocaleString(), }, - width: {_ref: 50, task_id: 240}, + width: {_ref: 50, label: 260}, })); - - if (total > maxResults) - display(html`

Showing ${maxResults} of ${total} tasks

`); } } ``` diff --git a/dashboard/src/token-events/[id].md b/dashboard/src/token-events/[id].md index 770d3fd..d4c3ae0 100644 --- a/dashboard/src/token-events/[id].md +++ b/dashboard/src/token-events/[id].md @@ -23,10 +23,14 @@ if (raw.error) { display(html`

← Token Cost

`); const FIELD_ORDER = [ - "id","tokens_in","tokens_out","tokens_total", + "id","measurement_kind","source_provider","source_id", + "tokens_in","tokens_out","tokens_total","token_evidence_total", + "cached_input_tokens","reasoning_output_tokens","raw_total_tokens", "note","model","agent","session_id", "task_id","workstream_id","repo_id", - "ref_type","ref_id","created_at", + "ref_type","ref_id","source_path","source_created_at", + "parser_version","confidence","ingested_at","created_at", + "raw_metadata", ]; const rows = FIELD_ORDER.map(k => fieldRow(k, raw[k] ?? null)); diff --git a/docs/multi-user-access-model.md b/docs/multi-user-access-model.md new file mode 100644 index 0000000..04cb5b5 --- /dev/null +++ b/docs/multi-user-access-model.md @@ -0,0 +1,75 @@ +# State Hub Multi-User Access Model + +State Hub is local-first coordination infrastructure. It reflects repo-backed +workplans, progress, and operational state; it is not the authority for source +control, host access, identity, or runtime secret custody. + +## Decision + +For the current phase, enforce user access through the systems that already own +the boundary: + +- Gitea controls repository read/write rights. +- SSH authorized keys control host access. +- ops-bridge controls whether a remote machine can reach local services. +- OpenBao controls runtime secret custody after bootstrap. + +State Hub API authentication is deferred until there is an active external +collaborator or an exposed deployment that needs per-user write enforcement. +Until then, State Hub stays private to local or tunneled operator networks. 
+ +## Roles + +| Role | State Hub access | Source of authority | +|------|------------------|---------------------| +| Primary operator | Full read/write across domains | host access, repo ownership, operator secret custody | +| Domain collaborator | Read all public coordination state; write through owned domain repo and approved hub actions | Gitea repo permissions plus SSH/tunnel authorization | +| Observer | Read-only brief/dashboard access where explicitly exposed | tunnel or future API token | + +## Current Enforcement Boundary + +1. Repo files remain authoritative. A collaborator can change workplans only in + repos where Gitea allows them to push. +2. State Hub indexes files and records progress events, but it should not become + the primary identity authority. +3. Direct dashboard/API access is private by default. Do not publish State Hub + unauthenticated on the public internet. +4. Runtime secrets, service account keys, database credentials, and package + tokens should move into OpenBao after the OpenBao bootstrap, unseal, audit, + and recovery procedure is complete. 
+ +## Future API Auth Trigger + +Add API-layer auth when one of these becomes true: + +- a second human needs direct State Hub API/dashboard mutation rights +- State Hub is exposed beyond localhost or a tightly controlled SSH tunnel +- automation needs per-consumer attribution and revocation independent of repo + commits +- domain-scoped write checks are needed at request time + +## Future Token Shape + +When the trigger is reached, implement a small token model rather than a full +identity provider inside State Hub: + +- accept NetKingdom IAM Profile OIDC tokens when the identity plane is ready +- support one emergency local admin token for break-glass operation +- map claims to `primary_operator`, `domain_collaborator`, or `observer` +- enforce domain write scopes in mutating endpoints +- keep repo permissions as the durable source of contribution authority + +Candidate scopes: + +```text +statehub:read +statehub:write +statehub:domain:<domain>:write +statehub:admin +``` + +## Operator Rule + +Do not store collaborator credentials in the State Hub database. Store secrets +in OpenBao or the approved bootstrap bundle, and store source permissions in +Gitea. diff --git a/docs/onboarding.md b/docs/onboarding.md new file mode 100644 index 0000000..f39e6b5 --- /dev/null +++ b/docs/onboarding.md @@ -0,0 +1,212 @@ +# State Hub Onboarding + +This guide turns a new machine into a usable State Hub operator or collaborator +environment. It covers local credentials, SSH reachability, Gitea access, and +Claude Code MCP registration. + +State Hub remains a coordination read/cache layer. Repo permissions, SSH +access, and controlled tunnels are the first access boundary. OpenBao is the +runtime secret authority for platform and workload secrets once its bootstrap +ceremony is complete. 
+ +## Quick Start + +Clone the repo, then run the bootstrap script: + +```bash +git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub +cd ~/state-hub +make bootstrap-env +``` + +On a clean Ubuntu 24.04 machine, allow package installation explicitly: + +```bash +make bootstrap-env ARGS="--install-missing" +``` + +For a remote machine that reaches State Hub through ops-bridge: + +```bash +make bridges +make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000 +``` + +Restart Claude Code after MCP registration. + +## Primary Operator: New Machine + +1. Install minimal host prerequisites: + + ```bash + sudo apt-get update + sudo apt-get install -y git curl openssh-client make python3 + ``` + +2. Clone `state-hub` and any domain repo you expect to operate: + + ```bash + git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub + git clone https://gitea.coulomb.social/coulomb/the-custodian.git ~/the-custodian + ``` + +3. Run the bootstrap: + + ```bash + cd ~/state-hub + make bootstrap-env ARGS="--install-missing" + ``` + + The script will: + + - check required tools + - configure `git credential.helper` + - create `~/.ssh/id_ed25519` when missing + - print the public key for managed hosts + - create `~/.railiance_gitea.conf` when you provide a Gitea token + - register the State Hub MCP server for Claude Code + - check State Hub API reachability + +4. Authorize the SSH key on managed hosts. If password or existing key access + is available, rerun: + + ```bash + make bootstrap-env ARGS="--authorize-ssh --skip-gitea --skip-mcp" + ``` + + Default targets: + + - `tegwick@92.205.62.239` for Railiance01 + - `tegwick@92.205.130.254` for CoulombCore + +5. Start or connect to State Hub: + + ```bash + make api + make mcp-http + ``` + + If the hub is remote, use ops-bridge: + + ```bash + make bridges + ``` + +6. Restart Claude Code and verify that `state-hub` appears in the MCP server + list. 
In the first session, call `get_state_summary()` when MCP tools are + available. If not, use: + + ```bash + cat .custodian-brief.md + curl -s "http://127.0.0.1:8000/workstreams/?status=active" | python3 -m json.tool + ``` + +## Domain Collaborator: New Person + +1. Get a Gitea account with write access to the relevant domain repo. +2. Clone this repo and the domain repo: + + ```bash + git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub + git clone https://gitea.coulomb.social/coulomb/<domain-repo>.git ~/<domain-repo> + ``` + +3. Run the bootstrap: + + ```bash + cd ~/state-hub + make bootstrap-env + ``` + +4. Send the printed SSH public key to the operator, or authorize it yourself if + you already have host access: + + ```bash + ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.62.239 + ``` + +5. Bring up the State Hub tunnel when direct local access is unavailable: + + ```bash + make bridges + make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000 + ``` + +6. Restart Claude Code, open the domain repo, and orient from the repo brief: + + ```bash + cat .custodian-brief.md + ``` + +7. Contribute work through repo-backed workplans. A new workplan lives under + `workplans/` and follows ADR-001. The hub indexes files; the files remain + authoritative. + +## Credential Helper Choices + +`make bootstrap-env` configures Git credentials only when no global helper is +already set. + +Default behavior: + +- use `libsecret` when the helper exists +- otherwise use `credential.helper=cache --timeout=3600` + +For headless hosts where a persistent plaintext helper is acceptable: + +```bash +make bootstrap-env ARGS="--git-helper store --allow-plaintext-store" +``` + +Prefer SSH remotes or a keyring-backed helper for normal operator machines. 
+ +## Gitea Token File + +Some Railiance scripts read `~/.railiance_gitea.conf`: + +```bash +GITEA_URL="http://92.205.130.254:32166" +GITEA_USER="" +GITEA_TOKEN="" +``` + +Required token capabilities depend on the action: + +- repo creation needs `read:user` and repository write/admin scope +- package publishing needs package write scope +- inventory reads need repository read scope + +The bootstrap script writes this file with mode `0600` and does not print the +token. + +## MCP Registration + +Local registration: + +```bash +make register-mcp +``` + +Tunnel registration: + +```bash +make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000 +``` + +The current State Hub MCP transport is SSE. The old `.mcp.json`/stdio flow is +legacy; use `make mcp-http` to run the SSE service on `127.0.0.1:8001`. + +## Verification Checklist + +Run these checks after bootstrap: + +```bash +git config --global --get credential.helper +test -f ~/.ssh/id_ed25519.pub +test -f ~/.railiance_gitea.conf +curl -fsS http://127.0.0.1:8000/state/health || curl -fsS http://127.0.0.1:18000/state/health +make register-mcp DRY_RUN=1 +``` + +Then restart Claude Code and confirm that the `state-hub` MCP server is +available. diff --git a/docs/token-evidence-model.md b/docs/token-evidence-model.md new file mode 100644 index 0000000..d631e59 --- /dev/null +++ b/docs/token-evidence-model.md @@ -0,0 +1,57 @@ +# Token Evidence Model + +State Hub token events distinguish source-backed measurements from inferred +operational signals. Dashboards and reports should use structured fields for +quality and provenance; `note` remains human context only. + +## Measurement Kinds + +| Kind | Meaning | Default confidence | +| --- | --- | --- | +| `measured` | Parsed from a source that reports usage metadata, such as Codex session logs or Claude transcript usage blocks. 
| `1.0` | +| `allocated` | A share of a larger known total, assigned to a task/workstream by a documented allocation method. | `0.70` | +| `estimated` | A fallback or operator-entered estimate without direct source evidence. | `0.35` | +| `superseded` | Historical rows retained for audit but excluded from active totals. | `0.0` | + +## Source Providers + +| Provider | Source | +| --- | --- | +| `codex_session` | Codex Desktop `.codex/sessions/**` and `.codex/archived_sessions/**` JSONL token_count events. | +| `claude_transcript` | Claude Code `.claude/projects/**/*.jsonl` usage metadata. Transcript text is never stored. | +| `llm_connect` | Future llm-connect usage metadata. | +| `manual` | Explicit operator/API input. | +| `task_fallback` | Fixed task-completion fallback rows created when no source data is available. | + +## Provenance Fields + +Each source-backed row should include: + +- `source_provider`, `source_id`, `source_path`, `source_created_at` +- `parser_version`, `ingested_at`, `confidence` +- `cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens` +- `raw_metadata` with parser and attribution metadata, never transcript content + +`tokens_in + tokens_out` remains the default active total. Cached input and +reasoning output are preserved separately so dashboards can show both default +and provider-style totals without rewriting history. + +## Idempotency + +Measured sources must be written with a stable `source_id`. State Hub enforces +one row for each `(measurement_kind, source_provider, source_id)` tuple and +`POST /token-events/upsert` updates a growing live session rather than creating +duplicates. + +## Migration Playbook + +1. Run the token-event provenance migration. +2. Run `python3 scripts/token_reconcile.py --since 2026-05-19` and inspect the + dry-run report. +3. Run `python3 scripts/token_reconcile.py --since 2026-05-19 --apply` to + upsert measured Codex/Claude source rows. +4. 
Run the same command with `--zero-superseded-fallbacks` only after measured + source rows cover the affected window. +5. Check `/token-events/quality/` or the Token Cost dashboard for fallback, + missing-provenance, duplicate-source, and unattributed measured signals. +6. Keep historical fallback rows as `superseded`; do not delete them. diff --git a/migrations/versions/v9q0r1s2t3u4_token_event_provenance.py b/migrations/versions/v9q0r1s2t3u4_token_event_provenance.py new file mode 100644 index 0000000..b604bb4 --- /dev/null +++ b/migrations/versions/v9q0r1s2t3u4_token_event_provenance.py @@ -0,0 +1,128 @@ +"""add token event provenance fields + +Revision ID: v9q0r1s2t3u4 +Revises: u8p9q0r1s2t3 +Create Date: 2026-05-23 +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +revision = "v9q0r1s2t3u4" +down_revision = "u8p9q0r1s2t3" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "token_events", + sa.Column("measurement_kind", sa.Text(), nullable=False, server_default="estimated"), + ) + op.add_column( + "token_events", + sa.Column("source_provider", sa.Text(), nullable=False, server_default="manual"), + ) + op.add_column("token_events", sa.Column("source_id", sa.Text(), nullable=True)) + op.add_column("token_events", sa.Column("source_path", sa.Text(), nullable=True)) + op.add_column( + "token_events", + sa.Column("source_created_at", sa.TIMESTAMP(timezone=True), nullable=True), + ) + op.add_column( + "token_events", + sa.Column("ingested_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")), + ) + op.add_column("token_events", sa.Column("parser_version", sa.Text(), nullable=True)) + op.add_column( + "token_events", + sa.Column("confidence", sa.Float(), nullable=False, server_default="0.35"), + ) + op.add_column( + "token_events", + sa.Column("cached_input_tokens", sa.Integer(), nullable=False, server_default="0"), + ) + op.add_column( + "token_events", 
+ sa.Column("reasoning_output_tokens", sa.Integer(), nullable=False, server_default="0"), + ) + op.add_column("token_events", sa.Column("raw_total_tokens", sa.Integer(), nullable=True)) + op.add_column("token_events", sa.Column("cost_estimated_usd", sa.Float(), nullable=True)) + op.add_column( + "token_events", + sa.Column( + "raw_metadata", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + server_default=sa.text("'{}'::jsonb"), + ), + ) + + op.execute( + """ + UPDATE token_events + SET + measurement_kind = CASE + WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 'superseded' + WHEN note = 'workplan' THEN 'allocated' + WHEN note = 'heuristic' THEN 'estimated' + WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 'measured' + ELSE measurement_kind + END, + source_provider = CASE + WHEN note = 'heuristic' THEN 'task_fallback' + WHEN note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%' THEN 'codex_session' + WHEN note = 'measured' AND agent ILIKE '%claude%' THEN 'claude_transcript' + ELSE source_provider + END, + source_id = CASE + WHEN source_id IS NULL AND (note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%') + THEN ref_id + ELSE source_id + END, + raw_total_tokens = CASE + WHEN raw_total_tokens IS NULL THEN tokens_in + tokens_out + ELSE raw_total_tokens + END, + confidence = CASE + WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 0.0 + WHEN note = 'heuristic' THEN 0.35 + WHEN note = 'workplan' THEN 0.70 + WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 1.0 + ELSE confidence + END + """ + ) + + op.create_index("ix_token_events_measurement_kind", "token_events", ["measurement_kind"]) + op.create_index("ix_token_events_source_provider", "token_events", ["source_provider"]) + op.create_index("ix_token_events_source_id", "token_events", ["source_id"]) + op.create_index("ix_token_events_source_created_at", "token_events", ["source_created_at"]) + 
op.create_index("ix_token_events_ingested_at", "token_events", ["ingested_at"])
+    op.create_unique_constraint(
+        "uq_token_events_source_identity",
+        "token_events",
+        ["measurement_kind", "source_provider", "source_id"],
+    )
+
+
+def downgrade() -> None:
+    op.drop_constraint("uq_token_events_source_identity", "token_events", type_="unique")
+    op.drop_index("ix_token_events_ingested_at", table_name="token_events")
+    op.drop_index("ix_token_events_source_created_at", table_name="token_events")
+    op.drop_index("ix_token_events_source_id", table_name="token_events")
+    op.drop_index("ix_token_events_source_provider", table_name="token_events")
+    op.drop_index("ix_token_events_measurement_kind", table_name="token_events")
+    op.drop_column("token_events", "raw_metadata")
+    op.drop_column("token_events", "cost_estimated_usd")
+    op.drop_column("token_events", "raw_total_tokens")
+    op.drop_column("token_events", "reasoning_output_tokens")
+    op.drop_column("token_events", "cached_input_tokens")
+    op.drop_column("token_events", "confidence")
+    op.drop_column("token_events", "parser_version")
+    op.drop_column("token_events", "ingested_at")
+    op.drop_column("token_events", "source_created_at")
+    op.drop_column("token_events", "source_path")
+    op.drop_column("token_events", "source_id")
+    op.drop_column("token_events", "source_provider")
+    op.drop_column("token_events", "measurement_kind")
diff --git a/migrations/versions/w0r1s2t3u4v5_token_event_legacy_source_ids.py b/migrations/versions/w0r1s2t3u4v5_token_event_legacy_source_ids.py
new file mode 100644
index 0000000..a9d0227
--- /dev/null
+++ b/migrations/versions/w0r1s2t3u4v5_token_event_legacy_source_ids.py
@@ -0,0 +1,33 @@
+"""assign legacy source ids to measured token events
+
+Revision ID: w0r1s2t3u4v5
+Revises: v9q0r1s2t3u4
+Create Date: 2026-05-23
+"""
+from alembic import op
+
+revision = "w0r1s2t3u4v5"
+down_revision = "v9q0r1s2t3u4"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:  # give every measured row without a source_id a synthetic, per-row unique one
+    op.execute(  # a plain string is wrapped in text(), where ':name' denotes a bind parameter
+        """
+        UPDATE token_events
+        SET source_id = source_provider || '\\:legacy\\:' || id::text
+        WHERE measurement_kind = 'measured'
+          AND source_id IS NULL
+        """
+    )  # '\\:' reaches text() as an escaped colon, so the SQL still contains the literal ':legacy:'
+
+
+def downgrade() -> None:  # clear only the synthetic ids written by upgrade()
+    op.execute(  # same colon escaping as upgrade(); the statement sent to PostgreSQL is unchanged
+        """
+        UPDATE token_events
+        SET source_id = NULL
+        WHERE source_id = source_provider || '\\:legacy\\:' || id::text
+        """
+    )
diff --git a/scripts/backfill_codex_token_events.py b/scripts/backfill_codex_token_events.py
new file mode 100644
index 0000000..0dc306e
--- /dev/null
+++ b/scripts/backfill_codex_token_events.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""Backfill State Hub token events from local Codex session logs.
+
+The parser lives in ``api.services.token_sources.codex`` so this CLI only
+handles operator flags, repo attribution, idempotent writes, and fallback
+cleanup.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import urllib.parse
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+ROOT = Path(__file__).resolve().parent.parent
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from api.services.token_sources import collect_codex_sessions, parse_iso  # noqa: E402
+from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo  # noqa: E402
+
+DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
+BACKFILL_NOTE = "backfill:codex-session"
+SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_codex_backfill"
+
+
+def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
+    url = f"{api_base.rstrip('/')}/{path.lstrip('/')}"
+    data = None
+    headers = {"Content-Type": "application/json"}
+    if body is not None:
+        data = json.dumps(body).encode("utf-8")
+    req = urllib.request.Request(url, data=data, headers=headers, method=method)
+    with urllib.request.urlopen(req, timeout=30) as resp:
+        return json.loads(resp.read() or b"null")
+
+
+def find_codex_home(explicit: str | None) -> Path:
+    candidates: 
list[Path] = [] + if explicit: + candidates.append(Path(explicit)) + env_home = os.environ.get("CODEX_HOME") + if env_home: + candidates.append(Path(env_home)) + candidates.extend( + [ + Path.home() / ".codex", + Path("/mnt/c/Users/bernd.worsch/.codex"), + ] + ) + for candidate in candidates: + if candidate.is_dir(): + return candidate + raise SystemExit("Could not find Codex home; pass --codex-home") + + +def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]: + events: list[dict[str, Any]] = [] + offset = 0 + while True: + page_params = {**params, "limit": 1000, "offset": offset} + encoded = urllib.parse.urlencode(page_params) + page = http_json(api_base, "GET", f"/token-events/?{encoded}") + if not isinstance(page, list) or not page: + break + events.extend(page) + if len(page) < 1000: + break + offset += 1000 + return events + + +def existing_codex_events(api_base: str) -> dict[str, dict[str, Any]]: + events = list_events( + api_base, + {"source_provider": "codex_session", "include_superseded": "true"}, + ) + by_source: dict[str, dict[str, Any]] = {} + for event in events: + source_id = event.get("source_id") or event.get("ref_id") + if isinstance(source_id, str): + by_source[source_id] = event + return by_source + + +def fetch_heuristics(api_base: str, since: str) -> list[dict[str, Any]]: + return list_events( + api_base, + { + "source_provider": "task_fallback", + "note": "heuristic", + "since": since, + "include_superseded": "false", + }, + ) + + +def patch_superseded_heuristic(api_base: str, event_id: str) -> None: + http_json( + api_base, + "PATCH", + f"/token-events/{event_id}", + { + "tokens_in": 0, + "tokens_out": 0, + "note": SUPERSEDED_HEURISTIC_NOTE, + "measurement_kind": "superseded", + "source_provider": "task_fallback", + "confidence": 0.0, + "raw_total_tokens": 0, + }, + ) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--since", default="2026-05-19", help="UTC 
date/time to backfill from") + parser.add_argument("--api-base", default=DEFAULT_API) + parser.add_argument("--codex-home") + parser.add_argument("--apply", action="store_true", help="write backfill events") + parser.add_argument( + "--zero-heuristics", + action="store_true", + help="set post-since heuristic task fallback events to zero after backfill", + ) + args = parser.parse_args() + + since = parse_iso(args.since) + since_param = since.isoformat() + codex_home = find_codex_home(args.codex_home) + repo_refs = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/")) + existing = existing_codex_events(args.api_base) + sessions = collect_codex_sessions(codex_home, since) + + planned: list[tuple[str, Any, str | None, str | None]] = [] + by_repo: dict[str, list[int]] = {} + for session in sessions: + event = existing.get(session.source_id) + existing_total = (event.get("tokens_in", 0) + event.get("tokens_out", 0)) if event else 0 + action = "create" if event is None else ("update" if session.tokens_total > existing_total else "skip") + match = resolve_repo(session.cwd, repo_refs) + repo_id = match.repo_id if match else None + repo_slug = match.slug if match else None + if action != "skip": + planned.append((action, session, repo_id, repo_slug)) + label = repo_slug or "(unattributed)" + totals = by_repo.setdefault(label, [0, 0, 0]) + totals[0] += 1 + totals[1] += session.tokens_in + totals[2] += session.tokens_out + + heuristics = fetch_heuristics(args.api_base, since_param) if args.zero_heuristics else [] + + print(f"codex_home: {codex_home}") + print(f"since: {since.isoformat()}") + print(f"sessions found: {len(sessions)}") + print(f"backfill events to create: {sum(1 for action, *_ in planned if action == 'create')}") + print(f"backfill events to update: {sum(1 for action, *_ in planned if action == 'update')}") + for repo_slug, (count, tokens_in, tokens_out) in sorted(by_repo.items()): + print(f" {repo_slug}: {count} sessions, {tokens_in + tokens_out:,} 
tokens") + if args.zero_heuristics: + total = sum((e.get("tokens_in") or 0) + (e.get("tokens_out") or 0) for e in heuristics) + print(f"heuristic events to zero: {len(heuristics)} ({total:,} tokens)") + + if not args.apply: + print("dry run only; pass --apply to write changes") + return 0 + + for _action, session, repo_id, repo_slug in planned: + payload = session.to_token_event_payload(repo_id=repo_id) + payload["note"] = BACKFILL_NOTE + payload["raw_metadata"] = { + **payload.get("raw_metadata", {}), + "repo_slug": repo_slug, + "attribution_method": resolve_repo(session.cwd, repo_refs).method if resolve_repo(session.cwd, repo_refs) else None, + } + http_json(args.api_base, "POST", "/token-events/upsert", payload) + for event in heuristics: + patch_superseded_heuristic(args.api_base, event["id"]) + + print(f"upserted {len(planned)} backfill events") + if args.zero_heuristics: + print(f"zeroed {len(heuristics)} heuristic events") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/bootstrap-env.sh b/scripts/bootstrap-env.sh new file mode 100755 index 0000000..c9d3b2a --- /dev/null +++ b/scripts/bootstrap-env.sh @@ -0,0 +1,369 @@ +#!/usr/bin/env bash +set -euo pipefail + +STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +GITEA_CONF="${GITEA_CONF:-$HOME/.railiance_gitea.conf}" +GITEA_URL="${GITEA_URL:-http://92.205.130.254:32166}" +GITEA_USER="${GITEA_USER:-}" +GITEA_TOKEN="${GITEA_TOKEN:-}" +GIT_HELPER="${GIT_HELPER:-auto}" +INSTALL_MISSING=0 +NON_INTERACTIVE=0 +DRY_RUN=0 +AUTHORIZE_SSH=0 +ALLOW_PLAINTEXT_STORE=0 +SKIP_GITEA=0 +SKIP_MCP=0 +SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519}" +SSH_TARGETS=( + "tegwick@92.205.62.239" + "tegwick@92.205.130.254" +) + +usage() { + cat <<'USAGE' +Usage: scripts/bootstrap-env.sh [options] + +Idempotently prepares a State Hub operator or collaborator environment. + +Options: + --install-missing Install missing apt packages when possible. 
+ --non-interactive Do not prompt; warn instead of asking for secrets. + --dry-run Show intended actions without changing local config. + --git-helper MODE auto, libsecret, cache, or store. Default: auto. + --allow-plaintext-store Allow git credential.helper=store in auto mode. + --authorize-ssh Run ssh-copy-id for configured SSH targets. + --ssh-target USER@HOST Add an SSH authorization target. May repeat. + --gitea-url URL Gitea base URL for ~/.railiance_gitea.conf. + --gitea-user USER Gitea user for ~/.railiance_gitea.conf. + --gitea-token TOKEN Gitea token; otherwise prompted when interactive. + --skip-gitea Do not create or update ~/.railiance_gitea.conf. + --skip-mcp Do not run make register-mcp. + -h, --help Show this help. +USAGE +} + +ok() { printf '[OK] %s\n' "$*"; } +warn() { printf '[WARN] %s\n' "$*"; } +err() { printf '[ERR] %s\n' "$*" >&2; } +step() { printf '\n==> %s\n' "$*"; } + +run() { + if [ "$DRY_RUN" -eq 1 ]; then + printf 'DRY-RUN: %s\n' "$*" + else + "$@" + fi +} + +need_arg() { + if [ -z "${2:-}" ]; then + err "$1 requires a value" + exit 2 + fi +} + +while [ "$#" -gt 0 ]; do + case "$1" in + --install-missing) + INSTALL_MISSING=1 + shift + ;; + --non-interactive) + NON_INTERACTIVE=1 + shift + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + --git-helper) + need_arg "$1" "${2:-}" + GIT_HELPER="$2" + shift 2 + ;; + --allow-plaintext-store) + ALLOW_PLAINTEXT_STORE=1 + shift + ;; + --authorize-ssh) + AUTHORIZE_SSH=1 + shift + ;; + --ssh-target) + need_arg "$1" "${2:-}" + SSH_TARGETS+=("$2") + shift 2 + ;; + --gitea-url) + need_arg "$1" "${2:-}" + GITEA_URL="$2" + shift 2 + ;; + --gitea-user) + need_arg "$1" "${2:-}" + GITEA_USER="$2" + shift 2 + ;; + --gitea-token) + need_arg "$1" "${2:-}" + GITEA_TOKEN="$2" + shift 2 + ;; + --skip-gitea) + SKIP_GITEA=1 + shift + ;; + --skip-mcp) + SKIP_MCP=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + err "unknown argument: $1" + usage >&2 + exit 2 + ;; + esac +done + +case "$GIT_HELPER" in + 
auto|libsecret|cache|store) ;;
+  *)
+    err "--git-helper must be auto, libsecret, cache, or store"
+    exit 2
+    ;;
+esac
+
+apt_install() {
+  local packages=("$@")
+  if [ "$INSTALL_MISSING" -ne 1 ]; then
+    warn "Missing packages: ${packages[*]}"
+    warn "Rerun with --install-missing or install them manually."
+    return
+  fi
+  if ! command -v sudo >/dev/null 2>&1; then
+    warn "sudo is not available; cannot install: ${packages[*]}"
+    return
+  fi
+  run sudo apt-get update
+  run sudo apt-get install -y "${packages[@]}"
+}
+
+check_commands() {  # Report required/optional tools and hand the missing required ones to apt_install.
+  step "Checking prerequisites"
+  local cmd missing=()  # cmd is local so the loop variable does not leak into the script's global scope
+  local commands=(git curl ssh-keygen ssh-copy-id python3 make)
+  local optional=(sops age helm kubectl uv claude)
+
+  for cmd in "${commands[@]}"; do
+    if command -v "$cmd" >/dev/null 2>&1; then
+      ok "$cmd found"
+    else
+      case "$cmd" in ssh-keygen|ssh-copy-id) [[ " ${missing[*]-} " == *" openssh-client "* ]] || missing+=("openssh-client") ;; *) missing+=("$cmd") ;; esac  # apt needs package names; both ssh tools ship in openssh-client
+      warn "$cmd missing"
+    fi
+  done
+
+  for cmd in "${optional[@]}"; do
+    if command -v "$cmd" >/dev/null 2>&1; then
+      ok "$cmd found"
+    else
+      warn "$cmd missing"
+    fi
+  done
+
+  if [ "${#missing[@]}" -gt 0 ]; then
+    apt_install "${missing[@]}"
+  fi
+}
+
+libsecret_helper_path() {
+  local candidates=(
+    "/usr/share/doc/git/contrib/credential/libsecret/git-credential-libsecret"
+    "/usr/lib/git-core/git-credential-libsecret"
+    "/usr/libexec/git-core/git-credential-libsecret"
+  )
+  local candidate
+  for candidate in "${candidates[@]}"; do
+    if [ -x "$candidate" ]; then
+      printf '%s\n' "$candidate"
+      return 0
+    fi
+  done
+  return 1
+}
+
+build_libsecret_helper() {
+  local source_dir="/usr/share/doc/git/contrib/credential/libsecret"
+  if [ ! 
-d "$source_dir" ]; then + apt_install libsecret-1-0 libsecret-1-dev make gcc + fi + if [ -d "$source_dir" ]; then + run sudo make -C "$source_dir" + fi +} + +configure_git_helper() { + step "Configuring Git credential helper" + + local current + current="$(git config --global --get credential.helper || true)" + if [ -n "$current" ]; then + ok "credential.helper already set: $current" + return + fi + + local helper="$GIT_HELPER" + if [ "$helper" = "auto" ]; then + if libsecret_helper_path >/dev/null 2>&1; then + helper="libsecret" + elif [ "$ALLOW_PLAINTEXT_STORE" -eq 1 ]; then + helper="store" + else + helper="cache" + fi + fi + + case "$helper" in + libsecret) + local path + path="$(libsecret_helper_path || true)" + if [ -z "$path" ]; then + build_libsecret_helper + path="$(libsecret_helper_path || true)" + fi + if [ -z "$path" ]; then + warn "libsecret helper is not available; using cache helper for this machine." + run git config --global credential.helper "cache --timeout=3600" + else + run git config --global credential.helper "$path" + fi + ;; + cache) + run git config --global credential.helper "cache --timeout=3600" + ;; + store) + if [ "$ALLOW_PLAINTEXT_STORE" -ne 1 ]; then + err "credential.helper=store writes plaintext credentials." + err "Rerun with --allow-plaintext-store if that is intended for this host." 
+ exit 1 + fi + run git config --global credential.helper store + ;; + esac + + ok "credential.helper configured" +} + +setup_ssh_key() { + step "Checking SSH key" + mkdir -p "$HOME/.ssh" + chmod 700 "$HOME/.ssh" + + if [ -f "$SSH_KEY" ]; then + ok "SSH key exists: $SSH_KEY" + else + run ssh-keygen -t ed25519 -f "$SSH_KEY" -N "" -C "$USER@$(hostname)-state-hub" + ok "SSH key generated: $SSH_KEY" + fi + + if [ -f "${SSH_KEY}.pub" ]; then + printf '\nPublic key to authorize on managed hosts:\n\n' + sed 's/^/ /' "${SSH_KEY}.pub" + printf '\n' + fi + + if [ "$AUTHORIZE_SSH" -eq 1 ]; then + local target + for target in "${SSH_TARGETS[@]}"; do + run ssh-copy-id -i "${SSH_KEY}.pub" "$target" + done + else + warn "SSH authorization not attempted. Use --authorize-ssh after confirming host access." + fi +} + +write_gitea_conf() { + step "Checking Gitea config" + if [ "$SKIP_GITEA" -eq 1 ]; then + warn "Skipping Gitea config by request." + return + fi + + if [ -f "$GITEA_CONF" ]; then + chmod 600 "$GITEA_CONF" + ok "$GITEA_CONF already exists" + return + fi + + if [ -z "$GITEA_USER" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then + read -r -p "Gitea username: " GITEA_USER + fi + + if [ -z "$GITEA_TOKEN" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then + read -r -s -p "Gitea token (requires read:user and repository write scopes): " GITEA_TOKEN + printf '\n' + fi + + if [ -z "$GITEA_USER" ] || [ -z "$GITEA_TOKEN" ]; then + warn "Gitea config not written. Set GITEA_USER/GITEA_TOKEN or rerun interactively." 
+ return + fi + + if [ "$DRY_RUN" -eq 1 ]; then + printf 'DRY-RUN: would write %s with GITEA_URL and GITEA_USER; token hidden\n' "$GITEA_CONF" + return + fi + + umask 077 + { + printf 'GITEA_URL="%s"\n' "$GITEA_URL" + printf 'GITEA_USER="%s"\n' "$GITEA_USER" + printf 'GITEA_TOKEN="%s"\n' "$GITEA_TOKEN" + } >"$GITEA_CONF" + chmod 600 "$GITEA_CONF" + ok "Wrote $GITEA_CONF" +} + +register_mcp() { + step "Registering State Hub MCP" + if [ "$SKIP_MCP" -eq 1 ]; then + warn "Skipping MCP registration by request." + return + fi + if [ "$DRY_RUN" -eq 1 ]; then + run make -C "$STATE_HUB_DIR" register-mcp DRY_RUN=1 + else + make -C "$STATE_HUB_DIR" register-mcp + fi +} + +health_check() { + step "Checking State Hub reachability" + if curl -fsS --max-time 2 "http://127.0.0.1:8000/state/health" >/dev/null 2>&1; then + ok "State Hub API reachable at http://127.0.0.1:8000" + elif curl -fsS --max-time 2 "http://127.0.0.1:18000/state/health" >/dev/null 2>&1; then + ok "State Hub API reachable through tunnel at http://127.0.0.1:18000" + else + warn "State Hub API is not reachable locally or through the default tunnel." + warn "Start it with 'make api' or run 'make bridges' if this machine uses ops-bridge." + fi +} + +main() { + step "State Hub environment bootstrap" + printf 'Repository: %s\n' "$STATE_HUB_DIR" + check_commands + configure_git_helper + setup_ssh_key + write_gitea_conf + register_mcp + health_check + ok "Bootstrap checks complete." 
+} + +main "$@" diff --git a/scripts/consistency_check.py b/scripts/consistency_check.py index 2ae6003..ee61482 100644 --- a/scripts/consistency_check.py +++ b/scripts/consistency_check.py @@ -1596,7 +1596,7 @@ def fix_repo( task_id = ctx["task_id"] status = ctx["status"] result = _api_patch(api_base, f"/tasks/{task_id}", - {"status": status}) + {"status": status, "suppress_token_event": True}) if result is not None and "_error" not in result: report.fixes_applied.append( f"C-10 fixed: task {task_id[:8]}… status → {status!r}" diff --git a/scripts/register-mcp.sh b/scripts/register-mcp.sh new file mode 100755 index 0000000..ae63bf5 --- /dev/null +++ b/scripts/register-mcp.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash +set -euo pipefail + +STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +CLAUDE_JSON="${CLAUDE_JSON:-$HOME/.claude.json}" +SERVER_NAME="${STATE_HUB_MCP_NAME:-state-hub}" +API_BASE="${API_BASE:-}" +MCP_URL="${MCP_URL:-}" +DRY_RUN=0 + +usage() { + cat <<'USAGE' +Usage: scripts/register-mcp.sh [--url URL] [--api-base URL] [--dry-run] + +Registers the State Hub MCP server for Claude Code. + +Options: + --url URL MCP SSE URL to register. Defaults to local :8001 or tunnel :18001. + --api-base URL State Hub API URL used for reachability checks. + --dry-run Print what would happen without changing Claude config. + -h, --help Show this help. 
+USAGE +} + +while [ "$#" -gt 0 ]; do + case "$1" in + --url) + MCP_URL="${2:-}" + shift 2 + ;; + --api-base) + API_BASE="${2:-}" + shift 2 + ;; + --dry-run) + DRY_RUN=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "ERROR: unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +status() { + printf '%s\n' "$*" +} + +api_healthy() { + local base="$1" + curl -fsS --max-time 2 "${base%/}/state/health" >/dev/null 2>&1 +} + +port_open() { + local host="$1" + local port="$2" + timeout 2 bash -c ":</dev/tcp/$host/$port" >/dev/null 2>&1 +} + +if [ -z "$API_BASE" ]; then + if api_healthy "http://127.0.0.1:8000"; then + API_BASE="http://127.0.0.1:8000" + elif api_healthy "http://127.0.0.1:18000"; then + API_BASE="http://127.0.0.1:18000" + else + API_BASE="http://127.0.0.1:8000" + fi +fi + +if [ -z "$MCP_URL" ]; then + if port_open 127.0.0.1 8001; then + MCP_URL="http://127.0.0.1:8001/sse" + elif port_open 127.0.0.1 18001; then + MCP_URL="http://127.0.0.1:18001/sse" + elif [ "$API_BASE" = "http://127.0.0.1:18000" ]; then + MCP_URL="http://127.0.0.1:18001/sse" + else + MCP_URL="http://127.0.0.1:8001/sse" + fi +fi + +CONFIG="$(python3 - "$MCP_URL" <<'PY' +import json +import sys + +print(json.dumps({"type": "sse", "url": sys.argv[1]}, separators=(",", ":"))) +PY +)" + +status "State Hub directory: $STATE_HUB_DIR" +status "API health check: ${API_BASE%/}/state/health" +status "MCP registration: $SERVER_NAME -> $MCP_URL" + +if api_healthy "$API_BASE"; then + status "OK: State Hub API is reachable." +else + status "WARN: State Hub API is not reachable at ${API_BASE%/}/state/health." + status " Start it with 'make api' or bring up the ops-bridge tunnel." +fi + +if ! command -v claude >/dev/null 2>&1; then + if [ "$DRY_RUN" -eq 1 ]; then + status "WARN: claude CLI not found on PATH; dry-run will still show the command." + else + status "ERROR: claude CLI not found on PATH." 
+ status " Install or expose Claude Code CLI, then rerun: make register-mcp" + exit 1 + fi +fi + +CURRENT_URL="$(python3 - "$CLAUDE_JSON" "$SERVER_NAME" <<'PY' +import json +import sys +from pathlib import Path + +path = Path(sys.argv[1]) +name = sys.argv[2] +if not path.exists(): + print("") + raise SystemExit +try: + data = json.loads(path.read_text()) +except json.JSONDecodeError: + print("") + raise SystemExit +entry = data.get("mcpServers", {}).get(name, {}) +print(entry.get("url", "")) +PY +)" + +if [ "$CURRENT_URL" = "$MCP_URL" ]; then + status "OK: $SERVER_NAME is already registered with this URL." + exit 0 +fi + +if [ "$DRY_RUN" -eq 1 ]; then + status "DRY-RUN: would run:" + status " claude mcp add-json -s user $SERVER_NAME '$CONFIG'" + exit 0 +fi + +claude mcp add-json -s user "$SERVER_NAME" "$CONFIG" + +status "OK: registered $SERVER_NAME." +status "Restart Claude Code so the MCP server list is refreshed." diff --git a/scripts/task_token_hook.py b/scripts/task_token_hook.py index 00a72f8..d9a12aa 100755 --- a/scripts/task_token_hook.py +++ b/scripts/task_token_hook.py @@ -1,27 +1,48 @@ #!/usr/bin/env python3 """PostToolUse hook: replace heuristic token events with real transcript-derived counts. -Fires after mcp__state-hub__update_task_status when status=done. +Fires after supported task completion tools when status=done. Reads the Claude Code session transcript to compute the token delta since the previous task completion, then PATCHes the heuristic event with real counts. -State is persisted per session in /tmp/custodian_tokens_<session_id>.json so -deltas are correctly scoped even when multiple tasks complete in one session. +State is persisted per session in a durable cache directory so deltas survive +restarts and multiple task completions in one session. 
""" import json import os import sys import urllib.error import urllib.request +from datetime import datetime, timezone from pathlib import Path API = os.environ.get("CUSTODIAN_API", "http://127.0.0.1:8000") -STATE_DIR = Path(os.environ.get("TMPDIR", "/tmp")) +STATE_DIR = Path(os.environ.get("CUSTODIAN_TOKEN_STATE_DIR", Path.home() / ".cache" / "state-hub" / "token-hooks")) +HEALTH_LOG = STATE_DIR / "hook-health.jsonl" +PARSER_VERSION = "claude-transcript-delta-v1" +SUPPORTED_TOOL_HINTS = ( + "update_task_status", + "tasks", + "task", +) -def read_transcript_totals(transcript_path: str) -> tuple[int, int]: +def utc_now() -> str: + return datetime.now(timezone.utc).isoformat() + + +def write_health(event: dict) -> None: + try: + STATE_DIR.mkdir(parents=True, exist_ok=True) + with HEALTH_LOG.open("a", encoding="utf-8") as handle: + handle.write(json.dumps({"ts": utc_now(), **event}, sort_keys=True) + "\n") + except OSError: + pass + + +def read_transcript_totals(transcript_path: str) -> tuple[int, int, int]: """Sum all usage entries in the transcript JSONL up to the current point.""" - total_in = total_out = 0 + total_in = total_out = cached_in = 0 try: with open(transcript_path) as f: for line in f: @@ -29,10 +50,9 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]: entry = json.loads(line) usage = entry.get("message", {}).get("usage", {}) if usage: - # Count all input token variants (direct + cache creation + cache read) - total_in += ( - usage.get("input_tokens", 0) - + usage.get("cache_creation_input_tokens", 0) + total_in += usage.get("input_tokens", 0) + cached_in += ( + usage.get("cache_creation_input_tokens", 0) + usage.get("cache_read_input_tokens", 0) ) total_out += usage.get("output_tokens", 0) @@ -40,21 +60,22 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]: continue except OSError: pass - return total_in, total_out + return total_in, total_out, cached_in -def load_state(session_id: str) -> tuple[int, int]: +def 
load_state(session_id: str) -> tuple[int, int, int]: state_file = STATE_DIR / f"custodian_tokens_{session_id}.json" try: data = json.loads(state_file.read_text()) - return data.get("total_in", 0), data.get("total_out", 0) + return data.get("total_in", 0), data.get("total_out", 0), data.get("cached_in", 0) except (OSError, json.JSONDecodeError): - return 0, 0 + return 0, 0, 0 -def save_state(session_id: str, total_in: int, total_out: int) -> None: +def save_state(session_id: str, total_in: int, total_out: int, cached_in: int) -> None: + STATE_DIR.mkdir(parents=True, exist_ok=True) state_file = STATE_DIR / f"custodian_tokens_{session_id}.json" - state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out})) + state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out, "cached_in": cached_in})) def api_get(path: str): @@ -75,51 +96,89 @@ def api_patch(path: str, data: dict): return json.loads(r.read()) +def extract_done_task(payload: dict) -> tuple[str | None, dict]: + tool_name = payload.get("tool_name", "") + if not any(hint in tool_name for hint in SUPPORTED_TOOL_HINTS): + return None, {} + + tool_input = payload.get("tool_input", {}) or {} + status = tool_input.get("status") + if status != "done": + return None, {} + + task_id = ( + tool_input.get("task_id") + or tool_input.get("id") + or tool_input.get("taskId") + ) + return task_id, tool_input + + def main() -> None: try: payload = json.loads(sys.stdin.read()) except json.JSONDecodeError: return - tool_name = payload.get("tool_name", "") - if "update_task_status" not in tool_name: - return - - tool_input = payload.get("tool_input", {}) - if tool_input.get("status") != "done": - return - - task_id = tool_input.get("task_id") + task_id, tool_input = extract_done_task(payload) if not task_id: + write_health({"status": "skipped", "reason": "not_done_task_completion", "tool_name": payload.get("tool_name")}) return transcript_path = payload.get("transcript_path", "") session_id = 
payload.get("session_id", "unknown") # Compute token delta for this task - current_in, current_out = read_transcript_totals(transcript_path) - last_in, last_out = load_state(session_id) + current_in, current_out, current_cached = read_transcript_totals(transcript_path) + last_in, last_out, last_cached = load_state(session_id) delta_in = max(0, current_in - last_in) delta_out = max(0, current_out - last_out) - save_state(session_id, current_in, current_out) + delta_cached = max(0, current_cached - last_cached) + save_state(session_id, current_in, current_out, current_cached) - if delta_in == 0 and delta_out == 0: - return # Nothing measurable — leave heuristic in place + if delta_in == 0 and delta_out == 0 and delta_cached == 0: + write_health({ + "status": "skipped", + "reason": "zero_delta", + "session_id": session_id, + "task_id": task_id, + "source_path": transcript_path, + }) + return # Find the most recent heuristic event for this task and replace it try: events = api_get(f"/token-events/?task_id={task_id}&note=heuristic&limit=5") except (urllib.error.URLError, OSError): + write_health({"status": "skipped", "reason": "api_offline", "session_id": session_id, "task_id": task_id}) return # API offline — leave heuristic as-is if not events: + write_health({"status": "skipped", "reason": "no_fallback_event", "session_id": session_id, "task_id": task_id}) return event_id = events[0]["id"] model = tool_input.get("model") agent = tool_input.get("agent") - patch_body: dict = {"tokens_in": delta_in, "tokens_out": delta_out, "note": "measured"} + patch_body: dict = { + "tokens_in": delta_in, + "tokens_out": delta_out, + "note": "measured", + "measurement_kind": "measured", + "source_provider": "claude_transcript", + "source_id": f"claude:{session_id}:task:{task_id}", + "source_path": transcript_path or None, + "parser_version": PARSER_VERSION, + "confidence": 1.0, + "cached_input_tokens": delta_cached, + "raw_total_tokens": delta_in + delta_out + delta_cached, + 
"raw_metadata": { + "hook": "post_tool_use", + "tool_name": payload.get("tool_name"), + "state_dir": str(STATE_DIR), + }, + } if model: patch_body["model"] = model if agent: @@ -128,7 +187,19 @@ def main() -> None: try: api_patch(f"/token-events/{event_id}", patch_body) except (urllib.error.URLError, OSError): - pass + write_health({"status": "skipped", "reason": "patch_failed", "session_id": session_id, "task_id": task_id}) + return + + write_health({ + "status": "patched", + "session_id": session_id, + "task_id": task_id, + "event_id": event_id, + "tokens_in": delta_in, + "tokens_out": delta_out, + "cached_input_tokens": delta_cached, + "source_path": transcript_path, + }) if __name__ == "__main__": diff --git a/scripts/token_reconcile.py b/scripts/token_reconcile.py new file mode 100644 index 0000000..9b1bbaf --- /dev/null +++ b/scripts/token_reconcile.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +"""Reconcile token evidence from local agent sources against State Hub. + +Dry-run is the default. Use ``--apply`` to upsert measured source events and +``--zero-superseded-fallbacks`` to zero task fallback rows that are covered by +source-backed measurements. 
+""" +from __future__ import annotations + +import argparse +import json +import os +import sys +import urllib.parse +import urllib.request +from collections import Counter, defaultdict +from datetime import datetime +from pathlib import Path +from typing import Any + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from api.services.token_sources import collect_claude_transcripts, collect_codex_sessions, parse_iso # noqa: E402 +from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402 + +DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000") +SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_source_measurement" + + +def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any: + url = f"{api_base.rstrip('/')}/{path.lstrip('/')}" + data = None + headers = {"Content-Type": "application/json"} + if body is not None: + data = json.dumps(body).encode("utf-8") + req = urllib.request.Request(url, data=data, headers=headers, method=method) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read() or b"null") + + +def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]: + events: list[dict[str, Any]] = [] + offset = 0 + while True: + encoded = urllib.parse.urlencode({**params, "limit": 1000, "offset": offset}) + page = http_json(api_base, "GET", f"/token-events/?{encoded}") + if not isinstance(page, list) or not page: + break + events.extend(page) + if len(page) < 1000: + break + offset += 1000 + return events + + +def find_home(explicit: str | None, env_name: str, default: Path) -> Path | None: + candidates: list[Path] = [] + if explicit: + candidates.append(Path(explicit)) + env_home = os.environ.get(env_name) + if env_home: + candidates.append(Path(env_home)) + candidates.append(default) + for candidate in candidates: + if candidate.is_dir(): + return candidate + 
return None + + +def event_total(event: dict[str, Any]) -> int: + return int(event.get("tokens_in") or 0) + int(event.get("tokens_out") or 0) + + +def source_index(events: list[dict[str, Any]]) -> dict[str, dict[str, Any]]: + by_source: dict[str, dict[str, Any]] = {} + for event in events: + source_id = event.get("source_id") or event.get("ref_id") + if isinstance(source_id, str): + by_source[source_id] = event + return by_source + + +def print_report(report: dict[str, Any]) -> None: + print(json.dumps(report, indent=2, sort_keys=True, default=str)) + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--since", default="2026-05-19", help="UTC date/time to reconcile from") + parser.add_argument("--api-base", default=DEFAULT_API) + parser.add_argument("--codex-home") + parser.add_argument("--claude-home") + parser.add_argument("--apply", action="store_true", help="upsert measured source events") + parser.add_argument( + "--zero-superseded-fallbacks", + action="store_true", + help="with --apply, zero heuristic fallback rows after measured source ingestion", + ) + args = parser.parse_args() + + since = parse_iso(args.since) + since_param = since.isoformat() + codex_home = find_home(args.codex_home, "CODEX_HOME", Path.home() / ".codex") + if codex_home is None: + windows_codex = Path("/mnt/c/Users/bernd.worsch/.codex") + codex_home = windows_codex if windows_codex.is_dir() else None + claude_home = find_home(args.claude_home, "CLAUDE_HOME", Path.home() / ".claude") + + records = [] + source_health: dict[str, dict[str, Any]] = {} + if codex_home: + codex_records = collect_codex_sessions(codex_home, since) + records.extend(codex_records) + source_health["codex_session"] = {"home": str(codex_home), "sessions_found": len(codex_records)} + else: + source_health["codex_session"] = {"home": None, "sessions_found": 0, "warning": "Codex home not found"} + if claude_home: + claude_records = collect_claude_transcripts(claude_home, 
since) + records.extend(claude_records) + source_health["claude_transcript"] = {"home": str(claude_home), "sessions_found": len(claude_records)} + else: + source_health["claude_transcript"] = {"home": None, "sessions_found": 0, "warning": "Claude home not found"} + + repos = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/")) + existing_events = list_events(args.api_base, {"since": since_param, "include_superseded": "true"}) + existing_by_source = source_index(existing_events) + fallback_events = [ + event for event in existing_events + if event.get("source_provider") == "task_fallback" or event.get("note") == "heuristic" + ] + superseded_events = [ + event for event in existing_events + if event.get("measurement_kind") == "superseded" or str(event.get("note") or "").startswith("heuristic_superseded") + ] + + planned_upserts = [] + unattributed = 0 + stale = 0 + source_totals: dict[str, int] = defaultdict(int) + for record in records: + source_totals[record.source_provider] += record.tokens_total + existing = existing_by_source.get(record.source_id) + if existing and event_total(existing) >= record.tokens_total: + continue + if existing: + stale += 1 + match = resolve_repo(record.cwd, repos) + if match is None: + unattributed += 1 + planned_upserts.append((record, match)) + + source_ids = [ + event.get("source_id") + for event in existing_events + if event.get("source_id") and event.get("measurement_kind") == "measured" + ] + duplicate_sources = { + source_id: count for source_id, count in Counter(source_ids).items() if count > 1 + } + missing_provenance = [ + event for event in existing_events + if event.get("measurement_kind") == "measured" and not event.get("source_id") + ] + progress_events = http_json(args.api_base, "GET", f"/progress/?since={urllib.parse.quote(since_param)}&limit=1000") + measured_total = sum( + event_total(event) + for event in existing_events + if event.get("measurement_kind") == "measured" + ) + sum(record.tokens_total for 
record, _ in planned_upserts) + canary_failed = bool(progress_events) and measured_total == 0 + + report = { + "since": since.isoformat(), + "apply": args.apply, + "sources": source_health, + "sessions_found": len(records), + "source_tokens_total": dict(source_totals), + "events_existing": len(existing_events), + "events_to_upsert": len(planned_upserts), + "sessions_stale": stale, + "fallback_events": len(fallback_events), + "superseded_events": len(superseded_events), + "unattributed_source_records": unattributed, + "missing_provenance_events": len(missing_provenance), + "duplicate_source_ids": duplicate_sources, + "progress_events": len(progress_events) if isinstance(progress_events, list) else 0, + "measured_tokens_total_after_plan": measured_total, + "canary_failed": canary_failed, + } + + if args.apply: + for record, match in planned_upserts: + payload = record.to_token_event_payload(repo_id=match.repo_id if match else None) + payload["raw_metadata"] = { + **payload.get("raw_metadata", {}), + "repo_slug": match.slug if match else None, + "attribution_method": match.method if match else None, + } + http_json(args.api_base, "POST", "/token-events/upsert", payload) + if args.zero_superseded_fallbacks: + for event in fallback_events: + http_json( + args.api_base, + "PATCH", + f"/token-events/{event['id']}", + { + "tokens_in": 0, + "tokens_out": 0, + "note": SUPERSEDED_HEURISTIC_NOTE, + "measurement_kind": "superseded", + "source_provider": "task_fallback", + "confidence": 0.0, + "raw_total_tokens": 0, + }, + ) + http_json( + args.api_base, + "POST", + "/progress/", + { + "summary": ( + "Token reconciliation: " + f"{len(records)} source records, {len(planned_upserts)} upserts, " + f"{len(fallback_events)} fallback events, canary_failed={canary_failed}" + ), + "event_type": "token_reconciliation", + "author": "codex", + "detail": report, + }, + ) + + print_report(report) + return 1 if canary_failed else 0 + + +if __name__ == "__main__": + raise SystemExit(main()) 
diff --git a/tests/test_token_events.py b/tests/test_token_events.py index 324e95e..d392373 100644 --- a/tests/test_token_events.py +++ b/tests/test_token_events.py @@ -55,8 +55,23 @@ class TestTokenEventsCreate: assert ev["tokens_in"] == 200 assert ev["tokens_out"] == 100 assert ev["tokens_total"] == 300 + assert ev["measurement_kind"] == "estimated" + assert ev["source_provider"] == "manual" + assert ev["raw_total_tokens"] == 300 assert ev["id"] is not None + async def test_create_with_created_at_backfill_timestamp(self, client): + created_at = "2026-05-19T01:02:03Z" + ev = await _post_event( + client, + tokens_in=200, + tokens_out=100, + ref_type="session", + ref_id="codex:test-session", + created_at=created_at, + ) + assert ev["created_at"].startswith("2026-05-19T01:02:03") + async def test_create_with_all_fields(self, client): await _create_domain(client) topic = await _create_topic(client) @@ -74,11 +89,76 @@ class TestTokenEventsCreate: ref_id=task["id"], note="T01 done", session_id="ses-abc", + measurement_kind="measured", + source_provider="manual", + source_id="manual:test-event", + confidence=0.95, + cached_input_tokens=10, + reasoning_output_tokens=20, + raw_total_tokens=1530, + raw_metadata={"source": "unit-test"}, ) assert ev["task_id"] == task["id"] assert ev["workstream_id"] == ws["id"] # auto-populated from task assert ev["model"] == "claude-sonnet-4-6" assert ev["tokens_total"] == 1500 + assert ev["measurement_kind"] == "measured" + assert ev["source_provider"] == "manual" + assert ev["source_id"] == "manual:test-event" + assert ev["cached_input_tokens"] == 10 + assert ev["reasoning_output_tokens"] == 20 + assert ev["token_evidence_total"] == 1530 + assert ev["raw_metadata"] == {"source": "unit-test"} + + async def test_upsert_source_event_updates_existing_session(self, client): + body = { + "tokens_in": 100, + "tokens_out": 50, + "measurement_kind": "measured", + "source_provider": "codex_session", + "source_id": "codex:abc", + "ref_type": 
"session", + "ref_id": "codex:abc", + "session_id": "abc", + "cached_input_tokens": 5, + } + first = await client.post("/token-events/upsert", json=body) + assert first.status_code == 200, first.text + second = await client.post("/token-events/upsert", json={**body, "tokens_in": 300, "tokens_out": 80}) + assert second.status_code == 200, second.text + assert first.json()["id"] == second.json()["id"] + assert second.json()["tokens_total"] == 380 + + listed = (await client.get("/token-events/", params={"source_provider": "codex_session"})).json() + assert len(listed) == 1 + + async def test_patch_backfill_fields(self, client): + ev = await _post_event(client, tokens_in=100, tokens_out=50) + + r = await client.patch(f"/token-events/{ev['id']}", json={ + "tokens_in": 500, + "tokens_out": 250, + "session_id": "codex-session", + "ref_type": "session", + "ref_id": "codex:session", + "created_at": "2026-05-20T01:02:03Z", + "note": "backfill:codex-session", + "measurement_kind": "measured", + "source_provider": "codex_session", + "source_id": "codex:session", + "cached_input_tokens": 10, + }) + assert r.status_code == 200 + patched = r.json() + assert patched["tokens_total"] == 750 + assert patched["session_id"] == "codex-session" + assert patched["ref_type"] == "session" + assert patched["ref_id"] == "codex:session" + assert patched["created_at"].startswith("2026-05-20T01:02:03") + assert patched["measurement_kind"] == "measured" + assert patched["source_provider"] == "codex_session" + assert patched["source_id"] == "codex:session" + assert patched["cached_input_tokens"] == 10 async def test_workstream_auto_populated_from_task(self, client): await _create_domain(client) @@ -129,6 +209,26 @@ class TestTokenEventsList: assert len(events) == 1 assert events[0]["model"] == "claude-sonnet-4-6" + async def test_filter_by_measurement_kind_and_source_provider(self, client): + await _post_event( + client, + tokens_in=100, + tokens_out=50, + measurement_kind="measured", + 
source_provider="codex_session", + source_id="codex:filter", + ) + await _post_event(client, tokens_in=200, tokens_out=100, note="heuristic") + + r = await client.get( + "/token-events/", + params={"measurement_kind": "measured", "source_provider": "codex_session"}, + ) + assert r.status_code == 200 + events = r.json() + assert len(events) == 1 + assert events[0]["source_id"] == "codex:filter" + @pytest.mark.asyncio class TestTokenSummary: @@ -184,6 +284,7 @@ class TestTokenSummary: s = r.json() assert s["event_count"] == 1 assert s["tokens_total"] == 75 + assert s["by_measurement_kind"]["estimated"] == 75 async def test_summary_unknown_scope_returns_422(self, client): r = await client.get("/token-events/summary/", params={"scope": "foobar", "id": "x"}) @@ -215,3 +316,32 @@ class TestTokenEventGetById: import uuid r = await client.get(f"/token-events/{uuid.uuid4()}") assert r.status_code == 404 + + +@pytest.mark.asyncio +class TestTokenAggregateAndQuality: + async def test_aggregate_and_quality_expose_evidence_breakdown(self, client): + await _post_event( + client, + tokens_in=100, + tokens_out=50, + measurement_kind="measured", + source_provider="codex_session", + source_id="codex:agg", + ) + await _post_event(client, tokens_in=1000, tokens_out=500, note="heuristic") + + agg = (await client.get("/token-events/aggregate/", params={"include_superseded": "false"})).json() + assert agg["tokens_total"] == 1650 + assert agg["by_measurement_kind"]["measured"] == 150 + assert agg["by_measurement_kind"]["estimated"] == 1500 + assert agg["by_source_provider"]["codex_session"] == 150 + assert agg["by_source_provider"]["task_fallback"] == 1500 + + measured = (await client.get("/token-events/aggregate/", params={"measurement_kind": "measured"})).json() + assert measured["tokens_total"] == 150 + + quality = (await client.get("/token-events/quality/")).json() + assert quality["measured_event_count"] == 1 + assert quality["fallback_event_count"] == 1 + assert 
quality["missing_provenance_event_count"] == 0 diff --git a/tests/test_token_passthrough.py b/tests/test_token_passthrough.py index 5220a50..c1c5c0a 100644 --- a/tests/test_token_passthrough.py +++ b/tests/test_token_passthrough.py @@ -66,6 +66,9 @@ class TestTokenPassthrough: assert ev["agent"] == "custodian" assert ev["workstream_id"] == ws["id"] assert ev["note"] == "measured" + assert ev["measurement_kind"] == "measured" + assert ev["source_provider"] == "manual" + assert ev["source_id"] == f"task:{task['id']}:manual" async def test_tier1_userbased_note_override(self, client): """Tier 1 with note='userbased' records that note instead of 'measured'.""" @@ -84,6 +87,7 @@ class TestTokenPassthrough: events = (await client.get("/token-events/", params={"task_id": task["id"]})).json() assert events[0]["note"] == "userbased" + assert events[0]["measurement_kind"] == "measured" async def test_tier2_workplan_prorated(self, client): """Tier 2: workplan totals prorated across 4 tasks → 250/125 each, note='workplan'.""" @@ -108,6 +112,8 @@ class TestTokenPassthrough: assert ev["tokens_in"] == 250 # 1000 // 4 assert ev["tokens_out"] == 125 # 500 // 4 assert ev["note"] == "workplan" + assert ev["measurement_kind"] == "allocated" + assert ev["raw_metadata"]["allocation_method"] == "workplan_prorated" async def test_tier3_heuristic_fallback(self, client): """Tier 3: status=done with no token args → heuristic 1000/500, note='heuristic'.""" @@ -125,6 +131,40 @@ class TestTokenPassthrough: assert ev["tokens_in"] == 1000 assert ev["tokens_out"] == 500 assert ev["note"] == "heuristic" + assert ev["measurement_kind"] == "estimated" + assert ev["source_provider"] == "task_fallback" + + async def test_suppress_token_event_skips_done_fallback(self, client): + """File/cache sync can mark a task done without minting a heuristic event.""" + await _create_domain(client) + topic = await _create_topic(client) + ws = await _create_workstream(client, topic["id"]) + task = await 
_create_task(client, ws["id"]) + + r = await client.patch(f"/tasks/{task['id']}", json={ + "status": "done", + "suppress_token_event": True, + }) + assert r.status_code == 200 + assert r.json()["status"] == "done" + + events = (await client.get("/token-events/", params={"task_id": task["id"]})).json() + assert events == [] + + async def test_repeated_done_update_does_not_duplicate_event(self, client): + """Only the transition into done records token usage.""" + await _create_domain(client) + topic = await _create_topic(client) + ws = await _create_workstream(client, topic["id"]) + task = await _create_task(client, ws["id"]) + + r = await client.patch(f"/tasks/{task['id']}", json={"status": "done"}) + assert r.status_code == 200 + r = await client.patch(f"/tasks/{task['id']}", json={"status": "done"}) + assert r.status_code == 200 + + events = (await client.get("/token-events/", params={"task_id": task["id"]})).json() + assert len(events) == 1 async def test_non_done_status_creates_no_event(self, client): """Non-done status updates never create a token event.""" diff --git a/tests/test_token_sources.py b/tests/test_token_sources.py new file mode 100644 index 0000000..c30edf9 --- /dev/null +++ b/tests/test_token_sources.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import json + +from api.services.token_sources import parse_iso +from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo +from api.services.token_sources.claude import parse_claude_transcript +from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session + + +def _write_jsonl(path, rows): + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + for row in rows: + if row == "BAD": + handle.write("{not json}\n") + else: + handle.write(json.dumps(row) + "\n") + + +def test_parse_codex_session_sums_token_count_records(tmp_path): + path = tmp_path / "sessions" / "2026" / "05" / "23" / 
"rollout-local.jsonl" + _write_jsonl( + path, + [ + {"type": "session_meta", "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}}, + {"type": "turn_context", "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"}}, + { + "type": "event_msg", + "timestamp": "2026-05-22T23:00:00Z", + "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}}}, + }, + "BAD", + { + "type": "event_msg", + "timestamp": "2026-05-23T01:00:00Z", + "payload": { + "type": "token_count", + "info": { + "last_token_usage": { + "input_tokens": 100, + "output_tokens": 40, + "cached_input_tokens": 15, + "reasoning_output_tokens": 7, + "total_tokens": 155, + } + }, + }, + }, + ], + ) + + record = parse_codex_session(path, parse_iso("2026-05-23")) + + assert record is not None + assert record.source_id == "codex:s1" + assert record.tokens_in == 100 + assert record.tokens_out == 40 + assert record.cached_input_tokens == 15 + assert record.reasoning_output_tokens == 7 + assert record.raw_total_tokens == 155 + assert record.raw_metadata["malformed_lines"] == 1 + + +def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path): + live = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl" + archived = tmp_path / "archived_sessions" / "rollout-archived.jsonl" + rows = [ + {"type": "session_meta", "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}}, + { + "type": "event_msg", + "timestamp": "2026-05-23T01:00:00Z", + "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}}}, + }, + ] + _write_jsonl(live, rows) + _write_jsonl( + archived, + rows + [ + { + "type": "event_msg", + "timestamp": "2026-05-23T02:00:00Z", + "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}}}, + } + ], + ) + + records = collect_codex_sessions(tmp_path, parse_iso("2026-05-23")) + + assert len(records) == 1 + assert 
records[0].source_id == "codex:same" + assert records[0].tokens_total == 40 + + +def test_parse_claude_transcript_sums_usage_without_content(tmp_path): + path = tmp_path / "projects" / "repo" / "session.jsonl" + _write_jsonl( + path, + [ + { + "timestamp": "2026-05-23T01:00:00Z", + "session_id": "c1", + "cwd": "/repo", + "message": { + "model": "claude-sonnet", + "content": "do not store me", + "usage": { + "input_tokens": 30, + "cache_creation_input_tokens": 5, + "cache_read_input_tokens": 7, + "output_tokens": 11, + }, + }, + } + ], + ) + + record = parse_claude_transcript(path, parse_iso("2026-05-23")) + + assert record is not None + assert record.source_id == "claude:c1" + assert record.tokens_in == 30 + assert record.cached_input_tokens == 12 + assert record.tokens_out == 11 + assert "content" not in record.raw_metadata + + +def test_resolve_repo_uses_normalised_path_prefix(): + refs = [ + RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub"), + RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other"), + ] + + match = resolve_repo("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api", refs) + + assert normalise_cwd("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub") == "/home/worsch/state-hub" + assert match is not None + assert match.repo_id == "1" + assert match.method == "path_prefix" diff --git a/workplans/CUST-WP-0012-multi-user-onboarding.md b/workplans/CUST-WP-0012-multi-user-onboarding.md index a754d8b..bb7c346 100644 --- a/workplans/CUST-WP-0012-multi-user-onboarding.md +++ b/workplans/CUST-WP-0012-multi-user-onboarding.md @@ -4,12 +4,12 @@ type: workplan title: "Multi-User Onboarding and Environment Bootstrap" domain: custodian repo: state-hub -status: active +status: finished owner: custodian topic_slug: custodian state_hub_workstream_id: "a28d9e29-4119-4b73-9469-f921920253ef" created: "2026-03-11" -updated: "2026-05-17" +updated: "2026-05-23" --- # Multi-User Onboarding and Environment Bootstrap @@ -51,7 +51,7 
@@ Two personas: ```task id: CUST-WP-0012-T01 state_hub_task_id: 71628269-9a75-4dae-a347-e64a86040322 -status: todo +status: done priority: medium ``` @@ -79,6 +79,12 @@ git config --global credential.helper 'cache --timeout=3600' **Done when:** included in bootstrap script; push to Gitea works without re-entering credentials on second attempt. +**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` configures a global +credential helper when one is not already present. It prefers `libsecret`, uses +`cache --timeout=3600` as the safe automatic fallback, and supports explicit +headless plaintext storage via `--git-helper store --allow-plaintext-store`. +`docs/onboarding.md` documents the tradeoffs. + --- ### T02 — SSH key generation and authorization automation @@ -86,7 +92,7 @@ re-entering credentials on second attempt. ```task id: CUST-WP-0012-T02 state_hub_task_id: fea965e9-8a8f-439c-9096-8f7756eb71ed -status: todo +status: done priority: medium ``` @@ -110,6 +116,11 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254 **Done when:** included in bootstrap script; documented in onboarding guide. +**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` generates +`~/.ssh/id_ed25519` if missing, prints the public key, and can run +`ssh-copy-id` for Railiance01 and CoulombCore with `--authorize-ssh`. +`docs/onboarding.md` documents the operator and collaborator path. + --- ### T03 — Claude Code MCP registration automation @@ -117,7 +128,7 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254 ```task id: CUST-WP-0012-T03 state_hub_task_id: 60318e9a-972e-45c8-afde-82ed0625f594 -status: todo +status: done priority: medium ``` @@ -132,10 +143,10 @@ make register-mcp # idempotent; safe to re-run The script should: 1. Detect whether `state-hub` is already in `~/.claude.json` -2. Extract the server config from `.mcp.json` +2. Use the current SSE MCP config (`http://127.0.0.1:8001/sse` locally or + `http://127.0.0.1:18001/sse` through ops-bridge) 3. 
Run `claude mcp add-json -s user state-hub ` -4. Run `patch_mcp_cwd.py` to restore the cwd field -5. Print instructions to restart Claude Code +4. Print instructions to restart Claude Code Should also detect whether the state hub is reachable directly (`http://127.0.0.1:8000`) or needs a tunnel (via ops-bridge), and emit @@ -144,6 +155,12 @@ a warning if neither is available. **Done when:** `make register-mcp` works on a clean machine; documented in onboarding guide. +**Implemented 2026-05-23:** `scripts/register-mcp.sh` and the +`make register-mcp` target register the current SSE MCP transport +idempotently. The script detects local/tunnel reachability, supports +`MCP_URL`, `API_BASE`, and `DRY_RUN=1`, and documents the old `.mcp.json` cwd +patch path as legacy. + --- ### T04 — Environment bootstrap script @@ -151,7 +168,7 @@ in onboarding guide. ```task id: CUST-WP-0012-T04 state_hub_task_id: 84a94761-e424-4470-a9a2-64d9cabadb7f -status: todo +status: done priority: high ``` @@ -176,6 +193,11 @@ Design constraints: **Done when:** running the script on a clean Ubuntu 24.04 machine produces a working Custodian environment with no additional manual steps. +**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` and +`make bootstrap-env` provide the idempotent entrypoint. It supports dry-run, +non-interactive mode, optional apt package installation, SSH authorization, +Gitea token prompting, MCP registration, and State Hub health checks. + --- ### T05 — Onboarding guide and user journey documentation @@ -183,7 +205,7 @@ produces a working Custodian environment with no additional manual steps. ```task id: CUST-WP-0012-T05 state_hub_task_id: b0839802-659a-475b-8b84-ab7341ea3d15 -status: todo +status: done priority: medium ``` @@ -208,6 +230,10 @@ for both personas: **Done when:** a new collaborator can follow the guide without clarification from the primary operator. 
+**Implemented 2026-05-23:** `docs/onboarding.md` covers primary operator and +domain collaborator journeys, including SSH, Gitea token file, credential +helper choices, MCP registration, tunnel setup, and verification checks. + --- ### T06 — State Hub multi-user model (deferred) @@ -215,7 +241,7 @@ clarification from the primary operator. ```task id: CUST-WP-0012-T06 state_hub_task_id: d5df3302-67b9-4765-a8d8-ea2df53dff6e -status: todo +status: done priority: low ``` @@ -235,6 +261,11 @@ domain) or rely on Gitea repo permissions as the authoritative boundary Implement T01–T05 first; multi-user access control is only needed when there is more than one user. +**Implemented 2026-05-23:** `docs/multi-user-access-model.md` records the +current decision: repo permissions, SSH access, tunnels, and OpenBao remain the +authoritative boundaries for this phase; State Hub API auth is deferred until a +real second-user or exposed-deployment trigger exists. + --- ## References diff --git a/workplans/STATE-WP-0045-token-measurement-accuracy.md b/workplans/STATE-WP-0045-token-measurement-accuracy.md new file mode 100644 index 0000000..77f06f7 --- /dev/null +++ b/workplans/STATE-WP-0045-token-measurement-accuracy.md @@ -0,0 +1,310 @@ +--- +id: STATE-WP-0045 +type: workplan +title: "Token Measurement Accuracy and Resilience" +domain: custodian +repo: state-hub +status: finished +owner: codex +topic_slug: custodian +created: "2026-05-23" +updated: "2026-05-23" +state_hub_workstream_id: "0aefe379-c182-4471-84dd-c136d5e1206b" +--- + +# Token Measurement Accuracy and Resilience + +## Summary + +Make State Hub token tracking accurate enough to trust for daily operations and +robust enough to survive agent/tool changes. 
+ +The May 19 flatline showed the current weak spots: token events mixed measured +usage, task-completion fallbacks, and file-sync side effects in the same table; +Claude measurement depended on one hook path; Codex usage lived in local session +logs until a manual backfill; and the dashboard treated every token event as the +same quality of evidence. The immediate fix restored Codex session totals and +suppressed sync-generated fallback events, but the system still needs a durable +measurement model, idempotent source adapters, reconciliation checks, and a +dashboard that exposes provenance and confidence. + +## Current Findings + +- `token_events` stores counts, associations, free-text notes, and timestamps, + but not structured provenance such as source system, source event id, parser + version, raw token categories, confidence, or whether the row is measured, + allocated, estimated, or superseded. +- `PATCH /tasks/{id}` can still create heuristic token events on a transition to + `done`. That fallback is useful as a temporary operational signal, but it is + not a measurement and should not be blended into measured totals. +- `fix-consistency` now suppresses token events while syncing file-backed task + status, but this is a narrow guard. Other bulk sync, import, and migration + paths need the same invariant. +- Codex Desktop session logs contain structured `token_count` events with + `last_token_usage`, `total_token_usage`, cached-input counts, and reasoning + output counts. The new backfill script can restore these, but it is not yet a + scheduled or monitored ingestion path. +- Claude Code measurement currently depends on `scripts/task_token_hook.py` + firing after one MCP tool name. It uses per-session state in `/tmp`, so missed + hooks, restarts, renamed tools, and non-MCP REST paths can silently degrade to + fallback events. +- Repository attribution for Codex backfill is path-based. 
This is good enough + for the emergency restore, but long-term attribution should prefer registered + repo fingerprints/remotes and then fall back to paths. +- The Token Cost dashboard currently aggregates all events returned by + `/token-events/?limit=1000`; it does not show measurement quality, source, + superseded rows, ingestion freshness, or possible gaps. + +## Out of Scope + +- Exact billing reconciliation against vendor invoices. +- Capturing private transcript content in State Hub. +- Replacing existing task/workstream/repo relationships. +- Implementing every provider-specific parser in one pass. The first pass should + cover Codex Desktop and Claude Code, with a documented adapter contract for + others. + +## T01 - Define Token Evidence Model + +```task +id: STATE-WP-0045-T01 +status: done +priority: high +state_hub_task_id: "29aed6d9-40aa-40fc-9e9a-3eb3e6f985bc" +``` + +Define a structured model that separates measured usage from allocated, +estimated, and superseded rows. + +Implementation notes: + +- Add a short design note or ADR section covering token event semantics. +- Define measurement classes such as `measured`, `allocated`, `estimated`, and + `superseded`. +- Define source classes such as `codex_session`, `claude_transcript`, + `llm_connect`, `manual`, and `task_fallback`. +- Define structured provenance fields: source system, source id, source path or + URI, source timestamp, parser version, ingestion timestamp, and confidence. +- Decide how to represent raw token categories: input, cached input, output, + reasoning output, and provider total. +- Decide whether cached input should be included in default totals or shown as a + separate metric. Preserve enough fields to support both views. +- Replace free-text note taxonomy as the primary quality signal. Notes can + remain for human context, but dashboards and APIs should rely on structured + fields. 
+ +Done when the repo has a reviewed token evidence contract and the follow-on +schema/API tasks can implement it without ambiguity. + +## T02 - Add Provenance Schema and Idempotent Upsert API + +```task +id: STATE-WP-0045-T02 +status: done +priority: high +state_hub_task_id: "ade2bd40-343c-4829-ba4f-44bc8b7cbef9" +``` + +Extend token storage so source-derived events can be written repeatedly without +duplicates and without losing provenance. + +Implementation notes: + +- Add migration fields for the evidence model from T01. Candidate fields: + `measurement_kind`, `source_provider`, `source_id`, `source_path`, + `source_created_at`, `ingested_at`, `parser_version`, `confidence`, + `cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`, + `cost_estimated_usd`, and `raw_metadata`. +- Add a unique constraint or partial unique index that prevents duplicate + measured source rows. For example: source provider plus source id, scoped by + measurement kind. +- Provide an upsert endpoint or make `POST /token-events/` support an explicit + idempotency key. The behavior should update a growing live session rather than + creating a second row. +- Keep backward compatibility for existing clients that only post + `tokens_in`/`tokens_out`, but classify those rows explicitly. +- Update schemas, router tests, and migration tests. + +Done when source-backed token events can be inserted or updated idempotently and +legacy callers continue to work. + +## T03 - Build Reusable Token Source Adapters + +```task +id: STATE-WP-0045-T03 +status: done +priority: high +state_hub_task_id: "3844fb70-4ceb-4f90-9894-d4845970f0a6" +``` + +Move source-specific parsing out of one-off scripts and hooks into reusable, +tested adapter modules. + +Implementation notes: + +- Add an `api/services/token_sources/` package or equivalent service layer. +- Implement a Codex Desktop adapter for `.codex/sessions/**` and + `.codex/archived_sessions/**`. 
+- Implement a Claude Code adapter for `.claude/projects/**/*.jsonl` that reads + usage metadata without storing transcript text. +- Provide a common adapter result type with source id, timestamps, token + categories, model, agent, cwd/path context, and raw parser metadata. +- Make parsing safe by default: no conversation text in logs, progress events, + token notes, or API payloads. +- Add fixtures with synthetic Codex and Claude session records that cover live + sessions, archived sessions, duplicate files, malformed JSONL, resets, and + missing usage records. +- Keep `scripts/backfill_codex_token_events.py` as a thin CLI over the reusable + service or replace it with a new unified CLI. + +Done when Codex and Claude token sources have deterministic parser tests and a +shared ingestion interface. + +## T04 - Improve Repo, Workstream, and Task Attribution + +```task +id: STATE-WP-0045-T04 +status: done +priority: high +state_hub_task_id: "d78b36ea-2a1a-40d6-bd83-03d48ff2ad9b" +``` + +Make attribution accurate without relying solely on local path string matching. + +Implementation notes: + +- Resolve repo attribution by git root fingerprint and remote URL when possible, + then fall back to registered host paths. +- Handle duplicate local paths or alias repos explicitly, especially where one + checkout is registered under multiple slugs. +- Attribute session-level usage to repo first, then optionally to workstreams or + tasks when there is strong evidence. +- Define task allocation rules that do not change measured session totals. For + example, produce `allocated` child rows from measured session rows using task + completion timestamps, tool-call metadata, or explicit operator input. +- Record the allocation method and confidence for every task-level allocation. +- Avoid minting task-level heuristic rows automatically for bulk import, status + sync, migration, and consistency tooling. 
+ +Done when measured session totals are stable and task/workstream attribution is +explicitly either measured, allocated, or estimated. + +## T05 - Add Reconciliation, Gap Detection, and Backfill Operations + +```task +id: STATE-WP-0045-T05 +status: done +priority: high +state_hub_task_id: "efaa2629-4f9a-439c-b0a3-85d77b03580f" +``` + +Add an operator-safe reconciliation command that detects flatlines, duplicate +rows, stale ingestion, and fallback leakage. + +Implementation notes: + +- Add a command such as `make token-reconcile` or + `python scripts/token_reconcile.py --since <date>`. +- Report sessions found, sessions ingested, sessions stale, duplicate source + ids, fallback events, superseded rows, unattributed sessions, and rows missing + structured provenance. +- Support `--dry-run` by default and `--apply` for writes. +- Include an explicit `--zero-superseded-fallbacks` or equivalent flag rather + than silently editing historical rows. +- Store reconciliation summaries as progress events or report files without + including transcript content. +- Add a canary threshold: alert or fail when measured token volume is zero while + task/progress activity exists for the same window. + +Done when an operator can run one command to verify token tracking health and +perform safe, idempotent backfills. + +## T06 - Harden Hooks and Runtime Integration + +```task +id: STATE-WP-0045-T06 +status: done +priority: medium +state_hub_task_id: "5fd99241-e6dd-4ca6-8c58-a0048f08f0ca" +``` + +Make token collection survive hook misses, tool renames, restarts, and multiple +agent runtimes. + +Implementation notes: + +- Update Claude hook handling so it can match supported task completion paths, + not just one exact MCP tool name. +- Persist hook high-water marks in a durable State Hub or repo-local location + instead of only `/tmp`. +- Add hook health logging that records when a hook ran, what source id it + processed, and whether it patched or skipped a token event.
+- Add a Codex ingestion path that can run on demand and from a schedule without + requiring manual script execution. +- Document required environment variables and path discovery for Windows, WSL, + and remote Linux hosts. +- Ensure failures degrade to visible `estimated` events or health warnings, not + silent flatlines. + +Done when missing or stale token ingestion becomes visible within one reporting +window and can be recovered without ad hoc inspection. + +## T07 - Upgrade Token APIs and Dashboard Quality Signals + +```task +id: STATE-WP-0045-T07 +status: done +priority: medium +state_hub_task_id: "ecaf6ff8-59aa-4c56-8163-125dc96b2068" +``` + +Expose token quality, source, and freshness in APIs and dashboard views. + +Implementation notes: + +- Add API filters for measurement kind, source provider, repo, time range, + superseded rows, and unattributed rows. +- Replace the hard dashboard dependence on `/token-events/?limit=1000` with + paginated or pre-aggregated endpoints that support time windows. +- Add dashboard controls for measured-only, include allocated, include + estimates, and show superseded rows. +- Show ingestion freshness: last Codex session ingested, last Claude transcript + ingested, and last reconciliation run. +- Add a data-quality section listing fallback events, unattributed measured + sessions, duplicate source ids, and days with progress/task activity but zero + measured tokens. +- Update the Token Cost page and docs so operators know which numbers are + measured versus inferred. + +Done when the dashboard no longer presents fallback, allocated, and measured +usage as indistinguishable totals. + +## T08 - Verification and Migration Playbook + +```task +id: STATE-WP-0045-T08 +status: done +priority: medium +state_hub_task_id: "61baff79-832e-45f8-80f3-106abe262096" +``` + +Cover the new measurement system with tests and a safe rollout plan. 
+ +Implementation notes: + +- Add unit tests for the evidence model, source adapters, source-id + deduplication, repo attribution, and task allocation. +- Add router tests for idempotent upsert, source filters, measurement-kind + filters, created-at preservation, and backwards-compatible legacy posts. +- Add reconciliation tests with synthetic pre-May-19 and post-May-19 flatline + scenarios. +- Add dashboard/data-loader tests or fixture checks for quality filters and + aggregate counts. +- Write a migration playbook covering old heuristic rows, existing + `backfill:codex-session` rows, and any rows without structured provenance. +- Verify the full suite and run a dry-run reconciliation before marking this + workplan finished. + +Done when the improved token measurement path has automated coverage, an +operator playbook, and a dry-run reconciliation report showing no hidden +fallback leakage.