generated from coulomb/repo-seed
Fixed and improved token tracking
This commit is contained in:
25
Makefile
25
Makefile
@@ -1,6 +1,7 @@
|
||||
.PHONY: install install-cli dashboard-install dashboard-check db db-tools migrate seed api dashboard check test test-python clean register-project register-codex-project validate-adr add-domain rename-domain add-repo list-repos register-path cleanup-stale tunnels-up tunnels-status tunnels-check bridges install-hooks install-hooks-all gitea-inventory
|
||||
# Command-style targets that do not produce a file of the same name. Declaring
# them phony keeps a stray file (for example one called "start" or "test") from
# making the target look up to date. `start` only echoes instructions, so it
# belongs in this list as well.
.PHONY: start install install-cli dashboard-install dashboard-check \
	db db-tools migrate seed api dashboard check test test-python clean \
	register-project register-codex-project register-mcp bootstrap-env \
	validate-adr add-domain rename-domain add-repo list-repos register-path \
	cleanup-stale tunnels-up tunnels-status tunnels-check bridges \
	install-hooks install-hooks-all gitea-inventory token-reconcile
|
||||
|
||||
COMPOSE = docker compose -f infra/docker-compose.yml --env-file .env
|
||||
PYTHON ?= python3
|
||||
|
||||
start:
|
||||
@echo "# run in different terminals"
|
||||
@@ -111,6 +112,17 @@ register-codex-project:
|
||||
@test -n "$(PROJECT_PATH)" || (echo "ERROR: PROJECT_PATH is required."; exit 1)
|
||||
scripts/register_project.sh "$(DOMAIN)" "$(PROJECT_PATH)" --codex
|
||||
|
||||
# Each option is forwarded to the script only when its variable is non-empty:
# MCP_URL -> --url, API_BASE -> --api-base, DRY_RUN -> --dry-run.
## Register State Hub MCP for Claude Code. Optional: make register-mcp MCP_URL=http://127.0.0.1:18001/sse
register-mcp:
	scripts/register-mcp.sh \
		$(if $(MCP_URL),--url "$(MCP_URL)") \
		$(if $(API_BASE),--api-base "$(API_BASE)") \
		$(if $(DRY_RUN),--dry-run)
|
||||
|
||||
# ARGS is expanded unquoted, so a value such as ARGS="--a --b" reaches the
# script as separate arguments; with ARGS unset the script runs without any.
## Bootstrap a new operator/collaborator environment. Optional: make bootstrap-env ARGS="--install-missing"
bootstrap-env:
	scripts/bootstrap-env.sh $(ARGS)
|
||||
|
||||
## Add a second repo to an existing domain: make add-repo DOMAIN=railiance REPO_PATH=/home/worsch/railiance-infra
|
||||
add-repo:
|
||||
@test -n "$(DOMAIN)" || (echo "ERROR: DOMAIN is required."; exit 1)
|
||||
@@ -229,6 +241,17 @@ fix-consistency:
|
||||
$(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \
|
||||
e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e
|
||||
|
||||
# Flags are passed through only for variables that are set:
#   SINCE -> --since, API_BASE -> --api-base, CODEX_HOME -> --codex-home,
#   CLAUDE_HOME -> --claude-home, APPLY -> --apply,
#   ZERO_FALLBACKS -> --zero-superseded-fallbacks.
## Reconcile measured token sources against State Hub.
## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1]
token-reconcile:
	$(PYTHON) scripts/token_reconcile.py \
		$(if $(SINCE),--since "$(SINCE)") \
		$(if $(API_BASE),--api-base "$(API_BASE)") \
		$(if $(CODEX_HOME),--codex-home "$(CODEX_HOME)") \
		$(if $(CLAUDE_HOME),--claude-home "$(CLAUDE_HOME)") \
		$(if $(APPLY),--apply) \
		$(if $(ZERO_FALLBACKS),--zero-superseded-fallbacks)
|
||||
|
||||
## Pull then fix: single repo or all repos if REPO omitted
|
||||
## make fix-consistency-remote — smart pull+fix all repos that need it
|
||||
## make fix-consistency-remote REPO=slug — pull+fix one repo
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Integer, Text, func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import DateTime, Float, ForeignKey, Integer, Text, UniqueConstraint, func
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from api.models.base import Base, new_uuid
|
||||
@@ -10,6 +12,14 @@ from api.models.base import Base, new_uuid
|
||||
|
||||
class TokenEvent(Base):
|
||||
__tablename__ = "token_events"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"measurement_kind",
|
||||
"source_provider",
|
||||
"source_id",
|
||||
name="uq_token_events_source_identity",
|
||||
),
|
||||
)
|
||||
|
||||
id: Mapped[uuid.UUID] = mapped_column(
|
||||
UUID(as_uuid=True), primary_key=True, default=new_uuid
|
||||
@@ -31,6 +41,35 @@ class TokenEvent(Base):
|
||||
ref_type: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
ref_id: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
note: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
measurement_kind: Mapped[str] = mapped_column(
|
||||
Text, nullable=False, default="estimated", server_default="estimated", index=True
|
||||
)
|
||||
source_provider: Mapped[str] = mapped_column(
|
||||
Text, nullable=False, default="manual", server_default="manual", index=True
|
||||
)
|
||||
source_id: Mapped[str | None] = mapped_column(Text, nullable=True, index=True)
|
||||
source_path: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
source_created_at: Mapped[datetime | None] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True, index=True
|
||||
)
|
||||
ingested_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
|
||||
)
|
||||
parser_version: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
confidence: Mapped[float] = mapped_column(
|
||||
Float, nullable=False, default=0.35, server_default="0.35"
|
||||
)
|
||||
cached_input_tokens: Mapped[int] = mapped_column(
|
||||
Integer, nullable=False, default=0, server_default="0"
|
||||
)
|
||||
reasoning_output_tokens: Mapped[int] = mapped_column(
|
||||
Integer, nullable=False, default=0, server_default="0"
|
||||
)
|
||||
raw_total_tokens: Mapped[int | None] = mapped_column(Integer, nullable=True)
|
||||
cost_estimated_usd: Mapped[float | None] = mapped_column(Float, nullable=True)
|
||||
raw_metadata: Mapped[dict[str, Any]] = mapped_column(
|
||||
JSONB, nullable=False, default=dict, server_default="{}"
|
||||
)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), server_default=func.now(), nullable=False, index=True
|
||||
)
|
||||
|
||||
@@ -75,23 +75,47 @@ async def update_task(
|
||||
if task is None:
|
||||
raise HTTPException(status_code=404, detail="Task not found")
|
||||
|
||||
previous_status = task.status.value
|
||||
|
||||
# Separate token fields from task fields
|
||||
token_field_names = {"tokens_in", "tokens_out", "workplan_tokens_in", "workplan_tokens_out", "token_note", "model", "agent", "session_id"}
|
||||
token_field_names = {
|
||||
"tokens_in",
|
||||
"tokens_out",
|
||||
"workplan_tokens_in",
|
||||
"workplan_tokens_out",
|
||||
"token_note",
|
||||
"model",
|
||||
"agent",
|
||||
"session_id",
|
||||
"suppress_token_event",
|
||||
}
|
||||
update_data = body.model_dump(exclude_unset=True)
|
||||
token_data = {k: update_data.pop(k) for k in list(update_data.keys()) if k in token_field_names}
|
||||
suppress_token_event = bool(token_data.pop("suppress_token_event", False))
|
||||
|
||||
for field, value in update_data.items():
|
||||
setattr(task, field, value)
|
||||
await session.commit()
|
||||
await session.refresh(task)
|
||||
|
||||
# Token event — three-tier logic, only when marking done
|
||||
if update_data.get("status") == "done":
|
||||
# Token event — three-tier logic, only for an intentional transition to done.
|
||||
status_update = update_data.get("status")
|
||||
new_status = status_update.value if hasattr(status_update, "value") else status_update
|
||||
if (
|
||||
new_status == "done"
|
||||
and previous_status != "done"
|
||||
and not suppress_token_event
|
||||
):
|
||||
if "tokens_in" in token_data and "tokens_out" in token_data:
|
||||
# Tier 1: exact counts — default note "measured"; caller may override with token_note
|
||||
tin = token_data["tokens_in"]
|
||||
tout = token_data["tokens_out"]
|
||||
tnote = token_data.get("token_note") or "measured"
|
||||
measurement_kind = "measured"
|
||||
source_provider = "manual"
|
||||
confidence = 1.0
|
||||
source_id = f"task:{task_id}:manual"
|
||||
raw_metadata = {"input_source": "task_status_patch"}
|
||||
elif "workplan_tokens_in" in token_data and "workplan_tokens_out" in token_data:
|
||||
# Tier 2: prorate workplan total across task count
|
||||
count_result = await session.execute(
|
||||
@@ -101,9 +125,24 @@ async def update_task(
|
||||
tin = token_data["workplan_tokens_in"] // task_count
|
||||
tout = token_data["workplan_tokens_out"] // task_count
|
||||
tnote = "workplan"
|
||||
measurement_kind = "allocated"
|
||||
source_provider = "manual"
|
||||
confidence = 0.7
|
||||
source_id = f"task:{task_id}:workplan-allocation"
|
||||
raw_metadata = {
|
||||
"allocation_method": "workplan_prorated",
|
||||
"workplan_tokens_in": token_data["workplan_tokens_in"],
|
||||
"workplan_tokens_out": token_data["workplan_tokens_out"],
|
||||
"task_count": task_count,
|
||||
}
|
||||
else:
|
||||
# Tier 3: heuristic fallback
|
||||
tin, tout, tnote = 1000, 500, "heuristic"
|
||||
measurement_kind = "estimated"
|
||||
source_provider = "task_fallback"
|
||||
confidence = 0.35
|
||||
source_id = f"task:{task_id}:heuristic"
|
||||
raw_metadata = {"estimation_method": "fixed_task_done_fallback"}
|
||||
|
||||
# Resolve repo_id via workstream
|
||||
ws = await session.get(Workstream, task.workstream_id)
|
||||
@@ -121,6 +160,12 @@ async def update_task(
|
||||
ref_type="task",
|
||||
ref_id=str(task_id),
|
||||
note=tnote,
|
||||
measurement_kind=measurement_kind,
|
||||
source_provider=source_provider,
|
||||
source_id=source_id,
|
||||
confidence=confidence,
|
||||
raw_total_tokens=tin + tout,
|
||||
raw_metadata=raw_metadata,
|
||||
)
|
||||
session.add(event)
|
||||
await session.commit()
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from sqlalchemy import select
|
||||
@@ -10,18 +12,95 @@ from api.models.managed_repo import ManagedRepo
|
||||
from api.models.task import Task
|
||||
from api.models.token_event import TokenEvent
|
||||
from api.models.workstream import Workstream
|
||||
from api.schemas.token_event import RepoTokenSummary, TokenEventCreate, TokenEventPatch, TokenEventRead, TokenSummary
|
||||
from api.schemas.token_event import (
|
||||
RepoTokenSummary,
|
||||
TokenAggregateRow,
|
||||
TokenAggregateSummary,
|
||||
TokenEventCreate,
|
||||
TokenEventPatch,
|
||||
TokenEventRead,
|
||||
TokenQualitySummary,
|
||||
TokenSummary,
|
||||
)
|
||||
|
||||
router = APIRouter(prefix="/token-events", tags=["token-events"])
|
||||
|
||||
# Confidence score used for an event, keyed by measurement kind, when the
# caller supplies no explicit confidence (kinds missing here fall back to 0.35
# at the lookup site in _apply_event_defaults).
DEFAULT_CONFIDENCE = {
    "measured": 1.0,
    "allocated": 0.70,
    "estimated": 0.35,
    "superseded": 0.0,
}
|
||||
|
||||
@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED)
|
||||
async def create_token_event(
|
||||
body: TokenEventCreate,
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> TokenEvent:
|
||||
data = body.model_dump()
|
||||
# parser_version recorded for an event, keyed by source provider, when the
# caller does not send one. Providers absent from this mapping get no default.
SOURCE_PARSER_DEFAULTS = {
    "codex_session": "codex-desktop-v1",
    "claude_transcript": "claude-transcript-v1",
    "llm_connect": "llm-connect-v1",
}
|
||||
|
||||
|
||||
def _event_total(event: TokenEvent) -> int:
    """Return the combined input and output token count of ``event``."""
    return sum((event.tokens_in, event.tokens_out))
|
||||
|
||||
|
||||
def _infer_measurement_kind(data: dict[str, Any]) -> str:
|
||||
if data.get("measurement_kind"):
|
||||
return str(data["measurement_kind"])
|
||||
note = data.get("note")
|
||||
if note == "heuristic_superseded_by_codex_backfill":
|
||||
return "superseded"
|
||||
if note == "workplan":
|
||||
return "allocated"
|
||||
if note == "heuristic":
|
||||
return "estimated"
|
||||
if note == "measured" or str(note or "").startswith("backfill:codex-session"):
|
||||
return "measured"
|
||||
provider = data.get("source_provider")
|
||||
if provider in {"codex_session", "claude_transcript", "llm_connect"}:
|
||||
return "measured"
|
||||
return "estimated"
|
||||
|
||||
|
||||
def _infer_source_provider(data: dict[str, Any], measurement_kind: str) -> str:
|
||||
if data.get("source_provider"):
|
||||
return str(data["source_provider"])
|
||||
note = data.get("note")
|
||||
ref_id = str(data.get("ref_id") or "")
|
||||
agent = str(data.get("agent") or "").lower()
|
||||
if note == "heuristic":
|
||||
return "task_fallback"
|
||||
if ref_id.startswith("codex:") or str(note or "").startswith("backfill:codex-session"):
|
||||
return "codex_session"
|
||||
if measurement_kind == "measured" and "claude" in agent:
|
||||
return "claude_transcript"
|
||||
return "manual"
|
||||
|
||||
|
||||
def _apply_event_defaults(data: dict[str, Any]) -> dict[str, Any]:
    """Fill in provenance and numeric defaults on an event payload.

    ``data`` is modified in place and also returned. The measurement kind and
    source provider are always (re)written with their inferred values; every
    other field is only set when the payload does not already carry it.
    """
    kind = _infer_measurement_kind(data)
    provider = _infer_source_provider(data, kind)
    data["measurement_kind"] = kind
    data["source_provider"] = provider

    # Session/transcript sources can borrow their identity from ref_id or,
    # failing that, session_id.
    if not data.get("source_id") and provider in {"codex_session", "claude_transcript", "llm_connect"}:
        fallback_id = data.get("ref_id") or data.get("session_id")
        if fallback_id:
            data["source_id"] = str(fallback_id)

    # An identified source without its own timestamp inherits created_at.
    needs_source_timestamp = not data.get("source_created_at")
    if needs_source_timestamp and data.get("created_at") and data.get("source_id"):
        data["source_created_at"] = data["created_at"]

    token_sum = (data.get("tokens_in") or 0) + (data.get("tokens_out") or 0)
    fallbacks: dict[str, Any] = {
        "confidence": DEFAULT_CONFIDENCE.get(kind, 0.35),
        "cached_input_tokens": 0,
        "reasoning_output_tokens": 0,
        "raw_total_tokens": token_sum,
        "raw_metadata": {},
    }
    for field_name, fallback in fallbacks.items():
        data.setdefault(field_name, fallback)

    if provider in SOURCE_PARSER_DEFAULTS:
        data.setdefault("parser_version", SOURCE_PARSER_DEFAULTS[provider])
    return data
|
||||
|
||||
|
||||
async def _populate_relationship_defaults(data: dict[str, Any], session: AsyncSession) -> dict[str, Any]:
|
||||
# Auto-populate workstream_id from task if not provided
|
||||
if data.get("task_id") and not data.get("workstream_id"):
|
||||
task = await session.get(Task, data["task_id"])
|
||||
@@ -33,6 +112,34 @@ async def create_token_event(
|
||||
ws = await session.get(Workstream, data["workstream_id"])
|
||||
if ws and ws.repo_id:
|
||||
data["repo_id"] = ws.repo_id
|
||||
return data
|
||||
|
||||
|
||||
async def _find_source_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent | None:
    """Look up the stored event that shares this payload's source identity.

    The identity is the triple (measurement_kind, source_provider, source_id).
    Payloads without a ``source_id`` never match and yield ``None`` without
    touching the database.
    """
    identity = data.get("source_id")
    if identity:
        lookup = select(TokenEvent).where(
            TokenEvent.measurement_kind == data["measurement_kind"],
            TokenEvent.source_provider == data["source_provider"],
            TokenEvent.source_id == identity,
        )
        found = await session.execute(lookup)
        return found.scalar_one_or_none()
    return None
|
||||
|
||||
|
||||
async def _create_or_upsert_event(data: dict[str, Any], session: AsyncSession) -> TokenEvent:
|
||||
data = _apply_event_defaults(data)
|
||||
data = await _populate_relationship_defaults(data, session)
|
||||
|
||||
existing = await _find_source_event(data, session)
|
||||
if existing is not None:
|
||||
for field, value in data.items():
|
||||
setattr(existing, field, value)
|
||||
await session.commit()
|
||||
await session.refresh(existing)
|
||||
return existing
|
||||
|
||||
event = TokenEvent(**data)
|
||||
session.add(event)
|
||||
@@ -41,6 +148,77 @@ async def create_token_event(
|
||||
return event
|
||||
|
||||
|
||||
def _filter_query(
    q,
    *,
    task_id: uuid.UUID | None = None,
    workstream_id: uuid.UUID | None = None,
    repo_id: uuid.UUID | None = None,
    ref_type: str | None = None,
    ref_id: str | None = None,
    model: str | None = None,
    agent: str | None = None,
    note: str | None = None,
    measurement_kind: str | None = None,
    source_provider: str | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    include_superseded: bool = True,
    unattributed: bool = False,
):
    """Narrow a ``TokenEvent`` select with the optional filters supplied.

    Falsy filter values are ignored. ``since`` is inclusive and ``until`` is
    exclusive, both compared against ``created_at``. Setting
    ``include_superseded`` to False drops events whose measurement kind is
    ``"superseded"``; ``unattributed`` keeps only events that have no repo,
    workstream or task. Returns the resulting query.
    """
    # Column/value pairs that translate to a plain equality condition.
    equality_filters = (
        (TokenEvent.task_id, task_id),
        (TokenEvent.workstream_id, workstream_id),
        (TokenEvent.repo_id, repo_id),
        (TokenEvent.ref_type, ref_type),
        (TokenEvent.ref_id, ref_id),
        (TokenEvent.model, model),
        (TokenEvent.agent, agent),
        (TokenEvent.note, note),
        (TokenEvent.measurement_kind, measurement_kind),
        (TokenEvent.source_provider, source_provider),
    )
    for column, wanted in equality_filters:
        if wanted:
            q = q.where(column == wanted)

    if since:
        q = q.where(TokenEvent.created_at >= since)
    if until:
        q = q.where(TokenEvent.created_at < until)
    if not include_superseded:
        q = q.where(TokenEvent.measurement_kind != "superseded")
    if unattributed:
        q = q.where(
            TokenEvent.repo_id.is_(None),
            TokenEvent.workstream_id.is_(None),
            TokenEvent.task_id.is_(None),
        )
    return q
|
||||
|
||||
|
||||
@router.post("/", response_model=TokenEventRead, status_code=status.HTTP_201_CREATED)
async def create_token_event(
    body: TokenEventCreate,
    session: AsyncSession = Depends(get_session),
) -> TokenEvent:
    # Fields the client left as None are dropped from the payload before it is
    # handed to the shared create-or-upsert helper, whose result is returned.
    payload = body.model_dump(exclude_none=True)
    event = await _create_or_upsert_event(payload, session)
    return event
|
||||
|
||||
|
||||
@router.post("/upsert", response_model=TokenEventRead)
async def upsert_token_event(
    body: TokenEventCreate,
    session: AsyncSession = Depends(get_session),
) -> TokenEvent:
    # Same payload handling as POST "/": None-valued fields are omitted and the
    # shared create-or-upsert helper does the work. This route does not set an
    # explicit status_code on its decorator.
    payload = body.model_dump(exclude_none=True)
    event = await _create_or_upsert_event(payload, session)
    return event
|
||||
|
||||
|
||||
@router.get("/summary/", response_model=TokenSummary)
|
||||
async def get_token_summary(
|
||||
scope: str = Query(..., description="task|workstream|repo|commit|release|session"),
|
||||
@@ -80,11 +258,16 @@ async def get_token_summary(
|
||||
|
||||
by_model: dict[str, int] = defaultdict(int)
|
||||
by_agent: dict[str, int] = defaultdict(int)
|
||||
by_measurement_kind: dict[str, int] = defaultdict(int)
|
||||
by_source_provider: dict[str, int] = defaultdict(int)
|
||||
for e in events:
|
||||
total = _event_total(e)
|
||||
if e.model:
|
||||
by_model[e.model] += e.tokens_in + e.tokens_out
|
||||
by_model[e.model] += total
|
||||
if e.agent:
|
||||
by_agent[e.agent] += e.tokens_in + e.tokens_out
|
||||
by_agent[e.agent] += total
|
||||
by_measurement_kind[e.measurement_kind] += total
|
||||
by_source_provider[e.source_provider] += total
|
||||
|
||||
return TokenSummary(
|
||||
scope=scope,
|
||||
@@ -95,11 +278,18 @@ async def get_token_summary(
|
||||
event_count=len(events),
|
||||
by_model=dict(by_model),
|
||||
by_agent=dict(by_agent),
|
||||
by_measurement_kind=dict(by_measurement_kind),
|
||||
by_source_provider=dict(by_source_provider),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/by-repo/", response_model=list[RepoTokenSummary])
|
||||
async def get_tokens_by_repo(
|
||||
measurement_kind: str | None = None,
|
||||
source_provider: str | None = None,
|
||||
since: datetime | None = None,
|
||||
until: datetime | None = None,
|
||||
include_superseded: bool = Query(True),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> list[RepoTokenSummary]:
|
||||
"""Aggregate token consumption per repo, resolving via the full graph.
|
||||
@@ -112,7 +302,16 @@ async def get_tokens_by_repo(
|
||||
Only events that resolve to a repo are included.
|
||||
"""
|
||||
# Fetch all events, workstreams, repos in three queries (avoids N+1)
|
||||
events_result = await session.execute(select(TokenEvent))
|
||||
events_result = await session.execute(
|
||||
_filter_query(
|
||||
select(TokenEvent),
|
||||
measurement_kind=measurement_kind,
|
||||
source_provider=source_provider,
|
||||
since=since,
|
||||
until=until,
|
||||
include_superseded=include_superseded,
|
||||
)
|
||||
)
|
||||
events = list(events_result.scalars().all())
|
||||
|
||||
ws_result = await session.execute(select(Workstream))
|
||||
@@ -148,14 +347,19 @@ async def get_tokens_by_repo(
|
||||
"event_count": 0,
|
||||
"by_model": defaultdict(int),
|
||||
"by_note": defaultdict(int),
|
||||
"by_measurement_kind": defaultdict(int),
|
||||
"by_source_provider": defaultdict(int),
|
||||
}
|
||||
g = groups[rid]
|
||||
g["tokens_in"] += e.tokens_in
|
||||
g["tokens_out"] += e.tokens_out
|
||||
g["event_count"] += 1
|
||||
total = _event_total(e)
|
||||
if e.model:
|
||||
g["by_model"][e.model] += e.tokens_in + e.tokens_out
|
||||
g["by_note"][e.note or "unknown"] += e.tokens_in + e.tokens_out
|
||||
g["by_model"][e.model] += total
|
||||
g["by_note"][e.note or "unknown"] += total
|
||||
g["by_measurement_kind"][e.measurement_kind] += total
|
||||
g["by_source_provider"][e.source_provider] += total
|
||||
|
||||
return [
|
||||
RepoTokenSummary(
|
||||
@@ -166,6 +370,188 @@ async def get_tokens_by_repo(
|
||||
]
|
||||
|
||||
|
||||
@router.get("/aggregate/", response_model=TokenAggregateSummary)
async def get_token_aggregate(
    measurement_kind: str | None = None,
    source_provider: str | None = None,
    since: datetime | None = None,
    until: datetime | None = None,
    include_superseded: bool = Query(False),
    session: AsyncSession = Depends(get_session),
) -> TokenAggregateSummary:
    # Builds one summary over the filtered events: overall token totals, the
    # created_at / ingested_at bounds, and per-repo, per-workstream, per-task
    # and per-model breakdowns. Superseded events are excluded unless the
    # caller passes include_superseded=true.
    event_query = _filter_query(
        select(TokenEvent),
        measurement_kind=measurement_kind,
        source_provider=source_provider,
        since=since,
        until=until,
        include_superseded=include_superseded,
    )
    events = list((await session.execute(event_query)).scalars().all())

    # Load the lookup tables once so resolving an event needs no extra query.
    workstream_rows = await session.execute(select(Workstream))
    workstreams: dict[uuid.UUID, Workstream] = {w.id: w for w in workstream_rows.scalars().all()}

    task_rows = await session.execute(select(Task))
    tasks: dict[uuid.UUID, Task] = {t.id: t for t in task_rows.scalars().all()}

    repo_rows = await session.execute(select(ManagedRepo))
    repos: dict[uuid.UUID, ManagedRepo] = {r.id: r for r in repo_rows.scalars().all()}

    def workstream_id_of(event: TokenEvent) -> uuid.UUID | None:
        # The event's own workstream wins; otherwise use its task's workstream.
        if event.workstream_id:
            return event.workstream_id
        if event.task_id in tasks:
            return tasks[event.task_id].workstream_id
        return None

    def repo_id_of(event: TokenEvent) -> uuid.UUID | None:
        # Direct repo link first, then the repo of the resolved workstream.
        if event.repo_id:
            return event.repo_id
        resolved_ws = workstream_id_of(event)
        if resolved_ws and resolved_ws in workstreams:
            return workstreams[resolved_ws].repo_id
        return None

    def tally(
        buckets: dict[str, dict[str, Any]],
        key: str | None,
        label: str | None,
        event: TokenEvent,
    ) -> None:
        # Events with no key for this dimension are left out of its breakdown.
        if not key:
            return
        bucket = buckets.get(key)
        if bucket is None:
            bucket = buckets[key] = {
                "scope_id": key,
                "label": label,
                "tokens_in": 0,
                "tokens_out": 0,
                "event_count": 0,
                "by_measurement_kind": defaultdict(int),
                "by_source_provider": defaultdict(int),
            }
        event_tokens = _event_total(event)
        bucket["tokens_in"] += event.tokens_in
        bucket["tokens_out"] += event.tokens_out
        bucket["event_count"] += 1
        bucket["by_measurement_kind"][event.measurement_kind] += event_tokens
        bucket["by_source_provider"][event.source_provider] += event_tokens

    repo_buckets: dict[str, dict[str, Any]] = {}
    workstream_buckets: dict[str, dict[str, Any]] = {}
    task_buckets: dict[str, dict[str, Any]] = {}
    model_buckets: dict[str, dict[str, Any]] = {}
    kind_totals: dict[str, int] = defaultdict(int)
    provider_totals: dict[str, int] = defaultdict(int)

    earliest_created = None
    latest_created = None
    latest_ingested = None
    total_in = 0
    total_out = 0

    for event in events:
        event_tokens = _event_total(event)
        total_in += event.tokens_in
        total_out += event.tokens_out
        kind_totals[event.measurement_kind] += event_tokens
        provider_totals[event.source_provider] += event_tokens

        if earliest_created is None or event.created_at < earliest_created:
            earliest_created = event.created_at
        if latest_created is None or event.created_at > latest_created:
            latest_created = event.created_at
        if latest_ingested is None or event.ingested_at > latest_ingested:
            latest_ingested = event.ingested_at

        repo_id = repo_id_of(event)
        repo = repos.get(repo_id) if repo_id else None
        tally(repo_buckets, str(repo_id) if repo_id else None, repo.slug if repo else None, event)

        ws_id = workstream_id_of(event)
        workstream = workstreams.get(ws_id) if ws_id else None
        tally(
            workstream_buckets,
            str(ws_id) if ws_id else None,
            workstream.title if workstream else None,
            event,
        )

        task = tasks.get(event.task_id) if event.task_id else None
        tally(
            task_buckets,
            str(event.task_id) if event.task_id else None,
            task.title if task else None,
            event,
        )

        model_name = event.model or "unknown"
        tally(model_buckets, model_name, model_name, event)

    def as_rows(buckets: dict[str, dict[str, Any]]) -> list[TokenAggregateRow]:
        # Convert the working dicts into response rows, largest total first.
        built = [
            TokenAggregateRow(
                **{
                    name: (dict(value) if isinstance(value, defaultdict) else value)
                    for name, value in bucket.items()
                },
                tokens_total=bucket["tokens_in"] + bucket["tokens_out"],
            )
            for bucket in buckets.values()
        ]
        built.sort(key=lambda item: -item.tokens_total)
        return built

    return TokenAggregateSummary(
        tokens_in=total_in,
        tokens_out=total_out,
        tokens_total=total_in + total_out,
        event_count=len(events),
        first_event_at=earliest_created,
        last_event_at=latest_created,
        last_ingested_at=latest_ingested,
        by_repo=as_rows(repo_buckets),
        by_workstream=as_rows(workstream_buckets),
        by_task=as_rows(task_buckets),
        by_model=as_rows(model_buckets),
        by_measurement_kind=dict(kind_totals),
        by_source_provider=dict(provider_totals),
    )
|
||||
|
||||
|
||||
@router.get("/quality/", response_model=TokenQualitySummary)
async def get_token_quality(
    since: datetime | None = None,
    until: datetime | None = None,
    session: AsyncSession = Depends(get_session),
) -> TokenQualitySummary:
    # Counts events (not tokens) per measurement kind and source provider in
    # the optional created_at window, and flags data-quality signals: fallback
    # events, measured events lacking attribution or a source_id, and source
    # identities that occur more than once.
    window_query = _filter_query(select(TokenEvent), since=since, until=until)
    events = list((await session.execute(window_query)).scalars().all())

    kind_counts: dict[str, int] = defaultdict(int)
    provider_counts: dict[str, int] = defaultdict(int)
    identity_counts: dict[tuple[str, str, str], int] = defaultdict(int)
    # Most recent ingested_at seen for each provider tracked in the response.
    newest_ingest: dict[str, datetime | None] = {
        "codex_session": None,
        "claude_transcript": None,
    }

    fallbacks = 0
    unattributed_measured = 0
    missing_provenance = 0

    for event in events:
        kind_counts[event.measurement_kind] += 1
        provider_counts[event.source_provider] += 1

        if event.source_id:
            identity = (event.measurement_kind, event.source_provider, event.source_id)
            identity_counts[identity] += 1

        if event.source_provider == "task_fallback" or event.note == "heuristic":
            fallbacks += 1

        if event.measurement_kind == "measured":
            if not (event.repo_id or event.workstream_id or event.task_id):
                unattributed_measured += 1
            if not event.source_id:
                missing_provenance += 1

        if event.source_provider in newest_ingest:
            seen = newest_ingest[event.source_provider]
            if seen is None or event.ingested_at > seen:
                newest_ingest[event.source_provider] = event.ingested_at

    # Number of distinct identities that appear on more than one event.
    repeated_identities = len([n for n in identity_counts.values() if n > 1])

    return TokenQualitySummary(
        event_count=len(events),
        measured_event_count=kind_counts.get("measured", 0),
        estimated_event_count=kind_counts.get("estimated", 0),
        allocated_event_count=kind_counts.get("allocated", 0),
        superseded_event_count=kind_counts.get("superseded", 0),
        fallback_event_count=fallbacks,
        unattributed_measured_event_count=unattributed_measured,
        missing_provenance_event_count=missing_provenance,
        duplicate_source_count=repeated_identities,
        last_codex_ingested_at=newest_ingest["codex_session"],
        last_claude_ingested_at=newest_ingest["claude_transcript"],
        last_reconciliation_at=None,
        by_measurement_kind=dict(kind_counts),
        by_source_provider=dict(provider_counts),
    )
|
||||
|
||||
|
||||
@router.patch("/{event_id}", response_model=TokenEventRead)
|
||||
async def patch_token_event(
|
||||
event_id: uuid.UUID,
|
||||
@@ -175,7 +561,26 @@ async def patch_token_event(
|
||||
event = await session.get(TokenEvent, event_id)
|
||||
if event is None:
|
||||
raise HTTPException(status_code=404, detail="Token event not found")
|
||||
for field, value in body.model_dump(exclude_none=True).items():
|
||||
data = body.model_dump(exclude_none=True)
|
||||
if "note" in data or "measurement_kind" in data or "source_provider" in data:
|
||||
merged = {
|
||||
"tokens_in": data.get("tokens_in", event.tokens_in),
|
||||
"tokens_out": data.get("tokens_out", event.tokens_out),
|
||||
"note": data.get("note", event.note),
|
||||
"agent": data.get("agent", event.agent),
|
||||
"ref_id": data.get("ref_id", event.ref_id),
|
||||
"session_id": data.get("session_id", event.session_id),
|
||||
"measurement_kind": data.get("measurement_kind", event.measurement_kind),
|
||||
"source_provider": data.get("source_provider", event.source_provider),
|
||||
"source_id": data.get("source_id", event.source_id),
|
||||
}
|
||||
inferred = _apply_event_defaults({k: v for k, v in merged.items() if v is not None})
|
||||
data.setdefault("measurement_kind", inferred["measurement_kind"])
|
||||
data.setdefault("source_provider", inferred["source_provider"])
|
||||
data.setdefault("confidence", inferred["confidence"])
|
||||
if inferred.get("source_id"):
|
||||
data.setdefault("source_id", inferred["source_id"])
|
||||
for field, value in data.items():
|
||||
setattr(event, field, value)
|
||||
await session.commit()
|
||||
await session.refresh(event)
|
||||
@@ -203,26 +608,33 @@ async def list_token_events(
|
||||
model: str | None = None,
|
||||
agent: str | None = None,
|
||||
note: str | None = None,
|
||||
measurement_kind: str | None = None,
|
||||
source_provider: str | None = None,
|
||||
since: datetime | None = None,
|
||||
until: datetime | None = None,
|
||||
include_superseded: bool = Query(True),
|
||||
unattributed: bool = False,
|
||||
offset: int = Query(0, ge=0),
|
||||
limit: int = Query(100, le=1000),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> list[TokenEvent]:
|
||||
q = select(TokenEvent)
|
||||
if task_id:
|
||||
q = q.where(TokenEvent.task_id == task_id)
|
||||
if workstream_id:
|
||||
q = q.where(TokenEvent.workstream_id == workstream_id)
|
||||
if repo_id:
|
||||
q = q.where(TokenEvent.repo_id == repo_id)
|
||||
if ref_type:
|
||||
q = q.where(TokenEvent.ref_type == ref_type)
|
||||
if ref_id:
|
||||
q = q.where(TokenEvent.ref_id == ref_id)
|
||||
if model:
|
||||
q = q.where(TokenEvent.model == model)
|
||||
if agent:
|
||||
q = q.where(TokenEvent.agent == agent)
|
||||
if note:
|
||||
q = q.where(TokenEvent.note == note)
|
||||
q = q.order_by(TokenEvent.created_at.desc()).limit(limit)
|
||||
q = _filter_query(
|
||||
select(TokenEvent),
|
||||
task_id=task_id,
|
||||
workstream_id=workstream_id,
|
||||
repo_id=repo_id,
|
||||
ref_type=ref_type,
|
||||
ref_id=ref_id,
|
||||
model=model,
|
||||
agent=agent,
|
||||
note=note,
|
||||
measurement_kind=measurement_kind,
|
||||
source_provider=source_provider,
|
||||
since=since,
|
||||
until=until,
|
||||
include_superseded=include_superseded,
|
||||
unattributed=unattributed,
|
||||
)
|
||||
q = q.order_by(TokenEvent.created_at.desc()).offset(offset).limit(limit)
|
||||
result = await session.execute(q)
|
||||
return list(result.scalars().all())
|
||||
|
||||
@@ -43,6 +43,7 @@ class TaskUpdate(BaseModel):
|
||||
# 2. workplan_tokens_in + workplan_tokens_out → prorated across task count (note="workplan")
|
||||
# 3. neither provided, status=done → heuristic 1000/500 (note="heuristic")
|
||||
# token_note overrides the auto-assigned note for Tier 1 only (e.g. "userbased")
|
||||
# suppress_token_event lets file/cache sync update status without recording usage.
|
||||
tokens_in: int | None = None
|
||||
tokens_out: int | None = None
|
||||
workplan_tokens_in: int | None = None
|
||||
@@ -51,6 +52,7 @@ class TaskUpdate(BaseModel):
|
||||
model: str | None = None
|
||||
agent: str | None = None
|
||||
session_id: str | None = None
|
||||
suppress_token_event: bool | None = None
|
||||
|
||||
@model_validator(mode="after")
|
||||
def blocking_reason_required_when_blocked(self) -> Self:
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, computed_field
|
||||
from pydantic import BaseModel, ConfigDict, Field, computed_field
|
||||
|
||||
|
||||
class TokenEventCreate(BaseModel):
|
||||
@@ -16,6 +17,19 @@ class TokenEventCreate(BaseModel):
|
||||
ref_type: str | None = None
|
||||
ref_id: str | None = None
|
||||
note: str | None = None
|
||||
created_at: datetime | None = None
|
||||
measurement_kind: str | None = None
|
||||
source_provider: str | None = None
|
||||
source_id: str | None = None
|
||||
source_path: str | None = None
|
||||
source_created_at: datetime | None = None
|
||||
parser_version: str | None = None
|
||||
confidence: float | None = None
|
||||
cached_input_tokens: int | None = None
|
||||
reasoning_output_tokens: int | None = None
|
||||
raw_total_tokens: int | None = None
|
||||
cost_estimated_usd: float | None = None
|
||||
raw_metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class TokenEventRead(BaseModel):
|
||||
@@ -33,6 +47,19 @@ class TokenEventRead(BaseModel):
|
||||
ref_type: str | None = None
|
||||
ref_id: str | None = None
|
||||
note: str | None = None
|
||||
measurement_kind: str
|
||||
source_provider: str
|
||||
source_id: str | None = None
|
||||
source_path: str | None = None
|
||||
source_created_at: datetime | None = None
|
||||
ingested_at: datetime
|
||||
parser_version: str | None = None
|
||||
confidence: float
|
||||
cached_input_tokens: int
|
||||
reasoning_output_tokens: int
|
||||
raw_total_tokens: int | None = None
|
||||
cost_estimated_usd: float | None = None
|
||||
raw_metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
created_at: datetime
|
||||
|
||||
@computed_field
|
||||
@@ -40,6 +67,11 @@ class TokenEventRead(BaseModel):
|
||||
def tokens_total(self) -> int:
|
||||
return self.tokens_in + self.tokens_out
|
||||
|
||||
@computed_field
|
||||
@property
|
||||
def token_evidence_total(self) -> int:
|
||||
return (self.raw_total_tokens or self.tokens_in + self.tokens_out)
|
||||
|
||||
|
||||
class TokenSummary(BaseModel):
|
||||
scope: str
|
||||
@@ -50,14 +82,36 @@ class TokenSummary(BaseModel):
|
||||
event_count: int
|
||||
by_model: dict[str, int]
|
||||
by_agent: dict[str, int]
|
||||
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
|
||||
by_source_provider: dict[str, int] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class TokenEventPatch(BaseModel):
|
||||
tokens_in: int | None = None
|
||||
tokens_out: int | None = None
|
||||
task_id: uuid.UUID | None = None
|
||||
workstream_id: uuid.UUID | None = None
|
||||
repo_id: uuid.UUID | None = None
|
||||
session_id: str | None = None
|
||||
note: str | None = None
|
||||
model: str | None = None
|
||||
agent: str | None = None
|
||||
ref_type: str | None = None
|
||||
ref_id: str | None = None
|
||||
created_at: datetime | None = None
|
||||
measurement_kind: str | None = None
|
||||
source_provider: str | None = None
|
||||
source_id: str | None = None
|
||||
source_path: str | None = None
|
||||
source_created_at: datetime | None = None
|
||||
ingested_at: datetime | None = None
|
||||
parser_version: str | None = None
|
||||
confidence: float | None = None
|
||||
cached_input_tokens: int | None = None
|
||||
reasoning_output_tokens: int | None = None
|
||||
raw_total_tokens: int | None = None
|
||||
cost_estimated_usd: float | None = None
|
||||
raw_metadata: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class RepoTokenSummary(BaseModel):
|
||||
@@ -69,3 +123,49 @@ class RepoTokenSummary(BaseModel):
|
||||
event_count: int
|
||||
by_model: dict[str, int]
|
||||
by_note: dict[str, int]
|
||||
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
|
||||
by_source_provider: dict[str, int] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class TokenAggregateRow(BaseModel):
|
||||
scope_id: str
|
||||
label: str | None = None
|
||||
tokens_in: int
|
||||
tokens_out: int
|
||||
tokens_total: int
|
||||
event_count: int
|
||||
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
|
||||
by_source_provider: dict[str, int] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class TokenAggregateSummary(BaseModel):
|
||||
tokens_in: int
|
||||
tokens_out: int
|
||||
tokens_total: int
|
||||
event_count: int
|
||||
first_event_at: datetime | None = None
|
||||
last_event_at: datetime | None = None
|
||||
last_ingested_at: datetime | None = None
|
||||
by_repo: list[TokenAggregateRow] = Field(default_factory=list)
|
||||
by_workstream: list[TokenAggregateRow] = Field(default_factory=list)
|
||||
by_task: list[TokenAggregateRow] = Field(default_factory=list)
|
||||
by_model: list[TokenAggregateRow] = Field(default_factory=list)
|
||||
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
|
||||
by_source_provider: dict[str, int] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class TokenQualitySummary(BaseModel):
|
||||
event_count: int
|
||||
measured_event_count: int
|
||||
estimated_event_count: int
|
||||
allocated_event_count: int
|
||||
superseded_event_count: int
|
||||
fallback_event_count: int
|
||||
unattributed_measured_event_count: int
|
||||
missing_provenance_event_count: int
|
||||
duplicate_source_count: int
|
||||
last_codex_ingested_at: datetime | None = None
|
||||
last_claude_ingested_at: datetime | None = None
|
||||
last_reconciliation_at: datetime | None = None
|
||||
by_measurement_kind: dict[str, int] = Field(default_factory=dict)
|
||||
by_source_provider: dict[str, int] = Field(default_factory=dict)
|
||||
|
||||
16
api/services/token_sources/__init__.py
Normal file
16
api/services/token_sources/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Token source adapters for measured agent usage."""
|
||||
|
||||
from api.services.token_sources.base import TokenSourceRecord, parse_iso
|
||||
from api.services.token_sources.codex import collect_codex_sessions, iter_codex_session_files, parse_codex_session
|
||||
from api.services.token_sources.claude import collect_claude_transcripts, iter_claude_transcript_files, parse_claude_transcript
|
||||
|
||||
__all__ = [
|
||||
"TokenSourceRecord",
|
||||
"parse_iso",
|
||||
"collect_codex_sessions",
|
||||
"iter_codex_session_files",
|
||||
"parse_codex_session",
|
||||
"collect_claude_transcripts",
|
||||
"iter_claude_transcript_files",
|
||||
"parse_claude_transcript",
|
||||
]
|
||||
171
api/services/token_sources/attribution.py
Normal file
171
api/services/token_sources/attribution.py
Normal file
@@ -0,0 +1,171 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RepoRef:
    # Immutable description of one registered repository, as used by the
    # attribution helpers in this module to match a working directory.
    repo_id: str
    slug: str
    # Primary checkout location; may be absent.
    local_path: str | None = None
    # Additional checkout locations; only the mapping's values are read here.
    host_paths: dict[str, Any] | None = None
    # Remote URL, compared after normalisation.
    remote_url: str | None = None
    # Compared against the root-commit hash reported by git for a directory.
    git_fingerprint: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RepoMatch:
    # Immutable result of attributing a working directory to a repository.
    repo_id: str
    slug: str
    # Name of the strategy that produced the match (e.g. "git_fingerprint",
    # "remote_url", "path_exact", "path_prefix").
    method: str
    # Score attached by the matching strategy; higher means more certain.
    confidence: float
|
||||
|
||||
|
||||
def normalise_cwd(raw: str | None) -> str | None:
    """Convert a recorded working directory into a forward-slash POSIX-style path.

    Backslashes are turned into forward slashes, then:

    * a ``//wsl.localhost/Ubuntu-24.04`` or ``//wsl$/Ubuntu-24.04`` share
      prefix is removed (the bare share itself becomes ``/``);
    * a drive-letter path such as ``C:/Users/me`` becomes ``/mnt/c/Users/me``;
    * anything else is returned unchanged.

    Args:
        raw: The path as recorded by the agent, or ``None``.

    Returns:
        The normalised path, or ``None`` when ``raw`` is empty or ``None``.
    """
    if not raw:
        return None
    value = raw.replace("\\", "/")
    prefixes = (
        "//wsl.localhost/Ubuntu-24.04",
        "//wsl$/Ubuntu-24.04",
    )
    for prefix in prefixes:
        # Only strip on a path-component boundary, so a differently named
        # share such as ".../Ubuntu-24.04-lts/..." is not truncated mid-name.
        if value == prefix or value.startswith(f"{prefix}/"):
            return value[len(prefix):] or "/"
    # A drive specifier is a single letter followed by ":/".
    if len(value) >= 3 and value[0].isalpha() and value[1:3] == ":/":
        drive = value[0].lower()
        return f"/mnt/{drive}{value[2:]}"
    return value
|
||||
|
||||
|
||||
def normalise_remote_url(raw: str | None) -> str | None:
    """Reduce a git remote URL to a canonical, comparable form.

    Surrounding whitespace and trailing slashes are removed, a trailing
    ``.git`` is dropped, the scp-like ``git@host:path`` form is rewritten as
    ``ssh://host/path``, and the result is lower-cased.

    Args:
        raw: The remote URL as stored or as reported by git, or ``None``.

    Returns:
        The canonical URL, or ``None`` when ``raw`` is ``None``, empty or
        contains only whitespace.
    """
    if not raw:
        return None
    # Drop trailing slashes *before* looking for ".git", otherwise
    # "https://host/repo.git/" would keep its suffix and never match
    # "https://host/repo".
    value = raw.strip().rstrip("/")
    if not value:
        return None
    if value.endswith(".git"):
        value = value[:-4]
    if value.startswith("git@") and ":" in value:
        host, path = value[4:].split(":", 1)
        value = f"ssh://{host}/{path}"
    return value.lower().rstrip("/")
|
||||
|
||||
|
||||
def repo_refs_from_api(repos: list[dict[str, Any]]) -> list[RepoRef]:
    """Build ``RepoRef`` objects from repository dictionaries.

    Entries lacking a truthy ``id`` or ``slug`` are skipped. ``host_paths`` is
    kept only when it is a dict; any other value is replaced by an empty dict.

    Args:
        repos: Repository records; the keys read are ``id``, ``slug``,
            ``local_path``, ``host_paths``, ``remote_url`` and
            ``git_fingerprint``.

    Returns:
        One ``RepoRef`` per usable record, in input order.
    """

    def _to_ref(record: dict[str, Any]) -> RepoRef:
        host_paths = record.get("host_paths")
        return RepoRef(
            repo_id=str(record.get("id")),
            slug=str(record.get("slug")),
            local_path=record.get("local_path"),
            host_paths=host_paths if isinstance(host_paths, dict) else {},
            remote_url=record.get("remote_url"),
            git_fingerprint=record.get("git_fingerprint"),
        )

    return [_to_ref(record) for record in repos if record.get("id") and record.get("slug")]
|
||||
|
||||
|
||||
def _git(cwd: str, *args: str) -> str | None:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", *args],
|
||||
cwd=cwd,
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (OSError, subprocess.SubprocessError):
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
value = result.stdout.strip().splitlines()
|
||||
return value[0] if value else None
|
||||
|
||||
|
||||
def git_fingerprint_for_path(cwd: str | None) -> str | None:
    """Return the first line of ``git rev-list --max-parents=0 HEAD`` for ``cwd``.

    The directory is normalised first, and the command is run from the
    work-tree root reported by ``git rev-parse --show-toplevel``.

    Returns:
        The hash line printed by git, or ``None`` when the path is missing,
        does not exist on disk, is not inside a work tree, or git fails.
    """
    path = normalise_cwd(cwd)
    if path and Path(path).exists():
        root = _git(path, "rev-parse", "--show-toplevel")
        if root:
            return _git(root, "rev-list", "--max-parents=0", "HEAD")
    return None
|
||||
|
||||
|
||||
def git_remote_for_path(cwd: str | None) -> str | None:
    """Return the URL of the ``origin`` remote for the work tree containing ``cwd``.

    The directory is normalised first, and ``git remote get-url origin`` is
    run from the work-tree root reported by ``git rev-parse --show-toplevel``.

    Returns:
        The URL as printed by git (not normalised), or ``None`` when the path
        is missing, does not exist on disk, is not inside a work tree, or git
        fails.
    """
    path = normalise_cwd(cwd)
    if path and Path(path).exists():
        root = _git(path, "rev-parse", "--show-toplevel")
        if root:
            return _git(root, "remote", "get-url", "origin")
    return None
|
||||
|
||||
|
||||
def _repo_paths(repo: RepoRef) -> list[str]:
    """List every known checkout path of ``repo`` in normalised form.

    ``local_path`` comes first, followed by the truthy values of
    ``host_paths``. Empty entries and the placeholder ``"(unknown)"`` are
    dropped; the rest are passed through ``normalise_cwd`` and stripped of
    trailing slashes.
    """
    candidates = [repo.local_path]
    if repo.host_paths:
        candidates += [str(value) for value in repo.host_paths.values() if value]

    normalised = []
    for candidate in candidates:
        if not candidate or candidate == "(unknown)":
            continue
        path = normalise_cwd(str(candidate))
        if path:
            normalised.append(path.rstrip("/"))
    return normalised
|
||||
|
||||
|
||||
def resolve_repo(cwd: str | None, repos: list[RepoRef]) -> RepoMatch | None:
    """Attribute a working directory to one of the known repositories.

    Strategies are tried in this order and the first unambiguous hit wins:

    1. ``git_fingerprint`` (0.98) - exactly one repo shares the directory's
       git fingerprint.
    2. ``git_fingerprint_remote`` (0.99) - the fingerprint matched zero or
       several repos and exactly one of those also shares the remote URL.
    3. ``remote_url`` (0.90) - exactly one repo shares the normalised remote.
    4. ``path_exact_slug`` (0.85) - the directory equals a registered path
       and its last component equals the repo slug.
    5. ``path_exact`` (0.80) - the directory equals a registered path.
    6. ``path_prefix`` (0.75) - the directory lies below a registered path;
       the longest registered path wins.

    Args:
        cwd: Working directory recorded for a session (any form accepted by
            ``normalise_cwd``), or ``None``.
        repos: Candidate repositories.

    Returns:
        The match with its method and confidence, or ``None`` when nothing
        matches.
    """
    path = normalise_cwd(cwd)

    fingerprint = git_fingerprint_for_path(path)
    fingerprint_candidates = (
        [repo for repo in repos if repo.git_fingerprint == fingerprint] if fingerprint else []
    )
    if len(fingerprint_candidates) == 1:
        repo = fingerprint_candidates[0]
        return RepoMatch(repo.repo_id, repo.slug, "git_fingerprint", 0.98)

    # The remote is only needed once the fingerprint failed to decide, so the
    # extra git subprocesses are skipped on the common fast path.
    remote = normalise_remote_url(git_remote_for_path(path))
    if remote:
        # Disambiguate several fingerprint hits by remote URL. The list is
        # empty when there was no fingerprint, so this cannot fire then.
        narrowed = [
            repo for repo in fingerprint_candidates
            if normalise_remote_url(repo.remote_url) == remote
        ]
        if len(narrowed) == 1:
            repo = narrowed[0]
            return RepoMatch(repo.repo_id, repo.slug, "git_fingerprint_remote", 0.99)

        remote_candidates = [
            repo for repo in repos if normalise_remote_url(repo.remote_url) == remote
        ]
        if len(remote_candidates) == 1:
            repo = remote_candidates[0]
            return RepoMatch(repo.repo_id, repo.slug, "remote_url", 0.90)

    if not path:
        return None

    path_matches: list[tuple[str, RepoRef]] = []
    for repo in repos:
        for repo_path in _repo_paths(repo):
            if path == repo_path or path.startswith(f"{repo_path}/"):
                path_matches.append((repo_path, repo))
    if not path_matches:
        return None

    # Longest registered path first, so nested checkouts beat their parents.
    path_matches.sort(key=lambda item: len(item[0]), reverse=True)

    exact = [item for item in path_matches if path == item[0]]
    if exact:
        basename = Path(path).name
        for _, repo in exact:
            if repo.slug == basename:
                return RepoMatch(repo.repo_id, repo.slug, "path_exact_slug", 0.85)
        repo = exact[0][1]
        return RepoMatch(repo.repo_id, repo.slug, "path_exact", 0.80)

    repo = path_matches[0][1]
    return RepoMatch(repo.repo_id, repo.slug, "path_prefix", 0.75)
|
||||
71
api/services/token_sources/base.py
Normal file
71
api/services/token_sources/base.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def parse_iso(value: str) -> datetime:
    """Parse an ISO-8601 date or timestamp into an aware UTC ``datetime``.

    * A trailing ``Z`` is read as ``+00:00``.
    * A bare date (``YYYY-MM-DD``) is read as midnight UTC.
    * A timestamp without an offset is assumed to be UTC.

    Args:
        value: The textual timestamp; surrounding whitespace is ignored.

    Returns:
        The instant converted to UTC.

    Raises:
        ValueError: If the text is not accepted by ``datetime.fromisoformat``.
    """
    raw = value.strip()
    if raw.endswith("Z"):
        raw = raw[:-1] + "+00:00"
    # Only a bare date needs a time part. Testing for a missing "T" would also
    # catch valid space-separated timestamps ("2024-01-02 03:04:05") and turn
    # them into unparseable text.
    if len(raw) <= 10:
        raw = f"{raw}T00:00:00+00:00"
    parsed = datetime.fromisoformat(raw)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
|
||||
|
||||
|
||||
@dataclass
class TokenSourceRecord:
    """Token usage aggregated from a single agent session file."""

    source_provider: str
    source_id: str
    source_path: Path
    source_created_at: datetime | None
    session_id: str | None = None
    cwd: str | None = None
    model: str | None = None
    agent: str | None = None
    tokens_in: int = 0
    tokens_out: int = 0
    cached_input_tokens: int = 0
    reasoning_output_tokens: int = 0
    raw_total_tokens: int | None = None
    parser_version: str | None = None
    confidence: float = 1.0
    raw_metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def tokens_total(self) -> int:
        """Sum of input and output tokens."""
        return self.tokens_in + self.tokens_out

    def to_token_event_payload(self, repo_id: str | None = None) -> dict[str, Any]:
        """Render the record as a token-event payload dictionary.

        ``raw_total_tokens`` falls back to ``tokens_in + tokens_out`` when it
        was not recorded, and both ``created_at`` and ``source_created_at``
        carry the ISO form of ``source_created_at`` (or ``None``).

        Args:
            repo_id: Repository the usage is attributed to, if known.
        """
        if self.raw_total_tokens is None:
            raw_total = self.tokens_in + self.tokens_out
        else:
            raw_total = self.raw_total_tokens

        created_at = None
        if self.source_created_at:
            created_at = self.source_created_at.isoformat()

        payload: dict[str, Any] = {}
        payload["tokens_in"] = self.tokens_in
        payload["tokens_out"] = self.tokens_out
        payload["repo_id"] = repo_id
        payload["session_id"] = self.session_id
        payload["model"] = self.model
        payload["agent"] = self.agent
        payload["ref_type"] = "session"
        payload["ref_id"] = self.source_id
        payload["note"] = f"measured:{self.source_provider}"
        payload["created_at"] = created_at
        payload["measurement_kind"] = "measured"
        payload["source_provider"] = self.source_provider
        payload["source_id"] = self.source_id
        payload["source_path"] = str(self.source_path)
        payload["source_created_at"] = created_at
        payload["parser_version"] = self.parser_version
        payload["confidence"] = self.confidence
        payload["cached_input_tokens"] = self.cached_input_tokens
        payload["reasoning_output_tokens"] = self.reasoning_output_tokens
        payload["raw_total_tokens"] = raw_total
        payload["raw_metadata"] = self.raw_metadata
        return payload
|
||||
120
api/services/token_sources/claude.py
Normal file
120
api/services/token_sources/claude.py
Normal file
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from api.services.token_sources.base import TokenSourceRecord, parse_iso
|
||||
|
||||
PARSER_VERSION = "claude-transcript-v1"
|
||||
|
||||
|
||||
def iter_claude_transcript_files(claude_home: Path) -> list[Path]:
    """Return every ``*.jsonl`` file below ``<claude_home>/projects``, sorted.

    The search is recursive. An empty list is returned when the ``projects``
    directory does not exist.
    """
    root = claude_home / "projects"
    return sorted(root.rglob("*.jsonl")) if root.is_dir() else []
|
||||
|
||||
|
||||
def _usage_from_entry(entry: dict[str, Any]) -> dict[str, Any]:
|
||||
message = entry.get("message")
|
||||
if isinstance(message, dict) and isinstance(message.get("usage"), dict):
|
||||
return message["usage"]
|
||||
usage = entry.get("usage")
|
||||
return usage if isinstance(usage, dict) else {}
|
||||
|
||||
|
||||
def parse_claude_transcript(path: Path, since: datetime) -> TokenSourceRecord | None:
    """Aggregate token usage from one Claude transcript (JSONL) file.

    Every line is read as a JSON object. Session id, working directory, model
    and first/last timestamps are tracked across all entries. Usage counters
    are only added for entries that carry a usage dict and are not older than
    ``since`` (entries without a parseable timestamp are counted).

    Args:
        path: Transcript file; its stem is the fallback session id.
        since: Lower bound for usage entries. It is compared with the
            timezone-aware values produced by ``parse_iso``, so it must be
            timezone-aware as well.

    Returns:
        A ``TokenSourceRecord`` whose ``tokens_in`` / ``tokens_out`` sum the
        plain input / output tokens, whose ``cached_input_tokens`` sums cache
        creation and cache read tokens, and whose ``raw_total_tokens`` sums
        all four. ``None`` when the file cannot be opened or holds no usage.
    """
    session_id = path.stem
    cwd: str | None = None
    model: str | None = None
    first_at: datetime | None = None
    last_at: datetime | None = None
    tokens_in = tokens_out = 0
    cached_input_tokens = 0
    raw_total_tokens = 0
    usage_records = 0
    malformed_lines = 0

    try:
        handle = path.open("r", encoding="utf-8", errors="ignore")
    except OSError:
        return None

    with handle:
        for line in handle:
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                malformed_lines += 1
                continue
            # Valid JSON that is not an object (a list, number, string, ...)
            # has no fields to read; count it as malformed instead of crashing.
            if not isinstance(entry, dict):
                malformed_lines += 1
                continue

            ts = entry.get("timestamp") or entry.get("created_at")
            parsed_ts: datetime | None = None
            if isinstance(ts, str):
                try:
                    parsed_ts = parse_iso(ts)
                except ValueError:
                    # One unreadable timestamp must not abort the whole file;
                    # the entry is handled like one without a timestamp.
                    parsed_ts = None
            if parsed_ts:
                first_at = first_at or parsed_ts
                last_at = parsed_ts

            session_id = str(entry.get("session_id") or entry.get("conversation_id") or session_id)
            cwd = entry.get("cwd") or entry.get("project_cwd") or cwd
            model = entry.get("model") or model
            message = entry.get("message")
            if isinstance(message, dict):
                model = message.get("model") or model

            usage = _usage_from_entry(entry)
            if not usage:
                continue
            if parsed_ts is not None and parsed_ts < since:
                continue

            input_tokens = int(usage.get("input_tokens") or 0)
            cache_creation = int(usage.get("cache_creation_input_tokens") or 0)
            cache_read = int(usage.get("cache_read_input_tokens") or 0)
            output_tokens = int(usage.get("output_tokens") or 0)
            if input_tokens == 0 and output_tokens == 0 and cache_creation == 0 and cache_read == 0:
                continue
            tokens_in += input_tokens
            tokens_out += output_tokens
            cached_input_tokens += cache_creation + cache_read
            raw_total_tokens += input_tokens + cache_creation + cache_read + output_tokens
            usage_records += 1

    if usage_records == 0 or tokens_in + tokens_out + cached_input_tokens == 0:
        return None

    return TokenSourceRecord(
        source_provider="claude_transcript",
        source_id=f"claude:{session_id}",
        source_path=path,
        source_created_at=last_at,
        session_id=session_id,
        cwd=cwd,
        model=model,
        agent="claude",
        tokens_in=tokens_in,
        tokens_out=tokens_out,
        cached_input_tokens=cached_input_tokens,
        raw_total_tokens=raw_total_tokens or None,
        parser_version=PARSER_VERSION,
        confidence=1.0,
        raw_metadata={
            "started_at": first_at.isoformat() if first_at else None,
            "usage_records": usage_records,
            "malformed_lines": malformed_lines,
            "source_file_name": path.name,
        },
    )
|
||||
|
||||
|
||||
def collect_claude_transcripts(claude_home: Path, since: datetime) -> list[TokenSourceRecord]:
    """Parse all Claude transcripts under ``claude_home`` and de-duplicate them.

    When several files yield the same ``source_id``, the record with the
    larger ``tokens_total`` is kept (the earlier one wins a tie).

    Returns:
        The surviving records ordered by ``source_created_at``; records
        without a timestamp sort first.
    """
    best: dict[str, TokenSourceRecord] = {}
    for transcript in iter_claude_transcript_files(claude_home):
        record = parse_claude_transcript(transcript, since)
        if record is None:
            continue
        incumbent = best.get(record.source_id)
        if incumbent is not None and record.tokens_total <= incumbent.tokens_total:
            continue
        best[record.source_id] = record

    # Stand-in sort key for records lacking a timestamp; it borrows the
    # timezone of `since`.
    floor = datetime.min.replace(tzinfo=since.tzinfo)

    def _sort_key(item: TokenSourceRecord) -> datetime:
        return item.source_created_at or floor

    return sorted(best.values(), key=_sort_key)
|
||||
124
api/services/token_sources/codex.py
Normal file
124
api/services/token_sources/codex.py
Normal file
@@ -0,0 +1,124 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from api.services.token_sources.base import TokenSourceRecord, parse_iso
|
||||
|
||||
PARSER_VERSION = "codex-desktop-v1"
|
||||
|
||||
|
||||
def iter_codex_session_files(codex_home: Path) -> list[Path]:
    """List Codex session files below ``codex_home``.

    Live sessions are the ``*.jsonl`` files exactly three directory levels
    below ``sessions/``; archived ones sit directly in ``archived_sessions/``.
    Each group is sorted on its own, and live sessions come first. Missing
    directories contribute nothing.
    """
    layout = (
        ("sessions", "*/*/*/*.jsonl"),
        ("archived_sessions", "*.jsonl"),
    )
    found: list[Path] = []
    for folder, pattern in layout:
        base = codex_home / folder
        if base.is_dir():
            found += sorted(base.glob(pattern))
    return found
|
||||
|
||||
|
||||
def parse_codex_session(path: Path, since: datetime) -> TokenSourceRecord | None:
    """Aggregate token usage from one Codex session (JSONL) file.

    Entries are dispatched on their ``type``:

    * ``session_meta`` supplies the session id, working directory and start
      time;
    * ``turn_context`` supplies the working directory and model;
    * ``event_msg`` with payload type ``token_count`` supplies usage, taken
      from ``info.last_token_usage``. Such entries are ignored when they have
      no parseable timestamp or are older than ``since``.

    Args:
        path: Session file; its stem without a leading ``rollout-`` is the
            fallback session id.
        since: Lower bound for usage entries. It is compared with the
            timezone-aware values produced by ``parse_iso``, so it must be
            timezone-aware as well.

    Returns:
        A ``TokenSourceRecord`` with the summed counters, or ``None`` when the
        file cannot be opened or holds no usage.
    """
    fallback_id = path.stem.removeprefix("rollout-")
    session_id = fallback_id
    started_at: datetime | None = None
    last_at: datetime | None = None
    cwd: str | None = None
    model: str | None = None
    tokens_in = tokens_out = 0
    cached_input_tokens = reasoning_output_tokens = 0
    raw_total_tokens = 0
    usage_records = 0
    malformed_lines = 0

    try:
        handle = path.open("r", encoding="utf-8", errors="ignore")
    except OSError:
        return None

    with handle:
        for line in handle:
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                malformed_lines += 1
                continue
            # Valid JSON that is not an object has no fields to read; count it
            # as malformed instead of crashing on `.get`.
            if not isinstance(entry, dict):
                malformed_lines += 1
                continue

            ts = entry.get("timestamp")
            parsed_ts: datetime | None = None
            if isinstance(ts, str):
                try:
                    parsed_ts = parse_iso(ts)
                except ValueError:
                    # An unreadable timestamp must not abort the whole file.
                    parsed_ts = None
            if parsed_ts:
                last_at = parsed_ts
                started_at = started_at or parsed_ts

            payload = entry.get("payload") or {}
            if not isinstance(payload, dict):
                payload = {}
            entry_type = entry.get("type")

            if entry_type == "session_meta":
                meta_id = payload.get("id")
                if meta_id:
                    session_id = str(meta_id)
                cwd = payload.get("cwd") or cwd
                meta_ts = payload.get("timestamp")
                if isinstance(meta_ts, str):
                    try:
                        # The explicit session start overrides the first line's time.
                        started_at = parse_iso(meta_ts)
                    except ValueError:
                        pass  # keep whatever start time is already known
            elif entry_type == "turn_context":
                cwd = payload.get("cwd") or cwd
                model = payload.get("model") or model
            elif entry_type == "event_msg" and payload.get("type") == "token_count":
                if parsed_ts is None or parsed_ts < since:
                    continue
                info = payload.get("info") or {}
                if not isinstance(info, dict):
                    continue
                last = info.get("last_token_usage") or {}
                if not isinstance(last, dict):
                    continue
                input_tokens = int(last.get("input_tokens") or 0)
                output_tokens = int(last.get("output_tokens") or 0)
                if input_tokens == 0 and output_tokens == 0:
                    continue
                tokens_in += input_tokens
                tokens_out += output_tokens
                cached_input_tokens += int(last.get("cached_input_tokens") or 0)
                reasoning_output_tokens += int(last.get("reasoning_output_tokens") or 0)
                # Use the reported total, else fall back to input + output.
                raw_total_tokens += int(last.get("total_tokens") or input_tokens + output_tokens)
                usage_records += 1

    if usage_records == 0 or tokens_in + tokens_out == 0:
        return None

    return TokenSourceRecord(
        source_provider="codex_session",
        source_id=f"codex:{session_id}",
        source_path=path,
        source_created_at=last_at,
        session_id=session_id,
        cwd=cwd,
        model=model,
        agent="codex",
        tokens_in=tokens_in,
        tokens_out=tokens_out,
        cached_input_tokens=cached_input_tokens,
        reasoning_output_tokens=reasoning_output_tokens,
        raw_total_tokens=raw_total_tokens or None,
        parser_version=PARSER_VERSION,
        confidence=1.0,
        raw_metadata={
            "started_at": started_at.isoformat() if started_at else None,
            "usage_records": usage_records,
            "malformed_lines": malformed_lines,
            "source_file_name": path.name,
        },
    )
|
||||
|
||||
|
||||
def collect_codex_sessions(codex_home: Path, since: datetime) -> list[TokenSourceRecord]:
    """Parse all Codex session files under ``codex_home`` and de-duplicate them.

    When several files yield the same ``source_id``, the record with the
    larger ``tokens_total`` is kept (the earlier one wins a tie).

    Returns:
        The surviving records ordered by ``source_created_at``; records
        without a timestamp sort first.
    """
    best: dict[str, TokenSourceRecord] = {}
    for session_file in iter_codex_session_files(codex_home):
        record = parse_codex_session(session_file, since)
        if record is None:
            continue
        incumbent = best.get(record.source_id)
        if incumbent is not None and record.tokens_total <= incumbent.tokens_total:
            continue
        best[record.source_id] = record

    # Stand-in sort key for records lacking a timestamp; it borrows the
    # timezone of `since`.
    floor = datetime.min.replace(tzinfo=since.tzinfo)

    def _sort_key(item: TokenSourceRecord) -> datetime:
        return item.source_created_at or floor

    return sorted(best.values(), key=_sort_key)
|
||||
@@ -9,79 +9,54 @@ const POLL = 60_000;
|
||||
```
|
||||
|
||||
```js
|
||||
// Fetch token events, by-repo summary, workstreams, and tasks in parallel
|
||||
const evidenceSel = Inputs.radio(
|
||||
["Measured only", "Active evidence", "All evidence"],
|
||||
{value: "Measured only", label: "Evidence"}
|
||||
);
|
||||
const sortSel = Inputs.select(
|
||||
["Tokens Total", "Event Count"],
|
||||
{label: "Sort by"}
|
||||
);
|
||||
const maxSel = Inputs.select(
|
||||
[10, 20, 50, 100, 500],
|
||||
{value: 20, label: "Show"}
|
||||
);
|
||||
display(html`<div style="display:flex;gap:1.5rem;align-items:flex-end;flex-wrap:wrap;margin:0.5rem 0 1.5rem">${evidenceSel}${sortSel}${maxSel}</div>`);
|
||||
const evidenceMode = view(evidenceSel);
|
||||
const sortOrder = view(sortSel);
|
||||
const maxResults = view(maxSel);
|
||||
```
|
||||
|
||||
```js
|
||||
function aggregatePath(mode) {
|
||||
if (mode === "Measured only") return "/token-events/aggregate/?measurement_kind=measured&include_superseded=false";
|
||||
if (mode === "All evidence") return "/token-events/aggregate/?include_superseded=true";
|
||||
return "/token-events/aggregate/?include_superseded=false";
|
||||
}
|
||||
|
||||
const tokenState = (async function*() {
|
||||
let failures = 0;
|
||||
while (true) {
|
||||
let byRepo = [], events = [], wsMap = {}, taskMap = {}, ok = false;
|
||||
let aggregate = null, quality = null, ok = false;
|
||||
try {
|
||||
const [r1, r2, r3, r4] = await Promise.all([
|
||||
apiFetch("/token-events/by-repo/"),
|
||||
apiFetch("/token-events/?limit=1000"),
|
||||
apiFetch("/workstreams/"),
|
||||
apiFetch("/tasks/"),
|
||||
const [r1, r2] = await Promise.all([
|
||||
apiFetch(aggregatePath(evidenceMode)),
|
||||
apiFetch("/token-events/quality/"),
|
||||
]);
|
||||
ok = r1.ok && r2.ok;
|
||||
if (ok) {
|
||||
byRepo = await r1.json();
|
||||
events = await r2.json();
|
||||
}
|
||||
if (r3.ok) {
|
||||
const wsList = await r3.json();
|
||||
for (const w of wsList) wsMap[w.id] = w;
|
||||
}
|
||||
if (r4.ok) {
|
||||
const taskList = await r4.json();
|
||||
for (const t of taskList) taskMap[t.id] = t;
|
||||
aggregate = await r1.json();
|
||||
quality = await r2.json();
|
||||
}
|
||||
} catch {}
|
||||
failures = ok ? 0 : failures + 1;
|
||||
yield {byRepo, events, wsMap, taskMap, ok, ts: new Date()};
|
||||
yield {aggregate, quality, ok, ts: new Date()};
|
||||
await waitForVisible(pollDelay({ok, base: POLL, failures}));
|
||||
}
|
||||
})();
|
||||
```
|
||||
|
||||
```js
|
||||
// Resolve an event's repo_id via the 3-level chain: direct → workstream → task→workstream
|
||||
function resolveRepoId(e, wsMap, taskMap) {
|
||||
if (e.repo_id) return e.repo_id;
|
||||
const wsId = e.workstream_id ?? taskMap[e.task_id]?.workstream_id;
|
||||
return wsId ? (wsMap[wsId]?.repo_id ?? null) : null;
|
||||
}
|
||||
|
||||
function buildSummary(events) {
|
||||
const byWs = {}, byModel = {}, byTask = {};
|
||||
for (const e of events) {
|
||||
const tot = (e.tokens_in || 0) + (e.tokens_out || 0);
|
||||
if (e.workstream_id) {
|
||||
byWs[e.workstream_id] = byWs[e.workstream_id] || {scope_id: e.workstream_id, tokens_in: 0, tokens_out: 0, event_count: 0};
|
||||
byWs[e.workstream_id].tokens_in += e.tokens_in || 0;
|
||||
byWs[e.workstream_id].tokens_out += e.tokens_out || 0;
|
||||
byWs[e.workstream_id].event_count++;
|
||||
}
|
||||
const model = e.model || "unknown";
|
||||
byModel[model] = (byModel[model] || 0) + tot;
|
||||
if (e.task_id) {
|
||||
byTask[e.task_id] = byTask[e.task_id] || {task_id: e.task_id, tokens_in: 0, tokens_out: 0, event_count: 0};
|
||||
byTask[e.task_id].tokens_in += e.tokens_in || 0;
|
||||
byTask[e.task_id].tokens_out += e.tokens_out || 0;
|
||||
byTask[e.task_id].event_count++;
|
||||
}
|
||||
}
|
||||
const toRows = obj => Object.values(obj)
|
||||
.map(v => ({...v, tokens_total: (v.tokens_in || 0) + (v.tokens_out || 0)}))
|
||||
.sort((a, b) => b.tokens_total - a.tokens_total);
|
||||
return {
|
||||
by_workstream: toRows(byWs),
|
||||
by_model: Object.entries(byModel)
|
||||
.map(([model, tokens_total]) => ({model, tokens_total}))
|
||||
.sort((a, b) => b.tokens_total - a.tokens_total),
|
||||
top_tasks: toRows(byTask),
|
||||
total_events: events.length,
|
||||
};
|
||||
}
|
||||
|
||||
function nameCell(name, fullName) {
|
||||
const s = String(name ?? fullName ?? "—");
|
||||
const full = String(fullName ?? name ?? "—");
|
||||
@@ -92,21 +67,40 @@ function nameCell(name, fullName) {
|
||||
}
|
||||
|
||||
function sortRows(rows, sortField) {
|
||||
if (sortField === "Tokens Total") return rows; // already sorted by buildSummary / by-repo API
|
||||
const s = [...rows];
|
||||
if (sortField === "Tokens In") s.sort((a, b) => (b.tokens_in || 0) - (a.tokens_in || 0));
|
||||
else if (sortField === "Tokens Out") s.sort((a, b) => (b.tokens_out || 0) - (a.tokens_out || 0));
|
||||
else if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0));
|
||||
else if (sortField === "Most Recent") s.sort((a, b) => (b._lastAt || 0) - (a._lastAt || 0));
|
||||
if (sortField === "Event Count") s.sort((a, b) => (b.event_count || 0) - (a.event_count || 0));
|
||||
else s.sort((a, b) => (b.tokens_total || 0) - (a.tokens_total || 0));
|
||||
return s;
|
||||
}
|
||||
|
||||
function dictRows(obj, labelKey) {
|
||||
return Object.entries(obj ?? {})
|
||||
.map(([label, tokens_total]) => ({[labelKey]: label, tokens_total}))
|
||||
.sort((a, b) => b.tokens_total - a.tokens_total);
|
||||
}
|
||||
|
||||
function metricRows(quality) {
|
||||
if (!quality) return [];
|
||||
return [
|
||||
{metric: "Measured", value: quality.measured_event_count},
|
||||
{metric: "Allocated", value: quality.allocated_event_count},
|
||||
{metric: "Estimated", value: quality.estimated_event_count},
|
||||
{metric: "Superseded", value: quality.superseded_event_count},
|
||||
{metric: "Fallback", value: quality.fallback_event_count},
|
||||
{metric: "Unattributed measured", value: quality.unattributed_measured_event_count},
|
||||
{metric: "Missing provenance", value: quality.missing_provenance_event_count},
|
||||
{metric: "Duplicate sources", value: quality.duplicate_source_count},
|
||||
];
|
||||
}
|
||||
```
|
||||
|
||||
```js
|
||||
const byRepo = tokenState.byRepo ?? [];
|
||||
const events = tokenState.events ?? [];
|
||||
const wsMap = tokenState.wsMap ?? {};
|
||||
const taskMap = tokenState.taskMap ?? {};
|
||||
const aggregate = tokenState.aggregate ?? {
|
||||
tokens_in: 0, tokens_out: 0, tokens_total: 0, event_count: 0,
|
||||
by_repo: [], by_workstream: [], by_task: [], by_model: [],
|
||||
by_measurement_kind: {}, by_source_provider: {},
|
||||
};
|
||||
const quality = tokenState.quality ?? null;
|
||||
const _ok = tokenState.ok ?? false;
|
||||
const _ts = tokenState.ts;
|
||||
```
|
||||
@@ -115,66 +109,37 @@ const _ts = tokenState.ts;
|
||||
|
||||
```js
|
||||
display(html`<div style="font-size:0.8rem;color:${_ok ? 'var(--theme-foreground-focus)' : 'red'}">
|
||||
● ${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${events.length} events` : "API offline"}
|
||||
● ${_ok ? `Live · ${_ts?.toLocaleTimeString()} · ${aggregate.event_count.toLocaleString()} events · ${aggregate.tokens_total.toLocaleString()} tokens` : "API offline"}
|
||||
</div>`);
|
||||
```
|
||||
|
||||
```js
|
||||
const repoSel = Inputs.select(
|
||||
["All repos", ...byRepo.map(r => r.repo_slug)],
|
||||
{label: "Filter by repo"}
|
||||
);
|
||||
const sortSel = Inputs.select(
|
||||
["Tokens Total", "Tokens In", "Tokens Out", "Event Count", "Most Recent"],
|
||||
{label: "Sort by"}
|
||||
);
|
||||
const maxSel = Inputs.select(
|
||||
[10, 20, 50, 100, 500],
|
||||
{value: 20, label: "Show"}
|
||||
);
|
||||
display(html`<div style="display:flex;gap:1.5rem;align-items:flex-end;flex-wrap:wrap;margin:0.5rem 0 1.5rem">${repoSel}${sortSel}${maxSel}</div>`);
|
||||
const repoFilter = view(repoSel);
|
||||
const sortOrder = view(sortSel);
|
||||
const maxResults = view(maxSel);
|
||||
```
|
||||
|
||||
```js
|
||||
// Build filtered and last-event-annotated row sets
|
||||
const selectedRepoId = repoFilter === "All repos"
|
||||
? null
|
||||
: (byRepo.find(r => r.repo_slug === repoFilter)?.repo_id ?? null);
|
||||
|
||||
const filteredEvents = selectedRepoId
|
||||
? events.filter(e => resolveRepoId(e, wsMap, taskMap) === selectedRepoId)
|
||||
: events;
|
||||
|
||||
const lastAtByRepo = {}, lastAtByWs = {}, lastAtByTask = {};
|
||||
for (const e of filteredEvents) {
|
||||
const t = e.created_at ? new Date(e.created_at).getTime() : 0;
|
||||
const rid = resolveRepoId(e, wsMap, taskMap);
|
||||
if (rid) lastAtByRepo[rid] = Math.max(lastAtByRepo[rid] || 0, t);
|
||||
if (e.workstream_id) lastAtByWs[e.workstream_id] = Math.max(lastAtByWs[e.workstream_id] || 0, t);
|
||||
if (e.task_id) lastAtByTask[e.task_id] = Math.max(lastAtByTask[e.task_id] || 0, t);
|
||||
}
|
||||
|
||||
const filteredByRepo = (selectedRepoId
|
||||
? byRepo.filter(r => r.repo_id === selectedRepoId)
|
||||
: byRepo
|
||||
).map(r => ({...r, _lastAt: lastAtByRepo[r.repo_id] || 0}));
|
||||
|
||||
const summary = buildSummary(filteredEvents);
|
||||
const wsRowsFull = summary.by_workstream.map(r => ({...r, _lastAt: lastAtByWs[r.scope_id] || 0}));
|
||||
const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask[r.task_id] || 0}));
|
||||
display(html`<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:0.75rem;margin:1rem 0">
|
||||
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
|
||||
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Tokens</div>
|
||||
<div style="font-size:1.4rem;font-weight:650">${aggregate.tokens_total.toLocaleString()}</div>
|
||||
</div>
|
||||
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
|
||||
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Events</div>
|
||||
<div style="font-size:1.4rem;font-weight:650">${aggregate.event_count.toLocaleString()}</div>
|
||||
</div>
|
||||
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
|
||||
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Last Event</div>
|
||||
<div style="font-size:1rem;font-weight:650">${aggregate.last_event_at ? new Date(aggregate.last_event_at).toLocaleString() : "—"}</div>
|
||||
</div>
|
||||
<div style="border:1px solid var(--theme-foreground-faint);border-radius:6px;padding:0.75rem">
|
||||
<div style="font-size:0.75rem;color:var(--theme-foreground-muted)">Last Ingested</div>
|
||||
<div style="font-size:1rem;font-weight:650">${aggregate.last_ingested_at ? new Date(aggregate.last_ingested_at).toLocaleString() : "—"}</div>
|
||||
</div>
|
||||
</div>`);
|
||||
```
|
||||
|
||||
## By Repo
|
||||
|
||||
```js
|
||||
{
|
||||
const sorted = sortRows(filteredByRepo, sortOrder);
|
||||
const total = sorted.length;
|
||||
const rows = sorted.slice(0, maxResults);
|
||||
|
||||
const sorted = sortRows(aggregate.by_repo ?? [], sortOrder);
|
||||
const rows = sorted.slice(0, maxResults);
|
||||
if (rows.length === 0) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No token events with repo association yet.</p>`);
|
||||
} else {
|
||||
@@ -184,40 +149,20 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
|
||||
width: Math.min(900, width),
|
||||
x: {label: "Tokens", tickFormat: "~s"},
|
||||
y: {label: null},
|
||||
color: {legend: true, domain: ["tokens_in", "tokens_out"], range: ["#4e79a7","#f28e2b"]},
|
||||
marks: [
|
||||
Plot.barX(
|
||||
rows.flatMap(r => [
|
||||
{repo: r.repo_slug, type: "tokens_in", value: r.tokens_in},
|
||||
{repo: r.repo_slug, type: "tokens_out", value: r.tokens_out},
|
||||
]),
|
||||
{x: "value", y: "repo", fill: "type", tip: true}
|
||||
),
|
||||
],
|
||||
marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#4e79a7", tip: true})],
|
||||
}));
|
||||
|
||||
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
|
||||
columns: ["_ref", "repo_slug", "tokens_in", "tokens_out", "tokens_total", "event_count"],
|
||||
header: {
|
||||
_ref: "REF",
|
||||
repo_slug: "Repo",
|
||||
tokens_in: "Tokens In",
|
||||
tokens_out: "Tokens Out",
|
||||
tokens_total: "Total",
|
||||
event_count: "Events",
|
||||
},
|
||||
columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"],
|
||||
header: {_ref: "REF", label: "Repo", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"},
|
||||
format: {
|
||||
_ref: (_, i) => refCell(i + 1, "repos", rows[i].repo_slug),
|
||||
repo_slug: d => nameCell(d, d),
|
||||
tokens_in: d => d.toLocaleString(),
|
||||
tokens_out: d => d.toLocaleString(),
|
||||
_ref: (_, i) => refCell(i + 1, "repos", rows[i].label),
|
||||
label: d => nameCell(d, d),
|
||||
tokens_in: d => d.toLocaleString(),
|
||||
tokens_out: d => d.toLocaleString(),
|
||||
tokens_total: d => d.toLocaleString(),
|
||||
},
|
||||
width: {_ref: 50, repo_slug: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
|
||||
width: {_ref: 50, label: 160, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
|
||||
}));
|
||||
|
||||
if (total > maxResults)
|
||||
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted);margin-top:0.25rem">Showing ${maxResults} of ${total} repos</p>`);
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -226,38 +171,48 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
|
||||
|
||||
```js
|
||||
{
|
||||
const sorted = sortRows(wsRowsFull, sortOrder);
|
||||
const total = sorted.length;
|
||||
const rows = sorted.slice(0, maxResults);
|
||||
|
||||
const sorted = sortRows(aggregate.by_workstream ?? [], sortOrder);
|
||||
const rows = sorted.slice(0, maxResults);
|
||||
if (rows.length === 0) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No workstream data yet.</p>`);
|
||||
} else {
|
||||
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
|
||||
columns: ["_ref", "scope_id", "tokens_in", "tokens_out", "tokens_total", "event_count"],
|
||||
header: {
|
||||
_ref: "REF",
|
||||
scope_id: "Workstream",
|
||||
tokens_in: "Tokens In",
|
||||
tokens_out: "Tokens Out",
|
||||
tokens_total: "Total",
|
||||
event_count: "Events",
|
||||
},
|
||||
columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total", "event_count"],
|
||||
header: {_ref: "REF", label: "Workstream", tokens_in: "Tokens In", tokens_out: "Tokens Out", tokens_total: "Total", event_count: "Events"},
|
||||
format: {
|
||||
_ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id),
|
||||
scope_id: d => {
|
||||
const ws = wsMap[d];
|
||||
return nameCell(ws?.title ?? ws?.slug, d);
|
||||
},
|
||||
tokens_in: d => d.toLocaleString(),
|
||||
tokens_out: d => d.toLocaleString(),
|
||||
_ref: (_, i) => refCell(i + 1, "workstreams", rows[i].scope_id),
|
||||
label: d => nameCell(d, d),
|
||||
tokens_in: d => d.toLocaleString(),
|
||||
tokens_out: d => d.toLocaleString(),
|
||||
tokens_total: d => d.toLocaleString(),
|
||||
},
|
||||
width: {_ref: 50, scope_id: 200, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
|
||||
width: {_ref: 50, label: 240, tokens_in: 110, tokens_out: 110, tokens_total: 110, event_count: 80},
|
||||
}));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
if (total > maxResults)
|
||||
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted);margin-top:0.25rem">Showing ${maxResults} of ${total} workstreams</p>`);
|
||||
## By Evidence
|
||||
|
||||
```js
|
||||
{
|
||||
const kindRows = dictRows(aggregate.by_measurement_kind, "kind");
|
||||
const sourceRows = dictRows(aggregate.by_source_provider, "source");
|
||||
if (kindRows.length === 0 && sourceRows.length === 0) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No evidence breakdown yet.</p>`);
|
||||
} else {
|
||||
display(html`<div style="display:grid;grid-template-columns:repeat(auto-fit,minmax(260px,1fr));gap:1rem">
|
||||
<div>${Inputs.table(kindRows, {
|
||||
columns: ["kind", "tokens_total"],
|
||||
header: {kind: "Kind", tokens_total: "Tokens"},
|
||||
format: {tokens_total: d => d.toLocaleString()},
|
||||
})}</div>
|
||||
<div>${Inputs.table(sourceRows, {
|
||||
columns: ["source", "tokens_total"],
|
||||
header: {source: "Source", tokens_total: "Tokens"},
|
||||
format: {tokens_total: d => d.toLocaleString()},
|
||||
})}</div>
|
||||
</div>`);
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -265,18 +220,38 @@ const taskRowsFull = summary.top_tasks.map(r => ({...r, _lastAt: lastAtByTask
|
||||
## By Model
|
||||
|
||||
```js
|
||||
if (summary.by_model.length === 0) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No model data yet.</p>`);
|
||||
{
|
||||
const rows = (aggregate.by_model ?? []).slice(0, maxResults);
|
||||
if (rows.length === 0) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No model data yet.</p>`);
|
||||
} else {
|
||||
display(Plot.plot({
|
||||
title: "Token consumption by model",
|
||||
marginLeft: 200,
|
||||
width: Math.min(700, width),
|
||||
x: {label: "Total tokens", tickFormat: "~s"},
|
||||
marks: [Plot.barX(rows, {x: "tokens_total", y: "label", fill: "#59a14f", tip: true})],
|
||||
}));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Data Quality
|
||||
|
||||
```js
|
||||
if (!quality) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No quality data yet.</p>`);
|
||||
} else {
|
||||
display(Plot.plot({
|
||||
title: "Token consumption by model",
|
||||
marginLeft: 200,
|
||||
width: Math.min(700, width),
|
||||
x: {label: "Total tokens", tickFormat: "~s"},
|
||||
marks: [
|
||||
Plot.barX(summary.by_model, {x: "tokens_total", y: "model", fill: "#4e79a7", tip: true}),
|
||||
],
|
||||
display(Inputs.table(metricRows(quality), {
|
||||
columns: ["metric", "value"],
|
||||
header: {metric: "Signal", value: "Count"},
|
||||
format: {value: d => d.toLocaleString()},
|
||||
}));
|
||||
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted)">
|
||||
Codex: ${quality.last_codex_ingested_at ? new Date(quality.last_codex_ingested_at).toLocaleString() : "—"}
|
||||
· Claude: ${quality.last_claude_ingested_at ? new Date(quality.last_claude_ingested_at).toLocaleString() : "—"}
|
||||
· Reconcile: ${quality.last_reconciliation_at ? new Date(quality.last_reconciliation_at).toLocaleString() : "—"}
|
||||
</p>`);
|
||||
}
|
||||
```
|
||||
|
||||
@@ -284,31 +259,23 @@ if (summary.by_model.length === 0) {
|
||||
|
||||
```js
|
||||
{
|
||||
const sorted = sortRows(taskRowsFull, sortOrder);
|
||||
const total = sorted.length;
|
||||
const rows = sorted.slice(0, maxResults);
|
||||
|
||||
const sorted = sortRows(aggregate.by_task ?? [], sortOrder);
|
||||
const rows = sorted.slice(0, maxResults);
|
||||
if (rows.length === 0) {
|
||||
display(html`<p style="color:var(--theme-foreground-muted)">No task-level data yet.</p>`);
|
||||
} else {
|
||||
display(Inputs.table(rows.map((r, i) => ({...r, _ref: i})), {
|
||||
columns: ["_ref", "task_id", "tokens_in", "tokens_out", "tokens_total"],
|
||||
header: {_ref: "REF", task_id: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"},
|
||||
columns: ["_ref", "label", "tokens_in", "tokens_out", "tokens_total"],
|
||||
header: {_ref: "REF", label: "Task", tokens_in: "In", tokens_out: "Out", tokens_total: "Total"},
|
||||
format: {
|
||||
_ref: (_, i) => refCell(i + 1, "tasks", rows[i].task_id),
|
||||
task_id: d => {
|
||||
const task = taskMap[d];
|
||||
return nameCell(task?.title, d);
|
||||
},
|
||||
tokens_in: d => d.toLocaleString(),
|
||||
tokens_out: d => d.toLocaleString(),
|
||||
_ref: (_, i) => refCell(i + 1, "tasks", rows[i].scope_id),
|
||||
label: d => nameCell(d, d),
|
||||
tokens_in: d => d.toLocaleString(),
|
||||
tokens_out: d => d.toLocaleString(),
|
||||
tokens_total: d => d.toLocaleString(),
|
||||
},
|
||||
width: {_ref: 50, task_id: 240},
|
||||
width: {_ref: 50, label: 260},
|
||||
}));
|
||||
|
||||
if (total > maxResults)
|
||||
display(html`<p style="font-size:0.8rem;color:var(--theme-foreground-muted);margin-top:0.25rem">Showing ${maxResults} of ${total} tasks</p>`);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -23,10 +23,14 @@ if (raw.error) {
|
||||
display(html`<p style="margin-top:0"><a href="/token-cost">← Token Cost</a></p>`);
|
||||
|
||||
const FIELD_ORDER = [
|
||||
"id","tokens_in","tokens_out","tokens_total",
|
||||
"id","measurement_kind","source_provider","source_id",
|
||||
"tokens_in","tokens_out","tokens_total","token_evidence_total",
|
||||
"cached_input_tokens","reasoning_output_tokens","raw_total_tokens",
|
||||
"note","model","agent","session_id",
|
||||
"task_id","workstream_id","repo_id",
|
||||
"ref_type","ref_id","created_at",
|
||||
"ref_type","ref_id","source_path","source_created_at",
|
||||
"parser_version","confidence","ingested_at","created_at",
|
||||
"raw_metadata",
|
||||
];
|
||||
|
||||
const rows = FIELD_ORDER.map(k => fieldRow(k, raw[k] ?? null));
|
||||
|
||||
75
docs/multi-user-access-model.md
Normal file
75
docs/multi-user-access-model.md
Normal file
@@ -0,0 +1,75 @@
|
||||
# State Hub Multi-User Access Model
|
||||
|
||||
State Hub is local-first coordination infrastructure. It reflects repo-backed
|
||||
workplans, progress, and operational state; it is not the authority for source
|
||||
control, host access, identity, or runtime secret custody.
|
||||
|
||||
## Decision
|
||||
|
||||
For the current phase, enforce user access through the systems that already own
|
||||
the boundary:
|
||||
|
||||
- Gitea controls repository read/write rights.
|
||||
- SSH authorized keys control host access.
|
||||
- ops-bridge controls whether a remote machine can reach local services.
|
||||
- OpenBao controls runtime secret custody after bootstrap.
|
||||
|
||||
State Hub API authentication is deferred until there is an active external
|
||||
collaborator or an exposed deployment that needs per-user write enforcement.
|
||||
Until then, State Hub stays private to local or tunneled operator networks.
|
||||
|
||||
## Roles
|
||||
|
||||
| Role | State Hub access | Source of authority |
|
||||
|------|------------------|---------------------|
|
||||
| Primary operator | Full read/write across domains | host access, repo ownership, operator secret custody |
|
||||
| Domain collaborator | Read all public coordination state; write through the owned domain repo and approved hub actions | Gitea repo permissions plus SSH/tunnel authorization |
|
||||
| Observer | Read-only brief/dashboard access where explicitly exposed | tunnel or future API token |
|
||||
|
||||
## Current Enforcement Boundary
|
||||
|
||||
1. Repo files remain authoritative. A collaborator can change workplans only in
|
||||
repos where Gitea allows them to push.
|
||||
2. State Hub indexes files and records progress events, but it should not become
|
||||
the primary identity authority.
|
||||
3. Direct dashboard/API access is private by default. Do not publish State Hub
|
||||
unauthenticated on the public internet.
|
||||
4. Runtime secrets, service account keys, database credentials, and package
|
||||
tokens should move into OpenBao after the OpenBao bootstrap, unseal, audit,
|
||||
and recovery procedure is complete.
|
||||
|
||||
## Future API Auth Trigger
|
||||
|
||||
Add API-layer auth when one of these becomes true:
|
||||
|
||||
- a second human needs direct State Hub API/dashboard mutation rights
|
||||
- State Hub is exposed beyond localhost or a tightly controlled SSH tunnel
|
||||
- automation needs per-consumer attribution and revocation independent of repo
|
||||
commits
|
||||
- domain-scoped write checks are needed at request time
|
||||
|
||||
## Future Token Shape
|
||||
|
||||
When the trigger is reached, implement a small token model rather than a full
|
||||
identity provider inside State Hub:
|
||||
|
||||
- accept NetKingdom IAM Profile OIDC tokens when the identity plane is ready
|
||||
- support one emergency local admin token for break-glass operation
|
||||
- map claims to `primary_operator`, `domain_collaborator`, or `observer`
|
||||
- enforce domain write scopes in mutating endpoints
|
||||
- keep repo permissions as the durable source of contribution authority
|
||||
|
||||
Candidate scopes:
|
||||
|
||||
```text
|
||||
statehub:read
|
||||
statehub:write
|
||||
statehub:domain:<slug>:write
|
||||
statehub:admin
|
||||
```
|
||||
|
||||
## Operator Rule
|
||||
|
||||
Do not store collaborator credentials in the State Hub database. Store secrets
|
||||
in OpenBao or the approved bootstrap bundle, and store source permissions in
|
||||
Gitea.
|
||||
212
docs/onboarding.md
Normal file
212
docs/onboarding.md
Normal file
@@ -0,0 +1,212 @@
|
||||
# State Hub Onboarding
|
||||
|
||||
This guide turns a new machine into a usable State Hub operator or collaborator
|
||||
environment. It covers local credentials, SSH reachability, Gitea access, and
|
||||
Claude Code MCP registration.
|
||||
|
||||
State Hub remains a coordination read/cache layer. Repo permissions, SSH
|
||||
access, and controlled tunnels are the first access boundary. OpenBao is the
|
||||
runtime secret authority for platform and workload secrets once its bootstrap
|
||||
ceremony is complete.
|
||||
|
||||
## Quick Start
|
||||
|
||||
Clone the repo, then run the bootstrap script:
|
||||
|
||||
```bash
|
||||
git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
|
||||
cd ~/state-hub
|
||||
make bootstrap-env
|
||||
```
|
||||
|
||||
On a clean Ubuntu 24.04 machine, allow package installation explicitly:
|
||||
|
||||
```bash
|
||||
make bootstrap-env ARGS="--install-missing"
|
||||
```
|
||||
|
||||
For a remote machine that reaches State Hub through ops-bridge:
|
||||
|
||||
```bash
|
||||
make bridges
|
||||
make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
|
||||
```
|
||||
|
||||
Restart Claude Code after MCP registration.
|
||||
|
||||
## Primary Operator: New Machine
|
||||
|
||||
1. Install minimal host prerequisites:
|
||||
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y git curl openssh-client make python3
|
||||
```
|
||||
|
||||
2. Clone `state-hub` and any domain repo you expect to operate:
|
||||
|
||||
```bash
|
||||
git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
|
||||
git clone https://gitea.coulomb.social/coulomb/the-custodian.git ~/the-custodian
|
||||
```
|
||||
|
||||
3. Run the bootstrap:
|
||||
|
||||
```bash
|
||||
cd ~/state-hub
|
||||
make bootstrap-env ARGS="--install-missing"
|
||||
```
|
||||
|
||||
The script will:
|
||||
|
||||
- check required tools
|
||||
- configure `git credential.helper`
|
||||
- create `~/.ssh/id_ed25519` when missing
|
||||
- print the public key for managed hosts
|
||||
- create `~/.railiance_gitea.conf` when you provide a Gitea token
|
||||
- register the State Hub MCP server for Claude Code
|
||||
- check State Hub API reachability
|
||||
|
||||
4. Authorize the SSH key on managed hosts. If password or existing key access
|
||||
is available, rerun:
|
||||
|
||||
```bash
|
||||
make bootstrap-env ARGS="--authorize-ssh --skip-gitea --skip-mcp"
|
||||
```
|
||||
|
||||
Default targets:
|
||||
|
||||
- `tegwick@92.205.62.239` for Railiance01
|
||||
- `tegwick@92.205.130.254` for CoulombCore
|
||||
|
||||
5. Start or connect to State Hub:
|
||||
|
||||
```bash
|
||||
make api
|
||||
make mcp-http
|
||||
```
|
||||
|
||||
If the hub is remote, use ops-bridge:
|
||||
|
||||
```bash
|
||||
make bridges
|
||||
```
|
||||
|
||||
6. Restart Claude Code and verify that `state-hub` appears in the MCP server
|
||||
list. In the first session, call `get_state_summary()` when MCP tools are
|
||||
available. If not, use:
|
||||
|
||||
```bash
|
||||
cat .custodian-brief.md
|
||||
curl -s "http://127.0.0.1:8000/workstreams/?status=active" | python3 -m json.tool
|
||||
```
|
||||
|
||||
## Domain Collaborator: New Person
|
||||
|
||||
1. Get a Gitea account with write access to the relevant domain repo.
|
||||
2. Clone this repo and the domain repo:
|
||||
|
||||
```bash
|
||||
git clone https://gitea.coulomb.social/coulomb/state-hub.git ~/state-hub
|
||||
git clone https://gitea.coulomb.social/coulomb/<domain-repo>.git ~/<domain-repo>
|
||||
```
|
||||
|
||||
3. Run the bootstrap:
|
||||
|
||||
```bash
|
||||
cd ~/state-hub
|
||||
make bootstrap-env
|
||||
```
|
||||
|
||||
4. Send the printed SSH public key to the operator, or authorize it yourself if
|
||||
you already have host access:
|
||||
|
||||
```bash
|
||||
ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.62.239
|
||||
```
|
||||
|
||||
5. Bring up the State Hub tunnel when direct local access is unavailable:
|
||||
|
||||
```bash
|
||||
make bridges
|
||||
make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
|
||||
```
|
||||
|
||||
6. Restart Claude Code, open the domain repo, and orient from the repo brief:
|
||||
|
||||
```bash
|
||||
cat .custodian-brief.md
|
||||
```
|
||||
|
||||
7. Contribute work through repo-backed workplans. A new workplan lives under
|
||||
`workplans/` and follows ADR-001. The hub indexes files; the files remain
|
||||
authoritative.
|
||||
|
||||
## Credential Helper Choices
|
||||
|
||||
`make bootstrap-env` configures Git credentials only when no global helper is
|
||||
already set.
|
||||
|
||||
Default behavior:
|
||||
|
||||
- use `libsecret` when the helper exists
|
||||
- otherwise set `credential.helper` to `cache --timeout=3600`, which keeps credentials in memory for one hour
|
||||
|
||||
For headless hosts where a persistent plaintext helper is acceptable:
|
||||
|
||||
```bash
|
||||
make bootstrap-env ARGS="--git-helper store --allow-plaintext-store"
|
||||
```
|
||||
|
||||
Prefer SSH remotes or a keyring-backed helper for normal operator machines.
|
||||
|
||||
## Gitea Token File
|
||||
|
||||
Some Railiance scripts read `~/.railiance_gitea.conf`:
|
||||
|
||||
```bash
|
||||
GITEA_URL="http://92.205.130.254:32166"
|
||||
GITEA_USER="<user>"
|
||||
GITEA_TOKEN="<token>"
|
||||
```
|
||||
|
||||
Required token capabilities depend on the action:
|
||||
|
||||
- repo creation needs `read:user` and repository write/admin scope
|
||||
- package publishing needs package write scope
|
||||
- inventory reads need repository read scope
|
||||
|
||||
The bootstrap script writes this file with mode `0600` and does not print the
|
||||
token.
|
||||
|
||||
## MCP Registration
|
||||
|
||||
Local registration:
|
||||
|
||||
```bash
|
||||
make register-mcp
|
||||
```
|
||||
|
||||
Tunnel registration:
|
||||
|
||||
```bash
|
||||
make register-mcp MCP_URL=http://127.0.0.1:18001/sse API_BASE=http://127.0.0.1:18000
|
||||
```
|
||||
|
||||
The current State Hub MCP transport is SSE. The old `.mcp.json`/stdio flow is
|
||||
legacy; use `make mcp-http` to run the SSE service on `127.0.0.1:8001`.
|
||||
|
||||
## Verification Checklist
|
||||
|
||||
Run these checks after bootstrap:
|
||||
|
||||
```bash
|
||||
git config --global --get credential.helper
|
||||
test -f ~/.ssh/id_ed25519.pub
|
||||
test -f ~/.railiance_gitea.conf
|
||||
curl -fsS http://127.0.0.1:8000/state/health || curl -fsS http://127.0.0.1:18000/state/health
|
||||
make register-mcp DRY_RUN=1
|
||||
```
|
||||
|
||||
Then restart Claude Code and confirm that the `state-hub` MCP server is
|
||||
available.
|
||||
57
docs/token-evidence-model.md
Normal file
57
docs/token-evidence-model.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# Token Evidence Model
|
||||
|
||||
State Hub token events distinguish source-backed measurements from inferred
|
||||
operational signals. Dashboards and reports should use structured fields for
|
||||
quality and provenance; `note` remains human context only.
|
||||
|
||||
## Measurement Kinds
|
||||
|
||||
| Kind | Meaning | Default confidence |
|
||||
| --- | --- | --- |
|
||||
| `measured` | Parsed from a source that reports usage metadata, such as Codex session logs or Claude transcript usage blocks. | `1.0` |
|
||||
| `allocated` | A share of a larger known total, assigned to a task/workstream by a documented allocation method. | `0.70` |
|
||||
| `estimated` | A fallback or operator-entered estimate without direct source evidence. | `0.35` |
|
||||
| `superseded` | Historical rows retained for audit but excluded from active totals. | `0.0` |
|
||||
|
||||
## Source Providers
|
||||
|
||||
| Provider | Source |
|
||||
| --- | --- |
|
||||
| `codex_session` | Codex Desktop `.codex/sessions/**` and `.codex/archived_sessions/**` JSONL token_count events. |
|
||||
| `claude_transcript` | Claude Code `.claude/projects/**/*.jsonl` usage metadata. Transcript text is never stored. |
|
||||
| `llm_connect` | Future llm-connect usage metadata. |
|
||||
| `manual` | Explicit operator/API input. |
|
||||
| `task_fallback` | Fixed task-completion fallback rows created when no source data is available. |
|
||||
|
||||
## Provenance Fields
|
||||
|
||||
Each source-backed row should include:
|
||||
|
||||
- `source_provider`, `source_id`, `source_path`, `source_created_at`
|
||||
- `parser_version`, `ingested_at`, `confidence`
|
||||
- `cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`
|
||||
- `raw_metadata` with parser and attribution metadata, never transcript content
|
||||
|
||||
`tokens_in + tokens_out` remains the default active total. Cached input and
|
||||
reasoning output are preserved separately so dashboards can show both default
|
||||
and provider-style totals without rewriting history.
|
||||
|
||||
## Idempotency
|
||||
|
||||
Measured sources must be written with a stable `source_id`. State Hub enforces
at most one row for each `(measurement_kind, source_provider, source_id)` tuple,
and `POST /token-events/upsert` updates the existing row for a growing live
session rather than creating duplicates. Rows with no `source_id` are not
covered by this uniqueness rule, so a missing `source_id` defeats deduplication.
|
||||
|
||||
## Migration Playbook
|
||||
|
||||
1. Run the token-event provenance migration.
|
||||
2. Run `python3 scripts/token_reconcile.py --since 2026-05-19` and inspect the
|
||||
dry-run report.
|
||||
3. Run `python3 scripts/token_reconcile.py --since 2026-05-19 --apply` to
|
||||
upsert measured Codex/Claude source rows.
|
||||
4. Run the same command with `--zero-superseded-fallbacks` only after measured
|
||||
source rows cover the affected window.
|
||||
5. Check `/token-events/quality/` or the Token Cost dashboard for fallback,
|
||||
missing-provenance, duplicate-source, and unattributed measured signals.
|
||||
6. Keep historical fallback rows as `superseded`; do not delete them.
|
||||
128
migrations/versions/v9q0r1s2t3u4_token_event_provenance.py
Normal file
128
migrations/versions/v9q0r1s2t3u4_token_event_provenance.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""add token event provenance fields
|
||||
|
||||
Revision ID: v9q0r1s2t3u4
|
||||
Revises: u8p9q0r1s2t3
|
||||
Create Date: 2026-05-23
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision = "v9q0r1s2t3u4"
|
||||
down_revision = "u8p9q0r1s2t3"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add provenance columns to ``token_events``, backfill them, and index them.

    Runs in three phases: add the new columns (NOT NULL ones carry a server
    default so existing rows stay valid), classify existing rows from their
    ``note`` / ``ref_id`` / ``agent`` values, then create the lookup indexes
    and the ``(measurement_kind, source_provider, source_id)`` unique
    constraint.
    """
    table = "token_events"

    # Phase 1: new columns, added in this exact order.
    new_columns = [
        sa.Column("measurement_kind", sa.Text(), nullable=False, server_default="estimated"),
        sa.Column("source_provider", sa.Text(), nullable=False, server_default="manual"),
        sa.Column("source_id", sa.Text(), nullable=True),
        sa.Column("source_path", sa.Text(), nullable=True),
        sa.Column("source_created_at", sa.TIMESTAMP(timezone=True), nullable=True),
        sa.Column("ingested_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("parser_version", sa.Text(), nullable=True),
        sa.Column("confidence", sa.Float(), nullable=False, server_default="0.35"),
        sa.Column("cached_input_tokens", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("reasoning_output_tokens", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("raw_total_tokens", sa.Integer(), nullable=True),
        sa.Column("cost_estimated_usd", sa.Float(), nullable=True),
        sa.Column(
            "raw_metadata",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
        ),
    ]
    for column in new_columns:
        op.add_column(table, column)

    # Phase 2: derive kind / provider / source id / confidence for rows that
    # existed before this migration; rows matching no WHEN branch keep the
    # server defaults assigned in phase 1.
    backfill_sql = """
        UPDATE token_events
        SET
            measurement_kind = CASE
                WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 'superseded'
                WHEN note = 'workplan' THEN 'allocated'
                WHEN note = 'heuristic' THEN 'estimated'
                WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 'measured'
                ELSE measurement_kind
            END,
            source_provider = CASE
                WHEN note = 'heuristic' THEN 'task_fallback'
                WHEN note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%' THEN 'codex_session'
                WHEN note = 'measured' AND agent ILIKE '%claude%' THEN 'claude_transcript'
                ELSE source_provider
            END,
            source_id = CASE
                WHEN source_id IS NULL AND (note LIKE 'backfill:codex-session%' OR ref_id LIKE 'codex:%')
                    THEN ref_id
                ELSE source_id
            END,
            raw_total_tokens = CASE
                WHEN raw_total_tokens IS NULL THEN tokens_in + tokens_out
                ELSE raw_total_tokens
            END,
            confidence = CASE
                WHEN note = 'heuristic_superseded_by_codex_backfill' THEN 0.0
                WHEN note = 'heuristic' THEN 0.35
                WHEN note = 'workplan' THEN 0.70
                WHEN note = 'measured' OR note LIKE 'backfill:codex-session%' THEN 1.0
                ELSE confidence
            END
        """
    op.execute(backfill_sql)

    # Phase 3: single-column indexes, then the source-identity constraint.
    indexed_columns = (
        "measurement_kind",
        "source_provider",
        "source_id",
        "source_created_at",
        "ingested_at",
    )
    for column_name in indexed_columns:
        op.create_index(f"ix_token_events_{column_name}", table, [column_name])

    op.create_unique_constraint(
        "uq_token_events_source_identity",
        table,
        ["measurement_kind", "source_provider", "source_id"],
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Undo this revision on ``token_events``.

    Drops the source-identity unique constraint first, then the five
    ``ix_token_events_*`` indexes, then the columns listed below, in that
    order.
    """
    table = "token_events"

    op.drop_constraint("uq_token_events_source_identity", table, type_="unique")

    for indexed_column in (
        "ingested_at",
        "source_created_at",
        "source_id",
        "source_provider",
        "measurement_kind",
    ):
        op.drop_index(f"ix_token_events_{indexed_column}", table_name=table)

    for column in (
        "raw_metadata",
        "cost_estimated_usd",
        "raw_total_tokens",
        "reasoning_output_tokens",
        "cached_input_tokens",
        "confidence",
        "parser_version",
        "ingested_at",
        "source_created_at",
        "source_path",
        "source_id",
        "source_provider",
        "measurement_kind",
    ):
        op.drop_column(table, column)
|
||||
@@ -0,0 +1,33 @@
|
||||
"""assign legacy source ids to measured token events
|
||||
|
||||
Revision ID: w0r1s2t3u4v5
|
||||
Revises: v9q0r1s2t3u4
|
||||
Create Date: 2026-05-23
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
revision = "w0r1s2t3u4v5"
|
||||
down_revision = "v9q0r1s2t3u4"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Assign a synthetic source id to measured token events that lack one.

    The id has the form ``<source_provider>:legacy:<row id>``.
    """
    # NOTE(review): SQL string concatenation with a NULL operand yields NULL,
    # so rows whose source_provider is NULL would keep a NULL source_id.
    # Confirm the column is never NULL for measured rows.
    assign_legacy_ids = """
        UPDATE token_events
        SET source_id = source_provider || ':legacy:' || id::text
        WHERE measurement_kind = 'measured'
          AND source_id IS NULL
        """
    op.execute(assign_legacy_ids)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Clear source ids that match the ``<source_provider>:legacy:<row id>`` form."""
    clear_legacy_ids = """
        UPDATE token_events
        SET source_id = NULL
        WHERE source_id = source_provider || ':legacy:' || id::text
        """
    op.execute(clear_legacy_ids)
|
||||
192
scripts/backfill_codex_token_events.py
Normal file
192
scripts/backfill_codex_token_events.py
Normal file
@@ -0,0 +1,192 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill State Hub token events from local Codex session logs.
|
||||
|
||||
The parser lives in ``api.services.token_sources.codex`` so this CLI only
|
||||
handles operator flags, repo attribution, idempotent writes, and fallback
|
||||
cleanup.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from api.services.token_sources import collect_codex_sessions, parse_iso # noqa: E402
|
||||
from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
|
||||
|
||||
DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
|
||||
BACKFILL_NOTE = "backfill:codex-session"
|
||||
SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_codex_backfill"
|
||||
|
||||
|
||||
def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
    """Send one JSON request to the API and return the decoded response.

    ``api_base`` and ``path`` are joined with exactly one slash. When ``body``
    is given it is sent as UTF-8 encoded JSON. An empty response body decodes
    to ``None``. The request uses a 30 second timeout; errors raised by
    ``urllib`` propagate to the caller.
    """
    endpoint = "/".join((api_base.rstrip("/"), path.lstrip("/")))
    payload = json.dumps(body).encode("utf-8") if body is not None else None
    request = urllib.request.Request(
        endpoint,
        data=payload,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        raw = response.read()
    # An empty body is treated as JSON null.
    return json.loads(raw or b"null")
|
||||
|
||||
|
||||
def find_codex_home(explicit: str | None) -> Path:
    """Locate the Codex home directory.

    Args:
        explicit: Directory requested by the operator (``--codex-home``), or
            ``None``/empty to auto-detect.

    Returns:
        The first existing directory among: ``explicit``, ``$CODEX_HOME``,
        ``~/.codex``, and a fixed WSL-mounted Windows profile path.

    Raises:
        SystemExit: if ``explicit`` is given but is not a directory, or if no
            candidate directory exists.
    """
    if explicit:
        # An explicit request is authoritative: silently falling back to a
        # different Codex home would backfill from the wrong session logs.
        requested = Path(explicit).expanduser()
        if not requested.is_dir():
            raise SystemExit(f"Codex home not found: {requested}")
        return requested

    candidates: list[Path] = []
    env_home = os.environ.get("CODEX_HOME")
    if env_home:
        candidates.append(Path(env_home).expanduser())
    candidates.extend(
        [
            Path.home() / ".codex",
            # Machine-specific fallback kept for backward compatibility.
            Path("/mnt/c/Users/bernd.worsch/.codex"),
        ]
    )
    for candidate in candidates:
        if candidate.is_dir():
            return candidate
    raise SystemExit("Could not find Codex home; pass --codex-home")
|
||||
|
||||
|
||||
def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    """Return all token events matching ``params``, fetched in pages of 1000.

    Paging stops at the first response that is not a list, is empty, or holds
    fewer rows than a full page.
    """
    page_size = 1000
    collected: list[dict[str, Any]] = []
    start = 0
    while True:
        query = urllib.parse.urlencode({**params, "limit": page_size, "offset": start})
        batch = http_json(api_base, "GET", f"/token-events/?{query}")
        if not isinstance(batch, list) or not batch:
            return collected
        collected.extend(batch)
        if len(batch) < page_size:
            return collected
        start += page_size
|
||||
|
||||
|
||||
def existing_codex_events(api_base: str) -> dict[str, dict[str, Any]]:
    """Index stored ``codex_session`` events (superseded ones included) by source id.

    The key is ``source_id``, falling back to ``ref_id``; events with no string
    key are dropped. When several events share a key the last one listed wins.
    """
    rows = list_events(
        api_base,
        {"source_provider": "codex_session", "include_superseded": "true"},
    )
    keyed = ((row.get("source_id") or row.get("ref_id"), row) for row in rows)
    return {key: row for key, row in keyed if isinstance(key, str)}
|
||||
|
||||
|
||||
def fetch_heuristics(api_base: str, since: str) -> list[dict[str, Any]]:
    """List non-superseded ``task_fallback`` events noted ``heuristic`` from ``since`` on."""
    filters: dict[str, Any] = {
        "source_provider": "task_fallback",
        "note": "heuristic",
        "since": since,
        "include_superseded": "false",
    }
    return list_events(api_base, filters)
|
||||
|
||||
|
||||
def patch_superseded_heuristic(api_base: str, event_id: str) -> None:
    """PATCH one heuristic event to zero tokens and mark it as superseded."""
    superseded_fields = {
        "tokens_in": 0,
        "tokens_out": 0,
        "note": SUPERSEDED_HEURISTIC_NOTE,
        "measurement_kind": "superseded",
        "source_provider": "task_fallback",
        "confidence": 0.0,
        "raw_total_tokens": 0,
    }
    http_json(api_base, "PATCH", f"/token-events/{event_id}", superseded_fields)
|
||||
|
||||
|
||||
def main() -> int:
    """Plan, report and optionally apply the Codex session backfill.

    Collects Codex sessions since ``--since``, compares each against the
    ``codex_session`` events already stored, and prints a summary. Without
    ``--apply`` nothing is written. With ``--apply`` every planned session is
    upserted; with ``--zero-heuristics`` the matching heuristic fallback events
    are then zeroed.

    Returns:
        Process exit code (always 0; failures surface as exceptions).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to backfill from")
    parser.add_argument("--api-base", default=DEFAULT_API)
    parser.add_argument("--codex-home")
    parser.add_argument("--apply", action="store_true", help="write backfill events")
    parser.add_argument(
        "--zero-heuristics",
        action="store_true",
        help="set post-since heuristic task fallback events to zero after backfill",
    )
    args = parser.parse_args()

    since = parse_iso(args.since)
    since_param = since.isoformat()
    codex_home = find_codex_home(args.codex_home)
    repo_refs = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
    existing = existing_codex_events(args.api_base)
    sessions = collect_codex_sessions(codex_home, since)

    # Each entry: (action, session, repo_id, repo_slug, attribution_method).
    planned: list[tuple[str, Any, str | None, str | None, str | None]] = []
    # repo label -> [session count, tokens_in, tokens_out]; counts every
    # session found, including ones that will be skipped.
    by_repo: dict[str, list[int]] = {}
    for session in sessions:
        event = existing.get(session.source_id)
        # Stored counts may be null; treat them as zero, as the heuristic
        # total below already does.
        existing_total = (
            ((event.get("tokens_in") or 0) + (event.get("tokens_out") or 0)) if event else 0
        )
        if event is None:
            action = "create"
        elif session.tokens_total > existing_total:
            action = "update"
        else:
            action = "skip"

        # Resolve attribution once and carry it into the apply phase.
        match = resolve_repo(session.cwd, repo_refs)
        repo_id = match.repo_id if match else None
        repo_slug = match.slug if match else None
        attribution_method = match.method if match else None
        if action != "skip":
            planned.append((action, session, repo_id, repo_slug, attribution_method))

        label = repo_slug or "(unattributed)"
        totals = by_repo.setdefault(label, [0, 0, 0])
        totals[0] += 1
        totals[1] += session.tokens_in
        totals[2] += session.tokens_out

    heuristics = fetch_heuristics(args.api_base, since_param) if args.zero_heuristics else []

    print(f"codex_home: {codex_home}")
    print(f"since: {since.isoformat()}")
    print(f"sessions found: {len(sessions)}")
    print(f"backfill events to create: {sum(1 for action, *_ in planned if action == 'create')}")
    print(f"backfill events to update: {sum(1 for action, *_ in planned if action == 'update')}")
    for repo_slug, (count, tokens_in, tokens_out) in sorted(by_repo.items()):
        print(f"  {repo_slug}: {count} sessions, {tokens_in + tokens_out:,} tokens")
    if args.zero_heuristics:
        total = sum((e.get("tokens_in") or 0) + (e.get("tokens_out") or 0) for e in heuristics)
        print(f"heuristic events to zero: {len(heuristics)} ({total:,} tokens)")

    if not args.apply:
        print("dry run only; pass --apply to write changes")
        return 0

    for _action, session, repo_id, repo_slug, attribution_method in planned:
        payload = session.to_token_event_payload(repo_id=repo_id)
        payload["note"] = BACKFILL_NOTE
        payload["raw_metadata"] = {
            **payload.get("raw_metadata", {}),
            "repo_slug": repo_slug,
            "attribution_method": attribution_method,
        }
        http_json(args.api_base, "POST", "/token-events/upsert", payload)
    for event in heuristics:
        patch_superseded_heuristic(args.api_base, event["id"])

    print(f"upserted {len(planned)} backfill events")
    if args.zero_heuristics:
        print(f"zeroed {len(heuristics)} heuristic events")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
369
scripts/bootstrap-env.sh
Executable file
369
scripts/bootstrap-env.sh
Executable file
@@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
GITEA_CONF="${GITEA_CONF:-$HOME/.railiance_gitea.conf}"
|
||||
GITEA_URL="${GITEA_URL:-http://92.205.130.254:32166}"
|
||||
GITEA_USER="${GITEA_USER:-}"
|
||||
GITEA_TOKEN="${GITEA_TOKEN:-}"
|
||||
GIT_HELPER="${GIT_HELPER:-auto}"
|
||||
INSTALL_MISSING=0
|
||||
NON_INTERACTIVE=0
|
||||
DRY_RUN=0
|
||||
AUTHORIZE_SSH=0
|
||||
ALLOW_PLAINTEXT_STORE=0
|
||||
SKIP_GITEA=0
|
||||
SKIP_MCP=0
|
||||
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519}"
|
||||
SSH_TARGETS=(
|
||||
"tegwick@92.205.62.239"
|
||||
"tegwick@92.205.130.254"
|
||||
)
|
||||
|
||||
# Print command-line help to stdout.
usage() {
  cat <<'USAGE'
Usage: scripts/bootstrap-env.sh [options]

Idempotently prepares a State Hub operator or collaborator environment.

Options:
  --install-missing        Install missing apt packages when possible.
  --non-interactive        Do not prompt; warn instead of asking for secrets.
  --dry-run                Show intended actions without changing local config.
  --git-helper MODE        auto, libsecret, cache, or store. Default: auto.
  --allow-plaintext-store  Allow git credential.helper=store in auto mode.
  --authorize-ssh          Run ssh-copy-id for configured SSH targets.
  --ssh-target USER@HOST   Add an SSH authorization target. May repeat.
  --gitea-url URL          Gitea base URL for ~/.railiance_gitea.conf.
  --gitea-user USER        Gitea user for ~/.railiance_gitea.conf.
  --gitea-token TOKEN      Gitea token; otherwise prompted when interactive.
  --skip-gitea             Do not create or update ~/.railiance_gitea.conf.
  --skip-mcp               Do not run make register-mcp.
  -h, --help               Show this help.
USAGE
}
|
||||
|
||||
# Status line helpers. Only err writes to stderr; the others write to stdout.
ok() {
  printf '[OK] %s\n' "$*"
}

warn() {
  printf '[WARN] %s\n' "$*"
}

err() {
  printf '[ERR] %s\n' "$*" >&2
}

# Section heading, preceded by a blank line.
step() {
  printf '\n==> %s\n' "$*"
}
|
||||
|
||||
# Execute the given command, or only print it when DRY_RUN is 1.
run() {
  if [ "$DRY_RUN" -ne 1 ]; then
    "$@"
    return
  fi
  printf 'DRY-RUN: %s\n' "$*"
}
|
||||
|
||||
# need_arg OPTION VALUE: exit with status 2 when VALUE is missing or empty.
need_arg() {
  local option="$1"
  local value="${2:-}"
  if [ -n "$value" ]; then
    return 0
  fi
  err "$option requires a value"
  exit 2
}
|
||||
|
||||
# Parse command-line options. Boolean switches set their flag to 1;
# value-taking options are checked by need_arg before the value is consumed.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --install-missing)       INSTALL_MISSING=1; shift ;;
    --non-interactive)       NON_INTERACTIVE=1; shift ;;
    --dry-run)               DRY_RUN=1; shift ;;
    --allow-plaintext-store) ALLOW_PLAINTEXT_STORE=1; shift ;;
    --authorize-ssh)         AUTHORIZE_SSH=1; shift ;;
    --skip-gitea)            SKIP_GITEA=1; shift ;;
    --skip-mcp)              SKIP_MCP=1; shift ;;
    --git-helper)
      need_arg "$1" "${2:-}"
      GIT_HELPER="$2"
      shift 2
      ;;
    --ssh-target)
      need_arg "$1" "${2:-}"
      SSH_TARGETS+=("$2")
      shift 2
      ;;
    --gitea-url)
      need_arg "$1" "${2:-}"
      GITEA_URL="$2"
      shift 2
      ;;
    --gitea-user)
      need_arg "$1" "${2:-}"
      GITEA_USER="$2"
      shift 2
      ;;
    --gitea-token)
      need_arg "$1" "${2:-}"
      GITEA_TOKEN="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      err "unknown argument: $1"
      usage >&2
      exit 2
      ;;
  esac
done

# Reject helper modes other than the four supported ones.
if [[ ! "$GIT_HELPER" =~ ^(auto|libsecret|cache|store)$ ]]; then
  err "--git-helper must be auto, libsecret, cache, or store"
  exit 2
fi
|
||||
|
||||
# apt_install PACKAGE...: install packages through sudo apt-get when
# --install-missing was given; otherwise only warn about them.
apt_install() {
  local pkgs=("$@")
  if [ "$INSTALL_MISSING" -eq 1 ]; then
    if command -v sudo >/dev/null 2>&1; then
      run sudo apt-get update
      run sudo apt-get install -y "${pkgs[@]}"
    else
      warn "sudo is not available; cannot install: ${pkgs[*]}"
    fi
    return
  fi
  warn "Missing packages: ${pkgs[*]}"
  warn "Rerun with --install-missing or install them manually."
}
|
||||
|
||||
# Report which required and optional commands are on PATH, then hand the
# packages that provide the missing required commands to apt_install.
check_commands() {
  step "Checking prerequisites"
  local missing_packages=()
  local required=(git curl ssh-keygen ssh-copy-id python3 make)
  local optional=(sops age helm kubectl uv claude)
  local cmd package

  for cmd in "${required[@]}"; do
    if command -v "$cmd" >/dev/null 2>&1; then
      ok "$cmd found"
      continue
    fi
    warn "$cmd missing"

    # Command names are not always package names: the OpenSSH client tools
    # ship together in openssh-client on Debian/Ubuntu.
    case "$cmd" in
      ssh-keygen|ssh-copy-id) package="openssh-client" ;;
      *) package="$cmd" ;;
    esac

    # Queue each package once even when several of its commands are missing.
    case " ${missing_packages[*]:-} " in
      *" $package "*) ;;
      *) missing_packages+=("$package") ;;
    esac
  done

  # Optional tools are reported only; they are never installed here.
  for cmd in "${optional[@]}"; do
    if command -v "$cmd" >/dev/null 2>&1; then
      ok "$cmd found"
    else
      warn "$cmd missing"
    fi
  done

  if [ "${#missing_packages[@]}" -gt 0 ]; then
    apt_install "${missing_packages[@]}"
  fi
}
|
||||
|
||||
# Print the first executable git-credential-libsecret found in the known
# locations and return 0; return 1 when none is executable.
libsecret_helper_path() {
  local helper
  for helper in \
    "/usr/share/doc/git/contrib/credential/libsecret/git-credential-libsecret" \
    "/usr/lib/git-core/git-credential-libsecret" \
    "/usr/libexec/git-core/git-credential-libsecret"; do
    [ -x "$helper" ] || continue
    printf '%s\n' "$helper"
    return 0
  done
  return 1
}
|
||||
|
||||
# Try to build git-credential-libsecret from the contrib sources shipped with
# git. A failed or impossible build is reported as a warning and never aborts
# the script: the caller re-probes for the helper afterwards and falls back to
# the cache helper when it is still missing.
build_libsecret_helper() {
  local source_dir="/usr/share/doc/git/contrib/credential/libsecret"

  if [ ! -d "$source_dir" ]; then
    apt_install libsecret-1-0 libsecret-1-dev make gcc
  fi
  if [ ! -d "$source_dir" ]; then
    return 0
  fi

  if ! command -v sudo >/dev/null 2>&1; then
    warn "sudo is not available; cannot build the libsecret helper in $source_dir"
    return 0
  fi

  # Run inside a condition so a build failure does not trip `set -e`.
  if ! run sudo make -C "$source_dir"; then
    warn "Building the libsecret helper failed; check that libsecret-1-dev, make and gcc are installed."
  fi
  return 0
}
|
||||
|
||||
# Set the global git credential.helper unless one is already configured.
# "auto" prefers an installed libsecret helper, then "store" (only with
# --allow-plaintext-store), then the in-memory cache helper.
configure_git_helper() {
  step "Configuring Git credential helper"

  local existing
  existing="$(git config --global --get credential.helper || true)"
  if [ -n "$existing" ]; then
    ok "credential.helper already set: $existing"
    return
  fi

  local mode="$GIT_HELPER"
  if [ "$mode" = "auto" ]; then
    if libsecret_helper_path >/dev/null 2>&1; then
      mode="libsecret"
    elif [ "$ALLOW_PLAINTEXT_STORE" -eq 1 ]; then
      mode="store"
    else
      mode="cache"
    fi
  fi

  local cache_helper="cache --timeout=3600"
  local helper_bin
  case "$mode" in
    store)
      if [ "$ALLOW_PLAINTEXT_STORE" -ne 1 ]; then
        err "credential.helper=store writes plaintext credentials."
        err "Rerun with --allow-plaintext-store if that is intended for this host."
        exit 1
      fi
      run git config --global credential.helper store
      ;;
    cache)
      run git config --global credential.helper "$cache_helper"
      ;;
    libsecret)
      helper_bin="$(libsecret_helper_path || true)"
      if [ -z "$helper_bin" ]; then
        # Not installed yet: try to build it, then look again.
        build_libsecret_helper
        helper_bin="$(libsecret_helper_path || true)"
      fi
      if [ -n "$helper_bin" ]; then
        run git config --global credential.helper "$helper_bin"
      else
        warn "libsecret helper is not available; using cache helper for this machine."
        run git config --global credential.helper "$cache_helper"
      fi
      ;;
  esac

  ok "credential.helper configured"
}
|
||||
|
||||
# Ensure an ed25519 SSH key exists at $SSH_KEY, print its public half, and
# optionally copy it to every entry of SSH_TARGETS with ssh-copy-id.
# Every change goes through `run`, so --dry-run touches nothing.
setup_ssh_key() {
  step "Checking SSH key"
  local ssh_dir="$HOME/.ssh"
  run mkdir -p "$ssh_dir"
  run chmod 700 "$ssh_dir"

  if [ -f "$SSH_KEY" ]; then
    ok "SSH key exists: $SSH_KEY"
  else
    # USER can be unset (containers, cron), which would abort under `set -u`.
    local key_comment
    key_comment="${USER:-$(id -un)}@$(hostname)-state-hub"
    run ssh-keygen -t ed25519 -f "$SSH_KEY" -N "" -C "$key_comment"
    # Only claim a key was generated when ssh-keygen actually ran.
    if [ "$DRY_RUN" -ne 1 ]; then
      ok "SSH key generated: $SSH_KEY"
    fi
  fi

  if [ -f "${SSH_KEY}.pub" ]; then
    printf '\nPublic key to authorize on managed hosts:\n\n'
    sed 's/^/ /' "${SSH_KEY}.pub"
    printf '\n'
  fi

  if [ "$AUTHORIZE_SSH" -eq 1 ]; then
    local target
    for target in "${SSH_TARGETS[@]}"; do
      run ssh-copy-id -i "${SSH_KEY}.pub" "$target"
    done
  else
    warn "SSH authorization not attempted. Use --authorize-ssh after confirming host access."
  fi
}
|
||||
|
||||
# Create $GITEA_CONF (mode 600) holding GITEA_URL, GITEA_USER and GITEA_TOKEN.
# An existing file is left as is apart from tightening its mode. Missing
# user/token values are prompted for unless --non-interactive was given; if
# either is still empty the file is not written.
write_gitea_conf() {
  step "Checking Gitea config"
  if [ "$SKIP_GITEA" -eq 1 ]; then
    warn "Skipping Gitea config by request."
    return
  fi

  if [ -f "$GITEA_CONF" ]; then
    # Through `run` so --dry-run does not change file modes.
    run chmod 600 "$GITEA_CONF"
    ok "$GITEA_CONF already exists"
    return
  fi

  # `read` returns non-zero at EOF (closed or non-terminal stdin); tolerate it
  # so the empty-value warning below is reached instead of a `set -e` abort.
  if [ -z "$GITEA_USER" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
    read -r -p "Gitea username: " GITEA_USER || true
  fi

  if [ -z "$GITEA_TOKEN" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
    read -r -s -p "Gitea token (requires read:user and repository write scopes): " GITEA_TOKEN || true
    printf '\n'
  fi

  if [ -z "$GITEA_USER" ] || [ -z "$GITEA_TOKEN" ]; then
    warn "Gitea config not written. Set GITEA_USER/GITEA_TOKEN or rerun interactively."
    return
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    printf 'DRY-RUN: would write %s with GITEA_URL and GITEA_USER; token hidden\n' "$GITEA_CONF"
    return
  fi

  # Restrict the umask in a subshell so it applies only to this file and does
  # not leak into the steps that run after this function.
  # NOTE(review): values are written inside double quotes without escaping;
  # confirm how consumers parse this file before allowing values that contain
  # quotes, `$` or backticks.
  (
    umask 077
    {
      printf 'GITEA_URL="%s"\n' "$GITEA_URL"
      printf 'GITEA_USER="%s"\n' "$GITEA_USER"
      printf 'GITEA_TOKEN="%s"\n' "$GITEA_TOKEN"
    } >"$GITEA_CONF"
  )
  chmod 600 "$GITEA_CONF"
  ok "Wrote $GITEA_CONF"
}
|
||||
|
||||
# Register the State Hub MCP server through `make register-mcp`, unless
# --skip-mcp was given.
register_mcp() {
  step "Registering State Hub MCP"
  if [ "$SKIP_MCP" -eq 1 ]; then
    warn "Skipping MCP registration by request."
    return
  fi

  if [ "$DRY_RUN" -ne 1 ]; then
    make -C "$STATE_HUB_DIR" register-mcp
    return
  fi
  # NOTE(review): in dry-run mode `run` only prints this command, so the
  # DRY_RUN=1 make variable never reaches register-mcp.sh. Confirm whether the
  # nested dry-run was meant to execute.
  run make -C "$STATE_HUB_DIR" register-mcp DRY_RUN=1
}
|
||||
|
||||
# Probe /state/health on the local API port, then on the default tunnel port,
# and report the first that answers. Never fails; it only prints status lines.
health_check() {
  step "Checking State Hub reachability"
  local local_api="http://127.0.0.1:8000"
  local tunnel_api="http://127.0.0.1:18000"

  if curl -fsS --max-time 2 "$local_api/state/health" >/dev/null 2>&1; then
    ok "State Hub API reachable at $local_api"
    return
  fi
  if curl -fsS --max-time 2 "$tunnel_api/state/health" >/dev/null 2>&1; then
    ok "State Hub API reachable through tunnel at $tunnel_api"
    return
  fi

  warn "State Hub API is not reachable locally or through the default tunnel."
  warn "Start it with 'make api' or run 'make bridges' if this machine uses ops-bridge."
}
|
||||
|
||||
# Run every bootstrap phase in order.
main() {
  step "State Hub environment bootstrap"
  printf 'Repository: %s\n' "$STATE_HUB_DIR"

  local phase
  for phase in \
    check_commands \
    configure_git_helper \
    setup_ssh_key \
    write_gitea_conf \
    register_mcp \
    health_check; do
    "$phase"
  done

  ok "Bootstrap checks complete."
}
|
||||
|
||||
main "$@"
|
||||
@@ -1596,7 +1596,7 @@ def fix_repo(
|
||||
task_id = ctx["task_id"]
|
||||
status = ctx["status"]
|
||||
result = _api_patch(api_base, f"/tasks/{task_id}",
|
||||
{"status": status})
|
||||
{"status": status, "suppress_token_event": True})
|
||||
if result is not None and "_error" not in result:
|
||||
report.fixes_applied.append(
|
||||
f"C-10 fixed: task {task_id[:8]}… status → {status!r}"
|
||||
|
||||
151
scripts/register-mcp.sh
Executable file
151
scripts/register-mcp.sh
Executable file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
CLAUDE_JSON="${CLAUDE_JSON:-$HOME/.claude.json}"
|
||||
SERVER_NAME="${STATE_HUB_MCP_NAME:-state-hub}"
|
||||
API_BASE="${API_BASE:-}"
|
||||
MCP_URL="${MCP_URL:-}"
|
||||
DRY_RUN=0
|
||||
|
||||
# Print command-line help to stdout.
usage() {
  cat <<'USAGE'
Usage: scripts/register-mcp.sh [--url URL] [--api-base URL] [--dry-run]

Registers the State Hub MCP server for Claude Code.

Options:
  --url URL        MCP SSE URL to register. Defaults to local :8001 or tunnel :18001.
  --api-base URL   State Hub API URL used for reachability checks.
  --dry-run        Print what would happen without changing Claude config.
  -h, --help       Show this help.
USAGE
}
|
||||
|
||||
# Parse command-line options.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --url|--api-base)
      # Without this check a trailing option makes `shift 2` fail, and
      # `set -e` then ends the script without any message.
      if [ "$#" -lt 2 ]; then
        echo "ERROR: $1 requires a value" >&2
        usage >&2
        exit 2
      fi
      if [ "$1" = "--url" ]; then
        MCP_URL="$2"
      else
        API_BASE="$2"
      fi
      shift 2
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "ERROR: unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
|
||||
|
||||
# Print all arguments, space-joined, as one line on stdout.
status() {
  local message="$*"
  printf '%s\n' "$message"
}
|
||||
|
||||
# api_healthy BASE_URL: succeed when BASE_URL/state/health answers within
# two seconds. A trailing slash on BASE_URL is ignored. Prints nothing.
api_healthy() {
  local health_url="${1%/}/state/health"
  curl -fsS --max-time 2 "$health_url" >/dev/null 2>&1
}
|
||||
|
||||
# port_open HOST PORT: succeed when a TCP connection to HOST:PORT can be
# opened within two seconds, using bash's /dev/tcp redirection.
port_open() {
  local target_host="$1" target_port="$2"
  timeout 2 bash -c ":</dev/tcp/${target_host}/${target_port}" >/dev/null 2>&1
}
|
||||
|
||||
# Pick the API base when none was given: the local port if it is healthy,
# else the tunnel port if that is healthy, else the local port anyway.
if [ -z "$API_BASE" ]; then
  API_BASE="http://127.0.0.1:8000"
  if ! api_healthy "$API_BASE" && api_healthy "http://127.0.0.1:18000"; then
    API_BASE="http://127.0.0.1:18000"
  fi
fi

# Pick the MCP SSE URL when none was given: local :8001 if open; otherwise
# tunnel :18001 if it is open or the API was found on the tunnel; otherwise
# local :8001.
if [ -z "$MCP_URL" ]; then
  mcp_port=8001
  if ! port_open 127.0.0.1 8001; then
    if port_open 127.0.0.1 18001 || [ "$API_BASE" = "http://127.0.0.1:18000" ]; then
      mcp_port=18001
    fi
  fi
  MCP_URL="http://127.0.0.1:${mcp_port}/sse"
fi
|
||||
|
||||
# Compact JSON server definition handed to `claude mcp add-json`.
CONFIG="$(python3 - "$MCP_URL" <<'PY'
import json
import sys

server = {"type": "sse", "url": sys.argv[1]}
print(json.dumps(server, separators=(",", ":")))
PY
)"

status "State Hub directory: $STATE_HUB_DIR"
status "API health check: ${API_BASE%/}/state/health"
status "MCP registration: $SERVER_NAME -> $MCP_URL"

# An unreachable API is only a warning; registration still proceeds.
if ! api_healthy "$API_BASE"; then
  status "WARN: State Hub API is not reachable at ${API_BASE%/}/state/health."
  status "      Start it with 'make api' or bring up the ops-bridge tunnel."
else
  status "OK: State Hub API is reachable."
fi

# The claude CLI is required unless this is a dry run.
if ! command -v claude >/dev/null 2>&1; then
  if [ "$DRY_RUN" -ne 1 ]; then
    status "ERROR: claude CLI not found on PATH."
    status "       Install or expose Claude Code CLI, then rerun: make register-mcp"
    exit 1
  fi
  status "WARN: claude CLI not found on PATH; dry-run will still show the command."
fi
|
||||
|
||||
# URL currently registered for $SERVER_NAME in the Claude config, or empty.
# A missing, unreadable, malformed or unexpectedly shaped config counts as
# "not registered" instead of crashing the embedded Python, which would abort
# this script under `set -e`.
CURRENT_URL="$(python3 - "$CLAUDE_JSON" "$SERVER_NAME" <<'PY'
import json
import sys
from pathlib import Path

url = ""
try:
    data = json.loads(Path(sys.argv[1]).read_text(encoding="utf-8"))
    candidate = data["mcpServers"][sys.argv[2]]["url"]
    if isinstance(candidate, str):
        url = candidate
except (OSError, ValueError, KeyError, TypeError):
    # OSError: file missing or unreadable. ValueError: invalid JSON or text
    # encoding. KeyError/TypeError: expected keys absent or wrong value types.
    pass
print(url)
PY
)"
|
||||
|
||||
# Nothing to do when the wanted URL is already registered.
if [ "$CURRENT_URL" = "$MCP_URL" ]; then
  status "OK: $SERVER_NAME is already registered with this URL."
  exit 0
fi

# In dry-run mode only show the command that would register the server.
if [ "$DRY_RUN" -ne 1 ]; then
  claude mcp add-json -s user "$SERVER_NAME" "$CONFIG"
  status "OK: registered $SERVER_NAME."
  status "Restart Claude Code so the MCP server list is refreshed."
else
  status "DRY-RUN: would run:"
  status "  claude mcp add-json -s user $SERVER_NAME '$CONFIG'"
  exit 0
fi
|
||||
@@ -1,27 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
"""PostToolUse hook: replace heuristic token events with real transcript-derived counts.
|
||||
|
||||
Fires after mcp__state-hub__update_task_status when status=done.
|
||||
Fires after supported task completion tools when status=done.
|
||||
Reads the Claude Code session transcript to compute the token delta since the
|
||||
previous task completion, then PATCHes the heuristic event with real counts.
|
||||
|
||||
State is persisted per session in /tmp/custodian_tokens_<session_id>.json so
|
||||
deltas are correctly scoped even when multiple tasks complete in one session.
|
||||
State is persisted per session in a durable cache directory so deltas survive
|
||||
restarts and multiple task completions in one session.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
API = os.environ.get("CUSTODIAN_API", "http://127.0.0.1:8000")
|
||||
STATE_DIR = Path(os.environ.get("TMPDIR", "/tmp"))
|
||||
STATE_DIR = Path(os.environ.get("CUSTODIAN_TOKEN_STATE_DIR", Path.home() / ".cache" / "state-hub" / "token-hooks"))
|
||||
HEALTH_LOG = STATE_DIR / "hook-health.jsonl"
|
||||
PARSER_VERSION = "claude-transcript-delta-v1"
|
||||
SUPPORTED_TOOL_HINTS = (
|
||||
"update_task_status",
|
||||
"tasks",
|
||||
"task",
|
||||
)
|
||||
|
||||
|
||||
def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
|
||||
def utc_now() -> str:
    """Return the current UTC time as an ISO 8601 string with a UTC offset."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
|
||||
|
||||
|
||||
def write_health(event: dict) -> None:
    """Append ``event`` plus a ``ts`` timestamp as one JSON line to the health log.

    The state directory is created when missing. Filesystem errors are
    ignored.
    """
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        with HEALTH_LOG.open(mode="a", encoding="utf-8") as log:
            record = {"ts": utc_now(), **event}
            log.write(f"{json.dumps(record, sort_keys=True)}\n")
    except OSError:
        # Deliberately swallowed: the health log is best-effort.
        return
|
||||
|
||||
|
||||
def read_transcript_totals(transcript_path: str) -> tuple[int, int, int]:
|
||||
"""Sum all usage entries in the transcript JSONL up to the current point."""
|
||||
total_in = total_out = 0
|
||||
total_in = total_out = cached_in = 0
|
||||
try:
|
||||
with open(transcript_path) as f:
|
||||
for line in f:
|
||||
@@ -29,10 +50,9 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
|
||||
entry = json.loads(line)
|
||||
usage = entry.get("message", {}).get("usage", {})
|
||||
if usage:
|
||||
# Count all input token variants (direct + cache creation + cache read)
|
||||
total_in += (
|
||||
usage.get("input_tokens", 0)
|
||||
+ usage.get("cache_creation_input_tokens", 0)
|
||||
total_in += usage.get("input_tokens", 0)
|
||||
cached_in += (
|
||||
usage.get("cache_creation_input_tokens", 0)
|
||||
+ usage.get("cache_read_input_tokens", 0)
|
||||
)
|
||||
total_out += usage.get("output_tokens", 0)
|
||||
@@ -40,21 +60,22 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
|
||||
continue
|
||||
except OSError:
|
||||
pass
|
||||
return total_in, total_out
|
||||
return total_in, total_out, cached_in
|
||||
|
||||
|
||||
def load_state(session_id: str) -> tuple[int, int]:
|
||||
def load_state(session_id: str) -> tuple[int, int, int]:
|
||||
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
|
||||
try:
|
||||
data = json.loads(state_file.read_text())
|
||||
return data.get("total_in", 0), data.get("total_out", 0)
|
||||
return data.get("total_in", 0), data.get("total_out", 0), data.get("cached_in", 0)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return 0, 0
|
||||
return 0, 0, 0
|
||||
|
||||
|
||||
def save_state(session_id: str, total_in: int, total_out: int) -> None:
|
||||
def save_state(session_id: str, total_in: int, total_out: int, cached_in: int) -> None:
|
||||
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
|
||||
state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out}))
|
||||
state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out, "cached_in": cached_in}))
|
||||
|
||||
|
||||
def api_get(path: str):
|
||||
@@ -75,51 +96,89 @@ def api_patch(path: str, data: dict):
|
||||
return json.loads(r.read())
|
||||
|
||||
|
||||
def extract_done_task(payload: dict) -> tuple[str | None, dict]:
    """Extract the completed task id from a hook payload.

    Args:
        payload: Hook payload; ``tool_name`` and ``tool_input`` are read.

    Returns:
        ``(task_id, tool_input)`` when the tool name contains one of
        ``SUPPORTED_TOOL_HINTS`` and ``tool_input["status"]`` is ``"done"``;
        otherwise ``(None, {})``. ``task_id`` is the first truthy value among
        the ``task_id``, ``id`` and ``taskId`` keys and may itself be ``None``.
    """
    # A null or non-string tool name, or a non-dict tool input, means this is
    # not a task completion we can handle. It must not raise.
    tool_name = payload.get("tool_name") or ""
    if not isinstance(tool_name, str):
        return None, {}
    if not any(hint in tool_name for hint in SUPPORTED_TOOL_HINTS):
        return None, {}

    tool_input = payload.get("tool_input") or {}
    if not isinstance(tool_input, dict):
        return None, {}
    if tool_input.get("status") != "done":
        return None, {}

    task_id = (
        tool_input.get("task_id")
        or tool_input.get("id")
        or tool_input.get("taskId")
    )
    return task_id, tool_input
|
||||
|
||||
|
||||
def main() -> None:
|
||||
try:
|
||||
payload = json.loads(sys.stdin.read())
|
||||
except json.JSONDecodeError:
|
||||
return
|
||||
|
||||
tool_name = payload.get("tool_name", "")
|
||||
if "update_task_status" not in tool_name:
|
||||
return
|
||||
|
||||
tool_input = payload.get("tool_input", {})
|
||||
if tool_input.get("status") != "done":
|
||||
return
|
||||
|
||||
task_id = tool_input.get("task_id")
|
||||
task_id, tool_input = extract_done_task(payload)
|
||||
if not task_id:
|
||||
write_health({"status": "skipped", "reason": "not_done_task_completion", "tool_name": payload.get("tool_name")})
|
||||
return
|
||||
|
||||
transcript_path = payload.get("transcript_path", "")
|
||||
session_id = payload.get("session_id", "unknown")
|
||||
|
||||
# Compute token delta for this task
|
||||
current_in, current_out = read_transcript_totals(transcript_path)
|
||||
last_in, last_out = load_state(session_id)
|
||||
current_in, current_out, current_cached = read_transcript_totals(transcript_path)
|
||||
last_in, last_out, last_cached = load_state(session_id)
|
||||
delta_in = max(0, current_in - last_in)
|
||||
delta_out = max(0, current_out - last_out)
|
||||
save_state(session_id, current_in, current_out)
|
||||
delta_cached = max(0, current_cached - last_cached)
|
||||
save_state(session_id, current_in, current_out, current_cached)
|
||||
|
||||
if delta_in == 0 and delta_out == 0:
|
||||
return # Nothing measurable — leave heuristic in place
|
||||
if delta_in == 0 and delta_out == 0 and delta_cached == 0:
|
||||
write_health({
|
||||
"status": "skipped",
|
||||
"reason": "zero_delta",
|
||||
"session_id": session_id,
|
||||
"task_id": task_id,
|
||||
"source_path": transcript_path,
|
||||
})
|
||||
return
|
||||
|
||||
# Find the most recent heuristic event for this task and replace it
|
||||
try:
|
||||
events = api_get(f"/token-events/?task_id={task_id}&note=heuristic&limit=5")
|
||||
except (urllib.error.URLError, OSError):
|
||||
write_health({"status": "skipped", "reason": "api_offline", "session_id": session_id, "task_id": task_id})
|
||||
return # API offline — leave heuristic as-is
|
||||
|
||||
if not events:
|
||||
write_health({"status": "skipped", "reason": "no_fallback_event", "session_id": session_id, "task_id": task_id})
|
||||
return
|
||||
|
||||
event_id = events[0]["id"]
|
||||
model = tool_input.get("model")
|
||||
agent = tool_input.get("agent")
|
||||
|
||||
patch_body: dict = {"tokens_in": delta_in, "tokens_out": delta_out, "note": "measured"}
|
||||
patch_body: dict = {
|
||||
"tokens_in": delta_in,
|
||||
"tokens_out": delta_out,
|
||||
"note": "measured",
|
||||
"measurement_kind": "measured",
|
||||
"source_provider": "claude_transcript",
|
||||
"source_id": f"claude:{session_id}:task:{task_id}",
|
||||
"source_path": transcript_path or None,
|
||||
"parser_version": PARSER_VERSION,
|
||||
"confidence": 1.0,
|
||||
"cached_input_tokens": delta_cached,
|
||||
"raw_total_tokens": delta_in + delta_out + delta_cached,
|
||||
"raw_metadata": {
|
||||
"hook": "post_tool_use",
|
||||
"tool_name": payload.get("tool_name"),
|
||||
"state_dir": str(STATE_DIR),
|
||||
},
|
||||
}
|
||||
if model:
|
||||
patch_body["model"] = model
|
||||
if agent:
|
||||
@@ -128,7 +187,19 @@ def main() -> None:
|
||||
try:
|
||||
api_patch(f"/token-events/{event_id}", patch_body)
|
||||
except (urllib.error.URLError, OSError):
|
||||
pass
|
||||
write_health({"status": "skipped", "reason": "patch_failed", "session_id": session_id, "task_id": task_id})
|
||||
return
|
||||
|
||||
write_health({
|
||||
"status": "patched",
|
||||
"session_id": session_id,
|
||||
"task_id": task_id,
|
||||
"event_id": event_id,
|
||||
"tokens_in": delta_in,
|
||||
"tokens_out": delta_out,
|
||||
"cached_input_tokens": delta_cached,
|
||||
"source_path": transcript_path,
|
||||
})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
239
scripts/token_reconcile.py
Normal file
239
scripts/token_reconcile.py
Normal file
@@ -0,0 +1,239 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reconcile token evidence from local agent sources against State Hub.
|
||||
|
||||
Dry-run is the default. Use ``--apply`` to upsert measured source events and
|
||||
``--zero-superseded-fallbacks`` to zero task fallback rows that are covered by
|
||||
source-backed measurements.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from api.services.token_sources import collect_claude_transcripts, collect_codex_sessions, parse_iso # noqa: E402
|
||||
from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
|
||||
|
||||
DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
|
||||
SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_source_measurement"
|
||||
|
||||
|
||||
def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
    """Send one JSON request to the API and return the decoded response.

    Args:
        api_base: Base URL of the API; a trailing slash is tolerated.
        method: HTTP verb passed straight to ``urllib.request.Request``.
        path: Endpoint path; a leading slash is tolerated.
        body: Optional JSON-serialisable payload. ``None`` sends no body.

    Returns:
        The parsed JSON document, or ``None`` when the response body is empty.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.loads`` raise; nothing is
        caught here.
    """
    endpoint = api_base.rstrip("/") + "/" + path.lstrip("/")
    encoded_body = None if body is None else json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        endpoint,
        data=encoded_body,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        # An empty body decodes as JSON null so callers always get a value.
        return json.loads(response.read() or b"null")
|
||||
|
||||
|
||||
def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    """Fetch every token event matching ``params`` by walking offset pages.

    Requests ``/token-events/`` in pages of 1000 and stops at the first page
    that is not a list, is empty, or is shorter than a full page.

    Args:
        api_base: Base URL of the API.
        params: Query filters; ``limit`` and ``offset`` are overridden here.

    Returns:
        All collected event dicts, in the order the API returned them.
    """
    page_size = 1000
    collected: list[dict[str, Any]] = []
    offset = 0
    while True:
        query = urllib.parse.urlencode({**params, "limit": page_size, "offset": offset})
        batch = http_json(api_base, "GET", f"/token-events/?{query}")
        if not isinstance(batch, list) or not batch:
            return collected
        collected.extend(batch)
        # A short page means the server has nothing further to return.
        if len(batch) < page_size:
            return collected
        offset += page_size
|
||||
|
||||
|
||||
def find_home(explicit: str | None, env_name: str, default: Path) -> Path | None:
    """Pick the first existing directory among explicit, environment and default.

    Candidates are tried in priority order: the explicit value (if truthy), the
    value of the ``env_name`` environment variable (if set and non-empty), then
    ``default``. A leading ``~`` is expanded before the existence check, because
    values coming from quoted CLI arguments or env files are not expanded by the
    shell and would otherwise be rejected as missing.

    Args:
        explicit: Path given on the command line, or ``None``.
        env_name: Name of the environment variable to consult.
        default: Fallback location.

    Returns:
        The first candidate that is an existing directory (with ``~`` expanded),
        or ``None`` when none of them exists.
    """
    candidates: list[Path] = []
    if explicit:
        candidates.append(Path(explicit))
    env_home = os.environ.get(env_name)
    if env_home:
        candidates.append(Path(env_home))
    candidates.append(default)

    for candidate in candidates:
        try:
            resolved = candidate.expanduser()
        except RuntimeError:
            # Home directory cannot be determined (e.g. "~unknownuser"); treat
            # the candidate as unusable rather than aborting the lookup.
            continue
        if resolved.is_dir():
            return resolved
    return None
|
||||
|
||||
|
||||
def event_total(event: dict[str, Any]) -> int:
    """Return ``tokens_in + tokens_out`` for an event, treating missing or falsy values as 0."""
    tokens_in = int(event.get("tokens_in") or 0)
    tokens_out = int(event.get("tokens_out") or 0)
    return tokens_in + tokens_out
|
||||
|
||||
|
||||
def source_index(events: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Index events by ``source_id``, falling back to ``ref_id``.

    Events whose key is not a string are left out. When several events share a
    key, the one appearing last in ``events`` wins.
    """
    indexed: dict[str, dict[str, Any]] = {}
    for item in events:
        key = item.get("source_id") or item.get("ref_id")
        if not isinstance(key, str):
            continue
        indexed[key] = item
    return indexed
|
||||
|
||||
|
||||
def print_report(report: dict[str, Any]) -> None:
    """Write the report to stdout as indented, key-sorted JSON.

    Values that ``json`` cannot serialise natively are rendered with ``str``.
    """
    rendered = json.dumps(report, indent=2, sort_keys=True, default=str)
    print(rendered)
|
||||
|
||||
|
||||
def main() -> int:
    """Reconcile local agent token evidence against the API and report the result.

    Steps, in order:
      1. Parse CLI options and locate the Codex and Claude home directories.
      2. Collect source records from each home found (via the project's
         ``collect_codex_sessions`` / ``collect_claude_transcripts`` helpers).
      3. Load repos, existing token events and progress events from the API.
      4. Plan an upsert for every record that has no existing event, or whose
         existing event carries fewer tokens than the record.
      5. With ``--apply``: send the upserts, optionally zero fallback rows, and
         post a ``token_reconciliation`` progress event.
      6. Print the JSON report to stdout.

    Returns:
        ``1`` when the canary fails (progress events exist in the window but the
        measured token total after the plan is zero), otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to reconcile from")
    parser.add_argument("--api-base", default=DEFAULT_API)
    parser.add_argument("--codex-home")
    parser.add_argument("--claude-home")
    parser.add_argument("--apply", action="store_true", help="upsert measured source events")
    parser.add_argument(
        "--zero-superseded-fallbacks",
        action="store_true",
        help="with --apply, zero heuristic fallback rows after measured source ingestion",
    )
    args = parser.parse_args()

    if args.zero_superseded_fallbacks and not args.apply:
        # The flag only acts inside the --apply branch; say so instead of
        # silently doing nothing. stderr keeps the stdout JSON report clean.
        print("warning: --zero-superseded-fallbacks has no effect without --apply", file=sys.stderr)

    since = parse_iso(args.since)
    since_param = since.isoformat()
    codex_home = find_home(args.codex_home, "CODEX_HOME", Path.home() / ".codex")
    if codex_home is None:
        # NOTE(review): machine-specific WSL fallback to one user's Windows
        # profile; consider moving this to CODEX_HOME / --codex-home.
        windows_codex = Path("/mnt/c/Users/bernd.worsch/.codex")
        codex_home = windows_codex if windows_codex.is_dir() else None
    claude_home = find_home(args.claude_home, "CLAUDE_HOME", Path.home() / ".claude")

    records = []
    source_health: dict[str, dict[str, Any]] = {}
    if codex_home:
        codex_records = collect_codex_sessions(codex_home, since)
        records.extend(codex_records)
        source_health["codex_session"] = {"home": str(codex_home), "sessions_found": len(codex_records)}
    else:
        source_health["codex_session"] = {"home": None, "sessions_found": 0, "warning": "Codex home not found"}
    if claude_home:
        claude_records = collect_claude_transcripts(claude_home, since)
        records.extend(claude_records)
        source_health["claude_transcript"] = {"home": str(claude_home), "sessions_found": len(claude_records)}
    else:
        source_health["claude_transcript"] = {"home": None, "sessions_found": 0, "warning": "Claude home not found"}

    repos = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
    existing_events = list_events(args.api_base, {"since": since_param, "include_superseded": "true"})
    existing_by_source = source_index(existing_events)
    fallback_events = [
        event for event in existing_events
        if event.get("source_provider") == "task_fallback" or event.get("note") == "heuristic"
    ]
    superseded_events = [
        event for event in existing_events
        if event.get("measurement_kind") == "superseded" or str(event.get("note") or "").startswith("heuristic_superseded")
    ]

    planned_upserts = []
    unattributed = 0
    stale = 0
    source_totals: dict[str, int] = defaultdict(int)
    for record in records:
        source_totals[record.source_provider] += record.tokens_total
        existing = existing_by_source.get(record.source_id)
        # An existing event that already carries at least as many tokens is current.
        if existing and event_total(existing) >= record.tokens_total:
            continue
        if existing:
            stale += 1
        match = resolve_repo(record.cwd, repos)
        if match is None:
            unattributed += 1
        planned_upserts.append((record, match))

    source_ids = [
        event.get("source_id")
        for event in existing_events
        if event.get("source_id") and event.get("measurement_kind") == "measured"
    ]
    duplicate_sources = {
        source_id: count for source_id, count in Counter(source_ids).items() if count > 1
    }
    missing_provenance = [
        event for event in existing_events
        if event.get("measurement_kind") == "measured" and not event.get("source_id")
    ]
    progress_events = http_json(args.api_base, "GET", f"/progress/?since={urllib.parse.quote(since_param)}&limit=1000")
    # Only a list response counts as progress; the same number feeds both the
    # report and the canary so the two cannot disagree.
    progress_count = len(progress_events) if isinstance(progress_events, list) else 0
    measured_total = sum(
        event_total(event)
        for event in existing_events
        if event.get("measurement_kind") == "measured"
    ) + sum(record.tokens_total for record, _ in planned_upserts)
    canary_failed = progress_count > 0 and measured_total == 0

    report = {
        "since": since.isoformat(),
        "apply": args.apply,
        "sources": source_health,
        "sessions_found": len(records),
        "source_tokens_total": dict(source_totals),
        "events_existing": len(existing_events),
        "events_to_upsert": len(planned_upserts),
        "sessions_stale": stale,
        "fallback_events": len(fallback_events),
        "superseded_events": len(superseded_events),
        "unattributed_source_records": unattributed,
        "missing_provenance_events": len(missing_provenance),
        "duplicate_source_ids": duplicate_sources,
        "progress_events": progress_count,
        "measured_tokens_total_after_plan": measured_total,
        "canary_failed": canary_failed,
    }

    if args.apply:
        for record, match in planned_upserts:
            payload = record.to_token_event_payload(repo_id=match.repo_id if match else None)
            payload["raw_metadata"] = {
                # `or {}` also covers a payload whose raw_metadata is None.
                **(payload.get("raw_metadata") or {}),
                "repo_slug": match.slug if match else None,
                "attribution_method": match.method if match else None,
            }
            http_json(args.api_base, "POST", "/token-events/upsert", payload)
        if args.zero_superseded_fallbacks:
            for event in fallback_events:
                # Rows already zeroed by a previous run need no second PATCH.
                if event.get("measurement_kind") == "superseded" and event_total(event) == 0:
                    continue
                http_json(
                    args.api_base,
                    "PATCH",
                    f"/token-events/{event['id']}",
                    {
                        "tokens_in": 0,
                        "tokens_out": 0,
                        "note": SUPERSEDED_HEURISTIC_NOTE,
                        "measurement_kind": "superseded",
                        "source_provider": "task_fallback",
                        "confidence": 0.0,
                        "raw_total_tokens": 0,
                    },
                )
        http_json(
            args.api_base,
            "POST",
            "/progress/",
            {
                "summary": (
                    "Token reconciliation: "
                    f"{len(records)} source records, {len(planned_upserts)} upserts, "
                    f"{len(fallback_events)} fallback events, canary_failed={canary_failed}"
                ),
                "event_type": "token_reconciliation",
                "author": "codex",
                "detail": report,
            },
        )

    print_report(report)
    return 1 if canary_failed else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -55,8 +55,23 @@ class TestTokenEventsCreate:
|
||||
assert ev["tokens_in"] == 200
|
||||
assert ev["tokens_out"] == 100
|
||||
assert ev["tokens_total"] == 300
|
||||
assert ev["measurement_kind"] == "estimated"
|
||||
assert ev["source_provider"] == "manual"
|
||||
assert ev["raw_total_tokens"] == 300
|
||||
assert ev["id"] is not None
|
||||
|
||||
async def test_create_with_created_at_backfill_timestamp(self, client):
    """A caller-supplied ``created_at`` is echoed back on the created event."""
    backfill_stamp = "2026-05-19T01:02:03Z"
    event = await _post_event(
        client,
        tokens_in=200,
        tokens_out=100,
        ref_type="session",
        ref_id="codex:test-session",
        created_at=backfill_stamp,
    )
    # Only the prefix is compared, so the timezone suffix format is not pinned.
    assert event["created_at"].startswith("2026-05-19T01:02:03")
|
||||
|
||||
async def test_create_with_all_fields(self, client):
|
||||
await _create_domain(client)
|
||||
topic = await _create_topic(client)
|
||||
@@ -74,11 +89,76 @@ class TestTokenEventsCreate:
|
||||
ref_id=task["id"],
|
||||
note="T01 done",
|
||||
session_id="ses-abc",
|
||||
measurement_kind="measured",
|
||||
source_provider="manual",
|
||||
source_id="manual:test-event",
|
||||
confidence=0.95,
|
||||
cached_input_tokens=10,
|
||||
reasoning_output_tokens=20,
|
||||
raw_total_tokens=1530,
|
||||
raw_metadata={"source": "unit-test"},
|
||||
)
|
||||
assert ev["task_id"] == task["id"]
|
||||
assert ev["workstream_id"] == ws["id"] # auto-populated from task
|
||||
assert ev["model"] == "claude-sonnet-4-6"
|
||||
assert ev["tokens_total"] == 1500
|
||||
assert ev["measurement_kind"] == "measured"
|
||||
assert ev["source_provider"] == "manual"
|
||||
assert ev["source_id"] == "manual:test-event"
|
||||
assert ev["cached_input_tokens"] == 10
|
||||
assert ev["reasoning_output_tokens"] == 20
|
||||
assert ev["token_evidence_total"] == 1530
|
||||
assert ev["raw_metadata"] == {"source": "unit-test"}
|
||||
|
||||
async def test_upsert_source_event_updates_existing_session(self, client):
    """Upserting twice with the same source keeps one row and updates its totals."""
    initial_payload = {
        "tokens_in": 100,
        "tokens_out": 50,
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:abc",
        "ref_type": "session",
        "ref_id": "codex:abc",
        "session_id": "abc",
        "cached_input_tokens": 5,
    }
    grown_payload = {**initial_payload, "tokens_in": 300, "tokens_out": 80}

    created = await client.post("/token-events/upsert", json=initial_payload)
    assert created.status_code == 200, created.text
    updated = await client.post("/token-events/upsert", json=grown_payload)
    assert updated.status_code == 200, updated.text

    # Same row id on both calls, carrying the second call's totals.
    assert created.json()["id"] == updated.json()["id"]
    assert updated.json()["tokens_total"] == 380

    listing = await client.get("/token-events/", params={"source_provider": "codex_session"})
    assert len(listing.json()) == 1
|
||||
|
||||
async def test_patch_backfill_fields(self, client):
    """PATCH can rewrite counts, references, timestamp and provenance fields."""
    seeded = await _post_event(client, tokens_in=100, tokens_out=50)
    backfill = {
        "tokens_in": 500,
        "tokens_out": 250,
        "session_id": "codex-session",
        "ref_type": "session",
        "ref_id": "codex:session",
        "created_at": "2026-05-20T01:02:03Z",
        "note": "backfill:codex-session",
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:session",
        "cached_input_tokens": 10,
    }

    response = await client.patch(f"/token-events/{seeded['id']}", json=backfill)
    assert response.status_code == 200

    result = response.json()
    expected_fields = {
        "tokens_total": 750,
        "session_id": "codex-session",
        "ref_type": "session",
        "ref_id": "codex:session",
        "measurement_kind": "measured",
        "source_provider": "codex_session",
        "source_id": "codex:session",
        "cached_input_tokens": 10,
    }
    for field, value in expected_fields.items():
        assert result[field] == value
    assert result["created_at"].startswith("2026-05-20T01:02:03")
|
||||
|
||||
async def test_workstream_auto_populated_from_task(self, client):
|
||||
await _create_domain(client)
|
||||
@@ -129,6 +209,26 @@ class TestTokenEventsList:
|
||||
assert len(events) == 1
|
||||
assert events[0]["model"] == "claude-sonnet-4-6"
|
||||
|
||||
async def test_filter_by_measurement_kind_and_source_provider(self, client):
    """Combining both list filters returns only the measured Codex event."""
    await _post_event(
        client,
        tokens_in=100,
        tokens_out=50,
        measurement_kind="measured",
        source_provider="codex_session",
        source_id="codex:filter",
    )
    # Second event carries only a heuristic note and must be filtered out.
    await _post_event(client, tokens_in=200, tokens_out=100, note="heuristic")

    filters = {"measurement_kind": "measured", "source_provider": "codex_session"}
    response = await client.get("/token-events/", params=filters)
    assert response.status_code == 200

    matched = response.json()
    assert len(matched) == 1
    assert matched[0]["source_id"] == "codex:filter"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestTokenSummary:
|
||||
@@ -184,6 +284,7 @@ class TestTokenSummary:
|
||||
s = r.json()
|
||||
assert s["event_count"] == 1
|
||||
assert s["tokens_total"] == 75
|
||||
assert s["by_measurement_kind"]["estimated"] == 75
|
||||
|
||||
async def test_summary_unknown_scope_returns_422(self, client):
|
||||
r = await client.get("/token-events/summary/", params={"scope": "foobar", "id": "x"})
|
||||
@@ -215,3 +316,32 @@ class TestTokenEventGetById:
|
||||
import uuid
|
||||
r = await client.get(f"/token-events/{uuid.uuid4()}")
|
||||
assert r.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestTokenAggregateAndQuality:
    """Checks the aggregate and quality endpoints' evidence breakdowns."""

    async def test_aggregate_and_quality_expose_evidence_breakdown(self, client):
        """One measured and one heuristic event are split by kind and provider."""
        await _post_event(
            client,
            tokens_in=100,
            tokens_out=50,
            measurement_kind="measured",
            source_provider="codex_session",
            source_id="codex:agg",
        )
        await _post_event(client, tokens_in=1000, tokens_out=500, note="heuristic")

        overall_resp = await client.get("/token-events/aggregate/", params={"include_superseded": "false"})
        overall = overall_resp.json()
        assert overall["tokens_total"] == 1650
        assert overall["by_measurement_kind"]["measured"] == 150
        assert overall["by_measurement_kind"]["estimated"] == 1500
        assert overall["by_source_provider"]["codex_session"] == 150
        assert overall["by_source_provider"]["task_fallback"] == 1500

        measured_resp = await client.get("/token-events/aggregate/", params={"measurement_kind": "measured"})
        assert measured_resp.json()["tokens_total"] == 150

        quality_resp = await client.get("/token-events/quality/")
        quality = quality_resp.json()
        assert quality["measured_event_count"] == 1
        assert quality["fallback_event_count"] == 1
        assert quality["missing_provenance_event_count"] == 0
|
||||
|
||||
@@ -66,6 +66,9 @@ class TestTokenPassthrough:
|
||||
assert ev["agent"] == "custodian"
|
||||
assert ev["workstream_id"] == ws["id"]
|
||||
assert ev["note"] == "measured"
|
||||
assert ev["measurement_kind"] == "measured"
|
||||
assert ev["source_provider"] == "manual"
|
||||
assert ev["source_id"] == f"task:{task['id']}:manual"
|
||||
|
||||
async def test_tier1_userbased_note_override(self, client):
|
||||
"""Tier 1 with note='userbased' records that note instead of 'measured'."""
|
||||
@@ -84,6 +87,7 @@ class TestTokenPassthrough:
|
||||
|
||||
events = (await client.get("/token-events/", params={"task_id": task["id"]})).json()
|
||||
assert events[0]["note"] == "userbased"
|
||||
assert events[0]["measurement_kind"] == "measured"
|
||||
|
||||
async def test_tier2_workplan_prorated(self, client):
|
||||
"""Tier 2: workplan totals prorated across 4 tasks → 250/125 each, note='workplan'."""
|
||||
@@ -108,6 +112,8 @@ class TestTokenPassthrough:
|
||||
assert ev["tokens_in"] == 250 # 1000 // 4
|
||||
assert ev["tokens_out"] == 125 # 500 // 4
|
||||
assert ev["note"] == "workplan"
|
||||
assert ev["measurement_kind"] == "allocated"
|
||||
assert ev["raw_metadata"]["allocation_method"] == "workplan_prorated"
|
||||
|
||||
async def test_tier3_heuristic_fallback(self, client):
|
||||
"""Tier 3: status=done with no token args → heuristic 1000/500, note='heuristic'."""
|
||||
@@ -125,6 +131,40 @@ class TestTokenPassthrough:
|
||||
assert ev["tokens_in"] == 1000
|
||||
assert ev["tokens_out"] == 500
|
||||
assert ev["note"] == "heuristic"
|
||||
assert ev["measurement_kind"] == "estimated"
|
||||
assert ev["source_provider"] == "task_fallback"
|
||||
|
||||
async def test_suppress_token_event_skips_done_fallback(self, client):
    """File/cache sync can mark a task done without minting a heuristic event."""
    await _create_domain(client)
    topic = await _create_topic(client)
    workstream = await _create_workstream(client, topic["id"])
    task = await _create_task(client, workstream["id"])

    update = {"status": "done", "suppress_token_event": True}
    response = await client.patch(f"/tasks/{task['id']}", json=update)
    assert response.status_code == 200
    assert response.json()["status"] == "done"

    listing = await client.get("/token-events/", params={"task_id": task["id"]})
    assert listing.json() == []
|
||||
|
||||
async def test_repeated_done_update_does_not_duplicate_event(self, client):
    """Only the transition into done records token usage."""
    await _create_domain(client)
    topic = await _create_topic(client)
    workstream = await _create_workstream(client, topic["id"])
    task = await _create_task(client, workstream["id"])

    # Send the identical "done" update twice; both must succeed.
    for _attempt in range(2):
        response = await client.patch(f"/tasks/{task['id']}", json={"status": "done"})
        assert response.status_code == 200

    listing = await client.get("/token-events/", params={"task_id": task["id"]})
    assert len(listing.json()) == 1
|
||||
|
||||
async def test_non_done_status_creates_no_event(self, client):
|
||||
"""Non-done status updates never create a token event."""
|
||||
|
||||
139
tests/test_token_sources.py
Normal file
139
tests/test_token_sources.py
Normal file
@@ -0,0 +1,139 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from api.services.token_sources import parse_iso
|
||||
from api.services.token_sources.attribution import RepoRef, normalise_cwd, resolve_repo
|
||||
from api.services.token_sources.claude import parse_claude_transcript
|
||||
from api.services.token_sources.codex import collect_codex_sessions, parse_codex_session
|
||||
|
||||
|
||||
def _write_jsonl(path, rows):
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open("w", encoding="utf-8") as handle:
|
||||
for row in rows:
|
||||
if row == "BAD":
|
||||
handle.write("{not json}\n")
|
||||
else:
|
||||
handle.write(json.dumps(row) + "\n")
|
||||
|
||||
|
||||
def test_parse_codex_session_sums_token_count_records(tmp_path):
    """Parsed totals match the 2026-05-23 usage entry; one bad line is counted.

    The fixture also holds a usage entry stamped 2026-05-22 (99/1 tokens) whose
    counts do not appear in the asserted totals.
    """
    session_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-local.jsonl"
    session_meta = {"type": "session_meta", "payload": {"id": "s1", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}}
    turn_context = {"type": "turn_context", "payload": {"cwd": "/repo", "model": "gpt-5.3-codex"}}
    earlier_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-22T23:00:00Z",
        "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 99, "output_tokens": 1}}},
    }
    later_usage = {
        "type": "event_msg",
        "timestamp": "2026-05-23T01:00:00Z",
        "payload": {
            "type": "token_count",
            "info": {
                "last_token_usage": {
                    "input_tokens": 100,
                    "output_tokens": 40,
                    "cached_input_tokens": 15,
                    "reasoning_output_tokens": 7,
                    "total_tokens": 155,
                }
            },
        },
    }
    _write_jsonl(session_file, [session_meta, turn_context, earlier_usage, "BAD", later_usage])

    parsed = parse_codex_session(session_file, parse_iso("2026-05-23"))

    assert parsed is not None
    assert parsed.source_id == "codex:s1"
    assert parsed.tokens_in == 100
    assert parsed.tokens_out == 40
    assert parsed.cached_input_tokens == 15
    assert parsed.reasoning_output_tokens == 7
    assert parsed.raw_total_tokens == 155
    assert parsed.raw_metadata["malformed_lines"] == 1
|
||||
|
||||
|
||||
def test_collect_codex_sessions_dedupes_archived_and_live(tmp_path):
    """A session present in both live and archived folders yields one record.

    The asserted total of 40 equals the sum of the archived file's two usage
    entries (10+5 and 20+5); the live file alone would sum to 15.
    """
    live_file = tmp_path / "sessions" / "2026" / "05" / "23" / "rollout-live.jsonl"
    archived_file = tmp_path / "archived_sessions" / "rollout-archived.jsonl"

    shared_rows = [
        {"type": "session_meta", "payload": {"id": "same", "cwd": "/repo", "timestamp": "2026-05-23T00:00:00Z"}},
        {
            "type": "event_msg",
            "timestamp": "2026-05-23T01:00:00Z",
            "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 10, "output_tokens": 5}}},
        },
    ]
    archived_only_row = {
        "type": "event_msg",
        "timestamp": "2026-05-23T02:00:00Z",
        "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 20, "output_tokens": 5}}},
    }
    _write_jsonl(live_file, shared_rows)
    _write_jsonl(archived_file, shared_rows + [archived_only_row])

    collected = collect_codex_sessions(tmp_path, parse_iso("2026-05-23"))

    assert len(collected) == 1
    assert collected[0].source_id == "codex:same"
    assert collected[0].tokens_total == 40
|
||||
|
||||
|
||||
def test_parse_claude_transcript_sums_usage_without_content(tmp_path):
    """Usage counters are extracted and message content is kept out of metadata.

    The asserted cached total of 12 equals cache creation (5) plus cache read (7).
    """
    transcript_file = tmp_path / "projects" / "repo" / "session.jsonl"
    usage = {
        "input_tokens": 30,
        "cache_creation_input_tokens": 5,
        "cache_read_input_tokens": 7,
        "output_tokens": 11,
    }
    entry = {
        "timestamp": "2026-05-23T01:00:00Z",
        "session_id": "c1",
        "cwd": "/repo",
        "message": {
            "model": "claude-sonnet",
            "content": "do not store me",
            "usage": usage,
        },
    }
    _write_jsonl(transcript_file, [entry])

    parsed = parse_claude_transcript(transcript_file, parse_iso("2026-05-23"))

    assert parsed is not None
    assert parsed.source_id == "claude:c1"
    assert parsed.tokens_in == 30
    assert parsed.cached_input_tokens == 12
    assert parsed.tokens_out == 11
    assert "content" not in parsed.raw_metadata
|
||||
|
||||
|
||||
def test_resolve_repo_uses_normalised_path_prefix():
    """A WSL UNC working directory resolves to the repo whose path prefixes it."""
    wsl_root = "//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub"
    known_repos = [
        RepoRef(repo_id="1", slug="state-hub", local_path="/home/worsch/state-hub"),
        RepoRef(repo_id="2", slug="other", local_path="/home/worsch/other"),
    ]

    resolved = resolve_repo("//wsl.localhost/Ubuntu-24.04/home/worsch/state-hub/api", known_repos)

    assert normalise_cwd(wsl_root) == "/home/worsch/state-hub"
    assert resolved is not None
    assert resolved.repo_id == "1"
    assert resolved.method == "path_prefix"
|
||||
@@ -4,12 +4,12 @@ type: workplan
|
||||
title: "Multi-User Onboarding and Environment Bootstrap"
|
||||
domain: custodian
|
||||
repo: state-hub
|
||||
status: active
|
||||
status: finished
|
||||
owner: custodian
|
||||
topic_slug: custodian
|
||||
state_hub_workstream_id: "a28d9e29-4119-4b73-9469-f921920253ef"
|
||||
created: "2026-03-11"
|
||||
updated: "2026-05-17"
|
||||
updated: "2026-05-23"
|
||||
---
|
||||
|
||||
# Multi-User Onboarding and Environment Bootstrap
|
||||
@@ -51,7 +51,7 @@ Two personas:
|
||||
```task
|
||||
id: CUST-WP-0012-T01
|
||||
state_hub_task_id: 71628269-9a75-4dae-a347-e64a86040322
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
```
|
||||
|
||||
@@ -79,6 +79,12 @@ git config --global credential.helper 'cache --timeout=3600'
|
||||
**Done when:** included in bootstrap script; push to Gitea works without
|
||||
re-entering credentials on second attempt.
|
||||
|
||||
**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` configures a global
|
||||
credential helper when one is not already present. It prefers `libsecret`, uses
|
||||
`cache --timeout=3600` as the safe automatic fallback, and supports explicit
|
||||
headless plaintext storage via `--git-helper store --allow-plaintext-store`.
|
||||
`docs/onboarding.md` documents the tradeoffs.
|
||||
|
||||
---
|
||||
|
||||
### T02 — SSH key generation and authorization automation
|
||||
@@ -86,7 +92,7 @@ re-entering credentials on second attempt.
|
||||
```task
|
||||
id: CUST-WP-0012-T02
|
||||
state_hub_task_id: fea965e9-8a8f-439c-9096-8f7756eb71ed
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
```
|
||||
|
||||
@@ -110,6 +116,11 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254
|
||||
|
||||
**Done when:** included in bootstrap script; documented in onboarding guide.
|
||||
|
||||
**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` generates
|
||||
`~/.ssh/id_ed25519` if missing, prints the public key, and can run
|
||||
`ssh-copy-id` for Railiance01 and CoulombCore with `--authorize-ssh`.
|
||||
`docs/onboarding.md` documents the operator and collaborator path.
|
||||
|
||||
---
|
||||
|
||||
### T03 — Claude Code MCP registration automation
|
||||
@@ -117,7 +128,7 @@ ssh-copy-id -i ~/.ssh/id_ed25519.pub tegwick@92.205.130.254
|
||||
```task
|
||||
id: CUST-WP-0012-T03
|
||||
state_hub_task_id: 60318e9a-972e-45c8-afde-82ed0625f594
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
```
|
||||
|
||||
@@ -132,10 +143,10 @@ make register-mcp # idempotent; safe to re-run
|
||||
|
||||
The script should:
|
||||
1. Detect whether `state-hub` is already in `~/.claude.json`
|
||||
2. Extract the server config from `.mcp.json`
|
||||
2. Use the current SSE MCP config (`http://127.0.0.1:8001/sse` locally or
|
||||
`http://127.0.0.1:18001/sse` through ops-bridge)
|
||||
3. Run `claude mcp add-json -s user state-hub <config>`
|
||||
4. Run `patch_mcp_cwd.py` to restore the cwd field
|
||||
5. Print instructions to restart Claude Code
|
||||
4. Print instructions to restart Claude Code
|
||||
|
||||
Should also detect whether the state hub is reachable directly
|
||||
(`http://127.0.0.1:8000`) or needs a tunnel (via ops-bridge), and emit
|
||||
@@ -144,6 +155,12 @@ a warning if neither is available.
|
||||
**Done when:** `make register-mcp` works on a clean machine; documented
|
||||
in onboarding guide.
|
||||
|
||||
**Implemented 2026-05-23:** `scripts/register-mcp.sh` and the
|
||||
`make register-mcp` target register the current SSE MCP transport
|
||||
idempotently. The script detects local/tunnel reachability, supports
|
||||
`MCP_URL`, `API_BASE`, and `DRY_RUN=1`, and documents the old `.mcp.json` cwd
|
||||
patch path as legacy.
|
||||
|
||||
---
|
||||
|
||||
### T04 — Environment bootstrap script
|
||||
@@ -151,7 +168,7 @@ in onboarding guide.
|
||||
```task
|
||||
id: CUST-WP-0012-T04
|
||||
state_hub_task_id: 84a94761-e424-4470-a9a2-64d9cabadb7f
|
||||
status: todo
|
||||
status: done
|
||||
priority: high
|
||||
```
|
||||
|
||||
@@ -176,6 +193,11 @@ Design constraints:
|
||||
**Done when:** running the script on a clean Ubuntu 24.04 machine
|
||||
produces a working Custodian environment with no additional manual steps.
|
||||
|
||||
**Implemented 2026-05-23:** `scripts/bootstrap-env.sh` and
|
||||
`make bootstrap-env` provide the idempotent entrypoint. It supports dry-run,
|
||||
non-interactive mode, optional apt package installation, SSH authorization,
|
||||
Gitea token prompting, MCP registration, and State Hub health checks.
|
||||
|
||||
---
|
||||
|
||||
### T05 — Onboarding guide and user journey documentation
|
||||
@@ -183,7 +205,7 @@ produces a working Custodian environment with no additional manual steps.
|
||||
```task
|
||||
id: CUST-WP-0012-T05
|
||||
state_hub_task_id: b0839802-659a-475b-8b84-ab7341ea3d15
|
||||
status: todo
|
||||
status: done
|
||||
priority: medium
|
||||
```
|
||||
|
||||
@@ -208,6 +230,10 @@ for both personas:
|
||||
**Done when:** a new collaborator can follow the guide without
|
||||
clarification from the primary operator.
|
||||
|
||||
**Implemented 2026-05-23:** `docs/onboarding.md` covers primary operator and
|
||||
domain collaborator journeys, including SSH, Gitea token file, credential
|
||||
helper choices, MCP registration, tunnel setup, and verification checks.
|
||||
|
||||
---
|
||||
|
||||
### T06 — State Hub multi-user model (deferred)
|
||||
@@ -215,7 +241,7 @@ clarification from the primary operator.
|
||||
```task
|
||||
id: CUST-WP-0012-T06
|
||||
state_hub_task_id: d5df3302-67b9-4765-a8d8-ea2df53dff6e
|
||||
status: todo
|
||||
status: done
|
||||
priority: low
|
||||
```
|
||||
|
||||
@@ -235,6 +261,11 @@ domain) or rely on Gitea repo permissions as the authoritative boundary
|
||||
Implement T01–T05 first; multi-user access control is only needed when
|
||||
there is more than one user.
|
||||
|
||||
**Implemented 2026-05-23:** `docs/multi-user-access-model.md` records the
|
||||
current decision: repo permissions, SSH access, tunnels, and OpenBao remain the
|
||||
authoritative boundaries for this phase; State Hub API auth is deferred until a
|
||||
real second-user or exposed-deployment trigger exists.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
310
workplans/STATE-WP-0045-token-measurement-accuracy.md
Normal file
310
workplans/STATE-WP-0045-token-measurement-accuracy.md
Normal file
@@ -0,0 +1,310 @@
|
||||
---
|
||||
id: STATE-WP-0045
|
||||
type: workplan
|
||||
title: "Token Measurement Accuracy and Resilience"
|
||||
domain: custodian
|
||||
repo: state-hub
|
||||
status: finished
|
||||
owner: codex
|
||||
topic_slug: custodian
|
||||
created: "2026-05-23"
|
||||
updated: "2026-05-23"
|
||||
state_hub_workstream_id: "0aefe379-c182-4471-84dd-c136d5e1206b"
|
||||
---
|
||||
|
||||
# Token Measurement Accuracy and Resilience
|
||||
|
||||
## Summary
|
||||
|
||||
Make State Hub token tracking accurate enough to trust for daily operations and
|
||||
robust enough to survive agent/tool changes.
|
||||
|
||||
The May 19 flatline showed the current weak spots: token events mixed measured
|
||||
usage, task-completion fallbacks, and file-sync side effects in the same table;
|
||||
Claude measurement depended on one hook path; Codex usage lived in local session
|
||||
logs until a manual backfill; and the dashboard treated every token event as the
|
||||
same quality of evidence. The immediate fix restored Codex session totals and
|
||||
suppressed sync-generated fallback events, but the system still needs a durable
|
||||
measurement model, idempotent source adapters, reconciliation checks, and a
|
||||
dashboard that exposes provenance and confidence.
|
||||
|
||||
## Current Findings
|
||||
|
||||
- `token_events` stores counts, associations, free-text notes, and timestamps,
|
||||
but not structured provenance such as source system, source event id, parser
|
||||
version, raw token categories, confidence, or whether the row is measured,
|
||||
allocated, estimated, or superseded.
|
||||
- `PATCH /tasks/{id}` can still create heuristic token events on a transition to
|
||||
`done`. That fallback is useful as a temporary operational signal, but it is
|
||||
not a measurement and should not be blended into measured totals.
|
||||
- `fix-consistency` now suppresses token events while syncing file-backed task
|
||||
status, but this is a narrow guard. Other bulk sync, import, and migration
|
||||
paths need the same invariant.
|
||||
- Codex Desktop session logs contain structured `token_count` events with
|
||||
`last_token_usage`, `total_token_usage`, cached-input counts, and reasoning
|
||||
output counts. The new backfill script can restore these, but it is not yet a
|
||||
scheduled or monitored ingestion path.
|
||||
- Claude Code measurement currently depends on `scripts/task_token_hook.py`
|
||||
firing after calls to one specific MCP tool. It uses per-session state in `/tmp`, so missed
|
||||
hooks, restarts, renamed tools, and non-MCP REST paths can silently degrade to
|
||||
fallback events.
|
||||
- Repository attribution for Codex backfill is path-based. This is good enough
|
||||
for the emergency restore, but long-term attribution should prefer registered
|
||||
repo fingerprints/remotes and then fall back to paths.
|
||||
- The Token Cost dashboard currently aggregates all events returned by
|
||||
`/token-events/?limit=1000`; it does not show measurement quality, source,
|
||||
superseded rows, ingestion freshness, or possible gaps.
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- Exact billing reconciliation against vendor invoices.
|
||||
- Capturing private transcript content in State Hub.
|
||||
- Replacing existing task/workstream/repo relationships.
|
||||
- Implementing every provider-specific parser in one pass. The first pass should
|
||||
cover Codex Desktop and Claude Code, with a documented adapter contract for
|
||||
others.
|
||||
|
||||
## T01 - Define Token Evidence Model
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T01
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "29aed6d9-40aa-40fc-9e9a-3eb3e6f985bc"
|
||||
```
|
||||
|
||||
Define a structured model that separates measured usage from allocated,
|
||||
estimated, and superseded rows.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add a short design note or ADR section covering token event semantics.
|
||||
- Define measurement classes such as `measured`, `allocated`, `estimated`, and
|
||||
`superseded`.
|
||||
- Define source classes such as `codex_session`, `claude_transcript`,
|
||||
`llm_connect`, `manual`, and `task_fallback`.
|
||||
- Define structured provenance fields: source system, source id, source path or
|
||||
URI, source timestamp, parser version, ingestion timestamp, and confidence.
|
||||
- Decide how to represent raw token categories: input, cached input, output,
|
||||
reasoning output, and provider total.
|
||||
- Decide whether cached input should be included in default totals or shown as a
|
||||
separate metric. Preserve enough fields to support both views.
|
||||
- Stop treating the free-text note taxonomy as the primary quality signal. Notes can
|
||||
remain for human context, but dashboards and APIs should rely on structured
|
||||
fields.
|
||||
|
||||
Done when the repo has a reviewed token evidence contract and the follow-on
|
||||
schema/API tasks can implement it without ambiguity.
|
||||
|
||||
## T02 - Add Provenance Schema and Idempotent Upsert API
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T02
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "ade2bd40-343c-4829-ba4f-44bc8b7cbef9"
|
||||
```
|
||||
|
||||
Extend token storage so source-derived events can be written repeatedly without
|
||||
duplicates and without losing provenance.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add migration fields for the evidence model from T01. Candidate fields:
|
||||
`measurement_kind`, `source_provider`, `source_id`, `source_path`,
|
||||
`source_created_at`, `ingested_at`, `parser_version`, `confidence`,
|
||||
`cached_input_tokens`, `reasoning_output_tokens`, `raw_total_tokens`,
|
||||
`cost_estimated_usd`, and `raw_metadata`.
|
||||
- Add a unique constraint or partial unique index that prevents duplicate
|
||||
measured source rows. For example: source provider plus source id, scoped by
|
||||
measurement kind.
|
||||
- Provide an upsert endpoint or make `POST /token-events/` support an explicit
|
||||
idempotency key. The behavior should update a growing live session rather than
|
||||
creating a second row.
|
||||
- Keep backward compatibility for existing clients that only post
|
||||
`tokens_in`/`tokens_out`, but classify those rows explicitly.
|
||||
- Update schemas, router tests, and migration tests.
|
||||
|
||||
Done when source-backed token events can be inserted or updated idempotently and
|
||||
legacy callers continue to work.
|
||||
|
||||
## T03 - Build Reusable Token Source Adapters
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T03
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "3844fb70-4ceb-4f90-9894-d4845970f0a6"
|
||||
```
|
||||
|
||||
Move source-specific parsing out of one-off scripts and hooks into reusable,
|
||||
tested adapter modules.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add an `api/services/token_sources/` package or equivalent service layer.
|
||||
- Implement a Codex Desktop adapter for `.codex/sessions/**` and
|
||||
`.codex/archived_sessions/**`.
|
||||
- Implement a Claude Code adapter for `.claude/projects/**/*.jsonl` that reads
|
||||
usage metadata without storing transcript text.
|
||||
- Provide a common adapter result type with source id, timestamps, token
|
||||
categories, model, agent, cwd/path context, and raw parser metadata.
|
||||
- Make parsing safe by default: no conversation text in logs, progress events,
|
||||
token notes, or API payloads.
|
||||
- Add fixtures with synthetic Codex and Claude session records that cover live
|
||||
sessions, archived sessions, duplicate files, malformed JSONL, resets, and
|
||||
missing usage records.
|
||||
- Keep `scripts/backfill_codex_token_events.py` as a thin CLI over the reusable
|
||||
service or replace it with a new unified CLI.
|
||||
|
||||
Done when Codex and Claude token sources have deterministic parser tests and a
|
||||
shared ingestion interface.
|
||||
|
||||
## T04 - Improve Repo, Workstream, and Task Attribution
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T04
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "d78b36ea-2a1a-40d6-bd83-03d48ff2ad9b"
|
||||
```
|
||||
|
||||
Make attribution accurate without relying solely on local path string matching.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Resolve repo attribution by git root fingerprint and remote URL when possible,
|
||||
then fall back to registered host paths.
|
||||
- Handle duplicate local paths or alias repos explicitly, especially where one
|
||||
checkout is registered under multiple slugs.
|
||||
- Attribute session-level usage to repo first, then optionally to workstreams or
|
||||
tasks when there is strong evidence.
|
||||
- Define task allocation rules that do not change measured session totals. For
|
||||
example, produce `allocated` child rows from measured session rows using task
|
||||
completion timestamps, tool-call metadata, or explicit operator input.
|
||||
- Record the allocation method and confidence for every task-level allocation.
|
||||
- Avoid minting task-level heuristic rows automatically for bulk import, status
|
||||
sync, migration, and consistency tooling.
|
||||
|
||||
Done when measured session totals are stable and task/workstream attribution is
|
||||
explicitly either measured, allocated, or estimated.
|
||||
|
||||
## T05 - Add Reconciliation, Gap Detection, and Backfill Operations
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T05
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "efaa2629-4f9a-439c-b0a3-85d77b03580f"
|
||||
```
|
||||
|
||||
Add an operator-safe reconciliation command that detects flatlines, duplicate
|
||||
rows, stale ingestion, and fallback leakage.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add a command such as `make token-reconcile` or
|
||||
`python scripts/token_reconcile.py --since <date>`.
|
||||
- Report sessions found, sessions ingested, sessions stale, duplicate source
|
||||
ids, fallback events, superseded rows, unattributed sessions, and rows missing
|
||||
structured provenance.
|
||||
- Default to `--dry-run` behavior and require an explicit `--apply` for writes.
|
||||
- Include an explicit `--zero-superseded-fallbacks` or equivalent flag rather
|
||||
than silently editing historical rows.
|
||||
- Store reconciliation summaries as progress events or report files without
|
||||
including transcript content.
|
||||
- Add a canary threshold: alert or fail when measured token volume is zero while
|
||||
task/progress activity exists for the same window.
|
||||
|
||||
Done when an operator can run one command to verify token tracking health and
|
||||
perform safe, idempotent backfills.
|
||||
|
||||
## T06 - Harden Hooks and Runtime Integration
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T06
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "5fd99241-e6dd-4ca6-8c58-a0048f08f0ca"
|
||||
```
|
||||
|
||||
Make token collection survive hook misses, tool renames, restarts, and multiple
|
||||
agent runtimes.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Update Claude hook handling so it can match supported task completion paths,
|
||||
not just one exact MCP tool name.
|
||||
- Persist hook high-water marks in a durable State Hub or repo-local location
|
||||
instead of only `/tmp`.
|
||||
- Add hook health logging that records when a hook ran, what source id it
|
||||
processed, and whether it patched or skipped a token event.
|
||||
- Add a Codex ingestion path that can run on demand and from a schedule without
|
||||
requiring manual script execution.
|
||||
- Document required environment variables and path discovery for Windows, WSL,
|
||||
and remote Linux hosts.
|
||||
- Ensure failures degrade to visible `estimated` events or health warnings, not
|
||||
silent flatlines.
|
||||
|
||||
Done when missing or stale token ingestion becomes visible within one reporting
|
||||
window and can be recovered without ad hoc inspection.
|
||||
|
||||
## T07 - Upgrade Token APIs and Dashboard Quality Signals
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T07
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "ecaf6ff8-59aa-4c56-8163-125dc96b2068"
|
||||
```
|
||||
|
||||
Expose token quality, source, and freshness in APIs and dashboard views.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add API filters for measurement kind, source provider, repo, time range,
|
||||
superseded rows, and unattributed rows.
|
||||
- Replace the dashboard's hard dependence on `/token-events/?limit=1000` with
|
||||
paginated or pre-aggregated endpoints that support time windows.
|
||||
- Add dashboard controls for measured-only, include allocated, include
|
||||
estimates, and show superseded rows.
|
||||
- Show ingestion freshness: last Codex session ingested, last Claude transcript
|
||||
ingested, and last reconciliation run.
|
||||
- Add a data-quality section listing fallback events, unattributed measured
|
||||
sessions, duplicate source ids, and days with progress/task activity but zero
|
||||
measured tokens.
|
||||
- Update the Token Cost page and docs so operators know which numbers are
|
||||
measured versus inferred.
|
||||
|
||||
Done when the dashboard no longer presents fallback, allocated, and measured
|
||||
usage as indistinguishable totals.
|
||||
|
||||
## T08 - Verification and Migration Playbook
|
||||
|
||||
```task
|
||||
id: STATE-WP-0045-T08
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "61baff79-832e-45f8-80f3-106abe262096"
|
||||
```
|
||||
|
||||
Cover the new measurement system with tests and a safe rollout plan.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add unit tests for the evidence model, source adapters, source-id
|
||||
deduplication, repo attribution, and task allocation.
|
||||
- Add router tests for idempotent upsert, source filters, measurement-kind
|
||||
filters, created-at preservation, and backwards-compatible legacy posts.
|
||||
- Add reconciliation tests with synthetic pre-May-19 and post-May-19 flatline
|
||||
scenarios.
|
||||
- Add dashboard/data-loader tests or fixture checks for quality filters and
|
||||
aggregate counts.
|
||||
- Write a migration playbook covering old heuristic rows, existing
|
||||
`backfill:codex-session` rows, and any rows without structured provenance.
|
||||
- Verify that the full test suite passes and run a dry-run reconciliation before marking this
|
||||
workplan finished.
|
||||
|
||||
Done when the improved token measurement path has automated coverage, an
|
||||
operator playbook, and a dry-run reconciliation report showing no hidden
|
||||
fallback leakage.
|
||||
Reference in New Issue
Block a user