generated from coulomb/repo-seed
Fixed and improved token tracking
This commit is contained in:
192
scripts/backfill_codex_token_events.py
Normal file
192
scripts/backfill_codex_token_events.py
Normal file
@@ -0,0 +1,192 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill State Hub token events from local Codex session logs.
|
||||
|
||||
The parser lives in ``api.services.token_sources.codex`` so this CLI only
|
||||
handles operator flags, repo attribution, idempotent writes, and fallback
|
||||
cleanup.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from api.services.token_sources import collect_codex_sessions, parse_iso # noqa: E402
|
||||
from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
|
||||
|
||||
DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
|
||||
BACKFILL_NOTE = "backfill:codex-session"
|
||||
SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_codex_backfill"
|
||||
|
||||
|
||||
def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
    """Send one JSON request to the State Hub API and decode the reply.

    ``api_base`` and ``path`` are joined with exactly one slash between them.
    When ``body`` is given it is serialized as UTF-8 JSON and sent as the
    request payload. An empty response body decodes to ``None``. Errors raised
    by ``urllib`` or the JSON decoder propagate to the caller.
    """
    base = api_base.rstrip("/")
    endpoint = path.lstrip("/")
    payload = json.dumps(body).encode("utf-8") if body is not None else None
    request = urllib.request.Request(
        f"{base}/{endpoint}",
        data=payload,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        raw = response.read()
    # An empty body is treated as JSON null.
    return json.loads(raw or b"null")
|
||||
|
||||
|
||||
def find_codex_home(explicit: str | None) -> Path:
    """Return the first existing Codex home directory.

    Candidates are tried in this order: the ``explicit`` argument, the
    ``CODEX_HOME`` environment variable, ``~/.codex`` and a fixed WSL path.
    A leading ``~`` in ``explicit`` or ``CODEX_HOME`` is expanded, so a quoted
    value such as ``"~/.codex"`` resolves instead of being silently skipped.

    Args:
        explicit: Directory given on the command line, or ``None``.

    Returns:
        The first candidate that is an existing directory.

    Raises:
        SystemExit: If none of the candidates is a directory.
    """
    candidates: list[Path] = [
        Path(value).expanduser()
        for value in (explicit, os.environ.get("CODEX_HOME"))
        if value
    ]
    candidates += [
        Path.home() / ".codex",
        Path("/mnt/c/Users/bernd.worsch/.codex"),
    ]
    for candidate in candidates:
        if candidate.is_dir():
            return candidate
    raise SystemExit("Could not find Codex home; pass --codex-home")
|
||||
|
||||
|
||||
def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    """Collect every token event matching ``params`` by paging through the API.

    Pages of 1000 rows are requested from ``/token-events/`` with increasing
    offsets. Paging stops at the first response that is not a non-empty list
    or that holds fewer rows than a full page.
    """
    page_size = 1000
    collected: list[dict[str, Any]] = []
    start = 0
    while True:
        query = urllib.parse.urlencode({**params, "limit": page_size, "offset": start})
        batch = http_json(api_base, "GET", f"/token-events/?{query}")
        if not (isinstance(batch, list) and batch):
            return collected
        collected.extend(batch)
        if len(batch) < page_size:
            # A short page means there is nothing left to fetch.
            return collected
        start += page_size
|
||||
|
||||
|
||||
def existing_codex_events(api_base: str) -> dict[str, dict[str, Any]]:
    """Index stored ``codex_session`` events by their source identifier.

    Superseded events are included in the query. Each event is keyed by its
    ``source_id``, falling back to ``ref_id``; events with no string key are
    dropped, and a later event replaces an earlier one with the same key.
    """
    query = {"source_provider": "codex_session", "include_superseded": "true"}
    indexed: dict[str, dict[str, Any]] = {}
    for record in list_events(api_base, query):
        key = record.get("source_id") or record.get("ref_id")
        if isinstance(key, str):
            indexed[key] = record
    return indexed
|
||||
|
||||
|
||||
def fetch_heuristics(api_base: str, since: str) -> list[dict[str, Any]]:
    """List non-superseded heuristic ``task_fallback`` events from ``since`` on."""
    filters = {
        "source_provider": "task_fallback",
        "note": "heuristic",
        "since": since,
        "include_superseded": "false",
    }
    return list_events(api_base, filters)
|
||||
|
||||
|
||||
def patch_superseded_heuristic(api_base: str, event_id: str) -> None:
    """PATCH one heuristic event so it is zeroed and marked as superseded."""
    zeroed = {
        "tokens_in": 0,
        "tokens_out": 0,
        "note": SUPERSEDED_HEURISTIC_NOTE,
        "measurement_kind": "superseded",
        "source_provider": "task_fallback",
        "confidence": 0.0,
        "raw_total_tokens": 0,
    }
    http_json(api_base, "PATCH", f"/token-events/{event_id}", zeroed)
|
||||
|
||||
|
||||
def main() -> int:
    """Plan and optionally apply a Codex session token backfill.

    Parses the CLI flags, collects Codex sessions newer than ``--since``,
    compares each one with the ``codex_session`` events already stored and
    prints a summary. Nothing is written unless ``--apply`` is passed. With
    ``--zero-heuristics`` the matching heuristic fallback events are zeroed
    after the upserts.

    Returns:
        The process exit status, always ``0``; failures surface as exceptions.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to backfill from")
    parser.add_argument("--api-base", default=DEFAULT_API)
    parser.add_argument("--codex-home")
    parser.add_argument("--apply", action="store_true", help="write backfill events")
    parser.add_argument(
        "--zero-heuristics",
        action="store_true",
        help="set post-since heuristic task fallback events to zero after backfill",
    )
    args = parser.parse_args()

    since = parse_iso(args.since)
    since_param = since.isoformat()
    codex_home = find_codex_home(args.codex_home)
    repo_refs = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
    existing = existing_codex_events(args.api_base)
    sessions = collect_codex_sessions(codex_home, since)

    # Each plan entry: (action, session, repo_id, repo_slug, attribution_method).
    planned: list[tuple[str, Any, str | None, str | None, str | None]] = []
    # Per-repo summary: [session count, tokens in, tokens out].
    by_repo: dict[str, list[int]] = {}
    for session in sessions:
        event = existing.get(session.source_id)
        if event is None:
            action = "create"
        else:
            # Stored counts may be null; treat them as zero like the
            # heuristic summary further down does.
            existing_total = (event.get("tokens_in") or 0) + (event.get("tokens_out") or 0)
            action = "update" if session.tokens_total > existing_total else "skip"

        # Resolve attribution once and carry it into the write phase.
        match = resolve_repo(session.cwd, repo_refs)
        repo_id = match.repo_id if match else None
        repo_slug = match.slug if match else None
        method = match.method if match else None
        if action != "skip":
            planned.append((action, session, repo_id, repo_slug, method))

        totals = by_repo.setdefault(repo_slug or "(unattributed)", [0, 0, 0])
        totals[0] += 1
        totals[1] += session.tokens_in
        totals[2] += session.tokens_out

    heuristics = fetch_heuristics(args.api_base, since_param) if args.zero_heuristics else []

    print(f"codex_home: {codex_home}")
    print(f"since: {since_param}")
    print(f"sessions found: {len(sessions)}")
    print(f"backfill events to create: {sum(1 for action, *_ in planned if action == 'create')}")
    print(f"backfill events to update: {sum(1 for action, *_ in planned if action == 'update')}")
    for repo_slug, (count, tokens_in, tokens_out) in sorted(by_repo.items()):
        print(f" {repo_slug}: {count} sessions, {tokens_in + tokens_out:,} tokens")
    if args.zero_heuristics:
        total = sum((e.get("tokens_in") or 0) + (e.get("tokens_out") or 0) for e in heuristics)
        print(f"heuristic events to zero: {len(heuristics)} ({total:,} tokens)")

    if not args.apply:
        print("dry run only; pass --apply to write changes")
        return 0

    for _action, session, repo_id, repo_slug, method in planned:
        payload = session.to_token_event_payload(repo_id=repo_id)
        payload["note"] = BACKFILL_NOTE
        payload["raw_metadata"] = {
            # Tolerate a payload whose raw_metadata is missing or None.
            **(payload.get("raw_metadata") or {}),
            "repo_slug": repo_slug,
            "attribution_method": method,
        }
        http_json(args.api_base, "POST", "/token-events/upsert", payload)
    for event in heuristics:
        patch_superseded_heuristic(args.api_base, event["id"])

    print(f"upserted {len(planned)} backfill events")
    if args.zero_heuristics:
        print(f"zeroed {len(heuristics)} heuristic events")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
369
scripts/bootstrap-env.sh
Executable file
369
scripts/bootstrap-env.sh
Executable file
@@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
GITEA_CONF="${GITEA_CONF:-$HOME/.railiance_gitea.conf}"
|
||||
GITEA_URL="${GITEA_URL:-http://92.205.130.254:32166}"
|
||||
GITEA_USER="${GITEA_USER:-}"
|
||||
GITEA_TOKEN="${GITEA_TOKEN:-}"
|
||||
GIT_HELPER="${GIT_HELPER:-auto}"
|
||||
INSTALL_MISSING=0
|
||||
NON_INTERACTIVE=0
|
||||
DRY_RUN=0
|
||||
AUTHORIZE_SSH=0
|
||||
ALLOW_PLAINTEXT_STORE=0
|
||||
SKIP_GITEA=0
|
||||
SKIP_MCP=0
|
||||
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519}"
|
||||
SSH_TARGETS=(
|
||||
"tegwick@92.205.62.239"
|
||||
"tegwick@92.205.130.254"
|
||||
)
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
Usage: scripts/bootstrap-env.sh [options]
|
||||
|
||||
Idempotently prepares a State Hub operator or collaborator environment.
|
||||
|
||||
Options:
|
||||
--install-missing Install missing apt packages when possible.
|
||||
--non-interactive Do not prompt; warn instead of asking for secrets.
|
||||
--dry-run Show intended actions without changing local config.
|
||||
--git-helper MODE auto, libsecret, cache, or store. Default: auto.
|
||||
--allow-plaintext-store Allow git credential.helper=store in auto mode.
|
||||
--authorize-ssh Run ssh-copy-id for configured SSH targets.
|
||||
--ssh-target USER@HOST Add an SSH authorization target. May repeat.
|
||||
--gitea-url URL Gitea base URL for ~/.railiance_gitea.conf.
|
||||
--gitea-user USER Gitea user for ~/.railiance_gitea.conf.
|
||||
--gitea-token TOKEN Gitea token; otherwise prompted when interactive.
|
||||
--skip-gitea Do not create or update ~/.railiance_gitea.conf.
|
||||
--skip-mcp Do not run make register-mcp.
|
||||
-h, --help Show this help.
|
||||
USAGE
|
||||
}
|
||||
|
||||
# Console reporting helpers. Each joins its arguments with single spaces and
# prints one tagged line; only err writes to stderr.
ok() {
  printf '[OK] %s\n' "$*"
}

warn() {
  printf '[WARN] %s\n' "$*"
}

err() {
  printf '[ERR] %s\n' "$*" >&2
}

# step prints a blank line followed by a section heading.
step() {
  printf '\n==> %s\n' "$*"
}
|
||||
|
||||
# Execute the given command, or only print it when DRY_RUN is 1.
run() {
  if [ "$DRY_RUN" -ne 1 ]; then
    "$@"
    return
  fi
  printf 'DRY-RUN: %s\n' "$*"
}
|
||||
|
||||
# Abort with exit status 2 when option $1 was given without a value ($2).
need_arg() {
  [ -n "${2:-}" ] && return 0
  err "$1 requires a value"
  exit 2
}
|
||||
|
||||
# Parse command-line options into the configuration globals declared above.
while [ "$#" -gt 0 ]; do
  case "$1" in
    # Boolean switches.
    --install-missing)       INSTALL_MISSING=1; shift ;;
    --non-interactive)       NON_INTERACTIVE=1; shift ;;
    --dry-run)               DRY_RUN=1; shift ;;
    --allow-plaintext-store) ALLOW_PLAINTEXT_STORE=1; shift ;;
    --authorize-ssh)         AUTHORIZE_SSH=1; shift ;;
    --skip-gitea)            SKIP_GITEA=1; shift ;;
    --skip-mcp)              SKIP_MCP=1; shift ;;
    # Options that take a value.
    --git-helper)  need_arg "$1" "${2:-}"; GIT_HELPER="$2"; shift 2 ;;
    --ssh-target)  need_arg "$1" "${2:-}"; SSH_TARGETS+=("$2"); shift 2 ;;
    --gitea-url)   need_arg "$1" "${2:-}"; GITEA_URL="$2"; shift 2 ;;
    --gitea-user)  need_arg "$1" "${2:-}"; GITEA_USER="$2"; shift 2 ;;
    --gitea-token) need_arg "$1" "${2:-}"; GITEA_TOKEN="$2"; shift 2 ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      err "unknown argument: $1"
      usage >&2
      exit 2
      ;;
  esac
done

# Reject credential helper modes the script does not know how to configure.
case "$GIT_HELPER" in
  auto|libsecret|cache|store)
    :
    ;;
  *)
    err "--git-helper must be auto, libsecret, cache, or store"
    exit 2
    ;;
esac
|
||||
|
||||
# Install the given apt packages through sudo.
# Only warns when --install-missing was not requested or sudo is unavailable.
apt_install() {
  local -a wanted=("$@")
  if [ "$INSTALL_MISSING" -ne 1 ]; then
    warn "Missing packages: ${wanted[*]}"
    warn "Rerun with --install-missing or install them manually."
  elif ! command -v sudo >/dev/null 2>&1; then
    warn "sudo is not available; cannot install: ${wanted[*]}"
  else
    run sudo apt-get update
    run sudo apt-get install -y "${wanted[@]}"
  fi
}
|
||||
|
||||
# Print the apt package that provides required command $1.
# ssh-keygen and ssh-copy-id ship in openssh-client; the other required
# commands are packaged under their own name.
package_for_command() {
  case "$1" in
    ssh-keygen|ssh-copy-id) printf '%s\n' "openssh-client" ;;
    *) printf '%s\n' "$1" ;;
  esac
}

# Report which required and optional commands are on PATH.
# Missing required commands are translated to package names (de-duplicated)
# and handed to apt_install; missing optional commands only produce a warning.
check_commands() {
  step "Checking prerequisites"
  local cmd pkg
  local missing=()
  local commands=(git curl ssh-keygen ssh-copy-id python3 make)
  local optional=(sops age helm kubectl uv claude)

  for cmd in "${commands[@]}"; do
    if command -v "$cmd" >/dev/null 2>&1; then
      ok "$cmd found"
    else
      warn "$cmd missing"
      pkg="$(package_for_command "$cmd")"
      # "${missing[*]-}" keeps set -u happy while the array is still empty.
      case " ${missing[*]-} " in
        *" $pkg "*) ;;
        *) missing+=("$pkg") ;;
      esac
    fi
  done

  for cmd in "${optional[@]}"; do
    if command -v "$cmd" >/dev/null 2>&1; then
      ok "$cmd found"
    else
      warn "$cmd missing"
    fi
  done

  if [ "${#missing[@]}" -gt 0 ]; then
    apt_install "${missing[@]}"
  fi
}
|
||||
|
||||
# Print the first executable git-credential-libsecret binary found in the
# known install locations. Returns 1 without output when none is executable.
libsecret_helper_path() {
  local candidate
  for candidate in \
    "/usr/share/doc/git/contrib/credential/libsecret/git-credential-libsecret" \
    "/usr/lib/git-core/git-credential-libsecret" \
    "/usr/libexec/git-core/git-credential-libsecret"; do
    [ -x "$candidate" ] || continue
    printf '%s\n' "$candidate"
    return 0
  done
  return 1
}
|
||||
|
||||
# Try to build git-credential-libsecret from the sources under git's contrib
# directory. This is best effort: every failure path only warns, so the
# caller can still fall back to another credential helper.
build_libsecret_helper() {
  local source_dir="/usr/share/doc/git/contrib/credential/libsecret"
  if [ ! -d "$source_dir" ]; then
    warn "libsecret helper sources not found at $source_dir"
    return 0
  fi

  # The build dependencies matter only when there is something to build.
  # NOTE(review): assumes the contrib Makefile needs gcc, make and the
  # libsecret-1 development files - confirm against the installed git package.
  if ! command -v gcc >/dev/null 2>&1 \
    || ! command -v make >/dev/null 2>&1 \
    || ! pkg-config --exists libsecret-1 >/dev/null 2>&1; then
    apt_install libsecret-1-0 libsecret-1-dev make gcc
  fi

  # Do not let a failed build abort the whole bootstrap under set -e.
  run sudo make -C "$source_dir" || warn "Could not build the libsecret helper in $source_dir"
}
|
||||
|
||||
# Set git's global credential.helper unless one is already configured.
# In auto mode the choice is libsecret when a helper binary exists, store when
# --allow-plaintext-store was given, and cache otherwise.
configure_git_helper() {
  step "Configuring Git credential helper"

  local existing
  existing="$(git config --global --get credential.helper || true)"
  if [ -n "$existing" ]; then
    ok "credential.helper already set: $existing"
    return
  fi

  local mode="$GIT_HELPER"
  if [ "$mode" = "auto" ]; then
    mode="cache"
    if libsecret_helper_path >/dev/null 2>&1; then
      mode="libsecret"
    elif [ "$ALLOW_PLAINTEXT_STORE" -eq 1 ]; then
      mode="store"
    fi
  fi

  local helper_bin=""
  case "$mode" in
    libsecret)
      helper_bin="$(libsecret_helper_path || true)"
      if [ -z "$helper_bin" ]; then
        # No binary yet: try to build one, then look again.
        build_libsecret_helper
        helper_bin="$(libsecret_helper_path || true)"
      fi
      if [ -n "$helper_bin" ]; then
        run git config --global credential.helper "$helper_bin"
      else
        warn "libsecret helper is not available; using cache helper for this machine."
        run git config --global credential.helper "cache --timeout=3600"
      fi
      ;;
    cache)
      run git config --global credential.helper "cache --timeout=3600"
      ;;
    store)
      # An explicit --git-helper store still needs the plaintext opt-in.
      if [ "$ALLOW_PLAINTEXT_STORE" -ne 1 ]; then
        err "credential.helper=store writes plaintext credentials."
        err "Rerun with --allow-plaintext-store if that is intended for this host."
        exit 1
      fi
      run git config --global credential.helper store
      ;;
  esac

  ok "credential.helper configured"
}
|
||||
|
||||
# Ensure an ed25519 SSH key exists, show its public half, and optionally
# authorize it on every configured SSH target.
# All changes go through run so --dry-run leaves ~/.ssh untouched.
setup_ssh_key() {
  step "Checking SSH key"
  run mkdir -p "$HOME/.ssh"
  run chmod 700 "$HOME/.ssh"

  if [ -f "$SSH_KEY" ]; then
    ok "SSH key exists: $SSH_KEY"
  else
    # USER may be unset (which is fatal under set -u), so fall back to id.
    run ssh-keygen -t ed25519 -f "$SSH_KEY" -N "" -C "${USER:-$(id -un)}@$(hostname)-state-hub"
    if [ "$DRY_RUN" -ne 1 ]; then
      ok "SSH key generated: $SSH_KEY"
    fi
  fi

  if [ -f "${SSH_KEY}.pub" ]; then
    printf '\nPublic key to authorize on managed hosts:\n\n'
    sed 's/^/ /' "${SSH_KEY}.pub"
    printf '\n'
  fi

  if [ "$AUTHORIZE_SSH" -eq 1 ]; then
    local target
    for target in "${SSH_TARGETS[@]}"; do
      run ssh-copy-id -i "${SSH_KEY}.pub" "$target"
    done
  else
    warn "SSH authorization not attempted. Use --authorize-ssh after confirming host access."
  fi
}
|
||||
|
||||
# Create the Gitea config file with URL, user and token when it is missing.
# An existing file is kept and only has its mode tightened. Missing
# credentials are prompted for unless --non-interactive was given.
write_gitea_conf() {
  step "Checking Gitea config"
  if [ "$SKIP_GITEA" -eq 1 ]; then
    warn "Skipping Gitea config by request."
    return
  fi

  if [ -f "$GITEA_CONF" ]; then
    # Routed through run so --dry-run does not modify the existing file.
    run chmod 600 "$GITEA_CONF"
    ok "$GITEA_CONF already exists"
    return
  fi

  if [ -z "$GITEA_USER" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
    read -r -p "Gitea username: " GITEA_USER
  fi

  if [ -z "$GITEA_TOKEN" ] && [ "$NON_INTERACTIVE" -eq 0 ]; then
    # -s keeps the token off the terminal.
    read -r -s -p "Gitea token (requires read:user and repository write scopes): " GITEA_TOKEN
    printf '\n'
  fi

  if [ -z "$GITEA_USER" ] || [ -z "$GITEA_TOKEN" ]; then
    warn "Gitea config not written. Set GITEA_USER/GITEA_TOKEN or rerun interactively."
    return
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    printf 'DRY-RUN: would write %s with GITEA_URL and GITEA_USER; token hidden\n' "$GITEA_CONF"
    return
  fi

  # Restrict the umask in a subshell so the file is created private without
  # changing the umask for the rest of the script.
  (
    umask 077
    {
      printf 'GITEA_URL="%s"\n' "$GITEA_URL"
      printf 'GITEA_USER="%s"\n' "$GITEA_USER"
      printf 'GITEA_TOKEN="%s"\n' "$GITEA_TOKEN"
    } >"$GITEA_CONF"
  )
  chmod 600 "$GITEA_CONF"
  ok "Wrote $GITEA_CONF"
}
|
||||
|
||||
# Register the State Hub MCP server through the repository Makefile,
# unless --skip-mcp was given.
register_mcp() {
  step "Registering State Hub MCP"
  if [ "$SKIP_MCP" -eq 1 ]; then
    warn "Skipping MCP registration by request."
    return
  fi

  case "$DRY_RUN" in
    1)
      # NOTE(review): run only prints the command in dry-run mode, so
      # DRY_RUN=1 never reaches make here - confirm that is intended.
      run make -C "$STATE_HUB_DIR" register-mcp DRY_RUN=1
      ;;
    *)
      make -C "$STATE_HUB_DIR" register-mcp
      ;;
  esac
}
|
||||
|
||||
# Probe the State Hub health endpoint on the local port, then on the tunnel
# port, and report which one answered. Never fails; it only warns.
health_check() {
  step "Checking State Hub reachability"
  local -a probe=(curl -fsS --max-time 2)
  if "${probe[@]}" "http://127.0.0.1:8000/state/health" >/dev/null 2>&1; then
    ok "State Hub API reachable at http://127.0.0.1:8000"
    return
  fi
  if "${probe[@]}" "http://127.0.0.1:18000/state/health" >/dev/null 2>&1; then
    ok "State Hub API reachable through tunnel at http://127.0.0.1:18000"
    return
  fi
  warn "State Hub API is not reachable locally or through the default tunnel."
  warn "Start it with 'make api' or run 'make bridges' if this machine uses ops-bridge."
}
|
||||
|
||||
# Run every bootstrap stage in order. Arguments are accepted but unused;
# options were already parsed at the top level of the script.
main() {
  step "State Hub environment bootstrap"
  printf 'Repository: %s\n' "$STATE_HUB_DIR"

  local stage
  for stage in \
    check_commands \
    configure_git_helper \
    setup_ssh_key \
    write_gitea_conf \
    register_mcp \
    health_check; do
    "$stage"
  done

  ok "Bootstrap checks complete."
}

main "$@"
|
||||
@@ -1596,7 +1596,7 @@ def fix_repo(
|
||||
task_id = ctx["task_id"]
|
||||
status = ctx["status"]
|
||||
result = _api_patch(api_base, f"/tasks/{task_id}",
|
||||
{"status": status})
|
||||
{"status": status, "suppress_token_event": True})
|
||||
if result is not None and "_error" not in result:
|
||||
report.fixes_applied.append(
|
||||
f"C-10 fixed: task {task_id[:8]}… status → {status!r}"
|
||||
|
||||
151
scripts/register-mcp.sh
Executable file
151
scripts/register-mcp.sh
Executable file
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
STATE_HUB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
CLAUDE_JSON="${CLAUDE_JSON:-$HOME/.claude.json}"
|
||||
SERVER_NAME="${STATE_HUB_MCP_NAME:-state-hub}"
|
||||
API_BASE="${API_BASE:-}"
|
||||
MCP_URL="${MCP_URL:-}"
|
||||
DRY_RUN=0
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
Usage: scripts/register-mcp.sh [--url URL] [--api-base URL] [--dry-run]
|
||||
|
||||
Registers the State Hub MCP server for Claude Code.
|
||||
|
||||
Options:
|
||||
--url URL MCP SSE URL to register. Defaults to local :8001 or tunnel :18001.
|
||||
--api-base URL State Hub API URL used for reachability checks.
|
||||
--dry-run Print what would happen without changing Claude config.
|
||||
-h, --help Show this help.
|
||||
USAGE
|
||||
}
|
||||
|
||||
# Parse command-line options. --url and --api-base take a value; an empty
# value is accepted and means "auto-detect", as before.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --url|--api-base)
      # Without this guard "shift 2" fails when the option is the last
      # argument and set -e ends the script with no message at all.
      if [ "$#" -lt 2 ]; then
        echo "ERROR: $1 requires a value" >&2
        usage >&2
        exit 2
      fi
      if [ "$1" = "--url" ]; then
        MCP_URL="$2"
      else
        API_BASE="$2"
      fi
      shift 2
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "ERROR: unknown argument: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done
|
||||
|
||||
status() {
|
||||
printf '%s\n' "$*"
|
||||
}
|
||||
|
||||
# Succeed when GET <base>/state/health answers within two seconds.
# One trailing slash on the base URL is tolerated; all output is discarded.
api_healthy() {
  local health_url="${1%/}/state/health"
  curl -fsS --max-time 2 "$health_url" >/dev/null 2>&1
}
|
||||
|
||||
# Succeed when a TCP connection to host $1, port $2 opens within two seconds.
# Host and port are passed to the inner shell as positional arguments rather
# than being spliced into its script text, so unusual values cannot be
# interpreted as shell code.
port_open() {
  local host="$1"
  local port="$2"
  timeout 2 bash -c ':</dev/tcp/"$1"/"$2"' _ "$host" "$port" >/dev/null 2>&1
}
|
||||
|
||||
# Pick the API base when none was given: the local port is the default, and
# the tunnel port is used only when local is down and the tunnel answers.
if [ -z "$API_BASE" ]; then
  API_BASE="http://127.0.0.1:8000"
  if ! api_healthy "$API_BASE" && api_healthy "http://127.0.0.1:18000"; then
    API_BASE="http://127.0.0.1:18000"
  fi
fi

# Pick the MCP SSE URL when none was given: prefer an open local port, then
# an open tunnel port, then whichever side the API base points at.
if [ -z "$MCP_URL" ]; then
  mcp_port=8001
  if port_open 127.0.0.1 8001; then
    mcp_port=8001
  elif port_open 127.0.0.1 18001 || [ "$API_BASE" = "http://127.0.0.1:18000" ]; then
    mcp_port=18001
  fi
  MCP_URL="http://127.0.0.1:${mcp_port}/sse"
  unset mcp_port
fi
|
||||
|
||||
CONFIG="$(python3 - "$MCP_URL" <<'PY'
|
||||
import json
|
||||
import sys
|
||||
|
||||
print(json.dumps({"type": "sse", "url": sys.argv[1]}, separators=(",", ":")))
|
||||
PY
|
||||
)"
|
||||
|
||||
status "State Hub directory: $STATE_HUB_DIR"
|
||||
status "API health check: ${API_BASE%/}/state/health"
|
||||
status "MCP registration: $SERVER_NAME -> $MCP_URL"
|
||||
|
||||
if api_healthy "$API_BASE"; then
|
||||
status "OK: State Hub API is reachable."
|
||||
else
|
||||
status "WARN: State Hub API is not reachable at ${API_BASE%/}/state/health."
|
||||
status " Start it with 'make api' or bring up the ops-bridge tunnel."
|
||||
fi
|
||||
|
||||
if ! command -v claude >/dev/null 2>&1; then
|
||||
if [ "$DRY_RUN" -eq 1 ]; then
|
||||
status "WARN: claude CLI not found on PATH; dry-run will still show the command."
|
||||
else
|
||||
status "ERROR: claude CLI not found on PATH."
|
||||
status " Install or expose Claude Code CLI, then rerun: make register-mcp"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Look up the URL currently registered for $SERVER_NAME in the Claude config.
# The embedded Python prints an empty string when the file is missing,
# unreadable, not valid JSON, or not shaped as expected, so a damaged config
# cannot abort this script under set -e.
CURRENT_URL="$(python3 - "$CLAUDE_JSON" "$SERVER_NAME" <<'PY'
import json
import sys
from pathlib import Path

path = Path(sys.argv[1])
name = sys.argv[2]

url = ""
try:
    data = json.loads(path.read_text(encoding="utf-8"))
except (OSError, ValueError):
    # Missing or unreadable file, or invalid JSON: treat as not registered.
    data = None

if isinstance(data, dict):
    servers = data.get("mcpServers")
    entry = servers.get(name) if isinstance(servers, dict) else None
    if isinstance(entry, dict) and isinstance(entry.get("url"), str):
        url = entry["url"]

print(url)
PY
)"
|
||||
|
||||
if [ "$CURRENT_URL" = "$MCP_URL" ]; then
|
||||
status "OK: $SERVER_NAME is already registered with this URL."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$DRY_RUN" -eq 1 ]; then
|
||||
status "DRY-RUN: would run:"
|
||||
status " claude mcp add-json -s user $SERVER_NAME '$CONFIG'"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
claude mcp add-json -s user "$SERVER_NAME" "$CONFIG"
|
||||
|
||||
status "OK: registered $SERVER_NAME."
|
||||
status "Restart Claude Code so the MCP server list is refreshed."
|
||||
@@ -1,27 +1,48 @@
|
||||
#!/usr/bin/env python3
|
||||
"""PostToolUse hook: replace heuristic token events with real transcript-derived counts.
|
||||
|
||||
Fires after mcp__state-hub__update_task_status when status=done.
|
||||
Fires after supported task completion tools when status=done.
|
||||
Reads the Claude Code session transcript to compute the token delta since the
|
||||
previous task completion, then PATCHes the heuristic event with real counts.
|
||||
|
||||
State is persisted per session in /tmp/custodian_tokens_<session_id>.json so
|
||||
deltas are correctly scoped even when multiple tasks complete in one session.
|
||||
State is persisted per session in a durable cache directory so deltas survive
|
||||
restarts and multiple task completions in one session.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
API = os.environ.get("CUSTODIAN_API", "http://127.0.0.1:8000")
|
||||
STATE_DIR = Path(os.environ.get("TMPDIR", "/tmp"))
|
||||
STATE_DIR = Path(os.environ.get("CUSTODIAN_TOKEN_STATE_DIR", Path.home() / ".cache" / "state-hub" / "token-hooks"))
|
||||
HEALTH_LOG = STATE_DIR / "hook-health.jsonl"
|
||||
PARSER_VERSION = "claude-transcript-delta-v1"
|
||||
SUPPORTED_TOOL_HINTS = (
|
||||
"update_task_status",
|
||||
"tasks",
|
||||
"task",
|
||||
)
|
||||
|
||||
|
||||
def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
|
||||
def utc_now() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def write_health(event: dict) -> None:
    """Append ``event`` to the hook health log as one JSON line.

    A ``ts`` field holding the current UTC time is added first; a ``ts`` key
    in ``event`` overrides it. File-system errors are ignored on purpose so
    that logging can never break the hook.
    """
    record = {"ts": utc_now(), **event}
    line = json.dumps(record, sort_keys=True) + "\n"
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        with HEALTH_LOG.open("a", encoding="utf-8") as handle:
            handle.write(line)
    except OSError:
        # Best effort: the health log is diagnostic only.
        pass
|
||||
|
||||
|
||||
def read_transcript_totals(transcript_path: str) -> tuple[int, int, int]:
|
||||
"""Sum all usage entries in the transcript JSONL up to the current point."""
|
||||
total_in = total_out = 0
|
||||
total_in = total_out = cached_in = 0
|
||||
try:
|
||||
with open(transcript_path) as f:
|
||||
for line in f:
|
||||
@@ -29,10 +50,9 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
|
||||
entry = json.loads(line)
|
||||
usage = entry.get("message", {}).get("usage", {})
|
||||
if usage:
|
||||
# Count all input token variants (direct + cache creation + cache read)
|
||||
total_in += (
|
||||
usage.get("input_tokens", 0)
|
||||
+ usage.get("cache_creation_input_tokens", 0)
|
||||
total_in += usage.get("input_tokens", 0)
|
||||
cached_in += (
|
||||
usage.get("cache_creation_input_tokens", 0)
|
||||
+ usage.get("cache_read_input_tokens", 0)
|
||||
)
|
||||
total_out += usage.get("output_tokens", 0)
|
||||
@@ -40,21 +60,22 @@ def read_transcript_totals(transcript_path: str) -> tuple[int, int]:
|
||||
continue
|
||||
except OSError:
|
||||
pass
|
||||
return total_in, total_out
|
||||
return total_in, total_out, cached_in
|
||||
|
||||
|
||||
def load_state(session_id: str) -> tuple[int, int]:
|
||||
def load_state(session_id: str) -> tuple[int, int, int]:
|
||||
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
|
||||
try:
|
||||
data = json.loads(state_file.read_text())
|
||||
return data.get("total_in", 0), data.get("total_out", 0)
|
||||
return data.get("total_in", 0), data.get("total_out", 0), data.get("cached_in", 0)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return 0, 0
|
||||
return 0, 0, 0
|
||||
|
||||
|
||||
def save_state(session_id: str, total_in: int, total_out: int) -> None:
|
||||
def save_state(session_id: str, total_in: int, total_out: int, cached_in: int) -> None:
|
||||
STATE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
state_file = STATE_DIR / f"custodian_tokens_{session_id}.json"
|
||||
state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out}))
|
||||
state_file.write_text(json.dumps({"total_in": total_in, "total_out": total_out, "cached_in": cached_in}))
|
||||
|
||||
|
||||
def api_get(path: str):
|
||||
@@ -75,51 +96,89 @@ def api_patch(path: str, data: dict):
|
||||
return json.loads(r.read())
|
||||
|
||||
|
||||
def extract_done_task(payload: dict) -> tuple[str | None, dict]:
    """Return the task id and tool input of a ``status == "done"`` tool call.

    ``(None, {})`` is returned when the tool name contains none of
    ``SUPPORTED_TOOL_HINTS`` or when the tool input's status is not
    ``"done"``. The id is read from ``task_id``, ``id`` or ``taskId``, in
    that order, and may itself be missing.
    """
    tool_name = payload.get("tool_name", "")
    supported = any(hint in tool_name for hint in SUPPORTED_TOOL_HINTS)
    if not supported:
        return None, {}

    # A null tool_input is treated like an empty one.
    arguments = payload.get("tool_input", {}) or {}
    if arguments.get("status") != "done":
        return None, {}

    identifier = arguments.get("task_id") or arguments.get("id") or arguments.get("taskId")
    return identifier, arguments
|
||||
|
||||
|
||||
def main() -> None:
|
||||
try:
|
||||
payload = json.loads(sys.stdin.read())
|
||||
except json.JSONDecodeError:
|
||||
return
|
||||
|
||||
tool_name = payload.get("tool_name", "")
|
||||
if "update_task_status" not in tool_name:
|
||||
return
|
||||
|
||||
tool_input = payload.get("tool_input", {})
|
||||
if tool_input.get("status") != "done":
|
||||
return
|
||||
|
||||
task_id = tool_input.get("task_id")
|
||||
task_id, tool_input = extract_done_task(payload)
|
||||
if not task_id:
|
||||
write_health({"status": "skipped", "reason": "not_done_task_completion", "tool_name": payload.get("tool_name")})
|
||||
return
|
||||
|
||||
transcript_path = payload.get("transcript_path", "")
|
||||
session_id = payload.get("session_id", "unknown")
|
||||
|
||||
# Compute token delta for this task
|
||||
current_in, current_out = read_transcript_totals(transcript_path)
|
||||
last_in, last_out = load_state(session_id)
|
||||
current_in, current_out, current_cached = read_transcript_totals(transcript_path)
|
||||
last_in, last_out, last_cached = load_state(session_id)
|
||||
delta_in = max(0, current_in - last_in)
|
||||
delta_out = max(0, current_out - last_out)
|
||||
save_state(session_id, current_in, current_out)
|
||||
delta_cached = max(0, current_cached - last_cached)
|
||||
save_state(session_id, current_in, current_out, current_cached)
|
||||
|
||||
if delta_in == 0 and delta_out == 0:
|
||||
return # Nothing measurable — leave heuristic in place
|
||||
if delta_in == 0 and delta_out == 0 and delta_cached == 0:
|
||||
write_health({
|
||||
"status": "skipped",
|
||||
"reason": "zero_delta",
|
||||
"session_id": session_id,
|
||||
"task_id": task_id,
|
||||
"source_path": transcript_path,
|
||||
})
|
||||
return
|
||||
|
||||
# Find the most recent heuristic event for this task and replace it
|
||||
try:
|
||||
events = api_get(f"/token-events/?task_id={task_id}¬e=heuristic&limit=5")
|
||||
except (urllib.error.URLError, OSError):
|
||||
write_health({"status": "skipped", "reason": "api_offline", "session_id": session_id, "task_id": task_id})
|
||||
return # API offline — leave heuristic as-is
|
||||
|
||||
if not events:
|
||||
write_health({"status": "skipped", "reason": "no_fallback_event", "session_id": session_id, "task_id": task_id})
|
||||
return
|
||||
|
||||
event_id = events[0]["id"]
|
||||
model = tool_input.get("model")
|
||||
agent = tool_input.get("agent")
|
||||
|
||||
patch_body: dict = {"tokens_in": delta_in, "tokens_out": delta_out, "note": "measured"}
|
||||
patch_body: dict = {
|
||||
"tokens_in": delta_in,
|
||||
"tokens_out": delta_out,
|
||||
"note": "measured",
|
||||
"measurement_kind": "measured",
|
||||
"source_provider": "claude_transcript",
|
||||
"source_id": f"claude:{session_id}:task:{task_id}",
|
||||
"source_path": transcript_path or None,
|
||||
"parser_version": PARSER_VERSION,
|
||||
"confidence": 1.0,
|
||||
"cached_input_tokens": delta_cached,
|
||||
"raw_total_tokens": delta_in + delta_out + delta_cached,
|
||||
"raw_metadata": {
|
||||
"hook": "post_tool_use",
|
||||
"tool_name": payload.get("tool_name"),
|
||||
"state_dir": str(STATE_DIR),
|
||||
},
|
||||
}
|
||||
if model:
|
||||
patch_body["model"] = model
|
||||
if agent:
|
||||
@@ -128,7 +187,19 @@ def main() -> None:
|
||||
try:
|
||||
api_patch(f"/token-events/{event_id}", patch_body)
|
||||
except (urllib.error.URLError, OSError):
|
||||
pass
|
||||
write_health({"status": "skipped", "reason": "patch_failed", "session_id": session_id, "task_id": task_id})
|
||||
return
|
||||
|
||||
write_health({
|
||||
"status": "patched",
|
||||
"session_id": session_id,
|
||||
"task_id": task_id,
|
||||
"event_id": event_id,
|
||||
"tokens_in": delta_in,
|
||||
"tokens_out": delta_out,
|
||||
"cached_input_tokens": delta_cached,
|
||||
"source_path": transcript_path,
|
||||
})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
239
scripts/token_reconcile.py
Normal file
239
scripts/token_reconcile.py
Normal file
@@ -0,0 +1,239 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reconcile token evidence from local agent sources against State Hub.
|
||||
|
||||
Dry-run is the default. Use ``--apply`` to upsert measured source events and
|
||||
``--zero-superseded-fallbacks`` to zero task fallback rows that are covered by
|
||||
source-backed measurements.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Directory two levels above this file (the parent of the folder holding the script).
ROOT = Path(__file__).resolve().parent.parent
# Put that directory on sys.path (once) so the ``api.services`` imports that
# follow can be resolved when this file is executed directly.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
||||
|
||||
from api.services.token_sources import collect_claude_transcripts, collect_codex_sessions, parse_iso # noqa: E402
|
||||
from api.services.token_sources.attribution import repo_refs_from_api, resolve_repo # noqa: E402
|
||||
|
||||
# Default API base URL; the STATE_HUB_API environment variable overrides the
# local fallback address. Used as the default for ``--api-base``.
DEFAULT_API = os.environ.get("STATE_HUB_API", "http://127.0.0.1:8000")
# Value written to the ``note`` field of fallback rows zeroed by
# ``--zero-superseded-fallbacks``.
SUPERSEDED_HEURISTIC_NOTE = "heuristic_superseded_by_source_measurement"
|
||||
|
||||
|
||||
def http_json(api_base: str, method: str, path: str, body: dict[str, Any] | None = None) -> Any:
    """Perform one request against the API and decode the JSON reply.

    Args:
        api_base: Base URL; a trailing slash is tolerated.
        method: HTTP verb passed straight to ``urllib.request.Request``.
        path: Resource path; a leading slash is tolerated.
        body: Optional mapping serialised as UTF-8 JSON request data.

    Returns:
        The decoded JSON document, or ``None`` when the response is empty.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` on transport errors.
        json.JSONDecodeError: when the response is not valid JSON.
    """
    endpoint = "/".join((api_base.rstrip("/"), path.lstrip("/")))
    encoded_body = json.dumps(body).encode("utf-8") if body is not None else None
    request = urllib.request.Request(
        endpoint,
        data=encoded_body,
        headers={"Content-Type": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=30) as response:
        raw = response.read()
    # An empty reply is treated as JSON ``null``.
    return json.loads(raw or b"null")
|
||||
|
||||
|
||||
def list_events(api_base: str, params: dict[str, Any]) -> list[dict[str, Any]]:
    """Fetch every token event matching ``params`` by walking the paginated API.

    Pages of 1000 rows are requested from ``/token-events/`` with an increasing
    ``offset``. Paging stops at the first reply that is not a non-empty list,
    or after a page shorter than the page size.

    Args:
        api_base: Base URL of the API.
        params: Query parameters; ``limit`` and ``offset`` are overridden here.

    Returns:
        All rows collected across pages, in the order received.
    """
    page_size = 1000
    collected: list[dict[str, Any]] = []
    offset = 0
    while True:
        query = urllib.parse.urlencode({**params, "limit": page_size, "offset": offset})
        batch = http_json(api_base, "GET", f"/token-events/?{query}")
        if not (isinstance(batch, list) and batch):
            return collected
        collected.extend(batch)
        if len(batch) < page_size:
            # A short page means there is nothing further to fetch.
            return collected
        offset += page_size
|
||||
|
||||
|
||||
def find_home(explicit: str | None, env_name: str, default: Path) -> Path | None:
    """Pick the first existing directory among the possible home locations.

    Candidates are tried in priority order: the explicit argument, the value
    of the ``env_name`` environment variable, then ``default``. Empty or
    missing values for the first two are skipped.

    Args:
        explicit: Path supplied by the operator, if any.
        env_name: Name of the environment variable to consult.
        default: Fallback location, always tried last.

    Returns:
        The first candidate that is a directory, or ``None`` if none is.
    """
    from_env = os.environ.get(env_name)
    ordered = [Path(raw) for raw in (explicit, from_env) if raw]
    ordered.append(default)
    return next((path for path in ordered if path.is_dir()), None)
|
||||
|
||||
|
||||
def event_total(event: dict[str, Any]) -> int:
    """Return ``tokens_in`` plus ``tokens_out`` for one event row.

    Missing, ``None`` or otherwise falsy counts contribute zero; other values
    are passed through ``int``.
    """
    return sum(int(event.get(field) or 0) for field in ("tokens_in", "tokens_out"))
|
||||
|
||||
|
||||
def source_index(events: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Map each event's source key to the event itself.

    The key is ``source_id`` when truthy, otherwise ``ref_id``. Events whose
    key is not a string are left out. When several events share a key, the
    one appearing last in ``events`` is kept.
    """
    index: dict[str, dict[str, Any]] = {}
    for entry in events:
        key = entry.get("source_id")
        if not key:
            key = entry.get("ref_id")
        if not isinstance(key, str):
            continue
        index[key] = entry
    return index
|
||||
|
||||
|
||||
def print_report(report: dict[str, Any]) -> None:
    """Write ``report`` to stdout as indented JSON with sorted keys.

    Values the JSON encoder cannot handle natively are rendered with ``str``.
    """
    rendered = json.dumps(report, indent=2, sort_keys=True, default=str)
    print(rendered)
|
||||
|
||||
|
||||
def main() -> int:
    """Compare local agent token records with the API's events and report the result.

    Steps, in order:

    1. Parse CLI flags and locate the Codex and Claude home directories.
    2. Collect source records from each home that was found.
    3. Load repos and existing token events (including superseded ones) from
       the API, then plan an upsert for every record that has no existing
       event or whose existing event total is lower than the record total.
    4. Build a report of counts and totals.
    5. With ``--apply``: POST each planned upsert, optionally zero the
       fallback rows, and POST a progress event carrying the report.
    6. Print the report.

    Returns:
        ``1`` when the canary fails (progress events exist but the planned
        measured token total is zero), otherwise ``0``.
    """
    # ``__doc__`` here is the module docstring (global lookup), not this function's.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--since", default="2026-05-19", help="UTC date/time to reconcile from")
    parser.add_argument("--api-base", default=DEFAULT_API)
    parser.add_argument("--codex-home")
    parser.add_argument("--claude-home")
    parser.add_argument("--apply", action="store_true", help="upsert measured source events")
    parser.add_argument(
        "--zero-superseded-fallbacks",
        action="store_true",
        help="with --apply, zero heuristic fallback rows after measured source ingestion",
    )
    args = parser.parse_args()

    # presumably parse_iso returns a datetime (it must expose .isoformat()) — confirm in token_sources
    since = parse_iso(args.since)
    since_param = since.isoformat()
    codex_home = find_home(args.codex_home, "CODEX_HOME", Path.home() / ".codex")
    if codex_home is None:
        # NOTE(review): last-resort path is specific to one user's WSL mount;
        # on any other machine this branch simply leaves codex_home as None.
        windows_codex = Path("/mnt/c/Users/bernd.worsch/.codex")
        codex_home = windows_codex if windows_codex.is_dir() else None
    claude_home = find_home(args.claude_home, "CLAUDE_HOME", Path.home() / ".claude")

    # Gather records per provider; a missing home is recorded as a warning, not an error.
    records = []
    source_health: dict[str, dict[str, Any]] = {}
    if codex_home:
        codex_records = collect_codex_sessions(codex_home, since)
        records.extend(codex_records)
        source_health["codex_session"] = {"home": str(codex_home), "sessions_found": len(codex_records)}
    else:
        source_health["codex_session"] = {"home": None, "sessions_found": 0, "warning": "Codex home not found"}
    if claude_home:
        claude_records = collect_claude_transcripts(claude_home, since)
        records.extend(claude_records)
        source_health["claude_transcript"] = {"home": str(claude_home), "sessions_found": len(claude_records)}
    else:
        source_health["claude_transcript"] = {"home": None, "sessions_found": 0, "warning": "Claude home not found"}

    # Current server-side state for the same time window.
    repos = repo_refs_from_api(http_json(args.api_base, "GET", "/repos/"))
    existing_events = list_events(args.api_base, {"since": since_param, "include_superseded": "true"})
    existing_by_source = source_index(existing_events)
    # Fallback rows: flagged by provider or by the plain "heuristic" note.
    fallback_events = [
        event for event in existing_events
        if event.get("source_provider") == "task_fallback" or event.get("note") == "heuristic"
    ]
    # Rows already marked superseded, by kind or by note prefix.
    superseded_events = [
        event for event in existing_events
        if event.get("measurement_kind") == "superseded" or str(event.get("note") or "").startswith("heuristic_superseded")
    ]

    planned_upserts = []
    unattributed = 0
    stale = 0
    source_totals: dict[str, int] = defaultdict(int)
    for record in records:
        source_totals[record.source_provider] += record.tokens_total
        existing = existing_by_source.get(record.source_id)
        # Already stored with at least as many tokens: nothing to do.
        if existing and event_total(existing) >= record.tokens_total:
            continue
        # Stored, but with fewer tokens than the source now reports.
        if existing:
            stale += 1
        match = resolve_repo(record.cwd, repos)
        # Unattributed records are counted but still planned (with match=None).
        if match is None:
            unattributed += 1
        planned_upserts.append((record, match))

    # Integrity checks over measured rows: repeated source ids and rows lacking one.
    source_ids = [
        event.get("source_id")
        for event in existing_events
        if event.get("source_id") and event.get("measurement_kind") == "measured"
    ]
    duplicate_sources = {
        source_id: count for source_id, count in Counter(source_ids).items() if count > 1
    }
    missing_provenance = [
        event for event in existing_events
        if event.get("measurement_kind") == "measured" and not event.get("source_id")
    ]
    progress_events = http_json(args.api_base, "GET", f"/progress/?since={urllib.parse.quote(since_param)}&limit=1000")
    # NOTE(review): a stale measured row is counted here both with its existing
    # total and with its planned record total — looks like an overcount if the
    # upsert replaces the row; confirm against the /token-events/upsert contract.
    measured_total = sum(
        event_total(event)
        for event in existing_events
        if event.get("measurement_kind") == "measured"
    ) + sum(record.tokens_total for record, _ in planned_upserts)
    # Canary: progress was logged in the window yet no measured tokens are accounted for.
    # NOTE(review): truthiness is used here while the report below guards with
    # isinstance(list); a non-list truthy reply would affect only the canary.
    canary_failed = bool(progress_events) and measured_total == 0

    report = {
        "since": since.isoformat(),
        "apply": args.apply,
        "sources": source_health,
        "sessions_found": len(records),
        "source_tokens_total": dict(source_totals),
        "events_existing": len(existing_events),
        "events_to_upsert": len(planned_upserts),
        "sessions_stale": stale,
        "fallback_events": len(fallback_events),
        "superseded_events": len(superseded_events),
        "unattributed_source_records": unattributed,
        "missing_provenance_events": len(missing_provenance),
        "duplicate_source_ids": duplicate_sources,
        "progress_events": len(progress_events) if isinstance(progress_events, list) else 0,
        "measured_tokens_total_after_plan": measured_total,
        "canary_failed": canary_failed,
    }

    # Everything below writes to the API and only runs with --apply.
    # NOTE(review): nesting of the zeroing loop and the progress POST under
    # --apply follows the flag help text and module docstring — confirm.
    if args.apply:
        for record, match in planned_upserts:
            payload = record.to_token_event_payload(repo_id=match.repo_id if match else None)
            # Extend (not replace) the record's metadata with attribution details.
            payload["raw_metadata"] = {
                **payload.get("raw_metadata", {}),
                "repo_slug": match.slug if match else None,
                "attribution_method": match.method if match else None,
            }
            http_json(args.api_base, "POST", "/token-events/upsert", payload)
        if args.zero_superseded_fallbacks:
            # Every row in fallback_events is zeroed; there is no per-row check
            # here that a measured event actually covers it.
            for event in fallback_events:
                http_json(
                    args.api_base,
                    "PATCH",
                    f"/token-events/{event['id']}",
                    {
                        "tokens_in": 0,
                        "tokens_out": 0,
                        "note": SUPERSEDED_HEURISTIC_NOTE,
                        "measurement_kind": "superseded",
                        "source_provider": "task_fallback",
                        "confidence": 0.0,
                        "raw_total_tokens": 0,
                    },
                )
        # Record the run itself as a progress event, with the full report attached.
        http_json(
            args.api_base,
            "POST",
            "/progress/",
            {
                "summary": (
                    "Token reconciliation: "
                    f"{len(records)} source records, {len(planned_upserts)} upserts, "
                    f"{len(fallback_events)} fallback events, canary_failed={canary_failed}"
                ),
                "event_type": "token_reconciliation",
                "author": "codex",
                "detail": report,
            },
        )

    print_report(report)
    return 1 if canary_failed else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user