#!/usr/bin/env python3
"""Observable data loader: token consumption summary by repo and workstream.

Fetches repos, workstreams and token events from the API rooted at
``API_BASE`` and prints a single JSON document to stdout with the keys
``by_repo``, ``by_workstream``, ``top_tasks``, ``by_model`` and
``total_events``.  Any endpoint that cannot be fetched or decoded is treated
as an empty list so the loader always emits valid JSON.
"""

import http.client
import json
import os
import urllib.error
import urllib.request

API_BASE = os.environ.get("API_BASE", "http://127.0.0.1:8000").rstrip("/")


def fetch(url: str):
    """Return the decoded JSON body of *url*, or ``None`` on any failure.

    The loader is best-effort: connection errors, HTTP error statuses,
    timeouts, truncated responses and bodies that are not valid JSON all
    yield ``None`` rather than an exception.
    """
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            return json.loads(resp.read())
    except (
        urllib.error.URLError,      # connection failures and HTTP error statuses
        OSError,                    # timeouts/resets raised while reading the body
        http.client.HTTPException,  # e.g. IncompleteRead on a truncated response
        ValueError,                 # body is not valid JSON (JSONDecodeError)
    ):
        return None


def _records(payload) -> list:
    """Return the dict entries of *payload* if it is a list, else ``[]``.

    Guards the aggregation code against ``None`` (failed fetch), non-list
    payloads and stray non-object entries, all of which would otherwise raise
    when ``.get`` is called on them.
    """
    if not isinstance(payload, list):
        return []
    return [item for item in payload if isinstance(item, dict)]


def _tokens(event: dict) -> tuple:
    """Return ``(tokens_in, tokens_out)`` for *event*.

    Missing keys and explicit JSON ``null`` values both count as 0;
    ``event.get(key, 0)`` alone would hand back ``None`` for a null field and
    break the ``+=`` accumulation.
    """
    return event.get("tokens_in") or 0, event.get("tokens_out") or 0


def aggregate(events, key_fn, label_fn):
    """Group token events by a key function and return aggregated records.

    Args:
        events: Iterable of token-event dicts.
        key_fn: Maps an event to its grouping key; events with a falsy key
            are skipped.
        label_fn: Maps a grouping key to its display label.

    Returns:
        A list of dicts with the keys ``scope_id``, ``label``, ``tokens_in``,
        ``tokens_out``, ``event_count``, ``by_model`` (model name -> total
        tokens) and ``tokens_total``, sorted by ``tokens_total`` descending.
        Groups with equal totals keep first-seen order.
    """
    groups: dict = {}
    for event in events:
        key = key_fn(event)
        if not key:
            continue
        group = groups.get(key)
        if group is None:
            group = groups[key] = {
                "scope_id": key,
                "label": label_fn(key),
                "tokens_in": 0,
                "tokens_out": 0,
                "event_count": 0,
                "by_model": {},
            }
        tokens_in, tokens_out = _tokens(event)
        group["tokens_in"] += tokens_in
        group["tokens_out"] += tokens_out
        group["event_count"] += 1
        model = event.get("model") or "unknown"
        per_model = group["by_model"]
        per_model[model] = per_model.get(model, 0) + tokens_in + tokens_out
    for group in groups.values():
        group["tokens_total"] = group["tokens_in"] + group["tokens_out"]
    return sorted(
        groups.values(), key=lambda g: g["tokens_total"], reverse=True
    )


def _top_tasks(events, limit: int = 10) -> list:
    """Return the *limit* tasks with the most tokens, highest first."""
    tasks: dict = {}
    for event in events:
        task_id = event.get("task_id")
        if not task_id:
            continue
        task = tasks.get(task_id)
        if task is None:
            task = tasks[task_id] = {
                "task_id": task_id,
                "tokens_in": 0,
                "tokens_out": 0,
                "event_count": 0,
            }
        tokens_in, tokens_out = _tokens(event)
        task["tokens_in"] += tokens_in
        task["tokens_out"] += tokens_out
        task["event_count"] += 1
    for task in tasks.values():
        task["tokens_total"] = task["tokens_in"] + task["tokens_out"]
    ranked = sorted(
        tasks.values(), key=lambda t: t["tokens_total"], reverse=True
    )
    return ranked[:limit]


def _model_breakdown(events) -> list:
    """Return total tokens per model across all events, highest first."""
    totals: dict = {}
    for event in events:
        model = event.get("model") or "unknown"
        totals[model] = totals.get(model, 0) + sum(_tokens(event))
    ranked = sorted(totals.items(), key=lambda kv: kv[1], reverse=True)
    return [{"model": model, "tokens_total": total} for model, total in ranked]


def summarize(repos, workstreams, events) -> dict:
    """Build the summary document from already-fetched API records.

    Args:
        repos: Repo dicts; ``id`` maps to ``slug`` (falling back to the id).
        workstreams: Workstream dicts; ``id`` maps to ``title`` (falling back
            to the id).
        events: Token-event dicts to aggregate.

    Returns:
        A JSON-serialisable dict with the keys ``by_repo``, ``by_workstream``,
        ``top_tasks``, ``by_model`` and ``total_events``.
    """
    # Entries without an id cannot be used for label resolution; skip them
    # instead of failing the whole summary.
    repo_map = {r["id"]: r.get("slug", r["id"]) for r in repos if "id" in r}
    ws_map = {
        w["id"]: w.get("title", w["id"]) for w in workstreams if "id" in w
    }
    return {
        "by_repo": aggregate(
            events,
            lambda e: e.get("repo_id"),
            lambda k: repo_map.get(k, k),
        ),
        "by_workstream": aggregate(
            events,
            lambda e: e.get("workstream_id"),
            lambda k: ws_map.get(k, k),
        ),
        "top_tasks": _top_tasks(events),
        "by_model": _model_breakdown(events),
        "total_events": len(events),
    }


def main() -> None:
    """Fetch the source data and print the summary as JSON on stdout."""
    # Repos and workstreams are only needed to resolve ids to labels.
    repos = _records(fetch(f"{API_BASE}/repos/"))
    workstreams = _records(fetch(f"{API_BASE}/workstreams/?limit=500"))
    # NOTE(review): only a single page of up to 1000 events is requested, so
    # the totals presumably cover just that page when more events exist --
    # confirm against the API's pagination behaviour.
    events = _records(fetch(f"{API_BASE}/token-events/?limit=1000"))
    print(json.dumps(summarize(repos, workstreams, events)))


if __name__ == "__main__":
    main()