Optimize dashboard overview loading

This commit is contained in:
2026-06-06 00:42:00 +02:00
parent a412998c96
commit b340489d96
14 changed files with 990 additions and 88 deletions

View File

@@ -1,5 +1,6 @@
import hashlib
import os
import time
from contextlib import asynccontextmanager
from fastapi import FastAPI
@@ -26,26 +27,37 @@ class ETagMiddleware(BaseHTTPMiddleware):
"""Add ETag + conditional-GET (304) support to all JSON GET responses."""
async def dispatch(self, request: Request, call_next):
started = time.perf_counter()
response = await call_next(request)
if request.method != "GET":
response.headers["X-StateHub-Elapsed-Ms"] = f"{(time.perf_counter() - started) * 1000:.1f}"
return response
if "application/json" not in response.headers.get("content-type", ""):
response.headers["X-StateHub-Elapsed-Ms"] = f"{(time.perf_counter() - started) * 1000:.1f}"
return response
body_parts = []
async for chunk in response.body_iterator:
body_parts.append(chunk)
body = b"".join(body_parts)
elapsed_ms = f"{(time.perf_counter() - started) * 1000:.1f}"
etag = '"' + hashlib.md5(body, usedforsecurity=False).hexdigest() + '"'
if request.headers.get("if-none-match") == etag:
return StarletteResponse(
status_code=304,
headers={"ETag": etag, "Cache-Control": "no-cache"},
headers={
"ETag": etag,
"Cache-Control": "no-cache",
"X-StateHub-Elapsed-Ms": elapsed_ms,
"X-StateHub-Response-Bytes": "0",
},
)
headers = {k: v for k, v in response.headers.items() if k.lower() != "content-length"}
headers["ETag"] = etag
headers["X-StateHub-Elapsed-Ms"] = elapsed_ms
headers["X-StateHub-Response-Bytes"] = str(len(body))
if not any(k.lower() == "cache-control" for k in headers):
headers["Cache-Control"] = "no-cache"
return StarletteResponse(
@@ -84,7 +96,7 @@ app.add_middleware(
allow_origins=_cors_origins,
allow_methods=["GET", "POST", "PATCH", "DELETE", "PUT"],
allow_headers=["Content-Type", "If-None-Match"],
expose_headers=["ETag"],
expose_headers=["ETag", "X-StateHub-Elapsed-Ms", "X-StateHub-Response-Bytes", "X-StateHub-Cache"],
)
app.include_router(domains.router)

View File

@@ -1,7 +1,7 @@
import time
from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends, Request
from fastapi import APIRouter, Depends, Request, Response
from fastapi.responses import JSONResponse
from sqlalchemy import func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
@@ -17,6 +17,7 @@ from api.models.extension_point import ExtensionPoint
from api.models.managed_repo import ManagedRepo
from api.models.progress_event import ProgressEvent
from api.models.sbom_entry import SBOMEntry
from api.models.sbom_snapshot import SBOMSnapshot
from api.models.task import Task, TaskPriority, TaskStatus
from api.models.technical_debt import TechnicalDebt
from api.models.topic import Topic, TopicStatus
@@ -26,6 +27,9 @@ from api.schemas.decision import DecisionRead
from api.schemas.domain import DomainSummary
from api.schemas.progress_event import ProgressEventRead
from api.schemas.state import (
DashboardOverview,
DashboardSourceMeta,
DashboardWorkplanRow,
DecisionTotals,
NextStep,
StateSummary,
@@ -38,6 +42,7 @@ from api.schemas.task import TaskRead
from api.schemas.topic import TopicRead, TopicWithWorkstreams
from api.schemas.workstream import WorkstreamRead, WorkstreamWithTaskCounts, WorkstreamWithDeps
from api.schemas.workstream_dependency import WorkstreamDepStub
from api.routers.workstreams import _workplan_index
from api.task_status import TERMINAL_TASK_STATUSES, status_value
from api.workplan_status import (
CLOSED_WORKSTREAM_STATUSES,
@@ -51,17 +56,25 @@ router = APIRouter(prefix="/state", tags=["state"])
# In-process cache for GET /state/summary: last built payload, the
# time.monotonic() reading when it was stored, and its lifetime in seconds.
_SUMMARY_CACHE: StateSummary | None = None
_SUMMARY_CACHE_AT: float = 0.0
_SUMMARY_TTL = 15.0
# Same scheme for GET /state/overview, with a shorter lifetime.
_OVERVIEW_CACHE: DashboardOverview | None = None
_OVERVIEW_CACHE_AT: float = 0.0
_OVERVIEW_TTL = 10.0
@router.get("/summary", response_model=StateSummary)
async def get_summary(
request: Request,
response: Response,
session: AsyncSession = Depends(get_session),
) -> StateSummary:
global _SUMMARY_CACHE, _SUMMARY_CACHE_AT
no_cache = "no-cache" in request.headers.get("cache-control", "")
if not no_cache and _SUMMARY_CACHE is not None and (time.monotonic() - _SUMMARY_CACHE_AT) < _SUMMARY_TTL:
response.headers["X-StateHub-Cache"] = "hit"
response.headers["Cache-Control"] = "max-age=15, stale-while-revalidate=30"
return _SUMMARY_CACHE
response.headers["X-StateHub-Cache"] = "miss"
response.headers["Cache-Control"] = "max-age=15, stale-while-revalidate=30"
# Run all queries sequentially on one session.
# AsyncSession does not support concurrent operations (no gather on same session).
@@ -362,6 +375,309 @@ async def get_summary(
return result
@router.get("/overview", response_model=DashboardOverview)
async def get_overview(
    request: Request,
    response: Response,
    session: AsyncSession = Depends(get_session),
) -> DashboardOverview:
    """Serve the bounded dashboard overview read model.

    Deliberately narrower than /state/summary: the overview page needs counts,
    recent rows, and chart-ready workplan rows, not full task or workplan
    lists on every poll.

    The payload is memoised in-process for ``_OVERVIEW_TTL`` seconds. A request
    carrying ``no-cache`` in its Cache-Control header always rebuilds it. The
    ``X-StateHub-Cache`` response header reports ``hit`` or ``miss``.

    NOTE(review): browsers add ``Cache-Control: no-cache`` to requests issued
    with fetch cache modes "reload" / "no-store", so such clients would always
    take the miss path -- confirm this is intended.
    """
    global _OVERVIEW_CACHE, _OVERVIEW_CACHE_AT

    client_bypass = "no-cache" in request.headers.get("cache-control", "")
    cached = _OVERVIEW_CACHE
    serve_cached = (
        not client_bypass
        and cached is not None
        and (time.monotonic() - _OVERVIEW_CACHE_AT) < _OVERVIEW_TTL
    )

    # Both outcomes advertise the same client caching policy.
    response.headers["X-StateHub-Cache"] = "hit" if serve_cached else "miss"
    response.headers["Cache-Control"] = "max-age=10, stale-while-revalidate=30"
    if serve_cached:
        return cached

    overview = await _build_dashboard_overview(session)
    _OVERVIEW_CACHE = overview
    _OVERVIEW_CACHE_AT = time.monotonic()
    return overview
async def _build_dashboard_overview(session: AsyncSession) -> DashboardOverview:
    """Assemble the DashboardOverview payload for the dashboard overview page.

    All queries are awaited one after another on ``session``. The result
    bundles totals, topic/workstream stubs, blocking decisions, waiting tasks,
    recent progress, registration milestones, contribution and SBOM counters,
    and one chart-ready row per workstream with task counts already aggregated.
    """
    # Non-archived topics, oldest first. Only the domain relationship is
    # loaded; the collection relationships are explicitly not loaded.
    topics_rows = await session.execute(
        select(Topic)
        .options(
            selectinload(Topic.domain),
            noload(Topic.workstreams),
            noload(Topic.decisions),
            noload(Topic.progress_events),
        )
        .where(Topic.status != TopicStatus.archived)
        .order_by(Topic.created_at)
    )
    topics = list(topics_rows.scalars().all())
    topic_map = {topic.id: topic for topic in topics}
    # Every workstream (no relationships loaded), in planning order with
    # most recently updated last as the tie-breaker.
    workstream_rows = await session.execute(
        select(Workstream)
        .options(noload("*"))
        .order_by(
            Workstream.planning_priority.asc().nullslast(),
            Workstream.planning_order.asc().nullslast(),
            Workstream.updated_at.desc(),
        )
    )
    workstreams_all = list(workstream_rows.scalars().all())
    # Workstream stubs grouped under their topic in creation order.
    # Workstreams whose topic was not loaded above are skipped.
    topic_workstreams: dict = {t.id: [] for t in topics}
    for w in sorted(workstreams_all, key=lambda item: item.created_at):
        if w.topic_id not in topic_workstreams:
            continue
        topic_workstreams[w.topic_id].append({
            "id": w.id,
            "slug": w.slug,
            "title": w.title,
            "status": w.status,
            "owner": w.owner,
            "due_date": w.due_date,
        })
    # repo id -> repo slug and the slug of the domain that owns it.
    repo_rows = await session.execute(
        select(ManagedRepo.id, ManagedRepo.slug, Domain.slug)
        .join(Domain, Domain.id == ManagedRepo.domain_id)
        .order_by(ManagedRepo.slug)
    )
    repo_map = {
        repo_id: {"slug": repo_slug, "domain_slug": domain_slug}
        for repo_id, repo_slug, domain_slug in repo_rows
    }
    # Task counts come from one grouped query: a row per (workstream, status).
    task_counts_by_ws: dict = {}
    task_statuses_per_ws: dict = {}
    task_totals_by_status: dict[str, int] = {}
    for ws_id, task_status, count in await session.execute(
        select(Task.workstream_id, Task.status, func.count()).group_by(Task.workstream_id, Task.status)
    ):
        status = status_value(task_status)
        task_counts_by_ws.setdefault(ws_id, {"done": 0, "progress": 0, "wait": 0, "todo": 0, "total": 0})
        # "total" includes every status; only the four chart buckets get
        # their own per-status counter.
        task_counts_by_ws[ws_id]["total"] += count
        if status in {"done", "progress", "wait", "todo"}:
            task_counts_by_ws[ws_id][status] += count
        # Expand the count back into one status entry per task; the flow
        # evaluation below consumes a list of task dicts.
        task_statuses_per_ws.setdefault(ws_id, []).extend([status] * count)
        task_totals_by_status[status] = task_totals_by_status.get(status, 0) + count
    open_ws = [
        w for w in workstreams_all
        if normalize_workstream_status(w.status) in OPEN_WORKSTREAM_STATUSES
    ]
    open_ws_ids = [w.id for w in open_ws]
    # Dependency edges that touch an open workstream on either end.
    dep_rows = []
    if open_ws_ids:
        dep_result = await session.execute(
            select(WorkstreamDependency).where(
                (WorkstreamDependency.from_workstream_id.in_(open_ws_ids))
                | (WorkstreamDependency.to_workstream_id.in_(open_ws_ids))
            )
        )
        dep_rows = list(dep_result.scalars().all())
    ws_lookup = {w.id: w for w in workstreams_all}
    workstream_flow = load_flow("workstream")
    flow_engine = FlowEngine()
    # Effective status per open workstream: "blocked" when the flow result
    # sets exit_blocked, otherwise the normalized stored status.
    effective_status: dict = {}
    for w in open_ws:
        flow_obj = {
            "status": w.status,
            "workstation": w.status,
            "tasks": [{"status": status} for status in task_statuses_per_ws.get(w.id, [])],
            # Only outgoing edges (from this workstream) whose target is known.
            "dependencies": [
                {"workstation": normalize_workstream_status(ws_lookup[d.to_workstream_id].status)}
                for d in dep_rows
                if d.from_workstream_id == w.id and d.to_workstream_id and d.to_workstream_id in ws_lookup
            ],
        }
        flow_result = flow_engine.evaluate(flow_obj, workstream_flow)
        effective_status[w.id] = "blocked" if flow_result.exit_blocked else normalize_workstream_status(w.status)
    # Raw per-status row counts for topics, workstreams and decisions.
    topic_counts = {r[0]: r[1] for r in await session.execute(
        select(Topic.status, func.count()).group_by(Topic.status)
    )}
    ws_counts = {r[0]: r[1] for r in await session.execute(
        select(Workstream.status, func.count()).group_by(Workstream.status)
    )}
    dec_counts = {r[0]: r[1] for r in await session.execute(
        select(Decision.status, func.count()).group_by(Decision.status)
    )}
    totals = Totals(
        topics=TopicTotals(
            active=topic_counts.get(TopicStatus.active, 0),
            paused=topic_counts.get(TopicStatus.paused, 0),
            archived=topic_counts.get(TopicStatus.archived, 0),
            total=sum(topic_counts.values()),
        ),
        # active/blocked are derived from the effective status of open
        # workstreams; the other buckets come from the stored status counts.
        workstreams=WorkstreamTotals(
            proposed=ws_counts.get("proposed", 0),
            ready=ws_counts.get("ready", 0) + ws_counts.get("todo", 0),
            active=sum(1 for status in effective_status.values() if status == "active"),
            blocked=sum(1 for status in effective_status.values() if status == "blocked"),
            backlog=ws_counts.get("backlog", 0),
            finished=(
                ws_counts.get("finished", 0)
                + ws_counts.get("completed", 0)
                + ws_counts.get("accepted", 0)
            ),
            archived=ws_counts.get("archived", 0),
            total=sum(ws_counts.values()),
        ),
        tasks=TaskTotals(
            wait=task_totals_by_status.get("wait", 0),
            todo=task_totals_by_status.get("todo", 0),
            progress=task_totals_by_status.get("progress", 0),
            done=task_totals_by_status.get("done", 0),
            cancel=task_totals_by_status.get("cancel", 0),
            total=sum(task_totals_by_status.values()),
        ),
        decisions=DecisionTotals(
            open=dec_counts.get(DecisionStatus.open, 0),
            resolved=dec_counts.get(DecisionStatus.resolved, 0),
            escalated=dec_counts.get(DecisionStatus.escalated, 0),
            superseded=dec_counts.get(DecisionStatus.superseded, 0),
            total=sum(dec_counts.values()),
        ),
    )
    # Pending decisions that are still open or escalated, earliest deadline
    # first (rows without a deadline last).
    blocking_rows = await session.execute(
        select(Decision)
        .where(Decision.decision_type == DecisionType.pending)
        .where(Decision.status.in_([DecisionStatus.open, DecisionStatus.escalated]))
        .order_by(Decision.deadline.asc().nullslast(), Decision.created_at)
    )
    blocking = list(blocking_rows.scalars().all())
    # Tasks in "wait" status, oldest first.
    waiting_rows = await session.execute(
        select(Task).options(noload("*")).where(Task.status == TaskStatus.wait).order_by(Task.created_at)
    )
    waiting = list(waiting_rows.scalars().all())
    # The 20 most recent progress events.
    recent_rows = await session.execute(
        select(ProgressEvent).options(noload("*")).order_by(ProgressEvent.created_at.desc()).limit(20)
    )
    recent = list(recent_rows.scalars().all())
    # Up to 500 most recent milestone events whose summary carries the
    # project-registration prefix.
    milestone_rows = await session.execute(
        select(ProgressEvent)
        .options(noload("*"))
        .where(ProgressEvent.event_type == "milestone")
        .where(ProgressEvent.summary.like("Project registered with State Hub:%"))
        .order_by(ProgressEvent.created_at.desc())
        .limit(500)
    )
    registration_milestones = list(milestone_rows.scalars().all())
    contrib_type_counts = {r[0].value: r[1] for r in await session.execute(
        select(Contribution.type, func.count()).group_by(Contribution.type)
    )}
    contrib_status_counts = {r[0].value: r[1] for r in await session.execute(
        select(Contribution.status, func.count()).group_by(Contribution.status)
    )}
    # Type and status counts share one dict; on a key collision the status
    # count wins because it is merged last.
    contribution_counts = {**contrib_type_counts, **contrib_status_counts}
    # Licence risk: direct, non-dev SBOM entries whose SPDX string contains
    # any of these markers (substring match, case-insensitive).
    _COPYLEFT_PATS = ("GPL", "AGPL", "LGPL", "EUPL", "CDDL", "MPL")
    all_direct_prod_rows = await session.execute(
        select(SBOMEntry.license_spdx)
        .where(SBOMEntry.is_direct.is_(True))
        .where(SBOMEntry.is_dev.is_(False))
    )
    licence_risk_count = sum(
        1 for (lic,) in all_direct_prod_rows.all()
        if lic and any(pat in lic.upper() for pat in _COPYLEFT_PATS)
    )
    # Snapshot count and summed entry_count over all SBOM snapshots.
    snapshot_count, package_total = (await session.execute(
        select(
            func.count(SBOMSnapshot.id),
            func.coalesce(func.sum(SBOMSnapshot.entry_count), 0),
        )
    )).one()
    open_cap_req_count = (await session.execute(
        select(func.count()).select_from(CapabilityRequest).where(
            CapabilityRequest.status.in_(["requested", "accepted", "in_progress", "ready_for_review"])
        )
    )).scalar() or 0
    # The workplan index is requested without forcing a refresh. Any failure
    # is downgraded to an empty map and reported through `sources` instead of
    # failing the whole overview.
    sources: dict[str, DashboardSourceMeta] = {}
    try:
        workplan_index = await _workplan_index(refresh=False, session=session)
        workplan_map = workplan_index.get("workstreams", {})
        index_meta = workplan_index.get("_meta", {})
        sources["workplan_index"] = DashboardSourceMeta(
            ok=not bool(index_meta.get("last_error")),
            stale=bool(index_meta.get("stale")),
            cache_age_seconds=index_meta.get("cache_age_seconds"),
            refresh_in_progress=bool(index_meta.get("refresh_in_progress")),
            error=index_meta.get("last_error"),
        )
    except Exception as exc:
        workplan_map = {}
        sources["workplan_index"] = DashboardSourceMeta(ok=False, error=str(exc))
    # One chart-ready row per workstream, joining repo, topic, workplan index
    # entry (keyed by the workstream id as a string) and task counts.
    workplan_rows: list[DashboardWorkplanRow] = []
    for w in workstreams_all:
        repo = repo_map.get(w.repo_id)
        topic = topic_map.get(w.topic_id)
        workplan = workplan_map.get(str(w.id), {})
        counts = task_counts_by_ws.get(w.id, {"done": 0, "progress": 0, "wait": 0, "todo": 0, "total": 0})
        workplan_rows.append(DashboardWorkplanRow(
            id=w.id,
            title=w.title,
            status=normalize_workstream_status(w.status),
            # Prefer the repo's domain, then the topic's, then "unknown".
            domain=repo["domain_slug"] if repo else (topic.domain_slug if topic else "unknown"),
            repo_label=repo["slug"] if repo else workplan.get("repo_slug", "unassigned"),
            workplan_filename=workplan.get("filename"),
            workplan_relative_path=workplan.get("relative_path"),
            workplan_archived=bool(workplan.get("archived", False)),
            health_labels=workplan.get("health_labels", []),
            href=f"./workstreams/{w.id}",
            done=counts.get("done", 0),
            progress=counts.get("progress", 0),
            wait=counts.get("wait", 0),
            todo=counts.get("todo", 0),
            total=counts.get("total", 0),
            created_at=w.created_at,
            updated_at=w.updated_at,
        ))
    return DashboardOverview(
        generated_at=datetime.now(tz=timezone.utc),
        totals=totals,
        topics=[
            TopicWithWorkstreams(
                **TopicRead.model_validate(t).model_dump(),
                workstreams=topic_workstreams.get(t.id, []),
            )
            for t in topics
        ],
        blocking_decisions=[DecisionRead.model_validate(d) for d in blocking],
        waiting_tasks=[TaskRead.model_validate(t) for t in waiting],
        # NOTE(review): blocked_tasks is filled from the same `waiting` rows
        # as waiting_tasks -- presumably a compatibility alias; confirm.
        blocked_tasks=[TaskRead.model_validate(t) for t in waiting],
        recent_progress=[ProgressEventRead.model_validate(e) for e in recent],
        next_steps=await _derive_next_steps(session),
        contribution_counts=contribution_counts,
        licence_risk_count=licence_risk_count,
        open_capability_requests=open_cap_req_count,
        sbom_snapshot_count=int(snapshot_count or 0),
        sbom_package_total=int(package_total or 0),
        registration_milestones=[ProgressEventRead.model_validate(e) for e in registration_milestones],
        workplan_rows=workplan_rows,
        sources=sources,
        diagnostics={
            "workplan_row_count": len(workplan_rows),
            "task_count_strategy": "grouped",
        },
    )
async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
"""Compute per-domain stats for the state summary."""
domains_rows = await session.execute(

View File

@@ -9,7 +9,7 @@ from api.database import get_session
from api.models.task import Task, TaskStatus
from api.models.token_event import TokenEvent
from api.models.workstream import Workstream
from api.schemas.task import TaskCreate, TaskRead, TaskUpdate
from api.schemas.task import TaskCountRead, TaskCreate, TaskRead, TaskUpdate
from api.services.lifecycle import status_value, transition_task_status
from api.task_status import normalize_task_status
@@ -24,6 +24,8 @@ async def list_tasks(
needs_human: bool | None = Query(None),
priority: str | None = None,
due_date_before: date | None = None,
limit: int | None = Query(None, ge=1, le=5000),
offset: int = Query(0, ge=0),
session: AsyncSession = Depends(get_session),
) -> list[Task]:
q = select(Task)
@@ -40,10 +42,32 @@ async def list_tasks(
if due_date_before is not None:
q = q.where(Task.due_date <= due_date_before)
q = q.order_by(Task.created_at)
if offset:
q = q.offset(offset)
if limit is not None:
q = q.limit(limit)
result = await session.execute(q)
return list(result.scalars().all())
@router.get("/counts", response_model=list[TaskCountRead])
async def count_tasks(
    workstream_id: uuid.UUID | None = None,
    status: str | None = None,
    session: AsyncSession = Depends(get_session),
) -> list[TaskCountRead]:
    """Return task counts grouped by workstream and status.

    Args:
        workstream_id: When given, only count tasks of this workstream.
        status: When given, only count tasks in this status. The value is
            passed through ``normalize_task_status`` before matching.
        session: Database session supplied by the ``get_session`` dependency.

    Returns:
        One ``TaskCountRead`` per (workstream, status) pair that has tasks.

    Raises:
        HTTPException: 422 when ``status`` is not a recognised task status.
    """
    # Imported locally: the `status` query parameter shadows any module-level
    # `status` name inside this function, so the code is spelled numerically.
    from fastapi import HTTPException

    q = select(Task.workstream_id, Task.status, func.count()).group_by(Task.workstream_id, Task.status)
    if workstream_id is not None:
        q = q.where(Task.workstream_id == workstream_id)
    if status:
        try:
            wanted_status = TaskStatus(normalize_task_status(status))
        except ValueError as exc:
            # An unknown value is a client error, not a server fault.
            raise HTTPException(
                status_code=422,
                detail=f"Unknown task status: {status!r}",
            ) from exc
        q = q.where(Task.status == wanted_status)
    rows = await session.execute(q)
    return [
        TaskCountRead(workstream_id=ws_id, status=task_status, count=count)
        for ws_id, task_status, count in rows
    ]
@router.post("/", response_model=TaskRead, status_code=status.HTTP_201_CREATED)
async def create_task(
body: TaskCreate,

View File

@@ -3,6 +3,7 @@ import logging
import uuid
import socket
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
@@ -40,6 +41,8 @@ workplan_router = APIRouter(prefix="/workplans", tags=["workplans"])
# In-process cache of the workplan index, the time.monotonic() reading when it
# was stored, and the age in seconds after which it is treated as stale.
_INDEX_CACHE: dict[str, Any] | None = None
_INDEX_CACHE_AT: float = 0.0
_INDEX_TTL = 30.0
# Handle of the background refresh task, if one has been started.
_INDEX_REFRESH_TASK: asyncio.Task | None = None
# Message of the last failed background refresh; None after a successful build.
_INDEX_LAST_ERROR: str | None = None
_LEGACY_OWNER = "state-hub.api"
_COMPLETED_WORKSTREAM_EVENT = "org.statehub.workstream.completed"
@@ -170,16 +173,7 @@ async def _list_workstreams(
return list(result.scalars().all())
async def _workplan_index(
*,
refresh: bool,
session: AsyncSession,
) -> dict[str, Any]:
"""Map file-backed workplan ids to their local workplan filenames."""
global _INDEX_CACHE, _INDEX_CACHE_AT
if not refresh and _INDEX_CACHE is not None and (time.monotonic() - _INDEX_CACHE_AT) < _INDEX_TTL:
return _INDEX_CACHE
async def _build_workplan_index(session: AsyncSession) -> dict[str, Any]:
result = await session.execute(
select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.slug)
)
@@ -218,8 +212,78 @@ async def _workplan_index(
"needs_review": bool(review and review.needs_review),
"health_labels": ["needs_review"] if review and review.needs_review else [],
}
_INDEX_CACHE = {"workplans": index, "workstreams": index}
return {"workplans": index, "workstreams": index}
def _index_with_meta(*, stale: bool, refresh_in_progress: bool) -> dict[str, Any]:
    """Return a copy of the cached index with a freshly computed ``_meta`` entry.

    Args:
        stale: Whether the caller considers the cached data past its TTL.
        refresh_in_progress: Whether a background refresh is running.

    Returns:
        The cached top-level mapping (or empty ``workplans`` / ``workstreams``
        maps when nothing is cached) with ``_meta`` replaced by the current
        staleness flag, cache age, refresh flag and last refresh error.
    """
    cached = _INDEX_CACHE

    # A zero timestamp means nothing has ever been stored.
    if _INDEX_CACHE_AT:
        age_seconds = round(time.monotonic() - _INDEX_CACHE_AT, 3)
    else:
        age_seconds = None

    if cached:
        payload = dict(cached)
        generated_at = cached.get("_meta", {}).get("generated_at")
    else:
        payload = {"workplans": {}, "workstreams": {}}
        generated_at = None

    payload["_meta"] = {
        "generated_at": generated_at,
        "stale": stale,
        "cache_age_seconds": age_seconds,
        "refresh_in_progress": refresh_in_progress,
        "last_error": _INDEX_LAST_ERROR,
    }
    return payload
async def _refresh_workplan_index_background() -> None:
    """Rebuild the workplan index on its own session and publish it to the cache.

    On success the module cache, its timestamp and the last-error marker are
    replaced. On failure the previous cache is left untouched, the failure is
    logged with its traceback, and a non-empty description is stored in
    ``_INDEX_LAST_ERROR`` so readers of the index metadata can see it.
    """
    global _INDEX_CACHE, _INDEX_CACHE_AT, _INDEX_LAST_ERROR
    from api.database import async_session_factory

    try:
        async with async_session_factory() as session:
            index = await _build_workplan_index(session)
        index["_meta"] = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "stale": False,
            "cache_age_seconds": 0.0,
            "refresh_in_progress": False,
            "last_error": None,
        }
        _INDEX_CACHE = index
        _INDEX_CACHE_AT = time.monotonic()
        _INDEX_LAST_ERROR = None
    except Exception as exc:
        # Nobody awaits this task, so without logging the failure would only
        # be visible through the stored message.
        logging.getLogger(__name__).exception("Background workplan index refresh failed")
        # str(exc) is empty for message-less exceptions (e.g. TimeoutError());
        # an empty string would read as "no error" to truthiness checks.
        _INDEX_LAST_ERROR = str(exc) or type(exc).__name__
def _ensure_index_refresh_started() -> None:
    """Schedule a background index refresh unless one is still running."""
    global _INDEX_REFRESH_TASK

    current = _INDEX_REFRESH_TASK
    if current is None or current.done():
        # The task handle is kept in the module global.
        _INDEX_REFRESH_TASK = asyncio.create_task(_refresh_workplan_index_background())
async def _workplan_index(
    *,
    refresh: bool,
    session: AsyncSession,
) -> dict[str, Any]:
    """Map file-backed workplan ids to their local workplan filenames.

    Args:
        refresh: When true, always rebuild the index on ``session``.
        session: Session used for a synchronous (re)build.

    Returns:
        The index with a ``_meta`` entry. With ``refresh`` false and a cached
        index available, the cached data is returned immediately: marked fresh
        while younger than ``_INDEX_TTL``, otherwise marked stale after a
        background refresh has been requested. In every other case the index
        is rebuilt before returning.
    """
    global _INDEX_CACHE, _INDEX_CACHE_AT, _INDEX_LAST_ERROR

    if not refresh and _INDEX_CACHE is not None:
        age = time.monotonic() - _INDEX_CACHE_AT if _INDEX_CACHE_AT else None
        if age is not None and age < _INDEX_TTL:
            task = _INDEX_REFRESH_TASK
            return _index_with_meta(
                stale=False,
                refresh_in_progress=task is not None and not task.done(),
            )
        # Past the TTL: hand back what we have and refresh out of band.
        _ensure_index_refresh_started()
        return _index_with_meta(stale=True, refresh_in_progress=True)

    # Forced refresh, or nothing cached yet: build inline on the caller's session.
    rebuilt = await _build_workplan_index(session)
    rebuilt["_meta"] = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "stale": False,
        "cache_age_seconds": 0.0,
        "refresh_in_progress": False,
        "last_error": None,
    }
    _INDEX_CACHE = rebuilt
    _INDEX_CACHE_AT = time.monotonic()
    _INDEX_LAST_ERROR = None
    return _INDEX_CACHE

View File

@@ -1,5 +1,6 @@
import uuid
from datetime import datetime
from typing import Any
from pydantic import BaseModel
@@ -84,3 +85,51 @@ class StateSummary(BaseModel):
contribution_counts: dict[str, int] = {}
licence_risk_count: int = 0
open_capability_requests: int = 0
class DashboardWorkplanRow(BaseModel):
    """One chart-ready workstream row in the dashboard overview payload."""

    id: uuid.UUID
    title: str
    # Workstream status after normalization.
    status: str
    # Domain slug of the workstream's repo, else of its topic, else "unknown".
    domain: str = "unknown"
    # Repo slug, else the workplan index entry's repo_slug, else "unassigned".
    repo_label: str = "unassigned"
    # Workplan file details taken from the workplan index entry, when present.
    workplan_filename: str | None = None
    workplan_relative_path: str | None = None
    workplan_archived: bool = False
    health_labels: list[str] = []
    # Relative dashboard link to the workstream page.
    href: str
    # Task counts per status bucket; `total` also includes tasks whose status
    # is outside these four buckets.
    done: int = 0
    progress: int = 0
    wait: int = 0
    todo: int = 0
    total: int = 0
    created_at: datetime
    updated_at: datetime
class DashboardSourceMeta(BaseModel):
    """Health/freshness metadata for one data source feeding the overview."""

    # False when the source reported an error or could not be read.
    ok: bool = True
    # True when the data served is past its cache lifetime.
    stale: bool = False
    # Age of the cached data in seconds, when known.
    cache_age_seconds: float | None = None
    # True while a background refresh of the source is running.
    refresh_in_progress: bool = False
    # Error message from the source, if any.
    error: str | None = None
class DashboardOverview(BaseModel):
    """Response model of GET /state/overview: the bounded dashboard read model."""

    # Server time (UTC) at which the payload was assembled.
    generated_at: datetime
    totals: Totals
    topics: list[TopicWithWorkstreams]
    blocking_decisions: list[DecisionRead]
    waiting_tasks: list[TaskRead]
    blocked_tasks: list[TaskRead] = []
    recent_progress: list[ProgressEventRead]
    next_steps: list[NextStep] = []
    # Counts keyed by contribution type and by contribution status.
    contribution_counts: dict[str, int] = {}
    licence_risk_count: int = 0
    open_capability_requests: int = 0
    # Number of SBOM snapshots and the sum of their entry counts.
    sbom_snapshot_count: int = 0
    sbom_package_total: int = 0
    registration_milestones: list[ProgressEventRead] = []
    workplan_rows: list[DashboardWorkplanRow] = []
    # Per-source health metadata, keyed by source name (e.g. "workplan_index").
    sources: dict[str, DashboardSourceMeta] = {}
    # Free-form build diagnostics (row counts, counting strategy).
    diagnostics: dict[str, Any] = {}

View File

@@ -93,3 +93,9 @@ class TaskRead(TaskStatusMixin):
parent_task_id: uuid.UUID | None = None
created_at: datetime
updated_at: datetime
class TaskCountRead(TaskStatusMixin):
    """One grouped count row: how many tasks a workstream has in one status."""

    workstream_id: uuid.UUID
    status: TaskStatus
    # Number of tasks in `workstream_id` with `status`.
    count: int

View File

@@ -89,11 +89,23 @@ export async function waitForVisible(ms) {
export async function apiFetch(path, options = {}) {
const url = path.startsWith("http") ? path : `${API}${path}`;
const timeout = options.timeout ?? FETCH_TIMEOUT;
const {timeout: _timeout, ...fetchOptions} = options;
const {timeout: _timeout, cache = "no-store", ...fetchOptions} = options;
const ctrl = new AbortController();
const timer = setTimeout(() => ctrl.abort(), timeout);
let timedOut = false;
const timer = setTimeout(() => {
timedOut = true;
ctrl.abort();
}, timeout);
try {
return await fetch(url, {cache: "no-store", ...fetchOptions, signal: ctrl.signal});
return await fetch(url, {cache, ...fetchOptions, signal: ctrl.signal});
} catch (error) {
if (timedOut || error?.name === "AbortError") {
const message = `Request timed out after ${Math.round(timeout / 1000)}s: ${url}`;
const timeoutError = new Error(message);
timeoutError.name = "TimeoutError";
throw timeoutError;
}
throw error;
} finally {
clearTimeout(timer);
}

View File

@@ -10,7 +10,10 @@ All dashboard pages poll the State Hub API automatically. No manual refresh is e
## Poll interval
Every page fetches fresh data from `http://127.0.0.1:8000` every **15 seconds** using an async generator loop. The previous data stays visible while the next request is in flight, so the UI never goes blank.
Most live pages fetch fresh data from `http://127.0.0.1:8000` every **15 seconds**
using an async generator loop. The overview page uses a heavier bounded read
model and refreshes every **60 seconds**. The previous data stays visible while
the next request is in flight, so the UI never goes blank.
---
@@ -21,6 +24,7 @@ The **●** dot in the top-right corner of each page shows the current connectio
| Indicator | Meaning |
|---|---|
| **● Live · updated HH:MM:SS** | Last poll succeeded — data is current as of that time |
| **● Stale · last successful update HH:MM:SS** | Last refresh failed, but cached page data is still visible |
| **● Offline — run: `make api`** | API is unreachable — the dot turns red |
The timestamp updates on every successful poll. If you see a time that is more than ~30 seconds in the past (or more than ~2 minutes on the Overview page, which polls every 60 seconds), the poll is stalled (browser tab backgrounded or network issue) — reloading the page resets the loop.
@@ -48,7 +52,7 @@ make api # db + migrate + uvicorn (restarts if already running)
| Page | Endpoints |
|---|---|
| Overview | `/state/summary` |
| Overview | `/state/overview`, `/decisions/?decision_type=pending` |
| Workplans | `/workplans/`, `/topics/`, `/state/summary` |
| Decisions | `/decisions/?limit=500`, `/topics/` |
| Progress | `/progress/?limit=500` |
@@ -57,4 +61,4 @@ All endpoints are read-only GET requests. The dashboard never writes to the API.
---
*Poll interval: 15 s. Data is refreshed in the background — the page never reloads itself.*
*Poll interval: 15 s for most pages, 60 s for Overview. Data is refreshed in the background — the page never reloads itself.*

View File

@@ -82,9 +82,13 @@ and summary.
## Data source
Polls `GET /state/summary` every **15 seconds**. The workstream chart also polls
`GET /workplans/`, `GET /tasks/?limit=2000`, `GET /topics/`, `GET /repos/`,
and `GET /workplans/index` for repository grouping, task counts, and
workplan filename tooltips. Blocking decisions are fetched separately via
`GET /decisions/?decision_type=pending` and only re-fetched after a successful
resolve action — this prevents the inline form from being wiped on every poll.
Polls `GET /state/overview` every **60 seconds**. This endpoint is a bounded
dashboard read model: it returns summary totals, recent activity, registration
milestones, SBOM totals, and chart-ready workplan rows with task counts already
aggregated server-side.
The page keeps the last successful overview response visible if a refresh times
out, and marks the view stale instead of clearing the dashboard. Blocking
decisions are fetched separately via `GET /decisions/?decision_type=pending`
and only re-fetched after a successful resolve action — this prevents the inline
form from being wiped on every poll.

View File

@@ -14,11 +14,15 @@ import {
```
```js
// Single polling loop — fetches all data in one Promise.all batch, backs off uniformly.
// Single polling loop — loads one bounded overview read model and keeps
// last-known-good data visible if a refresh times out.
const pageState = (async function*() {
let failures = 0;
let lastGood = null;
while (true) {
let summary = {}, snapshots = [], totalPkgs = 0, milestones = [], wsAll = [], ok = false;
let nextState = lastGood
? {...lastGood, ok: false, stale: true, error: null}
: {summary: {}, snapshots: [], snapshotCount: 0, totalPkgs: 0, milestones: [], wsAll: [], ok: false, stale: false, error: null, sources: {}, ts: new Date()};
try {
const loadJson = async (name, path, options = {}) => {
const response = await apiFetch(path, options);
@@ -26,67 +30,71 @@ const pageState = (async function*() {
return response.json();
};
const [
summaryData,
snapList,
allEvents,
wsList,
taskList,
topicList,
repoList,
workplanIndex,
] = await Promise.all([
loadJson("summary", "/state/summary", {timeout: 20_000}),
loadJson("sbom snapshots", "/sbom/snapshots/"),
loadJson("milestones", "/progress/?event_type=milestone&limit=500"),
loadJson("workplans", "/workplans/"),
loadJson("tasks", "/tasks/?limit=2000"),
loadJson("topics", "/topics/"),
loadJson("repos", "/repos/"),
loadJson("workplan index", "/workplans/index").catch(() => ({workplans: {}, workstreams: {}})),
]);
const overview = await loadJson("overview", "/state/overview", {timeout: 20_000, cache: "reload"});
ok = true;
summary = summaryData;
snapshots = snapList;
totalPkgs = snapshots.reduce((s, sn) => s + (sn.entry_count ?? 0), 0);
milestones = allEvents.filter(e => e.summary?.startsWith("Project registered with State Hub:"));
const workplanMap = workplanIndex.workstreams ?? {};
const topicMap = Object.fromEntries(topicList.map(t => [t.id, t]));
const repoMap = Object.fromEntries(repoList.map(r => [r.id, r]));
const counts = {};
for (const t of taskList) {
const wid = t.workstream_id;
if (!counts[wid]) counts[wid] = {done: 0, progress: 0, wait: 0, todo: 0, total: 0};
counts[wid].total++;
if (t.status === "done") counts[wid].done++;
else if (t.status === "progress") counts[wid].progress++;
else if (t.status === "wait") counts[wid].wait++;
else if (t.status === "todo") counts[wid].todo++;
}
wsAll = wsList.map(w => {
const repo = repoMap[w.repo_id];
const topic = topicMap[w.topic_id];
const workplan = workplanMap[w.id] ?? {};
return {
const summaryData = {
generated_at: overview.generated_at,
totals: overview.totals ?? {},
topics: overview.topics ?? [],
blocking_decisions: overview.blocking_decisions ?? [],
waiting_tasks: overview.waiting_tasks ?? [],
blocked_tasks: overview.blocked_tasks ?? overview.waiting_tasks ?? [],
recent_progress: overview.recent_progress ?? [],
next_steps: overview.next_steps ?? [],
contribution_counts: overview.contribution_counts ?? {},
licence_risk_count: overview.licence_risk_count ?? 0,
open_capability_requests: overview.open_capability_requests ?? 0,
};
nextState = {
summary: summaryData,
snapshots: [],
snapshotCount: overview.sbom_snapshot_count ?? 0,
totalPkgs: overview.sbom_package_total ?? 0,
milestones: overview.registration_milestones ?? [],
wsAll: (overview.workplan_rows ?? []).map(w => ({
...w,
status: normalizeWorkstreamStatus(w.status),
domain: repo?.domain_slug ?? topic?.domain_slug ?? "unknown",
repo_label: repo?.slug ?? workplan.repo_slug ?? "unassigned",
workplan_filename: workplan.filename ?? null,
workplan_relative_path: workplan.relative_path ?? null,
workplan_archived: workplan.archived ?? false,
health_labels: workplan.health_labels ?? [],
href: `./workstreams/${w.id}`,
...(counts[w.id] ?? {done: 0, progress: 0, wait: 0, todo: 0, total: 0}),
};
});
})),
ok: true,
stale: false,
error: null,
sources: overview.sources ?? {},
ts: new Date(),
};
lastGood = nextState;
} catch (e) {
summary = {error: `Dashboard data load failed: ${e?.message ?? String(e)}`};
const message = `Dashboard refresh failed: ${e?.message ?? String(e)}`;
if (lastGood) {
nextState = {
...lastGood,
ok: false,
stale: true,
error: `${message}; showing last successful data from ${lastGood.ts?.toLocaleTimeString?.() ?? "previous refresh"}`,
summary: {
...(lastGood.summary ?? {}),
error: `${message}; showing last successful data from ${lastGood.ts?.toLocaleTimeString?.() ?? "previous refresh"}`,
},
};
} else {
nextState = {
summary: {error: message},
snapshots: [],
snapshotCount: 0,
totalPkgs: 0,
milestones: [],
wsAll: [],
ok: false,
stale: false,
error: message,
sources: {},
ts: new Date(),
};
}
}
failures = ok ? 0 : failures + 1;
yield {summary, snapshots, totalPkgs, milestones, wsAll, ok, ts: new Date()};
await waitForVisible(pollDelay({ok, base: POLL_HEAVY, failures}));
failures = nextState.ok ? 0 : failures + 1;
yield nextState;
await waitForVisible(pollDelay({ok: nextState.ok, base: POLL_HEAVY, failures}));
}
})();
```
@@ -94,6 +102,7 @@ const pageState = (async function*() {
```js
const summary = pageState.summary ?? {};
const _ok = pageState.ok ?? false;
const _stale = pageState.stale ?? false;
const _ts = pageState.ts;
const totals = summary.totals ?? {};
const ws = totals.workstreams ?? {};
@@ -107,7 +116,7 @@ const wsAll = pageState.wsAll ?? [];
// Kept separate from the main poll so in-progress form inputs aren't wiped every 60 s.
const blockingDecisions = Mutable([]);
// Load pending decisions and publish the ones that still need attention
// (status "open" or "escalated") into the blockingDecisions Mutable.
// A rejected or non-OK request publishes an empty list instead of throwing.
const refreshDecisions = async () => {
  // Single request through apiFetch with an explicit 12 s timeout; a rejection
  // is mapped to null so the caller never sees the error.
  const r = await apiFetch("/decisions/?decision_type=pending", {timeout: 12_000}).catch(() => null);
  const all = r?.ok ? await r.json() : [];
  blockingDecisions.value = all.filter(d => ["open", "escalated"].includes(d.status));
};
@@ -121,9 +130,11 @@ import {injectTocTop} from "./components/toc-sidebar.js";
import {withDocHelp} from "./components/doc-overlay.js";
// Live-status badge with three states:
//   _ok    -> focus-coloured dot, "Live · updated <time>"
//   _stale -> orange dot, "Stale · last successful update <time>"
//   else   -> red dot plus the command to start the API
// Exactly one dot is rendered; its colour follows the same three-way split as the text.
const _liveEl = html`<div class="live-indicator">
  <span style="color:${_ok ? 'var(--theme-foreground-focus)' : _stale ? 'orange' : 'red'}">●</span>
  ${_ok
    ? `Live · updated ${_ts?.toLocaleTimeString()}`
    : _stale
      ? `Stale · last successful update ${_ts?.toLocaleTimeString()}`
      : html`<span style="color:red">Offline — run: <code>cd ~/state-hub && make api</code></span>`}
</div>`;
withDocHelp(_liveEl, "/docs/live-data");
@@ -346,6 +357,7 @@ const licenceRisk = summary.licence_risk_count ?? 0;
const totalContribs = ["br","fr","ep","upr"].reduce((s, t) => s + (contribCounts[t] ?? 0), 0);
const needsFollowUp = (contribCounts["submitted"] ?? 0) + (contribCounts["acknowledged"] ?? 0);
const sbomSnaps = pageState.snapshots ?? [];
const sbomSnapCount = pageState.snapshotCount ?? sbomSnaps.length;
const totalPkgs = pageState.totalPkgs ?? 0;
display(html`<div class="grid grid-cols-3" style="gap:1rem;margin-bottom:1.5rem">
@@ -362,7 +374,7 @@ display(html`<div class="grid grid-cols-3" style="gap:1rem;margin-bottom:1.5rem"
<a class="card card-link ${licenceRisk > 0 ? 'warn' : ''}" href="./sbom">
<h3>SBOM</h3>
<p class="big-num">${totalPkgs.toLocaleString()}</p>
<small>${sbomSnaps.length} repo${sbomSnaps.length !== 1 ? "s" : ""} tracked · ${licenceRisk > 0 ? html`<span style="color:red">${licenceRisk} copyleft risks</span>` : html`<span style="color:green">✓ no copyleft</span>`}</small>
<small>${sbomSnapCount} snapshot${sbomSnapCount !== 1 ? "s" : ""} tracked · ${licenceRisk > 0 ? html`<span style="color:red">${licenceRisk} copyleft risks</span>` : html`<span style="color:green">✓ no copyleft</span>`}</small>
</a>
</div>`);
```

View File

@@ -0,0 +1,63 @@
#!/usr/bin/env bash
# Starts the API (uvicorn) and the dashboard dev server on loopback ports as
# background processes and records their PIDs. Ports and artifact paths can be
# overridden through the environment variables named below.
# Abort on command failure, unset variables, and failures inside pipelines.
set -euo pipefail
# Ports default to 8012 (API) and 3012 (dashboard).
API_PORT="${API_PORT:-8012}"
DASHBOARD_PORT="${DASHBOARD_PORT:-3012}"
API_BASE="http://127.0.0.1:${API_PORT}"
# The dashboard URL carries the API base as the api_base query parameter.
DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/?api_base=${API_BASE}"
# Log and capture files live under /tmp by default and are keyed by port.
API_LOG="${API_LOG:-/tmp/statehub-api-${API_PORT}.log}"
DASHBOARD_LOG="${DASHBOARD_LOG:-/tmp/statehub-dashboard-${DASHBOARD_PORT}.log}"
OVERVIEW_JSON="${OVERVIEW_JSON:-/tmp/statehub-overview-${API_PORT}.json}"
OVERVIEW_HEADERS="${OVERVIEW_HEADERS:-/tmp/statehub-overview-${API_PORT}.headers}"
DASHBOARD_HTML="${DASHBOARD_HTML:-/tmp/statehub-dashboard-${DASHBOARD_PORT}.html}"
# Remove artifacts from a previous run so stale output is never mistaken for fresh.
rm -f "$API_LOG" "$DASHBOARD_LOG" "$OVERVIEW_JSON" "$OVERVIEW_HEADERS" "$DASHBOARD_HTML"
# Launch the API in the background; stdout and stderr both go to API_LOG.
.venv/bin/python -m uvicorn api.main:app --host 127.0.0.1 --port "$API_PORT" \
> "$API_LOG" 2>&1 &
api_pid=$!
# Launch the dashboard dev server from the dashboard/ directory in a background subshell.
(cd dashboard && npm run dev -- --host 127.0.0.1 --port "$DASHBOARD_PORT" \
> "$DASHBOARD_LOG" 2>&1) &
# NOTE(review): $! is the PID of the background subshell, not necessarily of the
# npm/node process it spawns; confirm that killing it also stops the dev server.
dashboard_pid=$!
# Send the default kill signal to both background servers. Errors from
# processes that have already exited are suppressed so cleanup never fails.
cleanup() {
  local pid
  for pid in "$api_pid" "$dashboard_pid"; do
    kill "$pid" 2>/dev/null || true
  done
}
# Run cleanup whenever the script exits, on success or failure.
trap cleanup EXIT
# wait_for_url LABEL URL OUTPUT [ATTEMPTS]
#
# Poll URL with curl until it answers successfully (curl -f treats HTTP error
# statuses as failures), saving the response body to OUTPUT. A failed attempt
# is followed by a one-second sleep. ATTEMPTS defaults to 40.
# Returns 0 on the first success; otherwise prints a message naming LABEL and
# URL to stderr and returns 1.
wait_for_url() {
  local label="$1" url="$2" output="$3"
  local attempts="${4:-40}"
  local attempt
  for ((attempt = 1; attempt <= attempts; attempt++)); do
    # curl's own progress and error output is discarded; only its exit status matters.
    if curl -fsS "$url" -o "$output" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  echo "$label did not become ready: $url" >&2
  return 1
}
# Wait for the API overview endpoint; if it never answers, print the last
# 80 lines of the API log to stderr and exit with status 1.
if ! wait_for_url "API overview" "${API_BASE}/state/overview" "$OVERVIEW_JSON"; then
echo "API log:" >&2
tail -80 "$API_LOG" >&2 || true
exit 1
fi
# Same readiness check for the dashboard page, with the dashboard log as diagnostics.
if ! wait_for_url "Dashboard" "$DASHBOARD_URL" "$DASHBOARD_HTML"; then
echo "Dashboard log:" >&2
tail -80 "$DASHBOARD_LOG" >&2 || true
exit 1
fi
# Request the overview again: -D saves the response headers, -o overwrites the
# body captured by the readiness check, and -w prints the HTTP status code,
# total time, and downloaded size on one line.
curl -sS -D "$OVERVIEW_HEADERS" -o "$OVERVIEW_JSON" \
-w "overview %{http_code} %{time_total} %{size_download}\n" \
"${API_BASE}/state/overview"
# The "200" here is a literal, not a measured status; the dashboard URL was
# already fetched successfully by wait_for_url above.
printf "dashboard 200 %s\n" "$DASHBOARD_URL"
# Byte size of the saved overview body.
wc -c "$OVERVIEW_JSON"
# Show any X-StateHub-* response headers; "|| true" keeps a no-match grep from
# terminating the script under "set -e".
grep -i "x-statehub" "$OVERVIEW_HEADERS" || true

View File

@@ -56,8 +56,12 @@ def _truncate(_schema):
# Reset in-process TTL caches so stale data from a previous test can't bleed through.
_state_router._SUMMARY_CACHE = None
_state_router._SUMMARY_CACHE_AT = 0.0
_state_router._OVERVIEW_CACHE = None
_state_router._OVERVIEW_CACHE_AT = 0.0
_ws_router._INDEX_CACHE = None
_ws_router._INDEX_CACHE_AT = 0.0
_ws_router._INDEX_REFRESH_TASK = None
_ws_router._INDEX_LAST_ERROR = None
yield
engine = sqlalchemy.create_engine(_SYNC_URL)

View File

@@ -226,6 +226,34 @@ class TestTasks:
assert "High prio" in titles
assert "Low prio" not in titles
async def test_list_pagination_and_counts(self, client):
    """Check limit/offset paging on GET /tasks/ and grouped totals on GET /tasks/counts."""
    await _create_domain(client)
    topic = await _create_topic(client)
    ws = await _create_workstream(client, topic["id"])
    ws_id = ws["id"]

    # Create three tasks sequentially so their listing order is known.
    first, second, third = [
        await _create_task(client, ws_id, title=title)
        for title in ("First", "Second", "Third")
    ]
    # Move two of them out of their initial status so each asserted status
    # bucket ends up with exactly one task.
    await client.patch(f"/tasks/{second['id']}", json={"status": "progress"})
    await client.patch(
        f"/tasks/{third['id']}",
        json={"status": "wait", "blocking_reason": "blocked"},
    )

    # limit=2 returns exactly the first two tasks, in creation order.
    page = await client.get("/tasks/?limit=2")
    assert page.status_code == 200
    assert [task["id"] for task in page.json()] == [first["id"], second["id"]]

    # offset=2 with limit=1 returns only the third task.
    tail = await client.get("/tasks/?limit=1&offset=2")
    assert tail.status_code == 200
    assert [task["id"] for task in tail.json()] == [third["id"]]

    # The counts endpoint reports one row per (workstream, status) pair.
    grouped = await client.get(f"/tasks/counts?workstream_id={ws_id}")
    assert grouped.status_code == 200
    counts = {
        (row["workstream_id"], row["status"]): row["count"]
        for row in grouped.json()
    }
    for status in ("todo", "progress", "wait"):
        assert counts[(ws_id, status)] == 1
@pytest.mark.parametrize("initial_status", ["proposed", "ready", "backlog"])
async def test_task_start_activates_planning_workstream(self, client, initial_status):
await _create_domain(client)
@@ -358,6 +386,34 @@ class TestStateSummary:
assert summaries[blocked_ws["id"]]["blocked_reasons"][0]["id"] == "dependencies.all_complete"
assert body["totals"]["workstreams"]["blocked"] == 1
async def test_overview_returns_chart_ready_rows(self, client):
    """Check that GET /state/overview returns per-workstream chart rows and caches.

    The first request must report ``x-statehub-cache: miss`` and carry a row
    for the created workstream with repo/domain labels and task counts; an
    immediate second request must report ``hit``.
    """
    await _create_domain(client)
    topic = await _create_topic(client)
    repo = await _create_repo(client)
    ws = await _create_workstream(client, topic["id"], repo_id=repo["id"])
    # The first task is left untouched (the assertions below expect it to be
    # counted as "todo"); its response is not needed, so it is not bound.
    await _create_task(client, ws["id"], title="Todo")
    done_task = await _create_task(client, ws["id"], title="Done")
    await client.patch(
        f"/tasks/{done_task['id']}",
        json={"status": "done", "suppress_token_event": True},
    )

    r = await client.get("/state/overview")
    assert r.status_code == 200
    assert r.headers["x-statehub-cache"] == "miss"
    body = r.json()
    rows = {row["id"]: row for row in body["workplan_rows"]}
    assert ws["id"] in rows
    row = rows[ws["id"]]
    assert row["repo_label"] == "test-repo"
    assert row["domain"] == "testdomain"
    assert row["todo"] == 1
    assert row["done"] == 1
    assert row["total"] == 2
    assert body["totals"]["tasks"]["total"] == 2
    assert body["diagnostics"]["task_count_strategy"] == "grouped"

    # A repeat request is expected to be answered from the overview cache.
    r = await client.get("/state/overview")
    assert r.status_code == 200
    assert r.headers["x-statehub-cache"] == "hit"
class TestFlowEndpoints:
async def test_list_flow_definitions(self, client):

View File

@@ -0,0 +1,276 @@
---
id: STATE-WP-0056
type: workplan
title: "Dashboard Loading Robustness and Efficiency"
domain: custodian
repo: state-hub
status: finished
owner: codex
topic_slug: custodian
created: "2026-06-05"
updated: "2026-06-05"
state_hub_workstream_id: "28f9569c-937b-4b79-b46c-f6b1f83c09c3"
---
# Dashboard Loading Robustness and Efficiency
## Summary
Make the State Hub dashboard overview page faster and more resilient under
normal polling. The current overview performs a broad concurrent fan-out of
full-list API calls and treats most request failures as whole-page failures.
This can surface frequent `Dashboard data load failed: The operation was
aborted.` warnings when one call crosses the frontend timeout, even if the API
eventually returns successfully.
This work should reduce request count, payload size, and backend contention;
preserve useful last-known data during partial failures; and give operators
clearer diagnostics when a section is stale or unavailable.
## Current Findings
Inspection on 2026-06-05 found:
- `dashboard/src/index.md` loads overview data with one eight-request
`Promise.all` batch.
- `dashboard/src/components/config.js` aborts most `apiFetch` calls after
`12_000` ms.
- A dashboard-style concurrent timing run produced several calls at or above the
default timeout: `/sbom/snapshots/`, `/repos/`, and `/workplans/index`.
- The same endpoints can be much faster when called alone, which points to
contention and over-fetching rather than one permanently slow endpoint.
- The overview calls `/tasks/?limit=2000`, but the tasks API currently ignores
`limit` and returns every task. In the observed run that response was roughly
2.1 MB just to compute per-workplan task counts.
- `/state/summary` has a short in-process cache, but a cache miss still runs a
large amount of sequential database and Python-side aggregation work.
- `/workplans/index` scans active repository workplan files and parses
frontmatter. It is cached, but concurrent dashboard loads can still wait on
the same expensive rebuild pattern.
- Several API routes set cache headers, but the shared dashboard fetch helper
forces `cache: "no-store"` for every request.
## Out of Scope
- Replacing Observable Framework.
- Redesigning the dashboard information architecture.
- Adding authentication, authorization, or multi-user session handling.
- Changing workplan file conventions.
- Moving State Hub to a different database or deployment substrate.
## T01 — Add Focused Dashboard Load Instrumentation
```task
id: STATE-WP-0056-T01
status: done
priority: high
state_hub_task_id: "e5208053-0db1-4842-a221-c5289422677a"
```
Add enough timing and error visibility to confirm which overview calls are slow,
aborted, or oversized during normal use.
Implementation notes:
- Add lightweight server-side timing logs or response headers for overview-hot
endpoints: `/state/summary`, `/workplans/`, `/tasks/`, `/topics/`, `/repos/`,
`/sbom/snapshots/`, `/progress/`, and `/workplans/index`.
- Include request path, status, elapsed time, response size when practical, and
whether a cached result was used.
- Keep instrumentation local and low-noise; avoid logging full payloads or
secrets.
- Add a small dashboard diagnostic surface or console logging that distinguishes
timeout aborts from HTTP errors and network failures.
- Capture before/after timing notes in this workplan or a progress event.
Done when a normal dashboard refresh can be diagnosed without manually timing
each endpoint from a shell.
## T02 — Make Overview Polling Partially Resilient
```task
id: STATE-WP-0056-T02
status: done
priority: high
state_hub_task_id: "2cdd960d-ba86-48d1-a7c6-e83671cd0e69"
```
Change the overview data loader so one slow or failed secondary request does
not mark the whole dashboard as failed.
Implementation notes:
- Replace fail-fast `Promise.all` behavior in `dashboard/src/index.md` with a
per-resource result model, for example `Promise.allSettled`.
- Keep last-known-good data for each section while a refresh is degraded.
- Treat optional resources such as SBOM snapshots, registration milestones, and
workplan file metadata independently from core summary/workplan status data.
- Display section-level stale/error indicators instead of one global warning
whenever possible.
- Keep exponential backoff for repeated failures, but do not discard usable
data just because one request timed out.
- Make abort errors user-readable, for example "timed out after 12s" instead of
only "The operation was aborted."
Done when an SBOM, repo-list, or workplan-index timeout leaves the rest of the
overview usable and visibly stale rather than failed.
## T03 — Respect Pagination and Add Task Count Aggregates
```task
id: STATE-WP-0056-T03
status: done
priority: high
state_hub_task_id: "78484226-9ccc-460c-a2b3-750b3204caa3"
```
Stop returning all tasks for overview count calculations.
Implementation notes:
- Add `limit` and `offset` support to `GET /tasks/`, preserving existing filter
behavior and sensible limits.
- Add a lightweight aggregate endpoint for task counts by workplan and status,
for example `GET /tasks/counts?group_by=workstream,status`, or add an
overview-specific aggregate route.
- Prefer SQL `GROUP BY` over transferring every task to the browser.
- Update `dashboard/src/index.md`, `dashboard/src/tasks.md`,
`dashboard/src/interventions.md`, and workplan detail pages as needed so list
views still receive the rows they need.
- Add tests for pagination compatibility and aggregate counts.
Done when the overview no longer fetches the full task table to draw the
workplan chart.
## T04 — Build a Lightweight Overview Read Endpoint
```task
id: STATE-WP-0056-T04
status: done
priority: high
state_hub_task_id: "2cf47a12-e8aa-49ca-963c-1f0d2933c344"
```
Create a dashboard-specific read model that returns exactly the data needed by
the overview page in one bounded response.
Implementation notes:
- Add an endpoint such as `GET /state/overview` or
`GET /state/dashboard-overview`.
- Include summary totals, recent progress needed by the page, blocking decision
counts, waiting-task counts, SBOM snapshot totals, registration milestones,
and workplan chart rows with repo/domain labels and task counts.
- Keep response fields stable and documented in dashboard reference docs.
- Reuse existing summary helpers where they are efficient, but avoid serializing
large full-list payloads that the overview does not display directly.
- Add cache headers and a short in-process cache with explicit invalidation
rules where appropriate.
- Update `dashboard/src/index.md` to prefer this endpoint and remove redundant
overview-only fetches.
Done when the overview's steady-state refresh is one bounded API call plus only
truly interactive secondary calls.
## T05 — Add Stale-While-Refresh for File-Backed Workplan Index
```task
id: STATE-WP-0056-T05
status: done
priority: medium
state_hub_task_id: "0c88c1a2-588b-41f8-bc1c-f94c8b4b0d1a"
```
Make `/workplans/index` resilient when repository filesystem scans are slow.
Implementation notes:
- Add singleflight behavior so concurrent requests share one in-progress
rebuild instead of starting or waiting on redundant scans.
- Return stale cached data quickly while a background refresh runs when the
cache is expired but still available.
- Include metadata such as `generated_at`, `stale`, `cache_age_seconds`, and
optionally `refresh_in_progress`.
- Consider reading only frontmatter rather than whole markdown files if this
can be done cleanly.
- Keep `refresh=true` as an explicit operator escape hatch.
- Add tests for cache hit, stale return, and forced refresh behavior.
Done when a slow filesystem scan cannot block normal dashboard refreshes for
longer than the frontend timeout if cached data exists.
## T06 — Use Browser and HTTP Caching Selectively
```task
id: STATE-WP-0056-T06
status: done
priority: medium
state_hub_task_id: "811f02ff-2e92-4c82-8b8a-e3d39a450b02"
```
Let stable lookup requests benefit from cache headers instead of forcing every
dashboard request to bypass caches.
Implementation notes:
- Extend `apiFetch` so callers can choose cache mode.
- Keep `no-store` for volatile mutation-sensitive resources.
- Use default browser caching or `reload` only where route cache headers are
already intentional, such as repo/topic lookup data.
- Review current route cache headers and align them with dashboard polling
needs.
- Avoid stale cached data for controls that immediately follow a mutation.
Done when stable overview lookup data no longer bypasses useful cache headers
by default.
## T07 — Optimize `/state/summary` Cache Misses
```task
id: STATE-WP-0056-T07
status: done
priority: medium
state_hub_task_id: "633f4cc6-ffeb-4086-9858-d239f50a9686"
```
Reduce the cost of a cold or expired `/state/summary` request.
Implementation notes:
- Profile the current sequential query groups in `api/routers/state.py`.
- Move Python-side counts and scans into SQL where straightforward.
- Remove unused work from the summary path, such as dead intermediate query
results.
- Cache derived sections independently when their freshness requirements differ.
- Add indexes only after profiling shows a query plan needs them.
- Keep summary response compatibility for existing consumers and MCP smoke
tests.
Done when a summary cache miss stays comfortably below the frontend timeout
under the current local data volume.
## T08 — Verify Under Dashboard-Style Load
```task
id: STATE-WP-0056-T08
status: done
priority: high
state_hub_task_id: "353fb25a-5306-416b-8d6d-9b201e6fac87"
```
Prove the dashboard no longer produces frequent abort warnings under realistic
refresh behavior.
Implementation notes:
- Add or document a repeatable script that performs dashboard-style concurrent
endpoint timing before and after the changes.
- Run API tests and dashboard component tests.
- Open the dashboard locally and verify that initial load, refresh, hidden-tab
pause/resume, and partial API failure states behave correctly.
- Confirm payload sizes are lower than the baseline for the overview page.
- Update `dashboard/src/docs/overview.md` and `dashboard/src/docs/live-data.md`
with the new data-loading model.
Done when repeated dashboard refreshes do not show the global aborted-operation
warning during normal local operation, and degraded sections recover cleanly.