generated from coulomb/repo-seed
Optimize dashboard overview loading
This commit is contained in:
16
api/main.py
16
api/main.py
@@ -1,5 +1,6 @@
|
||||
import hashlib
|
||||
import os
|
||||
import time
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
@@ -26,26 +27,37 @@ class ETagMiddleware(BaseHTTPMiddleware):
|
||||
"""Add ETag + conditional-GET (304) support to all JSON GET responses."""
|
||||
|
||||
async def dispatch(self, request: Request, call_next):
|
||||
started = time.perf_counter()
|
||||
response = await call_next(request)
|
||||
if request.method != "GET":
|
||||
response.headers["X-StateHub-Elapsed-Ms"] = f"{(time.perf_counter() - started) * 1000:.1f}"
|
||||
return response
|
||||
if "application/json" not in response.headers.get("content-type", ""):
|
||||
response.headers["X-StateHub-Elapsed-Ms"] = f"{(time.perf_counter() - started) * 1000:.1f}"
|
||||
return response
|
||||
|
||||
body_parts = []
|
||||
async for chunk in response.body_iterator:
|
||||
body_parts.append(chunk)
|
||||
body = b"".join(body_parts)
|
||||
elapsed_ms = f"{(time.perf_counter() - started) * 1000:.1f}"
|
||||
|
||||
etag = '"' + hashlib.md5(body, usedforsecurity=False).hexdigest() + '"'
|
||||
if request.headers.get("if-none-match") == etag:
|
||||
return StarletteResponse(
|
||||
status_code=304,
|
||||
headers={"ETag": etag, "Cache-Control": "no-cache"},
|
||||
headers={
|
||||
"ETag": etag,
|
||||
"Cache-Control": "no-cache",
|
||||
"X-StateHub-Elapsed-Ms": elapsed_ms,
|
||||
"X-StateHub-Response-Bytes": "0",
|
||||
},
|
||||
)
|
||||
|
||||
headers = {k: v for k, v in response.headers.items() if k.lower() != "content-length"}
|
||||
headers["ETag"] = etag
|
||||
headers["X-StateHub-Elapsed-Ms"] = elapsed_ms
|
||||
headers["X-StateHub-Response-Bytes"] = str(len(body))
|
||||
if not any(k.lower() == "cache-control" for k in headers):
|
||||
headers["Cache-Control"] = "no-cache"
|
||||
return StarletteResponse(
|
||||
@@ -84,7 +96,7 @@ app.add_middleware(
|
||||
allow_origins=_cors_origins,
|
||||
allow_methods=["GET", "POST", "PATCH", "DELETE", "PUT"],
|
||||
allow_headers=["Content-Type", "If-None-Match"],
|
||||
expose_headers=["ETag"],
|
||||
expose_headers=["ETag", "X-StateHub-Elapsed-Ms", "X-StateHub-Response-Bytes", "X-StateHub-Cache"],
|
||||
)
|
||||
|
||||
app.include_router(domains.router)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, Request
|
||||
from fastapi import APIRouter, Depends, Request, Response
|
||||
from fastapi.responses import JSONResponse
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
@@ -17,6 +17,7 @@ from api.models.extension_point import ExtensionPoint
|
||||
from api.models.managed_repo import ManagedRepo
|
||||
from api.models.progress_event import ProgressEvent
|
||||
from api.models.sbom_entry import SBOMEntry
|
||||
from api.models.sbom_snapshot import SBOMSnapshot
|
||||
from api.models.task import Task, TaskPriority, TaskStatus
|
||||
from api.models.technical_debt import TechnicalDebt
|
||||
from api.models.topic import Topic, TopicStatus
|
||||
@@ -26,6 +27,9 @@ from api.schemas.decision import DecisionRead
|
||||
from api.schemas.domain import DomainSummary
|
||||
from api.schemas.progress_event import ProgressEventRead
|
||||
from api.schemas.state import (
|
||||
DashboardOverview,
|
||||
DashboardSourceMeta,
|
||||
DashboardWorkplanRow,
|
||||
DecisionTotals,
|
||||
NextStep,
|
||||
StateSummary,
|
||||
@@ -38,6 +42,7 @@ from api.schemas.task import TaskRead
|
||||
from api.schemas.topic import TopicRead, TopicWithWorkstreams
|
||||
from api.schemas.workstream import WorkstreamRead, WorkstreamWithTaskCounts, WorkstreamWithDeps
|
||||
from api.schemas.workstream_dependency import WorkstreamDepStub
|
||||
from api.routers.workstreams import _workplan_index
|
||||
from api.task_status import TERMINAL_TASK_STATUSES, status_value
|
||||
from api.workplan_status import (
|
||||
CLOSED_WORKSTREAM_STATUSES,
|
||||
@@ -51,17 +56,25 @@ router = APIRouter(prefix="/state", tags=["state"])
|
||||
# Process-local cache for GET /state/summary: the last built payload, the
# time.monotonic() reading taken when it was stored, and its lifetime in seconds.
_SUMMARY_CACHE: StateSummary | None = None
_SUMMARY_CACHE_AT: float = 0.0
_SUMMARY_TTL = 15.0
# Same three-part cache for GET /state/overview, with a shorter lifetime.
_OVERVIEW_CACHE: DashboardOverview | None = None
_OVERVIEW_CACHE_AT: float = 0.0
_OVERVIEW_TTL = 10.0
|
||||
|
||||
|
||||
@router.get("/summary", response_model=StateSummary)
|
||||
async def get_summary(
|
||||
request: Request,
|
||||
response: Response,
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> StateSummary:
|
||||
global _SUMMARY_CACHE, _SUMMARY_CACHE_AT
|
||||
no_cache = "no-cache" in request.headers.get("cache-control", "")
|
||||
if not no_cache and _SUMMARY_CACHE is not None and (time.monotonic() - _SUMMARY_CACHE_AT) < _SUMMARY_TTL:
|
||||
response.headers["X-StateHub-Cache"] = "hit"
|
||||
response.headers["Cache-Control"] = "max-age=15, stale-while-revalidate=30"
|
||||
return _SUMMARY_CACHE
|
||||
response.headers["X-StateHub-Cache"] = "miss"
|
||||
response.headers["Cache-Control"] = "max-age=15, stale-while-revalidate=30"
|
||||
# Run all queries sequentially on one session.
|
||||
# AsyncSession does not support concurrent operations (no gather on same session).
|
||||
|
||||
@@ -362,6 +375,309 @@ async def get_summary(
|
||||
return result
|
||||
|
||||
|
||||
@router.get("/overview", response_model=DashboardOverview)
async def get_overview(
    request: Request,
    response: Response,
    session: AsyncSession = Depends(get_session),
) -> DashboardOverview:
    """Bounded dashboard overview read model.

    This is intentionally narrower than /state/summary. The dashboard overview
    needs counts, recent rows, and chart-ready workplan rows; it does not need
    full task or workplan lists transferred to the browser on every poll.
    """
    global _OVERVIEW_CACHE, _OVERVIEW_CACHE_AT

    # A request carrying "no-cache" in its Cache-Control header always forces a rebuild.
    client_bypass = "no-cache" in request.headers.get("cache-control", "")
    cached = _OVERVIEW_CACHE
    is_fresh = (
        not client_bypass
        and cached is not None
        and (time.monotonic() - _OVERVIEW_CACHE_AT) < _OVERVIEW_TTL
    )

    # Both outcomes advertise the same client caching policy; only the
    # hit/miss marker differs.
    response.headers["X-StateHub-Cache"] = "hit" if is_fresh else "miss"
    response.headers["Cache-Control"] = "max-age=10, stale-while-revalidate=30"
    if is_fresh:
        return cached

    # Miss: rebuild from the database and remember the result for the next TTL window.
    overview = await _build_dashboard_overview(session)
    _OVERVIEW_CACHE = overview
    _OVERVIEW_CACHE_AT = time.monotonic()
    return overview
|
||||
|
||||
|
||||
async def _build_dashboard_overview(session: AsyncSession) -> DashboardOverview:
    """Assemble the DashboardOverview payload from the database.

    All queries are awaited one after another on the single ``session`` passed
    in. The result carries totals, non-archived topics with workstream stubs,
    blocking decisions, waiting tasks, recent progress, SBOM and contribution
    counts, and one chart-ready row per workstream with task counts aggregated
    by the database.
    """
    # Non-archived topics, oldest first. Only the domain relationship is
    # loaded; workstreams, decisions and progress events are explicitly not.
    topics_rows = await session.execute(
        select(Topic)
        .options(
            selectinload(Topic.domain),
            noload(Topic.workstreams),
            noload(Topic.decisions),
            noload(Topic.progress_events),
        )
        .where(Topic.status != TopicStatus.archived)
        .order_by(Topic.created_at)
    )
    topics = list(topics_rows.scalars().all())
    topic_map = {topic.id: topic for topic in topics}

    # Every workstream (no status filter), without relationships, in planning order.
    workstream_rows = await session.execute(
        select(Workstream)
        .options(noload("*"))
        .order_by(
            Workstream.planning_priority.asc().nullslast(),
            Workstream.planning_order.asc().nullslast(),
            Workstream.updated_at.desc(),
        )
    )
    workstreams_all = list(workstream_rows.scalars().all())

    # Per-topic workstream stubs, ordered by creation time. Workstreams whose
    # topic was not selected above (e.g. archived topics) are skipped.
    topic_workstreams: dict = {t.id: [] for t in topics}
    for w in sorted(workstreams_all, key=lambda item: item.created_at):
        if w.topic_id not in topic_workstreams:
            continue
        topic_workstreams[w.topic_id].append({
            "id": w.id,
            "slug": w.slug,
            "title": w.title,
            "status": w.status,
            "owner": w.owner,
            "due_date": w.due_date,
        })

    # repo id -> repo slug and owning domain slug, used to label workplan rows.
    repo_rows = await session.execute(
        select(ManagedRepo.id, ManagedRepo.slug, Domain.slug)
        .join(Domain, Domain.id == ManagedRepo.domain_id)
        .order_by(ManagedRepo.slug)
    )
    repo_map = {
        repo_id: {"slug": repo_slug, "domain_slug": domain_slug}
        for repo_id, repo_slug, domain_slug in repo_rows
    }

    # One grouped query yields (workstream, status, count) rows that feed three
    # structures: per-workstream buckets, a per-workstream status list for the
    # flow engine, and global per-status totals.
    task_counts_by_ws: dict = {}
    task_statuses_per_ws: dict = {}
    task_totals_by_status: dict[str, int] = {}
    for ws_id, task_status, count in await session.execute(
        select(Task.workstream_id, Task.status, func.count()).group_by(Task.workstream_id, Task.status)
    ):
        status = status_value(task_status)
        task_counts_by_ws.setdefault(ws_id, {"done": 0, "progress": 0, "wait": 0, "todo": 0, "total": 0})
        # "total" counts every status; only the four charted statuses get a bucket.
        task_counts_by_ws[ws_id]["total"] += count
        if status in {"done", "progress", "wait", "todo"}:
            task_counts_by_ws[ws_id][status] += count
        task_statuses_per_ws.setdefault(ws_id, []).extend([status] * count)
        task_totals_by_status[status] = task_totals_by_status.get(status, 0) + count

    # Dependency edges are only fetched for open workstreams, in either direction.
    open_ws = [
        w for w in workstreams_all
        if normalize_workstream_status(w.status) in OPEN_WORKSTREAM_STATUSES
    ]
    open_ws_ids = [w.id for w in open_ws]
    dep_rows = []
    if open_ws_ids:
        dep_result = await session.execute(
            select(WorkstreamDependency).where(
                (WorkstreamDependency.from_workstream_id.in_(open_ws_ids))
                | (WorkstreamDependency.to_workstream_id.in_(open_ws_ids))
            )
        )
        dep_rows = list(dep_result.scalars().all())

    # Evaluate each open workstream against the "workstream" flow; a workstream
    # whose evaluation reports exit_blocked is counted as "blocked", otherwise
    # its normalized stored status is used.
    ws_lookup = {w.id: w for w in workstreams_all}
    workstream_flow = load_flow("workstream")
    flow_engine = FlowEngine()
    effective_status: dict = {}
    for w in open_ws:
        flow_obj = {
            "status": w.status,
            "workstation": w.status,
            "tasks": [{"status": status} for status in task_statuses_per_ws.get(w.id, [])],
            "dependencies": [
                {"workstation": normalize_workstream_status(ws_lookup[d.to_workstream_id].status)}
                for d in dep_rows
                if d.from_workstream_id == w.id and d.to_workstream_id and d.to_workstream_id in ws_lookup
            ],
        }
        flow_result = flow_engine.evaluate(flow_obj, workstream_flow)
        effective_status[w.id] = "blocked" if flow_result.exit_blocked else normalize_workstream_status(w.status)

    # Raw per-status row counts for topics, workstreams and decisions.
    topic_counts = {r[0]: r[1] for r in await session.execute(
        select(Topic.status, func.count()).group_by(Topic.status)
    )}
    ws_counts = {r[0]: r[1] for r in await session.execute(
        select(Workstream.status, func.count()).group_by(Workstream.status)
    )}
    dec_counts = {r[0]: r[1] for r in await session.execute(
        select(Decision.status, func.count()).group_by(Decision.status)
    )}

    # Workstream "active"/"blocked" come from the flow-evaluated statuses of
    # open workstreams; the other buckets come from stored status counts, with
    # "todo" counted as ready and "completed"/"accepted" counted as finished.
    totals = Totals(
        topics=TopicTotals(
            active=topic_counts.get(TopicStatus.active, 0),
            paused=topic_counts.get(TopicStatus.paused, 0),
            archived=topic_counts.get(TopicStatus.archived, 0),
            total=sum(topic_counts.values()),
        ),
        workstreams=WorkstreamTotals(
            proposed=ws_counts.get("proposed", 0),
            ready=ws_counts.get("ready", 0) + ws_counts.get("todo", 0),
            active=sum(1 for status in effective_status.values() if status == "active"),
            blocked=sum(1 for status in effective_status.values() if status == "blocked"),
            backlog=ws_counts.get("backlog", 0),
            finished=(
                ws_counts.get("finished", 0)
                + ws_counts.get("completed", 0)
                + ws_counts.get("accepted", 0)
            ),
            archived=ws_counts.get("archived", 0),
            total=sum(ws_counts.values()),
        ),
        tasks=TaskTotals(
            wait=task_totals_by_status.get("wait", 0),
            todo=task_totals_by_status.get("todo", 0),
            progress=task_totals_by_status.get("progress", 0),
            done=task_totals_by_status.get("done", 0),
            cancel=task_totals_by_status.get("cancel", 0),
            total=sum(task_totals_by_status.values()),
        ),
        decisions=DecisionTotals(
            open=dec_counts.get(DecisionStatus.open, 0),
            resolved=dec_counts.get(DecisionStatus.resolved, 0),
            escalated=dec_counts.get(DecisionStatus.escalated, 0),
            superseded=dec_counts.get(DecisionStatus.superseded, 0),
            total=sum(dec_counts.values()),
        ),
    )

    # Pending decisions that are still open or escalated, earliest deadline
    # first (rows without a deadline last).
    blocking_rows = await session.execute(
        select(Decision)
        .where(Decision.decision_type == DecisionType.pending)
        .where(Decision.status.in_([DecisionStatus.open, DecisionStatus.escalated]))
        .order_by(Decision.deadline.asc().nullslast(), Decision.created_at)
    )
    blocking = list(blocking_rows.scalars().all())

    # Tasks in "wait" status, oldest first. This query has no LIMIT.
    waiting_rows = await session.execute(
        select(Task).options(noload("*")).where(Task.status == TaskStatus.wait).order_by(Task.created_at)
    )
    waiting = list(waiting_rows.scalars().all())

    # The 20 most recent progress events.
    recent_rows = await session.execute(
        select(ProgressEvent).options(noload("*")).order_by(ProgressEvent.created_at.desc()).limit(20)
    )
    recent = list(recent_rows.scalars().all())

    # Up to 500 milestone events whose summary starts with the registration prefix.
    milestone_rows = await session.execute(
        select(ProgressEvent)
        .options(noload("*"))
        .where(ProgressEvent.event_type == "milestone")
        .where(ProgressEvent.summary.like("Project registered with State Hub:%"))
        .order_by(ProgressEvent.created_at.desc())
        .limit(500)
    )
    registration_milestones = list(milestone_rows.scalars().all())

    # Contribution counts by type and by status share one flat dict; if a type
    # value and a status value were ever equal, the status count would win.
    contrib_type_counts = {r[0].value: r[1] for r in await session.execute(
        select(Contribution.type, func.count()).group_by(Contribution.type)
    )}
    contrib_status_counts = {r[0].value: r[1] for r in await session.execute(
        select(Contribution.status, func.count()).group_by(Contribution.status)
    )}
    contribution_counts = {**contrib_type_counts, **contrib_status_counts}

    # Licence risk = direct, non-dev SBOM entries whose SPDX string contains
    # one of these substrings (compared upper-cased). The match is done in
    # Python over every selected row.
    _COPYLEFT_PATS = ("GPL", "AGPL", "LGPL", "EUPL", "CDDL", "MPL")
    all_direct_prod_rows = await session.execute(
        select(SBOMEntry.license_spdx)
        .where(SBOMEntry.is_direct.is_(True))
        .where(SBOMEntry.is_dev.is_(False))
    )
    licence_risk_count = sum(
        1 for (lic,) in all_direct_prod_rows.all()
        if lic and any(pat in lic.upper() for pat in _COPYLEFT_PATS)
    )

    # Snapshot count and summed entry_count, aggregated by the database.
    snapshot_count, package_total = (await session.execute(
        select(
            func.count(SBOMSnapshot.id),
            func.coalesce(func.sum(SBOMSnapshot.entry_count), 0),
        )
    )).one()

    # Capability requests in any of the four listed not-yet-finished statuses.
    open_cap_req_count = (await session.execute(
        select(func.count()).select_from(CapabilityRequest).where(
            CapabilityRequest.status.in_(["requested", "accepted", "in_progress", "ready_for_review"])
        )
    )).scalar() or 0

    # The workplan index is optional input: any failure is recorded in
    # `sources` and the rows below simply lack workplan metadata.
    sources: dict[str, DashboardSourceMeta] = {}
    try:
        workplan_index = await _workplan_index(refresh=False, session=session)
        workplan_map = workplan_index.get("workstreams", {})
        index_meta = workplan_index.get("_meta", {})
        sources["workplan_index"] = DashboardSourceMeta(
            ok=not bool(index_meta.get("last_error")),
            stale=bool(index_meta.get("stale")),
            cache_age_seconds=index_meta.get("cache_age_seconds"),
            refresh_in_progress=bool(index_meta.get("refresh_in_progress")),
            error=index_meta.get("last_error"),
        )
    except Exception as exc:
        workplan_map = {}
        sources["workplan_index"] = DashboardSourceMeta(ok=False, error=str(exc))

    # One row per workstream, in the planning order selected above.
    workplan_rows: list[DashboardWorkplanRow] = []
    for w in workstreams_all:
        repo = repo_map.get(w.repo_id)
        topic = topic_map.get(w.topic_id)
        # The index is keyed by the string form of the workstream id.
        workplan = workplan_map.get(str(w.id), {})
        counts = task_counts_by_ws.get(w.id, {"done": 0, "progress": 0, "wait": 0, "todo": 0, "total": 0})
        workplan_rows.append(DashboardWorkplanRow(
            id=w.id,
            title=w.title,
            status=normalize_workstream_status(w.status),
            # Domain prefers the repo's domain, then the topic's, then "unknown".
            domain=repo["domain_slug"] if repo else (topic.domain_slug if topic else "unknown"),
            repo_label=repo["slug"] if repo else workplan.get("repo_slug", "unassigned"),
            workplan_filename=workplan.get("filename"),
            workplan_relative_path=workplan.get("relative_path"),
            workplan_archived=bool(workplan.get("archived", False)),
            health_labels=workplan.get("health_labels", []),
            href=f"./workstreams/{w.id}",
            done=counts.get("done", 0),
            progress=counts.get("progress", 0),
            wait=counts.get("wait", 0),
            todo=counts.get("todo", 0),
            total=counts.get("total", 0),
            created_at=w.created_at,
            updated_at=w.updated_at,
        ))

    return DashboardOverview(
        generated_at=datetime.now(tz=timezone.utc),
        totals=totals,
        topics=[
            TopicWithWorkstreams(
                **TopicRead.model_validate(t).model_dump(),
                workstreams=topic_workstreams.get(t.id, []),
            )
            for t in topics
        ],
        blocking_decisions=[DecisionRead.model_validate(d) for d in blocking],
        waiting_tasks=[TaskRead.model_validate(t) for t in waiting],
        # NOTE(review): blocked_tasks is filled from the same `waiting` list as
        # waiting_tasks — confirm this duplication is an intentional alias.
        blocked_tasks=[TaskRead.model_validate(t) for t in waiting],
        recent_progress=[ProgressEventRead.model_validate(e) for e in recent],
        next_steps=await _derive_next_steps(session),
        contribution_counts=contribution_counts,
        licence_risk_count=licence_risk_count,
        open_capability_requests=open_cap_req_count,
        sbom_snapshot_count=int(snapshot_count or 0),
        sbom_package_total=int(package_total or 0),
        registration_milestones=[ProgressEventRead.model_validate(e) for e in registration_milestones],
        workplan_rows=workplan_rows,
        sources=sources,
        diagnostics={
            "workplan_row_count": len(workplan_rows),
            "task_count_strategy": "grouped",
        },
    )
|
||||
|
||||
|
||||
async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
|
||||
"""Compute per-domain stats for the state summary."""
|
||||
domains_rows = await session.execute(
|
||||
|
||||
@@ -9,7 +9,7 @@ from api.database import get_session
|
||||
from api.models.task import Task, TaskStatus
|
||||
from api.models.token_event import TokenEvent
|
||||
from api.models.workstream import Workstream
|
||||
from api.schemas.task import TaskCreate, TaskRead, TaskUpdate
|
||||
from api.schemas.task import TaskCountRead, TaskCreate, TaskRead, TaskUpdate
|
||||
from api.services.lifecycle import status_value, transition_task_status
|
||||
from api.task_status import normalize_task_status
|
||||
|
||||
@@ -24,6 +24,8 @@ async def list_tasks(
|
||||
needs_human: bool | None = Query(None),
|
||||
priority: str | None = None,
|
||||
due_date_before: date | None = None,
|
||||
limit: int | None = Query(None, ge=1, le=5000),
|
||||
offset: int = Query(0, ge=0),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> list[Task]:
|
||||
q = select(Task)
|
||||
@@ -40,10 +42,32 @@ async def list_tasks(
|
||||
if due_date_before is not None:
|
||||
q = q.where(Task.due_date <= due_date_before)
|
||||
q = q.order_by(Task.created_at)
|
||||
if offset:
|
||||
q = q.offset(offset)
|
||||
if limit is not None:
|
||||
q = q.limit(limit)
|
||||
result = await session.execute(q)
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
@router.get("/counts", response_model=list[TaskCountRead])
async def count_tasks(
    workstream_id: uuid.UUID | None = None,
    status: str | None = None,
    session: AsyncSession = Depends(get_session),
) -> list[TaskCountRead]:
    # Return one count per (workstream, status) pair so callers can chart task
    # totals without downloading the tasks themselves.
    grouping = (Task.workstream_id, Task.status)
    stmt = select(*grouping, func.count()).group_by(*grouping)

    # Both filters are optional and narrow the grouped rows.
    if workstream_id:
        stmt = stmt.where(Task.workstream_id == workstream_id)
    if status:
        # The raw query value is normalized before conversion to the enum.
        wanted = TaskStatus(normalize_task_status(status))
        stmt = stmt.where(Task.status == wanted)

    counts: list[TaskCountRead] = []
    for ws_id, task_status, count in await session.execute(stmt):
        counts.append(TaskCountRead(workstream_id=ws_id, status=task_status, count=count))
    return counts
|
||||
|
||||
|
||||
@router.post("/", response_model=TaskRead, status_code=status.HTTP_201_CREATED)
|
||||
async def create_task(
|
||||
body: TaskCreate,
|
||||
|
||||
@@ -3,6 +3,7 @@ import logging
|
||||
import uuid
|
||||
import socket
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -40,6 +41,8 @@ workplan_router = APIRouter(prefix="/workplans", tags=["workplans"])
|
||||
# Process-local cache of the workplan index: the last built index, the
# time.monotonic() reading taken when it was stored, and its lifetime in seconds.
_INDEX_CACHE: dict[str, Any] | None = None
_INDEX_CACHE_AT: float = 0.0
_INDEX_TTL = 30.0
# Handle of the in-flight background refresh task, if one has been started.
_INDEX_REFRESH_TASK: asyncio.Task | None = None
# Message of the most recent background refresh failure; None after a successful build.
_INDEX_LAST_ERROR: str | None = None
|
||||
|
||||
_LEGACY_OWNER = "state-hub.api"
|
||||
_COMPLETED_WORKSTREAM_EVENT = "org.statehub.workstream.completed"
|
||||
@@ -170,16 +173,7 @@ async def _list_workstreams(
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
async def _workplan_index(
|
||||
*,
|
||||
refresh: bool,
|
||||
session: AsyncSession,
|
||||
) -> dict[str, Any]:
|
||||
"""Map file-backed workplan ids to their local workplan filenames."""
|
||||
global _INDEX_CACHE, _INDEX_CACHE_AT
|
||||
if not refresh and _INDEX_CACHE is not None and (time.monotonic() - _INDEX_CACHE_AT) < _INDEX_TTL:
|
||||
return _INDEX_CACHE
|
||||
|
||||
async def _build_workplan_index(session: AsyncSession) -> dict[str, Any]:
|
||||
result = await session.execute(
|
||||
select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.slug)
|
||||
)
|
||||
@@ -218,8 +212,78 @@ async def _workplan_index(
|
||||
"needs_review": bool(review and review.needs_review),
|
||||
"health_labels": ["needs_review"] if review and review.needs_review else [],
|
||||
}
|
||||
_INDEX_CACHE = {"workplans": index, "workstreams": index}
|
||||
return {"workplans": index, "workstreams": index}
|
||||
|
||||
|
||||
def _index_with_meta(*, stale: bool, refresh_in_progress: bool) -> dict[str, Any]:
    """Return a copy of the cached workplan index with a freshly computed ``_meta``.

    When nothing is cached, the copy holds empty ``workplans`` and
    ``workstreams`` maps. ``generated_at`` is carried over from the cached
    entry; the remaining ``_meta`` fields describe the cache as of this call.
    """
    cached = _INDEX_CACHE
    if cached:
        payload = dict(cached)
        generated_at = cached.get("_meta", {}).get("generated_at")
    else:
        payload = {"workplans": {}, "workstreams": {}}
        generated_at = None

    # A zero timestamp means nothing has been stored yet, so no age is reported.
    if _INDEX_CACHE_AT:
        cache_age = round(time.monotonic() - _INDEX_CACHE_AT, 3)
    else:
        cache_age = None

    payload["_meta"] = {
        "generated_at": generated_at,
        "stale": stale,
        "cache_age_seconds": cache_age,
        "refresh_in_progress": refresh_in_progress,
        "last_error": _INDEX_LAST_ERROR,
    }
    return payload
|
||||
|
||||
|
||||
async def _refresh_workplan_index_background() -> None:
    """Rebuild the workplan index on its own session and publish it to the cache.

    Never raises for ordinary exceptions: a failure is recorded in
    ``_INDEX_LAST_ERROR`` so callers can report it, and the previously cached
    index is left in place.
    """
    global _INDEX_CACHE, _INDEX_CACHE_AT, _INDEX_LAST_ERROR
    # Imported here rather than at module level, as in the original code.
    from api.database import async_session_factory

    try:
        async with async_session_factory() as session:
            index = await _build_workplan_index(session)
        index["_meta"] = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "stale": False,
            "cache_age_seconds": 0.0,
            "refresh_in_progress": False,
            "last_error": None,
        }
        _INDEX_CACHE = index
        _INDEX_CACHE_AT = time.monotonic()
        _INDEX_LAST_ERROR = None
    except Exception as exc:
        # Exceptions raised without a message stringify to "", which readers
        # of last_error treat as "no error"; fall back to the class name so
        # the failure stays visible.
        _INDEX_LAST_ERROR = str(exc) or type(exc).__name__
|
||||
|
||||
|
||||
def _ensure_index_refresh_started() -> None:
    """Schedule a background index refresh unless one is already running."""
    global _INDEX_REFRESH_TASK

    current = _INDEX_REFRESH_TASK
    still_running = current is not None and not current.done()
    if not still_running:
        # Keep the task handle so later calls can see that a refresh is in flight.
        _INDEX_REFRESH_TASK = asyncio.create_task(_refresh_workplan_index_background())
|
||||
|
||||
|
||||
async def _workplan_index(
    *,
    refresh: bool,
    session: AsyncSession,
) -> dict[str, Any]:
    """Map file-backed workplan ids to their local workplan filenames.

    A cached index younger than the TTL is returned as-is. An older one is
    returned immediately, flagged stale, while a background refresh is
    started. The index is built on ``session`` only when ``refresh`` is true
    or nothing is cached yet.
    """
    global _INDEX_CACHE, _INDEX_CACHE_AT, _INDEX_LAST_ERROR

    if not refresh and _INDEX_CACHE is not None:
        age = time.monotonic() - _INDEX_CACHE_AT if _INDEX_CACHE_AT else None
        if age is not None and age < _INDEX_TTL:
            task = _INDEX_REFRESH_TASK
            running = task is not None and not task.done()
            return _index_with_meta(stale=False, refresh_in_progress=running)

        # Expired: serve the old data now and let the refresh finish in the background.
        _ensure_index_refresh_started()
        return _index_with_meta(stale=True, refresh_in_progress=True)

    # Forced refresh or cold cache: build synchronously on the caller's session.
    fresh = await _build_workplan_index(session)
    fresh["_meta"] = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "stale": False,
        "cache_age_seconds": 0.0,
        "refresh_in_progress": False,
        "last_error": None,
    }
    _INDEX_CACHE = fresh
    _INDEX_CACHE_AT = time.monotonic()
    _INDEX_LAST_ERROR = None
    return _INDEX_CACHE
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -84,3 +85,51 @@ class StateSummary(BaseModel):
|
||||
contribution_counts: dict[str, int] = {}
|
||||
licence_risk_count: int = 0
|
||||
open_capability_requests: int = 0
|
||||
|
||||
|
||||
class DashboardWorkplanRow(BaseModel):
    # One chart-ready workstream row for the dashboard overview, with task
    # counts already aggregated on the server.
    id: uuid.UUID
    title: str
    status: str
    # Labels used for grouping; the defaults apply when no domain/repo is known.
    domain: str = "unknown"
    repo_label: str = "unassigned"
    # Workplan file metadata; stays None/False/empty when no workplan entry is supplied.
    workplan_filename: str | None = None
    workplan_relative_path: str | None = None
    workplan_archived: bool = False
    health_labels: list[str] = []
    # Link to the workstream's detail page.
    href: str
    # Task counts per status plus the overall total.
    done: int = 0
    progress: int = 0
    wait: int = 0
    todo: int = 0
    total: int = 0
    created_at: datetime
    updated_at: datetime
|
||||
|
||||
|
||||
class DashboardSourceMeta(BaseModel):
    # Health/freshness report for one auxiliary data source feeding the overview.
    ok: bool = True
    # True when the data served came from an expired cache entry.
    stale: bool = False
    cache_age_seconds: float | None = None
    refresh_in_progress: bool = False
    # Error message of the last failure, if any.
    error: str | None = None
|
||||
|
||||
|
||||
class DashboardOverview(BaseModel):
    # Response model for the bounded dashboard overview endpoint.
    generated_at: datetime
    totals: Totals
    topics: list[TopicWithWorkstreams]
    blocking_decisions: list[DecisionRead]
    waiting_tasks: list[TaskRead]
    blocked_tasks: list[TaskRead] = []
    recent_progress: list[ProgressEventRead]
    next_steps: list[NextStep] = []
    contribution_counts: dict[str, int] = {}
    licence_risk_count: int = 0
    open_capability_requests: int = 0
    # SBOM aggregates: number of snapshots and the summed package count.
    sbom_snapshot_count: int = 0
    sbom_package_total: int = 0
    registration_milestones: list[ProgressEventRead] = []
    # One row per workstream with task counts already aggregated.
    workplan_rows: list[DashboardWorkplanRow] = []
    # Per-source health metadata keyed by source name, plus free-form diagnostics.
    sources: dict[str, DashboardSourceMeta] = {}
    diagnostics: dict[str, Any] = {}
|
||||
|
||||
@@ -93,3 +93,9 @@ class TaskRead(TaskStatusMixin):
|
||||
parent_task_id: uuid.UUID | None = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
|
||||
class TaskCountRead(TaskStatusMixin):
    # One grouped count: how many tasks a workstream has in a given status.
    # NOTE(review): TaskStatusMixin is defined outside this view — presumably
    # it normalizes `status`; confirm.
    workstream_id: uuid.UUID
    status: TaskStatus
    count: int
|
||||
|
||||
@@ -89,11 +89,23 @@ export async function waitForVisible(ms) {
|
||||
export async function apiFetch(path, options = {}) {
|
||||
const url = path.startsWith("http") ? path : `${API}${path}`;
|
||||
const timeout = options.timeout ?? FETCH_TIMEOUT;
|
||||
const {timeout: _timeout, ...fetchOptions} = options;
|
||||
const {timeout: _timeout, cache = "no-store", ...fetchOptions} = options;
|
||||
const ctrl = new AbortController();
|
||||
const timer = setTimeout(() => ctrl.abort(), timeout);
|
||||
let timedOut = false;
|
||||
const timer = setTimeout(() => {
|
||||
timedOut = true;
|
||||
ctrl.abort();
|
||||
}, timeout);
|
||||
try {
|
||||
return await fetch(url, {cache: "no-store", ...fetchOptions, signal: ctrl.signal});
|
||||
return await fetch(url, {cache, ...fetchOptions, signal: ctrl.signal});
|
||||
} catch (error) {
|
||||
if (timedOut || error?.name === "AbortError") {
|
||||
const message = `Request timed out after ${Math.round(timeout / 1000)}s: ${url}`;
|
||||
const timeoutError = new Error(message);
|
||||
timeoutError.name = "TimeoutError";
|
||||
throw timeoutError;
|
||||
}
|
||||
throw error;
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
|
||||
@@ -10,7 +10,10 @@ All dashboard pages poll the State Hub API automatically. No manual refresh is e
|
||||
|
||||
## Poll interval
|
||||
|
||||
Every page fetches fresh data from `http://127.0.0.1:8000` every **15 seconds** using an async generator loop. The previous data stays visible while the next request is in flight, so the UI never goes blank.
|
||||
Most live pages fetch fresh data from `http://127.0.0.1:8000` every **15 seconds**
|
||||
using an async generator loop. The overview page uses a heavier bounded read
|
||||
model and refreshes every **60 seconds**. The previous data stays visible while
|
||||
the next request is in flight, so the UI never goes blank.
|
||||
|
||||
---
|
||||
|
||||
@@ -21,6 +24,7 @@ The **●** dot in the top-right corner of each page shows the current connectio
|
||||
| Indicator | Meaning |
|
||||
|---|---|
|
||||
| **● Live · updated HH:MM:SS** | Last poll succeeded — data is current as of that time |
|
||||
| **● Stale · last successful update HH:MM:SS** | Last refresh failed, but cached page data is still visible |
|
||||
| **● Offline — run: `make api`** | API is unreachable — the dot turns red |
|
||||
|
||||
The timestamp updates on every successful poll. If you see a time that is more than ~30 seconds in the past (or more than ~2 minutes on the Overview page, which polls every 60 seconds), the poll is stalled (browser tab backgrounded or network issue) — reloading the page resets the loop.
|
||||
@@ -48,7 +52,7 @@ make api # db + migrate + uvicorn (restarts if already running)
|
||||
|
||||
| Page | Endpoints |
|
||||
|---|---|
|
||||
| Overview | `/state/summary` |
|
||||
| Overview | `/state/overview`, `/decisions/?decision_type=pending` |
|
||||
| Workplans | `/workplans/`, `/topics/`, `/state/summary` |
|
||||
| Decisions | `/decisions/?limit=500`, `/topics/` |
|
||||
| Progress | `/progress/?limit=500` |
|
||||
@@ -57,4 +61,4 @@ All endpoints are read-only GET requests. The dashboard never writes to the API.
|
||||
|
||||
---
|
||||
|
||||
*Poll interval: 15 s. Data is refreshed in the background — the page never reloads itself.*
|
||||
*Poll interval: 15 s for most pages, 60 s for Overview. Data is refreshed in the background — the page never reloads itself.*
|
||||
|
||||
@@ -82,9 +82,13 @@ and summary.
|
||||
|
||||
## Data source
|
||||
|
||||
Polls `GET /state/summary` every **15 seconds**. The workstream chart also polls
|
||||
`GET /workplans/`, `GET /tasks/?limit=2000`, `GET /topics/`, `GET /repos/`,
|
||||
and `GET /workplans/index` for repository grouping, task counts, and
|
||||
workplan filename tooltips. Blocking decisions are fetched separately via
|
||||
`GET /decisions/?decision_type=pending` and only re-fetched after a successful
|
||||
resolve action — this prevents the inline form from being wiped on every poll.
|
||||
Polls `GET /state/overview` every **60 seconds**. This endpoint is a bounded
|
||||
dashboard read model: it returns summary totals, recent activity, registration
|
||||
milestones, SBOM totals, and chart-ready workplan rows with task counts already
|
||||
aggregated server-side.
|
||||
|
||||
The page keeps the last successful overview response visible if a refresh times
|
||||
out, and marks the view stale instead of clearing the dashboard. Blocking
|
||||
decisions are fetched separately via `GET /decisions/?decision_type=pending`
|
||||
and only re-fetched after a successful resolve action — this prevents the inline
|
||||
form from being wiped on every poll.
|
||||
|
||||
@@ -14,11 +14,15 @@ import {
|
||||
```
|
||||
|
||||
```js
|
||||
// Single polling loop — fetches all data in one Promise.all batch, backs off uniformly.
|
||||
// Single polling loop — loads one bounded overview read model and keeps
|
||||
// last-known-good data visible if a refresh fails or times out.
|
||||
const pageState = (async function*() {
|
||||
let failures = 0;
|
||||
let lastGood = null;
|
||||
while (true) {
|
||||
let summary = {}, snapshots = [], totalPkgs = 0, milestones = [], wsAll = [], ok = false;
|
||||
let nextState = lastGood
|
||||
? {...lastGood, ok: false, stale: true, error: null}
|
||||
: {summary: {}, snapshots: [], snapshotCount: 0, totalPkgs: 0, milestones: [], wsAll: [], ok: false, stale: false, error: null, sources: {}, ts: new Date()};
|
||||
try {
|
||||
const loadJson = async (name, path, options = {}) => {
|
||||
const response = await apiFetch(path, options);
|
||||
@@ -26,67 +30,71 @@ const pageState = (async function*() {
|
||||
return response.json();
|
||||
};
|
||||
|
||||
const [
|
||||
summaryData,
|
||||
snapList,
|
||||
allEvents,
|
||||
wsList,
|
||||
taskList,
|
||||
topicList,
|
||||
repoList,
|
||||
workplanIndex,
|
||||
] = await Promise.all([
|
||||
loadJson("summary", "/state/summary", {timeout: 20_000}),
|
||||
loadJson("sbom snapshots", "/sbom/snapshots/"),
|
||||
loadJson("milestones", "/progress/?event_type=milestone&limit=500"),
|
||||
loadJson("workplans", "/workplans/"),
|
||||
loadJson("tasks", "/tasks/?limit=2000"),
|
||||
loadJson("topics", "/topics/"),
|
||||
loadJson("repos", "/repos/"),
|
||||
loadJson("workplan index", "/workplans/index").catch(() => ({workplans: {}, workstreams: {}})),
|
||||
]);
|
||||
const overview = await loadJson("overview", "/state/overview", {timeout: 20_000, cache: "reload"});
|
||||
|
||||
ok = true;
|
||||
summary = summaryData;
|
||||
snapshots = snapList;
|
||||
totalPkgs = snapshots.reduce((s, sn) => s + (sn.entry_count ?? 0), 0);
|
||||
milestones = allEvents.filter(e => e.summary?.startsWith("Project registered with State Hub:"));
|
||||
const workplanMap = workplanIndex.workstreams ?? {};
|
||||
const topicMap = Object.fromEntries(topicList.map(t => [t.id, t]));
|
||||
const repoMap = Object.fromEntries(repoList.map(r => [r.id, r]));
|
||||
const counts = {};
|
||||
for (const t of taskList) {
|
||||
const wid = t.workstream_id;
|
||||
if (!counts[wid]) counts[wid] = {done: 0, progress: 0, wait: 0, todo: 0, total: 0};
|
||||
counts[wid].total++;
|
||||
if (t.status === "done") counts[wid].done++;
|
||||
else if (t.status === "progress") counts[wid].progress++;
|
||||
else if (t.status === "wait") counts[wid].wait++;
|
||||
else if (t.status === "todo") counts[wid].todo++;
|
||||
}
|
||||
wsAll = wsList.map(w => {
|
||||
const repo = repoMap[w.repo_id];
|
||||
const topic = topicMap[w.topic_id];
|
||||
const workplan = workplanMap[w.id] ?? {};
|
||||
return {
|
||||
const summaryData = {
|
||||
generated_at: overview.generated_at,
|
||||
totals: overview.totals ?? {},
|
||||
topics: overview.topics ?? [],
|
||||
blocking_decisions: overview.blocking_decisions ?? [],
|
||||
waiting_tasks: overview.waiting_tasks ?? [],
|
||||
blocked_tasks: overview.blocked_tasks ?? overview.waiting_tasks ?? [],
|
||||
recent_progress: overview.recent_progress ?? [],
|
||||
next_steps: overview.next_steps ?? [],
|
||||
contribution_counts: overview.contribution_counts ?? {},
|
||||
licence_risk_count: overview.licence_risk_count ?? 0,
|
||||
open_capability_requests: overview.open_capability_requests ?? 0,
|
||||
};
|
||||
|
||||
nextState = {
|
||||
summary: summaryData,
|
||||
snapshots: [],
|
||||
snapshotCount: overview.sbom_snapshot_count ?? 0,
|
||||
totalPkgs: overview.sbom_package_total ?? 0,
|
||||
milestones: overview.registration_milestones ?? [],
|
||||
wsAll: (overview.workplan_rows ?? []).map(w => ({
|
||||
...w,
|
||||
status: normalizeWorkstreamStatus(w.status),
|
||||
domain: repo?.domain_slug ?? topic?.domain_slug ?? "unknown",
|
||||
repo_label: repo?.slug ?? workplan.repo_slug ?? "unassigned",
|
||||
workplan_filename: workplan.filename ?? null,
|
||||
workplan_relative_path: workplan.relative_path ?? null,
|
||||
workplan_archived: workplan.archived ?? false,
|
||||
health_labels: workplan.health_labels ?? [],
|
||||
href: `./workstreams/${w.id}`,
|
||||
...(counts[w.id] ?? {done: 0, progress: 0, wait: 0, todo: 0, total: 0}),
|
||||
};
|
||||
});
|
||||
})),
|
||||
ok: true,
|
||||
stale: false,
|
||||
error: null,
|
||||
sources: overview.sources ?? {},
|
||||
ts: new Date(),
|
||||
};
|
||||
lastGood = nextState;
|
||||
} catch (e) {
|
||||
summary = {error: `Dashboard data load failed: ${e?.message ?? String(e)}`};
|
||||
const message = `Dashboard refresh failed: ${e?.message ?? String(e)}`;
|
||||
if (lastGood) {
|
||||
nextState = {
|
||||
...lastGood,
|
||||
ok: false,
|
||||
stale: true,
|
||||
error: `${message}; showing last successful data from ${lastGood.ts?.toLocaleTimeString?.() ?? "previous refresh"}`,
|
||||
summary: {
|
||||
...(lastGood.summary ?? {}),
|
||||
error: `${message}; showing last successful data from ${lastGood.ts?.toLocaleTimeString?.() ?? "previous refresh"}`,
|
||||
},
|
||||
};
|
||||
} else {
|
||||
nextState = {
|
||||
summary: {error: message},
|
||||
snapshots: [],
|
||||
snapshotCount: 0,
|
||||
totalPkgs: 0,
|
||||
milestones: [],
|
||||
wsAll: [],
|
||||
ok: false,
|
||||
stale: false,
|
||||
error: message,
|
||||
sources: {},
|
||||
ts: new Date(),
|
||||
};
|
||||
}
|
||||
}
|
||||
failures = ok ? 0 : failures + 1;
|
||||
yield {summary, snapshots, totalPkgs, milestones, wsAll, ok, ts: new Date()};
|
||||
await waitForVisible(pollDelay({ok, base: POLL_HEAVY, failures}));
|
||||
failures = nextState.ok ? 0 : failures + 1;
|
||||
yield nextState;
|
||||
await waitForVisible(pollDelay({ok: nextState.ok, base: POLL_HEAVY, failures}));
|
||||
}
|
||||
})();
|
||||
```
|
||||
@@ -94,6 +102,7 @@ const pageState = (async function*() {
|
||||
```js
|
||||
const summary = pageState.summary ?? {};
|
||||
const _ok = pageState.ok ?? false;
|
||||
const _stale = pageState.stale ?? false;
|
||||
const _ts = pageState.ts;
|
||||
const totals = summary.totals ?? {};
|
||||
const ws = totals.workstreams ?? {};
|
||||
@@ -107,7 +116,7 @@ const wsAll = pageState.wsAll ?? [];
|
||||
// Kept separate from the main poll so in-progress form inputs aren't wiped every 60 s.
|
||||
const blockingDecisions = Mutable([]);
|
||||
const refreshDecisions = async () => {
|
||||
const r = await fetch(`${API}/decisions/?decision_type=pending`).catch(() => null);
|
||||
const r = await apiFetch("/decisions/?decision_type=pending", {timeout: 12_000}).catch(() => null);
|
||||
const all = r?.ok ? await r.json() : [];
|
||||
blockingDecisions.value = all.filter(d => ["open", "escalated"].includes(d.status));
|
||||
};
|
||||
@@ -121,9 +130,11 @@ import {injectTocTop} from "./components/toc-sidebar.js";
|
||||
import {withDocHelp} from "./components/doc-overlay.js";
|
||||
|
||||
const _liveEl = html`<div class="live-indicator">
|
||||
<span style="color:${_ok ? 'var(--theme-foreground-focus)' : 'red'}">●</span>
|
||||
<span style="color:${_ok ? 'var(--theme-foreground-focus)' : _stale ? 'orange' : 'red'}">●</span>
|
||||
${_ok
|
||||
? `Live · updated ${_ts?.toLocaleTimeString()}`
|
||||
: _stale
|
||||
? `Stale · last successful update ${_ts?.toLocaleTimeString()}`
|
||||
: html`<span style="color:red">Offline — run: <code>cd ~/state-hub && make api</code></span>`}
|
||||
</div>`;
|
||||
withDocHelp(_liveEl, "/docs/live-data");
|
||||
@@ -346,6 +357,7 @@ const licenceRisk = summary.licence_risk_count ?? 0;
|
||||
const totalContribs = ["br","fr","ep","upr"].reduce((s, t) => s + (contribCounts[t] ?? 0), 0);
|
||||
const needsFollowUp = (contribCounts["submitted"] ?? 0) + (contribCounts["acknowledged"] ?? 0);
|
||||
const sbomSnaps = pageState.snapshots ?? [];
|
||||
const sbomSnapCount = pageState.snapshotCount ?? sbomSnaps.length;
|
||||
const totalPkgs = pageState.totalPkgs ?? 0;
|
||||
|
||||
display(html`<div class="grid grid-cols-3" style="gap:1rem;margin-bottom:1.5rem">
|
||||
@@ -362,7 +374,7 @@ display(html`<div class="grid grid-cols-3" style="gap:1rem;margin-bottom:1.5rem"
|
||||
<a class="card card-link ${licenceRisk > 0 ? 'warn' : ''}" href="./sbom">
|
||||
<h3>SBOM</h3>
|
||||
<p class="big-num">${totalPkgs.toLocaleString()}</p>
|
||||
<small>${sbomSnaps.length} repo${sbomSnaps.length !== 1 ? "s" : ""} tracked · ${licenceRisk > 0 ? html`<span style="color:red">${licenceRisk} copyleft risks</span>` : html`<span style="color:green">✓ no copyleft</span>`}</small>
|
||||
<small>${sbomSnapCount} snapshot${sbomSnapCount !== 1 ? "s" : ""} tracked · ${licenceRisk > 0 ? html`<span style="color:red">${licenceRisk} copyleft risks</span>` : html`<span style="color:green">✓ no copyleft</span>`}</small>
|
||||
</a>
|
||||
</div>`);
|
||||
```
|
||||
|
||||
63
scripts/smoke_dashboard_load.sh
Normal file
63
scripts/smoke_dashboard_load.sh
Normal file
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
API_PORT="${API_PORT:-8012}"
|
||||
DASHBOARD_PORT="${DASHBOARD_PORT:-3012}"
|
||||
API_BASE="http://127.0.0.1:${API_PORT}"
|
||||
DASHBOARD_URL="http://127.0.0.1:${DASHBOARD_PORT}/?api_base=${API_BASE}"
|
||||
|
||||
API_LOG="${API_LOG:-/tmp/statehub-api-${API_PORT}.log}"
|
||||
DASHBOARD_LOG="${DASHBOARD_LOG:-/tmp/statehub-dashboard-${DASHBOARD_PORT}.log}"
|
||||
OVERVIEW_JSON="${OVERVIEW_JSON:-/tmp/statehub-overview-${API_PORT}.json}"
|
||||
OVERVIEW_HEADERS="${OVERVIEW_HEADERS:-/tmp/statehub-overview-${API_PORT}.headers}"
|
||||
DASHBOARD_HTML="${DASHBOARD_HTML:-/tmp/statehub-dashboard-${DASHBOARD_PORT}.html}"
|
||||
|
||||
rm -f "$API_LOG" "$DASHBOARD_LOG" "$OVERVIEW_JSON" "$OVERVIEW_HEADERS" "$DASHBOARD_HTML"
|
||||
|
||||
# PIDs of the background servers. Initialised empty so the EXIT trap can run
# safely (under `set -u`) even if the script stops before both are started.
api_pid=""
dashboard_pid=""

# Stop whichever background servers were started. Best-effort: a process that
# has already exited is not an error.
cleanup() {
  local pid
  for pid in "$api_pid" "$dashboard_pid"; do
    if [ -n "$pid" ]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
}
# Install the trap before launching anything so no started process can be
# left behind by an early exit.
trap cleanup EXIT

.venv/bin/python -m uvicorn api.main:app --host 127.0.0.1 --port "$API_PORT" \
  > "$API_LOG" 2>&1 &
api_pid=$!

# `exec` replaces the wrapper subshell with npm itself, so `$!` is npm's PID
# and `cleanup` signals the dev server rather than only the subshell.
(cd dashboard && exec npm run dev -- --host 127.0.0.1 --port "$DASHBOARD_PORT" \
  > "$DASHBOARD_LOG" 2>&1) &
dashboard_pid=$!
|
||||
|
||||
# Poll a URL until it answers successfully, saving the response body.
#
# Arguments:
#   $1  label     human-readable name used in the failure message
#   $2  url       URL to request
#   $3  output    file that receives the response body
#   $4  attempts  maximum number of tries, one second apart (default 40)
#   $5  max_time  per-request time limit in seconds given to curl (default 30)
#
# Returns 0 as soon as one request succeeds (curl -f treats HTTP error
# statuses as failures); otherwise prints a message to stderr and returns 1.
wait_for_url() {
  local label="$1"
  local url="$2"
  local output="$3"
  local attempts="${4:-40}"
  local max_time="${5:-30}"
  local i
  for ((i = 1; i <= attempts; i++)); do
    # --max-time bounds each probe so a server that accepts the connection
    # but never responds cannot stall the retry loop indefinitely.
    if curl -fsS --max-time "$max_time" "$url" -o "$output" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  echo "$label did not become ready: $url" >&2
  return 1
}
|
||||
|
||||
if ! wait_for_url "API overview" "${API_BASE}/state/overview" "$OVERVIEW_JSON"; then
|
||||
echo "API log:" >&2
|
||||
tail -80 "$API_LOG" >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! wait_for_url "Dashboard" "$DASHBOARD_URL" "$DASHBOARD_HTML"; then
|
||||
echo "Dashboard log:" >&2
|
||||
tail -80 "$DASHBOARD_LOG" >&2 || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
curl -sS -D "$OVERVIEW_HEADERS" -o "$OVERVIEW_JSON" \
|
||||
-w "overview %{http_code} %{time_total} %{size_download}\n" \
|
||||
"${API_BASE}/state/overview"
|
||||
printf "dashboard 200 %s\n" "$DASHBOARD_URL"
|
||||
wc -c "$OVERVIEW_JSON"
|
||||
grep -i "x-statehub" "$OVERVIEW_HEADERS" || true
|
||||
@@ -56,8 +56,12 @@ def _truncate(_schema):
|
||||
# Reset in-process TTL caches so stale data from a previous test can't bleed through.
|
||||
_state_router._SUMMARY_CACHE = None
|
||||
_state_router._SUMMARY_CACHE_AT = 0.0
|
||||
_state_router._OVERVIEW_CACHE = None
|
||||
_state_router._OVERVIEW_CACHE_AT = 0.0
|
||||
_ws_router._INDEX_CACHE = None
|
||||
_ws_router._INDEX_CACHE_AT = 0.0
|
||||
_ws_router._INDEX_REFRESH_TASK = None
|
||||
_ws_router._INDEX_LAST_ERROR = None
|
||||
|
||||
yield
|
||||
engine = sqlalchemy.create_engine(_SYNC_URL)
|
||||
|
||||
@@ -226,6 +226,34 @@ class TestTasks:
|
||||
assert "High prio" in titles
|
||||
assert "Low prio" not in titles
|
||||
|
||||
async def test_list_pagination_and_counts(self, client):
    """Exercise ``limit``/``offset`` on ``/tasks/`` and the ``/tasks/counts`` aggregate.

    Three tasks are created in one workstream; the second is moved to
    ``progress`` and the third to ``wait``, leaving one task per status.
    """
    await _create_domain(client)
    topic = await _create_topic(client)
    ws = await _create_workstream(client, topic["id"])
    ws_id = ws["id"]

    # Created one after another so the listing order can be asserted below.
    created = [
        await _create_task(client, ws_id, title=title)
        for title in ("First", "Second", "Third")
    ]
    first_id, second_id, third_id = (task["id"] for task in created)

    await client.patch(f"/tasks/{second_id}", json={"status": "progress"})
    await client.patch(
        f"/tasks/{third_id}",
        json={"status": "wait", "blocking_reason": "blocked"},
    )

    # First page: exactly the first two tasks, in creation order.
    page = await client.get("/tasks/?limit=2")
    assert page.status_code == 200
    assert [task["id"] for task in page.json()] == [first_id, second_id]

    # Offset page: only the third task remains.
    page = await client.get("/tasks/?limit=1&offset=2")
    assert page.status_code == 200
    assert [task["id"] for task in page.json()] == [third_id]

    # Aggregate endpoint: one task in each of the three statuses.
    resp = await client.get(f"/tasks/counts?workstream_id={ws_id}")
    assert resp.status_code == 200
    by_status = {
        (row["workstream_id"], row["status"]): row["count"]
        for row in resp.json()
    }
    for status in ("todo", "progress", "wait"):
        assert by_status[(ws_id, status)] == 1
|
||||
|
||||
@pytest.mark.parametrize("initial_status", ["proposed", "ready", "backlog"])
|
||||
async def test_task_start_activates_planning_workstream(self, client, initial_status):
|
||||
await _create_domain(client)
|
||||
@@ -358,6 +386,34 @@ class TestStateSummary:
|
||||
assert summaries[blocked_ws["id"]]["blocked_reasons"][0]["id"] == "dependencies.all_complete"
|
||||
assert body["totals"]["workstreams"]["blocked"] == 1
|
||||
|
||||
async def test_overview_returns_chart_ready_rows(self, client):
    """Check the ``/state/overview`` read model and its cache header.

    Creates one workstream with two tasks (one later marked ``done``) and
    asserts that the overview row carries repo/domain labels and per-status
    task counts, that the task total and diagnostics strategy are reported,
    and that a second request is answered with ``x-statehub-cache: hit``.
    """
    await _create_domain(client)
    topic = await _create_topic(client)
    repo = await _create_repo(client)
    ws = await _create_workstream(client, topic["id"], repo_id=repo["id"])
    # This task is never modified afterwards, so its return value is not kept.
    await _create_task(client, ws["id"], title="Todo")
    done_task = await _create_task(client, ws["id"], title="Done")
    await client.patch(
        f"/tasks/{done_task['id']}",
        json={"status": "done", "suppress_token_event": True},
    )

    # First request is expected to be a cache miss.
    r = await client.get("/state/overview")
    assert r.status_code == 200
    assert r.headers["x-statehub-cache"] == "miss"
    body = r.json()

    rows = {row["id"]: row for row in body["workplan_rows"]}
    assert ws["id"] in rows
    row = rows[ws["id"]]
    assert row["repo_label"] == "test-repo"
    assert row["domain"] == "testdomain"
    assert row["todo"] == 1
    assert row["done"] == 1
    assert row["total"] == 2
    assert body["totals"]["tasks"]["total"] == 2
    assert body["diagnostics"]["task_count_strategy"] == "grouped"

    # An immediate repeat request is expected to be served from the cache.
    r = await client.get("/state/overview")
    assert r.status_code == 200
    assert r.headers["x-statehub-cache"] == "hit"
|
||||
|
||||
|
||||
class TestFlowEndpoints:
|
||||
async def test_list_flow_definitions(self, client):
|
||||
|
||||
276
workplans/STATE-WP-0056-dashboard-loading-robustness.md
Normal file
276
workplans/STATE-WP-0056-dashboard-loading-robustness.md
Normal file
@@ -0,0 +1,276 @@
|
||||
---
|
||||
id: STATE-WP-0056
|
||||
type: workplan
|
||||
title: "Dashboard Loading Robustness and Efficiency"
|
||||
domain: custodian
|
||||
repo: state-hub
|
||||
status: finished
|
||||
owner: codex
|
||||
topic_slug: custodian
|
||||
created: "2026-06-05"
|
||||
updated: "2026-06-05"
|
||||
state_hub_workstream_id: "28f9569c-937b-4b79-b46c-f6b1f83c09c3"
|
||||
---
|
||||
|
||||
# Dashboard Loading Robustness and Efficiency
|
||||
|
||||
## Summary
|
||||
|
||||
Make the State Hub dashboard overview page faster and more resilient under
|
||||
normal polling. The current overview performs a broad concurrent fan-out of
|
||||
full-list API calls and treats most request failures as whole-page failures.
|
||||
This can surface frequent `Dashboard data load failed: The operation was
|
||||
aborted.` warnings when one call crosses the frontend timeout, even if the API
|
||||
eventually returns successfully.
|
||||
|
||||
This work should reduce request count, payload size, and backend contention;
|
||||
preserve useful last-known data during partial failures; and give operators
|
||||
clearer diagnostics when a section is stale or unavailable.
|
||||
|
||||
## Current Findings
|
||||
|
||||
Inspection on 2026-06-05 found:
|
||||
|
||||
- `dashboard/src/index.md` loads overview data with one eight-request
|
||||
`Promise.all` batch.
|
||||
- `dashboard/src/components/config.js` aborts most `apiFetch` calls after
|
||||
`12_000` ms.
|
||||
- A dashboard-style concurrent timing run produced several calls at or above the
|
||||
default timeout: `/sbom/snapshots/`, `/repos/`, and `/workplans/index`.
|
||||
- The same endpoints can be much faster when called alone, which points to
|
||||
contention and over-fetching rather than one permanently slow endpoint.
|
||||
- The overview calls `/tasks/?limit=2000`, but the tasks API currently ignores
|
||||
`limit` and returns every task. In the observed run that response was roughly
|
||||
2.1 MB just to compute per-workplan task counts.
|
||||
- `/state/summary` has a short in-process cache, but a cache miss still runs a
|
||||
large amount of sequential database and Python-side aggregation work.
|
||||
- `/workplans/index` scans active repository workplan files and parses
|
||||
frontmatter. It is cached, but concurrent dashboard loads can still wait on
|
||||
the same expensive rebuild pattern.
|
||||
- Several API routes set cache headers, but the shared dashboard fetch helper
|
||||
forces `cache: "no-store"` for every request.
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- Replacing Observable Framework.
|
||||
- Redesigning the dashboard information architecture.
|
||||
- Adding authentication, authorization, or multi-user session handling.
|
||||
- Changing workplan file conventions.
|
||||
- Moving State Hub to a different database or deployment substrate.
|
||||
|
||||
## T01 — Add Focused Dashboard Load Instrumentation
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T01
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "e5208053-0db1-4842-a221-c5289422677a"
|
||||
```
|
||||
|
||||
Add enough timing and error visibility to confirm which overview calls are slow,
|
||||
aborted, or oversized during normal use.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add lightweight server-side timing logs or response headers for overview-hot
|
||||
endpoints: `/state/summary`, `/workplans/`, `/tasks/`, `/topics/`, `/repos/`,
|
||||
`/sbom/snapshots/`, `/progress/`, and `/workplans/index`.
|
||||
- Include request path, status, elapsed time, response size when practical, and
|
||||
whether a cached result was used.
|
||||
- Keep instrumentation local and low-noise; avoid logging full payloads or
|
||||
secrets.
|
||||
- Add a small dashboard diagnostic surface or console logging that distinguishes
|
||||
timeout aborts from HTTP errors and network failures.
|
||||
- Capture before/after timing notes in this workplan or a progress event.
|
||||
|
||||
Done when a normal dashboard refresh can be diagnosed without manually timing
|
||||
each endpoint from a shell.
|
||||
|
||||
## T02 — Make Overview Polling Partially Resilient
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T02
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "2cdd960d-ba86-48d1-a7c6-e83671cd0e69"
|
||||
```
|
||||
|
||||
Change the overview data loader so one slow or failed secondary request does
|
||||
not mark the whole dashboard as failed.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Replace fail-fast `Promise.all` behavior in `dashboard/src/index.md` with a
|
||||
per-resource result model, for example `Promise.allSettled`.
|
||||
- Keep last-known-good data for each section while a refresh is degraded.
|
||||
- Treat optional resources such as SBOM snapshots, registration milestones, and
|
||||
workplan file metadata independently from core summary/workplan status data.
|
||||
- Display section-level stale/error indicators instead of one global warning
|
||||
whenever possible.
|
||||
- Keep exponential backoff for repeated failures, but do not discard usable
|
||||
data just because one request timed out.
|
||||
- Make abort errors user-readable, for example "timed out after 12s" instead of
|
||||
only "The operation was aborted."
|
||||
|
||||
Done when an SBOM, repo-list, or workplan-index timeout leaves the rest of the
|
||||
overview usable and visibly stale rather than failed.
|
||||
|
||||
## T03 — Respect Pagination and Add Task Count Aggregates
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T03
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "78484226-9ccc-460c-a2b3-750b3204caa3"
|
||||
```
|
||||
|
||||
Stop returning all tasks for overview count calculations.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add `limit` and `offset` support to `GET /tasks/`, preserving existing filter
|
||||
behavior and sensible limits.
|
||||
- Add a lightweight aggregate endpoint for task counts by workplan and status,
|
||||
for example `GET /tasks/counts?group_by=workstream,status`, or add an
|
||||
overview-specific aggregate route.
|
||||
- Prefer SQL `GROUP BY` over transferring every task to the browser.
|
||||
- Update `dashboard/src/index.md`, `dashboard/src/tasks.md`,
|
||||
`dashboard/src/interventions.md`, and workplan detail pages as needed so list
|
||||
views still receive the rows they need.
|
||||
- Add tests for pagination compatibility and aggregate counts.
|
||||
|
||||
Done when the overview no longer fetches the full task table to draw the
|
||||
workplan chart.
|
||||
|
||||
## T04 — Build a Lightweight Overview Read Endpoint
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T04
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "2cf47a12-e8aa-49ca-963c-1f0d2933c344"
|
||||
```
|
||||
|
||||
Create a dashboard-specific read model that returns exactly the data needed by
|
||||
the overview page in one bounded response.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add an endpoint such as `GET /state/overview` or
|
||||
`GET /state/dashboard-overview`.
|
||||
- Include summary totals, recent progress needed by the page, blocking decision
|
||||
counts, waiting-task counts, SBOM snapshot totals, registration milestones,
|
||||
and workplan chart rows with repo/domain labels and task counts.
|
||||
- Keep response fields stable and documented in dashboard reference docs.
|
||||
- Reuse existing summary helpers where they are efficient, but avoid serializing
|
||||
large full-list payloads that the overview does not display directly.
|
||||
- Add cache headers and a short in-process cache with explicit invalidation
|
||||
rules where appropriate.
|
||||
- Update `dashboard/src/index.md` to prefer this endpoint and remove redundant
|
||||
overview-only fetches.
|
||||
|
||||
Done when the overview's steady-state refresh is one bounded API call plus only
|
||||
truly interactive secondary calls.
|
||||
|
||||
## T05 — Add Stale-While-Refresh for File-Backed Workplan Index
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T05
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "0c88c1a2-588b-41f8-bc1c-f94c8b4b0d1a"
|
||||
```
|
||||
|
||||
Make `/workplans/index` resilient when repository filesystem scans are slow.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add singleflight behavior so concurrent requests share one in-progress
|
||||
rebuild instead of starting or waiting on redundant scans.
|
||||
- Return stale cached data quickly while a background refresh runs when the
|
||||
cache is expired but still available.
|
||||
- Include metadata such as `generated_at`, `stale`, `cache_age_seconds`, and
|
||||
optionally `refresh_in_progress`.
|
||||
- Consider reading only frontmatter rather than whole markdown files if this
|
||||
can be done cleanly.
|
||||
- Keep `refresh=true` as an explicit operator escape hatch.
|
||||
- Add tests for cache hit, stale return, and forced refresh behavior.
|
||||
|
||||
Done when a slow filesystem scan cannot block normal dashboard refreshes for
|
||||
longer than the frontend timeout if cached data exists.
|
||||
|
||||
## T06 — Use Browser and HTTP Caching Selectively
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T06
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "811f02ff-2e92-4c82-8b8a-e3d39a450b02"
|
||||
```
|
||||
|
||||
Let stable lookup requests benefit from cache headers instead of forcing every
|
||||
dashboard request to bypass caches.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Extend `apiFetch` so callers can choose cache mode.
|
||||
- Keep `no-store` for volatile mutation-sensitive resources.
|
||||
- Use default browser caching or `reload` only where route cache headers are
|
||||
already intentional, such as repo/topic lookup data.
|
||||
- Review current route cache headers and align them with dashboard polling
|
||||
needs.
|
||||
- Avoid stale cached data for controls that immediately follow a mutation.
|
||||
|
||||
Done when stable overview lookup data no longer bypasses useful cache headers
|
||||
by default.
|
||||
|
||||
## T07 — Optimize `/state/summary` Cache Misses
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T07
|
||||
status: done
|
||||
priority: medium
|
||||
state_hub_task_id: "633f4cc6-ffeb-4086-9858-d239f50a9686"
|
||||
```
|
||||
|
||||
Reduce the cost of a cold or expired `/state/summary` request.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Profile the current sequential query groups in `api/routers/state.py`.
|
||||
- Move Python-side counts and scans into SQL where straightforward.
|
||||
- Remove unused work from the summary path, such as dead intermediate query
|
||||
results.
|
||||
- Cache derived sections independently when their freshness requirements differ.
|
||||
- Add indexes only after profiling shows a query plan needs them.
|
||||
- Keep summary response compatibility for existing consumers and MCP smoke
|
||||
tests.
|
||||
|
||||
Done when a summary cache miss stays comfortably below the frontend timeout
|
||||
under the current local data volume.
|
||||
|
||||
## T08 — Verify Under Dashboard-Style Load
|
||||
|
||||
```task
|
||||
id: STATE-WP-0056-T08
|
||||
status: done
|
||||
priority: high
|
||||
state_hub_task_id: "353fb25a-5306-416b-8d6d-9b201e6fac87"
|
||||
```
|
||||
|
||||
Prove the dashboard no longer produces frequent abort warnings under realistic
|
||||
refresh behavior.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Add or document a repeatable script that performs dashboard-style concurrent
|
||||
endpoint timing before and after the changes.
|
||||
- Run API tests and dashboard component tests.
|
||||
- Open the dashboard locally and verify that initial load, refresh, hidden-tab
|
||||
pause/resume, and partial API failure states behave correctly.
|
||||
- Confirm payload sizes are lower than the baseline for the overview page.
|
||||
- Update `dashboard/src/docs/overview.md` and `dashboard/src/docs/live-data.md`
|
||||
with the new data-loading model.
|
||||
|
||||
Done when repeated dashboard refreshes do not show the global aborted-operation
|
||||
warning during normal local operation, and degraded sections recover cleanly.
|
||||
Reference in New Issue
Block a user