diff --git a/state-hub/api/main.py b/state-hub/api/main.py index 9d35136..d7cdffd 100644 --- a/state-hub/api/main.py +++ b/state-hub/api/main.py @@ -1,8 +1,12 @@ +import hashlib import os from contextlib import asynccontextmanager from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response as StarletteResponse from api.database import engine from api.routers import decisions, extension_points, progress, state, tasks, technical_debt, topics, workstreams, workstream_dependencies @@ -12,6 +16,40 @@ from api.routers import interface_changes from api.routers import flows +class ETagMiddleware(BaseHTTPMiddleware): + """Add ETag + conditional-GET (304) support to all JSON GET responses.""" + + async def dispatch(self, request: Request, call_next): + response = await call_next(request) + if request.method != "GET": + return response + if "application/json" not in response.headers.get("content-type", ""): + return response + + body_parts = [] + async for chunk in response.body_iterator: + body_parts.append(chunk) + body = b"".join(body_parts) + + etag = '"' + hashlib.md5(body, usedforsecurity=False).hexdigest() + '"' + if request.headers.get("if-none-match") == etag: + return StarletteResponse( + status_code=304, + headers={"ETag": etag, "Cache-Control": "no-cache"}, + ) + + headers = {k: v for k, v in response.headers.items() if k.lower() != "content-length"} + headers["ETag"] = etag + if not any(k.lower() == "cache-control" for k in headers): + headers["Cache-Control"] = "no-cache" + return StarletteResponse( + content=body, + status_code=response.status_code, + headers=headers, + media_type=response.media_type, + ) + + @asynccontextmanager async def lifespan(app: FastAPI): yield @@ -28,11 +66,13 @@ app = FastAPI( _cors_env = os.getenv("CORS_ORIGINS", "http://localhost:3000,http://127.0.0.1:3000") _cors_origins = 
[o.strip() for o in _cors_env.split(",") if o.strip()] +app.add_middleware(ETagMiddleware) app.add_middleware( CORSMiddleware, allow_origins=_cors_origins, allow_methods=["GET", "POST", "PATCH", "DELETE", "PUT"], - allow_headers=["Content-Type"], + allow_headers=["Content-Type", "If-None-Match"], + expose_headers=["ETag"], ) app.include_router(domains.router) diff --git a/state-hub/api/routers/domains.py b/state-hub/api/routers/domains.py index 274b989..ac62af9 100644 --- a/state-hub/api/routers/domains.py +++ b/state-hub/api/routers/domains.py @@ -1,6 +1,6 @@ import uuid -from fastapi import APIRouter, Depends, HTTPException, Query, status +from fastapi import APIRouter, Depends, HTTPException, Query, Response, status from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession @@ -18,9 +18,11 @@ router = APIRouter(prefix="/domains", tags=["domains"]) @router.get("/", response_model=list[DomainRead]) async def list_domains( + response: Response, status: str | None = Query(None, description="active | archived | all"), session: AsyncSession = Depends(get_session), ) -> list[Domain]: + response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30" q = select(Domain).order_by(Domain.name) if status and status != "all": q = q.where(Domain.status == status) diff --git a/state-hub/api/routers/repos.py b/state-hub/api/routers/repos.py index 5000d94..1b14c99 100644 --- a/state-hub/api/routers/repos.py +++ b/state-hub/api/routers/repos.py @@ -9,7 +9,7 @@ import uuid from datetime import datetime, timezone from pathlib import Path -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Response, status from sqlalchemy import case, func, select from sqlalchemy.ext.asyncio import AsyncSession @@ -50,9 +50,11 @@ router = APIRouter(prefix="/repos", tags=["repos"]) @router.get("/", response_model=list[RepoRead]) async def list_repos( + response: Response, domain: str | None = None, 
session: AsyncSession = Depends(get_session), ) -> list[ManagedRepo]: + response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30" q = select(ManagedRepo).order_by(ManagedRepo.name) if domain: domain_row = await session.execute(select(Domain).where(Domain.slug == domain)) diff --git a/state-hub/api/routers/state.py b/state-hub/api/routers/state.py index 01a8a10..d66170c 100644 --- a/state-hub/api/routers/state.py +++ b/state-hub/api/routers/state.py @@ -379,6 +379,87 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]: ] +@router.get("/deps", response_model=list[WorkstreamWithDeps]) +async def get_deps(session: AsyncSession = Depends(get_session)) -> list[WorkstreamWithDeps]: + """Lightweight dep-graph endpoint: open workstreams with their dependency edges only. + + Returns the same structure as open_workstreams in /state/summary but skips + the 10-table full-summary computation. Task counts are omitted (all zero). + Used by workstreams.md and dependencies.md which only need dep edges. 
+ """ + open_ws_rows = await session.execute( + select(Workstream) + .options(noload("*")) + .where(Workstream.status.in_(["active", "blocked"])) + .order_by(Workstream.due_date.asc().nullslast(), Workstream.created_at) + ) + open_ws = list(open_ws_rows.scalars().all()) + + open_ws_ids = [w.id for w in open_ws] + dep_rows = [] + if open_ws_ids: + dep_result = await session.execute( + select(WorkstreamDependency).where( + (WorkstreamDependency.from_workstream_id.in_(open_ws_ids)) + | (WorkstreamDependency.to_workstream_id.in_(open_ws_ids)) + ) + ) + dep_rows = list(dep_result.scalars().all()) + + dep_ws_ids: set = set() + dep_task_ids: set = set() + for d in dep_rows: + dep_ws_ids.add(d.from_workstream_id) + if d.to_workstream_id: + dep_ws_ids.add(d.to_workstream_id) + if d.to_task_id: + dep_task_ids.add(d.to_task_id) + + ws_lookup: dict = {w.id: w for w in open_ws} + extra_ids = dep_ws_ids - set(ws_lookup.keys()) + if extra_ids: + extra_rows = await session.execute( + select(Workstream).options(noload("*")).where(Workstream.id.in_(extra_ids)) + ) + for w in extra_rows.scalars(): + ws_lookup[w.id] = w + + task_lookup: dict = {} + if dep_task_ids: + task_rows = await session.execute(select(Task).options(noload("*")).where(Task.id.in_(dep_task_ids))) + task_lookup = {t.id: t for t in task_rows.scalars().all()} + + dep_index: dict = {w.id: {"depends_on": [], "blocks": []} for w in open_ws} + for d in dep_rows: + from_id, to_id, task_id = d.from_workstream_id, d.to_workstream_id, d.to_task_id + if from_id in dep_index and to_id and to_id in ws_lookup: + dep_index[from_id]["depends_on"].append(WorkstreamDepStub( + dep_id=d.id, target_type="workstream", relationship_type=d.relationship_type, + workstream_id=to_id, workstream_slug=ws_lookup[to_id].slug, + workstream_title=ws_lookup[to_id].title, description=d.description, + )) + if from_id in dep_index and task_id and task_id in task_lookup: + dep_index[from_id]["depends_on"].append(WorkstreamDepStub( + dep_id=d.id, 
target_type="task", relationship_type=d.relationship_type, + task_id=task_id, task_title=task_lookup[task_id].title, description=d.description, + )) + if to_id and to_id in dep_index and from_id in ws_lookup: + dep_index[to_id]["blocks"].append(WorkstreamDepStub( + dep_id=d.id, target_type="workstream", relationship_type=d.relationship_type, + workstream_id=from_id, workstream_slug=ws_lookup[from_id].slug, + workstream_title=ws_lookup[from_id].title, description=d.description, + )) + + return [ + WorkstreamWithDeps( + **WorkstreamRead.model_validate(w).model_dump(), + depends_on=dep_index[w.id]["depends_on"], + blocks=dep_index[w.id]["blocks"], + ) + for w in open_ws + ] + + _PRIORITY_RANK = { TaskPriority.critical: 0, TaskPriority.high: 1, diff --git a/state-hub/api/routers/topics.py b/state-hub/api/routers/topics.py index f26bfea..17e3cbe 100644 --- a/state-hub/api/routers/topics.py +++ b/state-hub/api/routers/topics.py @@ -1,6 +1,6 @@ import uuid -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Response, status from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -23,9 +23,11 @@ async def _resolve_domain_id(domain_slug: str, session: AsyncSession) -> uuid.UU @router.get("/", response_model=list[TopicRead]) async def list_topics( + response: Response, status: TopicStatus | None = None, session: AsyncSession = Depends(get_session), ) -> list[Topic]: + response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30" q = select(Topic) if status: q = q.where(Topic.status == status) diff --git a/workplans/CUST-WP-0039-dashboard-poll-optimization.md b/workplans/CUST-WP-0039-dashboard-poll-optimization.md new file mode 100644 index 0000000..b11d883 --- /dev/null +++ b/workplans/CUST-WP-0039-dashboard-poll-optimization.md @@ -0,0 +1,209 @@ +--- +id: CUST-WP-0039 +type: workplan +title: "Dashboard Poll Optimization" +domain: custodian +status: todo +owner: 
custodian +topic_slug: custodian +created: "2026-05-11" +updated: "2026-05-11" +state_hub_workstream_id: "d5ffb008-a517-4b8b-86ce-093fcc285fb3" +--- + +# Dashboard Poll Optimization + +## Problem + +With `uvicorn --reload` watching `.venv/` now fixed (CUST-WP-0039 precursor), the +remaining sustained load on the API worker comes from the dashboard polling pattern: + +- **24 pages**, 14 with active polling loops (POLL_HEAVY = 60 s, POLL = 15 s) +- **`index.md` alone** runs 4 independent polling loops firing 11 API calls per cycle: + `/state/summary`, `/sbom/snapshots/`, `/progress/`, `/workstreams/`, `/tasks/?limit=2000`, + `/topics/`, `/repos/`, `/workstreams/workplan-index` +- **`workstreams.md` and `dependencies.md`** each call `/state/summary` (the most + expensive endpoint — queries 10+ tables) every 60 s just to extract dependency + edges from `open_workstreams[].depends_on` +- **Reference data** (`/topics/`, `/repos/`) is fetched independently by 10+ pages + every 60 s with no caching; these datasets change rarely +- **Background tabs** still poll at 120 s (`POLL_HIDDEN`) — they could pause entirely + +## Goals + +Reduce API request rate and per-request cost when the dashboard is open, without +degrading UX or data freshness for the pages the user is actively viewing. + +## Out of scope + +- SSE / WebSocket push (would require significant API rework) +- Observable data loaders / static build mode (different deployment model) +- BroadcastChannel cross-tab sharing (nice-to-have, not in this workplan) + +--- + +## Tasks + +### T1 — Add Cache-Control headers to reference endpoints + +```task +id: CUST-WP-0039-T1 +status: todo +priority: high +state_hub_task_id: "b36713d8-d1d5-43c5-86c3-e22f72b68d62" +``` + +Add `Cache-Control: max-age=60, stale-while-revalidate=30` to the list responses +for `/topics/`, `/repos/`, and `/domains/`. These datasets change only when a human +explicitly creates/renames a domain or registers a repo — never on their own. 
+ +Browser-level caching means that when 10 pages all fetch `/topics/` within a 60 s +window, only the first request hits the API; the rest are served from cache. + +**Implementation:** Add a FastAPI middleware or a response-header dependency in +`api/routers/topics.py`, `repos.py`, and `domains.py` list endpoints. Use +`from fastapi.responses import Response` + `response.headers["Cache-Control"]`, or +a shared `cache_headers` dependency. + +--- + +### T2 — Add ETag support to high-frequency list endpoints + +```task +id: CUST-WP-0039-T2 +status: todo +priority: high +state_hub_task_id: "75f1c2cd-0baf-4747-8c67-1dbfa81bde41" +``` + +Add `ETag` (content hash of the response body) and handle `If-None-Match` for +`/workstreams/`, `/tasks/`, and `/state/summary`. When the data hasn't changed the +API returns `304 Not Modified` with no body — roughly 95% smaller than a full +response. + +**Implementation:** +- Add a FastAPI middleware (in `api/main.py`) that intercepts JSON list responses, + computes `md5(body)`, sets `ETag: "<md5-hex-digest>"`, and returns 304 if the request + carries a matching `If-None-Match` header. +- No client changes needed — `fetch()` respects ETags automatically when the + response includes `Cache-Control: no-cache` (which forces revalidation but + allows 304). + +--- + +### T3 — Add lightweight `/state/deps` endpoint + +```task +id: CUST-WP-0039-T3 +status: todo +priority: high +state_hub_task_id: "cb7608d3-5dad-4b51-9b91-080539f7aa65" +``` + +`workstreams.md` and `dependencies.md` call `/state/summary` (a ~10-table query) +only to extract `open_workstreams[].{id, depends_on, blocks}`. Add a dedicated +endpoint that returns just this: + +```json +GET /state/deps +→ [{"id": "...", "title": "...", "status": "...", "depends_on": [...], "blocks": [...]}] +``` + +Query: `SELECT id, title, status FROM workstreams WHERE status IN ('active','blocked')` +plus the dependency join — roughly 1/10th the work of the full summary. 
+ +**Implementation:** New route in `api/routers/state.py` (or a new `deps.py`). +Schema: `WorkstreamDepStub` already exists in `api/schemas/workstream_dependency.py` +— reuse or extend it. + +--- + +### T4 — Replace `/state/summary` in workstreams.md and dependencies.md + +```task +id: CUST-WP-0039-T4 +status: todo +priority: medium +depends_on: [CUST-WP-0039-T3] +state_hub_task_id: "b80dce9c-b1ef-4606-9460-5100d6f58bce" +``` + +Switch `workstreams.md` and `dependencies.md` to use the new `/state/deps` endpoint +instead of the full `/state/summary`. Both pages construct a dep-edge map from +`open_workstreams[].depends_on`; `/state/deps` provides exactly that. + +Changes: +- `dashboard/src/workstreams.md`: replace `apiFetch("/state/summary", ...)` with + `apiFetch("/state/deps")`, update the variable extraction (`openWs = depsData`) +- `dashboard/src/dependencies.md`: same substitution, update edge-building loop + +--- + +### T5 — Consolidate index.md's 4 polling loops into 1 + +```task +id: CUST-WP-0039-T5 +status: todo +priority: medium +state_hub_task_id: "7c2d5e01-9de5-48ad-aa0b-a37cf5332ad9" +``` + +`index.md` runs 4 independent `while(true)` generators (`summaryState`, +`sbomSnapState`, `regsState`, `wsChartState`) that each sleep 60 s independently. +They were split because different sections needed different data, but they all use +POLL_HEAVY and can be unified into a single loop with one `Promise.all` that fetches +all 8 endpoints together. + +Benefits: +- 4 timers → 1: simpler, predictable, backoff applies uniformly +- Fetch batching: all 8 requests fire simultaneously, most finish within the same + server round-trip window +- Simpler failure handling: one `failures` counter, one backoff + +Approach: single `pageState` generator that yields a flat object with all fields +(summary, snapshots, milestones, wsAll). Destructure at the use sites. 
+ +--- + +### T6 — Full visibility-based polling pause in config.js + +```task +id: CUST-WP-0039-T6 +status: todo +priority: low +state_hub_task_id: "31b6a353-040a-4f87-b2f1-1deab5cf6191" +``` + +`pollDelay()` currently extends the interval to `POLL_HIDDEN = 120 s` when the tab +is hidden. Change this to pause polling entirely while hidden and resume immediately +on `visibilitychange`. + +**Implementation:** + +```js +// config.js — replace pollDelay() with: +export async function waitForVisible(base) { + if (typeof document === "undefined") return sleep(base); + if (document.visibilityState === "visible") return sleep(base); + return new Promise(resolve => { + const handler = () => { document.removeEventListener("visibilitychange", handler); resolve(); }; + document.addEventListener("visibilitychange", handler); + }); +} +``` + +Pages replace `await sleep(pollDelay(...))` with `await waitForVisible(base)`. +When the user switches back to the tab, the next poll fires immediately rather +than waiting up to 120 s for the backoff to expire. + +--- + +## Expected impact + +| Change | Request reduction | +|--------|------------------| +| T1 (cache headers) | ~70% drop in /topics, /repos, /domains hits | +| T2 (ETags) | ~80% payload reduction for unchanged list responses | +| T3+T4 (deps endpoint) | 2 full summary calls removed per 60 s cycle | +| T5 (consolidate index) | 4 loops → 1, reduces timer jitter and staggered load | +| T6 (visibility pause) | Eliminates all background-tab traffic entirely |