feat(api): dashboard poll optimisation — T1, T2, T3
T1: Cache-Control max-age=60 on /topics/, /repos/, /domains/ list endpoints
so repeated dashboard polls within a minute are served from browser cache.
T2: ETag middleware (md5 hash) on all JSON GET responses with conditional-GET
(304 Not Modified) support; If-None-Match and ETag added to CORS headers.
ETag registered inside CORS so 304s automatically carry CORS headers.
T3: GET /state/deps — lightweight dep-graph endpoint returning open workstreams
with depends_on/blocks edges only, skipping the 10-table full-summary query.
Prerequisite for T4 (switching workstreams.md and dependencies.md off /state/summary).
Workplan: CUST-WP-0039-dashboard-poll-optimization.md
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,12 @@
|
||||
import hashlib
|
||||
import os
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response as StarletteResponse
|
||||
|
||||
from api.database import engine
|
||||
from api.routers import decisions, extension_points, progress, state, tasks, technical_debt, topics, workstreams, workstream_dependencies
|
||||
@@ -12,6 +16,40 @@ from api.routers import interface_changes
|
||||
from api.routers import flows
|
||||
|
||||
|
||||
# Headers a 304 must repeat so the client can refresh its stored response
# exactly as the equivalent 200 would have (RFC 9110 section 15.4.5).
_HEADERS_KEPT_ON_304 = frozenset(
    {"etag", "cache-control", "vary", "expires", "date", "content-location"}
)


def _etag_matches(if_none_match: str | None, etag: str) -> bool:
    """Return True when an ``If-None-Match`` header value matches *etag*.

    Uses the weak comparison that RFC 9110 mandates for ``If-None-Match``:
    a ``W/`` prefix is ignored on both sides, the header may carry a
    comma-separated list of validators, and ``*`` matches any representation.

    Args:
        if_none_match: Raw header value sent by the client, or ``None``/empty
            when the header is absent.
        etag: The quoted entity tag computed for the current response body.
    """
    if not if_none_match:
        return False
    opaque = etag.removeprefix("W/")
    for candidate in if_none_match.split(","):
        candidate = candidate.strip()
        # Compression proxies commonly downgrade strong tags to weak ones, so
        # the client may echo back W/"<hash>" for a tag we emitted as "<hash>".
        if candidate == "*" or candidate.removeprefix("W/") == opaque:
            return True
    return False


class ETagMiddleware(BaseHTTPMiddleware):
    """Add ETag + conditional-GET (304) support to successful JSON GET responses.

    The entity tag is the MD5 hex digest of the response body (used purely as
    a content fingerprint, not for security). Responses that are not ``GET``,
    not ``200`` or not ``application/json`` pass through untouched.
    """

    async def dispatch(self, request: Request, call_next):
        """Buffer the downstream JSON body, tag it, and answer 304 on a match.

        Args:
            request: The incoming request; only its method and
                ``If-None-Match`` header are inspected.
            call_next: Starlette callable that produces the downstream response.

        Returns:
            The untouched downstream response when it is not eligible, a
            body-less 304 when the client's validator matches, otherwise a
            rebuilt response carrying the same body plus an ``ETag`` header.
        """
        response = await call_next(request)
        # Only a successful GET is a cacheable representation. Error bodies
        # (404/422/500 JSON) must never be converted into a 304.
        if request.method != "GET" or response.status_code != 200:
            return response
        if "application/json" not in response.headers.get("content-type", ""):
            return response

        # The downstream response is streamed, so it has to be drained fully
        # before its content can be hashed.
        body = b"".join([chunk async for chunk in response.body_iterator])
        etag = '"' + hashlib.md5(body, usedforsecurity=False).hexdigest() + '"'

        # Content-Length is recomputed by the new response; any ETag set
        # downstream is replaced so the header is never emitted twice.
        headers = {
            k: v
            for k, v in response.headers.items()
            if k.lower() not in ("content-length", "etag")
        }
        headers["ETag"] = etag
        # Default to revalidate-every-time, but keep a handler's own policy
        # (e.g. "max-age=60, stale-while-revalidate=30") when it set one.
        if not any(k.lower() == "cache-control" for k in headers):
            headers["Cache-Control"] = "no-cache"

        if _etag_matches(request.headers.get("if-none-match"), etag):
            # Clients merge 304 headers into their cached entry, so the 304
            # must carry the same caching headers the 200 would have sent;
            # hard-coding "no-cache" here would erase the handler's max-age.
            return StarletteResponse(
                status_code=304,
                headers={
                    k: v for k, v in headers.items() if k.lower() in _HEADERS_KEPT_ON_304
                },
            )

        return StarletteResponse(
            content=body,
            status_code=response.status_code,
            headers=headers,
            media_type=response.media_type,
        )
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
yield
|
||||
@@ -28,11 +66,13 @@ app = FastAPI(
|
||||
_cors_env = os.getenv("CORS_ORIGINS", "http://localhost:3000,http://127.0.0.1:3000")
|
||||
_cors_origins = [o.strip() for o in _cors_env.split(",") if o.strip()]
|
||||
|
||||
app.add_middleware(ETagMiddleware)
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=_cors_origins,
|
||||
allow_methods=["GET", "POST", "PATCH", "DELETE", "PUT"],
|
||||
allow_headers=["Content-Type"],
|
||||
allow_headers=["Content-Type", "If-None-Match"],
|
||||
expose_headers=["ETag"],
|
||||
)
|
||||
|
||||
app.include_router(domains.router)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -18,9 +18,11 @@ router = APIRouter(prefix="/domains", tags=["domains"])
|
||||
|
||||
@router.get("/", response_model=list[DomainRead])
|
||||
async def list_domains(
|
||||
response: Response,
|
||||
status: str | None = Query(None, description="active | archived | all"),
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> list[Domain]:
|
||||
response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30"
|
||||
q = select(Domain).order_by(Domain.name)
|
||||
if status and status != "all":
|
||||
q = q.where(Domain.status == status)
|
||||
|
||||
@@ -9,7 +9,7 @@ import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response, status
|
||||
from sqlalchemy import case, func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -50,9 +50,11 @@ router = APIRouter(prefix="/repos", tags=["repos"])
|
||||
|
||||
@router.get("/", response_model=list[RepoRead])
|
||||
async def list_repos(
|
||||
response: Response,
|
||||
domain: str | None = None,
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> list[ManagedRepo]:
|
||||
response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30"
|
||||
q = select(ManagedRepo).order_by(ManagedRepo.name)
|
||||
if domain:
|
||||
domain_row = await session.execute(select(Domain).where(Domain.slug == domain))
|
||||
|
||||
@@ -379,6 +379,87 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
|
||||
]
|
||||
|
||||
|
||||
@router.get("/deps", response_model=list[WorkstreamWithDeps])
async def get_deps(session: AsyncSession = Depends(get_session)) -> list[WorkstreamWithDeps]:
    """Return every open workstream together with its dependency edges.

    A workstream is "open" when its status is ``active`` or ``blocked``.
    Each returned item carries two edge lists:

    * ``depends_on`` - workstreams and tasks this workstream points at.
    * ``blocks`` - workstreams that point at this workstream.

    Intended as a cheap alternative to the full /state/summary computation
    for pages that only need the dependency graph; task counts are not
    computed by this endpoint.
    """
    # Step 1: the open workstreams, ordered by due date (NULLs last) then age.
    ws_query = (
        select(Workstream)
        .options(noload("*"))
        .where(Workstream.status.in_(["active", "blocked"]))
        .order_by(Workstream.due_date.asc().nullslast(), Workstream.created_at)
    )
    open_ws = list((await session.execute(ws_query)).scalars().all())
    open_ws_ids = [ws.id for ws in open_ws]

    # Step 2: every dependency row that touches an open workstream on either end.
    edges = []
    if open_ws_ids:
        touches_open = (
            (WorkstreamDependency.from_workstream_id.in_(open_ws_ids))
            | (WorkstreamDependency.to_workstream_id.in_(open_ws_ids))
        )
        edge_result = await session.execute(select(WorkstreamDependency).where(touches_open))
        edges = list(edge_result.scalars().all())

    # Step 3: collect the ids referenced by those rows.
    referenced_ws_ids: set = {edge.from_workstream_id for edge in edges}
    referenced_ws_ids.update(edge.to_workstream_id for edge in edges if edge.to_workstream_id)
    referenced_task_ids: set = {edge.to_task_id for edge in edges if edge.to_task_id}

    # Step 4: resolve referenced workstreams that are not themselves open
    # (needed for their slug/title in the stubs).
    ws_by_id: dict = {ws.id: ws for ws in open_ws}
    missing_ws_ids = referenced_ws_ids.difference(ws_by_id)
    if missing_ws_ids:
        missing_result = await session.execute(
            select(Workstream).options(noload("*")).where(Workstream.id.in_(missing_ws_ids))
        )
        ws_by_id.update((ws.id, ws) for ws in missing_result.scalars())

    # Step 5: resolve referenced tasks (needed for their title).
    task_by_id: dict = {}
    if referenced_task_ids:
        task_result = await session.execute(
            select(Task).options(noload("*")).where(Task.id.in_(referenced_task_ids))
        )
        task_by_id = {task.id: task for task in task_result.scalars().all()}

    # Step 6: bucket each row into the edge lists of the open workstreams it touches.
    edges_by_ws: dict = {ws.id: {"depends_on": [], "blocks": []} for ws in open_ws}
    for edge in edges:
        src_id = edge.from_workstream_id
        dst_id = edge.to_workstream_id
        task_id = edge.to_task_id
        src_is_open = src_id in edges_by_ws

        # Outgoing edge to another workstream.
        if src_is_open and dst_id and dst_id in ws_by_id:
            target_ws = ws_by_id[dst_id]
            edges_by_ws[src_id]["depends_on"].append(
                WorkstreamDepStub(
                    dep_id=edge.id,
                    target_type="workstream",
                    relationship_type=edge.relationship_type,
                    workstream_id=dst_id,
                    workstream_slug=target_ws.slug,
                    workstream_title=target_ws.title,
                    description=edge.description,
                )
            )

        # Outgoing edge to a task.
        if src_is_open and task_id and task_id in task_by_id:
            edges_by_ws[src_id]["depends_on"].append(
                WorkstreamDepStub(
                    dep_id=edge.id,
                    target_type="task",
                    relationship_type=edge.relationship_type,
                    task_id=task_id,
                    task_title=task_by_id[task_id].title,
                    description=edge.description,
                )
            )

        # Incoming edge: the source workstream appears in the target's "blocks".
        if dst_id and dst_id in edges_by_ws and src_id in ws_by_id:
            source_ws = ws_by_id[src_id]
            edges_by_ws[dst_id]["blocks"].append(
                WorkstreamDepStub(
                    dep_id=edge.id,
                    target_type="workstream",
                    relationship_type=edge.relationship_type,
                    workstream_id=src_id,
                    workstream_slug=source_ws.slug,
                    workstream_title=source_ws.title,
                    description=edge.description,
                )
            )

    # Step 7: assemble the response in the same order the workstreams were read.
    result = []
    for ws in open_ws:
        base_fields = WorkstreamRead.model_validate(ws).model_dump()
        bucket = edges_by_ws[ws.id]
        result.append(
            WorkstreamWithDeps(
                **base_fields,
                depends_on=bucket["depends_on"],
                blocks=bucket["blocks"],
            )
        )
    return result
|
||||
|
||||
|
||||
_PRIORITY_RANK = {
|
||||
TaskPriority.critical: 0,
|
||||
TaskPriority.high: 1,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
@@ -23,9 +23,11 @@ async def _resolve_domain_id(domain_slug: str, session: AsyncSession) -> uuid.UU
|
||||
|
||||
@router.get("/", response_model=list[TopicRead])
|
||||
async def list_topics(
|
||||
response: Response,
|
||||
status: TopicStatus | None = None,
|
||||
session: AsyncSession = Depends(get_session),
|
||||
) -> list[Topic]:
|
||||
response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30"
|
||||
q = select(Topic)
|
||||
if status:
|
||||
q = q.where(Topic.status == status)
|
||||
|
||||
209
workplans/CUST-WP-0039-dashboard-poll-optimization.md
Normal file
209
workplans/CUST-WP-0039-dashboard-poll-optimization.md
Normal file
@@ -0,0 +1,209 @@
|
||||
---
|
||||
id: CUST-WP-0039
|
||||
type: workplan
|
||||
title: "Dashboard Poll Optimization"
|
||||
domain: custodian
|
||||
status: todo
|
||||
owner: custodian
|
||||
topic_slug: custodian
|
||||
created: "2026-05-11"
|
||||
updated: "2026-05-11"
|
||||
state_hub_workstream_id: "d5ffb008-a517-4b8b-86ce-093fcc285fb3"
|
||||
---
|
||||
|
||||
# Dashboard Poll Optimization
|
||||
|
||||
## Problem
|
||||
|
||||
With `uvicorn --reload` watching `.venv/` now fixed (CUST-WP-0039 precursor), the
|
||||
remaining sustained load on the API worker comes from the dashboard polling pattern:
|
||||
|
||||
- **24 pages**, 14 with active polling loops (POLL_HEAVY = 60 s, POLL = 15 s)
|
||||
- **`index.md` alone** runs 4 independent polling loops firing 11 API calls per cycle across 8 endpoints:
|
||||
`/state/summary`, `/sbom/snapshots/`, `/progress/`, `/workstreams/`, `/tasks/?limit=2000`,
|
||||
`/topics/`, `/repos/`, `/workstreams/workplan-index`
|
||||
- **`workstreams.md` and `dependencies.md`** each call `/state/summary` (the most
|
||||
expensive endpoint — queries 10+ tables) every 60 s just to extract dependency
|
||||
edges from `open_workstreams[].depends_on`
|
||||
- **Reference data** (`/topics/`, `/repos/`) is fetched independently by 10+ pages
|
||||
every 60 s with no caching; these datasets change rarely
|
||||
- **Background tabs** still poll at 120 s (`POLL_HIDDEN`) — they could pause entirely
|
||||
|
||||
## Goals
|
||||
|
||||
Reduce API request rate and per-request cost when the dashboard is open, without
|
||||
degrading UX or data freshness for the pages the user is actively viewing.
|
||||
|
||||
## Out of scope
|
||||
|
||||
- SSE / WebSocket push (would require significant API rework)
|
||||
- Observable data loaders / static build mode (different deployment model)
|
||||
- BroadcastChannel cross-tab sharing (nice-to-have, not in this workplan)
|
||||
|
||||
---
|
||||
|
||||
## Tasks
|
||||
|
||||
### T1 — Add Cache-Control headers to reference endpoints
|
||||
|
||||
```task
|
||||
id: CUST-WP-0039-T1
|
||||
status: todo
|
||||
priority: high
|
||||
state_hub_task_id: "b36713d8-d1d5-43c5-86c3-e22f72b68d62"
|
||||
```
|
||||
|
||||
Add `Cache-Control: max-age=60, stale-while-revalidate=30` to the list responses
|
||||
for `/topics/`, `/repos/`, and `/domains/`. These datasets change only when a human
|
||||
explicitly creates/renames a domain or registers a repo — never on their own.
|
||||
|
||||
Browser-level caching means that when 10 pages all fetch `/topics/` within a 60 s
|
||||
window, only the first request hits the API; the rest are served from cache.
|
||||
|
||||
**Implementation:** Add a FastAPI middleware or a response-header dependency in
|
||||
`api/routers/topics.py`, `repos.py`, and `domains.py` list endpoints. Use
|
||||
`from fastapi.responses import Response` + `response.headers["Cache-Control"]`, or
|
||||
a shared `cache_headers` dependency.
|
||||
|
||||
---
|
||||
|
||||
### T2 — Add ETag support to high-frequency list endpoints
|
||||
|
||||
```task
|
||||
id: CUST-WP-0039-T2
|
||||
status: todo
|
||||
priority: high
|
||||
state_hub_task_id: "75f1c2cd-0baf-4747-8c67-1dbfa81bde41"
|
||||
```
|
||||
|
||||
Add `ETag` (content hash of the response body) and handle `If-None-Match` for
|
||||
`/workstreams/`, `/tasks/`, and `/state/summary`. When the data hasn't changed the
|
||||
API returns `304 Not Modified` with no body — roughly 95% smaller than a full
|
||||
response.
|
||||
|
||||
**Implementation:**
|
||||
- Add a FastAPI middleware (in `api/main.py`) that intercepts JSON list responses,
|
||||
computes `md5(body)`, sets `ETag: "<hash>"`, and returns 304 if the request
|
||||
carries a matching `If-None-Match` header.
|
||||
- No client changes needed — `fetch()` respects ETags automatically when the
|
||||
response includes `Cache-Control: no-cache` (which forces revalidation but
|
||||
allows 304).
|
||||
|
||||
---
|
||||
|
||||
### T3 — Add lightweight `/state/deps` endpoint
|
||||
|
||||
```task
|
||||
id: CUST-WP-0039-T3
|
||||
status: todo
|
||||
priority: high
|
||||
state_hub_task_id: "cb7608d3-5dad-4b51-9b91-080539f7aa65"
|
||||
```
|
||||
|
||||
`workstreams.md` and `dependencies.md` call `/state/summary` (a ~10-table query)
|
||||
only to extract `open_workstreams[].{id, depends_on, blocks}`. Add a dedicated
|
||||
endpoint that returns just this:
|
||||
|
||||
```text
|
||||
GET /state/deps
|
||||
→ [{"id": "...", "title": "...", "status": "...", "depends_on": [...], "blocks": [...]}]
|
||||
```
|
||||
|
||||
Query: `SELECT id, title, status FROM workstreams WHERE status IN ('active','blocked')`
|
||||
plus the dependency join — roughly 1/10th the work of the full summary.
|
||||
|
||||
**Implementation:** New route in `api/routers/state.py` (or a new `deps.py`).
|
||||
Schema: `WorkstreamDepStub` already exists in `api/schemas/workstream_dependency.py`
|
||||
— reuse or extend it.
|
||||
|
||||
---
|
||||
|
||||
### T4 — Replace `/state/summary` in workstreams.md and dependencies.md
|
||||
|
||||
```task
|
||||
id: CUST-WP-0039-T4
|
||||
status: todo
|
||||
priority: medium
|
||||
depends_on: [CUST-WP-0039-T3]
|
||||
state_hub_task_id: "b80dce9c-b1ef-4606-9460-5100d6f58bce"
|
||||
```
|
||||
|
||||
Switch `workstreams.md` and `dependencies.md` to use the new `/state/deps` endpoint
|
||||
instead of the full `/state/summary`. Both pages construct a dep-edge map from
|
||||
`open_workstreams[].depends_on`; `/state/deps` provides exactly that.
|
||||
|
||||
Changes:
|
||||
- `dashboard/src/workstreams.md`: replace `apiFetch("/state/summary", ...)` with
|
||||
`apiFetch("/state/deps")`, update the variable extraction (`openWs = depsData`)
|
||||
- `dashboard/src/dependencies.md`: same substitution, update edge-building loop
|
||||
|
||||
---
|
||||
|
||||
### T5 — Consolidate index.md's 4 polling loops into 1
|
||||
|
||||
```task
|
||||
id: CUST-WP-0039-T5
|
||||
status: todo
|
||||
priority: medium
|
||||
state_hub_task_id: "7c2d5e01-9de5-48ad-aa0b-a37cf5332ad9"
|
||||
```
|
||||
|
||||
`index.md` runs 4 independent `while(true)` generators (`summaryState`,
|
||||
`sbomSnapState`, `regsState`, `wsChartState`) that each sleep 60 s independently.
|
||||
They were split because different sections needed different data, but they all use
|
||||
POLL_HEAVY and can be unified into a single loop with one `Promise.all` that fetches
|
||||
all 8 endpoints together.
|
||||
|
||||
Benefits:
|
||||
- 4 timers → 1: simpler, predictable, backoff applies uniformly
|
||||
- Fetch batching: all 8 requests fire simultaneously, most finish within the same
|
||||
server round-trip window
|
||||
- Simpler failure handling: one `failures` counter, one backoff
|
||||
|
||||
Approach: single `pageState` generator that yields a flat object with all fields
|
||||
(summary, snapshots, milestones, wsAll). Destructure at the use sites.
|
||||
|
||||
---
|
||||
|
||||
### T6 — Full visibility-based polling pause in config.js
|
||||
|
||||
```task
|
||||
id: CUST-WP-0039-T6
|
||||
status: todo
|
||||
priority: low
|
||||
state_hub_task_id: "31b6a353-040a-4f87-b2f1-1deab5cf6191"
|
||||
```
|
||||
|
||||
`pollDelay()` currently extends the interval to `POLL_HIDDEN = 120 s` when the tab
|
||||
is hidden. Change this to pause polling entirely while hidden and resume immediately
|
||||
on `visibilitychange`.
|
||||
|
||||
**Implementation:**
|
||||
|
||||
```js
|
||||
// config.js — replace pollDelay() with:
|
||||
/**
 * Wait before the next poll, pausing entirely while the tab is not visible.
 *
 * - No `document` available, or the tab is visible: sleep for `base`.
 * - Otherwise: resolve on the next `visibilitychange` event, so polling
 *   resumes as soon as the visibility state changes.
 *
 * @param {number} base - delay handed to `sleep()` when no pause is needed
 * @returns {Promise<void>}
 */
export async function waitForVisible(base) {
  const hasDocument = typeof document !== "undefined";
  if (!hasDocument || document.visibilityState === "visible") {
    return sleep(base);
  }
  return new Promise((resolve) => {
    // One-shot listener: detach before resolving so repeated calls never
    // accumulate handlers on the document.
    function onVisibilityChange() {
      document.removeEventListener("visibilitychange", onVisibilityChange);
      resolve();
    }
    document.addEventListener("visibilitychange", onVisibilityChange);
  });
}
|
||||
```
|
||||
|
||||
Pages replace `await sleep(pollDelay(...))` with `await waitForVisible(base)`.
|
||||
When the user switches back to the tab, the next poll fires immediately rather
|
||||
than waiting up to 120 s for the hidden-tab interval (`POLL_HIDDEN`) to expire.
|
||||
|
||||
---
|
||||
|
||||
## Expected impact
|
||||
|
||||
| Change | Request reduction |
|
||||
|--------|------------------|
|
||||
| T1 (cache headers) | ~70% drop in /topics, /repos, /domains hits |
|
||||
| T2 (ETags) | ~80% payload reduction for unchanged list responses |
|
||||
| T3+T4 (deps endpoint) | 2 full summary calls removed per 60 s cycle |
|
||||
| T5 (consolidate index) | 4 loops → 1, reduces timer jitter and staggered load |
|
||||
| T6 (visibility pause) | Eliminates all background-tab traffic entirely |
|
||||
Reference in New Issue
Block a user