feat(api): dashboard poll optimisation — T1, T2, T3

T1: Cache-Control max-age=60 on /topics/, /repos/, /domains/ list endpoints
    so repeated dashboard polls within a minute are served from browser cache.

T2: ETag middleware (md5 hash) on all JSON GET responses with conditional-GET
    (304 Not Modified) support; If-None-Match and ETag added to CORS headers.
    ETag registered inside CORS so 304s automatically carry CORS headers.

T3: GET /state/deps — lightweight dep-graph endpoint returning open workstreams
    with depends_on/blocks edges only, skipping the 10-table full-summary query.
    Prerequisite for T4 (switching workstreams.md and dependencies.md off /state/summary).

Workplan: CUST-WP-0039-dashboard-poll-optimization.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-11 17:26:30 +02:00
parent 6f3a46dd07
commit 512c0a73ed
6 changed files with 340 additions and 4 deletions

View File

@@ -1,8 +1,12 @@
import hashlib
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import Response as StarletteResponse
from api.database import engine
from api.routers import decisions, extension_points, progress, state, tasks, technical_debt, topics, workstreams, workstream_dependencies
@@ -12,6 +16,40 @@ from api.routers import interface_changes
from api.routers import flows
class ETagMiddleware(BaseHTTPMiddleware):
    """Add ETag + conditional-GET (304) support to successful JSON GET responses.

    The response body is buffered, hashed with MD5 (as a content fingerprint,
    not for security) and the quoted hex digest is sent as a strong ``ETag``.
    When the request's ``If-None-Match`` header names that tag, a body-less
    ``304 Not Modified`` is returned instead of the full payload.

    Only ``GET`` requests that produced a ``200`` JSON response are touched;
    every other response is passed through unchanged so that error bodies
    (404, 422, ...) are never converted into a 304.
    """

    # Headers a 304 should repeat when the equivalent 200 would have carried
    # them, so caches refresh the stored response with the right policy.
    _REPEATED_ON_304 = ("content-location", "expires", "vary")

    @staticmethod
    def _if_none_match_hits(header_value: str | None, etag: str) -> bool:
        """Return True when *header_value* (an If-None-Match header) names *etag*.

        Handles the three forms the header can take: the ``*`` wildcard, a
        comma-separated list of entity tags, and weak validators
        (``W/"..."``).  If-None-Match uses weak comparison, so a ``W/`` prefix
        is ignored; intermediaries that re-encode the body commonly weaken
        the tag, and exact string equality would then never match.
        """
        if not header_value:
            return False
        candidates = [candidate.strip() for candidate in header_value.split(",")]
        if "*" in candidates:
            return True
        return any(candidate.removeprefix("W/") == etag for candidate in candidates)

    async def dispatch(self, request: Request, call_next):
        """Run the downstream app, then attach an ETag or short-circuit to 304.

        Args:
            request: The incoming request; its method and ``If-None-Match``
                header decide whether conditional handling applies.
            call_next: Callable that forwards the request down the stack.

        Returns:
            The untouched downstream response for non-GET, non-200 or non-JSON
            traffic; otherwise either a 304 without a body or a rebuilt 200
            carrying the same body plus ``ETag`` (and a default
            ``Cache-Control: no-cache`` when the endpoint set none).
        """
        response = await call_next(request)
        if request.method != "GET" or response.status_code != 200:
            return response
        if "application/json" not in response.headers.get("content-type", ""):
            return response

        # The downstream body is a one-shot stream: drain it so it can be hashed.
        body = b"".join([chunk async for chunk in response.body_iterator])
        etag = '"' + hashlib.md5(body, usedforsecurity=False).hexdigest() + '"'

        if self._if_none_match_hits(request.headers.get("if-none-match"), etag):
            # Keep the endpoint's own caching policy (e.g. max-age=60) on the
            # 304; forcing no-cache here would overwrite it in the browser cache.
            not_modified_headers = {
                "ETag": etag,
                "Cache-Control": response.headers.get("cache-control", "no-cache"),
            }
            for name in self._REPEATED_ON_304:
                value = response.headers.get(name)
                if value is not None:
                    not_modified_headers[name] = value
            return StarletteResponse(status_code=304, headers=not_modified_headers)

        # Content-Length is recomputed by the new response; an upstream ETag is
        # dropped so the header is not emitted twice.
        headers = {
            k: v
            for k, v in response.headers.items()
            if k.lower() not in ("content-length", "etag")
        }
        headers["ETag"] = etag
        if not any(k.lower() == "cache-control" for k in headers):
            # no-cache = "store, but revalidate every time", which is what lets
            # the browser send If-None-Match and receive the cheap 304.
            headers["Cache-Control"] = "no-cache"
        return StarletteResponse(
            content=body,
            status_code=response.status_code,
            headers=headers,
            media_type=response.media_type,
        )
@asynccontextmanager
async def lifespan(app: FastAPI):
yield
@@ -28,11 +66,13 @@ app = FastAPI(
_cors_env = os.getenv("CORS_ORIGINS", "http://localhost:3000,http://127.0.0.1:3000")
_cors_origins = [o.strip() for o in _cors_env.split(",") if o.strip()]
app.add_middleware(ETagMiddleware)
app.add_middleware(
CORSMiddleware,
allow_origins=_cors_origins,
allow_methods=["GET", "POST", "PATCH", "DELETE", "PUT"],
allow_headers=["Content-Type"],
allow_headers=["Content-Type", "If-None-Match"],
expose_headers=["ETag"],
)
app.include_router(domains.router)

View File

@@ -1,6 +1,6 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -18,9 +18,11 @@ router = APIRouter(prefix="/domains", tags=["domains"])
@router.get("/", response_model=list[DomainRead])
async def list_domains(
response: Response,
status: str | None = Query(None, description="active | archived | all"),
session: AsyncSession = Depends(get_session),
) -> list[Domain]:
response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30"
q = select(Domain).order_by(Domain.name)
if status and status != "all":
q = q.where(Domain.status == status)

View File

@@ -9,7 +9,7 @@ import uuid
from datetime import datetime, timezone
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, HTTPException, Response, status
from sqlalchemy import case, func, select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -50,9 +50,11 @@ router = APIRouter(prefix="/repos", tags=["repos"])
@router.get("/", response_model=list[RepoRead])
async def list_repos(
response: Response,
domain: str | None = None,
session: AsyncSession = Depends(get_session),
) -> list[ManagedRepo]:
response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30"
q = select(ManagedRepo).order_by(ManagedRepo.name)
if domain:
domain_row = await session.execute(select(Domain).where(Domain.slug == domain))

View File

@@ -379,6 +379,87 @@ async def _build_domain_summaries(session: AsyncSession) -> list[DomainSummary]:
]
@router.get("/deps", response_model=list[WorkstreamWithDeps])
async def get_deps(session: AsyncSession = Depends(get_session)) -> list[WorkstreamWithDeps]:
    """Return open workstreams together with their dependency edges only.

    Lightweight alternative to /state/summary: the payload has the same shape
    as ``open_workstreams`` there, but the 10-table full-summary computation is
    skipped and task counts are omitted (all zero). Intended for workstreams.md
    and dependencies.md, which only need the dependency edges.
    """
    # 1. Open workstreams (active or blocked), without loading relationships.
    open_stmt = (
        select(Workstream)
        .options(noload("*"))
        .where(Workstream.status.in_(["active", "blocked"]))
        .order_by(Workstream.due_date.asc().nullslast(), Workstream.created_at)
    )
    workstreams = list((await session.execute(open_stmt)).scalars().all())
    open_ids = [ws.id for ws in workstreams]

    # 2. Every dependency row that touches an open workstream on either end.
    edges = []
    if open_ids:
        touches_open = (
            (WorkstreamDependency.from_workstream_id.in_(open_ids))
            | (WorkstreamDependency.to_workstream_id.in_(open_ids))
        )
        edge_result = await session.execute(select(WorkstreamDependency).where(touches_open))
        edges = list(edge_result.scalars().all())

    # 3. Collect the workstream / task ids those rows reference.
    referenced_ws_ids: set = {edge.from_workstream_id for edge in edges}
    referenced_ws_ids |= {edge.to_workstream_id for edge in edges if edge.to_workstream_id}
    referenced_task_ids: set = {edge.to_task_id for edge in edges if edge.to_task_id}

    # 4. Resolve referenced workstreams that are not open (needed for slug/title).
    ws_by_id: dict = {ws.id: ws for ws in workstreams}
    missing_ids = referenced_ws_ids.difference(ws_by_id)
    if missing_ids:
        extra_result = await session.execute(
            select(Workstream).options(noload("*")).where(Workstream.id.in_(missing_ids))
        )
        ws_by_id.update((ws.id, ws) for ws in extra_result.scalars())

    # 5. Resolve referenced tasks (needed for the title).
    tasks_by_id: dict = {}
    if referenced_task_ids:
        task_result = await session.execute(
            select(Task).options(noload("*")).where(Task.id.in_(referenced_task_ids))
        )
        tasks_by_id = {task.id: task for task in task_result.scalars().all()}

    # 6. Bucket each row into the outgoing / incoming list of the open
    #    workstream(s) it touches. Only open workstreams get buckets.
    depends_on: dict = {ws_id: [] for ws_id in open_ids}
    blocks: dict = {ws_id: [] for ws_id in open_ids}
    for edge in edges:
        source_id = edge.from_workstream_id
        target_ws_id = edge.to_workstream_id
        target_task_id = edge.to_task_id
        source_is_open = source_id in depends_on

        if source_is_open and target_ws_id and target_ws_id in ws_by_id:
            target_ws = ws_by_id[target_ws_id]
            depends_on[source_id].append(
                WorkstreamDepStub(
                    dep_id=edge.id,
                    target_type="workstream",
                    relationship_type=edge.relationship_type,
                    workstream_id=target_ws_id,
                    workstream_slug=target_ws.slug,
                    workstream_title=target_ws.title,
                    description=edge.description,
                )
            )

        if source_is_open and target_task_id and target_task_id in tasks_by_id:
            depends_on[source_id].append(
                WorkstreamDepStub(
                    dep_id=edge.id,
                    target_type="task",
                    relationship_type=edge.relationship_type,
                    task_id=target_task_id,
                    task_title=tasks_by_id[target_task_id].title,
                    description=edge.description,
                )
            )

        if target_ws_id and target_ws_id in blocks and source_id in ws_by_id:
            source_ws = ws_by_id[source_id]
            blocks[target_ws_id].append(
                WorkstreamDepStub(
                    dep_id=edge.id,
                    target_type="workstream",
                    relationship_type=edge.relationship_type,
                    workstream_id=source_id,
                    workstream_slug=source_ws.slug,
                    workstream_title=source_ws.title,
                    description=edge.description,
                )
            )

    # 7. Emit in the same order the open workstreams were queried.
    payload = []
    for ws in workstreams:
        base_fields = WorkstreamRead.model_validate(ws).model_dump()
        payload.append(
            WorkstreamWithDeps(
                **base_fields,
                depends_on=depends_on[ws.id],
                blocks=blocks[ws.id],
            )
        )
    return payload
_PRIORITY_RANK = {
TaskPriority.critical: 0,
TaskPriority.high: 1,

View File

@@ -1,6 +1,6 @@
import uuid
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, HTTPException, Response, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -23,9 +23,11 @@ async def _resolve_domain_id(domain_slug: str, session: AsyncSession) -> uuid.UU
@router.get("/", response_model=list[TopicRead])
async def list_topics(
response: Response,
status: TopicStatus | None = None,
session: AsyncSession = Depends(get_session),
) -> list[Topic]:
response.headers["Cache-Control"] = "max-age=60, stale-while-revalidate=30"
q = select(Topic)
if status:
q = q.where(Topic.status == status)

View File

@@ -0,0 +1,209 @@
---
id: CUST-WP-0039
type: workplan
title: "Dashboard Poll Optimization"
domain: custodian
status: todo
owner: custodian
topic_slug: custodian
created: "2026-05-11"
updated: "2026-05-11"
state_hub_workstream_id: "d5ffb008-a517-4b8b-86ce-093fcc285fb3"
---
# Dashboard Poll Optimization
## Problem
With `uvicorn --reload` watching `.venv/` now fixed (CUST-WP-0039 precursor), the
remaining sustained load on the API worker comes from the dashboard polling pattern:
- **24 pages**, 14 with active polling loops (POLL_HEAVY = 60 s, POLL = 15 s)
- **`index.md` alone** runs 4 independent polling loops firing 11 API calls per cycle across 8 endpoints:
`/state/summary`, `/sbom/snapshots/`, `/progress/`, `/workstreams/`, `/tasks/?limit=2000`,
`/topics/`, `/repos/`, `/workstreams/workplan-index`
- **`workstreams.md` and `dependencies.md`** each call `/state/summary` (the most
expensive endpoint — queries 10+ tables) every 60 s just to extract dependency
edges from `open_workstreams[].depends_on`
- **Reference data** (`/topics/`, `/repos/`) is fetched independently by 10+ pages
every 60 s with no caching; these datasets change rarely
- **Background tabs** still poll at 120 s (`POLL_HIDDEN`) — they could pause entirely
## Goals
Reduce API request rate and per-request cost when the dashboard is open, without
degrading UX or data freshness for the pages the user is actively viewing.
## Out of scope
- SSE / WebSocket push (would require significant API rework)
- Observable data loaders / static build mode (different deployment model)
- BroadcastChannel cross-tab sharing (nice-to-have, not in this workplan)
---
## Tasks
### T1 — Add Cache-Control headers to reference endpoints
```task
id: CUST-WP-0039-T1
status: todo
priority: high
state_hub_task_id: "b36713d8-d1d5-43c5-86c3-e22f72b68d62"
```
Add `Cache-Control: max-age=60, stale-while-revalidate=30` to the list responses
for `/topics/`, `/repos/`, and `/domains/`. These datasets change only when a human
explicitly creates/renames a domain or registers a repo — never on their own.
Browser-level caching means that when 10 pages all fetch `/topics/` within a 60 s
window, only the first request hits the API; the rest are served from cache.
**Implementation:** Add a FastAPI middleware or a response-header dependency in
`api/routers/topics.py`, `repos.py`, and `domains.py` list endpoints. Use
`from fastapi.responses import Response` + `response.headers["Cache-Control"]`, or
a shared `cache_headers` dependency.
---
### T2 — Add ETag support to high-frequency list endpoints
```task
id: CUST-WP-0039-T2
status: todo
priority: high
state_hub_task_id: "75f1c2cd-0baf-4747-8c67-1dbfa81bde41"
```
Add `ETag` (content hash of the response body) and handle `If-None-Match` for
`/workstreams/`, `/tasks/`, and `/state/summary`. When the data hasn't changed the
API returns `304 Not Modified` with no body — roughly 95% smaller than a full
response.
**Implementation:**
- Add a FastAPI middleware (in `api/main.py`) that intercepts JSON list responses,
computes `md5(body)`, sets `ETag: "<hash>"`, and returns 304 if the request
carries a matching `If-None-Match` header.
- No client changes needed — the browser's HTTP cache sends `If-None-Match`
automatically on the next `fetch()` when the cached response carries an `ETag` and
`Cache-Control: no-cache` (which forces revalidation but allows 304).
---
### T3 — Add lightweight `/state/deps` endpoint
```task
id: CUST-WP-0039-T3
status: todo
priority: high
state_hub_task_id: "cb7608d3-5dad-4b51-9b91-080539f7aa65"
```
`workstreams.md` and `dependencies.md` call `/state/summary` (a ~10-table query)
only to extract `open_workstreams[].{id, depends_on, blocks}`. Add a dedicated
endpoint that returns just this:
```json
GET /state/deps
[{"id": "...", "title": "...", "status": "...", "depends_on": [...], "blocks": [...]}]
```
Query: `SELECT id, title, status FROM workstreams WHERE status IN ('active','blocked')`
plus the dependency join — roughly 1/10th the work of the full summary.
**Implementation:** New route in `api/routers/state.py` (or a new `deps.py`).
Schema: `WorkstreamDepStub` already exists in `api/schemas/workstream_dependency.py`
— reuse or extend it.
---
### T4 — Replace `/state/summary` in workstreams.md and dependencies.md
```task
id: CUST-WP-0039-T4
status: todo
priority: medium
depends_on: [CUST-WP-0039-T3]
state_hub_task_id: "b80dce9c-b1ef-4606-9460-5100d6f58bce"
```
Switch `workstreams.md` and `dependencies.md` to use the new `/state/deps` endpoint
instead of the full `/state/summary`. Both pages construct a dep-edge map from
`open_workstreams[].depends_on`; `/state/deps` provides exactly that.
Changes:
- `dashboard/src/workstreams.md`: replace `apiFetch("/state/summary", ...)` with
`apiFetch("/state/deps")`, update the variable extraction (`openWs = depsData`)
- `dashboard/src/dependencies.md`: same substitution, update edge-building loop
---
### T5 — Consolidate index.md's 4 polling loops into 1
```task
id: CUST-WP-0039-T5
status: todo
priority: medium
state_hub_task_id: "7c2d5e01-9de5-48ad-aa0b-a37cf5332ad9"
```
`index.md` runs 4 independent `while(true)` generators (`summaryState`,
`sbomSnapState`, `regsState`, `wsChartState`) that each sleep 60 s independently.
They were split because different sections needed different data, but they all use
POLL_HEAVY and can be unified into a single loop with one `Promise.all` that fetches
all 8 endpoints together.
Benefits:
- 4 timers → 1: simpler, predictable, backoff applies uniformly
- Fetch batching: all 8 requests fire simultaneously, most finish within the same
server round-trip window
- Simpler failure handling: one `failures` counter, one backoff
Approach: single `pageState` generator that yields a flat object with all fields
(summary, snapshots, milestones, wsAll). Destructure at the use sites.
---
### T6 — Full visibility-based polling pause in config.js
```task
id: CUST-WP-0039-T6
status: todo
priority: low
state_hub_task_id: "31b6a353-040a-4f87-b2f1-1deab5cf6191"
```
`pollDelay()` currently extends the interval to `POLL_HIDDEN = 120 s` when the tab
is hidden. Change this to pause polling entirely while hidden and resume immediately
on `visibilitychange`.
**Implementation:**
```js
// config.js — replace pollDelay() with:
export async function waitForVisible(base) {
if (typeof document === "undefined") return sleep(base);
if (document.visibilityState === "visible") return sleep(base);
return new Promise(resolve => {
const handler = () => { document.removeEventListener("visibilitychange", handler); resolve(); };
document.addEventListener("visibilitychange", handler);
});
}
```
Pages replace `await sleep(pollDelay(...))` with `await waitForVisible(base)`.
When the user switches back to the tab, the next poll fires immediately rather
than waiting up to 120 s for the backoff to expire.
---
## Expected impact
| Change | Request reduction |
|--------|------------------|
| T1 (cache headers) | ~70% drop in /topics, /repos, /domains hits |
| T2 (ETags) | ~80% payload reduction for unchanged list responses |
| T3+T4 (deps endpoint) | 2 full summary calls removed per 60 s cycle |
| T5 (consolidate index) | 4 loops → 1, reduces timer jitter and staggered load |
| T6 (visibility pause) | Eliminates all background-tab traffic entirely |