feat(summary): revision-gated cache with stale-while-revalidate (STATE-WP-0066)

Replace the fixed 15s TTL on GET /state/summary with per-table revision
watermarks, stale-while-revalidate background refresh, and a progress-tail
section split. SQLAlchemy write hooks invalidate core or progress sections
on mutation. Adds tests, benchmark script, and operator docs.
This commit is contained in:
2026-06-22 16:27:32 +02:00
parent f88e74288d
commit 94c7817339
10 changed files with 614 additions and 35 deletions

View File

@@ -43,6 +43,12 @@ from api.schemas.topic import TopicRead, TopicWithWorkstreams
from api.schemas.workstream import WorkstreamRead, WorkstreamWithTaskCounts, WorkstreamWithDeps
from api.schemas.workstream_dependency import WorkstreamDepStub
from api.routers.workstreams import _workplan_index
from api.services.summary_cache import (
apply_progress_section,
fetch_summary_revision,
get_summary_cache,
register_summary_cache_invalidation,
)
from api.task_status import TERMINAL_TASK_STATUSES, status_value
from api.workplan_status import (
CLOSED_WORKPLAN_STATUSES,
@@ -53,28 +59,58 @@ from task_flow_engine import FlowEngine
# Router that groups every endpoint of this module under the /state prefix.
router = APIRouter(prefix="/state", tags=["state"])
# Module-level snapshot for GET /state/summary: the last payload, the
# time.monotonic() reading taken when it was stored, and its lifetime in seconds.
# NOTE(review): the revision-gated cache imported from api.services.summary_cache
# looks like the replacement for this trio — confirm whether it is still needed.
_SUMMARY_CACHE: StateSummary | None = None
_SUMMARY_CACHE_AT: float = 0.0
_SUMMARY_TTL = 15.0
# Same payload / timestamp / TTL trio for the dashboard overview.
# Presumably read by get_overview, whose body is outside this view — TODO confirm.
_OVERVIEW_CACHE: DashboardOverview | None = None
_OVERVIEW_CACHE_AT: float = 0.0
_OVERVIEW_TTL = 10.0
def _summary_cache_headers(
response: Response,
*,
cache_status: str,
revision: str,
) -> None:
response.headers["X-StateHub-Cache"] = cache_status
response.headers["X-StateHub-Revision"] = revision
response.headers["Cache-Control"] = "max-age=15, stale-while-revalidate=120"
@router.get("/summary", response_model=StateSummary)
async def get_summary(
    request: Request,
    response: Response,
    session: AsyncSession = Depends(get_session),
    refresh: bool = False,
) -> StateSummary:
    """Serve the state summary through the revision-gated cache.

    The current revision is fetched first and handed to the summary cache,
    which answers with a status label and, possibly, a cached snapshot:

    * ``"hit-revision"`` -- the cached snapshot is returned as is.
    * ``"progress-section"`` -- the cached snapshot is passed through
      ``apply_progress_section`` and that result is returned.
    * ``"stale"`` -- the cached snapshot is returned immediately after a
      refresh has been scheduled on the cache.
    * anything else (or no cached snapshot) -- a full snapshot is built with
      ``build_state_summary`` and stored in the cache.

    Args:
        request: Incoming request; only its ``Cache-Control`` header is read.
        response: Outgoing response; receives the cache diagnostic headers.
        session: Database session used for the revision lookup and rebuilds.
        refresh: When true, asks the cache to bypass cached data.

    Returns:
        The summary snapshot selected by the branches above.
    """
    revision = await fetch_summary_revision(session)
    revision_token = revision.combined_fingerprint()

    # Cache directives are case-insensitive, so normalise before matching;
    # a client sending "No-Cache" must force a refresh as well.
    cache_control = request.headers.get("cache-control", "")
    force_refresh = refresh or "no-cache" in cache_control.lower()

    cache = get_summary_cache()
    cache_status, cached = cache.resolve(revision, force_refresh=force_refresh)

    if cache_status == "hit-revision" and cached is not None:
        _summary_cache_headers(response, cache_status="hit-revision", revision=revision_token)
        return cached

    if cache_status == "progress-section" and cached is not None:
        result = await apply_progress_section(session, cached, revision)
        # NOTE(review): this path is reported to clients as "hit-revision",
        # not "progress-section" — kept as is; confirm that this is intended.
        _summary_cache_headers(response, cache_status="hit-revision", revision=revision_token)
        return result

    if cache_status == "stale" and cached is not None:
        # Answer with the old snapshot right away and refresh out of band.
        cache.schedule_refresh(revision)
        _summary_cache_headers(response, cache_status="stale", revision=revision_token)
        return cached

    # Nothing usable in the cache (or a forced refresh): rebuild and store.
    result = await build_state_summary(session)
    cache.store(result, revision)
    _summary_cache_headers(response, cache_status="miss", revision=revision_token)
    return result
async def build_state_summary(session: AsyncSession) -> StateSummary:
"""Build the full state summary snapshot (cache miss / forced refresh)."""
# Run all queries sequentially on one session.
# AsyncSession does not support concurrent operations (no gather on same session).
@@ -370,11 +406,13 @@ async def get_summary(
for w in open_ws
],
)
_SUMMARY_CACHE = result
_SUMMARY_CACHE_AT = time.monotonic()
return result
# Hand build_state_summary to the summary cache via configure().
# Presumably this is the builder the cache calls when it refreshes a
# snapshot on its own (see schedule_refresh) — TODO confirm in summary_cache.
get_summary_cache().configure(build_state_summary)
# Register the summary cache invalidation.
# NOTE(review): the commit message describes SQLAlchemy write hooks that
# invalidate cache sections on mutation; confirm that this call installs them.
register_summary_cache_invalidation()
@router.get("/overview", response_model=DashboardOverview)
async def get_overview(
request: Request,