perf(doi): eliminate HTTP self-calls in summary — 48 calls → 3 bulk DB queries

Root cause: C2/C9/C10 each made a full HTTP round-trip back to the API (asyncio.to_thread → urllib → TCP → uvicorn → SQLAlchemy → DB) for every repo. 16 repos × 3 calls = 48 self-calls at ~80-150ms each = ~6s total.

Fix: doi_engine.evaluate() accepts a prefetch dict. The summary endpoint runs 3 bulk DB queries (one SELECT for domain status, plus two GROUP BY count queries for TPSC snapshot counts and active goal counts) and passes the results directly to evaluate() — zero HTTP self-calls in summary mode.

Result: /repos/doi/summary 6s → <1s (6× improvement on top of prior 13×). Total improvement from original: 108s → <1s.

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 01:37:40 +01:00
parent 9ba1501b49
commit 245cd72ba3
2 changed files with 75 additions and 18 deletions
--- a/api/routers/repos.py
+++ b/api/routers/repos.py
@@ -5,11 +5,14 @@ from fastapi import APIRouter, Depends, HTTPException, status
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession

+from sqlalchemy import func
+
 from api.database import get_session
 from api.doi_engine import evaluate as _doi_evaluate
 from api.models.domain import Domain
 from api.models.managed_repo import ManagedRepo
 from api.models.repo_goal import RepoGoal
+from api.models.tpsc import TPSCSnapshot
 from api.models.task import Task
 from api.models.workstream import Workstream
 from api.schemas.doi import DoICriterion, DoIReport, DoISummaryEntry
@@ -74,12 +77,42 @@ async def register_repo(
@router.get("/doi/summary", response_model=list[DoISummaryEntry])
 async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoISummaryEntry]:
    """Return DoI tier for all active repos, worst tier first."""
-    result = await session.execute(
+    repos_result = await session.execute(
        select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.name)
    )
-    repos = list(result.scalars().all())
-    domain_result = await session.execute(select(Domain))
-    domain_map = {d.id: d.slug for d in domain_result.scalars().all()}
+    repos = list(repos_result.scalars().all())
+
+    # ── 3 bulk DB queries instead of 48 HTTP self-calls ───────────────────────
+    # C2: domain status by slug
+    domains_result = await session.execute(select(Domain))
+    domain_obj_map = {d.id: d for d in domains_result.scalars().all()}
+    domain_map = {d.id: d.slug for d in domain_obj_map.values()}
+    domain_status = {d.slug: d.status for d in domain_obj_map.values()}
+
+    # C9: TPSC snapshot count per repo (grouped by repo_id, re-keyed by repo slug)
+    repo_ids = [r.id for r in repos]
+    tpsc_result = await session.execute(
+        select(TPSCSnapshot.repo_id, func.count().label("cnt"))
+        .where(TPSCSnapshot.repo_id.in_(repo_ids))
+        .group_by(TPSCSnapshot.repo_id)
+    )
+    id_to_slug = {r.id: r.slug for r in repos}
+    tpsc_snap_counts = {id_to_slug[row.repo_id]: row.cnt for row in tpsc_result if row.repo_id in id_to_slug}
+
+    # C10: active repo goal count per repo (grouped by repo_id, re-keyed by repo slug)
+    goals_result = await session.execute(
+        select(RepoGoal.repo_id, func.count().label("cnt"))
+        .where(RepoGoal.repo_id.in_(repo_ids), RepoGoal.status == "active")
+        .group_by(RepoGoal.repo_id)
+    )
+    active_goal_counts = {id_to_slug[row.repo_id]: row.cnt for row in goals_result if row.repo_id in id_to_slug}
+
+    prefetch = {
+        "domain_status":     domain_status,
+        "tpsc_snap_counts":  tpsc_snap_counts,
+        "active_goal_counts": active_goal_counts,
+    }
+    # ─────────────────────────────────────────────────────────────────────────

    async def _check_one(repo: ManagedRepo) -> DoISummaryEntry:
        repo_dict = {
@@ -90,9 +123,7 @@ async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoIS
            "host_paths": repo.host_paths or {},
            "last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None,
        }
-        # skip_consistency=True: omits C7/C13 subprocess calls for speed.
-        # The full check is available via GET /repos/{slug}/doi.
-        report = await _doi_evaluate(repo_dict, skip_consistency=True)
+        report = await _doi_evaluate(repo_dict, skip_consistency=True, prefetch=prefetch)
        return DoISummaryEntry(
            repo_slug=repo.slug,
            domain_slug=domain_map.get(repo.domain_id),