generated from coulomb/repo-seed
perf(doi): eliminate HTTP self-calls in summary — 48 calls → 3 bulk DB queries
Root cause: C2/C9/C10 each made a full HTTP round-trip back to the API (asyncio.to_thread → urllib → TCP → uvicorn → SQLAlchemy → DB) for every repo. 16 repos × 3 calls = 48 self-calls at ~80–150 ms each ≈ 6 s total. Fix: doi_engine.evaluate() accepts a prefetch dict. The summary endpoint runs 3 bulk DB queries (one SELECT for domain status, plus two GROUP BY counts: TPSC snapshots and active goals per repo) and passes the results in directly — zero HTTP self-calls in summary mode. Result: /repos/doi/summary 6s → <1s (6× improvement on top of prior 13×). Total improvement from original: 108s → <1s. Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,11 +5,14 @@ from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from sqlalchemy import func
|
||||
|
||||
from api.database import get_session
|
||||
from api.doi_engine import evaluate as _doi_evaluate
|
||||
from api.models.domain import Domain
|
||||
from api.models.managed_repo import ManagedRepo
|
||||
from api.models.repo_goal import RepoGoal
|
||||
from api.models.tpsc import TPSCSnapshot
|
||||
from api.models.task import Task
|
||||
from api.models.workstream import Workstream
|
||||
from api.schemas.doi import DoICriterion, DoIReport, DoISummaryEntry
|
||||
@@ -74,12 +77,42 @@ async def register_repo(
|
||||
@router.get("/doi/summary", response_model=list[DoISummaryEntry])
|
||||
async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoISummaryEntry]:
|
||||
"""Return DoI tier for all active repos, worst tier first."""
|
||||
result = await session.execute(
|
||||
repos_result = await session.execute(
|
||||
select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.name)
|
||||
)
|
||||
repos = list(result.scalars().all())
|
||||
domain_result = await session.execute(select(Domain))
|
||||
domain_map = {d.id: d.slug for d in domain_result.scalars().all()}
|
||||
repos = list(repos_result.scalars().all())
|
||||
|
||||
# ── 3 bulk DB queries instead of 48 HTTP self-calls ───────────────────────
|
||||
# C2: domain status by slug
|
||||
domains_result = await session.execute(select(Domain))
|
||||
domain_obj_map = {d.id: d for d in domains_result.scalars().all()}
|
||||
domain_map = {d.id: d.slug for d in domain_obj_map.values()}
|
||||
domain_status = {d.slug: d.status for d in domain_obj_map.values()}
|
||||
|
||||
# C9: TPSC snapshot count, grouped by repo_id in SQL and re-keyed by repo slug for the prefetch dict
|
||||
repo_ids = [r.id for r in repos]
|
||||
tpsc_result = await session.execute(
|
||||
select(TPSCSnapshot.repo_id, func.count().label("cnt"))
|
||||
.where(TPSCSnapshot.repo_id.in_(repo_ids))
|
||||
.group_by(TPSCSnapshot.repo_id)
|
||||
)
|
||||
id_to_slug = {r.id: r.slug for r in repos}
|
||||
tpsc_snap_counts = {id_to_slug[row.repo_id]: row.cnt for row in tpsc_result if row.repo_id in id_to_slug}
|
||||
|
||||
# C10: active repo goal count, grouped by repo_id in SQL and re-keyed by repo slug for the prefetch dict
|
||||
goals_result = await session.execute(
|
||||
select(RepoGoal.repo_id, func.count().label("cnt"))
|
||||
.where(RepoGoal.repo_id.in_(repo_ids), RepoGoal.status == "active")
|
||||
.group_by(RepoGoal.repo_id)
|
||||
)
|
||||
active_goal_counts = {id_to_slug[row.repo_id]: row.cnt for row in goals_result if row.repo_id in id_to_slug}
|
||||
|
||||
prefetch = {
|
||||
"domain_status": domain_status,
|
||||
"tpsc_snap_counts": tpsc_snap_counts,
|
||||
"active_goal_counts": active_goal_counts,
|
||||
}
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _check_one(repo: ManagedRepo) -> DoISummaryEntry:
|
||||
repo_dict = {
|
||||
@@ -90,9 +123,7 @@ async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoIS
|
||||
"host_paths": repo.host_paths or {},
|
||||
"last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None,
|
||||
}
|
||||
# skip_consistency=True: omits C7/C13 subprocess calls for speed.
|
||||
# The full check is available via GET /repos/{slug}/doi.
|
||||
report = await _doi_evaluate(repo_dict, skip_consistency=True)
|
||||
report = await _doi_evaluate(repo_dict, skip_consistency=True, prefetch=prefetch)
|
||||
return DoISummaryEntry(
|
||||
repo_slug=repo.slug,
|
||||
domain_slug=domain_map.get(repo.domain_id),
|
||||
|
||||
Reference in New Issue
Block a user