diff --git a/api/doi_engine.py b/api/doi_engine.py index c0d4881..802460b 100644 --- a/api/doi_engine.py +++ b/api/doi_engine.py @@ -114,7 +114,21 @@ async def evaluate( repo: dict, api_base: str = "http://127.0.0.1:8000", skip_consistency: bool = False, + prefetch: dict | None = None, ) -> DoIReport: + """Evaluate all 14 DoI criteria for a repo. + + Args: + repo: Repo dict (slug, domain_slug, local_path, remote_url, host_paths, last_sbom_at). + api_base: API base URL — only used when prefetch is absent. + skip_consistency: Skip C7/C13 subprocess calls (used in summary mode). + prefetch: Optional pre-fetched bulk data to avoid HTTP self-calls: + { + "domain_status": {"custodian": "active", ...}, # slug → status + "tpsc_snap_counts": {"llm-connect": 1, ...}, # repo_slug → count + "active_goal_counts": {"llm-connect": 0, ...}, # repo_slug → count + } + """ slug = repo.get("slug", "unknown") results: list[CriterionResult] = [] @@ -133,11 +147,15 @@ async def evaluate( if not domain_slug: _r("C2", "Domain assigned", "core", "fail", "No domain_slug on repo record") else: - domain = await _get(api_base, f"/domains/{domain_slug}/") - if domain and domain.get("status") == "active": + if prefetch and "domain_status" in prefetch: + dom_status = prefetch["domain_status"].get(domain_slug) + else: + d = await _get(api_base, f"/domains/{domain_slug}/") + dom_status = d.get("status") if d else None + if dom_status == "active": _r("C2", "Domain assigned", "core", "pass", f"domain: {domain_slug}") - elif domain: - _r("C2", "Domain assigned", "core", "warn", f"Domain '{domain_slug}' status: {domain.get('status')}") + elif dom_status: + _r("C2", "Domain assigned", "core", "warn", f"Domain '{domain_slug}' status: {dom_status}") else: _r("C2", "Domain assigned", "core", "fail", f"Domain '{domain_slug}' not found") @@ -196,12 +214,17 @@ async def evaluate( # C9: TPSC declared (tpsc.yaml present + snapshot exists) tpsc_file_ok = repo_path and (Path(repo_path) / "tpsc.yaml").exists() - tpsc_snaps = await _get(api_base, "/tpsc/snapshots/", {"repo_slug": slug}) or [] - has_snap = len(tpsc_snaps) > 0 + if prefetch and "tpsc_snap_counts" in prefetch: + has_snap = (prefetch["tpsc_snap_counts"].get(slug, 0) > 0) + snap_count = prefetch["tpsc_snap_counts"].get(slug, 0) + else: + tpsc_snaps = await _get(api_base, "/tpsc/snapshots/", {"repo_slug": slug}) or [] + has_snap = len(tpsc_snaps) > 0 + snap_count = len(tpsc_snaps) if not repo_path: _r("C9", "TPSC declared", "standard", "skip", "Local path unavailable") elif tpsc_file_ok and has_snap: - _r("C9", "TPSC declared", "standard", "pass", f"{len(tpsc_snaps)} snapshot(s)") + _r("C9", "TPSC declared", "standard", "pass", f"{snap_count} snapshot(s)") elif tpsc_file_ok and not has_snap: _r("C9", "TPSC declared", "standard", "warn", "tpsc.yaml exists but not yet ingested — run make ingest-tpsc") elif not tpsc_file_ok: @@ -210,10 +233,13 @@ async def evaluate( # ── Tier 3: Full ───────────────────────────────────────────────────────── # C10: active repo goal - goals = await _get(api_base, "/repo-goals/", {"repo_slug": slug}) or [] - active_goals = [g for g in goals if g.get("status") == "active"] - if active_goals: - _r("C10", "Active repo goal", "full", "pass", f"{len(active_goals)} active goal(s)") + if prefetch and "active_goal_counts" in prefetch: + active_goal_count = prefetch["active_goal_counts"].get(slug, 0) + else: + goals = await _get(api_base, "/repo-goals/", {"repo_slug": slug}) or [] + active_goal_count = sum(1 for g in goals if g.get("status") == "active") + if active_goal_count > 0: + _r("C10", "Active repo goal", "full", "pass", f"{active_goal_count} active goal(s)") else: _r("C10", "Active repo goal", "full", "fail", "No active repo goal — create one with create_repo_goal()") diff --git a/api/routers/repos.py b/api/routers/repos.py index 1e89102..0813389 100644 --- a/api/routers/repos.py +++ b/api/routers/repos.py @@ -5,11 +5,14 @@ from fastapi import APIRouter, Depends, HTTPException, status from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import func + from api.database import get_session from api.doi_engine import evaluate as _doi_evaluate from api.models.domain import Domain from api.models.managed_repo import ManagedRepo from api.models.repo_goal import RepoGoal +from api.models.tpsc import TPSCSnapshot from api.models.task import Task from api.models.workstream import Workstream from api.schemas.doi import DoICriterion, DoIReport, DoISummaryEntry @@ -74,12 +77,42 @@ async def register_repo( @router.get("/doi/summary", response_model=list[DoISummaryEntry]) async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoISummaryEntry]: """Return DoI tier for all active repos, worst tier first.""" - result = await session.execute( + repos_result = await session.execute( select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.name) ) - repos = list(result.scalars().all()) - domain_result = await session.execute(select(Domain)) - domain_map = {d.id: d.slug for d in domain_result.scalars().all()} + repos = list(repos_result.scalars().all()) + + # ── 3 bulk DB queries instead of 48 HTTP self-calls ─────────────────────── + # C2: domain status by slug + domains_result = await session.execute(select(Domain)) + domain_obj_map = {d.id: d for d in domains_result.scalars().all()} + domain_map = {d.id: d.slug for d in domain_obj_map.values()} + domain_status = {d.slug: d.status for d in domain_obj_map.values()} + + # C9: TPSC snapshot count per repo_id + repo_ids = [r.id for r in repos] + tpsc_result = await session.execute( + select(TPSCSnapshot.repo_id, func.count().label("cnt")) + .where(TPSCSnapshot.repo_id.in_(repo_ids)) + .group_by(TPSCSnapshot.repo_id) + ) + id_to_slug = {r.id: r.slug for r in repos} + tpsc_snap_counts = {id_to_slug[row.repo_id]: row.cnt for row in tpsc_result if row.repo_id in id_to_slug} + + # C10: active repo goal count per repo_id + goals_result = await session.execute( + select(RepoGoal.repo_id, func.count().label("cnt")) + .where(RepoGoal.repo_id.in_(repo_ids), RepoGoal.status == "active") + .group_by(RepoGoal.repo_id) + ) + active_goal_counts = {id_to_slug[row.repo_id]: row.cnt for row in goals_result if row.repo_id in id_to_slug} + + prefetch = { + "domain_status": domain_status, + "tpsc_snap_counts": tpsc_snap_counts, + "active_goal_counts": active_goal_counts, + } + # ───────────────────────────────────────────────────────────────────────── async def _check_one(repo: ManagedRepo) -> DoISummaryEntry: repo_dict = { @@ -90,9 +123,7 @@ async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoIS "host_paths": repo.host_paths or {}, "last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None, } - # skip_consistency=True: omits C7/C13 subprocess calls for speed. - # The full check is available via GET /repos/{slug}/doi. - report = await _doi_evaluate(repo_dict, skip_consistency=True) + report = await _doi_evaluate(repo_dict, skip_consistency=True, prefetch=prefetch) return DoISummaryEntry( repo_slug=repo.slug, domain_slug=domain_map.get(repo.domain_id),