generated from coulomb/repo-seed
perf(doi): eliminate HTTP self-calls in summary — 48 calls → 3 bulk DB queries
Root cause: criteria C2, C9 and C10 each made a full HTTP round-trip back to the API (asyncio.to_thread → urllib → TCP → uvicorn → SQLAlchemy → DB) for every repo. 16 repos × 3 calls = 48 self-calls at ~80-150ms each = ~6s total.
Fix: doi_engine.evaluate() now accepts a prefetch dict. The summary endpoint runs 3 bulk GROUP BY queries (domain status, TPSC snapshot counts, active goal counts) and passes the results directly to evaluate(), so summary mode makes zero HTTP self-calls.
Result: /repos/doi/summary 6s → <1s (6× improvement on top of the prior 13×). Total improvement from the original: 108s → <1s.
Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -114,7 +114,21 @@ async def evaluate(
|
||||
repo: dict,
|
||||
api_base: str = "http://127.0.0.1:8000",
|
||||
skip_consistency: bool = False,
|
||||
prefetch: dict | None = None,
|
||||
) -> DoIReport:
|
||||
"""Evaluate all 14 DoI criteria for a repo.
|
||||
|
||||
Args:
|
||||
repo: Repo dict (slug, domain_slug, local_path, remote_url, host_paths, last_sbom_at).
|
||||
api_base: API base URL — only used when prefetch is absent.
|
||||
skip_consistency: Skip C7/C13 subprocess calls (used in summary mode).
|
||||
prefetch: Optional pre-fetched bulk data to avoid HTTP self-calls:
|
||||
{
|
||||
"domain_status": {"custodian": "active", ...}, # slug → status
|
||||
"tpsc_snap_counts": {"llm-connect": 1, ...}, # repo_slug → count
|
||||
"active_goal_counts": {"llm-connect": 0, ...}, # repo_slug → count
|
||||
}
|
||||
"""
|
||||
slug = repo.get("slug", "unknown")
|
||||
results: list[CriterionResult] = []
|
||||
|
||||
@@ -133,11 +147,15 @@ async def evaluate(
|
||||
if not domain_slug:
|
||||
_r("C2", "Domain assigned", "core", "fail", "No domain_slug on repo record")
|
||||
else:
|
||||
domain = await _get(api_base, f"/domains/{domain_slug}/")
|
||||
if domain and domain.get("status") == "active":
|
||||
if prefetch and "domain_status" in prefetch:
|
||||
dom_status = prefetch["domain_status"].get(domain_slug)
|
||||
else:
|
||||
d = await _get(api_base, f"/domains/{domain_slug}/")
|
||||
dom_status = d.get("status") if d else None
|
||||
if dom_status == "active":
|
||||
_r("C2", "Domain assigned", "core", "pass", f"domain: {domain_slug}")
|
||||
elif domain:
|
||||
_r("C2", "Domain assigned", "core", "warn", f"Domain '{domain_slug}' status: {domain.get('status')}")
|
||||
elif dom_status:
|
||||
_r("C2", "Domain assigned", "core", "warn", f"Domain '{domain_slug}' status: {dom_status}")
|
||||
else:
|
||||
_r("C2", "Domain assigned", "core", "fail", f"Domain '{domain_slug}' not found")
|
||||
|
||||
@@ -196,12 +214,17 @@ async def evaluate(
|
||||
|
||||
# C9: TPSC declared (tpsc.yaml present + snapshot exists)
|
||||
tpsc_file_ok = repo_path and (Path(repo_path) / "tpsc.yaml").exists()
|
||||
tpsc_snaps = await _get(api_base, "/tpsc/snapshots/", {"repo_slug": slug}) or []
|
||||
has_snap = len(tpsc_snaps) > 0
|
||||
if prefetch and "tpsc_snap_counts" in prefetch:
|
||||
has_snap = (prefetch["tpsc_snap_counts"].get(slug, 0) > 0)
|
||||
snap_count = prefetch["tpsc_snap_counts"].get(slug, 0)
|
||||
else:
|
||||
tpsc_snaps = await _get(api_base, "/tpsc/snapshots/", {"repo_slug": slug}) or []
|
||||
has_snap = len(tpsc_snaps) > 0
|
||||
snap_count = len(tpsc_snaps)
|
||||
if not repo_path:
|
||||
_r("C9", "TPSC declared", "standard", "skip", "Local path unavailable")
|
||||
elif tpsc_file_ok and has_snap:
|
||||
_r("C9", "TPSC declared", "standard", "pass", f"{len(tpsc_snaps)} snapshot(s)")
|
||||
_r("C9", "TPSC declared", "standard", "pass", f"{snap_count} snapshot(s)")
|
||||
elif tpsc_file_ok and not has_snap:
|
||||
_r("C9", "TPSC declared", "standard", "warn", "tpsc.yaml exists but not yet ingested — run make ingest-tpsc")
|
||||
elif not tpsc_file_ok:
|
||||
@@ -210,10 +233,13 @@ async def evaluate(
|
||||
# ── Tier 3: Full ─────────────────────────────────────────────────────────
|
||||
|
||||
# C10: active repo goal
|
||||
goals = await _get(api_base, "/repo-goals/", {"repo_slug": slug}) or []
|
||||
active_goals = [g for g in goals if g.get("status") == "active"]
|
||||
if active_goals:
|
||||
_r("C10", "Active repo goal", "full", "pass", f"{len(active_goals)} active goal(s)")
|
||||
if prefetch and "active_goal_counts" in prefetch:
|
||||
active_goal_count = prefetch["active_goal_counts"].get(slug, 0)
|
||||
else:
|
||||
goals = await _get(api_base, "/repo-goals/", {"repo_slug": slug}) or []
|
||||
active_goal_count = sum(1 for g in goals if g.get("status") == "active")
|
||||
if active_goal_count > 0:
|
||||
_r("C10", "Active repo goal", "full", "pass", f"{active_goal_count} active goal(s)")
|
||||
else:
|
||||
_r("C10", "Active repo goal", "full", "fail", "No active repo goal — create one with create_repo_goal()")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user