"""HTTP routes for managed repositories: CRUD, host paths, DoI reports, dispatch."""

import asyncio
import uuid
from collections import defaultdict
from datetime import datetime, timezone

from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import case, func, select
from sqlalchemy.ext.asyncio import AsyncSession

from api.database import get_session
from api.doi_engine import compute_fingerprint, evaluate as _doi_evaluate
from api.models.doi_cache import DOICache
from api.models.domain import Domain
from api.models.managed_repo import ManagedRepo
from api.models.repo_goal import RepoGoal
from api.models.tpsc import TPSCSnapshot
from api.models.task import Task
from api.models.workstream import Workstream
from api.schemas.doi import DoICriterion, DoIReport, DoISummaryEntry
from api.schemas.managed_repo import (
    DispatchTask,
    DispatchWorkstream,
    RepoCreate,
    RepoDispatch,
    RepoPathRegister,
    RepoRead,
    RepoUpdate,
)

router = APIRouter(prefix="/repos", tags=["repos"])


# ── Private helpers shared by the DoI endpoints ──────────────────────────────


def _timestamp_str(value) -> str | None:
    """Return ``str(value)``, or ``None`` when *value* is missing.

    Both DoI endpoints feed timestamps into ``compute_fingerprint`` through
    this helper so that a NULL aggregate is always ``None`` and never the
    literal string ``"None"``; otherwise the two endpoints would compute
    different fingerprints for the same repo state.
    """
    return str(value) if value else None


def _repo_fingerprint_dict(repo: ManagedRepo, domain_slug: str | None) -> dict:
    """Build the plain-dict view of *repo* passed to the DoI engine."""
    return {
        "slug": repo.slug,
        "domain_slug": domain_slug,
        "local_path": repo.local_path,
        "remote_url": repo.remote_url,
        "host_paths": repo.host_paths or {},
        "last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None,
        "updated_at": str(repo.updated_at) if repo.updated_at else "",
    }


def _criteria_to_json(report) -> list[dict]:
    """Serialize ``report.criteria`` into the list of dicts stored in the cache."""
    return [
        {
            "id": c.id,
            "label": c.label,
            "tier": c.tier,
            "status": c.status,
            "detail": c.detail,
        }
        for c in report.criteria
    ]


def _upsert_doi_cache(
    session: AsyncSession,
    repo: ManagedRepo,
    cached: DOICache | None,
    report,
    fingerprint,
    now: datetime,
) -> None:
    """Write *report* into the repo's cache row, creating the row if needed.

    Only mutates ORM state (attribute assignment / ``session.add``); the
    caller is responsible for committing.
    """
    criteria_json = _criteria_to_json(report)
    if cached:
        cached.tier = report.tier
        cached.core_pass = report.core_pass
        cached.standard_pass = report.standard_pass
        cached.full_pass = report.full_pass
        cached.criteria = criteria_json
        cached.fingerprint = fingerprint
        cached.checked_at = now
        cached.updated_at = now
        return
    session.add(
        DOICache(
            repo_id=repo.id,
            tier=report.tier,
            core_pass=report.core_pass,
            standard_pass=report.standard_pass,
            full_pass=report.full_pass,
            criteria=criteria_json,
            fingerprint=fingerprint,
            checked_at=now,
            updated_at=now,
        )
    )


# ── Routes ───────────────────────────────────────────────────────────────────


@router.get("/", response_model=list[RepoRead])
async def list_repos(
    domain: str | None = None,
    session: AsyncSession = Depends(get_session),
) -> list[ManagedRepo]:
    """List repos ordered by name, optionally restricted to one domain slug.

    Raises:
        HTTPException: 404 when *domain* is given but no such domain exists.
    """
    q = select(ManagedRepo).order_by(ManagedRepo.name)
    if domain:
        domain_row = await session.execute(select(Domain).where(Domain.slug == domain))
        domain_obj = domain_row.scalar_one_or_none()
        if domain_obj is None:
            raise HTTPException(status_code=404, detail=f"Domain '{domain}' not found")
        q = q.where(ManagedRepo.domain_id == domain_obj.id)
    result = await session.execute(q)
    return list(result.scalars().all())


@router.post("/", response_model=RepoRead, status_code=status.HTTP_201_CREATED)
async def register_repo(
    body: RepoCreate,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Create a repo under the domain named by ``body.domain_slug``.

    Raises:
        HTTPException: 404 when the domain does not exist; 409 when a repo
            with ``body.slug`` already exists.
    """
    domain_row = await session.execute(
        select(Domain).where(Domain.slug == body.domain_slug)
    )
    domain_obj = domain_row.scalar_one_or_none()
    if domain_obj is None:
        raise HTTPException(
            status_code=404, detail=f"Domain '{body.domain_slug}' not found"
        )

    existing = await session.execute(
        select(ManagedRepo).where(ManagedRepo.slug == body.slug)
    )
    if existing.scalar_one_or_none():
        raise HTTPException(
            status_code=409, detail=f"Repo slug '{body.slug}' already exists"
        )

    repo = ManagedRepo(
        domain_id=domain_obj.id,
        slug=body.slug,
        name=body.name,
        local_path=body.local_path,
        remote_url=body.remote_url,
        description=body.description,
        topic_id=body.topic_id,
    )
    session.add(repo)
    await session.commit()
    await session.refresh(repo)
    return repo


@router.get("/doi/summary", response_model=list[DoISummaryEntry])
async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoISummaryEntry]:
    """Return DoI tier for all active repos, worst tier first.

    Results are cached in doi_cache. A repo is only re-evaluated when its
    fingerprint changes (repo record updated, new TPSC snapshot, goal change,
    or a key file mtime changes on disk).
    """
    repos_result = await session.execute(
        select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.name)
    )
    repos = list(repos_result.scalars().all())
    if not repos:
        # Nothing to evaluate; avoid four queries with empty IN () lists.
        return []

    repo_ids = [r.id for r in repos]
    id_to_slug = {r.id: r.slug for r in repos}

    # ── Bulk DB queries for fingerprint inputs ────────────────────────────────
    domains_result = await session.execute(select(Domain))
    domain_obj_map = {d.id: d for d in domains_result.scalars().all()}
    domain_map = {d.id: d.slug for d in domain_obj_map.values()}
    domain_status = {d.slug: d.status for d in domain_obj_map.values()}

    # Latest TPSC snapshot timestamp per repo (for fingerprint + C9 count)
    tpsc_result = await session.execute(
        select(
            TPSCSnapshot.repo_id,
            func.count().label("cnt"),
            func.max(TPSCSnapshot.snapshot_at).label("latest"),
        )
        .where(TPSCSnapshot.repo_id.in_(repo_ids))
        .group_by(TPSCSnapshot.repo_id)
    )
    tpsc_by_id = {row.repo_id: row for row in tpsc_result}
    tpsc_snap_counts = {
        id_to_slug[rid]: row.cnt for rid, row in tpsc_by_id.items() if rid in id_to_slug
    }
    tpsc_snap_latest = {
        id_to_slug[rid]: _timestamp_str(row.latest)
        for rid, row in tpsc_by_id.items()
        if rid in id_to_slug
    }

    # Latest goal updated_at + active count per repo (for fingerprint + C10)
    goals_result = await session.execute(
        select(
            RepoGoal.repo_id,
            func.count().label("total"),
            func.sum(case((RepoGoal.status == "active", 1), else_=0)).label("active_cnt"),
            func.max(RepoGoal.updated_at).label("latest"),
        )
        .where(RepoGoal.repo_id.in_(repo_ids))
        .group_by(RepoGoal.repo_id)
    )
    goals_by_id = {row.repo_id: row for row in goals_result}
    active_goal_counts = {
        id_to_slug[rid]: int(row.active_cnt or 0)
        for rid, row in goals_by_id.items()
        if rid in id_to_slug
    }
    goals_latest = {
        id_to_slug[rid]: _timestamp_str(row.latest)
        for rid, row in goals_by_id.items()
        if rid in id_to_slug
    }

    # Load existing cache rows
    cache_result = await session.execute(
        select(DOICache).where(DOICache.repo_id.in_(repo_ids))
    )
    cache_by_repo_id = {row.repo_id: row for row in cache_result.scalars().all()}
    # ─────────────────────────────────────────────────────────────────────────

    prefetch = {
        "domain_status": domain_status,
        "tpsc_snap_counts": tpsc_snap_counts,
        "active_goal_counts": active_goal_counts,
    }

    async def _get_or_refresh(repo: ManagedRepo) -> DoISummaryEntry:
        """Return the cached summary entry for *repo*, re-evaluating on a miss."""
        slug = repo.slug
        domain_slug = domain_map.get(repo.domain_id)
        repo_dict = _repo_fingerprint_dict(repo, domain_slug)
        fp = compute_fingerprint(
            repo_dict,
            tpsc_snap_latest.get(slug),
            goals_latest.get(slug),
        )

        cached = cache_by_repo_id.get(repo.id)
        if cached and cached.fingerprint == fp:
            # Cache hit — return stored result
            return DoISummaryEntry(
                repo_slug=slug,
                domain_slug=domain_slug,
                tier=cached.tier,
                core_pass=cached.core_pass,
                standard_pass=cached.standard_pass,
                full_pass=cached.full_pass,
                checked_at=cached.checked_at.isoformat(),
            )

        # Cache miss — evaluate and store
        report = await _doi_evaluate(repo_dict, skip_consistency=True, prefetch=prefetch)
        now = datetime.now(tz=timezone.utc)
        # NOTE(review): this row is also read by get_repo_doi, which accepts any
        # row with a matching fingerprint and non-empty criteria as a full
        # report. If skip_consistency=True yields reduced C7/C13 results, that
        # endpoint will serve them until the fingerprint changes or
        # force_refresh is passed — confirm this is intended.
        _upsert_doi_cache(session, repo, cached, report, fp, now)

        return DoISummaryEntry(
            repo_slug=slug,
            domain_slug=domain_slug,
            tier=report.tier,
            core_pass=report.core_pass,
            standard_pass=report.standard_pass,
            full_pass=report.full_pass,
            checked_at=now.isoformat(),
        )

    # The coroutines share one session, which is only acceptable because
    # _get_or_refresh never awaits it: it performs just synchronous
    # session.add() calls and attribute assignments. Keep it that way.
    entries: list[DoISummaryEntry] = list(
        await asyncio.gather(*[_get_or_refresh(r) for r in repos])
    )
    await session.commit()

    # Ascending rank puts the lowest ("worst") tier first; unknown tiers rank 0.
    tier_order = {"none": 0, "core": 1, "standard": 2, "full": 3}
    entries.sort(key=lambda e: tier_order.get(e.tier, 0))
    return entries


@router.get("/{slug}/doi", response_model=DoIReport)
async def get_repo_doi(
    slug: str,
    force_refresh: bool = False,
    session: AsyncSession = Depends(get_session),
) -> DoIReport:
    """Evaluate the 14 DoI criteria for a single repo (full check including C7/C13).

    Results are cached by fingerprint. Pass ?force_refresh=true to bypass the cache.

    Raises:
        HTTPException: 404 when no repo with *slug* exists.
    """
    repo = await _get_repo_by_slug(slug, session)

    domain_result = await session.execute(select(Domain).where(Domain.id == repo.domain_id))
    domain_obj = domain_result.scalar_one_or_none()

    # Fingerprint inputs for this single repo
    tpsc_row = (
        await session.execute(
            select(
                func.count().label("cnt"),
                func.max(TPSCSnapshot.snapshot_at).label("latest"),
            ).where(TPSCSnapshot.repo_id == repo.id)
        )
    ).one()
    goal_row = (
        await session.execute(
            select(func.max(RepoGoal.updated_at).label("latest")).where(
                RepoGoal.repo_id == repo.id
            )
        )
    ).one()

    repo_dict = _repo_fingerprint_dict(repo, domain_obj.slug if domain_obj else None)
    fp = compute_fingerprint(
        repo_dict,
        _timestamp_str(tpsc_row.latest),
        _timestamp_str(goal_row.latest),
    )

    # Check cache (unless force_refresh)
    cached = (
        await session.execute(select(DOICache).where(DOICache.repo_id == repo.id))
    ).scalar_one_or_none()

    if not force_refresh and cached and cached.fingerprint == fp and cached.criteria:
        return DoIReport(
            repo_slug=slug,
            tier=cached.tier,
            core_pass=cached.core_pass,
            standard_pass=cached.standard_pass,
            full_pass=cached.full_pass,
            checked_at=cached.checked_at.isoformat(),
            criteria=[DoICriterion(**c) for c in cached.criteria],
        )

    # Full evaluation (includes C7/C13 consistency subprocesses)
    report = await _doi_evaluate(repo_dict)
    now = datetime.now(tz=timezone.utc)
    _upsert_doi_cache(session, repo, cached, report, fp, now)
    await session.commit()

    return DoIReport(
        repo_slug=report.repo_slug,
        tier=report.tier,
        core_pass=report.core_pass,
        standard_pass=report.standard_pass,
        full_pass=report.full_pass,
        checked_at=report.checked_at,
        criteria=[
            DoICriterion(id=c.id, label=c.label, tier=c.tier, status=c.status, detail=c.detail)
            for c in report.criteria
        ],
    )


@router.get("/{slug}/", response_model=RepoRead)
async def get_repo(
    slug: str,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Return the repo with *slug*, or 404 if it does not exist."""
    return await _get_repo_by_slug(slug, session)


@router.patch("/{slug}/", response_model=RepoRead)
async def update_repo(
    slug: str,
    body: RepoUpdate,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Apply the fields explicitly set in *body* to the repo and persist them.

    Raises:
        HTTPException: 404 when no repo with *slug* exists.
    """
    repo = await _get_repo_by_slug(slug, session)
    # exclude_unset: fields the client omitted are left untouched.
    for field, value in body.model_dump(exclude_unset=True).items():
        setattr(repo, field, value)
    await session.commit()
    await session.refresh(repo)
    return repo


@router.post("/{slug}/paths/", response_model=RepoRead)
async def register_host_path(
    slug: str,
    body: RepoPathRegister,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Register or update the local path for a specific host.

    Merges {"host": path} into host_paths without overwriting other entries.
    Use this when a repo lives at a different absolute path on different machines.

    Raises:
        HTTPException: 404 when no repo with *slug* exists.
    """
    repo = await _get_repo_by_slug(slug, session)
    # Assign a fresh dict instead of mutating in place — presumably so the ORM
    # registers the change on the column; verify against the model definition.
    updated = dict(repo.host_paths or {})
    updated[body.host] = body.path
    repo.host_paths = updated
    await session.commit()
    await session.refresh(repo)
    return repo


@router.patch("/{slug}/archive", response_model=RepoRead)
async def archive_repo(
    slug: str,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Set the repo's status to ``"archived"`` and return the updated record.

    Raises:
        HTTPException: 404 when no repo with *slug* exists.
    """
    repo = await _get_repo_by_slug(slug, session)
    repo.status = "archived"
    await session.commit()
    await session.refresh(repo)
    return repo


@router.get("/{slug}/dispatch", response_model=RepoDispatch)
async def get_repo_dispatch(
    slug: str,
    session: AsyncSession = Depends(get_session),
) -> RepoDispatch:
    """Return active workstreams, pending tasks, and goal for a repo.

    This endpoint is the foundation for autonomous agent sessions: an agent can
    call it at session start to discover what work is pending without needing to
    read state-hub summary or scan workplan files manually.

    Raises:
        HTTPException: 404 when no repo with *slug* exists.
    """
    repo = await _get_repo_by_slug(slug, session)

    # Active goal
    goal_result = await session.execute(
        select(RepoGoal)
        .where(RepoGoal.repo_id == repo.id, RepoGoal.status == "active")
        .order_by(RepoGoal.priority)
        .limit(1)
    )
    goal_obj = goal_result.scalar_one_or_none()
    active_goal = None
    if goal_obj:
        active_goal = {
            "id": str(goal_obj.id),
            "title": goal_obj.title,
            "description": goal_obj.description,
            "priority": goal_obj.priority,
        }

    # Active workstreams
    ws_result = await session.execute(
        select(Workstream)
        .where(Workstream.repo_id == repo.id, Workstream.status == "active")
        .order_by(Workstream.created_at)
    )
    workstreams = list(ws_result.scalars().all())

    # Pending tasks for all workstreams in one query (instead of one query per
    # workstream). Ordering by created_at globally keeps each workstream's
    # tasks in created_at order once grouped.
    tasks_by_workstream = defaultdict(list)
    if workstreams:
        task_result = await session.execute(
            select(Task)
            .where(
                Task.workstream_id.in_([ws.id for ws in workstreams]),
                Task.status.in_(["todo", "in_progress"]),
            )
            .order_by(Task.created_at)
        )
        for task in task_result.scalars().all():
            tasks_by_workstream[task.workstream_id].append(task)

    dispatch_workstreams: list[DispatchWorkstream] = []
    all_interventions: list[DispatchTask] = []

    for ws in workstreams:
        pending = [
            DispatchTask(
                id=t.id,
                title=t.title,
                priority=t.priority,
                status=t.status,
                needs_human=t.needs_human,
            )
            for t in tasks_by_workstream.get(ws.id, [])
        ]
        all_interventions.extend(t for t in pending if t.needs_human)
        dispatch_workstreams.append(
            DispatchWorkstream(
                id=ws.id,
                title=ws.title,
                status=ws.status,
                pending_tasks=pending,
            )
        )

    return RepoDispatch(
        repo_slug=slug,
        active_goal=active_goal,
        active_workstreams=dispatch_workstreams,
        human_interventions=all_interventions,
        last_state_synced_at=repo.last_state_synced_at,
    )


async def _get_repo_by_slug(slug: str, session: AsyncSession) -> ManagedRepo:
    """Fetch the repo with *slug*.

    Raises:
        HTTPException: 404 when no such repo exists.
    """
    result = await session.execute(select(ManagedRepo).where(ManagedRepo.slug == slug))
    repo = result.scalar_one_or_none()
    if repo is None:
        raise HTTPException(status_code=404, detail=f"Repo '{slug}' not found")
    return repo