# state-hub/api/routers/repos.py

import asyncio
import uuid

from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from sqlalchemy import func

from api.database import get_session
from api.doi_engine import evaluate as _doi_evaluate
from api.models.domain import Domain
from api.models.managed_repo import ManagedRepo
from api.models.repo_goal import RepoGoal
from api.models.tpsc import TPSCSnapshot
from api.models.task import Task
from api.models.workstream import Workstream
from api.schemas.doi import DoICriterion, DoIReport, DoISummaryEntry
from api.schemas.managed_repo import (
    DispatchTask,
    DispatchWorkstream,
    RepoCreate,
    RepoDispatch,
    RepoPathRegister,
    RepoRead,
    RepoUpdate,
)

# Router for the managed-repo endpoints; every route declared on it is
# served under the "/repos" prefix and tagged "repos".
router = APIRouter(prefix="/repos", tags=["repos"])


@router.get("/", response_model=list[RepoRead])
async def list_repos(
    domain: str | None = None,
    session: AsyncSession = Depends(get_session),
) -> list[ManagedRepo]:
    """List managed repos sorted by name, optionally limited to one domain.

    Args:
        domain: Slug of the domain to filter on. A missing or empty value
            disables the filter.
        session: Database session injected by FastAPI.

    Returns:
        The matching repos, ordered by ``ManagedRepo.name``.

    Raises:
        HTTPException: 404 when ``domain`` is given but no domain has that slug.
    """
    stmt = select(ManagedRepo).order_by(ManagedRepo.name)

    # Unfiltered listing: no domain lookup needed.
    if not domain:
        rows = await session.execute(stmt)
        return list(rows.scalars().all())

    domain_lookup = await session.execute(select(Domain).where(Domain.slug == domain))
    matched_domain = domain_lookup.scalar_one_or_none()
    if matched_domain is None:
        raise HTTPException(status_code=404, detail=f"Domain '{domain}' not found")

    rows = await session.execute(stmt.where(ManagedRepo.domain_id == matched_domain.id))
    return list(rows.scalars().all())


@router.post("/", response_model=RepoRead, status_code=status.HTTP_201_CREATED)
async def register_repo(
    body: RepoCreate,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Create a managed repo inside an existing domain.

    Args:
        body: Payload naming the owning domain (by slug) and the repo fields.
        session: Database session injected by FastAPI.

    Returns:
        The newly persisted repo, refreshed from the database after commit.

    Raises:
        HTTPException: 404 when ``body.domain_slug`` matches no domain;
            409 when a repo with ``body.slug`` already exists.
    """
    # The owning domain must exist before anything else is checked.
    domain_query = select(Domain).where(Domain.slug == body.domain_slug)
    owning_domain = (await session.execute(domain_query)).scalar_one_or_none()
    if owning_domain is None:
        raise HTTPException(status_code=404, detail=f"Domain '{body.domain_slug}' not found")

    # Reject a slug that is already taken.
    slug_query = select(ManagedRepo).where(ManagedRepo.slug == body.slug)
    clash = (await session.execute(slug_query)).scalar_one_or_none()
    if clash:
        raise HTTPException(status_code=409, detail=f"Repo slug '{body.slug}' already exists")

    new_repo = ManagedRepo(
        domain_id=owning_domain.id,
        slug=body.slug,
        name=body.name,
        local_path=body.local_path,
        remote_url=body.remote_url,
        description=body.description,
        topic_id=body.topic_id,
    )
    session.add(new_repo)
    await session.commit()
    # Reload so database-populated columns are present on the returned object.
    await session.refresh(new_repo)
    return new_repo


@router.get("/doi/summary", response_model=list[DoISummaryEntry])
async def doi_summary(session: AsyncSession = Depends(get_session)) -> list[DoISummaryEntry]:
    """Return DoI tier for all active repos, worst tier first.

    Active repos are loaded once, the per-repo lookups the evaluator needs
    are fetched in bulk and handed over through ``prefetch``, and all
    evaluations are then scheduled together with ``asyncio.gather``.

    Args:
        session: Database session injected by FastAPI.

    Returns:
        One summary entry per active repo, ordered none < core < standard <
        full. An unrecognised tier sorts together with "none". Entries
        sharing a tier keep the repos' name order (the sort is stable).
    """
    repos_result = await session.execute(
        select(ManagedRepo).where(ManagedRepo.status == "active").order_by(ManagedRepo.name)
    )
    repos = list(repos_result.scalars().all())
    if not repos:
        # Nothing to evaluate: skip the bulk queries, two of which would
        # otherwise be issued with an empty IN list.
        return []

    # ── 3 bulk DB queries instead of 48 HTTP self-calls ───────────────────────
    # C2: domain status by slug (plus id -> slug for labelling each repo)
    domains_result = await session.execute(select(Domain))
    domain_map = {}
    domain_status = {}
    for d in domains_result.scalars().all():
        domain_map[d.id] = d.slug
        domain_status[d.slug] = d.status

    repo_ids = [r.id for r in repos]
    id_to_slug = {r.id: r.slug for r in repos}

    # C9: TPSC snapshot count, re-keyed from repo id to repo slug
    tpsc_result = await session.execute(
        select(TPSCSnapshot.repo_id, func.count().label("cnt"))
        .where(TPSCSnapshot.repo_id.in_(repo_ids))
        .group_by(TPSCSnapshot.repo_id)
    )
    tpsc_snap_counts = {
        id_to_slug[row.repo_id]: row.cnt for row in tpsc_result if row.repo_id in id_to_slug
    }

    # C10: active repo goal count, re-keyed from repo id to repo slug
    goals_result = await session.execute(
        select(RepoGoal.repo_id, func.count().label("cnt"))
        .where(RepoGoal.repo_id.in_(repo_ids), RepoGoal.status == "active")
        .group_by(RepoGoal.repo_id)
    )
    active_goal_counts = {
        id_to_slug[row.repo_id]: row.cnt for row in goals_result if row.repo_id in id_to_slug
    }

    prefetch = {
        "domain_status": domain_status,
        "tpsc_snap_counts": tpsc_snap_counts,
        "active_goal_counts": active_goal_counts,
    }
    # ─────────────────────────────────────────────────────────────────────────

    async def _check_one(repo: ManagedRepo) -> DoISummaryEntry:
        """Evaluate one repo with the shared prefetch data and summarise it."""
        domain_slug = domain_map.get(repo.domain_id)
        repo_dict = {
            "slug": repo.slug,
            "domain_slug": domain_slug,
            "local_path": repo.local_path,
            "remote_url": repo.remote_url,
            "host_paths": repo.host_paths or {},
            "last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None,
        }
        report = await _doi_evaluate(repo_dict, skip_consistency=True, prefetch=prefetch)
        return DoISummaryEntry(
            repo_slug=repo.slug,
            domain_slug=domain_slug,
            tier=report.tier,
            core_pass=report.core_pass,
            standard_pass=report.standard_pass,
            full_pass=report.full_pass,
            checked_at=report.checked_at,
        )

    # gather() returns results in argument order, i.e. sorted by repo name.
    entries: list[DoISummaryEntry] = list(await asyncio.gather(*[_check_one(r) for r in repos]))

    # Lower rank = worse tier, so an ascending sort puts the worst first.
    tier_order = {"none": 0, "core": 1, "standard": 2, "full": 3}
    entries.sort(key=lambda e: tier_order.get(e.tier, 0))
    return entries


@router.get("/{slug}/doi", response_model=DoIReport)
async def get_repo_doi(slug: str, session: AsyncSession = Depends(get_session)) -> DoIReport:
    """Evaluate the 14 DoI criteria for a single repo.

    Args:
        slug: Slug of the repo to evaluate.
        session: Database session injected by FastAPI.

    Returns:
        The full report: tier, per-tier pass flags, timestamp and the
        individual criteria.

    Raises:
        HTTPException: 404 when no repo has the given slug.
    """
    repo = await _get_repo_by_slug(slug, session)

    # The evaluator receives the domain slug, not the id; a repo whose
    # domain row cannot be found is passed on with ``None``.
    owner_query = select(Domain).where(Domain.id == repo.domain_id)
    owner = (await session.execute(owner_query)).scalar_one_or_none()
    if owner:
        owner_slug = owner.slug
    else:
        owner_slug = None

    if repo.last_sbom_at:
        sbom_stamp = str(repo.last_sbom_at)
    else:
        sbom_stamp = None

    evaluation = await _doi_evaluate(
        {
            "slug": repo.slug,
            "domain_slug": owner_slug,
            "local_path": repo.local_path,
            "remote_url": repo.remote_url,
            "host_paths": repo.host_paths or {},
            "last_sbom_at": sbom_stamp,
        }
    )

    # Convert the evaluator's criteria into the response schema one by one.
    criteria_out = []
    for item in evaluation.criteria:
        criteria_out.append(
            DoICriterion(
                id=item.id,
                label=item.label,
                tier=item.tier,
                status=item.status,
                detail=item.detail,
            )
        )

    return DoIReport(
        repo_slug=evaluation.repo_slug,
        tier=evaluation.tier,
        core_pass=evaluation.core_pass,
        standard_pass=evaluation.standard_pass,
        full_pass=evaluation.full_pass,
        checked_at=evaluation.checked_at,
        criteria=criteria_out,
    )


@router.get("/{slug}/", response_model=RepoRead)
async def get_repo(
    slug: str,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Fetch a single managed repo by slug.

    Raises:
        HTTPException: 404 when no repo has the given slug.
    """
    found = await _get_repo_by_slug(slug, session)
    return found


@router.patch("/{slug}/", response_model=RepoRead)
async def update_repo(
    slug: str,
    body: RepoUpdate,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Apply a partial update to a managed repo.

    Only fields explicitly present in the request body are written; fields
    the client left unset are not touched.

    Args:
        slug: Slug of the repo to modify.
        body: Partial update payload.
        session: Database session injected by FastAPI.

    Returns:
        The repo as stored after the commit.

    Raises:
        HTTPException: 404 when no repo has the given slug.
    """
    target = await _get_repo_by_slug(slug, session)

    changes = body.model_dump(exclude_unset=True)
    for attr_name in changes:
        setattr(target, attr_name, changes[attr_name])

    await session.commit()
    await session.refresh(target)
    return target


@router.post("/{slug}/paths/", response_model=RepoRead)
async def register_host_path(
    slug: str,
    body: RepoPathRegister,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Set the local checkout path of a repo for one particular host.

    The ``{host: path}`` pair is merged into ``host_paths``: an existing entry
    for the same host is replaced, entries for other hosts are kept. Useful
    when the same repo sits at different absolute paths on different machines.

    Raises:
        HTTPException: 404 when no repo has the given slug.
    """
    target = await _get_repo_by_slug(slug, session)

    # Work on a copy and assign it back as a new object instead of mutating
    # the stored mapping in place.
    merged_paths = dict(target.host_paths) if target.host_paths else {}
    merged_paths.update({body.host: body.path})
    target.host_paths = merged_paths

    await session.commit()
    await session.refresh(target)
    return target


@router.patch("/{slug}/archive", response_model=RepoRead)
async def archive_repo(
    slug: str,
    session: AsyncSession = Depends(get_session),
) -> ManagedRepo:
    """Mark a managed repo as archived by setting its status to "archived".

    Returns:
        The repo as stored after the commit.

    Raises:
        HTTPException: 404 when no repo has the given slug.
    """
    target = await _get_repo_by_slug(slug, session)
    setattr(target, "status", "archived")
    await session.commit()
    await session.refresh(target)
    return target


@router.get("/{slug}/dispatch", response_model=RepoDispatch)
async def get_repo_dispatch(
    slug: str,
    session: AsyncSession = Depends(get_session),
) -> RepoDispatch:
    """Return active workstreams, pending tasks, and goal for a repo.

    This endpoint is the foundation for autonomous agent sessions: an agent can
    call it at session start to discover what work is pending without needing to
    read state-hub summary or scan workplan files manually.

    Args:
        slug: Slug of the repo to report on.
        session: Database session injected by FastAPI.

    Returns:
        A dispatch payload containing the first active goal by ascending
        ``priority`` (or ``None``), every active workstream with its
        "todo"/"in_progress" tasks in creation order, and a flat list of
        those tasks that are flagged ``needs_human``.

    Raises:
        HTTPException: 404 when no repo has the given slug.
    """
    repo = await _get_repo_by_slug(slug, session)

    # Active goal: only the first one by ascending priority is reported.
    goal_result = await session.execute(
        select(RepoGoal)
        .where(RepoGoal.repo_id == repo.id, RepoGoal.status == "active")
        .order_by(RepoGoal.priority)
        .limit(1)
    )
    goal_obj = goal_result.scalar_one_or_none()
    active_goal = (
        {
            "id": str(goal_obj.id),
            "title": goal_obj.title,
            "description": goal_obj.description,
            "priority": goal_obj.priority,
        }
        if goal_obj
        else None
    )

    # Active workstreams, oldest first.
    ws_result = await session.execute(
        select(Workstream)
        .where(Workstream.repo_id == repo.id, Workstream.status == "active")
        .order_by(Workstream.created_at)
    )
    workstreams = list(ws_result.scalars().all())

    # Load the pending tasks of ALL workstreams with a single query instead
    # of one query per workstream. Rows arrive ordered by created_at, so
    # each per-workstream bucket below is in creation order as well.
    tasks_by_workstream: dict[object, list[Task]] = {}
    if workstreams:
        task_result = await session.execute(
            select(Task)
            .where(
                Task.workstream_id.in_([ws.id for ws in workstreams]),
                Task.status.in_(["todo", "in_progress"]),
            )
            .order_by(Task.created_at)
        )
        for task in task_result.scalars().all():
            tasks_by_workstream.setdefault(task.workstream_id, []).append(task)

    dispatch_workstreams: list[DispatchWorkstream] = []
    all_interventions: list[DispatchTask] = []

    for ws in workstreams:
        pending = [
            DispatchTask(
                id=t.id,
                title=t.title,
                priority=t.priority,
                status=t.status,
                needs_human=t.needs_human,
            )
            for t in tasks_by_workstream.get(ws.id, [])
        ]
        # Tasks needing a human are additionally surfaced in one flat list,
        # grouped in workstream order.
        all_interventions.extend(t for t in pending if t.needs_human)

        dispatch_workstreams.append(
            DispatchWorkstream(
                id=ws.id,
                title=ws.title,
                status=ws.status,
                pending_tasks=pending,
            )
        )

    return RepoDispatch(
        repo_slug=slug,
        active_goal=active_goal,
        active_workstreams=dispatch_workstreams,
        human_interventions=all_interventions,
        last_state_synced_at=repo.last_state_synced_at,
    )


async def _get_repo_by_slug(slug: str, session: AsyncSession) -> ManagedRepo:
    """Load the repo with the given slug or fail with a 404.

    Args:
        slug: Slug to look up.
        session: Database session used for the query.

    Returns:
        The matching ``ManagedRepo`` row.

    Raises:
        HTTPException: 404 when no repo has that slug.
    """
    lookup = select(ManagedRepo).where(ManagedRepo.slug == slug)
    found = (await session.execute(lookup)).scalar_one_or_none()
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail=f"Repo '{slug}' not found")