diff --git a/api/doi_engine.py b/api/doi_engine.py
index 10a6627..1b79756 100644
--- a/api/doi_engine.py
+++ b/api/doi_engine.py
@@ -8,22 +8,42 @@ from __future__ import annotations
 
 import asyncio
 import json
+import re
 import socket
 import urllib.error
 import urllib.request
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Literal
+from typing import Any, Literal
+
+import yaml
 
 CriterionStatus = Literal["pass", "fail", "warn", "skip"]
 Tier = Literal["none", "core", "standard", "full"]
 
 # Criteria that belong to each tier (in check order)
 CORE_IDS = {"C1", "C2", "C3", "C4"}
-STANDARD_IDS = {"C5", "C6", "C7", "C8", "C9"}
+STANDARD_IDS = {"C5a", "C5b", "C5c", "C6", "C7", "C8", "C9"}
 FULL_IDS = {"C10", "C11", "C12", "C13", "C14"}
 
+STANDARD_SCOPE_SECTIONS = [
+    "One-liner",
+    "Core Idea",
+    "In Scope",
+    "Out of Scope",
+    "Relevant When",
+    "Not Relevant When",
+    "Current State",
+    "How It Fits",
+    "Terminology",
+    "Related / Overlapping",
+    "Provided Capabilities",
+]
+
+_CAPABILITY_BLOCK_RE = re.compile(r"```capability\s*\n(.*?)```", re.DOTALL | re.IGNORECASE)
+_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
+
 
 @dataclass
 class CriterionResult:
@@ -45,6 +65,170 @@ class DoIReport:
     checked_at: str = field(default_factory=lambda: datetime.now(tz=timezone.utc).isoformat())
 
 
+def evaluate_scope_health(repo: dict) -> list[dict[str, Any]]:
+    """Return machine-readable SCOPE.md health issues for C5a/C5b/C5c.
+
+    The returned records intentionally mirror DoI criterion IDs while carrying
+    section-level hints that downstream repo-scoping can use to refresh only
+    the affected parts of SCOPE.md.
+
+    Args:
+        repo: Repo descriptor dict; it is passed to ``_resolve_path`` to find
+            the local checkout.
+
+    Returns:
+        Exactly three records, ordered C5a (file present), C5b (standard H2
+        sections), C5c (fenced ``capability`` blocks). Every record carries
+        ``id``, ``label``, ``status``, ``detail``, ``missing_sections``,
+        ``invalid_capability_blocks`` and ``needs_refresh_sections``.
+    """
+    repo_path = _resolve_path(repo)
+    if not repo_path:
+        return [
+            {
+                "id": "C5a",
+                "label": "SCOPE.md present",
+                "status": "skip",
+                "detail": "Local path unavailable",
+                "missing_sections": [],
+                "invalid_capability_blocks": [],
+                "needs_refresh_sections": [],
+            },
+            {
+                "id": "C5b",
+                "label": "SCOPE.md standard sections",
+                "status": "skip",
+                "detail": "Local path unavailable",
+                "missing_sections": [],
+                "invalid_capability_blocks": [],
+                "needs_refresh_sections": [],
+            },
+            {
+                "id": "C5c",
+                "label": "SCOPE.md capability blocks",
+                "status": "skip",
+                "detail": "Local path unavailable",
+                "missing_sections": [],
+                "invalid_capability_blocks": [],
+                "needs_refresh_sections": [],
+            },
+        ]
+
+    scope_path = Path(repo_path) / "SCOPE.md"
+    # is_file() rather than exists(): a directory named SCOPE.md must count as
+    # missing instead of crashing read_text() below.
+    if not scope_path.is_file():
+        return [
+            {
+                "id": "C5a",
+                "label": "SCOPE.md present",
+                "status": "fail",
+                "detail": "SCOPE.md not found at repo root",
+                "missing_sections": STANDARD_SCOPE_SECTIONS.copy(),
+                "invalid_capability_blocks": [],
+                "needs_refresh_sections": STANDARD_SCOPE_SECTIONS.copy(),
+            },
+            {
+                "id": "C5b",
+                "label": "SCOPE.md standard sections",
+                "status": "skip",
+                "detail": "SCOPE.md absent",
+                "missing_sections": STANDARD_SCOPE_SECTIONS.copy(),
+                "invalid_capability_blocks": [],
+                "needs_refresh_sections": STANDARD_SCOPE_SECTIONS.copy(),
+            },
+            {
+                "id": "C5c",
+                "label": "SCOPE.md capability blocks",
+                "status": "skip",
+                "detail": "SCOPE.md absent",
+                "missing_sections": [],
+                "invalid_capability_blocks": [],
+                "needs_refresh_sections": ["Provided Capabilities"],
+            },
+        ]
+
+    # Decode explicitly as UTF-8 so results do not depend on the host locale;
+    # undecodable bytes are replaced rather than failing the whole check.
+    text = scope_path.read_text(encoding="utf-8", errors="replace")
+    issues: list[dict[str, Any]] = [{
+        "id": "C5a",
+        "label": "SCOPE.md present",
+        "status": "pass",
+        "detail": "",
+        "missing_sections": [],
+        "invalid_capability_blocks": [],
+        "needs_refresh_sections": [],
+    }]
+
+    headings = {h.strip() for h in _H2_RE.findall(text)}
+    missing_sections = [section for section in STANDARD_SCOPE_SECTIONS if section not in headings]
+    if missing_sections:
+        issues.append({
+            "id": "C5b",
+            "label": "SCOPE.md standard sections",
+            "status": "warn",
+            "detail": f"Missing H2 section(s): {', '.join(missing_sections)}",
+            "missing_sections": missing_sections,
+            "invalid_capability_blocks": [],
+            "needs_refresh_sections": missing_sections,
+        })
+    else:
+        issues.append({
+            "id": "C5b",
+            "label": "SCOPE.md standard sections",
+            "status": "pass",
+            "detail": f"All {len(STANDARD_SCOPE_SECTIONS)} standard sections present",
+            "missing_sections": [],
+            "invalid_capability_blocks": [],
+            "needs_refresh_sections": [],
+        })
+
+    capability_blocks = _CAPABILITY_BLOCK_RE.findall(text)
+    valid_blocks = 0
+    invalid_blocks: list[dict[str, Any]] = []
+    for index, block in enumerate(capability_blocks, start=1):
+        try:
+            parsed = yaml.safe_load(block) or {}
+            if isinstance(parsed, dict) and parsed.get("type") and parsed.get("title"):
+                valid_blocks += 1
+            else:
+                invalid_blocks.append({
+                    "index": index,
+                    "reason": "Capability block must be YAML with type and title",
+                })
+        except yaml.YAMLError as exc:
+            invalid_blocks.append({"index": index, "reason": str(exc)})
+
+    # One valid block is enough for C5c to pass; invalid blocks are still
+    # reported so they can be repaired.
+    if valid_blocks > 0:
+        issues.append({
+            "id": "C5c",
+            "label": "SCOPE.md capability blocks",
+            "status": "pass",
+            "detail": f"{valid_blocks} valid capability block(s)",
+            "missing_sections": [],
+            "invalid_capability_blocks": invalid_blocks,
+            "needs_refresh_sections": [],
+        })
+    else:
+        detail = "No fenced capability block found"
+        if invalid_blocks:
+            detail = "No valid capability block found"
+        issues.append({
+            "id": "C5c",
+            "label": "SCOPE.md capability blocks",
+            "status": "warn",
+            "detail": detail,
+            "missing_sections": [],
+            "invalid_capability_blocks": invalid_blocks,
+            "needs_refresh_sections": ["Provided Capabilities"],
+        })
+
+    return issues
+
+
 def compute_fingerprint(
     repo: dict,
     latest_tpsc_snap_at: str | None,
@@ -205,13 +389,9 @@ async def evaluate(
 
     # ── Tier 2: Standard ─────────────────────────────────────────────────────
 
-    # C5: SCOPE.md
-    if not repo_path:
-        _r("C5", "SCOPE.md present", "standard", "skip", "Local path unavailable")
-    elif (Path(repo_path) / "SCOPE.md").exists():
-        _r("C5", "SCOPE.md present", "standard", "pass")
-    else:
-        _r("C5", "SCOPE.md present", "standard", "fail", "SCOPE.md not found at repo root")
+    # C5a/C5b/C5c: SCOPE.md structure and capability declarations
+    for issue in evaluate_scope_health(repo):
+        _r(issue["id"], issue["label"], "standard", issue["status"], issue["detail"])
 
     # C6: CLAUDE.md
     if not repo_path:
diff --git a/api/routers/repos.py b/api/routers/repos.py
index 7caf442..47bd9fc 100644
--- a/api/routers/repos.py
+++ b/api/routers/repos.py
@@ -13,7 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 
 from api.config import settings
 from api.database import get_session
-from api.doi_engine import compute_fingerprint, evaluate as _doi_evaluate
+from api.doi_engine import compute_fingerprint, evaluate as _doi_evaluate, evaluate_scope_health
 from api.models.doi_cache import DOICache
 from api.models.domain import Domain
 from api.models.interface_change import InterfaceChange
@@ -32,6 +32,7 @@ from api.schemas.managed_repo import (
     RepoPathRegister,
     RepoRead,
     RepoUpdate,
+    ScopeIssueDetail,
 )
 
 router = APIRouter(prefix="/repos", tags=["repos"])
@@ -491,12 +492,33 @@ async def get_repo_dispatch(
         for ic in ic_result.scalars().all()
     ]
 
+    domain_result = await session.execute(select(Domain).where(Domain.id == repo.domain_id))
+    domain_obj = domain_result.scalar_one_or_none()
+    scope_issue_details = [
+        ScopeIssueDetail(**issue)
+        for issue in evaluate_scope_health({
+            "slug": repo.slug,
+            "domain_slug": domain_obj.slug if domain_obj else None,
+            "local_path": repo.local_path,
+            "remote_url": repo.remote_url,
+            "host_paths": repo.host_paths or {},
+            "last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None,
+            "updated_at": str(repo.updated_at) if repo.updated_at else "",
+        })
+    ]
+    scope_needs_review = any(
+        issue.id in {"C5a", "C5b", "C5c"} and issue.status in {"fail", "warn"}
+        for issue in scope_issue_details
+    )
+
     return RepoDispatch(
         repo_slug=slug,
         active_goal=active_goal,
         active_workstreams=dispatch_workstreams,
         human_interventions=all_interventions,
         pending_interface_changes=pending_changes,
+        scope_needs_review=scope_needs_review,
+        scope_issue_details=scope_issue_details,
         last_state_synced_at=repo.last_state_synced_at,
     )
 
diff --git a/api/schemas/managed_repo.py b/api/schemas/managed_repo.py
index d30b6bb..15ace6c 100644
--- a/api/schemas/managed_repo.py
+++ b/api/schemas/managed_repo.py
@@ -2,7 +2,7 @@ import uuid
 from datetime import date, datetime
 from typing import Any
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class RepoCreate(BaseModel):
@@ -79,10 +79,22 @@ class PendingInterfaceChange(BaseModel):
     published_at: datetime | None
 
 
+class ScopeIssueDetail(BaseModel):
+    id: str
+    label: str
+    status: str
+    detail: str
+    missing_sections: list[str] = Field(default_factory=list)
+    invalid_capability_blocks: list[dict[str, Any]] = Field(default_factory=list)
+    needs_refresh_sections: list[str] = Field(default_factory=list)
+
+
 class RepoDispatch(BaseModel):
     repo_slug: str
     active_goal: dict[str, Any] | None
     active_workstreams: list[DispatchWorkstream]
     human_interventions: list[DispatchTask]
     pending_interface_changes: list[PendingInterfaceChange]
+    scope_needs_review: bool
+    scope_issue_details: list[ScopeIssueDetail]
     last_state_synced_at: datetime | None
diff --git a/tests/test_doi_scope_health.py b/tests/test_doi_scope_health.py
new file mode 100644
index 0000000..ddd7094
--- /dev/null
+++ b/tests/test_doi_scope_health.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+import pytest
+
+from api.doi_engine import evaluate, evaluate_scope_health
+
+
+VALID_SCOPE = """# SCOPE
+
+## One-liner
+One sentence.
+
+## Core Idea
+Core idea.
+
+## In Scope
+- One thing
+
+## Out of Scope
+- Another thing
+
+## Relevant When
+- Useful
+
+## Not Relevant When
+- Not useful
+
+## Current State
+Active.
+
+## How It Fits
+Fits here.
+
+## Terminology
+- Term
+
+## Related / Overlapping
+- None
+
+## Provided Capabilities
+
+```capability
+type: api
+title: Example API
+```
+"""
+
+
+async def _create_domain(client, slug="scopedom"):
+    r = await client.post("/domains/", json={"slug": slug, "name": "Scope Domain"})
+    assert r.status_code == 201, r.text
+    return r.json()
+
+
+async def _create_repo(client, domain_slug, local_path, slug="scope-repo"):
+    r = await client.post("/repos/", json={
+        "slug": slug,
+        "name": "Scope Repo",
+        "domain_slug": domain_slug,
+        "local_path": str(local_path),
+        "remote_url": "https://example.invalid/scope-repo.git",
+    })
+    assert r.status_code == 201, r.text
+    return r.json()
+
+
+def test_scope_health_reports_section_and_capability_detail(tmp_path):
+    (tmp_path / "SCOPE.md").write_text("# SCOPE\n", encoding="utf-8")
+
+    issues = evaluate_scope_health({"slug": "stub", "local_path": str(tmp_path)})
+    by_id = {issue["id"]: issue for issue in issues}
+
+    assert by_id["C5a"]["status"] == "pass"
+    assert by_id["C5b"]["status"] == "warn"
+    assert "One-liner" in by_id["C5b"]["missing_sections"]
+    assert "One-liner" in by_id["C5b"]["needs_refresh_sections"]
+    assert by_id["C5c"]["status"] == "warn"
+    assert by_id["C5c"]["needs_refresh_sections"] == ["Provided Capabilities"]
+
+
+def test_scope_health_treats_directory_named_scope_md_as_missing(tmp_path):
+    (tmp_path / "SCOPE.md").mkdir()
+
+    issues = evaluate_scope_health({"slug": "dir", "local_path": str(tmp_path)})
+    by_id = {issue["id"]: issue for issue in issues}
+
+    assert by_id["C5a"]["status"] == "fail"
+
+
+@pytest.mark.asyncio
+async def test_doi_reports_c5a_c5b_c5c_separately(tmp_path):
+    (tmp_path / "SCOPE.md").write_text(VALID_SCOPE, encoding="utf-8")
+
+    report = await evaluate(
+        {
+            "slug": "valid",
+            "domain_slug": "scopedom",
+            "local_path": str(tmp_path),
+            "remote_url": "https://example.invalid/valid.git",
+        },
+        skip_consistency=True,
+        prefetch={
+            "domain_status": {"scopedom": "active"},
+            "tpsc_snap_counts": {"valid": 0},
+            "active_goal_counts": {"valid": 0},
+        },
+    )
+
+    c5 = {criterion.id: criterion for criterion in report.criteria if criterion.id.startswith("C5")}
+    assert set(c5) == {"C5a", "C5b", "C5c"}
+    assert c5["C5a"].status == "pass"
+    assert c5["C5b"].status == "pass"
+    assert c5["C5c"].status == "pass"
+
+
+class TestRepoDispatchScopeHealth:
+    async def test_dispatch_flags_stub_scope_for_review(self, client, tmp_path):
+        await _create_domain(client)
+        (tmp_path / "SCOPE.md").write_text("# SCOPE\n", encoding="utf-8")
+        await _create_repo(client, "scopedom", tmp_path, slug="stub-scope")
+
+        r = await client.get("/repos/stub-scope/dispatch")
+        assert r.status_code == 200, r.text
+        body = r.json()
+
+        assert body["scope_needs_review"] is True
+        by_id = {issue["id"]: issue for issue in body["scope_issue_details"]}
+        assert by_id["C5b"]["missing_sections"]
+        assert by_id["C5c"]["needs_refresh_sections"] == ["Provided Capabilities"]
+
+    async def test_dispatch_reports_valid_scope_without_review(self, client, tmp_path):
+        await _create_domain(client)
+        (tmp_path / "SCOPE.md").write_text(VALID_SCOPE, encoding="utf-8")
+        await _create_repo(client, "scopedom", tmp_path, slug="valid-scope")
+
+        r = await client.get("/repos/valid-scope/dispatch")
+        assert r.status_code == 200, r.text
+        body = r.json()
+
+        assert body["scope_needs_review"] is False
+        assert {issue["id"]: issue["status"] for issue in body["scope_issue_details"]} == {
+            "C5a": "pass",
+            "C5b": "pass",
+            "C5c": "pass",
+        }