state-hub: add SCOPE.md scope-health checks (C5a/C5b/C5c) and surface them in repo dispatch

2026-05-01 01:33:15 +02:00
parent 45fb6e141d
commit fc725ec65f
4 changed files with 345 additions and 11 deletions
--- a/api/doi_engine.py
+++ b/api/doi_engine.py
@@ -8,22 +8,42 @@ from __future__ import annotations

 import asyncio
 import json
+import re
 import socket
 import urllib.error
 import urllib.request
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Literal
+from typing import Any, Literal
+
+import yaml

 CriterionStatus = Literal["pass", "fail", "warn", "skip"]
 Tier = Literal["none", "core", "standard", "full"]

 # Criteria that belong to each tier (in check order)
 CORE_IDS     = {"C1", "C2", "C3", "C4"}
-STANDARD_IDS = {"C5", "C6", "C7", "C8", "C9"}
+STANDARD_IDS = {"C5a", "C5b", "C5c", "C6", "C7", "C8", "C9"}
 FULL_IDS     = {"C10", "C11", "C12", "C13", "C14"}

+# H2 headings a complete SCOPE.md is expected to contain. evaluate_scope_health
+# reports any heading from this list that is absent under criterion C5b.
+STANDARD_SCOPE_SECTIONS = [
+    "One-liner",
+    "Core Idea",
+    "In Scope",
+    "Out of Scope",
+    "Relevant When",
+    "Not Relevant When",
+    "Current State",
+    "How It Fits",
+    "Terminology",
+    "Related / Overlapping",
+    "Provided Capabilities",
+]
+
+# Matches a fenced block opened with ```capability (case-insensitive); group 1
+# captures everything between the opening line and the next ``` fence.
+_CAPABILITY_BLOCK_RE = re.compile(r"```capability\s*\n(.*?)```", re.DOTALL | re.IGNORECASE)
+# Matches Markdown H2 lines ("## Title"); group 1 captures the heading text
+# without trailing whitespace. "###" and deeper headings do not match.
+_H2_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
+

@dataclass
 class CriterionResult:
@@ -45,6 +65,154 @@ class DoIReport:
    checked_at: str = field(default_factory=lambda: datetime.now(tz=timezone.utc).isoformat())


+def _scope_issue(
+    criterion_id: str,
+    label: str,
+    status: str,
+    detail: str = "",
+    *,
+    missing_sections: list[str] | None = None,
+    invalid_capability_blocks: list[dict[str, Any]] | None = None,
+    needs_refresh_sections: list[str] | None = None,
+) -> dict[str, Any]:
+    """Build one scope-health record carrying the full, fixed set of keys.
+
+    Every list argument is copied, so no two records (and no record and
+    ``STANDARD_SCOPE_SECTIONS``) ever share a mutable list object.
+    """
+    return {
+        "id": criterion_id,
+        "label": label,
+        "status": status,
+        "detail": detail,
+        "missing_sections": list(missing_sections or []),
+        "invalid_capability_blocks": list(invalid_capability_blocks or []),
+        "needs_refresh_sections": list(needs_refresh_sections or []),
+    }
+
+
+def evaluate_scope_health(repo: dict) -> list[dict[str, Any]]:
+    """Return machine-readable SCOPE.md health issues for C5a/C5b/C5c.
+
+    The returned records intentionally mirror DoI criterion IDs while carrying
+    section-level hints that downstream repo-scoping can use to refresh only
+    the affected parts of SCOPE.md.
+
+    Args:
+        repo: Repository description; the local checkout path is obtained by
+            passing it to ``_resolve_path``.
+
+    Returns:
+        Exactly three records, in the order C5a, C5b, C5c. Each record has the
+        keys ``id``, ``label``, ``status``, ``detail``, ``missing_sections``,
+        ``invalid_capability_blocks`` and ``needs_refresh_sections``.
+
+        * C5a is ``skip`` without a local path, ``fail`` when SCOPE.md is not
+          a readable UTF-8 file at the repo root, and ``pass`` otherwise.
+        * C5b is ``warn`` when any heading from ``STANDARD_SCOPE_SECTIONS`` is
+          missing, ``pass`` when all are present.
+        * C5c is ``pass`` when at least one fenced ``capability`` block parses
+          as a YAML mapping with truthy ``type`` and ``title``; else ``warn``.
+          Invalid blocks are listed even when the criterion passes.
+    """
+    present_label = "SCOPE.md present"
+    sections_label = "SCOPE.md standard sections"
+    capability_label = "SCOPE.md capability blocks"
+
+    repo_path = _resolve_path(repo)
+    if not repo_path:
+        reason = "Local path unavailable"
+        return [
+            _scope_issue("C5a", present_label, "skip", reason),
+            _scope_issue("C5b", sections_label, "skip", reason),
+            _scope_issue("C5c", capability_label, "skip", reason),
+        ]
+
+    scope_path = Path(repo_path) / "SCOPE.md"
+    text: str | None = None
+    fail_detail = "SCOPE.md not found at repo root"
+    skip_detail = "SCOPE.md absent"
+    # is_file() rather than exists(): a directory named SCOPE.md is not a
+    # scope document and must not reach read_text().
+    if scope_path.is_file():
+        try:
+            # Decode explicitly as UTF-8 so the result does not depend on the
+            # host locale.
+            text = scope_path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            # Report an unreadable file as a failed criterion instead of
+            # letting the exception escape to the caller.
+            fail_detail = f"SCOPE.md could not be read: {exc}"
+            skip_detail = "SCOPE.md unreadable"
+
+    if text is None:
+        return [
+            _scope_issue(
+                "C5a", present_label, "fail", fail_detail,
+                missing_sections=STANDARD_SCOPE_SECTIONS,
+                needs_refresh_sections=STANDARD_SCOPE_SECTIONS,
+            ),
+            _scope_issue(
+                "C5b", sections_label, "skip", skip_detail,
+                missing_sections=STANDARD_SCOPE_SECTIONS,
+                needs_refresh_sections=STANDARD_SCOPE_SECTIONS,
+            ),
+            _scope_issue(
+                "C5c", capability_label, "skip", skip_detail,
+                needs_refresh_sections=["Provided Capabilities"],
+            ),
+        ]
+
+    issues: list[dict[str, Any]] = [_scope_issue("C5a", present_label, "pass")]
+
+    # C5b: every standard H2 heading must appear (exact, case-sensitive match).
+    headings = {heading.strip() for heading in _H2_RE.findall(text)}
+    missing_sections = [
+        section for section in STANDARD_SCOPE_SECTIONS if section not in headings
+    ]
+    if missing_sections:
+        issues.append(_scope_issue(
+            "C5b", sections_label, "warn",
+            f"Missing H2 section(s): {', '.join(missing_sections)}",
+            missing_sections=missing_sections,
+            needs_refresh_sections=missing_sections,
+        ))
+    else:
+        issues.append(_scope_issue(
+            "C5b", sections_label, "pass",
+            f"All {len(STANDARD_SCOPE_SECTIONS)} standard sections present",
+        ))
+
+    # C5c: validate each fenced capability block; indices are 1-based in
+    # document order so they can be reported back to the author.
+    valid_blocks = 0
+    invalid_blocks: list[dict[str, Any]] = []
+    for index, block in enumerate(_CAPABILITY_BLOCK_RE.findall(text), start=1):
+        try:
+            parsed = yaml.safe_load(block)
+        except yaml.YAMLError as exc:
+            invalid_blocks.append({"index": index, "reason": str(exc)})
+            continue
+        # An empty block parses to None and a list/scalar is not a mapping;
+        # both are rejected by the isinstance check.
+        if isinstance(parsed, dict) and parsed.get("type") and parsed.get("title"):
+            valid_blocks += 1
+        else:
+            invalid_blocks.append({
+                "index": index,
+                "reason": "Capability block must be YAML with type and title",
+            })
+
+    if valid_blocks > 0:
+        issues.append(_scope_issue(
+            "C5c", capability_label, "pass",
+            f"{valid_blocks} valid capability block(s)",
+            invalid_capability_blocks=invalid_blocks,
+        ))
+    else:
+        detail = (
+            "No valid capability block found"
+            if invalid_blocks
+            else "No fenced capability block found"
+        )
+        issues.append(_scope_issue(
+            "C5c", capability_label, "warn", detail,
+            invalid_capability_blocks=invalid_blocks,
+            needs_refresh_sections=["Provided Capabilities"],
+        ))
+
+    return issues
+
+
 def compute_fingerprint(
    repo: dict,
    latest_tpsc_snap_at: str | None,
@@ -205,13 +373,9 @@ async def evaluate(

    # ── Tier 2: Standard ─────────────────────────────────────────────────────

-    # C5: SCOPE.md
-    if not repo_path:
-        _r("C5", "SCOPE.md present", "standard", "skip", "Local path unavailable")
-    elif (Path(repo_path) / "SCOPE.md").exists():
-        _r("C5", "SCOPE.md present", "standard", "pass")
-    else:
-        _r("C5", "SCOPE.md present", "standard", "fail", "SCOPE.md not found at repo root")
+    # C5a/C5b/C5c: SCOPE.md structure and capability declarations
+    for issue in evaluate_scope_health(repo):
+        _r(issue["id"], issue["label"], "standard", issue["status"], issue["detail"])

    # C6: CLAUDE.md
    if not repo_path:
--- a/api/routers/repos.py
+++ b/api/routers/repos.py
@@ -13,7 +13,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from api.config import settings
 from api.database import get_session
-from api.doi_engine import compute_fingerprint, evaluate as _doi_evaluate
+from api.doi_engine import compute_fingerprint, evaluate as _doi_evaluate, evaluate_scope_health
 from api.models.doi_cache import DOICache
 from api.models.domain import Domain
 from api.models.interface_change import InterfaceChange
@@ -32,6 +32,7 @@ from api.schemas.managed_repo import (
    RepoPathRegister,
    RepoRead,
    RepoUpdate,
+    ScopeIssueDetail,
 )

 router = APIRouter(prefix="/repos", tags=["repos"])
@@ -491,12 +492,33 @@ async def get_repo_dispatch(
        for ic in ic_result.scalars().all()
    ]

+    domain_result = await session.execute(select(Domain).where(Domain.id == repo.domain_id))
+    domain_obj = domain_result.scalar_one_or_none()
+    scope_issue_details = [
+        ScopeIssueDetail(**issue)
+        for issue in evaluate_scope_health({
+            "slug": repo.slug,
+            "domain_slug": domain_obj.slug if domain_obj else None,
+            "local_path": repo.local_path,
+            "remote_url": repo.remote_url,
+            "host_paths": repo.host_paths or {},
+            "last_sbom_at": str(repo.last_sbom_at) if repo.last_sbom_at else None,
+            "updated_at": str(repo.updated_at) if repo.updated_at else "",
+        })
+    ]
+    scope_needs_review = any(
+        issue.id in {"C5a", "C5b", "C5c"} and issue.status in {"fail", "warn"}
+        for issue in scope_issue_details
+    )
+
    return RepoDispatch(
        repo_slug=slug,
        active_goal=active_goal,
        active_workstreams=dispatch_workstreams,
        human_interventions=all_interventions,
        pending_interface_changes=pending_changes,
+        scope_needs_review=scope_needs_review,
+        scope_issue_details=scope_issue_details,
        last_state_synced_at=repo.last_state_synced_at,
    )

--- a/api/schemas/managed_repo.py
+++ b/api/schemas/managed_repo.py
@@ -2,7 +2,7 @@ import uuid
 from datetime import date, datetime
 from typing import Any

-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field


 class RepoCreate(BaseModel):
@@ -79,10 +79,22 @@ class PendingInterfaceChange(BaseModel):
    published_at: datetime | None


+class ScopeIssueDetail(BaseModel):
+    """One SCOPE.md health record as exposed in the repo dispatch response.
+
+    The dispatch endpoint builds instances directly from the dicts returned
+    by ``evaluate_scope_health``, so the field names match those dict keys.
+    """
+
+    # Criterion identifier; evaluate_scope_health emits "C5a", "C5b", "C5c".
+    id: str
+    # Human-readable criterion name, e.g. "SCOPE.md present".
+    label: str
+    # Outcome string; evaluate_scope_health emits "pass", "fail", "warn", "skip".
+    status: str
+    # Free-text explanation; may be an empty string.
+    detail: str
+    # Standard section headings not found in SCOPE.md.
+    missing_sections: list[str] = Field(default_factory=list)
+    # One entry per rejected capability block, with "index" and "reason" keys.
+    invalid_capability_blocks: list[dict[str, Any]] = Field(default_factory=list)
+    # Section headings the record suggests regenerating.
+    needs_refresh_sections: list[str] = Field(default_factory=list)
+
+
 class RepoDispatch(BaseModel):
    repo_slug: str
    active_goal: dict[str, Any] | None
    active_workstreams: list[DispatchWorkstream]
    human_interventions: list[DispatchTask]
    pending_interface_changes: list[PendingInterfaceChange]
+    # Set by the dispatch endpoint: True when any C5a/C5b/C5c record has
+    # status "fail" or "warn".
+    scope_needs_review: bool
+    # Per-criterion SCOPE.md health records backing scope_needs_review.
+    scope_issue_details: list[ScopeIssueDetail]
    last_state_synced_at: datetime | None
--- a/tests/test_doi_scope_health.py
+++ b/tests/test_doi_scope_health.py
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+import pytest
+
+from api.doi_engine import evaluate, evaluate_scope_health
+
+
+# SCOPE.md fixture with eleven H2 sections and one fenced capability block
+# that declares both ``type`` and ``title``.
+VALID_SCOPE = """# SCOPE
+
+## One-liner
+One sentence.
+
+## Core Idea
+Core idea.
+
+## In Scope
+- One thing
+
+## Out of Scope
+- Another thing
+
+## Relevant When
+- Useful
+
+## Not Relevant When
+- Not useful
+
+## Current State
+Active.
+
+## How It Fits
+Fits here.
+
+## Terminology
+- Term
+
+## Related / Overlapping
+- None
+
+## Provided Capabilities
+
+```capability
+type: api
+title: Example API
+```
+"""
+
+
+async def _create_domain(client, slug="scopedom"):
+    """POST a domain with the given slug and return the decoded response body."""
+    payload = {"slug": slug, "name": "Scope Domain"}
+    response = await client.post("/domains/", json=payload)
+    assert response.status_code == 201, response.text
+    return response.json()
+
+
+async def _create_repo(client, domain_slug, local_path, slug="scope-repo"):
+    """POST a repo rooted at ``local_path`` and return the decoded response body."""
+    payload = {
+        "slug": slug,
+        "name": "Scope Repo",
+        "domain_slug": domain_slug,
+        "local_path": str(local_path),
+        "remote_url": "https://example.invalid/scope-repo.git",
+    }
+    response = await client.post("/repos/", json=payload)
+    assert response.status_code == 201, response.text
+    return response.json()
+
+
+def test_scope_health_reports_section_and_capability_detail(tmp_path):
+    """A title-only SCOPE.md passes C5a but warns on C5b and C5c."""
+    stub_file = tmp_path / "SCOPE.md"
+    stub_file.write_text("# SCOPE\n", encoding="utf-8")
+
+    repo = {"slug": "stub", "local_path": str(tmp_path)}
+    records = {record["id"]: record for record in evaluate_scope_health(repo)}
+
+    assert records["C5a"]["status"] == "pass"
+
+    sections = records["C5b"]
+    assert sections["status"] == "warn"
+    assert "One-liner" in sections["missing_sections"]
+    assert "One-liner" in sections["needs_refresh_sections"]
+
+    capabilities = records["C5c"]
+    assert capabilities["status"] == "warn"
+    assert capabilities["needs_refresh_sections"] == ["Provided Capabilities"]
+
+
+@pytest.mark.asyncio
+async def test_doi_reports_c5a_c5b_c5c_separately(tmp_path):
+    """The DoI report lists C5a, C5b and C5c as three passing criteria."""
+    (tmp_path / "SCOPE.md").write_text(VALID_SCOPE, encoding="utf-8")
+
+    repo = {
+        "slug": "valid",
+        "domain_slug": "scopedom",
+        "local_path": str(tmp_path),
+        "remote_url": "https://example.invalid/valid.git",
+    }
+    prefetch = {
+        "domain_status": {"scopedom": "active"},
+        "tpsc_snap_counts": {"valid": 0},
+        "active_goal_counts": {"valid": 0},
+    }
+    report = await evaluate(repo, skip_consistency=True, prefetch=prefetch)
+
+    scope_criteria = {}
+    for criterion in report.criteria:
+        if criterion.id.startswith("C5"):
+            scope_criteria[criterion.id] = criterion
+
+    assert set(scope_criteria) == {"C5a", "C5b", "C5c"}
+    assert scope_criteria["C5a"].status == "pass"
+    assert scope_criteria["C5b"].status == "pass"
+    assert scope_criteria["C5c"].status == "pass"
+
+
+# Marked explicitly, like the module-level async test in this file, so the
+# coroutine tests run regardless of the configured pytest-asyncio mode.
+@pytest.mark.asyncio
+class TestRepoDispatchScopeHealth:
+    """End-to-end checks of the scope-health fields on ``GET /repos/{slug}/dispatch``."""
+
+    async def test_dispatch_flags_stub_scope_for_review(self, client, tmp_path):
+        """A title-only SCOPE.md sets ``scope_needs_review`` and lists the gaps."""
+        await _create_domain(client)
+        (tmp_path / "SCOPE.md").write_text("# SCOPE\n", encoding="utf-8")
+        await _create_repo(client, "scopedom", tmp_path, slug="stub-scope")
+
+        r = await client.get("/repos/stub-scope/dispatch")
+        assert r.status_code == 200, r.text
+        body = r.json()
+
+        assert body["scope_needs_review"] is True
+        by_id = {issue["id"]: issue for issue in body["scope_issue_details"]}
+        assert by_id["C5b"]["missing_sections"]
+        assert by_id["C5c"]["needs_refresh_sections"] == ["Provided Capabilities"]
+
+    async def test_dispatch_reports_valid_scope_without_review(self, client, tmp_path):
+        """A complete SCOPE.md yields three passing records and no review flag."""
+        await _create_domain(client)
+        (tmp_path / "SCOPE.md").write_text(VALID_SCOPE, encoding="utf-8")
+        await _create_repo(client, "scopedom", tmp_path, slug="valid-scope")
+
+        r = await client.get("/repos/valid-scope/dispatch")
+        assert r.status_code == 200, r.text
+        body = r.json()
+
+        assert body["scope_needs_review"] is False
+        assert {issue["id"]: issue["status"] for issue in body["scope_issue_details"]} == {
+            "C5a": "pass",
+            "C5b": "pass",
+            "C5c": "pass",
+        }