Transferred deep scope functionality from the custodian

2026-05-01 00:42:10 +02:00
parent b424dea01b
commit 2d9da98257
10 changed files with 1397 additions and 47 deletions
--- a/src/repo_registry/scope/validator.py
+++ b/src/repo_registry/scope/validator.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from pathlib import Path
+
+from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator
+
+
+@dataclass(frozen=True)
+class ScopeDiffSection:
+    """Comparison result for a single SCOPE.md section."""
+
+    # Section heading title, taken from SCOPE_SECTIONS.
+    section: str
+    # "missing", "ok" or "stale", as assigned by ScopeValidator.diff.
+    status: str
+    # Section body found in the existing file; None when the section is absent.
+    current_text: str | None
+    # Section body from the generated content ("" when the generator omits it).
+    proposed_text: str | None
+
+
+@dataclass(frozen=True)
+class ScopeDiff:
+    """Per-section comparison of an existing SCOPE.md with generated content."""
+
+    sections: list[ScopeDiffSection]
+
+    @property
+    def needs_update(self) -> bool:
+        """Return True when at least one section has a status other than "ok"."""
+        statuses = (entry.status for entry in self.sections)
+        return not all(status == "ok" for status in statuses)
+
+
+@dataclass(frozen=True)
+class ScopeValidationIssue:
+    """A single finding reported by ScopeValidator.validate."""
+
+    # Identifier of the check that raised the issue ("C5a", "C5b" or "C5c" here).
+    check: str
+    # "error" or "warn"; only "error" makes ValidationResult.ok False.
+    severity: str
+    # Human-readable description of the problem.
+    message: str
+
+
+@dataclass(frozen=True)
+class ValidationResult:
+    """Collected issues from validating one SCOPE.md file."""
+
+    issues: list[ScopeValidationIssue]
+
+    @property
+    def ok(self) -> bool:
+        """Return True unless some issue has severity "error"."""
+        for issue in self.issues:
+            if issue.severity == "error":
+                return False
+        return True
+
+
+class ScopeValidator:
+    """Validate SCOPE.md files and diff them against generated content.
+
+    ``validate`` only needs the file on disk. ``diff`` additionally requires a
+    ``ScopeGenerator`` to produce the proposed content.
+    """
+
+    # Level-two Markdown heading confined to a single line. Only spaces and tabs
+    # may separate "##" from the title, so a bare "##" line can never absorb the
+    # following line as its title.
+    _HEADING_RE = re.compile(r"^##[ \t]+(.+?)[ \t]*$", re.MULTILINE)
+    # Trailing horizontal rule ("---") left at the end of a section body.
+    _TRAILING_RULE_RE = re.compile(r"\n---\s*$")
+    # Fenced ```capability block; group 1 is the block body.
+    _CAPABILITY_BLOCK_RE = re.compile(r"```capability\s*(.*?)```", re.DOTALL | re.IGNORECASE)
+    # Unindented "key:" at the start of a line inside a capability block.
+    _CAPABILITY_KEY_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*):", re.MULTILINE)
+    _REQUIRED_CAPABILITY_KEYS = frozenset({"type", "title"})
+    _EMPTY_CAPABILITIES_NOTE = "No approved capabilities yet"
+
+    def __init__(self, generator: ScopeGenerator | None = None) -> None:
+        """Store the optional generator; it is only needed by ``diff``."""
+        self.generator = generator
+
+    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
+        """Compare the SCOPE.md at ``existing_path`` with generated content.
+
+        A file that does not exist is treated as empty, so every section is
+        reported as "missing". Section bodies are compared after
+        ``_normalize``, so formatting-only differences do not count as "stale".
+
+        Args:
+            repo_slug: Passed to the generator to produce the proposed content.
+            existing_path: Location of the current SCOPE.md.
+
+        Returns:
+            One ``ScopeDiffSection`` per entry in ``SCOPE_SECTIONS``, in order.
+
+        Raises:
+            ValueError: If the validator was created without a generator.
+        """
+        if self.generator is None:
+            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")
+        current = existing_path.read_text(encoding="utf-8") if existing_path.exists() else ""
+        proposed = self.generator.generate(repo_slug)
+        current_sections = self._parse_sections(current)
+        proposed_sections = self._parse_sections(proposed)
+        entries: list[ScopeDiffSection] = []
+        for section in SCOPE_SECTIONS:
+            current_text = current_sections.get(section)
+            proposed_text = proposed_sections.get(section, "")
+            if current_text is None:
+                status = "missing"
+            elif self._normalize(current_text) == self._normalize(proposed_text):
+                status = "ok"
+            else:
+                status = "stale"
+            entries.append(
+                ScopeDiffSection(
+                    section=section,
+                    status=status,
+                    current_text=current_text,
+                    proposed_text=proposed_text,
+                )
+            )
+        return ScopeDiff(sections=entries)
+
+    def validate(self, path: Path) -> ValidationResult:
+        """Run the SCOPE.md checks against the file at ``path``.
+
+        Checks, in reporting order: the file exists (C5a), all sections from
+        ``SCOPE_SECTIONS`` are present and in canonical order (C5b), and the
+        "Provided Capabilities" section is well formed (C5c).
+
+        Returns:
+            A ``ValidationResult`` holding every issue found. A missing file
+            yields a single C5a error and no further checks.
+        """
+        if not path.exists():
+            return ValidationResult(
+                issues=[
+                    ScopeValidationIssue(
+                        check="C5a",
+                        severity="error",
+                        message="SCOPE.md is missing.",
+                    )
+                ]
+            )
+        content = path.read_text(encoding="utf-8")
+        sections = self._parse_sections(content)
+        issues = [
+            *self._check_required_sections(sections),
+            *self._check_section_order(content, sections),
+            *self._check_capabilities(sections.get("Provided Capabilities")),
+        ]
+        return ValidationResult(issues=issues)
+
+    def _check_required_sections(self, sections: dict[str, str]) -> list[ScopeValidationIssue]:
+        """Report sections from ``SCOPE_SECTIONS`` that are absent (C5b)."""
+        missing = [name for name in SCOPE_SECTIONS if name not in sections]
+        if not missing:
+            return []
+        # Lacking only "Provided Capabilities" is a warning; anything else is an error.
+        severity = "warn" if missing == ["Provided Capabilities"] else "error"
+        return [
+            ScopeValidationIssue(
+                check="C5b",
+                severity=severity,
+                message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
+            )
+        ]
+
+    def _check_section_order(
+        self, content: str, sections: dict[str, str]
+    ) -> list[ScopeValidationIssue]:
+        """Warn when the known headings do not follow ``SCOPE_SECTIONS`` order (C5b)."""
+        found = self._heading_order(content)
+        expected = [name for name in SCOPE_SECTIONS if name in sections]
+        if found[: len(expected)] == expected:
+            return []
+        return [
+            ScopeValidationIssue(
+                check="C5b",
+                severity="warn",
+                message="SCOPE.md sections are not in canonical order.",
+            )
+        ]
+
+    def _check_capabilities(self, capabilities: str | None) -> list[ScopeValidationIssue]:
+        """Check the body of the "Provided Capabilities" section (C5c).
+
+        The parsed block list is the single source of truth for whether the
+        section contains capability blocks, so detection and parsing agree on
+        case-insensitive fences, and an unterminated fence counts as no block.
+        """
+        if capabilities is None:
+            return [
+                ScopeValidationIssue(
+                    check="C5c",
+                    severity="warn",
+                    message="Provided Capabilities section is missing.",
+                )
+            ]
+        blocks = self._capability_blocks(capabilities)
+        if not blocks:
+            if self._EMPTY_CAPABILITIES_NOTE in capabilities:
+                return []
+            return [
+                ScopeValidationIssue(
+                    check="C5c",
+                    severity="warn",
+                    message=(
+                        "Provided Capabilities has no capability blocks or explicit "
+                        "empty-state note."
+                    ),
+                )
+            ]
+        issues: list[ScopeValidationIssue] = []
+        for index, block in enumerate(blocks, start=1):
+            missing_keys = self._REQUIRED_CAPABILITY_KEYS - self._capability_keys(block)
+            if missing_keys:
+                issues.append(
+                    ScopeValidationIssue(
+                        check="C5c",
+                        severity="warn",
+                        message=(
+                            f"Capability block {index} is missing required field(s): "
+                            f"{', '.join(sorted(missing_keys))}."
+                        ),
+                    )
+                )
+        return issues
+
+    def _parse_sections(self, content: str) -> dict[str, str]:
+        """Split ``content`` into ``{heading title: body}`` on "## " headings.
+
+        Bodies are stripped and a trailing "---" rule is removed. When a title
+        occurs more than once, the last occurrence wins.
+        """
+        matches = list(self._HEADING_RE.finditer(content))
+        sections: dict[str, str] = {}
+        for index, match in enumerate(matches):
+            title = match.group(1).strip()
+            # A body runs up to the next heading, or to the end of the document.
+            end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
+            body = content[match.end() : end].strip()
+            sections[title] = self._TRAILING_RULE_RE.sub("", body).strip()
+        return sections
+
+    def _heading_order(self, content: str) -> list[str]:
+        """Return the ``SCOPE_SECTIONS`` headings in the order they appear."""
+        titles = (match.group(1).strip() for match in self._HEADING_RE.finditer(content))
+        return [title for title in titles if title in SCOPE_SECTIONS]
+
+    def _normalize(self, value: str | None) -> str:
+        """Reduce ``value`` to lowercase text for formatting-insensitive comparison.
+
+        HTML comments are dropped, runs of the characters `` ` * _ > # - ``
+        become a space, and whitespace is collapsed. ``None`` yields "".
+        """
+        if value is None:
+            return ""
+        without_comments = re.sub(r"<!--.*?-->", "", value, flags=re.DOTALL)
+        without_markdown = re.sub(r"[`*_>#-]+", " ", without_comments)
+        return re.sub(r"\s+", " ", without_markdown).strip().lower()
+
+    def _capability_blocks(self, content: str) -> list[str]:
+        """Return the body of every closed ```capability fence in ``content``."""
+        return self._CAPABILITY_BLOCK_RE.findall(content)
+
+    def _capability_keys(self, block: str) -> set[str]:
+        """Return the unindented ``key:`` names that start a line in ``block``."""
+        return {match.group(1) for match in self._CAPABILITY_KEY_RE.finditer(block)}