from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import Path

from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator

# A level-2 Markdown heading ("## Title"); group 1 is the title text.
_HEADING_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)
# A trailing horizontal rule ("---") at the very end of a section body.
_TRAILING_RULE_RE = re.compile(r"\n---\s*$")
# HTML comments, possibly spanning several lines.
_HTML_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)
# Markdown punctuation that is ignored when comparing section text.
_MARKDOWN_NOISE_RE = re.compile(r"[`*_>#-]+")
_WHITESPACE_RE = re.compile(r"\s+")
# Opening fence of a capability block, matched case-insensitively so that
# detection agrees with extraction below.
_CAPABILITY_FENCE_RE = re.compile(r"```capability", re.IGNORECASE)
# A complete fenced capability block; group 1 is the block body.
_CAPABILITY_BLOCK_RE = re.compile(
    r"```capability\s*(.*?)```",
    re.DOTALL | re.IGNORECASE,
)
# A "key:" entry that starts at the beginning of a line.
_CAPABILITY_KEY_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*):", re.MULTILINE)


@dataclass(frozen=True)
class ScopeDiffSection:
    """Comparison result for one canonical SCOPE.md section."""

    # Section title, taken from SCOPE_SECTIONS.
    section: str
    # "missing", "ok" or "stale" (as assigned by ScopeValidator.diff).
    status: str
    # Section body found in the existing file; None when the section is absent.
    current_text: str | None
    # Section body from the generated document ("" when it has no such section).
    proposed_text: str | None


@dataclass(frozen=True)
class ScopeDiff:
    """Per-section comparison of an existing SCOPE.md with a generated one."""

    sections: list[ScopeDiffSection]

    @property
    def needs_update(self) -> bool:
        """Return True when at least one section has a status other than "ok"."""
        return any(section.status != "ok" for section in self.sections)


@dataclass(frozen=True)
class ScopeValidationIssue:
    """A single finding produced by ScopeValidator.validate."""

    # Check identifier; validate emits "C5a", "C5b" and "C5c".
    check: str
    # "error" or "warn".
    severity: str
    # Human-readable description of the finding.
    message: str


@dataclass(frozen=True)
class ValidationResult:
    """Collection of issues found while validating a SCOPE.md file."""

    issues: list[ScopeValidationIssue]

    @property
    def ok(self) -> bool:
        """Return True when no issue has severity "error" (warnings are allowed)."""
        return not any(issue.severity == "error" for issue in self.issues)


class ScopeValidator:
    """Validate and diff SCOPE.md files.

    ``validate`` works without a generator; ``diff`` needs one to produce the
    proposed document to compare against.
    """

    def __init__(self, generator: ScopeGenerator | None = None) -> None:
        self.generator = generator

    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
        """Compare the file at *existing_path* with a freshly generated document.

        A non-existent file is treated as empty, so every section is reported
        as "missing". Section bodies are compared after ``_normalize``, so
        differences in Markdown punctuation, whitespace, case and HTML
        comments do not make a section "stale".

        Raises:
            ValueError: if the validator was constructed without a generator.
        """
        if self.generator is None:
            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")

        current = (
            existing_path.read_text(encoding="utf-8")
            if existing_path.exists()
            else ""
        )
        proposed = self.generator.generate(repo_slug)
        current_sections = self._parse_sections(current)
        proposed_sections = self._parse_sections(proposed)

        sections: list[ScopeDiffSection] = []
        for section in SCOPE_SECTIONS:
            current_text = current_sections.get(section)
            proposed_text = proposed_sections.get(section, "")
            if current_text is None:
                status = "missing"
            elif self._normalize(current_text) == self._normalize(proposed_text):
                status = "ok"
            else:
                status = "stale"
            sections.append(
                ScopeDiffSection(
                    section=section,
                    status=status,
                    current_text=current_text,
                    proposed_text=proposed_text,
                )
            )
        return ScopeDiff(sections=sections)

    def validate(self, path: Path) -> ValidationResult:
        """Check the SCOPE.md at *path* for structural problems.

        Checks performed:

        * C5a (error): the file does not exist; no further checks are run.
        * C5b: canonical sections are missing ("warn" when only
          "Provided Capabilities" is missing, otherwise "error"), or the
          canonical sections present are not in canonical order ("warn").
        * C5c (warn): "Provided Capabilities" is missing, contains a
          capability block without a ``type`` or ``title`` key, or contains
          neither a capability fence nor the "No approved capabilities yet"
          note.
        """
        if not path.exists():
            return ValidationResult(
                issues=[
                    ScopeValidationIssue(
                        check="C5a",
                        severity="error",
                        message="SCOPE.md is missing.",
                    )
                ]
            )

        issues: list[ScopeValidationIssue] = []
        content = path.read_text(encoding="utf-8")
        sections = self._parse_sections(content)

        missing = [section for section in SCOPE_SECTIONS if section not in sections]
        if missing:
            severity = "warn" if missing == ["Provided Capabilities"] else "error"
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity=severity,
                    message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
                )
            )

        # Compare the order of canonical headings as written (duplicates
        # included) against the canonical order of the sections present.
        ordered = self._heading_order(content)
        expected_order = [section for section in SCOPE_SECTIONS if section in sections]
        if ordered[: len(expected_order)] != expected_order:
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity="warn",
                    message="SCOPE.md sections are not in canonical order.",
                )
            )

        capabilities = sections.get("Provided Capabilities")
        if capabilities is None:
            issues.append(
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message="Provided Capabilities section is missing.",
                )
            )
        elif _CAPABILITY_FENCE_RE.search(capabilities):
            # Detection is case-insensitive to match _capability_blocks;
            # otherwise a "```Capability" fence would be reported as
            # "no capability blocks" instead of being checked.
            blocks = self._capability_blocks(capabilities)
            for index, block in enumerate(blocks, start=1):
                missing_keys = {"type", "title"} - self._capability_keys(block)
                if missing_keys:
                    issues.append(
                        ScopeValidationIssue(
                            check="C5c",
                            severity="warn",
                            message=(
                                f"Capability block {index} is missing required field(s): "
                                f"{', '.join(sorted(missing_keys))}."
                            ),
                        )
                    )
        elif "No approved capabilities yet" not in capabilities:
            issues.append(
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message=(
                        "Provided Capabilities has no capability blocks or explicit "
                        "empty-state note."
                    ),
                )
            )

        return ValidationResult(issues=issues)

    def _parse_sections(self, content: str) -> dict[str, str]:
        """Split *content* on level-2 headings into ``{title: body}``.

        Each body runs up to the next level-2 heading (or the end of the
        text), is stripped of surrounding whitespace and loses a trailing
        ``---`` rule. If a title occurs more than once, the last occurrence
        wins.
        """
        matches = list(_HEADING_RE.finditer(content))
        sections: dict[str, str] = {}
        for index, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
            body = _TRAILING_RULE_RE.sub("", content[start:end].strip())
            sections[title] = body.strip()
        return sections

    def _heading_order(self, content: str) -> list[str]:
        """Return the canonical section titles in the order they appear."""
        titles = (match.group(1).strip() for match in _HEADING_RE.finditer(content))
        return [title for title in titles if title in SCOPE_SECTIONS]

    def _normalize(self, value: str | None) -> str:
        """Reduce section text to a form suitable for equality comparison.

        HTML comments are removed, runs of Markdown punctuation
        (`` ` * _ > # - ``) become a space, whitespace is collapsed and the
        result is lower-cased. ``None`` normalizes to the empty string.
        """
        if value is None:
            return ""
        # Strip HTML comments first: the pattern here used to be empty, which
        # made this step a no-op and let a comment alone mark a section stale.
        without_comments = _HTML_COMMENT_RE.sub("", value)
        without_markdown = _MARKDOWN_NOISE_RE.sub(" ", without_comments)
        return _WHITESPACE_RE.sub(" ", without_markdown).strip().lower()

    def _capability_blocks(self, content: str) -> list[str]:
        """Return the bodies of all complete ```capability fenced blocks."""
        return _CAPABILITY_BLOCK_RE.findall(content)

    def _capability_keys(self, block: str) -> set[str]:
        """Return the ``key:`` names that start a line in *block*."""
        return {match.group(1) for match in _CAPABILITY_KEY_RE.finditer(block)}