Files
repo-scoping/src/repo_registry/scope/validator.py

185 lines
6.5 KiB
Python

from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator
@dataclass(frozen=True)
class ScopeDiffSection:
    """Comparison outcome for a single SCOPE.md section.

    Instances are immutable value objects; ``ScopeValidator.diff`` creates one
    per canonical section.
    """

    # Section heading text (the title of a level-2 Markdown heading).
    section: str
    # Comparison verdict; ``ScopeValidator.diff`` assigns "missing", "ok" or
    # "stale".
    status: str
    # Section body found in the existing file, or None when the section is
    # absent from it.
    current_text: str | None
    # Section body taken from the freshly generated document.
    proposed_text: str | None
@dataclass(frozen=True)
class ScopeDiff:
    """Per-section comparison of an existing SCOPE.md against a proposal."""

    # One entry per compared section, in the order they were evaluated.
    sections: list[ScopeDiffSection]

    @property
    def needs_update(self) -> bool:
        """Return True when at least one section's status is not ``"ok"``.

        An empty ``sections`` list yields False.
        """
        every_section_ok = all(entry.status == "ok" for entry in self.sections)
        return not every_section_ok
@dataclass(frozen=True)
class ScopeValidationIssue:
    """A single finding reported while validating a SCOPE.md file."""

    # Identifier of the check that produced the finding (the validator in this
    # module emits "C5a", "C5b" and "C5c").
    check: str
    # Severity label; the validator in this module emits "error" and "warn".
    severity: str
    # Human-readable description of the finding.
    message: str
@dataclass(frozen=True)
class ValidationResult:
    """Collected findings from one validation run."""

    # Every finding raised during validation, in the order it was detected.
    issues: list[ScopeValidationIssue]

    @property
    def ok(self) -> bool:
        """Return True when no finding has severity ``"error"``.

        Findings of any other severity do not affect the result, and an empty
        ``issues`` list yields True.
        """
        return all(finding.severity != "error" for finding in self.issues)
class ScopeValidator:
    """Validate and diff SCOPE.md files.

    A "section" is the text under a level-2 Markdown heading (``## Title``),
    up to the next level-2 heading or the end of the document.

    NOTE: heading detection is purely line-based. A line starting with ``## ``
    inside a fenced code block is still treated as a section heading.
    """

    # Level-2 heading confined to ONE line. ``[^\S\n]`` means "whitespace other
    # than newline": using plain ``\s`` here would let an empty ``##`` line
    # swallow the following line and adopt its text as the section title.
    _HEADING_RE = re.compile(r"^##[^\S\n]+(\S.*?)[^\S\n]*$", re.MULTILINE)
    # Trailing ``---`` separator at the end of an already-stripped body. The
    # ``\A`` alternative covers a body that consists of the separator alone.
    _TRAILING_RULE_RE = re.compile(r"(?:\A|\n)---\s*\Z")
    # Opening fence of a capability block. Case-insensitive so that detection
    # agrees with the extraction pattern below.
    _CAPABILITY_FENCE_RE = re.compile(r"```capability", re.IGNORECASE)
    _CAPABILITY_BLOCK_RE = re.compile(
        r"```capability\s*(.*?)```",
        re.DOTALL | re.IGNORECASE,
    )
    # A key at the very start of a line; indented keys are not matched.
    _CAPABILITY_KEY_RE = re.compile(
        r"^([A-Za-z_][A-Za-z0-9_-]*):",
        re.MULTILINE,
    )
    _REQUIRED_CAPABILITY_KEYS = frozenset({"type", "title"})

    def __init__(self, generator: ScopeGenerator | None = None) -> None:
        """Store the optional generator.

        Args:
            generator: Source of the proposed document used by :meth:`diff`.
                :meth:`validate` does not use it.
        """
        self.generator = generator

    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
        """Compare the file at *existing_path* with a freshly generated document.

        Every name in ``SCOPE_SECTIONS`` is classified as ``"missing"`` (not
        present in the existing file), ``"ok"`` (bodies equal after
        :meth:`_normalize`) or ``"stale"`` (bodies differ). A non-existent
        file is treated as empty, so every section is ``"missing"``.

        Args:
            repo_slug: Passed through to ``generator.generate``.
            existing_path: Location of the current SCOPE.md.

        Returns:
            A ``ScopeDiff`` with one entry per canonical section, in
            ``SCOPE_SECTIONS`` order.

        Raises:
            ValueError: If the validator was built without a generator.
        """
        if self.generator is None:
            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")

        current = (
            existing_path.read_text(encoding="utf-8")
            if existing_path.exists()
            else ""
        )
        # Presumably returns the full proposed SCOPE.md as a string; it is
        # parsed exactly like the existing file.
        proposed = self.generator.generate(repo_slug)
        current_sections = self._parse_sections(current)
        proposed_sections = self._parse_sections(proposed)

        entries: list[ScopeDiffSection] = []
        for section in SCOPE_SECTIONS:
            current_text = current_sections.get(section)
            proposed_text = proposed_sections.get(section, "")
            if current_text is None:
                status = "missing"
            elif self._normalize(current_text) == self._normalize(proposed_text):
                status = "ok"
            else:
                status = "stale"
            entries.append(
                ScopeDiffSection(
                    section=section,
                    status=status,
                    current_text=current_text,
                    proposed_text=proposed_text,
                )
            )
        return ScopeDiff(sections=entries)

    def validate(self, path: Path) -> ValidationResult:
        """Check the SCOPE.md at *path* and collect findings.

        Checks performed:

        * ``C5a`` (error): the file does not exist. No other check runs.
        * ``C5b``: canonical sections are missing (warn when only
          "Provided Capabilities" is missing, error otherwise), or the
          canonical sections present are not in ``SCOPE_SECTIONS`` order
          (warn).
        * ``C5c`` (warn): problems in the "Provided Capabilities" section,
          see :meth:`_capability_issues`.

        Args:
            path: Location of the SCOPE.md file.

        Returns:
            A ``ValidationResult`` holding every finding, in detection order.
        """
        if not path.exists():
            return ValidationResult(
                issues=[
                    ScopeValidationIssue(
                        check="C5a",
                        severity="error",
                        message="SCOPE.md is missing.",
                    )
                ]
            )

        content = path.read_text(encoding="utf-8")
        sections = self._parse_sections(content)
        issues: list[ScopeValidationIssue] = []

        missing = [name for name in SCOPE_SECTIONS if name not in sections]
        if missing:
            severity = "warn" if missing == ["Provided Capabilities"] else "error"
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity=severity,
                    message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
                )
            )

        # Compare the order in which canonical headings appear in the file
        # with the canonical order restricted to the sections present.
        ordered = self._heading_order(content)
        expected_order = [name for name in SCOPE_SECTIONS if name in sections]
        if ordered[: len(expected_order)] != expected_order:
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity="warn",
                    message="SCOPE.md sections are not in canonical order.",
                )
            )

        issues.extend(self._capability_issues(sections.get("Provided Capabilities")))
        return ValidationResult(issues=issues)

    def _capability_issues(
        self, capabilities: str | None
    ) -> list[ScopeValidationIssue]:
        """Return the ``C5c`` findings for the "Provided Capabilities" body.

        Args:
            capabilities: Section body, or None when the section is absent.

        Returns:
            A warning when the section is absent; one warning per capability
            block lacking a ``type`` or ``title`` key when a capability fence
            is present; a warning when there is neither a capability fence nor
            the "No approved capabilities yet" note; otherwise an empty list.
        """
        if capabilities is None:
            return [
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message="Provided Capabilities section is missing.",
                )
            ]

        # Case-insensitive on purpose: _capability_blocks extracts blocks
        # case-insensitively, so the gate must accept the same fences.
        if self._CAPABILITY_FENCE_RE.search(capabilities):
            issues: list[ScopeValidationIssue] = []
            blocks = self._capability_blocks(capabilities)
            for index, block in enumerate(blocks, start=1):
                missing_keys = (
                    self._REQUIRED_CAPABILITY_KEYS - self._capability_keys(block)
                )
                if missing_keys:
                    issues.append(
                        ScopeValidationIssue(
                            check="C5c",
                            severity="warn",
                            message=(
                                f"Capability block {index} is missing required field(s): "
                                f"{', '.join(sorted(missing_keys))}."
                            ),
                        )
                    )
            return issues

        if "No approved capabilities yet" in capabilities:
            return []

        return [
            ScopeValidationIssue(
                check="C5c",
                severity="warn",
                message=(
                    "Provided Capabilities has no capability blocks or explicit "
                    "empty-state note."
                ),
            )
        ]

    def _parse_sections(self, content: str) -> dict[str, str]:
        """Split *content* into ``{heading title: body}``.

        Bodies are stripped of surrounding whitespace and of one trailing
        ``---`` separator. Text before the first heading is discarded. When a
        title occurs more than once, the last occurrence wins.
        """
        matches = list(self._HEADING_RE.finditer(content))
        sections: dict[str, str] = {}
        for index, match in enumerate(matches):
            title = match.group(1).strip()
            body_start = match.end()
            body_end = (
                matches[index + 1].start()
                if index + 1 < len(matches)
                else len(content)
            )
            body = content[body_start:body_end].strip()
            sections[title] = self._TRAILING_RULE_RE.sub("", body).strip()
        return sections

    def _heading_order(self, content: str) -> list[str]:
        """Return canonical section titles in the order they appear in *content*.

        Headings whose title is not in ``SCOPE_SECTIONS`` are skipped;
        repeated headings are kept.
        """
        titles = (
            match.group(1).strip() for match in self._HEADING_RE.finditer(content)
        )
        return [title for title in titles if title in SCOPE_SECTIONS]

    def _normalize(self, value: str | None) -> str:
        """Reduce *value* to a lenient, comparable form.

        HTML comments are removed, runs of the characters backtick, ``*``,
        ``_``, ``>``, ``#`` and ``-`` become a space, whitespace is collapsed
        and the result is lower-cased. None normalizes to the empty string.
        """
        if value is None:
            return ""
        without_comments = re.sub(r"<!--.*?-->", "", value, flags=re.DOTALL)
        without_markdown = re.sub(r"[`*_>#-]+", " ", without_comments)
        return re.sub(r"\s+", " ", without_markdown).strip().lower()

    def _capability_blocks(self, content: str) -> list[str]:
        """Return the inner text of each terminated capability fence.

        The fence info string is matched case-insensitively; a fence without
        a closing fence yields no block.
        """
        return self._CAPABILITY_BLOCK_RE.findall(content)

    def _capability_keys(self, block: str) -> set[str]:
        """Return the ``key:`` names that start at the beginning of a line."""
        return {
            match.group(1) for match in self._CAPABILITY_KEY_RE.finditer(block)
        }