generated from coulomb/repo-seed
Transferred deep scope functionality from the custodian
This commit is contained in:
184
src/repo_registry/scope/validator.py
Normal file
184
src/repo_registry/scope/validator.py
Normal file
@@ -0,0 +1,184 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScopeDiffSection:
    """Comparison outcome for one SCOPE.md section.

    Instances are immutable records pairing the text currently on disk
    with the text proposed for the same section.
    """

    # Section heading this entry refers to.
    section: str
    # Comparison verdict; ScopeDiff treats any value other than "ok" as
    # requiring an update.
    status: str
    # Body of the section as it exists today, or None when absent.
    current_text: str | None
    # Body of the section as proposed, or None when there is none.
    proposed_text: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScopeDiff:
    """Collection of per-section comparison results for a SCOPE.md file."""

    # One entry per compared section.
    sections: list[ScopeDiffSection]

    @property
    def needs_update(self) -> bool:
        """Return True when at least one section's status is not "ok"."""
        for entry in self.sections:
            if entry.status != "ok":
                return True
        return False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScopeValidationIssue:
    """A single finding reported while validating a SCOPE.md file."""

    # Identifier of the check that produced the finding (e.g. "C5a").
    check: str
    # Severity label; ValidationResult treats "error" as a failure.
    severity: str
    # Human-readable description of the finding.
    message: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ValidationResult:
    """Aggregate of all findings from one validation run."""

    # Findings in the order they were recorded.
    issues: list[ScopeValidationIssue]

    @property
    def ok(self) -> bool:
        """Return True when no recorded issue has severity "error"."""
        return all(issue.severity != "error" for issue in self.issues)
|
||||
|
||||
|
||||
class ScopeValidator:
    """Validate and diff SCOPE.md files.

    ``validate`` inspects an existing file for missing sections, section
    ordering and capability-block completeness.  ``diff`` compares an
    existing file against the text produced by a ``ScopeGenerator``.
    """

    # Level-two markdown heading confined to a single line.  Only spaces and
    # tabs may separate "##" from the title: "\s" would also match newlines,
    # which let a bare "##" line swallow the following line as its title.
    _HEADING_RE = re.compile(r"^##[ \t]+(\S.*?)[ \t]*$", re.MULTILINE)

    # Opening fence of a capability block.  Case-insensitive so detection
    # agrees with the extraction pattern below.
    _CAPABILITY_FENCE_RE = re.compile(r"```capability", re.IGNORECASE)

    # Body of a fenced capability block (text between the fences).
    _CAPABILITY_BLOCK_RE = re.compile(
        r"```capability\s*(.*?)```",
        flags=re.DOTALL | re.IGNORECASE,
    )

    def __init__(self, generator: ScopeGenerator | None = None) -> None:
        """Store the optional generator; it is only required by ``diff``."""
        self.generator = generator

    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
        """Compare the file at *existing_path* with freshly generated text.

        Each name in ``SCOPE_SECTIONS`` yields one ``ScopeDiffSection`` whose
        status is ``"missing"`` (section absent from the existing file),
        ``"ok"`` (normalized texts are equal) or ``"stale"`` (they differ).
        A non-existent file is treated as empty content.

        Raises:
            ValueError: if the validator was built without a generator.
        """
        if self.generator is None:
            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")
        current = existing_path.read_text(encoding="utf-8") if existing_path.exists() else ""
        # NOTE(review): generate() is assumed to return the proposed SCOPE.md
        # text as a string - confirm against ScopeGenerator.
        proposed = self.generator.generate(repo_slug)
        current_sections = self._parse_sections(current)
        proposed_sections = self._parse_sections(proposed)
        sections: list[ScopeDiffSection] = []
        for section in SCOPE_SECTIONS:
            current_text = current_sections.get(section)
            proposed_text = proposed_sections.get(section, "")
            if current_text is None:
                status = "missing"
            elif self._normalize(current_text) == self._normalize(proposed_text):
                status = "ok"
            else:
                status = "stale"
            sections.append(
                ScopeDiffSection(
                    section=section,
                    status=status,
                    current_text=current_text,
                    proposed_text=proposed_text,
                )
            )
        return ScopeDiff(sections=sections)

    def validate(self, path: Path) -> ValidationResult:
        """Run the SCOPE.md checks against the file at *path*.

        Checks, in the order their issues are reported:

        * C5a - the file exists (error; no further checks run if it fails).
        * C5b - every name in ``SCOPE_SECTIONS`` is present (warn when only
          "Provided Capabilities" is missing, error otherwise) and the
          recognised headings appear in canonical order (warn).
        * C5c - the "Provided Capabilities" section is present and well
          formed (warn).
        """
        if not path.exists():
            return ValidationResult(
                issues=[
                    ScopeValidationIssue(
                        check="C5a",
                        severity="error",
                        message="SCOPE.md is missing.",
                    )
                ]
            )
        content = path.read_text(encoding="utf-8")
        sections = self._parse_sections(content)
        issues: list[ScopeValidationIssue] = []

        missing = [section for section in SCOPE_SECTIONS if section not in sections]
        if missing:
            severity = "warn" if missing == ["Provided Capabilities"] else "error"
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity=severity,
                    message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
                )
            )

        # Compare the recognised headings, in file order, with the canonical
        # order restricted to the sections that are actually present.
        ordered = self._heading_order(content)
        expected_order = [section for section in SCOPE_SECTIONS if section in sections]
        if ordered[: len(expected_order)] != expected_order:
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity="warn",
                    message="SCOPE.md sections are not in canonical order.",
                )
            )

        issues.extend(self._capability_issues(sections.get("Provided Capabilities")))
        return ValidationResult(issues=issues)

    def _capability_issues(self, capabilities: str | None) -> list[ScopeValidationIssue]:
        """Return the C5c findings for the "Provided Capabilities" body.

        *capabilities* is the section text, or None when the section is
        absent.  A section passes when every capability block declares both
        ``type`` and ``title``, or when it has no capability fence but
        contains the explicit empty-state note.
        """
        if capabilities is None:
            return [
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message="Provided Capabilities section is missing.",
                )
            ]
        # Detect the fence with the same case-insensitivity used to extract
        # blocks; otherwise a "```Capability" fence is never inspected and is
        # wrongly reported as an empty section.
        if self._CAPABILITY_FENCE_RE.search(capabilities):
            issues: list[ScopeValidationIssue] = []
            for index, block in enumerate(self._capability_blocks(capabilities), start=1):
                missing_keys = {"type", "title"} - self._capability_keys(block)
                if missing_keys:
                    issues.append(
                        ScopeValidationIssue(
                            check="C5c",
                            severity="warn",
                            message=(
                                f"Capability block {index} is missing required field(s): "
                                f"{', '.join(sorted(missing_keys))}."
                            ),
                        )
                    )
            return issues
        if "No approved capabilities yet" not in capabilities:
            return [
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message=(
                        "Provided Capabilities has no capability blocks or explicit "
                        "empty-state note."
                    ),
                )
            ]
        return []

    def _parse_sections(self, content: str) -> dict[str, str]:
        """Split *content* into ``{heading title: body}`` by "## " headings.

        A body runs from the end of its heading to the start of the next
        heading (or the end of the text).  Surrounding whitespace and a
        trailing ``---`` separator line are removed.  When a title occurs
        more than once, the last occurrence wins.
        """
        matches = list(self._HEADING_RE.finditer(content))
        sections: dict[str, str] = {}
        for index, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
            body = content[start:end]
            body = re.sub(r"\n---\s*$", "", body.strip())
            sections[title] = body.strip()
        return sections

    def _heading_order(self, content: str) -> list[str]:
        """Return the "## " heading titles found in ``SCOPE_SECTIONS``, in file order."""
        titles = (match.group(1).strip() for match in self._HEADING_RE.finditer(content))
        return [title for title in titles if title in SCOPE_SECTIONS]

    def _normalize(self, value: str | None) -> str:
        """Reduce *value* to a comparison key that ignores formatting.

        HTML comments are removed, runs of the characters `` ` * _ > # - ``
        become a space, whitespace is collapsed and the result is
        lower-cased.  ``None`` normalizes to the empty string.
        """
        if value is None:
            return ""
        without_comments = re.sub(r"<!--.*?-->", "", value, flags=re.DOTALL)
        without_markdown = re.sub(r"[`*_>#-]+", " ", without_comments)
        return re.sub(r"\s+", " ", without_markdown).strip().lower()

    def _capability_blocks(self, content: str) -> list[str]:
        """Return the inner text of every closed capability fence in *content*."""
        return self._CAPABILITY_BLOCK_RE.findall(content)

    def _capability_keys(self, block: str) -> set[str]:
        """Return the ``key:`` names that start a line (no indentation) in *block*."""
        return {
            match.group(1)
            for match in re.finditer(r"^([A-Za-z_][A-Za-z0-9_-]*):", block, re.MULTILINE)
        }
|
||||
Reference in New Issue
Block a user