# generated from coulomb/repo-seed
# 185 lines, 6.5 KiB, Python
from __future__ import annotations
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator
|
|
|
|
|
|
@dataclass(frozen=True)
class ScopeDiffSection:
    """Comparison result for a single SCOPE.md section.

    Instances are immutable. ``ScopeValidator.diff`` creates one per
    canonical section name.
    """

    # Section heading text (without the leading "## ").
    section: str
    # Comparison outcome; ``ScopeValidator.diff`` emits "missing", "ok" or
    # "stale".
    status: str
    # Section body found in the existing file, or None when the section is
    # absent from it.
    current_text: str | None
    # Section body taken from the freshly generated document.
    proposed_text: str | None
|
|
|
|
|
|
@dataclass(frozen=True)
class ScopeDiff:
    """Section-by-section comparison of an existing and a proposed SCOPE.md."""

    # One entry per compared section, in the order they were compared.
    sections: list[ScopeDiffSection]

    @property
    def needs_update(self) -> bool:
        """Return True when at least one section has a status other than "ok".

        An empty ``sections`` list yields False.
        """
        return not all(entry.status == "ok" for entry in self.sections)
|
|
|
|
|
|
@dataclass(frozen=True)
class ScopeValidationIssue:
    """A single finding reported while validating a SCOPE.md file."""

    # Identifier of the check that produced the finding; the validator in
    # this module uses "C5a", "C5b" and "C5c".
    check: str
    # Severity label; the validator in this module uses "warn" and "error".
    severity: str
    # Human-readable description of the finding.
    message: str
|
|
|
|
|
|
@dataclass(frozen=True)
class ValidationResult:
    """Outcome of validating a SCOPE.md file."""

    # Every finding collected during validation, warnings included.
    issues: list[ScopeValidationIssue]

    @property
    def ok(self) -> bool:
        """Return True when no issue has severity "error".

        Warnings do not affect the result; an empty issue list is ok.
        """
        return all(finding.severity != "error" for finding in self.issues)
|
|
|
|
|
|
class ScopeValidator:
    """Validate and diff SCOPE.md files.

    ``diff`` compares an on-disk SCOPE.md with freshly generated content,
    section by section. ``validate`` checks an on-disk SCOPE.md for the
    required sections, their canonical order, and well-formed capability
    blocks.
    """

    # Level-2 markdown heading ("## Title"). Shared by section parsing and
    # heading-order detection so both always agree on what a heading is.
    _HEADING_PATTERN = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE)

    # Fields every capability block must declare.
    _REQUIRED_CAPABILITY_KEYS = frozenset({"type", "title"})

    def __init__(self, generator: ScopeGenerator | None = None) -> None:
        """Store the optional generator; only ``diff`` requires one."""
        self.generator = generator

    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
        """Compare the SCOPE.md at *existing_path* with generated content.

        Args:
            repo_slug: Passed to ``self.generator.generate`` to produce the
                proposed document.
            existing_path: Location of the current SCOPE.md. A path that
                does not exist is treated as an empty document.

        Returns:
            A ``ScopeDiff`` with one entry per name in ``SCOPE_SECTIONS``.
            Each entry is "missing" (section absent from the current file),
            "ok" (normalized texts match) or "stale" (they differ).

        Raises:
            ValueError: If the validator was created without a generator.
        """
        if self.generator is None:
            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")
        current = existing_path.read_text(encoding="utf-8") if existing_path.exists() else ""
        proposed = self.generator.generate(repo_slug)
        current_sections = self._parse_sections(current)
        proposed_sections = self._parse_sections(proposed)
        sections: list[ScopeDiffSection] = []
        for section in SCOPE_SECTIONS:
            current_text = current_sections.get(section)
            # A section the generator did not emit is compared as empty text.
            proposed_text = proposed_sections.get(section, "")
            if current_text is None:
                status = "missing"
            elif self._normalize(current_text) == self._normalize(proposed_text):
                status = "ok"
            else:
                status = "stale"
            sections.append(
                ScopeDiffSection(
                    section=section,
                    status=status,
                    current_text=current_text,
                    proposed_text=proposed_text,
                )
            )
        return ScopeDiff(sections=sections)

    def validate(self, path: Path) -> ValidationResult:
        """Validate the SCOPE.md file at *path*.

        Checks performed:
            C5a: the file exists (error when it does not; no further checks
                run in that case).
            C5b: every name in ``SCOPE_SECTIONS`` is present (error, or warn
                when only "Provided Capabilities" is missing) and the
                sections appear in canonical order (warn).
            C5c: "Provided Capabilities" exists and contains either
                capability blocks declaring ``type`` and ``title``, or the
                explicit empty-state note (warn).

        Returns:
            A ``ValidationResult`` holding every issue found.
        """
        if not path.exists():
            return ValidationResult(
                issues=[
                    ScopeValidationIssue(
                        check="C5a",
                        severity="error",
                        message="SCOPE.md is missing.",
                    )
                ]
            )
        content = path.read_text(encoding="utf-8")
        sections = self._parse_sections(content)
        issues: list[ScopeValidationIssue] = []

        missing = [section for section in SCOPE_SECTIONS if section not in sections]
        if missing:
            # Lacking only the capabilities section is tolerated as a warning.
            severity = "warn" if missing == ["Provided Capabilities"] else "error"
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity=severity,
                    message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
                )
            )

        ordered = self._heading_order(content)
        expected_order = [section for section in SCOPE_SECTIONS if section in sections]
        if ordered[: len(expected_order)] != expected_order:
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity="warn",
                    message="SCOPE.md sections are not in canonical order.",
                )
            )

        issues.extend(
            ScopeValidationIssue(check="C5c", severity="warn", message=message)
            for message in self._capability_messages(sections.get("Provided Capabilities"))
        )
        return ValidationResult(issues=issues)

    def _capability_messages(self, capabilities: str | None) -> list[str]:
        """Return the C5c warning messages for a "Provided Capabilities" body.

        The decision is based on the blocks ``_capability_blocks`` actually
        extracts rather than on a separate substring test. A fence whose
        info string differs only in case is therefore still validated, and
        an unterminated fence (which yields no blocks) is reported instead
        of passing silently.

        Args:
            capabilities: Section body, or None when the section is absent.
        """
        if capabilities is None:
            return ["Provided Capabilities section is missing."]
        blocks = self._capability_blocks(capabilities)
        if not blocks:
            if "No approved capabilities yet" in capabilities:
                return []
            return [
                "Provided Capabilities has no capability blocks or explicit "
                "empty-state note."
            ]
        messages: list[str] = []
        for index, block in enumerate(blocks, start=1):
            missing_keys = self._REQUIRED_CAPABILITY_KEYS - self._capability_keys(block)
            if missing_keys:
                messages.append(
                    f"Capability block {index} is missing required field(s): "
                    f"{', '.join(sorted(missing_keys))}."
                )
        return messages

    def _parse_sections(self, content: str) -> dict[str, str]:
        """Split markdown *content* into ``{heading: body}`` by "## " headings.

        Text before the first heading is ignored. Each body is stripped and
        loses one trailing ``---`` separator line. When a heading occurs
        more than once, the last occurrence wins.
        """
        matches = list(self._HEADING_PATTERN.finditer(content))
        sections: dict[str, str] = {}
        for index, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
            body = content[start:end].strip()
            # Drop a trailing horizontal rule, including the case where the
            # rule is the entire body of an otherwise empty section.
            body = re.sub(r"(?:\A|\n)---\s*\Z", "", body)
            sections[title] = body.strip()
        return sections

    def _heading_order(self, content: str) -> list[str]:
        """Return the "## " headings named in ``SCOPE_SECTIONS``, in file order.

        Repeated headings are kept as repeated entries.
        """
        titles = (match.group(1).strip() for match in self._HEADING_PATTERN.finditer(content))
        return [title for title in titles if title in SCOPE_SECTIONS]

    def _normalize(self, value: str | None) -> str:
        """Reduce markdown text to a lowercase, whitespace-collapsed form.

        HTML comments are removed, runs of the characters `` ` * _ > # - ``
        become a single space, and all whitespace runs collapse to one
        space. None normalizes to the empty string.
        """
        if value is None:
            return ""
        without_comments = re.sub(r"<!--.*?-->", "", value, flags=re.DOTALL)
        without_markdown = re.sub(r"[`*_>#-]+", " ", without_comments)
        return re.sub(r"\s+", " ", without_markdown).strip().lower()

    def _capability_blocks(self, content: str) -> list[str]:
        """Return the inner text of every terminated capability fence.

        The info string is matched case-insensitively; a fence without a
        closing delimiter contributes nothing.
        """
        return re.findall(
            r"```capability\s*(.*?)```",
            content,
            flags=re.DOTALL | re.IGNORECASE,
        )

    def _capability_keys(self, block: str) -> set[str]:
        """Return the ``key:`` names that start a line (no indent) in *block*."""
        return {
            match.group(1)
            for match in re.finditer(r"^([A-Za-z_][A-Za-z0-9_-]*):", block, re.MULTILINE)
        }
|