generated from coulomb/repo-seed
chore(consistency): sync task status from DB [auto]
Updated by fix-consistency on 2026-05-15: - update .custodian-brief.md for repo-scoping
This commit is contained in:
4
src/repo_scoping/scope/__init__.py
Normal file
4
src/repo_scoping/scope/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from repo_registry.scope.generator import ScopeGenerator
|
||||
from repo_registry.scope.validator import ScopeValidator
|
||||
|
||||
# Names exported by the scope package for `from ... import *`.
# NOTE(review): this file is listed under src/repo_scoping/ while the imports
# above reference repo_registry — confirm the package name is intentional.
__all__ = ["ScopeGenerator", "ScopeValidator"]
|
||||
323
src/repo_scoping/scope/generator.py
Normal file
323
src/repo_scoping/scope/generator.py
Normal file
@@ -0,0 +1,323 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import asdict
|
||||
|
||||
from repo_registry.core.service import RegistryService
|
||||
from repo_registry.storage.sqlite import NotFoundError
|
||||
|
||||
|
||||
# Canonical SCOPE.md section titles, in the order they are rendered and
# expected by the validator.
SCOPE_SECTIONS = [
    "One-liner",
    "Core Idea",
    "In Scope",
    "Out of Scope",
    "Relevant When",
    "Not Relevant When",
    "Current State",
    "How It Fits",
    "Terminology",
    "Related / Overlapping",
    "Getting Oriented",
    "Provided Capabilities",
    "Notes",
]


# HTML comment emitted wherever a section (or part of one) cannot be derived
# from the available data and has to be filled in by a curator.
NEEDS_INPUT = "<!-- needs curator input -->"
|
||||
|
||||
|
||||
class ScopeGenerator:
    """Render SCOPE.md from approved repository characteristics.

    Every title in ``SCOPE_SECTIONS`` is rendered from the repository's
    ability map and observed facts as returned by the registry service.
    Sections (or parts of sections) that cannot be derived from that data are
    filled with the ``NEEDS_INPUT`` placeholder.
    """

    # Primary classes that are already valid capability ``type`` values.
    _DIRECT_TYPES = frozenset({"api", "infrastructure", "data", "security", "documentation"})
    # Primary classes folded into one of the direct types above.
    _TYPE_ALIASES = {
        "interface": "api",
        "integration": "api",
        "llm-integration": "api",
        "storage": "data",
        "repository-structure": "data",
    }
    # Labels (primary class or attribute) that mark a feature as a use case.
    _USECASE_LABELS = frozenset({"business-usecase", "usecase", "workflow", "review"})
    # Observed-fact kinds whose names are listed as terminology.
    _TERM_FACT_KINDS = frozenset({"framework", "llm_provider", "provider_registry"})

    def __init__(self, service: RegistryService) -> None:
        """Keep the registry service used to look up repositories and their data."""
        self.service = service

    def generate(self, repo_slug: str) -> str:
        """Return the complete SCOPE.md text for the repository matching *repo_slug*.

        Raises:
            NotFoundError: If no registered repository matches the slug.
        """
        repository = self._repository_by_slug(repo_slug)
        ability_map = asdict(self.service.ability_map(repository.id))
        facts = [asdict(fact) for fact in self.service.list_observed_facts(repository.id)]
        sections = {
            "One-liner": self._one_liner(ability_map),
            "Core Idea": self._core_idea(ability_map),
            "In Scope": self._in_scope(ability_map),
            "Out of Scope": self._curator_stub(),
            "Relevant When": self._relevant_when(ability_map),
            "Not Relevant When": self._curator_stub(),
            "Current State": self._current_state(repository.status, facts),
            "How It Fits": self._how_it_fits(ability_map),
            "Terminology": self._terminology(ability_map, facts),
            "Related / Overlapping": self._curator_stub(),
            "Getting Oriented": self._getting_oriented(ability_map, facts),
            "Provided Capabilities": self._provided_capabilities(ability_map),
            "Notes": self._curator_stub(),
        }
        lines = [
            "# SCOPE",
            "",
            "> This file helps you quickly understand what this repository is about,",
            "> when it is relevant, and when it is not.",
            "> It was generated from approved repo-scoping characteristics.",
            "",
            "---",
            "",
        ]
        for section in SCOPE_SECTIONS:
            lines.extend([f"## {section}", "", sections[section].rstrip(), "", "---", ""])
        # Drop the trailing blank lines and end the file with one newline.
        return "\n".join(lines).rstrip() + "\n"

    def _repository_by_slug(self, repo_slug: str):
        """Return the first repository whose name or URL tail slugifies to *repo_slug*.

        Raises:
            NotFoundError: If no repository matches.
        """
        wanted = self._slug(repo_slug)
        for repository in self.service.list_repositories():
            # Last URL path segment, without a trailing slash or ".git".
            url_tail = repository.url.rstrip("/").rsplit("/", 1)[-1].removesuffix(".git")
            if wanted in {self._slug(repository.name), self._slug(url_tail)}:
                return repository
        raise NotFoundError(f"repository slug {repo_slug!r} was not found")

    def _one_liner(self, ability_map: dict) -> str:
        """Return the first sentence of the scope description, or a generic fallback."""
        scope = ability_map["scope"]
        # The map comes from ``asdict``, which emits every field, so the key can
        # be present with a ``None`` value; a ``.get`` default does not cover that.
        description = self._sentence(scope.get("description") or "")
        if description:
            return description
        return f"{scope['name']} defines the repository scope for {ability_map['repository']['name']}."

    def _core_idea(self, ability_map: dict) -> str:
        """Return the scope description followed by up to five approved abilities."""
        scope = ability_map["scope"]
        abilities = ability_map.get("abilities", [])
        lines = [scope.get("description") or self._one_liner(ability_map)]
        if abilities:
            lines.append("")
            lines.append("Approved abilities:")
            lines.extend(
                f"- {ability['name']} — {ability.get('description') or 'Approved repository ability.'}"
                for ability in abilities[:5]
            )
        else:
            lines.extend(["", NEEDS_INPUT])
        return "\n".join(lines)

    def _in_scope(self, ability_map: dict) -> str:
        """Return one bullet per ability, naming up to four of its capabilities."""
        abilities = ability_map.get("abilities", [])
        if not abilities:
            return self._curator_stub()
        lines = []
        for ability in abilities:
            capabilities = ", ".join(
                capability["name"] for capability in ability.get("capabilities", [])[:4]
            )
            suffix = f" Includes {capabilities}." if capabilities else ""
            lines.append(
                f"- {ability['name']} — {ability.get('description') or 'Approved ability.'}{suffix}"
            )
        return "\n".join(lines)

    def _relevant_when(self, ability_map: dict) -> str:
        """Return "You need ..." bullets, preferring features labelled as use cases.

        When no feature carries a use-case label, the first five features are
        listed instead and the placeholder is appended.
        """
        all_features = self._features(ability_map)
        features = [feature for feature in all_features if self._is_usecase_feature(feature)]
        fell_back = not features
        if fell_back:
            features = all_features[:5]
        if not features:
            return self._curator_stub()
        lines = [
            f"- You need {feature['name']} ({feature.get('primary_class') or feature.get('type', 'feature')})."
            for feature in features
        ]
        if fell_back:
            lines.append(NEEDS_INPUT)
        return "\n".join(lines)

    def _current_state(self, status: str, facts: list[dict]) -> str:
        """Return status bullets derived from the repository status and observed facts."""
        kinds = self._facts_by_kind(facts)
        languages = self._fact_names(kinds.get("language", []))
        frameworks = self._fact_names(kinds.get("framework", []))
        tests = kinds.get("test", [])
        interfaces = kinds.get("interface", [])
        manifests = kinds.get("manifest", [])
        if not facts:
            implementation = "unknown"
        elif interfaces or manifests:
            implementation = "substantial"
        else:
            implementation = "partial"
        lines = [
            f"- Status: {status}",
            f"- Implementation: {implementation}",
            "- Stability: evolving",
            "- Usage: internal",
            f"- Languages: {', '.join(languages) if languages else 'unknown'}",
            f"- Frameworks: {', '.join(frameworks) if frameworks else 'none detected'}",
            f"- Tests observed: {len(tests)}",
            f"- Interfaces observed: {len(interfaces)}",
            f"- Manifests observed: {len(manifests)}",
        ]
        if not facts:
            lines.append(NEEDS_INPUT)
        return "\n".join(lines)

    def _how_it_fits(self, ability_map: dict) -> str:
        """Return curator placeholders, preceded by evidence references when any exist."""
        placeholders = [
            "- Upstream dependencies: " + NEEDS_INPUT,
            "- Downstream consumers: " + NEEDS_INPUT,
            "- Often used with: " + NEEDS_INPUT,
        ]
        evidence = [
            item
            for capability in self._capabilities(ability_map)
            for item in capability.get("evidence", [])
        ]
        if not evidence:
            return "\n".join(placeholders)
        # At most eight distinct references, in sorted order.
        refs = ", ".join(
            sorted({item.get("reference", "") for item in evidence if item.get("reference")})[:8]
        )
        return "\n".join(
            [f"- Supported by evidence references: {refs or 'available evidence'}", *placeholders]
        )

    def _terminology(self, ability_map: dict, facts: list[dict]) -> str:
        """Return up to twelve sorted terms from scope, abilities, capabilities and facts."""
        terms = set()
        for item in [ability_map["scope"], *ability_map.get("abilities", [])]:
            terms.add(item.get("name", ""))
            terms.add(item.get("primary_class", ""))
            terms.update(item.get("attributes", []))
        for capability in self._capabilities(ability_map):
            terms.add(capability.get("name", ""))
            terms.add(capability.get("primary_class", ""))
            terms.update(capability.get("attributes", []))
        for fact in facts:
            if fact.get("kind") in self._TERM_FACT_KINDS:
                terms.add(fact.get("name", ""))
        # Filter before sorting: a ``None`` entry cannot be ordered against
        # strings and would raise TypeError inside ``sorted``.
        visible = sorted({str(term) for term in terms if term})
        if not visible:
            return self._curator_stub()
        return "\n".join(
            [
                "- Preferred terms: " + ", ".join(visible[:12]),
                "- Also known as: " + NEEDS_INPUT,
                "- Potentially confusing terms: " + NEEDS_INPUT,
            ]
        )

    def _getting_oriented(self, ability_map: dict, facts: list[dict]) -> str:
        """Return orientation bullets built from the known source paths."""
        paths = self._source_paths(ability_map, facts)
        if not paths:
            return self._curator_stub()
        return "\n".join(
            [
                f"- Start with: {paths[0]}",
                f"- Key files / directories: {', '.join(paths[:8])}",
                f"- Entry points: {', '.join(paths[:5])}",
            ]
        )

    def _provided_capabilities(self, ability_map: dict) -> str:
        """Return one fenced ``capability`` block per capability, or an empty-state note."""
        capabilities = self._capabilities(ability_map)
        if not capabilities:
            return f"<!-- No approved capabilities yet. -->\n{NEEDS_INPUT}"
        blocks = []
        for capability in capabilities:
            keywords = self._keywords_for_capability(capability)
            # Title and description are collapsed to one line so embedded
            # newlines cannot start stray "key:" lines inside the fenced block.
            title = self._single_line(capability["name"])
            description = (
                self._single_line(capability.get("description"))
                or "Approved repository capability."
            )
            blocks.append(
                "\n".join(
                    [
                        "```capability",
                        f"type: {self._capability_type(capability.get('primary_class') or 'other')}",
                        f"title: {title}",
                        "description: >",
                        f" {description}",
                        f"keywords: [{', '.join(keywords)}]",
                        "```",
                    ]
                )
            )
        return "\n\n".join(blocks)

    def _capabilities(self, ability_map: dict) -> list[dict]:
        """Return every capability of every ability as one flat list."""
        return [
            capability
            for ability in ability_map.get("abilities", [])
            for capability in ability.get("capabilities", [])
        ]

    def _features(self, ability_map: dict) -> list[dict]:
        """Return every feature of every capability as one flat list."""
        return [
            feature
            for capability in self._capabilities(ability_map)
            for feature in capability.get("features", [])
        ]

    def _is_usecase_feature(self, feature: dict) -> bool:
        """Return True when the feature's primary class or an attribute is a use-case label."""
        labels = {str(feature.get("primary_class", "")).lower()}
        labels.update(str(item).lower() for item in feature.get("attributes", []))
        return bool(labels & self._USECASE_LABELS)

    def _keywords_for_capability(self, capability: dict) -> list[str]:
        """Return up to eight distinct keyword slugs for a capability and its features."""
        keywords = [capability.get("primary_class", "")]
        keywords.extend(capability.get("attributes", []))
        for feature in capability.get("features", []):
            keywords.append(feature.get("primary_class", ""))
            keywords.extend(feature.get("attributes", []))
        # Deduplicate again after slugging: distinct raw values such as
        # "Data Store" and "data-store" produce the same slug.
        slugs = [self._keyword(item) for item in self._unique(keywords)]
        return self._unique(slugs)[:8]

    def _capability_type(self, primary_class: str) -> str:
        """Map a primary class onto a capability ``type``; unknown or missing gives "other"."""
        normalized = (primary_class or "").lower()
        if normalized in self._DIRECT_TYPES:
            return normalized
        return self._TYPE_ALIASES.get(normalized, "other")

    def _facts_by_kind(self, facts: list[dict]) -> dict[str, list[dict]]:
        """Group facts by their ``kind`` value, keeping input order within each group."""
        grouped: dict[str, list[dict]] = {}
        for fact in facts:
            grouped.setdefault(fact.get("kind", ""), []).append(fact)
        return grouped

    def _fact_names(self, facts: list[dict]) -> list[str]:
        """Return the distinct, non-empty ``name`` values of *facts*."""
        return self._unique([fact.get("name", "") for fact in facts])

    def _source_paths(self, ability_map: dict, facts: list[dict]) -> list[str]:
        """Return distinct paths from facts, feature locations and feature source refs."""
        paths = [fact.get("path", "") for fact in facts if fact.get("path")]
        for feature in self._features(ability_map):
            paths.append(feature.get("location", ""))
            for source_ref in feature.get("source_refs", []):
                paths.append(source_ref.get("path", ""))
        return self._unique(paths)

    def _curator_stub(self) -> str:
        """Return the placeholder as a single Markdown bullet."""
        return f"- {NEEDS_INPUT}"

    def _sentence(self, text: str) -> str:
        """Return the first sentence of *text* with whitespace collapsed; "" if empty or None."""
        cleaned = re.sub(r"\s+", " ", (text or "").strip())
        if not cleaned:
            return ""
        # Split after the first ".", "!" or "?" that is followed by whitespace.
        return re.split(r"(?<=[.!?])\s+", cleaned, maxsplit=1)[0]

    def _single_line(self, text: str | None) -> str:
        """Return *text* with every whitespace run collapsed to one space; "" if empty."""
        if not text:
            return ""
        return " ".join(str(text).split())

    def _slug(self, value: str) -> str:
        """Lower-case *value* and replace each run of other characters with one hyphen."""
        return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")

    def _keyword(self, value: str) -> str:
        """Return the slug of *value*, or "other" when the slug is empty."""
        return self._slug(value) or "other"

    def _unique(self, values: list[str]) -> list[str]:
        """Return stripped, non-empty values in first-seen order, deduplicated case-insensitively.

        ``None`` entries are skipped; stringifying them would add a literal
        "None" to the result.
        """
        result: list[str] = []
        seen: set[str] = set()
        for value in values:
            if value is None:
                continue
            item = str(value).strip()
            key = item.lower()
            if not item or key in seen:
                continue
            seen.add(key)
            result.append(item)
        return result
|
||||
184
src/repo_scoping/scope/validator.py
Normal file
184
src/repo_scoping/scope/validator.py
Normal file
@@ -0,0 +1,184 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScopeDiffSection:
    """Comparison result for one SCOPE.md section."""

    # Section title, one of the entries in SCOPE_SECTIONS.
    section: str
    # "missing", "ok" or "stale", as assigned by ScopeValidator.diff.
    status: str
    # Section body found in the existing file; None when the section is absent.
    current_text: str | None
    # Section body taken from the freshly generated document.
    proposed_text: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScopeDiff:
    """Per-section comparison of an existing SCOPE.md against a generated one."""

    # One entry per compared section.
    sections: list[ScopeDiffSection]

    @property
    def needs_update(self) -> bool:
        """Return True unless every section has status "ok"."""
        return not all(entry.status == "ok" for entry in self.sections)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScopeValidationIssue:
    """One finding produced while validating a SCOPE.md file."""

    # Identifier of the check that produced the finding (e.g. "C5a", "C5b", "C5c").
    check: str
    # "error" or "warn"; only "error" makes ValidationResult.ok false.
    severity: str
    # Human-readable description of the finding.
    message: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ValidationResult:
    """Collected findings from one validation run."""

    # Findings in the order the checks produced them.
    issues: list[ScopeValidationIssue]

    @property
    def ok(self) -> bool:
        """Return True when no issue has severity "error"."""
        return all(issue.severity != "error" for issue in self.issues)
|
||||
|
||||
|
||||
class ScopeValidator:
    """Validate and diff SCOPE.md files."""

    # A level-two Markdown heading that sits on one line. Only horizontal
    # whitespace may follow "##": with ``\s`` a bare "##" line would swallow
    # the newline and take the following line as its title.
    _HEADING_RE = re.compile(r"^##[ \t]+(.+?)[ \t]*$", re.MULTILINE)
    # Body of a fenced ``capability`` block, without the fences.
    _CAPABILITY_BLOCK_RE = re.compile(r"```capability\s*(.*?)```", re.DOTALL | re.IGNORECASE)
    # A "key:" at the very start of a line inside a capability block.
    _CAPABILITY_KEY_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*):", re.MULTILINE)

    def __init__(self, generator: ScopeGenerator | None = None) -> None:
        """Store the optional generator; it is only required by :meth:`diff`."""
        self.generator = generator

    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
        """Compare the file at *existing_path* with freshly generated content.

        Each canonical section is marked "missing" (absent from the existing
        file), "ok" (equal after normalisation) or "stale" (different). A
        non-existent file is treated as empty.

        Raises:
            ValueError: If the validator was created without a generator.
        """
        if self.generator is None:
            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")
        current = existing_path.read_text(encoding="utf-8") if existing_path.exists() else ""
        proposed = self.generator.generate(repo_slug)
        current_sections = self._parse_sections(current)
        proposed_sections = self._parse_sections(proposed)
        sections: list[ScopeDiffSection] = []
        for section in SCOPE_SECTIONS:
            current_text = current_sections.get(section)
            proposed_text = proposed_sections.get(section, "")
            if current_text is None:
                status = "missing"
            elif self._normalize(current_text) == self._normalize(proposed_text):
                status = "ok"
            else:
                status = "stale"
            sections.append(
                ScopeDiffSection(
                    section=section,
                    status=status,
                    current_text=current_text,
                    proposed_text=proposed_text,
                )
            )
        return ScopeDiff(sections=sections)

    def validate(self, path: Path) -> ValidationResult:
        """Check the SCOPE.md at *path* and return the findings.

        Checks, in order: the file exists (C5a), all canonical sections are
        present and in canonical order (C5b), and the Provided Capabilities
        section holds well-formed capability blocks or an explicit empty-state
        note (C5c).
        """
        if not path.exists():
            return ValidationResult(
                issues=[
                    ScopeValidationIssue(
                        check="C5a",
                        severity="error",
                        message="SCOPE.md is missing.",
                    )
                ]
            )
        issues: list[ScopeValidationIssue] = []
        content = path.read_text(encoding="utf-8")
        sections = self._parse_sections(content)
        missing = [section for section in SCOPE_SECTIONS if section not in sections]
        if missing:
            # A file lacking only Provided Capabilities is downgraded to a warning.
            severity = "warn" if missing == ["Provided Capabilities"] else "error"
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity=severity,
                    message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
                )
            )
        ordered = self._heading_order(content)
        expected_order = [section for section in SCOPE_SECTIONS if section in sections]
        if ordered[: len(expected_order)] != expected_order:
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity="warn",
                    message="SCOPE.md sections are not in canonical order.",
                )
            )
        issues.extend(self._capability_issues(sections.get("Provided Capabilities")))
        return ValidationResult(issues=issues)

    def _capability_issues(self, capabilities: str | None) -> list[ScopeValidationIssue]:
        """Return the C5c warnings for the Provided Capabilities section body."""

        def warn(message: str) -> ScopeValidationIssue:
            return ScopeValidationIssue(check="C5c", severity="warn", message=message)

        if capabilities is None:
            return [warn("Provided Capabilities section is missing.")]
        blocks = self._capability_blocks(capabilities)
        if blocks:
            found: list[ScopeValidationIssue] = []
            for index, block in enumerate(blocks, start=1):
                missing_keys = {"type", "title"} - self._capability_keys(block)
                if missing_keys:
                    found.append(
                        warn(
                            f"Capability block {index} is missing required field(s): "
                            f"{', '.join(sorted(missing_keys))}."
                        )
                    )
            return found
        # An opening fence with no parsable block means the fence was never
        # closed. The check is case-insensitive to match the block regex.
        if "```capability" in capabilities.lower():
            return [warn("Provided Capabilities contains a capability block without a closing fence.")]
        if "No approved capabilities yet" not in capabilities:
            return [
                warn(
                    "Provided Capabilities has no capability blocks or explicit "
                    "empty-state note."
                )
            ]
        return []

    def _parse_sections(self, content: str) -> dict[str, str]:
        """Split *content* at "## " headings and map each title to its stripped body.

        A trailing "---" separator is removed from each body. When a title
        occurs more than once, the last occurrence wins.
        """
        matches = list(self._HEADING_RE.finditer(content))
        sections: dict[str, str] = {}
        for index, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
            body = content[start:end].strip()
            # "(?:^|\n)" also covers a body that consists of the separator only.
            body = re.sub(r"(?:^|\n)---\s*$", "", body)
            sections[title] = body.strip()
        return sections

    def _heading_order(self, content: str) -> list[str]:
        """Return the canonical section titles in the order they appear in *content*."""
        titles = (match.group(1).strip() for match in self._HEADING_RE.finditer(content))
        return [title for title in titles if title in SCOPE_SECTIONS]

    def _normalize(self, value: str | None) -> str:
        """Reduce *value* to lower-case words for comparison.

        HTML comments and the Markdown punctuation characters `` ` * _ > # - ``
        are removed and whitespace is collapsed; None normalises to "".
        """
        if value is None:
            return ""
        without_comments = re.sub(r"<!--.*?-->", "", value, flags=re.DOTALL)
        without_markdown = re.sub(r"[`*_>#-]+", " ", without_comments)
        return re.sub(r"\s+", " ", without_markdown).strip().lower()

    def _capability_blocks(self, content: str) -> list[str]:
        """Return the body of every closed ``capability`` fence in *content*."""
        return self._CAPABILITY_BLOCK_RE.findall(content)

    def _capability_keys(self, block: str) -> set[str]:
        """Return the keys that start a line (no indentation) in a capability block."""
        return {match.group(1) for match in self._CAPABILITY_KEY_RE.finditer(block)}
|
||||
Reference in New Issue
Block a user