chore(consistency): sync task status from DB [auto]

Updated by fix-consistency on 2026-05-15:
  - update .custodian-brief.md for repo-scoping
This commit is contained in:
2026-05-15 21:14:21 +02:00
parent f38ed6847c
commit 084159e51c
42 changed files with 5 additions and 5 deletions

View File

@@ -0,0 +1,4 @@
from repo_registry.scope.generator import ScopeGenerator
from repo_registry.scope.validator import ScopeValidator
# Names exported by a star-import of this module: the SCOPE.md generator
# and the validator imported above.
__all__ = ["ScopeGenerator", "ScopeValidator"]

View File

@@ -0,0 +1,323 @@
from __future__ import annotations
import re
from dataclasses import asdict
from repo_registry.core.service import RegistryService
from repo_registry.storage.sqlite import NotFoundError
# Section headings of a SCOPE.md file, in the order they are rendered.
# ScopeGenerator.generate() emits one "## <heading>" block per entry, so every
# entry must have a matching key in the sections mapping built there.
SCOPE_SECTIONS = [
"One-liner",
"Core Idea",
"In Scope",
"Out of Scope",
"Relevant When",
"Not Relevant When",
"Current State",
"How It Fits",
"Terminology",
"Related / Overlapping",
"Getting Oriented",
"Provided Capabilities",
"Notes",
]
# HTML comment inserted wherever the generator has no data to render, marking
# the spot for manual completion.
NEEDS_INPUT = "<!-- needs curator input -->"
class ScopeGenerator:
    """Render SCOPE.md from approved repository characteristics.

    The generator reads one repository's ability map and observed facts
    through the registry service and renders every section listed in
    ``SCOPE_SECTIONS``.  Sections that cannot be derived from that data are
    filled with the ``NEEDS_INPUT`` marker so a curator can complete them.
    """

    def __init__(self, service: RegistryService) -> None:
        """Store the registry service used to look up repositories and facts."""
        self.service = service

    def generate(self, repo_slug: str) -> str:
        """Return the full SCOPE.md text for the repository matching *repo_slug*.

        Raises:
            NotFoundError: if no registered repository matches the slug.
        """
        repository = self._repository_by_slug(repo_slug)
        ability_map = asdict(self.service.ability_map(repository.id))
        facts = [asdict(fact) for fact in self.service.list_observed_facts(repository.id)]
        sections = {
            "One-liner": self._one_liner(ability_map),
            "Core Idea": self._core_idea(ability_map),
            "In Scope": self._in_scope(ability_map),
            "Out of Scope": self._curator_stub(),
            "Relevant When": self._relevant_when(ability_map),
            "Not Relevant When": self._curator_stub(),
            "Current State": self._current_state(repository.status, facts),
            "How It Fits": self._how_it_fits(ability_map),
            "Terminology": self._terminology(ability_map, facts),
            "Related / Overlapping": self._curator_stub(),
            "Getting Oriented": self._getting_oriented(ability_map, facts),
            "Provided Capabilities": self._provided_capabilities(ability_map),
            "Notes": self._curator_stub(),
        }
        lines = [
            "# SCOPE",
            "",
            "> This file helps you quickly understand what this repository is about,",
            "> when it is relevant, and when it is not.",
            "> It was generated from approved repo-scoping characteristics.",
            "",
            "---",
            "",
        ]
        for section in SCOPE_SECTIONS:
            lines.extend([f"## {section}", "", sections[section].rstrip(), "", "---", ""])
        # Drop the trailing blank lines and end the file with a single newline.
        return "\n".join(lines).rstrip() + "\n"

    def _repository_by_slug(self, repo_slug: str):
        """Find the repository whose name or URL basename slugifies to *repo_slug*.

        Raises:
            NotFoundError: if no repository matches.
        """
        wanted = self._slug(repo_slug)
        for repository in self.service.list_repositories():
            candidates = {
                self._slug(repository.name),
                # Last URL path segment, without a trailing "/" or ".git".
                self._slug(repository.url.rstrip("/").rsplit("/", 1)[-1].removesuffix(".git")),
            }
            if wanted in candidates:
                return repository
        raise NotFoundError(f"repository slug {repo_slug!r} was not found")

    def _one_liner(self, ability_map: dict) -> str:
        """Return the first sentence of the scope description, or a fallback."""
        scope = ability_map["scope"]
        # `or ""` covers a description that is present but None, matching the
        # `or` guard _core_idea applies to the same field.
        description = self._sentence(scope.get("description") or "")
        if description:
            return description
        return f"{scope['name']} defines the repository scope for {ability_map['repository']['name']}."

    def _core_idea(self, ability_map: dict) -> str:
        """Return the scope description followed by up to five ability bullets."""
        scope = ability_map["scope"]
        abilities = ability_map.get("abilities", [])
        lines = [scope.get("description") or self._one_liner(ability_map)]
        if abilities:
            lines.append("")
            lines.append("Approved abilities:")
            # ": " keeps the ability name and its description from running
            # together in the rendered bullet.
            lines.extend(
                f"- {ability['name']}: {ability.get('description') or 'Approved repository ability.'}"
                for ability in abilities[:5]
            )
        else:
            lines.extend(["", NEEDS_INPUT])
        return "\n".join(lines)

    def _in_scope(self, ability_map: dict) -> str:
        """Return one bullet per ability, naming up to four of its capabilities."""
        abilities = ability_map.get("abilities", [])
        if not abilities:
            return self._curator_stub()
        lines = []
        for ability in abilities:
            capabilities = ", ".join(
                capability["name"] for capability in ability.get("capabilities", [])[:4]
            )
            suffix = f" Includes {capabilities}." if capabilities else ""
            lines.append(
                f"- {ability['name']}: {ability.get('description') or 'Approved ability.'}{suffix}"
            )
        return "\n".join(lines)

    def _relevant_when(self, ability_map: dict) -> str:
        """Return "You need ..." bullets, preferring use-case style features."""
        features = [
            feature
            for feature in self._features(ability_map)
            if self._is_usecase_feature(feature)
        ]
        if not features:
            # No use-case features: fall back to the first five of any kind.
            features = self._features(ability_map)[:5]
        if not features:
            return self._curator_stub()
        lines = [
            f"- You need {feature['name']} ({feature.get('primary_class') or feature.get('type', 'feature')})."
            for feature in features
        ]
        # Fallback features are only a guess, so ask the curator to confirm.
        if not any(self._is_usecase_feature(feature) for feature in features):
            lines.append(NEEDS_INPUT)
        return "\n".join(lines)

    def _current_state(self, status: str, facts: list[dict]) -> str:
        """Summarise repository status and the observed facts as bullets."""
        kinds = self._facts_by_kind(facts)
        languages = self._fact_names(kinds.get("language", []))
        frameworks = self._fact_names(kinds.get("framework", []))
        tests = kinds.get("test", [])
        interfaces = kinds.get("interface", [])
        manifests = kinds.get("manifest", [])
        implementation = "substantial" if interfaces or manifests else "partial"
        if not facts:
            implementation = "unknown"
        lines = [
            f"- Status: {status}",
            f"- Implementation: {implementation}",
            "- Stability: evolving",
            "- Usage: internal",
            f"- Languages: {', '.join(languages) if languages else 'unknown'}",
            f"- Frameworks: {', '.join(frameworks) if frameworks else 'none detected'}",
            f"- Tests observed: {len(tests)}",
            f"- Interfaces observed: {len(interfaces)}",
            f"- Manifests observed: {len(manifests)}",
        ]
        if not facts:
            lines.append(NEEDS_INPUT)
        return "\n".join(lines)

    def _how_it_fits(self, ability_map: dict) -> str:
        """Return dependency/consumer stubs, plus evidence references if any."""
        evidence = [
            item
            for capability in self._capabilities(ability_map)
            for item in capability.get("evidence", [])
        ]
        if not evidence:
            return "\n".join(
                [
                    "- Upstream dependencies: " + NEEDS_INPUT,
                    "- Downstream consumers: " + NEEDS_INPUT,
                    "- Often used with: " + NEEDS_INPUT,
                ]
            )
        # At most eight distinct references, in sorted order.
        refs = ", ".join(
            sorted({item.get("reference", "") for item in evidence if item.get("reference")})[:8]
        )
        return "\n".join(
            [
                f"- Supported by evidence references: {refs or 'available evidence'}",
                "- Upstream dependencies: " + NEEDS_INPUT,
                "- Downstream consumers: " + NEEDS_INPUT,
                "- Often used with: " + NEEDS_INPUT,
            ]
        )

    def _terminology(self, ability_map: dict, facts: list[dict]) -> str:
        """Collect names, classes and attributes as the preferred terms."""
        terms = set()
        for item in [ability_map["scope"], *ability_map.get("abilities", [])]:
            terms.add(item.get("name", ""))
            terms.add(item.get("primary_class", ""))
            terms.update(item.get("attributes", []))
        for capability in self._capabilities(ability_map):
            terms.add(capability.get("name", ""))
            terms.add(capability.get("primary_class", ""))
            terms.update(capability.get("attributes", []))
        for fact in facts:
            if fact.get("kind") in {"framework", "llm_provider", "provider_registry"}:
                terms.add(fact.get("name", ""))
        # Filter before sorting: a None value in the set would make sorted()
        # raise TypeError when compared with the strings.
        visible = sorted(term for term in terms if isinstance(term, str) and term)
        if not visible:
            return self._curator_stub()
        return "\n".join(
            [
                "- Preferred terms: " + ", ".join(visible[:12]),
                "- Also known as: " + NEEDS_INPUT,
                "- Potentially confusing terms: " + NEEDS_INPUT,
            ]
        )

    def _getting_oriented(self, ability_map: dict, facts: list[dict]) -> str:
        """Return starting-point bullets built from the known source paths."""
        paths = self._source_paths(ability_map, facts)
        if not paths:
            return self._curator_stub()
        return "\n".join(
            [
                f"- Start with: {paths[0]}",
                f"- Key files / directories: {', '.join(paths[:8])}",
                f"- Entry points: {', '.join(paths[:5])}",
            ]
        )

    def _provided_capabilities(self, ability_map: dict) -> str:
        """Return one fenced ``capability`` block per approved capability."""
        capabilities = self._capabilities(ability_map)
        if not capabilities:
            return f"<!-- No approved capabilities yet. -->\n{NEEDS_INPUT}"
        blocks = []
        for capability in capabilities:
            keywords = self._keywords_for_capability(capability)
            # The description sits on one indented line under a folded scalar
            # (">"). An embedded newline would start an unindented line and
            # break the block, so collapse all whitespace runs first.
            description = " ".join(str(capability.get("description") or "").split())
            blocks.append(
                "\n".join(
                    [
                        "```capability",
                        f"type: {self._capability_type(capability.get('primary_class', 'other'))}",
                        f"title: {capability['name']}",
                        "description: >",
                        f"  {description or 'Approved repository capability.'}",
                        f"keywords: [{', '.join(keywords)}]",
                        "```",
                    ]
                )
            )
        return "\n\n".join(blocks)

    def _capabilities(self, ability_map: dict) -> list[dict]:
        """Return every capability of every ability, flattened in order."""
        return [
            capability
            for ability in ability_map.get("abilities", [])
            for capability in ability.get("capabilities", [])
        ]

    def _features(self, ability_map: dict) -> list[dict]:
        """Return every feature of every capability, flattened in order."""
        return [
            feature
            for capability in self._capabilities(ability_map)
            for feature in capability.get("features", [])
        ]

    def _is_usecase_feature(self, feature: dict) -> bool:
        """Return True if the feature's class or attributes mark a use case."""
        labels = {str(feature.get("primary_class", "")).lower()}
        labels.update(str(item).lower() for item in feature.get("attributes", []))
        return bool(labels & {"business-usecase", "usecase", "workflow", "review"})

    def _keywords_for_capability(self, capability: dict) -> list[str]:
        """Return up to eight slugified keywords for a capability block."""
        keywords = [capability.get("primary_class", "")]
        keywords.extend(capability.get("attributes", []))
        for feature in capability.get("features", []):
            keywords.append(feature.get("primary_class", ""))
            keywords.extend(feature.get("attributes", []))
        return [self._keyword(item) for item in self._unique(keywords)[:8] if item]

    def _capability_type(self, primary_class: str | None) -> str:
        """Map a primary class onto one of the capability ``type`` values."""
        # A missing (None) primary class is treated like an unknown one.
        normalized = (primary_class or "other").lower()
        if normalized in {"api", "infrastructure", "data", "security", "documentation"}:
            return normalized
        if normalized in {"interface", "integration", "llm-integration"}:
            return "api"
        if normalized in {"storage", "repository-structure"}:
            return "data"
        return "other"

    def _facts_by_kind(self, facts: list[dict]) -> dict[str, list[dict]]:
        """Group facts by their ``kind`` value, preserving input order."""
        grouped: dict[str, list[dict]] = {}
        for fact in facts:
            grouped.setdefault(fact.get("kind", ""), []).append(fact)
        return grouped

    def _fact_names(self, facts: list[dict]) -> list[str]:
        """Return the distinct, non-empty names of the given facts."""
        return self._unique([fact.get("name", "") for fact in facts])

    def _source_paths(self, ability_map: dict, facts: list[dict]) -> list[str]:
        """Return distinct paths from facts, feature locations and source refs."""
        paths = [fact.get("path", "") for fact in facts if fact.get("path")]
        for feature in self._features(ability_map):
            paths.append(feature.get("location", ""))
            for source_ref in feature.get("source_refs", []):
                paths.append(source_ref.get("path", ""))
        return self._unique(paths)

    def _curator_stub(self) -> str:
        """Return the bullet used for sections only a curator can fill."""
        return f"- {NEEDS_INPUT}"

    def _sentence(self, text: str) -> str:
        """Return the first sentence of *text* with whitespace collapsed."""
        cleaned = re.sub(r"\s+", " ", text.strip())
        if not cleaned:
            return ""
        # Split after the first ".", "!" or "?" that is followed by whitespace.
        return re.split(r"(?<=[.!?])\s+", cleaned, maxsplit=1)[0]

    def _slug(self, value: str) -> str:
        """Lower-case *value* and join its ASCII alphanumeric runs with "-"."""
        return re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")

    def _keyword(self, value: str) -> str:
        """Return the slug of *value*, or "other" if the slug is empty."""
        return self._slug(value) or "other"

    def _unique(self, values: list[str]) -> list[str]:
        """Return stripped, non-empty values, de-duplicated case-insensitively.

        The first spelling of each value wins and input order is preserved.
        """
        result: list[str] = []
        seen: set[str] = set()
        for value in values:
            # str(None) would yield the literal text "None"; treat None as
            # "no value" instead.
            if value is None:
                continue
            item = str(value).strip()
            key = item.lower()
            if not item or key in seen:
                continue
            seen.add(key)
            result.append(item)
        return result

View File

@@ -0,0 +1,184 @@
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from repo_registry.scope.generator import SCOPE_SECTIONS, ScopeGenerator
@dataclass(frozen=True)
class ScopeDiffSection:
    """Comparison result for a single SCOPE.md section."""

    # Section heading the comparison refers to.
    section: str
    # Outcome of the comparison for this section.
    status: str
    # Section body found in the existing file; None when it is absent.
    current_text: str | None
    # Section body of the proposed version.
    proposed_text: str | None
@dataclass(frozen=True)
class ScopeDiff:
    """Per-section comparison results for one SCOPE.md file."""

    sections: list[ScopeDiffSection]

    @property
    def needs_update(self) -> bool:
        """Return True as soon as one section has a status other than "ok"."""
        for entry in self.sections:
            if entry.status != "ok":
                return True
        return False
@dataclass(frozen=True)
class ScopeValidationIssue:
    """A single finding reported for a SCOPE.md file."""

    # Identifier of the check that produced the finding.
    check: str
    # Severity label; ValidationResult.ok treats "error" as failing.
    severity: str
    # Human-readable description of the finding.
    message: str
@dataclass(frozen=True)
class ValidationResult:
    """All findings collected while validating one SCOPE.md file."""

    issues: list[ScopeValidationIssue]

    @property
    def ok(self) -> bool:
        """Return True when none of the findings has severity "error"."""
        return all(entry.severity != "error" for entry in self.issues)
class ScopeValidator:
    """Validate and diff SCOPE.md files."""

    def __init__(self, generator: ScopeGenerator | None = None) -> None:
        """Store the optional generator; only ``diff`` requires one."""
        self.generator = generator

    def diff(self, repo_slug: str, existing_path: Path) -> ScopeDiff:
        """Compare the file at *existing_path* with freshly generated content.

        Each section is reported as "missing" (not in the existing file),
        "ok" (same text after normalisation) or "stale" (text differs).  A
        non-existent file is treated as empty, so every section is "missing".

        Raises:
            ValueError: if the validator was created without a generator.
        """
        if self.generator is None:
            raise ValueError("ScopeValidator.diff requires a ScopeGenerator")
        current = existing_path.read_text(encoding="utf-8") if existing_path.exists() else ""
        proposed = self.generator.generate(repo_slug)
        current_sections = self._parse_sections(current)
        proposed_sections = self._parse_sections(proposed)
        sections: list[ScopeDiffSection] = []
        for section in SCOPE_SECTIONS:
            current_text = current_sections.get(section)
            proposed_text = proposed_sections.get(section, "")
            if current_text is None:
                status = "missing"
            elif self._normalize(current_text) == self._normalize(proposed_text):
                status = "ok"
            else:
                status = "stale"
            sections.append(
                ScopeDiffSection(
                    section=section,
                    status=status,
                    current_text=current_text,
                    proposed_text=proposed_text,
                )
            )
        return ScopeDiff(sections=sections)

    def validate(self, path: Path) -> ValidationResult:
        """Check the SCOPE.md file at *path* and return the findings.

        Checks: the file exists (C5a), all sections are present and in
        canonical order (C5b), and "Provided Capabilities" has well-formed
        capability blocks or an explicit empty-state note (C5c).
        """
        issues: list[ScopeValidationIssue] = []
        if not path.exists():
            return ValidationResult(
                issues=[
                    ScopeValidationIssue(
                        check="C5a",
                        severity="error",
                        message="SCOPE.md is missing.",
                    )
                ]
            )
        content = path.read_text(encoding="utf-8")
        sections = self._parse_sections(content)
        missing = [section for section in SCOPE_SECTIONS if section not in sections]
        if missing:
            # Missing only "Provided Capabilities" is downgraded to a warning.
            severity = "warn" if missing == ["Provided Capabilities"] else "error"
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity=severity,
                    message=f"Missing SCOPE.md section(s): {', '.join(missing)}.",
                )
            )
        ordered = self._heading_order(content)
        expected_order = [section for section in SCOPE_SECTIONS if section in sections]
        if ordered[: len(expected_order)] != expected_order:
            issues.append(
                ScopeValidationIssue(
                    check="C5b",
                    severity="warn",
                    message="SCOPE.md sections are not in canonical order.",
                )
            )
        capabilities = sections.get("Provided Capabilities")
        if capabilities is None:
            issues.append(
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message="Provided Capabilities section is missing.",
                )
            )
        # Detect the fence case-insensitively, the same way _capability_blocks
        # extracts blocks, so "```Capability" is not reported as "no blocks".
        elif re.search(r"```capability", capabilities, flags=re.IGNORECASE):
            for index, block in enumerate(self._capability_blocks(capabilities), start=1):
                keys = self._capability_keys(block)
                missing_keys = {"type", "title"} - keys
                if missing_keys:
                    issues.append(
                        ScopeValidationIssue(
                            check="C5c",
                            severity="warn",
                            message=(
                                f"Capability block {index} is missing required field(s): "
                                f"{', '.join(sorted(missing_keys))}."
                            ),
                        )
                    )
        elif "No approved capabilities yet" not in capabilities:
            issues.append(
                ScopeValidationIssue(
                    check="C5c",
                    severity="warn",
                    message=(
                        "Provided Capabilities has no capability blocks or explicit "
                        "empty-state note."
                    ),
                )
            )
        return ValidationResult(issues=issues)

    def _parse_sections(self, content: str) -> dict[str, str]:
        """Split *content* at "## " headings into a heading -> body mapping.

        Each body is stripped and its trailing "---" separator line removed.
        If a heading occurs more than once, the last occurrence wins.
        """
        matches = list(re.finditer(r"^##\s+(.+?)\s*$", content, re.MULTILINE))
        sections: dict[str, str] = {}
        for index, match in enumerate(matches):
            title = match.group(1).strip()
            start = match.end()
            end = matches[index + 1].start() if index + 1 < len(matches) else len(content)
            body = content[start:end]
            # "(?:^|\n)" also removes the separator when it is the entire
            # body, so an empty section yields "" rather than "---".
            body = re.sub(r"(?:^|\n)---\s*$", "", body.strip())
            sections[title] = body.strip()
        return sections

    def _heading_order(self, content: str) -> list[str]:
        """Return the known section headings in the order they appear."""
        return [
            match.group(1).strip()
            for match in re.finditer(r"^##\s+(.+?)\s*$", content, re.MULTILINE)
            if match.group(1).strip() in SCOPE_SECTIONS
        ]

    def _normalize(self, value: str | None) -> str:
        """Reduce section text to lower-case words for comparison.

        HTML comments and the characters `` ` * _ > # - `` are removed and
        whitespace is collapsed, so differences in those do not count.
        """
        if value is None:
            return ""
        without_comments = re.sub(r"<!--.*?-->", "", value, flags=re.DOTALL)
        without_markdown = re.sub(r"[`*_>#-]+", " ", without_comments)
        return re.sub(r"\s+", " ", without_markdown).strip().lower()

    def _capability_blocks(self, content: str) -> list[str]:
        """Return the inner text of every fenced ``capability`` block."""
        return re.findall(
            r"```capability\s*(.*?)```",
            content,
            flags=re.DOTALL | re.IGNORECASE,
        )

    def _capability_keys(self, block: str) -> set[str]:
        """Return the keys that start a line (no indent) in *block*."""
        return {
            match.group(1)
            for match in re.finditer(r"^([A-Za-z_][A-Za-z0-9_-]*):", block, re.MULTILINE)
        }