Improved ability description

2026-04-28 04:07:05 +02:00
parent 360537ef05
commit 852eb082d9
4 changed files with 94 additions and 20 deletions
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -67,7 +67,7 @@ class CandidateGraphGenerator:

        ability_sources = docs or manifests or languages
        ability = CandidateAbilityDraft(
-            name=f"Review {repository.name} Repository Usefulness",
+            name=self._ability_name(repository, chunks),
            description=self._ability_description(chunks),
            confidence=self._ability_confidence(
                docs=docs,
@@ -390,15 +390,86 @@ class CandidateGraphGenerator:
        doc_summary = self._document_summary(chunks)
        if doc_summary:
            return (
-                "Candidate usefulness summary seeded from repository content: "
-                f"{doc_summary} This is a review seed, not an asserted domain ability."
+                "Candidate repository purpose inferred from repository content: "
+                f"{doc_summary} Review is required before treating this as an "
+                "approved domain ability."
            )
        return (
-            "Candidate usefulness summary generated from observed repository "
-            "documentation, manifests, languages, and interfaces. This is a "
-            "review seed, not an asserted domain ability."
+            "Candidate repository purpose inferred from observed repository "
+            "documentation, manifests, languages, and interfaces. Review is "
+            "required before treating this as an approved domain ability."
        )

+    def _ability_name(self, repository: Repository, chunks: list[ContentChunk]) -> str:
+        """Name the candidate ability after the repository's stated purpose.
+
+        Tries the first documentation line, then ``repository.description``;
+        falls back to "Support <humanized repository name>" when neither
+        yields a usable title.
+        """
+        for text in (self._document_purpose_sentence(chunks), repository.description):
+            if text and (name := self._imperative_purpose(text)):
+                return name
+        return f"Support {self._humanize_identifier(repository.name)}"
+
+    def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
+        """Return the first non-blank documentation line not starting with "#", or ""."""
+        docs = (item for item in chunks if item.kind == "documentation")
+        # Chunks are scanned in order; the first chunk with a qualifying line wins.
+        for doc in docs:
+            for candidate in map(str.strip, doc.text.splitlines()):
+                if candidate and not candidate.startswith("#"):
+                    return candidate
+        return ""
+
+    def _imperative_purpose(self, text: str) -> str:
+        """Condense the first sentence of ``text`` into a title of at most 8 words.
+
+        Drops a leading "<Name> is/provides/offers"; returns "" if nothing is left.
+        """
+        cleaned = re.sub(r"\s+", " ", text.strip())
+        cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
+        cleaned = re.sub(r"^[A-Z][A-Za-z0-9_-]*\s+(?:is|provides|offers)\s+", "", cleaned)
+        words = cleaned.strip(" .:-").split()
+        if not words:
+            return ""
+        return self._title_from_words([self._imperative_verb(words[0]), *words[1:8]])
+
+    def _imperative_verb(self, word: str) -> str:
+        """Lower-case ``word`` and reduce a third-person verb to its base form.
+
+        Heuristic: "-es" is dropped only after sibilant or "-o" stems and a bare
+        "-s" otherwise; words ending in "ss"/"is"/"us" are returned unchanged.
+        """
+        lower = word.lower().strip(",;:")
+        irregular = {"caches": "cache", "does": "do", "has": "have", "is": "be"}
+        if lower in irregular or len(lower) <= 3 or not lower.endswith("s"):
+            return irregular.get(lower, lower)
+        if lower.endswith(("ss", "is", "us")):  # "process", "this", "status"
+            return lower
+        if lower.endswith("ies") and len(lower) > 4:
+            return f"{lower[:-3]}y"
+        # "manages" -> "manage" but "fixes" -> "fix": only these stems take "-es".
+        if lower.endswith(("sses", "xes", "zzes", "shes", "ches", "oes")):
+            return lower[:-2]
+        return lower[:-1]
+
+    def _title_from_words(self, words: list[str]) -> str:
+        """Join ``words`` into a title, capitalizing each word's first character.
+
+        Characters other than ASCII letters, digits, and ``_ / { } -`` are removed
+        first; words left empty by that filtering are dropped.
+        """
+        sanitized = (re.sub(r"[^A-Za-z0-9_/{}-]", "", raw) for raw in words)
+        return " ".join(
+            token[:1].upper() + token[1:] for token in sanitized if token
+        )
+
+    def _humanize_identifier(self, value: str) -> str:
+        """Split ``value`` on ``_``/``-`` runs and camelCase humps, then title it."""
+        words = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", re.sub(r"[_-]+", " ", value))
+        return self._title_from_words(words.split())
+
    def _interface_description(self, chunks: list[ContentChunk]) -> str:
        interface_summary = self._interface_summary(chunks)
        if interface_summary: