Improved ability description

2026-04-28 04:07:05 +02:00
parent 360537ef05
commit 852eb082d9
4 changed files with 94 additions and 20 deletions
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -67,7 +67,7 @@ class CandidateGraphGenerator:

        ability_sources = docs or manifests or languages
        ability = CandidateAbilityDraft(
-            name=f"Review {repository.name} Repository Usefulness",
+            name=self._ability_name(repository, chunks),
            description=self._ability_description(chunks),
            confidence=self._ability_confidence(
                docs=docs,
@@ -390,15 +390,86 @@ class CandidateGraphGenerator:
        doc_summary = self._document_summary(chunks)
        if doc_summary:
            return (
-                "Candidate usefulness summary seeded from repository content: "
-                f"{doc_summary} This is a review seed, not an asserted domain ability."
+                "Candidate repository purpose inferred from repository content: "
+                f"{doc_summary} Review is required before treating this as an "
+                "approved domain ability."
            )
        return (
-            "Candidate usefulness summary generated from observed repository "
-            "documentation, manifests, languages, and interfaces. This is a "
-            "review seed, not an asserted domain ability."
+            "Candidate repository purpose inferred from observed repository "
+            "documentation, manifests, languages, and interfaces. Review is "
+            "required before treating this as an approved domain ability."
        )

+    def _ability_name(
+        self,
+        repository: Repository,
+        chunks: list[ContentChunk],
+    ) -> str:
+        """Name the ability from documentation or description text, else `Support <repo>`."""
+        # An unusable documentation line must not mask a usable repository description.
+        for text in (self._document_purpose_sentence(chunks), repository.description):
+            if text and (name := self._imperative_purpose(text)):
+                return name
+        return f"Support {self._humanize_identifier(repository.name)}"
+
+    def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
+        """Return the first non-blank, non-heading line of a documentation chunk, or ""."""
+        documentation = (chunk for chunk in chunks if chunk.kind == "documentation")
+        for doc_chunk in documentation:
+            stripped = (raw.strip() for raw in doc_chunk.text.splitlines())
+            body = [entry for entry in stripped if entry and not entry.startswith("#")]
+            if body:
+                return body[0]
+        return ""
+
+    def _imperative_purpose(self, text: str) -> str:
+        """Condense *text* to a capitalized imperative phrase of at most eight words."""
+        collapsed = re.sub(r"\s+", " ", text.strip())
+        # Only the first sentence is considered.
+        sentence = re.split(r"[.!?]\s+", collapsed, maxsplit=1)[0]
+        # Drop a leading "<Name> is/provides/offers " subject clause.
+        phrase = re.sub(r"^[A-Z][A-Za-z0-9_-]*\s+(?:is|provides|offers)\s+", "", sentence)
+        tokens = phrase.strip(" .:-").split()
+        if not tokens:
+            return ""
+        head, *tail = tokens
+        return self._title_from_words([self._imperative_verb(head), *tail][:8])
+
+    def _imperative_verb(self, word: str) -> str:
+        """Lowercase *word* and reduce a third-person-singular verb to its base form."""
+        lower = word.lower().strip(",;:")
+        # Irregular forms, plus "caches", which the "-ches" rule would cut to "cach".
+        irregular = {"caches": "cache", "does": "do", "has": "have", "is": "be"}
+        if lower in irregular:
+            return irregular[lower]
+        # Leave short words alone, and words whose final "s" is not an inflection
+        # ("this", "status", "process", "alias").
+        if len(lower) <= 3 or lower[-1] != "s" or lower[-2] in "aisu":
+            return lower
+        if lower.endswith("ies") and len(lower) > 4:
+            return f"{lower[:-3]}y"
+        # "-es" is the suffix only after a sibilant or "o" ("passes", "fixes", "goes");
+        # elsewhere the "e" belongs to the stem ("manages", "handles", "uses").
+        if lower.endswith(("sses", "shes", "ches", "xes", "oes")):
+            return lower[:-2]
+        return lower[:-1]
+
+    def _title_from_words(self, words: list[str]) -> str:
+        """Join *words* with spaces after removing punctuation and capitalizing each.
+
+        The word-character class keeps non-ASCII letters and digits, so accented or
+        non-Latin text is not truncated; words left empty by the cleanup are dropped.
+        """
+        cleaned_words = (re.sub(r"[^\w/{}-]", "", word) for word in words)
+        return " ".join(
+            word[:1].upper() + word[1:] for word in cleaned_words if word
+        )
+
+    def _humanize_identifier(self, value: str) -> str:
+        """Capitalize an identifier's words, split on `_`/`-` runs and camelCase humps."""
+        parts = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", re.sub(r"[_-]+", " ", value))
+        return self._title_from_words(parts.split())
+
    def _interface_description(self, chunks: list[ContentChunk]) -> str:
        interface_summary = self._interface_summary(chunks)
        if interface_summary:
--- a/tests/test_candidate_graph.py
+++ b/tests/test_candidate_graph.py
@@ -30,12 +30,12 @@ def chunk(id, kind, path, text, start_line=1, end_line=1):
    )


-def test_candidate_generator_builds_review_seed_from_observed_facts():
+def test_candidate_generator_builds_purpose_seed_from_observed_facts():
    repository = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
-        description=None,
+        description="Routes incoming customer email to the right team.",
        branch="main",
        status="analyzed",
    )
@@ -50,7 +50,8 @@ def test_candidate_generator_builds_review_seed_from_observed_facts():

    assert len(graph) == 1
    ability = graph[0]
-    assert ability.name == "Review MailRouter Repository Usefulness"
+    assert ability.name == "Route Incoming Customer Email To The Right Team"
+    assert "Usefulness" not in ability.name
    assert ability.source_refs[0].path == "README.md"
    interface_capability = ability.capabilities[0]
    assert interface_capability.name == "Expose Repository Interface"
@@ -96,6 +97,7 @@ def test_candidate_generator_enriches_descriptions_from_content_chunks():

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

+    assert graph[0].name == "Route Incoming Customer Email To The Right Team"
    assert "MailRouter. Routes incoming customer email" in graph[0].description
    assert '@app.post("/classify")' in graph[0].capabilities[0].description

--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -277,7 +277,7 @@ def test_search_filters_by_status_language_and_framework(tmp_path):
        status="indexed",
        language="Python",
        framework="FastAPI",
-        ability="Repository Usefulness",
+        ability="Support Filterable",
        capability="Repository Structure",
    )
    wrong_language_results = service.search(
@@ -620,7 +620,7 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca
    summary = service.analyze_repository(repository.id)
    graph = service.candidate_graph(repository.id, summary.analysis_run.id)

-    assert graph.abilities[0].name == "Review Fallback Repository Usefulness"
+    assert graph.abilities[0].name == "Support Fallback"


 def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
@@ -641,7 +641,7 @@ def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
    decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)

    assert summary.analysis_run.status == "completed"
-    assert graph.abilities[0].name == "Review Failing LLM Repository Usefulness"
+    assert graph.abilities[0].name == "Support Failing LLM"
    assert decisions[0].action == "llm_extraction_failed"
    assert "provider unavailable" in decisions[0].notes

@@ -683,7 +683,7 @@ def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
    assert service.get_repository(repository.id).status == "indexed"
    assert len(ability_map.abilities) == 1
    assert len(second_approval.abilities) == 1
-    assert ability_map.abilities[0].name == "Review Example Repository Usefulness"
+    assert ability_map.abilities[0].name == "Support Example"
    assert ability_map.abilities[0].capabilities[0].features[0].location == "app.py"
    assert ability_map.abilities[0].capabilities[0].features[0].source_refs
    assert ability_map.abilities[0].capabilities[0].features[0].source_refs[0].line == 3
--- a/tests/test_web_api.py
+++ b/tests/test_web_api.py
@@ -736,9 +736,7 @@ def test_api_analysis_run_loop(tmp_path):
        )
        assert candidate_response.status_code == 200
        candidate_graph = candidate_response.json()
-        assert candidate_graph["abilities"][0]["name"] == (
-            "Review Frontend Repository Usefulness"
-        )
+        assert candidate_graph["abilities"][0]["name"] == "Support Frontend"
        candidate_ability_id = candidate_graph["abilities"][0]["id"]
        candidate_capability_id = candidate_graph["abilities"][0]["capabilities"][0]["id"]

@@ -1053,7 +1051,10 @@ def test_api_source_linked_candidate_and_repo_update_loop(tmp_path):
 def test_ui_register_analyze_and_approve_loop(tmp_path):
    source = tmp_path / "repo"
    source.mkdir()
-    (source / "README.md").write_text("# UI Repo\n", encoding="utf-8")
+    (source / "README.md").write_text(
+        "# UI Repo\nReports service status through API and CLI entry points.\n",
+        encoding="utf-8",
+    )
    (source / "requirements.txt").write_text("fastapi\n", encoding="utf-8")
    (source / "app.py").write_text(
        "from fastapi import FastAPI\n"
@@ -1144,7 +1145,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
        assert "2 features" in run_detail.text
        assert "7 facts" in run_detail.text
        assert "Content Chunks" in run_detail.text
-        assert "README.md:1-1" in run_detail.text
+        assert "README.md:1-2" in run_detail.text
        assert "ID " in run_detail.text
        assert "No review decisions yet." in run_detail.text

@@ -1177,7 +1178,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
            f"/ui/repos/{repository_id}/elements?scope=facts&amp;analysis_run_id={first_run_id}&amp;type=facts"
            in approved_detail.text
        )
-        assert "Review UI Repo Edited Repository Usefulness" in approved_detail.text
+        assert "Report Service Status Through API And CLI Entry" in approved_detail.text
        assert "Language: Python" in approved_detail.text
        assert "Framework: FastAPI" in approved_detail.text
        assert "interface:app.py:3" in approved_detail.text
@@ -1270,7 +1271,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
                "q": "repository",
                "status": "indexed",
                "language": "Python",
-                "ability": "Repository Usefulness",
+                "ability": "Report Service Status",
                "capability": "Repository",
            },
        )