diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py
index 04399fd..16bd8ce 100644
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -67,7 +67,7 @@ class CandidateGraphGenerator:
 
         ability_sources = docs or manifests or languages
         ability = CandidateAbilityDraft(
-            name=f"Review {repository.name} Repository Usefulness",
+            name=self._ability_name(repository, chunks),
             description=self._ability_description(chunks),
             confidence=self._ability_confidence(
                 docs=docs,
@@ -390,15 +390,99 @@ class CandidateGraphGenerator:
         doc_summary = self._document_summary(chunks)
         if doc_summary:
             return (
-                "Candidate usefulness summary seeded from repository content: "
-                f"{doc_summary} This is a review seed, not an asserted domain ability."
+                "Candidate repository purpose inferred from repository content: "
+                f"{doc_summary} Review is required before treating this as an "
+                "approved domain ability."
             )
         return (
-            "Candidate usefulness summary generated from observed repository "
-            "documentation, manifests, languages, and interfaces. This is a "
-            "review seed, not an asserted domain ability."
+            "Candidate repository purpose inferred from observed repository "
+            "documentation, manifests, languages, and interfaces. Review is "
+            "required before treating this as an approved domain ability."
         )
 
+    def _ability_name(
+        self,
+        repository: Repository,
+        chunks: list[ContentChunk],
+    ) -> str:
+        """Name the ability after the repository's purpose, else its name."""
+        purpose_text = self._document_purpose_sentence(chunks) or repository.description
+        if purpose_text:
+            normalized = self._imperative_purpose(purpose_text)
+            if normalized:
+                return normalized
+        return f"Support {self._humanize_identifier(repository.name)}"
+
+    def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
+        """Return the first non-heading documentation line, or an empty string."""
+        for chunk in chunks:
+            if chunk.kind != "documentation":
+                continue
+            lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
+            paragraph = next((line for line in lines if not line.startswith("#")), "")
+            if paragraph:
+                return paragraph
+        return ""
+
+    def _imperative_purpose(self, text: str) -> str:
+        """Reduce *text* to a title of at most eight words from its first sentence."""
+        cleaned = re.sub(r"\s+", " ", text.strip())
+        cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
+        cleaned = re.sub(r"^[A-Z][A-Za-z0-9_-]*\s+(?:is|provides|offers)\s+", "", cleaned)
+        cleaned = cleaned.strip(" .:-")
+        if not cleaned:
+            return ""
+        words = cleaned.split()
+        if not words:
+            return ""
+        words[0] = self._imperative_verb(words[0])
+        return self._title_from_words(words[:8])
+
+    def _imperative_verb(self, word: str) -> str:
+        """Return *word* lower-cased with a third-person singular ending removed."""
+        lower = word.lower().strip(",;:")
+        irregular = {
+            "caches": "cache",
+            "does": "do",
+            "has": "have",
+            "is": "be",
+        }
+        if lower in irregular:
+            return irregular[lower]
+        if lower.endswith("ies") and len(lower) > 4:
+            return f"{lower[:-3]}y"
+        # "-es" is the verb ending only after a sibilant or "o" ("watches",
+        # "processes", "fixes", "goes"); otherwise the "e" belongs to the
+        # stem ("manages", "handles", "stores") and only the "s" is dropped.
+        es_endings = ("ches", "shes", "sses", "xes", "zzes", "oes")
+        if lower.endswith(es_endings) and len(lower) > 3:
+            return lower[:-2]
+        # Words in "-ss", "-us" or "-is" ("access", "status", "this") are not
+        # inflected verbs; stripping their "s" would corrupt them.
+        if lower.endswith(("ss", "us", "is")):
+            return lower
+        if lower.endswith("s") and len(lower) > 3:
+            return lower[:-1]
+        return lower
+
+    def _title_from_words(self, words: list[str]) -> str:
+        """Join the non-empty cleaned *words*, upper-casing each first letter."""
+        cleaned_words = [
+            re.sub(r"[^A-Za-z0-9_/{}-]", "", word)
+            for word in words
+        ]
+        return " ".join(
+            word[:1].upper() + word[1:]
+            for word in cleaned_words
+            if word
+        )
+
+    def _humanize_identifier(self, value: str) -> str:
+        """Split *value* on underscores, hyphens and case changes into a title."""
+        spaced = re.sub(r"[_-]+", " ", value)
+        spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", spaced)
+        return self._title_from_words(spaced.split())
+
     def _interface_description(self, chunks: list[ContentChunk]) -> str:
         interface_summary = self._interface_summary(chunks)
         if interface_summary:
diff --git a/tests/test_candidate_graph.py b/tests/test_candidate_graph.py
index e1ee493..9922d76 100644
--- a/tests/test_candidate_graph.py
+++ b/tests/test_candidate_graph.py
@@ -30,12 +30,12 @@ def chunk(id, kind, path, text, start_line=1, end_line=1):
     )
 
 
-def test_candidate_generator_builds_review_seed_from_observed_facts():
+def test_candidate_generator_builds_purpose_seed_from_observed_facts():
     repository = Repository(
         id=1,
         name="MailRouter",
         url="/tmp/mail-router",
-        description=None,
+        description="Routes incoming customer email to the right team.",
         branch="main",
         status="analyzed",
     )
@@ -50,7 +50,8 @@ def test_candidate_generator_builds_review_seed_from_observed_facts():
 
     assert len(graph) == 1
     ability = graph[0]
-    assert ability.name == "Review MailRouter Repository Usefulness"
+    assert ability.name == "Route Incoming Customer Email To The Right Team"
+    assert "Usefulness" not in ability.name
     assert ability.source_refs[0].path == "README.md"
     interface_capability = ability.capabilities[0]
     assert interface_capability.name == "Expose Repository Interface"
@@ -96,6 +97,7 @@ def test_candidate_generator_enriches_descriptions_from_content_chunks():
 
     graph = CandidateGraphGenerator().generate(repository, facts, chunks)
 
+    assert graph[0].name == "Route Incoming Customer Email To The Right Team"
     assert "MailRouter. Routes incoming customer email" in graph[0].description
     assert '@app.post("/classify")' in graph[0].capabilities[0].description
 
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index e37e658..bee4fe7 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -277,7 +277,7 @@ def test_search_filters_by_status_language_and_framework(tmp_path):
         status="indexed",
         language="Python",
         framework="FastAPI",
-        ability="Repository Usefulness",
+        ability="Support Filterable",
         capability="Repository Structure",
     )
     wrong_language_results = service.search(
@@ -620,7 +620,7 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca
     summary = service.analyze_repository(repository.id)
     graph = service.candidate_graph(repository.id, summary.analysis_run.id)
 
-    assert graph.abilities[0].name == "Review Fallback Repository Usefulness"
+    assert graph.abilities[0].name == "Support Fallback"
 
 
 def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
@@ -641,7 +641,7 @@ def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
     decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
 
     assert summary.analysis_run.status == "completed"
-    assert graph.abilities[0].name == "Review Failing LLM Repository Usefulness"
+    assert graph.abilities[0].name == "Support Failing LLM"
     assert decisions[0].action == "llm_extraction_failed"
     assert "provider unavailable" in decisions[0].notes
 
@@ -683,7 +683,7 @@ def test_approve_candidate_graph_publishes_ability_map_once(tmp_path):
     assert service.get_repository(repository.id).status == "indexed"
     assert len(ability_map.abilities) == 1
     assert len(second_approval.abilities) == 1
-    assert ability_map.abilities[0].name == "Review Example Repository Usefulness"
+    assert ability_map.abilities[0].name == "Support Example"
     assert ability_map.abilities[0].capabilities[0].features[0].location == "app.py"
     assert ability_map.abilities[0].capabilities[0].features[0].source_refs
     assert ability_map.abilities[0].capabilities[0].features[0].source_refs[0].line == 3
diff --git a/tests/test_web_api.py b/tests/test_web_api.py
index a322fe2..13bd3dd 100644
--- a/tests/test_web_api.py
+++ b/tests/test_web_api.py
@@ -736,9 +736,7 @@ def test_api_analysis_run_loop(tmp_path):
     )
     assert candidate_response.status_code == 200
     candidate_graph = candidate_response.json()
-    assert candidate_graph["abilities"][0]["name"] == (
-        "Review Frontend Repository Usefulness"
-    )
+    assert candidate_graph["abilities"][0]["name"] == "Support Frontend"
     candidate_ability_id = candidate_graph["abilities"][0]["id"]
     candidate_capability_id = candidate_graph["abilities"][0]["capabilities"][0]["id"]
 
@@ -1053,7 +1051,10 @@ def test_api_source_linked_candidate_and_repo_update_loop(tmp_path):
 def test_ui_register_analyze_and_approve_loop(tmp_path):
     source = tmp_path / "repo"
     source.mkdir()
-    (source / "README.md").write_text("# UI Repo\n", encoding="utf-8")
+    (source / "README.md").write_text(
+        "# UI Repo\nReports service status through API and CLI entry points.\n",
+        encoding="utf-8",
+    )
     (source / "requirements.txt").write_text("fastapi\n", encoding="utf-8")
     (source / "app.py").write_text(
         "from fastapi import FastAPI\n"
@@ -1144,7 +1145,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
     assert "2 features" in run_detail.text
     assert "7 facts" in run_detail.text
     assert "Content Chunks" in run_detail.text
-    assert "README.md:1-1" in run_detail.text
+    assert "README.md:1-2" in run_detail.text
     assert "ID " in run_detail.text
     assert "No review decisions yet." in run_detail.text
 
@@ -1177,7 +1178,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
         f"/ui/repos/{repository_id}/elements?scope=facts&analysis_run_id={first_run_id}&type=facts"
         in approved_detail.text
     )
-    assert "Review UI Repo Edited Repository Usefulness" in approved_detail.text
+    assert "Report Service Status Through API And CLI Entry" in approved_detail.text
     assert "Language: Python" in approved_detail.text
     assert "Framework: FastAPI" in approved_detail.text
     assert "interface:app.py:3" in approved_detail.text
@@ -1270,7 +1271,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
             "q": "repository",
             "status": "indexed",
             "language": "Python",
-            "ability": "Repository Usefulness",
+            "ability": "Report Service Status",
             "capability": "Repository",
         },
     )