From 0cf3f9cb15659ee936a7413ec8347d2faef871ac Mon Sep 17 00:00:00 2001
From: tegwick
Date: Sun, 26 Apr 2026 02:54:52 +0200
Subject: [PATCH] Improved candidate feature naming

---
Review notes (text between the "---" marker and the diff is not part of the
commit message):

* _route_feature_name() now returns "" for a missing or empty value instead of
  handing it to re.search(), which raises TypeError on None. The replaced code
  used `fact.value or fact.name`, so a falsy value is presumably possible;
  confirm against the ObservedFact model.
* The three new helpers carry one-line docstrings, and a regression test pins
  the missing-value behaviour.
* NOTE(review): line breaks and indentation were restored from a
  whitespace-collapsed copy of this patch. Context-line whitespace is a best
  effort, and the post-image blob ids on the `index` lines predate the edits
  listed above. Verify against the tree before applying.

 .../candidate_graph/generator.py | 45 ++++++++++++++++-
 tests/test_candidate_graph.py    | 49 +++++++++++++++++++
 tests/test_registry_service.py   |  1 +
 3 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py
index fc1fb57..9d1a959 100644
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from dataclasses import dataclass, field
 
 from repo_registry.core.models import ContentChunk, ObservedFact, Repository, SourceReference
@@ -126,7 +127,7 @@ class CandidateGraphGenerator:
     ) -> CandidateCapabilityDraft:
         features = [
             CandidateFeatureDraft(
-                name=fact.value or fact.name,
+                name=self._feature_name(fact, chunks),
                 type=self._feature_type(fact),
                 location=fact.path,
                 confidence=0.65 if fact.value else 0.45,
@@ -194,6 +195,48 @@ class CandidateGraphGenerator:
             return "API"
         return "interface"
 
+    def _feature_name(self, fact: ObservedFact, chunks: list[ContentChunk]) -> str:
+        """Return a readable feature name, falling back to the raw fact value or name."""
+        route_name = self._route_feature_name(fact.value)
+        if route_name:
+            return route_name
+        if self._feature_type(fact) == "CLI":
+            function_name = self._function_name_near_fact(fact, chunks)
+            if function_name:
+                return f"CLI command {function_name}"
+        return fact.value or fact.name
+
+    def _route_feature_name(self, value: str | None) -> str:
+        """Return "METHOD /path" for a route decorator, or "" when none matches."""
+        # A fact without a captured value must not reach re.search(), which
+        # raises TypeError when given None.
+        if not value:
+            return ""
+        match = re.search(r"@(?:app|router)\.(get|post|put|patch|delete)\((['\"])(.*?)\2", value)
+        if match is None:
+            return ""
+        method = match.group(1).upper()
+        path = match.group(3)
+        return f"{method} {path}"
+
+    def _function_name_near_fact(
+        self,
+        fact: ObservedFact,
+        chunks: list[ContentChunk],
+    ) -> str:
+        """Return the first function name found in a matching interface chunk, else an empty string."""
+        line = fact.metadata.get("line")
+        for chunk in chunks:
+            if chunk.path != fact.path or chunk.kind != "interface":
+                continue
+            # When the fact carries a line number, only accept the chunk containing it.
+            if isinstance(line, int) and not (chunk.start_line <= line <= chunk.end_line):
+                continue
+            match = re.search(r"^\s*def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(", chunk.text, re.MULTILINE)
+            if match is not None:
+                return match.group(1)
+        return ""
+
     def _ability_confidence(
         self,
         *,
diff --git a/tests/test_candidate_graph.py b/tests/test_candidate_graph.py
index 6287c43..1cec574 100644
--- a/tests/test_candidate_graph.py
+++ b/tests/test_candidate_graph.py
@@ -56,6 +56,7 @@ def test_candidate_generator_builds_review_seed_from_observed_facts():
     assert interface_capability.name == "Expose Repository Interface"
     assert interface_capability.confidence == 0.75
     assert interface_capability.features[0].type == "API"
+    assert interface_capability.features[0].name == "POST /classify"
     assert interface_capability.features[0].location == "app.py"
     assert interface_capability.evidence[0].strength == "strong"
 
@@ -140,3 +141,51 @@ def test_candidate_confidence_scoring_increases_with_supporting_facts():
     assert graph[0].confidence == 1.0
     assert graph[0].capabilities[0].confidence == 0.85
     assert graph[0].capabilities[1].confidence == 0.75
+
+
+def test_candidate_generator_names_cli_features_from_nearby_function():
+    repository = Repository(
+        id=1,
+        name="CliTool",
+        url="/tmp/cli-tool",
+        description=None,
+        branch="main",
+        status="analyzed",
+    )
+    facts = [
+        fact(1, "documentation", "README", "README.md"),
+        ObservedFact(
+            id=2,
+            repository_id=1,
+            analysis_run_id=1,
+            snapshot_id=1,
+            kind="interface",
+            path="cli.py",
+            name="python CLI command decorator",
+            value="@click.command()",
+            metadata={"line": 3},
+        ),
+    ]
+    chunks = [
+        chunk(
+            1,
+            "interface",
+            "cli.py",
+            "@click.command()\ndef import_repositories():\n pass",
+            start_line=3,
+            end_line=5,
+        )
+    ]
+
+    graph = CandidateGraphGenerator().generate(repository, facts, chunks)
+
+    feature = graph[0].capabilities[0].features[0]
+    assert feature.type == "CLI"
+    assert feature.name == "CLI command import_repositories"
+
+
+def test_route_feature_name_returns_empty_string_without_value():
+    generator = CandidateGraphGenerator()
+
+    assert generator._route_feature_name(None) == ""
+    assert generator._route_feature_name("") == ""
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index b041087..ca49358 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -354,6 +354,7 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
     assert candidate_graph.abilities
     assert "Example" in candidate_graph.abilities[0].description
     assert "@app.get" in candidate_graph.abilities[0].capabilities[0].description
+    assert candidate_graph.abilities[0].capabilities[0].features[0].name == "GET /health"
 
     capability_names = {
         capability.name for ability in candidate_graph.abilities