generated from coulomb/repo-seed
Improved candidate feature naming
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from repo_registry.core.models import ContentChunk, ObservedFact, Repository, SourceReference
|
from repo_registry.core.models import ContentChunk, ObservedFact, Repository, SourceReference
|
||||||
@@ -126,7 +127,7 @@ class CandidateGraphGenerator:
|
|||||||
) -> CandidateCapabilityDraft:
|
) -> CandidateCapabilityDraft:
|
||||||
features = [
|
features = [
|
||||||
CandidateFeatureDraft(
|
CandidateFeatureDraft(
|
||||||
name=fact.value or fact.name,
|
name=self._feature_name(fact, chunks),
|
||||||
type=self._feature_type(fact),
|
type=self._feature_type(fact),
|
||||||
location=fact.path,
|
location=fact.path,
|
||||||
confidence=0.65 if fact.value else 0.45,
|
confidence=0.65 if fact.value else 0.45,
|
||||||
@@ -194,6 +195,40 @@ class CandidateGraphGenerator:
|
|||||||
return "API"
|
return "API"
|
||||||
return "interface"
|
return "interface"
|
||||||
|
|
||||||
|
def _feature_name(self, fact: ObservedFact, chunks: list[ContentChunk]) -> str:
|
||||||
|
route_name = self._route_feature_name(fact.value)
|
||||||
|
if route_name:
|
||||||
|
return route_name
|
||||||
|
if self._feature_type(fact) == "CLI":
|
||||||
|
function_name = self._function_name_near_fact(fact, chunks)
|
||||||
|
if function_name:
|
||||||
|
return f"CLI command {function_name}"
|
||||||
|
return fact.value or fact.name
|
||||||
|
|
||||||
|
def _route_feature_name(self, value: str) -> str:
|
||||||
|
match = re.search(r"@(?:app|router)\.(get|post|put|patch|delete)\((['\"])(.*?)\2", value)
|
||||||
|
if match is None:
|
||||||
|
return ""
|
||||||
|
method = match.group(1).upper()
|
||||||
|
path = match.group(3)
|
||||||
|
return f"{method} {path}"
|
||||||
|
|
||||||
|
def _function_name_near_fact(
|
||||||
|
self,
|
||||||
|
fact: ObservedFact,
|
||||||
|
chunks: list[ContentChunk],
|
||||||
|
) -> str:
|
||||||
|
line = fact.metadata.get("line")
|
||||||
|
for chunk in chunks:
|
||||||
|
if chunk.path != fact.path or chunk.kind != "interface":
|
||||||
|
continue
|
||||||
|
if isinstance(line, int) and not (chunk.start_line <= line <= chunk.end_line):
|
||||||
|
continue
|
||||||
|
match = re.search(r"^\s*def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(", chunk.text, re.MULTILINE)
|
||||||
|
if match is not None:
|
||||||
|
return match.group(1)
|
||||||
|
return ""
|
||||||
|
|
||||||
def _ability_confidence(
|
def _ability_confidence(
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ def test_candidate_generator_builds_review_seed_from_observed_facts():
|
|||||||
assert interface_capability.name == "Expose Repository Interface"
|
assert interface_capability.name == "Expose Repository Interface"
|
||||||
assert interface_capability.confidence == 0.75
|
assert interface_capability.confidence == 0.75
|
||||||
assert interface_capability.features[0].type == "API"
|
assert interface_capability.features[0].type == "API"
|
||||||
|
assert interface_capability.features[0].name == "POST /classify"
|
||||||
assert interface_capability.features[0].location == "app.py"
|
assert interface_capability.features[0].location == "app.py"
|
||||||
assert interface_capability.evidence[0].strength == "strong"
|
assert interface_capability.evidence[0].strength == "strong"
|
||||||
|
|
||||||
@@ -140,3 +141,44 @@ def test_candidate_confidence_scoring_increases_with_supporting_facts():
|
|||||||
assert graph[0].confidence == 1.0
|
assert graph[0].confidence == 1.0
|
||||||
assert graph[0].capabilities[0].confidence == 0.85
|
assert graph[0].capabilities[0].confidence == 0.85
|
||||||
assert graph[0].capabilities[1].confidence == 0.75
|
assert graph[0].capabilities[1].confidence == 0.75
|
||||||
|
|
||||||
|
|
||||||
|
def test_candidate_generator_names_cli_features_from_nearby_function():
    """A CLI decorator fact is named after the function found in its chunk."""
    # The fact points at the decorator on line 3 of cli.py ...
    cli_fact = ObservedFact(
        id=2,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind="interface",
        path="cli.py",
        name="python CLI command decorator",
        value="@click.command()",
        metadata={"line": 3},
    )
    # ... and the only interface chunk for cli.py spans lines 3-5.
    cli_chunk = chunk(
        1,
        "interface",
        "cli.py",
        "@click.command()\ndef import_repositories():\n pass",
        start_line=3,
        end_line=5,
    )
    repo = Repository(
        id=1,
        name="CliTool",
        url="/tmp/cli-tool",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [fact(1, "documentation", "README", "README.md"), cli_fact]

    graph = CandidateGraphGenerator().generate(repo, observed, [cli_chunk])

    first_feature = graph[0].capabilities[0].features[0]
    assert first_feature.type == "CLI"
    assert first_feature.name == "CLI command import_repositories"
|
||||||
|
|||||||
@@ -354,6 +354,7 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
|
|||||||
assert candidate_graph.abilities
|
assert candidate_graph.abilities
|
||||||
assert "Example" in candidate_graph.abilities[0].description
|
assert "Example" in candidate_graph.abilities[0].description
|
||||||
assert "@app.get" in candidate_graph.abilities[0].capabilities[0].description
|
assert "@app.get" in candidate_graph.abilities[0].capabilities[0].description
|
||||||
|
assert candidate_graph.abilities[0].capabilities[0].features[0].name == "GET /health"
|
||||||
capability_names = {
|
capability_names = {
|
||||||
capability.name
|
capability.name
|
||||||
for ability in candidate_graph.abilities
|
for ability in candidate_graph.abilities
|
||||||
|
|||||||
Reference in New Issue
Block a user