Candidate generation: use content chunks

This commit is contained in:
2026-04-26 02:49:58 +02:00
parent 6416139176
commit 07c837d6bb
4 changed files with 119 additions and 13 deletions

View File

@@ -1,5 +1,5 @@
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
from repo_registry.core.models import ObservedFact, Repository
from repo_registry.core.models import ContentChunk, ObservedFact, Repository
def fact(id, kind, name, path="", value=""):
@@ -16,6 +16,20 @@ def fact(id, kind, name, path="", value=""):
)
def chunk(id, kind, path, text, start_line=1, end_line=1):
    """Build a ``ContentChunk`` fixture for the tests in this module.

    The repository, analysis-run and snapshot ids are always 1; every
    other field is taken directly from the arguments. ``start_line`` and
    ``end_line`` both default to 1.
    """
    # Collect the constructor fields first so the fixed ids are easy to
    # tell apart from the per-call values.
    fields = {
        "id": id,
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
        "path": path,
        "kind": kind,
        "start_line": start_line,
        "end_line": end_line,
        "text": text,
    }
    return ContentChunk(**fields)
def test_candidate_generator_builds_review_seed_from_observed_facts():
repository = Repository(
id=1,
@@ -43,3 +57,40 @@ def test_candidate_generator_builds_review_seed_from_observed_facts():
assert interface_capability.features[0].type == "API"
assert interface_capability.features[0].location == "app.py"
assert interface_capability.evidence[0].strength == "strong"
def test_candidate_generator_enriches_descriptions_from_content_chunks():
    """Check that generated descriptions draw on the supplied content chunks.

    Feeds the generator one documentation chunk and one interface chunk and
    expects the first graph entry's description to contain the README
    wording, and its first capability's description to contain the route
    decorator.
    """
    readme_text = "# MailRouter\nRoutes incoming customer email to the right team."
    route_text = '@app.post("/classify")\ndef classify_email():\n return {}'

    mail_router = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
    ]
    readme_chunk = chunk(1, "documentation", "README.md", readme_text, end_line=2)
    route_chunk = chunk(2, "interface", "app.py", route_text, start_line=5, end_line=7)

    generated = CandidateGraphGenerator().generate(
        mail_router,
        observed,
        [readme_chunk, route_chunk],
    )

    first_entry = generated[0]
    assert "MailRouter. Routes incoming customer email" in first_entry.description
    assert '@app.post("/classify")' in first_entry.capabilities[0].description

View File

@@ -352,6 +352,8 @@ def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path):
candidate_graph = service.candidate_graph(repository.id, summary.analysis_run.id)
assert candidate_graph.repository.name == "Example"
assert candidate_graph.abilities
assert "Example" in candidate_graph.abilities[0].description
assert "@app.get" in candidate_graph.abilities[0].capabilities[0].description
capability_names = {
capability.name
for ability in candidate_graph.abilities