generated from coulomb/repo-seed
249 lines
7.6 KiB
Python
249 lines
7.6 KiB
Python
from repo_registry.candidate_graph.generator import CandidateGraphGenerator
|
|
from repo_registry.core.models import ContentChunk, ObservedFact, Repository
|
|
|
|
|
|
def fact(id, kind, name, path="", value="", metadata=None):
    """Build an ``ObservedFact`` fixture tied to repository/run/snapshot 1.

    Args:
        id: Identifier assigned to the fact.
        kind: Fact kind string (the tests use values such as
            ``"documentation"`` and ``"interface"``).
        name: Fact name.
        path: Path the fact was observed at; defaults to an empty string.
        value: Raw observed value; defaults to an empty string.
        metadata: Optional mapping of extra metadata. When omitted the fact
            receives a fresh empty dict, matching the previous behavior.

    Returns:
        The constructed ``ObservedFact``.
    """
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value=value,
        # Copy so every fact owns its metadata dict and callers can reuse
        # one mapping across several fixtures without sharing state.
        metadata={} if metadata is None else dict(metadata),
    )
|
|
|
|
|
|
def chunk(id, kind, path, text, start_line=1, end_line=1):
    """Build a ``ContentChunk`` fixture tied to repository/run/snapshot 1.

    ``start_line`` and ``end_line`` both default to 1; every other field is
    forwarded unchanged from the arguments.
    """
    # Every fixture in this module shares the same owning identifiers.
    owner_ids = {"repository_id": 1, "analysis_run_id": 1, "snapshot_id": 1}
    return ContentChunk(
        id=id,
        path=path,
        kind=kind,
        start_line=start_line,
        end_line=end_line,
        text=text,
        **owner_ids,
    )
|
|
|
|
|
|
def test_candidate_generator_builds_purpose_seed_from_observed_facts():
    """A described repo with one HTTP route yields a single ability with an API capability."""
    repo = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description="Routes incoming customer email to the right team.",
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", path="README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            path="app.py",
            value='@app.post("/classify")',
        ),
        fact(3, "test", "test_app.py", path="tests/test_app.py"),
        fact(4, "framework", "FastAPI", path="requirements.txt"),
    ]

    generator = CandidateGraphGenerator()
    graph = generator.generate(repo, observed)

    # Exactly one ability, named after the repository description.
    assert len(graph) == 1
    ability = graph[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "Usefulness" not in ability.name
    assert ability.source_refs[0].path == "README.md"

    # The first capability describes the HTTP interface.
    interface_capability = ability.capabilities[0]
    assert interface_capability.name == "Expose Repository Interface"
    assert interface_capability.confidence == 0.75
    assert interface_capability.inputs == ["HTTP request"]
    assert interface_capability.outputs == ["HTTP response"]

    api_feature = interface_capability.features[0]
    assert api_feature.type == "API"
    assert api_feature.name == "POST /classify"
    assert api_feature.location == "app.py"

    assert interface_capability.evidence[0].strength == "strong"
|
|
|
|
|
|
def test_candidate_generator_enriches_descriptions_from_content_chunks():
    """With no repository description, chunk text feeds the ability and capability descriptions."""
    repo = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", path="README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            path="app.py",
            value='@app.post("/classify")',
        ),
    ]
    readme_chunk = chunk(
        1,
        "documentation",
        "README.md",
        "# MailRouter\nRoutes incoming customer email to the right team.",
        end_line=2,
    )
    route_chunk = chunk(
        2,
        "interface",
        "app.py",
        '@app.post("/classify")\ndef classify_email():\n return {}',
        start_line=5,
        end_line=7,
    )

    graph = CandidateGraphGenerator().generate(
        repo, observed, [readme_chunk, route_chunk]
    )

    assert graph[0].name == "Route Incoming Customer Email To The Right Team"
    assert "MailRouter. Routes incoming customer email" in graph[0].description
    assert '@app.post("/classify")' in graph[0].capabilities[0].description
|
|
|
|
|
|
def test_candidate_confidence_scoring_stays_conservative_for_weak_facts():
    """A lone documentation fact gives confidence 0.45 and no capabilities."""
    repo = Repository(
        id=1,
        name="WeakDocs",
        url="/tmp/weak-docs",
        description=None,
        branch="main",
        status="analyzed",
    )
    readme_only = [fact(1, "documentation", "README", path="README.md")]

    graph = CandidateGraphGenerator().generate(repo, readme_only)

    assert graph[0].confidence == 0.45
    assert graph[0].capabilities == []
|
|
|
|
|
|
def test_candidate_confidence_scoring_increases_with_supporting_facts():
    """Seven mixed-kind facts give ability confidence 1.0 and capability scores 0.85 and 0.75."""
    repo = Repository(
        id=1,
        name="Supported",
        url="/tmp/supported",
        description=None,
        branch="main",
        status="analyzed",
    )
    supporting = [
        fact(1, "documentation", "README", path="README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            path="app.py",
            value='@app.get("/health")',
        ),
        fact(3, "test", "test_app.py", path="tests/test_app.py"),
        fact(4, "example", "client.py", path="examples/client.py"),
        fact(5, "framework", "FastAPI", path="requirements.txt"),
        fact(6, "language", "Python"),
        fact(7, "manifest", "requirements.txt", path="requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repo, supporting)

    assert graph[0].confidence == 1.0
    assert graph[0].capabilities[0].confidence == 0.85
    assert graph[0].capabilities[1].confidence == 0.75
|
|
|
|
|
|
def test_candidate_generator_names_cli_features_from_nearby_function():
    """A click decorator fact plus a matching chunk yields a CLI feature named after the function."""
    repo = Repository(
        id=1,
        name="CliTool",
        url="/tmp/cli-tool",
        description=None,
        branch="main",
        status="analyzed",
    )
    # Built directly because this fact carries line metadata.
    cli_fact = ObservedFact(
        id=2,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind="interface",
        path="cli.py",
        name="python CLI command decorator",
        value="@click.command()",
        metadata={"line": 3},
    )
    observed = [fact(1, "documentation", "README", path="README.md"), cli_fact]
    cli_chunk = chunk(
        1,
        "interface",
        "cli.py",
        "@click.command()\ndef import_repositories():\n pass",
        start_line=3,
        end_line=5,
    )

    graph = CandidateGraphGenerator().generate(repo, observed, [cli_chunk])

    capability = graph[0].capabilities[0]
    feature = capability.features[0]
    assert feature.type == "CLI"
    assert feature.name == "CLI command import_repositories"
    assert capability.inputs == ["CLI arguments"]
    assert capability.outputs == ["command output"]
|
|
|
|
|
|
def test_candidate_generator_uses_generic_io_for_unknown_interfaces():
    """An interface fact with an empty value gets the generic caller input/result labels."""
    repo = Repository(
        id=1,
        name="UnknownInterface",
        url="/tmp/unknown-interface",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", path="README.md"),
        fact(2, "interface", "possible surface", path="src/plugin.py"),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    first_capability = graph[0].capabilities[0]
    assert first_capability.inputs == ["caller input"]
    assert first_capability.outputs == ["callable interface result"]
|
|
|
|
|
|
def test_candidate_generator_groups_many_interface_facts_into_behavioral_features():
    """Three route facts in one file collapse into a single API feature with three source refs."""
    repo = Repository(
        id=1,
        name="Registry",
        url="/tmp/registry",
        description=None,
        branch="main",
        status="analyzed",
    )
    api_path = "src/api.py"
    route_name = "python route decorator"
    observed = [
        fact(1, "documentation", "README", path="README.md"),
        fact(2, "interface", route_name, path=api_path, value='@app.get("/repos")'),
        fact(3, "interface", route_name, path=api_path, value='@app.post("/repos")'),
        fact(
            4,
            "interface",
            route_name,
            path=api_path,
            value='@app.post("/repos/{repository_id}/analysis-runs")',
        ),
        fact(5, "test", "test_api.py", path="tests/test_api.py"),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    capability = graph[0].capabilities[0]
    assert len(capability.features) == 1

    grouped = capability.features[0]
    expected_name = (
        "HTTP API surface: GET /repos, POST /repos, POST /repos/{repository_id}/analysis-runs"
    )
    assert grouped.name == expected_name
    assert grouped.type == "API"
    assert grouped.location == "src/api.py"
    assert len(grouped.source_refs) == 3
|