Files
repo-scoping/tests/test_candidate_graph.py

573 lines
18 KiB
Python

from repo_registry.candidate_graph.generator import CandidateGraphGenerator
from repo_registry.core.models import ContentChunk, ObservedFact, Repository
def fact(id, kind, name, path="", value="", metadata=None):
    """Build an ``ObservedFact`` fixture pinned to repository/run/snapshot 1.

    A missing (or otherwise falsy) ``metadata`` is replaced by a fresh empty
    dict so callers never share a default mapping.
    """
    fact_metadata = metadata if metadata else {}
    fields = {
        "id": id,
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
        "kind": kind,
        "path": path,
        "name": name,
        "value": value,
        "metadata": fact_metadata,
    }
    return ObservedFact(**fields)
def chunk(id, kind, path, text, start_line=1, end_line=1):
    """Build a ``ContentChunk`` fixture pinned to repository/run/snapshot 1.

    ``start_line`` and ``end_line`` both default to 1.
    """
    fields = {
        "id": id,
        "repository_id": 1,
        "analysis_run_id": 1,
        "snapshot_id": 1,
        "path": path,
        "kind": kind,
        "start_line": start_line,
        "end_line": end_line,
        "text": text,
    }
    return ContentChunk(**fields)
def test_candidate_generator_builds_purpose_seed_from_observed_facts():
    """Check the ability and single API capability generated for a described repo.

    The repository carries a description plus documentation, interface, test
    and framework facts; no content chunks are supplied.
    """
    mail_router = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description="Routes incoming customer email to the right team.",
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "framework", "FastAPI", "requirements.txt"),
    ]

    candidates = CandidateGraphGenerator().generate(mail_router, observed)

    # Ability-level expectations.
    assert len(candidates) == 1
    ability = candidates[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "Usefulness" not in ability.name
    assert ability.primary_class == "developer-tooling"
    assert "interface" in ability.attributes
    assert ability.source_refs[0].path == "README.md"

    # Capability-level expectations for the HTTP route.
    api_capability = ability.capabilities[0]
    assert api_capability.name == "Expose Repository Interface"
    assert api_capability.primary_class == "interface"
    assert {"surface", "api", "utility-owned"}.issubset(api_capability.attributes)
    assert api_capability.confidence == 0.75
    assert api_capability.inputs == ["HTTP request"]
    assert api_capability.outputs == ["HTTP response"]

    # Feature-level expectations for the single route.
    api_feature = api_capability.features[0]
    assert api_feature.type == "API"
    assert api_feature.primary_class == "API"
    assert {"API", "surface", "http"}.issubset(api_feature.attributes)
    assert api_feature.name == "POST /classify"
    assert api_feature.location == "app.py"

    assert api_capability.evidence[0].strength == "strong"
    assert len(ability.capabilities) == 1
def test_candidate_generator_extracts_intended_capability_blocks_from_intent_chunks():
    """Check which "Intended Capabilities" bullets end up as capabilities.

    Bullets from the INTENT.md chunk are expected as capability names; the
    bullet from the SCOPE.md chunk is expected to be absent.
    """
    key_cape = Repository(
        id=1,
        name="KeyCape",
        url="/tmp/key-cape",
        description=None,
        branch="main",
        status="analyzed",
    )
    intent_fact = fact(
        1,
        "intent",
        "INTENT",
        "INTENT.md",
        metadata={"source_role": "intent_summary"},
    )
    scope_fact = fact(
        2,
        "scope",
        "SCOPE",
        "SCOPE.md",
        metadata={"source_role": "derived_scope"},
    )
    intent_text = (
        "# INTENT\n\n"
        "Lightweight IAM for small deployments.\n\n"
        "## Intended Capabilities\n\n"
        "- Enforce OIDC PKCE profiles: reject unsafe client profiles.\n"
        "- Validate LDAP schema migrations.\n"
    )
    scope_text = "# SCOPE\n\n## Intended Capabilities\n\n- Route LLM provider requests.\n"
    content = [
        chunk(1, "intent", "INTENT.md", intent_text),
        chunk(2, "scope", "SCOPE.md", scope_text),
    ]

    candidates = CandidateGraphGenerator().generate(
        key_cape, [intent_fact, scope_fact], content
    )

    generated_names = {item.name for item in candidates[0].capabilities}
    assert "Enforce OIDC PKCE Profiles" in generated_names
    assert "Validate LDAP Schema Migrations" in generated_names
    assert "Route LLM Provider Requests" not in generated_names

    pkce_capability = next(
        item
        for item in candidates[0].capabilities
        if item.name == "Enforce OIDC PKCE Profiles"
    )
    assert pkce_capability.primary_class == "intent-capability"
    expected_attributes = {
        "intent-derived",
        "utility-owned",
        "review-required-intent",
    }
    assert expected_attributes.issubset(pkce_capability.attributes)
    assert [ref.path for ref in pkce_capability.source_refs] == ["INTENT.md"]
def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name():
    """Check the ability name comes from the intent chunk, not the scope chunk.

    The scope chunk is listed first and tagged ``derived_scope``; the intent
    chunk is listed second and tagged ``intent_summary``.
    """
    llm_connect = Repository(
        id=1,
        name="LLMConnect",
        url="/tmp/llm-connect",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        ),
        fact(
            2,
            "documentation",
            "SCOPE",
            "SCOPE.md",
            metadata={"source_role": "derived_scope"},
        ),
    ]

    scope_chunk = chunk(
        1,
        "documentation",
        "SCOPE.md",
        "# SCOPE\n\nA stale first paragraph copied from another repository.",
    )
    intent_chunk = chunk(
        2,
        "intent",
        "INTENT.md",
        "# INTENT\n\nProvide a provider-agnostic LLM connector.",
    )
    # The chunk helper takes no metadata argument, so roles are set afterwards.
    scope_chunk.metadata["source_role"] = "derived_scope"
    intent_chunk.metadata["source_role"] = "intent_summary"

    candidates = CandidateGraphGenerator().generate(
        llm_connect, observed, [scope_chunk, intent_chunk]
    )

    assert candidates[0].name == "Provide A Provider-agnostic LLM Connector"
def test_candidate_generator_enriches_descriptions_from_content_chunks():
    """Check that chunk text shows up in the ability and capability descriptions.

    The repository has no description of its own, so the expected name and
    description text appear only in the supplied chunks.
    """
    mail_router = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
    ]
    readme_chunk = chunk(
        1,
        "documentation",
        "README.md",
        "# MailRouter\nRoutes incoming customer email to the right team.",
        end_line=2,
    )
    route_chunk = chunk(
        2,
        "interface",
        "app.py",
        '@app.post("/classify")\ndef classify_email():\n return {}',
        start_line=5,
        end_line=7,
    )

    candidates = CandidateGraphGenerator().generate(
        mail_router, observed, [readme_chunk, route_chunk]
    )

    ability = candidates[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "MailRouter. Routes incoming customer email" in ability.description
    assert '@app.post("/classify")' in ability.capabilities[0].description
def test_candidate_generator_prefers_intent_over_derived_scope_chunks():
    """Check INTENT.md wins over SCOPE.md and README.md for name and description.

    The chunks are supplied in the order scope, intent, README, so the intent
    chunk is neither first nor last.
    """
    key_cape = Repository(
        id=1,
        name="KeyCape",
        url="/tmp/key-cape",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "intent", "INTENT", "INTENT.md"),
        fact(2, "scope", "SCOPE", "SCOPE.md"),
        fact(3, "documentation", "README", "README.md"),
    ]
    scope_chunk = chunk(
        1,
        "scope",
        "SCOPE.md",
        "# SCOPE\nAlready provides deployed IAM runtime behavior.",
        end_line=2,
    )
    intent_chunk = chunk(
        2,
        "intent",
        "INTENT.md",
        "# INTENT\nDesign a lightweight IAM profile implementation.",
        end_line=2,
    )
    readme_chunk = chunk(
        3,
        "documentation",
        "README.md",
        "# KeyCape\nREADME fallback should not beat intent.",
        end_line=2,
    )

    candidates = CandidateGraphGenerator().generate(
        key_cape, observed, [scope_chunk, intent_chunk, readme_chunk]
    )

    ability = candidates[0]
    assert ability.name == "Design A Lightweight IAM Profile Implementation"
    assert "INTENT. Design a lightweight IAM profile implementation" in ability.description
    assert ability.source_refs[0].path == "INTENT.md"
def test_candidate_confidence_scoring_stays_conservative_for_weak_facts():
    """Check a lone documentation fact gives confidence 0.45 and no capabilities."""
    weak_docs = Repository(
        id=1,
        name="WeakDocs",
        url="/tmp/weak-docs",
        description=None,
        branch="main",
        status="analyzed",
    )
    only_readme = [fact(1, "documentation", "README", "README.md")]

    candidates = CandidateGraphGenerator().generate(weak_docs, only_readme)

    ability = candidates[0]
    assert ability.confidence == 0.45
    assert ability.capabilities == []
def test_candidate_confidence_scoring_increases_with_supporting_facts():
    """Check confidence values when seven supporting facts of varied kinds exist.

    Expected: ability confidence 1.0, a single capability with confidence 0.85.
    """
    supported = Repository(
        id=1,
        name="Supported",
        url="/tmp/supported",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.get("/health")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "example", "client.py", "examples/client.py"),
        fact(5, "framework", "FastAPI", "requirements.txt"),
        fact(6, "language", "Python"),
        fact(7, "manifest", "requirements.txt", "requirements.txt"),
    ]

    candidates = CandidateGraphGenerator().generate(supported, observed)

    ability = candidates[0]
    assert ability.confidence == 1.0
    assert ability.capabilities[0].confidence == 0.85
    assert len(ability.capabilities) == 1
def test_candidate_generator_names_cli_features_from_nearby_function():
    """Check a CLI decorator fact is named after the function in its chunk.

    The interface fact carries ``{"line": 3}`` metadata and the chunk for the
    same file starts on line 3 with the decorated function.
    """
    cli_tool = Repository(
        id=1,
        name="CliTool",
        url="/tmp/cli-tool",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "interface",
            "python CLI command decorator",
            "cli.py",
            "@click.command()",
            {"line": 3},
        ),
    ]
    content = [
        chunk(
            1,
            "interface",
            "cli.py",
            "@click.command()\ndef import_repositories():\n pass",
            start_line=3,
            end_line=5,
        )
    ]

    candidates = CandidateGraphGenerator().generate(cli_tool, observed, content)

    cli_feature = candidates[0].capabilities[0].features[0]
    cli_capability = candidates[0].capabilities[0]
    assert cli_feature.type == "CLI"
    assert cli_feature.name == "CLI command import_repositories"
    assert cli_capability.inputs == ["CLI arguments"]
    assert cli_capability.outputs == ["command output"]
def test_candidate_generator_uses_generic_io_for_unknown_interfaces():
    """Check the inputs/outputs reported for an interface fact with no value."""
    unknown_interface = Repository(
        id=1,
        name="UnknownInterface",
        url="/tmp/unknown-interface",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "possible surface", "src/plugin.py"),
    ]

    candidates = CandidateGraphGenerator().generate(unknown_interface, observed)

    first_capability = candidates[0].capabilities[0]
    assert first_capability.inputs == ["caller input"]
    assert first_capability.outputs == ["callable interface result"]
def test_candidate_generator_groups_many_interface_facts_into_behavioral_features():
    """Check three route facts from one file collapse into a single API feature.

    The grouped feature is expected to list all three routes in its name and
    to keep one source reference per route.
    """
    registry = Repository(
        id=1,
        name="Registry",
        url="/tmp/registry",
        description=None,
        branch="main",
        status="analyzed",
    )
    route_kind = "interface"
    route_name = "python route decorator"
    route_file = "src/api.py"
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, route_kind, route_name, route_file, '@app.get("/repos")'),
        fact(3, route_kind, route_name, route_file, '@app.post("/repos")'),
        fact(
            4,
            route_kind,
            route_name,
            route_file,
            '@app.post("/repos/{repository_id}/analysis-runs")',
        ),
        fact(5, "test", "test_api.py", "tests/test_api.py"),
    ]

    candidates = CandidateGraphGenerator().generate(registry, observed)

    grouped_features = candidates[0].capabilities[0].features
    assert len(grouped_features) == 1
    api_feature = grouped_features[0]
    expected_name = (
        "HTTP API surface: GET /repos, POST /repos, POST /repos/{repository_id}/analysis-runs"
    )
    assert api_feature.name == expected_name
    assert api_feature.type == "API"
    assert api_feature.location == "src/api.py"
    assert len(api_feature.source_refs) == 3
def test_candidate_generator_maps_llm_provider_facts_to_capability():
    """Check provider, credential, registry and fallback facts form one LLM capability.

    The capability is looked up by name and then checked for its class,
    attributes and the feature names derived from each supporting fact.
    """
    llm_connect = Repository(
        id=1,
        name="LLMConnect",
        url="/tmp/llm-connect",
        description=None,
        branch="main",
        status="analyzed",
    )
    adapter_source = {
        "source_role": "implementation_source",
        "utility_relationship": "adapter",
    }
    observed = [
        fact(1, "documentation", "README", "README.md"),
        # Each fact receives its own copy so no metadata dict is shared.
        fact(2, "llm_provider", "OpenRouter", "providers.py", "openrouter", dict(adapter_source)),
        fact(3, "llm_provider", "Claude", "providers.py", "claude", dict(adapter_source)),
        fact(
            4,
            "credential_config",
            "OpenRouter API key",
            ".env.example",
            "OPENROUTER_API_KEY",
            {"source_role": "configuration", "utility_relationship": "configure"},
        ),
        fact(
            5,
            "provider_registry",
            "LLM provider registry",
            "providers.py",
            metadata=dict(adapter_source),
        ),
        fact(
            6,
            "fallback_policy",
            "LLM provider fallback policy",
            "providers.py",
            metadata=dict(adapter_source),
        ),
    ]

    candidates = CandidateGraphGenerator().generate(llm_connect, observed)

    ability = candidates[0]
    routing = next(
        item
        for item in ability.capabilities
        if item.name == "Route LLM Requests Across Providers"
    )
    assert ability.primary_class == "ai-integration"
    assert routing.primary_class == "llm-integration"
    assert {"llm-provider", "openrouter", "claude", "fallback-policy"}.issubset(
        routing.attributes
    )
    assert {"utility-adapter", "utility-configure"}.issubset(routing.attributes)

    feature_names = {item.name for item in routing.features}
    assert {"Use OpenRouter Models", "Use Claude Models"}.issubset(feature_names)
    assert "Configure LLM Provider Credentials" in feature_names
    assert "Maintain LLM Provider Registry" in feature_names
    assert "Apply LLM Provider Fallback Policy" in feature_names

    openrouter_feature = next(
        item for item in routing.features if item.name == "Use OpenRouter Models"
    )
    assert openrouter_feature.primary_class == "integration"
    assert {"llm-provider", "openrouter"}.issubset(openrouter_feature.attributes)
def test_candidate_generator_does_not_promote_llm_provider_mentions_to_capability():
    """Check a provider that is only mentioned in docs yields no routing capability."""
    mention_only = Repository(
        id=1,
        name="MentionOnly",
        url="/tmp/mention-only",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "Claude",
            "README.md",
            "claude",
            {"source_role": "product_documentation", "utility_relationship": "mention"},
        ),
    ]

    candidates = CandidateGraphGenerator().generate(mention_only, observed)

    routing_name = "Route LLM Requests Across Providers"
    promoted = [
        item.name for item in candidates[0].capabilities if item.name == routing_name
    ]
    assert promoted == []
def test_candidate_generator_excludes_mention_only_providers_from_promoted_capability():
    """Check a mentioned provider is left out of an otherwise promoted capability.

    OpenRouter and the registry come from ``src/providers.py`` as adapters;
    Claude appears only as a README mention and is expected to be absent from
    the capability's description, features and source references.
    """
    mixed_signals = Repository(
        id=1,
        name="MixedSignals",
        url="/tmp/mixed-signals",
        description=None,
        branch="main",
        status="analyzed",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "OpenRouter",
            "src/providers.py",
            "openrouter",
            {"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
        fact(
            3,
            "llm_provider",
            "Claude",
            "README.md",
            "claude",
            {"source_role": "product_documentation", "utility_relationship": "mention"},
        ),
        fact(
            4,
            "provider_registry",
            "LLM provider registry",
            "src/providers.py",
            metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
    ]

    candidates = CandidateGraphGenerator().generate(mixed_signals, observed)

    routing = next(
        item
        for item in candidates[0].capabilities
        if item.name == "Route LLM Requests Across Providers"
    )
    assert "OpenRouter" in routing.description
    assert "Claude" not in routing.description

    expected_features = {
        "Use OpenRouter Models",
        "Maintain LLM Provider Registry",
    }
    assert {item.name for item in routing.features} == expected_features

    referenced_paths = [ref.path for ref in routing.source_refs]
    assert referenced_paths == ["src/providers.py", "src/providers.py"]