"""Tests for ``CandidateGraphGenerator.generate``.

Each test builds a small in-memory repository description from observed facts
(and optionally content chunks) and checks the shape, naming and confidence of
the generated candidate graph.
"""

from repo_registry.candidate_graph.generator import CandidateGraphGenerator
from repo_registry.core.models import ContentChunk, ObservedFact, Repository


def fact(id, kind, name, path="", value="", metadata=None):
    """Build an ``ObservedFact`` attached to repository/run/snapshot ``1``.

    Args:
        id: Identifier of the fact (name kept as-is for keyword callers).
        kind: Fact kind, e.g. ``"documentation"`` or ``"interface"``.
        name: Human-readable fact name.
        path: Repository-relative path the fact was observed in.
        value: Raw observed value (for instance a decorator line).
        metadata: Optional metadata mapping; a fresh empty dict is used when
            omitted so facts never share a mutable default.

    Returns:
        The constructed ``ObservedFact``.
    """
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value=value,
        metadata={} if metadata is None else metadata,
    )


def chunk(id, kind, path, text, start_line=1, end_line=1):
    """Build a ``ContentChunk`` attached to repository/run/snapshot ``1``.

    Args:
        id: Identifier of the chunk (name kept as-is for keyword callers).
        kind: Chunk kind, e.g. ``"documentation"`` or ``"interface"``.
        path: Repository-relative path the chunk was read from.
        text: Chunk text.
        start_line: First line covered by the chunk.
        end_line: Last line covered by the chunk.

    Returns:
        The constructed ``ContentChunk``.
    """
    return ContentChunk(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        path=path,
        kind=kind,
        start_line=start_line,
        end_line=end_line,
        text=text,
    )


def _analyzed_repository(name, url, description=None):
    """Return a ``Repository`` with id 1 on branch ``main`` in ``analyzed`` status."""
    return Repository(
        id=1,
        name=name,
        url=url,
        description=description,
        branch="main",
        status="analyzed",
    )


def test_candidate_generator_builds_purpose_seed_from_observed_facts():
    """The repository description seeds the ability; the route becomes an API feature."""
    repository = _analyzed_repository(
        "MailRouter",
        "/tmp/mail-router",
        description="Routes incoming customer email to the right team.",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "framework", "FastAPI", "requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    assert len(graph) == 1
    ability = graph[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "Usefulness" not in ability.name
    assert ability.source_refs[0].path == "README.md"

    interface_capability = ability.capabilities[0]
    assert interface_capability.name == "Expose Repository Interface"
    assert interface_capability.confidence == 0.75
    assert interface_capability.inputs == ["HTTP request"]
    assert interface_capability.outputs == ["HTTP response"]
    assert interface_capability.evidence[0].strength == "strong"

    api_feature = interface_capability.features[0]
    assert api_feature.type == "API"
    assert api_feature.name == "POST /classify"
    assert api_feature.location == "app.py"


def test_candidate_generator_enriches_descriptions_from_content_chunks():
    """Without a repository description, chunk text feeds names and descriptions."""
    repository = _analyzed_repository("MailRouter", "/tmp/mail-router")
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
    ]
    chunks = [
        chunk(
            1,
            "documentation",
            "README.md",
            "# MailRouter\nRoutes incoming customer email to the right team.",
            end_line=2,
        ),
        chunk(
            2,
            "interface",
            "app.py",
            '@app.post("/classify")\ndef classify_email():\n return {}',
            start_line=5,
            end_line=7,
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    ability = graph[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "MailRouter. Routes incoming customer email" in ability.description
    assert '@app.post("/classify")' in ability.capabilities[0].description


def test_candidate_confidence_scoring_stays_conservative_for_weak_facts():
    """A lone documentation fact yields confidence 0.45 and no capabilities."""
    repository = _analyzed_repository("WeakDocs", "/tmp/weak-docs")

    graph = CandidateGraphGenerator().generate(
        repository,
        [fact(1, "documentation", "README", "README.md")],
    )

    assert graph[0].confidence == 0.45
    assert graph[0].capabilities == []


def test_candidate_confidence_scoring_increases_with_supporting_facts():
    """Supporting facts raise the ability and capability confidences."""
    repository = _analyzed_repository("Supported", "/tmp/supported")
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.get("/health")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "example", "client.py", "examples/client.py"),
        fact(5, "framework", "FastAPI", "requirements.txt"),
        fact(6, "language", "Python"),
        fact(7, "manifest", "requirements.txt", "requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    ability = graph[0]
    assert ability.confidence == 1.0
    assert ability.capabilities[0].confidence == 0.85
    assert ability.capabilities[1].confidence == 0.75


def test_candidate_generator_names_cli_features_from_nearby_function():
    """A CLI decorator fact is named after the function found in the matching chunk."""
    repository = _analyzed_repository("CliTool", "/tmp/cli-tool")
    facts = [
        fact(1, "documentation", "README", "README.md"),
        # The fact carries the decorator's line number in its metadata; the
        # chunk below starts on that same line.
        fact(
            2,
            "interface",
            "python CLI command decorator",
            "cli.py",
            "@click.command()",
            metadata={"line": 3},
        ),
    ]
    chunks = [
        chunk(
            1,
            "interface",
            "cli.py",
            "@click.command()\ndef import_repositories():\n pass",
            start_line=3,
            end_line=5,
        )
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    capability = graph[0].capabilities[0]
    feature = capability.features[0]
    assert feature.type == "CLI"
    assert feature.name == "CLI command import_repositories"
    assert capability.inputs == ["CLI arguments"]
    assert capability.outputs == ["command output"]


def test_candidate_generator_uses_generic_io_for_unknown_interfaces():
    """An unrecognised interface fact gets generic input/output labels."""
    repository = _analyzed_repository("UnknownInterface", "/tmp/unknown-interface")
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "possible surface", "src/plugin.py"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = graph[0].capabilities[0]
    assert capability.inputs == ["caller input"]
    assert capability.outputs == ["callable interface result"]


def test_candidate_generator_groups_many_interface_facts_into_behavioral_features():
    """Three route facts in one file collapse into a single API surface feature."""
    repository = _analyzed_repository("Registry", "/tmp/registry")
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "src/api.py", '@app.get("/repos")'),
        fact(3, "interface", "python route decorator", "src/api.py", '@app.post("/repos")'),
        fact(
            4,
            "interface",
            "python route decorator",
            "src/api.py",
            '@app.post("/repos/{repository_id}/analysis-runs")',
        ),
        fact(5, "test", "test_api.py", "tests/test_api.py"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = graph[0].capabilities[0]
    assert len(capability.features) == 1
    feature = capability.features[0]
    assert feature.name == (
        "HTTP API surface: GET /repos, POST /repos, POST /repos/{repository_id}/analysis-runs"
    )
    assert feature.type == "API"
    assert feature.location == "src/api.py"
    assert len(feature.source_refs) == 3


def test_candidate_generator_maps_llm_provider_facts_to_capability():
    """LLM provider facts produce a routing capability with one feature per concern."""
    repository = _analyzed_repository("LLMConnect", "/tmp/llm-connect")
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "llm_provider", "OpenRouter", "providers.py", "openrouter"),
        fact(3, "llm_provider", "Claude", "providers.py", "claude"),
        fact(4, "credential_config", "OpenRouter API key", ".env.example", "OPENROUTER_API_KEY"),
        fact(5, "provider_registry", "LLM provider registry", "providers.py"),
        fact(6, "fallback_policy", "LLM provider fallback policy", "providers.py"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    expected_name = "Route LLM Requests Across Providers"
    # Use a default so a missing capability fails as an assertion with a
    # readable message instead of surfacing as a bare StopIteration.
    capability = next(
        (candidate for candidate in graph[0].capabilities if candidate.name == expected_name),
        None,
    )
    assert capability is not None, f"capability {expected_name!r} was not generated"

    feature_names = {feature.name for feature in capability.features}
    assert {"Use OpenRouter Models", "Use Claude Models"} <= feature_names
    assert "Configure LLM Provider Credentials" in feature_names
    assert "Maintain LLM Provider Registry" in feature_names
    assert "Apply LLM Provider Fallback Policy" in feature_names