"""Tests for CandidateGraphGenerator built from hand-made facts and chunks."""

from repo_registry.candidate_graph.generator import CandidateGraphGenerator
from repo_registry.core.models import ContentChunk, ObservedFact, Repository

# Name the tests below look up when checking LLM provider promotion.
_LLM_ROUTING_CAPABILITY = "Route LLM Requests Across Providers"


def fact(id, kind, name, path="", value="", metadata=None):
    """Build an ObservedFact with repository, run and snapshot ids fixed to 1.

    A falsy ``metadata`` (``None`` or an empty dict) is replaced by a new
    empty dict; any other value is passed through unchanged.
    """
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value=value,
        metadata=metadata if metadata else {},
    )


def chunk(id, kind, path, text, start_line=1, end_line=1):
    """Build a ContentChunk with repository, run and snapshot ids fixed to 1."""
    return ContentChunk(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        path=path,
        kind=kind,
        start_line=start_line,
        end_line=end_line,
        text=text,
    )


def _analyzed_repository(name, url, description=None):
    """Build the Repository shape every test here uses.

    Only the name, url and description vary between tests; the id is 1, the
    branch is "main" and the status is "analyzed".
    """
    return Repository(
        id=1,
        name=name,
        url=url,
        description=description,
        branch="main",
        status="analyzed",
    )


def _capability_named(ability, wanted):
    """Return the first capability of ``ability`` whose name equals ``wanted``.

    Raises StopIteration when there is no such capability.
    """
    return next(
        candidate
        for candidate in ability.capabilities
        if candidate.name == wanted
    )


def test_candidate_generator_builds_purpose_seed_from_observed_facts():
    """A described repository with one route yields one ability, one capability."""
    repo = _analyzed_repository(
        "MailRouter",
        "/tmp/mail-router",
        "Routes incoming customer email to the right team.",
    )
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            "app.py",
            '@app.post("/classify")',
        ),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "framework", "FastAPI", "requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    assert len(graph) == 1
    ability = graph[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "Usefulness" not in ability.name
    assert ability.primary_class == "developer-tooling"
    assert "interface" in ability.attributes
    assert ability.source_refs[0].path == "README.md"

    interface_capability = ability.capabilities[0]
    assert interface_capability.name == "Expose Repository Interface"
    assert interface_capability.primary_class == "interface"
    expected_capability_attributes = {"surface", "api", "utility-owned"}
    assert expected_capability_attributes <= set(interface_capability.attributes)
    assert interface_capability.confidence == 0.75
    assert interface_capability.inputs == ["HTTP request"]
    assert interface_capability.outputs == ["HTTP response"]

    first_feature = interface_capability.features[0]
    assert first_feature.type == "API"
    assert first_feature.primary_class == "API"
    expected_feature_attributes = {"API", "surface", "http"}
    assert expected_feature_attributes <= set(first_feature.attributes)
    assert first_feature.name == "POST /classify"
    assert first_feature.location == "app.py"

    assert interface_capability.evidence[0].strength == "strong"
    assert len(ability.capabilities) == 1


def test_candidate_generator_extracts_intended_capability_blocks_from_intent_chunks():
    """Bullets listed in INTENT.md become capabilities; SCOPE.md bullets do not."""
    repo = _analyzed_repository("KeyCape", "/tmp/key-cape")
    observed = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        ),
        fact(
            2,
            "scope",
            "SCOPE",
            "SCOPE.md",
            metadata={"source_role": "derived_scope"},
        ),
    ]
    intent_text = (
        "# INTENT\n\n"
        "Lightweight IAM for small deployments.\n\n"
        "## Intended Capabilities\n\n"
        "- Enforce OIDC PKCE profiles: reject unsafe client profiles.\n"
        "- Validate LDAP schema migrations.\n"
    )
    scope_text = (
        "# SCOPE\n\n## Intended Capabilities\n\n- Route LLM provider requests.\n"
    )
    content = [
        chunk(1, "intent", "INTENT.md", intent_text),
        chunk(2, "scope", "SCOPE.md", scope_text),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed, content)

    capability_names = {candidate.name for candidate in graph[0].capabilities}
    assert "Enforce OIDC PKCE Profiles" in capability_names
    assert "Validate LDAP Schema Migrations" in capability_names
    assert "Route LLM Provider Requests" not in capability_names

    intent_capability = _capability_named(graph[0], "Enforce OIDC PKCE Profiles")
    assert intent_capability.primary_class == "intent-capability"
    expected_attributes = {
        "intent-derived",
        "utility-owned",
        "review-required-intent",
    }
    assert expected_attributes <= set(intent_capability.attributes)
    source_paths = [ref.path for ref in intent_capability.source_refs]
    assert source_paths == ["INTENT.md"]


def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name():
    """The ability name comes from the intent chunk, not the derived-scope chunk."""
    repo = _analyzed_repository("LLMConnect", "/tmp/llm-connect")
    observed = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        ),
        fact(
            2,
            "documentation",
            "SCOPE",
            "SCOPE.md",
            metadata={"source_role": "derived_scope"},
        ),
    ]
    scope_chunk = chunk(
        1,
        "documentation",
        "SCOPE.md",
        "# SCOPE\n\nA stale first paragraph copied from another repository.",
    )
    intent_chunk = chunk(
        2,
        "intent",
        "INTENT.md",
        "# INTENT\n\nProvide a provider-agnostic LLM connector.",
    )
    # chunk() takes no metadata argument, so the roles are set after creation.
    scope_chunk.metadata["source_role"] = "derived_scope"
    intent_chunk.metadata["source_role"] = "intent_summary"

    graph = CandidateGraphGenerator().generate(
        repo, observed, [scope_chunk, intent_chunk]
    )

    assert graph[0].name == "Provide A Provider-agnostic LLM Connector"


def test_candidate_generator_enriches_descriptions_from_content_chunks():
    """Chunk text shows up in the ability and capability descriptions."""
    repo = _analyzed_repository("MailRouter", "/tmp/mail-router")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            "app.py",
            '@app.post("/classify")',
        ),
    ]
    content = [
        chunk(
            1,
            "documentation",
            "README.md",
            "# MailRouter\nRoutes incoming customer email to the right team.",
            end_line=2,
        ),
        chunk(
            2,
            "interface",
            "app.py",
            '@app.post("/classify")\ndef classify_email():\n return {}',
            start_line=5,
            end_line=7,
        ),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed, content)

    ability = graph[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "MailRouter. Routes incoming customer email" in ability.description
    assert '@app.post("/classify")' in ability.capabilities[0].description


def test_candidate_generator_prefers_intent_over_derived_scope_chunks():
    """With intent, scope and README chunks present, the intent chunk wins."""
    repo = _analyzed_repository("KeyCape", "/tmp/key-cape")
    observed = [
        fact(1, "intent", "INTENT", "INTENT.md"),
        fact(2, "scope", "SCOPE", "SCOPE.md"),
        fact(3, "documentation", "README", "README.md"),
    ]
    content = [
        chunk(
            1,
            "scope",
            "SCOPE.md",
            "# SCOPE\nAlready provides deployed IAM runtime behavior.",
            end_line=2,
        ),
        chunk(
            2,
            "intent",
            "INTENT.md",
            "# INTENT\nDesign a lightweight IAM profile implementation.",
            end_line=2,
        ),
        chunk(
            3,
            "documentation",
            "README.md",
            "# KeyCape\nREADME fallback should not beat intent.",
            end_line=2,
        ),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed, content)

    ability = graph[0]
    assert ability.name == "Design A Lightweight IAM Profile Implementation"
    assert (
        "INTENT. Design a lightweight IAM profile implementation"
        in ability.description
    )
    assert ability.source_refs[0].path == "INTENT.md"


def test_candidate_confidence_scoring_stays_conservative_for_weak_facts():
    """A lone README fact gives confidence 0.45 and no capabilities."""
    repo = _analyzed_repository("WeakDocs", "/tmp/weak-docs")
    readme_only = [fact(1, "documentation", "README", "README.md")]

    graph = CandidateGraphGenerator().generate(repo, readme_only)

    assert graph[0].confidence == 0.45
    assert graph[0].capabilities == []


def test_candidate_confidence_scoring_increases_with_supporting_facts():
    """Seven supporting facts give ability confidence 1.0, capability 0.85."""
    repo = _analyzed_repository("Supported", "/tmp/supported")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            "app.py",
            '@app.get("/health")',
        ),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "example", "client.py", "examples/client.py"),
        fact(5, "framework", "FastAPI", "requirements.txt"),
        fact(6, "language", "Python"),
        fact(7, "manifest", "requirements.txt", "requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    assert graph[0].confidence == 1.0
    assert graph[0].capabilities[0].confidence == 0.85
    assert len(graph[0].capabilities) == 1


def test_candidate_generator_names_cli_features_from_nearby_function():
    """A click decorator fact is named after the function in its chunk."""
    repo = _analyzed_repository("CliTool", "/tmp/cli-tool")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "interface",
            "python CLI command decorator",
            "cli.py",
            "@click.command()",
            {"line": 3},
        ),
    ]
    content = [
        chunk(
            1,
            "interface",
            "cli.py",
            "@click.command()\ndef import_repositories():\n pass",
            start_line=3,
            end_line=5,
        )
    ]

    graph = CandidateGraphGenerator().generate(repo, observed, content)

    cli_feature = graph[0].capabilities[0].features[0]
    cli_capability = graph[0].capabilities[0]
    assert cli_feature.type == "CLI"
    assert cli_feature.name == "CLI command import_repositories"
    assert cli_capability.inputs == ["CLI arguments"]
    assert cli_capability.outputs == ["command output"]


def test_candidate_generator_uses_generic_io_for_unknown_interfaces():
    """An interface fact with no value gets the generic inputs and outputs."""
    repo = _analyzed_repository("UnknownInterface", "/tmp/unknown-interface")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "possible surface", "src/plugin.py"),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    generic_capability = graph[0].capabilities[0]
    assert generic_capability.inputs == ["caller input"]
    assert generic_capability.outputs == ["callable interface result"]


def test_candidate_generator_groups_many_interface_facts_into_behavioral_features():
    """Three route facts in one file produce a single grouped API feature."""
    repo = _analyzed_repository("Registry", "/tmp/registry")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "interface",
            "python route decorator",
            "src/api.py",
            '@app.get("/repos")',
        ),
        fact(
            3,
            "interface",
            "python route decorator",
            "src/api.py",
            '@app.post("/repos")',
        ),
        fact(
            4,
            "interface",
            "python route decorator",
            "src/api.py",
            '@app.post("/repos/{repository_id}/analysis-runs")',
        ),
        fact(5, "test", "test_api.py", "tests/test_api.py"),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    api_capability = graph[0].capabilities[0]
    assert len(api_capability.features) == 1
    grouped = api_capability.features[0]
    expected_name = (
        "HTTP API surface: GET /repos, POST /repos, "
        "POST /repos/{repository_id}/analysis-runs"
    )
    assert grouped.name == expected_name
    assert grouped.type == "API"
    assert grouped.location == "src/api.py"
    assert len(grouped.source_refs) == 3


def test_candidate_generator_maps_llm_provider_facts_to_capability():
    """Provider, credential, registry and fallback facts form one capability."""
    repo = _analyzed_repository("LLMConnect", "/tmp/llm-connect")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "OpenRouter",
            "providers.py",
            "openrouter",
            {
                "source_role": "implementation_source",
                "utility_relationship": "adapter",
            },
        ),
        fact(
            3,
            "llm_provider",
            "Claude",
            "providers.py",
            "claude",
            {
                "source_role": "implementation_source",
                "utility_relationship": "adapter",
            },
        ),
        fact(
            4,
            "credential_config",
            "OpenRouter API key",
            ".env.example",
            "OPENROUTER_API_KEY",
            {
                "source_role": "configuration",
                "utility_relationship": "configure",
            },
        ),
        fact(
            5,
            "provider_registry",
            "LLM provider registry",
            "providers.py",
            metadata={
                "source_role": "implementation_source",
                "utility_relationship": "adapter",
            },
        ),
        fact(
            6,
            "fallback_policy",
            "LLM provider fallback policy",
            "providers.py",
            metadata={
                "source_role": "implementation_source",
                "utility_relationship": "adapter",
            },
        ),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    routing = _capability_named(graph[0], _LLM_ROUTING_CAPABILITY)
    assert graph[0].primary_class == "ai-integration"
    assert routing.primary_class == "llm-integration"
    provider_attributes = {
        "llm-provider",
        "openrouter",
        "claude",
        "fallback-policy",
    }
    assert provider_attributes <= set(routing.attributes)
    assert {"utility-adapter", "utility-configure"} <= set(routing.attributes)

    feature_names = {item.name for item in routing.features}
    assert {"Use OpenRouter Models", "Use Claude Models"} <= feature_names
    assert "Configure LLM Provider Credentials" in feature_names
    assert "Maintain LLM Provider Registry" in feature_names
    assert "Apply LLM Provider Fallback Policy" in feature_names

    openrouter_feature = next(
        item for item in routing.features if item.name == "Use OpenRouter Models"
    )
    assert openrouter_feature.primary_class == "integration"
    assert {"llm-provider", "openrouter"} <= set(openrouter_feature.attributes)


def test_candidate_generator_does_not_promote_llm_provider_mentions_to_capability():
    """A provider that is only mentioned in the README creates no capability."""
    repo = _analyzed_repository("MentionOnly", "/tmp/mention-only")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "Claude",
            "README.md",
            "claude",
            {
                "source_role": "product_documentation",
                "utility_relationship": "mention",
            },
        ),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    assert not any(
        candidate.name == _LLM_ROUTING_CAPABILITY
        for candidate in graph[0].capabilities
    )


def test_candidate_generator_excludes_mention_only_providers_from_promoted_capability():
    """Mention-only providers are left out of a capability adapters promoted."""
    repo = _analyzed_repository("MixedSignals", "/tmp/mixed-signals")
    observed = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "OpenRouter",
            "src/providers.py",
            "openrouter",
            {
                "source_role": "implementation_source",
                "utility_relationship": "adapter",
            },
        ),
        fact(
            3,
            "llm_provider",
            "Claude",
            "README.md",
            "claude",
            {
                "source_role": "product_documentation",
                "utility_relationship": "mention",
            },
        ),
        fact(
            4,
            "provider_registry",
            "LLM provider registry",
            "src/providers.py",
            metadata={
                "source_role": "implementation_source",
                "utility_relationship": "adapter",
            },
        ),
    ]

    graph = CandidateGraphGenerator().generate(repo, observed)

    routing = _capability_named(graph[0], _LLM_ROUTING_CAPABILITY)
    assert "OpenRouter" in routing.description
    assert "Claude" not in routing.description
    assert {item.name for item in routing.features} == {
        "Use OpenRouter Models",
        "Maintain LLM Provider Registry",
    }
    assert [ref.path for ref in routing.source_refs] == [
        "src/providers.py",
        "src/providers.py",
    ]