"""Tests for ``CandidateGraphGenerator.generate``.

Every test builds a ``Repository`` plus hand-written ``ObservedFact`` (and,
where needed, ``ContentChunk``) fixtures, runs the generator, and asserts on
the returned candidate graph: a list whose first element is the candidate
ability, which in turn carries capabilities, features, evidence and source
references.
"""

from repo_scoping.candidate_graph.generator import CandidateGraphGenerator
from repo_scoping.core.models import ContentChunk, ObservedFact, Repository


def fact(id, kind, name, path="", value="", metadata=None):
    """Build an ``ObservedFact`` fixture bound to repository/run/snapshot 1.

    ``id`` shadows the builtin of the same name; the parameter name is kept so
    existing callers keep working. A missing (or otherwise falsy) ``metadata``
    is replaced by a fresh empty dict so fixtures never share one mapping.
    """
    return ObservedFact(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        kind=kind,
        path=path,
        name=name,
        value=value,
        metadata=metadata or {},
    )


def chunk(id, kind, path, text, start_line=1, end_line=1):
    """Build a ``ContentChunk`` fixture bound to repository/run/snapshot 1.

    Chunk metadata is not a parameter here; tests that need it assign to
    ``chunk.metadata[...]`` after construction.
    """
    return ContentChunk(
        id=id,
        repository_id=1,
        analysis_run_id=1,
        snapshot_id=1,
        path=path,
        kind=kind,
        start_line=start_line,
        end_line=end_line,
        text=text,
    )


def test_candidate_generator_builds_purpose_seed_from_observed_facts():
    """A described repository with one HTTP route yields one ability and one interface capability."""
    repository = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description="Routes incoming customer email to the right team.",
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "framework", "FastAPI", "requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    assert len(graph) == 1
    ability = graph[0]
    assert ability.name == "Route Incoming Customer Email To The Right Team"
    assert "Usefulness" not in ability.name
    assert ability.primary_class == "developer-tooling"
    assert "interface" in ability.attributes
    assert ability.source_refs[0].path == "README.md"

    interface_capability = ability.capabilities[0]
    assert interface_capability.name == "Expose Repository Interface"
    assert interface_capability.primary_class == "interface"
    assert {"surface", "api", "utility-owned"} <= set(interface_capability.attributes)
    assert interface_capability.confidence == 0.75
    assert interface_capability.inputs == ["HTTP request"]
    assert interface_capability.outputs == ["HTTP response"]
    assert interface_capability.features[0].type == "API"
    assert interface_capability.features[0].primary_class == "API"
    assert {"API", "surface", "http"} <= set(interface_capability.features[0].attributes)
    assert interface_capability.features[0].name == "POST /classify"
    assert interface_capability.features[0].location == "app.py"
    assert interface_capability.evidence[0].strength == "strong"
    assert len(ability.capabilities) == 1


def test_candidate_generator_extracts_intended_capability_blocks_from_intent_chunks():
    """Bullets under "Intended Capabilities" in INTENT.md become capabilities; those in SCOPE.md do not."""
    repository = Repository(
        id=1,
        name="KeyCape",
        url="/tmp/key-cape",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        ),
        fact(
            2,
            "scope",
            "SCOPE",
            "SCOPE.md",
            metadata={"source_role": "derived_scope"},
        ),
    ]
    chunks = [
        chunk(
            1,
            "intent",
            "INTENT.md",
            "# INTENT\n\n"
            "Lightweight IAM for small deployments.\n\n"
            "## Intended Capabilities\n\n"
            "- Enforce OIDC PKCE profiles: reject unsafe client profiles.\n"
            "- Validate LDAP schema migrations.\n",
        ),
        chunk(
            2,
            "scope",
            "SCOPE.md",
            "# SCOPE\n\n## Intended Capabilities\n\n- Route LLM provider requests.\n",
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    capability_names = {capability.name for capability in graph[0].capabilities}
    assert "Enforce OIDC PKCE Profiles" in capability_names
    assert "Validate LDAP Schema Migrations" in capability_names
    assert "Route LLM Provider Requests" not in capability_names

    intent_capability = next(
        capability
        for capability in graph[0].capabilities
        if capability.name == "Enforce OIDC PKCE Profiles"
    )
    assert intent_capability.primary_class == "intent-capability"
    assert {
        "intent-derived",
        "utility-owned",
        "review-required-intent",
    } <= set(intent_capability.attributes)
    assert [ref.path for ref in intent_capability.source_refs] == ["INTENT.md"]


def test_candidate_generator_preserves_unicode_and_normalizes_analysis_names():
    """Non-ASCII text survives in the ability name and "Analysis of ..." is renamed to "Analyze ..."."""
    repository = Repository(
        id=1,
        name="VergabeTeilnahme",
        url="/tmp/vergabe-teilnahme",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        )
    ]
    chunks = [
        chunk(
            1,
            "intent",
            "INTENT.md",
            "# INTENT\n\n"
            "Vollständiger Implementierungsplan in 12 Ralph-Loop-Workplans.\n\n"
            "## Intended Capabilities\n\n"
            "- Analysis of impact risk and dependency chains.\n",
        )
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    assert graph[0].name == "Vollständiger Implementierungsplan In 12 Ralph-Loop-Workplans"
    assert graph[0].capabilities[0].name == "Analyze Impact Risk And Dependency Chains"


def test_candidate_generator_extracts_primary_outcome_subsections_from_intent():
    """Subsections of "Primary outcomes" become capabilities; an unrelated section does not."""
    repository = Repository(
        id=1,
        name="HelixForge",
        url="/tmp/helix-forge",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        )
    ]
    chunks = [
        chunk(
            1,
            "intent",
            "INTENT.md",
            "# INTENT\n\n"
            "HelixForge turns intent into structure.\n\n"
            # The heading deliberately contains a Markdown-escaped dot ("4\.").
            "## 4\\. Primary outcomes\n\n"
            "### 4.1 Capability discovery\n\n"
            "Clarify scope and ownership.\n\n"
            "### 4.2 Capability validation\n\n"
            "Validate architecture descriptions structurally and semantically.\n\n"
            "## Architectural foundation\n\n"
            "This section should not become a capability.\n",
        )
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    capability_names = {capability.name for capability in graph[0].capabilities}
    assert "Support Capability Discovery" in capability_names
    assert "Validate Capabilities" in capability_names
    assert "Architectural Foundation" not in capability_names


def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name():
    """The ability name comes from the intent chunk even when a derived-scope chunk is listed first."""
    repository = Repository(
        id=1,
        name="LLMConnect",
        url="/tmp/llm-connect",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(
            1,
            "intent",
            "INTENT",
            "INTENT.md",
            metadata={"source_role": "intent_summary"},
        ),
        fact(
            2,
            "documentation",
            "SCOPE",
            "SCOPE.md",
            metadata={"source_role": "derived_scope"},
        ),
    ]
    chunks = [
        chunk(
            1,
            "documentation",
            "SCOPE.md",
            "# SCOPE\n\nA stale first paragraph copied from another repository.",
        ),
        chunk(
            2,
            "intent",
            "INTENT.md",
            "# INTENT\n\nProvide a provider-agnostic LLM connector.",
        ),
    ]
    # The chunk() helper has no metadata parameter, so roles are set afterwards.
    chunks[0].metadata["source_role"] = "derived_scope"
    chunks[1].metadata["source_role"] = "intent_summary"

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    assert graph[0].name == "Provide A Provider-agnostic LLM Connector"


def test_candidate_generator_uses_scope_one_liner_over_template_readme():
    """The SCOPE.md one-liner names the ability and the template README text stays out of the description."""
    repository = Repository(
        id=1,
        name="ops-warden",
        url="/tmp/ops-warden",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "scope", "SCOPE", "SCOPE.md", metadata={"source_role": "derived_scope"}),
    ]
    chunks = [
        chunk(
            1,
            "documentation",
            "README.md",
            "# repo-seed\nA git repository template to bootstrap coulomb projects from.",
            end_line=2,
        ),
        chunk(
            2,
            "scope",
            "SCOPE.md",
            "# SCOPE\n\n## One-liner\n"
            "SSH Certificate Authority and credential issuance for the ops fleet.\n",
            end_line=4,
        ),
    ]
    chunks[1].metadata["source_role"] = "derived_scope"

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    assert graph[0].name == "SSH Certificate Authority And Credential Issuance For The Ops Fleet"
    assert "repo-seed" not in graph[0].description


def test_candidate_generator_extracts_current_capabilities_from_scope_blocks():
    """A fenced ``capability`` block in SCOPE.md becomes a scope-derived capability with one feature."""
    repository = Repository(
        id=1,
        name="railiance-apps",
        url="/tmp/railiance-apps",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "scope", "SCOPE", "SCOPE.md", metadata={"source_role": "derived_scope"}),
    ]
    chunks = [
        chunk(
            1,
            "scope",
            "SCOPE.md",
            "# SCOPE\n\n## One-liner\n"
            "S5 Workloads and Experience layer of the Railiance OAS Stack -- owns applications.\n\n"
            "## Provided Capabilities\n\n"
            "```capability\n"
            "type: infrastructure\n"
            "title: Application workload deployment\n"
            "description: Deploy and manage user-facing applications as Helm releases.\n"
            "keywords: [gitea, helm, application]\n"
            "```\n",
            end_line=12,
        ),
    ]
    chunks[0].metadata["source_role"] = "derived_scope"

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    ability = graph[0]
    # The expected name stops before the " -- owns applications" tail of the one-liner.
    assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack"
    capability = ability.capabilities[0]
    assert capability.name == "Application workload deployment"
    assert capability.primary_class == "infrastructure"
    assert {"scope-derived", "current-state", "review-required-scope"} <= set(
        capability.attributes
    )
    assert capability.features[0].name == "Application workload deployment"
    assert capability.features[0].location == "SCOPE.md"
    assert capability.evidence[0].reference == "SCOPE.md"


def test_candidate_generator_adds_fact_derived_capability_when_no_stronger_layers():
    """With only config/manifest facts, a fact-derived configuration capability is produced."""
    repository = Repository(
        id=1,
        name="railiance-empty-layer",
        url="/tmp/railiance-empty-layer",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "config", "sops config", ".sops.yaml"),
        fact(2, "manifest", "pyproject.toml", "pyproject.toml"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = graph[0].capabilities[0]
    assert capability.name == "Manage Repository Configuration"
    assert capability.primary_class == "fact-derived"
    assert {feature.type for feature in capability.features} == {
        "configuration",
        "manifest",
    }


def test_candidate_generator_enriches_descriptions_from_content_chunks():
    """Chunk text is reflected in both the ability description and the capability description."""
    repository = Repository(
        id=1,
        name="MailRouter",
        url="/tmp/mail-router",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.post("/classify")'),
    ]
    chunks = [
        chunk(
            1,
            "documentation",
            "README.md",
            "# MailRouter\nRoutes incoming customer email to the right team.",
            end_line=2,
        ),
        chunk(
            2,
            "interface",
            "app.py",
            # NOTE(review): the indentation before "return" inside this fixture
            # text is a single space here — confirm against version control.
            '@app.post("/classify")\ndef classify_email():\n return {}',
            start_line=5,
            end_line=7,
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    assert graph[0].name == "Route Incoming Customer Email To The Right Team"
    assert "MailRouter. Routes incoming customer email" in graph[0].description
    assert '@app.post("/classify")' in graph[0].capabilities[0].description


def test_candidate_generator_prefers_intent_over_derived_scope_chunks():
    """With intent, scope and README chunks present, name, description and first source ref come from INTENT.md."""
    repository = Repository(
        id=1,
        name="KeyCape",
        url="/tmp/key-cape",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "intent", "INTENT", "INTENT.md"),
        fact(2, "scope", "SCOPE", "SCOPE.md"),
        fact(3, "documentation", "README", "README.md"),
    ]
    chunks = [
        chunk(
            1,
            "scope",
            "SCOPE.md",
            "# SCOPE\nAlready provides deployed IAM runtime behavior.",
            end_line=2,
        ),
        chunk(
            2,
            "intent",
            "INTENT.md",
            "# INTENT\nDesign a lightweight IAM profile implementation.",
            end_line=2,
        ),
        chunk(
            3,
            "documentation",
            "README.md",
            "# KeyCape\nREADME fallback should not beat intent.",
            end_line=2,
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    assert graph[0].name == "Design A Lightweight IAM Profile Implementation"
    assert "INTENT. Design a lightweight IAM profile implementation" in graph[0].description
    assert graph[0].source_refs[0].path == "INTENT.md"


def test_candidate_confidence_scoring_stays_conservative_for_weak_facts():
    """A lone README fact gives ability confidence 0.45 and no capabilities."""
    repository = Repository(
        id=1,
        name="WeakDocs",
        url="/tmp/weak-docs",
        description=None,
        branch="main",
        status="analyzed",
    )

    graph = CandidateGraphGenerator().generate(
        repository,
        [fact(1, "documentation", "README", "README.md")],
    )

    assert graph[0].confidence == 0.45
    assert graph[0].capabilities == []


def test_candidate_confidence_scoring_increases_with_supporting_facts():
    """Interface, test, example, framework, language and manifest facts raise confidence to 1.0 / 0.85."""
    repository = Repository(
        id=1,
        name="Supported",
        url="/tmp/supported",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "app.py", '@app.get("/health")'),
        fact(3, "test", "test_app.py", "tests/test_app.py"),
        fact(4, "example", "client.py", "examples/client.py"),
        fact(5, "framework", "FastAPI", "requirements.txt"),
        fact(6, "language", "Python"),
        fact(7, "manifest", "requirements.txt", "requirements.txt"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    assert graph[0].confidence == 1.0
    assert graph[0].capabilities[0].confidence == 0.85
    assert len(graph[0].capabilities) == 1


def test_candidate_generator_names_cli_features_from_nearby_function():
    """A click decorator fact is named after the function that follows it in the chunk text."""
    repository = Repository(
        id=1,
        name="CliTool",
        url="/tmp/cli-tool",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        # Built through the shared helper like every other fixture; the
        # "line" metadata matches the chunk's start_line below.
        fact(
            2,
            "interface",
            "python CLI command decorator",
            "cli.py",
            "@click.command()",
            {"line": 3},
        ),
    ]
    chunks = [
        chunk(
            1,
            "interface",
            "cli.py",
            "@click.command()\ndef import_repositories():\n pass",
            start_line=3,
            end_line=5,
        )
    ]

    graph = CandidateGraphGenerator().generate(repository, facts, chunks)

    capability = graph[0].capabilities[0]
    feature = capability.features[0]
    assert feature.type == "CLI"
    assert feature.name == "CLI command import_repositories"
    assert capability.inputs == ["CLI arguments"]
    assert capability.outputs == ["command output"]


def test_candidate_generator_uses_generic_io_for_unknown_interfaces():
    """An interface fact with no recognisable kind gets generic input/output labels."""
    repository = Repository(
        id=1,
        name="UnknownInterface",
        url="/tmp/unknown-interface",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "possible surface", "src/plugin.py"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = graph[0].capabilities[0]
    assert capability.inputs == ["caller input"]
    assert capability.outputs == ["callable interface result"]


def test_candidate_generator_groups_many_interface_facts_into_behavioral_features():
    """Three route facts from one file collapse into a single API feature with three source refs."""
    repository = Repository(
        id=1,
        name="Registry",
        url="/tmp/registry",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "src/api.py", '@app.get("/repos")'),
        fact(3, "interface", "python route decorator", "src/api.py", '@app.post("/repos")'),
        fact(
            4,
            "interface",
            "python route decorator",
            "src/api.py",
            '@app.post("/repos/{repository_id}/analysis-runs")',
        ),
        fact(5, "test", "test_api.py", "tests/test_api.py"),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = graph[0].capabilities[0]
    assert len(capability.features) == 1
    feature = capability.features[0]
    assert feature.name == (
        "HTTP API surface: GET /repos, POST /repos, POST /repos/{repository_id}/analysis-runs"
    )
    assert feature.type == "API"
    assert feature.location == "src/api.py"
    assert len(feature.source_refs) == 3


def test_candidate_generator_maps_llm_provider_facts_to_capability():
    """Adapter-role provider, credential, registry and fallback facts yield an LLM routing capability."""
    repository = Repository(
        id=1,
        name="LLMConnect",
        url="/tmp/llm-connect",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "OpenRouter",
            "providers.py",
            "openrouter",
            {"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
        fact(
            3,
            "llm_provider",
            "Claude",
            "providers.py",
            "claude",
            {"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
        fact(
            4,
            "credential_config",
            "OpenRouter API key",
            ".env.example",
            "OPENROUTER_API_KEY",
            {"source_role": "configuration", "utility_relationship": "configure"},
        ),
        fact(
            5,
            "provider_registry",
            "LLM provider registry",
            "providers.py",
            metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
        fact(
            6,
            "fallback_policy",
            "LLM provider fallback policy",
            "providers.py",
            metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = next(
        capability
        for capability in graph[0].capabilities
        if capability.name == "Route LLM Requests Across Providers"
    )
    assert graph[0].primary_class == "ai-integration"
    assert capability.primary_class == "llm-integration"
    assert {"llm-provider", "openrouter", "claude", "fallback-policy"} <= set(
        capability.attributes
    )
    assert {"utility-adapter", "utility-configure"} <= set(capability.attributes)

    feature_names = {feature.name for feature in capability.features}
    assert {"Use OpenRouter Models", "Use Claude Models"} <= feature_names
    assert "Configure LLM Provider Credentials" in feature_names
    assert "Maintain LLM Provider Registry" in feature_names
    assert "Apply LLM Provider Fallback Policy" in feature_names

    openrouter_feature = next(
        feature for feature in capability.features if feature.name == "Use OpenRouter Models"
    )
    assert openrouter_feature.primary_class == "integration"
    assert {"llm-provider", "openrouter"} <= set(openrouter_feature.attributes)


def test_candidate_generator_does_not_promote_llm_provider_mentions_to_capability():
    """A provider fact whose relationship is "mention" must not create the LLM routing capability."""
    repository = Repository(
        id=1,
        name="MentionOnly",
        url="/tmp/mention-only",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "Claude",
            "README.md",
            "claude",
            {"source_role": "product_documentation", "utility_relationship": "mention"},
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    assert [
        capability.name
        for capability in graph[0].capabilities
        if capability.name == "Route LLM Requests Across Providers"
    ] == []


def test_candidate_generator_does_not_promote_owned_provider_vocabulary_to_capability():
    """Provider facts marked "owned" in scanner code yield a scanning capability, not LLM routing."""
    repository = Repository(
        id=1,
        name="RepoScoping",
        url="/tmp/repo-scoping",
        description="Maps repositories into reviewable capability graphs.",
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "interface", "python route decorator", "src/api.py", '@app.get("/repos")'),
        fact(
            3,
            "llm_provider",
            "OpenRouter",
            "src/repo_scoping/repo_scanning/scanner.py",
            "openrouter",
            {"source_role": "implementation_source", "utility_relationship": "owned"},
        ),
        fact(
            4,
            "provider_registry",
            "LLM provider registry",
            "src/repo_scoping/repo_scanning/scanner.py",
            metadata={
                "source_role": "implementation_source",
                "utility_relationship": "owned",
            },
        ),
        fact(
            5,
            "credential_config",
            "OpenRouter API key",
            "src/repo_scoping/repo_scanning/scanner.py",
            "OPENROUTER_API_KEY",
            {"source_role": "implementation_source", "utility_relationship": "configure"},
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability_names = {capability.name for capability in graph[0].capabilities}
    assert "Route LLM Requests Across Providers" not in capability_names
    assert "Scan Repositories Into Observed Facts" in capability_names


def test_candidate_generator_recovers_repo_scoping_native_candidate_families():
    """Doc and test file facts of repo-scoping itself map onto its nine expected capability families."""
    repository = Repository(
        id=1,
        name="repo-scoping",
        url="/tmp/repo-scoping",
        description="Maps repositories into reviewable capability graphs.",
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(2, "documentation", "api-contract.md", "docs/api-contract.md"),
        fact(
            3,
            "documentation",
            "characteristic-evidence-model.md",
            "docs/characteristic-evidence-model.md",
        ),
        fact(4, "documentation", "scope-md-spec.md", "docs/scope-md-spec.md"),
        fact(
            5,
            "documentation",
            "dependency-aware-scope-propagation.md",
            "docs/dependency-aware-scope-propagation.md",
        ),
        fact(
            6,
            "documentation",
            "repo-scope-context-response.json",
            "docs/schemas/repo-scope-context-response.json",
        ),
        fact(7, "test", "test_git_ingestion.py", "tests/test_git_ingestion.py"),
        fact(
            8,
            "test",
            "test_repository_metadata.py",
            "tests/test_repository_metadata.py",
        ),
        fact(
            9,
            "test",
            "test_repository_scanner.py",
            "tests/test_repository_scanner.py",
        ),
        fact(10, "test", "test_content_indexing.py", "tests/test_content_indexing.py"),
        fact(11, "test", "test_candidate_graph.py", "tests/test_candidate_graph.py"),
        fact(12, "test", "test_llm_extraction.py", "tests/test_llm_extraction.py"),
        fact(13, "test", "test_registry_service.py", "tests/test_registry_service.py"),
        fact(14, "test", "test_scope_generator.py", "tests/test_scope_generator.py"),
        fact(15, "test", "test_web_api.py", "tests/test_web_api.py"),
        fact(16, "test", "test_scope_context_api.py", "tests/test_scope_context_api.py"),
        fact(
            17,
            "interface",
            "python route decorator",
            "src/repo_scoping/web_api/app.py",
            '@app.post("/repos")',
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability_names = {capability.name for capability in graph[0].capabilities}
    assert {
        "Register And Track Repositories",
        "Scan Repositories Into Observed Facts",
        "Index Source Content With Provenance",
        "Generate Reviewable Candidate Characteristics",
        "Review And Approve Candidate Characteristics",
        "Search Compare And Export Approved Profiles",
        "Generate And Maintain SCOPE.md",
        "Explore Dependency And Impact Graphs",
        "Provide Scope Context To Downstream Agents",
    } <= capability_names
    assert "Route LLM Requests Across Providers" not in capability_names

    scanning = next(
        capability
        for capability in graph[0].capabilities
        if capability.name == "Scan Repositories Into Observed Facts"
    )
    assert scanning.primary_class == "analysis"
    assert {"deterministic", "facts", "provenance", "utility-owned"} <= set(
        scanning.attributes
    )
    # README.md must not appear among the scanning capability's references.
    assert all(ref.path.startswith(("docs/", "tests/", "src/")) for ref in scanning.source_refs)


def test_candidate_generator_excludes_mention_only_providers_from_promoted_capability():
    """When adapter and mention-only providers coexist, only the adapter one reaches the capability."""
    repository = Repository(
        id=1,
        name="MixedSignals",
        url="/tmp/mixed-signals",
        description=None,
        branch="main",
        status="analyzed",
    )
    facts = [
        fact(1, "documentation", "README", "README.md"),
        fact(
            2,
            "llm_provider",
            "OpenRouter",
            "src/providers.py",
            "openrouter",
            {"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
        fact(
            3,
            "llm_provider",
            "Claude",
            "README.md",
            "claude",
            {"source_role": "product_documentation", "utility_relationship": "mention"},
        ),
        fact(
            4,
            "provider_registry",
            "LLM provider registry",
            "src/providers.py",
            metadata={"source_role": "implementation_source", "utility_relationship": "adapter"},
        ),
    ]

    graph = CandidateGraphGenerator().generate(repository, facts)

    capability = next(
        capability
        for capability in graph[0].capabilities
        if capability.name == "Route LLM Requests Across Providers"
    )
    assert "OpenRouter" in capability.description
    assert "Claude" not in capability.description
    assert {feature.name for feature in capability.features} == {
        "Use OpenRouter Models",
        "Maintain LLM Provider Registry",
    }
    assert [ref.path for ref in capability.source_refs] == [
        "src/providers.py",
        "src/providers.py",
    ]