Coevolution extension

2026-04-29 01:19:59 +02:00
parent 88afdc09fd
commit 991c34ce52
17 changed files with 764 additions and 4 deletions
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -64,6 +64,10 @@ class CandidateGraphGenerator:
        manifests = self._facts(facts, "manifest")
        frameworks = self._facts(facts, "framework")
        languages = self._facts(facts, "language")
+        llm_providers = self._facts(facts, "llm_provider")
+        credential_configs = self._facts(facts, "credential_config")
+        provider_registries = self._facts(facts, "provider_registry")
+        fallback_policies = self._facts(facts, "fallback_policy")

        ability_sources = docs or manifests or languages
        ability = CandidateAbilityDraft(
@@ -86,6 +90,18 @@ class CandidateGraphGenerator:
            capabilities.append(
                self._interface_capability(interfaces, tests, examples, docs, chunks)
            )
+        if llm_providers or provider_registries or fallback_policies:
+            capabilities.append(
+                self._llm_provider_capability(
+                    llm_providers,
+                    credential_configs,
+                    provider_registries,
+                    fallback_policies,
+                    tests,
+                    examples,
+                    docs,
+                )
+            )
        if manifests or frameworks or languages:
            capabilities.append(
                CandidateCapabilityDraft(
@@ -142,6 +158,84 @@ class CandidateGraphGenerator:
            evidence=self._evidence(tests, examples, docs),
        )

+    def _llm_provider_capability(
+        self,
+        providers: list[ObservedFact],
+        credentials: list[ObservedFact],
+        registries: list[ObservedFact],
+        fallback_policies: list[ObservedFact],
+        tests: list[ObservedFact],
+        examples: list[ObservedFact],
+        docs: list[ObservedFact],
+    ) -> CandidateCapabilityDraft:
+        """Build the provider-routing capability draft from LLM-related facts.
+
+        Emits one "Use <provider> Models" feature per distinct provider name
+        (sorted), then one optional feature each for credential, registry and
+        fallback-policy facts when that list is non-empty. The capability
+        confidence comes from ``_llm_provider_confidence``; its evidence is
+        built from the test, example and documentation facts.
+        """
+        # Group facts per provider name once so each feature reuses its group.
+        facts_by_provider: dict[str, list[ObservedFact]] = {}
+        for fact in providers:
+            facts_by_provider.setdefault(fact.name, []).append(fact)
+        ordered_names = sorted(facts_by_provider)
+        features = []
+        for provider in ordered_names:
+            matching = facts_by_provider[provider]
+            features.append(
+                CandidateFeatureDraft(
+                    name=f"Use {provider} Models",
+                    type="integration",
+                    location=self._grouped_location(matching),
+                    confidence=0.75,
+                    source_refs=self._source_refs(matching),
+                )
+            )
+        # (facts, feature name, feature type, confidence) per optional feature.
+        optional_features = (
+            (credentials, "Configure LLM Provider Credentials", "configuration", 0.7),
+            (registries, "Maintain LLM Provider Registry", "backend", 0.65),
+            (fallback_policies, "Apply LLM Provider Fallback Policy", "backend", 0.6),
+        )
+        for group, feature_name, feature_type, score in optional_features:
+            if not group:
+                continue
+            features.append(
+                CandidateFeatureDraft(
+                    name=feature_name,
+                    type=feature_type,
+                    location=self._grouped_location(group),
+                    confidence=score,
+                    source_refs=self._source_refs(group),
+                )
+            )
+        summary = "LLM providers"
+        if ordered_names:
+            summary = ", ".join(ordered_names)
+        return CandidateCapabilityDraft(
+            name="Route LLM Requests Across Providers",
+            description=(
+                "Expose or configure model-provider integrations detected from "
+                f"source-linked provider hints: {summary}."
+            ),
+            inputs=["LLM request", "provider configuration"],
+            outputs=["provider-specific model response"],
+            confidence=self._llm_provider_confidence(
+                providers=providers,
+                credentials=credentials,
+                registries=registries,
+                fallback_policies=fallback_policies,
+                docs=docs,
+            ),
+            source_refs=self._source_refs(
+                providers + credentials + registries + fallback_policies
+            ),
+            features=features,
+            evidence=self._evidence(tests, examples, docs),
+        )
+
    def _interface_features(
        self,
        interfaces: list[ObservedFact],
@@ -378,6 +472,27 @@ class CandidateGraphGenerator:
            ],
        )

+    def _llm_provider_confidence(
+        self,
+        *,
+        providers: list[ObservedFact],
+        credentials: list[ObservedFact],
+        registries: list[ObservedFact],
+        fallback_policies: list[ObservedFact],
+        docs: list[ObservedFact],
+    ) -> float:
+        """Delegate to ``_confidence`` with a 0.35 base and weighted provider signals."""
+        distinct_provider_names = {fact.name for fact in providers}
+        weighted_signals = [
+            (0.20, bool(providers)),
+            (0.10, len(distinct_provider_names) > 1),
+            (0.10, bool(credentials)),
+            (0.10, bool(registries)),
+            (0.10, bool(fallback_policies)),
+            (0.05, bool(docs)),
+        ]
+        return self._confidence(0.35, weighted_signals)
+
    def _confidence(
        self,
        base: float,
--- a/src/repo_registry/content_indexing/extractor.py
+++ b/src/repo_registry/content_indexing/extractor.py
@@ -6,7 +6,18 @@ from pathlib import Path
 from repo_registry.core.models import ObservedFact


-INDEXED_FACT_KINDS = {"documentation", "example", "test", "manifest", "interface"}
+INDEXED_FACT_KINDS = {  # NOTE(review): presumably the fact kinds whose files get chunked — confirm in ContentExtractor
+    "documentation",
+    "example",
+    "test",
+    "manifest",
+    "interface",
+    "config",
+    "llm_provider",
+    "credential_config",
+    "provider_registry",
+    "fallback_policy",
+}
 MAX_CHUNK_LINES = 40
 MAX_FILE_BYTES = 200_000

--- a/src/repo_registry/core/models.py
+++ b/src/repo_registry/core/models.py
@@ -54,6 +54,19 @@ class ReviewDecision:
    created_at: str


+@dataclass(frozen=True)
+class ExpectationGap:  # one row of the expectation_gaps table
+    id: int
+    repository_id: int
+    analysis_run_id: int | None  # None when the gap is not tied to a run
+    expected_type: str
+    expected_name: str
+    source: str
+    notes: str
+    status: str  # the table default is 'open'
+    created_at: str  # the table default is CURRENT_TIMESTAMP
+
+
@dataclass(frozen=True)
 class AnalysisRunDiffItem:
    change_type: str
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -16,6 +16,7 @@ from repo_registry.core.models import (
    CandidateFeature,
    CandidateGraph,
    ContentChunk,
+    ExpectationGap,
    ObservedFact,
    Repository,
    RepositoryAbilityMap,
@@ -282,6 +283,39 @@ class RegistryService:
    ) -> list[ReviewDecision]:
        return self.store.list_review_decisions(repository_id, analysis_run_id)

+    def record_expectation_gap(
+        self,
+        repository_id: int,
+        *,
+        analysis_run_id: int | None = None,
+        expected_type: str,
+        expected_name: str,
+        source: str,
+        notes: str = "",
+    ) -> ExpectationGap:
+        """Store an expectation gap, then log a review decision describing it."""
+        recorded = self.store.create_expectation_gap(
+            repository_id,
+            analysis_run_id,
+            expected_type=expected_type,
+            expected_name=expected_name,
+            source=source,
+            notes=notes,
+        )
+        # The decision is written after the gap, through a separate store call.
+        note = f"{source} expected {expected_type}: {expected_name}"
+        self.store.create_review_decision(
+            repository_id, analysis_run_id, action="record_expectation_gap", notes=note
+        )
+        return recorded
+
+    def list_expectation_gaps(
+        self, repository_id: int, analysis_run_id: int | None = None
+    ) -> list[ExpectationGap]:
+        """Return stored expectation gaps, optionally narrowed to one analysis run."""
+        gaps = self.store.list_expectation_gaps(repository_id, analysis_run_id)
+        return gaps
+
    def list_observed_facts(
        self,
        repository_id: int,
--- a/src/repo_registry/repo_scanning/scanner.py
+++ b/src/repo_registry/repo_scanning/scanner.py
@@ -69,6 +69,23 @@ MANIFEST_FRAMEWORK_HINTS = {
    },
 }

+LLM_PROVIDER_HINTS = {  # lowercase substring searched in file text -> provider fact name
+    "openrouter": "OpenRouter",
+    "anthropic": "Anthropic",
+    "claude": "Claude",  # recorded under its own name, separate from "Anthropic"
+    "openai": "OpenAI",
+    "gemini": "Gemini",
+    "google-generativeai": "Gemini",  # second needle for the same provider name
+}
+
+LLM_CREDENTIAL_HINTS = {  # environment variable name (matched lowercased) -> credential fact name
+    "OPENROUTER_API_KEY": "OpenRouter API key",
+    "ANTHROPIC_API_KEY": "Anthropic API key",
+    "OPENAI_API_KEY": "OpenAI API key",
+    "GEMINI_API_KEY": "Gemini API key",
+    "GOOGLE_API_KEY": "Google API key",
+}
+

@dataclass(frozen=True)
 class FactCandidate:
@@ -102,6 +119,7 @@ class DeterministicScanner:
        facts.extend(self._classified_file_facts(files, root))
        facts.extend(self._framework_facts(files, root))
        facts.extend(self._interface_facts(files, root))
+        facts.extend(self._llm_provider_facts(files, root))

        return ScanResult(
            source_path=str(root),
@@ -223,6 +241,96 @@ class DeterministicScanner:
                facts.append(FactCandidate("interface", "possible API surface", relative))
        return facts

+    def _llm_provider_facts(self, files: list[Path], root: Path) -> list[FactCandidate]:
+        """Collect LLM provider, credential, registry and fallback-policy hints.
+
+        Candidate files are read as text and searched with case-insensitive
+        substring matching; each (kind, name, path) triple is recorded once.
+        """
+        text_suffixes = (
+            ".py",
+            ".ts",
+            ".js",
+            ".json",
+            ".toml",
+            ".yaml",
+            ".yml",
+            ".md",
+            ".txt",
+            ".env",
+        )
+        registry_terms = ("provider_registry", "providers =", "adapter")
+        collected: list[FactCandidate] = []
+        seen: set[tuple[str, str, str]] = set()
+        for path in files:
+            has_text_suffix = path.suffix.lower() in text_suffixes
+            if not has_text_suffix and not path.name.lower().startswith(".env"):
+                continue
+            try:
+                content = path.read_text(encoding="utf-8", errors="ignore")
+            except OSError:
+                continue
+            haystack = content.lower()
+            relative = path.relative_to(root).as_posix()
+            # Track whether any provider hint matched: the registry and fallback
+            # checks further down only apply to files that mention a provider.
+            mentions_provider = False
+            for needle, provider in LLM_PROVIDER_HINTS.items():
+                if needle not in haystack:
+                    continue
+                mentions_provider = True
+                provider_fact = FactCandidate(
+                    kind="llm_provider",
+                    name=provider,
+                    path=relative,
+                    value=needle,
+                    metadata={"source": "provider_hint"},
+                )
+                self._append_once(collected, seen, provider_fact)
+            # Credential hints are environment-variable names, compared lowercased.
+            for env_name, label in LLM_CREDENTIAL_HINTS.items():
+                if env_name.lower() not in haystack:
+                    continue
+                credential_fact = FactCandidate(
+                    kind="credential_config",
+                    name=label,
+                    path=relative,
+                    value=env_name,
+                    metadata={"source": "environment_variable"},
+                )
+                self._append_once(collected, seen, credential_fact)
+            if not mentions_provider:
+                continue
+            if any(term in haystack for term in registry_terms):
+                registry_fact = FactCandidate(
+                    kind="provider_registry",
+                    name="LLM provider registry",
+                    path=relative,
+                    metadata={"source": "provider_registry_hint"},
+                )
+                self._append_once(collected, seen, registry_fact)
+            if "fallback" in haystack:
+                fallback_fact = FactCandidate(
+                    kind="fallback_policy",
+                    name="LLM provider fallback policy",
+                    path=relative,
+                    metadata={"source": "fallback_hint"},
+                )
+                self._append_once(collected, seen, fallback_fact)
+        return collected
+
+    def _append_once(
+        self,
+        facts: list[FactCandidate],
+        seen: set[tuple[str, str, str]],
+        fact: FactCandidate,
+    ) -> None:
+        """Append ``fact`` unless its (kind, name, path) key was already recorded."""
+        identity = (fact.kind, fact.name, fact.path)
+        if identity not in seen:
+            seen.add(identity)
+            facts.append(fact)
+
    def _python_interface_facts(self, path: Path, relative: str) -> list[FactCandidate]:
        facts: list[FactCandidate] = []
        try:
--- a/src/repo_registry/storage/sqlite.py
+++ b/src/repo_registry/storage/sqlite.py
@@ -17,6 +17,7 @@ from repo_registry.core.models import (
    CapabilitySummary,
    ContentChunk,
    Evidence,
+    ExpectationGap,
    Feature,
    ObservedFact,
    Repository,
@@ -47,6 +48,7 @@ class RegistryStore:
            connection.executescript(migration_path.read_text(encoding="utf-8"))
            self._ensure_content_chunks_table(connection)
            self._ensure_approved_source_ref_columns(connection)
+            self._ensure_expectation_gaps_table(connection)

    def connect(self) -> sqlite3.Connection:
        connection = sqlite3.connect(self.database_path)
@@ -92,6 +94,29 @@ class RegistryStore:
            "CREATE INDEX IF NOT EXISTS idx_content_chunks_run ON content_chunks(analysis_run_id)"
        )

+    def _ensure_expectation_gaps_table(self, connection: sqlite3.Connection) -> None:
+        """Create the expectation_gaps table and its two indexes if they are missing."""
+        connection.execute(
+            """
+            CREATE TABLE IF NOT EXISTS expectation_gaps (
+              id INTEGER PRIMARY KEY AUTOINCREMENT,
+              repository_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
+              analysis_run_id INTEGER REFERENCES analysis_runs(id) ON DELETE SET NULL,
+              expected_type TEXT NOT NULL,
+              expected_name TEXT NOT NULL,
+              source TEXT NOT NULL,
+              notes TEXT NOT NULL DEFAULT '',
+              status TEXT NOT NULL DEFAULT 'open',
+              created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
+            )
+            """
+        )
+        for index_statement in (
+            "CREATE INDEX IF NOT EXISTS idx_expectation_gaps_repository ON expectation_gaps(repository_id)",
+            "CREATE INDEX IF NOT EXISTS idx_expectation_gaps_run ON expectation_gaps(analysis_run_id)",
+        ):
+            connection.execute(index_statement)
+
    def create_repository(
        self,
        *,
@@ -1050,6 +1075,93 @@ class RegistryStore:
            for row in rows
        ]

+    def create_expectation_gap(
+        self,
+        repository_id: int,
+        analysis_run_id: int | None,
+        *,
+        expected_type: str,
+        expected_name: str,
+        source: str,
+        notes: str = "",
+    ) -> ExpectationGap:
+        """Insert an expectation gap for a repository and return the stored row."""
+        # Look up both ids before writing; a run is only checked when one is given.
+        self.get_repository(repository_id)
+        if analysis_run_id is not None:
+            self.get_analysis_run(repository_id, analysis_run_id)
+        row_values = (
+            repository_id, analysis_run_id, expected_type, expected_name, source, notes
+        )
+        with self.connect() as connection:
+            cursor = connection.execute(
+                """
+                INSERT INTO expectation_gaps
+                  (repository_id, analysis_run_id, expected_type, expected_name,
+                   source, notes)
+                VALUES (?, ?, ?, ?, ?, ?)
+                """,
+                row_values,
+            )
+            # status and created_at are left to the table's column defaults.
+            gap_id = int(cursor.lastrowid)
+        log_operation(
+            "expectation_gap_recorded",
+            repository_id=repository_id,
+            analysis_run_id=analysis_run_id,
+            expectation_gap_id=gap_id,
+            expected_type=expected_type,
+        )
+        stored = self.get_expectation_gap(repository_id, gap_id)
+        return stored
+
+    def get_expectation_gap(
+        self, repository_id: int, expectation_gap_id: int
+    ) -> ExpectationGap:
+        """Return one expectation gap of a repository or raise ``NotFoundError``."""
+        with self.connect() as connection:
+            cursor = connection.execute(
+                """
+                SELECT id, repository_id, analysis_run_id, expected_type,
+                       expected_name, source, notes, status, created_at
+                FROM expectation_gaps
+                WHERE repository_id = ? AND id = ?
+                """,
+                (repository_id, expectation_gap_id),
+            )
+            row = cursor.fetchone()
+        if row is not None:
+            return self._expectation_gap_from_row(row)
+        raise NotFoundError(
+            f"expectation gap {expectation_gap_id} was not found for repository "
+            f"{repository_id}"
+        )
+
+    def list_expectation_gaps(
+        self, repository_id: int, analysis_run_id: int | None = None
+    ) -> list[ExpectationGap]:
+        """List a repository's expectation gaps, newest first, optionally per run."""
+        self.get_repository(repository_id)
+        conditions = ["repository_id = ?"]
+        params: tuple[int, ...] = (repository_id,)
+        if analysis_run_id is not None:
+            conditions.append("analysis_run_id = ?")
+            params += (analysis_run_id,)
+        where = "WHERE " + " AND ".join(conditions)
+        with self.connect() as connection:
+            cursor = connection.execute(
+                f"""
+                SELECT id, repository_id, analysis_run_id, expected_type,
+                       expected_name, source, notes, status, created_at
+                FROM expectation_gaps
+                {where}
+                ORDER BY created_at DESC, id DESC
+                """,
+                params,
+            )
+            rows = cursor.fetchall()
+        return [self._expectation_gap_from_row(row) for row in rows]
+
    def fail_analysis_run(
        self,
        repository_id: int,
@@ -2215,3 +2327,17 @@ class RegistryStore:
            end_line=row["end_line"],
            text=row["text"],
        )
+
+    @staticmethod
+    def _expectation_gap_from_row(row: sqlite3.Row) -> ExpectationGap:
+        """Map an ``expectation_gaps`` row onto an ``ExpectationGap`` record.
+
+        Column names match the dataclass field names one-to-one.
+        """
+        columns = (
+            "id", "repository_id", "analysis_run_id",
+            "expected_type", "expected_name", "source",
+            "notes", "status", "created_at",
+        )
+        # Every field is read from the row by its column name.
+        return ExpectationGap(**{column: row[column] for column in columns})
--- a/src/repo_registry/web_api/app.py
+++ b/src/repo_registry/web_api/app.py
@@ -41,6 +41,8 @@ from repo_registry.web_api.schemas import (
    EvidenceCreate,
    EvidenceUpdate,
    ErrorResponse,
+    ExpectationGapCreate,
+    ExpectationGapResponse,
    FeatureCreate,
    FeatureUpdate,
    IdResponse,
@@ -288,6 +290,51 @@ def get_analysis_run(
        raise HTTPException(status_code=404, detail=str(exc)) from exc


+@app.get(
+    "/repos/{repository_id}/expectation-gaps",
+    tags=["review"],
+    response_model=list[ExpectationGapResponse],
+)
+def list_expectation_gaps(
+    repository_id: int,
+    analysis_run_id: int | None = Query(default=None),
+    service: RegistryService = Depends(get_service),
+) -> list[dict[str, object]]:
+    # A NotFoundError from the service layer is reported to the client as a 404.
+    try:
+        gaps = service.list_expectation_gaps(repository_id, analysis_run_id)
+    except NotFoundError as exc:
+        raise HTTPException(status_code=404, detail=str(exc)) from exc
+    # Dataclass records are flattened to dicts for the response model.
+    return [asdict(gap) for gap in gaps]
+
+
+@app.post(
+    "/repos/{repository_id}/expectation-gaps",
+    status_code=201,
+    tags=["review"],
+    response_model=ExpectationGapResponse,
+)
+def create_expectation_gap(
+    repository_id: int,
+    payload: ExpectationGapCreate,
+    service: RegistryService = Depends(get_service),
+) -> dict[str, object]:
+    # A NotFoundError from the service layer is reported to the client as a 404.
+    try:
+        gap = service.record_expectation_gap(
+            repository_id,
+            analysis_run_id=payload.analysis_run_id,
+            expected_type=payload.expected_type,
+            expected_name=payload.expected_name,
+            source=payload.source,
+            notes=payload.notes,
+        )
+    except NotFoundError as exc:
+        raise HTTPException(status_code=404, detail=str(exc)) from exc
+    return asdict(gap)
+
+
@app.get(
    "/repos/{repository_id}/analysis-runs/{base_analysis_run_id}/diff/{target_analysis_run_id}",
    tags=["review"],
--- a/src/repo_registry/web_api/schemas.py
+++ b/src/repo_registry/web_api/schemas.py
@@ -225,6 +225,40 @@ class AnalysisRunCreate(BaseModel):
    }


+class ExpectationGapCreate(BaseModel):  # request body for recording an expectation gap
+    analysis_run_id: int | None = None  # optional; None records a gap with no run attached
+    expected_type: str
+    expected_name: str
+    source: str = "human"
+    notes: str = ""
+
+    model_config = {  # example payload published with the JSON schema
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "analysis_run_id": 1,
+                    "expected_type": "capability",
+                    "expected_name": "Use OpenRouter Models",
+                    "source": "human",
+                    "notes": "Expected from README/provider config but absent from candidates.",
+                }
+            ]
+        }
+    }
+
+
+class ExpectationGapResponse(BaseModel):  # same fields as the ExpectationGap dataclass
+    id: int
+    repository_id: int
+    analysis_run_id: int | None  # None when the gap is not tied to a run
+    expected_type: str
+    expected_name: str
+    source: str
+    notes: str
+    status: str
+    created_at: str
+
+
 class CandidateGraphApproval(BaseModel):
    notes: str = ""

--- a/src/repo_registry/web_ui/views.py
+++ b/src/repo_registry/web_ui/views.py
@@ -877,6 +877,7 @@ def analysis_run_detail(
    facts = service.list_observed_facts(repository_id, analysis_run_id)
    chunks = service.list_content_chunks(repository_id, analysis_run_id)
    decisions = service.list_review_decisions(repository_id, analysis_run_id)
+    expectation_gaps = service.list_expectation_gaps(repository_id, analysis_run_id)
    fact_rows = "\n".join(
        f"""
        <tr>
@@ -941,10 +942,47 @@ def analysis_run_detail(
      <h2>Content Chunks</h2>
      {render_content_chunks(chunks)}
    </section>
+    <section class="panel" style="margin-top:18px">
+      <h2>Expectation Gaps</h2>
+      <form class="stack" method="post" action="/ui/repos/{repository_id}/analysis-runs/{analysis_run_id}/expectation-gaps">
+        <div class="grid">
+          <label>Expected type <input name="expected_type" placeholder="capability, feature, fact, classification" required></label>
+          <label>Expected name <input name="expected_name" placeholder="Use OpenRouter Models" required></label>
+          <label>Source <input name="source" value="human" required></label>
+          <label>Notes <input name="notes" placeholder="What made you expect this?"></label>
+        </div>
+        <button type="submit">Record Gap</button>
+      </form>
+      {render_expectation_gaps(expectation_gaps)}
+    </section>
    """
    return page(f"{repository.name} Run {analysis_run_id}", body)


+@router.post("/ui/repos/{repository_id}/analysis-runs/{analysis_run_id}/expectation-gaps")
+def create_expectation_gap_from_form(
+    repository_id: int,
+    analysis_run_id: int,
+    expected_type: str = Form(...),
+    expected_name: str = Form(...),
+    source: str = Form("human"),
+    notes: str = Form(""),
+    service: RegistryService = Depends(get_service),
+) -> RedirectResponse:
+    # Record the submitted gap against the run named in the URL.
+    service.record_expectation_gap(
+        repository_id,
+        analysis_run_id=analysis_run_id,
+        expected_type=expected_type,
+        expected_name=expected_name,
+        source=source,
+        notes=notes,
+    )
+    # 303 sends the browser back to the run detail page with a GET.
+    run_detail_url = f"/ui/repos/{repository_id}/analysis-runs/{analysis_run_id}"
+    return RedirectResponse(run_detail_url, status_code=303)
+
+
@router.get("/ui/repos/{repository_id}/elements")
 def repository_element_listing(
    repository_id: int,
@@ -2028,6 +2066,29 @@ def render_review_decisions(decisions: list) -> str:
    """


+def render_expectation_gaps(gaps: list) -> str:  # returns an HTML fragment
+    if not gaps:  # nothing recorded: show a muted placeholder instead of a table
+        return '<p class="muted">No expectation gaps recorded for this run.</p>'
+    rows = "\n".join(
+        f"""
+        <tr>
+          <td><span class="pill">{escape(gap.expected_type)}</span></td>
+          <td>{escape(gap.expected_name)}</td>
+          <td>{escape(gap.source)}</td>
+          <td>{escape(gap.notes)}</td>
+          <td>{escape(gap.status)}</td>
+        </tr>
+        """
+        for gap in gaps  # one <tr> per gap; every field goes through escape()
+    )
+    return f"""
+    <table>
+      <thead><tr><th>Type</th><th>Name</th><th>Source</th><th>Notes</th><th>Status</th></tr></thead>
+      <tbody>{rows}</tbody>
+    </table>
+    """
+
+
 def render_content_chunks(chunks: list) -> str:
    if not chunks:
        return '<p class="muted">No content chunks extracted.</p>'
--- a/tests/expectations/llm_connect_provider_expectations.json
+++ b/tests/expectations/llm_connect_provider_expectations.json
@@ -0,0 +1,17 @@
+{
+  "repository": "llm-connect-like",
+  "expected_facts": [
+    {"kind": "llm_provider", "name": "OpenRouter"},
+    {"kind": "llm_provider", "name": "Claude"},
+    {"kind": "credential_config", "name": "OpenRouter API key"},
+    {"kind": "provider_registry", "name": "LLM provider registry"},
+    {"kind": "fallback_policy", "name": "LLM provider fallback policy"}
+  ],
+  "expected_candidates": [
+    "Route LLM Requests Across Providers",
+    "Use OpenRouter Models",
+    "Use Claude Models",
+    "Configure LLM Provider Credentials",
+    "Apply LLM Provider Fallback Policy"
+  ]
+}
--- a/tests/test_candidate_graph.py
+++ b/tests/test_candidate_graph.py
@@ -246,3 +246,35 @@ def test_candidate_generator_groups_many_interface_facts_into_behavioral_feature
    assert feature.type == "API"
    assert feature.location == "src/api.py"
    assert len(feature.source_refs) == 3
+
+
+def test_candidate_generator_maps_llm_provider_facts_to_capability():
+    """LLM-related facts should yield the provider-routing capability and features."""
+    repository = Repository(
+        id=1,
+        name="LLMConnect",
+        url="/tmp/llm-connect",
+        description=None,
+        branch="main",
+        status="analyzed",
+    )
+    facts = [
+        fact(1, "documentation", "README", "README.md"),
+        fact(2, "llm_provider", "OpenRouter", "providers.py", "openrouter"),
+        fact(3, "llm_provider", "Claude", "providers.py", "claude"),
+        fact(4, "credential_config", "OpenRouter API key", ".env.example", "OPENROUTER_API_KEY"),
+        fact(5, "provider_registry", "LLM provider registry", "providers.py"),
+        fact(6, "fallback_policy", "LLM provider fallback policy", "providers.py"),
+    ]
+
+    graph = CandidateGraphGenerator().generate(repository, facts)
+    routing = next(
+        candidate
+        for candidate in graph[0].capabilities
+        if candidate.name == "Route LLM Requests Across Providers"
+    )
+    feature_names = {feature.name for feature in routing.features}
+    assert {"Use OpenRouter Models", "Use Claude Models"}.issubset(feature_names)
+    assert "Configure LLM Provider Credentials" in feature_names
+    assert "Maintain LLM Provider Registry" in feature_names
+    assert "Apply LLM Provider Fallback Policy" in feature_names
--- a/tests/test_content_indexing.py
+++ b/tests/test_content_indexing.py
@@ -65,3 +65,20 @@ def test_content_extractor_ignores_unindexed_and_missing_paths(tmp_path):
    )

    assert chunks == []
+
+
+def test_content_extractor_chunks_provider_related_config(tmp_path):
+    """A credential_config fact should get its source file chunked."""
+    checkout = tmp_path / "repo"
+    checkout.mkdir()
+    env_example = checkout / ".env.example"
+    env_example.write_text("OPENROUTER_API_KEY=\n", encoding="utf-8")
+    credential = fact(1, "credential_config", "OpenRouter API key", ".env.example")
+
+    chunks = ContentExtractor().extract(checkout, [credential])
+
+    # Exactly one chunk is expected for the single one-line file.
+    assert len(chunks) == 1
+    only_chunk = chunks[0]
+    assert only_chunk.path == ".env.example"
+    assert "OPENROUTER_API_KEY" in only_chunk.text
--- a/tests/test_repository_scanner.py
+++ b/tests/test_repository_scanner.py
@@ -87,3 +87,32 @@ def test_scanner_javascript_typescript_package_records_package_facts(tmp_path):
    assert ("framework", "Vitest", "package.json") in facts
    assert ("interface", "possible API surface", "src/api/routes.ts") in facts
    assert ("test", "routes.spec.ts", "src/api/routes.spec.ts") in facts
+
+
+def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
+    """Provider, credential, registry and fallback hints are scanned from files."""
+    checkout = tmp_path / "llm-connect-like"
+    checkout.mkdir()
+    (checkout / "README.md").write_text(
+        "# LLM Connect\nSupports OpenRouter and Claude fallback.\n",
+        encoding="utf-8",
+    )
+    (checkout / ".env.example").write_text(
+        "OPENROUTER_API_KEY=\nANTHROPIC_API_KEY=\n",
+        encoding="utf-8",
+    )
+    (checkout / "providers.py").write_text(
+        "provider_registry = {'openrouter': OpenRouterAdapter, 'anthropic': ClaudeAdapter}\n"
+        "fallback_provider = 'claude'\n",
+        encoding="utf-8",
+    )
+
+    result = DeterministicScanner().scan(checkout)
+    observed = {(fact.kind, fact.name, fact.path) for fact in result.facts}
+    assert ("llm_provider", "OpenRouter", "README.md") in observed
+    assert ("llm_provider", "Claude", "README.md") in observed
+    assert ("llm_provider", "Anthropic", ".env.example") in observed
+    assert ("credential_config", "OpenRouter API key", ".env.example") in observed
+    assert ("credential_config", "Anthropic API key", ".env.example") in observed
+    assert ("provider_registry", "LLM provider registry", "providers.py") in observed
+    assert ("fallback_policy", "LLM provider fallback policy", "README.md") in observed
--- a/tests/test_scanner_coevolution.py
+++ b/tests/test_scanner_coevolution.py
@@ -0,0 +1,65 @@
+import json
+from pathlib import Path
+
+from repo_registry.core.service import RegistryService
+from repo_registry.repo_ingestion.git import GitIngestionService
+from repo_registry.storage.sqlite import RegistryStore
+
+
+def test_llm_connect_provider_expectations_are_detected_without_llm(tmp_path):
+    """Scan a minimal provider repo and compare against the JSON expectations.
+
+    Runs the full analysis with LLM assistance disabled and checks that every
+    expected fact and candidate name from the expectation file is present.
+    """
+    expectation_path = (
+        Path(__file__).parent
+        / "expectations"
+        / "llm_connect_provider_expectations.json"
+    )
+    expectations = json.loads(expectation_path.read_text(encoding="utf-8"))
+    source = tmp_path / expectations["repository"]
+    source.mkdir()
+    # Build the smallest repository that mentions providers, keys and fallback.
+    (source / "README.md").write_text(
+        "# LLM Connect\nSupports OpenRouter and Claude fallback for hard prompts.\n",
+        encoding="utf-8",
+    )
+    (source / ".env.example").write_text(
+        "OPENROUTER_API_KEY=\nANTHROPIC_API_KEY=\n",
+        encoding="utf-8",
+    )
+    (source / "providers.py").write_text(
+        "provider_registry = {'openrouter': object(), 'claude': object()}\n"
+        "fallback_provider = 'claude'\n",
+        encoding="utf-8",
+    )
+    # Register and analyse it through the real service and SQLite store.
+    store = RegistryStore(tmp_path / "registry.sqlite3")
+    store.initialize()
+    service = RegistryService(
+        store,
+        ingestion=GitIngestionService(tmp_path / "checkouts"),
+    )
+    repository = service.register_repository(name="LLM Connect", url=str(source))
+
+    summary = service.analyze_repository(
+        repository.id,
+        use_llm_assistance=False,
+    )
+    run_id = summary.analysis_run.id
+    facts = service.list_observed_facts(repository.id, run_id)
+    graph = service.candidate_graph(repository.id, run_id)
+
+    observed_pairs = {(fact.kind, fact.name) for fact in facts}
+    for expected in expectations["expected_facts"]:
+        assert (expected["kind"], expected["name"]) in observed_pairs
+
+    # Candidate names span the first ability plus every capability and feature.
+    candidate_names = {graph.abilities[0].name}
+    for ability in graph.abilities:
+        for capability in ability.capabilities:
+            candidate_names.add(capability.name)
+            candidate_names.update(feature.name for feature in capability.features)
+    for expected in expectations["expected_candidates"]:
+        assert expected in candidate_names
--- a/tests/test_storage_migrations.py
+++ b/tests/test_storage_migrations.py
@@ -35,6 +35,7 @@ def test_initialize_is_idempotent_and_applies_expected_columns(tmp_path):
    assert "source_refs" in feature_columns
    assert "source_refs" in evidence_columns
    assert "content_chunks" in tables
+    assert "expectation_gaps" in tables


 def test_approved_registry_schema_allows_future_nullable_vocabulary_ref(tmp_path):
@@ -132,6 +133,13 @@ def test_delete_repository_cascades_registry_and_review_rows(tmp_path):
        action="manual_test",
        notes="Cascade review decision.",
    )
+    service.store.create_expectation_gap(
+        repository.id,
+        run.id,
+        expected_type="capability",
+        expected_name="Expected Cascade Capability",
+        source="human",
+    )

    service.delete_repository(repository.id)

@@ -143,6 +151,7 @@ def test_delete_repository_cascades_registry_and_review_rows(tmp_path):
            "approved_evidence",
            "analysis_runs",
            "content_chunks",
+            "expectation_gaps",
            "review_decisions",
        ):
            count = connection.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
--- a/tests/test_web_api.py
+++ b/tests/test_web_api.py
@@ -252,6 +252,10 @@ def test_openapi_contract_snapshot_for_stable_agent_paths():
        "/repos/{repository_id}/export": {
            "get": {"tags": ["discovery"], "success_schema": "application/x-yaml"}
        },
+        "/repos/{repository_id}/expectation-gaps": {
+            "get": {"tags": ["review"], "success_schema": "list[ExpectationGapResponse]"},
+            "post": {"tags": ["review"], "success_schema": "ExpectationGapResponse"},
+        },
        "/repos/{repository_id}/features": {
            "post": {"tags": ["registry"], "success_schema": "IdResponse"}
        },
@@ -759,6 +763,24 @@ def test_api_analysis_run_loop(tmp_path):
        assert run_decisions_response.json()[0]["notes"] == (
            "Reject once to exercise review correction."
        )
+        gap_response = client.post(
+            f"/repos/{repository_id}/expectation-gaps",
+            json={
+                "analysis_run_id": run["analysis_run"]["id"],
+                "expected_type": "capability",
+                "expected_name": "Use OpenRouter Models",
+                "source": "human",
+                "notes": "Expected provider capability was missing.",
+            },
+        )
+        assert gap_response.status_code == 201
+        assert gap_response.json()["expected_name"] == "Use OpenRouter Models"
+        gaps_response = client.get(
+            f"/repos/{repository_id}/expectation-gaps",
+            params={"analysis_run_id": run["analysis_run"]["id"]},
+        )
+        assert gaps_response.status_code == 200
+        assert gaps_response.json()[0]["source"] == "human"

        run_response = client.post(f"/repos/{repository_id}/analysis-runs", json={})
        assert run_response.status_code == 201
@@ -1154,6 +1176,23 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
        assert "README.md:1-2" in run_detail.text
        assert "ID " in run_detail.text
        assert "No review decisions yet." in run_detail.text
+        assert "Expectation Gaps" in run_detail.text
+        assert "Record Gap" in run_detail.text
+
+        gap_response = client.post(
+            f"{run_path}/expectation-gaps",
+            data={
+                "expected_type": "capability",
+                "expected_name": "Use OpenRouter Models",
+                "source": "human",
+                "notes": "Expected from provider docs.",
+            },
+            follow_redirects=False,
+        )
+        assert gap_response.status_code == 303
+        run_detail = client.get(run_path)
+        assert "Use OpenRouter Models" in run_detail.text
+        assert "Expected from provider docs." in run_detail.text

        approve_response = client.post(
            f"{run_path}/candidate-graph/approve",
--- a/workplans/RREG-WP-0003-automatic-repository-exploration.md
+++ b/workplans/RREG-WP-0003-automatic-repository-exploration.md
@@ -136,8 +136,9 @@ useful even when a scan finds gaps or weak evidence.

 ```task
 id: RREG-WP-0003-T07
-status: todo
+status: done
 priority: medium
+state_hub_task_id: "8f49fffe-c7bf-4b59-b3c3-fafe89d75e53"
 ```

 Capture the gap between what a curator expected to see and what deterministic
@@ -157,8 +158,9 @@ fixtures.

 ```task
 id: RREG-WP-0003-T08
-status: todo
+status: done
 priority: medium
+state_hub_task_id: "93fd4bdc-bfdf-4f2b-95ad-1106094c7e23"
 ```

 Extend deterministic scanning and content indexing to identify provider and
@@ -178,8 +180,9 @@ explicit model fallback behavior found in code, docs, or config.

 ```task
 id: RREG-WP-0003-T09
-status: todo
+status: done
 priority: medium
+state_hub_task_id: "fe0a7807-9caa-4425-b66b-c88a2a09ece3"
 ```

 Create a repeatable improvement loop where reviewed expectation gaps become