From 31dd6259b5fbc3c225fd3df394e65d3b95b4865f Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Wed, 29 Apr 2026 00:40:02 +0200
Subject: [PATCH] Coevolution workplan extension

---
 README.md                                     | 12 ++++
 src/repo_registry/core/service.py             | 20 +++++-
 src/repo_registry/web_api/app.py              |  5 +-
 src/repo_registry/web_api/schemas.py          |  4 ++
 src/repo_registry/web_ui/views.py             | 12 ++++
 tests/test_registry_service.py                | 71 +++++++++++++++++++
 tests/test_web_api.py                         | 16 ++++-
 ...P-0003-automatic-repository-exploration.md | 66 ++++++++++++++++-
 8 files changed, 200 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 673ae82..c2de96a 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,18 @@ REPO_REGISTRY_LLM_PROVIDER=gemini
 REPO_REGISTRY_LLM_MODEL=gemini-2.5-flash
 ```
 
+LLM assistance can be disabled globally, even when a provider is configured:
+
+```bash
+REPO_REGISTRY_LLM_ENABLED=false
+```
+
+Individual analysis requests may opt out with `{"use_llm_assistance": false}`.
+For local demos, `{"trusted_auto_approve": true}` approves the generated
+candidate graph immediately after analysis and records the review decision as
+`trusted_auto_approve_candidate_graph`. The default remains review-first:
+auto-approval is off unless explicitly requested.
+
 ## Agent-Facing Endpoints
 
 The v0.1 API covers the main registration, analysis, review, search, and inspection loop:
diff --git a/src/repo_registry/core/service.py b/src/repo_registry/core/service.py
index e86525e..0d68b68 100644
--- a/src/repo_registry/core/service.py
+++ b/src/repo_registry/core/service.py
@@ -121,6 +121,8 @@ class RegistryService:
         *,
         source_path: str | None = None,
         use_cached_checkout: bool = False,
+        use_llm_assistance: bool = True,
+        trusted_auto_approve: bool = False,
         access_username: str | None = None,
         access_password: str | None = None,
     ) -> ScanSummary:
@@ -187,6 +189,7 @@ class RegistryService:
                 repository,
                 facts,
                 stored_chunks,
+                use_llm_assistance=use_llm_assistance,
             )
         except Exception as exc:
             log_operation(
@@ -221,6 +224,16 @@ class RegistryService:
                 action="llm_extraction_used",
                 notes=f"Generated {len(candidates)} candidate ability draft(s).",
             )
+        if trusted_auto_approve:
+            self.approve_candidate_graph(
+                repository_id,
+                completed_run.id,
+                notes=(
+                    "Trusted auto-populate mode approved candidate graph "
+                    f"after {candidate_source} candidate generation."
+                ),
+                action="trusted_auto_approve_candidate_graph",
+            )
         log_operation(
             "analysis_completed",
             repository_id=repository_id,
@@ -241,8 +254,10 @@ class RegistryService:
         repository: Repository,
         facts: list[ObservedFact],
         chunks: list[ContentChunk],
+        *,
+        use_llm_assistance: bool = True,
     ):
-        if self.llm_extractor is not None:
+        if use_llm_assistance and self.llm_extractor is not None:
             extracted = self.llm_extractor.extract(repository, chunks)
             if extracted:
                 return self.llm_mapper.map(extracted, facts, chunks), "llm"
@@ -290,6 +305,7 @@ class RegistryService:
         analysis_run_id: int,
         *,
         notes: str = "",
+        action: str = "approve_candidate_graph",
     ) -> RepositoryAbilityMap:
         graph = self.store.get_candidate_graph(repository_id, analysis_run_id)
         pending_abilities = [
@@ -347,7 +363,7 @@ class RegistryService:
             self.store.create_review_decision(
                 repository_id,
                 analysis_run_id,
-                action="approve_candidate_graph",
+                action=action,
                 notes=notes,
             )
             self.store.update_repository_status(repository_id, "indexed")
diff --git a/src/repo_registry/web_api/app.py b/src/repo_registry/web_api/app.py
index 448b14c..413b744 100644
--- a/src/repo_registry/web_api/app.py
+++ b/src/repo_registry/web_api/app.py
@@ -61,6 +61,7 @@ class Settings(BaseSettings):
 
     database_path: str = Field(default="var/repo-registry.sqlite3")
     checkout_root: str = Field(default="var/checkouts")
+    llm_enabled: bool = Field(default=True)
     llm_provider: str | None = Field(default=None)
     llm_model: str | None = Field(default=None)
     embedding_provider: str | None = Field(default=None)
@@ -80,7 +81,7 @@ def get_service(settings: Settings = Depends(get_settings)) -> RegistryService:
     store = RegistryStore(database_path)
     store.initialize()
     llm_extractor = None
-    if settings.llm_provider:
+    if settings.llm_enabled and settings.llm_provider:
         adapter = create_llm_connect_adapter(
             settings.llm_provider,
             model=settings.llm_model,
@@ -246,6 +247,8 @@ def create_analysis_run(
             repository_id,
             source_path=payload.source_path,
             use_cached_checkout=payload.use_cached_checkout,
+            use_llm_assistance=payload.use_llm_assistance,
+            trusted_auto_approve=payload.trusted_auto_approve,
             access_username=payload.access_username,
             access_password=payload.access_password,
         )
diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py
index 4800ba3..340d42c 100644
--- a/src/repo_registry/web_api/schemas.py
+++ b/src/repo_registry/web_api/schemas.py
@@ -203,6 +203,8 @@ class EvidenceUpdate(BaseModel):
 class AnalysisRunCreate(BaseModel):
     source_path: str | None = None
     use_cached_checkout: bool = False
+    use_llm_assistance: bool = True
+    trusted_auto_approve: bool = False
     access_username: str | None = None
     access_password: str | None = Field(default=None, repr=False)
 
@@ -212,6 +214,8 @@ class AnalysisRunCreate(BaseModel):
                 {},
                 {"source_path": "/path/to/local/repository"},
                 {"use_cached_checkout": True},
+                {"use_llm_assistance": False},
+                {"trusted_auto_approve": True},
                 {
                     "access_username": "git-user",
                     "access_password": "access-token",
diff --git a/src/repo_registry/web_ui/views.py b/src/repo_registry/web_ui/views.py
index c1bc4fe..225d491 100644
--- a/src/repo_registry/web_ui/views.py
+++ b/src/repo_registry/web_ui/views.py
@@ -214,6 +214,8 @@ def render_repository_index(
           <label>Username <input name="access_username" autocomplete="username" placeholder="Optional for private HTTP(S) repos"></label>
           <label>Password or access token <input name="access_password" type="password" autocomplete="current-password" placeholder="Used for this Git operation only"></label>
           <label class="checkbox"><input type="checkbox" name="explore_after_registration" value="1" checked> Explore after registration</label>
+          <label class="checkbox"><input type="checkbox" name="use_llm_assistance" value="1" checked> Use LLM assistance if configured</label>
+          <label class="checkbox"><input type="checkbox" name="trusted_auto_approve" value="1"> Trusted auto-populate after analysis</label>
           <div class="actions">
             <button type="submit">Register</button>
             <span data-pending>Registering repository...</span>
@@ -426,6 +428,8 @@ def create_repository_from_form(
     access_username: str = Form(""),
     access_password: str = Form(""),
     explore_after_registration: str | None = Form(None),
+    use_llm_assistance: str | None = Form(None),
+    trusted_auto_approve: str | None = Form(None),
     service: RegistryService = Depends(get_service),
 ):
     try:
@@ -444,6 +448,8 @@ def create_repository_from_form(
     if explore_after_registration:
         summary = service.analyze_repository(
             repository.id,
+            use_llm_assistance=bool(use_llm_assistance),
+            trusted_auto_approve=bool(trusted_auto_approve),
             access_username=access_username or None,
             access_password=access_password or None,
         )
@@ -508,6 +514,8 @@ def repository_detail(
         <form class="stack" method="post" action="/ui/repos/{repository_id}/analysis-runs">
           <label>Override source path <input name="source_path" placeholder="Optional local path"></label>
           <label class="checkbox"><input type="checkbox" name="use_cached_checkout" value="1"> Analyze cached checkout without fetching upstream</label>
+          <label class="checkbox"><input type="checkbox" name="use_llm_assistance" value="1" checked> Use LLM assistance if configured</label>
+          <label class="checkbox"><input type="checkbox" name="trusted_auto_approve" value="1"> Trusted auto-populate after analysis</label>
           <label>Username <input name="access_username" autocomplete="username" placeholder="Optional for private HTTP(S) repos"></label>
           <label>Password or access token <input name="access_password" type="password" autocomplete="current-password" placeholder="Used for this Git operation only"></label>
           <div class="actions">
@@ -837,6 +845,8 @@ def create_analysis_run_from_form(
     repository_id: int,
     source_path: str = Form(""),
     use_cached_checkout: str | None = Form(None),
+    use_llm_assistance: str | None = Form(None),
+    trusted_auto_approve: str | None = Form(None),
     access_username: str = Form(""),
     access_password: str = Form(""),
     service: RegistryService = Depends(get_service),
@@ -845,6 +855,8 @@ def create_analysis_run_from_form(
         repository_id,
         source_path=source_path or None,
         use_cached_checkout=bool(use_cached_checkout),
+        use_llm_assistance=bool(use_llm_assistance),
+        trusted_auto_approve=bool(trusted_auto_approve),
         access_username=access_username or None,
         access_password=access_password or None,
     )
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index bee4fe7..5e8ee26 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -604,6 +604,43 @@ def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
     assert "1 candidate ability" in decisions[0].notes
 
 
+def test_analyze_repository_can_disable_optional_llm_extractor(tmp_path):
+    source = tmp_path / "repo"
+    source.mkdir()
+    (source / "README.md").write_text(
+        "# Email Router\nRoutes incoming customer email.\n",
+        encoding="utf-8",
+    )
+    store = RegistryStore(tmp_path / "registry.sqlite3")
+    store.initialize()
+    extractor = FakeLLMExtractor(
+        [
+            ExtractedAbility(
+                name="Business Email Routing",
+                description="Route incoming messages.",
+                source_paths=["README.md"],
+            )
+        ]
+    )
+    service = RegistryService(
+        store,
+        ingestion=GitIngestionService(tmp_path / "checkouts"),
+        llm_extractor=extractor,
+    )
+    repository = service.register_repository(name="Email Router", url=str(source))
+
+    summary = service.analyze_repository(
+        repository.id,
+        use_llm_assistance=False,
+    )
+    graph = service.candidate_graph(repository.id, summary.analysis_run.id)
+    decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
+
+    assert extractor.calls == []
+    assert graph.abilities[0].name == "Route Incoming Customer Email"
+    assert all(decision.action != "llm_extraction_used" for decision in decisions)
+
+
 def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
     source = tmp_path / "repo"
     source.mkdir()
@@ -623,6 +660,40 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca
     assert graph.abilities[0].name == "Support Fallback"
 
 
+def test_analyze_repository_can_trusted_auto_approve_candidates(tmp_path):
+    source = tmp_path / "repo"
+    source.mkdir()
+    (source / "README.md").write_text(
+        "# Auto Approved\nReports health over HTTP.\n",
+        encoding="utf-8",
+    )
+    (source / "app.py").write_text(
+        "from fastapi import FastAPI\n"
+        "app = FastAPI()\n"
+        '@app.get("/health")\n'
+        "def health():\n"
+        "    return {}\n",
+        encoding="utf-8",
+    )
+    service = make_service(tmp_path)
+    repository = service.register_repository(name="Auto Approved", url=str(source))
+
+    summary = service.analyze_repository(
+        repository.id,
+        trusted_auto_approve=True,
+        use_llm_assistance=False,
+    )
+    ability_map = service.ability_map(repository.id)
+    graph = service.candidate_graph(repository.id, summary.analysis_run.id)
+    decisions = service.list_review_decisions(repository.id, summary.analysis_run.id)
+
+    assert service.get_repository(repository.id).status == "indexed"
+    assert graph.abilities[0].status == "approved"
+    assert ability_map.abilities[0].name == "Report Health Over HTTP"
+    assert decisions[0].action == "trusted_auto_approve_candidate_graph"
+    assert "deterministic candidate generation" in decisions[0].notes
+
+
 def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
     source = tmp_path / "repo"
     source.mkdir()
diff --git a/tests/test_web_api.py b/tests/test_web_api.py
index e286f0e..c5d733e 100644
--- a/tests/test_web_api.py
+++ b/tests/test_web_api.py
@@ -1087,6 +1087,8 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
         assert "Registering repository..." in index_response.text
         assert "Password or access token" in index_response.text
         assert "Explore after registration" in index_response.text
+        assert "Use LLM assistance if configured" in index_response.text
+        assert "Trusted auto-populate after analysis" in index_response.text
 
         create_response = client.post(
             "/ui/repos",
@@ -1096,6 +1098,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
                 "access_username": "",
                 "access_password": "",
                 "explore_after_registration": "",
+                "use_llm_assistance": "1",
             },
             follow_redirects=False,
         )
@@ -1108,6 +1111,8 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
         assert "Run Analysis" in detail_response.text
         assert "Running analysis..." in detail_response.text
         assert "Analyze cached checkout without fetching upstream" in detail_response.text
+        assert "Use LLM assistance if configured" in detail_response.text
+        assert "Trusted auto-populate after analysis" in detail_response.text
         assert "Repository Metadata" in detail_response.text
 
         edit_repository_response = client.post(
@@ -1128,6 +1133,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
             f"{repository_path}/analysis-runs",
             data={
                 "source_path": "",
+                "use_llm_assistance": "1",
                 "access_username": "",
                 "access_password": "",
             },
@@ -1266,7 +1272,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
         )
         second_run_response = client.post(
             f"{repository_path}/analysis-runs",
-            data={"source_path": ""},
+            data={"source_path": "", "use_llm_assistance": "1"},
             follow_redirects=False,
         )
         assert second_run_response.status_code == 303
@@ -1344,6 +1350,8 @@ def test_ui_register_and_explore_lands_on_analysis_result(tmp_path):
                 "access_username": "",
                 "access_password": "",
                 "explore_after_registration": "1",
+                "use_llm_assistance": "",
+                "trusted_auto_approve": "1",
             },
             follow_redirects=False,
         )
@@ -1354,7 +1362,13 @@ def test_ui_register_and_explore_lands_on_analysis_result(tmp_path):
         result = client.get(response.headers["location"])
         assert result.status_code == 200
         assert "Candidate Graph" in result.text
+        assert "approved" in result.text
         assert "Observed Facts" in result.text
+        assert "trusted_auto_approve_candidate_graph" in result.text
+
+        repository_detail = client.get("/ui/repos/1")
+        assert repository_detail.status_code == 200
+        assert "Use Approved Registry" in repository_detail.text
     finally:
         app.dependency_overrides.clear()
 
diff --git a/workplans/RREG-WP-0003-automatic-repository-exploration.md b/workplans/RREG-WP-0003-automatic-repository-exploration.md
index 5f514be..f80d946 100644
--- a/workplans/RREG-WP-0003-automatic-repository-exploration.md
+++ b/workplans/RREG-WP-0003-automatic-repository-exploration.md
@@ -8,7 +8,7 @@ status: active
 owner: codex
 topic_slug: foerster-capabilities
 created: "2026-04-26"
-updated: "2026-04-28"
+updated: "2026-04-29"
 state_hub_workstream_id: "c121d462-f2e4-45d3-9d2d-9c04a3556953"
 ---
 
@@ -102,7 +102,7 @@ export.
 
 ```task
 id: RREG-WP-0003-T04
-status: todo
+status: done
 priority: medium
 state_hub_task_id: "076385fe-4dbf-4aca-b89f-c7372d9eebd9"
 ```
@@ -131,3 +131,65 @@ that produce only weak candidates.
 
 Acceptance: trying the product on repo-registry itself feels understandable and
 useful even when a scan finds gaps or weak evidence.
+
+## P1: Expectation Gap Feedback Loop
+
+```task
+id: RREG-WP-0003-T07
+status: todo
+priority: medium
+```
+
+Capture the gap between what a curator expected to see and what deterministic
+analysis actually produced. Treat these gaps as first-class scanner optimization
+inputs: a user should be able to record missing expected abilities, capabilities,
+features, facts, or classifications for an analyzed repository. The system should
+preserve the source of the expectation (`human`, `llm-assisted`, or `comparison`)
+and link it to the analysis run that missed it.
+
+Acceptance: after inspecting a repository such as `llm-connect`, a curator can
+record that expected concepts like `OpenRouter provider support`,
+`Claude model usage`, or `provider fallback policy` were missing. The gap is
+visible from the repository/review UI and can be used to create deterministic
+scanner regression fixtures.
+
+## P1: Provider-Aware Deterministic Scanning
+
+```task
+id: RREG-WP-0003-T08
+status: todo
+priority: medium
+```
+
+Extend deterministic scanning and content indexing to identify provider and
+integration concepts that generic language/framework/file facts miss. Initial
+targets are LLM infrastructure repositories: OpenRouter, Anthropic/Claude,
+OpenAI, Gemini, model-provider registries, credential environment variables,
+adapter classes, routing rules, and fallback policies. These should appear as
+source-linked facts and map into useful candidate capabilities/features without
+requiring LLM assistance.
+
+Acceptance: analyzing `llm-connect` with LLM assistance disabled can surface
+source-linked facts and candidate graph entries for OpenRouter support, Claude or
+Anthropic support where present, provider configuration/credentials, and any
+explicit model fallback behavior found in code, docs, or config.
+
+## P1: Scanner Coevolution Regression Harness
+
+```task
+id: RREG-WP-0003-T09
+status: todo
+priority: medium
+```
+
+Create a repeatable improvement loop where reviewed expectation gaps become
+fixtures and tests. For each trial repository, store a small expectation profile
+that lists important concepts the deterministic scanner should eventually detect.
+Compare deterministic outputs against optional LLM-assisted or human-curated
+expectations, then promote confirmed misses into scanner/candidate-generator
+regression tests.
+
+Acceptance: the repository has at least one expectation fixture for an LLM
+infrastructure repo and a test that fails if deterministic analysis stops
+surfacing expected provider concepts. The workflow remains LLM-optional: LLMs may
+suggest expectations, but deterministic tests encode the accepted learning.