Coevolution workplan extension

This commit is contained in:
2026-04-29 00:40:02 +02:00
parent c070951c68
commit 31dd6259b5
8 changed files with 200 additions and 6 deletions

View File

@@ -142,6 +142,18 @@ REPO_REGISTRY_LLM_PROVIDER=gemini
REPO_REGISTRY_LLM_MODEL=gemini-2.5-flash
```
LLM assistance can also be disabled even when a provider is configured:
```bash
REPO_REGISTRY_LLM_ENABLED=false
```
Individual analysis requests may opt out with `{"use_llm_assistance": false}`.
For local demos, `{"trusted_auto_approve": true}` approves the generated
candidate graph immediately after analysis, without a human review step, and
records the review decision as `trusted_auto_approve_candidate_graph`. The
default remains review-first: auto-approval is off unless explicitly requested.
## Agent-Facing Endpoints
The v0.1 API covers the main registration, analysis, review, search, and inspection loop:

View File

@@ -121,6 +121,8 @@ class RegistryService:
*,
source_path: str | None = None,
use_cached_checkout: bool = False,
use_llm_assistance: bool = True,
trusted_auto_approve: bool = False,
access_username: str | None = None,
access_password: str | None = None,
) -> ScanSummary:
@@ -187,6 +189,7 @@ class RegistryService:
repository,
facts,
stored_chunks,
use_llm_assistance=use_llm_assistance,
)
except Exception as exc:
log_operation(
@@ -221,6 +224,16 @@ class RegistryService:
action="llm_extraction_used",
notes=f"Generated {len(candidates)} candidate ability draft(s).",
)
if trusted_auto_approve:
self.approve_candidate_graph(
repository_id,
completed_run.id,
notes=(
"Trusted auto-populate mode approved candidate graph "
f"after {candidate_source} candidate generation."
),
action="trusted_auto_approve_candidate_graph",
)
log_operation(
"analysis_completed",
repository_id=repository_id,
@@ -241,8 +254,10 @@ class RegistryService:
repository: Repository,
facts: list[ObservedFact],
chunks: list[ContentChunk],
*,
use_llm_assistance: bool = True,
):
if self.llm_extractor is not None:
if use_llm_assistance and self.llm_extractor is not None:
extracted = self.llm_extractor.extract(repository, chunks)
if extracted:
return self.llm_mapper.map(extracted, facts, chunks), "llm"
@@ -290,6 +305,7 @@ class RegistryService:
analysis_run_id: int,
*,
notes: str = "",
action: str = "approve_candidate_graph",
) -> RepositoryAbilityMap:
graph = self.store.get_candidate_graph(repository_id, analysis_run_id)
pending_abilities = [
@@ -347,7 +363,7 @@ class RegistryService:
self.store.create_review_decision(
repository_id,
analysis_run_id,
action="approve_candidate_graph",
action=action,
notes=notes,
)
self.store.update_repository_status(repository_id, "indexed")

View File

@@ -61,6 +61,7 @@ class Settings(BaseSettings):
database_path: str = Field(default="var/repo-registry.sqlite3")
checkout_root: str = Field(default="var/checkouts")
llm_enabled: bool = Field(default=True)
llm_provider: str | None = Field(default=None)
llm_model: str | None = Field(default=None)
embedding_provider: str | None = Field(default=None)
@@ -80,7 +81,7 @@ def get_service(settings: Settings = Depends(get_settings)) -> RegistryService:
store = RegistryStore(database_path)
store.initialize()
llm_extractor = None
if settings.llm_provider:
if settings.llm_enabled and settings.llm_provider:
adapter = create_llm_connect_adapter(
settings.llm_provider,
model=settings.llm_model,
@@ -246,6 +247,8 @@ def create_analysis_run(
repository_id,
source_path=payload.source_path,
use_cached_checkout=payload.use_cached_checkout,
use_llm_assistance=payload.use_llm_assistance,
trusted_auto_approve=payload.trusted_auto_approve,
access_username=payload.access_username,
access_password=payload.access_password,
)

View File

@@ -203,6 +203,8 @@ class EvidenceUpdate(BaseModel):
class AnalysisRunCreate(BaseModel):
source_path: str | None = None
use_cached_checkout: bool = False
use_llm_assistance: bool = True
trusted_auto_approve: bool = False
access_username: str | None = None
access_password: str | None = Field(default=None, repr=False)
@@ -212,6 +214,8 @@ class AnalysisRunCreate(BaseModel):
{},
{"source_path": "/path/to/local/repository"},
{"use_cached_checkout": True},
{"use_llm_assistance": False},
{"trusted_auto_approve": True},
{
"access_username": "git-user",
"access_password": "access-token",

View File

@@ -214,6 +214,8 @@ def render_repository_index(
<label>Username <input name="access_username" autocomplete="username" placeholder="Optional for private HTTP(S) repos"></label>
<label>Password or access token <input name="access_password" type="password" autocomplete="current-password" placeholder="Used for this Git operation only"></label>
<label class="checkbox"><input type="checkbox" name="explore_after_registration" value="1" checked> Explore after registration</label>
<label class="checkbox"><input type="checkbox" name="use_llm_assistance" value="1" checked> Use LLM assistance if configured</label>
<label class="checkbox"><input type="checkbox" name="trusted_auto_approve" value="1"> Trusted auto-populate after analysis</label>
<div class="actions">
<button type="submit">Register</button>
<span data-pending>Registering repository...</span>
@@ -426,6 +428,8 @@ def create_repository_from_form(
access_username: str = Form(""),
access_password: str = Form(""),
explore_after_registration: str | None = Form(None),
use_llm_assistance: str | None = Form(None),
trusted_auto_approve: str | None = Form(None),
service: RegistryService = Depends(get_service),
):
try:
@@ -444,6 +448,8 @@ def create_repository_from_form(
if explore_after_registration:
summary = service.analyze_repository(
repository.id,
use_llm_assistance=bool(use_llm_assistance),
trusted_auto_approve=bool(trusted_auto_approve),
access_username=access_username or None,
access_password=access_password or None,
)
@@ -508,6 +514,8 @@ def repository_detail(
<form class="stack" method="post" action="/ui/repos/{repository_id}/analysis-runs">
<label>Override source path <input name="source_path" placeholder="Optional local path"></label>
<label class="checkbox"><input type="checkbox" name="use_cached_checkout" value="1"> Analyze cached checkout without fetching upstream</label>
<label class="checkbox"><input type="checkbox" name="use_llm_assistance" value="1" checked> Use LLM assistance if configured</label>
<label class="checkbox"><input type="checkbox" name="trusted_auto_approve" value="1"> Trusted auto-populate after analysis</label>
<label>Username <input name="access_username" autocomplete="username" placeholder="Optional for private HTTP(S) repos"></label>
<label>Password or access token <input name="access_password" type="password" autocomplete="current-password" placeholder="Used for this Git operation only"></label>
<div class="actions">
@@ -837,6 +845,8 @@ def create_analysis_run_from_form(
repository_id: int,
source_path: str = Form(""),
use_cached_checkout: str | None = Form(None),
use_llm_assistance: str | None = Form(None),
trusted_auto_approve: str | None = Form(None),
access_username: str = Form(""),
access_password: str = Form(""),
service: RegistryService = Depends(get_service),
@@ -845,6 +855,8 @@ def create_analysis_run_from_form(
repository_id,
source_path=source_path or None,
use_cached_checkout=bool(use_cached_checkout),
use_llm_assistance=bool(use_llm_assistance),
trusted_auto_approve=bool(trusted_auto_approve),
access_username=access_username or None,
access_password=access_password or None,
)

View File

@@ -604,6 +604,43 @@ def test_analyze_repository_can_use_optional_llm_extractor(tmp_path):
assert "1 candidate ability" in decisions[0].notes
def test_analyze_repository_can_disable_optional_llm_extractor(tmp_path):
    """A per-request opt-out must leave a configured LLM extractor uncalled.

    The service is built with an extractor, but the analysis is requested
    with ``use_llm_assistance=False``; the test expects no extractor calls,
    a candidate named from the README text, and no ``llm_extraction_used``
    review decision.
    """
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    readme = repo_dir / "README.md"
    readme.write_text(
        "# Email Router\nRoutes incoming customer email.\n",
        encoding="utf-8",
    )

    registry_store = RegistryStore(tmp_path / "registry.sqlite3")
    registry_store.initialize()

    # Ability the fake extractor is seeded with; its name differs from the
    # one asserted below, so it must not surface in the candidate graph.
    seeded_ability = ExtractedAbility(
        name="Business Email Routing",
        description="Route incoming messages.",
        source_paths=["README.md"],
    )
    fake_extractor = FakeLLMExtractor([seeded_ability])
    registry = RegistryService(
        registry_store,
        ingestion=GitIngestionService(tmp_path / "checkouts"),
        llm_extractor=fake_extractor,
    )

    repo = registry.register_repository(name="Email Router", url=str(repo_dir))
    scan = registry.analyze_repository(repo.id, use_llm_assistance=False)
    run_id = scan.analysis_run.id
    candidate_graph = registry.candidate_graph(repo.id, run_id)
    recorded_actions = [
        entry.action for entry in registry.list_review_decisions(repo.id, run_id)
    ]

    assert fake_extractor.calls == []
    first_ability = candidate_graph.abilities[0]
    assert first_ability.name == "Route Incoming Customer Email"
    assert "llm_extraction_used" not in recorded_actions
def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path):
source = tmp_path / "repo"
source.mkdir()
@@ -623,6 +660,40 @@ def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_ca
assert graph.abilities[0].name == "Support Fallback"
def test_analyze_repository_can_trusted_auto_approve_candidates(tmp_path):
    """Requesting ``trusted_auto_approve`` approves the graph during analysis.

    With LLM assistance disabled, the test expects the repository to end up
    ``indexed``, the first candidate to be ``approved``, and the first review
    decision to carry the ``trusted_auto_approve_candidate_graph`` action.
    """
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()

    # Files are written in insertion order: README first, then the app module.
    fixture_files = {
        "README.md": "# Auto Approved\nReports health over HTTP.\n",
        "app.py": (
            "from fastapi import FastAPI\n"
            "app = FastAPI()\n"
            '@app.get("/health")\n'
            "def health():\n"
            " return {}\n"
        ),
    }
    for filename, text in fixture_files.items():
        (repo_dir / filename).write_text(text, encoding="utf-8")

    registry = make_service(tmp_path)
    repo = registry.register_repository(name="Auto Approved", url=str(repo_dir))
    scan = registry.analyze_repository(
        repo.id,
        trusted_auto_approve=True,
        use_llm_assistance=False,
    )
    run_id = scan.analysis_run.id

    approved_map = registry.ability_map(repo.id)
    candidate_graph = registry.candidate_graph(repo.id, run_id)
    review_log = registry.list_review_decisions(repo.id, run_id)

    assert registry.get_repository(repo.id).status == "indexed"
    assert candidate_graph.abilities[0].status == "approved"
    assert approved_map.abilities[0].name == "Report Health Over HTTP"

    first_decision = review_log[0]
    assert first_decision.action == "trusted_auto_approve_candidate_graph"
    assert "deterministic candidate generation" in first_decision.notes
def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path):
source = tmp_path / "repo"
source.mkdir()

View File

@@ -1087,6 +1087,8 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
assert "Registering repository..." in index_response.text
assert "Password or access token" in index_response.text
assert "Explore after registration" in index_response.text
assert "Use LLM assistance if configured" in index_response.text
assert "Trusted auto-populate after analysis" in index_response.text
create_response = client.post(
"/ui/repos",
@@ -1096,6 +1098,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
"access_username": "",
"access_password": "",
"explore_after_registration": "",
"use_llm_assistance": "1",
},
follow_redirects=False,
)
@@ -1108,6 +1111,8 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
assert "Run Analysis" in detail_response.text
assert "Running analysis..." in detail_response.text
assert "Analyze cached checkout without fetching upstream" in detail_response.text
assert "Use LLM assistance if configured" in detail_response.text
assert "Trusted auto-populate after analysis" in detail_response.text
assert "Repository Metadata" in detail_response.text
edit_repository_response = client.post(
@@ -1128,6 +1133,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
f"{repository_path}/analysis-runs",
data={
"source_path": "",
"use_llm_assistance": "1",
"access_username": "",
"access_password": "",
},
@@ -1266,7 +1272,7 @@ def test_ui_register_analyze_and_approve_loop(tmp_path):
)
second_run_response = client.post(
f"{repository_path}/analysis-runs",
data={"source_path": ""},
data={"source_path": "", "use_llm_assistance": "1"},
follow_redirects=False,
)
assert second_run_response.status_code == 303
@@ -1344,6 +1350,8 @@ def test_ui_register_and_explore_lands_on_analysis_result(tmp_path):
"access_username": "",
"access_password": "",
"explore_after_registration": "1",
"use_llm_assistance": "",
"trusted_auto_approve": "1",
},
follow_redirects=False,
)
@@ -1354,7 +1362,13 @@ def test_ui_register_and_explore_lands_on_analysis_result(tmp_path):
result = client.get(response.headers["location"])
assert result.status_code == 200
assert "Candidate Graph" in result.text
assert "approved" in result.text
assert "Observed Facts" in result.text
assert "trusted_auto_approve_candidate_graph" in result.text
repository_detail = client.get("/ui/repos/1")
assert repository_detail.status_code == 200
assert "Use Approved Registry" in repository_detail.text
finally:
app.dependency_overrides.clear()

View File

@@ -8,7 +8,7 @@ status: active
owner: codex
topic_slug: foerster-capabilities
created: "2026-04-26"
updated: "2026-04-28"
updated: "2026-04-29"
state_hub_workstream_id: "c121d462-f2e4-45d3-9d2d-9c04a3556953"
---
@@ -102,7 +102,7 @@ export.
```task
id: RREG-WP-0003-T04
status: todo
status: done
priority: medium
state_hub_task_id: "076385fe-4dbf-4aca-b89f-c7372d9eebd9"
```
@@ -131,3 +131,65 @@ that produce only weak candidates.
Acceptance: trying the product on repo-registry itself feels understandable and
useful even when a scan finds gaps or weak evidence.
## P1: Expectation Gap Feedback Loop
```task
id: RREG-WP-0003-T07
status: todo
priority: medium
```
Capture the gap between what a curator expected to see and what deterministic
analysis actually produced. Treat these gaps as first-class scanner optimization
inputs: a user should be able to record missing expected abilities, capabilities,
features, facts, or classifications for an analyzed repository. The system should
preserve the source of the expectation (`human`, `llm-assisted`, or `comparison`)
and link it to the analysis run that missed it.
Acceptance: after inspecting a repository such as `llm-connect`, a curator can
record that expected concepts like `OpenRouter provider support`, `Claude model
usage`, or `provider fallback policy` were missing. The gap is visible from the
repository/review UI and can be used to create deterministic scanner regression
fixtures.
## P1: Provider-Aware Deterministic Scanning
```task
id: RREG-WP-0003-T08
status: todo
priority: medium
```
Extend deterministic scanning and content indexing to identify provider and
integration concepts that generic language/framework/file facts miss. Initial
targets are LLM infrastructure repositories: OpenRouter, Anthropic/Claude,
OpenAI, Gemini, model-provider registries, credential environment variables,
adapter classes, routing rules, and fallback policies. These should appear as
source-linked facts and map into useful candidate capabilities/features without
requiring LLM assistance.
Acceptance: analyzing `llm-connect` with LLM assistance disabled can surface
source-linked facts and candidate graph entries for OpenRouter support, Claude or
Anthropic support where present, provider configuration/credentials, and any
explicit model fallback behavior found in code, docs, or config.
## P1: Scanner Coevolution Regression Harness
```task
id: RREG-WP-0003-T09
status: todo
priority: medium
```
Create a repeatable improvement loop where reviewed expectation gaps become
fixtures and tests. For each trial repository, store a small expectation profile
that lists important concepts the deterministic scanner should eventually detect.
Compare deterministic outputs against optional LLM-assisted or human-curated
expectations, then promote confirmed misses into scanner/candidate-generator
regression tests.
Acceptance: the repository has at least one expectation fixture for an LLM
infrastructure repo and a test that fails if deterministic analysis stops
surfacing expected provider concepts. The workflow remains LLM-optional: LLMs may
suggest expectations, but deterministic tests encode the accepted learning.