generated from coulomb/repo-seed
IB-WP-0018-T01+T02+T05: routing bridge to llm-connect
T01 — task-type taxonomy. docs/routing-task-types.md names the five generation stages as the default identity-mapped task types (summarize-source, extract-entities, extract-relations, evaluate-entity, synthesize-report) and records the recommended quality floors per stage. The taxonomy explicitly does not decide which adapter ships per task type, where the ledger lives, or what a quality score means — those stay with the caller per the LLM-WP-0004 scope guardrail. T02 — RoutingAssistedGenerationAdapter bridge in src/infospace_bench/routing.py. Wraps any llm-connect RoutingPolicy or AdaptiveRoutingPolicy as an infospace-bench AssistedGenerationAdapter: maps stage_id -> task_type (overridable), resolves an LLMAdapter, delegates execute_prompt with a configurable RunConfig, and surfaces the resolved adapter id, task type, model, usage, and finish_reason back on AssistedGenerationResult.metadata. Provider tag stays back-compatible with the strings already used in run records and the budget rollup (openrouter / claude_code / openai / gemini / mock / routing). T05 — eight tests in tests/test_routing_adapter.py cover: static-policy per-stage resolution, stage_to_task_type overrides, default-mapping completeness, fall-through for unmapped stage ids, the adaptive path selecting the cheaper qualifying adapter when a quality_floor is set, adaptive policy falling back to static when no floor is set, response metadata round-trip with provider tagging, and estimated_cost_per_1k pass-through. Adds llm-connect as a path dependency on pyproject.toml and to the pytest pythonpath. Static OpenRouter and fixture paths are unchanged; this commit only adds the option of routing. 139 tests pass, 1 skipped (the OpenRouter live smoke, gated as before). T03 (shadow-mode integration) and T04 (CLI + per-stage chosen-adapter in the generation report) follow next. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
137
src/infospace_bench/routing.py
Normal file
137
src/infospace_bench/routing.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
Bridge between infospace-bench's ``AssistedGenerationAdapter`` protocol and
|
||||
llm-connect's ``RoutingPolicy`` / ``AdaptiveRoutingPolicy`` primitives
|
||||
(LLM-WP-0004). Lets a generation run delegate each stage to a task-typed
|
||||
route without touching ``workflow.py``.
|
||||
|
||||
The mapping from infospace-bench workflow stage ids to llm-connect task
|
||||
types is the consumer side of LLM-WP-0004's scope guardrail: llm-connect
|
||||
ships the routing primitives, infospace-bench names the tasks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from llm_connect.adapter import LLMAdapter
|
||||
from llm_connect.models import RunConfig
|
||||
from llm_connect.routing import AdaptiveRoutingPolicy, RoutingPolicy
|
||||
|
||||
from .workflow import AssistedGenerationRequest, AssistedGenerationResult
|
||||
|
||||
|
||||
# Default stage -> task-type table. Each generation stage of the
# generic-source profile maps to a task type of the same name (identity
# mapping). Individual stages can be remapped per adapter instance through
# the ``stage_to_task_type`` field — e.g. pointing both ``extract-entities``
# and ``extract-relations`` at one ``extraction`` route, or widening
# ``evaluate-entity`` to ``judge``.
STAGE_TO_TASK_TYPE_DEFAULT: dict[str, str] = {
    stage_id: stage_id
    for stage_id in (
        "summarize-source",
        "extract-entities",
        "extract-relations",
        "evaluate-entity",
        "synthesize-report",
    )
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RoutingAssistedGenerationAdapter:
    """Route assisted-generation requests through an llm-connect policy.

    On each ``generate(request)`` call:

    1. Resolves ``task_type`` from ``request.stage_id`` (overridable via
       ``stage_to_task_type``; default falls back to the stage id itself).
    2. Asks the policy for an adapter. When the policy is an
       ``AdaptiveRoutingPolicy`` and ``quality_floor`` is set, the
       adaptive path is used; otherwise the policy resolves statically.
    3. Calls the resolved adapter's ``execute_prompt`` with the request
       prompt and ``default_run_config`` (passed through unchanged).
    4. Maps the response back to an ``AssistedGenerationResult`` and
       records model, usage, finish_reason, and the resolved
       task_type / adapter_id in ``metadata``. Extra keys from the
       response's own ``metadata`` are merged in, but never replace
       those five bridge-owned keys.
    """

    # Policy that maps a task type to an adapter.
    policy: RoutingPolicy
    # Per-instance overrides layered on top of STAGE_TO_TASK_TYPE_DEFAULT.
    stage_to_task_type: dict[str, str] = field(default_factory=dict)
    # Run configuration handed to every resolved adapter.
    default_run_config: RunConfig = field(default_factory=RunConfig)
    # When set (and the policy is adaptive), forwarded to ``policy.resolve``.
    quality_floor: float | None = None
    # Forwarded to ``policy.resolve`` on every call.
    estimated_cost_per_1k: float | None = None

    def generate(
        self, request: AssistedGenerationRequest
    ) -> AssistedGenerationResult:
        """Run ``request.prompt`` on the adapter routed for its stage.

        Returns an ``AssistedGenerationResult`` whose ``markdown`` is the
        response content, whose ``provider`` is the coarse provider tag of
        the resolved adapter, and whose ``metadata`` carries the routing
        decision plus the response details.
        """
        task_type = self._task_type_for(request.stage_id)
        adapter = self._resolve(task_type)
        response = adapter.execute_prompt(request.prompt, self.default_run_config)

        metadata: dict[str, Any] = {
            "task_type": task_type,
            "adapter_id": _identify_adapter(adapter),
            "model": response.model or self.default_run_config.model_name,
            "usage": dict(response.usage or {}),
            "finish_reason": response.finish_reason,
        }
        if response.metadata:
            # Merge adapter-supplied extras without letting them overwrite
            # the bridge-owned keys above: downstream consumers rely on
            # those reflecting the routing decision actually taken.
            for key, value in dict(response.metadata).items():
                metadata.setdefault(key, value)

        return AssistedGenerationResult(
            markdown=response.content,
            provider=_provider_tag(adapter),
            metadata=metadata,
        )

    def _resolve(self, task_type: str) -> LLMAdapter:
        """Ask the policy for the adapter serving ``task_type``.

        ``quality_floor`` is only forwarded when the policy is an
        ``AdaptiveRoutingPolicy`` and a floor is configured; in every other
        case only ``estimated_cost_per_1k`` is passed.
        """
        resolve_kwargs: dict[str, Any] = {
            "estimated_cost_per_1k": self.estimated_cost_per_1k,
        }
        if (
            isinstance(self.policy, AdaptiveRoutingPolicy)
            and self.quality_floor is not None
        ):
            resolve_kwargs["quality_floor"] = self.quality_floor
        return self.policy.resolve(task_type, **resolve_kwargs)

    def _task_type_for(self, stage_id: str) -> str:
        """Map a workflow stage id to its task type.

        Lookup order: instance overrides, then the module defaults, then
        the stage id itself for stages neither table knows about.
        """
        # Direct layered lookup; avoids copying and merging the default
        # table on every call.
        if stage_id in self.stage_to_task_type:
            return self.stage_to_task_type[stage_id]
        return STAGE_TO_TASK_TYPE_DEFAULT.get(stage_id, stage_id)
|
||||
|
||||
|
||||
def _identify_adapter(adapter: LLMAdapter) -> str:
    """Derive a best-effort identifier for an adapter instance.

    Resolution order:

    1. A truthy ``adapter_id`` attribute, converted with ``str``.
    2. ``"{ClassName}:{model}"`` using the first truthy value among the
       ``model`` and ``model_name`` attributes (checked in that order).
    3. The bare class name when none of the above is available.
    """
    explicit_id = getattr(adapter, "adapter_id", "")
    if explicit_id:
        return str(explicit_id)

    class_name = type(adapter).__name__
    for attr_name in ("model", "model_name"):
        model_value = getattr(adapter, attr_name, "")
        if model_value:
            return f"{class_name}:{model_value}"
    return class_name
|
||||
|
||||
|
||||
def _provider_tag(adapter: LLMAdapter) -> str:
    """Classify an adapter into a coarse provider tag by its class name.

    The lowercased class name is checked against an ordered list of
    substrings; the first match decides the tag. Possible results are
    ``openrouter``, ``claude_code``, ``openai``, ``gemini``, ``mock``
    (class names containing ``mock`` or ``static``), and ``routing`` as
    the catch-all when nothing matches.
    """
    lowered = type(adapter).__name__.lower()
    # Order matters: earlier rules take precedence over later ones.
    ordered_rules = (
        (("openrouter",), "openrouter"),
        (("claudecode", "claude_code"), "claude_code"),
        (("openai",), "openai"),
        (("gemini",), "gemini"),
        (("mock", "static"), "mock"),
    )
    for needles, tag in ordered_rules:
        if any(needle in lowered for needle in needles):
            return tag
    return "routing"
|
||||
Reference in New Issue
Block a user