# infospace-bench/examples/routing/trading-literature.yaml

# Example routing config for a trading-literature Lefevre-style run.
#
# Captures the IB-WP-0018 task-type taxonomy from docs/routing-task-types.md:
#   summarize-source  → cheap model (volume-heavy, recoverable downstream)
#   extract-entities  → smart model (durable output; be strict)
#   extract-relations → smart model (depends on entities)
#   evaluate-entity   → judge model (different family from extraction)
#   synthesize-report → smart model (volume-of-one: quality matters, cost is low)
#
# Quality floors are the recommended starting points from
# docs/routing-task-types.md. With a ledger configured, AdaptiveRoutingPolicy
# will pick the cheapest *qualifying* adapter per task type as observations
# accumulate; until then it falls back to the static prefer/fallback order.
#
# Refresh the model rates in src/infospace_bench/model_rates.yaml before any
# full-book run — list prices drift, and the rough USD estimate in the budget
# log depends on them.

# Version of this routing-config layout.
# NOTE(review): presumably validated by the config loader — confirm.
schema_version: 1

# Workspace-relative path of the quality ledger, so QualityLedger
# observations recorded in this workspace stay with this workspace.
# Remove this key to run pure static routing (no adaptive selection).
ledger_path: output/routing/quality.jsonl

# Fallback quality floor, used when --quality-floor is not passed at the call
# site. Precedence, highest first:
#   1. the --quality-floor CLI flag
#   2. the per-task quality_floor under task_types
#   3. this default
default_quality_floor: 0.80

# Maps each pipeline stage to a task type; every value here names a key under
# task_types. The `baseline` task type is not mapped to any stage.
stage_to_task_type:
  summarize-source: cheap  # volume-heavy, recoverable downstream
  extract-entities: smart  # durable output; be strict
  extract-relations: smart  # depends on entities
  evaluate-entity: judge  # different model family from extraction
  synthesize-report: smart  # volume-of-one, quality matters

# Candidate adapters for each task type referenced by stage_to_task_type, plus
# the unmapped `baseline` entry.
# NOTE(review): the file header mentions a static prefer/fallback order; it is
# presumably the list order of `candidates` (first = preferred) — confirm
# against the loader before reordering entries.
task_types:

  # Serves summarize-source. Carries the lowest quality floor in this file.
  cheap:
    quality_floor: 0.70
    candidates:
      - id: openrouter:gpt-4o-mini
        provider: openrouter
        model: openai/gpt-4o-mini
        api_key_env: OPENROUTER_API_KEY
        # NOTE(review): presumably a cost ceiling per 1k tokens in USD —
        # confirm the unit against the loader.
        max_cost_per_1k: 0.001
      - id: openrouter:claude-3.5-haiku
        provider: openrouter
        model: anthropic/claude-3.5-haiku
        api_key_env: OPENROUTER_API_KEY
        max_cost_per_1k: 0.003

  # Serves extract-entities, extract-relations and synthesize-report. Carries
  # the highest quality floor in this file. Both candidates are Anthropic
  # models; neither sets max_cost_per_1k.
  smart:
    quality_floor: 0.85
    candidates:
      - id: openrouter:claude-3.5-haiku
        provider: openrouter
        model: anthropic/claude-3.5-haiku
        api_key_env: OPENROUTER_API_KEY
      - id: openrouter:claude-3.5-sonnet
        provider: openrouter
        model: anthropic/claude-3.5-sonnet
        api_key_env: OPENROUTER_API_KEY

  # Serves evaluate-entity. There is a single candidate, so this list has no
  # fallback entry.
  judge:
    quality_floor: 0.80
    candidates:
      # Evaluation goes through a different family than extraction to limit
      # self-preference bias: the `smart` candidates are Anthropic models,
      # this one is an OpenAI model.
      - id: openrouter:gpt-4o-mini
        provider: openrouter
        model: openai/gpt-4o-mini
        api_key_env: OPENROUTER_API_KEY

  # Baseline is wired here so a follow-up T05 ShadowingAdapter step can
  # reference `claude-code` as the grading oracle without editing the
  # task_types stanza. It is not referenced by stage_to_task_type and sets
  # neither quality_floor nor api_key_env.
  baseline:
    candidates:
      - id: claude-code
        provider: claude_code
        model: claude-opus-4-7