feat(infospace): add per-entity evaluation pipeline and CLI command (S2.3)

Evaluation pipeline builds prompts from entity metadata, delegates
to BatchEvaluator, parses structured LLM responses into ScoreEntry
objects, and writes evaluation files. CLI: 'markitect infospace evaluate'
with --provider, --entity, --chapter filters.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 01:48:34 +01:00
parent 3726503adb
commit 3461d2f354
3 changed files with 504 additions and 0 deletions

View File

@@ -153,6 +153,71 @@ def entities(config_path: Optional[str], sort_key: str):
click.echo(f"\nTotal: {len(entity_list)} entities")
# ── evaluate ─────────────────────────────────────────────────────────
# CLI entry point for LLM-based entity evaluation: load the infospace config,
# pick the entities to score, run the evaluation pipeline, and print a summary.
# NOTE: the docstring stays a single line because Click shows a command's
# docstring as its --help text.
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default="openrouter", help="LLM provider (openrouter, openai, etc.).")
@click.option("--model", default=None, help="LLM model name.")
@click.option("--entity", "entity_slug", default=None, help="Evaluate a single entity by slug.")
@click.option("--chapter", default=None, help="Evaluate entities from a specific chapter.")
def evaluate(config_path, provider, model, entity_slug, chapter):
    """Evaluate entities using LLM-based quality assessment."""
    cfg, cfg_path = _load_config_or_exit(config_path)

    # All configured directories are resolved relative to the config file.
    project_root = cfg_path.parent
    source_dir = project_root / cfg.entities_dir
    if not source_dir.is_dir():
        click.echo("Error: No entities directory found.", err=True)
        raise SystemExit(1)

    candidates = parse_entity_directory(source_dir)
    if not candidates:
        click.echo("No entities to evaluate.")
        return

    # Narrow the selection. --entity wins over --chapter: when both are
    # supplied, the chapter filter is not applied at all.
    if entity_slug:
        selected = [ent for ent in candidates if ent.slug == entity_slug]
        if not selected:
            # An unknown slug is treated as a hard error (exit status 1).
            click.echo(f"Error: Entity '{entity_slug}' not found.", err=True)
            raise SystemExit(1)
    elif chapter:
        # Uses `in`, so this is a containment test against source_chapter
        # (a substring match if source_chapter is a string -- TODO confirm).
        selected = [ent for ent in candidates if chapter in ent.source_chapter]
        if not selected:
            # An empty chapter match is informational only; normal return.
            click.echo(f"No entities found for chapter '{chapter}'.")
            return
    else:
        selected = candidates

    # Imported inside the command rather than at module top; presumably to
    # keep CLI start-up light -- confirm before hoisting.
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(
        model_name=model or "default",
        temperature=0.3,
        max_tokens=2000,
    )

    def on_progress(done, total, result):
        # Emits one status line per callback invocation.
        state = result.status.upper()
        click.echo(f" [{done}/{total}] {result.key}: {state}")

    click.echo(f"Evaluating {len(selected)} entities via {provider}...")

    from markitect.infospace.evaluate import run_entity_evaluation

    summary = run_entity_evaluation(
        config=cfg,
        entities=selected,
        adapter=adapter,
        run_config=run_config,
        output_dir=project_root / cfg.evaluations_dir,
        progress_callback=on_progress,
    )

    click.echo(f"\nDone: {summary.succeeded} succeeded, {summary.failed} failed, {summary.skipped} skipped")
    # Token usage is only printed when the summary reports a positive count.
    if summary.total_tokens > 0:
        click.echo(f"Tokens used: {summary.total_tokens}")
# ── viability ────────────────────────────────────────────────────────