feat(infospace): add per-entity evaluation pipeline and CLI command (S2.3)

The evaluation pipeline builds prompts from entity metadata, delegates to BatchEvaluator, parses structured LLM responses into ScoreEntry objects, and writes evaluation files. CLI: 'markitect infospace evaluate' with --config, --provider, and --model options plus --entity / --chapter filters.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 01:48:34 +01:00
parent 3726503adb
commit 3461d2f354
3 changed files with 504 additions and 0 deletions
--- a/markitect/infospace/cli.py
+++ b/markitect/infospace/cli.py
@@ -153,6 +153,71 @@ def entities(config_path: Optional[str], sort_key: str):
    click.echo(f"\nTotal: {len(entity_list)} entities")


+# ── evaluate ─────────────────────────────────────────────────────────
+
+
+@infospace_commands.command()
+@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
+@click.option("--provider", default="openrouter", help="LLM provider (openrouter, openai, etc.).")
+@click.option("--model", default=None, help="LLM model name.")
+@click.option("--entity", "entity_slug", default=None, help="Evaluate a single entity by slug.")
+@click.option("--chapter", default=None, help="Evaluate entities from a specific chapter.")
+def evaluate(config_path, provider, model, entity_slug, chapter):
+    """Evaluate entities using LLM-based quality assessment."""
+    settings, settings_path = _load_config_or_exit(config_path)
+    base_dir = settings_path.parent
+    source_dir = base_dir / settings.entities_dir
+
+    # Without an entities directory there is nothing to read: report and exit non-zero.
+    if not source_dir.is_dir():
+        click.echo("Error: No entities directory found.", err=True)
+        raise SystemExit(1)
+
+    candidates = parse_entity_directory(source_dir)
+    if not candidates:
+        click.echo("No entities to evaluate.")
+        return
+
+    # Narrow the selection; --entity takes precedence and --chapter is ignored when both are set.
+    if entity_slug:
+        selected = [item for item in candidates if item.slug == entity_slug]
+        if not selected:
+            click.echo(f"Error: Entity '{entity_slug}' not found.", err=True)
+            raise SystemExit(1)
+    elif chapter:
+        # Containment test, not equality, against each entity's source_chapter.
+        selected = [item for item in candidates if chapter in item.source_chapter]
+        if not selected:
+            click.echo(f"No entities found for chapter '{chapter}'.")
+            return
+    else:
+        selected = candidates
+
+    # Imported lazily inside the command rather than at module import time.
+    from markitect.llm import create_adapter
+    from markitect.prompts.execution.models import RunConfig
+    llm_adapter = create_adapter(provider, model=model)
+    llm_settings = RunConfig(model_name=model or "default", temperature=0.3, max_tokens=2000)
+
+    def report_progress(done, total, result):
+        """Echo a '[done/total] key: STATUS' line for one result."""
+        outcome = result.status.upper()
+        click.echo(f"  [{done}/{total}] {result.key}: {outcome}")
+
+    click.echo(f"Evaluating {len(selected)} entities via {provider}...")
+
+    from markitect.infospace.evaluate import run_entity_evaluation
+    results = run_entity_evaluation(
+        config=settings, entities=selected, adapter=llm_adapter, run_config=llm_settings,
+        output_dir=base_dir / settings.evaluations_dir, progress_callback=report_progress,
+    )
+
+    # Final tally; the token line is printed only for a positive count.
+    click.echo(f"\nDone: {results.succeeded} succeeded, {results.failed} failed, {results.skipped} skipped")
+    if results.total_tokens > 0:
+        click.echo(f"Tokens used: {results.total_tokens}")
+
+
 # ── viability ────────────────────────────────────────────────────────