feat(infospace): add per-entity evaluation pipeline and CLI command (S2.3)
The evaluation pipeline builds prompts from entity metadata, delegates to BatchEvaluator, parses structured LLM responses into ScoreEntry objects, and writes evaluation files. CLI: 'markitect infospace evaluate' with --provider and --model options and --entity / --chapter filters. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -153,6 +153,71 @@ def entities(config_path: Optional[str], sort_key: str):
|
||||
click.echo(f"\nTotal: {len(entity_list)} entities")
|
||||
|
||||
|
||||
# ── evaluate ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default="openrouter", help="LLM provider (openrouter, openai, etc.).")
@click.option("--model", default=None, help="LLM model name.")
@click.option("--entity", "entity_slug", default=None, help="Evaluate a single entity by slug.")
@click.option("--chapter", default=None, help="Evaluate entities from a specific chapter.")
def evaluate(config_path, provider, model, entity_slug, chapter):
    """Evaluate entities using LLM-based quality assessment."""
    # NOTE: the docstring above is what click shows as the command's help
    # text, so it is intentionally left as a single short line.
    #
    # Flow: load config -> parse entities -> apply optional filter ->
    # build the LLM adapter and run config -> run the evaluation -> print
    # a summary.  Exits with status 1 when the entities directory is
    # missing or when the requested --entity slug does not exist.

    # All project paths are resolved relative to the directory that holds
    # the config file.
    cfg, cfg_path = _load_config_or_exit(config_path)
    project_root = cfg_path.parent

    entities_path = project_root / cfg.entities_dir
    if not entities_path.is_dir():
        click.echo("Error: No entities directory found.", err=True)
        raise SystemExit(1)

    candidates = parse_entity_directory(entities_path)
    if not candidates:
        click.echo("No entities to evaluate.")
        return

    # Filtering: --entity takes precedence.  When a slug is supplied the
    # --chapter option is not consulted at all.
    if entity_slug:
        candidates = list(filter(lambda ent: ent.slug == entity_slug, candidates))
        if not candidates:
            # An unknown slug is treated as a usage error (non-zero exit).
            click.echo(f"Error: Entity '{entity_slug}' not found.", err=True)
            raise SystemExit(1)
    elif chapter:
        # Containment test (`in`), not equality, against source_chapter.
        candidates = list(filter(lambda ent: chapter in ent.source_chapter, candidates))
        if not candidates:
            # An empty chapter selection is informational only: normal return.
            click.echo(f"No entities found for chapter '{chapter}'.")
            return

    # The LLM pieces are imported lazily, only once there is work to do.
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    llm_adapter = create_adapter(provider, model=model)
    run_settings = RunConfig(
        model_name=model if model else "default",
        temperature=0.3,
        max_tokens=2000,
    )

    def report_progress(done, total, result):
        """Print one status line for each result reported by the evaluator."""
        state = result.status.upper()
        click.echo(f" [{done}/{total}] {result.key}: {state}")

    click.echo(f"Evaluating {len(candidates)} entities via {provider}...")

    from markitect.infospace.evaluate import run_entity_evaluation

    evaluations_path = project_root / cfg.evaluations_dir
    summary = run_entity_evaluation(
        config=cfg,
        entities=candidates,
        adapter=llm_adapter,
        run_config=run_settings,
        output_dir=evaluations_path,
        progress_callback=report_progress,
    )

    click.echo(f"\nDone: {summary.succeeded} succeeded, {summary.failed} failed, {summary.skipped} skipped")
    # Token usage is only reported when the summary recorded any.
    if summary.total_tokens > 0:
        click.echo(f"Tokens used: {summary.total_tokens}")
|
||||
|
||||
|
||||
# ── viability ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user