The evaluation pipeline builds prompts from entity metadata, delegates to BatchEvaluator, parses structured LLM responses into ScoreEntry objects, and writes evaluation files. CLI: 'markitect infospace evaluate', with the --provider option and the --entity / --chapter filters.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
276 lines
10 KiB
Python
276 lines
10 KiB
Python
"""
|
|
CLI commands for infospace lifecycle management.
|
|
|
|
Provides ``markitect infospace`` subcommands for initialising,
|
|
inspecting, and evaluating infospaces.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import click
|
|
|
|
from markitect.infospace.config import (
|
|
DisciplineBinding,
|
|
InfospaceConfig,
|
|
SchemaRegistry,
|
|
TopicConfig,
|
|
find_infospace_config,
|
|
load_infospace_config,
|
|
save_infospace_config,
|
|
)
|
|
from markitect.infospace.entity_parser import parse_entity_directory
|
|
from markitect.infospace.state import build_state
|
|
|
|
|
|
def _load_config_or_exit(config_path: Optional[str] = None) -> tuple[InfospaceConfig, Path]:
    """Resolve and load ``infospace.yaml``, exiting the process on failure.

    Args:
        config_path: Explicit path to the config file. When omitted (or
            empty), the file is located via ``find_infospace_config()``.

    Returns:
        A ``(config, path)`` pair: the loaded configuration and the path it
        was loaded from.

    Raises:
        SystemExit: With exit code 1 when discovery finds no config file, or
            when an explicitly supplied path is not an existing file.
    """
    if config_path:
        p = Path(config_path)
        # The discovery branch reports a missing config cleanly; do the same
        # for an explicit path instead of handing a nonexistent file to the
        # loader.
        if not p.is_file():
            click.echo(f"Error: Config file not found: {p}", err=True)
            raise SystemExit(1)
    else:
        p = find_infospace_config()
        if p is None:
            click.echo("Error: No infospace.yaml found. Run 'markitect infospace init' first.", err=True)
            raise SystemExit(1)
    cfg = load_infospace_config(p)
    return cfg, p
|
|
|
|
|
|
# Root command group for ``markitect infospace``. The docstring doubles as the
# group's --help text, so it is kept short and user-facing.
@click.group(name="infospace")
def infospace_commands():
    """Manage infospaces — create, inspect, evaluate."""
|
|
|
|
|
|
# ── init ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
@infospace_commands.command()
@click.option("--topic", required=True, help="Topic name for the infospace.")
@click.option("--domain", default="", help="Knowledge domain.")
@click.option("--sources", default="", help="Path to source material directory.")
@click.option("--discipline", multiple=True, help="Discipline name (repeatable).")
@click.option("--output", "-o", default="infospace.yaml", help="Output config file path.")
def init(topic: str, domain: str, sources: str, discipline: tuple, output: str):
    """Initialise a new infospace configuration file."""
    # Never overwrite: an existing file at the target path aborts with exit
    # code 1 before anything is constructed or written.
    target = Path(output)
    if target.exists():
        click.echo(f"Error: {target} already exists.", err=True)
        raise SystemExit(1)

    # One binding per repeated --discipline flag, in the order given.
    bindings = []
    for discipline_name in discipline:
        bindings.append(DisciplineBinding(name=discipline_name))

    topic_cfg = TopicConfig(name=topic, domain=domain, sources=sources)
    new_config = InfospaceConfig(topic=topic_cfg, disciplines=bindings)

    save_infospace_config(new_config, target)
    click.echo(f"Created {target}")
|
|
|
|
|
|
# ── status ───────────────────────────────────────────────────────────
|
|
|
|
|
|
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def status(config_path: Optional[str]):
    """Show infospace status — entity count, domains, evaluation state."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    # Directories named in the config are resolved against the config file's folder.
    base_dir = cfg_path.parent

    # A missing entities directory is reported as zero entities, not an error.
    entity_root = base_dir / cfg.entities_dir
    parsed = parse_entity_directory(entity_root) if entity_root.is_dir() else []

    # Use the last history entry as the snapshot, if a non-empty history exists.
    latest = None
    history_file = base_dir / cfg.metrics_dir / "history.yaml"
    if history_file.is_file():
        # Imported lazily: only needed when there is a history file to read.
        from markitect.infospace.evaluation_io import read_history

        records = read_history(history_file)
        if records:
            latest = records[-1]

    state = build_state(cfg, entities=parsed, snapshot=latest)

    click.echo(f"Infospace: {state.topic_name}")
    if cfg.topic.domain:
        click.echo(f"Domain: {cfg.topic.domain}")
    click.echo(f"Entities: {state.entity_count}")
    if state.domains:
        click.echo(f"Domains: {', '.join(state.domains)}")
    if cfg.disciplines:
        discipline_names = ", ".join([binding.name for binding in cfg.disciplines])
        click.echo(f"Disciplines: {discipline_names}")

    if not state.has_evaluations:
        click.echo("Evaluations: none")
        return
    stamp = state.latest_snapshot.created_at.isoformat()
    click.echo(f"Last evaluated: {stamp}")
|
|
|
|
|
|
# ── entities ─────────────────────────────────────────────────────────
|
|
|
|
|
|
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--sort-by",
    "sort_key",
    type=click.Choice(["slug", "domain", "words"]),
    default="slug",
    help="Sort entities by field.",
)
def entities(config_path: Optional[str], sort_key: str):
    """List entities with metadata summary."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    entity_root = cfg_path.parent / cfg.entities_dir

    if not entity_root.is_dir():
        click.echo("No entities directory found.")
        return

    rows = parse_entity_directory(entity_root)
    if not rows:
        click.echo("No entities found.")
        return

    # (key function, descending?) for each --sort-by choice; anything not
    # listed here falls back to ascending slug order.
    orderings = {
        "domain": (lambda ent: (ent.domain or "", ent.slug), False),
        "words": (lambda ent: ent.total_word_count, True),
    }
    key_fn, descending = orderings.get(sort_key, (lambda ent: ent.slug, False))
    rows.sort(key=key_fn, reverse=descending)

    # Fixed-width table: 40 + 1 + 20 + 1 + 6 = 68 columns, matching the rule.
    header = f"{'Slug':<40} {'Domain':<20} {'Words':>6}"
    click.echo(header)
    click.echo("-" * 68)
    for ent in rows:
        domain_cell = ent.domain or "-"
        click.echo(f"{ent.slug:<40} {domain_cell:<20} {ent.total_word_count:>6}")
    click.echo(f"\nTotal: {len(rows)} entities")
|
|
|
|
|
|
# ── evaluate ─────────────────────────────────────────────────────────
|
|
|
|
|
|
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default="openrouter", help="LLM provider (openrouter, openai, etc.).")
@click.option("--model", default=None, help="LLM model name.")
@click.option("--entity", "entity_slug", default=None, help="Evaluate a single entity by slug.")
@click.option("--chapter", default=None, help="Evaluate entities from a specific chapter.")
def evaluate(
    config_path: Optional[str],
    provider: str,
    model: Optional[str],
    entity_slug: Optional[str],
    chapter: Optional[str],
):
    """Evaluate entities using LLM-based quality assessment."""
    # Flow: load config -> parse entities -> apply --entity/--chapter filter ->
    # build the LLM adapter -> run the evaluation -> print a summary.
    # Exits with code 1 when the entities directory is missing or the requested
    # --entity slug does not exist; an empty selection otherwise returns quietly.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    entities_dir = root / cfg.entities_dir
    if not entities_dir.is_dir():
        click.echo("Error: No entities directory found.", err=True)
        raise SystemExit(1)

    entity_list = parse_entity_directory(entities_dir)
    if not entity_list:
        click.echo("No entities to evaluate.")
        return

    # Filter. --entity takes precedence: when both options are given,
    # --chapter is not applied.
    if entity_slug:
        entity_list = [e for e in entity_list if e.slug == entity_slug]
        if not entity_list:
            click.echo(f"Error: Entity '{entity_slug}' not found.", err=True)
            raise SystemExit(1)
    elif chapter:
        # `in` on None raises TypeError; treat an unset source_chapter as
        # "no match". NOTE(review): assumes source_chapter may be None/empty
        # for some entities — confirm against the entity model.
        entity_list = [e for e in entity_list if chapter in (e.source_chapter or "")]
        if not entity_list:
            click.echo(f"No entities found for chapter '{chapter}'.")
            return

    # Create adapter (imports are deferred until an evaluation actually runs).
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model or "default", temperature=0.3, max_tokens=2000)

    # Progress callback: one line per finished item.
    def on_progress(done, total, result):
        # Named `outcome` so it does not shadow the module-level `status` command.
        outcome = result.status.upper()
        click.echo(f" [{done}/{total}] {result.key}: {outcome}")

    click.echo(f"Evaluating {len(entity_list)} entities via {provider}...")

    from markitect.infospace.evaluate import run_entity_evaluation

    output_dir = root / cfg.evaluations_dir
    summary = run_entity_evaluation(
        config=cfg,
        entities=entity_list,
        adapter=adapter,
        run_config=run_config,
        output_dir=output_dir,
        progress_callback=on_progress,
    )

    click.echo(f"\nDone: {summary.succeeded} succeeded, {summary.failed} failed, {summary.skipped} skipped")
    if summary.total_tokens > 0:
        click.echo(f"Tokens used: {summary.total_tokens}")
|
|
|
|
|
|
# ── viability ────────────────────────────────────────────────────────
|
|
|
|
|
|
def _format_threshold_bounds(threshold) -> str:
    """Render a threshold's limits as ``min=…, max=…``.

    Limits that are ``None`` are omitted; when neither is set the result is
    an empty string.
    """
    parts = []
    if threshold.min is not None:
        parts.append(f"min={threshold.min}")
    if threshold.max is not None:
        parts.append(f"max={threshold.max}")
    return ", ".join(parts)


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def viability(config_path: Optional[str]):
    """Show viability dashboard — threshold checks and pass/fail."""
    cfg, cfg_path = _load_config_or_exit(config_path)

    if not cfg.viability:
        click.echo("No viability thresholds configured in infospace.yaml.")
        return

    # Try to load latest metrics; a missing or non-mapping file means "none".
    root = cfg_path.parent
    metrics: dict = {}
    metrics_file = root / cfg.metrics_dir / "metrics.yaml"
    if metrics_file.is_file():
        import yaml

        raw = yaml.safe_load(metrics_file.read_text(encoding="utf-8"))
        if isinstance(raw, dict):
            # Keep numeric entries only. NOTE: bool is a subclass of int, so
            # YAML true/false values pass this check and become 1.0/0.0.
            metrics = {k: float(v) for k, v in raw.items() if isinstance(v, (int, float))}

    state = build_state(cfg, metrics=metrics or None)

    if not state.viability_results:
        # Nothing to compare against yet: list the configured thresholds instead.
        click.echo("No metrics available. Run evaluations first.")
        click.echo("\nConfigured thresholds:")
        for name, t in cfg.viability.items():
            click.echo(f" {name}: {_format_threshold_bounds(t)}")
        return

    header = f"{'Metric':<30} {'Value':>8} {'Threshold':>15} {'Status':>8}"
    click.echo(header)
    # Rule length is derived from the header (64 columns) so the two cannot
    # drift apart; the previous hard-coded 63 was one character short.
    click.echo("-" * len(header))
    for r in state.viability_results:
        bounds_text = _format_threshold_bounds(r.threshold)
        status_str = "PASS" if r.passed else "FAIL"
        click.echo(f"{r.metric:<30} {r.value:>8.4f} {bounds_text:>15} {status_str:>8}")

    click.echo()
    verdict = "YES" if state.is_viable else "NO"
    click.echo(f"Viable: {verdict} ({state.viability_pass_count}/{state.viability_total_count} thresholds met)")
|