markitect-main/markitect/infospace/cli.py

"""
CLI commands for infospace lifecycle management.

Provides ``markitect infospace`` subcommands for initialising,
inspecting, and evaluating infospaces.
"""

from __future__ import annotations

import re
from pathlib import Path
from typing import Dict, Optional

import click

from markitect.infospace.config import (
    DisciplineBinding,
    InfospaceConfig,
    SchemaRegistry,
    TopicConfig,
    find_infospace_config,
    load_infospace_config,
    save_infospace_config,
)
from markitect.infospace.entity_parser import parse_entity_directory
from markitect.infospace.state import build_state


def _load_config_or_exit(config_path: Optional[str] = None) -> tuple:
    """Locate and load the infospace configuration, aborting if none exists.

    An explicit ``config_path`` is used as given; otherwise the location is
    whatever ``find_infospace_config()`` returns.  When no location can be
    determined, an error is written to stderr and the process exits with
    status 1.

    Returns:
        A ``(config, path)`` pair: the loaded configuration and the path it
        was loaded from.
    """
    resolved = Path(config_path) if config_path else find_infospace_config()
    if resolved is not None:
        return load_infospace_config(resolved), resolved

    click.echo("Error: No infospace.yaml found. Run 'markitect infospace init' first.", err=True)
    raise SystemExit(1)


@click.group(name="infospace")
def infospace_commands():
    """Manage infospaces — create, inspect, evaluate."""
    # Pure container group: the subcommands below attach themselves to it
    # through the ``@infospace_commands.command()`` decorator.


# ── init ─────────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--topic", required=True, help="Topic name for the infospace.")
@click.option("--domain", default="", help="Knowledge domain.")
@click.option("--sources", default="", help="Path to source material directory.")
@click.option("--discipline", multiple=True, help="Discipline name (repeatable).")
@click.option("--output", "-o", default="infospace.yaml", help="Output config file path.")
def init(topic: str, domain: str, sources: str, discipline: tuple, output: str):
    """Initialise a new infospace configuration file."""
    target = Path(output)

    # Refuse to clobber a configuration that is already on disk.
    if target.exists():
        click.echo(f"Error: {target} already exists.", err=True)
        raise SystemExit(1)

    # One binding per ``--discipline`` occurrence, in the order given.
    bindings = []
    for discipline_name in discipline:
        bindings.append(DisciplineBinding(name=discipline_name))

    topic_cfg = TopicConfig(name=topic, domain=domain, sources=sources)
    new_config = InfospaceConfig(topic=topic_cfg, disciplines=bindings)

    save_infospace_config(new_config, target)
    click.echo(f"Created {target}")


# ── status ───────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def status(config_path: Optional[str]):
    """Show infospace status — entity count, domains, evaluation state."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    base = cfg_path.parent

    # Entities are optional: a missing directory simply means "none yet".
    ent_dir = base / cfg.entities_dir
    parsed = parse_entity_directory(ent_dir) if ent_dir.is_dir() else []

    # The last entry of the metrics history, when present, is the snapshot
    # handed to build_state.
    latest = None
    hist_file = base / cfg.metrics_dir / "history.yaml"
    if hist_file.is_file():
        from markitect.infospace.evaluation_io import read_history
        records = read_history(hist_file)
        latest = records[-1] if records else None

    state = build_state(cfg, entities=parsed, snapshot=latest)

    click.echo(f"Infospace: {state.topic_name}")

    topic_domain = cfg.topic.domain
    if topic_domain:
        click.echo(f"Domain:    {topic_domain}")

    click.echo(f"Entities:  {state.entity_count}")

    if state.domains:
        click.echo(f"Domains:   {', '.join(state.domains)}")

    if cfg.disciplines:
        joined = ", ".join(binding.name for binding in cfg.disciplines)
        click.echo(f"Disciplines: {joined}")

    if not state.has_evaluations:
        click.echo("Evaluations: none")
    else:
        click.echo(f"Last evaluated: {state.latest_snapshot.created_at.isoformat()}")


# ── entities ─────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--sort-by", "sort_key",
    type=click.Choice(["slug", "domain", "words"]),
    default="slug",
    help="Sort entities by field.",
)
@click.option("--by-type", "by_type", is_flag=True, default=False,
              help="Group entities by L2 entity type.")
def entities(config_path: Optional[str], sort_key: str, by_type: bool):
    """List entities with metadata summary."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    base = cfg_path.parent
    ent_dir = base / cfg.entities_dir

    if not ent_dir.is_dir():
        click.echo("No entities directory found.")
        return

    found = parse_entity_directory(ent_dir)
    if not found:
        click.echo("No entities found.")
        return

    # The grouped view has its own layout and ignores --sort-by.
    if by_type:
        _entities_by_type(cfg, base, found)
        return

    # sort_key → (key function, descending?); anything else sorts by slug.
    orderings = {
        "domain": (lambda item: (item.domain or "", item.slug), False),
        "words": (lambda item: item.total_word_count, True),
    }
    key_fn, descending = orderings.get(sort_key, (lambda item: item.slug, False))
    found.sort(key=key_fn, reverse=descending)

    # Fixed-width table: slug, domain, word count.
    click.echo(f"{'Slug':<40} {'Domain':<20} {'Words':>6}")
    click.echo("-" * 68)
    for item in found:
        domain_cell = item.domain or "-"
        click.echo(f"{item.slug:<40} {domain_cell:<20} {item.total_word_count:>6}")
    click.echo(f"\nTotal: {len(found)} entities")


def _entities_by_type(cfg, root: "Path", entity_list: list) -> None:
    """Print entities grouped by L2 entity type.

    Groups are emitted in ``ENTITY_TYPES`` order followed by an
    ``Unclassified`` bucket, which holds entities that have no
    classification file or whose recorded type is not in ``ENTITY_TYPES``.
    Empty groups are omitted.  Each row shows slug, domain (truncated to 18
    characters), VSM system and, when an evaluation could be read, the
    overall score.  Relation-type entities with a mechanism additionally
    print their link endpoints and the mechanism (truncated to 80 chars).

    Args:
        cfg: Loaded infospace configuration; ``classifications_dir`` and
            ``evaluations_dir`` are resolved relative to ``root``.
        root: Directory containing the infospace configuration file.
        entity_list: Parsed entities to display.
    """
    from markitect.infospace.classification import ENTITY_TYPES
    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.evaluation_io import read_entity_evaluation

    # Load classifications, keyed by entity slug.
    cls_dir = root / cfg.classifications_dir
    cls_map: dict = {}
    if cls_dir.is_dir():
        for c in read_classifications_directory(cls_dir):
            cls_map[c.entity_slug] = c

    # Load evaluation scores (best-effort): slug → overall_score.
    eval_dir = root / cfg.evaluations_dir
    eval_scores: dict = {}
    if eval_dir.is_dir():
        for ef in eval_dir.glob("*.md"):
            try:
                ev = read_entity_evaluation(ef)
                eval_scores[ev.entity_slug] = ev.overall_score
            except Exception:
                # Deliberately tolerant: an unreadable evaluation file only
                # means that entity is listed without a score.
                continue

    # Build index: entity_type → list of (entity, classification).
    entity_index = {t: [] for t in ENTITY_TYPES}
    entity_index["Unclassified"] = []

    for e in entity_list:
        cls = cls_map.get(e.slug)
        if cls is None:
            entity_index["Unclassified"].append((e, None))
        else:
            # Unknown types fall back to the Unclassified bucket.
            bucket = cls.entity_type if cls.entity_type in entity_index else "Unclassified"
            entity_index[bucket].append((e, cls))

    # Print each type group.
    type_order = list(ENTITY_TYPES) + ["Unclassified"]
    total = 0
    for etype in type_order:
        group = entity_index.get(etype, [])
        if not group:
            continue
        click.echo(f"\n=== {etype} ({len(group)} entities) ===")
        group.sort(key=lambda x: x[0].slug)
        for e, cls in group:
            # A classification without a VSM system must not reach the
            # width-formatted f-string below: None rejects the "<4" spec.
            vsm = (cls.vsm_system or "") if cls else ""
            domain = (e.domain or "-")[:18]
            score = eval_scores.get(e.slug)
            score_str = f" \u2605{score:.1f}" if score is not None else ""
            slug_col = f"{e.slug:<40}"
            click.echo(f"  {slug_col} {domain:<18} {vsm:<4}{score_str}")
            if cls and cls.entity_type == "Relation" and cls.links_mechanism:
                subj = cls.links_subject or cls.links_subject_slug or "?"
                obj = cls.links_object or cls.links_object_slug or "?"
                click.echo(f"    \u2192 links: {subj} \u2194 {obj}")
                mech = cls.links_mechanism
                if len(mech) > 80:
                    mech = mech[:77] + "..."
                click.echo(f"    \u2192 mechanism: {mech}")
        total += len(group)

    click.echo(f"\nTotal: {total} entities")


# ── chapters (per-source triage view) ────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--format", "output_format",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format.",
)
def chapters(config_path: Optional[str], output_format: str):
    """List source files in canonical order with per-source stats.

    For each source file in the sources directory, reports entity count,
    mean per-entity score (if evaluated), classification coverage, and
    processing status. Useful for triaging long-text infospaces.
    """
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    # With no configured sources path, the config's own directory is scanned.
    sources_dir = root / cfg.topic.sources if cfg.topic.sources else root
    if not sources_dir.is_dir():
        click.echo(f"No sources directory at {sources_dir}.", err=True)
        raise SystemExit(1)

    source_files = sorted(sources_dir.glob("*.md"))
    if not source_files:
        click.echo(f"No source files in {sources_dir}.", err=True)
        raise SystemExit(1)

    entities_dir = root / cfg.entities_dir
    entity_list = (
        parse_entity_directory(entities_dir) if entities_dir.is_dir() else []
    )

    # Build a source_id → [entities] map using the source_chapter field.
    # Matching is lenient: an entity belongs to a source when its
    # source_chapter contains one of the key spellings derived from the
    # source stem (case-insensitively).
    def _chapter_keys(source_id: str) -> list:
        """Return strings an entity's source_chapter might contain."""
        keys = [source_id, source_id.replace("-", " ")]
        m = re.match(r"book-(\d+)-chapter-(\d+)", source_id)
        if m:
            book, chap = m.group(1), m.group(2)
            roman = {"1": "I", "2": "II", "3": "III", "4": "IV", "5": "V"}
            if book in roman:
                keys.append(f"Book {roman[book]}, Chapter {int(chap)}")
                keys.append(f"Book {roman[book]} Chapter {int(chap)}")
        return keys

    def _chapter_patterns(source_id: str) -> list:
        """Compile one lower-cased, digit-bounded pattern per candidate key.

        The digit guards stop "chapter 1" from matching inside
        "chapter 12" (or "11" from matching inside "111"), which would
        otherwise attribute the same entity to several sources.
        """
        return [
            re.compile(r"(?<!\d)" + re.escape(key.lower()) + r"(?!\d)")
            for key in _chapter_keys(source_id)
        ]

    # Precompute evaluation scores and classification slugs once.
    evals_dir = root / cfg.evaluations_dir
    cls_dir = root / cfg.classifications_dir
    eval_scores: Dict[str, float] = {}
    if evals_dir.is_dir():
        from markitect.infospace.evaluation_io import read_entity_evaluation
        for ev_path in evals_dir.glob("*.md"):
            try:
                ev = read_entity_evaluation(ev_path)
                if ev.overall_score is not None:
                    eval_scores[ev_path.stem] = ev.overall_score
            except Exception:
                # Best-effort: unreadable evaluations count as "not evaluated".
                continue
    classified_slugs = (
        {p.stem for p in cls_dir.glob("*.md")} if cls_dir.is_dir() else set()
    )

    # Lower-case each entity's chapter text once instead of once per source.
    entity_chapters = [
        (e, (e.source_chapter or "").lower()) for e in entity_list
    ]

    rows = []
    for source_file in source_files:
        source_id = source_file.stem
        patterns = _chapter_patterns(source_id)
        matched = [
            e for e, chapter_text in entity_chapters
            if any(p.search(chapter_text) for p in patterns)
        ]
        slugs = {e.slug for e in matched}
        evaluated = slugs & set(eval_scores)
        classified = slugs & classified_slugs
        mean = (
            sum(eval_scores[s] for s in evaluated) / len(evaluated)
            if evaluated else None
        )
        rows.append({
            "source_id": source_id,
            "entities": len(matched),
            "evaluated": len(evaluated),
            "classified": len(classified),
            "mean_score": round(mean, 2) if mean is not None else None,
        })

    if output_format == "json":
        import json
        click.echo(json.dumps(rows, indent=2))
        return

    # Text: aligned table. Cells are rendered first so that column widths
    # are derived from exactly what will be printed.
    headers = ("source", "entities", "evaluated", "classified", "mean_score")
    table = [
        (
            r["source_id"],
            str(r["entities"]),
            str(r["evaluated"]),
            str(r["classified"]),
            "-" if r["mean_score"] is None else f"{r['mean_score']:.2f}",
        )
        for r in rows
    ]
    widths = [
        max([len(h)] + [len(cells[i]) for cells in table])
        for i, h in enumerate(headers)
    ]
    fmt = "  ".join(f"{{:<{w}}}" for w in widths)
    click.echo(fmt.format(*headers))
    click.echo(fmt.format(*("-" * w for w in widths)))
    for cells in table:
        click.echo(fmt.format(*cells))

    totals = {
        "entities": sum(r["entities"] for r in rows),
        "evaluated": sum(r["evaluated"] for r in rows),
        "classified": sum(r["classified"] for r in rows),
    }
    click.echo(
        f"\n{len(rows)} source file(s); "
        f"{totals['entities']} entities, "
        f"{totals['evaluated']} evaluated, "
        f"{totals['classified']} classified."
    )


# ── entity (single lookup) ───────────────────────────────────────────


@infospace_commands.command()
@click.argument("name")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def entity(name: str, config_path: Optional[str]):
    """Look up one entity by name, tolerating case / hyphens / underscores.

    Prints slug, source path, domain, chapter, word count, overall score,
    VSM system (if classified), and evaluation-file path.
    """
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent
    entities_dir = root / cfg.entities_dir

    if not entities_dir.is_dir():
        click.echo("No entities directory found.", err=True)
        raise SystemExit(1)

    entity_list = parse_entity_directory(entities_dir)
    if not entity_list:
        click.echo("No entities found.", err=True)
        raise SystemExit(1)

    # Normalize: lowercase, underscores.
    def norm(s: str) -> str:
        return s.lower().replace("-", "_").replace(" ", "_")

    target = norm(name)

    # Resolution order:
    #   1. slug identical to the normalized input,
    #   2. exactly one slug whose *normalized* form equals the input, so an
    #      exact hit is never reported as ambiguous merely because other
    #      slugs contain it as a substring,
    #   3. unique substring match for partial input.
    by_slug = {e.slug: e for e in entity_list}
    match = by_slug.get(target)
    if match is None:
        exact = [e for e in entity_list if norm(e.slug) == target]
        if len(exact) == 1:
            match = exact[0]

    if match is None:
        # Substring fallback for partial input.
        candidates = [e for e in entity_list if target in norm(e.slug)]
        if len(candidates) == 1:
            match = candidates[0]
        elif len(candidates) > 1:
            click.echo(f"Ambiguous — '{name}' matches multiple entities:", err=True)
            for c in sorted(candidates, key=lambda e: e.slug)[:10]:
                click.echo(f"  {c.slug}", err=True)
            if len(candidates) > 10:
                click.echo(f"  … and {len(candidates) - 10} more", err=True)
            raise SystemExit(1)
        else:
            click.echo(f"No entity matching '{name}'.", err=True)
            # Suggest slugs sharing the first word of the input.  Compare
            # against normalized slugs, and skip the hint when the stem is
            # empty (an empty string is "in" every slug).
            stem = target.split("_", 1)[0]
            near = sorted(
                e.slug for e in entity_list
                if stem and stem in norm(e.slug)
            )[:5]
            if near:
                click.echo(f"  Near matches: {', '.join(near)}", err=True)
            raise SystemExit(1)

    # Load score + classification (best-effort).
    score: Optional[float] = None
    evaluator: Optional[str] = None
    eval_file = root / cfg.evaluations_dir / f"{match.slug}.md"
    if eval_file.is_file():
        try:
            from markitect.infospace.evaluation_io import read_entity_evaluation
            ev = read_entity_evaluation(eval_file)
            score = ev.overall_score
            evaluator = ev.evaluator
        except Exception:
            # An unreadable evaluation is reported as "not yet evaluated".
            pass

    vsm: Optional[str] = None
    cls_file = root / cfg.classifications_dir / f"{match.slug}.md"
    if cls_file.is_file():
        try:
            from markitect.infospace.classification_io import read_entity_classification
            cls = read_entity_classification(cls_file)
            vsm = cls.vsm_system
        except Exception:
            # An unreadable classification leaves vsm_system shown as "-".
            pass

    # Output — one field per line so it's easy to grep or pipe.
    click.echo(f"slug:           {match.slug}")
    click.echo(f"source_path:    {match.source_path}")
    click.echo(f"domain:         {match.domain or '-'}")
    click.echo(f"chapter:        {match.source_chapter or '-'}")
    click.echo(f"word_count:     {match.total_word_count}")
    click.echo(f"vsm_system:     {vsm or '-'}")
    if score is not None:
        click.echo(f"overall_score:  {score:.2f}")
        click.echo(f"evaluator:      {evaluator or '-'}")
        click.echo(f"evaluation:     {eval_file}")
    else:
        click.echo("evaluation:     (not yet evaluated)")


# ── evaluate ─────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default="openrouter", help="LLM provider (openrouter, openai, etc.).")
@click.option("--model", default=None, help="LLM model name.")
@click.option("--entity", "entity_slug", default=None, help="Evaluate a single entity by slug.")
@click.option("--chapter", default=None, help="Evaluate entities from a specific chapter.")
@click.option("--force", is_flag=True, default=False,
              help="Re-evaluate entities whose evaluation file already exists.")
@click.option("--model-fallback", "model_fallback", default=None,
              help="If the primary model hits a rate limit (429), retry the "
                   "failed entities once with this model. Useful on free tiers "
                   "where models have separate quota buckets (e.g. "
                   "gemini-2.5-flash → gemini-2.5-flash-lite).")
def evaluate(config_path, provider, model, entity_slug, chapter, force, model_fallback):
    """Evaluate entities using LLM-based quality assessment."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    entities_dir = root / cfg.entities_dir
    if not entities_dir.is_dir():
        click.echo("Error: No entities directory found.", err=True)
        raise SystemExit(1)

    entity_list = parse_entity_directory(entities_dir)
    if not entity_list:
        click.echo("No entities to evaluate.")
        return

    # Filter. Accept hyphenated input for --entity by normalizing to the
    # underscore slug format produced by parse_entity_directory.
    # --entity takes precedence over --chapter when both are given.
    if entity_slug:
        normalized = entity_slug.replace("-", "_")
        matches = [e for e in entity_list if e.slug == normalized]
        if not matches:
            # Build a short "did you mean…" list from entities sharing a stem.
            stem = normalized.split("_", 1)[0]
            near = sorted(e.slug for e in entity_list if e.slug.startswith(stem))[:5]
            msg = f"Error: Entity '{entity_slug}' not found."
            if near:
                msg += f" Did you mean: {', '.join(near)} ?"
            click.echo(msg, err=True)
            raise SystemExit(1)
        entity_list = matches
    elif chapter:
        # Entities without a source_chapter can never match; treat the
        # missing value as empty text rather than failing on ``in None``.
        entity_list = [
            e for e in entity_list if chapter in (e.source_chapter or "")
        ]
        if not entity_list:
            click.echo(f"No entities found for chapter '{chapter}'.")
            return

    # Skip entities that already have evaluation files (incremental resume).
    # Applies uniformly to full-pass, --entity, and --chapter runs unless
    # --force is set.
    from markitect.infospace.evaluate import run_entity_evaluation
    output_dir = root / cfg.evaluations_dir
    if not force and output_dir.is_dir():
        existing = {p.stem for p in output_dir.glob("*.md")}
        before = len(entity_list)
        entity_list = [e for e in entity_list if e.slug not in existing]
        skipped = before - len(entity_list)
        if not entity_list:
            click.echo("All selected entities already evaluated. "
                       "Re-run with --force to overwrite.")
            return
        if skipped:
            click.echo(f"Skipping {skipped} already-evaluated entities. "
                       "Use --force to re-evaluate.")

    # Create adapter
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig
    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model, temperature=0.3, max_tokens=2000)

    # Progress callback — surface error detail so agents don't have to
    # drop into Python to see whether an ERROR was 429, 503, or auth.
    def on_progress(done, total, result):
        status = result.status.upper()
        if status == "ERROR" and result.error:
            click.echo(f"  [{done}/{total}] {result.key}: ERROR — {result.error}")
        else:
            click.echo(f"  [{done}/{total}] {result.key}: {status}")

    click.echo(f"Evaluating {len(entity_list)} entities via {provider}...")

    summary = run_entity_evaluation(
        config=cfg,
        entities=entity_list,
        adapter=adapter,
        run_config=run_config,
        output_dir=output_dir,
        progress_callback=on_progress,
    )

    # Model fallback: if any entities failed with a rate-limit-looking
    # error and the user opted in with --model-fallback, retry them once
    # with a fresh adapter on the fallback model. Different free-tier
    # models have separate quota buckets, so this often succeeds when
    # the primary is exhausted.
    if model_fallback and summary.failed > 0:
        rate_limited = [
            r for r in summary.results
            if r.status == "error"
            and r.error
            and ("429" in r.error or "rate" in r.error.lower())
        ]
        if rate_limited:
            retry_slugs = {r.key for r in rate_limited}
            retry_entities = [e for e in entity_list if e.slug in retry_slugs]
            click.echo(
                f"\n{len(retry_entities)} rate-limited entities — "
                f"retrying with --model-fallback {model_fallback}..."
            )
            fb_adapter = create_adapter(provider, model=model_fallback)
            fb_run_config = RunConfig(
                model_name=model_fallback, temperature=0.3, max_tokens=2000
            )
            fb_summary = run_entity_evaluation(
                config=cfg,
                entities=retry_entities,
                adapter=fb_adapter,
                run_config=fb_run_config,
                output_dir=output_dir,
                progress_callback=on_progress,
            )
            # Fold the retry into the primary summary: the retried entities
            # stop counting as failures unless they failed again.
            summary.succeeded += fb_summary.succeeded
            summary.failed = (summary.failed - len(retry_entities)) + fb_summary.failed
            summary.total_prompt_tokens += fb_summary.total_prompt_tokens
            summary.total_completion_tokens += fb_summary.total_completion_tokens

    click.echo(f"\nDone: {summary.succeeded} succeeded, {summary.failed} failed, {summary.skipped} skipped")
    if summary.total_tokens > 0:
        click.echo(f"Tokens used: {summary.total_tokens}")


# ── eval-summary ──────────────────────────────────────────────────────


@infospace_commands.command(name="eval-summary")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--update-metrics", is_flag=True, default=False,
              help="Merge per_entity_mean into metrics.yaml for viability checks.")
def eval_summary(config_path: Optional[str], update_metrics: bool):
    """Show aggregate statistics from per-entity evaluation files."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    evaluations_dir = root / cfg.evaluations_dir
    if not evaluations_dir.is_dir():
        click.echo("No evaluations directory found. Run 'markitect infospace evaluate' first.")
        return

    from markitect.infospace.evaluation_io import read_entity_evaluation

    eval_files = sorted(evaluations_dir.glob("*.md"))
    if not eval_files:
        click.echo("No evaluation files found.")
        return

    overall_scores: list = []   # one overall score per usable evaluation
    dim_scores: dict = {}       # dimension name → list of values
    failed: list = []           # (file stem, reason) for unusable files

    for ef in eval_files:
        try:
            ev = read_entity_evaluation(ef)
            if ev.overall_score is None:
                # An evaluation without an overall score cannot take part
                # in the mean/min/max below; report it instead of letting
                # the arithmetic fail on None.
                failed.append((ef.stem, "missing overall score"))
                continue
            overall_scores.append(ev.overall_score)
            for s in ev.scores:
                dim_scores.setdefault(s.name, []).append(s.value)
        except Exception as exc:
            failed.append((ef.stem, str(exc)))

    n = len(overall_scores)
    if n == 0:
        click.echo("No evaluations could be read.")
        return

    mean_overall = sum(overall_scores) / n

    click.echo(f"Evaluation summary — {n} entities evaluated")
    if failed:
        click.echo(f"  (failed to read: {len(failed)})")
    click.echo()
    click.echo(f"  {'Dimension':<30} {'Mean':>6}")
    click.echo("  " + "-" * 38)
    click.echo(f"  {'overall':<30} {mean_overall:>6.3f}")
    for dim, vals in sorted(dim_scores.items()):
        click.echo(f"  {dim:<30} {sum(vals)/len(vals):>6.3f}")

    score_min = min(overall_scores)
    score_max = max(overall_scores)
    click.echo()
    click.echo(f"  Range: {score_min:.2f} – {score_max:.2f}")

    if update_metrics:
        # Merge into the existing metrics rather than replacing the file.
        from markitect.infospace.history import read_metrics_file, write_metrics_file
        metrics_file = root / cfg.metrics_dir / "metrics.yaml"
        existing = read_metrics_file(metrics_file)
        existing["per_entity_mean"] = round(mean_overall, 6)
        write_metrics_file(existing, metrics_file)
        click.echo(f"\nUpdated metrics.yaml: per_entity_mean = {mean_overall:.4f}")


# ── relations ─────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--entity", "entity_slug", default=None,
              help="Show only relations involving this entity slug.")
@click.option("--vsm", "vsm_filter", default=None,
              help="Show only relations whose VSM channel contains this string (e.g. S2, S3).")
@click.option("--loops", "loops_only", is_flag=True, default=False,
              help="Show only feedback loops (cycles in the relation graph).")
@click.option("--stats", "stats_only", is_flag=True, default=False,
              help="Show aggregate statistics only, no individual relations.")
def relations(config_path: Optional[str], entity_slug: Optional[str],
              vsm_filter: Optional[str], loops_only: bool, stats_only: bool):
    """Show the L3 relation graph — triplets, feedback loops, and VSM channels."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.relation_parser import parse_relations_directory

    relations_dir = root / cfg.relations_dir
    if not relations_dir.is_dir():
        click.echo("No relations directory found. Create output/relations/ and add relation files.")
        return

    all_relations = parse_relations_directory(relations_dir)
    if not all_relations:
        click.echo("No relation files found in " + str(relations_dir))
        return

    # networkx is optional: without it the relation table still works, but
    # graph statistics and cycle detection are unavailable.
    try:
        import networkx as nx
    except ImportError:
        nx = None

    # Build directed graph for cycle detection and find feedback loops.
    G = None
    loops: list = []
    if nx is not None:
        G = nx.DiGraph()
        for r in all_relations:
            G.add_edge(r.subject_slug, r.object_slug,
                       predicate=r.predicate,
                       relation_type=r.relation_type,
                       vsm_channel=r.vsm_channel,
                       slug=r.slug)
        try:
            loops = list(nx.simple_cycles(G))
        except Exception:
            # Best-effort: a failure in cycle enumeration must not block
            # the rest of the report.
            loops = []

    def _vsm_code(channel: str) -> str:
        """Strip parenthetical description, returning just the system code (e.g. 'S3 → S1')."""
        return re.sub(r'\s*\(.*', '', channel).strip() or channel

    # Stats summary (always computed over the full, unfiltered set).
    n = len(all_relations)
    vsm_counts: dict = {}
    type_counts: dict = {}
    for r in all_relations:
        code = _vsm_code(r.vsm_channel)
        vsm_counts[code] = vsm_counts.get(code, 0) + 1
        type_counts[r.relation_type] = type_counts.get(r.relation_type, 0) + 1

    click.echo(f"Relation graph — {n} relations")
    if G is not None:
        click.echo(f"  Entities in graph: {G.number_of_nodes()}")
        click.echo(f"  Feedback loops: {len(loops)}")
    else:
        click.echo("  Feedback loops: unavailable (networkx is not installed)")
    click.echo()

    if stats_only:
        click.echo("Relation types:")
        for rt, count in sorted(type_counts.items(), key=lambda x: -x[1]):
            click.echo(f"  {rt:<25} {count:>4}")
        click.echo()
        click.echo("VSM channels:")
        for ch, count in sorted(vsm_counts.items(), key=lambda x: -x[1]):
            click.echo(f"  {ch:<20} {count:>4}")
        return

    # Feedback loops section
    if loops:
        click.echo(f"Feedback loops ({len(loops)}):")
        for i, cycle in enumerate(loops, 1):
            click.echo(f"  Loop {i}: {' → '.join(cycle)} → {cycle[0]}")
        click.echo()
    elif loops_only:
        if G is None:
            # Do not claim "no loops" when detection never ran.
            click.echo(
                "Feedback loop detection requires networkx, which is not installed.",
                err=True,
            )
        else:
            click.echo("No feedback loops detected in current relation set.")
        return

    if loops_only:
        return

    # Filter relations
    filtered = all_relations
    if entity_slug:
        filtered = [r for r in filtered
                    if entity_slug in (r.subject_slug, r.object_slug)]
        if not filtered:
            click.echo(f"No relations found involving '{entity_slug}'.")
            return
    if vsm_filter:
        filtered = [r for r in filtered if vsm_filter in r.vsm_channel]
        if not filtered:
            click.echo(f"No relations with VSM channel containing '{vsm_filter}'.")
            return

    # Display relations; over-long cells are cut and marked with "..".
    click.echo(f"{'Subject':<35} {'Predicate':<30} {'Object':<35} {'VSM'}")
    click.echo("-" * 110)
    for r in filtered:
        subj = r.subject[:33] + ".." if len(r.subject) > 35 else r.subject
        obj = r.object[:33] + ".." if len(r.object) > 35 else r.object
        pred = r.predicate[:28] + ".." if len(r.predicate) > 30 else r.predicate
        click.echo(f"{subj:<35} {pred:<30} {obj:<35} {r.vsm_channel}")


# ── classify ─────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--entity", "entity_slug", default=None,
              help="Classify a single entity by slug.")
@click.option("--provider", default="openrouter",
              help="LLM provider (openrouter, gemini, openai, …).")
@click.option("--model", default=None, help="Model name override.")
@click.option("--rpm", default=0, type=int,
              help="Max requests per minute (0 = unlimited). Use 10 for Gemini free tier.")
def classify(config_path: Optional[str], entity_slug: Optional[str],
             provider: str, model: Optional[str], rpm: int):
    """Classify entities with Entity Type and VSM System (L2)."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classifier import run_entity_classification
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    # Guard the directory like the other commands do, so a missing entities
    # directory is reported the same way as an empty one.
    entities_dir = root / cfg.entities_dir
    entity_list = (
        parse_entity_directory(entities_dir) if entities_dir.is_dir() else []
    )
    if not entity_list:
        click.echo("No entities found in " + str(entities_dir), err=True)
        return

    output_dir = root / cfg.classifications_dir

    if entity_slug:
        # Exact slug first; otherwise accept hyphenated input by retrying
        # with underscores, mirroring ``evaluate --entity``.
        selected = [e for e in entity_list if e.slug == entity_slug]
        if not selected:
            normalized = entity_slug.replace("-", "_")
            selected = [e for e in entity_list if e.slug == normalized]
        entity_list = selected
        if not entity_list:
            click.echo(f"Entity '{entity_slug}' not found.", err=True)
            return
    else:
        # Incremental skip — entities already classified are omitted
        if output_dir.is_dir():
            done_slugs = {p.stem for p in output_dir.glob("*.md")}
            before = len(entity_list)
            entity_list = [e for e in entity_list if e.slug not in done_slugs]
            skipped = before - len(entity_list)
            if skipped:
                click.echo(f"Skipping {skipped} already-classified entities.")
        if not entity_list:
            click.echo("All entities already classified. Nothing to do.")
            return

    # Translate the requests-per-minute cap into a per-request delay.
    delay = (60.0 / rpm) if rpm > 0 else 0.0
    click.echo(f"Classifying {len(entity_list)} entities …" +
               (f" (rate: {rpm} RPM, {delay:.1f}s delay)" if delay else ""))
    output_dir.mkdir(parents=True, exist_ok=True)

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model, temperature=0.1, max_tokens=2000)

    def _progress(done: int, total: int, result) -> None:
        if result.status == "success":
            click.echo(f"  [{done}/{total}] {result.key}")
        else:
            click.echo(f"  [{done}/{total}] {result.key} — FAILED: {result.error}")

    summary = run_entity_classification(
        config=cfg,
        entities=entity_list,
        adapter=adapter,
        run_config=run_config,
        output_dir=output_dir,
        progress_callback=_progress,
        delay_seconds=delay,
    )
    click.echo(f"\nDone: {summary.succeeded} classified, {summary.failed} failed.")


# ── classify-summary ──────────────────────────────────────────────────


@infospace_commands.command(name="classify-summary")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--update-metrics", "update_metrics",
    is_flag=True,
    default=False,
    help="Write type_distribution metrics to metrics.yaml.",
)
def classify_summary(config_path: Optional[str], update_metrics: bool):
    """Show type × VSM distribution across all classified entities (L2)."""
    # NOTE: the docstring above is also the command's --help text.
    #
    # Prints three views of the stored classifications: counts per entity
    # type, counts per VSM system, and a type × VSM matrix.  With
    # --update-metrics the type distribution, the number of occupied matrix
    # cells and the type entropy are merged into metrics.yaml.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification import ENTITY_TYPES, VSM_SYSTEMS
    from markitect.infospace.classification_io import read_classifications_directory

    classifications_path = root / cfg.classifications_dir
    if not classifications_path.is_dir():
        click.echo("No classifications directory found. Run 'classify' first.")
        return

    records = read_classifications_directory(classifications_path)
    if not records:
        click.echo("No classification files found.")
        return

    total = len(records)

    # Tally occurrences per type, per VSM system and per (type, system) cell.
    per_type: dict = {}
    per_vsm: dict = {}
    cells: dict = {}
    for record in records:
        etype = record.entity_type
        system = record.vsm_system
        per_type[etype] = per_type.get(etype, 0) + 1
        per_vsm[system] = per_vsm.get(system, 0) + 1
        cells[(etype, system)] = cells.get((etype, system), 0) + 1

    def _share(count: int) -> float:
        # Percentage of all classified entities represented by ``count``.
        return 100 * count / total if total else 0.0

    click.echo(f"Classification summary — {total} entities\n")

    # Most frequent types first; ties keep first-seen order (stable sort).
    click.echo("Entity types:")
    ranked_types = sorted(per_type.items(), key=lambda item: item[1], reverse=True)
    for etype, count in ranked_types:
        click.echo(f"  {etype:<15} {count:>4}  ({_share(count):.1f}%)")
    click.echo()

    # VSM systems are listed in a fixed order; absent systems are omitted.
    vsm_order = ["S1", "S2", "S3", "S3*", "S4", "S5"]
    click.echo("VSM systems:")
    for system in vsm_order:
        if system not in per_vsm:
            continue
        count = per_vsm[system]
        click.echo(f"  {system:<6} {count:>4}  ({_share(count):.1f}%)")
    click.echo()

    # Type × VSM matrix: one row per known entity type, '.' for empty cells.
    click.echo(" " * 15 + "".join(f"{system:>7}" for system in vsm_order))
    click.echo("-" * (15 + 7 * len(vsm_order)))
    for etype in ENTITY_TYPES:
        rendered = [
            f"{(cells.get((etype, system), 0) or '.'):>7}"
            for system in vsm_order
        ]
        click.echo(f"{etype:<15}" + "".join(rendered))
    click.echo()

    occupied = len(cells)
    capacity = len(ENTITY_TYPES) * len(vsm_order)
    click.echo(f"Matrix fill: {occupied}/{capacity} cells occupied")
    click.echo()

    if not update_metrics:
        return

    import math
    from markitect.infospace.history import read_metrics_file, write_metrics_file

    metrics_dir = root / cfg.metrics_dir
    metrics_dir.mkdir(parents=True, exist_ok=True)
    metrics_path = metrics_dir / "metrics.yaml"

    # Shannon entropy (base 2) of the entity-type distribution.
    entropy = 0.0
    for count in per_type.values():
        prob = count / total
        if prob > 0:
            entropy -= prob * math.log2(prob)

    # New keys override same-named keys already present in the file.
    previous = read_metrics_file(metrics_path)
    combined = {
        **previous,
        "type_distribution": per_type,
        "vsm_type_matrix_cells": occupied,
        "type_entropy": round(entropy, 4),
    }
    write_metrics_file(combined, metrics_path)
    click.echo(
        f"Updated metrics.yaml: type_entropy={entropy:.4f}, "
        f"vsm_type_matrix_cells={occupied}"
    )


# ── classify-links ────────────────────────────────────────────────────


@infospace_commands.command(name="classify-links")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--provider",
    default="openrouter",
    help="LLM provider (openrouter, gemini, openai, …).",
)
@click.option("--model", default=None, help="Model name override.")
def classify_links(config_path: Optional[str], provider: str, model: Optional[str]):
    """Capture relation endpoint data (subject, object, mechanism) for Relation-type entities."""
    # NOTE: the docstring above is also the command's --help text.
    #
    # Only classifications whose type is "Relation" and whose
    # ``links_mechanism`` is empty are sent to the LLM; results are written
    # back into the classifications directory.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification import ENTITY_TYPES
    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.classifier import run_relation_link_capture
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    classifications_path = root / cfg.classifications_dir
    if not classifications_path.is_dir():
        click.echo("No classifications directory found. Run 'classify' first.", err=True)
        raise SystemExit(1)

    stored = read_classifications_directory(classifications_path)
    by_slug = {item.entity_slug: item for item in stored}

    # Relation-type classifications that still lack a mechanism.
    pending_slugs = [
        item.entity_slug
        for item in stored
        if item.entity_type == "Relation" and not item.links_mechanism
    ]
    if not pending_slugs:
        click.echo("All Relation-type entities already have endpoint data. Nothing to do.")
        return

    # Pair each pending slug with its parsed entity; note the ones that have
    # no matching entity file.
    known_entities = {
        entity.slug: entity
        for entity in parse_entity_directory(root / cfg.entities_dir)
    }
    to_enrich = []
    orphaned = []
    for slug in pending_slugs:
        if slug in known_entities:
            to_enrich.append(known_entities[slug])
        else:
            orphaned.append(slug)

    if orphaned:
        click.echo(f"Warning: {len(orphaned)} Relation-type slugs not found in "
                   f"entities directory and will be skipped.")

    if not to_enrich:
        click.echo("No Relation-type entities found to enrich.")
        return

    click.echo(f"Capturing relation links for {len(to_enrich)} Relation-type entities …")

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model, temperature=0.1, max_tokens=512)

    def _progress(done: int, total: int, result) -> None:
        # One line per processed entity; failures carry the error text.
        succeeded = result.status == "success"
        if succeeded:
            click.echo(f"  [{done}/{total}] {result.key}")
            return
        click.echo(f"  [{done}/{total}] {result.key} — FAILED: {result.error}")

    summary = run_relation_link_capture(
        config=cfg,
        relation_entities=to_enrich,
        classifications=by_slug,
        adapter=adapter,
        run_config=run_config,
        output_dir=classifications_path,
        progress_callback=_progress,
    )
    click.echo(f"\nDone: {summary.succeeded} enriched, {summary.failed} failed.")


# ── viability ────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def viability(config_path: Optional[str]):
    """Show viability dashboard — threshold checks and pass/fail."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)

    if not cfg.viability:
        click.echo("No viability thresholds configured in infospace.yaml.")
        return

    def _describe_bounds(threshold) -> str:
        # Render the configured limits as "min=…, max=…", omitting unset ones.
        parts = []
        if threshold.min is not None:
            parts.append(f"min={threshold.min}")
        if threshold.max is not None:
            parts.append(f"max={threshold.max}")
        return ", ".join(parts)

    # Pick up numeric values from metrics.yaml when it exists; non-numeric
    # entries are ignored.
    numeric_metrics: dict = {}
    metrics_file = cfg_path.parent / cfg.metrics_dir / "metrics.yaml"
    if metrics_file.is_file():
        import yaml
        loaded = yaml.safe_load(metrics_file.read_text(encoding="utf-8"))
        if isinstance(loaded, dict):
            for key, value in loaded.items():
                if isinstance(value, (int, float)):
                    numeric_metrics[key] = float(value)

    state = build_state(cfg, metrics=numeric_metrics or None)

    if not state.viability_results:
        # Nothing was evaluated: list the thresholds so the user sees what
        # is expected.
        click.echo("No metrics available. Run evaluations first.")
        click.echo("\nConfigured thresholds:")
        for name, threshold in cfg.viability.items():
            click.echo(f"  {name}: {_describe_bounds(threshold)}")
        return

    click.echo(f"{'Metric':<30} {'Value':>8} {'Threshold':>15} {'Status':>8}")
    click.echo("-" * 63)
    for outcome in state.viability_results:
        limits = _describe_bounds(outcome.threshold)
        status_str = "PASS" if outcome.passed else "FAIL"
        click.echo(
            f"{outcome.metric:<30} {outcome.value:>8.4f} {limits:>15} {status_str:>8}"
        )

    click.echo()
    verdict = "YES" if state.is_viable else "NO"
    click.echo(
        f"Viable: {verdict} "
        f"({state.viability_pass_count}/{state.viability_total_count} thresholds met)"
    )


# ── check ───────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--concern",
    "concerns",
    multiple=True,
    type=click.Choice(["redundancy", "coverage", "coherence", "consistency", "granularity"]),
    help="Run specific concern(s). Omit to run all five.",
)
@click.option("--json", "as_json", is_flag=True, help="Output results as JSON.")
def check(config_path: Optional[str], concerns: tuple, as_json: bool):
    """Run collection-level quality checks (C1–C5)."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    entities_dir = root / cfg.entities_dir
    if not entities_dir.is_dir():
        click.echo("Error: No entities directory found.", err=True)
        raise SystemExit(1)

    entity_list = parse_entity_directory(entities_dir)
    if not entity_list:
        click.echo("No entities to check.")
        return

    from markitect.infospace.checks import run_all_checks

    # No --concern options means "run everything", signalled by None.
    report = run_all_checks(
        entities=entity_list,
        checks=list(concerns) or None,
    )

    if as_json:
        import json
        click.echo(json.dumps(report.to_dict(), indent=2))
    else:
        click.echo(f"Collection checks — {len(entity_list)} entities\n")
        for concern_name, concern_data in report.to_dict().items():
            label = concern_data.get("concern", concern_name.upper())
            click.echo(f"  {label} — {concern_name}")
            for field, value in concern_data.items():
                # The label was already printed in the heading line.
                if field != "concern":
                    click.echo(f"    {field}: {value}")
            click.echo()

    summary_metrics = report.metrics()
    if not summary_metrics:
        return

    # Human-readable runs also get a sorted metrics digest.
    if not as_json:
        click.echo("Metrics summary:")
        for metric_name, metric_value in sorted(summary_metrics.items()):
            click.echo(f"  {metric_name}: {metric_value:.4f}")

    # A snapshot is recorded whenever metrics exist, in both output modes.
    from markitect.infospace.history import record_check_results
    snap = record_check_results(report, cfg, root, entity_count=len(entity_list))
    if not as_json:
        click.echo(f"\nRecorded snapshot {snap.snapshot_id}")


# ── history ─────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--metric", default=None, help="Show trend for a specific metric.")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON.")
def history(config_path: Optional[str], metric: Optional[str], as_json: bool):
    """Show metrics history — snapshots over time."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.history import get_history, metric_trend

    snapshots = get_history(cfg, root)
    if not snapshots:
        click.echo("No history found. Run 'markitect infospace check' first.")
        return

    # --metric: show the trend of a single metric instead of the table.
    if metric:
        points = metric_trend(snapshots, metric)
        if not points:
            click.echo(f"No data for metric '{metric}'.")
            return
        if not as_json:
            click.echo(f"Trend: {metric}\n")
            for point in points:
                # Dates are cut to 19 characters before printing.
                click.echo(f"  {point['date'][:19]}  {point['value']:.4f}")
            return
        import json
        click.echo(json.dumps(points, indent=2))
        return

    # Full history as JSON.
    if as_json:
        import json
        serialised = [snapshot.to_dict() for snapshot in snapshots]
        click.echo(json.dumps(serialised, indent=2, default=str))
        return

    # Full history as a fixed-width table.
    click.echo(f"History: {len(snapshots)} snapshot(s)\n")
    click.echo(f"{'#':<4} {'Date':<20} {'Entities':>8} {'Metrics':>8}")
    click.echo("-" * 42)
    for row_no, snapshot in enumerate(snapshots, start=1):
        stamp = snapshot.created_at.isoformat()[:19]
        metric_count = len(snapshot.collection_metrics)
        click.echo(
            f"{row_no:<4} {stamp:<20} {snapshot.entity_count:>8} {metric_count:>8}"
        )


@infospace_commands.command(name="history-diff")
@click.argument("date_a")
@click.argument("date_b")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def history_diff(date_a: str, date_b: str, config_path: Optional[str]):
    """Compare two history snapshots by date (YYYY-MM-DD)."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.history import find_snapshot_by_date, get_history
    from markitect.infospace.evaluation_io import diff_snapshots

    snapshots = get_history(cfg, root)
    if len(snapshots) < 2:
        click.echo("Need at least two snapshots to diff.")
        return

    # Resolve both dates first, then report the first one that has no match.
    resolved = [
        (requested, find_snapshot_by_date(snapshots, requested))
        for requested in (date_a, date_b)
    ]
    for requested, snapshot in resolved:
        if snapshot is None:
            click.echo(f"No snapshot found near '{requested}'.")
            return

    (_, older), (_, newer) = resolved
    if older.snapshot_id == newer.snapshot_id:
        click.echo("Both dates resolve to the same snapshot.")
        return

    click.echo(diff_snapshots(older, newer).summary())


# ── bind-discipline ─────────────────────────────────────────────────


@infospace_commands.command(name="bind-discipline")
@click.argument("discipline_path")
@click.option("--name", required=True, help="Name for the discipline.")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def bind_discipline_cmd(discipline_path: str, name: str, config_path: Optional[str]):
    """Bind a discipline infospace to the current infospace."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)

    from markitect.infospace.composition import bind_discipline

    outcome = bind_discipline(
        cfg,
        name=name,
        path=discipline_path,
        root=cfg_path.parent,
    )
    if outcome.error:
        click.echo(f"Error: {outcome.error}", err=True)
        raise SystemExit(1)

    # The config is written back only after a successful bind.
    save_infospace_config(cfg, cfg_path)

    click.echo(f"Bound discipline '{name}' from {discipline_path}")
    click.echo(f"  Entities: {outcome.entity_count}")
    # Viability is reported only when the status says the discipline has a config.
    if outcome.has_config:
        if outcome.is_viable:
            click.echo("  Viable: YES")
        else:
            click.echo("  Viable: NO")


# ── disciplines ─────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def disciplines(config_path: Optional[str]):
    """List bound disciplines and their viability status."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    if not cfg.disciplines:
        click.echo("No disciplines bound.")
        return

    from markitect.infospace.composition import check_discipline_status

    click.echo(f"{'Name':<30} {'Entities':>8} {'Viable':>8} {'Path'}")
    click.echo("-" * 70)
    for binding in cfg.disciplines:
        info = check_discipline_status(binding, root)

        # "?" is shown when the discipline is neither viable nor has a config.
        if info.is_viable:
            viable_str = "YES"
        elif info.has_config:
            viable_str = "NO"
        else:
            viable_str = "?"

        click.echo(
            f"{info.name:<30} {info.entity_count:>8} {viable_str:>8} {info.path}"
        )
        if info.error:
            click.echo(f"  Error: {info.error}")


# ── process ─────────────────────────────────────────────────────


@infospace_commands.command()
@click.argument("glob_pattern", default=None, required=False)
@click.option("--all", "process_all", is_flag=True, help="Process all source files.")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default=None, help="LLM provider (openrouter, openai, etc.).")
@click.option("--model", default=None, help="LLM model name.")
@click.option(
    "--check-after-each",
    is_flag=True,
    help="Run collection checks (C1–C5) after each source file.",
)
@click.option("--no-commit", is_flag=True, help="Skip git commits.")
@click.option(
    "--eval-after-source",
    is_flag=True,
    help="After each source's stages succeed, evaluate just the newly-"
         "added entities so the per-source commit is self-contained.",
)
@click.option(
    "--classify-after-source",
    is_flag=True,
    help="After each source's stages succeed, classify just the newly-"
         "added entities so the per-source commit is self-contained.",
)
def process(
    glob_pattern: Optional[str],
    process_all: bool,
    config_path: Optional[str],
    provider: Optional[str],
    model: Optional[str],
    check_after_each: bool,
    no_commit: bool,
    eval_after_source: bool,
    classify_after_source: bool,
):
    """Process source files through the pipeline defined in infospace.yaml.

    GLOB_PATTERN is matched against the sources directory declared in
    infospace.yaml (default ``*.md``).  Use ``--all`` to process every
    source file.

    \b
    Examples:
      # Process chapters 1-3 from book 1
      markitect infospace process "book-1-chapter-0[1-3].md" --provider openrouter

      # Process all source files and check metrics after each
      markitect infospace process --all --provider openrouter --check-after-each

      # Dry run — load existing outputs only, no LLM calls
      markitect infospace process --all
    """
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    # A pipeline with at least one stage is mandatory.
    if not (cfg.pipeline and cfg.pipeline.stages):
        click.echo(
            "Error: No pipeline stages defined in infospace.yaml.\n"
            "Add a 'pipeline.stages' section with at least one stage.",
            err=True,
        )
        raise SystemExit(1)

    # Sources live under topic.sources, or directly in the infospace root
    # when that setting is empty.
    if cfg.topic.sources:
        sources_dir = root / cfg.topic.sources
    else:
        sources_dir = root
    if not sources_dir.is_dir():
        click.echo(
            f"Error: Sources directory not found: {sources_dir}\n"
            f"Set 'topic.sources' in infospace.yaml.",
            err=True,
        )
        raise SystemExit(1)

    # --all always means "*.md"; otherwise use the given pattern, falling
    # back to "*.md" when none was supplied.
    effective_pattern = "*.md" if process_all else (glob_pattern or "*.md")
    source_files = sorted(sources_dir.glob(effective_pattern))

    if not source_files:
        if process_all:
            click.echo(f"No source files found in {sources_dir}")
        else:
            click.echo(
                f"No files matched: {effective_pattern}\n"
                f"Sources directory: {sources_dir}"
            )
        return

    click.echo(f"Found {len(source_files)} source file(s) in {sources_dir.name}/")

    # Build the LLM adapter only when a provider was requested; without one
    # the pipeline runs with no adapter and an empty provider/model.
    adapter = None
    pipeline_model = ""
    if provider:
        from markitect.llm import create_adapter

        provider_defaults = {"openrouter": "arcee-ai/trinity-large-preview:free"}
        resolved_model = model or provider_defaults.get(provider)
        adapter = create_adapter(provider, model=resolved_model)
        click.echo(f"LLM: {provider} ({resolved_model or 'default'})")
        # The pipeline receives "" rather than None when nothing resolved.
        pipeline_model = resolved_model or ""
    else:
        click.echo("No LLM provider — will use existing outputs only (manual mode).")

    from markitect.infospace.pipeline import SourcePipeline

    # The per-source evaluate/classify hooks cannot run without an adapter.
    needs_llm = eval_after_source or classify_after_source
    if needs_llm and adapter is None:
        click.echo(
            "Error: --eval-after-source / --classify-after-source require "
            "--provider (they call the LLM).",
            err=True,
        )
        raise SystemExit(1)

    pipeline = SourcePipeline(
        cfg, root,
        adapter=adapter,
        provider=provider or "",
        model=pipeline_model,
        no_commit=no_commit,
        eval_after_source=eval_after_source,
        classify_after_source=classify_after_source,
    )

    total = len(source_files)
    completed = 0
    for position, source_file in enumerate(source_files, start=1):
        click.echo(f"\n[{position}/{total}] {source_file.name}")
        if not pipeline.process_source(source_file):
            continue
        completed += 1
        # Collection checks run only after a source that succeeded.
        if check_after_each:
            pipeline.run_collection_check()

    click.echo(f"\nDone: {completed}/{total} source file(s) fully processed.")


# ── stale-mappings ──────────────────────────────────────────────────


@infospace_commands.command(name="stale-mappings")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
def stale_mappings(config_path: Optional[str]):
    """Check for stale mappings due to discipline changes."""
    # NOTE: the docstring above is also the command's --help text.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    if not cfg.disciplines:
        click.echo("No disciplines bound — no mappings to check.")
        return

    from markitect.infospace.composition import find_stale_mappings

    # Mapping references are optional; None is passed when the file is absent.
    stale_entries = find_stale_mappings(
        cfg,
        root,
        mapping_references=_load_mapping_references(cfg, root),
    )
    if not stale_entries:
        click.echo("No stale mappings detected.")
        return

    click.echo(f"Found {len(stale_entries)} stale mapping(s):\n")
    for entry in stale_entries:
        click.echo(f"  {entry.entity_slug} -> {entry.discipline_entity}")
        click.echo(f"    {entry.reason}")


# ── graph ──────────────────────────────────────────────────────────────────


@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--format", "output_format",
    type=click.Choice(["mermaid", "dot"]),
    default="mermaid",
    show_default=True,
    help="Output format.",
)
@click.option(
    "--color-by",
    type=click.Choice(["type", "vsm"]),
    default="type",
    show_default=True,
    help="Color nodes by entity type or VSM system.",
)
@click.option("--type", "filter_type", default=None,
              help="Show only entities with this entity type (e.g. Relation, Process).")
@click.option("--vsm", "filter_vsm", default=None,
              help="Show only entities with this VSM system (e.g. S1, S3).")
@click.option("--entity", "filter_entity", default=None,
              help="Show neighborhood of a specific entity slug.")
@click.option("--loops", "loops_only", is_flag=True, default=False,
              help="Show only the feedback loop subgraph.")
@click.option("--output", "-o", default=None,
              help="Write to file instead of stdout.")
@click.option("--classified-only/--all-entities", "classified_only",
              default=True, show_default=True,
              help="Only include classified entities (default: true).")
def graph(
    config_path: Optional[str],
    output_format: str,
    color_by: str,
    filter_type: Optional[str],
    filter_vsm: Optional[str],
    filter_entity: Optional[str],
    loops_only: bool,
    output: Optional[str],
    classified_only: bool,
):
    """Render the entity-relation graph as Mermaid or DOT."""
    # NOTE: the docstring above is also the command's --help text, so the
    # longer description lives in comments.
    #
    # Steps: load classifications and relations (either directory may be
    # missing), detect feedback cycles with networkx when it is importable,
    # build the graph, apply the CLI filters, then write the rendered text to
    # --output or to stdout.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.relation_parser import parse_relations_directory
    from markitect.infospace.graph_export import (
        apply_filters,
        build_entity_graph,
        to_dot,
        to_mermaid,
    )

    # Load classifications (an absent directory simply yields none).
    cls_dir = root / cfg.classifications_dir
    classifications = []
    if cls_dir.is_dir():
        classifications = read_classifications_directory(cls_dir)

    classified_slugs = {c.entity_slug for c in classifications}

    # Load relations (an absent directory simply yields none).
    relations_dir = root / cfg.relations_dir
    relations = []
    if relations_dir.is_dir():
        relations = parse_relations_directory(relations_dir)

    if not classifications and not relations:
        click.echo("No classifications or relations found. Run 'classify' and add relation files.")
        return

    # Detect feedback loops via networkx.  The dependency is optional: when
    # it is missing the graph is still rendered, but the user is told on
    # stderr that no loops were computed.  Previously this was swallowed
    # silently, so --loops could yield an unexplained empty result.
    feedback_cycles = []
    if relations:
        try:
            import networkx as nx
        except ImportError:
            click.echo(
                "Warning: networkx is not installed; feedback loops will not be detected.",
                err=True,
            )
        else:
            G = nx.DiGraph()
            for r in relations:
                G.add_edge(r.subject_slug, r.object_slug)
            feedback_cycles = list(nx.simple_cycles(G))

    # Build graph
    g = build_entity_graph(classifications, relations, feedback_cycles)

    # Apply filters
    filtered = apply_filters(
        g,
        filter_type=filter_type,
        filter_vsm=filter_vsm,
        filter_entity=filter_entity,
        loops_only=loops_only,
        classified_only=classified_only,
        classified_slugs=classified_slugs,
    )

    if not filtered.nodes:
        click.echo("No nodes match the given filters.")
        return

    # Export in the requested format.
    if output_format == "dot":
        result = to_dot(filtered, color_by=color_by)
    else:
        result = to_mermaid(filtered, color_by=color_by)

    if output:
        out_path = Path(output)
        out_path.write_text(result, encoding="utf-8")
        click.echo(
            f"Wrote {output_format} graph ({len(filtered.nodes)} nodes, "
            f"{sum(len(v) for v in filtered.edges.values())} edges) to {out_path}"
        )
    else:
        # nl=False: the rendered text is emitted exactly as produced.
        click.echo(result, nl=False)


def _load_mapping_references(
    cfg: InfospaceConfig, root: Path
) -> Optional[dict]:
    """Try to load mapping references from YAML file in output dir."""
    mapping_file = root / cfg.metrics_dir / "mapping-references.yaml"
    if not mapping_file.is_file():
        return None
    import yaml
    data = yaml.safe_load(mapping_file.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return data
    return None