feat(example): add L2 classifications for 823/988 WoN entities (S3.4)

Batch classification via OpenRouter (claude-sonnet-4). 165 entities remain
unclassified due to credit exhaustion; incremental skip means a follow-up run
will complete them automatically.

Type × VSM matrix (823 entities):

              S1   S2   S3  S3*   S4   S5
Element       86   75   58   21   43   32   (315 total, 38%)
Process       39   42   37   17   67   24   (226 total, 28%)
Institution    4   12   30   24    .   52   (122 total, 15%)
Principle      3    7   15    2   43   32   (102 total, 12%)
Relation       2   14    5    5   22   10   (58 total, 7%)

Matrix fill: 29/30 cells (Institution/S4 empty — expected)
Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29

Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
  (type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
  graph --format mermaid/dot, classify-links for Relation enrichment)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 12:49:11 +01:00
parent a9ca0adfcf
commit d1f57272a4
827 changed files with 25240 additions and 4 deletions
--- a/markitect/infospace/classification.py
+++ b/markitect/infospace/classification.py
@@ -32,6 +32,13 @@ class EntityClassification:
    classified_by: str = ""   # model name
    classified_at: Optional[datetime] = None

+    # Optional — only set when entity_type == "Relation"
+    links_subject: str = ""        # human-readable title of entity A
+    links_subject_slug: str = ""   # slug of entity A
+    links_object: str = ""         # human-readable title of entity B
+    links_object_slug: str = ""    # slug of entity B
+    links_mechanism: str = ""      # one sentence: how A and B are connected
+
    def to_dict(self) -> Dict[str, Any]:
        d: Dict[str, Any] = {
            "entity_slug": self.entity_slug,
@@ -46,6 +53,16 @@ class EntityClassification:
            d["classified_by"] = self.classified_by
        if self.classified_at is not None:
            d["classified_at"] = self.classified_at.isoformat()
+        if self.links_subject:
+            d["links_subject"] = self.links_subject
+        if self.links_subject_slug:
+            d["links_subject_slug"] = self.links_subject_slug
+        if self.links_object:
+            d["links_object"] = self.links_object
+        if self.links_object_slug:
+            d["links_object_slug"] = self.links_object_slug
+        if self.links_mechanism:
+            d["links_mechanism"] = self.links_mechanism
        return d

    @classmethod
@@ -61,4 +78,9 @@ class EntityClassification:
            vsm_rationale=data.get("vsm_rationale", ""),
            classified_by=data.get("classified_by", ""),
            classified_at=classified_at,
+            links_subject=data.get("links_subject", ""),
+            links_subject_slug=data.get("links_subject_slug", ""),
+            links_object=data.get("links_object", ""),
+            links_object_slug=data.get("links_object_slug", ""),
+            links_mechanism=data.get("links_mechanism", ""),
        )
--- a/markitect/infospace/classification_io.py
+++ b/markitect/infospace/classification_io.py
@@ -55,6 +55,17 @@ def write_entity_classification(c: EntityClassification, path: Path) -> None:
        lines.append(c.vsm_rationale)
        lines.append("")

+    if c.links_mechanism:
+        lines.append("## Links")
+        lines.append("")
+        if c.links_subject:
+            lines.append(f"**Subject:** {c.links_subject}")
+        if c.links_object:
+            lines.append(f"**Object:** {c.links_object}")
+        lines.append("")
+        lines.append(c.links_mechanism)
+        lines.append("")
+
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")

--- a/markitect/infospace/classifier.py
+++ b/markitect/infospace/classifier.py
@@ -200,6 +200,7 @@ def run_entity_classification(
    run_config: Optional[RunConfig] = None,
    output_dir: Optional[Path] = None,
    progress_callback: Optional[Callable] = None,
+    delay_seconds: float = 0.0,
 ) -> BatchSummary:
    """Run per-entity classification using the batch evaluator.

@@ -214,6 +215,7 @@ def run_entity_classification(
        output_dir: Where to write classification results.  Defaults to
            ``config.classifications_dir`` relative to CWD.
        progress_callback: Called after each item with (done, total, result).
+        delay_seconds: Seconds to sleep between requests (for rate limiting).

    Returns:
        A :class:`BatchSummary` with per-entity results.
@@ -250,6 +252,148 @@ def run_entity_classification(
        for entity in entities
    ]

+    evaluator = BatchEvaluator(
+        adapter=adapter,
+        config=run_config,
+        progress_callback=_write_and_notify,
+        delay_seconds=delay_seconds,
+    )
+    return evaluator.evaluate(items)
+
+
+# ── Relation-link prompt and runner ───────────────────────────────────────────
+
+# Prompt template for relation-link enrichment.  ``str.format`` fills the
+# {topic}, {title}, {domain}, {definition} and {context} placeholders (see
+# build_relation_link_prompt); the labels listed under "Output format" are
+# the ones parse_relation_link_response scans the reply for.
+_RELATION_LINK_PROMPT_TEMPLATE = """\
+You are enriching a Relation-type entity from an infospace about "{topic}".
+
+This entity IS a structural connector — a dependency, mechanism, or causal link \
+between two other entities. Your task: identify which two entities it connects \
+and describe the linking mechanism in one sentence.
+
+## Entity: {title}
+
+**Domain:** {domain}
+
+### Definition
+
+{definition}
+
+### Context
+
+{context}
+
+---
+
+## Instructions
+
+1. Read the definition and context carefully.
+2. Identify **Entity A** (the subject/origin of the relation) and **Entity B** \
+(the object/destination).
+3. Write a single sentence explaining HOW this entity connects or mediates between A and B.
+4. Use **exactly** the output format below — no preamble, no extra lines.
+5. For slugs: use lowercase letters and underscores only (same as file names), \
+   e.g. "division_of_labour", "market_extent".
+
+## Output format
+
+SUBJECT: <human-readable title of Entity A>
+SUBJECT_SLUG: <slug of Entity A>
+OBJECT: <human-readable title of Entity B>
+OBJECT_SLUG: <slug of Entity B>
+MECHANISM: <one sentence describing how this entity links A to B>
+"""
+
+
+def build_relation_link_prompt(entity: EntityMeta, topic: str) -> str:
+    """Render the relation-link prompt for a single Relation-type entity.
+
+    An empty ``domain``, ``definition`` or ``context`` on *entity* is
+    replaced by a parenthesised placeholder so no prompt section is blank.
+    """
+    fields = {
+        "topic": topic,
+        "title": entity.title,
+        "domain": entity.domain or "(unspecified)",
+        "definition": entity.definition or "(no definition provided)",
+        "context": entity.context or "(no context provided)",
+    }
+    return _RELATION_LINK_PROMPT_TEMPLATE.format(**fields)
+
+
+def parse_relation_link_response(text: str) -> dict:
+    """Extract the five relation-link fields from an LLM reply.
+
+    Each line is stripped and matched case-insensitively against the labels
+    ``SUBJECT_SLUG``, ``SUBJECT``, ``OBJECT_SLUG``, ``OBJECT`` and
+    ``MECHANISM``; whatever follows the first colon (stripped) becomes the
+    value.  Labels that never appear stay as empty strings, and a label that
+    appears more than once keeps its last value.
+    """
+    labels = (
+        ("SUBJECT_SLUG:", "links_subject_slug"),
+        ("SUBJECT:", "links_subject"),
+        ("OBJECT_SLUG:", "links_object_slug"),
+        ("OBJECT:", "links_object"),
+        ("MECHANISM:", "links_mechanism"),
+    )
+    fields: dict = dict.fromkeys(
+        (
+            "links_subject",
+            "links_subject_slug",
+            "links_object",
+            "links_object_slug",
+            "links_mechanism",
+        ),
+        "",
+    )
+    for raw_line in text.splitlines():
+        candidate = raw_line.strip()
+        folded = candidate.upper()
+        for label, key in labels:
+            if folded.startswith(label):
+                # Split on the first colon only, so colons in the value survive.
+                fields[key] = candidate.split(":", 1)[1].strip()
+                break
+    return fields
+
+
+def run_relation_link_capture(
+    config: InfospaceConfig,
+    relation_entities: List[EntityMeta],
+    classifications: dict,  # slug → EntityClassification
+    adapter: LLMAdapter,
+    run_config: Optional[RunConfig] = None,
+    output_dir: Optional[Path] = None,
+    progress_callback: Optional[Callable] = None,
+) -> BatchSummary:
+    """Capture relation endpoint data for Relation-type entities.
+
+    Reads existing classification files for Relation-type entities, skips
+    those that already have ``links_mechanism`` set, calls the LLM for the
+    rest, and updates classification files in-place.
+
+    Args:
+        config: The infospace configuration.
+        relation_entities: EntityMeta objects for Relation-type entities only.
+        classifications: Slug → EntityClassification map (pre-loaded).
+        adapter: LLM adapter.
+        run_config: LLM execution configuration.
+        output_dir: Where classification files live (defaults to config.classifications_dir).
+        progress_callback: Called after each item with (done, total, result).
+
+    Returns:
+        A :class:`BatchSummary` with per-entity results.
+    """
+    topic = config.topic.name
+    cls_path = output_dir or Path(config.classifications_dir)
+
+    def _write_and_notify(done: int, total: int, result) -> None:
+        if result.status == "success" and result.response is not None:
+            parsed = parse_relation_link_response(result.response.content)
+            existing_cls = classifications.get(result.key)
+            if existing_cls is not None:
+                existing_cls.links_subject = parsed["links_subject"]
+                existing_cls.links_subject_slug = parsed["links_subject_slug"]
+                existing_cls.links_object = parsed["links_object"]
+                existing_cls.links_object_slug = parsed["links_object_slug"]
+                existing_cls.links_mechanism = parsed["links_mechanism"]
+                dest = cls_path / f"{result.key}.md"
+                write_entity_classification(existing_cls, dest)
+
+        if progress_callback is not None:
+            progress_callback(done, total, result)
+
+    items = [
+        BatchItem(
+            key=entity.slug,
+            prompt=build_relation_link_prompt(entity, topic),
+        )
+        for entity in relation_entities
+    ]
+
    evaluator = BatchEvaluator(
        adapter=adapter,
        config=run_config,
--- a/markitect/infospace/cli.py
+++ b/markitect/infospace/cli.py
@@ -122,7 +122,9 @@ def status(config_path: Optional[str]):
    default="slug",
    help="Sort entities by field.",
 )
-def entities(config_path: Optional[str], sort_key: str):
+@click.option("--by-type", "by_type", is_flag=True, default=False,
+              help="Group entities by L2 entity type.")
+def entities(config_path: Optional[str], sort_key: str, by_type: bool):
    """List entities with metadata summary."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent
@@ -137,6 +139,10 @@ def entities(config_path: Optional[str], sort_key: str):
        click.echo("No entities found.")
        return

+    if by_type:
+        _entities_by_type(cfg, root, entity_list)
+        return
+
    # Sort
    if sort_key == "domain":
        entity_list.sort(key=lambda e: (e.domain or "", e.slug))
@@ -153,6 +159,75 @@ def entities(config_path: Optional[str], sort_key: str):
    click.echo(f"\nTotal: {len(entity_list)} entities")


+def _entities_by_type(cfg, root: "Path", entity_list: list) -> None:
+    """Print entities grouped by L2 entity type.
+
+    Entities are bucketed by the ``entity_type`` of their classification.
+    Entities with no classification, or whose type is not a key of the
+    index built from ``ENTITY_TYPES``, are listed under "Unclassified".
+    Each row shows the slug, the domain (cut to 18 characters), the VSM
+    system and, when an evaluation file could be read, the overall score.
+    Relation-type entities with a captured mechanism get two extra lines
+    describing the link.
+
+    Args:
+        cfg: Infospace configuration; ``classifications_dir`` and
+            ``evaluations_dir`` are resolved relative to *root*.
+        root: Directory the configured sub-directories are relative to.
+        entity_list: Parsed entities to display.
+    """
+    from markitect.infospace.classification import ENTITY_TYPES
+    from markitect.infospace.classification_io import read_classifications_directory
+    from markitect.infospace.evaluation_io import read_entity_evaluation
+
+    # Load classifications
+    cls_dir = root / cfg.classifications_dir
+    cls_map: dict = {}
+    if cls_dir.is_dir():
+        for c in read_classifications_directory(cls_dir):
+            cls_map[c.entity_slug] = c
+
+    # Load evaluation scores (best-effort): an unreadable evaluation file
+    # only costs that entity its score column, so failures are skipped.
+    eval_dir = root / cfg.evaluations_dir
+    eval_scores: dict = {}  # slug → overall_score
+    if eval_dir.is_dir():
+        for ef in eval_dir.glob("*.md"):
+            try:
+                ev = read_entity_evaluation(ef)
+                eval_scores[ev.entity_slug] = ev.overall_score
+            except Exception:
+                continue
+
+    # Build index: entity_type → list of (entity, classification)
+    entity_index = {t: [] for t in ENTITY_TYPES}
+    entity_index["Unclassified"] = []
+
+    for e in entity_list:
+        cls = cls_map.get(e.slug)
+        bucket = "Unclassified"
+        if cls is not None and cls.entity_type in entity_index:
+            bucket = cls.entity_type
+        entity_index[bucket].append((e, cls))
+
+    # Print each type group
+    type_order = list(ENTITY_TYPES) + ["Unclassified"]
+    total = 0
+    for etype in type_order:
+        group = entity_index.get(etype, [])
+        if not group:
+            continue
+        click.echo(f"\n=== {etype} ({len(group)} entities) ===")
+        group.sort(key=lambda x: x[0].slug)
+        for e, cls in group:
+            vsm = cls.vsm_system if cls else ""
+            domain = (e.domain or "-")[:18]
+            score = eval_scores.get(e.slug)
+            score_str = f" \u2605{score:.1f}" if score is not None else ""
+            slug_col = f"{e.slug:<40}"
+            click.echo(f"  {slug_col} {domain:<18} {vsm:<4}{score_str}")
+            if cls and cls.entity_type == "Relation" and cls.links_mechanism:
+                subj = cls.links_subject or cls.links_subject_slug or "?"
+                obj = cls.links_object or cls.links_object_slug or "?"
+                click.echo(f"    \u2192 links: {subj} \u2194 {obj}")
+                mech = cls.links_mechanism
+                # Keep the mechanism on one terminal line: 77 chars + "...".
+                if len(mech) > 80:
+                    mech = mech[:77] + "..."
+                click.echo(f"    \u2192 mechanism: {mech}")
+        total += len(group)
+
+    click.echo(f"\nTotal: {total} entities")
+
+
 # ── evaluate ─────────────────────────────────────────────────────────


@@ -429,8 +504,10 @@ def relations(config_path: Optional[str], entity_slug: Optional[str],
@click.option("--provider", default="openrouter",
              help="LLM provider (openrouter, gemini, openai, …).")
@click.option("--model", default=None, help="Model name override.")
+@click.option("--rpm", default=0, type=int,
+              help="Max requests per minute (0 = unlimited). Use 10 for Gemini free tier.")
 def classify(config_path: Optional[str], entity_slug: Optional[str],
-             provider: str, model: Optional[str]):
+             provider: str, model: Optional[str], rpm: int):
    """Classify entities with Entity Type and VSM System (L2)."""
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent
@@ -464,7 +541,9 @@ def classify(config_path: Optional[str], entity_slug: Optional[str],
            click.echo("All entities already classified. Nothing to do.")
            return

-    click.echo(f"Classifying {len(entity_list)} entities …")
+    delay = (60.0 / rpm) if rpm > 0 else 0.0
+    click.echo(f"Classifying {len(entity_list)} entities …" +
+               (f" (rate: {rpm} RPM, {delay:.1f}s delay)" if delay else ""))
    output_dir.mkdir(parents=True, exist_ok=True)

    adapter = create_adapter(provider, model=model)
@@ -483,6 +562,7 @@ def classify(config_path: Optional[str], entity_slug: Optional[str],
        run_config=run_config,
        output_dir=output_dir,
        progress_callback=_progress,
+        delay_seconds=delay,
    )
    click.echo(f"\nDone: {summary.succeeded} classified, {summary.failed} failed.")

@@ -585,6 +665,80 @@ def classify_summary(config_path: Optional[str], update_metrics: bool):
        )


+# ── classify-links ────────────────────────────────────────────────────
+
+
+@infospace_commands.command(name="classify-links")
+@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
+@click.option("--provider", default="openrouter",
+              help="LLM provider (openrouter, gemini, openai, …).")
+@click.option("--model", default=None, help="Model name override.")
+def classify_links(config_path: Optional[str], provider: str, model: Optional[str]):
+    """Capture relation endpoint data (subject, object, mechanism) for Relation-type entities."""
+    # NOTE: the docstring above doubles as the command's --help text, so the
+    # explanatory notes live in comments instead.
+    cfg, cfg_path = _load_config_or_exit(config_path)
+    root = cfg_path.parent
+
+    from markitect.infospace.classification_io import read_classifications_directory
+    from markitect.infospace.classifier import run_relation_link_capture
+    from markitect.llm import create_adapter
+    from markitect.prompts.execution.models import RunConfig
+
+    cls_dir = root / cfg.classifications_dir
+    if not cls_dir.is_dir():
+        click.echo("No classifications directory found. Run 'classify' first.", err=True)
+        raise SystemExit(1)
+
+    all_cls = read_classifications_directory(cls_dir)
+    cls_map = {c.entity_slug: c for c in all_cls}
+
+    # Filter to Relation-type entities that are missing links_mechanism.
+    # This is what makes re-runs incremental: enriched entities are skipped.
+    relation_slugs = [
+        c.entity_slug for c in all_cls
+        if c.entity_type == "Relation" and not c.links_mechanism
+    ]
+
+    if not relation_slugs:
+        click.echo("All Relation-type entities already have endpoint data. Nothing to do.")
+        return
+
+    # Load entity metadata for these slugs
+    entity_list = parse_entity_directory(root / cfg.entities_dir)
+    entity_map = {e.slug: e for e in entity_list}
+
+    # A classification file can outlive its entity file; such slugs cannot be
+    # prompted for (no definition/context), so they are reported and skipped.
+    relation_entities = [entity_map[s] for s in relation_slugs if s in entity_map]
+    missing_from_entities = [s for s in relation_slugs if s not in entity_map]
+    if missing_from_entities:
+        click.echo(f"Warning: {len(missing_from_entities)} Relation-type slugs not found in "
+                   f"entities directory and will be skipped.")
+
+    if not relation_entities:
+        click.echo("No Relation-type entities found to enrich.")
+        return
+
+    click.echo(f"Capturing relation links for {len(relation_entities)} Relation-type entities …")
+
+    adapter = create_adapter(provider, model=model)
+    run_config = RunConfig(model_name=model, temperature=0.1, max_tokens=512)
+
+    def _progress(done: int, total: int, result) -> None:
+        # One line per processed entity; failures carry the error text.
+        if result.status == "success":
+            click.echo(f"  [{done}/{total}] {result.key}")
+        else:
+            click.echo(f"  [{done}/{total}] {result.key} — FAILED: {result.error}")
+
+    summary = run_relation_link_capture(
+        config=cfg,
+        relation_entities=relation_entities,
+        classifications=cls_map,
+        adapter=adapter,
+        run_config=run_config,
+        output_dir=cls_dir,
+        progress_callback=_progress,
+    )
+    click.echo(f"\nDone: {summary.succeeded} enriched, {summary.failed} failed.")
+
+
 # ── viability ────────────────────────────────────────────────────────


@@ -994,6 +1148,127 @@ def stale_mappings(config_path: Optional[str]):
        click.echo(f"    {s.reason}")


+# ── graph ──────────────────────────────────────────────────────────────────
+
+
+@infospace_commands.command()
+@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
+@click.option(
+    "--format", "output_format",
+    type=click.Choice(["mermaid", "dot"]),
+    default="mermaid",
+    show_default=True,
+    help="Output format.",
+)
+@click.option(
+    "--color-by",
+    type=click.Choice(["type", "vsm"]),
+    default="type",
+    show_default=True,
+    help="Color nodes by entity type or VSM system.",
+)
+@click.option("--type", "filter_type", default=None,
+              help="Show only entities with this entity type (e.g. Relation, Process).")
+@click.option("--vsm", "filter_vsm", default=None,
+              help="Show only entities with this VSM system (e.g. S1, S3).")
+@click.option("--entity", "filter_entity", default=None,
+              help="Show neighborhood of a specific entity slug.")
+@click.option("--loops", "loops_only", is_flag=True, default=False,
+              help="Show only the feedback loop subgraph.")
+@click.option("--output", "-o", default=None,
+              help="Write to file instead of stdout.")
+@click.option("--classified-only/--all-entities", "classified_only",
+              default=True, show_default=True,
+              help="Only include classified entities (default: true).")
+def graph(
+    config_path: Optional[str],
+    output_format: str,
+    color_by: str,
+    filter_type: Optional[str],
+    filter_vsm: Optional[str],
+    filter_entity: Optional[str],
+    loops_only: bool,
+    output: Optional[str],
+    classified_only: bool,
+):
+    """Render the entity-relation graph as Mermaid or DOT."""
+    # NOTE: the docstring above doubles as the command's --help text, so the
+    # explanatory notes live in comments instead.
+    cfg, cfg_path = _load_config_or_exit(config_path)
+    root = cfg_path.parent
+
+    from markitect.infospace.classification_io import read_classifications_directory
+    from markitect.infospace.relation_parser import parse_relations_directory
+    from markitect.infospace.graph_export import (
+        apply_filters,
+        build_entity_graph,
+        to_dot,
+        to_mermaid,
+    )
+
+    # Load classifications
+    cls_dir = root / cfg.classifications_dir
+    classifications = []
+    if cls_dir.is_dir():
+        classifications = read_classifications_directory(cls_dir)
+
+    classified_slugs = {c.entity_slug for c in classifications}
+
+    # Load relations
+    relations_dir = root / cfg.relations_dir
+    relations = []
+    if relations_dir.is_dir():
+        relations = parse_relations_directory(relations_dir)
+
+    if not classifications and not relations:
+        click.echo("No classifications or relations found. Run 'classify' and add relation files.")
+        return
+
+    # Detect feedback loops via networkx.  networkx is optional: without it
+    # the graph is still rendered, just with no cycle information.
+    feedback_cycles = []
+    if relations:
+        try:
+            import networkx as nx
+        except ImportError:
+            # Only speak up when the user explicitly asked for loops;
+            # otherwise the missing dependency does not affect the output.
+            if loops_only:
+                click.echo(
+                    "Warning: networkx is not installed; feedback loops cannot be detected.",
+                    err=True,
+                )
+        else:
+            G = nx.DiGraph()
+            for r in relations:
+                G.add_edge(r.subject_slug, r.object_slug)
+            feedback_cycles = list(nx.simple_cycles(G))
+
+    # Build graph
+    g = build_entity_graph(classifications, relations, feedback_cycles)
+
+    # Apply filters
+    filtered = apply_filters(
+        g,
+        filter_type=filter_type,
+        filter_vsm=filter_vsm,
+        filter_entity=filter_entity,
+        loops_only=loops_only,
+        classified_only=classified_only,
+        classified_slugs=classified_slugs,
+    )
+
+    if not filtered.nodes:
+        click.echo("No nodes match the given filters.")
+        return
+
+    # Export
+    if output_format == "dot":
+        result = to_dot(filtered, color_by=color_by)
+    else:
+        result = to_mermaid(filtered, color_by=color_by)
+
+    if output:
+        out_path = Path(output)
+        # Create missing parent directories so "-o build/graph.mmd" works
+        # on a fresh checkout instead of raising FileNotFoundError.
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        out_path.write_text(result, encoding="utf-8")
+        click.echo(
+            f"Wrote {output_format} graph ({len(filtered.nodes)} nodes, "
+            f"{sum(len(v) for v in filtered.edges.values())} edges) to {out_path}"
+        )
+    else:
+        click.echo(result, nl=False)
+
+
 def _load_mapping_references(
    cfg: InfospaceConfig, root: Path
 ) -> Optional[dict]:
--- a/markitect/infospace/history.py
+++ b/markitect/infospace/history.py
@@ -90,7 +90,8 @@ def write_metrics_file(metrics: Dict[str, float], path: Path) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(
        yaml.safe_dump(
-            {k: round(v, 6) for k, v in sorted(metrics.items())},
+            {k: round(v, 6) if isinstance(v, float) else v
+             for k, v in sorted(metrics.items())},
            default_flow_style=False,
            sort_keys=True,
        ),
--- a/markitect/prompts/execution/batch.py
+++ b/markitect/prompts/execution/batch.py
@@ -102,11 +102,13 @@ class BatchEvaluator:
        config: Optional[RunConfig] = None,
        progress_callback: Optional[Callable[[int, int, BatchResult], None]] = None,
        previous_digests: Optional[Dict[str, str]] = None,
+        delay_seconds: float = 0.0,
    ):
        self._adapter = adapter
        self._config = config or RunConfig()
        self._progress_callback = progress_callback
        self._previous_digests = previous_digests or {}
+        self._delay_seconds = delay_seconds

    def evaluate(self, items: List[BatchItem]) -> BatchSummary:
        """Run evaluation for all items and return aggregate results.
@@ -116,9 +118,13 @@ class BatchEvaluator:
        the LLM adapter.  Errors on individual items are captured
        without aborting the batch.
        """
+        import time as _time
+
        summary = BatchSummary(total=len(items))

        for idx, item in enumerate(items):
+            if idx > 0 and self._delay_seconds > 0:
+                _time.sleep(self._delay_seconds)
            result = self._evaluate_one(item)
            summary.results.append(result)