feat(example): add L2 classifications for 823/988 WoN entities (S3.4)

Batch classification via OpenRouter (claude-sonnet-4). 165 entities remain
unclassified due to credit exhaustion; incremental skip means a follow-up
run will complete them automatically.

Type × VSM matrix (823 entities):

               S1   S2   S3   S3*  S4   S5
  Element      86   75   58   21   43   32   (315 total, 38%)
  Process      39   42   37   17   67   24   (226 total, 28%)
  Institution   4   12   30   24    .   52   (122 total, 15%)
  Principle     3    7   15    2   43   32   (102 total, 12%)
  Relation      2   14    5    5   22   10   (58 total, 7%)

Matrix fill: 29/30 cells (Institution/S4 empty — expected)
Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29

Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
  (type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
  graph --format mermaid/dot, classify-links for Relation enrichment)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 12:49:11 +01:00
parent a9ca0adfcf
commit d1f57272a4
827 changed files with 25240 additions and 4 deletions
--- a/markitect/infospace/classifier.py
+++ b/markitect/infospace/classifier.py
@@ -200,6 +200,7 @@ def run_entity_classification(
    run_config: Optional[RunConfig] = None,
    output_dir: Optional[Path] = None,
    progress_callback: Optional[Callable] = None,
+    delay_seconds: float = 0.0,
 ) -> BatchSummary:
    """Run per-entity classification using the batch evaluator.

@@ -214,6 +215,7 @@ def run_entity_classification(
        output_dir: Where to write classification results.  Defaults to
            ``config.classifications_dir`` relative to CWD.
        progress_callback: Called after each item with (done, total, result).
+        delay_seconds: Seconds to sleep between requests (for rate limiting).

    Returns:
        A :class:`BatchSummary` with per-entity results.
@@ -250,6 +252,148 @@ def run_entity_classification(
        for entity in entities
    ]

+    evaluator = BatchEvaluator(
+        adapter=adapter,
+        config=run_config,
+        progress_callback=_write_and_notify,
+        delay_seconds=delay_seconds,
+    )
+    return evaluator.evaluate(items)
+
+
+# ── Relation-link prompt and runner ───────────────────────────────────────────
+
+# Prompt sent to the LLM for a single Relation-type entity.  The five
+# ``{...}`` placeholders (topic, title, domain, definition, context) are
+# filled by ``build_relation_link_prompt`` via ``str.format``.  A trailing
+# backslash inside the literal joins the following line onto the current
+# one, so those sentences reach the model as a single line.  The labels
+# listed under "Output format" are the ones that
+# ``parse_relation_link_response`` looks for.
+_RELATION_LINK_PROMPT_TEMPLATE = """\
+You are enriching a Relation-type entity from an infospace about "{topic}".
+
+This entity IS a structural connector — a dependency, mechanism, or causal link \
+between two other entities. Your task: identify which two entities it connects \
+and describe the linking mechanism in one sentence.
+
+## Entity: {title}
+
+**Domain:** {domain}
+
+### Definition
+
+{definition}
+
+### Context
+
+{context}
+
+---
+
+## Instructions
+
+1. Read the definition and context carefully.
+2. Identify **Entity A** (the subject/origin of the relation) and **Entity B** \
+(the object/destination).
+3. Write a single sentence explaining HOW this entity connects or mediates between A and B.
+4. Use **exactly** the output format below — no preamble, no extra lines.
+5. For slugs: use lowercase letters and underscores only (same as file names), \
+   e.g. "division_of_labour", "market_extent".
+
+## Output format
+
+SUBJECT: <human-readable title of Entity A>
+SUBJECT_SLUG: <slug of Entity A>
+OBJECT: <human-readable title of Entity B>
+OBJECT_SLUG: <slug of Entity B>
+MECHANISM: <one sentence describing how this entity links A to B>
+"""
+
+
+def build_relation_link_prompt(entity: EntityMeta, topic: str) -> str:
+    """Build a relation-link enrichment prompt for a Relation-type entity.
+
+    Args:
+        entity: Entity whose ``title``, ``domain``, ``definition`` and
+            ``context`` are inserted into the prompt.
+        topic: Topic name inserted into the prompt's opening sentence.
+
+    Returns:
+        ``_RELATION_LINK_PROMPT_TEMPLATE`` with every placeholder filled.
+        A falsy ``domain``, ``definition`` or ``context`` is replaced by a
+        parenthesised fallback text rather than being left blank.
+    """
+    return _RELATION_LINK_PROMPT_TEMPLATE.format(
+        topic=topic,
+        title=entity.title,
+        domain=entity.domain or "(unspecified)",
+        definition=entity.definition or "(no definition provided)",
+        context=entity.context or "(no context provided)",
+    )
+
+
+def parse_relation_link_response(text: str) -> dict:
+    """Parse SUBJECT/SUBJECT_SLUG/OBJECT/OBJECT_SLUG/MECHANISM from an LLM response.
+
+    The response is scanned line by line.  A line is recognised when, after
+    stripping surrounding whitespace, it starts with one of the five labels
+    followed by a colon (matched case-insensitively).  The value is the text
+    after the first colon, stripped.  Lines that match no label are ignored.
+
+    Args:
+        text: Raw response text returned by the LLM.
+
+    Returns:
+        A dict with the keys ``links_subject``, ``links_subject_slug``,
+        ``links_object``, ``links_object_slug`` and ``links_mechanism``.
+        A label that never appears leaves its key as ``""``; if a label
+        appears more than once, the last occurrence wins.
+    """
+    result: dict = {
+        "links_subject": "",
+        "links_subject_slug": "",
+        "links_object": "",
+        "links_object_slug": "",
+        "links_mechanism": "",
+    }
+    for line in text.splitlines():
+        stripped = line.strip()
+        # Labels are compared against an upper-cased copy so matching is
+        # case-insensitive; values are taken from the original-case line.
+        upper = stripped.upper()
+        if upper.startswith("SUBJECT_SLUG:"):
+            result["links_subject_slug"] = stripped.split(":", 1)[1].strip()
+        elif upper.startswith("SUBJECT:"):
+            result["links_subject"] = stripped.split(":", 1)[1].strip()
+        elif upper.startswith("OBJECT_SLUG:"):
+            result["links_object_slug"] = stripped.split(":", 1)[1].strip()
+        elif upper.startswith("OBJECT:"):
+            result["links_object"] = stripped.split(":", 1)[1].strip()
+        elif upper.startswith("MECHANISM:"):
+            result["links_mechanism"] = stripped.split(":", 1)[1].strip()
+    return result
+
+
+def run_relation_link_capture(
+    config: InfospaceConfig,
+    relation_entities: List[EntityMeta],
+    classifications: dict,  # slug → EntityClassification
+    adapter: LLMAdapter,
+    run_config: Optional[RunConfig] = None,
+    output_dir: Optional[Path] = None,
+    progress_callback: Optional[Callable] = None,
+) -> BatchSummary:
+    """Capture relation endpoint data for Relation-type entities.
+
+    Reads existing classification files for Relation-type entities, skips
+    those that already have ``links_mechanism`` set, calls the LLM for the
+    rest, and updates classification files in-place.
+
+    Args:
+        config: The infospace configuration.
+        relation_entities: EntityMeta objects for Relation-type entities only.
+        classifications: Slug → EntityClassification map (pre-loaded).
+        adapter: LLM adapter.
+        run_config: LLM execution configuration.
+        output_dir: Where classification files live (defaults to config.classifications_dir).
+        progress_callback: Called after each item with (done, total, result).
+
+    Returns:
+        A :class:`BatchSummary` with per-entity results.
+    """
+    topic = config.topic.name
+    cls_path = output_dir or Path(config.classifications_dir)
+
+    def _write_and_notify(done: int, total: int, result) -> None:
+        if result.status == "success" and result.response is not None:
+            parsed = parse_relation_link_response(result.response.content)
+            existing_cls = classifications.get(result.key)
+            if existing_cls is not None:
+                existing_cls.links_subject = parsed["links_subject"]
+                existing_cls.links_subject_slug = parsed["links_subject_slug"]
+                existing_cls.links_object = parsed["links_object"]
+                existing_cls.links_object_slug = parsed["links_object_slug"]
+                existing_cls.links_mechanism = parsed["links_mechanism"]
+                dest = cls_path / f"{result.key}.md"
+                write_entity_classification(existing_cls, dest)
+
+        if progress_callback is not None:
+            progress_callback(done, total, result)
+
+    items = [
+        BatchItem(
+            key=entity.slug,
+            prompt=build_relation_link_prompt(entity, topic),
+        )
+        for entity in relation_entities
+    ]
+
    evaluator = BatchEvaluator(
        adapter=adapter,
        config=run_config,