feat(example): add L2 classifications for 823/988 WoN entities (S3.4)

Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; because the run skips
entities that are already classified, a follow-up run will complete
the remaining ones automatically.

Type × VSM matrix (823 entities):
                  S1   S2   S3  S3*   S4   S5
  Element         86   75   58   21   43   32  (315 total, 38%)
  Process         39   42   37   17   67   24  (226 total, 28%)
  Institution      4   12   30   24    .   52  (122 total, 15%)
  Principle        3    7   15    2   43   32  (102 total, 12%)
  Relation         2   14    5    5   22   10   (58 total,  7%)
  Matrix fill: 29/30 cells (Institution/S4 empty — expected)

Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29

Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
  (type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
  graph --format mermaid/dot, classify-links for Relation enrichment)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 12:49:11 +01:00
parent a9ca0adfcf
commit d1f57272a4
827 changed files with 25240 additions and 4 deletions

View File

@@ -200,6 +200,7 @@ def run_entity_classification(
run_config: Optional[RunConfig] = None,
output_dir: Optional[Path] = None,
progress_callback: Optional[Callable] = None,
delay_seconds: float = 0.0,
) -> BatchSummary:
"""Run per-entity classification using the batch evaluator.
@@ -214,6 +215,7 @@ def run_entity_classification(
output_dir: Where to write classification results. Defaults to
``config.classifications_dir`` relative to CWD.
progress_callback: Called after each item with (done, total, result).
delay_seconds: Seconds to sleep between requests (for rate limiting).
Returns:
A :class:`BatchSummary` with per-entity results.
@@ -250,6 +252,148 @@ def run_entity_classification(
for entity in entities
]
evaluator = BatchEvaluator(
adapter=adapter,
config=run_config,
progress_callback=_write_and_notify,
delay_seconds=delay_seconds,
)
return evaluator.evaluate(items)
# ── Relation-link prompt and runner ───────────────────────────────────────────
# Prompt template for enriching a single Relation-type entity.
#
# Rendered with ``str.format`` by ``build_relation_link_prompt``, which supplies
# the placeholders ``{topic}``, ``{title}``, ``{domain}``, ``{definition}`` and
# ``{context}``. Because of that, any literal brace added to this text must be
# doubled (``{{`` / ``}}``).
#
# A trailing backslash inside the literal joins the next source line onto the
# same prompt line. The labels in the "Output format" section (SUBJECT,
# SUBJECT_SLUG, OBJECT, OBJECT_SLUG, MECHANISM) are the ones
# ``parse_relation_link_response`` looks for, so the two must be kept in sync.
_RELATION_LINK_PROMPT_TEMPLATE = """\
You are enriching a Relation-type entity from an infospace about "{topic}".
This entity IS a structural connector — a dependency, mechanism, or causal link \
between two other entities. Your task: identify which two entities it connects \
and describe the linking mechanism in one sentence.
## Entity: {title}
**Domain:** {domain}
### Definition
{definition}
### Context
{context}
---
## Instructions
1. Read the definition and context carefully.
2. Identify **Entity A** (the subject/origin of the relation) and **Entity B** \
(the object/destination).
3. Write a single sentence explaining HOW this entity connects or mediates between A and B.
4. Use **exactly** the output format below — no preamble, no extra lines.
5. For slugs: use lowercase letters and underscores only (same as file names), \
e.g. "division_of_labour", "market_extent".
## Output format
SUBJECT: <human-readable title of Entity A>
SUBJECT_SLUG: <slug of Entity A>
OBJECT: <human-readable title of Entity B>
OBJECT_SLUG: <slug of Entity B>
MECHANISM: <one sentence describing how this entity links A to B>
"""
def build_relation_link_prompt(entity: EntityMeta, topic: str) -> str:
    """Render the relation-link enrichment prompt for one entity.

    Args:
        entity: The entity whose ``title``, ``domain``, ``definition`` and
            ``context`` attributes are substituted into the template.
        topic: Name of the infospace topic, inserted verbatim.

    Returns:
        The prompt template with every placeholder filled in. A falsy
        ``domain``, ``definition`` or ``context`` is replaced by a
        parenthesised stand-in so the prompt never shows an empty section.
    """
    title = entity.title
    domain = entity.domain or "(unspecified)"
    definition = entity.definition or "(no definition provided)"
    context = entity.context or "(no context provided)"

    substitutions = {
        "topic": topic,
        "title": title,
        "domain": domain,
        "definition": definition,
        "context": context,
    }
    return _RELATION_LINK_PROMPT_TEMPLATE.format(**substitutions)
def parse_relation_link_response(text: str) -> dict:
    """Parse SUBJECT/SUBJECT_SLUG/OBJECT/OBJECT_SLUG/MECHANISM from an LLM response.

    Each line is split at its first colon; the part before it is matched
    case-insensitively against the known labels and the part after it becomes
    the value. Unrecognised lines are ignored. When a label occurs more than
    once, the last occurrence wins.

    Models do not always follow the requested plain format, so labels wrapped
    in markdown emphasis, prefixed by a list bullet / quote marker / heading
    marker, or followed by a space before the colon are accepted as well
    (``**SUBJECT:** x``, ``- SUBJECT: x``, ``**SUBJECT**: x``, ``SUBJECT : x``).
    Plain ``LABEL: value`` lines keep their value untouched apart from
    surrounding whitespace.

    Args:
        text: Raw response text. ``None`` or an empty string is tolerated and
            yields all-empty fields instead of raising.

    Returns:
        A dict with the keys ``links_subject``, ``links_subject_slug``,
        ``links_object``, ``links_object_slug`` and ``links_mechanism``; any
        field not found in *text* is the empty string.
    """
    label_to_key = {
        "SUBJECT": "links_subject",
        "SUBJECT_SLUG": "links_subject_slug",
        "OBJECT": "links_object",
        "OBJECT_SLUG": "links_object_slug",
        "MECHANISM": "links_mechanism",
    }
    result: dict = dict.fromkeys(label_to_key.values(), "")

    # A response without content must not crash the batch callback.
    if not text:
        return result

    for line in text.splitlines():
        label, colon, value = line.partition(":")
        if not colon:
            continue

        bare_label = label.strip()
        # Peel off markdown decoration (emphasis, bullets, quotes, headings).
        # Interior underscores such as in SUBJECT_SLUG are unaffected.
        core_label = bare_label.strip("*_`#>- \t")
        key = label_to_key.get(core_label.upper())
        if key is None:
            continue

        value = value.strip()
        if core_label != bare_label:
            # The label was decorated, so the closing emphasis marker may sit
            # on the value side of the colon ("**SUBJECT:** x" / "**SUBJECT: x**").
            value = value.strip("*`").strip()
        result[key] = value

    return result
def run_relation_link_capture(
config: InfospaceConfig,
relation_entities: List[EntityMeta],
classifications: dict, # slug → EntityClassification
adapter: LLMAdapter,
run_config: Optional[RunConfig] = None,
output_dir: Optional[Path] = None,
progress_callback: Optional[Callable] = None,
) -> BatchSummary:
"""Capture relation endpoint data for Relation-type entities.
Reads existing classification files for Relation-type entities, skips
those that already have ``links_mechanism`` set, calls the LLM for the
rest, and updates classification files in-place.
Args:
config: The infospace configuration.
relation_entities: EntityMeta objects for Relation-type entities only.
classifications: Slug → EntityClassification map (pre-loaded).
adapter: LLM adapter.
run_config: LLM execution configuration.
output_dir: Where classification files live (defaults to config.classifications_dir).
progress_callback: Called after each item with (done, total, result).
Returns:
A :class:`BatchSummary` with per-entity results.
"""
topic = config.topic.name
cls_path = output_dir or Path(config.classifications_dir)
def _write_and_notify(done: int, total: int, result) -> None:
if result.status == "success" and result.response is not None:
parsed = parse_relation_link_response(result.response.content)
existing_cls = classifications.get(result.key)
if existing_cls is not None:
existing_cls.links_subject = parsed["links_subject"]
existing_cls.links_subject_slug = parsed["links_subject_slug"]
existing_cls.links_object = parsed["links_object"]
existing_cls.links_object_slug = parsed["links_object_slug"]
existing_cls.links_mechanism = parsed["links_mechanism"]
dest = cls_path / f"{result.key}.md"
write_entity_classification(existing_cls, dest)
if progress_callback is not None:
progress_callback(done, total, result)
items = [
BatchItem(
key=entity.slug,
prompt=build_relation_link_prompt(entity, topic),
)
for entity in relation_entities
]
evaluator = BatchEvaluator(
adapter=adapter,
config=run_config,