feat(example): add L2 classifications for 823/988 WoN entities (S3.4)
Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; incremental skip means
a follow-up run will complete them automatically.
Type × VSM matrix (823 entities):
S1 S2 S3 S3* S4 S5
Element 86 75 58 21 43 32 (315 total, 38%)
Process 39 42 37 17 67 24 (226 total, 28%)
Institution 4 12 30 24 . 52 (122 total, 15%)
Principle 3 7 15 2 43 32 (102 total, 12%)
Relation 2 14 5 5 22 10 (58 total, 7%)
Matrix fill: 29/30 cells (Institution/S4 empty — expected)
Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29
Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
(type_distribution is a dict, which previously crashed round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
graph --format mermaid/dot, classify-links for Relation enrichment)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -200,6 +200,7 @@ def run_entity_classification(
|
||||
run_config: Optional[RunConfig] = None,
|
||||
output_dir: Optional[Path] = None,
|
||||
progress_callback: Optional[Callable] = None,
|
||||
delay_seconds: float = 0.0,
|
||||
) -> BatchSummary:
|
||||
"""Run per-entity classification using the batch evaluator.
|
||||
|
||||
@@ -214,6 +215,7 @@ def run_entity_classification(
|
||||
output_dir: Where to write classification results. Defaults to
|
||||
``config.classifications_dir`` relative to CWD.
|
||||
progress_callback: Called after each item with (done, total, result).
|
||||
delay_seconds: Seconds to sleep between requests (for rate limiting).
|
||||
|
||||
Returns:
|
||||
A :class:`BatchSummary` with per-entity results.
|
||||
@@ -250,6 +252,148 @@ def run_entity_classification(
|
||||
for entity in entities
|
||||
]
|
||||
|
||||
evaluator = BatchEvaluator(
|
||||
adapter=adapter,
|
||||
config=run_config,
|
||||
progress_callback=_write_and_notify,
|
||||
delay_seconds=delay_seconds,
|
||||
)
|
||||
return evaluator.evaluate(items)
|
||||
|
||||
|
||||
# ── Relation-link prompt and runner ───────────────────────────────────────────
|
||||
|
||||
# Prompt template asking an LLM to name the two entities a Relation-type
# entity connects and to describe the linking mechanism in one sentence.
# It is a str.format template with the placeholders {topic}, {title},
# {domain}, {definition} and {context}. Its "Output format" section lists the
# labels the model is asked to emit: SUBJECT, SUBJECT_SLUG, OBJECT,
# OBJECT_SLUG and MECHANISM. A trailing backslash inside the literal joins
# that line with the next one in the rendered prompt.
_RELATION_LINK_PROMPT_TEMPLATE = """\
|
||||
You are enriching a Relation-type entity from an infospace about "{topic}".
|
||||
|
||||
This entity IS a structural connector — a dependency, mechanism, or causal link \
|
||||
between two other entities. Your task: identify which two entities it connects \
|
||||
and describe the linking mechanism in one sentence.
|
||||
|
||||
## Entity: {title}
|
||||
|
||||
**Domain:** {domain}
|
||||
|
||||
### Definition
|
||||
|
||||
{definition}
|
||||
|
||||
### Context
|
||||
|
||||
{context}
|
||||
|
||||
---
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Read the definition and context carefully.
|
||||
2. Identify **Entity A** (the subject/origin of the relation) and **Entity B** \
|
||||
(the object/destination).
|
||||
3. Write a single sentence explaining HOW this entity connects or mediates between A and B.
|
||||
4. Use **exactly** the output format below — no preamble, no extra lines.
|
||||
5. For slugs: use lowercase letters and underscores only (same as file names), \
|
||||
e.g. "division_of_labour", "market_extent".
|
||||
|
||||
## Output format
|
||||
|
||||
SUBJECT: <human-readable title of Entity A>
|
||||
SUBJECT_SLUG: <slug of Entity A>
|
||||
OBJECT: <human-readable title of Entity B>
|
||||
OBJECT_SLUG: <slug of Entity B>
|
||||
MECHANISM: <one sentence describing how this entity links A to B>
|
||||
"""
|
||||
|
||||
|
||||
def build_relation_link_prompt(entity: EntityMeta, topic: str) -> str:
    """Render the relation-link enrichment prompt for a single entity.

    Args:
        entity: The Relation-type entity whose title, domain, definition and
            context are substituted into the template.
        topic: Name of the infospace topic, substituted for ``{topic}``.

    Returns:
        The filled-in prompt text. A falsy domain, definition or context is
        replaced by a parenthesised placeholder so the prompt never contains
        an empty section.
    """
    title = entity.title
    domain = entity.domain or "(unspecified)"
    definition = entity.definition or "(no definition provided)"
    context = entity.context or "(no context provided)"

    substitutions = {
        "topic": topic,
        "title": title,
        "domain": domain,
        "definition": definition,
        "context": context,
    }
    return _RELATION_LINK_PROMPT_TEMPLATE.format(**substitutions)
|
||||
|
||||
|
||||
def parse_relation_link_response(text: str) -> dict:
    """Parse SUBJECT/SUBJECT_SLUG/OBJECT/OBJECT_SLUG/MECHANISM from an LLM response.

    Each line of the form ``LABEL: value`` (label matched case-insensitively)
    fills the corresponding ``links_*`` key. Unrecognised lines are ignored,
    and when a label occurs more than once the last occurrence wins.

    Models frequently decorate the requested format with markdown, e.g.
    ``**SUBJECT:** Foo``, ``- SUBJECT: Foo`` or a backticked slug. That
    decoration is stripped so such responses are still parsed instead of
    silently yielding empty fields.

    Args:
        text: Raw response text. Empty or ``None`` input yields all-empty fields.

    Returns:
        A dict with the keys ``links_subject``, ``links_subject_slug``,
        ``links_object``, ``links_object_slug`` and ``links_mechanism``; every
        value is a string, empty when the label was not found.
    """
    label_to_field = {
        "SUBJECT": "links_subject",
        "SUBJECT_SLUG": "links_subject_slug",
        "OBJECT": "links_object",
        "OBJECT_SLUG": "links_object_slug",
        "MECHANISM": "links_mechanism",
    }
    result: dict = {field: "" for field in label_to_field.values()}
    if not text:
        return result

    for line in text.splitlines():
        # Drop list bullets, heading/quote markers and opening bold/code
        # markers that precede the label.
        candidate = line.strip().lstrip("-*#>` \t")
        label, sep, value = candidate.partition(":")
        if not sep:
            continue
        # "**SUBJECT**: x" leaves the closing bold marker on the label.
        field = label_to_field.get(label.strip().strip("*`").upper())
        if field is None:
            continue
        # "**SUBJECT:** x" leaves the closing bold marker on the value;
        # slugs are also commonly wrapped in backticks.
        result[field] = value.strip().strip("*`").strip()
    return result
|
||||
|
||||
|
||||
def run_relation_link_capture(
|
||||
config: InfospaceConfig,
|
||||
relation_entities: List[EntityMeta],
|
||||
classifications: dict, # slug → EntityClassification
|
||||
adapter: LLMAdapter,
|
||||
run_config: Optional[RunConfig] = None,
|
||||
output_dir: Optional[Path] = None,
|
||||
progress_callback: Optional[Callable] = None,
|
||||
) -> BatchSummary:
|
||||
"""Capture relation endpoint data for Relation-type entities.
|
||||
|
||||
Reads existing classification files for Relation-type entities, skips
|
||||
those that already have ``links_mechanism`` set, calls the LLM for the
|
||||
rest, and updates classification files in-place.
|
||||
|
||||
Args:
|
||||
config: The infospace configuration.
|
||||
relation_entities: EntityMeta objects for Relation-type entities only.
|
||||
classifications: Slug → EntityClassification map (pre-loaded).
|
||||
adapter: LLM adapter.
|
||||
run_config: LLM execution configuration.
|
||||
output_dir: Where classification files live (defaults to config.classifications_dir).
|
||||
progress_callback: Called after each item with (done, total, result).
|
||||
|
||||
Returns:
|
||||
A :class:`BatchSummary` with per-entity results.
|
||||
"""
|
||||
topic = config.topic.name
|
||||
cls_path = output_dir or Path(config.classifications_dir)
|
||||
|
||||
def _write_and_notify(done: int, total: int, result) -> None:
|
||||
if result.status == "success" and result.response is not None:
|
||||
parsed = parse_relation_link_response(result.response.content)
|
||||
existing_cls = classifications.get(result.key)
|
||||
if existing_cls is not None:
|
||||
existing_cls.links_subject = parsed["links_subject"]
|
||||
existing_cls.links_subject_slug = parsed["links_subject_slug"]
|
||||
existing_cls.links_object = parsed["links_object"]
|
||||
existing_cls.links_object_slug = parsed["links_object_slug"]
|
||||
existing_cls.links_mechanism = parsed["links_mechanism"]
|
||||
dest = cls_path / f"{result.key}.md"
|
||||
write_entity_classification(existing_cls, dest)
|
||||
|
||||
if progress_callback is not None:
|
||||
progress_callback(done, total, result)
|
||||
|
||||
items = [
|
||||
BatchItem(
|
||||
key=entity.slug,
|
||||
prompt=build_relation_link_prompt(entity, topic),
|
||||
)
|
||||
for entity in relation_entities
|
||||
]
|
||||
|
||||
evaluator = BatchEvaluator(
|
||||
adapter=adapter,
|
||||
config=run_config,
|
||||
|
||||
Reference in New Issue
Block a user