Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; incremental skip means
a follow-up run will complete them automatically.
Type × VSM matrix (823 entities):
               S1   S2   S3  S3*   S4   S5
Element        86   75   58   21   43   32  (315 total, 38%)
Process        39   42   37   17   67   24  (226 total, 28%)
Institution     4   12   30   24    .   52  (122 total, 15%)
Principle       3    7   15    2   43   32  (102 total, 12%)
Relation        2   14    5    5   22   10  (58 total, 7%)
Matrix fill: 29/30 cells (Institution/S4 empty — expected)
Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29
Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
(type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
graph --format mermaid/dot, classify-links for Relation enrichment)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
92 lines
2.6 KiB
Python
92 lines
2.6 KiB
Python
"""
|
|
Read/write utilities for entity classification files (L2).
|
|
|
|
Classification files use YAML frontmatter (machine-readable) plus a
|
|
markdown body (human-readable), matching the convention used by evaluation
|
|
files.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import List
|
|
|
|
import yaml
|
|
|
|
from .classification import EntityClassification
|
|
|
|
|
|
_FRONTMATTER_SEP = "---"
|
|
|
|
|
|
def write_entity_classification(c: EntityClassification, path: Path) -> None:
    """Serialise *c* to *path* as YAML frontmatter followed by a markdown body.

    The frontmatter is the YAML dump of ``c.to_dict()`` (key order preserved),
    fenced by the frontmatter separator. The body repeats the main fields as
    markdown sections:

    * a title derived from ``c.entity_slug`` (underscores and hyphens become
      spaces, then title-cased),
    * ``Entity Type`` and ``VSM System`` (always written),
    * ``Type Rationale`` and ``VSM Rationale`` (only when non-empty),
    * ``Links`` (only when ``c.links_mechanism`` is non-empty; the subject and
      object lines inside it are each optional).

    Missing parent directories of *path* are created. The file is written as
    UTF-8 and any existing file is overwritten.
    """
    frontmatter_yaml = yaml.safe_dump(
        c.to_dict(), default_flow_style=False, sort_keys=False
    ).rstrip()
    heading = c.entity_slug.replace("_", " ").replace("-", " ").title()

    # Sections that are always present, in output order.
    out: List[str] = [
        _FRONTMATTER_SEP,
        frontmatter_yaml,
        _FRONTMATTER_SEP,
        "",
        f"# Classification: {heading}",
        "",
        "## Entity Type",
        "",
        c.entity_type,
        "",
        "## VSM System",
        "",
        c.vsm_system,
        "",
    ]

    # Optional free-text sections: emitted only when they carry content.
    for section_heading, section_text in (
        ("## Type Rationale", c.type_rationale),
        ("## VSM Rationale", c.vsm_rationale),
    ):
        if section_text:
            out.extend([section_heading, "", section_text, ""])

    # The Links section is keyed on the mechanism text; subject/object are
    # extra detail lines placed above it.
    if c.links_mechanism:
        out.extend(["## Links", ""])
        if c.links_subject:
            out.append(f"**Subject:** {c.links_subject}")
        if c.links_object:
            out.append(f"**Object:** {c.links_object}")
        out.extend(["", c.links_mechanism, ""])

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(out), encoding="utf-8")
|
|
|
|
|
|
def read_entity_classification(path: Path) -> EntityClassification:
    """Read a classification file (YAML frontmatter + markdown body).

    Only the frontmatter is parsed; the markdown body is ignored.

    The frontmatter is the text between the first two lines that consist
    solely of the frontmatter separator (trailing whitespace tolerated).
    Fences are matched as whole lines rather than as a raw substring, so a
    YAML line that merely *ends* in ``---`` is not mistaken for the closing
    fence.

    Args:
        path: Location of the classification file; read as UTF-8.

    Returns:
        The object built by ``EntityClassification.from_dict`` from the
        parsed frontmatter mapping.

    Raises:
        ValueError: If the file lacks an opening and closing frontmatter
            fence, or if the frontmatter does not parse to a mapping.
    """
    # Split on "\n" only (not str.splitlines) so that other Unicode line
    # separators inside YAML values are left untouched when re-joined.
    rows = path.read_text(encoding="utf-8").split("\n")
    fence_rows = [
        idx for idx, row in enumerate(rows) if row.rstrip() == _FRONTMATTER_SEP
    ]
    if len(fence_rows) < 2:
        raise ValueError(f"No YAML frontmatter found in {path}")

    opening, closing = fence_rows[0], fence_rows[1]
    fm = yaml.safe_load("\n".join(rows[opening + 1 : closing]))
    # Empty frontmatter parses to None; scalars/lists are equally unusable.
    if not isinstance(fm, dict):
        raise ValueError(f"YAML frontmatter in {path} is not a mapping")
    return EntityClassification.from_dict(fm)
|
|
|
|
|
|
def read_classifications_directory(directory: Path) -> List[EntityClassification]:
    """Read all classification files from a directory.

    Every ``*.md`` file directly inside *directory* is loaded, in sorted path
    order. Loading is best-effort: a file that fails to load is skipped so one
    bad file does not abort the whole scan, but the failure is logged as a
    warning instead of being silently discarded.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        The successfully loaded classifications, in sorted path order. Empty
        when no file matches or none could be loaded.
    """
    # Function-scope import keeps this block self-contained; the module's
    # import section is left untouched.
    import logging

    log = logging.getLogger(__name__)
    results: List[EntityClassification] = []
    for p in sorted(directory.glob("*.md")):
        try:
            results.append(read_entity_classification(p))
        except Exception as exc:  # deliberate best-effort: skip, but report
            log.warning("Skipping unreadable classification file %s: %s", p, exc)
    return results
|