Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; incremental skip means
a follow-up run will complete them automatically.
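Type × VSM matrix (823 entities; "." marks an empty cell):

| Type        | S1 | S2 | S3 | S3* | S4 | S5 | Total     |
|-------------|----|----|----|-----|----|----|-----------|
| Element     | 86 | 75 | 58 | 21  | 43 | 32 | 315 (38%) |
| Process     | 39 | 42 | 37 | 17  | 67 | 24 | 226 (28%) |
| Institution | 4  | 12 | 30 | 24  | .  | 52 | 122 (15%) |
| Principle   | 3  | 7  | 15 | 2   | 43 | 32 | 102 (12%) |
| Relation    | 2  | 14 | 5  | 5   | 22 | 10 | 58 (7%)   |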
Matrix fill: 29/30 cells (Institution/S4 empty — expected)
Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29
Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
  (type_distribution is a dict, which previously crashed the round() call)
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
graph --format mermaid/dot, classify-links for Relation enrichment)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
87 lines
3.3 KiB
Python
87 lines
3.3 KiB
Python
"""
Data models for entity classification (L2 typed entities).

Each entity is assigned an Entity Type (what kind of thing it is) and a
VSM System (which control layer it inhabits). Both assignments come with
a one-sentence rationale from the LLM, stored alongside the classification.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from typing import Any, Dict, Optional
|
|
|
|
|
|
#: Controlled vocabulary for entity types.
ENTITY_TYPES = ["Element", "Process", "Relation", "Principle", "Institution"]

#: Controlled vocabulary for VSM system assignments.
VSM_SYSTEMS = ["S1", "S2", "S3", "S3*", "S4", "S5"]
|
|
|
|
|
|
@dataclass
class EntityClassification:
    """L2 classification for a single entity.

    Stores the entity's type and VSM system assignment together with the
    optional rationale / provenance fields and, for ``Relation`` entities,
    a description of the two entities the relation links.

    The class itself does not validate ``entity_type`` or ``vsm_system``
    against the controlled vocabularies; the inline comments only record
    the intended value sets.
    """

    entity_slug: str
    entity_type: str  # one of ENTITY_TYPES
    vsm_system: str  # one of VSM_SYSTEMS
    type_rationale: str = ""  # one sentence
    vsm_rationale: str = ""  # one sentence
    classified_by: str = ""  # model name
    classified_at: Optional[datetime] = None

    # Optional — only set when entity_type == "Relation"
    links_subject: str = ""  # human-readable title of entity A
    links_subject_slug: str = ""  # slug of entity A
    links_object: str = ""  # human-readable title of entity B
    links_object_slug: str = ""  # slug of entity B
    links_mechanism: str = ""  # one sentence: how A and B are connected

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of this classification.

        The three required fields are always present.  Optional text
        fields are emitted only when non-empty, and ``classified_at`` is
        emitted (as an ISO-8601 string) only when it is not ``None``.

        Returns:
            A dictionary whose keys follow the field declaration order.
        """
        d: Dict[str, Any] = {
            "entity_slug": self.entity_slug,
            "entity_type": self.entity_type,
            "vsm_system": self.vsm_system,
        }

        # Key order is kept identical to the field order so serialized
        # output stays stable: provenance text, timestamp, then links.
        for name in ("type_rationale", "vsm_rationale", "classified_by"):
            value = getattr(self, name)
            if value:
                d[name] = value

        if self.classified_at is not None:
            d["classified_at"] = self.classified_at.isoformat()

        for name in (
            "links_subject",
            "links_subject_slug",
            "links_object",
            "links_object_slug",
            "links_mechanism",
        ):
            value = getattr(self, name)
            if value:
                d[name] = value

        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EntityClassification":
        """Build a classification from a dictionary such as ``to_dict`` emits.

        Args:
            data: Mapping with the required keys ``entity_slug``,
                ``entity_type`` and ``vsm_system``; every other key is
                optional.  ``classified_at`` may be an ISO-8601 string, a
                ``datetime`` instance, or missing / ``None`` / empty.

        Returns:
            A new ``EntityClassification``.

        Raises:
            KeyError: If one of the three required keys is missing.
            ValueError: If ``classified_at`` is a non-empty string that
                ``datetime.fromisoformat`` cannot parse.
        """
        # A present-but-null timestamp (e.g. JSON ``null``) means "not set";
        # passing it to fromisoformat() would raise.
        raw_timestamp = data.get("classified_at")
        classified_at: Optional[datetime]
        if isinstance(raw_timestamp, datetime):
            classified_at = raw_timestamp
        elif raw_timestamp:
            classified_at = datetime.fromisoformat(raw_timestamp)
        else:
            classified_at = None

        # Missing or null optional text fields fall back to "" so the
        # str-typed attributes never hold None.
        optional_text = {
            name: data.get(name) or ""
            for name in (
                "type_rationale",
                "vsm_rationale",
                "classified_by",
                "links_subject",
                "links_subject_slug",
                "links_object",
                "links_object_slug",
                "links_mechanism",
            )
        }

        return cls(
            entity_slug=data["entity_slug"],
            entity_type=data["entity_type"],
            vsm_system=data["vsm_system"],
            classified_at=classified_at,
            **optional_text,
        )
|