Files
markitect-main/markitect/infospace/classification.py
tegwick d1f57272a4 feat(example): add L2 classifications for 823/988 WoN entities (S3.4)
Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; incremental skip means
a follow-up run will complete them automatically.

Type × VSM matrix (823 entities):
                  S1   S2   S3  S3*   S4   S5
  Element         86   75   58   21   43   32  (315 total, 38%)
  Process         39   42   37   17   67   24  (226 total, 28%)
  Institution      4   12   30   24    .   52  (122 total, 15%)
  Principle        3    7   15    2   43   32  (102 total, 12%)
  Relation         2   14    5    5   22   10   (58 total,  7%)
  Matrix fill: 29/30 cells (Institution/S4 empty — expected)

Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29

Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
  (type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
  graph --format mermaid/dot, classify-links for Relation enrichment)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-23 12:49:11 +01:00

87 lines
3.3 KiB
Python

"""
Data models for entity classification (L2 typed entities).
Each entity is assigned an Entity Type (what kind of thing it is) and a
VSM System (which control layer it inhabits). Both assignments come with
a one-sentence rationale from the LLM, stored alongside the classification.
"""
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, Optional
#: Allowed labels for the ``entity_type`` field (controlled vocabulary).
ENTITY_TYPES = [
    "Element",
    "Process",
    "Relation",
    "Principle",
    "Institution",
]

#: Allowed labels for the ``vsm_system`` field (controlled vocabulary).
VSM_SYSTEMS = [
    "S1",
    "S2",
    "S3",
    "S3*",
    "S4",
    "S5",
]
@dataclass
class EntityClassification:
    """L2 classification for a single entity.

    Holds the entity's type and VSM system together with optional
    rationales, provenance (model name and timestamp) and, for
    ``Relation`` entities, the two linked entities and the mechanism
    connecting them.

    The class performs no validation: ``entity_type`` and ``vsm_system``
    are stored as given and are not checked against ``ENTITY_TYPES`` or
    ``VSM_SYSTEMS`` here.
    """

    entity_slug: str
    entity_type: str  # one of ENTITY_TYPES
    vsm_system: str  # one of VSM_SYSTEMS
    type_rationale: str = ""  # one sentence
    vsm_rationale: str = ""  # one sentence
    classified_by: str = ""  # model name
    classified_at: Optional[datetime] = None
    # Optional — only set when entity_type == "Relation"
    links_subject: str = ""  # human-readable title of entity A
    links_subject_slug: str = ""  # slug of entity A
    links_object: str = ""  # human-readable title of entity B
    links_object_slug: str = ""  # slug of entity B
    links_mechanism: str = ""  # one sentence: how A and B are connected

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, omitting unset optional fields.

        The three required fields are always present. Optional string
        fields are emitted only when non-empty, and ``classified_at`` only
        when it is not ``None`` (as an ISO 8601 string).

        Returns:
            A dict that ``from_dict`` accepts.
        """
        d: Dict[str, Any] = {
            "entity_slug": self.entity_slug,
            "entity_type": self.entity_type,
            "vsm_system": self.vsm_system,
        }
        # Key insertion order mirrors field declaration order, so the
        # timestamp is written between the provenance and link fields.
        for name in ("type_rationale", "vsm_rationale", "classified_by"):
            value = getattr(self, name)
            if value:
                d[name] = value
        if self.classified_at is not None:
            d["classified_at"] = self.classified_at.isoformat()
        for name in (
            "links_subject",
            "links_subject_slug",
            "links_object",
            "links_object_slug",
            "links_mechanism",
        ):
            value = getattr(self, name)
            if value:
                d[name] = value
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EntityClassification":
        """Build an instance from a dict such as one produced by ``to_dict``.

        Args:
            data: Mapping with the required keys ``entity_slug``,
                ``entity_type`` and ``vsm_system``. All other keys are
                optional and default to an empty string (or ``None`` for
                ``classified_at``).

        Returns:
            The reconstructed classification.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``classified_at`` is a non-empty string that is
                not a valid ISO 8601 timestamp.
        """
        raw_timestamp = data.get("classified_at")
        classified_at: Optional[datetime]
        if raw_timestamp is None or raw_timestamp == "":
            # A present-but-empty value means "not recorded", same as absent.
            classified_at = None
        elif isinstance(raw_timestamp, datetime):
            # Already parsed upstream (for example, by a loader that
            # converts timestamps natively); fromisoformat() would reject it.
            classified_at = raw_timestamp
        else:
            classified_at = datetime.fromisoformat(raw_timestamp)
        return cls(
            entity_slug=data["entity_slug"],
            entity_type=data["entity_type"],
            vsm_system=data["vsm_system"],
            type_rationale=data.get("type_rationale", ""),
            vsm_rationale=data.get("vsm_rationale", ""),
            classified_by=data.get("classified_by", ""),
            classified_at=classified_at,
            links_subject=data.get("links_subject", ""),
            links_subject_slug=data.get("links_subject_slug", ""),
            links_object=data.get("links_object", ""),
            links_object_slug=data.get("links_object_slug", ""),
            links_mechanism=data.get("links_mechanism", ""),
        )