# markitect-main/markitect/infospace/classification.py

"""
Data models for entity classification (L2 typed entities).

Each entity is assigned an Entity Type (what kind of thing it is) and a
VSM System (which control layer it inhabits).  Both assignments come with
a one-sentence rationale from the LLM, stored alongside the classification.
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, Optional


#: Closed set of labels allowed as an entity's type ("what kind of thing").
ENTITY_TYPES = [
    "Element",
    "Process",
    "Relation",
    "Principle",
    "Institution",
]

#: Closed set of labels allowed as an entity's VSM system assignment.
VSM_SYSTEMS = [
    "S1",
    "S2",
    "S3",
    "S3*",
    "S4",
    "S5",
]


@dataclass
class EntityClassification:
    """L2 classification for a single entity.

    Holds the entity's type and VSM system labels together with the optional
    rationales and provenance (which model classified it, and when).  The
    labels are stored as plain strings; this class does not validate them
    against ``ENTITY_TYPES`` / ``VSM_SYSTEMS``.
    """

    entity_slug: str
    entity_type: str          # one of ENTITY_TYPES
    vsm_system: str           # one of VSM_SYSTEMS
    type_rationale: str = ""  # one sentence
    vsm_rationale: str = ""   # one sentence
    classified_by: str = ""   # model name
    classified_at: Optional[datetime] = None  # None when no timestamp is known

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict.

        The three required fields are always present.  Optional fields are
        emitted only when they carry a value (non-empty string, non-``None``
        timestamp); ``classified_at`` is written as an ISO-8601 string.

        Returns:
            A dict containing only strings, suitable for ``from_dict``.
        """
        d: Dict[str, Any] = {
            "entity_slug": self.entity_slug,
            "entity_type": self.entity_type,
            "vsm_system": self.vsm_system,
        }
        if self.type_rationale:
            d["type_rationale"] = self.type_rationale
        if self.vsm_rationale:
            d["vsm_rationale"] = self.vsm_rationale
        if self.classified_by:
            d["classified_by"] = self.classified_by
        if self.classified_at is not None:
            d["classified_at"] = self.classified_at.isoformat()
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EntityClassification":
        """Build an instance from a dict such as the one ``to_dict`` produces.

        Args:
            data: Mapping with the required keys ``entity_slug``,
                ``entity_type`` and ``vsm_system``.  The rationale and
                ``classified_by`` keys are optional and default to ``""``.
                ``classified_at`` may be absent, ``None``, an empty string
                (all meaning "no timestamp"), an ISO-8601 string, or an
                already-parsed ``datetime``.

        Returns:
            The reconstructed ``EntityClassification``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``classified_at`` is a non-empty string that is
                not valid ISO-8601.
        """
        raw_ts = data.get("classified_at")
        classified_at: Optional[datetime]
        if raw_ts is None or raw_ts == "":
            # A null/blank timestamp is equivalent to an absent one; passing
            # it to fromisoformat() would raise instead.
            classified_at = None
        elif isinstance(raw_ts, datetime):
            # Caller handed over an already-parsed value; keep it as is.
            classified_at = raw_ts
        else:
            classified_at = datetime.fromisoformat(raw_ts)
        return cls(
            entity_slug=data["entity_slug"],
            entity_type=data["entity_type"],
            vsm_system=data["vsm_system"],
            type_rationale=data.get("type_rationale", ""),
            vsm_rationale=data.get("vsm_rationale", ""),
            classified_by=data.get("classified_by", ""),
            classified_at=classified_at,
        )