Implements the L2 typed-entities layer — each entity is assigned an
Entity Type (Element, Process, Relation, Principle, Institution) and a
VSM System (S1–S5, plus S3*) by an LLM, with a one-sentence rationale for each assignment.
New modules:
- markitect/infospace/classification.py — EntityClassification dataclass
+ ENTITY_TYPES / VSM_SYSTEMS controlled vocabularies
- markitect/infospace/classification_io.py — write/read classification
files (YAML frontmatter + markdown body, mirrors evaluation_io)
- markitect/infospace/classifier.py — build_classification_prompt(),
parse_classification_response(), run_entity_classification(); batch
runner writes files incrementally (same resumable pattern as evaluate)
CLI: markitect infospace classify [--entity SLUG] [--provider P] [--model M]
- Incremental skip: checks output/classifications/ for existing files
- Defaults to openrouter provider; 2000 max_tokens (Gemini 2.5 Flash
uses ~787 thinking tokens, so 800 was too low)
CLI: markitect infospace classify-summary [--update-metrics]
- Entity type counts + VSM system counts with percentages
- 5 × 6 type × VSM matrix (spots structural blind spots at a glance)
- --update-metrics writes type_distribution, type_entropy,
vsm_type_matrix_cells to metrics.yaml
Config: InfospaceConfig gains classifications_dir (default output/classifications)
Schema: schemas/typed-entity-schema-v1.0.md — type/VSM vocabulary tables,
rationale format rules, validation rules, metrics enabled at L2
infospace.yaml: schemas.typed_entity references typed-entity-schema-v1.0.md
Seed classifications (3): division_of_labour (Process/S1),
natural_price_as_central_price (Principle/S2),
invisible_hand_mechanism (Principle/S4)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
65 lines
2.2 KiB
Python
65 lines
2.2 KiB
Python
"""
|
|
Data models for entity classification (L2 typed entities).
|
|
|
|
Each entity is assigned an Entity Type (what kind of thing it is) and a
|
|
VSM System (which control layer it inhabits). Both assignments come with
|
|
a one-sentence rationale from the LLM, stored alongside the classification.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from typing import Any, Dict, Optional
|
|
|
|
|
|
#: Controlled vocabulary of entity types (the value space of
#: ``EntityClassification.entity_type``).
ENTITY_TYPES = [
    "Element",
    "Process",
    "Relation",
    "Principle",
    "Institution",
]

#: Controlled vocabulary of VSM system assignments (the value space of
#: ``EntityClassification.vsm_system``). Six entries: S3* is listed
#: separately, between S3 and S4.
VSM_SYSTEMS = [
    "S1",
    "S2",
    "S3",
    "S3*",
    "S4",
    "S5",
]
|
|
|
|
|
|
@dataclass
class EntityClassification:
    """L2 classification for a single entity.

    Records the entity type and VSM system assigned to one entity, the
    optional one-sentence rationales for both assignments, and optional
    provenance (which model classified it, and when).

    The class itself performs no validation: ``entity_type`` and
    ``vsm_system`` are stored exactly as given.
    """

    entity_slug: str
    entity_type: str  # one of ENTITY_TYPES
    vsm_system: str  # one of VSM_SYSTEMS
    type_rationale: str = ""  # one sentence
    vsm_rationale: str = ""  # one sentence
    classified_by: str = ""  # model name
    classified_at: Optional[datetime] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict representation of this classification.

        The three required fields are always present. Optional fields are
        emitted only when set (non-empty string / non-``None`` timestamp),
        and ``classified_at`` is rendered as an ISO 8601 string.
        """
        d: Dict[str, Any] = {
            "entity_slug": self.entity_slug,
            "entity_type": self.entity_type,
            "vsm_system": self.vsm_system,
        }
        if self.type_rationale:
            d["type_rationale"] = self.type_rationale
        if self.vsm_rationale:
            d["vsm_rationale"] = self.vsm_rationale
        if self.classified_by:
            d["classified_by"] = self.classified_by
        if self.classified_at is not None:
            d["classified_at"] = self.classified_at.isoformat()
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EntityClassification":
        """Build an instance from a mapping such as one made by ``to_dict``.

        Args:
            data: Mapping with the required keys ``entity_slug``,
                ``entity_type`` and ``vsm_system``. The optional string
                keys default to ``""``. ``classified_at`` may be absent,
                ``None``, an ISO 8601 string, or a ``datetime`` instance.

        Returns:
            The reconstructed ``EntityClassification``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``classified_at`` is a string that is not a
                valid ISO 8601 timestamp.
        """
        raw_timestamp = data.get("classified_at")
        classified_at: Optional[datetime]
        if raw_timestamp is None:
            # Key absent or explicitly null: no timestamp recorded.
            classified_at = None
        elif isinstance(raw_timestamp, datetime):
            # Some loaders (e.g. YAML with an unquoted timestamp) hand back
            # an already-parsed datetime; fromisoformat() would raise
            # TypeError on it, so use it as-is.
            classified_at = raw_timestamp
        else:
            classified_at = datetime.fromisoformat(raw_timestamp)
        return cls(
            entity_slug=data["entity_slug"],
            entity_type=data["entity_type"],
            vsm_system=data["vsm_system"],
            type_rationale=data.get("type_rationale", ""),
            vsm_rationale=data.get("vsm_rationale", ""),
            classified_by=data.get("classified_by", ""),
            classified_at=classified_at,
        )
|