Implements the L2 typed-entities layer — each entity is assigned an
Entity Type (Element, Process, Relation, Principle, Institution) and a
VSM System (S1–S5) by an LLM, with one-sentence rationales for each.
New modules:
- markitect/infospace/classification.py — EntityClassification dataclass
+ ENTITY_TYPES / VSM_SYSTEMS controlled vocabularies
- markitect/infospace/classification_io.py — write/read classification
files (YAML frontmatter + markdown body, mirrors evaluation_io)
- markitect/infospace/classifier.py — build_classification_prompt(),
parse_classification_response(), run_entity_classification(); batch
runner writes files incrementally (same resumable pattern as evaluate)
CLI: markitect infospace classify [--entity SLUG] [--provider P] [--model M]
- Incremental skip: checks output/classifications/ for existing files
- Defaults to openrouter provider; 2000 max_tokens (Gemini 2.5 Flash
uses ~787 thinking tokens, so 800 was too low)
CLI: markitect infospace classify-summary [--update-metrics]
- Entity type counts + VSM system counts with percentages
- 5 × 6 type × VSM matrix (spots structural blind spots at a glance)
- --update-metrics writes type_distribution, type_entropy,
vsm_type_matrix_cells to metrics.yaml
Config: InfospaceConfig gains classifications_dir (default output/classifications)
Schema: schemas/typed-entity-schema-v1.0.md — type/VSM vocabulary tables,
rationale format rules, validation rules, metrics enabled at L2
infospace.yaml: schemas.typed_entity references typed-entity-schema-v1.0.md
Seed classifications (3): division_of_labour (Process/S1),
natural_price_as_central_price (Principle/S2),
invisible_hand_mechanism (Principle/S4)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
81 lines
2.3 KiB
Python
81 lines
2.3 KiB
Python
"""
|
|
Read/write utilities for entity classification files (L2).
|
|
|
|
Classification files use YAML frontmatter (machine-readable) plus a
|
|
markdown body (human-readable), matching the convention used by evaluation
|
|
files.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from typing import List
|
|
|
|
import yaml
|
|
|
|
from .classification import EntityClassification
|
|
|
|
|
|
# Delimiter line that opens and closes the YAML frontmatter section of a
# classification file; the writer emits it and the reader splits on it.
_FRONTMATTER_SEP = "---"
|
|
|
|
|
|
def write_entity_classification(c: EntityClassification, path: Path) -> None:
    """Serialise one entity classification to *path*.

    The file starts with a YAML frontmatter section (the mapping returned by
    ``c.to_dict()``, key order preserved) fenced by separator lines, followed
    by a markdown body: a title derived from the entity slug, the entity type
    and VSM system sections, and one section per non-empty rationale.

    Missing parent directories of *path* are created; the file is written as
    UTF-8 and any existing file at *path* is overwritten.
    """
    frontmatter = yaml.safe_dump(
        c.to_dict(), default_flow_style=False, sort_keys=False
    ).rstrip()
    heading = c.entity_slug.replace("_", " ").replace("-", " ").title()

    # (section heading, section text, emitted even when the text is empty?)
    sections = (
        ("## Entity Type", c.entity_type, True),
        ("## VSM System", c.vsm_system, True),
        ("## Type Rationale", c.type_rationale, False),
        ("## VSM Rationale", c.vsm_rationale, False),
    )

    out: List[str] = [
        _FRONTMATTER_SEP,
        frontmatter,
        _FRONTMATTER_SEP,
        "",
        f"# Classification: {heading}",
        "",
    ]
    for section_title, section_text, mandatory in sections:
        if mandatory or section_text:
            # Each section is: heading, blank line, text, blank line.
            out += [section_title, "", section_text, ""]

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(out), encoding="utf-8")
|
|
|
|
|
|
def read_entity_classification(path: Path) -> EntityClassification:
    """Read a classification file (YAML frontmatter + markdown body).

    Only the frontmatter is parsed; the markdown body after the closing
    separator is ignored. The frontmatter must be the first non-blank content
    of the file and must be fenced by two lines consisting solely of the
    separator.

    Args:
        path: Location of the classification file (read as UTF-8).

    Returns:
        The classification built from the frontmatter mapping via
        ``EntityClassification.from_dict``.

    Raises:
        ValueError: If the file has no properly fenced frontmatter, or the
            frontmatter does not parse to a YAML mapping.
    """
    text = path.read_text(encoding="utf-8")
    # Tolerate a UTF-8 byte-order mark left by some editors.
    lines = text.lstrip("\ufeff").split("\n")

    # Match the separator as a whole line. A plain substring split on
    # "---\n" would also cut through a YAML value line that merely ends in
    # "---", silently truncating the frontmatter.
    start = next((i for i, line in enumerate(lines) if line.strip()), None)
    if start is None or lines[start].rstrip() != _FRONTMATTER_SEP:
        raise ValueError(f"No YAML frontmatter found in {path}")

    end = next(
        (
            i
            for i in range(start + 1, len(lines))
            if lines[i].rstrip() == _FRONTMATTER_SEP
        ),
        None,
    )
    if end is None:
        raise ValueError(f"No YAML frontmatter found in {path}")

    fm = yaml.safe_load("\n".join(lines[start + 1:end]))
    # An empty frontmatter loads as None and a bare scalar as str; reject
    # both here with a clear message (assumes from_dict expects a mapping).
    if not isinstance(fm, dict):
        raise ValueError(f"YAML frontmatter in {path} is not a mapping")
    return EntityClassification.from_dict(fm)
|
|
|
|
|
|
def read_classifications_directory(directory: Path) -> List[EntityClassification]:
    """Read all classification files from a directory.

    Every ``*.md`` file directly inside *directory* is read in sorted path
    order. Reading is best-effort: a file that cannot be read or parsed is
    skipped with a logged warning instead of aborting the whole batch.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        The successfully parsed classifications, in sorted path order.
    """
    # Function-scope import so the module's top-level imports stay untouched.
    import logging

    logger = logging.getLogger(__name__)
    results: List[EntityClassification] = []
    for p in sorted(directory.glob("*.md")):
        try:
            results.append(read_entity_classification(p))
        except Exception as exc:
            # Deliberately broad: one corrupt file must not hide the rest,
            # but the skip is made visible rather than silently swallowed.
            logger.warning(
                "Skipping unreadable classification file %s: %s", p, exc
            )
    return results
|