feat(infospace): add L2 entity classification with type × VSM matrix (S2.9)
Implements the L2 typed-entities layer — each entity is assigned an
Entity Type (Element, Process, Relation, Principle, Institution) and a
VSM System (S1–S5) by an LLM, with one-sentence rationales for each.
New modules:
- markitect/infospace/classification.py — EntityClassification dataclass
+ ENTITY_TYPES / VSM_SYSTEMS controlled vocabularies
- markitect/infospace/classification_io.py — write/read classification
files (YAML frontmatter + markdown body, mirrors evaluation_io)
- markitect/infospace/classifier.py — build_classification_prompt(),
parse_classification_response(), run_entity_classification(); batch
runner writes files incrementally (same resumable pattern as evaluate)
CLI: markitect infospace classify [--entity SLUG] [--provider P] [--model M]
- Incremental skip: checks output/classifications/ for existing files
- Defaults to openrouter provider; 2000 max_tokens (Gemini 2.5 Flash
uses ~787 thinking tokens, so 800 was too low)
CLI: markitect infospace classify-summary [--update-metrics]
- Entity type counts + VSM system counts with percentages
- 5 × 6 type × VSM matrix (spots structural blind spots at a glance)
- --update-metrics writes type_distribution, type_entropy,
vsm_type_matrix_cells to metrics.yaml
Config: InfospaceConfig gains classifications_dir (default output/classifications)
Schema: schemas/typed-entity-schema-v1.0.md — type/VSM vocabulary tables,
rationale format rules, validation rules, metrics enabled at L2
infospace.yaml: schemas.typed_entity references typed-entity-schema-v1.0.md
Seed classifications (3): division_of_labour (Process/S1),
natural_price_as_central_price (Principle/S2),
invisible_hand_mechanism (Principle/S4)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
80
markitect/infospace/classification_io.py
Normal file
80
markitect/infospace/classification_io.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
Read/write utilities for entity classification files (L2).
|
||||
|
||||
Classification files use YAML frontmatter (machine-readable) plus a
|
||||
markdown body (human-readable), matching the convention used by evaluation
|
||||
files.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import yaml
|
||||
|
||||
from .classification import EntityClassification
|
||||
|
||||
|
||||
# Delimiter line that opens and closes the YAML frontmatter block of a
# classification file.
_FRONTMATTER_SEP = "---"
|
||||
|
||||
|
||||
def write_entity_classification(c: EntityClassification, path: Path) -> None:
    """Serialise one entity classification to ``path``.

    The file consists of a YAML frontmatter block (the classification's
    ``to_dict()`` output, key order preserved) followed by a markdown body
    that repeats the entity type, the VSM system and, when non-empty, the
    two rationales. Missing parent directories are created.

    Args:
        c: Classification to write.
        path: Destination file; overwritten if it already exists.
    """
    frontmatter_yaml = yaml.safe_dump(
        c.to_dict(), default_flow_style=False, sort_keys=False
    ).rstrip()

    # Human-readable heading derived from the slug, e.g. "division_of_labour"
    # becomes "Division Of Labour".
    heading = c.entity_slug.replace("_", " ").replace("-", " ").title()

    # The two mandatory sections always appear; rationale sections are
    # emitted only when they carry text.
    sections = [
        ("## Entity Type", c.entity_type),
        ("## VSM System", c.vsm_system),
    ]
    if c.type_rationale:
        sections.append(("## Type Rationale", c.type_rationale))
    if c.vsm_rationale:
        sections.append(("## VSM Rationale", c.vsm_rationale))

    out: List[str] = [
        _FRONTMATTER_SEP,
        frontmatter_yaml,
        _FRONTMATTER_SEP,
        "",
        f"# Classification: {heading}",
        "",
    ]
    for section_heading, content in sections:
        # Each section is: heading, blank line, content, blank line.
        out.extend((section_heading, "", content, ""))

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(out), encoding="utf-8")
|
||||
|
||||
|
||||
def read_entity_classification(path: Path) -> EntityClassification:
    """Read a classification file (YAML frontmatter + markdown body).

    Only the frontmatter is parsed; the markdown body is ignored. The
    frontmatter is the text between the first two lines that consist solely
    of the separator (trailing whitespace tolerated).

    Args:
        path: Classification file to read (UTF-8).

    Returns:
        The classification built by ``EntityClassification.from_dict`` from
        the parsed frontmatter mapping.

    Raises:
        ValueError: If the file has no opening/closing separator pair, or
            the frontmatter does not parse to a YAML mapping.
        OSError: If the file cannot be read.

    YAML syntax errors raised by ``yaml.safe_load`` propagate unchanged.
    """
    # A leading byte-order mark would otherwise hide a first-line separator.
    text = path.read_text(encoding="utf-8").lstrip("\ufeff")

    # Match separators as whole lines rather than as a substring: a
    # frontmatter value that merely ends in "---" must not be mistaken for
    # the closing fence, and a closing fence on the very last line (no
    # trailing newline) must still be recognised.
    rows = text.split("\n")
    fence_rows = [
        idx for idx, row in enumerate(rows) if row.rstrip() == _FRONTMATTER_SEP
    ][:2]
    if len(fence_rows) < 2:
        raise ValueError(f"No YAML frontmatter found in {path}")

    opening, closing = fence_rows
    fm = yaml.safe_load("\n".join(rows[opening + 1:closing]))

    # Empty frontmatter parses to None and a bare scalar/list is equally
    # unusable; report it clearly instead of failing inside from_dict.
    if not isinstance(fm, dict):
        raise ValueError(f"YAML frontmatter in {path} is not a mapping")
    return EntityClassification.from_dict(fm)
|
||||
|
||||
|
||||
def read_classifications_directory(directory: Path) -> List[EntityClassification]:
    """Read all classification files from a directory.

    Every ``*.md`` file directly inside ``directory`` is read in sorted
    path order. Reading is best-effort: a file that cannot be read or
    parsed is skipped, and a warning naming the file and the error is
    logged so the omission is visible.

    Args:
        directory: Directory containing classification ``.md`` files.

    Returns:
        The classifications that were read successfully, in path order.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import logging

    log = logging.getLogger(__name__)
    results: List[EntityClassification] = []
    for p in sorted(directory.glob("*.md")):
        try:
            results.append(read_entity_classification(p))
        except Exception as exc:
            # Deliberately broad: one malformed file must not abort the
            # whole directory scan, but it should not vanish silently.
            log.warning("Skipping unreadable classification file %s: %s", p, exc)
    return results
|
||||
Reference in New Issue
Block a user