feat(infospace): add L2 entity classification with type × VSM matrix (S2.9)
Implements the L2 typed-entities layer — an LLM assigns each entity an
Entity Type (Element, Process, Relation, Principle, Institution) and a
VSM System (S1, S2, S3, S3*, S4, S5), with a one-sentence rationale for each assignment.
New modules:
- markitect/infospace/classification.py — EntityClassification dataclass
+ ENTITY_TYPES / VSM_SYSTEMS controlled vocabularies
- markitect/infospace/classification_io.py — write/read classification
files (YAML frontmatter + markdown body, mirrors evaluation_io)
- markitect/infospace/classifier.py — build_classification_prompt(),
parse_classification_response(), run_entity_classification(); batch
runner writes files incrementally (same resumable pattern as evaluate)
CLI: markitect infospace classify [--entity SLUG] [--provider P] [--model M]
- Incremental skip: checks output/classifications/ for existing files
- Defaults to openrouter provider; 2000 max_tokens (Gemini 2.5 Flash
uses ~787 thinking tokens, so 800 was too low)
CLI: markitect infospace classify-summary [--update-metrics]
- Entity type counts + VSM system counts with percentages
- 5 × 6 type × VSM matrix (spots structural blind spots at a glance)
- --update-metrics writes type_distribution, type_entropy,
vsm_type_matrix_cells to metrics.yaml
Config: InfospaceConfig gains classifications_dir (default output/classifications)
Schema: schemas/typed-entity-schema-v1.0.md — type/VSM vocabulary tables,
rationale format rules, validation rules, metrics enabled at L2
infospace.yaml: schemas.typed_entity references typed-entity-schema-v1.0.md
Seed classifications (3): division_of_labour (Process/S1),
natural_price_as_central_price (Principle/S2),
invisible_hand_mechanism (Principle/S4)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -419,6 +419,172 @@ def relations(config_path: Optional[str], entity_slug: Optional[str],
|
||||
click.echo(f"{subj:<35} {pred:<30} {obj:<35} {r.vsm_channel}")
|
||||
|
||||
|
||||
# ── classify ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--entity", "entity_slug", default=None,
              help="Classify a single entity by slug.")
@click.option("--provider", default="openrouter",
              help="LLM provider (openrouter, gemini, openai, …).")
@click.option("--model", default=None, help="Model name override.")
def classify(config_path: Optional[str], entity_slug: Optional[str],
             provider: str, model: Optional[str]):
    """Classify entities with Entity Type and VSM System (L2)."""
    # NOTE: the docstring above is what click shows as the command's --help
    # text, so the extended documentation lives in comments instead.
    #
    # Flow:
    #   1. Load the infospace config and parse every entity under
    #      ``cfg.entities_dir`` (resolved relative to the config file).
    #   2. With --entity, narrow the work list to that single slug (it is
    #      classified even if an output file already exists). Without it,
    #      drop every entity whose slug matches the stem of an existing
    #      ``*.md`` file in the classifications directory.
    #   3. Hand the remaining entities to ``run_entity_classification`` and
    #      report per-entity progress plus a final succeeded/failed tally.
    #
    # Every early exit below is a plain ``return``; "nothing found" messages
    # are written to stderr, informational ones to stdout.
    cfg, cfg_path = _load_config_or_exit(config_path)
    project_root = cfg_path.parent

    # Imported at call time, inside the command body.
    from markitect.infospace.classifier import run_entity_classification
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    entities_path = project_root / cfg.entities_dir
    pending = parse_entity_directory(entities_path)
    if not pending:
        click.echo("No entities found in " + str(entities_path), err=True)
        return

    output_dir = project_root / cfg.classifications_dir

    if entity_slug:
        # Single-entity mode: no incremental skip is applied here.
        pending = [ent for ent in pending if ent.slug == entity_slug]
        if not pending:
            click.echo(f"Entity '{entity_slug}' not found.", err=True)
            return
    elif output_dir.is_dir():
        # Incremental skip — an existing <slug>.md marks the entity as done.
        classified_slugs = {path.stem for path in output_dir.glob("*.md")}
        remaining = [ent for ent in pending if ent.slug not in classified_slugs]
        skipped = len(pending) - len(remaining)
        pending = remaining
        if skipped:
            click.echo(f"Skipping {skipped} already-classified entities.")
        if not pending:
            click.echo("All entities already classified. Nothing to do.")
            return

    click.echo(f"Classifying {len(pending)} entities …")
    output_dir.mkdir(parents=True, exist_ok=True)

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model, temperature=0.1, max_tokens=2000)

    def _report(done: int, total: int, result) -> None:
        # One line per finished entity; failures carry the error text.
        line = f" [{done}/{total}] {result.key}"
        if result.status != "success":
            line += f" — FAILED: {result.error}"
        click.echo(line)

    summary = run_entity_classification(
        config=cfg,
        entities=pending,
        adapter=adapter,
        run_config=run_config,
        output_dir=output_dir,
        progress_callback=_report,
    )
    click.echo(f"\nDone: {summary.succeeded} classified, {summary.failed} failed.")
|
||||
|
||||
|
||||
# ── classify-summary ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
@infospace_commands.command(name="classify-summary")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--update-metrics", "update_metrics", is_flag=True, default=False,
              help="Write type_distribution metrics to metrics.yaml.")
def classify_summary(config_path: Optional[str], update_metrics: bool):
    """Show type × VSM distribution across all classified entities (L2)."""
    # NOTE: the docstring above is what click shows as the command's --help
    # text, so the extended documentation lives in comments instead.
    #
    # Prints, for every classification read from ``cfg.classifications_dir``:
    #   * entity-type counts with percentages (most frequent first),
    #   * VSM-system counts with percentages (in fixed S1..S5 order),
    #   * a type × VSM matrix with '.' for empty cells,
    #   * how many cells of that matrix are occupied.
    # With --update-metrics, ``type_distribution``, ``vsm_type_matrix_cells``
    # and ``type_entropy`` are merged into ``metrics.yaml`` under
    # ``cfg.metrics_dir``; keys already present in that file are preserved
    # unless they are one of those three.
    from collections import Counter

    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification import ENTITY_TYPES
    from markitect.infospace.classification_io import read_classifications_directory

    cls_dir = root / cfg.classifications_dir
    if not cls_dir.is_dir():
        click.echo("No classifications directory found. Run 'classify' first.")
        return

    all_cls = read_classifications_directory(cls_dir)
    if not all_cls:
        click.echo("No classification files found.")
        return

    # n >= 1 from here on, so the percentage divisions below are safe.
    n = len(all_cls)
    type_counts = Counter(item.entity_type for item in all_cls)
    vsm_counts = Counter(item.vsm_system for item in all_cls)
    matrix = Counter((item.entity_type, item.vsm_system) for item in all_cls)

    click.echo(f"Classification summary — {n} entities\n")

    click.echo("Entity types:")
    for t, count in sorted(type_counts.items(), key=lambda pair: -pair[1]):
        pct = 100 * count / n
        click.echo(f" {t:<15} {count:>4} ({pct:.1f}%)")
    click.echo()

    # Column order of the matrix and of the VSM listing.
    vsm_order = ["S1", "S2", "S3", "S3*", "S4", "S5"]
    click.echo("VSM systems:")
    for v in vsm_order:
        if v in vsm_counts:
            count = vsm_counts[v]
            pct = 100 * count / n
            click.echo(f" {v:<6} {count:>4} ({pct:.1f}%)")
    click.echo()

    # Labels outside the grid cannot be rendered below; say so on stderr
    # instead of silently dropping them from the picture.
    stray_types = sorted(str(t) for t in type_counts if t not in ENTITY_TYPES)
    stray_vsm = sorted(str(v) for v in vsm_counts if v not in vsm_order)
    if stray_types:
        click.echo(
            "Warning: unrecognised entity types (not shown in matrix): "
            + ", ".join(stray_types),
            err=True,
        )
    if stray_vsm:
        click.echo(
            "Warning: unrecognised VSM systems (not shown in matrix): "
            + ", ".join(stray_vsm),
            err=True,
        )

    # Type × VSM matrix
    header = f"{'':15}" + "".join(f"{v:>7}" for v in vsm_order)
    sep = "-" * (15 + 7 * len(vsm_order))
    click.echo(header)
    click.echo(sep)
    for t in ENTITY_TYPES:
        # Counter returns 0 for a missing key without inserting it.
        cells = "".join(f"{matrix[(t, v)] or '.':>7}" for v in vsm_order)
        click.echo(f"{t:<15}" + cells)
    click.echo()

    # Count only cells that belong to the rendered grid, so the figure always
    # agrees with the matrix above and can never exceed total_cells.
    filled_cells = sum(
        1 for t in ENTITY_TYPES for v in vsm_order if (t, v) in matrix
    )
    total_cells = len(ENTITY_TYPES) * len(vsm_order)
    click.echo(f"Matrix fill: {filled_cells}/{total_cells} cells occupied")
    click.echo()

    if update_metrics:
        import math
        from markitect.infospace.history import read_metrics_file, write_metrics_file

        metrics_dir = root / cfg.metrics_dir
        metrics_dir.mkdir(parents=True, exist_ok=True)
        metrics_path = metrics_dir / "metrics.yaml"

        # Shannon entropy (base 2) of the entity-type distribution. Every
        # count is >= 1, so p > 0 and log2 is always defined. Accumulating
        # by subtraction from 0.0 keeps the single-type result at 0.0
        # rather than -0.0.
        type_entropy = 0.0
        for count in type_counts.values():
            p = count / n
            type_entropy -= p * math.log2(p)

        # NOTE(review): read_metrics_file is not visible here; ``or {}``
        # guards against it returning None for a missing/empty file — confirm.
        existing = read_metrics_file(metrics_path) or {}
        new_metrics = {
            # Plain dict so the YAML writer sees a builtin mapping.
            "type_distribution": dict(type_counts),
            "vsm_type_matrix_cells": filled_cells,
            "type_entropy": round(type_entropy, 4),
        }
        write_metrics_file({**existing, **new_metrics}, metrics_path)
        click.echo(
            f"Updated metrics.yaml: type_entropy={type_entropy:.4f}, "
            f"vsm_type_matrix_cells={filled_cells}"
        )
|
||||
|
||||
|
||||
# ── viability ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user