feat(example): add L2 classifications for 823/988 WoN entities (S3.4)

Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; incremental skip means
a follow-up run will complete them automatically.

Type × VSM matrix (823 entities):
                  S1   S2   S3  S3*   S4   S5
  Element         86   75   58   21   43   32  (315 total, 38%)
  Process         39   42   37   17   67   24  (226 total, 28%)
  Institution      4   12   30   24    .   52  (122 total, 15%)
  Principle        3    7   15    2   43   32  (102 total, 12%)
  Relation         2   14    5    5   22   10   (58 total,  7%)
  Matrix fill: 29/30 cells (Institution/S4 empty — expected)

Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29

Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
  (type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
  graph --format mermaid/dot, classify-links for Relation enrichment)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 12:49:11 +01:00
parent a9ca0adfcf
commit d1f57272a4
827 changed files with 25240 additions and 4 deletions

View File

@@ -122,7 +122,9 @@ def status(config_path: Optional[str]):
default="slug",
help="Sort entities by field.",
)
def entities(config_path: Optional[str], sort_key: str):
@click.option("--by-type", "by_type", is_flag=True, default=False,
help="Group entities by L2 entity type.")
def entities(config_path: Optional[str], sort_key: str, by_type: bool):
"""List entities with metadata summary."""
cfg, cfg_path = _load_config_or_exit(config_path)
root = cfg_path.parent
@@ -137,6 +139,10 @@ def entities(config_path: Optional[str], sort_key: str):
click.echo("No entities found.")
return
if by_type:
_entities_by_type(cfg, root, entity_list)
return
# Sort
if sort_key == "domain":
entity_list.sort(key=lambda e: (e.domain or "", e.slug))
@@ -153,6 +159,75 @@ def entities(config_path: Optional[str], sort_key: str):
click.echo(f"\nTotal: {len(entity_list)} entities")
def _entities_by_type(cfg, root: "Path", entity_list: list) -> None:
    """Print entities grouped by L2 entity type.

    Entities are bucketed under each name in ``ENTITY_TYPES`` (in that
    order), followed by an ``Unclassified`` bucket that holds entities with
    no classification record or with an entity type that is not one of
    ``ENTITY_TYPES``.  Empty buckets are not printed.  Within a bucket, rows
    are sorted by slug and show the slug, the domain (truncated to 18
    characters), the VSM system and, when an evaluation could be read, the
    overall score.  Relation-type rows that carry a ``links_mechanism`` get
    two extra lines for the endpoints and the (truncated) mechanism.

    Args:
        cfg: Loaded infospace config; its ``classifications_dir`` and
            ``evaluations_dir`` are resolved relative to *root*.
        root: Directory that contains the infospace config file.
        entity_list: Parsed entities; each must expose ``slug`` and
            ``domain``.
    """
    from markitect.infospace.classification import ENTITY_TYPES
    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.evaluation_io import read_entity_evaluation

    unclassified = "Unclassified"

    # Load classifications (a missing directory simply means nothing is
    # classified yet).
    cls_map: dict = {}
    cls_dir = root / cfg.classifications_dir
    if cls_dir.is_dir():
        cls_map = {c.entity_slug: c for c in read_classifications_directory(cls_dir)}

    # Load evaluation scores (best-effort)
    eval_scores: dict = {}  # slug → overall_score
    eval_dir = root / cfg.evaluations_dir
    if eval_dir.is_dir():
        for eval_file in eval_dir.glob("*.md"):
            try:
                ev = read_entity_evaluation(eval_file)
            except Exception:
                # Deliberately broad: the score column is optional, so an
                # unreadable evaluation file must not abort the listing.
                continue
            eval_scores[ev.entity_slug] = ev.overall_score

    # Build index: entity_type → list of (entity, classification-or-None)
    entity_index: dict = {t: [] for t in ENTITY_TYPES}
    entity_index[unclassified] = []
    for e in entity_list:
        cls = cls_map.get(e.slug)
        if cls is not None and cls.entity_type in entity_index:
            bucket = cls.entity_type
        else:
            # No record, or a type outside ENTITY_TYPES.
            bucket = unclassified
        entity_index[bucket].append((e, cls))

    # Print each type group
    total = 0
    for etype in [*ENTITY_TYPES, unclassified]:
        group = entity_index[etype]
        if not group:
            continue
        click.echo(f"\n=== {etype} ({len(group)} entities) ===")
        group.sort(key=lambda pair: pair[0].slug)
        for e, cls in group:
            # Coalesce to "" so a record without a VSM system cannot break
            # the width format spec below (None rejects "<4").
            vsm = (cls.vsm_system or "") if cls else ""
            domain = (e.domain or "-")[:18]
            score = eval_scores.get(e.slug)
            score_str = f" \u2605{score:.1f}" if score is not None else ""
            slug_col = f"{e.slug:<40}"
            click.echo(f" {slug_col} {domain:<18} {vsm:<4}{score_str}")
            if cls and cls.entity_type == "Relation" and cls.links_mechanism:
                subj = cls.links_subject or cls.links_subject_slug or "?"
                obj = cls.links_object or cls.links_object_slug or "?"
                click.echo(f" \u2192 links: {subj} \u2194 {obj}")
                mech = cls.links_mechanism
                if len(mech) > 80:
                    mech = mech[:77] + "..."
                click.echo(f" \u2192 mechanism: {mech}")
        total += len(group)

    click.echo(f"\nTotal: {total} entities")
# ── evaluate ─────────────────────────────────────────────────────────
@@ -429,8 +504,10 @@ def relations(config_path: Optional[str], entity_slug: Optional[str],
@click.option("--provider", default="openrouter",
help="LLM provider (openrouter, gemini, openai, …).")
@click.option("--model", default=None, help="Model name override.")
@click.option("--rpm", default=0, type=int,
help="Max requests per minute (0 = unlimited). Use 10 for Gemini free tier.")
def classify(config_path: Optional[str], entity_slug: Optional[str],
provider: str, model: Optional[str]):
provider: str, model: Optional[str], rpm: int):
"""Classify entities with Entity Type and VSM System (L2)."""
cfg, cfg_path = _load_config_or_exit(config_path)
root = cfg_path.parent
@@ -464,7 +541,9 @@ def classify(config_path: Optional[str], entity_slug: Optional[str],
click.echo("All entities already classified. Nothing to do.")
return
click.echo(f"Classifying {len(entity_list)} entities …")
delay = (60.0 / rpm) if rpm > 0 else 0.0
click.echo(f"Classifying {len(entity_list)} entities …" +
(f" (rate: {rpm} RPM, {delay:.1f}s delay)" if delay else ""))
output_dir.mkdir(parents=True, exist_ok=True)
adapter = create_adapter(provider, model=model)
@@ -483,6 +562,7 @@ def classify(config_path: Optional[str], entity_slug: Optional[str],
run_config=run_config,
output_dir=output_dir,
progress_callback=_progress,
delay_seconds=delay,
)
click.echo(f"\nDone: {summary.succeeded} classified, {summary.failed} failed.")
@@ -585,6 +665,80 @@ def classify_summary(config_path: Optional[str], update_metrics: bool):
)
# ── classify-links ────────────────────────────────────────────────────
@infospace_commands.command(name="classify-links")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default="openrouter",
              help="LLM provider (openrouter, gemini, openai, …).")
@click.option("--model", default=None, help="Model name override.")
def classify_links(config_path: Optional[str], provider: str, model: Optional[str]):
    """Capture relation endpoint data (subject, object, mechanism) for Relation-type entities."""
    # NOTE: the docstring above is what Click prints as the command's help
    # text, so implementation notes live in comments instead.
    #
    # Flow: read existing classifications, pick the Relation-type records
    # that have no links_mechanism yet, match them to parsed entities, and
    # hand that subset to run_relation_link_capture, which writes back into
    # the classifications directory.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.classifier import run_relation_link_capture
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    cls_dir = root / cfg.classifications_dir
    if not cls_dir.is_dir():
        click.echo("No classifications directory found. Run 'classify' first.", err=True)
        raise SystemExit(1)

    all_cls = read_classifications_directory(cls_dir)
    cls_map = {c.entity_slug: c for c in all_cls}

    # Filter to Relation-type entities that are missing links_mechanism, so
    # re-running the command only processes what is still outstanding.
    relation_slugs = [
        c.entity_slug for c in all_cls
        if c.entity_type == "Relation" and not c.links_mechanism
    ]
    if not relation_slugs:
        click.echo("All Relation-type entities already have endpoint data. Nothing to do.")
        return

    # Load entity metadata for these slugs and split them, in one pass, into
    # those with a matching entity file and those without.
    entity_list = parse_entity_directory(root / cfg.entities_dir)
    entity_map = {e.slug: e for e in entity_list}
    relation_entities = []
    missing_from_entities = []
    for slug in relation_slugs:
        if slug in entity_map:
            relation_entities.append(entity_map[slug])
        else:
            missing_from_entities.append(slug)

    if missing_from_entities:
        click.echo(f"Warning: {len(missing_from_entities)} Relation-type slugs not found in "
                   f"entities directory and will be skipped.")
    if not relation_entities:
        click.echo("No Relation-type entities found to enrich.")
        return

    click.echo(f"Capturing relation links for {len(relation_entities)} Relation-type entities …")

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model, temperature=0.1, max_tokens=512)

    def _progress(done: int, total: int, result) -> None:
        """Echo one progress line per processed entity."""
        if result.status == "success":
            click.echo(f" [{done}/{total}] {result.key}")
        else:
            click.echo(f" [{done}/{total}] {result.key} — FAILED: {result.error}")

    summary = run_relation_link_capture(
        config=cfg,
        relation_entities=relation_entities,
        classifications=cls_map,
        adapter=adapter,
        run_config=run_config,
        output_dir=cls_dir,
        progress_callback=_progress,
    )
    click.echo(f"\nDone: {summary.succeeded} enriched, {summary.failed} failed.")
# ── viability ────────────────────────────────────────────────────────
@@ -994,6 +1148,127 @@ def stale_mappings(config_path: Optional[str]):
click.echo(f" {s.reason}")
# ── graph ──────────────────────────────────────────────────────────────────
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--format", "output_format",
    type=click.Choice(["mermaid", "dot"]),
    default="mermaid",
    show_default=True,
    help="Output format.",
)
@click.option(
    "--color-by",
    type=click.Choice(["type", "vsm"]),
    default="type",
    show_default=True,
    help="Color nodes by entity type or VSM system.",
)
@click.option("--type", "filter_type", default=None,
              help="Show only entities with this entity type (e.g. Relation, Process).")
@click.option("--vsm", "filter_vsm", default=None,
              help="Show only entities with this VSM system (e.g. S1, S3).")
@click.option("--entity", "filter_entity", default=None,
              help="Show neighborhood of a specific entity slug.")
@click.option("--loops", "loops_only", is_flag=True, default=False,
              help="Show only the feedback loop subgraph.")
@click.option("--output", "-o", default=None,
              help="Write to file instead of stdout.")
@click.option("--classified-only/--all-entities", "classified_only",
              default=True, show_default=True,
              help="Only include classified entities (default: true).")
def graph(
    config_path: Optional[str],
    output_format: str,
    color_by: str,
    filter_type: Optional[str],
    filter_vsm: Optional[str],
    filter_entity: Optional[str],
    loops_only: bool,
    output: Optional[str],
    classified_only: bool,
):
    """Render the entity-relation graph as Mermaid or DOT."""
    # NOTE: the docstring above is what Click prints as the command's help
    # text, so implementation notes live in comments instead.
    #
    # Flow: load classifications and relations, detect feedback cycles when
    # networkx is importable, build and filter the graph, then either write
    # the rendering to --output or echo it to stdout.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.relation_parser import parse_relations_directory
    from markitect.infospace.graph_export import (
        apply_filters,
        build_entity_graph,
        to_dot,
        to_mermaid,
    )

    # Load classifications
    cls_dir = root / cfg.classifications_dir
    classifications = []
    if cls_dir.is_dir():
        classifications = read_classifications_directory(cls_dir)
    classified_slugs = {c.entity_slug for c in classifications}

    # Load relations
    relations_dir = root / cfg.relations_dir
    relations = []
    if relations_dir.is_dir():
        relations = parse_relations_directory(relations_dir)

    if not classifications and not relations:
        click.echo("No classifications or relations found. Run 'classify' and add relation files.")
        return

    # Detect feedback loops via networkx.  networkx is treated as optional:
    # without it the graph is still rendered, just with no cycle data.
    feedback_cycles = []
    if relations:
        try:
            import networkx as nx
        except ImportError:
            # Only speak up when the user explicitly asked for loops, and do
            # so on stderr so graph output piped from stdout stays clean.
            if loops_only:
                click.echo(
                    "Warning: networkx is not installed; feedback loops cannot be "
                    "detected (required for --loops).",
                    err=True,
                )
        else:
            digraph = nx.DiGraph()
            for rel in relations:
                digraph.add_edge(rel.subject_slug, rel.object_slug)
            feedback_cycles = list(nx.simple_cycles(digraph))

    # Build graph
    g = build_entity_graph(classifications, relations, feedback_cycles)

    # Apply filters
    filtered = apply_filters(
        g,
        filter_type=filter_type,
        filter_vsm=filter_vsm,
        filter_entity=filter_entity,
        loops_only=loops_only,
        classified_only=classified_only,
        classified_slugs=classified_slugs,
    )

    if not filtered.nodes:
        click.echo("No nodes match the given filters.")
        return

    # Export
    if output_format == "dot":
        result = to_dot(filtered, color_by=color_by)
    else:
        result = to_mermaid(filtered, color_by=color_by)

    if output:
        out_path = Path(output)
        out_path.write_text(result, encoding="utf-8")
        click.echo(
            f"Wrote {output_format} graph ({len(filtered.nodes)} nodes, "
            f"{sum(len(v) for v in filtered.edges.values())} edges) to {out_path}"
        )
    else:
        # nl=False: emit the rendering exactly as produced by the exporter.
        click.echo(result, nl=False)
def _load_mapping_references(
cfg: InfospaceConfig, root: Path
) -> Optional[dict]: