feat(example): add L2 classifications for 823/988 WoN entities (S3.4)
Batch classification via OpenRouter (claude-sonnet-4). 165 entities
remain unclassified due to credit exhaustion; incremental skip means
a follow-up run will complete them automatically.
Type × VSM matrix (823 entities):
              S1   S2   S3   S3*  S4   S5
Element       86   75   58   21   43   32   (315 total, 38%)
Process       39   42   37   17   67   24   (226 total, 28%)
Institution    4   12   30   24    .   52   (122 total, 15%)
Principle      3    7   15    2   43   32   (102 total, 12%)
Relation       2   14    5    5   22   10   (58 total, 7%)
Matrix fill: 29/30 cells (Institution/S4 empty — expected)
Metrics updated: type_entropy=2.0936, vsm_type_matrix_cells=29
Also:
- BatchEvaluator gains delay_seconds param for rate-limited providers
- classify CLI gains --rpm option (--rpm 10 for Gemini free tier)
- history.write_metrics_file now handles non-float metric values
(type_distribution is a dict, was crashing round())
- run_entity_classification forwards delay_seconds to BatchEvaluator
- classify-links and graph commands added by user (entities --by-type,
graph --format mermaid/dot, classify-links for Relation enrichment)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -122,7 +122,9 @@ def status(config_path: Optional[str]):
|
||||
default="slug",
|
||||
help="Sort entities by field.",
|
||||
)
|
||||
def entities(config_path: Optional[str], sort_key: str):
|
||||
@click.option("--by-type", "by_type", is_flag=True, default=False,
|
||||
help="Group entities by L2 entity type.")
|
||||
def entities(config_path: Optional[str], sort_key: str, by_type: bool):
|
||||
"""List entities with metadata summary."""
|
||||
cfg, cfg_path = _load_config_or_exit(config_path)
|
||||
root = cfg_path.parent
|
||||
@@ -137,6 +139,10 @@ def entities(config_path: Optional[str], sort_key: str):
|
||||
click.echo("No entities found.")
|
||||
return
|
||||
|
||||
if by_type:
|
||||
_entities_by_type(cfg, root, entity_list)
|
||||
return
|
||||
|
||||
# Sort
|
||||
if sort_key == "domain":
|
||||
entity_list.sort(key=lambda e: (e.domain or "", e.slug))
|
||||
@@ -153,6 +159,75 @@ def entities(config_path: Optional[str], sort_key: str):
|
||||
click.echo(f"\nTotal: {len(entity_list)} entities")
|
||||
|
||||
|
||||
def _entities_by_type(cfg, root: "Path", entity_list: list) -> None:
    """Print entities grouped by L2 entity type.

    Entities are bucketed by the ``entity_type`` of their classification and
    printed in ``ENTITY_TYPES`` order, followed by an ``Unclassified`` bucket
    that holds entities with no classification or with a type that is not in
    ``ENTITY_TYPES``.  Empty buckets are not printed.  Each row shows the
    slug, the domain (truncated to 18 characters), the VSM system and, when
    an evaluation was read for the slug, its overall score.  Relation-type
    entities that have a ``links_mechanism`` get two extra lines describing
    their endpoints and mechanism.

    Args:
        cfg: Infospace config; ``classifications_dir`` and
            ``evaluations_dir`` are read and resolved relative to *root*.
        root: Directory the configured paths are relative to.
        entity_list: Entities to print; ``slug`` and ``domain`` are read
            from each.
    """
    from markitect.infospace.classification import ENTITY_TYPES
    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.evaluation_io import read_entity_evaluation

    # Load classifications, keyed by entity slug.  A missing directory simply
    # means every entity ends up in the "Unclassified" bucket.
    cls_dir = root / cfg.classifications_dir
    cls_map: dict = {}
    if cls_dir.is_dir():
        cls_map = {c.entity_slug: c for c in read_classifications_directory(cls_dir)}

    # Load evaluation scores (best-effort)
    eval_dir = root / cfg.evaluations_dir
    eval_scores: dict = {}  # slug → overall_score
    if eval_dir.is_dir():
        for ef in eval_dir.glob("*.md"):
            try:
                ev = read_entity_evaluation(ef)
                eval_scores[ev.entity_slug] = ev.overall_score
            except Exception:
                # Deliberately best-effort: scores are decoration on this
                # listing, so an unreadable evaluation file must not abort it.
                continue

    # Build index: entity_type → list of (entity, classification)
    entity_index = {t: [] for t in ENTITY_TYPES}
    entity_index["Unclassified"] = []

    for e in entity_list:
        cls = cls_map.get(e.slug)
        if cls is None:
            entity_index["Unclassified"].append((e, None))
        else:
            # Unknown types are shown under "Unclassified" but keep their
            # classification so the VSM column is still printed.
            bucket = cls.entity_type if cls.entity_type in entity_index else "Unclassified"
            entity_index[bucket].append((e, cls))

    # Print each type group
    total = 0
    for etype in [*ENTITY_TYPES, "Unclassified"]:
        group = entity_index[etype]
        if not group:
            continue
        click.echo(f"\n=== {etype} ({len(group)} entities) ===")
        group.sort(key=lambda x: x[0].slug)
        for e, cls in group:
            # `or ""` guards the `:<4` format spec below, which raises
            # TypeError on None.  NOTE(review): assumes vsm_system may be
            # unset on a classification — confirm against the model.
            vsm = (cls.vsm_system or "") if cls else ""
            domain = (e.domain or "-")[:18]
            score = eval_scores.get(e.slug)
            score_str = f" \u2605{score:.1f}" if score is not None else ""
            slug_col = f"{e.slug:<40}"
            click.echo(f" {slug_col} {domain:<18} {vsm:<4}{score_str}")
            if cls and cls.entity_type == "Relation" and cls.links_mechanism:
                subj = cls.links_subject or cls.links_subject_slug or "?"
                obj = cls.links_object or cls.links_object_slug or "?"
                click.echo(f" \u2192 links: {subj} \u2194 {obj}")
                mech = cls.links_mechanism
                if len(mech) > 80:
                    # Keep the mechanism line to 80 characters including "...".
                    mech = mech[:77] + "..."
                click.echo(f" \u2192 mechanism: {mech}")
        total += len(group)

    click.echo(f"\nTotal: {total} entities")
|
||||
|
||||
|
||||
# ── evaluate ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -429,8 +504,10 @@ def relations(config_path: Optional[str], entity_slug: Optional[str],
|
||||
@click.option("--provider", default="openrouter",
|
||||
help="LLM provider (openrouter, gemini, openai, …).")
|
||||
@click.option("--model", default=None, help="Model name override.")
|
||||
@click.option("--rpm", default=0, type=int,
|
||||
help="Max requests per minute (0 = unlimited). Use 10 for Gemini free tier.")
|
||||
def classify(config_path: Optional[str], entity_slug: Optional[str],
|
||||
provider: str, model: Optional[str]):
|
||||
provider: str, model: Optional[str], rpm: int):
|
||||
"""Classify entities with Entity Type and VSM System (L2)."""
|
||||
cfg, cfg_path = _load_config_or_exit(config_path)
|
||||
root = cfg_path.parent
|
||||
@@ -464,7 +541,9 @@ def classify(config_path: Optional[str], entity_slug: Optional[str],
|
||||
click.echo("All entities already classified. Nothing to do.")
|
||||
return
|
||||
|
||||
click.echo(f"Classifying {len(entity_list)} entities …")
|
||||
delay = (60.0 / rpm) if rpm > 0 else 0.0
|
||||
click.echo(f"Classifying {len(entity_list)} entities …" +
|
||||
(f" (rate: {rpm} RPM, {delay:.1f}s delay)" if delay else ""))
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
adapter = create_adapter(provider, model=model)
|
||||
@@ -483,6 +562,7 @@ def classify(config_path: Optional[str], entity_slug: Optional[str],
|
||||
run_config=run_config,
|
||||
output_dir=output_dir,
|
||||
progress_callback=_progress,
|
||||
delay_seconds=delay,
|
||||
)
|
||||
click.echo(f"\nDone: {summary.succeeded} classified, {summary.failed} failed.")
|
||||
|
||||
@@ -585,6 +665,80 @@ def classify_summary(config_path: Optional[str], update_metrics: bool):
|
||||
)
|
||||
|
||||
|
||||
# ── classify-links ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@infospace_commands.command(name="classify-links")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--provider", default="openrouter",
              help="LLM provider (openrouter, gemini, openai, …).")
@click.option("--model", default=None, help="Model name override.")
def classify_links(config_path: Optional[str], provider: str, model: Optional[str]):
    """Capture relation endpoint data (subject, object, mechanism) for Relation-type entities."""
    # NOTE: the docstring above is what click prints as the command help, so
    # it is kept to one line; implementation notes live in comments instead.
    #
    # Flow: read existing classifications, pick the Relation-type ones that
    # still have no links_mechanism, resolve them to entities, and hand them
    # to run_relation_link_capture, which writes into the classifications
    # directory.  Exits with status 1 when that directory does not exist.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.classifier import run_relation_link_capture
    from markitect.llm import create_adapter
    from markitect.prompts.execution.models import RunConfig

    cls_dir = root / cfg.classifications_dir
    if not cls_dir.is_dir():
        click.echo("No classifications directory found. Run 'classify' first.", err=True)
        raise SystemExit(1)

    all_cls = read_classifications_directory(cls_dir)
    cls_map = {c.entity_slug: c for c in all_cls}

    # Filter to Relation-type entities that are missing links_mechanism, so
    # a re-run only processes what is still outstanding.
    relation_slugs = [
        c.entity_slug for c in all_cls
        if c.entity_type == "Relation" and not c.links_mechanism
    ]

    if not relation_slugs:
        click.echo("All Relation-type entities already have endpoint data. Nothing to do.")
        return

    # Load entity metadata for these slugs
    entity_list = parse_entity_directory(root / cfg.entities_dir)
    entity_map = {e.slug: e for e in entity_list}

    # Split the outstanding slugs into those that resolve to an entity and
    # those whose classification has no matching entity, in a single pass.
    relation_entities = []
    missing_from_entities = []
    for slug in relation_slugs:
        if slug in entity_map:
            relation_entities.append(entity_map[slug])
        else:
            missing_from_entities.append(slug)

    if missing_from_entities:
        click.echo(f"Warning: {len(missing_from_entities)} Relation-type slugs not found in "
                   f"entities directory and will be skipped.")

    if not relation_entities:
        click.echo("No Relation-type entities found to enrich.")
        return

    click.echo(f"Capturing relation links for {len(relation_entities)} Relation-type entities …")

    adapter = create_adapter(provider, model=model)
    run_config = RunConfig(model_name=model, temperature=0.1, max_tokens=512)

    def _progress(done: int, total: int, result) -> None:
        # Called once per processed entity; prints one progress line each.
        if result.status == "success":
            click.echo(f" [{done}/{total}] {result.key}")
        else:
            click.echo(f" [{done}/{total}] {result.key} — FAILED: {result.error}")

    summary = run_relation_link_capture(
        config=cfg,
        relation_entities=relation_entities,
        classifications=cls_map,
        adapter=adapter,
        run_config=run_config,
        output_dir=cls_dir,
        progress_callback=_progress,
    )
    click.echo(f"\nDone: {summary.succeeded} enriched, {summary.failed} failed.")
|
||||
|
||||
|
||||
# ── viability ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -994,6 +1148,127 @@ def stale_mappings(config_path: Optional[str]):
|
||||
click.echo(f" {s.reason}")
|
||||
|
||||
|
||||
# ── graph ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--format", "output_format", type=click.Choice(["mermaid", "dot"]),
              default="mermaid", show_default=True, help="Output format.")
@click.option("--color-by", type=click.Choice(["type", "vsm"]),
              default="type", show_default=True,
              help="Color nodes by entity type or VSM system.")
@click.option("--type", "filter_type", default=None,
              help="Show only entities with this entity type (e.g. Relation, Process).")
@click.option("--vsm", "filter_vsm", default=None,
              help="Show only entities with this VSM system (e.g. S1, S3).")
@click.option("--entity", "filter_entity", default=None,
              help="Show neighborhood of a specific entity slug.")
@click.option("--loops", "loops_only", is_flag=True, default=False,
              help="Show only the feedback loop subgraph.")
@click.option("--output", "-o", default=None,
              help="Write to file instead of stdout.")
@click.option("--classified-only/--all-entities", "classified_only",
              default=True, show_default=True,
              help="Only include classified entities (default: true).")
def graph(
    config_path: Optional[str],
    output_format: str,
    color_by: str,
    filter_type: Optional[str],
    filter_vsm: Optional[str],
    filter_entity: Optional[str],
    loops_only: bool,
    output: Optional[str],
    classified_only: bool,
):
    """Render the entity-relation graph as Mermaid or DOT."""
    # NOTE: the docstring above doubles as the click help text, so the
    # walkthrough lives here: load classifications and relations, detect
    # cycles when networkx is importable, build and filter the graph, then
    # print the rendering or write it to --output.
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from markitect.infospace.classification_io import read_classifications_directory
    from markitect.infospace.relation_parser import parse_relations_directory
    from markitect.infospace.graph_export import (
        apply_filters,
        build_entity_graph,
        to_dot,
        to_mermaid,
    )

    # Classifications: a missing directory is treated as "none yet".
    cls_dir = root / cfg.classifications_dir
    classifications = read_classifications_directory(cls_dir) if cls_dir.is_dir() else []
    classified_slugs = {item.entity_slug for item in classifications}

    # Relations: same tolerance for a missing directory.
    relations_dir = root / cfg.relations_dir
    relations = parse_relations_directory(relations_dir) if relations_dir.is_dir() else []

    if not (classifications or relations):
        click.echo("No classifications or relations found. Run 'classify' and add relation files.")
        return

    # Feedback loops are the simple cycles of the subject → object digraph.
    # Without networkx the list just stays empty.
    feedback_cycles = []
    if relations:
        try:
            import networkx as nx

            digraph = nx.DiGraph()
            for rel in relations:
                digraph.add_edge(rel.subject_slug, rel.object_slug)
            feedback_cycles = list(nx.simple_cycles(digraph))
        except ImportError:
            pass

    entity_graph = build_entity_graph(classifications, relations, feedback_cycles)
    filtered = apply_filters(
        entity_graph,
        filter_type=filter_type,
        filter_vsm=filter_vsm,
        filter_entity=filter_entity,
        loops_only=loops_only,
        classified_only=classified_only,
        classified_slugs=classified_slugs,
    )

    if not filtered.nodes:
        click.echo("No nodes match the given filters.")
        return

    # click.Choice limits output_format to "mermaid" or "dot".
    render = to_dot if output_format == "dot" else to_mermaid
    rendered = render(filtered, color_by=color_by)

    if not output:
        # The rendering is echoed as-is, without an extra trailing newline.
        click.echo(rendered, nl=False)
        return

    out_path = Path(output)
    out_path.write_text(rendered, encoding="utf-8")
    edge_count = sum(len(targets) for targets in filtered.edges.values())
    click.echo(
        f"Wrote {output_format} graph ({len(filtered.nodes)} nodes, "
        f"{edge_count} edges) to {out_path}"
    )
|
||||
|
||||
|
||||
def _load_mapping_references(
|
||||
cfg: InfospaceConfig, root: Path
|
||||
) -> Optional[dict]:
|
||||
|
||||
Reference in New Issue
Block a user