diff --git a/examples/infospace-with-history/infospace.yaml b/examples/infospace-with-history/infospace.yaml index 0795190b..984171b7 100644 --- a/examples/infospace-with-history/infospace.yaml +++ b/examples/infospace-with-history/infospace.yaml @@ -18,6 +18,7 @@ schemas: mapping: schemas/vsm-mapping-schema-v1.0.md analysis: schemas/chapter-analysis-schema-v1.0.md relation: schemas/relation-schema-v1.0.md + typed_entity: schemas/typed-entity-schema-v1.0.md competency_questions: | 1. How does Smith's division of labour map to VSM System 1 operations? diff --git a/examples/infospace-with-history/output/classifications/division_of_labour.md b/examples/infospace-with-history/output/classifications/division_of_labour.md new file mode 100644 index 00000000..18aefd49 --- /dev/null +++ b/examples/infospace-with-history/output/classifications/division_of_labour.md @@ -0,0 +1,30 @@ +--- +entity_slug: division_of_labour +entity_type: Process +vsm_system: S1 +type_rationale: The definition describes "the separation of a work process into distinct + tasks performed by specialised workers," which is an activity or transformation + in how work is conducted. +vsm_rationale: Division of Labour directly concerns the organization and execution + of "productive activities" by specialized workers to increase output, which is the + core function of S1. +classified_at: '2026-02-23T05:14:54.928218' +--- + +# Classification: Division Of Labour + +## Entity Type + +Process + +## VSM System + +S1 + +## Type Rationale + +The definition describes "the separation of a work process into distinct tasks performed by specialised workers," which is an activity or transformation in how work is conducted. + +## VSM Rationale + +Division of Labour directly concerns the organization and execution of "productive activities" by specialized workers to increase output, which is the core function of S1. 
diff --git a/examples/infospace-with-history/output/classifications/invisible_hand_mechanism.md b/examples/infospace-with-history/output/classifications/invisible_hand_mechanism.md new file mode 100644 index 00000000..3404c197 --- /dev/null +++ b/examples/infospace-with-history/output/classifications/invisible_hand_mechanism.md @@ -0,0 +1,30 @@ +--- +entity_slug: invisible_hand_mechanism +entity_type: Principle +vsm_system: S4 +type_rationale: The Invisible Hand Mechanism is an abstract theoretical claim about + how individual self-interest unintentionally leads to broader public welfare, functioning + as a fundamental rule of market operation. +vsm_rationale: The Invisible Hand Mechanism describes the system's inherent capacity + for adaptation and self-organization, producing beneficial outcomes from individual + actions without central direction, aligning with S4's function of intelligence. +classified_at: '2026-02-23T05:15:10.936874' +--- + +# Classification: Invisible Hand Mechanism + +## Entity Type + +Principle + +## VSM System + +S4 + +## Type Rationale + +The Invisible Hand Mechanism is an abstract theoretical claim about how individual self-interest unintentionally leads to broader public welfare, functioning as a fundamental rule of market operation. + +## VSM Rationale + +The Invisible Hand Mechanism describes the system's inherent capacity for adaptation and self-organization, producing beneficial outcomes from individual actions without central direction, aligning with S4's function of intelligence. 
diff --git a/examples/infospace-with-history/output/classifications/natural_price_as_central_price.md b/examples/infospace-with-history/output/classifications/natural_price_as_central_price.md new file mode 100644 index 00000000..73f2c267 --- /dev/null +++ b/examples/infospace-with-history/output/classifications/natural_price_as_central_price.md @@ -0,0 +1,30 @@ +--- +entity_slug: natural_price_as_central_price +entity_type: Principle +vsm_system: S2 +type_rationale: The natural price is an abstract concept describing an equilibrium + point and a tendency for market prices to gravitate towards it, functioning as a + fundamental economic law. +vsm_rationale: The natural price acts as a central price signal that coordinates market + activity by drawing fluctuating market prices towards an equilibrium, thereby performing + an anti-oscillation function. +classified_at: '2026-02-23T05:15:04.916853' +--- + +# Classification: Natural Price As Central Price + +## Entity Type + +Principle + +## VSM System + +S2 + +## Type Rationale + +The natural price is an abstract concept describing an equilibrium point and a tendency for market prices to gravitate towards it, functioning as a fundamental economic law. + +## VSM Rationale + +The natural price acts as a central price signal that coordinates market activity by drawing fluctuating market prices towards an equilibrium, thereby performing an anti-oscillation function. diff --git a/examples/infospace-with-history/schemas/typed-entity-schema-v1.0.md b/examples/infospace-with-history/schemas/typed-entity-schema-v1.0.md new file mode 100644 index 00000000..214dc052 --- /dev/null +++ b/examples/infospace-with-history/schemas/typed-entity-schema-v1.0.md @@ -0,0 +1,126 @@ +# Typed Entity Schema v1.0 + +Extends the economic entity schema with two classification fields produced +by the L2 `classify-entities` pipeline stage. 
An entity that has passed +through L2 classification has been assigned an **Entity Type** and a +**VSM System** by an LLM, each with a one-sentence rationale. + +--- + +## Additional Sections + +The following sections are added to the base entity file (or stored as +separate classification files in `output/classifications/`): + +### Entity Type + +**Required.** One of the five controlled values below. + +| Value | Definition | +|---|---| +| **Element** | A stock, agent, artifact, or institution that persists — a *noun*, something that exists independently (e.g. Capital Stock, Corn, Colony, Guild) | +| **Process** | A flow, activity, or transformation with duration — something that *happens* rather than *exists* (e.g. Division of Labour, Credit Extension, Trade Route) | +| **Relation** | A structural dependency or causal link between two elements — a *connector* or mechanism (e.g. Rent determined by Price; Wages bounded by Profit Margin) | +| **Principle** | An abstract law or invariant that holds across contexts — a rule or theoretical claim (e.g. Comparative Advantage, Diminishing Returns, Opportunity Cost) | +| **Institution** | A socially constructed rule system, norm, or governance structure (e.g. Banking System, Apprenticeship Law, Taille, Navigation Acts) | + +**Note:** Types are not mutually exclusive at the margin — *Market Price* +is both a Relation (between cost components and clearing condition) and an +emergent property of an Element (the market). Assign the **primary** type: +the one that best explains the entity's role in Smith's argument. + +### VSM System + +**Required.** One of the six controlled values below. 
+ +| Value | Beer's definition | WoN examples | +|---|---|---| +| **S1** | Primary operations — the productive activities of the system | Agricultural labour, manufacturing, carrying trade | +| **S2** | Coordination — anti-oscillation, price signals between operations | Market Price, Natural Price, Wages of Labour | +| **S3** | Management — resource allocation and operational control | Capital Allocation, Banking, Taxation | +| **S3\*** | Audit — inspection, compliance, integrity checking | Customs Enforcement, Assay, Coinage | +| **S4** | Intelligence — adaptation, environmental scanning | Invisible Hand, Comparative Advantage, Foreign Trade Intelligence | +| **S5** | Policy — identity, ultimate authority, normative purpose | Mercantile System, System of Natural Liberty, Public Debt Policy | + +### Type Rationale + +**Required.** One sentence explaining why this Entity Type was assigned, +grounded in the entity definition. + +> *Example:* "Capital Stock is a persistent stock of accumulated resources +> that enables productive operations, making it an Element rather than a +> Process." + +### VSM Rationale + +**Required.** One sentence grounding the VSM assignment in Beer's +definitions as applied to the WoN domain. + +> *Example:* "Capital Stock is deployed at the operational level to +> produce goods and services, placing it squarely within S1 (primary +> operations)." + +--- + +## Validation Rules + +1. **Entity Type** MUST be one of: Element, Process, Relation, Principle, + Institution. Any other value is a validation error. +2. **VSM System** MUST be one of: S1, S2, S3, S3*, S4, S5. +3. **Type Rationale** and **VSM Rationale** MUST be non-empty strings. +4. A classification file for slug `X` MUST be stored at + `output/classifications/X.md`. 
+ +--- + +## Metrics Enabled by L2 + +Once all entities are classified, the following collection-level metrics +become available: + +| Metric | Concern | Question | +|---|---|---| +| **type_distribution** | Granularity | Is the collection balanced? | +| **vsm_type_matrix_cells** | Coverage | How many (type, VSM) coordinate pairs are occupied? | +| **type_entropy** | Granularity | Is the type distribution diverse or dominated by one type? | +| **orphan_relations** | Coherence | Are there Relation-typed entities that name no elements they connect? | +| **principle_grounding** | Consistency | Does each Principle have at least one Element or Process it constrains? | + +--- + +## File Format + +Classification files use YAML frontmatter + markdown body: + +```markdown +--- +entity_slug: capital_stock +entity_type: Element +vsm_system: S1 +type_rationale: Capital Stock is a persistent stock of accumulated resources + that enables productive operations. +vsm_rationale: It is the primary productive resource deployed at the + operational level (S1). +classified_by: openrouter/claude-sonnet-4 +classified_at: 2026-02-23T14:00:00Z +--- + +# Classification: Capital Stock + +## Entity Type + +Element + +## VSM System + +S1 + +## Type Rationale + +Capital Stock is a persistent stock of accumulated resources that enables +productive operations. + +## VSM Rationale + +It is the primary productive resource deployed at the operational level (S1). +``` diff --git a/markitect/infospace/classification.py b/markitect/infospace/classification.py new file mode 100644 index 00000000..76b29442 --- /dev/null +++ b/markitect/infospace/classification.py @@ -0,0 +1,64 @@ +""" +Data models for entity classification (L2 typed entities). + +Each entity is assigned an Entity Type (what kind of thing it is) and a +VSM System (which control layer it inhabits). Both assignments come with +a one-sentence rationale from the LLM, stored alongside the classification. 
@dataclass
class EntityClassification:
    """L2 classification for a single entity.

    Records the Entity Type and VSM System assigned to one entity, the
    one-sentence rationale for each assignment, and provenance (which model
    produced the classification and when).  The values are stored as given;
    this class does not validate them against the controlled vocabularies.
    """

    entity_slug: str
    entity_type: str          # one of ENTITY_TYPES
    vsm_system: str           # one of VSM_SYSTEMS
    type_rationale: str = ""  # one sentence
    vsm_rationale: str = ""   # one sentence
    classified_by: str = ""   # model name
    classified_at: Optional[datetime] = None  # when the classification was made

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form suitable for YAML frontmatter.

        The three required keys are always present.  Optional fields are
        emitted only when they carry a value, and ``classified_at`` is
        serialised as an ISO-8601 string.
        """
        d: Dict[str, Any] = {
            "entity_slug": self.entity_slug,
            "entity_type": self.entity_type,
            "vsm_system": self.vsm_system,
        }
        if self.type_rationale:
            d["type_rationale"] = self.type_rationale
        if self.vsm_rationale:
            d["vsm_rationale"] = self.vsm_rationale
        if self.classified_by:
            d["classified_by"] = self.classified_by
        if self.classified_at is not None:
            d["classified_at"] = self.classified_at.isoformat()
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EntityClassification":
        """Build a classification from a mapping such as parsed frontmatter.

        ``classified_at`` may be absent, ``None``, an ISO-8601 string, or an
        already-built ``datetime``.  The last case matters for hand-written
        files: PyYAML turns an *unquoted* timestamp into a ``datetime``
        object, which ``datetime.fromisoformat`` would reject.  Optional
        text fields that are present but null are normalised to ``""``.

        Raises:
            KeyError: if ``entity_slug``, ``entity_type`` or ``vsm_system``
                is missing.
            ValueError: if ``classified_at`` is a string that is not a valid
                ISO-8601 timestamp.
            TypeError: if ``classified_at`` is neither ``None``, ``str`` nor
                ``datetime``.
        """
        raw = data.get("classified_at")
        classified_at: Optional[datetime]
        if raw is None:
            classified_at = None
        elif isinstance(raw, datetime):
            classified_at = raw
        elif isinstance(raw, str):
            stamp = raw.strip()
            # ``fromisoformat`` only understands a trailing "Z" from
            # Python 3.11 on; spell the UTC offset out so older
            # interpreters parse it to the same aware datetime.
            if stamp[-1:] in ("Z", "z"):
                stamp = stamp[:-1] + "+00:00"
            classified_at = datetime.fromisoformat(stamp)
        else:
            raise TypeError(
                "classified_at must be an ISO-8601 string or datetime, "
                f"got {type(raw).__name__}"
            )
        return cls(
            entity_slug=data["entity_slug"],
            entity_type=data["entity_type"],
            vsm_system=data["vsm_system"],
            # ``or ""`` covers keys that exist with a null value.
            type_rationale=data.get("type_rationale") or "",
            vsm_rationale=data.get("vsm_rationale") or "",
            classified_by=data.get("classified_by") or "",
            classified_at=classified_at,
        )
def read_classifications_directory(directory: Path) -> List[EntityClassification]:
    """Read every ``*.md`` classification file in *directory*.

    Files are visited in sorted path order.  The scan is best-effort: a file
    that cannot be read or parsed is skipped so that one bad file does not
    hide the rest, but each skip is reported as a warning instead of being
    dropped silently.

    Args:
        directory: Directory to scan (non-recursive).

    Returns:
        The classifications that were read successfully, in path order.
    """
    # Function-scope import keeps this block self-contained; the module's
    # import list is left untouched.
    import logging

    log = logging.getLogger(__name__)
    results: List[EntityClassification] = []
    for path in sorted(directory.glob("*.md")):
        try:
            results.append(read_entity_classification(path))
        except Exception as exc:
            # Deliberately broad: any failure in a single file should not
            # abort the directory scan, but it must leave a trace.
            log.warning("Skipping unreadable classification file %s: %s", path, exc)
    return results
Rent determined by Price; Wages bounded by Profit)" + ), + "Principle": ( + "an abstract law or invariant that holds across contexts — a rule or " + "theoretical claim (e.g. Comparative Advantage, Diminishing Returns)" + ), + "Institution": ( + "a socially constructed rule system, norm, or governance structure " + "(e.g. Banking System, Apprenticeship Law, Taille)" + ), +} + +_VSM_DEFS = { + "S1": "Primary operations — productive activities (agricultural labour, manufacturing, carrying trade)", + "S2": "Coordination — anti-oscillation, price signals (market price, natural price, wages)", + "S3": "Management — resource allocation, operational control (capital allocation, taxation, banking)", + "S3*": "Audit — inspection, compliance, integrity (customs enforcement, assay, coinage)", + "S4": "Intelligence — adaptation, environment scanning (invisible hand, foreign trade analysis)", + "S5": "Policy — identity, ultimate authority, purpose (political economy systems, public debt policy)", +} + +_PROMPT_TEMPLATE = """\ +You are classifying an entity from an infospace about "{topic}". + +Your task: assign exactly one **Entity Type** and one **VSM System** to the entity, \ +then give a one-sentence rationale for each choice. + +## Entity: {title} + +**Domain:** {domain} +**Source chapter:** {source_chapter} + +### Definition + +{definition} + +### Context + +{context} + +--- + +## Entity Types — choose exactly one + +- **Element** — {type_Element} +- **Process** — {type_Process} +- **Relation** — {type_Relation} +- **Principle** — {type_Principle} +- **Institution** — {type_Institution} + +## VSM Systems — choose exactly one + +- **S1** — {vsm_S1} +- **S2** — {vsm_S2} +- **S3** — {vsm_S3} +- **S3*** — {vsm_S3s} +- **S4** — {vsm_S4} +- **S5** — {vsm_S5} + +--- + +## Instructions + +1. Read the definition and context carefully. +2. Choose the **most appropriate** Entity Type. 
# Field names recognised in an LLM response (compared upper-cased).
_FIELD_KEYS = ("TYPE_RATIONALE", "VSM_RATIONALE", "TYPE", "VSM")
# Markdown emphasis / code characters models tend to wrap keys and values in.
_MARKUP = "*_`"
# Characters that may wrap a whole label value, e.g. **Process** or "S1".
_WRAPPERS = _MARKUP + "\"'"


def _split_field(line: str) -> Optional[tuple]:
    """Split a ``KEY: value`` line into ``(KEY, value)``; None if not a field."""
    head, colon, rest = line.partition(":")
    if not colon:
        return None
    label = head.strip()
    # Drop bullets, block-quote/heading markers and emphasis around the key.
    key = label.strip(" \t#>-+" + _MARKUP).upper()
    if key not in _FIELD_KEYS:
        return None
    value = rest.strip()
    if label[-1:] not in _MARKUP:
        # Emphasis opened before the key but not closed before the colon,
        # as in "**TYPE:** Process": the closing marks lead the value.
        opener = label[: len(label) - len(key)]
        if opener and opener[-1] in _MARKUP:
            value = value.lstrip(_MARKUP).strip()
    return key, value


def _match_vocabulary(value: str, vocabulary) -> str:
    """Return the canonical vocabulary term for *value*, else the cleaned value."""
    label = value.strip().rstrip(".,;").strip()
    # Peel matching wrapper characters one pair at a time so that
    # "**S3***" becomes "S3*" rather than losing its own asterisk.
    while len(label) > 2 and label[0] in _WRAPPERS and label[-1] == label[0]:
        label = label[1:-1].strip()
    label = label.rstrip(".,;").strip()
    folded = label.lower()
    for term in vocabulary:
        if term.lower() == folded:
            return term
    return label or value  # keep the answer if unrecognised


def parse_classification_response(text: str) -> dict:
    """Parse TYPE/VSM/TYPE_RATIONALE/VSM_RATIONALE from an LLM response.

    Each line of the form ``KEY: value`` is examined.  Keys are matched
    case-insensitively and may carry list bullets or Markdown emphasis
    (``**TYPE:** Process``, ``- TYPE: Process``).  TYPE and VSM values are
    matched case-insensitively against the controlled vocabularies after
    removing surrounding emphasis, quotes and trailing punctuation; an
    unrecognised value is kept (cleaned) so the caller can see what the
    model answered.  A field line with an empty value is ignored, so it
    cannot overwrite a value parsed from an earlier line.  When a field
    appears several times, the last non-empty occurrence wins.

    Args:
        text: Raw response text from the model.

    Returns:
        A dict with keys ``entity_type``, ``vsm_system``, ``type_rationale``
        and ``vsm_rationale``.  The first two are ``None`` and the
        rationales are ``""`` when the field was not found.
    """
    result: dict = {
        "entity_type": None,
        "vsm_system": None,
        "type_rationale": "",
        "vsm_rationale": "",
    }

    for line in text.splitlines():
        field = _split_field(line)
        if field is None:
            continue
        key, value = field
        if not value:
            continue  # e.g. the blank "TYPE:" template echoed back
        if key == "TYPE_RATIONALE":
            result["type_rationale"] = value
        elif key == "VSM_RATIONALE":
            result["vsm_rationale"] = value
        elif key == "TYPE":
            result["entity_type"] = _match_vocabulary(value, ENTITY_TYPES)
        else:  # "VSM"
            result["vsm_system"] = _match_vocabulary(value, VSM_SYSTEMS)

    return result
+ """ + topic = config.topic.name + cls_path = output_dir or Path(config.classifications_dir) + classifier_name = (run_config.model_name if run_config else "unknown") + + def _write_and_notify(done: int, total: int, result) -> None: + if result.status == "success" and result.response is not None: + parsed = parse_classification_response(result.response.content) + entity_type = parsed["entity_type"] or "Unknown" + vsm_system = parsed["vsm_system"] or "Unknown" + classification = EntityClassification( + entity_slug=result.key, + entity_type=entity_type, + vsm_system=vsm_system, + type_rationale=parsed["type_rationale"], + vsm_rationale=parsed["vsm_rationale"], + classified_by=classifier_name, + classified_at=datetime.utcnow(), + ) + dest = cls_path / f"{result.key}.md" + write_entity_classification(classification, dest) + + if progress_callback is not None: + progress_callback(done, total, result) + + items = [ + BatchItem( + key=entity.slug, + prompt=build_classification_prompt(entity, topic), + ) + for entity in entities + ] + + evaluator = BatchEvaluator( + adapter=adapter, + config=run_config, + progress_callback=_write_and_notify, + ) + return evaluator.evaluate(items) diff --git a/markitect/infospace/cli.py b/markitect/infospace/cli.py index 9796209a..6ac2e1cb 100644 --- a/markitect/infospace/cli.py +++ b/markitect/infospace/cli.py @@ -419,6 +419,172 @@ def relations(config_path: Optional[str], entity_slug: Optional[str], click.echo(f"{subj:<35} {pred:<30} {obj:<35} {r.vsm_channel}") +# ── classify ───────────────────────────────────────────────────────── + + +@infospace_commands.command() +@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.") +@click.option("--entity", "entity_slug", default=None, + help="Classify a single entity by slug.") +@click.option("--provider", default="openrouter", + help="LLM provider (openrouter, gemini, openai, …).") +@click.option("--model", default=None, help="Model name override.") +def 
classify(config_path: Optional[str], entity_slug: Optional[str], + provider: str, model: Optional[str]): + """Classify entities with Entity Type and VSM System (L2).""" + cfg, cfg_path = _load_config_or_exit(config_path) + root = cfg_path.parent + + from markitect.infospace.classifier import run_entity_classification + from markitect.llm import create_adapter + from markitect.prompts.execution.models import RunConfig + + entity_list = parse_entity_directory(root / cfg.entities_dir) + if not entity_list: + click.echo("No entities found in " + str(root / cfg.entities_dir), err=True) + return + + output_dir = root / cfg.classifications_dir + + if entity_slug: + entity_list = [e for e in entity_list if e.slug == entity_slug] + if not entity_list: + click.echo(f"Entity '{entity_slug}' not found.", err=True) + return + else: + # Incremental skip — entities already classified are omitted + if output_dir.is_dir(): + done_slugs = {p.stem for p in output_dir.glob("*.md")} + before = len(entity_list) + entity_list = [e for e in entity_list if e.slug not in done_slugs] + skipped = before - len(entity_list) + if skipped: + click.echo(f"Skipping {skipped} already-classified entities.") + if not entity_list: + click.echo("All entities already classified. 
@infospace_commands.command(name="classify-summary")
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option("--update-metrics", "update_metrics", is_flag=True, default=False,
              help="Write type_distribution metrics to metrics.yaml.")
def classify_summary(config_path: Optional[str], update_metrics: bool):
    """Show type × VSM distribution across all classified entities (L2).

    Prints the count and share of each entity type and VSM system, then a
    type × VSM matrix and the number of occupied matrix cells.  Only pairs
    whose type and VSM system both belong to the controlled vocabularies
    count as matrix cells; classifications with any other value (for
    example ``Unknown``) are still listed in the per-type and per-system
    counts and are reported separately below the matrix.

    With ``--update-metrics`` the type distribution, the occupied-cell count
    and the Shannon entropy (base 2) of the type distribution are merged
    into ``metrics.yaml`` in the configured metrics directory.
    """
    cfg, cfg_path = _load_config_or_exit(config_path)
    root = cfg_path.parent

    from collections import Counter

    from markitect.infospace.classification import ENTITY_TYPES, VSM_SYSTEMS
    from markitect.infospace.classification_io import read_classifications_directory

    cls_dir = root / cfg.classifications_dir
    if not cls_dir.is_dir():
        click.echo("No classifications directory found. Run 'classify' first.")
        return

    all_cls = read_classifications_directory(cls_dir)
    if not all_cls:
        click.echo("No classification files found.")
        return

    n = len(all_cls)  # > 0 here, so the divisions below are safe
    type_counts = Counter(c.entity_type for c in all_cls)
    vsm_counts = Counter(c.vsm_system for c in all_cls)
    matrix = Counter((c.entity_type, c.vsm_system) for c in all_cls)

    click.echo(f"Classification summary — {n} entities\n")

    click.echo("Entity types:")
    # most_common() orders by descending count, ties in first-seen order.
    for t, count in type_counts.most_common():
        pct = 100 * count / n
        click.echo(f"  {t:<15} {count:>4}  ({pct:.1f}%)")
    click.echo()

    click.echo("VSM systems:")
    # Canonical order first, then any out-of-vocabulary values so that no
    # classified entity disappears from the listing.
    extra_vsm = sorted(v for v in vsm_counts if v not in VSM_SYSTEMS)
    for v in [*VSM_SYSTEMS, *extra_vsm]:
        if v in vsm_counts:
            count = vsm_counts[v]
            pct = 100 * count / n
            click.echo(f"  {v:<6} {count:>4}  ({pct:.1f}%)")
    click.echo()

    # Type × VSM matrix
    header = f"{'':15}" + "".join(f"{v:>7}" for v in VSM_SYSTEMS)
    sep = "-" * (15 + 7 * len(VSM_SYSTEMS))
    click.echo(header)
    click.echo(sep)
    for t in ENTITY_TYPES:
        row = f"{t:<15}"
        for v in VSM_SYSTEMS:
            cell = matrix[(t, v)]  # Counter returns 0 for absent pairs
            row += f"{cell if cell else '.':>7}"
        click.echo(row)
    click.echo()

    # Count only cells that exist in the grid; len(matrix) would also count
    # pairs with an unrecognised type or VSM system and could exceed the
    # total number of cells.
    grid = [(t, v) for t in ENTITY_TYPES for v in VSM_SYSTEMS]
    filled_cells = sum(1 for cell_key in grid if matrix[cell_key])
    total_cells = len(grid)
    click.echo(f"Matrix fill: {filled_cells}/{total_cells} cells occupied")
    off_grid = n - sum(matrix[cell_key] for cell_key in grid)
    if off_grid:
        click.echo(
            f"{off_grid} entities have a type or VSM system outside the "
            "controlled vocabulary and are not shown in the matrix."
        )
    click.echo()

    if update_metrics:
        import math
        from markitect.infospace.history import read_metrics_file, write_metrics_file
        metrics_dir = root / cfg.metrics_dir
        metrics_dir.mkdir(parents=True, exist_ok=True)

        # Type entropy
        type_entropy = 0.0
        for count in type_counts.values():
            p = count / n
            if p > 0:
                type_entropy -= p * math.log2(p)

        existing = read_metrics_file(metrics_dir / "metrics.yaml")
        new_metrics = {
            # Plain dict: a Counter subclass may not serialise like a dict.
            "type_distribution": dict(type_counts),
            "vsm_type_matrix_cells": filled_cells,
            "type_entropy": round(type_entropy, 4),
        }
        # NOTE(review): `or {}` guards against read_metrics_file returning
        # None for a missing file — confirm its actual contract.
        merged = {**(existing or {}), **new_metrics}
        write_metrics_file(merged, metrics_dir / "metrics.yaml")
        click.echo(
            f"Updated metrics.yaml: type_entropy={type_entropy:.4f}, "
            f"vsm_type_matrix_cells={filled_cells}"
        )