feat(infospace): add infospace configuration model and state (S2.1)

InfospaceConfig (topic, disciplines, schemas, competency questions, viability thresholds, pipeline) with YAML load/save and directory discovery. InfospaceState aggregates entities, evaluations, and viability checks for status reporting. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 01:44:14 +01:00
parent 144a88c0c2
commit b20fe4db68
4 changed files with 882 additions and 0 deletions
--- a/markitect/infospace/init.py
+++ b/markitect/infospace/init.py
@@ -39,6 +39,23 @@ from .evaluation_io import (
    write_entity_evaluation,
    write_snapshot,
 )
+from .config import (
+    DisciplineBinding,
+    InfospaceConfig,
+    PipelineConfig,
+    PipelineStage,
+    SchemaRegistry,
+    TopicConfig,
+    ViabilityThreshold,
+    find_infospace_config,
+    load_infospace_config,
+    save_infospace_config,
+)
+from .state import (
+    InfospaceState,
+    ViabilityResult,
+    build_state,
+)

 __all__ = [
    "EntityMeta",
@@ -72,4 +89,19 @@ __all__ = [
    "read_snapshot",
    "write_entity_evaluation",
    "write_snapshot",
+    # Config
+    "DisciplineBinding",
+    "InfospaceConfig",
+    "PipelineConfig",
+    "PipelineStage",
+    "SchemaRegistry",
+    "TopicConfig",
+    "ViabilityThreshold",
+    "find_infospace_config",
+    "load_infospace_config",
+    "save_infospace_config",
+    # State
+    "InfospaceState",
+    "ViabilityResult",
+    "build_state",
 ]
--- a/markitect/infospace/config.py
+++ b/markitect/infospace/config.py
@@ -0,0 +1,309 @@
+"""
+Infospace configuration model and YAML loader.
+
+An infospace is declared via an ``infospace.yaml`` file that specifies
+its topic, disciplines, schemas, competency questions, and viability
+thresholds.  This module provides the data models and I/O for that
+configuration.
+
+Example ``infospace.yaml``::
+
+    topic:
+      name: "The Wealth of Nations"
+      domain: "Classical Economics"
+      sources: artifacts/sources/
+
+    disciplines:
+      - name: "Viable System Model"
+        path: artifacts/vsm-reference/
+
+    schemas:
+      entity: schemas/economic-entity-schema-v1.0.md
+
+    competency_questions: schemas/competency-questions.md
+
+    viability:
+      coverage_ratio: { min: 0.60 }
+      per_entity_mean: { min: 3.5 }
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+
+@dataclass
+class TopicConfig:
+    """The subject matter an infospace explains.
+
+    Attributes:
+        name: Human-readable topic name.
+        domain: Broader knowledge domain.
+        sources: Path (relative to infospace root) to source material.
+    """
+
+    name: str
+    domain: str = ""
+    sources: str = ""
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {"name": self.name}
+        if self.domain:
+            d["domain"] = self.domain
+        if self.sources:
+            d["sources"] = self.sources
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> TopicConfig:
+        return cls(
+            name=data["name"],
+            domain=data.get("domain", ""),
+            sources=data.get("sources", ""),
+        )
+
+
+@dataclass
+class DisciplineBinding:
+    """An external infospace applied as an analytical lens.
+
+    Attributes:
+        name: Human-readable discipline name.
+        path: Path to the discipline infospace (relative to root).
+    """
+
+    name: str
+    path: str = ""
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {"name": self.name}
+        if self.path:
+            d["path"] = self.path
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> DisciplineBinding:
+        return cls(name=data["name"], path=data.get("path", ""))
+
+
+@dataclass
+class SchemaRegistry:
+    """Schema paths governing entity and document structure.
+
+    All paths are relative to the infospace root directory.
+    """
+
+    entity: str = ""
+    mapping: str = ""
+    analysis: str = ""
+    extra: Dict[str, str] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {}
+        if self.entity:
+            d["entity"] = self.entity
+        if self.mapping:
+            d["mapping"] = self.mapping
+        if self.analysis:
+            d["analysis"] = self.analysis
+        d.update(self.extra)
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> SchemaRegistry:
+        known = {"entity", "mapping", "analysis"}
+        extra = {k: v for k, v in data.items() if k not in known}
+        return cls(
+            entity=data.get("entity", ""),
+            mapping=data.get("mapping", ""),
+            analysis=data.get("analysis", ""),
+            extra=extra,
+        )
+
+
+@dataclass
+class ViabilityThreshold:
+    """Threshold for a single viability metric.
+
+    At least one of *min* or *max* should be set.
+    """
+
+    metric: str
+    min: Optional[float] = None
+    max: Optional[float] = None
+
+    def check(self, value: float) -> bool:
+        """Return ``True`` if *value* is within the threshold."""
+        if self.min is not None and value < self.min:
+            return False
+        if self.max is not None and value > self.max:
+            return False
+        return True
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {}
+        if self.min is not None:
+            d["min"] = self.min
+        if self.max is not None:
+            d["max"] = self.max
+        return d
+
+
+@dataclass
+class PipelineStage:
+    """A single stage in the processing pipeline."""
+
+    template: str
+    spaces: List[str] = field(default_factory=list)
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {"template": self.template}
+        if self.spaces:
+            d["spaces"] = self.spaces
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> PipelineStage:
+        return cls(
+            template=data["template"],
+            spaces=data.get("spaces", []),
+        )
+
+
+@dataclass
+class PipelineConfig:
+    """Processing pipeline configuration."""
+
+    stages: List[PipelineStage] = field(default_factory=list)
+    post_batch: List[PipelineStage] = field(default_factory=list)
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {}
+        if self.stages:
+            d["stages"] = [s.to_dict() for s in self.stages]
+        if self.post_batch:
+            d["post_batch"] = [s.to_dict() for s in self.post_batch]
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> PipelineConfig:
+        return cls(
+            stages=[PipelineStage.from_dict(s) for s in data.get("stages", [])],
+            post_batch=[PipelineStage.from_dict(s) for s in data.get("post_batch", [])],
+        )
+
+
+@dataclass
+class InfospaceConfig:
+    """Complete infospace configuration, loaded from ``infospace.yaml``.
+
+    This is the declarative description of an infospace: what it
+    explains, through which lenses, governed by which schemas, and
+    what quality thresholds it must meet.
+    """
+
+    topic: TopicConfig
+    disciplines: List[DisciplineBinding] = field(default_factory=list)
+    schemas: SchemaRegistry = field(default_factory=SchemaRegistry)
+    competency_questions: str = ""
+    viability: Dict[str, ViabilityThreshold] = field(default_factory=dict)
+    pipeline: Optional[PipelineConfig] = None
+    entities_dir: str = "output/entities"
+    evaluations_dir: str = "output/evaluations"
+    metrics_dir: str = "output/metrics"
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {"topic": self.topic.to_dict()}
+        if self.disciplines:
+            d["disciplines"] = [db.to_dict() for db in self.disciplines]
+        schemas_dict = self.schemas.to_dict()
+        if schemas_dict:
+            d["schemas"] = schemas_dict
+        if self.competency_questions:
+            d["competency_questions"] = self.competency_questions
+        if self.viability:
+            d["viability"] = {
+                name: t.to_dict() for name, t in self.viability.items()
+            }
+        if self.pipeline:
+            d["pipeline"] = self.pipeline.to_dict()
+        if self.entities_dir != "output/entities":
+            d["entities_dir"] = self.entities_dir
+        if self.evaluations_dir != "output/evaluations":
+            d["evaluations_dir"] = self.evaluations_dir
+        if self.metrics_dir != "output/metrics":
+            d["metrics_dir"] = self.metrics_dir
+        return d
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> InfospaceConfig:
+        viability_raw = data.get("viability", {})
+        viability = {
+            name: ViabilityThreshold(metric=name, **bounds)
+            for name, bounds in viability_raw.items()
+        }
+        pipeline_raw = data.get("pipeline")
+        pipeline = PipelineConfig.from_dict(pipeline_raw) if pipeline_raw else None
+
+        return cls(
+            topic=TopicConfig.from_dict(data["topic"]),
+            disciplines=[
+                DisciplineBinding.from_dict(d)
+                for d in data.get("disciplines", [])
+            ],
+            schemas=SchemaRegistry.from_dict(data.get("schemas", {})),
+            competency_questions=data.get("competency_questions", ""),
+            viability=viability,
+            pipeline=pipeline,
+            entities_dir=data.get("entities_dir", "output/entities"),
+            evaluations_dir=data.get("evaluations_dir", "output/evaluations"),
+            metrics_dir=data.get("metrics_dir", "output/metrics"),
+        )
+
+
+def load_infospace_config(path: Path) -> InfospaceConfig:
+    """Load an :class:`InfospaceConfig` from a YAML file.
+
+    Args:
+        path: Path to ``infospace.yaml``.
+
+    Raises:
+        FileNotFoundError: If *path* does not exist.
+        ValueError: If required fields are missing.
+    """
+    data = yaml.safe_load(path.read_text(encoding="utf-8"))
+    if not isinstance(data, dict):
+        raise ValueError(f"Expected a YAML mapping in {path}")
+    if "topic" not in data:
+        raise ValueError(f"Missing required 'topic' key in {path}")
+    return InfospaceConfig.from_dict(data)
+
+
+def save_infospace_config(config: InfospaceConfig, path: Path) -> None:
+    """Write an :class:`InfospaceConfig` to a YAML file."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(
+        yaml.safe_dump(
+            config.to_dict(),
+            default_flow_style=False,
+            sort_keys=False,
+        ),
+        encoding="utf-8",
+    )
+
+
+def find_infospace_config(start: Optional[Path] = None) -> Optional[Path]:
+    """Walk up from *start* looking for ``infospace.yaml``.
+
+    Returns the path to the config file, or ``None``.
+    """
+    current = (start or Path.cwd()).resolve()
+    for directory in [current, *current.parents]:
+        candidate = directory / "infospace.yaml"
+        if candidate.is_file():
+            return candidate
+    return None
--- a/markitect/infospace/state.py
+++ b/markitect/infospace/state.py
@@ -0,0 +1,141 @@
+"""
+Infospace runtime state.
+
+Computed from the current entities, evaluations, and metrics on disk.
+Provides the data behind ``markitect infospace status`` and
+``markitect infospace viability``.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from markitect.infospace.config import InfospaceConfig, ViabilityThreshold
+from markitect.infospace.models import EntityMeta
+from markitect.infospace.evaluation import EvaluationSnapshot
+
+
+@dataclass
+class ViabilityResult:
+    """Result of checking a single viability threshold."""
+
+    metric: str
+    value: float
+    threshold: ViabilityThreshold
+    passed: bool
+
+    def to_dict(self) -> Dict[str, Any]:
+        d: Dict[str, Any] = {
+            "metric": self.metric,
+            "value": self.value,
+            "passed": self.passed,
+        }
+        if self.threshold.min is not None:
+            d["min"] = self.threshold.min
+        if self.threshold.max is not None:
+            d["max"] = self.threshold.max
+        return d
+
+
+@dataclass
+class InfospaceState:
+    """Current runtime state of an infospace.
+
+    Aggregates entity metadata, evaluation results, and viability
+    checks into a single queryable object.
+    """
+
+    config: InfospaceConfig
+    entities: List[EntityMeta] = field(default_factory=list)
+    latest_snapshot: Optional[EvaluationSnapshot] = None
+    viability_results: List[ViabilityResult] = field(default_factory=list)
+    computed_at: datetime = field(default_factory=datetime.utcnow)
+
+    @property
+    def entity_count(self) -> int:
+        return len(self.entities)
+
+    @property
+    def topic_name(self) -> str:
+        return self.config.topic.name
+
+    @property
+    def is_viable(self) -> bool:
+        """``True`` if all viability thresholds are met."""
+        if not self.viability_results:
+            return False
+        return all(r.passed for r in self.viability_results)
+
+    @property
+    def viability_pass_count(self) -> int:
+        return sum(1 for r in self.viability_results if r.passed)
+
+    @property
+    def viability_total_count(self) -> int:
+        return len(self.viability_results)
+
+    @property
+    def domains(self) -> List[str]:
+        """Distinct domain values across all entities."""
+        return sorted({e.domain for e in self.entities if e.domain})
+
+    @property
+    def has_evaluations(self) -> bool:
+        return self.latest_snapshot is not None
+
+    def check_viability(self, metrics: Dict[str, float]) -> List[ViabilityResult]:
+        """Check *metrics* against the configured viability thresholds.
+
+        Updates :attr:`viability_results` and returns the results.
+        """
+        results: List[ViabilityResult] = []
+        for name, threshold in self.config.viability.items():
+            value = metrics.get(name, 0.0)
+            results.append(ViabilityResult(
+                metric=name,
+                value=value,
+                threshold=threshold,
+                passed=threshold.check(value),
+            ))
+        self.viability_results = results
+        return results
+
+    def summary(self) -> Dict[str, Any]:
+        """Return a summary dict suitable for display or serialisation."""
+        d: Dict[str, Any] = {
+            "topic": self.topic_name,
+            "entity_count": self.entity_count,
+            "domains": self.domains,
+            "has_evaluations": self.has_evaluations,
+        }
+        if self.viability_results:
+            d["viable"] = self.is_viable
+            d["viability_pass"] = self.viability_pass_count
+            d["viability_total"] = self.viability_total_count
+        if self.latest_snapshot:
+            d["last_evaluated"] = self.latest_snapshot.created_at.isoformat()
+        return d
+
+
+def build_state(
+    config: InfospaceConfig,
+    entities: Optional[List[EntityMeta]] = None,
+    snapshot: Optional[EvaluationSnapshot] = None,
+    metrics: Optional[Dict[str, float]] = None,
+) -> InfospaceState:
+    """Build an :class:`InfospaceState` from available data.
+
+    This is a convenience function that assembles the state object
+    and optionally runs viability checks if *metrics* are provided.
+    """
+    state = InfospaceState(
+        config=config,
+        entities=entities or [],
+        latest_snapshot=snapshot,
+    )
+    if metrics is not None:
+        state.check_viability(metrics)
+    return state