feat(infospace): add infospace configuration model and state (S2.1)

InfospaceConfig (topic, disciplines, schemas, competency questions,
viability thresholds, pipeline) with YAML load/save and directory
discovery. InfospaceState aggregates entities, evaluations, and
viability checks for status reporting.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 01:44:14 +01:00
parent 144a88c0c2
commit b20fe4db68
4 changed files with 882 additions and 0 deletions

View File

@@ -39,6 +39,23 @@ from .evaluation_io import (
write_entity_evaluation,
write_snapshot,
)
from .config import (
DisciplineBinding,
InfospaceConfig,
PipelineConfig,
PipelineStage,
SchemaRegistry,
TopicConfig,
ViabilityThreshold,
find_infospace_config,
load_infospace_config,
save_infospace_config,
)
from .state import (
InfospaceState,
ViabilityResult,
build_state,
)
__all__ = [
"EntityMeta",
@@ -72,4 +89,19 @@ __all__ = [
"read_snapshot",
"write_entity_evaluation",
"write_snapshot",
# Config
"DisciplineBinding",
"InfospaceConfig",
"PipelineConfig",
"PipelineStage",
"SchemaRegistry",
"TopicConfig",
"ViabilityThreshold",
"find_infospace_config",
"load_infospace_config",
"save_infospace_config",
# State
"InfospaceState",
"ViabilityResult",
"build_state",
]

View File

@@ -0,0 +1,309 @@
"""
Infospace configuration model and YAML loader.
An infospace is declared via an ``infospace.yaml`` file that specifies
its topic, disciplines, schemas, competency questions, and viability
thresholds. This module provides the data models and I/O for that
configuration.
Example ``infospace.yaml``::
topic:
name: "The Wealth of Nations"
domain: "Classical Economics"
sources: artifacts/sources/
disciplines:
- name: "Viable System Model"
path: artifacts/vsm-reference/
schemas:
entity: schemas/economic-entity-schema-v1.0.md
competency_questions: schemas/competency-questions.md
viability:
coverage_ratio: { min: 0.60 }
per_entity_mean: { min: 3.5 }
"""
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
import yaml
@dataclass
class TopicConfig:
    """Subject matter that an infospace sets out to explain.

    Attributes:
        name: Human-readable topic name (required).
        domain: Broader knowledge domain; empty when unspecified.
        sources: Path to the source material, relative to the infospace
            root; empty when unspecified.
    """

    name: str
    domain: str = ""
    sources: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping, leaving out empty optional fields."""
        payload: Dict[str, Any] = {"name": self.name}
        # Optional keys are emitted in declaration order: domain, then sources.
        for key in ("domain", "sources"):
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> TopicConfig:
        """Build a topic from a mapping.

        Raises:
            KeyError: If *data* has no ``name`` entry.
        """
        name = data["name"]
        domain = data.get("domain", "")
        sources = data.get("sources", "")
        return cls(name=name, domain=domain, sources=sources)
@dataclass
class DisciplineBinding:
    """External infospace that is bound in as an analytical lens.

    Attributes:
        name: Human-readable discipline name (required).
        path: Location of the discipline infospace, relative to the
            root; empty when unspecified.
    """

    name: str
    path: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a mapping; ``path`` is emitted only when non-empty."""
        if not self.path:
            return {"name": self.name}
        return {"name": self.name, "path": self.path}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> DisciplineBinding:
        """Create a binding from a mapping.

        Raises:
            KeyError: If *data* has no ``name`` entry.
        """
        name = data["name"]
        path = data.get("path", "")
        return cls(name=name, path=path)
@dataclass
class SchemaRegistry:
    """Registry of schema paths that govern entity and document structure.

    Every path is interpreted relative to the infospace root directory.
    Keys other than ``entity``, ``mapping`` and ``analysis`` are kept
    verbatim in :attr:`extra`.
    """

    entity: str = ""
    mapping: str = ""
    analysis: str = ""
    extra: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a mapping: non-empty core paths first, then extras."""
        core_paths = (
            ("entity", self.entity),
            ("mapping", self.mapping),
            ("analysis", self.analysis),
        )
        out: Dict[str, Any] = {key: value for key, value in core_paths if value}
        # Extras are merged last, so an extra key wins over a core key.
        out.update(self.extra)
        return out

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> SchemaRegistry:
        """Build a registry from a mapping, routing unknown keys to ``extra``."""
        core_keys = ("entity", "mapping", "analysis")
        leftovers = {
            key: value for key, value in data.items() if key not in core_keys
        }
        entity = data.get("entity", "")
        mapping = data.get("mapping", "")
        analysis = data.get("analysis", "")
        return cls(entity=entity, mapping=mapping, analysis=analysis, extra=leftovers)
@dataclass
class ViabilityThreshold:
    """Threshold for a single viability metric.

    At least one of *min* or *max* should be set.

    Attributes:
        metric: Name of the metric this threshold applies to.
        min: Inclusive lower bound, or ``None`` for no lower bound.
        max: Inclusive upper bound, or ``None`` for no upper bound.
    """

    metric: str
    min: Optional[float] = None
    max: Optional[float] = None

    def check(self, value: float) -> bool:
        """Return ``True`` if *value* is within the threshold.

        Both bounds are inclusive. A NaN *value* fails every bound that
        is set: an undefined metric must not be reported as passing.
        """
        # Written as ``not (value >= bound)`` rather than ``value < bound``:
        # every comparison with NaN is False, so the negated form rejects
        # NaN while behaving identically for ordinary numbers.
        if self.min is not None and not (value >= self.min):
            return False
        if self.max is not None and not (value <= self.max):
            return False
        return True

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the bounds that are set; ``metric`` is not included."""
        d: Dict[str, Any] = {}
        if self.min is not None:
            d["min"] = self.min
        if self.max is not None:
            d["max"] = self.max
        return d
@dataclass
class PipelineStage:
    """One stage of the processing pipeline.

    Attributes:
        template: Template identifier for the stage (required).
        spaces: Spaces associated with the stage; empty by default.
    """

    template: str
    spaces: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a mapping; ``spaces`` is emitted only when non-empty."""
        if not self.spaces:
            return {"template": self.template}
        return {"template": self.template, "spaces": self.spaces}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PipelineStage:
        """Create a stage from a mapping.

        Raises:
            KeyError: If *data* has no ``template`` entry.
        """
        template = data["template"]
        spaces = data.get("spaces", [])
        return cls(template=template, spaces=spaces)
@dataclass
class PipelineConfig:
    """Processing pipeline configuration.

    Attributes:
        stages: Ordered pipeline stages.
        post_batch: Stages listed under the ``post_batch`` key.
    """

    stages: List[PipelineStage] = field(default_factory=list)
    post_batch: List[PipelineStage] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a mapping, omitting stage lists that are empty."""
        d: Dict[str, Any] = {}
        if self.stages:
            d["stages"] = [s.to_dict() for s in self.stages]
        if self.post_batch:
            d["post_batch"] = [s.to_dict() for s in self.post_batch]
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PipelineConfig:
        """Build a pipeline configuration from a mapping.

        A key that is present but has no value (``stages:`` in YAML loads
        as ``None``) is treated the same as a missing key.
        """
        stages_raw = data.get("stages") or []
        post_batch_raw = data.get("post_batch") or []
        return cls(
            stages=[PipelineStage.from_dict(s) for s in stages_raw],
            post_batch=[PipelineStage.from_dict(s) for s in post_batch_raw],
        )
@dataclass
class InfospaceConfig:
    """Complete infospace configuration, loaded from ``infospace.yaml``.

    This is the declarative description of an infospace: what it
    explains, through which lenses, governed by which schemas, and
    what quality thresholds it must meet.

    Attributes:
        topic: The subject matter being explained (required).
        disciplines: Disciplines bound in as analytical lenses.
        schemas: Schema paths governing entities and documents.
        competency_questions: Value of the ``competency_questions`` key;
            empty when unspecified.
        viability: Thresholds keyed by metric name.
        pipeline: Optional processing pipeline configuration.
        entities_dir: Directory for entities (default ``output/entities``).
        evaluations_dir: Directory for evaluations
            (default ``output/evaluations``).
        metrics_dir: Directory for metrics (default ``output/metrics``).
    """

    topic: TopicConfig
    disciplines: List[DisciplineBinding] = field(default_factory=list)
    schemas: SchemaRegistry = field(default_factory=SchemaRegistry)
    competency_questions: str = ""
    viability: Dict[str, ViabilityThreshold] = field(default_factory=dict)
    pipeline: Optional[PipelineConfig] = None
    entities_dir: str = "output/entities"
    evaluations_dir: str = "output/evaluations"
    metrics_dir: str = "output/metrics"

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping.

        Empty sections and directory settings that still hold their
        default value are omitted.
        """
        d: Dict[str, Any] = {"topic": self.topic.to_dict()}
        if self.disciplines:
            d["disciplines"] = [db.to_dict() for db in self.disciplines]
        schemas_dict = self.schemas.to_dict()
        if schemas_dict:
            d["schemas"] = schemas_dict
        if self.competency_questions:
            d["competency_questions"] = self.competency_questions
        if self.viability:
            d["viability"] = {
                name: t.to_dict() for name, t in self.viability.items()
            }
        if self.pipeline is not None:
            d["pipeline"] = self.pipeline.to_dict()
        if self.entities_dir != "output/entities":
            d["entities_dir"] = self.entities_dir
        if self.evaluations_dir != "output/evaluations":
            d["evaluations_dir"] = self.evaluations_dir
        if self.metrics_dir != "output/metrics":
            d["metrics_dir"] = self.metrics_dir
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> InfospaceConfig:
        """Build a configuration from a parsed mapping.

        Sections that are present but have no value (for example a bare
        ``viability:`` line, which YAML loads as ``None``) are treated as
        absent instead of crashing.

        Raises:
            KeyError: If ``topic`` or a required nested key is missing.
            ValueError: If a viability entry is not a mapping, or has
                keys other than ``min`` and ``max``.
        """
        viability: Dict[str, ViabilityThreshold] = {}
        for name, bounds in (data.get("viability") or {}).items():
            if not isinstance(bounds, dict):
                raise ValueError(
                    f"Viability threshold {name!r} must be a mapping with "
                    f"'min' and/or 'max', got {bounds!r}"
                )
            unknown = [key for key in bounds if key not in ("min", "max")]
            if unknown:
                raise ValueError(
                    f"Viability threshold {name!r} has unsupported keys: "
                    f"{unknown!r}"
                )
            viability[name] = ViabilityThreshold(
                metric=name,
                min=bounds.get("min"),
                max=bounds.get("max"),
            )

        pipeline_raw = data.get("pipeline")
        pipeline = PipelineConfig.from_dict(pipeline_raw) if pipeline_raw else None

        return cls(
            topic=TopicConfig.from_dict(data["topic"]),
            disciplines=[
                DisciplineBinding.from_dict(d)
                for d in (data.get("disciplines") or [])
            ],
            schemas=SchemaRegistry.from_dict(data.get("schemas") or {}),
            competency_questions=data.get("competency_questions", ""),
            viability=viability,
            pipeline=pipeline,
            entities_dir=data.get("entities_dir", "output/entities"),
            evaluations_dir=data.get("evaluations_dir", "output/evaluations"),
            metrics_dir=data.get("metrics_dir", "output/metrics"),
        )
def load_infospace_config(path: Path) -> InfospaceConfig:
    """Load an :class:`InfospaceConfig` from a YAML file.

    Args:
        path: Path to ``infospace.yaml``.

    Returns:
        The parsed configuration.

    Raises:
        FileNotFoundError: If *path* does not exist.
        ValueError: If the file is not a YAML mapping, ``topic`` is not a
            mapping, or a required field is missing.
    """
    data = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(f"Expected a YAML mapping in {path}")
    if "topic" not in data:
        raise ValueError(f"Missing required 'topic' key in {path}")
    if not isinstance(data["topic"], dict):
        raise ValueError(f"Expected 'topic' to be a mapping in {path}")
    try:
        return InfospaceConfig.from_dict(data)
    except KeyError as err:
        # Required nested keys are read with plain indexing; surface their
        # absence as the documented ValueError rather than a bare KeyError.
        raise ValueError(f"Missing required key {err} in {path}") from err
def save_infospace_config(config: InfospaceConfig, path: Path) -> None:
    """Serialise *config* as YAML and write it to *path*.

    Missing parent directories are created first. Keys are written in
    the order produced by ``config.to_dict()`` (block style, unsorted).
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    document = yaml.safe_dump(
        config.to_dict(),
        default_flow_style=False,
        sort_keys=False,
    )
    path.write_text(document, encoding="utf-8")
def find_infospace_config(start: Optional[Path] = None) -> Optional[Path]:
    """Locate the nearest ``infospace.yaml`` at or above *start*.

    The search begins in *start* (the current working directory when
    omitted) and proceeds through each parent directory in turn.

    Returns:
        Path of the first ``infospace.yaml`` that is a regular file, or
        ``None`` when no directory on the way up contains one.
    """
    origin = (start or Path.cwd()).resolve()
    search_dirs = (origin, *origin.parents)
    candidates = (folder / "infospace.yaml" for folder in search_dirs)
    # The generator is lazy, so directories are probed nearest-first and
    # probing stops at the first hit.
    return next((path for path in candidates if path.is_file()), None)

View File

@@ -0,0 +1,141 @@
"""
Infospace runtime state.
Computed from the current entities, evaluations, and metrics on disk.
Provides the data behind ``markitect infospace status`` and
``markitect infospace viability``.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from markitect.infospace.config import InfospaceConfig, ViabilityThreshold
from markitect.infospace.models import EntityMeta
from markitect.infospace.evaluation import EvaluationSnapshot
@dataclass
class ViabilityResult:
    """Outcome of evaluating one metric against its viability threshold.

    Attributes:
        metric: Name of the metric that was checked.
        value: The metric value that was checked.
        threshold: The threshold the value was checked against.
        passed: Whether the value satisfied the threshold.
    """

    metric: str
    value: float
    threshold: ViabilityThreshold
    passed: bool

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a mapping; only bounds that are set are included."""
        limits = (("min", self.threshold.min), ("max", self.threshold.max))
        bounds = {key: limit for key, limit in limits if limit is not None}
        return {
            "metric": self.metric,
            "value": self.value,
            "passed": self.passed,
            **bounds,
        }
@dataclass
class InfospaceState:
    """Current runtime state of an infospace.

    Aggregates entity metadata, evaluation results, and viability
    checks into a single queryable object.

    Attributes:
        config: The infospace configuration this state belongs to.
        entities: Metadata of the known entities.
        latest_snapshot: Most recent evaluation snapshot, if any.
        viability_results: Results of the last :meth:`check_viability` run.
        computed_at: Timestamp taken when the state object was created.
    """

    config: InfospaceConfig
    entities: List[EntityMeta] = field(default_factory=list)
    latest_snapshot: Optional[EvaluationSnapshot] = None
    viability_results: List[ViabilityResult] = field(default_factory=list)
    # NOTE(review): datetime.utcnow() yields a naive timestamp and is
    # deprecated since Python 3.12. Switching to an aware datetime would
    # change what callers observe, so it is deliberately left as-is here.
    computed_at: datetime = field(default_factory=datetime.utcnow)

    @property
    def entity_count(self) -> int:
        """Number of entities in this state."""
        return len(self.entities)

    @property
    def topic_name(self) -> str:
        """Name of the configured topic."""
        return self.config.topic.name

    @property
    def is_viable(self) -> bool:
        """``True`` if all viability thresholds are met.

        ``False`` when no viability check has produced results yet.
        """
        if not self.viability_results:
            return False
        return all(r.passed for r in self.viability_results)

    @property
    def viability_pass_count(self) -> int:
        """Number of viability results that passed."""
        return sum(1 for r in self.viability_results if r.passed)

    @property
    def viability_total_count(self) -> int:
        """Total number of viability results."""
        return len(self.viability_results)

    @property
    def domains(self) -> List[str]:
        """Distinct non-empty domain values across all entities, sorted."""
        return sorted({e.domain for e in self.entities if e.domain})

    @property
    def has_evaluations(self) -> bool:
        """Whether an evaluation snapshot is attached."""
        return self.latest_snapshot is not None

    def check_viability(self, metrics: Dict[str, float]) -> List[ViabilityResult]:
        """Check *metrics* against the configured viability thresholds.

        Updates :attr:`viability_results` and returns the results. A
        metric that is absent from *metrics* is reported with value
        ``0.0`` and always fails: substituting ``0.0`` and checking it
        would silently satisfy any ``max``-only threshold.

        Args:
            metrics: Measured values keyed by metric name.

        Returns:
            One result per configured threshold, in configuration order.
        """
        results: List[ViabilityResult] = []
        for name, threshold in self.config.viability.items():
            measured = name in metrics
            value = metrics.get(name, 0.0)
            results.append(ViabilityResult(
                metric=name,
                value=value,
                threshold=threshold,
                # A metric that was never measured cannot pass.
                passed=measured and threshold.check(value),
            ))
        self.viability_results = results
        return results

    def summary(self) -> Dict[str, Any]:
        """Return a summary dict suitable for display or serialisation.

        Viability keys appear only after a viability check produced
        results; ``last_evaluated`` only when a snapshot is attached.
        """
        d: Dict[str, Any] = {
            "topic": self.topic_name,
            "entity_count": self.entity_count,
            "domains": self.domains,
            "has_evaluations": self.has_evaluations,
        }
        if self.viability_results:
            d["viable"] = self.is_viable
            d["viability_pass"] = self.viability_pass_count
            d["viability_total"] = self.viability_total_count
        if self.latest_snapshot:
            d["last_evaluated"] = self.latest_snapshot.created_at.isoformat()
        return d
def build_state(
    config: InfospaceConfig,
    entities: Optional[List[EntityMeta]] = None,
    snapshot: Optional[EvaluationSnapshot] = None,
    metrics: Optional[Dict[str, float]] = None,
) -> InfospaceState:
    """Assemble an :class:`InfospaceState` from whatever data is available.

    Args:
        config: Configuration of the infospace.
        entities: Entity metadata; an empty list is used when omitted.
        snapshot: Evaluation snapshot to attach, if any.
        metrics: Metric values; when given (even if empty), the
            viability thresholds are checked against them.

    Returns:
        The assembled state, with viability results populated only when
        *metrics* was supplied.
    """
    known_entities = entities or []
    state = InfospaceState(
        config=config,
        entities=known_entities,
        latest_snapshot=snapshot,
    )
    if metrics is None:
        return state
    state.check_viability(metrics)
    return state