feat(infospace): add infospace configuration model and state (S2.1)
InfospaceConfig (topic, disciplines, schemas, competency questions, viability thresholds, pipeline) with YAML load/save and directory discovery. InfospaceState aggregates entities, evaluations, and viability checks for status reporting. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -39,6 +39,23 @@ from .evaluation_io import (
|
||||
write_entity_evaluation,
|
||||
write_snapshot,
|
||||
)
|
||||
from .config import (
|
||||
DisciplineBinding,
|
||||
InfospaceConfig,
|
||||
PipelineConfig,
|
||||
PipelineStage,
|
||||
SchemaRegistry,
|
||||
TopicConfig,
|
||||
ViabilityThreshold,
|
||||
find_infospace_config,
|
||||
load_infospace_config,
|
||||
save_infospace_config,
|
||||
)
|
||||
from .state import (
|
||||
InfospaceState,
|
||||
ViabilityResult,
|
||||
build_state,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"EntityMeta",
|
||||
@@ -72,4 +89,19 @@ __all__ = [
|
||||
"read_snapshot",
|
||||
"write_entity_evaluation",
|
||||
"write_snapshot",
|
||||
# Config
|
||||
"DisciplineBinding",
|
||||
"InfospaceConfig",
|
||||
"PipelineConfig",
|
||||
"PipelineStage",
|
||||
"SchemaRegistry",
|
||||
"TopicConfig",
|
||||
"ViabilityThreshold",
|
||||
"find_infospace_config",
|
||||
"load_infospace_config",
|
||||
"save_infospace_config",
|
||||
# State
|
||||
"InfospaceState",
|
||||
"ViabilityResult",
|
||||
"build_state",
|
||||
]
|
||||
|
||||
309
markitect/infospace/config.py
Normal file
309
markitect/infospace/config.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
Infospace configuration model and YAML loader.
|
||||
|
||||
An infospace is declared via an ``infospace.yaml`` file that specifies
|
||||
its topic, disciplines, schemas, competency questions, and viability
|
||||
thresholds. This module provides the data models and I/O for that
|
||||
configuration.
|
||||
|
||||
Example ``infospace.yaml``::
|
||||
|
||||
topic:
|
||||
name: "The Wealth of Nations"
|
||||
domain: "Classical Economics"
|
||||
sources: artifacts/sources/
|
||||
|
||||
disciplines:
|
||||
- name: "Viable System Model"
|
||||
path: artifacts/vsm-reference/
|
||||
|
||||
schemas:
|
||||
entity: schemas/economic-entity-schema-v1.0.md
|
||||
|
||||
competency_questions: schemas/competency-questions.md
|
||||
|
||||
viability:
|
||||
coverage_ratio: { min: 0.60 }
|
||||
per_entity_mean: { min: 3.5 }
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
class TopicConfig:
    """Subject matter that an infospace sets out to explain.

    Attributes:
        name: Human-readable topic name (required).
        domain: Broader knowledge domain; empty when unspecified.
        sources: Path to the source material, relative to the infospace
            root; empty when unspecified.
    """

    name: str
    domain: str = ""
    sources: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping, leaving out empty optional fields."""
        payload: Dict[str, Any] = {"name": self.name}
        for key in ("domain", "sources"):
            value = getattr(self, key)
            if value:
                payload[key] = value
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> TopicConfig:
        """Build a topic from a mapping; ``name`` is the only required key."""
        # Look up the required key first so a missing name fails before
        # anything else is read.
        name = data["name"]
        optional = {key: data.get(key, "") for key in ("domain", "sources")}
        return cls(name=name, **optional)
|
||||
|
||||
|
||||
@dataclass
class DisciplineBinding:
    """External infospace that is applied as an analytical lens.

    Attributes:
        name: Human-readable discipline name (required).
        path: Path to the discipline infospace, relative to the root;
            empty when unspecified.
    """

    name: str
    path: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping; ``path`` is omitted when empty."""
        if not self.path:
            return {"name": self.name}
        return {"name": self.name, "path": self.path}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> DisciplineBinding:
        """Build a binding from a mapping; ``name`` is required."""
        name = data["name"]
        path = data.get("path", "")
        return cls(name, path)
|
||||
|
||||
|
||||
@dataclass
class SchemaRegistry:
    """Registry of schema paths for entity and document structure.

    Every path is relative to the infospace root directory.  Keys other
    than ``entity``, ``mapping`` and ``analysis`` are kept in ``extra``.
    """

    entity: str = ""
    mapping: str = ""
    analysis: str = ""
    extra: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the non-empty well-known paths followed by the extras."""
        well_known: Dict[str, Any] = {}
        for key in ("entity", "mapping", "analysis"):
            value = getattr(self, key)
            if value:
                well_known[key] = value
        # Extras are merged last, exactly like ``dict.update``.
        return {**well_known, **self.extra}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> SchemaRegistry:
        """Split *data* into the three well-known paths and everything else."""
        core = ("entity", "mapping", "analysis")
        leftovers = {key: value for key, value in data.items() if key not in core}
        named = {key: data.get(key, "") for key in core}
        return cls(extra=leftovers, **named)
|
||||
|
||||
|
||||
@dataclass
class ViabilityThreshold:
    """Threshold for a single viability metric.

    At least one of *min* or *max* should be set; a threshold with
    neither bound accepts every value.

    Attributes:
        metric: Name of the metric the threshold applies to.
        min: Inclusive lower bound, or ``None`` for no lower bound.
        max: Inclusive upper bound, or ``None`` for no upper bound.
    """

    metric: str
    min: Optional[float] = None
    max: Optional[float] = None

    def check(self, value: float) -> bool:
        """Return ``True`` if *value* is within the threshold.

        Both bounds are inclusive.  A NaN *value* fails any configured
        bound.
        """
        # The comparisons are phrased positively ("value >= min") because
        # every ordering comparison with NaN is false; the negated form
        # ("value < min") would let NaN slip through as a pass.
        if self.min is not None and not value >= self.min:
            return False
        if self.max is not None and not value <= self.max:
            return False
        return True

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the configured bounds; the metric name is not included."""
        d: Dict[str, Any] = {}
        if self.min is not None:
            d["min"] = self.min
        if self.max is not None:
            d["max"] = self.max
        return d
|
||||
|
||||
|
||||
@dataclass
class PipelineStage:
    """A single stage in the processing pipeline.

    Attributes:
        template: Template identifier for the stage (required).
        spaces: Names of the spaces the stage applies to; may be empty.
    """

    template: str
    spaces: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping; ``spaces`` is omitted when empty."""
        d: Dict[str, Any] = {"template": self.template}
        if self.spaces:
            # Hand out a copy so edits to the serialised form cannot
            # mutate this stage.
            d["spaces"] = list(self.spaces)
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PipelineStage:
        """Build a stage from a mapping; ``template`` is required.

        ``spaces`` may be missing or null (treated as empty), a single
        string (treated as a one-element list), or any iterable of names.
        """
        template = data["template"]
        raw_spaces = data.get("spaces")
        if raw_spaces is None:
            # A YAML key with no value parses as None.
            spaces: List[str] = []
        elif isinstance(raw_spaces, str):
            # A bare scalar must not be exploded into characters.
            spaces = [raw_spaces]
        else:
            spaces = list(raw_spaces)
        return cls(template=template, spaces=spaces)
|
||||
|
||||
|
||||
@dataclass
class PipelineConfig:
    """Processing pipeline configuration.

    Attributes:
        stages: Pipeline stages, in declaration order.
        post_batch: Stages listed under the ``post_batch`` key.
    """

    stages: List[PipelineStage] = field(default_factory=list)
    post_batch: List[PipelineStage] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping, leaving out empty stage lists."""
        d: Dict[str, Any] = {}
        if self.stages:
            d["stages"] = [s.to_dict() for s in self.stages]
        if self.post_batch:
            d["post_batch"] = [s.to_dict() for s in self.post_batch]
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> PipelineConfig:
        """Build a pipeline from a mapping.

        Missing or null ``stages`` / ``post_batch`` entries yield empty
        lists.
        """
        # ``or []`` covers a YAML key with no value, which parses as None
        # and would otherwise fail when iterated.
        return cls(
            stages=[PipelineStage.from_dict(s) for s in data.get("stages") or []],
            post_batch=[
                PipelineStage.from_dict(s) for s in data.get("post_batch") or []
            ],
        )
|
||||
|
||||
|
||||
@dataclass
class InfospaceConfig:
    """Complete infospace configuration, loaded from ``infospace.yaml``.

    This is the declarative description of an infospace: what it
    explains, through which lenses, governed by which schemas, and
    what quality thresholds it must meet.

    Attributes:
        topic: The subject matter (required).
        disciplines: Discipline bindings applied to the topic.
        schemas: Registry of schema paths.
        competency_questions: Value of the ``competency_questions`` key
            (a path in the module's example configuration).
        viability: Thresholds keyed by metric name.
        pipeline: Optional processing pipeline.
        entities_dir: Entities directory (default ``output/entities``).
        evaluations_dir: Evaluations directory
            (default ``output/evaluations``).
        metrics_dir: Metrics directory (default ``output/metrics``).
    """

    topic: TopicConfig
    disciplines: List[DisciplineBinding] = field(default_factory=list)
    schemas: SchemaRegistry = field(default_factory=SchemaRegistry)
    competency_questions: str = ""
    viability: Dict[str, ViabilityThreshold] = field(default_factory=dict)
    pipeline: Optional[PipelineConfig] = None
    entities_dir: str = "output/entities"
    evaluations_dir: str = "output/evaluations"
    metrics_dir: str = "output/metrics"

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain mapping suitable for YAML output.

        Empty sections and directory settings that still hold their
        default value are omitted.
        """
        d: Dict[str, Any] = {"topic": self.topic.to_dict()}
        if self.disciplines:
            d["disciplines"] = [db.to_dict() for db in self.disciplines]
        schemas_dict = self.schemas.to_dict()
        if schemas_dict:
            d["schemas"] = schemas_dict
        if self.competency_questions:
            d["competency_questions"] = self.competency_questions
        if self.viability:
            d["viability"] = {
                name: t.to_dict() for name, t in self.viability.items()
            }
        if self.pipeline:
            d["pipeline"] = self.pipeline.to_dict()
        if self.entities_dir != "output/entities":
            d["entities_dir"] = self.entities_dir
        if self.evaluations_dir != "output/evaluations":
            d["evaluations_dir"] = self.evaluations_dir
        if self.metrics_dir != "output/metrics":
            d["metrics_dir"] = self.metrics_dir
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> InfospaceConfig:
        """Build a configuration from a parsed mapping.

        ``topic`` is required.  Every other section may be missing or
        null, in which case its default is used.

        Raises:
            KeyError: If ``topic`` is missing from *data*.
        """

        def section(key: str, default: Any) -> Any:
            # A YAML key with no value parses as None; ``dict.get(key,
            # default)`` alone would pass that None on instead of the
            # default.
            value = data.get(key)
            return default if value is None else value

        viability = {
            # A threshold entry with no bounds ("metric:") is also None.
            name: ViabilityThreshold(metric=name, **(bounds or {}))
            for name, bounds in section("viability", {}).items()
        }
        pipeline_raw = data.get("pipeline")
        pipeline = PipelineConfig.from_dict(pipeline_raw) if pipeline_raw else None

        return cls(
            topic=TopicConfig.from_dict(data["topic"]),
            disciplines=[
                DisciplineBinding.from_dict(d)
                for d in section("disciplines", [])
            ],
            schemas=SchemaRegistry.from_dict(section("schemas", {})),
            competency_questions=section("competency_questions", ""),
            viability=viability,
            pipeline=pipeline,
            entities_dir=section("entities_dir", "output/entities"),
            evaluations_dir=section("evaluations_dir", "output/evaluations"),
            metrics_dir=section("metrics_dir", "output/metrics"),
        )
|
||||
|
||||
|
||||
def load_infospace_config(path: Path) -> InfospaceConfig:
    """Load an :class:`InfospaceConfig` from a YAML file.

    Args:
        path: Path to ``infospace.yaml``.

    Returns:
        The parsed configuration.

    Raises:
        FileNotFoundError: If *path* does not exist.
        ValueError: If the document is not a mapping, a required field
            is missing, or a section has the wrong shape.
        yaml.YAMLError: If the file is not valid YAML.
    """
    data = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        raise ValueError(f"Expected a YAML mapping in {path}")
    if "topic" not in data:
        raise ValueError(f"Missing required 'topic' key in {path}")
    # Nested required keys (e.g. ``topic.name``) and wrongly shaped
    # sections surface from ``from_dict`` as KeyError / TypeError /
    # AttributeError; convert them to the documented ValueError and keep
    # the original exception as the cause.
    try:
        return InfospaceConfig.from_dict(data)
    except KeyError as err:
        raise ValueError(f"Missing required field {err} in {path}") from err
    except (TypeError, AttributeError) as err:
        raise ValueError(
            f"Malformed infospace configuration in {path}: {err}"
        ) from err
|
||||
|
||||
|
||||
def save_infospace_config(config: InfospaceConfig, path: Path) -> None:
    """Serialise *config* as block-style YAML and write it to *path*.

    Missing parent directories are created.  Keys keep the order in
    which ``config.to_dict()`` produces them.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    rendered = yaml.safe_dump(
        config.to_dict(),
        default_flow_style=False,
        sort_keys=False,
    )
    path.write_text(rendered, encoding="utf-8")
|
||||
|
||||
|
||||
def find_infospace_config(start: Optional[Path] = None) -> Optional[Path]:
    """Search *start* and each of its ancestors for ``infospace.yaml``.

    The search begins at *start* (the current working directory when it
    is not given) and moves towards the filesystem root.

    Returns:
        Path of the first ``infospace.yaml`` found, or ``None`` when no
        directory on the way up contains one.
    """
    origin = start if start else Path.cwd()
    here = origin.resolve()
    candidates = (folder / "infospace.yaml" for folder in (here, *here.parents))
    return next((hit for hit in candidates if hit.is_file()), None)
|
||||
141
markitect/infospace/state.py
Normal file
141
markitect/infospace/state.py
Normal file
@@ -0,0 +1,141 @@
|
||||
"""
|
||||
Infospace runtime state.
|
||||
|
||||
Computed from the current entities, evaluations, and metrics on disk.
|
||||
Provides the data behind ``markitect infospace status`` and
|
||||
``markitect infospace viability``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from markitect.infospace.config import InfospaceConfig, ViabilityThreshold
|
||||
from markitect.infospace.models import EntityMeta
|
||||
from markitect.infospace.evaluation import EvaluationSnapshot
|
||||
|
||||
|
||||
@dataclass
class ViabilityResult:
    """Outcome of checking one metric against its viability threshold.

    Attributes:
        metric: Name of the metric that was checked.
        value: Metric value that was checked.
        threshold: Threshold the value was checked against.
        passed: Whether the value satisfied the threshold.
    """

    metric: str
    value: float
    threshold: ViabilityThreshold
    passed: bool

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the result, adding whichever threshold bounds are set."""
        payload: Dict[str, Any] = dict(
            metric=self.metric,
            value=self.value,
            passed=self.passed,
        )
        for bound in ("min", "max"):
            limit = getattr(self.threshold, bound)
            if limit is not None:
                payload[bound] = limit
        return payload
|
||||
|
||||
|
||||
@dataclass
class InfospaceState:
    """Current runtime state of an infospace.

    Aggregates entity metadata, evaluation results, and viability
    checks into a single queryable object.

    Attributes:
        config: The infospace configuration.
        entities: Metadata of the known entities.
        latest_snapshot: Most recent evaluation snapshot, if any.
        viability_results: Results of the last :meth:`check_viability`
            call; empty until it has been run.
        computed_at: Naive UTC timestamp taken when the object is created.
    """

    config: InfospaceConfig
    entities: List[EntityMeta] = field(default_factory=list)
    latest_snapshot: Optional[EvaluationSnapshot] = None
    viability_results: List[ViabilityResult] = field(default_factory=list)
    computed_at: datetime = field(default_factory=datetime.utcnow)

    @property
    def entity_count(self) -> int:
        """Number of entities."""
        return len(self.entities)

    @property
    def topic_name(self) -> str:
        """Name of the configured topic."""
        return self.config.topic.name

    @property
    def is_viable(self) -> bool:
        """``True`` if all viability thresholds are met.

        ``False`` when no viability results are available.
        """
        if not self.viability_results:
            return False
        return all(r.passed for r in self.viability_results)

    @property
    def viability_pass_count(self) -> int:
        """Number of viability checks that passed."""
        return sum(1 for r in self.viability_results if r.passed)

    @property
    def viability_total_count(self) -> int:
        """Number of viability checks that were run."""
        return len(self.viability_results)

    @property
    def domains(self) -> List[str]:
        """Distinct non-empty domain values across all entities, sorted."""
        return sorted({e.domain for e in self.entities if e.domain})

    @property
    def has_evaluations(self) -> bool:
        """``True`` if an evaluation snapshot is attached."""
        return self.latest_snapshot is not None

    def check_viability(self, metrics: Dict[str, float]) -> List[ViabilityResult]:
        """Check *metrics* against the configured viability thresholds.

        A metric that is absent from *metrics* is reported with value
        ``0.0`` and always counts as failed.

        Updates :attr:`viability_results` and returns the results.
        """
        results: List[ViabilityResult] = []
        for name, threshold in self.config.viability.items():
            value = metrics.get(name, 0.0)
            # A missing metric must never pass: the 0.0 placeholder would
            # otherwise satisfy any max-only threshold.
            passed = name in metrics and threshold.check(value)
            results.append(ViabilityResult(
                metric=name,
                value=value,
                threshold=threshold,
                passed=passed,
            ))
        self.viability_results = results
        return results

    def summary(self) -> Dict[str, Any]:
        """Return a summary dict suitable for display or serialisation.

        Viability keys are present only after viability has been checked;
        ``last_evaluated`` only when a snapshot is attached.
        """
        d: Dict[str, Any] = {
            "topic": self.topic_name,
            "entity_count": self.entity_count,
            "domains": self.domains,
            "has_evaluations": self.has_evaluations,
        }
        if self.viability_results:
            d["viable"] = self.is_viable
            d["viability_pass"] = self.viability_pass_count
            d["viability_total"] = self.viability_total_count
        # Same test as ``has_evaluations`` so the two keys cannot
        # disagree if a snapshot object happens to be falsy.
        if self.latest_snapshot is not None:
            d["last_evaluated"] = self.latest_snapshot.created_at.isoformat()
        return d
|
||||
|
||||
|
||||
def build_state(
    config: InfospaceConfig,
    entities: Optional[List[EntityMeta]] = None,
    snapshot: Optional[EvaluationSnapshot] = None,
    metrics: Optional[Dict[str, float]] = None,
) -> InfospaceState:
    """Assemble an :class:`InfospaceState` from whatever data is at hand.

    Args:
        config: The infospace configuration.
        entities: Entity metadata; an empty list is used when omitted.
        snapshot: Latest evaluation snapshot, if any.
        metrics: Metric values; when given (even if empty), viability
            checks are run against them.

    Returns:
        The assembled state.
    """
    entity_list = entities if entities else []
    assembled = InfospaceState(
        config=config,
        entities=entity_list,
        latest_snapshot=snapshot,
    )
    if metrics is None:
        return assembled
    assembled.check_viability(metrics)
    return assembled
|
||||
Reference in New Issue
Block a user