markitect-main/tests/unit/infospace/test_config.py

"""Tests for markitect.infospace.config and state."""

from datetime import datetime
from pathlib import Path

import pytest

from markitect.infospace.config import (
    DisciplineBinding,
    InfospaceConfig,
    PipelineConfig,
    PipelineStage,
    SchemaRegistry,
    TopicConfig,
    ViabilityThreshold,
    find_infospace_config,
    load_infospace_config,
    save_infospace_config,
)
from markitect.infospace.state import (
    InfospaceState,
    ViabilityResult,
    build_state,
)
from markitect.infospace.models import EntityMeta
from markitect.infospace.evaluation import (
    EntityEvaluation,
    EvaluationSnapshot,
    ScoreEntry,
)


# ── Helpers ──────────────────────────────────────────────────────────


# Full infospace.yaml fixture used by TestYAMLIO.test_load_full_example.
# It declares a topic, one discipline binding, two schemas, a
# competency-questions path, three viability thresholds (two lower bounds
# and one upper bound), and a pipeline with two stages plus one post-batch
# step. The assertions in that test depend on these exact counts and values.
_SAMPLE_YAML = """\
topic:
  name: "The Wealth of Nations"
  domain: "Classical Economics"
  sources: artifacts/sources/

disciplines:
  - name: "Viable System Model"
    path: artifacts/vsm-reference/

schemas:
  entity: schemas/economic-entity-schema-v1.0.md
  mapping: schemas/vsm-mapping-schema-v1.0.md

competency_questions: schemas/competency-questions.md

viability:
  coverage_ratio:
    min: 0.60
  per_entity_mean:
    min: 3.5
  redundancy_ratio:
    max: 0.05

pipeline:
  stages:
    - template: extract-entities
      spaces: [sources, guidelines]
    - template: map-to-vsm
      spaces: [entities, vsm-reference]
  post_batch:
    - template: assess-metrics
"""


def _sample_config() -> InfospaceConfig:
    """Build a small in-memory config shared by the tests below.

    The config carries one discipline, one entity schema, a
    competency-questions path, and two viability thresholds:
    ``coverage_ratio`` (lower bound 0.6) and ``redundancy_ratio``
    (upper bound 0.05).
    """
    thresholds = {
        "coverage_ratio": ViabilityThreshold("coverage_ratio", min=0.6),
        "redundancy_ratio": ViabilityThreshold("redundancy_ratio", max=0.05),
    }
    topic = TopicConfig(name="Test Topic", domain="Testing")
    vsm_binding = DisciplineBinding(name="VSM", path="vsm/")
    registry = SchemaRegistry(entity="schemas/entity.md")
    return InfospaceConfig(
        topic=topic,
        disciplines=[vsm_binding],
        schemas=registry,
        competency_questions="schemas/cq.md",
        viability=thresholds,
    )


def _sample_entities(n=5) -> list:
    """Return ``n`` EntityMeta fixtures with slugs ``entity-0`` onwards.

    Entities at even indices get the domain "Production"; those at odd
    indices get "Distribution".
    """
    entities = []
    for index in range(n):
        label = f"Entity {index}"
        # A non-zero remainder means an odd index.
        domain = "Distribution" if index % 2 else "Production"
        entities.append(
            EntityMeta(
                slug=f"entity-{index}",
                title=label,
                h1_raw=label,
                domain=domain,
            )
        )
    return entities


# ── TopicConfig ──────────────────────────────────────────────────────


class TestTopicConfig:
    """Serialization behaviour of TopicConfig."""

    def test_round_trip(self):
        """Name, domain and sources survive to_dict() then from_dict()."""
        original = TopicConfig("WoN", "Economics", "sources/")
        restored = TopicConfig.from_dict(original.to_dict())
        assert restored.name == "WoN"
        assert restored.domain == "Economics"
        assert restored.sources == "sources/"

    def test_minimal(self):
        """Fields absent from the dict fall back to empty strings."""
        topic = TopicConfig.from_dict({"name": "Minimal"})
        assert topic.domain == ""
        assert topic.sources == ""

    def test_to_dict_omits_empty(self):
        """Empty optional fields are left out of the serialized dict."""
        serialized = TopicConfig("X").to_dict()
        for absent_key in ("domain", "sources"):
            assert absent_key not in serialized


# ── DisciplineBinding ────────────────────────────────────────────────


class TestDisciplineBinding:
    """Serialization behaviour of DisciplineBinding."""

    def test_round_trip(self):
        """Name and path survive to_dict() then from_dict()."""
        binding = DisciplineBinding("VSM", "path/to/vsm")
        restored = DisciplineBinding.from_dict(binding.to_dict())
        assert restored.name == "VSM"
        assert restored.path == "path/to/vsm"


# ── SchemaRegistry ───────────────────────────────────────────────────


class TestSchemaRegistry:
    """Serialization behaviour of SchemaRegistry."""

    def test_round_trip(self):
        """Every schema passed to the constructor survives a round trip."""
        sr = SchemaRegistry(entity="e.md", mapping="m.md", analysis="a.md")
        d = sr.to_dict()
        restored = SchemaRegistry.from_dict(d)
        assert restored.entity == "e.md"
        assert restored.mapping == "m.md"
        # The registry is built with an analysis schema as well; check it so
        # a regression that drops it during (de)serialization is caught.
        assert restored.analysis == "a.md"

    def test_extra_schemas(self):
        """Keys other than the known schema names are collected in ``extra``."""
        sr = SchemaRegistry.from_dict({"entity": "e.md", "custom": "c.md"})
        assert sr.entity == "e.md"
        assert sr.extra == {"custom": "c.md"}


# ── ViabilityThreshold ──────────────────────────────────────────────


class TestViabilityThreshold:
    """Bound checking performed by ViabilityThreshold.check()."""

    def test_min_check(self):
        """A lower bound is inclusive: values at or above it pass."""
        threshold = ViabilityThreshold("x", min=0.5)
        for value, expected in ((0.6, True), (0.5, True), (0.4, False)):
            assert threshold.check(value) is expected

    def test_max_check(self):
        """An upper bound is inclusive: values at or below it pass."""
        threshold = ViabilityThreshold("x", max=0.1)
        for value, expected in ((0.05, True), (0.1, True), (0.2, False)):
            assert threshold.check(value) is expected

    def test_min_and_max(self):
        """With both bounds set, only values inside the range pass."""
        threshold = ViabilityThreshold("x", min=0.3, max=0.7)
        for value, expected in ((0.5, True), (0.2, False), (0.8, False)):
            assert threshold.check(value) is expected

    def test_no_bounds_always_passes(self):
        """A threshold with neither bound accepts any value."""
        unbounded = ViabilityThreshold("x")
        assert unbounded.check(999.0) is True


# ── PipelineConfig ──────────────────────────────────────────────────


class TestPipelineConfig:
    """Serialization behaviour of PipelineConfig."""

    def test_round_trip(self):
        """Stages and post-batch steps survive to_dict() then from_dict()."""
        pc = PipelineConfig(
            stages=[PipelineStage("extract", ["s1", "s2"])],
            post_batch=[PipelineStage("assess")],
        )
        d = pc.to_dict()
        restored = PipelineConfig.from_dict(d)
        assert len(restored.stages) == 1
        assert restored.stages[0].template == "extract"
        assert restored.stages[0].spaces == ["s1", "s2"]
        assert len(restored.post_batch) == 1
        # Checking only the length would miss a post-batch stage restored
        # with the wrong template, so verify its content as well.
        assert restored.post_batch[0].template == "assess"


# ── InfospaceConfig ─────────────────────────────────────────────────


class TestInfospaceConfig:
    """Dict round trips and output-directory settings of InfospaceConfig."""

    def test_to_dict_from_dict_round_trip(self):
        """Top-level sections survive a to_dict()/from_dict() cycle."""
        rebuilt = InfospaceConfig.from_dict(_sample_config().to_dict())
        assert rebuilt.topic.name == "Test Topic"
        assert len(rebuilt.disciplines) == 1
        assert rebuilt.schemas.entity == "schemas/entity.md"
        assert rebuilt.competency_questions == "schemas/cq.md"
        assert len(rebuilt.viability) == 2

    def test_viability_thresholds_preserved(self):
        """Threshold bounds keep their values across the round trip."""
        rebuilt = InfospaceConfig.from_dict(_sample_config().to_dict())
        thresholds = rebuilt.viability
        assert thresholds["coverage_ratio"].min == 0.6
        assert thresholds["redundancy_ratio"].max == 0.05

    def test_default_dirs(self):
        """Without overrides, the output directories use the defaults."""
        config = InfospaceConfig(topic=TopicConfig("X"))
        assert config.entities_dir == "output/entities"
        assert config.evaluations_dir == "output/evaluations"
        assert config.metrics_dir == "output/metrics"

    def test_custom_dirs(self):
        """An ``entities_dir`` key in the dict overrides the default."""
        raw = {
            "topic": {"name": "X"},
            "entities_dir": "custom/entities",
        }
        config = InfospaceConfig.from_dict(raw)
        assert config.entities_dir == "custom/entities"


# ── YAML I/O ────────────────────────────────────────────────────────


class TestYAMLIO:
    """Saving and loading InfospaceConfig through YAML files on disk."""

    def test_save_load_round_trip(self, tmp_path):
        """A saved config loads back with the same topic and thresholds."""
        cfg = _sample_config()
        p = tmp_path / "infospace.yaml"
        save_infospace_config(cfg, p)
        loaded = load_infospace_config(p)
        assert loaded.topic.name == cfg.topic.name
        assert len(loaded.viability) == len(cfg.viability)

    def test_load_full_example(self, tmp_path):
        """Every section of the sample YAML document is parsed."""
        p = tmp_path / "infospace.yaml"
        p.write_text(_SAMPLE_YAML, encoding="utf-8")
        cfg = load_infospace_config(p)
        assert cfg.topic.name == "The Wealth of Nations"
        assert cfg.topic.domain == "Classical Economics"
        assert len(cfg.disciplines) == 1
        assert cfg.disciplines[0].name == "Viable System Model"
        assert cfg.schemas.entity == "schemas/economic-entity-schema-v1.0.md"
        assert cfg.competency_questions == "schemas/competency-questions.md"
        assert len(cfg.viability) == 3
        assert cfg.viability["coverage_ratio"].min == 0.60
        # The sample declares three thresholds; check the middle one too so
        # all of them are verified by value, not only counted.
        assert cfg.viability["per_entity_mean"].min == 3.5
        assert cfg.viability["redundancy_ratio"].max == 0.05
        assert cfg.pipeline is not None
        assert len(cfg.pipeline.stages) == 2
        assert len(cfg.pipeline.post_batch) == 1

    def test_load_missing_file(self, tmp_path):
        """Loading a path that does not exist raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            load_infospace_config(tmp_path / "nonexistent.yaml")

    def test_load_missing_topic(self, tmp_path):
        """A document without a ``topic`` section raises ValueError."""
        p = tmp_path / "bad.yaml"
        # Explicit encoding keeps the fixture independent of the platform
        # locale and consistent with test_load_full_example.
        p.write_text("schemas:\n  entity: x.md\n", encoding="utf-8")
        with pytest.raises(ValueError, match="topic"):
            load_infospace_config(p)

    def test_save_creates_parent_dirs(self, tmp_path):
        """Saving to a nested path leaves the file in place."""
        cfg = InfospaceConfig(topic=TopicConfig("X"))
        p = tmp_path / "deep" / "nested" / "infospace.yaml"
        save_infospace_config(cfg, p)
        assert p.exists()


class TestFindConfig:
    """Upward discovery of ``infospace.yaml`` by find_infospace_config()."""

    def test_finds_config_in_current_dir(self, tmp_path):
        """A config file in the start directory itself is returned."""
        (tmp_path / "infospace.yaml").write_text(
            "topic:\n  name: X\n", encoding="utf-8"
        )
        found = find_infospace_config(tmp_path)
        assert found is not None
        assert found.name == "infospace.yaml"

    def test_finds_config_in_parent(self, tmp_path):
        """Starting from a nested directory finds the ancestor's config."""
        expected = tmp_path / "infospace.yaml"
        expected.write_text("topic:\n  name: X\n", encoding="utf-8")
        child = tmp_path / "sub" / "dir"
        child.mkdir(parents=True)
        found = find_infospace_config(child)
        assert found is not None
        # A bare non-None check would accept any path; make sure the file
        # written above is the one found. Both sides are resolved so
        # symlinked temp directories do not cause a false mismatch.
        assert found.name == "infospace.yaml"
        assert found.resolve() == expected.resolve()

    def test_returns_none_if_not_found(self, tmp_path):
        """With no config file written under tmp_path, None is returned."""
        assert find_infospace_config(tmp_path) is None


# ── InfospaceState ──────────────────────────────────────────────────


class TestInfospaceState:
    """Derived properties exposed by InfospaceState."""

    def test_entity_count(self):
        """entity_count equals the number of entities supplied."""
        state = InfospaceState(config=_sample_config(), entities=_sample_entities(5))
        assert state.entity_count == 5

    def test_topic_name(self):
        """topic_name reflects the name of the configured topic."""
        state = InfospaceState(config=_sample_config())
        assert state.topic_name == "Test Topic"

    def test_domains(self):
        """Both domains present among the sample entities are reported."""
        state = InfospaceState(config=_sample_config(), entities=_sample_entities(4))
        for expected_domain in ("Production", "Distribution"):
            assert expected_domain in state.domains

    def test_has_evaluations(self):
        """has_evaluations becomes True once a latest snapshot is attached."""
        state = InfospaceState(config=_sample_config())
        assert state.has_evaluations is False

        state.latest_snapshot = EvaluationSnapshot(
            snapshot_id="s1",
            created_at=datetime(2026, 1, 1),
            schema_name="Test",
            entity_count=0,
        )
        assert state.has_evaluations is True


class TestViabilityCheck:
    """Threshold evaluation through InfospaceState.check_viability()."""

    def test_all_pass(self):
        """Metrics inside every bound make the state viable."""
        state = InfospaceState(config=_sample_config())
        outcomes = state.check_viability(
            {"coverage_ratio": 0.8, "redundancy_ratio": 0.02}
        )
        assert all(outcome.passed for outcome in outcomes)
        assert state.is_viable is True

    def test_one_fails(self):
        """A single metric outside its bound makes the state non-viable."""
        state = InfospaceState(config=_sample_config())
        outcomes = state.check_viability(
            {"coverage_ratio": 0.4, "redundancy_ratio": 0.02}
        )
        assert any(not outcome.passed for outcome in outcomes)
        assert state.is_viable is False

    def test_missing_metric_defaults_to_zero(self):
        """A metric missing from the input is evaluated as 0.0."""
        state = InfospaceState(config=_sample_config())
        # coverage_ratio min=0.6, missing → 0.0 → fails
        outcomes = state.check_viability({})
        coverage = next(
            filter(lambda outcome: outcome.metric == "coverage_ratio", outcomes)
        )
        assert coverage.passed is False
        assert coverage.value == 0.0

    def test_viability_counts(self):
        """Pass and total counters reflect the most recent check."""
        state = InfospaceState(config=_sample_config())
        state.check_viability({"coverage_ratio": 0.8, "redundancy_ratio": 0.2})
        assert state.viability_pass_count == 1  # coverage passes
        assert state.viability_total_count == 2

    def test_no_thresholds_not_viable(self):
        """A config without thresholds is reported as not viable."""
        bare_config = InfospaceConfig(topic=TopicConfig("X"))
        state = InfospaceState(config=bare_config)
        assert state.is_viable is False


class TestBuildState:
    """State construction through the build_state() factory."""

    def test_builds_with_entities(self):
        """Entities handed to build_state() are counted on the state."""
        state = build_state(_sample_config(), entities=_sample_entities(3))
        assert state.entity_count == 3

    def test_builds_with_metrics(self):
        """Passing metrics yields a viable state when all bounds are met."""
        passing_metrics = {"coverage_ratio": 0.9, "redundancy_ratio": 0.01}
        state = build_state(_sample_config(), metrics=passing_metrics)
        assert state.is_viable is True

    def test_summary(self):
        """summary() reports the topic, entity count and viability flag."""
        passing_metrics = {"coverage_ratio": 0.9, "redundancy_ratio": 0.01}
        state = build_state(
            _sample_config(),
            entities=_sample_entities(3),
            metrics=passing_metrics,
        )
        report = state.summary()
        assert report["topic"] == "Test Topic"
        assert report["entity_count"] == 3
        assert report["viable"] is True


class TestViabilityResult:
    """Serialization behaviour of ViabilityResult."""

    def test_to_dict(self):
        """to_dict() includes the set bound and leaves out the unset one."""
        lower_bound = ViabilityThreshold("x", min=0.5)
        serialized = ViabilityResult(
            metric="x",
            value=0.7,
            threshold=lower_bound,
            passed=True,
        ).to_dict()
        assert serialized["metric"] == "x"
        assert serialized["value"] == 0.7
        assert serialized["passed"] is True
        assert serialized["min"] == 0.5
        assert "max" not in serialized