""" Tests for metrics history and viability tracking (S2.5). """ from __future__ import annotations import json from datetime import datetime, timezone from pathlib import Path import pytest import yaml from markitect.infospace.checks.orchestrator import CheckReport from markitect.infospace.checks.granularity import GranularityReport from markitect.infospace.checks.redundancy import RedundancyReport from markitect.infospace.config import InfospaceConfig, TopicConfig, ViabilityThreshold from markitect.infospace.evaluation import EvaluationSnapshot, MetricValue from markitect.infospace.history import ( find_snapshot_by_date, get_history, get_latest_snapshot, metric_trend, read_metrics_file, record_check_results, snapshot_from_checks, write_metrics_file, ) # ── helpers ────────────────────────────────────────────────────────── def _check_report() -> CheckReport: return CheckReport( redundancy=RedundancyReport(redundancy_ratio=0.1, entity_count=10), granularity=GranularityReport(domain_entropy=1.5, entity_count=10), ) def _config(tmp_path: Path) -> InfospaceConfig: return InfospaceConfig( topic=TopicConfig(name="Test Topic", domain="Testing"), metrics_dir=str(tmp_path / "metrics"), ) def _snapshot(snap_id: str, date_str: str, metrics: dict) -> EvaluationSnapshot: return EvaluationSnapshot( snapshot_id=snap_id, created_at=datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc), schema_name="default", entity_count=10, collection_metrics=[ MetricValue(name=k, value=v) for k, v in metrics.items() ], ) # ── snapshot_from_checks ──────────────────────────────────────────── class TestSnapshotFromChecks: def test_creates_snapshot(self): report = _check_report() snap = snapshot_from_checks(report, entity_count=10) assert snap.entity_count == 10 assert snap.snapshot_id # non-empty assert snap.created_at is not None def test_contains_metrics(self): report = _check_report() snap = snapshot_from_checks(report, entity_count=10) metric_names = {m.name for m in snap.collection_metrics} assert "redundancy_ratio" in metric_names assert "granularity_entropy" in metric_names def test_concern_labels(self): report = _check_report() snap = snapshot_from_checks(report, entity_count=10) by_name = {m.name: m for m in snap.collection_metrics} assert by_name["redundancy_ratio"].concern == "C1" assert by_name["granularity_entropy"].concern == "C5" def test_custom_schema(self): report = _check_report() snap = snapshot_from_checks(report, entity_count=5, schema_name="custom") assert snap.schema_name == "custom" def test_metadata(self): report = _check_report() snap = snapshot_from_checks(report, entity_count=5, metadata={"key": "val"}) assert snap.metadata == {"key": "val"} def test_empty_report(self): report = CheckReport() snap = snapshot_from_checks(report, entity_count=0) assert snap.collection_metrics == [] # ── write_metrics_file / read_metrics_file ────────────────────────── class TestMetricsFileIO: def test_round_trip(self, tmp_path): path = tmp_path / "metrics.yaml" metrics = {"redundancy_ratio": 0.05, "coverage_ratio": 0.85} write_metrics_file(metrics, path) loaded = read_metrics_file(path) assert loaded["redundancy_ratio"] == pytest.approx(0.05) assert loaded["coverage_ratio"] == pytest.approx(0.85) def test_creates_parent_dirs(self, tmp_path): path = tmp_path / "deep" / "nested" / "metrics.yaml" write_metrics_file({"x": 1.0}, path) assert path.is_file() def test_read_missing_file(self, tmp_path): path = tmp_path / "nonexistent.yaml" assert read_metrics_file(path) == {} def test_read_invalid_content(self, tmp_path): path = tmp_path / "bad.yaml" path.write_text("just a string", encoding="utf-8") assert read_metrics_file(path) == {} # ── record_check_results ──────────────────────────────────────────── class TestRecordCheckResults: def test_creates_metrics_file(self, tmp_path): cfg = _config(tmp_path) report = _check_report() record_check_results(report, cfg, tmp_path, entity_count=10) metrics_path = tmp_path / cfg.metrics_dir / "metrics.yaml" assert metrics_path.is_file() def test_creates_history_file(self, tmp_path): cfg = _config(tmp_path) report = _check_report() record_check_results(report, cfg, tmp_path, entity_count=10) history_path = tmp_path / cfg.metrics_dir / "history.yaml" assert history_path.is_file() def test_appends_to_history(self, tmp_path): cfg = _config(tmp_path) report = _check_report() record_check_results(report, cfg, tmp_path, entity_count=10) record_check_results(report, cfg, tmp_path, entity_count=12) history = get_history(cfg, tmp_path) assert len(history) == 2 assert history[0].entity_count == 10 assert history[1].entity_count == 12 def test_returns_snapshot(self, tmp_path): cfg = _config(tmp_path) report = _check_report() snap = record_check_results(report, cfg, tmp_path, entity_count=10) assert snap.snapshot_id assert snap.entity_count == 10 # ── get_history / get_latest_snapshot ──────────────────────────────── class TestGetHistory: def test_empty_history(self, tmp_path): cfg = _config(tmp_path) assert get_history(cfg, tmp_path) == [] def test_get_latest(self, tmp_path): cfg = _config(tmp_path) report = _check_report() record_check_results(report, cfg, tmp_path, entity_count=5) record_check_results(report, cfg, tmp_path, entity_count=10) latest = get_latest_snapshot(cfg, tmp_path) assert latest is not None assert latest.entity_count == 10 def test_latest_none_when_empty(self, tmp_path): cfg = _config(tmp_path) assert get_latest_snapshot(cfg, tmp_path) is None # ── find_snapshot_by_date ──────────────────────────────────────────── class TestFindSnapshotByDate: def test_finds_closest(self): history = [ _snapshot("a", "2026-01-01T10:00:00", {"x": 1.0}), _snapshot("b", "2026-02-15T10:00:00", {"x": 2.0}), _snapshot("c", "2026-03-01T10:00:00", {"x": 3.0}), ] result = find_snapshot_by_date(history, "2026-02-14") assert result is not None assert result.snapshot_id == "b" def test_exact_match(self): history = [ _snapshot("a", "2026-01-01T00:00:00", {"x": 1.0}), _snapshot("b", "2026-02-01T00:00:00", {"x": 2.0}), ] result = find_snapshot_by_date(history, "2026-02-01") assert result is not None assert result.snapshot_id == "b" def test_empty_history(self): assert find_snapshot_by_date([], "2026-01-01") is None def test_invalid_date(self): history = [_snapshot("a", "2026-01-01T00:00:00", {"x": 1.0})] assert find_snapshot_by_date(history, "not-a-date") is None def test_with_timestamp(self): history = [ _snapshot("a", "2026-01-01T10:00:00", {"x": 1.0}), _snapshot("b", "2026-01-01T14:00:00", {"x": 2.0}), ] result = find_snapshot_by_date(history, "2026-01-01T13:00:00") assert result is not None assert result.snapshot_id == "b" # ── metric_trend ───────────────────────────────────────────────────── class TestMetricTrend: def test_extracts_trend(self): history = [ _snapshot("a", "2026-01-01T00:00:00", {"x": 1.0, "y": 2.0}), _snapshot("b", "2026-02-01T00:00:00", {"x": 1.5, "y": 2.5}), ] trend = metric_trend(history, "x") assert len(trend) == 2 assert trend[0]["value"] == 1.0 assert trend[1]["value"] == 1.5 def test_missing_metric(self): history = [ _snapshot("a", "2026-01-01T00:00:00", {"x": 1.0}), ] assert metric_trend(history, "nonexistent") == [] def test_empty_history(self): assert metric_trend([], "x") == [] def test_partial_presence(self): history = [ _snapshot("a", "2026-01-01T00:00:00", {"x": 1.0}), _snapshot("b", "2026-02-01T00:00:00", {"y": 2.0}), # x missing _snapshot("c", "2026-03-01T00:00:00", {"x": 3.0}), ] trend = metric_trend(history, "x") assert len(trend) == 2 assert trend[0]["value"] == 1.0 assert trend[1]["value"] == 3.0