# Generated from coulomb/repo-seed.
# 79 lines · 2.3 KiB · Python
from datetime import datetime, timezone
|
|
|
|
from infospace_bench.evaluation import (
|
|
EntityEvaluation,
|
|
EvaluationSnapshot,
|
|
MetricValue,
|
|
ScoreEntry,
|
|
diff_snapshots,
|
|
)
|
|
|
|
|
|
def test_entity_evaluation_round_trips_and_computes_overall_score() -> None:
    """Check dict serialization of an ``EntityEvaluation`` and its overall score.

    An evaluation with two score entries (4 and 5) is converted with
    ``to_dict`` and rebuilt with ``from_dict``. The serialized payload must
    carry an ``overall_score`` of 4.5, and the rebuilt object must compare
    equal to the one it was produced from.
    """
    timestamp = datetime(2026, 5, 14, tzinfo=timezone.utc)
    score_entries = [
        ScoreEntry("definition_precision", 4),
        ScoreEntry("provenance_quality", 5),
    ]
    original = EntityEvaluation(
        artifact_id="source/chapter.md",
        evaluator="test",
        scores=score_entries,
        evaluated_at=timestamp,
        notes=["clear enough"],
    )

    # Round trip first, then assert, so both checks see the same payload.
    serialized = original.to_dict()
    restored = EntityEvaluation.from_dict(serialized)

    # Presumably the mean of the two scores (4 and 5) -- confirm in the library.
    assert serialized["overall_score"] == 4.5
    assert restored == original
|
|
|
|
|
|
def test_snapshot_diff_reports_added_removed_score_and_metric_changes() -> None:
    """Check what ``diff_snapshots`` reports between two evaluation snapshots.

    The "before" snapshot evaluates ``source/a.md`` with quality 3 and a
    ``coverage_ratio`` metric of 0.5. The "after" snapshot raises that quality
    to 4, adds ``source/b.md`` and moves ``coverage_ratio`` to 0.75. The diff
    must list the new artifact, no removed artifacts, a score delta of 1 for
    ``source/a.md`` and a metric delta of 0.25 for ``coverage_ratio``.
    """
    timestamp = datetime(2026, 5, 14, tzinfo=timezone.utc)

    def quality_evaluation(artifact_id, quality):
        # Every evaluation in this test differs only by artifact and score.
        return EntityEvaluation(
            artifact_id=artifact_id,
            evaluator="test",
            scores=[ScoreEntry("quality", quality)],
            evaluated_at=timestamp,
        )

    older = EvaluationSnapshot(
        snapshot_id="before",
        created_at=timestamp,
        schema_name="baseline",
        artifact_count=1,
        artifact_evaluations=[quality_evaluation("source/a.md", 3)],
        collection_metrics=[MetricValue("coverage_ratio", 0.5)],
    )
    # NOTE(review): artifact_count stays 1 here although two evaluations are
    # listed -- confirm whether that is intentional.
    newer = EvaluationSnapshot(
        snapshot_id="after",
        created_at=timestamp,
        schema_name="baseline",
        artifact_count=1,
        artifact_evaluations=[
            quality_evaluation("source/a.md", 4),
            quality_evaluation("source/b.md", 5),
        ],
        collection_metrics=[MetricValue("coverage_ratio", 0.75)],
    )

    changes = diff_snapshots(older, newer)

    # Artifact membership: b.md is new, nothing was dropped.
    assert changes.added_artifacts == ["source/b.md"]
    assert changes.removed_artifacts == []

    # The shared artifact's quality score went from 3 to 4.
    first_score_change = changes.score_changes[0]
    assert first_score_change.artifact_id == "source/a.md"
    assert first_score_change.delta == 1

    # 0.75 - 0.5 is exactly representable, so exact equality is safe here.
    first_metric_change = changes.metric_changes[0]
    assert first_metric_change.name == "coverage_ratio"
    assert first_metric_change.delta == 0.25
|