"""
Data models for structured evaluation output.

Provides typed containers for per-entity LLM-evaluated scores and
collection-level metrics, plus delta records describing the difference
between two snapshots. All models support ``to_dict()``/``from_dict()``
round-tripping for YAML serialisation.
"""

from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional


def _parse_datetime(value: Any) -> datetime:
    """Return *value* as a ``datetime``.

    Accepts either an ISO-8601 string (the form written by ``to_dict()``) or
    an existing ``datetime`` instance, which is returned unchanged. The latter
    covers loaders that convert timestamps before the data reaches
    ``from_dict()``.
    """
    if isinstance(value, datetime):
        return value
    return datetime.fromisoformat(value)


@dataclass
class ScoreEntry:
    """A single scored dimension (e.g. definition_precision: 4.5/5.0)."""

    name: str
    value: float
    max_value: float = 5.0
    rationale: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict; ``rationale`` is omitted when empty."""
        d: Dict[str, Any] = {
            "name": self.name,
            "value": self.value,
            "max_value": self.max_value,
        }
        if self.rationale:
            d["rationale"] = self.rationale
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ScoreEntry":
        """Build an entry from a dict.

        ``name`` and ``value`` are required (``KeyError`` if missing);
        ``max_value`` defaults to 5.0 and ``rationale`` to an empty string.
        Numeric fields are coerced with ``float()``.
        """
        return cls(
            name=data["name"],
            value=float(data["value"]),
            max_value=float(data.get("max_value", 5.0)),
            rationale=data.get("rationale", ""),
        )


@dataclass
class EntityEvaluation:
    """Per-entity evaluation result."""

    entity_slug: str
    evaluator: str
    scores: List[ScoreEntry]
    evaluated_at: datetime
    notes: List[str] = field(default_factory=list)

    @property
    def overall_score(self) -> float:
        """Unweighted mean of the raw score values, or 0.0 with no scores.

        ``max_value`` is not taken into account.
        """
        if not self.scores:
            return 0.0
        return sum(s.value for s in self.scores) / len(self.scores)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict.

        ``overall_score`` is included (rounded to 4 places) for readers of
        the output; ``from_dict()`` ignores it and recomputes from ``scores``.
        """
        return {
            "entity_slug": self.entity_slug,
            "evaluator": self.evaluator,
            "evaluated_at": self.evaluated_at.isoformat(),
            "overall_score": round(self.overall_score, 4),
            "scores": [s.to_dict() for s in self.scores],
            # Copy so callers editing the dict cannot mutate this instance.
            "notes": list(self.notes),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EntityEvaluation":
        """Build an evaluation from a dict.

        ``entity_slug``, ``evaluator``, ``scores`` and ``evaluated_at`` are
        required (``KeyError`` if missing). ``notes`` is optional; a missing
        or null value becomes an empty list.
        """
        return cls(
            entity_slug=data["entity_slug"],
            evaluator=data["evaluator"],
            scores=[ScoreEntry.from_dict(s) for s in data["scores"]],
            evaluated_at=_parse_datetime(data["evaluated_at"]),
            # ``or []`` handles an explicit null; ``list()`` avoids sharing
            # the caller's list with the new instance.
            notes=list(data.get("notes") or []),
        )


@dataclass
class MetricValue:
    """A single collection-level metric."""

    name: str
    value: float
    concern: str = ""
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict; empty ``concern``/``details`` are omitted."""
        d: Dict[str, Any] = {"name": self.name, "value": self.value}
        if self.concern:
            d["concern"] = self.concern
        if self.details:
            # Shallow copy so the output does not alias instance state.
            d["details"] = dict(self.details)
        return d

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MetricValue":
        """Build a metric from a dict.

        ``name`` and ``value`` are required (``KeyError`` if missing);
        a missing or null ``details`` becomes an empty dict.
        """
        return cls(
            name=data["name"],
            value=float(data["value"]),
            concern=data.get("concern", ""),
            details=dict(data.get("details") or {}),
        )


@dataclass
class EvaluationSnapshot:
    """Timestamped snapshot of entity evaluations and collection metrics."""

    snapshot_id: str
    created_at: datetime
    schema_name: str
    entity_count: int
    entity_evaluations: List[EntityEvaluation] = field(default_factory=list)
    collection_metrics: List[MetricValue] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the snapshot and all nested models to a plain dict."""
        return {
            "snapshot_id": self.snapshot_id,
            "created_at": self.created_at.isoformat(),
            "schema_name": self.schema_name,
            "entity_count": self.entity_count,
            "entity_evaluations": [e.to_dict() for e in self.entity_evaluations],
            "collection_metrics": [m.to_dict() for m in self.collection_metrics],
            # Shallow copy so the output does not alias instance state.
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EvaluationSnapshot":
        """Build a snapshot from a dict.

        ``snapshot_id``, ``created_at``, ``schema_name`` and ``entity_count``
        are required (``KeyError`` if missing). The collection fields are
        optional; a missing or null value becomes an empty container.
        """
        return cls(
            snapshot_id=data["snapshot_id"],
            created_at=_parse_datetime(data["created_at"]),
            schema_name=data["schema_name"],
            entity_count=data["entity_count"],
            entity_evaluations=[
                EntityEvaluation.from_dict(e)
                for e in (data.get("entity_evaluations") or [])
            ],
            collection_metrics=[
                MetricValue.from_dict(m)
                for m in (data.get("collection_metrics") or [])
            ],
            metadata=dict(data.get("metadata") or {}),
        )


@dataclass
class ScoreChange:
    """Delta record for a single score dimension between snapshots."""

    entity_slug: str
    dimension: str
    before: float
    after: float

    @property
    def delta(self) -> float:
        """Signed change, ``after - before``."""
        return self.after - self.before

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict.

        ``delta`` is included (rounded to 4 places) for readers of the
        output; ``from_dict()`` ignores it.
        """
        return {
            "entity_slug": self.entity_slug,
            "dimension": self.dimension,
            "before": self.before,
            "after": self.after,
            "delta": round(self.delta, 4),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ScoreChange":
        """Build a score change from a dict; all four fields are required."""
        return cls(
            entity_slug=data["entity_slug"],
            dimension=data["dimension"],
            before=float(data["before"]),
            after=float(data["after"]),
        )


@dataclass
class MetricChange:
    """Delta record for a collection metric between snapshots."""

    name: str
    before: float
    after: float

    @property
    def delta(self) -> float:
        """Signed change, ``after - before``."""
        return self.after - self.before

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict.

        ``delta`` is included (rounded to 4 places) for readers of the
        output; ``from_dict()`` ignores it.
        """
        return {
            "name": self.name,
            "before": self.before,
            "after": self.after,
            "delta": round(self.delta, 4),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MetricChange":
        """Build a metric change from a dict; all three fields are required."""
        return cls(
            name=data["name"],
            before=float(data["before"]),
            after=float(data["after"]),
        )


@dataclass
class SnapshotDiff:
    """Diff between two evaluation snapshots."""

    before_id: str
    after_id: str
    added_entities: List[str] = field(default_factory=list)
    removed_entities: List[str] = field(default_factory=list)
    score_changes: List[ScoreChange] = field(default_factory=list)
    metric_changes: List[MetricChange] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the diff and its nested change records to a plain dict."""
        return {
            "before_id": self.before_id,
            "after_id": self.after_id,
            "added_entities": list(self.added_entities),
            "removed_entities": list(self.removed_entities),
            "score_changes": [sc.to_dict() for sc in self.score_changes],
            "metric_changes": [mc.to_dict() for mc in self.metric_changes],
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SnapshotDiff":
        """Build a diff from a dict.

        ``before_id`` and ``after_id`` are required (``KeyError`` if
        missing); every list field is optional and a missing or null value
        becomes an empty list.
        """
        return cls(
            before_id=data["before_id"],
            after_id=data["after_id"],
            added_entities=list(data.get("added_entities") or []),
            removed_entities=list(data.get("removed_entities") or []),
            score_changes=[
                ScoreChange.from_dict(sc)
                for sc in (data.get("score_changes") or [])
            ],
            metric_changes=[
                MetricChange.from_dict(mc)
                for mc in (data.get("metric_changes") or [])
            ],
        )

    def summary(self) -> str:
        """Return a multi-line, human-readable description of the diff.

        The first line names the two snapshot ids. Each non-empty category
        (added entities, removed entities, score changes, metric changes)
        contributes its own lines; when every category is empty a single
        "No changes" line is emitted instead.
        """
        lines = [f"Diff: {self.before_id} -> {self.after_id}"]
        if self.added_entities:
            lines.append(f" Added entities: {', '.join(self.added_entities)}")
        if self.removed_entities:
            lines.append(f" Removed entities: {', '.join(self.removed_entities)}")
        if self.score_changes:
            lines.append(f" Score changes: {len(self.score_changes)}")
            lines.extend(
                f" {sc.entity_slug}/{sc.dimension}: "
                f"{sc.before} -> {sc.after} ({sc.delta:+.2f})"
                for sc in self.score_changes
            )
        if self.metric_changes:
            lines.append(f" Metric changes: {len(self.metric_changes)}")
            lines.extend(
                f" {mc.name}: {mc.before} -> {mc.after} ({mc.delta:+.2f})"
                for mc in self.metric_changes
            )
        if not (
            self.added_entities
            or self.removed_entities
            or self.score_changes
            or self.metric_changes
        ):
            lines.append(" No changes")
        return "\n".join(lines)