Initial implementation

This commit is contained in:
2026-05-14 11:32:25 +02:00
parent 6fd1ff7581
commit 916a895a85
31 changed files with 1461 additions and 21 deletions

View File

@@ -0,0 +1,210 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
@dataclass(frozen=True)
class ScoreEntry:
    """One named score, with the scale maximum and an optional rationale."""

    # Name of the scored dimension.
    name: str
    # The awarded score.
    value: float
    # Presumably the upper bound of the scale ``value`` is expressed on;
    # nothing in this class enforces ``value <= max_value``.
    max_value: float = 5.0
    # Free-text justification; left out of ``to_dict`` output when empty.
    rationale: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this entry.

        ``rationale`` is only included when it is non-empty.
        """
        data: dict[str, Any] = {
            "name": self.name,
            "value": self.value,
            "max_value": self.max_value,
        }
        if self.rationale:
            data["rationale"] = self.rationale
        return data

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ScoreEntry":
        """Build an entry from a mapping such as the one ``to_dict`` produces.

        ``name`` and ``value`` are required. ``max_value`` and ``rationale``
        are optional, and an explicit ``None`` for either is treated the same
        as the key being absent.

        Raises:
            KeyError: if ``name`` or ``value`` is missing.
            TypeError, ValueError: if ``value`` or ``max_value`` cannot be
                converted to ``float``.
        """
        raw_max = data.get("max_value")
        return cls(
            name=str(data["name"]),
            value=float(data["value"]),
            # Compare against None explicitly: a legitimate max_value of 0
            # must not be replaced by the default, but a null must not reach
            # float(), which would raise TypeError.
            max_value=5.0 if raw_max is None else float(raw_max),
            rationale=str(data.get("rationale") or ""),
        )
@dataclass(frozen=True)
class EntityEvaluation:
    """The scores one evaluator gave to one artifact at a point in time."""

    # Identifier of the evaluated artifact.
    artifact_id: str
    # Identifier of whoever (or whatever) produced the scores.
    evaluator: str
    # Individual scores; may be empty.
    scores: list[ScoreEntry]
    # Timestamp of the evaluation; serialized with ``isoformat()``.
    evaluated_at: datetime
    # Optional free-text remarks.
    notes: list[str] = field(default_factory=list)

    @property
    def overall_score(self) -> float:
        """Arithmetic mean of the raw score values, or 0.0 without scores.

        Each score's ``max_value`` is not taken into account.
        """
        if not self.scores:
            return 0.0
        return sum(score.value for score in self.scores) / len(self.scores)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this evaluation.

        ``overall_score`` is included rounded to 4 decimal places. The
        returned ``notes`` list is a copy, so mutating the result cannot
        alter this frozen instance.
        """
        return {
            "artifact_id": self.artifact_id,
            "evaluator": self.evaluator,
            "evaluated_at": self.evaluated_at.isoformat(),
            "overall_score": round(self.overall_score, 4),
            "scores": [score.to_dict() for score in self.scores],
            # Copy rather than expose the internal list (from_dict copies too).
            "notes": list(self.notes),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EntityEvaluation":
        """Build an evaluation from a mapping such as ``to_dict`` produces.

        ``artifact_id``, ``evaluator`` and ``evaluated_at`` are required.
        ``scores`` and ``notes`` may be absent or ``None``, both meaning
        "empty". Any ``overall_score`` key is ignored; the value is derived
        from the scores.

        Raises:
            KeyError: if a required key is missing.
            ValueError: if ``evaluated_at`` is not a valid ISO-format string.
        """
        return cls(
            artifact_id=str(data["artifact_id"]),
            evaluator=str(data["evaluator"]),
            # ``or []`` tolerates an explicit null the same way ``notes`` does.
            scores=[ScoreEntry.from_dict(item) for item in data.get("scores") or []],
            evaluated_at=datetime.fromisoformat(str(data["evaluated_at"])),
            notes=list(data.get("notes") or []),
        )
@dataclass(frozen=True)
class MetricValue:
    """A named numeric metric with an optional concern and extra details."""

    # Metric name.
    name: str
    # Metric value.
    value: float
    # Optional text; omitted from ``to_dict`` output when empty.
    concern: str = ""
    # Optional supporting data; omitted from ``to_dict`` output when empty.
    details: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this metric.

        ``concern`` and ``details`` are only included when non-empty. The
        returned ``details`` is a shallow copy, so adding or replacing keys
        in the result does not alter this frozen instance.
        """
        data: dict[str, Any] = {"name": self.name, "value": self.value}
        if self.concern:
            data["concern"] = self.concern
        if self.details:
            # Copy rather than expose the internal dict (from_dict copies too).
            data["details"] = dict(self.details)
        return data

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MetricValue":
        """Build a metric from a mapping such as ``to_dict`` produces.

        ``name`` and ``value`` are required; ``concern`` and ``details`` may
        be absent or ``None``.

        Raises:
            KeyError: if ``name`` or ``value`` is missing.
            TypeError, ValueError: if ``value`` cannot be converted to
                ``float``.
        """
        return cls(
            name=str(data["name"]),
            value=float(data["value"]),
            concern=str(data.get("concern") or ""),
            details=dict(data.get("details") or {}),
        )
@dataclass(frozen=True)
class EvaluationSnapshot:
    """A set of artifact evaluations and collection metrics taken together."""

    # Identifier of this snapshot.
    snapshot_id: str
    # Creation timestamp; serialized with ``isoformat()``.
    created_at: datetime
    # Name of the schema the snapshot refers to.
    schema_name: str
    # Stored as given; not checked against ``len(artifact_evaluations)``.
    artifact_count: int
    # Per-artifact evaluations.
    artifact_evaluations: list[EntityEvaluation] = field(default_factory=list)
    # Metrics recorded for the collection as a whole.
    collection_metrics: list[MetricValue] = field(default_factory=list)
    # Arbitrary extra data.
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of this snapshot.

        Nested evaluations and metrics are converted with their own
        ``to_dict``. ``metadata`` is returned as a shallow copy, so adding or
        replacing keys in the result does not alter this frozen instance.
        """
        return {
            "snapshot_id": self.snapshot_id,
            "created_at": self.created_at.isoformat(),
            "schema_name": self.schema_name,
            "artifact_count": self.artifact_count,
            "artifact_evaluations": [
                evaluation.to_dict() for evaluation in self.artifact_evaluations
            ],
            "collection_metrics": [
                metric.to_dict() for metric in self.collection_metrics
            ],
            # Copy rather than expose the internal dict (from_dict copies too).
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EvaluationSnapshot":
        """Build a snapshot from a mapping such as ``to_dict`` produces.

        ``snapshot_id``, ``created_at``, ``schema_name`` and
        ``artifact_count`` are required. ``artifact_evaluations``,
        ``collection_metrics`` and ``metadata`` may be absent or ``None``,
        both meaning "empty".

        Raises:
            KeyError: if a required key is missing.
            ValueError: if ``created_at`` is not a valid ISO-format string or
                ``artifact_count`` is not convertible to ``int``.
        """
        # ``or []`` treats an explicit null like a missing key, matching how
        # ``metadata`` is handled below.
        raw_evaluations = data.get("artifact_evaluations") or []
        raw_metrics = data.get("collection_metrics") or []
        return cls(
            snapshot_id=str(data["snapshot_id"]),
            created_at=datetime.fromisoformat(str(data["created_at"])),
            schema_name=str(data["schema_name"]),
            artifact_count=int(data["artifact_count"]),
            artifact_evaluations=[
                EntityEvaluation.from_dict(item) for item in raw_evaluations
            ],
            collection_metrics=[MetricValue.from_dict(item) for item in raw_metrics],
            metadata=dict(data.get("metadata") or {}),
        )
@dataclass(frozen=True)
class ScoreChange:
    """Before/after values of one score dimension of one artifact."""

    # Artifact the score belongs to.
    artifact_id: str
    # Name of the score dimension.
    dimension: str
    # Value in the earlier snapshot.
    before: float
    # Value in the later snapshot.
    after: float

    @property
    def delta(self) -> float:
        """Signed difference: ``after`` minus ``before``."""
        newer, older = self.after, self.before
        return newer - older
@dataclass(frozen=True)
class MetricChange:
    """Before/after values of one named collection metric."""

    # Metric name.
    name: str
    # Value in the earlier snapshot.
    before: float
    # Value in the later snapshot.
    after: float

    @property
    def delta(self) -> float:
        """Signed difference: ``after`` minus ``before``."""
        newer, older = self.after, self.before
        return newer - older
@dataclass(frozen=True)
class SnapshotDiff:
    """Differences between two snapshots, identified by their ids.

    Every list defaults to a fresh empty list per instance.
    """

    # Id of the earlier snapshot.
    before_id: str
    # Id of the later snapshot.
    after_id: str

    # Artifact ids present only in the later snapshot.
    added_artifacts: list[str] = field(default_factory=list)
    # Artifact ids present only in the earlier snapshot.
    removed_artifacts: list[str] = field(default_factory=list)

    # Per-artifact, per-dimension score changes.
    score_changes: list[ScoreChange] = field(default_factory=list)
    # Collection-metric changes.
    metric_changes: list[MetricChange] = field(default_factory=list)
def diff_snapshots(
    before: EvaluationSnapshot,
    after: EvaluationSnapshot,
) -> SnapshotDiff:
    """Compare two snapshots and report what changed between them.

    The result contains:

    * ``added_artifacts`` / ``removed_artifacts``: sorted artifact ids that
      have an evaluation in only one of the two snapshots;
    * ``score_changes``: one entry per ``(artifact_id, score name)`` pair
      present in both snapshots whose value differs, sorted by that pair;
    * ``metric_changes``: one entry per collection metric name present in
      both snapshots whose value differs, sorted by name.

    Score dimensions or metrics that exist in only one snapshot are not
    reported. Values are compared with exact ``!=``.
    """
    before_scores = _score_index(before)
    after_scores = _score_index(after)

    # Artifact membership comes from the evaluations themselves, not from the
    # score index: an artifact evaluated with an empty score list has no
    # index entries but is still part of the snapshot, and must not show up
    # as added/removed just because it gained or lost scores.
    before_artifacts = {
        evaluation.artifact_id for evaluation in before.artifact_evaluations
    }
    after_artifacts = {
        evaluation.artifact_id for evaluation in after.artifact_evaluations
    }

    # Each key is an (artifact_id, dimension) tuple, unpacked straight into
    # the ScoreChange constructor.
    shared_score_keys = sorted(before_scores.keys() & after_scores.keys())
    score_changes = [
        ScoreChange(*key, before_scores[key], after_scores[key])
        for key in shared_score_keys
        if before_scores[key] != after_scores[key]
    ]

    # If a metric name occurs more than once in a snapshot, the last wins.
    before_metrics = {metric.name: metric.value for metric in before.collection_metrics}
    after_metrics = {metric.name: metric.value for metric in after.collection_metrics}
    metric_changes = [
        MetricChange(name, before_metrics[name], after_metrics[name])
        for name in sorted(before_metrics.keys() & after_metrics.keys())
        if before_metrics[name] != after_metrics[name]
    ]

    return SnapshotDiff(
        before_id=before.snapshot_id,
        after_id=after.snapshot_id,
        added_artifacts=sorted(after_artifacts - before_artifacts),
        removed_artifacts=sorted(before_artifacts - after_artifacts),
        score_changes=score_changes,
        metric_changes=metric_changes,
    )
def _score_index(snapshot: EvaluationSnapshot) -> dict[tuple[str, str], float]:
    """Flatten a snapshot's scores into ``{(artifact_id, score name): value}``.

    Evaluations are visited in order; when the same pair occurs more than
    once, the last occurrence wins.
    """
    index: dict[tuple[str, str], float] = {}
    for evaluation in snapshot.artifact_evaluations:
        for score in evaluation.scores:
            key = (evaluation.artifact_id, score.name)
            index[key] = score.value
    return index