generated from coulomb/repo-seed
eval history and metrics
This commit is contained in:
@@ -40,6 +40,11 @@ class EntityEvaluation:
|
||||
evaluated_at: datetime
|
||||
notes: list[str] = field(default_factory=list)
|
||||
|
||||
@property
def entity_slug(self) -> str:
    """Return ``artifact_id`` under its legacy name.

    Kept so readers written against entity-oriented history files can
    keep using ``entity_slug``.
    """
    legacy_value = self.artifact_id
    return legacy_value
|
||||
|
||||
@property
|
||||
def overall_score(self) -> float:
|
||||
if not self.scores:
|
||||
@@ -102,6 +107,16 @@ class EvaluationSnapshot:
|
||||
collection_metrics: list[MetricValue] = field(default_factory=list)
|
||||
metadata: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
@property
def entity_count(self) -> int:
    """Return ``artifact_count`` under its legacy name.

    Retained for old infospace history readers.
    """
    legacy_value = self.artifact_count
    return legacy_value
|
||||
|
||||
@property
def entity_evaluations(self) -> list[EntityEvaluation]:
    """Return ``artifact_evaluations`` under its legacy name.

    Retained for old infospace history readers. The same list object is
    returned, not a copy.
    """
    legacy_value = self.artifact_evaluations
    return legacy_value
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return {
|
||||
"snapshot_id": self.snapshot_id,
|
||||
@@ -122,11 +137,14 @@ class EvaluationSnapshot:
|
||||
return cls(
|
||||
snapshot_id=str(data["snapshot_id"]),
|
||||
created_at=datetime.fromisoformat(str(data["created_at"])),
|
||||
schema_name=str(data["schema_name"]),
|
||||
artifact_count=int(data["artifact_count"]),
|
||||
schema_name=str(data.get("schema_name") or "default"),
|
||||
artifact_count=int(data.get("artifact_count", data.get("entity_count", 0))),
|
||||
artifact_evaluations=[
|
||||
EntityEvaluation.from_dict(item)
|
||||
for item in data.get("artifact_evaluations", [])
|
||||
for item in data.get(
|
||||
"artifact_evaluations",
|
||||
data.get("entity_evaluations", []),
|
||||
)
|
||||
],
|
||||
collection_metrics=[
|
||||
MetricValue.from_dict(item) for item in data.get("collection_metrics", [])
|
||||
@@ -134,6 +152,9 @@ class EvaluationSnapshot:
|
||||
metadata=dict(data.get("metadata") or {}),
|
||||
)
|
||||
|
||||
def diff(self, after: "EvaluationSnapshot") -> "SnapshotDiff":
    """Compare this snapshot (the "before" side) against ``after``.

    Thin convenience wrapper that delegates to ``diff_snapshots``.
    """
    result = diff_snapshots(self, after)
    return result
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ScoreChange:
|
||||
@@ -146,6 +167,20 @@ class ScoreChange:
|
||||
def delta(self) -> float:
    """Return the signed score change, computed as ``after - before``."""
    after, before = self.after, self.before
    return after - before
|
||||
|
||||
@property
def entity_slug(self) -> str:
    """Return ``artifact_id`` under its legacy name for old diff consumers."""
    legacy_value = self.artifact_id
    return legacy_value
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
    """Serialize this score change to a plain dictionary.

    Keys, in order: ``artifact_id``, ``dimension``, ``before``, ``after``
    and the derived ``delta``.
    """
    exported = ("artifact_id", "dimension", "before", "after", "delta")
    return {key: getattr(self, key) for key in exported}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MetricChange:
|
||||
@@ -157,6 +192,14 @@ class MetricChange:
|
||||
def delta(self) -> float:
    """Return the signed metric change, computed as ``after - before``."""
    after, before = self.after, self.before
    return after - before
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
    """Serialize this metric change to a plain dictionary.

    Keys, in order: ``name``, ``before``, ``after`` and the derived
    ``delta``.
    """
    payload: dict[str, Any] = {}
    payload["name"] = self.name
    payload["before"] = self.before
    payload["after"] = self.after
    payload["delta"] = self.delta
    return payload
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SnapshotDiff:
|
||||
@@ -167,6 +210,51 @@ class SnapshotDiff:
|
||||
score_changes: list[ScoreChange] = field(default_factory=list)
|
||||
metric_changes: list[MetricChange] = field(default_factory=list)
|
||||
|
||||
@property
def added_entities(self) -> list[str]:
    """Return ``added_artifacts`` under its legacy name.

    Kept for consumers of the old history diff output. The same list
    object is returned, not a copy.
    """
    legacy_value = self.added_artifacts
    return legacy_value
|
||||
|
||||
@property
def removed_entities(self) -> list[str]:
    """Return ``removed_artifacts`` under its legacy name.

    Kept for consumers of the old history diff output. The same list
    object is returned, not a copy.
    """
    legacy_value = self.removed_artifacts
    return legacy_value
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
    """Serialize this diff to a plain dictionary.

    Returns:
        A dict with the keys ``before_id``, ``after_id``,
        ``added_artifacts``, ``removed_artifacts``, ``score_changes`` and
        ``metric_changes``. Each change entry is converted with its own
        ``to_dict()``.

    The artifact id lists are copied, so mutating the returned payload
    cannot alter the lists held by this diff.
    """
    return {
        "before_id": self.before_id,
        "after_id": self.after_id,
        # Copy rather than alias: the payload must not share mutable
        # state with this object.
        "added_artifacts": list(self.added_artifacts),
        "removed_artifacts": list(self.removed_artifacts),
        "score_changes": [change.to_dict() for change in self.score_changes],
        "metric_changes": [change.to_dict() for change in self.metric_changes],
    }
|
||||
|
||||
def summary(self) -> str:
    """Render this diff as a newline-separated, human-readable report.

    The first line names the two snapshot ids. If nothing was added,
    removed or changed, the only other line is ``No changes.``. Otherwise
    the report lists added artifacts, removed artifacts, score changes
    and metric changes, in that order, with deltas shown signed to four
    decimal places.
    """
    header = f"Snapshot diff: {self.before_id} -> {self.after_id}"
    has_changes = any(
        (
            self.added_artifacts,
            self.removed_artifacts,
            self.score_changes,
            self.metric_changes,
        )
    )
    if not has_changes:
        return "\n".join((header, "No changes."))

    report = [header]
    report.extend(f"Added artifact: {name}" for name in self.added_artifacts)
    report.extend(f"Removed artifact: {name}" for name in self.removed_artifacts)
    report.extend(
        f"Score {entry.artifact_id} {entry.dimension}: "
        f"{entry.before} -> {entry.after} ({entry.delta:+.4f})"
        for entry in self.score_changes
    )
    report.extend(
        f"Metric {entry.name}: "
        f"{entry.before} -> {entry.after} ({entry.delta:+.4f})"
        for entry in self.metric_changes
    )
    return "\n".join(report)
|
||||
|
||||
|
||||
def diff_snapshots(
|
||||
before: EvaluationSnapshot,
|
||||
@@ -174,22 +262,29 @@ def diff_snapshots(
|
||||
) -> SnapshotDiff:
|
||||
before_scores = _score_index(before)
|
||||
after_scores = _score_index(after)
|
||||
before_artifacts = {artifact_id for artifact_id, _ in before_scores}
|
||||
after_artifacts = {artifact_id for artifact_id, _ in after_scores}
|
||||
before_artifacts = {
|
||||
evaluation.artifact_id for evaluation in before.artifact_evaluations
|
||||
}
|
||||
after_artifacts = {evaluation.artifact_id for evaluation in after.artifact_evaluations}
|
||||
|
||||
score_changes = [
|
||||
ScoreChange(artifact_id, dimension, before_scores[key], after_scores[key])
|
||||
for key in sorted(before_scores.keys() & after_scores.keys())
|
||||
ScoreChange(
|
||||
artifact_id,
|
||||
dimension,
|
||||
before_scores.get(key, 0.0),
|
||||
after_scores.get(key, 0.0),
|
||||
)
|
||||
for key in sorted(before_scores.keys() | after_scores.keys())
|
||||
for artifact_id, dimension in [key]
|
||||
if before_scores[key] != after_scores[key]
|
||||
if before_scores.get(key) != after_scores.get(key)
|
||||
]
|
||||
|
||||
before_metrics = {metric.name: metric.value for metric in before.collection_metrics}
|
||||
after_metrics = {metric.name: metric.value for metric in after.collection_metrics}
|
||||
metric_changes = [
|
||||
MetricChange(name, before_metrics[name], after_metrics[name])
|
||||
for name in sorted(before_metrics.keys() & after_metrics.keys())
|
||||
if before_metrics[name] != after_metrics[name]
|
||||
MetricChange(name, before_metrics.get(name, 0.0), after_metrics.get(name, 0.0))
|
||||
for name in sorted(before_metrics.keys() | after_metrics.keys())
|
||||
if before_metrics.get(name) != after_metrics.get(name)
|
||||
]
|
||||
|
||||
return SnapshotDiff(
|
||||
|
||||
Reference in New Issue
Block a user