Files
kaizen-agentic/src/kaizen_agentic/metrics.py
tegwick 80c60ebd7a
Some checks failed
ci / test (3.12) (push) Has been cancelled
ci / test (3.10) (push) Has been cancelled
WP-0001: feedback channels, CI, pre-commit, telemetry docs
Add kaizen-agentic feedback CLI, Gitea issue templates, CI workflow,
pre-commit hooks, FEEDBACK/TELEMETRY docs, and cross-platform path tests.
Improve CLI registry error messages; remove agents_backup scaffolding.
Apply black formatting across src/tests for CI consistency.

State Hub message sent to agentic-resources for Helix correlation doc link.
2026-06-16 01:58:07 +02:00

279 lines
9.3 KiB
Python

"""Project-scoped agent metrics storage (ADR-004)."""
from __future__ import annotations
import json
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
# Default retention window, in days, for execution records. It is the default
# value of MetricsStore.retention_days; MetricsStore.prune() drops records whose
# timestamp is older than this many days.
DEFAULT_RETENTION_DAYS = 180
def _utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    current = datetime.now(tz=timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
def _parse_timestamp(value: str) -> datetime:
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    A ``Z`` designator is rewritten to ``+00:00`` so that
    ``datetime.fromisoformat`` accepts it on every supported Python version.
    Timestamps that carry no UTC offset are assumed to be UTC, so the result
    can always be compared with other timezone-aware datetimes.

    Args:
        value: ISO-8601 timestamp, e.g. ``"2024-01-02T03:04:05Z"``.

    Returns:
        A timezone-aware ``datetime``.

    Raises:
        ValueError: If ``value`` is not a string or is not valid ISO-8601.
    """
    if not isinstance(value, str):
        # JSON records may carry null/number timestamps; report them the same
        # way as malformed strings so callers only need to handle ValueError.
        raise ValueError(f"timestamp must be a string, got {type(value).__name__}")
    normalized = value.replace("Z", "+00:00")
    parsed = datetime.fromisoformat(normalized)
    if parsed.tzinfo is None:
        # Comparing a naive datetime with an aware one raises TypeError.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
# Glyph printed in front of each trend label by performance_summary_markdown();
# labels missing from this mapping fall back to "?" at the call site.
# NOTE(review): the "up"/"down"/"stable" glyphs are empty strings here. They
# may have been arrow characters lost to an encoding problem; confirm intent.
_TREND_ARROWS = {"up": "", "down": "", "stable": "", "unknown": "?"}
def performance_summary_markdown(summary: Dict[str, Any]) -> str:
    """Render a metrics summary dict as a "Performance Summary" markdown section.

    Args:
        summary: Mapping with ``execution_count``, ``success_rate``,
            ``avg_quality_score`` and ``avg_execution_time_s``; ``trend`` and
            ``last_execution`` are optional.

    Returns:
        The markdown text terminated by a newline, or ``""`` when ``summary``
        is empty or its ``execution_count`` equals 0.

    Raises:
        KeyError: If a required key is missing from a non-empty summary.
    """
    nothing_recorded = not summary or summary.get("execution_count", 0) == 0
    if nothing_recorded:
        return ""

    trend_info = summary.get("trend", {})
    success_trend = trend_info.get("success_rate", "unknown")
    quality_trend = trend_info.get("quality_score", "unknown")

    # Built incrementally, in the order the bullets appear in the output.
    out = ["## Performance Summary", ""]
    out.append(f"- Executions: {summary['execution_count']}")
    out.append(
        f"- Success rate: {summary['success_rate']:.1%} "
        f"({_TREND_ARROWS.get(success_trend, '?')} {success_trend})"
    )
    out.append(
        f"- Avg quality: {summary['avg_quality_score']:.2f} "
        f"({_TREND_ARROWS.get(quality_trend, '?')} {quality_trend})"
    )
    out.append(f"- Avg execution time: {summary['avg_execution_time_s']:.1f}s")

    if summary.get("last_execution"):
        out.append(f"- Last execution: {summary['last_execution']}")

    # The trailing newline is what the original empty final list item produced.
    return "\n".join(out) + "\n"
def _trend_direction(
    recent: List[float],
    prior: List[float],
    *,
    threshold: float = 0.05,
) -> str:
    """Classify how the mean of ``recent`` moved relative to the mean of ``prior``.

    Args:
        recent: Samples from the most recent window.
        prior: Samples from the window preceding ``recent``.
        threshold: Smallest absolute change of the mean that counts as
            movement. The default of 0.05 matches the previously hard-coded
            value; pass a larger value for metrics on a wider scale.

    Returns:
        ``"unknown"`` when ``recent`` is empty, ``"stable"`` when ``prior`` is
        empty or the means differ by less than ``threshold``, otherwise
        ``"up"`` or ``"down"`` according to the sign of the change.
    """
    if not recent:
        return "unknown"
    if not prior:
        # Nothing to compare against yet.
        return "stable"
    delta = sum(recent) / len(recent) - sum(prior) / len(prior)
    # An exact zero change is never a movement, even with threshold=0.
    if delta == 0 or abs(delta) < threshold:
        return "stable"
    return "up" if delta > 0 else "down"
@dataclass
class MetricsStore:
    """Append-only per-agent execution metrics under .kaizen/metrics/.

    Each agent owns the directory ``<project_root>/.kaizen/metrics/<agent_name>/``
    containing ``executions.jsonl`` (one JSON object per line) and
    ``summary.json`` (aggregates recomputed after every append or prune).
    """

    # Project root; resolved to an absolute path in __post_init__.
    project_root: Path
    # Agent identifier; also the name of the agent's metrics sub-directory.
    agent_name: str
    # Records older than this many days are removed by prune().
    retention_days: int = DEFAULT_RETENTION_DAYS

    def __post_init__(self) -> None:
        """Resolve the project root and derive the storage paths."""
        self.project_root = Path(self.project_root).resolve()
        self.agent_dir = self.project_root / ".kaizen" / "metrics" / self.agent_name
        self.executions_path = self.agent_dir / "executions.jsonl"
        self.summary_path = self.agent_dir / "summary.json"

    @classmethod
    def list_agents(cls, project_root: Path) -> List[str]:
        """Return the sorted names of agents that have an executions.jsonl file."""
        metrics_root = Path(project_root).resolve() / ".kaizen" / "metrics"
        if not metrics_root.exists():
            return []
        return [
            child.name
            for child in sorted(metrics_root.iterdir())
            if child.is_dir() and (child / "executions.jsonl").exists()
        ]

    def scaffold(self) -> Path:
        """Create metrics directory for this agent."""
        self.agent_dir.mkdir(parents=True, exist_ok=True)
        if not self.executions_path.exists():
            self.executions_path.write_text("", encoding="utf-8")
        return self.agent_dir

    def append(
        self,
        record: Dict[str, Any],
        *,
        idempotency_key: Optional[str] = None,
    ) -> bool:
        """Append an execution record. Returns False if idempotency_key duplicates.

        ``agent`` and ``timestamp`` are filled in when the record does not
        provide them. After writing, expired records are pruned and
        ``summary.json`` is refreshed.

        Args:
            record: Execution data; must contain a ``success`` field.
            idempotency_key: Optional key; when a stored record already
                carries the same key nothing is written.

        Returns:
            True when the record was written, False for a duplicate key.

        Raises:
            ValueError: If the record has no ``success`` field.
        """
        self.scaffold()
        payload = dict(record)  # never mutate the caller's dict
        payload.setdefault("agent", self.agent_name)
        payload.setdefault("timestamp", _utc_now_iso())
        if idempotency_key is not None:
            if self._has_idempotency_key(idempotency_key):
                return False
            payload["idempotency_key"] = idempotency_key
        if "success" not in payload:
            raise ValueError("execution record requires 'success' field")
        # Serialise before opening the file so a non-serialisable record
        # cannot leave a partial line behind.
        line = json.dumps(payload, sort_keys=True)
        with self.executions_path.open("a", encoding="utf-8") as handle:
            handle.write(line + "\n")
        # prune() already rewrites summary.json when it removes records;
        # only refresh it here when nothing was pruned.
        if not self.prune():
            self.write_summary()
        return True

    def read_executions(self) -> List[Dict[str, Any]]:
        """Load every stored record, skipping blank lines.

        Returns an empty list when executions.jsonl does not exist.
        """
        if not self.executions_path.exists():
            return []
        records: List[Dict[str, Any]] = []
        with self.executions_path.open(encoding="utf-8") as handle:
            for line in handle:
                line = line.strip()
                if line:
                    records.append(json.loads(line))
        return records

    def summarise(self) -> Dict[str, Any]:
        """Compute aggregate statistics over all stored records.

        Averages only include records whose ``quality_score`` /
        ``execution_time_s`` is not None and are rounded to three decimals.
        Trends compare the five most recent samples with the (up to) five
        samples before them.
        """
        records = self.read_executions()
        if not records:
            return {
                "agent": self.agent_name,
                "execution_count": 0,
                "success_rate": 0.0,
                "avg_quality_score": 0.0,
                "avg_execution_time_s": 0.0,
                "last_execution": None,
                "trend": {
                    "success_rate": "unknown",
                    "quality_score": "unknown",
                },
            }

        successes = [bool(r["success"]) for r in records]
        success_rate = sum(successes) / len(successes)
        quality_scores = [
            float(r["quality_score"])
            for r in records
            if r.get("quality_score") is not None
        ]
        execution_times = [
            float(r["execution_time_s"])
            for r in records
            if r.get("execution_time_s") is not None
        ]

        window = 5
        recent_success = [1.0 if s else 0.0 for s in successes[-window:]]
        prior_success = [1.0 if s else 0.0 for s in successes[:-window][-window:]]
        recent_quality = quality_scores[-window:]
        # Slicing yields [] when there are no more than `window` samples.
        prior_quality = quality_scores[:-window][-window:]

        return {
            "agent": self.agent_name,
            "execution_count": len(records),
            "success_rate": round(success_rate, 3),
            "avg_quality_score": round(
                sum(quality_scores) / len(quality_scores) if quality_scores else 0.0,
                3,
            ),
            "avg_execution_time_s": round(
                sum(execution_times) / len(execution_times) if execution_times else 0.0,
                3,
            ),
            # prune() keeps records that lack a timestamp, so tolerate them here.
            "last_execution": records[-1].get("timestamp"),
            "trend": {
                "success_rate": _trend_direction(recent_success, prior_success),
                "quality_score": _trend_direction(recent_quality, prior_quality),
            },
        }

    def write_summary(self) -> Dict[str, Any]:
        """Recompute the summary, write it to summary.json and return it."""
        summary = self.summarise()
        self.agent_dir.mkdir(parents=True, exist_ok=True)
        self.summary_path.write_text(
            json.dumps(summary, indent=2, sort_keys=True) + "\n",
            encoding="utf-8",
        )
        return summary

    def read_summary(self) -> Optional[Dict[str, Any]]:
        """Return the stored summary, or None when summary.json does not exist."""
        if not self.summary_path.exists():
            return None
        return json.loads(self.summary_path.read_text(encoding="utf-8"))

    def prune(self) -> int:
        """Drop execution records older than retention_days. Returns removed count.

        Records whose timestamp is missing or cannot be parsed are kept.
        Timestamps without a UTC offset are treated as UTC. When anything was
        removed the file is rewritten and summary.json is refreshed.
        """
        if not self.executions_path.exists():
            return 0
        cutoff = datetime.now(timezone.utc) - timedelta(days=self.retention_days)
        kept: List[Dict[str, Any]] = []
        removed = 0
        for record in self.read_executions():
            try:
                ts = _parse_timestamp(record["timestamp"])
            except (KeyError, ValueError, TypeError, AttributeError):
                # Missing, malformed or non-string timestamp: keep the record
                # rather than guess its age.
                kept.append(record)
                continue
            if ts.tzinfo is None:
                # Comparing a naive datetime with the aware cutoff would
                # raise TypeError.
                ts = ts.replace(tzinfo=timezone.utc)
            if ts >= cutoff:
                kept.append(record)
            else:
                removed += 1
        if removed:
            with self.executions_path.open("w", encoding="utf-8") as handle:
                for record in kept:
                    handle.write(json.dumps(record, sort_keys=True))
                    handle.write("\n")
            self.write_summary()
        return removed

    def _has_idempotency_key(self, key: str) -> bool:
        """Return True when a stored record already carries ``key``."""
        return any(r.get("idempotency_key") == key for r in self.read_executions())
@dataclass
class OptimizerStore:
    """Filesystem store for optimizer output in .kaizen/metrics/optimizer/.

    ``analysis.json`` holds the latest analysis report (overwritten on each
    write); ``recommendations.jsonl`` accumulates one JSON entry per call.
    """

    # Project root; resolved to an absolute path in __post_init__.
    project_root: Path

    def __post_init__(self) -> None:
        """Resolve the project root and derive the output paths."""
        root = Path(self.project_root).resolve()
        out_dir = root / ".kaizen" / "metrics" / "optimizer"
        self.project_root = root
        self.optimizer_dir = out_dir
        self.analysis_path = out_dir / "analysis.json"
        self.recommendations_path = out_dir / "recommendations.jsonl"

    def write_analysis(self, report: Dict[str, Any]) -> Path:
        """Write ``report`` as indented, key-sorted JSON and return the file path."""
        self.optimizer_dir.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(report, indent=2, sort_keys=True)
        self.analysis_path.write_text(serialized + "\n", encoding="utf-8")
        return self.analysis_path

    def append_recommendations(
        self,
        agent_name: str,
        recommendations: List[Dict[str, Any]],
        *,
        metrics_count: int,
    ) -> None:
        """Append one timestamped recommendations entry for ``agent_name``.

        Args:
            agent_name: Agent the recommendations refer to.
            recommendations: Recommendation objects stored verbatim.
            metrics_count: Stored alongside the entry as ``metrics_count``.
        """
        self.optimizer_dir.mkdir(parents=True, exist_ok=True)
        entry = {
            "recommendations": recommendations,
            "metrics_count": metrics_count,
            "agent": agent_name,
            "timestamp": _utc_now_iso(),
        }
        with self.recommendations_path.open("a", encoding="utf-8") as sink:
            # One JSON document per line; key order is fixed by sort_keys.
            sink.writelines((json.dumps(entry, sort_keys=True), "\n"))