Bridge Coach memory brief with project metrics summaries.
Add Performance Summary block to memory brief, document metrics synthesis in agent-coach, and add e2e and CLI tests for qualitative plus quantitative briefs.
This commit is contained in:
@@ -83,6 +83,24 @@ root. Each follows ADR-002 structure:
|
||||
When synthesising, weight `## Watch Points` and `## Open Threads` most heavily —
|
||||
these are the signals most likely to be actionable for another agent.
|
||||
|
||||
### Project metrics (ADR-004)
|
||||
|
||||
Quantitative performance data lives at `.kaizen/metrics/<agent>/summary.json`.
|
||||
`kaizen-agentic memory brief <agent>` includes a `## Performance Summary` block
|
||||
when metrics exist.
|
||||
|
||||
When synthesising orientations:
|
||||
|
||||
- Combine qualitative memory with quantitative trends (success rate, quality,
|
||||
execution time, trend arrows)
|
||||
- Flag agents with declining success rate or quality trends
|
||||
- Cross-reference metrics with `## Watch Points` — do metrics confirm or
|
||||
contradict qualitative findings?
|
||||
- Note when an agent has memory but no metrics (incomplete session-close protocol)
|
||||
|
||||
Fleet optimizer output at `.kaizen/metrics/optimizer/analysis.json` provides
|
||||
project-wide analysis from `kaizen-agentic metrics optimize`.
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
@@ -115,6 +133,9 @@ Project: <project name>
|
||||
Generated: <date>
|
||||
Sources: <which agent memories were read>
|
||||
|
||||
### Performance Summary
|
||||
<from .kaizen/metrics/<agent>/ when available — success rate, quality, trends>
|
||||
|
||||
### What to Know First
|
||||
<3–5 most important facts for this agent>
|
||||
|
||||
|
||||
@@ -262,8 +262,8 @@ kaizen-agentic metrics export <agent> # Dump executions.jsonl
|
||||
kaizen-agentic metrics optimize [agent] # Run optimizer on project metrics (≥10 records)
|
||||
```
|
||||
|
||||
`memory brief` includes a `## Performance Summary` when metrics exist (WP-0003
|
||||
Part 4).
|
||||
`memory brief` includes a `## Performance Summary` when metrics exist (success
|
||||
rate, avg quality, execution time, trend arrows).
|
||||
|
||||
`memory init` scaffolds `.kaizen/metrics/<agent>/` by default (`--no-metrics` to
|
||||
skip). Record outcomes at session close per
|
||||
|
||||
@@ -11,7 +11,7 @@ from typing import List, Optional
|
||||
|
||||
from .registry import AgentRegistry, AgentCategory
|
||||
from .installer import AgentInstaller, ProjectInitializer, InstallationConfig
|
||||
from .metrics import MetricsStore, OptimizerStore
|
||||
from .metrics import MetricsStore, OptimizerStore, performance_summary_markdown
|
||||
from .optimization import OptimizationLoop, MIN_SAMPLES_FOR_RECOMMENDATIONS
|
||||
|
||||
|
||||
@@ -892,12 +892,21 @@ def memory_brief(agent_name: str, target: str, raw: bool):
|
||||
click.echo(f"Sources: {', '.join(sources) if sources else 'none'}")
|
||||
click.echo()
|
||||
|
||||
if not sources:
|
||||
metrics_store = MetricsStore(project_root, agent_name)
|
||||
metrics_summary = metrics_store.read_summary()
|
||||
if metrics_summary is None and metrics_store.executions_path.exists():
|
||||
metrics_summary = metrics_store.write_summary()
|
||||
|
||||
if not sources and not metrics_summary:
|
||||
click.echo("No agent memory files found in this project.")
|
||||
click.echo(f" Run: kaizen-agentic memory init {agent_name}")
|
||||
click.echo(" Then load the coach agent (agents/agent-coach.md) for synthesis.")
|
||||
return
|
||||
|
||||
performance_block = performance_summary_markdown(metrics_summary or {})
|
||||
if performance_block:
|
||||
click.echo(performance_block)
|
||||
|
||||
# Own memory section
|
||||
if own_memory:
|
||||
click.echo("### Your Memory")
|
||||
|
||||
@@ -21,6 +21,36 @@ def _parse_timestamp(value: str) -> datetime:
|
||||
return datetime.fromisoformat(normalized)
|
||||
|
||||
|
||||
# Glyph printed next to each trend label in the brief. Labels that are not a
# key here are rendered with "?" at the lookup site.
_TREND_ARROWS = {"up": "↑", "down": "↓", "stable": "→", "unknown": "?"}

# Placeholder shown when a metric is absent from the summary or cannot be
# formatted as a number.
_MISSING_METRIC = "n/a"


def _format_metric(value: Any, spec: str, suffix: str = "") -> str:
    """Format *value* with *spec*, or return the placeholder if that fails."""
    if value is None:
        return _MISSING_METRIC
    try:
        return format(value, spec) + suffix
    except (TypeError, ValueError):
        # Non-numeric value in a hand-edited or partially written summary.
        return _MISSING_METRIC


def performance_summary_markdown(summary: Dict[str, Any]) -> str:
    """Format ADR-004 summary.json as a Coach brief markdown section.

    Args:
        summary: Mapping read from a metrics summary. Keys consulted are
            ``execution_count``, ``success_rate``, ``avg_quality_score``,
            ``avg_execution_time_s``, ``trend`` (a mapping with
            ``success_rate`` and ``quality_score`` labels) and
            ``last_execution``.

    Returns:
        A ``## Performance Summary`` markdown block ending in a newline, or
        an empty string when the summary is empty or reports no executions.
        Metrics that are missing, ``None`` or not formattable as numbers are
        rendered as ``n/a`` instead of raising, so an incomplete summary
        cannot abort the whole brief.
    """
    # Treat a missing, zero or null execution count as "nothing to report".
    if not summary or not summary.get("execution_count"):
        return ""

    # "trend" may be absent or explicitly null in the summary.
    trend = summary.get("trend") or {}
    success_trend = trend.get("success_rate") or "unknown"
    quality_trend = trend.get("quality_score") or "unknown"

    success_rate = _format_metric(summary.get("success_rate"), ".1%")
    avg_quality = _format_metric(summary.get("avg_quality_score"), ".2f")
    avg_time = _format_metric(summary.get("avg_execution_time_s"), ".1f", "s")

    lines = [
        "## Performance Summary",
        "",
        f"- Executions: {summary['execution_count']}",
        (
            f"- Success rate: {success_rate} "
            f"({_TREND_ARROWS.get(success_trend, '?')} {success_trend})"
        ),
        (
            f"- Avg quality: {avg_quality} "
            f"({_TREND_ARROWS.get(quality_trend, '?')} {quality_trend})"
        ),
        f"- Avg execution time: {avg_time}",
    ]
    if summary.get("last_execution"):
        lines.append(f"- Last execution: {summary['last_execution']}")
    # Trailing empty element gives the block a final newline after the join.
    lines.append("")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _trend_direction(recent: List[float], prior: List[float]) -> str:
|
||||
if not recent:
|
||||
return "unknown"
|
||||
|
||||
@@ -209,6 +209,49 @@ class TestMemoryBrief:
|
||||
# Raw mode should not include the orientation header
|
||||
assert "Orientation Brief for:" not in result.output
|
||||
|
||||
def test_brief_includes_performance_summary_with_memory_and_metrics(self, project):
    """Brief for sys-medic shows both memory content and a metrics block.

    Records one successful execution each for two agents, then checks that
    the sys-medic brief contains the performance section alongside the
    memory-derived text.
    """
    # NOTE(review): presumably seeds the memory files that the "tegpi-01" and
    # "project-management" assertions below rely on — confirm against _populate.
    self._populate(project)
    target = str(project)
    runner = CliRunner()

    # (agent, --time, --quality) for each recorded execution, in order.
    recorded_runs = (
        ("sys-medic", "30", "0.88"),
        ("project-management", "15", "0.95"),
    )
    for agent, seconds, quality in recorded_runs:
        record_args = ["metrics", "record", agent, "--target", target]
        record_args += ["--success", "--time", seconds, "--quality", quality]
        runner.invoke(cli, record_args)

    result = runner.invoke(cli, ["memory", "brief", "sys-medic", "--target", target])

    assert result.exit_code == 0
    expected_fragments = (
        "## Performance Summary",
        "Success rate:",
        "tegpi-01",
        "Context From Other Agents",
        "project-management",
    )
    for fragment in expected_fragments:
        assert fragment in result.output
|
||||
|
||||
|
||||
class TestMemoryClear:
|
||||
def test_clear_removes_file(self, project):
|
||||
|
||||
@@ -114,6 +114,30 @@ class TestMetricsCli:
|
||||
assert metrics_dir.exists()
|
||||
assert (metrics_dir / "executions.jsonl").exists()
|
||||
|
||||
def test_memory_brief_includes_performance_summary(
    self, runner: CliRunner, project_dir: Path
):
    """`memory brief` prints the performance block after one recorded success.

    Initialises memory for tdd-workflow, records a single successful
    execution with a quality score (no `--time` given), and checks the
    brief output.
    """
    target_args = ["--target", str(project_dir)]

    runner.invoke(cli, ["memory", "init", "tdd-workflow", *target_args])
    runner.invoke(
        cli,
        ["metrics", "record", "tdd-workflow", *target_args, "--success", "--quality", "0.9"],
    )

    brief = runner.invoke(cli, ["memory", "brief", "tdd-workflow", *target_args])

    assert brief.exit_code == 0
    assert "## Performance Summary" in brief.output
    # The only recorded execution succeeded, so the rate is 100%.
    assert "Success rate: 100.0%" in brief.output
|
||||
|
||||
def test_memory_init_no_metrics_flag(self, runner: CliRunner, project_dir: Path):
|
||||
result = runner.invoke(
|
||||
cli,
|
||||
|
||||
@@ -160,9 +160,9 @@ Unify qualitative memory and quantitative metrics in the orientation path.
|
||||
|
||||
### Tasks
|
||||
|
||||
- [ ] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
|
||||
- [ ] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
|
||||
- [ ] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
|
||||
- [x] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
|
||||
- [x] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
|
||||
- [x] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
|
||||
|
||||
### Definition of done
|
||||
|
||||
|
||||
Reference in New Issue
Block a user