Bridge Coach memory brief with project metrics summaries.

Add Performance Summary block to memory brief, document metrics synthesis in agent-coach, and add e2e and CLI tests for qualitative plus quantitative briefs.
2026-06-16 01:46:51 +02:00
parent 2711a3ebcc
commit 04fdc249f5
7 changed files with 134 additions and 7 deletions
--- a/agents/agent-coach.md
+++ b/agents/agent-coach.md
@@ -83,6 +83,24 @@ root. Each follows ADR-002 structure:
 When synthesising, weight `## Watch Points` and `## Open Threads` most heavily —
 these are the signals most likely to be actionable for another agent.

+### Project metrics (ADR-004)
+
+Quantitative performance data lives at `.kaizen/metrics/<agent>/summary.json`.
+`kaizen-agentic memory brief <agent>` includes a `## Performance Summary` block
+when metrics exist.
+
+When synthesising orientations:
+
+- Combine qualitative memory with quantitative metrics (success rate, quality,
+  execution time, trend arrows)
+- Flag agents with declining success rate or quality trends
+- Cross-reference metrics with `## Watch Points` — do metrics confirm or
+  contradict qualitative findings?
+- Note when an agent has memory but no metrics (suggests an incomplete session-close protocol)
+
+Fleet optimizer output at `.kaizen/metrics/optimizer/analysis.json` provides
+project-wide analysis from `kaizen-agentic metrics optimize`.
+
 ---

 ## Output Format
@@ -115,6 +133,9 @@ Project: <project name>
 Generated: <date>
 Sources: <which agent memories were read>

+### Performance Summary
+<from .kaizen/metrics/<agent>/summary.json when available — success rate, quality, trends>
+
 ### What to Know First
 <3–5 most important facts for this agent>

--- a/docs/agency-framework.md
+++ b/docs/agency-framework.md
@@ -262,8 +262,8 @@ kaizen-agentic metrics export <agent>   # Dump executions.jsonl
 kaizen-agentic metrics optimize [agent] # Run optimizer on project metrics (≥10 records)
 ```

-`memory brief` includes a `## Performance Summary` when metrics exist (WP-0003
-Part 4).
+`memory brief` includes a `## Performance Summary` block when metrics exist
+(execution count, success rate, avg quality, avg execution time, trend arrows).

 `memory init` scaffolds `.kaizen/metrics/<agent>/` by default (`--no-metrics` to
 skip). Record outcomes at session close per
--- a/src/kaizen_agentic/cli.py
+++ b/src/kaizen_agentic/cli.py
@@ -11,7 +11,7 @@ from typing import List, Optional

 from .registry import AgentRegistry, AgentCategory
 from .installer import AgentInstaller, ProjectInitializer, InstallationConfig
-from .metrics import MetricsStore, OptimizerStore
+from .metrics import MetricsStore, OptimizerStore, performance_summary_markdown
 from .optimization import OptimizationLoop, MIN_SAMPLES_FOR_RECOMMENDATIONS


@@ -892,12 +892,21 @@ def memory_brief(agent_name: str, target: str, raw: bool):
    click.echo(f"Sources: {', '.join(sources) if sources else 'none'}")
    click.echo()

-    if not sources:
+    metrics_store = MetricsStore(project_root, agent_name)
+    metrics_summary = metrics_store.read_summary()
+    if metrics_summary is None and metrics_store.executions_path.exists():
+        metrics_summary = metrics_store.write_summary()
+
+    if not sources and not metrics_summary:
        click.echo("No agent memory files found in this project.")
        click.echo(f"  Run: kaizen-agentic memory init {agent_name}")
        click.echo("  Then load the coach agent (agents/agent-coach.md) for synthesis.")
        return

+    performance_block = performance_summary_markdown(metrics_summary or {})
+    if performance_block:
+        click.echo(performance_block)
+
    # Own memory section
    if own_memory:
        click.echo("### Your Memory")
--- a/src/kaizen_agentic/metrics.py
+++ b/src/kaizen_agentic/metrics.py
@@ -21,6 +21,36 @@ def _parse_timestamp(value: str) -> datetime:
    return datetime.fromisoformat(normalized)


+_TREND_ARROWS = {"up": "↑", "down": "↓", "stable": "→", "unknown": "?"}
+
+
+def performance_summary_markdown(summary: Dict[str, Any]) -> str:
+    """Format ADR-004 summary.json as a Coach brief markdown section."""
+    if not summary or summary.get("execution_count", 0) == 0:
+        return ""
+
+    trend = summary.get("trend", {})
+    success_trend = trend.get("success_rate", "unknown")
+    quality_trend = trend.get("quality_score", "unknown")
+
+    lines = [
+        "## Performance Summary",
+        "",
+        f"- Executions: {summary['execution_count']}",
+        (
+            f"- Success rate: {summary['success_rate']:.1%} "
+            f"({_TREND_ARROWS.get(success_trend, '?')} {success_trend})"
+        ),
+        f"- Avg quality: {summary['avg_quality_score']:.2f} "
+        f"({_TREND_ARROWS.get(quality_trend, '?')} {quality_trend})",
+        f"- Avg execution time: {summary['avg_execution_time_s']:.1f}s",
+    ]
+    if summary.get("last_execution"):
+        lines.append(f"- Last execution: {summary['last_execution']}")
+    lines.append("")
+    return "\n".join(lines)
+
+
 def _trend_direction(recent: List[float], prior: List[float]) -> str:
    if not recent:
        return "unknown"
--- a/tests/test_e2e_agency_framework.py
+++ b/tests/test_e2e_agency_framework.py
@@ -209,6 +209,49 @@ class TestMemoryBrief:
        # Raw mode should not include the orientation header
        assert "Orientation Brief for:" not in result.output

+    def test_brief_includes_performance_summary_with_memory_and_metrics(self, project):
+        self._populate(project)
+        runner = CliRunner()
+        runner.invoke(
+            cli,
+            [
+                "metrics",
+                "record",
+                "sys-medic",
+                "--target",
+                str(project),
+                "--success",
+                "--time",
+                "30",
+                "--quality",
+                "0.88",
+            ],
+        )
+        runner.invoke(
+            cli,
+            [
+                "metrics",
+                "record",
+                "project-management",
+                "--target",
+                str(project),
+                "--success",
+                "--time",
+                "15",
+                "--quality",
+                "0.95",
+            ],
+        )
+
+        result = runner.invoke(cli, ["memory", "brief", "sys-medic", "--target", str(project)])
+
+        assert result.exit_code == 0
+        assert "## Performance Summary" in result.output
+        assert "Success rate:" in result.output
+        assert "tegpi-01" in result.output
+        assert "Context From Other Agents" in result.output
+        assert "project-management" in result.output
+

 class TestMemoryClear:
    def test_clear_removes_file(self, project):
--- a/tests/test_metrics_cli.py
+++ b/tests/test_metrics_cli.py
@@ -114,6 +114,30 @@ class TestMetricsCli:
        assert metrics_dir.exists()
        assert (metrics_dir / "executions.jsonl").exists()

+    def test_memory_brief_includes_performance_summary(
+        self, runner: CliRunner, project_dir: Path
+    ):
+        target = str(project_dir)
+        runner.invoke(cli, ["memory", "init", "tdd-workflow", "--target", target])
+        runner.invoke(
+            cli,
+            [
+                "metrics",
+                "record",
+                "tdd-workflow",
+                "--target",
+                target,
+                "--success",
+                "--quality",
+                "0.9",
+            ],
+        )
+
+        result = runner.invoke(cli, ["memory", "brief", "tdd-workflow", "--target", target])
+        assert result.exit_code == 0
+        assert "## Performance Summary" in result.output
+        assert "Success rate: 100.0%" in result.output
+
    def test_memory_init_no_metrics_flag(self, runner: CliRunner, project_dir: Path):
        result = runner.invoke(
            cli,
--- a/workplans/kaizen-agentic-WP-0003-measurement-loop.md
+++ b/workplans/kaizen-agentic-WP-0003-measurement-loop.md
@@ -160,9 +160,9 @@ Unify qualitative memory and quantitative metrics in the orientation path.

 ### Tasks

- [ ] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
- [ ] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
- [ ] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
+- [x] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
+- [x] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
+- [x] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections

 ### Definition of done