From 04fdc249f547416693ea0a344374f7abf0fe410b Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Tue, 16 Jun 2026 01:46:51 +0200
Subject: [PATCH] Bridge Coach memory brief with project metrics summaries.

Add Performance Summary block to memory brief, document metrics synthesis in
agent-coach, and add e2e and CLI tests for qualitative plus quantitative briefs.
---
 agents/agent-coach.md                         | 21 +++++++++
 docs/agency-framework.md                      |  4 +-
 src/kaizen_agentic/cli.py                     | 13 +++++-
 src/kaizen_agentic/metrics.py                 | 30 +++++++++++++
 tests/test_e2e_agency_framework.py            | 43 +++++++++++++++++++
 tests/test_metrics_cli.py                     | 24 +++++++++++
 ...kaizen-agentic-WP-0003-measurement-loop.md |  6 +--
 7 files changed, 134 insertions(+), 7 deletions(-)
diff --git a/agents/agent-coach.md b/agents/agent-coach.md
index db95a4c..7e4a93b 100644
--- a/agents/agent-coach.md
+++ b/agents/agent-coach.md
@@ -83,6 +83,24 @@ root. Each follows ADR-002 structure:
 When synthesising, weight `## Watch Points` and `## Open Threads` most heavily —
 these are the signals most likely to be actionable for another agent.
 
+### Project metrics (ADR-004)
+
+Quantitative performance data lives at `.kaizen/metrics/<agent>/summary.json`.
+`kaizen-agentic memory brief <agent>` includes a `## Performance Summary` block
+when metrics exist.
+
+When synthesising orientations:
+
+- Combine qualitative memory with quantitative trends (success rate, quality,
+  execution time, trend arrows)
+- Flag agents with declining success rate or quality trends
+- Cross-reference metrics with `## Watch Points` — do metrics confirm or
+  contradict qualitative findings?
+- Note when an agent has memory but no metrics (suggests the session-close protocol was not completed)
+
+Fleet optimizer output at `.kaizen/metrics/optimizer/analysis.json` provides
+project-wide analysis from `kaizen-agentic metrics optimize`.
+
 ---
 
 ## Output Format
@@ -115,6 +133,9 @@ Project: <project name>
 Generated: <date>
 Sources: <which agent memories were read>
 
+### Performance Summary
+<from .kaizen/metrics/<agent>/summary.json when available — success rate, quality, execution time, trends>
+
 ### What to Know First
 <3–5 most important facts for this agent>
 
diff --git a/docs/agency-framework.md b/docs/agency-framework.md
index 8da4abf..d63bf78 100644
--- a/docs/agency-framework.md
+++ b/docs/agency-framework.md
@@ -262,8 +262,8 @@ kaizen-agentic metrics export <agent>   # Dump executions.jsonl
 kaizen-agentic metrics optimize [agent] # Run optimizer on project metrics (≥10 records)
 ```
 
-`memory brief` includes a `## Performance Summary` when metrics exist (WP-0003
-Part 4).
+`memory brief` includes a `## Performance Summary` when metrics exist (success
+rate, avg quality, execution time, trend arrows).
 
 `memory init` scaffolds `.kaizen/metrics/<agent>/` by default (`--no-metrics` to
 skip). Record outcomes at session close per
diff --git a/src/kaizen_agentic/cli.py b/src/kaizen_agentic/cli.py
index a7dae4b..11a2620 100644
--- a/src/kaizen_agentic/cli.py
+++ b/src/kaizen_agentic/cli.py
@@ -11,7 +11,7 @@ from typing import List, Optional
 
 from .registry import AgentRegistry, AgentCategory
 from .installer import AgentInstaller, ProjectInitializer, InstallationConfig
-from .metrics import MetricsStore, OptimizerStore
+from .metrics import MetricsStore, OptimizerStore, performance_summary_markdown
 from .optimization import OptimizationLoop, MIN_SAMPLES_FOR_RECOMMENDATIONS
 
 
@@ -892,12 +892,21 @@ def memory_brief(agent_name: str, target: str, raw: bool):
     click.echo(f"Sources: {', '.join(sources) if sources else 'none'}")
     click.echo()
 
-    if not sources:
+    metrics_store = MetricsStore(project_root, agent_name)
+    metrics_summary = metrics_store.read_summary()
+    if metrics_summary is None and metrics_store.executions_path.exists():
+        metrics_summary = metrics_store.write_summary()
+
+    if not sources and not metrics_summary:
         click.echo("No agent memory files found in this project.")
         click.echo(f"  Run: kaizen-agentic memory init {agent_name}")
         click.echo("  Then load the coach agent (agents/agent-coach.md) for synthesis.")
         return
 
+    performance_block = performance_summary_markdown(metrics_summary or {})
+    if performance_block:
+        click.echo(performance_block)
+
     # Own memory section
     if own_memory:
         click.echo("### Your Memory")
diff --git a/src/kaizen_agentic/metrics.py b/src/kaizen_agentic/metrics.py
index ea37a7a..157a4aa 100644
--- a/src/kaizen_agentic/metrics.py
+++ b/src/kaizen_agentic/metrics.py
@@ -21,6 +21,45 @@ def _parse_timestamp(value: str) -> datetime:
     return datetime.fromisoformat(normalized)
 
 
+_TREND_ARROWS = {"up": "↑", "down": "↓", "stable": "→", "unknown": "?"}
+
+
+def performance_summary_markdown(summary: Dict[str, Any]) -> str:
+    """Format ADR-004 summary.json as a Coach brief markdown section.
+
+    Returns "" when the summary is empty or records no executions, otherwise
+    a "## Performance Summary" block that ends with a newline. A metric that
+    is missing or null renders as "n/a" and a missing trend as "unknown", so
+    a partial summary.json cannot crash `memory brief`.
+    """
+    if not summary or not summary.get("execution_count"):
+        return ""
+
+    # `or {}` also covers an explicit `"trend": null` in the JSON.
+    trend = summary.get("trend") or {}
+
+    def metric(key: str, spec: str, unit: str = "") -> str:
+        value = summary.get(key)
+        return "n/a" if value is None else f"{value:{spec}}{unit}"
+
+    def arrow(key: str) -> str:
+        direction = trend.get(key) or "unknown"
+        return f"({_TREND_ARROWS.get(direction, '?')} {direction})"
+
+    lines = [
+        "## Performance Summary",
+        "",
+        f"- Executions: {summary['execution_count']}",
+        f"- Success rate: {metric('success_rate', '.1%')} {arrow('success_rate')}",
+        f"- Avg quality: {metric('avg_quality_score', '.2f')} {arrow('quality_score')}",
+        f"- Avg execution time: {metric('avg_execution_time_s', '.1f', 's')}",
+    ]
+    if summary.get("last_execution"):
+        lines.append(f"- Last execution: {summary['last_execution']}")
+    lines.append("")
+    return "\n".join(lines)
+
+
 def _trend_direction(recent: List[float], prior: List[float]) -> str:
     if not recent:
         return "unknown"
diff --git a/tests/test_e2e_agency_framework.py b/tests/test_e2e_agency_framework.py
index ab478f7..06553c0 100644
--- a/tests/test_e2e_agency_framework.py
+++ b/tests/test_e2e_agency_framework.py
@@ -209,6 +209,42 @@ class TestMemoryBrief:
         # Raw mode should not include the orientation header
         assert "Orientation Brief for:" not in result.output
 
+    def test_brief_includes_performance_summary_with_memory_and_metrics(self, project):
+        """Brief output carries the metrics block alongside the memory sections."""
+        self._populate(project)
+        runner = CliRunner()
+        # Record one execution for two agents; fail fast if the setup itself
+        # breaks so a later missing-section assertion is not misattributed.
+        for agent, seconds, quality in (
+            ("sys-medic", "30", "0.88"),
+            ("project-management", "15", "0.95"),
+        ):
+            recorded = runner.invoke(
+                cli,
+                [
+                    "metrics",
+                    "record",
+                    agent,
+                    "--target",
+                    str(project),
+                    "--success",
+                    "--time",
+                    seconds,
+                    "--quality",
+                    quality,
+                ],
+            )
+            assert recorded.exit_code == 0, recorded.output
+
+        result = runner.invoke(cli, ["memory", "brief", "sys-medic", "--target", str(project)])
+
+        assert result.exit_code == 0
+        assert "## Performance Summary" in result.output
+        assert "Success rate:" in result.output
+        assert "tegpi-01" in result.output
+        assert "Context From Other Agents" in result.output
+        assert "project-management" in result.output
+
 
 class TestMemoryClear:
     def test_clear_removes_file(self, project):
diff --git a/tests/test_metrics_cli.py b/tests/test_metrics_cli.py
index f811b54..c739fdb 100644
--- a/tests/test_metrics_cli.py
+++ b/tests/test_metrics_cli.py
@@ -114,6 +114,35 @@ class TestMetricsCli:
         assert metrics_dir.exists()
         assert (metrics_dir / "executions.jsonl").exists()
 
+    def test_memory_brief_includes_performance_summary(
+        self, runner: CliRunner, project_dir: Path
+    ):
+        """`memory brief` renders recorded metrics as a Performance Summary."""
+        target = str(project_dir)
+        # Check the setup commands too, so a broken `init`/`record` is reported
+        # at its source instead of as a missing summary section.
+        init = runner.invoke(cli, ["memory", "init", "tdd-workflow", "--target", target])
+        assert init.exit_code == 0, init.output
+        recorded = runner.invoke(
+            cli,
+            [
+                "metrics",
+                "record",
+                "tdd-workflow",
+                "--target",
+                target,
+                "--success",
+                "--quality",
+                "0.9",
+            ],
+        )
+        assert recorded.exit_code == 0, recorded.output
+
+        result = runner.invoke(cli, ["memory", "brief", "tdd-workflow", "--target", target])
+        assert result.exit_code == 0
+        assert "## Performance Summary" in result.output
+        assert "Success rate: 100.0%" in result.output
+
     def test_memory_init_no_metrics_flag(self, runner: CliRunner, project_dir: Path):
         result = runner.invoke(
             cli,
diff --git a/workplans/kaizen-agentic-WP-0003-measurement-loop.md b/workplans/kaizen-agentic-WP-0003-measurement-loop.md
index 9824cfd..ec02597 100644
--- a/workplans/kaizen-agentic-WP-0003-measurement-loop.md
+++ b/workplans/kaizen-agentic-WP-0003-measurement-loop.md
@@ -160,9 +160,9 @@ Unify qualitative memory and quantitative metrics in the orientation path.
 
 ### Tasks
 
-- [ ] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
-- [ ] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
-- [ ] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
+- [x] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
+- [x] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
+- [x] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
 
 ### Definition of done