Bridge Coach memory brief with project metrics summaries.

Add Performance Summary block to memory brief, document metrics synthesis in
agent-coach, and add e2e and CLI tests for qualitative plus quantitative briefs.
This commit is contained in:
2026-06-16 01:46:51 +02:00
parent 2711a3ebcc
commit 04fdc249f5
7 changed files with 134 additions and 7 deletions

View File

@@ -83,6 +83,24 @@ root. Each follows ADR-002 structure:
When synthesising, weight `## Watch Points` and `## Open Threads` most heavily —
these are the signals most likely to be actionable for another agent.
### Project metrics (ADR-004)
Quantitative performance data lives at `.kaizen/metrics/<agent>/summary.json`.
`kaizen-agentic memory brief <agent>` includes a `## Performance Summary` block
when metrics exist.
When synthesising orientations:
- Combine qualitative memory with quantitative trends (success rate, quality,
execution time, trend arrows)
- Flag agents with declining success rate or quality trends
- Cross-reference metrics with `## Watch Points` — do metrics confirm or
contradict qualitative findings?
- Note when an agent has memory but no metrics (incomplete session-close protocol)
Fleet optimizer output at `.kaizen/metrics/optimizer/analysis.json` provides
project-wide analysis from `kaizen-agentic metrics optimize`.
---
## Output Format
@@ -115,6 +133,9 @@ Project: <project name>
Generated: <date>
Sources: <which agent memories were read>
### Performance Summary
<from .kaizen/metrics/<agent>/ when available — success rate, quality, trends>
### What to Know First
<3–5 most important facts for this agent>

View File

@@ -262,8 +262,8 @@ kaizen-agentic metrics export <agent> # Dump executions.jsonl
kaizen-agentic metrics optimize [agent] # Run optimizer on project metrics (≥10 records)
```
`memory brief` includes a `## Performance Summary` when metrics exist (WP-0003
Part 4).
`memory brief` includes a `## Performance Summary` when metrics exist (success
rate, avg quality, execution time, trend arrows).
`memory init` scaffolds `.kaizen/metrics/<agent>/` by default (`--no-metrics` to
skip). Record outcomes at session close per

View File

@@ -11,7 +11,7 @@ from typing import List, Optional
from .registry import AgentRegistry, AgentCategory
from .installer import AgentInstaller, ProjectInitializer, InstallationConfig
from .metrics import MetricsStore, OptimizerStore
from .metrics import MetricsStore, OptimizerStore, performance_summary_markdown
from .optimization import OptimizationLoop, MIN_SAMPLES_FOR_RECOMMENDATIONS
@@ -892,12 +892,21 @@ def memory_brief(agent_name: str, target: str, raw: bool):
click.echo(f"Sources: {', '.join(sources) if sources else 'none'}")
click.echo()
if not sources:
metrics_store = MetricsStore(project_root, agent_name)
metrics_summary = metrics_store.read_summary()
if metrics_summary is None and metrics_store.executions_path.exists():
metrics_summary = metrics_store.write_summary()
if not sources and not metrics_summary:
click.echo("No agent memory files found in this project.")
click.echo(f" Run: kaizen-agentic memory init {agent_name}")
click.echo(" Then load the coach agent (agents/agent-coach.md) for synthesis.")
return
performance_block = performance_summary_markdown(metrics_summary or {})
if performance_block:
click.echo(performance_block)
# Own memory section
if own_memory:
click.echo("### Your Memory")

View File

@@ -21,6 +21,36 @@ def _parse_timestamp(value: str) -> datetime:
return datetime.fromisoformat(normalized)
# Glyph shown next to each trend label in the brief; anything unrecognised
# falls back to "?" at the lookup site.
_TREND_ARROWS = {"up": "↑", "down": "↓", "stable": "→", "unknown": "?"}


def _format_summary_metric(value: Any, spec: str, suffix: str = "") -> str:
    """Format a numeric metric with *spec*, or return "n/a" when it is absent.

    Non-numeric values (including ``None`` and booleans) are treated as
    absent so that a partial summary never raises while being rendered.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return "n/a"
    return f"{value:{spec}}{suffix}"


def performance_summary_markdown(summary: Dict[str, Any]) -> str:
    """Format ADR-004 summary.json as a Coach brief markdown section.

    Args:
        summary: Parsed summary mapping. The keys read are
            ``execution_count``, ``success_rate``, ``avg_quality_score``,
            ``avg_execution_time_s``, ``trend`` (a mapping with
            ``success_rate`` and ``quality_score`` entries) and the optional
            ``last_execution``.

    Returns:
        A ``## Performance Summary`` markdown block ending in a newline, or
        an empty string when *summary* is empty or records no executions.
        Metrics that are missing or non-numeric are rendered as ``n/a``.
    """
    # A missing, null or zero execution count all mean "nothing to report".
    if not summary or not summary.get("execution_count"):
        return ""

    # ``trend`` may be absent or explicitly null in the JSON document.
    trend = summary.get("trend") or {}
    success_trend = trend.get("success_rate") or "unknown"
    quality_trend = trend.get("quality_score") or "unknown"

    success_rate = _format_summary_metric(summary.get("success_rate"), ".1%")
    avg_quality = _format_summary_metric(summary.get("avg_quality_score"), ".2f")
    avg_time = _format_summary_metric(
        summary.get("avg_execution_time_s"), ".1f", "s"
    )

    lines = [
        "## Performance Summary",
        "",
        f"- Executions: {summary['execution_count']}",
        (
            f"- Success rate: {success_rate} "
            f"({_TREND_ARROWS.get(success_trend, '?')} {success_trend})"
        ),
        (
            f"- Avg quality: {avg_quality} "
            f"({_TREND_ARROWS.get(quality_trend, '?')} {quality_trend})"
        ),
        f"- Avg execution time: {avg_time}",
    ]
    if summary.get("last_execution"):
        lines.append(f"- Last execution: {summary['last_execution']}")
    # Trailing empty element makes the block end with a newline.
    lines.append("")
    return "\n".join(lines)
def _trend_direction(recent: List[float], prior: List[float]) -> str:
if not recent:
return "unknown"

View File

@@ -209,6 +209,49 @@ class TestMemoryBrief:
# Raw mode should not include the orientation header
assert "Orientation Brief for:" not in result.output
def test_brief_includes_performance_summary_with_memory_and_metrics(self, project):
    """A brief for an agent with memory and metrics shows both kinds of content."""
    self._populate(project)
    runner = CliRunner()
    target = str(project)

    # Record one successful execution for each of two agents:
    # (agent name, execution time, quality score).
    recorded_runs = (
        ("sys-medic", "30", "0.88"),
        ("project-management", "15", "0.95"),
    )
    for agent, seconds, quality in recorded_runs:
        runner.invoke(
            cli,
            [
                "metrics",
                "record",
                agent,
                "--target",
                target,
                "--success",
                "--time",
                seconds,
                "--quality",
                quality,
            ],
        )

    result = runner.invoke(cli, ["memory", "brief", "sys-medic", "--target", target])
    assert result.exit_code == 0

    output = result.output
    # Quantitative section.
    assert "## Performance Summary" in output
    assert "Success rate:" in output
    # Qualitative content; "tegpi-01" is presumably written by _populate —
    # confirm against that helper.
    assert "tegpi-01" in output
    assert "Context From Other Agents" in output
    assert "project-management" in output
class TestMemoryClear:
def test_clear_removes_file(self, project):

View File

@@ -114,6 +114,30 @@ class TestMetricsCli:
assert metrics_dir.exists()
assert (metrics_dir / "executions.jsonl").exists()
def test_memory_brief_includes_performance_summary(
    self, runner: CliRunner, project_dir: Path
):
    """``memory brief`` prints the metrics section after one recorded success."""
    agent = "tdd-workflow"
    target_args = ["--target", str(project_dir)]

    # Set up memory, then record a single successful execution.
    runner.invoke(cli, ["memory", "init", agent, *target_args])
    record_args = [
        "metrics",
        "record",
        agent,
        *target_args,
        "--success",
        "--quality",
        "0.9",
    ]
    runner.invoke(cli, record_args)

    brief = runner.invoke(cli, ["memory", "brief", agent, *target_args])
    assert brief.exit_code == 0
    assert "## Performance Summary" in brief.output
    # One success out of one execution.
    assert "Success rate: 100.0%" in brief.output
def test_memory_init_no_metrics_flag(self, runner: CliRunner, project_dir: Path):
result = runner.invoke(
cli,

View File

@@ -160,9 +160,9 @@ Unify qualitative memory and quantitative metrics in the orientation path.
### Tasks
- [ ] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
- [ ] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
- [ ] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
- [x] T14 — Extend `memory brief` to include metrics summary for target agent (recent success rate, avg quality, trend arrow)
- [x] T15 — Extend `agent-coach.md` to reference metrics context in synthesis instructions
- [x] T16 — E2e test: populate memory + metrics for two agents → `memory brief` includes both qualitative and quantitative sections
### Definition of done