WP-0003 Part 5: tdd-workflow metrics pilot

Add metrics frontmatter and session-close recording to tdd-workflow,
document the reference implementation in wiki/AboutKaizenAgents.md,
and add an e2e test covering record → show → optimize → brief.
This commit is contained in:
2026-06-16 01:48:43 +02:00
parent 04fdc249f5
commit fd2edfbe6c
4 changed files with 231 additions and 17 deletions

View File

@@ -8,8 +8,10 @@ Tests the full workflow:
4. memory brief — verify orientation brief includes own memory and cross-agent context
5. protocols list / show — verify protocol discovery works
6. memory clear — verify wipe works
7. tdd-workflow pilot — record → show → optimize → brief (WP-0003 Part 5)
"""
import json
import textwrap
from pathlib import Path
@@ -17,6 +19,8 @@ import pytest
from click.testing import CliRunner
from kaizen_agentic.cli import cli
from kaizen_agentic.metrics import MetricsStore, OptimizerStore
from kaizen_agentic.optimization import MIN_SAMPLES_FOR_RECOMMENDATIONS
# ---------------------------------------------------------------------------
@@ -67,6 +71,34 @@ def _sys_medic_memory() -> str:
""")
def _tdd_workflow_memory() -> str:
"""Realistic tdd-workflow memory after two issue cycles."""
return textwrap.dedent("""\
---
agent: tdd-workflow
project: demo-app
last_updated: 2026-06-16
session_count: 2
---
## Project Context
Python service using TDD8 with Gitea issues and pytest.
## Accumulated Findings
- Sidequests from REFINE often block PUBLISH when lint debt accumulates
## What Worked
- `make tdd-start NUM=X` before writing tests keeps RED phase focused
## Watch Points
- Flaky integration tests under parallel pytest (-n auto)
## Session Log
2026-06-10 · issue 12 metrics store · PUBLISH complete · success
2026-06-16 · issue 15 CLI flags · stalled at REFINE · partial
""")
def _project_management_memory() -> str:
"""Minimal project-management agent memory."""
return textwrap.dedent("""\
@@ -275,6 +307,104 @@ class TestMemoryClear:
assert "nothing to clear" in result.output
class TestTddWorkflowMetricsPilot:
    """Full measure → analyse → orient loop for the tdd-workflow pilot agent.

    Drives the CLI through ``metrics record``, ``metrics show``,
    ``metrics optimize`` and ``memory brief`` against a single project
    directory and checks the output of each stage.
    """

    def _populate_memory(self, project: Path) -> None:
        """Write the sample tdd-workflow memory file below ``project``.

        Args:
            project: Root directory that receives
                ``.kaizen/agents/tdd-workflow/memory.md``.
        """
        memory_dir = project / ".kaizen" / "agents" / "tdd-workflow"
        memory_dir.mkdir(parents=True, exist_ok=True)
        # The sample memory contains non-ASCII characters ("·"), so pin the
        # encoding instead of relying on the platform's locale default.
        (memory_dir / "memory.md").write_text(
            _tdd_workflow_memory(), encoding="utf-8"
        )

    def test_full_metrics_loop_record_show_optimize_brief(self, project):
        """Record two sessions, then verify show, optimize and brief output.

        Args:
            project: pytest fixture defined outside this view; presumably a
                temporary project directory — confirm against the fixture.
        """
        runner = CliRunner()
        self._populate_memory(project)

        # One successful and one failed session, each sent to the CLI as a
        # JSON payload on stdin.
        sessions = [
            {
                "success": True,
                "execution_time_s": 4200.0,
                "quality_score": 0.92,
                "primary_metric": {
                    "name": "test_pass_rate",
                    "value": 1.0,
                    "target": 1.0,
                },
                "metadata": {"issue": "12", "phase": "PUBLISH"},
            },
            {
                "success": False,
                "execution_time_s": 5400.0,
                "quality_score": 0.45,
                "primary_metric": {
                    "name": "test_pass_rate",
                    "value": 0.78,
                    "target": 1.0,
                },
                "metadata": {"issue": "15", "phase": "REFINE"},
            },
        ]

        for index, payload in enumerate(sessions, start=1):
            result = runner.invoke(
                cli,
                [
                    "metrics",
                    "record",
                    "tdd-workflow",
                    "--target",
                    str(project),
                    "--json",
                    "--idempotency-key",
                    f"session-{index}",
                ],
                input=json.dumps(payload),
            )
            assert result.exit_code == 0, result.output
            assert "Recorded metrics" in result.output

        show_result = runner.invoke(
            cli,
            ["metrics", "show", "tdd-workflow", "--target", str(project)],
        )
        # Attach the CLI output so a failure is diagnosable, matching the
        # record and optimize assertions.
        assert show_result.exit_code == 0, show_result.output
        assert (
            "test_pass_rate" in show_result.output
            or "2 execution" in show_result.output.lower()
        )

        # Top the store up directly until it holds
        # MIN_SAMPLES_FOR_RECOMMENDATIONS entries (no extra entries are added
        # when the two recorded sessions already reach that count).
        # NOTE(review): presumably the optimizer needs this many samples
        # before it emits recommendations — confirm in kaizen_agentic.optimization.
        store = MetricsStore(project, "tdd-workflow")
        for i in range(MIN_SAMPLES_FOR_RECOMMENDATIONS - len(sessions)):
            store.append(
                {
                    "success": False,
                    "execution_time_s": 90.0 + i,
                    "quality_score": 0.35,
                    "primary_metric": {
                        "name": "test_pass_rate",
                        "value": 0.6,
                        "target": 1.0,
                    },
                },
                idempotency_key=f"seed-{i}",
            )

        optimize_result = runner.invoke(
            cli,
            ["metrics", "optimize", "tdd-workflow", "--target", str(project)],
        )
        assert optimize_result.exit_code == 0, optimize_result.output

        # The optimize run is expected to leave both output files on disk.
        optimizer = OptimizerStore(project)
        assert optimizer.analysis_path.exists()
        assert optimizer.recommendations_path.exists()

        brief_result = runner.invoke(
            cli,
            ["memory", "brief", "tdd-workflow", "--target", str(project)],
        )
        assert brief_result.exit_code == 0, brief_result.output
        assert "## Performance Summary" in brief_result.output
        assert "Success rate:" in brief_result.output
        # Either marker shows the populated memory file reached the brief.
        assert "issue 12" in brief_result.output or "TDD8" in brief_result.output
        assert "Your Memory" in brief_result.output
class TestProtocolsCommand:
def test_protocols_list_finds_sys_medic(self):
"""Protocols list against the real agents dir should include sys-medic k3s protocol."""