Files
kaizen-agentic/tests/test_e2e_agency_framework.py
tegwick 04fdc249f5 Bridge Coach memory brief with project metrics summaries.
Add Performance Summary block to memory brief, document metrics synthesis in
agent-coach, and add e2e and CLI tests for qualitative plus quantitative briefs.
2026-06-16 01:46:51 +02:00

306 lines
11 KiB
Python

"""
End-to-end tests for the agency framework: memory lifecycle and coach orientation.
Tests the full workflow:
1. memory init — scaffold a memory file in a test project
2. Populate memory with realistic content (simulating sessions)
3. memory show — verify content is readable
4. memory brief — verify orientation brief includes own memory, cross-agent context, and (once metrics are recorded) a performance summary
5. protocols list / show — verify protocol discovery works
6. memory clear — verify wipe works
"""
import textwrap
from pathlib import Path
import pytest
from click.testing import CliRunner
from kaizen_agentic.cli import cli
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _sys_medic_memory() -> str:
"""Realistic sys-medic memory after two simulated sessions."""
return textwrap.dedent("""\
---
agent: sys-medic
project: test-cluster
last_updated: 2026-03-18
session_count: 2
---
## Project Context
k3s single-node cluster on an ARM64 host (tegpi-01).
No external load balancer. Traefik ingress. Longhorn storage.
## Accumulated Findings
- kubelet log rotation was disabled; logs grew to 2.1 GB
- containerd image GC threshold was set too high (98%)
## What Worked
- `journalctl --vacuum-size=500M` recovered ~1.8 GB without restart
- Lowering GC threshold to 80% in containerd config resolved disk pressure
## Watch Points
- inotify watch limit hits ceiling under heavy Longhorn load
- node has only 4 GB RAM; memory pressure risk during backup windows
## Open Threads
- Check whether kube-system namespace daemonsets have resource limits set
## Node Profiles
tegpi-01 | load avg ~0.6 at idle | inotify-limited under load | 2026-03-18
## Recurring Findings
- kubelet log growth · first seen 2026-03-10 · 2 occurrences
## Cleared Issues
- containerd GC disk pressure · adjusted config 2026-03-18 · resolved
## Session Log
2026-03-10 · tegpi-01 initial assessment · found log bloat + GC issue · recommendations documented
2026-03-18 · tegpi-01 follow-up · verified GC fix; inotify limit noted · watch
""")
def _project_management_memory() -> str:
"""Minimal project-management agent memory."""
return textwrap.dedent("""\
---
agent: project-management
project: test-cluster
last_updated: 2026-03-15
session_count: 1
---
## Project Context
Operational runbook project for the k3s home cluster.
## Accumulated Findings
- Infra tasks are better tracked in Gitea issues than in TODO files
## Session Log
2026-03-15 · initial planning session · task structure agreed
""")
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def project(tmp_path):
    """Create an empty ``test-cluster`` directory under ``tmp_path`` and return it.

    The directory name matters: the init test in this module expects the
    scaffolded memory to contain ``project: test-cluster``.
    """
    project_dir = tmp_path / "test-cluster"
    project_dir.mkdir()
    return project_dir
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestMemoryInit:
    """Tests for ``memory init``: scaffolding a memory file for an agent."""

    @staticmethod
    def _run_init(project):
        """Invoke ``memory init sys-medic --target <project>`` and return the result."""
        return CliRunner().invoke(
            cli, ["memory", "init", "sys-medic", "--target", str(project)]
        )

    @staticmethod
    def _memory_file(project):
        """Path where the sys-medic memory file is expected inside *project*."""
        return project / ".kaizen" / "agents" / "sys-medic" / "memory.md"

    def test_init_creates_file(self, project):
        """A first init succeeds, reports it, and leaves the file on disk."""
        result = self._run_init(project)
        assert result.exit_code == 0, result.output
        assert "Initialized memory" in result.output
        assert self._memory_file(project).exists()

    def test_init_file_content_has_required_sections(self, project):
        """The scaffolded file carries the front matter and all standard sections."""
        result = self._run_init(project)
        # Check the setup step so a failed init reports the CLI output instead
        # of surfacing as FileNotFoundError on the read below.
        assert result.exit_code == 0, result.output
        content = self._memory_file(project).read_text()
        assert "agent: sys-medic" in content
        assert "project: test-cluster" in content
        assert "session_count: 0" in content
        required_sections = (
            "## Project Context",
            "## Accumulated Findings",
            "## What Worked",
            "## Watch Points",
            "## Open Threads",
            "## Session Log",
        )
        for heading in required_sections:
            assert heading in content, f"missing section: {heading}"

    def test_init_idempotent(self, project):
        """A second init on the same project succeeds and reports the existing file."""
        first = self._run_init(project)
        # The second run is only meaningful if the first one created the file.
        assert first.exit_code == 0, first.output
        second = self._run_init(project)
        assert second.exit_code == 0, second.output
        assert "already exists" in second.output
class TestMemoryShow:
    """Tests for ``memory show``: printing an agent's stored memory."""

    @staticmethod
    def _run_show(project):
        """Invoke ``memory show sys-medic --target <project>`` and return the result."""
        return CliRunner().invoke(
            cli, ["memory", "show", "sys-medic", "--target", str(project)]
        )

    def test_show_returns_content(self, project):
        """With a memory file present, its content appears in the output."""
        memory_file = project / ".kaizen" / "agents" / "sys-medic" / "memory.md"
        memory_file.parent.mkdir(parents=True, exist_ok=True)
        memory_file.write_text(_sys_medic_memory())
        result = self._run_show(project)
        # Attach the CLI output so a non-zero exit is diagnosable from the report.
        assert result.exit_code == 0, result.output
        assert "Node Profiles" in result.output
        assert "tegpi-01" in result.output

    def test_show_missing_prints_guidance(self, project):
        """Without a memory file the command still exits 0 and points at ``memory init``."""
        result = self._run_show(project)
        assert result.exit_code == 0, result.output
        assert "No memory found" in result.output
        assert "memory init" in result.output
class TestMemoryBrief:
    """Tests for ``memory brief``: the orientation brief assembled for an agent."""

    def _populate(self, project):
        """Write both agent memories into the project."""
        memories = {
            "sys-medic": _sys_medic_memory(),
            "project-management": _project_management_memory(),
        }
        for agent, text in memories.items():
            agent_dir = project / ".kaizen" / "agents" / agent
            agent_dir.mkdir(parents=True, exist_ok=True)
            (agent_dir / "memory.md").write_text(text)

    @staticmethod
    def _run_brief(project, *extra_args):
        """Invoke ``memory brief sys-medic --target <project>`` plus any extra flags."""
        return CliRunner().invoke(
            cli,
            ["memory", "brief", "sys-medic", "--target", str(project), *extra_args],
        )

    @staticmethod
    def _record_success(project, agent, *, seconds, quality):
        """Record one successful metrics entry for *agent* and require it to succeed.

        ``seconds`` and ``quality`` are passed to the CLI verbatim, so they
        must already be strings.
        """
        result = CliRunner().invoke(
            cli,
            [
                "metrics",
                "record",
                agent,
                "--target",
                str(project),
                "--success",
                "--time",
                seconds,
                "--quality",
                quality,
            ],
        )
        # Without this check a failed recording would only show up later as a
        # missing "Performance Summary", hiding the real cause.
        assert result.exit_code == 0, result.output

    def test_brief_includes_own_memory(self, project):
        """The brief has the orientation header and the agent's own memory content."""
        self._populate(project)
        result = self._run_brief(project)
        assert result.exit_code == 0, result.output
        assert "Orientation Brief for: sys-medic" in result.output
        assert "Your Memory" in result.output
        assert "tegpi-01" in result.output  # content from sys-medic memory

    def test_brief_includes_cross_agent_context(self, project):
        """Other agents' memories are surfaced under a cross-agent heading."""
        self._populate(project)
        result = self._run_brief(project)
        assert result.exit_code == 0, result.output
        assert "Context From Other Agents" in result.output
        assert "project-management" in result.output

    def test_brief_coach_tip_present(self, project):
        """The brief mentions agent-coach."""
        self._populate(project)
        result = self._run_brief(project)
        assert result.exit_code == 0, result.output
        assert "agent-coach" in result.output

    def test_brief_no_memory_gives_guidance(self, project):
        """With no memory files at all the command exits 0 and says so."""
        result = self._run_brief(project)
        assert result.exit_code == 0, result.output
        assert "No agent memory files found" in result.output

    def test_brief_raw_flag_skips_header(self, project):
        """``--raw`` emits per-agent markers but not the orientation header."""
        self._populate(project)
        result = self._run_brief(project, "--raw")
        assert result.exit_code == 0, result.output
        assert "=== sys-medic ===" in result.output
        # Raw mode should not include the orientation header
        assert "Orientation Brief for:" not in result.output

    def test_brief_includes_performance_summary_with_memory_and_metrics(self, project):
        """With memories and recorded metrics, the brief shows both kinds of content."""
        self._populate(project)
        self._record_success(project, "sys-medic", seconds="30", quality="0.88")
        self._record_success(project, "project-management", seconds="15", quality="0.95")
        result = self._run_brief(project)
        assert result.exit_code == 0, result.output
        # Quantitative part, derived from the recorded metrics.
        assert "## Performance Summary" in result.output
        assert "Success rate:" in result.output
        # Qualitative part, taken from the memory files.
        assert "tegpi-01" in result.output
        assert "Context From Other Agents" in result.output
        assert "project-management" in result.output
class TestMemoryClear:
    """Tests for ``memory clear``: deleting an agent's memory file."""

    @staticmethod
    def _run_clear(project):
        """Invoke ``memory clear sys-medic --target <project>``, answering ``y`` on stdin.

        The ``y`` is presumably consumed by a confirmation prompt; that prompt
        lives in the CLI and is not visible from this module.
        """
        return CliRunner().invoke(
            cli,
            ["memory", "clear", "sys-medic", "--target", str(project)],
            input="y\n",
        )

    def test_clear_removes_file(self, project):
        """An existing memory file is gone after a confirmed clear."""
        memory_file = project / ".kaizen" / "agents" / "sys-medic" / "memory.md"
        memory_file.parent.mkdir(parents=True, exist_ok=True)
        memory_file.write_text(_sys_medic_memory())
        result = self._run_clear(project)
        # Attach the CLI output so a non-zero exit is diagnosable from the report.
        assert result.exit_code == 0, result.output
        assert not memory_file.exists()

    def test_clear_missing_is_graceful(self, project):
        """Clearing when no memory exists exits 0 and says there is nothing to clear."""
        result = self._run_clear(project)
        assert result.exit_code == 0, result.output
        assert "nothing to clear" in result.output
class TestProtocolsCommand:
    """Tests for ``protocols list`` / ``protocols show``.

    None of these tests pass ``--target``; they run against whatever agents
    directory the CLI resolves by default.
    """

    @staticmethod
    def _run_protocols(*args):
        """Invoke ``protocols <args...>`` and return the result."""
        return CliRunner().invoke(cli, ["protocols", *args])

    def test_protocols_list_finds_sys_medic(self):
        """Protocols list against the real agents dir should include sys-medic k3s protocol."""
        result = self._run_protocols("list")
        assert result.exit_code == 0, result.output
        assert "sys-medic" in result.output
        # The previous ``.replace("-", "-")`` on the output was a no-op and has
        # been dropped; the protocol name is matched verbatim.
        assert "k3s-node-health-assessment" in result.output

    def test_protocols_list_filtered_by_agent(self):
        """Listing for one agent still shows its k3s protocol (case-insensitive match)."""
        result = self._run_protocols("list", "sys-medic")
        assert result.exit_code == 0, result.output
        assert "k3s" in result.output.lower()

    def test_protocols_show_outputs_content(self):
        """Showing a protocol prints its body, including a structural heading."""
        result = self._run_protocols("show", "sys-medic", "k3s-node-health-assessment")
        assert result.exit_code == 0, result.output
        # Protocol should contain key structural sections
        assert "k3s" in result.output.lower()
        assert "Prerequisites" in result.output or "Scope" in result.output

    def test_protocols_list_unknown_agent_no_crash(self):
        """An unknown agent name exits 0 with a 'no protocols' message."""
        result = self._run_protocols("list", "nonexistent-agent")
        assert result.exit_code == 0, result.output
        assert "No protocols found" in result.output