# Add kaizen-agentic feedback CLI, Gitea issue templates, CI workflow, pre-commit
# hooks, FEEDBACK/TELEMETRY docs, and cross-platform path tests. Improve CLI
# registry error messages; remove agents_backup scaffolding. Apply black
# formatting across src/tests for CI consistency. State Hub message sent to
# agentic-resources for Helix correlation doc link.
#
# 463 lines, 16 KiB, Python
"""
|
|
End-to-end tests for the agency framework: memory lifecycle and coach orientation.
|
|
|
|
Tests the full workflow:
|
|
1. memory init — scaffold a memory file in a test project
|
|
2. Populate memory with realistic content (simulating sessions)
|
|
3. memory show — verify content is readable
|
|
4. memory brief — verify orientation brief includes own memory and cross-agent context
|
|
5. protocols list / show — verify protocol discovery works
|
|
6. memory clear — verify wipe works
|
|
7. tdd-workflow pilot — record → show → optimize → brief (WP-0003 Part 5)
|
|
"""
|
|
|
|
import json
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from click.testing import CliRunner
|
|
|
|
from kaizen_agentic.cli import cli
|
|
from kaizen_agentic.metrics import MetricsStore, OptimizerStore
|
|
from kaizen_agentic.optimization import MIN_SAMPLES_FOR_RECOMMENDATIONS
|
|
|
|
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _sys_medic_memory() -> str:
|
|
"""Realistic sys-medic memory after two simulated sessions."""
|
|
return textwrap.dedent("""\
|
|
---
|
|
agent: sys-medic
|
|
project: test-cluster
|
|
last_updated: 2026-03-18
|
|
session_count: 2
|
|
---
|
|
|
|
## Project Context
|
|
k3s single-node cluster on an ARM64 host (tegpi-01).
|
|
No external load balancer. Traefik ingress. Longhorn storage.
|
|
|
|
## Accumulated Findings
|
|
- kubelet log rotation was disabled; logs grew to 2.1 GB
|
|
- containerd image GC threshold was set too high (98%)
|
|
|
|
## What Worked
|
|
- `journalctl --vacuum-size=500M` recovered ~1.8 GB without restart
|
|
- Lowering GC threshold to 80% in containerd config resolved disk pressure
|
|
|
|
## Watch Points
|
|
- inotify watch limit hits ceiling under heavy Longhorn load
|
|
- node has only 4 GB RAM; memory pressure risk during backup windows
|
|
|
|
## Open Threads
|
|
- Check whether kube-system namespace daemonsets have resource limits set
|
|
|
|
## Node Profiles
|
|
tegpi-01 | load avg ~0.6 at idle | inotify-limited under load | 2026-03-18
|
|
|
|
## Recurring Findings
|
|
- kubelet log growth · first seen 2026-03-10 · 2 occurrences
|
|
|
|
## Cleared Issues
|
|
- containerd GC disk pressure · adjusted config 2026-03-18 · resolved
|
|
|
|
## Session Log
|
|
2026-03-10 · tegpi-01 initial assessment · found log bloat + GC issue · recommendations documented
|
|
2026-03-18 · tegpi-01 follow-up · verified GC fix; inotify limit noted · watch
|
|
""")
|
|
|
|
|
|
def _tdd_workflow_memory() -> str:
|
|
"""Realistic tdd-workflow memory after two issue cycles."""
|
|
return textwrap.dedent("""\
|
|
---
|
|
agent: tdd-workflow
|
|
project: demo-app
|
|
last_updated: 2026-06-16
|
|
session_count: 2
|
|
---
|
|
|
|
## Project Context
|
|
Python service using TDD8 with Gitea issues and pytest.
|
|
|
|
## Accumulated Findings
|
|
- Sidequests from REFINE often block PUBLISH when lint debt accumulates
|
|
|
|
## What Worked
|
|
- `make tdd-start NUM=X` before writing tests keeps RED phase focused
|
|
|
|
## Watch Points
|
|
- Flaky integration tests under parallel pytest (-n auto)
|
|
|
|
## Session Log
|
|
2026-06-10 · issue 12 metrics store · PUBLISH complete · success
|
|
2026-06-16 · issue 15 CLI flags · stalled at REFINE · partial
|
|
""")
|
|
|
|
|
|
def _project_management_memory() -> str:
|
|
"""Minimal project-management agent memory."""
|
|
return textwrap.dedent("""\
|
|
---
|
|
agent: project-management
|
|
project: test-cluster
|
|
last_updated: 2026-03-15
|
|
session_count: 1
|
|
---
|
|
|
|
## Project Context
|
|
Operational runbook project for the k3s home cluster.
|
|
|
|
## Accumulated Findings
|
|
- Infra tasks are better tracked in Gitea issues than in TODO files
|
|
|
|
## Session Log
|
|
2026-03-15 · initial planning session · task structure agreed
|
|
""")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def project(tmp_path: Path) -> Path:
    """A temporary 'project' directory with a name.

    The directory is called ``test-cluster``; the init test in this module
    expects that name to appear as ``project: test-cluster`` in the scaffold.
    """
    p = tmp_path / "test-cluster"
    p.mkdir()
    return p
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestMemoryInit:
    """Checks for `memory init`: file creation, scaffold content, re-running."""

    def test_init_creates_file(self, project):
        """A first init exits 0, reports it, and creates the agent's memory.md."""
        runner = CliRunner()
        result = runner.invoke(
            cli, ["memory", "init", "sys-medic", "--target", str(project)]
        )
        assert result.exit_code == 0, result.output
        assert "Initialized memory" in result.output

        memory_file = project / ".kaizen" / "agents" / "sys-medic" / "memory.md"
        assert memory_file.exists()

    def test_init_file_content_has_required_sections(self, project):
        """The scaffold carries the expected header fields and section headings."""
        runner = CliRunner()
        result = runner.invoke(
            cli, ["memory", "init", "sys-medic", "--target", str(project)]
        )
        # Fail here with the CLI output instead of on a missing file below.
        assert result.exit_code == 0, result.output

        memory_file = project / ".kaizen" / "agents" / "sys-medic" / "memory.md"
        content = memory_file.read_text()

        expected_fragments = (
            "agent: sys-medic",
            "project: test-cluster",
            "session_count: 0",
            "## Project Context",
            "## Accumulated Findings",
            "## What Worked",
            "## Watch Points",
            "## Open Threads",
            "## Session Log",
        )
        # Collect everything that is absent so one run reports all gaps.
        missing = [frag for frag in expected_fragments if frag not in content]
        assert not missing, f"scaffold is missing {missing!r}:\n{content}"

    def test_init_idempotent(self, project):
        """A second init on the same target still exits 0 and says so."""
        runner = CliRunner()
        init_args = ["memory", "init", "sys-medic", "--target", str(project)]

        first = runner.invoke(cli, init_args)
        # The repeat run is only meaningful if the first one really succeeded.
        assert first.exit_code == 0, first.output

        result = runner.invoke(cli, init_args)
        assert result.exit_code == 0, result.output
        assert "already exists" in result.output
|
|
|
|
|
|
class TestMemoryShow:
    """Checks for `memory show` with and without an existing memory file."""

    def test_show_returns_content(self, project):
        """Content written to memory.md is echoed back by `memory show`."""
        memory_file = project / ".kaizen" / "agents" / "sys-medic" / "memory.md"
        memory_file.parent.mkdir(parents=True, exist_ok=True)
        memory_file.write_text(_sys_medic_memory())

        runner = CliRunner()
        result = runner.invoke(
            cli, ["memory", "show", "sys-medic", "--target", str(project)]
        )
        assert result.exit_code == 0, result.output
        assert "Node Profiles" in result.output
        assert "tegpi-01" in result.output

    def test_show_missing_prints_guidance(self, project):
        """With no memory file the command exits 0 and points at `memory init`."""
        runner = CliRunner()
        result = runner.invoke(
            cli, ["memory", "show", "sys-medic", "--target", str(project)]
        )
        assert result.exit_code == 0, result.output
        assert "No memory found" in result.output
        assert "memory init" in result.output
|
|
|
|
|
|
class TestMemoryBrief:
    """Checks for `memory brief`: own memory, other agents' context, metrics."""

    def _populate(self, project):
        """Write both agent memories into the project."""
        memories = {
            "sys-medic": _sys_medic_memory(),
            "project-management": _project_management_memory(),
        }
        for agent, content in memories.items():
            agent_dir = project / ".kaizen" / "agents" / agent
            agent_dir.mkdir(parents=True, exist_ok=True)
            (agent_dir / "memory.md").write_text(content)

    def _brief(self, project, *extra_args):
        """Run `memory brief sys-medic` against *project* and return the result."""
        return CliRunner().invoke(
            cli,
            ["memory", "brief", "sys-medic", "--target", str(project), *extra_args],
        )

    def test_brief_includes_own_memory(self, project):
        self._populate(project)
        result = self._brief(project)
        assert result.exit_code == 0, result.output
        assert "Orientation Brief for: sys-medic" in result.output
        assert "Your Memory" in result.output
        assert "tegpi-01" in result.output  # content from sys-medic memory

    def test_brief_includes_cross_agent_context(self, project):
        self._populate(project)
        result = self._brief(project)
        assert result.exit_code == 0, result.output
        assert "Context From Other Agents" in result.output
        assert "project-management" in result.output

    def test_brief_coach_tip_present(self, project):
        self._populate(project)
        result = self._brief(project)
        assert result.exit_code == 0, result.output
        assert "agent-coach" in result.output

    def test_brief_no_memory_gives_guidance(self, project):
        # Deliberately no _populate(): the project has no memory files at all.
        result = self._brief(project)
        assert result.exit_code == 0, result.output
        assert "No agent memory files found" in result.output

    def test_brief_raw_flag_skips_header(self, project):
        self._populate(project)
        result = self._brief(project, "--raw")
        assert result.exit_code == 0, result.output
        assert "=== sys-medic ===" in result.output
        # Raw mode should not include the orientation header
        assert "Orientation Brief for:" not in result.output

    def test_brief_includes_performance_summary_with_memory_and_metrics(self, project):
        self._populate(project)
        runner = CliRunner()

        # One recorded run per agent: (agent, --time, --quality).
        recordings = (
            ("sys-medic", "30", "0.88"),
            ("project-management", "15", "0.95"),
        )
        for agent, seconds, quality in recordings:
            recorded = runner.invoke(
                cli,
                [
                    "metrics",
                    "record",
                    agent,
                    "--target",
                    str(project),
                    "--success",
                    "--time",
                    seconds,
                    "--quality",
                    quality,
                ],
            )
            # A failed recording would otherwise only show up as a missing
            # "Performance Summary" section further down.
            assert recorded.exit_code == 0, recorded.output

        result = self._brief(project)

        assert result.exit_code == 0, result.output
        assert "## Performance Summary" in result.output
        assert "Success rate:" in result.output
        assert "tegpi-01" in result.output
        assert "Context From Other Agents" in result.output
        assert "project-management" in result.output
|
|
|
|
|
|
class TestMemoryClear:
    """Checks for `memory clear`, answering the prompt with "y" on stdin."""

    def test_clear_removes_file(self, project):
        """After a confirmed clear the memory file no longer exists."""
        memory_file = project / ".kaizen" / "agents" / "sys-medic" / "memory.md"
        memory_file.parent.mkdir(parents=True, exist_ok=True)
        memory_file.write_text(_sys_medic_memory())

        runner = CliRunner()
        result = runner.invoke(
            cli,
            ["memory", "clear", "sys-medic", "--target", str(project)],
            input="y\n",
        )
        assert result.exit_code == 0, result.output
        assert not memory_file.exists()

    def test_clear_missing_is_graceful(self, project):
        """Clearing when nothing exists exits 0 and reports there was nothing."""
        runner = CliRunner()
        result = runner.invoke(
            cli,
            ["memory", "clear", "sys-medic", "--target", str(project)],
            input="y\n",
        )
        assert result.exit_code == 0, result.output
        assert "nothing to clear" in result.output
|
|
|
|
|
|
class TestTddWorkflowMetricsPilot:
    """Full measure → analyse → orient loop for the tdd-workflow pilot agent."""

    def _populate_memory(self, project: Path) -> None:
        """Write the canned tdd-workflow memory file into *project*."""
        memory_dir = project / ".kaizen" / "agents" / "tdd-workflow"
        memory_dir.mkdir(parents=True, exist_ok=True)
        (memory_dir / "memory.md").write_text(_tdd_workflow_memory())

    @staticmethod
    def _payload(success, execution_time_s, quality_score, pass_rate, metadata=None):
        """Build one metrics record; ``metadata`` is omitted when not given."""
        record = {
            "success": success,
            "execution_time_s": execution_time_s,
            "quality_score": quality_score,
            "primary_metric": {
                "name": "test_pass_rate",
                "value": pass_rate,
                "target": 1.0,
            },
        }
        if metadata is not None:
            record["metadata"] = metadata
        return record

    def test_full_metrics_loop_record_show_optimize_brief(self, project):
        runner = CliRunner()
        self._populate_memory(project)

        sessions = [
            self._payload(
                True, 4200.0, 0.92, 1.0, {"issue": "12", "phase": "PUBLISH"}
            ),
            self._payload(
                False, 5400.0, 0.45, 0.78, {"issue": "15", "phase": "REFINE"}
            ),
        ]

        # 1. record: feed each session to the CLI as JSON on stdin.
        for index, payload in enumerate(sessions, start=1):
            result = runner.invoke(
                cli,
                [
                    "metrics",
                    "record",
                    "tdd-workflow",
                    "--target",
                    str(project),
                    "--json",
                    "--idempotency-key",
                    f"session-{index}",
                ],
                input=json.dumps(payload),
            )
            assert result.exit_code == 0, result.output
            assert "Recorded metrics" in result.output

        # 2. show: the recorded data must be visible.
        show_result = runner.invoke(
            cli,
            ["metrics", "show", "tdd-workflow", "--target", str(project)],
        )
        assert show_result.exit_code == 0, show_result.output
        assert (
            "test_pass_rate" in show_result.output
            or "2 execution" in show_result.output.lower()
        ), show_result.output

        # Top up through the store directly so the total sample count reaches
        # MIN_SAMPLES_FOR_RECOMMENDATIONS (presumably the minimum that
        # `metrics optimize` needs to emit recommendations; confirm in
        # kaizen_agentic.optimization).
        store = MetricsStore(project, "tdd-workflow")
        for i in range(MIN_SAMPLES_FOR_RECOMMENDATIONS - len(sessions)):
            store.append(
                self._payload(False, 90.0 + i, 0.35, 0.6),
                idempotency_key=f"seed-{i}",
            )

        # 3. optimize: must write both the analysis and recommendations files.
        optimize_result = runner.invoke(
            cli,
            ["metrics", "optimize", "tdd-workflow", "--target", str(project)],
        )
        assert optimize_result.exit_code == 0, optimize_result.output
        optimizer = OptimizerStore(project)
        assert optimizer.analysis_path.exists()
        assert optimizer.recommendations_path.exists()

        # 4. brief: the orientation combines the metrics summary with memory.
        brief_result = runner.invoke(
            cli,
            ["memory", "brief", "tdd-workflow", "--target", str(project)],
        )
        assert brief_result.exit_code == 0, brief_result.output
        assert "## Performance Summary" in brief_result.output
        assert "Success rate:" in brief_result.output
        assert "issue 12" in brief_result.output or "TDD8" in brief_result.output
        assert "Your Memory" in brief_result.output
|
|
|
|
|
|
class TestProtocolsCommand:
    """Checks for `protocols list` / `protocols show`, run without `--target`."""

    def test_protocols_list_finds_sys_medic(self):
        """Protocols list against the real agents dir should include sys-medic k3s protocol."""
        runner = CliRunner()
        result = runner.invoke(cli, ["protocols", "list"])
        assert result.exit_code == 0, result.output
        assert "sys-medic" in result.output
        # The previous `.replace("-", "-")` on the output swapped a hyphen for
        # the same hyphen, so it is dropped; the check itself is unchanged.
        assert "k3s-node-health-assessment" in result.output

    def test_protocols_list_filtered_by_agent(self):
        """Passing an agent name to `protocols list` still shows its k3s protocol."""
        runner = CliRunner()
        result = runner.invoke(cli, ["protocols", "list", "sys-medic"])
        assert result.exit_code == 0, result.output
        assert "k3s" in result.output.lower()

    def test_protocols_show_outputs_content(self):
        """`protocols show` prints the protocol body for a known agent/protocol."""
        runner = CliRunner()
        result = runner.invoke(
            cli, ["protocols", "show", "sys-medic", "k3s-node-health-assessment"]
        )
        assert result.exit_code == 0, result.output
        # Protocol should contain key structural sections
        assert "k3s" in result.output.lower()
        assert "Prerequisites" in result.output or "Scope" in result.output

    def test_protocols_list_unknown_agent_no_crash(self):
        """An unknown agent name exits 0 with a "No protocols found" message."""
        runner = CliRunner()
        result = runner.invoke(cli, ["protocols", "list", "nonexistent-agent"])
        assert result.exit_code == 0, result.output
        assert "No protocols found" in result.output
|