Add metrics CLI for project-scoped agent performance records.

Implement record, show, list, and export commands; document session-close protocol template; extend cheat sheet and agency-framework docs; add CLI tests.
2026-06-16 01:38:42 +02:00
parent 5cd3da3166
commit 97b7eb8cba
6 changed files with 303 additions and 5 deletions
--- a/tests/test_metrics_cli.py
+++ b/tests/test_metrics_cli.py
@@ -0,0 +1,123 @@
+"""CLI tests for project-scoped metrics commands."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from click.testing import CliRunner
+
+from kaizen_agentic.cli import cli
+
+
+@pytest.fixture
+def runner() -> CliRunner:
+    return CliRunner()  # new runner built with no arguments, i.e. Click's defaults
+
+
+@pytest.fixture
+def project_dir(tmp_path: Path) -> Path:  # empty "demo-project" dir under pytest's tmp_path
+    demo_root = tmp_path.joinpath("demo-project")
+    demo_root.mkdir()  # no exist_ok: the directory must not exist yet
+    return demo_root
+
+
+class TestMetricsCli:
+    """Exercise the ``metrics`` and ``memory init`` commands through CliRunner."""
+
+    def test_record_show_list_export_flow(self, runner: CliRunner, project_dir: Path):
+        """Record one execution, then read it back through show, list and export."""
+        target = str(project_dir)
+
+        record_args = ["metrics", "record", "tdd-workflow", "--target", target]
+        record_opts = ["--success", "--time", "42", "--quality", "0.85"]
+        record = runner.invoke(cli, record_args + record_opts)
+        assert record.exit_code == 0, record.output
+        assert "Recorded metrics" in record.output
+
+        show = runner.invoke(cli, ["metrics", "show", "tdd-workflow", "--target", target])
+        assert show.exit_code == 0, show.output
+        assert '"execution_count": 1' in show.output
+        assert '"success": true' in show.output
+
+        listed = runner.invoke(cli, ["metrics", "list", "--target", target])
+        assert listed.exit_code == 0, listed.output
+        assert "tdd-workflow" in listed.output
+
+        export = runner.invoke(cli, ["metrics", "export", "tdd-workflow", "--target", target])
+        assert export.exit_code == 0, export.output
+        # The single recorded execution must come back as exactly one JSON line.
+        lines = [line for line in export.output.splitlines() if line.strip()]
+        assert len(lines) == 1
+        assert json.loads(lines[0])["quality_score"] == 0.85
+
+    def test_record_json_from_stdin(self, runner: CliRunner, project_dir: Path):
+        """``record --json`` takes the metrics payload from standard input."""
+        payload = json.dumps({"success": False, "execution_time_s": 9.5})
+        result = runner.invoke(
+            cli,
+            ["metrics", "record", "coach", "--target", str(project_dir), "--json"],
+            input=payload,
+        )
+        assert result.exit_code == 0, result.output
+
+        show = runner.invoke(cli, ["metrics", "show", "coach", "--target", str(project_dir)])
+        assert show.exit_code == 0, show.output
+        assert '"success": false' in show.output
+
+    def test_record_idempotency_key_skips_duplicate(
+        self, runner: CliRunner, project_dir: Path
+    ):
+        """Recording twice with the same ``--idempotency-key`` keeps a single entry."""
+        target = str(project_dir)
+        args = [
+            "metrics",
+            "record",
+            "coach",
+            "--target",
+            target,
+            "--success",
+            "--idempotency-key",
+            "sess-abc",
+        ]
+        first = runner.invoke(cli, args)
+        second = runner.invoke(cli, args)
+        assert first.exit_code == 0, first.output
+        assert second.exit_code == 0, second.output
+        assert "Skipped duplicate" in second.output
+
+        export = runner.invoke(cli, ["metrics", "export", "coach", "--target", target])
+        # A failing export can also print exactly one line (its error message),
+        # so the line count below is only meaningful once the command succeeded.
+        assert export.exit_code == 0, export.output
+        assert len(export.output.strip().splitlines()) == 1
+
+    def test_record_requires_outcome_without_json(self, runner: CliRunner, project_dir: Path):
+        """Without ``--json``, ``record`` is rejected unless an outcome flag is given."""
+        result = runner.invoke(
+            cli,
+            ["metrics", "record", "tdd-workflow", "--target", str(project_dir)],
+        )
+        assert result.exit_code != 0
+        assert "--success or --failure" in result.output
+
+    def test_memory_init_scaffolds_metrics(self, runner: CliRunner, project_dir: Path):
+        """``memory init`` creates the metrics directory and its ``executions.jsonl``."""
+        result = runner.invoke(
+            cli,
+            ["memory", "init", "tdd-workflow", "--target", str(project_dir)],
+        )
+        assert result.exit_code == 0, result.output
+        metrics_dir = project_dir / ".kaizen" / "metrics" / "tdd-workflow"
+        assert metrics_dir.exists()
+        assert (metrics_dir / "executions.jsonl").exists()
+
+    def test_memory_init_no_metrics_flag(self, runner: CliRunner, project_dir: Path):
+        """``memory init --no-metrics`` leaves the agent's metrics directory absent."""
+        result = runner.invoke(
+            cli,
+            ["memory", "init", "coach", "--target", str(project_dir), "--no-metrics"],
+        )
+        assert result.exit_code == 0, result.output
+        assert not (project_dir / ".kaizen" / "metrics" / "coach").exists()