"""Weekly retro report tests (AGENTIC-WP-0010 T01)."""
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from session_memory.curate.catalog import Catalog  # noqa: E402
from session_memory.curate.schema import Resolution, SolutionPattern  # noqa: E402
from session_memory.retro.build import weekly_retro  # noqa: E402


def _digest(uid, repo, ts, flavor="claude", retries=5):
    """Build a minimal failed-session digest dict used as weekly_retro input.

    Only the uid, repo, start timestamp, flavor and retry count vary; every
    other field is a fixed placeholder. Fresh nested containers are created
    on each call so tests may overwrite them independently.
    """
    token_cost = {"input_tokens": 100, "output_tokens": 10}
    tool_counts = {"Bash": 20, "Edit": 12, "Read": 8}
    marker_counts = {"errors": 0, "retries": retries, "test_runs": 0}
    return dict(
        session_uid=uid,
        flavor=flavor,
        repo=repo,
        outcome="fail",
        started_at=ts,
        event_count=40,
        first_prompt="Fix the failing build and retry the suite",
        cost=token_cost,
        tool_histogram=tool_counts,
        markers=marker_counts,
        error_snippets=[],
    )


def _june_pair():
    """Return two repo "r1" digests (claude:0, claude:1) dated 2026-06-01/02."""
    return [
        _digest(f"claude:{idx}", "r1", "2026-06-0{}T10:00:00Z".format(idx + 1))
        for idx in range(2)
    ]


def _retro(digests, **extra):
    """Run weekly_retro over the since/until window shared by all tests here."""
    return weekly_retro(
        digests,
        since="2026-05-30T00:00:00Z",
        until="2026-06-08T00:00:00Z",
        **extra,
    )


def test_window_excludes_old_sessions():
    """Only the two digests dated inside the window are counted."""
    digests = [
        _digest("claude:a", "r1", "2026-06-01T10:00:00Z"),
        _digest("claude:b", "r1", "2026-06-02T10:00:00Z"),
        _digest("claude:old", "r1", "2026-01-01T10:00:00Z"),  # outside window
    ]
    report = _retro(digests)
    assert report["n_sessions"] == 2
    # NOTE(review): since..until spans 9 calendar days, yet "days" is expected
    # to be 7 -- presumably a fixed/default window length inside weekly_retro;
    # confirm against the implementation.
    assert report["window"]["days"] == 7


def test_retry_storm_becomes_suggestion():
    """Two retry-heavy sessions in one repo yield a retry_storm suggestion."""
    report = _retro(_june_pair())
    suggestions = report["suggestions"]
    assert suggestions
    top = suggestions[0]
    assert top["repo"] == "r1"
    assert top["signal_type"] == "retry_storm"
    assert "Investigate" in top["recommendation"]  # no catalog -> default


def test_recommendation_from_catalog(tmp_path):
    """With a catalog entry for the signal, its resolution is the recommendation."""
    catalog = Catalog(str(tmp_path / "catalog"))
    pattern_key = "problem:retry_storm:retries"
    pattern = SolutionPattern(
        id=SolutionPattern.make_id(pattern_key),
        name="Retry storm",
        version="1.0.0",
        polarity="problem",
        problem="repeated retries",
        resolutions=[Resolution(summary="Stop and diagnose before retrying")],
    )
    catalog.upsert(pattern)
    report = _retro(_june_pair(), catalog=catalog)
    top = report["suggestions"][0]
    assert top["recommendation"] == "Stop and diagnose before retrying"


def test_recurring_error_inherits_recommendation_via_covers(tmp_path):
    """A recurring error picks up the resolution of a pattern listing it in covers."""
    catalog = Catalog(str(tmp_path / "catalog"))
    pattern = SolutionPattern(
        id="sp-rbe",
        name="Read before edit",
        version="1.0.0",
        polarity="problem",
        problem="edit before read",
        resolutions=[Resolution(summary="Read the file first before Edit/Write")],
        covers=["file has not been read"],
    )
    catalog.upsert(pattern)

    digests = _june_pair()
    for entry in digests:
        # Same error fingerprint in both sessions so it counts as recurring.
        entry["error_snippets"] = [{
            "fingerprint": "file has not been read yet. read it first...",
            "sample": "File has not been read yet",
            "count": 2,
            "tool": "Edit",
        }]

    report = _retro(digests, catalog=catalog)
    rec_err = [
        item for item in report["suggestions"]
        if item["signal_type"] == "recurring_error"
    ]
    assert rec_err, "expected a recurring_error suggestion"
    assert rec_err[0]["recommendation"] == "Read the file first before Edit/Write"


def test_caps_three_per_repo():
    """No more than three suggestions are emitted for a single repo."""
    # five distinct problem signals in one repo -> capped at 3
    digests = _june_pair()
    for entry in digests:
        entry["markers"] = {
            "errors": 5,
            "retries": 5,
            "test_runs": 0,
            "human_interventions": 0,
        }
        entry["tool_histogram"] = {
            "Bash": 120,
            "ToolSearch": 9,
            "mcp__state-hub__x": 30,
            "Edit": 5,
        }
        entry["outcome"] = "abandoned"

    report = _retro(digests)
    r1_suggestions = [item for item in report["suggestions"] if item["repo"] == "r1"]
    assert len(r1_suggestions) <= 3


def test_cross_flavor_ranks_first():
    """With one claude and one grok session, the top suggestion is cross-flavor/high."""
    digests = [
        _digest("claude:a", "r1", "2026-06-01T10:00:00Z", flavor="claude"),
        _digest("grok:b", "r2", "2026-06-02T10:00:00Z", flavor="grok"),
    ]
    report = _retro(digests)
    top = report["suggestions"][0]
    assert top["cross_flavor"] is True
    assert top["priority"] == "high"


def test_includes_measure_snapshot():
    """The report carries a "measure" section with the in-window session count."""
    report = _retro(_june_pair())
    assert report["measure"]["n_sessions"] == 2