session-memory: weekly retro entrypoint + hub publish (AGENTIC-WP-0010)

The analysis half of the weekly coding retrospection. retro/build.py: windowed detect+measure -> top-3 improvement suggestions per repo (cross-flavor first, recommendations pulled from the Pattern Catalog) + fleet snapshot. retro/publish.py: publishes the report to the hub as the coding_retro read model (a progress event with event_type=coding_retro) and writes local JSON/md; degrades gracefully if the hub post fails. retro/__main__.py: entrypoint with --window-days/--publish/--json. Live verification over real sessions surfaced per-repo suggestions with catalog recommendations. 13 new tests; suite 152/152. Consumed by activity-core ACTIVITY-WP-0008 (Weekly Coding Retrospection, Sat 19:00). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 19:17:24 +02:00
parent 15ba625351
commit 0d05dfcc5d
12 changed files with 932 additions and 0 deletions
--- a/tests/test_retro_build.py
+++ b/tests/test_retro_build.py
@@ -0,0 +1,86 @@
+"""Weekly retro report tests (AGENTIC-WP-0010 T01)."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.curate.catalog import Catalog  # noqa: E402
+from session_memory.curate.schema import Resolution, SolutionPattern  # noqa: E402
+from session_memory.retro.build import weekly_retro  # noqa: E402
+
+
+def _digest(uid, repo, ts, flavor="claude", retries=5):
+    """Return a synthetic failed-session digest dict used as retro input."""
+    tools = {"Bash": 20, "Edit": 12, "Read": 8}
+    markers = {"errors": 0, "retries": retries, "test_runs": 0}
+    return dict(
+        session_uid=uid, flavor=flavor, repo=repo, outcome="fail",
+        started_at=ts, event_count=40,
+        first_prompt="Fix the failing build and retry the suite",
+        cost={"input_tokens": 100, "output_tokens": 10},
+        tool_histogram=tools, markers=markers, error_snippets=[])
+
+
+def test_window_excludes_old_sessions():
+    """A digest that started before `since` is left out of the session count."""
+    stamps = {"a": "2026-06-01T10:00:00Z", "b": "2026-06-02T10:00:00Z",
+              "old": "2026-01-01T10:00:00Z"}  # "old" predates `since`
+    sessions = [_digest(f"claude:{tag}", "r1", ts) for tag, ts in stamps.items()]
+    report = weekly_retro(sessions, since="2026-05-30T00:00:00Z",
+                          until="2026-06-08T00:00:00Z")
+    assert report["n_sessions"] == 2
+    assert report["window"]["days"] == 7
+
+
+def test_retry_storm_becomes_suggestion():
+    """Two retry-heavy sessions in one repo yield a retry_storm suggestion first."""
+    sessions = [_digest(f"claude:{n}", "r1", f"2026-06-0{n + 1}T10:00:00Z") for n in (0, 1)]
+    report = weekly_retro(sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
+    assert report["suggestions"]
+    top = report["suggestions"][0]
+    assert top["repo"] == "r1" and top["signal_type"] == "retry_storm"
+    assert "Investigate" in top["recommendation"]  # no catalog -> default text
+
+
+def test_recommendation_from_catalog(tmp_path):
+    """A catalogued pattern's resolution summary is used as the recommendation."""
+    catalog = Catalog(str(tmp_path / "catalog"))
+    catalog.upsert(SolutionPattern(
+        id=SolutionPattern.make_id("problem:retry_storm:retries"), name="Retry storm",
+        version="1.0.0", polarity="problem", problem="repeated retries",
+        resolutions=[Resolution(summary="Stop and diagnose before retrying")]))
+    sessions = [_digest(f"claude:{n}", "r1", f"2026-06-0{n + 1}T10:00:00Z") for n in (0, 1)]
+    report = weekly_retro(sessions, catalog=catalog, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
+    assert report["suggestions"][0]["recommendation"] == "Stop and diagnose before retrying"
+
+
+def test_caps_three_per_repo():
+    """Suggestions for a single repo are capped at three."""
+    # The overrides below are meant to trip several problem signals at once.
+    sessions = [
+        {**_digest(f"claude:{n}", "r1", f"2026-06-0{n + 1}T10:00:00Z"),
+         "markers": {"errors": 5, "retries": 5, "test_runs": 0, "human_interventions": 0},
+         "tool_histogram": {"Bash": 120, "ToolSearch": 9, "mcp__state-hub__x": 30, "Edit": 5},
+         "outcome": "abandoned"}
+        for n in range(2)
+    ]
+    report = weekly_retro(sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
+    r1_suggestions = [s for s in report["suggestions"] if s["repo"] == "r1"]
+    assert len(r1_suggestions) <= 3
+
+
+def test_cross_flavor_ranks_first():
+    """With one claude and one grok session, the top suggestion is cross-flavor/high."""
+    claude_run = _digest("claude:a", "r1", "2026-06-01T10:00:00Z", flavor="claude")
+    grok_run = _digest("grok:b", "r2", "2026-06-02T10:00:00Z", flavor="grok")
+    report = weekly_retro([claude_run, grok_run],
+                          since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
+    top = report["suggestions"][0]
+    assert top["cross_flavor"] is True and top["priority"] == "high"
+
+
+def test_includes_measure_snapshot():  # the report carries a "measure" section
+    sessions = [_digest(f"claude:{n}", "r1", f"2026-06-0{n + 1}T10:00:00Z") for n in (0, 1)]
+    report = weekly_retro(sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
+    assert report["measure"]["n_sessions"] == 2
--- a/tests/test_retro_entrypoint.py
+++ b/tests/test_retro_entrypoint.py
@@ -0,0 +1,63 @@
+"""Retro entrypoint tests (AGENTIC-WP-0010 T03)."""
+
+import json
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.core.store import Store  # noqa: E402
+from session_memory.retro.__main__ import main, run_retro  # noqa: E402
+
+
+def _digest(uid, repo, ts, retries=5):
+    """Return a synthetic failed claude-session digest dict to seed the store."""
+    tools = {"Bash": 20, "Edit": 12, "Read": 8}
+    markers = {"errors": 0, "retries": retries, "test_runs": 0}
+    return dict(
+        session_uid=uid, flavor="claude", repo=repo, outcome="fail",
+        started_at=ts, event_count=40,
+        first_prompt="Fix the failing build and retry the suite repeatedly",
+        cost={"input_tokens": 100, "output_tokens": 10},
+        tool_histogram=tools, markers=markers, error_snippets=[])
+
+
+def _config(tmp_path):
+    """Write config.toml plus a store holding two digests; return (toml path, tmp_path)."""
+    root, cfg = tmp_path / ".store", tmp_path / "config.toml"
+    cfg.write_text(
+        f'[store]\ndb_path="{root / "m.db"}"\nblob_dir="{root / "blobs"}"\ncursor="{root / "c.json"}"\n'
+        f'[curate]\ncatalog_dir="{tmp_path / "catalog"}"\n'
+        f'[retro]\nwindow_days=7\nreport_json="{tmp_path / "r.json"}"\nreport_md="{tmp_path / "r.md"}"\n')
+    store = Store(str(root / "m.db"), str(root / "blobs"))
+    for uid, day in (("claude:a", 1), ("claude:b", 2)):
+        store.write_digest(uid, _digest(uid, "r1", f"2026-06-0{day}T10:00:00Z"))
+    store.close()
+    return str(cfg), tmp_path
+
+
+def test_run_retro_over_store(tmp_path):
+    """run_retro sees both stored digests and produces at least one suggestion."""
+    from session_memory.ingest import load_config
+    config = load_config(_config(tmp_path)[0])
+    report = run_retro(config, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
+    assert report["n_sessions"] == 2 and report["suggestions"]
+
+
+def test_main_writes_report_files(tmp_path, capsys):
+    """main() exits 0, leaves r.json and r.md on disk, and prints the retro title text."""
+    cfg_path, out_dir = _config(tmp_path)
+    argv = ["--config", cfg_path, "--since", "2026-05-30T00:00:00Z", "--until", "2026-06-08T00:00:00Z"]
+    assert main(argv) == 0
+    assert all(os.path.exists(str(out_dir / name)) for name in ("r.json", "r.md"))
+    assert "Weekly Coding Retro" in capsys.readouterr().out
+
+
+def test_main_json(tmp_path, capsys):
+    """With --json, stdout parses as JSON holding the report and a null 'published'."""
+    cfg_path = _config(tmp_path)[0]
+    argv = ["--config", cfg_path, "--since", "2026-05-30T00:00:00Z", "--until", "2026-06-08T00:00:00Z"]
+    assert main(argv + ["--json"]) == 0
+    payload = json.loads(capsys.readouterr().out)
+    assert payload["report"]["n_sessions"] == 2
+    assert payload["published"] is None  # --publish was not passed
--- a/tests/test_retro_publish.py
+++ b/tests/test_retro_publish.py
@@ -0,0 +1,62 @@
+"""Retro publish tests (AGENTIC-WP-0010 T02)."""
+
+import json
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.retro.publish import (  # noqa: E402
+    publish_to_hub,
+    render_markdown,
+    write_local,
+)
+
+
+def _report():
+    """Return a fixed retro report (one suggestion plus measure stats) for these tests."""
+    window = {"since": "2026-06-01T00:00:00Z", "until": "2026-06-08T00:00:00Z", "days": 7}
+    suggestion = {
+        "repo": "state-hub", "title": "schema thrash", "recommendation": "front-load schemas",
+        "priority": "high", "score": 632.0, "cross_flavor": False, "signal_type": "schema_thrash"}
+    measure = {
+        "infra_overhead_share_median": 0.117, "error_rate": 0.96,
+        "schema_thrash_sessions": 8, "success_rate": 1.0, "tokens_p50": 250725}
+    return {"window": window, "generated_at": "2026-06-08T19:00:00Z", "n_sessions": 12,
+            "suggestions": [suggestion], "measure": measure}
+
+
+def test_render_markdown():  # title, suggestion and measure line must all be rendered
+    text = render_markdown(_report())
+    for fragment in ("Weekly Coding Retro", "**state-hub**", "front-load schemas",
+                     "infra-overhead median: 0.117"):
+        assert fragment in text
+
+
+def test_write_local_json_and_md(tmp_path):
+    """write_local emits the JSON report and the markdown file at the given paths."""
+    json_path, md_path = tmp_path / "out" / "retro.json", tmp_path / "out" / "retro.md"
+    write_local(_report(), str(json_path), str(md_path))
+    assert json.loads(json_path.read_text())["n_sessions"] == 12  # read_text closes the file
+    assert "Weekly Coding Retro" in md_path.read_text()
+
+
+def test_publish_calls_poster_with_coding_retro_event():
+    """publish_to_hub hands the poster base_url + /progress/ and a coding_retro payload."""
+    calls = []
+
+    def poster(url, payload):
+        calls.append((url, payload))
+
+    assert publish_to_hub(_report(), base_url="http://hub", poster=poster) is True
+    url, payload = calls[-1]
+    assert url == "http://hub/progress/"
+    assert payload["event_type"] == "coding_retro"
+    assert payload["detail"]["n_sessions"] == 12
+
+
+def test_publish_degrades_gracefully_on_failure():
+    """A poster raising OSError makes publish_to_hub return False instead of raising."""
+    def failing_poster(url, payload):
+        raise OSError("hub down")
+    assert publish_to_hub(_report(), poster=failing_poster) is False