session-memory: weekly retro entrypoint + hub publish (AGENTIC-WP-0010)

The analysis half of the weekly coding retrospection. retro/build.py: windowed
detect+measure -> top-3 improvement suggestions per repo (cross-flavor first,
recommendations pulled from the Pattern Catalog) + fleet snapshot. retro/publish.py:
publishes the report to the hub as the coding_retro read model (event_type=
coding_retro progress event) + local JSON/md, graceful degrade. retro entrypoint
with --window-days/--publish/--json. Live verify over real sessions surfaced
per-repo suggestions with catalog recommendations. 13 new tests; suite 152/152.

Consumed by activity-core ACTIVITY-WP-0008 (Weekly Coding Retrospection, Sat 19:00).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 19:17:24 +02:00
parent 15ba625351
commit 0d05dfcc5d
12 changed files with 932 additions and 0 deletions

86
tests/test_retro_build.py Normal file
View File

@@ -0,0 +1,86 @@
"""Weekly retro report tests (AGENTIC-WP-0010 T01)."""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.curate.catalog import Catalog # noqa: E402
from session_memory.curate.schema import Resolution, SolutionPattern # noqa: E402
from session_memory.retro.build import weekly_retro # noqa: E402
def _digest(uid, repo, ts, flavor="claude", retries=5):
    """Build a minimal failing-session digest fixture.

    ``uid``, ``repo`` and ``ts`` fill ``session_uid``, ``repo`` and
    ``started_at``; ``flavor`` and ``retries`` set the flavor field and the
    retry marker. All remaining fields are fixed constants.
    """
    token_cost = {"input_tokens": 100, "output_tokens": 10}
    tools_used = {"Bash": 20, "Edit": 12, "Read": 8}
    marker_counts = {"errors": 0, "retries": retries, "test_runs": 0}

    digest = {
        "session_uid": uid,
        "flavor": flavor,
        "repo": repo,
        "outcome": "fail",
        "started_at": ts,
        "event_count": 40,
        "first_prompt": "Fix the failing build and retry the suite",
        "cost": token_cost,
        "tool_histogram": tools_used,
        "markers": marker_counts,
        "error_snippets": [],
    }
    return digest
def test_window_excludes_old_sessions():
    """A session started before ``since`` is not counted in the report."""
    window = {"since": "2026-05-30T00:00:00Z", "until": "2026-06-08T00:00:00Z"}
    recent = [
        _digest("claude:a", "r1", "2026-06-01T10:00:00Z"),
        _digest("claude:b", "r1", "2026-06-02T10:00:00Z"),
    ]
    stale = _digest("claude:old", "r1", "2026-01-01T10:00:00Z")  # outside window

    report = weekly_retro(recent + [stale], **window)

    assert report["n_sessions"] == 2
    # NOTE(review): since..until spans nine calendar days, yet ``days`` is
    # expected to be 7 -- presumably the default window length; confirm
    # against weekly_retro.
    assert report["window"]["days"] == 7
def test_retry_storm_becomes_suggestion():
    """Two retry-heavy sessions in one repo yield a retry_storm suggestion."""
    sessions = []
    for idx in range(2):
        sessions.append(
            _digest(f"claude:{idx}", "r1", f"2026-06-0{idx + 1}T10:00:00Z"))

    report = weekly_retro(
        sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    suggestions = report["suggestions"]

    assert suggestions
    top = suggestions[0]
    assert top["repo"] == "r1"
    assert top["signal_type"] == "retry_storm"
    assert "Investigate" in top["recommendation"]  # no catalog -> default
def test_recommendation_from_catalog(tmp_path):
    """The top suggestion's recommendation equals the catalog resolution summary.

    The pattern is registered under an id derived from
    ``problem:retry_storm:retries``; presumably that is how the retro matches
    it to the retry_storm signal -- confirm against the build module.
    """
    catalog = Catalog(str(tmp_path / "catalog"))
    pattern_key = "problem:retry_storm:retries"
    pattern = SolutionPattern(
        id=SolutionPattern.make_id(pattern_key),
        name="Retry storm",
        version="1.0.0",
        polarity="problem",
        problem="repeated retries",
        resolutions=[Resolution(summary="Stop and diagnose before retrying")],
    )
    catalog.upsert(pattern)

    sessions = [
        _digest(f"claude:{idx}", "r1", f"2026-06-0{idx + 1}T10:00:00Z")
        for idx in range(2)
    ]
    report = weekly_retro(
        sessions,
        catalog=catalog,
        since="2026-05-30T00:00:00Z",
        until="2026-06-08T00:00:00Z",
    )

    first = report["suggestions"][0]
    assert first["recommendation"] == "Stop and diagnose before retrying"
def test_caps_three_per_repo():
    """No repo receives more than three suggestions."""
    # Per the original author's note: five distinct problem signals in one
    # repo -> capped at 3.
    sessions = []
    for idx in range(2):
        digest = _digest(f"claude:{idx}", "r1", f"2026-06-0{idx + 1}T10:00:00Z")
        digest.update(
            markers={"errors": 5, "retries": 5, "test_runs": 0,
                     "human_interventions": 0},
            tool_histogram={"Bash": 120, "ToolSearch": 9,
                            "mcp__state-hub__x": 30, "Edit": 5},
            outcome="abandoned",
        )
        sessions.append(digest)

    report = weekly_retro(
        sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")

    r1_suggestions = [s for s in report["suggestions"] if s["repo"] == "r1"]
    # NOTE(review): this bound also holds when no suggestions are produced.
    assert len(r1_suggestions) <= 3
def test_cross_flavor_ranks_first():
    """With sessions from two flavors, the first suggestion is cross-flavor and high priority."""
    claude_session = _digest(
        "claude:a", "r1", "2026-06-01T10:00:00Z", flavor="claude")
    grok_session = _digest(
        "grok:b", "r2", "2026-06-02T10:00:00Z", flavor="grok")

    report = weekly_retro(
        [claude_session, grok_session],
        since="2026-05-30T00:00:00Z",
        until="2026-06-08T00:00:00Z",
    )

    leader = report["suggestions"][0]
    assert leader["cross_flavor"] is True
    assert leader["priority"] == "high"
def test_includes_measure_snapshot():
    """The report carries a ``measure`` section counting the windowed sessions."""
    sessions = []
    for idx in range(2):
        sessions.append(
            _digest(f"claude:{idx}", "r1", f"2026-06-0{idx + 1}T10:00:00Z"))

    report = weekly_retro(
        sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")

    snapshot = report["measure"]
    assert snapshot["n_sessions"] == 2

View File

@@ -0,0 +1,63 @@
"""Retro entrypoint tests (AGENTIC-WP-0010 T03)."""
import json
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.core.store import Store # noqa: E402
from session_memory.retro.__main__ import main, run_retro # noqa: E402
def _digest(uid, repo, ts, retries=5):
    """Build a minimal failing ``claude`` session digest fixture.

    ``uid``, ``repo`` and ``ts`` fill ``session_uid``, ``repo`` and
    ``started_at``; ``retries`` sets the retry marker. All remaining fields
    are fixed constants.
    """
    token_cost = {"input_tokens": 100, "output_tokens": 10}
    tools_used = {"Bash": 20, "Edit": 12, "Read": 8}
    marker_counts = {"errors": 0, "retries": retries, "test_runs": 0}

    digest = {
        "session_uid": uid,
        "flavor": "claude",
        "repo": repo,
        "outcome": "fail",
        "started_at": ts,
        "event_count": 40,
        "first_prompt": "Fix the failing build and retry the suite repeatedly",
        "cost": token_cost,
        "tool_histogram": tools_used,
        "markers": marker_counts,
        "error_snippets": [],
    }
    return digest
def _config(tmp_path):
    """Write a retro config under ``tmp_path`` and seed its store with two digests.

    The TOML file points the ``[store]``, ``[curate]`` and ``[retro]``
    sections at locations inside ``tmp_path``. Two ``claude`` digests for
    repo ``r1`` are then written through ``Store`` before it is closed.

    Args:
        tmp_path: Directory (a ``pathlib.Path``) that receives every file.

    Returns:
        ``(config_path, tmp_path)`` where ``config_path`` is the TOML file's
        path as a string.
    """
    store = tmp_path / ".store"
    toml = tmp_path / "config.toml"

    def _toml_path(path):
        # TOML basic (double-quoted) strings treat backslashes as escape
        # characters, so emit forward-slash paths; on POSIX this is the same
        # text as str(path).
        return path.as_posix()

    toml.write_text(
        f'[store]\ndb_path="{_toml_path(store / "m.db")}"\n'
        f'blob_dir="{_toml_path(store / "blobs")}"\n'
        f'cursor="{_toml_path(store / "c.json")}"\n'
        f'[curate]\ncatalog_dir="{_toml_path(tmp_path / "catalog")}"\n'
        f'[retro]\nwindow_days=7\nreport_json="{_toml_path(tmp_path / "r.json")}"\n'
        f'report_md="{_toml_path(tmp_path / "r.md")}"\n',
        encoding="utf-8",  # TOML documents must be UTF-8
    )

    st = Store(str(store / "m.db"), str(store / "blobs"))
    st.write_digest("claude:a", _digest("claude:a", "r1", "2026-06-01T10:00:00Z"))
    st.write_digest("claude:b", _digest("claude:b", "r1", "2026-06-02T10:00:00Z"))
    st.close()
    return str(toml), tmp_path
def test_run_retro_over_store(tmp_path):
    """run_retro reports both seeded sessions and at least one suggestion."""
    from session_memory.ingest import load_config

    config_file, _unused = _config(tmp_path)
    config = load_config(config_file)

    report = run_retro(
        config, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")

    assert report["n_sessions"] == 2
    assert report["suggestions"]
def test_main_writes_report_files(tmp_path, capsys):
    """main exits 0, writes both configured report files and prints the retro title."""
    config_file, out_dir = _config(tmp_path)
    argv = [
        "--config", config_file,
        "--since", "2026-05-30T00:00:00Z",
        "--until", "2026-06-08T00:00:00Z",
    ]

    exit_code = main(argv)

    assert exit_code == 0
    for report_name in ("r.json", "r.md"):
        assert os.path.exists(str(out_dir / report_name))
    printed = capsys.readouterr().out
    assert "Weekly Coding Retro" in printed
def test_main_json(tmp_path, capsys):
    """With --json, main prints a JSON document holding the report and a null ``published``."""
    config_file, _unused = _config(tmp_path)
    argv = [
        "--config", config_file,
        "--since", "2026-05-30T00:00:00Z",
        "--until", "2026-06-08T00:00:00Z",
        "--json",
    ]

    exit_code = main(argv)

    assert exit_code == 0
    payload = json.loads(capsys.readouterr().out)
    assert payload["report"]["n_sessions"] == 2
    assert payload["published"] is None  # no --publish

View File

@@ -0,0 +1,62 @@
"""Retro publish tests (AGENTIC-WP-0010 T02)."""
import json
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.retro.publish import ( # noqa: E402
publish_to_hub,
render_markdown,
write_local,
)
def _report():
    """Return a fixed retro report fixture with one suggestion and a measure block."""
    window = {
        "since": "2026-06-01T00:00:00Z",
        "until": "2026-06-08T00:00:00Z",
        "days": 7,
    }
    suggestion = {
        "repo": "state-hub",
        "title": "schema thrash",
        "recommendation": "front-load schemas",
        "priority": "high",
        "score": 632.0,
        "cross_flavor": False,
        "signal_type": "schema_thrash",
    }
    measure = {
        "infra_overhead_share_median": 0.117,
        "error_rate": 0.96,
        "schema_thrash_sessions": 8,
        "success_rate": 1.0,
        "tokens_p50": 250725,
    }
    return {
        "window": window,
        "generated_at": "2026-06-08T19:00:00Z",
        "n_sessions": 12,
        "suggestions": [suggestion],
        "measure": measure,
    }
def test_render_markdown():
    """The rendered markdown contains the title, the suggestion and the infra-overhead median."""
    rendered = render_markdown(_report())

    assert "Weekly Coding Retro" in rendered
    assert "**state-hub**" in rendered
    assert "front-load schemas" in rendered
    assert "infra-overhead median: 0.117" in rendered
def test_write_local_json_and_md(tmp_path):
    """write_local produces both the JSON and the markdown report files.

    The targets sit in an ``out`` subdirectory that this test does not
    create beforehand.
    """
    jp = str(tmp_path / "out" / "retro.json")
    mp = str(tmp_path / "out" / "retro.md")
    write_local(_report(), jp, mp)

    # Read through context managers so the handles are closed
    # deterministically instead of leaking until garbage collection.
    with open(jp) as json_file:
        written = json.load(json_file)
    assert written["n_sessions"] == 12

    with open(mp) as md_file:
        markdown = md_file.read()
    assert "Weekly Coding Retro" in markdown
def test_publish_calls_poster_with_coding_retro_event():
    """publish_to_hub posts a coding_retro event to ``<base_url>/progress/`` and returns True."""
    captured = {}

    def recording_poster(url, payload):
        # Remember what publish_to_hub tried to send.
        captured.update(url=url, payload=payload)

    result = publish_to_hub(
        _report(), base_url="http://hub", poster=recording_poster)

    assert result is True
    assert captured["url"] == "http://hub/progress/"
    sent = captured["payload"]
    assert sent["event_type"] == "coding_retro"
    assert sent["detail"]["n_sessions"] == 12
def test_publish_degrades_gracefully_on_failure():
    """An OSError raised by the poster makes publish_to_hub return False."""

    def failing_poster(url, payload):
        raise OSError("hub down")

    outcome = publish_to_hub(_report(), poster=failing_poster)
    assert outcome is False