generated from coulomb/repo-seed
session-memory: weekly retro entrypoint + hub publish (AGENTIC-WP-0010)
The analysis half of the weekly coding retrospection. retro/build.py: windowed detect+measure -> top-3 improvement suggestions per repo (cross-flavor first, recommendations pulled from the Pattern Catalog) + fleet snapshot. retro/publish.py: publishes the report to the hub as the coding_retro read model (event_type=coding_retro progress event) + local JSON/md, graceful degrade. retro entrypoint with --window-days/--publish/--json. Live verify over real sessions surfaced per-repo suggestions with catalog recommendations. 13 new tests; suite 152/152. Consumed by activity-core ACTIVITY-WP-0008 (Weekly Coding Retrospection, Sat 19:00). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
86
tests/test_retro_build.py
Normal file
86
tests/test_retro_build.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Weekly retro report tests (AGENTIC-WP-0010 T01)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from session_memory.curate.catalog import Catalog # noqa: E402
|
||||
from session_memory.curate.schema import Resolution, SolutionPattern # noqa: E402
|
||||
from session_memory.retro.build import weekly_retro # noqa: E402
|
||||
|
||||
|
||||
def _digest(uid: str, repo: str, ts: str, flavor: str = "claude", retries: int = 5) -> dict:
    """Build a synthetic session digest for the retro tests.

    The digest describes a failed session with no recorded errors or test
    runs; only the identifying fields and the retry count vary per call.

    Args:
        uid: Stored under ``session_uid``.
        repo: Stored under ``repo``.
        ts: Stored under ``started_at`` (the tests pass ISO-8601 UTC strings).
        flavor: Stored under ``flavor``.
        retries: Stored under ``markers["retries"]``.

    Returns:
        A freshly built dict (including fresh nested dicts/lists), so a
        caller may mutate it without affecting other digests.
    """
    return {
        "session_uid": uid,
        "flavor": flavor,
        "repo": repo,
        "outcome": "fail",
        "started_at": ts,
        "event_count": 40,
        "first_prompt": "Fix the failing build and retry the suite",
        "cost": {"input_tokens": 100, "output_tokens": 10},
        "tool_histogram": {"Bash": 20, "Edit": 12, "Read": 8},
        "markers": {"errors": 0, "retries": retries, "test_runs": 0},
        "error_snippets": [],
    }
|
||||
|
||||
|
||||
def test_window_excludes_old_sessions():
    """Only sessions started inside the since/until window are counted."""
    digs = [
        _digest("claude:a", "r1", "2026-06-01T10:00:00Z"),
        _digest("claude:b", "r1", "2026-06-02T10:00:00Z"),
        _digest("claude:old", "r1", "2026-01-01T10:00:00Z"),  # outside window
    ]
    r = weekly_retro(digs, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    assert r["n_sessions"] == 2
    # NOTE(review): since/until above span nine calendar days, yet the report
    # is expected to say 7 — presumably "days" reflects the configured window
    # length rather than the explicit bounds; confirm against weekly_retro.
    assert r["window"]["days"] == 7
|
||||
|
||||
|
||||
def test_retry_storm_becomes_suggestion():
    """Two retry-heavy sessions in one repo produce a retry_storm suggestion."""
    digs = [
        _digest(f"claude:{i}", "r1", f"2026-06-0{i + 1}T10:00:00Z")
        for i in range(2)
    ]
    r = weekly_retro(digs, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    s = r["suggestions"]
    # Checked separately so a failure says whether the list was empty or the
    # first suggestion was attributed to the wrong repo.
    assert s
    assert s[0]["repo"] == "r1"
    assert s[0]["signal_type"] == "retry_storm"
    assert "Investigate" in s[0]["recommendation"]  # no catalog -> default
|
||||
|
||||
|
||||
def test_recommendation_from_catalog(tmp_path):
    """A matching catalog pattern supplies the suggestion's recommendation.

    Args:
        tmp_path: pytest fixture; the catalog is created in a subdirectory
            of it so the test leaves no files behind.
    """
    cat = Catalog(str(tmp_path / "catalog"))
    key = "problem:retry_storm:retries"
    cat.upsert(
        SolutionPattern(
            id=SolutionPattern.make_id(key),
            name="Retry storm",
            version="1.0.0",
            polarity="problem",
            problem="repeated retries",
            resolutions=[Resolution(summary="Stop and diagnose before retrying")],
        )
    )
    digs = [
        _digest(f"claude:{i}", "r1", f"2026-06-0{i + 1}T10:00:00Z")
        for i in range(2)
    ]
    r = weekly_retro(
        digs,
        catalog=cat,
        since="2026-05-30T00:00:00Z",
        until="2026-06-08T00:00:00Z",
    )
    assert r["suggestions"][0]["recommendation"] == "Stop and diagnose before retrying"
|
||||
|
||||
|
||||
def test_caps_three_per_repo():
    """A repo never receives more than three suggestions."""
    # five distinct problem signals in one repo -> capped at 3
    digs = []
    for i in range(2):
        d = _digest(f"claude:{i}", "r1", f"2026-06-0{i + 1}T10:00:00Z")
        d["markers"] = {"errors": 5, "retries": 5, "test_runs": 0, "human_interventions": 0}
        d["tool_histogram"] = {
            "Bash": 120,
            "ToolSearch": 9,
            "mcp__state-hub__x": 30,
            "Edit": 5,
        }
        d["outcome"] = "abandoned"
        digs.append(d)
    r = weekly_retro(digs, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    per_repo = [s for s in r["suggestions"] if s["repo"] == "r1"]
    # NOTE(review): this bound also holds when no suggestions are produced at
    # all; consider additionally asserting that per_repo is non-empty once the
    # expected signal count for this fixture is confirmed.
    assert len(per_repo) <= 3
|
||||
|
||||
|
||||
def test_cross_flavor_ranks_first():
    """With sessions from two flavors, the top suggestion is cross-flavor and high priority."""
    digs = [
        _digest("claude:a", "r1", "2026-06-01T10:00:00Z", flavor="claude"),
        _digest("grok:b", "r2", "2026-06-02T10:00:00Z", flavor="grok"),
    ]
    r = weekly_retro(digs, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    top = r["suggestions"][0]
    assert top["cross_flavor"] is True
    assert top["priority"] == "high"
|
||||
|
||||
|
||||
def test_includes_measure_snapshot():
    """The report carries a ``measure`` snapshot covering the windowed sessions."""
    digs = [
        _digest(f"claude:{i}", "r1", f"2026-06-0{i + 1}T10:00:00Z")
        for i in range(2)
    ]
    r = weekly_retro(digs, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    assert r["measure"]["n_sessions"] == 2
|
||||
Reference in New Issue
Block a user