From 0d05dfcc5dd6a466e22b9aa33878b30bba745623 Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 7 Jun 2026 19:17:24 +0200 Subject: [PATCH] session-memory: weekly retro entrypoint + hub publish (AGENTIC-WP-0010) The analysis half of the weekly coding retrospection. retro/build.py: windowed detect+measure -> top-3 improvement suggestions per repo (cross-flavor first, recommendations pulled from the Pattern Catalog) + fleet snapshot. retro/publish.py: publishes the report to the hub as the coding_retro read model (event_type= coding_retro progress event) + local JSON/md, graceful degrade. retro entrypoint with --window-days/--publish/--json. Live verify over real sessions surfaced per-repo suggestions with catalog recommendations. 13 new tests; suite 152/152. Consumed by activity-core ACTIVITY-WP-0008 (Weekly Coding Retrospection, Sat 19:00). Co-Authored-By: Claude Opus 4.8 --- session_memory/README.md | 21 ++ session_memory/config.toml | 8 + session_memory/retro/__init__.py | 9 + session_memory/retro/__main__.py | 68 +++++ session_memory/retro/build.py | 100 +++++++ session_memory/retro/last_retro.json | 322 ++++++++++++++++++++++ session_memory/retro/last_retro.md | 39 +++ session_memory/retro/publish.py | 78 ++++++ tests/test_retro_build.py | 86 ++++++ tests/test_retro_entrypoint.py | 63 +++++ tests/test_retro_publish.py | 62 +++++ workplans/AGENTIC-WP-0010-weekly-retro.md | 76 +++++ 12 files changed, 932 insertions(+) create mode 100644 session_memory/retro/__init__.py create mode 100644 session_memory/retro/__main__.py create mode 100644 session_memory/retro/build.py create mode 100644 session_memory/retro/last_retro.json create mode 100644 session_memory/retro/last_retro.md create mode 100644 session_memory/retro/publish.py create mode 100644 tests/test_retro_build.py create mode 100644 tests/test_retro_entrypoint.py create mode 100644 tests/test_retro_publish.py create mode 100644 workplans/AGENTIC-WP-0010-weekly-retro.md diff --git a/session_memory/README.md 
b/session_memory/README.md index 65ec1e3..9c0ac9b 100644 --- a/session_memory/README.md +++ b/session_memory/README.md @@ -42,6 +42,9 @@ session_memory/ measure/metrics.py # fleet metrics + persisted baseline snapshots measure/effect.py # before/after per-pattern effectiveness measure/__main__.py # python -m session_memory.measure + retro/build.py # windowed top-3-per-repo suggestions + retro/publish.py # hub coding_retro read model + local report + retro/__main__.py # python -m session_memory.retro config.toml # store paths, retention caps, sources, repo->domain map, curate gate ``` @@ -163,6 +166,24 @@ python -m session_memory.measure --no-save --json retired. Recorded pre-fix baseline (2026-06-07): 27 sessions, infra-overhead median 11.7 %, error rate 0.96, schema-thrash 8 sessions. +## Weekly retro (the input to the scheduled retrospection) + +A windowed roll-up: detect + measure over the last N days → the **top-3 +improvement suggestions per repo** (cross-flavor first; recommendations pulled +from the Pattern Catalog) → published to the hub as the `coding_retro` read model. + +```bash +python -m session_memory.retro # last 7 days, local report +python -m session_memory.retro --window-days 30 --json +python -m session_memory.retro --publish # also post coding_retro to the hub +``` + +Writes `retro/last_retro.{json,md}` and (with `--publish`) posts an +`event_type=coding_retro` progress event. This is consumed by activity-core's +**Weekly Coding Retrospection** schedule (ACTIVITY-WP-0008, Saturday 19:00 Berlin), +which emits one improvement task per relevant repo. Hub publish degrades +gracefully when the hub is unreachable. 
+ ## Retention knobs (`[retention]` in config.toml) | Key | Meaning | diff --git a/session_memory/config.toml b/session_memory/config.toml index 9ff43b1..de6c48f 100644 --- a/session_memory/config.toml +++ b/session_memory/config.toml @@ -43,6 +43,14 @@ min_prompt_len = 25 # first prompt shorter than this is treated as trivial [measure] baselines = "session_memory/measure/baselines.jsonl" # timestamped metric snapshots (committed) +# Weekly retro (AGENTIC-WP-0010): windowed top-3-per-repo report, published to the +# hub as the coding_retro read model that activity-core's weekly schedule consumes. +[retro] +window_days = 7 +report_json = "session_memory/retro/last_retro.json" # latest report (committed) +report_md = "session_memory/retro/last_retro.md" # human-readable mirror +hub_url = "http://127.0.0.1:8000" # for --publish (best-effort) + # Distribute phase (AGENTIC-WP-0007): where per-flavor proposals + the active # registry are written. Proposals are HITL — reviewed, never auto-applied. [distribute] diff --git a/session_memory/retro/__init__.py b/session_memory/retro/__init__.py new file mode 100644 index 0000000..5c7c693 --- /dev/null +++ b/session_memory/retro/__init__.py @@ -0,0 +1,9 @@ +"""Weekly retro (AGENTIC-WP-0010) — the analysis half of the coding retrospection. + + build.py windowed detect + measure -> ranked top-3 suggestions per repo (T01) + publish.py publish the retro to the hub read model + local report (T02) + __main__.py python -m session_memory.retro (T03) + +Consumed by activity-core's weekly-coding-retro schedule (ACTIVITY-WP-0008) via +the ``event_type=coding_retro`` read model. +""" diff --git a/session_memory/retro/__main__.py b/session_memory/retro/__main__.py new file mode 100644 index 0000000..82510bb --- /dev/null +++ b/session_memory/retro/__main__.py @@ -0,0 +1,68 @@ +"""Weekly retro entrypoint (AGENTIC-WP-0010 T03). 
def run_retro(config: dict, *, window_days=None, since=None, until=None):
    """Load the captured session digests and build the windowed retro report.

    Args:
        config: Parsed configuration. ``config["store"]`` must provide
            ``db_path`` and ``blob_dir``; the ``curate`` and ``retro`` tables
            are optional.
        window_days: Window length in days. A falsy value (``None`` or ``0``)
            falls back to ``[retro].window_days`` and then to 7.
        since: Optional lower window bound, passed through to ``weekly_retro``.
        until: Optional upper window bound, passed through to ``weekly_retro``.

    Returns:
        The report produced by ``weekly_retro`` for the stored digests.

    Raises:
        KeyError: If the ``store`` table lacks ``db_path`` or ``blob_dir``.
    """
    store_cfg = config.get("store", {})
    store = Store(_expand(store_cfg["db_path"]), _expand(store_cfg["blob_dir"]))
    try:
        digests = store.list_digests()
    finally:
        # Release the store even when reading the digests fails.
        store.close()

    curate_cfg = config.get("curate", {})
    catalog = Catalog(_expand(curate_cfg.get("catalog_dir", "session_memory/catalog")))
    retro_cfg = config.get("retro", {})
    return weekly_retro(digests, catalog, since=since, until=until,
                        window_days=window_days or retro_cfg.get("window_days", 7))


def main(argv=None) -> int:
    """Command-line entrypoint: build the retro, write it locally, optionally publish.

    Args:
        argv: Argument list handed to ``argparse``; ``None`` means ``sys.argv[1:]``.

    Returns:
        Always ``0``. A failed hub publish is reported in the output
        (``published`` is ``False``) but does not change the exit status.
    """
    # Default config lives one directory above this package (next to retro/).
    here = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    ap = argparse.ArgumentParser(description="Build (and optionally publish) the weekly coding retro.")
    ap.add_argument("--config", default=os.path.join(here, "config.toml"))
    ap.add_argument("--window-days", type=int, default=None)
    ap.add_argument("--since", default=None)
    ap.add_argument("--until", default=None)
    ap.add_argument("--publish", action="store_true", help="post to the hub coding_retro read model")
    ap.add_argument("--json", action="store_true")
    args = ap.parse_args(argv)

    config = load_config(args.config)
    report = run_retro(config, window_days=args.window_days, since=args.since, until=args.until)

    # The local JSON + markdown report is written on every run, publish or not.
    rcfg = config.get("retro", {})
    write_local(report, _expand(rcfg.get("report_json", "session_memory/retro/last_retro.json")),
                _expand(rcfg.get("report_md", "session_memory/retro/last_retro.md")))

    # ``published`` stays None when --publish was not requested.
    published = None
    if args.publish:
        published = publish_to_hub(report, base_url=rcfg.get("hub_url", "http://127.0.0.1:8000"))

    if args.json:
        print(json.dumps({"report": report, "published": published}, indent=2))
    else:
        print(render_markdown(report))
        if args.publish:
            print(f"\npublished to hub: {published}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
# score at/above which a suggestion is "high" priority even when single-flavor
_HIGH_SCORE = 100.0


def _parse(ts: str) -> datetime:
    """Parse an ISO-8601 timestamp into a timezone-aware datetime.

    A trailing ``Z`` is accepted as UTC. Input without an offset (for example a
    bare date such as ``2026-06-01``) is assumed to be UTC, so the result can
    always be compared with the aware window bounds instead of raising
    ``TypeError``.

    Raises:
        ValueError: If *ts* is not a valid ISO-8601 string.
    """
    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt


def _iso(dt: datetime) -> str:
    """Format *dt* as a second-precision UTC timestamp ending in ``Z``."""
    return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _now() -> datetime:
    """Return the current time as an aware UTC datetime."""
    return datetime.now(timezone.utc)


@dataclass
class Suggestion:
    """One improvement suggestion for one repo, derived from a problem pattern."""

    repo: str
    title: str
    recommendation: str
    priority: str  # high | medium
    score: float
    signal_type: str
    cross_flavor: bool
    pattern_key: str


def _recommendation(pattern_key: str, catalog) -> Optional[str]:
    """Return the first catalog resolution summary for *pattern_key*, if any.

    Returns ``None`` when no catalog is given, the pattern is not in the
    catalog, or the catalog entry has no resolutions.
    """
    if catalog is None:
        return None
    sp = catalog.load(SolutionPattern.make_id(pattern_key))
    if sp and sp.resolutions:
        return sp.resolutions[0].summary
    return None


def weekly_retro(digests: list[dict], catalog=None, *, since: Optional[str] = None,
                 until: Optional[str] = None, window_days: int = 7,
                 max_per_repo: int = 3, min_frequency: int = 2,
                 quality: Optional[QualityConfig] = None) -> dict:
    """Build the ranked weekly retro report over a date window.

    Args:
        digests: Session digests; entries without ``started_at`` are ignored.
        catalog: Optional pattern catalog used to look up recommendations.
        since: Inclusive lower bound (ISO-8601). Defaults to
            ``until - window_days``.
        until: Exclusive upper bound (ISO-8601). Defaults to now (UTC).
        window_days: Window length used when *since* is not given.
        max_per_repo: Maximum number of suggestions kept per repo.
        min_frequency: Forwarded to ``cluster`` as ``min_frequency``.
        quality: Quality filter configuration; defaults to ``QualityConfig()``.

    Returns:
        A dict with ``window``, ``generated_at``, ``n_sessions``,
        ``suggestions`` (cross-flavor first, then by descending score) and
        ``measure``.
    """
    until_dt = _parse(until) if until else _now()
    since_dt = _parse(since) if since else until_dt - timedelta(days=window_days)

    # Half-open window [since, until): a session starting exactly at `until` is excluded.
    windowed = [d for d in digests
                if d.get("started_at") and since_dt <= _parse(d["started_at"]) < until_dt]
    real = filter_real(windowed, quality or QualityConfig())

    patterns = cluster(extract_signals(real), min_frequency=min_frequency)

    by_repo: dict[str, list[Suggestion]] = collections.defaultdict(list)
    for p in patterns:
        if p.polarity != "problem":
            continue  # improvements come from problems
        rec = (_recommendation(p.key, catalog)
               or f"Investigate {p.signal_type.replace('_', ' ')} on {p.locus}")
        priority = "high" if (p.cross_flavor or p.score >= _HIGH_SCORE) else "medium"
        # A pattern seen in several repos yields one suggestion per repo.
        for repo in (p.repos or ["(unknown)"]):
            by_repo[repo].append(Suggestion(
                repo=repo, title=p.title, recommendation=rec, priority=priority,
                score=p.score, signal_type=p.signal_type, cross_flavor=p.cross_flavor,
                pattern_key=p.key))

    suggestions: list[Suggestion] = []
    for repo in sorted(by_repo):
        # Per-repo cap keeps the highest-scoring items.
        items = sorted(by_repo[repo], key=lambda s: -s.score)
        suggestions.extend(items[:max_per_repo])
    # cross-flavor first, then by score (global ordering for the report)
    suggestions.sort(key=lambda s: (not s.cross_flavor, -s.score))

    return {
        # "days" echoes the window_days argument; it is not derived from since/until.
        "window": {"since": _iso(since_dt), "until": _iso(until_dt), "days": window_days},
        "generated_at": _iso(_now()),
        "n_sessions": len(real),
        "suggestions": [asdict(s) for s in suggestions],
        "measure": aggregate(real),
    }
"cross_flavor": false, + "pattern_key": "problem:tool_thrash:tool:Bash", + "priority": "high", + "recommendation": "Batch related shell work into one script, not many small Bash calls", + "repo": "activity-core", + "score": 13128.0, + "signal_type": "tool_thrash", + "title": "problem: tool thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:tool_thrash:tool:Bash", + "priority": "high", + "recommendation": "Batch related shell work into one script, not many small Bash calls", + "repo": "artifact-store", + "score": 13128.0, + "signal_type": "tool_thrash", + "title": "problem: tool thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:tool_thrash:tool:Bash", + "priority": "high", + "recommendation": "Batch related shell work into one script, not many small Bash calls", + "repo": "citation-evidence", + "score": 13128.0, + "signal_type": "tool_thrash", + "title": "problem: tool thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:tool_thrash:tool:Bash", + "priority": "high", + "recommendation": "Batch related shell work into one script, not many small Bash calls", + "repo": "infospace-bench", + "score": 13128.0, + "signal_type": "tool_thrash", + "title": "problem: tool thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:tool_thrash:tool:Bash", + "priority": "high", + "recommendation": "Batch related shell work into one script, not many small Bash calls", + "repo": "railiance-apps", + "score": 13128.0, + "signal_type": "tool_thrash", + "title": "problem: tool thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:tool_thrash:tool:Bash", + "priority": "high", + "recommendation": "Batch related shell work into one script, not many small Bash calls", + "repo": "state-hub", + "score": 13128.0, + "signal_type": "tool_thrash", + "title": "problem: tool thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:schema_thrash:schema_load", + "priority": "high", + "recommendation": "Load the tool 
schemas you'll need once, up front", + "repo": "activity-core", + "score": 441.0, + "signal_type": "schema_thrash", + "title": "problem: schema thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:schema_thrash:schema_load", + "priority": "high", + "recommendation": "Load the tool schemas you'll need once, up front", + "repo": "citation-evidence", + "score": 441.0, + "signal_type": "schema_thrash", + "title": "problem: schema thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:schema_thrash:schema_load", + "priority": "high", + "recommendation": "Load the tool schemas you'll need once, up front", + "repo": "flex-auth", + "score": 441.0, + "signal_type": "schema_thrash", + "title": "problem: schema thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:schema_thrash:schema_load", + "priority": "high", + "recommendation": "Load the tool schemas you'll need once, up front", + "repo": "infospace-bench", + "score": 441.0, + "signal_type": "schema_thrash", + "title": "problem: schema thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:schema_thrash:schema_load", + "priority": "high", + "recommendation": "Load the tool schemas you'll need once, up front", + "repo": "ops-bridge", + "score": 441.0, + "signal_type": "schema_thrash", + "title": "problem: schema thrash" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "activity-core", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. 
read it first before writing to it.<>", + "repo": "citation-evidence", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "infospace-bench", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "issue-facade", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "railiance-apps", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "state-hub", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. 
read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "the-custodian", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has not been read yet. read it first before writing to it.<>", + "priority": "high", + "recommendation": "Investigate recurring error on file has not been read yet. read it first before writing to it.<>", + "repo": "vergabe-teilnahme", + "score": 290.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "priority": "medium", + "recommendation": "Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "repo": "artifact-store", + "score": 78.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "priority": "medium", + "recommendation": "Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "repo": "issue-facade", + "score": 78.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has been modified since read, either by the user or by a linter. 
read it again before attempting to write it.<>", + "priority": "medium", + "recommendation": "Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "repo": "railiance-apps", + "score": 78.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "priority": "medium", + "recommendation": "Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<>", + "repo": "state-hub", + "score": 78.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:budget_overrun:tokens", + "priority": "medium", + "recommendation": "Read narrowly \u2014 target the region you need, not whole large files", + "repo": "artifact-store", + "score": 50.55, + "signal_type": "budget_overrun", + "title": "problem: budget overrun" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:{", + "priority": "medium", + "recommendation": "Investigate recurring error on {", + "repo": "vergabe-teilnahme", + "score": 12.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:found errors ( fixed, remaining).", + "priority": "medium", + "recommendation": "Investigate recurring error on found errors ( fixed, remaining).", + "repo": "ops-bridge", + "score": 10.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:(note: edit also tried swapping \\uxxxx escapes and their characters; neither form matched, so the mismatch is 
likely elsewhere in old_string. re-read the file a", + "priority": "medium", + "recommendation": "Investigate recurring error on (note: edit also tried swapping \\uxxxx escapes and their characters; neither form matched, so the mismatch is likely elsewhere in old_string. re-read the file a", + "repo": "net-kingdom", + "score": 6.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error:found error ( fixed, remaining).", + "priority": "medium", + "recommendation": "Investigate recurring error on found error ( fixed, remaining).", + "repo": "ops-bridge", + "score": 6.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + }, + { + "cross_flavor": false, + "pattern_key": "problem:recurring_error: failed, passed in .00s", + "priority": "medium", + "recommendation": "Investigate recurring error on failed, passed in .00s", + "repo": "agentic-resources", + "score": 4.0, + "signal_type": "recurring_error", + "title": "problem: recurring error" + } + ], + "window": { + "days": 30, + "since": "2026-05-08T17:14:00Z", + "until": "2026-06-07T17:14:00Z" + } +} diff --git a/session_memory/retro/last_retro.md b/session_memory/retro/last_retro.md new file mode 100644 index 0000000..539955b --- /dev/null +++ b/session_memory/retro/last_retro.md @@ -0,0 +1,39 @@ +# Weekly Coding Retro (2026-05-08 → 2026-06-07) +_23 real sessions · generated 2026-06-07T17:14:00Z_ + +## Top improvement suggestions (cross-flavor first, ≤3 per repo) +- **net-kingdom** (high, score=54.0) [CROSS-FLAVOR]: cross-flavor problem: recurring error — Investigate recurring error on make: *** [makefile:: fix-consistency] error +- **activity-core** (high, score=13128.0): problem: tool thrash — Batch related shell work into one script, not many small Bash calls +- **artifact-store** (high, score=13128.0): problem: tool thrash — Batch related shell work into one script, not many small Bash calls +- 
**citation-evidence** (high, score=13128.0): problem: tool thrash — Batch related shell work into one script, not many small Bash calls +- **infospace-bench** (high, score=13128.0): problem: tool thrash — Batch related shell work into one script, not many small Bash calls +- **railiance-apps** (high, score=13128.0): problem: tool thrash — Batch related shell work into one script, not many small Bash calls +- **state-hub** (high, score=13128.0): problem: tool thrash — Batch related shell work into one script, not many small Bash calls +- **activity-core** (high, score=441.0): problem: schema thrash — Load the tool schemas you'll need once, up front +- **citation-evidence** (high, score=441.0): problem: schema thrash — Load the tool schemas you'll need once, up front +- **flex-auth** (high, score=441.0): problem: schema thrash — Load the tool schemas you'll need once, up front +- **infospace-bench** (high, score=441.0): problem: schema thrash — Load the tool schemas you'll need once, up front +- **ops-bridge** (high, score=441.0): problem: schema thrash — Load the tool schemas you'll need once, up front +- **activity-core** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **citation-evidence** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **infospace-bench** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **issue-facade** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **railiance-apps** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. 
read it first before writing to it.<> +- **state-hub** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **the-custodian** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **vergabe-teilnahme** (high, score=290.0): problem: recurring error — Investigate recurring error on file has not been read yet. read it first before writing to it.<> +- **artifact-store** (medium, score=78.0): problem: recurring error — Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<> +- **issue-facade** (medium, score=78.0): problem: recurring error — Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<> +- **railiance-apps** (medium, score=78.0): problem: recurring error — Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<> +- **state-hub** (medium, score=78.0): problem: recurring error — Investigate recurring error on file has been modified since read, either by the user or by a linter. read it again before attempting to write it.<> +- **artifact-store** (medium, score=50.55): problem: budget overrun — Read narrowly — target the region you need, not whole large files +- **vergabe-teilnahme** (medium, score=12.0): problem: recurring error — Investigate recurring error on { +- **ops-bridge** (medium, score=10.0): problem: recurring error — Investigate recurring error on found errors ( fixed, remaining). 
+- **net-kingdom** (medium, score=6.0): problem: recurring error — Investigate recurring error on (note: edit also tried swapping \uxxxx escapes and their characters; neither form matched, so the mismatch is likely elsewhere in old_string. re-read the file a +- **ops-bridge** (medium, score=6.0): problem: recurring error — Investigate recurring error on found error ( fixed, remaining). +- **agentic-resources** (medium, score=4.0): problem: recurring error — Investigate recurring error on failed, passed in .00s + +## Fleet snapshot +- infra-overhead median: 0.167 +- error rate: 0.957 · schema-thrash: 7 +- success rate: 1.0 · tokens p50: 250725 diff --git a/session_memory/retro/publish.py b/session_memory/retro/publish.py new file mode 100644 index 0000000..0db64b1 --- /dev/null +++ b/session_memory/retro/publish.py @@ -0,0 +1,78 @@ +"""Publish the weekly retro (AGENTIC-WP-0010 T02). + +The retro is published to the State Hub as a **read model** — a progress event of +``event_type=coding_retro`` whose ``detail`` carries the structured report. This is +exactly how ``daily-triage-report`` surfaces, and it is what activity-core's +``coding_retro`` resolver (ACTIVITY-WP-0008) reads. A local JSON + markdown report +is always written; the hub publish is best-effort and **degrades gracefully** when +the hub is unreachable. 
DEFAULT_HUB = "http://127.0.0.1:8000"


def render_markdown(report: dict) -> str:
    """Render the retro report as a markdown document (no trailing newline).

    Missing top-level keys are tolerated; each suggestion must carry ``repo``,
    ``priority``, ``score``, ``title`` and ``recommendation``.
    """
    w = report.get("window", {})
    lines = [
        # Only the date part (first 10 characters) of the window bounds is shown.
        f"# Weekly Coding Retro ({w.get('since', '')[:10]} → {w.get('until', '')[:10]})",
        f"_{report.get('n_sessions', 0)} real sessions · generated {report.get('generated_at', '')}_",
        "",
        "## Top improvement suggestions (cross-flavor first, ≤3 per repo)",
    ]
    if not report.get("suggestions"):
        lines.append("- (no recurring problems above threshold this week)")
    for s in report.get("suggestions", []):
        flag = " [CROSS-FLAVOR]" if s.get("cross_flavor") else ""
        lines.append(f"- **{s['repo']}** ({s['priority']}, score={s['score']}){flag}: "
                     f"{s['title']} — {s['recommendation']}")
    m = report.get("measure", {})
    lines += ["", "## Fleet snapshot",
              f"- infra-overhead median: {m.get('infra_overhead_share_median')}",
              f"- error rate: {m.get('error_rate')} · schema-thrash: {m.get('schema_thrash_sessions')}",
              f"- success rate: {m.get('success_rate')} · tokens p50: {m.get('tokens_p50')}"]
    return "\n".join(lines)


def write_local(report: dict, json_path: str, md_path: Optional[str] = None) -> None:
    """Write the report as JSON and, when *md_path* is given, as markdown.

    Parent directories of both targets are created if missing. The JSON is
    written with sorted keys and a trailing newline.
    """
    os.makedirs(os.path.dirname(json_path) or ".", exist_ok=True)
    with open(json_path, "w", encoding="utf-8") as fh:
        json.dump(report, fh, indent=2, sort_keys=True)
        fh.write("\n")
    if md_path:
        # The markdown mirror may be configured in a different directory than the JSON.
        os.makedirs(os.path.dirname(md_path) or ".", exist_ok=True)
        with open(md_path, "w", encoding="utf-8") as fh:
            fh.write(render_markdown(report))
            fh.write("\n")


def _http_post(url: str, payload: dict) -> None:
    """POST *payload* as JSON to *url* with a 10 second timeout; errors propagate."""
    req = urllib.request.Request(url, data=json.dumps(payload).encode(),
                                 headers={"Content-Type": "application/json"}, method="POST")
    with urllib.request.urlopen(req, timeout=10) as r:
        r.read()


def publish_to_hub(report: dict, *, base_url: str = DEFAULT_HUB,
                   poster: Optional[Callable[[str, dict], None]] = None) -> bool:
    """POST the retro as an event_type=coding_retro progress event. Best-effort.

    Args:
        report: The retro report; sent unchanged as the event ``detail``.
        base_url: Hub base URL; the event goes to ``<base_url>/progress/``.
        poster: Callable ``(url, payload)`` doing the transport; defaults to
            ``_http_post``.

    Returns:
        ``True`` when the poster returned normally, ``False`` when it raised.
        The failure is logged as a warning and never propagated.
    """
    import logging

    poster = poster or _http_post
    n = report.get("n_sessions", 0)
    k = len(report.get("suggestions", []))
    payload = {
        "event_type": "coding_retro",
        "author": "helix-forge",
        "summary": f"Weekly coding retro: {k} ranked suggestions across "
                   f"{report.get('window', {}).get('days', 7)} days ({n} sessions).",
        "detail": report,
    }
    url = f"{base_url.rstrip('/')}/progress/"
    try:
        poster(url, payload)
    except Exception as exc:  # best-effort boundary: the hub must not break the retro
        logging.getLogger(__name__).warning("coding_retro publish to %s failed: %r", url, exc)
        return False
    return True
def test_recommendation_from_catalog(tmp_path):
    """The recommendation equals the resolution summary stored in the catalog."""
    pattern_key = "problem:retry_storm:retries"
    catalog = Catalog(str(tmp_path / "catalog"))
    pattern = SolutionPattern(
        id=SolutionPattern.make_id(pattern_key), name="Retry storm", version="1.0.0",
        polarity="problem", problem="repeated retries",
        resolutions=[Resolution(summary="Stop and diagnose before retrying")],
    )
    catalog.upsert(pattern)

    sessions = []
    for idx in range(2):
        started = "2026-06-0{}T10:00:00Z".format(idx + 1)
        sessions.append(_digest(f"claude:{idx}", "r1", started))

    retro = weekly_retro(sessions, catalog=catalog,
                         since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    top = retro["suggestions"][0]
    assert top["recommendation"] == "Stop and diagnose before retrying"


def test_caps_three_per_repo():
    """A single repo never receives more than three suggestions."""
    # Original author's intent: five distinct problem signals in one repo,
    # capped at 3.
    sessions = []
    for idx in range(2):
        session = _digest(f"claude:{idx}", "r1", "2026-06-0{}T10:00:00Z".format(idx + 1))
        session.update(
            markers={"errors": 5, "retries": 5, "test_runs": 0, "human_interventions": 0},
            tool_histogram={"Bash": 120, "ToolSearch": 9,
                            "mcp__state-hub__x": 30, "Edit": 5},
            outcome="abandoned",
        )
        sessions.append(session)

    retro = weekly_retro(sessions, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    for_r1 = [entry for entry in retro["suggestions"] if entry["repo"] == "r1"]
    assert len(for_r1) <= 3


def test_cross_flavor_ranks_first():
    """With sessions from two flavors, the top suggestion is cross-flavor and high priority."""
    claude_session = _digest("claude:a", "r1", "2026-06-01T10:00:00Z", flavor="claude")
    grok_session = _digest("grok:b", "r2", "2026-06-02T10:00:00Z", flavor="grok")

    retro = weekly_retro([claude_session, grok_session],
                         since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z")
    top = retro["suggestions"][0]
    assert top["cross_flavor"] is True
    assert top["priority"] == "high"
digs = [_digest(f"claude:{i}", "r1", "2026-06-0{}T10:00:00Z".format(i + 1)) for i in range(2)] + r = weekly_retro(digs, since="2026-05-30T00:00:00Z", until="2026-06-08T00:00:00Z") + assert r["measure"]["n_sessions"] == 2 diff --git a/tests/test_retro_entrypoint.py b/tests/test_retro_entrypoint.py new file mode 100644 index 0000000..0820c12 --- /dev/null +++ b/tests/test_retro_entrypoint.py @@ -0,0 +1,63 @@ +"""Retro entrypoint tests (AGENTIC-WP-0010 T03).""" + +import json +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from session_memory.core.store import Store # noqa: E402 +from session_memory.retro.__main__ import main, run_retro # noqa: E402 + + +def _digest(uid, repo, ts, retries=5): + return { + "session_uid": uid, "flavor": "claude", "repo": repo, "outcome": "fail", + "started_at": ts, "event_count": 40, + "first_prompt": "Fix the failing build and retry the suite repeatedly", + "cost": {"input_tokens": 100, "output_tokens": 10}, + "tool_histogram": {"Bash": 20, "Edit": 12, "Read": 8}, + "markers": {"errors": 0, "retries": retries, "test_runs": 0}, + "error_snippets": [], + } + + +def _config(tmp_path): + store = tmp_path / ".store" + toml = tmp_path / "config.toml" + toml.write_text( + f'[store]\ndb_path="{store / "m.db"}"\nblob_dir="{store / "blobs"}"\ncursor="{store / "c.json"}"\n' + f'[curate]\ncatalog_dir="{tmp_path / "catalog"}"\n' + f'[retro]\nwindow_days=7\nreport_json="{tmp_path / "r.json"}"\nreport_md="{tmp_path / "r.md"}"\n') + st = Store(str(store / "m.db"), str(store / "blobs")) + st.write_digest("claude:a", _digest("claude:a", "r1", "2026-06-01T10:00:00Z")) + st.write_digest("claude:b", _digest("claude:b", "r1", "2026-06-02T10:00:00Z")) + st.close() + return str(toml), tmp_path + + +def test_run_retro_over_store(tmp_path): + from session_memory.ingest import load_config + cfg_path, _ = _config(tmp_path) + rep = run_retro(load_config(cfg_path), since="2026-05-30T00:00:00Z", 
until="2026-06-08T00:00:00Z") + assert rep["n_sessions"] == 2 + assert rep["suggestions"] + + +def test_main_writes_report_files(tmp_path, capsys): + cfg_path, tp = _config(tmp_path) + rc = main(["--config", cfg_path, "--since", "2026-05-30T00:00:00Z", + "--until", "2026-06-08T00:00:00Z"]) + assert rc == 0 + assert os.path.exists(str(tp / "r.json")) and os.path.exists(str(tp / "r.md")) + assert "Weekly Coding Retro" in capsys.readouterr().out + + +def test_main_json(tmp_path, capsys): + cfg_path, _ = _config(tmp_path) + rc = main(["--config", cfg_path, "--since", "2026-05-30T00:00:00Z", + "--until", "2026-06-08T00:00:00Z", "--json"]) + assert rc == 0 + data = json.loads(capsys.readouterr().out) + assert data["report"]["n_sessions"] == 2 + assert data["published"] is None # no --publish diff --git a/tests/test_retro_publish.py b/tests/test_retro_publish.py new file mode 100644 index 0000000..8ace8cd --- /dev/null +++ b/tests/test_retro_publish.py @@ -0,0 +1,62 @@ +"""Retro publish tests (AGENTIC-WP-0010 T02).""" + +import json +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from session_memory.retro.publish import ( # noqa: E402 + publish_to_hub, + render_markdown, + write_local, +) + + +def _report(): + return { + "window": {"since": "2026-06-01T00:00:00Z", "until": "2026-06-08T00:00:00Z", "days": 7}, + "generated_at": "2026-06-08T19:00:00Z", "n_sessions": 12, + "suggestions": [ + {"repo": "state-hub", "title": "schema thrash", "recommendation": "front-load schemas", + "priority": "high", "score": 632.0, "cross_flavor": False, "signal_type": "schema_thrash"}, + ], + "measure": {"infra_overhead_share_median": 0.117, "error_rate": 0.96, + "schema_thrash_sessions": 8, "success_rate": 1.0, "tokens_p50": 250725}, + } + + +def test_render_markdown(): + md = render_markdown(_report()) + assert "Weekly Coding Retro" in md + assert "**state-hub**" in md and "front-load schemas" in md + assert "infra-overhead 
def test_write_local_json_and_md(tmp_path):
    """write_local writes both reports into a directory that does not exist yet."""
    out_dir = tmp_path / "out"
    jp = str(out_dir / "retro.json")
    mp = str(out_dir / "retro.md")
    write_local(_report(), jp, mp)
    # Read back as UTF-8 (the Markdown contains non-ASCII characters) and
    # close the handles deterministically.
    with open(jp, encoding="utf-8") as fh:
        assert json.load(fh)["n_sessions"] == 12
    with open(mp, encoding="utf-8") as fh:
        assert "Weekly Coding Retro" in fh.read()


def test_publish_calls_poster_with_coding_retro_event():
    """publish_to_hub sends a coding_retro event to ``<base_url>/progress/``."""
    captured = {}

    def poster(url, payload):
        captured["url"] = url
        captured["payload"] = payload

    ok = publish_to_hub(_report(), base_url="http://hub", poster=poster)
    assert ok is True
    assert captured["url"] == "http://hub/progress/"
    assert captured["payload"]["event_type"] == "coding_retro"
    assert captured["payload"]["detail"]["n_sessions"] == 12


def test_publish_degrades_gracefully_on_failure():
    """A raising poster makes publish_to_hub return False instead of propagating."""
    def boom(url, payload):
        raise OSError("hub down")

    assert publish_to_hub(_report(), poster=boom) is False
+ +This is the dependency that activity-core's weekly schedule consumes +(`activity-wp-0008` — *Weekly Coding Retrospection schedule*). Keeping the analysis +here and publishing to the hub keeps activity-core decoupled from the +workstation-local session store. + +## Windowed Weekly Retro Report (top-3 per repo) + +```task +id: AGENTIC-WP-0010-T01 +status: done +priority: high +state_hub_task_id: "34d30250-c0d3-4837-81c7-1c858c2ee801" +``` + +`retro/build.py`: window digests by date (last N days), run +`extract_signals` + `cluster` over the window, explode problem patterns per repo, +rank by score and cap at **3 per repo**. Attach a recommendation per suggestion +from the Pattern Catalog (lookup by pattern key → first resolution) with a sensible +default. Include a fleet measure snapshot for context. Pure function over digests; +unit-tested. + +## Publish Retro to the Hub + Local Report + +```task +id: AGENTIC-WP-0010-T02 +status: done +priority: high +state_hub_task_id: "cbe1288a-ce51-48c0-b741-adf4a6cbce3a" +``` + +Publish the ranked retro to the State Hub as a read model: POST a progress event +(`event_type=coding_retro`) with the structured report (`suggestions[]`, window, +`generated_at`) in `detail`. Also write a local JSON + markdown report. **Graceful +degrade** when the hub is unreachable (write local, skip publish). Hub URL under +`[retro]` in `config.toml`. + +## Retro Entrypoint + Tests + Live Verify + +```task +id: AGENTIC-WP-0010-T03 +status: done +priority: medium +state_hub_task_id: "af540220-58dd-4cf5-a9dc-6db4b995fa08" +``` + +`python -m session_memory.retro [--window-days 7] [--publish] [--json]`: windowed +retro → ranked top-3 per repo → optional hub publish + local report. Document in +`session_memory/README.md`. Live verify over the real local sessions. After +workplan updates, notify the operator to run from `~/state-hub`: + +```bash +make fix-consistency REPO=agentic-resources +```