diff --git a/session_memory/README.md b/session_memory/README.md index d4e629e..9c173bf 100644 --- a/session_memory/README.md +++ b/session_memory/README.md @@ -26,7 +26,14 @@ session_memory/ detect/signals.py # signal extractors over digests detect/cluster.py # cluster signals -> candidate patterns + cross-flavor flag detect/__main__.py # python -m session_memory.detect (ranked report) - config.toml # store paths, retention caps, sources, repo->domain map + curate/schema.py # SolutionPattern artifact + per-flavor rendering hints + curate/catalog.py # versioned, files-first Pattern Catalog (dedup on id) + curate/gating.py # promotion evidence bar + bloat guard + curate/review.py # discuss/approve/reject -> promote workflow + curate/decisions.py # hub decision audit trail (graceful local-queue fallback) + curate/__main__.py # python -m session_memory.curate (interactive / --auto-approve) + catalog/ # the committed Pattern Catalog (source of truth) + config.toml # store paths, retention caps, sources, repo->domain map, curate gate ``` The local store lives under `session_memory/.store/` (gitignored). @@ -71,6 +78,42 @@ Candidates are persisted to a Tier 2 `patterns` table and are the input to the Curate phase (Phase 2). Patterns whose evidence spans more than one agent flavor are flagged `[CROSS-FLAVOR]` — the highest-value reuse targets. +## Curate candidates into the Pattern Catalog + +Review detect candidates into versioned **Solution Patterns** held in the +files-first catalog (`session_memory/catalog/`). The flow is **detect → curate → +(Phase 3) distribute**; `curate` refreshes candidates by running detect first. 
+ +```bash +python -m session_memory.curate # interactive review (a/r/d per candidate) +python -m session_memory.curate --auto-approve # batch: promote all that clear the evidence bar +python -m session_memory.curate --json # machine-readable result +``` + +- **Promotion** writes a `SolutionPattern` file (id = source candidate key, so + re-promoting the same candidate dedups; content changes bump the semver and + archive the prior version to `.history.jsonl`). +- The **evidence bar** (`[curate.gate]`) sets two floors: a promote floor and a + stricter *distribution* floor. A thin-but-real candidate lands `provisional`; + one clearing the distribution floor lands `approved` + `distribution_ready`. +- A **bloat guard** flags duplicate / near-duplicate candidates so the catalog + stays lean. +- Re-review is **idempotent** — a remembered decision is skipped unless the + candidate's evidence changed; a prior reject is not re-surfaced. +- Each final promote/reject is recorded as a **hub decision**; if the hub is + offline the decision is queued to `[curate].decision_queue` for later sync + (the same after-the-fact pattern used in Phase 1). + +### Curate knobs (`[curate]` / `[curate.gate]` in config.toml) + +| Key | Meaning | +|-----|---------| +| `catalog_dir` | committed Pattern Catalog dir (source of truth) | +| `review_log` / `decision_queue` | remembered decisions + pending hub decisions (gitignored) | +| `min_frequency` / `min_sessions` / `min_cost_impact` | floor to promote at all | +| `dist_require_cross_flavor` | require cross-flavor evidence to be distribution-eligible | +| `dist_min_frequency` / `dist_min_cost_impact` | stricter floor for `distribution_ready` | + ## Retention knobs (`[retention]` in config.toml) | Key | Meaning | @@ -86,7 +129,7 @@ exists, except the explicitly-reported hard-cap overflow path. 
## Tests ```bash -python -m pytest # 26 tests: schema, adapter, store, digest, retention, ingest +python -m pytest # schema, adapters, store, digest, retention, ingest, detect, curate ``` ## Status @@ -95,5 +138,7 @@ python -m pytest # 26 tests: schema, adapter, store, digest, retention, adapter, ingest sweep. - **Phase 1** (AGENTIC-WP-0003): Codex + Grok adapters, multi-file session merge, and the Detect pipeline (signals → clustering → cross-flavor candidate patterns). -- **Next — Phase 2 (Curate):** review/approve candidates into a versioned pattern - catalog. **Phase 3 (Distribute) / Phase 4 (Measure)** follow per the PRD. +- **Phase 2** (AGENTIC-WP-0004): Curate — Solution Pattern schema, versioned + files-first Pattern Catalog, discuss/approve/reject review with an evidence bar + + bloat guard, and hub-decision audit trail. +- **Next — Phase 3 (Distribute) / Phase 4 (Measure)** follow per the PRD. diff --git a/session_memory/config.toml b/session_memory/config.toml index c0f989d..5ea5256 100644 --- a/session_memory/config.toml +++ b/session_memory/config.toml @@ -33,8 +33,10 @@ glob = "*/*/chat_history.jsonl" # Curate phase (AGENTIC-WP-0004): catalog location + promotion evidence bar. [curate] -catalog_dir = "session_memory/catalog" # files-first Pattern Catalog (committed) -review_log = "session_memory/.store/reviews.jsonl" # remembered decisions (gitignored) +catalog_dir = "session_memory/catalog" # files-first Pattern Catalog (committed) +review_log = "session_memory/.store/reviews.jsonl" # remembered decisions (gitignored) +decision_queue = "session_memory/.store/decisions.queue.jsonl" # hub decisions pending sync +state_hub_workstream_id = "b3703684-f60e-42f3-b03e-dabe3e8ce3f4" # AGENTIC-WP-0004 # Evidence bar (OQ5): floors to promote at all, and stricter floors to be # distribution-eligible (status=approved, distribution_ready=true). 
# Accepted spellings for the interactive prompt, mapped to a canonical verb.
_CHOICES = {
    "a": "approve", "approve": "approve",
    "r": "reject", "reject": "reject",
    "d": "discuss", "discuss": "discuss",
}


def _curate_paths(config: dict) -> tuple:
    """Resolve the curate-phase locations from the loaded config.

    Reads the ``[curate]`` table, falling back to the in-repo default for any
    missing path key, and passes each path through ``_expand``.

    Returns:
        ``(catalog_dir, review_log, decision_queue, workstream_id)``;
        ``workstream_id`` is ``None`` when ``state_hub_workstream_id`` is unset.
    """
    section = config.get("curate", {})
    catalog_dir = _expand(section.get("catalog_dir", "session_memory/catalog"))
    review_log = _expand(section.get("review_log", "session_memory/.store/reviews.jsonl"))
    queue = _expand(
        section.get("decision_queue", "session_memory/.store/decisions.queue.jsonl")
    )
    return catalog_dir, review_log, queue, section.get("state_hub_workstream_id")


def _render_candidate(cand: dict, gate, existing) -> str:
    """Format one candidate as the multi-line text shown to the reviewer.

    Args:
        cand: candidate mapping; ``title``, ``key`` and ``frequency`` are
            required, every other field is optional.
        gate: evidence-bar settings handed to ``evaluate``.
        existing: current catalog entries, handed to ``bloat_warnings``.

    Returns:
        Header, metrics, gate verdict and one ``bloat:`` line per warning,
        joined with newlines.
    """
    verdict = evaluate(cand, gate)
    flag = " [CROSS-FLAVOR]" if cand.get("cross_flavor") else ""
    flavors = ",".join(cand.get("flavors", []))
    repos = ",".join(cand.get("repos", [])) or "-"
    n_sessions = len(cand.get("sessions", []))
    # Only show the parenthesised reasons when the gate actually reported some.
    reasons = f" ({'; '.join(verdict.reasons)})" if verdict.reasons else ""
    lines = [
        f"\n{cand['title']}{flag}",
        f" key={cand['key']} score={cand.get('score')} freq={cand['frequency']} "
        f"impact={cand.get('cost_impact')}",
        f" flavors={flavors} repos={repos} sessions={n_sessions}",
        f" gate: promotable={verdict.promotable} "
        f"distribution_ready={verdict.distribution_ready}{reasons}",
    ]
    lines.extend(f" bloat: {warning}" for warning in bloat_warnings(cand, existing))
    return "\n".join(lines)


def _parse_choice(raw: str):
    """Map a typed answer to ``"approve"``/``"reject"``/``"discuss"``.

    Matching ignores case and surrounding whitespace; anything unrecognised
    yields ``None`` so the caller can re-prompt.
    """
    return _CHOICES.get(raw.strip().lower())


def _read_line(prompt: str):
    """Prompt on stdin; return the stripped answer, or ``None`` if stdin is closed."""
    try:
        return input(prompt).strip()
    except EOFError:
        return None


def _interactive_decider(gate, catalog):
    """Build the decide-callback used for interactive review.

    The callback prints the rendered candidate, then prompts until it gets a
    recognised answer and returns ``(decision, rationale)``. An empty rationale
    falls back to ``"approved"`` / ``"rejected"``.

    If stdin is closed (piped input exhausted, Ctrl-D) the candidate is
    deferred as ``DISCUSS`` instead of aborting the run with ``EOFError``, so
    decisions already taken in this session are not followed by a traceback.
    """
    def decide(cand):
        print(_render_candidate(cand, gate, catalog.list()))
        while True:
            answer = _read_line(" [a]pprove / [r]eject / [d]iscuss ? ")
            if answer is None:
                return (DISCUSS, "deferred: input closed before a decision was made")
            verdict = _parse_choice(answer)
            if verdict is None:
                continue  # unrecognised answer: ask again
            if verdict == "discuss":
                return (DISCUSS, "deferred for discussion")
            # A closed stdin at this point still honours the choice just made.
            rationale = _read_line(" rationale: ")
            if verdict == "approve":
                return (APPROVE, rationale or "approved")
            return (REJECT, rationale or "rejected")
    return decide


def _rejection_rationale(reasons) -> str:
    """Return the audit rationale for an automatic rejection.

    Joins the gate's reasons with ``"; "``; when the gate reported none, a
    generic explanation is used so the recorded rationale is never the bare
    ``"auto-rejected: "`` prefix.
    """
    detail = "; ".join(reasons) if reasons else "below the evidence bar"
    return "auto-rejected: " + detail


def _auto_decider(gate):
    """Batch policy: approve candidates clearing the promote floor, reject the rest."""
    def decide(cand):
        verdict = evaluate(cand, gate)
        if verdict.promotable:
            return (APPROVE, "auto-approved: clears evidence bar")
        return (REJECT, _rejection_rationale(verdict.reasons))
    return decide


def _summary(result, n_candidates: int) -> str:
    """Render the human-readable outcome of a review run.

    Args:
        result: review result exposing ``approved`` (``(key, action)`` pairs)
            and ``rejected`` / ``deferred`` / ``skipped`` (sequences of keys).
        n_candidates: number of candidates that were offered for review.

    Returns:
        A six-line report; ``catalog writes`` counts only approvals whose
        action is ``added``, ``versioned`` or ``updated``.
    """
    approved = ", ".join(f"{key}:{action}" for key, action in result.approved) or "-"
    rejected = ", ".join(result.rejected) or "-"
    deferred = ", ".join(result.deferred) or "-"
    writes = sum(
        1 for _, action in result.approved
        if action in ("added", "versioned", "updated")
    )
    return "\n".join([
        f"# Curate summary ({n_candidates} candidates reviewed)",
        f" approved : {len(result.approved)} ({approved})",
        f" rejected : {len(result.rejected)} ({rejected})",
        f" deferred : {len(result.deferred)} ({deferred})",
        f" skipped : {len(result.skipped)} (already decided)",
        f" catalog writes: {writes}",
    ])


def main(argv=None) -> int:
    """Run the curate workflow end to end.

    Refreshes candidates via ``run_detect``, reviews them (interactively, or in
    batch with ``--auto-approve``) and prints either a text summary or, with
    ``--json``, a JSON document.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit status (always ``0`` on normal completion).
    """
    pkg_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    ap = argparse.ArgumentParser(description="Curate detect candidates into the Pattern Catalog.")
    ap.add_argument("--config", default=os.path.join(pkg_root, "config.toml"))
    ap.add_argument("--auto-approve", action="store_true",
                    help="batch mode: promote everything clearing the evidence bar")
    ap.add_argument("--min-frequency", type=int, default=2,
                    help="minimum frequency passed to detect when refreshing candidates")
    ap.add_argument("--workstream-id", default=None, help="hub workstream for decisions")
    ap.add_argument("--json", action="store_true", help="emit machine-readable JSON")
    args = ap.parse_args(argv)

    config = load_config(args.config)
    candidates = run_detect(config, min_frequency=args.min_frequency)

    catalog_dir, review_log_path, queue_path, ws_id = _curate_paths(config)
    gate = gate_config(config)
    catalog = Catalog(catalog_dir)
    log = ReviewLog(review_log_path)
    # The CLI flag wins over the workstream configured in [curate].
    recorder = DecisionRecorder(queue_path, workstream_id=args.workstream_id or ws_id)

    decide = _auto_decider(gate) if args.auto_approve else _interactive_decider(gate, catalog)
    result = review(candidates, decide, catalog, log, gate=gate, recorder=recorder)

    # Read the pending queue once; it is reported below but not modified.
    n_queued = len(recorder.pending())

    if args.json:
        print(json.dumps({
            "approved": result.approved, "rejected": result.rejected,
            "deferred": result.deferred, "skipped": result.skipped,
            "decisions_queued": n_queued,
        }, indent=2))
    else:
        print(_summary(result, len(candidates)))
        if n_queued:
            print(f" decisions queued (hub offline): {n_queued} "
                  f"-> {queue_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
def _digest(uid, flavor, repo, **markers):
    """Build a minimal failed-session digest for seeding the store.

    Only the ``errors`` and ``retries`` keyword markers are read (default 0);
    the remaining markers are pinned to 0 and ``outcome`` is always ``"fail"``.
    """
    return {
        "session_uid": uid,
        "flavor": flavor,
        "repo": repo,
        "outcome": "fail",
        "cost": {"input_tokens": 10, "output_tokens": 1},
        "markers": {
            "errors": markers.get("errors", 0),
            "retries": markers.get("retries", 0),
            "test_runs": 0,
            "edits": 0,
            "human_interventions": 0,
        },
    }


def _write_config(tmp_path) -> tuple:
    """Write an isolated curate config under *tmp_path*.

    Returns:
        ``(config_path, store_dir, catalog_dir)`` as strings.
    """
    store = tmp_path / ".store"
    catalog = tmp_path / "catalog"

    def toml(p) -> str:
        # Forward slashes only: a backslash inside a TOML basic string starts
        # an escape sequence, so a native Windows path would fail to parse.
        return p.as_posix()

    cfg = f"""
[store]
db_path = "{toml(store / 'm.db')}"
blob_dir = "{toml(store / 'blobs')}"
cursor = "{toml(store / 'c.json')}"

[curate]
catalog_dir = "{toml(catalog)}"
review_log = "{toml(store / 'reviews.jsonl')}"
decision_queue = "{toml(store / 'decisions.queue.jsonl')}"

[curate.gate]
min_frequency = 2
min_sessions = 2
"""
    path = tmp_path / "config.toml"
    path.write_text(cfg, encoding="utf-8")
    return str(path), str(store), str(catalog)


def _seed_cross_flavor_sessions(store_dir) -> None:
    """Store two failing sessions from different flavors (claude + codex)."""
    st = Store(os.path.join(store_dir, "m.db"), os.path.join(store_dir, "blobs"))
    try:
        st.write_digest("claude:a", _digest("claude:a", "claude", "r1", retries=5))
        st.write_digest("codex:b", _digest("codex:b", "codex", "r2", retries=4))
    finally:
        # Release the store even if seeding fails, so main() can reopen it.
        st.close()


def test_auto_approve_promotes_cross_flavor(tmp_path, capsys):
    """Batch mode promotes the seeded candidate as provisional and queues its decision."""
    cfg_path, store_dir, catalog_dir = _write_config(tmp_path)
    _seed_cross_flavor_sessions(store_dir)

    rc = main(["--config", cfg_path, "--auto-approve"])
    assert rc == 0

    patterns = Catalog(catalog_dir).list()
    assert len(patterns) == 1
    promoted = patterns[0]
    assert promoted.polarity == "problem"
    # clears the promote floor (freq>=2) but below the default distribution
    # floor (freq>=3) -> promoted as provisional, not distribution-ready
    assert promoted.status == "provisional"
    assert promoted.distribution_ready is False

    out = capsys.readouterr().out
    assert "Curate summary" in out
    # hub offline in tests -> decision queued
    assert "decisions queued" in out


def test_rerun_is_idempotent(tmp_path):
    """A second batch pass neither duplicates the pattern nor bumps its version."""
    cfg_path, store_dir, catalog_dir = _write_config(tmp_path)
    _seed_cross_flavor_sessions(store_dir)

    assert main(["--config", cfg_path, "--auto-approve"]) == 0
    # second pass: already decided
    assert main(["--config", cfg_path, "--auto-approve"]) == 0

    cat = Catalog(catalog_dir)
    patterns = cat.list()
    assert len(patterns) == 1
    assert cat.load(patterns[0].id).version == "1.0.0"  # no spurious bump