session-memory Phase 2: review workflow (T03)

UI-free discuss/approve/reject engine that drives detect candidates into the catalog via a `decide` callback. `candidate_to_pattern` builds a provisional SolutionPattern with per-flavor rendering-hint stubs. ReviewLog makes re-review idempotent: prior rejects are remembered, and a candidate is re-surfaced only when its evidence fingerprint changes. 6 new tests; suite 58/58 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 00:25:10 +02:00
parent c6164a82ba
commit e51fd8154d
3 changed files with 229 additions and 1 deletions
--- a/session_memory/curate/review.py
+++ b/session_memory/curate/review.py
@@ -0,0 +1,135 @@
+"""Curation review workflow (FR-U1/FR-U2; T03).
+
+Drives Phase 1 detect candidates through a **discuss / approve / reject** review
+and, on approve, promotes the candidate into a :class:`SolutionPattern` written to
+the :class:`Catalog`. The actual decision is supplied by a ``decide`` callback so
+this engine stays UI-free — the ``__main__`` entrypoint (T06) plugs in interactive
+or batch (auto-approve) logic.
+
+Re-review is **idempotent** via a :class:`ReviewLog`: a candidate already decided
+is skipped unless its *evidence fingerprint* changed (new sessions/frequency), so
+a prior **reject** is remembered and not re-surfaced, and a prior **approve** is
+updated in place rather than duplicated (catalog dedup does the rest).
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import os
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Callable, Optional
+
+from .catalog import Catalog
+from .schema import Provenance, Resolution, Scope, SolutionPattern
+
+# Review actions a ``decide`` callback may return. APPROVE and REJECT are final
+# and get written to the ReviewLog by review(); DISCUSS is a deferral.
+APPROVE = "approve"
+REJECT = "reject"
+DISCUSS = "discuss"  # defer — no final decision recorded
+
+# Default per-flavor rendering-hint stubs a reviewer can later refine (OQ4).
+# Maps a flavor name to the ``target`` of its hint stub; flavors missing from
+# this table get an empty-string target in candidate_to_pattern().
+_DEFAULT_TARGET = {"claude": "CLAUDE.md", "codex": "AGENTS.md", "grok": "instructions"}
+
+# A decision callback: (candidate dict) -> (action, rationale). The action is
+# expected to be APPROVE, REJECT or DISCUSS; review() raises ValueError otherwise.
+Decider = Callable[[dict], tuple]
+
+
+def _now() -> str:
+    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
+    utc_now = datetime.now(tz=timezone.utc)
+    return f"{utc_now:%Y-%m-%dT%H:%M:%SZ}"
+
+
+def evidence_fingerprint(candidate: dict) -> str:
+    """Return a stable hash of the evidence that justifies (re)reviewing a candidate.
+
+    Only the evidence fields (``frequency``, ``cost_impact``, ``flavors``,
+    ``repos``, ``sessions``, ``cross_flavor``) take part; a missing field hashes
+    as ``None``. Collection-valued fields are sorted before hashing so that the
+    same evidence listed in a different order yields the same fingerprint —
+    otherwise a mere reordering would re-surface an already decided candidate.
+
+    Args:
+        candidate: Detect candidate dict; evidence values must be JSON-serialisable.
+
+    Returns:
+        Hex SHA-1 digest of the canonical JSON encoding of the evidence.
+
+    Raises:
+        TypeError: If an evidence value cannot be encoded as JSON.
+    """
+    keys = ("frequency", "cost_impact", "flavors", "repos", "sessions", "cross_flavor")
+
+    def _canonical(value):
+        # Order inside a collection carries no evidential meaning. Sorting by each
+        # item's JSON text keeps mixed or nested item types comparable.
+        if isinstance(value, (list, tuple, set, frozenset)):
+            return sorted(value, key=lambda item: json.dumps(item, sort_keys=True))
+        return value
+
+    payload = {k: _canonical(candidate.get(k)) for k in keys}
+    return hashlib.sha1(json.dumps(payload, sort_keys=True).encode("utf-8")).hexdigest()
+
+
+def candidate_to_pattern(candidate: dict) -> SolutionPattern:
+    """Promote a detect candidate to a provisional :class:`SolutionPattern`.
+
+    The candidate's ``key`` supplies the pattern id (via
+    ``SolutionPattern.make_id``) and the provenance source key. Its ``title``,
+    falling back to the key, is used for both name and problem statement. Every
+    listed flavor gets a rendering-hint stub, and the only resolution is a TODO
+    placeholder for the reviewer to fill in.
+    """
+    source_key = candidate["key"]
+    flavor_names = list(candidate.get("flavors", []))
+
+    # One stub per flavor; flavors without a default target get "".
+    rendering_hints = {}
+    for flavor in flavor_names:
+        rendering_hints[flavor] = {
+            "target": _DEFAULT_TARGET.get(flavor, ""),
+            "note": "TODO: refine rendering",
+        }
+
+    pattern_id = SolutionPattern.make_id(source_key)
+    title = candidate.get("title") or source_key
+    placeholder = Resolution(summary="TODO: capture the recommended resolution")
+    scope = Scope(flavors=flavor_names, repos=list(candidate.get("repos", [])))
+    # The whole candidate is kept (shallow copy) as the promotion evidence.
+    provenance = Provenance(
+        source_key=source_key,
+        evidence=dict(candidate),
+        promoted_at=_now(),
+    )
+
+    return SolutionPattern(
+        id=pattern_id,
+        name=title,
+        version="1.0.0",
+        polarity=candidate.get("polarity", "problem"),
+        problem=title,
+        resolutions=[placeholder],
+        scope=scope,
+        provenance=provenance,
+        rendering_hints=rendering_hints,
+        status="provisional",
+    )
+
+
+@dataclass
+class ReviewLog:
+    """JSON-lines journal of final review decisions, indexed by candidate source key.
+
+    Existing records are loaded from ``path`` on construction. Each ``record``
+    call appends one line, so the newest entry for a key supersedes older ones.
+    """
+
+    path: str
+    _by_key: dict = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        # A missing file simply means nothing has been decided yet.
+        if not os.path.exists(self.path):
+            return
+        with open(self.path, encoding="utf-8") as handle:
+            entries = (json.loads(raw) for raw in handle if raw.strip())
+            for entry in entries:
+                # Later lines overwrite earlier ones for the same key.
+                self._by_key[entry["source_key"]] = entry
+
+    def prior(self, source_key: str) -> Optional[dict]:
+        """Return the latest recorded decision for ``source_key``, or ``None``."""
+        return self._by_key.get(source_key)
+
+    def already_decided(self, candidate: dict) -> bool:
+        """Tell whether ``candidate`` was decided before on unchanged evidence."""
+        previous = self._by_key.get(candidate["key"])
+        if not previous:
+            return False
+        return previous["fingerprint"] == evidence_fingerprint(candidate)
+
+    def record(self, candidate: dict, action: str, rationale: str) -> None:
+        """Remember a final decision in memory and append it to the log file."""
+        source_key = candidate["key"]
+        entry = {
+            "source_key": source_key,
+            "action": action,
+            "rationale": rationale,
+            "fingerprint": evidence_fingerprint(candidate),
+            "ts": _now(),
+        }
+        self._by_key[source_key] = entry
+
+        # Create the parent directory on first write; a bare filename means cwd.
+        parent_dir = os.path.dirname(self.path) or "."
+        os.makedirs(parent_dir, exist_ok=True)
+        with open(self.path, "a", encoding="utf-8") as handle:
+            handle.write(json.dumps(entry, sort_keys=True) + "\n")
+
+
+@dataclass
+class ReviewResult:
+    """Outcome buckets filled by one ``review`` pass, in candidate order."""
+
+    approved: list = field(default_factory=list)   # (source_key, catalog_action)
+    rejected: list = field(default_factory=list)   # source_key
+    deferred: list = field(default_factory=list)   # source_key (discuss)
+    skipped: list = field(default_factory=list)    # source_key (already decided)
+
+
+def review(candidates: list[dict], decide: Decider, catalog: Catalog,
+           log: ReviewLog) -> ReviewResult:
+    """Run each candidate through ``decide``; promote approvals into ``catalog``.
+
+    Args:
+        candidates: Detect candidate dicts, each carrying a ``key``.
+        decide: Callback returning ``(action, rationale)`` for a candidate.
+        catalog: Receives ``upsert`` calls for approved candidates.
+        log: Remembers final decisions so decided candidates are skipped.
+
+    Returns:
+        A :class:`ReviewResult` listing approved, rejected, deferred and
+        skipped candidates.
+
+    Raises:
+        ValueError: If ``decide`` returns an action other than approve,
+            reject or discuss.
+    """
+    outcome = ReviewResult()
+    for candidate in candidates:
+        source_key = candidate["key"]
+
+        # Same key and same evidence as a logged decision: do not ask again.
+        if log.already_decided(candidate):
+            outcome.skipped.append(source_key)
+            continue
+
+        action, rationale = decide(candidate)
+
+        if action == DISCUSS:
+            # not a final decision — leave for a later pass
+            outcome.deferred.append(source_key)
+            continue
+        if action not in (APPROVE, REJECT):
+            raise ValueError(f"unknown review action {action!r}")
+
+        if action == APPROVE:
+            catalog_action = catalog.upsert(candidate_to_pattern(candidate))
+            outcome.approved.append((source_key, catalog_action))
+        else:
+            outcome.rejected.append(source_key)
+
+        # Only final decisions (approve / reject) reach the log.
+        log.record(candidate, action, rationale)
+    return outcome
--- a/tests/test_curate_review.py
+++ b/tests/test_curate_review.py
@@ -0,0 +1,93 @@
+"""Review workflow tests (T03): promote/reject/discuss + idempotent re-review."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.curate.catalog import Catalog  # noqa: E402
+from session_memory.curate.review import (  # noqa: E402
+    APPROVE,
+    DISCUSS,
+    REJECT,
+    ReviewLog,
+    candidate_to_pattern,
+    review,
+)
+from session_memory.curate.schema import SolutionPattern  # noqa: E402
+
+
+def _candidate(key="success:clean_pass:outcome", freq=18, flavors=("claude", "grok")):
+    """Build a detect-candidate dict whose evidence scales with ``freq`` and ``flavors``."""
+    parts = key.split(":")  # polarity : signal_type : locus
+    flavor_list = list(flavors)
+    session_ids = [f"s{i}" for i in range(freq)]
+    return {
+        "key": key,
+        "polarity": parts[0],
+        "signal_type": parts[1],
+        "locus": parts[2],
+        "title": "cross-flavor success: clean pass",
+        "frequency": freq,
+        "flavors": flavor_list,
+        "repos": ["agentic-resources"],
+        "sessions": session_ids,
+        "cross_flavor": len(flavors) > 1,
+        "cost_impact": 12.5,
+    }
+
+
+def _decider(action, rationale="because"):
+    """Return a decide callback that answers ``(action, rationale)`` for any candidate."""
+
+    def decide(cand):
+        return (action, rationale)
+
+    return decide
+
+
+def test_approve_promotes_to_catalog(tmp_path):
+    """An approved candidate lands in the catalog as a pattern carrying its evidence."""
+    catalog = Catalog(str(tmp_path / "catalog"))
+    review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))
+
+    outcome = review([_candidate()], _decider(APPROVE), catalog, review_log)
+
+    assert len(outcome.approved) == 1
+    pattern = catalog.load(SolutionPattern.make_id("success:clean_pass:outcome"))
+    assert pattern is not None
+    assert pattern.scope.flavors == ["claude", "grok"]
+    assert set(pattern.rendering_hints) == {"claude", "grok"}
+    assert pattern.provenance.evidence["frequency"] == 18
+
+
+def test_reject_records_no_catalog_write(tmp_path):
+    """A rejected candidate is reported as rejected and leaves the catalog empty."""
+    catalog = Catalog(str(tmp_path / "catalog"))
+    review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))
+
+    outcome = review([_candidate()], _decider(REJECT), catalog, review_log)
+
+    assert outcome.rejected == ["success:clean_pass:outcome"]
+    assert catalog.list() == []
+
+
+def test_discuss_defers_and_is_not_final(tmp_path):
+    """A discussed candidate is deferred, so a later pass can still approve it."""
+    catalog = Catalog(str(tmp_path / "catalog"))
+    review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))
+
+    first_pass = review([_candidate()], _decider(DISCUSS), catalog, review_log)
+    assert first_pass.deferred == ["success:clean_pass:outcome"]
+
+    # not recorded as final -> a later pass re-surfaces it
+    second_pass = review([_candidate()], _decider(APPROVE), catalog, review_log)
+    assert len(second_pass.approved) == 1
+
+
+def test_prior_reject_remembered_same_evidence(tmp_path):
+    """A reject persisted to disk makes the same candidate skip on the next pass."""
+    catalog = Catalog(str(tmp_path / "catalog"))
+    log_file = str(tmp_path / "reviews.jsonl")
+
+    review([_candidate()], _decider(REJECT), catalog, ReviewLog(log_file))
+
+    # fresh log instance (reloads from disk) + same evidence -> skipped
+    second_pass = review([_candidate()], _decider(APPROVE), catalog, ReviewLog(log_file))
+    assert second_pass.skipped == ["success:clean_pass:outcome"]
+    assert catalog.list() == []
+
+
+def test_changed_evidence_resurfaces(tmp_path):
+    """A rejected candidate is reviewed again once its evidence has changed."""
+    catalog = Catalog(str(tmp_path / "catalog"))
+    log_file = str(tmp_path / "reviews.jsonl")
+
+    review([_candidate(freq=18)], _decider(REJECT), catalog, ReviewLog(log_file))
+
+    # more evidence now -> not skipped, gets re-reviewed
+    second_pass = review([_candidate(freq=40)], _decider(APPROVE), catalog, ReviewLog(log_file))
+    assert len(second_pass.approved) == 1
+
+
+def test_candidate_to_pattern_defaults():
+    """A promoted pattern is provisional, keeps polarity, and uses the default hint target."""
+    pattern = candidate_to_pattern(_candidate(flavors=("claude",)))
+
+    assert pattern.status == "provisional"
+    assert pattern.rendering_hints["claude"]["target"] == "CLAUDE.md"
+    assert pattern.polarity == "success"
--- a/workplans/AGENTIC-WP-0004-session-memory-phase2.md
+++ b/workplans/AGENTIC-WP-0004-session-memory-phase2.md
@@ -76,7 +76,7 @@ re-saving an unchanged pattern is a no-op (no spurious version bump).

 ```task
 id: AGENTIC-WP-0004-T03
-status: todo
+status: done
 priority: high
 state_hub_task_id: "e303d01f-564e-4499-9ce5-22cf959ed84c"
 ```