session-memory Phase 2: review workflow (T03)

UI-free discuss/approve/reject engine driving detect candidates into the catalog via a decide callback. candidate_to_pattern builds a provisional SolutionPattern with per-flavor rendering-hint stubs. ReviewLog makes re-review idempotent: prior rejects remembered, re-surfaced only when the evidence fingerprint changes. 6 new tests; suite 58/58 green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 00:25:10 +02:00
parent c6164a82ba
commit e51fd8154d
3 changed files with 229 additions and 1 deletions
--- a/session_memory/curate/review.py
+++ b/session_memory/curate/review.py
@@ -0,0 +1,135 @@
 """Curation review workflow (FR-U1/FR-U2; T03).
 Drives Phase 1 detect candidates through a **discuss / approve / reject** review
 and, on approve, promotes the candidate into a :class:`SolutionPattern` written to
 the :class:`Catalog`. The actual decision is supplied by a ``decide`` callback so
 this engine stays UI-free — the ``__main__`` entrypoint (T06) plugs in interactive
 or batch (auto-approve) logic.
 Re-review is **idempotent** via a :class:`ReviewLog`: a candidate already decided
 is skipped unless its *evidence fingerprint* changed (new sessions/frequency), so
 a prior **reject** is remembered and not re-surfaced, and a prior **approve** is
 updated in place rather than duplicated (catalog dedup does the rest).
 """
 from __future__ import annotations
 import hashlib
 import json
 import os
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from typing import Callable, Optional
 from .catalog import Catalog
 from .schema import Provenance, Resolution, Scope, SolutionPattern
 # Review actions a decision callback may return.
 APPROVE = "approve"
 REJECT = "reject"
 DISCUSS = "discuss"  # deferral: not recorded as a final decision

 # Default rendering target per flavor; stubs a reviewer can refine later (OQ4).
 _DEFAULT_TARGET = {
     "claude": "CLAUDE.md",
     "codex": "AGENTS.md",
     "grok": "instructions",
 }

 # Shape of a decision callback: candidate dict in, (action, rationale) out.
 Decider = Callable[[dict], tuple]
 def _now() -> str:
     """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
     utc_now = datetime.now(tz=timezone.utc)
     return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
 def evidence_fingerprint(candidate: dict) -> str:
     """Return a stable SHA-1 hex digest of the evidence behind a candidate.

     Only the fields named in ``keys`` take part; a missing field counts as
     ``None``. List-like fields (flavors, repos, sessions) are treated as
     unordered collections and sorted before hashing, so the same evidence
     emitted in a different order keeps the same fingerprint and does not
     re-surface an already-decided candidate.

     Args:
         candidate: A detect candidate dict.

     Returns:
         A 40-character hexadecimal digest.
     """
     keys = ("frequency", "cost_impact", "flavors", "repos", "sessions", "cross_flavor")

     def canonical(value):
         # Ordering by JSON text gives a deterministic order even for items
         # that cannot be compared directly; it also turns sets into lists,
         # which json.dumps can serialise.
         if isinstance(value, (list, tuple, set, frozenset)):
             return sorted(value, key=lambda item: json.dumps(item, sort_keys=True))
         return value

     payload = {k: canonical(candidate.get(k)) for k in keys}
     encoded = json.dumps(payload, sort_keys=True).encode("utf-8")
     return hashlib.sha1(encoded).hexdigest()
 def candidate_to_pattern(candidate: dict) -> SolutionPattern:
     """Convert a detect candidate into a provisional :class:`SolutionPattern`.

     The candidate's ``key`` seeds the pattern id and is stored as the
     provenance source key; its ``title`` (falling back to the key when absent
     or empty) is used for both the name and the problem statement. Each flavor
     gets a rendering-hint stub and the single resolution is a TODO placeholder.
     A shallow copy of the candidate is kept as provenance evidence.
     """
     source_key = candidate["key"]
     flavor_names = list(candidate.get("flavors", []))

     # One stub per flavor; flavors without a known default get an empty target.
     rendering_hints = {}
     for flavor in flavor_names:
         rendering_hints[flavor] = {
             "target": _DEFAULT_TARGET.get(flavor, ""),
             "note": "TODO: refine rendering",
         }

     pattern_id = SolutionPattern.make_id(source_key)
     label = candidate.get("title") or source_key
     placeholder = Resolution(summary="TODO: capture the recommended resolution")
     scope = Scope(flavors=flavor_names, repos=list(candidate.get("repos", [])))
     provenance = Provenance(
         source_key=source_key,
         evidence=dict(candidate),
         promoted_at=_now(),
     )
     return SolutionPattern(
         id=pattern_id,
         name=label,
         version="1.0.0",
         polarity=candidate.get("polarity", "problem"),
         problem=label,
         resolutions=[placeholder],
         scope=scope,
         provenance=provenance,
         rendering_hints=rendering_hints,
         status="provisional",
     )
@dataclass
class ReviewLog:
    """Append-only JSONL record of final decisions, keyed by candidate source key.

    Each line of ``path`` is one JSON object with ``source_key``, ``action``,
    ``rationale``, ``fingerprint`` and ``ts``. On construction the file, if it
    exists, is replayed into memory; when a key occurs more than once the last
    line wins.
    """

    path: str  # location of the JSONL log file
    # Latest record per source key. Kept out of repr() because it can hold the
    # whole log.
    _by_key: dict = field(default_factory=dict, repr=False)

    def __post_init__(self) -> None:
        """Replay the log file, if present, into the in-memory index.

        A line that is not a JSON object carrying a ``source_key`` (for
        example a final line torn by an interrupted append) is skipped with a
        ``RuntimeWarning`` instead of aborting the load; the affected
        candidate is then simply reviewed again.
        """
        import warnings

        if not os.path.exists(self.path):
            return
        with open(self.path, encoding="utf-8") as fh:
            for lineno, line in enumerate(fh, start=1):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                    self._by_key[rec["source_key"]] = rec  # last write wins
                except (ValueError, KeyError, TypeError):
                    warnings.warn(
                        f"{self.path}:{lineno}: skipping unreadable review record",
                        RuntimeWarning,
                        stacklevel=3,
                    )

    def prior(self, source_key: str) -> Optional[dict]:
        """Return the latest recorded decision for ``source_key``, or ``None``."""
        return self._by_key.get(source_key)

    def already_decided(self, candidate: dict) -> bool:
        """Tell whether ``candidate`` was decided on exactly this evidence.

        True only when a record exists for the candidate's key and its stored
        fingerprint equals the candidate's current evidence fingerprint. A
        record without a fingerprint counts as undecided.
        """
        rec = self._by_key.get(candidate["key"])
        return bool(rec) and rec.get("fingerprint") == evidence_fingerprint(candidate)

    def record(self, candidate: dict, action: str, rationale: str) -> None:
        """Append a decision for ``candidate`` to the log file and index it.

        The parent directory is created when missing. The in-memory index is
        updated only after the append succeeded, so a failed write cannot
        leave memory claiming a decision that is not on disk.
        """
        rec = {
            "source_key": candidate["key"],
            "action": action,
            "rationale": rationale,
            "fingerprint": evidence_fingerprint(candidate),
            "ts": _now(),
        }
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        line = json.dumps(rec, sort_keys=True) + "\n"
        if self._ends_mid_line():
            # Start on a fresh line so this record is not glued onto a torn one.
            line = "\n" + line
        with open(self.path, "a", encoding="utf-8") as fh:
            fh.write(line)
        self._by_key[candidate["key"]] = rec

    def _ends_mid_line(self) -> bool:
        """Report whether the log file is non-empty and lacks a final newline."""
        try:
            with open(self.path, "rb") as fh:
                fh.seek(-1, os.SEEK_END)
                return fh.read(1) != b"\n"
        except OSError:  # missing or empty file: nothing to terminate
            return False
@dataclass
class ReviewResult:
    """Outcome lists of one review run, one list per kind of outcome."""

    # (source_key, catalog_action) pairs for approved candidates
    approved: list = field(default_factory=list)
    # source keys of rejected candidates
    rejected: list = field(default_factory=list)
    # source keys deferred via "discuss"
    deferred: list = field(default_factory=list)
    # source keys skipped because they were already decided
    skipped: list = field(default_factory=list)
 def review(
     candidates: list[dict],
     decide: Decider,
     catalog: Catalog,
     log: ReviewLog,
 ) -> ReviewResult:
     """Review each candidate once and collect the outcomes.

     Candidates that ``log`` reports as already decided are skipped without
     calling ``decide``. Otherwise ``decide`` supplies ``(action, rationale)``:
     ``DISCUSS`` defers the candidate without logging it, ``APPROVE`` upserts
     the converted pattern into ``catalog``, and ``REJECT`` only notes the key.
     Approvals and rejections are then recorded in ``log``.

     Raises:
         ValueError: if ``decide`` returns an action other than those three.
     """
     outcome = ReviewResult()
     for candidate in candidates:
         source_key = candidate["key"]
         if log.already_decided(candidate):
             outcome.skipped.append(source_key)
             continue

         action, rationale = decide(candidate)
         if action == DISCUSS:
             # Deferred, not final: nothing is logged, so a later pass sees it again.
             outcome.deferred.append(source_key)
             continue

         is_approval = action == APPROVE
         if not is_approval and action != REJECT:
             raise ValueError(f"unknown review action {action!r}")

         if is_approval:
             catalog_action = catalog.upsert(candidate_to_pattern(candidate))
             outcome.approved.append((source_key, catalog_action))
         else:
             outcome.rejected.append(source_key)
         log.record(candidate, action, rationale)
     return outcome
--- a/tests/test_curate_review.py
+++ b/tests/test_curate_review.py
@@ -0,0 +1,93 @@
 """Review workflow tests (T03): promote/reject/discuss + idempotent re-review."""
 import os
 import sys
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from session_memory.curate.catalog import Catalog  # noqa: E402
 from session_memory.curate.review import (  # noqa: E402
    APPROVE,
    DISCUSS,
    REJECT,
    ReviewLog,
    candidate_to_pattern,
    review,
 )
 from session_memory.curate.schema import SolutionPattern  # noqa: E402
 def _candidate(key="success:clean_pass:outcome", freq=18, flavors=("claude", "grok")):
     """Build a detect-candidate dict; ``key`` is ``polarity:signal_type:locus``."""
     parts = key.split(":")
     flavor_list = list(flavors)
     session_ids = [f"s{n}" for n in range(freq)]
     return dict(
         key=key,
         polarity=parts[0],
         signal_type=parts[1],
         locus=parts[2],
         title="cross-flavor success: clean pass",
         frequency=freq,
         flavors=flavor_list,
         repos=["agentic-resources"],
         sessions=session_ids,
         cross_flavor=len(flavors) > 1,
         cost_impact=12.5,
     )
 def _decider(action, rationale="because"):
     """Return a decide callback that gives every candidate the same verdict."""
     def decide(cand):
         return (action, rationale)

     return decide
 def test_approve_promotes_to_catalog(tmp_path):
     """An approved candidate can be loaded back from the catalog as a pattern."""
     catalog = Catalog(str(tmp_path / "catalog"))
     review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))

     outcome = review([_candidate()], _decider(APPROVE), catalog, review_log)
     assert len(outcome.approved) == 1

     pattern_id = SolutionPattern.make_id("success:clean_pass:outcome")
     pattern = catalog.load(pattern_id)
     assert pattern is not None
     assert pattern.scope.flavors == ["claude", "grok"]
     assert set(pattern.rendering_hints) == {"claude", "grok"}
     assert pattern.provenance.evidence["frequency"] == 18
 def test_reject_records_no_catalog_write(tmp_path):
     """A rejected candidate is reported as rejected and the catalog stays empty."""
     catalog = Catalog(str(tmp_path / "catalog"))
     review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))

     outcome = review([_candidate()], _decider(REJECT), catalog, review_log)

     assert outcome.rejected == ["success:clean_pass:outcome"]
     assert catalog.list() == []
 def test_discuss_defers_and_is_not_final(tmp_path):
     """A discussed candidate is deferred and can still be approved later."""
     catalog = Catalog(str(tmp_path / "catalog"))
     review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))

     first_pass = review([_candidate()], _decider(DISCUSS), catalog, review_log)
     assert first_pass.deferred == ["success:clean_pass:outcome"]

     # The deferral was not final, so the same log lets a second pass decide.
     second_pass = review([_candidate()], _decider(APPROVE), catalog, review_log)
     assert len(second_pass.approved) == 1
 def test_prior_reject_remembered_same_evidence(tmp_path):
     """A reject survives a log reload and suppresses re-review of equal evidence."""
     catalog = Catalog(str(tmp_path / "catalog"))
     log_file = str(tmp_path / "reviews.jsonl")

     review([_candidate()], _decider(REJECT), catalog, ReviewLog(log_file))

     # A new ReviewLog re-reads the file; unchanged evidence must be skipped.
     reloaded_log = ReviewLog(log_file)
     outcome = review([_candidate()], _decider(APPROVE), catalog, reloaded_log)
     assert outcome.skipped == ["success:clean_pass:outcome"]
     assert catalog.list() == []
 def test_changed_evidence_resurfaces(tmp_path):
     """A rejected candidate is reviewed again once its evidence has changed."""
     catalog = Catalog(str(tmp_path / "catalog"))
     log_file = str(tmp_path / "reviews.jsonl")

     rejected_evidence = _candidate(freq=18)
     review([rejected_evidence], _decider(REJECT), catalog, ReviewLog(log_file))

     # Higher frequency means a new fingerprint, so the candidate is not skipped.
     grown_evidence = _candidate(freq=40)
     outcome = review([grown_evidence], _decider(APPROVE), catalog, ReviewLog(log_file))
     assert len(outcome.approved) == 1
 def test_candidate_to_pattern_defaults():
     """A converted candidate is provisional, with default hint target and polarity."""
     single_flavor = _candidate(flavors=("claude",))
     pattern = candidate_to_pattern(single_flavor)

     assert pattern.status == "provisional"
     claude_hint = pattern.rendering_hints["claude"]
     assert claude_hint["target"] == "CLAUDE.md"
     assert pattern.polarity == "success"
--- a/workplans/AGENTIC-WP-0004-session-memory-phase2.md
+++ b/workplans/AGENTIC-WP-0004-session-memory-phase2.md
@@ -76,7 +76,7 @@ re-saving an unchanged pattern is a no-op (no spurious version bump).
 ```task
 id: AGENTIC-WP-0004-T03
-status: todo
+status: done
 priority: high
 state_hub_task_id: "e303d01f-564e-4499-9ce5-22cf959ed84c"
 ```