diff --git a/session_memory/curate/review.py b/session_memory/curate/review.py new file mode 100644 index 0000000..3294303 --- /dev/null +++ b/session_memory/curate/review.py @@ -0,0 +1,135 @@ +"""Curation review workflow (FR-U1/FR-U2; T03). + +Drives Phase 1 detect candidates through a **discuss / approve / reject** review +and, on approve, promotes the candidate into a :class:`SolutionPattern` written to +the :class:`Catalog`. The actual decision is supplied by a ``decide`` callback so +this engine stays UI-free — the ``__main__`` entrypoint (T06) plugs in interactive +or batch (auto-approve) logic. + +Re-review is **idempotent** via a :class:`ReviewLog`: a candidate already decided +is skipped unless its *evidence fingerprint* changed (new sessions/frequency), so +a prior **reject** is remembered and not re-surfaced, and a prior **approve** is +updated in place rather than duplicated (catalog dedup does the rest). +""" + +from __future__ import annotations + +import hashlib +import json +import os +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Callable, Optional + +from .catalog import Catalog +from .schema import Provenance, Resolution, Scope, SolutionPattern + +APPROVE = "approve" +REJECT = "reject" +DISCUSS = "discuss" # defer — no final decision recorded + +# Default per-flavor rendering-hint stubs a reviewer can later refine (OQ4). 
_DEFAULT_TARGET = {"claude": "CLAUDE.md", "codex": "AGENTS.md", "grok": "instructions"}

# A decision callback: (candidate dict) -> (action, rationale)
Decider = Callable[[dict], tuple]

# Candidate fields that make up the evidence fingerprint.
_EVIDENCE_KEYS = ("frequency", "cost_impact", "flavors", "repos", "sessions", "cross_flavor")


def _now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _canonical(value):
    """Return ``value`` with list-like containers converted to sorted lists.

    The order in which sessions/flavors/repos were collected is not evidence,
    so it must not influence the fingerprint. Sets are accepted too (plain
    ``json.dumps`` would reject them).
    """
    if isinstance(value, (list, tuple, set, frozenset)):
        items = [_canonical(item) for item in value]
        try:
            return sorted(items)
        except TypeError:
            # Members that cannot be compared with each other: fall back to a
            # deterministic ordering by their repr.
            return sorted(items, key=repr)
    return value


def evidence_fingerprint(candidate: dict) -> str:
    """Stable hash of the evidence that would justify (re)reviewing a candidate.

    Only the fields in ``_EVIDENCE_KEYS`` are hashed; missing fields count as
    ``None``. List-like values are order-normalised first, so the same evidence
    reported in a different order yields the same fingerprint. For inputs whose
    lists are already sorted the result equals a plain
    ``sha1(json.dumps(..., sort_keys=True))`` of those fields.

    Args:
        candidate: a detect candidate dict.

    Returns:
        The hex SHA-1 digest of the canonical JSON encoding of the evidence.
    """
    payload = {key: _canonical(candidate.get(key)) for key in _EVIDENCE_KEYS}
    encoded = json.dumps(payload, sort_keys=True).encode("utf-8")
    return hashlib.sha1(encoded).hexdigest()


def candidate_to_pattern(candidate: dict) -> SolutionPattern:
    """Build a (provisional) Solution Pattern from a detect candidate.

    The candidate ``key`` becomes the provenance source key and seeds the
    pattern id; ``title`` (falling back to the key) is used for both name and
    problem. Each flavor gets a stub rendering hint whose target comes from
    ``_DEFAULT_TARGET`` (empty string for flavors not listed there).

    Args:
        candidate: a detect candidate dict; ``key`` is required.

    Returns:
        A ``SolutionPattern`` with status ``"provisional"`` and version ``"1.0.0"``.

    Raises:
        KeyError: if ``candidate`` has no ``"key"``.
    """
    src = candidate["key"]
    title = candidate.get("title") or src
    # ``or []`` also covers a field that is present but None.
    flavors = list(candidate.get("flavors") or [])
    repos = list(candidate.get("repos") or [])
    hints = {
        flavor: {"target": _DEFAULT_TARGET.get(flavor, ""), "note": "TODO: refine rendering"}
        for flavor in flavors
    }
    return SolutionPattern(
        id=SolutionPattern.make_id(src),
        name=title,
        version="1.0.0",
        polarity=candidate.get("polarity", "problem"),
        problem=title,
        resolutions=[Resolution(summary="TODO: capture the recommended resolution")],
        scope=Scope(flavors=flavors, repos=repos),
        provenance=Provenance(source_key=src, evidence=dict(candidate), promoted_at=_now()),
        rendering_hints=hints,
        status="provisional",
    )


@dataclass
class ReviewLog:
    """Append-only record of final decisions, keyed by candidate source key.

    The log is a JSON-lines file at ``path``. Existing records are loaded on
    construction; when a key appears more than once the last record wins.
    """

    path: str
    _by_key: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Load existing records from ``path`` (a missing file means an empty log)."""
        import warnings  # local import: not among the module-level imports

        try:
            fh = open(self.path, encoding="utf-8")
        except FileNotFoundError:
            return
        with fh:
            for lineno, line in enumerate(fh, start=1):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                    self._by_key[rec["source_key"]] = rec  # last write wins
                except (ValueError, KeyError, TypeError):
                    # A torn line (interrupted append) or a hand-edited record
                    # must not make the whole log unloadable. Dropping it only
                    # means the candidate is surfaced for review again.
                    warnings.warn(
                        f"{self.path}:{lineno}: skipping malformed review record",
                        RuntimeWarning,
                    )

    def prior(self, source_key: str) -> Optional[dict]:
        """Return the latest recorded decision for ``source_key``, or ``None``."""
        return self._by_key.get(source_key)

    def already_decided(self, candidate: dict) -> bool:
        """Return True if ``candidate`` has a recorded decision for its current evidence.

        A record whose stored fingerprint differs from the candidate's current
        evidence fingerprint (or that has no fingerprint) does not count.
        """
        rec = self._by_key.get(candidate["key"])
        if not rec:
            return False
        return rec.get("fingerprint") == evidence_fingerprint(candidate)

    def record(self, candidate: dict, action: str, rationale: str) -> None:
        """Append a decision for ``candidate`` to the log file and the in-memory index.

        The parent directory of ``path`` is created if needed.
        """
        rec = {
            "source_key": candidate["key"],
            "action": action,
            "rationale": rationale,
            "fingerprint": evidence_fingerprint(candidate),
            "ts": _now(),
        }
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        with open(self.path, "a", encoding="utf-8") as fh:
            # One write per record keeps the line in a single append.
            fh.write(json.dumps(rec, sort_keys=True) + "\n")
        # Update memory only after the append succeeded, so the in-memory view
        # never claims a decision that is not on disk.
        self._by_key[candidate["key"]] = rec


@dataclass
class ReviewResult:
    """Outcome of one ``review`` pass, grouped by what happened to each candidate."""

    approved: list = field(default_factory=list)  # (source_key, catalog_action)
    rejected: list = field(default_factory=list)  # source_key
    deferred: list = field(default_factory=list)  # source_key (discuss)
    skipped: list = field(default_factory=list)   # source_key (already decided)


def review(candidates: list[dict], decide: Decider, catalog: Catalog,
           log: ReviewLog) -> ReviewResult:
    """Run each candidate through ``decide``; promote approvals into ``catalog``.

    Candidates that ``log`` already holds a decision for (same evidence
    fingerprint) are skipped without calling ``decide``. ``DISCUSS`` defers the
    candidate and records nothing, so a later pass surfaces it again.
    ``APPROVE`` upserts the derived pattern into ``catalog``; ``APPROVE`` and
    ``REJECT`` are both written to ``log``.

    Args:
        candidates: detect candidate dicts, each with a ``"key"``.
        decide: callback returning ``(action, rationale)`` for a candidate.
        catalog: destination for approved patterns (its ``upsert`` is called).
        log: decision log used for skip detection and recording.

    Returns:
        A ``ReviewResult`` listing approved, rejected, deferred and skipped keys.

    Raises:
        ValueError: if ``decide`` returns an action other than
            ``APPROVE``/``REJECT``/``DISCUSS``. Decisions for earlier
            candidates in the same pass have already been recorded.
    """
    result = ReviewResult()
    for cand in candidates:
        key = cand["key"]
        if log.already_decided(cand):
            result.skipped.append(key)
            continue
        action, rationale = decide(cand)
        if action == DISCUSS:
            result.deferred.append(key)
            continue  # not a final decision — leave for a later pass
        if action == APPROVE:
            cat_action = catalog.upsert(candidate_to_pattern(cand))
            result.approved.append((key, cat_action))
        elif action == REJECT:
            result.rejected.append(key)
        else:
            raise ValueError(f"unknown review action {action!r}")
        log.record(cand, action, rationale)
    return result
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from session_memory.curate.catalog import Catalog  # noqa: E402
from session_memory.curate.review import (  # noqa: E402
    APPROVE,
    DISCUSS,
    REJECT,
    ReviewLog,
    candidate_to_pattern,
    review,
)
from session_memory.curate.schema import SolutionPattern  # noqa: E402

# Source key shared by every test candidate unless a test overrides it.
_KEY = "success:clean_pass:outcome"


def _candidate(key=_KEY, freq=18, flavors=("claude", "grok")):
    """Build a detect-style candidate dict; ``key`` is ``polarity:signal_type:locus``."""
    parts = key.split(":")
    flavor_list = list(flavors)
    session_ids = [f"s{i}" for i in range(freq)]
    return {
        "key": key,
        "polarity": parts[0],
        "signal_type": parts[1],
        "locus": parts[2],
        "title": "cross-flavor success: clean pass",
        "frequency": freq,
        "flavors": flavor_list,
        "repos": ["agentic-resources"],
        "sessions": session_ids,
        "cross_flavor": len(flavors) > 1,
        "cost_impact": 12.5,
    }


def _decider(action, rationale="because"):
    """Return a decide-callback that gives the same answer for every candidate."""

    def _always(cand):
        return (action, rationale)

    return _always


def _workspace(tmp_path):
    """Return a fresh catalog plus the review-log path inside ``tmp_path``."""
    return Catalog(str(tmp_path / "catalog")), str(tmp_path / "reviews.jsonl")


def test_approve_promotes_to_catalog(tmp_path):
    """An approved candidate is written to the catalog with its evidence."""
    catalog, log_path = _workspace(tmp_path)
    outcome = review([_candidate()], _decider(APPROVE), catalog, ReviewLog(log_path))
    assert len(outcome.approved) == 1
    pattern = catalog.load(SolutionPattern.make_id(_KEY))
    assert pattern is not None
    assert pattern.scope.flavors == ["claude", "grok"]
    assert set(pattern.rendering_hints) == {"claude", "grok"}
    assert pattern.provenance.evidence["frequency"] == 18


def test_reject_records_no_catalog_write(tmp_path):
    """A rejected candidate is reported and nothing reaches the catalog."""
    catalog, log_path = _workspace(tmp_path)
    outcome = review([_candidate()], _decider(REJECT), catalog, ReviewLog(log_path))
    assert outcome.rejected == [_KEY]
    assert catalog.list() == []


def test_discuss_defers_and_is_not_final(tmp_path):
    """A discussed candidate is deferred and can still be approved later."""
    catalog, log_path = _workspace(tmp_path)
    shared_log = ReviewLog(log_path)
    first = review([_candidate()], _decider(DISCUSS), catalog, shared_log)
    assert first.deferred == [_KEY]
    # not recorded as final -> a later pass re-surfaces it
    second = review([_candidate()], _decider(APPROVE), catalog, shared_log)
    assert len(second.approved) == 1


def test_prior_reject_remembered_same_evidence(tmp_path):
    """A reject persists across log instances while the evidence is unchanged."""
    catalog, log_path = _workspace(tmp_path)
    review([_candidate()], _decider(REJECT), catalog, ReviewLog(log_path))
    # fresh log instance (reloads from disk) + same evidence -> skipped
    outcome = review([_candidate()], _decider(APPROVE), catalog, ReviewLog(log_path))
    assert outcome.skipped == [_KEY]
    assert catalog.list() == []


def test_changed_evidence_resurfaces(tmp_path):
    """New evidence for a rejected candidate puts it back in front of the reviewer."""
    catalog, log_path = _workspace(tmp_path)
    review([_candidate(freq=18)], _decider(REJECT), catalog, ReviewLog(log_path))
    # more evidence now -> not skipped, gets re-reviewed
    outcome = review([_candidate(freq=40)], _decider(APPROVE), catalog, ReviewLog(log_path))
    assert len(outcome.approved) == 1


def test_candidate_to_pattern_defaults():
    """Promotion fills in provisional status, default hint target and polarity."""
    pattern = candidate_to_pattern(_candidate(flavors=("claude",)))
    assert pattern.status == "provisional"
    assert pattern.rendering_hints["claude"]["target"] == "CLAUDE.md"
    assert pattern.polarity == "success"