session-memory Phase 2: review workflow (T03)

UI-free discuss/approve/reject engine driving detect candidates into the
catalog via a decide callback. candidate_to_pattern builds a provisional
SolutionPattern with per-flavor rendering-hint stubs. ReviewLog makes
re-review idempotent: prior rejects remembered, re-surfaced only when the
evidence fingerprint changes. 6 new tests; suite 58/58 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 00:25:10 +02:00
parent c6164a82ba
commit e51fd8154d
3 changed files with 229 additions and 1 deletions

View File

@@ -0,0 +1,135 @@
"""Curation review workflow (FR-U1/FR-U2; T03).
Drives Phase 1 detect candidates through a **discuss / approve / reject** review
and, on approve, promotes the candidate into a :class:`SolutionPattern` written to
the :class:`Catalog`. The actual decision is supplied by a ``decide`` callback so
this engine stays UI-free — the ``__main__`` entrypoint (T06) plugs in interactive
or batch (auto-approve) logic.
Re-review is **idempotent** via a :class:`ReviewLog`: a candidate already decided
is skipped unless its *evidence fingerprint* changed (new sessions/frequency), so
a prior **reject** is remembered and not re-surfaced, and a prior **approve** is
updated in place rather than duplicated (catalog dedup does the rest).
"""
from __future__ import annotations
import hashlib
import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Callable, Optional
from .catalog import Catalog
from .schema import Provenance, Resolution, Scope, SolutionPattern
APPROVE = "approve"
REJECT = "reject"
DISCUSS = "discuss" # defer — no final decision recorded
# Default per-flavor rendering-hint stubs a reviewer can later refine (OQ4).
_DEFAULT_TARGET = {"claude": "CLAUDE.md", "codex": "AGENTS.md", "grok": "instructions"}
# A decision callback: (candidate dict) -> (action, rationale)
Decider = Callable[[dict], tuple]
def _now() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def evidence_fingerprint(candidate: dict) -> str:
    """Return a SHA-1 hex digest over the evidence fields of ``candidate``.

    Only ``frequency``, ``cost_impact``, ``flavors``, ``repos``, ``sessions``
    and ``cross_flavor`` take part; a missing field is hashed as ``None`` and
    every other key of the candidate is ignored. The fields are serialized as
    JSON with sorted object keys, so the order of items *inside* a list value
    still affects the digest.
    """
    evidence = {}
    for name in ("frequency", "cost_impact", "flavors", "repos", "sessions", "cross_flavor"):
        evidence[name] = candidate.get(name)
    canonical = json.dumps(evidence, sort_keys=True)
    digest = hashlib.sha1(canonical.encode("utf-8"))
    return digest.hexdigest()
def candidate_to_pattern(candidate: dict) -> SolutionPattern:
    """Turn a detect candidate into a provisional :class:`SolutionPattern`.

    The candidate's ``key`` is required and becomes both the provenance source
    key and the input to ``SolutionPattern.make_id``. ``title`` (falling back
    to the key when absent or empty) is used for both ``name`` and ``problem``.
    Each flavor gets a rendering-hint stub whose target comes from
    ``_DEFAULT_TARGET`` (empty string for an unknown flavor). The resolution is
    a TODO placeholder and the pattern's status is ``"provisional"``.
    """
    source_key = candidate["key"]
    flavor_list = list(candidate.get("flavors", []))

    # One stub per flavor; reviewers are expected to refine these later.
    hint_stubs = {}
    for flavor in flavor_list:
        hint_stubs[flavor] = {
            "target": _DEFAULT_TARGET.get(flavor, ""),
            "note": "TODO: refine rendering",
        }

    pattern_id = SolutionPattern.make_id(source_key)
    label = candidate.get("title") or source_key
    placeholder = Resolution(summary="TODO: capture the recommended resolution")
    applies_to = Scope(flavors=flavor_list, repos=list(candidate.get("repos", [])))
    # The evidence is a shallow copy of the whole candidate dict.
    origin = Provenance(
        source_key=source_key,
        evidence=dict(candidate),
        promoted_at=_now(),
    )

    return SolutionPattern(
        id=pattern_id,
        name=label,
        version="1.0.0",
        polarity=candidate.get("polarity", "problem"),
        problem=label,
        resolutions=[placeholder],
        scope=applies_to,
        provenance=origin,
        rendering_hints=hint_stubs,
        status="provisional",
    )
@dataclass
class ReviewLog:
    """Append-only JSONL record of final decisions, keyed by candidate source key.

    On construction the file at ``path`` (if it exists) is replayed into an
    in-memory index; when a key occurs more than once the last record wins.
    A line that is not a JSON object carrying ``source_key`` (for example a
    torn final line left by an interrupted append) is skipped with a
    ``RuntimeWarning`` instead of making the whole log unreadable.
    """

    path: str
    # In-memory index: source_key -> most recent decision record.
    _by_key: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Replay the on-disk log (if any) into the in-memory index."""
        import warnings  # local import: only used on the corrupt-line path

        try:
            fh = open(self.path, encoding="utf-8")
        except FileNotFoundError:
            return  # no log yet: nothing has been decided so far
        with fh:
            for lineno, line in enumerate(fh, start=1):
                if not line.strip():
                    continue
                try:
                    rec = json.loads(line)
                    source_key = rec["source_key"]
                except (ValueError, KeyError, TypeError):
                    # JSONDecodeError is a ValueError; KeyError/TypeError cover
                    # valid JSON that is not a record with a source_key.
                    warnings.warn(
                        f"{self.path}:{lineno}: skipping malformed review log line",
                        RuntimeWarning,
                        stacklevel=2,
                    )
                    continue
                self._by_key[source_key] = rec  # last write wins

    def prior(self, source_key: str) -> Optional[dict]:
        """Return the most recent record for ``source_key``, or ``None``."""
        return self._by_key.get(source_key)

    def already_decided(self, candidate: dict) -> bool:
        """Return True when ``candidate`` has a record with an unchanged fingerprint."""
        rec = self._by_key.get(candidate["key"])
        if not rec:
            return False
        # A record without a fingerprint never matches, so it gets re-reviewed.
        return rec.get("fingerprint") == evidence_fingerprint(candidate)

    def record(self, candidate: dict, action: str, rationale: str) -> None:
        """Store a decision in memory and append it to the log file.

        The parent directory of ``path`` is created when missing.
        """
        rec = {
            "source_key": candidate["key"],
            "action": action,
            "rationale": rationale,
            "fingerprint": evidence_fingerprint(candidate),
            "ts": _now(),
        }
        self._by_key[candidate["key"]] = rec
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        with open(self.path, "a", encoding="utf-8") as fh:
            # One write per record keeps the line and its newline together.
            fh.write(json.dumps(rec, sort_keys=True) + "\n")
@dataclass
class ReviewResult:
    """Per-outcome buckets filled in by one review pass."""

    # (source_key, catalog_action) pairs for approved candidates
    approved: list = field(default_factory=list)
    # source keys of rejected candidates
    rejected: list = field(default_factory=list)
    # source keys deferred via "discuss"
    deferred: list = field(default_factory=list)
    # source keys skipped because they were already decided
    skipped: list = field(default_factory=list)
def review(candidates: list[dict], decide: Decider, catalog: Catalog,
           log: ReviewLog) -> ReviewResult:
    """Run each candidate through ``decide``; promote approvals into ``catalog``.

    Candidates that ``log`` reports as already decided are skipped without
    consulting ``decide``. A ``DISCUSS`` verdict defers the candidate and is
    not logged. ``APPROVE`` upserts the derived pattern into ``catalog``;
    ``APPROVE`` and ``REJECT`` are both recorded in ``log``.

    Raises:
        ValueError: if ``decide`` returns an action other than the three above.
    """
    outcome = ReviewResult()
    for candidate in candidates:
        source_key = candidate["key"]
        if log.already_decided(candidate):
            outcome.skipped.append(source_key)
            continue

        verdict, why = decide(candidate)
        if verdict == DISCUSS:
            # Not a final decision: leave it unlogged for a later pass.
            outcome.deferred.append(source_key)
            continue
        if verdict not in (APPROVE, REJECT):
            raise ValueError(f"unknown review action {verdict!r}")

        if verdict == APPROVE:
            catalog_action = catalog.upsert(candidate_to_pattern(candidate))
            outcome.approved.append((source_key, catalog_action))
        else:
            outcome.rejected.append(source_key)
        log.record(candidate, verdict, why)
    return outcome

View File

@@ -0,0 +1,93 @@
"""Review workflow tests (T03): promote/reject/discuss + idempotent re-review."""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.curate.catalog import Catalog # noqa: E402
from session_memory.curate.review import ( # noqa: E402
APPROVE,
DISCUSS,
REJECT,
ReviewLog,
candidate_to_pattern,
review,
)
from session_memory.curate.schema import SolutionPattern # noqa: E402
def _candidate(key="success:clean_pass:outcome", freq=18, flavors=("claude", "grok")):
return {
"key": key,
"polarity": key.split(":")[0],
"signal_type": key.split(":")[1],
"locus": key.split(":")[2],
"title": "cross-flavor success: clean pass",
"frequency": freq,
"flavors": list(flavors),
"repos": ["agentic-resources"],
"sessions": [f"s{i}" for i in range(freq)],
"cross_flavor": len(flavors) > 1,
"cost_impact": 12.5,
}
def _decider(action, rationale="because"):
return lambda cand: (action, rationale)
def test_approve_promotes_to_catalog(tmp_path):
    """An approved candidate is stored in the catalog with its scope, hints and evidence."""
    catalog = Catalog(str(tmp_path / "catalog"))
    review_log = ReviewLog(str(tmp_path / "reviews.jsonl"))

    outcome = review([_candidate()], _decider(APPROVE), catalog, review_log)

    assert len(outcome.approved) == 1
    pattern_id = SolutionPattern.make_id("success:clean_pass:outcome")
    stored = catalog.load(pattern_id)
    assert stored is not None
    assert stored.scope.flavors == ["claude", "grok"]
    assert set(stored.rendering_hints) == {"claude", "grok"}
    assert stored.provenance.evidence["frequency"] == 18
def test_reject_records_no_catalog_write(tmp_path):
    """A reject is recorded in the review log and writes nothing to the catalog."""
    key = "success:clean_pass:outcome"
    cat = Catalog(str(tmp_path / "catalog"))
    log = ReviewLog(str(tmp_path / "reviews.jsonl"))
    res = review([_candidate()], _decider(REJECT), cat, log)
    assert res.rejected == [key]
    assert cat.list() == []
    # The "records" half of the test name: the decision must be in the log.
    recorded = log.prior(key)
    assert recorded is not None
    assert recorded["action"] == REJECT
    assert recorded["rationale"] == "because"
def test_discuss_defers_and_is_not_final(tmp_path):
    """A discuss verdict defers the candidate without logging a decision."""
    key = "success:clean_pass:outcome"
    cat = Catalog(str(tmp_path / "catalog"))
    log = ReviewLog(str(tmp_path / "reviews.jsonl"))
    res = review([_candidate()], _decider(DISCUSS), cat, log)
    assert res.deferred == [key]
    # Deferred means nothing was recorded for this key.
    assert log.prior(key) is None
    # not recorded as final -> a later pass re-surfaces it
    res2 = review([_candidate()], _decider(APPROVE), cat, log)
    assert len(res2.approved) == 1
    assert res2.skipped == []
    assert log.prior(key)["action"] == APPROVE
def test_prior_reject_remembered_same_evidence(tmp_path):
    """A reject persisted to disk is honoured by a fresh log when evidence is unchanged."""
    catalog = Catalog(str(tmp_path / "catalog"))
    jsonl_path = str(tmp_path / "reviews.jsonl")

    first_log = ReviewLog(jsonl_path)
    review([_candidate()], _decider(REJECT), catalog, first_log)

    # A new ReviewLog on the same path reloads from disk; same evidence -> skipped.
    reloaded_log = ReviewLog(jsonl_path)
    outcome = review([_candidate()], _decider(APPROVE), catalog, reloaded_log)

    assert outcome.skipped == ["success:clean_pass:outcome"]
    assert catalog.list() == []
def test_changed_evidence_resurfaces(tmp_path):
    """A previously rejected candidate is reviewed again once its evidence changes."""
    catalog = Catalog(str(tmp_path / "catalog"))
    jsonl_path = str(tmp_path / "reviews.jsonl")

    earlier = _candidate(freq=18)
    review([earlier], _decider(REJECT), catalog, ReviewLog(jsonl_path))

    # Higher frequency (and more sessions) -> not skipped, reviewed again.
    later = _candidate(freq=40)
    outcome = review([later], _decider(APPROVE), catalog, ReviewLog(jsonl_path))

    assert len(outcome.approved) == 1
def test_candidate_to_pattern_defaults():
    """A converted candidate is provisional, keeps its polarity and gets the default claude target."""
    single_flavor = _candidate(flavors=("claude",))
    pattern = candidate_to_pattern(single_flavor)

    assert pattern.status == "provisional"
    claude_hint = pattern.rendering_hints["claude"]
    assert claude_hint["target"] == "CLAUDE.md"
    assert pattern.polarity == "success"

View File

@@ -76,7 +76,7 @@ re-saving an unchanged pattern is a no-op (no spurious version bump).
```task ```task
id: AGENTIC-WP-0004-T03 id: AGENTIC-WP-0004-T03
status: todo status: done
priority: high priority: high
state_hub_task_id: "e303d01f-564e-4499-9ce5-22cf959ed84c" state_hub_task_id: "e303d01f-564e-4499-9ce5-22cf959ed84c"
``` ```