Files
agentic-resources/session_memory/curate/review.py
tegwick 4b7a628b6f session-memory Phase 2: hub decision integration (T05)
decisions.py: every final promote/reject becomes a record_decision-shaped
payload (rationale + source key + evidence snapshot). DecisionRecorder degrades
gracefully under a hub outage — pluggable sink with a durable local-queue
fallback and ordered flush/replay (mirrors Phase 1's after-the-fact sync).
Wired into review() via an optional recorder. 6 new tests; suite 70/70 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 00:31:22 +02:00

159 lines
6.4 KiB
Python

"""Curation review workflow (FR-U1/FR-U2; T03).
Drives Phase 1 detect candidates through a **discuss / approve / reject** review
and, on approve, promotes the candidate into a :class:`SolutionPattern` written to
the :class:`Catalog`. The actual decision is supplied by a ``decide`` callback so
this engine stays UI-free — the ``__main__`` entrypoint (T06) plugs in interactive
or batch (auto-approve) logic.
Re-review is **idempotent** via a :class:`ReviewLog`: a candidate already decided
is skipped unless its *evidence fingerprint* changed (new sessions/frequency), so
a prior **reject** is remembered and not re-surfaced, and a prior **approve** is
updated in place rather than duplicated (catalog dedup does the rest).
"""
from __future__ import annotations
import hashlib
import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Callable, Optional
from .catalog import Catalog
from .decisions import DecisionRecorder
from .gating import GateConfig, evaluate
from .schema import Provenance, Resolution, Scope, SolutionPattern
# Review actions a ``decide`` callback may return. APPROVE and REJECT are final
# decisions (written to the review log); DISCUSS defers the candidate.
APPROVE = "approve"
REJECT = "reject"
DISCUSS = "discuss"  # defer — no final decision recorded
# Default per-flavor rendering-hint stubs a reviewer can later refine (OQ4).
# Maps a flavor name to the default "target" placed in a promoted pattern's
# rendering hints; flavors not listed here get an empty target.
_DEFAULT_TARGET = {"claude": "CLAUDE.md", "codex": "AGENTS.md", "grok": "instructions"}
# A decision callback: (candidate dict) -> (action, rationale)
# ``action`` is expected to be one of APPROVE / REJECT / DISCUSS.
Decider = Callable[[dict], tuple]
def _now() -> str:
    """Return the current UTC time as a ``YYYY-MM-DDTHH:MM:SSZ`` string."""
    current_utc = datetime.now(tz=timezone.utc)
    return current_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
def evidence_fingerprint(candidate: dict) -> str:
    """Return a stable SHA-1 hex digest of a candidate's review-relevant evidence.

    Only a fixed set of evidence fields is hashed; any other key on the
    candidate (e.g. its title or source key) does not influence the result.
    A field that is absent is hashed as ``None``.
    """
    tracked_fields = (
        "frequency",
        "cost_impact",
        "flavors",
        "repos",
        "sessions",
        "cross_flavor",
    )
    snapshot = {}
    for name in tracked_fields:
        snapshot[name] = candidate.get(name)
    # sort_keys gives a canonical serialization independent of insertion order.
    canonical = json.dumps(snapshot, sort_keys=True)
    digest = hashlib.sha1(canonical.encode("utf-8"))
    return digest.hexdigest()
def candidate_to_pattern(candidate: dict, *, status: str = "provisional",
                         distribution_ready: bool = False) -> SolutionPattern:
    """Build a Solution Pattern from a detect candidate.

    ``status``/``distribution_ready`` come from the evidence gate (T04); they
    default to a provisional, non-distribution-ready pattern when ungated.

    The candidate must carry a ``"key"`` (used as the provenance source key and
    to derive the pattern id); a missing key raises ``KeyError``. ``"title"``
    falls back to the key when absent or empty. ``"flavors"`` and ``"repos"``
    may be absent or ``None`` — both are treated as empty.
    """
    src = candidate["key"]
    # ``or []`` (rather than a ``.get`` default) also covers a key that is
    # present with an explicit ``None`` value, which would otherwise make
    # ``list(None)`` raise TypeError.
    flavors = list(candidate.get("flavors") or [])
    repos = list(candidate.get("repos") or [])
    title = candidate.get("title") or src
    # One placeholder rendering hint per flavor; unknown flavors get an empty target.
    hints = {
        flavor: {"target": _DEFAULT_TARGET.get(flavor, ""), "note": "TODO: refine rendering"}
        for flavor in flavors
    }
    return SolutionPattern(
        id=SolutionPattern.make_id(src),
        name=title,
        version="1.0.0",
        polarity=candidate.get("polarity", "problem"),
        problem=title,
        resolutions=[Resolution(summary="TODO: capture the recommended resolution")],
        scope=Scope(flavors=flavors, repos=repos),
        # Shallow copy of the candidate is kept as the evidence snapshot.
        provenance=Provenance(source_key=src, evidence=dict(candidate), promoted_at=_now()),
        rendering_hints=hints,
        status=status,
        distribution_ready=distribution_ready,
    )
@dataclass
class ReviewLog:
    """JSON-lines log of final review decisions, indexed in memory by source key.

    Each non-blank line of the file at ``path`` is one JSON record. When an
    existing file is loaded, a later record for the same ``source_key``
    replaces an earlier one.
    """

    path: str
    # In-memory index: source_key -> most recent decision record.
    _by_key: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Replay an existing log file, if there is one, into the index."""
        if not os.path.exists(self.path):
            return
        with open(self.path, encoding="utf-8") as handle:
            for raw in handle:
                if not raw.strip():
                    continue  # skip blank lines
                entry = json.loads(raw)
                self._by_key[entry["source_key"]] = entry  # last write wins

    def prior(self, source_key: str) -> Optional[dict]:
        """Return the latest record for ``source_key``, or ``None`` if undecided."""
        return self._by_key.get(source_key)

    def already_decided(self, candidate: dict) -> bool:
        """Tell whether ``candidate`` was decided with its current evidence.

        A candidate counts as decided only when a record exists for its key
        and the stored fingerprint equals the candidate's current one.
        """
        previous = self._by_key.get(candidate["key"])
        if not previous:
            return False
        return previous["fingerprint"] == evidence_fingerprint(candidate)

    def record(self, candidate: dict, action: str, rationale: str) -> None:
        """Store a decision in memory and append it to the log file."""
        source_key = candidate["key"]
        entry = {
            "source_key": source_key,
            "action": action,
            "rationale": rationale,
            "fingerprint": evidence_fingerprint(candidate),
            "ts": _now(),
        }
        self._by_key[source_key] = entry
        # A bare filename has no directory component; fall back to the cwd.
        parent_dir = os.path.dirname(self.path) or "."
        os.makedirs(parent_dir, exist_ok=True)
        with open(self.path, "a", encoding="utf-8") as handle:
            handle.write(json.dumps(entry, sort_keys=True) + "\n")
@dataclass
class ReviewResult:
    """Outcome of one ``review()`` run, grouped by what happened to each candidate."""
    approved: list = field(default_factory=list)  # (source_key, catalog_action) — catalog_action is the value returned by catalog.upsert
    rejected: list = field(default_factory=list)  # source_key
    deferred: list = field(default_factory=list)  # source_key (discuss — no final decision)
    skipped: list = field(default_factory=list)  # source_key (already decided with unchanged evidence)
def review(candidates: list[dict], decide: Decider, catalog: Catalog,
           log: ReviewLog, gate: Optional[GateConfig] = None,
           recorder: Optional[DecisionRecorder] = None) -> ReviewResult:
    """Feed every candidate to ``decide`` and promote the approved ones.

    Candidates the ``log`` already holds a decision for (with unchanged
    evidence) are skipped without consulting ``decide``. A ``discuss`` answer
    defers the candidate and records nothing. An ``approve`` builds a pattern
    and upserts it into ``catalog``; with a ``gate`` (T04 evidence bar) the
    pattern's ``status``/``distribution_ready`` are taken from the gate
    evaluation, so an approved-but-thin candidate lands as ``provisional``
    rather than distribution-ready. Every final approve/reject is written to
    ``log`` and, when a ``recorder`` (T05) is supplied, also handed to it as an
    auditable hub decision.

    Raises:
        ValueError: if ``decide`` returns an action other than approve,
            reject or discuss.
    """
    outcome = ReviewResult()

    for candidate in candidates:
        source_key = candidate["key"]

        if log.already_decided(candidate):
            outcome.skipped.append(source_key)
            continue

        action, rationale = decide(candidate)

        if action == DISCUSS:
            # Not a final decision — leave the candidate for a later pass.
            outcome.deferred.append(source_key)
            continue

        if action == APPROVE:
            verdict = None if gate is None else evaluate(candidate, gate)
            if verdict is None:
                pattern = candidate_to_pattern(candidate)
            else:
                pattern = candidate_to_pattern(
                    candidate,
                    status=verdict.status,
                    distribution_ready=verdict.distribution_ready,
                )
            catalog_action = catalog.upsert(pattern)
            outcome.approved.append((source_key, catalog_action))
        elif action == REJECT:
            outcome.rejected.append(source_key)
        else:
            raise ValueError(f"unknown review action {action!r}")

        # Final decision: persist locally first, then notify the recorder.
        log.record(candidate, action, rationale)
        if recorder is not None:
            recorder.record(candidate, action, rationale)

    return outcome