Files
agentic-resources/session_memory/curate/review.py
tegwick ab22d22bfb session-memory Phase 2: evidence-bar + bloat guard (T04)
gating.py: two-tier evidence bar (OQ5) — promote floor (frequency/sessions/
cost_impact) plus a stricter distribution-eligibility floor that sets a
promoted pattern to approved+distribution_ready vs provisional. Wired into
review() so thin approvals land provisional. bloat_warnings flags duplicate
and near-duplicate (same signal-type+locus) candidates (OQ6). [curate]/
[curate.gate] knobs in config.toml. 6 new tests; suite 64/64 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 00:28:34 +02:00

153 lines
6.1 KiB
Python

"""Curation review workflow (FR-U1/FR-U2; T03).
Drives Phase 1 detect candidates through a **discuss / approve / reject** review
and, on approve, promotes the candidate into a :class:`SolutionPattern` written to
the :class:`Catalog`. The actual decision is supplied by a ``decide`` callback so
this engine stays UI-free — the ``__main__`` entrypoint (T06) plugs in interactive
or batch (auto-approve) logic.
Re-review is **idempotent** via a :class:`ReviewLog`: a candidate already decided
is skipped unless its *evidence fingerprint* changed (new sessions/frequency), so
a prior **reject** is remembered and not re-surfaced, and a prior **approve** is
updated in place rather than duplicated (catalog dedup does the rest).
"""
from __future__ import annotations
import hashlib
import json
import os
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Callable, Optional
from .catalog import Catalog
from .gating import GateConfig, evaluate
from .schema import Provenance, Resolution, Scope, SolutionPattern
# Actions a ``decide`` callback may return for a candidate.
APPROVE = "approve"
REJECT = "reject"
DISCUSS = "discuss"  # deferral, not a final decision

# Starting-point rendering targets per flavor; reviewers can refine these
# stubs later (OQ4).
_DEFAULT_TARGET = {
    "claude": "CLAUDE.md",
    "codex": "AGENTS.md",
    "grok": "instructions",
}

# Signature of a decision callback: (candidate dict) -> (action, rationale).
Decider = Callable[[dict], tuple]
def _now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
def evidence_fingerprint(candidate: dict) -> str:
    """Return a SHA-1 hex digest of the candidate's evidence fields.

    Only ``frequency``, ``cost_impact``, ``flavors``, ``repos``, ``sessions``
    and ``cross_flavor`` feed the hash; a field missing from ``candidate`` is
    hashed as ``None``. The fields are serialised as JSON with sorted keys, so
    the digest does not depend on the candidate dict's key order.
    """
    evidence = {}
    for name in ("frequency", "cost_impact", "flavors", "repos", "sessions",
                 "cross_flavor"):
        evidence[name] = candidate.get(name)
    canonical = json.dumps(evidence, sort_keys=True)
    digest = hashlib.sha1(canonical.encode("utf-8"))
    return digest.hexdigest()
def candidate_to_pattern(candidate: dict, *, status: str = "provisional",
                         distribution_ready: bool = False) -> SolutionPattern:
    """Promote a detect candidate into a :class:`SolutionPattern`.

    ``candidate`` must carry a ``"key"``; ``title``, ``polarity``, ``flavors``
    and ``repos`` are optional. The title (falling back to the key) is used as
    both the pattern name and its problem statement, and a copy of the whole
    candidate dict is kept as provenance evidence. Each flavor gets a stub
    rendering hint, and the single resolution is a TODO placeholder.

    ``status``/``distribution_ready`` are normally supplied by the evidence
    gate (T04); without it the pattern is provisional and not
    distribution-ready.

    Raises:
        KeyError: if ``candidate`` has no ``"key"`` entry.
    """
    source_key = candidate["key"]
    flavor_names = list(candidate.get("flavors", []))

    rendering_hints = {}
    for flavor in flavor_names:
        rendering_hints[flavor] = {
            "target": _DEFAULT_TARGET.get(flavor, ""),
            "note": "TODO: refine rendering",
        }

    pattern_id = SolutionPattern.make_id(source_key)
    title = candidate.get("title") or source_key
    polarity = candidate.get("polarity", "problem")
    placeholder = Resolution(summary="TODO: capture the recommended resolution")
    scope = Scope(flavors=flavor_names, repos=list(candidate.get("repos", [])))
    provenance = Provenance(
        source_key=source_key,
        evidence=dict(candidate),
        promoted_at=_now(),
    )
    return SolutionPattern(
        id=pattern_id,
        name=title,
        version="1.0.0",
        polarity=polarity,
        problem=title,
        resolutions=[placeholder],
        scope=scope,
        provenance=provenance,
        rendering_hints=rendering_hints,
        status=status,
        distribution_ready=distribution_ready,
    )
@dataclass
class ReviewLog:
    """Append-only record of final decisions, keyed by candidate source key.

    The log is a JSON-lines file at ``path``: one object per line holding
    ``source_key``, ``action``, ``rationale``, ``fingerprint`` and ``ts``.
    Existing records are loaded on construction; when a key appears more than
    once, the last line wins.
    """

    path: str
    # In-memory index: source key -> most recent record for that key.
    _by_key: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Load any records already present at ``path``."""
        if not os.path.exists(self.path):
            return
        with open(self.path, encoding="utf-8") as fh:
            for line in fh:
                if not line.strip():
                    continue  # tolerate blank lines
                rec = json.loads(line)
                self._by_key[rec["source_key"]] = rec  # last write wins

    def prior(self, source_key: str) -> Optional[dict]:
        """Return the latest record for ``source_key``, or ``None`` if absent."""
        return self._by_key.get(source_key)

    def already_decided(self, candidate: dict) -> bool:
        """Return True if the candidate was decided with unchanged evidence.

        A candidate counts as decided only when a record exists for its key
        and that record's fingerprint equals the candidate's current one.
        """
        rec = self._by_key.get(candidate["key"])
        return bool(rec) and rec["fingerprint"] == evidence_fingerprint(candidate)

    def record(self, candidate: dict, action: str, rationale: str) -> None:
        """Append a decision for ``candidate`` to the log file and the index.

        The parent directory of ``path`` is created if missing. The record is
        serialised before the file is touched, and the in-memory index is
        updated only after the append succeeds, so a failed write never
        leaves the index reporting a decision that is not on disk.

        Raises:
            KeyError: if ``candidate`` has no ``"key"`` entry.
            TypeError: if a field of the record is not JSON-serialisable.
            OSError: if the directory cannot be created or the file written.
        """
        rec = {
            "source_key": candidate["key"],
            "action": action,
            "rationale": rationale,
            "fingerprint": evidence_fingerprint(candidate),
            "ts": _now(),
        }
        # Serialise up front: an unserialisable record must fail before any I/O.
        line = json.dumps(rec, sort_keys=True) + "\n"
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        with open(self.path, "a", encoding="utf-8") as fh:
            fh.write(line)
        # Index last, so memory never gets ahead of the file.
        self._by_key[candidate["key"]] = rec
@dataclass
class ReviewResult:
    """Outcome buckets filled in by one review pass; each starts empty."""

    # Promoted candidates, as (source_key, catalog_action) pairs.
    approved: list = field(default_factory=list)
    # Source keys of candidates that were rejected.
    rejected: list = field(default_factory=list)
    # Source keys deferred for discussion (no final decision).
    deferred: list = field(default_factory=list)
    # Source keys skipped because they were already decided.
    skipped: list = field(default_factory=list)
def review(candidates: list[dict], decide: Decider, catalog: Catalog,
           log: ReviewLog, gate: Optional[GateConfig] = None) -> ReviewResult:
    """Ask ``decide`` about each candidate and promote approvals into ``catalog``.

    Candidates that ``log`` reports as already decided are skipped without
    calling ``decide``. A ``discuss`` answer defers the candidate and records
    nothing. ``approve`` and ``reject`` are final and are appended to ``log``;
    an approval is first converted to a pattern and upserted into ``catalog``.

    With a ``gate`` (T04 evidence bar), the promoted pattern takes its
    ``status``/``distribution_ready`` from the gate evaluation, so an
    approved-but-thin candidate lands as ``provisional`` rather than
    distribution-ready. Without one, the pattern keeps the conversion defaults.

    Raises:
        KeyError: if a candidate has no ``"key"`` entry.
        ValueError: if ``decide`` returns an action other than approve,
            reject or discuss.
    """
    outcome = ReviewResult()
    for candidate in candidates:
        source_key = candidate["key"]

        if log.already_decided(candidate):
            outcome.skipped.append(source_key)
            continue

        action, rationale = decide(candidate)

        if action == DISCUSS:
            # Not final: leave it unrecorded so a later pass asks again.
            outcome.deferred.append(source_key)
            continue

        if action == APPROVE:
            verdict = None if gate is None else evaluate(candidate, gate)
            if verdict is None:
                pattern = candidate_to_pattern(candidate)
            else:
                pattern = candidate_to_pattern(
                    candidate,
                    status=verdict.status,
                    distribution_ready=verdict.distribution_ready,
                )
            catalog_action = catalog.upsert(pattern)
            outcome.approved.append((source_key, catalog_action))
        elif action == REJECT:
            outcome.rejected.append(source_key)
        else:
            raise ValueError(f"unknown review action {action!r}")

        # Only final decisions (approve/reject) reach the log.
        log.record(candidate, action, rationale)
    return outcome