diff --git a/session_memory/curate/__init__.py b/session_memory/curate/__init__.py new file mode 100644 index 0000000..3336051 --- /dev/null +++ b/session_memory/curate/__init__.py @@ -0,0 +1,9 @@ +"""Curate phase (PRD §6.3) — review candidate patterns into versioned Solution +Patterns held in an in-repo Pattern Catalog. + +Layout mirrors ``detect/``: + schema.py Solution Pattern artifact + per-flavor rendering hints (T01) + catalog.py versioned, files-first catalog store (T02) + review.py discuss/approve/reject -> promote workflow (T03) + __main__.py `python -m session_memory.curate` entrypoint (T06) +""" diff --git a/session_memory/curate/schema.py b/session_memory/curate/schema.py new file mode 100644 index 0000000..de43179 --- /dev/null +++ b/session_memory/curate/schema.py @@ -0,0 +1,155 @@ +"""Solution Pattern schema (PRD §6.3 FR-U2; design OQ4) — T01. + +A **Solution Pattern** is the curated, reviewed artifact a candidate pattern is +promoted into: a named, versioned record pairing a problem (or success) with one +or more recommended resolutions, written **flavor-agnostically**. Everything a +distributor needs to render a native artifact lives in a *separate* +``rendering_hints`` sub-structure, keyed by flavor — so the core stays neutral +(FR-A1/FR-A2) while Phase 3 distributors still get enough to render well (OQ4). + +The artifact is the durable unit of the Pattern Catalog (T02): files originate, +the State Hub indexes (ADR-001). Serialization is deterministic (sorted keys) so +catalog files diff cleanly and re-saving an unchanged pattern is a no-op. +""" + +from __future__ import annotations + +import json +import re +from dataclasses import asdict, dataclass, field, fields +from typing import Any, Optional + +from ..core.schema import FLAVORS + +SCHEMA_VERSION = 1 + +# Lifecycle of a catalogued pattern. 
#   provisional — promoted but below the distribution evidence bar (OQ5)
#   approved    — meets the bar; distribution-eligible (Phase 3)
#   rejected    — reviewed and declined; remembered so it is not re-surfaced
#   superseded  — replaced by a newer version of the same pattern id
STATUSES = ("provisional", "approved", "rejected", "superseded")

POLARITIES = ("problem", "success")

# The only ``level`` values SolutionPattern.bump_version accepts.
_BUMP_LEVELS = ("major", "minor", "patch")


def _known(mapping: dict[str, Any], allowed: set[str]) -> dict[str, Any]:
    """Return the entries of *mapping* whose keys are in *allowed*.

    Used when deserializing so keys that are not dataclass fields are ignored
    instead of breaking the constructor call.
    """
    return {k: v for k, v in mapping.items() if k in allowed}


@dataclass
class Resolution:
    """One recommended resolution for the pattern's problem (FR-U2)."""

    summary: str
    detail: str = ""
    steps: list[str] = field(default_factory=list)


@dataclass
class Scope:
    """Where the pattern applies (FR-X2 input). Empty list == unrestricted.

    Raises:
        ValueError: if ``flavors`` names a flavor that is not in ``FLAVORS``.
    """

    repos: list[str] = field(default_factory=list)
    domains: list[str] = field(default_factory=list)
    flavors: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        bad = [f for f in self.flavors if f not in FLAVORS]
        if bad:
            raise ValueError(f"unknown flavor(s) in scope {bad!r}; expected {FLAVORS}")


@dataclass
class Provenance:
    """Trace back to the detect candidate this pattern was promoted from."""

    source_key: str  # the detect Pattern.key — stable cluster identity
    evidence: dict[str, Any] = field(default_factory=dict)  # snapshot of the candidate
    detected_at: Optional[str] = None
    promoted_at: Optional[str] = None


@dataclass
class SolutionPattern:
    """A curated, versioned solution pattern (PRD §5 / §6.3).

    Raises:
        ValueError: on construction, if ``polarity`` is not in ``POLARITIES``,
            ``status`` is not in ``STATUSES``, or ``rendering_hints`` has a key
            that is not in ``FLAVORS``.
    """

    id: str  # stable, derived from provenance.source_key
    name: str
    version: str  # semantic, e.g. "1.0.0"
    polarity: str  # problem | success
    problem: str  # human-readable description of the recurring situation
    resolutions: list[Resolution] = field(default_factory=list)
    scope: Scope = field(default_factory=Scope)
    provenance: Provenance = field(default_factory=lambda: Provenance(source_key=""))
    # per-flavor rendering hints, kept OUT of the agnostic core (OQ4):
    #   {"claude": {...}, "codex": {...}, "grok": {...}}
    rendering_hints: dict[str, dict[str, Any]] = field(default_factory=dict)
    status: str = "provisional"
    distribution_ready: bool = False
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    schema_version: int = SCHEMA_VERSION

    def __post_init__(self) -> None:
        if self.polarity not in POLARITIES:
            raise ValueError(f"unknown polarity {self.polarity!r}; expected {POLARITIES}")
        if self.status not in STATUSES:
            raise ValueError(f"unknown status {self.status!r}; expected {STATUSES}")
        bad = [f for f in self.rendering_hints if f not in FLAVORS]
        if bad:
            raise ValueError(f"unknown flavor(s) in rendering_hints {bad!r}; expected {FLAVORS}")

    # --- identity / versioning helpers -------------------------------------

    @staticmethod
    def make_id(source_key: str) -> str:
        """Stable catalog id from a detect candidate key (``polarity:type:locus``).

        Identity is the source key, so re-promoting the same candidate maps to the
        same pattern (dedup in T02), independent of wording or version.

        The key is lower-cased, every run of characters outside ``[a-z0-9_]``
        becomes a single ``-``, leading/trailing ``-`` are stripped, and the
        result is prefixed with ``sp-``.
        """
        slug = re.sub(r"[^a-z0-9_]+", "-", source_key.lower()).strip("-")
        return f"sp-{slug}"

    @staticmethod
    def bump_version(version: str, level: str = "patch") -> str:
        """Increment a ``major.minor.patch`` version string.

        Args:
            version: dotted version; missing components are read as ``0`` and
                anything past the third component is ignored.
            level: which component to increment — ``"major"``, ``"minor"`` or
                ``"patch"``. Bumping a component resets the ones after it to 0.

        Returns:
            The bumped version as ``"major.minor.patch"``.

        Raises:
            ValueError: if ``level`` is not one of the three known levels, or a
                version component is not an integer.
        """
        # Reject unknown levels explicitly: a typo such as "Major" used to be
        # treated as a patch bump, silently producing the wrong version.
        if level not in _BUMP_LEVELS:
            raise ValueError(f"unknown bump level {level!r}; expected {_BUMP_LEVELS}")
        parts = (version.split(".") + ["0", "0", "0"])[:3]
        try:
            major, minor, patch = (int(p) for p in parts)
        except ValueError:
            raise ValueError(
                f"invalid version {version!r}; expected 'major.minor.patch'"
            ) from None
        if level == "major":
            major, minor, patch = major + 1, 0, 0
        elif level == "minor":
            minor, patch = minor + 1, 0
        else:
            patch += 1
        return f"{major}.{minor}.{patch}"

    # --- serialization ------------------------------------------------------

    def to_dict(self) -> dict[str, Any]:
        """Return the pattern, including nested dataclasses, as plain dicts/lists."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to JSON with sorted keys and a 2-space indent."""
        return json.dumps(self.to_dict(), sort_keys=True, indent=2)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "SolutionPattern":
        """Build a pattern from a dict shaped like :meth:`to_dict` output.

        Keys that are not dataclass fields are ignored, at the top level and
        inside ``resolutions`` / ``scope`` / ``provenance``. A missing or
        ``None`` ``resolutions`` or ``rendering_hints`` is treated as empty; a
        missing or ``None`` ``scope`` / ``provenance`` falls back to the field
        default. The input mapping is not modified.

        Raises:
            TypeError: if a required field (e.g. ``id``, ``name``) is absent.
            ValueError: if the values fail the dataclass validation.
        """
        data = dict(d)  # shallow copy so the pops below never touch the caller's dict
        raw_resolutions = data.pop("resolutions", None) or []
        raw_scope = data.pop("scope", None)
        raw_prov = data.pop("provenance", None)

        kwargs = _known(data, _PATTERN_FIELDS)
        # An explicit null would make __post_init__ iterate over None; drop it
        # so the empty-dict default applies instead.
        if kwargs.get("rendering_hints", {}) is None:
            del kwargs["rendering_hints"]
        kwargs["resolutions"] = [
            Resolution(**_known(r, _RESOLUTION_FIELDS)) for r in raw_resolutions
        ]
        if raw_scope is not None:
            kwargs["scope"] = Scope(**_known(raw_scope, _SCOPE_FIELDS))
        if raw_prov is not None:
            kwargs["provenance"] = Provenance(**_known(raw_prov, _PROV_FIELDS))
        # Construct once, with the final values, so validation sees the real object.
        return cls(**kwargs)

    @classmethod
    def from_json(cls, s: str) -> "SolutionPattern":
        """Parse a JSON document and build a pattern via :meth:`from_dict`."""
        return cls.from_dict(json.loads(s))


# Field-name sets used by from_dict to filter incoming keys. They are computed
# after the classes exist and are only read at call time.
_PATTERN_FIELDS = {f.name for f in fields(SolutionPattern)}
_RESOLUTION_FIELDS = {f.name for f in fields(Resolution)}
_SCOPE_FIELDS = {f.name for f in fields(Scope)}
_PROV_FIELDS = {f.name for f in fields(Provenance)}
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from session_memory.curate.schema import ( # noqa: E402 + Provenance, + Resolution, + Scope, + SolutionPattern, +) + + +def _sample() -> SolutionPattern: + src = "success:clean_pass:outcome" + return SolutionPattern( + id=SolutionPattern.make_id(src), + name="Run tests before declaring success", + version="1.0.0", + polarity="success", + problem="Sessions that run tests and finish with no retries resolve cheaply.", + resolutions=[Resolution(summary="Always run the suite", steps=["edit", "test", "commit"])], + scope=Scope(flavors=["claude", "grok"]), + provenance=Provenance(source_key=src, evidence={"frequency": 18, "cross_flavor": True}), + rendering_hints={"claude": {"target": "CLAUDE.md"}, "codex": {"target": "AGENTS.md"}}, + status="approved", + distribution_ready=True, + ) + + +def test_round_trip_is_lossless(): + p = _sample() + again = SolutionPattern.from_json(p.to_json()) + assert again.to_dict() == p.to_dict() + assert again.resolutions[0].steps == ["edit", "test", "commit"] + assert again.scope.flavors == ["claude", "grok"] + assert again.provenance.evidence["cross_flavor"] is True + + +def test_serialization_is_deterministic(): + p = _sample() + assert p.to_json() == p.to_json() + assert SolutionPattern.from_json(p.to_json()).to_json() == p.to_json() + + +def test_make_id_is_stable_and_slugged(): + assert SolutionPattern.make_id("success:clean_pass:outcome") == "sp-success-clean_pass-outcome" + # same source key -> same id regardless of later wording + assert SolutionPattern.make_id("problem:abandoned:outcome") == SolutionPattern.make_id( + "problem:abandoned:outcome" + ) + + +def test_bump_version(): + assert SolutionPattern.bump_version("1.0.0") == "1.0.1" + assert SolutionPattern.bump_version("1.2.3", "minor") == "1.3.0" + assert SolutionPattern.bump_version("1.2.3", "major") == "2.0.0" + + +def test_rejects_unknown_polarity(): + with pytest.raises(ValueError): + 
SolutionPattern(id="x", name="n", version="1.0.0", polarity="meh", problem="p") + + +def test_rejects_unknown_status(): + with pytest.raises(ValueError): + SolutionPattern(id="x", name="n", version="1.0.0", polarity="problem", + problem="p", status="bogus") + + +def test_rejects_unknown_flavor_in_hints_and_scope(): + with pytest.raises(ValueError): + SolutionPattern(id="x", name="n", version="1.0.0", polarity="problem", + problem="p", rendering_hints={"gpt": {}}) + with pytest.raises(ValueError): + Scope(flavors=["gpt"]) diff --git a/workplans/AGENTIC-WP-0004-session-memory-phase2.md b/workplans/AGENTIC-WP-0004-session-memory-phase2.md new file mode 100644 index 0000000..fda5740 --- /dev/null +++ b/workplans/AGENTIC-WP-0004-session-memory-phase2.md @@ -0,0 +1,165 @@ +--- +id: AGENTIC-WP-0004 +type: workplan +title: "Coding Session Memory — Phase 2 (Curate: review workflow + Pattern Catalog)" +domain: helix_forge +repo: agentic-resources +status: ready +owner: codex +topic_slug: helix-forge +created: "2026-06-06" +updated: "2026-06-06" +state_hub_workstream_id: "b3703684-f60e-42f3-b03e-dabe3e8ce3f4" +--- + +# Coding Session Memory — Phase 2 (Curate) + +Implements the **Curate** phase (PRD §6.3, FR-U1–FR-U4) of +[PRD-helix-forge](../docs/PRD-helix-forge.md), continuing +[AGENTIC-WP-0003](AGENTIC-WP-0003-session-memory-phase1.md) (Detect). + +Phase 1 surfaces ranked **candidate** problem/success patterns with evidence +(`python -m session_memory.detect --json`, persisted to the Tier 2 `patterns` +table by `detect/cluster.py::Pattern`). Phase 2 turns those candidates into +**reviewed, versioned Solution Patterns** held in an in-repo **Pattern Catalog** +— the source of truth that Phase 3 (Distribute) renders into per-flavor artifacts. + +Design boundary (ADR-001 / PRD §9): the catalog is **files-first** — solution +patterns originate as versioned files in this repo; the State Hub indexes them and +records each promote/reject as an auditable decision. 
The agnostic core stays +flavor-neutral; per-flavor knowledge lives only in **rendering hints** consumed +later by distributor adapters (PRD §6.4 / FR-A2). New code lands under a new +`session_memory/curate/` package, mirroring the `detect/` layout from Phase 1. + +Relevant design open questions this phase resolves: **OQ4** (one agnostic +representation that still gives distributors enough to render natively), **OQ5** +(minimum trustworthy evidence bar before a pattern is distribution-eligible), +**OQ6** (preventing pattern bloat / context-budget degradation). + +## Solution Pattern Schema + Per-Flavor Rendering Hints + +```task +id: AGENTIC-WP-0004-T01 +status: done +priority: high +state_hub_task_id: "c6d20bb6-7b6c-48fd-bd25-30a349514f41" +``` + +Define the agnostic **Solution Pattern** artifact (FR-U2, OQ4) in +`session_memory/curate/schema.py`: stable id, name, semantic `version`, problem +description, one or more recommended resolutions, applicability scope +(repos/domains/flavors), provenance (source candidate `key` + an evidence +snapshot copied from the detect `Pattern`), and **per-flavor rendering hints** +kept in a separate sub-structure so the core stays flavor-agnostic while +distributors get enough to render high-quality native artifacts. Dataclass + +deterministic serialization (sorted keys), reusing the `Pattern.to_dict()` +contract for the embedded evidence. Unit-tested for round-trip stability. + +## Versioned Pattern Catalog Store (files-first) + +```task +id: AGENTIC-WP-0004-T02 +status: todo +priority: high +state_hub_task_id: "d40c7810-fd1e-4b14-8577-b8a64ddd337b" +``` + +Implement the in-repo **Pattern Catalog** as the source of truth (FR-U3, ADR-001) +in `session_memory/curate/catalog.py`: versioned solution-pattern files under a +catalog dir (e.g. `session_memory/catalog/<pattern-id>.json`), stable IDs, a +version bump on edit (supersede-in-place with history preserved), and +load/save/list with **dedup on pattern identity** (the source candidate key).
+Files originate work; the hub indexes them. Verify save→load is lossless and +re-saving an unchanged pattern is a no-op (no spurious version bump). + +## Review Workflow (discuss / approve / reject → promote) + +```task +id: AGENTIC-WP-0004-T03 +status: todo +priority: high +state_hub_task_id: "e303d01f-564e-4499-9ce5-22cf959ed84c" +``` + +Implement the curation workflow (FR-U1/FR-U2) in +`session_memory/curate/review.py`: load Phase 1 detect candidates with their +evidence (cross-flavor first), present each candidate, accept a +**discuss/approve/reject** action, and on **approve** promote the candidate into +a Solution Pattern written to the catalog (T02) with default rendering-hint +stubs the reviewer can refine. Re-review is **idempotent**: candidates already +promoted are matched on source key and updated in place, never duplicated; a +prior reject is remembered so it is not re-surfaced unless evidence changed. + +## Promotion Evidence-Bar + Bloat Guard + +```task +id: AGENTIC-WP-0004-T04 +status: todo +priority: medium +state_hub_task_id: "d474425d-18af-48e4-8f5b-7716b2da0057" +``` + +Gate promotion on a **minimum trustworthy evidence threshold** (OQ5): +configurable floors on `frequency`, distinct supporting sessions, and — for +*distribution-eligible* patterns — `cross_flavor` and/or a `cost_impact` floor. +Candidates below the bar can be cataloged as `provisional` but not marked +distribution-ready. Add a **bloat guard** (OQ6): flag low-value or +near-duplicate patterns (same locus/signal-type already cataloged) so the +catalog stays lean and agent context budgets are protected. Knobs live in +`config.toml` alongside the existing retention/detect settings. 
+ +## State Hub Decision Integration + +```task +id: AGENTIC-WP-0004-T05 +status: todo +priority: medium +state_hub_task_id: "449f12d4-fae0-450d-873f-143b3a570b5a" +``` + +Record every promote/reject as an **auditable hub decision** (FR-U4) via the +decision API (`record_decision` / `resolve_decision`), capturing rationale, the +source candidate key, and the evidence snapshot. **Degrade gracefully** when the +hub API is down — queue decisions locally and sync later (mirrors Phase 1's +after-the-fact status sync, recorded in the milestone for `055713a`). Keep the +hub a read model: the catalog file is the durable artifact; the decision is the +audit trail. + +## Curate Entrypoint (`python -m session_memory.curate`) + +```task +id: AGENTIC-WP-0004-T06 +status: todo +priority: medium +state_hub_task_id: "95d7747e-8407-41af-9a60-b919a4ee5e06" +``` + +Add a `session_memory/curate/__main__.py` entrypoint consuming detect candidates +(ranked cross-flavor first): an **interactive** review mode plus a +**batch/non-interactive** mode (e.g. `--auto-approve` above the evidence bar, for +kaizen-agent review). Emits a **catalog diff summary** (added / version-bumped / +rejected) and machine-readable JSON. Document usage in `session_memory/README.md` +next to the existing `detect` instructions, including the +detect → curate → (Phase 3) distribute flow. + +## Tests + Verify Against Live Phase 1 Candidates + +```task +id: AGENTIC-WP-0004-T07 +status: todo +priority: medium +state_hub_task_id: "20407007-0a8b-4999-a470-fa3c84e17eba" +``` + +Unit tests for schema/catalog/review/gating on synthetic candidates, plus an +**end-to-end** run that promotes at least one **real cross-flavor** candidate from +the live detect output (the Claude+Grok "clean pass" / "abandoned" patterns from +the WP-0003 verification) into the catalog and confirms a hub decision is logged +(or queued if the API is down). Confirm catalog round-trips and versioning is +idempotent on re-run. 
Refresh design open questions **OQ4/OQ5/OQ6** in +[DESIGN-session-memory.md](../docs/DESIGN-session-memory.md). After workplan file +updates, notify the custodian operator to run from `~/state-hub`: + +```bash +make fix-consistency REPO=agentic-resources +```