From c6164a82ba04ae500666d1d7cd247a9756b4317b Mon Sep 17 00:00:00 2001 From: tegwick Date: Sun, 7 Jun 2026 00:18:01 +0200 Subject: [PATCH] session-memory Phase 2: versioned Pattern Catalog store (T02) Files-first catalog (one JSON per pattern, id = source-key). Single idempotent upsert path: added / unchanged / updated (status-only, no bump) / versioned (content change bumps semver + archives prior to .history.jsonl). Dedup is structural on pattern id. 5 new tests; suite 52/52 green. Co-Authored-By: Claude Opus 4.8 --- session_memory/curate/catalog.py | 132 ++++++++++++++++++ tests/test_curate_catalog.py | 86 ++++++++++++ .../AGENTIC-WP-0004-session-memory-phase2.md | 2 +- 3 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 session_memory/curate/catalog.py create mode 100644 tests/test_curate_catalog.py diff --git a/session_memory/curate/catalog.py b/session_memory/curate/catalog.py new file mode 100644 index 0000000..6d3a913 --- /dev/null +++ b/session_memory/curate/catalog.py @@ -0,0 +1,132 @@ +"""Versioned Pattern Catalog — files-first source of truth (FR-U3; T02). + +The catalog is a directory of one JSON file per Solution Pattern +(``{catalog_dir}/{pattern_id}.json``). Files originate the work; the State Hub +indexes them (ADR-001 / PRD §9). Identity is the pattern ``id`` (derived from the +source candidate key), so re-promoting the same detect candidate maps to the same +file — dedup is structural, not heuristic. + +:meth:`Catalog.upsert` is the one write path and is **idempotent**: + +* new id -> written as-is (``added``) +* same id, identical content -> no write, no version bump (``unchanged``) +* same id, only status/flags -> updated in place, no bump (``updated``) +* same id, content changed -> version bumped, prior snapshot + appended to ``{pattern_id}.history.jsonl`` (``versioned``) + +History is append-only alongside the current file, so the catalog dir stays one +clean current file per pattern while every superseded version is recoverable. 
+""" + +from __future__ import annotations + +import json +import os +from datetime import datetime, timezone +from typing import Optional + +from .schema import SolutionPattern + +# Content fields that define a pattern's substance. Version, timestamps, status, +# and distribution_ready are metadata — changes to them never bump the version. +_CONTENT_KEYS = ("name", "polarity", "problem", "resolutions", "scope", + "provenance", "rendering_hints") + +ADDED = "added" +UNCHANGED = "unchanged" +UPDATED = "updated" +VERSIONED = "versioned" + + +def _now() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +def _content(p: SolutionPattern) -> str: + d = p.to_dict() + return json.dumps({k: d[k] for k in _CONTENT_KEYS}, sort_keys=True) + + +class Catalog: + """File-backed catalog of versioned :class:`SolutionPattern` artifacts.""" + + def __init__(self, catalog_dir: str) -> None: + self.dir = catalog_dir + os.makedirs(self.dir, exist_ok=True) + + # --- paths -------------------------------------------------------------- + + def _path(self, pattern_id: str) -> str: + return os.path.join(self.dir, f"{pattern_id}.json") + + def _history_path(self, pattern_id: str) -> str: + return os.path.join(self.dir, f"{pattern_id}.history.jsonl") + + # --- reads -------------------------------------------------------------- + + def load(self, pattern_id: str) -> Optional[SolutionPattern]: + path = self._path(pattern_id) + if not os.path.exists(path): + return None + with open(path, encoding="utf-8") as fh: + return SolutionPattern.from_json(fh.read()) + + def list(self) -> list[SolutionPattern]: + out: list[SolutionPattern] = [] + for name in sorted(os.listdir(self.dir)): + if name.endswith(".json") and not name.endswith(".history.jsonl"): + with open(os.path.join(self.dir, name), encoding="utf-8") as fh: + out.append(SolutionPattern.from_json(fh.read())) + return out + + def history(self, pattern_id: str) -> list[dict]: + path = self._history_path(pattern_id) 
+ if not os.path.exists(path): + return [] + with open(path, encoding="utf-8") as fh: + return [json.loads(line) for line in fh if line.strip()] + + # --- the single write path --------------------------------------------- + + def upsert(self, pattern: SolutionPattern) -> str: + """Insert or version-update a pattern. Returns the action taken.""" + existing = self.load(pattern.id) + now = _now() + + if existing is None: + pattern.created_at = pattern.created_at or now + pattern.updated_at = now + self._write(pattern) + return ADDED + + if _content(existing) == _content(pattern): + # substance unchanged — only persist a metadata (status/flag) change + if (existing.status == pattern.status + and existing.distribution_ready == pattern.distribution_ready): + return UNCHANGED + existing.status = pattern.status + existing.distribution_ready = pattern.distribution_ready + existing.updated_at = now + self._write(existing) + return UPDATED + + # substance changed: archive the old version, bump, write the new one + self._append_history(existing) + pattern.version = SolutionPattern.bump_version(existing.version) + pattern.created_at = existing.created_at or now + pattern.updated_at = now + self._write(pattern) + return VERSIONED + + # --- internals ---------------------------------------------------------- + + def _write(self, pattern: SolutionPattern) -> None: + with open(self._path(pattern.id), "w", encoding="utf-8") as fh: + fh.write(pattern.to_json()) + fh.write("\n") + + def _append_history(self, superseded: SolutionPattern) -> None: + superseded.status = "superseded" + with open(self._history_path(superseded.id), "a", encoding="utf-8") as fh: + fh.write(json.dumps(superseded.to_dict(), sort_keys=True)) + fh.write("\n") diff --git a/tests/test_curate_catalog.py b/tests/test_curate_catalog.py new file mode 100644 index 0000000..27e7d9f --- /dev/null +++ b/tests/test_curate_catalog.py @@ -0,0 +1,86 @@ +"""Versioned Pattern Catalog tests (T02): round-trip, dedup, idempotent 
upsert.""" + +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from session_memory.curate.catalog import ( # noqa: E402 + ADDED, + UNCHANGED, + UPDATED, + VERSIONED, + Catalog, +) +from session_memory.curate.schema import ( # noqa: E402 + Provenance, + Resolution, + Scope, + SolutionPattern, +) + + +def _pattern(src="success:clean_pass:outcome", problem="ran tests, clean finish"): + return SolutionPattern( + id=SolutionPattern.make_id(src), + name="Run tests before declaring success", + version="1.0.0", + polarity="success", + problem=problem, + resolutions=[Resolution(summary="run the suite")], + scope=Scope(flavors=["claude", "grok"]), + provenance=Provenance(source_key=src, evidence={"frequency": 18}), + ) + + +def test_add_then_load_round_trips(tmp_path): + cat = Catalog(str(tmp_path)) + assert cat.upsert(_pattern()) == ADDED + loaded = cat.load(SolutionPattern.make_id("success:clean_pass:outcome")) + assert loaded is not None + assert loaded.problem == "ran tests, clean finish" + assert loaded.created_at and loaded.updated_at + assert [p.id for p in cat.list()] == [loaded.id] + + +def test_resave_identical_is_noop(tmp_path): + cat = Catalog(str(tmp_path)) + cat.upsert(_pattern()) + assert cat.upsert(_pattern()) == UNCHANGED + # version not bumped, no history written + assert cat.load(_pattern().id).version == "1.0.0" + assert cat.history(_pattern().id) == [] + + +def test_dedup_on_source_key(tmp_path): + cat = Catalog(str(tmp_path)) + cat.upsert(_pattern()) + cat.upsert(_pattern()) # same source key -> same id -> one file + assert len(cat.list()) == 1 + + +def test_content_change_bumps_version_and_archives(tmp_path): + cat = Catalog(str(tmp_path)) + cat.upsert(_pattern()) + assert cat.upsert(_pattern(problem="now with more nuance")) == VERSIONED + current = cat.load(_pattern().id) + assert current.version == "1.0.1" + assert current.problem == "now with more nuance" + hist = cat.history(_pattern().id) 
+ assert len(hist) == 1 + assert hist[0]["version"] == "1.0.0" + assert hist[0]["status"] == "superseded" + + +def test_status_only_change_updates_without_bump(tmp_path): + cat = Catalog(str(tmp_path)) + cat.upsert(_pattern()) + p = _pattern() + p.status = "approved" + p.distribution_ready = True + assert cat.upsert(p) == UPDATED + current = cat.load(p.id) + assert current.status == "approved" + assert current.distribution_ready is True + assert current.version == "1.0.0" # metadata change, no bump + assert cat.history(p.id) == [] diff --git a/workplans/AGENTIC-WP-0004-session-memory-phase2.md b/workplans/AGENTIC-WP-0004-session-memory-phase2.md index fda5740..c97c2ac 100644 --- a/workplans/AGENTIC-WP-0004-session-memory-phase2.md +++ b/workplans/AGENTIC-WP-0004-session-memory-phase2.md @@ -59,7 +59,7 @@ contract for the embedded evidence. Unit-tested for round-trip stability. ```task id: AGENTIC-WP-0004-T02 -status: todo +status: done priority: high state_hub_task_id: "d40c7810-fd1e-4b14-8577-b8a64ddd337b" ```