Files
tegwick e237dcc622 session-memory: map signals to catalog recommendations via covers (WP-0010 follow-up)
Closes the gap where recurring_error suggestions showed generic 'Investigate'
instead of the curated recommendation. Added a covers[] field to SolutionPattern
(lowercase substrings a pattern's recommendation also applies to) + Catalog.find_for
(exact key first, then covers match against signal key+locus). Retro now resolves
recommendations through find_for. Tagged the read-before-edit pattern with
covers=['file has not been read','modified since read','file_not_read'] (v1.0.1).
Live: file-not-read suggestions across all repos now inherit 'Read the file before
Edit/Write'. 6 new tests; suite 158/158.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 21:09:44 +02:00

149 lines
5.8 KiB
Python

"""Versioned Pattern Catalog — files-first source of truth (FR-U3; T02).
The catalog is a directory of one JSON file per Solution Pattern
(``<catalog_dir>/<pattern-id>.json``). Files originate the work; the State Hub
indexes them (ADR-001 / PRD §9). Identity is the pattern ``id`` (derived from the
source candidate key), so re-promoting the same detect candidate maps to the same
file — dedup is structural, not heuristic.
:meth:`Catalog.upsert` is the one write path and is **idempotent**:
* new id -> written as-is (``added``)
* same id, identical content -> no write, no version bump (``unchanged``)
* same id, only status/flags -> updated in place, no bump (``updated``)
* same id, content changed -> version bumped, prior snapshot
appended to ``<id>.history.jsonl`` (``versioned``)
History is append-only alongside the current file, so the catalog dir stays one
clean current file per pattern while every superseded version is recoverable.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Optional
from .schema import SolutionPattern
# Content fields that define a pattern's substance. Version, timestamps, status,
# and distribution_ready are metadata — changes to them never bump the version.
_CONTENT_KEYS = ("name", "polarity", "problem", "resolutions", "scope",
"provenance", "rendering_hints", "covers")
ADDED = "added"
UNCHANGED = "unchanged"
UPDATED = "updated"
VERSIONED = "versioned"
def _now() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _content(p: SolutionPattern) -> str:
    """Return a canonical JSON string of the content fields of *p*.

    Only the keys named in ``_CONTENT_KEYS`` are taken from ``p.to_dict()``;
    keys are sorted on output, so two patterns with equal content fields
    produce equal strings.

    Raises:
        KeyError: if ``p.to_dict()`` lacks one of the content keys.
    """
    fields = p.to_dict()
    substance = {}
    for key in _CONTENT_KEYS:
        substance[key] = fields[key]
    return json.dumps(substance, sort_keys=True)
class Catalog:
    """File-backed catalog of versioned :class:`SolutionPattern` artifacts.

    The current version of each pattern is stored in
    ``<catalog_dir>/<pattern-id>.json``; superseded versions are appended, one
    JSON object per line, to ``<catalog_dir>/<pattern-id>.history.jsonl``.
    :meth:`upsert` is the single public write path.
    """

    def __init__(self, catalog_dir: str) -> None:
        """Bind the catalog to *catalog_dir*, creating the directory if missing."""
        self.dir = catalog_dir
        os.makedirs(self.dir, exist_ok=True)

    # --- paths --------------------------------------------------------------

    def _path(self, pattern_id: str) -> str:
        """Return the path of the current-version file for *pattern_id*."""
        return os.path.join(self.dir, f"{pattern_id}.json")

    def _history_path(self, pattern_id: str) -> str:
        """Return the path of the append-only history file for *pattern_id*."""
        return os.path.join(self.dir, f"{pattern_id}.history.jsonl")

    # --- reads --------------------------------------------------------------

    def load(self, pattern_id: str) -> Optional[SolutionPattern]:
        """Return the current pattern stored under *pattern_id*, or ``None``.

        ``None`` means no file exists for that id. Parse errors raised by
        ``SolutionPattern.from_json`` propagate to the caller.
        """
        # Open directly instead of exists()-then-open: one syscall fewer and
        # no window in which the file can vanish between check and read.
        try:
            with open(self._path(pattern_id), encoding="utf-8") as fh:
                raw = fh.read()
        except FileNotFoundError:
            return None
        return SolutionPattern.from_json(raw)

    # NOTE: this method shadows the builtin ``list`` inside the class body.
    # The ``list[...]`` annotations on this and later methods are only safe
    # because the module uses ``from __future__ import annotations`` (they are
    # never evaluated at definition time).
    def list(self) -> list[SolutionPattern]:
        """Return every current pattern, ordered by file name."""
        out: list[SolutionPattern] = []
        for name in sorted(os.listdir(self.dir)):
            # History files end in ".history.jsonl", which never ends in
            # ".json", so this single suffix test already excludes them.
            if not name.endswith(".json"):
                continue
            with open(os.path.join(self.dir, name), encoding="utf-8") as fh:
                out.append(SolutionPattern.from_json(fh.read()))
        return out

    def history(self, pattern_id: str) -> list[dict]:
        """Return the superseded versions of *pattern_id*, oldest first.

        Each entry is the decoded JSON object of one history line; blank lines
        are skipped. Returns ``[]`` when the pattern has no history file.
        """
        try:
            with open(self._history_path(pattern_id), encoding="utf-8") as fh:
                return [json.loads(line) for line in fh if line.strip()]
        except FileNotFoundError:
            return []

    def find_for(self, signal_key: str, locus: str = "") -> Optional[SolutionPattern]:
        """Best catalog pattern for a detect signal: exact id first, then ``covers``.

        Lets a signal that doesn't share a pattern's exact key (e.g. a
        ``recurring_error`` fingerprint) inherit the curated recommendation when a
        pattern declares it covers that text.

        Args:
            signal_key: key of the detect signal; also the source of the exact id.
            locus: optional extra text searched together with *signal_key*.

        Returns:
            The pattern stored under ``SolutionPattern.make_id(signal_key)`` if
            it exists; otherwise the first pattern in :meth:`list` order with a
            non-blank ``covers`` entry that is a case-insensitive substring of
            ``"<signal_key> <locus>"``; otherwise ``None``.
        """
        exact = self.load(SolutionPattern.make_id(signal_key))
        if exact is not None:
            return exact
        hay = f"{signal_key} {locus}".lower()
        for candidate in self.list():  # file-name order -> deterministic
            # Blank entries are skipped: "" is a substring of every string and
            # " " always occurs in ``hay``, so either would make this pattern
            # match every signal.
            for cover in candidate.covers or ():
                if cover and cover.strip() and cover.lower() in hay:
                    return candidate
        return None

    # --- the single write path ---------------------------------------------

    def upsert(self, pattern: SolutionPattern) -> str:
        """Insert or version-update a pattern. Returns the action taken.

        * no file for ``pattern.id``: written as given -> ``ADDED``
        * same content, same status/``distribution_ready``: no write -> ``UNCHANGED``
        * same content, different status/``distribution_ready``: the stored
          pattern gets the new metadata and is rewritten -> ``UPDATED``
        * different content: the stored version is appended to history, the
          version is bumped from the stored one, the new pattern is written
          -> ``VERSIONED``

        *pattern* is mutated in place on the ``ADDED`` and ``VERSIONED`` paths
        (timestamps, and ``version`` when versioned).
        """
        existing = self.load(pattern.id)
        now = _now()

        if existing is None:
            pattern.created_at = pattern.created_at or now
            pattern.updated_at = now
            self._write(pattern)
            return ADDED

        if _content(existing) == _content(pattern):
            # Substance unchanged: only persist a metadata (status/flag) change.
            same_metadata = (
                existing.status == pattern.status
                and existing.distribution_ready == pattern.distribution_ready
            )
            if same_metadata:
                return UNCHANGED
            existing.status = pattern.status
            existing.distribution_ready = pattern.distribution_ready
            existing.updated_at = now
            self._write(existing)
            return UPDATED

        # Substance changed: archive the old version, bump, write the new one.
        self._append_history(existing)
        pattern.version = SolutionPattern.bump_version(existing.version)
        pattern.created_at = existing.created_at or now
        pattern.updated_at = now
        self._write(pattern)
        return VERSIONED

    # --- internals ----------------------------------------------------------

    def _write(self, pattern: SolutionPattern) -> None:
        """Replace the current file for ``pattern.id`` with ``pattern.to_json()``.

        The payload is serialized before any file is opened, and is staged in
        ``<id>.json.tmp`` before being moved over the target with
        ``os.replace``. A serialization error or an interrupted write therefore
        leaves the previous current file intact instead of truncated. A stale
        staging file is overwritten by the next write and is ignored by
        :meth:`list` because it does not end in ``.json``.
        """
        payload = pattern.to_json() + "\n"
        target = self._path(pattern.id)
        staging = f"{target}.tmp"
        with open(staging, "w", encoding="utf-8") as fh:
            fh.write(payload)
        os.replace(staging, target)

    def _append_history(self, superseded: SolutionPattern) -> None:
        """Mark *superseded* as ``"superseded"`` and append it to its history file."""
        superseded.status = "superseded"
        # Build the whole line first so the record and its newline go out in
        # a single write call.
        record = json.dumps(superseded.to_dict(), sort_keys=True) + "\n"
        with open(self._history_path(superseded.id), "a", encoding="utf-8") as fh:
            fh.write(record)