"""Versioned Pattern Catalog — files-first source of truth (FR-U3; T02). The catalog is a directory of one JSON file per Solution Pattern (``/.json``). Files originate the work; the State Hub indexes them (ADR-001 / PRD §9). Identity is the pattern ``id`` (derived from the source candidate key), so re-promoting the same detect candidate maps to the same file — dedup is structural, not heuristic. :meth:`Catalog.upsert` is the one write path and is **idempotent**: * new id -> written as-is (``added``) * same id, identical content -> no write, no version bump (``unchanged``) * same id, only status/flags -> updated in place, no bump (``updated``) * same id, content changed -> version bumped, prior snapshot appended to ``.history.jsonl`` (``versioned``) History is append-only alongside the current file, so the catalog dir stays one clean current file per pattern while every superseded version is recoverable. """ from __future__ import annotations import json import os from datetime import datetime, timezone from typing import Optional from .schema import SolutionPattern # Content fields that define a pattern's substance. Version, timestamps, status, # and distribution_ready are metadata — changes to them never bump the version. 
_CONTENT_KEYS = (
    "name",
    "polarity",
    "problem",
    "resolutions",
    "scope",
    "provenance",
    "rendering_hints",
    "covers",
)

# Action labels returned by ``Catalog.upsert``.
ADDED = "added"
UNCHANGED = "unchanged"
UPDATED = "updated"
VERSIONED = "versioned"


def _now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _content(p: SolutionPattern) -> str:
    """Return a canonical JSON string of the pattern's content fields.

    Only the keys in ``_CONTENT_KEYS`` are serialized (with sorted keys), so
    two patterns compare equal here exactly when those fields are equal.

    Raises:
        KeyError: if ``p.to_dict()`` lacks one of the content keys.
    """
    d = p.to_dict()
    return json.dumps({k: d[k] for k in _CONTENT_KEYS}, sort_keys=True)


class Catalog:
    """File-backed catalog of versioned :class:`SolutionPattern` artifacts.

    Each pattern lives in ``{id}.json`` inside the catalog directory; snapshots
    of replaced versions are appended to ``{id}.history.jsonl`` next to it.
    """

    def __init__(self, catalog_dir: str) -> None:
        """Bind the catalog to ``catalog_dir``, creating the directory if needed."""
        self.dir = catalog_dir
        os.makedirs(self.dir, exist_ok=True)

    # --- paths --------------------------------------------------------------
    def _path(self, pattern_id: str) -> str:
        """Return the path of the current-version file for ``pattern_id``."""
        return os.path.join(self.dir, f"{pattern_id}.json")

    def _history_path(self, pattern_id: str) -> str:
        """Return the path of the append-only history file for ``pattern_id``."""
        return os.path.join(self.dir, f"{pattern_id}.history.jsonl")

    # --- reads --------------------------------------------------------------
    def load(self, pattern_id: str) -> Optional[SolutionPattern]:
        """Return the stored pattern for ``pattern_id``, or ``None`` if absent."""
        # Open directly instead of exists()-then-open so a file removed in
        # between cannot turn "absent" into an unexpected exception.
        try:
            with open(self._path(pattern_id), encoding="utf-8") as fh:
                raw = fh.read()
        except FileNotFoundError:
            return None
        return SolutionPattern.from_json(raw)

    def list(self) -> list[SolutionPattern]:
        """Return every current pattern, ordered by file name."""
        out: list[SolutionPattern] = []
        for name in sorted(os.listdir(self.dir)):
            # History files end in ".jsonl" and in-flight temp files in ".tmp",
            # so the ".json" suffix alone selects current pattern files.
            if not name.endswith(".json"):
                continue
            with open(os.path.join(self.dir, name), encoding="utf-8") as fh:
                out.append(SolutionPattern.from_json(fh.read()))
        return out

    def history(self, pattern_id: str) -> list[dict]:
        """Return the archived snapshots for ``pattern_id``, oldest first.

        Each non-blank line of the history file is parsed as one JSON document.
        A pattern with no history file yields an empty list.
        """
        try:
            with open(self._history_path(pattern_id), encoding="utf-8") as fh:
                return [json.loads(line) for line in fh if line.strip()]
        except FileNotFoundError:
            return []

    def find_for(self, signal_key: str, locus: str = "") -> Optional[SolutionPattern]:
        """Best catalog pattern for a detect signal: exact id first, then ``covers``.

        Lets a signal that doesn't share a pattern's exact key (e.g. a
        ``recurring_error`` fingerprint) inherit the curated recommendation when
        a pattern declares it covers that text. Matching is a case-insensitive
        substring test of each ``covers`` entry against ``"{signal_key} {locus}"``.

        Returns ``None`` when neither an exact id nor a ``covers`` entry matches.
        """
        exact = self.load(SolutionPattern.make_id(signal_key))
        if exact is not None:
            return exact
        hay = f"{signal_key} {locus}".lower()
        for p in self.list():  # sorted by file name -> deterministic
            # A blank entry is a substring of every haystack (which always
            # contains a space), so it would capture every signal; skip those.
            if any(c.strip() and c.lower() in hay for c in p.covers):
                return p
        return None

    # --- the single write path ---------------------------------------------
    def upsert(self, pattern: SolutionPattern) -> str:
        """Insert or version-update a pattern. Returns the action taken.

        * ``ADDED``     — no file for this id yet; ``pattern`` is written after
          its ``created_at`` (if empty) and ``updated_at`` are stamped.
        * ``UNCHANGED`` — content, ``status`` and ``distribution_ready`` all
          match the stored pattern; nothing is written.
        * ``UPDATED``   — content matches but ``status`` or
          ``distribution_ready`` differs; the stored pattern is rewritten with
          the new values, keeping its version.
        * ``VERSIONED`` — content differs; the stored pattern is appended to
          history, and ``pattern`` is written with a bumped version and the
          stored ``created_at``.
        """
        existing = self.load(pattern.id)
        now = _now()
        if existing is None:
            pattern.created_at = pattern.created_at or now
            pattern.updated_at = now
            self._write(pattern)
            return ADDED

        if _content(existing) == _content(pattern):
            # substance unchanged — only persist a metadata (status/flag) change
            if (existing.status == pattern.status
                    and existing.distribution_ready == pattern.distribution_ready):
                return UNCHANGED
            existing.status = pattern.status
            existing.distribution_ready = pattern.distribution_ready
            existing.updated_at = now
            self._write(existing)
            return UPDATED

        # substance changed: archive the old version first so it is never lost,
        # then bump and write the new one
        self._append_history(existing)
        pattern.version = SolutionPattern.bump_version(existing.version)
        pattern.created_at = existing.created_at or now
        pattern.updated_at = now
        self._write(pattern)
        return VERSIONED

    # --- internals ----------------------------------------------------------
    def _write(self, pattern: SolutionPattern) -> None:
        """Write ``pattern`` to its current-version file without tearing it.

        The pattern is serialized before any file is opened, written to a
        sibling ``.tmp`` file, and moved into place with ``os.replace``. A
        serialization or I/O failure therefore leaves the previous file intact.
        """
        payload = pattern.to_json() + "\n"
        final_path = self._path(pattern.id)
        # ".json.tmp" does not end in ".json", so list() never picks it up.
        tmp_path = f"{final_path}.tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as fh:
                fh.write(payload)
            os.replace(tmp_path, final_path)
        except BaseException:
            # Best-effort cleanup of the partial temp file; re-raise the cause.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise

    def _append_history(self, superseded: SolutionPattern) -> None:
        """Append ``superseded`` to its history file as one JSON line.

        The passed object's ``status`` is set to ``"superseded"`` before it is
        serialized.
        """
        superseded.status = "superseded"
        with open(self._history_path(superseded.id), "a", encoding="utf-8") as fh:
            fh.write(json.dumps(superseded.to_dict(), sort_keys=True))
            fh.write("\n")