session-memory: error-body mining into digest (WP-0006 T01)

build_digest now extracts normalized error fingerprints and samples from failed events (events of kind "error" plus failing tool_result bodies) into a durable error_snippets list. Paths, numbers, UUIDs and hex addresses are masked with placeholders so that the same error collapses to one fingerprint with a count, and the Python traceback header is skipped in favour of the real exception line. The list is durable in Tier 2 (it survives Tier 1 eviction). SCHEMA_VERSION -> 2 (re-ingest needed to populate). 7 new tests; suite 95/95 green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 12:45:01 +02:00
parent dbd212d2b1
commit 97379e9658
4 changed files with 160 additions and 2 deletions
--- a/session_memory/core/digest.py
+++ b/session_memory/core/digest.py
@@ -12,6 +12,7 @@ belongs to the Detect phase (PRD §6.2).
 from __future__ import annotations

 import collections
+import re
 from typing import Any

 from .schema import Session, SessionEvent
@@ -21,6 +22,16 @@ _FAIL_HINTS = ("error", "failed", "exception", "traceback", "fatal", "non-zero")
 # Substrings suggesting a clean test pass.
 _PASS_HINTS = ("passed", "0 failed", "ok", "success")

+# Normalization patterns so the same error collapses to one fingerprint
+# regardless of paths / ids / counts (WP-0006 T01).
+_UUID_RE = re.compile(r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", re.I)
+_HEXADDR_RE = re.compile(r"\b0x[0-9a-f]+\b", re.I)  # 0x-prefixed hex runs
+_PATH_RE = re.compile(r"(?:/[\w.\-]+)+/?|[A-Za-z]:\\[\w.\\\-]+")  # /a/b or drive-letter paths
+_NUM_RE = re.compile(r"\b\d+\b")  # standalone digit runs
+_WS_RE = re.compile(r"\s+")  # whitespace runs, collapsed to one space in fingerprints
+_ERR_SAMPLE_MAX = 200  # max chars of the raw error line kept as a snippet "sample"
+_ERR_FP_MAX = 160  # max chars of a normalized fingerprint
+

 def infer_outcome(events: list[SessionEvent], blobs: dict[str, str] | None = None) -> str:
    """Heuristic outcome label across flavors (design OQ2).
@@ -100,6 +111,7 @@ def build_digest(session: Session, events: list[SessionEvent],
        },
        "first_prompt": _first_prompt(events, blobs),
        "last_assistant": _last_assistant(events, blobs),
+        "error_snippets": _error_snippets(events, blobs),
        "schema_version": session.schema_version,
    }

@@ -148,6 +160,78 @@ def _last_assistant(events, blobs):
    return None


+def _error_line(text: str) -> str:
+    """Return the single most error-like line of *text*, stripped.
+
+    Preference order: the last fail-hint line that does not start with
+    "traceback" (in a Python traceback the real exception comes last), then
+    the last fail-hint line of any kind, then the first non-blank line, else "".
+    """
+    candidates = [ln for ln in map(str.strip, text.splitlines()) if ln]
+    hinted = [ln for ln in candidates
+              if any(hint in ln.lower() for hint in _FAIL_HINTS)]
+    real = [ln for ln in hinted if not ln.lower().startswith("traceback")]
+    if real:
+        return real[-1]
+    if hinted:
+        return hinted[-1]
+    return candidates[0] if candidates else ""
+
+
+def _error_fingerprint(text: str) -> str:
+    """Normalized dedup key: the error line with uuids/addrs/paths/numbers masked."""
+    key = _error_line(text).lower()
+    # Applied in this order; bare numbers go last so a UUID or path is masked whole.
+    for pattern, mask in ((_UUID_RE, "<uuid>"), (_HEXADDR_RE, "<addr>"),
+                          (_PATH_RE, "<path>"), (_NUM_RE, "<n>")):
+        key = pattern.sub(mask, key)
+    return _WS_RE.sub(" ", key).strip()[:_ERR_FP_MAX]
+
+
+def _error_body(event: SessionEvent, blobs: dict) -> str:
+    """Text to mine for *event*: its payload blob when present, else its summary."""
+    ref = event.payload_ref
+    blob_hit = bool(ref) and ref in blobs
+    return blobs[ref] if blob_hit else (event.summary or "")
+
+
+def _is_failed(event: SessionEvent, blobs: dict) -> bool:
+    """True for "error" events and for tool results whose body has a fail hint."""
+    kind = event.kind
+    if kind != "tool_result":
+        return kind == "error"
+    text = _error_body(event, blobs).lower()
+    return bool(text) and any(hint in text for hint in _FAIL_HINTS)
+
+
+def _error_snippets(events: list[SessionEvent], blobs: dict) -> list[dict]:
+    """Collapse a session's failures into deduped, normalized error fingerprints.
+
+    Durable in Tier 2 (the raw blobs may be evicted): each entry is
+    ``{fingerprint, sample, count, tool}``; ``sample`` and ``tool`` come from the
+    first occurrence. Ordered by descending count, then first appearance.
+    """
+    # Keyed by fingerprint; dict insertion order doubles as first-seen order.
+    agg: dict[str, dict] = {}
+    for e in events:
+        if not _is_failed(e, blobs):
+            continue
+        body = _error_body(e, blobs)
+        if not body.strip():
+            continue  # a failed event with no text has nothing to fingerprint
+        fp = _error_fingerprint(body)
+        if not fp:
+            continue
+        if fp not in agg:
+            agg[fp] = {"fingerprint": fp,
+                       "sample": _error_line(body)[:_ERR_SAMPLE_MAX],
+                       "count": 0, "tool": e.tool}
+        agg[fp]["count"] += 1
+    # sorted() is stable, so equal counts keep first-seen order with no
+    # explicit position lookup inside the sort key.
+    return sorted(agg.values(), key=lambda s: -s["count"])
+
+
 def _read_blob(store, ref):
    row = store.db.execute("SELECT path FROM blobs WHERE ref=?", (ref,)).fetchone()
    if not row:
--- a/session_memory/core/schema.py
+++ b/session_memory/core/schema.py
@@ -11,7 +11,7 @@ import json
 from dataclasses import asdict, dataclass, field, fields
 from typing import Any, Optional

-SCHEMA_VERSION = 1
+SCHEMA_VERSION = 2  # v2: digest carries error_snippets (WP-0006 T01)

 # Supported agent flavors. ``session_uid`` is always "<flavor>:<native id>".
 FLAVORS = ("claude", "codex", "grok")