session-memory Phase 1: Detect pipeline (T04-T07)

- detect/signals.py: pure extractors over digests (retry storm, repeated errors, budget overrun vs corpus p90, abandoned, clean pass, recovery)
- detect/cluster.py: deterministic clustering into candidate Patterns with evidence (sessions/repos/flavors/cost impact) + cross-flavor flagging
- detect/__main__.py: python -m session_memory.detect, ranked report (cross-flavor first) + --json; persists candidates to Tier 2 patterns table
- core/store.py: list_digests + save_patterns
- tests for signals, cluster, detect entrypoint

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 22:31:13 +02:00
parent 06767ef924
commit 436a96dcd8
9 changed files with 436 additions and 4 deletions
--- a/tests/test_cluster.py
+++ b/tests/test_cluster.py
@@ -0,0 +1,54 @@
+"""Clusterer + evidence + cross-flavor tests (T05/T06)."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.detect.cluster import cluster  # noqa: E402
+from session_memory.detect.signals import PROBLEM, SUCCESS, Signal  # noqa: E402
+
+
+def _sig(uid, flavor, repo, type_, polarity, locus, mag=1.0):
+    return Signal(session_uid=uid, flavor=flavor, repo=repo, type=type_,
+                  polarity=polarity, locus=locus, magnitude=mag)
+
+
+def test_min_frequency_filters_singletons():
+    sigs = [_sig("claude:a", "claude", "r1", "retry_storm", PROBLEM, "retries")]
+    assert cluster(sigs, min_frequency=2) == []
+
+
+def test_clusters_recurring_signal_with_evidence():
+    sigs = [
+        _sig("claude:a", "claude", "r1", "retry_storm", PROBLEM, "retries", 5),
+        _sig("claude:b", "claude", "r2", "retry_storm", PROBLEM, "retries", 3),
+    ]
+    pats = cluster(sigs, min_frequency=2)
+    assert len(pats) == 1
+    p = pats[0]
+    assert p.frequency == 2
+    assert p.sessions == ["claude:a", "claude:b"]
+    assert sorted(p.repos) == ["r1", "r2"]
+    assert p.flavors == ["claude"]
+    assert p.cross_flavor is False
+    assert p.cost_impact == 8.0
+
+
+def test_cross_flavor_flagged_and_ranked_first():
+    sigs = [
+        # cross-flavor problem (claude + codex)
+        _sig("claude:a", "claude", "r1", "repeated_errors", PROBLEM, "errors", 3),
+        _sig("codex:b", "codex", "r2", "repeated_errors", PROBLEM, "errors", 3),
+        # single-flavor success cluster with higher raw impact
+        _sig("grok:c", "grok", "r3", "clean_pass", SUCCESS, "outcome", 5),
+        _sig("grok:d", "grok", "r4", "clean_pass", SUCCESS, "outcome", 5),
+    ]
+    pats = cluster(sigs, min_frequency=2)
+    assert len(pats) == 2
+    xf = next(p for p in pats if p.signal_type == "repeated_errors")
+    assert xf.cross_flavor is True
+    assert sorted(xf.flavors) == ["claude", "codex"]
+    # cross-flavor pattern is ranked first even if another has higher raw impact
+    assert pats[0].cross_flavor is True
+    assert "cross-flavor" in pats[0].title
--- a/tests/test_detect_entrypoint.py
+++ b/tests/test_detect_entrypoint.py
@@ -0,0 +1,44 @@
+"""Detect entrypoint tests (T07): end-to-end digests -> patterns, persisted."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.core.store import Store  # noqa: E402
+from session_memory.detect.__main__ import run_detect  # noqa: E402
+
+
+def _digest(uid, flavor, repo, **markers):
+    return {
+        "session_uid": uid, "flavor": flavor, "repo": repo, "outcome": "fail",
+        "cost": {"input_tokens": 10, "output_tokens": 1},
+        "markers": {"errors": markers.get("errors", 0), "retries": markers.get("retries", 0),
+                    "test_runs": 0, "edits": 0, "human_interventions": 0},
+    }
+
+
+def _config(tmp_path):
+    return {"store": {"db_path": str(tmp_path / ".store/m.db"),
+                      "blob_dir": str(tmp_path / ".store/blobs"),
+                      "cursor": str(tmp_path / ".store/c.json")}}
+
+
+def test_run_detect_persists_cross_flavor_pattern(tmp_path):
+    cfg = _config(tmp_path)
+    st = Store(cfg["store"]["db_path"], cfg["store"]["blob_dir"])
+    # same problem (retry_storm) across two flavors -> cross-flavor candidate
+    st.write_digest("claude:a", _digest("claude:a", "claude", "r1", retries=5))
+    st.write_digest("codex:b", _digest("codex:b", "codex", "r2", retries=4))
+    st.close()
+
+    patterns = run_detect(cfg, min_frequency=2)
+    assert len(patterns) == 1
+    assert patterns[0]["cross_flavor"] is True
+    assert patterns[0]["signal_type"] == "retry_storm"
+
+    # persisted to the Tier 2 patterns table
+    st2 = Store(cfg["store"]["db_path"], cfg["store"]["blob_dir"])
+    rows = st2.db.execute("SELECT key FROM patterns").fetchall()
+    assert len(rows) == 1
+    st2.close()
--- a/tests/test_signals.py
+++ b/tests/test_signals.py
@@ -0,0 +1,53 @@
+"""Signal extractor tests (T04)."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from session_memory.detect.signals import (  # noqa: E402
+    PROBLEM, SUCCESS, build_context, extract_signals,
+)
+
+
+def _digest(uid, flavor="claude", repo="r", outcome="success", tokens=100,
+            errors=0, retries=0, test_runs=0):
+    return {
+        "session_uid": uid, "flavor": flavor, "repo": repo, "outcome": outcome,
+        "cost": {"input_tokens": tokens, "output_tokens": 0},
+        "markers": {"errors": errors, "retries": retries, "test_runs": test_runs,
+                    "edits": 0, "human_interventions": 0},
+    }
+
+
+def test_problem_signals():
+    digests = [
+        _digest("claude:a", retries=5, outcome="fail"),
+        _digest("claude:b", errors=4),
+        _digest("claude:c", outcome="abandoned"),
+    ]
+    sigs = extract_signals(digests)
+    types = {(s.session_uid, s.type) for s in sigs}
+    assert ("claude:a", "retry_storm") in types
+    assert ("claude:b", "repeated_errors") in types
+    assert ("claude:c", "abandoned") in types
+    assert all(s.polarity == PROBLEM for s in sigs
+               if s.type in ("retry_storm", "repeated_errors", "abandoned"))
+
+
+def test_success_signals():
+    sigs = extract_signals([_digest("grok:x", outcome="success", test_runs=2)])
+    assert any(s.type == "clean_pass" and s.polarity == SUCCESS for s in sigs)
+
+    rec = extract_signals([_digest("codex:y", outcome="success", errors=2)])
+    assert any(s.type == "error_then_recovery" and s.polarity == SUCCESS for s in rec)
+
+
+def test_budget_overrun_uses_corpus_p90():
+    digests = [_digest(f"claude:{i}", tokens=100) for i in range(10)]
+    digests.append(_digest("claude:big", tokens=100000))
+    ctx = build_context(digests)
+    assert ctx["tokens_p90"] >= 100
+    sigs = extract_signals(digests, ctx)
+    overruns = [s for s in sigs if s.type == "budget_overrun"]
+    assert overruns and overruns[0].session_uid == "claude:big"