generated from coulomb/repo-seed
- session_memory/core/retention.py: RetentionConfig + sweep() with backstop, budget (oldest-analyzed-first, never touches un-analyzed), and hard-cap overflow (analyze-now then reported last-resort data_loss); EvictionReport - tests/test_retention.py covers all four branches Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
108 lines
4.7 KiB
Python
108 lines
4.7 KiB
Python
"""Retention tests (T05): each pass of the budget-based eviction, with tiny caps."""
|
|
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from session_memory.adapters.claude import Normalized # noqa: E402
|
|
from session_memory.core import digest as digest_mod # noqa: E402
|
|
from session_memory.core.retention import RetentionConfig, sweep # noqa: E402
|
|
from session_memory.core.schema import Cost, Session, SessionEvent # noqa: E402
|
|
from session_memory.core.store import Store # noqa: E402
|
|
|
|
NOW = datetime(2026, 6, 6, tzinfo=timezone.utc)
|
|
|
|
|
|
def _ingest(st, native, *, body_bytes=1000, ended=None, analyze=False):
    """Ingest one single-event ``claude`` session into ``st`` and return its uid.

    The session has exactly one ``assistant_msg`` event whose payload blob is a
    string of ``body_bytes`` ``"x"`` characters. ``ended_at`` is formatted from
    ``ended`` (falling back to ``NOW``); ``ingested_at`` is always ``NOW``.
    When ``analyze`` is true, ``digest_mod.analyze`` is run on the session
    right after ingestion.
    """
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    session_uid = Session.make_uid("claude", native)
    blob_ref = f"blob://{native}/0"

    session = Session(
        session_uid=session_uid,
        flavor="claude",
        native_session_id=native,
        ended_at=(ended or NOW).strftime(stamp_format),
        ingested_at=NOW.strftime(stamp_format),
    )
    event = SessionEvent(
        session_uid=session_uid,
        seq=0,
        kind="assistant_msg",
        payload_ref=blob_ref,
    )
    st.ingest(
        Normalized(
            session=session,
            events=[event],
            blobs={blob_ref: "x" * body_bytes},
        )
    )

    if analyze:
        digest_mod.analyze(st, session_uid)
    return session_uid
|
|
|
|
|
|
def _store(tmp_path):
    """Build a ``Store`` whose database file and blob directory live under ``tmp_path``."""
    db_path = str(tmp_path / "m.db")
    blob_dir = str(tmp_path / "blobs")
    return Store(db_path, blob_dir)
|
|
|
|
|
|
def test_backstop_evicts_old_analyzed_only(tmp_path):
    """Age backstop: only sessions that are both analyzed and past the age limit go.

    Both byte caps are set far above usage, so the 45-day ``raw_max_age_days``
    is the only limit in play.
    """
    store = _store(tmp_path)
    sixty_days_ago = NOW - timedelta(days=60)
    stale_analyzed = _ingest(store, "old", ended=sixty_days_ago, analyze=True)
    fresh_analyzed = _ingest(store, "young", ended=NOW - timedelta(days=1), analyze=True)
    stale_raw = _ingest(store, "oldraw", ended=sixty_days_ago, analyze=False)

    config = RetentionConfig(
        raw_max_age_days=45,
        raw_soft_cap_bytes=10**12,
        raw_hard_cap_bytes=10**12,
    )
    report = sweep(store, config, now=NOW)

    assert stale_analyzed in report.backstop_evicted
    # Too recent to hit the backstop.
    assert fresh_analyzed not in report.backstop_evicted
    # Old enough, but never analyzed, so it is protected.
    assert stale_raw not in report.backstop_evicted
    assert store.get_session(stale_analyzed).evicted_at is not None
    assert store.get_session(stale_raw).evicted_at is None
|
|
|
|
|
|
def test_budget_pass_evicts_oldest_analyzed_first(tmp_path):
    """Budget pass: analyzed sessions are evicted oldest-first until under the soft cap."""
    store = _store(tmp_path)
    oldest = _ingest(store, "a", body_bytes=2000, ended=NOW - timedelta(days=3), analyze=True)
    middle = _ingest(store, "b", body_bytes=2000, ended=NOW - timedelta(days=2), analyze=True)
    newest = _ingest(store, "c", body_bytes=2000, ended=NOW - timedelta(days=1), analyze=True)

    # A soft cap small enough to force out roughly two of the three sessions;
    # the hard cap and age limit are set high enough to stay out of the way.
    config = RetentionConfig(
        raw_soft_cap_bytes=2500,
        raw_hard_cap_bytes=10**9,
        raw_max_age_days=10**6,
    )
    report = sweep(store, config, now=NOW)

    first_two_evicted = report.budget_evicted[:2]
    assert first_two_evicted == [oldest, middle]  # oldest-first
    assert store.get_session(newest).evicted_at is None  # newest survives
    assert store.tier1_usage_bytes() <= config.raw_soft_cap_bytes
|
|
|
|
|
|
def test_budget_pass_never_touches_unanalyzed(tmp_path):
    """Budget pass: un-analyzed sessions stay in place even when over the soft cap."""
    store = _store(tmp_path)
    raw_uids = (
        _ingest(store, "r1", body_bytes=5000, analyze=False),
        _ingest(store, "r2", body_bytes=5000, analyze=False),
    )

    config = RetentionConfig(
        raw_soft_cap_bytes=100,
        raw_hard_cap_bytes=10**9,
        raw_max_age_days=10**6,
    )
    report = sweep(store, config, now=NOW)

    # Over the soft cap, but nothing is analyzed: no eviction and no data loss.
    assert report.budget_evicted == []
    assert report.lost_data is False
    for uid in raw_uids:
        assert store.get_session(uid).evicted_at is None
    # The overage is tolerated rather than resolved by destroying raw data.
    assert store.tier1_usage_bytes() > config.raw_soft_cap_bytes
|
|
|
|
|
|
def test_overflow_analyzes_then_evicts_without_data_loss(tmp_path):
    """Hard-cap overflow with an ``analyze_fn``: sessions are analyzed, nothing is lost."""
    store = _store(tmp_path)
    older_raw = _ingest(
        store, "r1", body_bytes=4000, ended=NOW - timedelta(days=2), analyze=False
    )
    newer_raw = _ingest(
        store, "r2", body_bytes=4000, ended=NOW - timedelta(days=1), analyze=False
    )

    config = RetentionConfig(
        raw_soft_cap_bytes=3000,
        raw_hard_cap_bytes=5000,
        raw_max_age_days=10**6,
    )
    report = sweep(store, config, now=NOW, analyze_fn=digest_mod.analyze)

    # The overflow path analyzed both un-analyzed sessions before evicting.
    analyzed_on_overflow = set(report.overflow_analyzed)
    assert analyzed_on_overflow == {older_raw, newer_raw}
    assert report.lost_data is False  # analysis avoided data loss
    assert store.tier1_usage_bytes() <= config.raw_soft_cap_bytes
|
|
|
|
|
|
def test_overflow_last_resort_reports_data_loss(tmp_path):
    """Hard-cap overflow without an ``analyze_fn``: eviction happens and is reported as data loss."""
    store = _store(tmp_path)
    # A single un-analyzed session larger than the hard cap, with analysis
    # disabled by passing no analyze function.
    oversized = _ingest(store, "big", body_bytes=20000, analyze=False)

    config = RetentionConfig(
        raw_soft_cap_bytes=1000,
        raw_hard_cap_bytes=2000,
        raw_max_age_days=10**6,
    )
    report = sweep(store, config, now=NOW, analyze_fn=None)

    assert oversized in report.overflow_data_loss
    assert report.lost_data is True
    assert any("data_loss" in warning for warning in report.warnings)
    assert store.get_session(oversized).evicted_at is not None
|