generated from coulomb/repo-seed
session-memory Phase 2: evidence-bar + bloat guard (T04)
gating.py: two-tier evidence bar (OQ5) — a promote floor (frequency/sessions/cost_impact) plus a stricter distribution-eligibility floor that decides whether a promoted pattern is set to approved+distribution_ready or to provisional. Wired into review() so thin approvals land provisional. bloat_warnings flags duplicate and near-duplicate (same signal-type+locus) candidates (OQ6). [curate]/[curate.gate] knobs in config.toml. 6 new tests; suite 64/64 green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
76
tests/test_curate_gating.py
Normal file
76
tests/test_curate_gating.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""Evidence-bar + bloat-guard tests (T04)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from session_memory.curate.catalog import Catalog # noqa: E402
|
||||
from session_memory.curate.gating import ( # noqa: E402
|
||||
GateConfig,
|
||||
bloat_warnings,
|
||||
evaluate,
|
||||
gate_config,
|
||||
)
|
||||
from session_memory.curate.review import candidate_to_pattern # noqa: E402
|
||||
|
||||
|
||||
def _candidate(key="success:clean_pass:outcome", freq=5, sessions=5, impact=10.0,
|
||||
cross=True, flavors=("claude", "grok")):
|
||||
return {
|
||||
"key": key,
|
||||
"frequency": freq,
|
||||
"sessions": [f"s{i}" for i in range(sessions)],
|
||||
"cost_impact": impact,
|
||||
"cross_flavor": cross,
|
||||
"flavors": list(flavors),
|
||||
}
|
||||
|
||||
|
||||
def test_clears_bar_and_distribution_ready():
    """A default candidate (frequency 5) is promotable, distribution-ready and approved."""
    r = evaluate(_candidate(), GateConfig(dist_min_frequency=3))
    # Separate asserts so a failure names the flag that regressed.
    assert r.promotable
    assert r.distribution_ready
    assert r.status == "approved"
|
||||
|
||||
|
||||
def test_thin_candidate_promotable_but_provisional():
    """A candidate between the two floors is promoted but stays provisional."""
    # meets promote floor (freq>=2) but below distribution floor (freq<3)
    r = evaluate(_candidate(freq=2, sessions=2), GateConfig(dist_min_frequency=3))
    assert r.promotable
    assert not r.distribution_ready
    assert r.status == "provisional"
|
||||
|
||||
|
||||
def test_below_promote_floor_not_promotable():
    """A single-occurrence candidate is rejected with a frequency reason.

    ``evaluate`` is called without an explicit config here, so this
    exercises whatever gate it uses when none is passed.
    """
    r = evaluate(_candidate(freq=1, sessions=1))
    assert not r.promotable
    assert any("frequency" in reason for reason in r.reasons)
|
||||
|
||||
|
||||
def test_cross_flavor_required_for_distribution():
    """With ``dist_require_cross_flavor`` on, a non-cross-flavor candidate
    is still promotable but is not distribution-ready, and a reason
    mentioning "cross-flavor" is reported.
    """
    r = evaluate(_candidate(cross=False), GateConfig(dist_require_cross_flavor=True))
    assert r.promotable
    assert not r.distribution_ready
    assert any("cross-flavor" in reason for reason in r.reasons)
|
||||
|
||||
|
||||
def test_gate_config_reads_toml_dict():
    """``gate_config`` reads overrides from the nested ``curate.gate`` table
    and leaves unspecified knobs at their defaults.
    """
    cfg = gate_config(
        {"curate": {"gate": {"min_frequency": 9, "dist_require_cross_flavor": True}}}
    )
    assert cfg.min_frequency == 9
    assert cfg.dist_require_cross_flavor is True
    # defaults preserved for unspecified keys
    assert cfg.dist_min_frequency == 3
|
||||
|
||||
|
||||
def test_bloat_flags_duplicate_and_near_duplicate(tmp_path):
    """``bloat_warnings`` flags exact and near duplicates and stays quiet otherwise.

    A catalog rooted at pytest's ``tmp_path`` is seeded with one pattern;
    three candidates are then checked against the catalog listing.
    """
    cat = Catalog(str(tmp_path))
    cat.upsert(candidate_to_pattern(_candidate(key="success:clean_pass:outcome")))
    existing = cat.list()

    # exact same key -> duplicate
    dup = bloat_warnings(_candidate(key="success:clean_pass:outcome"), existing)
    assert any("duplicate" in w for w in dup)

    # different polarity, same signal_type+locus -> near-duplicate
    near = bloat_warnings(_candidate(key="problem:clean_pass:outcome"), existing)
    assert any("near-duplicate" in w for w in near)

    # unrelated -> no warnings
    assert bloat_warnings(_candidate(key="problem:retry_storm:retries"), existing) == []
|
||||
Reference in New Issue
Block a user