"""Before/after effectiveness tests (WP-0009 T02).""" import os import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from session_memory.measure.effect import effectiveness, split_by_date # noqa: E402 def _digest(ts, tools=None, errors=0, outcome="success"): return { "started_at": ts, "outcome": outcome, "cost": {"input_tokens": 100, "output_tokens": 0}, "tool_histogram": tools or {"Bash": 10}, "error_snippets": [{"fingerprint": f"e{i}", "count": 1} for i in range(errors)], } def test_split_by_date(): digs = [_digest("2026-06-01"), _digest("2026-06-05"), _digest("2026-06-10")] before, after = split_by_date(digs, "2026-06-05") assert len(before) == 1 and len(after) == 2 # >= applied_at goes to after def test_effectiveness_detects_improvement(): # before: lots of errors + hub overhead; after: clean before = [_digest("2026-06-01", tools={"mcp__state-hub__x": 8, "Bash": 2}, errors=3) for _ in range(3)] after = [_digest("2026-06-10", tools={"Bash": 10}, errors=0) for _ in range(3)] e = effectiveness(before + after, "2026-06-05", label="read-before-edit") assert not e["insufficient_data"] assert e["n_before"] == 3 and e["n_after"] == 3 assert e["deltas"]["error_rate"]["improved"] is True assert e["deltas"]["infra_overhead_share_median"]["improved"] is True assert e["deltas"]["error_rate"]["change"] < 0 def test_effectiveness_insufficient_data(): e = effectiveness([_digest("2026-06-01")], "2026-06-05") assert e["insufficient_data"] is True assert e["deltas"] == {} def test_success_rate_higher_is_better(): before = [_digest("2026-06-01", outcome="fail") for _ in range(2)] after = [_digest("2026-06-10", outcome="success") for _ in range(2)] e = effectiveness(before + after, "2026-06-05") assert e["deltas"]["success_rate"]["improved"] is True