generated from coulomb/repo-seed
Closes the loop. metrics.py: fleet metrics (infra-overhead share, error rate, schema-thrash, token percentiles, success) + persisted baseline trend. effect.py: before/after per-pattern effectiveness with an improved verdict per metric. measure entrypoint with trend + --since effectiveness + JSON. Recorded pre-fix baseline: 27 sessions, overhead median 11.7%, error rate 0.96, schema-thrash 8. 13 new tests; suite 139/139. Capture->Detect->Curate->Distribute->Measure complete. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
80 lines
2.8 KiB
Python
80 lines
2.8 KiB
Python
"""Measure entrypoint tests (WP-0009 T03)."""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from session_memory.core.store import Store # noqa: E402
|
|
from session_memory.measure.__main__ import main, real_digests # noqa: E402
|
|
from session_memory.measure.metrics import load_baselines # noqa: E402
|
|
|
|
|
|
def _digest(uid, ts, tools=None):
|
|
return {
|
|
"session_uid": uid, "flavor": "claude", "repo": "agentic-resources",
|
|
"outcome": "success", "started_at": ts,
|
|
"cost": {"input_tokens": 100, "output_tokens": 10},
|
|
"event_count": 40, "first_prompt": "Implement the measure entrypoint cleanly",
|
|
"tool_histogram": tools or {"Bash": 20, "Edit": 12, "Read": 8},
|
|
"error_snippets": [],
|
|
}
|
|
|
|
|
|
def _write_config(tmp_path) -> str:
|
|
store = tmp_path / ".store"
|
|
toml = tmp_path / "config.toml"
|
|
toml.write_text(
|
|
f'[store]\ndb_path = "{store / "m.db"}"\nblob_dir = "{store / "blobs"}"\n'
|
|
f'cursor = "{store / "c.json"}"\n'
|
|
f'[measure]\nbaselines = "{tmp_path / "baselines.jsonl"}"\n')
|
|
return str(toml), str(store)
|
|
|
|
|
|
def _seed(store_dir):
    """Write two fixture digests into the store located under *store_dir*.

    The store is opened on ``<store_dir>/m.db`` with ``<store_dir>/blobs`` as
    its second argument. Two sessions are written:

    * ``claude:a`` started ``2026-06-01`` with the default tool histogram;
    * ``claude:b`` started ``2026-06-10`` with a histogram dominated by an
      ``mcp__state-hub__x`` tool.

    Args:
        store_dir: Directory path (string) that holds the store files.
    """
    st = Store(os.path.join(store_dir, "m.db"), os.path.join(store_dir, "blobs"))
    try:
        st.write_digest("claude:a", _digest("claude:a", "2026-06-01"))
        st.write_digest(
            "claude:b",
            _digest(
                "claude:b",
                "2026-06-10",
                tools={"mcp__state-hub__x": 18, "Bash": 8, "Edit": 4},
            ),
        )
    finally:
        # Always release the store, even if a write fails, so a failing test
        # does not leave the store open for the tests that follow.
        st.close()
|
|
|
|
|
|
def test_real_digests_filters_and_loads(tmp_path):
    """``real_digests`` returns both digests seeded into a fresh store."""
    from session_memory.ingest import load_config

    config_file, store_root = _write_config(tmp_path)
    _seed(store_root)

    config = load_config(config_file)
    loaded = real_digests(config)

    # Both seeded sessions are expected to come back.
    assert len(loaded) == 2
|
|
|
|
|
|
def test_main_writes_baseline_and_reports(tmp_path, capsys):
    """A labelled run prints the report and appends one baseline row."""
    config_file, store_root = _write_config(tmp_path)
    _seed(store_root)

    exit_code = main(["--config", config_file, "--label", "first"])
    assert exit_code == 0

    printed = capsys.readouterr().out
    assert "Fleet metrics" in printed

    # Exactly one baseline row, carrying the label passed on the command line.
    baselines_file = str(tmp_path / "baselines.jsonl")
    saved = load_baselines(baselines_file)
    assert len(saved) == 1
    assert saved[0]["label"] == "first"
|
|
|
|
|
|
def test_main_no_save_and_json(tmp_path, capsys):
    """``--no-save --json`` emits JSON on stdout and writes no baseline file."""
    config_file, store_root = _write_config(tmp_path)
    _seed(store_root)

    exit_code = main(["--config", config_file, "--no-save", "--json"])
    assert exit_code == 0

    stdout_text = capsys.readouterr().out
    payload = json.loads(stdout_text)
    assert payload["current"]["n_sessions"] == 2

    # With --no-save the baselines file must not have been created.
    baselines_file = tmp_path / "baselines.jsonl"
    assert not baselines_file.exists()
|
|
|
|
|
|
def test_main_effectiveness_since(tmp_path, capsys):
    """``--since`` splits the seeded sessions into one before and one after."""
    config_file, store_root = _write_config(tmp_path)
    _seed(store_root)

    # The cut-off lies between the two seeded start dates (06-01 and 06-10).
    argv = ["--config", config_file, "--no-save", "--since", "2026-06-05", "--json"]
    exit_code = main(argv)
    assert exit_code == 0

    payload = json.loads(capsys.readouterr().out)
    effectiveness = payload["effectiveness"]
    assert effectiveness["n_before"] == 1
    assert effectiveness["n_after"] == 1
|