"""Measure entrypoint (T03): fleet trend + per-pattern effectiveness. python -m session_memory.measure [--config PATH] [--label L] [--since DATE] [--no-save] [--json] Computes current fleet metrics over the real (quality-filtered) sessions, appends them to the baseline trend, and reports whether the fleet is getting cheaper / more reliable over time (FR-M3). With ``--since DATE`` it also reports before/after effectiveness around a change (FR-M1/FR-M2). """ from __future__ import annotations import argparse import json import os from ..core.store import Store from ..detect.quality import filter_real, quality_config from ..ingest import _expand, load_config from .effect import effectiveness from .metrics import load_baselines, save_baseline, snapshot _TREND_KEYS = ("infra_overhead_share_median", "error_rate", "schema_thrash_sessions", "tokens_p50", "success_rate") def real_digests(config: dict) -> list[dict]: s = config.get("store", {}) store = Store(_expand(s["db_path"]), _expand(s["blob_dir"])) out = filter_real(store.list_digests(), quality_config(config)) store.close() return out def _fmt_trend(baselines: list[dict]) -> str: if not baselines: return " (no prior snapshots)" lines = [] recent = baselines[-5:] for b in recent: when = (b.get("captured_at") or "")[:10] lbl = f" {b['label']}" if b.get("label") else "" lines.append(f" {when}{lbl}: overhead_med={b.get('infra_overhead_share_median')} " f"err_rate={b.get('error_rate')} schema_thrash={b.get('schema_thrash_sessions')} " f"tok_p50={b.get('tokens_p50')} success={b.get('success_rate')} " f"(n={b.get('n_sessions')})") return "\n".join(lines) def _report(current: dict, baselines: list[dict], eff: dict | None) -> str: lines = [f"# Fleet metrics (n={current.get('n_sessions')} real sessions)"] for k in _TREND_KEYS: lines.append(f" {k} = {current.get(k)}") lines.append("\n## Trend (recent snapshots)") lines.append(_fmt_trend(baselines)) if eff is not None: lines.append(f"\n## Effectiveness since {eff['applied_at']} " f"(before={eff['n_before']}, after={eff['n_after']})") if eff["insufficient_data"]: lines.append(" insufficient data on one side of the date") else: for k in _TREND_KEYS: d = eff["deltas"].get(k, {}) mark = {True: "improved", False: "worse", None: "—"}[d.get("improved")] lines.append(f" {k}: {d.get('before')} -> {d.get('after')} " f"({d.get('change'):+}) {mark}") return "\n".join(lines) def main(argv=None) -> int: here = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ap = argparse.ArgumentParser(description="Measure fleet metrics + per-pattern effectiveness.") ap.add_argument("--config", default=os.path.join(here, "config.toml")) ap.add_argument("--label", default="") ap.add_argument("--since", default=None, help="ISO date for before/after effectiveness") ap.add_argument("--no-save", action="store_true", help="don't append to the baseline trend") ap.add_argument("--json", action="store_true") args = ap.parse_args(argv) config = load_config(args.config) digests = real_digests(config) current = snapshot(digests, label=args.label) path = _expand(config.get("measure", {}).get("baselines", "session_memory/measure/baselines.jsonl")) prior = load_baselines(path) if not args.no_save: save_baseline(current, path) eff = effectiveness(digests, args.since, label=args.label) if args.since else None if args.json: print(json.dumps({"current": current, "trend": prior + [current], "effectiveness": eff}, indent=2)) else: print(_report(current, prior + [current], eff)) return 0 if __name__ == "__main__": raise SystemExit(main())