session-memory: infra-overhead + thrash signals (WP-0005 T02)

signals.py: tool_bucket helper + three tool_histogram-based extractors that the
outcome/marker signals were blind to — sig_infra_overhead (hub+task+schema share
of tool calls over threshold), sig_schema_thrash (repeated ToolSearch), and
sig_tool_thrash (one tool dominating). Thresholds in build_context. 8 new tests;
suite 88/88 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 11:12:09 +02:00
parent 70433cda61
commit 21c714e286
3 changed files with 153 additions and 2 deletions

View File

@@ -91,9 +91,75 @@ def sig_error_then_recovery(digest, ctx) -> list[Signal]:
return []
# --- tool-mix / infrastructure-overhead signals (WP-0005 T02) ----------------
# These read the captured ``tool_histogram`` — friction that the outcome+marker
# signals above are blind to (sessions still "succeed", just expensively).
def tool_bucket(tool: str) -> str:
"""Group a tool name into a coarse activity bucket (flavor-agnostic)."""
if tool.startswith("mcp__state-hub"):
return "statehub_mcp"
if tool in ("TaskUpdate", "TaskCreate", "TaskGet", "TaskList", "TaskOutput",
"TaskStop", "todo_write", "update_task_status"):
return "task_mgmt"
if tool == "ToolSearch":
return "schema_load"
if tool in ("Bash", "run_terminal_command"):
return "shell"
if tool in ("Edit", "Write", "search_replace", "write", "NotebookEdit"):
return "edit"
if tool in ("Read", "read_file", "grep", "Grep", "glob", "Glob"):
return "read"
return "other"
def _bucketed(digest) -> tuple[dict, int]:
buckets: dict[str, int] = {}
for tool, n in (digest.get("tool_histogram") or {}).items():
buckets[tool_bucket(tool)] = buckets.get(tool_bucket(tool), 0) + n
return buckets, sum(buckets.values())
def sig_infra_overhead(digest, ctx) -> list[Signal]:
"""Problem: a large share of tool calls is hub/task/schema plumbing, not work."""
buckets, total = _bucketed(digest)
if total < ctx.get("infra_min_calls", 20):
return []
overhead = buckets.get("statehub_mcp", 0) + buckets.get("task_mgmt", 0) + buckets.get("schema_load", 0)
share = overhead / total
if share >= ctx.get("infra_overhead_threshold", 0.30):
return [_base(digest, "infra_overhead", PROBLEM, "infra_overhead", round(share, 3),
overhead_calls=overhead, total_calls=total,
statehub=buckets.get("statehub_mcp", 0),
task_mgmt=buckets.get("task_mgmt", 0),
schema_load=buckets.get("schema_load", 0))]
return []
def sig_schema_thrash(digest, ctx) -> list[Signal]:
"""Problem: repeated ToolSearch — deferred-tool schemas reloaded over and over."""
buckets, _ = _bucketed(digest)
n = buckets.get("schema_load", 0)
if n >= ctx.get("schema_thrash_threshold", 5):
return [_base(digest, "schema_thrash", PROBLEM, "schema_load", float(n), tool_searches=n)]
return []
def sig_tool_thrash(digest, ctx) -> list[Signal]:
"""Problem: a single tool is hammered far more than any other — likely churn."""
hist = digest.get("tool_histogram") or {}
if not hist:
return []
tool, n = max(hist.items(), key=lambda kv: kv[1])
if n >= ctx.get("tool_thrash_threshold", 80):
return [_base(digest, "tool_thrash", PROBLEM, f"tool:{tool}", float(n), tool=tool, calls=n)]
return []
EXTRACTORS: list[Callable] = [
sig_retry_storm, sig_repeated_errors, sig_budget_overrun, sig_abandoned,
sig_clean_pass, sig_error_then_recovery,
sig_infra_overhead, sig_schema_thrash, sig_tool_thrash,
]
@@ -104,7 +170,12 @@ def build_context(digests: list[dict]) -> dict[str, Any]:
for d in digests
)
p90 = totals[int(0.9 * (len(totals) - 1))] if totals else 0
return {"tokens_p90": p90, "retry_storm_threshold": 3, "error_threshold": 3}
return {
"tokens_p90": p90, "retry_storm_threshold": 3, "error_threshold": 3,
# tool-mix / infra-overhead thresholds (WP-0005 T02)
"infra_min_calls": 20, "infra_overhead_threshold": 0.30,
"schema_thrash_threshold": 5, "tool_thrash_threshold": 80,
}
def extract_signals(digests: list[dict], ctx: Optional[dict] = None) -> list[Signal]: