generated from coulomb/repo-seed
session-memory: session-quality filter (WP-0005 T01)
detect/quality.py: `is_real_coding_session` drops health-check, smoke-test, interrupted, and trivially short sessions by requiring an event-count floor, a repo, substantive tool activity, and a non-trivial first prompt. It is wired into `run_detect` so that signals form only over real sessions, which fixes the false-positive `abandoned` patterns. Adds `[detect.quality]` config knobs and makes the existing detect/curate fixtures realistic. 8 new tests; suite 80/80. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -16,13 +16,14 @@ import os
|
||||
from ..core.store import Store
|
||||
from ..ingest import _expand, load_config
|
||||
from .cluster import cluster
|
||||
from .quality import filter_real, quality_config
|
||||
from .signals import extract_signals
|
||||
|
||||
|
||||
def run_detect(config: dict, *, min_frequency: int = 2) -> list[dict]:
|
||||
store_cfg = config.get("store", {})
|
||||
store = Store(_expand(store_cfg["db_path"]), _expand(store_cfg["blob_dir"]))
|
||||
digests = store.list_digests()
|
||||
digests = filter_real(store.list_digests(), quality_config(config))
|
||||
signals = extract_signals(digests)
|
||||
patterns = [p.to_dict() for p in cluster(signals, min_frequency=min_frequency)]
|
||||
store.save_patterns(patterns)
|
||||
@@ -56,7 +57,8 @@ def main(argv=None) -> int:
|
||||
|
||||
config = load_config(args.config)
|
||||
store_cfg = config.get("store", {})
|
||||
n = len(Store(_expand(store_cfg["db_path"]), _expand(store_cfg["blob_dir"])).list_digests())
|
||||
all_digests = Store(_expand(store_cfg["db_path"]), _expand(store_cfg["blob_dir"])).list_digests()
|
||||
n = len(filter_real(all_digests, quality_config(config)))
|
||||
patterns = run_detect(config, min_frequency=args.min_frequency)
|
||||
|
||||
if args.json:
|
||||
|
||||
75
session_memory/detect/quality.py
Normal file
75
session_memory/detect/quality.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""Session-quality filter (T01).
|
||||
|
||||
The capture layer ingests *every* session it finds — including API health-checks,
|
||||
smoke-tests, and interrupted runs (e.g. ``llm-connect`` firing "Say hello in one
|
||||
word", or a transcript that is just ``[Request interrupted by user]``). These are
|
||||
not real coding work, but the outcome heuristic labels the short ones ``abandoned``
|
||||
and the clusterer then mints false-positive "problem" patterns from them.
|
||||
|
||||
:func:`is_real_coding_session` gates those out so Detect signals/clusters form only
|
||||
over genuine coding sessions. It is intentionally conservative — a session counts
|
||||
as real if it shows substantive activity, and is dropped only on clear trivial
|
||||
markers. Thresholds come from ``[detect.quality]`` in ``config.toml``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
# Markers of a non-coding or interrupted session. Each entry is compared as a
# prefix of the stripped, lower-cased first prompt, so entries must be lower-case.
_TRIVIAL_PROMPTS = (
    "say hello",
    "hello",
    "[request interrupted",
    "return only this json",
    "ping",
    "ok",
    "<system-reminder>",
)
|
||||
|
||||
# Tool names whose calls count as "substantive" coding activity. Matching is
# exact and case-sensitive against the keys of a digest's tool histogram.
_SUBSTANTIVE_TOOLS = (
    "Edit",
    "Write",
    "Read",
    "Bash",
    "search_replace",
    "write",
    "read_file",
    "run_terminal_command",
    "grep",
    "Grep",
    "glob",
    "Glob",
    "NotebookEdit",
)
|
||||
|
||||
|
||||
@dataclass
class QualityConfig:
    """Thresholds that decide whether a session counts as real coding work."""

    # Sessions with fewer events than this are not real coding sessions.
    min_events: int = 20
    # At least this many substantive tool calls are required.
    min_substantive: int = 3
    # A first prompt shorter than this many characters is suspect.
    min_prompt_len: int = 25
|
||||
|
||||
|
||||
def quality_config(config: Optional[dict] = None) -> QualityConfig:
    """Build a :class:`QualityConfig` from the ``[detect.quality]`` config table.

    Args:
        config: Parsed configuration mapping, or ``None``. The thresholds are
            read from ``config["detect"]["quality"]``; a missing or ``None``
            ``detect`` / ``quality`` section is treated as empty.

    Returns:
        A ``QualityConfig`` in which every key absent from the section keeps the
        dataclass default.
    """
    # ``or {}`` at each level tolerates both a missing key and an explicit None.
    section = ((config or {}).get("detect") or {}).get("quality") or {}
    # Take fallbacks from the dataclass so the defaults live in exactly one place.
    defaults = QualityConfig()
    return QualityConfig(
        min_events=section.get("min_events", defaults.min_events),
        min_substantive=section.get("min_substantive", defaults.min_substantive),
        min_prompt_len=section.get("min_prompt_len", defaults.min_prompt_len),
    )
|
||||
|
||||
|
||||
def _substantive_calls(digest: dict) -> int:
    """Return the total number of substantive tool calls recorded in *digest*.

    Reads the ``tool_histogram`` entry (a missing or falsy value is treated as
    empty) and adds up the counts of the tools listed in ``_SUBSTANTIVE_TOOLS``.
    """
    histogram = digest.get("tool_histogram") or {}
    total = 0
    for tool, count in histogram.items():
        if tool in _SUBSTANTIVE_TOOLS:
            total += count
    return total
|
||||
|
||||
|
||||
def _starts_with_marker(prompt: str, marker: str) -> bool:
    """Return True when *prompt* begins with *marker* as a whole token.

    A marker that ends in a letter or digit (``ok``, ``ping``, ``hello``) only
    matches when the prompt ends there or continues with a non-word character,
    so ``"okay, refactor ..."`` or ``"pinging the service ..."`` is not mistaken
    for the trivial prompts ``ok`` / ``ping``. Markers ending in punctuation
    (``<system-reminder>``) match as plain prefixes.
    """
    if not prompt.startswith(marker):
        return False
    tail = prompt[len(marker):]
    if not tail or not marker[-1:].isalnum():
        return True
    return not (tail[0].isalnum() or tail[0] == "_")


def is_real_coding_session(digest: dict, config: Optional[QualityConfig] = None) -> bool:
    """Decide whether *digest* describes a genuine coding session.

    Args:
        digest: Session digest mapping. The keys read are ``repo``,
            ``event_count``, ``tool_histogram`` (via ``_substantive_calls``)
            and ``first_prompt``.
        config: Thresholds to apply; defaults to ``QualityConfig()``.

    Returns:
        ``False`` if the digest has no repo, fewer than ``min_events`` events,
        fewer than ``min_substantive`` substantive tool calls, a first prompt
        shorter than ``min_prompt_len`` characters, or a first prompt that
        begins with one of the ``_TRIVIAL_PROMPTS`` markers; ``True`` otherwise.
    """
    cfg = config or QualityConfig()

    if not digest.get("repo"):
        return False
    # ``or 0`` treats an explicit None like a missing count instead of raising
    # TypeError on the comparison (same handling as the other optional fields).
    if (digest.get("event_count") or 0) < cfg.min_events:
        return False
    if _substantive_calls(digest) < cfg.min_substantive:
        return False

    # Markers are lower-case, so normalise the prompt before comparing.
    prompt = (digest.get("first_prompt") or "").strip().lower()
    if len(prompt) < cfg.min_prompt_len:
        return False
    return not any(_starts_with_marker(prompt, marker) for marker in _TRIVIAL_PROMPTS)
|
||||
|
||||
|
||||
def filter_real(digests: list[dict], config: Optional[QualityConfig] = None) -> list[dict]:
    """Return the digests that ``is_real_coding_session`` accepts, in input order.

    Args:
        digests: Session digests to screen.
        config: Thresholds to apply; defaults to ``QualityConfig()``.
    """
    thresholds = config or QualityConfig()
    kept = []
    for digest in digests:
        if is_real_coding_session(digest, thresholds):
            kept.append(digest)
    return kept
|
||||
Reference in New Issue
Block a user