generated from coulomb/repo-seed
detect/quality.py: is_real_coding_session drops health-checks / smoke-tests / interrupted / trivially-short sessions (event floor, repo present, substantive tool activity, non-trivial prompt). Wired into run_detect so signals only form over real sessions — fixes the abandoned false-positive. [detect.quality] knobs; existing detect/curate fixtures made realistic. 8 new tests; suite 80/80. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
85 lines
3.0 KiB
Python
85 lines
3.0 KiB
Python
"""Curate entrypoint tests (T06): batch auto-approve end-to-end via the store."""
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from session_memory.core.store import Store # noqa: E402
|
|
from session_memory.curate.__main__ import main # noqa: E402
|
|
from session_memory.curate.catalog import Catalog # noqa: E402
|
|
|
|
|
|
def _digest(uid, flavor, repo, **markers):
    """Build a failed-session digest dict for seeding the store in tests.

    Args:
        uid: Value stored under ``session_uid``.
        flavor: Value stored under ``flavor``.
        repo: Value stored under ``repo``.
        **markers: Optional overrides for the marker counters ``errors``,
            ``retries``, ``test_runs``, ``edits`` and ``human_interventions``.
            Every counter defaults to 0; keyword names outside that set are
            ignored.

    Returns:
        A new dict with ``outcome`` fixed to ``"fail"``, a small token cost,
        the marker counters, and the fields the quality filter inspects
        (event count, first prompt, tool histogram).
    """
    # Fixed order so the resulting "markers" mapping always lists its keys
    # the same way, whatever order the overrides were passed in.
    marker_names = (
        "errors",
        "retries",
        "test_runs",
        "edits",
        "human_interventions",
    )
    return {
        "session_uid": uid,
        "flavor": flavor,
        "repo": repo,
        "outcome": "fail",
        "cost": {"input_tokens": 10, "output_tokens": 1},
        "markers": {name: markers.get(name, 0) for name in marker_names},
        # real coding session per the quality filter (WP-0005 T01)
        "event_count": 40,
        "first_prompt": "Fix the failing build and retry the suite",
        "tool_histogram": {"Bash": 20, "Edit": 12, "Read": 8},
    }
|
|
|
|
|
|
def _write_config(tmp_path) -> tuple[str, str, str]:
    """Write a curate ``config.toml`` under *tmp_path*.

    The config points the store at ``<tmp_path>/.store`` and the catalog at
    ``<tmp_path>/catalog``, and sets the curate gate to a minimum frequency
    and minimum session count of 2. Only the config file itself is written;
    the referenced directories are not created here.

    Args:
        tmp_path: A ``pathlib.Path``-like directory (e.g. pytest's
            ``tmp_path`` fixture).

    Returns:
        ``(config_path, store_dir, catalog_dir)`` as strings.
    """
    store = tmp_path / ".store"
    catalog = tmp_path / "catalog"

    def toml_str(p):
        # Forward slashes keep the value valid inside a TOML basic string;
        # a raw Windows path would inject backslash escape sequences.
        return p.as_posix()

    cfg = f"""
[store]
db_path = "{toml_str(store / 'm.db')}"
blob_dir = "{toml_str(store / 'blobs')}"
cursor = "{toml_str(store / 'c.json')}"

[curate]
catalog_dir = "{toml_str(catalog)}"
review_log = "{toml_str(store / 'reviews.jsonl')}"
decision_queue = "{toml_str(store / 'decisions.queue.jsonl')}"

[curate.gate]
min_frequency = 2
min_sessions = 2
"""
    path = tmp_path / "config.toml"
    # TOML documents are UTF-8 by specification; do not rely on the locale.
    path.write_text(cfg, encoding="utf-8")
    return str(path), str(store), str(catalog)
|
|
|
|
|
|
def test_auto_approve_promotes_cross_flavor(tmp_path, capsys):
    """Auto-approve over two flavors' digests yields one provisional problem."""
    config_file, store_root, catalog_root = _write_config(tmp_path)

    # Seed the store with one retry-heavy digest per flavor.
    db = Store(
        os.path.join(store_root, "m.db"),
        os.path.join(store_root, "blobs"),
    )
    seeds = (
        ("claude:a", "claude", "r1", 5),
        ("codex:b", "codex", "r2", 4),
    )
    for uid, flavor, repo, retry_count in seeds:
        db.write_digest(uid, _digest(uid, flavor, repo, retries=retry_count))
    db.close()

    exit_code = main(["--config", config_file, "--auto-approve"])
    assert exit_code == 0

    promoted = Catalog(catalog_root).list()
    assert len(promoted) == 1
    only = promoted[0]
    assert only.polarity == "problem"
    # clears the promote floor (freq>=2) but below the default distribution
    # floor (freq>=3) -> promoted as provisional, not distribution-ready
    assert only.status == "provisional"
    assert only.distribution_ready is False

    printed = capsys.readouterr().out
    assert "Curate summary" in printed
    # hub offline in tests -> decision queued
    assert "decisions queued" in printed
|
|
|
|
|
|
def test_rerun_is_idempotent(tmp_path):
    """A second auto-approve run leaves a single pattern at version 1.0.0."""
    config_file, store_root, catalog_root = _write_config(tmp_path)

    # Same two-flavor seed data for both curate passes.
    db = Store(
        os.path.join(store_root, "m.db"),
        os.path.join(store_root, "blobs"),
    )
    seeds = (
        ("claude:a", "claude", "r1", 5),
        ("codex:b", "codex", "r2", 4),
    )
    for uid, flavor, repo, retry_count in seeds:
        db.write_digest(uid, _digest(uid, flavor, repo, retries=retry_count))
    db.close()

    argv = ("--config", config_file, "--auto-approve")
    main(list(argv))
    main(list(argv))  # second pass: already decided

    catalog = Catalog(catalog_root)
    assert len(catalog.list()) == 1
    first_id = catalog.list()[0].id
    assert catalog.load(first_id).version == "1.0.0"  # no spurious bump
|