Files
agentic-resources/tests/test_ingest.py
tegwick 586ed90948 session-memory Phase 0: ingest cursor + sweep entrypoint + config (T06)
- session_memory/core/cursor.py: size/mtime change detection sidecar
- session_memory/config.toml: store paths, retention caps, per-source
  globs (claude on, codex/grok off for Phase 1), repo->domain map
- session_memory/ingest.py: discover->normalize->store->digest->evict;
  --dry-run creates/writes nothing; python -m session_memory.ingest
- tests/test_ingest.py; live dry-run parsed 84/85 real local sessions

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 21:41:59 +02:00

82 lines
2.9 KiB
Python

"""Ingest sweep + cursor tests (T06)."""
import json
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from session_memory.core.cursor import Cursors # noqa: E402
from session_memory.ingest import run_sweep # noqa: E402
def test_cursor_change_detection(tmp_path):
    """A file reads as changed until marked, and again after it grows."""
    session_file = tmp_path / "a.jsonl"
    session_file.write_text("{}\n")
    target = str(session_file)
    cursors = Cursors(str(tmp_path / "cur.json"))

    # Never marked before: must be reported as changed.
    seen_before_mark = cursors.is_changed(target)
    assert seen_before_mark is True

    # Once marked, the untouched file is no longer considered changed.
    cursors.mark(target)
    seen_after_mark = cursors.is_changed(target)
    assert seen_after_mark is False

    # Rewriting with a longer payload (grow) must flip it back to changed.
    session_file.write_text("{}\n{}\n")
    seen_after_grow = cursors.is_changed(target)
    assert seen_after_grow is True
def _claude_session_file(dir_path, native):
os.makedirs(dir_path, exist_ok=True)
p = os.path.join(dir_path, f"{native}.jsonl")
recs = [
{"type": "user", "uuid": "u1", "sessionId": native,
"timestamp": "2026-06-06T10:00:00Z", "cwd": "/home/worsch/agentic-resources",
"gitBranch": "main", "message": {"role": "user", "content": "hi"}},
{"type": "assistant", "uuid": "a1", "parentUuid": "u1", "sessionId": native,
"timestamp": "2026-06-06T10:00:02Z",
"message": {"role": "assistant", "model": "claude-opus-4-8",
"usage": {"input_tokens": 5, "output_tokens": 2},
"content": [{"type": "text", "text": "hello"}]}},
]
with open(p, "w", encoding="utf-8") as f:
for r in recs:
f.write(json.dumps(r) + "\n")
return p
def _config(tmp_path, projects_dir):
return {
"store": {
"db_path": str(tmp_path / ".store/mem.db"),
"blob_dir": str(tmp_path / ".store/blobs"),
"cursor": str(tmp_path / ".store/cursors.json"),
},
"retention": {"raw_soft_cap_bytes": 10**12, "raw_hard_cap_bytes": 10**12,
"raw_max_age_days": 10**6, "distilled_cap_bytes": 10**12},
"sources": {"claude": {"enabled": True, "root": str(projects_dir), "glob": "*/*.jsonl"}},
"repo_domain_map": {"agentic-resources": "helix_forge"},
}
def test_run_sweep_end_to_end(tmp_path):
    """One session file is ingested on the first sweep and skipped on the next."""
    projects_root = tmp_path / "projects"
    project_dir = projects_root / "-home-worsch-agentic-resources"
    _claude_session_file(str(project_dir), "sess-aaa")
    config = _config(tmp_path, projects_root)

    first = run_sweep(config)
    assert first.discovered == 1
    assert first.ingested == 1
    assert first.analyzed == 1
    assert first.retention is not None

    # Second pass over the same, untouched file: the cursor should make the
    # sweep skip it rather than ingest it again.
    second = run_sweep(config)
    assert second.skipped_unchanged == 1
    assert second.ingested == 0
def test_dry_run_writes_nothing(tmp_path):
    """A dry-run sweep reports the session but leaves no database behind."""
    projects_root = tmp_path / "projects"
    project_dir = projects_root / "-home-worsch-agentic-resources"
    _claude_session_file(str(project_dir), "sess-bbb")
    config = _config(tmp_path, projects_root)

    outcome = run_sweep(config, dry_run=True)

    # The dry run still counts the session as discovered and ingested...
    assert outcome.discovered == 1
    assert outcome.ingested == 1
    # ...but reports no retention result.
    assert outcome.retention is None

    # No store created.
    # NOTE(review): only db_path is checked; blob_dir and the cursor file
    # are not asserted on here -- confirm whether they should be.
    db_file = config["store"]["db_path"]
    assert os.path.exists(db_file) is False