Files
tegwick 586ed90948 session-memory Phase 0: ingest cursor + sweep entrypoint + config (T06)
- session_memory/core/cursor.py: size/mtime change detection sidecar
- session_memory/config.toml: store paths, retention caps, per-source
  globs (claude on, codex/grok off for Phase 1), repo->domain map
- session_memory/ingest.py: discover->normalize->store->digest->evict;
  --dry-run creates/writes nothing; python -m session_memory.ingest
- tests/test_ingest.py; live dry-run parsed 84/85 real local sessions

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 21:41:59 +02:00

50 lines
1.6 KiB
Python

"""Per-source ingest cursors (design §6; T06).
Tracks ``(path -> size, mtime)`` so sweeps re-ingest only changed/grown files.
Persisted as a small JSON sidecar. Ingest itself is idempotent on
``(session_uid, seq)`` in the store, so the cursor is an optimization, not a
correctness requirement — a lost cursor just means a full (still-idempotent)
re-scan.
"""
from __future__ import annotations
import json
import os
from typing import Optional
class Cursors:
    """Persisted ``file path -> {"size", "mtime"}`` map for change detection.

    The map is loaded from a JSON sidecar at construction time and written
    back with :meth:`save`. A missing, unreadable, or malformed sidecar is
    treated as an empty cursor set, so every file is reported as changed.
    """

    def __init__(self, path: str):
        """Load cursors from *path*, falling back to an empty map.

        Args:
            path: Location of the JSON sidecar file. It need not exist yet.
        """
        self.path = path
        self._data: dict[str, dict] = {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file or invalid JSON: start from scratch.
            return
        # Valid JSON that is not an object (e.g. ``[]`` or ``null``) would
        # break every later ``.get`` / item assignment, so treat it as lost.
        if isinstance(loaded, dict):
            # Drop entries whose value is not a ``{"size", "mtime"}`` object.
            self._data = {
                key: entry for key, entry in loaded.items() if isinstance(entry, dict)
            }

    def is_changed(self, file_path: str) -> bool:
        """True if the file is new or has changed size/mtime since last seen.

        Returns False when the file cannot be stat'ed (e.g. it vanished).
        """
        try:
            stat = os.stat(file_path)
        except OSError:
            return False
        prev = self._data.get(file_path)
        if not isinstance(prev, dict):
            # Never seen, or the recorded entry is unusable.
            return True
        return prev.get("size") != stat.st_size or prev.get("mtime") != stat.st_mtime

    def mark(self, file_path: str) -> None:
        """Record the file's current size/mtime; no-op if it cannot be stat'ed."""
        try:
            stat = os.stat(file_path)
        except OSError:
            return
        self._data[file_path] = {"size": stat.st_size, "mtime": stat.st_mtime}

    def save(self) -> None:
        """Write the cursor map to ``self.path`` via a temp file + rename.

        Creates the parent directory if needed. If writing or renaming fails,
        the temporary file is removed and the original exception propagates.
        """
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        tmp = self.path + ".tmp"
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                json.dump(self._data, f)
            os.replace(tmp, self.path)
        except BaseException:
            # Don't leave a half-written sidecar behind; re-raise the cause.
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise