generated from coulomb/repo-seed
- session_memory/core/cursor.py: size/mtime change detection sidecar - session_memory/config.toml: store paths, retention caps, per-source globs (claude on, codex/grok off for Phase 1), repo->domain map - session_memory/ingest.py: discover->normalize->store->digest->evict; --dry-run creates/writes nothing; python -m session_memory.ingest - tests/test_ingest.py; live dry-run parsed 84/85 real local sessions Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
"""Per-source ingest cursors (design §6; T06).

Tracks ``(path -> size, mtime)`` so sweeps re-ingest only changed/grown files.
Persisted as a small JSON sidecar. Ingest itself is idempotent on
``(session_uid, seq)`` in the store, so the cursor is an optimization, not a
correctness requirement — a lost cursor just means a full (still-idempotent)
re-scan.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from typing import Optional
|
|
|
|
|
|
class Cursors:
    """Size/mtime change-detection cursors backed by a JSON sidecar file.

    Maps each tracked file path to the ``{"size": ..., "mtime": ...}`` pair
    recorded by the last :meth:`mark` call. An unreadable, malformed, or
    wrongly-shaped sidecar is treated as empty instead of raising, so every
    file simply reports as changed.
    """

    def __init__(self, path: str):
        """Load the cursors stored at *path*, or start empty.

        Args:
            path: Location of the JSON sidecar. It does not need to exist.
        """
        self.path = path
        self._data: dict[str, dict] = {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # Missing, unreadable, or not valid JSON/UTF-8: start empty.
            return
        # Valid JSON that is not an object (e.g. a list) would break the
        # ``.get`` lookups later; discard it like any other corrupt sidecar.
        if isinstance(loaded, dict):
            self._data = loaded

    def is_changed(self, file_path: str) -> bool:
        """True if the file is new or has changed size/mtime since last seen.

        Returns ``False`` when *file_path* cannot be stat'ed (for example it
        no longer exists).
        """
        try:
            stat = os.stat(file_path)
        except OSError:
            return False
        prev = self._data.get(file_path)
        if not isinstance(prev, dict):
            # Never marked, or the stored entry is malformed: treat as new.
            return True
        return prev.get("size") != stat.st_size or prev.get("mtime") != stat.st_mtime

    def mark(self, file_path: str) -> None:
        """Record the current size and mtime of *file_path*.

        Does nothing when the file cannot be stat'ed. The change is kept in
        memory only until :meth:`save` is called.
        """
        try:
            stat = os.stat(file_path)
        except OSError:
            return
        self._data[file_path] = {"size": stat.st_size, "mtime": stat.st_mtime}

    def save(self) -> None:
        """Write the cursors to ``self.path`` via a temp file and rename.

        Creates the parent directory if needed. The data is first written to
        ``<path>.tmp`` and then moved over the target with ``os.replace``.
        If writing or renaming fails, the temp file is removed and the
        original exception propagates.
        """
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        tmp = self.path + ".tmp"
        committed = False
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                json.dump(self._data, f)
            os.replace(tmp, self.path)
            committed = True
        finally:
            if not committed:
                # Do not leave a partial temp file behind on failure.
                try:
                    os.unlink(tmp)
                except OSError:
                    pass
|