"""Per-source ingest cursors (design §6; T06).

Tracks ``(path -> size, mtime)`` so sweeps re-ingest only changed/grown files.
Persisted as a small JSON sidecar. Ingest itself is idempotent on
``(session_uid, seq)`` in the store, so the cursor is an optimization, not a
correctness requirement — a lost cursor just means a full (still-idempotent)
re-scan.
"""

from __future__ import annotations

import json
import os
from typing import Optional


class Cursors:
    """In-memory map of ``file path -> {"size", "mtime"}`` backed by a JSON file.

    The sidecar is read once at construction time and written back only when
    :meth:`save` is called.
    """

    def __init__(self, path: str):
        """Load cursors from ``path``, starting empty if it cannot be used.

        A missing, unreadable, or non-JSON sidecar yields an empty cursor set,
        as does a sidecar whose top-level JSON value is not an object.

        Args:
            path: Location of the JSON sidecar file.
        """
        self.path = path
        self._data: dict[str, dict] = {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            # OSError covers a missing/unreadable file; ValueError covers both
            # malformed JSON and undecodable bytes.
            return
        # Valid JSON of the wrong shape (list, null, string, ...) would make
        # every later lookup fail, so treat it like a lost cursor file.
        if isinstance(loaded, dict):
            self._data = loaded

    @staticmethod
    def _signature(file_path: str) -> Optional[dict]:
        """Return ``{"size", "mtime"}`` for ``file_path``, or None if stat fails."""
        try:
            stat = os.stat(file_path)
        except OSError:
            return None
        return {"size": stat.st_size, "mtime": stat.st_mtime}

    def is_changed(self, file_path: str) -> bool:
        """True if the file is new or has changed size/mtime since last seen.

        Returns False when the file cannot be stat'ed (for example, when it
        does not exist). A stored entry that is not a JSON object is treated
        as if the file had never been seen.
        """
        current = self._signature(file_path)
        if current is None:
            return False
        prev = self._data.get(file_path)
        if not isinstance(prev, dict):
            # Covers both "never seen" (None) and a malformed stored entry.
            return True
        return (
            prev.get("size") != current["size"]
            or prev.get("mtime") != current["mtime"]
        )

    def mark(self, file_path: str) -> None:
        """Record the file's current size and mtime as seen.

        Does nothing if the file cannot be stat'ed. The change is held in
        memory until :meth:`save` is called.
        """
        current = self._signature(file_path)
        if current is not None:
            self._data[file_path] = current

    def save(self) -> None:
        """Write the cursors to ``self.path`` via a temp file and ``os.replace``.

        The parent directory is created if needed. If writing or replacing
        fails, the temporary file is removed and the error is re-raised; the
        existing sidecar is not touched in that case.
        """
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        tmp = self.path + ".tmp"
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                json.dump(self._data, f)
            os.replace(tmp, self.path)
        except BaseException:
            # Do not leave a partially written temp file next to the sidecar.
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise