"""GitShardAdapter — a second substrate: git-as-store (SHARD-WP-0012; TSD §A.3 git-IS-store). The home case where **git is the store *and* the journal**. Tracked ``*.md`` paths are the page keys; the working-tree file is the body; a page's ``source_rev`` is the **commit sha of the last commit touching its path** (per-path, so an edit to one page never drifts another). The declared profile is *git-IS-store ⟹ substrate=git ∧ history=git-native* — the implication rule the capability model enforces (§6.5), validated at registration like any other binding. This adapter adds **no core changes**: it implements the same :class:`ShardAdapter` contract the folder adapter does, proving "write an adapter + declare a verified profile" is the whole cost of a new substrate (capability-as-data, I-3). Built on the ``git`` CLI via subprocess — zero new deps. """ from __future__ import annotations import os import subprocess from collections.abc import Iterable from dataclasses import dataclass from pathlib import Path from shard_wiki.adapters.contract import ShardAdapter from shard_wiki.model import ( AccessGrant, Addressing, AttachmentMode, CapabilityProfile, ContentOpacity, History, Identity, MergeModel, NativeQuery, NotSupported, OperationalEnvelope, Page, Placement, Substrate, Translation, Verb, WriteGranularity, ) from shard_wiki.provenance import Liveness, ProvenanceEnvelope, Staleness __all__ = ["GitShardAdapter", "PageRevision"] @dataclass(frozen=True, slots=True) class PageRevision: """One adopted git-native revision of a page: the commit sha and its subject line.""" sha: str message: str _GIT_IDENTITY = { "GIT_AUTHOR_NAME": "shard-wiki", "GIT_AUTHOR_EMAIL": "shard@shard-wiki", "GIT_COMMITTER_NAME": "shard-wiki", "GIT_COMMITTER_EMAIL": "shard@shard-wiki", } class GitShardAdapter(ShardAdapter): """A shard whose store is a git repo: keys are tracked ``*.md`` paths, revs are commit shas.""" def __init__(self, shard_id: str, repo_path: str | Path, writable: bool = False) -> None: self._shard_id = shard_id self._repo = Path(repo_path) self._writable = writable self._repo.mkdir(parents=True, exist_ok=True) if not (self._repo / ".git").exists(): self._git("init", "--quiet") @property def shard_id(self) -> str: return self._shard_id def profile(self) -> CapabilityProfile: # VERSION is always available — a git-IS-store has git-native history to adopt (§A.5), # read-only or not. WRITE (= commit, PER_PAGE) is added only in writable mode. verbs = {Verb.READ, Verb.VERSION} granularity = WriteGranularity.NONE if self._writable: verbs |= {Verb.WRITE} granularity = WriteGranularity.PER_PAGE return CapabilityProfile( substrate=Substrate.GIT, attachment_mode=AttachmentMode.GIT_IS_STORE, write_granularity=granularity, content_opacity=ContentOpacity.TRANSPARENT, operational_envelope=OperationalEnvelope.LOCAL_UNBOUNDED, access_grant=AccessGrant.OPEN, liveness=Liveness.STATIC, history=History.GIT_NATIVE, # git-is-store ⟹ git-native (§6.5) merge_model=MergeModel.GIT_TEXT, addressing=Addressing.PATH, native_query=NativeQuery.NONE, translation=Translation.NATIVE, supported_verbs=frozenset(verbs), ).validate() def write(self, key: str, body: str) -> Page: """Write = **commit**: stage the file and commit it (skip a no-op so no empty commit), returning the page at the new sha. Drift detection rides on ``current_rev`` = that sha.""" if not self._writable: raise NotSupported(f"{type(self).__name__} is read-only") rel = f"{key}.md" path = self._path_for(key) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(body, encoding="utf-8") self._git("add", "--", rel) if self._run("diff", "--cached", "--quiet").returncode != 0: # staged changes present self._git("commit", "-m", f"write {rel}", env=_GIT_IDENTITY) return self.read(key) def keys(self) -> Iterable[str]: out = self._git("ls-files", "*.md").decode() for line in out.splitlines(): yield line[: -len(".md")] if line.endswith(".md") else line def read(self, key: str) -> Page: path = self._path_for(key) if not path.is_file(): raise KeyError(key) rev = self.current_rev(key) return Page( identity=Identity(self._shard_id, key), body=path.read_text(encoding="utf-8"), envelope=ProvenanceEnvelope( source_shard=self._shard_id, liveness=Liveness.STATIC, staleness=Staleness.FRESH, source_rev=rev, lineage="git-native", ), placements=(Placement(self._shard_id, f"{key}.md"),), ) def current_rev(self, key: str) -> str | None: """The sha of the last commit touching ``key``'s path (per-path drift token), or None.""" rel = f"{key}.md" if not self._path_for(key).is_file(): return None sha = self._git("log", "-1", "--format=%H", "--", rel).decode().strip() return sha or None def history(self, key: str) -> tuple[PageRevision, ...]: """Adopt git-native history (§A.5): the commit list for ``key``'s path, newest-first. VERSION-gated; raises ``KeyError`` for an unknown page. Each revision is a commit sha + subject — the native log surfaced through the contract, not re-implemented. """ if not self.profile().supports(Verb.VERSION): raise NotSupported(f"{type(self).__name__} does not support version") if not self._path_for(key).is_file(): raise KeyError(key) out = self._git("log", "--format=%H%x00%s", "--", f"{key}.md").decode() revisions = [] for line in out.splitlines(): sha, _, message = line.partition("\x00") revisions.append(PageRevision(sha=sha, message=message)) return tuple(revisions) # -- git plumbing -------------------------------------------------------- def _path_for(self, key: str) -> Path: return self._repo / f"{key}.md" def _git(self, *args: str, stdin: bytes | None = None, env: dict | None = None) -> bytes: return self._run(*args, stdin=stdin, env=env, check=True).stdout def _run( self, *args: str, stdin: bytes | None = None, env: dict | None = None, check: bool = False ) -> subprocess.CompletedProcess: return subprocess.run( ["git", "-C", str(self._repo), *args], input=stdin, capture_output=True, env={**os.environ, **(env or {})}, check=check, )