generated from coulomb/repo-seed
feat(coordination): git-backed DecisionLog event store (WP-0009 T1)
Factor DecisionLog storage behind an EventStore abstraction: InMemoryEventStore stays the default/test double, GitEventStore makes the coordination log git-addressable. Each space is a ref (refs/spaces/<sha1>); append writes an immutable one-blob commit and advances the ref under compare-and-swap, so the commit chain is the per-space total order and a racing appender can never fork the log. Deterministic stable-JSON event serialization. Zero runtime deps (git CLI via subprocess). API and fold unchanged across backends. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -4,8 +4,13 @@ from shard_wiki.coordination.decision_log import (
|
|||||||
CoordinationState,
|
CoordinationState,
|
||||||
DecisionEvent,
|
DecisionEvent,
|
||||||
DecisionLog,
|
DecisionLog,
|
||||||
|
EventStore,
|
||||||
EventType,
|
EventType,
|
||||||
|
InMemoryEventStore,
|
||||||
|
deserialize_event,
|
||||||
|
serialize_event,
|
||||||
)
|
)
|
||||||
|
from shard_wiki.coordination.git_event_store import GitEventStore
|
||||||
from shard_wiki.coordination.overlay import (
|
from shard_wiki.coordination.overlay import (
|
||||||
ApplyResult,
|
ApplyResult,
|
||||||
ApplyStatus,
|
ApplyStatus,
|
||||||
@@ -19,6 +24,11 @@ __all__ = [
|
|||||||
"DecisionEvent",
|
"DecisionEvent",
|
||||||
"EventType",
|
"EventType",
|
||||||
"CoordinationState",
|
"CoordinationState",
|
||||||
|
"EventStore",
|
||||||
|
"InMemoryEventStore",
|
||||||
|
"GitEventStore",
|
||||||
|
"serialize_event",
|
||||||
|
"deserialize_event",
|
||||||
"Overlay",
|
"Overlay",
|
||||||
"OverlayEngine",
|
"OverlayEngine",
|
||||||
"ApplyStatus",
|
"ApplyStatus",
|
||||||
|
|||||||
@@ -3,22 +3,36 @@
|
|||||||
Coordination-canonical state (overlays, equivalence bindings, aliases, merges, forks) is an
|
Coordination-canonical state (overlays, equivalence bindings, aliases, merges, forks) is an
|
||||||
**append-only decision log**, not a mutable file; the queryable *current* state is a **derived
|
**append-only decision log**, not a mutable file; the queryable *current* state is a **derived
|
||||||
fold** of the log (tier-3 disposable). The log is **totally ordered per space** via a single
|
fold** of the log (tier-3 disposable). The log is **totally ordered per space** via a single
|
||||||
**append authority** — here an in-process counter; a git-backed, lease-held authority is a later
|
**append authority**. That total order is what gives read-your-writes across readers (§8.6).
|
||||||
binding. That total order is what gives read-your-writes across readers (§8.6).
|
|
||||||
|
Storage lives behind :class:`EventStore`: :class:`InMemoryEventStore` is the default test double
|
||||||
|
(an in-process counter); :class:`~shard_wiki.coordination.git_event_store.GitEventStore` is the
|
||||||
|
git-addressable backend (SHARD-WP-0009). The :class:`DecisionLog` API and the :meth:`fold` are
|
||||||
|
identical across backends — only storage + the concurrency model differ.
|
||||||
|
|
||||||
`derived = f(canonical)`: :class:`CoordinationState` is always reproducible by replaying the log.
|
`derived = f(canonical)`: :class:`CoordinationState` is always reproducible by replaying the log.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
from collections.abc import Mapping
|
from collections.abc import Mapping
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from types import MappingProxyType
|
from types import MappingProxyType
|
||||||
from typing import Any
|
from typing import Any, Protocol, runtime_checkable
|
||||||
|
|
||||||
__all__ = ["EventType", "DecisionEvent", "CoordinationState", "DecisionLog"]
|
__all__ = [
|
||||||
|
"EventType",
|
||||||
|
"DecisionEvent",
|
||||||
|
"CoordinationState",
|
||||||
|
"EventStore",
|
||||||
|
"InMemoryEventStore",
|
||||||
|
"DecisionLog",
|
||||||
|
"serialize_event",
|
||||||
|
"deserialize_event",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class EventType(Enum):
|
class EventType(Enum):
|
||||||
@@ -63,10 +77,57 @@ class CoordinationState:
|
|||||||
return frozenset({identity})
|
return frozenset({identity})
|
||||||
|
|
||||||
|
|
||||||
class DecisionLog:
|
def serialize_event(event: DecisionEvent) -> bytes:
|
||||||
"""In-memory append-only log, totally ordered per space (the append authority for a process).
|
"""Deterministic, stable-JSON wire form of an event (same bytes for equal events, any process).
|
||||||
|
|
||||||
A later binding swaps the storage for git + a per-space lease without changing this API.
|
Sorted keys + compact separators make the serialization canonical, so a git object hashed from
|
||||||
|
it is reproducible — the basis for content-addressable, comparable logs across backends.
|
||||||
|
"""
|
||||||
|
obj = {
|
||||||
|
"seq": event.seq,
|
||||||
|
"space": event.space,
|
||||||
|
"type": event.type.value,
|
||||||
|
"payload": event.payload,
|
||||||
|
"actor": event.actor,
|
||||||
|
"timestamp": event.timestamp.isoformat(),
|
||||||
|
}
|
||||||
|
return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False).encode()
|
||||||
|
|
||||||
|
|
||||||
|
def deserialize_event(data: bytes | str) -> DecisionEvent:
|
||||||
|
"""Inverse of :func:`serialize_event` — round-trips an event byte-for-byte by field."""
|
||||||
|
obj = json.loads(data)
|
||||||
|
return DecisionEvent(
|
||||||
|
seq=obj["seq"],
|
||||||
|
space=obj["space"],
|
||||||
|
type=EventType(obj["type"]),
|
||||||
|
payload=obj["payload"],
|
||||||
|
actor=obj["actor"],
|
||||||
|
timestamp=datetime.fromisoformat(obj["timestamp"]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
|
||||||
|
class EventStore(Protocol):
|
||||||
|
"""Append-only, per-space ordered storage behind :class:`DecisionLog`.
|
||||||
|
|
||||||
|
Two bindings exist: :class:`InMemoryEventStore` (default/test double) and
|
||||||
|
:class:`~shard_wiki.coordination.git_event_store.GitEventStore` (git-addressable). Both assign
|
||||||
|
a per-space monotonic ``seq`` at the log head and guarantee read-your-writes for their reach
|
||||||
|
(in-process for memory; cross-process for git).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def append(
|
||||||
|
self, space: str, type: EventType, payload: Mapping[str, Any], actor: str | None = None
|
||||||
|
) -> DecisionEvent: ...
|
||||||
|
|
||||||
|
def events(self, space: str) -> tuple[DecisionEvent, ...]: ...
|
||||||
|
|
||||||
|
|
||||||
|
class InMemoryEventStore:
|
||||||
|
"""In-process append-only store, totally ordered per space (the append authority for a process).
|
||||||
|
|
||||||
|
The default test double; the git backend preserves this exact contract on durable storage.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
@@ -84,10 +145,33 @@ class DecisionLog:
|
|||||||
self._events.setdefault(space, []).append(event)
|
self._events.setdefault(space, []).append(event)
|
||||||
return event
|
return event
|
||||||
|
|
||||||
|
def events(self, space: str) -> tuple[DecisionEvent, ...]:
|
||||||
|
return tuple(self._events.get(space, ()))
|
||||||
|
|
||||||
|
|
||||||
|
class DecisionLog:
|
||||||
|
"""Append-only decision log, totally ordered per space, with a derived :meth:`fold`.
|
||||||
|
|
||||||
|
Storage is delegated to an :class:`EventStore` (default :class:`InMemoryEventStore`); swapping
|
||||||
|
in the git backend changes only durability + the concurrency model, not this API or the fold.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, store: EventStore | None = None) -> None:
|
||||||
|
self._store: EventStore = store if store is not None else InMemoryEventStore()
|
||||||
|
|
||||||
|
def append(
|
||||||
|
self,
|
||||||
|
space: str,
|
||||||
|
type: EventType,
|
||||||
|
payload: Mapping[str, Any],
|
||||||
|
actor: str | None = None,
|
||||||
|
) -> DecisionEvent:
|
||||||
|
return self._store.append(space, type, payload, actor=actor)
|
||||||
|
|
||||||
def events(self, space: str) -> tuple[DecisionEvent, ...]:
|
def events(self, space: str) -> tuple[DecisionEvent, ...]:
|
||||||
"""The space's events in append (total) order. Read-your-writes: a just-appended event
|
"""The space's events in append (total) order. Read-your-writes: a just-appended event
|
||||||
is present immediately."""
|
is present immediately."""
|
||||||
return tuple(self._events.get(space, ()))
|
return self._store.events(space)
|
||||||
|
|
||||||
def fold(self, space: str) -> CoordinationState:
|
def fold(self, space: str) -> CoordinationState:
|
||||||
"""Replay the log into current coordination state (derived = f(log))."""
|
"""Replay the log into current coordination state (derived = f(log))."""
|
||||||
|
|||||||
154
src/shard_wiki/coordination/git_event_store.py
Normal file
154
src/shard_wiki/coordination/git_event_store.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
"""GitEventStore — a git-addressable binding of :class:`EventStore` (SHARD-WP-0009 T1).
|
||||||
|
|
||||||
|
Each space is a ref (``refs/spaces/<sha1(space)>``); each ``append`` writes the event as an
|
||||||
|
immutable git object (a one-blob tree committed onto the ref) and advances the ref. The commit
|
||||||
|
chain *is* the totally ordered log: ``seq`` is the depth, ``events`` walks first-parent from the
|
||||||
|
head oldest→newest. Coordination-canonical state therefore inherits git's history / patch /
|
||||||
|
review / backup affordances (I-6) and is read-your-writes correct across processes.
|
||||||
|
|
||||||
|
The total order is enforced at storage by a **compare-and-swap** ref update
|
||||||
|
(``git update-ref <ref> <new> <old>``): two appenders racing off the same head — the loser's CAS
|
||||||
|
fails and it retries off the new head, so a non-holder can never fork the log. The lease layer
|
||||||
|
(T2) sits *above* this as the append-authority policy; CAS is the mechanism that makes it safe.
|
||||||
|
|
||||||
|
Implemented over the ``git`` CLI through :mod:`subprocess` — zero runtime dependencies.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from collections.abc import Mapping
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from shard_wiki.coordination.decision_log import (
|
||||||
|
DecisionEvent,
|
||||||
|
EventType,
|
||||||
|
deserialize_event,
|
||||||
|
serialize_event,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = ["GitEventStore"]
|
||||||
|
|
||||||
|
# Fixed identity so commit objects are reproducible and never prompt for git config; the event's
|
||||||
|
# own timestamp/actor carry the real provenance, the commit is just the ordered container.
|
||||||
|
_GIT_IDENTITY = {
|
||||||
|
"GIT_AUTHOR_NAME": "shard-wiki",
|
||||||
|
"GIT_AUTHOR_EMAIL": "coordination@shard-wiki",
|
||||||
|
"GIT_COMMITTER_NAME": "shard-wiki",
|
||||||
|
"GIT_COMMITTER_EMAIL": "coordination@shard-wiki",
|
||||||
|
}
|
||||||
|
_EVENT_PATH = "event.json"
|
||||||
|
_MAX_CAS_RETRIES = 50
|
||||||
|
|
||||||
|
|
||||||
|
class GitEventStore:
|
||||||
|
"""Git-backed, append-only, per-space ordered event store (an :class:`EventStore`)."""
|
||||||
|
|
||||||
|
def __init__(self, repo_path: str | Path) -> None:
|
||||||
|
self.repo_path = Path(repo_path)
|
||||||
|
self.repo_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
if not (self.repo_path / "HEAD").exists() and not (self.repo_path / ".git").exists():
|
||||||
|
self._git("init", "--quiet", str(self.repo_path), at_cwd=True)
|
||||||
|
|
||||||
|
# -- EventStore contract -------------------------------------------------
|
||||||
|
|
||||||
|
def append(
|
||||||
|
self,
|
||||||
|
space: str,
|
||||||
|
type: EventType,
|
||||||
|
payload: Mapping[str, Any],
|
||||||
|
actor: str | None = None,
|
||||||
|
) -> DecisionEvent:
|
||||||
|
"""Append one event, advancing the space ref under compare-and-swap (retry-on-race)."""
|
||||||
|
ref = self._ref(space)
|
||||||
|
for _ in range(_MAX_CAS_RETRIES):
|
||||||
|
head = self._head(ref)
|
||||||
|
seq = self._count(ref, head)
|
||||||
|
event = DecisionEvent(
|
||||||
|
seq=seq, space=space, type=type, payload=dict(payload), actor=actor
|
||||||
|
)
|
||||||
|
commit = self._commit_event(event, parent=head)
|
||||||
|
if self._cas_update(ref, new=commit, old=head):
|
||||||
|
return event
|
||||||
|
raise RuntimeError(f"append contention on {space!r}: exhausted {_MAX_CAS_RETRIES} retries")
|
||||||
|
|
||||||
|
def events(self, space: str) -> tuple[DecisionEvent, ...]:
|
||||||
|
"""The space's events oldest→newest (append/total order)."""
|
||||||
|
ref = self._ref(space)
|
||||||
|
head = self._head(ref)
|
||||||
|
if head is None:
|
||||||
|
return ()
|
||||||
|
shas = self._git("rev-list", "--reverse", "--first-parent", ref).decode().split()
|
||||||
|
return tuple(
|
||||||
|
deserialize_event(self._git("cat-file", "blob", f"{sha}:{_EVENT_PATH}"))
|
||||||
|
for sha in shas
|
||||||
|
)
|
||||||
|
|
||||||
|
# -- git plumbing --------------------------------------------------------
|
||||||
|
|
||||||
|
def _commit_event(self, event: DecisionEvent, parent: str | None) -> str:
|
||||||
|
blob = self._git(
|
||||||
|
"hash-object", "-w", "--stdin", stdin=serialize_event(event)
|
||||||
|
).decode().strip()
|
||||||
|
tree = self._git(
|
||||||
|
"mktree", stdin=f"100644 blob {blob}\t{_EVENT_PATH}\n".encode()
|
||||||
|
).decode().strip()
|
||||||
|
args = ["commit-tree", tree, "-m", f"event {event.seq} {event.type.value}"]
|
||||||
|
if parent is not None:
|
||||||
|
args += ["-p", parent]
|
||||||
|
# Pin the commit date to the event's timestamp for reproducible objects.
|
||||||
|
date = event.timestamp.isoformat()
|
||||||
|
env = {**_GIT_IDENTITY, "GIT_AUTHOR_DATE": date, "GIT_COMMITTER_DATE": date}
|
||||||
|
return self._git(*args, env=env).decode().strip()
|
||||||
|
|
||||||
|
def _cas_update(self, ref: str, new: str, old: str | None) -> bool:
|
||||||
|
"""``git update-ref`` with the old value as a CAS guard (empty oldvalue == must-not-exist).
|
||||||
|
|
||||||
|
Returns False if the ref moved since we read ``old`` (lost the race) — the caller retries.
|
||||||
|
"""
|
||||||
|
result = self._run("update-ref", ref, new, old if old is not None else "")
|
||||||
|
return result.returncode == 0
|
||||||
|
|
||||||
|
def _head(self, ref: str) -> str | None:
|
||||||
|
result = self._run("rev-parse", "--verify", "--quiet", ref)
|
||||||
|
out = result.stdout.decode().strip()
|
||||||
|
return out or None
|
||||||
|
|
||||||
|
def _count(self, ref: str, head: str | None) -> int:
|
||||||
|
if head is None:
|
||||||
|
return 0
|
||||||
|
return int(self._git("rev-list", "--count", "--first-parent", ref).decode().strip())
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _ref(space: str) -> str:
|
||||||
|
return f"refs/spaces/{hashlib.sha1(space.encode()).hexdigest()}"
|
||||||
|
|
||||||
|
def _git(
|
||||||
|
self,
|
||||||
|
*args: str,
|
||||||
|
stdin: bytes | None = None,
|
||||||
|
env: dict | None = None,
|
||||||
|
at_cwd: bool = False,
|
||||||
|
) -> bytes:
|
||||||
|
result = self._run(*args, stdin=stdin, env=env, at_cwd=at_cwd, check=True)
|
||||||
|
return result.stdout
|
||||||
|
|
||||||
|
def _run(
|
||||||
|
self,
|
||||||
|
*args: str,
|
||||||
|
stdin: bytes | None = None,
|
||||||
|
env: dict | None = None,
|
||||||
|
at_cwd: bool = False,
|
||||||
|
check: bool = False,
|
||||||
|
) -> subprocess.CompletedProcess:
|
||||||
|
base = ["git"] if at_cwd else ["git", "-C", str(self.repo_path)]
|
||||||
|
return subprocess.run(
|
||||||
|
[*base, *args],
|
||||||
|
input=stdin,
|
||||||
|
capture_output=True,
|
||||||
|
env={**os.environ, **(env or {})},
|
||||||
|
check=check,
|
||||||
|
)
|
||||||
84
tests/test_git_event_store.py
Normal file
84
tests/test_git_event_store.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
"""Tests for the git-backed event store (SHARD-WP-0009 T1).
|
||||||
|
|
||||||
|
The git backend must satisfy the same EventStore contract as the in-memory one (round-trip,
|
||||||
|
ordering, determinism) while making the log git-addressable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from shard_wiki.coordination import (
|
||||||
|
DecisionLog,
|
||||||
|
EventType,
|
||||||
|
GitEventStore,
|
||||||
|
InMemoryEventStore,
|
||||||
|
deserialize_event,
|
||||||
|
serialize_event,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def git_store(tmp_path):
|
||||||
|
return GitEventStore(tmp_path / "coord")
|
||||||
|
|
||||||
|
|
||||||
|
def test_append_git_read_round_trips(git_store):
|
||||||
|
log = DecisionLog(git_store)
|
||||||
|
ev = log.append("s", EventType.ALIAS_SET, {"alias": "Home", "target": "shardA:Index"})
|
||||||
|
(read,) = log.events("s")
|
||||||
|
assert read.seq == ev.seq == 0
|
||||||
|
assert read.space == "s"
|
||||||
|
assert read.type is EventType.ALIAS_SET
|
||||||
|
assert read.payload == {"alias": "Home", "target": "shardA:Index"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_ordering_preserved_and_per_space_monotonic(git_store):
|
||||||
|
log = DecisionLog(git_store)
|
||||||
|
log.append("a", EventType.ALIAS_SET, {"alias": "X", "target": "s:1"})
|
||||||
|
log.append("a", EventType.ALIAS_SET, {"alias": "Y", "target": "s:2"})
|
||||||
|
log.append("b", EventType.ALIAS_SET, {"alias": "Z", "target": "s:3"})
|
||||||
|
assert [e.seq for e in log.events("a")] == [0, 1]
|
||||||
|
assert [e.payload["alias"] for e in log.events("a")] == ["X", "Y"]
|
||||||
|
assert [e.seq for e in log.events("b")] == [0] # independent ref/ordering
|
||||||
|
|
||||||
|
|
||||||
|
def test_each_append_is_a_git_commit(git_store):
|
||||||
|
log = DecisionLog(git_store)
|
||||||
|
log.append("s", EventType.BINDING_MADE, {"members": ["a", "b"]})
|
||||||
|
log.append("s", EventType.PAGE_FORKED, {"source": "a", "fork": "c"})
|
||||||
|
ref = GitEventStore._ref("s")
|
||||||
|
count = subprocess.run(
|
||||||
|
["git", "-C", str(git_store.repo_path), "rev-list", "--count", ref],
|
||||||
|
capture_output=True, text=True, check=True,
|
||||||
|
).stdout.strip()
|
||||||
|
assert count == "2" # one immutable commit object per append
|
||||||
|
|
||||||
|
|
||||||
|
def test_deterministic_serialization_is_stable_and_sorted():
|
||||||
|
log = InMemoryEventStore()
|
||||||
|
ev = log.append("s", EventType.ALIAS_SET, {"target": "z", "alias": "a"})
|
||||||
|
blob = serialize_event(ev)
|
||||||
|
assert serialize_event(ev) == blob # stable across calls
|
||||||
|
assert blob.index(b'"alias"') < blob.index(b'"target"') # payload keys sorted, not insertion
|
||||||
|
assert deserialize_event(blob).payload == {"alias": "a", "target": "z"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_git_fold_matches_in_memory_fold(git_store):
|
||||||
|
events = [
|
||||||
|
(EventType.ALIAS_SET, {"alias": "Home", "target": "shardA:Index"}),
|
||||||
|
(EventType.BINDING_MADE, {"members": ["a", "b"]}),
|
||||||
|
(EventType.BINDING_MADE, {"members": ["b", "c"]}),
|
||||||
|
(EventType.ALIAS_SET, {"alias": "Home", "target": "shardB:Main"}),
|
||||||
|
]
|
||||||
|
mem = DecisionLog(InMemoryEventStore())
|
||||||
|
git = DecisionLog(git_store)
|
||||||
|
for typ, payload in events:
|
||||||
|
mem.append("s", typ, payload)
|
||||||
|
git.append("s", typ, payload)
|
||||||
|
assert git.fold("s").aliases == mem.fold("s").aliases
|
||||||
|
assert git.fold("s").equivalence_groups == mem.fold("s").equivalence_groups
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_decisionlog_is_in_memory():
|
||||||
|
assert isinstance(DecisionLog()._store, InMemoryEventStore)
|
||||||
Reference in New Issue
Block a user