feat(engine): page-store kernel skeleton (WP-0014 T1)

engine/ package: EngineKernel (in-process page store with per-page version
history; create/edit-as-version, recoverable delete-tombstone, keys, current_rev)
+ wikilink extraction + in-shard link resolution / red-link detection (EC-1..EC-4).
Reuses model/provenance; git-IS-store backing slots in later. 6 tests green,
pyflakes clean, full suite green. Marks T1 done.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-15 23:57:31 +02:00
parent e62560eb5a
commit 2100e956aa
5 changed files with 190 additions and 1 deletions

View File

@@ -0,0 +1,11 @@
"""engine/ — shard-wiki's native, headless wiki engine (a canonical-mode shard backend).
A small page-store kernel + a typed-extension runtime (WikiEngineCoreArchitecture). The engine
is *one shard*: it is consumed by the orchestrator only via its `EngineShardAdapter`; it never
imports the derived tier (`union`/`projection`).
"""
from shard_wiki.engine.kernel import EngineKernel
from shard_wiki.engine.links import extract_wikilinks
__all__ = ["EngineKernel", "extract_wikilinks"]

View File

@@ -0,0 +1,87 @@
"""Engine kernel — the small page-store core (WikiEngineCoreArchitecture §3, EC-1…EC-4).
The irreducible engine: author/read/edit pages (edit = a new version; delete = a recoverable
tombstone — history is the floor, I-10), enumerate keys, and resolve `[[wikilinks]]` (red-link =
an unresolved target). No feature beyond this c2-minimum lives in the kernel; everything else is
a typed extension (E-3).
Storage is intentionally simple here (in-memory version history); the git-IS-store backing
(SHARD-WP-0009/0012) slots in behind the same API later. The kernel reuses the page model and
provenance leaf; it does not redefine them.
"""
from __future__ import annotations
from collections.abc import Iterable
from datetime import datetime, timezone
from shard_wiki.engine.links import extract_wikilinks
from shard_wiki.model import Identity, Page, Placement
from shard_wiki.provenance import Liveness, ProvenanceEnvelope, Staleness
__all__ = ["EngineKernel"]
class EngineKernel:
    """An in-process page store with per-page version history for one engine shard.

    Every write appends a new version; delete only tombstones the key, so the
    full history stays readable and a later write restores the page.
    """

    def __init__(self, shard_id: str) -> None:
        """Create an empty store whose pages are all attributed to ``shard_id``."""
        self.shard_id = shard_id
        # key -> every version ever written, oldest first (never truncated).
        self._versions: dict[str, list[Page]] = {}
        # Keys that are currently tombstoned; their history is still in _versions.
        self._deleted: set[str] = set()

    # --- write path (create/edit are one operation; both append a version) ---
    def write(self, key: str, body: str) -> Page:
        """Append a new version of ``key`` with ``body`` and return it.

        Creates the page if it has no history, and clears any tombstone (so
        writing to a deleted key restores it). The revision is the 1-based
        count of versions ever written for the key, as a string.
        """
        # Non-mutating lookup: the key must not be registered until the page
        # object actually exists.
        rev = str(len(self._versions.get(key, ())) + 1)
        page = Page(
            identity=Identity(self.shard_id, key),
            body=body,
            envelope=ProvenanceEnvelope(
                source_shard=self.shard_id,
                liveness=Liveness.STATIC,
                staleness=Staleness.FRESH,
                source_rev=rev,
                observed_at=datetime.now(tz=timezone.utc),
            ),
            placements=(Placement(self.shard_id, key),),
        )
        # Only touch state once construction succeeded; otherwise a failed
        # write would leave an empty history behind, making exists() true and
        # read() fail with IndexError for a page that was never written.
        self._versions.setdefault(key, []).append(page)
        self._deleted.discard(key)
        return page

    # --- read path ---
    def exists(self, key: str) -> bool:
        """Return True if ``key`` has been written and is not tombstoned."""
        return key in self._versions and key not in self._deleted

    def read(self, key: str) -> Page:
        """Latest version of a live page. Raises ``KeyError`` if absent or deleted."""
        if not self.exists(key):
            raise KeyError(key)
        return self._versions[key][-1]

    def keys(self) -> Iterable[str]:
        """Iterate the keys of live (non-tombstoned) pages in sorted order.

        The sorted key list is taken when this method is called; the tombstone
        filter is applied lazily as the result is consumed.
        """
        return (k for k in sorted(self._versions) if k not in self._deleted)

    def current_rev(self, key: str) -> str | None:
        """Return the latest version's ``source_rev``, or ``None`` if absent or deleted."""
        return self._versions[key][-1].envelope.source_rev if self.exists(key) else None

    # --- history & recoverability (I-10): versions are retained across delete ---
    def history(self, key: str) -> tuple[Page, ...]:
        """All versions ever written for ``key`` (oldest→newest), even after delete.

        Returns an empty tuple for a key that was never written.
        """
        return tuple(self._versions.get(key, ()))

    def delete(self, key: str) -> None:
        """Tombstone a page (history retained; restore by writing again).

        Raises ``KeyError`` if ``key`` was never written. Deleting an already
        tombstoned key is a no-op.
        """
        if key not in self._versions:
            raise KeyError(key)
        self._deleted.add(key)

    # --- links (EC-4): resolution + red-link detection within this shard ---
    def links(self, key: str) -> list[str]:
        """Wikilink targets in a page's current body.

        Raises ``KeyError`` if the page is absent or deleted (via ``read``).
        """
        return extract_wikilinks(self.read(key).body)

    def resolve_link(self, target: str) -> Identity | None:
        """Resolve a wikilink target to a live page identity, or ``None`` (a **red-link**).

        ``target`` is looked up verbatim as a key in this shard only.
        """
        return self.read(target).identity if self.exists(target) else None

View File

@@ -0,0 +1,25 @@
"""Wikilink extraction — the kernel's link primitive (WikiEngineCoreArchitecture EC-4).
`[[Target]]` and `[[Target|label]]`. CamelCase auto-linking is intentionally NOT here (it is an
opt-in concern per FederationRequirements ADR-06); the kernel only knows explicit wikilinks.
Link *resolution* (and red-link detection) is the kernel's job (it knows which keys exist);
*rendering* is a consumer concern (headless engine, no UI).
"""
from __future__ import annotations
import re
__all__ = ["extract_wikilinks"]
# Matches `[[Target]]` and `[[Target|label]]`; group 1 is the raw target text.
_WIKILINK = re.compile(r"\[\[([^\]|]+?)(?:\|[^\]]*)?\]\]")


def extract_wikilinks(body: str) -> list[str]:
    """Collect the distinct wikilink targets of ``body`` in first-seen order.

    Any ``|label`` suffix is discarded, surrounding whitespace is trimmed from
    each target, and targets that are blank after trimming are ignored.
    """
    trimmed = (match.group(1).strip() for match in _WIKILINK.finditer(body))
    # dict keys keep insertion order, which gives ordered de-duplication.
    return list(dict.fromkeys(name for name in trimmed if name))