From 951b24300d6065115fda464a94ede4405906631e Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 16 Jun 2026 01:55:06 +0200 Subject: [PATCH] feat(views): wikilink + red-link model (WP-0010 T1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A CommonMark wikilink extension: extract [[Target]] / [[Target|label]] from a page body (skipping fenced + inline code, preserving offsets), and resolve each target through the union — resolved is a link, unresolved is a createable red-link (never a dropped reference). CamelCase auto-linking is off by default, opt-in per space, and never double-counts a target already inside [[...]]. Link model + resolution are core; rendering stays L6. New views/ package. Co-Authored-By: Claude Opus 4.8 --- src/shard_wiki/views/__init__.py | 21 ++++++++ src/shard_wiki/views/links.py | 91 ++++++++++++++++++++++++++++++++ tests/test_views_links.py | 69 ++++++++++++++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 src/shard_wiki/views/__init__.py create mode 100644 src/shard_wiki/views/links.py create mode 100644 tests/test_views_links.py diff --git a/src/shard_wiki/views/__init__.py b/src/shard_wiki/views/__init__.py new file mode 100644 index 0000000..7593bd3 --- /dev/null +++ b/src/shard_wiki/views/__init__.py @@ -0,0 +1,21 @@ +"""views/ — derived, recomputable, provenance-carrying read views over the union (§8.4). + +All views here are *derived tier*: pure functions of the attached shards plus the coordination-log +fold, storing nothing canonical (SHARD-WP-0011 makes them incrementally maintainable). Presentation +stays out of core (L6) — these produce models, never rendered output. Per the dependency rule this +package imports down (union/model/coordination/provenance) and is imported only by the orchestrator. 
+""" + +from shard_wiki.views.links import ( + ResolvedLink, + WikiLink, + extract_links, + resolve_links, +) + +__all__ = [ + "WikiLink", + "ResolvedLink", + "extract_links", + "resolve_links", +] diff --git a/src/shard_wiki/views/links.py b/src/shard_wiki/views/links.py new file mode 100644 index 0000000..ed0fecb --- /dev/null +++ b/src/shard_wiki/views/links.py @@ -0,0 +1,91 @@ +"""Wikilink + red-link model (SHARD-WP-0010 T1; FederationRequirements ADR-06). + +A CommonMark *wikilink extension*: ``[[Target]]`` and ``[[Target|label]]`` are extracted from a +page body and each target is resolved through the union (ADR-01). A target that resolves is a +**link**; one that does not is a **red-link** — a createable hole (UC-23), never a dropped +reference (union without erasure). CamelCase auto-linking (``WikiWord``) is **off by default** and +opt-in per space, since bare CamelCase is noisy and policy-laden. + +The link *model and resolution* are core; turning a :class:`ResolvedLink` into an ``<a>`` (or a +red anchor) is L6 presentation and lives outside this package. Link spans are character offsets +(``str`` indices, not bytes) in the body so a later layer can address them precisely. +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +from shard_wiki.union import Resolution, UnionGraph + +__all__ = ["WikiLink", "ResolvedLink", "extract_links", "resolve_links"] + +_WIKILINK_RE = re.compile(r"\[\[\s*([^\]|]+?)\s*(?:\|\s*([^\]]+?)\s*)?\]\]") +# A WikiWord: ≥2 capitalized alphanumeric segments run together (e.g. FrontPage, WikiWord). +_CAMELCASE_RE = re.compile(r"\b([A-Z][a-z0-9]+(?:[A-Z][a-z0-9]+)+)\b") +_FENCED_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]*`") + + +@dataclass(frozen=True, slots=True) +class WikiLink: + """One extracted reference. 
``target`` is the resolve key; ``label`` is the display text (or + None to use the target); ``span`` is the ``[start, end)`` offset of the whole token in the body; + ``auto`` marks a CamelCase auto-link (vs an explicit ``[[...]]``).""" + + target: str + label: str | None + span: tuple[int, int] + auto: bool = False + + @property + def text(self) -> str: + return self.label or self.target + + +@dataclass(frozen=True, slots=True) +class ResolvedLink: + """A :class:`WikiLink` paired with its union :class:`Resolution` (the link's truth status).""" + + link: WikiLink + resolution: Resolution + + @property + def is_red_link(self) -> bool: + return self.resolution.is_red_link + + +def _mask(body: str, pattern: re.Pattern[str]) -> str: + """Blank out ``pattern`` matches with equal-length spaces so later scans skip them while every + surviving match keeps its true offset.""" + return pattern.sub(lambda m: " " * len(m.group(0)), body) + + +def extract_links(body: str, *, camelcase: bool = False) -> tuple[WikiLink, ...]: + """Extract wikilinks from ``body`` in document order, skipping fenced/inline code. + + With ``camelcase=True`` (per-space opt-in), bare ``WikiWord`` tokens outside code and outside + existing ``[[...]]`` also become links. + """ + scan = _mask(_mask(body, _FENCED_RE), _INLINE_CODE_RE) + links: list[WikiLink] = [] + for m in _WIKILINK_RE.finditer(scan): + links.append(WikiLink(target=m.group(1).strip(), label=m.group(2), span=m.span())) + + if camelcase: + # Mask explicit-link spans too, so a CamelCase target inside [[...]] isn't double-counted. 
+ cc_scan = _mask(scan, _WIKILINK_RE) + for m in _CAMELCASE_RE.finditer(cc_scan): + links.append(WikiLink(target=m.group(1), label=None, span=m.span(), auto=True)) + + return tuple(sorted(links, key=lambda link: link.span[0])) + + +def resolve_links( + union: UnionGraph, body: str, *, camelcase: bool = False +) -> tuple[ResolvedLink, ...]: + """Extract and resolve every link in ``body`` against ``union`` (link vs red-link, ADR-01).""" + return tuple( + ResolvedLink(link, union.resolve(link.target)) + for link in extract_links(body, camelcase=camelcase) + ) diff --git a/tests/test_views_links.py b/tests/test_views_links.py new file mode 100644 index 0000000..951edae --- /dev/null +++ b/tests/test_views_links.py @@ -0,0 +1,69 @@ +"""Tests for the wikilink + red-link model (SHARD-WP-0010 T1).""" + +from shard_wiki.adapters import FolderAdapter +from shard_wiki.union import ResolutionKind, UnionGraph +from shard_wiki.views import extract_links, resolve_links + + +def _shard(tmp_path, name, files): + root = tmp_path / name + for rel, text in files.items(): + p = root / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(text, encoding="utf-8") + return FolderAdapter(name, root) + + +def test_extracts_plain_and_labelled_links(): + links = extract_links("See [[Home]] and [[Index|the index]].") + assert [(link.target, link.label, link.text) for link in links] == [ + ("Home", None, "Home"), + ("Index", "the index", "the index"), + ] + + +def test_links_carry_body_offsets_in_document_order(): + body = "a [[One]] b [[Two]]" + links = extract_links(body) + assert [link.target for link in links] == ["One", "Two"] + s, e = links[0].span + assert body[s:e] == "[[One]]" + + +def test_code_regions_are_not_scanned(): + body = "real [[Home]]\n```\n[[NotALink]]\n```\ninline `[[AlsoNot]]` done" + targets = [link.target for link in extract_links(body)] + assert targets == ["Home"] + + +def test_camelcase_off_by_default_then_opt_in(): + body = "FrontPage links to [[Home]]" + 
assert [link.target for link in extract_links(body)] == ["Home"] # CamelCase ignored + on = extract_links(body, camelcase=True) + assert {link.target for link in on} == {"FrontPage", "Home"} + assert next(link for link in on if link.target == "FrontPage").auto is True + + +def test_camelcase_does_not_double_count_inside_explicit_link(): + # [[FrontPage]] is one explicit link, not also a CamelCase auto-link. + links = extract_links("[[FrontPage]]", camelcase=True) + assert len(links) == 1 + assert links[0].auto is False + + +def test_resolve_links_distinguishes_link_from_red_link(tmp_path): + u = UnionGraph("space") + u.attach(_shard(tmp_path, "shardA", {"Home.md": "home"})) + resolved = resolve_links(u, "[[Home]] and [[Ghost]]") + by_target = {r.link.target: r for r in resolved} + assert by_target["Home"].resolution.kind is ResolutionKind.SINGLE + assert by_target["Home"].is_red_link is False + assert by_target["Ghost"].is_red_link is True # unresolved → createable red-link + + +def test_resolve_links_surfaces_chorus(tmp_path): + u = UnionGraph("space") + u.attach(_shard(tmp_path, "shardA", {"Home.md": "A"})) + u.attach(_shard(tmp_path, "shardB", {"Home.md": "B"})) + (resolved,) = resolve_links(u, "[[Home]]") + assert resolved.resolution.kind is ResolutionKind.CHORUS