feat(views): wikilink + red-link model (WP-0010 T1)

A CommonMark wikilink extension: extract [[Target]] / [[Target|label]] from a
page body (skipping fenced + inline code, preserving offsets), and resolve each
target through the union — resolved is a link, unresolved is a createable
red-link (never a dropped reference). CamelCase auto-linking is off by default,
opt-in per space, and never double-counts a target already inside [[...]]. Link
model + resolution are core; rendering stays L6. New views/ package.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-16 01:55:06 +02:00
parent c731c96634
commit 951b24300d
3 changed files with 181 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
"""views/ — derived, recomputable, provenance-carrying read views over the union (§8.4).
All views here are *derived tier*: pure functions of the attached shards plus the coordination-log
fold, storing nothing canonical (SHARD-WP-0011 makes them incrementally maintainable). Presentation
stays out of core (L6) — these produce models, never rendered output. Per the dependency rule this
package imports down (union/model/coordination/provenance) and is imported only by the orchestrator.
"""
from shard_wiki.views.links import (
ResolvedLink,
WikiLink,
extract_links,
resolve_links,
)
__all__ = [
"WikiLink",
"ResolvedLink",
"extract_links",
"resolve_links",
]

View File

@@ -0,0 +1,91 @@
"""Wikilink + red-link model (SHARD-WP-0010 T1; FederationRequirements ADR-06).
A CommonMark *wikilink extension*: ``[[Target]]`` and ``[[Target|label]]`` are extracted from a
page body and each target is resolved through the union (ADR-01). A target that resolves is a
**link**; one that does not is a **red-link** — a createable hole (UC-23), never a dropped
reference (union without erasure). CamelCase auto-linking (``WikiWord``) is **off by default** and
opt-in per space, since bare CamelCase is noisy and policy-laden.
The link *model and resolution* are core; turning a :class:`ResolvedLink` into an ``<a>`` (or a
red anchor) is L6 presentation and lives outside this package. Link spans are ``[start, end)``
character offsets into the body ``str`` (not byte offsets), so a later layer can address them
precisely.
"""
from __future__ import annotations
import re
from dataclasses import dataclass
from shard_wiki.union import Resolution, UnionGraph
__all__ = ["WikiLink", "ResolvedLink", "extract_links", "resolve_links"]
# Explicit wikilink token: ``[[`` target, an optional ``|label``, then ``]]``.
#   group(1) is the target: one or more characters that are neither ``]`` nor ``|`` (lazy).
#   group(2) is the label: one or more characters other than ``]`` (lazy); None when no ``|``.
# The ``\s*`` runs normally keep surrounding whitespace out of the captures, but because each
# group needs at least one character, a whitespace-only target or label (``[[ ]]``, ``[[A| ]]``)
# is still captured as whitespace via backtracking.
_WIKILINK_RE = re.compile(r"\[\[\s*([^\]|]+?)\s*(?:\|\s*([^\]]+?)\s*)?\]\]")
# A WikiWord: ≥2 capitalized alphanumeric segments run together (e.g. FrontPage, WikiWord).
# Each segment is one ASCII uppercase letter followed by at least one ASCII lowercase letter or
# digit; ``\b`` on both sides requires the token to sit on word boundaries.
_CAMELCASE_RE = re.compile(r"\b([A-Z][a-z0-9]+(?:[A-Z][a-z0-9]+)+)\b")
# Fenced code: the shortest run from one triple backtick to the next; DOTALL lets it span lines.
# NOTE(review): an opening fence with no closing fence does not match, and ``~~~`` fences are not
# recognised by this pattern — confirm that is intended.
_FENCED_RE = re.compile(r"```.*?```", re.DOTALL)
# Inline code: a backtick, any run of characters that are neither backtick nor newline, a backtick.
# NOTE(review): the inner run may be empty, so a double-backtick delimiter matches on its own and
# the text between two such delimiters is not covered — confirm multi-backtick spans are out of scope.
_INLINE_CODE_RE = re.compile(r"`[^`\n]*`")
@dataclass(frozen=True, slots=True)
class WikiLink:
"""One extracted reference. ``target`` is the resolve key; ``label`` is the display text (or
None to use the target); ``span`` is the ``[start, end)`` offset of the whole token in the body;
``auto`` marks a CamelCase auto-link (vs an explicit ``[[...]]``)."""
target: str
label: str | None
span: tuple[int, int]
auto: bool = False
@property
def text(self) -> str:
return self.label or self.target
@dataclass(frozen=True, slots=True)
class ResolvedLink:
"""A :class:`WikiLink` paired with its union :class:`Resolution` (the link's truth status)."""
link: WikiLink
resolution: Resolution
@property
def is_red_link(self) -> bool:
return self.resolution.is_red_link
def _mask(body: str, pattern: re.Pattern[str]) -> str:
    """Return ``body`` with every ``pattern`` match overwritten by the same number of spaces.

    The result has the same length as ``body``, so a match found in it by a later scan carries
    the same offsets it would have in the original text.
    """

    def _blank(match: re.Match[str]) -> str:
        # One space per matched character keeps all following offsets unchanged.
        return " " * (match.end() - match.start())

    return pattern.sub(_blank, body)
def extract_links(body: str, *, camelcase: bool = False) -> tuple[WikiLink, ...]:
    """Extract wikilinks from ``body`` in document order, skipping fenced/inline code.

    Explicit ``[[Target]]`` / ``[[Target|label]]`` tokens are always extracted. With
    ``camelcase=True`` (per-space opt-in), bare ``WikiWord`` tokens outside code and outside
    existing ``[[...]]`` also become links, flagged ``auto=True``.

    A token whose target is empty once trimmed (``[[ ]]``, ``[[ |label]]``) names nothing that
    could be resolved or created, so it is skipped rather than emitted with an empty target.
    A label that is empty once trimmed (``[[Home| ]]``) is normalised to ``None`` so the display
    text falls back to the target.

    ``body`` is the raw page text. The return value is a tuple of :class:`WikiLink` ordered by
    start offset; every span indexes into the original, unmasked ``body``.
    """
    # Code regions are blanked with equal-length padding, so offsets stay valid for ``body``.
    scan = _mask(_mask(body, _FENCED_RE), _INLINE_CODE_RE)
    links: list[WikiLink] = []
    for m in _WIKILINK_RE.finditer(scan):
        target = m.group(1).strip()
        if not target:
            # The pattern can capture a whitespace-only target via backtracking.
            continue
        # group(2) is None when there is no ``|``; whitespace-only labels collapse to None too.
        label = (m.group(2) or "").strip() or None
        links.append(WikiLink(target=target, label=label, span=m.span()))
    if camelcase:
        # Mask explicit-link spans too, so a CamelCase target inside [[...]] isn't double-counted.
        cc_scan = _mask(scan, _WIKILINK_RE)
        links.extend(
            WikiLink(target=m.group(1), label=None, span=m.span(), auto=True)
            for m in _CAMELCASE_RE.finditer(cc_scan)
        )
    return tuple(sorted(links, key=lambda link: link.span[0]))
def resolve_links(
    union: UnionGraph, body: str, *, camelcase: bool = False
) -> tuple[ResolvedLink, ...]:
    """Extract the links in ``body`` and pair each one with ``union.resolve`` of its target.

    ``camelcase`` is forwarded unchanged to :func:`extract_links`. The result keeps the order
    in which :func:`extract_links` returned the links.
    """
    resolved: list[ResolvedLink] = []
    for link in extract_links(body, camelcase=camelcase):
        resolution = union.resolve(link.target)
        resolved.append(ResolvedLink(link, resolution))
    return tuple(resolved)

69
tests/test_views_links.py Normal file
View File

@@ -0,0 +1,69 @@
"""Tests for the wikilink + red-link model (SHARD-WP-0010 T1)."""
from shard_wiki.adapters import FolderAdapter
from shard_wiki.union import ResolutionKind, UnionGraph
from shard_wiki.views import extract_links, resolve_links
def _shard(tmp_path, name, files):
    """Write ``files`` (relative path -> text) under ``tmp_path / name`` and wrap it in a FolderAdapter.

    Parent directories are created as needed and every file is written as UTF-8.
    """
    shard_root = tmp_path / name
    for relative, content in files.items():
        destination = shard_root / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(content, encoding="utf-8")
    return FolderAdapter(name, shard_root)
def test_extracts_plain_and_labelled_links():
    """Bare and piped-label forms are both extracted; ``text`` is the label, else the target."""
    found = extract_links("See [[Home]] and [[Index|the index]].")
    observed = [(item.target, item.label, item.text) for item in found]
    expected = [
        ("Home", None, "Home"),
        ("Index", "the index", "the index"),
    ]
    assert observed == expected
def test_links_carry_body_offsets_in_document_order():
    """Links come back in document order and a span slices the original token out of the body."""
    text = "a [[One]] b [[Two]]"
    found = extract_links(text)
    targets = [item.target for item in found]
    assert targets == ["One", "Two"]
    start, stop = found[0].span
    assert text[start:stop] == "[[One]]"
def test_code_regions_are_not_scanned():
    """Tokens inside a fenced block or an inline code span are not reported as links."""
    text = "real [[Home]]\n```\n[[NotALink]]\n```\ninline `[[AlsoNot]]` done"
    found = extract_links(text)
    assert [item.target for item in found] == ["Home"]
def test_camelcase_off_by_default_then_opt_in():
    """A bare WikiWord is ignored by default and becomes an ``auto`` link with ``camelcase=True``."""
    text = "FrontPage links to [[Home]]"
    # Default call: the bare CamelCase word is not a link.
    default_targets = [item.target for item in extract_links(text)]
    assert default_targets == ["Home"]
    opted_in = extract_links(text, camelcase=True)
    assert {item.target for item in opted_in} == {"FrontPage", "Home"}
    front_page = next(item for item in opted_in if item.target == "FrontPage")
    assert front_page.auto is True
def test_camelcase_does_not_double_count_inside_explicit_link():
    """``[[FrontPage]]`` yields exactly one explicit link, with no extra CamelCase auto-link."""
    found = extract_links("[[FrontPage]]", camelcase=True)
    assert len(found) == 1
    first = found[0]
    assert first.auto is False
def test_resolve_links_distinguishes_link_from_red_link(tmp_path):
    """A target present in an attached shard resolves SINGLE; an absent one is a red-link."""
    union = UnionGraph("space")
    union.attach(_shard(tmp_path, "shardA", {"Home.md": "home"}))
    outcomes = {
        entry.link.target: entry
        for entry in resolve_links(union, "[[Home]] and [[Ghost]]")
    }
    home = outcomes["Home"]
    assert home.resolution.kind is ResolutionKind.SINGLE
    assert home.is_red_link is False
    # Unresolved target: reported as a createable red-link.
    assert outcomes["Ghost"].is_red_link is True
def test_resolve_links_surfaces_chorus(tmp_path):
    """With ``Home.md`` present in two attached shards, the single link resolves as CHORUS."""
    union = UnionGraph("space")
    for shard_name, content in (("shardA", "A"), ("shardB", "B")):
        union.attach(_shard(tmp_path, shard_name, {"Home.md": content}))
    [only] = resolve_links(union, "[[Home]]")
    assert only.resolution.kind is ResolutionKind.CHORUS