generated from coulomb/repo-seed
Compare commits
2 Commits
c731c96634
...
da540d4eea
| Author | SHA1 | Date | |
|---|---|---|---|
| da540d4eea | |||
| 951b24300d |
@@ -13,6 +13,7 @@ imported by nothing.
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
from collections.abc import Iterator
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
@@ -68,6 +69,20 @@ class UnionGraph:
|
||||
def shard(self, shard_id: str) -> ShardAdapter | None:
    """Return the first attached shard whose ``shard_id`` equals ``shard_id``.

    Shards are checked in ``self._shards`` order; ``None`` is returned when none matches.
    """
    for candidate in self._shards:
        if candidate.shard_id == shard_id:
            return candidate
    return None
|
||||
|
||||
@property
def shards(self) -> tuple[ShardAdapter, ...]:
    """A tuple copy of the attached shards, in ``self._shards`` order."""
    snapshot = (*self._shards,)
    return snapshot
|
||||
|
||||
def iter_pages(self) -> Iterator[Page]:
    """Yield every page across attached shards, raw (per-shard, not chorus-collapsed).

    The enumeration substrate for derived views — BackLinks, AllPages, SiteMap (§8.4).

    Shards are visited in ``self._shards`` order and keys in ``shard.keys()`` order. A key
    whose ``shard.read`` raises ``KeyError`` is skipped. Only the read itself is guarded, so
    a ``KeyError`` thrown into the generator by its consumer propagates instead of being
    swallowed.
    """
    for shard in self._shards:
        for key in shard.keys():
            try:
                page = shard.read(key)
            except KeyError:
                # Presumably the key vanished between listing and reading — TODO confirm.
                continue
            # Yield outside the ``try`` so the handler covers the read and nothing else.
            yield page
|
||||
|
||||
def _read_all(self, key: str) -> list[Page]:
|
||||
pages: list[Page] = []
|
||||
for shard in self._shards:
|
||||
|
||||
25
src/shard_wiki/views/__init__.py
Normal file
25
src/shard_wiki/views/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""views/ — derived, recomputable, provenance-carrying read views over the union (§8.4).
|
||||
|
||||
All views here are *derived tier*: pure functions of the attached shards plus the coordination-log
|
||||
fold, storing nothing canonical (SHARD-WP-0011 makes them incrementally maintainable). Presentation
|
||||
stays out of core (L6) — these produce models, never rendered output. Per the dependency rule this
|
||||
package imports down (union/model/coordination/provenance) and is imported only by the orchestrator.
|
||||
"""
|
||||
|
||||
from shard_wiki.views.backlinks import BackLink, BackLinksIndex, build_backlinks
|
||||
from shard_wiki.views.links import (
|
||||
ResolvedLink,
|
||||
WikiLink,
|
||||
extract_links,
|
||||
resolve_links,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"WikiLink",
|
||||
"ResolvedLink",
|
||||
"extract_links",
|
||||
"resolve_links",
|
||||
"BackLink",
|
||||
"BackLinksIndex",
|
||||
"build_backlinks",
|
||||
]
|
||||
65
src/shard_wiki/views/backlinks.py
Normal file
65
src/shard_wiki/views/backlinks.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""BackLinks — the strongest core derived view (SHARD-WP-0010 T2; UC-18).
|
||||
|
||||
For any page name, the set of pages that link to it. Built by extracting wikilinks (T1) from every
|
||||
page across the attached shards and resolving each through the union: only **resolved** links
|
||||
create a backlink (a red-link points at nothing, so it contributes none). Entries carry their
|
||||
**source provenance** (the linking page's identity / shard). Keying by the resolved *name* means a
|
||||
chorus target aggregates the backlinks of all its members into one bucket (union without erasure).
|
||||
|
||||
Derived/recomputable — stores nothing canonical; SHARD-WP-0011 maintains it incrementally.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass
|
||||
|
||||
from shard_wiki.model import Identity
|
||||
from shard_wiki.union import UnionGraph
|
||||
from shard_wiki.views.links import resolve_links
|
||||
|
||||
__all__ = ["BackLink", "BackLinksIndex", "build_backlinks"]
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class BackLink:
|
||||
"""One inbound link: ``source`` (the linking page) references ``target_name``."""
|
||||
|
||||
source: Identity
|
||||
target_name: str
|
||||
|
||||
@property
|
||||
def source_shard(self) -> str:
|
||||
return self.source.shard
|
||||
|
||||
|
||||
class BackLinksIndex:
    """An immutable name → inbound-links index over the union link graph.

    Both the mapping and every bucket are copied at construction, so mutating the caller's
    containers afterwards cannot change what the index reports.
    """

    def __init__(self, edges: Mapping[str, tuple[BackLink, ...]]) -> None:
        # Freeze each bucket: a caller handing in lists (or other iterables) must not be able
        # to mutate the index later, and ``to`` must always return a tuple.
        self._edges = {name: tuple(links) for name, links in dict(edges).items()}

    def to(self, name: str) -> tuple[BackLink, ...]:
        """The backlinks pointing at ``name`` (empty tuple if none)."""
        return self._edges.get(name, ())

    def sources(self, name: str) -> frozenset[Identity]:
        """Just the identities linking to ``name`` — convenient for set assertions."""
        return frozenset(bl.source for bl in self.to(name))

    def names(self) -> frozenset[str]:
        """Every target name that has at least one bucket in the index."""
        return frozenset(self._edges)
|
||||
|
||||
|
||||
def build_backlinks(union: UnionGraph, *, camelcase: bool = False) -> BackLinksIndex:
    """Build the inbound-link index by resolving the links of every page in ``union``.

    Each page from ``union.iter_pages()`` has its body passed to ``resolve_links``; every
    non-red-link result adds a ``BackLink`` (sourced at the page's identity) to the bucket
    keyed by the link's target. Buckets are de-duplicated and ordered by ``str(source)``.
    """
    inbound: dict[str, set[BackLink]] = {}
    for page in union.iter_pages():
        for hit in resolve_links(union, page.body, camelcase=camelcase):
            if not hit.is_red_link:  # red-links don't create backlinks
                name = hit.link.target
                bucket = inbound.setdefault(name, set())
                bucket.add(BackLink(source=page.identity, target_name=name))

    frozen: dict[str, tuple[BackLink, ...]] = {}
    for name, links in inbound.items():
        frozen[name] = tuple(sorted(links, key=lambda bl: str(bl.source)))
    return BackLinksIndex(frozen)
|
||||
91
src/shard_wiki/views/links.py
Normal file
91
src/shard_wiki/views/links.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Wikilink + red-link model (SHARD-WP-0010 T1; FederationRequirements ADR-06).
|
||||
|
||||
A CommonMark *wikilink extension*: ``[[Target]]`` and ``[[Target|label]]`` are extracted from a
|
||||
page body and each target is resolved through the union (ADR-01). A target that resolves is a
|
||||
**link**; one that does not is a **red-link** — a createable hole (UC-23), never a dropped
|
||||
reference (union without erasure). CamelCase auto-linking (``WikiWord``) is **off by default** and
|
||||
opt-in per space, since bare CamelCase is noisy and policy-laden.
|
||||
|
||||
The link *model and resolution* are core; turning a :class:`ResolvedLink` into an ``<a>`` (or a
|
||||
red anchor) is L6 presentation and lives outside this package. Link spans are character offsets in
|
||||
the body so a later layer can address them precisely.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
from shard_wiki.union import Resolution, UnionGraph
|
||||
|
||||
__all__ = ["WikiLink", "ResolvedLink", "extract_links", "resolve_links"]
|
||||
|
||||
_WIKILINK_RE = re.compile(r"\[\[\s*([^\]|]+?)\s*(?:\|\s*([^\]]+?)\s*)?\]\]")
|
||||
# A WikiWord: ≥2 capitalized alphanumeric segments run together (e.g. FrontPage, WikiWord).
|
||||
_CAMELCASE_RE = re.compile(r"\b([A-Z][a-z0-9]+(?:[A-Z][a-z0-9]+)+)\b")
|
||||
_FENCED_RE = re.compile(r"```.*?```", re.DOTALL)
|
||||
_INLINE_CODE_RE = re.compile(r"`[^`\n]*`")
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class WikiLink:
|
||||
"""One extracted reference. ``target`` is the resolve key; ``label`` is the display text (or
|
||||
None to use the target); ``span`` is the ``[start, end)`` offset of the whole token in the body;
|
||||
``auto`` marks a CamelCase auto-link (vs an explicit ``[[...]]``)."""
|
||||
|
||||
target: str
|
||||
label: str | None
|
||||
span: tuple[int, int]
|
||||
auto: bool = False
|
||||
|
||||
@property
|
||||
def text(self) -> str:
|
||||
return self.label or self.target
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class ResolvedLink:
|
||||
"""A :class:`WikiLink` paired with its union :class:`Resolution` (the link's truth status)."""
|
||||
|
||||
link: WikiLink
|
||||
resolution: Resolution
|
||||
|
||||
@property
|
||||
def is_red_link(self) -> bool:
|
||||
return self.resolution.is_red_link
|
||||
|
||||
|
||||
def _mask(body: str, pattern: re.Pattern[str]) -> str:
|
||||
"""Blank out ``pattern`` matches with equal-length spaces so later scans skip them while every
|
||||
surviving match keeps its true offset."""
|
||||
return pattern.sub(lambda m: " " * len(m.group(0)), body)
|
||||
|
||||
|
||||
def extract_links(body: str, *, camelcase: bool = False) -> tuple[WikiLink, ...]:
    """Extract wikilinks from ``body`` in document order, skipping fenced/inline code.

    With ``camelcase=True`` (per-space opt-in), bare ``WikiWord`` tokens outside code and outside
    existing ``[[...]]`` also become links.

    A ``[[...]]`` token whose target is blank after stripping (e.g. ``[[ ]]``) names nothing and
    is dropped. A label that is blank after stripping is reported as ``None``.
    """
    # Code regions are blanked (not removed) so every span below is an offset into ``body``.
    scan = _mask(_mask(body, _FENCED_RE), _INLINE_CODE_RE)
    links: list[WikiLink] = []
    for m in _WIKILINK_RE.finditer(scan):
        target = m.group(1).strip()
        if not target:
            # The pattern can match a whitespace-only target; that is not a usable resolve key.
            continue
        label = (m.group(2) or "").strip() or None
        links.append(WikiLink(target=target, label=label, span=m.span()))

    if camelcase:
        # Mask explicit-link spans too, so a CamelCase target inside [[...]] isn't double-counted.
        cc_scan = _mask(scan, _WIKILINK_RE)
        links.extend(
            WikiLink(target=m.group(1), label=None, span=m.span(), auto=True)
            for m in _CAMELCASE_RE.finditer(cc_scan)
        )

    return tuple(sorted(links, key=lambda link: link.span[0]))
|
||||
|
||||
|
||||
def resolve_links(
    union: UnionGraph, body: str, *, camelcase: bool = False
) -> tuple[ResolvedLink, ...]:
    """Pair each link extracted from ``body`` with ``union.resolve`` of its target.

    Results keep the order produced by ``extract_links`` (link vs red-link, ADR-01).
    """
    resolved: list[ResolvedLink] = []
    for found in extract_links(body, camelcase=camelcase):
        resolved.append(ResolvedLink(found, union.resolve(found.target)))
    return tuple(resolved)
|
||||
51
tests/test_views_backlinks.py
Normal file
51
tests/test_views_backlinks.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Tests for the BackLinks derived view (SHARD-WP-0010 T2)."""
|
||||
|
||||
from shard_wiki.adapters import FolderAdapter
|
||||
from shard_wiki.model import Identity
|
||||
from shard_wiki.union import UnionGraph
|
||||
from shard_wiki.views import build_backlinks
|
||||
|
||||
|
||||
def _shard(tmp_path, name, files):
    """Write ``files`` (relative path → text) under ``tmp_path / name`` and wrap it in a FolderAdapter."""
    shard_root = tmp_path / name
    for relative, content in files.items():
        destination = shard_root / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(content, encoding="utf-8")
    return FolderAdapter(name, shard_root)
|
||||
|
||||
|
||||
def test_link_yields_backlink_with_provenance(tmp_path):
    """A resolved ``[[B]]`` in page A gives B exactly one backlink, which names A's shard."""
    union = UnionGraph("space")
    union.attach(_shard(tmp_path, "shardA", {"A.md": "see [[B]]", "B.md": "target"}))
    backlinks = build_backlinks(union)
    assert backlinks.sources("B") == frozenset({Identity("shardA", "A")})
    [only] = backlinks.to("B")
    assert only.source_shard == "shardA"  # entry carries source provenance
|
||||
|
||||
|
||||
def test_red_links_create_no_backlinks(tmp_path):
    """A link to a page that exists nowhere leaves no entry in the index."""
    union = UnionGraph("space")
    union.attach(_shard(tmp_path, "shardA", {"A.md": "see [[Ghost]]"}))
    backlinks = build_backlinks(union)
    assert backlinks.to("Ghost") == ()  # unresolved target → no backlink
    assert "Ghost" not in backlinks.names()
|
||||
|
||||
|
||||
def test_chorus_target_aggregates_backlinks(tmp_path):
    """Links to a name present in two shards all land in that one name's bucket."""
    # "Home" exists in two shards (a chorus); links to it from anywhere aggregate under one name.
    shard_files = {
        "shardA": {"Home.md": "A home", "A.md": "[[Home]]"},
        "shardB": {"Home.md": "B home", "B.md": "[[Home]]"},
    }
    union = UnionGraph("space")
    for shard_name, files in shard_files.items():
        union.attach(_shard(tmp_path, shard_name, files))
    expected = frozenset({Identity("shardA", "A"), Identity("shardB", "B")})
    assert build_backlinks(union).sources("Home") == expected
|
||||
|
||||
|
||||
def test_backlinks_span_shards(tmp_path):
    """A page in one shard can be the backlink source for a page in another shard."""
    union = UnionGraph("space")
    union.attach(_shard(tmp_path, "shardA", {"Index.md": "x"}))
    union.attach(_shard(tmp_path, "shardB", {"B.md": "links [[Index]]"}))
    linking_pages = build_backlinks(union).sources("Index")
    assert linking_pages == frozenset({Identity("shardB", "B")})
|
||||
69
tests/test_views_links.py
Normal file
69
tests/test_views_links.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Tests for the wikilink + red-link model (SHARD-WP-0010 T1)."""
|
||||
|
||||
from shard_wiki.adapters import FolderAdapter
|
||||
from shard_wiki.union import ResolutionKind, UnionGraph
|
||||
from shard_wiki.views import extract_links, resolve_links
|
||||
|
||||
|
||||
def _shard(tmp_path, name, files):
    """Write ``files`` (relative path → text) under ``tmp_path / name`` and wrap it in a FolderAdapter."""
    shard_root = tmp_path / name
    for relative, content in files.items():
        destination = shard_root / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(content, encoding="utf-8")
    return FolderAdapter(name, shard_root)
|
||||
|
||||
|
||||
def test_extracts_plain_and_labelled_links():
    """``[[Target]]`` has no label; ``[[Target|label]]`` carries one and displays it."""
    found = extract_links("See [[Home]] and [[Index|the index]].")
    observed = [(link.target, link.label, link.text) for link in found]
    expected = [
        ("Home", None, "Home"),
        ("Index", "the index", "the index"),
    ]
    assert observed == expected
|
||||
|
||||
|
||||
def test_links_carry_body_offsets_in_document_order():
    """Links come back in body order and each span slices out its own token."""
    body = "a [[One]] b [[Two]]"
    found = extract_links(body)
    assert [link.target for link in found] == ["One", "Two"]
    start, stop = found[0].span
    assert body[start:stop] == "[[One]]"
|
||||
|
||||
|
||||
def test_code_regions_are_not_scanned():
    """Wikilink syntax inside a fenced block or inline code is ignored."""
    body = "real [[Home]]\n```\n[[NotALink]]\n```\ninline `[[AlsoNot]]` done"
    found = extract_links(body)
    assert [link.target for link in found] == ["Home"]
|
||||
|
||||
|
||||
def test_camelcase_off_by_default_then_opt_in():
    """Bare WikiWords are ignored by default and become ``auto`` links with ``camelcase=True``."""
    body = "FrontPage links to [[Home]]"
    default_targets = [link.target for link in extract_links(body)]
    assert default_targets == ["Home"]  # CamelCase ignored
    opted_in = extract_links(body, camelcase=True)
    assert {link.target for link in opted_in} == {"FrontPage", "Home"}
    front_page = next(link for link in opted_in if link.target == "FrontPage")
    assert front_page.auto is True
|
||||
|
||||
|
||||
def test_camelcase_does_not_double_count_inside_explicit_link():
    """A CamelCase target written as ``[[...]]`` yields only the explicit link."""
    # [[FrontPage]] is one explicit link, not also a CamelCase auto-link.
    found = extract_links("[[FrontPage]]", camelcase=True)
    assert len(found) == 1
    explicit = found[0]
    assert explicit.auto is False
|
||||
|
||||
|
||||
def test_resolve_links_distinguishes_link_from_red_link(tmp_path):
    """An existing target resolves as SINGLE; a missing one is flagged as a red-link."""
    union = UnionGraph("space")
    union.attach(_shard(tmp_path, "shardA", {"Home.md": "home"}))
    by_target = {
        item.link.target: item for item in resolve_links(union, "[[Home]] and [[Ghost]]")
    }
    home = by_target["Home"]
    assert home.resolution.kind is ResolutionKind.SINGLE
    assert home.is_red_link is False
    assert by_target["Ghost"].is_red_link is True  # unresolved → createable red-link
|
||||
|
||||
|
||||
def test_resolve_links_surfaces_chorus(tmp_path):
    """A target present in two attached shards resolves with kind CHORUS."""
    union = UnionGraph("space")
    for shard_name, text in (("shardA", "A"), ("shardB", "B")):
        union.attach(_shard(tmp_path, shard_name, {"Home.md": text}))
    [only] = resolve_links(union, "[[Home]]")
    assert only.resolution.kind is ResolutionKind.CHORUS
|
||||
Reference in New Issue
Block a user