diff --git a/src/shard_wiki/union/resolver.py b/src/shard_wiki/union/resolver.py
index 9fcfce2..71c374f 100644
--- a/src/shard_wiki/union/resolver.py
+++ b/src/shard_wiki/union/resolver.py
@@ -13,6 +13,7 @@ imported by nothing.
 from __future__ import annotations
 
 import dataclasses
+from collections.abc import Iterator
 from dataclasses import dataclass
 from enum import Enum
 
@@ -68,6 +69,27 @@ class UnionGraph:
     def shard(self, shard_id: str) -> ShardAdapter | None:
         return next((s for s in self._shards if s.shard_id == shard_id), None)
 
+    @property
+    def shards(self) -> tuple[ShardAdapter, ...]:
+        """The attached shards, returned as a tuple."""
+        return tuple(self._shards)
+
+    def iter_pages(self) -> Iterator[Page]:
+        """Every page across attached shards, raw (per-shard, not chorus-collapsed). The
+        enumeration substrate for derived views — BackLinks, AllPages, SiteMap (§8.4).
+
+        A key that raises ``KeyError`` on read is skipped rather than aborting the scan.
+        """
+        for shard in self._shards:
+            for key in shard.keys():
+                # Only the read is guarded: a ``yield`` inside the ``try`` would also swallow a
+                # KeyError injected at the suspension point via ``generator.throw()``.
+                try:
+                    page = shard.read(key)
+                except KeyError:
+                    continue
+                yield page
+
     def _read_all(self, key: str) -> list[Page]:
         pages: list[Page] = []
         for shard in self._shards:
diff --git a/src/shard_wiki/views/__init__.py b/src/shard_wiki/views/__init__.py
index 7593bd3..64003d3 100644
--- a/src/shard_wiki/views/__init__.py
+++ b/src/shard_wiki/views/__init__.py
@@ -6,6 +6,7 @@ stays out of core (L6) — these produce models, never rendered output. Per the
 package imports down (union/model/coordination/provenance) and is imported only by the orchestrator.
 """
 
+from shard_wiki.views.backlinks import BackLink, BackLinksIndex, build_backlinks
 from shard_wiki.views.links import (
     ResolvedLink,
     WikiLink,
@@ -18,4 +19,7 @@ __all__ = [
     "ResolvedLink",
     "extract_links",
     "resolve_links",
+    "BackLink",
+    "BackLinksIndex",
+    "build_backlinks",
 ]
diff --git a/src/shard_wiki/views/backlinks.py b/src/shard_wiki/views/backlinks.py
new file mode 100644
index 0000000..c32fdd0
--- /dev/null
+++ b/src/shard_wiki/views/backlinks.py
@@ -0,0 +1,74 @@
+"""BackLinks — the strongest core derived view (SHARD-WP-0010 T2; UC-18).
+
+For any page name, the set of pages that link to it. Built by extracting wikilinks (T1) from every
+page across the attached shards and resolving each through the union: only **resolved** links
+create a backlink (a red-link points at nothing, so it contributes none). Entries carry their
+**source provenance** (the linking page's identity / shard). Keying by the resolved *name* means a
+chorus target aggregates the backlinks of all its members into one bucket (union without erasure).
+
+Derived/recomputable — stores nothing canonical; SHARD-WP-0011 maintains it incrementally.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
+
+from shard_wiki.model import Identity
+from shard_wiki.union import UnionGraph
+from shard_wiki.views.links import resolve_links
+
+__all__ = ["BackLink", "BackLinksIndex", "build_backlinks"]
+
+
+@dataclass(frozen=True, slots=True)
+class BackLink:
+    """One inbound link: ``source`` (the linking page) references ``target_name``."""
+
+    source: Identity
+    target_name: str
+
+    @property
+    def source_shard(self) -> str:
+        """The ``shard`` component of ``source`` — which shard the linking page lives in."""
+        return self.source.shard
+
+
+class BackLinksIndex:
+    """An immutable name → inbound-links index over the union link graph."""
+
+    def __init__(self, edges: Mapping[str, tuple[BackLink, ...]]) -> None:
+        # Copy so later mutation of the caller's mapping cannot change this index.
+        self._edges = dict(edges)
+
+    def to(self, name: str) -> tuple[BackLink, ...]:
+        """The backlinks pointing at ``name`` (empty if none)."""
+        return self._edges.get(name, ())
+
+    def sources(self, name: str) -> frozenset[Identity]:
+        """Just the identities linking to ``name`` — convenient for set assertions."""
+        return frozenset(bl.source for bl in self.to(name))
+
+    def names(self) -> frozenset[str]:
+        """Every target name present in the index."""
+        return frozenset(self._edges)
+
+
+def build_backlinks(union: UnionGraph, *, camelcase: bool = False) -> BackLinksIndex:
+    """Scan every union page's links and index the resolved ones by target name.
+
+    ``camelcase`` is forwarded unchanged to ``resolve_links``. Each bucket is de-duplicated and
+    sorted by ``str(source)``.
+    """
+    edges: dict[str, set[BackLink]] = {}
+    for page in union.iter_pages():
+        for resolved in resolve_links(union, page.body, camelcase=camelcase):
+            if resolved.is_red_link:
+                continue  # red-links don't create backlinks
+            # NOTE(review): buckets are keyed by the link's written target; the module docstring
+            # says "resolved name" — confirm the two always coincide (e.g. case/alias handling).
+            target = resolved.link.target
+            edges.setdefault(target, set()).add(BackLink(source=page.identity, target_name=target))
+    return BackLinksIndex(
+        {name: tuple(sorted(links, key=lambda bl: str(bl.source))) for name, links in edges.items()}
+    )
diff --git a/tests/test_views_backlinks.py b/tests/test_views_backlinks.py
new file mode 100644
index 0000000..844d810
--- /dev/null
+++ b/tests/test_views_backlinks.py
@@ -0,0 +1,51 @@
+"""Tests for the BackLinks derived view (SHARD-WP-0010 T2)."""
+
+from shard_wiki.adapters import FolderAdapter
+from shard_wiki.model import Identity
+from shard_wiki.union import UnionGraph
+from shard_wiki.views import build_backlinks
+
+
+def _shard(tmp_path, name, files):
+    root = tmp_path / name
+    for rel, text in files.items():
+        p = root / rel
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.write_text(text, encoding="utf-8")
+    return FolderAdapter(name, root)
+
+
+def test_link_yields_backlink_with_provenance(tmp_path):
+    u = UnionGraph("space")
+    u.attach(_shard(tmp_path, "shardA", {"A.md": "see [[B]]", "B.md": "target"}))
+    index = build_backlinks(u)
+    assert index.sources("B") == frozenset({Identity("shardA", "A")})
+    (bl,) = index.to("B")
+    assert bl.source_shard == "shardA"  # entry carries source provenance
+
+
+def test_red_links_create_no_backlinks(tmp_path):
+    u = UnionGraph("space")
+    u.attach(_shard(tmp_path, "shardA", {"A.md": "see [[Ghost]]"}))
+    index = build_backlinks(u)
+    assert index.to("Ghost") == ()  # unresolved target → no backlink
+    assert "Ghost" not in index.names()
+
+
+def test_chorus_target_aggregates_backlinks(tmp_path):
+    # "Home" exists in two shards (a chorus); links to it from anywhere aggregate under one name.
+    u = UnionGraph("space")
+    u.attach(_shard(tmp_path, "shardA", {"Home.md": "A home", "A.md": "[[Home]]"}))
+    u.attach(_shard(tmp_path, "shardB", {"Home.md": "B home", "B.md": "[[Home]]"}))
+    index = build_backlinks(u)
+    assert index.sources("Home") == frozenset(
+        {Identity("shardA", "A"), Identity("shardB", "B")}
+    )
+
+
+def test_backlinks_span_shards(tmp_path):
+    u = UnionGraph("space")
+    u.attach(_shard(tmp_path, "shardA", {"Index.md": "x"}))
+    u.attach(_shard(tmp_path, "shardB", {"B.md": "links [[Index]]"}))
+    index = build_backlinks(u)
+    assert index.sources("Index") == frozenset({Identity("shardB", "B")})