From 3a5acdcb286a78edd134e362700d53ce9544b667 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 16 Jun 2026 02:03:15 +0200 Subject: [PATCH] feat(views): AllPages + SiteMap enumeration views (WP-0010 T4) AllPages enumerates the union's distinct pages, collapsing chorus (same key across shards) and equivalence-bound identities into one entry via union-find, noting divergence when members' bodies differ (collapse acknowledged, not silent). SiteMap builds the namespace tree from page placements, spanning shards. Both derived/recomputable and presentation-free. Co-Authored-By: Claude Opus 4.8 --- src/shard_wiki/views/__init__.py | 5 ++ src/shard_wiki/views/allpages.py | 122 +++++++++++++++++++++++++++++++ tests/test_views_allpages.py | 76 +++++++++++++++++++ 3 files changed, 203 insertions(+) create mode 100644 src/shard_wiki/views/allpages.py create mode 100644 tests/test_views_allpages.py diff --git a/src/shard_wiki/views/__init__.py b/src/shard_wiki/views/__init__.py index 9009cbc..1ffec6b 100644 --- a/src/shard_wiki/views/__init__.py +++ b/src/shard_wiki/views/__init__.py @@ -6,6 +6,7 @@ stays out of core (L6) — these produce models, never rendered output. Per the package imports down (union/model/coordination/provenance) and is imported only by the orchestrator. """ +from shard_wiki.views.allpages import AllPagesEntry, SiteMapNode, all_pages, site_map from shard_wiki.views.backlinks import BackLink, BackLinksIndex, build_backlinks from shard_wiki.views.links import ( ResolvedLink, @@ -25,4 +26,8 @@ __all__ = [ "build_backlinks", "ChangeEntry", "recent_changes", + "AllPagesEntry", + "SiteMapNode", + "all_pages", + "site_map", ] diff --git a/src/shard_wiki/views/allpages.py b/src/shard_wiki/views/allpages.py new file mode 100644 index 0000000..d10e1c6 --- /dev/null +++ b/src/shard_wiki/views/allpages.py @@ -0,0 +1,122 @@ +"""AllPages + SiteMap — enumeration views over the union (SHARD-WP-0010 T4). 
+ +**AllPages** lists the union's distinct pages, collapsing identities that name the same page: a +*chorus* (same key across shards) and *equivalence-bound* identities (decision-log bindings) fold +into one entry, with divergence noted when the members' bodies differ (union without erasure — the +collapse is acknowledged, never silent). **SiteMap** is the namespace tree built from page +placements (paths), spanning shards. + +Both are derived/recomputable and presentation-free (the tree is a model, not rendered HTML). +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from shard_wiki.model import Identity, Page +from shard_wiki.union import UnionGraph + +__all__ = ["AllPagesEntry", "SiteMapNode", "all_pages", "site_map"] + + +@dataclass(frozen=True, slots=True) +class AllPagesEntry: + """One union page: its representative ``name``, the ``members`` collapsed into it, and + ``diverges`` — whether those members' bodies differ (a chorus with differing content).""" + + name: str + members: tuple[Identity, ...] + diverges: bool + + +@dataclass(frozen=True, slots=True) +class SiteMapNode: + """A namespace node: its path ``name``, child namespaces, and pages directly under it.""" + + name: str + children: tuple[SiteMapNode, ...] + pages: tuple[Identity, ...] 
+ + +class _UnionFind: + def __init__(self) -> None: + self._parent: dict[str, str] = {} + + def add(self, x: str) -> None: + self._parent.setdefault(x, x) + + def find(self, x: str) -> str: + self.add(x) + root = x + while self._parent[root] != root: + root = self._parent[root] + while self._parent[x] != root: + self._parent[x], x = root, self._parent[x] + return root + + def union(self, a: str, b: str) -> None: + self.add(a) + self.add(b) + ra, rb = self.find(a), self.find(b) + if ra != rb: + self._parent[max(ra, rb)] = min(ra, rb) + + +def all_pages(union: UnionGraph) -> tuple[AllPagesEntry, ...]: + """Enumerate the union's distinct pages, collapsing chorus + equivalence-bound members.""" + pages: dict[str, Page] = {} + by_key: dict[str, list[str]] = {} + for page in union.iter_pages(): + ident = str(page.identity) + pages[ident] = page + by_key.setdefault(page.identity.key, []).append(ident) + + uf = _UnionFind() + for ident in pages: + uf.add(ident) + for idents in by_key.values(): # same key across shards → chorus + for other in idents[1:]: + uf.union(idents[0], other) + fold = union.log.fold(union.space) + for group in fold.equivalence_groups: # decision-log bindings + present = [m for m in group if m in pages] + for other in present[1:]: + uf.union(present[0], other) + + groups: dict[str, list[str]] = {} + for ident in pages: + groups.setdefault(uf.find(ident), []).append(ident) + + entries: list[AllPagesEntry] = [] + for members in groups.values(): + member_pages = [pages[m] for m in members] + identities = tuple(p.identity for p in member_pages) + name = min(p.identity.key for p in member_pages) + diverges = len({p.body for p in member_pages}) > 1 + entries.append(AllPagesEntry(name=name, members=identities, diverges=diverges)) + return tuple(sorted(entries, key=lambda e: e.name)) + + +def _segments(page: Page) -> list[str]: + path = page.placements[0].path if page.placements else page.identity.key + if path.endswith(".md"): + path = path[:-3] + return 
[seg for seg in path.split("/") if seg] + + +def site_map(union: UnionGraph) -> SiteMapNode: + """The union namespace tree from page placements (directories nest; pages sit at their dir).""" + root: dict = {"children": {}, "pages": []} + for page in union.iter_pages(): + segments = _segments(page) + node = root + for seg in segments[:-1]: # directory segments build the nesting + node = node["children"].setdefault(seg, {"children": {}, "pages": []}) + node["pages"].append(page.identity) + return _freeze("", root) + + +def _freeze(name: str, node: dict) -> SiteMapNode: + children = tuple(_freeze(k, v) for k, v in sorted(node["children"].items())) + pages = tuple(sorted(node["pages"], key=str)) + return SiteMapNode(name=name, children=children, pages=pages) diff --git a/tests/test_views_allpages.py b/tests/test_views_allpages.py new file mode 100644 index 0000000..9002708 --- /dev/null +++ b/tests/test_views_allpages.py @@ -0,0 +1,76 @@ +"""Tests for the AllPages + SiteMap enumeration views (SHARD-WP-0010 T4).""" + +from shard_wiki.adapters import FolderAdapter +from shard_wiki.coordination import DecisionLog, EventType +from shard_wiki.model import Identity +from shard_wiki.union import UnionGraph +from shard_wiki.views import all_pages, site_map + + +def _shard(tmp_path, name, files): + root = tmp_path / name + for rel, text in files.items(): + p = root / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(text, encoding="utf-8") + return FolderAdapter(name, root) + + +def test_all_pages_spans_shards(tmp_path): + u = UnionGraph("space") + u.attach(_shard(tmp_path, "shardA", {"A.md": "a"})) + u.attach(_shard(tmp_path, "shardB", {"B.md": "b"})) + names = {e.name for e in all_pages(u)} + assert names == {"A", "B"} + + +def test_chorus_collapses_to_one_entry_with_divergence(tmp_path): + u = UnionGraph("space") + u.attach(_shard(tmp_path, "shardA", {"Home.md": "A home"})) + u.attach(_shard(tmp_path, "shardB", {"Home.md": "B home"})) + entries = all_pages(u) 
+ home = [e for e in entries if e.name == "Home"] + assert len(home) == 1 # chorus → single entry + assert set(home[0].members) == {Identity("shardA", "Home"), Identity("shardB", "Home")} + assert home[0].diverges is True # bodies differ — collapse acknowledged, not silent + + +def test_chorus_same_body_does_not_diverge(tmp_path): + u = UnionGraph("space") + u.attach(_shard(tmp_path, "shardA", {"Home.md": "same"})) + u.attach(_shard(tmp_path, "shardB", {"Home.md": "same"})) + (home,) = [e for e in all_pages(u) if e.name == "Home"] + assert home.diverges is False + + +def test_equivalence_binding_collapses_distinct_keys(tmp_path): + log = DecisionLog() + log.append( + "space", EventType.BINDING_MADE, {"members": ["shardA:Foo", "shardB:Bar"]} + ) + u = UnionGraph("space", log=log) + u.attach(_shard(tmp_path, "shardA", {"Foo.md": "x"})) + u.attach(_shard(tmp_path, "shardB", {"Bar.md": "x"})) + pair = {Identity("shardA", "Foo"), Identity("shardB", "Bar")} + # The two bound identities fold into one entry (named by the min key, "Bar"). + bound = [e for e in all_pages(u) if {*e.members} == pair] + assert len(bound) == 1 + assert bound[0].name == "Bar" + + +def test_sitemap_reflects_namespace_paths(tmp_path): + u = UnionGraph("space") + u.attach( + _shard( + tmp_path, + "shardA", + {"Home.md": "h", "docs/Guide.md": "g", "docs/api/Ref.md": "r"}, + ) + ) + root = site_map(u) + # Top level: "Home" page directly, and a "docs" namespace. + assert any(p.key == "Home" for p in root.pages) + docs = next(c for c in root.children if c.name == "docs") + assert any(p.key == "docs/Guide" for p in docs.pages) + api = next(c for c in docs.children if c.name == "api") + assert any(p.key == "docs/api/Ref" for p in api.pages)