feat(views): AllPages + SiteMap enumeration views (WP-0010 T4)

AllPages enumerates the union's distinct pages, collapsing chorus (same key
across shards) and equivalence-bound identities into one entry via union-find,
noting divergence when members' bodies differ (collapse acknowledged, not
silent). SiteMap builds the namespace tree from page placements, spanning shards.
Both are derived/recomputable and presentation-free.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-16 02:03:15 +02:00
parent 34b0c539f3
commit 3a5acdcb28
3 changed files with 203 additions and 0 deletions

View File

@@ -6,6 +6,7 @@ stays out of core (L6) — these produce models, never rendered output. Per the
package imports down (union/model/coordination/provenance) and is imported only by the orchestrator.
"""
from shard_wiki.views.allpages import AllPagesEntry, SiteMapNode, all_pages, site_map
from shard_wiki.views.backlinks import BackLink, BackLinksIndex, build_backlinks
from shard_wiki.views.links import (
ResolvedLink,
@@ -25,4 +26,8 @@ __all__ = [
"build_backlinks",
"ChangeEntry",
"recent_changes",
"AllPagesEntry",
"SiteMapNode",
"all_pages",
"site_map",
]

View File

@@ -0,0 +1,122 @@
"""AllPages + SiteMap — enumeration views over the union (SHARD-WP-0010 T4).
**AllPages** lists the union's distinct pages, collapsing identities that name the same page: a
*chorus* (same key across shards) and *equivalence-bound* identities (decision-log bindings) fold
into one entry, with divergence noted when the members' bodies differ (union without erasure — the
collapse is acknowledged, never silent). **SiteMap** is the namespace tree built from page
placements (paths), spanning shards.
Both are derived/recomputable and presentation-free (the tree is a model, not rendered HTML).
"""
from __future__ import annotations
from dataclasses import dataclass
from shard_wiki.model import Identity, Page
from shard_wiki.union import UnionGraph
__all__ = ["AllPagesEntry", "SiteMapNode", "all_pages", "site_map"]
@dataclass(frozen=True, slots=True)
class AllPagesEntry:
    """One union page: its representative ``name``, the ``members`` collapsed into it, and whether
    those members' bodies differ (``diverges``)."""

    # Representative name; ``all_pages`` uses the smallest key among the members.
    name: str
    # Every identity folded into this entry (same-key chorus and/or equivalence-bound).
    members: tuple[Identity, ...]
    # True when the members do not all carry the same body.
    diverges: bool
@dataclass(frozen=True, slots=True)
class SiteMapNode:
    """A namespace node: its ``name``, its child namespaces, and the pages directly under it."""

    # A single path segment; ``site_map`` gives the root node the empty string.
    name: str
    # Child namespaces; ``_freeze`` orders them by name.
    children: tuple[SiteMapNode, ...]
    # Identities of the pages sitting directly in this namespace; ``_freeze`` orders them by ``str``.
    pages: tuple[Identity, ...]
class _UnionFind:
def __init__(self) -> None:
self._parent: dict[str, str] = {}
def add(self, x: str) -> None:
self._parent.setdefault(x, x)
def find(self, x: str) -> str:
self.add(x)
root = x
while self._parent[root] != root:
root = self._parent[root]
while self._parent[x] != root:
self._parent[x], x = root, self._parent[x]
return root
def union(self, a: str, b: str) -> None:
self.add(a)
self.add(b)
ra, rb = self.find(a), self.find(b)
if ra != rb:
self._parent[max(ra, rb)] = min(ra, rb)
def all_pages(union: UnionGraph) -> tuple[AllPagesEntry, ...]:
    """Enumerate the union's distinct pages, one entry per group of collapsed identities.

    Identities are grouped when they share a key across shards (a chorus) or when the decision
    log's fold lists them in the same equivalence group. Each entry is named after the smallest
    key in its group and flags ``diverges`` when the grouped bodies are not all equal. Entries
    come back sorted by name.
    """
    page_by_ident: dict[str, Page] = {}
    chorus: dict[str, list[str]] = {}
    for pg in union.iter_pages():
        ident = str(pg.identity)
        page_by_ident[ident] = pg
        chorus.setdefault(pg.identity.key, []).append(ident)

    sets = _UnionFind()
    for ident in page_by_ident:
        sets.add(ident)

    # Same key across shards → chorus.
    for first, *rest in chorus.values():
        for other in rest:
            sets.union(first, other)

    # Decision-log bindings; members with no page in the union are ignored.
    for bound in union.log.fold(union.space).equivalence_groups:
        known = [member for member in bound if member in page_by_ident]
        for other in known[1:]:
            sets.union(known[0], other)

    clusters: dict[str, list[Page]] = {}
    for ident, pg in page_by_ident.items():
        clusters.setdefault(sets.find(ident), []).append(pg)

    entries = [
        AllPagesEntry(
            name=min(pg.identity.key for pg in cluster),
            members=tuple(pg.identity for pg in cluster),
            diverges=len({pg.body for pg in cluster}) > 1,
        )
        for cluster in clusters.values()
    ]
    entries.sort(key=lambda entry: entry.name)
    return tuple(entries)
def _segments(page: Page) -> list[str]:
path = page.placements[0].path if page.placements else page.identity.key
if path.endswith(".md"):
path = path[:-3]
return [seg for seg in path.split("/") if seg]
def site_map(union: UnionGraph) -> SiteMapNode:
    """Build the union's namespace tree from where its pages are placed.

    Every segment of a page's location except the last one is a directory and becomes a nested
    namespace; the page's identity is recorded on the innermost one. The returned root node is
    named ``""``.
    """
    tree: dict = {"children": {}, "pages": []}
    for page in union.iter_pages():
        cursor = tree
        # The final segment is the page itself, so only the leading ones create namespaces.
        for directory in _segments(page)[:-1]:
            kids = cursor["children"]
            if directory not in kids:
                kids[directory] = {"children": {}, "pages": []}
            cursor = kids[directory]
        cursor["pages"].append(page.identity)
    return _freeze("", tree)
def _freeze(name: str, node: dict) -> SiteMapNode:
    """Turn one mutable build-time node (and everything below it) into an immutable SiteMapNode.

    Children are ordered by name and pages by their ``str`` form.
    """
    subtrees = node["children"]
    return SiteMapNode(
        name=name,
        children=tuple(_freeze(child, subtrees[child]) for child in sorted(subtrees)),
        pages=tuple(sorted(node["pages"], key=str)),
    )

View File

@@ -0,0 +1,76 @@
"""Tests for the AllPages + SiteMap enumeration views (SHARD-WP-0010 T4)."""
from shard_wiki.adapters import FolderAdapter
from shard_wiki.coordination import DecisionLog, EventType
from shard_wiki.model import Identity
from shard_wiki.union import UnionGraph
from shard_wiki.views import all_pages, site_map
def _shard(tmp_path, name, files):
    """Write ``files`` (relative path → text) under ``tmp_path / name`` and return a
    FolderAdapter called ``name`` over that directory."""
    shard_dir = tmp_path / name
    for relative, content in files.items():
        target = shard_dir / relative
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")
    return FolderAdapter(name, shard_dir)
def test_all_pages_spans_shards(tmp_path):
    """Pages living in different shards all show up in the enumeration."""
    graph = UnionGraph("space")
    graph.attach(_shard(tmp_path, "shardA", {"A.md": "a"}))
    graph.attach(_shard(tmp_path, "shardB", {"B.md": "b"}))

    listed = {entry.name for entry in all_pages(graph)}

    assert listed == {"A", "B"}
def test_chorus_collapses_to_one_entry_with_divergence(tmp_path):
    """The same key in two shards yields one entry that lists both members and flags divergence."""
    graph = UnionGraph("space")
    graph.attach(_shard(tmp_path, "shardA", {"Home.md": "A home"}))
    graph.attach(_shard(tmp_path, "shardB", {"Home.md": "B home"}))

    matches = [entry for entry in all_pages(graph) if entry.name == "Home"]

    # Chorus → a single entry.
    assert len(matches) == 1
    home = matches[0]
    expected_members = {Identity("shardA", "Home"), Identity("shardB", "Home")}
    assert set(home.members) == expected_members
    # Bodies differ — the collapse is acknowledged, not silent.
    assert home.diverges is True
def test_chorus_same_body_does_not_diverge(tmp_path):
    """A chorus whose members carry identical bodies is not marked as diverging."""
    graph = UnionGraph("space")
    graph.attach(_shard(tmp_path, "shardA", {"Home.md": "same"}))
    graph.attach(_shard(tmp_path, "shardB", {"Home.md": "same"}))

    homes = [entry for entry in all_pages(graph) if entry.name == "Home"]
    (home,) = homes  # unpacking fails unless there is exactly one "Home" entry

    assert home.diverges is False
def test_equivalence_binding_collapses_distinct_keys(tmp_path):
    """A decision-log binding folds two differently-keyed identities into a single entry."""
    binding = {"members": ["shardA:Foo", "shardB:Bar"]}
    log = DecisionLog()
    log.append("space", EventType.BINDING_MADE, binding)

    graph = UnionGraph("space", log=log)
    graph.attach(_shard(tmp_path, "shardA", {"Foo.md": "x"}))
    graph.attach(_shard(tmp_path, "shardB", {"Bar.md": "x"}))

    expected_members = {Identity("shardA", "Foo"), Identity("shardB", "Bar")}
    matching = [entry for entry in all_pages(graph) if set(entry.members) == expected_members]

    # The two bound identities fold into one entry, named by the min key ("Bar").
    assert len(matching) == 1
    assert matching[0].name == "Bar"
def test_sitemap_reflects_namespace_paths(tmp_path):
    """Directories become nested namespaces and each page sits in its own directory's node."""
    files = {"Home.md": "h", "docs/Guide.md": "g", "docs/api/Ref.md": "r"}
    graph = UnionGraph("space")
    graph.attach(_shard(tmp_path, "shardA", files))

    tree = site_map(graph)

    # Top level: the "Home" page directly, plus a "docs" namespace.
    assert any(ident.key == "Home" for ident in tree.pages)
    docs = next(child for child in tree.children if child.name == "docs")
    assert any(ident.key == "docs/Guide" for ident in docs.pages)

    # One level further down: "docs/api" holds the reference page.
    api = next(child for child in docs.children if child.name == "api")
    assert any(ident.key == "docs/api/Ref" for ident in api.pages)