generated from coulomb/repo-seed
feat(views): BackLinks derived view over the union link graph (WP-0010 T2)
For any page name, the set of pages that link to it: extract wikilinks from every union page (new UnionGraph.iter_pages enumeration) and index the resolved ones by target name. Red-links create no backlinks; entries carry source provenance; a chorus target aggregates the backlinks of all members under one name. Derived/recomputable, stores nothing canonical. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,7 @@ imported by nothing.
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
from collections.abc import Iterator
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
@@ -68,6 +69,20 @@ class UnionGraph:
|
|||||||
def shard(self, shard_id: str) -> ShardAdapter | None:
    """Return the attached shard whose ``shard_id`` equals ``shard_id``, or ``None`` if none does."""
    # ``next`` with a default stops at the first match and never raises StopIteration.
    return next((s for s in self._shards if s.shard_id == shard_id), None)
|
||||||
|
|
||||||
|
@property
def shards(self) -> tuple[ShardAdapter, ...]:
    """The attached shards as a tuple snapshot, in the order held by ``self._shards``."""
    # A fresh tuple, so callers cannot mutate the graph's own shard list through it.
    return tuple(self._shards)
|
||||||
|
|
||||||
|
def iter_pages(self) -> Iterator[Page]:
    """Every page across attached shards, raw (per-shard, not chorus-collapsed). The
    enumeration substrate for derived views — BackLinks, AllPages, SiteMap (§8.4).

    Pages are yielded shard by shard, in each shard's ``keys()`` order. A key whose
    ``read`` raises ``KeyError`` is skipped rather than aborting the enumeration.
    """
    for shard in self._shards:
        for key in shard.keys():
            # Guard only the read: with the ``yield`` inside the ``try``, a KeyError thrown
            # into the generator at the yield point would be swallowed as a missing key.
            try:
                page = shard.read(key)
            except KeyError:
                continue
            yield page
|
||||||
|
|
||||||
def _read_all(self, key: str) -> list[Page]:
|
def _read_all(self, key: str) -> list[Page]:
|
||||||
pages: list[Page] = []
|
pages: list[Page] = []
|
||||||
for shard in self._shards:
|
for shard in self._shards:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ stays out of core (L6) — these produce models, never rendered output. Per the
|
|||||||
package imports down (union/model/coordination/provenance) and is imported only by the orchestrator.
|
package imports down (union/model/coordination/provenance) and is imported only by the orchestrator.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from shard_wiki.views.backlinks import BackLink, BackLinksIndex, build_backlinks
|
||||||
from shard_wiki.views.links import (
|
from shard_wiki.views.links import (
|
||||||
ResolvedLink,
|
ResolvedLink,
|
||||||
WikiLink,
|
WikiLink,
|
||||||
@@ -18,4 +19,7 @@ __all__ = [
|
|||||||
"ResolvedLink",
|
"ResolvedLink",
|
||||||
"extract_links",
|
"extract_links",
|
||||||
"resolve_links",
|
"resolve_links",
|
||||||
|
"BackLink",
|
||||||
|
"BackLinksIndex",
|
||||||
|
"build_backlinks",
|
||||||
]
|
]
|
||||||
|
|||||||
65
src/shard_wiki/views/backlinks.py
Normal file
65
src/shard_wiki/views/backlinks.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
"""BackLinks — the strongest core derived view (SHARD-WP-0010 T2; UC-18).
|
||||||
|
|
||||||
|
For any page name, the set of pages that link to it. Built by extracting wikilinks (T1) from every
|
||||||
|
page across the attached shards and resolving each through the union: only **resolved** links
|
||||||
|
create a backlink (a red-link points at nothing, so it contributes none). Entries carry their
|
||||||
|
**source provenance** (the linking page's identity / shard). Keying by the resolved *name* means a
|
||||||
|
chorus target aggregates the backlinks of all its members into one bucket (union without erasure).
|
||||||
|
|
||||||
|
Derived/recomputable — stores nothing canonical; SHARD-WP-0011 maintains it incrementally.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Mapping
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from shard_wiki.model import Identity
|
||||||
|
from shard_wiki.union import UnionGraph
|
||||||
|
from shard_wiki.views.links import resolve_links
|
||||||
|
|
||||||
|
__all__ = ["BackLink", "BackLinksIndex", "build_backlinks"]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True, slots=True)
|
||||||
|
class BackLink:
|
||||||
|
"""One inbound link: ``source`` (the linking page) references ``target_name``."""
|
||||||
|
|
||||||
|
source: Identity
|
||||||
|
target_name: str
|
||||||
|
|
||||||
|
@property
|
||||||
|
def source_shard(self) -> str:
|
||||||
|
return self.source.shard
|
||||||
|
|
||||||
|
|
||||||
|
class BackLinksIndex:
    """An immutable name → inbound-links index over the union link graph."""

    def __init__(self, edges: Mapping[str, tuple[BackLink, ...]]) -> None:
        # Copy the mapping and freeze every bucket to a tuple, so later changes to the
        # caller's mapping or buckets cannot alter this index.
        self._edges = {name: tuple(links) for name, links in edges.items()}

    def to(self, name: str) -> tuple[BackLink, ...]:
        """The backlinks pointing at ``name`` (empty if none)."""
        return self._edges.get(name, ())

    def sources(self, name: str) -> frozenset[Identity]:
        """Just the identities linking to ``name`` — convenient for set assertions."""
        return frozenset(bl.source for bl in self.to(name))

    def names(self) -> frozenset[str]:
        """Every target name that has at least one bucket in the index."""
        return frozenset(self._edges)
|
||||||
|
|
||||||
|
|
||||||
|
def build_backlinks(union: UnionGraph, *, camelcase: bool = False) -> BackLinksIndex:
    """Scan every union page's links and index the resolved ones by target name.

    ``camelcase`` is passed straight through to ``resolve_links``. Red-links are skipped.
    Each bucket is de-duplicated and ordered by ``str(source)`` so the result is
    deterministic.
    """
    # Sets collapse repeated links from one page to the same target into a single entry.
    edges: dict[str, set[BackLink]] = {}
    for page in union.iter_pages():
        for resolved in resolve_links(union, page.body, camelcase=camelcase):
            if resolved.is_red_link:
                continue  # red-links don't create backlinks
            target = resolved.link.target
            edges.setdefault(target, set()).add(
                BackLink(source=page.identity, target_name=target)
            )
    return BackLinksIndex(
        {name: tuple(sorted(links, key=lambda bl: str(bl.source))) for name, links in edges.items()}
    )
|
||||||
51
tests/test_views_backlinks.py
Normal file
51
tests/test_views_backlinks.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
"""Tests for the BackLinks derived view (SHARD-WP-0010 T2)."""
|
||||||
|
|
||||||
|
from shard_wiki.adapters import FolderAdapter
|
||||||
|
from shard_wiki.model import Identity
|
||||||
|
from shard_wiki.union import UnionGraph
|
||||||
|
from shard_wiki.views import build_backlinks
|
||||||
|
|
||||||
|
|
||||||
|
def _shard(tmp_path, name, files):
    """Write ``files`` (relative path → text) under ``tmp_path / name`` and wrap it in a FolderAdapter."""
    root = tmp_path / name
    for rel, text in files.items():
        p = root / rel
        # Create intermediate directories so nested relative paths work.
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(text, encoding="utf-8")
    return FolderAdapter(name, root)
|
||||||
|
|
||||||
|
|
||||||
|
def test_link_yields_backlink_with_provenance(tmp_path):
    """A resolved link from A to B produces one backlink on B that names A and its shard."""
    u = UnionGraph("space")
    u.attach(_shard(tmp_path, "shardA", {"A.md": "see [[B]]", "B.md": "target"}))
    index = build_backlinks(u)
    assert index.sources("B") == frozenset({Identity("shardA", "A")})
    (bl,) = index.to("B")
    assert bl.source_shard == "shardA"  # entry carries source provenance
|
||||||
|
|
||||||
|
|
||||||
|
def test_red_links_create_no_backlinks(tmp_path):
    """A link to a page that exists in no shard leaves no entry in the index."""
    u = UnionGraph("space")
    u.attach(_shard(tmp_path, "shardA", {"A.md": "see [[Ghost]]"}))
    index = build_backlinks(u)
    assert index.to("Ghost") == ()  # unresolved target → no backlink
    assert "Ghost" not in index.names()
|
||||||
|
|
||||||
|
|
||||||
|
def test_chorus_target_aggregates_backlinks(tmp_path):
    """Links to a name present in two shards are collected under that single name."""
    # "Home" exists in two shards (a chorus); links to it from anywhere aggregate under one name.
    u = UnionGraph("space")
    u.attach(_shard(tmp_path, "shardA", {"Home.md": "A home", "A.md": "[[Home]]"}))
    u.attach(_shard(tmp_path, "shardB", {"Home.md": "B home", "B.md": "[[Home]]"}))
    index = build_backlinks(u)
    assert index.sources("Home") == frozenset(
        {Identity("shardA", "A"), Identity("shardB", "B")}
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_backlinks_span_shards(tmp_path):
    """A link in one shard to a page that lives in another shard still yields a backlink."""
    u = UnionGraph("space")
    u.attach(_shard(tmp_path, "shardA", {"Index.md": "x"}))
    u.attach(_shard(tmp_path, "shardB", {"B.md": "links [[Index]]"}))
    index = build_backlinks(u)
    assert index.sources("Index") == frozenset({Identity("shardB", "B")})
|
||||||
Reference in New Issue
Block a user