Files
shard-wiki/tests/test_incremental_verification.py
tegwick a8e65235a8 feat(incremental): I-2 digest + consistency-checker (WP-0011 T3)
A Merkle-style digest summarizes the derived tier (per-identity fingerprint +
incident edges as order-independent leaves) so equal states have equal digests
and the digest is stable under equivalent event orders. A ConsistencyChecker
recomputes the authoritative fold from the current source, compares it over a
sampled region, and on mismatch scoped-recomputes just the affected identities —
self-healing missed-delta drift, corrupted internal state, and vanished pages.
Makes derived = f(canonical) verified, not asserted.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 02:16:50 +02:00

90 lines
3.1 KiB
Python

"""Tests for I-2 verification — digest + consistency-checker (SHARD-WP-0011 T3)."""
from shard_wiki.incremental import (
ConsistencyChecker,
EquivalenceIndex,
derived_digest,
)
from shard_wiki.model import Identity, Page
from shard_wiki.provenance import ProvenanceEnvelope
def _page(shard, key, body):
    """Build a ``Page`` fixture identified by ``(shard, key)`` carrying ``body``.

    The provenance envelope names ``shard`` as its source shard.
    """
    identity = Identity(shard, key)
    envelope = ProvenanceEnvelope(source_shard=shard)
    return Page(identity=identity, body=body, envelope=envelope)
def test_digest_is_stable_under_equivalent_event_orders():
    """Adding the same pages in opposite orders must yield equal digests."""
    fixtures = [
        _page("A", "Foo", "shared body text here"),
        _page("B", "Bar", "shared body text here"),
        _page("C", "Baz", "an entirely separate unrelated document"),
    ]

    def _index_from(ordered_pages):
        # Feed pages one at a time so insertion order is the only variable.
        index = EquivalenceIndex()
        for page in ordered_pages:
            index.add(page)
        return index

    in_order = _index_from(fixtures)
    backwards = _index_from(reversed(fixtures))

    assert derived_digest(in_order) == derived_digest(backwards)
def test_clean_index_reports_healthy():
    """A freshly built index over an unchanged source reports no drift.

    Also checks that running the checker leaves the source list holding the
    same pages, in the same order, that it started with.
    """
    pages = [_page("A", "Foo", "same body"), _page("B", "Bar", "same body")]
    snapshot = list(pages)  # shallow copy to compare against after the check

    idx = EquivalenceIndex()
    idx.build(pages)

    def pages_fn():
        return pages

    report = ConsistencyChecker(idx, pages_fn).check_and_repair()

    assert report.drifted is False
    assert report.healthy is True
    # Compare contents rather than truthiness: a bare `assert pages_fn()` would
    # still pass if the checker had altered the source, as long as the list
    # stayed non-empty.
    assert pages_fn() == snapshot
def test_missed_delta_drift_is_detected_and_repaired():
    """A source edit the index was never told about is detected and repaired."""
    page_a = _page("A", "Foo", "converging target body")
    page_b = _page("B", "Bar", "initially unrelated separate text")
    source = {"pages": [page_a, page_b]}

    idx = EquivalenceIndex()
    idx.build(source["pages"])
    # The two bodies differ, so no equivalence group exists yet.
    assert idx.groups() == ()

    # B's body now matches A's in the source, but the index is not notified
    # (a missed delta), so the derived tier has drifted from the source.
    page_b_updated = _page("B", "Bar", "converging target body")
    source["pages"] = [page_a, page_b_updated]

    def current_pages():
        # Read through the dict on each call so the checker sees the new list.
        return source["pages"]

    report = ConsistencyChecker(idx, current_pages).check_and_repair()
    assert report.drifted is True
    assert report.repaired is True
    assert report.healthy is True

    # After the repair, A's equivalence set contains both identities.
    actual = idx.equivalent_to(Identity("A", "Foo"))
    expected = frozenset({Identity("A", "Foo"), Identity("B", "Bar")})
    assert actual == expected
def test_corrupted_internal_state_is_healed():
    """Clearing the index's content edges is detected and healed by the checker."""
    first = _page("A", "Foo", "identical content")
    second = _page("B", "Bar", "identical content")

    idx = EquivalenceIndex()
    idx.build([first, second])

    # Simulate index corruption by emptying the derived content edges directly.
    idx._content_edges.clear()
    assert idx.groups() == ()  # the group is gone once the edges are cleared

    def fresh_source():
        return [first, second]

    report = ConsistencyChecker(idx, fresh_source).check_and_repair()
    assert report.drifted is True
    assert report.healthy is True
    # A non-empty groups() shows the checker put the equivalence back.
    assert idx.groups()
def test_removed_source_page_is_reconciled():
    """A page that disappears from the source is dropped from the index."""
    kept = _page("A", "Foo", "same body")
    vanished = _page("B", "Bar", "same body")

    idx = EquivalenceIndex()
    idx.build([kept, vanished])

    def remaining_pages():
        # Only A is returned: B has vanished from the source.
        return [kept]

    report = ConsistencyChecker(idx, remaining_pages).check_and_repair()
    assert report.healthy is True

    gone = Identity("B", "Bar")
    assert gone not in idx.identities()