"""Tests for I-2 verification — digest + consistency-checker (SHARD-WP-0011 T3).""" from shard_wiki.incremental import ( ConsistencyChecker, EquivalenceIndex, derived_digest, ) from shard_wiki.model import Identity, Page from shard_wiki.provenance import ProvenanceEnvelope def _page(shard, key, body): return Page( identity=Identity(shard, key), body=body, envelope=ProvenanceEnvelope(source_shard=shard), ) def test_digest_is_stable_under_equivalent_event_orders(): pages = [ _page("A", "Foo", "shared body text here"), _page("B", "Bar", "shared body text here"), _page("C", "Baz", "an entirely separate unrelated document"), ] forward = EquivalenceIndex() for p in pages: forward.add(p) reverse = EquivalenceIndex() for p in reversed(pages): reverse.add(p) assert derived_digest(forward) == derived_digest(reverse) def test_clean_index_reports_healthy(): pages = [_page("A", "Foo", "same body"), _page("B", "Bar", "same body")] idx = EquivalenceIndex() idx.build(pages) checker = ConsistencyChecker(idx, pages_fn := (lambda: pages)) report = checker.check_and_repair() assert report.drifted is False and report.healthy is True assert pages_fn() # source unchanged def test_missed_delta_drift_is_detected_and_repaired(): a = _page("A", "Foo", "converging target body") b = _page("B", "Bar", "initially unrelated separate text") source = {"pages": [a, b]} idx = EquivalenceIndex() idx.build(source["pages"]) assert idx.groups() == () # not equivalent yet # Source changes B to match A, but the index is never told (a missed delta → drift). b2 = _page("B", "Bar", "converging target body") source["pages"] = [a, b2] checker = ConsistencyChecker(idx, lambda: source["pages"]) report = checker.check_and_repair() assert report.drifted is True and report.repaired is True and report.healthy is True # Self-healed: the index now reflects the equivalence. assert idx.equivalent_to(Identity("A", "Foo")) == frozenset( {Identity("A", "Foo"), Identity("B", "Bar")} ) def test_corrupted_internal_state_is_healed(): a = _page("A", "Foo", "identical content") b = _page("B", "Bar", "identical content") idx = EquivalenceIndex() idx.build([a, b]) # Corrupt the derived tier directly: delete a true edge (simulated index corruption). idx._content_edges.clear() assert idx.groups() == () # corrupted away checker = ConsistencyChecker(idx, lambda: [a, b]) report = checker.check_and_repair() assert report.drifted is True and report.healthy is True assert idx.groups() # edge restored by scoped recompute def test_removed_source_page_is_reconciled(): a = _page("A", "Foo", "same body") b = _page("B", "Bar", "same body") idx = EquivalenceIndex() idx.build([a, b]) checker = ConsistencyChecker(idx, lambda: [a]) # B vanished from source report = checker.check_and_repair() assert report.healthy is True assert Identity("B", "Bar") not in idx.identities()