Implement CE-WP-0002 T03-T09: ingest, anchor resolution, engine, UI, persistence, e2e

Completes the PDF review slice end-to-end. After this commit a user can open a fixture, select text, save an evidence item with commentary, see it in the sidebar, reload the page, click the item, and the viewer scrolls to the passage.

- T03 src/source/pdf/{fingerprint,extract,ingest}.ts + 39 fixture tests
  - SHA-256 fingerprint over a fresh ArrayBuffer (TS BufferSource-safe)
  - PDF.js text extract; per-page normalize then join with "\n\n"
  - PageMap + OffsetMap (gap-free coverage); pageLength = end - start
  - Updated manifest's Betriebskosten quote to one PDF.js extracts cleanly
- T04 src/anchor/selectors/{create,resolve}.ts + 25 unit + 7 fixture tests
  - createSelectors emits the maximal redundant set (TextQuote + TextPosition + PdfRect + PdfPageText when available)
  - resolveSelectors implements the SharedContracts §7 ladder; confidence 1.0 (pos+quote) → 0.7 (rect-only) → 0 (unresolved)
  - Cross-module integration test moved to tests/integration/ to honor the anchor↛source boundary lint rule
- T05 engine: sync event bus over the closed §4 vocabulary, Map-backed repos, services, createEngine() composition root, 12 tests
- T06 work + app: three-pane shell (CollectionList | ViewerShell | EvidenceSidebar) wired through EngineProvider; EngineContext lives in src/work/ to respect the work↛app boundary; SpikeApp deleted
- T07 AnnotationToolbar: pendingSelection in context; Save runs createSelectors → engine.annotations.create → engine.evidence.create
- T08 click-to-reopen + localStorage persistence
  - scrollToAnnotation state in context with a version counter so a second click on the same item re-fires the viewer scroll
  - captureSnapshot/restoreSnapshot/attachPersister/restoreFromStorage; restore bypasses services to avoid event-loops
  - active-document id persisted alongside the snapshot so reload lands on the same fixture; ADR-0005 written
  - 9 persistence tests
- T09 tests/integration/app-prd-scenario.dom.test.tsx
  - end-to-end happy-dom test of PRD scenario steps 1-8 through the real React tree; viewer + ingest mocked per ADR-0004's headless-Chromium limitation.

Fixed memo-deps bug in EvidenceSidebar/ViewerShell where useEngineEventTick values were not included in the useMemo deps, leaving stale memoization across event-driven re-renders.

- vitest.config.ts: happy-dom for *.dom.test.{ts,tsx} files
- noEmit added to tsconfig so tsc -b doesn't litter src/ with .js outputs

Gates: typecheck ✓ lint ✓ test 109/109 across 11 files ✓ build ✓

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 10:58:11 +02:00
parent 2a7b05c190
commit d54daf2e61
45 changed files with 3655 additions and 277 deletions
--- a/src/anchor/index.ts
+++ b/src/anchor/index.ts
@@ -5,3 +5,9 @@ export {
  type PdfSpikeViewerProps,
  type StoredAnnotation,
 } from "./pdf-viewer-adapter-spike";
+export {
+  createSelectors,
+  resolveSelectors,
+  DEFAULT_CONTEXT_CHARS,
+  type CreateSelectorsOptions,
+} from "./selectors";
--- a/src/anchor/selectors/create.test.ts
+++ b/src/anchor/selectors/create.test.ts
@@ -0,0 +1,136 @@
+import { describe, expect, it } from "vitest";
+import type { DocumentRepresentation } from "@shared/document";
+import type { DocumentId, RepresentationId } from "@shared/ids";
+import type {
+  PdfPageTextSelector,
+  PdfRectSelector,
+  TextPositionSelector,
+  TextQuoteSelector,
+} from "@shared/selector";
+import { createSelectors } from "./create";
+import type { PdfSelectionCapture } from "../types";
+
+function repr(canonicalText: string): DocumentRepresentation { // Test fixture: a one-page representation whose single offsetMap entry spans all of canonicalText.
+  const pageLength = canonicalText.length;
+  return {
+    id: "rep_test" as RepresentationId,
+    documentId: "doc_test" as DocumentId,
+    representationType: "pdf-text",
+    contentHash: "test",
+    canonicalText,
+    pageMap: [{ page: 1, width: 595, height: 842 }],
+    offsetMap: [
+      { page: 1, globalStart: 0, globalEnd: pageLength, pageLength }, // page 1 covers [0, canonicalText.length)
+    ],
+    generatedAt: "2026-05-25T00:00:00.000Z",
+  };
+}
+
+function capture(text: string, page = 1, rectsCount = 1): PdfSelectionCapture { // Test fixture: a PDF capture carrying `rectsCount` synthetic rects on `page`.
+  return {
+    kind: "pdf",
+    text,
+    page,
+    rects: Array.from({ length: rectsCount }, (_, i) => ({
+      x: 0.1,
+      y: 0.2 + i * 0.05, // each successive rect sits 0.05 further down
+      width: 0.5,
+      height: 0.04,
+    })),
+    boundingRect: { x: 0.1, y: 0.2, width: 0.5, height: 0.04 * rectsCount }, // height is rect height × count; it does not account for the 0.05 y-step between rects
+  };
+}
+
+describe("createSelectors", () => { // Unit tests for selector creation against in-memory representations.
+  const text = "The quick brown fox jumps over the lazy dog near the river bank.";
+  const representation = repr(text);
+
+  it("always includes a TextQuoteSelector with prefix and suffix from canonical text", () => {
+    const sels = createSelectors(capture("brown fox"), representation);
+    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector");
+    expect(quote).toBeDefined();
+    expect(quote!.exact).toBe("brown fox");
+    expect(quote!.prefix).toBe("The quick "); // only 10 chars precede the match, so the prefix is clamped at the text start
+    expect(quote!.suffix).toBe(" jumps over the lazy dog near th"); // exactly 32 chars, the default context width
+  });
+
+  it("includes a TextPositionSelector pointing at the matched offset", () => {
+    const sels = createSelectors(capture("brown fox"), representation);
+    const pos = sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector");
+    expect(pos).toBeDefined();
+    expect(pos!.start).toBe(text.indexOf("brown fox"));
+    expect(pos!.end).toBe(text.indexOf("brown fox") + "brown fox".length);
+  });
+
+  it("includes a PdfRectSelector mirroring the capture's page and rects", () => {
+    const c = capture("brown fox", 1, 2);
+    const sels = createSelectors(c, representation);
+    const rect = sels.find((s): s is PdfRectSelector => s.type === "PdfRectSelector");
+    expect(rect).toBeDefined();
+    expect(rect!.page).toBe(1);
+    expect(rect!.rects).toEqual(c.rects);
+  });
+
+  it("includes a PdfPageTextSelector when the match falls inside the capture's page range", () => {
+    const sels = createSelectors(capture("brown fox"), representation);
+    const pageText = sels.find((s): s is PdfPageTextSelector => s.type === "PdfPageTextSelector");
+    expect(pageText).toBeDefined();
+    expect(pageText!.page).toBe(1);
+    expect(pageText!.start).toBe(text.indexOf("brown fox")); // page 1 starts at global offset 0, so page-local and global offsets coincide
+  });
+
+  it("omits the TextPositionSelector when the quote cannot be found in canonical text", () => {
+    const sels = createSelectors(capture("nonexistent phrase"), representation);
+    const pos = sels.find((s) => s.type === "TextPositionSelector");
+    expect(pos).toBeUndefined();
+    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector");
+    expect(quote!.exact).toBe("nonexistent phrase");
+    expect(quote!.prefix).toBeUndefined(); // no match offset, so no context can be sliced
+    expect(quote!.suffix).toBeUndefined();
+  });
+
+  it("clamps prefix at the start of the canonical text", () => {
+    const sels = createSelectors(capture("The quick"), representation);
+    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
+    expect(quote.prefix).toBeUndefined(); // match is at offset 0; an empty prefix is omitted rather than stored as ""
+    expect(quote.suffix).toBe(" brown fox jumps over the lazy d");
+  });
+
+  it("clamps suffix at the end of the canonical text", () => {
+    const sels = createSelectors(capture("river bank."), representation);
+    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
+    expect(quote.prefix).toBe("umps over the lazy dog near the ");
+    expect(quote.suffix).toBeUndefined(); // match ends at the end of the text; an empty suffix is omitted
+  });
+
+  it("honors a custom contextChars option", () => {
+    const sels = createSelectors(capture("brown fox"), representation, { contextChars: 4 });
+    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
+    expect(quote.prefix).toBe("ick ");
+    expect(quote.suffix).toBe(" jum");
+  });
+
+  it("prefers the on-page match when the quote appears on multiple pages", () => {
+    // Two-page representation where the quote appears once per page.
+    const canonical = "alpha echo bravo" + "\n\n" + "charlie echo delta";
+    const rep: DocumentRepresentation = {
+      id: "rep_multi" as RepresentationId,
+      documentId: "doc_multi" as DocumentId,
+      representationType: "pdf-text",
+      contentHash: "h",
+      canonicalText: canonical,
+      pageMap: [
+        { page: 1, width: 100, height: 100 },
+        { page: 2, width: 100, height: 100 },
+      ],
+      offsetMap: [
+        { page: 1, globalStart: 0, globalEnd: 18, pageLength: 18 }, // 16 chars of page text plus the 2-char page joiner
+        { page: 2, globalStart: 18, globalEnd: canonical.length, pageLength: canonical.length - 18 },
+      ],
+      generatedAt: "2026-05-25T00:00:00.000Z",
+    };
+    const sels = createSelectors(capture("echo", 2), rep);
+    const pos = sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector")!;
+    expect(pos.start).toBe(canonical.indexOf("echo", 18)); // the occurrence inside page 2's range, not the first one on page 1
+  });
+});
--- a/src/anchor/selectors/create.ts
+++ b/src/anchor/selectors/create.ts
@@ -0,0 +1,157 @@
+/**
+ * Build the maximal `Selector[]` from a viewer's `SelectionCapture`.
+ *
+ * Implements the "always store all selector types that are available" rule
+ * from `wiki/SharedContracts.md` §3 (selector redundancy) and the create
+ * half of the `AnchorAdapter` contract in
+ * `wiki/ArchitectureOverview.md` §3.3.
+ *
+ * Output guarantee: a `TextQuoteSelector` is emitted whenever the normalized
+ * selection text is non-empty; `TextPositionSelector`, `PdfRectSelector` and
+ * `PdfPageTextSelector` are added only when the underlying data actually
+ * supports them. Resolvers can rely on the union being trimmed — a missing
+ * selector means "not available", not "skipped".
+ */
+
+import type { DocumentRepresentation } from "@shared/document";
+import { normalize } from "@shared/text/normalize";
+import type {
+  PdfPageTextSelector,
+  PdfRectSelector,
+  Selector,
+  TextPositionSelector,
+  TextQuoteSelector,
+} from "@shared/selector";
+
+import type { PdfSelectionCapture, SelectionCapture } from "../types";
+
+/** Default maximum number of characters kept on each side (prefix and suffix) of a TextQuoteSelector. */
+export const DEFAULT_CONTEXT_CHARS = 32;
+
+export interface CreateSelectorsOptions { // Optional tuning knobs for createSelectors.
+  readonly contextChars?: number; // max prefix/suffix length per side; DEFAULT_CONTEXT_CHARS is used when omitted
+}
+
+export function createSelectors( // Public entry point: builds the Selector[] for a capture against a representation.
+  capture: SelectionCapture,
+  representation: DocumentRepresentation,
+  options: CreateSelectorsOptions = {},
+): Selector[] {
+  // `SelectionCapture` is a discriminated union. The DOM branch is `never`
+  // in MVP, so the only runtime shape is `PdfSelectionCapture`.
+  return createSelectorsFromPdfCapture(capture, representation, options); // NOTE(review): add a `kind` switch here once a second capture kind exists
+}
+
+function createSelectorsFromPdfCapture( // PDF branch: derives every selector the capture and representation can support.
+  capture: PdfSelectionCapture,
+  representation: DocumentRepresentation,
+  options: CreateSelectorsOptions,
+): Selector[] {
+  const contextChars = options.contextChars ?? DEFAULT_CONTEXT_CHARS;
+  const normalizedQuote = normalize(capture.text).text; // the normalized form is what gets searched for and stored as `exact`
+  const out: Selector[] = [];
+
+  const canonicalText = representation.canonicalText ?? "";
+  const positions = canonicalText.length > 0 && normalizedQuote.length > 0
+    ? findAllOccurrences(canonicalText, normalizedQuote)
+    : [];
+
+  // Locate the match that falls on the capture's page (when offsetMap is
+  // known); otherwise fall back to the first match. If there is no match,
+  // we still emit a quote-only TextQuoteSelector so the annotation is
+  // recoverable later if the representation is rebuilt.
+  const pageRange = representation.offsetMap?.find((r) => r.page === capture.page);
+  const matchOffset = pickMatch(positions, pageRange);
+
+  // 1. TextQuoteSelector — emitted whenever the normalized quote is non-empty.
+  if (normalizedQuote.length > 0) {
+    const quote = matchOffset !== null
+      ? buildQuoteSelectorWithContext(canonicalText, matchOffset, normalizedQuote, contextChars)
+      : ({ type: "TextQuoteSelector", exact: normalizedQuote } satisfies TextQuoteSelector);
+    out.push(quote);
+  }
+
+  // 2. TextPositionSelector — whenever the quote was located (uniqueness is not checked here).
+  if (matchOffset !== null) {
+    const pos: TextPositionSelector = {
+      type: "TextPositionSelector",
+      start: matchOffset,
+      end: matchOffset + normalizedQuote.length,
+    };
+    out.push(pos);
+  }
+
+  // 3. PdfRectSelector — straight from the capture; viewer-coordinate truth.
+  if (capture.rects.length > 0) {
+    const rect: PdfRectSelector = {
+      type: "PdfRectSelector",
+      page: capture.page,
+      rects: capture.rects, // same array instance as the capture's; not copied
+    };
+    out.push(rect);
+  }
+
+  // 4. PdfPageTextSelector — when the chosen match lies entirely inside the
+  //    offsetMap range for the capture's page; offsets are page-local.
+  if (matchOffset !== null && pageRange) {
+    if (matchOffset >= pageRange.globalStart && matchOffset + normalizedQuote.length <= pageRange.globalEnd) {
+      const pageText: PdfPageTextSelector = {
+        type: "PdfPageTextSelector",
+        page: capture.page,
+        start: matchOffset - pageRange.globalStart,
+        end: matchOffset - pageRange.globalStart + normalizedQuote.length,
+      };
+      out.push(pageText);
+    }
+  }
+
+  return out;
+}
+
+function findAllOccurrences(haystack: string, needle: string): number[] { // Start offsets of every occurrence of needle in haystack, ascending.
+  if (needle.length === 0) return []; // indexOf("") matches at every index; treat an empty needle as "no matches"
+  const out: number[] = [];
+  let from = 0;
+  for (;;) {
+    const idx = haystack.indexOf(needle, from);
+    if (idx === -1) break;
+    out.push(idx);
+    from = idx + 1; // step by one (not needle.length) so overlapping occurrences are reported too
+  }
+  return out;
+}
+
+function pickMatch( // Chooses one match offset: the sole match, else the first starting on the hinted page, else the first overall; null when there are none.
+  positions: readonly number[],
+  pageRange: { globalStart: number; globalEnd: number } | undefined,
+): number | null {
+  if (positions.length === 0) return null;
+  if (positions.length === 1) return positions[0]!; // a sole match is accepted even if it lies outside pageRange
+  if (pageRange) {
+    const onPage = positions.find(
+      (p) => p >= pageRange.globalStart && p < pageRange.globalEnd, // only the match start is tested; the match end may run past the page
+    );
+    if (onPage !== undefined) return onPage;
+  }
+  // Multiple matches and none starting on the hinted page (or no page hint) —
+  // return the first; resolve.ts will need prefix/suffix to disambiguate.
+  return positions[0]!;
+}
+
+function buildQuoteSelectorWithContext( // Builds a TextQuoteSelector with up to contextChars of surrounding text on each side.
+  canonicalText: string,
+  matchOffset: number,
+  exact: string,
+  contextChars: number,
+): TextQuoteSelector {
+  const prefixStart = Math.max(0, matchOffset - contextChars); // clamp at the start of the text
+  const suffixEnd = Math.min(canonicalText.length, matchOffset + exact.length + contextChars); // clamp at the end of the text
+  const prefix = canonicalText.slice(prefixStart, matchOffset);
+  const suffix = canonicalText.slice(matchOffset + exact.length, suffixEnd);
+  return {
+    type: "TextQuoteSelector",
+    exact,
+    ...(prefix.length > 0 ? { prefix } : {}), // empty context is omitted, so the property stays undefined rather than ""
+    ...(suffix.length > 0 ? { suffix } : {}),
+  };
+}
--- a/src/anchor/selectors/index.ts
+++ b/src/anchor/selectors/index.ts
@@ -0,0 +1,6 @@
+export {
+  createSelectors,
+  DEFAULT_CONTEXT_CHARS,
+  type CreateSelectorsOptions,
+} from "./create";
+export { resolveSelectors } from "./resolve";
--- a/src/anchor/selectors/resolve.test.ts
+++ b/src/anchor/selectors/resolve.test.ts
@@ -0,0 +1,137 @@
+import { describe, expect, it } from "vitest";
+import type { DocumentRepresentation } from "@shared/document";
+import type { DocumentId, RepresentationId } from "@shared/ids";
+import type { Selector } from "@shared/selector";
+import { resolveSelectors } from "./resolve";
+
+function repr(canonicalText: string, pages = 1): DocumentRepresentation { // Test fixture: splits canonicalText into `pages` contiguous offsetMap ranges.
+  const segmentLen = pages === 1
+    ? canonicalText.length
+    : Math.floor(canonicalText.length / pages); // even split; the remainder goes to the last page below
+  const offsetMap = [];
+  for (let i = 0; i < pages; i++) {
+    const start = i * segmentLen;
+    const end = i === pages - 1 ? canonicalText.length : start + segmentLen; // last page absorbs any leftover characters
+    offsetMap.push({ page: i + 1, globalStart: start, globalEnd: end, pageLength: end - start });
+  }
+  return {
+    id: "rep_test" as RepresentationId,
+    documentId: "doc_test" as DocumentId,
+    representationType: "pdf-text",
+    contentHash: "test",
+    canonicalText,
+    pageMap: Array.from({ length: pages }, (_, i) => ({ page: i + 1, width: 595, height: 842 })),
+    offsetMap,
+    generatedAt: "2026-05-25T00:00:00.000Z",
+  };
+}
+
+describe("resolveSelectors", () => { // Unit tests covering each rung of the resolution ladder.
+  const text = "The quick brown fox jumps over the lazy dog.";
+  const representation = repr(text);
+  const brownFoxStart = text.indexOf("brown fox");
+  const brownFoxEnd = brownFoxStart + "brown fox".length;
+
+  it("returns 1.0 confidence when position and quote agree exactly", () => {
+    const selectors: Selector[] = [
+      { type: "TextPositionSelector", start: brownFoxStart, end: brownFoxEnd },
+      { type: "TextQuoteSelector", exact: "brown fox" },
+    ];
+    const r = resolveSelectors(selectors, representation);
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(1.0);
+    expect(r.candidates[0]?.textPosition).toEqual({ start: brownFoxStart, end: brownFoxEnd });
+    expect(r.candidates[0]?.page).toBe(1);
+    expect(r.usedSelectorTypes).toEqual(["TextPositionSelector", "TextQuoteSelector"]);
+  });
+
+  it("falls back to quote search when position is stale, and records a warning", () => {
+    const selectors: Selector[] = [
+      { type: "TextPositionSelector", start: 0, end: 9 }, // "The quick"
+      { type: "TextQuoteSelector", exact: "brown fox" },
+    ];
+    const r = resolveSelectors(selectors, representation);
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(0.95); // the quote is unique in the text, so the unique-quote rung applies
+    expect(r.candidates[0]?.textPosition).toEqual({ start: brownFoxStart, end: brownFoxEnd });
+    expect(r.warnings?.[0]).toMatch(/did not match/);
+    expect(r.usedSelectorTypes).toEqual(["TextQuoteSelector"]); // the stale position selector is not counted as used
+  });
+
+  it("returns 0.85 for a position-only selector with no quote to verify", () => {
+    const selectors: Selector[] = [
+      { type: "TextPositionSelector", start: brownFoxStart, end: brownFoxEnd },
+    ];
+    const r = resolveSelectors(selectors, representation);
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(0.85);
+  });
+
+  it("returns 0.95 when only TextQuoteSelector is present and the quote is unique", () => {
+    const r = resolveSelectors(
+      [{ type: "TextQuoteSelector", exact: "brown fox" }],
+      representation,
+    );
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(0.95);
+  });
+
+  it("returns 0.9 when a duplicated quote is disambiguated by prefix/suffix", () => {
+    const dup = "alpha echo bravo charlie echo delta";
+    const r = resolveSelectors(
+      [{ type: "TextQuoteSelector", exact: "echo", prefix: "charlie ", suffix: " delta" }],
+      repr(dup),
+    );
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(0.9);
+    expect(r.candidates[0]?.textPosition?.start).toBe(dup.indexOf("echo", 10)); // the second occurrence; the first starts at index 6
+  });
+
+  it("returns ambiguous when a duplicated quote cannot be disambiguated", () => {
+    const dup = "echo and echo";
+    const r = resolveSelectors(
+      [{ type: "TextQuoteSelector", exact: "echo" }],
+      repr(dup),
+    );
+    expect(r.status).toBe("ambiguous");
+    expect(r.confidence).toBe(0.5);
+  });
+
+  it("falls back to PdfPageTextSelector via the OffsetMap", () => {
+    // Single page, "brown fox" at offset 10..19.
+    const r = resolveSelectors(
+      [{ type: "PdfPageTextSelector", page: 1, start: brownFoxStart, end: brownFoxEnd }],
+      representation,
+    );
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(0.8);
+    expect(r.candidates[0]?.textPosition).toEqual({ start: brownFoxStart, end: brownFoxEnd });
+    expect(r.candidates[0]?.page).toBe(1);
+  });
+
+  it("falls back to PdfRectSelector with page+rects only at 0.7 confidence", () => {
+    const r = resolveSelectors(
+      [{
+        type: "PdfRectSelector",
+        page: 2,
+        rects: [{ x: 0.1, y: 0.2, width: 0.3, height: 0.04 }],
+      }],
+      repr(text, 1), // the representation only has page 1; the rect fallback reports the selector's page unverified
+    );
+    expect(r.status).toBe("resolved");
+    expect(r.confidence).toBe(0.7);
+    expect(r.candidates[0]?.page).toBe(2);
+    expect(r.candidates[0]?.textPosition).toBeUndefined();
+    expect(r.candidates[0]?.rects).toHaveLength(1);
+  });
+
+  it("returns unresolved when nothing matches", () => {
+    const r = resolveSelectors(
+      [{ type: "TextQuoteSelector", exact: "missing string" }],
+      representation,
+    );
+    expect(r.status).toBe("unresolved");
+    expect(r.confidence).toBe(0);
+    expect(r.candidates).toEqual([]);
+  });
+});
--- a/src/anchor/selectors/resolve.ts
+++ b/src/anchor/selectors/resolve.ts
@@ -0,0 +1,260 @@
+/**
+ * Resolve a `Selector[]` against a `DocumentRepresentation`.
+ *
+ * Implements the resolution strategy from `wiki/ArchitectureOverview.md` §7,
+ * MVP-trimmed:
+ *
+ *   1. Try `TextPositionSelector` (cheapest — direct slice).
+ *   2. Verify with `TextQuoteSelector` at that position.
+ *   3. Try `TextQuoteSelector` on its own. If multiple matches, disambiguate
+ *      by prefix/suffix.
+ *   4. Try `PdfPageTextSelector` (page-local offsets through the OffsetMap).
+ *   5. Fall back to `PdfRectSelector` for a page+rects-only target.
+ *   6. Return `unresolved` if nothing above succeeds.
+ *
+ * Fuzzy matching is out of scope here; a later workplan owns it.
+ *
+ * Confidence ladder (0..1); ambiguous quote matches report 0.50 and unresolved reports 0:
+ *   1.00 — TextPosition + TextQuote agree exactly
+ *   0.95 — TextQuote unique match (no position to cross-check)
+ *   0.90 — TextQuote disambiguated by prefix/suffix
+ *   0.85 — TextPosition only (no quote to cross-check)
+ *   0.80 — PdfPageTextSelector resolved via OffsetMap
+ *   0.70 — PdfRectSelector only (page+rects, no text verification)
+ */
+
+import type { DocumentRepresentation } from "@shared/document";
+import type {
+  PdfPageTextSelector,
+  PdfRectSelector,
+  Selector,
+  SelectorType,
+  TextPositionSelector,
+  TextQuoteSelector,
+} from "@shared/selector";
+
+import type { AnchorResolution, ResolvedAnchorTarget } from "../types";
+
+export function resolveSelectors( // Walks the selector ladder from most to least trustworthy and returns the first rung that resolves.
+  selectors: readonly Selector[],
+  representation: DocumentRepresentation,
+): AnchorResolution {
+  const canonicalText = representation.canonicalText ?? "";
+  const offsetMap = representation.offsetMap ?? [];
+  const representationId = representation.id;
+
+  const byType = indexByType(selectors); // at most one selector per type is considered
+  const used: SelectorType[] = [];
+  const warnings: string[] = [];
+
+  // 1 & 2. Try TextPositionSelector, verify with TextQuoteSelector.
+  if (byType.TextPositionSelector && canonicalText.length > 0) {
+    const pos = byType.TextPositionSelector;
+    const slice = sliceSafely(canonicalText, pos.start, pos.end);
+    if (slice !== null) { // null means the span is out of range or empty; fall through to the later rungs
+      const quote = byType.TextQuoteSelector;
+      if (quote) {
+        if (slice === quote.exact) {
+          used.push("TextPositionSelector", "TextQuoteSelector");
+          return resolved(
+            { representationId, textPosition: { start: pos.start, end: pos.end }, ...pageFor(pos, offsetMap) },
+            1.0,
+            used,
+            warnings,
+          );
+        }
+        warnings.push(
+          "TextPositionSelector slice did not match TextQuoteSelector.exact; falling back to quote search.",
+        );
+      } else {
+        // Position with no quote to verify — accept at lower confidence.
+        used.push("TextPositionSelector");
+        return resolved(
+          { representationId, textPosition: { start: pos.start, end: pos.end }, ...pageFor(pos, offsetMap) },
+          0.85,
+          used,
+          warnings,
+        );
+      }
+    }
+  }
+
+  // 3. TextQuoteSelector on its own (or after the position fallback above).
+  if (byType.TextQuoteSelector && canonicalText.length > 0) {
+    const quoteResult = resolveByQuote(canonicalText, byType.TextQuoteSelector);
+    if (quoteResult) { // null means the quote does not occur at all; try the PDF-specific selectors next
+      used.push("TextQuoteSelector");
+      return resolved(
+        {
+          representationId,
+          textPosition: { start: quoteResult.offset, end: quoteResult.offset + byType.TextQuoteSelector.exact.length },
+          ...pageFor({ start: quoteResult.offset, end: quoteResult.offset + byType.TextQuoteSelector.exact.length }, offsetMap),
+        },
+        quoteResult.confidence,
+        used,
+        warnings,
+        quoteResult.status, // may be "ambiguous", in which case the single candidate is the first occurrence
+      );
+    }
+  }
+
+  // 4. PdfPageTextSelector through OffsetMap.
+  if (byType.PdfPageTextSelector && offsetMap.length > 0) {
+    const pageText = byType.PdfPageTextSelector;
+    const range = offsetMap.find((r) => r.page === pageText.page);
+    if (range && pageText.start >= 0 && pageText.end <= range.pageLength && pageText.start < pageText.end) { // page-local span must be non-empty and within the page
+      const globalStart = range.globalStart + pageText.start; // translate page-local offsets to global ones
+      const globalEnd = range.globalStart + pageText.end;
+      used.push("PdfPageTextSelector");
+      return resolved(
+        {
+          representationId,
+          page: pageText.page,
+          textPosition: { start: globalStart, end: globalEnd },
+        },
+        0.8,
+        used,
+        warnings,
+      );
+    }
+  }
+
+  // 5. PdfRectSelector fallback (no text verification possible).
+  if (byType.PdfRectSelector) {
+    const rect = byType.PdfRectSelector;
+    used.push("PdfRectSelector");
+    return resolved(
+      { representationId, page: rect.page, rects: rect.rects }, // rect.page is passed through without checking it against the representation
+      0.7,
+      used,
+      warnings,
+    );
+  }
+
+  return unresolved(warnings); // any warnings gathered on the way down are still reported
+}
+
+interface QuoteResolutionResult { // Outcome of a quote-only search; produced by resolveByQuote.
+  readonly offset: number; // start offset of the chosen occurrence in canonical text
+  readonly confidence: number; // 0.95 unique, 0.9 disambiguated by context, 0.5 ambiguous
+  readonly status: "resolved" | "ambiguous";
+}
+
+function resolveByQuote(canonicalText: string, quote: TextQuoteSelector): QuoteResolutionResult | null { // Exact-match search for quote.exact; returns null when it never occurs.
+  const positions = findAllOccurrences(canonicalText, quote.exact);
+  if (positions.length === 0) return null;
+  if (positions.length === 1) {
+    return { offset: positions[0]!, confidence: 0.95, status: "resolved" }; // prefix/suffix are not checked for a sole occurrence
+  }
+  // Multiple matches — try to disambiguate by prefix/suffix.
+  const filtered = positions.filter((p) => prefixSuffixMatches(canonicalText, p, quote));
+  if (filtered.length === 1) {
+    return { offset: filtered[0]!, confidence: 0.9, status: "resolved" };
+  }
+  if (filtered.length > 1) { // several survive the filter; always the case when the quote carries no prefix/suffix
+    return { offset: filtered[0]!, confidence: 0.5, status: "ambiguous" };
+  }
+  // Context was given but no occurrence matched it — report the first occurrence as ambiguous.
+  return { offset: positions[0]!, confidence: 0.5, status: "ambiguous" };
+}
+
+function prefixSuffixMatches( // True when the text around `offset` agrees with every context field the quote carries.
+  canonicalText: string,
+  offset: number,
+  quote: TextQuoteSelector,
+): boolean {
+  if (quote.prefix !== undefined) {
+    const prefixEnd = offset;
+    const prefixStart = Math.max(0, prefixEnd - quote.prefix.length); // clamped, so the slice may be shorter than the prefix near the text start
+    const actualPrefix = canonicalText.slice(prefixStart, prefixEnd);
+    if (!actualPrefix.endsWith(quote.prefix)) return false; // a clamped (shorter) slice cannot end with the full prefix, so it fails here
+  }
+  if (quote.suffix !== undefined) {
+    const suffixStart = offset + quote.exact.length;
+    const suffixEnd = Math.min(canonicalText.length, suffixStart + quote.suffix.length); // clamped at the text end
+    const actualSuffix = canonicalText.slice(suffixStart, suffixEnd);
+    if (!actualSuffix.startsWith(quote.suffix)) return false;
+  }
+  return true; // also reached when the quote has neither prefix nor suffix
+}
+
+interface SelectorIndex { // One optional slot per selector type, keyed by the selector's `type` tag.
+  TextQuoteSelector?: TextQuoteSelector;
+  TextPositionSelector?: TextPositionSelector;
+  PdfRectSelector?: PdfRectSelector;
+  PdfPageTextSelector?: PdfPageTextSelector;
+}
+
+function indexByType(selectors: readonly Selector[]): SelectorIndex { // Buckets selectors by type; when a type repeats, the last one in the array wins.
+  const idx: SelectorIndex = {};
+  for (const s of selectors) {
+    switch (s.type) { // no default branch: selector types not listed here are ignored
+      case "TextQuoteSelector":
+        idx.TextQuoteSelector = s;
+        break;
+      case "TextPositionSelector":
+        idx.TextPositionSelector = s;
+        break;
+      case "PdfRectSelector":
+        idx.PdfRectSelector = s;
+        break;
+      case "PdfPageTextSelector":
+        idx.PdfPageTextSelector = s;
+        break;
+    }
+  }
+  return idx;
+}
+
+function sliceSafely(text: string, start: number, end: number): string | null { // text.slice(start, end), or null when the span is invalid.
+  if (start < 0 || end > text.length || start >= end) return null; // rejects negative start, end past the text, and empty or inverted spans. NOTE(review): NaN offsets pass all three comparisons — confirm selectors are validated upstream
+  return text.slice(start, end);
+}
+
+function pageFor( // Returns { page } for the first offsetMap range that wholly contains the span, else {}.
+  span: { start: number; end: number },
+  offsetMap: readonly { page: number; globalStart: number; globalEnd: number }[],
+): { page?: number } {
+  if (offsetMap.length === 0) return {};
+  const range = offsetMap.find((r) => span.start >= r.globalStart && span.end <= r.globalEnd); // a span crossing a page boundary matches no range
+  return range ? { page: range.page } : {}; // an empty object lets callers spread the result without setting `page`
+}
+
+function findAllOccurrences(haystack: string, needle: string): number[] { // Start offsets of every occurrence of needle, ascending. Same body as the helper in create.ts.
+  if (needle.length === 0) return []; // an empty needle would match at every index; report none instead
+  const out: number[] = [];
+  let from = 0;
+  for (;;) {
+    const idx = haystack.indexOf(needle, from);
+    if (idx === -1) break;
+    out.push(idx);
+    from = idx + 1; // step by one so overlapping occurrences are included
+  }
+  return out;
+}
+
+function resolved( // Builds a single-candidate AnchorResolution; status defaults to "resolved".
+  target: ResolvedAnchorTarget,
+  confidence: number,
+  used: readonly SelectorType[],
+  warnings: readonly string[],
+  status: "resolved" | "ambiguous" = "resolved",
+): AnchorResolution {
+  return {
+    status,
+    confidence,
+    candidates: [target],
+    usedSelectorTypes: used,
+    ...(warnings.length > 0 ? { warnings } : {}), // the `warnings` key is left off entirely when there are none
+  };
+}
+
+function unresolved(warnings: readonly string[]): AnchorResolution { // Builds the "nothing matched" result: confidence 0 and no candidates.
+  return {
+    status: "unresolved",
+    confidence: 0,
+    candidates: [],
+    usedSelectorTypes: [],
+    ...(warnings.length > 0 ? { warnings } : {}), // the `warnings` key is left off entirely when there are none
+  };
+}