Files
citation-evidence/src/anchor/selectors/create.test.ts
tegwick d54daf2e61 Implement CE-WP-0002 T03-T09: ingest, anchor resolution, engine, UI, persistence, e2e
Completes the PDF review slice end-to-end. After this commit a user can
open a fixture, select text, save an evidence item with commentary, see
it in the sidebar, reload the page, click the item, and the viewer
scrolls to the passage.

- T03 src/source/pdf/{fingerprint,extract,ingest}.ts + 39 fixture tests
  - SHA-256 fingerprint over a fresh ArrayBuffer (TS BufferSource-safe)
  - PDF.js text extract; per-page normalize then join with "\n\n"
  - PageMap + OffsetMap (gap-free coverage); pageLength = end - start
  - Updated the manifest's Betriebskosten quote to one that PDF.js extracts cleanly
- T04 src/anchor/selectors/{create,resolve}.ts + 25 unit + 7 fixture tests
  - createSelectors emits the maximal redundant set (TextQuote +
    TextPosition + PdfRect + PdfPageText when available)
  - resolveSelectors implements the SharedContracts §7 ladder; confidence
    1.0 (pos+quote) → 0.7 (rect-only) → 0 (unresolved)
  - Cross-module integration test moved to tests/integration/ to honor
    the anchor↛source boundary lint rule
- T05 engine: sync event bus over the closed §4 vocabulary, Map-backed
  repos, services, createEngine() composition root, 12 tests
- T06 work + app: three-pane shell (CollectionList | ViewerShell |
  EvidenceSidebar) wired through EngineProvider; EngineContext lives in
  src/work/ to respect the work↛app boundary; SpikeApp deleted
- T07 AnnotationToolbar: pendingSelection in context; Save runs
  createSelectors → engine.annotations.create → engine.evidence.create
- T08 click-to-reopen + localStorage persistence
  - scrollToAnnotation state in context with a version counter so a
    second click on the same item re-fires the viewer scroll
  - captureSnapshot/restoreSnapshot/attachPersister/restoreFromStorage;
    restore bypasses services to avoid event-loops
  - active-document id persisted alongside the snapshot so reload lands
    on the same fixture; ADR-0005 written
  - 9 persistence tests
- T09 tests/integration/app-prd-scenario.dom.test.tsx
  - end-to-end happy-dom test of PRD scenario steps 1-8 through the real
    React tree; viewer + ingest mocked per ADR-0004's headless-Chromium
    limitation. Fixed memo-deps bug in EvidenceSidebar/ViewerShell where
    useEngineEventTick values were not included in the useMemo deps,
    leaving stale memoization across event-driven re-renders
- vitest.config.ts: happy-dom for *.dom.test.{ts,tsx} files
- noEmit added to tsconfig so tsc -b doesn't litter src/ with .js outputs

Gates: typecheck ✓ lint ✓ test 109/109 across 11 files ✓ build ✓

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 10:58:11 +02:00

137 lines
5.5 KiB
TypeScript

import { describe, expect, it } from "vitest";
import type { DocumentRepresentation } from "@shared/document";
import type { DocumentId, RepresentationId } from "@shared/ids";
import type {
PdfPageTextSelector,
PdfRectSelector,
TextPositionSelector,
TextQuoteSelector,
} from "@shared/selector";
import { createSelectors } from "./create";
import type { PdfSelectionCapture } from "../types";
/**
 * Test fixture: wraps `canonicalText` in a one-page `pdf-text` representation.
 *
 * The only offset-map entry runs from global offset 0 to the end of the text,
 * so page 1 spans every character of the canonical text.
 */
function repr(canonicalText: string): DocumentRepresentation {
  const totalChars = canonicalText.length;
  // Page 1 owns the entire text: its global range and page length coincide.
  const wholeTextSpan = {
    page: 1,
    globalStart: 0,
    globalEnd: totalChars,
    pageLength: totalChars,
  };
  const fixture: DocumentRepresentation = {
    id: "rep_test" as RepresentationId,
    documentId: "doc_test" as DocumentId,
    representationType: "pdf-text",
    contentHash: "test",
    canonicalText,
    pageMap: [{ page: 1, width: 595, height: 842 }],
    offsetMap: [wholeTextSpan],
    generatedAt: "2026-05-25T00:00:00.000Z",
  };
  return fixture;
}
/**
 * Test fixture: fabricates a PDF selection capture for `text` on `page`.
 *
 * Produces `rectsCount` rects of identical size, each shifted 0.05 further
 * down than the previous one, plus a bounding rect anchored at the first
 * rect's origin whose height is `0.04 * rectsCount`.
 */
function capture(text: string, page = 1, rectsCount = 1): PdfSelectionCapture {
  // Rect for the `lineIndex`-th line; only `y` varies between lines.
  const lineRect = (lineIndex: number) => ({
    x: 0.1,
    y: 0.2 + lineIndex * 0.05,
    width: 0.5,
    height: 0.04,
  });
  const rects = Array.from({ length: rectsCount }, (_unused, lineIndex) => lineRect(lineIndex));
  const boundingRect = { x: 0.1, y: 0.2, width: 0.5, height: 0.04 * rectsCount };
  return { kind: "pdf", text, page, rects, boundingRect };
}
/**
 * Unit tests for `createSelectors`, run against synthetic representations
 * built in this file (one single-page fixture shared by most cases, plus a
 * two-page fixture for the multi-page case).
 */
describe("createSelectors", () => {
  const text = "The quick brown fox jumps over the lazy dog near the river bank.";
  const representation = repr(text);

  it("always includes a TextQuoteSelector with prefix and suffix from canonical text", () => {
    const sels = createSelectors(capture("brown fox"), representation);
    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector");
    expect(quote).toBeDefined();
    expect(quote!.exact).toBe("brown fox");
    // Only 10 characters precede the match, so the prefix is the whole lead-in.
    expect(quote!.prefix).toBe("The quick ");
    // The expected suffix is 32 characters long: the context window pinned
    // here when no `contextChars` option is passed.
    expect(quote!.suffix).toBe(" jumps over the lazy dog near th");
  });

  it("includes a TextPositionSelector pointing at the matched offset", () => {
    const sels = createSelectors(capture("brown fox"), representation);
    const pos = sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector");
    expect(pos).toBeDefined();
    expect(pos!.start).toBe(text.indexOf("brown fox"));
    expect(pos!.end).toBe(text.indexOf("brown fox") + "brown fox".length);
  });

  it("includes a PdfRectSelector mirroring the capture's page and rects", () => {
    const c = capture("brown fox", 1, 2);
    const sels = createSelectors(c, representation);
    const rect = sels.find((s): s is PdfRectSelector => s.type === "PdfRectSelector");
    expect(rect).toBeDefined();
    expect(rect!.page).toBe(1);
    expect(rect!.rects).toEqual(c.rects);
  });

  it("includes a PdfPageTextSelector when the match falls inside the capture's page range", () => {
    const sels = createSelectors(capture("brown fox"), representation);
    const pageText = sels.find((s): s is PdfPageTextSelector => s.type === "PdfPageTextSelector");
    expect(pageText).toBeDefined();
    expect(pageText!.page).toBe(1);
    // Page 1 starts at global offset 0 in this fixture, so the expected start
    // equals the match's index in the canonical text.
    expect(pageText!.start).toBe(text.indexOf("brown fox"));
  });

  it("omits the TextPositionSelector when the quote cannot be found in canonical text", () => {
    const sels = createSelectors(capture("nonexistent phrase"), representation);
    const pos = sels.find((s) => s.type === "TextPositionSelector");
    expect(pos).toBeUndefined();
    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector");
    // Guard first so a missing quote selector fails as an assertion rather
    // than as a TypeError on the property reads below.
    expect(quote).toBeDefined();
    expect(quote!.exact).toBe("nonexistent phrase");
    // With no match there is no surrounding context to record.
    expect(quote!.prefix).toBeUndefined();
    expect(quote!.suffix).toBeUndefined();
  });

  it("clamps prefix at the start of the canonical text", () => {
    const sels = createSelectors(capture("The quick"), representation);
    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
    // A match at offset 0 has nothing before it; that is reported as undefined.
    expect(quote.prefix).toBeUndefined();
    expect(quote.suffix).toBe(" brown fox jumps over the lazy d");
  });

  it("clamps suffix at the end of the canonical text", () => {
    const sels = createSelectors(capture("river bank."), representation);
    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
    expect(quote.prefix).toBe("umps over the lazy dog near the ");
    // The match ends the text, so there is no suffix to record.
    expect(quote.suffix).toBeUndefined();
  });

  it("honors a custom contextChars option", () => {
    const sels = createSelectors(capture("brown fox"), representation, { contextChars: 4 });
    const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
    expect(quote.prefix).toBe("ick ");
    expect(quote.suffix).toBe(" jum");
  });

  it("prefers the on-page match when the quote appears on multiple pages", () => {
    // Two-page representation where the quote appears once per page. Page 1's
    // range includes the "\n\n" separator, so page 2 begins right after it.
    const pageOneText = "alpha echo bravo" + "\n\n";
    const pageTwoText = "charlie echo delta";
    const canonical = pageOneText + pageTwoText;
    // Derived rather than hard-coded so the offset map stays in step with the
    // fixture text if it is ever edited.
    const pageTwoStart = pageOneText.length;
    const rep: DocumentRepresentation = {
      id: "rep_multi" as RepresentationId,
      documentId: "doc_multi" as DocumentId,
      representationType: "pdf-text",
      contentHash: "h",
      canonicalText: canonical,
      pageMap: [
        { page: 1, width: 100, height: 100 },
        { page: 2, width: 100, height: 100 },
      ],
      offsetMap: [
        { page: 1, globalStart: 0, globalEnd: pageTwoStart, pageLength: pageTwoStart },
        {
          page: 2,
          globalStart: pageTwoStart,
          globalEnd: canonical.length,
          pageLength: canonical.length - pageTwoStart,
        },
      ],
      generatedAt: "2026-05-25T00:00:00.000Z",
    };
    const sels = createSelectors(capture("echo", 2), rep);
    const pos = sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector")!;
    // The capture is on page 2, so the page-2 occurrence must win over the
    // earlier page-1 occurrence.
    expect(pos.start).toBe(canonical.indexOf("echo", pageTwoStart));
  });
});