Files
citation-evidence/src/anchor/pdf-selector-math.test.ts
tegwick 2a7b05c190 Implement CE-WP-0002 T01-T02: engine types + PDF viewer adapter spike
T01: shared engine types (Document, Selector union, Annotation, EvidenceItem,
branded IDs with newId factory) per wiki/SharedContracts.md §1-§3.

T02: react-pdf-highlighter-plus v1.1.4 spike behind the §5
DocumentViewerAdapter contract in src/anchor/. Pure round-trip math
extracted to pdf-selector-math.ts with 11 unit tests proving lossless
capture → selectors → JSON → restored-rects. ADR-0004 accepted; full
user-flow Playwright verification deferred to T09.

Adds Vite app shell (index.html, src/app/SpikeApp.tsx) so the spike is
exercisable via pnpm dev. tsconfig --noEmit prevents tsc -b from
littering src/ with stray .js outputs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 02:21:31 +02:00

112 lines
4.1 KiB
TypeScript

/**
* Round-trip tests for the spike's pure transformation layer.
*
* These tests are CE-WP-0002-T02's machine-verifiable evidence that the
* adapter's data round-trip is lossless: a captured PDF selection becomes
* a `Selector[]`, the `Selector[]` round-trips through JSON
* (localStorage-equivalent), and the reconstructed PDF rect + page match
* the original. The browser-side selection-capture path is exercised in
* T09 against production code.
*/
import { describe, expect, it } from "vitest";
import {
findPdfRectSelector,
findTextQuoteSelector,
selectorsFromPdfCapture,
unionRect,
} from "./pdf-selector-math";
import type { PdfSelectionCapture } from "./types";
import type { NormalizedRect, Selector } from "@shared/selector";
/**
* Shared fixture used by every suite in this file: one PDF selection that
* carries a quote, a page number, two rects and a bounding rect.
*
* `boundingRect` is exactly the box enclosing both `rects`
* (x 0.12..0.67, y 0.34..0.39), which is the value the `unionRect` suite
* expects to recompute from `rects`.
*
* NOTE(review): coordinates look like page-relative fractions (the rect type
* is named `NormalizedRect`) — confirm against the type's definition.
*/
const SAMPLE_CAPTURE: PdfSelectionCapture = {
kind: "pdf",
text: "Mitglied beim Lohnsteuerhilfeverein Vereinigte Lohnsteuerhilfe e.V.",
page: 1,
rects: [
// Upper, wider rect (y 0.34..0.36).
{ x: 0.12, y: 0.34, width: 0.55, height: 0.02 },
// Lower, narrower rect (y 0.37..0.39); same left edge as the one above.
{ x: 0.12, y: 0.37, width: 0.31, height: 0.02 },
],
boundingRect: { x: 0.12, y: 0.34, width: 0.55, height: 0.05 },
};
describe("selectorsFromPdfCapture", () => {
  /**
   * Runs the capture through `selectorsFromPdfCapture` and returns only the
   * `type` tag of each emitted selector, in emission order.
   */
  const selectorTypesFor = (capture: Parameters<typeof selectorsFromPdfCapture>[0]) =>
    selectorsFromPdfCapture(capture).map((selector) => selector.type);

  it("produces a TextQuoteSelector and PdfRectSelector from a normal capture", () => {
    // Array equality pins the order of the selectors as well as their presence.
    expect(selectorTypesFor(SAMPLE_CAPTURE)).toEqual(["TextQuoteSelector", "PdfRectSelector"]);
  });

  it("includes the verbatim quote on the TextQuoteSelector", () => {
    const selectors = selectorsFromPdfCapture(SAMPLE_CAPTURE);
    const quote = findTextQuoteSelector(selectors);
    expect(quote?.exact).toBe(SAMPLE_CAPTURE.text);
  });

  it("preserves page + rects 1:1 on the PdfRectSelector", () => {
    const selectors = selectorsFromPdfCapture(SAMPLE_CAPTURE);
    const pdfRect = findPdfRectSelector(selectors);
    expect(pdfRect?.page).toBe(SAMPLE_CAPTURE.page);
    expect(pdfRect?.rects).toEqual(SAMPLE_CAPTURE.rects);
  });

  it("omits TextQuoteSelector when text is empty", () => {
    // Same fixture, but with the quote blanked out.
    const types = selectorTypesFor({ ...SAMPLE_CAPTURE, text: "" });
    expect(types).toEqual(["PdfRectSelector"]);
  });

  it("omits PdfRectSelector when no rects are present", () => {
    // Same fixture, but with the rect list emptied (boundingRect is left as is).
    const types = selectorTypesFor({ ...SAMPLE_CAPTURE, rects: [] });
    expect(types).toEqual(["TextQuoteSelector"]);
  });
});
describe("Selector[] JSON round-trip", () => {
  /**
   * Serialises the value to a JSON string and parses it straight back, which
   * is the transformation the selectors undergo on the localStorage path.
   */
  const throughJson = (value: unknown): Selector[] =>
    JSON.parse(JSON.stringify(value)) as Selector[];

  it("survives JSON.stringify/parse without loss (the localStorage path)", () => {
    const before = selectorsFromPdfCapture(SAMPLE_CAPTURE);
    const after = throughJson(before);
    expect(after).toEqual(before);
  });

  it("the restored PdfRectSelector still resolves to the same page and rects", () => {
    const revived = throughJson(selectorsFromPdfCapture(SAMPLE_CAPTURE));
    const pdfRect = findPdfRectSelector(revived);
    // Compared against the fixture itself, not against the pre-serialisation selectors.
    expect(pdfRect).not.toBeNull();
    expect(pdfRect?.page).toBe(SAMPLE_CAPTURE.page);
    expect(pdfRect?.rects).toEqual(SAMPLE_CAPTURE.rects);
  });
});
describe("unionRect", () => {
  /** The four numeric fields these tests compare. */
  type RectFields = Pick<NormalizedRect, "x" | "y" | "width" | "height">;

  /**
   * Narrows a `unionRect` result to its non-null form.
   *
   * Fails the running test when the result is missing, so callers never need
   * a non-null assertion (`!`) and a missing result is reported with an
   * explicit message instead of a TypeError on property access.
   *
   * @param result - Value returned by `unionRect`.
   * @returns The same value, typed as non-nullable.
   * @throws Error when `result` is `null` or `undefined`.
   */
  function requireRect(
    result: ReturnType<typeof unionRect>,
  ): NonNullable<ReturnType<typeof unionRect>> {
    expect(result).not.toBeNull();
    // The expectation above already fails on null; this guard also covers
    // undefined and gives the compiler the narrowing it needs.
    if (result == null) {
      throw new Error("unionRect returned no rect");
    }
    return result;
  }

  /**
   * Asserts that `actual` matches `expected` on x, y, width and height.
   *
   * @param actual - Rect under test.
   * @param expected - Reference rect.
   * @param digits - Precision forwarded to `toBeCloseTo` for every field.
   */
  function expectRectCloseTo(actual: RectFields, expected: RectFields, digits: number): void {
    expect(actual.x).toBeCloseTo(expected.x, digits);
    expect(actual.y).toBeCloseTo(expected.y, digits);
    expect(actual.width).toBeCloseTo(expected.width, digits);
    expect(actual.height).toBeCloseTo(expected.height, digits);
  }

  it("returns null for an empty input", () => {
    expect(unionRect([])).toBeNull();
  });

  it("returns the single rect when given exactly one", () => {
    const only: NormalizedRect = { x: 0.1, y: 0.2, width: 0.3, height: 0.4 };
    expectRectCloseTo(requireRect(unionRect([only])), only, 9);
  });

  it("computes the bounding box of multi-line text rects", () => {
    // Expected box is spelled out literally rather than read from
    // SAMPLE_CAPTURE.boundingRect, keeping the check independent of the fixture.
    const expected: NormalizedRect = { x: 0.12, y: 0.34, width: 0.55, height: 0.05 };
    // Looser precision than the other cases: the expected extents are sums
    // and differences of decimal fractions.
    expectRectCloseTo(requireRect(unionRect(SAMPLE_CAPTURE.rects)), expected, 5);
  });

  it("is order-independent", () => {
    // Copy before reversing so the shared fixture is not mutated.
    const reversed = [...SAMPLE_CAPTURE.rects].reverse();
    const forward = requireRect(unionRect(SAMPLE_CAPTURE.rects));
    const back = requireRect(unionRect(reversed));
    expectRectCloseTo(back, forward, 9);
  });
});