generated from coulomb/repo-seed
Completes the PDF review slice end-to-end. After this commit a user can
open a fixture, select text, save an evidence item with commentary, see
it in the sidebar, reload the page, click the item, and the viewer
scrolls to the passage.
- T03 src/source/pdf/{fingerprint,extract,ingest}.ts + 39 fixture tests
- SHA-256 fingerprint over a fresh ArrayBuffer (TS BufferSource-safe)
- PDF.js text extract; per-page normalize then join with "\n\n"
- PageMap + OffsetMap (gap-free coverage); pageLength = end - start
- Updated the manifest's Betriebskosten quote to one that PDF.js extracts cleanly
- T04 src/anchor/selectors/{create,resolve}.ts + 25 unit + 7 fixture tests
- createSelectors emits the maximal redundant set (TextQuote +
TextPosition + PdfRect + PdfPageText when available)
- resolveSelectors implements the SharedContracts §7 ladder; confidence
1.0 (pos+quote) → 0.7 (rect-only) → 0 (unresolved)
- Cross-module integration test moved to tests/integration/ to honor
the anchor↛source boundary lint rule
- T05 engine: sync event bus over the closed §4 vocabulary, Map-backed
repos, services, createEngine() composition root, 12 tests
- T06 work + app: three-pane shell (CollectionList | ViewerShell |
EvidenceSidebar) wired through EngineProvider; EngineContext lives in
src/work/ to respect the work↛app boundary; SpikeApp deleted
- T07 AnnotationToolbar: pendingSelection in context; Save runs
createSelectors → engine.annotations.create → engine.evidence.create
- T08 click-to-reopen + localStorage persistence
- scrollToAnnotation state in context with a version counter so a
second click on the same item re-fires the viewer scroll
- captureSnapshot/restoreSnapshot/attachPersister/restoreFromStorage;
restore bypasses services to avoid event-loops
- active-document id persisted alongside the snapshot so reload lands
on the same fixture; ADR-0005 written
- 9 persistence tests
- T09 tests/integration/app-prd-scenario.dom.test.tsx
- end-to-end happy-dom test of PRD scenario steps 1-8 through the real
React tree; viewer + ingest mocked per ADR-0004's headless-Chromium
limitation. Fixed memo-deps bug in EvidenceSidebar/ViewerShell where
useEngineEventTick values were not included in the useMemo deps,
leaving stale memoization across event-driven re-renders
- vitest.config.ts: happy-dom for *.dom.test.{ts,tsx} files
- noEmit added to tsconfig so tsc -b doesn't litter src/ with .js outputs
Gates: typecheck ✓ lint ✓ test 109/109 across 11 files ✓ build ✓
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
137 lines
5.5 KiB
TypeScript
137 lines
5.5 KiB
TypeScript
import { describe, expect, it } from "vitest";
|
|
import type { DocumentRepresentation } from "@shared/document";
|
|
import type { DocumentId, RepresentationId } from "@shared/ids";
|
|
import type {
|
|
PdfPageTextSelector,
|
|
PdfRectSelector,
|
|
TextPositionSelector,
|
|
TextQuoteSelector,
|
|
} from "@shared/selector";
|
|
import { createSelectors } from "./create";
|
|
import type { PdfSelectionCapture } from "../types";
|
|
|
|
/**
 * Builds a minimal single-page `DocumentRepresentation` fixture around the
 * given canonical text.
 *
 * The one offset-map entry spans the whole text (`0 .. canonicalText.length`),
 * so every global offset in the text is attributed to page 1.
 *
 * @param canonicalText - Text stored as the representation's canonical text.
 * @returns A representation with fixed test ids, one page-map entry and one
 *   offset-map entry covering the full text.
 */
function repr(canonicalText: string): DocumentRepresentation {
  // The single page owns the entire text, so its length is the text length.
  const pageLength = canonicalText.length;

  return {
    id: "rep_test" as RepresentationId,
    documentId: "doc_test" as DocumentId,
    representationType: "pdf-text",
    contentHash: "test",
    canonicalText,
    pageMap: [{ page: 1, width: 595, height: 842 }],
    offsetMap: [
      { page: 1, globalStart: 0, globalEnd: pageLength, pageLength },
    ],
    generatedAt: "2026-05-25T00:00:00.000Z",
  };
}
|
|
|
|
/**
 * Builds a synthetic PDF selection capture for the tests in this file.
 *
 * The capture carries `rectsCount` rectangles stacked vertically: each is
 * 0.5 wide and 0.04 tall, starts at x = 0.1, and is offset 0.05 below the
 * previous one, beginning at y = 0.2.
 *
 * The bounding rectangle encloses every generated rectangle. Its height
 * therefore accounts for the 0.05 step between rectangles rather than
 * assuming they are packed edge to edge; for zero or one rectangle this is
 * the same value as `0.04 * rectsCount`.
 *
 * @param text - The selected text reported by the capture.
 * @param page - Page number the selection is reported on (defaults to 1).
 * @param rectsCount - How many rectangles to generate (defaults to 1).
 * @returns A capture of kind `"pdf"` with the generated rectangles.
 */
function capture(text: string, page = 1, rectsCount = 1): PdfSelectionCapture {
  const left = 0.1;
  const top = 0.2;
  const rectWidth = 0.5;
  const rectHeight = 0.04;
  const lineStep = 0.05;

  const rects = Array.from({ length: rectsCount }, (_, i) => ({
    x: left,
    y: top + i * lineStep,
    width: rectWidth,
    height: rectHeight,
  }));

  // Distance from the top of the first rect to the bottom of the last one.
  // Kept as a closed-form expression so the single-rect case stays exactly
  // 0.04 with no floating-point drift.
  const boundingHeight =
    rectsCount > 0 ? (rectsCount - 1) * lineStep + rectHeight : 0;

  return {
    kind: "pdf",
    text,
    page,
    rects,
    boundingRect: { x: left, y: top, width: rectWidth, height: boundingHeight },
  };
}
|
|
|
|
/**
 * Unit tests for `createSelectors`, exercised against a single-page
 * representation of one sentence (plus one ad-hoc two-page representation).
 *
 * Each test runs `createSelectors` on a synthetic capture and inspects one
 * selector type in the returned list.
 */
describe("createSelectors", () => {
  const text = "The quick brown fox jumps over the lazy dog near the river bank.";
  const representation = repr(text);

  /**
   * Narrows a possibly-missing lookup result, failing the test with a
   * readable message instead of a `TypeError` on property access.
   */
  function must<T>(found: T | undefined, label: string): T {
    if (found === undefined) {
      throw new Error(`expected createSelectors to emit a ${label}`);
    }
    return found;
  }

  it("always includes a TextQuoteSelector with prefix and suffix from canonical text", () => {
    const sels = createSelectors(capture("brown fox"), representation);
    const quote = must(
      sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector"),
      "TextQuoteSelector",
    );

    expect(quote.exact).toBe("brown fox");
    // Only 10 characters precede the quote, so the prefix is the whole lead-in.
    expect(quote.prefix).toBe("The quick ");
    // The expected suffix is 32 characters long, cut mid-word.
    expect(quote.suffix).toBe(" jumps over the lazy dog near th");
  });

  it("includes a TextPositionSelector pointing at the matched offset", () => {
    const sels = createSelectors(capture("brown fox"), representation);
    const pos = must(
      sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector"),
      "TextPositionSelector",
    );

    const expectedStart = text.indexOf("brown fox");
    expect(pos.start).toBe(expectedStart);
    expect(pos.end).toBe(expectedStart + "brown fox".length);
  });

  it("includes a PdfRectSelector mirroring the capture's page and rects", () => {
    const c = capture("brown fox", 1, 2);
    const sels = createSelectors(c, representation);
    const rect = must(
      sels.find((s): s is PdfRectSelector => s.type === "PdfRectSelector"),
      "PdfRectSelector",
    );

    expect(rect.page).toBe(1);
    expect(rect.rects).toEqual(c.rects);
  });

  it("includes a PdfPageTextSelector when the match falls inside the capture's page range", () => {
    const sels = createSelectors(capture("brown fox"), representation);
    const pageText = must(
      sels.find((s): s is PdfPageTextSelector => s.type === "PdfPageTextSelector"),
      "PdfPageTextSelector",
    );

    expect(pageText.page).toBe(1);
    // Page 1 starts at global offset 0 in this fixture, so the expected start
    // equals the global index of the quote.
    expect(pageText.start).toBe(text.indexOf("brown fox"));
  });

  it("omits the TextPositionSelector when the quote cannot be found in canonical text", () => {
    const sels = createSelectors(capture("nonexistent phrase"), representation);

    const pos = sels.find((s) => s.type === "TextPositionSelector");
    expect(pos).toBeUndefined();

    // The quote selector is still expected, just without surrounding context.
    const quote = must(
      sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector"),
      "TextQuoteSelector",
    );
    expect(quote.exact).toBe("nonexistent phrase");
    expect(quote.prefix).toBeUndefined();
    expect(quote.suffix).toBeUndefined();
  });

  it("clamps prefix at the start of the canonical text", () => {
    const sels = createSelectors(capture("The quick"), representation);
    const quote = must(
      sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector"),
      "TextQuoteSelector",
    );

    // Nothing precedes the quote, and the test expects no prefix at all.
    expect(quote.prefix).toBeUndefined();
    expect(quote.suffix).toBe(" brown fox jumps over the lazy d");
  });

  it("clamps suffix at the end of the canonical text", () => {
    const sels = createSelectors(capture("river bank."), representation);
    const quote = must(
      sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector"),
      "TextQuoteSelector",
    );

    expect(quote.prefix).toBe("umps over the lazy dog near the ");
    // Nothing follows the quote, and the test expects no suffix at all.
    expect(quote.suffix).toBeUndefined();
  });

  it("honors a custom contextChars option", () => {
    const sels = createSelectors(capture("brown fox"), representation, { contextChars: 4 });
    const quote = must(
      sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector"),
      "TextQuoteSelector",
    );

    expect(quote.prefix).toBe("ick ");
    expect(quote.suffix).toBe(" jum");
  });

  it("prefers the on-page match when the quote appears on multiple pages", () => {
    // Two-page representation where the quote appears once per page.
    const pageOne = "alpha echo bravo";
    const pageTwo = "charlie echo delta";
    const separator = "\n\n";
    const canonical = pageOne + separator + pageTwo;

    // Page 1's range includes the separator, so page 2 starts right after it
    // and the offset map has no gap (16 + 2 = 18).
    const pageTwoStart = pageOne.length + separator.length;

    const rep: DocumentRepresentation = {
      id: "rep_multi" as RepresentationId,
      documentId: "doc_multi" as DocumentId,
      representationType: "pdf-text",
      contentHash: "h",
      canonicalText: canonical,
      pageMap: [
        { page: 1, width: 100, height: 100 },
        { page: 2, width: 100, height: 100 },
      ],
      offsetMap: [
        { page: 1, globalStart: 0, globalEnd: pageTwoStart, pageLength: pageTwoStart },
        {
          page: 2,
          globalStart: pageTwoStart,
          globalEnd: canonical.length,
          pageLength: canonical.length - pageTwoStart,
        },
      ],
      generatedAt: "2026-05-25T00:00:00.000Z",
    };

    // The capture reports page 2, so the second "echo" must win over the first.
    const sels = createSelectors(capture("echo", 2), rep);
    const pos = must(
      sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector"),
      "TextPositionSelector",
    );
    expect(pos.start).toBe(canonical.indexOf("echo", pageTwoStart));
  });
});
|