Implement CE-WP-0002 T03-T09: ingest, anchor resolution, engine, UI, persistence, e2e

Completes the PDF review slice end-to-end. After this commit a user can
open a fixture, select text, save an evidence item with commentary, see
it in the sidebar, reload the page, click the item, and the viewer
scrolls to the passage.

- T03 src/source/pdf/{fingerprint,extract,ingest}.ts + 39 fixture tests
  - SHA-256 fingerprint over a fresh ArrayBuffer (TS BufferSource-safe)
  - PDF.js text extract; per-page normalize then join with "\n\n"
  - PageMap + OffsetMap (gap-free coverage); pageLength = end - start
  - Updated the manifest's Betriebskosten quote to one that PDF.js extracts cleanly
- T04 src/anchor/selectors/{create,resolve}.ts + 25 unit + 7 fixture tests
  - createSelectors emits the maximal redundant set (TextQuote +
    TextPosition + PdfRect + PdfPageText when available)
  - resolveSelectors implements the SharedContracts §7 ladder; confidence
    1.0 (pos+quote) → 0.7 (rect-only) → 0 (unresolved)
  - Cross-module integration test moved to tests/integration/ to honor
    the anchor↛source boundary lint rule
- T05 engine: sync event bus over the closed §4 vocabulary, Map-backed
  repos, services, createEngine() composition root, 12 tests
- T06 work + app: three-pane shell (CollectionList | ViewerShell |
  EvidenceSidebar) wired through EngineProvider; EngineContext lives in
  src/work/ to respect the work↛app boundary; SpikeApp deleted
- T07 AnnotationToolbar: pendingSelection in context; Save runs
  createSelectors → engine.annotations.create → engine.evidence.create
- T08 click-to-reopen + localStorage persistence
  - scrollToAnnotation state in context with a version counter so a
    second click on the same item re-fires the viewer scroll
  - captureSnapshot/restoreSnapshot/attachPersister/restoreFromStorage;
    restore bypasses services to avoid event-loops
  - active-document id persisted alongside the snapshot so reload lands
    on the same fixture; ADR-0005 written
  - 9 persistence tests
- T09 tests/integration/app-prd-scenario.dom.test.tsx
  - end-to-end happy-dom test of PRD scenario steps 1-8 through the real
    React tree; viewer + ingest mocked per ADR-0004's headless-Chromium
    limitation. Fixed memo-deps bug in EvidenceSidebar/ViewerShell where
    useEngineEventTick values were not included in the useMemo deps,
    leaving stale memoization across event-driven re-renders
- vitest.config.ts: happy-dom for *.dom.test.{ts,tsx} files
- noEmit added to tsconfig so tsc -b doesn't litter src/ with .js outputs

Gates: typecheck ✓ lint ✓ test 109/109 across 11 files ✓ build ✓

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-25 10:58:11 +02:00
parent 2a7b05c190
commit d54daf2e61
45 changed files with 3655 additions and 277 deletions

View File

@@ -5,3 +5,9 @@ export {
type PdfSpikeViewerProps,
type StoredAnnotation,
} from "./pdf-viewer-adapter-spike";
export {
createSelectors,
resolveSelectors,
DEFAULT_CONTEXT_CHARS,
type CreateSelectorsOptions,
} from "./selectors";

View File

@@ -0,0 +1,136 @@
import { describe, expect, it } from "vitest";
import type { DocumentRepresentation } from "@shared/document";
import type { DocumentId, RepresentationId } from "@shared/ids";
import type {
PdfPageTextSelector,
PdfRectSelector,
TextPositionSelector,
TextQuoteSelector,
} from "@shared/selector";
import { createSelectors } from "./create";
import type { PdfSelectionCapture } from "../types";
/**
 * Test fixture: wrap `canonicalText` in a one-page `pdf-text`
 * representation whose single offset-map entry covers the whole text.
 */
function repr(canonicalText: string): DocumentRepresentation {
  const { length } = canonicalText;
  // The only page spans [0, length) of the canonical text.
  const wholeText = { page: 1, globalStart: 0, globalEnd: length, pageLength: length };
  const fixture: DocumentRepresentation = {
    id: "rep_test" as RepresentationId,
    documentId: "doc_test" as DocumentId,
    representationType: "pdf-text",
    contentHash: "test",
    canonicalText,
    pageMap: [{ page: 1, width: 595, height: 842 }],
    offsetMap: [wholeText],
    generatedAt: "2026-05-25T00:00:00.000Z",
  };
  return fixture;
}
/**
 * Test fixture: a PDF selection capture for `text` on `page` carrying
 * `rectsCount` rectangles, each 0.05 lower on the page than the previous one.
 */
function capture(text: string, page = 1, rectsCount = 1): PdfSelectionCapture {
  const rects = Array.from({ length: rectsCount }).map((_, row) => ({
    x: 0.1,
    y: 0.2 + row * 0.05,
    width: 0.5,
    height: 0.04,
  }));
  const boundingRect = { x: 0.1, y: 0.2, width: 0.5, height: 0.04 * rectsCount };
  return { kind: "pdf", text, page, rects, boundingRect };
}
// Unit tests for `createSelectors`. Every case except the last runs against
// a single-page representation built from the fixture sentence below.
describe("createSelectors", () => {
const text = "The quick brown fox jumps over the lazy dog near the river bank.";
const representation = repr(text);
// "brown fox" starts at offset 10, so only 10 characters of prefix exist;
// the suffix is the full default context width.
it("always includes a TextQuoteSelector with prefix and suffix from canonical text", () => {
const sels = createSelectors(capture("brown fox"), representation);
const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector");
expect(quote).toBeDefined();
expect(quote!.exact).toBe("brown fox");
expect(quote!.prefix).toBe("The quick ");
expect(quote!.suffix).toBe(" jumps over the lazy dog near th");
});
// The position selector must span exactly the quote within the canonical text.
it("includes a TextPositionSelector pointing at the matched offset", () => {
const sels = createSelectors(capture("brown fox"), representation);
const pos = sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector");
expect(pos).toBeDefined();
expect(pos!.start).toBe(text.indexOf("brown fox"));
expect(pos!.end).toBe(text.indexOf("brown fox") + "brown fox".length);
});
// Rect data is taken from the capture as-is (two rects requested here).
it("includes a PdfRectSelector mirroring the capture's page and rects", () => {
const c = capture("brown fox", 1, 2);
const sels = createSelectors(c, representation);
const rect = sels.find((s): s is PdfRectSelector => s.type === "PdfRectSelector");
expect(rect).toBeDefined();
expect(rect!.page).toBe(1);
expect(rect!.rects).toEqual(c.rects);
});
// Page 1 starts at global offset 0, so page-local and global offsets coincide.
it("includes a PdfPageTextSelector when the match falls inside the capture's page range", () => {
const sels = createSelectors(capture("brown fox"), representation);
const pageText = sels.find((s): s is PdfPageTextSelector => s.type === "PdfPageTextSelector");
expect(pageText).toBeDefined();
expect(pageText!.page).toBe(1);
expect(pageText!.start).toBe(text.indexOf("brown fox"));
});
// With no match there is no offset to store, and no context to copy into
// the quote selector.
it("omits the TextPositionSelector when the quote cannot be found in canonical text", () => {
const sels = createSelectors(capture("nonexistent phrase"), representation);
const pos = sels.find((s) => s.type === "TextPositionSelector");
expect(pos).toBeUndefined();
const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector");
expect(quote!.exact).toBe("nonexistent phrase");
expect(quote!.prefix).toBeUndefined();
expect(quote!.suffix).toBeUndefined();
});
// A match at offset 0 has no preceding text, so `prefix` is omitted entirely.
it("clamps prefix at the start of the canonical text", () => {
const sels = createSelectors(capture("The quick"), representation);
const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
expect(quote.prefix).toBeUndefined();
expect(quote.suffix).toBe(" brown fox jumps over the lazy d");
});
// A match ending at the last character has no following text, so `suffix`
// is omitted entirely.
it("clamps suffix at the end of the canonical text", () => {
const sels = createSelectors(capture("river bank."), representation);
const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
expect(quote.prefix).toBe("umps over the lazy dog near the ");
expect(quote.suffix).toBeUndefined();
});
// `contextChars` overrides the default context width on both sides.
it("honors a custom contextChars option", () => {
const sels = createSelectors(capture("brown fox"), representation, { contextChars: 4 });
const quote = sels.find((s): s is TextQuoteSelector => s.type === "TextQuoteSelector")!;
expect(quote.prefix).toBe("ick ");
expect(quote.suffix).toBe(" jum");
});
it("prefers the on-page match when the quote appears on multiple pages", () => {
// Two-page representation where the quote appears once per page.
const canonical = "alpha echo bravo" + "\n\n" + "charlie echo delta";
const rep: DocumentRepresentation = {
id: "rep_multi" as RepresentationId,
documentId: "doc_multi" as DocumentId,
representationType: "pdf-text",
contentHash: "h",
canonicalText: canonical,
pageMap: [
{ page: 1, width: 100, height: 100 },
{ page: 2, width: 100, height: 100 },
],
// Page 1 covers the first 18 characters: the 16-character page text plus
// the 2-character "\n\n" separator.
offsetMap: [
{ page: 1, globalStart: 0, globalEnd: 18, pageLength: 18 },
{ page: 2, globalStart: 18, globalEnd: canonical.length, pageLength: canonical.length - 18 },
],
generatedAt: "2026-05-25T00:00:00.000Z",
};
// The capture is on page 2, so the second "echo" must be chosen.
const sels = createSelectors(capture("echo", 2), rep);
const pos = sels.find((s): s is TextPositionSelector => s.type === "TextPositionSelector")!;
expect(pos.start).toBe(canonical.indexOf("echo", 18));
});
});

View File

@@ -0,0 +1,157 @@
/**
* Build the maximal `Selector[]` from a viewer's `SelectionCapture`.
*
* Implements the "always store all selector types that are available" rule
* from `wiki/SharedContracts.md` §3 (selector redundancy) and the create
* half of the `AnchorAdapter` contract in
* `wiki/ArchitectureOverview.md` §3.3.
*
* Output guarantee: every returned `Selector[]` includes a
* `TextQuoteSelector` whenever the normalized selection text is non-empty,
* and adds `TextPositionSelector`, `PdfRectSelector`, `PdfPageTextSelector`
* only when the underlying data actually supports them. Resolvers can rely
* on the union being trimmed — a missing selector means "not available",
* not "skipped".
*/
import type { DocumentRepresentation } from "@shared/document";
import { normalize } from "@shared/text/normalize";
import type {
PdfPageTextSelector,
PdfRectSelector,
Selector,
TextPositionSelector,
TextQuoteSelector,
} from "@shared/selector";
import type { PdfSelectionCapture, SelectionCapture } from "../types";
/**
* Default number of characters of prefix/suffix context stored on a
* TextQuoteSelector when `contextChars` is not supplied.
*/
export const DEFAULT_CONTEXT_CHARS = 32;
/** Options accepted by `createSelectors`. */
export interface CreateSelectorsOptions {
/**
* Maximum number of characters captured before and after the quote as
* prefix/suffix context. Falls back to `DEFAULT_CONTEXT_CHARS` when omitted.
*/
readonly contextChars?: number;
}
/**
 * Build the selector set for a viewer selection.
 *
 * @param capture - The selection reported by the viewer.
 * @param representation - The document representation the selection is
 *   matched against.
 * @param options - Optional tuning; see `CreateSelectorsOptions`.
 * @returns The selectors produced for the capture.
 */
export function createSelectors(
  capture: SelectionCapture,
  representation: DocumentRepresentation,
  options: CreateSelectorsOptions = {},
): Selector[] {
  // `SelectionCapture` is a discriminated union whose DOM branch is `never`
  // in the MVP, so every capture that reaches this point is a
  // `PdfSelectionCapture` and can be handed straight to the PDF builder.
  const selectors = createSelectorsFromPdfCapture(capture, representation, options);
  return selectors;
}
/**
 * Build selectors for a PDF selection capture.
 *
 * The capture text is normalized and searched for in the representation's
 * canonical text. Selectors are appended in a fixed order — quote, position,
 * rect, page-text — and each one is emitted only when the data backing it is
 * present:
 *
 * - `TextQuoteSelector`: whenever the normalized text is non-empty; it
 *   carries prefix/suffix context only when a match was located.
 * - `TextPositionSelector`: when a match was located.
 * - `PdfRectSelector`: when the capture has at least one rect.
 * - `PdfPageTextSelector`: when a match was located and lies entirely inside
 *   the offset-map range of the capture's page.
 */
function createSelectorsFromPdfCapture(
  capture: PdfSelectionCapture,
  representation: DocumentRepresentation,
  options: CreateSelectorsOptions,
): Selector[] {
  const contextChars = options.contextChars ?? DEFAULT_CONTEXT_CHARS;
  const exact = normalize(capture.text).text;
  const haystack = representation.canonicalText ?? "";
  const hasQuote = exact.length > 0;

  // Only search when both sides are non-empty.
  const hits = haystack.length > 0 && hasQuote ? findAllOccurrences(haystack, exact) : [];

  // Prefer the hit on the capture's page when the offset map knows that page;
  // `pickMatch` falls back to the first hit, or `null` when there is none.
  const pageRange = representation.offsetMap?.find((entry) => entry.page === capture.page);
  const start = pickMatch(hits, pageRange);
  const located = start !== null;

  const selectors: Selector[] = [];

  // 1. TextQuoteSelector — a bare quote (no context) is still emitted when
  //    nothing matched, so the annotation stays recoverable if the
  //    representation is rebuilt later.
  if (hasQuote) {
    if (start === null) {
      selectors.push({ type: "TextQuoteSelector", exact } satisfies TextQuoteSelector);
    } else {
      selectors.push(buildQuoteSelectorWithContext(haystack, start, exact, contextChars));
    }
  }

  // 2. TextPositionSelector — global offsets of the chosen match.
  if (start !== null) {
    selectors.push({
      type: "TextPositionSelector",
      start,
      end: start + exact.length,
    } satisfies TextPositionSelector);
  }

  // 3. PdfRectSelector — copied straight from the capture.
  if (capture.rects.length > 0) {
    selectors.push({
      type: "PdfRectSelector",
      page: capture.page,
      rects: capture.rects,
    } satisfies PdfRectSelector);
  }

  // 4. PdfPageTextSelector — page-local offsets, only when the whole match
  //    sits inside the capture page's range.
  if (
    start !== null &&
    located &&
    pageRange &&
    start >= pageRange.globalStart &&
    start + exact.length <= pageRange.globalEnd
  ) {
    const localStart = start - pageRange.globalStart;
    selectors.push({
      type: "PdfPageTextSelector",
      page: capture.page,
      start: localStart,
      end: localStart + exact.length,
    } satisfies PdfPageTextSelector);
  }

  return selectors;
}
/**
 * Collect the start offset of every occurrence of `needle` in `haystack`,
 * in ascending order. Overlapping occurrences are included because the
 * search resumes one character after each hit. An empty needle yields `[]`.
 */
function findAllOccurrences(haystack: string, needle: string): number[] {
  const hits: number[] = [];
  if (needle.length === 0) return hits;
  let cursor = haystack.indexOf(needle);
  while (cursor !== -1) {
    hits.push(cursor);
    cursor = haystack.indexOf(needle, cursor + 1);
  }
  return hits;
}
/**
 * Choose which occurrence of the quote to anchor on.
 *
 * @param positions - Candidate start offsets, in search order.
 * @param pageRange - Global offset range of the capture's page, if known.
 * @returns `null` when there are no candidates; the sole candidate when
 *   there is exactly one; otherwise the first candidate that starts inside
 *   `pageRange`, falling back to the first candidate overall.
 */
function pickMatch(
  positions: readonly number[],
  pageRange: { globalStart: number; globalEnd: number } | undefined,
): number | null {
  const count = positions.length;
  if (count === 0) return null;

  const fallback = positions[0]!;
  if (count > 1 && pageRange) {
    const { globalStart, globalEnd } = pageRange;
    for (const candidate of positions) {
      if (candidate >= globalStart && candidate < globalEnd) return candidate;
    }
  }
  // Single match, no page hint, or nothing on the page: use the first match.
  // With several matches the resolver has to rely on prefix/suffix later.
  return fallback;
}
/**
 * Build a `TextQuoteSelector` for `exact` located at `matchOffset`, copying
 * up to `contextChars` characters on each side from `canonicalText`.
 * Context is clamped at the text boundaries, and an empty prefix or suffix
 * is left off the selector rather than stored as `""`.
 */
function buildQuoteSelectorWithContext(
  canonicalText: string,
  matchOffset: number,
  exact: string,
  contextChars: number,
): TextQuoteSelector {
  const matchEnd = matchOffset + exact.length;
  const before = canonicalText.slice(Math.max(0, matchOffset - contextChars), matchOffset);
  const after = canonicalText.slice(
    matchEnd,
    Math.min(canonicalText.length, matchEnd + contextChars),
  );
  return {
    type: "TextQuoteSelector",
    exact,
    ...(before.length > 0 ? { prefix: before } : {}),
    ...(after.length > 0 ? { suffix: after } : {}),
  };
}

View File

@@ -0,0 +1,6 @@
export {
createSelectors,
DEFAULT_CONTEXT_CHARS,
type CreateSelectorsOptions,
} from "./create";
export { resolveSelectors } from "./resolve";

View File

@@ -0,0 +1,137 @@
import { describe, expect, it } from "vitest";
import type { DocumentRepresentation } from "@shared/document";
import type { DocumentId, RepresentationId } from "@shared/ids";
import type { Selector } from "@shared/selector";
import { resolveSelectors } from "./resolve";
/**
 * Test fixture: a `pdf-text` representation of `canonicalText` split into
 * `pages` consecutive offset ranges. Each page gets
 * `floor(length / pages)` characters; the last page absorbs the remainder.
 */
function repr(canonicalText: string, pages = 1): DocumentRepresentation {
  const total = canonicalText.length;
  const segmentLen = pages === 1 ? total : Math.floor(total / pages);

  const offsetMap = [];
  let globalStart = 0;
  for (let index = 0; index < pages; index += 1) {
    const isLastPage = index === pages - 1;
    const globalEnd = isLastPage ? total : globalStart + segmentLen;
    offsetMap.push({
      page: index + 1,
      globalStart,
      globalEnd,
      pageLength: globalEnd - globalStart,
    });
    globalStart += segmentLen;
  }

  const pageMap = Array.from({ length: pages }, (_, index) => ({
    page: index + 1,
    width: 595,
    height: 842,
  }));

  return {
    id: "rep_test" as RepresentationId,
    documentId: "doc_test" as DocumentId,
    representationType: "pdf-text",
    contentHash: "test",
    canonicalText,
    pageMap,
    offsetMap,
    generatedAt: "2026-05-25T00:00:00.000Z",
  };
}
// Unit tests for `resolveSelectors`: one case per rung of the confidence
// ladder, plus the ambiguous and unresolved outcomes.
describe("resolveSelectors", () => {
const text = "The quick brown fox jumps over the lazy dog.";
const representation = repr(text);
const brownFoxStart = text.indexOf("brown fox");
const brownFoxEnd = brownFoxStart + "brown fox".length;
// Position slice equals the quote: both selectors count as used.
it("returns 1.0 confidence when position and quote agree exactly", () => {
const selectors: Selector[] = [
{ type: "TextPositionSelector", start: brownFoxStart, end: brownFoxEnd },
{ type: "TextQuoteSelector", exact: "brown fox" },
];
const r = resolveSelectors(selectors, representation);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(1.0);
expect(r.candidates[0]?.textPosition).toEqual({ start: brownFoxStart, end: brownFoxEnd });
expect(r.candidates[0]?.page).toBe(1);
expect(r.usedSelectorTypes).toEqual(["TextPositionSelector", "TextQuoteSelector"]);
});
// A position pointing at different text is discarded with a warning; the
// quote search alone then produces the result.
it("falls back to quote search when position is stale, and records a warning", () => {
const selectors: Selector[] = [
{ type: "TextPositionSelector", start: 0, end: 9 }, // "The quick"
{ type: "TextQuoteSelector", exact: "brown fox" },
];
const r = resolveSelectors(selectors, representation);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(0.95);
expect(r.candidates[0]?.textPosition).toEqual({ start: brownFoxStart, end: brownFoxEnd });
expect(r.warnings?.[0]).toMatch(/did not match/);
expect(r.usedSelectorTypes).toEqual(["TextQuoteSelector"]);
});
// Without a quote the position cannot be verified, so it is accepted at a
// lower confidence.
it("returns 0.85 for a position-only selector with no quote to verify", () => {
const selectors: Selector[] = [
{ type: "TextPositionSelector", start: brownFoxStart, end: brownFoxEnd },
];
const r = resolveSelectors(selectors, representation);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(0.85);
});
it("returns 0.95 when only TextQuoteSelector is present and the quote is unique", () => {
const r = resolveSelectors(
[{ type: "TextQuoteSelector", exact: "brown fox" }],
representation,
);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(0.95);
});
// "echo" occurs twice; only the second occurrence has the given
// prefix/suffix around it.
it("returns 0.9 when a duplicated quote is disambiguated by prefix/suffix", () => {
const dup = "alpha echo bravo charlie echo delta";
const r = resolveSelectors(
[{ type: "TextQuoteSelector", exact: "echo", prefix: "charlie ", suffix: " delta" }],
repr(dup),
);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(0.9);
expect(r.candidates[0]?.textPosition?.start).toBe(dup.indexOf("echo", 10));
});
// No prefix/suffix to tell the two occurrences apart.
it("returns ambiguous when a duplicated quote cannot be disambiguated", () => {
const dup = "echo and echo";
const r = resolveSelectors(
[{ type: "TextQuoteSelector", exact: "echo" }],
repr(dup),
);
expect(r.status).toBe("ambiguous");
expect(r.confidence).toBe(0.5);
});
it("falls back to PdfPageTextSelector via the OffsetMap", () => {
// Single page, "brown fox" at offset 10..19.
const r = resolveSelectors(
[{ type: "PdfPageTextSelector", page: 1, start: brownFoxStart, end: brownFoxEnd }],
representation,
);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(0.8);
expect(r.candidates[0]?.textPosition).toEqual({ start: brownFoxStart, end: brownFoxEnd });
expect(r.candidates[0]?.page).toBe(1);
});
// The rect fallback reports the selector's own page (2) even though the
// representation built here has a single page.
it("falls back to PdfRectSelector with page+rects only at 0.7 confidence", () => {
const r = resolveSelectors(
[{
type: "PdfRectSelector",
page: 2,
rects: [{ x: 0.1, y: 0.2, width: 0.3, height: 0.04 }],
}],
repr(text, 1),
);
expect(r.status).toBe("resolved");
expect(r.confidence).toBe(0.7);
expect(r.candidates[0]?.page).toBe(2);
expect(r.candidates[0]?.textPosition).toBeUndefined();
expect(r.candidates[0]?.rects).toHaveLength(1);
});
it("returns unresolved when nothing matches", () => {
const r = resolveSelectors(
[{ type: "TextQuoteSelector", exact: "missing string" }],
representation,
);
expect(r.status).toBe("unresolved");
expect(r.confidence).toBe(0);
expect(r.candidates).toEqual([]);
});
});

View File

@@ -0,0 +1,260 @@
/**
* Resolve a `Selector[]` against a `DocumentRepresentation`.
*
* Implements the resolution strategy from `wiki/ArchitectureOverview.md` §7,
* MVP-trimmed:
*
* 1. Try `TextPositionSelector` (cheapest — direct slice).
* 2. Verify with `TextQuoteSelector` at that position.
* 3. Try `TextQuoteSelector` on its own. If multiple matches, disambiguate
* by prefix/suffix.
* 4. Try `PdfPageTextSelector` (page-local offsets through the OffsetMap).
* 5. Fall back to `PdfRectSelector` for a page+rects-only target.
* 6. Return `unresolved` if nothing above succeeds.
*
* Fuzzy matching is out of scope here; a later workplan owns it.
*
* Confidence ladder (0..1):
* 1.00 — TextPosition + TextQuote agree exactly
* 0.95 — TextQuote unique match (position absent, out of range, or stale)
* 0.90 — TextQuote disambiguated by prefix/suffix
* 0.85 — TextPosition only (no quote to cross-check)
* 0.80 — PdfPageTextSelector resolved via OffsetMap
* 0.70 — PdfRectSelector only (page+rects, no text verification)
* 0.50 — TextQuote matched several times and could not be disambiguated
*        (status `ambiguous`; the first candidate is returned)
* 0.00 — unresolved (no candidates)
*/
import type { DocumentRepresentation } from "@shared/document";
import type {
PdfPageTextSelector,
PdfRectSelector,
Selector,
SelectorType,
TextPositionSelector,
TextQuoteSelector,
} from "@shared/selector";
import type { AnchorResolution, ResolvedAnchorTarget } from "../types";
/**
 * Resolve a stored selector set against a document representation.
 *
 * The selectors are tried from most to least precise; the first rung that
 * succeeds determines the result:
 *
 * 1. `TextPositionSelector`, verified against `TextQuoteSelector.exact`
 *    when a quote is present (1.0), or accepted unverified (0.85).
 * 2. `TextQuoteSelector` search (confidence and status from `resolveByQuote`).
 * 3. `PdfPageTextSelector` mapped through the offset map (0.8).
 * 4. `PdfRectSelector` page + rects, with no text position (0.7).
 *
 * @param selectors - Stored selectors; when a type occurs more than once the
 *   last one is used.
 * @param representation - Representation to resolve against.
 * @returns The resolution, or an `unresolved` result when no rung applies.
 *   Warnings gathered on the way (e.g. a stale position) are attached.
 */
export function resolveSelectors(
  selectors: readonly Selector[],
  representation: DocumentRepresentation,
): AnchorResolution {
  const text = representation.canonicalText ?? "";
  const pages = representation.offsetMap ?? [];
  const representationId = representation.id;
  const {
    TextPositionSelector: position,
    TextQuoteSelector: quote,
    PdfPageTextSelector: pageText,
    PdfRectSelector: rect,
  } = indexByType(selectors);
  const warnings: string[] = [];
  const hasText = text.length > 0;

  // Rung 1: direct slice at the stored position, cross-checked with the quote.
  const positionSlice =
    position && hasText ? sliceSafely(text, position.start, position.end) : null;
  if (position && positionSlice !== null) {
    if (!quote || positionSlice === quote.exact) {
      const target: ResolvedAnchorTarget = {
        representationId,
        textPosition: { start: position.start, end: position.end },
        ...pageFor(position, pages),
      };
      return quote
        ? resolved(target, 1.0, ["TextPositionSelector", "TextQuoteSelector"], warnings)
        : // No quote to verify against — accept at lower confidence.
          resolved(target, 0.85, ["TextPositionSelector"], warnings);
    }
    // The position points at different text; remember that and keep going.
    warnings.push(
      "TextPositionSelector slice did not match TextQuoteSelector.exact; falling back to quote search.",
    );
  }

  // Rung 2: search for the quote itself.
  if (quote && hasText) {
    const hit = resolveByQuote(text, quote);
    if (hit) {
      const span = { start: hit.offset, end: hit.offset + quote.exact.length };
      return resolved(
        { representationId, textPosition: span, ...pageFor(span, pages) },
        hit.confidence,
        ["TextQuoteSelector"],
        warnings,
        hit.status,
      );
    }
  }

  // Rung 3: page-local offsets translated to global ones via the offset map.
  if (pageText && pages.length > 0) {
    const range = pages.find((entry) => entry.page === pageText.page);
    const withinPage =
      range !== undefined &&
      pageText.start >= 0 &&
      pageText.end <= range.pageLength &&
      pageText.start < pageText.end;
    if (range && withinPage) {
      return resolved(
        {
          representationId,
          page: pageText.page,
          textPosition: {
            start: range.globalStart + pageText.start,
            end: range.globalStart + pageText.end,
          },
        },
        0.8,
        ["PdfPageTextSelector"],
        warnings,
      );
    }
  }

  // Rung 4: geometry only — nothing to verify against the text.
  if (rect) {
    return resolved(
      { representationId, page: rect.page, rects: rect.rects },
      0.7,
      ["PdfRectSelector"],
      warnings,
    );
  }

  return unresolved(warnings);
}
/** Outcome of a successful quote search. */
interface QuoteResolutionResult {
  /** Global start offset of the chosen occurrence. */
  readonly offset: number;
  /** Confidence assigned to the choice (0.95, 0.9 or 0.5). */
  readonly confidence: number;
  /** `ambiguous` when several occurrences remained indistinguishable. */
  readonly status: "resolved" | "ambiguous";
}

/**
 * Locate `quote.exact` in `canonicalText`.
 *
 * @returns `null` when the quote does not occur; 0.95/`resolved` for a
 *   unique occurrence; 0.9/`resolved` when prefix/suffix context leaves
 *   exactly one occurrence; otherwise 0.5/`ambiguous`, pointing at the first
 *   occurrence that survived the context filter or, when none survived, at
 *   the first occurrence overall.
 */
function resolveByQuote(canonicalText: string, quote: TextQuoteSelector): QuoteResolutionResult | null {
  const candidates = findAllOccurrences(canonicalText, quote.exact);
  const [first] = candidates;
  if (first === undefined) return null;
  if (candidates.length === 1) {
    return { offset: first, confidence: 0.95, status: "resolved" };
  }

  // Several occurrences — keep those whose surroundings match the stored context.
  const survivors = candidates.filter((offset) => prefixSuffixMatches(canonicalText, offset, quote));
  if (survivors.length === 1) {
    return { offset: survivors[0]!, confidence: 0.9, status: "resolved" };
  }
  // Either the context matched more than one occurrence or none at all.
  return { offset: survivors[0] ?? first, confidence: 0.5, status: "ambiguous" };
}
/**
 * Check whether the text surrounding an occurrence of `quote.exact` at
 * `offset` agrees with the selector's stored context.
 *
 * A prefix must appear immediately before `offset`, and a suffix immediately
 * after the end of the quote. A context field that is `undefined` imposes no
 * constraint, so a selector without context matches every occurrence.
 */
function prefixSuffixMatches(
  canonicalText: string,
  offset: number,
  quote: TextQuoteSelector,
): boolean {
  const { prefix, suffix, exact } = quote;
  // `endsWith(p, n)` tests the text as if it ended at index n.
  const prefixOk = prefix === undefined || canonicalText.endsWith(prefix, offset);
  if (!prefixOk) return false;
  // `startsWith(s, n)` tests the text starting at index n.
  return suffix === undefined || canonicalText.startsWith(suffix, offset + exact.length);
}
/** At most one selector of each supported type, keyed by its `type` tag. */
interface SelectorIndex {
  TextQuoteSelector?: TextQuoteSelector;
  TextPositionSelector?: TextPositionSelector;
  PdfRectSelector?: PdfRectSelector;
  PdfPageTextSelector?: PdfPageTextSelector;
}

/**
 * Group selectors by type. When a type appears more than once the last
 * occurrence wins; selectors of any other type are ignored.
 */
function indexByType(selectors: readonly Selector[]): SelectorIndex {
  const index: SelectorIndex = {};
  for (const selector of selectors) {
    if (selector.type === "TextQuoteSelector") {
      index.TextQuoteSelector = selector;
    } else if (selector.type === "TextPositionSelector") {
      index.TextPositionSelector = selector;
    } else if (selector.type === "PdfRectSelector") {
      index.PdfRectSelector = selector;
    } else if (selector.type === "PdfPageTextSelector") {
      index.PdfPageTextSelector = selector;
    }
  }
  return index;
}
/**
 * Slice `text` at `[start, end)` only when the span is a valid, non-empty
 * range inside the text.
 *
 * Offsets must be integers. `NaN`, `null` (what `NaN` becomes after a JSON
 * round-trip) and fractional values pass every relational guard below —
 * comparisons with `NaN` are always false and `null` coerces to 0 — and
 * `String.prototype.slice` would then silently coerce them and return text
 * from a different span. They are rejected up front instead.
 *
 * @param text - Text to slice.
 * @param start - Inclusive start offset.
 * @param end - Exclusive end offset.
 * @returns The slice, or `null` when the offsets are not integers, fall
 *   outside the text, or describe an empty or inverted span.
 */
function sliceSafely(text: string, start: number, end: number): string | null {
  if (!Number.isInteger(start) || !Number.isInteger(end)) return null;
  if (start < 0 || end > text.length || start >= end) return null;
  return text.slice(start, end);
}
/**
 * Find the page whose global offset range fully contains `span`.
 *
 * @returns `{ page }` for the first range that contains the span, or `{}`
 *   when the offset map is empty or no single range contains it (for
 *   example when the span crosses a page boundary). The result is shaped for
 *   spreading into a target object.
 */
function pageFor(
  span: { start: number; end: number },
  offsetMap: readonly { page: number; globalStart: number; globalEnd: number }[],
): { page?: number } {
  for (const range of offsetMap) {
    const containsSpan = span.start >= range.globalStart && span.end <= range.globalEnd;
    if (containsSpan) return { page: range.page };
  }
  return {};
}
/**
 * Return the start offsets of all occurrences of `needle` in `haystack`, in
 * ascending order. The scan advances one character past each hit, so
 * overlapping occurrences are reported. An empty needle yields `[]`.
 */
function findAllOccurrences(haystack: string, needle: string): number[] {
  const offsets: number[] = [];
  if (needle.length === 0) return offsets;
  for (
    let at = haystack.indexOf(needle);
    at !== -1;
    at = haystack.indexOf(needle, at + 1)
  ) {
    offsets.push(at);
  }
  return offsets;
}
/**
 * Assemble a successful resolution carrying a single candidate.
 *
 * @param target - The resolved candidate.
 * @param confidence - Confidence assigned by the caller.
 * @param used - Selector types that produced the result.
 * @param warnings - Warnings gathered so far; the `warnings` key is left off
 *   the result entirely when there are none.
 * @param status - `resolved` unless the caller marks the match `ambiguous`.
 */
function resolved(
  target: ResolvedAnchorTarget,
  confidence: number,
  used: readonly SelectorType[],
  warnings: readonly string[],
  status: "resolved" | "ambiguous" = "resolved",
): AnchorResolution {
  const resolution: AnchorResolution = {
    status,
    confidence,
    candidates: [target],
    usedSelectorTypes: used,
  };
  if (warnings.length === 0) return resolution;
  return { ...resolution, warnings };
}
/**
 * Assemble the result for a selector set that could not be resolved:
 * confidence 0, no candidates, no used selector types. The `warnings` key
 * is left off entirely when there are no warnings.
 */
function unresolved(warnings: readonly string[]): AnchorResolution {
  const resolution: AnchorResolution = {
    status: "unresolved",
    confidence: 0,
    candidates: [],
    usedSelectorTypes: [],
  };
  if (warnings.length === 0) return resolution;
  return { ...resolution, warnings };
}

40
src/app/App.tsx Normal file
View File

@@ -0,0 +1,40 @@
/**
* App — the citation-evidence MVP shell.
*
* Three-pane layout per `wiki/ArchitectureOverview.md` §12.1:
*
* ┌────────────┬──────────────────┬────────────┐
* │ Collection │ Document Viewer │ Evidence │
* │ List │ │ Sidebar │
* └────────────┴──────────────────┴────────────┘
*
* CE-WP-0002-T06 stops at "viewer shell is rendered, evidence list is
* displayed". T07 wires the selection → annotation → evidence flow; T08
* wires the sidebar-click → scroll-to-passage round-trip.
*/
import {
CollectionList,
EngineProvider,
EvidenceSidebar,
ViewerShell,
} from "@work/index";
/**
 * Root component: renders the collection list, the viewer shell and the
 * evidence sidebar side by side in a full-height flex row, all inside a
 * single `EngineProvider`.
 */
export function App() {
  // Full-viewport flex row shared by the three panes.
  const shellStyle = {
    display: "flex",
    height: "100vh",
    fontFamily: "system-ui, sans-serif",
    color: "#222",
  } as const;

  return (
    <EngineProvider>
      <div style={shellStyle}>
        <CollectionList />
        <ViewerShell />
        <EvidenceSidebar />
      </div>
    </EngineProvider>
  );
}

View File

@@ -1,233 +0,0 @@
/**
* CE-WP-0002-T02 spike host page.
*
* Lists the fixtures from `fixtures/pdfs/manifest.json`, lets the user load
* one in the spike PDF viewer, capture a selection (the viewer's
* `onSelection` fires when text is selected), persist the resulting
* selectors to `localStorage`, and on reload restore + scroll to them.
*
* Success looks like: select a quote → click "save" → reload the tab →
* the highlight is rendered on the same passage and the page is scrolled
* to it.
*/
import { useEffect, useMemo, useState } from "react";
import {
PdfSpikeViewer,
type PdfSelectionCapture,
type StoredAnnotation,
} from "@anchor/index";
import type { Selector } from "@shared/selector";
import { newId } from "@shared/ids";
import manifest from "../../fixtures/pdfs/manifest.json";
/** One fixture record as listed under `fixtures` in the PDF manifest. */
interface FixtureEntry {
id: string;
// File name, URL-encoded into the viewer's `/fixtures/pdfs/` URL.
filename: string;
description: string;
page_count: number;
// Quote and page number displayed next to the fixture in the picker.
known_good_quote: string;
known_good_quote_page: number;
}
// The manifest JSON is cast, not validated, to the expected shape.
const FIXTURES: FixtureEntry[] = (manifest as { fixtures: FixtureEntry[] }).fixtures;
// localStorage key under which the saved entries are stored.
const STORAGE_KEY = "ce-wp-0002-spike-annotations-v1";
/** One saved annotation as persisted to localStorage. */
interface StoredEntry {
id: string;
// Id of the fixture the annotation was made on; used to filter per fixture.
fixtureId: string;
text: string;
selectors: Selector[];
// ISO-8601 timestamp taken when the entry was saved.
createdAt: string;
}
/**
 * Read the saved entries from localStorage.
 *
 * Best-effort: a missing or empty value, unparsable JSON, a non-array
 * payload, or a throwing storage access all produce an empty list. Array
 * elements are not validated — they are returned as parsed.
 */
function loadStore(): StoredEntry[] {
  let payload: unknown;
  try {
    const raw = localStorage.getItem(STORAGE_KEY);
    if (!raw) return [];
    payload = JSON.parse(raw) as unknown;
  } catch {
    return [];
  }
  return Array.isArray(payload) ? (payload as StoredEntry[]) : [];
}
/** Serialize `entries` to JSON and write them to localStorage under `STORAGE_KEY`. */
function saveStore(entries: StoredEntry[]) {
  const serialized = JSON.stringify(entries);
  localStorage.setItem(STORAGE_KEY, serialized);
}
/**
* Spike host component: a fixture picker and saved-annotation list on the
* left, the spike PDF viewer on the right.
*
* Saved entries are loaded from localStorage once on first render and
* written back whenever the list changes.
*/
export function SpikeApp() {
// Id of the fixture currently shown in the viewer, or null before any pick.
const [activeFixtureId, setActiveFixtureId] = useState<string | null>(null);
// All saved entries across fixtures; lazily initialised from localStorage.
const [entries, setEntries] = useState<StoredEntry[]>(() => loadStore());
// The latest selection reported by the viewer that has not been saved or
// discarded yet.
const [pending, setPending] = useState<
| { capture: PdfSelectionCapture; selectors: Selector[] }
| null
>(null);
// Id of the annotation handed to the viewer as `scrollToAnnotationId`.
const [scrollTo, setScrollTo] = useState<string | null>(null);
// Persist on every change to the entry list.
useEffect(() => {
saveStore(entries);
}, [entries]);
const activeFixture = useMemo(
() => FIXTURES.find((f) => f.id === activeFixtureId) ?? null,
[activeFixtureId],
);
// Entries of the active fixture, reduced to the fields passed to the viewer.
const annotationsForActive = useMemo<StoredAnnotation[]>(() => {
if (!activeFixtureId) return [];
return entries
.filter((e) => e.fixtureId === activeFixtureId)
.map((e) => ({ id: e.id, text: e.text, selectors: e.selectors }));
}, [activeFixtureId, entries]);
// Turn the pending selection into a stored entry and clear it.
function handleSave() {
if (!pending || !activeFixtureId) return;
const entry: StoredEntry = {
id: newId("annotation"),
fixtureId: activeFixtureId,
text: pending.capture.text,
selectors: pending.selectors,
createdAt: new Date().toISOString(),
};
setEntries((prev) => [...prev, entry]);
setPending(null);
}
// Drop every entry that belongs to the active fixture.
function handleClear() {
if (!activeFixtureId) return;
setEntries((prev) => prev.filter((e) => e.fixtureId !== activeFixtureId));
}
return (
<div style={{ display: "flex", height: "100vh", fontFamily: "system-ui, sans-serif" }}>
<aside
style={{
width: 320,
borderRight: "1px solid #ddd",
padding: 12,
overflow: "auto",
flex: "0 0 320px",
}}
>
<h2 style={{ marginTop: 0 }}>CE-WP-0002-T02 Spike</h2>
<p style={{ fontSize: 12, color: "#555" }}>
Pick a fixture, select text in the viewer, save, then reload the page
to verify the highlight is restored.
</p>
<h3 style={{ fontSize: 14 }}>Fixtures</h3>
<ul style={{ listStyle: "none", padding: 0, margin: 0 }}>
{FIXTURES.map((f) => (
<li key={f.id} style={{ marginBottom: 6 }}>
<button
onClick={() => {
setActiveFixtureId(f.id);
setPending(null);
setScrollTo(null);
}}
style={{
display: "block",
width: "100%",
textAlign: "left",
background: f.id === activeFixtureId ? "#e8f0ff" : "white",
border: "1px solid #ccc",
padding: 6,
cursor: "pointer",
}}
>
<div style={{ fontWeight: 600, fontSize: 13 }}>{f.id}</div>
<div style={{ fontSize: 11, color: "#666" }}>
{f.page_count} page{f.page_count === 1 ? "" : "s"} ·
known-good p{f.known_good_quote_page}
</div>
<div style={{ fontSize: 11, color: "#888", marginTop: 2 }}>
&ldquo;{f.known_good_quote}&rdquo;
</div>
</button>
</li>
))}
</ul>
{activeFixture && (
<>
<h3 style={{ fontSize: 14, marginTop: 16 }}>Saved annotations</h3>
{annotationsForActive.length === 0 && (
<p style={{ fontSize: 12, color: "#888" }}>(none)</p>
)}
<ul style={{ listStyle: "none", padding: 0, margin: 0 }}>
{annotationsForActive.map((a) => (
<li key={a.id} style={{ marginBottom: 4 }}>
<button
onClick={() => setScrollTo(a.id)}
style={{
display: "block",
width: "100%",
textAlign: "left",
background: "#fff8d6",
border: "1px solid #ccc",
padding: 4,
cursor: "pointer",
fontSize: 11,
}}
>
{a.text.slice(0, 80)}
{a.text.length > 80 ? "…" : ""}
</button>
</li>
))}
</ul>
{annotationsForActive.length > 0 && (
<button
onClick={handleClear}
style={{ marginTop: 8, fontSize: 11 }}
>
Clear all for this fixture
</button>
)}
</>
)}
{pending && (
<div
style={{
marginTop: 16,
padding: 8,
border: "1px solid #f0c040",
background: "#fff8d6",
}}
>
<div style={{ fontSize: 12 }}>
Pending selection ({pending.selectors.length} selector
{pending.selectors.length === 1 ? "" : "s"}):
</div>
<div style={{ fontSize: 11, color: "#666", margin: "4px 0" }}>
&ldquo;{pending.capture.text.slice(0, 120)}&rdquo;
</div>
<button onClick={handleSave}>Save</button>{" "}
<button onClick={() => setPending(null)}>Discard</button>
</div>
)}
</aside>
<main style={{ flex: 1, overflow: "hidden", position: "relative" }}>
{activeFixture ? (
<PdfSpikeViewer
key={activeFixture.id}
pdfUrl={`/fixtures/pdfs/${encodeURIComponent(activeFixture.filename)}`}
storedAnnotations={annotationsForActive}
{...(scrollTo ? { scrollToAnnotationId: scrollTo } : {})}
onSelectionCaptured={(capture, selectors) =>
setPending({ capture, selectors })
}
/>
) : (
<div style={{ padding: 24, color: "#666" }}>
Pick a fixture on the left to begin.
</div>
)}
</main>
</div>
);
}

View File

@@ -1 +1 @@
export { SpikeApp } from "./SpikeApp";
export { App } from "./App";

View File

@@ -1,12 +1,12 @@
import { StrictMode } from "react";
import { createRoot } from "react-dom/client";
import { SpikeApp } from "./SpikeApp";
import { App } from "./App";
const container = document.getElementById("root");
if (!container) throw new Error("#root not found");
createRoot(container).render(
<StrictMode>
<SpikeApp />
<App />
</StrictMode>,
);

168
src/engine/engine.test.ts Normal file
View File

@@ -0,0 +1,168 @@
import { beforeEach, describe, expect, it } from "vitest";
import type { Document, DocumentRepresentation } from "@shared/document";
import type { DocumentId, RepresentationId } from "@shared/ids";
import type { Selector } from "@shared/selector";
import { createEngine, type Engine, type EngineEvent } from "./index";
/**
 * Test fixture: one PDF document plus its single-page text representation.
 *
 * Ids and timestamps are fixed so every call yields structurally equal
 * (but freshly allocated) objects.
 */
function fakeDocAndRep(): { document: Document; representation: DocumentRepresentation } {
  const stamp = "2026-05-25T00:00:00.000Z";
  const documentId = "doc_fake" as DocumentId;
  const representationId = "rep_fake" as RepresentationId;

  return {
    document: {
      id: documentId,
      mediaType: "application/pdf",
      createdAt: stamp,
      updatedAt: stamp,
    },
    representation: {
      id: representationId,
      documentId,
      representationType: "pdf-text",
      contentHash: "h",
      canonicalText: "The quick brown fox.",
      pageMap: [{ page: 1, width: 100, height: 100 }],
      // One page spanning the whole 20-character canonical text.
      offsetMap: [{ page: 1, globalStart: 0, globalEnd: 20, pageLength: 20 }],
      generatedAt: stamp,
    },
  };
}
// Integration tests for the engine composition root. Every test gets a fresh
// engine from createEngine() and records each bus event through onAny so
// assertions can inspect the emitted event sequence.
describe("Engine integration", () => {
let engine: Engine;
let events: EngineEvent[];
beforeEach(() => {
engine = createEngine();
events = [];
engine.bus.onAny((e) => events.push(e));
});
it("documentService.register stores both and emits DocumentImported + DocumentRepresentationGenerated", () => {
const { document, representation } = fakeDocAndRep();
const result = engine.documents.register({ document, representation });
// Identity (toBe) checks: the service hands back and stores the very same objects.
expect(result.document).toBe(document);
expect(result.representation).toBe(representation);
expect(engine.documents.get(document.id)).toBe(document);
expect(engine.documents.getRepresentation(representation.id)).toBe(representation);
expect(events.map((e) => e.type)).toEqual(["DocumentImported", "DocumentRepresentationGenerated"]);
});
it("annotationService.create stamps an ID + normalize version + timestamps, then emits AnnotationCreated", () => {
const { document, representation } = fakeDocAndRep();
engine.documents.register({ document, representation });
const selectors: Selector[] = [{ type: "TextQuoteSelector", exact: "brown fox" }];
const ann = engine.annotations.create({
documentId: document.id,
representationId: representation.id,
selectors,
quote: "brown fox",
note: "a quick mark",
});
expect(ann.id).toMatch(/^ann_/);
expect(ann.normalizeVersion).toBeGreaterThan(0);
// A freshly created annotation has identical created/updated timestamps.
expect(ann.createdAt).toBe(ann.updatedAt);
expect(engine.annotations.get(ann.id)).toBe(ann);
const created = events.find((e) => e.type === "AnnotationCreated");
expect(created?.type).toBe("AnnotationCreated");
});
// NOTE(review): the title also names "ambiguous" and "stale", but only the
// "resolved" and "unresolved" statuses are exercised below.
it("setResolutionStatus emits AnnotationResolved for resolved/ambiguous and AnnotationResolutionFailed for unresolved/stale", () => {
const { document, representation } = fakeDocAndRep();
engine.documents.register({ document, representation });
const ann = engine.annotations.create({
documentId: document.id,
representationId: representation.id,
selectors: [{ type: "TextQuoteSelector", exact: "x" }],
});
// Drop the setup events so only the calls under test are observed.
events.length = 0;
engine.annotations.setResolutionStatus(ann.id, "resolved", { confidence: 0.95 });
expect(events.map((e) => e.type)).toEqual(["AnnotationResolved"]);
engine.annotations.setResolutionStatus(ann.id, "unresolved", { confidence: 0, reason: "no quote match" });
expect(events.map((e) => e.type)).toEqual(["AnnotationResolved", "AnnotationResolutionFailed"]);
});
it("evidenceService.create requires at least one annotation and emits EvidenceItemCreated", () => {
const { document, representation } = fakeDocAndRep();
engine.documents.register({ document, representation });
const ann = engine.annotations.create({
documentId: document.id,
representationId: representation.id,
selectors: [{ type: "TextQuoteSelector", exact: "brown fox" }],
});
// An empty annotation list must be rejected.
expect(() => engine.evidence.create({ annotationIds: [] })).toThrow();
const item = engine.evidence.create({
annotationIds: [ann.id],
commentary: "good quote",
});
// New items start in the "candidate" status.
expect(item.status).toBe("candidate");
expect(item.annotationIds).toEqual([ann.id]);
expect(events.find((e) => e.type === "EvidenceItemCreated")).toBeDefined();
});
it("setStatus emits EvidenceItemUpdated only on real change and carries previousStatus", () => {
const { document, representation } = fakeDocAndRep();
engine.documents.register({ document, representation });
const ann = engine.annotations.create({
documentId: document.id,
representationId: representation.id,
selectors: [{ type: "TextQuoteSelector", exact: "brown fox" }],
});
const item = engine.evidence.create({ annotationIds: [ann.id] });
// Drop the setup events so only the calls under test are observed.
events.length = 0;
// Setting the status it already has is a no-op: same object back, no event.
const same = engine.evidence.setStatus(item.id, "candidate");
expect(same).toBe(item);
expect(events).toEqual([]);
engine.evidence.setStatus(item.id, "confirmed");
const updated = events.find((e) => e.type === "EvidenceItemUpdated");
expect(updated).toBeDefined();
// The type guard narrows the union so previousStatus is accessible.
if (updated?.type === "EvidenceItemUpdated") {
expect(updated.previousStatus).toBe("candidate");
}
});
it("listByDocument scopes evidence items to a single document via annotation lookup", () => {
const a = fakeDocAndRep();
engine.documents.register(a);
const annA = engine.annotations.create({
documentId: a.document.id,
representationId: a.representation.id,
selectors: [{ type: "TextQuoteSelector", exact: "brown fox" }],
});
engine.evidence.create({ annotationIds: [annA.id], commentary: "a" });
// Second, distinct document.
const otherDocId = "doc_other" as DocumentId;
const otherRepId = "rep_other" as RepresentationId;
engine.documents.register({
document: { ...a.document, id: otherDocId },
representation: { ...a.representation, id: otherRepId, documentId: otherDocId },
});
const annB = engine.annotations.create({
documentId: otherDocId,
representationId: otherRepId,
selectors: [{ type: "TextQuoteSelector", exact: "z" }],
});
engine.evidence.create({ annotationIds: [annB.id], commentary: "b" });
// Each document sees only the item whose annotation belongs to it.
expect(engine.evidence.listByDocument(a.document.id)).toHaveLength(1);
expect(engine.evidence.listByDocument(otherDocId)).toHaveLength(1);
});
// NOTE(review): the title claims "without mutating the item", but nothing
// below asserts that the stored item is unchanged — only the event is checked.
it("activate emits EvidenceItemActivated without mutating the item", () => {
const { document, representation } = fakeDocAndRep();
engine.documents.register({ document, representation });
const ann = engine.annotations.create({
documentId: document.id,
representationId: representation.id,
selectors: [{ type: "TextQuoteSelector", exact: "x" }],
});
const item = engine.evidence.create({ annotationIds: [ann.id] });
// Drop the setup events so only the call under test is observed.
events.length = 0;
engine.evidence.activate(item.id, "sidebar");
const activated = events.find((e) => e.type === "EvidenceItemActivated");
expect(activated).toBeDefined();
if (activated?.type === "EvidenceItemActivated") {
expect(activated.source).toBe("sidebar");
}
});
});

View File

@@ -0,0 +1,64 @@
import { describe, expect, it, vi } from "vitest";
import type { DocumentId } from "@shared/ids";
import { createEventBus } from "./bus";
// Shared fixtures: a document id and the smallest document payload the
// DocumentImported events in these tests carry.
const docId = "doc_test" as DocumentId;
const minimalDoc = {
id: docId,
mediaType: "application/pdf",
createdAt: "2026-05-25T00:00:00.000Z",
updatedAt: "2026-05-25T00:00:00.000Z",
};
// Unit tests for createEventBus(): typed delivery, onAny fan-out,
// unsubscription, and per-listener error isolation.
describe("EventBus", () => {
it("delivers typed events to the registered listener", () => {
const bus = createEventBus();
const spy = vi.fn();
bus.on("DocumentImported", spy);
const result = bus.emit({ type: "DocumentImported", documentId: docId, document: minimalDoc });
expect(spy).toHaveBeenCalledOnce();
expect(spy.mock.calls[0]![0]).toMatchObject({ type: "DocumentImported", documentId: docId });
// emit reports how many listeners ran and which errors they threw.
expect(result.listenerCount).toBe(1);
expect(result.errors).toEqual([]);
});
it("does not deliver an event to listeners of a different type", () => {
const bus = createEventBus();
const spy = vi.fn();
bus.on("AnnotationCreated", spy);
bus.emit({ type: "DocumentImported", documentId: docId, document: minimalDoc });
expect(spy).not.toHaveBeenCalled();
});
it("delivers every event to onAny listeners", () => {
const bus = createEventBus();
const spy = vi.fn();
bus.onAny(spy);
bus.emit({ type: "DocumentImported", documentId: docId, document: minimalDoc });
// `as never` sidesteps the branded id type; the id value is irrelevant here.
bus.emit({ type: "EvidenceItemActivated", evidenceItemId: "ev_x" as never });
expect(spy).toHaveBeenCalledTimes(2);
});
it("returns an unsubscribe function from on()", () => {
const bus = createEventBus();
const spy = vi.fn();
const off = bus.on("DocumentImported", spy);
off();
bus.emit({ type: "DocumentImported", documentId: docId, document: minimalDoc });
expect(spy).not.toHaveBeenCalled();
});
it("captures listener errors and still calls subsequent listeners", () => {
const bus = createEventBus();
const boom = new Error("listener exploded");
const a = vi.fn(() => { throw boom; });
const b = vi.fn();
bus.on("DocumentImported", a);
bus.on("DocumentImported", b);
const result = bus.emit({ type: "DocumentImported", documentId: docId, document: minimalDoc });
expect(a).toHaveBeenCalledOnce();
expect(b).toHaveBeenCalledOnce();
// The thrown error is collected rather than propagated; a throwing
// listener still counts towards listenerCount.
expect(result.errors).toEqual([boom]);
expect(result.listenerCount).toBe(2);
});
});

79
src/engine/events/bus.ts Normal file
View File

@@ -0,0 +1,79 @@
/**
* Synchronous in-process event bus.
*
* Listeners fire in registration order on the calling stack; `emit` returns
* after every listener has run. A listener throwing does not stop later
* listeners — its error surfaces through the returned `errors` array so
* callers can decide whether to log, rethrow, or ignore.
*
* MVP-sufficient. ADR-0005 (persistence) will decide whether to upgrade to
* an async/queued bus when storage becomes durable.
*/
import type { EngineEvent, EngineEventOf, EngineEventType } from "./types";
/** Listener for one event type `T`; receives the event narrowed to that type. */
export type EngineEventListener<T extends EngineEventType = EngineEventType> = (
event: EngineEventOf<T>,
) => void;
/** Listener that receives every event regardless of its type. */
export type AnyEngineEventListener = (event: EngineEvent) => void;
/** Outcome of a single `emit` call. */
export interface EmitResult {
/** Number of listeners (typed plus any-listeners) invoked for the event. */
readonly listenerCount: number;
/** Values thrown by listeners during this emit, in the order they were thrown. */
readonly errors: readonly unknown[];
}
/** Public surface of the bus returned by `createEventBus()`. */
export interface EventBus {
/** Subscribes `listener` to events of `type`; the returned function unsubscribes it. */
on<T extends EngineEventType>(type: T, listener: EngineEventListener<T>): () => void;
/** Subscribes `listener` to all events; the returned function unsubscribes it. */
onAny(listener: AnyEngineEventListener): () => void;
/** Synchronously delivers `event` to its typed listeners, then to the any-listeners. */
emit<T extends EngineEventType>(event: EngineEventOf<T>): EmitResult;
}
/**
 * Creates a synchronous, in-process event bus.
 *
 * Delivery order for one `emit`: listeners registered for the event's type
 * (in registration order), then the `onAny` listeners (in registration
 * order). A throwing listener never prevents later listeners from running;
 * whatever it threw is collected into `EmitResult.errors`.
 *
 * Dispatch works on a snapshot of the listeners taken when `emit` starts:
 * - a listener added during an emit first fires on the NEXT emit;
 * - a listener removed during an emit, before its turn, is skipped;
 * - a listener that unsubscribes and re-subscribes itself while handling an
 *   event runs exactly once for that event. Iterating the live `Set`
 *   directly would revisit it, because a deleted-then-re-added entry moves
 *   to the end of the iteration order — an unbounded loop.
 *
 * @returns a fresh bus with no listeners registered.
 */
export function createEventBus(): EventBus {
  const typedListeners = new Map<EngineEventType, Set<EngineEventListener>>();
  const anyListeners = new Set<AnyEngineEventListener>();

  return {
    on(type, listener) {
      // Listeners are stored type-erased; `emit` only hands a listener events
      // of the type it was registered under.
      const erased = listener as unknown as EngineEventListener;
      const existing = typedListeners.get(type);
      const bucket = existing ?? new Set<EngineEventListener>();
      if (!existing) typedListeners.set(type, bucket);
      bucket.add(erased);
      return () => {
        bucket.delete(erased);
      };
    },

    onAny(listener) {
      anyListeners.add(listener);
      return () => {
        anyListeners.delete(listener);
      };
    },

    emit(event) {
      const errors: unknown[] = [];
      let count = 0;

      // Pair each live set with a frozen copy taken before any listener runs,
      // so subscriptions changed by listeners cannot perturb this dispatch.
      const typedLive = typedListeners.get(event.type) as
        | Set<AnyEngineEventListener>
        | undefined;
      const rounds: Array<[ReadonlySet<AnyEngineEventListener>, AnyEngineEventListener[]]> = [];
      if (typedLive) rounds.push([typedLive, [...typedLive]]);
      rounds.push([anyListeners, [...anyListeners]]);

      for (const [live, frozen] of rounds) {
        for (const listener of frozen) {
          // Unsubscribed by an earlier listener during this emit: skip it.
          if (!live.has(listener)) continue;
          count++;
          try {
            listener(event);
          } catch (err) {
            errors.push(err);
          }
        }
      }

      return { listenerCount: count, errors };
    },
  };
}

View File

@@ -0,0 +1,8 @@
export * from "./types";
export {
createEventBus,
type EventBus,
type EngineEventListener,
type AnyEngineEventListener,
type EmitResult,
} from "./bus";

View File

@@ -0,0 +1,84 @@
/**
* Engine event vocabulary.
*
* Implements `wiki/SharedContracts.md` §4 (closed event list). Each event
* carries the *minimum* identifying payload needed by downstream listeners;
* services hand back the full domain object to the caller separately.
*
* Adding an event requires updating SharedContracts.md first.
*/
import type { Annotation, AnnotationResolutionStatus } from "@shared/annotation";
import type { Document, DocumentRepresentation } from "@shared/document";
import type { EvidenceItem, EvidenceItemStatus } from "@shared/evidence";
import type {
AnnotationId,
DocumentId,
EvidenceItemId,
RepresentationId,
} from "@shared/ids";
/** A document was registered with the engine; carries the document itself. */
export interface DocumentImportedEvent {
readonly type: "DocumentImported";
readonly documentId: DocumentId;
readonly document: Document;
}
/** A representation was stored for a document; carries the representation itself. */
export interface DocumentRepresentationGeneratedEvent {
readonly type: "DocumentRepresentationGenerated";
readonly documentId: DocumentId;
readonly representationId: RepresentationId;
readonly representation: DocumentRepresentation;
}
/** An annotation was created and stored; carries the stored annotation. */
export interface AnnotationCreatedEvent {
readonly type: "AnnotationCreated";
readonly annotationId: AnnotationId;
readonly annotation: Annotation;
}
/**
 * An annotation's resolution status was set to a non-failure value, together
 * with the confidence reported for that resolution.
 */
export interface AnnotationResolvedEvent {
readonly type: "AnnotationResolved";
readonly annotationId: AnnotationId;
readonly status: AnnotationResolutionStatus;
readonly confidence: number;
}
/** An annotation could not be resolved; `reason` says why in free text. */
export interface AnnotationResolutionFailedEvent {
readonly type: "AnnotationResolutionFailed";
readonly annotationId: AnnotationId;
readonly reason: string;
}
/** An evidence item was created; carries the stored item. */
export interface EvidenceItemCreatedEvent {
readonly type: "EvidenceItemCreated";
readonly evidenceItemId: EvidenceItemId;
readonly evidenceItem: EvidenceItem;
}
/** An evidence item changed; `previousStatus` is the status it had before the change. */
export interface EvidenceItemUpdatedEvent {
readonly type: "EvidenceItemUpdated";
readonly evidenceItemId: EvidenceItemId;
readonly evidenceItem: EvidenceItem;
readonly previousStatus: EvidenceItemStatus;
}
/** An evidence item was activated; `source`, when present, names the originating UI surface. */
export interface EvidenceItemActivatedEvent {
readonly type: "EvidenceItemActivated";
readonly evidenceItemId: EvidenceItemId;
readonly source?: "sidebar" | "form-field" | "citation-card";
}
/** Closed union of every event the engine can emit, discriminated by `type`. */
export type EngineEvent =
| DocumentImportedEvent
| DocumentRepresentationGeneratedEvent
| AnnotationCreatedEvent
| AnnotationResolvedEvent
| AnnotationResolutionFailedEvent
| EvidenceItemCreatedEvent
| EvidenceItemUpdatedEvent
| EvidenceItemActivatedEvent;
/** Union of all `type` discriminants. */
export type EngineEventType = EngineEvent["type"];
/** Maps a `type` discriminant back to the event interface(s) carrying it. */
export type EngineEventOf<T extends EngineEventType> = Extract<EngineEvent, { type: T }>;

View File

@@ -1 +1,60 @@
export {};
/**
* Engine composition root.
*
* `createEngine()` wires in-memory repos to the services and shares a single
* event bus. The app layer holds the returned `Engine` instance and passes
* its services into the UI.
*
* Swapping the repository implementation later (ADR-0005) is a matter of
* replacing `createInMemoryRepos()` here. The service signatures don't
* change.
*/
import { createEventBus, type EventBus } from "./events";
import {
createInMemoryRepos,
type InMemoryRepos,
} from "./repos";
import {
createAnnotationService,
createDocumentService,
createEvidenceService,
type AnnotationService,
type DocumentService,
type EvidenceService,
} from "./services";
export * from "./events";
export * from "./repos";
export * from "./services";
export {
SNAPSHOT_VERSION,
attachPersister,
captureSnapshot,
documentIdsIn,
restoreFromStorage,
restoreSnapshot,
type EngineSnapshot,
type PersisterOptions,
} from "./persistence";
/** Everything `createEngine()` wires together: one bus, one set of repos, three services. */
export interface Engine {
/** Event bus shared by all three services. */
readonly bus: EventBus;
/** Raw repositories; writing to them directly bypasses the services and emits no events. */
readonly repos: InMemoryRepos;
readonly documents: DocumentService;
readonly annotations: AnnotationService;
readonly evidence: EvidenceService;
}
/**
 * Builds a self-contained engine: fresh in-memory repos, a fresh event bus,
 * and the three services bound to both.
 *
 * The evidence service gets a lookup into the annotation repo rather than
 * the repo itself.
 */
export function createEngine(): Engine {
  const bus = createEventBus();
  const repos = createInMemoryRepos();

  return {
    bus,
    repos,
    documents: createDocumentService(repos.documents, repos.representations, bus),
    annotations: createAnnotationService(repos.annotations, bus),
    evidence: createEvidenceService(
      repos.evidenceItems,
      (annotationId) => repos.annotations.get(annotationId),
      bus,
    ),
  };
}

View File

@@ -0,0 +1,183 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { Document, DocumentRepresentation } from "@shared/document";
import type { DocumentId, RepresentationId } from "@shared/ids";
import {
attachPersister,
captureSnapshot,
createEngine,
restoreFromStorage,
restoreSnapshot,
type Engine,
type EngineEvent,
type EngineSnapshot,
} from "./index";
/**
 * Test fixture: a PDF document and its single-page text representation whose
 * ids, title and content hash are derived from `suffix`, so several distinct
 * fixtures can coexist in one engine.
 */
function fakeDocAndRep(suffix: string): {
  document: Document;
  representation: DocumentRepresentation;
} {
  const stamp = "2026-05-25T00:00:00.000Z";
  const documentId = `doc_${suffix}` as DocumentId;
  const representationId = `rep_${suffix}` as RepresentationId;

  return {
    document: {
      id: documentId,
      mediaType: "application/pdf",
      title: `Doc ${suffix}`,
      createdAt: stamp,
      updatedAt: stamp,
    },
    representation: {
      id: representationId,
      documentId,
      representationType: "pdf-text",
      contentHash: `hash-${suffix}`,
      canonicalText: "The quick brown fox.",
      pageMap: [{ page: 1, width: 100, height: 100 }],
      // One page spanning the whole 20-character canonical text.
      offsetMap: [{ page: 1, globalStart: 0, globalEnd: 20, pageLength: 20 }],
      generatedAt: stamp,
    },
  };
}
/**
 * Minimal in-memory stand-in for the Web Storage methods the persistence
 * helpers need. Each call returns an independent store.
 */
function memoryStorage(): Pick<Storage, "getItem" | "setItem" | "removeItem"> {
  const entries = new Map<string, string>();
  return {
    getItem(key) {
      const hit = entries.get(key);
      // Web Storage reports a missing key as null, not undefined.
      return hit === undefined ? null : hit;
    },
    setItem(key, value) {
      entries.set(key, value);
    },
    removeItem(key) {
      entries.delete(key);
    },
  };
}
/**
 * Populates `engine` through its services with one document, one
 * representation, one annotation quoting "brown fox", and one evidence item
 * citing that annotation. Returns all four so tests can assert on them.
 */
function seed(engine: Engine, suffix: string) {
  const fixture = fakeDocAndRep(suffix);
  engine.documents.register({
    document: fixture.document,
    representation: fixture.representation,
  });

  const ann = engine.annotations.create({
    documentId: fixture.document.id,
    representationId: fixture.representation.id,
    selectors: [{ type: "TextQuoteSelector", exact: "brown fox" }],
    quote: "brown fox",
  });

  const item = engine.evidence.create({
    annotationIds: [ann.id],
    commentary: `commentary-${suffix}`,
  });

  return {
    document: fixture.document,
    representation: fixture.representation,
    ann,
    item,
  };
}
// Round-trip tests: snapshot a seeded engine, restore into a fresh one, and
// check both the restored data and the absence of side-effect events.
describe("captureSnapshot + restoreSnapshot", () => {
it("round-trips documents, representations, annotations and evidence items", () => {
const src = createEngine();
const a = seed(src, "a");
const b = seed(src, "b");
const snap = captureSnapshot(src);
// Two seeds => two of every entity kind.
expect(snap.documents).toHaveLength(2);
expect(snap.representations).toHaveLength(2);
expect(snap.annotations).toHaveLength(2);
expect(snap.evidenceItems).toHaveLength(2);
const dst = createEngine();
restoreSnapshot(dst, snap);
// The restored data is readable through the destination engine's services.
expect(dst.documents.get(a.document.id)?.title).toBe("Doc a");
expect(dst.documents.get(b.document.id)?.title).toBe("Doc b");
expect(dst.annotations.get(a.ann.id)?.quote).toBe("brown fox");
expect(dst.evidence.get(a.item.id)?.commentary).toBe("commentary-a");
});
it("restoreSnapshot does NOT emit *Created events (events would loop the persister)", () => {
const src = createEngine();
seed(src, "x");
const snap = captureSnapshot(src);
const dst = createEngine();
const seen: EngineEvent["type"][] = [];
// Subscribe before restoring so any event emitted by the restore is caught.
dst.bus.onAny((e) => seen.push(e.type));
restoreSnapshot(dst, snap);
expect(seen).toEqual([]);
});
it("rejects a snapshot with a mismatching version", () => {
const dst = createEngine();
expect(() =>
restoreSnapshot(dst, {
version: 999,
documents: [],
representations: [],
annotations: [],
evidenceItems: [],
} as EngineSnapshot),
).toThrow(/version/);
});
});
// Tests for the event-driven persister, run against an in-memory storage shim.
describe("attachPersister", () => {
let storage: ReturnType<typeof memoryStorage>;
let engine: Engine;
const KEY = "ce-test-snap";
beforeEach(() => {
storage = memoryStorage();
engine = createEngine();
});
it("writes a snapshot to storage on every mutating event", () => {
const off = attachPersister(engine, { key: KEY, storage });
// Attaching alone must not write a baseline snapshot.
expect(storage.getItem(KEY)).toBeNull();
seed(engine, "z");
const raw = storage.getItem(KEY);
expect(raw).not.toBeNull();
const snap = JSON.parse(raw!) as EngineSnapshot;
expect(snap.documents).toHaveLength(1);
expect(snap.evidenceItems).toHaveLength(1);
off();
});
it("stops writing after the unsubscribe is called", () => {
const off = attachPersister(engine, { key: KEY, storage });
seed(engine, "q");
const after = storage.getItem(KEY);
off();
// Mutations after detaching must leave the stored value untouched.
seed(engine, "r");
expect(storage.getItem(KEY)).toBe(after);
});
// NOTE(review): the title mentions a JSON.stringify failure, but the failure
// injected below is storage.setItem throwing. The persister is also never
// detached here; it is dropped with the per-test engine.
it("survives a JSON.stringify failure without throwing into the caller", () => {
const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
const failing = { ...memoryStorage(), setItem: () => { throw new Error("boom"); } };
attachPersister(engine, { key: KEY, storage: failing });
expect(() => seed(engine, "k")).not.toThrow();
expect(warn).toHaveBeenCalled();
warn.mockRestore();
});
});
// Tests for hydrating an engine from a storage key: missing key, valid
// snapshot, and malformed payload.
describe("restoreFromStorage", () => {
it("returns {restored: false} when the key is empty", () => {
const storage = memoryStorage();
const engine = createEngine();
const result = restoreFromStorage(engine, { key: "missing", storage });
expect(result.restored).toBe(false);
});
it("hydrates the engine when storage holds a valid snapshot", () => {
const src = createEngine();
seed(src, "rs");
const storage = memoryStorage();
// Write the snapshot by hand, in the JSON form the persister stores.
storage.setItem("snap", JSON.stringify(captureSnapshot(src)));
const dst = createEngine();
const result = restoreFromStorage(dst, { key: "snap", storage });
expect(result.restored).toBe(true);
expect(dst.documents.list()).toHaveLength(1);
});
it("ignores malformed JSON without throwing", () => {
// Silence and observe the warning that the failure path logs.
const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
const storage = memoryStorage();
storage.setItem("snap", "not-json");
const engine = createEngine();
const result = restoreFromStorage(engine, { key: "snap", storage });
expect(result.restored).toBe(false);
expect(warn).toHaveBeenCalled();
warn.mockRestore();
});
});

138
src/engine/persistence.ts Normal file
View File

@@ -0,0 +1,138 @@
/**
* Engine snapshot + restore.
*
* MVP "persistence" — capture the engine's in-memory state into a JSON blob
* and restore it later. Used by the SPA to survive page reloads via
* `localStorage` until ADR-0005 lands a real store.
*
* Restore deliberately bypasses the service layer: it writes directly to
* the repos so no `*Created` events fire. Without that, restoring would
* trigger the persister to re-write the same snapshot — and if the user
* has another tab open, it would also broadcast spurious "this annotation
* just appeared" events to UI listeners.
*/
import type { Annotation } from "@shared/annotation";
import type { Document, DocumentRepresentation } from "@shared/document";
import type { EvidenceItem } from "@shared/evidence";
import type { DocumentId } from "@shared/ids";
import type { Engine } from "./index";
/**
 * Version stamped into every captured snapshot. `restoreSnapshot` refuses
 * any snapshot whose `version` differs from this value.
 */
export const SNAPSHOT_VERSION = 1;
/** JSON-serialisable capture of the engine's stored entities. */
export interface EngineSnapshot {
/** `SNAPSHOT_VERSION` at capture time. */
readonly version: number;
readonly documents: readonly Document[];
readonly representations: readonly DocumentRepresentation[];
readonly annotations: readonly Annotation[];
readonly evidenceItems: readonly EvidenceItem[];
}
/**
 * Collects the engine's current state into a plain snapshot object.
 *
 * Everything is discovered by walking the registered documents, so
 * representations, annotations and evidence items appear in document order.
 *
 * @param engine engine to read from; it is not modified.
 * @returns a snapshot stamped with the current `SNAPSHOT_VERSION`.
 */
export function captureSnapshot(engine: Engine): EngineSnapshot {
  const documents = engine.documents.list();
  const representations: DocumentRepresentation[] = [];
  const annotations: Annotation[] = [];
  // Keyed by id: an evidence item citing annotations from two documents is
  // listed under both, and must be captured once (first sighting wins).
  const evidenceById = new Map<string, EvidenceItem>();

  documents.forEach((doc) => {
    for (const representation of engine.documents.listRepresentations(doc.id)) {
      representations.push(representation);
    }
    for (const annotation of engine.annotations.listByDocument(doc.id)) {
      annotations.push(annotation);
    }
    for (const item of engine.evidence.listByDocument(doc.id)) {
      if (evidenceById.has(item.id)) continue;
      evidenceById.set(item.id, item);
    }
  });

  return {
    version: SNAPSHOT_VERSION,
    documents: [...documents],
    representations,
    annotations,
    evidenceItems: [...evidenceById.values()],
  };
}
/**
 * Loads a snapshot into the engine's repositories.
 *
 * Writes go straight to `engine.repos`, not through the services, so no
 * engine events are emitted while restoring.
 *
 * The whole snapshot is validated BEFORE the first write. Snapshots usually
 * come from `JSON.parse` of stored text, so their static type is not
 * trustworthy; without the up-front check, a payload with the right version
 * but a malformed collection would leave the engine half-hydrated before
 * the failure surfaced.
 *
 * @param engine engine whose repos receive the snapshot's entities.
 * @param snapshot snapshot to load.
 * @throws Error if `snapshot.version` differs from `SNAPSHOT_VERSION`, if any
 *   of the four collections is not an array, or if any entry is not an
 *   object with a string `id`. Nothing is written in any of these cases.
 */
export function restoreSnapshot(engine: Engine, snapshot: EngineSnapshot): void {
  if (snapshot.version !== SNAPSHOT_VERSION) {
    throw new Error(
      `restoreSnapshot: snapshot version ${snapshot.version} does not match current ${SNAPSHOT_VERSION}`,
    );
  }

  const { documents, representations, annotations, evidenceItems } = snapshot;
  const collections: Record<string, unknown> = {
    documents,
    representations,
    annotations,
    evidenceItems,
  };
  for (const [field, value] of Object.entries(collections)) {
    if (!Array.isArray(value)) {
      throw new Error(`restoreSnapshot: snapshot field "${field}" is not an array`);
    }
    for (const entry of value as readonly unknown[]) {
      const id =
        typeof entry === "object" && entry !== null
          ? (entry as { readonly id?: unknown }).id
          : undefined;
      if (typeof id !== "string") {
        throw new Error(`restoreSnapshot: snapshot field "${field}" has an entry without a string id`);
      }
    }
  }

  for (const d of documents) engine.repos.documents.create(d);
  for (const r of representations) engine.repos.representations.create(r);
  for (const a of annotations) engine.repos.annotations.create(a);
  for (const i of evidenceItems) engine.repos.evidenceItems.create(i);
}
/**
 * Where a snapshot is stored. Shared by `attachPersister` (writes) and
 * `restoreFromStorage` (reads).
 */
export interface PersisterOptions {
/** Storage key. */
readonly key: string;
/** Storage shim — defaults to globalThis.localStorage. */
readonly storage?: Pick<Storage, "getItem" | "setItem" | "removeItem">;
}
/**
 * Keeps a stored snapshot in step with the engine: each state-changing
 * engine event triggers a full re-capture that is serialised to JSON and
 * written under `options.key`.
 *
 * Nothing is written at attach time; callers wanting a baseline must store
 * one themselves via `captureSnapshot`.
 *
 * A failing write (quota, serialisation) is logged with `console.warn` and
 * otherwise ignored, so it never reaches the code that triggered the event.
 *
 * @returns a function that detaches every subscription made here.
 */
export function attachPersister(engine: Engine, options: PersisterOptions): () => void {
  const target = options.storage ?? globalThis.localStorage;

  function persist(): void {
    const snapshot = captureSnapshot(engine);
    try {
      target.setItem(options.key, JSON.stringify(snapshot));
    } catch (failure) {
      // localStorage quota / serialization errors shouldn't crash the app.
      // Surface to the console; ADR-0005 owns the durable fix.
      console.warn("attachPersister: write failed", failure);
    }
  }

  // Every event type that signals a change to persisted state.
  // "EvidenceItemActivated" is not subscribed to.
  const persistedEvents = [
    "DocumentImported",
    "DocumentRepresentationGenerated",
    "AnnotationCreated",
    "AnnotationResolved",
    "AnnotationResolutionFailed",
    "EvidenceItemCreated",
    "EvidenceItemUpdated",
  ] as const;
  const unsubscribers = persistedEvents.map((type) => engine.bus.on(type, persist));

  return () => {
    unsubscribers.forEach((unsubscribe) => unsubscribe());
  };
}
export type RestoreFromStorageOptions = PersisterOptions;
/**
 * Hydrates `engine` from the snapshot stored under `options.key`, if any.
 *
 * Never throws on bad stored data: a missing or empty value, unparsable
 * JSON, a non-object payload, or a snapshot that `restoreSnapshot` rejects
 * all yield `{ restored: false }`. Parse and restore failures are also
 * logged with `console.warn`.
 *
 * @returns `{ restored: true, snapshot }` on success, `{ restored: false }` otherwise.
 */
export function restoreFromStorage(
  engine: Engine,
  options: RestoreFromStorageOptions,
): { readonly restored: boolean; readonly snapshot?: EngineSnapshot } {
  const source = options.storage ?? globalThis.localStorage;
  const serialized = source.getItem(options.key);
  if (!serialized) {
    return { restored: false };
  }

  try {
    const candidate: unknown = JSON.parse(serialized);
    // JSON can legally hold null or a primitive; neither is a snapshot.
    if (candidate === null || typeof candidate !== "object") {
      return { restored: false };
    }
    const snapshot = candidate as EngineSnapshot;
    restoreSnapshot(engine, snapshot);
    return { restored: true, snapshot };
  } catch (failure) {
    console.warn("restoreFromStorage: parse failed, ignoring stored snapshot", failure);
    return { restored: false };
  }
}
/**
 * Lists the ids of the documents held in a snapshot, in snapshot order.
 * Handy for the SPA's "show me what was open last time" logic.
 */
export function documentIdsIn(snapshot: EngineSnapshot): readonly DocumentId[] {
  const { documents } = snapshot;
  return documents.map(({ id }) => id);
}

View File

@@ -0,0 +1,151 @@
/**
* In-memory `Map`-backed repositories.
*
* Implements the MVP storage layer. The repository interfaces match the
* shape that ADR-0005's eventual persistence implementation will satisfy,
* so swapping `createInMemoryRepos()` for a SQLite/Postgres factory later
* is a localised change.
*
* All mutating methods return the *stored* object so callers can pick up
* server-assigned fields (none in MVP, but the contract anticipates it).
*/
import type { Annotation } from "@shared/annotation";
import type { Document, DocumentRepresentation } from "@shared/document";
import type { EvidenceItem } from "@shared/evidence";
import type {
AnnotationId,
DocumentId,
EvidenceItemId,
RepresentationId,
} from "@shared/ids";
/** Storage contract for documents, keyed by document id. */
export interface DocumentRepository {
/** Stores `document` under its id and returns the stored object. */
create(document: Document): Document;
/** Returns the document with `id`, or `null` when none is stored. */
get(id: DocumentId): Document | null;
/** Returns all stored documents. */
list(): readonly Document[];
/** Replaces an already-stored document; the in-memory implementation throws on an unknown id. */
update(document: Document): Document;
}
/** Storage contract for document representations. Declares no update operation. */
export interface RepresentationRepository {
/** Stores `representation` under its id and returns the stored object. */
create(representation: DocumentRepresentation): DocumentRepresentation;
/** Returns the representation with `id`, or `null` when none is stored. */
get(id: RepresentationId): DocumentRepresentation | null;
/** Returns the representations whose `documentId` equals the given id. */
listByDocument(documentId: DocumentId): readonly DocumentRepresentation[];
}
/** Storage contract for annotations, keyed by annotation id. */
export interface AnnotationRepository {
/** Stores `annotation` under its id and returns the stored object. */
create(annotation: Annotation): Annotation;
/** Returns the annotation with `id`, or `null` when none is stored. */
get(id: AnnotationId): Annotation | null;
/** Returns the annotations whose `documentId` equals the given id. */
listByDocument(documentId: DocumentId): readonly Annotation[];
/** Replaces an already-stored annotation; the in-memory implementation throws on an unknown id. */
update(annotation: Annotation): Annotation;
}
/** Storage contract for evidence items, keyed by evidence-item id. */
export interface EvidenceItemRepository {
/** Stores `item` under its id and returns the stored object. */
create(item: EvidenceItem): EvidenceItem;
/** Returns the item with `id`, or `null` when none is stored. */
get(id: EvidenceItemId): EvidenceItem | null;
/**
 * Returns the items linked to a document. Items carry annotation ids only,
 * so the caller supplies `annotationLookup`; an item matches when at least
 * one of its annotations resolves to the given document.
 */
listByDocument(
documentId: DocumentId,
annotationLookup: (id: AnnotationId) => Annotation | null,
): readonly EvidenceItem[];
/** Replaces an already-stored item; the in-memory implementation throws on an unknown id. */
update(item: EvidenceItem): EvidenceItem;
}
/** The four repositories produced together by `createInMemoryRepos()`. */
export interface InMemoryRepos {
readonly documents: DocumentRepository;
readonly representations: RepresentationRepository;
readonly annotations: AnnotationRepository;
readonly evidenceItems: EvidenceItemRepository;
}
/**
 * Builds four independent `Map`-backed repositories.
 *
 * Entities are stored and returned by reference (no cloning). `create`
 * overwrites any entry already stored under the same id; `update` insists
 * that the id already exists and throws otherwise. Listing methods return a
 * fresh array on every call, in insertion order.
 */
export function createInMemoryRepos(): InMemoryRepos {
  const documentStore = new Map<DocumentId, Document>();
  const representationStore = new Map<RepresentationId, DocumentRepresentation>();
  const annotationStore = new Map<AnnotationId, Annotation>();
  const evidenceStore = new Map<EvidenceItemId, EvidenceItem>();

  /** Stores `entity` under `id` (insert or overwrite) and hands it back. */
  function put<K, V>(store: Map<K, V>, id: K, entity: V): V {
    store.set(id, entity);
    return entity;
  }

  /** Like `put`, but only for ids already present; `operation` prefixes the error. */
  function replace<K, V>(store: Map<K, V>, id: K, entity: V, operation: string): V {
    if (store.has(id)) {
      return put(store, id, entity);
    }
    throw new Error(`${operation}: unknown id ${id}`);
  }

  /** Collects, in iteration order, the candidates accepted by `keep`. */
  function pick<V>(candidates: Iterable<V>, keep: (candidate: V) => boolean): V[] {
    const kept: V[] = [];
    for (const candidate of candidates) {
      if (keep(candidate)) kept.push(candidate);
    }
    return kept;
  }

  return {
    documents: {
      create: (document) => put(documentStore, document.id, document),
      get: (id) => documentStore.get(id) ?? null,
      list: () => Array.from(documentStore.values()),
      update: (document) =>
        replace(documentStore, document.id, document, "DocumentRepository.update"),
    },

    representations: {
      create: (representation) =>
        put(representationStore, representation.id, representation),
      get: (id) => representationStore.get(id) ?? null,
      listByDocument: (documentId) =>
        pick(representationStore.values(), (rep) => rep.documentId === documentId),
    },

    annotations: {
      create: (annotation) => put(annotationStore, annotation.id, annotation),
      get: (id) => annotationStore.get(id) ?? null,
      listByDocument: (documentId) =>
        pick(annotationStore.values(), (ann) => ann.documentId === documentId),
      update: (annotation) =>
        replace(annotationStore, annotation.id, annotation, "AnnotationRepository.update"),
    },

    evidenceItems: {
      create: (item) => put(evidenceStore, item.id, item),
      get: (id) => evidenceStore.get(id) ?? null,
      // An item belongs to a document when any annotation it cites does.
      listByDocument: (documentId, annotationLookup) =>
        pick(evidenceStore.values(), (item) =>
          item.annotationIds.some(
            (annotationId) => annotationLookup(annotationId)?.documentId === documentId,
          ),
        ),
      update: (item) =>
        replace(evidenceStore, item.id, item, "EvidenceItemRepository.update"),
    },
  };
}

View File

@@ -0,0 +1,8 @@
export {
createInMemoryRepos,
type InMemoryRepos,
type DocumentRepository,
type RepresentationRepository,
type AnnotationRepository,
type EvidenceItemRepository,
} from "./in-memory";

View File

@@ -0,0 +1,102 @@
/**
* Annotation service — creates technical marks on document ranges and
* emits `AnnotationCreated`. Resolution-status updates emit
* `AnnotationResolved` / `AnnotationResolutionFailed`.
*
* Annotation creation is the engine's response to a user action in the
* viewer (T07). The viewer adapter has already turned the selection into
* `Selector[]`; this service stamps an ID, normalize-version, timestamps,
* persists, and broadcasts.
*/
import type {
Annotation,
AnnotationResolutionStatus,
} from "@shared/annotation";
import type { DocumentId, RepresentationId, AnnotationId } from "@shared/ids";
import type { Selector } from "@shared/selector";
import { newId } from "@shared/ids";
import { NORMALIZE_VERSION } from "@shared/text/normalize";
import type { EventBus } from "../events";
import type { AnnotationRepository } from "../repos";
/**
 * Caller-supplied fields for a new annotation. The id, normalize version and
 * timestamps are added by the service, not by the caller.
 */
export interface CreateAnnotationInput {
readonly documentId: DocumentId;
readonly representationId?: RepresentationId;
readonly selectors: readonly Selector[];
readonly quote?: string;
readonly note?: string;
readonly createdBy?: string;
}
/** Service surface for creating, reading and re-statusing annotations. */
export interface AnnotationService {
/** Builds, stores and announces (`AnnotationCreated`) a new annotation. */
create(input: CreateAnnotationInput): Annotation;
/** Returns the annotation with `id`, or `null` when none is stored. */
get(id: AnnotationId): Annotation | null;
/** Returns the annotations stored for the given document. */
listByDocument(documentId: DocumentId): readonly Annotation[];
/**
 * Records a new resolution status on an existing annotation and emits
 * `AnnotationResolutionFailed` for "unresolved"/"stale", `AnnotationResolved`
 * for any other status. Throws when `id` is unknown.
 */
setResolutionStatus(
id: AnnotationId,
status: AnnotationResolutionStatus,
opts: { readonly confidence: number; readonly reason?: string },
): Annotation;
}
/**
 * Binds the annotation service to a repository and an event bus.
 *
 * @param annotations repository the service reads from and writes to.
 * @param bus bus on which `AnnotationCreated`, `AnnotationResolved` and
 *   `AnnotationResolutionFailed` are emitted.
 * @param now clock returning a timestamp string; defaults to the current
 *   time in ISO-8601 form. Injectable so tests can pin timestamps.
 */
export function createAnnotationService(
  annotations: AnnotationRepository,
  bus: EventBus,
  now: () => string = () => new Date().toISOString(),
): AnnotationService {
  const service: AnnotationService = {
    create(input) {
      // One clock reading so createdAt and updatedAt are identical.
      const stamp = now();
      const { representationId, quote, note, createdBy } = input;
      // Optional fields are omitted entirely when absent, never set to
      // `undefined`.
      const draft: Annotation = {
        id: newId("annotation"),
        documentId: input.documentId,
        ...(representationId === undefined ? {} : { representationId }),
        selectors: input.selectors,
        ...(quote === undefined ? {} : { quote }),
        ...(note === undefined ? {} : { note }),
        normalizeVersion: NORMALIZE_VERSION,
        ...(createdBy === undefined ? {} : { createdBy }),
        createdAt: stamp,
        updatedAt: stamp,
      };
      const saved = annotations.create(draft);
      bus.emit({ type: "AnnotationCreated", annotationId: saved.id, annotation: saved });
      return saved;
    },

    get: (id) => annotations.get(id),

    listByDocument: (documentId) => annotations.listByDocument(documentId),

    setResolutionStatus(id, status, opts) {
      const current = annotations.get(id);
      if (current === null || current === undefined) {
        throw new Error(`AnnotationService.setResolutionStatus: unknown id ${id}`);
      }

      const saved = annotations.update({
        ...current,
        resolutionStatus: status,
        updatedAt: now(),
      });

      // "unresolved" and "stale" are failures; every other status counts as
      // a resolution.
      if (status !== "unresolved" && status !== "stale") {
        bus.emit({
          type: "AnnotationResolved",
          annotationId: saved.id,
          status,
          confidence: opts.confidence,
        });
      } else {
        bus.emit({
          type: "AnnotationResolutionFailed",
          annotationId: saved.id,
          // With no explicit reason, the status itself serves as the reason.
          reason: opts.reason ?? status,
        });
      }
      return saved;
    },
  };
  return service;
}

View File

@@ -0,0 +1,63 @@
/**
* Document service — registers ingested documents and emits the §4 events.
*
* The ingest pipeline (`src/source/pdf/ingest.ts`) is a pure function over
* bytes — it does not touch the engine. The app composition root calls
* `ingestPdf` then hands the result to `documentService.register()`, which
* is where the engine takes over: persist into the repos, emit
* `DocumentImported` + `DocumentRepresentationGenerated`.
*/
import type { Document, DocumentRepresentation } from "@shared/document";
import type { DocumentId, RepresentationId } from "@shared/ids";
import type { EventBus } from "../events";
import type { DocumentRepository, RepresentationRepository } from "../repos";
/**
 * Public surface of the document service returned by
 * `createDocumentService`.
 */
export interface DocumentService {
/**
 * Stores a document together with one representation and emits
 * `DocumentImported` followed by `DocumentRepresentationGenerated`.
 * Returns the records as handed back by the repositories.
 */
register(input: {
readonly document: Document;
readonly representation: DocumentRepresentation;
}): { readonly document: Document; readonly representation: DocumentRepresentation };
/** Looks a document up by id via the repository; may return `null`. */
get(id: DocumentId): Document | null;
/** Lists the documents held by the repository. */
list(): readonly Document[];
/** Looks a representation up by id via the repository; may return `null`. */
getRepresentation(id: RepresentationId): DocumentRepresentation | null;
/** Lists the repository's representations for one document. */
listRepresentations(documentId: DocumentId): readonly DocumentRepresentation[];
}
/**
 * Wires a {@link DocumentService} to its two repositories and the event bus.
 *
 * @param documents Repository that stores `Document` records.
 * @param representations Repository that stores `DocumentRepresentation` records.
 * @param bus Bus that receives `DocumentImported` and
 *   `DocumentRepresentationGenerated`.
 */
export function createDocumentService(
  documents: DocumentRepository,
  representations: RepresentationRepository,
  bus: EventBus,
): DocumentService {
  return {
    register(input) {
      // Persist both records first, then announce them in import order.
      const document = documents.create(input.document);
      const representation = representations.create(input.representation);
      const documentId = document.id;

      bus.emit({ type: "DocumentImported", documentId, document });
      bus.emit({
        type: "DocumentRepresentationGenerated",
        documentId,
        representationId: representation.id,
        representation,
      });

      return { document, representation };
    },
    // The read methods are thin pass-throughs to the repositories.
    get: (id) => documents.get(id),
    list: () => documents.list(),
    getRepresentation: (id) => representations.get(id),
    listRepresentations: (documentId) => representations.listByDocument(documentId),
  };
}

View File

@@ -0,0 +1,127 @@
/**
* Evidence service — creates EvidenceItems on top of annotations and
* tracks their lifecycle. Emits §4 events: `EvidenceItemCreated`,
* `EvidenceItemUpdated`, `EvidenceItemActivated`.
*
* MVP item shape per `wiki/SharedContracts.md` §2.2: status starts at
* `candidate`, may transition to `confirmed | rejected | needs-check`.
* Item-level relation/strength (supports/contradicts/...) lives on the
* link, not the item — that's CE-WP-0003.
*/
import type { Annotation } from "@shared/annotation";
import type {
EvidenceItem,
EvidenceItemStatus,
} from "@shared/evidence";
import type {
AnnotationId,
DocumentId,
EvidenceItemId,
} from "@shared/ids";
import { newId } from "@shared/ids";
import type { EventBus, EvidenceItemActivatedEvent } from "../events";
import type { EvidenceItemRepository } from "../repos";
/**
 * Caller-supplied fields for `EvidenceService.create`.
 *
 * The service adds the id and both timestamps; optional fields left
 * `undefined` are omitted from the stored item.
 */
export interface CreateEvidenceItemInput {
/** Annotations backing the item; `create` throws when this is empty. */
readonly annotationIds: readonly AnnotationId[];
/** Optional title. */
readonly title?: string;
/** Optional commentary text. */
readonly commentary?: string;
/** Initial status; `create` uses `"candidate"` when omitted. */
readonly status?: EvidenceItemStatus;
/** Optional confidence value, stored as given. */
readonly confidence?: number;
/** Optional tags, stored as given. */
readonly tags?: readonly string[];
/** Optional author identifier. */
readonly createdBy?: string;
}
/**
 * Public surface of the evidence service returned by
 * `createEvidenceService`.
 */
export interface EvidenceService {
/** Builds, persists and broadcasts (`EvidenceItemCreated`) a new item. */
create(input: CreateEvidenceItemInput): EvidenceItem;
/** Looks an item up by id via the repository; may return `null`. */
get(id: EvidenceItemId): EvidenceItem | null;
/** Lists items for a document; the repository is given the annotation lookup. */
listByDocument(documentId: DocumentId): readonly EvidenceItem[];
/**
 * Changes the status and emits `EvidenceItemUpdated`. Returns the existing
 * item untouched (no event) when the status is already `status`. Throws
 * when `id` is unknown.
 */
setStatus(id: EvidenceItemId, status: EvidenceItemStatus): EvidenceItem;
/** Replaces the commentary and emits `EvidenceItemUpdated`. Throws when `id` is unknown. */
updateCommentary(id: EvidenceItemId, commentary: string): EvidenceItem;
/**
 * Emits `EvidenceItemActivated` for an existing item without modifying it.
 * Throws when `id` is unknown.
 */
activate(
id: EvidenceItemId,
source?: EvidenceItemActivatedEvent["source"],
): EvidenceItem;
}
/**
 * Wires an {@link EvidenceService} to its repository and the event bus.
 *
 * @param items Repository that stores evidence items.
 * @param annotationLookup Resolver handed to the repository's
 *   `listByDocument` alongside the document id.
 * @param bus Bus that receives `EvidenceItemCreated`,
 *   `EvidenceItemUpdated` and `EvidenceItemActivated`.
 * @param now Clock returning a timestamp string; defaults to the current
 *   time in ISO-8601 form.
 */
export function createEvidenceService(
  items: EvidenceItemRepository,
  annotationLookup: (id: AnnotationId) => Annotation | null,
  bus: EventBus,
  now: () => string = () => new Date().toISOString(),
): EvidenceService {
  /** Fetches an item or throws the operation-specific "unknown id" error. */
  const requireItem = (id: EvidenceItemId, operation: string): EvidenceItem => {
    const found = items.get(id);
    if (!found) {
      throw new Error(`EvidenceService.${operation}: unknown id ${id}`);
    }
    return found;
  };

  /** Persists a changed item and announces it with the prior status. */
  const saveAndAnnounce = (changed: EvidenceItem, previousStatus: EvidenceItemStatus): EvidenceItem => {
    const saved = items.update(changed);
    bus.emit({
      type: "EvidenceItemUpdated",
      evidenceItemId: saved.id,
      evidenceItem: saved,
      previousStatus,
    });
    return saved;
  };

  return {
    create(input) {
      if (input.annotationIds.length === 0) {
        throw new Error("EvidenceService.create: at least one annotationId is required");
      }
      const timestamp = now();
      const { title, commentary, confidence, tags, createdBy } = input;
      // Optional fields are only present on the record when supplied.
      const draft: EvidenceItem = {
        id: newId("evidence"),
        annotationIds: input.annotationIds,
        ...(title !== undefined ? { title } : {}),
        ...(commentary !== undefined ? { commentary } : {}),
        status: input.status ?? "candidate",
        ...(confidence !== undefined ? { confidence } : {}),
        ...(tags !== undefined ? { tags } : {}),
        ...(createdBy !== undefined ? { createdBy } : {}),
        createdAt: timestamp,
        updatedAt: timestamp,
      };
      const saved = items.create(draft);
      bus.emit({ type: "EvidenceItemCreated", evidenceItemId: saved.id, evidenceItem: saved });
      return saved;
    },

    get: (id) => items.get(id),

    listByDocument: (documentId) => items.listByDocument(documentId, annotationLookup),

    setStatus(id, status) {
      const current = requireItem(id, "setStatus");
      // Setting the status it already has is a no-op: no write, no event.
      if (current.status === status) return current;
      return saveAndAnnounce({ ...current, status, updatedAt: now() }, current.status);
    },

    updateCommentary(id, commentary) {
      const current = requireItem(id, "updateCommentary");
      return saveAndAnnounce({ ...current, commentary, updatedAt: now() }, current.status);
    },

    activate(id, source) {
      const current = requireItem(id, "activate");
      // Activation only announces; the stored item is not modified.
      bus.emit({
        type: "EvidenceItemActivated",
        evidenceItemId: current.id,
        ...(source !== undefined ? { source } : {}),
      });
      return current;
    },
  };
}

View File

@@ -0,0 +1,14 @@
export {
createDocumentService,
type DocumentService,
} from "./documents";
export {
createAnnotationService,
type AnnotationService,
type CreateAnnotationInput,
} from "./annotations";
export {
createEvidenceService,
type EvidenceService,
type CreateEvidenceItemInput,
} from "./evidence";

View File

@@ -1 +1,8 @@
export {};
export {
ingestPdf,
type IngestPdfInput,
type IngestPdfOptions,
type IngestPdfResult,
} from "./pdf/ingest";
export { extractPdf, type PdfExtractionResult } from "./pdf/extract";
export { fingerprintBytes } from "./pdf/fingerprint";

122
src/source/pdf/extract.ts Normal file
View File

@@ -0,0 +1,122 @@
/**
* PDF text extraction → canonical text + PageMap + OffsetMap.
*
* Implements `wiki/ArchitectureOverview.md` §3.4 ("extract canonical text /
* build format-specific maps") for the `pdf-text` representation
* (`wiki/SharedContracts.md` §1, §3) and §6 (canonical normalization).
*
* Runtime independence: the PDF.js worker must be configured by the host
* application (`GlobalWorkerOptions.workerSrc`) before this module is
* called. In Vite/browser code the worker is bundled via the viewer; in
* Node tests the test setup file points it at
* `pdfjs-dist/legacy/build/pdf.worker.mjs`. No worker setup happens here
* so the same module loads cleanly in both runtimes.
*
* Page boundary semantics: canonical text concatenates per-page normalized
* text with a single "\n\n" paragraph separator. The separator is treated
* as belonging to the *preceding* page in `OffsetMap`, so the map covers
* `[0, canonicalText.length)` with no gaps. The last page has no trailing
* separator. This means `pageLength = globalEnd - globalStart` for
* every page; for non-last pages it equals (normalized page text length +
* 2). See `PageOffsetRange` in `@shared/document.ts`.
*/
import { getDocument } from "pdfjs-dist";
import type { PDFPageProxy } from "pdfjs-dist";
import type {
OffsetMap,
PageInfo,
PageMap,
PageOffsetRange,
} from "@shared/document";
import { normalize } from "@shared/text/normalize";
// Inserted between consecutive pages when building the canonical text.
const PAGE_SEPARATOR = "\n\n";
/** Everything `extractPdf` derives from a PDF's bytes. */
export interface PdfExtractionResult {
/** Per-page normalized text joined with the page separator. */
readonly canonicalText: string;
/** One entry per page with its page number and scale-1 viewport size. */
readonly pageMap: PageMap;
/** One range per page locating that page inside `canonicalText`. */
readonly offsetMap: OffsetMap;
/** Number of pages reported by PDF.js. */
readonly pageCount: number;
}
/**
 * Extracts canonical text plus page and offset maps from a PDF.
 *
 * Pages are processed sequentially. For each page the scale-1 viewport size
 * is recorded and the page text is normalized; the normalized texts are
 * then joined into the canonical text.
 *
 * @param bytes Raw PDF bytes. A copy is handed to PDF.js, so the caller's
 *   array stays usable afterwards.
 * @returns The canonical text, page map, offset map and page count.
 * @throws Whatever PDF.js rejects with when the document cannot be loaded
 *   or a page cannot be read. The loading task is destroyed in that case
 *   too, so a failed load does not leave its resources behind.
 */
export async function extractPdf(bytes: Uint8Array): Promise<PdfExtractionResult> {
  // PDF.js mutates the bytes buffer (transfers ownership). Pass a fresh copy
  // so the caller's Uint8Array stays usable for fingerprinting after extract.
  const data = new Uint8Array(bytes);
  const loadingTask = getDocument({ data });
  try {
    // Awaited inside the try so a rejected load still reaches the cleanup
    // below instead of skipping it.
    const doc = await loadingTask.promise;
    const pageCount = doc.numPages;
    const pageInfos: PageInfo[] = [];
    const pageNormalizedTexts: string[] = [];
    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
      const page = await doc.getPage(pageNumber);
      try {
        const viewport = page.getViewport({ scale: 1 });
        pageInfos.push({
          page: pageNumber,
          width: viewport.width,
          height: viewport.height,
        });
        const rawText = await extractPageText(page);
        pageNormalizedTexts.push(normalize(rawText).text);
      } finally {
        // Release per-page resources even when text extraction throws.
        page.cleanup();
      }
    }
    const { canonicalText, offsetMap } = buildOffsetMap(pageNormalizedTexts);
    return {
      canonicalText,
      pageMap: pageInfos,
      offsetMap,
      pageCount,
    };
  } finally {
    // Destroying the loading task also tears down a successfully loaded
    // document, so this covers both the success and the failure path.
    await loadingTask.destroy();
  }
}
/**
 * Concatenates the visible text of one page.
 *
 * Only content items carrying a `str` field contribute; other items are
 * skipped. A newline is appended after every item flagged `hasEOL`.
 */
async function extractPageText(page: PDFPageProxy): Promise<string> {
  const { items } = await page.getTextContent();
  let text = "";
  for (const entry of items) {
    // Entries without `str` are marked-content markers with no text.
    if (!("str" in entry)) continue;
    text += entry.hasEOL ? `${entry.str}\n` : entry.str;
  }
  return text;
}
/**
 * Joins per-page texts into the canonical text and computes, for each page,
 * the half-open range it occupies there.
 *
 * Every page except the last also owns the separator that follows it, so
 * consecutive ranges touch and `pageLength` always equals
 * `globalEnd - globalStart`.
 */
function buildOffsetMap(pageTexts: readonly string[]): {
  canonicalText: string;
  offsetMap: OffsetMap;
} {
  const lastIndex = pageTexts.length - 1;
  let cursor = 0;
  const offsetMap: PageOffsetRange[] = pageTexts.map((pageText, index) => {
    const trailing = index === lastIndex ? 0 : PAGE_SEPARATOR.length;
    const pageLength = pageText.length + trailing;
    const range: PageOffsetRange = {
      page: index + 1,
      globalStart: cursor,
      globalEnd: cursor + pageLength,
      pageLength,
    };
    // The next page starts exactly where this one ends.
    cursor = range.globalEnd;
    return range;
  });
  return { canonicalText: pageTexts.join(PAGE_SEPARATOR), offsetMap };
}

View File

@@ -0,0 +1,31 @@
/**
* SHA-256 fingerprint of raw document bytes.
*
* Implements the fingerprint half of `wiki/ArchitectureOverview.md` §3.4
* (the "compute fingerprint" pipeline step) and populates
* `Document.fingerprint` (`wiki/SharedContracts.md` §1).
*
* Uses Web Crypto's `crypto.subtle.digest`, which is available in browsers
* and in Node ≥ 20 (where it is exposed on `globalThis.crypto`). No
* platform branching — the API is the same in both environments.
*/
/**
 * Computes the SHA-256 digest of `bytes` and returns it as lowercase hex.
 *
 * @param bytes Raw bytes to hash; not modified.
 * @returns A 64-character hexadecimal string.
 */
export async function fingerprintBytes(bytes: Uint8Array): Promise<string> {
  // Hash a private copy backed by a plain ArrayBuffer: the digest call's
  // `BufferSource` parameter type does not accept a SharedArrayBuffer-backed
  // view. The extra O(n) copy is negligible next to the O(n) hash itself.
  const buffer = new ArrayBuffer(bytes.byteLength);
  const view = new Uint8Array(buffer);
  view.set(bytes);
  const hashed = new Uint8Array(await crypto.subtle.digest("SHA-256", buffer));
  return bytesToHex(hashed);
}
/** Renders each byte as two lowercase hex digits and concatenates them. */
function bytesToHex(bytes: Uint8Array): string {
  return Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
}

View File

@@ -0,0 +1,142 @@
/**
* Fixture-driven contract tests for the PDF ingest pipeline.
*
* For each fixture in `fixtures/pdfs/manifest.json`:
* 1. Read the PDF bytes from disk.
* 2. Run `ingestPdf` end-to-end.
* 3. Assert the resulting Document + DocumentRepresentation honour the
* manifest contract: media type is application/pdf, fingerprint is a
* 64-hex SHA-256, pageMap matches `page_count`, canonicalText
* contains `known_good_quote`, and the offsetMap covers
* `[0, canonicalText.length)` with no gaps.
*
* This is the verification gate for CE-WP-0002-T03.
*/
import { readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { createRequire } from "node:module";
import { fileURLToPath } from "node:url";
import { beforeAll, describe, expect, it } from "vitest";
import { ingestPdf } from "./ingest";
import { fingerprintBytes } from "./fingerprint";
import manifest from "../../../fixtures/pdfs/manifest.json" with { type: "json" };
// Directory containing this test file, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Absolute path of the fixture PDF directory, relative to this file.
const FIXTURE_DIR = resolve(__dirname, "../../../fixtures/pdfs");
/** Manifest entry fields declared for these tests. */
interface Fixture {
id: string;
filename: string;
page_count: number;
known_good_quote: string;
known_good_quote_page: number;
}
// Every fixture in the manifest; the corpus suite below iterates over these.
const FIXTURES: readonly Fixture[] = manifest.fixtures;
beforeAll(async () => {
  // PDF.js refuses to run without a workerSrc. Under Node we resolve the
  // legacy worker bundle, because the modern bundle relies on APIs Node
  // lacks. The legacy worker is plain JS and is executed through the
  // fake-worker fallback PDF.js uses when no real Worker exists.
  const nodeRequire = createRequire(import.meta.url);
  const { GlobalWorkerOptions } = await import("pdfjs-dist");
  GlobalWorkerOptions.workerSrc = nodeRequire.resolve(
    "pdfjs-dist/legacy/build/pdf.worker.mjs",
  );
});
// Contract suite: registers one nested describe per manifest fixture, each
// with the same five checks. Every check runs its own ingest of the bytes.
describe("ingestPdf — fixture corpus", () => {
for (const fixture of FIXTURES) {
describe(fixture.id, () => {
// Bytes are read once per fixture while the suite is being collected.
const path = resolve(FIXTURE_DIR, fixture.filename);
const bytes = new Uint8Array(readFileSync(path));
it("produces a Document with PDF media type and SHA-256 fingerprint", async () => {
const { document } = await ingestPdf(bytes, { filename: fixture.filename });
expect(document.mediaType).toBe("application/pdf");
expect(document.fingerprint).toMatch(/^[0-9a-f]{64}$/);
// With no explicit title, the filename becomes the title.
expect(document.title).toBe(fixture.filename);
// Fingerprint must be deterministic across runs.
const expected = await fingerprintBytes(bytes);
expect(document.fingerprint).toBe(expected);
});
it("produces a pdf-text representation with the expected page count", async () => {
const { representation } = await ingestPdf(bytes);
expect(representation.representationType).toBe("pdf-text");
// Both maps carry exactly one entry per page.
expect(representation.pageMap?.length).toBe(fixture.page_count);
expect(representation.offsetMap?.length).toBe(fixture.page_count);
});
it("canonical text contains the manifest's known-good quote", async () => {
const { representation } = await ingestPdf(bytes);
const text = representation.canonicalText ?? "";
expect(text).toContain(fixture.known_good_quote);
});
it("offsetMap is gap-free and covers [0, canonicalText.length)", async () => {
const { representation } = await ingestPdf(bytes);
const text = representation.canonicalText ?? "";
const offsets = representation.offsetMap ?? [];
expect(offsets.length).toBeGreaterThan(0);
// The ranges must start at 0 and end at the end of the text.
expect(offsets[0]!.globalStart).toBe(0);
expect(offsets.at(-1)!.globalEnd).toBe(text.length);
for (let i = 0; i < offsets.length; i++) {
const r = offsets[i]!;
// Pages are numbered from 1 in order, and each range's length is consistent.
expect(r.page).toBe(i + 1);
expect(r.globalEnd - r.globalStart).toBe(r.pageLength);
// Each range begins exactly where the previous one ended (no gaps or overlaps).
if (i > 0) expect(r.globalStart).toBe(offsets[i - 1]!.globalEnd);
}
});
it("pageMap entries have positive width and height in user-space points", async () => {
const { representation } = await ingestPdf(bytes);
const pages = representation.pageMap ?? [];
for (let i = 0; i < pages.length; i++) {
const p = pages[i]!;
expect(p.page).toBe(i + 1);
expect(p.width).toBeGreaterThan(0);
expect(p.height).toBeGreaterThan(0);
}
});
});
}
});
// Option-handling suite: exercises `IngestPdfOptions` and the accepted
// input types against the first manifest fixture only.
describe("ingestPdf — option handling", () => {
const fixture = FIXTURES[0]!;
const path = resolve(FIXTURE_DIR, fixture.filename);
const bytes = new Uint8Array(readFileSync(path));
it("uses explicit title over filename", async () => {
const { document } = await ingestPdf(bytes, {
filename: fixture.filename,
title: "Custom Title",
});
expect(document.title).toBe("Custom Title");
});
it("omits title entirely when neither filename nor title is supplied", async () => {
const { document } = await ingestPdf(bytes);
expect(document.title).toBeUndefined();
});
it("propagates uri and metadata when supplied", async () => {
const { document } = await ingestPdf(bytes, {
uri: "file:///example.pdf",
metadata: { source: "test" },
});
expect(document.uri).toBe("file:///example.pdf");
expect(document.metadata).toEqual({ source: "test" });
});
it("accepts ArrayBuffer input", async () => {
// Slice out exactly the view's bytes in case the backing buffer is larger.
const ab = bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
const { document } = await ingestPdf(ab);
expect(document.fingerprint).toMatch(/^[0-9a-f]{64}$/);
});
});

88
src/source/pdf/ingest.ts Normal file
View File

@@ -0,0 +1,88 @@
/**
* PDF ingest pipeline → `{ document, representation }`.
*
* Implements `wiki/ArchitectureOverview.md` §3.4 ("Raw Source → identify
* media type → compute fingerprint → extract metadata → extract canonical
* text → build format-specific maps → persist Document +
* DocumentRepresentation") for the PDF source format.
*
* Ingest is a pure function over bytes: it does not persist anything. The
* caller (engine repositories in T05, app layer in T06) writes the returned
* Document + DocumentRepresentation into the chosen store.
*/
import {
type Document,
type DocumentRepresentation,
} from "@shared/document";
import { newId } from "@shared/ids";
import { extractPdf } from "./extract";
import { fingerprintBytes } from "./fingerprint";
// Media type stamped on every Document produced by this pipeline.
const PDF_MEDIA_TYPE = "application/pdf";
/**
 * Optional descriptive fields for `ingestPdf`. Each one is copied onto the
 * Document only when supplied.
 */
export interface IngestPdfOptions {
/** Original filename, used as the default title when no title is given. */
readonly filename?: string;
/** Optional pre-existing title (overrides filename). */
readonly title?: string;
/** Optional source URI (e.g. file:// or https://). */
readonly uri?: string;
/** Free-form metadata persisted on the Document record. */
readonly metadata?: Readonly<Record<string, unknown>>;
}
/** The pair of records produced by one `ingestPdf` call. */
export interface IngestPdfResult {
/** Document record carrying the fingerprint and descriptive fields. */
readonly document: Document;
/** The `pdf-text` representation linked to `document` by its id. */
readonly representation: DocumentRepresentation;
}
/** Byte containers `ingestPdf` accepts; all are converted to a `Uint8Array`. */
export type IngestPdfInput = Uint8Array | ArrayBuffer | Blob;
/**
 * Turns PDF bytes into a `Document` and its `pdf-text` representation.
 *
 * Fingerprinting and text extraction run concurrently over the same bytes.
 * Nothing is persisted here; the caller stores the returned records.
 *
 * @param input PDF content as a `Uint8Array`, `ArrayBuffer` or `Blob`.
 * @param options Optional title, filename, URI and metadata.
 * @returns Freshly identified document and representation records sharing
 *   one timestamp.
 */
export async function ingestPdf(
  input: IngestPdfInput,
  options: IngestPdfOptions = {},
): Promise<IngestPdfResult> {
  const bytes = await toBytes(input);
  const fingerprintPromise = fingerprintBytes(bytes);
  const extractionPromise = extractPdf(bytes);
  const [fingerprint, extraction] = await Promise.all([fingerprintPromise, extractionPromise]);

  const timestamp = new Date().toISOString();
  const documentId = newId("document");
  const representationId = newId("representation");

  // An explicit title wins; otherwise the filename (if any) is the title.
  const title = options.title ?? options.filename;
  const { uri, metadata } = options;

  const document: Document = {
    id: documentId,
    mediaType: PDF_MEDIA_TYPE,
    fingerprint,
    createdAt: timestamp,
    updatedAt: timestamp,
    ...(title !== undefined ? { title } : {}),
    ...(uri !== undefined ? { uri } : {}),
    ...(metadata !== undefined ? { metadata } : {}),
  };

  const { canonicalText, pageMap, offsetMap } = extraction;
  const representation: DocumentRepresentation = {
    id: representationId,
    documentId,
    representationType: "pdf-text",
    contentHash: fingerprint,
    canonicalText,
    pageMap,
    offsetMap,
    generatedAt: timestamp,
  };

  return { document, representation };
}
/**
 * Normalizes any accepted input to a `Uint8Array`.
 *
 * A `Uint8Array` is returned as-is (no copy); an `ArrayBuffer` is wrapped in
 * a view; anything else is read through its `arrayBuffer()` method.
 */
async function toBytes(input: IngestPdfInput): Promise<Uint8Array> {
  if (input instanceof Uint8Array) {
    return input;
  }
  // Remaining case is Blob (covers `File` in browsers — File extends Blob).
  const buffer = input instanceof ArrayBuffer ? input : await input.arrayBuffer();
  return new Uint8Array(buffer);
}

View File

@@ -0,0 +1,100 @@
/**
* AnnotationToolbar — wires "I selected text" into "evidence appears in
* the sidebar".
*
* Visible only when a `pendingSelection` is set (the viewer publishes
* captures into context, then this toolbar lets the user attach commentary
* and commit). On Save it runs the full pipeline:
*
* 1. `createSelectors(capture, representation)` — anchor builds the
* maximal selector set against the active representation.
* 2. `engine.annotations.create(...)` — engine mints an Annotation +
* emits AnnotationCreated.
* 3. `engine.evidence.create(...)` — engine mints the EvidenceItem with
* the user's commentary, emits EvidenceItemCreated.
*
* The sidebar re-renders via the engine event bus, so no other glue is
* needed.
*/
import { useEffect, useState } from "react";
import { createSelectors } from "@anchor/index";
import {
useActiveDocument,
useEngine,
usePendingSelection,
} from "./EngineContext";
/**
 * Toolbar shown while a text selection is waiting to be saved.
 *
 * Renders nothing unless there is a pending selection and an active
 * document with a representation. "Save evidence" builds selectors from
 * the capture, creates an annotation, creates an evidence item that points
 * at it (with the trimmed commentary, if any) and clears the pending
 * selection. "Discard" only clears the pending selection.
 */
export function AnnotationToolbar() {
  const engine = useEngine();
  const { document, representation } = useActiveDocument();
  const { pending, set } = usePendingSelection();
  const [commentary, setCommentary] = useState("");

  // Reset the commentary box whenever a fresh selection arrives.
  useEffect(() => {
    setCommentary("");
  }, [pending]);

  if (!pending || !document || !representation) return null;

  const handleSave = () => {
    const selectors = createSelectors(pending.capture, representation);
    const annotation = engine.annotations.create({
      documentId: document.id,
      representationId: representation.id,
      selectors,
      quote: pending.capture.text,
    });
    engine.evidence.create({
      annotationIds: [annotation.id],
      // Whitespace-only commentary is treated as "no commentary".
      ...(commentary.trim().length > 0 ? { commentary: commentary.trim() } : {}),
    });
    set(null);
  };

  const handleDiscard = () => set(null);

  const quote = pending.capture.text;
  // Long quotes are clipped for display only; the ellipsis tells the user
  // that the preview is not the full selection. The saved quote is never
  // truncated.
  const shortQuote = quote.length > 200 ? `${quote.slice(0, 200)}…` : quote;

  return (
    <div
      style={{
        borderBottom: "1px solid #f0c040",
        background: "#fff8d6",
        padding: 8,
        fontFamily: "system-ui, sans-serif",
        fontSize: 12,
      }}
    >
      <div style={{ marginBottom: 6, fontWeight: 600 }}>
        New annotation ({pending.selectors.length} selector{pending.selectors.length === 1 ? "" : "s"})
      </div>
      <div style={{ marginBottom: 6, fontStyle: "italic", color: "#444" }}>
        &ldquo;{shortQuote}&rdquo;
      </div>
      <textarea
        value={commentary}
        onChange={(e) => setCommentary(e.target.value)}
        placeholder="Add a one-line comment (optional)…"
        rows={2}
        style={{
          width: "100%",
          boxSizing: "border-box",
          fontSize: 12,
          padding: 4,
          marginBottom: 6,
        }}
      />
      <div style={{ display: "flex", gap: 6 }}>
        <button onClick={handleSave} style={{ fontSize: 12, padding: "4px 10px" }}>
          Save evidence
        </button>
        <button onClick={handleDiscard} style={{ fontSize: 12, padding: "4px 10px" }}>
          Discard
        </button>
      </div>
    </div>
  );
}

125
src/work/CollectionList.tsx Normal file
View File

@@ -0,0 +1,125 @@
/**
* CollectionList — the left pane.
*
* Lists the fixture corpus (the MVP stand-in for a real document collection).
* Clicking a fixture fetches the bytes, runs `ingestPdf` (PDF.js extraction
* + fingerprint + canonical text), registers the result with the engine
* (emitting §4 events), and activates it as the current document.
*
* Per CE-WP-0002-T06, the loaded fixture set is hard-wired to
* `fixtures/pdfs/manifest.json`. Real collections arrive in a later
* workplan.
*/
import { useCallback, useState } from "react";
import { ingestPdf } from "@source/index";
import { useEngine, useActiveDocumentId } from "./EngineContext";
import type { DocumentId } from "@shared/ids";
import manifest from "../../fixtures/pdfs/manifest.json";
/** Manifest entry fields this pane reads or displays. */
interface Fixture {
/** Identifier shown in the list and used as the React key. */
id: string;
/** File name fetched from `/fixtures/pdfs/` and passed to ingest. */
filename: string;
description: string;
/** Page count shown under the fixture id. */
page_count: number;
}
// The manifest is cast because the JSON import's inferred type is not `Fixture`.
const FIXTURES: readonly Fixture[] = (manifest as { fixtures: Fixture[] }).fixtures;
/**
 * Left pane listing the fixture PDFs.
 *
 * Clicking a fixture activates its document. If the fixture has no document
 * in the engine yet, the PDF is fetched, ingested, registered and then
 * activated. A fixture counts as already loaded when it was loaded through
 * this component instance, or when the engine already holds a document
 * whose title equals the fixture's filename (for example one restored from
 * a persisted snapshot). Ingest here always sets the title to the filename,
 * so re-clicking after a reload re-opens the existing record instead of
 * registering a duplicate under a new id.
 */
export function CollectionList() {
  const engine = useEngine();
  const { id: activeId, setId } = useActiveDocumentId();
  const [loadingFixtureId, setLoadingFixtureId] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);
  // Remember which fixture-id maps to which loaded documentId so re-clicking
  // a fixture activates the existing engine record rather than re-ingesting.
  const [byFixture, setByFixture] = useState<Record<string, DocumentId>>({});

  // Resolve a fixture to a document id: this session's map first, then any
  // document already in the engine that was ingested from the same file.
  const findDocumentId = useCallback(
    (fixture: Fixture): DocumentId | undefined =>
      byFixture[fixture.id] ??
      engine.documents.list().find((doc) => doc.title === fixture.filename)?.id,
    [byFixture, engine],
  );

  const handleLoad = useCallback(
    async (fixture: Fixture) => {
      setError(null);
      const existing = findDocumentId(fixture);
      if (existing) {
        setId(existing);
        return;
      }
      setLoadingFixtureId(fixture.id);
      try {
        const url = `/fixtures/pdfs/${encodeURIComponent(fixture.filename)}`;
        const response = await fetch(url);
        if (!response.ok) {
          throw new Error(`fetch ${url} failed: HTTP ${response.status}`);
        }
        const buffer = await response.arrayBuffer();
        const { document, representation } = await ingestPdf(new Uint8Array(buffer), {
          filename: fixture.filename,
        });
        engine.documents.register({ document, representation });
        setByFixture((prev) => ({ ...prev, [fixture.id]: document.id }));
        setId(document.id);
      } catch (err) {
        // Surface the failure in the pane instead of letting it escape.
        setError(err instanceof Error ? err.message : String(err));
      } finally {
        setLoadingFixtureId(null);
      }
    },
    [engine, findDocumentId, setId],
  );

  return (
    <aside
      style={{
        width: 280,
        borderRight: "1px solid #ddd",
        padding: 12,
        overflow: "auto",
        flex: "0 0 280px",
      }}
    >
      <h2 style={{ marginTop: 0, fontSize: 16 }}>Collection</h2>
      <p style={{ fontSize: 12, color: "#555", marginTop: 0 }}>
        {FIXTURES.length} fixture PDF{FIXTURES.length === 1 ? "" : "s"}
      </p>
      {error && (
        <p style={{ fontSize: 12, color: "#b00020", background: "#fff4f4", padding: 6 }}>
          {error}
        </p>
      )}
      <ul style={{ listStyle: "none", padding: 0, margin: 0 }}>
        {FIXTURES.map((f) => {
          const isLoading = loadingFixtureId === f.id;
          const documentId = findDocumentId(f);
          const isActive = documentId !== undefined && documentId === activeId;
          return (
            <li key={f.id} style={{ marginBottom: 6 }}>
              <button
                onClick={() => {
                  void handleLoad(f);
                }}
                disabled={isLoading}
                style={{
                  display: "block",
                  width: "100%",
                  textAlign: "left",
                  background: isActive ? "#e8f0ff" : "white",
                  border: "1px solid #ccc",
                  padding: 6,
                  cursor: isLoading ? "wait" : "pointer",
                  fontSize: 12,
                }}
              >
                <div style={{ fontWeight: 600 }}>{f.id}</div>
                <div style={{ color: "#666", fontSize: 11 }}>
                  {f.page_count} page{f.page_count === 1 ? "" : "s"}
                  {isLoading ? " · loading…" : isActive ? " · open" : ""}
                </div>
              </button>
            </li>
          );
        })}
      </ul>
    </aside>
  );
}

219
src/work/EngineContext.tsx Normal file
View File

@@ -0,0 +1,219 @@
/**
* Engine + active-document React context.
*
* MVP composition root for the UI: one `Engine` instance for the lifetime of
* the SPA, plus the "what's open in the viewer right now" pointer.
* `useEngine()` returns the engine; `useActiveDocument()` returns the
* currently-loaded `{document, representation}` pair, refreshed when the
* engine emits `DocumentImported` / `DocumentRepresentationGenerated`.
*
* Replaces ad-hoc engine wiring inside each component. Per the workplan
* (T07 note), state lives in a single React context; no Zustand or Redux.
*/
import {
createContext,
useCallback,
useContext,
useEffect,
useMemo,
useState,
type ReactNode,
} from "react";
import type { Document, DocumentRepresentation } from "@shared/document";
import type { AnnotationId, DocumentId } from "@shared/ids";
import type { Selector } from "@shared/selector";
import {
attachPersister,
createEngine,
restoreFromStorage,
type Engine,
} from "@engine/index";
import type { PdfSelectionCapture } from "@anchor/index";
/**
* localStorage keys for the engine snapshot and the UI's "what was open"
* pointer. ADR-0005 frames both as deliberately temporary — real
* persistence later.
*/
// Key handed to restoreFromStorage / attachPersister for the engine snapshot.
const STORAGE_KEY = "citation-evidence:engine-snapshot:v1";
// Key under which the active document id is stored.
const ACTIVE_KEY = "citation-evidence:active-document-id:v1";
/**
 * The pending selection lives in context (not local component state) because
 * the toolbar that consumes it is rendered above the viewer, not inside it.
 * `null` means "no selection waiting for a comment".
 */
export interface PendingSelection {
/** The captured selection, including its text. */
readonly capture: PdfSelectionCapture;
/** Selectors that accompany the capture. */
readonly selectors: readonly Selector[];
}
/** Everything `EngineProvider` publishes through the React context. */
interface EngineContextValue {
/** The engine instance shared by all consumers. */
readonly engine: Engine;
/** Id of the document currently open, or `null` when none is. */
readonly activeDocumentId: DocumentId | null;
/** Switches the active document; also clears the pending selection and scroll target. */
setActiveDocumentId(id: DocumentId | null): void;
/** Selection waiting to be saved, or `null`. */
readonly pendingSelection: PendingSelection | null;
/** Sets or clears the pending selection. */
setPendingSelection(pending: PendingSelection | null): void;
/** Annotation the viewer was last asked to scroll to, or `null`. */
readonly scrollToAnnotationId: AnnotationId | null;
/** The version counter bumps even when the same id is set twice in a row,
* so a second click on the same evidence item still triggers a scroll. */
readonly scrollVersion: number;
/** Requests a scroll to the given annotation and bumps `scrollVersion`. */
scrollToAnnotation(id: AnnotationId | null): void;
}
// Defaults to `null` so the hooks can detect a missing provider and throw.
const EngineContext = createContext<EngineContextValue | null>(null);
/** Props accepted by `EngineProvider`. */
interface EngineProviderProps {
/** Subtree that gets access to the engine context. */
readonly children: ReactNode;
/** Inject a pre-built engine for tests; production uses the default. */
readonly engine?: Engine;
}
/**
 * Returns `localStorage` when it exists and can be accessed, else `null`.
 * Merely reading the property can throw (for example when storage access is
 * blocked), so the probe is guarded.
 */
function getBrowserStorage(): Storage | null {
  try {
    return typeof globalThis.localStorage === "undefined" ? null : globalThis.localStorage;
  } catch {
    return null;
  }
}

/**
 * Provides the engine plus the UI's shared viewer state (active document,
 * pending selection, scroll target) to its subtree.
 *
 * Without an injected engine it creates one, restores it from localStorage
 * on mount, attaches the persister, and keeps the active-document id in
 * localStorage. With an injected engine all persistence is skipped.
 *
 * Storing the active-document pointer is best-effort: a storage failure is
 * logged and otherwise ignored, so a full or blocked storage cannot take
 * the UI down.
 */
export function EngineProvider({ children, engine: injected }: EngineProviderProps) {
  const engine = useMemo(() => injected ?? createEngine(), [injected]);
  const [activeDocumentId, setActiveDocumentIdState] = useState<DocumentId | null>(null);
  const [pendingSelection, setPendingSelection] = useState<PendingSelection | null>(null);
  const [scrollState, setScrollState] = useState<{ id: AnnotationId | null; version: number }>({
    id: null,
    version: 0,
  });

  // Restore from localStorage on first mount, then attach the persister.
  // The injected-engine path skips persistence (tests own their lifecycle).
  useEffect(() => {
    if (injected) return;
    if (!getBrowserStorage()) return;
    const result = restoreFromStorage(engine, { key: STORAGE_KEY });
    if (result.restored) {
      let saved: string | null = null;
      try {
        saved = globalThis.localStorage.getItem(ACTIVE_KEY);
      } catch (err) {
        console.warn("EngineProvider: could not read the active-document id", err);
      }
      // Only re-activate a document that the restored engine actually holds.
      if (saved && engine.documents.get(saved as DocumentId)) {
        setActiveDocumentIdState(saved as DocumentId);
      }
    }
    return attachPersister(engine, { key: STORAGE_KEY });
  }, [engine, injected]);

  // Persist the active-document pointer alongside the engine snapshot so a
  // reload lands the user back where they were.
  useEffect(() => {
    if (injected) return;
    const storage = getBrowserStorage();
    if (!storage) return;
    try {
      if (activeDocumentId) {
        storage.setItem(ACTIVE_KEY, activeDocumentId);
      } else {
        storage.removeItem(ACTIVE_KEY);
      }
    } catch (err) {
      // setItem can throw when the quota is exhausted; losing the pointer
      // only costs the user one click after reload.
      console.warn("EngineProvider: could not persist the active-document id", err);
    }
  }, [activeDocumentId, injected]);

  // Switching the active document discards any pending selection — it
  // belongs to the previous document's viewer state.
  const setActiveDocumentId = useCallback((id: DocumentId | null) => {
    setActiveDocumentIdState(id);
    setPendingSelection(null);
    setScrollState((prev) => ({ id: null, version: prev.version + 1 }));
  }, []);

  // Every request bumps the version, so repeating the same id still changes state.
  const scrollToAnnotation = useCallback((id: AnnotationId | null) => {
    setScrollState((prev) => ({ id, version: prev.version + 1 }));
  }, []);

  const value = useMemo<EngineContextValue>(
    () => ({
      engine,
      activeDocumentId,
      setActiveDocumentId,
      pendingSelection,
      setPendingSelection,
      scrollToAnnotationId: scrollState.id,
      scrollVersion: scrollState.version,
      scrollToAnnotation,
    }),
    [engine, activeDocumentId, setActiveDocumentId, pendingSelection, scrollState, scrollToAnnotation],
  );

  return <EngineContext.Provider value={value}>{children}</EngineContext.Provider>;
}
/**
 * Returns the engine from the nearest `EngineProvider`.
 *
 * @throws When called outside an `EngineProvider`.
 */
export function useEngine(): Engine {
  const value = useContext(EngineContext);
  if (value === null) {
    throw new Error("useEngine: missing EngineProvider");
  }
  return value.engine;
}
/**
 * Exposes the active-document id held in the engine context together with
 * the context's setter for it.
 *
 * @throws Error when called outside an `EngineProvider`.
 */
export function useActiveDocumentId(): {
  readonly id: DocumentId | null;
  setId(id: DocumentId | null): void;
} {
  const context = useContext(EngineContext);
  if (!context) {
    throw new Error("useActiveDocumentId: missing EngineProvider");
  }
  const { activeDocumentId, setActiveDocumentId } = context;
  return { id: activeDocumentId, setId: setActiveDocumentId };
}
/**
 * Resolves the active document id to the document itself and to the last
 * representation the engine lists for it. Both are `null` while no document
 * is active.
 *
 * The hook re-renders its caller whenever the engine reports
 * `DocumentImported` or `DocumentRepresentationGenerated`, so the lookup is
 * repeated after documents or representations are added.
 */
export function useActiveDocument(): {
  readonly document: Document | null;
  readonly representation: DocumentRepresentation | null;
} {
  const engine = useEngine();
  const { id } = useActiveDocumentId();
  // Only the setter is used: bumping the counter forces a re-render, and the
  // counter value itself is never read.
  const [, setRevision] = useState(0);

  useEffect(() => {
    const bump = (): void => setRevision((current) => current + 1);
    const unsubscribeImported = engine.bus.on("DocumentImported", bump);
    const unsubscribeGenerated = engine.bus.on("DocumentRepresentationGenerated", bump);
    return () => {
      unsubscribeImported();
      unsubscribeGenerated();
    };
  }, [engine]);

  if (!id) {
    return { document: null, representation: null };
  }
  const document = engine.documents.get(id);
  const representation = engine.documents.listRepresentations(id).at(-1) ?? null;
  return { document, representation };
}
/**
 * Listens for one engine event type and re-renders the caller every time it
 * fires.
 *
 * @param type - The event type to subscribe to on the engine bus.
 * @returns A counter that starts at 0 and grows by one per received event;
 * useful as a dependency that invalidates memoized reads.
 */
export function useEngineEventTick<T extends Parameters<Engine["bus"]["on"]>[0]>(
  type: T,
): number {
  const engine = useEngine();
  const [count, setCount] = useState(0);
  const increment = useCallback(() => {
    setCount((previous) => previous + 1);
  }, []);

  useEffect(() => {
    // `bus.on` hands back its own unsubscribe function, which doubles as the
    // effect cleanup.
    const unsubscribe = engine.bus.on(type, increment);
    return unsubscribe;
  }, [engine, type, increment]);

  return count;
}
/**
 * Exposes the pending selection held in the engine context together with
 * the context's setter for it.
 *
 * @throws Error when called outside an `EngineProvider`.
 */
export function usePendingSelection(): {
  readonly pending: PendingSelection | null;
  set(pending: PendingSelection | null): void;
} {
  const context = useContext(EngineContext);
  if (!context) {
    throw new Error("usePendingSelection: missing EngineProvider");
  }
  const { pendingSelection, setPendingSelection } = context;
  return { pending: pendingSelection, set: setPendingSelection };
}
/**
 * Exposes the scroll-to-annotation state held in the engine context: the
 * requested annotation id, the accompanying version counter, and the
 * function that issues a new scroll request.
 *
 * @throws Error when called outside an `EngineProvider`.
 */
export function useScrollToAnnotation(): {
  readonly id: AnnotationId | null;
  readonly version: number;
  scrollTo(id: AnnotationId | null): void;
} {
  const context = useContext(EngineContext);
  if (!context) {
    throw new Error("useScrollToAnnotation: missing EngineProvider");
  }
  const { scrollToAnnotationId, scrollVersion, scrollToAnnotation } = context;
  return { id: scrollToAnnotationId, version: scrollVersion, scrollTo: scrollToAnnotation };
}

View File

@@ -0,0 +1,101 @@
/**
* EvidenceSidebar — the right pane.
*
* Lists `EvidenceItem`s scoped to the currently-active document. Each row
* shows quote + commentary + status. Clicking a row activates the item via
* the engine (emitting `EvidenceItemActivated`) and, when the item has an
* annotation, asks the viewer to scroll to the first one through
* `useScrollToAnnotation`.
*
* T06 scope: read-only display + activation event. Item creation lives in
* T07; the click-to-reopen integration (the scroll request) was added in T08.
*/
import { useMemo } from "react";
import type { EvidenceItem } from "@shared/evidence";
import {
useActiveDocument,
useEngine,
useEngineEventTick,
useScrollToAnnotation,
} from "./EngineContext";
/** Props accepted by `EvidenceSidebar`. */
export interface EvidenceSidebarProps {
/**
* Optional callback invoked with the clicked evidence item, after the
* sidebar has activated it on the engine and requested any viewer scroll.
*/
onActivate?(item: EvidenceItem): void;
}
/** Maximum number of UTF-16 code units of a quote shown in a sidebar row. */
const QUOTE_PREVIEW_LENGTH = 140;

/**
 * Shortens a quote for display in a sidebar row. Quotes that fit are
 * returned unchanged; longer ones are cut and suffixed with an ellipsis.
 * The cut backs off by one unit when it would otherwise land between the
 * two halves of a surrogate pair, so no broken character is rendered.
 */
function previewQuote(quote: string): string {
  if (quote.length <= QUOTE_PREVIEW_LENGTH) return quote;
  const lastKept = quote.charCodeAt(QUOTE_PREVIEW_LENGTH - 1);
  // 0xD800–0xDBFF is the high-surrogate range: its partner sits past the cut.
  const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff;
  const cut = splitsPair ? QUOTE_PREVIEW_LENGTH - 1 : QUOTE_PREVIEW_LENGTH;
  return `${quote.slice(0, cut)}…`;
}

/**
 * Right-hand pane listing the evidence items of the active document.
 *
 * Each row shows the quote of the item's first annotation (truncated),
 * the commentary when present, and the status. Clicking a row activates
 * the item on the engine, requests a viewer scroll to the first annotation
 * when there is one, and finally calls `props.onActivate` if supplied.
 *
 * @param props - See `EvidenceSidebarProps`.
 */
export function EvidenceSidebar(props: EvidenceSidebarProps) {
  const engine = useEngine();
  const { document } = useActiveDocument();
  const { scrollTo } = useScrollToAnnotation();
  // The tick counters change whenever an item is created or updated; they
  // are listed in the memo deps below purely to invalidate the cached list.
  const createTick = useEngineEventTick("EvidenceItemCreated");
  const updateTick = useEngineEventTick("EvidenceItemUpdated");
  const items = useMemo<readonly EvidenceItem[]>(() => {
    if (!document) return [];
    return engine.evidence.listByDocument(document.id);
  }, [document, engine, createTick, updateTick]);

  return (
    <aside
      style={{
        width: 320,
        borderLeft: "1px solid #ddd",
        padding: 12,
        overflow: "auto",
        flex: "0 0 320px",
        fontFamily: "system-ui, sans-serif",
      }}
    >
      <h2 style={{ marginTop: 0, fontSize: 16 }}>Evidence</h2>
      {!document && (
        <p style={{ fontSize: 12, color: "#888" }}>No document open.</p>
      )}
      {document && items.length === 0 && (
        <p style={{ fontSize: 12, color: "#888" }}>
          No evidence yet. Select a passage in the viewer to create one.
        </p>
      )}
      <ul style={{ listStyle: "none", padding: 0, margin: 0 }}>
        {items.map((item) => {
          const firstAnnotationId = item.annotationIds[0];
          const annotation = firstAnnotationId ? engine.annotations.get(firstAnnotationId) : null;
          const quote = annotation?.quote ?? "(no quote)";
          return (
            <li key={item.id} style={{ marginBottom: 8 }}>
              <button
                // Explicit type: a bare <button> defaults to "submit" and
                // would submit any form this pane is ever nested in.
                type="button"
                onClick={() => {
                  engine.evidence.activate(item.id, "sidebar");
                  if (firstAnnotationId) scrollTo(firstAnnotationId);
                  props.onActivate?.(item);
                }}
                style={{
                  display: "block",
                  width: "100%",
                  textAlign: "left",
                  background: "#fff8d6",
                  border: "1px solid #e0c050",
                  padding: 8,
                  cursor: "pointer",
                  fontSize: 12,
                }}
              >
                <div style={{ fontStyle: "italic", marginBottom: 4 }}>
                  &ldquo;{previewQuote(quote)}&rdquo;
                </div>
                {item.commentary && (
                  <div style={{ color: "#333", marginBottom: 4 }}>{item.commentary}</div>
                )}
                <div style={{ color: "#666", fontSize: 11 }}>status: {item.status}</div>
              </button>
            </li>
          );
        })}
      </ul>
    </aside>
  );
}

94
src/work/ViewerShell.tsx Normal file
View File

@@ -0,0 +1,94 @@
/**
* ViewerShell — the centre pane.
*
* Hosts the viewer adapter (currently the T02 PDF spike) and shows whatever
* is active. `work/` consumes only the adapter's public surface
* (`PdfSpikeViewer`) — it never touches PDF.js or react-pdf-highlighter-plus
* directly. When the PDF library is swapped (or the spike is replaced),
* only the adapter module changes; this shell stays the same.
*
* T06 scope: load + render the active PDF + show stored annotations. The
* selection-capture → annotation pipeline is wired in T07; the
* click-to-reopen pipeline is wired in T08.
*/
import { useMemo } from "react";
import { PdfSpikeViewer, type StoredAnnotation } from "@anchor/index";
import {
useActiveDocument,
useEngine,
useEngineEventTick,
usePendingSelection,
useScrollToAnnotation,
} from "./EngineContext";
import { AnnotationToolbar } from "./AnnotationToolbar";
/** Centred, muted message shown while there is nothing to display. */
const placeholderStyle = {
  flex: 1,
  display: "flex",
  alignItems: "center",
  justifyContent: "center",
  color: "#666",
  fontFamily: "system-ui, sans-serif",
} as const;

/** Column layout holding the toolbar above the viewer. */
const shellStyle = {
  flex: 1,
  display: "flex",
  flexDirection: "column",
  overflow: "hidden",
  position: "relative",
} as const;

/** Clipping frame that gives the viewer the remaining height. */
const viewerFrameStyle = { flex: 1, overflow: "hidden", position: "relative" } as const;

/**
 * Centre pane: renders the annotation toolbar and the PDF viewer for the
 * active document, or a placeholder until a document with a representation
 * is available. Selections captured by the viewer are stored as the pending
 * selection in the engine context.
 */
export function ViewerShell() {
  const engine = useEngine();
  const { document, representation } = useActiveDocument();
  const { set: setPending } = usePendingSelection();
  const { id: scrollToId, version: scrollVersion } = useScrollToAnnotation();
  // Changes on every `AnnotationCreated` event; listed in the memo deps so
  // the highlight list is rebuilt after a new annotation is stored.
  const annotationTick = useEngineEventTick("AnnotationCreated");

  const annotations = useMemo<StoredAnnotation[]>(() => {
    const stored: StoredAnnotation[] = [];
    if (!document) return stored;
    for (const annotation of engine.annotations.listByDocument(document.id)) {
      stored.push({
        id: annotation.id,
        text: annotation.quote ?? "",
        selectors: annotation.selectors,
      });
    }
    return stored;
  }, [document, engine, annotationTick]);

  const fileUrl = useMemo(() => {
    if (!document) return null;
    // The fixture file name is the document title, falling back to its id.
    return `/fixtures/pdfs/${encodeURIComponent(document.title ?? document.id)}`;
  }, [document]);

  if (!document || !representation || !fileUrl) {
    return <main style={placeholderStyle}>Pick a fixture on the left to begin.</main>;
  }

  // The key includes scrollVersion so that clicking the same item twice
  // still remounts the viewer and re-runs its mount-time scroll effect.
  const viewerKey = `${document.id}#${scrollVersion}`;
  // The scroll prop is passed only when a target exists; it is omitted
  // entirely otherwise rather than passed as null.
  const scrollProps = scrollToId ? { scrollToAnnotationId: scrollToId } : {};

  return (
    <main style={shellStyle}>
      <AnnotationToolbar />
      <div style={viewerFrameStyle}>
        <PdfSpikeViewer
          key={viewerKey}
          pdfUrl={fileUrl}
          storedAnnotations={annotations}
          {...scrollProps}
          onSelectionCaptured={(capture, selectors) => {
            setPending({ capture, selectors });
          }}
        />
      </div>
    </main>
  );
}

View File

@@ -1 +1,13 @@
// Public re-exports: the pane components first, then the engine context
// provider and its hooks. Type-only names are re-exported separately.
export { CollectionList } from "./CollectionList";
export { ViewerShell } from "./ViewerShell";
export { EvidenceSidebar } from "./EvidenceSidebar";
export type { EvidenceSidebarProps } from "./EvidenceSidebar";
export { AnnotationToolbar } from "./AnnotationToolbar";
export {
  EngineProvider,
  useEngine,
  useActiveDocument,
  useActiveDocumentId,
  useEngineEventTick,
  usePendingSelection,
} from "./EngineContext";
export type { PendingSelection } from "./EngineContext";