generated from coulomb/repo-seed
Strong likelihood that the "text layer is misplaced / body text not selectable" symptoms across multiple PDFs come from PDF.js falling back to substitute font metrics. Without the cmaps directory (CID character maps for non-Latin fonts) and the standard_fonts directory (Helvetica/Times/Courier metrics for unembedded standard fonts), the canvas glyphs use embedded font data while the text-layer span positions are computed from fallback metrics. The two diverge — text spans land in the wrong place, or text content can't be decoded at all, leaving the body unselectable. Both directories are now copied into the served root by vite-plugin-static-copy and passed to pdfjs.getDocument() as `cMapUrl: "/cmaps/"` + `cMapPacked: true` + `standardFontDataUrl: "/standard_fonts/"` via PdfLoader's `document` prop (which accepts a full DocumentInitParameters object). If this is the right diagnosis, the textLayer overlay should now line up with the visible glyphs on the same PDFs that were producing fragmented captures. If the body text is still unselectable, the PDF genuinely lacks a text layer for those glyphs (image-only content) and OCR would be the only path forward. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
54 lines
1.8 KiB
TypeScript
54 lines
1.8 KiB
TypeScript
import { defineConfig, normalizePath } from "vite";
import react from "@vitejs/plugin-react";
import { viteStaticCopy } from "vite-plugin-static-copy";
import { fileURLToPath } from "node:url";
import { dirname, resolve } from "node:path";
|
|
|
|
// Absolute directory of this config file, derived from the module URL so the
// paths resolved against it below do not depend on the working directory.
const configFilePath = fileURLToPath(import.meta.url);
const __dirname = dirname(configFilePath);
|
|
|
|
// Vite configuration: React plugin, PDF.js static assets, source-tree path
// aliases, dev-server file access, and dependency pre-bundling exclusions.
export default defineConfig({
  plugins: [
    react(),
    // PDF.js needs its cmaps (character maps for CID fonts) and
    // standard fonts (Helvetica/Times/Courier metrics) to position
    // text-layer spans correctly. Without them, PDFs with embedded
    // CID fonts or unembedded standard fonts get rendered to canvas
    // OK but their textLayer overlays land in the wrong place — text
    // appears unselectable or selection jumps to a different region.
    // We serve both directories straight from `node_modules/pdfjs-dist`.
    //
    // `src` is a glob pattern, so the absolute directory is passed through
    // `normalizePath` first: on Windows `resolve()` yields backslashes, which
    // glob matching treats as escape characters, and the pattern would match
    // nothing. `[!.]*` selects entries whose names do not start with a dot.
    viteStaticCopy({
      targets: [
        {
          src: `${normalizePath(resolve(__dirname, "node_modules/pdfjs-dist/cmaps"))}/[!.]*`,
          dest: "cmaps",
        },
        {
          src: `${normalizePath(resolve(__dirname, "node_modules/pdfjs-dist/standard_fonts"))}/[!.]*`,
          dest: "standard_fonts",
        },
      ],
    }),
  ],
  resolve: {
    // Import aliases, each mapped to a directory under `src/`.
    alias: {
      "@shared": resolve(__dirname, "src/shared"),
      "@engine": resolve(__dirname, "src/engine"),
      "@anchor": resolve(__dirname, "src/anchor"),
      "@source": resolve(__dirname, "src/source"),
      "@binder": resolve(__dirname, "src/binder"),
      "@work": resolve(__dirname, "src/work"),
      "@app": resolve(__dirname, "src/app"),
    },
  },
  server: {
    fs: {
      // Allow Vite to serve /fixtures/pdfs/*.pdf from the project root.
      allow: [resolve(__dirname)],
    },
  },
  optimizeDeps: {
    // pdfjs-dist ships its worker as a .mjs Vite needs to handle, so it is
    // kept out of dependency pre-bundling.
    exclude: ["pdfjs-dist"],
  },
});
|