Wire pdfjs cmaps + standard fonts so text layer positions correctly

Strong likelihood that the "text layer is misplaced / body text not
selectable" symptoms across multiple PDFs come from PDF.js falling
back to substitute font metrics. Without the cmaps directory (CID
character maps for non-Latin fonts) and the standard_fonts directory
(Helvetica/Times/Courier metrics for unembedded standard fonts), the
canvas glyphs use embedded font data while the text-layer span
positions are computed from fallback metrics. The two diverge — text
spans land in the wrong place, or text content can't be decoded at
all, leaving the body unselectable.

Both directories are now copied into the served root by
vite-plugin-static-copy and passed to pdfjs.getDocument() as
`cMapUrl: "/cmaps/"` + `cMapPacked: true` + `standardFontDataUrl:
"/standard_fonts/"` via PdfLoader's `document` prop (which accepts a
full DocumentInitParameters object).

If this is the right diagnosis, the textLayer overlay should now line
up with the visible glyphs on the same PDFs that were producing
fragmented captures. If the body text is still unselectable, the PDF
genuinely lacks a text layer for those glyphs (image-only content)
and OCR would be the only path forward.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 00:38:34 +02:00
parent 3834f5c209
commit c000ce6f73
4 changed files with 229 additions and 2 deletions

View File

@@ -42,6 +42,7 @@
"typescript": "^5.5.4",
"typescript-eslint": "^8.0.0",
"vite": "^5.4.0",
"vite-plugin-static-copy": "^2",
"vitest": "^2.0.5"
}
}

191
pnpm-lock.yaml generated
View File

@@ -72,6 +72,9 @@ importers:
vite:
specifier: ^5.4.0
version: 5.4.21(@types/node@20.19.41)
vite-plugin-static-copy:
specifier: ^2
version: 2.3.2(vite@5.4.21(@types/node@20.19.41))
vitest:
specifier: ^2.0.5
version: 2.1.9(@types/node@20.19.41)(happy-dom@20.9.0)
@@ -477,6 +480,18 @@ packages:
'@emnapi/core': ^1.7.1
'@emnapi/runtime': ^1.7.1
'@nodelib/fs.scandir@2.1.5':
resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==}
engines: {node: '>= 8'}
'@nodelib/fs.stat@2.0.5':
resolution: {integrity: sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==}
engines: {node: '>= 8'}
'@nodelib/fs.walk@1.2.8':
resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
engines: {node: '>= 8'}
'@nolyfill/is-core-module@1.0.39':
resolution: {integrity: sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==}
engines: {node: '>=12.4.0'}
@@ -1374,6 +1389,10 @@ packages:
resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==}
engines: {node: '>=10'}
anymatch@3.1.3:
resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==}
engines: {node: '>= 8'}
argparse@2.0.1:
resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
@@ -1432,6 +1451,10 @@ packages:
engines: {node: '>=6.0.0'}
hasBin: true
binary-extensions@2.3.0:
resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==}
engines: {node: '>=8'}
brace-expansion@1.1.14:
resolution: {integrity: sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==}
@@ -1483,6 +1506,10 @@ packages:
resolution: {integrity: sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==}
engines: {node: '>= 16'}
chokidar@3.6.0:
resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==}
engines: {node: '>= 8.10.0'}
class-variance-authority@0.7.1:
resolution: {integrity: sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==}
@@ -1759,12 +1786,19 @@ packages:
fast-deep-equal@3.1.3:
resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==}
fast-glob@3.3.3:
resolution: {integrity: sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==}
engines: {node: '>=8.6.0'}
fast-json-stable-stringify@2.1.0:
resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==}
fast-levenshtein@2.0.6:
resolution: {integrity: sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==}
fastq@1.20.1:
resolution: {integrity: sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==}
fdir@6.5.0:
resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==}
engines: {node: '>=12.0.0'}
@@ -1797,6 +1831,10 @@ packages:
resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==}
engines: {node: '>= 0.4'}
fs-extra@11.3.5:
resolution: {integrity: sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg==}
engines: {node: '>=14.14'}
fsevents@2.3.3:
resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
@@ -1839,6 +1877,10 @@ packages:
get-tsconfig@4.14.0:
resolution: {integrity: sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==}
glob-parent@5.1.2:
resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==}
engines: {node: '>= 6'}
glob-parent@6.0.2:
resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==}
engines: {node: '>=10.13.0'}
@@ -1859,6 +1901,9 @@ packages:
resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
engines: {node: '>= 0.4'}
graceful-fs@4.2.11:
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
happy-dom@20.9.0:
resolution: {integrity: sha512-GZZ9mKe8r646NUAf/zemnGbjYh4Bt8/MqASJY+pSm5ZDtc3YQox+4gsLI7yi1hba6o+eCsGxpHn5+iEVn31/FQ==}
engines: {node: '>=20.0.0'}
@@ -1928,6 +1973,10 @@ packages:
resolution: {integrity: sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ==}
engines: {node: '>= 0.4'}
is-binary-path@2.1.0:
resolution: {integrity: sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==}
engines: {node: '>=8'}
is-boolean-object@1.2.2:
resolution: {integrity: sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A==}
engines: {node: '>= 0.4'}
@@ -2058,6 +2107,9 @@ packages:
engines: {node: '>=6'}
hasBin: true
jsonfile@6.2.1:
resolution: {integrity: sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==}
jszip@3.10.1:
resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==}
@@ -2107,10 +2159,18 @@ packages:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
merge2@1.4.1:
resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==}
engines: {node: '>= 8'}
micromatch@4.0.7:
resolution: {integrity: sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==}
engines: {node: '>=8.6'}
micromatch@4.0.8:
resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
engines: {node: '>=8.6'}
minimatch@10.2.5:
resolution: {integrity: sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==}
engines: {node: 18 || 20 || >=22}
@@ -2145,6 +2205,10 @@ packages:
resolution: {integrity: sha512-GYVXHE2KnrzAfsAjl4uP++evGFCrAU1jta4ubEjIG7YWt/64Gqv66a30yKwWczVjA6j3bM4nBwH7Pk1JmDHaxQ==}
engines: {node: '>=18'}
normalize-path@3.0.0:
resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
engines: {node: '>=0.10.0'}
object-assign@4.1.1:
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
engines: {node: '>=0.10.0'}
@@ -2193,6 +2257,10 @@ packages:
resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
engines: {node: '>=10'}
p-map@7.0.4:
resolution: {integrity: sha512-tkAQEw8ysMzmkhgw8k+1U/iPhWNhykKnSk4Rd5zLoPJCuJaGRPo6YposrZgaxHKzDHdDWWZvE/Sk7hsL2X/CpQ==}
engines: {node: '>=18'}
pako@1.0.11:
resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==}
@@ -2262,6 +2330,9 @@ packages:
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
engines: {node: '>=6'}
queue-microtask@1.2.3:
resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
re-resizable@6.11.2:
resolution: {integrity: sha512-2xI2P3OHs5qw7K0Ud1aLILK6MQxW50TcO+DetD9eIV58j84TqYeHoZcL9H4GXFXXIh7afhH8mv5iUCXII7OW7A==}
peerDependencies:
@@ -2339,6 +2410,10 @@ packages:
readable-stream@2.3.8:
resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==}
readdirp@3.6.0:
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
engines: {node: '>=8.10.0'}
reflect.getprototypeof@1.0.10:
resolution: {integrity: sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==}
engines: {node: '>= 0.4'}
@@ -2364,11 +2439,18 @@ packages:
engines: {node: '>= 0.4'}
hasBin: true
reusify@1.1.0:
resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==}
engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
rollup@4.60.4:
resolution: {integrity: sha512-WHeFSbZYsPu3+bLoNRUuAO+wavNlocOPf3wSHTP7hcFKVnJeWsYlCDbr3mTS14FCizf9ccIxXA8sGL8zKeQN3g==}
engines: {node: '>=18.0.0', npm: '>=8.0.0'}
hasBin: true
run-parallel@1.2.0:
resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==}
safe-array-concat@1.1.4:
resolution: {integrity: sha512-wtZlHyOje6OZTGqAoaDKxFkgRtkF9CnHAVnCHKfuj200wAgL+bSJhdsCD2l0Qx/2ekEXjPWcyKkfGb5CPboslg==}
engines: {node: '>=0.4'}
@@ -2572,6 +2654,10 @@ packages:
undici-types@6.21.0:
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
universalify@2.0.1:
resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==}
engines: {node: '>= 10.0.0'}
unrs-resolver@1.12.2:
resolution: {integrity: sha512-dmlRxBJJayXjqTwC+JtF1HhJmgf3ftQ3YejFcZrf4+KKtJv0qDsK1pjqaaVjG7wJ5NJ6UVP1OqRMQ71Z4C3rxQ==}
@@ -2612,6 +2698,12 @@ packages:
engines: {node: ^18.0.0 || >=20.0.0}
hasBin: true
vite-plugin-static-copy@2.3.2:
resolution: {integrity: sha512-iwrrf+JupY4b9stBttRWzGHzZbeMjAHBhkrn67MNACXJVjEMRpCI10Q3AkxdBkl45IHaTfw/CNVevzQhP7yTwg==}
engines: {node: ^18.0.0 || >=20.0.0}
peerDependencies:
vite: ^5.0.0 || ^6.0.0
vite@5.4.21:
resolution: {integrity: sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==}
engines: {node: ^18.0.0 || >=20.0.0}
@@ -3075,6 +3167,18 @@ snapshots:
'@tybys/wasm-util': 0.10.2
optional: true
'@nodelib/fs.scandir@2.1.5':
dependencies:
'@nodelib/fs.stat': 2.0.5
run-parallel: 1.2.0
'@nodelib/fs.stat@2.0.5': {}
'@nodelib/fs.walk@1.2.8':
dependencies:
'@nodelib/fs.scandir': 2.1.5
fastq: 1.20.1
'@nolyfill/is-core-module@1.0.39': {}
'@pdf-lib/standard-fonts@1.0.0':
@@ -3942,6 +4046,11 @@ snapshots:
ansi-styles@5.2.0: {}
anymatch@3.1.3:
dependencies:
normalize-path: 3.0.0
picomatch: 2.3.2
argparse@2.0.1: {}
aria-hidden@1.2.6:
@@ -4016,6 +4125,8 @@ snapshots:
baseline-browser-mapping@2.10.32: {}
binary-extensions@2.3.0: {}
brace-expansion@1.1.14:
dependencies:
balanced-match: 1.0.2
@@ -4075,6 +4186,18 @@ snapshots:
check-error@2.1.3: {}
chokidar@3.6.0:
dependencies:
anymatch: 3.1.3
braces: 3.0.3
glob-parent: 5.1.2
is-binary-path: 2.1.0
is-glob: 4.0.3
normalize-path: 3.0.0
readdirp: 3.6.0
optionalDependencies:
fsevents: 2.3.3
class-variance-authority@0.7.1:
dependencies:
clsx: 2.1.1
@@ -4448,10 +4571,22 @@ snapshots:
fast-deep-equal@3.1.3: {}
fast-glob@3.3.3:
dependencies:
'@nodelib/fs.stat': 2.0.5
'@nodelib/fs.walk': 1.2.8
glob-parent: 5.1.2
merge2: 1.4.1
micromatch: 4.0.8
fast-json-stable-stringify@2.1.0: {}
fast-levenshtein@2.0.6: {}
fastq@1.20.1:
dependencies:
reusify: 1.1.0
fdir@6.5.0(picomatch@4.0.4):
optionalDependencies:
picomatch: 4.0.4
@@ -4480,6 +4615,12 @@ snapshots:
dependencies:
is-callable: 1.2.7
fs-extra@11.3.5:
dependencies:
graceful-fs: 4.2.11
jsonfile: 6.2.1
universalify: 2.0.1
fsevents@2.3.3:
optional: true
@@ -4530,6 +4671,10 @@ snapshots:
dependencies:
resolve-pkg-maps: 1.0.0
glob-parent@5.1.2:
dependencies:
is-glob: 4.0.3
glob-parent@6.0.2:
dependencies:
is-glob: 4.0.3
@@ -4545,6 +4690,8 @@ snapshots:
gopd@1.2.0: {}
graceful-fs@4.2.11: {}
happy-dom@20.9.0:
dependencies:
'@types/node': 20.19.41
@@ -4618,6 +4765,10 @@ snapshots:
dependencies:
has-bigints: 1.1.0
is-binary-path@2.1.0:
dependencies:
binary-extensions: 2.3.0
is-boolean-object@1.2.2:
dependencies:
call-bound: 1.0.4
@@ -4738,6 +4889,12 @@ snapshots:
json5@2.2.3: {}
jsonfile@6.2.1:
dependencies:
universalify: 2.0.1
optionalDependencies:
graceful-fs: 4.2.11
jszip@3.10.1:
dependencies:
lie: 3.3.0
@@ -4788,11 +4945,18 @@ snapshots:
math-intrinsics@1.1.0: {}
merge2@1.4.1: {}
micromatch@4.0.7:
dependencies:
braces: 3.0.3
picomatch: 2.3.2
micromatch@4.0.8:
dependencies:
braces: 3.0.3
picomatch: 2.3.2
minimatch@10.2.5:
dependencies:
brace-expansion: 5.0.6
@@ -4820,6 +4984,8 @@ snapshots:
node-releases@2.0.46: {}
normalize-path@3.0.0: {}
object-assign@4.1.1: {}
object-inspect@1.13.4: {}
@@ -4885,6 +5051,8 @@ snapshots:
dependencies:
p-limit: 3.1.0
p-map@7.0.4: {}
pako@1.0.11: {}
parent-module@1.0.1:
@@ -4944,6 +5112,8 @@ snapshots:
punycode@2.3.1: {}
queue-microtask@1.2.3: {}
re-resizable@6.11.2(react-dom@18.3.1(react@18.3.1))(react@18.3.1):
dependencies:
react: 18.3.1
@@ -5045,6 +5215,10 @@ snapshots:
string_decoder: 1.1.1
util-deprecate: 1.0.2
readdirp@3.6.0:
dependencies:
picomatch: 2.3.2
reflect.getprototypeof@1.0.10:
dependencies:
call-bind: 1.0.9
@@ -5085,6 +5259,8 @@ snapshots:
path-parse: 1.0.7
supports-preserve-symlinks-flag: 1.0.0
reusify@1.1.0: {}
rollup@4.60.4:
dependencies:
'@types/estree': 1.0.8
@@ -5116,6 +5292,10 @@ snapshots:
'@rollup/rollup-win32-x64-msvc': 4.60.4
fsevents: 2.3.3
run-parallel@1.2.0:
dependencies:
queue-microtask: 1.2.3
safe-array-concat@1.1.4:
dependencies:
call-bind: 1.0.9
@@ -5352,6 +5532,8 @@ snapshots:
undici-types@6.21.0: {}
universalify@2.0.1: {}
unrs-resolver@1.12.2:
dependencies:
napi-postinstall: 0.3.4
@@ -5424,6 +5606,15 @@ snapshots:
- supports-color
- terser
vite-plugin-static-copy@2.3.2(vite@5.4.21(@types/node@20.19.41)):
dependencies:
chokidar: 3.6.0
fast-glob: 3.3.3
fs-extra: 11.3.5
p-map: 7.0.4
picocolors: 1.1.1
vite: 5.4.21(@types/node@20.19.41)
vite@5.4.21(@types/node@20.19.41):
dependencies:
esbuild: 0.21.5

View File

@@ -298,7 +298,20 @@ export function PdfSpikeViewer(props: PdfSpikeViewerProps) {
className={wrapperClasses.length > 0 ? wrapperClasses : undefined}
style={{ height: "100%" }}
>
<PdfLoader document={pdfUrl}>
<PdfLoader
document={{
url: pdfUrl,
// Without these two, PDFs with CID fonts (most CJK + many
// European court documents) or unembedded standard fonts get
// rendered with substitute metrics, which shifts every
// text-layer span out of alignment with the canvas glyphs.
// vite.config.ts copies both directories from pdfjs-dist into
// the served root.
cMapUrl: "/cmaps/",
cMapPacked: true,
standardFontDataUrl: "/standard_fonts/",
}}
>
{(pdfDocument) => (
<PdfHighlighter
pdfDocument={pdfDocument}

View File

@@ -1,12 +1,34 @@
import { defineConfig } from "vite";
import react from "@vitejs/plugin-react";
import { viteStaticCopy } from "vite-plugin-static-copy";
import { fileURLToPath } from "node:url";
import { dirname, resolve } from "node:path";
const __dirname = dirname(fileURLToPath(import.meta.url));
export default defineConfig({
plugins: [react()],
plugins: [
react(),
// PDF.js needs its cmaps (character maps for CID fonts) and
// standard fonts (Helvetica/Times/Courier metrics) to position
// text-layer spans correctly. Without them, PDFs with embedded
// CID fonts or unembedded standard fonts get rendered to canvas
// OK but their textLayer overlays land in the wrong place — text
// appears unselectable or selection jumps to a different region.
// We serve both directories straight from `node_modules/pdfjs-dist`.
viteStaticCopy({
targets: [
{
src: resolve(__dirname, "node_modules/pdfjs-dist/cmaps") + "/[!.]*",
dest: "cmaps",
},
{
src: resolve(__dirname, "node_modules/pdfjs-dist/standard_fonts") + "/[!.]*",
dest: "standard_fonts",
},
],
}),
],
resolve: {
alias: {
"@shared": resolve(__dirname, "src/shared"),