Wire pdfjs cmaps + standard fonts so text layer positions correctly

The "text layer is misplaced / body text not selectable" symptoms seen
across multiple PDFs most likely come from PDF.js falling back to
substitute font metrics. Without the cmaps directory (CID character
maps for non-Latin fonts) and the standard_fonts directory
(Helvetica/Times/Courier metrics for unembedded standard fonts), the
canvas glyphs use embedded font data while the text-layer span
positions are computed from fallback metrics. The two diverge — text
spans land in the wrong place, or the text content can't be decoded at
all, leaving the body unselectable.

Both directories are now copied into the served root by
vite-plugin-static-copy, and their URLs are passed to
pdfjs.getDocument() as `cMapUrl: "/cmaps/"` + `cMapPacked: true` +
`standardFontDataUrl: "/standard_fonts/"` via PdfLoader's `document`
prop (which accepts a full DocumentInitParameters object).

If this is the right diagnosis, the textLayer overlay should now line
up with the visible glyphs on the same PDFs that were producing
fragmented captures. If the body text is still unselectable, the PDF
genuinely lacks a text layer for those glyphs (image-only content)
and OCR would be the only path forward.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-27 00:38:34 +02:00
parent 3834f5c209
commit c000ce6f73
4 changed files with 229 additions and 2 deletions

View File

@@ -42,6 +42,7 @@
"typescript": "^5.5.4", "typescript": "^5.5.4",
"typescript-eslint": "^8.0.0", "typescript-eslint": "^8.0.0",
"vite": "^5.4.0", "vite": "^5.4.0",
"vite-plugin-static-copy": "^2",
"vitest": "^2.0.5" "vitest": "^2.0.5"
} }
} }

191
pnpm-lock.yaml generated
View File

@@ -72,6 +72,9 @@ importers:
vite: vite:
specifier: ^5.4.0 specifier: ^5.4.0
version: 5.4.21(@types/node@20.19.41) version: 5.4.21(@types/node@20.19.41)
vite-plugin-static-copy:
specifier: ^2
version: 2.3.2(vite@5.4.21(@types/node@20.19.41))
vitest: vitest:
specifier: ^2.0.5 specifier: ^2.0.5
version: 2.1.9(@types/node@20.19.41)(happy-dom@20.9.0) version: 2.1.9(@types/node@20.19.41)(happy-dom@20.9.0)
@@ -477,6 +480,18 @@ packages:
'@emnapi/core': ^1.7.1 '@emnapi/core': ^1.7.1
'@emnapi/runtime': ^1.7.1 '@emnapi/runtime': ^1.7.1
'@nodelib/fs.scandir@2.1.5':
resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==}
engines: {node: '>= 8'}
'@nodelib/fs.stat@2.0.5':
resolution: {integrity: sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==}
engines: {node: '>= 8'}
'@nodelib/fs.walk@1.2.8':
resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
engines: {node: '>= 8'}
'@nolyfill/is-core-module@1.0.39': '@nolyfill/is-core-module@1.0.39':
resolution: {integrity: sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==} resolution: {integrity: sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA==}
engines: {node: '>=12.4.0'} engines: {node: '>=12.4.0'}
@@ -1374,6 +1389,10 @@ packages:
resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==} resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==}
engines: {node: '>=10'} engines: {node: '>=10'}
anymatch@3.1.3:
resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==}
engines: {node: '>= 8'}
argparse@2.0.1: argparse@2.0.1:
resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
@@ -1432,6 +1451,10 @@ packages:
engines: {node: '>=6.0.0'} engines: {node: '>=6.0.0'}
hasBin: true hasBin: true
binary-extensions@2.3.0:
resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==}
engines: {node: '>=8'}
brace-expansion@1.1.14: brace-expansion@1.1.14:
resolution: {integrity: sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==} resolution: {integrity: sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==}
@@ -1483,6 +1506,10 @@ packages:
resolution: {integrity: sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==} resolution: {integrity: sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==}
engines: {node: '>= 16'} engines: {node: '>= 16'}
chokidar@3.6.0:
resolution: {integrity: sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==}
engines: {node: '>= 8.10.0'}
class-variance-authority@0.7.1: class-variance-authority@0.7.1:
resolution: {integrity: sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==} resolution: {integrity: sha512-Ka+9Trutv7G8M6WT6SeiRWz792K5qEqIGEGzXKhAE6xOWAY6pPH8U+9IY3oCMv6kqTmLsv7Xh/2w2RigkePMsg==}
@@ -1759,12 +1786,19 @@ packages:
fast-deep-equal@3.1.3: fast-deep-equal@3.1.3:
resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==}
fast-glob@3.3.3:
resolution: {integrity: sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==}
engines: {node: '>=8.6.0'}
fast-json-stable-stringify@2.1.0: fast-json-stable-stringify@2.1.0:
resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==} resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==}
fast-levenshtein@2.0.6: fast-levenshtein@2.0.6:
resolution: {integrity: sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==} resolution: {integrity: sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==}
fastq@1.20.1:
resolution: {integrity: sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==}
fdir@6.5.0: fdir@6.5.0:
resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==}
engines: {node: '>=12.0.0'} engines: {node: '>=12.0.0'}
@@ -1797,6 +1831,10 @@ packages:
resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==} resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
fs-extra@11.3.5:
resolution: {integrity: sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg==}
engines: {node: '>=14.14'}
fsevents@2.3.3: fsevents@2.3.3:
resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
@@ -1839,6 +1877,10 @@ packages:
get-tsconfig@4.14.0: get-tsconfig@4.14.0:
resolution: {integrity: sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==} resolution: {integrity: sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==}
glob-parent@5.1.2:
resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==}
engines: {node: '>= 6'}
glob-parent@6.0.2: glob-parent@6.0.2:
resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==} resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==}
engines: {node: '>=10.13.0'} engines: {node: '>=10.13.0'}
@@ -1859,6 +1901,9 @@ packages:
resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
graceful-fs@4.2.11:
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
happy-dom@20.9.0: happy-dom@20.9.0:
resolution: {integrity: sha512-GZZ9mKe8r646NUAf/zemnGbjYh4Bt8/MqASJY+pSm5ZDtc3YQox+4gsLI7yi1hba6o+eCsGxpHn5+iEVn31/FQ==} resolution: {integrity: sha512-GZZ9mKe8r646NUAf/zemnGbjYh4Bt8/MqASJY+pSm5ZDtc3YQox+4gsLI7yi1hba6o+eCsGxpHn5+iEVn31/FQ==}
engines: {node: '>=20.0.0'} engines: {node: '>=20.0.0'}
@@ -1928,6 +1973,10 @@ packages:
resolution: {integrity: sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ==} resolution: {integrity: sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
is-binary-path@2.1.0:
resolution: {integrity: sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw==}
engines: {node: '>=8'}
is-boolean-object@1.2.2: is-boolean-object@1.2.2:
resolution: {integrity: sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A==} resolution: {integrity: sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
@@ -2058,6 +2107,9 @@ packages:
engines: {node: '>=6'} engines: {node: '>=6'}
hasBin: true hasBin: true
jsonfile@6.2.1:
resolution: {integrity: sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q==}
jszip@3.10.1: jszip@3.10.1:
resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==} resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==}
@@ -2107,10 +2159,18 @@ packages:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==} resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
merge2@1.4.1:
resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==}
engines: {node: '>= 8'}
micromatch@4.0.7: micromatch@4.0.7:
resolution: {integrity: sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==} resolution: {integrity: sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==}
engines: {node: '>=8.6'} engines: {node: '>=8.6'}
micromatch@4.0.8:
resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
engines: {node: '>=8.6'}
minimatch@10.2.5: minimatch@10.2.5:
resolution: {integrity: sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==} resolution: {integrity: sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==}
engines: {node: 18 || 20 || >=22} engines: {node: 18 || 20 || >=22}
@@ -2145,6 +2205,10 @@ packages:
resolution: {integrity: sha512-GYVXHE2KnrzAfsAjl4uP++evGFCrAU1jta4ubEjIG7YWt/64Gqv66a30yKwWczVjA6j3bM4nBwH7Pk1JmDHaxQ==} resolution: {integrity: sha512-GYVXHE2KnrzAfsAjl4uP++evGFCrAU1jta4ubEjIG7YWt/64Gqv66a30yKwWczVjA6j3bM4nBwH7Pk1JmDHaxQ==}
engines: {node: '>=18'} engines: {node: '>=18'}
normalize-path@3.0.0:
resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
engines: {node: '>=0.10.0'}
object-assign@4.1.1: object-assign@4.1.1:
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
engines: {node: '>=0.10.0'} engines: {node: '>=0.10.0'}
@@ -2193,6 +2257,10 @@ packages:
resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
engines: {node: '>=10'} engines: {node: '>=10'}
p-map@7.0.4:
resolution: {integrity: sha512-tkAQEw8ysMzmkhgw8k+1U/iPhWNhykKnSk4Rd5zLoPJCuJaGRPo6YposrZgaxHKzDHdDWWZvE/Sk7hsL2X/CpQ==}
engines: {node: '>=18'}
pako@1.0.11: pako@1.0.11:
resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==}
@@ -2262,6 +2330,9 @@ packages:
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==} resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
engines: {node: '>=6'} engines: {node: '>=6'}
queue-microtask@1.2.3:
resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
re-resizable@6.11.2: re-resizable@6.11.2:
resolution: {integrity: sha512-2xI2P3OHs5qw7K0Ud1aLILK6MQxW50TcO+DetD9eIV58j84TqYeHoZcL9H4GXFXXIh7afhH8mv5iUCXII7OW7A==} resolution: {integrity: sha512-2xI2P3OHs5qw7K0Ud1aLILK6MQxW50TcO+DetD9eIV58j84TqYeHoZcL9H4GXFXXIh7afhH8mv5iUCXII7OW7A==}
peerDependencies: peerDependencies:
@@ -2339,6 +2410,10 @@ packages:
readable-stream@2.3.8: readable-stream@2.3.8:
resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==} resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==}
readdirp@3.6.0:
resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==}
engines: {node: '>=8.10.0'}
reflect.getprototypeof@1.0.10: reflect.getprototypeof@1.0.10:
resolution: {integrity: sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==} resolution: {integrity: sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==}
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
@@ -2364,11 +2439,18 @@ packages:
engines: {node: '>= 0.4'} engines: {node: '>= 0.4'}
hasBin: true hasBin: true
reusify@1.1.0:
resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==}
engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
rollup@4.60.4: rollup@4.60.4:
resolution: {integrity: sha512-WHeFSbZYsPu3+bLoNRUuAO+wavNlocOPf3wSHTP7hcFKVnJeWsYlCDbr3mTS14FCizf9ccIxXA8sGL8zKeQN3g==} resolution: {integrity: sha512-WHeFSbZYsPu3+bLoNRUuAO+wavNlocOPf3wSHTP7hcFKVnJeWsYlCDbr3mTS14FCizf9ccIxXA8sGL8zKeQN3g==}
engines: {node: '>=18.0.0', npm: '>=8.0.0'} engines: {node: '>=18.0.0', npm: '>=8.0.0'}
hasBin: true hasBin: true
run-parallel@1.2.0:
resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==}
safe-array-concat@1.1.4: safe-array-concat@1.1.4:
resolution: {integrity: sha512-wtZlHyOje6OZTGqAoaDKxFkgRtkF9CnHAVnCHKfuj200wAgL+bSJhdsCD2l0Qx/2ekEXjPWcyKkfGb5CPboslg==} resolution: {integrity: sha512-wtZlHyOje6OZTGqAoaDKxFkgRtkF9CnHAVnCHKfuj200wAgL+bSJhdsCD2l0Qx/2ekEXjPWcyKkfGb5CPboslg==}
engines: {node: '>=0.4'} engines: {node: '>=0.4'}
@@ -2572,6 +2654,10 @@ packages:
undici-types@6.21.0: undici-types@6.21.0:
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
universalify@2.0.1:
resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==}
engines: {node: '>= 10.0.0'}
unrs-resolver@1.12.2: unrs-resolver@1.12.2:
resolution: {integrity: sha512-dmlRxBJJayXjqTwC+JtF1HhJmgf3ftQ3YejFcZrf4+KKtJv0qDsK1pjqaaVjG7wJ5NJ6UVP1OqRMQ71Z4C3rxQ==} resolution: {integrity: sha512-dmlRxBJJayXjqTwC+JtF1HhJmgf3ftQ3YejFcZrf4+KKtJv0qDsK1pjqaaVjG7wJ5NJ6UVP1OqRMQ71Z4C3rxQ==}
@@ -2612,6 +2698,12 @@ packages:
engines: {node: ^18.0.0 || >=20.0.0} engines: {node: ^18.0.0 || >=20.0.0}
hasBin: true hasBin: true
vite-plugin-static-copy@2.3.2:
resolution: {integrity: sha512-iwrrf+JupY4b9stBttRWzGHzZbeMjAHBhkrn67MNACXJVjEMRpCI10Q3AkxdBkl45IHaTfw/CNVevzQhP7yTwg==}
engines: {node: ^18.0.0 || >=20.0.0}
peerDependencies:
vite: ^5.0.0 || ^6.0.0
vite@5.4.21: vite@5.4.21:
resolution: {integrity: sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==} resolution: {integrity: sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==}
engines: {node: ^18.0.0 || >=20.0.0} engines: {node: ^18.0.0 || >=20.0.0}
@@ -3075,6 +3167,18 @@ snapshots:
'@tybys/wasm-util': 0.10.2 '@tybys/wasm-util': 0.10.2
optional: true optional: true
'@nodelib/fs.scandir@2.1.5':
dependencies:
'@nodelib/fs.stat': 2.0.5
run-parallel: 1.2.0
'@nodelib/fs.stat@2.0.5': {}
'@nodelib/fs.walk@1.2.8':
dependencies:
'@nodelib/fs.scandir': 2.1.5
fastq: 1.20.1
'@nolyfill/is-core-module@1.0.39': {} '@nolyfill/is-core-module@1.0.39': {}
'@pdf-lib/standard-fonts@1.0.0': '@pdf-lib/standard-fonts@1.0.0':
@@ -3942,6 +4046,11 @@ snapshots:
ansi-styles@5.2.0: {} ansi-styles@5.2.0: {}
anymatch@3.1.3:
dependencies:
normalize-path: 3.0.0
picomatch: 2.3.2
argparse@2.0.1: {} argparse@2.0.1: {}
aria-hidden@1.2.6: aria-hidden@1.2.6:
@@ -4016,6 +4125,8 @@ snapshots:
baseline-browser-mapping@2.10.32: {} baseline-browser-mapping@2.10.32: {}
binary-extensions@2.3.0: {}
brace-expansion@1.1.14: brace-expansion@1.1.14:
dependencies: dependencies:
balanced-match: 1.0.2 balanced-match: 1.0.2
@@ -4075,6 +4186,18 @@ snapshots:
check-error@2.1.3: {} check-error@2.1.3: {}
chokidar@3.6.0:
dependencies:
anymatch: 3.1.3
braces: 3.0.3
glob-parent: 5.1.2
is-binary-path: 2.1.0
is-glob: 4.0.3
normalize-path: 3.0.0
readdirp: 3.6.0
optionalDependencies:
fsevents: 2.3.3
class-variance-authority@0.7.1: class-variance-authority@0.7.1:
dependencies: dependencies:
clsx: 2.1.1 clsx: 2.1.1
@@ -4448,10 +4571,22 @@ snapshots:
fast-deep-equal@3.1.3: {} fast-deep-equal@3.1.3: {}
fast-glob@3.3.3:
dependencies:
'@nodelib/fs.stat': 2.0.5
'@nodelib/fs.walk': 1.2.8
glob-parent: 5.1.2
merge2: 1.4.1
micromatch: 4.0.8
fast-json-stable-stringify@2.1.0: {} fast-json-stable-stringify@2.1.0: {}
fast-levenshtein@2.0.6: {} fast-levenshtein@2.0.6: {}
fastq@1.20.1:
dependencies:
reusify: 1.1.0
fdir@6.5.0(picomatch@4.0.4): fdir@6.5.0(picomatch@4.0.4):
optionalDependencies: optionalDependencies:
picomatch: 4.0.4 picomatch: 4.0.4
@@ -4480,6 +4615,12 @@ snapshots:
dependencies: dependencies:
is-callable: 1.2.7 is-callable: 1.2.7
fs-extra@11.3.5:
dependencies:
graceful-fs: 4.2.11
jsonfile: 6.2.1
universalify: 2.0.1
fsevents@2.3.3: fsevents@2.3.3:
optional: true optional: true
@@ -4530,6 +4671,10 @@ snapshots:
dependencies: dependencies:
resolve-pkg-maps: 1.0.0 resolve-pkg-maps: 1.0.0
glob-parent@5.1.2:
dependencies:
is-glob: 4.0.3
glob-parent@6.0.2: glob-parent@6.0.2:
dependencies: dependencies:
is-glob: 4.0.3 is-glob: 4.0.3
@@ -4545,6 +4690,8 @@ snapshots:
gopd@1.2.0: {} gopd@1.2.0: {}
graceful-fs@4.2.11: {}
happy-dom@20.9.0: happy-dom@20.9.0:
dependencies: dependencies:
'@types/node': 20.19.41 '@types/node': 20.19.41
@@ -4618,6 +4765,10 @@ snapshots:
dependencies: dependencies:
has-bigints: 1.1.0 has-bigints: 1.1.0
is-binary-path@2.1.0:
dependencies:
binary-extensions: 2.3.0
is-boolean-object@1.2.2: is-boolean-object@1.2.2:
dependencies: dependencies:
call-bound: 1.0.4 call-bound: 1.0.4
@@ -4738,6 +4889,12 @@ snapshots:
json5@2.2.3: {} json5@2.2.3: {}
jsonfile@6.2.1:
dependencies:
universalify: 2.0.1
optionalDependencies:
graceful-fs: 4.2.11
jszip@3.10.1: jszip@3.10.1:
dependencies: dependencies:
lie: 3.3.0 lie: 3.3.0
@@ -4788,11 +4945,18 @@ snapshots:
math-intrinsics@1.1.0: {} math-intrinsics@1.1.0: {}
merge2@1.4.1: {}
micromatch@4.0.7: micromatch@4.0.7:
dependencies: dependencies:
braces: 3.0.3 braces: 3.0.3
picomatch: 2.3.2 picomatch: 2.3.2
micromatch@4.0.8:
dependencies:
braces: 3.0.3
picomatch: 2.3.2
minimatch@10.2.5: minimatch@10.2.5:
dependencies: dependencies:
brace-expansion: 5.0.6 brace-expansion: 5.0.6
@@ -4820,6 +4984,8 @@ snapshots:
node-releases@2.0.46: {} node-releases@2.0.46: {}
normalize-path@3.0.0: {}
object-assign@4.1.1: {} object-assign@4.1.1: {}
object-inspect@1.13.4: {} object-inspect@1.13.4: {}
@@ -4885,6 +5051,8 @@ snapshots:
dependencies: dependencies:
p-limit: 3.1.0 p-limit: 3.1.0
p-map@7.0.4: {}
pako@1.0.11: {} pako@1.0.11: {}
parent-module@1.0.1: parent-module@1.0.1:
@@ -4944,6 +5112,8 @@ snapshots:
punycode@2.3.1: {} punycode@2.3.1: {}
queue-microtask@1.2.3: {}
re-resizable@6.11.2(react-dom@18.3.1(react@18.3.1))(react@18.3.1): re-resizable@6.11.2(react-dom@18.3.1(react@18.3.1))(react@18.3.1):
dependencies: dependencies:
react: 18.3.1 react: 18.3.1
@@ -5045,6 +5215,10 @@ snapshots:
string_decoder: 1.1.1 string_decoder: 1.1.1
util-deprecate: 1.0.2 util-deprecate: 1.0.2
readdirp@3.6.0:
dependencies:
picomatch: 2.3.2
reflect.getprototypeof@1.0.10: reflect.getprototypeof@1.0.10:
dependencies: dependencies:
call-bind: 1.0.9 call-bind: 1.0.9
@@ -5085,6 +5259,8 @@ snapshots:
path-parse: 1.0.7 path-parse: 1.0.7
supports-preserve-symlinks-flag: 1.0.0 supports-preserve-symlinks-flag: 1.0.0
reusify@1.1.0: {}
rollup@4.60.4: rollup@4.60.4:
dependencies: dependencies:
'@types/estree': 1.0.8 '@types/estree': 1.0.8
@@ -5116,6 +5292,10 @@ snapshots:
'@rollup/rollup-win32-x64-msvc': 4.60.4 '@rollup/rollup-win32-x64-msvc': 4.60.4
fsevents: 2.3.3 fsevents: 2.3.3
run-parallel@1.2.0:
dependencies:
queue-microtask: 1.2.3
safe-array-concat@1.1.4: safe-array-concat@1.1.4:
dependencies: dependencies:
call-bind: 1.0.9 call-bind: 1.0.9
@@ -5352,6 +5532,8 @@ snapshots:
undici-types@6.21.0: {} undici-types@6.21.0: {}
universalify@2.0.1: {}
unrs-resolver@1.12.2: unrs-resolver@1.12.2:
dependencies: dependencies:
napi-postinstall: 0.3.4 napi-postinstall: 0.3.4
@@ -5424,6 +5606,15 @@ snapshots:
- supports-color - supports-color
- terser - terser
vite-plugin-static-copy@2.3.2(vite@5.4.21(@types/node@20.19.41)):
dependencies:
chokidar: 3.6.0
fast-glob: 3.3.3
fs-extra: 11.3.5
p-map: 7.0.4
picocolors: 1.1.1
vite: 5.4.21(@types/node@20.19.41)
vite@5.4.21(@types/node@20.19.41): vite@5.4.21(@types/node@20.19.41):
dependencies: dependencies:
esbuild: 0.21.5 esbuild: 0.21.5

View File

@@ -298,7 +298,20 @@ export function PdfSpikeViewer(props: PdfSpikeViewerProps) {
className={wrapperClasses.length > 0 ? wrapperClasses : undefined} className={wrapperClasses.length > 0 ? wrapperClasses : undefined}
style={{ height: "100%" }} style={{ height: "100%" }}
> >
<PdfLoader document={pdfUrl}> <PdfLoader
document={{
url: pdfUrl,
// Without the cmaps and standard-font data referenced below, PDFs
// with CID fonts (most CJK + many European court documents) or
// unembedded standard fonts get rendered with substitute metrics,
// which shifts every text-layer span out of alignment with the
// canvas glyphs. vite.config.ts copies both directories from
// pdfjs-dist into the served root.
cMapUrl: "/cmaps/",
cMapPacked: true,
standardFontDataUrl: "/standard_fonts/",
}}
>
{(pdfDocument) => ( {(pdfDocument) => (
<PdfHighlighter <PdfHighlighter
pdfDocument={pdfDocument} pdfDocument={pdfDocument}

View File

@@ -1,12 +1,34 @@
import { defineConfig } from "vite"; import { defineConfig } from "vite";
import react from "@vitejs/plugin-react"; import react from "@vitejs/plugin-react";
import { viteStaticCopy } from "vite-plugin-static-copy";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import { dirname, resolve } from "node:path"; import { dirname, resolve } from "node:path";
const __dirname = dirname(fileURLToPath(import.meta.url)); const __dirname = dirname(fileURLToPath(import.meta.url));
export default defineConfig({ export default defineConfig({
plugins: [react()], plugins: [
react(),
// PDF.js needs its cmaps (character maps for CID fonts) and
// standard fonts (Helvetica/Times/Courier metrics) to position
// text-layer spans correctly. Without them, PDFs with embedded
// CID fonts or unembedded standard fonts get rendered to canvas
// OK but their textLayer overlays land in the wrong place — text
// appears unselectable or selection jumps to a different region.
// We serve both directories straight from `node_modules/pdfjs-dist`.
viteStaticCopy({
targets: [
{
src: resolve(__dirname, "node_modules/pdfjs-dist/cmaps") + "/[!.]*",
dest: "cmaps",
},
{
src: resolve(__dirname, "node_modules/pdfjs-dist/standard_fonts") + "/[!.]*",
dest: "standard_fonts",
},
],
}),
],
resolve: { resolve: {
alias: { alias: {
"@shared": resolve(__dirname, "src/shared"), "@shared": resolve(__dirname, "src/shared"),