Add INTENT.md/SCOPE.md, reconcile PRD scope, rename content fingerprint

- Add INTENT.md (purpose and inviolable principles) and SCOPE.md
  (current operational boundary), matching the binect-js house style.
- Reconcile the PRD with the shipped document-lifecycle scope: add
  ordering/server-sync requirements (4.3a), split the proxy queue vs.
  tracking-log caps (4.6.3), and update the solution summary/closing.
- Rename computeMD5 -> computeContentFingerprint so the name is accurate: it
  is a fast, sampled, non-cryptographic fingerprint for dedup, not MD5.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-22 21:42:57 +02:00
parent a1597c23fa
commit 9a42001972
5 changed files with 287 additions and 20 deletions

View File

@@ -8,7 +8,7 @@ import { uploadPDF, testConnection, BinectAPIError, Document } from '../utils/bi
import { fetchPDFBytes, DetectedPDF } from '../utils/pdf-detector';
import { addTrackingEntry } from '../tracking/tracker';
import { DocumentProxy, PDFQueueEntry, PDFStatus, PDFStatusMeta } from '../utils/pdf-queue';
import { computeMD5 } from '../utils/hash';
import { computeContentFingerprint } from '../utils/hash';
// DOM Elements
const authView = document.getElementById('authView')!;
@@ -827,8 +827,8 @@ async function handleSendPDF(id: string) {
// Fetch PDF bytes
const pdfBytes = await fetchPDFBytes(pdf.url);
// Compute content hash for deduplication
const contentHash = await computeMD5(pdfBytes);
// Compute content fingerprint for deduplication
const contentHash = await computeContentFingerprint(pdfBytes);
// Upload to Binect with credentials
const document = await uploadPDF(

View File

@@ -3,17 +3,16 @@
*/
/**
* Compute MD5 hash of an ArrayBuffer using Web Crypto API
* Falls back to a simple hash if crypto.subtle is unavailable
* Compute a fast, non-cryptographic content fingerprint for an ArrayBuffer.
*
* This is NOT a cryptographic hash (not MD5/SHA): it samples the bytes and
* combines them with the file size. It is used only for deduplicating detected
* PDFs, never for security. Returns a `${sizeHex}-${hashHex}` fingerprint.
*/
export async function computeMD5(data: ArrayBuffer): Promise<string> {
// Web Crypto API doesn't support MD5 (it's not cryptographically secure)
// We'll use a simple but fast hash for content identification
// This is fine for deduplication purposes
export async function computeContentFingerprint(data: ArrayBuffer): Promise<string> {
const bytes = new Uint8Array(data);
// Use a combination of length and sampled bytes for fast hashing
// For true MD5, we'd need a library, but this is sufficient for deduplication
// Sample bytes (not the full buffer) and fold them together for speed.
let hash = 0;
const sampleSize = Math.min(bytes.length, 10000); // Sample first 10KB
const step = Math.max(1, Math.floor(bytes.length / sampleSize));