/**
 * Hash utilities for document identification.
 */

/**
 * Upper bound used to derive the sampling stride: buffers up to twice this
 * size are hashed byte-by-byte, larger ones are sampled at a fixed stride.
 */
const SAMPLE_TARGET = 10000;

/**
 * Compute a fast, non-cryptographic content fingerprint of a buffer.
 *
 * Despite the name (kept so existing callers keep working), this is NOT MD5:
 * the Web Crypto API does not offer MD5, and no crypto API is used here.
 * The fingerprint is a 32-bit rolling hash (`hash * 31 + byte`) combined with
 * the buffer length, which is adequate for deduplication but must not be used
 * for anything security-sensitive.
 *
 * For large inputs only every `step`-th byte is read (roughly 10,000 to
 * 20,000 samples spread across the whole buffer), so two buffers of the same
 * length that differ only in unsampled bytes produce the same fingerprint.
 *
 * @param data - Raw content to fingerprint.
 * @returns A string of the form `<length in hex>-<8 hex digit hash>`,
 *   e.g. `"3-00000402"`. The promise never rejects for a valid ArrayBuffer.
 */
export async function computeMD5(data: ArrayBuffer): Promise<string> {
  const bytes = new Uint8Array(data);

  // Stride across the whole buffer rather than reading only a prefix, so that
  // changes anywhere in a large file have a chance of affecting the hash.
  // Clamped to 1 so small (and empty) buffers are hashed in full.
  const step = Math.max(1, Math.floor(bytes.length / SAMPLE_TARGET));

  let hash = 0;
  for (let i = 0; i < bytes.length; i += step) {
    // (hash << 5) - hash === hash * 31; `| 0` keeps the value in int32 range.
    hash = ((hash << 5) - hash + bytes[i]) | 0;
  }

  // The length is part of the fingerprint so buffers of different sizes
  // never collide, even when their sampled bytes agree.
  const sizeHash = bytes.length.toString(16);
  // `>>> 0` reinterprets the int32 as unsigned so the hex form has no sign.
  const contentHash = (hash >>> 0).toString(16).padStart(8, '0');

  return `${sizeHash}-${contentHash}`;
}

/**
 * Build a document ID by joining the filename and its content hash with `:`.
 *
 * @param filename - Name of the document; used verbatim (not escaped).
 * @param contentHash - Content fingerprint, e.g. the value from `computeMD5`.
 * @returns The string `<filename>:<contentHash>`.
 */
export function generateDocumentId(filename: string, contentHash: string): string {
  return `${filename}:${contentHash}`;
}