Files
binect-chrome/src/utils/hash.ts
tegwick facae724bf Implement document proxy concept with archive/live views
- Add content hash (MD5) for document deduplication
- Separate local state (archived) from server state (binectStatus)
- Add archive toggle button to switch between live/archived views
- Add archive/restore/delete actions for documents
- Refactor pdf-queue.ts with DocumentProxy interface
- Add hash.ts utility for content hashing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 17:11:15 +01:00

38 lines
1.3 KiB
TypeScript

/**
* Hash utilities for document identification
*/
/**
 * Compute a fast, non-cryptographic content fingerprint of a buffer.
 *
 * NOTE: despite its name (kept so existing callers keep working), this does
 * NOT compute MD5 and does not use the Web Crypto API; `crypto.subtle` offers
 * no MD5 digest. The result is only meant for content identification and
 * deduplication, never for security purposes.
 *
 * Every byte contributes to the fingerprint. An earlier revision skipped
 * bytes once the input reached 20,000 bytes, so two same-sized documents that
 * differed only in skipped positions received the same fingerprint. Results
 * for inputs shorter than 20,000 bytes are unchanged by that fix.
 *
 * @param data - Raw bytes of the document.
 * @returns `<byte length in hex>-<32-bit hash as 8 lowercase hex digits>`,
 *   e.g. `3-00017862` for the three bytes `0x61 0x62 0x63`.
 */
export async function computeMD5(data: ArrayBuffer): Promise<string> {
  const bytes = new Uint8Array(data);

  // 31-multiplier rolling hash, wrapped to a signed 32-bit integer each step.
  // Math.imul(hash, 31) equals ((hash << 5) - hash) modulo 2^32.
  let hash = 0;
  for (const byte of bytes) {
    hash = (Math.imul(hash, 31) + byte) | 0;
  }

  // Prefix with the byte length so buffers of different sizes can never collide.
  const sizeHash = bytes.length.toString(16);
  // `>>> 0` reinterprets the signed value as unsigned before hex formatting.
  const contentHash = (hash >>> 0).toString(16).padStart(8, '0');
  return `${sizeHash}-${contentHash}`;
}
/**
 * Build a document identifier by joining the filename and the content hash
 * with a single colon.
 *
 * @param filename - Name of the document file.
 * @param contentHash - Content fingerprint of the document.
 * @returns The identifier in the form `<filename>:<contentHash>`.
 */
export function generateDocumentId(filename: string, contentHash: string): string {
  const parts = [filename, contentHash];
  return parts.join(':');
}