generated from coulomb/repo-seed
- Add content hash (MD5) for document deduplication - Separate local state (archived) from server state (binectStatus) - Add archive toggle button to switch between live/archived views - Add archive/restore/delete actions for documents - Refactor pdf-queue.ts with DocumentProxy interface - Add hash.ts utility for content hashing Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
38 lines
1.3 KiB
TypeScript
38 lines
1.3 KiB
TypeScript
/**
 * Hash utilities for document identification
 */
|
|
|
|
/**
 * Compute a fast, non-cryptographic content fingerprint of an ArrayBuffer.
 *
 * Despite the name, this is NOT an MD5 digest: the Web Crypto API does not
 * offer MD5 and no hashing library is used here. The fingerprint is a 32-bit
 * polynomial rolling hash (multiplier 31) folded over every byte of the
 * input, prefixed with the input's byte length. It is intended for content
 * identification / deduplication only and must not be used where collision
 * resistance against an adversary matters.
 *
 * @param data - Raw bytes to fingerprint. An empty buffer is valid.
 * @returns A string of the form `<byte length in hex>-<8 hex digit hash>`,
 *   e.g. `3-00000402` for the bytes `[1, 2, 3]` and `0-00000000` for an
 *   empty buffer.
 */
export async function computeMD5(data: ArrayBuffer): Promise<string> {
  const bytes = new Uint8Array(data);

  // Fold in EVERY byte. Sampling with a stride would let two same-sized
  // documents that differ only at skipped offsets produce the same
  // fingerprint and be wrongly treated as duplicates.
  let hash = 0;
  for (const byte of bytes) {
    // hash * 31 + byte, wrapped to a signed 32-bit integer.
    hash = (Math.imul(hash, 31) + byte) | 0;
  }

  // Prefix with the length so inputs of different sizes never compare equal.
  const sizeHash = bytes.length.toString(16);
  // `>>> 0` reinterprets the signed value as unsigned before hex formatting.
  const contentHash = (hash >>> 0).toString(16).padStart(8, '0');

  return `${sizeHash}-${contentHash}`;
}
|
|
|
|
/**
 * Build a document ID by joining a filename and a content hash with a colon.
 *
 * @param filename - Document filename; used verbatim.
 * @param contentHash - Content hash string; used verbatim.
 * @returns `filename`, then `:`, then `contentHash`.
 */
export function generateDocumentId(filename: string, contentHash: string): string {
  const separator = ':';
  return filename + separator + contentHash;
}
|