generated from coulomb/repo-seed
Add INTENT.md/SCOPE.md, reconcile PRD scope, rename content fingerprint
- Add INTENT.md (purpose and inviolable principles) and SCOPE.md (current operational boundary), matching the binect-js house style.
- Reconcile the PRD with the shipped document-lifecycle scope: add ordering/server-sync requirements (4.3a), split the proxy queue vs. tracking-log caps (4.6.3), and update the solution summary/closing.
- Rename computeMD5 -> computeContentFingerprint so the name reflects what it does: it is a fast, sampled, non-cryptographic fingerprint for dedup, not MD5.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -3,17 +3,16 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compute MD5 hash of an ArrayBuffer using Web Crypto API
|
||||
* Falls back to a simple hash if crypto.subtle is unavailable
|
||||
* Compute a fast, non-cryptographic content fingerprint for an ArrayBuffer.
|
||||
*
|
||||
* This is NOT a cryptographic hash (not MD5/SHA): it samples the bytes and
|
||||
* combines them with the file size. It is used only for deduplicating detected
|
||||
* PDFs, never for security. Returns a `${sizeHex}-${hashHex}` fingerprint.
|
||||
*/
|
||||
export async function computeMD5(data: ArrayBuffer): Promise<string> {
|
||||
// Web Crypto API doesn't support MD5 (it's not cryptographically secure)
|
||||
// We'll use a simple but fast hash for content identification
|
||||
// This is fine for deduplication purposes
|
||||
export async function computeContentFingerprint(data: ArrayBuffer): Promise<string> {
|
||||
const bytes = new Uint8Array(data);
|
||||
|
||||
// Use a combination of length and sampled bytes for fast hashing
|
||||
// For true MD5, we'd need a library, but this is sufficient for deduplication
|
||||
// Sample bytes (not the full buffer) and fold them together for speed.
|
||||
let hash = 0;
|
||||
const sampleSize = Math.min(bytes.length, 10000); // Sample first 10KB
|
||||
const step = Math.max(1, Math.floor(bytes.length / sampleSize));
|
||||
|
||||
Reference in New Issue
Block a user