generated from coulomb/repo-seed
Add INTENT.md/SCOPE.md, reconcile PRD scope, rename content fingerprint
- Add INTENT.md (purpose and inviolable principles) and SCOPE.md (current operational boundary), matching the binect-js house style.
- Reconcile the PRD with the shipped document-lifecycle scope: add ordering/server-sync requirements (4.3a), split the proxy queue vs. tracking-log caps (4.6.3), and update the solution summary/closing.
- Rename computeMD5 -> computeContentFingerprint to be honest: it is a fast sampled non-cryptographic fingerprint for dedup, not MD5.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,7 @@ import { uploadPDF, testConnection, BinectAPIError, Document } from '../utils/bi
|
||||
import { fetchPDFBytes, DetectedPDF } from '../utils/pdf-detector';
|
||||
import { addTrackingEntry } from '../tracking/tracker';
|
||||
import { DocumentProxy, PDFQueueEntry, PDFStatus, PDFStatusMeta } from '../utils/pdf-queue';
|
||||
import { computeMD5 } from '../utils/hash';
|
||||
import { computeContentFingerprint } from '../utils/hash';
|
||||
|
||||
// DOM Elements
|
||||
const authView = document.getElementById('authView')!;
|
||||
@@ -827,8 +827,8 @@ async function handleSendPDF(id: string) {
|
||||
// Fetch PDF bytes
|
||||
const pdfBytes = await fetchPDFBytes(pdf.url);
|
||||
|
||||
// Compute content hash for deduplication
|
||||
const contentHash = await computeMD5(pdfBytes);
|
||||
// Compute content fingerprint for deduplication
|
||||
const contentHash = await computeContentFingerprint(pdfBytes);
|
||||
|
||||
// Upload to Binect with credentials
|
||||
const document = await uploadPDF(
|
||||
|
||||
@@ -3,17 +3,16 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compute MD5 hash of an ArrayBuffer using Web Crypto API
|
||||
* Falls back to a simple hash if crypto.subtle is unavailable
|
||||
* Compute a fast, non-cryptographic content fingerprint for an ArrayBuffer.
|
||||
*
|
||||
* This is NOT a cryptographic hash (not MD5/SHA): it samples the bytes and
|
||||
* combines them with the file size. It is used only for deduplicating detected
|
||||
* PDFs, never for security. Returns a `${sizeHex}-${hashHex}` fingerprint.
|
||||
*/
|
||||
export async function computeMD5(data: ArrayBuffer): Promise<string> {
|
||||
// Web Crypto API doesn't support MD5 (it's not cryptographically secure)
|
||||
// We'll use a simple but fast hash for content identification
|
||||
// This is fine for deduplication purposes
|
||||
export async function computeContentFingerprint(data: ArrayBuffer): Promise<string> {
|
||||
const bytes = new Uint8Array(data);
|
||||
|
||||
// Use a combination of length and sampled bytes for fast hashing
|
||||
// For true MD5, we'd need a library, but this is sufficient for deduplication
|
||||
// Sample bytes (not the full buffer) and fold them together for speed.
|
||||
let hash = 0;
|
||||
const sampleSize = Math.min(bytes.length, 10000); // Sample first 10KB
|
||||
const step = Math.max(1, Math.floor(bytes.length / sampleSize));
|
||||
|
||||
Reference in New Issue
Block a user