Introduces a new `markitect/proxy/` module with pluggable extractors that convert non-markdown sources (PDF, HTML) into tracked markdown proxy files. Proxy files preserve origin metadata (path, checksum, timestamp) so they can be kept in sync when the original changes. CLI commands: `proxy create`, `proxy update`, `proxy status`, `proxy extractors`. Built-in extractors: PDF (pymupdf4llm), HTML (markdownify), Markdown (built-in). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
27 lines
558 B
Python
27 lines
558 B
Python
"""
|
|
Data models for the proxy file system.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
@dataclass
class ProxyMetadata:
    """Metadata stored in a proxy file's YAML frontmatter.

    Describes the origin of a proxy file (source path, checksum, size and
    generation timestamp) together with the extractor that produced it, so
    the proxy can be kept in sync when the original source changes.

    Field order is part of the constructor signature; keep it stable.
    """

    source_path: str  # path of the original source the proxy was built from
    source_checksum: str  # "sha256:<hex>"
    source_size: int  # size of the source; presumably in bytes -- TODO confirm
    generated_at: str  # ISO 8601
    extractor: str  # identifier of the extractor used (e.g. PDF/HTML) -- exact values not shown here
    extractor_version: str  # version string recorded for that extractor
|
|
|
|
|
|
@dataclass
class ExtractionResult:
    """Result returned by an extractor after processing a source file.

    Field order is part of the constructor signature; keep it stable.
    """

    content: str  # extracted text; presumably markdown -- TODO confirm against extractors
    extractor: str  # identifier of the extractor that produced ``content``
    extractor_version: str  # version string reported for that extractor
|