feat(proxy): add proxy file system for non-markdown source conversion
Introduces a new `markitect/proxy/` module with pluggable extractors that convert non-markdown sources (PDF, HTML) into tracked markdown proxy files. Proxy files preserve origin metadata (path, checksum, timestamp) so they can be kept in sync when the original changes. CLI commands: `proxy create`, `proxy update`, `proxy status`, `proxy extractors`. Built-in extractors: PDF (pymupdf4llm), HTML (markdownify), Markdown (built-in). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
29
markitect/proxy/extractors/markdown.py
Normal file
29
markitect/proxy/extractors/markdown.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
Markdown normalizer — passes through Markdown with minimal transformation.
|
||||
|
||||
No external dependencies required.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from markitect.proxy.extractors.base import BaseExtractor
|
||||
from markitect.proxy.models import ExtractionResult
|
||||
|
||||
|
||||
class MarkdownNormalizer(BaseExtractor):
    """Pass-through extractor for sources that are already Markdown.

    Built in: it relies only on the standard library, so no optional
    third-party package has to be installed for it to work.
    """

    # Identity recorded in every ExtractionResult produced by extract().
    name = "markdown"
    version = "1.0"
    # File suffixes this extractor declares for itself.
    # NOTE(review): presumably consumed by the extractor registry to pick an
    # extractor by suffix; confirm against BaseExtractor.
    extensions = (".md", ".markdown", ".mdown")

    def check_dependencies(self) -> bool:
        """Return True unconditionally; there are no optional dependencies."""
        return True

    def extract(self, source_path: Path) -> ExtractionResult:
        """Read *source_path* as UTF-8 text and wrap it in an ExtractionResult.

        The file is decoded with ``utf-8-sig`` so that a leading byte-order
        mark (written by some editors) is dropped instead of ending up as an
        invisible ``\\ufeff`` character at the start of the content. Files
        without a BOM decode exactly as they would with plain ``utf-8``.

        Args:
            source_path: Path of the Markdown file to read.

        Returns:
            An ExtractionResult holding the file's text together with this
            extractor's ``name`` and ``version``.

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        content = source_path.read_text(encoding="utf-8-sig")
        return ExtractionResult(
            content=content,
            extractor=self.name,
            extractor_version=self.version,
        )
|
||||
Reference in New Issue
Block a user