feat(proxy): add markitdown as default proxy backend

Uses markitdown-no-magika (lighter fork without magika/onnxruntime) to
handle PDF, HTML, DOCX, PPTX, XLSX, XLS, CSV, JSON, and XML files.
Specialized extractors (pymupdf4llm, markdownify) remain as fallbacks
when markitdown is not installed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-13 20:48:47 +01:00
parent ac334c679d
commit e4fbba8a57
3 changed files with 93 additions and 1 deletion

View File

@@ -34,7 +34,8 @@ development = [
]
# Narrow extras that each install one specialized extractor. They stay
# available as fallbacks for environments without markitdown.
proxy-pdf = ["pymupdf4llm>=0.0.10"]
proxy-html = ["markdownify>=0.13.1"]
# markitdown-no-magika is the lighter markitdown fork (no magika/onnxruntime).
proxy-markitdown = ["markitdown-no-magika[pdf]"]
# Default proxy backend. This extra was previously
# ["pymupdf4llm>=0.0.10", "markdownify>=0.13.1"]; it is defined exactly once
# here because TOML rejects a key that is repeated within the same table.
proxy = ["markitdown-no-magika[pdf]"]
# Console entry points: each key is the installed command name and each value
# is the "module:callable" it invokes.
[project.scripts]
markitect = "markitect.cli:main"