""" Built-in extractor registration. Importing this module registers all built-in extractors with the global registry. Markitdown is registered last and unconditionally — it overrides the specialized extractors for overlapping extensions (.pdf, .html, .htm) and adds new types. If markitdown is not installed, it gives a clear install hint at extraction time (same pattern as PdfExtractor / HtmlExtractor). """ from markitect.proxy.registry import registry from markitect.proxy.extractors.pdf import PdfExtractor from markitect.proxy.extractors.html import HtmlExtractor from markitect.proxy.extractors.markdown import MarkdownNormalizer from markitect.proxy.extractors.markitdown_ext import MarkitdownExtractor # 1. Specialized extractors (baseline — available as explicit fallbacks) registry.register(PdfExtractor()) registry.register(HtmlExtractor()) registry.register(MarkdownNormalizer()) # 2. Markitdown as default backend — overrides .pdf, .html, .htm and adds # new types (.docx, .pptx, .xlsx, .xls, .csv, .json, .xml) registry.register(MarkitdownExtractor())