"""Register the built-in extractors with the global registry.

Importing this module is what performs the registration; there is nothing
to call afterwards.

Order is significant. The specialized extractors are registered first as
the baseline. The markitdown backend, when it is available, is registered
last so that it overwrites the overlapping extensions and becomes the
default backend. When markitdown is not installed, the specialized
extractors stay active for their extensions.
"""

from markitect.proxy.registry import registry
from markitect.proxy.extractors.pdf import PdfExtractor
from markitect.proxy.extractors.html import HtmlExtractor
from markitect.proxy.extractors.markdown import MarkdownNormalizer

# Step 1 -- baseline: the specialized extractors.
# Each instance is created right before it is registered, in this order.
for _baseline_cls in (PdfExtractor, HtmlExtractor, MarkdownNormalizer):
    registry.register(_baseline_cls())
del _baseline_cls  # keep the loop variable out of the module namespace

# Step 2 -- default backend: markitdown.
# Overrides .pdf, .html, .htm and adds new types
# (.docx, .pptx, .xlsx, .xls, .csv, .json, .xml).
try:
    from markitect.proxy.extractors.markitdown_ext import MarkitdownExtractor

    _ext = MarkitdownExtractor()
    # Only register the backend when it reports its dependencies as present.
    if _ext.check_dependencies():
        registry.register(_ext)
except ImportError:
    # Optional backend unavailable: the baseline registrations stay as-is.
    pass