diff --git a/markitect/proxy/extractors/markitdown_ext.py b/markitect/proxy/extractors/markitdown_ext.py index f2e2de93..dc31fd7b 100644 --- a/markitect/proxy/extractors/markitdown_ext.py +++ b/markitect/proxy/extractors/markitdown_ext.py @@ -67,7 +67,20 @@ class MarkitdownExtractor(BaseExtractor): ) md = MarkItDown() - result = md.convert(str(source_path)) + try: + result = md.convert(str(source_path)) + except Exception as exc: + # Turn a missing-dependency failure (matched by type name or message) + # into an install hint. NOTE(review): assumes extra == suffix; confirm. + msg = str(exc) + if "MissingDependency" in type(exc).__name__ or "MissingDependency" in msg: + ext = source_path.suffix.lstrip(".") + raise DependencyMissingError( + f"markitdown needs an extra dependency for {ext} files.", + package=f"markitdown-no-magika[{ext}]", + install_hint=f'pip install "markitdown-no-magika[{ext}]"', + ) from exc + raise return ExtractionResult( content=result.text_content, extractor=self.name,