From 120ed89780345e54dc87c82aecc12bf6fc528170 Mon Sep 17 00:00:00 2001 From: tegwick Date: Fri, 13 Feb 2026 21:00:51 +0100 Subject: [PATCH] fix(proxy): catch markitdown missing-dependency errors with clean hint When markitdown is installed but a format-specific sub-dependency is missing (e.g. pdfminer.six for PDF), translate the raw traceback into a DependencyMissingError with the correct install command. Co-Authored-By: Claude Opus 4.6 --- markitect/proxy/extractors/markitdown_ext.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/markitect/proxy/extractors/markitdown_ext.py b/markitect/proxy/extractors/markitdown_ext.py index f2e2de93..dc31fd7b 100644 --- a/markitect/proxy/extractors/markitdown_ext.py +++ b/markitect/proxy/extractors/markitdown_ext.py @@ -67,7 +67,20 @@ class MarkitdownExtractor(BaseExtractor): ) md = MarkItDown() - result = md.convert(str(source_path)) + try: + result = md.convert(str(source_path)) + except Exception as exc: + # Catch markitdown's FileConversionException (and sub-type + # MissingDependencyException) and surface a clean install hint. + msg = str(exc) + if "MissingDependency" in type(exc).__name__ or "MissingDependency" in msg: + ext = source_path.suffix.lstrip(".") + raise DependencyMissingError( + f"markitdown needs an extra dependency for {ext} files.", + package=f"markitdown-no-magika[{ext}]", + install_hint=f'pip install "markitdown-no-magika[{ext}]"', + ) from exc + raise return ExtractionResult( content=result.text_content, extractor=self.name,