fix(proxy): catch markitdown missing-dependency errors with clean hint
When markitdown is installed but a format-specific sub-dependency is missing (e.g. pdfminer-six for PDF), translate the raw traceback into a DependencyMissingError with the correct install command.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -67,7 +67,20 @@ class MarkitdownExtractor(BaseExtractor):
|
|||||||
)
|
)
|
||||||
|
|
||||||
md = MarkItDown()
|
md = MarkItDown()
|
||||||
result = md.convert(str(source_path))
|
try:
|
||||||
|
result = md.convert(str(source_path))
|
||||||
|
except Exception as exc:
|
||||||
|
# Catch markitdown's FileConversionException (and sub-type
|
||||||
|
# MissingDependencyException) and surface a clean install hint.
|
||||||
|
msg = str(exc)
|
||||||
|
if "MissingDependency" in type(exc).__name__ or "MissingDependency" in msg:
|
||||||
|
ext = source_path.suffix.lstrip(".")
|
||||||
|
raise DependencyMissingError(
|
||||||
|
f"markitdown needs an extra dependency for {ext} files.",
|
||||||
|
package=f"markitdown-no-magika[{ext}]",
|
||||||
|
install_hint=f'pip install "markitdown-no-magika[{ext}]"',
|
||||||
|
) from exc
|
||||||
|
raise
|
||||||
return ExtractionResult(
|
return ExtractionResult(
|
||||||
content=result.text_content,
|
content=result.text_content,
|
||||||
extractor=self.name,
|
extractor=self.name,
|
||||||
|
|||||||
Reference in New Issue
Block a user