import sys
from pathlib import Path
from types import SimpleNamespace

import pytest

from kontextual_engine import SourcePayload, SourceReference, content_digest
from kontextual_engine.adapters.markitect_tool import MarkitectMarkdownExtractor
from kontextual_engine.errors import AdapterUnavailableError


def test_markitect_markdown_extractor_missing_dependency_is_structured(monkeypatch: pytest.MonkeyPatch) -> None:
    """Extraction without ``markitect_tool`` raises a structured adapter error."""
    # A ``None`` entry in ``sys.modules`` makes ``import markitect_tool`` fail
    # with ImportError, which simulates the package not being installed.
    monkeypatch.setitem(sys.modules, "markitect_tool", None)

    payload = markdown_payload("# Missing Adapter\n")
    extractor = MarkitectMarkdownExtractor()
    expected_details = {
        "adapter": "markitect-tool",
        "media_type": "text/markdown",
    }

    with pytest.raises(AdapterUnavailableError) as exc_info:
        extractor.extract(payload)

    assert exc_info.value.details == expected_details


def test_markitect_markdown_extractor_delegates_to_markitect_tool(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Extraction calls the (faked) ``markitect_tool`` API and maps its output."""
    source = tmp_path / "decision.md"
    source.write_text("# Decision\n\nUse Markitect.\n", encoding="utf-8")

    # Every fake entry point appends (function name, argument summary) here so
    # the test can assert on which functions ran and in what order.
    recorded: list[tuple[str, str]] = []

    def parsed_document() -> dict:
        # Built fresh on every call, like a ``to_dict`` method would be.
        return {
            "frontmatter": {"status": "accepted"},
            "blocks": [
                {"type": "heading", "text": "Decision", "line_start": 1, "heading_level": 1},
                {"type": "paragraph", "text": "Use Markitect.", "line_start": 3},
                {"type": "table", "text": "| A |\n| - |", "line_start": 5, "line_end": 6},
            ],
            "headings": [{"level": 1, "text": "Decision", "line": 1}],
            "sections": [
                {
                    "heading": {"level": 1, "text": "Decision", "line": 1},
                    "blocks": [{"type": "paragraph", "text": "Use Markitect.", "line_start": 3}],
                }
            ],
            "tokens": [
                {
                    "type": "inline",
                    "children": [
                        {
                            "type": "link_open",
                            "attrs": {"href": "https://example.test/decision"},
                        }
                    ],
                }
            ],
        }

    def snapshot_identity() -> dict:
        return {
            "snapshot_id": "snapshot:decision",
            "content_hash": "sha256:decision",
            "parser": "markdown-it-py/commonmark",
        }

    def fake_parse_markdown_file(path: Path) -> SimpleNamespace:
        recorded.append(("parse_markdown_file", str(path)))
        return SimpleNamespace(to_dict=parsed_document)

    def fake_parse_markdown(text, source_path=None):
        # Present on the fake module but expected to stay unused: the call log
        # asserted below contains no entry for it.
        return None

    def fake_snapshot_identity_for_file(path: Path, *, parse_options: dict) -> SimpleNamespace:
        recorded.append(("snapshot_identity_for_file", f"{path}:{parse_options['profile']}"))
        return SimpleNamespace(to_dict=snapshot_identity)

    fake_module = SimpleNamespace(
        parse_markdown_file=fake_parse_markdown_file,
        parse_markdown=fake_parse_markdown,
        snapshot_identity_for_file=fake_snapshot_identity_for_file,
    )
    monkeypatch.setitem(sys.modules, "markitect_tool", fake_module)

    markdown_text = source.read_text(encoding="utf-8")
    payload = markdown_payload(markdown_text, source)
    result = MarkitectMarkdownExtractor().extract(payload)

    # Delegation: the file-based parser first, then the snapshot identity.
    assert recorded == [
        ("parse_markdown_file", str(source)),
        ("snapshot_identity_for_file", f"{source}:default"),
    ]

    normalized = result.normalized
    assert normalized.structure["frontmatter"] == {"status": "accepted"}
    assert normalized.structure["blocks"][1]["type"] == "paragraph"
    assert normalized.links == [
        {"url": "https://example.test/decision", "kind": "markdown_link"}
    ]
    assert normalized.tables[0]["text"] == "| A |\n| - |"

    expected_counts = {
        "block_count": 3,
        "heading_count": 1,
        "section_count": 1,
        "link_count": 1,
        "table_count": 1,
    }
    for field_name, expected in expected_counts.items():
        assert normalized.fields[field_name] == expected, field_name

    assert result.metadata["snapshot"]["snapshot_id"] == "snapshot:decision"
    assert normalized.extractor_metadata["snapshot"]["parser"] == "markdown-it-py/commonmark"


def markdown_payload(markdown: str, path: Path | None = None) -> SourcePayload:
    """Build a ``text/markdown`` ``SourcePayload`` for the ``local_file`` connector.

    Args:
        markdown: Markdown text; it is UTF-8 encoded to form the payload content.
        path: Optional file the text belongs to. When given, it supplies the
            source URI, the reference path, the connector ref and the title
            (the file stem). Without it the payload uses ``memory://markdown``
            as its URI and ``"Markdown"`` as its title.

    Returns:
        The assembled ``SourcePayload``.
    """
    encoded = markdown.encode("utf-8")

    if path:
        reference_path = str(path)
        connector_ref = f"local_file:{path}"
        source_uri = str(path)
        title = path.stem
    else:
        reference_path = None
        connector_ref = None
        source_uri = "memory://markdown"
        title = "Markdown"

    reference = SourceReference(
        source_system="local_file",
        path=reference_path,
        checksum=content_digest(encoded),
        connector_ref=connector_ref,
    )
    return SourcePayload(
        connector_name="local_file",
        source_uri=source_uri,
        source_ref=reference,
        media_type="text/markdown",
        content=encoded,
        title=title,
    )