# kontextual-engine/tests/test_markitect_ingestion_adapter.py

import sys
from pathlib import Path
from types import SimpleNamespace

import pytest

from kontextual_engine import SourcePayload, SourceReference, content_digest
from kontextual_engine.adapters.markitect_tool import MarkitectMarkdownExtractor
from kontextual_engine.errors import AdapterUnavailableError


def test_markitect_markdown_extractor_missing_dependency_is_structured(monkeypatch: pytest.MonkeyPatch) -> None:
    """Extraction without ``markitect_tool`` raises ``AdapterUnavailableError`` with structured details."""
    # A ``None`` entry in ``sys.modules`` makes ``import markitect_tool`` fail,
    # which simulates the optional dependency not being installed.
    monkeypatch.setitem(sys.modules, "markitect_tool", None)
    extractor = MarkitectMarkdownExtractor()
    payload = markdown_payload("# Missing Adapter\n")

    with pytest.raises(AdapterUnavailableError) as raised:
        extractor.extract(payload)

    expected_details = {"adapter": "markitect-tool", "media_type": "text/markdown"}
    assert raised.value.details == expected_details


def test_markitect_markdown_extractor_delegates_to_markitect_tool(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Extraction of a file-backed payload goes through a stubbed ``markitect_tool``.

    A stand-in ``markitect_tool`` namespace is installed in ``sys.modules``.
    Its ``parse_markdown_file`` and ``snapshot_identity_for_file`` fakes log
    every invocation and return canned dictionaries; the test then asserts the
    recorded call order and how those dictionaries surface on the result.
    """
    source = tmp_path / "decision.md"
    source.write_text("# Decision\n\nUse Markitect.\n", encoding="utf-8")
    recorded: list[tuple[str, str]] = []

    def parse_markdown_file(path: Path) -> SimpleNamespace:
        recorded.append(("parse_markdown_file", str(path)))

        def to_dict() -> dict:
            # Canned parse result: three blocks (one of them a table), one
            # heading, one section, and a single link token.
            blocks = [
                {"type": "heading", "text": "Decision", "line_start": 1, "heading_level": 1},
                {"type": "paragraph", "text": "Use Markitect.", "line_start": 3},
                {"type": "table", "text": "| A |\n| - |", "line_start": 5, "line_end": 6},
            ]
            sections = [
                {
                    "heading": {"level": 1, "text": "Decision", "line": 1},
                    "blocks": [{"type": "paragraph", "text": "Use Markitect.", "line_start": 3}],
                }
            ]
            link_token = {
                "type": "link_open",
                "attrs": {"href": "https://example.test/decision"},
            }
            return {
                "frontmatter": {"status": "accepted"},
                "blocks": blocks,
                "headings": [{"level": 1, "text": "Decision", "line": 1}],
                "sections": sections,
                "tokens": [{"type": "inline", "children": [link_token]}],
            }

        return SimpleNamespace(to_dict=to_dict)

    def parse_markdown(text, source_path=None):
        # In-memory parser stub; it records nothing and always returns None.
        return None

    def snapshot_identity_for_file(path: Path, *, parse_options: dict) -> SimpleNamespace:
        profile = parse_options["profile"]
        recorded.append(("snapshot_identity_for_file", f"{path}:{profile}"))

        def to_dict() -> dict:
            return {
                "snapshot_id": "snapshot:decision",
                "content_hash": "sha256:decision",
                "parser": "markdown-it-py/commonmark",
            }

        return SimpleNamespace(to_dict=to_dict)

    stub_module = SimpleNamespace(
        parse_markdown_file=parse_markdown_file,
        parse_markdown=parse_markdown,
        snapshot_identity_for_file=snapshot_identity_for_file,
    )
    monkeypatch.setitem(sys.modules, "markitect_tool", stub_module)

    payload = markdown_payload(source.read_text(encoding="utf-8"), source)
    result = MarkitectMarkdownExtractor().extract(payload)

    # The file parser must run first, then the snapshot call with the
    # "default" profile.
    assert recorded == [
        ("parse_markdown_file", str(source)),
        ("snapshot_identity_for_file", f"{source}:default"),
    ]

    normalized = result.normalized
    assert normalized.structure["frontmatter"] == {"status": "accepted"}
    assert normalized.structure["blocks"][1]["type"] == "paragraph"
    assert normalized.links == [
        {"url": "https://example.test/decision", "kind": "markdown_link"}
    ]
    assert normalized.tables[0]["text"] == "| A |\n| - |"

    expected_counts = {
        "block_count": 3,
        "heading_count": 1,
        "section_count": 1,
        "link_count": 1,
        "table_count": 1,
    }
    for field_name, expected in expected_counts.items():
        assert normalized.fields[field_name] == expected

    assert result.metadata["snapshot"]["snapshot_id"] == "snapshot:decision"
    assert normalized.extractor_metadata["snapshot"]["parser"] == "markdown-it-py/commonmark"


def markdown_payload(markdown: str, path: Path | None = None) -> SourcePayload:
    """Build a ``local_file`` ``SourcePayload`` carrying *markdown* as UTF-8 bytes.

    With a *path*, the source URI, reference path, connector ref and title
    (the path stem) are derived from it. Without one, the payload uses the
    ``memory://markdown`` URI and the title ``"Markdown"``, and the reference's
    path and connector ref are ``None``.
    """
    encoded = markdown.encode("utf-8")

    if path:
        ref_path = str(path)
        connector_ref = f"local_file:{path}"
        uri = str(path)
        title = path.stem
    else:
        ref_path = None
        connector_ref = None
        uri = "memory://markdown"
        title = "Markdown"

    reference = SourceReference(
        source_system="local_file",
        path=ref_path,
        checksum=content_digest(encoded),
        connector_ref=connector_ref,
    )
    return SourcePayload(
        connector_name="local_file",
        source_uri=uri,
        source_ref=reference,
        media_type="text/markdown",
        content=encoded,
        title=title,
    )