generated from coulomb/repo-seed
122 lines
4.7 KiB
Python
122 lines
4.7 KiB
Python
import sys
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from kontextual_engine import SourcePayload, SourceReference, content_digest
|
|
from kontextual_engine.adapters.markitect_tool import MarkitectMarkdownExtractor
|
|
from kontextual_engine.errors import AdapterUnavailableError
|
|
|
|
|
|
def test_markitect_markdown_extractor_missing_dependency_is_structured(monkeypatch: pytest.MonkeyPatch) -> None:
    """Extraction without ``markitect_tool`` raises a structured adapter error.

    The test expects ``AdapterUnavailableError`` from ``extract`` and checks that
    its ``details`` mapping names the adapter and the payload's media type.
    """
    # A ``None`` entry in ``sys.modules`` makes ``import markitect_tool`` fail,
    # which simulates the optional dependency not being installed.
    monkeypatch.setitem(sys.modules, "markitect_tool", None)
    expected_details = {
        "adapter": "markitect-tool",
        "media_type": "text/markdown",
    }

    adapter = MarkitectMarkdownExtractor()
    source_payload = markdown_payload("# Missing Adapter\n")

    with pytest.raises(AdapterUnavailableError) as raised:
        adapter.extract(source_payload)

    assert raised.value.details == expected_details
|
|
|
|
|
|
def test_markitect_markdown_extractor_delegates_to_markitect_tool(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Check that extraction goes through a stubbed ``markitect_tool`` module.

    A fake module is installed in ``sys.modules`` whose ``parse_markdown_file``
    and ``snapshot_identity_for_file`` record every call they receive.  The test
    then asserts the order and arguments of those calls, and the values the
    extractor exposes on ``result.normalized`` and ``result.metadata``.
    """
    markdown_file = tmp_path / "decision.md"
    markdown_file.write_text("# Decision\n\nUse Markitect.\n", encoding="utf-8")
    recorded_calls: list[tuple[str, str]] = []

    def parsed_document() -> dict:
        # Canned parse result; a new dict is built on every call.
        heading = {"level": 1, "text": "Decision", "line": 1}
        return {
            "frontmatter": {"status": "accepted"},
            "blocks": [
                {"type": "heading", "text": "Decision", "line_start": 1, "heading_level": 1},
                {"type": "paragraph", "text": "Use Markitect.", "line_start": 3},
                {"type": "table", "text": "| A |\n| - |", "line_start": 5, "line_end": 6},
            ],
            "headings": [dict(heading)],
            "sections": [
                {
                    "heading": dict(heading),
                    "blocks": [{"type": "paragraph", "text": "Use Markitect.", "line_start": 3}],
                }
            ],
            "tokens": [
                {
                    "type": "inline",
                    "children": [
                        {
                            "type": "link_open",
                            "attrs": {"href": "https://example.test/decision"},
                        }
                    ],
                }
            ],
        }

    def snapshot_identity() -> dict:
        # Canned snapshot identity; a new dict is built on every call.
        return {
            "snapshot_id": "snapshot:decision",
            "content_hash": "sha256:decision",
            "parser": "markdown-it-py/commonmark",
        }

    def fake_parse_markdown_file(path: Path) -> SimpleNamespace:
        recorded_calls.append(("parse_markdown_file", str(path)))
        return SimpleNamespace(to_dict=parsed_document)

    def fake_parse_markdown(text, source_path=None):
        # Text-based entry point: returns None and records nothing, so the call
        # log below only ever contains the two file-based functions.
        return None

    def fake_snapshot_identity_for_file(path: Path, *, parse_options: dict) -> SimpleNamespace:
        profile = parse_options["profile"]
        recorded_calls.append(("snapshot_identity_for_file", f"{path}:{profile}"))
        return SimpleNamespace(to_dict=snapshot_identity)

    fake_module = SimpleNamespace(
        parse_markdown_file=fake_parse_markdown_file,
        parse_markdown=fake_parse_markdown,
        snapshot_identity_for_file=fake_snapshot_identity_for_file,
    )
    monkeypatch.setitem(sys.modules, "markitect_tool", fake_module)

    extractor = MarkitectMarkdownExtractor()
    markdown_text = markdown_file.read_text(encoding="utf-8")
    result = extractor.extract(markdown_payload(markdown_text, markdown_file))

    # Exactly two delegated calls, in this order, with the "default" profile.
    expected_calls = [
        ("parse_markdown_file", str(markdown_file)),
        ("snapshot_identity_for_file", f"{markdown_file}:default"),
    ]
    assert recorded_calls == expected_calls

    normalized = result.normalized
    assert normalized.structure["frontmatter"] == {"status": "accepted"}
    assert normalized.structure["blocks"][1]["type"] == "paragraph"

    expected_links = [{"url": "https://example.test/decision", "kind": "markdown_link"}]
    assert normalized.links == expected_links
    assert normalized.tables[0]["text"] == "| A |\n| - |"

    # Checked one at a time, in the listed order.
    expected_counts = {
        "block_count": 3,
        "heading_count": 1,
        "section_count": 1,
        "link_count": 1,
        "table_count": 1,
    }
    for field_name, expected_value in expected_counts.items():
        assert normalized.fields[field_name] == expected_value

    assert result.metadata["snapshot"]["snapshot_id"] == "snapshot:decision"
    assert normalized.extractor_metadata["snapshot"]["parser"] == "markdown-it-py/commonmark"
|
|
|
|
|
|
def markdown_payload(markdown: str, path: Path | None = None) -> SourcePayload:
    """Build a ``local_file`` Markdown ``SourcePayload`` for the tests above.

    Args:
        markdown: Markdown text; it is UTF-8 encoded to form the payload content
            and the input to ``content_digest`` for the reference checksum.
        path: Optional file the text came from.  When given, it supplies the
            reference path, the connector ref, the source URI and the title
            (its stem).  Otherwise the URI is ``memory://markdown``, the title
            is ``Markdown`` and the reference path and connector ref are ``None``.

    Returns:
        A ``SourcePayload`` with media type ``text/markdown``.
    """
    encoded = markdown.encode("utf-8")

    if path:
        location = str(path)
        uri = location
        connector_ref = f"local_file:{path}"
        title = path.stem
    else:
        location = None
        uri = "memory://markdown"
        connector_ref = None
        title = "Markdown"

    reference = SourceReference(
        source_system="local_file",
        path=location,
        checksum=content_digest(encoded),
        connector_ref=connector_ref,
    )
    return SourcePayload(
        connector_name="local_file",
        source_uri=uri,
        source_ref=reference,
        media_type="text/markdown",
        content=encoded,
        title=title,
    )
|