Files
markitect-tool/tests/test_query_extraction.py

208 lines
5.4 KiB
Python

from pathlib import Path
import importlib.util
import pytest
from click.testing import CliRunner
from markitect_tool.cli import main
from markitect_tool.core import parse_markdown
from markitect_tool.query import (
InvalidQueryError,
extract_document,
query_document,
query_document_jsonpath,
)
# Shared Markdown fixture for every test in this module.
#
# The layout is significant:
# * ``owner`` is indented under ``nested`` so that the frontmatter path
#   ``frontmatter.nested.owner`` resolves to "Platform".
# * Blank lines separate the blocks, which places "## Decision" on line 14
#   of the document and puts an empty line between each heading and its body
#   (the section-extraction test expects "## Context\n\nThe problem ...").
QUERY_DOC = """---
document_type: adr
status: accepted
nested:
  owner: Platform
---

# Use Query Selectors

## Context

The problem is that authors need predictable extraction from Markdown.

## Decision

We will use a small selector language before adopting a larger query backend.

## Consequences

- Queries remain readable.
- Extraction can feed later transforms.
"""
def test_query_frontmatter_path():
    """A dotted ``frontmatter`` selector yields one match for the nested key."""
    parsed = parse_markdown(QUERY_DOC)

    results = query_document(parsed, "frontmatter.nested.owner")

    assert len(results) == 1
    hit = results[0]
    assert hit.kind == "frontmatter"
    assert hit.path == "$.frontmatter.nested.owner"
    assert hit.text == "Platform"
def test_query_headings_by_level():
    """``headings[level=2]`` returns the three level-2 headings in order."""
    parsed = parse_markdown(QUERY_DOC)
    results = query_document(parsed, "headings[level=2]")

    heading_texts = [hit.value["text"] for hit in results]

    assert heading_texts == ["Context", "Decision", "Consequences"]
def test_query_sections_by_exact_heading():
    """``sections[heading=Decision]`` selects exactly the Decision section."""
    parsed = parse_markdown(QUERY_DOC)

    results = query_document(parsed, "sections[heading=Decision]")

    assert len(results) == 1
    section = results[0]
    assert section.kind == "section"
    # The expected line number is tied to the layout of QUERY_DOC.
    assert section.line == 14
    assert section.text.startswith("## Decision")
    assert "small selector language" in section.text
def test_query_sections_by_case_insensitive_contains():
    """``contains~=`` matches regardless of case.

    The needle is upper-case ("TRANSFORMS") while the fixture only contains
    the lower-case word, so a match proves the comparison ignores case.
    """
    parsed = parse_markdown(QUERY_DOC)
    results = query_document(parsed, "sections[contains~=TRANSFORMS]")

    matched_headings = [hit.value["heading"]["text"] for hit in results]

    assert matched_headings == ["Consequences"]
def test_query_blocks_by_type():
    """``blocks[type=bullet_list]`` finds the single bullet list in the fixture."""
    parsed = parse_markdown(QUERY_DOC)

    results = query_document(parsed, "blocks[type=bullet_list]")

    assert len(results) == 1
    bullet_list = results[0]
    assert "Queries remain readable" in bullet_list.text
def test_query_metrics_path():
    """A ``metrics`` path exposes the section count as a value and as text."""
    parsed = parse_markdown(QUERY_DOC)
    results = query_document(parsed, "metrics.document.sections")

    first = results[0]

    assert first.value == 4
    assert first.text == "4"
def test_extract_document_returns_textual_matches():
    """``extract_document`` returns a list of strings, one per matched section."""
    parsed = parse_markdown(QUERY_DOC)
    expected = [
        "## Context\n\nThe problem is that authors need predictable extraction from Markdown."
    ]

    texts = extract_document(parsed, "sections[heading=Context]")

    assert texts == expected
def test_invalid_query_reports_error():
    """A selector with an unterminated ``[`` filter raises ``InvalidQueryError``."""
    parsed = parse_markdown(QUERY_DOC)
    malformed_selector = "sections[heading"

    with pytest.raises(InvalidQueryError):
        query_document(parsed, malformed_selector)
@pytest.mark.skipif(
    importlib.util.find_spec("jsonpath_ng") is None,
    reason="jsonpath-ng optional dependency is not installed",
)
def test_query_document_jsonpath_returns_shared_match_envelope():
    """JSONPath results expose ``value`` and ``kind`` on each match.

    Skipped when the optional ``jsonpath_ng`` package cannot be found.
    """
    parsed = parse_markdown(QUERY_DOC)
    expression = "$.headings[?(@.level == 2)].text"

    results = query_document_jsonpath(parsed, expression)

    values = [hit.value for hit in results]
    assert values == ["Context", "Decision", "Consequences"]
    assert all(hit.kind == "heading_value" for hit in results)
def test_query_document_jsonpath_reports_missing_optional_dependency(monkeypatch):
    """A blocked ``jsonpath_ng`` import surfaces as ``InvalidQueryError``.

    ``builtins.__import__`` is patched so that any import whose name starts
    with ``jsonpath_ng`` raises ``ImportError``; all other imports are
    delegated to the real importer.
    """
    import builtins

    parsed = parse_markdown(QUERY_DOC)
    original_import = builtins.__import__

    def blocking_import(name, *args, **kwargs):
        # Guard clause: everything except the optional dependency imports normally.
        if not name.startswith("jsonpath_ng"):
            return original_import(name, *args, **kwargs)
        raise ImportError("blocked")

    monkeypatch.setattr(builtins, "__import__", blocking_import)

    with pytest.raises(InvalidQueryError, match="optional `jsonpath-ng`"):
        query_document_jsonpath(parsed, "$.headings[*].text")
def test_mkt_query_outputs_json(tmp_path: Path):
    """``query`` without ``--format`` exits 0 and reports a single match.

    The assertions look for the ``"count": 1`` field and the heading text in
    the command output.
    """
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["query", str(doc_path), "sections[heading=Decision]"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert '"count": 1' in outcome.output
    assert "Decision" in outcome.output
def test_mkt_query_outputs_text(tmp_path: Path):
    """``query --format text`` prints a match count and the matched headings."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["query", str(doc_path), "headings[level=2]", "--format", "text"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert "3 match(es)" in outcome.output
    assert "## Context" in outcome.output
@pytest.mark.skipif(
    importlib.util.find_spec("jsonpath_ng") is None,
    reason="jsonpath-ng optional dependency is not installed",
)
def test_mkt_query_jsonpath_outputs_json(tmp_path: Path):
    """``query --engine jsonpath`` reports the engine and the matched value.

    Skipped when the optional ``jsonpath_ng`` package cannot be found.
    """
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["query", str(doc_path), "$.frontmatter.status", "--engine", "jsonpath"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert '"engine": "jsonpath"' in outcome.output
    assert '"value": "accepted"' in outcome.output
def test_mkt_extract_outputs_text(tmp_path: Path):
    """``extract`` prints the frontmatter value itself (modulo outer whitespace)."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["extract", str(doc_path), "frontmatter.status"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output.strip() == "accepted"