# Generated from coulomb/repo-seed (Python, 208 lines, 5.4 KiB).
from pathlib import Path
|
|
import importlib.util
|
|
|
|
import pytest
|
|
from click.testing import CliRunner
|
|
|
|
from markitect_tool.cli import main
|
|
from markitect_tool.core import parse_markdown
|
|
from markitect_tool.query import (
|
|
InvalidQueryError,
|
|
extract_document,
|
|
query_document,
|
|
query_document_jsonpath,
|
|
)
|
|
|
|
|
|
# Shared Markdown fixture for the query tests in this module: YAML frontmatter
# with a nested mapping, one level-1 heading, and three level-2 sections.
# Line positions are significant: a test in this file expects "## Decision"
# to sit on line 14 of the document, so keep the blank lines exactly as they
# are. The nested `owner` key must stay indented under `nested:` for the
# `frontmatter.nested.owner` selector to resolve.
QUERY_DOC = """---
document_type: adr
status: accepted
nested:
  owner: Platform
---

# Use Query Selectors

## Context

The problem is that authors need predictable extraction from Markdown.

## Decision

We will use a small selector language before adopting a larger query backend.

## Consequences

- Queries remain readable.
- Extraction can feed later transforms.
"""
|
|
|
|
|
|
def test_query_frontmatter_path():
    """A dotted frontmatter selector yields exactly one frontmatter match."""
    hits = query_document(parse_markdown(QUERY_DOC), "frontmatter.nested.owner")

    assert len(hits) == 1

    owner = hits[0]
    assert owner.kind == "frontmatter"
    assert owner.path == "$.frontmatter.nested.owner"
    assert owner.text == "Platform"
|
|
|
|
|
|
def test_query_headings_by_level():
    """Filtering headings on level=2 returns the three section titles in order."""
    parsed = parse_markdown(QUERY_DOC)

    level_two = query_document(parsed, "headings[level=2]")
    titles = [hit.value["text"] for hit in level_two]

    assert titles == ["Context", "Decision", "Consequences"]
|
|
|
|
|
|
def test_query_sections_by_exact_heading():
    """An exact heading filter selects only the Decision section."""
    parsed = parse_markdown(QUERY_DOC)

    hits = query_document(parsed, "sections[heading=Decision]")

    assert len(hits) == 1

    decision = hits[0]
    assert decision.kind == "section"
    # The fixture places the "## Decision" heading on its 14th line.
    assert decision.line == 14
    assert decision.text.startswith("## Decision")
    assert "small selector language" in decision.text
|
|
|
|
|
|
def test_query_sections_by_case_insensitive_contains():
    """The `contains~=` filter matches an upper-cased needle against section text."""
    parsed = parse_markdown(QUERY_DOC)

    hits = query_document(parsed, "sections[contains~=TRANSFORMS]")
    matched_headings = [hit.value["heading"]["text"] for hit in hits]

    assert matched_headings == ["Consequences"]
|
|
|
|
|
|
def test_query_blocks_by_type():
    """Selecting blocks of type bullet_list finds the single list in the fixture."""
    hits = query_document(parse_markdown(QUERY_DOC), "blocks[type=bullet_list]")

    assert len(hits) == 1

    bullet_list = hits[0]
    assert "Queries remain readable" in bullet_list.text
|
|
|
|
|
|
def test_query_metrics_path():
    """The metrics path exposes the section count as both a value and its text."""
    parsed = parse_markdown(QUERY_DOC)

    hits = query_document(parsed, "metrics.document.sections")
    first = hits[0]

    assert first.value == 4
    assert first.text == "4"
|
|
|
|
|
|
def test_extract_document_returns_textual_matches():
    """extract_document returns a list holding the matched section's text."""
    expected = [
        "## Context\n\nThe problem is that authors need predictable extraction from Markdown."
    ]

    parsed = parse_markdown(QUERY_DOC)
    texts = extract_document(parsed, "sections[heading=Context]")

    assert texts == expected
|
|
|
|
|
|
def test_invalid_query_reports_error():
    """A selector with an unterminated filter bracket raises InvalidQueryError."""
    parsed = parse_markdown(QUERY_DOC)
    malformed_selector = "sections[heading"

    with pytest.raises(InvalidQueryError):
        query_document(parsed, malformed_selector)
|
|
|
|
|
|
@pytest.mark.skipif(
    importlib.util.find_spec("jsonpath_ng") is None,
    reason="jsonpath-ng optional dependency is not installed",
)
def test_query_document_jsonpath_returns_shared_match_envelope():
    """JSONPath results carry both a `value` and a `kind`, checked here per match."""
    parsed = parse_markdown(QUERY_DOC)

    hits = query_document_jsonpath(parsed, "$.headings[?(@.level == 2)].text")

    values = [hit.value for hit in hits]
    assert values == ["Context", "Decision", "Consequences"]
    assert all(hit.kind == "heading_value" for hit in hits)
|
|
|
|
|
|
def test_query_document_jsonpath_reports_missing_optional_dependency(monkeypatch):
    """With jsonpath_ng imports blocked, the JSONPath engine raises InvalidQueryError."""
    parsed = parse_markdown(QUERY_DOC)

    import builtins

    original_import = builtins.__import__

    def blocking_import(module_name, *args, **kwargs):
        # Fail only for jsonpath_ng (and its submodules); delegate everything else.
        if not module_name.startswith("jsonpath_ng"):
            return original_import(module_name, *args, **kwargs)
        raise ImportError("blocked")

    # monkeypatch restores the real __import__ when the test finishes.
    monkeypatch.setattr(builtins, "__import__", blocking_import)

    with pytest.raises(InvalidQueryError, match="optional `jsonpath-ng`"):
        query_document_jsonpath(parsed, "$.headings[*].text")
|
|
|
|
|
|
def test_mkt_query_outputs_json(tmp_path: Path):
    """`query` without a format option prints output containing the match count."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["query", str(doc_path), "sections[heading=Decision]"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert '"count": 1' in outcome.output
    assert "Decision" in outcome.output
|
|
|
|
|
|
def test_mkt_query_outputs_text(tmp_path: Path):
    """`query --format text` prints a match summary followed by the matched headings."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["query", str(doc_path), "headings[level=2]", "--format", "text"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert "3 match(es)" in outcome.output
    assert "## Context" in outcome.output
|
|
|
|
|
|
@pytest.mark.skipif(
    importlib.util.find_spec("jsonpath_ng") is None,
    reason="jsonpath-ng optional dependency is not installed",
)
def test_mkt_query_jsonpath_outputs_json(tmp_path: Path):
    """`query --engine jsonpath` reports the engine name and the selected value."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["query", str(doc_path), "$.frontmatter.status", "--engine", "jsonpath"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert '"engine": "jsonpath"' in outcome.output
    assert '"value": "accepted"' in outcome.output
|
|
|
|
|
|
def test_mkt_extract_outputs_text(tmp_path: Path):
    """`extract` prints the selected frontmatter value as plain text."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(QUERY_DOC, encoding="utf-8")
    cli_args = ["extract", str(doc_path), "frontmatter.status"]

    outcome = CliRunner().invoke(main, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output.strip() == "accepted"
|