generated from coulomb/repo-seed
Workplan dependencies and prio for text research lab workplans
This commit is contained in:
148
tests/test_query_extraction.py
Normal file
148
tests/test_query_extraction.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from markitect_tool.cli import main
|
||||
from markitect_tool.core import parse_markdown
|
||||
from markitect_tool.query import InvalidQueryError, extract_document, query_document
|
||||
|
||||
|
||||
# Shared Markdown fixture for the tests in this module: YAML front matter
# (including one nested mapping) followed by one H1 and three H2 sections.
# Blank lines and the two-space indent under ``nested:`` are significant:
# the tests assert on a nested front matter path and on a source line number.
QUERY_DOC = """---
document_type: adr
status: accepted
nested:
  owner: Platform
---

# Use Query Selectors

## Context

The problem is that authors need predictable extraction from Markdown.

## Decision

We will use a small selector language before adopting a larger query backend.

## Consequences

- Queries remain readable.
- Extraction can feed later transforms.
"""
|
||||
|
||||
|
||||
def test_query_frontmatter_path():
    """A dotted ``frontmatter.*`` selector yields exactly one match for the nested key."""
    parsed = parse_markdown(QUERY_DOC)
    found = query_document(parsed, "frontmatter.nested.owner")

    assert len(found) == 1

    hit = found[0]
    assert hit.kind == "frontmatter"
    assert hit.path == "$.frontmatter.nested.owner"
    assert hit.text == "Platform"
|
||||
|
||||
|
||||
def test_query_headings_by_level():
    """``headings[level=2]`` returns the three level-two headings, in order."""
    parsed = parse_markdown(QUERY_DOC)
    found = query_document(parsed, "headings[level=2]")

    heading_texts = [hit.value["text"] for hit in found]

    assert heading_texts == ["Context", "Decision", "Consequences"]
|
||||
|
||||
|
||||
def test_query_sections_by_exact_heading():
    """``sections[heading=Decision]`` selects only the Decision section."""
    parsed = parse_markdown(QUERY_DOC)
    found = query_document(parsed, "sections[heading=Decision]")

    assert len(found) == 1

    section = found[0]
    assert section.kind == "section"
    # 14 is the line the "## Decision" heading is expected to occupy in
    # QUERY_DOC (front matter lines included in the count).
    assert section.line == 14
    assert section.text.startswith("## Decision")
    assert "small selector language" in section.text
|
||||
|
||||
|
||||
def test_query_sections_by_case_insensitive_contains():
    """An upper-case ``contains~=`` needle still matches the lower-case section text."""
    parsed = parse_markdown(QUERY_DOC)
    found = query_document(parsed, "sections[contains~=TRANSFORMS]")

    matched_headings = [hit.value["heading"]["text"] for hit in found]

    assert matched_headings == ["Consequences"]
|
||||
|
||||
|
||||
def test_query_blocks_by_type():
    """``blocks[type=bullet_list]`` finds the one bullet list in the fixture."""
    parsed = parse_markdown(QUERY_DOC)
    found = query_document(parsed, "blocks[type=bullet_list]")

    assert len(found) == 1

    bullet_list = found[0]
    assert "Queries remain readable" in bullet_list.text
|
||||
|
||||
|
||||
def test_query_metrics_path():
    """The ``metrics.document.sections`` path reports 4, as a value and as text."""
    parsed = parse_markdown(QUERY_DOC)
    found = query_document(parsed, "metrics.document.sections")

    section_count = found[0]
    assert section_count.value == 4
    assert section_count.text == "4"
|
||||
|
||||
|
||||
def test_extract_document_returns_textual_matches():
    """``extract_document`` returns a list of plain strings, one per matched section."""
    parsed = parse_markdown(QUERY_DOC)

    # Heading, blank line, then the section's single paragraph.
    expected_section = (
        "## Context\n\n"
        "The problem is that authors need predictable extraction from Markdown."
    )

    assert extract_document(parsed, "sections[heading=Context]") == [expected_section]
|
||||
|
||||
|
||||
def test_invalid_query_reports_error():
    """A selector with an unterminated predicate raises ``InvalidQueryError``."""
    parsed = parse_markdown(QUERY_DOC)
    malformed_selector = "sections[heading"  # no "=value" and no closing bracket

    with pytest.raises(InvalidQueryError):
        query_document(parsed, malformed_selector)
|
||||
|
||||
|
||||
def test_mkt_query_outputs_json(tmp_path: Path):
    """Run the ``query`` CLI command without ``--format`` and check its output.

    The fixture is written to a temporary file, queried for the Decision
    section, and the output is expected to contain a ``"count": 1`` entry
    along with the matched heading text.
    """
    source = tmp_path / "doc.md"
    source.write_text(QUERY_DOC, encoding="utf-8")

    result = CliRunner().invoke(
        main, ["query", str(source), "sections[heading=Decision]"]
    )

    # Attach the captured output so a non-zero exit is diagnosable from the
    # pytest failure report instead of showing only the exit code.
    assert result.exit_code == 0, result.output
    assert '"count": 1' in result.output
    assert "Decision" in result.output
|
||||
|
||||
|
||||
def test_mkt_query_outputs_text(tmp_path: Path):
    """Run the ``query`` CLI command with ``--format text`` and check its output.

    The fixture is written to a temporary file and queried for level-two
    headings; the text output is expected to report three matches and to
    include the ``## Context`` heading.
    """
    source = tmp_path / "doc.md"
    source.write_text(QUERY_DOC, encoding="utf-8")

    result = CliRunner().invoke(
        main, ["query", str(source), "headings[level=2]", "--format", "text"]
    )

    # Attach the captured output so a non-zero exit is diagnosable from the
    # pytest failure report instead of showing only the exit code.
    assert result.exit_code == 0, result.output
    assert "3 match(es)" in result.output
    assert "## Context" in result.output
|
||||
|
||||
|
||||
def test_mkt_extract_outputs_text(tmp_path: Path):
    """Run the ``extract`` CLI command on a front matter path and check its output.

    The fixture is written to a temporary file; extracting
    ``frontmatter.status`` is expected to print just the value ``accepted``
    (surrounding whitespace ignored).
    """
    source = tmp_path / "doc.md"
    source.write_text(QUERY_DOC, encoding="utf-8")

    result = CliRunner().invoke(
        main, ["extract", str(source), "frontmatter.status"]
    )

    # Attach the captured output so a non-zero exit is diagnosable from the
    # pytest failure report instead of showing only the exit code.
    assert result.exit_code == 0, result.output
    assert result.output.strip() == "accepted"
|
||||
Reference in New Issue
Block a user