Workplan dependencies and prio for text research lab workplans

2026-05-04 00:12:07 +02:00
parent 4fc891c076
commit 6f0facd744
18 changed files with 1644 additions and 1 deletions
--- a/tests/test_query_extraction.py
+++ b/tests/test_query_extraction.py
@@ -0,0 +1,148 @@
+from pathlib import Path
+
+import pytest
+from click.testing import CliRunner
+
+from markitect_tool.cli import main
+from markitect_tool.core import parse_markdown
+from markitect_tool.query import InvalidQueryError, extract_document, query_document
+
+
+QUERY_DOC = """---
+document_type: adr
+status: accepted
+nested:
+  owner: Platform
+---
+
+# Use Query Selectors
+
+## Context
+
+The problem is that authors need predictable extraction from Markdown.
+
+## Decision
+
+We will use a small selector language before adopting a larger query backend.
+
+## Consequences
+
+- Queries remain readable.
+- Extraction can feed later transforms.
+"""  # Shared fixture: front matter, one H1, three H2 sections, one bullet list.
+
+
+def test_query_frontmatter_path():
+    """A dotted frontmatter selector resolves to the single nested value."""
+    parsed = parse_markdown(QUERY_DOC)
+    results = query_document(parsed, "frontmatter.nested.owner")
+
+    assert len(results) == 1
+    hit = results[0]
+    assert (hit.kind, hit.path) == ("frontmatter", "$.frontmatter.nested.owner")
+    assert hit.text == "Platform"
+
+
+def test_query_headings_by_level():
+    """Filtering headings on ``level=2`` yields the H2 titles in document order."""
+    expected_titles = ["Context", "Decision", "Consequences"]
+
+    parsed = parse_markdown(QUERY_DOC)
+    results = query_document(parsed, "headings[level=2]")
+
+    titles = [hit.value["text"] for hit in results]
+
+    assert titles == expected_titles
+
+
+def test_query_sections_by_exact_heading():
+    """An exact ``heading=`` filter returns only the matching section."""
+    parsed = parse_markdown(QUERY_DOC)
+    results = query_document(parsed, "sections[heading=Decision]")
+    assert len(results) == 1
+    section = results[0]
+    # "## Decision" is line 14 of QUERY_DOC, counting the front matter lines.
+    assert (section.kind, section.line) == ("section", 14)
+    assert section.text.startswith("## Decision")
+    assert "small selector language" in section.text
+
+
+def test_query_sections_by_case_insensitive_contains():
+    """The upper-case ``contains~=`` needle still finds the lower-case body text."""
+    parsed = parse_markdown(QUERY_DOC)
+    results = query_document(parsed, "sections[contains~=TRANSFORMS]")
+    headings = [hit.value["heading"]["text"] for hit in results]
+    assert headings == ["Consequences"]
+
+
+def test_query_blocks_by_type():
+    """A ``type=bullet_list`` filter selects the fixture's only bullet list."""
+    parsed = parse_markdown(QUERY_DOC)
+    bullet_lists = query_document(parsed, "blocks[type=bullet_list]")
+
+    assert len(bullet_lists) == 1
+    assert "Queries remain readable" in bullet_lists[0].text
+
+
+def test_query_metrics_path():
+    """A metrics path exposes the section count both as a value and as text."""
+    parsed = parse_markdown(QUERY_DOC)
+    results = query_document(parsed, "metrics.document.sections")
+
+    first = results[0]
+    assert (first.value, first.text) == (4, "4")
+
+
+def test_extract_document_returns_textual_matches():
+    """Extraction returns the matched section as text: heading plus body."""
+    context_section = (
+        "## Context\n\nThe problem is that authors need predictable extraction from Markdown."
+    )
+    parsed = parse_markdown(QUERY_DOC)
+
+    assert extract_document(parsed, "sections[heading=Context]") == [context_section]
+
+
+def test_invalid_query_reports_error():
+    """A selector with an unclosed bracket raises ``InvalidQueryError``."""
+    parsed = parse_markdown(QUERY_DOC)
+    with pytest.raises(InvalidQueryError):
+        query_document(parsed, "sections[heading")
+
+
+def test_mkt_query_outputs_json(tmp_path: Path):
+    """``query`` with no ``--format`` prints a ``"count": 1`` entry and the heading."""
+    doc_path = tmp_path / "doc.md"
+    doc_path.write_text(QUERY_DOC, encoding="utf-8")
+    cli_args = ["query", str(doc_path), "sections[heading=Decision]"]
+
+    outcome = CliRunner().invoke(main, cli_args)
+
+    assert outcome.exit_code == 0
+    for fragment in ('"count": 1', "Decision"):
+        assert fragment in outcome.output
+
+
+def test_mkt_query_outputs_text(tmp_path: Path):
+    """``query --format text`` prints a match summary and the matched headings."""
+    doc_path = tmp_path / "doc.md"
+    doc_path.write_text(QUERY_DOC, encoding="utf-8")
+    cli_args = ["query", str(doc_path), "headings[level=2]", "--format", "text"]
+
+    outcome = CliRunner().invoke(main, cli_args)
+
+    assert outcome.exit_code == 0
+    for fragment in ("3 match(es)", "## Context"):
+        assert fragment in outcome.output
+
+
+def test_mkt_extract_outputs_text(tmp_path: Path):
+    """``extract`` prints just the selected frontmatter value."""
+    doc_path = tmp_path / "doc.md"
+    doc_path.write_text(QUERY_DOC, encoding="utf-8")
+    cli_args = ["extract", str(doc_path), "frontmatter.status"]
+
+    outcome = CliRunner().invoke(main, cli_args)
+
+    assert outcome.exit_code == 0
+    assert outcome.output.strip() == "accepted"