Files
markitect-tool/tests/test_extension_characterization.py

177 lines
5.5 KiB
Python

from pathlib import Path
import builtins
from click.testing import CliRunner
import pytest
from markitect_tool.backend import (
LocalSnapshotStore,
capability_check,
load_backend_manifest,
load_backend_registry,
local_index_path_for,
)
from markitect_tool.cli import main
from markitect_tool.core import parse_markdown
from markitect_tool.processor import ProcessorContext, run_fenced_processors
from markitect_tool.query import (
default_query_engine_registry,
InvalidQueryError,
extract_document,
query_document,
query_document_jsonpath,
)
from markitect_tool.reference import ReferenceContext, resolve_reference
# Shared Markdown fixture used by the query, local-index, and CLI tests in
# this module: a frontmatter block (document_type / status) followed by a
# "Decision Record" title and "Context" / "Decision" sub-headings.
CHARACTERIZATION_DOC = """---
document_type: adr
status: accepted
---
# Decision Record
## Context
Authors need stable infrastructure seams.
## Decision
Use explicit registries and processing envelopes.
"""
def test_query_selector_and_extraction_characterization():
    """Pin selector-engine registration, section selection, and extraction."""
    parsed = parse_markdown(CHARACTERIZATION_DOC)
    engines = default_query_engine_registry()

    matches = query_document(parsed, "sections[heading=Decision]")
    status_values = extract_document(parsed, "frontmatter.status")

    # The default registry exposes the "selector" entry as a query engine.
    selector_engine = engines.get("selector")
    assert selector_engine.descriptor.kind == "query-engine"

    # Exactly one section matches the "Decision" heading selector.
    assert len(matches) == 1
    decision = matches[0]
    assert decision.kind == "section"
    assert decision.path == "$.sections[2]"
    assert decision.text.startswith("## Decision")

    # Extraction yields the frontmatter status value wrapped in a list.
    assert status_values == ["accepted"]
def test_jsonpath_missing_dependency_diagnostic_characterization(monkeypatch):
    """Pin the error raised when ``jsonpath_ng`` cannot be imported."""
    parsed = parse_markdown(CHARACTERIZATION_DOC)
    original_import = builtins.__import__

    def blocking_import(name, *args, **kwargs):
        # Delegate every import except the JSONPath package, which is made
        # to look uninstalled.
        if not name.startswith("jsonpath_ng"):
            return original_import(name, *args, **kwargs)
        raise ImportError("blocked")

    # monkeypatch restores the real ``__import__`` when the test finishes.
    monkeypatch.setattr(builtins, "__import__", blocking_import)

    expected_message = "optional `jsonpath-ng`"
    with pytest.raises(InvalidQueryError, match=expected_message):
        query_document_jsonpath(parsed, "$.headings[*].text")
def test_processor_registry_result_provenance_characterization():
    """Pin block metadata, output, and provenance for the uppercase processor."""
    # A single fenced block tagged ``mkt-uppercase`` with the id ``shout``.
    fenced_source = (
        "```mkt-uppercase {#shout}\n"
        "hello\n"
        "```\n"
    )

    outcome = run_fenced_processors(fenced_source, context=ProcessorContext())

    assert outcome.valid

    first_block = outcome.blocks[0]
    assert first_block.processor == "uppercase"
    assert first_block.unit_id == "shout"

    first_result = outcome.results[0]
    assert first_result.content == "HELLO\n"
    assert first_result.provenance[0].operation == "processor.uppercase"
def test_unknown_processor_diagnostic_characterization():
    """Pin the error diagnostic reported for the ``mkt-missing`` fence."""
    fenced_source = (
        "```mkt-missing {#x}\n"
        "content\n"
        "```\n"
    )

    outcome = run_fenced_processors(fenced_source, context=ProcessorContext())

    assert not outcome.valid

    # Inspect the serialized form of the first diagnostic on the first result.
    reported = outcome.results[0].diagnostics[0].to_dict()
    for field, expected in (
        ("severity", "error"),
        ("code", "processor.unknown"),
    ):
        assert reported[field] == expected
    assert "Unknown processor" in reported["message"]
def test_backend_manifest_registry_characterization():
    """Pin manifest loading, registry lookup, and capability checking.

    The example backend definitions are located relative to this test file
    instead of the process working directory, so the outcome does not depend
    on the directory pytest is launched from.
    """
    # NOTE(review): assumes ``examples/`` lives one level above this tests
    # directory (the project root) -- confirm against the repository layout.
    backends_dir = Path(__file__).resolve().parents[1] / "examples" / "backends"

    # Paths are passed as strings, matching the argument types used before.
    manifest = load_backend_manifest(str(backends_dir / "local-sqlite-backend.md"))
    registry = load_backend_registry([str(backends_dir)])
    check = capability_check(manifest, ["snapshots", "fts", "provenance"])

    assert manifest.id == "local-sqlite-cache"
    assert registry.get("local-sqlite-cache").storage["engine"] == "sqlite"
    assert check.compatible
def test_local_index_snapshot_query_search_characterization(tmp_path: Path):
    """Pin build, state, document lookup, and search on a snapshot store."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(CHARACTERIZATION_DOC, encoding="utf-8")

    index_store = LocalSnapshotStore(local_index_path_for(tmp_path))

    # Build over the temporary directory, then read back through the store.
    build_report = index_store.build([tmp_path], root=tmp_path)
    first_state = index_store.load_state()[0]
    stored_document = index_store.get_document("doc.md")
    hits = index_store.search("registries")

    assert build_report.parsed == ["doc.md"]

    assert first_state.path == "doc.md"
    assert first_state.snapshot_id.startswith("snapshot:")

    top_heading = stored_document["headings"][0]
    assert top_heading["text"] == "Decision Record"

    best_hit = hits[0]
    assert best_hit.path == "doc.md"
    assert best_hit.unit_kind in {"section", "block"}
def test_reference_resolution_characterization(tmp_path: Path):
    """Pin resolution of ``target.md#decision`` from a sibling document."""
    current_doc = tmp_path / "context.md"
    referenced_doc = tmp_path / "target.md"
    current_doc.write_text("# Context\n", encoding="utf-8")
    referenced_doc.write_text(
        "# Target\n\n## Decision\n\nChosen text.\n",
        encoding="utf-8",
    )

    ref_context = ReferenceContext(root=tmp_path, current_path=current_doc)
    resolved = resolve_reference("target.md#decision", context=ref_context)

    # The target path is reported as the resolved path string of target.md.
    assert resolved.target_path == str(referenced_doc.resolve())

    first_unit = resolved.units[0]
    assert first_unit.kind == "section"
    assert first_unit.unit_id == "decision"
    assert "Chosen text" in first_unit.text
def test_cli_output_envelopes_characterization(tmp_path: Path):
    """Pin exit codes and output fragments of query and cache CLI commands."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(CHARACTERIZATION_DOC, encoding="utf-8")
    cli = CliRunner()
    root_arg = str(tmp_path)

    # Argument vectors for the three invocations, run in this order.
    query_args = [
        "query",
        str(doc_path),
        "sections[heading=Decision]",
        "--format",
        "json",
    ]
    index_args = ["cache", "index", root_arg, "--root", root_arg]
    cache_query_args = [
        "cache",
        "query",
        "frontmatter.status",
        "--root",
        root_arg,
        "--format",
        "json",
    ]

    query_result = cli.invoke(main, query_args)
    index_result = cli.invoke(main, index_args)
    cache_query_result = cli.invoke(main, cache_query_args)

    # Direct query of the file.
    assert query_result.exit_code == 0
    assert '"engine": "selector"' in query_result.output
    assert '"count": 1' in query_result.output

    # Index build over the temporary directory.
    assert index_result.exit_code == 0
    assert "parsed: 1" in index_result.output

    # Query through the cache built by the previous invocation.
    assert cache_query_result.exit_code == 0
    assert '"source_path": "doc.md"' in cache_query_result.output