"""Characterization tests pinning the observable behavior of markitect_tool seams.

Each test exercises one public entry point (query, processors, backends,
local index, references, CLI) and asserts on the exact values it currently
produces, so unintended behavior changes show up as failures.
"""

from pathlib import Path
import builtins

from click.testing import CliRunner
import pytest

from markitect_tool.backend import (
    LocalSnapshotStore,
    capability_check,
    load_backend_manifest,
    load_backend_registry,
    local_index_path_for,
)
from markitect_tool.cli import main
from markitect_tool.core import parse_markdown
from markitect_tool.processor import ProcessorContext, run_fenced_processors
from markitect_tool.query import (
    default_query_engine_registry,
    InvalidQueryError,
    extract_document,
    query_document,
    query_document_jsonpath,
)
from markitect_tool.reference import ReferenceContext, resolve_reference


# Shared fixture document: YAML frontmatter, one H1 and two H2 sections.
CHARACTERIZATION_DOC = """---
document_type: adr
status: accepted
---

# Decision Record

## Context

Authors need stable infrastructure seams.

## Decision

Use explicit registries and processing envelopes.
"""


def test_query_selector_and_extraction_characterization():
    """Pin the selector query result and frontmatter extraction for the fixture."""
    parsed = parse_markdown(CHARACTERIZATION_DOC)
    engines = default_query_engine_registry()

    matches = query_document(parsed, "sections[heading=Decision]")
    status_values = extract_document(parsed, "frontmatter.status")

    assert engines.get("selector").descriptor.kind == "query-engine"
    assert len(matches) == 1
    decision = matches[0]
    assert decision.kind == "section"
    assert decision.path == "$.sections[2]"
    assert decision.text.startswith("## Decision")
    assert status_values == ["accepted"]


def test_jsonpath_missing_dependency_diagnostic_characterization(monkeypatch):
    """Pin the error raised when the `jsonpath_ng` import is blocked."""
    parsed = parse_markdown(CHARACTERIZATION_DOC)
    original_import = builtins.__import__

    def blocking_import(name, *args, **kwargs):
        # Let every import through except the jsonpath_ng package tree.
        if not name.startswith("jsonpath_ng"):
            return original_import(name, *args, **kwargs)
        raise ImportError("blocked")

    monkeypatch.setattr(builtins, "__import__", blocking_import)

    with pytest.raises(InvalidQueryError, match="optional `jsonpath-ng`"):
        query_document_jsonpath(parsed, "$.headings[*].text")


def test_processor_registry_result_provenance_characterization():
    """Pin block metadata, output and provenance for the `uppercase` processor."""
    markdown = """```mkt-uppercase {#shout}
hello
```
"""

    run = run_fenced_processors(markdown, context=ProcessorContext())

    assert run.valid
    first_block = run.blocks[0]
    assert first_block.processor == "uppercase"
    assert first_block.unit_id == "shout"
    first_result = run.results[0]
    assert first_result.content == "HELLO\n"
    assert first_result.provenance[0].operation == "processor.uppercase"


def test_unknown_processor_diagnostic_characterization():
    """Pin the diagnostic emitted for a fenced block naming an unknown processor."""
    markdown = """```mkt-missing {#x}
content
```
"""

    run = run_fenced_processors(markdown, context=ProcessorContext())

    assert not run.valid
    diagnostic = run.results[0].diagnostics[0].to_dict()
    assert diagnostic["severity"] == "error"
    assert diagnostic["code"] == "processor.unknown"
    assert "Unknown processor" in diagnostic["message"]


def test_backend_manifest_registry_characterization():
    """Pin manifest loading, registry lookup and capability check for the example backend."""
    # NOTE: these paths are relative, so the test relies on the working directory.
    manifest = load_backend_manifest("examples/backends/local-sqlite-backend.md")
    registry = load_backend_registry(["examples/backends"])
    compatibility = capability_check(manifest, ["snapshots", "fts", "provenance"])

    assert manifest.id == "local-sqlite-cache"
    assert registry.get("local-sqlite-cache").storage["engine"] == "sqlite"
    assert compatibility.compatible


def test_local_index_snapshot_query_search_characterization(tmp_path: Path):
    """Pin build, state, document lookup and search results of the local snapshot store."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(CHARACTERIZATION_DOC, encoding="utf-8")
    store = LocalSnapshotStore(local_index_path_for(tmp_path))

    build_report = store.build([tmp_path], root=tmp_path)
    first_state = store.load_state()[0]
    stored_document = store.get_document("doc.md")
    hits = store.search("registries")

    assert build_report.parsed == ["doc.md"]
    assert first_state.path == "doc.md"
    assert first_state.snapshot_id.startswith("snapshot:")
    assert stored_document["headings"][0]["text"] == "Decision Record"
    top_hit = hits[0]
    assert top_hit.path == "doc.md"
    assert top_hit.unit_kind in {"section", "block"}


def test_reference_resolution_characterization(tmp_path: Path):
    """Pin resolution of a `file#anchor` reference to a section of a sibling file."""
    context_file = tmp_path / "context.md"
    target_file = tmp_path / "target.md"
    context_file.write_text("# Context\n", encoding="utf-8")
    target_file.write_text(
        "# Target\n\n## Decision\n\nChosen text.\n",
        encoding="utf-8",
    )
    reference_context = ReferenceContext(root=tmp_path, current_path=context_file)

    resolution = resolve_reference("target.md#decision", context=reference_context)

    assert resolution.target_path == str(target_file.resolve())
    first_unit = resolution.units[0]
    assert first_unit.kind == "section"
    assert first_unit.unit_id == "decision"
    assert "Chosen text" in first_unit.text


def test_cli_output_envelopes_characterization(tmp_path: Path):
    """Pin exit codes and key output fragments of the query and cache CLI commands."""
    doc_path = tmp_path / "doc.md"
    doc_path.write_text(CHARACTERIZATION_DOC, encoding="utf-8")
    runner = CliRunner()
    root = str(tmp_path)

    query_args = ["query", str(doc_path), "sections[heading=Decision]", "--format", "json"]
    index_args = ["cache", "index", root, "--root", root]
    cache_query_args = [
        "cache",
        "query",
        "frontmatter.status",
        "--root",
        root,
        "--format",
        "json",
    ]

    # The index command must run before the cache query, which reads the index.
    query = runner.invoke(main, query_args)
    index = runner.invoke(main, index_args)
    cache_query = runner.invoke(main, cache_query_args)

    assert query.exit_code == 0
    assert '"engine": "selector"' in query.output
    assert '"count": 1' in query.output
    assert index.exit_code == 0
    assert "parsed: 1" in index.output
    assert cache_query.exit_code == 0
    assert '"source_path": "doc.md"' in cache_query.output