# markitect-tool/tests/test_extension_characterization.py

from pathlib import Path
import builtins

from click.testing import CliRunner
import pytest

from markitect_tool.backend import (
    LocalSnapshotStore,
    capability_check,
    load_backend_manifest,
    load_backend_registry,
    local_index_path_for,
)
from markitect_tool.cli import main
from markitect_tool.core import parse_markdown
from markitect_tool.processor import ProcessorContext, run_fenced_processors
from markitect_tool.query import (
    default_query_engine_registry,
    InvalidQueryError,
    extract_document,
    query_document,
    query_document_jsonpath,
)
from markitect_tool.reference import ReferenceContext, resolve_reference


# Shared Markdown fixture for the characterization tests in this module:
# a front-matter block (document_type, status) followed by one H1 heading and
# two H2 sections ("Context" and "Decision").
# Several tests assert on its exact headings, front-matter values, and wording,
# so editing this text can change their expected results.
CHARACTERIZATION_DOC = """---
document_type: adr
status: accepted
---

# Decision Record

## Context

Authors need stable infrastructure seams.

## Decision

Use explicit registries and processing envelopes.
"""


def test_query_selector_and_extraction_characterization():
    """Pin selector-query and extraction results for the shared fixture.

    Checks the descriptor kind of the registered "selector" engine, the single
    section matched by a heading selector, and the value extracted from the
    front matter.
    """
    parsed = parse_markdown(CHARACTERIZATION_DOC)
    engines = default_query_engine_registry()

    matches = query_document(parsed, "sections[heading=Decision]")
    status_values = extract_document(parsed, "frontmatter.status")

    selector_engine = engines.get("selector")
    assert selector_engine.descriptor.kind == "query-engine"

    # Exactly one section carries the "Decision" heading.
    assert len(matches) == 1
    decision = matches[0]
    assert decision.kind == "section"
    assert decision.path == "$.sections[2]"
    assert decision.text.startswith("## Decision")

    assert status_values == ["accepted"]


def test_jsonpath_missing_dependency_diagnostic_characterization(monkeypatch):
    """Pin the error raised when `jsonpath_ng` cannot be imported.

    The builtin import hook is patched so that any import whose name starts
    with "jsonpath_ng" fails; the JSONPath query is then expected to raise
    InvalidQueryError with a message mentioning the optional dependency.
    """
    parsed = parse_markdown(CHARACTERIZATION_DOC)
    original_import = builtins.__import__

    def blocking_import(name, *args, **kwargs):
        # Every other module is delegated to the real import machinery.
        if not name.startswith("jsonpath_ng"):
            return original_import(name, *args, **kwargs)
        raise ImportError("blocked")

    monkeypatch.setattr(builtins, "__import__", blocking_import)

    expected_error = pytest.raises(InvalidQueryError, match="optional `jsonpath-ng`")
    with expected_error:
        query_document_jsonpath(parsed, "$.headings[*].text")


def test_processor_registry_result_provenance_characterization():
    """Pin block metadata, output, and provenance for an `mkt-uppercase` fence."""
    # Built line by line; the value is one fenced block with id "shout".
    fenced_source = (
        "```mkt-uppercase {#shout}\n"
        "hello\n"
        "```\n"
    )

    outcome = run_fenced_processors(fenced_source, context=ProcessorContext())

    assert outcome.valid

    block = outcome.blocks[0]
    assert block.processor == "uppercase"
    assert block.unit_id == "shout"

    result = outcome.results[0]
    assert result.content == "HELLO\n"
    assert result.provenance[0].operation == "processor.uppercase"


def test_unknown_processor_diagnostic_characterization():
    """Pin the diagnostic reported for a fence naming an unregistered processor."""
    fenced_source = (
        "```mkt-missing {#x}\n"
        "content\n"
        "```\n"
    )

    outcome = run_fenced_processors(fenced_source, context=ProcessorContext())

    assert not outcome.valid

    # The first diagnostic of the first result is inspected in dict form.
    first_diagnostic = outcome.results[0].diagnostics[0]
    as_dict = first_diagnostic.to_dict()
    assert as_dict["severity"] == "error"
    assert as_dict["code"] == "processor.unknown"
    assert "Unknown processor" in as_dict["message"]


def test_backend_manifest_registry_characterization():
    """Pin manifest loading, registry lookup, and capability checking.

    Both paths are relative, so they resolve against the working directory the
    tests are run from.
    """
    manifest_path = "examples/backends/local-sqlite-backend.md"
    search_dirs = ["examples/backends"]
    required_capabilities = ["snapshots", "fts", "provenance"]

    manifest = load_backend_manifest(manifest_path)
    registry = load_backend_registry(search_dirs)
    check = capability_check(manifest, required_capabilities)

    assert manifest.id == "local-sqlite-cache"

    registered = registry.get("local-sqlite-cache")
    assert registered.storage["engine"] == "sqlite"

    assert check.compatible


def test_local_index_snapshot_query_search_characterization(tmp_path: Path):
    """Pin build, state, document lookup, and search results of the local store.

    The shared fixture is written to ``doc.md`` under ``tmp_path`` and indexed
    with ``tmp_path`` as the root.
    """
    doc_file = tmp_path / "doc.md"
    doc_file.write_text(CHARACTERIZATION_DOC, encoding="utf-8")
    index_path = local_index_path_for(tmp_path)
    store = LocalSnapshotStore(index_path)

    build_report = store.build([tmp_path], root=tmp_path)
    states = store.load_state()
    first_state = states[0]
    stored_document = store.get_document("doc.md")
    hits = store.search("registries")

    assert build_report.parsed == ["doc.md"]

    assert first_state.path == "doc.md"
    assert first_state.snapshot_id.startswith("snapshot:")

    first_heading = stored_document["headings"][0]
    assert first_heading["text"] == "Decision Record"

    top_hit = hits[0]
    assert top_hit.path == "doc.md"
    assert top_hit.unit_kind in {"section", "block"}


def test_reference_resolution_characterization(tmp_path: Path):
    """Pin resolution of a ``file#anchor`` reference between two files.

    ``target.md#decision`` is resolved with ``context.md`` as the current file
    and ``tmp_path`` as the root.
    """
    current_file = tmp_path / "context.md"
    referenced_file = tmp_path / "target.md"
    current_file.write_text("# Context\n", encoding="utf-8")
    referenced_file.write_text(
        "# Target\n\n## Decision\n\nChosen text.\n",
        encoding="utf-8",
    )
    ref_context = ReferenceContext(root=tmp_path, current_path=current_file)

    resolved = resolve_reference("target.md#decision", context=ref_context)

    expected_path = str(referenced_file.resolve())
    assert resolved.target_path == expected_path

    first_unit = resolved.units[0]
    assert first_unit.kind == "section"
    assert first_unit.unit_id == "decision"
    assert "Chosen text" in first_unit.text


def test_cli_output_envelopes_characterization(tmp_path: Path):
    """Pin exit codes and output fragments of three CLI invocations.

    Runs, in order: ``query`` on the fixture file, ``cache index`` on the
    temporary directory, and ``cache query`` against that directory as root.
    """
    doc_file = tmp_path / "doc.md"
    doc_file.write_text(CHARACTERIZATION_DOC, encoding="utf-8")
    cli = CliRunner()
    root = str(tmp_path)

    query_args = ["query", str(doc_file), "sections[heading=Decision]", "--format", "json"]
    index_args = ["cache", "index", root, "--root", root]
    cache_query_args = ["cache", "query", "frontmatter.status", "--root", root, "--format", "json"]

    # The index is built before the cache is queried.
    query_result = cli.invoke(main, query_args)
    index_result = cli.invoke(main, index_args)
    cache_query_result = cli.invoke(main, cache_query_args)

    assert query_result.exit_code == 0
    assert '"engine": "selector"' in query_result.output
    assert '"count": 1' in query_result.output

    assert index_result.exit_code == 0
    assert "parsed: 1" in index_result.output

    assert cache_query_result.exit_code == 0
    assert '"source_path": "doc.md"' in cache_query_result.output