Files
markitect-tool/tests/test_practical_usecases_e2e.py
2026-05-04 22:58:59 +02:00

406 lines
11 KiB
Python

import json
import time
from pathlib import Path
import pytest
from click.testing import CliRunner
from markitect_tool.cli import main
@pytest.mark.parametrize(
    ("label", "count"),
    [("small", 1), ("typical", 12), ("large", 80)],
)
def test_corpus_usecase_pipeline_small_typical_large(tmp_path: Path, label: str, count: int):
    """Drive parse, query, cache index, search and context pack over a synthetic ADR corpus.

    ``count`` ADR documents are generated under ``tmp_path / "docs"``; ``label``
    is interpolated into each document body. The commands run in sequence
    through the CLI entry point, with the cache indexed before search and
    context packing are invoked.
    """
    corpus_dir = tmp_path / "docs"
    corpus_dir.mkdir()
    for number in range(count):
        # The literal starts at column 0 so the front matter and headings are
        # written without leading whitespace.
        adr_text = f"""---
title: Decision {number}
labels: [public]
trust_zone: public
---
# Decision {number}
## Context
The team needs predictable latency for document operations in the {label} corpus.
## Decision
Use deterministic Markdown contracts, selectors, and cache refresh planning.
## Consequences
Search and context packaging should stay fast as the corpus grows.
"""
        (corpus_dir / f"adr-{number:03d}.md").write_text(adr_text, encoding="utf-8")

    cli = CliRunner()
    first_adr = str(corpus_dir / "adr-000.md")
    root = str(tmp_path)

    # Parse a single document and check its first heading.
    parsed = cli.invoke(main, ["parse", first_adr, "--format", "json"])
    assert parsed.exit_code == 0, parsed.output
    headings = json.loads(parsed.output)["headings"]
    assert headings[0]["text"] == "Decision 0"

    # Select the "Decision" section of that same document.
    query_args = ["query", first_adr, "sections[heading=Decision]", "--format", "json"]
    queried = cli.invoke(main, query_args)
    assert queried.exit_code == 0, queried.output
    assert json.loads(queried.output)["count"] == 1

    # Index the whole corpus; every generated file must be reported.
    index_args = ["cache", "index", str(corpus_dir), "--root", root, "--format", "json"]
    indexed = cli.invoke(main, index_args)
    assert indexed.exit_code == 0, indexed.output
    indexed_payload = json.loads(indexed.output)
    assert len(indexed_payload["indexed"]) == count

    # "latency" appears in every generated document, so at least one hit is expected.
    search_args = ["search", "latency", "--root", root, "--limit", "5", "--format", "json"]
    searched = cli.invoke(main, search_args)
    assert searched.exit_code == 0, searched.output
    assert json.loads(searched.output)["count"] >= 1

    # Pack a search-driven context capped at three items, without saving it.
    pack_args = [
        "context", "pack", "latency",
        "--search",
        "--root", root,
        "--max-items", "3",
        "--no-save",
        "--format", "json",
    ]
    packed = cli.invoke(main, pack_args)
    assert packed.exit_code == 0, packed.output
    packed_items = json.loads(packed.output)["items"]
    assert 1 <= len(packed_items) <= 3
def test_document_pipeline_usecases_from_examples(tmp_path: Path):
    """Exercise contract check, template render, function render and workflow plan.

    All inputs come from the ``examples/`` tree; only the workflow plan's
    output directory lives under ``tmp_path``.
    """
    # NOTE(review): the example paths are relative, so this presumably expects
    # the working directory to be the project root - confirm.
    cli = CliRunner()

    contract_args = [
        "contract", "check", "examples/documents/adr-valid.md",
        "--contract", "examples/contracts/adr.contract.md",
    ]
    contract_result = cli.invoke(main, contract_args)
    assert contract_result.exit_code == 0, contract_result.output
    assert "valid" in contract_result.output

    template_args = [
        "template", "render", "examples/templates/adr-summary.template.md",
        "--data", "examples/templates/adr-summary.data.yaml",
    ]
    template_result = cli.invoke(main, template_args)
    assert template_result.exit_code == 0, template_result.output
    assert "Use Deterministic Templates" in template_result.output

    function_args = ["function", "render", "examples/functions/basic-functions.md"]
    function_result = cli.invoke(main, function_args)
    assert function_result.exit_code == 0, function_result.output
    for expected_fragment in ("DRAFT", "Generated Section"):
        assert expected_fragment in function_result.output

    plan_output_dir = tmp_path / "workflow-out"
    workflow_args = [
        "workflow", "plan", "examples/workflows/source-snippets.workflow.md",
        "--output-dir", str(plan_output_dir),
    ]
    workflow_result = cli.invoke(main, workflow_args)
    assert workflow_result.exit_code == 0, workflow_result.output
    assert "valid" in workflow_result.output
def test_structure_reuse_policy_and_literate_usecases(tmp_path: Path):
    """Exercise reference resolution, processing, policy checks, tangle and explode/implode.

    Inputs come from the ``examples/`` tree; generated artifacts (tangled
    sources, exploded document parts) are written under ``tmp_path``.
    """
    # NOTE(review): the example paths are relative, so this presumably expects
    # the working directory to be the project root - confirm.
    runner = CliRunner()

    # Resolve a single reference from the context document.
    reference = runner.invoke(
        main,
        [
            "ref",
            "resolve",
            "examples/references/context.md",
            "std:clauses.md#payment-terms",
            "--root",
            "examples/references",
            "--format",
            "json",
        ],
    )
    assert reference.exit_code == 0, reference.output
    assert json.loads(reference.output)["count"] == 1

    process = runner.invoke(
        main,
        ["process", "examples/references/context.md", "--root", "examples/references"],
    )
    assert process.exit_code == 0, process.output
    assert "payment-example" in process.output

    # The policy check is expected to fail: exit code 1 with "denied" in the output.
    policy = runner.invoke(
        main,
        [
            "policy",
            "check",
            "public-agent",
            "read",
            "examples/policy/private/internal-note.md",
            "--policy",
            "examples/policy/local-label-policy.yaml",
            "--path",
            "private/internal-note.md",
        ],
    )
    # Include the CLI output in the failure message, like every other
    # exit-code assertion in this module, so an unexpected code is diagnosable.
    assert policy.exit_code == 1, policy.output
    assert "denied" in policy.output

    tangle = runner.invoke(
        main,
        ["tangle", "examples/literate/app.md", "--output-dir", str(tmp_path / "tangle")],
    )
    assert tangle.exit_code == 0, tangle.output
    assert (tmp_path / "tangle" / "src" / "app.py").exists()

    # Round trip: explode a document into a directory, then implode it back to Markdown.
    exploded = tmp_path / "exploded"
    explode = runner.invoke(
        main,
        [
            "explode",
            "examples/documents/adr-valid.md",
            "--output-dir",
            str(exploded),
            "--format",
            "json",
        ],
    )
    assert explode.exit_code == 0, explode.output

    implode = runner.invoke(main, ["implode", str(exploded), "--format", "markdown"])
    assert implode.exit_code == 0, implode.output
    assert "# Use Markdown Contracts" in implode.output
@pytest.mark.parametrize(
    ("label", "document", "contract", "context"),
    [
        (
            "small",
            "examples/documents/concept-note-valid.md",
            "examples/contracts/concept-note.contract.md",
            None,
        ),
        (
            "typical",
            "examples/documents/business-letter-valid.md",
            "examples/contracts/business-letter.contract.md",
            "examples/runtime/business-letter.context.yaml",
        ),
        (
            "large",
            "examples/documents/prd-frs-valid.md",
            "examples/contracts/prd-frs.contract.md",
            None,
        ),
    ],
)
def test_contract_and_runtime_usecases_small_typical_large(
    label: str,
    document: str,
    contract: str,
    context: str | None,
):
    """Check example documents against their contracts, optionally with a runtime context.

    Every case runs ``contract check``. Cases that supply ``context`` also
    run ``contract form-state`` and require a non-empty ``fields`` entry;
    ``label`` is only used as that assertion's failure message.
    """
    cli = CliRunner()
    context_args = ["--context", context] if context else []

    check_args = ["contract", "check", document, "--contract", contract, "--format", "json"]
    check_result = cli.invoke(main, check_args + context_args)
    assert check_result.exit_code == 0, check_result.output
    check_payload = json.loads(check_result.output)
    assert check_payload["valid"] is True
    assert check_payload["document_path"].endswith(Path(document).name)

    # Form state is only exercised when a runtime context is supplied.
    if not context:
        return

    form_args = [
        "contract", "form-state", document,
        "--contract", contract,
        "--context", context,
        "--format", "json",
    ]
    form_result = cli.invoke(main, form_args)
    assert form_result.exit_code == 0, form_result.output
    form_payload = json.loads(form_result.output)
    assert form_payload["valid"] is True
    assert form_payload["fields"], label
@pytest.mark.parametrize(
    ("label", "count"),
    [
        ("small", 1),
        ("typical", 5),
        ("large", 30),
    ],
)
def test_transform_compose_and_include_usecases_small_typical_large(
    tmp_path: Path,
    label: str,
    count: int,
):
    """Exercise transform, compose and include over ``count`` generated source documents.

    Sources are written to ``tmp_path / "sources"`` with a nested
    ``review.state`` front matter value; ``label`` is interpolated into each
    document body and into the composed title.
    """
    docs = tmp_path / "sources"
    docs.mkdir()
    for index in range(count):
        # ``state`` must be indented under ``review`` so the front matter holds
        # the nested ``review.state`` key that the transform step sets below.
        # At column 0 it would be a separate top-level key and ``review``
        # would be empty.
        (docs / f"source-{index:02d}.md").write_text(
            f"""---
review:
  state: draft
---
# Source {index}
## Decision
The {label} transform pipeline keeps source {index} reusable.
""",
            encoding="utf-8",
        )

    runner = CliRunner()

    # Transform one source: set review.state and shift headings by one level.
    transform = runner.invoke(
        main,
        [
            "transform",
            str(docs / "source-00.md"),
            "--set",
            "review.state=ready",
            "--heading-delta",
            "1",
        ],
    )
    assert transform.exit_code == 0, transform.output
    assert "state: ready" in transform.output
    assert "## Source 0" in transform.output

    # Compose every source (sorted by path) under a new title, headings shifted by one.
    source_paths = [str(path) for path in sorted(docs.glob("*.md"))]
    compose = runner.invoke(
        main,
        [
            "compose",
            *source_paths,
            "--title",
            f"{label.title()} Corpus",
            "--heading-delta",
            "1",
            "--format",
            "markdown",
        ],
    )
    assert compose.exit_code == 0, compose.output
    assert f"# {label.title()} Corpus" in compose.output
    # Each source contributes exactly one "Decision" section, now at level three.
    assert compose.output.count("### Decision") == count

    # Include a selected section of the first source via an include directive.
    include_file = tmp_path / "include.md"
    include_file.write_text(
        '<!-- mkt:include path="sources/source-00.md" selector="sections[heading=Decision]" heading_delta="1" -->',
        encoding="utf-8",
    )
    include = runner.invoke(main, ["include", str(include_file), "--base-dir", str(tmp_path)])
    assert include.exit_code == 0, include.output
    assert "### Decision" in include.output
    assert f"The {label} transform pipeline" in include.output
@pytest.mark.parametrize(
    "workflow_file",
    [
        "examples/workflows/source-snippets.workflow.md",
        "examples/workflows/adr-release-notes.workflow.md",
    ],
)
def test_workflow_and_refresh_planning_usecases(tmp_path: Path, workflow_file: str):
    """Inspect and plan an example workflow, then request a backend refresh plan.

    The workflow must inspect as valid and plan as a valid dry run. The
    refresh plan for ``examples/documents`` is expected to exit with code 1
    and report a dirty state with at least one document needing a parse.
    """
    cli = CliRunner()

    inspected = cli.invoke(main, ["workflow", "inspect", workflow_file, "--format", "json"])
    assert inspected.exit_code == 0, inspected.output
    assert json.loads(inspected.output)["valid"] is True

    plan_args = [
        "workflow", "plan", workflow_file,
        "--output-dir", str(tmp_path / "out"),
        "--format", "json",
    ]
    planned = cli.invoke(main, plan_args)
    assert planned.exit_code == 0, planned.output
    plan_payload = json.loads(planned.output)
    assert plan_payload["valid"] is True
    assert plan_payload["dry_run"] is True

    # NOTE(review): this step does not use ``workflow_file``, so it repeats
    # unchanged for each parameter. It also targets the root ".", so it
    # presumably depends on the working directory and its cache state - confirm.
    refresh_args = [
        "backend", "refresh-plan", "examples/documents",
        "--root", ".",
        "--format", "json",
    ]
    refreshed = cli.invoke(main, refresh_args)
    assert refreshed.exit_code == 1, refreshed.output
    refresh_payload = json.loads(refreshed.output)
    assert refresh_payload["dirty"] is True
    assert refresh_payload["counts"]["needs_parse"] >= 1
def test_large_corpus_performance_smoke_stays_bounded(tmp_path: Path):
    """Smoke-check that indexing and searching 120 generated notes stay within time bounds.

    Wall-clock time is measured around each CLI invocation only; indexing
    must finish in under 30 seconds and the search in under 5 seconds.
    """
    note_total = 120
    index_budget_seconds = 30
    search_budget_seconds = 5

    corpus_dir = tmp_path / "docs"
    corpus_dir.mkdir()
    for number in range(note_total):
        note_text = f"""# Note {number}
## Summary
This synthetic performance smoke document mentions adoption latency and search.
## Details
The index should stay responsive for local corpora without requiring services.
"""
        (corpus_dir / f"note-{number:03d}.md").write_text(note_text, encoding="utf-8")

    cli = CliRunner()
    root = str(tmp_path)

    # Build the argument lists up front so the timed windows wrap only the invocation.
    index_args = ["cache", "index", str(corpus_dir), "--root", root, "--format", "json"]
    search_args = ["search", "adoption", "--root", root, "--limit", "10", "--format", "json"]

    index_start = time.perf_counter()
    indexed = cli.invoke(main, index_args)
    index_elapsed = time.perf_counter() - index_start
    assert indexed.exit_code == 0, indexed.output
    assert len(json.loads(indexed.output)["indexed"]) == note_total
    assert index_elapsed < index_budget_seconds

    search_start = time.perf_counter()
    searched = cli.invoke(main, search_args)
    search_elapsed = time.perf_counter() - search_start
    assert searched.exit_code == 0, searched.output
    assert json.loads(searched.output)["count"] >= 1
    assert search_elapsed < search_budget_seconds