"""End-to-end use-case tests that drive the ``markitect_tool`` CLI via ``CliRunner``.

Several tests pass repository-relative ``examples/...`` paths to the CLI.
NOTE(review): this presumably requires pytest to be started from the
repository root -- confirm against the project's test configuration.
"""

import json
import time
from pathlib import Path

import pytest
from click.testing import CliRunner

from markitect_tool.cli import main


@pytest.mark.parametrize(
    ("label", "count"),
    [
        ("small", 1),
        ("typical", 12),
        ("large", 80),
    ],
)
def test_corpus_usecase_pipeline_small_typical_large(tmp_path: Path, label: str, count: int) -> None:
    """Run parse, query, cache index, search and context pack over ``count`` ADR files.

    Args:
        tmp_path: pytest-provided temporary directory used as the corpus root.
        label: Corpus size label; it is interpolated into each document body.
        count: Number of synthetic ADR documents to generate.
    """
    docs = tmp_path / "docs"
    docs.mkdir()
    for index in range(count):
        # The fixture text stays flush-left so the front matter fences and the
        # headings start at column 0 of the written Markdown file.
        (docs / f"adr-{index:03d}.md").write_text(
            f"""---
title: Decision {index}
labels: [public]
trust_zone: public
---

# Decision {index}

## Context

The team needs predictable latency for document operations in the {label} corpus.

## Decision

Use deterministic Markdown contracts, selectors, and cache refresh planning.

## Consequences

Search and context packaging should stay fast as the corpus grows.
""",
            encoding="utf-8",
        )

    runner = CliRunner()
    first_doc = str(docs / "adr-000.md")

    parse = runner.invoke(main, ["parse", first_doc, "--format", "json"])
    assert parse.exit_code == 0, parse.output
    assert json.loads(parse.output)["headings"][0]["text"] == "Decision 0"

    query = runner.invoke(
        main,
        ["query", first_doc, "sections[heading=Decision]", "--format", "json"],
    )
    assert query.exit_code == 0, query.output
    assert json.loads(query.output)["count"] == 1

    # Named ``indexed`` so the loop variable ``index`` above is not rebound to a
    # CLI result object.
    indexed = runner.invoke(
        main,
        ["cache", "index", str(docs), "--root", str(tmp_path), "--format", "json"],
    )
    assert indexed.exit_code == 0, indexed.output
    index_data = json.loads(indexed.output)
    assert len(index_data["indexed"]) == count

    search = runner.invoke(
        main,
        ["search", "latency", "--root", str(tmp_path), "--limit", "5", "--format", "json"],
    )
    assert search.exit_code == 0, search.output
    assert json.loads(search.output)["count"] >= 1

    context = runner.invoke(
        main,
        [
            "context",
            "pack",
            "latency",
            "--search",
            "--root",
            str(tmp_path),
            "--max-items",
            "3",
            "--no-save",
            "--format",
            "json",
        ],
    )
    assert context.exit_code == 0, context.output
    # ``--max-items 3`` is passed above, so at most three items are expected.
    assert 1 <= len(json.loads(context.output)["items"]) <= 3


def test_document_pipeline_usecases_from_examples(tmp_path: Path) -> None:
    """Exercise contract check, template render, function render and workflow plan.

    All inputs come from the ``examples/`` tree; only the workflow output
    directory is placed under ``tmp_path``.
    """
    runner = CliRunner()

    contract = runner.invoke(
        main,
        [
            "contract",
            "check",
            "examples/documents/adr-valid.md",
            "--contract",
            "examples/contracts/adr.contract.md",
        ],
    )
    assert contract.exit_code == 0, contract.output
    assert "valid" in contract.output

    template = runner.invoke(
        main,
        [
            "template",
            "render",
            "examples/templates/adr-summary.template.md",
            "--data",
            "examples/templates/adr-summary.data.yaml",
        ],
    )
    assert template.exit_code == 0, template.output
    assert "Use Deterministic Templates" in template.output

    function = runner.invoke(main, ["function", "render", "examples/functions/basic-functions.md"])
    assert function.exit_code == 0, function.output
    assert "DRAFT" in function.output
    assert "Generated Section" in function.output

    workflow = runner.invoke(
        main,
        [
            "workflow",
            "plan",
            "examples/workflows/source-snippets.workflow.md",
            "--output-dir",
            str(tmp_path / "workflow-out"),
        ],
    )
    assert workflow.exit_code == 0, workflow.output
    assert "valid" in workflow.output


def test_structure_reuse_policy_and_literate_usecases(tmp_path: Path) -> None:
    """Exercise ref resolve, process, policy check, tangle, explode and implode.

    Generated artifacts (tangled sources, exploded document) are written under
    ``tmp_path``; inputs come from the ``examples/`` tree.
    """
    runner = CliRunner()

    reference = runner.invoke(
        main,
        [
            "ref",
            "resolve",
            "examples/references/context.md",
            "std:clauses.md#payment-terms",
            "--root",
            "examples/references",
            "--format",
            "json",
        ],
    )
    assert reference.exit_code == 0, reference.output
    assert json.loads(reference.output)["count"] == 1

    process = runner.invoke(
        main,
        ["process", "examples/references/context.md", "--root", "examples/references"],
    )
    assert process.exit_code == 0, process.output
    assert "payment-example" in process.output

    policy = runner.invoke(
        main,
        [
            "policy",
            "check",
            "public-agent",
            "read",
            "examples/policy/private/internal-note.md",
            "--policy",
            "examples/policy/local-label-policy.yaml",
            "--path",
            "private/internal-note.md",
        ],
    )
    # This check is expected to fail: exit code 1 and "denied" in the output.
    assert policy.exit_code == 1
    assert "denied" in policy.output

    tangle = runner.invoke(
        main,
        ["tangle", "examples/literate/app.md", "--output-dir", str(tmp_path / "tangle")],
    )
    assert tangle.exit_code == 0, tangle.output
    assert (tmp_path / "tangle" / "src" / "app.py").exists()

    exploded = tmp_path / "exploded"
    explode = runner.invoke(
        main,
        [
            "explode",
            "examples/documents/adr-valid.md",
            "--output-dir",
            str(exploded),
            "--format",
            "json",
        ],
    )
    assert explode.exit_code == 0, explode.output

    # Round trip: imploding the exploded directory must yield the title again.
    implode = runner.invoke(main, ["implode", str(exploded), "--format", "markdown"])
    assert implode.exit_code == 0, implode.output
    assert "# Use Markdown Contracts" in implode.output


@pytest.mark.parametrize(
    ("label", "document", "contract", "context"),
    [
        ("small", "examples/documents/concept-note-valid.md", "examples/contracts/concept-note.contract.md", None),
        (
            "typical",
            "examples/documents/business-letter-valid.md",
            "examples/contracts/business-letter.contract.md",
            "examples/runtime/business-letter.context.yaml",
        ),
        ("large", "examples/documents/prd-frs-valid.md", "examples/contracts/prd-frs.contract.md", None),
    ],
)
def test_contract_and_runtime_usecases_small_typical_large(
    label: str,
    document: str,
    contract: str,
    context: str | None,
) -> None:
    """Check example documents against their contracts, with optional runtime context.

    Args:
        label: Case label, used as the failure message of the final assertion.
        document: Path of the example document to validate.
        contract: Path of the contract the document is checked against.
        context: Optional runtime context file; when given it is passed to
            ``contract check`` and ``contract form-state`` is exercised too.
    """
    runner = CliRunner()

    command = ["contract", "check", document, "--contract", contract, "--format", "json"]
    if context:
        command.extend(["--context", context])

    check = runner.invoke(main, command)
    assert check.exit_code == 0, check.output
    check_data = json.loads(check.output)
    assert check_data["valid"] is True
    assert check_data["document_path"].endswith(Path(document).name)

    if not context:
        return

    form_state = runner.invoke(
        main,
        [
            "contract",
            "form-state",
            document,
            "--contract",
            contract,
            "--context",
            context,
            "--format",
            "json",
        ],
    )
    assert form_state.exit_code == 0, form_state.output
    form_data = json.loads(form_state.output)
    assert form_data["valid"] is True
    assert form_data["fields"], label


@pytest.mark.parametrize(
    ("label", "count"),
    [
        ("small", 1),
        ("typical", 5),
        ("large", 30),
    ],
)
def test_transform_compose_and_include_usecases_small_typical_large(
    tmp_path: Path,
    label: str,
    count: int,
) -> None:
    """Exercise transform, compose and include over ``count`` generated sources.

    Args:
        tmp_path: pytest-provided temporary directory holding the sources.
        label: Corpus size label; it is interpolated into each source body and
            into the composed document title.
        count: Number of synthetic source documents to generate.
    """
    docs = tmp_path / "sources"
    docs.mkdir()
    for index in range(count):
        # ``state`` is nested under ``review`` so that ``--set review.state=ready``
        # below addresses an existing key.
        (docs / f"source-{index:02d}.md").write_text(
            f"""---
review:
  state: draft
---

# Source {index}

## Decision

The {label} transform pipeline keeps source {index} reusable.
""",
            encoding="utf-8",
        )

    runner = CliRunner()

    transform = runner.invoke(
        main,
        [
            "transform",
            str(docs / "source-00.md"),
            "--set",
            "review.state=ready",
            "--heading-delta",
            "1",
        ],
    )
    assert transform.exit_code == 0, transform.output
    assert "state: ready" in transform.output
    # With ``--heading-delta 1`` the ``# Source 0`` title is expected one level deeper.
    assert "## Source 0" in transform.output

    compose = runner.invoke(
        main,
        [
            "compose",
            *[str(path) for path in sorted(docs.glob("*.md"))],
            "--title",
            f"{label.title()} Corpus",
            "--heading-delta",
            "1",
            "--format",
            "markdown",
        ],
    )
    assert compose.exit_code == 0, compose.output
    assert f"# {label.title()} Corpus" in compose.output
    # One shifted ``Decision`` heading is expected per composed source.
    assert compose.output.count("### Decision") == count

    include_file = tmp_path / "include.md"
    # NOTE(review): the include fixture is written as an empty string, yet the
    # assertions below expect content from the generated sources. The include
    # directive text looks like it is missing here -- confirm the intended
    # fixture content before relying on this test.
    include_file.write_text(
        '',
        encoding="utf-8",
    )
    include = runner.invoke(main, ["include", str(include_file), "--base-dir", str(tmp_path)])
    assert include.exit_code == 0, include.output
    assert "### Decision" in include.output
    assert f"The {label} transform pipeline" in include.output


@pytest.mark.parametrize(
    "workflow_file",
    [
        "examples/workflows/source-snippets.workflow.md",
        "examples/workflows/adr-release-notes.workflow.md",
    ],
)
def test_workflow_and_refresh_planning_usecases(tmp_path: Path, workflow_file: str) -> None:
    """Inspect and plan an example workflow, then request a backend refresh plan.

    Args:
        tmp_path: pytest-provided temporary directory for the plan output.
        workflow_file: Path of the example workflow document under test.
    """
    runner = CliRunner()

    # Named ``inspection`` to avoid shadowing the stdlib ``inspect`` module name.
    inspection = runner.invoke(main, ["workflow", "inspect", workflow_file, "--format", "json"])
    assert inspection.exit_code == 0, inspection.output
    assert json.loads(inspection.output)["valid"] is True

    plan = runner.invoke(
        main,
        ["workflow", "plan", workflow_file, "--output-dir", str(tmp_path / "out"), "--format", "json"],
    )
    assert plan.exit_code == 0, plan.output
    plan_data = json.loads(plan.output)
    assert plan_data["valid"] is True
    assert plan_data["dry_run"] is True

    refresh = runner.invoke(
        main,
        ["backend", "refresh-plan", "examples/documents", "--root", ".", "--format", "json"],
    )
    # The refresh plan is expected to exit with 1 while still printing JSON
    # that reports a dirty state.
    # NOTE(review): this depends on the cache state under the working
    # directory (``--root .``) -- confirm it holds in CI.
    assert refresh.exit_code == 1, refresh.output
    refresh_data = json.loads(refresh.output)
    assert refresh_data["dirty"] is True
    assert refresh_data["counts"]["needs_parse"] >= 1


def test_large_corpus_performance_smoke_stays_bounded(tmp_path: Path) -> None:
    """Index and search 120 synthetic notes and assert wall-clock upper bounds.

    Indexing must finish in under 30 seconds and the search in under 5 seconds,
    both measured with ``time.perf_counter`` around the CLI invocation.
    """
    docs = tmp_path / "docs"
    docs.mkdir()
    for index in range(120):
        (docs / f"note-{index:03d}.md").write_text(
            f"""# Note {index}

## Summary

This synthetic performance smoke document mentions adoption latency and search.

## Details

The index should stay responsive for local corpora without requiring services.
""",
            encoding="utf-8",
        )

    runner = CliRunner()

    started = time.perf_counter()
    # Named ``indexed`` so the loop variable ``index`` above is not rebound.
    indexed = runner.invoke(
        main,
        ["cache", "index", str(docs), "--root", str(tmp_path), "--format", "json"],
    )
    index_seconds = time.perf_counter() - started
    assert indexed.exit_code == 0, indexed.output
    assert len(json.loads(indexed.output)["indexed"]) == 120
    assert index_seconds < 30

    started = time.perf_counter()
    search = runner.invoke(
        main,
        ["search", "adoption", "--root", str(tmp_path), "--limit", "10", "--format", "json"],
    )
    search_seconds = time.perf_counter() - started
    assert search.exit_code == 0, search.output
    assert json.loads(search.output)["count"] >= 1
    assert search_seconds < 5