From fcd50bdfe8f76af61e1ec428d98185f5c4db2905 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 5 May 2026 20:26:56 +0200 Subject: [PATCH] Contract robustness and bottleneck test --- README.md | 2 + docs/markitect-tool-capacity-risks.md | 82 ++++++ docs/markitect-tool-integration-usecases.md | 31 ++- examples/markitect-tool-contract/README.md | 21 ++ .../composition/context-bundle.md | 10 + .../contracts/decision-record.contract.md | 51 ++++ .../corpus/adr-0001-context-packages.md | 33 +++ .../corpus/adr-invalid-missing-decision.md | 18 ++ .../corpus/engineering-policy.md | 24 ++ .../corpus/internal-risk-note.md | 22 ++ .../manifests/agent-context.yaml | 21 ++ pyproject.toml | 1 + tests/test_markitect_tool_capacity.py | 249 ++++++++++++++++++ tests/test_markitect_tool_contract.py | 168 ++++++------ 14 files changed, 654 insertions(+), 79 deletions(-) create mode 100644 docs/markitect-tool-capacity-risks.md create mode 100644 examples/markitect-tool-contract/README.md create mode 100644 examples/markitect-tool-contract/composition/context-bundle.md create mode 100644 examples/markitect-tool-contract/contracts/decision-record.contract.md create mode 100644 examples/markitect-tool-contract/corpus/adr-0001-context-packages.md create mode 100644 examples/markitect-tool-contract/corpus/adr-invalid-missing-decision.md create mode 100644 examples/markitect-tool-contract/corpus/engineering-policy.md create mode 100644 examples/markitect-tool-contract/corpus/internal-risk-note.md create mode 100644 examples/markitect-tool-contract/manifests/agent-context.yaml create mode 100644 tests/test_markitect_tool_capacity.py diff --git a/README.md b/README.md index 1b52392..4775a06 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ Start here: - `docs/markitect-main-scope-assessment.md` - `docs/markitect-tool-reuse-boundary.md` - `docs/markitect-tool-integration-usecases.md` +- `docs/markitect-tool-capacity-risks.md` +- `examples/markitect-tool-contract/` - `docs/phase-memory-boundary.md` - `docs/system-layer-extraction-inventory.md` - `docs/system-layer-migration-backlog.md` diff --git a/docs/markitect-tool-capacity-risks.md b/docs/markitect-tool-capacity-risks.md new file mode 100644 index 0000000..53bea85 --- /dev/null +++ b/docs/markitect-tool-capacity-risks.md @@ -0,0 +1,82 @@ +# markitect-tool Capacity Risk Sentinels + +Date: 2026-05-05 + +Status: opt-in bottleneck tests for the `kontextual-engine` to +`markitect-tool` integration boundary. + +## Purpose + +The example-backed contract tests prove that the Markitect interface behaves +correctly for representative documents. Capacity sentinels add one more layer: +they exercise larger generated examples so we can notice algorithmic trouble +before engine workplans depend on the interface. + +These tests are not microbenchmarks. They are deliberately coarse, generous, +and opt-in. A failure should trigger investigation, profiling, or an upstream +`markitect-tool` improvement before the engine builds more assumptions on top. + +## Suspected Bottleneck Areas + +| Area | Risk | Sentinel | +| --- | --- | --- | +| Large Markdown parsing | Section-heavy documents may create many headings, blocks, tokens, and sections. | Parse a generated document with hundreds of sections and verify document shape under a generous wall-clock budget. | +| Selector extraction | Repeated selectors over large documents can become `queries x document-size`. | Run multiple heading, section, frontmatter, and block selectors over one parsed large document. | +| Include resolution and composition | Fan-out includes with selectors may repeatedly parse included files and expand output size. | Resolve a generated include fan-out bundle and compose many Markdown files. | +| Context package creation | Packing many source files can parse and query each file, then filter by policy. | Create and activate a context package from many generated public/internal Markdown sources. | +| Snapshot identity | Hashing many or larger files should remain predictable and content-addressed. | Generate many Markdown files and compute stable snapshot identities. | + +## Running The Sentinels + +Normal test runs skip these tests. Run them against the sibling +`markitect-tool` checkout with: + +```bash +KONTEXTUAL_RUN_CAPACITY=1 \ +PYTHONPATH=/home/worsch/kontextual-engine/src:/home/worsch/markitect-tool/src \ + python3 -m pytest tests/test_markitect_tool_capacity.py -q +``` + +Run all Markitect interface checks with: + +```bash +KONTEXTUAL_RUN_CAPACITY=1 \ +PYTHONPATH=/home/worsch/kontextual-engine/src:/home/worsch/markitect-tool/src \ + python3 -m pytest -m "markitect_tool" -q +``` + +## Interpretation + +- Passing sentinels mean the current integration boundary is healthy enough for + the planned engine work. +- Failing sentinels should be treated as interface risk, not as proof of engine + failure. +- If a sentinel is too noisy, prefer improving its generated scenario or + threshold over deleting it. +- If a real use case exceeds the current generated sizes, add a new sentinel + before relying on the behavior in an engine workplan. + +## Current Generated Sizes + +The tests currently generate: + +- one section-heavy document with hundreds of decision sections, +- dozens of repeated selector queries over a large parsed document, +- a fan-out include bundle over many partial files, +- a context package over many public/internal source files, +- many snapshot identities over generated Markdown files. + +The generated data lives in temporary pytest directories so the repository +does not carry bulky synthetic corpora. + +## Initial Local Baseline + +On 2026-05-05, running against `/home/worsch/markitect-tool/src` on the local +WSL workspace, all sentinels passed. The slowest observed sentinel was repeated +selector queries over a large parsed document, followed by large parse/query +and context-package creation. This suggests selectors are the first area to +watch as engine retrieval workloads grow. + +The baseline is observational, not a committed performance guarantee. The +budgets in `tests/test_markitect_tool_capacity.py` are intentionally wider than +the observed timings to avoid false failures from normal workstation variance. diff --git a/docs/markitect-tool-integration-usecases.md b/docs/markitect-tool-integration-usecases.md index a53c2e7..bca54ed 100644 --- a/docs/markitect-tool-integration-usecases.md +++ b/docs/markitect-tool-integration-usecases.md @@ -14,7 +14,11 @@ Instead, it should wrap them as adapters and persist engine-owned assets, lineage, policy decisions, audit events, and service contracts around them. The executable companion for this document is -`tests/test_markitect_tool_contract.py`. +`tests/test_markitect_tool_contract.py`. The reusable fixture corpus lives in +`examples/markitect-tool-contract/`. +Opt-in bottleneck sentinels are described in +`docs/markitect-tool-capacity-risks.md` and implemented in +`tests/test_markitect_tool_capacity.py`. ## Expected Dependency Shape @@ -26,6 +30,22 @@ The executable companion for this document is - Persistence posture: store serializable Markitect results and provenance as adapter metadata, not as canonical domain objects. +Run the examples against the sibling source checkout during integration +development with: + +```bash +PYTHONPATH=/home/worsch/kontextual-engine/src:/home/worsch/markitect-tool/src \ + python3 -m pytest tests/test_markitect_tool_contract.py -q +``` + +Run the larger capacity sentinels with: + +```bash +KONTEXTUAL_RUN_CAPACITY=1 \ +PYTHONPATH=/home/worsch/kontextual-engine/src:/home/worsch/markitect-tool/src \ + python3 -m pytest tests/test_markitect_tool_capacity.py -q +``` + ## Use Case 1: Markdown Normalization Intent: convert Markdown source content into structured frontmatter, headings, @@ -207,7 +227,10 @@ Engine expectation: | Transform and include provenance | Markdown ops retain Markitect provenance. | | Snapshot identity | Engine stores Markitect snapshot metadata without owning the algorithm. | | Context package policy filtering | Agent context can reuse Markitect packages and local label policy. | +| Document contracts | Markdown validation can call Markitect contracts without moving contract semantics into the engine. | +| Capacity sentinels | Larger generated examples expose likely parser, selector, include, context-package, and snapshot bottlenecks. | -These tests are intentionally small. They are not a replacement for -`markitect-tool`'s own test suite; they assert only the behaviors this engine -depends on. +These tests are intentionally small but example-backed. They are not a +replacement for `markitect-tool`'s own test suite; they assert only the +behaviors this engine depends on and provide concrete data for diagnosing +interface drift. diff --git a/examples/markitect-tool-contract/README.md b/examples/markitect-tool-contract/README.md new file mode 100644 index 0000000..4703550 --- /dev/null +++ b/examples/markitect-tool-contract/README.md @@ -0,0 +1,21 @@ +# markitect-tool Contract Examples + +This directory is a small interface lab for the `kontextual-engine` dependency +on `markitect-tool`. + +The files are intentionally ordinary Markdown/YAML fixtures rather than inline +test strings. They should help us validate Markitect behavior before engine +workplans depend on it, and they should be updated whenever the expected +integration contract changes. + +Covered examples: + +- Markdown parsing with frontmatter, headings, sections, lists, and source + paths. +- Selector extraction for sections, frontmatter paths, and blocks. +- Include resolution, heading shifts, composition, and operation provenance. +- Snapshot identity for Markdown files. +- Context-package creation from sources and manifests. +- Local label policy filtering for public versus internal context. +- Basic document contract validation for decision records. + diff --git a/examples/markitect-tool-contract/composition/context-bundle.md b/examples/markitect-tool-contract/composition/context-bundle.md new file mode 100644 index 0000000..47cc7c3 --- /dev/null +++ b/examples/markitect-tool-contract/composition/context-bundle.md @@ -0,0 +1,10 @@ +# Kontextual Engine Context Bundle + +{{include:../corpus/adr-0001-context-packages.md}} + + + +```markdown +{{include:../corpus/internal-risk-note.md}} +``` + diff --git a/examples/markitect-tool-contract/contracts/decision-record.contract.md b/examples/markitect-tool-contract/contracts/decision-record.contract.md new file mode 100644 index 0000000..14e9bf5 --- /dev/null +++ b/examples/markitect-tool-contract/contracts/decision-record.contract.md @@ -0,0 +1,51 @@ +# Decision Record Contract + +```yaml contract +id: kontextual-decision-record-v1 +document: + type: adr + title: Architecture Decision Record +fields: + status: + type: string + required: true + enum: [proposed, accepted, superseded] + owner: + type: string + required: true +metrics: + document: + words: + min: 35 + max: 500 + severity: warning +sections: + - id: context + title: Context + presence: required + level: 2 + order: + before: decision + assertions: + - id: context-names-problem + contains_any: [problem, motivation, need] + severity: warning + guidance: Explain why the decision exists. + - id: decision + title: Decision + presence: required + level: 2 + assertions: + - id: decision-commits + matches: "\\b(use|adopt|choose|will)\\b" + severity: error + guidance: State the actual decision. + - id: consequences + title: Consequences + presence: recommended + level: 2 + - id: deprecated + title: Deprecated Approach + presence: forbidden +``` + diff --git a/examples/markitect-tool-contract/corpus/adr-0001-context-packages.md b/examples/markitect-tool-contract/corpus/adr-0001-context-packages.md new file mode 100644 index 0000000..af6cc8c --- /dev/null +++ b/examples/markitect-tool-contract/corpus/adr-0001-context-packages.md @@ -0,0 +1,33 @@ +--- +document_type: adr +status: accepted +owner: Platform Knowledge +tags: + - context + - markdown + - governance +policy: + labels: [public, engineering] +source: + system: repo + path: examples/markitect-tool-contract/corpus/adr-0001-context-packages.md +--- + +# Use Markitect Context Packages + +## Context + +The problem is that the engine needs Markdown-native structure and context +packages without owning a second Markdown parser or selector language. + +## Decision + +We will use markitect-tool as the Markdown syntax, selector, deterministic +operation, snapshot, and context-package layer for Markdown-backed assets. + +## Consequences + +- Engine assets stay cross-format and durable. +- Markdown selectors stay Markitect-owned. +- Adapter provenance can be stored with engine transformation runs. + diff --git a/examples/markitect-tool-contract/corpus/adr-invalid-missing-decision.md b/examples/markitect-tool-contract/corpus/adr-invalid-missing-decision.md new file mode 100644 index 0000000..801f3d9 --- /dev/null +++ b/examples/markitect-tool-contract/corpus/adr-invalid-missing-decision.md @@ -0,0 +1,18 @@ +--- +document_type: adr +status: accepted +owner: Platform Knowledge +policy: + labels: [public, engineering] +--- + +# Weak Decision Record + +## Context + +The note mentions a need but does not contain the required decision section. + +## Deprecated Approach + +This forbidden section should be reported by the Markitect contract checker. + diff --git a/examples/markitect-tool-contract/corpus/engineering-policy.md b/examples/markitect-tool-contract/corpus/engineering-policy.md new file mode 100644 index 0000000..8ed42ed --- /dev/null +++ b/examples/markitect-tool-contract/corpus/engineering-policy.md @@ -0,0 +1,24 @@ +--- +document_type: policy +status: active +owner: Platform Knowledge +policy: + labels: [public, governance] +source: + system: repo + path: examples/markitect-tool-contract/corpus/engineering-policy.md +--- + +# Engineering Knowledge Policy + +## Controls + +Published context packages must preserve source paths, source spans, policy +labels, and enough provenance for the engine to audit how the package was +assembled. + +## Review + +Sensitive or high-impact generated artifacts must pass through an engine-owned +review gate before publication or export. + diff --git a/examples/markitect-tool-contract/corpus/internal-risk-note.md b/examples/markitect-tool-contract/corpus/internal-risk-note.md new file mode 100644 index 0000000..76c60ba --- /dev/null +++ b/examples/markitect-tool-contract/corpus/internal-risk-note.md @@ -0,0 +1,22 @@ +--- +document_type: risk-note +status: draft +owner: Platform Knowledge +policy: + labels: [internal] +source: + system: repo + path: examples/markitect-tool-contract/corpus/internal-risk-note.md +--- + +# Internal Retrieval Risk + +## Risk + +This internal note should not appear in a public context activation. + +## Mitigation + +Permission filtering must happen before snippets, context packages, or derived +outputs are returned to a caller. + diff --git a/examples/markitect-tool-contract/manifests/agent-context.yaml b/examples/markitect-tool-contract/manifests/agent-context.yaml new file mode 100644 index 0000000..ed1818f --- /dev/null +++ b/examples/markitect-tool-contract/manifests/agent-context.yaml @@ -0,0 +1,21 @@ +title: Kontextual Engine Markdown Adapter Context +intent: Provide public Markdown-backed context for adapter boundary testing. +namespace: + project: kontextual-engine + task: markitect-tool-contract +budget: + max_items: 4 +retrieval_recipes: + - kind: selector + engine: selector + query: sections[heading=Decision] + sources: + - corpus/adr-0001-context-packages.md + - kind: selector + engine: selector + query: sections[heading=Controls] + sources: + - corpus/engineering-policy.md +metadata: + fixture: markitect-tool-contract + diff --git a/pyproject.toml b/pyproject.toml index b2cbffe..2955cc0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,4 +40,5 @@ pythonpath = ["src"] markers = [ "integration: tests that exercise optional external package contracts", "markitect_tool: tests for the optional markitect-tool adapter boundary", + "capacity: opt-in capacity sentinel tests for bottleneck and scaling risks", ] diff --git a/tests/test_markitect_tool_capacity.py b/tests/test_markitect_tool_capacity.py new file mode 100644 index 0000000..36a2f5e --- /dev/null +++ b/tests/test_markitect_tool_capacity.py @@ -0,0 +1,249 @@ +import importlib.util +import os +import time +from pathlib import Path + +import pytest + + +pytestmark = [pytest.mark.integration, pytest.mark.markitect_tool, pytest.mark.capacity] +if importlib.util.find_spec("markitect_tool") is None: + pytestmark.append( + pytest.mark.skip( + reason="Install kontextual-engine[markdown] to run markitect-tool capacity tests." + ) + ) + mkt = None +elif os.environ.get("KONTEXTUAL_RUN_CAPACITY", "").lower() not in {"1", "true", "yes"}: + pytestmark.append( + pytest.mark.skip( + reason="Set KONTEXTUAL_RUN_CAPACITY=1 to run opt-in capacity sentinels." + ) + ) + mkt = None +else: + import markitect_tool as mkt + + +def test_large_markdown_parse_query_and_extract_capacity() -> None: + markdown = _large_decision_markdown(section_count=650) + + elapsed, document = _timed(lambda: mkt.parse_markdown(markdown, source_path="large.md")) + _assert_within("parse 650-section markdown", elapsed, seconds=6.0) + + elapsed, matches = _timed(lambda: mkt.query_document(document, "sections[heading=Decision 640]")) + _assert_within("query exact section in 650-section markdown", elapsed, seconds=2.0) + + elapsed, extracted = _timed(lambda: mkt.extract_document(document, "sections[heading=Decision 640]")) + _assert_within("extract exact section in 650-section markdown", elapsed, seconds=2.0) + + assert len(document.sections) == 651 + assert len(document.headings) == 651 + assert len(matches) == 1 + assert "CAPACITY-MARKER-640" in extracted[0] + + +def test_repeated_selectors_over_large_document_capacity() -> None: + document = mkt.parse_markdown(_large_decision_markdown(section_count=420)) + selectors = [ + "frontmatter.status", + "headings[level=2]", + "blocks[type=bullet_list]", + "sections[contains~=CAPACITY-MARKER-120]", + "sections[heading=Decision 240]", + "metrics.document.sections", + ] + + def run_queries() -> list[int]: + counts = [] + for _ in range(12): + for selector in selectors: + counts.append(len(mkt.query_document(document, selector))) + return counts + + elapsed, counts = _timed(run_queries) + + _assert_within("72 selector queries over 420-section markdown", elapsed, seconds=5.0) + assert min(counts) >= 1 + assert max(counts) >= 420 + + +def test_include_fanout_compose_and_transform_capacity(tmp_path: Path) -> None: + partials = [] + for index in range(90): + partial = tmp_path / f"partial-{index:03}.md" + partial.write_text(_partial_markdown(index), encoding="utf-8") + partials.append(partial) + bundle = tmp_path / "bundle.md" + bundle.write_text( + "\n".join( + f'' + for partial in partials + ), + encoding="utf-8", + ) + + elapsed, included = _timed( + lambda: mkt.resolve_includes( + bundle.read_text(encoding="utf-8"), + base_dir=tmp_path, + current_path=bundle, + ) + ) + _assert_within("resolve 90 include fan-out bundle", elapsed, seconds=8.0) + + elapsed, composed = _timed(lambda: mkt.compose_files(partials, title="Capacity Bundle", heading_delta=1)) + _assert_within("compose 90 markdown partials", elapsed, seconds=5.0) + + elapsed, transformed = _timed( + lambda: mkt.transform_markdown( + included.markdown, + set_frontmatter={"status": "capacity-check"}, + heading_delta=1, + source_path=str(bundle), + ) + ) + _assert_within("transform resolved include fan-out bundle", elapsed, seconds=5.0) + + assert len(included.included_paths) == 90 + assert "### Include Target" in included.markdown + assert composed.markdown.startswith("# Capacity Bundle") + assert "status: capacity-check" in transformed.markdown + + +def test_context_package_many_sources_policy_filtering_capacity(tmp_path: Path) -> None: + sources = [] + for index in range(140): + source = tmp_path / f"source-{index:03}.md" + label = "public" if index % 2 == 0 else "internal" + source.write_text(_context_source_markdown(index, label), encoding="utf-8") + sources.append(source) + gateway = mkt.LocalLabelPolicyGateway( + { + "id": "capacity-policy", + "subjects": { + "reader": { + "allowed_labels": ["public"], + "allowed_actions": ["read", "activate"], + } + }, + "default_subject": "reader", + } + ) + + elapsed, package = _timed( + lambda: mkt.create_context_package_from_sources( + "sections[heading=Decision]", + sources, + root=tmp_path, + namespace=mkt.MemoryNamespace(project="kontextual-engine", task="capacity"), + budget=mkt.ContextBudget(max_items=160), + ) + ) + _assert_within("create context package from 140 markdown sources", elapsed, seconds=12.0) + + elapsed, activation = _timed( + lambda: mkt.activate_context_package( + package, + policy_gateway=gateway, + subject="reader", + ) + ) + _assert_within("activate and policy-filter 140-source context package", elapsed, seconds=6.0) + + assert len(package.items) == 140 + assert len(activation.items) == 70 + assert "PUBLIC-CAPACITY-000" in activation.content + assert "INTERNAL-CAPACITY-001" not in activation.content + assert activation.policy["summary"]["denied"] == 70 + + +def test_snapshot_identity_many_files_capacity(tmp_path: Path) -> None: + paths = [] + for index in range(120): + path = tmp_path / f"snapshot-{index:03}.md" + path.write_text(_context_source_markdown(index, "public"), encoding="utf-8") + paths.append(path) + + elapsed, identities = _timed(lambda: [mkt.snapshot_identity_for_file(path) for path in paths]) + _assert_within("compute 120 markdown snapshot identities", elapsed, seconds=4.0) + + assert len({identity.snapshot_id for identity in identities}) == 120 + assert all(identity.content_hash.startswith("sha256:") for identity in identities) + + +def _large_decision_markdown(section_count: int) -> str: + sections = [ + "---", + "document_type: capacity-fixture", + "status: active", + "owner: Platform Knowledge", + "---", + "", + "# Capacity Fixture", + "", + ] + for index in range(section_count): + sections.extend( + [ + f"## Decision {index}", + "", + ( + f"CAPACITY-MARKER-{index} records a synthetic decision section " + "with enough text to exercise parsing, selector matching, and extraction." + ), + "", + "- Parser shape must stay stable.", + "- Selector scans must remain bounded enough for adapter use.", + "", + ] + ) + return "\n".join(sections) + + +def _partial_markdown(index: int) -> str: + return "\n".join( + [ + f"# Partial {index}", + "", + "## Include Target", + "", + f"Included capacity text {index}.", + "", + "## Ignore", + "", + "This section should not be selected by the include resolver.", + "", + ] + ) + + +def _context_source_markdown(index: int, label: str) -> str: + marker = f"{label.upper()}-CAPACITY-{index:03}" + return "\n".join( + [ + "---", + "document_type: capacity-source", + f"status: {'active' if label == 'public' else 'draft'}", + "policy:", + f" labels: [{label}]", + "---", + "", + f"# Capacity Source {index}", + "", + "## Decision", + "", + f"{marker} uses Markitect context packaging for generated source {index}.", + "", + ] + ) + + +def _timed(operation): + start = time.perf_counter() + value = operation() + return time.perf_counter() - start, value + + +def _assert_within(name: str, elapsed: float, *, seconds: float) -> None: + assert elapsed <= seconds, f"{name} took {elapsed:.3f}s, expected <= {seconds:.3f}s" diff --git a/tests/test_markitect_tool_contract.py b/tests/test_markitect_tool_contract.py index 0cd327b..69d5219 100644 --- a/tests/test_markitect_tool_contract.py +++ b/tests/test_markitect_tool_contract.py @@ -1,48 +1,39 @@ +import importlib.util from pathlib import Path import pytest pytestmark = [pytest.mark.integration, pytest.mark.markitect_tool] +if importlib.util.find_spec("markitect_tool") is None: + pytestmark.append( + pytest.mark.skip( + reason="Install kontextual-engine[markdown] to run markitect-tool contract tests." + ) + ) + mkt = None +else: + import markitect_tool as mkt -mkt = pytest.importorskip( - "markitect_tool", - reason="Install kontextual-engine[markdown] to run markitect-tool contract tests.", -) - - -SAMPLE_MARKDOWN = """--- -document_type: decision -status: accepted -policy: - labels: [public] ---- - -# Engine Boundary - -## Context - -The engine needs Markdown-native structure without owning a Markdown parser. - -## Decision - -Use markitect-tool as the syntax and deterministic operations layer. - -## Consequences - -- Engine assets stay cross-format. -- Markdown selectors stay Markitect-owned. -""" +EXAMPLE_ROOT = Path(__file__).resolve().parents[1] / "examples" / "markitect-tool-contract" +ADR = EXAMPLE_ROOT / "corpus" / "adr-0001-context-packages.md" +INVALID_ADR = EXAMPLE_ROOT / "corpus" / "adr-invalid-missing-decision.md" +POLICY = EXAMPLE_ROOT / "corpus" / "engineering-policy.md" +INTERNAL = EXAMPLE_ROOT / "corpus" / "internal-risk-note.md" +BUNDLE = EXAMPLE_ROOT / "composition" / "context-bundle.md" +MANIFEST = EXAMPLE_ROOT / "manifests" / "agent-context.yaml" +CONTRACT = EXAMPLE_ROOT / "contracts" / "decision-record.contract.md" def test_markitect_parser_returns_structured_markdown_document() -> None: - document = mkt.parse_markdown(SAMPLE_MARKDOWN, source_path="docs/decision.md") + document = mkt.parse_markdown_file(ADR) serialized = document.to_dict() assert serialized["frontmatter"]["status"] == "accepted" - assert serialized["source_path"] == "docs/decision.md" + assert serialized["frontmatter"]["owner"] == "Platform Knowledge" + assert serialized["source_path"] == str(ADR) assert [heading["text"] for heading in serialized["headings"]] == [ - "Engine Boundary", + "Use Markitect Context Packages", "Context", "Decision", "Consequences", @@ -51,80 +42,93 @@ def test_markitect_parser_returns_structured_markdown_document() -> None: def test_markitect_selectors_extract_source_grounded_markdown_units() -> None: - document = mkt.parse_markdown(SAMPLE_MARKDOWN) + document = mkt.parse_markdown_file(ADR) + status = mkt.extract_document(document, "frontmatter.status") matches = mkt.query_document(document, "sections[heading=Decision]") extracted = mkt.extract_document(document, "sections[heading=Decision]") + bullets = mkt.query_document(document, "blocks[type=bullet_list]") + assert status == ["accepted"] assert len(matches) == 1 assert matches[0].kind == "section" assert matches[0].line is not None - assert "deterministic operations layer" in matches[0].text + assert "context-package layer" in matches[0].text assert extracted == [ - "## Decision\n\nUse markitect-tool as the syntax and deterministic operations layer." + "## Decision\n\n" + "We will use markitect-tool as the Markdown syntax, selector, deterministic\n" + "operation, snapshot, and context-package layer for Markdown-backed assets." ] + assert len(bullets) == 1 + assert "Engine assets stay cross-format" in bullets[0].text -def test_markitect_ops_resolve_includes_transform_and_return_provenance(tmp_path: Path) -> None: - partial = tmp_path / "partial.md" - partial.write_text( - "# Included\n\n## Decision\n\nReuse Markitect operations.\n", - encoding="utf-8", - ) - +def test_markitect_ops_compose_include_transform_and_return_provenance() -> None: included = mkt.resolve_includes( - '{{include:partial.md}}', - base_dir=tmp_path, + BUNDLE.read_text(encoding="utf-8"), + base_dir=EXAMPLE_ROOT, + current_path=BUNDLE, + ) + composed = mkt.compose_files( + [ADR, POLICY], + title="Combined Markdown Context", + heading_delta=1, ) transformed = mkt.transform_markdown( included.markdown, - set_frontmatter={"status": "draft"}, + set_frontmatter={"status": "draft", "producer": {"name": "kontextual-engine"}}, heading_delta=1, - source_path="composed.md", + source_path=str(BUNDLE), ) - assert included.included_paths == [str(partial.resolve())] - assert included.provenance[0].operation == "include" - assert included.provenance[0].target_path == str(partial.resolve()) - assert "status: draft" in transformed.markdown - assert "## Included" in transformed.markdown - assert "### Decision" in transformed.markdown + assert included.included_paths == [str(ADR.resolve()), str(POLICY.resolve())] + assert [event.operation for event in included.provenance] == ["include", "include"] + assert included.provenance[1].metadata["selector"] == "sections[heading=Controls]" + assert "### Controls" in included.markdown + assert "{{include:../corpus/internal-risk-note.md}}" in included.markdown + assert "This internal note should not appear" not in included.markdown + assert composed.markdown.startswith("# Combined Markdown Context") + assert "## Use Markitect Context Packages" in composed.markdown + assert "document_type: adr" not in composed.markdown + assert "producer:" in transformed.markdown assert [event.operation for event in transformed.provenance] == [ "set_frontmatter", "shift_headings", ] -def test_markitect_snapshot_identity_is_content_addressed_adapter_metadata(tmp_path: Path) -> None: - source = tmp_path / "decision.md" - source.write_text(SAMPLE_MARKDOWN, encoding="utf-8") - - first = mkt.snapshot_identity_for_file(source, parse_options={"profile": "default"}) - second = mkt.snapshot_identity_for_file(source, parse_options={"profile": "default"}) - changed = mkt.snapshot_identity_for_file(source, parse_options={"profile": "strict"}) +def test_markitect_snapshot_identity_is_content_addressed_adapter_metadata() -> None: + first = mkt.snapshot_identity_for_file(ADR, parse_options={"profile": "default"}) + second = mkt.snapshot_identity_for_file(ADR, parse_options={"profile": "default"}) + changed = mkt.snapshot_identity_for_file(ADR, parse_options={"profile": "strict"}) assert first.snapshot_id == second.snapshot_id assert first.content_hash == second.content_hash assert first.parser == "markdown-it-py/commonmark" assert first.snapshot_id != changed.snapshot_id - assert first.to_dict()["source_path"] == str(source) + assert first.to_dict()["source_path"] == str(ADR) -def test_markitect_context_packages_filter_by_local_policy(tmp_path: Path) -> None: - public = tmp_path / "public.md" - private = tmp_path / "private.md" - public.write_text( - "---\npolicy:\n labels: [public]\n---\n# Public\n\nVisible context.\n", - encoding="utf-8", - ) - private.write_text( - "---\npolicy:\n labels: [internal]\n---\n# Private\n\nHidden context.\n", - encoding="utf-8", - ) +def test_markitect_context_packages_from_manifest_preserve_sources() -> None: + package = mkt.create_context_package_from_manifest(MANIFEST, root=EXAMPLE_ROOT) + activation = mkt.activate_context_package(package, target="thread:contract-test") + + assert package.title == "Kontextual Engine Markdown Adapter Context" + assert package.namespace.project == "kontextual-engine" + assert [item.source.path for item in package.items] == [ + "corpus/adr-0001-context-packages.md", + "corpus/engineering-policy.md", + ] + assert "Markdown-backed assets" in activation.content + assert "source paths" in activation.content + assert activation.metadata["package_title"] == package.title + + +def test_markitect_context_packages_filter_by_local_policy() -> None: package = mkt.create_context_package_from_sources( "document", - [public, private], - root=tmp_path, + [ADR, INTERNAL], + root=EXAMPLE_ROOT, namespace=mkt.MemoryNamespace(project="kontextual-engine", task="boundary"), budget=mkt.ContextBudget(max_items=5), ) @@ -133,7 +137,7 @@ def test_markitect_context_packages_filter_by_local_policy(tmp_path: Path) -> No "id": "kontextual-engine-boundary", "subjects": { "reader": { - "allowed_labels": ["public"], + "allowed_labels": ["public", "engineering"], "allowed_actions": ["read", "activate"], } }, @@ -149,6 +153,20 @@ def test_markitect_context_packages_filter_by_local_policy(tmp_path: Path) -> No assert package.namespace.project == "kontextual-engine" assert len(activation.items) == 1 - assert "Visible context" in activation.content - assert "Hidden context" not in activation.content + assert "Use Markitect Context Packages" in activation.content + assert "Internal Retrieval Risk" not in activation.content assert activation.policy["summary"]["denied"] == 1 + + +def test_markitect_document_contracts_accept_valid_and_report_invalid_documents() -> None: + contract = mkt.load_contract_file(CONTRACT) + valid = mkt.check_markdown_file(ADR, CONTRACT) + invalid = mkt.check_markdown_file(INVALID_ADR, CONTRACT) + invalid_codes = {diagnostic.code for diagnostic in invalid.diagnostics} + + assert contract.id == "kontextual-decision-record-v1" + assert valid.valid is True + assert valid.diagnostics == [] + assert invalid.valid is False + assert "contract.section.missing" in invalid_codes + assert "contract.section.forbidden" in invalid_codes