"""Opt-in capacity checks for the ``markitect_tool`` Markdown toolkit.

The whole module is skipped unless ``markitect_tool`` is importable *and* the
``KONTEXTUAL_RUN_CAPACITY`` environment variable is ``1``, ``true`` or ``yes``
(compared case-insensitively).  Each test times one or more bulk operations
with ``_timed`` and fails through ``_assert_within`` when an operation exceeds
its wall-clock budget, then checks the shape of the results.
"""

import importlib.util
import os
import time
from collections.abc import Callable
from pathlib import Path
from typing import TypeVar

import pytest

_T = TypeVar("_T")

pytestmark = [pytest.mark.integration, pytest.mark.markitect_tool, pytest.mark.capacity]

# ``mkt`` is bound in every branch so the module always imports cleanly; when
# it is ``None`` a skip marker has been appended and no test body runs.
if importlib.util.find_spec("markitect_tool") is None:
    pytestmark.append(
        pytest.mark.skip(
            reason="Install kontextual-engine[markdown] to run markitect-tool capacity tests."
        )
    )
    mkt = None
elif os.environ.get("KONTEXTUAL_RUN_CAPACITY", "").lower() not in {"1", "true", "yes"}:
    pytestmark.append(
        pytest.mark.skip(
            reason="Set KONTEXTUAL_RUN_CAPACITY=1 to run opt-in capacity sentinels."
        )
    )
    mkt = None
else:
    import markitect_tool as mkt


def test_large_markdown_parse_query_and_extract_capacity() -> None:
    """Parse a 650-section fixture, then query and extract one section by heading."""
    markdown = _large_decision_markdown(section_count=650)

    elapsed, document = _timed(lambda: mkt.parse_markdown(markdown, source_path="large.md"))
    _assert_within("parse 650-section markdown", elapsed, seconds=6.0)

    elapsed, matches = _timed(
        lambda: mkt.query_document(document, "sections[heading=Decision 640]")
    )
    _assert_within("query exact section in 650-section markdown", elapsed, seconds=2.0)

    elapsed, extracted = _timed(
        lambda: mkt.extract_document(document, "sections[heading=Decision 640]")
    )
    _assert_within("extract exact section in 650-section markdown", elapsed, seconds=2.0)

    # 650 generated "Decision" sections plus the single "# Capacity Fixture" title.
    assert len(document.sections) == 651
    assert len(document.headings) == 651
    assert len(matches) == 1
    assert "CAPACITY-MARKER-640" in extracted[0]


def test_repeated_selectors_over_large_document_capacity() -> None:
    """Run six selectors twelve times each over a 420-section document."""
    document = mkt.parse_markdown(_large_decision_markdown(section_count=420))
    selectors = [
        "frontmatter.status",
        "headings[level=2]",
        "blocks[type=bullet_list]",
        "sections[contains~=CAPACITY-MARKER-120]",
        "sections[heading=Decision 240]",
        "metrics.document.sections",
    ]

    def run_queries() -> list[int]:
        """Return the match count of every query, in execution order."""
        counts: list[int] = []
        for _ in range(12):
            counts.extend(
                len(mkt.query_document(document, selector)) for selector in selectors
            )
        return counts

    elapsed, counts = _timed(run_queries)
    _assert_within("72 selector queries over 420-section markdown", elapsed, seconds=5.0)

    assert min(counts) >= 1
    assert max(counts) >= 420


def test_include_fanout_compose_and_transform_capacity(tmp_path: Path) -> None:
    """Resolve, compose and transform a bundle built from 90 partial files."""
    partials = []
    for index in range(90):
        partial = tmp_path / f"partial-{index:03}.md"
        partial.write_text(_partial_markdown(index), encoding="utf-8")
        partials.append(partial)

    bundle = tmp_path / "bundle.md"
    # NOTE(review): the per-partial literal below is empty in this copy of the
    # file, so the bundle is written as newlines only.  The later
    # ``included_paths == 90`` assertion presumably relies on an include
    # directive naming each partial here -- confirm against the markitect-tool
    # include syntax and restore it.
    bundle.write_text(
        "\n".join(
            f''
            for partial in partials
        ),
        encoding="utf-8",
    )

    elapsed, included = _timed(
        lambda: mkt.resolve_includes(
            bundle.read_text(encoding="utf-8"),
            base_dir=tmp_path,
            current_path=bundle,
        )
    )
    _assert_within("resolve 90 include fan-out bundle", elapsed, seconds=8.0)

    elapsed, composed = _timed(
        lambda: mkt.compose_files(partials, title="Capacity Bundle", heading_delta=1)
    )
    _assert_within("compose 90 markdown partials", elapsed, seconds=5.0)

    elapsed, transformed = _timed(
        lambda: mkt.transform_markdown(
            included.markdown,
            set_frontmatter={"status": "capacity-check"},
            heading_delta=1,
            source_path=str(bundle),
        )
    )
    _assert_within("transform resolved include fan-out bundle", elapsed, seconds=5.0)

    assert len(included.included_paths) == 90
    assert "### Include Target" in included.markdown
    assert composed.markdown.startswith("# Capacity Bundle")
    assert "status: capacity-check" in transformed.markdown


def test_context_package_many_sources_policy_filtering_capacity(tmp_path: Path) -> None:
    """Package 140 sources, then activate them under a policy allowing only ``public``."""
    sources = []
    for index in range(140):
        source = tmp_path / f"source-{index:03}.md"
        # Even indices are labelled "public", odd ones "internal": 70 of each.
        label = "public" if index % 2 == 0 else "internal"
        source.write_text(_context_source_markdown(index, label), encoding="utf-8")
        sources.append(source)

    gateway = mkt.LocalLabelPolicyGateway(
        {
            "id": "capacity-policy",
            "subjects": {
                "reader": {
                    "allowed_labels": ["public"],
                    "allowed_actions": ["read", "activate"],
                }
            },
            "default_subject": "reader",
        }
    )

    elapsed, package = _timed(
        lambda: mkt.create_context_package_from_sources(
            "sections[heading=Decision]",
            sources,
            root=tmp_path,
            namespace=mkt.MemoryNamespace(project="kontextual-engine", task="capacity"),
            budget=mkt.ContextBudget(max_items=160),
        )
    )
    _assert_within("create context package from 140 markdown sources", elapsed, seconds=12.0)

    elapsed, activation = _timed(
        lambda: mkt.activate_context_package(
            package,
            policy_gateway=gateway,
            subject="reader",
        )
    )
    _assert_within(
        "activate and policy-filter 140-source context package", elapsed, seconds=6.0
    )

    assert len(package.items) == 140
    assert len(activation.items) == 70
    assert "PUBLIC-CAPACITY-000" in activation.content
    assert "INTERNAL-CAPACITY-001" not in activation.content
    assert activation.policy["summary"]["denied"] == 70


def test_snapshot_identity_many_files_capacity(tmp_path: Path) -> None:
    """Compute snapshot identities for 120 files and expect 120 distinct ids."""
    paths = []
    for index in range(120):
        path = tmp_path / f"snapshot-{index:03}.md"
        path.write_text(_context_source_markdown(index, "public"), encoding="utf-8")
        paths.append(path)

    elapsed, identities = _timed(
        lambda: [mkt.snapshot_identity_for_file(path) for path in paths]
    )
    _assert_within("compute 120 markdown snapshot identities", elapsed, seconds=4.0)

    assert len({identity.snapshot_id for identity in identities}) == 120
    assert all(identity.content_hash.startswith("sha256:") for identity in identities)


def _large_decision_markdown(section_count: int) -> str:
    """Build a Markdown fixture with front matter, a title and numbered sections.

    Args:
        section_count: Number of ``## Decision <index>`` sections to generate,
            indexed from 0.

    Returns:
        The newline-joined document.  Each section carries a
        ``CAPACITY-MARKER-<index>`` paragraph and a two-item bullet list.
    """
    sections = [
        "---",
        "document_type: capacity-fixture",
        "status: active",
        "owner: Platform Knowledge",
        "---",
        "",
        "# Capacity Fixture",
        "",
    ]
    for index in range(section_count):
        sections.extend(
            [
                f"## Decision {index}",
                "",
                (
                    f"CAPACITY-MARKER-{index} records a synthetic decision section "
                    "with enough text to exercise parsing, selector matching, and extraction."
                ),
                "",
                "- Parser shape must stay stable.",
                "- Selector scans must remain bounded enough for adapter use.",
                "",
            ]
        )
    return "\n".join(sections)


def _partial_markdown(index: int) -> str:
    """Return a partial with an ``Include Target`` section and an ``Ignore`` section.

    Args:
        index: Number embedded in the title and in the include-target text.
    """
    return "\n".join(
        [
            f"# Partial {index}",
            "",
            "## Include Target",
            "",
            f"Included capacity text {index}.",
            "",
            "## Ignore",
            "",
            "This section should not be selected by the include resolver.",
            "",
        ]
    )


def _context_source_markdown(index: int, label: str) -> str:
    """Return a source document whose front matter carries a policy label.

    Args:
        index: Number embedded in the title, the body and (zero-padded to three
            digits) the marker.
        label: Policy label written to the front matter.  ``"public"`` yields
            ``status: active``; any other value yields ``status: draft``.

    Returns:
        Markdown with a single ``## Decision`` section containing the marker
        ``<LABEL>-CAPACITY-<index>``.
    """
    marker = f"{label.upper()}-CAPACITY-{index:03}"
    return "\n".join(
        [
            "---",
            "document_type: capacity-source",
            f"status: {'active' if label == 'public' else 'draft'}",
            "policy:",
            f" labels: [{label}]",
            "---",
            "",
            f"# Capacity Source {index}",
            "",
            "## Decision",
            "",
            f"{marker} uses Markitect context packaging for generated source {index}.",
            "",
        ]
    )


def _timed(operation: Callable[[], _T]) -> tuple[float, _T]:
    """Call ``operation`` once and measure it with ``time.perf_counter``.

    Args:
        operation: Zero-argument callable to run.

    Returns:
        ``(elapsed_seconds, result)`` where ``result`` is whatever
        ``operation`` returned.
    """
    start = time.perf_counter()
    value = operation()
    return time.perf_counter() - start, value


def _assert_within(name: str, elapsed: float, *, seconds: float) -> None:
    """Fail with a descriptive message when ``elapsed`` exceeds ``seconds``.

    Args:
        name: Label for the timed operation, used in the failure message.
        elapsed: Measured duration in seconds.
        seconds: Inclusive upper bound for ``elapsed``.

    Raises:
        AssertionError: If ``elapsed > seconds``.
    """
    assert elapsed <= seconds, f"{name} took {elapsed:.3f}s, expected <= {seconds:.3f}s"