generated from coulomb/repo-seed
250 lines
8.2 KiB
Python
250 lines
8.2 KiB
Python
import importlib.util
|
|
import os
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
# Markers applied to every test collected from this module.
pytestmark = [
    pytest.mark.integration,
    pytest.mark.markitect_tool,
    pytest.mark.capacity,
]

# ``mkt`` is only bound to the real package when it is importable AND the
# capacity run was explicitly requested through the environment; otherwise it
# stays ``None`` and a skip marker carrying the reason is appended.
mkt = None
if importlib.util.find_spec("markitect_tool") is None:
    pytestmark.append(
        pytest.mark.skip(
            reason="Install kontextual-engine[markdown] to run markitect-tool capacity tests."
        )
    )
elif os.environ.get("KONTEXTUAL_RUN_CAPACITY", "").lower() not in {"1", "true", "yes"}:
    pytestmark.append(
        pytest.mark.skip(
            reason="Set KONTEXTUAL_RUN_CAPACITY=1 to run opt-in capacity sentinels."
        )
    )
else:
    import markitect_tool as mkt
|
|
|
|
|
|
def test_large_markdown_parse_query_and_extract_capacity() -> None:
    """Time parse, query, and extract on a fixture built with 650 decision sections.

    Each step is timed on its own and checked against its budget right after
    it runs; the structural assertions on the results come last.
    """
    target_selector = "sections[heading=Decision 640]"
    fixture_text = _large_decision_markdown(section_count=650)

    def parse():
        return mkt.parse_markdown(fixture_text, source_path="large.md")

    parse_seconds, document = _timed(parse)
    _assert_within("parse 650-section markdown", parse_seconds, seconds=6.0)

    def query():
        return mkt.query_document(document, target_selector)

    query_seconds, matches = _timed(query)
    _assert_within("query exact section in 650-section markdown", query_seconds, seconds=2.0)

    def extract():
        return mkt.extract_document(document, target_selector)

    extract_seconds, extracted = _timed(extract)
    _assert_within("extract exact section in 650-section markdown", extract_seconds, seconds=2.0)

    # 651 rather than 650: presumably the "# Capacity Fixture" title counts as
    # one extra heading/section - confirm against markitect_tool if this drifts.
    assert len(document.sections) == 651
    assert len(document.headings) == 651
    assert len(matches) == 1
    assert "CAPACITY-MARKER-640" in extracted[0]
|
|
|
|
|
|
def test_repeated_selectors_over_large_document_capacity() -> None:
    """Time 12 passes of six selectors over a fixture built with 420 sections.

    Parsing happens outside the timed region; only the 72 ``query_document``
    calls are measured against the budget.
    """
    selectors = [
        "frontmatter.status",
        "headings[level=2]",
        "blocks[type=bullet_list]",
        "sections[contains~=CAPACITY-MARKER-120]",
        "sections[heading=Decision 240]",
        "metrics.document.sections",
    ]
    document = mkt.parse_markdown(_large_decision_markdown(section_count=420))

    def run_queries() -> list[int]:
        # Same order as nested loops: every selector once per pass, 12 passes.
        return [
            len(mkt.query_document(document, selector))
            for _ in range(12)
            for selector in selectors
        ]

    elapsed, counts = _timed(run_queries)

    _assert_within("72 selector queries over 420-section markdown", elapsed, seconds=5.0)
    # Every selector must match something, and at least one must match broadly.
    assert min(counts) >= 1
    assert max(counts) >= 420
|
|
|
|
|
|
def test_include_fanout_compose_and_transform_capacity(tmp_path: Path) -> None:
    """Time include resolution, composition, and transformation over 90 partials.

    Writes 90 partial files plus a bundle holding one include directive per
    partial into ``tmp_path``, then times ``resolve_includes``,
    ``compose_files`` and ``transform_markdown`` against separate budgets.
    """
    partial_paths = [tmp_path / f"partial-{number:03}.md" for number in range(90)]
    for number, partial_path in enumerate(partial_paths):
        partial_path.write_text(_partial_markdown(number), encoding="utf-8")

    include_directives = [
        f'<!-- mkt:include path="{partial_path.name}" selector="sections[heading=Include Target]" heading_delta="1" -->'
        for partial_path in partial_paths
    ]
    bundle = tmp_path / "bundle.md"
    bundle.write_text("\n".join(include_directives), encoding="utf-8")

    def resolve():
        # Reading the bundle back is deliberately part of the timed region.
        return mkt.resolve_includes(
            bundle.read_text(encoding="utf-8"),
            base_dir=tmp_path,
            current_path=bundle,
        )

    resolve_seconds, included = _timed(resolve)
    _assert_within("resolve 90 include fan-out bundle", resolve_seconds, seconds=8.0)

    def compose():
        return mkt.compose_files(partial_paths, title="Capacity Bundle", heading_delta=1)

    compose_seconds, composed = _timed(compose)
    _assert_within("compose 90 markdown partials", compose_seconds, seconds=5.0)

    def transform():
        return mkt.transform_markdown(
            included.markdown,
            set_frontmatter={"status": "capacity-check"},
            heading_delta=1,
            source_path=str(bundle),
        )

    transform_seconds, transformed = _timed(transform)
    _assert_within("transform resolved include fan-out bundle", transform_seconds, seconds=5.0)

    assert len(included.included_paths) == 90
    assert "### Include Target" in included.markdown
    assert composed.markdown.startswith("# Capacity Bundle")
    assert "status: capacity-check" in transformed.markdown
|
|
|
|
|
|
def test_context_package_many_sources_policy_filtering_capacity(tmp_path: Path) -> None:
    """Time packaging of 140 sources and policy-filtered activation.

    Even-numbered sources are labelled ``public`` and odd-numbered ones
    ``internal``; the ``reader`` subject is only allowed the ``public`` label,
    so the assertions expect 70 activated items and 70 denials.
    """
    source_paths = []
    for number in range(140):
        label = "internal" if number % 2 else "public"
        source_path = tmp_path / f"source-{number:03}.md"
        source_path.write_text(_context_source_markdown(number, label), encoding="utf-8")
        source_paths.append(source_path)

    reader_policy = {
        "id": "capacity-policy",
        "subjects": {
            "reader": {
                "allowed_labels": ["public"],
                "allowed_actions": ["read", "activate"],
            }
        },
        "default_subject": "reader",
    }
    gateway = mkt.LocalLabelPolicyGateway(reader_policy)

    def build_package():
        return mkt.create_context_package_from_sources(
            "sections[heading=Decision]",
            source_paths,
            root=tmp_path,
            namespace=mkt.MemoryNamespace(project="kontextual-engine", task="capacity"),
            budget=mkt.ContextBudget(max_items=160),
        )

    build_seconds, package = _timed(build_package)
    _assert_within("create context package from 140 markdown sources", build_seconds, seconds=12.0)

    def activate():
        return mkt.activate_context_package(
            package,
            policy_gateway=gateway,
            subject="reader",
        )

    activate_seconds, activation = _timed(activate)
    _assert_within(
        "activate and policy-filter 140-source context package",
        activate_seconds,
        seconds=6.0,
    )

    assert len(package.items) == 140
    assert len(activation.items) == 70
    assert "PUBLIC-CAPACITY-000" in activation.content
    assert "INTERNAL-CAPACITY-001" not in activation.content
    assert activation.policy["summary"]["denied"] == 70
|
|
|
|
|
|
def test_snapshot_identity_many_files_capacity(tmp_path: Path) -> None:
    """Time snapshot identity computation for 120 generated markdown files.

    File creation is outside the timed region; only the 120
    ``snapshot_identity_for_file`` calls are measured.
    """
    snapshot_paths = []
    for number in range(120):
        snapshot_path = tmp_path / f"snapshot-{number:03}.md"
        snapshot_path.write_text(_context_source_markdown(number, "public"), encoding="utf-8")
        snapshot_paths.append(snapshot_path)

    def compute_identities():
        return [mkt.snapshot_identity_for_file(candidate) for candidate in snapshot_paths]

    elapsed, identities = _timed(compute_identities)
    _assert_within("compute 120 markdown snapshot identities", elapsed, seconds=4.0)

    # Every file must get its own snapshot id and a sha256-prefixed hash.
    distinct_ids = {identity.snapshot_id for identity in identities}
    assert len(distinct_ids) == 120
    assert all(identity.content_hash.startswith("sha256:") for identity in identities)
|
|
|
|
|
|
def _large_decision_markdown(section_count: int) -> str:
|
|
sections = [
|
|
"---",
|
|
"document_type: capacity-fixture",
|
|
"status: active",
|
|
"owner: Platform Knowledge",
|
|
"---",
|
|
"",
|
|
"# Capacity Fixture",
|
|
"",
|
|
]
|
|
for index in range(section_count):
|
|
sections.extend(
|
|
[
|
|
f"## Decision {index}",
|
|
"",
|
|
(
|
|
f"CAPACITY-MARKER-{index} records a synthetic decision section "
|
|
"with enough text to exercise parsing, selector matching, and extraction."
|
|
),
|
|
"",
|
|
"- Parser shape must stay stable.",
|
|
"- Selector scans must remain bounded enough for adapter use.",
|
|
"",
|
|
]
|
|
)
|
|
return "\n".join(sections)
|
|
|
|
|
|
def _partial_markdown(index: int) -> str:
|
|
return "\n".join(
|
|
[
|
|
f"# Partial {index}",
|
|
"",
|
|
"## Include Target",
|
|
"",
|
|
f"Included capacity text {index}.",
|
|
"",
|
|
"## Ignore",
|
|
"",
|
|
"This section should not be selected by the include resolver.",
|
|
"",
|
|
]
|
|
)
|
|
|
|
|
|
def _context_source_markdown(index: int, label: str) -> str:
|
|
marker = f"{label.upper()}-CAPACITY-{index:03}"
|
|
return "\n".join(
|
|
[
|
|
"---",
|
|
"document_type: capacity-source",
|
|
f"status: {'active' if label == 'public' else 'draft'}",
|
|
"policy:",
|
|
f" labels: [{label}]",
|
|
"---",
|
|
"",
|
|
f"# Capacity Source {index}",
|
|
"",
|
|
"## Decision",
|
|
"",
|
|
f"{marker} uses Markitect context packaging for generated source {index}.",
|
|
"",
|
|
]
|
|
)
|
|
|
|
|
|
def _timed(operation):
|
|
start = time.perf_counter()
|
|
value = operation()
|
|
return time.perf_counter() - start, value
|
|
|
|
|
|
def _assert_within(name: str, elapsed: float, *, seconds: float) -> None:
|
|
assert elapsed <= seconds, f"{name} took {elapsed:.3f}s, expected <= {seconds:.3f}s"
|