Files
marki-docx/tests/regression/test_corpus_regression.py
Bernd Worsch ebc5eaee77 feat: WP-0004 T01-T04 — stable corpus, ADRs, regression test
- corpus/markidocx-docs/manifest.yaml: specs as live markidocx project (FR-1101)
- corpus/markidocx-docs/known-drift.md: documented structural drift
- workflows.py: release-regression accepts manifest path; emits corpus_id (FR-1109)
- tests/regression/test_corpus_regression.py: corpus regression suite (FR-1102–1110)
- architecture/ADR-002: python-docx as conversion engine
- architecture/ADR-003: manifest YAML schema
- workplans/MRKD-WP-0004: T01–T04 done; T05 blocked (SBOM path mapping needed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-16 17:48:33 +00:00

90 lines
4.0 KiB
Python

"""Corpus regression test — markidocx-docs (FR-1101 through FR-1110).
Runs the release-regression workflow against the real product documentation
corpus (corpus/markidocx-docs/manifest.yaml). This test validates that the
markidocx specs themselves survive a round-trip within documented tolerance.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from markidocx.workflows import run_workflow
# Repository root: three directory levels above this test module.
REPO_ROOT = Path(__file__).parent.parent.parent
# Manifest of the markidocx-docs corpus that the regression workflow runs on.
CORPUS_MANIFEST = REPO_ROOT.joinpath("corpus", "markidocx-docs", "manifest.yaml")
# known-drift.md records 902 preserved and roughly 71 broken elements.
# The gates below leave headroom around those figures: the run must keep
# at least 800 elements preserved and report no more than 150 broken.
MIN_PRESERVED = 800
MAX_BROKEN = 150
@pytest.mark.skipif(
    not CORPUS_MANIFEST.exists(),
    reason="Corpus manifest not found — run from repo root after WP-0004-T01",
)
class TestCorpusRegression:
    """Checks on the ``release-regression`` workflow run against CORPUS_MANIFEST.

    Every test invokes the workflow itself, so the tests do not share a
    result. The whole class is skipped when the manifest file is absent.
    """

    def test_workflow_completes(self) -> None:
        """FR-1102: the workflow result is not classified as 'failed'."""
        outcome = run_workflow("release-regression", CORPUS_MANIFEST)
        assert (
            outcome.classification != "failed"
        ), f"release-regression workflow failed: {outcome.aggregate_output}"

    def test_workflow_classification_acceptable(self) -> None:
        """FR-1305: the classification is either 'full' or 'with-fallback'."""
        outcome = run_workflow("release-regression", CORPUS_MANIFEST)
        assert outcome.classification in {
            "full",
            "with-fallback",
        }, f"Unexpected classification: {outcome.classification}"

    def test_all_steps_executed(self) -> None:
        """FR-1304: validate, build, import and compare all report 'executed'."""
        outcome = run_workflow("release-regression", CORPUS_MANIFEST)
        # Collect the names of the steps whose status is exactly "executed".
        ran = set()
        for step in outcome.steps:
            if step.status == "executed":
                ran.add(step.name)
        for required in ("validate", "build", "import", "compare"):
            assert required in ran, f"Step '{required}' was not executed"

    def test_corpus_identity_disclosed(self) -> None:
        """FR-1109: aggregate_output carries corpus_id with manifest_path and git_sha."""
        outcome = run_workflow("release-regression", CORPUS_MANIFEST)
        identity = outcome.aggregate_output.get("corpus_id")
        assert identity is not None, "corpus_id missing from aggregate_output"
        for field in ("manifest_path", "git_sha"):
            assert field in identity, f"corpus_id missing {field}"

    def test_evidence_artefacts_written(self) -> None:
        """FR-1107, FR-1110: the evidence store lists build, import and drift reports."""
        from markidocx.evidence import EvidenceStore

        evidence = EvidenceStore()
        outcome = run_workflow(
            "release-regression", CORPUS_MANIFEST, evidence_store=evidence
        )
        # Gather the report types recorded under this run's id.
        report_types = set()
        for report in evidence.list_reports(outcome.run_id):
            report_types.add(report.report_type)
        for expected in ("build", "import", "drift"):
            assert (
                expected in report_types
            ), f"Evidence missing '{expected}' report. Found: {report_types}"

    def test_drift_within_tolerance(self) -> None:
        """Preserved and broken counts stay within MIN_PRESERVED / MAX_BROKEN."""
        outcome = run_workflow("release-regression", CORPUS_MANIFEST)
        # Locate the first step named "compare", if there is one.
        compare_step = None
        for candidate in outcome.steps:
            if candidate.name == "compare":
                compare_step = candidate
                break
        assert compare_step is not None, "compare step not found"

        metrics = compare_step.output or {}
        preserved = metrics.get("preserved", [])
        broken = metrics.get("broken", [])
        # Each value is accepted either as an int count or as a sized
        # collection whose length is the count.
        if isinstance(preserved, int):
            n_preserved = preserved
        else:
            n_preserved = len(preserved)
        if isinstance(broken, int):
            n_broken = broken
        else:
            n_broken = len(broken)

        assert (
            n_preserved >= MIN_PRESERVED
        ), f"Preserved elements ({n_preserved}) below tolerance ({MIN_PRESERVED})"
        assert (
            n_broken <= MAX_BROKEN
        ), f"Broken elements ({n_broken}) exceeds tolerance ({MAX_BROKEN})"