"""Corpus regression test — markidocx-docs (FR-1101 through FR-1110). Runs the release-regression workflow against the real product documentation corpus (corpus/markidocx-docs/manifest.yaml). This test validates that the markidocx specs themselves survive a round-trip within documented tolerance. """ from __future__ import annotations from pathlib import Path import pytest from markidocx.workflows import run_workflow REPO_ROOT = Path(__file__).parent.parent.parent CORPUS_MANIFEST = REPO_ROOT / "corpus" / "markidocx-docs" / "manifest.yaml" # Drift tolerance per known-drift.md: 902 elements preserved, ~71 broken. # Gate: preserve at least 800 elements; broken must not exceed 150. MIN_PRESERVED = 800 MAX_BROKEN = 150 @pytest.mark.skipif( not CORPUS_MANIFEST.exists(), reason="Corpus manifest not found — run from repo root after WP-0004-T01", ) class TestCorpusRegression: def test_workflow_completes(self) -> None: """release-regression workflow must not be classified as 'failed' (FR-1102).""" result = run_workflow("release-regression", CORPUS_MANIFEST) assert result.classification != "failed", ( f"release-regression workflow failed: {result.aggregate_output}" ) def test_workflow_classification_acceptable(self) -> None: """Workflow result must be 'full' or 'with-fallback' (FR-1305).""" result = run_workflow("release-regression", CORPUS_MANIFEST) assert result.classification in {"full", "with-fallback"}, ( f"Unexpected classification: {result.classification}" ) def test_all_steps_executed(self) -> None: """All four steps must be executed (FR-1304): validate, build, import, compare.""" result = run_workflow("release-regression", CORPUS_MANIFEST) executed = {s.name for s in result.steps if s.status == "executed"} for step in ("validate", "build", "import", "compare"): assert step in executed, f"Step '{step}' was not executed" def test_corpus_identity_disclosed(self) -> None: """Workflow result must carry corpus_id with manifest_path and git_sha (FR-1109).""" result = run_workflow("release-regression", CORPUS_MANIFEST) corpus_id = result.aggregate_output.get("corpus_id") assert corpus_id is not None, "corpus_id missing from aggregate_output" assert "manifest_path" in corpus_id, "corpus_id missing manifest_path" assert "git_sha" in corpus_id, "corpus_id missing git_sha" def test_evidence_artefacts_written(self) -> None: """Evidence store must contain build, import, and drift reports (FR-1107, FR-1110).""" from markidocx.evidence import EvidenceStore store = EvidenceStore() result = run_workflow("release-regression", CORPUS_MANIFEST, evidence_store=store) reports = store.list_reports(result.run_id) report_types = {r.report_type for r in reports} for expected in ("build", "import", "drift"): assert expected in report_types, ( f"Evidence missing '{expected}' report. Found: {report_types}" ) def test_drift_within_tolerance(self) -> None: """Structural drift must stay within documented tolerance from known-drift.md.""" result = run_workflow("release-regression", CORPUS_MANIFEST) compare_step = next( (s for s in result.steps if s.name == "compare"), None ) assert compare_step is not None, "compare step not found" output = compare_step.output or {} preserved = output.get("preserved", []) broken = output.get("broken", []) n_preserved = preserved if isinstance(preserved, int) else len(preserved) n_broken = broken if isinstance(broken, int) else len(broken) assert n_preserved >= MIN_PRESERVED, ( f"Preserved elements ({n_preserved}) below tolerance ({MIN_PRESERVED})" ) assert n_broken <= MAX_BROKEN, ( f"Broken elements ({n_broken}) exceeds tolerance ({MAX_BROKEN})" )