# marki-docx/tests/regression/test_corpus_regression.py

"""Corpus regression test — markidocx-docs (FR-1101 through FR-1110).

Runs the release-regression workflow against the real product documentation
corpus (corpus/markidocx-docs/manifest.yaml). This test validates that the
markidocx specs themselves survive a round-trip within documented tolerance.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

import pytest

from markidocx.workflows import run_workflow

# Three directory levels above this test module.
REPO_ROOT = Path(__file__).parent.parent.parent
# Manifest of the product-documentation corpus exercised by these tests.
CORPUS_MANIFEST = REPO_ROOT.joinpath("corpus", "markidocx-docs", "manifest.yaml")

# known-drift.md records a baseline of 902 preserved and roughly 71 broken
# elements. The gates below leave headroom around that baseline: the run must
# preserve no fewer than 800 elements and break no more than 150.
MIN_PRESERVED = 800
MAX_BROKEN = 150


@pytest.mark.skipif(
    not CORPUS_MANIFEST.exists(),
    reason="Corpus manifest not found — run from repo root after WP-0004-T01",
)
class TestCorpusRegression:
    """Regression gates for the release-regression workflow on the docs corpus.

    The workflow is executed once per class through the ``workflow_result``
    fixture and the read-only assertions share that result. Only the evidence
    test performs its own run, because it has to inject an ``EvidenceStore``.
    The whole class is skipped when the corpus manifest file does not exist.
    """

    @pytest.fixture(scope="class")
    def workflow_result(self) -> Any:
        """Run release-regression against the corpus once and share the result."""
        return run_workflow("release-regression", CORPUS_MANIFEST)

    @staticmethod
    def _count(value: Any) -> int:
        """Return *value* if it is already an int count, otherwise its length."""
        return value if isinstance(value, int) else len(value)

    def test_workflow_completes(self, workflow_result: Any) -> None:
        """release-regression workflow must not be classified as 'failed' (FR-1102)."""
        assert workflow_result.classification != "failed", (
            f"release-regression workflow failed: {workflow_result.aggregate_output}"
        )

    def test_workflow_classification_acceptable(self, workflow_result: Any) -> None:
        """Workflow result must be 'full' or 'with-fallback' (FR-1305)."""
        assert workflow_result.classification in {"full", "with-fallback"}, (
            f"Unexpected classification: {workflow_result.classification}"
        )

    def test_all_steps_executed(self, workflow_result: Any) -> None:
        """All four steps must be executed (FR-1304): validate, build, import, compare."""
        executed = {s.name for s in workflow_result.steps if s.status == "executed"}
        for step in ("validate", "build", "import", "compare"):
            assert step in executed, f"Step '{step}' was not executed"

    def test_corpus_identity_disclosed(self, workflow_result: Any) -> None:
        """Workflow result must carry corpus_id with manifest_path and git_sha (FR-1109)."""
        corpus_id = workflow_result.aggregate_output.get("corpus_id")
        assert corpus_id is not None, "corpus_id missing from aggregate_output"
        assert "manifest_path" in corpus_id, "corpus_id missing manifest_path"
        assert "git_sha" in corpus_id, "corpus_id missing git_sha"

    def test_evidence_artefacts_written(self) -> None:
        """Evidence store must contain build, import, and drift reports (FR-1107, FR-1110)."""
        from markidocx.evidence import EvidenceStore

        # This test needs its own run so the reports land in a store it controls.
        store = EvidenceStore()
        result = run_workflow("release-regression", CORPUS_MANIFEST, evidence_store=store)
        reports = store.list_reports(result.run_id)
        report_types = {r.report_type for r in reports}
        for expected in ("build", "import", "drift"):
            assert expected in report_types, (
                f"Evidence missing '{expected}' report. Found: {report_types}"
            )

    def test_drift_within_tolerance(self, workflow_result: Any) -> None:
        """Structural drift must stay within documented tolerance from known-drift.md."""
        compare_step = next(
            (s for s in workflow_result.steps if s.name == "compare"), None
        )
        assert compare_step is not None, "compare step not found"
        output = compare_step.output or {}
        # Fail with an explicit message instead of reporting "0 preserved" when
        # the compare step did not emit the entry at all.
        assert "preserved" in output, (
            f"compare step output has no 'preserved' entry. Found: {list(output)}"
        )
        # Each entry may be either a count or a collection of elements.
        n_preserved = self._count(output["preserved"])
        # A missing 'broken' entry is still treated as zero broken elements.
        # NOTE(review): confirm the compare step always emits 'broken'; if the
        # key were renamed this gate would pass silently.
        n_broken = self._count(output.get("broken", []))
        assert n_preserved >= MIN_PRESERVED, (
            f"Preserved elements ({n_preserved}) below tolerance ({MIN_PRESERVED})"
        )
        assert n_broken <= MAX_BROKEN, (
            f"Broken elements ({n_broken}) exceeds tolerance ({MAX_BROKEN})"
        )