generated from coulomb/repo-seed
feat: WP-0004 T01-T04 — stable corpus, ADRs, regression test
- corpus/markidocx-docs/manifest.yaml: specs as live markidocx project (FR-1101)
- corpus/markidocx-docs/known-drift.md: documented structural drift
- workflows.py: release-regression accepts manifest path; emits corpus_id (FR-1109)
- tests/regression/test_corpus_regression.py: corpus regression suite (FR-1102–1110)
- architecture/ADR-002: python-docx as conversion engine
- architecture/ADR-003: manifest YAML schema
- workplans/MRKD-WP-0004: T01–T04 done; T05 blocked (SBOM path mapping needed)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
89
tests/regression/test_corpus_regression.py
Normal file
89
tests/regression/test_corpus_regression.py
Normal file
@@ -0,0 +1,89 @@
|
||||
"""Corpus regression test — markidocx-docs (FR-1101 through FR-1110).
|
||||
|
||||
Runs the release-regression workflow against the real product documentation
corpus (corpus/markidocx-docs/manifest.yaml). This test validates that the
markidocx specs themselves survive a round-trip within documented tolerance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from markidocx.workflows import run_workflow
|
||||
|
||||
# Repository root: this module lives in tests/regression/, so the root is two
# directories above the module's own directory.
REPO_ROOT = Path(__file__).parent.parent.parent
# Manifest of the markidocx-docs corpus that the regression workflow consumes.
CORPUS_MANIFEST = REPO_ROOT / "corpus" / "markidocx-docs" / "manifest.yaml"

# Drift tolerance per known-drift.md: 902 elements preserved, ~71 broken.
# Gate: preserve at least 800 elements; broken must not exceed 150.
MIN_PRESERVED = 800
MAX_BROKEN = 150
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not CORPUS_MANIFEST.exists(),
    reason="Corpus manifest not found — run from repo root after WP-0004-T01",
)
class TestCorpusRegression:
    """Regression gates for the release-regression workflow on the docs corpus.

    The whole class is skipped when the corpus manifest file does not exist.
    """

    @pytest.fixture(scope="class")
    def workflow_result(self):
        """Run the release-regression workflow once for the whole class.

        The tests that only inspect the result share this single run instead
        of each repeating the validate/build/import/compare steps.
        NOTE(review): sharing assumes run_workflow is deterministic for a
        given manifest and that tests do not mutate the result — confirm.
        """
        return run_workflow("release-regression", CORPUS_MANIFEST)

    @staticmethod
    def _count(value) -> int:
        """Return *value* if it is already an int count, else its length."""
        return value if isinstance(value, int) else len(value)

    def test_workflow_completes(self, workflow_result) -> None:
        """release-regression workflow must not be classified as 'failed' (FR-1102)."""
        assert workflow_result.classification != "failed", (
            f"release-regression workflow failed: {workflow_result.aggregate_output}"
        )

    def test_workflow_classification_acceptable(self, workflow_result) -> None:
        """Workflow result must be 'full' or 'with-fallback' (FR-1305)."""
        assert workflow_result.classification in {"full", "with-fallback"}, (
            f"Unexpected classification: {workflow_result.classification}"
        )

    def test_all_steps_executed(self, workflow_result) -> None:
        """All four steps must be executed (FR-1304): validate, build, import, compare."""
        executed = {s.name for s in workflow_result.steps if s.status == "executed"}
        for step in ("validate", "build", "import", "compare"):
            assert step in executed, f"Step '{step}' was not executed"

    def test_corpus_identity_disclosed(self, workflow_result) -> None:
        """Workflow result must carry corpus_id with manifest_path and git_sha (FR-1109)."""
        corpus_id = workflow_result.aggregate_output.get("corpus_id")
        assert corpus_id is not None, "corpus_id missing from aggregate_output"
        assert "manifest_path" in corpus_id, "corpus_id missing manifest_path"
        assert "git_sha" in corpus_id, "corpus_id missing git_sha"

    def test_evidence_artefacts_written(self) -> None:
        """Evidence store must contain build, import, and drift reports (FR-1107, FR-1110)."""
        from markidocx.evidence import EvidenceStore

        # This test needs its own run: the reports are looked up in the
        # store instance that was handed to the workflow.
        store = EvidenceStore()
        result = run_workflow("release-regression", CORPUS_MANIFEST, evidence_store=store)
        reports = store.list_reports(result.run_id)
        report_types = {r.report_type for r in reports}
        for expected in ("build", "import", "drift"):
            assert expected in report_types, (
                f"Evidence missing '{expected}' report. Found: {report_types}"
            )

    def test_drift_within_tolerance(self, workflow_result) -> None:
        """Structural drift must stay within documented tolerance from known-drift.md."""
        compare_step = next(
            (s for s in workflow_result.steps if s.name == "compare"), None
        )
        assert compare_step is not None, "compare step not found"
        output = compare_step.output or {}
        # Require both counters explicitly: defaulting a missing 'broken' to
        # empty would let the MAX_BROKEN gate pass without any data.
        for key in ("preserved", "broken"):
            assert key in output, f"compare step output missing '{key}'"
        # Each entry may be either a count or a collection of elements.
        n_preserved = self._count(output["preserved"])
        n_broken = self._count(output["broken"])
        assert n_preserved >= MIN_PRESERVED, (
            f"Preserved elements ({n_preserved}) below tolerance ({MIN_PRESERVED})"
        )
        assert n_broken <= MAX_BROKEN, (
            f"Broken elements ({n_broken}) exceeds tolerance ({MAX_BROKEN})"
        )
|
||||
Reference in New Issue
Block a user