Files
markitect-main/tests/unit/prompts/test_halting_policy.py
tegwick 704272644c
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Add quality gate framework with schema validation (JSON Schema via
jsonschema library), pattern validation (regex-based), multi-gate
QualityValidator with SQLite persistence, HaltingPolicyEngine with
budget/iteration/improvement checks, and RefinementLoop for iterative
execute-validate-halt cycles.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 13:31:37 +01:00

222 lines
7.6 KiB
Python

"""
Unit tests for HaltingPolicyEngine.
Tests halting decisions based on quality results, iteration limits,
marginal improvement, and resource budgets.
"""
import pytest
from markitect.prompts.quality.models import (
GateType,
HaltDecision,
QualityPolicy,
ValidationResult,
ValidationStatus,
)
from markitect.prompts.quality.policy import HaltingPolicyEngine
def _make_result(status=ValidationStatus.PASS, score=1.0, gate_id="gate-1"):
    """Build a result through ``ValidationResult.create`` for use in tests.

    The gate type is always ``GateType.PATTERN`` and the artifact id is always
    ``"art-1"``; only the status, score and gate id vary per call.
    """
    fields = {
        "gate_id": gate_id,
        "gate_type": GateType.PATTERN,
        "artifact_id": "art-1",
        "status": status,
        "score": score,
    }
    return ValidationResult.create(**fields)
class TestQualityMetDecision:
    """Decisions driven by whether the (required) quality gates pass."""

    def test_all_pass_halts_quality_met(self):
        """A single passing gate yields HALT_QUALITY_MET with a matching reason."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=5))
        passing = _make_result(ValidationStatus.PASS, 1.0)

        outcome = engine.evaluate([passing], iteration=1)

        assert outcome.decision == HaltDecision.HALT_QUALITY_MET
        assert "quality gates passed" in outcome.reason.lower()

    def test_required_gates_all_pass(self):
        """A failing non-required gate does not prevent HALT_QUALITY_MET."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=5, required_gate_ids=["required-gate"])
        )
        required_ok = _make_result(
            ValidationStatus.PASS, 1.0, gate_id="required-gate"
        )
        optional_bad = _make_result(
            ValidationStatus.FAIL, 0.5, gate_id="optional-gate"
        )

        outcome = engine.evaluate([required_ok, optional_bad], iteration=1)

        assert outcome.decision == HaltDecision.HALT_QUALITY_MET

    def test_required_gate_fails_continues(self):
        """A failing required gate on iteration 1 of 5 yields CONTINUE."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=5, required_gate_ids=["required-gate"])
        )
        required_bad = _make_result(
            ValidationStatus.FAIL, 0.5, gate_id="required-gate"
        )

        outcome = engine.evaluate([required_bad], iteration=1)

        assert outcome.decision == HaltDecision.CONTINUE
class TestIterationLimitDecision:
    """Decisions driven by the policy's ``max_iterations`` setting."""

    def test_at_iteration_limit(self):
        """Iteration 3 with max_iterations=3 yields HALT_ITERATION_LIMIT."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=3))
        failing = _make_result(ValidationStatus.FAIL, 0.5)

        outcome = engine.evaluate([failing], iteration=3)

        assert outcome.decision == HaltDecision.HALT_ITERATION_LIMIT
        assert outcome.iteration == 3

    def test_before_iteration_limit(self):
        """Iteration 2 with max_iterations=5 and a failing gate yields CONTINUE."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=5))
        failing = _make_result(ValidationStatus.FAIL, 0.5)

        outcome = engine.evaluate([failing], iteration=2)

        assert outcome.decision == HaltDecision.CONTINUE
class TestBudgetExhaustedDecision:
    """Decisions driven by ``resource_budget`` versus ``total_runs``."""

    def test_budget_exhausted(self):
        """total_runs=5 against resource_budget=5 yields HALT_BUDGET_EXHAUSTED."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=10, resource_budget=5)
        )
        failing = _make_result(ValidationStatus.FAIL, 0.5)

        outcome = engine.evaluate([failing], iteration=1, total_runs=5)

        assert outcome.decision == HaltDecision.HALT_BUDGET_EXHAUSTED

    def test_budget_not_exhausted(self):
        """total_runs=3 against resource_budget=10 yields CONTINUE."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=10, resource_budget=10)
        )
        failing = _make_result(ValidationStatus.FAIL, 0.5)

        outcome = engine.evaluate([failing], iteration=1, total_runs=3)

        assert outcome.decision == HaltDecision.CONTINUE
class TestMarginalImprovementDecision:
    """Decisions driven by ``min_improvement`` versus the score history."""

    def test_no_improvement_halts(self):
        """A gain of 0.02 with min_improvement=0.05 yields HALT_NO_IMPROVEMENT."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=10, min_improvement=0.05)
        )
        current = _make_result(ValidationStatus.FAIL, 0.52)
        previous_scores = [0.50]  # 0.52 - 0.50 = 0.02, below the 0.05 threshold

        outcome = engine.evaluate(
            [current], iteration=2, score_history=previous_scores
        )

        assert outcome.decision == HaltDecision.HALT_NO_IMPROVEMENT

    def test_sufficient_improvement_continues(self):
        """A gain of 0.10 with min_improvement=0.05 yields CONTINUE."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=10, min_improvement=0.05)
        )
        current = _make_result(ValidationStatus.FAIL, 0.60)
        previous_scores = [0.50]  # 0.60 - 0.50 = 0.10, at or above 0.05

        outcome = engine.evaluate(
            [current], iteration=2, score_history=previous_scores
        )

        assert outcome.decision == HaltDecision.CONTINUE

    def test_first_iteration_no_history(self):
        """Iteration 1 without any score_history argument yields CONTINUE."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=10, min_improvement=0.05)
        )
        current = _make_result(ValidationStatus.FAIL, 0.50)

        outcome = engine.evaluate([current], iteration=1)

        assert outcome.decision == HaltDecision.CONTINUE
class TestPriorityOrder:
    """Which halting reason is reported when several conditions hold at once."""

    def test_budget_checked_before_iteration(self):
        """Budget exhaustion is reported even though the iteration limit is hit."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=3, resource_budget=2)
        )
        failing = _make_result(ValidationStatus.FAIL, 0.5)

        # iteration == max_iterations and total_runs == resource_budget together.
        outcome = engine.evaluate([failing], iteration=3, total_runs=2)

        assert outcome.decision == HaltDecision.HALT_BUDGET_EXHAUSTED

    def test_iteration_checked_before_quality(self):
        """The iteration limit is reported even though the only gate passes."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=2))
        passing = _make_result(ValidationStatus.PASS, 1.0)

        # Iteration 2 reaches max_iterations=2 while quality is also met; the
        # expected decision is the iteration-limit halt, not quality-met.
        outcome = engine.evaluate([passing], iteration=2)

        assert outcome.decision == HaltDecision.HALT_ITERATION_LIMIT
class TestHaltingRecord:
    """Attributes and serialization of the record returned by ``evaluate``."""

    def test_record_has_scores(self):
        """``scores`` holds the supplied history followed by the current score."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=5))
        current = _make_result(ValidationStatus.PASS, 0.9)

        outcome = engine.evaluate([current], iteration=2, score_history=[0.5])

        assert outcome.scores == [0.5, 0.9]

    def test_record_to_dict(self):
        """``to_dict`` exposes the decision string, iteration and max_iterations."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=3))
        current = _make_result(ValidationStatus.PASS, 1.0)

        serialized = engine.evaluate([current], iteration=1).to_dict()

        assert serialized["decision"] == "halted_quality_met"
        assert serialized["iteration"] == 1
        assert serialized["max_iterations"] == 3