Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Add quality gate framework with schema validation (JSON Schema via jsonschema library), pattern validation (regex-based), multi-gate QualityValidator with SQLite persistence, HaltingPolicyEngine with budget/iteration/improvement checks, and RefinementLoop for iterative execute-validate-halt cycles. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
222 lines
7.6 KiB
Python
222 lines
7.6 KiB
Python
"""
|
|
Unit tests for HaltingPolicyEngine.

Tests halting decisions based on quality results, iteration limits,
marginal improvement, and resource budgets.
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from markitect.prompts.quality.models import (
|
|
GateType,
|
|
HaltDecision,
|
|
QualityPolicy,
|
|
ValidationResult,
|
|
ValidationStatus,
|
|
)
|
|
from markitect.prompts.quality.policy import HaltingPolicyEngine
|
|
|
|
|
|
def _make_result(status=ValidationStatus.PASS, score=1.0, gate_id="gate-1"):
    """Build a ValidationResult fixture via ``ValidationResult.create``.

    The gate type is always ``GateType.PATTERN`` and the artifact id is
    always ``"art-1"``; only the status, score and gate id vary.

    Args:
        status: Validation status to record (defaults to PASS).
        score: Score to record (defaults to 1.0).
        gate_id: Identifier of the gate that produced the result.

    Returns:
        Whatever ``ValidationResult.create`` returns for these fields.
    """
    fields = {
        "gate_id": gate_id,
        "gate_type": GateType.PATTERN,
        "artifact_id": "art-1",
        "status": status,
        "score": score,
    }
    return ValidationResult.create(**fields)
|
|
|
|
|
|
class TestQualityMetDecision:
    """Halting decisions that depend on which gates passed or failed."""

    def test_all_pass_halts_quality_met(self):
        """A single passing gate is expected to give HALT_QUALITY_MET."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=5))
        gate_results = [_make_result(ValidationStatus.PASS, 1.0)]

        outcome = engine.evaluate(gate_results, iteration=1)

        assert outcome.decision == HaltDecision.HALT_QUALITY_MET
        # The reason text is matched case-insensitively.
        assert "quality gates passed" in outcome.reason.lower()

    def test_required_gates_all_pass(self):
        """A passing required gate is enough even if another gate fails."""
        engine = HaltingPolicyEngine(
            QualityPolicy(
                max_iterations=5,
                required_gate_ids=["required-gate"],
            )
        )
        gate_results = [
            _make_result(ValidationStatus.PASS, 1.0, gate_id="required-gate"),
            # Not listed in required_gate_ids, so its failure is tolerated.
            _make_result(ValidationStatus.FAIL, 0.5, gate_id="optional-gate"),
        ]

        outcome = engine.evaluate(gate_results, iteration=1)

        assert outcome.decision == HaltDecision.HALT_QUALITY_MET

    def test_required_gate_fails_continues(self):
        """A failing required gate on iteration 1 is expected to CONTINUE."""
        engine = HaltingPolicyEngine(
            QualityPolicy(
                max_iterations=5,
                required_gate_ids=["required-gate"],
            )
        )
        gate_results = [
            _make_result(ValidationStatus.FAIL, 0.5, gate_id="required-gate"),
        ]

        outcome = engine.evaluate(gate_results, iteration=1)

        assert outcome.decision == HaltDecision.CONTINUE
|
|
|
|
|
|
class TestIterationLimitDecision:
    """Halting decisions that depend on the policy's max_iterations."""

    def test_at_iteration_limit(self):
        """Iteration 3 of 3 with a failing gate expects HALT_ITERATION_LIMIT."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=3))
        failing = [_make_result(ValidationStatus.FAIL, 0.5)]

        outcome = engine.evaluate(failing, iteration=3)

        assert outcome.decision == HaltDecision.HALT_ITERATION_LIMIT
        # The record is expected to carry the iteration it was evaluated at.
        assert outcome.iteration == 3

    def test_before_iteration_limit(self):
        """Iteration 2 of 5 with a failing gate expects CONTINUE."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=5))
        failing = [_make_result(ValidationStatus.FAIL, 0.5)]

        outcome = engine.evaluate(failing, iteration=2)

        assert outcome.decision == HaltDecision.CONTINUE
|
|
|
|
|
|
class TestBudgetExhaustedDecision:
    """Halting decisions that depend on the policy's resource_budget."""

    def test_budget_exhausted(self):
        """total_runs=5 against a budget of 5 expects HALT_BUDGET_EXHAUSTED."""
        budget_policy = QualityPolicy(max_iterations=10, resource_budget=5)
        engine = HaltingPolicyEngine(budget_policy)
        failing = [_make_result(ValidationStatus.FAIL, 0.5)]

        outcome = engine.evaluate(failing, iteration=1, total_runs=5)

        assert outcome.decision == HaltDecision.HALT_BUDGET_EXHAUSTED

    def test_budget_not_exhausted(self):
        """total_runs=3 against a budget of 10 expects CONTINUE."""
        budget_policy = QualityPolicy(max_iterations=10, resource_budget=10)
        engine = HaltingPolicyEngine(budget_policy)
        failing = [_make_result(ValidationStatus.FAIL, 0.5)]

        outcome = engine.evaluate(failing, iteration=1, total_runs=3)

        assert outcome.decision == HaltDecision.CONTINUE
|
|
|
|
|
|
class TestMarginalImprovementDecision:
    """Halting decisions that depend on the policy's min_improvement."""

    @staticmethod
    def _engine():
        """Return an engine with max_iterations=10 and min_improvement=0.05."""
        return HaltingPolicyEngine(
            QualityPolicy(max_iterations=10, min_improvement=0.05)
        )

    def test_no_improvement_halts(self):
        """A gain smaller than min_improvement expects HALT_NO_IMPROVEMENT."""
        engine = self._engine()
        failing = [_make_result(ValidationStatus.FAIL, 0.52)]

        # Previous score 0.50 -> current 0.52: gain of 0.02 is below 0.05.
        outcome = engine.evaluate(failing, iteration=2, score_history=[0.50])

        assert outcome.decision == HaltDecision.HALT_NO_IMPROVEMENT

    def test_sufficient_improvement_continues(self):
        """A gain of at least min_improvement expects CONTINUE."""
        engine = self._engine()
        failing = [_make_result(ValidationStatus.FAIL, 0.60)]

        # Previous score 0.50 -> current 0.60: gain of 0.10 clears 0.05.
        outcome = engine.evaluate(failing, iteration=2, score_history=[0.50])

        assert outcome.decision == HaltDecision.CONTINUE

    def test_first_iteration_no_history(self):
        """Iteration 1 with no score_history argument expects CONTINUE."""
        engine = self._engine()
        failing = [_make_result(ValidationStatus.FAIL, 0.50)]

        outcome = engine.evaluate(failing, iteration=1)

        assert outcome.decision == HaltDecision.CONTINUE
|
|
|
|
|
|
class TestPriorityOrder:
    """Which decision is reported when two halting conditions hold at once."""

    def test_budget_checked_before_iteration(self):
        """Budget exhaustion is expected to win over the iteration limit."""
        engine = HaltingPolicyEngine(
            QualityPolicy(max_iterations=3, resource_budget=2)
        )
        failing = [_make_result(ValidationStatus.FAIL, 0.5)]

        # iteration=3 reaches max_iterations and total_runs=2 reaches the
        # budget, so both conditions are true for this call.
        outcome = engine.evaluate(failing, iteration=3, total_runs=2)

        assert outcome.decision == HaltDecision.HALT_BUDGET_EXHAUSTED

    def test_iteration_checked_before_quality(self):
        """The iteration limit is expected to win over quality being met."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=2))
        passing = [_make_result(ValidationStatus.PASS, 1.0)]

        # iteration=2 reaches max_iterations=2 while the only gate passes,
        # so both conditions are true; the iteration-limit decision is the
        # one this test expects to be reported.
        outcome = engine.evaluate(passing, iteration=2)

        assert outcome.decision == HaltDecision.HALT_ITERATION_LIMIT
|
|
|
|
|
|
class TestHaltingRecord:
    """Contents of the record returned by ``HaltingPolicyEngine.evaluate``."""

    def test_record_has_scores(self):
        """The record's scores are the prior history plus the current score."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=5))
        passing = [_make_result(ValidationStatus.PASS, 0.9)]

        outcome = engine.evaluate(passing, iteration=2, score_history=[0.5])

        assert outcome.scores == [0.5, 0.9]

    def test_record_to_dict(self):
        """``to_dict`` exposes decision, iteration and max_iterations."""
        engine = HaltingPolicyEngine(QualityPolicy(max_iterations=3))
        passing = [_make_result(ValidationStatus.PASS, 1.0)]

        payload = engine.evaluate(passing, iteration=1).to_dict()

        expected = {
            "decision": "halted_quality_met",
            "iteration": 1,
            "max_iterations": 3,
        }
        # Checked key by key, in the order listed above.
        for key, value in expected.items():
            assert payload[key] == value
|