Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Add quality gate framework with schema validation (JSON Schema via jsonschema library), pattern validation (regex-based), multi-gate QualityValidator with SQLite persistence, HaltingPolicyEngine with budget/iteration/improvement checks, and RefinementLoop for iterative execute-validate-halt cycles. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
154 lines
5.2 KiB
Python
154 lines
5.2 KiB
Python
"""
Halting policy engine for refinement control.

Implements FR-10: Halting and Refinement Policy

Evaluates halting conditions based on quality gate results,
marginal improvement, iteration limits, and resource budgets.
"""
|
|
|
|
from typing import List, Optional
|
|
|
|
from markitect.prompts.quality.models import (
|
|
HaltDecision,
|
|
HaltingRecord,
|
|
QualityPolicy,
|
|
ValidationResult,
|
|
ValidationStatus,
|
|
)
|
|
|
|
|
|
class HaltingPolicyEngine:
    """
    Evaluates halting decisions based on configurable policies.

    Implements FR-10.2: Evaluate halting based on quality gate results,
    marginal improvement metrics, iteration limits, and resource budgets.

    Implements FR-10.3: Record halting decisions.
    """

    def __init__(self, policy: QualityPolicy):
        """
        Initialize with a quality policy.

        Args:
            policy: Quality policy configuration. The engine reads its
                ``resource_budget``, ``max_iterations``,
                ``min_improvement`` and ``required_gate_ids`` attributes.
        """
        self.policy = policy

    def evaluate(
        self,
        results: List[ValidationResult],
        iteration: int,
        score_history: Optional[List[float]] = None,
        total_runs: int = 0,
    ) -> HaltingRecord:
        """
        Evaluate whether to halt or continue refinement.

        Checks in order (the first condition that holds wins):
            1. Resource budget exhaustion
            2. Iteration limit
            3. Quality met (all required gates pass)
            4. Marginal improvement below threshold

        Because the budget and iteration checks come first, an iteration
        that both hits a limit and satisfies the quality gates is reported
        as a limit halt, not as ``HALT_QUALITY_MET``.

        Args:
            results: Validation results from current iteration
            iteration: Current iteration number (1-based)
            score_history: Aggregate scores from previous iterations.
                ``None`` or empty means there is no history. The input
                is never mutated; any iterable of floats is accepted.
            total_runs: Total number of runs consumed

        Returns:
            HaltingRecord with the decision. Its ``scores`` field holds
            the history followed by the current aggregate score.
        """
        policy = self.policy
        # Build a fresh list so the caller's history is left untouched.
        all_scores = [*(score_history or []), self._aggregate_score(results)]

        # Check resource budget
        if total_runs >= policy.resource_budget:
            return self._record(
                HaltDecision.HALT_BUDGET_EXHAUSTED,
                iteration,
                all_scores,
                f"Resource budget exhausted: {total_runs}/{self.policy.resource_budget} runs used",
            )

        # Check iteration limit
        if iteration >= policy.max_iterations:
            return self._record(
                HaltDecision.HALT_ITERATION_LIMIT,
                iteration,
                all_scores,
                f"Iteration limit reached: {iteration}/{self.policy.max_iterations}",
            )

        # Check if quality is met
        if self._quality_met(results):
            return self._record(
                HaltDecision.HALT_QUALITY_MET,
                iteration,
                all_scores,
                "All quality gates passed",
            )

        # Check marginal improvement; needs at least one previous score
        # to compare the current one against.
        if len(all_scores) >= 2:
            improvement = all_scores[-1] - all_scores[-2]
            if improvement < policy.min_improvement:
                return self._record(
                    HaltDecision.HALT_NO_IMPROVEMENT,
                    iteration,
                    all_scores,
                    (
                        f"Marginal improvement {improvement:.4f} below "
                        f"threshold {self.policy.min_improvement}"
                    ),
                )

        # Continue refinement
        return self._record(
            HaltDecision.CONTINUE,
            iteration,
            all_scores,
            "Continuing refinement",
        )

    def _record(
        self,
        decision: HaltDecision,
        iteration: int,
        scores: List[float],
        reason: str,
    ) -> HaltingRecord:
        """Build a HaltingRecord, filling in the policy's iteration limit."""
        return HaltingRecord(
            decision=decision,
            iteration=iteration,
            max_iterations=self.policy.max_iterations,
            scores=scores,
            reason=reason,
        )

    def _quality_met(self, results: List[ValidationResult]) -> bool:
        """
        Check if quality requirements are met.

        If required_gate_ids is set, every required gate must have at
        least one result and none of its results may be FAIL.
        Otherwise, every result must be PASS.

        Args:
            results: Validation results to check

        Returns:
            True if quality requirements are met
        """
        required_gate_ids = self.policy.required_gate_ids
        if not required_gate_ids:
            # NOTE(review): with no results at all this is vacuously True,
            # so an empty validation run counts as "quality met" - confirm
            # that this is intended.
            return all(r.status == ValidationStatus.PASS for r in results)

        for gate_id in required_gate_ids:
            gate_results = [r for r in results if r.gate_id == gate_id]
            # A required gate that produced no result cannot be satisfied.
            if not gate_results:
                return False
            # NOTE(review): only FAIL blocks a required gate here; any
            # other status is accepted, whereas the branch above demands
            # PASS. Confirm the asymmetry is intentional.
            if any(r.status == ValidationStatus.FAIL for r in gate_results):
                return False
        return True

    def _aggregate_score(self, results: List[ValidationResult]) -> float:
        """
        Calculate aggregate score from results.

        Returns the arithmetic mean of all non-None ``score`` values, or
        0.0 when ``results`` is empty/None or no result carries a score.
        """
        scores = [r.score for r in (results or ()) if r.score is not None]
        return sum(scores) / len(scores) if scores else 0.0