Files
markitect-main/markitect/prompts/quality/policy.py
tegwick 704272644c
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Add quality gate framework with schema validation (JSON Schema via
jsonschema library), pattern validation (regex-based), multi-gate
QualityValidator with SQLite persistence, HaltingPolicyEngine with
budget/iteration/improvement checks, and RefinementLoop for iterative
execute-validate-halt cycles.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 13:31:37 +01:00

154 lines
5.2 KiB
Python

"""
Halting policy engine for refinement control.
Implements FR-10: Halting and Refinement Policy
Evaluates halting conditions based on quality gate results,
marginal improvement, iteration limits, and resource budgets.
"""
from typing import List, Optional
from markitect.prompts.quality.models import (
HaltDecision,
HaltingRecord,
QualityPolicy,
ValidationResult,
ValidationStatus,
)
class HaltingPolicyEngine:
    """
    Evaluates halting decisions based on configurable policies.

    Implements FR-10.2: Evaluate halting based on quality gate results,
    marginal improvement metrics, iteration limits, and resource budgets.
    Implements FR-10.3: Record halting decisions (each call to
    ``evaluate`` returns a ``HaltingRecord`` describing the decision).

    The engine keeps no state other than the policy it was built with.
    """

    def __init__(self, policy: QualityPolicy):
        """
        Initialize with a quality policy.

        Args:
            policy: Quality policy configuration. This class reads its
                ``resource_budget``, ``max_iterations``,
                ``min_improvement`` and ``required_gate_ids`` attributes.
        """
        self.policy = policy

    def evaluate(
        self,
        results: List[ValidationResult],
        iteration: int,
        score_history: Optional[List[float]] = None,
        total_runs: int = 0,
    ) -> HaltingRecord:
        """
        Evaluate whether to halt or continue refinement.

        Checks in order (the first matching condition wins):
            1. Resource budget exhaustion
            2. Iteration limit
            3. Quality met (all required gates pass)
            4. Marginal improvement below threshold

        Because the limits are checked before quality, an iteration that
        both meets quality and hits a limit is reported as a limit halt.

        Args:
            results: Validation results from current iteration
            iteration: Current iteration number (1-based)
            score_history: Aggregate scores from previous iterations.
                Any iterable is accepted; it is never mutated.
            total_runs: Total number of runs consumed

        Returns:
            HaltingRecord with the decision, the reason, and the score
            history extended with the current iteration's aggregate score
        """
        current_score = self._aggregate_score(results)
        # Build a fresh list so the caller's history object is left untouched.
        all_scores = [*(score_history or ()), current_score]

        decision, reason = self._decide(results, iteration, all_scores, total_runs)
        return HaltingRecord(
            decision=decision,
            iteration=iteration,
            max_iterations=self.policy.max_iterations,
            scores=all_scores,
            reason=reason,
        )

    def _decide(self, results, iteration, all_scores, total_runs) -> tuple:
        """
        Pick the halting decision and its human-readable reason.

        Args:
            results: Validation results from current iteration
            iteration: Current iteration number
            all_scores: Previous aggregate scores followed by the current one
            total_runs: Total number of runs consumed

        Returns:
            A ``(HaltDecision, reason)`` pair
        """
        policy = self.policy

        # 1. Resource budget
        if total_runs >= policy.resource_budget:
            return (
                HaltDecision.HALT_BUDGET_EXHAUSTED,
                f"Resource budget exhausted: {total_runs}/"
                f"{policy.resource_budget} runs used",
            )

        # 2. Iteration limit
        if iteration >= policy.max_iterations:
            return (
                HaltDecision.HALT_ITERATION_LIMIT,
                f"Iteration limit reached: {iteration}/{policy.max_iterations}",
            )

        # 3. Quality gates
        if self._quality_met(results):
            return HaltDecision.HALT_QUALITY_MET, "All quality gates passed"

        # 4. Marginal improvement; needs at least one earlier score to compare.
        if len(all_scores) >= 2:
            improvement = all_scores[-1] - all_scores[-2]
            if improvement < policy.min_improvement:
                return (
                    HaltDecision.HALT_NO_IMPROVEMENT,
                    f"Marginal improvement {improvement:.4f} below "
                    f"threshold {policy.min_improvement}",
                )

        return HaltDecision.CONTINUE, "Continuing refinement"

    def _quality_met(self, results: List[ValidationResult]) -> bool:
        """
        Check if quality requirements are met.

        If required_gate_ids is set, every listed gate must have at least
        one result and none of its results may be FAIL.
        Otherwise, every result must be PASS; with no results at all this
        is vacuously True.

        Args:
            results: Validation results to check

        Returns:
            True if quality requirements are met
        """
        required_ids = self.policy.required_gate_ids
        if not required_ids:
            return all(r.status == ValidationStatus.PASS for r in results)

        for gate_id in required_ids:
            gate_results = [r for r in results if r.gate_id == gate_id]
            # A required gate that produced no result cannot count as passed.
            if not gate_results:
                return False
            # NOTE(review): only FAIL is rejected here, whereas the branch
            # above demands PASS -- confirm that any other status is meant
            # to be acceptable for required gates.
            if any(r.status == ValidationStatus.FAIL for r in gate_results):
                return False
        return True

    def _aggregate_score(self, results: List[ValidationResult]) -> float:
        """
        Calculate aggregate score from results.

        Returns the arithmetic mean of the non-None ``score`` values, or
        0.0 when there are no results or none of them carries a score.
        """
        if not results:
            return 0.0
        scores = [r.score for r in results if r.score is not None]
        if not scores:
            return 0.0
        return sum(scores) / len(scores)