Files
markitect-main/tests/integration/prompts/test_halting_execution.py
tegwick 704272644c
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Add quality gate framework with schema validation (JSON Schema via
jsonschema library), pattern validation (regex-based), multi-gate
QualityValidator with SQLite persistence, HaltingPolicyEngine with
budget/iteration/improvement checks, and RefinementLoop for iterative
execute-validate-halt cycles.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 13:31:37 +01:00

240 lines
8.5 KiB
Python

"""
Integration tests for halting execution with refinement loop.
Tests the full execute → validate → halt or refine cycle with
real quality gates and persistence.
"""
import json
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.quality.models import (
HaltDecision,
QualityPolicy,
ValidationStatus,
)
from markitect.prompts.quality.gates.pattern_gate import PatternValidationGate
from markitect.prompts.quality.gates.schema_gate import SchemaValidationGate
from markitect.prompts.quality.validator import QualityValidator
from markitect.prompts.quality.refinement import RefinementLoop
@pytest.fixture
def temp_db():
    """Provide the path of a scratch ``.db`` file for one test.

    The file is created with ``delete=False`` so it stays on disk once its
    handle is closed, which lets the path be handed to the code under test.
    When the test is over the file is removed; a file that has already
    disappeared is ignored.

    Yields:
        str: Filesystem path of the temporary database file.
    """
    scratch_file = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with scratch_file:
        db_path = scratch_file.name

    yield db_path

    # Teardown: drop the file that was deliberately kept on disk above.
    Path(db_path).unlink(missing_ok=True)
@pytest.fixture
def artifact_repo(temp_db):
    """Build a ``SQLiteArtifactRepository`` on top of the ``temp_db`` path.

    Args:
        temp_db: Path supplied by the ``temp_db`` fixture.

    Returns:
        The repository instance constructed from that path.
    """
    repository = SQLiteArtifactRepository(temp_db)
    return repository
class TestImmediateQualityMet:
    """Refinement runs whose first output already satisfies the gate."""

    def test_single_iteration_success(self, temp_db):
        """A passing first output halts the loop after one iteration.

        Also reads the results for ``run-1`` back through the validator to
        confirm that exactly one result was stored for that run.
        """
        heading_gate = PatternValidationGate(
            gate_id="gate-1",
            required_patterns=[r"## Summary", r"## Conclusion"],
        )
        quality_validator = QualityValidator(
            gates=[heading_gate],
            db_path=temp_db,
        )
        refinement = RefinementLoop(
            quality_validator,
            QualityPolicy(max_iterations=5),
        )
        passing_content = "## Summary\nOverview.\n## Conclusion\nDone."

        def produce(iteration, prev_results):
            # The content always contains both required headings.
            return f"run-{iteration}", passing_content, "art-1"

        outcome = refinement.run(produce, "art-1")

        assert outcome.iterations_run == 1
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
        assert len(outcome.final_results) == 1
        first_result = outcome.final_results[0]
        assert first_result.status == ValidationStatus.PASS

        # Verify results persisted
        stored = quality_validator.get_results_for_run("run-1")
        assert len(stored) == 1
class TestIterativeRefinement:
    """Refinement runs where the output gets better on each iteration."""

    def test_progressive_improvement(self, temp_db):
        """Three successively better drafts end in a quality-met halt.

        The callback serves one draft per iteration; only the third draft
        contains all three required headings. Afterwards one stored result
        is expected for each of ``run-1`` to ``run-3``.
        """
        heading_gate = PatternValidationGate(
            gate_id="gate-1",
            required_patterns=[r"## Summary", r"## Details", r"## Conclusion"],
        )
        quality_validator = QualityValidator(
            gates=[heading_gate],
            db_path=temp_db,
        )
        refinement = RefinementLoop(
            quality_validator,
            QualityPolicy(max_iterations=5),
        )

        drafts = [
            "## Summary\nBasic.",  # iter 1: missing 2 patterns
            "## Summary\n## Details\nBetter.",  # iter 2: missing 1 pattern
            "## Summary\n## Details\n## Conclusion\nComplete.",  # iter 3: all pass
        ]

        def produce(iteration, prev_results):
            # Iterations past the end of the list reuse the last draft.
            if iteration <= len(drafts):
                draft = drafts[iteration - 1]
            else:
                draft = drafts[-1]
            return f"run-{iteration}", draft, "art-1"

        outcome = refinement.run(produce, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
        assert len(outcome.all_results) == 3

        # Verify all iterations persisted
        for run_number in (1, 2, 3):
            stored = quality_validator.get_results_for_run(f"run-{run_number}")
            assert len(stored) == 1
class TestIterationLimit:
    """Refinement runs that stop because the iteration cap is reached."""

    def test_never_meets_quality(self, temp_db):
        """Content that never matches the gate stops at ``max_iterations``.

        The policy allows three iterations with ``min_improvement=0.0``;
        the test expects three runs and an iteration-limit halt decision.
        """
        unmatched_gate = PatternValidationGate(
            gate_id="gate-1",
            required_patterns=[r"NEVER_MATCHES_XYZ123"],
        )
        quality_validator = QualityValidator(
            gates=[unmatched_gate],
            db_path=temp_db,
        )
        limits = QualityPolicy(max_iterations=3, min_improvement=0.0)
        refinement = RefinementLoop(quality_validator, limits)

        def produce(iteration, prev_results):
            # Same content every time; it lacks the required pattern.
            return f"run-{iteration}", "always insufficient", "art-1"

        outcome = refinement.run(produce, "art-1")

        assert outcome.iterations_run == 3
        halt_decision = outcome.halting_record.decision
        assert halt_decision == HaltDecision.HALT_ITERATION_LIMIT
        assert len(outcome.run_ids) == 3
class TestBudgetExhaustion:
    """Refinement runs that stop because the resource budget runs out."""

    def test_budget_limits_iterations(self, temp_db):
        """A budget of 2 ends the loop before the 10-iteration cap.

        The gate's required pattern is absent from the content, so quality
        is never met; the test expects two iterations and a
        budget-exhausted halt decision.
        """
        unmatched_gate = PatternValidationGate(
            gate_id="gate-1",
            required_patterns=[r"UNREACHABLE"],
        )
        quality_validator = QualityValidator(
            gates=[unmatched_gate],
            db_path=temp_db,
        )
        limits = QualityPolicy(max_iterations=10, resource_budget=2)
        refinement = RefinementLoop(quality_validator, limits)

        def produce(iteration, prev_results):
            # Fixed content that does not contain the required pattern.
            return f"run-{iteration}", "content", "art-1"

        outcome = refinement.run(produce, "art-1")

        assert outcome.iterations_run == 2
        halt_decision = outcome.halting_record.decision
        assert halt_decision == HaltDecision.HALT_BUDGET_EXHAUSTED
class TestMultiGateRefinement:
    """Refinement runs validated by more than one quality gate."""

    def test_all_gates_must_pass(self, temp_db):
        """The loop only reports quality met once both gates pass.

        One gate requires a ``## Summary`` heading and the other forbids
        ``TODO``. The first draft still contains ``TODO``; the second does
        not, and the test expects the loop to finish on that second draft.
        """
        summary_gate = PatternValidationGate(
            gate_id="gate-a",
            required_patterns=[r"## Summary"],
        )
        no_todo_gate = PatternValidationGate(
            gate_id="gate-b",
            forbidden_patterns=[r"TODO"],
        )
        quality_validator = QualityValidator(
            gates=[summary_gate, no_todo_gate],
            db_path=temp_db,
        )
        refinement = RefinementLoop(
            quality_validator,
            QualityPolicy(max_iterations=5),
        )

        drafts = [
            "## Summary\nTODO: finish this",  # gate-a pass, gate-b fail
            "## Summary\nAll clean content.",  # both pass
        ]

        def produce(iteration, prev_results):
            # Iterations past the end of the list reuse the last draft.
            if iteration <= len(drafts):
                draft = drafts[iteration - 1]
            else:
                draft = drafts[-1]
            return f"run-{iteration}", draft, "art-1"

        outcome = refinement.run(produce, "art-1")

        assert outcome.iterations_run == 2
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
class TestRefinementWithSchemaGate:
    """Refinement runs validated by a schema gate."""

    def test_json_refinement(self, temp_db):
        """JSON drafts gain required keys until the schema gate passes.

        The schema requires ``title``, ``version`` and ``sections``. Each
        draft adds one more of those keys; the third has all of them, and
        the test expects the loop to finish there with quality met.
        """
        property_types = {
            "title": {"type": "string"},
            "version": {"type": "integer"},
            "sections": {"type": "array"},
        }
        document_schema = {
            "type": "object",
            "required": ["title", "version", "sections"],
            "properties": property_types,
        }
        schema_gate = SchemaValidationGate(
            schema=document_schema,
            gate_id="schema-1",
        )
        quality_validator = QualityValidator(
            gates=[schema_gate],
            db_path=temp_db,
        )
        refinement = RefinementLoop(
            quality_validator,
            QualityPolicy(max_iterations=5),
        )

        payloads = (
            {"title": "Doc"},  # missing version & sections
            {"title": "Doc", "version": 1},  # missing sections
            {"title": "Doc", "version": 1, "sections": []},  # complete
        )
        drafts = [json.dumps(payload) for payload in payloads]

        def produce(iteration, prev_results):
            # Iterations past the end of the list reuse the last draft.
            if iteration <= len(drafts):
                draft = drafts[iteration - 1]
            else:
                draft = drafts[-1]
            return f"run-{iteration}", draft, "art-1"

        outcome = refinement.run(produce, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
class TestResultSerialization:
    """Checks on the dictionary form of a refinement result."""

    def test_result_to_dict(self, temp_db):
        """``to_dict()`` returns a dict exposing the expected top-level keys.

        The loop is limited to a single iteration and uses a gate built
        without any patterns; only the shape of the serialized result is
        inspected.
        """
        bare_gate = PatternValidationGate(gate_id="gate-1")
        quality_validator = QualityValidator(
            gates=[bare_gate],
            db_path=temp_db,
        )
        refinement = RefinementLoop(
            quality_validator,
            QualityPolicy(max_iterations=1),
        )

        def produce(iteration, prev_results):
            # The run id is fixed; it does not depend on the iteration.
            return "run-1", "content", "art-1"

        outcome = refinement.run(produce, "art-1")
        serialized = outcome.to_dict()

        assert isinstance(serialized, dict)
        for expected_key in ("iterations_run", "halting_record", "run_ids"):
            assert expected_key in serialized