feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Add quality gate framework with schema validation (JSON Schema via jsonschema library), pattern validation (regex-based), multi-gate QualityValidator with SQLite persistence, HaltingPolicyEngine with budget/iteration/improvement checks, and RefinementLoop for iterative execute-validate-halt cycles. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
239
tests/integration/prompts/test_halting_execution.py
Normal file
239
tests/integration/prompts/test_halting_execution.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""
|
||||
Integration tests for halting execution with refinement loop.
|
||||
|
||||
Tests the full execute → validate → halt or refine cycle with
|
||||
real quality gates and persistence.
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from markitect.prompts.models import Artifact, ArtifactType
|
||||
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
|
||||
from markitect.prompts.quality.models import (
|
||||
HaltDecision,
|
||||
QualityPolicy,
|
||||
ValidationStatus,
|
||||
)
|
||||
from markitect.prompts.quality.gates.pattern_gate import PatternValidationGate
|
||||
from markitect.prompts.quality.gates.schema_gate import SchemaValidationGate
|
||||
from markitect.prompts.quality.validator import QualityValidator
|
||||
from markitect.prompts.quality.refinement import RefinementLoop
|
||||
|
||||
|
||||
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file, removing it afterwards.

    The file is created with ``delete=False`` so it survives closing the
    ``NamedTemporaryFile`` handle; the path (a string) is handed to the
    test and the file is unlinked once the test is finished.
    """
    handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with handle:
        location = handle.name
    yield location
    # missing_ok: do not fail teardown if the file is already gone.
    Path(location).unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.fixture
def artifact_repo(temp_db):
    """Build a ``SQLiteArtifactRepository`` on the ``temp_db`` path."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
|
||||
|
||||
|
||||
class TestImmediateQualityMet:
    """Refinement runs whose first output already satisfies the gate."""

    def test_single_iteration_success(self, temp_db):
        """The loop halts after one iteration when both headings are present."""
        passing_content = "## Summary\nOverview.\n## Conclusion\nDone."
        heading_gate = PatternValidationGate(
            required_patterns=[r"## Summary", r"## Conclusion"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[heading_gate], db_path=temp_db)
        refinement = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        def execute(iteration, prev_results):
            # Same content on every call; only the run id varies.
            return (f"run-{iteration}", passing_content, "art-1")

        outcome = refinement.run(execute, "art-1")

        assert outcome.iterations_run == 1
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
        assert len(outcome.final_results) == 1
        assert outcome.final_results[0].status == ValidationStatus.PASS

        # The single run must also be retrievable from the validator's store.
        stored = validator.get_results_for_run("run-1")
        assert len(stored) == 1
|
||||
|
||||
|
||||
class TestIterativeRefinement:
    """Refinement runs where content gets closer to passing each iteration."""

    def test_progressive_improvement(self, temp_db):
        """Three successively fuller drafts lead to a quality-met halt on run 3."""
        three_heading_gate = PatternValidationGate(
            required_patterns=[r"## Summary", r"## Details", r"## Conclusion"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[three_heading_gate], db_path=temp_db)
        refinement = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        versions = [
            # iteration 1: two required headings missing
            "## Summary\nBasic.",
            # iteration 2: one required heading missing
            "## Summary\n## Details\nBetter.",
            # iteration 3: every required heading present
            "## Summary\n## Details\n## Conclusion\nComplete.",
        ]
        last_index = len(versions) - 1

        def execute(iteration, prev_results):
            # Iterations are 1-based; clamp so later calls reuse the final draft.
            draft = versions[min(iteration - 1, last_index)]
            return (f"run-{iteration}", draft, "art-1")

        outcome = refinement.run(execute, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
        assert len(outcome.all_results) == 3

        # Each of the three runs must have exactly one stored result.
        for run_number in (1, 2, 3):
            stored = validator.get_results_for_run(f"run-{run_number}")
            assert len(stored) == 1
|
||||
|
||||
|
||||
class TestIterationLimit:
    """Refinement runs that end because the iteration cap is reached."""

    def test_never_meets_quality(self, temp_db):
        """With an unmatchable pattern the loop stops after ``max_iterations``."""
        impossible_gate = PatternValidationGate(
            required_patterns=[r"NEVER_MATCHES_XYZ123"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[impossible_gate], db_path=temp_db)
        capped_policy = QualityPolicy(max_iterations=3, min_improvement=0.0)
        refinement = RefinementLoop(validator, capped_policy)

        def execute(iteration, prev_results):
            # Content never contains the required pattern.
            return (f"run-{iteration}", "always insufficient", "art-1")

        outcome = refinement.run(execute, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_ITERATION_LIMIT
        assert len(outcome.run_ids) == 3
|
||||
|
||||
|
||||
class TestBudgetExhaustion:
    """Refinement runs that end because the resource budget runs out."""

    def test_budget_limits_iterations(self, temp_db):
        """A budget of 2 halts the loop after two runs despite a cap of 10."""
        impossible_gate = PatternValidationGate(
            required_patterns=[r"UNREACHABLE"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[impossible_gate], db_path=temp_db)
        budget_policy = QualityPolicy(max_iterations=10, resource_budget=2)
        refinement = RefinementLoop(validator, budget_policy)

        def execute(iteration, prev_results):
            # Content never satisfies the gate, so only the budget can stop the loop.
            return (f"run-{iteration}", "content", "art-1")

        outcome = refinement.run(execute, "art-1")

        assert outcome.iterations_run == 2
        assert outcome.halting_record.decision == HaltDecision.HALT_BUDGET_EXHAUSTED
|
||||
|
||||
|
||||
class TestMultiGateRefinement:
    """Refinement runs validated by more than one gate."""

    def test_all_gates_must_pass(self, temp_db):
        """The loop keeps going until both the required and forbidden gates pass."""
        needs_summary = PatternValidationGate(
            required_patterns=[r"## Summary"],
            gate_id="gate-a",
        )
        no_todo = PatternValidationGate(
            forbidden_patterns=[r"TODO"],
            gate_id="gate-b",
        )
        validator = QualityValidator(gates=[needs_summary, no_todo], db_path=temp_db)
        refinement = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        versions = [
            # gate-a passes, gate-b fails (contains TODO)
            "## Summary\nTODO: finish this",
            # both gates pass
            "## Summary\nAll clean content.",
        ]
        last_index = len(versions) - 1

        def execute(iteration, prev_results):
            # Iterations are 1-based; clamp so later calls reuse the final draft.
            draft = versions[min(iteration - 1, last_index)]
            return (f"run-{iteration}", draft, "art-1")

        outcome = refinement.run(execute, "art-1")

        assert outcome.iterations_run == 2
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
|
||||
|
||||
|
||||
class TestRefinementWithSchemaGate:
    """Refinement runs validated by a JSON-schema gate."""

    def test_json_refinement(self, temp_db):
        """JSON drafts gain required keys until the schema gate passes on run 3."""
        document_schema = {
            "type": "object",
            "required": ["title", "version", "sections"],
            "properties": {
                "title": {"type": "string"},
                "version": {"type": "integer"},
                "sections": {"type": "array"},
            },
        }
        schema_gate = SchemaValidationGate(schema=document_schema, gate_id="schema-1")
        validator = QualityValidator(gates=[schema_gate], db_path=temp_db)
        refinement = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        drafts = [
            {"title": "Doc"},  # missing version and sections
            {"title": "Doc", "version": 1},  # missing sections
            {"title": "Doc", "version": 1, "sections": []},  # complete
        ]
        versions = [json.dumps(draft) for draft in drafts]
        last_index = len(versions) - 1

        def execute(iteration, prev_results):
            # Iterations are 1-based; clamp so later calls reuse the final draft.
            payload = versions[min(iteration - 1, last_index)]
            return (f"run-{iteration}", payload, "art-1")

        outcome = refinement.run(execute, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
|
||||
|
||||
|
||||
class TestResultSerialization:
    """Checks on the dictionary form of a refinement result."""

    def test_result_to_dict(self, temp_db):
        """``to_dict()`` yields a dict exposing the three expected keys."""
        bare_gate = PatternValidationGate(gate_id="gate-1")
        validator = QualityValidator(gates=[bare_gate], db_path=temp_db)
        refinement = RefinementLoop(validator, QualityPolicy(max_iterations=1))

        def execute(iteration, prev_results):
            # Fixed run id: the policy allows only a single iteration.
            return ("run-1", "content", "art-1")

        serialized = refinement.run(execute, "art-1").to_dict()

        assert isinstance(serialized, dict)
        assert "iterations_run" in serialized
        assert "halting_record" in serialized
        assert "run_ids" in serialized
|
||||
208
tests/integration/prompts/test_quality_validation.py
Normal file
208
tests/integration/prompts/test_quality_validation.py
Normal file
@@ -0,0 +1,208 @@
|
||||
"""
|
||||
Integration tests for full quality validation workflow.
|
||||
|
||||
Tests applying quality gates to artifacts with real DB persistence,
|
||||
manifest integration, and multi-gate validation.
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from markitect.prompts.models import Artifact, ArtifactType
|
||||
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
|
||||
from markitect.prompts.quality.models import (
|
||||
GateType,
|
||||
ValidationStatus,
|
||||
)
|
||||
from markitect.prompts.quality.gates.schema_gate import SchemaValidationGate
|
||||
from markitect.prompts.quality.gates.pattern_gate import PatternValidationGate
|
||||
from markitect.prompts.quality.validator import QualityValidator
|
||||
|
||||
|
||||
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file, removing it afterwards.

    The file is created with ``delete=False`` so it survives closing the
    ``NamedTemporaryFile`` handle; the path (a string) is handed to the
    test and the file is unlinked once the test is finished.
    """
    handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with handle:
        location = handle.name
    yield location
    # missing_ok: do not fail teardown if the file is already gone.
    Path(location).unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.fixture
def artifact_repo(temp_db):
    """Build a ``SQLiteArtifactRepository`` on the ``temp_db`` path."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
|
||||
|
||||
|
||||
def _create_artifact(repo, name, content, art_type=ArtifactType.GENERATED):
    """Build an ``Artifact`` in space ``"space-1"`` and store it through ``repo``.

    Args:
        repo: Object exposing ``create(artifact)``.
        name: Artifact name passed to ``Artifact.create``.
        content: Artifact content passed to ``Artifact.create``.
        art_type: Artifact type; defaults to ``ArtifactType.GENERATED``.

    Returns:
        Whatever ``repo.create`` returns for the new artifact.
    """
    fields = {
        "space_id": "space-1",
        "name": name,
        "content": content,
        "artifact_type": art_type,
    }
    return repo.create(Artifact.create(**fields))
|
||||
|
||||
|
||||
class TestSchemaValidationWorkflow:
    """Schema-gate validation of stored artifacts against a real database file."""

    def test_validate_json_artifact_passes(self, temp_db, artifact_repo):
        """A JSON artifact with every required key passes and is persisted."""
        content = json.dumps({
            "name": "API Spec",
            "version": 1,
            "endpoints": ["/users", "/auth"],
        })
        stored_artifact = _create_artifact(artifact_repo, "api-spec", content)

        api_schema = {
            "type": "object",
            "required": ["name", "version", "endpoints"],
            "properties": {
                "name": {"type": "string"},
                "version": {"type": "integer"},
                "endpoints": {"type": "array", "items": {"type": "string"}},
            },
        }
        validator = QualityValidator(
            gates=[SchemaValidationGate(schema=api_schema, gate_id="schema-api")],
            db_path=temp_db,
        )

        outcomes = validator.validate_artifact(
            content, stored_artifact.id, run_id="run-1"
        )

        assert len(outcomes) == 1
        assert outcomes[0].status == ValidationStatus.PASS

        # The result must be retrievable by run id with a "pass" status.
        rows = validator.get_results_for_run("run-1")
        assert len(rows) == 1
        assert rows[0]["status"] == "pass"

    def test_validate_json_artifact_fails(self, temp_db, artifact_repo):
        """A JSON artifact missing a required key fails with diagnostics."""
        content = json.dumps({"name": "Incomplete"})
        stored_artifact = _create_artifact(artifact_repo, "bad-spec", content)

        strict_schema = {
            "type": "object",
            "required": ["name", "version"],
        }
        validator = QualityValidator(
            gates=[SchemaValidationGate(schema=strict_schema, gate_id="schema-strict")],
            db_path=temp_db,
        )

        outcomes = validator.validate_artifact(
            content, stored_artifact.id, run_id="run-2"
        )
        first = outcomes[0]

        assert first.status == ValidationStatus.FAIL
        assert len(first.diagnostics) > 0

        # The failure must be persisted under the same run id.
        rows = validator.get_results_for_run("run-2")
        assert rows[0]["status"] == "fail"
|
||||
|
||||
|
||||
class TestPatternValidationWorkflow:
    """Pattern-gate validation of stored artifacts against a real database file."""

    def test_validate_markdown_artifact(self, temp_db, artifact_repo):
        """Markdown with the required headings and no forbidden words passes."""
        content = "# API Documentation\n## Endpoints\n### Authentication\nOAuth2 flow."
        stored_artifact = _create_artifact(artifact_repo, "api-docs", content)

        api_gate = PatternValidationGate(
            required_patterns=[r"## Endpoints", r"### Authentication"],
            forbidden_patterns=[r"TODO", r"FIXME"],
            gate_id="pattern-api",
        )
        validator = QualityValidator(gates=[api_gate], db_path=temp_db)

        outcomes = validator.validate_artifact(
            content, stored_artifact.id, run_id="run-3"
        )

        assert outcomes[0].status == ValidationStatus.PASS

    def test_forbidden_pattern_detected(self, temp_db, artifact_repo):
        """Content containing a forbidden pattern fails even with required ones present."""
        content = "# Draft\n## Endpoints\nTODO: Add authentication."
        stored_artifact = _create_artifact(artifact_repo, "draft-docs", content)

        clean_gate = PatternValidationGate(
            required_patterns=[r"## Endpoints"],
            forbidden_patterns=[r"TODO"],
            gate_id="pattern-clean",
        )
        validator = QualityValidator(gates=[clean_gate], db_path=temp_db)

        outcomes = validator.validate_artifact(
            content, stored_artifact.id, run_id="run-4"
        )

        assert outcomes[0].status == ValidationStatus.FAIL
|
||||
|
||||
|
||||
class TestMultiGateWorkflow:
    """Validation runs that apply several gates or span several runs."""

    def test_multi_gate_validation(self, temp_db, artifact_repo):
        """A schema gate plus a pattern gate both pass and both get persisted."""
        content = json.dumps({
            "title": "Design Doc",
            "sections": ["## Overview", "## Details"],
        })
        stored_artifact = _create_artifact(artifact_repo, "design-doc", content)

        doc_schema = {
            "type": "object",
            "required": ["title", "sections"],
        }
        schema_gate = SchemaValidationGate(schema=doc_schema, gate_id="schema-doc")
        pattern_gate = PatternValidationGate(
            forbidden_patterns=[r"FIXME"],
            gate_id="pattern-clean",
        )
        validator = QualityValidator(
            gates=[schema_gate, pattern_gate],
            db_path=temp_db,
        )

        outcomes = validator.validate_artifact(
            content, stored_artifact.id, run_id="run-5"
        )

        assert len(outcomes) == 2
        assert all(item.status == ValidationStatus.PASS for item in outcomes)

        # The manifest summary should report a clean sweep.
        summary = validator.results_to_manifest_dict(outcomes)
        assert summary["all_passed"] is True
        assert summary["aggregate_score"] == 1.0

        # One stored row per gate for this run.
        rows = validator.get_results_for_run("run-5")
        assert len(rows) == 2

    def test_retrieve_by_artifact(self, temp_db, artifact_repo):
        """Results from two separate runs are both returned for one artifact id."""
        content = json.dumps({"name": "test"})
        stored_artifact = _create_artifact(artifact_repo, "test-art", content)

        name_gate = SchemaValidationGate(
            schema={"type": "object", "required": ["name"]},
            gate_id="schema-1",
        )
        validator = QualityValidator(gates=[name_gate], db_path=temp_db)

        # Same artifact validated under two different run ids, in this order.
        for run_label in ("run-a", "run-b"):
            validator.validate_artifact(content, stored_artifact.id, run_id=run_label)

        history = validator.get_results_for_artifact(stored_artifact.id)
        assert len(history) == 2
|
||||
Reference in New Issue
Block a user