feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled

Add quality gate framework with schema validation (JSON Schema via
jsonschema library), pattern validation (regex-based), multi-gate
QualityValidator with SQLite persistence, HaltingPolicyEngine with
budget/iteration/improvement checks, and RefinementLoop for iterative
execute-validate-halt cycles.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 13:31:37 +01:00
parent bd1d05ba79
commit 704272644c
15 changed files with 2615 additions and 0 deletions

View File

@@ -0,0 +1,239 @@
"""
Integration tests for halting execution with refinement loop.
Tests the full execute → validate → halt or refine cycle with
real quality gates and persistence.
"""
import json
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.quality.models import (
HaltDecision,
QualityPolicy,
ValidationStatus,
)
from markitect.prompts.quality.gates.pattern_gate import PatternValidationGate
from markitect.prompts.quality.gates.schema_gate import SchemaValidationGate
from markitect.prompts.quality.validator import QualityValidator
from markitect.prompts.quality.refinement import RefinementLoop
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file and remove it afterwards.

    The file is created empty and closed before its path is handed to the
    test, so the code under test is free to open it itself.

    Yields:
        str: Filesystem path of the temporary database file.
    """
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = f.name
    try:
        yield db_path
    finally:
        # SQLite can create companion files next to the database
        # (rollback journal, write-ahead log, shared-memory index).
        # Remove them together with the main file so nothing is left
        # behind in the temp directory; missing files are ignored.
        for suffix in ("", "-journal", "-wal", "-shm"):
            Path(db_path + suffix).unlink(missing_ok=True)
@pytest.fixture
def artifact_repo(temp_db):
    """Provide a SQLite artifact repository bound to the temporary database."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
class TestImmediateQualityMet:
    """Refinement runs whose first output already satisfies the gate."""

    def test_single_iteration_success(self, temp_db):
        """A passing first draft halts the loop after exactly one iteration."""
        passing_content = "## Summary\nOverview.\n## Conclusion\nDone."

        def produce(iteration, prev_results):
            # Content is constant; only the run id depends on the iteration.
            return (f"run-{iteration}", passing_content, "art-1")

        heading_gate = PatternValidationGate(
            required_patterns=[r"## Summary", r"## Conclusion"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[heading_gate], db_path=temp_db)
        loop = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        outcome = loop.run(produce, "art-1")

        assert outcome.iterations_run == 1
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
        assert len(outcome.final_results) == 1
        assert outcome.final_results[0].status == ValidationStatus.PASS

        # The validator must be able to read the run's result back.
        stored = validator.get_results_for_run("run-1")
        assert len(stored) == 1
class TestIterativeRefinement:
    """Refinement runs whose content gets closer to passing each iteration."""

    def test_progressive_improvement(self, temp_db):
        """Three successively more complete drafts pass on the third try."""
        drafts = (
            "## Summary\nBasic.",  # 1st: two required headings absent
            "## Summary\n## Details\nBetter.",  # 2nd: one heading absent
            "## Summary\n## Details\n## Conclusion\nComplete.",  # 3rd: complete
        )
        last_index = len(drafts) - 1

        def produce(iteration, prev_results):
            # Iterations beyond the prepared drafts reuse the final one.
            draft_index = min(iteration - 1, last_index)
            return (f"run-{iteration}", drafts[draft_index], "art-1")

        heading_gate = PatternValidationGate(
            required_patterns=[r"## Summary", r"## Details", r"## Conclusion"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[heading_gate], db_path=temp_db)
        loop = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        outcome = loop.run(produce, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
        assert len(outcome.all_results) == 3

        # Each of the three runs must be retrievable from the validator.
        for run_number in (1, 2, 3):
            stored = validator.get_results_for_run(f"run-{run_number}")
            assert len(stored) == 1
class TestIterationLimit:
    """Refinement runs that end because the iteration cap is reached."""

    def test_never_meets_quality(self, temp_db):
        """Content that never matches the gate stops at ``max_iterations``."""

        def produce(iteration, prev_results):
            # Never contains the required pattern, so the gate cannot pass.
            return (f"run-{iteration}", "always insufficient", "art-1")

        impossible_gate = PatternValidationGate(
            required_patterns=[r"NEVER_MATCHES_XYZ123"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[impossible_gate], db_path=temp_db)
        capped_policy = QualityPolicy(max_iterations=3, min_improvement=0.0)
        loop = RefinementLoop(validator, capped_policy)

        outcome = loop.run(produce, "art-1")

        assert outcome.iterations_run == 3
        assert (
            outcome.halting_record.decision == HaltDecision.HALT_ITERATION_LIMIT
        )
        assert len(outcome.run_ids) == 3
class TestBudgetExhaustion:
    """Refinement runs that end because the resource budget runs out."""

    def test_budget_limits_iterations(self, temp_db):
        """A budget of 2 halts the loop before the 10-iteration cap."""

        def produce(iteration, prev_results):
            # Never contains the required pattern, so quality is never met.
            return (f"run-{iteration}", "content", "art-1")

        unreachable_gate = PatternValidationGate(
            required_patterns=[r"UNREACHABLE"],
            gate_id="gate-1",
        )
        validator = QualityValidator(gates=[unreachable_gate], db_path=temp_db)
        budgeted_policy = QualityPolicy(max_iterations=10, resource_budget=2)
        loop = RefinementLoop(validator, budgeted_policy)

        outcome = loop.run(produce, "art-1")

        assert outcome.iterations_run == 2
        assert (
            outcome.halting_record.decision == HaltDecision.HALT_BUDGET_EXHAUSTED
        )
class TestMultiGateRefinement:
    """Refinement runs validated by more than one quality gate."""

    def test_all_gates_must_pass(self, temp_db):
        """The loop keeps going until both gates accept the content."""
        drafts = (
            "## Summary\nTODO: finish this",  # heading present, TODO present
            "## Summary\nAll clean content.",  # heading present, no TODO
        )
        last_index = len(drafts) - 1

        def produce(iteration, prev_results):
            # Iterations beyond the prepared drafts reuse the final one.
            draft_index = min(iteration - 1, last_index)
            return (f"run-{iteration}", drafts[draft_index], "art-1")

        requires_summary = PatternValidationGate(
            required_patterns=[r"## Summary"],
            gate_id="gate-a",
        )
        forbids_todo = PatternValidationGate(
            forbidden_patterns=[r"TODO"],
            gate_id="gate-b",
        )
        validator = QualityValidator(
            gates=[requires_summary, forbids_todo],
            db_path=temp_db,
        )
        loop = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        outcome = loop.run(produce, "art-1")

        assert outcome.iterations_run == 2
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
class TestRefinementWithSchemaGate:
    """Refinement runs validated by a JSON Schema gate."""

    def test_json_refinement(self, temp_db):
        """JSON drafts gain required keys until the schema gate passes."""
        document_schema = {
            "type": "object",
            "required": ["title", "version", "sections"],
            "properties": {
                "title": {"type": "string"},
                "version": {"type": "integer"},
                "sections": {"type": "array"},
            },
        }

        # Each draft adds one more required key to the previous one.
        title_only = {"title": "Doc"}
        with_version = {**title_only, "version": 1}
        complete = {**with_version, "sections": []}
        payloads = [
            json.dumps(document)
            for document in (title_only, with_version, complete)
        ]
        last_index = len(payloads) - 1

        def produce(iteration, prev_results):
            # Iterations beyond the prepared drafts reuse the final one.
            payload_index = min(iteration - 1, last_index)
            return (f"run-{iteration}", payloads[payload_index], "art-1")

        schema_gate = SchemaValidationGate(
            schema=document_schema, gate_id="schema-1"
        )
        validator = QualityValidator(gates=[schema_gate], db_path=temp_db)
        loop = RefinementLoop(validator, QualityPolicy(max_iterations=5))

        outcome = loop.run(produce, "art-1")

        assert outcome.iterations_run == 3
        assert outcome.halting_record.decision == HaltDecision.HALT_QUALITY_MET
class TestResultSerialization:
    """Checks on the dictionary form of a refinement result."""

    def test_result_to_dict(self, temp_db):
        """``to_dict`` returns a dict exposing the expected top-level keys."""

        def produce(iteration, prev_results):
            # Fixed run id: the policy allows a single iteration only.
            return ("run-1", "content", "art-1")

        bare_gate = PatternValidationGate(gate_id="gate-1")
        validator = QualityValidator(gates=[bare_gate], db_path=temp_db)
        loop = RefinementLoop(validator, QualityPolicy(max_iterations=1))

        serialized = loop.run(produce, "art-1").to_dict()

        assert isinstance(serialized, dict)
        for expected_key in ("iterations_run", "halting_record", "run_ids"):
            assert expected_key in serialized

View File

@@ -0,0 +1,208 @@
"""
Integration tests for full quality validation workflow.
Tests applying quality gates to artifacts with real DB persistence,
manifest integration, and multi-gate validation.
"""
import json
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.models import Artifact, ArtifactType
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
from markitect.prompts.quality.models import (
GateType,
ValidationStatus,
)
from markitect.prompts.quality.gates.schema_gate import SchemaValidationGate
from markitect.prompts.quality.gates.pattern_gate import PatternValidationGate
from markitect.prompts.quality.validator import QualityValidator
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file and remove it afterwards.

    The file is created empty and closed before its path is handed to the
    test, so the code under test is free to open it itself.

    Yields:
        str: Filesystem path of the temporary database file.
    """
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = f.name
    try:
        yield db_path
    finally:
        # SQLite can create companion files next to the database
        # (rollback journal, write-ahead log, shared-memory index).
        # Remove them together with the main file so nothing is left
        # behind in the temp directory; missing files are ignored.
        for suffix in ("", "-journal", "-wal", "-shm"):
            Path(db_path + suffix).unlink(missing_ok=True)
@pytest.fixture
def artifact_repo(temp_db):
    """Provide a SQLite artifact repository bound to the temporary database."""
    repository = SQLiteArtifactRepository(temp_db)
    return repository
def _create_artifact(repo, name, content, art_type=ArtifactType.GENERATED):
    """Build an artifact in space ``"space-1"`` and store it through ``repo``.

    Args:
        repo: Repository whose ``create`` method receives the new artifact.
        name: Name given to the artifact.
        content: Content given to the artifact.
        art_type: Artifact type; defaults to ``ArtifactType.GENERATED``.

    Returns:
        Whatever ``repo.create`` returns for the new artifact.
    """
    artifact_fields = {
        "space_id": "space-1",
        "name": name,
        "content": content,
        "artifact_type": art_type,
    }
    new_artifact = Artifact.create(**artifact_fields)
    return repo.create(new_artifact)
class TestSchemaValidationWorkflow:
    """End-to-end schema validation against a real database file."""

    def test_validate_json_artifact_passes(self, temp_db, artifact_repo):
        """A JSON artifact matching the schema passes and is read back."""
        api_schema = {
            "type": "object",
            "required": ["name", "version", "endpoints"],
            "properties": {
                "name": {"type": "string"},
                "version": {"type": "integer"},
                "endpoints": {"type": "array", "items": {"type": "string"}},
            },
        }
        payload = json.dumps(
            {
                "name": "API Spec",
                "version": 1,
                "endpoints": ["/users", "/auth"],
            }
        )
        stored_artifact = _create_artifact(artifact_repo, "api-spec", payload)

        schema_gate = SchemaValidationGate(schema=api_schema, gate_id="schema-api")
        validator = QualityValidator(gates=[schema_gate], db_path=temp_db)
        outcomes = validator.validate_artifact(
            payload, stored_artifact.id, run_id="run-1"
        )

        assert len(outcomes) == 1
        assert outcomes[0].status == ValidationStatus.PASS

        # The stored row must report the same outcome.
        rows = validator.get_results_for_run("run-1")
        assert len(rows) == 1
        assert rows[0]["status"] == "pass"

    def test_validate_json_artifact_fails(self, temp_db, artifact_repo):
        """A JSON artifact missing a required key fails with diagnostics."""
        strict_schema = {
            "type": "object",
            "required": ["name", "version"],
        }
        payload = json.dumps({"name": "Incomplete"})
        stored_artifact = _create_artifact(artifact_repo, "bad-spec", payload)

        schema_gate = SchemaValidationGate(
            schema=strict_schema, gate_id="schema-strict"
        )
        validator = QualityValidator(gates=[schema_gate], db_path=temp_db)
        outcomes = validator.validate_artifact(
            payload, stored_artifact.id, run_id="run-2"
        )

        first_outcome = outcomes[0]
        assert first_outcome.status == ValidationStatus.FAIL
        assert len(first_outcome.diagnostics) > 0

        rows = validator.get_results_for_run("run-2")
        assert rows[0]["status"] == "fail"
class TestPatternValidationWorkflow:
    """End-to-end pattern validation against a real database file."""

    def test_validate_markdown_artifact(self, temp_db, artifact_repo):
        """Markdown with all required headings and no forbidden text passes."""
        markdown = (
            "# API Documentation\n## Endpoints\n### Authentication\nOAuth2 flow."
        )
        stored_artifact = _create_artifact(artifact_repo, "api-docs", markdown)

        pattern_gate = PatternValidationGate(
            required_patterns=[r"## Endpoints", r"### Authentication"],
            forbidden_patterns=[r"TODO", r"FIXME"],
            gate_id="pattern-api",
        )
        validator = QualityValidator(gates=[pattern_gate], db_path=temp_db)
        outcomes = validator.validate_artifact(
            markdown, stored_artifact.id, run_id="run-3"
        )

        assert outcomes[0].status == ValidationStatus.PASS

    def test_forbidden_pattern_detected(self, temp_db, artifact_repo):
        """Markdown containing a forbidden pattern fails the gate."""
        markdown = "# Draft\n## Endpoints\nTODO: Add authentication."
        stored_artifact = _create_artifact(artifact_repo, "draft-docs", markdown)

        pattern_gate = PatternValidationGate(
            required_patterns=[r"## Endpoints"],
            forbidden_patterns=[r"TODO"],
            gate_id="pattern-clean",
        )
        validator = QualityValidator(gates=[pattern_gate], db_path=temp_db)
        outcomes = validator.validate_artifact(
            markdown, stored_artifact.id, run_id="run-4"
        )

        assert outcomes[0].status == ValidationStatus.FAIL
class TestMultiGateWorkflow:
    """Validation runs that apply several gates or span several runs."""

    def test_multi_gate_validation(self, temp_db, artifact_repo):
        """A schema gate and a pattern gate both pass on the same artifact."""
        payload = json.dumps(
            {
                "title": "Design Doc",
                "sections": ["## Overview", "## Details"],
            }
        )
        stored_artifact = _create_artifact(artifact_repo, "design-doc", payload)

        gates = [
            SchemaValidationGate(
                schema={
                    "type": "object",
                    "required": ["title", "sections"],
                },
                gate_id="schema-doc",
            ),
            PatternValidationGate(
                forbidden_patterns=[r"FIXME"],
                gate_id="pattern-clean",
            ),
        ]
        validator = QualityValidator(gates=gates, db_path=temp_db)
        outcomes = validator.validate_artifact(
            payload, stored_artifact.id, run_id="run-5"
        )

        assert len(outcomes) == 2
        assert all(
            outcome.status == ValidationStatus.PASS for outcome in outcomes
        )

        # Aggregate view built from the same results.
        summary = validator.results_to_manifest_dict(outcomes)
        assert summary["all_passed"] is True
        assert summary["aggregate_score"] == 1.0

        # One stored row per gate for this run.
        rows = validator.get_results_for_run("run-5")
        assert len(rows) == 2

    def test_retrieve_by_artifact(self, temp_db, artifact_repo):
        """Results from two runs are both returned when queried by artifact."""
        payload = json.dumps({"name": "test"})
        stored_artifact = _create_artifact(artifact_repo, "test-art", payload)

        schema_gate = SchemaValidationGate(
            schema={"type": "object", "required": ["name"]},
            gate_id="schema-1",
        )
        validator = QualityValidator(gates=[schema_gate], db_path=temp_db)

        # Validate the same artifact under two different run ids.
        for run_label in ("run-a", "run-b"):
            validator.validate_artifact(
                payload, stored_artifact.id, run_id=run_label
            )

        by_artifact = validator.get_results_for_artifact(stored_artifact.id)
        assert len(by_artifact) == 2