feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Add quality gate framework with schema validation (JSON Schema via jsonschema library), pattern validation (regex-based), multi-gate QualityValidator with SQLite persistence, HaltingPolicyEngine with budget/iteration/improvement checks, and RefinementLoop for iterative execute-validate-halt cycles. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
264
tests/unit/prompts/test_quality_validator.py
Normal file
264
tests/unit/prompts/test_quality_validator.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
Unit tests for QualityValidator.
|
||||
|
||||
Tests applying multiple gates, aggregating results, and persistence.
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from markitect.prompts.quality.models import (
|
||||
GateType,
|
||||
ValidationStatus,
|
||||
ValidationResult,
|
||||
)
|
||||
from markitect.prompts.quality.gates.schema_gate import SchemaValidationGate
|
||||
from markitect.prompts.quality.gates.pattern_gate import PatternValidationGate
|
||||
from markitect.prompts.quality.validator import QualityValidator
|
||||
|
||||
|
||||
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file, removing it after the test.

    The file is created empty and closed before its path (a ``str``) is
    handed to the test; teardown deletes it and tolerates the file already
    being gone.
    """
    handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    handle.close()
    scratch_path = handle.name

    yield scratch_path

    # missing_ok: the test itself may already have removed the file.
    Path(scratch_path).unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.fixture
def schema_gate():
    """Provide a schema gate whose schema is an object with a required string ``name``."""
    name_schema = {
        "type": "object",
        "required": ["name"],
        "properties": {"name": {"type": "string"}},
    }
    gate = SchemaValidationGate(
        schema=name_schema,
        gate_id="schema-gate-1",
        name="test-schema",
    )
    return gate
|
||||
|
||||
|
||||
@pytest.fixture
def pattern_gate():
    """Provide a pattern gate requiring ``## Summary`` and forbidding ``TODO``."""
    must_match = [r"## Summary"]
    must_not_match = [r"TODO"]
    gate = PatternValidationGate(
        required_patterns=must_match,
        forbidden_patterns=must_not_match,
        gate_id="pattern-gate-1",
        name="test-pattern",
    )
    return gate
|
||||
|
||||
|
||||
class TestValidateArtifact:
    """Tests for validating artifacts with one or more gates."""

    def test_all_gates_pass(self, schema_gate, pattern_gate):
        """Test that both fixture gates pass on content of their own type.

        The schema gate is fed a JSON document and the pattern gate is fed
        plain text, so each gate is run through its own validator rather
        than one combined validator.
        """
        schema_validator = QualityValidator(gates=[schema_gate])
        schema_results = schema_validator.validate_artifact(
            json.dumps({"name": "test"}), "art-1"
        )
        assert len(schema_results) == 1
        assert schema_results[0].status == ValidationStatus.PASS

        pattern_validator = QualityValidator(gates=[pattern_gate])
        pattern_results = pattern_validator.validate_artifact(
            "## Summary\nAll good here.", "art-1"
        )
        assert len(pattern_results) == 1
        assert pattern_results[0].status == ValidationStatus.PASS

    def test_pattern_gate_validates(self, pattern_gate):
        """Test pattern gate validation on text containing the required heading."""
        validator = QualityValidator(gates=[pattern_gate])
        results = validator.validate_artifact(
            "## Summary\nAll good here.", "art-1"
        )
        assert len(results) == 1
        assert results[0].status == ValidationStatus.PASS

    def test_multiple_gates_mixed_results(self):
        """Test multiple gates with mixed pass/fail.

        Two pattern gates run on the same text: one whose required heading
        is present and one whose required heading is absent.
        """
        gate_a = PatternValidationGate(
            required_patterns=[r"## Summary"],
            gate_id="gate-a",
        )
        gate_b = PatternValidationGate(
            required_patterns=[r"## Missing Section"],
            gate_id="gate-b",
        )
        validator = QualityValidator(gates=[gate_a, gate_b])

        results = validator.validate_artifact("## Summary\nContent.", "art-1")
        assert len(results) == 2

        # Key by gate id so the assertions do not depend on result ordering.
        statuses = {r.gate_id: r.status for r in results}
        assert statuses["gate-a"] == ValidationStatus.PASS
        assert statuses["gate-b"] == ValidationStatus.FAIL

    def test_no_gates_returns_empty(self):
        """Test validator with no gates returns empty list."""
        validator = QualityValidator()
        results = validator.validate_artifact("content", "art-1")
        assert results == []
|
||||
|
||||
|
||||
class TestAllPassed:
    """Checks for the ``all_passed`` helper on ``QualityValidator``."""

    @staticmethod
    def _pattern_result(gate_id, status):
        """Build a PATTERN-gate result for artifact ``"a"`` with the given status."""
        return ValidationResult.create(
            gate_id=gate_id,
            gate_type=GateType.PATTERN,
            artifact_id="a",
            status=status,
        )

    def test_all_pass(self):
        """all_passed is True when every result has PASS status."""
        validator = QualityValidator()
        outcomes = [
            self._pattern_result("g1", ValidationStatus.PASS),
            self._pattern_result("g2", ValidationStatus.PASS),
        ]
        verdict = validator.all_passed(outcomes)
        assert verdict is True

    def test_one_fails(self):
        """all_passed is False when any single result has FAIL status."""
        validator = QualityValidator()
        outcomes = [
            self._pattern_result("g1", ValidationStatus.PASS),
            self._pattern_result("g2", ValidationStatus.FAIL),
        ]
        verdict = validator.all_passed(outcomes)
        assert verdict is False

    def test_empty_results(self):
        """all_passed is True for an empty result list."""
        validator = QualityValidator()
        verdict = validator.all_passed([])
        assert verdict is True
|
||||
|
||||
|
||||
class TestAggregateScore:
    """Checks for ``QualityValidator.aggregate_score``."""

    @staticmethod
    def _scored_result(gate_id, status, score):
        """Build a PATTERN-gate result for artifact ``"a"`` carrying ``score``."""
        return ValidationResult.create(
            gate_id=gate_id,
            gate_type=GateType.PATTERN,
            artifact_id="a",
            status=status,
            score=score,
        )

    def test_average_scores(self):
        """Scores 1.0 and 0.5 aggregate to 0.75."""
        validator = QualityValidator()
        outcomes = [
            self._scored_result("g1", ValidationStatus.PASS, 1.0),
            self._scored_result("g2", ValidationStatus.FAIL, 0.5),
        ]
        combined = validator.aggregate_score(outcomes)
        assert combined == 0.75

    def test_no_results(self):
        """An empty result list aggregates to 1.0."""
        validator = QualityValidator()
        combined = validator.aggregate_score([])
        assert combined == 1.0

    def test_none_scores_ignored(self):
        """A lone result whose score is None aggregates to 1.0."""
        validator = QualityValidator()
        outcomes = [self._scored_result("g1", ValidationStatus.PASS, None)]
        combined = validator.aggregate_score(outcomes)
        assert combined == 1.0
|
||||
|
||||
|
||||
class TestGetFailedGates:
    """Checks for ``QualityValidator.get_failed_gates``."""

    def test_get_failed(self):
        """Only the FAIL result is returned from a mixed PASS/FAIL list."""
        validator = QualityValidator()
        gate_statuses = (
            ("g1", ValidationStatus.PASS),
            ("g2", ValidationStatus.FAIL),
        )
        outcomes = [
            ValidationResult.create(
                gate_id=gate_id,
                gate_type=GateType.PATTERN,
                artifact_id="a",
                status=status,
            )
            for gate_id, status in gate_statuses
        ]

        failures = validator.get_failed_gates(outcomes)

        assert len(failures) == 1
        only_failure = failures[0]
        assert only_failure.gate_id == "g2"
|
||||
|
||||
|
||||
class TestResultsToManifest:
    """Checks for ``QualityValidator.results_to_manifest_dict``."""

    def test_manifest_dict_format(self):
        """A single passing result yields the expected top-level manifest entries."""
        validator = QualityValidator()
        passing = ValidationResult.create(
            gate_id="g1",
            gate_type=GateType.PATTERN,
            artifact_id="a",
            status=ValidationStatus.PASS,
            score=1.0,
        )

        manifest = validator.results_to_manifest_dict([passing])

        # Presence of the gate listing, then the two summary values.
        assert "quality_gates" in manifest
        assert manifest["all_passed"] is True
        assert manifest["aggregate_score"] == 1.0
|
||||
|
||||
|
||||
class TestPersistence:
    """Checks for storing validation results and reading them back."""

    def test_persist_and_retrieve_by_run(self, temp_db, pattern_gate):
        """Results validated under a run ID can be fetched by that run ID."""
        validator = QualityValidator(gates=[pattern_gate], db_path=temp_db)
        content = "## Summary\nClean content."
        validator.validate_artifact(content, "art-1", run_id="run-1")

        rows = validator.get_results_for_run("run-1")

        assert len(rows) == 1
        first_row = rows[0]
        assert first_row["status"] == "pass"

    def test_persist_and_retrieve_by_artifact(self, temp_db, pattern_gate):
        """Results validated for an artifact can be fetched by artifact ID."""
        validator = QualityValidator(gates=[pattern_gate], db_path=temp_db)
        content = "## Summary\nClean content."
        validator.validate_artifact(content, "art-1", run_id="run-1")

        rows = validator.get_results_for_artifact("art-1")

        assert len(rows) == 1
        first_row = rows[0]
        assert first_row["artifact_id"] == "art-1"

    def test_no_persistence_without_db(self, pattern_gate):
        """Without a db_path, validation still returns results but run queries are empty."""
        validator = QualityValidator(gates=[pattern_gate])
        content = "## Summary\nContent."
        outcomes = validator.validate_artifact(content, "art-1", run_id="run-1")
        assert len(outcomes) == 1

        # Querying with no database configured must not fail; it returns no rows.
        stored = validator.get_results_for_run("run-1")
        assert stored == []

    def test_add_gate(self):
        """A gate added after construction shows up in ``validator.gates``."""
        validator = QualityValidator()
        assert len(validator.gates) == 0

        extra_gate = PatternValidationGate(required_patterns=[r"test"])
        validator.add_gate(extra_gate)

        assert len(validator.gates) == 1
|
||||
Reference in New Issue
Block a user