feat(prompts): implement Phase 7 - Quality & Validation (FR-9, FR-10)
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Add quality gate framework with schema validation (JSON Schema via jsonschema library), pattern validation (regex-based), multi-gate QualityValidator with SQLite persistence, HaltingPolicyEngine with budget/iteration/improvement checks, and RefinementLoop for iterative execute-validate-halt cycles.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
294
markitect/prompts/quality/validator.py
Normal file
294
markitect/prompts/quality/validator.py
Normal file
@@ -0,0 +1,294 @@
|
||||
"""
|
||||
Quality validator for applying multiple gates to artifacts.
|
||||
|
||||
Implements FR-9.2: Multiple QualityGates per artifact.
|
||||
Implements FR-9.3: Record pass/fail results and diagnostics in RunManifest.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
from markitect.prompts.quality.models import (
|
||||
GateType,
|
||||
QualityGate,
|
||||
ValidationResult,
|
||||
ValidationStatus,
|
||||
)
|
||||
|
||||
|
||||
# SQL schema for quality tables.
#
# Every statement uses IF NOT EXISTS, so the script can be executed on each
# start-up without failing against an already-initialised database.
#   quality_gates      - one row per gate definition (its config stored as JSON)
#   validation_results - one row per gate outcome, keyed to a run and artifact
# The two indexes cover the lookups by run_id and by artifact_id.
QUALITY_TABLES_SQL = """
CREATE TABLE IF NOT EXISTS quality_gates (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    gate_type TEXT NOT NULL,
    config JSON NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS validation_results (
    id TEXT PRIMARY KEY,
    run_id TEXT NOT NULL,
    gate_id TEXT NOT NULL,
    artifact_id TEXT,
    status TEXT NOT NULL,
    score REAL,
    diagnostics JSON,
    validated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_validations_run ON validation_results(run_id);
CREATE INDEX IF NOT EXISTS idx_validations_artifact ON validation_results(artifact_id);
"""
|
||||
|
||||
|
||||
class QualityValidator:
    """
    Applies multiple quality gates to artifacts and records results.

    Implements FR-9.2 and FR-9.3.

    Each gate is called as ``gate.validate(content, artifact_id)`` and the
    returned results are collected in gate order. When a database path is
    configured, results can additionally be written to and read back from the
    ``validation_results`` SQLite table.
    """

    def __init__(
        self,
        gates: Optional[List[QualityGate]] = None,
        db_path: Optional[str] = None,
    ) -> None:
        """
        Initialize validator with quality gates.

        Args:
            gates: List of quality gates to apply
            db_path: Optional database path for persisting results
        """
        # Copy the list so add_gate() never mutates the caller's argument.
        self.gates: List[QualityGate] = list(gates) if gates else []
        self.db_path = db_path
        if db_path:
            self._initialize_tables()

    def _initialize_tables(self) -> None:
        """Create the database's parent directory and the quality tables."""
        # mkdir(exist_ok=True) is already a no-op for an existing directory,
        # so no separate existence check is needed.
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)

        conn = sqlite3.connect(self.db_path)
        try:
            conn.executescript(QUALITY_TABLES_SQL)
            conn.commit()
        finally:
            conn.close()

    def _get_connection(self) -> sqlite3.Connection:
        """
        Open a new database connection whose rows support access by name.

        The caller is responsible for closing the returned connection.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def add_gate(self, gate: QualityGate) -> None:
        """
        Add a quality gate to the validator.

        Args:
            gate: Quality gate to add
        """
        self.gates.append(gate)

    def validate_artifact(
        self,
        content: str,
        artifact_id: str,
        run_id: Optional[str] = None,
    ) -> List[ValidationResult]:
        """
        Apply all quality gates to an artifact.

        Each result is persisted immediately after its gate runs, but only
        when both ``run_id`` and a database path are set.

        Args:
            content: Artifact content to validate
            artifact_id: ID of the artifact
            run_id: Optional run ID for persistence

        Returns:
            List of ValidationResult from all gates, in gate order
        """
        should_persist = bool(run_id and self.db_path)

        results = []
        for gate in self.gates:
            result = gate.validate(content, artifact_id)
            results.append(result)
            if should_persist:
                self._persist_result(result, run_id)
        return results

    def all_passed(self, results: List[ValidationResult]) -> bool:
        """
        Check if all validation results passed.

        Args:
            results: List of validation results

        Returns:
            True if all results have PASS status (also True for an empty list)
        """
        return all(r.status == ValidationStatus.PASS for r in results)

    def aggregate_score(self, results: List[ValidationResult]) -> float:
        """
        Calculate aggregate score across all results.

        Results whose score is None are left out of the average.

        Args:
            results: List of validation results

        Returns:
            Mean of the available scores, or 1.0 if there are no results or
            none of them carries a score
        """
        scores = [r.score for r in results if r.score is not None]
        if not scores:
            return 1.0
        return sum(scores) / len(scores)

    def get_failed_gates(
        self,
        results: List[ValidationResult],
    ) -> List[ValidationResult]:
        """
        Get only failed validation results.

        Args:
            results: List of validation results

        Returns:
            List of results with FAIL status
        """
        return [r for r in results if r.status == ValidationStatus.FAIL]

    def results_to_manifest_dict(
        self,
        results: List[ValidationResult],
    ) -> Dict[str, Any]:
        """
        Convert validation results to RunManifest-compatible dict.

        Implements FR-9.3: Results for RunManifest.

        Args:
            results: List of validation results

        Returns:
            Dictionary with the keys ``quality_gates`` (each result's
            ``to_dict()``), ``all_passed`` and ``aggregate_score``
        """
        return {
            "quality_gates": [r.to_dict() for r in results],
            "all_passed": self.all_passed(results),
            "aggregate_score": self.aggregate_score(results),
        }

    def _persist_result(
        self,
        result: ValidationResult,
        run_id: str,
    ) -> None:
        """
        Persist a validation result to the database.

        Diagnostics are stored as a JSON array and the timestamp as an
        ISO-8601 string.

        Args:
            result: Validation result to store
            run_id: Run identifier the result belongs to
        """
        import json

        conn = self._get_connection()
        try:
            conn.execute(
                """
                INSERT INTO validation_results (
                    id, run_id, gate_id, artifact_id,
                    status, score, diagnostics, validated_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    result.id,
                    run_id,
                    result.gate_id,
                    result.artifact_id,
                    result.status.value,
                    result.score,
                    json.dumps([d.to_dict() for d in result.diagnostics]),
                    result.validated_at.isoformat(),
                ),
            )
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _row_to_dict(row: sqlite3.Row) -> Dict[str, Any]:
        """Convert one ``validation_results`` row into a plain dictionary."""
        import json

        raw_diagnostics = row["diagnostics"]
        return {
            "id": row["id"],
            "run_id": row["run_id"],
            "gate_id": row["gate_id"],
            "artifact_id": row["artifact_id"],
            "status": row["status"],
            "score": row["score"],
            # A NULL or empty diagnostics column reads back as an empty list.
            "diagnostics": json.loads(raw_diagnostics) if raw_diagnostics else [],
            "validated_at": row["validated_at"],
        }

    def _fetch_results(self, query: str, params: tuple) -> List[Dict[str, Any]]:
        """Run a parameterized SELECT on ``validation_results`` and convert the rows."""
        if not self.db_path:
            return []

        conn = self._get_connection()
        try:
            cursor = conn.execute(query, params)
            return [self._row_to_dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()

    def get_results_for_run(self, run_id: str) -> List[Dict[str, Any]]:
        """
        Retrieve persisted validation results for a run.

        Args:
            run_id: Run identifier

        Returns:
            List of result dictionaries (empty when no database is configured)
        """
        return self._fetch_results(
            "SELECT * FROM validation_results WHERE run_id = ?",
            (run_id,),
        )

    def get_results_for_artifact(self, artifact_id: str) -> List[Dict[str, Any]]:
        """
        Retrieve persisted validation results for an artifact.

        Args:
            artifact_id: Artifact identifier

        Returns:
            List of result dictionaries (empty when no database is configured)
        """
        return self._fetch_results(
            "SELECT * FROM validation_results WHERE artifact_id = ?",
            (artifact_id,),
        )
|
||||
Reference in New Issue
Block a user