Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
Implement three-stage execution lifecycle with idempotent runs and complete provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
369 lines
11 KiB
Python
369 lines
11 KiB
Python
"""Unit tests for PromptExecutionEngine."""
|
|
|
|
import pytest
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
from markitect.prompts.templates.models import PromptTemplate
|
|
from markitect.prompts.templates.analyzer import TemplateAnalyzer
|
|
from markitect.prompts.resolver.resolver import PromptResolver
|
|
from markitect.prompts.resolver.compiler import ContextCompiler
|
|
from markitect.prompts.resolver.strategy import (
|
|
MultiSpaceResolutionStrategy,
|
|
ResolutionConfig,
|
|
)
|
|
from markitect.prompts.execution.engine import PromptExecutionEngine
|
|
from markitect.prompts.execution.models import RunConfig, RunStatus, ExecutionStage
|
|
from markitect.prompts.execution.llm_adapter import MockLLMAdapter, ErrorLLMAdapter
|
|
from markitect.prompts.services.artifact_service import ArtifactService
|
|
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
|
|
|
|
|
|
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file and remove it afterwards."""
    # delete=False keeps the file on disk after the handle is closed, so the
    # path can be handed to code that reopens it by name.
    handle = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with handle:
        database_path = handle.name

    yield database_path

    # Teardown: missing_ok tolerates the file having been removed already.
    Path(database_path).unlink(missing_ok=True)
|
|
|
|
|
|
@pytest.fixture
def artifact_service(temp_db):
    """Build an ArtifactService over a SQLite repository opened at ``temp_db``."""
    return ArtifactService(SQLiteArtifactRepository(temp_db))
|
|
|
|
|
|
@pytest.fixture
def analyzer():
    """Provide a TemplateAnalyzer built with its default arguments."""
    template_analyzer = TemplateAnalyzer()
    return template_analyzer
|
|
|
|
|
|
@pytest.fixture
def resolver(artifact_service):
    """Build a PromptResolver that uses a MultiSpaceResolutionStrategy."""
    return PromptResolver(artifact_service, MultiSpaceResolutionStrategy())
|
|
|
|
|
|
@pytest.fixture
def compiler():
    """Provide a ContextCompiler built with its default arguments."""
    context_compiler = ContextCompiler()
    return context_compiler
|
|
|
|
|
|
@pytest.fixture
def mock_llm():
    """Provide a MockLLMAdapter configured with a fixed mock response."""
    canned_response = "Mock LLM output"
    return MockLLMAdapter(mock_response=canned_response)
|
|
|
|
|
|
@pytest.fixture
def engine(artifact_service, analyzer, resolver, compiler, mock_llm):
    """Assemble a PromptExecutionEngine from the collaborator fixtures."""
    # Passed positionally, in the order the engine constructor is called with
    # everywhere else in this module.
    collaborators = (artifact_service, analyzer, resolver, compiler, mock_llm)
    return PromptExecutionEngine(*collaborators)
|
|
|
|
|
|
class TestPromptExecutionEngine:
    """Tests for PromptExecutionEngine.

    Every test works inside the ``space-1`` space. Most tests store a
    dependency artifact, build a template whose ``{{require:...}}`` macro
    names it, and pass both to ``engine.execute``. The shared setup steps
    live in the ``_``-prefixed helpers below; the leading underscore keeps
    them out of pytest's ``test*`` method collection.
    """

    _SPACE_ID = "space-1"

    def _add_dependency(self, artifact_service, name="dep", content="Dependency"):
        """Store an artifact in the test space and return what the service returns."""
        return artifact_service.create_artifact(
            space_id=self._SPACE_ID,
            name=name,
            content=content,
        )

    def _make_template(self, content, name="test"):
        """Create a PromptTemplate in the test space holding ``content``."""
        return PromptTemplate.create(
            space_id=self._SPACE_ID,
            name=name,
            content=content,
        )

    def _resolution_config(self):
        """Return a ResolutionConfig scoped to the test space."""
        return ResolutionConfig(space_id=self._SPACE_ID)

    def test_execute_simple_template(self, engine, artifact_service, mock_llm):
        """Test executing simple template."""
        # Create dependency
        self._add_dependency(
            artifact_service, name="intro", content="Introduction text"
        )

        # Create template
        content = "# Document\n{{require:intro}}\nMore content"
        template = self._make_template(content, name="doc")

        # Execute
        resolution_config = self._resolution_config()
        run = engine.execute(template, content, resolution_config)

        # Verify run
        assert run.status == RunStatus.SUCCESS
        assert run.stage == ExecutionStage.COMPLETE
        assert run.completed_at is not None
        assert "output_artifact_id" in run.metadata

        # Verify LLM was called
        assert mock_llm.call_count == 1
        assert mock_llm.last_prompt is not None

    def test_execute_with_failed_resolution(self, engine):
        """Test execution with missing required dependency."""
        # Create template with missing dependency (no artifact is stored)
        content = "{{require:missing-dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run = engine.execute(template, content, resolution_config)

        # Should fail during resolution
        assert run.status == RunStatus.FAILED
        assert "Resolution failed" in run.error_message

    def test_idempotent_execution_skips_duplicate(self, engine, artifact_service):
        """Test idempotent execution (FR-4.4)."""
        # Create dependency
        self._add_dependency(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        config = RunConfig(skip_if_exists=True)

        # First execution
        run1 = engine.execute(template, content, resolution_config, config)
        assert run1.status == RunStatus.SUCCESS

        # Second execution with same inputs
        run2 = engine.execute(template, content, resolution_config, config)

        # Should be skipped
        assert run2.status == RunStatus.SKIPPED
        assert run2.id != run1.id  # Different run
        assert run2.input_bundle_hash == run1.input_bundle_hash  # Same hash
        assert "skipped_due_to" in run2.metadata

    def test_execution_without_skip_reruns(self, engine, artifact_service):
        """Test execution without skip flag reruns."""
        self._add_dependency(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        config = RunConfig(skip_if_exists=False)

        # First execution
        run1 = engine.execute(template, content, resolution_config, config)
        assert run1.status == RunStatus.SUCCESS

        # Second execution
        run2 = engine.execute(template, content, resolution_config, config)

        # Should execute again (not skipped)
        assert run2.status == RunStatus.SUCCESS
        assert run2.id != run1.id

    def test_input_bundle_hash_changes_with_template(
        self, engine, artifact_service, mock_llm
    ):
        """Test hash changes when template changes."""
        self._add_dependency(artifact_service)

        # First template
        content1 = "{{require:dep}} version 1"
        template1 = self._make_template(content1)

        resolution_config = self._resolution_config()
        run1 = engine.execute(template1, content1, resolution_config)

        # Different template content
        content2 = "{{require:dep}} version 2"
        template2 = self._make_template(content2)

        run2 = engine.execute(template2, content2, resolution_config)

        # Different hashes, both execute
        assert run1.input_bundle_hash != run2.input_bundle_hash
        assert run1.status == RunStatus.SUCCESS
        assert run2.status == RunStatus.SUCCESS

    def test_input_bundle_hash_changes_with_dependencies(
        self, engine, artifact_service
    ):
        """Test hash changes when dependency content changes."""
        # Create initial dependency
        dep = self._add_dependency(artifact_service, content="Original content")

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run1 = engine.execute(template, content, resolution_config)

        # Update dependency
        artifact_service.update_artifact_content(dep.id, "Modified content")

        run2 = engine.execute(template, content, resolution_config)

        # Hashes should differ
        assert run1.input_bundle_hash != run2.input_bundle_hash

    def test_execution_creates_manifest(self, engine, artifact_service):
        """Test execution creates RunManifest."""
        self._add_dependency(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run = engine.execute(template, content, resolution_config)

        # Check manifest in metadata
        assert "manifest" in run.metadata
        manifest_data = run.metadata["manifest"]
        assert manifest_data["run_id"] == run.id
        assert len(manifest_data["resolved_inputs"]) == 1
        assert len(manifest_data["output_artifacts"]) == 1
        assert "timing_metadata" in manifest_data

    def test_execution_with_llm_error(
        self, artifact_service, analyzer, resolver, compiler
    ):
        """Test execution handles LLM errors."""
        # Create engine with error adapter instead of the shared mock adapter
        error_llm = ErrorLLMAdapter("LLM service unavailable")
        engine = PromptExecutionEngine(
            artifact_service,
            analyzer,
            resolver,
            compiler,
            error_llm,
        )

        self._add_dependency(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run = engine.execute(template, content, resolution_config)

        # Should fail during processing
        assert run.status == RunStatus.FAILED
        assert "LLM service unavailable" in run.error_message

    def test_get_run_by_hash(self, engine, artifact_service):
        """Test retrieving cached run by hash."""
        self._add_dependency(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run = engine.execute(template, content, resolution_config)

        # Retrieve by hash
        cached = engine.get_run_by_hash(run.input_bundle_hash)
        assert cached is not None
        assert cached.id == run.id

    def test_clear_cache(self, engine, artifact_service):
        """Test clearing run cache."""
        self._add_dependency(artifact_service)

        content = "{{require:dep}}"
        template = self._make_template(content)

        resolution_config = self._resolution_config()
        run = engine.execute(template, content, resolution_config)

        # Cache should have run
        assert engine.get_run_by_hash(run.input_bundle_hash) is not None

        # Clear cache
        engine.clear_cache()

        # Should be gone
        assert engine.get_run_by_hash(run.input_bundle_hash) is None
|