Files
markitect-main/tests/unit/prompts/test_execution_engine.py
tegwick c56c92c815
Some checks failed
Test Suite / unit-tests (3.11) (push) Has been cancelled
Test Suite / security-scan (push) Has been cancelled
Test Suite / unit-tests (3.12) (push) Has been cancelled
Test Suite / integration-tests (push) Has been cancelled
Test Suite / e2e-tests (push) Has been cancelled
Test Suite / performance-tests (push) Has been cancelled
Test Suite / code-quality (push) Has been cancelled
Test Suite / test-summary (push) Has been cancelled
feat(prompts): implement Phase 4 - Execution Engine (FR-4, FR-5)
Implement three-stage execution lifecycle with idempotent runs and complete
provenance tracking via RunManifest.

Core Features:
- PromptRun model with execution lifecycle stages:
  1. Analysis: Template analysis and macro extraction
  2. Compilation: Macro resolution and context compilation
  3. Processing: LLM execution and output generation
- InputBundleHash for deterministic idempotency (FR-4.3)
- RunManifest for complete execution provenance (FR-5)
- LLMAdapter interface for pluggable model providers
- MockLLMAdapter for testing without API calls
- PromptExecutionEngine orchestrating full lifecycle

Idempotent Execution (FR-4.4):
- Calculate SHA-256 hash of complete input context
- Skip execution if identical hash exists
- Cache successful runs by hash
- Support force re-execution via config flag

RunManifest Tracking (FR-5.2):
- Template metadata (id, name, digest)
- Resolved input artifacts and digests
- Compiled prompt digest
- Model configuration
- Output artifacts
- Dependency edges for graph construction
- Timing metadata for performance analysis

Tests (27 passing):
- 17 execution model tests (config, bundle, runs, stages)
- 10 engine tests (execution, idempotency, errors, caching)

Implements:
- FR-4.1: Three-stage execution lifecycle
- FR-4.2: CompiledPrompt during compilation
- FR-4.3: InputBundleHash calculation
- FR-4.4: Skip execution for identical hashes
- FR-5.1: RunManifest persistence
- FR-5.2: Complete manifest contents
- FR-5.3: Nested run linking (foundation)

Files Created:
- markitect/prompts/execution/models.py
- markitect/prompts/execution/manifest.py
- markitect/prompts/execution/llm_adapter.py
- markitect/prompts/execution/engine.py
- migrations/prompts/003_create_runs_and_manifests.sql
- tests/unit/prompts/test_execution_models.py
- tests/unit/prompts/test_execution_engine.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 23:15:33 +01:00

369 lines
11 KiB
Python

"""Unit tests for PromptExecutionEngine."""
import pytest
import tempfile
from pathlib import Path
from markitect.prompts.templates.models import PromptTemplate
from markitect.prompts.templates.analyzer import TemplateAnalyzer
from markitect.prompts.resolver.resolver import PromptResolver
from markitect.prompts.resolver.compiler import ContextCompiler
from markitect.prompts.resolver.strategy import (
MultiSpaceResolutionStrategy,
ResolutionConfig,
)
from markitect.prompts.execution.engine import PromptExecutionEngine
from markitect.prompts.execution.models import RunConfig, RunStatus, ExecutionStage
from markitect.prompts.execution.llm_adapter import MockLLMAdapter, ErrorLLMAdapter
from markitect.prompts.services.artifact_service import ArtifactService
from markitect.prompts.repositories.sqlite import SQLiteArtifactRepository
@pytest.fixture
def temp_db():
    """Yield the path of a throwaway ``.db`` file and remove it afterwards.

    The file is created with ``delete=False`` so it outlives the handle that
    created it; the handle is closed before the path is handed to the test.
    """
    scratch_file = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    with scratch_file:
        database_path = scratch_file.name
    yield database_path
    # missing_ok: do not fail teardown if the file is already gone.
    Path(database_path).unlink(missing_ok=True)
@pytest.fixture
def artifact_service(temp_db):
    """Provide an ArtifactService over a SQLiteArtifactRepository at ``temp_db``."""
    return ArtifactService(SQLiteArtifactRepository(temp_db))
@pytest.fixture
def analyzer():
    """Provide a TemplateAnalyzer built with no arguments."""
    template_analyzer = TemplateAnalyzer()
    return template_analyzer
@pytest.fixture
def resolver(artifact_service):
    """Provide a PromptResolver using a fresh MultiSpaceResolutionStrategy."""
    return PromptResolver(artifact_service, MultiSpaceResolutionStrategy())
@pytest.fixture
def compiler():
    """Provide a ContextCompiler built with no arguments."""
    context_compiler = ContextCompiler()
    return context_compiler
@pytest.fixture
def mock_llm():
    """Provide a MockLLMAdapter configured with a fixed mock response."""
    canned_reply = "Mock LLM output"
    return MockLLMAdapter(mock_response=canned_reply)
@pytest.fixture
def engine(artifact_service, analyzer, resolver, compiler, mock_llm):
    """Provide a PromptExecutionEngine wired to the other fixtures.

    The collaborators are passed positionally in the order: artifact service,
    analyzer, resolver, compiler, LLM adapter.
    """
    collaborators = (artifact_service, analyzer, resolver, compiler, mock_llm)
    return PromptExecutionEngine(*collaborators)
class TestPromptExecutionEngine:
    """Tests for PromptExecutionEngine.

    Every test works in the space "space-1" and drives the engine through
    ``engine.execute``. The repeated artifact/template setup lives in the two
    private helpers below; pytest does not collect them because their names do
    not start with ``test``.
    """

    @staticmethod
    def _store_artifact(artifact_service, name="dep", content="Dependency"):
        """Create an artifact in "space-1" and return the service's result."""
        return artifact_service.create_artifact(
            space_id="space-1",
            name=name,
            content=content,
        )

    @staticmethod
    def _build_template(content, name="test"):
        """Create a PromptTemplate in "space-1" holding ``content``."""
        return PromptTemplate.create(
            space_id="space-1",
            name=name,
            content=content,
        )

    def test_execute_simple_template(
        self, engine, artifact_service, mock_llm
    ):
        """Test executing simple template."""
        # Create dependency
        self._store_artifact(
            artifact_service, name="intro", content="Introduction text"
        )
        # Create template
        content = "# Document\n{{require:intro}}\nMore content"
        template = self._build_template(content, name="doc")

        # Execute
        resolution_config = ResolutionConfig(space_id="space-1")
        run = engine.execute(template, content, resolution_config)

        # Verify run
        assert run.status == RunStatus.SUCCESS
        assert run.stage == ExecutionStage.COMPLETE
        assert run.completed_at is not None
        assert "output_artifact_id" in run.metadata
        # Verify LLM was called
        assert mock_llm.call_count == 1
        assert mock_llm.last_prompt is not None

    def test_execute_with_failed_resolution(self, engine):
        """Test execution with missing required dependency."""
        # Template requires an artifact that was never created
        content = "{{require:missing-dep}}"
        template = self._build_template(content)

        resolution_config = ResolutionConfig(space_id="space-1")
        run = engine.execute(template, content, resolution_config)

        # Should fail during resolution
        assert run.status == RunStatus.FAILED
        # Guard first so a missing message fails as an assertion, not a
        # TypeError from the ``in`` check below.
        assert run.error_message is not None
        assert "Resolution failed" in run.error_message

    def test_idempotent_execution_skips_duplicate(
        self, engine, artifact_service, mock_llm
    ):
        """Test idempotent execution (FR-4.4)."""
        self._store_artifact(artifact_service)
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")
        config = RunConfig(skip_if_exists=True)

        # First execution
        run1 = engine.execute(template, content, resolution_config, config)
        assert run1.status == RunStatus.SUCCESS

        # Second execution with same inputs
        run2 = engine.execute(template, content, resolution_config, config)

        # Should be skipped
        assert run2.status == RunStatus.SKIPPED
        assert run2.id != run1.id  # Different run
        assert run2.input_bundle_hash == run1.input_bundle_hash  # Same hash
        assert "skipped_due_to" in run2.metadata
        # A skipped run must not reach the adapter: only run1 called it.
        assert mock_llm.call_count == 1

    def test_execution_without_skip_reruns(
        self, engine, artifact_service, mock_llm
    ):
        """Test execution without skip flag reruns."""
        self._store_artifact(artifact_service)
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")
        config = RunConfig(skip_if_exists=False)

        # First execution
        run1 = engine.execute(template, content, resolution_config, config)
        assert run1.status == RunStatus.SUCCESS

        # Second execution
        run2 = engine.execute(template, content, resolution_config, config)

        # Should execute again (not skipped)
        assert run2.status == RunStatus.SUCCESS
        assert run2.id != run1.id
        # Both runs must have gone through the adapter.
        assert mock_llm.call_count == 2

    def test_input_bundle_hash_changes_with_template(
        self, engine, artifact_service, mock_llm
    ):
        """Test hash changes when template changes."""
        self._store_artifact(artifact_service)
        resolution_config = ResolutionConfig(space_id="space-1")

        # First template
        content1 = "{{require:dep}} version 1"
        template1 = self._build_template(content1)
        run1 = engine.execute(template1, content1, resolution_config)

        # Different template content
        content2 = "{{require:dep}} version 2"
        template2 = self._build_template(content2)
        run2 = engine.execute(template2, content2, resolution_config)

        # Different hashes, both execute
        assert run1.input_bundle_hash != run2.input_bundle_hash
        assert run1.status == RunStatus.SUCCESS
        assert run2.status == RunStatus.SUCCESS
        # Neither run may be served from a previous result.
        assert mock_llm.call_count == 2

    def test_input_bundle_hash_changes_with_dependencies(
        self, engine, artifact_service
    ):
        """Test hash changes when dependency content changes."""
        # Create initial dependency
        dep = self._store_artifact(artifact_service, content="Original content")
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")

        run1 = engine.execute(template, content, resolution_config)

        # Update dependency
        artifact_service.update_artifact_content(dep.id, "Modified content")
        run2 = engine.execute(template, content, resolution_config)

        # Comparing hashes is only meaningful if both runs completed.
        assert run1.status == RunStatus.SUCCESS
        assert run2.status == RunStatus.SUCCESS
        # Hashes should differ
        assert run1.input_bundle_hash != run2.input_bundle_hash

    def test_execution_creates_manifest(
        self, engine, artifact_service
    ):
        """Test execution creates RunManifest."""
        self._store_artifact(artifact_service)
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")

        run = engine.execute(template, content, resolution_config)

        # Check manifest in metadata
        assert "manifest" in run.metadata
        manifest_data = run.metadata["manifest"]
        assert manifest_data["run_id"] == run.id
        assert len(manifest_data["resolved_inputs"]) == 1
        assert len(manifest_data["output_artifacts"]) == 1
        assert "timing_metadata" in manifest_data

    def test_execution_with_llm_error(
        self, artifact_service, analyzer, resolver, compiler
    ):
        """Test execution handles LLM errors."""
        # Build a dedicated engine here because the shared ``engine`` fixture
        # is wired to the mock adapter, not the error adapter.
        error_llm = ErrorLLMAdapter("LLM service unavailable")
        engine = PromptExecutionEngine(
            artifact_service,
            analyzer,
            resolver,
            compiler,
            error_llm,
        )
        self._store_artifact(artifact_service)
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")

        run = engine.execute(template, content, resolution_config)

        # Should fail during processing
        assert run.status == RunStatus.FAILED
        # Guard first so a missing message fails as an assertion, not a
        # TypeError from the ``in`` check below.
        assert run.error_message is not None
        assert "LLM service unavailable" in run.error_message

    def test_get_run_by_hash(self, engine, artifact_service):
        """Test retrieving cached run by hash."""
        self._store_artifact(artifact_service)
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")

        run = engine.execute(template, content, resolution_config)

        # Retrieve by hash
        cached = engine.get_run_by_hash(run.input_bundle_hash)
        assert cached is not None
        assert cached.id == run.id

    def test_clear_cache(self, engine, artifact_service):
        """Test clearing run cache."""
        self._store_artifact(artifact_service)
        content = "{{require:dep}}"
        template = self._build_template(content)
        resolution_config = ResolutionConfig(space_id="space-1")

        run = engine.execute(template, content, resolution_config)

        # Cache should have run
        assert engine.get_run_by_hash(run.input_bundle_hash) is not None
        # Clear cache
        engine.clear_cache()
        # Should be gone
        assert engine.get_run_by_hash(run.input_bundle_hash) is None